{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.897269499204114, "eval_steps": 500, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016325864250438759, "grad_norm": 3.8940250873565674, "learning_rate": 2e-08, "loss": 3.8653, "step": 1 }, { "epoch": 0.00032651728500877517, "grad_norm": 4.336153507232666, "learning_rate": 4e-08, "loss": 3.8188, "step": 2 }, { "epoch": 0.0004897759275131628, "grad_norm": 4.264322280883789, "learning_rate": 6.000000000000001e-08, "loss": 3.9845, "step": 3 }, { "epoch": 0.0006530345700175503, "grad_norm": 4.262722015380859, "learning_rate": 8e-08, "loss": 4.0806, "step": 4 }, { "epoch": 0.0008162932125219379, "grad_norm": 4.321222305297852, "learning_rate": 1.0000000000000001e-07, "loss": 3.8829, "step": 5 }, { "epoch": 0.0009795518550263255, "grad_norm": 4.3391337394714355, "learning_rate": 1.2000000000000002e-07, "loss": 3.8705, "step": 6 }, { "epoch": 0.001142810497530713, "grad_norm": 4.004897594451904, "learning_rate": 1.4e-07, "loss": 3.936, "step": 7 }, { "epoch": 0.0013060691400351007, "grad_norm": 4.02420711517334, "learning_rate": 1.6e-07, "loss": 3.7699, "step": 8 }, { "epoch": 0.0014693277825394882, "grad_norm": 4.305410861968994, "learning_rate": 1.8e-07, "loss": 3.8538, "step": 9 }, { "epoch": 0.0016325864250438759, "grad_norm": 4.2583909034729, "learning_rate": 2.0000000000000002e-07, "loss": 4.0255, "step": 10 }, { "epoch": 0.0017958450675482633, "grad_norm": 4.059788703918457, "learning_rate": 2.2e-07, "loss": 3.8658, "step": 11 }, { "epoch": 0.001959103710052651, "grad_norm": 3.9189605712890625, "learning_rate": 2.4000000000000003e-07, "loss": 3.8069, "step": 12 }, { "epoch": 0.0021223623525570383, "grad_norm": 4.069554805755615, "learning_rate": 2.6e-07, "loss": 3.7588, "step": 13 }, { "epoch": 0.002285620995061426, "grad_norm": 4.214900970458984, "learning_rate": 2.8e-07, "loss": 4.0645, "step": 14 }, { "epoch": 0.0024488796375658137, "grad_norm": 4.301984786987305, "learning_rate": 3.0000000000000004e-07, "loss": 3.7761, "step": 15 }, { "epoch": 0.0026121382800702014, "grad_norm": 4.243221282958984, "learning_rate": 3.2e-07, "loss": 3.838, "step": 16 }, { "epoch": 0.0027753969225745886, "grad_norm": 4.22932767868042, "learning_rate": 3.4000000000000003e-07, "loss": 3.9033, "step": 17 }, { "epoch": 0.0029386555650789763, "grad_norm": 3.9647598266601562, "learning_rate": 3.6e-07, "loss": 3.9151, "step": 18 }, { "epoch": 0.003101914207583364, "grad_norm": 4.049233913421631, "learning_rate": 3.8e-07, "loss": 3.6165, "step": 19 }, { "epoch": 0.0032651728500877517, "grad_norm": 3.8059346675872803, "learning_rate": 4.0000000000000003e-07, "loss": 3.8758, "step": 20 }, { "epoch": 0.003428431492592139, "grad_norm": 4.043206691741943, "learning_rate": 4.2000000000000006e-07, "loss": 3.7916, "step": 21 }, { "epoch": 0.0035916901350965267, "grad_norm": 4.2179179191589355, "learning_rate": 4.4e-07, "loss": 3.6441, "step": 22 }, { "epoch": 0.0037549487776009144, "grad_norm": 4.163120746612549, "learning_rate": 4.6000000000000004e-07, "loss": 3.8476, "step": 23 }, { "epoch": 0.003918207420105302, "grad_norm": 4.13502836227417, "learning_rate": 4.800000000000001e-07, "loss": 3.7416, "step": 24 }, { "epoch": 0.00408146606260969, "grad_norm": 4.514458656311035, "learning_rate": 5.000000000000001e-07, "loss": 3.9791, "step": 25 }, { "epoch": 0.004244724705114077, "grad_norm": 4.239928245544434, "learning_rate": 5.2e-07, "loss": 3.835, "step": 26 }, { "epoch": 0.004407983347618464, "grad_norm": 4.167033672332764, "learning_rate": 5.4e-07, "loss": 4.013, "step": 27 }, { "epoch": 0.004571241990122852, "grad_norm": 4.550379276275635, "learning_rate": 5.6e-07, "loss": 3.8221, "step": 28 }, { "epoch": 0.00473450063262724, "grad_norm": 4.293161869049072, "learning_rate": 5.800000000000001e-07, "loss": 3.6854, "step": 29 }, { "epoch": 0.004897759275131627, "grad_norm": 4.095637798309326, "learning_rate": 6.000000000000001e-07, "loss": 3.695, "step": 30 }, { "epoch": 0.005061017917636015, "grad_norm": 4.084077835083008, "learning_rate": 6.200000000000001e-07, "loss": 3.8758, "step": 31 }, { "epoch": 0.005224276560140403, "grad_norm": 3.9697020053863525, "learning_rate": 6.4e-07, "loss": 3.8879, "step": 32 }, { "epoch": 0.00538753520264479, "grad_norm": 4.31948184967041, "learning_rate": 6.6e-07, "loss": 3.9556, "step": 33 }, { "epoch": 0.005550793845149177, "grad_norm": 4.28523588180542, "learning_rate": 6.800000000000001e-07, "loss": 3.9511, "step": 34 }, { "epoch": 0.005714052487653565, "grad_norm": 4.36411190032959, "learning_rate": 7.000000000000001e-07, "loss": 3.8729, "step": 35 }, { "epoch": 0.005877311130157953, "grad_norm": 4.2369585037231445, "learning_rate": 7.2e-07, "loss": 3.8878, "step": 36 }, { "epoch": 0.00604056977266234, "grad_norm": 4.080562114715576, "learning_rate": 7.4e-07, "loss": 3.7371, "step": 37 }, { "epoch": 0.006203828415166728, "grad_norm": 3.838061809539795, "learning_rate": 7.6e-07, "loss": 3.7239, "step": 38 }, { "epoch": 0.006367087057671116, "grad_norm": 4.09160852432251, "learning_rate": 7.8e-07, "loss": 3.6641, "step": 39 }, { "epoch": 0.006530345700175503, "grad_norm": 4.435591220855713, "learning_rate": 8.000000000000001e-07, "loss": 3.9295, "step": 40 }, { "epoch": 0.00669360434267989, "grad_norm": 4.016178131103516, "learning_rate": 8.200000000000001e-07, "loss": 3.6019, "step": 41 }, { "epoch": 0.006856862985184278, "grad_norm": 4.0641655921936035, "learning_rate": 8.400000000000001e-07, "loss": 3.8245, "step": 42 }, { "epoch": 0.007020121627688666, "grad_norm": 4.398347854614258, "learning_rate": 8.6e-07, "loss": 3.869, "step": 43 }, { "epoch": 0.007183380270193053, "grad_norm": 4.051236152648926, "learning_rate": 8.8e-07, "loss": 3.7443, "step": 44 }, { "epoch": 0.007346638912697441, "grad_norm": 4.111652374267578, "learning_rate": 9.000000000000001e-07, "loss": 3.8271, "step": 45 }, { "epoch": 0.007509897555201829, "grad_norm": 4.158977508544922, "learning_rate": 9.200000000000001e-07, "loss": 3.8406, "step": 46 }, { "epoch": 0.007673156197706216, "grad_norm": 4.135075569152832, "learning_rate": 9.400000000000001e-07, "loss": 3.8461, "step": 47 }, { "epoch": 0.007836414840210604, "grad_norm": 4.058779716491699, "learning_rate": 9.600000000000001e-07, "loss": 3.8268, "step": 48 }, { "epoch": 0.007999673482714992, "grad_norm": 4.241808891296387, "learning_rate": 9.800000000000001e-07, "loss": 3.7183, "step": 49 }, { "epoch": 0.00816293212521938, "grad_norm": 4.17903995513916, "learning_rate": 1.0000000000000002e-06, "loss": 3.601, "step": 50 }, { "epoch": 0.008326190767723767, "grad_norm": 4.153787612915039, "learning_rate": 1.02e-06, "loss": 3.5547, "step": 51 }, { "epoch": 0.008489449410228153, "grad_norm": 4.341866970062256, "learning_rate": 1.04e-06, "loss": 3.5597, "step": 52 }, { "epoch": 0.00865270805273254, "grad_norm": 3.7490618228912354, "learning_rate": 1.06e-06, "loss": 3.6132, "step": 53 }, { "epoch": 0.008815966695236929, "grad_norm": 4.080740928649902, "learning_rate": 1.08e-06, "loss": 3.5767, "step": 54 }, { "epoch": 0.008979225337741316, "grad_norm": 4.3830885887146, "learning_rate": 1.1e-06, "loss": 3.6163, "step": 55 }, { "epoch": 0.009142483980245704, "grad_norm": 3.9020934104919434, "learning_rate": 1.12e-06, "loss": 3.7569, "step": 56 }, { "epoch": 0.009305742622750092, "grad_norm": 4.30420446395874, "learning_rate": 1.14e-06, "loss": 3.6726, "step": 57 }, { "epoch": 0.00946900126525448, "grad_norm": 3.929931879043579, "learning_rate": 1.1600000000000001e-06, "loss": 3.6095, "step": 58 }, { "epoch": 0.009632259907758867, "grad_norm": 4.43489933013916, "learning_rate": 1.1800000000000001e-06, "loss": 3.4793, "step": 59 }, { "epoch": 0.009795518550263255, "grad_norm": 4.152418613433838, "learning_rate": 1.2000000000000002e-06, "loss": 3.5372, "step": 60 }, { "epoch": 0.009958777192767642, "grad_norm": 4.115171909332275, "learning_rate": 1.2200000000000002e-06, "loss": 3.4948, "step": 61 }, { "epoch": 0.01012203583527203, "grad_norm": 3.847032308578491, "learning_rate": 1.2400000000000002e-06, "loss": 3.513, "step": 62 }, { "epoch": 0.010285294477776418, "grad_norm": 4.086245059967041, "learning_rate": 1.26e-06, "loss": 3.4033, "step": 63 }, { "epoch": 0.010448553120280805, "grad_norm": 3.958975315093994, "learning_rate": 1.28e-06, "loss": 3.5568, "step": 64 }, { "epoch": 0.010611811762785193, "grad_norm": 3.9708852767944336, "learning_rate": 1.3e-06, "loss": 3.6564, "step": 65 }, { "epoch": 0.01077507040528958, "grad_norm": 3.8756113052368164, "learning_rate": 1.32e-06, "loss": 3.4258, "step": 66 }, { "epoch": 0.010938329047793967, "grad_norm": 3.692439556121826, "learning_rate": 1.34e-06, "loss": 3.5178, "step": 67 }, { "epoch": 0.011101587690298355, "grad_norm": 3.9252305030822754, "learning_rate": 1.3600000000000001e-06, "loss": 3.4414, "step": 68 }, { "epoch": 0.011264846332802742, "grad_norm": 3.8638274669647217, "learning_rate": 1.3800000000000001e-06, "loss": 3.5574, "step": 69 }, { "epoch": 0.01142810497530713, "grad_norm": 3.862952709197998, "learning_rate": 1.4000000000000001e-06, "loss": 3.4303, "step": 70 }, { "epoch": 0.011591363617811518, "grad_norm": 3.78873610496521, "learning_rate": 1.42e-06, "loss": 3.2491, "step": 71 }, { "epoch": 0.011754622260315905, "grad_norm": 3.7835533618927, "learning_rate": 1.44e-06, "loss": 3.3973, "step": 72 }, { "epoch": 0.011917880902820293, "grad_norm": 3.574653148651123, "learning_rate": 1.46e-06, "loss": 3.358, "step": 73 }, { "epoch": 0.01208113954532468, "grad_norm": 3.7934658527374268, "learning_rate": 1.48e-06, "loss": 3.1787, "step": 74 }, { "epoch": 0.012244398187829068, "grad_norm": 3.713402509689331, "learning_rate": 1.5e-06, "loss": 3.2767, "step": 75 }, { "epoch": 0.012407656830333456, "grad_norm": 3.637815237045288, "learning_rate": 1.52e-06, "loss": 3.1629, "step": 76 }, { "epoch": 0.012570915472837844, "grad_norm": 3.807894468307495, "learning_rate": 1.54e-06, "loss": 3.2457, "step": 77 }, { "epoch": 0.012734174115342231, "grad_norm": 3.632390022277832, "learning_rate": 1.56e-06, "loss": 3.2426, "step": 78 }, { "epoch": 0.01289743275784662, "grad_norm": 3.3352434635162354, "learning_rate": 1.5800000000000001e-06, "loss": 3.4808, "step": 79 }, { "epoch": 0.013060691400351007, "grad_norm": 3.6003615856170654, "learning_rate": 1.6000000000000001e-06, "loss": 3.1291, "step": 80 }, { "epoch": 0.013223950042855395, "grad_norm": 3.5547585487365723, "learning_rate": 1.6200000000000002e-06, "loss": 3.1638, "step": 81 }, { "epoch": 0.01338720868535978, "grad_norm": 3.4558591842651367, "learning_rate": 1.6400000000000002e-06, "loss": 3.1209, "step": 82 }, { "epoch": 0.013550467327864168, "grad_norm": 3.6796300411224365, "learning_rate": 1.6600000000000002e-06, "loss": 3.0756, "step": 83 }, { "epoch": 0.013713725970368556, "grad_norm": 3.5014994144439697, "learning_rate": 1.6800000000000002e-06, "loss": 2.8963, "step": 84 }, { "epoch": 0.013876984612872944, "grad_norm": 3.792721748352051, "learning_rate": 1.7000000000000002e-06, "loss": 3.0179, "step": 85 }, { "epoch": 0.014040243255377331, "grad_norm": 3.840487241744995, "learning_rate": 1.72e-06, "loss": 3.1982, "step": 86 }, { "epoch": 0.014203501897881719, "grad_norm": 3.6570310592651367, "learning_rate": 1.74e-06, "loss": 3.0096, "step": 87 }, { "epoch": 0.014366760540386107, "grad_norm": 3.714564800262451, "learning_rate": 1.76e-06, "loss": 3.0271, "step": 88 }, { "epoch": 0.014530019182890494, "grad_norm": 3.4747531414031982, "learning_rate": 1.7800000000000001e-06, "loss": 2.7865, "step": 89 }, { "epoch": 0.014693277825394882, "grad_norm": 3.772432804107666, "learning_rate": 1.8000000000000001e-06, "loss": 2.9273, "step": 90 }, { "epoch": 0.01485653646789927, "grad_norm": 4.21910285949707, "learning_rate": 1.8200000000000002e-06, "loss": 3.0643, "step": 91 }, { "epoch": 0.015019795110403657, "grad_norm": 4.24638557434082, "learning_rate": 1.8400000000000002e-06, "loss": 2.7228, "step": 92 }, { "epoch": 0.015183053752908045, "grad_norm": 3.876889944076538, "learning_rate": 1.8600000000000002e-06, "loss": 2.6013, "step": 93 }, { "epoch": 0.015346312395412433, "grad_norm": 4.061342716217041, "learning_rate": 1.8800000000000002e-06, "loss": 2.6742, "step": 94 }, { "epoch": 0.01550957103791682, "grad_norm": 4.50308084487915, "learning_rate": 1.9000000000000002e-06, "loss": 2.5976, "step": 95 }, { "epoch": 0.015672829680421208, "grad_norm": 4.5268144607543945, "learning_rate": 1.9200000000000003e-06, "loss": 2.857, "step": 96 }, { "epoch": 0.015836088322925594, "grad_norm": 4.298318386077881, "learning_rate": 1.94e-06, "loss": 2.832, "step": 97 }, { "epoch": 0.015999346965429984, "grad_norm": 4.293084144592285, "learning_rate": 1.9600000000000003e-06, "loss": 2.6671, "step": 98 }, { "epoch": 0.01616260560793437, "grad_norm": 4.648829460144043, "learning_rate": 1.98e-06, "loss": 2.6343, "step": 99 }, { "epoch": 0.01632586425043876, "grad_norm": 3.595898151397705, "learning_rate": 2.0000000000000003e-06, "loss": 2.7224, "step": 100 }, { "epoch": 0.016489122892943145, "grad_norm": 4.022105693817139, "learning_rate": 2.02e-06, "loss": 2.513, "step": 101 }, { "epoch": 0.016652381535447534, "grad_norm": 3.950529098510742, "learning_rate": 2.04e-06, "loss": 2.687, "step": 102 }, { "epoch": 0.01681564017795192, "grad_norm": 4.234622478485107, "learning_rate": 2.06e-06, "loss": 2.5834, "step": 103 }, { "epoch": 0.016978898820456306, "grad_norm": 4.103294372558594, "learning_rate": 2.08e-06, "loss": 2.4635, "step": 104 }, { "epoch": 0.017142157462960696, "grad_norm": 3.781449317932129, "learning_rate": 2.1000000000000002e-06, "loss": 2.5409, "step": 105 }, { "epoch": 0.01730541610546508, "grad_norm": 3.5710878372192383, "learning_rate": 2.12e-06, "loss": 2.3277, "step": 106 }, { "epoch": 0.01746867474796947, "grad_norm": 3.516742706298828, "learning_rate": 2.1400000000000003e-06, "loss": 2.4673, "step": 107 }, { "epoch": 0.017631933390473857, "grad_norm": 3.279285430908203, "learning_rate": 2.16e-06, "loss": 2.2148, "step": 108 }, { "epoch": 0.017795192032978246, "grad_norm": 3.3725409507751465, "learning_rate": 2.1800000000000003e-06, "loss": 2.4683, "step": 109 }, { "epoch": 0.017958450675482632, "grad_norm": 3.2345635890960693, "learning_rate": 2.2e-06, "loss": 2.5339, "step": 110 }, { "epoch": 0.018121709317987022, "grad_norm": 3.122805595397949, "learning_rate": 2.2200000000000003e-06, "loss": 2.1799, "step": 111 }, { "epoch": 0.018284967960491408, "grad_norm": 3.4444949626922607, "learning_rate": 2.24e-06, "loss": 2.3254, "step": 112 }, { "epoch": 0.018448226602995797, "grad_norm": 2.841085910797119, "learning_rate": 2.2600000000000004e-06, "loss": 2.2956, "step": 113 }, { "epoch": 0.018611485245500183, "grad_norm": 2.928736448287964, "learning_rate": 2.28e-06, "loss": 2.3472, "step": 114 }, { "epoch": 0.018774743888004573, "grad_norm": 3.3440299034118652, "learning_rate": 2.3000000000000004e-06, "loss": 2.3592, "step": 115 }, { "epoch": 0.01893800253050896, "grad_norm": 3.127239227294922, "learning_rate": 2.3200000000000002e-06, "loss": 2.1146, "step": 116 }, { "epoch": 0.019101261173013348, "grad_norm": 3.4106273651123047, "learning_rate": 2.3400000000000005e-06, "loss": 2.2139, "step": 117 }, { "epoch": 0.019264519815517734, "grad_norm": 3.391380786895752, "learning_rate": 2.3600000000000003e-06, "loss": 2.117, "step": 118 }, { "epoch": 0.01942777845802212, "grad_norm": 3.141145944595337, "learning_rate": 2.38e-06, "loss": 2.2376, "step": 119 }, { "epoch": 0.01959103710052651, "grad_norm": 3.175117015838623, "learning_rate": 2.4000000000000003e-06, "loss": 2.1991, "step": 120 }, { "epoch": 0.019754295743030895, "grad_norm": 3.890010118484497, "learning_rate": 2.42e-06, "loss": 2.0528, "step": 121 }, { "epoch": 0.019917554385535285, "grad_norm": 4.50818395614624, "learning_rate": 2.4400000000000004e-06, "loss": 2.178, "step": 122 }, { "epoch": 0.02008081302803967, "grad_norm": 3.7490060329437256, "learning_rate": 2.46e-06, "loss": 2.0463, "step": 123 }, { "epoch": 0.02024407167054406, "grad_norm": 3.70375919342041, "learning_rate": 2.4800000000000004e-06, "loss": 1.812, "step": 124 }, { "epoch": 0.020407330313048446, "grad_norm": 3.360556125640869, "learning_rate": 2.5e-06, "loss": 2.1769, "step": 125 }, { "epoch": 0.020570588955552836, "grad_norm": 3.3051016330718994, "learning_rate": 2.52e-06, "loss": 1.9025, "step": 126 }, { "epoch": 0.02073384759805722, "grad_norm": 3.0747995376586914, "learning_rate": 2.5400000000000002e-06, "loss": 1.9907, "step": 127 }, { "epoch": 0.02089710624056161, "grad_norm": 2.942265748977661, "learning_rate": 2.56e-06, "loss": 1.9382, "step": 128 }, { "epoch": 0.021060364883065997, "grad_norm": 2.773566722869873, "learning_rate": 2.5800000000000003e-06, "loss": 1.8774, "step": 129 }, { "epoch": 0.021223623525570386, "grad_norm": 2.6631617546081543, "learning_rate": 2.6e-06, "loss": 1.9356, "step": 130 }, { "epoch": 0.021386882168074772, "grad_norm": 2.6138885021209717, "learning_rate": 2.6200000000000003e-06, "loss": 1.8675, "step": 131 }, { "epoch": 0.02155014081057916, "grad_norm": 2.6504719257354736, "learning_rate": 2.64e-06, "loss": 1.9883, "step": 132 }, { "epoch": 0.021713399453083548, "grad_norm": 2.355680227279663, "learning_rate": 2.6600000000000004e-06, "loss": 1.9666, "step": 133 }, { "epoch": 0.021876658095587934, "grad_norm": 2.1541998386383057, "learning_rate": 2.68e-06, "loss": 1.7268, "step": 134 }, { "epoch": 0.022039916738092323, "grad_norm": 2.713064670562744, "learning_rate": 2.7000000000000004e-06, "loss": 1.7778, "step": 135 }, { "epoch": 0.02220317538059671, "grad_norm": 2.9988017082214355, "learning_rate": 2.7200000000000002e-06, "loss": 1.8801, "step": 136 }, { "epoch": 0.0223664340231011, "grad_norm": 3.5328543186187744, "learning_rate": 2.7400000000000004e-06, "loss": 1.796, "step": 137 }, { "epoch": 0.022529692665605484, "grad_norm": 2.6371665000915527, "learning_rate": 2.7600000000000003e-06, "loss": 2.0397, "step": 138 }, { "epoch": 0.022692951308109874, "grad_norm": 2.2349350452423096, "learning_rate": 2.7800000000000005e-06, "loss": 2.0042, "step": 139 }, { "epoch": 0.02285620995061426, "grad_norm": 2.0638904571533203, "learning_rate": 2.8000000000000003e-06, "loss": 1.9629, "step": 140 }, { "epoch": 0.02301946859311865, "grad_norm": 3.493224859237671, "learning_rate": 2.82e-06, "loss": 1.8393, "step": 141 }, { "epoch": 0.023182727235623035, "grad_norm": 2.459581136703491, "learning_rate": 2.84e-06, "loss": 1.8767, "step": 142 }, { "epoch": 0.023345985878127425, "grad_norm": 2.1086788177490234, "learning_rate": 2.86e-06, "loss": 1.6964, "step": 143 }, { "epoch": 0.02350924452063181, "grad_norm": 2.0590062141418457, "learning_rate": 2.88e-06, "loss": 1.9521, "step": 144 }, { "epoch": 0.0236725031631362, "grad_norm": 2.1320693492889404, "learning_rate": 2.9e-06, "loss": 1.8419, "step": 145 }, { "epoch": 0.023835761805640586, "grad_norm": 1.9610185623168945, "learning_rate": 2.92e-06, "loss": 1.8512, "step": 146 }, { "epoch": 0.023999020448144975, "grad_norm": 2.247410297393799, "learning_rate": 2.9400000000000002e-06, "loss": 1.9431, "step": 147 }, { "epoch": 0.02416227909064936, "grad_norm": 1.956328272819519, "learning_rate": 2.96e-06, "loss": 2.014, "step": 148 }, { "epoch": 0.024325537733153747, "grad_norm": 2.4147891998291016, "learning_rate": 2.9800000000000003e-06, "loss": 1.9079, "step": 149 }, { "epoch": 0.024488796375658137, "grad_norm": 2.321148633956909, "learning_rate": 3e-06, "loss": 1.9379, "step": 150 }, { "epoch": 0.024652055018162523, "grad_norm": 2.513935089111328, "learning_rate": 3.0200000000000003e-06, "loss": 1.7625, "step": 151 }, { "epoch": 0.024815313660666912, "grad_norm": 2.491192579269409, "learning_rate": 3.04e-06, "loss": 1.9114, "step": 152 }, { "epoch": 0.024978572303171298, "grad_norm": 2.820672035217285, "learning_rate": 3.0600000000000003e-06, "loss": 1.7354, "step": 153 }, { "epoch": 0.025141830945675687, "grad_norm": 3.12313175201416, "learning_rate": 3.08e-06, "loss": 1.8837, "step": 154 }, { "epoch": 0.025305089588180073, "grad_norm": 2.505821466445923, "learning_rate": 3.1000000000000004e-06, "loss": 1.8003, "step": 155 }, { "epoch": 0.025468348230684463, "grad_norm": 2.0326502323150635, "learning_rate": 3.12e-06, "loss": 1.6482, "step": 156 }, { "epoch": 0.02563160687318885, "grad_norm": 2.3250749111175537, "learning_rate": 3.1400000000000004e-06, "loss": 1.7049, "step": 157 }, { "epoch": 0.02579486551569324, "grad_norm": 2.5084614753723145, "learning_rate": 3.1600000000000002e-06, "loss": 1.9122, "step": 158 }, { "epoch": 0.025958124158197624, "grad_norm": 2.937220573425293, "learning_rate": 3.1800000000000005e-06, "loss": 1.6361, "step": 159 }, { "epoch": 0.026121382800702014, "grad_norm": 1.8123136758804321, "learning_rate": 3.2000000000000003e-06, "loss": 1.8041, "step": 160 }, { "epoch": 0.0262846414432064, "grad_norm": 3.2551817893981934, "learning_rate": 3.2200000000000005e-06, "loss": 1.7701, "step": 161 }, { "epoch": 0.02644790008571079, "grad_norm": 1.8641775846481323, "learning_rate": 3.2400000000000003e-06, "loss": 1.573, "step": 162 }, { "epoch": 0.026611158728215175, "grad_norm": 1.85505211353302, "learning_rate": 3.2600000000000006e-06, "loss": 1.7491, "step": 163 }, { "epoch": 0.02677441737071956, "grad_norm": 2.821972131729126, "learning_rate": 3.2800000000000004e-06, "loss": 1.7351, "step": 164 }, { "epoch": 0.02693767601322395, "grad_norm": 2.040527820587158, "learning_rate": 3.3000000000000006e-06, "loss": 1.9865, "step": 165 }, { "epoch": 0.027100934655728336, "grad_norm": 2.534010648727417, "learning_rate": 3.3200000000000004e-06, "loss": 1.551, "step": 166 }, { "epoch": 0.027264193298232726, "grad_norm": 1.8632395267486572, "learning_rate": 3.3400000000000006e-06, "loss": 1.6681, "step": 167 }, { "epoch": 0.027427451940737112, "grad_norm": 1.8485156297683716, "learning_rate": 3.3600000000000004e-06, "loss": 1.8248, "step": 168 }, { "epoch": 0.0275907105832415, "grad_norm": 1.8782747983932495, "learning_rate": 3.3800000000000007e-06, "loss": 1.7476, "step": 169 }, { "epoch": 0.027753969225745887, "grad_norm": 1.9981833696365356, "learning_rate": 3.4000000000000005e-06, "loss": 1.8497, "step": 170 }, { "epoch": 0.027917227868250277, "grad_norm": 1.8368138074874878, "learning_rate": 3.4200000000000007e-06, "loss": 1.6365, "step": 171 }, { "epoch": 0.028080486510754663, "grad_norm": 1.9599717855453491, "learning_rate": 3.44e-06, "loss": 1.7155, "step": 172 }, { "epoch": 0.028243745153259052, "grad_norm": 1.8508329391479492, "learning_rate": 3.46e-06, "loss": 1.7757, "step": 173 }, { "epoch": 0.028407003795763438, "grad_norm": 1.6875028610229492, "learning_rate": 3.48e-06, "loss": 1.6341, "step": 174 }, { "epoch": 0.028570262438267827, "grad_norm": 1.8853875398635864, "learning_rate": 3.5e-06, "loss": 1.7526, "step": 175 }, { "epoch": 0.028733521080772213, "grad_norm": 1.913346767425537, "learning_rate": 3.52e-06, "loss": 1.595, "step": 176 }, { "epoch": 0.0288967797232766, "grad_norm": 2.122236967086792, "learning_rate": 3.54e-06, "loss": 1.6908, "step": 177 }, { "epoch": 0.02906003836578099, "grad_norm": 1.7405691146850586, "learning_rate": 3.5600000000000002e-06, "loss": 1.5098, "step": 178 }, { "epoch": 0.029223297008285375, "grad_norm": 1.8728768825531006, "learning_rate": 3.58e-06, "loss": 1.6391, "step": 179 }, { "epoch": 0.029386555650789764, "grad_norm": 2.713186502456665, "learning_rate": 3.6000000000000003e-06, "loss": 1.624, "step": 180 }, { "epoch": 0.02954981429329415, "grad_norm": 2.450113296508789, "learning_rate": 3.62e-06, "loss": 1.4418, "step": 181 }, { "epoch": 0.02971307293579854, "grad_norm": 1.882285237312317, "learning_rate": 3.6400000000000003e-06, "loss": 1.69, "step": 182 }, { "epoch": 0.029876331578302925, "grad_norm": 2.0766921043395996, "learning_rate": 3.66e-06, "loss": 1.6312, "step": 183 }, { "epoch": 0.030039590220807315, "grad_norm": 1.9895544052124023, "learning_rate": 3.6800000000000003e-06, "loss": 1.6011, "step": 184 }, { "epoch": 0.0302028488633117, "grad_norm": 1.9018754959106445, "learning_rate": 3.7e-06, "loss": 1.724, "step": 185 }, { "epoch": 0.03036610750581609, "grad_norm": 1.8760651350021362, "learning_rate": 3.7200000000000004e-06, "loss": 1.6839, "step": 186 }, { "epoch": 0.030529366148320476, "grad_norm": 1.8383901119232178, "learning_rate": 3.74e-06, "loss": 1.5709, "step": 187 }, { "epoch": 0.030692624790824866, "grad_norm": 1.776957392692566, "learning_rate": 3.7600000000000004e-06, "loss": 1.7752, "step": 188 }, { "epoch": 0.03085588343332925, "grad_norm": 2.0259644985198975, "learning_rate": 3.7800000000000002e-06, "loss": 1.8049, "step": 189 }, { "epoch": 0.03101914207583364, "grad_norm": 1.9356402158737183, "learning_rate": 3.8000000000000005e-06, "loss": 1.5945, "step": 190 }, { "epoch": 0.031182400718338027, "grad_norm": 1.7910946607589722, "learning_rate": 3.820000000000001e-06, "loss": 1.7923, "step": 191 }, { "epoch": 0.031345659360842416, "grad_norm": 1.7282851934432983, "learning_rate": 3.8400000000000005e-06, "loss": 1.5659, "step": 192 }, { "epoch": 0.0315089180033468, "grad_norm": 1.7401597499847412, "learning_rate": 3.86e-06, "loss": 1.6694, "step": 193 }, { "epoch": 0.03167217664585119, "grad_norm": 1.8794223070144653, "learning_rate": 3.88e-06, "loss": 1.7232, "step": 194 }, { "epoch": 0.03183543528835558, "grad_norm": 1.9132089614868164, "learning_rate": 3.900000000000001e-06, "loss": 1.6076, "step": 195 }, { "epoch": 0.03199869393085997, "grad_norm": 1.73985755443573, "learning_rate": 3.920000000000001e-06, "loss": 1.744, "step": 196 }, { "epoch": 0.03216195257336435, "grad_norm": 1.8678113222122192, "learning_rate": 3.94e-06, "loss": 1.6197, "step": 197 }, { "epoch": 0.03232521121586874, "grad_norm": 1.9430009126663208, "learning_rate": 3.96e-06, "loss": 1.7997, "step": 198 }, { "epoch": 0.03248846985837313, "grad_norm": 2.0013115406036377, "learning_rate": 3.980000000000001e-06, "loss": 1.6285, "step": 199 }, { "epoch": 0.03265172850087752, "grad_norm": 1.7988117933273315, "learning_rate": 4.000000000000001e-06, "loss": 1.7269, "step": 200 }, { "epoch": 0.0328149871433819, "grad_norm": 1.9686278104782104, "learning_rate": 4.0200000000000005e-06, "loss": 1.7251, "step": 201 }, { "epoch": 0.03297824578588629, "grad_norm": 1.7730387449264526, "learning_rate": 4.04e-06, "loss": 1.6758, "step": 202 }, { "epoch": 0.03314150442839068, "grad_norm": 1.7975866794586182, "learning_rate": 4.060000000000001e-06, "loss": 1.4712, "step": 203 }, { "epoch": 0.03330476307089507, "grad_norm": 1.8847157955169678, "learning_rate": 4.08e-06, "loss": 1.613, "step": 204 }, { "epoch": 0.03346802171339945, "grad_norm": 1.9041622877120972, "learning_rate": 4.1e-06, "loss": 1.6296, "step": 205 }, { "epoch": 0.03363128035590384, "grad_norm": 1.8511282205581665, "learning_rate": 4.12e-06, "loss": 1.4316, "step": 206 }, { "epoch": 0.03379453899840823, "grad_norm": 2.1672275066375732, "learning_rate": 4.14e-06, "loss": 1.779, "step": 207 }, { "epoch": 0.03395779764091261, "grad_norm": 1.7254951000213623, "learning_rate": 4.16e-06, "loss": 1.4422, "step": 208 }, { "epoch": 0.034121056283417, "grad_norm": 2.2755041122436523, "learning_rate": 4.18e-06, "loss": 1.714, "step": 209 }, { "epoch": 0.03428431492592139, "grad_norm": 2.1062262058258057, "learning_rate": 4.2000000000000004e-06, "loss": 1.737, "step": 210 }, { "epoch": 0.03444757356842578, "grad_norm": 1.9646141529083252, "learning_rate": 4.22e-06, "loss": 1.5822, "step": 211 }, { "epoch": 0.03461083221093016, "grad_norm": 2.7861504554748535, "learning_rate": 4.24e-06, "loss": 1.6672, "step": 212 }, { "epoch": 0.03477409085343455, "grad_norm": 2.2591235637664795, "learning_rate": 4.26e-06, "loss": 1.4309, "step": 213 }, { "epoch": 0.03493734949593894, "grad_norm": 2.1734976768493652, "learning_rate": 4.2800000000000005e-06, "loss": 1.6467, "step": 214 }, { "epoch": 0.03510060813844333, "grad_norm": 2.1867618560791016, "learning_rate": 4.3e-06, "loss": 1.5974, "step": 215 }, { "epoch": 0.035263866780947714, "grad_norm": 1.877331018447876, "learning_rate": 4.32e-06, "loss": 1.5209, "step": 216 }, { "epoch": 0.035427125423452104, "grad_norm": 1.8426438570022583, "learning_rate": 4.34e-06, "loss": 1.4206, "step": 217 }, { "epoch": 0.03559038406595649, "grad_norm": 2.073631763458252, "learning_rate": 4.360000000000001e-06, "loss": 1.6185, "step": 218 }, { "epoch": 0.03575364270846088, "grad_norm": 1.9493093490600586, "learning_rate": 4.38e-06, "loss": 1.5834, "step": 219 }, { "epoch": 0.035916901350965265, "grad_norm": 1.9975570440292358, "learning_rate": 4.4e-06, "loss": 1.4099, "step": 220 }, { "epoch": 0.036080159993469654, "grad_norm": 1.8657957315444946, "learning_rate": 4.42e-06, "loss": 1.3332, "step": 221 }, { "epoch": 0.036243418635974044, "grad_norm": 2.043821334838867, "learning_rate": 4.440000000000001e-06, "loss": 1.6207, "step": 222 }, { "epoch": 0.036406677278478426, "grad_norm": 2.055511236190796, "learning_rate": 4.4600000000000005e-06, "loss": 1.621, "step": 223 }, { "epoch": 0.036569935920982816, "grad_norm": 2.0274274349212646, "learning_rate": 4.48e-06, "loss": 1.7844, "step": 224 }, { "epoch": 0.036733194563487205, "grad_norm": 1.7602957487106323, "learning_rate": 4.5e-06, "loss": 1.4642, "step": 225 }, { "epoch": 0.036896453205991595, "grad_norm": 1.9455811977386475, "learning_rate": 4.520000000000001e-06, "loss": 1.7599, "step": 226 }, { "epoch": 0.03705971184849598, "grad_norm": 1.9864171743392944, "learning_rate": 4.540000000000001e-06, "loss": 1.7501, "step": 227 }, { "epoch": 0.037222970491000366, "grad_norm": 1.9279485940933228, "learning_rate": 4.56e-06, "loss": 1.6027, "step": 228 }, { "epoch": 0.037386229133504756, "grad_norm": 2.0067670345306396, "learning_rate": 4.58e-06, "loss": 1.5474, "step": 229 }, { "epoch": 0.037549487776009145, "grad_norm": 1.881588101387024, "learning_rate": 4.600000000000001e-06, "loss": 1.5772, "step": 230 }, { "epoch": 0.03771274641851353, "grad_norm": 1.8529682159423828, "learning_rate": 4.620000000000001e-06, "loss": 1.4989, "step": 231 }, { "epoch": 0.03787600506101792, "grad_norm": 1.801202654838562, "learning_rate": 4.6400000000000005e-06, "loss": 1.5755, "step": 232 }, { "epoch": 0.03803926370352231, "grad_norm": 1.8933794498443604, "learning_rate": 4.66e-06, "loss": 1.4847, "step": 233 }, { "epoch": 0.038202522346026696, "grad_norm": 1.9672949314117432, "learning_rate": 4.680000000000001e-06, "loss": 1.6988, "step": 234 }, { "epoch": 0.03836578098853108, "grad_norm": 1.932323694229126, "learning_rate": 4.7e-06, "loss": 1.5146, "step": 235 }, { "epoch": 0.03852903963103547, "grad_norm": 1.9558227062225342, "learning_rate": 4.7200000000000005e-06, "loss": 1.4913, "step": 236 }, { "epoch": 0.03869229827353986, "grad_norm": 1.8266215324401855, "learning_rate": 4.74e-06, "loss": 1.5527, "step": 237 }, { "epoch": 0.03885555691604424, "grad_norm": 1.8291640281677246, "learning_rate": 4.76e-06, "loss": 1.4408, "step": 238 }, { "epoch": 0.03901881555854863, "grad_norm": 2.044167995452881, "learning_rate": 4.78e-06, "loss": 1.5259, "step": 239 }, { "epoch": 0.03918207420105302, "grad_norm": 2.1043365001678467, "learning_rate": 4.800000000000001e-06, "loss": 1.4876, "step": 240 }, { "epoch": 0.03934533284355741, "grad_norm": 2.008300304412842, "learning_rate": 4.8200000000000004e-06, "loss": 1.5249, "step": 241 }, { "epoch": 0.03950859148606179, "grad_norm": 1.898972511291504, "learning_rate": 4.84e-06, "loss": 1.5074, "step": 242 }, { "epoch": 0.03967185012856618, "grad_norm": 2.0210916996002197, "learning_rate": 4.86e-06, "loss": 1.6036, "step": 243 }, { "epoch": 0.03983510877107057, "grad_norm": 1.9798952341079712, "learning_rate": 4.880000000000001e-06, "loss": 1.571, "step": 244 }, { "epoch": 0.03999836741357496, "grad_norm": 2.170804977416992, "learning_rate": 4.9000000000000005e-06, "loss": 1.812, "step": 245 }, { "epoch": 0.04016162605607934, "grad_norm": 2.065699577331543, "learning_rate": 4.92e-06, "loss": 1.5487, "step": 246 }, { "epoch": 0.04032488469858373, "grad_norm": 2.055690288543701, "learning_rate": 4.94e-06, "loss": 1.2761, "step": 247 }, { "epoch": 0.04048814334108812, "grad_norm": 2.165557384490967, "learning_rate": 4.960000000000001e-06, "loss": 1.4651, "step": 248 }, { "epoch": 0.04065140198359251, "grad_norm": 2.200087547302246, "learning_rate": 4.980000000000001e-06, "loss": 1.7171, "step": 249 }, { "epoch": 0.04081466062609689, "grad_norm": 2.0792741775512695, "learning_rate": 5e-06, "loss": 1.6927, "step": 250 }, { "epoch": 0.04097791926860128, "grad_norm": 1.971835970878601, "learning_rate": 5.02e-06, "loss": 1.5153, "step": 251 }, { "epoch": 0.04114117791110567, "grad_norm": 2.1257874965667725, "learning_rate": 5.04e-06, "loss": 1.481, "step": 252 }, { "epoch": 0.041304436553610054, "grad_norm": 1.9549224376678467, "learning_rate": 5.060000000000001e-06, "loss": 1.4863, "step": 253 }, { "epoch": 0.04146769519611444, "grad_norm": 1.9676553010940552, "learning_rate": 5.0800000000000005e-06, "loss": 1.3468, "step": 254 }, { "epoch": 0.04163095383861883, "grad_norm": 2.221905469894409, "learning_rate": 5.1e-06, "loss": 1.5946, "step": 255 }, { "epoch": 0.04179421248112322, "grad_norm": 2.2209556102752686, "learning_rate": 5.12e-06, "loss": 1.6235, "step": 256 }, { "epoch": 0.041957471123627604, "grad_norm": 1.9270178079605103, "learning_rate": 5.140000000000001e-06, "loss": 1.5809, "step": 257 }, { "epoch": 0.042120729766131994, "grad_norm": 2.008897066116333, "learning_rate": 5.1600000000000006e-06, "loss": 1.3303, "step": 258 }, { "epoch": 0.04228398840863638, "grad_norm": 2.0664780139923096, "learning_rate": 5.18e-06, "loss": 1.458, "step": 259 }, { "epoch": 0.04244724705114077, "grad_norm": 1.9574780464172363, "learning_rate": 5.2e-06, "loss": 1.557, "step": 260 }, { "epoch": 0.042610505693645155, "grad_norm": 2.3997581005096436, "learning_rate": 5.220000000000001e-06, "loss": 1.3691, "step": 261 }, { "epoch": 0.042773764336149545, "grad_norm": 2.1015725135803223, "learning_rate": 5.240000000000001e-06, "loss": 1.524, "step": 262 }, { "epoch": 0.042937022978653934, "grad_norm": 2.618940830230713, "learning_rate": 5.2600000000000005e-06, "loss": 1.6965, "step": 263 }, { "epoch": 0.04310028162115832, "grad_norm": 2.099616527557373, "learning_rate": 5.28e-06, "loss": 1.4804, "step": 264 }, { "epoch": 0.043263540263662706, "grad_norm": 2.286447525024414, "learning_rate": 5.300000000000001e-06, "loss": 1.5715, "step": 265 }, { "epoch": 0.043426798906167095, "grad_norm": 2.096890926361084, "learning_rate": 5.320000000000001e-06, "loss": 1.5341, "step": 266 }, { "epoch": 0.043590057548671485, "grad_norm": 2.278791904449463, "learning_rate": 5.3400000000000005e-06, "loss": 1.3746, "step": 267 }, { "epoch": 0.04375331619117587, "grad_norm": 2.0742835998535156, "learning_rate": 5.36e-06, "loss": 1.3388, "step": 268 }, { "epoch": 0.04391657483368026, "grad_norm": 2.197054624557495, "learning_rate": 5.380000000000001e-06, "loss": 1.6083, "step": 269 }, { "epoch": 0.044079833476184646, "grad_norm": 2.1146371364593506, "learning_rate": 5.400000000000001e-06, "loss": 1.4871, "step": 270 }, { "epoch": 0.044243092118689036, "grad_norm": 2.154447317123413, "learning_rate": 5.420000000000001e-06, "loss": 1.6272, "step": 271 }, { "epoch": 0.04440635076119342, "grad_norm": 2.0834007263183594, "learning_rate": 5.4400000000000004e-06, "loss": 1.3383, "step": 272 }, { "epoch": 0.04456960940369781, "grad_norm": 2.0731825828552246, "learning_rate": 5.460000000000001e-06, "loss": 1.4728, "step": 273 }, { "epoch": 0.0447328680462022, "grad_norm": 2.3021061420440674, "learning_rate": 5.480000000000001e-06, "loss": 1.7726, "step": 274 }, { "epoch": 0.044896126688706586, "grad_norm": 1.999867558479309, "learning_rate": 5.500000000000001e-06, "loss": 1.47, "step": 275 }, { "epoch": 0.04505938533121097, "grad_norm": 2.146240711212158, "learning_rate": 5.5200000000000005e-06, "loss": 1.5197, "step": 276 }, { "epoch": 0.04522264397371536, "grad_norm": 2.3582894802093506, "learning_rate": 5.540000000000001e-06, "loss": 1.3452, "step": 277 }, { "epoch": 0.04538590261621975, "grad_norm": 2.2777888774871826, "learning_rate": 5.560000000000001e-06, "loss": 1.5316, "step": 278 }, { "epoch": 0.04554916125872414, "grad_norm": 2.1270227432250977, "learning_rate": 5.580000000000001e-06, "loss": 1.4538, "step": 279 }, { "epoch": 0.04571241990122852, "grad_norm": 2.0616724491119385, "learning_rate": 5.600000000000001e-06, "loss": 1.3011, "step": 280 }, { "epoch": 0.04587567854373291, "grad_norm": 2.2308924198150635, "learning_rate": 5.620000000000001e-06, "loss": 1.3037, "step": 281 }, { "epoch": 0.0460389371862373, "grad_norm": 2.285849094390869, "learning_rate": 5.64e-06, "loss": 1.3813, "step": 282 }, { "epoch": 0.04620219582874168, "grad_norm": 2.205855131149292, "learning_rate": 5.66e-06, "loss": 1.5657, "step": 283 }, { "epoch": 0.04636545447124607, "grad_norm": 2.2026219367980957, "learning_rate": 5.68e-06, "loss": 1.4891, "step": 284 }, { "epoch": 0.04652871311375046, "grad_norm": 2.3156931400299072, "learning_rate": 5.7e-06, "loss": 1.4659, "step": 285 }, { "epoch": 0.04669197175625485, "grad_norm": 2.173173666000366, "learning_rate": 5.72e-06, "loss": 1.4059, "step": 286 }, { "epoch": 0.04685523039875923, "grad_norm": 2.629859447479248, "learning_rate": 5.74e-06, "loss": 1.5386, "step": 287 }, { "epoch": 0.04701848904126362, "grad_norm": 2.1228151321411133, "learning_rate": 5.76e-06, "loss": 1.3514, "step": 288 }, { "epoch": 0.04718174768376801, "grad_norm": 2.010709524154663, "learning_rate": 5.78e-06, "loss": 1.3505, "step": 289 }, { "epoch": 0.0473450063262724, "grad_norm": 2.4754767417907715, "learning_rate": 5.8e-06, "loss": 1.6201, "step": 290 }, { "epoch": 0.04750826496877678, "grad_norm": 2.2102432250976562, "learning_rate": 5.82e-06, "loss": 1.4927, "step": 291 }, { "epoch": 0.04767152361128117, "grad_norm": 2.2376999855041504, "learning_rate": 5.84e-06, "loss": 1.5402, "step": 292 }, { "epoch": 0.04783478225378556, "grad_norm": 2.2875428199768066, "learning_rate": 5.86e-06, "loss": 1.4596, "step": 293 }, { "epoch": 0.04799804089628995, "grad_norm": 2.315011739730835, "learning_rate": 5.8800000000000005e-06, "loss": 1.3417, "step": 294 }, { "epoch": 0.04816129953879433, "grad_norm": 2.457437753677368, "learning_rate": 5.9e-06, "loss": 1.6215, "step": 295 }, { "epoch": 0.04832455818129872, "grad_norm": 2.4522922039031982, "learning_rate": 5.92e-06, "loss": 1.6354, "step": 296 }, { "epoch": 0.04848781682380311, "grad_norm": 2.2293028831481934, "learning_rate": 5.94e-06, "loss": 1.4854, "step": 297 }, { "epoch": 0.048651075466307495, "grad_norm": 2.1793153285980225, "learning_rate": 5.9600000000000005e-06, "loss": 1.2794, "step": 298 }, { "epoch": 0.048814334108811884, "grad_norm": 2.2398061752319336, "learning_rate": 5.98e-06, "loss": 1.5334, "step": 299 }, { "epoch": 0.04897759275131627, "grad_norm": 2.171593427658081, "learning_rate": 6e-06, "loss": 1.584, "step": 300 }, { "epoch": 0.04914085139382066, "grad_norm": 2.1772985458374023, "learning_rate": 6.02e-06, "loss": 1.4581, "step": 301 }, { "epoch": 0.049304110036325045, "grad_norm": 2.2488367557525635, "learning_rate": 6.040000000000001e-06, "loss": 1.3559, "step": 302 }, { "epoch": 0.049467368678829435, "grad_norm": 2.2670233249664307, "learning_rate": 6.0600000000000004e-06, "loss": 1.4895, "step": 303 }, { "epoch": 0.049630627321333824, "grad_norm": 2.404139757156372, "learning_rate": 6.08e-06, "loss": 1.7236, "step": 304 }, { "epoch": 0.049793885963838214, "grad_norm": 2.2703702449798584, "learning_rate": 6.1e-06, "loss": 1.4239, "step": 305 }, { "epoch": 0.049957144606342596, "grad_norm": 2.321847677230835, "learning_rate": 6.120000000000001e-06, "loss": 1.6767, "step": 306 }, { "epoch": 0.050120403248846986, "grad_norm": 2.3427727222442627, "learning_rate": 6.1400000000000005e-06, "loss": 1.3607, "step": 307 }, { "epoch": 0.050283661891351375, "grad_norm": 2.3473243713378906, "learning_rate": 6.16e-06, "loss": 1.5959, "step": 308 }, { "epoch": 0.050446920533855764, "grad_norm": 2.279501438140869, "learning_rate": 6.18e-06, "loss": 1.4615, "step": 309 }, { "epoch": 0.05061017917636015, "grad_norm": 2.7622315883636475, "learning_rate": 6.200000000000001e-06, "loss": 1.7203, "step": 310 }, { "epoch": 0.050773437818864536, "grad_norm": 2.2716753482818604, "learning_rate": 6.220000000000001e-06, "loss": 1.7341, "step": 311 }, { "epoch": 0.050936696461368926, "grad_norm": 2.655374765396118, "learning_rate": 6.24e-06, "loss": 1.6484, "step": 312 }, { "epoch": 0.05109995510387331, "grad_norm": 2.2065420150756836, "learning_rate": 6.26e-06, "loss": 1.2938, "step": 313 }, { "epoch": 0.0512632137463777, "grad_norm": 2.4770634174346924, "learning_rate": 6.280000000000001e-06, "loss": 1.4769, "step": 314 }, { "epoch": 0.05142647238888209, "grad_norm": 2.4592158794403076, "learning_rate": 6.300000000000001e-06, "loss": 1.6962, "step": 315 }, { "epoch": 0.05158973103138648, "grad_norm": 2.4060866832733154, "learning_rate": 6.3200000000000005e-06, "loss": 1.4304, "step": 316 }, { "epoch": 0.05175298967389086, "grad_norm": 2.451901912689209, "learning_rate": 6.34e-06, "loss": 1.5102, "step": 317 }, { "epoch": 0.05191624831639525, "grad_norm": 2.594799280166626, "learning_rate": 6.360000000000001e-06, "loss": 1.565, "step": 318 }, { "epoch": 0.05207950695889964, "grad_norm": 2.23992657661438, "learning_rate": 6.380000000000001e-06, "loss": 1.6632, "step": 319 }, { "epoch": 0.05224276560140403, "grad_norm": 2.331944465637207, "learning_rate": 6.4000000000000006e-06, "loss": 1.4018, "step": 320 }, { "epoch": 0.05240602424390841, "grad_norm": 2.261958360671997, "learning_rate": 6.42e-06, "loss": 1.433, "step": 321 }, { "epoch": 0.0525692828864128, "grad_norm": 2.4543161392211914, "learning_rate": 6.440000000000001e-06, "loss": 1.5128, "step": 322 }, { "epoch": 0.05273254152891719, "grad_norm": 2.508803367614746, "learning_rate": 6.460000000000001e-06, "loss": 1.2861, "step": 323 }, { "epoch": 0.05289580017142158, "grad_norm": 2.4078011512756348, "learning_rate": 6.480000000000001e-06, "loss": 1.5813, "step": 324 }, { "epoch": 0.05305905881392596, "grad_norm": 2.2360177040100098, "learning_rate": 6.5000000000000004e-06, "loss": 1.4354, "step": 325 }, { "epoch": 0.05322231745643035, "grad_norm": 2.5228071212768555, "learning_rate": 6.520000000000001e-06, "loss": 1.6744, "step": 326 }, { "epoch": 0.05338557609893474, "grad_norm": 2.458150863647461, "learning_rate": 6.540000000000001e-06, "loss": 1.6438, "step": 327 }, { "epoch": 0.05354883474143912, "grad_norm": 2.1806211471557617, "learning_rate": 6.560000000000001e-06, "loss": 1.3157, "step": 328 }, { "epoch": 0.05371209338394351, "grad_norm": 2.550381660461426, "learning_rate": 6.5800000000000005e-06, "loss": 1.3423, "step": 329 }, { "epoch": 0.0538753520264479, "grad_norm": 2.469952344894409, "learning_rate": 6.600000000000001e-06, "loss": 1.5404, "step": 330 }, { "epoch": 0.05403861066895229, "grad_norm": 2.6957924365997314, "learning_rate": 6.620000000000001e-06, "loss": 1.6478, "step": 331 }, { "epoch": 0.05420186931145667, "grad_norm": 2.4324185848236084, "learning_rate": 6.640000000000001e-06, "loss": 1.3951, "step": 332 }, { "epoch": 0.05436512795396106, "grad_norm": 2.6638412475585938, "learning_rate": 6.660000000000001e-06, "loss": 1.5616, "step": 333 }, { "epoch": 0.05452838659646545, "grad_norm": 2.4595937728881836, "learning_rate": 6.680000000000001e-06, "loss": 1.5886, "step": 334 }, { "epoch": 0.05469164523896984, "grad_norm": 2.7240712642669678, "learning_rate": 6.700000000000001e-06, "loss": 1.5621, "step": 335 }, { "epoch": 0.054854903881474223, "grad_norm": 2.477067232131958, "learning_rate": 6.720000000000001e-06, "loss": 1.5704, "step": 336 }, { "epoch": 0.05501816252397861, "grad_norm": 2.3289954662323, "learning_rate": 6.740000000000001e-06, "loss": 1.4563, "step": 337 }, { "epoch": 0.055181421166483, "grad_norm": 2.7271056175231934, "learning_rate": 6.760000000000001e-06, "loss": 1.4805, "step": 338 }, { "epoch": 0.055344679808987385, "grad_norm": 2.5240917205810547, "learning_rate": 6.780000000000001e-06, "loss": 1.4755, "step": 339 }, { "epoch": 0.055507938451491774, "grad_norm": 2.4470040798187256, "learning_rate": 6.800000000000001e-06, "loss": 1.5445, "step": 340 }, { "epoch": 0.055671197093996164, "grad_norm": 2.5847697257995605, "learning_rate": 6.820000000000001e-06, "loss": 1.2898, "step": 341 }, { "epoch": 0.05583445573650055, "grad_norm": 2.506521463394165, "learning_rate": 6.8400000000000014e-06, "loss": 1.5311, "step": 342 }, { "epoch": 0.055997714379004936, "grad_norm": 2.5028300285339355, "learning_rate": 6.860000000000001e-06, "loss": 1.8046, "step": 343 }, { "epoch": 0.056160973021509325, "grad_norm": 2.8069305419921875, "learning_rate": 6.88e-06, "loss": 1.5795, "step": 344 }, { "epoch": 0.056324231664013714, "grad_norm": 2.50437593460083, "learning_rate": 6.9e-06, "loss": 1.502, "step": 345 }, { "epoch": 0.056487490306518104, "grad_norm": 2.4879496097564697, "learning_rate": 6.92e-06, "loss": 1.5581, "step": 346 }, { "epoch": 0.056650748949022486, "grad_norm": 2.6722776889801025, "learning_rate": 6.9400000000000005e-06, "loss": 1.5513, "step": 347 }, { "epoch": 0.056814007591526876, "grad_norm": 2.547714948654175, "learning_rate": 6.96e-06, "loss": 1.442, "step": 348 }, { "epoch": 0.056977266234031265, "grad_norm": 2.3801822662353516, "learning_rate": 6.98e-06, "loss": 1.195, "step": 349 }, { "epoch": 0.057140524876535655, "grad_norm": 2.727755308151245, "learning_rate": 7e-06, "loss": 1.4814, "step": 350 }, { "epoch": 0.05730378351904004, "grad_norm": 2.4006495475769043, "learning_rate": 7.0200000000000006e-06, "loss": 1.3556, "step": 351 }, { "epoch": 0.05746704216154443, "grad_norm": 2.443844795227051, "learning_rate": 7.04e-06, "loss": 1.4449, "step": 352 }, { "epoch": 0.057630300804048816, "grad_norm": 2.5353009700775146, "learning_rate": 7.06e-06, "loss": 1.4135, "step": 353 }, { "epoch": 0.0577935594465532, "grad_norm": 2.6511318683624268, "learning_rate": 7.08e-06, "loss": 1.5074, "step": 354 }, { "epoch": 0.05795681808905759, "grad_norm": 2.413559675216675, "learning_rate": 7.100000000000001e-06, "loss": 1.3272, "step": 355 }, { "epoch": 0.05812007673156198, "grad_norm": 2.636533260345459, "learning_rate": 7.1200000000000004e-06, "loss": 1.7045, "step": 356 }, { "epoch": 0.05828333537406637, "grad_norm": 2.4086594581604004, "learning_rate": 7.14e-06, "loss": 1.2961, "step": 357 }, { "epoch": 0.05844659401657075, "grad_norm": 2.259873151779175, "learning_rate": 7.16e-06, "loss": 1.273, "step": 358 }, { "epoch": 0.05860985265907514, "grad_norm": 2.501580238342285, "learning_rate": 7.180000000000001e-06, "loss": 1.2734, "step": 359 }, { "epoch": 0.05877311130157953, "grad_norm": 2.5535762310028076, "learning_rate": 7.2000000000000005e-06, "loss": 1.2926, "step": 360 }, { "epoch": 0.05893636994408392, "grad_norm": 2.862374782562256, "learning_rate": 7.22e-06, "loss": 1.5747, "step": 361 }, { "epoch": 0.0590996285865883, "grad_norm": 2.627467155456543, "learning_rate": 7.24e-06, "loss": 1.4183, "step": 362 }, { "epoch": 0.05926288722909269, "grad_norm": 2.8779447078704834, "learning_rate": 7.260000000000001e-06, "loss": 1.5363, "step": 363 }, { "epoch": 0.05942614587159708, "grad_norm": 2.5978190898895264, "learning_rate": 7.280000000000001e-06, "loss": 1.3884, "step": 364 }, { "epoch": 0.05958940451410147, "grad_norm": 2.5822701454162598, "learning_rate": 7.3e-06, "loss": 1.3592, "step": 365 }, { "epoch": 0.05975266315660585, "grad_norm": 2.644973039627075, "learning_rate": 7.32e-06, "loss": 1.5729, "step": 366 }, { "epoch": 0.05991592179911024, "grad_norm": 2.751716136932373, "learning_rate": 7.340000000000001e-06, "loss": 1.477, "step": 367 }, { "epoch": 0.06007918044161463, "grad_norm": 2.790705680847168, "learning_rate": 7.360000000000001e-06, "loss": 1.4642, "step": 368 }, { "epoch": 0.06024243908411901, "grad_norm": 2.7860724925994873, "learning_rate": 7.3800000000000005e-06, "loss": 1.4554, "step": 369 }, { "epoch": 0.0604056977266234, "grad_norm": 2.6911513805389404, "learning_rate": 7.4e-06, "loss": 1.5444, "step": 370 }, { "epoch": 0.06056895636912779, "grad_norm": 2.6443896293640137, "learning_rate": 7.420000000000001e-06, "loss": 1.4506, "step": 371 }, { "epoch": 0.06073221501163218, "grad_norm": 2.6191112995147705, "learning_rate": 7.440000000000001e-06, "loss": 1.2842, "step": 372 }, { "epoch": 0.06089547365413656, "grad_norm": 2.9263150691986084, "learning_rate": 7.4600000000000006e-06, "loss": 1.4913, "step": 373 }, { "epoch": 0.06105873229664095, "grad_norm": 2.6348788738250732, "learning_rate": 7.48e-06, "loss": 1.494, "step": 374 }, { "epoch": 0.06122199093914534, "grad_norm": 2.757462501525879, "learning_rate": 7.500000000000001e-06, "loss": 1.3891, "step": 375 }, { "epoch": 0.06138524958164973, "grad_norm": 2.6230034828186035, "learning_rate": 7.520000000000001e-06, "loss": 1.6621, "step": 376 }, { "epoch": 0.061548508224154114, "grad_norm": 2.8606441020965576, "learning_rate": 7.540000000000001e-06, "loss": 1.4969, "step": 377 }, { "epoch": 0.0617117668666585, "grad_norm": 2.6721103191375732, "learning_rate": 7.5600000000000005e-06, "loss": 1.4126, "step": 378 }, { "epoch": 0.06187502550916289, "grad_norm": 2.640225648880005, "learning_rate": 7.58e-06, "loss": 1.3784, "step": 379 }, { "epoch": 0.06203828415166728, "grad_norm": 2.700697422027588, "learning_rate": 7.600000000000001e-06, "loss": 1.4802, "step": 380 }, { "epoch": 0.062201542794171664, "grad_norm": 2.5984110832214355, "learning_rate": 7.620000000000001e-06, "loss": 1.2925, "step": 381 }, { "epoch": 0.062364801436676054, "grad_norm": 2.8745505809783936, "learning_rate": 7.640000000000001e-06, "loss": 1.5507, "step": 382 }, { "epoch": 0.06252806007918044, "grad_norm": 2.6033518314361572, "learning_rate": 7.660000000000001e-06, "loss": 1.2372, "step": 383 }, { "epoch": 0.06269131872168483, "grad_norm": 2.7920680046081543, "learning_rate": 7.680000000000001e-06, "loss": 1.473, "step": 384 }, { "epoch": 0.06285457736418922, "grad_norm": 2.5285048484802246, "learning_rate": 7.7e-06, "loss": 1.4596, "step": 385 }, { "epoch": 0.0630178360066936, "grad_norm": 2.6343798637390137, "learning_rate": 7.72e-06, "loss": 1.5723, "step": 386 }, { "epoch": 0.063181094649198, "grad_norm": 2.7174503803253174, "learning_rate": 7.74e-06, "loss": 1.5692, "step": 387 }, { "epoch": 0.06334435329170238, "grad_norm": 2.7852280139923096, "learning_rate": 7.76e-06, "loss": 1.2888, "step": 388 }, { "epoch": 0.06350761193420677, "grad_norm": 2.6421947479248047, "learning_rate": 7.78e-06, "loss": 1.3813, "step": 389 }, { "epoch": 0.06367087057671116, "grad_norm": 2.7372682094573975, "learning_rate": 7.800000000000002e-06, "loss": 1.4658, "step": 390 }, { "epoch": 0.06383412921921554, "grad_norm": 2.884885549545288, "learning_rate": 7.820000000000001e-06, "loss": 1.5573, "step": 391 }, { "epoch": 0.06399738786171993, "grad_norm": 2.7295639514923096, "learning_rate": 7.840000000000001e-06, "loss": 1.2556, "step": 392 }, { "epoch": 0.06416064650422432, "grad_norm": 2.655609130859375, "learning_rate": 7.860000000000001e-06, "loss": 1.3525, "step": 393 }, { "epoch": 0.0643239051467287, "grad_norm": 2.7824413776397705, "learning_rate": 7.88e-06, "loss": 1.4289, "step": 394 }, { "epoch": 0.0644871637892331, "grad_norm": 3.0502681732177734, "learning_rate": 7.9e-06, "loss": 1.6998, "step": 395 }, { "epoch": 0.06465042243173748, "grad_norm": 2.997398614883423, "learning_rate": 7.92e-06, "loss": 1.62, "step": 396 }, { "epoch": 0.06481368107424187, "grad_norm": 2.993884325027466, "learning_rate": 7.94e-06, "loss": 1.6995, "step": 397 }, { "epoch": 0.06497693971674626, "grad_norm": 2.670727491378784, "learning_rate": 7.960000000000002e-06, "loss": 1.257, "step": 398 }, { "epoch": 0.06514019835925064, "grad_norm": 2.7382490634918213, "learning_rate": 7.980000000000002e-06, "loss": 1.2744, "step": 399 }, { "epoch": 0.06530345700175504, "grad_norm": 3.0625598430633545, "learning_rate": 8.000000000000001e-06, "loss": 1.5504, "step": 400 }, { "epoch": 0.06546671564425942, "grad_norm": 2.885852813720703, "learning_rate": 8.020000000000001e-06, "loss": 1.4128, "step": 401 }, { "epoch": 0.0656299742867638, "grad_norm": 2.7422409057617188, "learning_rate": 8.040000000000001e-06, "loss": 1.4327, "step": 402 }, { "epoch": 0.0657932329292682, "grad_norm": 2.791187047958374, "learning_rate": 8.06e-06, "loss": 1.4903, "step": 403 }, { "epoch": 0.06595649157177258, "grad_norm": 2.772167205810547, "learning_rate": 8.08e-06, "loss": 1.2936, "step": 404 }, { "epoch": 0.06611975021427696, "grad_norm": 2.790634870529175, "learning_rate": 8.1e-06, "loss": 1.4601, "step": 405 }, { "epoch": 0.06628300885678136, "grad_norm": 2.9451613426208496, "learning_rate": 8.120000000000002e-06, "loss": 1.4153, "step": 406 }, { "epoch": 0.06644626749928574, "grad_norm": 2.621548652648926, "learning_rate": 8.14e-06, "loss": 1.3082, "step": 407 }, { "epoch": 0.06660952614179014, "grad_norm": 2.5288751125335693, "learning_rate": 8.16e-06, "loss": 1.3629, "step": 408 }, { "epoch": 0.06677278478429452, "grad_norm": 2.7223217487335205, "learning_rate": 8.18e-06, "loss": 1.4146, "step": 409 }, { "epoch": 0.0669360434267989, "grad_norm": 2.724846124649048, "learning_rate": 8.2e-06, "loss": 1.2224, "step": 410 }, { "epoch": 0.0670993020693033, "grad_norm": 2.7524595260620117, "learning_rate": 8.220000000000001e-06, "loss": 1.3819, "step": 411 }, { "epoch": 0.06726256071180768, "grad_norm": 3.1960606575012207, "learning_rate": 8.24e-06, "loss": 1.3851, "step": 412 }, { "epoch": 0.06742581935431206, "grad_norm": 2.6458628177642822, "learning_rate": 8.26e-06, "loss": 1.2454, "step": 413 }, { "epoch": 0.06758907799681646, "grad_norm": 3.183701515197754, "learning_rate": 8.28e-06, "loss": 1.5646, "step": 414 }, { "epoch": 0.06775233663932084, "grad_norm": 2.943263053894043, "learning_rate": 8.3e-06, "loss": 1.314, "step": 415 }, { "epoch": 0.06791559528182523, "grad_norm": 2.9373369216918945, "learning_rate": 8.32e-06, "loss": 1.3645, "step": 416 }, { "epoch": 0.06807885392432962, "grad_norm": 3.0232484340667725, "learning_rate": 8.34e-06, "loss": 1.4363, "step": 417 }, { "epoch": 0.068242112566834, "grad_norm": 2.870512008666992, "learning_rate": 8.36e-06, "loss": 1.2403, "step": 418 }, { "epoch": 0.0684053712093384, "grad_norm": 3.1560239791870117, "learning_rate": 8.380000000000001e-06, "loss": 1.318, "step": 419 }, { "epoch": 0.06856862985184278, "grad_norm": 3.3922929763793945, "learning_rate": 8.400000000000001e-06, "loss": 1.3557, "step": 420 }, { "epoch": 0.06873188849434717, "grad_norm": 2.954237937927246, "learning_rate": 8.42e-06, "loss": 1.3034, "step": 421 }, { "epoch": 0.06889514713685156, "grad_norm": 3.2904584407806396, "learning_rate": 8.44e-06, "loss": 1.5187, "step": 422 }, { "epoch": 0.06905840577935594, "grad_norm": 2.899125099182129, "learning_rate": 8.46e-06, "loss": 1.4574, "step": 423 }, { "epoch": 0.06922166442186033, "grad_norm": 2.6708061695098877, "learning_rate": 8.48e-06, "loss": 1.2537, "step": 424 }, { "epoch": 0.06938492306436472, "grad_norm": 2.785524845123291, "learning_rate": 8.5e-06, "loss": 1.3818, "step": 425 }, { "epoch": 0.0695481817068691, "grad_norm": 2.7958033084869385, "learning_rate": 8.52e-06, "loss": 1.4467, "step": 426 }, { "epoch": 0.0697114403493735, "grad_norm": 2.9418132305145264, "learning_rate": 8.540000000000001e-06, "loss": 1.2244, "step": 427 }, { "epoch": 0.06987469899187788, "grad_norm": 2.590224266052246, "learning_rate": 8.560000000000001e-06, "loss": 1.0869, "step": 428 }, { "epoch": 0.07003795763438227, "grad_norm": 3.052757978439331, "learning_rate": 8.580000000000001e-06, "loss": 1.3494, "step": 429 }, { "epoch": 0.07020121627688666, "grad_norm": 3.095374345779419, "learning_rate": 8.6e-06, "loss": 1.4214, "step": 430 }, { "epoch": 0.07036447491939105, "grad_norm": 2.8665754795074463, "learning_rate": 8.62e-06, "loss": 1.4754, "step": 431 }, { "epoch": 0.07052773356189543, "grad_norm": 3.1334075927734375, "learning_rate": 8.64e-06, "loss": 1.5536, "step": 432 }, { "epoch": 0.07069099220439982, "grad_norm": 2.9876809120178223, "learning_rate": 8.66e-06, "loss": 1.4855, "step": 433 }, { "epoch": 0.07085425084690421, "grad_norm": 3.063946485519409, "learning_rate": 8.68e-06, "loss": 1.5934, "step": 434 }, { "epoch": 0.07101750948940859, "grad_norm": 3.2672183513641357, "learning_rate": 8.700000000000001e-06, "loss": 1.4605, "step": 435 }, { "epoch": 0.07118076813191299, "grad_norm": 3.0972812175750732, "learning_rate": 8.720000000000001e-06, "loss": 1.3961, "step": 436 }, { "epoch": 0.07134402677441737, "grad_norm": 3.1427392959594727, "learning_rate": 8.740000000000001e-06, "loss": 1.321, "step": 437 }, { "epoch": 0.07150728541692176, "grad_norm": 2.890151262283325, "learning_rate": 8.76e-06, "loss": 1.1651, "step": 438 }, { "epoch": 0.07167054405942615, "grad_norm": 3.071375846862793, "learning_rate": 8.78e-06, "loss": 1.4452, "step": 439 }, { "epoch": 0.07183380270193053, "grad_norm": 3.14211368560791, "learning_rate": 8.8e-06, "loss": 1.26, "step": 440 }, { "epoch": 0.07199706134443493, "grad_norm": 3.0328757762908936, "learning_rate": 8.82e-06, "loss": 1.4798, "step": 441 }, { "epoch": 0.07216031998693931, "grad_norm": 3.4091618061065674, "learning_rate": 8.84e-06, "loss": 1.3895, "step": 442 }, { "epoch": 0.07232357862944369, "grad_norm": 2.9544448852539062, "learning_rate": 8.860000000000002e-06, "loss": 1.3602, "step": 443 }, { "epoch": 0.07248683727194809, "grad_norm": 2.9379982948303223, "learning_rate": 8.880000000000001e-06, "loss": 1.3137, "step": 444 }, { "epoch": 0.07265009591445247, "grad_norm": 2.79106068611145, "learning_rate": 8.900000000000001e-06, "loss": 1.2034, "step": 445 }, { "epoch": 0.07281335455695685, "grad_norm": 2.9215550422668457, "learning_rate": 8.920000000000001e-06, "loss": 1.3095, "step": 446 }, { "epoch": 0.07297661319946125, "grad_norm": 3.0184879302978516, "learning_rate": 8.94e-06, "loss": 1.4593, "step": 447 }, { "epoch": 0.07313987184196563, "grad_norm": 2.9248905181884766, "learning_rate": 8.96e-06, "loss": 1.3182, "step": 448 }, { "epoch": 0.07330313048447003, "grad_norm": 2.9369287490844727, "learning_rate": 8.98e-06, "loss": 1.5302, "step": 449 }, { "epoch": 0.07346638912697441, "grad_norm": 2.854358196258545, "learning_rate": 9e-06, "loss": 1.3437, "step": 450 }, { "epoch": 0.07362964776947879, "grad_norm": 3.129603862762451, "learning_rate": 9.020000000000002e-06, "loss": 1.5339, "step": 451 }, { "epoch": 0.07379290641198319, "grad_norm": 2.9627983570098877, "learning_rate": 9.040000000000002e-06, "loss": 1.4659, "step": 452 }, { "epoch": 0.07395616505448757, "grad_norm": 2.7088677883148193, "learning_rate": 9.060000000000001e-06, "loss": 1.1543, "step": 453 }, { "epoch": 0.07411942369699195, "grad_norm": 2.916685104370117, "learning_rate": 9.080000000000001e-06, "loss": 1.2276, "step": 454 }, { "epoch": 0.07428268233949635, "grad_norm": 3.167635917663574, "learning_rate": 9.100000000000001e-06, "loss": 1.4021, "step": 455 }, { "epoch": 0.07444594098200073, "grad_norm": 2.961355686187744, "learning_rate": 9.12e-06, "loss": 1.2709, "step": 456 }, { "epoch": 0.07460919962450512, "grad_norm": 3.2109711170196533, "learning_rate": 9.14e-06, "loss": 1.4573, "step": 457 }, { "epoch": 0.07477245826700951, "grad_norm": 3.1851608753204346, "learning_rate": 9.16e-06, "loss": 1.4151, "step": 458 }, { "epoch": 0.0749357169095139, "grad_norm": 3.2694168090820312, "learning_rate": 9.180000000000002e-06, "loss": 1.3058, "step": 459 }, { "epoch": 0.07509897555201829, "grad_norm": 3.253614664077759, "learning_rate": 9.200000000000002e-06, "loss": 1.4023, "step": 460 }, { "epoch": 0.07526223419452267, "grad_norm": 3.101839303970337, "learning_rate": 9.220000000000002e-06, "loss": 1.3672, "step": 461 }, { "epoch": 0.07542549283702706, "grad_norm": 3.2639553546905518, "learning_rate": 9.240000000000001e-06, "loss": 1.505, "step": 462 }, { "epoch": 0.07558875147953145, "grad_norm": 3.207331657409668, "learning_rate": 9.260000000000001e-06, "loss": 1.2685, "step": 463 }, { "epoch": 0.07575201012203583, "grad_norm": 3.022047996520996, "learning_rate": 9.280000000000001e-06, "loss": 1.3089, "step": 464 }, { "epoch": 0.07591526876454022, "grad_norm": 3.0509884357452393, "learning_rate": 9.3e-06, "loss": 1.3489, "step": 465 }, { "epoch": 0.07607852740704461, "grad_norm": 2.99104905128479, "learning_rate": 9.32e-06, "loss": 1.3157, "step": 466 }, { "epoch": 0.076241786049549, "grad_norm": 3.282782554626465, "learning_rate": 9.340000000000002e-06, "loss": 1.403, "step": 467 }, { "epoch": 0.07640504469205339, "grad_norm": 3.1978933811187744, "learning_rate": 9.360000000000002e-06, "loss": 1.4839, "step": 468 }, { "epoch": 0.07656830333455777, "grad_norm": 3.065716028213501, "learning_rate": 9.38e-06, "loss": 1.2391, "step": 469 }, { "epoch": 0.07673156197706216, "grad_norm": 3.165813446044922, "learning_rate": 9.4e-06, "loss": 1.444, "step": 470 }, { "epoch": 0.07689482061956655, "grad_norm": 3.0755069255828857, "learning_rate": 9.42e-06, "loss": 1.3583, "step": 471 }, { "epoch": 0.07705807926207094, "grad_norm": 2.9595906734466553, "learning_rate": 9.440000000000001e-06, "loss": 1.3638, "step": 472 }, { "epoch": 0.07722133790457532, "grad_norm": 3.4928812980651855, "learning_rate": 9.460000000000001e-06, "loss": 1.5617, "step": 473 }, { "epoch": 0.07738459654707971, "grad_norm": 3.6254820823669434, "learning_rate": 9.48e-06, "loss": 1.7046, "step": 474 }, { "epoch": 0.0775478551895841, "grad_norm": 3.7804698944091797, "learning_rate": 9.5e-06, "loss": 1.3981, "step": 475 }, { "epoch": 0.07771111383208848, "grad_norm": 3.49983549118042, "learning_rate": 9.52e-06, "loss": 1.318, "step": 476 }, { "epoch": 0.07787437247459288, "grad_norm": 2.9219887256622314, "learning_rate": 9.54e-06, "loss": 1.3073, "step": 477 }, { "epoch": 0.07803763111709726, "grad_norm": 3.146648645401001, "learning_rate": 9.56e-06, "loss": 1.471, "step": 478 }, { "epoch": 0.07820088975960166, "grad_norm": 3.016389846801758, "learning_rate": 9.58e-06, "loss": 1.3406, "step": 479 }, { "epoch": 0.07836414840210604, "grad_norm": 3.0727014541625977, "learning_rate": 9.600000000000001e-06, "loss": 1.2241, "step": 480 }, { "epoch": 0.07852740704461042, "grad_norm": 2.8200032711029053, "learning_rate": 9.620000000000001e-06, "loss": 1.425, "step": 481 }, { "epoch": 0.07869066568711482, "grad_norm": 2.958997964859009, "learning_rate": 9.640000000000001e-06, "loss": 1.3275, "step": 482 }, { "epoch": 0.0788539243296192, "grad_norm": 2.9883453845977783, "learning_rate": 9.66e-06, "loss": 1.363, "step": 483 }, { "epoch": 0.07901718297212358, "grad_norm": 3.167055368423462, "learning_rate": 9.68e-06, "loss": 1.4759, "step": 484 }, { "epoch": 0.07918044161462798, "grad_norm": 3.129673719406128, "learning_rate": 9.7e-06, "loss": 1.3244, "step": 485 }, { "epoch": 0.07934370025713236, "grad_norm": 2.9256749153137207, "learning_rate": 9.72e-06, "loss": 1.2311, "step": 486 }, { "epoch": 0.07950695889963674, "grad_norm": 3.339545965194702, "learning_rate": 9.74e-06, "loss": 1.3116, "step": 487 }, { "epoch": 0.07967021754214114, "grad_norm": 3.050513744354248, "learning_rate": 9.760000000000001e-06, "loss": 1.2218, "step": 488 }, { "epoch": 0.07983347618464552, "grad_norm": 2.859565019607544, "learning_rate": 9.780000000000001e-06, "loss": 1.3826, "step": 489 }, { "epoch": 0.07999673482714992, "grad_norm": 3.107679605484009, "learning_rate": 9.800000000000001e-06, "loss": 1.3026, "step": 490 }, { "epoch": 0.0801599934696543, "grad_norm": 2.9991846084594727, "learning_rate": 9.820000000000001e-06, "loss": 1.2416, "step": 491 }, { "epoch": 0.08032325211215868, "grad_norm": 3.2471935749053955, "learning_rate": 9.84e-06, "loss": 1.5191, "step": 492 }, { "epoch": 0.08048651075466308, "grad_norm": 2.8939528465270996, "learning_rate": 9.86e-06, "loss": 1.2183, "step": 493 }, { "epoch": 0.08064976939716746, "grad_norm": 3.0562875270843506, "learning_rate": 9.88e-06, "loss": 1.1455, "step": 494 }, { "epoch": 0.08081302803967184, "grad_norm": 2.8149259090423584, "learning_rate": 9.9e-06, "loss": 1.1204, "step": 495 }, { "epoch": 0.08097628668217624, "grad_norm": 3.1184122562408447, "learning_rate": 9.920000000000002e-06, "loss": 1.2682, "step": 496 }, { "epoch": 0.08113954532468062, "grad_norm": 3.185497283935547, "learning_rate": 9.940000000000001e-06, "loss": 1.3927, "step": 497 }, { "epoch": 0.08130280396718502, "grad_norm": 3.6683290004730225, "learning_rate": 9.960000000000001e-06, "loss": 1.5153, "step": 498 }, { "epoch": 0.0814660626096894, "grad_norm": 3.092473030090332, "learning_rate": 9.980000000000001e-06, "loss": 1.3705, "step": 499 }, { "epoch": 0.08162932125219378, "grad_norm": 3.115736484527588, "learning_rate": 1e-05, "loss": 1.4116, "step": 500 }, { "epoch": 0.08179257989469818, "grad_norm": 3.2403082847595215, "learning_rate": 1.002e-05, "loss": 1.5052, "step": 501 }, { "epoch": 0.08195583853720256, "grad_norm": 3.2402281761169434, "learning_rate": 1.004e-05, "loss": 1.4701, "step": 502 }, { "epoch": 0.08211909717970695, "grad_norm": 3.3855080604553223, "learning_rate": 1.006e-05, "loss": 1.2916, "step": 503 }, { "epoch": 0.08228235582221134, "grad_norm": 3.316054582595825, "learning_rate": 1.008e-05, "loss": 1.3958, "step": 504 }, { "epoch": 0.08244561446471572, "grad_norm": 3.218883991241455, "learning_rate": 1.0100000000000002e-05, "loss": 1.2199, "step": 505 }, { "epoch": 0.08260887310722011, "grad_norm": 3.2176902294158936, "learning_rate": 1.0120000000000001e-05, "loss": 1.1862, "step": 506 }, { "epoch": 0.0827721317497245, "grad_norm": 3.3850715160369873, "learning_rate": 1.0140000000000001e-05, "loss": 1.2763, "step": 507 }, { "epoch": 0.08293539039222889, "grad_norm": 3.59910249710083, "learning_rate": 1.0160000000000001e-05, "loss": 1.3509, "step": 508 }, { "epoch": 0.08309864903473328, "grad_norm": 3.243034839630127, "learning_rate": 1.018e-05, "loss": 1.2464, "step": 509 }, { "epoch": 0.08326190767723766, "grad_norm": 3.1209068298339844, "learning_rate": 1.02e-05, "loss": 1.2278, "step": 510 }, { "epoch": 0.08342516631974205, "grad_norm": 3.3315155506134033, "learning_rate": 1.022e-05, "loss": 1.3143, "step": 511 }, { "epoch": 0.08358842496224644, "grad_norm": 3.278855323791504, "learning_rate": 1.024e-05, "loss": 1.2272, "step": 512 }, { "epoch": 0.08375168360475083, "grad_norm": 3.507819890975952, "learning_rate": 1.0260000000000002e-05, "loss": 1.2597, "step": 513 }, { "epoch": 0.08391494224725521, "grad_norm": 3.5296926498413086, "learning_rate": 1.0280000000000002e-05, "loss": 1.585, "step": 514 }, { "epoch": 0.0840782008897596, "grad_norm": 3.591172695159912, "learning_rate": 1.0300000000000001e-05, "loss": 1.5235, "step": 515 }, { "epoch": 0.08424145953226399, "grad_norm": 3.052189350128174, "learning_rate": 1.0320000000000001e-05, "loss": 1.3136, "step": 516 }, { "epoch": 0.08440471817476837, "grad_norm": 3.0337090492248535, "learning_rate": 1.0340000000000001e-05, "loss": 1.4054, "step": 517 }, { "epoch": 0.08456797681727277, "grad_norm": 2.9393928050994873, "learning_rate": 1.036e-05, "loss": 1.393, "step": 518 }, { "epoch": 0.08473123545977715, "grad_norm": 2.9128761291503906, "learning_rate": 1.038e-05, "loss": 1.3722, "step": 519 }, { "epoch": 0.08489449410228155, "grad_norm": 3.0225346088409424, "learning_rate": 1.04e-05, "loss": 1.3659, "step": 520 }, { "epoch": 0.08505775274478593, "grad_norm": 3.388819932937622, "learning_rate": 1.0420000000000002e-05, "loss": 1.3673, "step": 521 }, { "epoch": 0.08522101138729031, "grad_norm": 3.223410129547119, "learning_rate": 1.0440000000000002e-05, "loss": 1.5953, "step": 522 }, { "epoch": 0.0853842700297947, "grad_norm": 3.160454511642456, "learning_rate": 1.0460000000000001e-05, "loss": 1.4608, "step": 523 }, { "epoch": 0.08554752867229909, "grad_norm": 3.354294776916504, "learning_rate": 1.0480000000000001e-05, "loss": 1.3546, "step": 524 }, { "epoch": 0.08571078731480347, "grad_norm": 3.4357011318206787, "learning_rate": 1.0500000000000001e-05, "loss": 1.5565, "step": 525 }, { "epoch": 0.08587404595730787, "grad_norm": 3.2060067653656006, "learning_rate": 1.0520000000000001e-05, "loss": 1.2678, "step": 526 }, { "epoch": 0.08603730459981225, "grad_norm": 3.2733750343322754, "learning_rate": 1.054e-05, "loss": 1.2825, "step": 527 }, { "epoch": 0.08620056324231665, "grad_norm": 3.0280864238739014, "learning_rate": 1.056e-05, "loss": 1.2322, "step": 528 }, { "epoch": 0.08636382188482103, "grad_norm": 3.201385021209717, "learning_rate": 1.0580000000000002e-05, "loss": 1.3283, "step": 529 }, { "epoch": 0.08652708052732541, "grad_norm": 3.0981786251068115, "learning_rate": 1.0600000000000002e-05, "loss": 1.2912, "step": 530 }, { "epoch": 0.08669033916982981, "grad_norm": 3.246832847595215, "learning_rate": 1.0620000000000002e-05, "loss": 1.4631, "step": 531 }, { "epoch": 0.08685359781233419, "grad_norm": 3.1816036701202393, "learning_rate": 1.0640000000000001e-05, "loss": 1.287, "step": 532 }, { "epoch": 0.08701685645483857, "grad_norm": 3.2410333156585693, "learning_rate": 1.0660000000000001e-05, "loss": 1.3486, "step": 533 }, { "epoch": 0.08718011509734297, "grad_norm": 3.134855270385742, "learning_rate": 1.0680000000000001e-05, "loss": 1.1708, "step": 534 }, { "epoch": 0.08734337373984735, "grad_norm": 3.4560580253601074, "learning_rate": 1.0700000000000001e-05, "loss": 1.4893, "step": 535 }, { "epoch": 0.08750663238235173, "grad_norm": 3.556454658508301, "learning_rate": 1.072e-05, "loss": 1.4232, "step": 536 }, { "epoch": 0.08766989102485613, "grad_norm": 3.652381181716919, "learning_rate": 1.0740000000000002e-05, "loss": 1.3384, "step": 537 }, { "epoch": 0.08783314966736051, "grad_norm": 3.2212700843811035, "learning_rate": 1.0760000000000002e-05, "loss": 1.3198, "step": 538 }, { "epoch": 0.08799640830986491, "grad_norm": 3.102572202682495, "learning_rate": 1.0780000000000002e-05, "loss": 1.3274, "step": 539 }, { "epoch": 0.08815966695236929, "grad_norm": 3.6189913749694824, "learning_rate": 1.0800000000000002e-05, "loss": 1.534, "step": 540 }, { "epoch": 0.08832292559487367, "grad_norm": 3.1864349842071533, "learning_rate": 1.0820000000000001e-05, "loss": 1.3453, "step": 541 }, { "epoch": 0.08848618423737807, "grad_norm": 3.2667438983917236, "learning_rate": 1.0840000000000001e-05, "loss": 1.2646, "step": 542 }, { "epoch": 0.08864944287988245, "grad_norm": 3.231307029724121, "learning_rate": 1.0860000000000001e-05, "loss": 1.3513, "step": 543 }, { "epoch": 0.08881270152238684, "grad_norm": 3.450826406478882, "learning_rate": 1.0880000000000001e-05, "loss": 1.411, "step": 544 }, { "epoch": 0.08897596016489123, "grad_norm": 3.6843605041503906, "learning_rate": 1.0900000000000002e-05, "loss": 1.4447, "step": 545 }, { "epoch": 0.08913921880739561, "grad_norm": 3.2029852867126465, "learning_rate": 1.0920000000000002e-05, "loss": 1.2337, "step": 546 }, { "epoch": 0.0893024774499, "grad_norm": 3.280824661254883, "learning_rate": 1.0940000000000002e-05, "loss": 1.2486, "step": 547 }, { "epoch": 0.0894657360924044, "grad_norm": 3.127041816711426, "learning_rate": 1.0960000000000002e-05, "loss": 1.4378, "step": 548 }, { "epoch": 0.08962899473490878, "grad_norm": 3.279191493988037, "learning_rate": 1.0980000000000002e-05, "loss": 1.2523, "step": 549 }, { "epoch": 0.08979225337741317, "grad_norm": 3.2267308235168457, "learning_rate": 1.1000000000000001e-05, "loss": 1.4575, "step": 550 }, { "epoch": 0.08995551201991756, "grad_norm": 3.140105962753296, "learning_rate": 1.1020000000000001e-05, "loss": 1.4308, "step": 551 }, { "epoch": 0.09011877066242194, "grad_norm": 3.369239330291748, "learning_rate": 1.1040000000000001e-05, "loss": 1.2615, "step": 552 }, { "epoch": 0.09028202930492633, "grad_norm": 3.4518635272979736, "learning_rate": 1.1060000000000003e-05, "loss": 1.3994, "step": 553 }, { "epoch": 0.09044528794743072, "grad_norm": 3.3015122413635254, "learning_rate": 1.1080000000000002e-05, "loss": 1.3356, "step": 554 }, { "epoch": 0.0906085465899351, "grad_norm": 3.4418303966522217, "learning_rate": 1.1100000000000002e-05, "loss": 1.3101, "step": 555 }, { "epoch": 0.0907718052324395, "grad_norm": 3.591838836669922, "learning_rate": 1.1120000000000002e-05, "loss": 1.3138, "step": 556 }, { "epoch": 0.09093506387494388, "grad_norm": 3.326482057571411, "learning_rate": 1.1140000000000002e-05, "loss": 1.2645, "step": 557 }, { "epoch": 0.09109832251744827, "grad_norm": 3.539987802505493, "learning_rate": 1.1160000000000002e-05, "loss": 1.3875, "step": 558 }, { "epoch": 0.09126158115995266, "grad_norm": 3.9019522666931152, "learning_rate": 1.1180000000000001e-05, "loss": 1.2708, "step": 559 }, { "epoch": 0.09142483980245704, "grad_norm": 3.287991762161255, "learning_rate": 1.1200000000000001e-05, "loss": 1.3322, "step": 560 }, { "epoch": 0.09158809844496144, "grad_norm": 3.1459896564483643, "learning_rate": 1.1220000000000003e-05, "loss": 1.1383, "step": 561 }, { "epoch": 0.09175135708746582, "grad_norm": 3.2767138481140137, "learning_rate": 1.1240000000000002e-05, "loss": 1.4102, "step": 562 }, { "epoch": 0.0919146157299702, "grad_norm": 4.460055351257324, "learning_rate": 1.126e-05, "loss": 1.6402, "step": 563 }, { "epoch": 0.0920778743724746, "grad_norm": 2.996844530105591, "learning_rate": 1.128e-05, "loss": 1.0712, "step": 564 }, { "epoch": 0.09224113301497898, "grad_norm": 3.362281560897827, "learning_rate": 1.13e-05, "loss": 1.3494, "step": 565 }, { "epoch": 0.09240439165748336, "grad_norm": 3.329918146133423, "learning_rate": 1.132e-05, "loss": 1.3364, "step": 566 }, { "epoch": 0.09256765029998776, "grad_norm": 3.5739922523498535, "learning_rate": 1.134e-05, "loss": 1.3315, "step": 567 }, { "epoch": 0.09273090894249214, "grad_norm": 3.0538644790649414, "learning_rate": 1.136e-05, "loss": 1.2671, "step": 568 }, { "epoch": 0.09289416758499654, "grad_norm": 3.420135498046875, "learning_rate": 1.138e-05, "loss": 1.492, "step": 569 }, { "epoch": 0.09305742622750092, "grad_norm": 3.724452495574951, "learning_rate": 1.14e-05, "loss": 1.4907, "step": 570 }, { "epoch": 0.0932206848700053, "grad_norm": 3.386993885040283, "learning_rate": 1.142e-05, "loss": 1.3152, "step": 571 }, { "epoch": 0.0933839435125097, "grad_norm": 3.2963178157806396, "learning_rate": 1.144e-05, "loss": 1.3523, "step": 572 }, { "epoch": 0.09354720215501408, "grad_norm": 3.470891237258911, "learning_rate": 1.146e-05, "loss": 1.3238, "step": 573 }, { "epoch": 0.09371046079751846, "grad_norm": 3.382941484451294, "learning_rate": 1.148e-05, "loss": 1.2436, "step": 574 }, { "epoch": 0.09387371944002286, "grad_norm": 3.3696553707122803, "learning_rate": 1.15e-05, "loss": 1.1908, "step": 575 }, { "epoch": 0.09403697808252724, "grad_norm": 3.364718198776245, "learning_rate": 1.152e-05, "loss": 1.1284, "step": 576 }, { "epoch": 0.09420023672503162, "grad_norm": 3.7245821952819824, "learning_rate": 1.154e-05, "loss": 1.4378, "step": 577 }, { "epoch": 0.09436349536753602, "grad_norm": 3.859130620956421, "learning_rate": 1.156e-05, "loss": 1.496, "step": 578 }, { "epoch": 0.0945267540100404, "grad_norm": 3.6130621433258057, "learning_rate": 1.1580000000000001e-05, "loss": 1.3818, "step": 579 }, { "epoch": 0.0946900126525448, "grad_norm": 4.048120021820068, "learning_rate": 1.16e-05, "loss": 1.3436, "step": 580 }, { "epoch": 0.09485327129504918, "grad_norm": 3.543116807937622, "learning_rate": 1.162e-05, "loss": 1.227, "step": 581 }, { "epoch": 0.09501652993755356, "grad_norm": 3.9674839973449707, "learning_rate": 1.164e-05, "loss": 1.3747, "step": 582 }, { "epoch": 0.09517978858005796, "grad_norm": 3.731032609939575, "learning_rate": 1.166e-05, "loss": 1.2422, "step": 583 }, { "epoch": 0.09534304722256234, "grad_norm": 3.3200442790985107, "learning_rate": 1.168e-05, "loss": 1.0986, "step": 584 }, { "epoch": 0.09550630586506673, "grad_norm": 3.7044544219970703, "learning_rate": 1.17e-05, "loss": 1.4162, "step": 585 }, { "epoch": 0.09566956450757112, "grad_norm": 3.5169975757598877, "learning_rate": 1.172e-05, "loss": 1.2712, "step": 586 }, { "epoch": 0.0958328231500755, "grad_norm": 3.962479829788208, "learning_rate": 1.1740000000000001e-05, "loss": 1.5365, "step": 587 }, { "epoch": 0.0959960817925799, "grad_norm": 3.4059066772460938, "learning_rate": 1.1760000000000001e-05, "loss": 1.1396, "step": 588 }, { "epoch": 0.09615934043508428, "grad_norm": 3.6306028366088867, "learning_rate": 1.178e-05, "loss": 1.4135, "step": 589 }, { "epoch": 0.09632259907758867, "grad_norm": 3.2792325019836426, "learning_rate": 1.18e-05, "loss": 1.2674, "step": 590 }, { "epoch": 0.09648585772009306, "grad_norm": 2.933026075363159, "learning_rate": 1.182e-05, "loss": 1.0319, "step": 591 }, { "epoch": 0.09664911636259745, "grad_norm": 3.369436740875244, "learning_rate": 1.184e-05, "loss": 1.3312, "step": 592 }, { "epoch": 0.09681237500510183, "grad_norm": 3.597885847091675, "learning_rate": 1.186e-05, "loss": 1.2433, "step": 593 }, { "epoch": 0.09697563364760622, "grad_norm": 3.4532711505889893, "learning_rate": 1.188e-05, "loss": 1.2192, "step": 594 }, { "epoch": 0.0971388922901106, "grad_norm": 3.3869118690490723, "learning_rate": 1.1900000000000001e-05, "loss": 1.2338, "step": 595 }, { "epoch": 0.09730215093261499, "grad_norm": 3.415553331375122, "learning_rate": 1.1920000000000001e-05, "loss": 1.2431, "step": 596 }, { "epoch": 0.09746540957511939, "grad_norm": 3.294726610183716, "learning_rate": 1.1940000000000001e-05, "loss": 1.4013, "step": 597 }, { "epoch": 0.09762866821762377, "grad_norm": 3.5671210289001465, "learning_rate": 1.196e-05, "loss": 1.1737, "step": 598 }, { "epoch": 0.09779192686012816, "grad_norm": 3.1966323852539062, "learning_rate": 1.198e-05, "loss": 1.3739, "step": 599 }, { "epoch": 0.09795518550263255, "grad_norm": 3.126709222793579, "learning_rate": 1.2e-05, "loss": 1.2262, "step": 600 }, { "epoch": 0.09811844414513693, "grad_norm": 3.3238606452941895, "learning_rate": 1.202e-05, "loss": 1.396, "step": 601 }, { "epoch": 0.09828170278764133, "grad_norm": 3.3678059577941895, "learning_rate": 1.204e-05, "loss": 1.1504, "step": 602 }, { "epoch": 0.09844496143014571, "grad_norm": 3.223963737487793, "learning_rate": 1.2060000000000001e-05, "loss": 1.2576, "step": 603 }, { "epoch": 0.09860822007265009, "grad_norm": 3.7115049362182617, "learning_rate": 1.2080000000000001e-05, "loss": 1.3471, "step": 604 }, { "epoch": 0.09877147871515449, "grad_norm": 3.4346067905426025, "learning_rate": 1.2100000000000001e-05, "loss": 1.2147, "step": 605 }, { "epoch": 0.09893473735765887, "grad_norm": 3.828129529953003, "learning_rate": 1.2120000000000001e-05, "loss": 1.4282, "step": 606 }, { "epoch": 0.09909799600016325, "grad_norm": 3.821056365966797, "learning_rate": 1.214e-05, "loss": 1.3388, "step": 607 }, { "epoch": 0.09926125464266765, "grad_norm": 4.096508026123047, "learning_rate": 1.216e-05, "loss": 1.3764, "step": 608 }, { "epoch": 0.09942451328517203, "grad_norm": 3.9946491718292236, "learning_rate": 1.218e-05, "loss": 1.2672, "step": 609 }, { "epoch": 0.09958777192767643, "grad_norm": 3.7141332626342773, "learning_rate": 1.22e-05, "loss": 1.2222, "step": 610 }, { "epoch": 0.09975103057018081, "grad_norm": 4.169655799865723, "learning_rate": 1.2220000000000002e-05, "loss": 1.43, "step": 611 }, { "epoch": 0.09991428921268519, "grad_norm": 4.684153079986572, "learning_rate": 1.2240000000000001e-05, "loss": 1.5626, "step": 612 }, { "epoch": 0.10007754785518959, "grad_norm": 4.0022993087768555, "learning_rate": 1.2260000000000001e-05, "loss": 1.4512, "step": 613 }, { "epoch": 0.10024080649769397, "grad_norm": 3.5701169967651367, "learning_rate": 1.2280000000000001e-05, "loss": 1.3894, "step": 614 }, { "epoch": 0.10040406514019835, "grad_norm": 3.0262672901153564, "learning_rate": 1.23e-05, "loss": 1.0707, "step": 615 }, { "epoch": 0.10056732378270275, "grad_norm": 3.6173391342163086, "learning_rate": 1.232e-05, "loss": 1.3372, "step": 616 }, { "epoch": 0.10073058242520713, "grad_norm": 3.220006227493286, "learning_rate": 1.234e-05, "loss": 1.2814, "step": 617 }, { "epoch": 0.10089384106771153, "grad_norm": 3.3923614025115967, "learning_rate": 1.236e-05, "loss": 1.4355, "step": 618 }, { "epoch": 0.10105709971021591, "grad_norm": 3.4788033962249756, "learning_rate": 1.2380000000000002e-05, "loss": 1.2896, "step": 619 }, { "epoch": 0.1012203583527203, "grad_norm": 3.295891046524048, "learning_rate": 1.2400000000000002e-05, "loss": 1.2677, "step": 620 }, { "epoch": 0.10138361699522469, "grad_norm": 2.9716246128082275, "learning_rate": 1.2420000000000001e-05, "loss": 1.0672, "step": 621 }, { "epoch": 0.10154687563772907, "grad_norm": 3.0670087337493896, "learning_rate": 1.2440000000000001e-05, "loss": 1.3117, "step": 622 }, { "epoch": 0.10171013428023346, "grad_norm": 3.3751635551452637, "learning_rate": 1.2460000000000001e-05, "loss": 1.3195, "step": 623 }, { "epoch": 0.10187339292273785, "grad_norm": 3.4446237087249756, "learning_rate": 1.248e-05, "loss": 1.3697, "step": 624 }, { "epoch": 0.10203665156524223, "grad_norm": 3.4191486835479736, "learning_rate": 1.25e-05, "loss": 1.2691, "step": 625 }, { "epoch": 0.10219991020774662, "grad_norm": 3.638721227645874, "learning_rate": 1.252e-05, "loss": 1.3048, "step": 626 }, { "epoch": 0.10236316885025101, "grad_norm": 3.524495840072632, "learning_rate": 1.254e-05, "loss": 1.2426, "step": 627 }, { "epoch": 0.1025264274927554, "grad_norm": 3.9672465324401855, "learning_rate": 1.2560000000000002e-05, "loss": 1.4183, "step": 628 }, { "epoch": 0.10268968613525979, "grad_norm": 3.68884539604187, "learning_rate": 1.2580000000000002e-05, "loss": 1.3174, "step": 629 }, { "epoch": 0.10285294477776417, "grad_norm": 3.927175283432007, "learning_rate": 1.2600000000000001e-05, "loss": 1.466, "step": 630 }, { "epoch": 0.10301620342026856, "grad_norm": 3.5812079906463623, "learning_rate": 1.2620000000000001e-05, "loss": 1.453, "step": 631 }, { "epoch": 0.10317946206277295, "grad_norm": 3.5056145191192627, "learning_rate": 1.2640000000000001e-05, "loss": 1.2417, "step": 632 }, { "epoch": 0.10334272070527734, "grad_norm": 3.5882937908172607, "learning_rate": 1.266e-05, "loss": 1.2424, "step": 633 }, { "epoch": 0.10350597934778172, "grad_norm": 3.271097421646118, "learning_rate": 1.268e-05, "loss": 1.2584, "step": 634 }, { "epoch": 0.10366923799028611, "grad_norm": 3.2973146438598633, "learning_rate": 1.27e-05, "loss": 1.1721, "step": 635 }, { "epoch": 0.1038324966327905, "grad_norm": 3.229166269302368, "learning_rate": 1.2720000000000002e-05, "loss": 1.1873, "step": 636 }, { "epoch": 0.10399575527529488, "grad_norm": 3.5034255981445312, "learning_rate": 1.2740000000000002e-05, "loss": 1.4378, "step": 637 }, { "epoch": 0.10415901391779928, "grad_norm": 3.4592442512512207, "learning_rate": 1.2760000000000001e-05, "loss": 1.2722, "step": 638 }, { "epoch": 0.10432227256030366, "grad_norm": 3.2301406860351562, "learning_rate": 1.2780000000000001e-05, "loss": 1.1585, "step": 639 }, { "epoch": 0.10448553120280805, "grad_norm": 3.4407362937927246, "learning_rate": 1.2800000000000001e-05, "loss": 1.356, "step": 640 }, { "epoch": 0.10464878984531244, "grad_norm": 3.0948421955108643, "learning_rate": 1.2820000000000001e-05, "loss": 1.0977, "step": 641 }, { "epoch": 0.10481204848781682, "grad_norm": 3.670635223388672, "learning_rate": 1.284e-05, "loss": 1.2437, "step": 642 }, { "epoch": 0.10497530713032122, "grad_norm": 3.298997402191162, "learning_rate": 1.286e-05, "loss": 1.1094, "step": 643 }, { "epoch": 0.1051385657728256, "grad_norm": 3.7627744674682617, "learning_rate": 1.2880000000000002e-05, "loss": 1.3709, "step": 644 }, { "epoch": 0.10530182441532998, "grad_norm": 3.675348997116089, "learning_rate": 1.2900000000000002e-05, "loss": 1.2095, "step": 645 }, { "epoch": 0.10546508305783438, "grad_norm": 3.545684814453125, "learning_rate": 1.2920000000000002e-05, "loss": 1.2798, "step": 646 }, { "epoch": 0.10562834170033876, "grad_norm": 3.848721504211426, "learning_rate": 1.2940000000000001e-05, "loss": 1.3455, "step": 647 }, { "epoch": 0.10579160034284316, "grad_norm": 3.8616440296173096, "learning_rate": 1.2960000000000001e-05, "loss": 1.3087, "step": 648 }, { "epoch": 0.10595485898534754, "grad_norm": 3.828249216079712, "learning_rate": 1.2980000000000001e-05, "loss": 1.5652, "step": 649 }, { "epoch": 0.10611811762785192, "grad_norm": 3.5898706912994385, "learning_rate": 1.3000000000000001e-05, "loss": 1.3191, "step": 650 }, { "epoch": 0.10628137627035632, "grad_norm": 3.7245874404907227, "learning_rate": 1.302e-05, "loss": 1.5, "step": 651 }, { "epoch": 0.1064446349128607, "grad_norm": 3.6097216606140137, "learning_rate": 1.3040000000000002e-05, "loss": 1.196, "step": 652 }, { "epoch": 0.10660789355536508, "grad_norm": 3.436556816101074, "learning_rate": 1.3060000000000002e-05, "loss": 1.2205, "step": 653 }, { "epoch": 0.10677115219786948, "grad_norm": 3.416118621826172, "learning_rate": 1.3080000000000002e-05, "loss": 1.1413, "step": 654 }, { "epoch": 0.10693441084037386, "grad_norm": 3.817369222640991, "learning_rate": 1.3100000000000002e-05, "loss": 1.241, "step": 655 }, { "epoch": 0.10709766948287824, "grad_norm": 3.355257272720337, "learning_rate": 1.3120000000000001e-05, "loss": 1.0935, "step": 656 }, { "epoch": 0.10726092812538264, "grad_norm": 3.261915445327759, "learning_rate": 1.3140000000000001e-05, "loss": 1.1002, "step": 657 }, { "epoch": 0.10742418676788702, "grad_norm": 3.476895570755005, "learning_rate": 1.3160000000000001e-05, "loss": 1.2768, "step": 658 }, { "epoch": 0.10758744541039142, "grad_norm": 3.669179916381836, "learning_rate": 1.3180000000000001e-05, "loss": 1.3996, "step": 659 }, { "epoch": 0.1077507040528958, "grad_norm": 3.9235188961029053, "learning_rate": 1.3200000000000002e-05, "loss": 1.3288, "step": 660 }, { "epoch": 0.10791396269540018, "grad_norm": 3.486142158508301, "learning_rate": 1.3220000000000002e-05, "loss": 1.1989, "step": 661 }, { "epoch": 0.10807722133790458, "grad_norm": 4.400827884674072, "learning_rate": 1.3240000000000002e-05, "loss": 1.5055, "step": 662 }, { "epoch": 0.10824047998040896, "grad_norm": 3.0975263118743896, "learning_rate": 1.3260000000000002e-05, "loss": 0.9915, "step": 663 }, { "epoch": 0.10840373862291335, "grad_norm": 3.843268632888794, "learning_rate": 1.3280000000000002e-05, "loss": 1.4285, "step": 664 }, { "epoch": 0.10856699726541774, "grad_norm": 3.9305105209350586, "learning_rate": 1.3300000000000001e-05, "loss": 1.284, "step": 665 }, { "epoch": 0.10873025590792212, "grad_norm": 3.790658950805664, "learning_rate": 1.3320000000000001e-05, "loss": 1.3184, "step": 666 }, { "epoch": 0.1088935145504265, "grad_norm": 3.684332847595215, "learning_rate": 1.3340000000000001e-05, "loss": 1.3809, "step": 667 }, { "epoch": 0.1090567731929309, "grad_norm": 3.864351272583008, "learning_rate": 1.3360000000000003e-05, "loss": 1.3988, "step": 668 }, { "epoch": 0.10922003183543529, "grad_norm": 3.795365810394287, "learning_rate": 1.3380000000000002e-05, "loss": 1.3687, "step": 669 }, { "epoch": 0.10938329047793968, "grad_norm": 3.355266809463501, "learning_rate": 1.3400000000000002e-05, "loss": 1.0373, "step": 670 }, { "epoch": 0.10954654912044406, "grad_norm": 4.054567813873291, "learning_rate": 1.3420000000000002e-05, "loss": 1.4732, "step": 671 }, { "epoch": 0.10970980776294845, "grad_norm": 3.7713961601257324, "learning_rate": 1.3440000000000002e-05, "loss": 1.3526, "step": 672 }, { "epoch": 0.10987306640545284, "grad_norm": 3.5611002445220947, "learning_rate": 1.3460000000000002e-05, "loss": 1.1183, "step": 673 }, { "epoch": 0.11003632504795723, "grad_norm": 3.3765242099761963, "learning_rate": 1.3480000000000001e-05, "loss": 1.0509, "step": 674 }, { "epoch": 0.11019958369046161, "grad_norm": 3.4628748893737793, "learning_rate": 1.3500000000000001e-05, "loss": 1.0808, "step": 675 }, { "epoch": 0.110362842332966, "grad_norm": 3.8234899044036865, "learning_rate": 1.3520000000000003e-05, "loss": 1.2393, "step": 676 }, { "epoch": 0.11052610097547039, "grad_norm": 3.719684600830078, "learning_rate": 1.3540000000000003e-05, "loss": 1.3152, "step": 677 }, { "epoch": 0.11068935961797477, "grad_norm": 3.5199637413024902, "learning_rate": 1.3560000000000002e-05, "loss": 1.0964, "step": 678 }, { "epoch": 0.11085261826047917, "grad_norm": 3.9752182960510254, "learning_rate": 1.3580000000000002e-05, "loss": 1.5363, "step": 679 }, { "epoch": 0.11101587690298355, "grad_norm": 3.90409517288208, "learning_rate": 1.3600000000000002e-05, "loss": 1.4828, "step": 680 }, { "epoch": 0.11117913554548794, "grad_norm": 3.1978137493133545, "learning_rate": 1.3620000000000002e-05, "loss": 0.9901, "step": 681 }, { "epoch": 0.11134239418799233, "grad_norm": 3.610102891921997, "learning_rate": 1.3640000000000002e-05, "loss": 1.1225, "step": 682 }, { "epoch": 0.11150565283049671, "grad_norm": 3.911137580871582, "learning_rate": 1.3660000000000001e-05, "loss": 1.3129, "step": 683 }, { "epoch": 0.1116689114730011, "grad_norm": 3.214874505996704, "learning_rate": 1.3680000000000003e-05, "loss": 1.2243, "step": 684 }, { "epoch": 0.11183217011550549, "grad_norm": 3.3559815883636475, "learning_rate": 1.3700000000000003e-05, "loss": 1.2201, "step": 685 }, { "epoch": 0.11199542875800987, "grad_norm": 3.711862325668335, "learning_rate": 1.3720000000000002e-05, "loss": 1.3428, "step": 686 }, { "epoch": 0.11215868740051427, "grad_norm": 3.822732925415039, "learning_rate": 1.3740000000000002e-05, "loss": 1.4981, "step": 687 }, { "epoch": 0.11232194604301865, "grad_norm": 4.188013076782227, "learning_rate": 1.376e-05, "loss": 1.4549, "step": 688 }, { "epoch": 0.11248520468552305, "grad_norm": 3.9636952877044678, "learning_rate": 1.378e-05, "loss": 1.3649, "step": 689 }, { "epoch": 0.11264846332802743, "grad_norm": 4.047921180725098, "learning_rate": 1.38e-05, "loss": 1.4856, "step": 690 }, { "epoch": 0.11281172197053181, "grad_norm": 3.8994967937469482, "learning_rate": 1.382e-05, "loss": 1.2193, "step": 691 }, { "epoch": 0.11297498061303621, "grad_norm": 3.5854315757751465, "learning_rate": 1.384e-05, "loss": 1.2168, "step": 692 }, { "epoch": 0.11313823925554059, "grad_norm": 3.8290929794311523, "learning_rate": 1.386e-05, "loss": 1.2585, "step": 693 }, { "epoch": 0.11330149789804497, "grad_norm": 3.9346275329589844, "learning_rate": 1.3880000000000001e-05, "loss": 1.3229, "step": 694 }, { "epoch": 0.11346475654054937, "grad_norm": 3.9840924739837646, "learning_rate": 1.39e-05, "loss": 1.4636, "step": 695 }, { "epoch": 0.11362801518305375, "grad_norm": 3.569615364074707, "learning_rate": 1.392e-05, "loss": 1.1782, "step": 696 }, { "epoch": 0.11379127382555813, "grad_norm": 3.710904121398926, "learning_rate": 1.394e-05, "loss": 1.2842, "step": 697 }, { "epoch": 0.11395453246806253, "grad_norm": 3.684187889099121, "learning_rate": 1.396e-05, "loss": 1.3729, "step": 698 }, { "epoch": 0.11411779111056691, "grad_norm": 3.4666247367858887, "learning_rate": 1.398e-05, "loss": 1.3207, "step": 699 }, { "epoch": 0.11428104975307131, "grad_norm": 3.9589779376983643, "learning_rate": 1.4e-05, "loss": 1.4083, "step": 700 }, { "epoch": 0.11444430839557569, "grad_norm": 3.8131892681121826, "learning_rate": 1.402e-05, "loss": 1.4209, "step": 701 }, { "epoch": 0.11460756703808007, "grad_norm": 3.590853691101074, "learning_rate": 1.4040000000000001e-05, "loss": 1.3326, "step": 702 }, { "epoch": 0.11477082568058447, "grad_norm": 3.577561140060425, "learning_rate": 1.4060000000000001e-05, "loss": 1.2316, "step": 703 }, { "epoch": 0.11493408432308885, "grad_norm": 4.003983497619629, "learning_rate": 1.408e-05, "loss": 1.4162, "step": 704 }, { "epoch": 0.11509734296559324, "grad_norm": 3.7051079273223877, "learning_rate": 1.41e-05, "loss": 1.2234, "step": 705 }, { "epoch": 0.11526060160809763, "grad_norm": 3.5429012775421143, "learning_rate": 1.412e-05, "loss": 1.2799, "step": 706 }, { "epoch": 0.11542386025060201, "grad_norm": 3.5033915042877197, "learning_rate": 1.414e-05, "loss": 1.2349, "step": 707 }, { "epoch": 0.1155871188931064, "grad_norm": 3.4133660793304443, "learning_rate": 1.416e-05, "loss": 1.1589, "step": 708 }, { "epoch": 0.1157503775356108, "grad_norm": 3.3485007286071777, "learning_rate": 1.418e-05, "loss": 1.2035, "step": 709 }, { "epoch": 0.11591363617811518, "grad_norm": 3.978501319885254, "learning_rate": 1.4200000000000001e-05, "loss": 1.2472, "step": 710 }, { "epoch": 0.11607689482061957, "grad_norm": 3.6980855464935303, "learning_rate": 1.4220000000000001e-05, "loss": 1.1594, "step": 711 }, { "epoch": 0.11624015346312395, "grad_norm": 3.469383716583252, "learning_rate": 1.4240000000000001e-05, "loss": 1.1583, "step": 712 }, { "epoch": 0.11640341210562834, "grad_norm": 3.5357589721679688, "learning_rate": 1.426e-05, "loss": 1.0846, "step": 713 }, { "epoch": 0.11656667074813273, "grad_norm": 3.9870450496673584, "learning_rate": 1.428e-05, "loss": 1.2744, "step": 714 }, { "epoch": 0.11672992939063712, "grad_norm": 3.685786008834839, "learning_rate": 1.43e-05, "loss": 1.1836, "step": 715 }, { "epoch": 0.1168931880331415, "grad_norm": 4.074507713317871, "learning_rate": 1.432e-05, "loss": 1.4074, "step": 716 }, { "epoch": 0.1170564466756459, "grad_norm": 3.4234910011291504, "learning_rate": 1.434e-05, "loss": 1.2483, "step": 717 }, { "epoch": 0.11721970531815028, "grad_norm": 3.895641326904297, "learning_rate": 1.4360000000000001e-05, "loss": 1.4878, "step": 718 }, { "epoch": 0.11738296396065467, "grad_norm": 3.516202211380005, "learning_rate": 1.4380000000000001e-05, "loss": 1.1839, "step": 719 }, { "epoch": 0.11754622260315906, "grad_norm": 3.8012096881866455, "learning_rate": 1.4400000000000001e-05, "loss": 1.2258, "step": 720 }, { "epoch": 0.11770948124566344, "grad_norm": 3.6699516773223877, "learning_rate": 1.4420000000000001e-05, "loss": 1.3334, "step": 721 }, { "epoch": 0.11787273988816784, "grad_norm": 4.067016124725342, "learning_rate": 1.444e-05, "loss": 1.4258, "step": 722 }, { "epoch": 0.11803599853067222, "grad_norm": 3.7168731689453125, "learning_rate": 1.446e-05, "loss": 1.1293, "step": 723 }, { "epoch": 0.1181992571731766, "grad_norm": 3.750455379486084, "learning_rate": 1.448e-05, "loss": 1.2175, "step": 724 }, { "epoch": 0.118362515815681, "grad_norm": 4.05366849899292, "learning_rate": 1.45e-05, "loss": 1.2492, "step": 725 }, { "epoch": 0.11852577445818538, "grad_norm": 4.169066429138184, "learning_rate": 1.4520000000000002e-05, "loss": 1.2713, "step": 726 }, { "epoch": 0.11868903310068976, "grad_norm": 3.9037580490112305, "learning_rate": 1.4540000000000001e-05, "loss": 1.2398, "step": 727 }, { "epoch": 0.11885229174319416, "grad_norm": 3.6101796627044678, "learning_rate": 1.4560000000000001e-05, "loss": 1.1705, "step": 728 }, { "epoch": 0.11901555038569854, "grad_norm": 3.6156351566314697, "learning_rate": 1.4580000000000001e-05, "loss": 1.1653, "step": 729 }, { "epoch": 0.11917880902820294, "grad_norm": 3.8591625690460205, "learning_rate": 1.46e-05, "loss": 1.2191, "step": 730 }, { "epoch": 0.11934206767070732, "grad_norm": 3.5009090900421143, "learning_rate": 1.462e-05, "loss": 1.1861, "step": 731 }, { "epoch": 0.1195053263132117, "grad_norm": 3.8880536556243896, "learning_rate": 1.464e-05, "loss": 1.5812, "step": 732 }, { "epoch": 0.1196685849557161, "grad_norm": 3.6560070514678955, "learning_rate": 1.466e-05, "loss": 1.3227, "step": 733 }, { "epoch": 0.11983184359822048, "grad_norm": 4.021060466766357, "learning_rate": 1.4680000000000002e-05, "loss": 1.2716, "step": 734 }, { "epoch": 0.11999510224072486, "grad_norm": 3.6848957538604736, "learning_rate": 1.4700000000000002e-05, "loss": 1.1185, "step": 735 }, { "epoch": 0.12015836088322926, "grad_norm": 3.905407190322876, "learning_rate": 1.4720000000000001e-05, "loss": 1.2464, "step": 736 }, { "epoch": 0.12032161952573364, "grad_norm": 3.4452757835388184, "learning_rate": 1.4740000000000001e-05, "loss": 1.224, "step": 737 }, { "epoch": 0.12048487816823802, "grad_norm": 3.161283016204834, "learning_rate": 1.4760000000000001e-05, "loss": 1.0771, "step": 738 }, { "epoch": 0.12064813681074242, "grad_norm": 3.962526798248291, "learning_rate": 1.478e-05, "loss": 1.4313, "step": 739 }, { "epoch": 0.1208113954532468, "grad_norm": 3.3856589794158936, "learning_rate": 1.48e-05, "loss": 1.0813, "step": 740 }, { "epoch": 0.1209746540957512, "grad_norm": 3.597445249557495, "learning_rate": 1.482e-05, "loss": 1.0991, "step": 741 }, { "epoch": 0.12113791273825558, "grad_norm": 3.779435157775879, "learning_rate": 1.4840000000000002e-05, "loss": 1.3308, "step": 742 }, { "epoch": 0.12130117138075996, "grad_norm": 4.17114782333374, "learning_rate": 1.4860000000000002e-05, "loss": 1.2125, "step": 743 }, { "epoch": 0.12146443002326436, "grad_norm": 4.138970375061035, "learning_rate": 1.4880000000000002e-05, "loss": 1.3621, "step": 744 }, { "epoch": 0.12162768866576874, "grad_norm": 3.9532558917999268, "learning_rate": 1.4900000000000001e-05, "loss": 1.1364, "step": 745 }, { "epoch": 0.12179094730827313, "grad_norm": 3.419613838195801, "learning_rate": 1.4920000000000001e-05, "loss": 1.2141, "step": 746 }, { "epoch": 0.12195420595077752, "grad_norm": 3.6060585975646973, "learning_rate": 1.4940000000000001e-05, "loss": 1.1794, "step": 747 }, { "epoch": 0.1221174645932819, "grad_norm": 3.703245162963867, "learning_rate": 1.496e-05, "loss": 1.143, "step": 748 }, { "epoch": 0.1222807232357863, "grad_norm": 4.130499839782715, "learning_rate": 1.498e-05, "loss": 1.4333, "step": 749 }, { "epoch": 0.12244398187829068, "grad_norm": 4.083974361419678, "learning_rate": 1.5000000000000002e-05, "loss": 1.2917, "step": 750 }, { "epoch": 0.12260724052079507, "grad_norm": 3.663986921310425, "learning_rate": 1.5020000000000002e-05, "loss": 1.1978, "step": 751 }, { "epoch": 0.12277049916329946, "grad_norm": 4.184744358062744, "learning_rate": 1.5040000000000002e-05, "loss": 1.396, "step": 752 }, { "epoch": 0.12293375780580384, "grad_norm": 3.570444107055664, "learning_rate": 1.5060000000000001e-05, "loss": 1.1119, "step": 753 }, { "epoch": 0.12309701644830823, "grad_norm": 3.9097909927368164, "learning_rate": 1.5080000000000001e-05, "loss": 1.3995, "step": 754 }, { "epoch": 0.12326027509081262, "grad_norm": 4.048583984375, "learning_rate": 1.5100000000000001e-05, "loss": 1.3034, "step": 755 }, { "epoch": 0.123423533733317, "grad_norm": 3.79518461227417, "learning_rate": 1.5120000000000001e-05, "loss": 1.2271, "step": 756 }, { "epoch": 0.12358679237582139, "grad_norm": 3.7512080669403076, "learning_rate": 1.514e-05, "loss": 1.3109, "step": 757 }, { "epoch": 0.12375005101832579, "grad_norm": 3.892730236053467, "learning_rate": 1.516e-05, "loss": 1.2394, "step": 758 }, { "epoch": 0.12391330966083017, "grad_norm": 3.6238908767700195, "learning_rate": 1.5180000000000002e-05, "loss": 1.1686, "step": 759 }, { "epoch": 0.12407656830333456, "grad_norm": 3.663632869720459, "learning_rate": 1.5200000000000002e-05, "loss": 1.183, "step": 760 }, { "epoch": 0.12423982694583895, "grad_norm": 3.898149251937866, "learning_rate": 1.5220000000000002e-05, "loss": 1.2858, "step": 761 }, { "epoch": 0.12440308558834333, "grad_norm": 4.052553653717041, "learning_rate": 1.5240000000000001e-05, "loss": 1.1968, "step": 762 }, { "epoch": 0.12456634423084773, "grad_norm": 3.6441586017608643, "learning_rate": 1.5260000000000003e-05, "loss": 1.1395, "step": 763 }, { "epoch": 0.12472960287335211, "grad_norm": 3.5650839805603027, "learning_rate": 1.5280000000000003e-05, "loss": 1.0342, "step": 764 }, { "epoch": 0.12489286151585649, "grad_norm": 4.067885398864746, "learning_rate": 1.5300000000000003e-05, "loss": 1.465, "step": 765 }, { "epoch": 0.12505612015836087, "grad_norm": 4.047980785369873, "learning_rate": 1.5320000000000002e-05, "loss": 1.1669, "step": 766 }, { "epoch": 0.12521937880086528, "grad_norm": 3.8675081729888916, "learning_rate": 1.5340000000000002e-05, "loss": 1.1628, "step": 767 }, { "epoch": 0.12538263744336967, "grad_norm": 3.7997727394104004, "learning_rate": 1.5360000000000002e-05, "loss": 1.0492, "step": 768 }, { "epoch": 0.12554589608587405, "grad_norm": 4.03950309753418, "learning_rate": 1.5380000000000002e-05, "loss": 1.3983, "step": 769 }, { "epoch": 0.12570915472837843, "grad_norm": 3.4088656902313232, "learning_rate": 1.54e-05, "loss": 1.1545, "step": 770 }, { "epoch": 0.1258724133708828, "grad_norm": 3.864832639694214, "learning_rate": 1.542e-05, "loss": 1.3185, "step": 771 }, { "epoch": 0.1260356720133872, "grad_norm": 3.6551194190979004, "learning_rate": 1.544e-05, "loss": 1.2523, "step": 772 }, { "epoch": 0.1261989306558916, "grad_norm": 3.7685720920562744, "learning_rate": 1.546e-05, "loss": 1.2182, "step": 773 }, { "epoch": 0.126362189298396, "grad_norm": 3.330340623855591, "learning_rate": 1.548e-05, "loss": 1.1278, "step": 774 }, { "epoch": 0.12652544794090037, "grad_norm": 3.638216733932495, "learning_rate": 1.55e-05, "loss": 1.2359, "step": 775 }, { "epoch": 0.12668870658340475, "grad_norm": 3.5397541522979736, "learning_rate": 1.552e-05, "loss": 1.2524, "step": 776 }, { "epoch": 0.12685196522590914, "grad_norm": 3.4309122562408447, "learning_rate": 1.554e-05, "loss": 1.0289, "step": 777 }, { "epoch": 0.12701522386841355, "grad_norm": 4.0704851150512695, "learning_rate": 1.556e-05, "loss": 1.2226, "step": 778 }, { "epoch": 0.12717848251091793, "grad_norm": 3.7996082305908203, "learning_rate": 1.5580000000000003e-05, "loss": 1.1791, "step": 779 }, { "epoch": 0.1273417411534223, "grad_norm": 3.9137752056121826, "learning_rate": 1.5600000000000003e-05, "loss": 1.2188, "step": 780 }, { "epoch": 0.1275049997959267, "grad_norm": 4.0853166580200195, "learning_rate": 1.5620000000000003e-05, "loss": 1.3726, "step": 781 }, { "epoch": 0.12766825843843108, "grad_norm": 3.8568532466888428, "learning_rate": 1.5640000000000003e-05, "loss": 1.252, "step": 782 }, { "epoch": 0.12783151708093546, "grad_norm": 3.6168577671051025, "learning_rate": 1.5660000000000003e-05, "loss": 1.0652, "step": 783 }, { "epoch": 0.12799477572343987, "grad_norm": 4.017434120178223, "learning_rate": 1.5680000000000002e-05, "loss": 1.4678, "step": 784 }, { "epoch": 0.12815803436594425, "grad_norm": 3.7978549003601074, "learning_rate": 1.5700000000000002e-05, "loss": 1.2402, "step": 785 }, { "epoch": 0.12832129300844863, "grad_norm": 3.404134511947632, "learning_rate": 1.5720000000000002e-05, "loss": 1.1033, "step": 786 }, { "epoch": 0.12848455165095302, "grad_norm": 3.659363269805908, "learning_rate": 1.5740000000000002e-05, "loss": 1.3949, "step": 787 }, { "epoch": 0.1286478102934574, "grad_norm": 3.3192381858825684, "learning_rate": 1.576e-05, "loss": 1.0241, "step": 788 }, { "epoch": 0.1288110689359618, "grad_norm": 3.8511464595794678, "learning_rate": 1.578e-05, "loss": 1.3496, "step": 789 }, { "epoch": 0.1289743275784662, "grad_norm": 3.7339038848876953, "learning_rate": 1.58e-05, "loss": 1.2122, "step": 790 }, { "epoch": 0.12913758622097057, "grad_norm": 3.826043128967285, "learning_rate": 1.582e-05, "loss": 1.1365, "step": 791 }, { "epoch": 0.12930084486347496, "grad_norm": 3.8255510330200195, "learning_rate": 1.584e-05, "loss": 1.2386, "step": 792 }, { "epoch": 0.12946410350597934, "grad_norm": 3.5173110961914062, "learning_rate": 1.586e-05, "loss": 1.0919, "step": 793 }, { "epoch": 0.12962736214848375, "grad_norm": 3.6960577964782715, "learning_rate": 1.588e-05, "loss": 1.1693, "step": 794 }, { "epoch": 0.12979062079098813, "grad_norm": 3.743501663208008, "learning_rate": 1.5900000000000004e-05, "loss": 1.1948, "step": 795 }, { "epoch": 0.12995387943349251, "grad_norm": 3.81778883934021, "learning_rate": 1.5920000000000003e-05, "loss": 1.0807, "step": 796 }, { "epoch": 0.1301171380759969, "grad_norm": 3.9969635009765625, "learning_rate": 1.5940000000000003e-05, "loss": 1.2485, "step": 797 }, { "epoch": 0.13028039671850128, "grad_norm": 3.664074659347534, "learning_rate": 1.5960000000000003e-05, "loss": 1.0922, "step": 798 }, { "epoch": 0.13044365536100566, "grad_norm": 3.5609540939331055, "learning_rate": 1.5980000000000003e-05, "loss": 1.0286, "step": 799 }, { "epoch": 0.13060691400351007, "grad_norm": 4.071698188781738, "learning_rate": 1.6000000000000003e-05, "loss": 1.1795, "step": 800 }, { "epoch": 0.13077017264601445, "grad_norm": 4.198808670043945, "learning_rate": 1.6020000000000002e-05, "loss": 1.2676, "step": 801 }, { "epoch": 0.13093343128851884, "grad_norm": 3.7646617889404297, "learning_rate": 1.6040000000000002e-05, "loss": 1.1916, "step": 802 }, { "epoch": 0.13109668993102322, "grad_norm": 3.841550588607788, "learning_rate": 1.6060000000000002e-05, "loss": 1.2862, "step": 803 }, { "epoch": 0.1312599485735276, "grad_norm": 3.924927234649658, "learning_rate": 1.6080000000000002e-05, "loss": 1.3464, "step": 804 }, { "epoch": 0.131423207216032, "grad_norm": 3.939681053161621, "learning_rate": 1.6100000000000002e-05, "loss": 1.0901, "step": 805 }, { "epoch": 0.1315864658585364, "grad_norm": 4.230499267578125, "learning_rate": 1.612e-05, "loss": 1.2139, "step": 806 }, { "epoch": 0.13174972450104078, "grad_norm": 3.7999072074890137, "learning_rate": 1.614e-05, "loss": 1.2726, "step": 807 }, { "epoch": 0.13191298314354516, "grad_norm": 4.263776779174805, "learning_rate": 1.616e-05, "loss": 1.2494, "step": 808 }, { "epoch": 0.13207624178604954, "grad_norm": 3.8950114250183105, "learning_rate": 1.618e-05, "loss": 1.1266, "step": 809 }, { "epoch": 0.13223950042855392, "grad_norm": 3.8111355304718018, "learning_rate": 1.62e-05, "loss": 1.2928, "step": 810 }, { "epoch": 0.13240275907105833, "grad_norm": 3.659991979598999, "learning_rate": 1.6220000000000004e-05, "loss": 0.9983, "step": 811 }, { "epoch": 0.13256601771356272, "grad_norm": 4.087343692779541, "learning_rate": 1.6240000000000004e-05, "loss": 1.278, "step": 812 }, { "epoch": 0.1327292763560671, "grad_norm": 3.7438080310821533, "learning_rate": 1.626e-05, "loss": 1.1756, "step": 813 }, { "epoch": 0.13289253499857148, "grad_norm": 3.9519829750061035, "learning_rate": 1.628e-05, "loss": 1.2883, "step": 814 }, { "epoch": 0.13305579364107586, "grad_norm": 3.9763050079345703, "learning_rate": 1.63e-05, "loss": 1.4693, "step": 815 }, { "epoch": 0.13321905228358027, "grad_norm": 3.6088294982910156, "learning_rate": 1.632e-05, "loss": 1.1471, "step": 816 }, { "epoch": 0.13338231092608466, "grad_norm": 3.677157402038574, "learning_rate": 1.634e-05, "loss": 1.3543, "step": 817 }, { "epoch": 0.13354556956858904, "grad_norm": 4.075869560241699, "learning_rate": 1.636e-05, "loss": 1.3574, "step": 818 }, { "epoch": 0.13370882821109342, "grad_norm": 3.9465348720550537, "learning_rate": 1.638e-05, "loss": 1.1904, "step": 819 }, { "epoch": 0.1338720868535978, "grad_norm": 3.808359146118164, "learning_rate": 1.64e-05, "loss": 1.1209, "step": 820 }, { "epoch": 0.1340353454961022, "grad_norm": 4.820913314819336, "learning_rate": 1.6420000000000002e-05, "loss": 1.4835, "step": 821 }, { "epoch": 0.1341986041386066, "grad_norm": 4.289326190948486, "learning_rate": 1.6440000000000002e-05, "loss": 1.2277, "step": 822 }, { "epoch": 0.13436186278111098, "grad_norm": 3.574338912963867, "learning_rate": 1.646e-05, "loss": 1.2076, "step": 823 }, { "epoch": 0.13452512142361536, "grad_norm": 4.029977798461914, "learning_rate": 1.648e-05, "loss": 1.3888, "step": 824 }, { "epoch": 0.13468838006611975, "grad_norm": 3.4248008728027344, "learning_rate": 1.65e-05, "loss": 1.033, "step": 825 }, { "epoch": 0.13485163870862413, "grad_norm": 3.66709566116333, "learning_rate": 1.652e-05, "loss": 1.1163, "step": 826 }, { "epoch": 0.13501489735112854, "grad_norm": 3.720078468322754, "learning_rate": 1.654e-05, "loss": 1.2202, "step": 827 }, { "epoch": 0.13517815599363292, "grad_norm": 3.7879581451416016, "learning_rate": 1.656e-05, "loss": 1.1852, "step": 828 }, { "epoch": 0.1353414146361373, "grad_norm": 3.6591570377349854, "learning_rate": 1.658e-05, "loss": 1.0053, "step": 829 }, { "epoch": 0.13550467327864169, "grad_norm": 4.020118713378906, "learning_rate": 1.66e-05, "loss": 1.159, "step": 830 }, { "epoch": 0.13566793192114607, "grad_norm": 3.6668365001678467, "learning_rate": 1.662e-05, "loss": 1.274, "step": 831 }, { "epoch": 0.13583119056365045, "grad_norm": 3.87104868888855, "learning_rate": 1.664e-05, "loss": 1.2261, "step": 832 }, { "epoch": 0.13599444920615486, "grad_norm": 3.9240522384643555, "learning_rate": 1.666e-05, "loss": 1.2148, "step": 833 }, { "epoch": 0.13615770784865924, "grad_norm": 3.868813991546631, "learning_rate": 1.668e-05, "loss": 1.0511, "step": 834 }, { "epoch": 0.13632096649116363, "grad_norm": 4.203418731689453, "learning_rate": 1.67e-05, "loss": 1.4627, "step": 835 }, { "epoch": 0.136484225133668, "grad_norm": 4.290571212768555, "learning_rate": 1.672e-05, "loss": 1.4764, "step": 836 }, { "epoch": 0.1366474837761724, "grad_norm": 3.7100119590759277, "learning_rate": 1.6740000000000002e-05, "loss": 1.1596, "step": 837 }, { "epoch": 0.1368107424186768, "grad_norm": 3.552501916885376, "learning_rate": 1.6760000000000002e-05, "loss": 1.0548, "step": 838 }, { "epoch": 0.13697400106118118, "grad_norm": 3.754293203353882, "learning_rate": 1.6780000000000002e-05, "loss": 1.3714, "step": 839 }, { "epoch": 0.13713725970368557, "grad_norm": 3.873776912689209, "learning_rate": 1.6800000000000002e-05, "loss": 1.2058, "step": 840 }, { "epoch": 0.13730051834618995, "grad_norm": 3.6082277297973633, "learning_rate": 1.682e-05, "loss": 1.3255, "step": 841 }, { "epoch": 0.13746377698869433, "grad_norm": 3.747361898422241, "learning_rate": 1.684e-05, "loss": 1.1594, "step": 842 }, { "epoch": 0.1376270356311987, "grad_norm": 3.8969223499298096, "learning_rate": 1.686e-05, "loss": 1.2424, "step": 843 }, { "epoch": 0.13779029427370312, "grad_norm": 3.3347508907318115, "learning_rate": 1.688e-05, "loss": 1.015, "step": 844 }, { "epoch": 0.1379535529162075, "grad_norm": 3.617504835128784, "learning_rate": 1.69e-05, "loss": 1.1462, "step": 845 }, { "epoch": 0.1381168115587119, "grad_norm": 3.9567179679870605, "learning_rate": 1.692e-05, "loss": 1.3475, "step": 846 }, { "epoch": 0.13828007020121627, "grad_norm": 3.780381441116333, "learning_rate": 1.694e-05, "loss": 1.3378, "step": 847 }, { "epoch": 0.13844332884372065, "grad_norm": 4.151875972747803, "learning_rate": 1.696e-05, "loss": 1.3703, "step": 848 }, { "epoch": 0.13860658748622506, "grad_norm": 4.070801258087158, "learning_rate": 1.698e-05, "loss": 1.2843, "step": 849 }, { "epoch": 0.13876984612872945, "grad_norm": 3.715493679046631, "learning_rate": 1.7e-05, "loss": 1.0906, "step": 850 }, { "epoch": 0.13893310477123383, "grad_norm": 4.3163652420043945, "learning_rate": 1.702e-05, "loss": 1.415, "step": 851 }, { "epoch": 0.1390963634137382, "grad_norm": 3.9290943145751953, "learning_rate": 1.704e-05, "loss": 1.2624, "step": 852 }, { "epoch": 0.1392596220562426, "grad_norm": 3.7763426303863525, "learning_rate": 1.7060000000000003e-05, "loss": 1.1863, "step": 853 }, { "epoch": 0.139422880698747, "grad_norm": 4.634256839752197, "learning_rate": 1.7080000000000002e-05, "loss": 1.2481, "step": 854 }, { "epoch": 0.1395861393412514, "grad_norm": 3.8699991703033447, "learning_rate": 1.7100000000000002e-05, "loss": 1.2436, "step": 855 }, { "epoch": 0.13974939798375577, "grad_norm": 4.563699245452881, "learning_rate": 1.7120000000000002e-05, "loss": 1.4215, "step": 856 }, { "epoch": 0.13991265662626015, "grad_norm": 3.5618703365325928, "learning_rate": 1.7140000000000002e-05, "loss": 1.1663, "step": 857 }, { "epoch": 0.14007591526876453, "grad_norm": 3.614856004714966, "learning_rate": 1.7160000000000002e-05, "loss": 1.1788, "step": 858 }, { "epoch": 0.14023917391126892, "grad_norm": 3.689190149307251, "learning_rate": 1.718e-05, "loss": 1.3295, "step": 859 }, { "epoch": 0.14040243255377333, "grad_norm": 3.915456533432007, "learning_rate": 1.72e-05, "loss": 1.226, "step": 860 }, { "epoch": 0.1405656911962777, "grad_norm": 4.233343601226807, "learning_rate": 1.722e-05, "loss": 1.5542, "step": 861 }, { "epoch": 0.1407289498387821, "grad_norm": 4.061744213104248, "learning_rate": 1.724e-05, "loss": 1.3334, "step": 862 }, { "epoch": 0.14089220848128647, "grad_norm": 4.035317420959473, "learning_rate": 1.726e-05, "loss": 1.3401, "step": 863 }, { "epoch": 0.14105546712379086, "grad_norm": 3.687224864959717, "learning_rate": 1.728e-05, "loss": 1.3287, "step": 864 }, { "epoch": 0.14121872576629527, "grad_norm": 3.6284921169281006, "learning_rate": 1.73e-05, "loss": 1.4113, "step": 865 }, { "epoch": 0.14138198440879965, "grad_norm": 3.7581064701080322, "learning_rate": 1.732e-05, "loss": 1.2903, "step": 866 }, { "epoch": 0.14154524305130403, "grad_norm": 3.8146145343780518, "learning_rate": 1.734e-05, "loss": 1.3696, "step": 867 }, { "epoch": 0.14170850169380841, "grad_norm": 3.73210072517395, "learning_rate": 1.736e-05, "loss": 1.2091, "step": 868 }, { "epoch": 0.1418717603363128, "grad_norm": 4.134634494781494, "learning_rate": 1.7380000000000003e-05, "loss": 1.3704, "step": 869 }, { "epoch": 0.14203501897881718, "grad_norm": 3.6351566314697266, "learning_rate": 1.7400000000000003e-05, "loss": 1.1427, "step": 870 }, { "epoch": 0.1421982776213216, "grad_norm": 4.12448787689209, "learning_rate": 1.7420000000000003e-05, "loss": 1.0195, "step": 871 }, { "epoch": 0.14236153626382597, "grad_norm": 4.047516822814941, "learning_rate": 1.7440000000000002e-05, "loss": 1.0498, "step": 872 }, { "epoch": 0.14252479490633035, "grad_norm": 3.708630323410034, "learning_rate": 1.7460000000000002e-05, "loss": 1.0731, "step": 873 }, { "epoch": 0.14268805354883474, "grad_norm": 4.0086846351623535, "learning_rate": 1.7480000000000002e-05, "loss": 1.2123, "step": 874 }, { "epoch": 0.14285131219133912, "grad_norm": 3.838542938232422, "learning_rate": 1.7500000000000002e-05, "loss": 1.1763, "step": 875 }, { "epoch": 0.14301457083384353, "grad_norm": 3.737403392791748, "learning_rate": 1.752e-05, "loss": 1.1892, "step": 876 }, { "epoch": 0.1431778294763479, "grad_norm": 3.978010416030884, "learning_rate": 1.754e-05, "loss": 1.1558, "step": 877 }, { "epoch": 0.1433410881188523, "grad_norm": 3.889146089553833, "learning_rate": 1.756e-05, "loss": 1.1061, "step": 878 }, { "epoch": 0.14350434676135668, "grad_norm": 3.925863027572632, "learning_rate": 1.758e-05, "loss": 1.1968, "step": 879 }, { "epoch": 0.14366760540386106, "grad_norm": 4.2519989013671875, "learning_rate": 1.76e-05, "loss": 1.1441, "step": 880 }, { "epoch": 0.14383086404636544, "grad_norm": 4.344394207000732, "learning_rate": 1.762e-05, "loss": 1.1888, "step": 881 }, { "epoch": 0.14399412268886985, "grad_norm": 4.243584632873535, "learning_rate": 1.764e-05, "loss": 1.4847, "step": 882 }, { "epoch": 0.14415738133137423, "grad_norm": 3.811123847961426, "learning_rate": 1.766e-05, "loss": 1.1493, "step": 883 }, { "epoch": 0.14432063997387862, "grad_norm": 3.676389694213867, "learning_rate": 1.768e-05, "loss": 1.222, "step": 884 }, { "epoch": 0.144483898616383, "grad_norm": 4.011000156402588, "learning_rate": 1.77e-05, "loss": 1.4624, "step": 885 }, { "epoch": 0.14464715725888738, "grad_norm": 3.6981019973754883, "learning_rate": 1.7720000000000003e-05, "loss": 1.2749, "step": 886 }, { "epoch": 0.1448104159013918, "grad_norm": 3.6729278564453125, "learning_rate": 1.7740000000000003e-05, "loss": 1.0577, "step": 887 }, { "epoch": 0.14497367454389617, "grad_norm": 3.4469451904296875, "learning_rate": 1.7760000000000003e-05, "loss": 1.0797, "step": 888 }, { "epoch": 0.14513693318640056, "grad_norm": 4.000832557678223, "learning_rate": 1.7780000000000003e-05, "loss": 1.3949, "step": 889 }, { "epoch": 0.14530019182890494, "grad_norm": 3.5042126178741455, "learning_rate": 1.7800000000000002e-05, "loss": 1.0036, "step": 890 }, { "epoch": 0.14546345047140932, "grad_norm": 4.32045841217041, "learning_rate": 1.7820000000000002e-05, "loss": 1.2361, "step": 891 }, { "epoch": 0.1456267091139137, "grad_norm": 3.56363582611084, "learning_rate": 1.7840000000000002e-05, "loss": 1.1309, "step": 892 }, { "epoch": 0.14578996775641812, "grad_norm": 3.9022343158721924, "learning_rate": 1.7860000000000002e-05, "loss": 1.1408, "step": 893 }, { "epoch": 0.1459532263989225, "grad_norm": 4.2048797607421875, "learning_rate": 1.788e-05, "loss": 1.2413, "step": 894 }, { "epoch": 0.14611648504142688, "grad_norm": 4.125240802764893, "learning_rate": 1.79e-05, "loss": 1.1383, "step": 895 }, { "epoch": 0.14627974368393126, "grad_norm": 4.036040306091309, "learning_rate": 1.792e-05, "loss": 1.1707, "step": 896 }, { "epoch": 0.14644300232643565, "grad_norm": 3.7547965049743652, "learning_rate": 1.794e-05, "loss": 1.1468, "step": 897 }, { "epoch": 0.14660626096894006, "grad_norm": 3.9575328826904297, "learning_rate": 1.796e-05, "loss": 1.3, "step": 898 }, { "epoch": 0.14676951961144444, "grad_norm": 4.257845878601074, "learning_rate": 1.798e-05, "loss": 1.3838, "step": 899 }, { "epoch": 0.14693277825394882, "grad_norm": 3.9772706031799316, "learning_rate": 1.8e-05, "loss": 1.17, "step": 900 }, { "epoch": 0.1470960368964532, "grad_norm": 3.923588275909424, "learning_rate": 1.802e-05, "loss": 1.1624, "step": 901 }, { "epoch": 0.14725929553895759, "grad_norm": 3.5000929832458496, "learning_rate": 1.8040000000000003e-05, "loss": 1.1754, "step": 902 }, { "epoch": 0.14742255418146197, "grad_norm": 3.759549140930176, "learning_rate": 1.8060000000000003e-05, "loss": 1.1846, "step": 903 }, { "epoch": 0.14758581282396638, "grad_norm": 4.107501029968262, "learning_rate": 1.8080000000000003e-05, "loss": 1.2005, "step": 904 }, { "epoch": 0.14774907146647076, "grad_norm": 3.9876441955566406, "learning_rate": 1.8100000000000003e-05, "loss": 1.1263, "step": 905 }, { "epoch": 0.14791233010897514, "grad_norm": 4.332236289978027, "learning_rate": 1.8120000000000003e-05, "loss": 1.4115, "step": 906 }, { "epoch": 0.14807558875147953, "grad_norm": 3.4211266040802, "learning_rate": 1.8140000000000003e-05, "loss": 1.0959, "step": 907 }, { "epoch": 0.1482388473939839, "grad_norm": 3.876922607421875, "learning_rate": 1.8160000000000002e-05, "loss": 1.2126, "step": 908 }, { "epoch": 0.14840210603648832, "grad_norm": 3.6633400917053223, "learning_rate": 1.8180000000000002e-05, "loss": 1.0687, "step": 909 }, { "epoch": 0.1485653646789927, "grad_norm": 3.870004177093506, "learning_rate": 1.8200000000000002e-05, "loss": 1.3674, "step": 910 }, { "epoch": 0.14872862332149708, "grad_norm": 3.8107125759124756, "learning_rate": 1.8220000000000002e-05, "loss": 1.2328, "step": 911 }, { "epoch": 0.14889188196400147, "grad_norm": 3.9480977058410645, "learning_rate": 1.824e-05, "loss": 1.1869, "step": 912 }, { "epoch": 0.14905514060650585, "grad_norm": 4.795711517333984, "learning_rate": 1.826e-05, "loss": 1.3063, "step": 913 }, { "epoch": 0.14921839924901023, "grad_norm": 4.590252876281738, "learning_rate": 1.828e-05, "loss": 2.0215, "step": 914 }, { "epoch": 0.14938165789151464, "grad_norm": 4.430607795715332, "learning_rate": 1.83e-05, "loss": 1.3452, "step": 915 }, { "epoch": 0.14954491653401902, "grad_norm": 3.6174509525299072, "learning_rate": 1.832e-05, "loss": 1.2857, "step": 916 }, { "epoch": 0.1497081751765234, "grad_norm": 3.4804775714874268, "learning_rate": 1.834e-05, "loss": 1.1498, "step": 917 }, { "epoch": 0.1498714338190278, "grad_norm": 3.5771749019622803, "learning_rate": 1.8360000000000004e-05, "loss": 1.2973, "step": 918 }, { "epoch": 0.15003469246153217, "grad_norm": 3.7805728912353516, "learning_rate": 1.8380000000000004e-05, "loss": 1.3774, "step": 919 }, { "epoch": 0.15019795110403658, "grad_norm": 3.3416428565979004, "learning_rate": 1.8400000000000003e-05, "loss": 1.0671, "step": 920 }, { "epoch": 0.15036120974654096, "grad_norm": 3.9249918460845947, "learning_rate": 1.8420000000000003e-05, "loss": 1.1925, "step": 921 }, { "epoch": 0.15052446838904535, "grad_norm": 4.056691646575928, "learning_rate": 1.8440000000000003e-05, "loss": 1.3039, "step": 922 }, { "epoch": 0.15068772703154973, "grad_norm": 3.7876267433166504, "learning_rate": 1.8460000000000003e-05, "loss": 1.1843, "step": 923 }, { "epoch": 0.1508509856740541, "grad_norm": 3.922724962234497, "learning_rate": 1.8480000000000003e-05, "loss": 1.1205, "step": 924 }, { "epoch": 0.15101424431655852, "grad_norm": 3.8251254558563232, "learning_rate": 1.8500000000000002e-05, "loss": 1.1489, "step": 925 }, { "epoch": 0.1511775029590629, "grad_norm": 3.7672629356384277, "learning_rate": 1.8520000000000002e-05, "loss": 1.0921, "step": 926 }, { "epoch": 0.1513407616015673, "grad_norm": 3.8812201023101807, "learning_rate": 1.8540000000000002e-05, "loss": 1.1118, "step": 927 }, { "epoch": 0.15150402024407167, "grad_norm": 3.759100914001465, "learning_rate": 1.8560000000000002e-05, "loss": 1.2143, "step": 928 }, { "epoch": 0.15166727888657605, "grad_norm": 4.220949172973633, "learning_rate": 1.858e-05, "loss": 1.2577, "step": 929 }, { "epoch": 0.15183053752908043, "grad_norm": 4.277108669281006, "learning_rate": 1.86e-05, "loss": 1.2196, "step": 930 }, { "epoch": 0.15199379617158484, "grad_norm": 4.0764641761779785, "learning_rate": 1.862e-05, "loss": 1.1111, "step": 931 }, { "epoch": 0.15215705481408923, "grad_norm": 4.003574371337891, "learning_rate": 1.864e-05, "loss": 1.3876, "step": 932 }, { "epoch": 0.1523203134565936, "grad_norm": 3.6234166622161865, "learning_rate": 1.866e-05, "loss": 1.115, "step": 933 }, { "epoch": 0.152483572099098, "grad_norm": 3.967916965484619, "learning_rate": 1.8680000000000004e-05, "loss": 1.1637, "step": 934 }, { "epoch": 0.15264683074160237, "grad_norm": 4.211474895477295, "learning_rate": 1.8700000000000004e-05, "loss": 1.1236, "step": 935 }, { "epoch": 0.15281008938410678, "grad_norm": 4.861668109893799, "learning_rate": 1.8720000000000004e-05, "loss": 2.2841, "step": 936 }, { "epoch": 0.15297334802661117, "grad_norm": 3.9306201934814453, "learning_rate": 1.8740000000000004e-05, "loss": 1.1829, "step": 937 }, { "epoch": 0.15313660666911555, "grad_norm": 3.5618250370025635, "learning_rate": 1.876e-05, "loss": 1.027, "step": 938 }, { "epoch": 0.15329986531161993, "grad_norm": 3.814467668533325, "learning_rate": 1.878e-05, "loss": 1.0078, "step": 939 }, { "epoch": 0.15346312395412431, "grad_norm": 3.914926290512085, "learning_rate": 1.88e-05, "loss": 1.0477, "step": 940 }, { "epoch": 0.1536263825966287, "grad_norm": 3.9278836250305176, "learning_rate": 1.882e-05, "loss": 1.1153, "step": 941 }, { "epoch": 0.1537896412391331, "grad_norm": 4.292795658111572, "learning_rate": 1.884e-05, "loss": 1.1522, "step": 942 }, { "epoch": 0.1539528998816375, "grad_norm": 3.8160579204559326, "learning_rate": 1.886e-05, "loss": 1.1823, "step": 943 }, { "epoch": 0.15411615852414187, "grad_norm": 3.824455976486206, "learning_rate": 1.8880000000000002e-05, "loss": 1.1632, "step": 944 }, { "epoch": 0.15427941716664625, "grad_norm": 3.7333240509033203, "learning_rate": 1.8900000000000002e-05, "loss": 1.1225, "step": 945 }, { "epoch": 0.15444267580915064, "grad_norm": 3.8102588653564453, "learning_rate": 1.8920000000000002e-05, "loss": 1.0404, "step": 946 }, { "epoch": 0.15460593445165505, "grad_norm": 3.918807029724121, "learning_rate": 1.894e-05, "loss": 1.1434, "step": 947 }, { "epoch": 0.15476919309415943, "grad_norm": 3.489577531814575, "learning_rate": 1.896e-05, "loss": 0.9941, "step": 948 }, { "epoch": 0.1549324517366638, "grad_norm": 3.6847262382507324, "learning_rate": 1.898e-05, "loss": 1.0679, "step": 949 }, { "epoch": 0.1550957103791682, "grad_norm": 3.1666769981384277, "learning_rate": 1.9e-05, "loss": 0.904, "step": 950 }, { "epoch": 0.15525896902167258, "grad_norm": 3.731367349624634, "learning_rate": 1.902e-05, "loss": 1.0726, "step": 951 }, { "epoch": 0.15542222766417696, "grad_norm": 3.5811705589294434, "learning_rate": 1.904e-05, "loss": 1.1277, "step": 952 }, { "epoch": 0.15558548630668137, "grad_norm": 4.096112251281738, "learning_rate": 1.906e-05, "loss": 1.1827, "step": 953 }, { "epoch": 0.15574874494918575, "grad_norm": 4.119441032409668, "learning_rate": 1.908e-05, "loss": 1.1073, "step": 954 }, { "epoch": 0.15591200359169013, "grad_norm": 4.685227394104004, "learning_rate": 1.91e-05, "loss": 1.3024, "step": 955 }, { "epoch": 0.15607526223419452, "grad_norm": 4.348023891448975, "learning_rate": 1.912e-05, "loss": 1.3805, "step": 956 }, { "epoch": 0.1562385208766989, "grad_norm": 4.026956081390381, "learning_rate": 1.914e-05, "loss": 1.2217, "step": 957 }, { "epoch": 0.1564017795192033, "grad_norm": 3.675999641418457, "learning_rate": 1.916e-05, "loss": 0.96, "step": 958 }, { "epoch": 0.1565650381617077, "grad_norm": 4.2027997970581055, "learning_rate": 1.918e-05, "loss": 1.2736, "step": 959 }, { "epoch": 0.15672829680421207, "grad_norm": 3.852231979370117, "learning_rate": 1.9200000000000003e-05, "loss": 1.107, "step": 960 }, { "epoch": 0.15689155544671646, "grad_norm": 4.69041109085083, "learning_rate": 1.9220000000000002e-05, "loss": 1.3339, "step": 961 }, { "epoch": 0.15705481408922084, "grad_norm": 3.849813461303711, "learning_rate": 1.9240000000000002e-05, "loss": 1.3248, "step": 962 }, { "epoch": 0.15721807273172522, "grad_norm": 4.089381694793701, "learning_rate": 1.9260000000000002e-05, "loss": 1.2028, "step": 963 }, { "epoch": 0.15738133137422963, "grad_norm": 3.4601364135742188, "learning_rate": 1.9280000000000002e-05, "loss": 1.0437, "step": 964 }, { "epoch": 0.15754459001673402, "grad_norm": 3.673841714859009, "learning_rate": 1.93e-05, "loss": 1.2247, "step": 965 }, { "epoch": 0.1577078486592384, "grad_norm": 4.057040214538574, "learning_rate": 1.932e-05, "loss": 1.3313, "step": 966 }, { "epoch": 0.15787110730174278, "grad_norm": 3.757402181625366, "learning_rate": 1.934e-05, "loss": 1.1067, "step": 967 }, { "epoch": 0.15803436594424716, "grad_norm": 3.6236236095428467, "learning_rate": 1.936e-05, "loss": 1.201, "step": 968 }, { "epoch": 0.15819762458675157, "grad_norm": 3.4774343967437744, "learning_rate": 1.938e-05, "loss": 1.0681, "step": 969 }, { "epoch": 0.15836088322925596, "grad_norm": 4.088637351989746, "learning_rate": 1.94e-05, "loss": 1.1262, "step": 970 }, { "epoch": 0.15852414187176034, "grad_norm": 3.912189483642578, "learning_rate": 1.942e-05, "loss": 1.2106, "step": 971 }, { "epoch": 0.15868740051426472, "grad_norm": 3.6915788650512695, "learning_rate": 1.944e-05, "loss": 1.0596, "step": 972 }, { "epoch": 0.1588506591567691, "grad_norm": 4.518949031829834, "learning_rate": 1.946e-05, "loss": 1.3176, "step": 973 }, { "epoch": 0.15901391779927349, "grad_norm": 4.217410564422607, "learning_rate": 1.948e-05, "loss": 1.0713, "step": 974 }, { "epoch": 0.1591771764417779, "grad_norm": 4.17917013168335, "learning_rate": 1.95e-05, "loss": 1.1682, "step": 975 }, { "epoch": 0.15934043508428228, "grad_norm": 4.340623378753662, "learning_rate": 1.9520000000000003e-05, "loss": 1.1033, "step": 976 }, { "epoch": 0.15950369372678666, "grad_norm": 4.169734001159668, "learning_rate": 1.9540000000000003e-05, "loss": 0.9384, "step": 977 }, { "epoch": 0.15966695236929104, "grad_norm": 4.766945838928223, "learning_rate": 1.9560000000000002e-05, "loss": 1.0337, "step": 978 }, { "epoch": 0.15983021101179543, "grad_norm": 3.9166316986083984, "learning_rate": 1.9580000000000002e-05, "loss": 1.1134, "step": 979 }, { "epoch": 0.15999346965429984, "grad_norm": 3.736837387084961, "learning_rate": 1.9600000000000002e-05, "loss": 0.9516, "step": 980 }, { "epoch": 0.16015672829680422, "grad_norm": 3.862546443939209, "learning_rate": 1.9620000000000002e-05, "loss": 1.1481, "step": 981 }, { "epoch": 0.1603199869393086, "grad_norm": 4.004508018493652, "learning_rate": 1.9640000000000002e-05, "loss": 1.2574, "step": 982 }, { "epoch": 0.16048324558181298, "grad_norm": 3.7218291759490967, "learning_rate": 1.966e-05, "loss": 1.1873, "step": 983 }, { "epoch": 0.16064650422431737, "grad_norm": 3.7299065589904785, "learning_rate": 1.968e-05, "loss": 1.1485, "step": 984 }, { "epoch": 0.16080976286682178, "grad_norm": 3.6156952381134033, "learning_rate": 1.97e-05, "loss": 1.0758, "step": 985 }, { "epoch": 0.16097302150932616, "grad_norm": 3.7243640422821045, "learning_rate": 1.972e-05, "loss": 1.096, "step": 986 }, { "epoch": 0.16113628015183054, "grad_norm": 3.3104841709136963, "learning_rate": 1.974e-05, "loss": 0.9285, "step": 987 }, { "epoch": 0.16129953879433492, "grad_norm": 3.667454242706299, "learning_rate": 1.976e-05, "loss": 1.1551, "step": 988 }, { "epoch": 0.1614627974368393, "grad_norm": 4.257452487945557, "learning_rate": 1.978e-05, "loss": 1.1494, "step": 989 }, { "epoch": 0.1616260560793437, "grad_norm": 3.9013047218322754, "learning_rate": 1.98e-05, "loss": 1.3298, "step": 990 }, { "epoch": 0.1617893147218481, "grad_norm": 4.262466907501221, "learning_rate": 1.982e-05, "loss": 1.2892, "step": 991 }, { "epoch": 0.16195257336435248, "grad_norm": 4.111558437347412, "learning_rate": 1.9840000000000003e-05, "loss": 1.1339, "step": 992 }, { "epoch": 0.16211583200685686, "grad_norm": 4.238582611083984, "learning_rate": 1.9860000000000003e-05, "loss": 1.4772, "step": 993 }, { "epoch": 0.16227909064936125, "grad_norm": 4.243841171264648, "learning_rate": 1.9880000000000003e-05, "loss": 1.3229, "step": 994 }, { "epoch": 0.16244234929186563, "grad_norm": 4.167294979095459, "learning_rate": 1.9900000000000003e-05, "loss": 1.2357, "step": 995 }, { "epoch": 0.16260560793437004, "grad_norm": 3.606184482574463, "learning_rate": 1.9920000000000002e-05, "loss": 1.0757, "step": 996 }, { "epoch": 0.16276886657687442, "grad_norm": 3.99617600440979, "learning_rate": 1.9940000000000002e-05, "loss": 1.0021, "step": 997 }, { "epoch": 0.1629321252193788, "grad_norm": 3.591885566711426, "learning_rate": 1.9960000000000002e-05, "loss": 1.1411, "step": 998 }, { "epoch": 0.1630953838618832, "grad_norm": 3.6760902404785156, "learning_rate": 1.9980000000000002e-05, "loss": 1.1714, "step": 999 }, { "epoch": 0.16325864250438757, "grad_norm": 4.155306339263916, "learning_rate": 2e-05, "loss": 1.254, "step": 1000 }, { "epoch": 0.16342190114689195, "grad_norm": 3.8965601921081543, "learning_rate": 1.999999997944689e-05, "loss": 1.218, "step": 1001 }, { "epoch": 0.16358515978939636, "grad_norm": 4.279705047607422, "learning_rate": 1.9999999917787553e-05, "loss": 1.159, "step": 1002 }, { "epoch": 0.16374841843190074, "grad_norm": 3.7120468616485596, "learning_rate": 1.9999999815021996e-05, "loss": 1.2222, "step": 1003 }, { "epoch": 0.16391167707440513, "grad_norm": 4.154216289520264, "learning_rate": 1.999999967115021e-05, "loss": 1.3602, "step": 1004 }, { "epoch": 0.1640749357169095, "grad_norm": 4.191778182983398, "learning_rate": 1.9999999486172204e-05, "loss": 1.0319, "step": 1005 }, { "epoch": 0.1642381943594139, "grad_norm": 3.712935209274292, "learning_rate": 1.9999999260087977e-05, "loss": 1.1548, "step": 1006 }, { "epoch": 0.1644014530019183, "grad_norm": 3.68768572807312, "learning_rate": 1.999999899289753e-05, "loss": 1.0116, "step": 1007 }, { "epoch": 0.16456471164442268, "grad_norm": 4.10108757019043, "learning_rate": 1.999999868460086e-05, "loss": 0.9569, "step": 1008 }, { "epoch": 0.16472797028692707, "grad_norm": 4.679073810577393, "learning_rate": 1.999999833519797e-05, "loss": 1.2372, "step": 1009 }, { "epoch": 0.16489122892943145, "grad_norm": 4.403948783874512, "learning_rate": 1.9999997944688867e-05, "loss": 1.2546, "step": 1010 }, { "epoch": 0.16505448757193583, "grad_norm": 5.01808500289917, "learning_rate": 1.999999751307355e-05, "loss": 1.3951, "step": 1011 }, { "epoch": 0.16521774621444021, "grad_norm": 3.6671814918518066, "learning_rate": 1.9999997040352013e-05, "loss": 1.0453, "step": 1012 }, { "epoch": 0.16538100485694462, "grad_norm": 3.789060354232788, "learning_rate": 1.999999652652427e-05, "loss": 1.1142, "step": 1013 }, { "epoch": 0.165544263499449, "grad_norm": 3.967205047607422, "learning_rate": 1.9999995971590312e-05, "loss": 1.3549, "step": 1014 }, { "epoch": 0.1657075221419534, "grad_norm": 3.5511927604675293, "learning_rate": 1.999999537555015e-05, "loss": 1.1694, "step": 1015 }, { "epoch": 0.16587078078445777, "grad_norm": 3.6845195293426514, "learning_rate": 1.999999473840378e-05, "loss": 1.24, "step": 1016 }, { "epoch": 0.16603403942696215, "grad_norm": 3.5644140243530273, "learning_rate": 1.999999406015121e-05, "loss": 1.1477, "step": 1017 }, { "epoch": 0.16619729806946656, "grad_norm": 4.035319805145264, "learning_rate": 1.999999334079244e-05, "loss": 1.292, "step": 1018 }, { "epoch": 0.16636055671197095, "grad_norm": 3.790247917175293, "learning_rate": 1.9999992580327473e-05, "loss": 1.1834, "step": 1019 }, { "epoch": 0.16652381535447533, "grad_norm": 3.8441784381866455, "learning_rate": 1.9999991778756312e-05, "loss": 1.2879, "step": 1020 }, { "epoch": 0.1666870739969797, "grad_norm": 3.810910701751709, "learning_rate": 1.999999093607896e-05, "loss": 1.2005, "step": 1021 }, { "epoch": 0.1668503326394841, "grad_norm": 4.220419883728027, "learning_rate": 1.9999990052295425e-05, "loss": 1.3317, "step": 1022 }, { "epoch": 0.16701359128198848, "grad_norm": 3.7651987075805664, "learning_rate": 1.9999989127405703e-05, "loss": 1.0189, "step": 1023 }, { "epoch": 0.1671768499244929, "grad_norm": 3.630316734313965, "learning_rate": 1.9999988161409804e-05, "loss": 1.1447, "step": 1024 }, { "epoch": 0.16734010856699727, "grad_norm": 3.8417868614196777, "learning_rate": 1.999998715430773e-05, "loss": 1.1657, "step": 1025 }, { "epoch": 0.16750336720950165, "grad_norm": 3.7267696857452393, "learning_rate": 1.999998610609948e-05, "loss": 1.2795, "step": 1026 }, { "epoch": 0.16766662585200603, "grad_norm": 3.62048602104187, "learning_rate": 1.9999985016785066e-05, "loss": 1.9011, "step": 1027 }, { "epoch": 0.16782988449451042, "grad_norm": 4.028421878814697, "learning_rate": 1.999998388636449e-05, "loss": 1.3842, "step": 1028 }, { "epoch": 0.16799314313701483, "grad_norm": 3.974581241607666, "learning_rate": 1.9999982714837758e-05, "loss": 1.4111, "step": 1029 }, { "epoch": 0.1681564017795192, "grad_norm": 4.062854766845703, "learning_rate": 1.9999981502204872e-05, "loss": 1.3649, "step": 1030 }, { "epoch": 0.1683196604220236, "grad_norm": 3.8816564083099365, "learning_rate": 1.9999980248465833e-05, "loss": 1.1828, "step": 1031 }, { "epoch": 0.16848291906452798, "grad_norm": 3.6764965057373047, "learning_rate": 1.9999978953620654e-05, "loss": 1.3536, "step": 1032 }, { "epoch": 0.16864617770703236, "grad_norm": 3.281810998916626, "learning_rate": 1.9999977617669343e-05, "loss": 0.978, "step": 1033 }, { "epoch": 0.16880943634953674, "grad_norm": 3.6433820724487305, "learning_rate": 1.9999976240611895e-05, "loss": 1.077, "step": 1034 }, { "epoch": 0.16897269499204115, "grad_norm": 3.213484764099121, "learning_rate": 1.9999974822448318e-05, "loss": 1.0746, "step": 1035 }, { "epoch": 0.16913595363454553, "grad_norm": 3.754077672958374, "learning_rate": 1.9999973363178625e-05, "loss": 1.2898, "step": 1036 }, { "epoch": 0.16929921227704992, "grad_norm": 3.820417881011963, "learning_rate": 1.9999971862802816e-05, "loss": 1.108, "step": 1037 }, { "epoch": 0.1694624709195543, "grad_norm": 3.6963868141174316, "learning_rate": 1.9999970321320898e-05, "loss": 1.0581, "step": 1038 }, { "epoch": 0.16962572956205868, "grad_norm": 4.323473930358887, "learning_rate": 1.9999968738732877e-05, "loss": 1.2625, "step": 1039 }, { "epoch": 0.1697889882045631, "grad_norm": 3.8799030780792236, "learning_rate": 1.9999967115038765e-05, "loss": 1.3517, "step": 1040 }, { "epoch": 0.16995224684706747, "grad_norm": 3.503634452819824, "learning_rate": 1.999996545023856e-05, "loss": 1.0428, "step": 1041 }, { "epoch": 0.17011550548957186, "grad_norm": 3.6011762619018555, "learning_rate": 1.9999963744332273e-05, "loss": 0.9528, "step": 1042 }, { "epoch": 0.17027876413207624, "grad_norm": 3.852891445159912, "learning_rate": 1.999996199731991e-05, "loss": 1.165, "step": 1043 }, { "epoch": 0.17044202277458062, "grad_norm": 4.338034629821777, "learning_rate": 1.9999960209201484e-05, "loss": 1.2209, "step": 1044 }, { "epoch": 0.17060528141708503, "grad_norm": 3.8535375595092773, "learning_rate": 1.9999958379976995e-05, "loss": 1.277, "step": 1045 }, { "epoch": 0.1707685400595894, "grad_norm": 4.535771369934082, "learning_rate": 1.999995650964645e-05, "loss": 1.2289, "step": 1046 }, { "epoch": 0.1709317987020938, "grad_norm": 4.040627956390381, "learning_rate": 1.9999954598209862e-05, "loss": 1.2478, "step": 1047 }, { "epoch": 0.17109505734459818, "grad_norm": 3.6841795444488525, "learning_rate": 1.999995264566724e-05, "loss": 1.0388, "step": 1048 }, { "epoch": 0.17125831598710256, "grad_norm": 3.8419947624206543, "learning_rate": 1.9999950652018585e-05, "loss": 1.2319, "step": 1049 }, { "epoch": 0.17142157462960694, "grad_norm": 3.6403939723968506, "learning_rate": 1.999994861726391e-05, "loss": 1.114, "step": 1050 }, { "epoch": 0.17158483327211135, "grad_norm": 4.119410991668701, "learning_rate": 1.999994654140322e-05, "loss": 1.3749, "step": 1051 }, { "epoch": 0.17174809191461574, "grad_norm": 3.6533443927764893, "learning_rate": 1.9999944424436526e-05, "loss": 1.2102, "step": 1052 }, { "epoch": 0.17191135055712012, "grad_norm": 3.738165855407715, "learning_rate": 1.9999942266363835e-05, "loss": 1.1372, "step": 1053 }, { "epoch": 0.1720746091996245, "grad_norm": 3.504856824874878, "learning_rate": 1.9999940067185163e-05, "loss": 1.0132, "step": 1054 }, { "epoch": 0.17223786784212888, "grad_norm": 3.6137125492095947, "learning_rate": 1.9999937826900508e-05, "loss": 1.1447, "step": 1055 }, { "epoch": 0.1724011264846333, "grad_norm": 4.1140666007995605, "learning_rate": 1.9999935545509886e-05, "loss": 1.2849, "step": 1056 }, { "epoch": 0.17256438512713768, "grad_norm": 3.7917964458465576, "learning_rate": 1.9999933223013307e-05, "loss": 1.0798, "step": 1057 }, { "epoch": 0.17272764376964206, "grad_norm": 3.5479543209075928, "learning_rate": 1.9999930859410775e-05, "loss": 1.1931, "step": 1058 }, { "epoch": 0.17289090241214644, "grad_norm": 4.020903587341309, "learning_rate": 1.9999928454702307e-05, "loss": 1.2672, "step": 1059 }, { "epoch": 0.17305416105465082, "grad_norm": 4.157297134399414, "learning_rate": 1.9999926008887906e-05, "loss": 1.1937, "step": 1060 }, { "epoch": 0.1732174196971552, "grad_norm": 4.086353302001953, "learning_rate": 1.999992352196759e-05, "loss": 1.378, "step": 1061 }, { "epoch": 0.17338067833965962, "grad_norm": 4.150120735168457, "learning_rate": 1.9999920993941355e-05, "loss": 1.3199, "step": 1062 }, { "epoch": 0.173543936982164, "grad_norm": 3.8544602394104004, "learning_rate": 1.999991842480923e-05, "loss": 1.1757, "step": 1063 }, { "epoch": 0.17370719562466838, "grad_norm": 4.17141580581665, "learning_rate": 1.9999915814571208e-05, "loss": 1.0095, "step": 1064 }, { "epoch": 0.17387045426717276, "grad_norm": 3.8557145595550537, "learning_rate": 1.9999913163227315e-05, "loss": 1.2089, "step": 1065 }, { "epoch": 0.17403371290967715, "grad_norm": 3.91540265083313, "learning_rate": 1.9999910470777553e-05, "loss": 1.141, "step": 1066 }, { "epoch": 0.17419697155218156, "grad_norm": 3.5876104831695557, "learning_rate": 1.9999907737221933e-05, "loss": 1.0575, "step": 1067 }, { "epoch": 0.17436023019468594, "grad_norm": 3.6456804275512695, "learning_rate": 1.999990496256047e-05, "loss": 1.0866, "step": 1068 }, { "epoch": 0.17452348883719032, "grad_norm": 4.1336822509765625, "learning_rate": 1.999990214679317e-05, "loss": 1.3165, "step": 1069 }, { "epoch": 0.1746867474796947, "grad_norm": 4.081802845001221, "learning_rate": 1.9999899289920054e-05, "loss": 1.2087, "step": 1070 }, { "epoch": 0.1748500061221991, "grad_norm": 4.003814697265625, "learning_rate": 1.9999896391941125e-05, "loss": 1.3309, "step": 1071 }, { "epoch": 0.17501326476470347, "grad_norm": 3.4531972408294678, "learning_rate": 1.9999893452856396e-05, "loss": 1.0964, "step": 1072 }, { "epoch": 0.17517652340720788, "grad_norm": 3.5624070167541504, "learning_rate": 1.9999890472665883e-05, "loss": 1.0361, "step": 1073 }, { "epoch": 0.17533978204971226, "grad_norm": 3.8657515048980713, "learning_rate": 1.9999887451369596e-05, "loss": 1.1186, "step": 1074 }, { "epoch": 0.17550304069221664, "grad_norm": 3.879141092300415, "learning_rate": 1.999988438896755e-05, "loss": 1.144, "step": 1075 }, { "epoch": 0.17566629933472103, "grad_norm": 3.713594675064087, "learning_rate": 1.999988128545975e-05, "loss": 1.1934, "step": 1076 }, { "epoch": 0.1758295579772254, "grad_norm": 3.6927762031555176, "learning_rate": 1.999987814084622e-05, "loss": 1.1862, "step": 1077 }, { "epoch": 0.17599281661972982, "grad_norm": 3.4299778938293457, "learning_rate": 1.9999874955126962e-05, "loss": 0.8903, "step": 1078 }, { "epoch": 0.1761560752622342, "grad_norm": 4.024533748626709, "learning_rate": 1.9999871728301997e-05, "loss": 1.19, "step": 1079 }, { "epoch": 0.17631933390473858, "grad_norm": 3.9158546924591064, "learning_rate": 1.9999868460371332e-05, "loss": 1.1778, "step": 1080 }, { "epoch": 0.17648259254724297, "grad_norm": 3.8022055625915527, "learning_rate": 1.9999865151334985e-05, "loss": 1.0493, "step": 1081 }, { "epoch": 0.17664585118974735, "grad_norm": 3.575397253036499, "learning_rate": 1.999986180119297e-05, "loss": 1.1082, "step": 1082 }, { "epoch": 0.17680910983225173, "grad_norm": 4.072778701782227, "learning_rate": 1.9999858409945298e-05, "loss": 1.3475, "step": 1083 }, { "epoch": 0.17697236847475614, "grad_norm": 3.904151201248169, "learning_rate": 1.9999854977591982e-05, "loss": 1.2238, "step": 1084 }, { "epoch": 0.17713562711726052, "grad_norm": 3.676718235015869, "learning_rate": 1.999985150413304e-05, "loss": 0.9229, "step": 1085 }, { "epoch": 0.1772988857597649, "grad_norm": 3.512343645095825, "learning_rate": 1.9999847989568483e-05, "loss": 1.1036, "step": 1086 }, { "epoch": 0.1774621444022693, "grad_norm": 3.886535882949829, "learning_rate": 1.9999844433898328e-05, "loss": 1.4112, "step": 1087 }, { "epoch": 0.17762540304477367, "grad_norm": 3.612736701965332, "learning_rate": 1.9999840837122584e-05, "loss": 1.1478, "step": 1088 }, { "epoch": 0.17778866168727808, "grad_norm": 3.6950337886810303, "learning_rate": 1.9999837199241278e-05, "loss": 1.1515, "step": 1089 }, { "epoch": 0.17795192032978246, "grad_norm": 3.983963966369629, "learning_rate": 1.9999833520254414e-05, "loss": 1.1459, "step": 1090 }, { "epoch": 0.17811517897228685, "grad_norm": 4.313369274139404, "learning_rate": 1.999982980016201e-05, "loss": 1.3877, "step": 1091 }, { "epoch": 0.17827843761479123, "grad_norm": 3.3993306159973145, "learning_rate": 1.999982603896408e-05, "loss": 1.0583, "step": 1092 }, { "epoch": 0.1784416962572956, "grad_norm": 4.070005416870117, "learning_rate": 1.999982223666064e-05, "loss": 1.3233, "step": 1093 }, { "epoch": 0.1786049548998, "grad_norm": 3.610058307647705, "learning_rate": 1.9999818393251712e-05, "loss": 1.1191, "step": 1094 }, { "epoch": 0.1787682135423044, "grad_norm": 3.6807632446289062, "learning_rate": 1.99998145087373e-05, "loss": 1.2496, "step": 1095 }, { "epoch": 0.1789314721848088, "grad_norm": 3.7396693229675293, "learning_rate": 1.9999810583117432e-05, "loss": 1.1244, "step": 1096 }, { "epoch": 0.17909473082731317, "grad_norm": 3.959650993347168, "learning_rate": 1.9999806616392118e-05, "loss": 1.1946, "step": 1097 }, { "epoch": 0.17925798946981755, "grad_norm": 3.4226460456848145, "learning_rate": 1.999980260856137e-05, "loss": 1.0835, "step": 1098 }, { "epoch": 0.17942124811232193, "grad_norm": 3.879526376724243, "learning_rate": 1.9999798559625218e-05, "loss": 1.2908, "step": 1099 }, { "epoch": 0.17958450675482635, "grad_norm": 3.5882880687713623, "learning_rate": 1.9999794469583663e-05, "loss": 1.0664, "step": 1100 }, { "epoch": 0.17974776539733073, "grad_norm": 3.915165901184082, "learning_rate": 1.999979033843673e-05, "loss": 1.206, "step": 1101 }, { "epoch": 0.1799110240398351, "grad_norm": 4.233125686645508, "learning_rate": 1.9999786166184435e-05, "loss": 1.2871, "step": 1102 }, { "epoch": 0.1800742826823395, "grad_norm": 3.761413335800171, "learning_rate": 1.9999781952826796e-05, "loss": 0.9716, "step": 1103 }, { "epoch": 0.18023754132484388, "grad_norm": 3.9536406993865967, "learning_rate": 1.9999777698363827e-05, "loss": 1.0315, "step": 1104 }, { "epoch": 0.18040079996734826, "grad_norm": 3.8864707946777344, "learning_rate": 1.9999773402795548e-05, "loss": 1.3266, "step": 1105 }, { "epoch": 0.18056405860985267, "grad_norm": 4.021396160125732, "learning_rate": 1.999976906612198e-05, "loss": 1.3374, "step": 1106 }, { "epoch": 0.18072731725235705, "grad_norm": 4.487703800201416, "learning_rate": 1.9999764688343133e-05, "loss": 1.3569, "step": 1107 }, { "epoch": 0.18089057589486143, "grad_norm": 3.9328181743621826, "learning_rate": 1.999976026945903e-05, "loss": 1.1306, "step": 1108 }, { "epoch": 0.18105383453736582, "grad_norm": 3.837209463119507, "learning_rate": 1.999975580946969e-05, "loss": 1.1928, "step": 1109 }, { "epoch": 0.1812170931798702, "grad_norm": 3.9234437942504883, "learning_rate": 1.9999751308375132e-05, "loss": 1.3194, "step": 1110 }, { "epoch": 0.1813803518223746, "grad_norm": 3.5399293899536133, "learning_rate": 1.999974676617537e-05, "loss": 1.0813, "step": 1111 }, { "epoch": 0.181543610464879, "grad_norm": 3.453911542892456, "learning_rate": 1.999974218287042e-05, "loss": 1.1437, "step": 1112 }, { "epoch": 0.18170686910738337, "grad_norm": 3.407263994216919, "learning_rate": 1.999973755846031e-05, "loss": 1.1446, "step": 1113 }, { "epoch": 0.18187012774988776, "grad_norm": 3.537630558013916, "learning_rate": 1.9999732892945055e-05, "loss": 1.1598, "step": 1114 }, { "epoch": 0.18203338639239214, "grad_norm": 3.3879623413085938, "learning_rate": 1.9999728186324674e-05, "loss": 1.0762, "step": 1115 }, { "epoch": 0.18219664503489655, "grad_norm": 3.894986867904663, "learning_rate": 1.9999723438599187e-05, "loss": 1.1441, "step": 1116 }, { "epoch": 0.18235990367740093, "grad_norm": 3.4810829162597656, "learning_rate": 1.999971864976861e-05, "loss": 1.2472, "step": 1117 }, { "epoch": 0.1825231623199053, "grad_norm": 3.7500522136688232, "learning_rate": 1.9999713819832968e-05, "loss": 1.228, "step": 1118 }, { "epoch": 0.1826864209624097, "grad_norm": 3.7930593490600586, "learning_rate": 1.9999708948792276e-05, "loss": 0.9056, "step": 1119 }, { "epoch": 0.18284967960491408, "grad_norm": 3.8684451580047607, "learning_rate": 1.999970403664656e-05, "loss": 1.3687, "step": 1120 }, { "epoch": 0.18301293824741846, "grad_norm": 3.9690134525299072, "learning_rate": 1.9999699083395833e-05, "loss": 1.2311, "step": 1121 }, { "epoch": 0.18317619688992287, "grad_norm": 3.8110127449035645, "learning_rate": 1.9999694089040118e-05, "loss": 1.0709, "step": 1122 }, { "epoch": 0.18333945553242725, "grad_norm": 4.331480503082275, "learning_rate": 1.999968905357944e-05, "loss": 1.3309, "step": 1123 }, { "epoch": 0.18350271417493164, "grad_norm": 4.160747528076172, "learning_rate": 1.9999683977013813e-05, "loss": 1.1097, "step": 1124 }, { "epoch": 0.18366597281743602, "grad_norm": 4.092085361480713, "learning_rate": 1.9999678859343264e-05, "loss": 1.0968, "step": 1125 }, { "epoch": 0.1838292314599404, "grad_norm": 4.077855587005615, "learning_rate": 1.999967370056781e-05, "loss": 1.0877, "step": 1126 }, { "epoch": 0.1839924901024448, "grad_norm": 4.213071823120117, "learning_rate": 1.9999668500687474e-05, "loss": 1.2951, "step": 1127 }, { "epoch": 0.1841557487449492, "grad_norm": 3.9730944633483887, "learning_rate": 1.9999663259702273e-05, "loss": 1.2948, "step": 1128 }, { "epoch": 0.18431900738745358, "grad_norm": 3.495021104812622, "learning_rate": 1.9999657977612234e-05, "loss": 1.063, "step": 1129 }, { "epoch": 0.18448226602995796, "grad_norm": 3.6751708984375, "learning_rate": 1.999965265441738e-05, "loss": 1.053, "step": 1130 }, { "epoch": 0.18464552467246234, "grad_norm": 4.09637975692749, "learning_rate": 1.9999647290117725e-05, "loss": 1.3122, "step": 1131 }, { "epoch": 0.18480878331496672, "grad_norm": 3.1480352878570557, "learning_rate": 1.99996418847133e-05, "loss": 0.9658, "step": 1132 }, { "epoch": 0.18497204195747113, "grad_norm": 3.6982812881469727, "learning_rate": 1.999963643820412e-05, "loss": 1.2376, "step": 1133 }, { "epoch": 0.18513530059997552, "grad_norm": 3.481973648071289, "learning_rate": 1.999963095059021e-05, "loss": 1.0817, "step": 1134 }, { "epoch": 0.1852985592424799, "grad_norm": 4.114132881164551, "learning_rate": 1.9999625421871596e-05, "loss": 1.3126, "step": 1135 }, { "epoch": 0.18546181788498428, "grad_norm": 3.9998281002044678, "learning_rate": 1.9999619852048296e-05, "loss": 1.173, "step": 1136 }, { "epoch": 0.18562507652748866, "grad_norm": 4.447181701660156, "learning_rate": 1.9999614241120335e-05, "loss": 1.4737, "step": 1137 }, { "epoch": 0.18578833516999307, "grad_norm": 3.8858766555786133, "learning_rate": 1.9999608589087734e-05, "loss": 1.268, "step": 1138 }, { "epoch": 0.18595159381249746, "grad_norm": 3.4623541831970215, "learning_rate": 1.999960289595052e-05, "loss": 1.1164, "step": 1139 }, { "epoch": 0.18611485245500184, "grad_norm": 3.8736419677734375, "learning_rate": 1.9999597161708713e-05, "loss": 1.2028, "step": 1140 }, { "epoch": 0.18627811109750622, "grad_norm": 3.8066883087158203, "learning_rate": 1.999959138636234e-05, "loss": 1.1313, "step": 1141 }, { "epoch": 0.1864413697400106, "grad_norm": 3.6514384746551514, "learning_rate": 1.9999585569911417e-05, "loss": 1.1178, "step": 1142 }, { "epoch": 0.186604628382515, "grad_norm": 3.856887102127075, "learning_rate": 1.999957971235598e-05, "loss": 1.0999, "step": 1143 }, { "epoch": 0.1867678870250194, "grad_norm": 3.681608200073242, "learning_rate": 1.999957381369604e-05, "loss": 1.1071, "step": 1144 }, { "epoch": 0.18693114566752378, "grad_norm": 3.8582510948181152, "learning_rate": 1.9999567873931634e-05, "loss": 1.0738, "step": 1145 }, { "epoch": 0.18709440431002816, "grad_norm": 3.9815726280212402, "learning_rate": 1.999956189306278e-05, "loss": 1.2487, "step": 1146 }, { "epoch": 0.18725766295253254, "grad_norm": 3.7942357063293457, "learning_rate": 1.99995558710895e-05, "loss": 1.1705, "step": 1147 }, { "epoch": 0.18742092159503693, "grad_norm": 3.1515052318573, "learning_rate": 1.999954980801182e-05, "loss": 1.0424, "step": 1148 }, { "epoch": 0.18758418023754134, "grad_norm": 3.6135566234588623, "learning_rate": 1.999954370382977e-05, "loss": 1.1142, "step": 1149 }, { "epoch": 0.18774743888004572, "grad_norm": 3.3218953609466553, "learning_rate": 1.9999537558543373e-05, "loss": 1.1133, "step": 1150 }, { "epoch": 0.1879106975225501, "grad_norm": 3.610884666442871, "learning_rate": 1.999953137215265e-05, "loss": 1.2098, "step": 1151 }, { "epoch": 0.18807395616505448, "grad_norm": 3.5074374675750732, "learning_rate": 1.999952514465763e-05, "loss": 1.1889, "step": 1152 }, { "epoch": 0.18823721480755887, "grad_norm": 3.905378580093384, "learning_rate": 1.9999518876058337e-05, "loss": 1.2216, "step": 1153 }, { "epoch": 0.18840047345006325, "grad_norm": 3.5988683700561523, "learning_rate": 1.9999512566354797e-05, "loss": 1.1438, "step": 1154 }, { "epoch": 0.18856373209256766, "grad_norm": 3.3306169509887695, "learning_rate": 1.999950621554704e-05, "loss": 1.0306, "step": 1155 }, { "epoch": 0.18872699073507204, "grad_norm": 3.6543123722076416, "learning_rate": 1.9999499823635086e-05, "loss": 1.2708, "step": 1156 }, { "epoch": 0.18889024937757642, "grad_norm": 3.53204083442688, "learning_rate": 1.9999493390618966e-05, "loss": 1.1385, "step": 1157 }, { "epoch": 0.1890535080200808, "grad_norm": 3.6310620307922363, "learning_rate": 1.99994869164987e-05, "loss": 1.1558, "step": 1158 }, { "epoch": 0.1892167666625852, "grad_norm": 3.691850423812866, "learning_rate": 1.999948040127432e-05, "loss": 1.229, "step": 1159 }, { "epoch": 0.1893800253050896, "grad_norm": 3.956061363220215, "learning_rate": 1.9999473844945855e-05, "loss": 1.1886, "step": 1160 }, { "epoch": 0.18954328394759398, "grad_norm": 3.50788950920105, "learning_rate": 1.9999467247513327e-05, "loss": 1.1322, "step": 1161 }, { "epoch": 0.18970654259009836, "grad_norm": 3.8030004501342773, "learning_rate": 1.9999460608976768e-05, "loss": 1.0473, "step": 1162 }, { "epoch": 0.18986980123260275, "grad_norm": 3.913618326187134, "learning_rate": 1.99994539293362e-05, "loss": 1.225, "step": 1163 }, { "epoch": 0.19003305987510713, "grad_norm": 4.444163799285889, "learning_rate": 1.999944720859165e-05, "loss": 1.2532, "step": 1164 }, { "epoch": 0.1901963185176115, "grad_norm": 4.202075481414795, "learning_rate": 1.999944044674315e-05, "loss": 1.2115, "step": 1165 }, { "epoch": 0.19035957716011592, "grad_norm": 3.402817964553833, "learning_rate": 1.999943364379073e-05, "loss": 0.8984, "step": 1166 }, { "epoch": 0.1905228358026203, "grad_norm": 4.1686692237854, "learning_rate": 1.999942679973441e-05, "loss": 1.2136, "step": 1167 }, { "epoch": 0.1906860944451247, "grad_norm": 3.202367067337036, "learning_rate": 1.9999419914574222e-05, "loss": 0.9394, "step": 1168 }, { "epoch": 0.19084935308762907, "grad_norm": 3.9087886810302734, "learning_rate": 1.9999412988310192e-05, "loss": 0.9381, "step": 1169 }, { "epoch": 0.19101261173013345, "grad_norm": 4.416986465454102, "learning_rate": 1.9999406020942354e-05, "loss": 1.3121, "step": 1170 }, { "epoch": 0.19117587037263786, "grad_norm": 3.8523476123809814, "learning_rate": 1.9999399012470735e-05, "loss": 1.0667, "step": 1171 }, { "epoch": 0.19133912901514225, "grad_norm": 3.7350690364837646, "learning_rate": 1.999939196289536e-05, "loss": 1.092, "step": 1172 }, { "epoch": 0.19150238765764663, "grad_norm": 3.926711320877075, "learning_rate": 1.999938487221626e-05, "loss": 1.1902, "step": 1173 }, { "epoch": 0.191665646300151, "grad_norm": 4.388312816619873, "learning_rate": 1.999937774043347e-05, "loss": 1.2525, "step": 1174 }, { "epoch": 0.1918289049426554, "grad_norm": 4.237586975097656, "learning_rate": 1.9999370567547008e-05, "loss": 1.1814, "step": 1175 }, { "epoch": 0.1919921635851598, "grad_norm": 3.3772261142730713, "learning_rate": 1.9999363353556908e-05, "loss": 1.0047, "step": 1176 }, { "epoch": 0.19215542222766419, "grad_norm": 3.7421677112579346, "learning_rate": 1.9999356098463206e-05, "loss": 1.2822, "step": 1177 }, { "epoch": 0.19231868087016857, "grad_norm": 4.074155807495117, "learning_rate": 1.9999348802265922e-05, "loss": 1.2806, "step": 1178 }, { "epoch": 0.19248193951267295, "grad_norm": 3.4650423526763916, "learning_rate": 1.9999341464965095e-05, "loss": 1.0732, "step": 1179 }, { "epoch": 0.19264519815517733, "grad_norm": 3.5466480255126953, "learning_rate": 1.999933408656075e-05, "loss": 1.1593, "step": 1180 }, { "epoch": 0.19280845679768172, "grad_norm": 4.03796911239624, "learning_rate": 1.9999326667052918e-05, "loss": 1.3832, "step": 1181 }, { "epoch": 0.19297171544018613, "grad_norm": 3.3028652667999268, "learning_rate": 1.999931920644163e-05, "loss": 1.1511, "step": 1182 }, { "epoch": 0.1931349740826905, "grad_norm": 3.405165433883667, "learning_rate": 1.9999311704726913e-05, "loss": 1.0476, "step": 1183 }, { "epoch": 0.1932982327251949, "grad_norm": 3.6871416568756104, "learning_rate": 1.9999304161908807e-05, "loss": 1.0811, "step": 1184 }, { "epoch": 0.19346149136769927, "grad_norm": 3.464702606201172, "learning_rate": 1.9999296577987334e-05, "loss": 1.0625, "step": 1185 }, { "epoch": 0.19362475001020366, "grad_norm": 3.8513243198394775, "learning_rate": 1.999928895296253e-05, "loss": 1.2802, "step": 1186 }, { "epoch": 0.19378800865270807, "grad_norm": 3.7541861534118652, "learning_rate": 1.999928128683442e-05, "loss": 1.1755, "step": 1187 }, { "epoch": 0.19395126729521245, "grad_norm": 3.9863545894622803, "learning_rate": 1.9999273579603046e-05, "loss": 1.1792, "step": 1188 }, { "epoch": 0.19411452593771683, "grad_norm": 3.9771311283111572, "learning_rate": 1.9999265831268432e-05, "loss": 1.1916, "step": 1189 }, { "epoch": 0.1942777845802212, "grad_norm": 4.01314640045166, "learning_rate": 1.999925804183061e-05, "loss": 1.1794, "step": 1190 }, { "epoch": 0.1944410432227256, "grad_norm": 3.9004099369049072, "learning_rate": 1.9999250211289618e-05, "loss": 0.9665, "step": 1191 }, { "epoch": 0.19460430186522998, "grad_norm": 3.9079854488372803, "learning_rate": 1.999924233964548e-05, "loss": 0.9343, "step": 1192 }, { "epoch": 0.1947675605077344, "grad_norm": 3.994723081588745, "learning_rate": 1.9999234426898232e-05, "loss": 1.1244, "step": 1193 }, { "epoch": 0.19493081915023877, "grad_norm": 3.9841930866241455, "learning_rate": 1.999922647304791e-05, "loss": 1.2557, "step": 1194 }, { "epoch": 0.19509407779274315, "grad_norm": 4.01025915145874, "learning_rate": 1.9999218478094543e-05, "loss": 1.101, "step": 1195 }, { "epoch": 0.19525733643524754, "grad_norm": 4.012704372406006, "learning_rate": 1.9999210442038164e-05, "loss": 0.9978, "step": 1196 }, { "epoch": 0.19542059507775192, "grad_norm": 4.213815689086914, "learning_rate": 1.9999202364878804e-05, "loss": 1.3591, "step": 1197 }, { "epoch": 0.19558385372025633, "grad_norm": 3.4306843280792236, "learning_rate": 1.9999194246616502e-05, "loss": 1.1002, "step": 1198 }, { "epoch": 0.1957471123627607, "grad_norm": 3.704233407974243, "learning_rate": 1.9999186087251287e-05, "loss": 1.2066, "step": 1199 }, { "epoch": 0.1959103710052651, "grad_norm": 3.290311336517334, "learning_rate": 1.9999177886783194e-05, "loss": 1.0962, "step": 1200 }, { "epoch": 0.19607362964776948, "grad_norm": 3.3483123779296875, "learning_rate": 1.9999169645212255e-05, "loss": 1.0094, "step": 1201 }, { "epoch": 0.19623688829027386, "grad_norm": 3.6212995052337646, "learning_rate": 1.9999161362538507e-05, "loss": 1.141, "step": 1202 }, { "epoch": 0.19640014693277824, "grad_norm": 3.4306480884552, "learning_rate": 1.9999153038761984e-05, "loss": 1.1016, "step": 1203 }, { "epoch": 0.19656340557528265, "grad_norm": 3.8279874324798584, "learning_rate": 1.9999144673882713e-05, "loss": 1.3632, "step": 1204 }, { "epoch": 0.19672666421778703, "grad_norm": 3.537473201751709, "learning_rate": 1.9999136267900737e-05, "loss": 1.2652, "step": 1205 }, { "epoch": 0.19688992286029142, "grad_norm": 3.676084041595459, "learning_rate": 1.999912782081609e-05, "loss": 1.0731, "step": 1206 }, { "epoch": 0.1970531815027958, "grad_norm": 3.2136332988739014, "learning_rate": 1.99991193326288e-05, "loss": 1.0307, "step": 1207 }, { "epoch": 0.19721644014530018, "grad_norm": 3.7206923961639404, "learning_rate": 1.999911080333891e-05, "loss": 1.2451, "step": 1208 }, { "epoch": 0.1973796987878046, "grad_norm": 3.9434220790863037, "learning_rate": 1.9999102232946446e-05, "loss": 1.3048, "step": 1209 }, { "epoch": 0.19754295743030897, "grad_norm": 4.196806907653809, "learning_rate": 1.9999093621451452e-05, "loss": 1.8385, "step": 1210 }, { "epoch": 0.19770621607281336, "grad_norm": 4.047806262969971, "learning_rate": 1.9999084968853957e-05, "loss": 1.027, "step": 1211 }, { "epoch": 0.19786947471531774, "grad_norm": 3.5395004749298096, "learning_rate": 1.9999076275154002e-05, "loss": 1.0026, "step": 1212 }, { "epoch": 0.19803273335782212, "grad_norm": 4.151168346405029, "learning_rate": 1.9999067540351617e-05, "loss": 1.2769, "step": 1213 }, { "epoch": 0.1981959920003265, "grad_norm": 3.730494499206543, "learning_rate": 1.9999058764446843e-05, "loss": 1.0741, "step": 1214 }, { "epoch": 0.19835925064283091, "grad_norm": 3.9292824268341064, "learning_rate": 1.9999049947439715e-05, "loss": 1.2347, "step": 1215 }, { "epoch": 0.1985225092853353, "grad_norm": 4.184051036834717, "learning_rate": 1.9999041089330268e-05, "loss": 1.17, "step": 1216 }, { "epoch": 0.19868576792783968, "grad_norm": 3.696296215057373, "learning_rate": 1.9999032190118538e-05, "loss": 1.0289, "step": 1217 }, { "epoch": 0.19884902657034406, "grad_norm": 3.918485164642334, "learning_rate": 1.999902324980456e-05, "loss": 1.2047, "step": 1218 }, { "epoch": 0.19901228521284844, "grad_norm": 3.9426076412200928, "learning_rate": 1.9999014268388377e-05, "loss": 1.1932, "step": 1219 }, { "epoch": 0.19917554385535285, "grad_norm": 3.395935297012329, "learning_rate": 1.999900524587002e-05, "loss": 1.0153, "step": 1220 }, { "epoch": 0.19933880249785724, "grad_norm": 3.428037643432617, "learning_rate": 1.9998996182249525e-05, "loss": 1.0837, "step": 1221 }, { "epoch": 0.19950206114036162, "grad_norm": 3.1557440757751465, "learning_rate": 1.9998987077526937e-05, "loss": 1.0319, "step": 1222 }, { "epoch": 0.199665319782866, "grad_norm": 3.502368450164795, "learning_rate": 1.9998977931702286e-05, "loss": 1.2103, "step": 1223 }, { "epoch": 0.19982857842537038, "grad_norm": 3.1151580810546875, "learning_rate": 1.999896874477561e-05, "loss": 1.0726, "step": 1224 }, { "epoch": 0.19999183706787477, "grad_norm": 3.510155439376831, "learning_rate": 1.9998959516746955e-05, "loss": 0.9914, "step": 1225 }, { "epoch": 0.20015509571037918, "grad_norm": 3.8138229846954346, "learning_rate": 1.9998950247616353e-05, "loss": 1.1231, "step": 1226 }, { "epoch": 0.20031835435288356, "grad_norm": 4.264976978302002, "learning_rate": 1.9998940937383836e-05, "loss": 1.4068, "step": 1227 }, { "epoch": 0.20048161299538794, "grad_norm": 3.8225162029266357, "learning_rate": 1.999893158604945e-05, "loss": 1.0425, "step": 1228 }, { "epoch": 0.20064487163789232, "grad_norm": 3.81278920173645, "learning_rate": 1.9998922193613235e-05, "loss": 1.282, "step": 1229 }, { "epoch": 0.2008081302803967, "grad_norm": 3.5434231758117676, "learning_rate": 1.9998912760075226e-05, "loss": 0.9075, "step": 1230 }, { "epoch": 0.20097138892290112, "grad_norm": 3.5701262950897217, "learning_rate": 1.999890328543546e-05, "loss": 0.9697, "step": 1231 }, { "epoch": 0.2011346475654055, "grad_norm": 3.2877113819122314, "learning_rate": 1.9998893769693977e-05, "loss": 1.0522, "step": 1232 }, { "epoch": 0.20129790620790988, "grad_norm": 4.268685340881348, "learning_rate": 1.999888421285082e-05, "loss": 1.252, "step": 1233 }, { "epoch": 0.20146116485041426, "grad_norm": 3.4514520168304443, "learning_rate": 1.9998874614906026e-05, "loss": 1.1359, "step": 1234 }, { "epoch": 0.20162442349291865, "grad_norm": 4.21030855178833, "learning_rate": 1.9998864975859632e-05, "loss": 1.2804, "step": 1235 }, { "epoch": 0.20178768213542306, "grad_norm": 3.7246079444885254, "learning_rate": 1.999885529571168e-05, "loss": 1.011, "step": 1236 }, { "epoch": 0.20195094077792744, "grad_norm": 3.696511745452881, "learning_rate": 1.9998845574462213e-05, "loss": 1.1187, "step": 1237 }, { "epoch": 0.20211419942043182, "grad_norm": 3.851145029067993, "learning_rate": 1.999883581211126e-05, "loss": 1.1619, "step": 1238 }, { "epoch": 0.2022774580629362, "grad_norm": 3.5965495109558105, "learning_rate": 1.9998826008658872e-05, "loss": 1.0986, "step": 1239 }, { "epoch": 0.2024407167054406, "grad_norm": 3.702144145965576, "learning_rate": 1.9998816164105088e-05, "loss": 0.9664, "step": 1240 }, { "epoch": 0.20260397534794497, "grad_norm": 3.8471791744232178, "learning_rate": 1.9998806278449944e-05, "loss": 1.2791, "step": 1241 }, { "epoch": 0.20276723399044938, "grad_norm": 3.712937593460083, "learning_rate": 1.9998796351693485e-05, "loss": 1.0526, "step": 1242 }, { "epoch": 0.20293049263295376, "grad_norm": 3.440035820007324, "learning_rate": 1.9998786383835745e-05, "loss": 1.1097, "step": 1243 }, { "epoch": 0.20309375127545815, "grad_norm": 3.820525884628296, "learning_rate": 1.9998776374876772e-05, "loss": 1.3038, "step": 1244 }, { "epoch": 0.20325700991796253, "grad_norm": 3.6098687648773193, "learning_rate": 1.9998766324816606e-05, "loss": 0.9756, "step": 1245 }, { "epoch": 0.2034202685604669, "grad_norm": 3.9567532539367676, "learning_rate": 1.999875623365529e-05, "loss": 1.0294, "step": 1246 }, { "epoch": 0.20358352720297132, "grad_norm": 4.214916229248047, "learning_rate": 1.9998746101392857e-05, "loss": 1.1328, "step": 1247 }, { "epoch": 0.2037467858454757, "grad_norm": 3.539686679840088, "learning_rate": 1.999873592802936e-05, "loss": 1.1594, "step": 1248 }, { "epoch": 0.20391004448798009, "grad_norm": 3.9835312366485596, "learning_rate": 1.9998725713564828e-05, "loss": 1.2114, "step": 1249 }, { "epoch": 0.20407330313048447, "grad_norm": 3.1441810131073, "learning_rate": 1.9998715457999313e-05, "loss": 0.8897, "step": 1250 }, { "epoch": 0.20423656177298885, "grad_norm": 3.7001304626464844, "learning_rate": 1.9998705161332857e-05, "loss": 0.9841, "step": 1251 }, { "epoch": 0.20439982041549323, "grad_norm": 3.7143261432647705, "learning_rate": 1.9998694823565495e-05, "loss": 1.0174, "step": 1252 }, { "epoch": 0.20456307905799764, "grad_norm": 3.651423931121826, "learning_rate": 1.9998684444697276e-05, "loss": 1.0145, "step": 1253 }, { "epoch": 0.20472633770050203, "grad_norm": 4.040162563323975, "learning_rate": 1.999867402472824e-05, "loss": 1.294, "step": 1254 }, { "epoch": 0.2048895963430064, "grad_norm": 3.9123873710632324, "learning_rate": 1.9998663563658435e-05, "loss": 1.0277, "step": 1255 }, { "epoch": 0.2050528549855108, "grad_norm": 4.234955787658691, "learning_rate": 1.9998653061487894e-05, "loss": 1.22, "step": 1256 }, { "epoch": 0.20521611362801517, "grad_norm": 4.112478733062744, "learning_rate": 1.9998642518216668e-05, "loss": 1.0501, "step": 1257 }, { "epoch": 0.20537937227051958, "grad_norm": 3.8908684253692627, "learning_rate": 1.9998631933844795e-05, "loss": 1.0631, "step": 1258 }, { "epoch": 0.20554263091302397, "grad_norm": 3.4643754959106445, "learning_rate": 1.9998621308372325e-05, "loss": 0.9442, "step": 1259 }, { "epoch": 0.20570588955552835, "grad_norm": 4.196658134460449, "learning_rate": 1.99986106417993e-05, "loss": 1.3981, "step": 1260 }, { "epoch": 0.20586914819803273, "grad_norm": 3.7873125076293945, "learning_rate": 1.999859993412576e-05, "loss": 1.3173, "step": 1261 }, { "epoch": 0.2060324068405371, "grad_norm": 4.089221477508545, "learning_rate": 1.999858918535175e-05, "loss": 1.2381, "step": 1262 }, { "epoch": 0.2061956654830415, "grad_norm": 3.8548457622528076, "learning_rate": 1.9998578395477317e-05, "loss": 1.3781, "step": 1263 }, { "epoch": 0.2063589241255459, "grad_norm": 3.620670795440674, "learning_rate": 1.99985675645025e-05, "loss": 1.0426, "step": 1264 }, { "epoch": 0.2065221827680503, "grad_norm": 3.2808899879455566, "learning_rate": 1.9998556692427352e-05, "loss": 1.0346, "step": 1265 }, { "epoch": 0.20668544141055467, "grad_norm": 3.679600477218628, "learning_rate": 1.999854577925191e-05, "loss": 1.013, "step": 1266 }, { "epoch": 0.20684870005305905, "grad_norm": 3.3496737480163574, "learning_rate": 1.999853482497622e-05, "loss": 1.0156, "step": 1267 }, { "epoch": 0.20701195869556344, "grad_norm": 3.3404040336608887, "learning_rate": 1.9998523829600333e-05, "loss": 1.1024, "step": 1268 }, { "epoch": 0.20717521733806785, "grad_norm": 3.447646379470825, "learning_rate": 1.999851279312429e-05, "loss": 1.0189, "step": 1269 }, { "epoch": 0.20733847598057223, "grad_norm": 3.6653857231140137, "learning_rate": 1.9998501715548132e-05, "loss": 1.328, "step": 1270 }, { "epoch": 0.2075017346230766, "grad_norm": 3.284107208251953, "learning_rate": 1.999849059687191e-05, "loss": 0.9744, "step": 1271 }, { "epoch": 0.207664993265581, "grad_norm": 3.8102939128875732, "learning_rate": 1.999847943709567e-05, "loss": 1.3072, "step": 1272 }, { "epoch": 0.20782825190808538, "grad_norm": 3.5067050457000732, "learning_rate": 1.9998468236219455e-05, "loss": 1.1071, "step": 1273 }, { "epoch": 0.20799151055058976, "grad_norm": 4.151673316955566, "learning_rate": 1.9998456994243314e-05, "loss": 1.2483, "step": 1274 }, { "epoch": 0.20815476919309417, "grad_norm": 3.847637176513672, "learning_rate": 1.9998445711167292e-05, "loss": 1.0831, "step": 1275 }, { "epoch": 0.20831802783559855, "grad_norm": 4.253880977630615, "learning_rate": 1.999843438699143e-05, "loss": 1.2528, "step": 1276 }, { "epoch": 0.20848128647810293, "grad_norm": 3.7791247367858887, "learning_rate": 1.9998423021715788e-05, "loss": 1.1719, "step": 1277 }, { "epoch": 0.20864454512060732, "grad_norm": 4.07753849029541, "learning_rate": 1.9998411615340395e-05, "loss": 1.1259, "step": 1278 }, { "epoch": 0.2088078037631117, "grad_norm": 3.7911934852600098, "learning_rate": 1.9998400167865313e-05, "loss": 1.0666, "step": 1279 }, { "epoch": 0.2089710624056161, "grad_norm": 3.9251601696014404, "learning_rate": 1.9998388679290583e-05, "loss": 1.0137, "step": 1280 }, { "epoch": 0.2091343210481205, "grad_norm": 3.980333089828491, "learning_rate": 1.9998377149616253e-05, "loss": 1.1044, "step": 1281 }, { "epoch": 0.20929757969062487, "grad_norm": 3.7842001914978027, "learning_rate": 1.999836557884237e-05, "loss": 1.0445, "step": 1282 }, { "epoch": 0.20946083833312926, "grad_norm": 3.8284072875976562, "learning_rate": 1.9998353966968977e-05, "loss": 1.185, "step": 1283 }, { "epoch": 0.20962409697563364, "grad_norm": 3.7717325687408447, "learning_rate": 1.999834231399613e-05, "loss": 1.1404, "step": 1284 }, { "epoch": 0.20978735561813802, "grad_norm": 3.6760671138763428, "learning_rate": 1.999833061992387e-05, "loss": 1.1298, "step": 1285 }, { "epoch": 0.20995061426064243, "grad_norm": 3.512875556945801, "learning_rate": 1.9998318884752253e-05, "loss": 1.2021, "step": 1286 }, { "epoch": 0.21011387290314681, "grad_norm": 3.3913378715515137, "learning_rate": 1.999830710848132e-05, "loss": 1.0712, "step": 1287 }, { "epoch": 0.2102771315456512, "grad_norm": 3.3424789905548096, "learning_rate": 1.999829529111112e-05, "loss": 0.9762, "step": 1288 }, { "epoch": 0.21044039018815558, "grad_norm": 3.3971970081329346, "learning_rate": 1.9998283432641707e-05, "loss": 1.0523, "step": 1289 }, { "epoch": 0.21060364883065996, "grad_norm": 3.857379198074341, "learning_rate": 1.9998271533073123e-05, "loss": 1.0904, "step": 1290 }, { "epoch": 0.21076690747316437, "grad_norm": 3.7188336849212646, "learning_rate": 1.999825959240542e-05, "loss": 1.0685, "step": 1291 }, { "epoch": 0.21093016611566875, "grad_norm": 3.5236620903015137, "learning_rate": 1.999824761063865e-05, "loss": 1.1326, "step": 1292 }, { "epoch": 0.21109342475817314, "grad_norm": 3.501167058944702, "learning_rate": 1.9998235587772856e-05, "loss": 1.0263, "step": 1293 }, { "epoch": 0.21125668340067752, "grad_norm": 3.6277377605438232, "learning_rate": 1.9998223523808092e-05, "loss": 1.0221, "step": 1294 }, { "epoch": 0.2114199420431819, "grad_norm": 4.016097545623779, "learning_rate": 1.9998211418744406e-05, "loss": 1.245, "step": 1295 }, { "epoch": 0.2115832006856863, "grad_norm": 3.811779260635376, "learning_rate": 1.9998199272581848e-05, "loss": 1.2267, "step": 1296 }, { "epoch": 0.2117464593281907, "grad_norm": 4.111326694488525, "learning_rate": 1.999818708532047e-05, "loss": 1.0485, "step": 1297 }, { "epoch": 0.21190971797069508, "grad_norm": 3.7204277515411377, "learning_rate": 1.999817485696032e-05, "loss": 0.9816, "step": 1298 }, { "epoch": 0.21207297661319946, "grad_norm": 4.110367774963379, "learning_rate": 1.9998162587501446e-05, "loss": 1.0273, "step": 1299 }, { "epoch": 0.21223623525570384, "grad_norm": 3.925619602203369, "learning_rate": 1.99981502769439e-05, "loss": 1.0216, "step": 1300 }, { "epoch": 0.21239949389820822, "grad_norm": 3.616844892501831, "learning_rate": 1.9998137925287738e-05, "loss": 1.0708, "step": 1301 }, { "epoch": 0.21256275254071264, "grad_norm": 4.050342082977295, "learning_rate": 1.9998125532533e-05, "loss": 1.0727, "step": 1302 }, { "epoch": 0.21272601118321702, "grad_norm": 3.6856746673583984, "learning_rate": 1.999811309867975e-05, "loss": 1.1115, "step": 1303 }, { "epoch": 0.2128892698257214, "grad_norm": 3.6997463703155518, "learning_rate": 1.9998100623728026e-05, "loss": 1.053, "step": 1304 }, { "epoch": 0.21305252846822578, "grad_norm": 3.083793878555298, "learning_rate": 1.9998088107677892e-05, "loss": 0.9319, "step": 1305 }, { "epoch": 0.21321578711073016, "grad_norm": 3.917459011077881, "learning_rate": 1.999807555052939e-05, "loss": 1.1067, "step": 1306 }, { "epoch": 0.21337904575323458, "grad_norm": 3.890510082244873, "learning_rate": 1.9998062952282575e-05, "loss": 1.2438, "step": 1307 }, { "epoch": 0.21354230439573896, "grad_norm": 3.0267202854156494, "learning_rate": 1.9998050312937495e-05, "loss": 0.8488, "step": 1308 }, { "epoch": 0.21370556303824334, "grad_norm": 4.264025688171387, "learning_rate": 1.9998037632494208e-05, "loss": 1.3536, "step": 1309 }, { "epoch": 0.21386882168074772, "grad_norm": 3.7814579010009766, "learning_rate": 1.9998024910952764e-05, "loss": 1.205, "step": 1310 }, { "epoch": 0.2140320803232521, "grad_norm": 3.8422045707702637, "learning_rate": 1.9998012148313216e-05, "loss": 1.101, "step": 1311 }, { "epoch": 0.2141953389657565, "grad_norm": 3.605544090270996, "learning_rate": 1.9997999344575613e-05, "loss": 0.9961, "step": 1312 }, { "epoch": 0.2143585976082609, "grad_norm": 3.747958183288574, "learning_rate": 1.9997986499740012e-05, "loss": 0.9698, "step": 1313 }, { "epoch": 0.21452185625076528, "grad_norm": 3.855470657348633, "learning_rate": 1.999797361380646e-05, "loss": 1.1404, "step": 1314 }, { "epoch": 0.21468511489326966, "grad_norm": 4.323287487030029, "learning_rate": 1.9997960686775016e-05, "loss": 1.1788, "step": 1315 }, { "epoch": 0.21484837353577405, "grad_norm": 3.4689345359802246, "learning_rate": 1.9997947718645733e-05, "loss": 1.0802, "step": 1316 }, { "epoch": 0.21501163217827843, "grad_norm": 3.5066306591033936, "learning_rate": 1.999793470941866e-05, "loss": 1.1281, "step": 1317 }, { "epoch": 0.21517489082078284, "grad_norm": 3.625898599624634, "learning_rate": 1.9997921659093847e-05, "loss": 1.041, "step": 1318 }, { "epoch": 0.21533814946328722, "grad_norm": 3.4749913215637207, "learning_rate": 1.999790856767136e-05, "loss": 1.1782, "step": 1319 }, { "epoch": 0.2155014081057916, "grad_norm": 3.6581132411956787, "learning_rate": 1.9997895435151247e-05, "loss": 1.2049, "step": 1320 }, { "epoch": 0.21566466674829599, "grad_norm": 3.3498289585113525, "learning_rate": 1.999788226153356e-05, "loss": 0.9617, "step": 1321 }, { "epoch": 0.21582792539080037, "grad_norm": 3.9195210933685303, "learning_rate": 1.999786904681835e-05, "loss": 1.1366, "step": 1322 }, { "epoch": 0.21599118403330475, "grad_norm": 3.4218966960906982, "learning_rate": 1.999785579100568e-05, "loss": 0.8894, "step": 1323 }, { "epoch": 0.21615444267580916, "grad_norm": 3.6539080142974854, "learning_rate": 1.9997842494095597e-05, "loss": 1.1017, "step": 1324 }, { "epoch": 0.21631770131831354, "grad_norm": 4.208160877227783, "learning_rate": 1.9997829156088158e-05, "loss": 1.1439, "step": 1325 }, { "epoch": 0.21648095996081793, "grad_norm": 3.5791842937469482, "learning_rate": 1.9997815776983422e-05, "loss": 1.052, "step": 1326 }, { "epoch": 0.2166442186033223, "grad_norm": 3.986501693725586, "learning_rate": 1.9997802356781437e-05, "loss": 1.1064, "step": 1327 }, { "epoch": 0.2168074772458267, "grad_norm": 3.7725982666015625, "learning_rate": 1.9997788895482264e-05, "loss": 1.2151, "step": 1328 }, { "epoch": 0.2169707358883311, "grad_norm": 3.8912768363952637, "learning_rate": 1.9997775393085956e-05, "loss": 1.0237, "step": 1329 }, { "epoch": 0.21713399453083548, "grad_norm": 3.4126288890838623, "learning_rate": 1.9997761849592566e-05, "loss": 1.0905, "step": 1330 }, { "epoch": 0.21729725317333987, "grad_norm": 3.8330602645874023, "learning_rate": 1.9997748265002153e-05, "loss": 1.1611, "step": 1331 }, { "epoch": 0.21746051181584425, "grad_norm": 3.6865501403808594, "learning_rate": 1.9997734639314776e-05, "loss": 1.2343, "step": 1332 }, { "epoch": 0.21762377045834863, "grad_norm": 3.841609239578247, "learning_rate": 1.9997720972530482e-05, "loss": 1.1768, "step": 1333 }, { "epoch": 0.217787029100853, "grad_norm": 4.092486381530762, "learning_rate": 1.9997707264649335e-05, "loss": 2.4911, "step": 1334 }, { "epoch": 0.21795028774335742, "grad_norm": 4.191897392272949, "learning_rate": 1.999769351567139e-05, "loss": 1.3374, "step": 1335 }, { "epoch": 0.2181135463858618, "grad_norm": 4.082784175872803, "learning_rate": 1.9997679725596696e-05, "loss": 1.2147, "step": 1336 }, { "epoch": 0.2182768050283662, "grad_norm": 3.5603275299072266, "learning_rate": 1.999766589442532e-05, "loss": 1.1647, "step": 1337 }, { "epoch": 0.21844006367087057, "grad_norm": 3.4884026050567627, "learning_rate": 1.9997652022157313e-05, "loss": 1.0461, "step": 1338 }, { "epoch": 0.21860332231337495, "grad_norm": 3.402554750442505, "learning_rate": 1.9997638108792733e-05, "loss": 1.2232, "step": 1339 }, { "epoch": 0.21876658095587936, "grad_norm": 3.3881661891937256, "learning_rate": 1.999762415433164e-05, "loss": 1.0604, "step": 1340 }, { "epoch": 0.21892983959838375, "grad_norm": 3.5052809715270996, "learning_rate": 1.9997610158774087e-05, "loss": 1.1715, "step": 1341 }, { "epoch": 0.21909309824088813, "grad_norm": 3.866478204727173, "learning_rate": 1.9997596122120137e-05, "loss": 1.2482, "step": 1342 }, { "epoch": 0.2192563568833925, "grad_norm": 4.025859832763672, "learning_rate": 1.9997582044369843e-05, "loss": 1.1438, "step": 1343 }, { "epoch": 0.2194196155258969, "grad_norm": 3.687797784805298, "learning_rate": 1.999756792552326e-05, "loss": 1.2538, "step": 1344 }, { "epoch": 0.21958287416840128, "grad_norm": 4.001161098480225, "learning_rate": 1.9997553765580458e-05, "loss": 1.3053, "step": 1345 }, { "epoch": 0.2197461328109057, "grad_norm": 3.4303061962127686, "learning_rate": 1.9997539564541482e-05, "loss": 1.0828, "step": 1346 }, { "epoch": 0.21990939145341007, "grad_norm": 3.6473629474639893, "learning_rate": 1.9997525322406396e-05, "loss": 0.9491, "step": 1347 }, { "epoch": 0.22007265009591445, "grad_norm": 3.4672114849090576, "learning_rate": 1.999751103917526e-05, "loss": 0.9576, "step": 1348 }, { "epoch": 0.22023590873841883, "grad_norm": 3.7295010089874268, "learning_rate": 1.999749671484813e-05, "loss": 1.1995, "step": 1349 }, { "epoch": 0.22039916738092322, "grad_norm": 3.525073766708374, "learning_rate": 1.999748234942507e-05, "loss": 1.0248, "step": 1350 }, { "epoch": 0.22056242602342763, "grad_norm": 3.517911911010742, "learning_rate": 1.999746794290613e-05, "loss": 1.0136, "step": 1351 }, { "epoch": 0.220725684665932, "grad_norm": 3.7509918212890625, "learning_rate": 1.9997453495291373e-05, "loss": 1.1127, "step": 1352 }, { "epoch": 0.2208889433084364, "grad_norm": 3.7161545753479004, "learning_rate": 1.9997439006580866e-05, "loss": 1.036, "step": 1353 }, { "epoch": 0.22105220195094077, "grad_norm": 3.8143482208251953, "learning_rate": 1.9997424476774658e-05, "loss": 1.0343, "step": 1354 }, { "epoch": 0.22121546059344516, "grad_norm": 3.976823329925537, "learning_rate": 1.9997409905872813e-05, "loss": 1.1625, "step": 1355 }, { "epoch": 0.22137871923594954, "grad_norm": 3.3153200149536133, "learning_rate": 1.9997395293875393e-05, "loss": 0.8947, "step": 1356 }, { "epoch": 0.22154197787845395, "grad_norm": 4.2335405349731445, "learning_rate": 1.9997380640782452e-05, "loss": 1.2086, "step": 1357 }, { "epoch": 0.22170523652095833, "grad_norm": 3.3761885166168213, "learning_rate": 1.9997365946594057e-05, "loss": 0.8667, "step": 1358 }, { "epoch": 0.22186849516346271, "grad_norm": 3.8541619777679443, "learning_rate": 1.9997351211310267e-05, "loss": 0.972, "step": 1359 }, { "epoch": 0.2220317538059671, "grad_norm": 3.9971182346343994, "learning_rate": 1.9997336434931138e-05, "loss": 1.0551, "step": 1360 }, { "epoch": 0.22219501244847148, "grad_norm": 4.422454833984375, "learning_rate": 1.9997321617456735e-05, "loss": 1.4733, "step": 1361 }, { "epoch": 0.2223582710909759, "grad_norm": 4.3888092041015625, "learning_rate": 1.999730675888712e-05, "loss": 1.22, "step": 1362 }, { "epoch": 0.22252152973348027, "grad_norm": 3.5485382080078125, "learning_rate": 1.999729185922235e-05, "loss": 1.1635, "step": 1363 }, { "epoch": 0.22268478837598465, "grad_norm": 4.515972137451172, "learning_rate": 1.9997276918462484e-05, "loss": 1.2574, "step": 1364 }, { "epoch": 0.22284804701848904, "grad_norm": 3.7270002365112305, "learning_rate": 1.9997261936607592e-05, "loss": 1.0701, "step": 1365 }, { "epoch": 0.22301130566099342, "grad_norm": 3.3745524883270264, "learning_rate": 1.999724691365773e-05, "loss": 0.8966, "step": 1366 }, { "epoch": 0.22317456430349783, "grad_norm": 3.4260525703430176, "learning_rate": 1.9997231849612963e-05, "loss": 1.0908, "step": 1367 }, { "epoch": 0.2233378229460022, "grad_norm": 3.6117405891418457, "learning_rate": 1.9997216744473347e-05, "loss": 1.1038, "step": 1368 }, { "epoch": 0.2235010815885066, "grad_norm": 3.276843547821045, "learning_rate": 1.9997201598238952e-05, "loss": 0.9871, "step": 1369 }, { "epoch": 0.22366434023101098, "grad_norm": 3.295454502105713, "learning_rate": 1.9997186410909832e-05, "loss": 1.0406, "step": 1370 }, { "epoch": 0.22382759887351536, "grad_norm": 3.695645570755005, "learning_rate": 1.999717118248606e-05, "loss": 1.2622, "step": 1371 }, { "epoch": 0.22399085751601974, "grad_norm": 4.063730716705322, "learning_rate": 1.999715591296768e-05, "loss": 1.2102, "step": 1372 }, { "epoch": 0.22415411615852415, "grad_norm": 3.67513370513916, "learning_rate": 1.999714060235478e-05, "loss": 0.9892, "step": 1373 }, { "epoch": 0.22431737480102854, "grad_norm": 3.8431034088134766, "learning_rate": 1.9997125250647404e-05, "loss": 1.1305, "step": 1374 }, { "epoch": 0.22448063344353292, "grad_norm": 3.801236629486084, "learning_rate": 1.999710985784562e-05, "loss": 1.2118, "step": 1375 }, { "epoch": 0.2246438920860373, "grad_norm": 3.6994242668151855, "learning_rate": 1.9997094423949496e-05, "loss": 1.1268, "step": 1376 }, { "epoch": 0.22480715072854168, "grad_norm": 3.813148021697998, "learning_rate": 1.9997078948959087e-05, "loss": 1.1711, "step": 1377 }, { "epoch": 0.2249704093710461, "grad_norm": 3.8272039890289307, "learning_rate": 1.9997063432874464e-05, "loss": 1.2636, "step": 1378 }, { "epoch": 0.22513366801355048, "grad_norm": 3.661424398422241, "learning_rate": 1.999704787569569e-05, "loss": 1.1106, "step": 1379 }, { "epoch": 0.22529692665605486, "grad_norm": 4.024123191833496, "learning_rate": 1.9997032277422822e-05, "loss": 1.114, "step": 1380 }, { "epoch": 0.22546018529855924, "grad_norm": 3.5259010791778564, "learning_rate": 1.9997016638055932e-05, "loss": 0.8528, "step": 1381 }, { "epoch": 0.22562344394106362, "grad_norm": 3.719845771789551, "learning_rate": 1.999700095759508e-05, "loss": 1.138, "step": 1382 }, { "epoch": 0.225786702583568, "grad_norm": 3.5554041862487793, "learning_rate": 1.999698523604033e-05, "loss": 1.1073, "step": 1383 }, { "epoch": 0.22594996122607242, "grad_norm": 3.564838409423828, "learning_rate": 1.9996969473391753e-05, "loss": 0.9849, "step": 1384 }, { "epoch": 0.2261132198685768, "grad_norm": 3.612359046936035, "learning_rate": 1.9996953669649408e-05, "loss": 1.13, "step": 1385 }, { "epoch": 0.22627647851108118, "grad_norm": 3.788851261138916, "learning_rate": 1.999693782481336e-05, "loss": 1.2052, "step": 1386 }, { "epoch": 0.22643973715358556, "grad_norm": 4.185181617736816, "learning_rate": 1.9996921938883678e-05, "loss": 1.2102, "step": 1387 }, { "epoch": 0.22660299579608995, "grad_norm": 3.8340988159179688, "learning_rate": 1.999690601186042e-05, "loss": 1.044, "step": 1388 }, { "epoch": 0.22676625443859436, "grad_norm": 3.8814005851745605, "learning_rate": 1.9996890043743662e-05, "loss": 1.191, "step": 1389 }, { "epoch": 0.22692951308109874, "grad_norm": 3.412876605987549, "learning_rate": 1.9996874034533463e-05, "loss": 1.1281, "step": 1390 }, { "epoch": 0.22709277172360312, "grad_norm": 3.589545726776123, "learning_rate": 1.9996857984229886e-05, "loss": 1.1036, "step": 1391 }, { "epoch": 0.2272560303661075, "grad_norm": 3.9135193824768066, "learning_rate": 1.9996841892833e-05, "loss": 1.9947, "step": 1392 }, { "epoch": 0.22741928900861189, "grad_norm": 4.1146721839904785, "learning_rate": 1.9996825760342876e-05, "loss": 1.1738, "step": 1393 }, { "epoch": 0.22758254765111627, "grad_norm": 3.368621826171875, "learning_rate": 1.9996809586759573e-05, "loss": 1.0248, "step": 1394 }, { "epoch": 0.22774580629362068, "grad_norm": 3.5844039916992188, "learning_rate": 1.999679337208316e-05, "loss": 1.0791, "step": 1395 }, { "epoch": 0.22790906493612506, "grad_norm": 3.3905088901519775, "learning_rate": 1.9996777116313706e-05, "loss": 1.0276, "step": 1396 }, { "epoch": 0.22807232357862944, "grad_norm": 3.4366793632507324, "learning_rate": 1.9996760819451276e-05, "loss": 0.9546, "step": 1397 }, { "epoch": 0.22823558222113383, "grad_norm": 3.5135257244110107, "learning_rate": 1.9996744481495938e-05, "loss": 1.0471, "step": 1398 }, { "epoch": 0.2283988408636382, "grad_norm": 3.3224241733551025, "learning_rate": 1.9996728102447755e-05, "loss": 0.9032, "step": 1399 }, { "epoch": 0.22856209950614262, "grad_norm": 4.1507344245910645, "learning_rate": 1.99967116823068e-05, "loss": 1.1991, "step": 1400 }, { "epoch": 0.228725358148647, "grad_norm": 3.7529139518737793, "learning_rate": 1.9996695221073137e-05, "loss": 1.1788, "step": 1401 }, { "epoch": 0.22888861679115138, "grad_norm": 3.3764123916625977, "learning_rate": 1.9996678718746834e-05, "loss": 0.8901, "step": 1402 }, { "epoch": 0.22905187543365577, "grad_norm": 3.930738687515259, "learning_rate": 1.999666217532796e-05, "loss": 0.9222, "step": 1403 }, { "epoch": 0.22921513407616015, "grad_norm": 3.588534116744995, "learning_rate": 1.9996645590816584e-05, "loss": 0.956, "step": 1404 }, { "epoch": 0.22937839271866453, "grad_norm": 4.236833572387695, "learning_rate": 1.999662896521277e-05, "loss": 1.1493, "step": 1405 }, { "epoch": 0.22954165136116894, "grad_norm": 3.5582354068756104, "learning_rate": 1.999661229851659e-05, "loss": 1.1229, "step": 1406 }, { "epoch": 0.22970491000367332, "grad_norm": 3.360483169555664, "learning_rate": 1.9996595590728113e-05, "loss": 0.9488, "step": 1407 }, { "epoch": 0.2298681686461777, "grad_norm": 3.7751517295837402, "learning_rate": 1.9996578841847406e-05, "loss": 1.3068, "step": 1408 }, { "epoch": 0.2300314272886821, "grad_norm": 3.5819709300994873, "learning_rate": 1.9996562051874538e-05, "loss": 0.9952, "step": 1409 }, { "epoch": 0.23019468593118647, "grad_norm": 3.7043917179107666, "learning_rate": 1.9996545220809577e-05, "loss": 1.0741, "step": 1410 }, { "epoch": 0.23035794457369088, "grad_norm": 3.4909839630126953, "learning_rate": 1.9996528348652592e-05, "loss": 1.0555, "step": 1411 }, { "epoch": 0.23052120321619526, "grad_norm": 3.5567760467529297, "learning_rate": 1.9996511435403657e-05, "loss": 0.9941, "step": 1412 }, { "epoch": 0.23068446185869965, "grad_norm": 3.895538330078125, "learning_rate": 1.9996494481062833e-05, "loss": 1.0662, "step": 1413 }, { "epoch": 0.23084772050120403, "grad_norm": 3.758373498916626, "learning_rate": 1.9996477485630197e-05, "loss": 1.2071, "step": 1414 }, { "epoch": 0.2310109791437084, "grad_norm": 3.172443151473999, "learning_rate": 1.9996460449105818e-05, "loss": 1.0431, "step": 1415 }, { "epoch": 0.2311742377862128, "grad_norm": 3.619213819503784, "learning_rate": 1.9996443371489763e-05, "loss": 0.9501, "step": 1416 }, { "epoch": 0.2313374964287172, "grad_norm": 3.546781063079834, "learning_rate": 1.9996426252782107e-05, "loss": 1.486, "step": 1417 }, { "epoch": 0.2315007550712216, "grad_norm": 3.527642011642456, "learning_rate": 1.9996409092982915e-05, "loss": 1.1875, "step": 1418 }, { "epoch": 0.23166401371372597, "grad_norm": 3.8493127822875977, "learning_rate": 1.999639189209226e-05, "loss": 1.4831, "step": 1419 }, { "epoch": 0.23182727235623035, "grad_norm": 3.556709051132202, "learning_rate": 1.9996374650110214e-05, "loss": 1.0232, "step": 1420 }, { "epoch": 0.23199053099873473, "grad_norm": 3.585869312286377, "learning_rate": 1.9996357367036845e-05, "loss": 1.0865, "step": 1421 }, { "epoch": 0.23215378964123914, "grad_norm": 3.6303844451904297, "learning_rate": 1.9996340042872223e-05, "loss": 1.0761, "step": 1422 }, { "epoch": 0.23231704828374353, "grad_norm": 3.9723498821258545, "learning_rate": 1.9996322677616424e-05, "loss": 1.2945, "step": 1423 }, { "epoch": 0.2324803069262479, "grad_norm": 3.6244401931762695, "learning_rate": 1.999630527126952e-05, "loss": 1.1919, "step": 1424 }, { "epoch": 0.2326435655687523, "grad_norm": 3.753955364227295, "learning_rate": 1.9996287823831574e-05, "loss": 1.0965, "step": 1425 }, { "epoch": 0.23280682421125667, "grad_norm": 4.478501796722412, "learning_rate": 1.9996270335302666e-05, "loss": 1.2159, "step": 1426 }, { "epoch": 0.23297008285376108, "grad_norm": 3.7764692306518555, "learning_rate": 1.9996252805682865e-05, "loss": 1.168, "step": 1427 }, { "epoch": 0.23313334149626547, "grad_norm": 3.9066083431243896, "learning_rate": 1.9996235234972245e-05, "loss": 1.0399, "step": 1428 }, { "epoch": 0.23329660013876985, "grad_norm": 3.790604829788208, "learning_rate": 1.9996217623170874e-05, "loss": 1.1215, "step": 1429 }, { "epoch": 0.23345985878127423, "grad_norm": 3.721609592437744, "learning_rate": 1.9996199970278827e-05, "loss": 1.0844, "step": 1430 }, { "epoch": 0.23362311742377861, "grad_norm": 4.2610673904418945, "learning_rate": 1.999618227629618e-05, "loss": 1.2047, "step": 1431 }, { "epoch": 0.233786376066283, "grad_norm": 3.8017399311065674, "learning_rate": 1.9996164541223e-05, "loss": 1.0372, "step": 1432 }, { "epoch": 0.2339496347087874, "grad_norm": 3.4224531650543213, "learning_rate": 1.9996146765059358e-05, "loss": 1.0114, "step": 1433 }, { "epoch": 0.2341128933512918, "grad_norm": 3.4088268280029297, "learning_rate": 1.9996128947805334e-05, "loss": 1.0558, "step": 1434 }, { "epoch": 0.23427615199379617, "grad_norm": 4.066218852996826, "learning_rate": 1.9996111089461e-05, "loss": 1.0775, "step": 1435 }, { "epoch": 0.23443941063630055, "grad_norm": 3.832329750061035, "learning_rate": 1.9996093190026428e-05, "loss": 1.1265, "step": 1436 }, { "epoch": 0.23460266927880494, "grad_norm": 3.5189719200134277, "learning_rate": 1.999607524950169e-05, "loss": 0.9932, "step": 1437 }, { "epoch": 0.23476592792130935, "grad_norm": 3.7729244232177734, "learning_rate": 1.999605726788686e-05, "loss": 1.1201, "step": 1438 }, { "epoch": 0.23492918656381373, "grad_norm": 3.419494152069092, "learning_rate": 1.9996039245182014e-05, "loss": 1.011, "step": 1439 }, { "epoch": 0.2350924452063181, "grad_norm": 3.1256136894226074, "learning_rate": 1.9996021181387224e-05, "loss": 0.8164, "step": 1440 }, { "epoch": 0.2352557038488225, "grad_norm": 3.77006196975708, "learning_rate": 1.9996003076502567e-05, "loss": 1.1127, "step": 1441 }, { "epoch": 0.23541896249132688, "grad_norm": 3.7921955585479736, "learning_rate": 1.9995984930528115e-05, "loss": 0.8412, "step": 1442 }, { "epoch": 0.23558222113383126, "grad_norm": 3.3543272018432617, "learning_rate": 1.9995966743463945e-05, "loss": 1.0451, "step": 1443 }, { "epoch": 0.23574547977633567, "grad_norm": 3.8356072902679443, "learning_rate": 1.9995948515310126e-05, "loss": 1.3054, "step": 1444 }, { "epoch": 0.23590873841884005, "grad_norm": 4.079472541809082, "learning_rate": 1.9995930246066742e-05, "loss": 1.0138, "step": 1445 }, { "epoch": 0.23607199706134444, "grad_norm": 3.3661162853240967, "learning_rate": 1.999591193573386e-05, "loss": 0.8742, "step": 1446 }, { "epoch": 0.23623525570384882, "grad_norm": 4.197597503662109, "learning_rate": 1.9995893584311557e-05, "loss": 1.2373, "step": 1447 }, { "epoch": 0.2363985143463532, "grad_norm": 3.853841543197632, "learning_rate": 1.9995875191799916e-05, "loss": 1.2571, "step": 1448 }, { "epoch": 0.2365617729888576, "grad_norm": 3.355792760848999, "learning_rate": 1.9995856758199004e-05, "loss": 0.9435, "step": 1449 }, { "epoch": 0.236725031631362, "grad_norm": 3.6207799911499023, "learning_rate": 1.9995838283508897e-05, "loss": 1.2138, "step": 1450 }, { "epoch": 0.23688829027386638, "grad_norm": 3.6884384155273438, "learning_rate": 1.9995819767729675e-05, "loss": 1.1008, "step": 1451 }, { "epoch": 0.23705154891637076, "grad_norm": 3.780456781387329, "learning_rate": 1.999580121086141e-05, "loss": 1.084, "step": 1452 }, { "epoch": 0.23721480755887514, "grad_norm": 3.791825532913208, "learning_rate": 1.999578261290418e-05, "loss": 1.2923, "step": 1453 }, { "epoch": 0.23737806620137952, "grad_norm": 3.676764965057373, "learning_rate": 1.9995763973858064e-05, "loss": 0.9788, "step": 1454 }, { "epoch": 0.23754132484388393, "grad_norm": 3.744307041168213, "learning_rate": 1.999574529372314e-05, "loss": 1.1684, "step": 1455 }, { "epoch": 0.23770458348638832, "grad_norm": 3.659278392791748, "learning_rate": 1.9995726572499476e-05, "loss": 1.053, "step": 1456 }, { "epoch": 0.2378678421288927, "grad_norm": 4.40251350402832, "learning_rate": 1.9995707810187158e-05, "loss": 1.2406, "step": 1457 }, { "epoch": 0.23803110077139708, "grad_norm": 3.2346816062927246, "learning_rate": 1.9995689006786257e-05, "loss": 0.955, "step": 1458 }, { "epoch": 0.23819435941390146, "grad_norm": 3.2859489917755127, "learning_rate": 1.9995670162296853e-05, "loss": 1.9389, "step": 1459 }, { "epoch": 0.23835761805640587, "grad_norm": 3.4288125038146973, "learning_rate": 1.9995651276719025e-05, "loss": 1.0475, "step": 1460 }, { "epoch": 0.23852087669891026, "grad_norm": 3.6394968032836914, "learning_rate": 1.999563235005285e-05, "loss": 1.1895, "step": 1461 }, { "epoch": 0.23868413534141464, "grad_norm": 3.2851290702819824, "learning_rate": 1.99956133822984e-05, "loss": 1.0826, "step": 1462 }, { "epoch": 0.23884739398391902, "grad_norm": 3.617403268814087, "learning_rate": 1.999559437345576e-05, "loss": 0.9487, "step": 1463 }, { "epoch": 0.2390106526264234, "grad_norm": 3.484435558319092, "learning_rate": 1.9995575323525007e-05, "loss": 1.0688, "step": 1464 }, { "epoch": 0.23917391126892779, "grad_norm": 3.6466174125671387, "learning_rate": 1.999555623250622e-05, "loss": 1.0122, "step": 1465 }, { "epoch": 0.2393371699114322, "grad_norm": 3.376601219177246, "learning_rate": 1.9995537100399472e-05, "loss": 1.0235, "step": 1466 }, { "epoch": 0.23950042855393658, "grad_norm": 3.549838066101074, "learning_rate": 1.9995517927204844e-05, "loss": 1.1163, "step": 1467 }, { "epoch": 0.23966368719644096, "grad_norm": 3.7066900730133057, "learning_rate": 1.999549871292242e-05, "loss": 1.0213, "step": 1468 }, { "epoch": 0.23982694583894534, "grad_norm": 4.667047500610352, "learning_rate": 1.9995479457552275e-05, "loss": 1.4079, "step": 1469 }, { "epoch": 0.23999020448144973, "grad_norm": 3.411668539047241, "learning_rate": 1.999546016109448e-05, "loss": 1.0443, "step": 1470 }, { "epoch": 0.24015346312395414, "grad_norm": 3.8197381496429443, "learning_rate": 1.9995440823549133e-05, "loss": 1.2014, "step": 1471 }, { "epoch": 0.24031672176645852, "grad_norm": 3.2812650203704834, "learning_rate": 1.99954214449163e-05, "loss": 1.023, "step": 1472 }, { "epoch": 0.2404799804089629, "grad_norm": 4.171722888946533, "learning_rate": 1.9995402025196063e-05, "loss": 1.1455, "step": 1473 }, { "epoch": 0.24064323905146728, "grad_norm": 3.52169132232666, "learning_rate": 1.99953825643885e-05, "loss": 1.0791, "step": 1474 }, { "epoch": 0.24080649769397167, "grad_norm": 3.9306094646453857, "learning_rate": 1.9995363062493695e-05, "loss": 1.1873, "step": 1475 }, { "epoch": 0.24096975633647605, "grad_norm": 3.414422035217285, "learning_rate": 1.9995343519511726e-05, "loss": 0.9471, "step": 1476 }, { "epoch": 0.24113301497898046, "grad_norm": 3.3310296535491943, "learning_rate": 1.9995323935442676e-05, "loss": 1.4945, "step": 1477 }, { "epoch": 0.24129627362148484, "grad_norm": 3.2423899173736572, "learning_rate": 1.9995304310286623e-05, "loss": 0.8992, "step": 1478 }, { "epoch": 0.24145953226398922, "grad_norm": 3.491116523742676, "learning_rate": 1.9995284644043647e-05, "loss": 0.9622, "step": 1479 }, { "epoch": 0.2416227909064936, "grad_norm": 3.683063507080078, "learning_rate": 1.999526493671383e-05, "loss": 1.1561, "step": 1480 }, { "epoch": 0.241786049548998, "grad_norm": 3.6739561557769775, "learning_rate": 1.9995245188297255e-05, "loss": 1.1168, "step": 1481 }, { "epoch": 0.2419493081915024, "grad_norm": 3.1458489894866943, "learning_rate": 1.9995225398793997e-05, "loss": 0.8595, "step": 1482 }, { "epoch": 0.24211256683400678, "grad_norm": 3.5991227626800537, "learning_rate": 1.9995205568204146e-05, "loss": 1.1164, "step": 1483 }, { "epoch": 0.24227582547651116, "grad_norm": 3.912339448928833, "learning_rate": 1.9995185696527776e-05, "loss": 1.1849, "step": 1484 }, { "epoch": 0.24243908411901555, "grad_norm": 3.696345806121826, "learning_rate": 1.9995165783764975e-05, "loss": 1.1482, "step": 1485 }, { "epoch": 0.24260234276151993, "grad_norm": 3.7649569511413574, "learning_rate": 1.9995145829915818e-05, "loss": 1.0788, "step": 1486 }, { "epoch": 0.24276560140402434, "grad_norm": 4.239694595336914, "learning_rate": 1.9995125834980396e-05, "loss": 1.1071, "step": 1487 }, { "epoch": 0.24292886004652872, "grad_norm": 3.499861240386963, "learning_rate": 1.999510579895878e-05, "loss": 0.9494, "step": 1488 }, { "epoch": 0.2430921186890331, "grad_norm": 3.509993553161621, "learning_rate": 1.9995085721851063e-05, "loss": 1.2322, "step": 1489 }, { "epoch": 0.2432553773315375, "grad_norm": 3.5070035457611084, "learning_rate": 1.9995065603657317e-05, "loss": 1.0571, "step": 1490 }, { "epoch": 0.24341863597404187, "grad_norm": 3.7716057300567627, "learning_rate": 1.9995045444377635e-05, "loss": 1.358, "step": 1491 }, { "epoch": 0.24358189461654625, "grad_norm": 3.414506435394287, "learning_rate": 1.9995025244012092e-05, "loss": 0.958, "step": 1492 }, { "epoch": 0.24374515325905066, "grad_norm": 4.333141803741455, "learning_rate": 1.9995005002560775e-05, "loss": 1.1097, "step": 1493 }, { "epoch": 0.24390841190155504, "grad_norm": 4.077729225158691, "learning_rate": 1.999498472002377e-05, "loss": 1.2266, "step": 1494 }, { "epoch": 0.24407167054405943, "grad_norm": 3.690329074859619, "learning_rate": 1.9994964396401152e-05, "loss": 1.0814, "step": 1495 }, { "epoch": 0.2442349291865638, "grad_norm": 3.7531516551971436, "learning_rate": 1.9994944031693013e-05, "loss": 1.1576, "step": 1496 }, { "epoch": 0.2443981878290682, "grad_norm": 3.645915985107422, "learning_rate": 1.999492362589943e-05, "loss": 1.0463, "step": 1497 }, { "epoch": 0.2445614464715726, "grad_norm": 3.684969902038574, "learning_rate": 1.999490317902049e-05, "loss": 0.8919, "step": 1498 }, { "epoch": 0.24472470511407698, "grad_norm": 4.094334602355957, "learning_rate": 1.999488269105628e-05, "loss": 1.1301, "step": 1499 }, { "epoch": 0.24488796375658137, "grad_norm": 3.9700942039489746, "learning_rate": 1.999486216200688e-05, "loss": 1.069, "step": 1500 }, { "epoch": 0.24505122239908575, "grad_norm": 4.00081205368042, "learning_rate": 1.9994841591872376e-05, "loss": 1.1834, "step": 1501 }, { "epoch": 0.24521448104159013, "grad_norm": 4.129907131195068, "learning_rate": 1.999482098065285e-05, "loss": 1.31, "step": 1502 }, { "epoch": 0.24537773968409451, "grad_norm": 4.189781188964844, "learning_rate": 1.9994800328348392e-05, "loss": 1.1175, "step": 1503 }, { "epoch": 0.24554099832659892, "grad_norm": 3.509706974029541, "learning_rate": 1.9994779634959082e-05, "loss": 1.0784, "step": 1504 }, { "epoch": 0.2457042569691033, "grad_norm": 3.6407690048217773, "learning_rate": 1.9994758900485008e-05, "loss": 1.1725, "step": 1505 }, { "epoch": 0.2458675156116077, "grad_norm": 3.2345190048217773, "learning_rate": 1.9994738124926254e-05, "loss": 1.0283, "step": 1506 }, { "epoch": 0.24603077425411207, "grad_norm": 3.5462965965270996, "learning_rate": 1.9994717308282906e-05, "loss": 1.0149, "step": 1507 }, { "epoch": 0.24619403289661645, "grad_norm": 3.68650484085083, "learning_rate": 1.9994696450555045e-05, "loss": 1.1697, "step": 1508 }, { "epoch": 0.24635729153912087, "grad_norm": 3.4225375652313232, "learning_rate": 1.9994675551742766e-05, "loss": 1.1875, "step": 1509 }, { "epoch": 0.24652055018162525, "grad_norm": 3.42767596244812, "learning_rate": 1.9994654611846146e-05, "loss": 1.1172, "step": 1510 }, { "epoch": 0.24668380882412963, "grad_norm": 3.6706488132476807, "learning_rate": 1.9994633630865277e-05, "loss": 0.8712, "step": 1511 }, { "epoch": 0.246847067466634, "grad_norm": 3.5463438034057617, "learning_rate": 1.9994612608800244e-05, "loss": 0.9384, "step": 1512 }, { "epoch": 0.2470103261091384, "grad_norm": 3.5054285526275635, "learning_rate": 1.9994591545651132e-05, "loss": 1.0069, "step": 1513 }, { "epoch": 0.24717358475164278, "grad_norm": 3.0797295570373535, "learning_rate": 1.9994570441418025e-05, "loss": 0.9224, "step": 1514 }, { "epoch": 0.2473368433941472, "grad_norm": 3.738654851913452, "learning_rate": 1.9994549296101014e-05, "loss": 0.9521, "step": 1515 }, { "epoch": 0.24750010203665157, "grad_norm": 3.921811580657959, "learning_rate": 1.9994528109700185e-05, "loss": 0.974, "step": 1516 }, { "epoch": 0.24766336067915595, "grad_norm": 3.798982620239258, "learning_rate": 1.9994506882215626e-05, "loss": 1.1232, "step": 1517 }, { "epoch": 0.24782661932166034, "grad_norm": 3.8544113636016846, "learning_rate": 1.9994485613647424e-05, "loss": 1.1001, "step": 1518 }, { "epoch": 0.24798987796416472, "grad_norm": 3.6061980724334717, "learning_rate": 1.9994464303995664e-05, "loss": 0.8168, "step": 1519 }, { "epoch": 0.24815313660666913, "grad_norm": 3.895242691040039, "learning_rate": 1.9994442953260434e-05, "loss": 0.9381, "step": 1520 }, { "epoch": 0.2483163952491735, "grad_norm": 3.716881513595581, "learning_rate": 1.9994421561441824e-05, "loss": 1.0673, "step": 1521 }, { "epoch": 0.2484796538916779, "grad_norm": 3.8614954948425293, "learning_rate": 1.9994400128539923e-05, "loss": 1.1177, "step": 1522 }, { "epoch": 0.24864291253418228, "grad_norm": 3.5972580909729004, "learning_rate": 1.9994378654554813e-05, "loss": 0.9934, "step": 1523 }, { "epoch": 0.24880617117668666, "grad_norm": 3.328270673751831, "learning_rate": 1.9994357139486586e-05, "loss": 0.8749, "step": 1524 }, { "epoch": 0.24896942981919104, "grad_norm": 4.304625988006592, "learning_rate": 1.9994335583335336e-05, "loss": 1.2839, "step": 1525 }, { "epoch": 0.24913268846169545, "grad_norm": 3.321720838546753, "learning_rate": 1.999431398610114e-05, "loss": 0.9544, "step": 1526 }, { "epoch": 0.24929594710419983, "grad_norm": 3.6213858127593994, "learning_rate": 1.9994292347784095e-05, "loss": 1.028, "step": 1527 }, { "epoch": 0.24945920574670422, "grad_norm": 3.378556251525879, "learning_rate": 1.999427066838429e-05, "loss": 0.9339, "step": 1528 }, { "epoch": 0.2496224643892086, "grad_norm": 3.7450809478759766, "learning_rate": 1.9994248947901812e-05, "loss": 1.2114, "step": 1529 }, { "epoch": 0.24978572303171298, "grad_norm": 3.751370906829834, "learning_rate": 1.9994227186336748e-05, "loss": 1.1944, "step": 1530 }, { "epoch": 0.2499489816742174, "grad_norm": 3.6644442081451416, "learning_rate": 1.999420538368919e-05, "loss": 0.9469, "step": 1531 }, { "epoch": 0.25011224031672175, "grad_norm": 3.8562028408050537, "learning_rate": 1.9994183539959227e-05, "loss": 1.0762, "step": 1532 }, { "epoch": 0.25027549895922613, "grad_norm": 3.844930410385132, "learning_rate": 1.999416165514695e-05, "loss": 1.0814, "step": 1533 }, { "epoch": 0.25043875760173057, "grad_norm": 3.4380226135253906, "learning_rate": 1.9994139729252452e-05, "loss": 1.033, "step": 1534 }, { "epoch": 0.25060201624423495, "grad_norm": 3.7112045288085938, "learning_rate": 1.9994117762275814e-05, "loss": 1.1379, "step": 1535 }, { "epoch": 0.25076527488673933, "grad_norm": 3.845046043395996, "learning_rate": 1.9994095754217132e-05, "loss": 1.2529, "step": 1536 }, { "epoch": 0.2509285335292437, "grad_norm": 3.6152825355529785, "learning_rate": 1.99940737050765e-05, "loss": 0.9565, "step": 1537 }, { "epoch": 0.2510917921717481, "grad_norm": 4.033668518066406, "learning_rate": 1.9994051614854e-05, "loss": 1.0155, "step": 1538 }, { "epoch": 0.2512550508142525, "grad_norm": 3.36578631401062, "learning_rate": 1.9994029483549732e-05, "loss": 1.1204, "step": 1539 }, { "epoch": 0.25141830945675686, "grad_norm": 3.6289384365081787, "learning_rate": 1.999400731116378e-05, "loss": 1.051, "step": 1540 }, { "epoch": 0.25158156809926124, "grad_norm": 3.77543568611145, "learning_rate": 1.999398509769624e-05, "loss": 0.9494, "step": 1541 }, { "epoch": 0.2517448267417656, "grad_norm": 3.097186803817749, "learning_rate": 1.9993962843147195e-05, "loss": 0.8936, "step": 1542 }, { "epoch": 0.25190808538427, "grad_norm": 3.636993885040283, "learning_rate": 1.9993940547516745e-05, "loss": 0.8883, "step": 1543 }, { "epoch": 0.2520713440267744, "grad_norm": 3.689197063446045, "learning_rate": 1.999391821080498e-05, "loss": 1.0159, "step": 1544 }, { "epoch": 0.25223460266927883, "grad_norm": 4.144704341888428, "learning_rate": 1.9993895833011992e-05, "loss": 1.2241, "step": 1545 }, { "epoch": 0.2523978613117832, "grad_norm": 3.0411341190338135, "learning_rate": 1.9993873414137872e-05, "loss": 0.8881, "step": 1546 }, { "epoch": 0.2525611199542876, "grad_norm": 3.5928776264190674, "learning_rate": 1.9993850954182708e-05, "loss": 0.9798, "step": 1547 }, { "epoch": 0.252724378596792, "grad_norm": 3.9123995304107666, "learning_rate": 1.99938284531466e-05, "loss": 1.1588, "step": 1548 }, { "epoch": 0.25288763723929636, "grad_norm": 3.908637046813965, "learning_rate": 1.9993805911029636e-05, "loss": 1.1518, "step": 1549 }, { "epoch": 0.25305089588180074, "grad_norm": 3.4549968242645264, "learning_rate": 1.999378332783191e-05, "loss": 0.8766, "step": 1550 }, { "epoch": 0.2532141545243051, "grad_norm": 3.620157480239868, "learning_rate": 1.9993760703553513e-05, "loss": 1.093, "step": 1551 }, { "epoch": 0.2533774131668095, "grad_norm": 3.784285545349121, "learning_rate": 1.999373803819454e-05, "loss": 1.098, "step": 1552 }, { "epoch": 0.2535406718093139, "grad_norm": 3.897914171218872, "learning_rate": 1.9993715331755086e-05, "loss": 1.13, "step": 1553 }, { "epoch": 0.25370393045181827, "grad_norm": 3.9836676120758057, "learning_rate": 1.999369258423524e-05, "loss": 2.192, "step": 1554 }, { "epoch": 0.25386718909432265, "grad_norm": 3.92944073677063, "learning_rate": 1.9993669795635096e-05, "loss": 1.1002, "step": 1555 }, { "epoch": 0.2540304477368271, "grad_norm": 3.2756311893463135, "learning_rate": 1.999364696595475e-05, "loss": 0.9899, "step": 1556 }, { "epoch": 0.2541937063793315, "grad_norm": 3.4133498668670654, "learning_rate": 1.9993624095194296e-05, "loss": 1.0549, "step": 1557 }, { "epoch": 0.25435696502183586, "grad_norm": 3.332157850265503, "learning_rate": 1.9993601183353824e-05, "loss": 0.919, "step": 1558 }, { "epoch": 0.25452022366434024, "grad_norm": 3.2927331924438477, "learning_rate": 1.9993578230433434e-05, "loss": 0.8898, "step": 1559 }, { "epoch": 0.2546834823068446, "grad_norm": 3.7923946380615234, "learning_rate": 1.9993555236433216e-05, "loss": 1.2089, "step": 1560 }, { "epoch": 0.254846740949349, "grad_norm": 3.725468158721924, "learning_rate": 1.9993532201353266e-05, "loss": 1.1498, "step": 1561 }, { "epoch": 0.2550099995918534, "grad_norm": 3.2553036212921143, "learning_rate": 1.9993509125193678e-05, "loss": 1.0637, "step": 1562 }, { "epoch": 0.25517325823435777, "grad_norm": 3.1448514461517334, "learning_rate": 1.9993486007954547e-05, "loss": 0.9718, "step": 1563 }, { "epoch": 0.25533651687686215, "grad_norm": 3.6552038192749023, "learning_rate": 1.9993462849635968e-05, "loss": 0.9368, "step": 1564 }, { "epoch": 0.25549977551936653, "grad_norm": 3.5119900703430176, "learning_rate": 1.9993439650238043e-05, "loss": 0.906, "step": 1565 }, { "epoch": 0.2556630341618709, "grad_norm": 3.6190507411956787, "learning_rate": 1.9993416409760856e-05, "loss": 0.9292, "step": 1566 }, { "epoch": 0.25582629280437535, "grad_norm": 3.9329514503479004, "learning_rate": 1.9993393128204506e-05, "loss": 1.2492, "step": 1567 }, { "epoch": 0.25598955144687974, "grad_norm": 4.753753185272217, "learning_rate": 1.999336980556909e-05, "loss": 1.2829, "step": 1568 }, { "epoch": 0.2561528100893841, "grad_norm": 3.608225107192993, "learning_rate": 1.999334644185471e-05, "loss": 1.0828, "step": 1569 }, { "epoch": 0.2563160687318885, "grad_norm": 3.880018949508667, "learning_rate": 1.9993323037061452e-05, "loss": 1.1558, "step": 1570 }, { "epoch": 0.2564793273743929, "grad_norm": 4.295324325561523, "learning_rate": 1.9993299591189418e-05, "loss": 1.3155, "step": 1571 }, { "epoch": 0.25664258601689727, "grad_norm": 3.740574598312378, "learning_rate": 1.99932761042387e-05, "loss": 1.1557, "step": 1572 }, { "epoch": 0.25680584465940165, "grad_norm": 3.6898112297058105, "learning_rate": 1.9993252576209398e-05, "loss": 1.0616, "step": 1573 }, { "epoch": 0.25696910330190603, "grad_norm": 3.5671279430389404, "learning_rate": 1.9993229007101612e-05, "loss": 1.0158, "step": 1574 }, { "epoch": 0.2571323619444104, "grad_norm": 3.548241376876831, "learning_rate": 1.999320539691543e-05, "loss": 1.2369, "step": 1575 }, { "epoch": 0.2572956205869148, "grad_norm": 3.7193443775177, "learning_rate": 1.9993181745650955e-05, "loss": 1.2614, "step": 1576 }, { "epoch": 0.2574588792294192, "grad_norm": 3.4733502864837646, "learning_rate": 1.9993158053308285e-05, "loss": 1.2189, "step": 1577 }, { "epoch": 0.2576221378719236, "grad_norm": 3.3706653118133545, "learning_rate": 1.9993134319887515e-05, "loss": 1.0511, "step": 1578 }, { "epoch": 0.257785396514428, "grad_norm": 3.8293349742889404, "learning_rate": 1.9993110545388744e-05, "loss": 1.3098, "step": 1579 }, { "epoch": 0.2579486551569324, "grad_norm": 3.5235493183135986, "learning_rate": 1.9993086729812066e-05, "loss": 1.189, "step": 1580 }, { "epoch": 0.25811191379943677, "grad_norm": 3.5473549365997314, "learning_rate": 1.9993062873157587e-05, "loss": 1.0123, "step": 1581 }, { "epoch": 0.25827517244194115, "grad_norm": 3.3355257511138916, "learning_rate": 1.9993038975425395e-05, "loss": 1.0848, "step": 1582 }, { "epoch": 0.25843843108444553, "grad_norm": 3.7574431896209717, "learning_rate": 1.9993015036615595e-05, "loss": 1.2029, "step": 1583 }, { "epoch": 0.2586016897269499, "grad_norm": 3.4277780055999756, "learning_rate": 1.9992991056728282e-05, "loss": 1.0441, "step": 1584 }, { "epoch": 0.2587649483694543, "grad_norm": 3.672612428665161, "learning_rate": 1.9992967035763557e-05, "loss": 0.9933, "step": 1585 }, { "epoch": 0.2589282070119587, "grad_norm": 3.598001718521118, "learning_rate": 1.9992942973721518e-05, "loss": 0.9287, "step": 1586 }, { "epoch": 0.25909146565446306, "grad_norm": 3.6224300861358643, "learning_rate": 1.9992918870602265e-05, "loss": 1.057, "step": 1587 }, { "epoch": 0.2592547242969675, "grad_norm": 4.024545669555664, "learning_rate": 1.9992894726405894e-05, "loss": 1.1255, "step": 1588 }, { "epoch": 0.2594179829394719, "grad_norm": 3.717568874359131, "learning_rate": 1.999287054113251e-05, "loss": 1.3099, "step": 1589 }, { "epoch": 0.25958124158197626, "grad_norm": 3.494813919067383, "learning_rate": 1.9992846314782202e-05, "loss": 1.122, "step": 1590 }, { "epoch": 0.25974450022448065, "grad_norm": 3.991065502166748, "learning_rate": 1.9992822047355077e-05, "loss": 1.0832, "step": 1591 }, { "epoch": 0.25990775886698503, "grad_norm": 3.40527606010437, "learning_rate": 1.999279773885124e-05, "loss": 0.9107, "step": 1592 }, { "epoch": 0.2600710175094894, "grad_norm": 3.693756341934204, "learning_rate": 1.9992773389270778e-05, "loss": 1.0067, "step": 1593 }, { "epoch": 0.2602342761519938, "grad_norm": 3.2661337852478027, "learning_rate": 1.99927489986138e-05, "loss": 0.9655, "step": 1594 }, { "epoch": 0.2603975347944982, "grad_norm": 3.372965097427368, "learning_rate": 1.9992724566880403e-05, "loss": 1.0237, "step": 1595 }, { "epoch": 0.26056079343700256, "grad_norm": 4.182784557342529, "learning_rate": 1.9992700094070692e-05, "loss": 1.2702, "step": 1596 }, { "epoch": 0.26072405207950694, "grad_norm": 3.714305877685547, "learning_rate": 1.9992675580184762e-05, "loss": 1.1615, "step": 1597 }, { "epoch": 0.2608873107220113, "grad_norm": 3.8333237171173096, "learning_rate": 1.9992651025222715e-05, "loss": 1.0855, "step": 1598 }, { "epoch": 0.26105056936451576, "grad_norm": 3.7225875854492188, "learning_rate": 1.9992626429184656e-05, "loss": 0.9247, "step": 1599 }, { "epoch": 0.26121382800702014, "grad_norm": 3.5530898571014404, "learning_rate": 1.999260179207068e-05, "loss": 1.0302, "step": 1600 }, { "epoch": 0.2613770866495245, "grad_norm": 3.847618341445923, "learning_rate": 1.9992577113880892e-05, "loss": 1.0492, "step": 1601 }, { "epoch": 0.2615403452920289, "grad_norm": 4.1074700355529785, "learning_rate": 1.999255239461539e-05, "loss": 1.1526, "step": 1602 }, { "epoch": 0.2617036039345333, "grad_norm": 3.8883345127105713, "learning_rate": 1.999252763427428e-05, "loss": 1.1347, "step": 1603 }, { "epoch": 0.2618668625770377, "grad_norm": 3.551682472229004, "learning_rate": 1.9992502832857662e-05, "loss": 1.0732, "step": 1604 }, { "epoch": 0.26203012121954206, "grad_norm": 3.927143096923828, "learning_rate": 1.999247799036564e-05, "loss": 1.1605, "step": 1605 }, { "epoch": 0.26219337986204644, "grad_norm": 3.8016915321350098, "learning_rate": 1.9992453106798312e-05, "loss": 0.998, "step": 1606 }, { "epoch": 0.2623566385045508, "grad_norm": 3.8107802867889404, "learning_rate": 1.9992428182155782e-05, "loss": 1.0039, "step": 1607 }, { "epoch": 0.2625198971470552, "grad_norm": 4.058548450469971, "learning_rate": 1.9992403216438155e-05, "loss": 1.1476, "step": 1608 }, { "epoch": 0.2626831557895596, "grad_norm": 3.9034695625305176, "learning_rate": 1.999237820964553e-05, "loss": 1.068, "step": 1609 }, { "epoch": 0.262846414432064, "grad_norm": 4.065340042114258, "learning_rate": 1.999235316177801e-05, "loss": 1.0875, "step": 1610 }, { "epoch": 0.2630096730745684, "grad_norm": 3.7278432846069336, "learning_rate": 1.99923280728357e-05, "loss": 1.1034, "step": 1611 }, { "epoch": 0.2631729317170728, "grad_norm": 3.8666930198669434, "learning_rate": 1.9992302942818703e-05, "loss": 1.0687, "step": 1612 }, { "epoch": 0.26333619035957717, "grad_norm": 3.760094165802002, "learning_rate": 1.9992277771727126e-05, "loss": 0.9419, "step": 1613 }, { "epoch": 0.26349944900208155, "grad_norm": 3.512927770614624, "learning_rate": 1.999225255956106e-05, "loss": 1.0591, "step": 1614 }, { "epoch": 0.26366270764458594, "grad_norm": 3.704355001449585, "learning_rate": 1.999222730632062e-05, "loss": 1.1037, "step": 1615 }, { "epoch": 0.2638259662870903, "grad_norm": 3.8476004600524902, "learning_rate": 1.9992202012005906e-05, "loss": 1.0259, "step": 1616 }, { "epoch": 0.2639892249295947, "grad_norm": 2.9967844486236572, "learning_rate": 1.999217667661703e-05, "loss": 0.9383, "step": 1617 }, { "epoch": 0.2641524835720991, "grad_norm": 3.0758485794067383, "learning_rate": 1.9992151300154078e-05, "loss": 1.0697, "step": 1618 }, { "epoch": 0.26431574221460347, "grad_norm": 3.876570463180542, "learning_rate": 1.9992125882617167e-05, "loss": 1.0666, "step": 1619 }, { "epoch": 0.26447900085710785, "grad_norm": 3.6859169006347656, "learning_rate": 1.9992100424006406e-05, "loss": 1.189, "step": 1620 }, { "epoch": 0.2646422594996123, "grad_norm": 3.8233582973480225, "learning_rate": 1.9992074924321888e-05, "loss": 1.1414, "step": 1621 }, { "epoch": 0.26480551814211667, "grad_norm": 3.692548990249634, "learning_rate": 1.9992049383563724e-05, "loss": 1.1096, "step": 1622 }, { "epoch": 0.26496877678462105, "grad_norm": 3.8844735622406006, "learning_rate": 1.9992023801732016e-05, "loss": 1.0588, "step": 1623 }, { "epoch": 0.26513203542712543, "grad_norm": 3.9054746627807617, "learning_rate": 1.9991998178826872e-05, "loss": 1.1399, "step": 1624 }, { "epoch": 0.2652952940696298, "grad_norm": 3.582855463027954, "learning_rate": 1.99919725148484e-05, "loss": 1.0719, "step": 1625 }, { "epoch": 0.2654585527121342, "grad_norm": 3.4927942752838135, "learning_rate": 1.9991946809796698e-05, "loss": 1.0908, "step": 1626 }, { "epoch": 0.2656218113546386, "grad_norm": 3.5450401306152344, "learning_rate": 1.9991921063671875e-05, "loss": 1.0195, "step": 1627 }, { "epoch": 0.26578506999714296, "grad_norm": 4.056901931762695, "learning_rate": 1.999189527647404e-05, "loss": 1.1757, "step": 1628 }, { "epoch": 0.26594832863964735, "grad_norm": 4.1204962730407715, "learning_rate": 1.9991869448203296e-05, "loss": 1.2526, "step": 1629 }, { "epoch": 0.26611158728215173, "grad_norm": 3.7016217708587646, "learning_rate": 1.999184357885975e-05, "loss": 1.1968, "step": 1630 }, { "epoch": 0.2662748459246561, "grad_norm": 3.6105916500091553, "learning_rate": 1.9991817668443505e-05, "loss": 1.0221, "step": 1631 }, { "epoch": 0.26643810456716055, "grad_norm": 3.5420050621032715, "learning_rate": 1.9991791716954673e-05, "loss": 0.922, "step": 1632 }, { "epoch": 0.26660136320966493, "grad_norm": 3.5018563270568848, "learning_rate": 1.9991765724393356e-05, "loss": 1.0427, "step": 1633 }, { "epoch": 0.2667646218521693, "grad_norm": 3.521942377090454, "learning_rate": 1.9991739690759665e-05, "loss": 1.141, "step": 1634 }, { "epoch": 0.2669278804946737, "grad_norm": 3.8617587089538574, "learning_rate": 1.9991713616053703e-05, "loss": 1.1379, "step": 1635 }, { "epoch": 0.2670911391371781, "grad_norm": 3.1875290870666504, "learning_rate": 1.999168750027558e-05, "loss": 0.9086, "step": 1636 }, { "epoch": 0.26725439777968246, "grad_norm": 3.248845338821411, "learning_rate": 1.9991661343425402e-05, "loss": 0.759, "step": 1637 }, { "epoch": 0.26741765642218684, "grad_norm": 3.488802909851074, "learning_rate": 1.9991635145503278e-05, "loss": 0.9835, "step": 1638 }, { "epoch": 0.2675809150646912, "grad_norm": 3.8374133110046387, "learning_rate": 1.9991608906509316e-05, "loss": 0.9944, "step": 1639 }, { "epoch": 0.2677441737071956, "grad_norm": 3.7255899906158447, "learning_rate": 1.999158262644362e-05, "loss": 0.9826, "step": 1640 }, { "epoch": 0.2679074323497, "grad_norm": 3.3533549308776855, "learning_rate": 1.99915563053063e-05, "loss": 0.8602, "step": 1641 }, { "epoch": 0.2680706909922044, "grad_norm": 3.242532968521118, "learning_rate": 1.9991529943097467e-05, "loss": 0.9928, "step": 1642 }, { "epoch": 0.2682339496347088, "grad_norm": 3.1682217121124268, "learning_rate": 1.9991503539817226e-05, "loss": 0.8444, "step": 1643 }, { "epoch": 0.2683972082772132, "grad_norm": 3.5869863033294678, "learning_rate": 1.999147709546569e-05, "loss": 1.0998, "step": 1644 }, { "epoch": 0.2685604669197176, "grad_norm": 3.7576043605804443, "learning_rate": 1.999145061004296e-05, "loss": 1.0784, "step": 1645 }, { "epoch": 0.26872372556222196, "grad_norm": 3.475836992263794, "learning_rate": 1.999142408354915e-05, "loss": 0.9145, "step": 1646 }, { "epoch": 0.26888698420472634, "grad_norm": 3.5287957191467285, "learning_rate": 1.999139751598437e-05, "loss": 1.1313, "step": 1647 }, { "epoch": 0.2690502428472307, "grad_norm": 3.902226209640503, "learning_rate": 1.9991370907348728e-05, "loss": 1.115, "step": 1648 }, { "epoch": 0.2692135014897351, "grad_norm": 3.7204601764678955, "learning_rate": 1.999134425764233e-05, "loss": 0.9977, "step": 1649 }, { "epoch": 0.2693767601322395, "grad_norm": 4.094659805297852, "learning_rate": 1.999131756686529e-05, "loss": 1.0747, "step": 1650 }, { "epoch": 0.2695400187747439, "grad_norm": 3.803591012954712, "learning_rate": 1.9991290835017716e-05, "loss": 1.0633, "step": 1651 }, { "epoch": 0.26970327741724825, "grad_norm": 3.4298181533813477, "learning_rate": 1.9991264062099717e-05, "loss": 1.009, "step": 1652 }, { "epoch": 0.26986653605975264, "grad_norm": 3.8362514972686768, "learning_rate": 1.9991237248111407e-05, "loss": 1.244, "step": 1653 }, { "epoch": 0.2700297947022571, "grad_norm": 3.761167287826538, "learning_rate": 1.999121039305289e-05, "loss": 2.0535, "step": 1654 }, { "epoch": 0.27019305334476146, "grad_norm": 3.7671074867248535, "learning_rate": 1.9991183496924282e-05, "loss": 1.6853, "step": 1655 }, { "epoch": 0.27035631198726584, "grad_norm": 3.5122241973876953, "learning_rate": 1.999115655972569e-05, "loss": 1.1011, "step": 1656 }, { "epoch": 0.2705195706297702, "grad_norm": 3.5932321548461914, "learning_rate": 1.9991129581457227e-05, "loss": 1.0607, "step": 1657 }, { "epoch": 0.2706828292722746, "grad_norm": 3.446502208709717, "learning_rate": 1.9991102562119004e-05, "loss": 1.135, "step": 1658 }, { "epoch": 0.270846087914779, "grad_norm": 3.6055541038513184, "learning_rate": 1.9991075501711127e-05, "loss": 1.1456, "step": 1659 }, { "epoch": 0.27100934655728337, "grad_norm": 3.7904303073883057, "learning_rate": 1.9991048400233713e-05, "loss": 1.1841, "step": 1660 }, { "epoch": 0.27117260519978775, "grad_norm": 3.540783166885376, "learning_rate": 1.9991021257686873e-05, "loss": 0.9895, "step": 1661 }, { "epoch": 0.27133586384229214, "grad_norm": 3.273386240005493, "learning_rate": 1.9990994074070716e-05, "loss": 1.008, "step": 1662 }, { "epoch": 0.2714991224847965, "grad_norm": 3.363884449005127, "learning_rate": 1.9990966849385357e-05, "loss": 0.8959, "step": 1663 }, { "epoch": 0.2716623811273009, "grad_norm": 3.72542142868042, "learning_rate": 1.99909395836309e-05, "loss": 1.1808, "step": 1664 }, { "epoch": 0.27182563976980534, "grad_norm": 3.1182806491851807, "learning_rate": 1.999091227680747e-05, "loss": 0.9177, "step": 1665 }, { "epoch": 0.2719888984123097, "grad_norm": 3.425234794616699, "learning_rate": 1.9990884928915166e-05, "loss": 0.9306, "step": 1666 }, { "epoch": 0.2721521570548141, "grad_norm": 3.5724408626556396, "learning_rate": 1.9990857539954113e-05, "loss": 0.9354, "step": 1667 }, { "epoch": 0.2723154156973185, "grad_norm": 3.505136489868164, "learning_rate": 1.999083010992441e-05, "loss": 0.9883, "step": 1668 }, { "epoch": 0.27247867433982287, "grad_norm": 3.61745548248291, "learning_rate": 1.999080263882618e-05, "loss": 0.979, "step": 1669 }, { "epoch": 0.27264193298232725, "grad_norm": 3.9100794792175293, "learning_rate": 1.9990775126659537e-05, "loss": 1.2656, "step": 1670 }, { "epoch": 0.27280519162483163, "grad_norm": 3.403665065765381, "learning_rate": 1.9990747573424585e-05, "loss": 0.9592, "step": 1671 }, { "epoch": 0.272968450267336, "grad_norm": 3.781938314437866, "learning_rate": 1.9990719979121444e-05, "loss": 1.0639, "step": 1672 }, { "epoch": 0.2731317089098404, "grad_norm": 3.3681461811065674, "learning_rate": 1.9990692343750222e-05, "loss": 0.8888, "step": 1673 }, { "epoch": 0.2732949675523448, "grad_norm": 3.8670332431793213, "learning_rate": 1.999066466731104e-05, "loss": 1.1622, "step": 1674 }, { "epoch": 0.27345822619484916, "grad_norm": 3.5250978469848633, "learning_rate": 1.9990636949804007e-05, "loss": 0.7924, "step": 1675 }, { "epoch": 0.2736214848373536, "grad_norm": 4.262712478637695, "learning_rate": 1.9990609191229237e-05, "loss": 0.9837, "step": 1676 }, { "epoch": 0.273784743479858, "grad_norm": 3.177238941192627, "learning_rate": 1.9990581391586848e-05, "loss": 0.7573, "step": 1677 }, { "epoch": 0.27394800212236237, "grad_norm": 3.9850990772247314, "learning_rate": 1.9990553550876947e-05, "loss": 1.0779, "step": 1678 }, { "epoch": 0.27411126076486675, "grad_norm": 3.5360045433044434, "learning_rate": 1.9990525669099656e-05, "loss": 1.0009, "step": 1679 }, { "epoch": 0.27427451940737113, "grad_norm": 3.911698341369629, "learning_rate": 1.9990497746255088e-05, "loss": 1.1213, "step": 1680 }, { "epoch": 0.2744377780498755, "grad_norm": 3.4486591815948486, "learning_rate": 1.999046978234335e-05, "loss": 0.9575, "step": 1681 }, { "epoch": 0.2746010366923799, "grad_norm": 3.4819352626800537, "learning_rate": 1.999044177736457e-05, "loss": 0.9551, "step": 1682 }, { "epoch": 0.2747642953348843, "grad_norm": 3.5756406784057617, "learning_rate": 1.999041373131885e-05, "loss": 1.1498, "step": 1683 }, { "epoch": 0.27492755397738866, "grad_norm": 3.138639450073242, "learning_rate": 1.9990385644206314e-05, "loss": 0.9707, "step": 1684 }, { "epoch": 0.27509081261989304, "grad_norm": 3.5622615814208984, "learning_rate": 1.9990357516027074e-05, "loss": 0.9322, "step": 1685 }, { "epoch": 0.2752540712623974, "grad_norm": 3.688462257385254, "learning_rate": 1.999032934678125e-05, "loss": 1.0493, "step": 1686 }, { "epoch": 0.27541732990490186, "grad_norm": 3.5951485633850098, "learning_rate": 1.999030113646895e-05, "loss": 1.1171, "step": 1687 }, { "epoch": 0.27558058854740625, "grad_norm": 3.9313197135925293, "learning_rate": 1.9990272885090298e-05, "loss": 0.9677, "step": 1688 }, { "epoch": 0.27574384718991063, "grad_norm": 3.779662609100342, "learning_rate": 1.9990244592645403e-05, "loss": 1.1535, "step": 1689 }, { "epoch": 0.275907105832415, "grad_norm": 3.5514352321624756, "learning_rate": 1.9990216259134386e-05, "loss": 0.9539, "step": 1690 }, { "epoch": 0.2760703644749194, "grad_norm": 3.139761209487915, "learning_rate": 1.9990187884557362e-05, "loss": 0.9167, "step": 1691 }, { "epoch": 0.2762336231174238, "grad_norm": 3.6782639026641846, "learning_rate": 1.999015946891445e-05, "loss": 0.939, "step": 1692 }, { "epoch": 0.27639688175992816, "grad_norm": 3.3228163719177246, "learning_rate": 1.9990131012205763e-05, "loss": 0.9599, "step": 1693 }, { "epoch": 0.27656014040243254, "grad_norm": 4.333683967590332, "learning_rate": 1.999010251443142e-05, "loss": 1.0187, "step": 1694 }, { "epoch": 0.2767233990449369, "grad_norm": 4.343041896820068, "learning_rate": 1.999007397559154e-05, "loss": 1.2958, "step": 1695 }, { "epoch": 0.2768866576874413, "grad_norm": 3.463719367980957, "learning_rate": 1.9990045395686234e-05, "loss": 1.1136, "step": 1696 }, { "epoch": 0.2770499163299457, "grad_norm": 3.2786529064178467, "learning_rate": 1.9990016774715623e-05, "loss": 0.8346, "step": 1697 }, { "epoch": 0.2772131749724501, "grad_norm": 4.443971633911133, "learning_rate": 1.998998811267983e-05, "loss": 1.1645, "step": 1698 }, { "epoch": 0.2773764336149545, "grad_norm": 3.9527032375335693, "learning_rate": 1.9989959409578964e-05, "loss": 1.2461, "step": 1699 }, { "epoch": 0.2775396922574589, "grad_norm": 3.6342787742614746, "learning_rate": 1.9989930665413148e-05, "loss": 1.0325, "step": 1700 }, { "epoch": 0.2777029508999633, "grad_norm": 3.7862236499786377, "learning_rate": 1.99899018801825e-05, "loss": 1.0264, "step": 1701 }, { "epoch": 0.27786620954246766, "grad_norm": 3.796353578567505, "learning_rate": 1.9989873053887138e-05, "loss": 1.0151, "step": 1702 }, { "epoch": 0.27802946818497204, "grad_norm": 3.752091884613037, "learning_rate": 1.9989844186527175e-05, "loss": 1.1375, "step": 1703 }, { "epoch": 0.2781927268274764, "grad_norm": 3.829164743423462, "learning_rate": 1.998981527810274e-05, "loss": 1.0966, "step": 1704 }, { "epoch": 0.2783559854699808, "grad_norm": 3.4613828659057617, "learning_rate": 1.9989786328613948e-05, "loss": 0.9268, "step": 1705 }, { "epoch": 0.2785192441124852, "grad_norm": 3.9209821224212646, "learning_rate": 1.9989757338060914e-05, "loss": 1.1885, "step": 1706 }, { "epoch": 0.27868250275498957, "grad_norm": 3.946150302886963, "learning_rate": 1.998972830644376e-05, "loss": 1.1524, "step": 1707 }, { "epoch": 0.278845761397494, "grad_norm": 3.7288947105407715, "learning_rate": 1.9989699233762604e-05, "loss": 1.1756, "step": 1708 }, { "epoch": 0.2790090200399984, "grad_norm": 4.286917686462402, "learning_rate": 1.9989670120017566e-05, "loss": 1.1248, "step": 1709 }, { "epoch": 0.2791722786825028, "grad_norm": 3.7010560035705566, "learning_rate": 1.9989640965208767e-05, "loss": 0.9691, "step": 1710 }, { "epoch": 0.27933553732500715, "grad_norm": 3.5040884017944336, "learning_rate": 1.9989611769336322e-05, "loss": 0.9954, "step": 1711 }, { "epoch": 0.27949879596751154, "grad_norm": 3.8143954277038574, "learning_rate": 1.9989582532400363e-05, "loss": 1.025, "step": 1712 }, { "epoch": 0.2796620546100159, "grad_norm": 3.4822745323181152, "learning_rate": 1.9989553254400996e-05, "loss": 1.0376, "step": 1713 }, { "epoch": 0.2798253132525203, "grad_norm": 3.1620965003967285, "learning_rate": 1.998952393533835e-05, "loss": 0.9208, "step": 1714 }, { "epoch": 0.2799885718950247, "grad_norm": 3.609363079071045, "learning_rate": 1.9989494575212542e-05, "loss": 0.9915, "step": 1715 }, { "epoch": 0.28015183053752907, "grad_norm": 3.6841793060302734, "learning_rate": 1.9989465174023698e-05, "loss": 0.9668, "step": 1716 }, { "epoch": 0.28031508918003345, "grad_norm": 3.666637659072876, "learning_rate": 1.998943573177193e-05, "loss": 1.1843, "step": 1717 }, { "epoch": 0.28047834782253783, "grad_norm": 3.607872247695923, "learning_rate": 1.9989406248457364e-05, "loss": 1.2257, "step": 1718 }, { "epoch": 0.28064160646504227, "grad_norm": 3.240231513977051, "learning_rate": 1.9989376724080124e-05, "loss": 1.0087, "step": 1719 }, { "epoch": 0.28080486510754665, "grad_norm": 3.43747615814209, "learning_rate": 1.9989347158640323e-05, "loss": 0.9702, "step": 1720 }, { "epoch": 0.28096812375005104, "grad_norm": 3.36961030960083, "learning_rate": 1.9989317552138093e-05, "loss": 1.0255, "step": 1721 }, { "epoch": 0.2811313823925554, "grad_norm": 3.580723285675049, "learning_rate": 1.9989287904573545e-05, "loss": 1.189, "step": 1722 }, { "epoch": 0.2812946410350598, "grad_norm": 3.459404706954956, "learning_rate": 1.998925821594681e-05, "loss": 1.082, "step": 1723 }, { "epoch": 0.2814578996775642, "grad_norm": 3.1826491355895996, "learning_rate": 1.9989228486258008e-05, "loss": 0.8673, "step": 1724 }, { "epoch": 0.28162115832006857, "grad_norm": 3.2919132709503174, "learning_rate": 1.9989198715507253e-05, "loss": 0.9691, "step": 1725 }, { "epoch": 0.28178441696257295, "grad_norm": 3.69600248336792, "learning_rate": 1.9989168903694682e-05, "loss": 1.0991, "step": 1726 }, { "epoch": 0.28194767560507733, "grad_norm": 4.057249069213867, "learning_rate": 1.9989139050820406e-05, "loss": 1.326, "step": 1727 }, { "epoch": 0.2821109342475817, "grad_norm": 4.006848335266113, "learning_rate": 1.998910915688455e-05, "loss": 1.2299, "step": 1728 }, { "epoch": 0.2822741928900861, "grad_norm": 3.637277603149414, "learning_rate": 1.9989079221887237e-05, "loss": 0.9702, "step": 1729 }, { "epoch": 0.28243745153259053, "grad_norm": 2.9615895748138428, "learning_rate": 1.9989049245828594e-05, "loss": 0.8511, "step": 1730 }, { "epoch": 0.2826007101750949, "grad_norm": 3.2511274814605713, "learning_rate": 1.998901922870874e-05, "loss": 0.9995, "step": 1731 }, { "epoch": 0.2827639688175993, "grad_norm": 3.5656895637512207, "learning_rate": 1.99889891705278e-05, "loss": 1.0338, "step": 1732 }, { "epoch": 0.2829272274601037, "grad_norm": 3.229067087173462, "learning_rate": 1.99889590712859e-05, "loss": 0.8425, "step": 1733 }, { "epoch": 0.28309048610260806, "grad_norm": 3.846813917160034, "learning_rate": 1.998892893098316e-05, "loss": 0.9793, "step": 1734 }, { "epoch": 0.28325374474511245, "grad_norm": 3.9461467266082764, "learning_rate": 1.9988898749619702e-05, "loss": 1.1564, "step": 1735 }, { "epoch": 0.28341700338761683, "grad_norm": 3.784493923187256, "learning_rate": 1.9988868527195654e-05, "loss": 0.994, "step": 1736 }, { "epoch": 0.2835802620301212, "grad_norm": 3.71636700630188, "learning_rate": 1.998883826371114e-05, "loss": 1.1915, "step": 1737 }, { "epoch": 0.2837435206726256, "grad_norm": 3.6496329307556152, "learning_rate": 1.9988807959166282e-05, "loss": 1.2538, "step": 1738 }, { "epoch": 0.28390677931513, "grad_norm": 3.7479121685028076, "learning_rate": 1.998877761356121e-05, "loss": 0.9868, "step": 1739 }, { "epoch": 0.28407003795763436, "grad_norm": 3.7331314086914062, "learning_rate": 1.9988747226896043e-05, "loss": 1.0976, "step": 1740 }, { "epoch": 0.2842332966001388, "grad_norm": 3.8327462673187256, "learning_rate": 1.9988716799170908e-05, "loss": 1.1575, "step": 1741 }, { "epoch": 0.2843965552426432, "grad_norm": 3.625014066696167, "learning_rate": 1.998868633038593e-05, "loss": 0.9401, "step": 1742 }, { "epoch": 0.28455981388514756, "grad_norm": 3.666769504547119, "learning_rate": 1.998865582054123e-05, "loss": 1.0601, "step": 1743 }, { "epoch": 0.28472307252765194, "grad_norm": 3.5374135971069336, "learning_rate": 1.9988625269636942e-05, "loss": 0.9889, "step": 1744 }, { "epoch": 0.2848863311701563, "grad_norm": 3.335376024246216, "learning_rate": 1.9988594677673186e-05, "loss": 1.0131, "step": 1745 }, { "epoch": 0.2850495898126607, "grad_norm": 3.2055509090423584, "learning_rate": 1.9988564044650087e-05, "loss": 1.0471, "step": 1746 }, { "epoch": 0.2852128484551651, "grad_norm": 3.4799599647521973, "learning_rate": 1.9988533370567774e-05, "loss": 0.8836, "step": 1747 }, { "epoch": 0.2853761070976695, "grad_norm": 4.455090522766113, "learning_rate": 1.9988502655426375e-05, "loss": 1.1403, "step": 1748 }, { "epoch": 0.28553936574017386, "grad_norm": 3.595667600631714, "learning_rate": 1.998847189922601e-05, "loss": 0.9299, "step": 1749 }, { "epoch": 0.28570262438267824, "grad_norm": 3.8679189682006836, "learning_rate": 1.9988441101966807e-05, "loss": 0.9775, "step": 1750 }, { "epoch": 0.2858658830251826, "grad_norm": 3.598910331726074, "learning_rate": 1.9988410263648902e-05, "loss": 0.841, "step": 1751 }, { "epoch": 0.28602914166768706, "grad_norm": 3.859851598739624, "learning_rate": 1.9988379384272406e-05, "loss": 1.1105, "step": 1752 }, { "epoch": 0.28619240031019144, "grad_norm": 3.986241579055786, "learning_rate": 1.9988348463837455e-05, "loss": 1.002, "step": 1753 }, { "epoch": 0.2863556589526958, "grad_norm": 3.6102874279022217, "learning_rate": 1.9988317502344178e-05, "loss": 1.1134, "step": 1754 }, { "epoch": 0.2865189175952002, "grad_norm": 3.654158115386963, "learning_rate": 1.9988286499792697e-05, "loss": 1.1422, "step": 1755 }, { "epoch": 0.2866821762377046, "grad_norm": 3.6806604862213135, "learning_rate": 1.9988255456183143e-05, "loss": 0.9561, "step": 1756 }, { "epoch": 0.28684543488020897, "grad_norm": 3.5660817623138428, "learning_rate": 1.9988224371515642e-05, "loss": 0.9207, "step": 1757 }, { "epoch": 0.28700869352271335, "grad_norm": 3.7006402015686035, "learning_rate": 1.9988193245790324e-05, "loss": 0.8673, "step": 1758 }, { "epoch": 0.28717195216521774, "grad_norm": 2.9705986976623535, "learning_rate": 1.9988162079007312e-05, "loss": 0.8123, "step": 1759 }, { "epoch": 0.2873352108077221, "grad_norm": 3.3290393352508545, "learning_rate": 1.9988130871166737e-05, "loss": 0.9196, "step": 1760 }, { "epoch": 0.2874984694502265, "grad_norm": 3.35550856590271, "learning_rate": 1.998809962226873e-05, "loss": 0.9613, "step": 1761 }, { "epoch": 0.2876617280927309, "grad_norm": 3.812974691390991, "learning_rate": 1.9988068332313416e-05, "loss": 1.2594, "step": 1762 }, { "epoch": 0.2878249867352353, "grad_norm": 3.699981689453125, "learning_rate": 1.9988037001300924e-05, "loss": 1.1011, "step": 1763 }, { "epoch": 0.2879882453777397, "grad_norm": 3.340911626815796, "learning_rate": 1.998800562923138e-05, "loss": 1.0053, "step": 1764 }, { "epoch": 0.2881515040202441, "grad_norm": 3.782072067260742, "learning_rate": 1.9987974216104923e-05, "loss": 1.0051, "step": 1765 }, { "epoch": 0.28831476266274847, "grad_norm": 3.2961370944976807, "learning_rate": 1.998794276192167e-05, "loss": 0.9612, "step": 1766 }, { "epoch": 0.28847802130525285, "grad_norm": 4.1789445877075195, "learning_rate": 1.998791126668176e-05, "loss": 1.1688, "step": 1767 }, { "epoch": 0.28864127994775723, "grad_norm": 3.2750542163848877, "learning_rate": 1.9987879730385314e-05, "loss": 0.9591, "step": 1768 }, { "epoch": 0.2888045385902616, "grad_norm": 3.2507336139678955, "learning_rate": 1.998784815303247e-05, "loss": 0.9372, "step": 1769 }, { "epoch": 0.288967797232766, "grad_norm": 3.5994935035705566, "learning_rate": 1.9987816534623353e-05, "loss": 1.039, "step": 1770 }, { "epoch": 0.2891310558752704, "grad_norm": 3.4594292640686035, "learning_rate": 1.9987784875158088e-05, "loss": 1.1156, "step": 1771 }, { "epoch": 0.28929431451777476, "grad_norm": 3.4779410362243652, "learning_rate": 1.9987753174636815e-05, "loss": 1.1988, "step": 1772 }, { "epoch": 0.28945757316027915, "grad_norm": 3.5511577129364014, "learning_rate": 1.998772143305966e-05, "loss": 1.0063, "step": 1773 }, { "epoch": 0.2896208318027836, "grad_norm": 3.587144613265991, "learning_rate": 1.9987689650426753e-05, "loss": 1.083, "step": 1774 }, { "epoch": 0.28978409044528797, "grad_norm": 3.3586583137512207, "learning_rate": 1.9987657826738225e-05, "loss": 0.9789, "step": 1775 }, { "epoch": 0.28994734908779235, "grad_norm": 3.704251766204834, "learning_rate": 1.9987625961994207e-05, "loss": 1.1605, "step": 1776 }, { "epoch": 0.29011060773029673, "grad_norm": 4.091466426849365, "learning_rate": 1.998759405619483e-05, "loss": 1.1476, "step": 1777 }, { "epoch": 0.2902738663728011, "grad_norm": 3.7616169452667236, "learning_rate": 1.9987562109340225e-05, "loss": 1.0732, "step": 1778 }, { "epoch": 0.2904371250153055, "grad_norm": 3.4026095867156982, "learning_rate": 1.9987530121430525e-05, "loss": 0.9359, "step": 1779 }, { "epoch": 0.2906003836578099, "grad_norm": 3.854362964630127, "learning_rate": 1.998749809246586e-05, "loss": 1.1018, "step": 1780 }, { "epoch": 0.29076364230031426, "grad_norm": 3.7255847454071045, "learning_rate": 1.9987466022446358e-05, "loss": 1.2157, "step": 1781 }, { "epoch": 0.29092690094281864, "grad_norm": 4.073063850402832, "learning_rate": 1.9987433911372155e-05, "loss": 1.0881, "step": 1782 }, { "epoch": 0.291090159585323, "grad_norm": 3.653280019760132, "learning_rate": 1.9987401759243387e-05, "loss": 1.0314, "step": 1783 }, { "epoch": 0.2912534182278274, "grad_norm": 3.99397611618042, "learning_rate": 1.998736956606018e-05, "loss": 1.2855, "step": 1784 }, { "epoch": 0.29141667687033185, "grad_norm": 3.0392391681671143, "learning_rate": 1.9987337331822662e-05, "loss": 0.7948, "step": 1785 }, { "epoch": 0.29157993551283623, "grad_norm": 3.6894021034240723, "learning_rate": 1.9987305056530977e-05, "loss": 1.0503, "step": 1786 }, { "epoch": 0.2917431941553406, "grad_norm": 3.7784571647644043, "learning_rate": 1.998727274018525e-05, "loss": 0.9499, "step": 1787 }, { "epoch": 0.291906452797845, "grad_norm": 3.777456283569336, "learning_rate": 1.9987240382785614e-05, "loss": 1.1773, "step": 1788 }, { "epoch": 0.2920697114403494, "grad_norm": 3.791438579559326, "learning_rate": 1.9987207984332207e-05, "loss": 0.9165, "step": 1789 }, { "epoch": 0.29223297008285376, "grad_norm": 3.509392261505127, "learning_rate": 1.998717554482516e-05, "loss": 1.1348, "step": 1790 }, { "epoch": 0.29239622872535814, "grad_norm": 2.967902183532715, "learning_rate": 1.9987143064264605e-05, "loss": 1.0073, "step": 1791 }, { "epoch": 0.2925594873678625, "grad_norm": 3.687589406967163, "learning_rate": 1.9987110542650673e-05, "loss": 1.0956, "step": 1792 }, { "epoch": 0.2927227460103669, "grad_norm": 3.243523359298706, "learning_rate": 1.9987077979983504e-05, "loss": 0.9164, "step": 1793 }, { "epoch": 0.2928860046528713, "grad_norm": 3.7559211254119873, "learning_rate": 1.9987045376263226e-05, "loss": 1.1105, "step": 1794 }, { "epoch": 0.2930492632953757, "grad_norm": 3.4711432456970215, "learning_rate": 1.998701273148998e-05, "loss": 1.0812, "step": 1795 }, { "epoch": 0.2932125219378801, "grad_norm": 3.410066843032837, "learning_rate": 1.9986980045663887e-05, "loss": 0.8504, "step": 1796 }, { "epoch": 0.2933757805803845, "grad_norm": 3.7157411575317383, "learning_rate": 1.99869473187851e-05, "loss": 1.2763, "step": 1797 }, { "epoch": 0.2935390392228889, "grad_norm": 3.210782289505005, "learning_rate": 1.9986914550853737e-05, "loss": 0.9496, "step": 1798 }, { "epoch": 0.29370229786539326, "grad_norm": 4.047097206115723, "learning_rate": 1.9986881741869942e-05, "loss": 1.1921, "step": 1799 }, { "epoch": 0.29386555650789764, "grad_norm": 3.530292510986328, "learning_rate": 1.9986848891833846e-05, "loss": 1.012, "step": 1800 }, { "epoch": 0.294028815150402, "grad_norm": 3.6428098678588867, "learning_rate": 1.9986816000745586e-05, "loss": 1.0354, "step": 1801 }, { "epoch": 0.2941920737929064, "grad_norm": 3.591358184814453, "learning_rate": 1.9986783068605295e-05, "loss": 1.02, "step": 1802 }, { "epoch": 0.2943553324354108, "grad_norm": 3.1573376655578613, "learning_rate": 1.998675009541311e-05, "loss": 0.8633, "step": 1803 }, { "epoch": 0.29451859107791517, "grad_norm": 3.440657138824463, "learning_rate": 1.9986717081169168e-05, "loss": 0.9559, "step": 1804 }, { "epoch": 0.29468184972041955, "grad_norm": 3.2728986740112305, "learning_rate": 1.99866840258736e-05, "loss": 0.948, "step": 1805 }, { "epoch": 0.29484510836292394, "grad_norm": 4.172056198120117, "learning_rate": 1.9986650929526546e-05, "loss": 1.2156, "step": 1806 }, { "epoch": 0.2950083670054284, "grad_norm": 3.567281484603882, "learning_rate": 1.9986617792128142e-05, "loss": 0.8359, "step": 1807 }, { "epoch": 0.29517162564793276, "grad_norm": 3.9587478637695312, "learning_rate": 1.9986584613678524e-05, "loss": 1.0761, "step": 1808 }, { "epoch": 0.29533488429043714, "grad_norm": 4.788222789764404, "learning_rate": 1.9986551394177826e-05, "loss": 1.1471, "step": 1809 }, { "epoch": 0.2954981429329415, "grad_norm": 3.7838213443756104, "learning_rate": 1.9986518133626188e-05, "loss": 0.9897, "step": 1810 }, { "epoch": 0.2956614015754459, "grad_norm": 3.6860642433166504, "learning_rate": 1.998648483202374e-05, "loss": 1.0144, "step": 1811 }, { "epoch": 0.2958246602179503, "grad_norm": 3.671015501022339, "learning_rate": 1.9986451489370627e-05, "loss": 1.0083, "step": 1812 }, { "epoch": 0.29598791886045467, "grad_norm": 3.2485172748565674, "learning_rate": 1.9986418105666986e-05, "loss": 0.8705, "step": 1813 }, { "epoch": 0.29615117750295905, "grad_norm": 3.9341189861297607, "learning_rate": 1.9986384680912946e-05, "loss": 1.2908, "step": 1814 }, { "epoch": 0.29631443614546343, "grad_norm": 3.353576421737671, "learning_rate": 1.9986351215108654e-05, "loss": 0.9651, "step": 1815 }, { "epoch": 0.2964776947879678, "grad_norm": 3.624060869216919, "learning_rate": 1.9986317708254238e-05, "loss": 1.1964, "step": 1816 }, { "epoch": 0.2966409534304722, "grad_norm": 3.6363086700439453, "learning_rate": 1.998628416034984e-05, "loss": 1.072, "step": 1817 }, { "epoch": 0.29680421207297664, "grad_norm": 3.6337385177612305, "learning_rate": 1.99862505713956e-05, "loss": 1.2232, "step": 1818 }, { "epoch": 0.296967470715481, "grad_norm": 3.501817464828491, "learning_rate": 1.9986216941391658e-05, "loss": 0.9941, "step": 1819 }, { "epoch": 0.2971307293579854, "grad_norm": 3.0260276794433594, "learning_rate": 1.998618327033815e-05, "loss": 0.8027, "step": 1820 }, { "epoch": 0.2972939880004898, "grad_norm": 3.9980380535125732, "learning_rate": 1.998614955823521e-05, "loss": 1.1304, "step": 1821 }, { "epoch": 0.29745724664299417, "grad_norm": 3.6042652130126953, "learning_rate": 1.9986115805082976e-05, "loss": 1.1869, "step": 1822 }, { "epoch": 0.29762050528549855, "grad_norm": 3.277067184448242, "learning_rate": 1.9986082010881594e-05, "loss": 0.9679, "step": 1823 }, { "epoch": 0.29778376392800293, "grad_norm": 3.400824785232544, "learning_rate": 1.9986048175631204e-05, "loss": 0.8637, "step": 1824 }, { "epoch": 0.2979470225705073, "grad_norm": 3.769744634628296, "learning_rate": 1.9986014299331936e-05, "loss": 1.0391, "step": 1825 }, { "epoch": 0.2981102812130117, "grad_norm": 3.3470911979675293, "learning_rate": 1.9985980381983934e-05, "loss": 0.92, "step": 1826 }, { "epoch": 0.2982735398555161, "grad_norm": 3.81412935256958, "learning_rate": 1.9985946423587337e-05, "loss": 1.1052, "step": 1827 }, { "epoch": 0.29843679849802046, "grad_norm": 3.852921724319458, "learning_rate": 1.9985912424142286e-05, "loss": 1.2254, "step": 1828 }, { "epoch": 0.2986000571405249, "grad_norm": 3.629405975341797, "learning_rate": 1.9985878383648917e-05, "loss": 0.951, "step": 1829 }, { "epoch": 0.2987633157830293, "grad_norm": 3.628492832183838, "learning_rate": 1.9985844302107377e-05, "loss": 1.1358, "step": 1830 }, { "epoch": 0.29892657442553366, "grad_norm": 3.7965285778045654, "learning_rate": 1.99858101795178e-05, "loss": 1.139, "step": 1831 }, { "epoch": 0.29908983306803805, "grad_norm": 3.8004868030548096, "learning_rate": 1.9985776015880325e-05, "loss": 1.1395, "step": 1832 }, { "epoch": 0.29925309171054243, "grad_norm": 3.0755529403686523, "learning_rate": 1.9985741811195098e-05, "loss": 0.9354, "step": 1833 }, { "epoch": 0.2994163503530468, "grad_norm": 3.9866831302642822, "learning_rate": 1.9985707565462258e-05, "loss": 1.0159, "step": 1834 }, { "epoch": 0.2995796089955512, "grad_norm": 3.595879554748535, "learning_rate": 1.9985673278681944e-05, "loss": 0.9956, "step": 1835 }, { "epoch": 0.2997428676380556, "grad_norm": 3.636340618133545, "learning_rate": 1.99856389508543e-05, "loss": 1.0893, "step": 1836 }, { "epoch": 0.29990612628055996, "grad_norm": 3.4732725620269775, "learning_rate": 1.998560458197946e-05, "loss": 0.9294, "step": 1837 }, { "epoch": 0.30006938492306434, "grad_norm": 3.5273561477661133, "learning_rate": 1.998557017205757e-05, "loss": 1.1146, "step": 1838 }, { "epoch": 0.3002326435655688, "grad_norm": 3.235386848449707, "learning_rate": 1.9985535721088776e-05, "loss": 0.9585, "step": 1839 }, { "epoch": 0.30039590220807316, "grad_norm": 3.4520740509033203, "learning_rate": 1.9985501229073213e-05, "loss": 0.9618, "step": 1840 }, { "epoch": 0.30055916085057754, "grad_norm": 3.28031849861145, "learning_rate": 1.9985466696011022e-05, "loss": 0.9236, "step": 1841 }, { "epoch": 0.3007224194930819, "grad_norm": 3.9301469326019287, "learning_rate": 1.998543212190235e-05, "loss": 1.309, "step": 1842 }, { "epoch": 0.3008856781355863, "grad_norm": 3.521158456802368, "learning_rate": 1.9985397506747336e-05, "loss": 1.0678, "step": 1843 }, { "epoch": 0.3010489367780907, "grad_norm": 3.654141902923584, "learning_rate": 1.9985362850546128e-05, "loss": 1.0688, "step": 1844 }, { "epoch": 0.3012121954205951, "grad_norm": 3.6173179149627686, "learning_rate": 1.998532815329886e-05, "loss": 1.0785, "step": 1845 }, { "epoch": 0.30137545406309946, "grad_norm": 3.200686454772949, "learning_rate": 1.998529341500568e-05, "loss": 0.9892, "step": 1846 }, { "epoch": 0.30153871270560384, "grad_norm": 3.6615960597991943, "learning_rate": 1.9985258635666726e-05, "loss": 1.9257, "step": 1847 }, { "epoch": 0.3017019713481082, "grad_norm": 3.9894230365753174, "learning_rate": 1.9985223815282147e-05, "loss": 1.1324, "step": 1848 }, { "epoch": 0.3018652299906126, "grad_norm": 3.6591546535491943, "learning_rate": 1.9985188953852083e-05, "loss": 0.975, "step": 1849 }, { "epoch": 0.30202848863311704, "grad_norm": 3.61983323097229, "learning_rate": 1.9985154051376676e-05, "loss": 1.1882, "step": 1850 }, { "epoch": 0.3021917472756214, "grad_norm": 3.65116286277771, "learning_rate": 1.998511910785607e-05, "loss": 1.1484, "step": 1851 }, { "epoch": 0.3023550059181258, "grad_norm": 3.1952736377716064, "learning_rate": 1.9985084123290415e-05, "loss": 0.8388, "step": 1852 }, { "epoch": 0.3025182645606302, "grad_norm": 3.644787311553955, "learning_rate": 1.9985049097679846e-05, "loss": 1.0077, "step": 1853 }, { "epoch": 0.3026815232031346, "grad_norm": 3.2458925247192383, "learning_rate": 1.9985014031024507e-05, "loss": 0.8554, "step": 1854 }, { "epoch": 0.30284478184563896, "grad_norm": 3.5186564922332764, "learning_rate": 1.9984978923324553e-05, "loss": 1.0007, "step": 1855 }, { "epoch": 0.30300804048814334, "grad_norm": 3.3127923011779785, "learning_rate": 1.9984943774580117e-05, "loss": 0.9101, "step": 1856 }, { "epoch": 0.3031712991306477, "grad_norm": 3.34987211227417, "learning_rate": 1.9984908584791344e-05, "loss": 0.9809, "step": 1857 }, { "epoch": 0.3033345577731521, "grad_norm": 3.7653887271881104, "learning_rate": 1.9984873353958387e-05, "loss": 1.0068, "step": 1858 }, { "epoch": 0.3034978164156565, "grad_norm": 3.470407009124756, "learning_rate": 1.9984838082081385e-05, "loss": 0.9452, "step": 1859 }, { "epoch": 0.30366107505816087, "grad_norm": 4.171583652496338, "learning_rate": 1.9984802769160483e-05, "loss": 1.076, "step": 1860 }, { "epoch": 0.3038243337006653, "grad_norm": 3.2134242057800293, "learning_rate": 1.9984767415195827e-05, "loss": 0.8457, "step": 1861 }, { "epoch": 0.3039875923431697, "grad_norm": 3.498345375061035, "learning_rate": 1.9984732020187563e-05, "loss": 0.9669, "step": 1862 }, { "epoch": 0.30415085098567407, "grad_norm": 3.487656593322754, "learning_rate": 1.9984696584135836e-05, "loss": 0.8665, "step": 1863 }, { "epoch": 0.30431410962817845, "grad_norm": 3.5190980434417725, "learning_rate": 1.998466110704079e-05, "loss": 1.0098, "step": 1864 }, { "epoch": 0.30447736827068284, "grad_norm": 3.421844720840454, "learning_rate": 1.9984625588902574e-05, "loss": 1.0665, "step": 1865 }, { "epoch": 0.3046406269131872, "grad_norm": 3.9776618480682373, "learning_rate": 1.998459002972133e-05, "loss": 1.0385, "step": 1866 }, { "epoch": 0.3048038855556916, "grad_norm": 3.471113920211792, "learning_rate": 1.998455442949721e-05, "loss": 1.0349, "step": 1867 }, { "epoch": 0.304967144198196, "grad_norm": 3.4078290462493896, "learning_rate": 1.9984518788230352e-05, "loss": 0.9289, "step": 1868 }, { "epoch": 0.30513040284070037, "grad_norm": 3.6471645832061768, "learning_rate": 1.998448310592091e-05, "loss": 1.0395, "step": 1869 }, { "epoch": 0.30529366148320475, "grad_norm": 3.70487642288208, "learning_rate": 1.998444738256903e-05, "loss": 0.9812, "step": 1870 }, { "epoch": 0.30545692012570913, "grad_norm": 3.5942702293395996, "learning_rate": 1.9984411618174855e-05, "loss": 1.1595, "step": 1871 }, { "epoch": 0.30562017876821357, "grad_norm": 3.28462290763855, "learning_rate": 1.9984375812738534e-05, "loss": 0.9236, "step": 1872 }, { "epoch": 0.30578343741071795, "grad_norm": 3.4393415451049805, "learning_rate": 1.998433996626021e-05, "loss": 1.1696, "step": 1873 }, { "epoch": 0.30594669605322233, "grad_norm": 3.3523054122924805, "learning_rate": 1.9984304078740046e-05, "loss": 0.9923, "step": 1874 }, { "epoch": 0.3061099546957267, "grad_norm": 3.384363889694214, "learning_rate": 1.998426815017817e-05, "loss": 0.9413, "step": 1875 }, { "epoch": 0.3062732133382311, "grad_norm": 3.535604238510132, "learning_rate": 1.9984232180574734e-05, "loss": 1.026, "step": 1876 }, { "epoch": 0.3064364719807355, "grad_norm": 3.844248056411743, "learning_rate": 1.9984196169929893e-05, "loss": 1.1394, "step": 1877 }, { "epoch": 0.30659973062323986, "grad_norm": 3.149989128112793, "learning_rate": 1.9984160118243795e-05, "loss": 0.8608, "step": 1878 }, { "epoch": 0.30676298926574425, "grad_norm": 3.6915171146392822, "learning_rate": 1.998412402551658e-05, "loss": 1.1474, "step": 1879 }, { "epoch": 0.30692624790824863, "grad_norm": 3.48136043548584, "learning_rate": 1.99840878917484e-05, "loss": 0.8791, "step": 1880 }, { "epoch": 0.307089506550753, "grad_norm": 3.6257994174957275, "learning_rate": 1.998405171693941e-05, "loss": 0.9697, "step": 1881 }, { "epoch": 0.3072527651932574, "grad_norm": 3.442023515701294, "learning_rate": 1.998401550108975e-05, "loss": 1.0849, "step": 1882 }, { "epoch": 0.30741602383576183, "grad_norm": 3.410349130630493, "learning_rate": 1.9983979244199575e-05, "loss": 0.8675, "step": 1883 }, { "epoch": 0.3075792824782662, "grad_norm": 3.52683687210083, "learning_rate": 1.998394294626903e-05, "loss": 1.1264, "step": 1884 }, { "epoch": 0.3077425411207706, "grad_norm": 3.5475504398345947, "learning_rate": 1.9983906607298262e-05, "loss": 1.1144, "step": 1885 }, { "epoch": 0.307905799763275, "grad_norm": 3.083923816680908, "learning_rate": 1.998387022728743e-05, "loss": 0.8044, "step": 1886 }, { "epoch": 0.30806905840577936, "grad_norm": 3.2647624015808105, "learning_rate": 1.9983833806236673e-05, "loss": 0.9429, "step": 1887 }, { "epoch": 0.30823231704828374, "grad_norm": 3.4156880378723145, "learning_rate": 1.9983797344146144e-05, "loss": 0.9218, "step": 1888 }, { "epoch": 0.3083955756907881, "grad_norm": 3.7957944869995117, "learning_rate": 1.9983760841015995e-05, "loss": 1.0985, "step": 1889 }, { "epoch": 0.3085588343332925, "grad_norm": 3.630417585372925, "learning_rate": 1.9983724296846375e-05, "loss": 1.0325, "step": 1890 }, { "epoch": 0.3087220929757969, "grad_norm": 3.436586856842041, "learning_rate": 1.998368771163744e-05, "loss": 1.001, "step": 1891 }, { "epoch": 0.3088853516183013, "grad_norm": 3.6706628799438477, "learning_rate": 1.998365108538933e-05, "loss": 1.2392, "step": 1892 }, { "epoch": 0.30904861026080566, "grad_norm": 3.69321608543396, "learning_rate": 1.9983614418102196e-05, "loss": 1.1076, "step": 1893 }, { "epoch": 0.3092118689033101, "grad_norm": 3.6775739192962646, "learning_rate": 1.9983577709776195e-05, "loss": 1.077, "step": 1894 }, { "epoch": 0.3093751275458145, "grad_norm": 3.5102734565734863, "learning_rate": 1.998354096041148e-05, "loss": 1.0855, "step": 1895 }, { "epoch": 0.30953838618831886, "grad_norm": 3.3881170749664307, "learning_rate": 1.9983504170008193e-05, "loss": 0.9736, "step": 1896 }, { "epoch": 0.30970164483082324, "grad_norm": 3.2478106021881104, "learning_rate": 1.9983467338566495e-05, "loss": 0.8499, "step": 1897 }, { "epoch": 0.3098649034733276, "grad_norm": 3.824486494064331, "learning_rate": 1.9983430466086528e-05, "loss": 0.9293, "step": 1898 }, { "epoch": 0.310028162115832, "grad_norm": 3.3544504642486572, "learning_rate": 1.9983393552568452e-05, "loss": 0.9249, "step": 1899 }, { "epoch": 0.3101914207583364, "grad_norm": 3.9401988983154297, "learning_rate": 1.9983356598012412e-05, "loss": 0.906, "step": 1900 }, { "epoch": 0.31035467940084077, "grad_norm": 3.453230381011963, "learning_rate": 1.9983319602418564e-05, "loss": 0.9031, "step": 1901 }, { "epoch": 0.31051793804334515, "grad_norm": 3.477051258087158, "learning_rate": 1.9983282565787054e-05, "loss": 1.0778, "step": 1902 }, { "epoch": 0.31068119668584954, "grad_norm": 3.7559196949005127, "learning_rate": 1.9983245488118045e-05, "loss": 0.8973, "step": 1903 }, { "epoch": 0.3108444553283539, "grad_norm": 3.6455047130584717, "learning_rate": 1.9983208369411682e-05, "loss": 0.937, "step": 1904 }, { "epoch": 0.31100771397085836, "grad_norm": 3.728937864303589, "learning_rate": 1.9983171209668118e-05, "loss": 1.0616, "step": 1905 }, { "epoch": 0.31117097261336274, "grad_norm": 2.933354139328003, "learning_rate": 1.9983134008887505e-05, "loss": 0.9414, "step": 1906 }, { "epoch": 0.3113342312558671, "grad_norm": 3.4279422760009766, "learning_rate": 1.9983096767070002e-05, "loss": 0.9057, "step": 1907 }, { "epoch": 0.3114974898983715, "grad_norm": 3.4256527423858643, "learning_rate": 1.9983059484215756e-05, "loss": 0.8209, "step": 1908 }, { "epoch": 0.3116607485408759, "grad_norm": 3.4805123805999756, "learning_rate": 1.9983022160324917e-05, "loss": 1.0093, "step": 1909 }, { "epoch": 0.31182400718338027, "grad_norm": 3.7382590770721436, "learning_rate": 1.998298479539765e-05, "loss": 1.1528, "step": 1910 }, { "epoch": 0.31198726582588465, "grad_norm": 4.161679267883301, "learning_rate": 1.99829473894341e-05, "loss": 1.0772, "step": 1911 }, { "epoch": 0.31215052446838903, "grad_norm": 3.836561441421509, "learning_rate": 1.9982909942434423e-05, "loss": 1.0472, "step": 1912 }, { "epoch": 0.3123137831108934, "grad_norm": 3.719115972518921, "learning_rate": 1.9982872454398775e-05, "loss": 0.9783, "step": 1913 }, { "epoch": 0.3124770417533978, "grad_norm": 3.7850852012634277, "learning_rate": 1.9982834925327304e-05, "loss": 1.0234, "step": 1914 }, { "epoch": 0.3126403003959022, "grad_norm": 3.5199546813964844, "learning_rate": 1.998279735522017e-05, "loss": 0.9923, "step": 1915 }, { "epoch": 0.3128035590384066, "grad_norm": 3.6003096103668213, "learning_rate": 1.9982759744077525e-05, "loss": 1.0109, "step": 1916 }, { "epoch": 0.312966817680911, "grad_norm": 3.4506330490112305, "learning_rate": 1.9982722091899522e-05, "loss": 0.9598, "step": 1917 }, { "epoch": 0.3131300763234154, "grad_norm": 3.660299301147461, "learning_rate": 1.998268439868632e-05, "loss": 1.1196, "step": 1918 }, { "epoch": 0.31329333496591977, "grad_norm": 3.290066719055176, "learning_rate": 1.998264666443807e-05, "loss": 0.8635, "step": 1919 }, { "epoch": 0.31345659360842415, "grad_norm": 3.4874861240386963, "learning_rate": 1.998260888915493e-05, "loss": 1.0091, "step": 1920 }, { "epoch": 0.31361985225092853, "grad_norm": 3.363985538482666, "learning_rate": 1.998257107283706e-05, "loss": 0.8097, "step": 1921 }, { "epoch": 0.3137831108934329, "grad_norm": 3.3835461139678955, "learning_rate": 1.99825332154846e-05, "loss": 0.9071, "step": 1922 }, { "epoch": 0.3139463695359373, "grad_norm": 3.4018125534057617, "learning_rate": 1.998249531709772e-05, "loss": 0.9898, "step": 1923 }, { "epoch": 0.3141096281784417, "grad_norm": 3.5481295585632324, "learning_rate": 1.998245737767657e-05, "loss": 1.0834, "step": 1924 }, { "epoch": 0.31427288682094606, "grad_norm": 3.4994149208068848, "learning_rate": 1.998241939722131e-05, "loss": 0.844, "step": 1925 }, { "epoch": 0.31443614546345044, "grad_norm": 3.981675148010254, "learning_rate": 1.998238137573209e-05, "loss": 1.0459, "step": 1926 }, { "epoch": 0.3145994041059549, "grad_norm": 3.693819999694824, "learning_rate": 1.998234331320907e-05, "loss": 1.2678, "step": 1927 }, { "epoch": 0.31476266274845927, "grad_norm": 3.58555006980896, "learning_rate": 1.998230520965241e-05, "loss": 1.1098, "step": 1928 }, { "epoch": 0.31492592139096365, "grad_norm": 3.4461734294891357, "learning_rate": 1.9982267065062254e-05, "loss": 0.9578, "step": 1929 }, { "epoch": 0.31508918003346803, "grad_norm": 3.264138698577881, "learning_rate": 1.9982228879438774e-05, "loss": 0.8277, "step": 1930 }, { "epoch": 0.3152524386759724, "grad_norm": 3.2090394496917725, "learning_rate": 1.9982190652782122e-05, "loss": 0.8904, "step": 1931 }, { "epoch": 0.3154156973184768, "grad_norm": 3.703824758529663, "learning_rate": 1.9982152385092445e-05, "loss": 1.1921, "step": 1932 }, { "epoch": 0.3155789559609812, "grad_norm": 3.4215545654296875, "learning_rate": 1.9982114076369915e-05, "loss": 1.0246, "step": 1933 }, { "epoch": 0.31574221460348556, "grad_norm": 3.7952423095703125, "learning_rate": 1.9982075726614686e-05, "loss": 1.1029, "step": 1934 }, { "epoch": 0.31590547324598994, "grad_norm": 3.6386616230010986, "learning_rate": 1.9982037335826904e-05, "loss": 1.2418, "step": 1935 }, { "epoch": 0.3160687318884943, "grad_norm": 3.7575321197509766, "learning_rate": 1.9981998904006742e-05, "loss": 1.0829, "step": 1936 }, { "epoch": 0.3162319905309987, "grad_norm": 3.191546678543091, "learning_rate": 1.998196043115435e-05, "loss": 0.8991, "step": 1937 }, { "epoch": 0.31639524917350315, "grad_norm": 3.480557918548584, "learning_rate": 1.9981921917269888e-05, "loss": 0.9737, "step": 1938 }, { "epoch": 0.31655850781600753, "grad_norm": 3.4693195819854736, "learning_rate": 1.9981883362353515e-05, "loss": 0.9715, "step": 1939 }, { "epoch": 0.3167217664585119, "grad_norm": 3.7581543922424316, "learning_rate": 1.9981844766405388e-05, "loss": 0.9256, "step": 1940 }, { "epoch": 0.3168850251010163, "grad_norm": 3.2575700283050537, "learning_rate": 1.9981806129425664e-05, "loss": 0.9005, "step": 1941 }, { "epoch": 0.3170482837435207, "grad_norm": 3.970017910003662, "learning_rate": 1.9981767451414505e-05, "loss": 1.0721, "step": 1942 }, { "epoch": 0.31721154238602506, "grad_norm": 3.623724937438965, "learning_rate": 1.998172873237207e-05, "loss": 1.1097, "step": 1943 }, { "epoch": 0.31737480102852944, "grad_norm": 3.569053888320923, "learning_rate": 1.998168997229852e-05, "loss": 1.1133, "step": 1944 }, { "epoch": 0.3175380596710338, "grad_norm": 3.4276504516601562, "learning_rate": 1.9981651171194007e-05, "loss": 1.044, "step": 1945 }, { "epoch": 0.3177013183135382, "grad_norm": 3.4915339946746826, "learning_rate": 1.9981612329058698e-05, "loss": 0.9356, "step": 1946 }, { "epoch": 0.3178645769560426, "grad_norm": 3.1682658195495605, "learning_rate": 1.9981573445892748e-05, "loss": 1.0331, "step": 1947 }, { "epoch": 0.31802783559854697, "grad_norm": 3.0565898418426514, "learning_rate": 1.9981534521696318e-05, "loss": 0.7683, "step": 1948 }, { "epoch": 0.3181910942410514, "grad_norm": 3.1598753929138184, "learning_rate": 1.9981495556469572e-05, "loss": 1.0483, "step": 1949 }, { "epoch": 0.3183543528835558, "grad_norm": 3.4120702743530273, "learning_rate": 1.9981456550212665e-05, "loss": 0.9829, "step": 1950 }, { "epoch": 0.3185176115260602, "grad_norm": 3.4017670154571533, "learning_rate": 1.9981417502925756e-05, "loss": 0.9376, "step": 1951 }, { "epoch": 0.31868087016856456, "grad_norm": 3.5767385959625244, "learning_rate": 1.9981378414609012e-05, "loss": 0.9357, "step": 1952 }, { "epoch": 0.31884412881106894, "grad_norm": 3.1057639122009277, "learning_rate": 1.998133928526259e-05, "loss": 0.8784, "step": 1953 }, { "epoch": 0.3190073874535733, "grad_norm": 3.376537322998047, "learning_rate": 1.998130011488665e-05, "loss": 1.0236, "step": 1954 }, { "epoch": 0.3191706460960777, "grad_norm": 3.7657992839813232, "learning_rate": 1.9981260903481356e-05, "loss": 1.2521, "step": 1955 }, { "epoch": 0.3193339047385821, "grad_norm": 3.3923556804656982, "learning_rate": 1.9981221651046862e-05, "loss": 1.0242, "step": 1956 }, { "epoch": 0.31949716338108647, "grad_norm": 3.433332920074463, "learning_rate": 1.998118235758334e-05, "loss": 0.9882, "step": 1957 }, { "epoch": 0.31966042202359085, "grad_norm": 3.5416202545166016, "learning_rate": 1.9981143023090947e-05, "loss": 1.0196, "step": 1958 }, { "epoch": 0.3198236806660953, "grad_norm": 3.4190831184387207, "learning_rate": 1.998110364756984e-05, "loss": 0.8621, "step": 1959 }, { "epoch": 0.31998693930859967, "grad_norm": 3.3244879245758057, "learning_rate": 1.9981064231020185e-05, "loss": 0.8737, "step": 1960 }, { "epoch": 0.32015019795110405, "grad_norm": 4.112154483795166, "learning_rate": 1.9981024773442145e-05, "loss": 1.0098, "step": 1961 }, { "epoch": 0.32031345659360844, "grad_norm": 3.526425838470459, "learning_rate": 1.998098527483588e-05, "loss": 1.0295, "step": 1962 }, { "epoch": 0.3204767152361128, "grad_norm": 3.909961223602295, "learning_rate": 1.9980945735201552e-05, "loss": 1.0506, "step": 1963 }, { "epoch": 0.3206399738786172, "grad_norm": 3.6130521297454834, "learning_rate": 1.998090615453933e-05, "loss": 1.0019, "step": 1964 }, { "epoch": 0.3208032325211216, "grad_norm": 3.661022424697876, "learning_rate": 1.9980866532849366e-05, "loss": 1.0035, "step": 1965 }, { "epoch": 0.32096649116362597, "grad_norm": 3.2514073848724365, "learning_rate": 1.9980826870131832e-05, "loss": 0.9069, "step": 1966 }, { "epoch": 0.32112974980613035, "grad_norm": 3.728569507598877, "learning_rate": 1.9980787166386884e-05, "loss": 0.9737, "step": 1967 }, { "epoch": 0.32129300844863473, "grad_norm": 4.02882719039917, "learning_rate": 1.9980747421614693e-05, "loss": 1.095, "step": 1968 }, { "epoch": 0.3214562670911391, "grad_norm": 3.50148868560791, "learning_rate": 1.9980707635815415e-05, "loss": 1.0902, "step": 1969 }, { "epoch": 0.32161952573364355, "grad_norm": 3.565061330795288, "learning_rate": 1.9980667808989218e-05, "loss": 0.9727, "step": 1970 }, { "epoch": 0.32178278437614793, "grad_norm": 3.375886917114258, "learning_rate": 1.9980627941136265e-05, "loss": 1.069, "step": 1971 }, { "epoch": 0.3219460430186523, "grad_norm": 3.7332041263580322, "learning_rate": 1.9980588032256716e-05, "loss": 1.0789, "step": 1972 }, { "epoch": 0.3221093016611567, "grad_norm": 3.4676575660705566, "learning_rate": 1.998054808235074e-05, "loss": 0.8987, "step": 1973 }, { "epoch": 0.3222725603036611, "grad_norm": 3.052028179168701, "learning_rate": 1.99805080914185e-05, "loss": 0.7974, "step": 1974 }, { "epoch": 0.32243581894616546, "grad_norm": 3.6091623306274414, "learning_rate": 1.998046805946016e-05, "loss": 0.9814, "step": 1975 }, { "epoch": 0.32259907758866985, "grad_norm": 3.495880603790283, "learning_rate": 1.998042798647588e-05, "loss": 1.1125, "step": 1976 }, { "epoch": 0.32276233623117423, "grad_norm": 3.479278564453125, "learning_rate": 1.9980387872465834e-05, "loss": 1.0423, "step": 1977 }, { "epoch": 0.3229255948736786, "grad_norm": 3.231651782989502, "learning_rate": 1.9980347717430182e-05, "loss": 0.9479, "step": 1978 }, { "epoch": 0.323088853516183, "grad_norm": 3.3000285625457764, "learning_rate": 1.9980307521369086e-05, "loss": 1.0513, "step": 1979 }, { "epoch": 0.3232521121586874, "grad_norm": 3.4052820205688477, "learning_rate": 1.9980267284282718e-05, "loss": 1.2472, "step": 1980 }, { "epoch": 0.3234153708011918, "grad_norm": 3.135922431945801, "learning_rate": 1.9980227006171238e-05, "loss": 1.1011, "step": 1981 }, { "epoch": 0.3235786294436962, "grad_norm": 3.3033862113952637, "learning_rate": 1.9980186687034814e-05, "loss": 1.0034, "step": 1982 }, { "epoch": 0.3237418880862006, "grad_norm": 3.385246515274048, "learning_rate": 1.9980146326873607e-05, "loss": 1.0945, "step": 1983 }, { "epoch": 0.32390514672870496, "grad_norm": 3.7262380123138428, "learning_rate": 1.998010592568779e-05, "loss": 1.1693, "step": 1984 }, { "epoch": 0.32406840537120934, "grad_norm": 3.1355338096618652, "learning_rate": 1.9980065483477526e-05, "loss": 0.9387, "step": 1985 }, { "epoch": 0.3242316640137137, "grad_norm": 3.6558315753936768, "learning_rate": 1.9980025000242983e-05, "loss": 1.0998, "step": 1986 }, { "epoch": 0.3243949226562181, "grad_norm": 3.0909364223480225, "learning_rate": 1.997998447598432e-05, "loss": 1.0514, "step": 1987 }, { "epoch": 0.3245581812987225, "grad_norm": 3.019862413406372, "learning_rate": 1.9979943910701713e-05, "loss": 0.8473, "step": 1988 }, { "epoch": 0.3247214399412269, "grad_norm": 3.2704014778137207, "learning_rate": 1.9979903304395326e-05, "loss": 1.0541, "step": 1989 }, { "epoch": 0.32488469858373126, "grad_norm": 3.280458450317383, "learning_rate": 1.9979862657065325e-05, "loss": 1.0294, "step": 1990 }, { "epoch": 0.32504795722623564, "grad_norm": 3.1961863040924072, "learning_rate": 1.9979821968711875e-05, "loss": 1.0667, "step": 1991 }, { "epoch": 0.3252112158687401, "grad_norm": 3.3734095096588135, "learning_rate": 1.9979781239335145e-05, "loss": 0.9752, "step": 1992 }, { "epoch": 0.32537447451124446, "grad_norm": 3.8236618041992188, "learning_rate": 1.9979740468935302e-05, "loss": 0.8989, "step": 1993 }, { "epoch": 0.32553773315374884, "grad_norm": 3.7515947818756104, "learning_rate": 1.997969965751252e-05, "loss": 2.0324, "step": 1994 }, { "epoch": 0.3257009917962532, "grad_norm": 3.582848310470581, "learning_rate": 1.9979658805066954e-05, "loss": 0.9943, "step": 1995 }, { "epoch": 0.3258642504387576, "grad_norm": 4.292599201202393, "learning_rate": 1.9979617911598783e-05, "loss": 1.1077, "step": 1996 }, { "epoch": 0.326027509081262, "grad_norm": 3.4406869411468506, "learning_rate": 1.9979576977108167e-05, "loss": 0.939, "step": 1997 }, { "epoch": 0.3261907677237664, "grad_norm": 3.8744282722473145, "learning_rate": 1.9979536001595284e-05, "loss": 0.9438, "step": 1998 }, { "epoch": 0.32635402636627076, "grad_norm": 3.3434815406799316, "learning_rate": 1.9979494985060294e-05, "loss": 0.8469, "step": 1999 }, { "epoch": 0.32651728500877514, "grad_norm": 3.299182176589966, "learning_rate": 1.9979453927503366e-05, "loss": 0.8315, "step": 2000 }, { "epoch": 0.3266805436512795, "grad_norm": 4.308814525604248, "learning_rate": 1.997941282892467e-05, "loss": 1.1718, "step": 2001 }, { "epoch": 0.3268438022937839, "grad_norm": 3.9203972816467285, "learning_rate": 1.9979371689324382e-05, "loss": 1.2207, "step": 2002 }, { "epoch": 0.32700706093628834, "grad_norm": 3.293612241744995, "learning_rate": 1.997933050870266e-05, "loss": 0.9513, "step": 2003 }, { "epoch": 0.3271703195787927, "grad_norm": 3.778348445892334, "learning_rate": 1.9979289287059683e-05, "loss": 1.0736, "step": 2004 }, { "epoch": 0.3273335782212971, "grad_norm": 3.217588186264038, "learning_rate": 1.997924802439561e-05, "loss": 0.89, "step": 2005 }, { "epoch": 0.3274968368638015, "grad_norm": 3.668618679046631, "learning_rate": 1.997920672071062e-05, "loss": 1.1187, "step": 2006 }, { "epoch": 0.32766009550630587, "grad_norm": 3.265084743499756, "learning_rate": 1.997916537600488e-05, "loss": 1.0548, "step": 2007 }, { "epoch": 0.32782335414881025, "grad_norm": 3.42033314704895, "learning_rate": 1.9979123990278558e-05, "loss": 1.0538, "step": 2008 }, { "epoch": 0.32798661279131464, "grad_norm": 3.485903739929199, "learning_rate": 1.9979082563531827e-05, "loss": 1.0333, "step": 2009 }, { "epoch": 0.328149871433819, "grad_norm": 3.4959046840667725, "learning_rate": 1.9979041095764852e-05, "loss": 1.0089, "step": 2010 }, { "epoch": 0.3283131300763234, "grad_norm": 3.17093563079834, "learning_rate": 1.9978999586977814e-05, "loss": 0.8477, "step": 2011 }, { "epoch": 0.3284763887188278, "grad_norm": 3.7172322273254395, "learning_rate": 1.9978958037170867e-05, "loss": 1.2119, "step": 2012 }, { "epoch": 0.32863964736133217, "grad_norm": 3.18830943107605, "learning_rate": 1.9978916446344198e-05, "loss": 0.8947, "step": 2013 }, { "epoch": 0.3288029060038366, "grad_norm": 4.462177753448486, "learning_rate": 1.997887481449797e-05, "loss": 1.0697, "step": 2014 }, { "epoch": 0.328966164646341, "grad_norm": 4.122891426086426, "learning_rate": 1.9978833141632355e-05, "loss": 1.0419, "step": 2015 }, { "epoch": 0.32912942328884537, "grad_norm": 4.023115158081055, "learning_rate": 1.9978791427747526e-05, "loss": 1.183, "step": 2016 }, { "epoch": 0.32929268193134975, "grad_norm": 3.532216787338257, "learning_rate": 1.997874967284365e-05, "loss": 0.965, "step": 2017 }, { "epoch": 0.32945594057385413, "grad_norm": 3.735217332839966, "learning_rate": 1.9978707876920906e-05, "loss": 1.1213, "step": 2018 }, { "epoch": 0.3296191992163585, "grad_norm": 3.466926336288452, "learning_rate": 1.9978666039979463e-05, "loss": 1.1045, "step": 2019 }, { "epoch": 0.3297824578588629, "grad_norm": 3.564243793487549, "learning_rate": 1.9978624162019487e-05, "loss": 1.0167, "step": 2020 }, { "epoch": 0.3299457165013673, "grad_norm": 3.869558811187744, "learning_rate": 1.997858224304116e-05, "loss": 1.1748, "step": 2021 }, { "epoch": 0.33010897514387166, "grad_norm": 4.1309590339660645, "learning_rate": 1.9978540283044646e-05, "loss": 1.1109, "step": 2022 }, { "epoch": 0.33027223378637605, "grad_norm": 2.8148019313812256, "learning_rate": 1.997849828203012e-05, "loss": 0.8692, "step": 2023 }, { "epoch": 0.33043549242888043, "grad_norm": 3.153322219848633, "learning_rate": 1.9978456239997758e-05, "loss": 0.8473, "step": 2024 }, { "epoch": 0.33059875107138487, "grad_norm": 3.2847230434417725, "learning_rate": 1.9978414156947727e-05, "loss": 1.0292, "step": 2025 }, { "epoch": 0.33076200971388925, "grad_norm": 3.360165596008301, "learning_rate": 1.9978372032880208e-05, "loss": 0.9622, "step": 2026 }, { "epoch": 0.33092526835639363, "grad_norm": 3.512207269668579, "learning_rate": 1.9978329867795365e-05, "loss": 1.1216, "step": 2027 }, { "epoch": 0.331088526998898, "grad_norm": 3.313732385635376, "learning_rate": 1.9978287661693376e-05, "loss": 0.9278, "step": 2028 }, { "epoch": 0.3312517856414024, "grad_norm": 3.702655792236328, "learning_rate": 1.997824541457442e-05, "loss": 1.053, "step": 2029 }, { "epoch": 0.3314150442839068, "grad_norm": 3.5106091499328613, "learning_rate": 1.9978203126438654e-05, "loss": 0.9849, "step": 2030 }, { "epoch": 0.33157830292641116, "grad_norm": 3.3770811557769775, "learning_rate": 1.9978160797286274e-05, "loss": 0.9346, "step": 2031 }, { "epoch": 0.33174156156891554, "grad_norm": 3.4151968955993652, "learning_rate": 1.9978118427117435e-05, "loss": 1.0294, "step": 2032 }, { "epoch": 0.3319048202114199, "grad_norm": 3.7458906173706055, "learning_rate": 1.9978076015932323e-05, "loss": 0.829, "step": 2033 }, { "epoch": 0.3320680788539243, "grad_norm": 3.549246072769165, "learning_rate": 1.9978033563731104e-05, "loss": 1.0322, "step": 2034 }, { "epoch": 0.3322313374964287, "grad_norm": 3.3581788539886475, "learning_rate": 1.997799107051396e-05, "loss": 1.0437, "step": 2035 }, { "epoch": 0.33239459613893313, "grad_norm": 3.2784817218780518, "learning_rate": 1.997794853628106e-05, "loss": 1.1021, "step": 2036 }, { "epoch": 0.3325578547814375, "grad_norm": 3.462843418121338, "learning_rate": 1.997790596103258e-05, "loss": 1.0815, "step": 2037 }, { "epoch": 0.3327211134239419, "grad_norm": 3.2786238193511963, "learning_rate": 1.99778633447687e-05, "loss": 0.9654, "step": 2038 }, { "epoch": 0.3328843720664463, "grad_norm": 3.9124388694763184, "learning_rate": 1.997782068748959e-05, "loss": 1.0694, "step": 2039 }, { "epoch": 0.33304763070895066, "grad_norm": 3.529202699661255, "learning_rate": 1.9977777989195428e-05, "loss": 0.9518, "step": 2040 }, { "epoch": 0.33321088935145504, "grad_norm": 3.244601011276245, "learning_rate": 1.9977735249886387e-05, "loss": 0.956, "step": 2041 }, { "epoch": 0.3333741479939594, "grad_norm": 3.2972824573516846, "learning_rate": 1.9977692469562643e-05, "loss": 0.8839, "step": 2042 }, { "epoch": 0.3335374066364638, "grad_norm": 3.4823083877563477, "learning_rate": 1.9977649648224374e-05, "loss": 0.9598, "step": 2043 }, { "epoch": 0.3337006652789682, "grad_norm": 3.4943034648895264, "learning_rate": 1.9977606785871754e-05, "loss": 1.0193, "step": 2044 }, { "epoch": 0.33386392392147257, "grad_norm": 3.7322535514831543, "learning_rate": 1.997756388250496e-05, "loss": 1.0458, "step": 2045 }, { "epoch": 0.33402718256397695, "grad_norm": 3.1302390098571777, "learning_rate": 1.997752093812417e-05, "loss": 0.9568, "step": 2046 }, { "epoch": 0.3341904412064814, "grad_norm": 3.816169500350952, "learning_rate": 1.9977477952729557e-05, "loss": 2.2552, "step": 2047 }, { "epoch": 0.3343536998489858, "grad_norm": 4.322908401489258, "learning_rate": 1.9977434926321302e-05, "loss": 1.0778, "step": 2048 }, { "epoch": 0.33451695849149016, "grad_norm": 3.179224967956543, "learning_rate": 1.997739185889958e-05, "loss": 0.9398, "step": 2049 }, { "epoch": 0.33468021713399454, "grad_norm": 3.617117166519165, "learning_rate": 1.997734875046456e-05, "loss": 1.1772, "step": 2050 }, { "epoch": 0.3348434757764989, "grad_norm": 3.3546078205108643, "learning_rate": 1.9977305601016435e-05, "loss": 0.886, "step": 2051 }, { "epoch": 0.3350067344190033, "grad_norm": 3.3548805713653564, "learning_rate": 1.997726241055537e-05, "loss": 1.0647, "step": 2052 }, { "epoch": 0.3351699930615077, "grad_norm": 3.137936592102051, "learning_rate": 1.997721917908155e-05, "loss": 1.0058, "step": 2053 }, { "epoch": 0.33533325170401207, "grad_norm": 3.3381659984588623, "learning_rate": 1.9977175906595146e-05, "loss": 0.9911, "step": 2054 }, { "epoch": 0.33549651034651645, "grad_norm": 2.987356662750244, "learning_rate": 1.997713259309634e-05, "loss": 0.9992, "step": 2055 }, { "epoch": 0.33565976898902083, "grad_norm": 3.1822431087493896, "learning_rate": 1.9977089238585307e-05, "loss": 0.975, "step": 2056 }, { "epoch": 0.3358230276315252, "grad_norm": 3.109642267227173, "learning_rate": 1.997704584306223e-05, "loss": 1.0012, "step": 2057 }, { "epoch": 0.33598628627402966, "grad_norm": 3.2525246143341064, "learning_rate": 1.9977002406527287e-05, "loss": 1.04, "step": 2058 }, { "epoch": 0.33614954491653404, "grad_norm": 3.149958848953247, "learning_rate": 1.997695892898065e-05, "loss": 0.8327, "step": 2059 }, { "epoch": 0.3363128035590384, "grad_norm": 3.378586530685425, "learning_rate": 1.9976915410422502e-05, "loss": 1.0801, "step": 2060 }, { "epoch": 0.3364760622015428, "grad_norm": 3.809561252593994, "learning_rate": 1.9976871850853024e-05, "loss": 1.1196, "step": 2061 }, { "epoch": 0.3366393208440472, "grad_norm": 3.722630262374878, "learning_rate": 1.9976828250272393e-05, "loss": 0.9939, "step": 2062 }, { "epoch": 0.33680257948655157, "grad_norm": 3.6125056743621826, "learning_rate": 1.997678460868079e-05, "loss": 1.1361, "step": 2063 }, { "epoch": 0.33696583812905595, "grad_norm": 3.380913257598877, "learning_rate": 1.9976740926078388e-05, "loss": 1.0614, "step": 2064 }, { "epoch": 0.33712909677156033, "grad_norm": 3.363311767578125, "learning_rate": 1.9976697202465374e-05, "loss": 0.9831, "step": 2065 }, { "epoch": 0.3372923554140647, "grad_norm": 3.4521028995513916, "learning_rate": 1.9976653437841928e-05, "loss": 0.9813, "step": 2066 }, { "epoch": 0.3374556140565691, "grad_norm": 3.9176690578460693, "learning_rate": 1.997660963220822e-05, "loss": 0.998, "step": 2067 }, { "epoch": 0.3376188726990735, "grad_norm": 3.1025803089141846, "learning_rate": 1.9976565785564443e-05, "loss": 0.935, "step": 2068 }, { "epoch": 0.3377821313415779, "grad_norm": 3.2137644290924072, "learning_rate": 1.997652189791077e-05, "loss": 0.7953, "step": 2069 }, { "epoch": 0.3379453899840823, "grad_norm": 3.327969789505005, "learning_rate": 1.9976477969247382e-05, "loss": 1.1135, "step": 2070 }, { "epoch": 0.3381086486265867, "grad_norm": 3.6619949340820312, "learning_rate": 1.9976433999574454e-05, "loss": 1.1787, "step": 2071 }, { "epoch": 0.33827190726909107, "grad_norm": 3.6092307567596436, "learning_rate": 1.997638998889218e-05, "loss": 1.0361, "step": 2072 }, { "epoch": 0.33843516591159545, "grad_norm": 3.446044445037842, "learning_rate": 1.9976345937200733e-05, "loss": 0.9633, "step": 2073 }, { "epoch": 0.33859842455409983, "grad_norm": 3.4455254077911377, "learning_rate": 1.9976301844500295e-05, "loss": 1.0712, "step": 2074 }, { "epoch": 0.3387616831966042, "grad_norm": 3.302236557006836, "learning_rate": 1.9976257710791045e-05, "loss": 0.9814, "step": 2075 }, { "epoch": 0.3389249418391086, "grad_norm": 3.2056961059570312, "learning_rate": 1.997621353607317e-05, "loss": 0.9445, "step": 2076 }, { "epoch": 0.339088200481613, "grad_norm": 3.048002004623413, "learning_rate": 1.9976169320346843e-05, "loss": 0.8236, "step": 2077 }, { "epoch": 0.33925145912411736, "grad_norm": 3.479328155517578, "learning_rate": 1.9976125063612254e-05, "loss": 1.3192, "step": 2078 }, { "epoch": 0.33941471776662174, "grad_norm": 3.8140289783477783, "learning_rate": 1.997608076586958e-05, "loss": 1.0164, "step": 2079 }, { "epoch": 0.3395779764091262, "grad_norm": 3.4813880920410156, "learning_rate": 1.9976036427119008e-05, "loss": 0.9028, "step": 2080 }, { "epoch": 0.33974123505163056, "grad_norm": 3.7496981620788574, "learning_rate": 1.9975992047360714e-05, "loss": 1.0163, "step": 2081 }, { "epoch": 0.33990449369413495, "grad_norm": 3.620027780532837, "learning_rate": 1.9975947626594884e-05, "loss": 0.9583, "step": 2082 }, { "epoch": 0.34006775233663933, "grad_norm": 3.756725311279297, "learning_rate": 1.9975903164821703e-05, "loss": 1.0124, "step": 2083 }, { "epoch": 0.3402310109791437, "grad_norm": 3.6377711296081543, "learning_rate": 1.9975858662041346e-05, "loss": 0.9623, "step": 2084 }, { "epoch": 0.3403942696216481, "grad_norm": 3.538512945175171, "learning_rate": 1.9975814118254007e-05, "loss": 0.8732, "step": 2085 }, { "epoch": 0.3405575282641525, "grad_norm": 3.2928707599639893, "learning_rate": 1.9975769533459857e-05, "loss": 0.8981, "step": 2086 }, { "epoch": 0.34072078690665686, "grad_norm": 3.7443389892578125, "learning_rate": 1.9975724907659088e-05, "loss": 1.0332, "step": 2087 }, { "epoch": 0.34088404554916124, "grad_norm": 4.002806663513184, "learning_rate": 1.9975680240851882e-05, "loss": 1.1406, "step": 2088 }, { "epoch": 0.3410473041916656, "grad_norm": 3.3926565647125244, "learning_rate": 1.997563553303842e-05, "loss": 0.9288, "step": 2089 }, { "epoch": 0.34121056283417006, "grad_norm": 4.396987438201904, "learning_rate": 1.9975590784218886e-05, "loss": 0.9464, "step": 2090 }, { "epoch": 0.34137382147667444, "grad_norm": 3.5162534713745117, "learning_rate": 1.997554599439347e-05, "loss": 0.8789, "step": 2091 }, { "epoch": 0.3415370801191788, "grad_norm": 3.9992120265960693, "learning_rate": 1.9975501163562345e-05, "loss": 1.2009, "step": 2092 }, { "epoch": 0.3417003387616832, "grad_norm": 3.9611053466796875, "learning_rate": 1.9975456291725704e-05, "loss": 1.0189, "step": 2093 }, { "epoch": 0.3418635974041876, "grad_norm": 3.3937931060791016, "learning_rate": 1.9975411378883728e-05, "loss": 1.0338, "step": 2094 }, { "epoch": 0.342026856046692, "grad_norm": 3.598905086517334, "learning_rate": 1.9975366425036602e-05, "loss": 0.9614, "step": 2095 }, { "epoch": 0.34219011468919636, "grad_norm": 3.6538593769073486, "learning_rate": 1.9975321430184513e-05, "loss": 1.1042, "step": 2096 }, { "epoch": 0.34235337333170074, "grad_norm": 3.182905673980713, "learning_rate": 1.9975276394327643e-05, "loss": 0.8384, "step": 2097 }, { "epoch": 0.3425166319742051, "grad_norm": 3.98238468170166, "learning_rate": 1.997523131746618e-05, "loss": 1.2319, "step": 2098 }, { "epoch": 0.3426798906167095, "grad_norm": 3.6268229484558105, "learning_rate": 1.9975186199600305e-05, "loss": 1.1718, "step": 2099 }, { "epoch": 0.3428431492592139, "grad_norm": 3.429856538772583, "learning_rate": 1.997514104073021e-05, "loss": 1.0917, "step": 2100 }, { "epoch": 0.3430064079017183, "grad_norm": 3.2622056007385254, "learning_rate": 1.9975095840856072e-05, "loss": 0.9655, "step": 2101 }, { "epoch": 0.3431696665442227, "grad_norm": 3.3774774074554443, "learning_rate": 1.9975050599978083e-05, "loss": 0.9926, "step": 2102 }, { "epoch": 0.3433329251867271, "grad_norm": 3.573291301727295, "learning_rate": 1.9975005318096428e-05, "loss": 1.1814, "step": 2103 }, { "epoch": 0.34349618382923147, "grad_norm": 3.192711591720581, "learning_rate": 1.9974959995211294e-05, "loss": 1.0643, "step": 2104 }, { "epoch": 0.34365944247173585, "grad_norm": 3.639071226119995, "learning_rate": 1.9974914631322864e-05, "loss": 1.0705, "step": 2105 }, { "epoch": 0.34382270111424024, "grad_norm": 3.563976764678955, "learning_rate": 1.9974869226431327e-05, "loss": 0.8663, "step": 2106 }, { "epoch": 0.3439859597567446, "grad_norm": 3.6619248390197754, "learning_rate": 1.997482378053687e-05, "loss": 0.9848, "step": 2107 }, { "epoch": 0.344149218399249, "grad_norm": 3.413571357727051, "learning_rate": 1.997477829363968e-05, "loss": 1.1497, "step": 2108 }, { "epoch": 0.3443124770417534, "grad_norm": 2.8871819972991943, "learning_rate": 1.997473276573994e-05, "loss": 0.8685, "step": 2109 }, { "epoch": 0.34447573568425777, "grad_norm": 2.879568099975586, "learning_rate": 1.9974687196837838e-05, "loss": 0.8922, "step": 2110 }, { "epoch": 0.34463899432676215, "grad_norm": 3.1228692531585693, "learning_rate": 1.9974641586933567e-05, "loss": 1.0439, "step": 2111 }, { "epoch": 0.3448022529692666, "grad_norm": 3.5658340454101562, "learning_rate": 1.997459593602731e-05, "loss": 0.9309, "step": 2112 }, { "epoch": 0.34496551161177097, "grad_norm": 3.544304847717285, "learning_rate": 1.9974550244119256e-05, "loss": 1.051, "step": 2113 }, { "epoch": 0.34512877025427535, "grad_norm": 3.489992618560791, "learning_rate": 1.9974504511209592e-05, "loss": 0.9404, "step": 2114 }, { "epoch": 0.34529202889677973, "grad_norm": 3.2734148502349854, "learning_rate": 1.997445873729851e-05, "loss": 0.7826, "step": 2115 }, { "epoch": 0.3454552875392841, "grad_norm": 3.535189628601074, "learning_rate": 1.9974412922386187e-05, "loss": 1.0447, "step": 2116 }, { "epoch": 0.3456185461817885, "grad_norm": 3.260518789291382, "learning_rate": 1.997436706647282e-05, "loss": 0.9771, "step": 2117 }, { "epoch": 0.3457818048242929, "grad_norm": 3.3151144981384277, "learning_rate": 1.99743211695586e-05, "loss": 0.9312, "step": 2118 }, { "epoch": 0.34594506346679726, "grad_norm": 3.2186014652252197, "learning_rate": 1.9974275231643712e-05, "loss": 0.8771, "step": 2119 }, { "epoch": 0.34610832210930165, "grad_norm": 3.0645763874053955, "learning_rate": 1.9974229252728345e-05, "loss": 0.7761, "step": 2120 }, { "epoch": 0.34627158075180603, "grad_norm": 3.734816312789917, "learning_rate": 1.9974183232812684e-05, "loss": 0.973, "step": 2121 }, { "epoch": 0.3464348393943104, "grad_norm": 4.160892009735107, "learning_rate": 1.997413717189692e-05, "loss": 1.1636, "step": 2122 }, { "epoch": 0.34659809803681485, "grad_norm": 3.959371328353882, "learning_rate": 1.997409106998125e-05, "loss": 1.9232, "step": 2123 }, { "epoch": 0.34676135667931923, "grad_norm": 3.533341884613037, "learning_rate": 1.9974044927065857e-05, "loss": 0.9379, "step": 2124 }, { "epoch": 0.3469246153218236, "grad_norm": 3.5210797786712646, "learning_rate": 1.997399874315093e-05, "loss": 0.9858, "step": 2125 }, { "epoch": 0.347087873964328, "grad_norm": 3.419261932373047, "learning_rate": 1.9973952518236662e-05, "loss": 0.8273, "step": 2126 }, { "epoch": 0.3472511326068324, "grad_norm": 3.745011329650879, "learning_rate": 1.997390625232324e-05, "loss": 1.0629, "step": 2127 }, { "epoch": 0.34741439124933676, "grad_norm": 3.6376616954803467, "learning_rate": 1.9973859945410854e-05, "loss": 0.8525, "step": 2128 }, { "epoch": 0.34757764989184115, "grad_norm": 3.2946081161499023, "learning_rate": 1.9973813597499695e-05, "loss": 1.083, "step": 2129 }, { "epoch": 0.3477409085343455, "grad_norm": 3.700737476348877, "learning_rate": 1.997376720858996e-05, "loss": 1.029, "step": 2130 }, { "epoch": 0.3479041671768499, "grad_norm": 3.338536024093628, "learning_rate": 1.997372077868183e-05, "loss": 0.9253, "step": 2131 }, { "epoch": 0.3480674258193543, "grad_norm": 3.5673818588256836, "learning_rate": 1.99736743077755e-05, "loss": 0.9257, "step": 2132 }, { "epoch": 0.3482306844618587, "grad_norm": 3.385239362716675, "learning_rate": 1.997362779587116e-05, "loss": 1.0105, "step": 2133 }, { "epoch": 0.3483939431043631, "grad_norm": 4.137216567993164, "learning_rate": 1.9973581242969e-05, "loss": 1.2698, "step": 2134 }, { "epoch": 0.3485572017468675, "grad_norm": 3.467165231704712, "learning_rate": 1.997353464906922e-05, "loss": 0.9754, "step": 2135 }, { "epoch": 0.3487204603893719, "grad_norm": 3.064056396484375, "learning_rate": 1.9973488014172e-05, "loss": 1.0076, "step": 2136 }, { "epoch": 0.34888371903187626, "grad_norm": 3.3145322799682617, "learning_rate": 1.9973441338277542e-05, "loss": 1.019, "step": 2137 }, { "epoch": 0.34904697767438064, "grad_norm": 3.376323938369751, "learning_rate": 1.9973394621386028e-05, "loss": 0.9801, "step": 2138 }, { "epoch": 0.349210236316885, "grad_norm": 3.706911563873291, "learning_rate": 1.9973347863497658e-05, "loss": 1.9055, "step": 2139 }, { "epoch": 0.3493734949593894, "grad_norm": 3.5325560569763184, "learning_rate": 1.9973301064612615e-05, "loss": 0.8774, "step": 2140 }, { "epoch": 0.3495367536018938, "grad_norm": 3.43524432182312, "learning_rate": 1.9973254224731104e-05, "loss": 1.0312, "step": 2141 }, { "epoch": 0.3497000122443982, "grad_norm": 3.4360201358795166, "learning_rate": 1.9973207343853306e-05, "loss": 0.9775, "step": 2142 }, { "epoch": 0.34986327088690256, "grad_norm": 3.1801319122314453, "learning_rate": 1.997316042197942e-05, "loss": 0.9697, "step": 2143 }, { "epoch": 0.35002652952940694, "grad_norm": 3.2219178676605225, "learning_rate": 1.9973113459109637e-05, "loss": 0.9152, "step": 2144 }, { "epoch": 0.3501897881719114, "grad_norm": 3.477848529815674, "learning_rate": 1.997306645524415e-05, "loss": 1.0547, "step": 2145 }, { "epoch": 0.35035304681441576, "grad_norm": 3.9005165100097656, "learning_rate": 1.9973019410383155e-05, "loss": 1.0086, "step": 2146 }, { "epoch": 0.35051630545692014, "grad_norm": 3.558713912963867, "learning_rate": 1.997297232452684e-05, "loss": 1.1561, "step": 2147 }, { "epoch": 0.3506795640994245, "grad_norm": 3.2233903408050537, "learning_rate": 1.99729251976754e-05, "loss": 1.0291, "step": 2148 }, { "epoch": 0.3508428227419289, "grad_norm": 3.1193277835845947, "learning_rate": 1.9972878029829034e-05, "loss": 0.8867, "step": 2149 }, { "epoch": 0.3510060813844333, "grad_norm": 3.451944351196289, "learning_rate": 1.9972830820987933e-05, "loss": 1.0365, "step": 2150 }, { "epoch": 0.35116934002693767, "grad_norm": 3.561962366104126, "learning_rate": 1.9972783571152287e-05, "loss": 0.9571, "step": 2151 }, { "epoch": 0.35133259866944205, "grad_norm": 3.1693806648254395, "learning_rate": 1.9972736280322296e-05, "loss": 1.0329, "step": 2152 }, { "epoch": 0.35149585731194644, "grad_norm": 3.2677223682403564, "learning_rate": 1.997268894849815e-05, "loss": 0.9931, "step": 2153 }, { "epoch": 0.3516591159544508, "grad_norm": 3.6405200958251953, "learning_rate": 1.9972641575680045e-05, "loss": 1.0533, "step": 2154 }, { "epoch": 0.3518223745969552, "grad_norm": 3.000051498413086, "learning_rate": 1.997259416186818e-05, "loss": 0.8252, "step": 2155 }, { "epoch": 0.35198563323945964, "grad_norm": 3.474614381790161, "learning_rate": 1.997254670706274e-05, "loss": 1.2134, "step": 2156 }, { "epoch": 0.352148891881964, "grad_norm": 3.1840293407440186, "learning_rate": 1.9972499211263928e-05, "loss": 0.9254, "step": 2157 }, { "epoch": 0.3523121505244684, "grad_norm": 3.4303412437438965, "learning_rate": 1.997245167447194e-05, "loss": 1.0418, "step": 2158 }, { "epoch": 0.3524754091669728, "grad_norm": 2.8876843452453613, "learning_rate": 1.9972404096686967e-05, "loss": 0.8171, "step": 2159 }, { "epoch": 0.35263866780947717, "grad_norm": 3.178147077560425, "learning_rate": 1.9972356477909204e-05, "loss": 0.892, "step": 2160 }, { "epoch": 0.35280192645198155, "grad_norm": 3.515718698501587, "learning_rate": 1.997230881813885e-05, "loss": 0.898, "step": 2161 }, { "epoch": 0.35296518509448593, "grad_norm": 3.7137110233306885, "learning_rate": 1.9972261117376105e-05, "loss": 1.1051, "step": 2162 }, { "epoch": 0.3531284437369903, "grad_norm": 3.2051095962524414, "learning_rate": 1.9972213375621155e-05, "loss": 0.8742, "step": 2163 }, { "epoch": 0.3532917023794947, "grad_norm": 4.116945743560791, "learning_rate": 1.99721655928742e-05, "loss": 1.2565, "step": 2164 }, { "epoch": 0.3534549610219991, "grad_norm": 3.56455135345459, "learning_rate": 1.9972117769135438e-05, "loss": 1.0283, "step": 2165 }, { "epoch": 0.35361821966450346, "grad_norm": 3.16976261138916, "learning_rate": 1.997206990440507e-05, "loss": 0.8114, "step": 2166 }, { "epoch": 0.3537814783070079, "grad_norm": 3.733063220977783, "learning_rate": 1.9972021998683285e-05, "loss": 0.9558, "step": 2167 }, { "epoch": 0.3539447369495123, "grad_norm": 3.6225650310516357, "learning_rate": 1.9971974051970286e-05, "loss": 0.8359, "step": 2168 }, { "epoch": 0.35410799559201667, "grad_norm": 3.412644386291504, "learning_rate": 1.9971926064266262e-05, "loss": 0.9494, "step": 2169 }, { "epoch": 0.35427125423452105, "grad_norm": 3.373670816421509, "learning_rate": 1.9971878035571417e-05, "loss": 0.911, "step": 2170 }, { "epoch": 0.35443451287702543, "grad_norm": 3.7360212802886963, "learning_rate": 1.9971829965885947e-05, "loss": 1.0776, "step": 2171 }, { "epoch": 0.3545977715195298, "grad_norm": 3.5548617839813232, "learning_rate": 1.997178185521005e-05, "loss": 1.0019, "step": 2172 }, { "epoch": 0.3547610301620342, "grad_norm": 4.271081924438477, "learning_rate": 1.9971733703543924e-05, "loss": 1.1115, "step": 2173 }, { "epoch": 0.3549242888045386, "grad_norm": 3.8739328384399414, "learning_rate": 1.9971685510887762e-05, "loss": 1.0834, "step": 2174 }, { "epoch": 0.35508754744704296, "grad_norm": 3.9036030769348145, "learning_rate": 1.997163727724177e-05, "loss": 0.9761, "step": 2175 }, { "epoch": 0.35525080608954734, "grad_norm": 3.2610127925872803, "learning_rate": 1.997158900260614e-05, "loss": 0.7645, "step": 2176 }, { "epoch": 0.3554140647320517, "grad_norm": 3.5541210174560547, "learning_rate": 1.9971540686981074e-05, "loss": 1.0176, "step": 2177 }, { "epoch": 0.35557732337455616, "grad_norm": 3.076847553253174, "learning_rate": 1.997149233036677e-05, "loss": 0.8936, "step": 2178 }, { "epoch": 0.35574058201706055, "grad_norm": 3.4348199367523193, "learning_rate": 1.9971443932763426e-05, "loss": 0.9683, "step": 2179 }, { "epoch": 0.35590384065956493, "grad_norm": 3.507500648498535, "learning_rate": 1.9971395494171242e-05, "loss": 1.0362, "step": 2180 }, { "epoch": 0.3560670993020693, "grad_norm": 6.370018482208252, "learning_rate": 1.9971347014590412e-05, "loss": 0.9959, "step": 2181 }, { "epoch": 0.3562303579445737, "grad_norm": 3.3810548782348633, "learning_rate": 1.9971298494021142e-05, "loss": 1.1902, "step": 2182 }, { "epoch": 0.3563936165870781, "grad_norm": 3.070791006088257, "learning_rate": 1.997124993246363e-05, "loss": 0.8903, "step": 2183 }, { "epoch": 0.35655687522958246, "grad_norm": 3.7795701026916504, "learning_rate": 1.9971201329918075e-05, "loss": 0.8653, "step": 2184 }, { "epoch": 0.35672013387208684, "grad_norm": 3.7692973613739014, "learning_rate": 1.9971152686384677e-05, "loss": 0.9092, "step": 2185 }, { "epoch": 0.3568833925145912, "grad_norm": 3.0687732696533203, "learning_rate": 1.9971104001863632e-05, "loss": 0.9825, "step": 2186 }, { "epoch": 0.3570466511570956, "grad_norm": 3.6684420108795166, "learning_rate": 1.9971055276355144e-05, "loss": 0.8899, "step": 2187 }, { "epoch": 0.3572099097996, "grad_norm": 3.1402533054351807, "learning_rate": 1.9971006509859416e-05, "loss": 0.7295, "step": 2188 }, { "epoch": 0.3573731684421044, "grad_norm": 3.8397414684295654, "learning_rate": 1.997095770237664e-05, "loss": 0.9653, "step": 2189 }, { "epoch": 0.3575364270846088, "grad_norm": 3.9502360820770264, "learning_rate": 1.9970908853907027e-05, "loss": 1.0041, "step": 2190 }, { "epoch": 0.3576996857271132, "grad_norm": 3.9520835876464844, "learning_rate": 1.9970859964450772e-05, "loss": 1.8377, "step": 2191 }, { "epoch": 0.3578629443696176, "grad_norm": 3.2044777870178223, "learning_rate": 1.9970811034008077e-05, "loss": 0.8436, "step": 2192 }, { "epoch": 0.35802620301212196, "grad_norm": 3.5564181804656982, "learning_rate": 1.9970762062579138e-05, "loss": 1.021, "step": 2193 }, { "epoch": 0.35818946165462634, "grad_norm": 3.848295211791992, "learning_rate": 1.9970713050164165e-05, "loss": 0.9363, "step": 2194 }, { "epoch": 0.3583527202971307, "grad_norm": 3.289011001586914, "learning_rate": 1.9970663996763355e-05, "loss": 0.939, "step": 2195 }, { "epoch": 0.3585159789396351, "grad_norm": 3.3189964294433594, "learning_rate": 1.997061490237691e-05, "loss": 0.9378, "step": 2196 }, { "epoch": 0.3586792375821395, "grad_norm": 3.5453615188598633, "learning_rate": 1.9970565767005033e-05, "loss": 1.089, "step": 2197 }, { "epoch": 0.35884249622464387, "grad_norm": 2.8112034797668457, "learning_rate": 1.9970516590647924e-05, "loss": 0.6998, "step": 2198 }, { "epoch": 0.35900575486714825, "grad_norm": 3.0246524810791016, "learning_rate": 1.9970467373305788e-05, "loss": 1.0449, "step": 2199 }, { "epoch": 0.3591690135096527, "grad_norm": 3.5995583534240723, "learning_rate": 1.997041811497882e-05, "loss": 1.1121, "step": 2200 }, { "epoch": 0.3593322721521571, "grad_norm": 3.1210081577301025, "learning_rate": 1.9970368815667233e-05, "loss": 0.9362, "step": 2201 }, { "epoch": 0.35949553079466146, "grad_norm": 3.579019069671631, "learning_rate": 1.997031947537122e-05, "loss": 0.9217, "step": 2202 }, { "epoch": 0.35965878943716584, "grad_norm": 3.72670578956604, "learning_rate": 1.9970270094090994e-05, "loss": 0.8955, "step": 2203 }, { "epoch": 0.3598220480796702, "grad_norm": 3.5068795680999756, "learning_rate": 1.997022067182675e-05, "loss": 1.1827, "step": 2204 }, { "epoch": 0.3599853067221746, "grad_norm": 3.7139041423797607, "learning_rate": 1.9970171208578693e-05, "loss": 1.0506, "step": 2205 }, { "epoch": 0.360148565364679, "grad_norm": 3.8769543170928955, "learning_rate": 1.9970121704347026e-05, "loss": 1.1181, "step": 2206 }, { "epoch": 0.36031182400718337, "grad_norm": 4.268768310546875, "learning_rate": 1.9970072159131955e-05, "loss": 1.0578, "step": 2207 }, { "epoch": 0.36047508264968775, "grad_norm": 3.1962697505950928, "learning_rate": 1.997002257293368e-05, "loss": 0.811, "step": 2208 }, { "epoch": 0.36063834129219213, "grad_norm": 3.7729148864746094, "learning_rate": 1.9969972945752408e-05, "loss": 1.0233, "step": 2209 }, { "epoch": 0.3608015999346965, "grad_norm": 3.4830470085144043, "learning_rate": 1.9969923277588343e-05, "loss": 0.9199, "step": 2210 }, { "epoch": 0.36096485857720095, "grad_norm": 3.306997060775757, "learning_rate": 1.9969873568441686e-05, "loss": 0.9171, "step": 2211 }, { "epoch": 0.36112811721970534, "grad_norm": 3.6001813411712646, "learning_rate": 1.9969823818312645e-05, "loss": 1.1352, "step": 2212 }, { "epoch": 0.3612913758622097, "grad_norm": 3.723010540008545, "learning_rate": 1.996977402720142e-05, "loss": 0.9704, "step": 2213 }, { "epoch": 0.3614546345047141, "grad_norm": 3.134044885635376, "learning_rate": 1.996972419510822e-05, "loss": 0.838, "step": 2214 }, { "epoch": 0.3616178931472185, "grad_norm": 3.618777275085449, "learning_rate": 1.996967432203325e-05, "loss": 1.1295, "step": 2215 }, { "epoch": 0.36178115178972287, "grad_norm": 2.9090964794158936, "learning_rate": 1.9969624407976714e-05, "loss": 0.7967, "step": 2216 }, { "epoch": 0.36194441043222725, "grad_norm": 3.68804931640625, "learning_rate": 1.9969574452938812e-05, "loss": 1.0067, "step": 2217 }, { "epoch": 0.36210766907473163, "grad_norm": 3.355421781539917, "learning_rate": 1.9969524456919758e-05, "loss": 1.0212, "step": 2218 }, { "epoch": 0.362270927717236, "grad_norm": 3.931964159011841, "learning_rate": 1.9969474419919753e-05, "loss": 1.1488, "step": 2219 }, { "epoch": 0.3624341863597404, "grad_norm": 3.847097635269165, "learning_rate": 1.9969424341939e-05, "loss": 1.1573, "step": 2220 }, { "epoch": 0.36259744500224483, "grad_norm": 3.53804349899292, "learning_rate": 1.996937422297771e-05, "loss": 0.998, "step": 2221 }, { "epoch": 0.3627607036447492, "grad_norm": 3.147508382797241, "learning_rate": 1.9969324063036088e-05, "loss": 1.0093, "step": 2222 }, { "epoch": 0.3629239622872536, "grad_norm": 3.2707126140594482, "learning_rate": 1.9969273862114337e-05, "loss": 0.9376, "step": 2223 }, { "epoch": 0.363087220929758, "grad_norm": 3.185176134109497, "learning_rate": 1.9969223620212667e-05, "loss": 0.8746, "step": 2224 }, { "epoch": 0.36325047957226236, "grad_norm": 3.8207509517669678, "learning_rate": 1.9969173337331283e-05, "loss": 1.0751, "step": 2225 }, { "epoch": 0.36341373821476675, "grad_norm": 4.230364799499512, "learning_rate": 1.996912301347039e-05, "loss": 1.2639, "step": 2226 }, { "epoch": 0.36357699685727113, "grad_norm": 3.624915838241577, "learning_rate": 1.99690726486302e-05, "loss": 0.9725, "step": 2227 }, { "epoch": 0.3637402554997755, "grad_norm": 3.7202093601226807, "learning_rate": 1.996902224281091e-05, "loss": 1.0078, "step": 2228 }, { "epoch": 0.3639035141422799, "grad_norm": 3.762894868850708, "learning_rate": 1.996897179601274e-05, "loss": 0.938, "step": 2229 }, { "epoch": 0.3640667727847843, "grad_norm": 3.199633836746216, "learning_rate": 1.9968921308235888e-05, "loss": 1.0532, "step": 2230 }, { "epoch": 0.36423003142728866, "grad_norm": 3.197383403778076, "learning_rate": 1.9968870779480568e-05, "loss": 0.9963, "step": 2231 }, { "epoch": 0.3643932900697931, "grad_norm": 3.6842095851898193, "learning_rate": 1.9968820209746983e-05, "loss": 1.0097, "step": 2232 }, { "epoch": 0.3645565487122975, "grad_norm": 3.3016810417175293, "learning_rate": 1.996876959903534e-05, "loss": 1.0443, "step": 2233 }, { "epoch": 0.36471980735480186, "grad_norm": 2.892435073852539, "learning_rate": 1.996871894734585e-05, "loss": 1.0825, "step": 2234 }, { "epoch": 0.36488306599730624, "grad_norm": 3.5682287216186523, "learning_rate": 1.996866825467872e-05, "loss": 0.9029, "step": 2235 }, { "epoch": 0.3650463246398106, "grad_norm": 3.012126922607422, "learning_rate": 1.9968617521034164e-05, "loss": 1.0623, "step": 2236 }, { "epoch": 0.365209583282315, "grad_norm": 3.373264789581299, "learning_rate": 1.996856674641238e-05, "loss": 1.0354, "step": 2237 }, { "epoch": 0.3653728419248194, "grad_norm": 3.3601880073547363, "learning_rate": 1.9968515930813587e-05, "loss": 0.9117, "step": 2238 }, { "epoch": 0.3655361005673238, "grad_norm": 3.084181785583496, "learning_rate": 1.9968465074237984e-05, "loss": 0.8506, "step": 2239 }, { "epoch": 0.36569935920982816, "grad_norm": 3.3099095821380615, "learning_rate": 1.996841417668579e-05, "loss": 0.9264, "step": 2240 }, { "epoch": 0.36586261785233254, "grad_norm": 2.95800518989563, "learning_rate": 1.9968363238157203e-05, "loss": 0.8442, "step": 2241 }, { "epoch": 0.3660258764948369, "grad_norm": 3.096963405609131, "learning_rate": 1.9968312258652443e-05, "loss": 1.0739, "step": 2242 }, { "epoch": 0.36618913513734136, "grad_norm": 3.4837582111358643, "learning_rate": 1.9968261238171716e-05, "loss": 0.9403, "step": 2243 }, { "epoch": 0.36635239377984574, "grad_norm": 3.6408629417419434, "learning_rate": 1.9968210176715227e-05, "loss": 1.2604, "step": 2244 }, { "epoch": 0.3665156524223501, "grad_norm": 3.7152750492095947, "learning_rate": 1.9968159074283195e-05, "loss": 1.2473, "step": 2245 }, { "epoch": 0.3666789110648545, "grad_norm": 3.725308418273926, "learning_rate": 1.996810793087582e-05, "loss": 1.0339, "step": 2246 }, { "epoch": 0.3668421697073589, "grad_norm": 4.115779876708984, "learning_rate": 1.9968056746493324e-05, "loss": 1.1086, "step": 2247 }, { "epoch": 0.36700542834986327, "grad_norm": 3.1537301540374756, "learning_rate": 1.9968005521135906e-05, "loss": 0.8691, "step": 2248 }, { "epoch": 0.36716868699236765, "grad_norm": 3.5511419773101807, "learning_rate": 1.9967954254803777e-05, "loss": 1.0625, "step": 2249 }, { "epoch": 0.36733194563487204, "grad_norm": 3.047214984893799, "learning_rate": 1.9967902947497158e-05, "loss": 0.8753, "step": 2250 }, { "epoch": 0.3674952042773764, "grad_norm": 3.680708408355713, "learning_rate": 1.9967851599216252e-05, "loss": 0.9521, "step": 2251 }, { "epoch": 0.3676584629198808, "grad_norm": 3.857689142227173, "learning_rate": 1.9967800209961272e-05, "loss": 1.0963, "step": 2252 }, { "epoch": 0.3678217215623852, "grad_norm": 2.779219150543213, "learning_rate": 1.996774877973243e-05, "loss": 0.6802, "step": 2253 }, { "epoch": 0.3679849802048896, "grad_norm": 4.312717914581299, "learning_rate": 1.9967697308529935e-05, "loss": 1.0682, "step": 2254 }, { "epoch": 0.368148238847394, "grad_norm": 3.5425052642822266, "learning_rate": 1.9967645796354002e-05, "loss": 0.9518, "step": 2255 }, { "epoch": 0.3683114974898984, "grad_norm": 3.091992139816284, "learning_rate": 1.996759424320484e-05, "loss": 0.9203, "step": 2256 }, { "epoch": 0.36847475613240277, "grad_norm": 3.4598350524902344, "learning_rate": 1.996754264908266e-05, "loss": 0.9689, "step": 2257 }, { "epoch": 0.36863801477490715, "grad_norm": 3.444434404373169, "learning_rate": 1.9967491013987675e-05, "loss": 0.9404, "step": 2258 }, { "epoch": 0.36880127341741153, "grad_norm": 3.566096544265747, "learning_rate": 1.9967439337920103e-05, "loss": 1.9833, "step": 2259 }, { "epoch": 0.3689645320599159, "grad_norm": 3.421224594116211, "learning_rate": 1.9967387620880147e-05, "loss": 0.8542, "step": 2260 }, { "epoch": 0.3691277907024203, "grad_norm": 3.7864770889282227, "learning_rate": 1.996733586286803e-05, "loss": 0.9513, "step": 2261 }, { "epoch": 0.3692910493449247, "grad_norm": 3.74355149269104, "learning_rate": 1.996728406388395e-05, "loss": 1.1115, "step": 2262 }, { "epoch": 0.36945430798742906, "grad_norm": 3.2287514209747314, "learning_rate": 1.9967232223928134e-05, "loss": 1.0294, "step": 2263 }, { "epoch": 0.36961756662993345, "grad_norm": 3.39884352684021, "learning_rate": 1.996718034300079e-05, "loss": 0.86, "step": 2264 }, { "epoch": 0.3697808252724379, "grad_norm": 3.975569009780884, "learning_rate": 1.9967128421102132e-05, "loss": 1.097, "step": 2265 }, { "epoch": 0.36994408391494227, "grad_norm": 3.5087521076202393, "learning_rate": 1.996707645823237e-05, "loss": 1.0158, "step": 2266 }, { "epoch": 0.37010734255744665, "grad_norm": 3.4261815547943115, "learning_rate": 1.9967024454391722e-05, "loss": 0.9727, "step": 2267 }, { "epoch": 0.37027060119995103, "grad_norm": 3.5623714923858643, "learning_rate": 1.99669724095804e-05, "loss": 1.1072, "step": 2268 }, { "epoch": 0.3704338598424554, "grad_norm": 3.3089585304260254, "learning_rate": 1.9966920323798617e-05, "loss": 0.9196, "step": 2269 }, { "epoch": 0.3705971184849598, "grad_norm": 3.2244184017181396, "learning_rate": 1.996686819704659e-05, "loss": 0.9685, "step": 2270 }, { "epoch": 0.3707603771274642, "grad_norm": 3.6311936378479004, "learning_rate": 1.996681602932453e-05, "loss": 0.7681, "step": 2271 }, { "epoch": 0.37092363576996856, "grad_norm": 3.1358418464660645, "learning_rate": 1.996676382063265e-05, "loss": 0.8, "step": 2272 }, { "epoch": 0.37108689441247295, "grad_norm": 3.4825212955474854, "learning_rate": 1.9966711570971167e-05, "loss": 0.9772, "step": 2273 }, { "epoch": 0.3712501530549773, "grad_norm": 3.396693468093872, "learning_rate": 1.99666592803403e-05, "loss": 1.0015, "step": 2274 }, { "epoch": 0.3714134116974817, "grad_norm": 3.362931966781616, "learning_rate": 1.9966606948740258e-05, "loss": 0.9765, "step": 2275 }, { "epoch": 0.37157667033998615, "grad_norm": 3.2471845149993896, "learning_rate": 1.996655457617126e-05, "loss": 0.8812, "step": 2276 }, { "epoch": 0.37173992898249053, "grad_norm": 3.157592535018921, "learning_rate": 1.996650216263352e-05, "loss": 0.7573, "step": 2277 }, { "epoch": 0.3719031876249949, "grad_norm": 3.285759925842285, "learning_rate": 1.996644970812725e-05, "loss": 0.9783, "step": 2278 }, { "epoch": 0.3720664462674993, "grad_norm": 3.370115041732788, "learning_rate": 1.9966397212652667e-05, "loss": 0.9411, "step": 2279 }, { "epoch": 0.3722297049100037, "grad_norm": 3.309525728225708, "learning_rate": 1.9966344676209993e-05, "loss": 0.9401, "step": 2280 }, { "epoch": 0.37239296355250806, "grad_norm": 2.9801814556121826, "learning_rate": 1.9966292098799438e-05, "loss": 0.9503, "step": 2281 }, { "epoch": 0.37255622219501244, "grad_norm": 3.5182197093963623, "learning_rate": 1.996623948042122e-05, "loss": 0.9862, "step": 2282 }, { "epoch": 0.3727194808375168, "grad_norm": 3.4305574893951416, "learning_rate": 1.9966186821075552e-05, "loss": 0.8607, "step": 2283 }, { "epoch": 0.3728827394800212, "grad_norm": 3.7066266536712646, "learning_rate": 1.996613412076265e-05, "loss": 1.1806, "step": 2284 }, { "epoch": 0.3730459981225256, "grad_norm": 3.705768585205078, "learning_rate": 1.9966081379482744e-05, "loss": 1.0696, "step": 2285 }, { "epoch": 0.37320925676503, "grad_norm": 3.29054594039917, "learning_rate": 1.9966028597236034e-05, "loss": 0.9184, "step": 2286 }, { "epoch": 0.3733725154075344, "grad_norm": 3.4716556072235107, "learning_rate": 1.9965975774022742e-05, "loss": 0.9295, "step": 2287 }, { "epoch": 0.3735357740500388, "grad_norm": 3.520007371902466, "learning_rate": 1.9965922909843088e-05, "loss": 0.9852, "step": 2288 }, { "epoch": 0.3736990326925432, "grad_norm": 3.5189883708953857, "learning_rate": 1.996587000469729e-05, "loss": 1.1887, "step": 2289 }, { "epoch": 0.37386229133504756, "grad_norm": 2.8894197940826416, "learning_rate": 1.9965817058585563e-05, "loss": 0.9575, "step": 2290 }, { "epoch": 0.37402554997755194, "grad_norm": 2.996992588043213, "learning_rate": 1.9965764071508123e-05, "loss": 0.8984, "step": 2291 }, { "epoch": 0.3741888086200563, "grad_norm": 3.2609004974365234, "learning_rate": 1.9965711043465195e-05, "loss": 0.8865, "step": 2292 }, { "epoch": 0.3743520672625607, "grad_norm": 3.175701141357422, "learning_rate": 1.996565797445699e-05, "loss": 1.1211, "step": 2293 }, { "epoch": 0.3745153259050651, "grad_norm": 3.761770248413086, "learning_rate": 1.9965604864483725e-05, "loss": 1.1928, "step": 2294 }, { "epoch": 0.37467858454756947, "grad_norm": 3.600050210952759, "learning_rate": 1.996555171354562e-05, "loss": 1.0297, "step": 2295 }, { "epoch": 0.37484184319007385, "grad_norm": 3.2972171306610107, "learning_rate": 1.9965498521642898e-05, "loss": 0.9899, "step": 2296 }, { "epoch": 0.37500510183257824, "grad_norm": 3.533785104751587, "learning_rate": 1.9965445288775775e-05, "loss": 0.97, "step": 2297 }, { "epoch": 0.3751683604750827, "grad_norm": 2.861572027206421, "learning_rate": 1.996539201494447e-05, "loss": 0.9199, "step": 2298 }, { "epoch": 0.37533161911758706, "grad_norm": 3.4344868659973145, "learning_rate": 1.99653387001492e-05, "loss": 0.8583, "step": 2299 }, { "epoch": 0.37549487776009144, "grad_norm": 3.861640214920044, "learning_rate": 1.9965285344390185e-05, "loss": 1.0681, "step": 2300 }, { "epoch": 0.3756581364025958, "grad_norm": 3.148479700088501, "learning_rate": 1.9965231947667646e-05, "loss": 0.8899, "step": 2301 }, { "epoch": 0.3758213950451002, "grad_norm": 3.2508468627929688, "learning_rate": 1.9965178509981796e-05, "loss": 0.9352, "step": 2302 }, { "epoch": 0.3759846536876046, "grad_norm": 3.04530668258667, "learning_rate": 1.9965125031332865e-05, "loss": 0.8839, "step": 2303 }, { "epoch": 0.37614791233010897, "grad_norm": 4.074849605560303, "learning_rate": 1.9965071511721067e-05, "loss": 1.0697, "step": 2304 }, { "epoch": 0.37631117097261335, "grad_norm": 3.569883108139038, "learning_rate": 1.9965017951146625e-05, "loss": 0.872, "step": 2305 }, { "epoch": 0.37647442961511773, "grad_norm": 4.3187947273254395, "learning_rate": 1.9964964349609753e-05, "loss": 1.1762, "step": 2306 }, { "epoch": 0.3766376882576221, "grad_norm": 3.1148903369903564, "learning_rate": 1.9964910707110677e-05, "loss": 1.0204, "step": 2307 }, { "epoch": 0.3768009469001265, "grad_norm": 3.4809975624084473, "learning_rate": 1.9964857023649616e-05, "loss": 0.9527, "step": 2308 }, { "epoch": 0.37696420554263094, "grad_norm": 3.2348177433013916, "learning_rate": 1.996480329922679e-05, "loss": 0.9718, "step": 2309 }, { "epoch": 0.3771274641851353, "grad_norm": 3.1776599884033203, "learning_rate": 1.996474953384242e-05, "loss": 0.832, "step": 2310 }, { "epoch": 0.3772907228276397, "grad_norm": 3.484572410583496, "learning_rate": 1.996469572749673e-05, "loss": 0.9239, "step": 2311 }, { "epoch": 0.3774539814701441, "grad_norm": 3.64207124710083, "learning_rate": 1.9964641880189934e-05, "loss": 1.0516, "step": 2312 }, { "epoch": 0.37761724011264847, "grad_norm": 3.9054813385009766, "learning_rate": 1.996458799192226e-05, "loss": 1.0388, "step": 2313 }, { "epoch": 0.37778049875515285, "grad_norm": 2.970759391784668, "learning_rate": 1.9964534062693928e-05, "loss": 0.9649, "step": 2314 }, { "epoch": 0.37794375739765723, "grad_norm": 3.45681095123291, "learning_rate": 1.9964480092505156e-05, "loss": 1.0718, "step": 2315 }, { "epoch": 0.3781070160401616, "grad_norm": 3.7501604557037354, "learning_rate": 1.9964426081356172e-05, "loss": 0.9761, "step": 2316 }, { "epoch": 0.378270274682666, "grad_norm": 3.257992744445801, "learning_rate": 1.9964372029247195e-05, "loss": 0.8783, "step": 2317 }, { "epoch": 0.3784335333251704, "grad_norm": 3.4480981826782227, "learning_rate": 1.9964317936178446e-05, "loss": 0.9036, "step": 2318 }, { "epoch": 0.37859679196767476, "grad_norm": 3.323176622390747, "learning_rate": 1.996426380215015e-05, "loss": 0.8859, "step": 2319 }, { "epoch": 0.3787600506101792, "grad_norm": 3.653935194015503, "learning_rate": 1.9964209627162526e-05, "loss": 0.8501, "step": 2320 }, { "epoch": 0.3789233092526836, "grad_norm": 3.726771593093872, "learning_rate": 1.9964155411215798e-05, "loss": 1.0376, "step": 2321 }, { "epoch": 0.37908656789518796, "grad_norm": 3.121187686920166, "learning_rate": 1.9964101154310193e-05, "loss": 0.8315, "step": 2322 }, { "epoch": 0.37924982653769235, "grad_norm": 5.122281551361084, "learning_rate": 1.9964046856445926e-05, "loss": 0.9846, "step": 2323 }, { "epoch": 0.37941308518019673, "grad_norm": 3.5709595680236816, "learning_rate": 1.9963992517623225e-05, "loss": 0.9991, "step": 2324 }, { "epoch": 0.3795763438227011, "grad_norm": 3.5727572441101074, "learning_rate": 1.9963938137842314e-05, "loss": 0.9611, "step": 2325 }, { "epoch": 0.3797396024652055, "grad_norm": 3.410764694213867, "learning_rate": 1.996388371710342e-05, "loss": 1.0007, "step": 2326 }, { "epoch": 0.3799028611077099, "grad_norm": 3.29689884185791, "learning_rate": 1.996382925540676e-05, "loss": 0.9762, "step": 2327 }, { "epoch": 0.38006611975021426, "grad_norm": 3.254183292388916, "learning_rate": 1.9963774752752555e-05, "loss": 0.9887, "step": 2328 }, { "epoch": 0.38022937839271864, "grad_norm": 3.198925733566284, "learning_rate": 1.996372020914104e-05, "loss": 1.0029, "step": 2329 }, { "epoch": 0.380392637035223, "grad_norm": 3.1337783336639404, "learning_rate": 1.9963665624572433e-05, "loss": 0.8459, "step": 2330 }, { "epoch": 0.38055589567772746, "grad_norm": 3.460981607437134, "learning_rate": 1.9963610999046956e-05, "loss": 1.0864, "step": 2331 }, { "epoch": 0.38071915432023185, "grad_norm": 3.238992691040039, "learning_rate": 1.996355633256484e-05, "loss": 0.9829, "step": 2332 }, { "epoch": 0.3808824129627362, "grad_norm": 3.6259710788726807, "learning_rate": 1.9963501625126303e-05, "loss": 0.9615, "step": 2333 }, { "epoch": 0.3810456716052406, "grad_norm": 4.60214900970459, "learning_rate": 1.9963446876731574e-05, "loss": 1.1958, "step": 2334 }, { "epoch": 0.381208930247745, "grad_norm": 3.475637435913086, "learning_rate": 1.996339208738088e-05, "loss": 0.9347, "step": 2335 }, { "epoch": 0.3813721888902494, "grad_norm": 3.7668728828430176, "learning_rate": 1.996333725707444e-05, "loss": 1.3081, "step": 2336 }, { "epoch": 0.38153544753275376, "grad_norm": 3.316436529159546, "learning_rate": 1.9963282385812482e-05, "loss": 0.9377, "step": 2337 }, { "epoch": 0.38169870617525814, "grad_norm": 3.761495351791382, "learning_rate": 1.9963227473595233e-05, "loss": 1.1174, "step": 2338 }, { "epoch": 0.3818619648177625, "grad_norm": 3.890545129776001, "learning_rate": 1.9963172520422917e-05, "loss": 1.2107, "step": 2339 }, { "epoch": 0.3820252234602669, "grad_norm": 3.1609771251678467, "learning_rate": 1.996311752629576e-05, "loss": 0.8758, "step": 2340 }, { "epoch": 0.38218848210277134, "grad_norm": 3.532468795776367, "learning_rate": 1.9963062491213994e-05, "loss": 1.0359, "step": 2341 }, { "epoch": 0.3823517407452757, "grad_norm": 3.4111671447753906, "learning_rate": 1.9963007415177835e-05, "loss": 1.0369, "step": 2342 }, { "epoch": 0.3825149993877801, "grad_norm": 3.840258836746216, "learning_rate": 1.9962952298187518e-05, "loss": 1.3407, "step": 2343 }, { "epoch": 0.3826782580302845, "grad_norm": 3.158590793609619, "learning_rate": 1.9962897140243265e-05, "loss": 0.9103, "step": 2344 }, { "epoch": 0.3828415166727889, "grad_norm": 2.9901788234710693, "learning_rate": 1.9962841941345302e-05, "loss": 1.0201, "step": 2345 }, { "epoch": 0.38300477531529326, "grad_norm": 3.1711809635162354, "learning_rate": 1.996278670149386e-05, "loss": 0.8891, "step": 2346 }, { "epoch": 0.38316803395779764, "grad_norm": 3.121959924697876, "learning_rate": 1.996273142068916e-05, "loss": 0.8048, "step": 2347 }, { "epoch": 0.383331292600302, "grad_norm": 3.6887106895446777, "learning_rate": 1.9962676098931438e-05, "loss": 0.9706, "step": 2348 }, { "epoch": 0.3834945512428064, "grad_norm": 3.484344720840454, "learning_rate": 1.9962620736220915e-05, "loss": 0.9544, "step": 2349 }, { "epoch": 0.3836578098853108, "grad_norm": 3.71089768409729, "learning_rate": 1.996256533255782e-05, "loss": 0.8525, "step": 2350 }, { "epoch": 0.38382106852781517, "grad_norm": 3.5368945598602295, "learning_rate": 1.9962509887942377e-05, "loss": 1.1435, "step": 2351 }, { "epoch": 0.3839843271703196, "grad_norm": 3.2129812240600586, "learning_rate": 1.996245440237482e-05, "loss": 0.9213, "step": 2352 }, { "epoch": 0.384147585812824, "grad_norm": 2.864978790283203, "learning_rate": 1.9962398875855376e-05, "loss": 0.7765, "step": 2353 }, { "epoch": 0.38431084445532837, "grad_norm": 3.13179087638855, "learning_rate": 1.996234330838427e-05, "loss": 0.9587, "step": 2354 }, { "epoch": 0.38447410309783275, "grad_norm": 3.8382766246795654, "learning_rate": 1.9962287699961737e-05, "loss": 1.1073, "step": 2355 }, { "epoch": 0.38463736174033714, "grad_norm": 3.2985219955444336, "learning_rate": 1.9962232050587997e-05, "loss": 1.1073, "step": 2356 }, { "epoch": 0.3848006203828415, "grad_norm": 4.045105457305908, "learning_rate": 1.996217636026328e-05, "loss": 1.155, "step": 2357 }, { "epoch": 0.3849638790253459, "grad_norm": 3.9622092247009277, "learning_rate": 1.9962120628987823e-05, "loss": 1.1522, "step": 2358 }, { "epoch": 0.3851271376678503, "grad_norm": 3.233793258666992, "learning_rate": 1.9962064856761847e-05, "loss": 1.0573, "step": 2359 }, { "epoch": 0.38529039631035467, "grad_norm": 3.160651445388794, "learning_rate": 1.9962009043585586e-05, "loss": 1.102, "step": 2360 }, { "epoch": 0.38545365495285905, "grad_norm": 3.014387369155884, "learning_rate": 1.9961953189459265e-05, "loss": 0.9386, "step": 2361 }, { "epoch": 0.38561691359536343, "grad_norm": 3.362393617630005, "learning_rate": 1.9961897294383114e-05, "loss": 1.2509, "step": 2362 }, { "epoch": 0.38578017223786787, "grad_norm": 3.2298567295074463, "learning_rate": 1.996184135835737e-05, "loss": 1.0489, "step": 2363 }, { "epoch": 0.38594343088037225, "grad_norm": 3.2482972145080566, "learning_rate": 1.9961785381382254e-05, "loss": 1.0604, "step": 2364 }, { "epoch": 0.38610668952287663, "grad_norm": 3.322331428527832, "learning_rate": 1.9961729363458e-05, "loss": 1.2344, "step": 2365 }, { "epoch": 0.386269948165381, "grad_norm": 3.2658233642578125, "learning_rate": 1.996167330458484e-05, "loss": 1.041, "step": 2366 }, { "epoch": 0.3864332068078854, "grad_norm": 4.352099418640137, "learning_rate": 1.9961617204763004e-05, "loss": 1.159, "step": 2367 }, { "epoch": 0.3865964654503898, "grad_norm": 3.4586665630340576, "learning_rate": 1.9961561063992718e-05, "loss": 0.9309, "step": 2368 }, { "epoch": 0.38675972409289416, "grad_norm": 3.718782901763916, "learning_rate": 1.9961504882274217e-05, "loss": 0.982, "step": 2369 }, { "epoch": 0.38692298273539855, "grad_norm": 3.1593873500823975, "learning_rate": 1.996144865960773e-05, "loss": 1.0898, "step": 2370 }, { "epoch": 0.38708624137790293, "grad_norm": 3.499363422393799, "learning_rate": 1.996139239599349e-05, "loss": 1.0153, "step": 2371 }, { "epoch": 0.3872495000204073, "grad_norm": 4.113864898681641, "learning_rate": 1.9961336091431728e-05, "loss": 0.9384, "step": 2372 }, { "epoch": 0.3874127586629117, "grad_norm": 4.067612171173096, "learning_rate": 1.996127974592267e-05, "loss": 1.1205, "step": 2373 }, { "epoch": 0.38757601730541613, "grad_norm": 3.0960144996643066, "learning_rate": 1.9961223359466557e-05, "loss": 0.8776, "step": 2374 }, { "epoch": 0.3877392759479205, "grad_norm": 3.312964677810669, "learning_rate": 1.9961166932063615e-05, "loss": 0.862, "step": 2375 }, { "epoch": 0.3879025345904249, "grad_norm": 3.6816565990448, "learning_rate": 1.9961110463714077e-05, "loss": 1.0609, "step": 2376 }, { "epoch": 0.3880657932329293, "grad_norm": 3.5216424465179443, "learning_rate": 1.9961053954418174e-05, "loss": 1.1403, "step": 2377 }, { "epoch": 0.38822905187543366, "grad_norm": 3.6762893199920654, "learning_rate": 1.9960997404176142e-05, "loss": 1.275, "step": 2378 }, { "epoch": 0.38839231051793804, "grad_norm": 3.331660509109497, "learning_rate": 1.996094081298821e-05, "loss": 1.0484, "step": 2379 }, { "epoch": 0.3885555691604424, "grad_norm": 3.2669143676757812, "learning_rate": 1.996088418085461e-05, "loss": 0.9621, "step": 2380 }, { "epoch": 0.3887188278029468, "grad_norm": 3.0575215816497803, "learning_rate": 1.9960827507775575e-05, "loss": 0.7902, "step": 2381 }, { "epoch": 0.3888820864454512, "grad_norm": 2.733480930328369, "learning_rate": 1.9960770793751344e-05, "loss": 0.9767, "step": 2382 }, { "epoch": 0.3890453450879556, "grad_norm": 3.356322765350342, "learning_rate": 1.996071403878214e-05, "loss": 1.1637, "step": 2383 }, { "epoch": 0.38920860373045996, "grad_norm": 3.0525827407836914, "learning_rate": 1.9960657242868208e-05, "loss": 0.9934, "step": 2384 }, { "epoch": 0.3893718623729644, "grad_norm": 3.3745627403259277, "learning_rate": 1.996060040600977e-05, "loss": 1.0613, "step": 2385 }, { "epoch": 0.3895351210154688, "grad_norm": 3.0470664501190186, "learning_rate": 1.9960543528207067e-05, "loss": 0.8797, "step": 2386 }, { "epoch": 0.38969837965797316, "grad_norm": 2.8697664737701416, "learning_rate": 1.996048660946033e-05, "loss": 0.7837, "step": 2387 }, { "epoch": 0.38986163830047754, "grad_norm": 3.6553804874420166, "learning_rate": 1.996042964976979e-05, "loss": 0.9538, "step": 2388 }, { "epoch": 0.3900248969429819, "grad_norm": 4.09171724319458, "learning_rate": 1.996037264913569e-05, "loss": 1.1399, "step": 2389 }, { "epoch": 0.3901881555854863, "grad_norm": 3.640968084335327, "learning_rate": 1.9960315607558255e-05, "loss": 0.9046, "step": 2390 }, { "epoch": 0.3903514142279907, "grad_norm": 3.486224412918091, "learning_rate": 1.9960258525037727e-05, "loss": 1.1481, "step": 2391 }, { "epoch": 0.39051467287049507, "grad_norm": 3.451328754425049, "learning_rate": 1.9960201401574334e-05, "loss": 1.0313, "step": 2392 }, { "epoch": 0.39067793151299945, "grad_norm": 3.3511054515838623, "learning_rate": 1.9960144237168316e-05, "loss": 0.9681, "step": 2393 }, { "epoch": 0.39084119015550384, "grad_norm": 3.4206061363220215, "learning_rate": 1.9960087031819907e-05, "loss": 0.8174, "step": 2394 }, { "epoch": 0.3910044487980082, "grad_norm": 3.569704055786133, "learning_rate": 1.996002978552934e-05, "loss": 0.9338, "step": 2395 }, { "epoch": 0.39116770744051266, "grad_norm": 3.249037027359009, "learning_rate": 1.995997249829685e-05, "loss": 0.9368, "step": 2396 }, { "epoch": 0.39133096608301704, "grad_norm": 3.5054357051849365, "learning_rate": 1.9959915170122677e-05, "loss": 1.0218, "step": 2397 }, { "epoch": 0.3914942247255214, "grad_norm": 3.4939262866973877, "learning_rate": 1.995985780100705e-05, "loss": 0.8886, "step": 2398 }, { "epoch": 0.3916574833680258, "grad_norm": 2.8992385864257812, "learning_rate": 1.995980039095021e-05, "loss": 0.8259, "step": 2399 }, { "epoch": 0.3918207420105302, "grad_norm": 3.194441556930542, "learning_rate": 1.9959742939952393e-05, "loss": 0.8181, "step": 2400 }, { "epoch": 0.39198400065303457, "grad_norm": 3.5047552585601807, "learning_rate": 1.995968544801383e-05, "loss": 1.1411, "step": 2401 }, { "epoch": 0.39214725929553895, "grad_norm": 3.5367796421051025, "learning_rate": 1.9959627915134764e-05, "loss": 0.9393, "step": 2402 }, { "epoch": 0.39231051793804333, "grad_norm": 3.6444807052612305, "learning_rate": 1.9959570341315425e-05, "loss": 0.8898, "step": 2403 }, { "epoch": 0.3924737765805477, "grad_norm": 3.175123929977417, "learning_rate": 1.9959512726556057e-05, "loss": 0.9226, "step": 2404 }, { "epoch": 0.3926370352230521, "grad_norm": 3.4075262546539307, "learning_rate": 1.995945507085689e-05, "loss": 1.0136, "step": 2405 }, { "epoch": 0.3928002938655565, "grad_norm": 3.181309461593628, "learning_rate": 1.9959397374218166e-05, "loss": 0.8602, "step": 2406 }, { "epoch": 0.3929635525080609, "grad_norm": 3.473850965499878, "learning_rate": 1.9959339636640118e-05, "loss": 1.102, "step": 2407 }, { "epoch": 0.3931268111505653, "grad_norm": 3.405135154724121, "learning_rate": 1.9959281858122985e-05, "loss": 0.9591, "step": 2408 }, { "epoch": 0.3932900697930697, "grad_norm": 3.272139310836792, "learning_rate": 1.9959224038667006e-05, "loss": 0.8791, "step": 2409 }, { "epoch": 0.39345332843557407, "grad_norm": 3.538987636566162, "learning_rate": 1.9959166178272416e-05, "loss": 1.0193, "step": 2410 }, { "epoch": 0.39361658707807845, "grad_norm": 3.749150037765503, "learning_rate": 1.9959108276939457e-05, "loss": 1.0521, "step": 2411 }, { "epoch": 0.39377984572058283, "grad_norm": 3.410682439804077, "learning_rate": 1.9959050334668362e-05, "loss": 0.9667, "step": 2412 }, { "epoch": 0.3939431043630872, "grad_norm": 3.5052592754364014, "learning_rate": 1.9958992351459372e-05, "loss": 1.0014, "step": 2413 }, { "epoch": 0.3941063630055916, "grad_norm": 3.5710387229919434, "learning_rate": 1.9958934327312727e-05, "loss": 1.181, "step": 2414 }, { "epoch": 0.394269621648096, "grad_norm": 3.434814691543579, "learning_rate": 1.9958876262228662e-05, "loss": 0.9453, "step": 2415 }, { "epoch": 0.39443288029060036, "grad_norm": 3.455333709716797, "learning_rate": 1.9958818156207416e-05, "loss": 1.0071, "step": 2416 }, { "epoch": 0.39459613893310475, "grad_norm": 3.2552011013031006, "learning_rate": 1.995876000924923e-05, "loss": 0.9879, "step": 2417 }, { "epoch": 0.3947593975756092, "grad_norm": 3.8298165798187256, "learning_rate": 1.9958701821354342e-05, "loss": 0.9637, "step": 2418 }, { "epoch": 0.39492265621811357, "grad_norm": 3.6191537380218506, "learning_rate": 1.9958643592522992e-05, "loss": 1.2194, "step": 2419 }, { "epoch": 0.39508591486061795, "grad_norm": 3.1251938343048096, "learning_rate": 1.9958585322755418e-05, "loss": 0.8974, "step": 2420 }, { "epoch": 0.39524917350312233, "grad_norm": 3.118586301803589, "learning_rate": 1.995852701205186e-05, "loss": 0.988, "step": 2421 }, { "epoch": 0.3954124321456267, "grad_norm": 3.6988601684570312, "learning_rate": 1.9958468660412556e-05, "loss": 1.0482, "step": 2422 }, { "epoch": 0.3955756907881311, "grad_norm": 3.547192335128784, "learning_rate": 1.995841026783775e-05, "loss": 1.1061, "step": 2423 }, { "epoch": 0.3957389494306355, "grad_norm": 3.107029914855957, "learning_rate": 1.9958351834327678e-05, "loss": 0.9131, "step": 2424 }, { "epoch": 0.39590220807313986, "grad_norm": 3.4563636779785156, "learning_rate": 1.995829335988258e-05, "loss": 1.1129, "step": 2425 }, { "epoch": 0.39606546671564424, "grad_norm": 3.2393622398376465, "learning_rate": 1.9958234844502702e-05, "loss": 0.9147, "step": 2426 }, { "epoch": 0.3962287253581486, "grad_norm": 3.6519153118133545, "learning_rate": 1.995817628818828e-05, "loss": 1.0259, "step": 2427 }, { "epoch": 0.396391984000653, "grad_norm": 3.6144611835479736, "learning_rate": 1.9958117690939555e-05, "loss": 0.889, "step": 2428 }, { "epoch": 0.39655524264315745, "grad_norm": 3.0116312503814697, "learning_rate": 1.9958059052756765e-05, "loss": 0.8281, "step": 2429 }, { "epoch": 0.39671850128566183, "grad_norm": 3.8620150089263916, "learning_rate": 1.995800037364016e-05, "loss": 1.2206, "step": 2430 }, { "epoch": 0.3968817599281662, "grad_norm": 2.8311192989349365, "learning_rate": 1.995794165358997e-05, "loss": 0.7743, "step": 2431 }, { "epoch": 0.3970450185706706, "grad_norm": 3.2261109352111816, "learning_rate": 1.9957882892606445e-05, "loss": 0.9819, "step": 2432 }, { "epoch": 0.397208277213175, "grad_norm": 3.3072187900543213, "learning_rate": 1.9957824090689825e-05, "loss": 1.892, "step": 2433 }, { "epoch": 0.39737153585567936, "grad_norm": 3.392518997192383, "learning_rate": 1.9957765247840348e-05, "loss": 1.0229, "step": 2434 }, { "epoch": 0.39753479449818374, "grad_norm": 3.617563486099243, "learning_rate": 1.9957706364058257e-05, "loss": 0.9095, "step": 2435 }, { "epoch": 0.3976980531406881, "grad_norm": 3.2466013431549072, "learning_rate": 1.9957647439343798e-05, "loss": 1.0426, "step": 2436 }, { "epoch": 0.3978613117831925, "grad_norm": 3.6728367805480957, "learning_rate": 1.995758847369721e-05, "loss": 1.0286, "step": 2437 }, { "epoch": 0.3980245704256969, "grad_norm": 3.1672351360321045, "learning_rate": 1.9957529467118734e-05, "loss": 0.8695, "step": 2438 }, { "epoch": 0.39818782906820127, "grad_norm": 3.643685817718506, "learning_rate": 1.9957470419608614e-05, "loss": 0.9882, "step": 2439 }, { "epoch": 0.3983510877107057, "grad_norm": 3.0469532012939453, "learning_rate": 1.9957411331167097e-05, "loss": 0.8127, "step": 2440 }, { "epoch": 0.3985143463532101, "grad_norm": 3.722029209136963, "learning_rate": 1.9957352201794417e-05, "loss": 0.9785, "step": 2441 }, { "epoch": 0.3986776049957145, "grad_norm": 3.318284273147583, "learning_rate": 1.9957293031490822e-05, "loss": 0.8423, "step": 2442 }, { "epoch": 0.39884086363821886, "grad_norm": 3.731884002685547, "learning_rate": 1.9957233820256555e-05, "loss": 1.0501, "step": 2443 }, { "epoch": 0.39900412228072324, "grad_norm": 3.4297361373901367, "learning_rate": 1.9957174568091863e-05, "loss": 1.0213, "step": 2444 }, { "epoch": 0.3991673809232276, "grad_norm": 3.6209285259246826, "learning_rate": 1.9957115274996985e-05, "loss": 1.0961, "step": 2445 }, { "epoch": 0.399330639565732, "grad_norm": 3.6417863368988037, "learning_rate": 1.9957055940972167e-05, "loss": 1.0617, "step": 2446 }, { "epoch": 0.3994938982082364, "grad_norm": 3.7971136569976807, "learning_rate": 1.995699656601765e-05, "loss": 1.1822, "step": 2447 }, { "epoch": 0.39965715685074077, "grad_norm": 3.670741558074951, "learning_rate": 1.995693715013368e-05, "loss": 1.1289, "step": 2448 }, { "epoch": 0.39982041549324515, "grad_norm": 3.377723455429077, "learning_rate": 1.99568776933205e-05, "loss": 1.1145, "step": 2449 }, { "epoch": 0.39998367413574953, "grad_norm": 3.287956714630127, "learning_rate": 1.9956818195578357e-05, "loss": 1.0174, "step": 2450 }, { "epoch": 0.40014693277825397, "grad_norm": 3.3489010334014893, "learning_rate": 1.9956758656907495e-05, "loss": 1.0501, "step": 2451 }, { "epoch": 0.40031019142075835, "grad_norm": 3.3341002464294434, "learning_rate": 1.9956699077308154e-05, "loss": 1.0186, "step": 2452 }, { "epoch": 0.40047345006326274, "grad_norm": 3.183457374572754, "learning_rate": 1.9956639456780584e-05, "loss": 0.9308, "step": 2453 }, { "epoch": 0.4006367087057671, "grad_norm": 3.5650975704193115, "learning_rate": 1.995657979532503e-05, "loss": 1.1041, "step": 2454 }, { "epoch": 0.4007999673482715, "grad_norm": 3.2792606353759766, "learning_rate": 1.9956520092941736e-05, "loss": 1.1735, "step": 2455 }, { "epoch": 0.4009632259907759, "grad_norm": 3.678386926651001, "learning_rate": 1.9956460349630947e-05, "loss": 1.0545, "step": 2456 }, { "epoch": 0.40112648463328027, "grad_norm": 3.4691972732543945, "learning_rate": 1.9956400565392908e-05, "loss": 0.9888, "step": 2457 }, { "epoch": 0.40128974327578465, "grad_norm": 3.3364062309265137, "learning_rate": 1.9956340740227866e-05, "loss": 0.8714, "step": 2458 }, { "epoch": 0.40145300191828903, "grad_norm": 3.0386106967926025, "learning_rate": 1.995628087413607e-05, "loss": 0.9518, "step": 2459 }, { "epoch": 0.4016162605607934, "grad_norm": 3.3043739795684814, "learning_rate": 1.995622096711776e-05, "loss": 1.1247, "step": 2460 }, { "epoch": 0.4017795192032978, "grad_norm": 3.4985218048095703, "learning_rate": 1.9956161019173184e-05, "loss": 0.7978, "step": 2461 }, { "epoch": 0.40194277784580223, "grad_norm": 3.610609769821167, "learning_rate": 1.995610103030259e-05, "loss": 1.0144, "step": 2462 }, { "epoch": 0.4021060364883066, "grad_norm": 3.2593016624450684, "learning_rate": 1.9956041000506226e-05, "loss": 0.9474, "step": 2463 }, { "epoch": 0.402269295130811, "grad_norm": 3.5773611068725586, "learning_rate": 1.9955980929784334e-05, "loss": 1.049, "step": 2464 }, { "epoch": 0.4024325537733154, "grad_norm": 3.2632651329040527, "learning_rate": 1.9955920818137168e-05, "loss": 0.9799, "step": 2465 }, { "epoch": 0.40259581241581976, "grad_norm": 3.510472059249878, "learning_rate": 1.9955860665564965e-05, "loss": 1.0895, "step": 2466 }, { "epoch": 0.40275907105832415, "grad_norm": 3.3960084915161133, "learning_rate": 1.995580047206798e-05, "loss": 1.0541, "step": 2467 }, { "epoch": 0.40292232970082853, "grad_norm": 3.8397486209869385, "learning_rate": 1.9955740237646463e-05, "loss": 1.168, "step": 2468 }, { "epoch": 0.4030855883433329, "grad_norm": 3.2346601486206055, "learning_rate": 1.9955679962300652e-05, "loss": 0.8423, "step": 2469 }, { "epoch": 0.4032488469858373, "grad_norm": 3.374086380004883, "learning_rate": 1.99556196460308e-05, "loss": 1.0064, "step": 2470 }, { "epoch": 0.4034121056283417, "grad_norm": 3.5348665714263916, "learning_rate": 1.9955559288837158e-05, "loss": 1.1393, "step": 2471 }, { "epoch": 0.4035753642708461, "grad_norm": 3.250969171524048, "learning_rate": 1.995549889071997e-05, "loss": 1.0571, "step": 2472 }, { "epoch": 0.4037386229133505, "grad_norm": 4.723082065582275, "learning_rate": 1.995543845167948e-05, "loss": 0.9258, "step": 2473 }, { "epoch": 0.4039018815558549, "grad_norm": 3.165196418762207, "learning_rate": 1.995537797171595e-05, "loss": 0.9807, "step": 2474 }, { "epoch": 0.40406514019835926, "grad_norm": 3.4268438816070557, "learning_rate": 1.9955317450829615e-05, "loss": 1.0245, "step": 2475 }, { "epoch": 0.40422839884086365, "grad_norm": 3.1063520908355713, "learning_rate": 1.995525688902073e-05, "loss": 0.8145, "step": 2476 }, { "epoch": 0.404391657483368, "grad_norm": 4.090058326721191, "learning_rate": 1.9955196286289547e-05, "loss": 1.2089, "step": 2477 }, { "epoch": 0.4045549161258724, "grad_norm": 3.255247116088867, "learning_rate": 1.9955135642636305e-05, "loss": 0.9381, "step": 2478 }, { "epoch": 0.4047181747683768, "grad_norm": 3.479597330093384, "learning_rate": 1.9955074958061264e-05, "loss": 1.0993, "step": 2479 }, { "epoch": 0.4048814334108812, "grad_norm": 3.0442354679107666, "learning_rate": 1.9955014232564666e-05, "loss": 0.8559, "step": 2480 }, { "epoch": 0.40504469205338556, "grad_norm": 3.6304776668548584, "learning_rate": 1.9954953466146764e-05, "loss": 0.9871, "step": 2481 }, { "epoch": 0.40520795069588994, "grad_norm": 3.2157955169677734, "learning_rate": 1.995489265880781e-05, "loss": 0.8667, "step": 2482 }, { "epoch": 0.4053712093383944, "grad_norm": 3.2141401767730713, "learning_rate": 1.9954831810548042e-05, "loss": 1.1052, "step": 2483 }, { "epoch": 0.40553446798089876, "grad_norm": 3.1314923763275146, "learning_rate": 1.9954770921367727e-05, "loss": 0.8676, "step": 2484 }, { "epoch": 0.40569772662340314, "grad_norm": 2.9948737621307373, "learning_rate": 1.995470999126711e-05, "loss": 0.7463, "step": 2485 }, { "epoch": 0.4058609852659075, "grad_norm": 3.62831711769104, "learning_rate": 1.9954649020246433e-05, "loss": 0.9014, "step": 2486 }, { "epoch": 0.4060242439084119, "grad_norm": 3.1869006156921387, "learning_rate": 1.9954588008305955e-05, "loss": 0.9784, "step": 2487 }, { "epoch": 0.4061875025509163, "grad_norm": 3.299121141433716, "learning_rate": 1.9954526955445925e-05, "loss": 0.8144, "step": 2488 }, { "epoch": 0.4063507611934207, "grad_norm": 3.523123264312744, "learning_rate": 1.995446586166659e-05, "loss": 0.9064, "step": 2489 }, { "epoch": 0.40651401983592506, "grad_norm": 3.3743062019348145, "learning_rate": 1.995440472696821e-05, "loss": 0.9027, "step": 2490 }, { "epoch": 0.40667727847842944, "grad_norm": 3.9454457759857178, "learning_rate": 1.995434355135103e-05, "loss": 1.0377, "step": 2491 }, { "epoch": 0.4068405371209338, "grad_norm": 3.623850107192993, "learning_rate": 1.99542823348153e-05, "loss": 0.89, "step": 2492 }, { "epoch": 0.4070037957634382, "grad_norm": 4.636023044586182, "learning_rate": 1.9954221077361274e-05, "loss": 1.0095, "step": 2493 }, { "epoch": 0.40716705440594264, "grad_norm": 3.2474255561828613, "learning_rate": 1.9954159778989206e-05, "loss": 0.8746, "step": 2494 }, { "epoch": 0.407330313048447, "grad_norm": 3.604949951171875, "learning_rate": 1.9954098439699347e-05, "loss": 0.7874, "step": 2495 }, { "epoch": 0.4074935716909514, "grad_norm": 3.7801873683929443, "learning_rate": 1.9954037059491942e-05, "loss": 1.0682, "step": 2496 }, { "epoch": 0.4076568303334558, "grad_norm": 3.379528284072876, "learning_rate": 1.9953975638367252e-05, "loss": 1.0243, "step": 2497 }, { "epoch": 0.40782008897596017, "grad_norm": 3.333399534225464, "learning_rate": 1.9953914176325528e-05, "loss": 1.0475, "step": 2498 }, { "epoch": 0.40798334761846455, "grad_norm": 2.9483389854431152, "learning_rate": 1.995385267336702e-05, "loss": 0.7766, "step": 2499 }, { "epoch": 0.40814660626096894, "grad_norm": 3.001926898956299, "learning_rate": 1.9953791129491985e-05, "loss": 0.9431, "step": 2500 }, { "epoch": 0.4083098649034733, "grad_norm": 3.54738187789917, "learning_rate": 1.995372954470067e-05, "loss": 1.0345, "step": 2501 }, { "epoch": 0.4084731235459777, "grad_norm": 3.3633840084075928, "learning_rate": 1.995366791899333e-05, "loss": 0.8681, "step": 2502 }, { "epoch": 0.4086363821884821, "grad_norm": 3.1581263542175293, "learning_rate": 1.9953606252370222e-05, "loss": 0.8533, "step": 2503 }, { "epoch": 0.40879964083098647, "grad_norm": 3.177333116531372, "learning_rate": 1.99535445448316e-05, "loss": 0.8925, "step": 2504 }, { "epoch": 0.4089628994734909, "grad_norm": 3.0167455673217773, "learning_rate": 1.9953482796377713e-05, "loss": 0.9259, "step": 2505 }, { "epoch": 0.4091261581159953, "grad_norm": 3.2888870239257812, "learning_rate": 1.9953421007008813e-05, "loss": 1.0137, "step": 2506 }, { "epoch": 0.40928941675849967, "grad_norm": 3.1054000854492188, "learning_rate": 1.995335917672516e-05, "loss": 0.8292, "step": 2507 }, { "epoch": 0.40945267540100405, "grad_norm": 3.311276435852051, "learning_rate": 1.9953297305527006e-05, "loss": 1.0195, "step": 2508 }, { "epoch": 0.40961593404350843, "grad_norm": 3.266268253326416, "learning_rate": 1.9953235393414606e-05, "loss": 0.8647, "step": 2509 }, { "epoch": 0.4097791926860128, "grad_norm": 3.554257392883301, "learning_rate": 1.995317344038821e-05, "loss": 0.915, "step": 2510 }, { "epoch": 0.4099424513285172, "grad_norm": 3.267979145050049, "learning_rate": 1.995311144644808e-05, "loss": 0.8892, "step": 2511 }, { "epoch": 0.4101057099710216, "grad_norm": 3.701167106628418, "learning_rate": 1.9953049411594466e-05, "loss": 0.9649, "step": 2512 }, { "epoch": 0.41026896861352596, "grad_norm": 3.583627462387085, "learning_rate": 1.9952987335827623e-05, "loss": 0.9414, "step": 2513 }, { "epoch": 0.41043222725603035, "grad_norm": 3.272991418838501, "learning_rate": 1.9952925219147808e-05, "loss": 0.8762, "step": 2514 }, { "epoch": 0.41059548589853473, "grad_norm": 3.3052818775177, "learning_rate": 1.9952863061555276e-05, "loss": 0.9905, "step": 2515 }, { "epoch": 0.41075874454103917, "grad_norm": 3.64099383354187, "learning_rate": 1.9952800863050282e-05, "loss": 1.227, "step": 2516 }, { "epoch": 0.41092200318354355, "grad_norm": 3.155247688293457, "learning_rate": 1.995273862363308e-05, "loss": 0.9151, "step": 2517 }, { "epoch": 0.41108526182604793, "grad_norm": 3.6718313694000244, "learning_rate": 1.9952676343303928e-05, "loss": 1.0412, "step": 2518 }, { "epoch": 0.4112485204685523, "grad_norm": 3.5096309185028076, "learning_rate": 1.9952614022063085e-05, "loss": 1.0157, "step": 2519 }, { "epoch": 0.4114117791110567, "grad_norm": 3.333513021469116, "learning_rate": 1.99525516599108e-05, "loss": 0.951, "step": 2520 }, { "epoch": 0.4115750377535611, "grad_norm": 3.5536117553710938, "learning_rate": 1.9952489256847334e-05, "loss": 1.0071, "step": 2521 }, { "epoch": 0.41173829639606546, "grad_norm": 3.094721794128418, "learning_rate": 1.9952426812872945e-05, "loss": 0.9351, "step": 2522 }, { "epoch": 0.41190155503856984, "grad_norm": 3.4037246704101562, "learning_rate": 1.9952364327987888e-05, "loss": 0.8858, "step": 2523 }, { "epoch": 0.4120648136810742, "grad_norm": 3.160794496536255, "learning_rate": 1.9952301802192414e-05, "loss": 0.9477, "step": 2524 }, { "epoch": 0.4122280723235786, "grad_norm": 3.6211347579956055, "learning_rate": 1.995223923548679e-05, "loss": 0.9858, "step": 2525 }, { "epoch": 0.412391330966083, "grad_norm": 3.2226784229278564, "learning_rate": 1.9952176627871266e-05, "loss": 0.8552, "step": 2526 }, { "epoch": 0.41255458960858743, "grad_norm": 3.701413869857788, "learning_rate": 1.99521139793461e-05, "loss": 1.1265, "step": 2527 }, { "epoch": 0.4127178482510918, "grad_norm": 2.956573724746704, "learning_rate": 1.9952051289911555e-05, "loss": 0.9859, "step": 2528 }, { "epoch": 0.4128811068935962, "grad_norm": 3.773426055908203, "learning_rate": 1.9951988559567887e-05, "loss": 1.247, "step": 2529 }, { "epoch": 0.4130443655361006, "grad_norm": 3.612049102783203, "learning_rate": 1.9951925788315346e-05, "loss": 1.0167, "step": 2530 }, { "epoch": 0.41320762417860496, "grad_norm": 3.4776148796081543, "learning_rate": 1.9951862976154196e-05, "loss": 1.0938, "step": 2531 }, { "epoch": 0.41337088282110934, "grad_norm": 3.250998020172119, "learning_rate": 1.99518001230847e-05, "loss": 0.9125, "step": 2532 }, { "epoch": 0.4135341414636137, "grad_norm": 3.1724495887756348, "learning_rate": 1.995173722910711e-05, "loss": 1.123, "step": 2533 }, { "epoch": 0.4136974001061181, "grad_norm": 3.6358835697174072, "learning_rate": 1.995167429422168e-05, "loss": 1.0746, "step": 2534 }, { "epoch": 0.4138606587486225, "grad_norm": 2.8977231979370117, "learning_rate": 1.995161131842868e-05, "loss": 0.8416, "step": 2535 }, { "epoch": 0.41402391739112687, "grad_norm": 3.3083672523498535, "learning_rate": 1.9951548301728364e-05, "loss": 1.2005, "step": 2536 }, { "epoch": 0.41418717603363125, "grad_norm": 3.4021239280700684, "learning_rate": 1.995148524412099e-05, "loss": 1.149, "step": 2537 }, { "epoch": 0.4143504346761357, "grad_norm": 3.001132011413574, "learning_rate": 1.9951422145606817e-05, "loss": 0.9386, "step": 2538 }, { "epoch": 0.4145136933186401, "grad_norm": 2.9425156116485596, "learning_rate": 1.9951359006186103e-05, "loss": 0.8928, "step": 2539 }, { "epoch": 0.41467695196114446, "grad_norm": 3.3709452152252197, "learning_rate": 1.9951295825859115e-05, "loss": 0.942, "step": 2540 }, { "epoch": 0.41484021060364884, "grad_norm": 3.264430284500122, "learning_rate": 1.9951232604626105e-05, "loss": 0.9841, "step": 2541 }, { "epoch": 0.4150034692461532, "grad_norm": 3.163358688354492, "learning_rate": 1.995116934248733e-05, "loss": 1.0, "step": 2542 }, { "epoch": 0.4151667278886576, "grad_norm": 3.36675763130188, "learning_rate": 1.995110603944306e-05, "loss": 0.9676, "step": 2543 }, { "epoch": 0.415329986531162, "grad_norm": 3.4988105297088623, "learning_rate": 1.9951042695493553e-05, "loss": 1.1561, "step": 2544 }, { "epoch": 0.41549324517366637, "grad_norm": 3.256423234939575, "learning_rate": 1.9950979310639066e-05, "loss": 1.0825, "step": 2545 }, { "epoch": 0.41565650381617075, "grad_norm": 2.9081387519836426, "learning_rate": 1.995091588487986e-05, "loss": 0.9346, "step": 2546 }, { "epoch": 0.41581976245867514, "grad_norm": 2.9942052364349365, "learning_rate": 1.9950852418216193e-05, "loss": 0.7242, "step": 2547 }, { "epoch": 0.4159830211011795, "grad_norm": 3.4852356910705566, "learning_rate": 1.995078891064833e-05, "loss": 1.0282, "step": 2548 }, { "epoch": 0.41614627974368396, "grad_norm": 3.382343292236328, "learning_rate": 1.9950725362176537e-05, "loss": 0.9716, "step": 2549 }, { "epoch": 0.41630953838618834, "grad_norm": 3.5181028842926025, "learning_rate": 1.9950661772801062e-05, "loss": 1.0674, "step": 2550 }, { "epoch": 0.4164727970286927, "grad_norm": 3.287513256072998, "learning_rate": 1.9950598142522177e-05, "loss": 0.933, "step": 2551 }, { "epoch": 0.4166360556711971, "grad_norm": 3.1272900104522705, "learning_rate": 1.9950534471340138e-05, "loss": 0.8432, "step": 2552 }, { "epoch": 0.4167993143137015, "grad_norm": 3.304880380630493, "learning_rate": 1.9950470759255214e-05, "loss": 0.8335, "step": 2553 }, { "epoch": 0.41696257295620587, "grad_norm": 3.5880465507507324, "learning_rate": 1.9950407006267658e-05, "loss": 0.8821, "step": 2554 }, { "epoch": 0.41712583159871025, "grad_norm": 3.2319700717926025, "learning_rate": 1.995034321237774e-05, "loss": 0.819, "step": 2555 }, { "epoch": 0.41728909024121463, "grad_norm": 3.083883762359619, "learning_rate": 1.9950279377585713e-05, "loss": 0.9317, "step": 2556 }, { "epoch": 0.417452348883719, "grad_norm": 3.5857560634613037, "learning_rate": 1.9950215501891845e-05, "loss": 0.9635, "step": 2557 }, { "epoch": 0.4176156075262234, "grad_norm": 3.4390153884887695, "learning_rate": 1.9950151585296402e-05, "loss": 1.0111, "step": 2558 }, { "epoch": 0.4177788661687278, "grad_norm": 3.5405895709991455, "learning_rate": 1.9950087627799643e-05, "loss": 1.0354, "step": 2559 }, { "epoch": 0.4179421248112322, "grad_norm": 3.250699281692505, "learning_rate": 1.9950023629401824e-05, "loss": 0.8901, "step": 2560 }, { "epoch": 0.4181053834537366, "grad_norm": 3.844106912612915, "learning_rate": 1.9949959590103224e-05, "loss": 1.1413, "step": 2561 }, { "epoch": 0.418268642096241, "grad_norm": 4.523584365844727, "learning_rate": 1.994989550990409e-05, "loss": 0.944, "step": 2562 }, { "epoch": 0.41843190073874537, "grad_norm": 3.132131814956665, "learning_rate": 1.9949831388804696e-05, "loss": 0.8241, "step": 2563 }, { "epoch": 0.41859515938124975, "grad_norm": 3.7325618267059326, "learning_rate": 1.9949767226805302e-05, "loss": 0.9949, "step": 2564 }, { "epoch": 0.41875841802375413, "grad_norm": 3.747390031814575, "learning_rate": 1.994970302390617e-05, "loss": 1.1926, "step": 2565 }, { "epoch": 0.4189216766662585, "grad_norm": 3.4492347240448, "learning_rate": 1.994963878010757e-05, "loss": 1.057, "step": 2566 }, { "epoch": 0.4190849353087629, "grad_norm": 3.6004626750946045, "learning_rate": 1.994957449540976e-05, "loss": 1.1981, "step": 2567 }, { "epoch": 0.4192481939512673, "grad_norm": 3.3958075046539307, "learning_rate": 1.9949510169813006e-05, "loss": 0.9232, "step": 2568 }, { "epoch": 0.41941145259377166, "grad_norm": 3.4921774864196777, "learning_rate": 1.994944580331757e-05, "loss": 1.06, "step": 2569 }, { "epoch": 0.41957471123627604, "grad_norm": 3.2704005241394043, "learning_rate": 1.994938139592372e-05, "loss": 1.0373, "step": 2570 }, { "epoch": 0.4197379698787805, "grad_norm": 2.8735463619232178, "learning_rate": 1.9949316947631723e-05, "loss": 0.8421, "step": 2571 }, { "epoch": 0.41990122852128486, "grad_norm": 3.342190742492676, "learning_rate": 1.9949252458441835e-05, "loss": 1.0546, "step": 2572 }, { "epoch": 0.42006448716378925, "grad_norm": 3.132390022277832, "learning_rate": 1.994918792835433e-05, "loss": 1.0895, "step": 2573 }, { "epoch": 0.42022774580629363, "grad_norm": 3.449021100997925, "learning_rate": 1.9949123357369477e-05, "loss": 0.9804, "step": 2574 }, { "epoch": 0.420391004448798, "grad_norm": 2.8843061923980713, "learning_rate": 1.9949058745487524e-05, "loss": 0.9467, "step": 2575 }, { "epoch": 0.4205542630913024, "grad_norm": 3.3182458877563477, "learning_rate": 1.9948994092708754e-05, "loss": 1.0202, "step": 2576 }, { "epoch": 0.4207175217338068, "grad_norm": 3.1019763946533203, "learning_rate": 1.9948929399033424e-05, "loss": 1.0458, "step": 2577 }, { "epoch": 0.42088078037631116, "grad_norm": 3.329739809036255, "learning_rate": 1.99488646644618e-05, "loss": 0.968, "step": 2578 }, { "epoch": 0.42104403901881554, "grad_norm": 3.3674604892730713, "learning_rate": 1.994879988899415e-05, "loss": 1.0242, "step": 2579 }, { "epoch": 0.4212072976613199, "grad_norm": 2.87353777885437, "learning_rate": 1.9948735072630744e-05, "loss": 0.8784, "step": 2580 }, { "epoch": 0.4213705563038243, "grad_norm": 3.186774969100952, "learning_rate": 1.9948670215371844e-05, "loss": 0.8672, "step": 2581 }, { "epoch": 0.42153381494632874, "grad_norm": 3.3528993129730225, "learning_rate": 1.9948605317217712e-05, "loss": 0.866, "step": 2582 }, { "epoch": 0.4216970735888331, "grad_norm": 3.8203303813934326, "learning_rate": 1.9948540378168627e-05, "loss": 1.0427, "step": 2583 }, { "epoch": 0.4218603322313375, "grad_norm": 3.031071662902832, "learning_rate": 1.9948475398224844e-05, "loss": 0.8597, "step": 2584 }, { "epoch": 0.4220235908738419, "grad_norm": 3.652320146560669, "learning_rate": 1.9948410377386637e-05, "loss": 1.1251, "step": 2585 }, { "epoch": 0.4221868495163463, "grad_norm": 3.9411239624023438, "learning_rate": 1.9948345315654275e-05, "loss": 0.8292, "step": 2586 }, { "epoch": 0.42235010815885066, "grad_norm": 3.3498566150665283, "learning_rate": 1.9948280213028016e-05, "loss": 0.804, "step": 2587 }, { "epoch": 0.42251336680135504, "grad_norm": 3.249889612197876, "learning_rate": 1.9948215069508134e-05, "loss": 0.9436, "step": 2588 }, { "epoch": 0.4226766254438594, "grad_norm": 3.61334228515625, "learning_rate": 1.9948149885094898e-05, "loss": 1.0376, "step": 2589 }, { "epoch": 0.4228398840863638, "grad_norm": 3.3739945888519287, "learning_rate": 1.9948084659788575e-05, "loss": 0.9498, "step": 2590 }, { "epoch": 0.4230031427288682, "grad_norm": 3.2350313663482666, "learning_rate": 1.9948019393589432e-05, "loss": 0.9906, "step": 2591 }, { "epoch": 0.4231664013713726, "grad_norm": 3.0936436653137207, "learning_rate": 1.9947954086497738e-05, "loss": 0.9037, "step": 2592 }, { "epoch": 0.423329660013877, "grad_norm": 2.898189067840576, "learning_rate": 1.994788873851376e-05, "loss": 0.8531, "step": 2593 }, { "epoch": 0.4234929186563814, "grad_norm": 3.348349094390869, "learning_rate": 1.9947823349637767e-05, "loss": 0.9181, "step": 2594 }, { "epoch": 0.42365617729888577, "grad_norm": 3.1195456981658936, "learning_rate": 1.994775791987003e-05, "loss": 0.7477, "step": 2595 }, { "epoch": 0.42381943594139015, "grad_norm": 3.2876193523406982, "learning_rate": 1.9947692449210815e-05, "loss": 1.1107, "step": 2596 }, { "epoch": 0.42398269458389454, "grad_norm": 3.1489737033843994, "learning_rate": 1.994762693766039e-05, "loss": 1.0513, "step": 2597 }, { "epoch": 0.4241459532263989, "grad_norm": 4.100550651550293, "learning_rate": 1.9947561385219033e-05, "loss": 1.0265, "step": 2598 }, { "epoch": 0.4243092118689033, "grad_norm": 3.237032890319824, "learning_rate": 1.9947495791887004e-05, "loss": 0.898, "step": 2599 }, { "epoch": 0.4244724705114077, "grad_norm": 3.5839555263519287, "learning_rate": 1.9947430157664575e-05, "loss": 1.0989, "step": 2600 }, { "epoch": 0.42463572915391207, "grad_norm": 3.0937516689300537, "learning_rate": 1.994736448255202e-05, "loss": 0.8912, "step": 2601 }, { "epoch": 0.42479898779641645, "grad_norm": 3.424114942550659, "learning_rate": 1.9947298766549604e-05, "loss": 1.2735, "step": 2602 }, { "epoch": 0.4249622464389209, "grad_norm": 3.343552827835083, "learning_rate": 1.9947233009657597e-05, "loss": 0.9793, "step": 2603 }, { "epoch": 0.42512550508142527, "grad_norm": 3.2529337406158447, "learning_rate": 1.994716721187627e-05, "loss": 0.8163, "step": 2604 }, { "epoch": 0.42528876372392965, "grad_norm": 3.0517499446868896, "learning_rate": 1.9947101373205898e-05, "loss": 0.8758, "step": 2605 }, { "epoch": 0.42545202236643404, "grad_norm": 3.297443151473999, "learning_rate": 1.9947035493646746e-05, "loss": 1.1013, "step": 2606 }, { "epoch": 0.4256152810089384, "grad_norm": 3.1145851612091064, "learning_rate": 1.994696957319909e-05, "loss": 0.9185, "step": 2607 }, { "epoch": 0.4257785396514428, "grad_norm": 3.156924247741699, "learning_rate": 1.9946903611863196e-05, "loss": 1.0741, "step": 2608 }, { "epoch": 0.4259417982939472, "grad_norm": 2.956881523132324, "learning_rate": 1.9946837609639334e-05, "loss": 0.9486, "step": 2609 }, { "epoch": 0.42610505693645156, "grad_norm": 3.2637197971343994, "learning_rate": 1.9946771566527786e-05, "loss": 0.9499, "step": 2610 }, { "epoch": 0.42626831557895595, "grad_norm": 3.309677839279175, "learning_rate": 1.9946705482528812e-05, "loss": 1.0502, "step": 2611 }, { "epoch": 0.42643157422146033, "grad_norm": 3.4646527767181396, "learning_rate": 1.9946639357642684e-05, "loss": 0.9121, "step": 2612 }, { "epoch": 0.4265948328639647, "grad_norm": 3.753445625305176, "learning_rate": 1.9946573191869682e-05, "loss": 1.1031, "step": 2613 }, { "epoch": 0.42675809150646915, "grad_norm": 3.181401491165161, "learning_rate": 1.994650698521007e-05, "loss": 1.05, "step": 2614 }, { "epoch": 0.42692135014897353, "grad_norm": 2.7934114933013916, "learning_rate": 1.9946440737664126e-05, "loss": 0.7904, "step": 2615 }, { "epoch": 0.4270846087914779, "grad_norm": 2.90189528465271, "learning_rate": 1.994637444923212e-05, "loss": 0.982, "step": 2616 }, { "epoch": 0.4272478674339823, "grad_norm": 3.492222785949707, "learning_rate": 1.9946308119914323e-05, "loss": 0.9532, "step": 2617 }, { "epoch": 0.4274111260764867, "grad_norm": 3.281399726867676, "learning_rate": 1.9946241749711014e-05, "loss": 0.883, "step": 2618 }, { "epoch": 0.42757438471899106, "grad_norm": 3.192310333251953, "learning_rate": 1.9946175338622455e-05, "loss": 0.9317, "step": 2619 }, { "epoch": 0.42773764336149545, "grad_norm": 3.2939181327819824, "learning_rate": 1.9946108886648926e-05, "loss": 0.781, "step": 2620 }, { "epoch": 0.42790090200399983, "grad_norm": 3.3123667240142822, "learning_rate": 1.99460423937907e-05, "loss": 1.0014, "step": 2621 }, { "epoch": 0.4280641606465042, "grad_norm": 3.6610918045043945, "learning_rate": 1.994597586004805e-05, "loss": 1.016, "step": 2622 }, { "epoch": 0.4282274192890086, "grad_norm": 3.4906809329986572, "learning_rate": 1.9945909285421243e-05, "loss": 1.0333, "step": 2623 }, { "epoch": 0.428390677931513, "grad_norm": 3.783902883529663, "learning_rate": 1.9945842669910565e-05, "loss": 1.0903, "step": 2624 }, { "epoch": 0.4285539365740174, "grad_norm": 3.4912269115448, "learning_rate": 1.994577601351628e-05, "loss": 0.9452, "step": 2625 }, { "epoch": 0.4287171952165218, "grad_norm": 3.232412338256836, "learning_rate": 1.9945709316238666e-05, "loss": 0.9766, "step": 2626 }, { "epoch": 0.4288804538590262, "grad_norm": 3.206058979034424, "learning_rate": 1.9945642578077997e-05, "loss": 1.0036, "step": 2627 }, { "epoch": 0.42904371250153056, "grad_norm": 3.4176225662231445, "learning_rate": 1.9945575799034547e-05, "loss": 0.9087, "step": 2628 }, { "epoch": 0.42920697114403494, "grad_norm": 3.1824331283569336, "learning_rate": 1.9945508979108588e-05, "loss": 0.9528, "step": 2629 }, { "epoch": 0.4293702297865393, "grad_norm": 3.321073055267334, "learning_rate": 1.99454421183004e-05, "loss": 0.9993, "step": 2630 }, { "epoch": 0.4295334884290437, "grad_norm": 3.1975724697113037, "learning_rate": 1.9945375216610254e-05, "loss": 0.8797, "step": 2631 }, { "epoch": 0.4296967470715481, "grad_norm": 3.3392443656921387, "learning_rate": 1.9945308274038427e-05, "loss": 1.0087, "step": 2632 }, { "epoch": 0.4298600057140525, "grad_norm": 2.891153335571289, "learning_rate": 1.994524129058519e-05, "loss": 0.8834, "step": 2633 }, { "epoch": 0.43002326435655686, "grad_norm": 3.0229568481445312, "learning_rate": 1.9945174266250823e-05, "loss": 0.9436, "step": 2634 }, { "epoch": 0.43018652299906124, "grad_norm": 3.019327163696289, "learning_rate": 1.9945107201035598e-05, "loss": 0.9081, "step": 2635 }, { "epoch": 0.4303497816415657, "grad_norm": 2.924574375152588, "learning_rate": 1.9945040094939795e-05, "loss": 0.8533, "step": 2636 }, { "epoch": 0.43051304028407006, "grad_norm": 3.036299228668213, "learning_rate": 1.9944972947963685e-05, "loss": 0.8481, "step": 2637 }, { "epoch": 0.43067629892657444, "grad_norm": 3.158043146133423, "learning_rate": 1.994490576010755e-05, "loss": 0.8961, "step": 2638 }, { "epoch": 0.4308395575690788, "grad_norm": 3.1423208713531494, "learning_rate": 1.994483853137166e-05, "loss": 0.9961, "step": 2639 }, { "epoch": 0.4310028162115832, "grad_norm": 3.0370569229125977, "learning_rate": 1.9944771261756294e-05, "loss": 0.9398, "step": 2640 }, { "epoch": 0.4311660748540876, "grad_norm": 3.064333438873291, "learning_rate": 1.9944703951261728e-05, "loss": 0.904, "step": 2641 }, { "epoch": 0.43132933349659197, "grad_norm": 3.3341987133026123, "learning_rate": 1.994463659988824e-05, "loss": 1.0561, "step": 2642 }, { "epoch": 0.43149259213909635, "grad_norm": 3.6186983585357666, "learning_rate": 1.9944569207636104e-05, "loss": 0.9799, "step": 2643 }, { "epoch": 0.43165585078160074, "grad_norm": 3.6432666778564453, "learning_rate": 1.9944501774505603e-05, "loss": 0.9843, "step": 2644 }, { "epoch": 0.4318191094241051, "grad_norm": 3.5642802715301514, "learning_rate": 1.9944434300497008e-05, "loss": 1.0601, "step": 2645 }, { "epoch": 0.4319823680666095, "grad_norm": 3.5142159461975098, "learning_rate": 1.9944366785610596e-05, "loss": 0.9921, "step": 2646 }, { "epoch": 0.43214562670911394, "grad_norm": 3.1708273887634277, "learning_rate": 1.994429922984665e-05, "loss": 0.8715, "step": 2647 }, { "epoch": 0.4323088853516183, "grad_norm": 3.253525733947754, "learning_rate": 1.9944231633205446e-05, "loss": 0.9971, "step": 2648 }, { "epoch": 0.4324721439941227, "grad_norm": 3.111069679260254, "learning_rate": 1.9944163995687257e-05, "loss": 0.9078, "step": 2649 }, { "epoch": 0.4326354026366271, "grad_norm": 3.802187919616699, "learning_rate": 1.994409631729237e-05, "loss": 0.9589, "step": 2650 }, { "epoch": 0.43279866127913147, "grad_norm": 3.5409927368164062, "learning_rate": 1.9944028598021054e-05, "loss": 1.0459, "step": 2651 }, { "epoch": 0.43296191992163585, "grad_norm": 3.125596046447754, "learning_rate": 1.994396083787359e-05, "loss": 0.8174, "step": 2652 }, { "epoch": 0.43312517856414023, "grad_norm": 3.2678563594818115, "learning_rate": 1.9943893036850258e-05, "loss": 0.8272, "step": 2653 }, { "epoch": 0.4332884372066446, "grad_norm": 3.0140771865844727, "learning_rate": 1.9943825194951337e-05, "loss": 0.8773, "step": 2654 }, { "epoch": 0.433451695849149, "grad_norm": 3.7548835277557373, "learning_rate": 1.994375731217711e-05, "loss": 1.0221, "step": 2655 }, { "epoch": 0.4336149544916534, "grad_norm": 3.4436919689178467, "learning_rate": 1.9943689388527843e-05, "loss": 0.9024, "step": 2656 }, { "epoch": 0.43377821313415776, "grad_norm": 3.275449275970459, "learning_rate": 1.994362142400383e-05, "loss": 0.8641, "step": 2657 }, { "epoch": 0.4339414717766622, "grad_norm": 3.3179075717926025, "learning_rate": 1.994355341860534e-05, "loss": 0.951, "step": 2658 }, { "epoch": 0.4341047304191666, "grad_norm": 3.0002710819244385, "learning_rate": 1.994348537233266e-05, "loss": 0.9759, "step": 2659 }, { "epoch": 0.43426798906167097, "grad_norm": 3.62680721282959, "learning_rate": 1.9943417285186065e-05, "loss": 1.2457, "step": 2660 }, { "epoch": 0.43443124770417535, "grad_norm": 3.270695686340332, "learning_rate": 1.9943349157165835e-05, "loss": 1.0465, "step": 2661 }, { "epoch": 0.43459450634667973, "grad_norm": 3.1437935829162598, "learning_rate": 1.9943280988272254e-05, "loss": 0.9376, "step": 2662 }, { "epoch": 0.4347577649891841, "grad_norm": 2.9551377296447754, "learning_rate": 1.99432127785056e-05, "loss": 0.8654, "step": 2663 }, { "epoch": 0.4349210236316885, "grad_norm": 3.3685057163238525, "learning_rate": 1.9943144527866148e-05, "loss": 0.9004, "step": 2664 }, { "epoch": 0.4350842822741929, "grad_norm": 3.337303638458252, "learning_rate": 1.9943076236354186e-05, "loss": 0.7829, "step": 2665 }, { "epoch": 0.43524754091669726, "grad_norm": 3.614868640899658, "learning_rate": 1.994300790396999e-05, "loss": 1.153, "step": 2666 }, { "epoch": 0.43541079955920164, "grad_norm": 3.4397270679473877, "learning_rate": 1.9942939530713845e-05, "loss": 1.1273, "step": 2667 }, { "epoch": 0.435574058201706, "grad_norm": 2.6888647079467773, "learning_rate": 1.9942871116586027e-05, "loss": 0.7272, "step": 2668 }, { "epoch": 0.43573731684421046, "grad_norm": 2.6902527809143066, "learning_rate": 1.9942802661586827e-05, "loss": 0.7619, "step": 2669 }, { "epoch": 0.43590057548671485, "grad_norm": 3.270603656768799, "learning_rate": 1.9942734165716512e-05, "loss": 0.9941, "step": 2670 }, { "epoch": 0.43606383412921923, "grad_norm": 3.7324187755584717, "learning_rate": 1.9942665628975376e-05, "loss": 1.1605, "step": 2671 }, { "epoch": 0.4362270927717236, "grad_norm": 3.523719549179077, "learning_rate": 1.9942597051363695e-05, "loss": 0.991, "step": 2672 }, { "epoch": 0.436390351414228, "grad_norm": 3.7569849491119385, "learning_rate": 1.9942528432881752e-05, "loss": 1.2412, "step": 2673 }, { "epoch": 0.4365536100567324, "grad_norm": 3.392698049545288, "learning_rate": 1.994245977352983e-05, "loss": 0.9983, "step": 2674 }, { "epoch": 0.43671686869923676, "grad_norm": 3.7499325275421143, "learning_rate": 1.9942391073308204e-05, "loss": 1.0757, "step": 2675 }, { "epoch": 0.43688012734174114, "grad_norm": 3.720615863800049, "learning_rate": 1.9942322332217168e-05, "loss": 0.9779, "step": 2676 }, { "epoch": 0.4370433859842455, "grad_norm": 3.6350772380828857, "learning_rate": 1.9942253550257e-05, "loss": 1.0456, "step": 2677 }, { "epoch": 0.4372066446267499, "grad_norm": 3.4972569942474365, "learning_rate": 1.9942184727427975e-05, "loss": 0.9401, "step": 2678 }, { "epoch": 0.4373699032692543, "grad_norm": 3.10093092918396, "learning_rate": 1.994211586373039e-05, "loss": 0.8365, "step": 2679 }, { "epoch": 0.43753316191175873, "grad_norm": 3.4128897190093994, "learning_rate": 1.9942046959164516e-05, "loss": 0.9917, "step": 2680 }, { "epoch": 0.4376964205542631, "grad_norm": 2.9461238384246826, "learning_rate": 1.9941978013730644e-05, "loss": 0.866, "step": 2681 }, { "epoch": 0.4378596791967675, "grad_norm": 3.4216344356536865, "learning_rate": 1.9941909027429052e-05, "loss": 1.0814, "step": 2682 }, { "epoch": 0.4380229378392719, "grad_norm": 3.268404006958008, "learning_rate": 1.9941840000260028e-05, "loss": 0.9618, "step": 2683 }, { "epoch": 0.43818619648177626, "grad_norm": 2.97288179397583, "learning_rate": 1.9941770932223852e-05, "loss": 0.975, "step": 2684 }, { "epoch": 0.43834945512428064, "grad_norm": 2.9747862815856934, "learning_rate": 1.994170182332081e-05, "loss": 0.8951, "step": 2685 }, { "epoch": 0.438512713766785, "grad_norm": 3.0344133377075195, "learning_rate": 1.994163267355119e-05, "loss": 0.9333, "step": 2686 }, { "epoch": 0.4386759724092894, "grad_norm": 3.0665810108184814, "learning_rate": 1.9941563482915267e-05, "loss": 0.9496, "step": 2687 }, { "epoch": 0.4388392310517938, "grad_norm": 2.7882041931152344, "learning_rate": 1.994149425141333e-05, "loss": 0.8733, "step": 2688 }, { "epoch": 0.43900248969429817, "grad_norm": 4.031488418579102, "learning_rate": 1.9941424979045666e-05, "loss": 0.8893, "step": 2689 }, { "epoch": 0.43916574833680255, "grad_norm": 2.9062612056732178, "learning_rate": 1.9941355665812558e-05, "loss": 0.8653, "step": 2690 }, { "epoch": 0.439329006979307, "grad_norm": 2.7166712284088135, "learning_rate": 1.994128631171429e-05, "loss": 0.8216, "step": 2691 }, { "epoch": 0.4394922656218114, "grad_norm": 3.3742897510528564, "learning_rate": 1.994121691675115e-05, "loss": 0.9546, "step": 2692 }, { "epoch": 0.43965552426431576, "grad_norm": 3.2069203853607178, "learning_rate": 1.9941147480923417e-05, "loss": 1.0209, "step": 2693 }, { "epoch": 0.43981878290682014, "grad_norm": 3.117173671722412, "learning_rate": 1.994107800423138e-05, "loss": 0.9038, "step": 2694 }, { "epoch": 0.4399820415493245, "grad_norm": 3.5461225509643555, "learning_rate": 1.994100848667533e-05, "loss": 0.9023, "step": 2695 }, { "epoch": 0.4401453001918289, "grad_norm": 3.2818422317504883, "learning_rate": 1.9940938928255544e-05, "loss": 0.9627, "step": 2696 }, { "epoch": 0.4403085588343333, "grad_norm": 3.656289577484131, "learning_rate": 1.9940869328972316e-05, "loss": 0.925, "step": 2697 }, { "epoch": 0.44047181747683767, "grad_norm": 2.975473165512085, "learning_rate": 1.9940799688825923e-05, "loss": 0.8574, "step": 2698 }, { "epoch": 0.44063507611934205, "grad_norm": 3.1092302799224854, "learning_rate": 1.9940730007816656e-05, "loss": 0.9519, "step": 2699 }, { "epoch": 0.44079833476184643, "grad_norm": 3.1583125591278076, "learning_rate": 1.9940660285944805e-05, "loss": 0.881, "step": 2700 }, { "epoch": 0.4409615934043508, "grad_norm": 3.208289384841919, "learning_rate": 1.994059052321065e-05, "loss": 1.055, "step": 2701 }, { "epoch": 0.44112485204685525, "grad_norm": 3.2243621349334717, "learning_rate": 1.9940520719614484e-05, "loss": 1.033, "step": 2702 }, { "epoch": 0.44128811068935964, "grad_norm": 3.6829171180725098, "learning_rate": 1.9940450875156586e-05, "loss": 1.0066, "step": 2703 }, { "epoch": 0.441451369331864, "grad_norm": 3.4867780208587646, "learning_rate": 1.9940380989837252e-05, "loss": 1.0252, "step": 2704 }, { "epoch": 0.4416146279743684, "grad_norm": 3.155858039855957, "learning_rate": 1.9940311063656765e-05, "loss": 0.8446, "step": 2705 }, { "epoch": 0.4417778866168728, "grad_norm": 3.0601227283477783, "learning_rate": 1.9940241096615412e-05, "loss": 0.8884, "step": 2706 }, { "epoch": 0.44194114525937717, "grad_norm": 3.151365280151367, "learning_rate": 1.994017108871348e-05, "loss": 0.7819, "step": 2707 }, { "epoch": 0.44210440390188155, "grad_norm": 3.4117183685302734, "learning_rate": 1.994010103995126e-05, "loss": 1.2067, "step": 2708 }, { "epoch": 0.44226766254438593, "grad_norm": 3.0807950496673584, "learning_rate": 1.9940030950329036e-05, "loss": 0.9958, "step": 2709 }, { "epoch": 0.4424309211868903, "grad_norm": 3.1250665187835693, "learning_rate": 1.99399608198471e-05, "loss": 0.8774, "step": 2710 }, { "epoch": 0.4425941798293947, "grad_norm": 3.5419740676879883, "learning_rate": 1.9939890648505736e-05, "loss": 1.0956, "step": 2711 }, { "epoch": 0.4427574384718991, "grad_norm": 2.8161559104919434, "learning_rate": 1.9939820436305234e-05, "loss": 0.8386, "step": 2712 }, { "epoch": 0.4429206971144035, "grad_norm": 3.316861391067505, "learning_rate": 1.9939750183245886e-05, "loss": 0.8265, "step": 2713 }, { "epoch": 0.4430839557569079, "grad_norm": 3.518434762954712, "learning_rate": 1.9939679889327976e-05, "loss": 0.948, "step": 2714 }, { "epoch": 0.4432472143994123, "grad_norm": 3.2261955738067627, "learning_rate": 1.99396095545518e-05, "loss": 0.8609, "step": 2715 }, { "epoch": 0.44341047304191666, "grad_norm": 3.2937612533569336, "learning_rate": 1.993953917891764e-05, "loss": 0.8687, "step": 2716 }, { "epoch": 0.44357373168442105, "grad_norm": 3.3197152614593506, "learning_rate": 1.9939468762425784e-05, "loss": 1.0178, "step": 2717 }, { "epoch": 0.44373699032692543, "grad_norm": 3.2606303691864014, "learning_rate": 1.9939398305076526e-05, "loss": 0.9068, "step": 2718 }, { "epoch": 0.4439002489694298, "grad_norm": 3.375143527984619, "learning_rate": 1.9939327806870156e-05, "loss": 1.1158, "step": 2719 }, { "epoch": 0.4440635076119342, "grad_norm": 3.6633424758911133, "learning_rate": 1.9939257267806966e-05, "loss": 1.08, "step": 2720 }, { "epoch": 0.4442267662544386, "grad_norm": 3.4492390155792236, "learning_rate": 1.9939186687887236e-05, "loss": 0.9984, "step": 2721 }, { "epoch": 0.44439002489694296, "grad_norm": 2.7440028190612793, "learning_rate": 1.9939116067111265e-05, "loss": 0.8808, "step": 2722 }, { "epoch": 0.4445532835394474, "grad_norm": 3.2381715774536133, "learning_rate": 1.9939045405479344e-05, "loss": 0.8816, "step": 2723 }, { "epoch": 0.4447165421819518, "grad_norm": 3.202448844909668, "learning_rate": 1.9938974702991754e-05, "loss": 0.8873, "step": 2724 }, { "epoch": 0.44487980082445616, "grad_norm": 2.9999568462371826, "learning_rate": 1.9938903959648798e-05, "loss": 0.9961, "step": 2725 }, { "epoch": 0.44504305946696054, "grad_norm": 3.4275238513946533, "learning_rate": 1.9938833175450756e-05, "loss": 1.032, "step": 2726 }, { "epoch": 0.4452063181094649, "grad_norm": 3.1486008167266846, "learning_rate": 1.9938762350397926e-05, "loss": 0.9612, "step": 2727 }, { "epoch": 0.4453695767519693, "grad_norm": 3.996633291244507, "learning_rate": 1.9938691484490596e-05, "loss": 0.953, "step": 2728 }, { "epoch": 0.4455328353944737, "grad_norm": 3.5701467990875244, "learning_rate": 1.9938620577729058e-05, "loss": 1.1013, "step": 2729 }, { "epoch": 0.4456960940369781, "grad_norm": 3.810779333114624, "learning_rate": 1.9938549630113603e-05, "loss": 1.164, "step": 2730 }, { "epoch": 0.44585935267948246, "grad_norm": 3.40360951423645, "learning_rate": 1.993847864164453e-05, "loss": 1.1236, "step": 2731 }, { "epoch": 0.44602261132198684, "grad_norm": 3.5403668880462646, "learning_rate": 1.9938407612322114e-05, "loss": 0.9528, "step": 2732 }, { "epoch": 0.4461858699644912, "grad_norm": 4.433794975280762, "learning_rate": 1.993833654214666e-05, "loss": 1.0522, "step": 2733 }, { "epoch": 0.44634912860699566, "grad_norm": 3.1581270694732666, "learning_rate": 1.993826543111846e-05, "loss": 0.8914, "step": 2734 }, { "epoch": 0.44651238724950004, "grad_norm": 3.2799723148345947, "learning_rate": 1.9938194279237803e-05, "loss": 0.8821, "step": 2735 }, { "epoch": 0.4466756458920044, "grad_norm": 3.1522624492645264, "learning_rate": 1.993812308650498e-05, "loss": 0.9208, "step": 2736 }, { "epoch": 0.4468389045345088, "grad_norm": 3.5133237838745117, "learning_rate": 1.9938051852920284e-05, "loss": 1.0679, "step": 2737 }, { "epoch": 0.4470021631770132, "grad_norm": 3.2648050785064697, "learning_rate": 1.9937980578484014e-05, "loss": 1.5653, "step": 2738 }, { "epoch": 0.4471654218195176, "grad_norm": 3.1634750366210938, "learning_rate": 1.9937909263196456e-05, "loss": 0.949, "step": 2739 }, { "epoch": 0.44732868046202195, "grad_norm": 3.009258508682251, "learning_rate": 1.9937837907057905e-05, "loss": 1.0215, "step": 2740 }, { "epoch": 0.44749193910452634, "grad_norm": 3.1667604446411133, "learning_rate": 1.9937766510068656e-05, "loss": 0.9861, "step": 2741 }, { "epoch": 0.4476551977470307, "grad_norm": 2.7177770137786865, "learning_rate": 1.9937695072229003e-05, "loss": 0.8179, "step": 2742 }, { "epoch": 0.4478184563895351, "grad_norm": 3.3556981086730957, "learning_rate": 1.9937623593539235e-05, "loss": 0.9848, "step": 2743 }, { "epoch": 0.4479817150320395, "grad_norm": 2.9075121879577637, "learning_rate": 1.9937552073999647e-05, "loss": 1.0231, "step": 2744 }, { "epoch": 0.4481449736745439, "grad_norm": 3.219527244567871, "learning_rate": 1.993748051361054e-05, "loss": 1.1435, "step": 2745 }, { "epoch": 0.4483082323170483, "grad_norm": 3.4740819931030273, "learning_rate": 1.9937408912372196e-05, "loss": 0.9091, "step": 2746 }, { "epoch": 0.4484714909595527, "grad_norm": 3.052110195159912, "learning_rate": 1.9937337270284925e-05, "loss": 1.0054, "step": 2747 }, { "epoch": 0.44863474960205707, "grad_norm": 3.7208781242370605, "learning_rate": 1.9937265587349008e-05, "loss": 1.1495, "step": 2748 }, { "epoch": 0.44879800824456145, "grad_norm": 3.2846271991729736, "learning_rate": 1.993719386356474e-05, "loss": 1.0438, "step": 2749 }, { "epoch": 0.44896126688706584, "grad_norm": 3.1222891807556152, "learning_rate": 1.9937122098932428e-05, "loss": 0.8001, "step": 2750 }, { "epoch": 0.4491245255295702, "grad_norm": 3.4708940982818604, "learning_rate": 1.9937050293452355e-05, "loss": 0.9908, "step": 2751 }, { "epoch": 0.4492877841720746, "grad_norm": 3.25927734375, "learning_rate": 1.9936978447124822e-05, "loss": 1.0055, "step": 2752 }, { "epoch": 0.449451042814579, "grad_norm": 3.480980396270752, "learning_rate": 1.9936906559950122e-05, "loss": 0.9558, "step": 2753 }, { "epoch": 0.44961430145708337, "grad_norm": 3.605012893676758, "learning_rate": 1.9936834631928552e-05, "loss": 1.0427, "step": 2754 }, { "epoch": 0.44977756009958775, "grad_norm": 3.55690336227417, "learning_rate": 1.9936762663060408e-05, "loss": 1.0427, "step": 2755 }, { "epoch": 0.4499408187420922, "grad_norm": 3.131781578063965, "learning_rate": 1.993669065334598e-05, "loss": 0.8748, "step": 2756 }, { "epoch": 0.45010407738459657, "grad_norm": 3.310664653778076, "learning_rate": 1.9936618602785573e-05, "loss": 0.9908, "step": 2757 }, { "epoch": 0.45026733602710095, "grad_norm": 3.101667642593384, "learning_rate": 1.993654651137948e-05, "loss": 0.7763, "step": 2758 }, { "epoch": 0.45043059466960533, "grad_norm": 3.743237018585205, "learning_rate": 1.9936474379127993e-05, "loss": 1.0613, "step": 2759 }, { "epoch": 0.4505938533121097, "grad_norm": 3.2619640827178955, "learning_rate": 1.9936402206031413e-05, "loss": 0.9691, "step": 2760 }, { "epoch": 0.4507571119546141, "grad_norm": 3.1165246963500977, "learning_rate": 1.9936329992090037e-05, "loss": 0.8372, "step": 2761 }, { "epoch": 0.4509203705971185, "grad_norm": 2.7356889247894287, "learning_rate": 1.9936257737304155e-05, "loss": 0.8913, "step": 2762 }, { "epoch": 0.45108362923962286, "grad_norm": 2.9247305393218994, "learning_rate": 1.9936185441674075e-05, "loss": 0.7621, "step": 2763 }, { "epoch": 0.45124688788212725, "grad_norm": 3.3023698329925537, "learning_rate": 1.9936113105200085e-05, "loss": 0.9835, "step": 2764 }, { "epoch": 0.45141014652463163, "grad_norm": 3.313175678253174, "learning_rate": 1.993604072788249e-05, "loss": 1.0526, "step": 2765 }, { "epoch": 0.451573405167136, "grad_norm": 3.3976783752441406, "learning_rate": 1.9935968309721578e-05, "loss": 1.0455, "step": 2766 }, { "epoch": 0.45173666380964045, "grad_norm": 2.9868404865264893, "learning_rate": 1.9935895850717655e-05, "loss": 0.8543, "step": 2767 }, { "epoch": 0.45189992245214483, "grad_norm": 3.812732696533203, "learning_rate": 1.9935823350871018e-05, "loss": 1.0066, "step": 2768 }, { "epoch": 0.4520631810946492, "grad_norm": 3.586250066757202, "learning_rate": 1.993575081018196e-05, "loss": 0.9435, "step": 2769 }, { "epoch": 0.4522264397371536, "grad_norm": 3.0821292400360107, "learning_rate": 1.993567822865078e-05, "loss": 0.9413, "step": 2770 }, { "epoch": 0.452389698379658, "grad_norm": 3.1435999870300293, "learning_rate": 1.993560560627778e-05, "loss": 0.8441, "step": 2771 }, { "epoch": 0.45255295702216236, "grad_norm": 2.9566187858581543, "learning_rate": 1.993553294306326e-05, "loss": 0.6577, "step": 2772 }, { "epoch": 0.45271621566466674, "grad_norm": 3.204275131225586, "learning_rate": 1.9935460239007513e-05, "loss": 0.9142, "step": 2773 }, { "epoch": 0.4528794743071711, "grad_norm": 3.3038136959075928, "learning_rate": 1.993538749411084e-05, "loss": 0.8792, "step": 2774 }, { "epoch": 0.4530427329496755, "grad_norm": 3.2355291843414307, "learning_rate": 1.9935314708373542e-05, "loss": 0.7904, "step": 2775 }, { "epoch": 0.4532059915921799, "grad_norm": 3.7213962078094482, "learning_rate": 1.993524188179592e-05, "loss": 0.9337, "step": 2776 }, { "epoch": 0.4533692502346843, "grad_norm": 3.8281898498535156, "learning_rate": 1.9935169014378263e-05, "loss": 1.1307, "step": 2777 }, { "epoch": 0.4535325088771887, "grad_norm": 3.3255412578582764, "learning_rate": 1.9935096106120883e-05, "loss": 0.9247, "step": 2778 }, { "epoch": 0.4536957675196931, "grad_norm": 3.356356143951416, "learning_rate": 1.9935023157024073e-05, "loss": 0.8626, "step": 2779 }, { "epoch": 0.4538590261621975, "grad_norm": 3.5742790699005127, "learning_rate": 1.9934950167088133e-05, "loss": 1.0439, "step": 2780 }, { "epoch": 0.45402228480470186, "grad_norm": 3.5716071128845215, "learning_rate": 1.9934877136313363e-05, "loss": 1.0004, "step": 2781 }, { "epoch": 0.45418554344720624, "grad_norm": 3.514798879623413, "learning_rate": 1.9934804064700068e-05, "loss": 1.0371, "step": 2782 }, { "epoch": 0.4543488020897106, "grad_norm": 3.287517547607422, "learning_rate": 1.9934730952248544e-05, "loss": 0.9305, "step": 2783 }, { "epoch": 0.454512060732215, "grad_norm": 3.362757444381714, "learning_rate": 1.993465779895909e-05, "loss": 0.8065, "step": 2784 }, { "epoch": 0.4546753193747194, "grad_norm": 3.7568013668060303, "learning_rate": 1.993458460483201e-05, "loss": 1.1319, "step": 2785 }, { "epoch": 0.45483857801722377, "grad_norm": 3.3947360515594482, "learning_rate": 1.9934511369867606e-05, "loss": 0.8766, "step": 2786 }, { "epoch": 0.45500183665972815, "grad_norm": 3.7532801628112793, "learning_rate": 1.9934438094066174e-05, "loss": 0.953, "step": 2787 }, { "epoch": 0.45516509530223254, "grad_norm": 3.561481475830078, "learning_rate": 1.9934364777428023e-05, "loss": 1.1083, "step": 2788 }, { "epoch": 0.455328353944737, "grad_norm": 3.610806703567505, "learning_rate": 1.9934291419953445e-05, "loss": 1.0156, "step": 2789 }, { "epoch": 0.45549161258724136, "grad_norm": 3.9390039443969727, "learning_rate": 1.9934218021642748e-05, "loss": 1.0988, "step": 2790 }, { "epoch": 0.45565487122974574, "grad_norm": 3.478715658187866, "learning_rate": 1.993414458249623e-05, "loss": 0.9714, "step": 2791 }, { "epoch": 0.4558181298722501, "grad_norm": 2.9433388710021973, "learning_rate": 1.9934071102514195e-05, "loss": 0.8255, "step": 2792 }, { "epoch": 0.4559813885147545, "grad_norm": 3.358706474304199, "learning_rate": 1.993399758169695e-05, "loss": 0.966, "step": 2793 }, { "epoch": 0.4561446471572589, "grad_norm": 3.372039318084717, "learning_rate": 1.9933924020044785e-05, "loss": 1.0808, "step": 2794 }, { "epoch": 0.45630790579976327, "grad_norm": 3.84380841255188, "learning_rate": 1.9933850417558012e-05, "loss": 1.0925, "step": 2795 }, { "epoch": 0.45647116444226765, "grad_norm": 3.0562117099761963, "learning_rate": 1.993377677423693e-05, "loss": 0.9302, "step": 2796 }, { "epoch": 0.45663442308477203, "grad_norm": 3.1517748832702637, "learning_rate": 1.9933703090081846e-05, "loss": 1.0062, "step": 2797 }, { "epoch": 0.4567976817272764, "grad_norm": 3.2231409549713135, "learning_rate": 1.9933629365093055e-05, "loss": 1.0272, "step": 2798 }, { "epoch": 0.4569609403697808, "grad_norm": 3.0025389194488525, "learning_rate": 1.9933555599270865e-05, "loss": 0.6946, "step": 2799 }, { "epoch": 0.45712419901228524, "grad_norm": 3.1601548194885254, "learning_rate": 1.9933481792615583e-05, "loss": 1.05, "step": 2800 }, { "epoch": 0.4572874576547896, "grad_norm": 3.2363102436065674, "learning_rate": 1.9933407945127505e-05, "loss": 1.0499, "step": 2801 }, { "epoch": 0.457450716297294, "grad_norm": 3.085251569747925, "learning_rate": 1.9933334056806936e-05, "loss": 0.8496, "step": 2802 }, { "epoch": 0.4576139749397984, "grad_norm": 2.744699716567993, "learning_rate": 1.9933260127654182e-05, "loss": 0.8768, "step": 2803 }, { "epoch": 0.45777723358230277, "grad_norm": 3.591695547103882, "learning_rate": 1.993318615766955e-05, "loss": 1.0288, "step": 2804 }, { "epoch": 0.45794049222480715, "grad_norm": 3.3595056533813477, "learning_rate": 1.9933112146853338e-05, "loss": 0.8677, "step": 2805 }, { "epoch": 0.45810375086731153, "grad_norm": 3.5761048793792725, "learning_rate": 1.9933038095205854e-05, "loss": 0.9402, "step": 2806 }, { "epoch": 0.4582670095098159, "grad_norm": 2.9414470195770264, "learning_rate": 1.99329640027274e-05, "loss": 0.8341, "step": 2807 }, { "epoch": 0.4584302681523203, "grad_norm": 3.1061596870422363, "learning_rate": 1.993288986941828e-05, "loss": 0.879, "step": 2808 }, { "epoch": 0.4585935267948247, "grad_norm": 3.7799408435821533, "learning_rate": 1.9932815695278803e-05, "loss": 1.1245, "step": 2809 }, { "epoch": 0.45875678543732906, "grad_norm": 3.6622672080993652, "learning_rate": 1.9932741480309272e-05, "loss": 0.9794, "step": 2810 }, { "epoch": 0.4589200440798335, "grad_norm": 3.3798439502716064, "learning_rate": 1.993266722450999e-05, "loss": 1.0101, "step": 2811 }, { "epoch": 0.4590833027223379, "grad_norm": 3.3881723880767822, "learning_rate": 1.993259292788126e-05, "loss": 0.8987, "step": 2812 }, { "epoch": 0.45924656136484227, "grad_norm": 3.028451681137085, "learning_rate": 1.9932518590423396e-05, "loss": 0.7683, "step": 2813 }, { "epoch": 0.45940982000734665, "grad_norm": 3.7126786708831787, "learning_rate": 1.9932444212136697e-05, "loss": 0.9124, "step": 2814 }, { "epoch": 0.45957307864985103, "grad_norm": 3.4201278686523438, "learning_rate": 1.993236979302147e-05, "loss": 1.044, "step": 2815 }, { "epoch": 0.4597363372923554, "grad_norm": 2.9784388542175293, "learning_rate": 1.993229533307802e-05, "loss": 0.8099, "step": 2816 }, { "epoch": 0.4598995959348598, "grad_norm": 3.902888059616089, "learning_rate": 1.9932220832306656e-05, "loss": 1.1069, "step": 2817 }, { "epoch": 0.4600628545773642, "grad_norm": 3.5163559913635254, "learning_rate": 1.9932146290707682e-05, "loss": 1.0726, "step": 2818 }, { "epoch": 0.46022611321986856, "grad_norm": 3.8695459365844727, "learning_rate": 1.9932071708281404e-05, "loss": 1.2064, "step": 2819 }, { "epoch": 0.46038937186237294, "grad_norm": 3.387688636779785, "learning_rate": 1.9931997085028132e-05, "loss": 0.9263, "step": 2820 }, { "epoch": 0.4605526305048773, "grad_norm": 3.6349449157714844, "learning_rate": 1.9931922420948166e-05, "loss": 1.1046, "step": 2821 }, { "epoch": 0.46071588914738176, "grad_norm": 3.482262134552002, "learning_rate": 1.993184771604182e-05, "loss": 0.958, "step": 2822 }, { "epoch": 0.46087914778988615, "grad_norm": 2.8718552589416504, "learning_rate": 1.99317729703094e-05, "loss": 0.8484, "step": 2823 }, { "epoch": 0.46104240643239053, "grad_norm": 3.1095130443573, "learning_rate": 1.9931698183751206e-05, "loss": 1.0819, "step": 2824 }, { "epoch": 0.4612056650748949, "grad_norm": 3.3654253482818604, "learning_rate": 1.9931623356367554e-05, "loss": 0.909, "step": 2825 }, { "epoch": 0.4613689237173993, "grad_norm": 3.248572587966919, "learning_rate": 1.993154848815875e-05, "loss": 0.947, "step": 2826 }, { "epoch": 0.4615321823599037, "grad_norm": 2.920163631439209, "learning_rate": 1.99314735791251e-05, "loss": 0.9374, "step": 2827 }, { "epoch": 0.46169544100240806, "grad_norm": 2.9322147369384766, "learning_rate": 1.993139862926691e-05, "loss": 0.8062, "step": 2828 }, { "epoch": 0.46185869964491244, "grad_norm": 3.257927417755127, "learning_rate": 1.993132363858449e-05, "loss": 0.9733, "step": 2829 }, { "epoch": 0.4620219582874168, "grad_norm": 3.640886068344116, "learning_rate": 1.9931248607078154e-05, "loss": 1.1183, "step": 2830 }, { "epoch": 0.4621852169299212, "grad_norm": 3.115746259689331, "learning_rate": 1.99311735347482e-05, "loss": 0.8452, "step": 2831 }, { "epoch": 0.4623484755724256, "grad_norm": 3.170289993286133, "learning_rate": 1.9931098421594943e-05, "loss": 0.9048, "step": 2832 }, { "epoch": 0.46251173421493, "grad_norm": 3.122187376022339, "learning_rate": 1.9931023267618686e-05, "loss": 0.8698, "step": 2833 }, { "epoch": 0.4626749928574344, "grad_norm": 3.293821334838867, "learning_rate": 1.9930948072819748e-05, "loss": 0.8851, "step": 2834 }, { "epoch": 0.4628382514999388, "grad_norm": 3.4145140647888184, "learning_rate": 1.993087283719843e-05, "loss": 0.8804, "step": 2835 }, { "epoch": 0.4630015101424432, "grad_norm": 3.4223074913024902, "learning_rate": 1.9930797560755043e-05, "loss": 0.9502, "step": 2836 }, { "epoch": 0.46316476878494756, "grad_norm": 3.7695555686950684, "learning_rate": 1.99307222434899e-05, "loss": 0.967, "step": 2837 }, { "epoch": 0.46332802742745194, "grad_norm": 3.313762903213501, "learning_rate": 1.9930646885403307e-05, "loss": 0.8425, "step": 2838 }, { "epoch": 0.4634912860699563, "grad_norm": 3.076345443725586, "learning_rate": 1.993057148649557e-05, "loss": 0.9981, "step": 2839 }, { "epoch": 0.4636545447124607, "grad_norm": 2.8448703289031982, "learning_rate": 1.993049604676701e-05, "loss": 0.7266, "step": 2840 }, { "epoch": 0.4638178033549651, "grad_norm": 3.4815306663513184, "learning_rate": 1.9930420566217926e-05, "loss": 0.9443, "step": 2841 }, { "epoch": 0.46398106199746947, "grad_norm": 3.1903281211853027, "learning_rate": 1.993034504484863e-05, "loss": 0.8149, "step": 2842 }, { "epoch": 0.4641443206399739, "grad_norm": 3.0734190940856934, "learning_rate": 1.993026948265944e-05, "loss": 0.8383, "step": 2843 }, { "epoch": 0.4643075792824783, "grad_norm": 3.907954692840576, "learning_rate": 1.993019387965066e-05, "loss": 0.8993, "step": 2844 }, { "epoch": 0.46447083792498267, "grad_norm": 3.3049821853637695, "learning_rate": 1.9930118235822606e-05, "loss": 0.8929, "step": 2845 }, { "epoch": 0.46463409656748705, "grad_norm": 2.9342782497406006, "learning_rate": 1.9930042551175583e-05, "loss": 0.7909, "step": 2846 }, { "epoch": 0.46479735520999144, "grad_norm": 3.6567375659942627, "learning_rate": 1.9929966825709903e-05, "loss": 0.9229, "step": 2847 }, { "epoch": 0.4649606138524958, "grad_norm": 3.4881887435913086, "learning_rate": 1.992989105942588e-05, "loss": 0.8686, "step": 2848 }, { "epoch": 0.4651238724950002, "grad_norm": 2.8876218795776367, "learning_rate": 1.992981525232382e-05, "loss": 0.7555, "step": 2849 }, { "epoch": 0.4652871311375046, "grad_norm": 3.4120960235595703, "learning_rate": 1.9929739404404046e-05, "loss": 1.0729, "step": 2850 }, { "epoch": 0.46545038978000897, "grad_norm": 3.31915283203125, "learning_rate": 1.992966351566686e-05, "loss": 0.9804, "step": 2851 }, { "epoch": 0.46561364842251335, "grad_norm": 3.5315418243408203, "learning_rate": 1.9929587586112577e-05, "loss": 0.8349, "step": 2852 }, { "epoch": 0.46577690706501773, "grad_norm": 3.422276496887207, "learning_rate": 1.9929511615741507e-05, "loss": 0.9228, "step": 2853 }, { "epoch": 0.46594016570752217, "grad_norm": 3.556076765060425, "learning_rate": 1.992943560455397e-05, "loss": 0.8407, "step": 2854 }, { "epoch": 0.46610342435002655, "grad_norm": 3.5422825813293457, "learning_rate": 1.9929359552550265e-05, "loss": 1.0293, "step": 2855 }, { "epoch": 0.46626668299253093, "grad_norm": 3.7962915897369385, "learning_rate": 1.9929283459730716e-05, "loss": 0.9308, "step": 2856 }, { "epoch": 0.4664299416350353, "grad_norm": 3.2818362712860107, "learning_rate": 1.992920732609563e-05, "loss": 0.8797, "step": 2857 }, { "epoch": 0.4665932002775397, "grad_norm": 3.406862497329712, "learning_rate": 1.9929131151645322e-05, "loss": 0.8817, "step": 2858 }, { "epoch": 0.4667564589200441, "grad_norm": 3.2537496089935303, "learning_rate": 1.9929054936380106e-05, "loss": 0.9799, "step": 2859 }, { "epoch": 0.46691971756254846, "grad_norm": 3.6993520259857178, "learning_rate": 1.9928978680300293e-05, "loss": 1.0769, "step": 2860 }, { "epoch": 0.46708297620505285, "grad_norm": 3.2273504734039307, "learning_rate": 1.99289023834062e-05, "loss": 0.9012, "step": 2861 }, { "epoch": 0.46724623484755723, "grad_norm": 2.814800500869751, "learning_rate": 1.9928826045698138e-05, "loss": 0.9059, "step": 2862 }, { "epoch": 0.4674094934900616, "grad_norm": 3.0300564765930176, "learning_rate": 1.9928749667176423e-05, "loss": 0.8788, "step": 2863 }, { "epoch": 0.467572752132566, "grad_norm": 3.009647846221924, "learning_rate": 1.992867324784136e-05, "loss": 0.8914, "step": 2864 }, { "epoch": 0.46773601077507043, "grad_norm": 2.9892001152038574, "learning_rate": 1.9928596787693278e-05, "loss": 0.888, "step": 2865 }, { "epoch": 0.4678992694175748, "grad_norm": 3.5057380199432373, "learning_rate": 1.992852028673248e-05, "loss": 1.0722, "step": 2866 }, { "epoch": 0.4680625280600792, "grad_norm": 3.18969464302063, "learning_rate": 1.9928443744959285e-05, "loss": 1.0987, "step": 2867 }, { "epoch": 0.4682257867025836, "grad_norm": 3.470700979232788, "learning_rate": 1.9928367162374007e-05, "loss": 1.052, "step": 2868 }, { "epoch": 0.46838904534508796, "grad_norm": 2.9889583587646484, "learning_rate": 1.9928290538976957e-05, "loss": 0.8548, "step": 2869 }, { "epoch": 0.46855230398759234, "grad_norm": 3.3267252445220947, "learning_rate": 1.9928213874768457e-05, "loss": 0.8963, "step": 2870 }, { "epoch": 0.4687155626300967, "grad_norm": 3.0624616146087646, "learning_rate": 1.992813716974882e-05, "loss": 0.9252, "step": 2871 }, { "epoch": 0.4688788212726011, "grad_norm": 3.1480119228363037, "learning_rate": 1.9928060423918357e-05, "loss": 0.8117, "step": 2872 }, { "epoch": 0.4690420799151055, "grad_norm": 3.3458194732666016, "learning_rate": 1.9927983637277387e-05, "loss": 0.9907, "step": 2873 }, { "epoch": 0.4692053385576099, "grad_norm": 3.646399736404419, "learning_rate": 1.9927906809826225e-05, "loss": 1.0707, "step": 2874 }, { "epoch": 0.46936859720011426, "grad_norm": 3.016909599304199, "learning_rate": 1.9927829941565187e-05, "loss": 0.9301, "step": 2875 }, { "epoch": 0.4695318558426187, "grad_norm": 3.179856061935425, "learning_rate": 1.992775303249459e-05, "loss": 0.9435, "step": 2876 }, { "epoch": 0.4696951144851231, "grad_norm": 2.9960451126098633, "learning_rate": 1.9927676082614745e-05, "loss": 0.8719, "step": 2877 }, { "epoch": 0.46985837312762746, "grad_norm": 3.252023220062256, "learning_rate": 1.9927599091925978e-05, "loss": 1.0043, "step": 2878 }, { "epoch": 0.47002163177013184, "grad_norm": 3.3714840412139893, "learning_rate": 1.9927522060428593e-05, "loss": 0.8028, "step": 2879 }, { "epoch": 0.4701848904126362, "grad_norm": 3.379636287689209, "learning_rate": 1.992744498812292e-05, "loss": 1.0207, "step": 2880 }, { "epoch": 0.4703481490551406, "grad_norm": 3.30598521232605, "learning_rate": 1.9927367875009264e-05, "loss": 1.0008, "step": 2881 }, { "epoch": 0.470511407697645, "grad_norm": 3.603403091430664, "learning_rate": 1.992729072108795e-05, "loss": 1.0569, "step": 2882 }, { "epoch": 0.4706746663401494, "grad_norm": 3.5317206382751465, "learning_rate": 1.9927213526359294e-05, "loss": 0.9063, "step": 2883 }, { "epoch": 0.47083792498265375, "grad_norm": 3.06813383102417, "learning_rate": 1.992713629082361e-05, "loss": 0.9415, "step": 2884 }, { "epoch": 0.47100118362515814, "grad_norm": 3.1247901916503906, "learning_rate": 1.9927059014481213e-05, "loss": 0.9876, "step": 2885 }, { "epoch": 0.4711644422676625, "grad_norm": 3.3207762241363525, "learning_rate": 1.992698169733243e-05, "loss": 1.0012, "step": 2886 }, { "epoch": 0.47132770091016696, "grad_norm": 3.065023899078369, "learning_rate": 1.9926904339377572e-05, "loss": 0.8528, "step": 2887 }, { "epoch": 0.47149095955267134, "grad_norm": 2.8675336837768555, "learning_rate": 1.9926826940616957e-05, "loss": 0.7547, "step": 2888 }, { "epoch": 0.4716542181951757, "grad_norm": 2.864055871963501, "learning_rate": 1.992674950105091e-05, "loss": 0.8755, "step": 2889 }, { "epoch": 0.4718174768376801, "grad_norm": 3.100951671600342, "learning_rate": 1.9926672020679737e-05, "loss": 0.9025, "step": 2890 }, { "epoch": 0.4719807354801845, "grad_norm": 3.0050718784332275, "learning_rate": 1.992659449950377e-05, "loss": 0.9001, "step": 2891 }, { "epoch": 0.47214399412268887, "grad_norm": 3.3019683361053467, "learning_rate": 1.9926516937523317e-05, "loss": 1.0067, "step": 2892 }, { "epoch": 0.47230725276519325, "grad_norm": 3.396190643310547, "learning_rate": 1.99264393347387e-05, "loss": 1.0204, "step": 2893 }, { "epoch": 0.47247051140769764, "grad_norm": 3.1351675987243652, "learning_rate": 1.992636169115024e-05, "loss": 0.8748, "step": 2894 }, { "epoch": 0.472633770050202, "grad_norm": 3.4502010345458984, "learning_rate": 1.9926284006758258e-05, "loss": 1.0121, "step": 2895 }, { "epoch": 0.4727970286927064, "grad_norm": 3.207646608352661, "learning_rate": 1.992620628156307e-05, "loss": 0.7149, "step": 2896 }, { "epoch": 0.4729602873352108, "grad_norm": 2.8451571464538574, "learning_rate": 1.9926128515564996e-05, "loss": 0.7533, "step": 2897 }, { "epoch": 0.4731235459777152, "grad_norm": 3.014084815979004, "learning_rate": 1.992605070876435e-05, "loss": 0.7159, "step": 2898 }, { "epoch": 0.4732868046202196, "grad_norm": 3.088777780532837, "learning_rate": 1.9925972861161466e-05, "loss": 0.7302, "step": 2899 }, { "epoch": 0.473450063262724, "grad_norm": 3.492537021636963, "learning_rate": 1.992589497275665e-05, "loss": 0.9559, "step": 2900 }, { "epoch": 0.47361332190522837, "grad_norm": 2.7518208026885986, "learning_rate": 1.992581704355023e-05, "loss": 0.8376, "step": 2901 }, { "epoch": 0.47377658054773275, "grad_norm": 3.1356024742126465, "learning_rate": 1.9925739073542526e-05, "loss": 1.001, "step": 2902 }, { "epoch": 0.47393983919023713, "grad_norm": 3.518402099609375, "learning_rate": 1.9925661062733853e-05, "loss": 0.8583, "step": 2903 }, { "epoch": 0.4741030978327415, "grad_norm": 3.1657488346099854, "learning_rate": 1.9925583011124536e-05, "loss": 0.9875, "step": 2904 }, { "epoch": 0.4742663564752459, "grad_norm": 3.247067451477051, "learning_rate": 1.9925504918714894e-05, "loss": 1.0314, "step": 2905 }, { "epoch": 0.4744296151177503, "grad_norm": 3.000164031982422, "learning_rate": 1.992542678550525e-05, "loss": 0.9334, "step": 2906 }, { "epoch": 0.47459287376025466, "grad_norm": 3.264237642288208, "learning_rate": 1.9925348611495926e-05, "loss": 1.0368, "step": 2907 }, { "epoch": 0.47475613240275905, "grad_norm": 3.2346012592315674, "learning_rate": 1.9925270396687242e-05, "loss": 1.122, "step": 2908 }, { "epoch": 0.4749193910452635, "grad_norm": 2.793304920196533, "learning_rate": 1.9925192141079516e-05, "loss": 0.7903, "step": 2909 }, { "epoch": 0.47508264968776787, "grad_norm": 3.4719936847686768, "learning_rate": 1.9925113844673075e-05, "loss": 1.0845, "step": 2910 }, { "epoch": 0.47524590833027225, "grad_norm": 3.079035520553589, "learning_rate": 1.992503550746824e-05, "loss": 0.8788, "step": 2911 }, { "epoch": 0.47540916697277663, "grad_norm": 3.092761278152466, "learning_rate": 1.992495712946533e-05, "loss": 0.8557, "step": 2912 }, { "epoch": 0.475572425615281, "grad_norm": 3.7979516983032227, "learning_rate": 1.9924878710664667e-05, "loss": 1.0432, "step": 2913 }, { "epoch": 0.4757356842577854, "grad_norm": 3.5572986602783203, "learning_rate": 1.992480025106658e-05, "loss": 1.0476, "step": 2914 }, { "epoch": 0.4758989429002898, "grad_norm": 3.7755446434020996, "learning_rate": 1.9924721750671382e-05, "loss": 0.9245, "step": 2915 }, { "epoch": 0.47606220154279416, "grad_norm": 3.3251636028289795, "learning_rate": 1.99246432094794e-05, "loss": 0.9804, "step": 2916 }, { "epoch": 0.47622546018529854, "grad_norm": 3.4262828826904297, "learning_rate": 1.992456462749096e-05, "loss": 0.9833, "step": 2917 }, { "epoch": 0.4763887188278029, "grad_norm": 3.267793655395508, "learning_rate": 1.992448600470638e-05, "loss": 0.9375, "step": 2918 }, { "epoch": 0.4765519774703073, "grad_norm": 3.115237236022949, "learning_rate": 1.9924407341125986e-05, "loss": 1.0441, "step": 2919 }, { "epoch": 0.47671523611281175, "grad_norm": 2.928943157196045, "learning_rate": 1.9924328636750105e-05, "loss": 0.9067, "step": 2920 }, { "epoch": 0.47687849475531613, "grad_norm": 2.9242630004882812, "learning_rate": 1.9924249891579052e-05, "loss": 0.8161, "step": 2921 }, { "epoch": 0.4770417533978205, "grad_norm": 3.19171142578125, "learning_rate": 1.9924171105613156e-05, "loss": 0.9006, "step": 2922 }, { "epoch": 0.4772050120403249, "grad_norm": 3.0281670093536377, "learning_rate": 1.992409227885274e-05, "loss": 0.8493, "step": 2923 }, { "epoch": 0.4773682706828293, "grad_norm": 3.939929962158203, "learning_rate": 1.9924013411298125e-05, "loss": 1.1675, "step": 2924 }, { "epoch": 0.47753152932533366, "grad_norm": 3.147920608520508, "learning_rate": 1.9923934502949645e-05, "loss": 0.8955, "step": 2925 }, { "epoch": 0.47769478796783804, "grad_norm": 3.3028063774108887, "learning_rate": 1.992385555380761e-05, "loss": 0.922, "step": 2926 }, { "epoch": 0.4778580466103424, "grad_norm": 3.4131946563720703, "learning_rate": 1.9923776563872355e-05, "loss": 1.882, "step": 2927 }, { "epoch": 0.4780213052528468, "grad_norm": 3.109696626663208, "learning_rate": 1.9923697533144203e-05, "loss": 1.0355, "step": 2928 }, { "epoch": 0.4781845638953512, "grad_norm": 4.1512579917907715, "learning_rate": 1.992361846162348e-05, "loss": 1.0477, "step": 2929 }, { "epoch": 0.47834782253785557, "grad_norm": 3.7562339305877686, "learning_rate": 1.9923539349310502e-05, "loss": 1.147, "step": 2930 }, { "epoch": 0.47851108118036, "grad_norm": 3.31227970123291, "learning_rate": 1.9923460196205604e-05, "loss": 1.0462, "step": 2931 }, { "epoch": 0.4786743398228644, "grad_norm": 3.216015338897705, "learning_rate": 1.992338100230911e-05, "loss": 0.8851, "step": 2932 }, { "epoch": 0.4788375984653688, "grad_norm": 3.372702121734619, "learning_rate": 1.992330176762134e-05, "loss": 0.9936, "step": 2933 }, { "epoch": 0.47900085710787316, "grad_norm": 2.9812755584716797, "learning_rate": 1.9923222492142625e-05, "loss": 0.8667, "step": 2934 }, { "epoch": 0.47916411575037754, "grad_norm": 3.139204263687134, "learning_rate": 1.992314317587329e-05, "loss": 0.8686, "step": 2935 }, { "epoch": 0.4793273743928819, "grad_norm": 3.4762659072875977, "learning_rate": 1.992306381881366e-05, "loss": 0.9186, "step": 2936 }, { "epoch": 0.4794906330353863, "grad_norm": 3.2233901023864746, "learning_rate": 1.9922984420964064e-05, "loss": 0.8313, "step": 2937 }, { "epoch": 0.4796538916778907, "grad_norm": 2.960458517074585, "learning_rate": 1.9922904982324824e-05, "loss": 0.8135, "step": 2938 }, { "epoch": 0.47981715032039507, "grad_norm": 2.7271761894226074, "learning_rate": 1.9922825502896265e-05, "loss": 0.7784, "step": 2939 }, { "epoch": 0.47998040896289945, "grad_norm": 3.1712472438812256, "learning_rate": 1.992274598267872e-05, "loss": 0.9745, "step": 2940 }, { "epoch": 0.48014366760540383, "grad_norm": 2.780183792114258, "learning_rate": 1.9922666421672515e-05, "loss": 0.868, "step": 2941 }, { "epoch": 0.4803069262479083, "grad_norm": 2.98502779006958, "learning_rate": 1.9922586819877972e-05, "loss": 0.8946, "step": 2942 }, { "epoch": 0.48047018489041265, "grad_norm": 3.4907288551330566, "learning_rate": 1.992250717729542e-05, "loss": 0.969, "step": 2943 }, { "epoch": 0.48063344353291704, "grad_norm": 3.004624366760254, "learning_rate": 1.992242749392519e-05, "loss": 0.9611, "step": 2944 }, { "epoch": 0.4807967021754214, "grad_norm": 3.107499837875366, "learning_rate": 1.992234776976761e-05, "loss": 0.8743, "step": 2945 }, { "epoch": 0.4809599608179258, "grad_norm": 4.103081226348877, "learning_rate": 1.9922268004823002e-05, "loss": 1.1421, "step": 2946 }, { "epoch": 0.4811232194604302, "grad_norm": 3.0462100505828857, "learning_rate": 1.9922188199091697e-05, "loss": 0.7413, "step": 2947 }, { "epoch": 0.48128647810293457, "grad_norm": 3.3032422065734863, "learning_rate": 1.992210835257402e-05, "loss": 0.8993, "step": 2948 }, { "epoch": 0.48144973674543895, "grad_norm": 3.3560850620269775, "learning_rate": 1.9922028465270305e-05, "loss": 0.9962, "step": 2949 }, { "epoch": 0.48161299538794333, "grad_norm": 4.00833797454834, "learning_rate": 1.992194853718088e-05, "loss": 1.0385, "step": 2950 }, { "epoch": 0.4817762540304477, "grad_norm": 3.569512367248535, "learning_rate": 1.992186856830607e-05, "loss": 0.9241, "step": 2951 }, { "epoch": 0.4819395126729521, "grad_norm": 3.73423171043396, "learning_rate": 1.99217885586462e-05, "loss": 1.0658, "step": 2952 }, { "epoch": 0.48210277131545654, "grad_norm": 3.1560213565826416, "learning_rate": 1.992170850820161e-05, "loss": 0.8224, "step": 2953 }, { "epoch": 0.4822660299579609, "grad_norm": 3.8159799575805664, "learning_rate": 1.9921628416972618e-05, "loss": 1.0597, "step": 2954 }, { "epoch": 0.4824292886004653, "grad_norm": 3.808631420135498, "learning_rate": 1.9921548284959556e-05, "loss": 1.0287, "step": 2955 }, { "epoch": 0.4825925472429697, "grad_norm": 3.1378774642944336, "learning_rate": 1.992146811216276e-05, "loss": 1.0381, "step": 2956 }, { "epoch": 0.48275580588547407, "grad_norm": 2.877244234085083, "learning_rate": 1.9921387898582556e-05, "loss": 0.8432, "step": 2957 }, { "epoch": 0.48291906452797845, "grad_norm": 3.1099817752838135, "learning_rate": 1.992130764421927e-05, "loss": 0.9873, "step": 2958 }, { "epoch": 0.48308232317048283, "grad_norm": 3.0484302043914795, "learning_rate": 1.9921227349073234e-05, "loss": 1.0055, "step": 2959 }, { "epoch": 0.4832455818129872, "grad_norm": 2.8420214653015137, "learning_rate": 1.9921147013144782e-05, "loss": 0.9636, "step": 2960 }, { "epoch": 0.4834088404554916, "grad_norm": 3.349830389022827, "learning_rate": 1.9921066636434236e-05, "loss": 0.9479, "step": 2961 }, { "epoch": 0.483572099097996, "grad_norm": 3.051565170288086, "learning_rate": 1.9920986218941933e-05, "loss": 1.1453, "step": 2962 }, { "epoch": 0.48373535774050036, "grad_norm": 3.095437526702881, "learning_rate": 1.9920905760668202e-05, "loss": 0.9673, "step": 2963 }, { "epoch": 0.4838986163830048, "grad_norm": 2.8650739192962646, "learning_rate": 1.9920825261613372e-05, "loss": 0.9016, "step": 2964 }, { "epoch": 0.4840618750255092, "grad_norm": 3.1660661697387695, "learning_rate": 1.992074472177778e-05, "loss": 0.9759, "step": 2965 }, { "epoch": 0.48422513366801356, "grad_norm": 3.1681668758392334, "learning_rate": 1.9920664141161746e-05, "loss": 0.8594, "step": 2966 }, { "epoch": 0.48438839231051795, "grad_norm": 3.797969102859497, "learning_rate": 1.9920583519765614e-05, "loss": 1.1679, "step": 2967 }, { "epoch": 0.48455165095302233, "grad_norm": 3.5065512657165527, "learning_rate": 1.9920502857589704e-05, "loss": 0.9329, "step": 2968 }, { "epoch": 0.4847149095955267, "grad_norm": 2.904998540878296, "learning_rate": 1.992042215463436e-05, "loss": 0.9891, "step": 2969 }, { "epoch": 0.4848781682380311, "grad_norm": 3.377927303314209, "learning_rate": 1.9920341410899896e-05, "loss": 0.8864, "step": 2970 }, { "epoch": 0.4850414268805355, "grad_norm": 3.5618956089019775, "learning_rate": 1.9920260626386662e-05, "loss": 0.9727, "step": 2971 }, { "epoch": 0.48520468552303986, "grad_norm": 3.615940570831299, "learning_rate": 1.9920179801094978e-05, "loss": 0.9832, "step": 2972 }, { "epoch": 0.48536794416554424, "grad_norm": 3.5181076526641846, "learning_rate": 1.9920098935025183e-05, "loss": 0.8575, "step": 2973 }, { "epoch": 0.4855312028080487, "grad_norm": 3.3862385749816895, "learning_rate": 1.9920018028177606e-05, "loss": 1.0132, "step": 2974 }, { "epoch": 0.48569446145055306, "grad_norm": 2.945814371109009, "learning_rate": 1.9919937080552583e-05, "loss": 0.8152, "step": 2975 }, { "epoch": 0.48585772009305744, "grad_norm": 3.3120057582855225, "learning_rate": 1.9919856092150444e-05, "loss": 1.0023, "step": 2976 }, { "epoch": 0.4860209787355618, "grad_norm": 3.1503374576568604, "learning_rate": 1.991977506297152e-05, "loss": 0.9062, "step": 2977 }, { "epoch": 0.4861842373780662, "grad_norm": 3.6989998817443848, "learning_rate": 1.9919693993016148e-05, "loss": 0.9752, "step": 2978 }, { "epoch": 0.4863474960205706, "grad_norm": 3.5891356468200684, "learning_rate": 1.9919612882284657e-05, "loss": 0.9831, "step": 2979 }, { "epoch": 0.486510754663075, "grad_norm": 3.663156270980835, "learning_rate": 1.9919531730777387e-05, "loss": 1.7371, "step": 2980 }, { "epoch": 0.48667401330557936, "grad_norm": 3.0853171348571777, "learning_rate": 1.9919450538494662e-05, "loss": 0.9467, "step": 2981 }, { "epoch": 0.48683727194808374, "grad_norm": 2.840189218521118, "learning_rate": 1.9919369305436828e-05, "loss": 0.9064, "step": 2982 }, { "epoch": 0.4870005305905881, "grad_norm": 3.131692886352539, "learning_rate": 1.9919288031604207e-05, "loss": 0.7288, "step": 2983 }, { "epoch": 0.4871637892330925, "grad_norm": 3.345078706741333, "learning_rate": 1.991920671699714e-05, "loss": 1.0238, "step": 2984 }, { "epoch": 0.48732704787559694, "grad_norm": 3.3460476398468018, "learning_rate": 1.9919125361615962e-05, "loss": 1.112, "step": 2985 }, { "epoch": 0.4874903065181013, "grad_norm": 3.238969087600708, "learning_rate": 1.9919043965461002e-05, "loss": 0.9721, "step": 2986 }, { "epoch": 0.4876535651606057, "grad_norm": 2.8367984294891357, "learning_rate": 1.99189625285326e-05, "loss": 0.8089, "step": 2987 }, { "epoch": 0.4878168238031101, "grad_norm": 2.998173713684082, "learning_rate": 1.9918881050831086e-05, "loss": 0.8791, "step": 2988 }, { "epoch": 0.48798008244561447, "grad_norm": 3.7706820964813232, "learning_rate": 1.99187995323568e-05, "loss": 1.1151, "step": 2989 }, { "epoch": 0.48814334108811885, "grad_norm": 3.236182689666748, "learning_rate": 1.9918717973110074e-05, "loss": 0.8888, "step": 2990 }, { "epoch": 0.48830659973062324, "grad_norm": 3.456773042678833, "learning_rate": 1.991863637309124e-05, "loss": 1.1412, "step": 2991 }, { "epoch": 0.4884698583731276, "grad_norm": 3.2951860427856445, "learning_rate": 1.9918554732300637e-05, "loss": 0.9084, "step": 2992 }, { "epoch": 0.488633117015632, "grad_norm": 3.208186388015747, "learning_rate": 1.9918473050738607e-05, "loss": 1.0266, "step": 2993 }, { "epoch": 0.4887963756581364, "grad_norm": 3.008413076400757, "learning_rate": 1.9918391328405476e-05, "loss": 0.7597, "step": 2994 }, { "epoch": 0.48895963430064077, "grad_norm": 3.5311994552612305, "learning_rate": 1.991830956530158e-05, "loss": 0.9378, "step": 2995 }, { "epoch": 0.4891228929431452, "grad_norm": 3.1315112113952637, "learning_rate": 1.9918227761427263e-05, "loss": 0.9213, "step": 2996 }, { "epoch": 0.4892861515856496, "grad_norm": 3.1545779705047607, "learning_rate": 1.9918145916782856e-05, "loss": 0.9847, "step": 2997 }, { "epoch": 0.48944941022815397, "grad_norm": 3.715040683746338, "learning_rate": 1.9918064031368696e-05, "loss": 0.9654, "step": 2998 }, { "epoch": 0.48961266887065835, "grad_norm": 3.109912157058716, "learning_rate": 1.991798210518512e-05, "loss": 0.9411, "step": 2999 }, { "epoch": 0.48977592751316273, "grad_norm": 3.740410804748535, "learning_rate": 1.991790013823246e-05, "loss": 1.0014, "step": 3000 }, { "epoch": 0.4899391861556671, "grad_norm": 3.073458671569824, "learning_rate": 1.9917818130511064e-05, "loss": 0.7819, "step": 3001 }, { "epoch": 0.4901024447981715, "grad_norm": 3.2082738876342773, "learning_rate": 1.991773608202126e-05, "loss": 1.0144, "step": 3002 }, { "epoch": 0.4902657034406759, "grad_norm": 3.440441608428955, "learning_rate": 1.991765399276339e-05, "loss": 0.8793, "step": 3003 }, { "epoch": 0.49042896208318026, "grad_norm": 3.5781683921813965, "learning_rate": 1.9917571862737786e-05, "loss": 0.9081, "step": 3004 }, { "epoch": 0.49059222072568465, "grad_norm": 3.3715503215789795, "learning_rate": 1.991748969194479e-05, "loss": 0.9926, "step": 3005 }, { "epoch": 0.49075547936818903, "grad_norm": 3.365523338317871, "learning_rate": 1.9917407480384737e-05, "loss": 1.0317, "step": 3006 }, { "epoch": 0.49091873801069347, "grad_norm": 3.4606575965881348, "learning_rate": 1.9917325228057968e-05, "loss": 1.0267, "step": 3007 }, { "epoch": 0.49108199665319785, "grad_norm": 3.0678889751434326, "learning_rate": 1.991724293496482e-05, "loss": 0.8447, "step": 3008 }, { "epoch": 0.49124525529570223, "grad_norm": 2.6807923316955566, "learning_rate": 1.9917160601105632e-05, "loss": 0.7602, "step": 3009 }, { "epoch": 0.4914085139382066, "grad_norm": 3.123460292816162, "learning_rate": 1.9917078226480736e-05, "loss": 0.8398, "step": 3010 }, { "epoch": 0.491571772580711, "grad_norm": 3.0700998306274414, "learning_rate": 1.9916995811090482e-05, "loss": 0.9602, "step": 3011 }, { "epoch": 0.4917350312232154, "grad_norm": 3.1155200004577637, "learning_rate": 1.9916913354935202e-05, "loss": 0.748, "step": 3012 }, { "epoch": 0.49189828986571976, "grad_norm": 2.808112144470215, "learning_rate": 1.9916830858015235e-05, "loss": 0.7629, "step": 3013 }, { "epoch": 0.49206154850822414, "grad_norm": 3.3052263259887695, "learning_rate": 1.991674832033092e-05, "loss": 0.9771, "step": 3014 }, { "epoch": 0.4922248071507285, "grad_norm": 3.4263718128204346, "learning_rate": 1.99166657418826e-05, "loss": 0.8426, "step": 3015 }, { "epoch": 0.4923880657932329, "grad_norm": 3.1533234119415283, "learning_rate": 1.9916583122670607e-05, "loss": 0.8599, "step": 3016 }, { "epoch": 0.4925513244357373, "grad_norm": 3.5208323001861572, "learning_rate": 1.9916500462695286e-05, "loss": 1.0325, "step": 3017 }, { "epoch": 0.49271458307824173, "grad_norm": 2.856614351272583, "learning_rate": 1.991641776195698e-05, "loss": 0.7354, "step": 3018 }, { "epoch": 0.4928778417207461, "grad_norm": 3.467578887939453, "learning_rate": 1.9916335020456022e-05, "loss": 0.9314, "step": 3019 }, { "epoch": 0.4930411003632505, "grad_norm": 3.9592132568359375, "learning_rate": 1.9916252238192756e-05, "loss": 1.0141, "step": 3020 }, { "epoch": 0.4932043590057549, "grad_norm": 3.2027721405029297, "learning_rate": 1.991616941516752e-05, "loss": 0.8756, "step": 3021 }, { "epoch": 0.49336761764825926, "grad_norm": 3.5300118923187256, "learning_rate": 1.9916086551380656e-05, "loss": 1.0236, "step": 3022 }, { "epoch": 0.49353087629076364, "grad_norm": 3.081209897994995, "learning_rate": 1.9916003646832507e-05, "loss": 0.7772, "step": 3023 }, { "epoch": 0.493694134933268, "grad_norm": 3.3508384227752686, "learning_rate": 1.991592070152341e-05, "loss": 0.937, "step": 3024 }, { "epoch": 0.4938573935757724, "grad_norm": 3.4289019107818604, "learning_rate": 1.991583771545371e-05, "loss": 1.0807, "step": 3025 }, { "epoch": 0.4940206522182768, "grad_norm": 3.340498208999634, "learning_rate": 1.991575468862374e-05, "loss": 1.0188, "step": 3026 }, { "epoch": 0.4941839108607812, "grad_norm": 3.3253817558288574, "learning_rate": 1.9915671621033847e-05, "loss": 0.9371, "step": 3027 }, { "epoch": 0.49434716950328556, "grad_norm": 3.3128292560577393, "learning_rate": 1.9915588512684374e-05, "loss": 0.9174, "step": 3028 }, { "epoch": 0.49451042814579, "grad_norm": 3.1428425312042236, "learning_rate": 1.991550536357566e-05, "loss": 0.8889, "step": 3029 }, { "epoch": 0.4946736867882944, "grad_norm": 3.590972661972046, "learning_rate": 1.991542217370805e-05, "loss": 0.9972, "step": 3030 }, { "epoch": 0.49483694543079876, "grad_norm": 3.021289110183716, "learning_rate": 1.991533894308188e-05, "loss": 0.8651, "step": 3031 }, { "epoch": 0.49500020407330314, "grad_norm": 3.047534942626953, "learning_rate": 1.99152556716975e-05, "loss": 0.9048, "step": 3032 }, { "epoch": 0.4951634627158075, "grad_norm": 3.3554091453552246, "learning_rate": 1.9915172359555243e-05, "loss": 1.0173, "step": 3033 }, { "epoch": 0.4953267213583119, "grad_norm": 3.4854135513305664, "learning_rate": 1.9915089006655455e-05, "loss": 0.8827, "step": 3034 }, { "epoch": 0.4954899800008163, "grad_norm": 3.62160325050354, "learning_rate": 1.9915005612998484e-05, "loss": 0.9865, "step": 3035 }, { "epoch": 0.49565323864332067, "grad_norm": 3.3495466709136963, "learning_rate": 1.9914922178584667e-05, "loss": 0.9284, "step": 3036 }, { "epoch": 0.49581649728582505, "grad_norm": 3.475637912750244, "learning_rate": 1.9914838703414352e-05, "loss": 0.9054, "step": 3037 }, { "epoch": 0.49597975592832944, "grad_norm": 3.750371217727661, "learning_rate": 1.9914755187487875e-05, "loss": 1.0671, "step": 3038 }, { "epoch": 0.4961430145708338, "grad_norm": 3.5205981731414795, "learning_rate": 1.9914671630805583e-05, "loss": 0.9959, "step": 3039 }, { "epoch": 0.49630627321333826, "grad_norm": 3.4397871494293213, "learning_rate": 1.9914588033367824e-05, "loss": 0.9096, "step": 3040 }, { "epoch": 0.49646953185584264, "grad_norm": 3.247540235519409, "learning_rate": 1.9914504395174932e-05, "loss": 1.0297, "step": 3041 }, { "epoch": 0.496632790498347, "grad_norm": 3.0941131114959717, "learning_rate": 1.9914420716227257e-05, "loss": 0.8764, "step": 3042 }, { "epoch": 0.4967960491408514, "grad_norm": 3.5328171253204346, "learning_rate": 1.9914336996525147e-05, "loss": 1.1546, "step": 3043 }, { "epoch": 0.4969593077833558, "grad_norm": 3.1886024475097656, "learning_rate": 1.9914253236068938e-05, "loss": 0.876, "step": 3044 }, { "epoch": 0.49712256642586017, "grad_norm": 3.541116952896118, "learning_rate": 1.9914169434858975e-05, "loss": 1.1356, "step": 3045 }, { "epoch": 0.49728582506836455, "grad_norm": 3.4335930347442627, "learning_rate": 1.9914085592895608e-05, "loss": 1.0378, "step": 3046 }, { "epoch": 0.49744908371086893, "grad_norm": 3.0518085956573486, "learning_rate": 1.9914001710179178e-05, "loss": 0.9185, "step": 3047 }, { "epoch": 0.4976123423533733, "grad_norm": 3.2054731845855713, "learning_rate": 1.991391778671003e-05, "loss": 0.9471, "step": 3048 }, { "epoch": 0.4977756009958777, "grad_norm": 3.3104729652404785, "learning_rate": 1.991383382248851e-05, "loss": 1.0246, "step": 3049 }, { "epoch": 0.4979388596383821, "grad_norm": 3.3084099292755127, "learning_rate": 1.9913749817514963e-05, "loss": 0.9375, "step": 3050 }, { "epoch": 0.4981021182808865, "grad_norm": 3.7248356342315674, "learning_rate": 1.9913665771789733e-05, "loss": 1.0832, "step": 3051 }, { "epoch": 0.4982653769233909, "grad_norm": 2.9816267490386963, "learning_rate": 1.9913581685313165e-05, "loss": 0.8179, "step": 3052 }, { "epoch": 0.4984286355658953, "grad_norm": 3.183593988418579, "learning_rate": 1.991349755808561e-05, "loss": 0.9152, "step": 3053 }, { "epoch": 0.49859189420839967, "grad_norm": 3.0596630573272705, "learning_rate": 1.9913413390107407e-05, "loss": 0.9184, "step": 3054 }, { "epoch": 0.49875515285090405, "grad_norm": 3.447143793106079, "learning_rate": 1.9913329181378906e-05, "loss": 0.9243, "step": 3055 }, { "epoch": 0.49891841149340843, "grad_norm": 2.7419698238372803, "learning_rate": 1.991324493190045e-05, "loss": 0.7613, "step": 3056 }, { "epoch": 0.4990816701359128, "grad_norm": 2.9931042194366455, "learning_rate": 1.991316064167239e-05, "loss": 0.9089, "step": 3057 }, { "epoch": 0.4992449287784172, "grad_norm": 3.4312081336975098, "learning_rate": 1.9913076310695068e-05, "loss": 0.9637, "step": 3058 }, { "epoch": 0.4994081874209216, "grad_norm": 3.6824593544006348, "learning_rate": 1.9912991938968832e-05, "loss": 0.9344, "step": 3059 }, { "epoch": 0.49957144606342596, "grad_norm": 3.215358018875122, "learning_rate": 1.991290752649403e-05, "loss": 0.9607, "step": 3060 }, { "epoch": 0.49973470470593034, "grad_norm": 2.9715964794158936, "learning_rate": 1.991282307327101e-05, "loss": 0.7933, "step": 3061 }, { "epoch": 0.4998979633484348, "grad_norm": 3.3628182411193848, "learning_rate": 1.9912738579300114e-05, "loss": 0.9792, "step": 3062 }, { "epoch": 0.5000612219909392, "grad_norm": 3.369938373565674, "learning_rate": 1.9912654044581693e-05, "loss": 1.0068, "step": 3063 }, { "epoch": 0.5002244806334435, "grad_norm": 3.3259382247924805, "learning_rate": 1.9912569469116098e-05, "loss": 0.8267, "step": 3064 }, { "epoch": 0.5003877392759479, "grad_norm": 3.090313196182251, "learning_rate": 1.9912484852903667e-05, "loss": 0.8655, "step": 3065 }, { "epoch": 0.5005509979184523, "grad_norm": 3.2641632556915283, "learning_rate": 1.9912400195944758e-05, "loss": 0.879, "step": 3066 }, { "epoch": 0.5007142565609567, "grad_norm": 3.595738649368286, "learning_rate": 1.9912315498239715e-05, "loss": 0.9452, "step": 3067 }, { "epoch": 0.5008775152034611, "grad_norm": 3.8745925426483154, "learning_rate": 1.9912230759788883e-05, "loss": 1.1436, "step": 3068 }, { "epoch": 0.5010407738459655, "grad_norm": 3.5661306381225586, "learning_rate": 1.9912145980592613e-05, "loss": 0.8322, "step": 3069 }, { "epoch": 0.5012040324884699, "grad_norm": 3.464893102645874, "learning_rate": 1.9912061160651256e-05, "loss": 1.0237, "step": 3070 }, { "epoch": 0.5013672911309742, "grad_norm": 3.0651679039001465, "learning_rate": 1.9911976299965157e-05, "loss": 0.8822, "step": 3071 }, { "epoch": 0.5015305497734787, "grad_norm": 3.471364736557007, "learning_rate": 1.9911891398534667e-05, "loss": 0.939, "step": 3072 }, { "epoch": 0.501693808415983, "grad_norm": 2.9942209720611572, "learning_rate": 1.9911806456360133e-05, "loss": 0.7956, "step": 3073 }, { "epoch": 0.5018570670584874, "grad_norm": 3.448533773422241, "learning_rate": 1.9911721473441905e-05, "loss": 0.9714, "step": 3074 }, { "epoch": 0.5020203257009918, "grad_norm": 3.1132349967956543, "learning_rate": 1.9911636449780332e-05, "loss": 0.8537, "step": 3075 }, { "epoch": 0.5021835843434962, "grad_norm": 2.737189531326294, "learning_rate": 1.9911551385375766e-05, "loss": 0.7838, "step": 3076 }, { "epoch": 0.5023468429860005, "grad_norm": 3.3170900344848633, "learning_rate": 1.9911466280228556e-05, "loss": 0.8566, "step": 3077 }, { "epoch": 0.502510101628505, "grad_norm": 2.9817326068878174, "learning_rate": 1.9911381134339048e-05, "loss": 1.0441, "step": 3078 }, { "epoch": 0.5026733602710094, "grad_norm": 3.5270581245422363, "learning_rate": 1.9911295947707594e-05, "loss": 1.0445, "step": 3079 }, { "epoch": 0.5028366189135137, "grad_norm": 3.1172847747802734, "learning_rate": 1.991121072033455e-05, "loss": 0.7325, "step": 3080 }, { "epoch": 0.5029998775560182, "grad_norm": 3.2410390377044678, "learning_rate": 1.9911125452220257e-05, "loss": 0.9117, "step": 3081 }, { "epoch": 0.5031631361985225, "grad_norm": 3.4616076946258545, "learning_rate": 1.991104014336507e-05, "loss": 0.8932, "step": 3082 }, { "epoch": 0.5033263948410269, "grad_norm": 3.236478567123413, "learning_rate": 1.991095479376934e-05, "loss": 1.0449, "step": 3083 }, { "epoch": 0.5034896534835313, "grad_norm": 2.940084218978882, "learning_rate": 1.9910869403433415e-05, "loss": 0.8881, "step": 3084 }, { "epoch": 0.5036529121260357, "grad_norm": 3.565406322479248, "learning_rate": 1.991078397235765e-05, "loss": 0.9432, "step": 3085 }, { "epoch": 0.50381617076854, "grad_norm": 2.996171712875366, "learning_rate": 1.9910698500542394e-05, "loss": 0.8702, "step": 3086 }, { "epoch": 0.5039794294110445, "grad_norm": 3.391947031021118, "learning_rate": 1.9910612987988e-05, "loss": 0.9684, "step": 3087 }, { "epoch": 0.5041426880535488, "grad_norm": 3.499166965484619, "learning_rate": 1.9910527434694814e-05, "loss": 0.8792, "step": 3088 }, { "epoch": 0.5043059466960532, "grad_norm": 3.0818467140197754, "learning_rate": 1.9910441840663196e-05, "loss": 0.8529, "step": 3089 }, { "epoch": 0.5044692053385577, "grad_norm": 2.898764133453369, "learning_rate": 1.9910356205893494e-05, "loss": 0.8646, "step": 3090 }, { "epoch": 0.504632463981062, "grad_norm": 3.162609338760376, "learning_rate": 1.9910270530386058e-05, "loss": 0.7619, "step": 3091 }, { "epoch": 0.5047957226235664, "grad_norm": 3.440469264984131, "learning_rate": 1.9910184814141242e-05, "loss": 0.9793, "step": 3092 }, { "epoch": 0.5049589812660707, "grad_norm": 3.3696775436401367, "learning_rate": 1.9910099057159398e-05, "loss": 1.0151, "step": 3093 }, { "epoch": 0.5051222399085752, "grad_norm": 3.6215250492095947, "learning_rate": 1.9910013259440882e-05, "loss": 0.9647, "step": 3094 }, { "epoch": 0.5052854985510795, "grad_norm": 3.3773255348205566, "learning_rate": 1.990992742098604e-05, "loss": 0.8872, "step": 3095 }, { "epoch": 0.505448757193584, "grad_norm": 3.166578531265259, "learning_rate": 1.990984154179523e-05, "loss": 0.7393, "step": 3096 }, { "epoch": 0.5056120158360883, "grad_norm": 3.526052951812744, "learning_rate": 1.9909755621868802e-05, "loss": 1.0918, "step": 3097 }, { "epoch": 0.5057752744785927, "grad_norm": 3.255392074584961, "learning_rate": 1.990966966120711e-05, "loss": 0.9887, "step": 3098 }, { "epoch": 0.505938533121097, "grad_norm": 3.068023443222046, "learning_rate": 1.9909583659810507e-05, "loss": 0.8618, "step": 3099 }, { "epoch": 0.5061017917636015, "grad_norm": 3.4324216842651367, "learning_rate": 1.990949761767935e-05, "loss": 0.8453, "step": 3100 }, { "epoch": 0.5062650504061059, "grad_norm": 2.845393419265747, "learning_rate": 1.9909411534813987e-05, "loss": 0.872, "step": 3101 }, { "epoch": 0.5064283090486102, "grad_norm": 3.486758232116699, "learning_rate": 1.9909325411214778e-05, "loss": 1.1012, "step": 3102 }, { "epoch": 0.5065915676911147, "grad_norm": 2.5663657188415527, "learning_rate": 1.9909239246882073e-05, "loss": 0.709, "step": 3103 }, { "epoch": 0.506754826333619, "grad_norm": 2.8274149894714355, "learning_rate": 1.9909153041816228e-05, "loss": 0.8804, "step": 3104 }, { "epoch": 0.5069180849761235, "grad_norm": 2.7579405307769775, "learning_rate": 1.9909066796017595e-05, "loss": 0.8262, "step": 3105 }, { "epoch": 0.5070813436186278, "grad_norm": 4.021721839904785, "learning_rate": 1.990898050948653e-05, "loss": 0.9844, "step": 3106 }, { "epoch": 0.5072446022611322, "grad_norm": 3.835693359375, "learning_rate": 1.990889418222339e-05, "loss": 1.0064, "step": 3107 }, { "epoch": 0.5074078609036365, "grad_norm": 3.0373363494873047, "learning_rate": 1.9908807814228524e-05, "loss": 0.7952, "step": 3108 }, { "epoch": 0.507571119546141, "grad_norm": 3.141982078552246, "learning_rate": 1.9908721405502294e-05, "loss": 0.8646, "step": 3109 }, { "epoch": 0.5077343781886453, "grad_norm": 2.92325496673584, "learning_rate": 1.9908634956045048e-05, "loss": 0.7236, "step": 3110 }, { "epoch": 0.5078976368311497, "grad_norm": 3.1767637729644775, "learning_rate": 1.9908548465857146e-05, "loss": 0.8842, "step": 3111 }, { "epoch": 0.5080608954736542, "grad_norm": 2.988158941268921, "learning_rate": 1.990846193493895e-05, "loss": 1.0172, "step": 3112 }, { "epoch": 0.5082241541161585, "grad_norm": 2.9847257137298584, "learning_rate": 1.99083753632908e-05, "loss": 0.731, "step": 3113 }, { "epoch": 0.508387412758663, "grad_norm": 3.1664516925811768, "learning_rate": 1.9908288750913062e-05, "loss": 1.1007, "step": 3114 }, { "epoch": 0.5085506714011673, "grad_norm": 3.4824059009552, "learning_rate": 1.9908202097806087e-05, "loss": 0.9738, "step": 3115 }, { "epoch": 0.5087139300436717, "grad_norm": 3.3556272983551025, "learning_rate": 1.990811540397024e-05, "loss": 0.9721, "step": 3116 }, { "epoch": 0.508877188686176, "grad_norm": 3.074422597885132, "learning_rate": 1.990802866940587e-05, "loss": 0.8339, "step": 3117 }, { "epoch": 0.5090404473286805, "grad_norm": 2.7830071449279785, "learning_rate": 1.9907941894113337e-05, "loss": 0.8387, "step": 3118 }, { "epoch": 0.5092037059711848, "grad_norm": 3.2070958614349365, "learning_rate": 1.9907855078092995e-05, "loss": 0.9763, "step": 3119 }, { "epoch": 0.5093669646136892, "grad_norm": 3.1490986347198486, "learning_rate": 1.99077682213452e-05, "loss": 0.9245, "step": 3120 }, { "epoch": 0.5095302232561936, "grad_norm": 3.791170835494995, "learning_rate": 1.9907681323870312e-05, "loss": 1.1204, "step": 3121 }, { "epoch": 0.509693481898698, "grad_norm": 3.0761020183563232, "learning_rate": 1.990759438566869e-05, "loss": 0.917, "step": 3122 }, { "epoch": 0.5098567405412024, "grad_norm": 3.0239486694335938, "learning_rate": 1.9907507406740682e-05, "loss": 0.9458, "step": 3123 }, { "epoch": 0.5100199991837068, "grad_norm": 3.1974968910217285, "learning_rate": 1.9907420387086657e-05, "loss": 0.9322, "step": 3124 }, { "epoch": 0.5101832578262112, "grad_norm": 3.3597564697265625, "learning_rate": 1.990733332670697e-05, "loss": 0.9974, "step": 3125 }, { "epoch": 0.5103465164687155, "grad_norm": 2.924398183822632, "learning_rate": 1.990724622560197e-05, "loss": 0.8369, "step": 3126 }, { "epoch": 0.51050977511122, "grad_norm": 3.068504810333252, "learning_rate": 1.990715908377203e-05, "loss": 0.7686, "step": 3127 }, { "epoch": 0.5106730337537243, "grad_norm": 3.226559638977051, "learning_rate": 1.990707190121749e-05, "loss": 0.7459, "step": 3128 }, { "epoch": 0.5108362923962287, "grad_norm": 3.696746826171875, "learning_rate": 1.9906984677938724e-05, "loss": 0.966, "step": 3129 }, { "epoch": 0.5109995510387331, "grad_norm": 3.2867283821105957, "learning_rate": 1.9906897413936086e-05, "loss": 0.8489, "step": 3130 }, { "epoch": 0.5111628096812375, "grad_norm": 3.3322019577026367, "learning_rate": 1.990681010920993e-05, "loss": 1.0491, "step": 3131 }, { "epoch": 0.5113260683237418, "grad_norm": 3.2320334911346436, "learning_rate": 1.990672276376062e-05, "loss": 1.0409, "step": 3132 }, { "epoch": 0.5114893269662463, "grad_norm": 3.2997875213623047, "learning_rate": 1.9906635377588513e-05, "loss": 1.035, "step": 3133 }, { "epoch": 0.5116525856087507, "grad_norm": 3.693354368209839, "learning_rate": 1.990654795069397e-05, "loss": 0.9968, "step": 3134 }, { "epoch": 0.511815844251255, "grad_norm": 3.293085813522339, "learning_rate": 1.9906460483077347e-05, "loss": 0.9539, "step": 3135 }, { "epoch": 0.5119791028937595, "grad_norm": 3.288959264755249, "learning_rate": 1.9906372974739007e-05, "loss": 0.9151, "step": 3136 }, { "epoch": 0.5121423615362638, "grad_norm": 3.1788177490234375, "learning_rate": 1.990628542567931e-05, "loss": 0.9009, "step": 3137 }, { "epoch": 0.5123056201787682, "grad_norm": 3.587881326675415, "learning_rate": 1.9906197835898612e-05, "loss": 0.8958, "step": 3138 }, { "epoch": 0.5124688788212726, "grad_norm": 3.2073514461517334, "learning_rate": 1.9906110205397273e-05, "loss": 0.88, "step": 3139 }, { "epoch": 0.512632137463777, "grad_norm": 3.2774391174316406, "learning_rate": 1.990602253417566e-05, "loss": 0.9761, "step": 3140 }, { "epoch": 0.5127953961062813, "grad_norm": 2.833791732788086, "learning_rate": 1.9905934822234125e-05, "loss": 0.6995, "step": 3141 }, { "epoch": 0.5129586547487858, "grad_norm": 3.5339081287384033, "learning_rate": 1.9905847069573033e-05, "loss": 1.009, "step": 3142 }, { "epoch": 0.5131219133912901, "grad_norm": 3.2104332447052, "learning_rate": 1.9905759276192748e-05, "loss": 0.8901, "step": 3143 }, { "epoch": 0.5132851720337945, "grad_norm": 3.497798204421997, "learning_rate": 1.9905671442093623e-05, "loss": 1.0585, "step": 3144 }, { "epoch": 0.513448430676299, "grad_norm": 3.3074376583099365, "learning_rate": 1.9905583567276025e-05, "loss": 1.0044, "step": 3145 }, { "epoch": 0.5136116893188033, "grad_norm": 3.363564968109131, "learning_rate": 1.990549565174031e-05, "loss": 0.9658, "step": 3146 }, { "epoch": 0.5137749479613077, "grad_norm": 3.6077303886413574, "learning_rate": 1.9905407695486844e-05, "loss": 0.9122, "step": 3147 }, { "epoch": 0.5139382066038121, "grad_norm": 2.926696300506592, "learning_rate": 1.990531969851599e-05, "loss": 0.8703, "step": 3148 }, { "epoch": 0.5141014652463165, "grad_norm": 3.133378028869629, "learning_rate": 1.9905231660828104e-05, "loss": 0.7732, "step": 3149 }, { "epoch": 0.5142647238888208, "grad_norm": 3.5782463550567627, "learning_rate": 1.9905143582423552e-05, "loss": 1.0555, "step": 3150 }, { "epoch": 0.5144279825313253, "grad_norm": 3.088231325149536, "learning_rate": 1.9905055463302692e-05, "loss": 0.9256, "step": 3151 }, { "epoch": 0.5145912411738296, "grad_norm": 3.6379716396331787, "learning_rate": 1.9904967303465894e-05, "loss": 1.0631, "step": 3152 }, { "epoch": 0.514754499816334, "grad_norm": 3.0330851078033447, "learning_rate": 1.9904879102913513e-05, "loss": 0.8021, "step": 3153 }, { "epoch": 0.5149177584588384, "grad_norm": 3.014402389526367, "learning_rate": 1.9904790861645914e-05, "loss": 0.9319, "step": 3154 }, { "epoch": 0.5150810171013428, "grad_norm": 3.1583471298217773, "learning_rate": 1.990470257966346e-05, "loss": 1.0895, "step": 3155 }, { "epoch": 0.5152442757438472, "grad_norm": 2.914720058441162, "learning_rate": 1.9904614256966514e-05, "loss": 0.8073, "step": 3156 }, { "epoch": 0.5154075343863516, "grad_norm": 3.4448657035827637, "learning_rate": 1.990452589355544e-05, "loss": 0.9062, "step": 3157 }, { "epoch": 0.515570793028856, "grad_norm": 3.7515242099761963, "learning_rate": 1.9904437489430595e-05, "loss": 1.0073, "step": 3158 }, { "epoch": 0.5157340516713603, "grad_norm": 3.2788240909576416, "learning_rate": 1.9904349044592354e-05, "loss": 0.9018, "step": 3159 }, { "epoch": 0.5158973103138648, "grad_norm": 3.3916733264923096, "learning_rate": 1.990426055904107e-05, "loss": 1.2253, "step": 3160 }, { "epoch": 0.5160605689563691, "grad_norm": 3.401028633117676, "learning_rate": 1.990417203277711e-05, "loss": 0.9849, "step": 3161 }, { "epoch": 0.5162238275988735, "grad_norm": 2.9367265701293945, "learning_rate": 1.9904083465800838e-05, "loss": 0.8431, "step": 3162 }, { "epoch": 0.5163870862413779, "grad_norm": 3.202122926712036, "learning_rate": 1.990399485811262e-05, "loss": 1.1434, "step": 3163 }, { "epoch": 0.5165503448838823, "grad_norm": 3.2019968032836914, "learning_rate": 1.990390620971282e-05, "loss": 0.9994, "step": 3164 }, { "epoch": 0.5167136035263867, "grad_norm": 2.776916980743408, "learning_rate": 1.99038175206018e-05, "loss": 0.8508, "step": 3165 }, { "epoch": 0.5168768621688911, "grad_norm": 3.064530849456787, "learning_rate": 1.9903728790779924e-05, "loss": 0.9977, "step": 3166 }, { "epoch": 0.5170401208113955, "grad_norm": 2.6146764755249023, "learning_rate": 1.9903640020247563e-05, "loss": 0.7102, "step": 3167 }, { "epoch": 0.5172033794538998, "grad_norm": 3.295973300933838, "learning_rate": 1.9903551209005078e-05, "loss": 1.1538, "step": 3168 }, { "epoch": 0.5173666380964043, "grad_norm": 2.9834296703338623, "learning_rate": 1.9903462357052827e-05, "loss": 0.9299, "step": 3169 }, { "epoch": 0.5175298967389086, "grad_norm": 3.250770092010498, "learning_rate": 1.9903373464391186e-05, "loss": 0.9517, "step": 3170 }, { "epoch": 0.517693155381413, "grad_norm": 2.7887957096099854, "learning_rate": 1.9903284531020516e-05, "loss": 0.8093, "step": 3171 }, { "epoch": 0.5178564140239174, "grad_norm": 3.3283848762512207, "learning_rate": 1.990319555694118e-05, "loss": 1.0456, "step": 3172 }, { "epoch": 0.5180196726664218, "grad_norm": 3.2138099670410156, "learning_rate": 1.990310654215355e-05, "loss": 1.0652, "step": 3173 }, { "epoch": 0.5181829313089261, "grad_norm": 2.7310705184936523, "learning_rate": 1.9903017486657984e-05, "loss": 0.862, "step": 3174 }, { "epoch": 0.5183461899514306, "grad_norm": 3.1645100116729736, "learning_rate": 1.990292839045486e-05, "loss": 0.8336, "step": 3175 }, { "epoch": 0.518509448593935, "grad_norm": 2.977222442626953, "learning_rate": 1.990283925354453e-05, "loss": 1.0546, "step": 3176 }, { "epoch": 0.5186727072364393, "grad_norm": 3.408963918685913, "learning_rate": 1.9902750075927368e-05, "loss": 0.9097, "step": 3177 }, { "epoch": 0.5188359658789438, "grad_norm": 3.1905503273010254, "learning_rate": 1.990266085760374e-05, "loss": 1.4918, "step": 3178 }, { "epoch": 0.5189992245214481, "grad_norm": 3.35005784034729, "learning_rate": 1.9902571598574015e-05, "loss": 0.9612, "step": 3179 }, { "epoch": 0.5191624831639525, "grad_norm": 3.3760528564453125, "learning_rate": 1.9902482298838555e-05, "loss": 0.9867, "step": 3180 }, { "epoch": 0.5193257418064569, "grad_norm": 2.7636916637420654, "learning_rate": 1.9902392958397728e-05, "loss": 0.6975, "step": 3181 }, { "epoch": 0.5194890004489613, "grad_norm": 3.220067262649536, "learning_rate": 1.9902303577251906e-05, "loss": 0.9573, "step": 3182 }, { "epoch": 0.5196522590914656, "grad_norm": 2.963834524154663, "learning_rate": 1.990221415540145e-05, "loss": 0.8867, "step": 3183 }, { "epoch": 0.5198155177339701, "grad_norm": 2.754504919052124, "learning_rate": 1.9902124692846733e-05, "loss": 0.8108, "step": 3184 }, { "epoch": 0.5199787763764744, "grad_norm": 3.2216193675994873, "learning_rate": 1.990203518958812e-05, "loss": 0.8233, "step": 3185 }, { "epoch": 0.5201420350189788, "grad_norm": 3.2047996520996094, "learning_rate": 1.9901945645625978e-05, "loss": 0.9857, "step": 3186 }, { "epoch": 0.5203052936614833, "grad_norm": 3.1374285221099854, "learning_rate": 1.990185606096068e-05, "loss": 0.7385, "step": 3187 }, { "epoch": 0.5204685523039876, "grad_norm": 2.679056406021118, "learning_rate": 1.9901766435592585e-05, "loss": 0.6557, "step": 3188 }, { "epoch": 0.520631810946492, "grad_norm": 3.7360622882843018, "learning_rate": 1.990167676952207e-05, "loss": 0.9603, "step": 3189 }, { "epoch": 0.5207950695889964, "grad_norm": 2.825352668762207, "learning_rate": 1.99015870627495e-05, "loss": 0.8569, "step": 3190 }, { "epoch": 0.5209583282315008, "grad_norm": 3.522617816925049, "learning_rate": 1.9901497315275246e-05, "loss": 1.0198, "step": 3191 }, { "epoch": 0.5211215868740051, "grad_norm": 3.211676597595215, "learning_rate": 1.9901407527099673e-05, "loss": 0.8959, "step": 3192 }, { "epoch": 0.5212848455165096, "grad_norm": 2.889425754547119, "learning_rate": 1.990131769822315e-05, "loss": 0.9461, "step": 3193 }, { "epoch": 0.5214481041590139, "grad_norm": 3.8853676319122314, "learning_rate": 1.9901227828646056e-05, "loss": 0.9677, "step": 3194 }, { "epoch": 0.5216113628015183, "grad_norm": 3.047067403793335, "learning_rate": 1.990113791836875e-05, "loss": 0.8505, "step": 3195 }, { "epoch": 0.5217746214440226, "grad_norm": 2.907865285873413, "learning_rate": 1.9901047967391603e-05, "loss": 0.7912, "step": 3196 }, { "epoch": 0.5219378800865271, "grad_norm": 2.736313819885254, "learning_rate": 1.9900957975714986e-05, "loss": 0.6893, "step": 3197 }, { "epoch": 0.5221011387290315, "grad_norm": 3.5484652519226074, "learning_rate": 1.990086794333927e-05, "loss": 0.9649, "step": 3198 }, { "epoch": 0.5222643973715358, "grad_norm": 3.3783159255981445, "learning_rate": 1.9900777870264826e-05, "loss": 0.846, "step": 3199 }, { "epoch": 0.5224276560140403, "grad_norm": 2.3815503120422363, "learning_rate": 1.9900687756492022e-05, "loss": 0.6052, "step": 3200 }, { "epoch": 0.5225909146565446, "grad_norm": 2.7632312774658203, "learning_rate": 1.990059760202123e-05, "loss": 0.8343, "step": 3201 }, { "epoch": 0.522754173299049, "grad_norm": 3.407611608505249, "learning_rate": 1.9900507406852814e-05, "loss": 1.1213, "step": 3202 }, { "epoch": 0.5229174319415534, "grad_norm": 3.2849206924438477, "learning_rate": 1.9900417170987153e-05, "loss": 0.8653, "step": 3203 }, { "epoch": 0.5230806905840578, "grad_norm": 3.464749574661255, "learning_rate": 1.9900326894424618e-05, "loss": 1.0762, "step": 3204 }, { "epoch": 0.5232439492265621, "grad_norm": 2.75175142288208, "learning_rate": 1.990023657716558e-05, "loss": 0.8516, "step": 3205 }, { "epoch": 0.5234072078690666, "grad_norm": 2.9439613819122314, "learning_rate": 1.99001462192104e-05, "loss": 0.8117, "step": 3206 }, { "epoch": 0.5235704665115709, "grad_norm": 2.631845235824585, "learning_rate": 1.9900055820559464e-05, "loss": 0.698, "step": 3207 }, { "epoch": 0.5237337251540753, "grad_norm": 3.2988293170928955, "learning_rate": 1.989996538121313e-05, "loss": 1.0527, "step": 3208 }, { "epoch": 0.5238969837965798, "grad_norm": 3.0301802158355713, "learning_rate": 1.9899874901171784e-05, "loss": 0.8304, "step": 3209 }, { "epoch": 0.5240602424390841, "grad_norm": 2.725069761276245, "learning_rate": 1.9899784380435783e-05, "loss": 0.6781, "step": 3210 }, { "epoch": 0.5242235010815885, "grad_norm": 3.0063304901123047, "learning_rate": 1.989969381900551e-05, "loss": 0.8403, "step": 3211 }, { "epoch": 0.5243867597240929, "grad_norm": 3.310019016265869, "learning_rate": 1.9899603216881336e-05, "loss": 0.9773, "step": 3212 }, { "epoch": 0.5245500183665973, "grad_norm": 3.27280855178833, "learning_rate": 1.989951257406363e-05, "loss": 0.8031, "step": 3213 }, { "epoch": 0.5247132770091016, "grad_norm": 2.7513036727905273, "learning_rate": 1.9899421890552767e-05, "loss": 0.7525, "step": 3214 }, { "epoch": 0.5248765356516061, "grad_norm": 3.7381367683410645, "learning_rate": 1.9899331166349115e-05, "loss": 1.0149, "step": 3215 }, { "epoch": 0.5250397942941104, "grad_norm": 3.1517019271850586, "learning_rate": 1.989924040145305e-05, "loss": 1.0168, "step": 3216 }, { "epoch": 0.5252030529366148, "grad_norm": 3.313075065612793, "learning_rate": 1.989914959586495e-05, "loss": 0.8786, "step": 3217 }, { "epoch": 0.5253663115791192, "grad_norm": 3.1989505290985107, "learning_rate": 1.989905874958518e-05, "loss": 0.8428, "step": 3218 }, { "epoch": 0.5255295702216236, "grad_norm": 3.539905071258545, "learning_rate": 1.9898967862614122e-05, "loss": 0.9193, "step": 3219 }, { "epoch": 0.525692828864128, "grad_norm": 3.178896427154541, "learning_rate": 1.989887693495214e-05, "loss": 0.8874, "step": 3220 }, { "epoch": 0.5258560875066324, "grad_norm": 3.309692621231079, "learning_rate": 1.9898785966599612e-05, "loss": 0.9047, "step": 3221 }, { "epoch": 0.5260193461491368, "grad_norm": 3.3898746967315674, "learning_rate": 1.9898694957556913e-05, "loss": 0.9195, "step": 3222 }, { "epoch": 0.5261826047916411, "grad_norm": 3.0102572441101074, "learning_rate": 1.989860390782442e-05, "loss": 0.8094, "step": 3223 }, { "epoch": 0.5263458634341456, "grad_norm": 3.230201244354248, "learning_rate": 1.9898512817402504e-05, "loss": 0.9917, "step": 3224 }, { "epoch": 0.5265091220766499, "grad_norm": 3.143786668777466, "learning_rate": 1.989842168629154e-05, "loss": 0.8746, "step": 3225 }, { "epoch": 0.5266723807191543, "grad_norm": 3.203611135482788, "learning_rate": 1.98983305144919e-05, "loss": 0.9042, "step": 3226 }, { "epoch": 0.5268356393616587, "grad_norm": 3.5292165279388428, "learning_rate": 1.9898239302003958e-05, "loss": 1.137, "step": 3227 }, { "epoch": 0.5269988980041631, "grad_norm": 3.289964199066162, "learning_rate": 1.9898148048828095e-05, "loss": 0.9513, "step": 3228 }, { "epoch": 0.5271621566466674, "grad_norm": 3.0249319076538086, "learning_rate": 1.9898056754964682e-05, "loss": 0.9061, "step": 3229 }, { "epoch": 0.5273254152891719, "grad_norm": 3.2538740634918213, "learning_rate": 1.98979654204141e-05, "loss": 1.1298, "step": 3230 }, { "epoch": 0.5274886739316763, "grad_norm": 3.4472036361694336, "learning_rate": 1.9897874045176713e-05, "loss": 0.9159, "step": 3231 }, { "epoch": 0.5276519325741806, "grad_norm": 3.678908586502075, "learning_rate": 1.9897782629252904e-05, "loss": 0.9652, "step": 3232 }, { "epoch": 0.5278151912166851, "grad_norm": 2.959533929824829, "learning_rate": 1.989769117264305e-05, "loss": 0.8949, "step": 3233 }, { "epoch": 0.5279784498591894, "grad_norm": 3.0488500595092773, "learning_rate": 1.9897599675347522e-05, "loss": 0.9459, "step": 3234 }, { "epoch": 0.5281417085016938, "grad_norm": 3.1356213092803955, "learning_rate": 1.9897508137366703e-05, "loss": 0.8561, "step": 3235 }, { "epoch": 0.5283049671441982, "grad_norm": 2.7618720531463623, "learning_rate": 1.9897416558700968e-05, "loss": 0.7317, "step": 3236 }, { "epoch": 0.5284682257867026, "grad_norm": 2.882312297821045, "learning_rate": 1.989732493935068e-05, "loss": 0.8382, "step": 3237 }, { "epoch": 0.5286314844292069, "grad_norm": 3.5410566329956055, "learning_rate": 1.9897233279316236e-05, "loss": 0.9679, "step": 3238 }, { "epoch": 0.5287947430717114, "grad_norm": 3.2371504306793213, "learning_rate": 1.9897141578598e-05, "loss": 0.9501, "step": 3239 }, { "epoch": 0.5289580017142157, "grad_norm": 3.1691339015960693, "learning_rate": 1.989704983719635e-05, "loss": 0.9081, "step": 3240 }, { "epoch": 0.5291212603567201, "grad_norm": 3.303204298019409, "learning_rate": 1.9896958055111668e-05, "loss": 1.1106, "step": 3241 }, { "epoch": 0.5292845189992246, "grad_norm": 3.0836427211761475, "learning_rate": 1.989686623234433e-05, "loss": 1.0035, "step": 3242 }, { "epoch": 0.5294477776417289, "grad_norm": 2.7123451232910156, "learning_rate": 1.9896774368894708e-05, "loss": 0.6753, "step": 3243 }, { "epoch": 0.5296110362842333, "grad_norm": 2.921778440475464, "learning_rate": 1.9896682464763183e-05, "loss": 0.9596, "step": 3244 }, { "epoch": 0.5297742949267377, "grad_norm": 3.764202833175659, "learning_rate": 1.9896590519950136e-05, "loss": 1.0849, "step": 3245 }, { "epoch": 0.5299375535692421, "grad_norm": 2.891489028930664, "learning_rate": 1.9896498534455943e-05, "loss": 0.8805, "step": 3246 }, { "epoch": 0.5301008122117464, "grad_norm": 3.365450620651245, "learning_rate": 1.989640650828098e-05, "loss": 0.9927, "step": 3247 }, { "epoch": 0.5302640708542509, "grad_norm": 3.650071859359741, "learning_rate": 1.9896314441425626e-05, "loss": 1.0482, "step": 3248 }, { "epoch": 0.5304273294967552, "grad_norm": 3.103394031524658, "learning_rate": 1.989622233389026e-05, "loss": 0.8665, "step": 3249 }, { "epoch": 0.5305905881392596, "grad_norm": 3.0710630416870117, "learning_rate": 1.9896130185675263e-05, "loss": 0.8607, "step": 3250 }, { "epoch": 0.530753846781764, "grad_norm": 3.3505265712738037, "learning_rate": 1.9896037996781005e-05, "loss": 0.9316, "step": 3251 }, { "epoch": 0.5309171054242684, "grad_norm": 3.498786211013794, "learning_rate": 1.9895945767207878e-05, "loss": 0.9423, "step": 3252 }, { "epoch": 0.5310803640667728, "grad_norm": 3.4100563526153564, "learning_rate": 1.9895853496956255e-05, "loss": 1.1072, "step": 3253 }, { "epoch": 0.5312436227092772, "grad_norm": 2.641408681869507, "learning_rate": 1.989576118602651e-05, "loss": 0.6743, "step": 3254 }, { "epoch": 0.5314068813517816, "grad_norm": 4.138669967651367, "learning_rate": 1.9895668834419033e-05, "loss": 0.8592, "step": 3255 }, { "epoch": 0.5315701399942859, "grad_norm": 3.0910754203796387, "learning_rate": 1.989557644213419e-05, "loss": 0.701, "step": 3256 }, { "epoch": 0.5317333986367904, "grad_norm": 2.98163104057312, "learning_rate": 1.989548400917238e-05, "loss": 0.7763, "step": 3257 }, { "epoch": 0.5318966572792947, "grad_norm": 3.2172229290008545, "learning_rate": 1.9895391535533963e-05, "loss": 0.9219, "step": 3258 }, { "epoch": 0.5320599159217991, "grad_norm": 2.9274940490722656, "learning_rate": 1.989529902121933e-05, "loss": 0.8211, "step": 3259 }, { "epoch": 0.5322231745643035, "grad_norm": 3.8313333988189697, "learning_rate": 1.989520646622886e-05, "loss": 0.8792, "step": 3260 }, { "epoch": 0.5323864332068079, "grad_norm": 3.2442426681518555, "learning_rate": 1.989511387056293e-05, "loss": 0.8419, "step": 3261 }, { "epoch": 0.5325496918493122, "grad_norm": 3.2939889430999756, "learning_rate": 1.9895021234221926e-05, "loss": 0.8418, "step": 3262 }, { "epoch": 0.5327129504918167, "grad_norm": 3.6672208309173584, "learning_rate": 1.9894928557206226e-05, "loss": 1.0465, "step": 3263 }, { "epoch": 0.5328762091343211, "grad_norm": 3.201582431793213, "learning_rate": 1.989483583951621e-05, "loss": 0.8893, "step": 3264 }, { "epoch": 0.5330394677768254, "grad_norm": 2.8549678325653076, "learning_rate": 1.989474308115226e-05, "loss": 0.7732, "step": 3265 }, { "epoch": 0.5332027264193299, "grad_norm": 3.3353748321533203, "learning_rate": 1.989465028211476e-05, "loss": 0.9152, "step": 3266 }, { "epoch": 0.5333659850618342, "grad_norm": 3.322648048400879, "learning_rate": 1.9894557442404085e-05, "loss": 0.8718, "step": 3267 }, { "epoch": 0.5335292437043386, "grad_norm": 3.289687395095825, "learning_rate": 1.9894464562020623e-05, "loss": 0.9631, "step": 3268 }, { "epoch": 0.533692502346843, "grad_norm": 2.4407830238342285, "learning_rate": 1.9894371640964754e-05, "loss": 0.6311, "step": 3269 }, { "epoch": 0.5338557609893474, "grad_norm": 3.323446273803711, "learning_rate": 1.9894278679236856e-05, "loss": 0.9037, "step": 3270 }, { "epoch": 0.5340190196318517, "grad_norm": 3.387660264968872, "learning_rate": 1.9894185676837315e-05, "loss": 0.9885, "step": 3271 }, { "epoch": 0.5341822782743562, "grad_norm": 3.6682865619659424, "learning_rate": 1.9894092633766515e-05, "loss": 1.1667, "step": 3272 }, { "epoch": 0.5343455369168605, "grad_norm": 2.8874666690826416, "learning_rate": 1.9893999550024836e-05, "loss": 0.8528, "step": 3273 }, { "epoch": 0.5345087955593649, "grad_norm": 3.4726696014404297, "learning_rate": 1.989390642561266e-05, "loss": 0.9346, "step": 3274 }, { "epoch": 0.5346720542018694, "grad_norm": 3.342957019805908, "learning_rate": 1.9893813260530368e-05, "loss": 0.9572, "step": 3275 }, { "epoch": 0.5348353128443737, "grad_norm": 3.1871798038482666, "learning_rate": 1.9893720054778346e-05, "loss": 1.0347, "step": 3276 }, { "epoch": 0.5349985714868781, "grad_norm": 3.2054455280303955, "learning_rate": 1.989362680835698e-05, "loss": 0.9208, "step": 3277 }, { "epoch": 0.5351618301293825, "grad_norm": 2.9415431022644043, "learning_rate": 1.989353352126665e-05, "loss": 0.9539, "step": 3278 }, { "epoch": 0.5353250887718869, "grad_norm": 2.9324822425842285, "learning_rate": 1.9893440193507737e-05, "loss": 0.8519, "step": 3279 }, { "epoch": 0.5354883474143912, "grad_norm": 3.111205816268921, "learning_rate": 1.9893346825080625e-05, "loss": 0.874, "step": 3280 }, { "epoch": 0.5356516060568957, "grad_norm": 3.1661794185638428, "learning_rate": 1.9893253415985707e-05, "loss": 0.9411, "step": 3281 }, { "epoch": 0.5358148646994, "grad_norm": 3.13370418548584, "learning_rate": 1.9893159966223353e-05, "loss": 0.851, "step": 3282 }, { "epoch": 0.5359781233419044, "grad_norm": 3.2474095821380615, "learning_rate": 1.9893066475793956e-05, "loss": 0.9501, "step": 3283 }, { "epoch": 0.5361413819844087, "grad_norm": 3.1215922832489014, "learning_rate": 1.98929729446979e-05, "loss": 0.9006, "step": 3284 }, { "epoch": 0.5363046406269132, "grad_norm": 2.8624658584594727, "learning_rate": 1.9892879372935566e-05, "loss": 0.8829, "step": 3285 }, { "epoch": 0.5364678992694176, "grad_norm": 3.3566150665283203, "learning_rate": 1.9892785760507342e-05, "loss": 0.9243, "step": 3286 }, { "epoch": 0.536631157911922, "grad_norm": 2.7808620929718018, "learning_rate": 1.9892692107413608e-05, "loss": 0.812, "step": 3287 }, { "epoch": 0.5367944165544264, "grad_norm": 3.529400110244751, "learning_rate": 1.9892598413654755e-05, "loss": 1.003, "step": 3288 }, { "epoch": 0.5369576751969307, "grad_norm": 3.6311233043670654, "learning_rate": 1.9892504679231164e-05, "loss": 1.0017, "step": 3289 }, { "epoch": 0.5371209338394352, "grad_norm": 3.7406198978424072, "learning_rate": 1.9892410904143222e-05, "loss": 1.0359, "step": 3290 }, { "epoch": 0.5372841924819395, "grad_norm": 3.049804925918579, "learning_rate": 1.9892317088391313e-05, "loss": 0.9877, "step": 3291 }, { "epoch": 0.5374474511244439, "grad_norm": 2.985567092895508, "learning_rate": 1.9892223231975824e-05, "loss": 0.7763, "step": 3292 }, { "epoch": 0.5376107097669482, "grad_norm": 3.8023271560668945, "learning_rate": 1.9892129334897144e-05, "loss": 0.8447, "step": 3293 }, { "epoch": 0.5377739684094527, "grad_norm": 3.3772456645965576, "learning_rate": 1.989203539715565e-05, "loss": 0.9313, "step": 3294 }, { "epoch": 0.537937227051957, "grad_norm": 3.0372815132141113, "learning_rate": 1.9891941418751738e-05, "loss": 0.8264, "step": 3295 }, { "epoch": 0.5381004856944615, "grad_norm": 3.313385248184204, "learning_rate": 1.9891847399685788e-05, "loss": 0.9314, "step": 3296 }, { "epoch": 0.5382637443369659, "grad_norm": 3.014791965484619, "learning_rate": 1.989175333995819e-05, "loss": 0.9582, "step": 3297 }, { "epoch": 0.5384270029794702, "grad_norm": 2.861828327178955, "learning_rate": 1.9891659239569326e-05, "loss": 0.8231, "step": 3298 }, { "epoch": 0.5385902616219747, "grad_norm": 2.944126605987549, "learning_rate": 1.989156509851959e-05, "loss": 0.7648, "step": 3299 }, { "epoch": 0.538753520264479, "grad_norm": 3.4203407764434814, "learning_rate": 1.9891470916809362e-05, "loss": 1.0044, "step": 3300 }, { "epoch": 0.5389167789069834, "grad_norm": 3.4495182037353516, "learning_rate": 1.9891376694439035e-05, "loss": 1.0811, "step": 3301 }, { "epoch": 0.5390800375494877, "grad_norm": 3.2506754398345947, "learning_rate": 1.9891282431408992e-05, "loss": 0.9999, "step": 3302 }, { "epoch": 0.5392432961919922, "grad_norm": 3.2268943786621094, "learning_rate": 1.989118812771962e-05, "loss": 0.8958, "step": 3303 }, { "epoch": 0.5394065548344965, "grad_norm": 3.378955602645874, "learning_rate": 1.989109378337131e-05, "loss": 0.8352, "step": 3304 }, { "epoch": 0.539569813477001, "grad_norm": 3.298427104949951, "learning_rate": 1.9890999398364447e-05, "loss": 0.9236, "step": 3305 }, { "epoch": 0.5397330721195053, "grad_norm": 3.774160861968994, "learning_rate": 1.989090497269942e-05, "loss": 1.1588, "step": 3306 }, { "epoch": 0.5398963307620097, "grad_norm": 3.3639492988586426, "learning_rate": 1.9890810506376618e-05, "loss": 0.992, "step": 3307 }, { "epoch": 0.5400595894045142, "grad_norm": 3.2821104526519775, "learning_rate": 1.989071599939643e-05, "loss": 0.9991, "step": 3308 }, { "epoch": 0.5402228480470185, "grad_norm": 2.993446111679077, "learning_rate": 1.9890621451759242e-05, "loss": 0.784, "step": 3309 }, { "epoch": 0.5403861066895229, "grad_norm": 3.5888946056365967, "learning_rate": 1.9890526863465444e-05, "loss": 1.1282, "step": 3310 }, { "epoch": 0.5405493653320272, "grad_norm": 3.3200082778930664, "learning_rate": 1.9890432234515423e-05, "loss": 0.9756, "step": 3311 }, { "epoch": 0.5407126239745317, "grad_norm": 2.7980458736419678, "learning_rate": 1.9890337564909576e-05, "loss": 0.7967, "step": 3312 }, { "epoch": 0.540875882617036, "grad_norm": 2.954742431640625, "learning_rate": 1.989024285464828e-05, "loss": 0.8358, "step": 3313 }, { "epoch": 0.5410391412595404, "grad_norm": 3.2570838928222656, "learning_rate": 1.9890148103731933e-05, "loss": 1.0026, "step": 3314 }, { "epoch": 0.5412023999020448, "grad_norm": 2.705240249633789, "learning_rate": 1.9890053312160917e-05, "loss": 0.7347, "step": 3315 }, { "epoch": 0.5413656585445492, "grad_norm": 3.1204707622528076, "learning_rate": 1.988995847993563e-05, "loss": 0.9542, "step": 3316 }, { "epoch": 0.5415289171870535, "grad_norm": 3.4606285095214844, "learning_rate": 1.9889863607056458e-05, "loss": 0.9467, "step": 3317 }, { "epoch": 0.541692175829558, "grad_norm": 3.0213632583618164, "learning_rate": 1.988976869352379e-05, "loss": 0.9388, "step": 3318 }, { "epoch": 0.5418554344720624, "grad_norm": 3.485741138458252, "learning_rate": 1.9889673739338017e-05, "loss": 0.8142, "step": 3319 }, { "epoch": 0.5420186931145667, "grad_norm": 3.235208511352539, "learning_rate": 1.988957874449953e-05, "loss": 0.979, "step": 3320 }, { "epoch": 0.5421819517570712, "grad_norm": 3.11604642868042, "learning_rate": 1.988948370900872e-05, "loss": 0.901, "step": 3321 }, { "epoch": 0.5423452103995755, "grad_norm": 2.468731641769409, "learning_rate": 1.9889388632865976e-05, "loss": 0.7103, "step": 3322 }, { "epoch": 0.5425084690420799, "grad_norm": 3.227396011352539, "learning_rate": 1.988929351607169e-05, "loss": 1.0228, "step": 3323 }, { "epoch": 0.5426717276845843, "grad_norm": 3.2046468257904053, "learning_rate": 1.9889198358626252e-05, "loss": 0.8278, "step": 3324 }, { "epoch": 0.5428349863270887, "grad_norm": 3.0919461250305176, "learning_rate": 1.988910316053005e-05, "loss": 0.8146, "step": 3325 }, { "epoch": 0.542998244969593, "grad_norm": 3.1627867221832275, "learning_rate": 1.9889007921783484e-05, "loss": 0.8962, "step": 3326 }, { "epoch": 0.5431615036120975, "grad_norm": 3.193864583969116, "learning_rate": 1.988891264238694e-05, "loss": 0.9565, "step": 3327 }, { "epoch": 0.5433247622546018, "grad_norm": 3.1606013774871826, "learning_rate": 1.988881732234081e-05, "loss": 0.8621, "step": 3328 }, { "epoch": 0.5434880208971062, "grad_norm": 3.2684872150421143, "learning_rate": 1.9888721961645483e-05, "loss": 0.974, "step": 3329 }, { "epoch": 0.5436512795396107, "grad_norm": 3.5520410537719727, "learning_rate": 1.9888626560301357e-05, "loss": 0.9139, "step": 3330 }, { "epoch": 0.543814538182115, "grad_norm": 2.9309637546539307, "learning_rate": 1.9888531118308818e-05, "loss": 0.8701, "step": 3331 }, { "epoch": 0.5439777968246194, "grad_norm": 3.361410140991211, "learning_rate": 1.9888435635668266e-05, "loss": 0.9203, "step": 3332 }, { "epoch": 0.5441410554671238, "grad_norm": 2.876671075820923, "learning_rate": 1.9888340112380086e-05, "loss": 1.0045, "step": 3333 }, { "epoch": 0.5443043141096282, "grad_norm": 2.5620970726013184, "learning_rate": 1.9888244548444673e-05, "loss": 0.7394, "step": 3334 }, { "epoch": 0.5444675727521325, "grad_norm": 2.8396482467651367, "learning_rate": 1.988814894386242e-05, "loss": 0.8884, "step": 3335 }, { "epoch": 0.544630831394637, "grad_norm": 2.9777562618255615, "learning_rate": 1.9888053298633722e-05, "loss": 0.9982, "step": 3336 }, { "epoch": 0.5447940900371413, "grad_norm": 2.755927801132202, "learning_rate": 1.988795761275897e-05, "loss": 0.8952, "step": 3337 }, { "epoch": 0.5449573486796457, "grad_norm": 3.1805472373962402, "learning_rate": 1.9887861886238558e-05, "loss": 1.0449, "step": 3338 }, { "epoch": 0.5451206073221501, "grad_norm": 3.0798752307891846, "learning_rate": 1.988776611907288e-05, "loss": 1.0615, "step": 3339 }, { "epoch": 0.5452838659646545, "grad_norm": 2.931276559829712, "learning_rate": 1.9887670311262332e-05, "loss": 0.8396, "step": 3340 }, { "epoch": 0.5454471246071589, "grad_norm": 2.5393800735473633, "learning_rate": 1.98875744628073e-05, "loss": 0.8331, "step": 3341 }, { "epoch": 0.5456103832496633, "grad_norm": 2.838460922241211, "learning_rate": 1.988747857370818e-05, "loss": 0.8142, "step": 3342 }, { "epoch": 0.5457736418921677, "grad_norm": 3.054786205291748, "learning_rate": 1.9887382643965376e-05, "loss": 0.8591, "step": 3343 }, { "epoch": 0.545936900534672, "grad_norm": 3.310682535171509, "learning_rate": 1.9887286673579275e-05, "loss": 0.9142, "step": 3344 }, { "epoch": 0.5461001591771765, "grad_norm": 3.126875877380371, "learning_rate": 1.9887190662550267e-05, "loss": 0.9897, "step": 3345 }, { "epoch": 0.5462634178196808, "grad_norm": 3.1909451484680176, "learning_rate": 1.9887094610878754e-05, "loss": 0.8735, "step": 3346 }, { "epoch": 0.5464266764621852, "grad_norm": 3.2159647941589355, "learning_rate": 1.988699851856513e-05, "loss": 0.7962, "step": 3347 }, { "epoch": 0.5465899351046896, "grad_norm": 2.867907762527466, "learning_rate": 1.9886902385609784e-05, "loss": 0.7972, "step": 3348 }, { "epoch": 0.546753193747194, "grad_norm": 2.9948766231536865, "learning_rate": 1.988680621201312e-05, "loss": 0.976, "step": 3349 }, { "epoch": 0.5469164523896983, "grad_norm": 3.1269166469573975, "learning_rate": 1.9886709997775527e-05, "loss": 0.8596, "step": 3350 }, { "epoch": 0.5470797110322028, "grad_norm": 3.0416371822357178, "learning_rate": 1.98866137428974e-05, "loss": 0.822, "step": 3351 }, { "epoch": 0.5472429696747072, "grad_norm": 3.0555520057678223, "learning_rate": 1.988651744737914e-05, "loss": 1.0117, "step": 3352 }, { "epoch": 0.5474062283172115, "grad_norm": 3.29671049118042, "learning_rate": 1.9886421111221142e-05, "loss": 0.9594, "step": 3353 }, { "epoch": 0.547569486959716, "grad_norm": 3.0217933654785156, "learning_rate": 1.9886324734423795e-05, "loss": 0.875, "step": 3354 }, { "epoch": 0.5477327456022203, "grad_norm": 3.122911214828491, "learning_rate": 1.9886228316987504e-05, "loss": 0.8215, "step": 3355 }, { "epoch": 0.5478960042447247, "grad_norm": 3.076601505279541, "learning_rate": 1.988613185891266e-05, "loss": 0.9659, "step": 3356 }, { "epoch": 0.5480592628872291, "grad_norm": 3.0318241119384766, "learning_rate": 1.988603536019966e-05, "loss": 0.853, "step": 3357 }, { "epoch": 0.5482225215297335, "grad_norm": 3.0690319538116455, "learning_rate": 1.9885938820848902e-05, "loss": 1.0141, "step": 3358 }, { "epoch": 0.5483857801722378, "grad_norm": 3.6083126068115234, "learning_rate": 1.988584224086078e-05, "loss": 0.9738, "step": 3359 }, { "epoch": 0.5485490388147423, "grad_norm": 2.9276163578033447, "learning_rate": 1.98857456202357e-05, "loss": 0.7654, "step": 3360 }, { "epoch": 0.5487122974572466, "grad_norm": 3.1762781143188477, "learning_rate": 1.9885648958974046e-05, "loss": 0.9976, "step": 3361 }, { "epoch": 0.548875556099751, "grad_norm": 3.396876573562622, "learning_rate": 1.9885552257076224e-05, "loss": 1.0127, "step": 3362 }, { "epoch": 0.5490388147422555, "grad_norm": 2.8618288040161133, "learning_rate": 1.988545551454263e-05, "loss": 0.7171, "step": 3363 }, { "epoch": 0.5492020733847598, "grad_norm": 3.3565175533294678, "learning_rate": 1.988535873137366e-05, "loss": 0.9817, "step": 3364 }, { "epoch": 0.5493653320272642, "grad_norm": 3.076735496520996, "learning_rate": 1.9885261907569715e-05, "loss": 0.859, "step": 3365 }, { "epoch": 0.5495285906697686, "grad_norm": 3.703481912612915, "learning_rate": 1.988516504313119e-05, "loss": 1.0266, "step": 3366 }, { "epoch": 0.549691849312273, "grad_norm": 3.2639150619506836, "learning_rate": 1.9885068138058482e-05, "loss": 0.9078, "step": 3367 }, { "epoch": 0.5498551079547773, "grad_norm": 3.095897912979126, "learning_rate": 1.9884971192351995e-05, "loss": 0.7675, "step": 3368 }, { "epoch": 0.5500183665972818, "grad_norm": 3.0822765827178955, "learning_rate": 1.988487420601212e-05, "loss": 0.8604, "step": 3369 }, { "epoch": 0.5501816252397861, "grad_norm": 3.548720359802246, "learning_rate": 1.9884777179039262e-05, "loss": 0.9774, "step": 3370 }, { "epoch": 0.5503448838822905, "grad_norm": 3.0141477584838867, "learning_rate": 1.9884680111433816e-05, "loss": 0.9773, "step": 3371 }, { "epoch": 0.5505081425247949, "grad_norm": 2.8057661056518555, "learning_rate": 1.9884583003196183e-05, "loss": 0.8015, "step": 3372 }, { "epoch": 0.5506714011672993, "grad_norm": 3.2585067749023438, "learning_rate": 1.9884485854326763e-05, "loss": 0.9702, "step": 3373 }, { "epoch": 0.5508346598098037, "grad_norm": 3.308209180831909, "learning_rate": 1.9884388664825955e-05, "loss": 1.0209, "step": 3374 }, { "epoch": 0.550997918452308, "grad_norm": 3.2336039543151855, "learning_rate": 1.9884291434694155e-05, "loss": 0.964, "step": 3375 }, { "epoch": 0.5511611770948125, "grad_norm": 2.7821602821350098, "learning_rate": 1.9884194163931764e-05, "loss": 0.7685, "step": 3376 }, { "epoch": 0.5513244357373168, "grad_norm": 3.1608002185821533, "learning_rate": 1.9884096852539184e-05, "loss": 1.0136, "step": 3377 }, { "epoch": 0.5514876943798213, "grad_norm": 3.0094730854034424, "learning_rate": 1.988399950051682e-05, "loss": 1.6264, "step": 3378 }, { "epoch": 0.5516509530223256, "grad_norm": 3.1109039783477783, "learning_rate": 1.9883902107865055e-05, "loss": 1.0021, "step": 3379 }, { "epoch": 0.55181421166483, "grad_norm": 3.27534556388855, "learning_rate": 1.9883804674584312e-05, "loss": 1.0445, "step": 3380 }, { "epoch": 0.5519774703073344, "grad_norm": 2.613316297531128, "learning_rate": 1.988370720067497e-05, "loss": 0.6933, "step": 3381 }, { "epoch": 0.5521407289498388, "grad_norm": 3.3692445755004883, "learning_rate": 1.9883609686137445e-05, "loss": 0.9131, "step": 3382 }, { "epoch": 0.5523039875923431, "grad_norm": 3.676302909851074, "learning_rate": 1.988351213097213e-05, "loss": 1.074, "step": 3383 }, { "epoch": 0.5524672462348476, "grad_norm": 3.0385935306549072, "learning_rate": 1.9883414535179433e-05, "loss": 0.8498, "step": 3384 }, { "epoch": 0.552630504877352, "grad_norm": 3.1364643573760986, "learning_rate": 1.988331689875975e-05, "loss": 0.938, "step": 3385 }, { "epoch": 0.5527937635198563, "grad_norm": 3.505741596221924, "learning_rate": 1.9883219221713478e-05, "loss": 1.0147, "step": 3386 }, { "epoch": 0.5529570221623608, "grad_norm": 3.4552814960479736, "learning_rate": 1.9883121504041026e-05, "loss": 1.0711, "step": 3387 }, { "epoch": 0.5531202808048651, "grad_norm": 3.431018590927124, "learning_rate": 1.9883023745742795e-05, "loss": 0.9729, "step": 3388 }, { "epoch": 0.5532835394473695, "grad_norm": 3.8478946685791016, "learning_rate": 1.9882925946819186e-05, "loss": 1.0425, "step": 3389 }, { "epoch": 0.5534467980898738, "grad_norm": 3.5212132930755615, "learning_rate": 1.98828281072706e-05, "loss": 1.1889, "step": 3390 }, { "epoch": 0.5536100567323783, "grad_norm": 3.4246647357940674, "learning_rate": 1.9882730227097435e-05, "loss": 0.9422, "step": 3391 }, { "epoch": 0.5537733153748826, "grad_norm": 3.4316306114196777, "learning_rate": 1.9882632306300103e-05, "loss": 1.643, "step": 3392 }, { "epoch": 0.553936574017387, "grad_norm": 2.7876946926116943, "learning_rate": 1.9882534344878996e-05, "loss": 0.7973, "step": 3393 }, { "epoch": 0.5540998326598914, "grad_norm": 2.5838518142700195, "learning_rate": 1.9882436342834525e-05, "loss": 0.77, "step": 3394 }, { "epoch": 0.5542630913023958, "grad_norm": 3.166374444961548, "learning_rate": 1.988233830016709e-05, "loss": 0.8763, "step": 3395 }, { "epoch": 0.5544263499449003, "grad_norm": 2.754079818725586, "learning_rate": 1.9882240216877094e-05, "loss": 0.7627, "step": 3396 }, { "epoch": 0.5545896085874046, "grad_norm": 2.5428435802459717, "learning_rate": 1.9882142092964938e-05, "loss": 0.7609, "step": 3397 }, { "epoch": 0.554752867229909, "grad_norm": 2.968249559402466, "learning_rate": 1.9882043928431026e-05, "loss": 0.8314, "step": 3398 }, { "epoch": 0.5549161258724133, "grad_norm": 2.909247636795044, "learning_rate": 1.9881945723275765e-05, "loss": 0.7929, "step": 3399 }, { "epoch": 0.5550793845149178, "grad_norm": 2.4599037170410156, "learning_rate": 1.988184747749956e-05, "loss": 0.7396, "step": 3400 }, { "epoch": 0.5552426431574221, "grad_norm": 2.959038734436035, "learning_rate": 1.9881749191102807e-05, "loss": 0.7678, "step": 3401 }, { "epoch": 0.5554059017999265, "grad_norm": 2.955496072769165, "learning_rate": 1.9881650864085916e-05, "loss": 0.8223, "step": 3402 }, { "epoch": 0.5555691604424309, "grad_norm": 3.1642401218414307, "learning_rate": 1.9881552496449288e-05, "loss": 0.9439, "step": 3403 }, { "epoch": 0.5557324190849353, "grad_norm": 3.263810396194458, "learning_rate": 1.988145408819333e-05, "loss": 0.8551, "step": 3404 }, { "epoch": 0.5558956777274396, "grad_norm": 3.378016471862793, "learning_rate": 1.9881355639318445e-05, "loss": 0.8776, "step": 3405 }, { "epoch": 0.5560589363699441, "grad_norm": 3.5475258827209473, "learning_rate": 1.988125714982504e-05, "loss": 0.9463, "step": 3406 }, { "epoch": 0.5562221950124485, "grad_norm": 3.178246021270752, "learning_rate": 1.9881158619713513e-05, "loss": 0.771, "step": 3407 }, { "epoch": 0.5563854536549528, "grad_norm": 3.5735044479370117, "learning_rate": 1.9881060048984277e-05, "loss": 0.9109, "step": 3408 }, { "epoch": 0.5565487122974573, "grad_norm": 3.0916337966918945, "learning_rate": 1.9880961437637734e-05, "loss": 0.824, "step": 3409 }, { "epoch": 0.5567119709399616, "grad_norm": 3.7738125324249268, "learning_rate": 1.9880862785674295e-05, "loss": 0.9715, "step": 3410 }, { "epoch": 0.556875229582466, "grad_norm": 3.1185104846954346, "learning_rate": 1.988076409309435e-05, "loss": 1.0034, "step": 3411 }, { "epoch": 0.5570384882249704, "grad_norm": 3.3367979526519775, "learning_rate": 1.988066535989832e-05, "loss": 0.9003, "step": 3412 }, { "epoch": 0.5572017468674748, "grad_norm": 3.6914360523223877, "learning_rate": 1.9880566586086604e-05, "loss": 0.9993, "step": 3413 }, { "epoch": 0.5573650055099791, "grad_norm": 3.385636329650879, "learning_rate": 1.9880467771659612e-05, "loss": 0.9005, "step": 3414 }, { "epoch": 0.5575282641524836, "grad_norm": 3.2010955810546875, "learning_rate": 1.988036891661775e-05, "loss": 1.1389, "step": 3415 }, { "epoch": 0.557691522794988, "grad_norm": 2.7451462745666504, "learning_rate": 1.9880270020961417e-05, "loss": 0.7731, "step": 3416 }, { "epoch": 0.5578547814374923, "grad_norm": 3.3380980491638184, "learning_rate": 1.9880171084691023e-05, "loss": 0.9761, "step": 3417 }, { "epoch": 0.5580180400799968, "grad_norm": 3.190274477005005, "learning_rate": 1.9880072107806982e-05, "loss": 0.9766, "step": 3418 }, { "epoch": 0.5581812987225011, "grad_norm": 3.193833351135254, "learning_rate": 1.9879973090309693e-05, "loss": 1.1047, "step": 3419 }, { "epoch": 0.5583445573650055, "grad_norm": 2.9443130493164062, "learning_rate": 1.9879874032199566e-05, "loss": 0.8154, "step": 3420 }, { "epoch": 0.5585078160075099, "grad_norm": 2.7744882106781006, "learning_rate": 1.9879774933477008e-05, "loss": 0.8704, "step": 3421 }, { "epoch": 0.5586710746500143, "grad_norm": 3.0742669105529785, "learning_rate": 1.9879675794142425e-05, "loss": 1.0295, "step": 3422 }, { "epoch": 0.5588343332925186, "grad_norm": 3.1163299083709717, "learning_rate": 1.9879576614196224e-05, "loss": 1.0174, "step": 3423 }, { "epoch": 0.5589975919350231, "grad_norm": 3.1276676654815674, "learning_rate": 1.9879477393638815e-05, "loss": 0.9811, "step": 3424 }, { "epoch": 0.5591608505775274, "grad_norm": 3.296416997909546, "learning_rate": 1.9879378132470604e-05, "loss": 0.8015, "step": 3425 }, { "epoch": 0.5593241092200318, "grad_norm": 2.7265326976776123, "learning_rate": 1.9879278830692002e-05, "loss": 0.8491, "step": 3426 }, { "epoch": 0.5594873678625363, "grad_norm": 3.2868247032165527, "learning_rate": 1.9879179488303412e-05, "loss": 1.037, "step": 3427 }, { "epoch": 0.5596506265050406, "grad_norm": 3.0453853607177734, "learning_rate": 1.9879080105305246e-05, "loss": 0.7512, "step": 3428 }, { "epoch": 0.559813885147545, "grad_norm": 3.2632980346679688, "learning_rate": 1.9878980681697912e-05, "loss": 0.8993, "step": 3429 }, { "epoch": 0.5599771437900494, "grad_norm": 3.1084141731262207, "learning_rate": 1.9878881217481823e-05, "loss": 0.8492, "step": 3430 }, { "epoch": 0.5601404024325538, "grad_norm": 3.4591071605682373, "learning_rate": 1.987878171265738e-05, "loss": 0.9463, "step": 3431 }, { "epoch": 0.5603036610750581, "grad_norm": 3.1463701725006104, "learning_rate": 1.9878682167224992e-05, "loss": 0.8167, "step": 3432 }, { "epoch": 0.5604669197175626, "grad_norm": 3.5423479080200195, "learning_rate": 1.9878582581185077e-05, "loss": 0.9409, "step": 3433 }, { "epoch": 0.5606301783600669, "grad_norm": 3.8266258239746094, "learning_rate": 1.9878482954538037e-05, "loss": 0.8533, "step": 3434 }, { "epoch": 0.5607934370025713, "grad_norm": 3.316819429397583, "learning_rate": 1.9878383287284285e-05, "loss": 0.922, "step": 3435 }, { "epoch": 0.5609566956450757, "grad_norm": 2.5856006145477295, "learning_rate": 1.987828357942423e-05, "loss": 0.6902, "step": 3436 }, { "epoch": 0.5611199542875801, "grad_norm": 3.126107931137085, "learning_rate": 1.987818383095828e-05, "loss": 0.8227, "step": 3437 }, { "epoch": 0.5612832129300845, "grad_norm": 3.281547784805298, "learning_rate": 1.9878084041886845e-05, "loss": 0.9496, "step": 3438 }, { "epoch": 0.5614464715725889, "grad_norm": 3.4202167987823486, "learning_rate": 1.9877984212210337e-05, "loss": 0.9409, "step": 3439 }, { "epoch": 0.5616097302150933, "grad_norm": 3.702868700027466, "learning_rate": 1.9877884341929167e-05, "loss": 1.0122, "step": 3440 }, { "epoch": 0.5617729888575976, "grad_norm": 2.8540711402893066, "learning_rate": 1.987778443104374e-05, "loss": 0.749, "step": 3441 }, { "epoch": 0.5619362475001021, "grad_norm": 3.7281501293182373, "learning_rate": 1.9877684479554477e-05, "loss": 1.1141, "step": 3442 }, { "epoch": 0.5620995061426064, "grad_norm": 3.014772653579712, "learning_rate": 1.987758448746178e-05, "loss": 0.825, "step": 3443 }, { "epoch": 0.5622627647851108, "grad_norm": 3.1310932636260986, "learning_rate": 1.9877484454766064e-05, "loss": 1.0552, "step": 3444 }, { "epoch": 0.5624260234276152, "grad_norm": 3.143998384475708, "learning_rate": 1.987738438146774e-05, "loss": 0.9109, "step": 3445 }, { "epoch": 0.5625892820701196, "grad_norm": 2.961303472518921, "learning_rate": 1.9877284267567215e-05, "loss": 0.9085, "step": 3446 }, { "epoch": 0.5627525407126239, "grad_norm": 2.783173084259033, "learning_rate": 1.9877184113064905e-05, "loss": 0.9043, "step": 3447 }, { "epoch": 0.5629157993551284, "grad_norm": 2.762988805770874, "learning_rate": 1.9877083917961223e-05, "loss": 0.7612, "step": 3448 }, { "epoch": 0.5630790579976328, "grad_norm": 2.7943763732910156, "learning_rate": 1.9876983682256575e-05, "loss": 0.8699, "step": 3449 }, { "epoch": 0.5632423166401371, "grad_norm": 2.722038745880127, "learning_rate": 1.9876883405951378e-05, "loss": 0.9004, "step": 3450 }, { "epoch": 0.5634055752826416, "grad_norm": 3.531597375869751, "learning_rate": 1.9876783089046046e-05, "loss": 0.9581, "step": 3451 }, { "epoch": 0.5635688339251459, "grad_norm": 3.770517587661743, "learning_rate": 1.9876682731540984e-05, "loss": 1.1366, "step": 3452 }, { "epoch": 0.5637320925676503, "grad_norm": 2.575887441635132, "learning_rate": 1.987658233343661e-05, "loss": 0.7089, "step": 3453 }, { "epoch": 0.5638953512101547, "grad_norm": 3.6315457820892334, "learning_rate": 1.9876481894733337e-05, "loss": 0.9136, "step": 3454 }, { "epoch": 0.5640586098526591, "grad_norm": 3.325979232788086, "learning_rate": 1.9876381415431574e-05, "loss": 0.9107, "step": 3455 }, { "epoch": 0.5642218684951634, "grad_norm": 3.1024513244628906, "learning_rate": 1.9876280895531733e-05, "loss": 0.6922, "step": 3456 }, { "epoch": 0.5643851271376679, "grad_norm": 3.579958438873291, "learning_rate": 1.9876180335034236e-05, "loss": 1.024, "step": 3457 }, { "epoch": 0.5645483857801722, "grad_norm": 3.1969664096832275, "learning_rate": 1.9876079733939487e-05, "loss": 0.8695, "step": 3458 }, { "epoch": 0.5647116444226766, "grad_norm": 2.743340492248535, "learning_rate": 1.987597909224791e-05, "loss": 0.8517, "step": 3459 }, { "epoch": 0.5648749030651811, "grad_norm": 3.2104196548461914, "learning_rate": 1.9875878409959905e-05, "loss": 0.947, "step": 3460 }, { "epoch": 0.5650381617076854, "grad_norm": 3.067755699157715, "learning_rate": 1.9875777687075895e-05, "loss": 0.8545, "step": 3461 }, { "epoch": 0.5652014203501898, "grad_norm": 3.4014434814453125, "learning_rate": 1.987567692359629e-05, "loss": 0.8957, "step": 3462 }, { "epoch": 0.5653646789926942, "grad_norm": 3.203601598739624, "learning_rate": 1.987557611952151e-05, "loss": 0.9267, "step": 3463 }, { "epoch": 0.5655279376351986, "grad_norm": 3.2084531784057617, "learning_rate": 1.9875475274851967e-05, "loss": 0.9237, "step": 3464 }, { "epoch": 0.5656911962777029, "grad_norm": 3.3158843517303467, "learning_rate": 1.987537438958807e-05, "loss": 1.0746, "step": 3465 }, { "epoch": 0.5658544549202074, "grad_norm": 2.921358823776245, "learning_rate": 1.987527346373024e-05, "loss": 0.8479, "step": 3466 }, { "epoch": 0.5660177135627117, "grad_norm": 3.4609415531158447, "learning_rate": 1.9875172497278887e-05, "loss": 0.8942, "step": 3467 }, { "epoch": 0.5661809722052161, "grad_norm": 3.631592273712158, "learning_rate": 1.987507149023443e-05, "loss": 1.081, "step": 3468 }, { "epoch": 0.5663442308477205, "grad_norm": 3.5361433029174805, "learning_rate": 1.9874970442597283e-05, "loss": 0.8166, "step": 3469 }, { "epoch": 0.5665074894902249, "grad_norm": 2.9397993087768555, "learning_rate": 1.9874869354367862e-05, "loss": 0.8425, "step": 3470 }, { "epoch": 0.5666707481327293, "grad_norm": 3.4038097858428955, "learning_rate": 1.9874768225546582e-05, "loss": 0.8424, "step": 3471 }, { "epoch": 0.5668340067752337, "grad_norm": 3.210388422012329, "learning_rate": 1.9874667056133857e-05, "loss": 1.1425, "step": 3472 }, { "epoch": 0.5669972654177381, "grad_norm": 2.573962926864624, "learning_rate": 1.9874565846130104e-05, "loss": 0.683, "step": 3473 }, { "epoch": 0.5671605240602424, "grad_norm": 3.379528045654297, "learning_rate": 1.9874464595535742e-05, "loss": 0.8233, "step": 3474 }, { "epoch": 0.5673237827027469, "grad_norm": 3.163327217102051, "learning_rate": 1.9874363304351182e-05, "loss": 0.9373, "step": 3475 }, { "epoch": 0.5674870413452512, "grad_norm": 3.0820679664611816, "learning_rate": 1.9874261972576844e-05, "loss": 0.8581, "step": 3476 }, { "epoch": 0.5676502999877556, "grad_norm": 3.06736421585083, "learning_rate": 1.9874160600213144e-05, "loss": 0.8857, "step": 3477 }, { "epoch": 0.56781355863026, "grad_norm": 3.4642632007598877, "learning_rate": 1.9874059187260497e-05, "loss": 1.0061, "step": 3478 }, { "epoch": 0.5679768172727644, "grad_norm": 3.193842887878418, "learning_rate": 1.987395773371932e-05, "loss": 0.8567, "step": 3479 }, { "epoch": 0.5681400759152687, "grad_norm": 3.6071248054504395, "learning_rate": 1.9873856239590034e-05, "loss": 1.0181, "step": 3480 }, { "epoch": 0.5683033345577732, "grad_norm": 3.3215372562408447, "learning_rate": 1.9873754704873053e-05, "loss": 0.9061, "step": 3481 }, { "epoch": 0.5684665932002776, "grad_norm": 4.684751987457275, "learning_rate": 1.9873653129568794e-05, "loss": 0.8687, "step": 3482 }, { "epoch": 0.5686298518427819, "grad_norm": 3.1425905227661133, "learning_rate": 1.9873551513677674e-05, "loss": 0.8614, "step": 3483 }, { "epoch": 0.5687931104852864, "grad_norm": 3.104128360748291, "learning_rate": 1.9873449857200113e-05, "loss": 0.9563, "step": 3484 }, { "epoch": 0.5689563691277907, "grad_norm": 3.2116994857788086, "learning_rate": 1.9873348160136525e-05, "loss": 1.0222, "step": 3485 }, { "epoch": 0.5691196277702951, "grad_norm": 2.9625332355499268, "learning_rate": 1.9873246422487336e-05, "loss": 0.9949, "step": 3486 }, { "epoch": 0.5692828864127994, "grad_norm": 2.7573325634002686, "learning_rate": 1.9873144644252956e-05, "loss": 0.7554, "step": 3487 }, { "epoch": 0.5694461450553039, "grad_norm": 2.5446996688842773, "learning_rate": 1.9873042825433806e-05, "loss": 0.7081, "step": 3488 }, { "epoch": 0.5696094036978082, "grad_norm": 3.261977195739746, "learning_rate": 1.9872940966030306e-05, "loss": 0.9218, "step": 3489 }, { "epoch": 0.5697726623403127, "grad_norm": 2.902089834213257, "learning_rate": 1.9872839066042875e-05, "loss": 0.8918, "step": 3490 }, { "epoch": 0.569935920982817, "grad_norm": 2.95198655128479, "learning_rate": 1.9872737125471927e-05, "loss": 0.8133, "step": 3491 }, { "epoch": 0.5700991796253214, "grad_norm": 2.5335588455200195, "learning_rate": 1.9872635144317886e-05, "loss": 0.7265, "step": 3492 }, { "epoch": 0.5702624382678259, "grad_norm": 3.191183090209961, "learning_rate": 1.9872533122581172e-05, "loss": 0.9563, "step": 3493 }, { "epoch": 0.5704256969103302, "grad_norm": 2.9146900177001953, "learning_rate": 1.9872431060262197e-05, "loss": 0.7835, "step": 3494 }, { "epoch": 0.5705889555528346, "grad_norm": 3.051057815551758, "learning_rate": 1.9872328957361394e-05, "loss": 0.7242, "step": 3495 }, { "epoch": 0.570752214195339, "grad_norm": 3.38645339012146, "learning_rate": 1.9872226813879167e-05, "loss": 1.0076, "step": 3496 }, { "epoch": 0.5709154728378434, "grad_norm": 3.1776273250579834, "learning_rate": 1.9872124629815947e-05, "loss": 0.8692, "step": 3497 }, { "epoch": 0.5710787314803477, "grad_norm": 3.0552499294281006, "learning_rate": 1.9872022405172148e-05, "loss": 0.8415, "step": 3498 }, { "epoch": 0.5712419901228522, "grad_norm": 3.281764030456543, "learning_rate": 1.9871920139948193e-05, "loss": 0.8657, "step": 3499 }, { "epoch": 0.5714052487653565, "grad_norm": 2.8134191036224365, "learning_rate": 1.9871817834144506e-05, "loss": 0.8537, "step": 3500 }, { "epoch": 0.5715685074078609, "grad_norm": 3.303953170776367, "learning_rate": 1.9871715487761496e-05, "loss": 1.0139, "step": 3501 }, { "epoch": 0.5717317660503652, "grad_norm": 4.290492057800293, "learning_rate": 1.9871613100799598e-05, "loss": 1.057, "step": 3502 }, { "epoch": 0.5718950246928697, "grad_norm": 3.271899938583374, "learning_rate": 1.9871510673259224e-05, "loss": 0.9077, "step": 3503 }, { "epoch": 0.5720582833353741, "grad_norm": 3.2336583137512207, "learning_rate": 1.987140820514079e-05, "loss": 0.8824, "step": 3504 }, { "epoch": 0.5722215419778784, "grad_norm": 3.1501379013061523, "learning_rate": 1.987130569644473e-05, "loss": 1.0662, "step": 3505 }, { "epoch": 0.5723848006203829, "grad_norm": 3.2841362953186035, "learning_rate": 1.987120314717146e-05, "loss": 1.0348, "step": 3506 }, { "epoch": 0.5725480592628872, "grad_norm": 3.135437250137329, "learning_rate": 1.9871100557321405e-05, "loss": 1.1245, "step": 3507 }, { "epoch": 0.5727113179053916, "grad_norm": 3.1923952102661133, "learning_rate": 1.987099792689498e-05, "loss": 0.9082, "step": 3508 }, { "epoch": 0.572874576547896, "grad_norm": 3.326977491378784, "learning_rate": 1.9870895255892607e-05, "loss": 0.9644, "step": 3509 }, { "epoch": 0.5730378351904004, "grad_norm": 3.061195135116577, "learning_rate": 1.9870792544314713e-05, "loss": 0.7916, "step": 3510 }, { "epoch": 0.5732010938329047, "grad_norm": 2.7722840309143066, "learning_rate": 1.987068979216172e-05, "loss": 0.8514, "step": 3511 }, { "epoch": 0.5733643524754092, "grad_norm": 3.4001431465148926, "learning_rate": 1.987058699943404e-05, "loss": 0.9658, "step": 3512 }, { "epoch": 0.5735276111179135, "grad_norm": 2.7951817512512207, "learning_rate": 1.9870484166132114e-05, "loss": 0.8591, "step": 3513 }, { "epoch": 0.5736908697604179, "grad_norm": 3.1191208362579346, "learning_rate": 1.987038129225635e-05, "loss": 1.0937, "step": 3514 }, { "epoch": 0.5738541284029224, "grad_norm": 3.5310895442962646, "learning_rate": 1.9870278377807172e-05, "loss": 1.1368, "step": 3515 }, { "epoch": 0.5740173870454267, "grad_norm": 3.168513774871826, "learning_rate": 1.9870175422785013e-05, "loss": 0.7374, "step": 3516 }, { "epoch": 0.5741806456879311, "grad_norm": 3.021533250808716, "learning_rate": 1.9870072427190287e-05, "loss": 0.8906, "step": 3517 }, { "epoch": 0.5743439043304355, "grad_norm": 3.3392560482025146, "learning_rate": 1.986996939102342e-05, "loss": 1.0611, "step": 3518 }, { "epoch": 0.5745071629729399, "grad_norm": 2.9277031421661377, "learning_rate": 1.9869866314284837e-05, "loss": 0.7449, "step": 3519 }, { "epoch": 0.5746704216154442, "grad_norm": 3.4977164268493652, "learning_rate": 1.9869763196974957e-05, "loss": 1.1234, "step": 3520 }, { "epoch": 0.5748336802579487, "grad_norm": 2.7971744537353516, "learning_rate": 1.986966003909421e-05, "loss": 0.797, "step": 3521 }, { "epoch": 0.574996938900453, "grad_norm": 3.0387275218963623, "learning_rate": 1.986955684064302e-05, "loss": 0.7431, "step": 3522 }, { "epoch": 0.5751601975429574, "grad_norm": 2.740898370742798, "learning_rate": 1.98694536016218e-05, "loss": 0.7768, "step": 3523 }, { "epoch": 0.5753234561854618, "grad_norm": 3.2633750438690186, "learning_rate": 1.9869350322030993e-05, "loss": 0.9546, "step": 3524 }, { "epoch": 0.5754867148279662, "grad_norm": 2.8876187801361084, "learning_rate": 1.9869247001871006e-05, "loss": 0.761, "step": 3525 }, { "epoch": 0.5756499734704706, "grad_norm": 3.8232836723327637, "learning_rate": 1.9869143641142274e-05, "loss": 1.1171, "step": 3526 }, { "epoch": 0.575813232112975, "grad_norm": 3.161721706390381, "learning_rate": 1.986904023984522e-05, "loss": 0.953, "step": 3527 }, { "epoch": 0.5759764907554794, "grad_norm": 3.25476336479187, "learning_rate": 1.9868936797980267e-05, "loss": 0.9568, "step": 3528 }, { "epoch": 0.5761397493979837, "grad_norm": 2.694995164871216, "learning_rate": 1.986883331554784e-05, "loss": 0.7562, "step": 3529 }, { "epoch": 0.5763030080404882, "grad_norm": 2.8624751567840576, "learning_rate": 1.986872979254837e-05, "loss": 0.7267, "step": 3530 }, { "epoch": 0.5764662666829925, "grad_norm": 2.939268112182617, "learning_rate": 1.986862622898227e-05, "loss": 0.8241, "step": 3531 }, { "epoch": 0.5766295253254969, "grad_norm": 3.200535774230957, "learning_rate": 1.9868522624849983e-05, "loss": 0.8225, "step": 3532 }, { "epoch": 0.5767927839680013, "grad_norm": 2.9493141174316406, "learning_rate": 1.9868418980151924e-05, "loss": 0.7893, "step": 3533 }, { "epoch": 0.5769560426105057, "grad_norm": 3.25384783744812, "learning_rate": 1.9868315294888515e-05, "loss": 0.8181, "step": 3534 }, { "epoch": 0.57711930125301, "grad_norm": 3.1689541339874268, "learning_rate": 1.986821156906019e-05, "loss": 0.8214, "step": 3535 }, { "epoch": 0.5772825598955145, "grad_norm": 2.9872846603393555, "learning_rate": 1.9868107802667377e-05, "loss": 0.9191, "step": 3536 }, { "epoch": 0.5774458185380189, "grad_norm": 2.7416679859161377, "learning_rate": 1.9868003995710498e-05, "loss": 0.8685, "step": 3537 }, { "epoch": 0.5776090771805232, "grad_norm": 3.325650691986084, "learning_rate": 1.986790014818998e-05, "loss": 1.0579, "step": 3538 }, { "epoch": 0.5777723358230277, "grad_norm": 2.8097424507141113, "learning_rate": 1.986779626010625e-05, "loss": 0.8131, "step": 3539 }, { "epoch": 0.577935594465532, "grad_norm": 3.2268478870391846, "learning_rate": 1.9867692331459735e-05, "loss": 0.975, "step": 3540 }, { "epoch": 0.5780988531080364, "grad_norm": 3.0927627086639404, "learning_rate": 1.986758836225086e-05, "loss": 0.8237, "step": 3541 }, { "epoch": 0.5782621117505408, "grad_norm": 3.85640025138855, "learning_rate": 1.986748435248006e-05, "loss": 0.9403, "step": 3542 }, { "epoch": 0.5784253703930452, "grad_norm": 3.069150447845459, "learning_rate": 1.986738030214776e-05, "loss": 0.9034, "step": 3543 }, { "epoch": 0.5785886290355495, "grad_norm": 3.155395746231079, "learning_rate": 1.986727621125438e-05, "loss": 1.0407, "step": 3544 }, { "epoch": 0.578751887678054, "grad_norm": 2.8883070945739746, "learning_rate": 1.9867172079800355e-05, "loss": 0.8962, "step": 3545 }, { "epoch": 0.5789151463205583, "grad_norm": 2.9706547260284424, "learning_rate": 1.9867067907786112e-05, "loss": 0.8374, "step": 3546 }, { "epoch": 0.5790784049630627, "grad_norm": 2.504568576812744, "learning_rate": 1.9866963695212077e-05, "loss": 0.6744, "step": 3547 }, { "epoch": 0.5792416636055672, "grad_norm": 2.904569149017334, "learning_rate": 1.986685944207868e-05, "loss": 0.8295, "step": 3548 }, { "epoch": 0.5794049222480715, "grad_norm": 2.8814117908477783, "learning_rate": 1.9866755148386353e-05, "loss": 0.8298, "step": 3549 }, { "epoch": 0.5795681808905759, "grad_norm": 3.2335307598114014, "learning_rate": 1.986665081413552e-05, "loss": 0.8787, "step": 3550 }, { "epoch": 0.5797314395330803, "grad_norm": 2.8400204181671143, "learning_rate": 1.9866546439326607e-05, "loss": 0.8896, "step": 3551 }, { "epoch": 0.5798946981755847, "grad_norm": 2.804882764816284, "learning_rate": 1.986644202396005e-05, "loss": 0.7689, "step": 3552 }, { "epoch": 0.580057956818089, "grad_norm": 2.7923195362091064, "learning_rate": 1.9866337568036277e-05, "loss": 0.8564, "step": 3553 }, { "epoch": 0.5802212154605935, "grad_norm": 2.8599843978881836, "learning_rate": 1.9866233071555714e-05, "loss": 0.7935, "step": 3554 }, { "epoch": 0.5803844741030978, "grad_norm": 3.0632309913635254, "learning_rate": 1.9866128534518792e-05, "loss": 0.8858, "step": 3555 }, { "epoch": 0.5805477327456022, "grad_norm": 3.501556873321533, "learning_rate": 1.986602395692594e-05, "loss": 1.0103, "step": 3556 }, { "epoch": 0.5807109913881066, "grad_norm": 2.8854541778564453, "learning_rate": 1.986591933877759e-05, "loss": 0.8153, "step": 3557 }, { "epoch": 0.580874250030611, "grad_norm": 3.1737821102142334, "learning_rate": 1.9865814680074175e-05, "loss": 0.8291, "step": 3558 }, { "epoch": 0.5810375086731154, "grad_norm": 3.5656116008758545, "learning_rate": 1.9865709980816116e-05, "loss": 1.0071, "step": 3559 }, { "epoch": 0.5812007673156198, "grad_norm": 3.367739200592041, "learning_rate": 1.986560524100385e-05, "loss": 1.0113, "step": 3560 }, { "epoch": 0.5813640259581242, "grad_norm": 3.064889907836914, "learning_rate": 1.9865500460637806e-05, "loss": 0.9475, "step": 3561 }, { "epoch": 0.5815272846006285, "grad_norm": 3.5985569953918457, "learning_rate": 1.9865395639718416e-05, "loss": 0.8827, "step": 3562 }, { "epoch": 0.581690543243133, "grad_norm": 2.922333240509033, "learning_rate": 1.9865290778246103e-05, "loss": 0.713, "step": 3563 }, { "epoch": 0.5818538018856373, "grad_norm": 3.3702709674835205, "learning_rate": 1.9865185876221313e-05, "loss": 0.72, "step": 3564 }, { "epoch": 0.5820170605281417, "grad_norm": 3.1265335083007812, "learning_rate": 1.9865080933644468e-05, "loss": 0.8657, "step": 3565 }, { "epoch": 0.582180319170646, "grad_norm": 3.975504159927368, "learning_rate": 1.9864975950515998e-05, "loss": 0.9456, "step": 3566 }, { "epoch": 0.5823435778131505, "grad_norm": 3.0550971031188965, "learning_rate": 1.9864870926836334e-05, "loss": 0.9354, "step": 3567 }, { "epoch": 0.5825068364556548, "grad_norm": 3.1756935119628906, "learning_rate": 1.9864765862605913e-05, "loss": 0.889, "step": 3568 }, { "epoch": 0.5826700950981593, "grad_norm": 3.7162933349609375, "learning_rate": 1.9864660757825166e-05, "loss": 1.0917, "step": 3569 }, { "epoch": 0.5828333537406637, "grad_norm": 2.901801347732544, "learning_rate": 1.986455561249452e-05, "loss": 0.9047, "step": 3570 }, { "epoch": 0.582996612383168, "grad_norm": 3.1549086570739746, "learning_rate": 1.9864450426614414e-05, "loss": 0.9865, "step": 3571 }, { "epoch": 0.5831598710256725, "grad_norm": 3.1190035343170166, "learning_rate": 1.9864345200185275e-05, "loss": 0.9645, "step": 3572 }, { "epoch": 0.5833231296681768, "grad_norm": 3.2017128467559814, "learning_rate": 1.986423993320754e-05, "loss": 1.5229, "step": 3573 }, { "epoch": 0.5834863883106812, "grad_norm": 2.552229642868042, "learning_rate": 1.9864134625681637e-05, "loss": 0.806, "step": 3574 }, { "epoch": 0.5836496469531856, "grad_norm": 3.3029754161834717, "learning_rate": 1.9864029277608002e-05, "loss": 1.0555, "step": 3575 }, { "epoch": 0.58381290559569, "grad_norm": 3.144739866256714, "learning_rate": 1.986392388898707e-05, "loss": 0.8194, "step": 3576 }, { "epoch": 0.5839761642381943, "grad_norm": 2.709317445755005, "learning_rate": 1.9863818459819266e-05, "loss": 0.827, "step": 3577 }, { "epoch": 0.5841394228806988, "grad_norm": 3.086143732070923, "learning_rate": 1.9863712990105032e-05, "loss": 1.1329, "step": 3578 }, { "epoch": 0.5843026815232031, "grad_norm": 3.162989854812622, "learning_rate": 1.9863607479844798e-05, "loss": 0.8438, "step": 3579 }, { "epoch": 0.5844659401657075, "grad_norm": 2.691783905029297, "learning_rate": 1.9863501929039e-05, "loss": 0.9216, "step": 3580 }, { "epoch": 0.584629198808212, "grad_norm": 2.7708399295806885, "learning_rate": 1.9863396337688064e-05, "loss": 0.8771, "step": 3581 }, { "epoch": 0.5847924574507163, "grad_norm": 2.9998443126678467, "learning_rate": 1.986329070579244e-05, "loss": 0.8693, "step": 3582 }, { "epoch": 0.5849557160932207, "grad_norm": 2.943087339401245, "learning_rate": 1.9863185033352544e-05, "loss": 0.8273, "step": 3583 }, { "epoch": 0.585118974735725, "grad_norm": 2.8676798343658447, "learning_rate": 1.9863079320368822e-05, "loss": 0.9793, "step": 3584 }, { "epoch": 0.5852822333782295, "grad_norm": 3.1090986728668213, "learning_rate": 1.9862973566841705e-05, "loss": 0.8446, "step": 3585 }, { "epoch": 0.5854454920207338, "grad_norm": 2.891040325164795, "learning_rate": 1.9862867772771626e-05, "loss": 0.9292, "step": 3586 }, { "epoch": 0.5856087506632383, "grad_norm": 3.0234270095825195, "learning_rate": 1.986276193815902e-05, "loss": 0.9247, "step": 3587 }, { "epoch": 0.5857720093057426, "grad_norm": 3.242583990097046, "learning_rate": 1.9862656063004325e-05, "loss": 0.9418, "step": 3588 }, { "epoch": 0.585935267948247, "grad_norm": 2.634531021118164, "learning_rate": 1.986255014730798e-05, "loss": 0.7381, "step": 3589 }, { "epoch": 0.5860985265907513, "grad_norm": 3.4049882888793945, "learning_rate": 1.9862444191070408e-05, "loss": 1.0338, "step": 3590 }, { "epoch": 0.5862617852332558, "grad_norm": 2.7360548973083496, "learning_rate": 1.986233819429206e-05, "loss": 0.8063, "step": 3591 }, { "epoch": 0.5864250438757602, "grad_norm": 3.1208994388580322, "learning_rate": 1.9862232156973357e-05, "loss": 0.9782, "step": 3592 }, { "epoch": 0.5865883025182645, "grad_norm": 2.777367353439331, "learning_rate": 1.9862126079114743e-05, "loss": 0.7857, "step": 3593 }, { "epoch": 0.586751561160769, "grad_norm": 2.995924472808838, "learning_rate": 1.9862019960716654e-05, "loss": 0.7122, "step": 3594 }, { "epoch": 0.5869148198032733, "grad_norm": 3.1997110843658447, "learning_rate": 1.9861913801779523e-05, "loss": 0.8796, "step": 3595 }, { "epoch": 0.5870780784457778, "grad_norm": 3.200626850128174, "learning_rate": 1.986180760230379e-05, "loss": 0.7822, "step": 3596 }, { "epoch": 0.5872413370882821, "grad_norm": 3.052297592163086, "learning_rate": 1.9861701362289892e-05, "loss": 1.7175, "step": 3597 }, { "epoch": 0.5874045957307865, "grad_norm": 2.994511842727661, "learning_rate": 1.986159508173826e-05, "loss": 0.6764, "step": 3598 }, { "epoch": 0.5875678543732908, "grad_norm": 3.532432794570923, "learning_rate": 1.9861488760649332e-05, "loss": 0.8787, "step": 3599 }, { "epoch": 0.5877311130157953, "grad_norm": 3.163602113723755, "learning_rate": 1.986138239902355e-05, "loss": 0.8442, "step": 3600 }, { "epoch": 0.5878943716582996, "grad_norm": 3.1198081970214844, "learning_rate": 1.986127599686135e-05, "loss": 0.8227, "step": 3601 }, { "epoch": 0.588057630300804, "grad_norm": 3.1463379859924316, "learning_rate": 1.9861169554163168e-05, "loss": 0.7643, "step": 3602 }, { "epoch": 0.5882208889433085, "grad_norm": 3.244539976119995, "learning_rate": 1.986106307092944e-05, "loss": 0.9786, "step": 3603 }, { "epoch": 0.5883841475858128, "grad_norm": 3.6168100833892822, "learning_rate": 1.9860956547160605e-05, "loss": 1.0091, "step": 3604 }, { "epoch": 0.5885474062283172, "grad_norm": 3.435528516769409, "learning_rate": 1.98608499828571e-05, "loss": 1.022, "step": 3605 }, { "epoch": 0.5887106648708216, "grad_norm": 3.207494020462036, "learning_rate": 1.9860743378019368e-05, "loss": 0.8355, "step": 3606 }, { "epoch": 0.588873923513326, "grad_norm": 3.258345603942871, "learning_rate": 1.986063673264784e-05, "loss": 0.8346, "step": 3607 }, { "epoch": 0.5890371821558303, "grad_norm": 2.9891109466552734, "learning_rate": 1.9860530046742957e-05, "loss": 0.7841, "step": 3608 }, { "epoch": 0.5892004407983348, "grad_norm": 3.3285088539123535, "learning_rate": 1.986042332030516e-05, "loss": 0.995, "step": 3609 }, { "epoch": 0.5893636994408391, "grad_norm": 3.63259220123291, "learning_rate": 1.9860316553334885e-05, "loss": 1.1512, "step": 3610 }, { "epoch": 0.5895269580833435, "grad_norm": 3.0937514305114746, "learning_rate": 1.9860209745832573e-05, "loss": 0.8282, "step": 3611 }, { "epoch": 0.5896902167258479, "grad_norm": 3.524902820587158, "learning_rate": 1.986010289779866e-05, "loss": 0.9312, "step": 3612 }, { "epoch": 0.5898534753683523, "grad_norm": 2.6288700103759766, "learning_rate": 1.985999600923359e-05, "loss": 0.8031, "step": 3613 }, { "epoch": 0.5900167340108567, "grad_norm": 3.179124593734741, "learning_rate": 1.9859889080137797e-05, "loss": 0.8526, "step": 3614 }, { "epoch": 0.5901799926533611, "grad_norm": 3.337430715560913, "learning_rate": 1.9859782110511724e-05, "loss": 1.0492, "step": 3615 }, { "epoch": 0.5903432512958655, "grad_norm": 2.849104404449463, "learning_rate": 1.985967510035581e-05, "loss": 0.9321, "step": 3616 }, { "epoch": 0.5905065099383698, "grad_norm": 2.885763645172119, "learning_rate": 1.9859568049670494e-05, "loss": 0.7895, "step": 3617 }, { "epoch": 0.5906697685808743, "grad_norm": 3.0992391109466553, "learning_rate": 1.9859460958456217e-05, "loss": 1.0047, "step": 3618 }, { "epoch": 0.5908330272233786, "grad_norm": 3.1297504901885986, "learning_rate": 1.985935382671342e-05, "loss": 0.9406, "step": 3619 }, { "epoch": 0.590996285865883, "grad_norm": 3.3790030479431152, "learning_rate": 1.985924665444254e-05, "loss": 0.9034, "step": 3620 }, { "epoch": 0.5911595445083874, "grad_norm": 3.240278720855713, "learning_rate": 1.9859139441644023e-05, "loss": 1.1238, "step": 3621 }, { "epoch": 0.5913228031508918, "grad_norm": 2.644111394882202, "learning_rate": 1.9859032188318303e-05, "loss": 0.7513, "step": 3622 }, { "epoch": 0.5914860617933961, "grad_norm": 3.2703328132629395, "learning_rate": 1.9858924894465826e-05, "loss": 0.903, "step": 3623 }, { "epoch": 0.5916493204359006, "grad_norm": 3.0021066665649414, "learning_rate": 1.985881756008703e-05, "loss": 0.8565, "step": 3624 }, { "epoch": 0.591812579078405, "grad_norm": 3.201343059539795, "learning_rate": 1.985871018518236e-05, "loss": 0.9249, "step": 3625 }, { "epoch": 0.5919758377209093, "grad_norm": 3.32202410697937, "learning_rate": 1.9858602769752252e-05, "loss": 0.8663, "step": 3626 }, { "epoch": 0.5921390963634138, "grad_norm": 3.3904671669006348, "learning_rate": 1.9858495313797155e-05, "loss": 0.9055, "step": 3627 }, { "epoch": 0.5923023550059181, "grad_norm": 3.043792486190796, "learning_rate": 1.9858387817317504e-05, "loss": 0.9714, "step": 3628 }, { "epoch": 0.5924656136484225, "grad_norm": 2.714359760284424, "learning_rate": 1.985828028031374e-05, "loss": 0.7611, "step": 3629 }, { "epoch": 0.5926288722909269, "grad_norm": 3.117208957672119, "learning_rate": 1.9858172702786315e-05, "loss": 0.8688, "step": 3630 }, { "epoch": 0.5927921309334313, "grad_norm": 3.9498119354248047, "learning_rate": 1.9858065084735658e-05, "loss": 1.1032, "step": 3631 }, { "epoch": 0.5929553895759356, "grad_norm": 3.1183364391326904, "learning_rate": 1.985795742616222e-05, "loss": 0.9568, "step": 3632 }, { "epoch": 0.5931186482184401, "grad_norm": 3.5630481243133545, "learning_rate": 1.9857849727066443e-05, "loss": 0.8858, "step": 3633 }, { "epoch": 0.5932819068609444, "grad_norm": 3.3246984481811523, "learning_rate": 1.9857741987448767e-05, "loss": 0.7958, "step": 3634 }, { "epoch": 0.5934451655034488, "grad_norm": 3.7307796478271484, "learning_rate": 1.9857634207309634e-05, "loss": 0.8796, "step": 3635 }, { "epoch": 0.5936084241459533, "grad_norm": 2.453545570373535, "learning_rate": 1.9857526386649492e-05, "loss": 0.7478, "step": 3636 }, { "epoch": 0.5937716827884576, "grad_norm": 3.3498313426971436, "learning_rate": 1.9857418525468778e-05, "loss": 1.0151, "step": 3637 }, { "epoch": 0.593934941430962, "grad_norm": 3.1079039573669434, "learning_rate": 1.9857310623767942e-05, "loss": 0.8741, "step": 3638 }, { "epoch": 0.5940982000734664, "grad_norm": 3.4259018898010254, "learning_rate": 1.985720268154742e-05, "loss": 0.9364, "step": 3639 }, { "epoch": 0.5942614587159708, "grad_norm": 2.8150875568389893, "learning_rate": 1.9857094698807664e-05, "loss": 0.853, "step": 3640 }, { "epoch": 0.5944247173584751, "grad_norm": 2.4600443840026855, "learning_rate": 1.9856986675549113e-05, "loss": 0.6851, "step": 3641 }, { "epoch": 0.5945879760009796, "grad_norm": 4.209109783172607, "learning_rate": 1.985687861177221e-05, "loss": 0.9709, "step": 3642 }, { "epoch": 0.5947512346434839, "grad_norm": 2.8404879570007324, "learning_rate": 1.98567705074774e-05, "loss": 0.8517, "step": 3643 }, { "epoch": 0.5949144932859883, "grad_norm": 3.662057876586914, "learning_rate": 1.985666236266513e-05, "loss": 1.1354, "step": 3644 }, { "epoch": 0.5950777519284927, "grad_norm": 2.9388468265533447, "learning_rate": 1.9856554177335844e-05, "loss": 0.9679, "step": 3645 }, { "epoch": 0.5952410105709971, "grad_norm": 2.8866398334503174, "learning_rate": 1.9856445951489984e-05, "loss": 0.7821, "step": 3646 }, { "epoch": 0.5954042692135015, "grad_norm": 3.3452165126800537, "learning_rate": 1.9856337685127993e-05, "loss": 0.986, "step": 3647 }, { "epoch": 0.5955675278560059, "grad_norm": 3.174227476119995, "learning_rate": 1.9856229378250326e-05, "loss": 0.9608, "step": 3648 }, { "epoch": 0.5957307864985103, "grad_norm": 3.318237543106079, "learning_rate": 1.9856121030857416e-05, "loss": 0.7607, "step": 3649 }, { "epoch": 0.5958940451410146, "grad_norm": 3.210975408554077, "learning_rate": 1.9856012642949717e-05, "loss": 1.0721, "step": 3650 }, { "epoch": 0.5960573037835191, "grad_norm": 2.781816244125366, "learning_rate": 1.9855904214527674e-05, "loss": 0.8392, "step": 3651 }, { "epoch": 0.5962205624260234, "grad_norm": 2.9264895915985107, "learning_rate": 1.9855795745591723e-05, "loss": 0.8084, "step": 3652 }, { "epoch": 0.5963838210685278, "grad_norm": 2.7678728103637695, "learning_rate": 1.9855687236142325e-05, "loss": 0.639, "step": 3653 }, { "epoch": 0.5965470797110322, "grad_norm": 3.099499464035034, "learning_rate": 1.9855578686179913e-05, "loss": 0.8168, "step": 3654 }, { "epoch": 0.5967103383535366, "grad_norm": 3.2301108837127686, "learning_rate": 1.9855470095704943e-05, "loss": 0.7919, "step": 3655 }, { "epoch": 0.5968735969960409, "grad_norm": 3.424539089202881, "learning_rate": 1.9855361464717853e-05, "loss": 0.8194, "step": 3656 }, { "epoch": 0.5970368556385454, "grad_norm": 3.3012139797210693, "learning_rate": 1.9855252793219092e-05, "loss": 0.782, "step": 3657 }, { "epoch": 0.5972001142810498, "grad_norm": 3.327528238296509, "learning_rate": 1.9855144081209114e-05, "loss": 0.9703, "step": 3658 }, { "epoch": 0.5973633729235541, "grad_norm": 3.0099613666534424, "learning_rate": 1.9855035328688355e-05, "loss": 0.7863, "step": 3659 }, { "epoch": 0.5975266315660586, "grad_norm": 3.3301124572753906, "learning_rate": 1.985492653565727e-05, "loss": 0.8777, "step": 3660 }, { "epoch": 0.5976898902085629, "grad_norm": 3.6946752071380615, "learning_rate": 1.98548177021163e-05, "loss": 0.8682, "step": 3661 }, { "epoch": 0.5978531488510673, "grad_norm": 3.048170566558838, "learning_rate": 1.9854708828065898e-05, "loss": 0.8122, "step": 3662 }, { "epoch": 0.5980164074935717, "grad_norm": 3.2684004306793213, "learning_rate": 1.9854599913506506e-05, "loss": 0.8342, "step": 3663 }, { "epoch": 0.5981796661360761, "grad_norm": 3.2078263759613037, "learning_rate": 1.985449095843858e-05, "loss": 0.7633, "step": 3664 }, { "epoch": 0.5983429247785804, "grad_norm": 4.043259620666504, "learning_rate": 1.9854381962862557e-05, "loss": 1.0151, "step": 3665 }, { "epoch": 0.5985061834210849, "grad_norm": 3.6731715202331543, "learning_rate": 1.9854272926778894e-05, "loss": 1.0373, "step": 3666 }, { "epoch": 0.5986694420635893, "grad_norm": 3.224156141281128, "learning_rate": 1.9854163850188036e-05, "loss": 0.942, "step": 3667 }, { "epoch": 0.5988327007060936, "grad_norm": 2.909836769104004, "learning_rate": 1.985405473309043e-05, "loss": 0.7586, "step": 3668 }, { "epoch": 0.5989959593485981, "grad_norm": 3.2186825275421143, "learning_rate": 1.9853945575486527e-05, "loss": 0.8246, "step": 3669 }, { "epoch": 0.5991592179911024, "grad_norm": 2.7357890605926514, "learning_rate": 1.9853836377376774e-05, "loss": 0.8415, "step": 3670 }, { "epoch": 0.5993224766336068, "grad_norm": 2.781975746154785, "learning_rate": 1.985372713876162e-05, "loss": 0.8158, "step": 3671 }, { "epoch": 0.5994857352761112, "grad_norm": 2.947260856628418, "learning_rate": 1.9853617859641513e-05, "loss": 0.8847, "step": 3672 }, { "epoch": 0.5996489939186156, "grad_norm": 3.4116532802581787, "learning_rate": 1.9853508540016908e-05, "loss": 0.9926, "step": 3673 }, { "epoch": 0.5998122525611199, "grad_norm": 3.007206916809082, "learning_rate": 1.9853399179888245e-05, "loss": 0.7839, "step": 3674 }, { "epoch": 0.5999755112036244, "grad_norm": 3.3814377784729004, "learning_rate": 1.985328977925598e-05, "loss": 0.9174, "step": 3675 }, { "epoch": 0.6001387698461287, "grad_norm": 3.2323734760284424, "learning_rate": 1.9853180338120564e-05, "loss": 1.1091, "step": 3676 }, { "epoch": 0.6003020284886331, "grad_norm": 3.2961134910583496, "learning_rate": 1.9853070856482443e-05, "loss": 1.0661, "step": 3677 }, { "epoch": 0.6004652871311376, "grad_norm": 2.9862594604492188, "learning_rate": 1.985296133434207e-05, "loss": 0.7621, "step": 3678 }, { "epoch": 0.6006285457736419, "grad_norm": 3.2947604656219482, "learning_rate": 1.9852851771699887e-05, "loss": 0.8498, "step": 3679 }, { "epoch": 0.6007918044161463, "grad_norm": 3.224703311920166, "learning_rate": 1.9852742168556357e-05, "loss": 0.8464, "step": 3680 }, { "epoch": 0.6009550630586507, "grad_norm": 2.968881607055664, "learning_rate": 1.9852632524911923e-05, "loss": 0.912, "step": 3681 }, { "epoch": 0.6011183217011551, "grad_norm": 2.8098835945129395, "learning_rate": 1.985252284076703e-05, "loss": 0.9305, "step": 3682 }, { "epoch": 0.6012815803436594, "grad_norm": 3.1792502403259277, "learning_rate": 1.9852413116122148e-05, "loss": 1.0774, "step": 3683 }, { "epoch": 0.6014448389861639, "grad_norm": 3.2104878425598145, "learning_rate": 1.9852303350977705e-05, "loss": 0.9656, "step": 3684 }, { "epoch": 0.6016080976286682, "grad_norm": 3.0592598915100098, "learning_rate": 1.9852193545334167e-05, "loss": 0.9182, "step": 3685 }, { "epoch": 0.6017713562711726, "grad_norm": 2.7242162227630615, "learning_rate": 1.9852083699191985e-05, "loss": 0.8046, "step": 3686 }, { "epoch": 0.601934614913677, "grad_norm": 3.060917854309082, "learning_rate": 1.9851973812551603e-05, "loss": 0.9759, "step": 3687 }, { "epoch": 0.6020978735561814, "grad_norm": 2.935675621032715, "learning_rate": 1.9851863885413478e-05, "loss": 0.693, "step": 3688 }, { "epoch": 0.6022611321986858, "grad_norm": 3.0814239978790283, "learning_rate": 1.9851753917778064e-05, "loss": 1.005, "step": 3689 }, { "epoch": 0.6024243908411901, "grad_norm": 3.592531681060791, "learning_rate": 1.9851643909645805e-05, "loss": 0.9412, "step": 3690 }, { "epoch": 0.6025876494836946, "grad_norm": 2.922959804534912, "learning_rate": 1.9851533861017154e-05, "loss": 0.8718, "step": 3691 }, { "epoch": 0.6027509081261989, "grad_norm": 3.1586763858795166, "learning_rate": 1.9851423771892574e-05, "loss": 0.7796, "step": 3692 }, { "epoch": 0.6029141667687034, "grad_norm": 3.036543369293213, "learning_rate": 1.985131364227251e-05, "loss": 0.9224, "step": 3693 }, { "epoch": 0.6030774254112077, "grad_norm": 3.3578927516937256, "learning_rate": 1.9851203472157414e-05, "loss": 0.8562, "step": 3694 }, { "epoch": 0.6032406840537121, "grad_norm": 3.187159299850464, "learning_rate": 1.985109326154774e-05, "loss": 0.9512, "step": 3695 }, { "epoch": 0.6034039426962164, "grad_norm": 2.9897665977478027, "learning_rate": 1.9850983010443944e-05, "loss": 0.7436, "step": 3696 }, { "epoch": 0.6035672013387209, "grad_norm": 3.232905626296997, "learning_rate": 1.985087271884647e-05, "loss": 0.9193, "step": 3697 }, { "epoch": 0.6037304599812252, "grad_norm": 3.2961528301239014, "learning_rate": 1.9850762386755785e-05, "loss": 1.012, "step": 3698 }, { "epoch": 0.6038937186237296, "grad_norm": 2.813800811767578, "learning_rate": 1.9850652014172333e-05, "loss": 0.8191, "step": 3699 }, { "epoch": 0.6040569772662341, "grad_norm": 3.6781020164489746, "learning_rate": 1.985054160109657e-05, "loss": 0.9133, "step": 3700 }, { "epoch": 0.6042202359087384, "grad_norm": 2.8718578815460205, "learning_rate": 1.985043114752895e-05, "loss": 0.7376, "step": 3701 }, { "epoch": 0.6043834945512429, "grad_norm": 3.173490047454834, "learning_rate": 1.9850320653469927e-05, "loss": 0.8629, "step": 3702 }, { "epoch": 0.6045467531937472, "grad_norm": 2.8765010833740234, "learning_rate": 1.9850210118919952e-05, "loss": 0.9065, "step": 3703 }, { "epoch": 0.6047100118362516, "grad_norm": 3.354264974594116, "learning_rate": 1.9850099543879485e-05, "loss": 0.9648, "step": 3704 }, { "epoch": 0.6048732704787559, "grad_norm": 3.1690876483917236, "learning_rate": 1.984998892834898e-05, "loss": 0.8805, "step": 3705 }, { "epoch": 0.6050365291212604, "grad_norm": 3.1229214668273926, "learning_rate": 1.984987827232889e-05, "loss": 0.9905, "step": 3706 }, { "epoch": 0.6051997877637647, "grad_norm": 3.441721200942993, "learning_rate": 1.9849767575819666e-05, "loss": 1.0385, "step": 3707 }, { "epoch": 0.6053630464062691, "grad_norm": 3.0153465270996094, "learning_rate": 1.984965683882177e-05, "loss": 1.0526, "step": 3708 }, { "epoch": 0.6055263050487735, "grad_norm": 2.6333084106445312, "learning_rate": 1.984954606133565e-05, "loss": 0.7502, "step": 3709 }, { "epoch": 0.6056895636912779, "grad_norm": 3.069087266921997, "learning_rate": 1.984943524336177e-05, "loss": 0.8635, "step": 3710 }, { "epoch": 0.6058528223337823, "grad_norm": 2.8074493408203125, "learning_rate": 1.984932438490058e-05, "loss": 0.7978, "step": 3711 }, { "epoch": 0.6060160809762867, "grad_norm": 3.1432909965515137, "learning_rate": 1.984921348595253e-05, "loss": 0.8669, "step": 3712 }, { "epoch": 0.6061793396187911, "grad_norm": 3.209568977355957, "learning_rate": 1.984910254651809e-05, "loss": 0.9379, "step": 3713 }, { "epoch": 0.6063425982612954, "grad_norm": 2.7538199424743652, "learning_rate": 1.9848991566597707e-05, "loss": 0.8031, "step": 3714 }, { "epoch": 0.6065058569037999, "grad_norm": 3.134795665740967, "learning_rate": 1.9848880546191836e-05, "loss": 0.8333, "step": 3715 }, { "epoch": 0.6066691155463042, "grad_norm": 3.327697992324829, "learning_rate": 1.984876948530094e-05, "loss": 0.8744, "step": 3716 }, { "epoch": 0.6068323741888086, "grad_norm": 3.5026028156280518, "learning_rate": 1.9848658383925466e-05, "loss": 0.9487, "step": 3717 }, { "epoch": 0.606995632831313, "grad_norm": 3.053997039794922, "learning_rate": 1.9848547242065882e-05, "loss": 0.8501, "step": 3718 }, { "epoch": 0.6071588914738174, "grad_norm": 3.036237955093384, "learning_rate": 1.9848436059722636e-05, "loss": 0.9126, "step": 3719 }, { "epoch": 0.6073221501163217, "grad_norm": 3.540963888168335, "learning_rate": 1.9848324836896185e-05, "loss": 0.9049, "step": 3720 }, { "epoch": 0.6074854087588262, "grad_norm": 3.1301088333129883, "learning_rate": 1.9848213573586995e-05, "loss": 1.0738, "step": 3721 }, { "epoch": 0.6076486674013306, "grad_norm": 2.6936089992523193, "learning_rate": 1.9848102269795514e-05, "loss": 0.8471, "step": 3722 }, { "epoch": 0.6078119260438349, "grad_norm": 2.846346139907837, "learning_rate": 1.9847990925522205e-05, "loss": 0.7319, "step": 3723 }, { "epoch": 0.6079751846863394, "grad_norm": 2.7054333686828613, "learning_rate": 1.9847879540767524e-05, "loss": 0.7298, "step": 3724 }, { "epoch": 0.6081384433288437, "grad_norm": 2.623857021331787, "learning_rate": 1.9847768115531925e-05, "loss": 0.8812, "step": 3725 }, { "epoch": 0.6083017019713481, "grad_norm": 3.3965930938720703, "learning_rate": 1.9847656649815877e-05, "loss": 0.9838, "step": 3726 }, { "epoch": 0.6084649606138525, "grad_norm": 2.92431378364563, "learning_rate": 1.9847545143619826e-05, "loss": 0.7647, "step": 3727 }, { "epoch": 0.6086282192563569, "grad_norm": 3.6418519020080566, "learning_rate": 1.9847433596944233e-05, "loss": 1.0321, "step": 3728 }, { "epoch": 0.6087914778988612, "grad_norm": 3.202261209487915, "learning_rate": 1.9847322009789562e-05, "loss": 0.8923, "step": 3729 }, { "epoch": 0.6089547365413657, "grad_norm": 3.0098907947540283, "learning_rate": 1.9847210382156267e-05, "loss": 0.9096, "step": 3730 }, { "epoch": 0.60911799518387, "grad_norm": 3.6020350456237793, "learning_rate": 1.9847098714044813e-05, "loss": 0.8552, "step": 3731 }, { "epoch": 0.6092812538263744, "grad_norm": 2.8903329372406006, "learning_rate": 1.984698700545565e-05, "loss": 0.8668, "step": 3732 }, { "epoch": 0.6094445124688789, "grad_norm": 3.1220970153808594, "learning_rate": 1.9846875256389243e-05, "loss": 0.883, "step": 3733 }, { "epoch": 0.6096077711113832, "grad_norm": 3.2043404579162598, "learning_rate": 1.9846763466846048e-05, "loss": 0.9138, "step": 3734 }, { "epoch": 0.6097710297538876, "grad_norm": 3.4184861183166504, "learning_rate": 1.9846651636826526e-05, "loss": 1.0602, "step": 3735 }, { "epoch": 0.609934288396392, "grad_norm": 3.149286985397339, "learning_rate": 1.9846539766331142e-05, "loss": 1.0515, "step": 3736 }, { "epoch": 0.6100975470388964, "grad_norm": 3.5046732425689697, "learning_rate": 1.9846427855360346e-05, "loss": 0.9091, "step": 3737 }, { "epoch": 0.6102608056814007, "grad_norm": 2.779719591140747, "learning_rate": 1.9846315903914606e-05, "loss": 0.9347, "step": 3738 }, { "epoch": 0.6104240643239052, "grad_norm": 3.226801872253418, "learning_rate": 1.984620391199438e-05, "loss": 0.9827, "step": 3739 }, { "epoch": 0.6105873229664095, "grad_norm": 2.5146942138671875, "learning_rate": 1.9846091879600126e-05, "loss": 0.7107, "step": 3740 }, { "epoch": 0.6107505816089139, "grad_norm": 2.744988203048706, "learning_rate": 1.9845979806732304e-05, "loss": 0.7785, "step": 3741 }, { "epoch": 0.6109138402514183, "grad_norm": 2.808535575866699, "learning_rate": 1.9845867693391378e-05, "loss": 0.8303, "step": 3742 }, { "epoch": 0.6110770988939227, "grad_norm": 3.064154624938965, "learning_rate": 1.9845755539577808e-05, "loss": 0.9518, "step": 3743 }, { "epoch": 0.6112403575364271, "grad_norm": 3.394260883331299, "learning_rate": 1.9845643345292055e-05, "loss": 0.9429, "step": 3744 }, { "epoch": 0.6114036161789315, "grad_norm": 3.2839467525482178, "learning_rate": 1.984553111053458e-05, "loss": 0.9866, "step": 3745 }, { "epoch": 0.6115668748214359, "grad_norm": 3.5322659015655518, "learning_rate": 1.9845418835305845e-05, "loss": 0.9913, "step": 3746 }, { "epoch": 0.6117301334639402, "grad_norm": 3.267765998840332, "learning_rate": 1.9845306519606308e-05, "loss": 0.9636, "step": 3747 }, { "epoch": 0.6118933921064447, "grad_norm": 2.7915077209472656, "learning_rate": 1.9845194163436435e-05, "loss": 0.8509, "step": 3748 }, { "epoch": 0.612056650748949, "grad_norm": 2.417836904525757, "learning_rate": 1.9845081766796686e-05, "loss": 0.7061, "step": 3749 }, { "epoch": 0.6122199093914534, "grad_norm": 2.3667125701904297, "learning_rate": 1.9844969329687526e-05, "loss": 0.6198, "step": 3750 }, { "epoch": 0.6123831680339578, "grad_norm": 2.8048949241638184, "learning_rate": 1.9844856852109412e-05, "loss": 0.812, "step": 3751 }, { "epoch": 0.6125464266764622, "grad_norm": 3.185330390930176, "learning_rate": 1.984474433406281e-05, "loss": 0.889, "step": 3752 }, { "epoch": 0.6127096853189665, "grad_norm": 2.544123649597168, "learning_rate": 1.984463177554818e-05, "loss": 0.7584, "step": 3753 }, { "epoch": 0.612872943961471, "grad_norm": 3.2238800525665283, "learning_rate": 1.9844519176565984e-05, "loss": 0.8389, "step": 3754 }, { "epoch": 0.6130362026039754, "grad_norm": 3.241471290588379, "learning_rate": 1.9844406537116693e-05, "loss": 0.8673, "step": 3755 }, { "epoch": 0.6131994612464797, "grad_norm": 3.229391098022461, "learning_rate": 1.984429385720076e-05, "loss": 1.0393, "step": 3756 }, { "epoch": 0.6133627198889842, "grad_norm": 3.0227251052856445, "learning_rate": 1.984418113681865e-05, "loss": 0.8309, "step": 3757 }, { "epoch": 0.6135259785314885, "grad_norm": 2.933745861053467, "learning_rate": 1.984406837597083e-05, "loss": 0.9729, "step": 3758 }, { "epoch": 0.6136892371739929, "grad_norm": 3.061519145965576, "learning_rate": 1.9843955574657765e-05, "loss": 0.9118, "step": 3759 }, { "epoch": 0.6138524958164973, "grad_norm": 2.9702301025390625, "learning_rate": 1.9843842732879916e-05, "loss": 0.86, "step": 3760 }, { "epoch": 0.6140157544590017, "grad_norm": 3.0129687786102295, "learning_rate": 1.9843729850637744e-05, "loss": 0.8168, "step": 3761 }, { "epoch": 0.614179013101506, "grad_norm": 3.4032578468322754, "learning_rate": 1.9843616927931714e-05, "loss": 1.0716, "step": 3762 }, { "epoch": 0.6143422717440105, "grad_norm": 3.4378767013549805, "learning_rate": 1.9843503964762293e-05, "loss": 0.8946, "step": 3763 }, { "epoch": 0.6145055303865148, "grad_norm": 3.273935556411743, "learning_rate": 1.9843390961129945e-05, "loss": 0.9219, "step": 3764 }, { "epoch": 0.6146687890290192, "grad_norm": 3.217672109603882, "learning_rate": 1.9843277917035132e-05, "loss": 0.7845, "step": 3765 }, { "epoch": 0.6148320476715237, "grad_norm": 3.5672714710235596, "learning_rate": 1.984316483247832e-05, "loss": 0.9159, "step": 3766 }, { "epoch": 0.614995306314028, "grad_norm": 2.945770740509033, "learning_rate": 1.9843051707459975e-05, "loss": 0.9053, "step": 3767 }, { "epoch": 0.6151585649565324, "grad_norm": 3.3659780025482178, "learning_rate": 1.984293854198056e-05, "loss": 1.024, "step": 3768 }, { "epoch": 0.6153218235990368, "grad_norm": 3.3075761795043945, "learning_rate": 1.9842825336040543e-05, "loss": 0.8953, "step": 3769 }, { "epoch": 0.6154850822415412, "grad_norm": 2.92923641204834, "learning_rate": 1.9842712089640387e-05, "loss": 0.8975, "step": 3770 }, { "epoch": 0.6156483408840455, "grad_norm": 2.9528141021728516, "learning_rate": 1.984259880278056e-05, "loss": 0.7418, "step": 3771 }, { "epoch": 0.61581159952655, "grad_norm": 2.7017674446105957, "learning_rate": 1.9842485475461522e-05, "loss": 0.7782, "step": 3772 }, { "epoch": 0.6159748581690543, "grad_norm": 2.5515291690826416, "learning_rate": 1.9842372107683748e-05, "loss": 0.7187, "step": 3773 }, { "epoch": 0.6161381168115587, "grad_norm": 2.9594781398773193, "learning_rate": 1.9842258699447695e-05, "loss": 0.9197, "step": 3774 }, { "epoch": 0.616301375454063, "grad_norm": 2.879539966583252, "learning_rate": 1.9842145250753832e-05, "loss": 0.7459, "step": 3775 }, { "epoch": 0.6164646340965675, "grad_norm": 2.833263874053955, "learning_rate": 1.984203176160263e-05, "loss": 0.9838, "step": 3776 }, { "epoch": 0.6166278927390719, "grad_norm": 2.7740910053253174, "learning_rate": 1.9841918231994548e-05, "loss": 0.8803, "step": 3777 }, { "epoch": 0.6167911513815763, "grad_norm": 2.697628974914551, "learning_rate": 1.9841804661930056e-05, "loss": 0.8801, "step": 3778 }, { "epoch": 0.6169544100240807, "grad_norm": 2.6776745319366455, "learning_rate": 1.9841691051409623e-05, "loss": 0.7751, "step": 3779 }, { "epoch": 0.617117668666585, "grad_norm": 3.727506399154663, "learning_rate": 1.9841577400433715e-05, "loss": 1.0258, "step": 3780 }, { "epoch": 0.6172809273090895, "grad_norm": 2.9881863594055176, "learning_rate": 1.9841463709002798e-05, "loss": 0.958, "step": 3781 }, { "epoch": 0.6174441859515938, "grad_norm": 3.058079957962036, "learning_rate": 1.984134997711734e-05, "loss": 0.8906, "step": 3782 }, { "epoch": 0.6176074445940982, "grad_norm": 3.1774256229400635, "learning_rate": 1.984123620477781e-05, "loss": 0.9271, "step": 3783 }, { "epoch": 0.6177707032366025, "grad_norm": 2.9581711292266846, "learning_rate": 1.9841122391984668e-05, "loss": 0.9119, "step": 3784 }, { "epoch": 0.617933961879107, "grad_norm": 3.2644717693328857, "learning_rate": 1.984100853873839e-05, "loss": 0.9056, "step": 3785 }, { "epoch": 0.6180972205216113, "grad_norm": 3.2668614387512207, "learning_rate": 1.9840894645039444e-05, "loss": 0.9883, "step": 3786 }, { "epoch": 0.6182604791641158, "grad_norm": 3.3148531913757324, "learning_rate": 1.9840780710888298e-05, "loss": 0.9084, "step": 3787 }, { "epoch": 0.6184237378066202, "grad_norm": 3.4748706817626953, "learning_rate": 1.9840666736285414e-05, "loss": 0.9378, "step": 3788 }, { "epoch": 0.6185869964491245, "grad_norm": 2.9159154891967773, "learning_rate": 1.9840552721231265e-05, "loss": 0.8551, "step": 3789 }, { "epoch": 0.618750255091629, "grad_norm": 2.6874477863311768, "learning_rate": 1.9840438665726322e-05, "loss": 0.6478, "step": 3790 }, { "epoch": 0.6189135137341333, "grad_norm": 3.398869276046753, "learning_rate": 1.984032456977105e-05, "loss": 1.0557, "step": 3791 }, { "epoch": 0.6190767723766377, "grad_norm": 3.0629220008850098, "learning_rate": 1.984021043336592e-05, "loss": 0.9499, "step": 3792 }, { "epoch": 0.619240031019142, "grad_norm": 3.2377769947052, "learning_rate": 1.9840096256511398e-05, "loss": 0.9491, "step": 3793 }, { "epoch": 0.6194032896616465, "grad_norm": 2.9923477172851562, "learning_rate": 1.983998203920796e-05, "loss": 0.8992, "step": 3794 }, { "epoch": 0.6195665483041508, "grad_norm": 3.2334697246551514, "learning_rate": 1.983986778145607e-05, "loss": 0.8912, "step": 3795 }, { "epoch": 0.6197298069466552, "grad_norm": 2.9234232902526855, "learning_rate": 1.9839753483256197e-05, "loss": 0.9189, "step": 3796 }, { "epoch": 0.6198930655891596, "grad_norm": 3.2058143615722656, "learning_rate": 1.9839639144608815e-05, "loss": 0.9619, "step": 3797 }, { "epoch": 0.620056324231664, "grad_norm": 3.0094048976898193, "learning_rate": 1.983952476551439e-05, "loss": 0.9718, "step": 3798 }, { "epoch": 0.6202195828741685, "grad_norm": 3.0922553539276123, "learning_rate": 1.9839410345973395e-05, "loss": 1.1139, "step": 3799 }, { "epoch": 0.6203828415166728, "grad_norm": 2.7322351932525635, "learning_rate": 1.98392958859863e-05, "loss": 0.6956, "step": 3800 }, { "epoch": 0.6205461001591772, "grad_norm": 3.0480122566223145, "learning_rate": 1.983918138555357e-05, "loss": 0.8333, "step": 3801 }, { "epoch": 0.6207093588016815, "grad_norm": 3.3601105213165283, "learning_rate": 1.9839066844675687e-05, "loss": 0.9132, "step": 3802 }, { "epoch": 0.620872617444186, "grad_norm": 3.1273751258850098, "learning_rate": 1.9838952263353114e-05, "loss": 0.9473, "step": 3803 }, { "epoch": 0.6210358760866903, "grad_norm": 2.993865489959717, "learning_rate": 1.983883764158632e-05, "loss": 0.845, "step": 3804 }, { "epoch": 0.6211991347291947, "grad_norm": 3.5317752361297607, "learning_rate": 1.983872297937578e-05, "loss": 0.8856, "step": 3805 }, { "epoch": 0.6213623933716991, "grad_norm": 3.354746103286743, "learning_rate": 1.9838608276721973e-05, "loss": 1.0155, "step": 3806 }, { "epoch": 0.6215256520142035, "grad_norm": 3.2376914024353027, "learning_rate": 1.9838493533625353e-05, "loss": 0.8494, "step": 3807 }, { "epoch": 0.6216889106567078, "grad_norm": 2.879958152770996, "learning_rate": 1.9838378750086404e-05, "loss": 0.8778, "step": 3808 }, { "epoch": 0.6218521692992123, "grad_norm": 2.741001844406128, "learning_rate": 1.9838263926105595e-05, "loss": 0.8391, "step": 3809 }, { "epoch": 0.6220154279417167, "grad_norm": 3.048452377319336, "learning_rate": 1.98381490616834e-05, "loss": 0.9151, "step": 3810 }, { "epoch": 0.622178686584221, "grad_norm": 2.9529037475585938, "learning_rate": 1.9838034156820285e-05, "loss": 0.9913, "step": 3811 }, { "epoch": 0.6223419452267255, "grad_norm": 3.2189040184020996, "learning_rate": 1.9837919211516728e-05, "loss": 0.947, "step": 3812 }, { "epoch": 0.6225052038692298, "grad_norm": 2.7866456508636475, "learning_rate": 1.9837804225773203e-05, "loss": 0.7845, "step": 3813 }, { "epoch": 0.6226684625117342, "grad_norm": 2.8704864978790283, "learning_rate": 1.9837689199590176e-05, "loss": 0.8526, "step": 3814 }, { "epoch": 0.6228317211542386, "grad_norm": 2.5279479026794434, "learning_rate": 1.9837574132968123e-05, "loss": 0.7086, "step": 3815 }, { "epoch": 0.622994979796743, "grad_norm": 2.812643051147461, "learning_rate": 1.9837459025907518e-05, "loss": 0.9517, "step": 3816 }, { "epoch": 0.6231582384392473, "grad_norm": 2.9009244441986084, "learning_rate": 1.9837343878408835e-05, "loss": 0.8299, "step": 3817 }, { "epoch": 0.6233214970817518, "grad_norm": 2.807159185409546, "learning_rate": 1.9837228690472545e-05, "loss": 0.9099, "step": 3818 }, { "epoch": 0.6234847557242561, "grad_norm": 3.670997381210327, "learning_rate": 1.983711346209912e-05, "loss": 1.1719, "step": 3819 }, { "epoch": 0.6236480143667605, "grad_norm": 3.1611597537994385, "learning_rate": 1.983699819328904e-05, "loss": 0.8254, "step": 3820 }, { "epoch": 0.623811273009265, "grad_norm": 2.9242758750915527, "learning_rate": 1.9836882884042772e-05, "loss": 0.7494, "step": 3821 }, { "epoch": 0.6239745316517693, "grad_norm": 2.838319778442383, "learning_rate": 1.9836767534360793e-05, "loss": 0.8742, "step": 3822 }, { "epoch": 0.6241377902942737, "grad_norm": 3.784038782119751, "learning_rate": 1.9836652144243576e-05, "loss": 1.0221, "step": 3823 }, { "epoch": 0.6243010489367781, "grad_norm": 3.409975051879883, "learning_rate": 1.9836536713691602e-05, "loss": 0.9203, "step": 3824 }, { "epoch": 0.6244643075792825, "grad_norm": 2.796003818511963, "learning_rate": 1.9836421242705334e-05, "loss": 0.7181, "step": 3825 }, { "epoch": 0.6246275662217868, "grad_norm": 3.2177505493164062, "learning_rate": 1.9836305731285253e-05, "loss": 0.8587, "step": 3826 }, { "epoch": 0.6247908248642913, "grad_norm": 3.4076766967773438, "learning_rate": 1.9836190179431835e-05, "loss": 0.9736, "step": 3827 }, { "epoch": 0.6249540835067956, "grad_norm": 2.6539466381073, "learning_rate": 1.9836074587145557e-05, "loss": 0.8278, "step": 3828 }, { "epoch": 0.6251173421493, "grad_norm": 2.9572391510009766, "learning_rate": 1.9835958954426885e-05, "loss": 0.7421, "step": 3829 }, { "epoch": 0.6252806007918044, "grad_norm": 2.457771062850952, "learning_rate": 1.98358432812763e-05, "loss": 0.5781, "step": 3830 }, { "epoch": 0.6254438594343088, "grad_norm": 3.3561456203460693, "learning_rate": 1.983572756769428e-05, "loss": 0.8414, "step": 3831 }, { "epoch": 0.6256071180768132, "grad_norm": 3.135972738265991, "learning_rate": 1.9835611813681293e-05, "loss": 0.9282, "step": 3832 }, { "epoch": 0.6257703767193176, "grad_norm": 3.220388174057007, "learning_rate": 1.983549601923782e-05, "loss": 0.7642, "step": 3833 }, { "epoch": 0.625933635361822, "grad_norm": 3.199568033218384, "learning_rate": 1.9835380184364343e-05, "loss": 0.9067, "step": 3834 }, { "epoch": 0.6260968940043263, "grad_norm": 3.1623897552490234, "learning_rate": 1.9835264309061327e-05, "loss": 0.7601, "step": 3835 }, { "epoch": 0.6262601526468308, "grad_norm": 2.6785616874694824, "learning_rate": 1.9835148393329257e-05, "loss": 0.7359, "step": 3836 }, { "epoch": 0.6264234112893351, "grad_norm": 3.0857255458831787, "learning_rate": 1.9835032437168598e-05, "loss": 0.8884, "step": 3837 }, { "epoch": 0.6265866699318395, "grad_norm": 3.241243362426758, "learning_rate": 1.983491644057984e-05, "loss": 0.9389, "step": 3838 }, { "epoch": 0.6267499285743439, "grad_norm": 3.3409836292266846, "learning_rate": 1.9834800403563456e-05, "loss": 1.0197, "step": 3839 }, { "epoch": 0.6269131872168483, "grad_norm": 3.309462070465088, "learning_rate": 1.9834684326119917e-05, "loss": 1.02, "step": 3840 }, { "epoch": 0.6270764458593526, "grad_norm": 2.8821375370025635, "learning_rate": 1.983456820824971e-05, "loss": 0.8495, "step": 3841 }, { "epoch": 0.6272397045018571, "grad_norm": 2.9730191230773926, "learning_rate": 1.98344520499533e-05, "loss": 0.7735, "step": 3842 }, { "epoch": 0.6274029631443615, "grad_norm": 3.564887523651123, "learning_rate": 1.983433585123117e-05, "loss": 0.9977, "step": 3843 }, { "epoch": 0.6275662217868658, "grad_norm": 2.8028457164764404, "learning_rate": 1.9834219612083805e-05, "loss": 0.8482, "step": 3844 }, { "epoch": 0.6277294804293703, "grad_norm": 2.9970362186431885, "learning_rate": 1.9834103332511672e-05, "loss": 0.8923, "step": 3845 }, { "epoch": 0.6278927390718746, "grad_norm": 3.2789864540100098, "learning_rate": 1.9833987012515255e-05, "loss": 0.9456, "step": 3846 }, { "epoch": 0.628055997714379, "grad_norm": 2.9285197257995605, "learning_rate": 1.983387065209503e-05, "loss": 0.8656, "step": 3847 }, { "epoch": 0.6282192563568834, "grad_norm": 2.9215736389160156, "learning_rate": 1.9833754251251477e-05, "loss": 0.7489, "step": 3848 }, { "epoch": 0.6283825149993878, "grad_norm": 2.9527697563171387, "learning_rate": 1.9833637809985075e-05, "loss": 0.9509, "step": 3849 }, { "epoch": 0.6285457736418921, "grad_norm": 3.119387626647949, "learning_rate": 1.9833521328296296e-05, "loss": 0.9033, "step": 3850 }, { "epoch": 0.6287090322843966, "grad_norm": 3.338845729827881, "learning_rate": 1.983340480618563e-05, "loss": 0.9884, "step": 3851 }, { "epoch": 0.6288722909269009, "grad_norm": 2.886787176132202, "learning_rate": 1.9833288243653546e-05, "loss": 0.8751, "step": 3852 }, { "epoch": 0.6290355495694053, "grad_norm": 3.40315318107605, "learning_rate": 1.9833171640700528e-05, "loss": 1.0158, "step": 3853 }, { "epoch": 0.6291988082119098, "grad_norm": 3.357557535171509, "learning_rate": 1.9833054997327054e-05, "loss": 1.0032, "step": 3854 }, { "epoch": 0.6293620668544141, "grad_norm": 2.8196332454681396, "learning_rate": 1.9832938313533604e-05, "loss": 0.8155, "step": 3855 }, { "epoch": 0.6295253254969185, "grad_norm": 2.5038952827453613, "learning_rate": 1.9832821589320657e-05, "loss": 0.6976, "step": 3856 }, { "epoch": 0.6296885841394229, "grad_norm": 3.100142240524292, "learning_rate": 1.9832704824688694e-05, "loss": 0.9597, "step": 3857 }, { "epoch": 0.6298518427819273, "grad_norm": 3.019361734390259, "learning_rate": 1.9832588019638196e-05, "loss": 0.884, "step": 3858 }, { "epoch": 0.6300151014244316, "grad_norm": 3.234314203262329, "learning_rate": 1.983247117416964e-05, "loss": 1.0447, "step": 3859 }, { "epoch": 0.6301783600669361, "grad_norm": 3.184634208679199, "learning_rate": 1.9832354288283504e-05, "loss": 0.8298, "step": 3860 }, { "epoch": 0.6303416187094404, "grad_norm": 3.1366820335388184, "learning_rate": 1.9832237361980276e-05, "loss": 0.7857, "step": 3861 }, { "epoch": 0.6305048773519448, "grad_norm": 2.8065035343170166, "learning_rate": 1.983212039526043e-05, "loss": 0.7093, "step": 3862 }, { "epoch": 0.6306681359944492, "grad_norm": 3.2713935375213623, "learning_rate": 1.9832003388124453e-05, "loss": 0.9256, "step": 3863 }, { "epoch": 0.6308313946369536, "grad_norm": 2.986154317855835, "learning_rate": 1.9831886340572823e-05, "loss": 0.8969, "step": 3864 }, { "epoch": 0.630994653279458, "grad_norm": 3.100717782974243, "learning_rate": 1.983176925260602e-05, "loss": 0.8511, "step": 3865 }, { "epoch": 0.6311579119219624, "grad_norm": 2.9746975898742676, "learning_rate": 1.9831652124224524e-05, "loss": 0.7046, "step": 3866 }, { "epoch": 0.6313211705644668, "grad_norm": 2.972072124481201, "learning_rate": 1.983153495542882e-05, "loss": 0.8238, "step": 3867 }, { "epoch": 0.6314844292069711, "grad_norm": 3.325896978378296, "learning_rate": 1.9831417746219385e-05, "loss": 0.9494, "step": 3868 }, { "epoch": 0.6316476878494756, "grad_norm": 2.8736045360565186, "learning_rate": 1.983130049659671e-05, "loss": 0.8561, "step": 3869 }, { "epoch": 0.6318109464919799, "grad_norm": 2.585280656814575, "learning_rate": 1.9831183206561264e-05, "loss": 0.7357, "step": 3870 }, { "epoch": 0.6319742051344843, "grad_norm": 2.839336395263672, "learning_rate": 1.983106587611354e-05, "loss": 0.8623, "step": 3871 }, { "epoch": 0.6321374637769887, "grad_norm": 3.0647354125976562, "learning_rate": 1.9830948505254016e-05, "loss": 0.9088, "step": 3872 }, { "epoch": 0.6323007224194931, "grad_norm": 3.410295248031616, "learning_rate": 1.9830831093983172e-05, "loss": 0.8572, "step": 3873 }, { "epoch": 0.6324639810619974, "grad_norm": 3.320204019546509, "learning_rate": 1.98307136423015e-05, "loss": 0.8453, "step": 3874 }, { "epoch": 0.6326272397045019, "grad_norm": 3.6078202724456787, "learning_rate": 1.983059615020947e-05, "loss": 0.8849, "step": 3875 }, { "epoch": 0.6327904983470063, "grad_norm": 3.07131028175354, "learning_rate": 1.9830478617707575e-05, "loss": 0.9391, "step": 3876 }, { "epoch": 0.6329537569895106, "grad_norm": 2.8718466758728027, "learning_rate": 1.9830361044796292e-05, "loss": 0.7542, "step": 3877 }, { "epoch": 0.6331170156320151, "grad_norm": 3.2709529399871826, "learning_rate": 1.983024343147611e-05, "loss": 0.9189, "step": 3878 }, { "epoch": 0.6332802742745194, "grad_norm": 2.6670820713043213, "learning_rate": 1.9830125777747506e-05, "loss": 0.6857, "step": 3879 }, { "epoch": 0.6334435329170238, "grad_norm": 3.233816385269165, "learning_rate": 1.9830008083610965e-05, "loss": 0.984, "step": 3880 }, { "epoch": 0.6336067915595281, "grad_norm": 3.027925968170166, "learning_rate": 1.9829890349066974e-05, "loss": 0.9731, "step": 3881 }, { "epoch": 0.6337700502020326, "grad_norm": 3.351177453994751, "learning_rate": 1.982977257411602e-05, "loss": 0.7542, "step": 3882 }, { "epoch": 0.6339333088445369, "grad_norm": 2.9005753993988037, "learning_rate": 1.9829654758758577e-05, "loss": 0.861, "step": 3883 }, { "epoch": 0.6340965674870414, "grad_norm": 2.943369150161743, "learning_rate": 1.9829536902995136e-05, "loss": 0.8329, "step": 3884 }, { "epoch": 0.6342598261295457, "grad_norm": 3.292489767074585, "learning_rate": 1.982941900682618e-05, "loss": 0.8917, "step": 3885 }, { "epoch": 0.6344230847720501, "grad_norm": 3.1572487354278564, "learning_rate": 1.9829301070252197e-05, "loss": 1.006, "step": 3886 }, { "epoch": 0.6345863434145546, "grad_norm": 2.3107898235321045, "learning_rate": 1.9829183093273666e-05, "loss": 0.6539, "step": 3887 }, { "epoch": 0.6347496020570589, "grad_norm": 2.7495884895324707, "learning_rate": 1.9829065075891073e-05, "loss": 0.8038, "step": 3888 }, { "epoch": 0.6349128606995633, "grad_norm": 2.7644927501678467, "learning_rate": 1.982894701810491e-05, "loss": 0.7306, "step": 3889 }, { "epoch": 0.6350761193420676, "grad_norm": 3.0196330547332764, "learning_rate": 1.9828828919915652e-05, "loss": 0.8932, "step": 3890 }, { "epoch": 0.6352393779845721, "grad_norm": 3.701772451400757, "learning_rate": 1.9828710781323793e-05, "loss": 1.0608, "step": 3891 }, { "epoch": 0.6354026366270764, "grad_norm": 3.0952324867248535, "learning_rate": 1.9828592602329815e-05, "loss": 0.7939, "step": 3892 }, { "epoch": 0.6355658952695808, "grad_norm": 2.999957323074341, "learning_rate": 1.9828474382934204e-05, "loss": 0.9708, "step": 3893 }, { "epoch": 0.6357291539120852, "grad_norm": 3.4278578758239746, "learning_rate": 1.9828356123137443e-05, "loss": 1.0451, "step": 3894 }, { "epoch": 0.6358924125545896, "grad_norm": 2.9002537727355957, "learning_rate": 1.9828237822940026e-05, "loss": 0.8923, "step": 3895 }, { "epoch": 0.6360556711970939, "grad_norm": 2.880476951599121, "learning_rate": 1.982811948234243e-05, "loss": 0.931, "step": 3896 }, { "epoch": 0.6362189298395984, "grad_norm": 3.1926968097686768, "learning_rate": 1.9828001101345148e-05, "loss": 0.8382, "step": 3897 }, { "epoch": 0.6363821884821028, "grad_norm": 3.37007212638855, "learning_rate": 1.9827882679948666e-05, "loss": 0.9564, "step": 3898 }, { "epoch": 0.6365454471246071, "grad_norm": 3.965630292892456, "learning_rate": 1.9827764218153466e-05, "loss": 1.239, "step": 3899 }, { "epoch": 0.6367087057671116, "grad_norm": 3.383882522583008, "learning_rate": 1.982764571596004e-05, "loss": 0.9038, "step": 3900 }, { "epoch": 0.6368719644096159, "grad_norm": 2.851203680038452, "learning_rate": 1.9827527173368874e-05, "loss": 0.7362, "step": 3901 }, { "epoch": 0.6370352230521203, "grad_norm": 2.9328393936157227, "learning_rate": 1.982740859038045e-05, "loss": 0.9123, "step": 3902 }, { "epoch": 0.6371984816946247, "grad_norm": 3.454108238220215, "learning_rate": 1.9827289966995268e-05, "loss": 1.0951, "step": 3903 }, { "epoch": 0.6373617403371291, "grad_norm": 3.454514741897583, "learning_rate": 1.98271713032138e-05, "loss": 0.8227, "step": 3904 }, { "epoch": 0.6375249989796334, "grad_norm": 3.287677764892578, "learning_rate": 1.9827052599036548e-05, "loss": 0.8936, "step": 3905 }, { "epoch": 0.6376882576221379, "grad_norm": 2.7518787384033203, "learning_rate": 1.982693385446399e-05, "loss": 0.8356, "step": 3906 }, { "epoch": 0.6378515162646422, "grad_norm": 3.14766788482666, "learning_rate": 1.9826815069496616e-05, "loss": 0.9832, "step": 3907 }, { "epoch": 0.6380147749071466, "grad_norm": 2.98791241645813, "learning_rate": 1.9826696244134916e-05, "loss": 0.9608, "step": 3908 }, { "epoch": 0.6381780335496511, "grad_norm": 3.2073960304260254, "learning_rate": 1.982657737837938e-05, "loss": 0.9393, "step": 3909 }, { "epoch": 0.6383412921921554, "grad_norm": 3.1788599491119385, "learning_rate": 1.9826458472230496e-05, "loss": 0.8614, "step": 3910 }, { "epoch": 0.6385045508346598, "grad_norm": 2.646660089492798, "learning_rate": 1.982633952568875e-05, "loss": 0.8949, "step": 3911 }, { "epoch": 0.6386678094771642, "grad_norm": 2.8928704261779785, "learning_rate": 1.9826220538754633e-05, "loss": 0.8781, "step": 3912 }, { "epoch": 0.6388310681196686, "grad_norm": 3.4865105152130127, "learning_rate": 1.9826101511428633e-05, "loss": 0.9922, "step": 3913 }, { "epoch": 0.6389943267621729, "grad_norm": 3.0138728618621826, "learning_rate": 1.9825982443711244e-05, "loss": 0.9922, "step": 3914 }, { "epoch": 0.6391575854046774, "grad_norm": 2.996791362762451, "learning_rate": 1.9825863335602946e-05, "loss": 1.0309, "step": 3915 }, { "epoch": 0.6393208440471817, "grad_norm": 2.8468916416168213, "learning_rate": 1.9825744187104234e-05, "loss": 0.8824, "step": 3916 }, { "epoch": 0.6394841026896861, "grad_norm": 3.4796483516693115, "learning_rate": 1.98256249982156e-05, "loss": 0.8898, "step": 3917 }, { "epoch": 0.6396473613321906, "grad_norm": 3.202174186706543, "learning_rate": 1.982550576893753e-05, "loss": 0.9284, "step": 3918 }, { "epoch": 0.6398106199746949, "grad_norm": 3.100980281829834, "learning_rate": 1.982538649927052e-05, "loss": 0.9471, "step": 3919 }, { "epoch": 0.6399738786171993, "grad_norm": 2.602989673614502, "learning_rate": 1.9825267189215052e-05, "loss": 0.8282, "step": 3920 }, { "epoch": 0.6401371372597037, "grad_norm": 3.026740312576294, "learning_rate": 1.982514783877162e-05, "loss": 0.8781, "step": 3921 }, { "epoch": 0.6403003959022081, "grad_norm": 3.0946731567382812, "learning_rate": 1.982502844794072e-05, "loss": 0.9669, "step": 3922 }, { "epoch": 0.6404636545447124, "grad_norm": 2.889418840408325, "learning_rate": 1.9824909016722834e-05, "loss": 0.9063, "step": 3923 }, { "epoch": 0.6406269131872169, "grad_norm": 2.9146475791931152, "learning_rate": 1.982478954511846e-05, "loss": 1.0018, "step": 3924 }, { "epoch": 0.6407901718297212, "grad_norm": 2.5161666870117188, "learning_rate": 1.9824670033128083e-05, "loss": 0.7745, "step": 3925 }, { "epoch": 0.6409534304722256, "grad_norm": 3.216055393218994, "learning_rate": 1.9824550480752197e-05, "loss": 1.054, "step": 3926 }, { "epoch": 0.64111668911473, "grad_norm": 3.124485969543457, "learning_rate": 1.982443088799129e-05, "loss": 0.9772, "step": 3927 }, { "epoch": 0.6412799477572344, "grad_norm": 2.363288402557373, "learning_rate": 1.9824311254845864e-05, "loss": 0.6501, "step": 3928 }, { "epoch": 0.6414432063997388, "grad_norm": 3.1659963130950928, "learning_rate": 1.9824191581316402e-05, "loss": 0.9255, "step": 3929 }, { "epoch": 0.6416064650422432, "grad_norm": 3.5145437717437744, "learning_rate": 1.9824071867403398e-05, "loss": 0.8862, "step": 3930 }, { "epoch": 0.6417697236847476, "grad_norm": 2.9606711864471436, "learning_rate": 1.9823952113107342e-05, "loss": 0.7976, "step": 3931 }, { "epoch": 0.6419329823272519, "grad_norm": 3.4096179008483887, "learning_rate": 1.9823832318428734e-05, "loss": 0.8516, "step": 3932 }, { "epoch": 0.6420962409697564, "grad_norm": 3.2840993404388428, "learning_rate": 1.9823712483368056e-05, "loss": 0.8757, "step": 3933 }, { "epoch": 0.6422594996122607, "grad_norm": 3.116621971130371, "learning_rate": 1.9823592607925805e-05, "loss": 0.9038, "step": 3934 }, { "epoch": 0.6424227582547651, "grad_norm": 2.9589264392852783, "learning_rate": 1.9823472692102477e-05, "loss": 0.7894, "step": 3935 }, { "epoch": 0.6425860168972695, "grad_norm": 2.925694227218628, "learning_rate": 1.982335273589856e-05, "loss": 0.9367, "step": 3936 }, { "epoch": 0.6427492755397739, "grad_norm": 2.855741024017334, "learning_rate": 1.9823232739314552e-05, "loss": 0.9434, "step": 3937 }, { "epoch": 0.6429125341822782, "grad_norm": 2.401609420776367, "learning_rate": 1.9823112702350937e-05, "loss": 0.7262, "step": 3938 }, { "epoch": 0.6430757928247827, "grad_norm": 2.9616520404815674, "learning_rate": 1.982299262500822e-05, "loss": 0.8676, "step": 3939 }, { "epoch": 0.6432390514672871, "grad_norm": 3.346693992614746, "learning_rate": 1.982287250728689e-05, "loss": 0.9388, "step": 3940 }, { "epoch": 0.6434023101097914, "grad_norm": 3.3192551136016846, "learning_rate": 1.9822752349187437e-05, "loss": 0.9277, "step": 3941 }, { "epoch": 0.6435655687522959, "grad_norm": 2.9450347423553467, "learning_rate": 1.982263215071036e-05, "loss": 0.898, "step": 3942 }, { "epoch": 0.6437288273948002, "grad_norm": 3.271937608718872, "learning_rate": 1.982251191185615e-05, "loss": 0.8424, "step": 3943 }, { "epoch": 0.6438920860373046, "grad_norm": 3.2083687782287598, "learning_rate": 1.98223916326253e-05, "loss": 0.8935, "step": 3944 }, { "epoch": 0.644055344679809, "grad_norm": 3.0567257404327393, "learning_rate": 1.9822271313018314e-05, "loss": 0.8559, "step": 3945 }, { "epoch": 0.6442186033223134, "grad_norm": 2.8480401039123535, "learning_rate": 1.9822150953035676e-05, "loss": 0.8655, "step": 3946 }, { "epoch": 0.6443818619648177, "grad_norm": 2.9988152980804443, "learning_rate": 1.982203055267788e-05, "loss": 0.8492, "step": 3947 }, { "epoch": 0.6445451206073222, "grad_norm": 3.339154005050659, "learning_rate": 1.982191011194543e-05, "loss": 0.875, "step": 3948 }, { "epoch": 0.6447083792498265, "grad_norm": 3.201810836791992, "learning_rate": 1.9821789630838816e-05, "loss": 0.9237, "step": 3949 }, { "epoch": 0.6448716378923309, "grad_norm": 3.2240066528320312, "learning_rate": 1.9821669109358532e-05, "loss": 0.9235, "step": 3950 }, { "epoch": 0.6450348965348354, "grad_norm": 2.6274986267089844, "learning_rate": 1.9821548547505076e-05, "loss": 0.9515, "step": 3951 }, { "epoch": 0.6451981551773397, "grad_norm": 2.650967836380005, "learning_rate": 1.9821427945278944e-05, "loss": 0.6932, "step": 3952 }, { "epoch": 0.6453614138198441, "grad_norm": 3.0735394954681396, "learning_rate": 1.9821307302680626e-05, "loss": 1.0283, "step": 3953 }, { "epoch": 0.6455246724623485, "grad_norm": 3.1400575637817383, "learning_rate": 1.9821186619710623e-05, "loss": 1.0008, "step": 3954 }, { "epoch": 0.6456879311048529, "grad_norm": 2.926954507827759, "learning_rate": 1.9821065896369435e-05, "loss": 0.8259, "step": 3955 }, { "epoch": 0.6458511897473572, "grad_norm": 2.6525211334228516, "learning_rate": 1.982094513265755e-05, "loss": 0.8899, "step": 3956 }, { "epoch": 0.6460144483898617, "grad_norm": 2.973858118057251, "learning_rate": 1.9820824328575465e-05, "loss": 0.7507, "step": 3957 }, { "epoch": 0.646177707032366, "grad_norm": 3.755634307861328, "learning_rate": 1.9820703484123683e-05, "loss": 1.091, "step": 3958 }, { "epoch": 0.6463409656748704, "grad_norm": 2.6258273124694824, "learning_rate": 1.9820582599302697e-05, "loss": 0.7739, "step": 3959 }, { "epoch": 0.6465042243173748, "grad_norm": 2.790565252304077, "learning_rate": 1.9820461674113002e-05, "loss": 0.7763, "step": 3960 }, { "epoch": 0.6466674829598792, "grad_norm": 3.077523946762085, "learning_rate": 1.9820340708555102e-05, "loss": 0.8443, "step": 3961 }, { "epoch": 0.6468307416023836, "grad_norm": 3.793990135192871, "learning_rate": 1.9820219702629485e-05, "loss": 0.994, "step": 3962 }, { "epoch": 0.646994000244888, "grad_norm": 3.5199737548828125, "learning_rate": 1.9820098656336654e-05, "loss": 1.0798, "step": 3963 }, { "epoch": 0.6471572588873924, "grad_norm": 3.1504733562469482, "learning_rate": 1.9819977569677105e-05, "loss": 0.8482, "step": 3964 }, { "epoch": 0.6473205175298967, "grad_norm": 2.92995548248291, "learning_rate": 1.9819856442651337e-05, "loss": 0.761, "step": 3965 }, { "epoch": 0.6474837761724012, "grad_norm": 3.3386077880859375, "learning_rate": 1.9819735275259848e-05, "loss": 0.8807, "step": 3966 }, { "epoch": 0.6476470348149055, "grad_norm": 2.787383556365967, "learning_rate": 1.9819614067503133e-05, "loss": 0.8389, "step": 3967 }, { "epoch": 0.6478102934574099, "grad_norm": 3.527400016784668, "learning_rate": 1.9819492819381693e-05, "loss": 1.0233, "step": 3968 }, { "epoch": 0.6479735520999143, "grad_norm": 2.8187098503112793, "learning_rate": 1.981937153089603e-05, "loss": 0.7233, "step": 3969 }, { "epoch": 0.6481368107424187, "grad_norm": 3.4902541637420654, "learning_rate": 1.981925020204663e-05, "loss": 0.8528, "step": 3970 }, { "epoch": 0.648300069384923, "grad_norm": 2.9057812690734863, "learning_rate": 1.9819128832834007e-05, "loss": 0.818, "step": 3971 }, { "epoch": 0.6484633280274275, "grad_norm": 3.0120043754577637, "learning_rate": 1.981900742325865e-05, "loss": 0.9084, "step": 3972 }, { "epoch": 0.6486265866699319, "grad_norm": 3.3041059970855713, "learning_rate": 1.981888597332106e-05, "loss": 1.0567, "step": 3973 }, { "epoch": 0.6487898453124362, "grad_norm": 2.8662941455841064, "learning_rate": 1.981876448302174e-05, "loss": 0.854, "step": 3974 }, { "epoch": 0.6489531039549407, "grad_norm": 2.7356514930725098, "learning_rate": 1.9818642952361188e-05, "loss": 0.8869, "step": 3975 }, { "epoch": 0.649116362597445, "grad_norm": 3.1793999671936035, "learning_rate": 1.9818521381339895e-05, "loss": 0.9793, "step": 3976 }, { "epoch": 0.6492796212399494, "grad_norm": 3.1392931938171387, "learning_rate": 1.9818399769958377e-05, "loss": 0.8514, "step": 3977 }, { "epoch": 0.6494428798824537, "grad_norm": 2.613595724105835, "learning_rate": 1.981827811821712e-05, "loss": 0.68, "step": 3978 }, { "epoch": 0.6496061385249582, "grad_norm": 2.692629337310791, "learning_rate": 1.9818156426116627e-05, "loss": 0.9152, "step": 3979 }, { "epoch": 0.6497693971674625, "grad_norm": 2.990654945373535, "learning_rate": 1.9818034693657404e-05, "loss": 0.9684, "step": 3980 }, { "epoch": 0.649932655809967, "grad_norm": 2.5670242309570312, "learning_rate": 1.9817912920839947e-05, "loss": 0.8303, "step": 3981 }, { "epoch": 0.6500959144524713, "grad_norm": 2.995119333267212, "learning_rate": 1.9817791107664756e-05, "loss": 0.7984, "step": 3982 }, { "epoch": 0.6502591730949757, "grad_norm": 3.257441520690918, "learning_rate": 1.9817669254132338e-05, "loss": 1.1107, "step": 3983 }, { "epoch": 0.6504224317374802, "grad_norm": 2.84625506401062, "learning_rate": 1.981754736024318e-05, "loss": 0.8486, "step": 3984 }, { "epoch": 0.6505856903799845, "grad_norm": 2.830303430557251, "learning_rate": 1.9817425425997802e-05, "loss": 0.7168, "step": 3985 }, { "epoch": 0.6507489490224889, "grad_norm": 2.664661169052124, "learning_rate": 1.9817303451396687e-05, "loss": 1.0347, "step": 3986 }, { "epoch": 0.6509122076649932, "grad_norm": 2.6115400791168213, "learning_rate": 1.981718143644035e-05, "loss": 0.7363, "step": 3987 }, { "epoch": 0.6510754663074977, "grad_norm": 2.8587021827697754, "learning_rate": 1.9817059381129283e-05, "loss": 0.934, "step": 3988 }, { "epoch": 0.651238724950002, "grad_norm": 2.8286874294281006, "learning_rate": 1.9816937285463992e-05, "loss": 0.8177, "step": 3989 }, { "epoch": 0.6514019835925065, "grad_norm": 2.928020715713501, "learning_rate": 1.9816815149444978e-05, "loss": 0.7876, "step": 3990 }, { "epoch": 0.6515652422350108, "grad_norm": 2.61568021774292, "learning_rate": 1.9816692973072745e-05, "loss": 0.7321, "step": 3991 }, { "epoch": 0.6517285008775152, "grad_norm": 2.8929243087768555, "learning_rate": 1.981657075634779e-05, "loss": 0.7863, "step": 3992 }, { "epoch": 0.6518917595200195, "grad_norm": 3.100496292114258, "learning_rate": 1.9816448499270625e-05, "loss": 0.9739, "step": 3993 }, { "epoch": 0.652055018162524, "grad_norm": 2.9888339042663574, "learning_rate": 1.9816326201841747e-05, "loss": 0.9411, "step": 3994 }, { "epoch": 0.6522182768050284, "grad_norm": 3.258553981781006, "learning_rate": 1.9816203864061657e-05, "loss": 0.8562, "step": 3995 }, { "epoch": 0.6523815354475327, "grad_norm": 2.925974130630493, "learning_rate": 1.981608148593086e-05, "loss": 0.8438, "step": 3996 }, { "epoch": 0.6525447940900372, "grad_norm": 2.9333739280700684, "learning_rate": 1.9815959067449856e-05, "loss": 0.9532, "step": 3997 }, { "epoch": 0.6527080527325415, "grad_norm": 3.005046844482422, "learning_rate": 1.9815836608619152e-05, "loss": 1.0025, "step": 3998 }, { "epoch": 0.652871311375046, "grad_norm": 3.077254056930542, "learning_rate": 1.981571410943925e-05, "loss": 0.8336, "step": 3999 }, { "epoch": 0.6530345700175503, "grad_norm": 2.7989118099212646, "learning_rate": 1.9815591569910654e-05, "loss": 0.6927, "step": 4000 }, { "epoch": 0.6531978286600547, "grad_norm": 2.8571555614471436, "learning_rate": 1.9815468990033867e-05, "loss": 0.7513, "step": 4001 }, { "epoch": 0.653361087302559, "grad_norm": 2.943603515625, "learning_rate": 1.9815346369809397e-05, "loss": 0.6888, "step": 4002 }, { "epoch": 0.6535243459450635, "grad_norm": 2.9785845279693604, "learning_rate": 1.9815223709237742e-05, "loss": 0.9586, "step": 4003 }, { "epoch": 0.6536876045875678, "grad_norm": 2.8775529861450195, "learning_rate": 1.9815101008319404e-05, "loss": 0.845, "step": 4004 }, { "epoch": 0.6538508632300722, "grad_norm": 2.85452938079834, "learning_rate": 1.9814978267054895e-05, "loss": 0.8751, "step": 4005 }, { "epoch": 0.6540141218725767, "grad_norm": 3.060373067855835, "learning_rate": 1.981485548544472e-05, "loss": 0.9767, "step": 4006 }, { "epoch": 0.654177380515081, "grad_norm": 3.340667963027954, "learning_rate": 1.9814732663489374e-05, "loss": 1.0651, "step": 4007 }, { "epoch": 0.6543406391575854, "grad_norm": 3.264380693435669, "learning_rate": 1.9814609801189372e-05, "loss": 0.9762, "step": 4008 }, { "epoch": 0.6545038978000898, "grad_norm": 2.990487813949585, "learning_rate": 1.9814486898545212e-05, "loss": 0.9625, "step": 4009 }, { "epoch": 0.6546671564425942, "grad_norm": 2.5353524684906006, "learning_rate": 1.9814363955557404e-05, "loss": 0.7823, "step": 4010 }, { "epoch": 0.6548304150850985, "grad_norm": 3.027418613433838, "learning_rate": 1.9814240972226453e-05, "loss": 0.8894, "step": 4011 }, { "epoch": 0.654993673727603, "grad_norm": 2.6382718086242676, "learning_rate": 1.9814117948552862e-05, "loss": 0.6882, "step": 4012 }, { "epoch": 0.6551569323701073, "grad_norm": 3.633514642715454, "learning_rate": 1.9813994884537137e-05, "loss": 1.0273, "step": 4013 }, { "epoch": 0.6553201910126117, "grad_norm": 3.159424304962158, "learning_rate": 1.9813871780179785e-05, "loss": 0.9396, "step": 4014 }, { "epoch": 0.6554834496551161, "grad_norm": 2.6206324100494385, "learning_rate": 1.9813748635481313e-05, "loss": 0.7318, "step": 4015 }, { "epoch": 0.6556467082976205, "grad_norm": 3.416536569595337, "learning_rate": 1.9813625450442226e-05, "loss": 0.7514, "step": 4016 }, { "epoch": 0.6558099669401249, "grad_norm": 2.9554824829101562, "learning_rate": 1.9813502225063032e-05, "loss": 0.9547, "step": 4017 }, { "epoch": 0.6559732255826293, "grad_norm": 2.8751349449157715, "learning_rate": 1.9813378959344233e-05, "loss": 0.9536, "step": 4018 }, { "epoch": 0.6561364842251337, "grad_norm": 3.0870559215545654, "learning_rate": 1.981325565328634e-05, "loss": 0.8115, "step": 4019 }, { "epoch": 0.656299742867638, "grad_norm": 3.0044288635253906, "learning_rate": 1.9813132306889855e-05, "loss": 0.8381, "step": 4020 }, { "epoch": 0.6564630015101425, "grad_norm": 3.0265588760375977, "learning_rate": 1.9813008920155293e-05, "loss": 0.8812, "step": 4021 }, { "epoch": 0.6566262601526468, "grad_norm": 2.7848291397094727, "learning_rate": 1.981288549308315e-05, "loss": 0.7698, "step": 4022 }, { "epoch": 0.6567895187951512, "grad_norm": 2.562717914581299, "learning_rate": 1.981276202567395e-05, "loss": 0.7394, "step": 4023 }, { "epoch": 0.6569527774376556, "grad_norm": 2.9006168842315674, "learning_rate": 1.9812638517928183e-05, "loss": 0.7497, "step": 4024 }, { "epoch": 0.65711603608016, "grad_norm": 3.147186040878296, "learning_rate": 1.9812514969846367e-05, "loss": 0.832, "step": 4025 }, { "epoch": 0.6572792947226643, "grad_norm": 3.384608507156372, "learning_rate": 1.9812391381429005e-05, "loss": 0.7903, "step": 4026 }, { "epoch": 0.6574425533651688, "grad_norm": 3.1694276332855225, "learning_rate": 1.9812267752676607e-05, "loss": 0.8436, "step": 4027 }, { "epoch": 0.6576058120076732, "grad_norm": 2.951796770095825, "learning_rate": 1.9812144083589687e-05, "loss": 0.8203, "step": 4028 }, { "epoch": 0.6577690706501775, "grad_norm": 3.1654722690582275, "learning_rate": 1.981202037416874e-05, "loss": 0.8708, "step": 4029 }, { "epoch": 0.657932329292682, "grad_norm": 2.9213249683380127, "learning_rate": 1.9811896624414286e-05, "loss": 0.8742, "step": 4030 }, { "epoch": 0.6580955879351863, "grad_norm": 2.7171525955200195, "learning_rate": 1.981177283432683e-05, "loss": 0.8312, "step": 4031 }, { "epoch": 0.6582588465776907, "grad_norm": 2.9041333198547363, "learning_rate": 1.981164900390688e-05, "loss": 0.7799, "step": 4032 }, { "epoch": 0.6584221052201951, "grad_norm": 3.1968955993652344, "learning_rate": 1.9811525133154948e-05, "loss": 0.8592, "step": 4033 }, { "epoch": 0.6585853638626995, "grad_norm": 2.9666073322296143, "learning_rate": 1.9811401222071538e-05, "loss": 0.8098, "step": 4034 }, { "epoch": 0.6587486225052038, "grad_norm": 3.024256944656372, "learning_rate": 1.9811277270657166e-05, "loss": 0.9063, "step": 4035 }, { "epoch": 0.6589118811477083, "grad_norm": 2.9568581581115723, "learning_rate": 1.9811153278912333e-05, "loss": 0.8542, "step": 4036 }, { "epoch": 0.6590751397902126, "grad_norm": 3.119633197784424, "learning_rate": 1.9811029246837557e-05, "loss": 0.9744, "step": 4037 }, { "epoch": 0.659238398432717, "grad_norm": 2.6365418434143066, "learning_rate": 1.981090517443334e-05, "loss": 0.698, "step": 4038 }, { "epoch": 0.6594016570752215, "grad_norm": 2.905374050140381, "learning_rate": 1.9810781061700202e-05, "loss": 0.9354, "step": 4039 }, { "epoch": 0.6595649157177258, "grad_norm": 3.323296308517456, "learning_rate": 1.9810656908638646e-05, "loss": 0.9786, "step": 4040 }, { "epoch": 0.6597281743602302, "grad_norm": 2.883216142654419, "learning_rate": 1.9810532715249183e-05, "loss": 0.8734, "step": 4041 }, { "epoch": 0.6598914330027346, "grad_norm": 2.9758408069610596, "learning_rate": 1.9810408481532323e-05, "loss": 0.9168, "step": 4042 }, { "epoch": 0.660054691645239, "grad_norm": 2.5139737129211426, "learning_rate": 1.981028420748858e-05, "loss": 0.6766, "step": 4043 }, { "epoch": 0.6602179502877433, "grad_norm": 2.641523599624634, "learning_rate": 1.981015989311846e-05, "loss": 0.8158, "step": 4044 }, { "epoch": 0.6603812089302478, "grad_norm": 3.2282235622406006, "learning_rate": 1.981003553842248e-05, "loss": 0.8791, "step": 4045 }, { "epoch": 0.6605444675727521, "grad_norm": 2.861628293991089, "learning_rate": 1.9809911143401148e-05, "loss": 0.9211, "step": 4046 }, { "epoch": 0.6607077262152565, "grad_norm": 3.055603265762329, "learning_rate": 1.9809786708054976e-05, "loss": 0.9696, "step": 4047 }, { "epoch": 0.6608709848577609, "grad_norm": 3.197199821472168, "learning_rate": 1.9809662232384476e-05, "loss": 0.937, "step": 4048 }, { "epoch": 0.6610342435002653, "grad_norm": 2.9583420753479004, "learning_rate": 1.9809537716390155e-05, "loss": 0.6975, "step": 4049 }, { "epoch": 0.6611975021427697, "grad_norm": 2.911470651626587, "learning_rate": 1.980941316007253e-05, "loss": 0.8514, "step": 4050 }, { "epoch": 0.6613607607852741, "grad_norm": 2.9177474975585938, "learning_rate": 1.980928856343211e-05, "loss": 0.7951, "step": 4051 }, { "epoch": 0.6615240194277785, "grad_norm": 3.0525591373443604, "learning_rate": 1.980916392646941e-05, "loss": 0.8847, "step": 4052 }, { "epoch": 0.6616872780702828, "grad_norm": 2.983704090118408, "learning_rate": 1.980903924918494e-05, "loss": 0.7847, "step": 4053 }, { "epoch": 0.6618505367127873, "grad_norm": 3.267725706100464, "learning_rate": 1.9808914531579217e-05, "loss": 0.8614, "step": 4054 }, { "epoch": 0.6620137953552916, "grad_norm": 2.7900943756103516, "learning_rate": 1.9808789773652747e-05, "loss": 0.8512, "step": 4055 }, { "epoch": 0.662177053997796, "grad_norm": 3.408125877380371, "learning_rate": 1.9808664975406046e-05, "loss": 0.9799, "step": 4056 }, { "epoch": 0.6623403126403004, "grad_norm": 2.9617512226104736, "learning_rate": 1.9808540136839627e-05, "loss": 0.9204, "step": 4057 }, { "epoch": 0.6625035712828048, "grad_norm": 3.716087579727173, "learning_rate": 1.9808415257954004e-05, "loss": 0.7526, "step": 4058 }, { "epoch": 0.6626668299253091, "grad_norm": 2.6306874752044678, "learning_rate": 1.9808290338749686e-05, "loss": 0.7527, "step": 4059 }, { "epoch": 0.6628300885678136, "grad_norm": 2.809023857116699, "learning_rate": 1.9808165379227196e-05, "loss": 0.8205, "step": 4060 }, { "epoch": 0.662993347210318, "grad_norm": 2.4723331928253174, "learning_rate": 1.980804037938704e-05, "loss": 0.6591, "step": 4061 }, { "epoch": 0.6631566058528223, "grad_norm": 3.267911434173584, "learning_rate": 1.980791533922973e-05, "loss": 0.843, "step": 4062 }, { "epoch": 0.6633198644953268, "grad_norm": 3.2047476768493652, "learning_rate": 1.980779025875579e-05, "loss": 0.911, "step": 4063 }, { "epoch": 0.6634831231378311, "grad_norm": 3.4444897174835205, "learning_rate": 1.980766513796572e-05, "loss": 0.9318, "step": 4064 }, { "epoch": 0.6636463817803355, "grad_norm": 3.293252468109131, "learning_rate": 1.9807539976860045e-05, "loss": 1.0888, "step": 4065 }, { "epoch": 0.6638096404228399, "grad_norm": 3.0761356353759766, "learning_rate": 1.9807414775439277e-05, "loss": 0.8848, "step": 4066 }, { "epoch": 0.6639728990653443, "grad_norm": 2.9671289920806885, "learning_rate": 1.980728953370393e-05, "loss": 0.826, "step": 4067 }, { "epoch": 0.6641361577078486, "grad_norm": 2.9040253162384033, "learning_rate": 1.980716425165452e-05, "loss": 0.7584, "step": 4068 }, { "epoch": 0.6642994163503531, "grad_norm": 3.6008388996124268, "learning_rate": 1.980703892929156e-05, "loss": 0.9821, "step": 4069 }, { "epoch": 0.6644626749928574, "grad_norm": 3.080979585647583, "learning_rate": 1.9806913566615562e-05, "loss": 0.9392, "step": 4070 }, { "epoch": 0.6646259336353618, "grad_norm": 3.2389237880706787, "learning_rate": 1.980678816362705e-05, "loss": 1.9858, "step": 4071 }, { "epoch": 0.6647891922778663, "grad_norm": 3.139070987701416, "learning_rate": 1.9806662720326533e-05, "loss": 0.9204, "step": 4072 }, { "epoch": 0.6649524509203706, "grad_norm": 3.2649176120758057, "learning_rate": 1.980653723671453e-05, "loss": 0.923, "step": 4073 }, { "epoch": 0.665115709562875, "grad_norm": 3.1765308380126953, "learning_rate": 1.9806411712791555e-05, "loss": 0.89, "step": 4074 }, { "epoch": 0.6652789682053794, "grad_norm": 3.1368043422698975, "learning_rate": 1.9806286148558122e-05, "loss": 0.9037, "step": 4075 }, { "epoch": 0.6654422268478838, "grad_norm": 3.1664631366729736, "learning_rate": 1.980616054401475e-05, "loss": 0.9539, "step": 4076 }, { "epoch": 0.6656054854903881, "grad_norm": 2.8827123641967773, "learning_rate": 1.9806034899161956e-05, "loss": 0.8407, "step": 4077 }, { "epoch": 0.6657687441328926, "grad_norm": 2.8284072875976562, "learning_rate": 1.9805909214000256e-05, "loss": 0.789, "step": 4078 }, { "epoch": 0.6659320027753969, "grad_norm": 3.009615182876587, "learning_rate": 1.9805783488530165e-05, "loss": 0.9099, "step": 4079 }, { "epoch": 0.6660952614179013, "grad_norm": 3.1552391052246094, "learning_rate": 1.9805657722752202e-05, "loss": 0.8412, "step": 4080 }, { "epoch": 0.6662585200604056, "grad_norm": 3.5631964206695557, "learning_rate": 1.980553191666688e-05, "loss": 1.0261, "step": 4081 }, { "epoch": 0.6664217787029101, "grad_norm": 2.9554901123046875, "learning_rate": 1.980540607027472e-05, "loss": 0.7861, "step": 4082 }, { "epoch": 0.6665850373454145, "grad_norm": 2.5845563411712646, "learning_rate": 1.980528018357624e-05, "loss": 0.803, "step": 4083 }, { "epoch": 0.6667482959879188, "grad_norm": 3.237962245941162, "learning_rate": 1.9805154256571954e-05, "loss": 0.9362, "step": 4084 }, { "epoch": 0.6669115546304233, "grad_norm": 3.0372679233551025, "learning_rate": 1.980502828926238e-05, "loss": 0.9583, "step": 4085 }, { "epoch": 0.6670748132729276, "grad_norm": 3.0826337337493896, "learning_rate": 1.9804902281648036e-05, "loss": 0.8205, "step": 4086 }, { "epoch": 0.667238071915432, "grad_norm": 2.8649260997772217, "learning_rate": 1.9804776233729446e-05, "loss": 0.6431, "step": 4087 }, { "epoch": 0.6674013305579364, "grad_norm": 2.4860708713531494, "learning_rate": 1.980465014550712e-05, "loss": 0.7202, "step": 4088 }, { "epoch": 0.6675645892004408, "grad_norm": 3.9273757934570312, "learning_rate": 1.980452401698158e-05, "loss": 1.0056, "step": 4089 }, { "epoch": 0.6677278478429451, "grad_norm": 3.279404878616333, "learning_rate": 1.9804397848153343e-05, "loss": 0.8395, "step": 4090 }, { "epoch": 0.6678911064854496, "grad_norm": 2.9225566387176514, "learning_rate": 1.980427163902293e-05, "loss": 0.723, "step": 4091 }, { "epoch": 0.6680543651279539, "grad_norm": 2.796818733215332, "learning_rate": 1.9804145389590858e-05, "loss": 0.7979, "step": 4092 }, { "epoch": 0.6682176237704583, "grad_norm": 3.1283950805664062, "learning_rate": 1.9804019099857643e-05, "loss": 0.9894, "step": 4093 }, { "epoch": 0.6683808824129628, "grad_norm": 2.55389142036438, "learning_rate": 1.980389276982381e-05, "loss": 0.6969, "step": 4094 }, { "epoch": 0.6685441410554671, "grad_norm": 2.674264669418335, "learning_rate": 1.9803766399489878e-05, "loss": 0.7264, "step": 4095 }, { "epoch": 0.6687073996979715, "grad_norm": 3.093355417251587, "learning_rate": 1.9803639988856364e-05, "loss": 0.9537, "step": 4096 }, { "epoch": 0.6688706583404759, "grad_norm": 2.9730889797210693, "learning_rate": 1.9803513537923784e-05, "loss": 0.7526, "step": 4097 }, { "epoch": 0.6690339169829803, "grad_norm": 2.8317677974700928, "learning_rate": 1.9803387046692663e-05, "loss": 0.8849, "step": 4098 }, { "epoch": 0.6691971756254846, "grad_norm": 3.1481282711029053, "learning_rate": 1.9803260515163522e-05, "loss": 0.8945, "step": 4099 }, { "epoch": 0.6693604342679891, "grad_norm": 2.9568660259246826, "learning_rate": 1.9803133943336874e-05, "loss": 0.8954, "step": 4100 }, { "epoch": 0.6695236929104934, "grad_norm": 2.9382545948028564, "learning_rate": 1.9803007331213246e-05, "loss": 0.8087, "step": 4101 }, { "epoch": 0.6696869515529978, "grad_norm": 2.569917917251587, "learning_rate": 1.9802880678793158e-05, "loss": 0.7002, "step": 4102 }, { "epoch": 0.6698502101955022, "grad_norm": 3.066267490386963, "learning_rate": 1.9802753986077127e-05, "loss": 0.7685, "step": 4103 }, { "epoch": 0.6700134688380066, "grad_norm": 2.9434878826141357, "learning_rate": 1.9802627253065675e-05, "loss": 0.8415, "step": 4104 }, { "epoch": 0.670176727480511, "grad_norm": 2.955052375793457, "learning_rate": 1.9802500479759325e-05, "loss": 0.7913, "step": 4105 }, { "epoch": 0.6703399861230154, "grad_norm": 2.599966526031494, "learning_rate": 1.9802373666158597e-05, "loss": 0.7494, "step": 4106 }, { "epoch": 0.6705032447655198, "grad_norm": 3.202876329421997, "learning_rate": 1.9802246812264014e-05, "loss": 0.8929, "step": 4107 }, { "epoch": 0.6706665034080241, "grad_norm": 3.031982421875, "learning_rate": 1.9802119918076098e-05, "loss": 0.8228, "step": 4108 }, { "epoch": 0.6708297620505286, "grad_norm": 3.121690511703491, "learning_rate": 1.9801992983595362e-05, "loss": 0.9362, "step": 4109 }, { "epoch": 0.6709930206930329, "grad_norm": 3.0582737922668457, "learning_rate": 1.9801866008822337e-05, "loss": 0.9086, "step": 4110 }, { "epoch": 0.6711562793355373, "grad_norm": 3.82979679107666, "learning_rate": 1.980173899375754e-05, "loss": 0.8295, "step": 4111 }, { "epoch": 0.6713195379780417, "grad_norm": 2.7161970138549805, "learning_rate": 1.9801611938401497e-05, "loss": 0.6628, "step": 4112 }, { "epoch": 0.6714827966205461, "grad_norm": 3.1183855533599854, "learning_rate": 1.980148484275473e-05, "loss": 0.797, "step": 4113 }, { "epoch": 0.6716460552630504, "grad_norm": 3.3515303134918213, "learning_rate": 1.9801357706817758e-05, "loss": 0.9714, "step": 4114 }, { "epoch": 0.6718093139055549, "grad_norm": 3.3664824962615967, "learning_rate": 1.9801230530591106e-05, "loss": 0.8168, "step": 4115 }, { "epoch": 0.6719725725480593, "grad_norm": 3.0125138759613037, "learning_rate": 1.9801103314075294e-05, "loss": 0.7932, "step": 4116 }, { "epoch": 0.6721358311905636, "grad_norm": 3.0854220390319824, "learning_rate": 1.9800976057270845e-05, "loss": 0.8754, "step": 4117 }, { "epoch": 0.6722990898330681, "grad_norm": 2.5788309574127197, "learning_rate": 1.9800848760178287e-05, "loss": 0.7146, "step": 4118 }, { "epoch": 0.6724623484755724, "grad_norm": 2.6594934463500977, "learning_rate": 1.9800721422798145e-05, "loss": 0.6699, "step": 4119 }, { "epoch": 0.6726256071180768, "grad_norm": 2.7635436058044434, "learning_rate": 1.9800594045130934e-05, "loss": 0.8215, "step": 4120 }, { "epoch": 0.6727888657605812, "grad_norm": 3.1162946224212646, "learning_rate": 1.980046662717718e-05, "loss": 1.0125, "step": 4121 }, { "epoch": 0.6729521244030856, "grad_norm": 2.819249391555786, "learning_rate": 1.980033916893741e-05, "loss": 0.8125, "step": 4122 }, { "epoch": 0.6731153830455899, "grad_norm": 2.8659744262695312, "learning_rate": 1.9800211670412145e-05, "loss": 0.8106, "step": 4123 }, { "epoch": 0.6732786416880944, "grad_norm": 2.707761764526367, "learning_rate": 1.9800084131601913e-05, "loss": 0.7823, "step": 4124 }, { "epoch": 0.6734419003305987, "grad_norm": 2.619127035140991, "learning_rate": 1.9799956552507235e-05, "loss": 0.7342, "step": 4125 }, { "epoch": 0.6736051589731031, "grad_norm": 3.186978578567505, "learning_rate": 1.979982893312863e-05, "loss": 0.8626, "step": 4126 }, { "epoch": 0.6737684176156076, "grad_norm": 2.9224934577941895, "learning_rate": 1.9799701273466636e-05, "loss": 0.8565, "step": 4127 }, { "epoch": 0.6739316762581119, "grad_norm": 3.573316812515259, "learning_rate": 1.9799573573521765e-05, "loss": 0.9077, "step": 4128 }, { "epoch": 0.6740949349006163, "grad_norm": 3.8142943382263184, "learning_rate": 1.9799445833294553e-05, "loss": 1.0743, "step": 4129 }, { "epoch": 0.6742581935431207, "grad_norm": 2.7072665691375732, "learning_rate": 1.9799318052785516e-05, "loss": 0.7293, "step": 4130 }, { "epoch": 0.6744214521856251, "grad_norm": 3.3355486392974854, "learning_rate": 1.9799190231995183e-05, "loss": 1.0599, "step": 4131 }, { "epoch": 0.6745847108281294, "grad_norm": 3.130653142929077, "learning_rate": 1.979906237092408e-05, "loss": 0.757, "step": 4132 }, { "epoch": 0.6747479694706339, "grad_norm": 3.2663516998291016, "learning_rate": 1.9798934469572732e-05, "loss": 0.8763, "step": 4133 }, { "epoch": 0.6749112281131382, "grad_norm": 2.970633029937744, "learning_rate": 1.979880652794166e-05, "loss": 0.7346, "step": 4134 }, { "epoch": 0.6750744867556426, "grad_norm": 2.99379563331604, "learning_rate": 1.97986785460314e-05, "loss": 0.9294, "step": 4135 }, { "epoch": 0.675237745398147, "grad_norm": 2.6690969467163086, "learning_rate": 1.979855052384247e-05, "loss": 0.7571, "step": 4136 }, { "epoch": 0.6754010040406514, "grad_norm": 3.3563969135284424, "learning_rate": 1.97984224613754e-05, "loss": 0.7997, "step": 4137 }, { "epoch": 0.6755642626831558, "grad_norm": 3.067204236984253, "learning_rate": 1.979829435863071e-05, "loss": 0.7277, "step": 4138 }, { "epoch": 0.6757275213256602, "grad_norm": 3.600219249725342, "learning_rate": 1.9798166215608937e-05, "loss": 0.8275, "step": 4139 }, { "epoch": 0.6758907799681646, "grad_norm": 3.1945831775665283, "learning_rate": 1.9798038032310604e-05, "loss": 0.961, "step": 4140 }, { "epoch": 0.6760540386106689, "grad_norm": 2.953847885131836, "learning_rate": 1.9797909808736233e-05, "loss": 0.8522, "step": 4141 }, { "epoch": 0.6762172972531734, "grad_norm": 2.9569718837738037, "learning_rate": 1.9797781544886354e-05, "loss": 0.9027, "step": 4142 }, { "epoch": 0.6763805558956777, "grad_norm": 3.381347179412842, "learning_rate": 1.9797653240761495e-05, "loss": 1.0333, "step": 4143 }, { "epoch": 0.6765438145381821, "grad_norm": 3.328613758087158, "learning_rate": 1.9797524896362185e-05, "loss": 1.0305, "step": 4144 }, { "epoch": 0.6767070731806865, "grad_norm": 2.858431339263916, "learning_rate": 1.9797396511688945e-05, "loss": 0.8133, "step": 4145 }, { "epoch": 0.6768703318231909, "grad_norm": 3.2159981727600098, "learning_rate": 1.9797268086742313e-05, "loss": 0.9493, "step": 4146 }, { "epoch": 0.6770335904656952, "grad_norm": 2.789088726043701, "learning_rate": 1.9797139621522806e-05, "loss": 0.8116, "step": 4147 }, { "epoch": 0.6771968491081997, "grad_norm": 2.871492624282837, "learning_rate": 1.979701111603096e-05, "loss": 0.8475, "step": 4148 }, { "epoch": 0.6773601077507041, "grad_norm": 2.5775206089019775, "learning_rate": 1.97968825702673e-05, "loss": 0.7368, "step": 4149 }, { "epoch": 0.6775233663932084, "grad_norm": 2.481600522994995, "learning_rate": 1.9796753984232357e-05, "loss": 0.8206, "step": 4150 }, { "epoch": 0.6776866250357129, "grad_norm": 2.93520450592041, "learning_rate": 1.9796625357926658e-05, "loss": 0.7407, "step": 4151 }, { "epoch": 0.6778498836782172, "grad_norm": 2.7607524394989014, "learning_rate": 1.9796496691350726e-05, "loss": 0.9021, "step": 4152 }, { "epoch": 0.6780131423207216, "grad_norm": 3.276534080505371, "learning_rate": 1.97963679845051e-05, "loss": 0.7533, "step": 4153 }, { "epoch": 0.678176400963226, "grad_norm": 2.8259928226470947, "learning_rate": 1.9796239237390303e-05, "loss": 0.8296, "step": 4154 }, { "epoch": 0.6783396596057304, "grad_norm": 3.0980398654937744, "learning_rate": 1.9796110450006866e-05, "loss": 0.8439, "step": 4155 }, { "epoch": 0.6785029182482347, "grad_norm": 3.0289034843444824, "learning_rate": 1.9795981622355316e-05, "loss": 0.9181, "step": 4156 }, { "epoch": 0.6786661768907392, "grad_norm": 3.205056667327881, "learning_rate": 1.9795852754436186e-05, "loss": 0.7763, "step": 4157 }, { "epoch": 0.6788294355332435, "grad_norm": 2.919534206390381, "learning_rate": 1.9795723846250005e-05, "loss": 0.9105, "step": 4158 }, { "epoch": 0.6789926941757479, "grad_norm": 2.8804941177368164, "learning_rate": 1.97955948977973e-05, "loss": 0.889, "step": 4159 }, { "epoch": 0.6791559528182524, "grad_norm": 2.719329595565796, "learning_rate": 1.9795465909078607e-05, "loss": 0.6476, "step": 4160 }, { "epoch": 0.6793192114607567, "grad_norm": 3.259979724884033, "learning_rate": 1.9795336880094448e-05, "loss": 0.9524, "step": 4161 }, { "epoch": 0.6794824701032611, "grad_norm": 2.972409963607788, "learning_rate": 1.9795207810845357e-05, "loss": 0.7847, "step": 4162 }, { "epoch": 0.6796457287457655, "grad_norm": 3.277078151702881, "learning_rate": 1.979507870133187e-05, "loss": 1.0682, "step": 4163 }, { "epoch": 0.6798089873882699, "grad_norm": 2.8953847885131836, "learning_rate": 1.979494955155451e-05, "loss": 0.6998, "step": 4164 }, { "epoch": 0.6799722460307742, "grad_norm": 2.9846160411834717, "learning_rate": 1.979482036151381e-05, "loss": 1.0208, "step": 4165 }, { "epoch": 0.6801355046732787, "grad_norm": 3.004577875137329, "learning_rate": 1.9794691131210304e-05, "loss": 0.9137, "step": 4166 }, { "epoch": 0.680298763315783, "grad_norm": 2.538585662841797, "learning_rate": 1.979456186064452e-05, "loss": 0.6614, "step": 4167 }, { "epoch": 0.6804620219582874, "grad_norm": 3.070972204208374, "learning_rate": 1.9794432549816993e-05, "loss": 0.907, "step": 4168 }, { "epoch": 0.6806252806007919, "grad_norm": 2.9853014945983887, "learning_rate": 1.979430319872825e-05, "loss": 0.7778, "step": 4169 }, { "epoch": 0.6807885392432962, "grad_norm": 3.294884443283081, "learning_rate": 1.9794173807378826e-05, "loss": 1.0972, "step": 4170 }, { "epoch": 0.6809517978858006, "grad_norm": 3.42999005317688, "learning_rate": 1.9794044375769248e-05, "loss": 0.9823, "step": 4171 }, { "epoch": 0.681115056528305, "grad_norm": 2.805434226989746, "learning_rate": 1.9793914903900055e-05, "loss": 0.6732, "step": 4172 }, { "epoch": 0.6812783151708094, "grad_norm": 2.7531542778015137, "learning_rate": 1.9793785391771775e-05, "loss": 0.735, "step": 4173 }, { "epoch": 0.6814415738133137, "grad_norm": 2.5191726684570312, "learning_rate": 1.979365583938494e-05, "loss": 0.7497, "step": 4174 }, { "epoch": 0.6816048324558182, "grad_norm": 3.239884853363037, "learning_rate": 1.9793526246740088e-05, "loss": 0.9256, "step": 4175 }, { "epoch": 0.6817680910983225, "grad_norm": 2.688138961791992, "learning_rate": 1.9793396613837745e-05, "loss": 0.5904, "step": 4176 }, { "epoch": 0.6819313497408269, "grad_norm": 3.144613027572632, "learning_rate": 1.9793266940678446e-05, "loss": 0.87, "step": 4177 }, { "epoch": 0.6820946083833312, "grad_norm": 3.220618963241577, "learning_rate": 1.9793137227262727e-05, "loss": 0.8563, "step": 4178 }, { "epoch": 0.6822578670258357, "grad_norm": 2.470885992050171, "learning_rate": 1.9793007473591114e-05, "loss": 0.7007, "step": 4179 }, { "epoch": 0.6824211256683401, "grad_norm": 3.234374523162842, "learning_rate": 1.9792877679664148e-05, "loss": 0.8646, "step": 4180 }, { "epoch": 0.6825843843108444, "grad_norm": 3.231748342514038, "learning_rate": 1.979274784548236e-05, "loss": 0.8001, "step": 4181 }, { "epoch": 0.6827476429533489, "grad_norm": 2.9797260761260986, "learning_rate": 1.9792617971046283e-05, "loss": 0.9243, "step": 4182 }, { "epoch": 0.6829109015958532, "grad_norm": 3.146784543991089, "learning_rate": 1.979248805635645e-05, "loss": 0.8162, "step": 4183 }, { "epoch": 0.6830741602383577, "grad_norm": 2.6863088607788086, "learning_rate": 1.97923581014134e-05, "loss": 0.7069, "step": 4184 }, { "epoch": 0.683237418880862, "grad_norm": 3.032723903656006, "learning_rate": 1.979222810621766e-05, "loss": 0.8769, "step": 4185 }, { "epoch": 0.6834006775233664, "grad_norm": 2.894554615020752, "learning_rate": 1.9792098070769766e-05, "loss": 0.9063, "step": 4186 }, { "epoch": 0.6835639361658707, "grad_norm": 2.801300048828125, "learning_rate": 1.9791967995070257e-05, "loss": 0.8552, "step": 4187 }, { "epoch": 0.6837271948083752, "grad_norm": 2.8029701709747314, "learning_rate": 1.9791837879119664e-05, "loss": 0.8541, "step": 4188 }, { "epoch": 0.6838904534508795, "grad_norm": 3.3701305389404297, "learning_rate": 1.9791707722918526e-05, "loss": 1.0022, "step": 4189 }, { "epoch": 0.684053712093384, "grad_norm": 3.1714813709259033, "learning_rate": 1.9791577526467368e-05, "loss": 0.8666, "step": 4190 }, { "epoch": 0.6842169707358884, "grad_norm": 3.0802574157714844, "learning_rate": 1.979144728976674e-05, "loss": 0.8325, "step": 4191 }, { "epoch": 0.6843802293783927, "grad_norm": 2.710867166519165, "learning_rate": 1.9791317012817165e-05, "loss": 0.8394, "step": 4192 }, { "epoch": 0.6845434880208972, "grad_norm": 2.7273688316345215, "learning_rate": 1.9791186695619184e-05, "loss": 0.825, "step": 4193 }, { "epoch": 0.6847067466634015, "grad_norm": 3.0777668952941895, "learning_rate": 1.979105633817333e-05, "loss": 0.8582, "step": 4194 }, { "epoch": 0.6848700053059059, "grad_norm": 2.85880184173584, "learning_rate": 1.979092594048014e-05, "loss": 0.7936, "step": 4195 }, { "epoch": 0.6850332639484102, "grad_norm": 3.1013731956481934, "learning_rate": 1.9790795502540155e-05, "loss": 0.92, "step": 4196 }, { "epoch": 0.6851965225909147, "grad_norm": 3.354647159576416, "learning_rate": 1.9790665024353903e-05, "loss": 0.9772, "step": 4197 }, { "epoch": 0.685359781233419, "grad_norm": 2.4608123302459717, "learning_rate": 1.979053450592192e-05, "loss": 0.707, "step": 4198 }, { "epoch": 0.6855230398759234, "grad_norm": 3.313040018081665, "learning_rate": 1.9790403947244756e-05, "loss": 1.119, "step": 4199 }, { "epoch": 0.6856862985184278, "grad_norm": 3.159444808959961, "learning_rate": 1.979027334832293e-05, "loss": 0.7993, "step": 4200 }, { "epoch": 0.6858495571609322, "grad_norm": 2.6195406913757324, "learning_rate": 1.9790142709156994e-05, "loss": 0.7451, "step": 4201 }, { "epoch": 0.6860128158034366, "grad_norm": 2.89721417427063, "learning_rate": 1.9790012029747473e-05, "loss": 0.6793, "step": 4202 }, { "epoch": 0.686176074445941, "grad_norm": 2.9277470111846924, "learning_rate": 1.9789881310094906e-05, "loss": 0.8652, "step": 4203 }, { "epoch": 0.6863393330884454, "grad_norm": 2.9968225955963135, "learning_rate": 1.978975055019984e-05, "loss": 0.8128, "step": 4204 }, { "epoch": 0.6865025917309497, "grad_norm": 3.1746132373809814, "learning_rate": 1.9789619750062802e-05, "loss": 1.6126, "step": 4205 }, { "epoch": 0.6866658503734542, "grad_norm": 2.9389243125915527, "learning_rate": 1.9789488909684337e-05, "loss": 0.9228, "step": 4206 }, { "epoch": 0.6868291090159585, "grad_norm": 3.2469592094421387, "learning_rate": 1.978935802906498e-05, "loss": 0.9625, "step": 4207 }, { "epoch": 0.6869923676584629, "grad_norm": 2.8514633178710938, "learning_rate": 1.9789227108205265e-05, "loss": 0.7721, "step": 4208 }, { "epoch": 0.6871556263009673, "grad_norm": 2.6606874465942383, "learning_rate": 1.9789096147105736e-05, "loss": 0.7473, "step": 4209 }, { "epoch": 0.6873188849434717, "grad_norm": 3.038919687271118, "learning_rate": 1.9788965145766927e-05, "loss": 0.8332, "step": 4210 }, { "epoch": 0.687482143585976, "grad_norm": 3.20259165763855, "learning_rate": 1.978883410418938e-05, "loss": 0.9917, "step": 4211 }, { "epoch": 0.6876454022284805, "grad_norm": 2.9429919719696045, "learning_rate": 1.9788703022373634e-05, "loss": 0.8128, "step": 4212 }, { "epoch": 0.6878086608709849, "grad_norm": 2.9760429859161377, "learning_rate": 1.9788571900320223e-05, "loss": 0.7974, "step": 4213 }, { "epoch": 0.6879719195134892, "grad_norm": 3.0872464179992676, "learning_rate": 1.978844073802969e-05, "loss": 1.0278, "step": 4214 }, { "epoch": 0.6881351781559937, "grad_norm": 3.0040390491485596, "learning_rate": 1.9788309535502574e-05, "loss": 1.6545, "step": 4215 }, { "epoch": 0.688298436798498, "grad_norm": 3.2103493213653564, "learning_rate": 1.9788178292739412e-05, "loss": 0.8947, "step": 4216 }, { "epoch": 0.6884616954410024, "grad_norm": 3.2277889251708984, "learning_rate": 1.9788047009740746e-05, "loss": 1.0715, "step": 4217 }, { "epoch": 0.6886249540835068, "grad_norm": 3.1278486251831055, "learning_rate": 1.9787915686507114e-05, "loss": 0.7865, "step": 4218 }, { "epoch": 0.6887882127260112, "grad_norm": 2.7404072284698486, "learning_rate": 1.9787784323039057e-05, "loss": 0.8904, "step": 4219 }, { "epoch": 0.6889514713685155, "grad_norm": 2.9326164722442627, "learning_rate": 1.978765291933712e-05, "loss": 0.8683, "step": 4220 }, { "epoch": 0.68911473001102, "grad_norm": 3.0069069862365723, "learning_rate": 1.9787521475401826e-05, "loss": 0.8882, "step": 4221 }, { "epoch": 0.6892779886535243, "grad_norm": 2.6066935062408447, "learning_rate": 1.9787389991233733e-05, "loss": 0.6961, "step": 4222 }, { "epoch": 0.6894412472960287, "grad_norm": 3.1935324668884277, "learning_rate": 1.9787258466833377e-05, "loss": 0.9358, "step": 4223 }, { "epoch": 0.6896045059385332, "grad_norm": 3.367359161376953, "learning_rate": 1.9787126902201294e-05, "loss": 0.9124, "step": 4224 }, { "epoch": 0.6897677645810375, "grad_norm": 2.8484718799591064, "learning_rate": 1.978699529733803e-05, "loss": 0.9764, "step": 4225 }, { "epoch": 0.6899310232235419, "grad_norm": 2.423691987991333, "learning_rate": 1.9786863652244125e-05, "loss": 0.8499, "step": 4226 }, { "epoch": 0.6900942818660463, "grad_norm": 2.6941816806793213, "learning_rate": 1.9786731966920113e-05, "loss": 0.8491, "step": 4227 }, { "epoch": 0.6902575405085507, "grad_norm": 3.2474632263183594, "learning_rate": 1.9786600241366546e-05, "loss": 0.8009, "step": 4228 }, { "epoch": 0.690420799151055, "grad_norm": 2.9615297317504883, "learning_rate": 1.9786468475583962e-05, "loss": 0.8424, "step": 4229 }, { "epoch": 0.6905840577935595, "grad_norm": 3.234395980834961, "learning_rate": 1.97863366695729e-05, "loss": 1.5857, "step": 4230 }, { "epoch": 0.6907473164360638, "grad_norm": 2.9173173904418945, "learning_rate": 1.9786204823333902e-05, "loss": 0.8462, "step": 4231 }, { "epoch": 0.6909105750785682, "grad_norm": 2.77461576461792, "learning_rate": 1.9786072936867514e-05, "loss": 0.7797, "step": 4232 }, { "epoch": 0.6910738337210726, "grad_norm": 3.3162899017333984, "learning_rate": 1.9785941010174274e-05, "loss": 0.904, "step": 4233 }, { "epoch": 0.691237092363577, "grad_norm": 3.2764980792999268, "learning_rate": 1.978580904325472e-05, "loss": 0.9337, "step": 4234 }, { "epoch": 0.6914003510060814, "grad_norm": 3.2710065841674805, "learning_rate": 1.9785677036109407e-05, "loss": 0.8252, "step": 4235 }, { "epoch": 0.6915636096485858, "grad_norm": 2.9759769439697266, "learning_rate": 1.978554498873887e-05, "loss": 0.7975, "step": 4236 }, { "epoch": 0.6917268682910902, "grad_norm": 2.8192291259765625, "learning_rate": 1.978541290114365e-05, "loss": 0.7459, "step": 4237 }, { "epoch": 0.6918901269335945, "grad_norm": 2.7799291610717773, "learning_rate": 1.9785280773324293e-05, "loss": 0.8196, "step": 4238 }, { "epoch": 0.692053385576099, "grad_norm": 2.9267048835754395, "learning_rate": 1.9785148605281344e-05, "loss": 0.8554, "step": 4239 }, { "epoch": 0.6922166442186033, "grad_norm": 3.041604518890381, "learning_rate": 1.9785016397015342e-05, "loss": 0.7938, "step": 4240 }, { "epoch": 0.6923799028611077, "grad_norm": 2.9102063179016113, "learning_rate": 1.978488414852683e-05, "loss": 0.735, "step": 4241 }, { "epoch": 0.6925431615036121, "grad_norm": 3.0087292194366455, "learning_rate": 1.9784751859816362e-05, "loss": 0.7728, "step": 4242 }, { "epoch": 0.6927064201461165, "grad_norm": 3.3981544971466064, "learning_rate": 1.9784619530884465e-05, "loss": 0.952, "step": 4243 }, { "epoch": 0.6928696787886208, "grad_norm": 2.6398274898529053, "learning_rate": 1.9784487161731696e-05, "loss": 0.7337, "step": 4244 }, { "epoch": 0.6930329374311253, "grad_norm": 3.009423017501831, "learning_rate": 1.978435475235859e-05, "loss": 0.782, "step": 4245 }, { "epoch": 0.6931961960736297, "grad_norm": 3.177967071533203, "learning_rate": 1.9784222302765703e-05, "loss": 0.9226, "step": 4246 }, { "epoch": 0.693359454716134, "grad_norm": 2.758437395095825, "learning_rate": 1.978408981295357e-05, "loss": 0.7719, "step": 4247 }, { "epoch": 0.6935227133586385, "grad_norm": 3.174058437347412, "learning_rate": 1.9783957282922738e-05, "loss": 0.9966, "step": 4248 }, { "epoch": 0.6936859720011428, "grad_norm": 3.0885517597198486, "learning_rate": 1.978382471267375e-05, "loss": 0.868, "step": 4249 }, { "epoch": 0.6938492306436472, "grad_norm": 3.268786668777466, "learning_rate": 1.9783692102207156e-05, "loss": 0.991, "step": 4250 }, { "epoch": 0.6940124892861516, "grad_norm": 2.898672103881836, "learning_rate": 1.9783559451523495e-05, "loss": 0.8563, "step": 4251 }, { "epoch": 0.694175747928656, "grad_norm": 2.9244630336761475, "learning_rate": 1.9783426760623317e-05, "loss": 0.9211, "step": 4252 }, { "epoch": 0.6943390065711603, "grad_norm": 3.0139036178588867, "learning_rate": 1.9783294029507166e-05, "loss": 0.8595, "step": 4253 }, { "epoch": 0.6945022652136648, "grad_norm": 2.575472831726074, "learning_rate": 1.978316125817559e-05, "loss": 0.7781, "step": 4254 }, { "epoch": 0.6946655238561691, "grad_norm": 3.0811734199523926, "learning_rate": 1.9783028446629123e-05, "loss": 0.9327, "step": 4255 }, { "epoch": 0.6948287824986735, "grad_norm": 2.939481735229492, "learning_rate": 1.9782895594868328e-05, "loss": 0.9649, "step": 4256 }, { "epoch": 0.694992041141178, "grad_norm": 2.849292516708374, "learning_rate": 1.9782762702893743e-05, "loss": 0.9288, "step": 4257 }, { "epoch": 0.6951552997836823, "grad_norm": 2.854513645172119, "learning_rate": 1.9782629770705913e-05, "loss": 0.8538, "step": 4258 }, { "epoch": 0.6953185584261867, "grad_norm": 2.9589767456054688, "learning_rate": 1.9782496798305386e-05, "loss": 0.8425, "step": 4259 }, { "epoch": 0.695481817068691, "grad_norm": 3.103714942932129, "learning_rate": 1.9782363785692706e-05, "loss": 0.9299, "step": 4260 }, { "epoch": 0.6956450757111955, "grad_norm": 2.825073719024658, "learning_rate": 1.9782230732868427e-05, "loss": 0.8858, "step": 4261 }, { "epoch": 0.6958083343536998, "grad_norm": 3.0721828937530518, "learning_rate": 1.9782097639833086e-05, "loss": 1.0291, "step": 4262 }, { "epoch": 0.6959715929962043, "grad_norm": 3.0957889556884766, "learning_rate": 1.978196450658724e-05, "loss": 0.8291, "step": 4263 }, { "epoch": 0.6961348516387086, "grad_norm": 2.63181209564209, "learning_rate": 1.978183133313143e-05, "loss": 0.7999, "step": 4264 }, { "epoch": 0.696298110281213, "grad_norm": 2.3131043910980225, "learning_rate": 1.9781698119466203e-05, "loss": 0.6958, "step": 4265 }, { "epoch": 0.6964613689237173, "grad_norm": 2.5718510150909424, "learning_rate": 1.978156486559211e-05, "loss": 0.8867, "step": 4266 }, { "epoch": 0.6966246275662218, "grad_norm": 3.396181344985962, "learning_rate": 1.9781431571509696e-05, "loss": 0.8022, "step": 4267 }, { "epoch": 0.6967878862087262, "grad_norm": 2.585995674133301, "learning_rate": 1.9781298237219513e-05, "loss": 0.7862, "step": 4268 }, { "epoch": 0.6969511448512306, "grad_norm": 3.0770044326782227, "learning_rate": 1.9781164862722103e-05, "loss": 0.9045, "step": 4269 }, { "epoch": 0.697114403493735, "grad_norm": 2.7393863201141357, "learning_rate": 1.978103144801802e-05, "loss": 0.7707, "step": 4270 }, { "epoch": 0.6972776621362393, "grad_norm": 3.141622304916382, "learning_rate": 1.9780897993107808e-05, "loss": 0.8677, "step": 4271 }, { "epoch": 0.6974409207787438, "grad_norm": 3.081219434738159, "learning_rate": 1.978076449799202e-05, "loss": 0.9076, "step": 4272 }, { "epoch": 0.6976041794212481, "grad_norm": 2.789057493209839, "learning_rate": 1.97806309626712e-05, "loss": 0.8196, "step": 4273 }, { "epoch": 0.6977674380637525, "grad_norm": 2.902709722518921, "learning_rate": 1.9780497387145902e-05, "loss": 0.8293, "step": 4274 }, { "epoch": 0.6979306967062568, "grad_norm": 3.15315580368042, "learning_rate": 1.978036377141667e-05, "loss": 0.9871, "step": 4275 }, { "epoch": 0.6980939553487613, "grad_norm": 2.790079116821289, "learning_rate": 1.9780230115484055e-05, "loss": 0.7022, "step": 4276 }, { "epoch": 0.6982572139912656, "grad_norm": 3.06390380859375, "learning_rate": 1.9780096419348607e-05, "loss": 0.9203, "step": 4277 }, { "epoch": 0.69842047263377, "grad_norm": 3.072561025619507, "learning_rate": 1.9779962683010878e-05, "loss": 0.8795, "step": 4278 }, { "epoch": 0.6985837312762745, "grad_norm": 2.8565480709075928, "learning_rate": 1.9779828906471414e-05, "loss": 0.949, "step": 4279 }, { "epoch": 0.6987469899187788, "grad_norm": 3.0535898208618164, "learning_rate": 1.9779695089730767e-05, "loss": 0.8941, "step": 4280 }, { "epoch": 0.6989102485612833, "grad_norm": 3.776855230331421, "learning_rate": 1.9779561232789485e-05, "loss": 0.912, "step": 4281 }, { "epoch": 0.6990735072037876, "grad_norm": 2.7800703048706055, "learning_rate": 1.9779427335648118e-05, "loss": 0.7855, "step": 4282 }, { "epoch": 0.699236765846292, "grad_norm": 3.0413401126861572, "learning_rate": 1.977929339830722e-05, "loss": 0.8925, "step": 4283 }, { "epoch": 0.6994000244887963, "grad_norm": 3.418330192565918, "learning_rate": 1.977915942076734e-05, "loss": 0.9322, "step": 4284 }, { "epoch": 0.6995632831313008, "grad_norm": 3.336177110671997, "learning_rate": 1.9779025403029027e-05, "loss": 1.0325, "step": 4285 }, { "epoch": 0.6997265417738051, "grad_norm": 2.847998857498169, "learning_rate": 1.977889134509283e-05, "loss": 0.8943, "step": 4286 }, { "epoch": 0.6998898004163095, "grad_norm": 2.735734701156616, "learning_rate": 1.9778757246959307e-05, "loss": 0.7283, "step": 4287 }, { "epoch": 0.7000530590588139, "grad_norm": 2.876283884048462, "learning_rate": 1.9778623108629004e-05, "loss": 0.9261, "step": 4288 }, { "epoch": 0.7002163177013183, "grad_norm": 2.7921431064605713, "learning_rate": 1.9778488930102476e-05, "loss": 0.8195, "step": 4289 }, { "epoch": 0.7003795763438228, "grad_norm": 2.6886162757873535, "learning_rate": 1.9778354711380273e-05, "loss": 0.779, "step": 4290 }, { "epoch": 0.7005428349863271, "grad_norm": 2.9302968978881836, "learning_rate": 1.977822045246294e-05, "loss": 0.8979, "step": 4291 }, { "epoch": 0.7007060936288315, "grad_norm": 2.795585870742798, "learning_rate": 1.977808615335104e-05, "loss": 0.9381, "step": 4292 }, { "epoch": 0.7008693522713358, "grad_norm": 2.942887306213379, "learning_rate": 1.977795181404512e-05, "loss": 0.9258, "step": 4293 }, { "epoch": 0.7010326109138403, "grad_norm": 2.6450724601745605, "learning_rate": 1.977781743454573e-05, "loss": 0.7273, "step": 4294 }, { "epoch": 0.7011958695563446, "grad_norm": 3.220561981201172, "learning_rate": 1.9777683014853426e-05, "loss": 0.7522, "step": 4295 }, { "epoch": 0.701359128198849, "grad_norm": 3.170170307159424, "learning_rate": 1.9777548554968756e-05, "loss": 0.789, "step": 4296 }, { "epoch": 0.7015223868413534, "grad_norm": 3.537400960922241, "learning_rate": 1.977741405489228e-05, "loss": 1.048, "step": 4297 }, { "epoch": 0.7016856454838578, "grad_norm": 2.5098657608032227, "learning_rate": 1.9777279514624544e-05, "loss": 0.8147, "step": 4298 }, { "epoch": 0.7018489041263621, "grad_norm": 2.462048053741455, "learning_rate": 1.9777144934166106e-05, "loss": 0.6365, "step": 4299 }, { "epoch": 0.7020121627688666, "grad_norm": 3.213599681854248, "learning_rate": 1.9777010313517517e-05, "loss": 0.8338, "step": 4300 }, { "epoch": 0.702175421411371, "grad_norm": 2.6431102752685547, "learning_rate": 1.977687565267933e-05, "loss": 0.6708, "step": 4301 }, { "epoch": 0.7023386800538753, "grad_norm": 3.79746675491333, "learning_rate": 1.9776740951652097e-05, "loss": 0.885, "step": 4302 }, { "epoch": 0.7025019386963798, "grad_norm": 3.2914388179779053, "learning_rate": 1.9776606210436374e-05, "loss": 1.1449, "step": 4303 }, { "epoch": 0.7026651973388841, "grad_norm": 3.4501254558563232, "learning_rate": 1.9776471429032715e-05, "loss": 0.8366, "step": 4304 }, { "epoch": 0.7028284559813885, "grad_norm": 3.147549867630005, "learning_rate": 1.9776336607441673e-05, "loss": 0.9475, "step": 4305 }, { "epoch": 0.7029917146238929, "grad_norm": 2.91845965385437, "learning_rate": 1.9776201745663803e-05, "loss": 0.9872, "step": 4306 }, { "epoch": 0.7031549732663973, "grad_norm": 3.0614213943481445, "learning_rate": 1.9776066843699658e-05, "loss": 0.8454, "step": 4307 }, { "epoch": 0.7033182319089016, "grad_norm": 2.672433853149414, "learning_rate": 1.9775931901549797e-05, "loss": 0.6826, "step": 4308 }, { "epoch": 0.7034814905514061, "grad_norm": 3.327174425125122, "learning_rate": 1.9775796919214768e-05, "loss": 0.9377, "step": 4309 }, { "epoch": 0.7036447491939104, "grad_norm": 2.8073301315307617, "learning_rate": 1.977566189669513e-05, "loss": 0.8925, "step": 4310 }, { "epoch": 0.7038080078364148, "grad_norm": 3.08072566986084, "learning_rate": 1.977552683399144e-05, "loss": 0.9502, "step": 4311 }, { "epoch": 0.7039712664789193, "grad_norm": 3.164350748062134, "learning_rate": 1.9775391731104246e-05, "loss": 1.0269, "step": 4312 }, { "epoch": 0.7041345251214236, "grad_norm": 3.122328519821167, "learning_rate": 1.977525658803411e-05, "loss": 1.0056, "step": 4313 }, { "epoch": 0.704297783763928, "grad_norm": 3.1172163486480713, "learning_rate": 1.9775121404781586e-05, "loss": 0.8719, "step": 4314 }, { "epoch": 0.7044610424064324, "grad_norm": 2.882580041885376, "learning_rate": 1.9774986181347228e-05, "loss": 0.8878, "step": 4315 }, { "epoch": 0.7046243010489368, "grad_norm": 3.4535093307495117, "learning_rate": 1.977485091773159e-05, "loss": 1.1301, "step": 4316 }, { "epoch": 0.7047875596914411, "grad_norm": 3.200749397277832, "learning_rate": 1.9774715613935233e-05, "loss": 1.0516, "step": 4317 }, { "epoch": 0.7049508183339456, "grad_norm": 3.14105486869812, "learning_rate": 1.977458026995871e-05, "loss": 0.7489, "step": 4318 }, { "epoch": 0.7051140769764499, "grad_norm": 2.804170846939087, "learning_rate": 1.9774444885802582e-05, "loss": 0.7996, "step": 4319 }, { "epoch": 0.7052773356189543, "grad_norm": 2.388988971710205, "learning_rate": 1.9774309461467398e-05, "loss": 0.6344, "step": 4320 }, { "epoch": 0.7054405942614587, "grad_norm": 2.82749342918396, "learning_rate": 1.977417399695372e-05, "loss": 0.7239, "step": 4321 }, { "epoch": 0.7056038529039631, "grad_norm": 3.050846815109253, "learning_rate": 1.9774038492262103e-05, "loss": 0.9375, "step": 4322 }, { "epoch": 0.7057671115464675, "grad_norm": 2.888814687728882, "learning_rate": 1.9773902947393107e-05, "loss": 0.8326, "step": 4323 }, { "epoch": 0.7059303701889719, "grad_norm": 3.1642470359802246, "learning_rate": 1.9773767362347285e-05, "loss": 0.7516, "step": 4324 }, { "epoch": 0.7060936288314763, "grad_norm": 2.950792074203491, "learning_rate": 1.9773631737125192e-05, "loss": 0.9276, "step": 4325 }, { "epoch": 0.7062568874739806, "grad_norm": 2.9101455211639404, "learning_rate": 1.9773496071727392e-05, "loss": 0.8113, "step": 4326 }, { "epoch": 0.7064201461164851, "grad_norm": 2.6776649951934814, "learning_rate": 1.9773360366154444e-05, "loss": 0.8412, "step": 4327 }, { "epoch": 0.7065834047589894, "grad_norm": 2.967250347137451, "learning_rate": 1.97732246204069e-05, "loss": 0.9359, "step": 4328 }, { "epoch": 0.7067466634014938, "grad_norm": 3.1040213108062744, "learning_rate": 1.9773088834485316e-05, "loss": 0.9618, "step": 4329 }, { "epoch": 0.7069099220439982, "grad_norm": 3.0892412662506104, "learning_rate": 1.9772953008390258e-05, "loss": 1.0406, "step": 4330 }, { "epoch": 0.7070731806865026, "grad_norm": 3.2494332790374756, "learning_rate": 1.9772817142122277e-05, "loss": 0.9682, "step": 4331 }, { "epoch": 0.7072364393290069, "grad_norm": 3.2200331687927246, "learning_rate": 1.9772681235681936e-05, "loss": 0.9875, "step": 4332 }, { "epoch": 0.7073996979715114, "grad_norm": 3.0435361862182617, "learning_rate": 1.9772545289069794e-05, "loss": 0.7771, "step": 4333 }, { "epoch": 0.7075629566140158, "grad_norm": 3.2536933422088623, "learning_rate": 1.9772409302286406e-05, "loss": 0.9745, "step": 4334 }, { "epoch": 0.7077262152565201, "grad_norm": 3.411046266555786, "learning_rate": 1.9772273275332335e-05, "loss": 0.7796, "step": 4335 }, { "epoch": 0.7078894738990246, "grad_norm": 2.795668840408325, "learning_rate": 1.977213720820814e-05, "loss": 0.7057, "step": 4336 }, { "epoch": 0.7080527325415289, "grad_norm": 3.707221269607544, "learning_rate": 1.9772001100914376e-05, "loss": 0.9647, "step": 4337 }, { "epoch": 0.7082159911840333, "grad_norm": 3.1821627616882324, "learning_rate": 1.9771864953451606e-05, "loss": 0.7285, "step": 4338 }, { "epoch": 0.7083792498265377, "grad_norm": 2.8981096744537354, "learning_rate": 1.977172876582039e-05, "loss": 0.8515, "step": 4339 }, { "epoch": 0.7085425084690421, "grad_norm": 2.999974250793457, "learning_rate": 1.9771592538021288e-05, "loss": 0.9952, "step": 4340 }, { "epoch": 0.7087057671115464, "grad_norm": 2.5066282749176025, "learning_rate": 1.9771456270054858e-05, "loss": 0.7372, "step": 4341 }, { "epoch": 0.7088690257540509, "grad_norm": 2.6498749256134033, "learning_rate": 1.9771319961921657e-05, "loss": 0.8528, "step": 4342 }, { "epoch": 0.7090322843965552, "grad_norm": 3.185635566711426, "learning_rate": 1.9771183613622253e-05, "loss": 1.0453, "step": 4343 }, { "epoch": 0.7091955430390596, "grad_norm": 3.1725306510925293, "learning_rate": 1.9771047225157203e-05, "loss": 1.0681, "step": 4344 }, { "epoch": 0.7093588016815641, "grad_norm": 2.589695453643799, "learning_rate": 1.9770910796527064e-05, "loss": 0.7346, "step": 4345 }, { "epoch": 0.7095220603240684, "grad_norm": 2.579680919647217, "learning_rate": 1.9770774327732405e-05, "loss": 0.7357, "step": 4346 }, { "epoch": 0.7096853189665728, "grad_norm": 3.054161310195923, "learning_rate": 1.9770637818773778e-05, "loss": 0.96, "step": 4347 }, { "epoch": 0.7098485776090772, "grad_norm": 3.115645408630371, "learning_rate": 1.9770501269651752e-05, "loss": 0.914, "step": 4348 }, { "epoch": 0.7100118362515816, "grad_norm": 3.436683177947998, "learning_rate": 1.9770364680366883e-05, "loss": 0.9815, "step": 4349 }, { "epoch": 0.7101750948940859, "grad_norm": 2.639430046081543, "learning_rate": 1.977022805091973e-05, "loss": 0.8081, "step": 4350 }, { "epoch": 0.7103383535365904, "grad_norm": 2.657311201095581, "learning_rate": 1.9770091381310866e-05, "loss": 0.7206, "step": 4351 }, { "epoch": 0.7105016121790947, "grad_norm": 2.657228946685791, "learning_rate": 1.9769954671540842e-05, "loss": 0.8185, "step": 4352 }, { "epoch": 0.7106648708215991, "grad_norm": 3.1034538745880127, "learning_rate": 1.9769817921610222e-05, "loss": 0.8602, "step": 4353 }, { "epoch": 0.7108281294641035, "grad_norm": 2.6050312519073486, "learning_rate": 1.9769681131519568e-05, "loss": 0.6965, "step": 4354 }, { "epoch": 0.7109913881066079, "grad_norm": 2.6971073150634766, "learning_rate": 1.9769544301269444e-05, "loss": 0.7064, "step": 4355 }, { "epoch": 0.7111546467491123, "grad_norm": 3.0948548316955566, "learning_rate": 1.9769407430860415e-05, "loss": 0.9407, "step": 4356 }, { "epoch": 0.7113179053916167, "grad_norm": 3.1397042274475098, "learning_rate": 1.9769270520293046e-05, "loss": 0.9169, "step": 4357 }, { "epoch": 0.7114811640341211, "grad_norm": 3.3227596282958984, "learning_rate": 1.9769133569567885e-05, "loss": 0.9343, "step": 4358 }, { "epoch": 0.7116444226766254, "grad_norm": 3.1469719409942627, "learning_rate": 1.9768996578685512e-05, "loss": 0.9063, "step": 4359 }, { "epoch": 0.7118076813191299, "grad_norm": 3.091104030609131, "learning_rate": 1.9768859547646476e-05, "loss": 0.8765, "step": 4360 }, { "epoch": 0.7119709399616342, "grad_norm": 3.14685320854187, "learning_rate": 1.9768722476451353e-05, "loss": 0.8748, "step": 4361 }, { "epoch": 0.7121341986041386, "grad_norm": 3.174013614654541, "learning_rate": 1.97685853651007e-05, "loss": 0.9602, "step": 4362 }, { "epoch": 0.712297457246643, "grad_norm": 2.9394495487213135, "learning_rate": 1.976844821359508e-05, "loss": 0.9254, "step": 4363 }, { "epoch": 0.7124607158891474, "grad_norm": 2.620849847793579, "learning_rate": 1.9768311021935055e-05, "loss": 0.8178, "step": 4364 }, { "epoch": 0.7126239745316517, "grad_norm": 2.6661813259124756, "learning_rate": 1.976817379012119e-05, "loss": 0.9275, "step": 4365 }, { "epoch": 0.7127872331741562, "grad_norm": 2.7020199298858643, "learning_rate": 1.976803651815406e-05, "loss": 0.7834, "step": 4366 }, { "epoch": 0.7129504918166606, "grad_norm": 2.760230302810669, "learning_rate": 1.9767899206034213e-05, "loss": 0.8013, "step": 4367 }, { "epoch": 0.7131137504591649, "grad_norm": 3.265740156173706, "learning_rate": 1.9767761853762225e-05, "loss": 1.0411, "step": 4368 }, { "epoch": 0.7132770091016694, "grad_norm": 2.8640711307525635, "learning_rate": 1.9767624461338656e-05, "loss": 0.958, "step": 4369 }, { "epoch": 0.7134402677441737, "grad_norm": 3.617521286010742, "learning_rate": 1.9767487028764068e-05, "loss": 0.9854, "step": 4370 }, { "epoch": 0.7136035263866781, "grad_norm": 3.0185952186584473, "learning_rate": 1.9767349556039027e-05, "loss": 1.026, "step": 4371 }, { "epoch": 0.7137667850291824, "grad_norm": 2.8612966537475586, "learning_rate": 1.9767212043164107e-05, "loss": 0.8622, "step": 4372 }, { "epoch": 0.7139300436716869, "grad_norm": 3.047924518585205, "learning_rate": 1.9767074490139858e-05, "loss": 1.0209, "step": 4373 }, { "epoch": 0.7140933023141912, "grad_norm": 2.8935327529907227, "learning_rate": 1.976693689696686e-05, "loss": 1.0044, "step": 4374 }, { "epoch": 0.7142565609566957, "grad_norm": 2.7600631713867188, "learning_rate": 1.9766799263645672e-05, "loss": 0.8217, "step": 4375 }, { "epoch": 0.7144198195992, "grad_norm": 2.358802556991577, "learning_rate": 1.976666159017686e-05, "loss": 0.6716, "step": 4376 }, { "epoch": 0.7145830782417044, "grad_norm": 2.8053019046783447, "learning_rate": 1.9766523876560988e-05, "loss": 0.819, "step": 4377 }, { "epoch": 0.7147463368842089, "grad_norm": 3.4157440662384033, "learning_rate": 1.9766386122798625e-05, "loss": 1.0259, "step": 4378 }, { "epoch": 0.7149095955267132, "grad_norm": 3.0226597785949707, "learning_rate": 1.9766248328890336e-05, "loss": 0.8177, "step": 4379 }, { "epoch": 0.7150728541692176, "grad_norm": 2.8654284477233887, "learning_rate": 1.9766110494836686e-05, "loss": 1.0051, "step": 4380 }, { "epoch": 0.715236112811722, "grad_norm": 2.383466958999634, "learning_rate": 1.976597262063825e-05, "loss": 0.6966, "step": 4381 }, { "epoch": 0.7153993714542264, "grad_norm": 2.8992486000061035, "learning_rate": 1.976583470629558e-05, "loss": 0.8909, "step": 4382 }, { "epoch": 0.7155626300967307, "grad_norm": 2.7257041931152344, "learning_rate": 1.9765696751809255e-05, "loss": 0.8031, "step": 4383 }, { "epoch": 0.7157258887392351, "grad_norm": 2.619637966156006, "learning_rate": 1.976555875717984e-05, "loss": 0.759, "step": 4384 }, { "epoch": 0.7158891473817395, "grad_norm": 3.058938980102539, "learning_rate": 1.97654207224079e-05, "loss": 0.9672, "step": 4385 }, { "epoch": 0.7160524060242439, "grad_norm": 3.1507163047790527, "learning_rate": 1.9765282647493997e-05, "loss": 0.9112, "step": 4386 }, { "epoch": 0.7162156646667482, "grad_norm": 2.6915457248687744, "learning_rate": 1.976514453243871e-05, "loss": 0.7557, "step": 4387 }, { "epoch": 0.7163789233092527, "grad_norm": 2.825632095336914, "learning_rate": 1.97650063772426e-05, "loss": 0.7404, "step": 4388 }, { "epoch": 0.7165421819517571, "grad_norm": 2.6332366466522217, "learning_rate": 1.9764868181906235e-05, "loss": 0.7242, "step": 4389 }, { "epoch": 0.7167054405942614, "grad_norm": 2.6445531845092773, "learning_rate": 1.9764729946430183e-05, "loss": 0.7173, "step": 4390 }, { "epoch": 0.7168686992367659, "grad_norm": 2.7085728645324707, "learning_rate": 1.9764591670815013e-05, "loss": 0.8531, "step": 4391 }, { "epoch": 0.7170319578792702, "grad_norm": 3.2388670444488525, "learning_rate": 1.97644533550613e-05, "loss": 0.9053, "step": 4392 }, { "epoch": 0.7171952165217746, "grad_norm": 3.37361741065979, "learning_rate": 1.9764314999169597e-05, "loss": 0.8041, "step": 4393 }, { "epoch": 0.717358475164279, "grad_norm": 3.5844242572784424, "learning_rate": 1.9764176603140487e-05, "loss": 1.0087, "step": 4394 }, { "epoch": 0.7175217338067834, "grad_norm": 2.5853843688964844, "learning_rate": 1.9764038166974534e-05, "loss": 0.758, "step": 4395 }, { "epoch": 0.7176849924492877, "grad_norm": 3.399271011352539, "learning_rate": 1.9763899690672308e-05, "loss": 1.0323, "step": 4396 }, { "epoch": 0.7178482510917922, "grad_norm": 3.1931254863739014, "learning_rate": 1.9763761174234374e-05, "loss": 1.018, "step": 4397 }, { "epoch": 0.7180115097342965, "grad_norm": 3.2047083377838135, "learning_rate": 1.9763622617661306e-05, "loss": 0.8048, "step": 4398 }, { "epoch": 0.7181747683768009, "grad_norm": 2.7835347652435303, "learning_rate": 1.976348402095367e-05, "loss": 0.8604, "step": 4399 }, { "epoch": 0.7183380270193054, "grad_norm": 2.840677499771118, "learning_rate": 1.9763345384112044e-05, "loss": 0.7221, "step": 4400 }, { "epoch": 0.7185012856618097, "grad_norm": 3.3140034675598145, "learning_rate": 1.976320670713699e-05, "loss": 0.8766, "step": 4401 }, { "epoch": 0.7186645443043141, "grad_norm": 3.112011432647705, "learning_rate": 1.9763067990029073e-05, "loss": 0.9727, "step": 4402 }, { "epoch": 0.7188278029468185, "grad_norm": 3.193626880645752, "learning_rate": 1.9762929232788875e-05, "loss": 0.8551, "step": 4403 }, { "epoch": 0.7189910615893229, "grad_norm": 2.46091365814209, "learning_rate": 1.976279043541696e-05, "loss": 0.6853, "step": 4404 }, { "epoch": 0.7191543202318272, "grad_norm": 2.925335168838501, "learning_rate": 1.9762651597913902e-05, "loss": 0.9141, "step": 4405 }, { "epoch": 0.7193175788743317, "grad_norm": 3.1238975524902344, "learning_rate": 1.976251272028027e-05, "loss": 0.894, "step": 4406 }, { "epoch": 0.719480837516836, "grad_norm": 2.6946115493774414, "learning_rate": 1.976237380251663e-05, "loss": 0.7511, "step": 4407 }, { "epoch": 0.7196440961593404, "grad_norm": 2.9338135719299316, "learning_rate": 1.9762234844623564e-05, "loss": 0.8842, "step": 4408 }, { "epoch": 0.7198073548018448, "grad_norm": 2.9165000915527344, "learning_rate": 1.976209584660163e-05, "loss": 0.8152, "step": 4409 }, { "epoch": 0.7199706134443492, "grad_norm": 2.8610074520111084, "learning_rate": 1.976195680845141e-05, "loss": 0.8246, "step": 4410 }, { "epoch": 0.7201338720868536, "grad_norm": 2.8597002029418945, "learning_rate": 1.976181773017347e-05, "loss": 0.8672, "step": 4411 }, { "epoch": 0.720297130729358, "grad_norm": 3.1581218242645264, "learning_rate": 1.9761678611768385e-05, "loss": 1.0621, "step": 4412 }, { "epoch": 0.7204603893718624, "grad_norm": 2.5812735557556152, "learning_rate": 1.9761539453236726e-05, "loss": 0.756, "step": 4413 }, { "epoch": 0.7206236480143667, "grad_norm": 2.827373743057251, "learning_rate": 1.976140025457906e-05, "loss": 0.7704, "step": 4414 }, { "epoch": 0.7207869066568712, "grad_norm": 2.608196258544922, "learning_rate": 1.976126101579597e-05, "loss": 0.8132, "step": 4415 }, { "epoch": 0.7209501652993755, "grad_norm": 3.030799388885498, "learning_rate": 1.9761121736888012e-05, "loss": 0.8368, "step": 4416 }, { "epoch": 0.7211134239418799, "grad_norm": 2.911034345626831, "learning_rate": 1.9760982417855773e-05, "loss": 0.8474, "step": 4417 }, { "epoch": 0.7212766825843843, "grad_norm": 2.833780288696289, "learning_rate": 1.9760843058699824e-05, "loss": 0.8656, "step": 4418 }, { "epoch": 0.7214399412268887, "grad_norm": 2.8183069229125977, "learning_rate": 1.9760703659420733e-05, "loss": 1.0002, "step": 4419 }, { "epoch": 0.721603199869393, "grad_norm": 2.6340436935424805, "learning_rate": 1.9760564220019075e-05, "loss": 0.7285, "step": 4420 }, { "epoch": 0.7217664585118975, "grad_norm": 3.145500421524048, "learning_rate": 1.976042474049542e-05, "loss": 0.9007, "step": 4421 }, { "epoch": 0.7219297171544019, "grad_norm": 3.452558994293213, "learning_rate": 1.976028522085035e-05, "loss": 0.8578, "step": 4422 }, { "epoch": 0.7220929757969062, "grad_norm": 2.8910598754882812, "learning_rate": 1.9760145661084428e-05, "loss": 0.7811, "step": 4423 }, { "epoch": 0.7222562344394107, "grad_norm": 2.9908406734466553, "learning_rate": 1.9760006061198232e-05, "loss": 0.8213, "step": 4424 }, { "epoch": 0.722419493081915, "grad_norm": 2.6687378883361816, "learning_rate": 1.975986642119234e-05, "loss": 0.7108, "step": 4425 }, { "epoch": 0.7225827517244194, "grad_norm": 3.2398476600646973, "learning_rate": 1.975972674106732e-05, "loss": 0.8465, "step": 4426 }, { "epoch": 0.7227460103669238, "grad_norm": 3.10367751121521, "learning_rate": 1.975958702082375e-05, "loss": 0.8852, "step": 4427 }, { "epoch": 0.7229092690094282, "grad_norm": 2.458381175994873, "learning_rate": 1.9759447260462202e-05, "loss": 0.71, "step": 4428 }, { "epoch": 0.7230725276519325, "grad_norm": 3.192317485809326, "learning_rate": 1.9759307459983256e-05, "loss": 0.941, "step": 4429 }, { "epoch": 0.723235786294437, "grad_norm": 3.003235101699829, "learning_rate": 1.9759167619387474e-05, "loss": 0.7593, "step": 4430 }, { "epoch": 0.7233990449369414, "grad_norm": 2.6914525032043457, "learning_rate": 1.9759027738675443e-05, "loss": 0.6971, "step": 4431 }, { "epoch": 0.7235623035794457, "grad_norm": 3.3415682315826416, "learning_rate": 1.9758887817847735e-05, "loss": 0.8716, "step": 4432 }, { "epoch": 0.7237255622219502, "grad_norm": 3.016521453857422, "learning_rate": 1.9758747856904923e-05, "loss": 0.8046, "step": 4433 }, { "epoch": 0.7238888208644545, "grad_norm": 3.309431791305542, "learning_rate": 1.9758607855847582e-05, "loss": 0.713, "step": 4434 }, { "epoch": 0.7240520795069589, "grad_norm": 3.0196785926818848, "learning_rate": 1.9758467814676292e-05, "loss": 0.8073, "step": 4435 }, { "epoch": 0.7242153381494633, "grad_norm": 3.387627601623535, "learning_rate": 1.9758327733391623e-05, "loss": 0.8676, "step": 4436 }, { "epoch": 0.7243785967919677, "grad_norm": 2.954033613204956, "learning_rate": 1.9758187611994157e-05, "loss": 0.8058, "step": 4437 }, { "epoch": 0.724541855434472, "grad_norm": 3.1041228771209717, "learning_rate": 1.9758047450484463e-05, "loss": 0.7871, "step": 4438 }, { "epoch": 0.7247051140769765, "grad_norm": 2.8758156299591064, "learning_rate": 1.975790724886312e-05, "loss": 0.9111, "step": 4439 }, { "epoch": 0.7248683727194808, "grad_norm": 2.470081090927124, "learning_rate": 1.9757767007130705e-05, "loss": 0.6791, "step": 4440 }, { "epoch": 0.7250316313619852, "grad_norm": 2.570347547531128, "learning_rate": 1.9757626725287794e-05, "loss": 0.8429, "step": 4441 }, { "epoch": 0.7251948900044897, "grad_norm": 3.8046650886535645, "learning_rate": 1.9757486403334965e-05, "loss": 0.9778, "step": 4442 }, { "epoch": 0.725358148646994, "grad_norm": 3.1966323852539062, "learning_rate": 1.9757346041272796e-05, "loss": 0.8513, "step": 4443 }, { "epoch": 0.7255214072894984, "grad_norm": 3.4642484188079834, "learning_rate": 1.9757205639101858e-05, "loss": 0.8835, "step": 4444 }, { "epoch": 0.7256846659320028, "grad_norm": 2.9152021408081055, "learning_rate": 1.9757065196822737e-05, "loss": 0.8681, "step": 4445 }, { "epoch": 0.7258479245745072, "grad_norm": 2.8747122287750244, "learning_rate": 1.9756924714436e-05, "loss": 0.9761, "step": 4446 }, { "epoch": 0.7260111832170115, "grad_norm": 3.0064289569854736, "learning_rate": 1.975678419194223e-05, "loss": 0.8499, "step": 4447 }, { "epoch": 0.726174441859516, "grad_norm": 3.1798644065856934, "learning_rate": 1.9756643629342008e-05, "loss": 0.8125, "step": 4448 }, { "epoch": 0.7263377005020203, "grad_norm": 2.975597858428955, "learning_rate": 1.97565030266359e-05, "loss": 0.7879, "step": 4449 }, { "epoch": 0.7265009591445247, "grad_norm": 2.7845888137817383, "learning_rate": 1.9756362383824502e-05, "loss": 0.87, "step": 4450 }, { "epoch": 0.726664217787029, "grad_norm": 3.0275230407714844, "learning_rate": 1.9756221700908376e-05, "loss": 1.0767, "step": 4451 }, { "epoch": 0.7268274764295335, "grad_norm": 2.627058506011963, "learning_rate": 1.9756080977888107e-05, "loss": 0.828, "step": 4452 }, { "epoch": 0.7269907350720379, "grad_norm": 2.638394832611084, "learning_rate": 1.9755940214764277e-05, "loss": 0.7447, "step": 4453 }, { "epoch": 0.7271539937145423, "grad_norm": 2.8725638389587402, "learning_rate": 1.9755799411537458e-05, "loss": 0.8653, "step": 4454 }, { "epoch": 0.7273172523570467, "grad_norm": 2.79317045211792, "learning_rate": 1.975565856820823e-05, "loss": 0.8153, "step": 4455 }, { "epoch": 0.727480510999551, "grad_norm": 2.8329055309295654, "learning_rate": 1.9755517684777173e-05, "loss": 0.8472, "step": 4456 }, { "epoch": 0.7276437696420555, "grad_norm": 3.0634849071502686, "learning_rate": 1.975537676124487e-05, "loss": 0.9566, "step": 4457 }, { "epoch": 0.7278070282845598, "grad_norm": 2.992593765258789, "learning_rate": 1.9755235797611894e-05, "loss": 0.9151, "step": 4458 }, { "epoch": 0.7279702869270642, "grad_norm": 2.9816408157348633, "learning_rate": 1.9755094793878828e-05, "loss": 0.8814, "step": 4459 }, { "epoch": 0.7281335455695686, "grad_norm": 2.8902268409729004, "learning_rate": 1.975495375004625e-05, "loss": 0.7874, "step": 4460 }, { "epoch": 0.728296804212073, "grad_norm": 3.6190078258514404, "learning_rate": 1.9754812666114742e-05, "loss": 1.0058, "step": 4461 }, { "epoch": 0.7284600628545773, "grad_norm": 3.0080931186676025, "learning_rate": 1.975467154208488e-05, "loss": 0.8216, "step": 4462 }, { "epoch": 0.7286233214970818, "grad_norm": 2.649223566055298, "learning_rate": 1.975453037795725e-05, "loss": 0.7709, "step": 4463 }, { "epoch": 0.7287865801395862, "grad_norm": 2.926849126815796, "learning_rate": 1.9754389173732426e-05, "loss": 0.7013, "step": 4464 }, { "epoch": 0.7289498387820905, "grad_norm": 2.9312968254089355, "learning_rate": 1.9754247929410994e-05, "loss": 0.8466, "step": 4465 }, { "epoch": 0.729113097424595, "grad_norm": 2.91721248626709, "learning_rate": 1.975410664499353e-05, "loss": 0.7161, "step": 4466 }, { "epoch": 0.7292763560670993, "grad_norm": 2.9064412117004395, "learning_rate": 1.9753965320480616e-05, "loss": 0.8022, "step": 4467 }, { "epoch": 0.7294396147096037, "grad_norm": 2.5699663162231445, "learning_rate": 1.9753823955872836e-05, "loss": 0.6639, "step": 4468 }, { "epoch": 0.729602873352108, "grad_norm": 3.112100839614868, "learning_rate": 1.9753682551170764e-05, "loss": 0.9491, "step": 4469 }, { "epoch": 0.7297661319946125, "grad_norm": 3.8237762451171875, "learning_rate": 1.975354110637499e-05, "loss": 0.9401, "step": 4470 }, { "epoch": 0.7299293906371168, "grad_norm": 3.101195812225342, "learning_rate": 1.9753399621486095e-05, "loss": 0.8914, "step": 4471 }, { "epoch": 0.7300926492796213, "grad_norm": 2.7362008094787598, "learning_rate": 1.9753258096504647e-05, "loss": 0.7436, "step": 4472 }, { "epoch": 0.7302559079221256, "grad_norm": 2.905003786087036, "learning_rate": 1.9753116531431245e-05, "loss": 1.0302, "step": 4473 }, { "epoch": 0.73041916656463, "grad_norm": 3.176208734512329, "learning_rate": 1.975297492626646e-05, "loss": 0.7904, "step": 4474 }, { "epoch": 0.7305824252071345, "grad_norm": 3.5159313678741455, "learning_rate": 1.9752833281010883e-05, "loss": 1.0, "step": 4475 }, { "epoch": 0.7307456838496388, "grad_norm": 2.8727755546569824, "learning_rate": 1.9752691595665085e-05, "loss": 0.8558, "step": 4476 }, { "epoch": 0.7309089424921432, "grad_norm": 2.5865814685821533, "learning_rate": 1.9752549870229658e-05, "loss": 0.6957, "step": 4477 }, { "epoch": 0.7310722011346475, "grad_norm": 3.1544313430786133, "learning_rate": 1.975240810470518e-05, "loss": 0.8933, "step": 4478 }, { "epoch": 0.731235459777152, "grad_norm": 3.2032065391540527, "learning_rate": 1.9752266299092234e-05, "loss": 0.9037, "step": 4479 }, { "epoch": 0.7313987184196563, "grad_norm": 3.0723252296447754, "learning_rate": 1.9752124453391406e-05, "loss": 0.9207, "step": 4480 }, { "epoch": 0.7315619770621608, "grad_norm": 3.434664726257324, "learning_rate": 1.9751982567603277e-05, "loss": 0.9519, "step": 4481 }, { "epoch": 0.7317252357046651, "grad_norm": 2.9794812202453613, "learning_rate": 1.9751840641728426e-05, "loss": 1.0177, "step": 4482 }, { "epoch": 0.7318884943471695, "grad_norm": 2.8404340744018555, "learning_rate": 1.975169867576744e-05, "loss": 0.8954, "step": 4483 }, { "epoch": 0.7320517529896738, "grad_norm": 3.1964714527130127, "learning_rate": 1.975155666972091e-05, "loss": 0.9286, "step": 4484 }, { "epoch": 0.7322150116321783, "grad_norm": 2.8991293907165527, "learning_rate": 1.9751414623589407e-05, "loss": 0.8438, "step": 4485 }, { "epoch": 0.7323782702746827, "grad_norm": 3.0580873489379883, "learning_rate": 1.975127253737352e-05, "loss": 0.8704, "step": 4486 }, { "epoch": 0.732541528917187, "grad_norm": 3.103972911834717, "learning_rate": 1.9751130411073834e-05, "loss": 0.9927, "step": 4487 }, { "epoch": 0.7327047875596915, "grad_norm": 3.022350549697876, "learning_rate": 1.9750988244690937e-05, "loss": 0.9156, "step": 4488 }, { "epoch": 0.7328680462021958, "grad_norm": 3.926051139831543, "learning_rate": 1.9750846038225404e-05, "loss": 0.9188, "step": 4489 }, { "epoch": 0.7330313048447002, "grad_norm": 3.1802499294281006, "learning_rate": 1.975070379167783e-05, "loss": 0.8569, "step": 4490 }, { "epoch": 0.7331945634872046, "grad_norm": 2.8006045818328857, "learning_rate": 1.975056150504879e-05, "loss": 0.8317, "step": 4491 }, { "epoch": 0.733357822129709, "grad_norm": 2.6827309131622314, "learning_rate": 1.9750419178338872e-05, "loss": 0.7787, "step": 4492 }, { "epoch": 0.7335210807722133, "grad_norm": 3.0113370418548584, "learning_rate": 1.9750276811548668e-05, "loss": 0.8072, "step": 4493 }, { "epoch": 0.7336843394147178, "grad_norm": 2.7152605056762695, "learning_rate": 1.9750134404678754e-05, "loss": 0.8592, "step": 4494 }, { "epoch": 0.7338475980572221, "grad_norm": 3.015648365020752, "learning_rate": 1.9749991957729722e-05, "loss": 0.7089, "step": 4495 }, { "epoch": 0.7340108566997265, "grad_norm": 3.1454131603240967, "learning_rate": 1.9749849470702154e-05, "loss": 0.8431, "step": 4496 }, { "epoch": 0.734174115342231, "grad_norm": 3.2659904956817627, "learning_rate": 1.9749706943596633e-05, "loss": 0.8444, "step": 4497 }, { "epoch": 0.7343373739847353, "grad_norm": 3.299321174621582, "learning_rate": 1.974956437641375e-05, "loss": 0.9056, "step": 4498 }, { "epoch": 0.7345006326272397, "grad_norm": 2.731739044189453, "learning_rate": 1.974942176915409e-05, "loss": 0.836, "step": 4499 }, { "epoch": 0.7346638912697441, "grad_norm": 3.143937110900879, "learning_rate": 1.9749279121818235e-05, "loss": 0.9515, "step": 4500 }, { "epoch": 0.7348271499122485, "grad_norm": 3.044679641723633, "learning_rate": 1.9749136434406782e-05, "loss": 0.8944, "step": 4501 }, { "epoch": 0.7349904085547528, "grad_norm": 2.8639938831329346, "learning_rate": 1.9748993706920302e-05, "loss": 0.7279, "step": 4502 }, { "epoch": 0.7351536671972573, "grad_norm": 2.8554985523223877, "learning_rate": 1.9748850939359396e-05, "loss": 0.9078, "step": 4503 }, { "epoch": 0.7353169258397616, "grad_norm": 2.5712380409240723, "learning_rate": 1.9748708131724643e-05, "loss": 0.6073, "step": 4504 }, { "epoch": 0.735480184482266, "grad_norm": 2.8705801963806152, "learning_rate": 1.9748565284016632e-05, "loss": 0.8589, "step": 4505 }, { "epoch": 0.7356434431247704, "grad_norm": 2.9186313152313232, "learning_rate": 1.974842239623595e-05, "loss": 0.8881, "step": 4506 }, { "epoch": 0.7358067017672748, "grad_norm": 2.7583019733428955, "learning_rate": 1.9748279468383183e-05, "loss": 0.725, "step": 4507 }, { "epoch": 0.7359699604097792, "grad_norm": 3.0153660774230957, "learning_rate": 1.9748136500458924e-05, "loss": 0.8657, "step": 4508 }, { "epoch": 0.7361332190522836, "grad_norm": 3.0911099910736084, "learning_rate": 1.974799349246375e-05, "loss": 0.8501, "step": 4509 }, { "epoch": 0.736296477694788, "grad_norm": 2.574275255203247, "learning_rate": 1.9747850444398263e-05, "loss": 0.7709, "step": 4510 }, { "epoch": 0.7364597363372923, "grad_norm": 3.402535915374756, "learning_rate": 1.974770735626304e-05, "loss": 0.9339, "step": 4511 }, { "epoch": 0.7366229949797968, "grad_norm": 3.0721280574798584, "learning_rate": 1.9747564228058673e-05, "loss": 0.9929, "step": 4512 }, { "epoch": 0.7367862536223011, "grad_norm": 3.225571393966675, "learning_rate": 1.974742105978575e-05, "loss": 0.7351, "step": 4513 }, { "epoch": 0.7369495122648055, "grad_norm": 3.2841808795928955, "learning_rate": 1.974727785144486e-05, "loss": 0.9611, "step": 4514 }, { "epoch": 0.7371127709073099, "grad_norm": 2.812267780303955, "learning_rate": 1.974713460303659e-05, "loss": 0.8681, "step": 4515 }, { "epoch": 0.7372760295498143, "grad_norm": 2.554666757583618, "learning_rate": 1.974699131456153e-05, "loss": 0.6907, "step": 4516 }, { "epoch": 0.7374392881923186, "grad_norm": 3.1210622787475586, "learning_rate": 1.9746847986020274e-05, "loss": 0.9524, "step": 4517 }, { "epoch": 0.7376025468348231, "grad_norm": 3.0147149562835693, "learning_rate": 1.97467046174134e-05, "loss": 0.8249, "step": 4518 }, { "epoch": 0.7377658054773275, "grad_norm": 3.2324743270874023, "learning_rate": 1.9746561208741507e-05, "loss": 0.9179, "step": 4519 }, { "epoch": 0.7379290641198318, "grad_norm": 3.1642544269561768, "learning_rate": 1.974641776000518e-05, "loss": 0.9309, "step": 4520 }, { "epoch": 0.7380923227623363, "grad_norm": 3.169560194015503, "learning_rate": 1.9746274271205008e-05, "loss": 0.817, "step": 4521 }, { "epoch": 0.7382555814048406, "grad_norm": 2.8252670764923096, "learning_rate": 1.9746130742341585e-05, "loss": 0.8356, "step": 4522 }, { "epoch": 0.738418840047345, "grad_norm": 2.6304569244384766, "learning_rate": 1.9745987173415496e-05, "loss": 0.7146, "step": 4523 }, { "epoch": 0.7385820986898494, "grad_norm": 2.981560230255127, "learning_rate": 1.9745843564427337e-05, "loss": 0.877, "step": 4524 }, { "epoch": 0.7387453573323538, "grad_norm": 2.8826088905334473, "learning_rate": 1.9745699915377695e-05, "loss": 0.9028, "step": 4525 }, { "epoch": 0.7389086159748581, "grad_norm": 3.099393844604492, "learning_rate": 1.9745556226267158e-05, "loss": 0.8235, "step": 4526 }, { "epoch": 0.7390718746173626, "grad_norm": 3.238020658493042, "learning_rate": 1.974541249709632e-05, "loss": 0.9927, "step": 4527 }, { "epoch": 0.7392351332598669, "grad_norm": 2.965121030807495, "learning_rate": 1.9745268727865774e-05, "loss": 0.8689, "step": 4528 }, { "epoch": 0.7393983919023713, "grad_norm": 2.589355707168579, "learning_rate": 1.9745124918576105e-05, "loss": 0.7022, "step": 4529 }, { "epoch": 0.7395616505448758, "grad_norm": 3.888036012649536, "learning_rate": 1.9744981069227907e-05, "loss": 0.9218, "step": 4530 }, { "epoch": 0.7397249091873801, "grad_norm": 3.27158784866333, "learning_rate": 1.9744837179821773e-05, "loss": 0.7703, "step": 4531 }, { "epoch": 0.7398881678298845, "grad_norm": 3.177152633666992, "learning_rate": 1.9744693250358292e-05, "loss": 0.855, "step": 4532 }, { "epoch": 0.7400514264723889, "grad_norm": 2.4691286087036133, "learning_rate": 1.9744549280838057e-05, "loss": 0.6578, "step": 4533 }, { "epoch": 0.7402146851148933, "grad_norm": 2.8739542961120605, "learning_rate": 1.9744405271261658e-05, "loss": 0.8606, "step": 4534 }, { "epoch": 0.7403779437573976, "grad_norm": 2.7565975189208984, "learning_rate": 1.974426122162969e-05, "loss": 0.9278, "step": 4535 }, { "epoch": 0.7405412023999021, "grad_norm": 2.9536964893341064, "learning_rate": 1.9744117131942745e-05, "loss": 0.7994, "step": 4536 }, { "epoch": 0.7407044610424064, "grad_norm": 2.6937789916992188, "learning_rate": 1.974397300220141e-05, "loss": 0.9238, "step": 4537 }, { "epoch": 0.7408677196849108, "grad_norm": 3.6159186363220215, "learning_rate": 1.9743828832406283e-05, "loss": 1.183, "step": 4538 }, { "epoch": 0.7410309783274152, "grad_norm": 3.326282262802124, "learning_rate": 1.9743684622557956e-05, "loss": 1.02, "step": 4539 }, { "epoch": 0.7411942369699196, "grad_norm": 2.603264570236206, "learning_rate": 1.974354037265702e-05, "loss": 0.6577, "step": 4540 }, { "epoch": 0.741357495612424, "grad_norm": 3.0719332695007324, "learning_rate": 1.9743396082704067e-05, "loss": 1.0014, "step": 4541 }, { "epoch": 0.7415207542549284, "grad_norm": 3.0497806072235107, "learning_rate": 1.974325175269969e-05, "loss": 0.9513, "step": 4542 }, { "epoch": 0.7416840128974328, "grad_norm": 3.142392158508301, "learning_rate": 1.974310738264449e-05, "loss": 0.9044, "step": 4543 }, { "epoch": 0.7418472715399371, "grad_norm": 2.788339376449585, "learning_rate": 1.9742962972539052e-05, "loss": 0.9018, "step": 4544 }, { "epoch": 0.7420105301824416, "grad_norm": 2.8714687824249268, "learning_rate": 1.9742818522383975e-05, "loss": 0.8401, "step": 4545 }, { "epoch": 0.7421737888249459, "grad_norm": 3.31032133102417, "learning_rate": 1.9742674032179844e-05, "loss": 0.7322, "step": 4546 }, { "epoch": 0.7423370474674503, "grad_norm": 2.7193267345428467, "learning_rate": 1.9742529501927258e-05, "loss": 0.7611, "step": 4547 }, { "epoch": 0.7425003061099547, "grad_norm": 3.372225522994995, "learning_rate": 1.9742384931626818e-05, "loss": 1.0198, "step": 4548 }, { "epoch": 0.7426635647524591, "grad_norm": 2.904956817626953, "learning_rate": 1.974224032127911e-05, "loss": 0.7626, "step": 4549 }, { "epoch": 0.7428268233949634, "grad_norm": 2.992483377456665, "learning_rate": 1.974209567088473e-05, "loss": 0.8807, "step": 4550 }, { "epoch": 0.7429900820374679, "grad_norm": 3.3100438117980957, "learning_rate": 1.9741950980444274e-05, "loss": 1.0037, "step": 4551 }, { "epoch": 0.7431533406799723, "grad_norm": 2.761492967605591, "learning_rate": 1.9741806249958336e-05, "loss": 0.8371, "step": 4552 }, { "epoch": 0.7433165993224766, "grad_norm": 3.2189767360687256, "learning_rate": 1.974166147942751e-05, "loss": 0.798, "step": 4553 }, { "epoch": 0.7434798579649811, "grad_norm": 2.693012237548828, "learning_rate": 1.9741516668852392e-05, "loss": 0.7092, "step": 4554 }, { "epoch": 0.7436431166074854, "grad_norm": 2.776777744293213, "learning_rate": 1.9741371818233577e-05, "loss": 0.9238, "step": 4555 }, { "epoch": 0.7438063752499898, "grad_norm": 3.086418628692627, "learning_rate": 1.974122692757166e-05, "loss": 0.9131, "step": 4556 }, { "epoch": 0.7439696338924942, "grad_norm": 3.1763367652893066, "learning_rate": 1.974108199686724e-05, "loss": 0.7816, "step": 4557 }, { "epoch": 0.7441328925349986, "grad_norm": 2.642233371734619, "learning_rate": 1.9740937026120908e-05, "loss": 0.7455, "step": 4558 }, { "epoch": 0.7442961511775029, "grad_norm": 2.8469667434692383, "learning_rate": 1.9740792015333262e-05, "loss": 0.938, "step": 4559 }, { "epoch": 0.7444594098200074, "grad_norm": 2.804379940032959, "learning_rate": 1.9740646964504902e-05, "loss": 0.8288, "step": 4560 }, { "epoch": 0.7446226684625117, "grad_norm": 3.0780608654022217, "learning_rate": 1.9740501873636414e-05, "loss": 0.9376, "step": 4561 }, { "epoch": 0.7447859271050161, "grad_norm": 2.8596744537353516, "learning_rate": 1.9740356742728407e-05, "loss": 0.7746, "step": 4562 }, { "epoch": 0.7449491857475206, "grad_norm": 2.6431875228881836, "learning_rate": 1.9740211571781468e-05, "loss": 0.6933, "step": 4563 }, { "epoch": 0.7451124443900249, "grad_norm": 2.6708667278289795, "learning_rate": 1.9740066360796196e-05, "loss": 0.7662, "step": 4564 }, { "epoch": 0.7452757030325293, "grad_norm": 2.8367183208465576, "learning_rate": 1.973992110977319e-05, "loss": 0.7337, "step": 4565 }, { "epoch": 0.7454389616750337, "grad_norm": 3.3064393997192383, "learning_rate": 1.9739775818713046e-05, "loss": 1.0298, "step": 4566 }, { "epoch": 0.7456022203175381, "grad_norm": 2.4990663528442383, "learning_rate": 1.973963048761636e-05, "loss": 0.6468, "step": 4567 }, { "epoch": 0.7457654789600424, "grad_norm": 2.928208827972412, "learning_rate": 1.9739485116483736e-05, "loss": 0.8106, "step": 4568 }, { "epoch": 0.7459287376025469, "grad_norm": 3.247393846511841, "learning_rate": 1.9739339705315762e-05, "loss": 0.9092, "step": 4569 }, { "epoch": 0.7460919962450512, "grad_norm": 3.371189594268799, "learning_rate": 1.973919425411304e-05, "loss": 0.9339, "step": 4570 }, { "epoch": 0.7462552548875556, "grad_norm": 3.4718074798583984, "learning_rate": 1.9739048762876168e-05, "loss": 0.9416, "step": 4571 }, { "epoch": 0.74641851353006, "grad_norm": 3.404503107070923, "learning_rate": 1.9738903231605747e-05, "loss": 0.9305, "step": 4572 }, { "epoch": 0.7465817721725644, "grad_norm": 3.261871337890625, "learning_rate": 1.9738757660302366e-05, "loss": 0.8144, "step": 4573 }, { "epoch": 0.7467450308150688, "grad_norm": 3.2933170795440674, "learning_rate": 1.9738612048966636e-05, "loss": 0.9186, "step": 4574 }, { "epoch": 0.7469082894575731, "grad_norm": 3.228355646133423, "learning_rate": 1.9738466397599144e-05, "loss": 0.8748, "step": 4575 }, { "epoch": 0.7470715481000776, "grad_norm": 3.027123212814331, "learning_rate": 1.9738320706200498e-05, "loss": 0.8585, "step": 4576 }, { "epoch": 0.7472348067425819, "grad_norm": 3.169814109802246, "learning_rate": 1.9738174974771288e-05, "loss": 0.8341, "step": 4577 }, { "epoch": 0.7473980653850864, "grad_norm": 3.155911684036255, "learning_rate": 1.9738029203312125e-05, "loss": 0.9627, "step": 4578 }, { "epoch": 0.7475613240275907, "grad_norm": 2.412130117416382, "learning_rate": 1.9737883391823597e-05, "loss": 0.708, "step": 4579 }, { "epoch": 0.7477245826700951, "grad_norm": 2.8634274005889893, "learning_rate": 1.9737737540306305e-05, "loss": 0.8591, "step": 4580 }, { "epoch": 0.7478878413125994, "grad_norm": 2.9340834617614746, "learning_rate": 1.973759164876085e-05, "loss": 0.803, "step": 4581 }, { "epoch": 0.7480510999551039, "grad_norm": 3.0708041191101074, "learning_rate": 1.9737445717187836e-05, "loss": 0.8395, "step": 4582 }, { "epoch": 0.7482143585976082, "grad_norm": 2.873349905014038, "learning_rate": 1.973729974558786e-05, "loss": 0.9403, "step": 4583 }, { "epoch": 0.7483776172401126, "grad_norm": 2.513280153274536, "learning_rate": 1.973715373396152e-05, "loss": 0.6564, "step": 4584 }, { "epoch": 0.7485408758826171, "grad_norm": 2.680238723754883, "learning_rate": 1.9737007682309417e-05, "loss": 0.7743, "step": 4585 }, { "epoch": 0.7487041345251214, "grad_norm": 2.7087013721466064, "learning_rate": 1.9736861590632155e-05, "loss": 0.7456, "step": 4586 }, { "epoch": 0.7488673931676258, "grad_norm": 2.6656365394592285, "learning_rate": 1.973671545893033e-05, "loss": 0.8438, "step": 4587 }, { "epoch": 0.7490306518101302, "grad_norm": 2.7323479652404785, "learning_rate": 1.9736569287204544e-05, "loss": 0.7746, "step": 4588 }, { "epoch": 0.7491939104526346, "grad_norm": 2.7549755573272705, "learning_rate": 1.97364230754554e-05, "loss": 0.7012, "step": 4589 }, { "epoch": 0.7493571690951389, "grad_norm": 2.8886313438415527, "learning_rate": 1.9736276823683492e-05, "loss": 0.8474, "step": 4590 }, { "epoch": 0.7495204277376434, "grad_norm": 3.0971529483795166, "learning_rate": 1.973613053188943e-05, "loss": 0.9706, "step": 4591 }, { "epoch": 0.7496836863801477, "grad_norm": 3.3024935722351074, "learning_rate": 1.973598420007381e-05, "loss": 0.8999, "step": 4592 }, { "epoch": 0.7498469450226521, "grad_norm": 2.7614142894744873, "learning_rate": 1.9735837828237235e-05, "loss": 0.8492, "step": 4593 }, { "epoch": 0.7500102036651565, "grad_norm": 3.111835479736328, "learning_rate": 1.9735691416380313e-05, "loss": 0.9671, "step": 4594 }, { "epoch": 0.7501734623076609, "grad_norm": 3.544055223464966, "learning_rate": 1.973554496450363e-05, "loss": 0.8364, "step": 4595 }, { "epoch": 0.7503367209501653, "grad_norm": 3.089153289794922, "learning_rate": 1.9735398472607805e-05, "loss": 0.7804, "step": 4596 }, { "epoch": 0.7504999795926697, "grad_norm": 3.084481954574585, "learning_rate": 1.973525194069343e-05, "loss": 0.971, "step": 4597 }, { "epoch": 0.7506632382351741, "grad_norm": 2.710116386413574, "learning_rate": 1.973510536876111e-05, "loss": 0.8662, "step": 4598 }, { "epoch": 0.7508264968776784, "grad_norm": 2.7028698921203613, "learning_rate": 1.9734958756811448e-05, "loss": 0.9354, "step": 4599 }, { "epoch": 0.7509897555201829, "grad_norm": 3.1277658939361572, "learning_rate": 1.973481210484505e-05, "loss": 0.9338, "step": 4600 }, { "epoch": 0.7511530141626872, "grad_norm": 2.7198033332824707, "learning_rate": 1.9734665412862514e-05, "loss": 0.7281, "step": 4601 }, { "epoch": 0.7513162728051916, "grad_norm": 2.798750638961792, "learning_rate": 1.973451868086444e-05, "loss": 0.8421, "step": 4602 }, { "epoch": 0.751479531447696, "grad_norm": 3.1397228240966797, "learning_rate": 1.9734371908851437e-05, "loss": 0.9922, "step": 4603 }, { "epoch": 0.7516427900902004, "grad_norm": 2.7729876041412354, "learning_rate": 1.973422509682411e-05, "loss": 0.8214, "step": 4604 }, { "epoch": 0.7518060487327047, "grad_norm": 2.8127546310424805, "learning_rate": 1.9734078244783053e-05, "loss": 0.7702, "step": 4605 }, { "epoch": 0.7519693073752092, "grad_norm": 2.5734686851501465, "learning_rate": 1.9733931352728882e-05, "loss": 0.7365, "step": 4606 }, { "epoch": 0.7521325660177136, "grad_norm": 2.6576218605041504, "learning_rate": 1.9733784420662193e-05, "loss": 0.7534, "step": 4607 }, { "epoch": 0.7522958246602179, "grad_norm": 2.856638193130493, "learning_rate": 1.973363744858359e-05, "loss": 0.7532, "step": 4608 }, { "epoch": 0.7524590833027224, "grad_norm": 2.8904829025268555, "learning_rate": 1.9733490436493682e-05, "loss": 0.801, "step": 4609 }, { "epoch": 0.7526223419452267, "grad_norm": 2.7883217334747314, "learning_rate": 1.9733343384393066e-05, "loss": 0.8506, "step": 4610 }, { "epoch": 0.7527856005877311, "grad_norm": 2.853619337081909, "learning_rate": 1.973319629228235e-05, "loss": 0.9254, "step": 4611 }, { "epoch": 0.7529488592302355, "grad_norm": 2.9610707759857178, "learning_rate": 1.9733049160162143e-05, "loss": 0.9238, "step": 4612 }, { "epoch": 0.7531121178727399, "grad_norm": 2.5652356147766113, "learning_rate": 1.9732901988033045e-05, "loss": 0.6703, "step": 4613 }, { "epoch": 0.7532753765152442, "grad_norm": 3.0040595531463623, "learning_rate": 1.973275477589566e-05, "loss": 0.9424, "step": 4614 }, { "epoch": 0.7534386351577487, "grad_norm": 2.923064708709717, "learning_rate": 1.97326075237506e-05, "loss": 0.8018, "step": 4615 }, { "epoch": 0.753601893800253, "grad_norm": 2.946559429168701, "learning_rate": 1.9732460231598464e-05, "loss": 0.8395, "step": 4616 }, { "epoch": 0.7537651524427574, "grad_norm": 2.8072752952575684, "learning_rate": 1.9732312899439855e-05, "loss": 0.7486, "step": 4617 }, { "epoch": 0.7539284110852619, "grad_norm": 2.9845240116119385, "learning_rate": 1.9732165527275385e-05, "loss": 0.9302, "step": 4618 }, { "epoch": 0.7540916697277662, "grad_norm": 3.049736738204956, "learning_rate": 1.9732018115105655e-05, "loss": 1.0375, "step": 4619 }, { "epoch": 0.7542549283702706, "grad_norm": 3.2597646713256836, "learning_rate": 1.973187066293128e-05, "loss": 0.9344, "step": 4620 }, { "epoch": 0.754418187012775, "grad_norm": 2.5274436473846436, "learning_rate": 1.9731723170752853e-05, "loss": 0.7808, "step": 4621 }, { "epoch": 0.7545814456552794, "grad_norm": 3.7703065872192383, "learning_rate": 1.9731575638570984e-05, "loss": 0.8012, "step": 4622 }, { "epoch": 0.7547447042977837, "grad_norm": 3.3755245208740234, "learning_rate": 1.973142806638629e-05, "loss": 0.7542, "step": 4623 }, { "epoch": 0.7549079629402882, "grad_norm": 3.1431164741516113, "learning_rate": 1.9731280454199365e-05, "loss": 0.7363, "step": 4624 }, { "epoch": 0.7550712215827925, "grad_norm": 2.817899227142334, "learning_rate": 1.973113280201082e-05, "loss": 0.7974, "step": 4625 }, { "epoch": 0.7552344802252969, "grad_norm": 2.7658369541168213, "learning_rate": 1.9730985109821268e-05, "loss": 0.7399, "step": 4626 }, { "epoch": 0.7553977388678013, "grad_norm": 3.0406525135040283, "learning_rate": 1.9730837377631305e-05, "loss": 0.8355, "step": 4627 }, { "epoch": 0.7555609975103057, "grad_norm": 3.356077194213867, "learning_rate": 1.9730689605441547e-05, "loss": 0.791, "step": 4628 }, { "epoch": 0.7557242561528101, "grad_norm": 2.1217854022979736, "learning_rate": 1.9730541793252595e-05, "loss": 0.5569, "step": 4629 }, { "epoch": 0.7558875147953145, "grad_norm": 3.740302324295044, "learning_rate": 1.9730393941065064e-05, "loss": 1.1101, "step": 4630 }, { "epoch": 0.7560507734378189, "grad_norm": 3.515331506729126, "learning_rate": 1.9730246048879556e-05, "loss": 1.1678, "step": 4631 }, { "epoch": 0.7562140320803232, "grad_norm": 2.915503740310669, "learning_rate": 1.973009811669668e-05, "loss": 0.7818, "step": 4632 }, { "epoch": 0.7563772907228277, "grad_norm": 3.3744149208068848, "learning_rate": 1.9729950144517044e-05, "loss": 1.0538, "step": 4633 }, { "epoch": 0.756540549365332, "grad_norm": 3.304849624633789, "learning_rate": 1.972980213234126e-05, "loss": 0.7951, "step": 4634 }, { "epoch": 0.7567038080078364, "grad_norm": 2.915463447570801, "learning_rate": 1.972965408016993e-05, "loss": 0.8194, "step": 4635 }, { "epoch": 0.7568670666503408, "grad_norm": 2.4046759605407715, "learning_rate": 1.9729505988003667e-05, "loss": 0.668, "step": 4636 }, { "epoch": 0.7570303252928452, "grad_norm": 2.9441144466400146, "learning_rate": 1.9729357855843076e-05, "loss": 0.9633, "step": 4637 }, { "epoch": 0.7571935839353495, "grad_norm": 2.8586130142211914, "learning_rate": 1.972920968368877e-05, "loss": 0.9248, "step": 4638 }, { "epoch": 0.757356842577854, "grad_norm": 3.131542921066284, "learning_rate": 1.9729061471541362e-05, "loss": 0.8981, "step": 4639 }, { "epoch": 0.7575201012203584, "grad_norm": 2.968519449234009, "learning_rate": 1.972891321940145e-05, "loss": 0.851, "step": 4640 }, { "epoch": 0.7576833598628627, "grad_norm": 2.536651849746704, "learning_rate": 1.9728764927269652e-05, "loss": 0.7364, "step": 4641 }, { "epoch": 0.7578466185053672, "grad_norm": 2.8570098876953125, "learning_rate": 1.9728616595146572e-05, "loss": 0.8192, "step": 4642 }, { "epoch": 0.7580098771478715, "grad_norm": 2.1870174407958984, "learning_rate": 1.9728468223032826e-05, "loss": 0.5477, "step": 4643 }, { "epoch": 0.7581731357903759, "grad_norm": 2.8475894927978516, "learning_rate": 1.972831981092902e-05, "loss": 0.7936, "step": 4644 }, { "epoch": 0.7583363944328803, "grad_norm": 2.3268795013427734, "learning_rate": 1.9728171358835763e-05, "loss": 0.5486, "step": 4645 }, { "epoch": 0.7584996530753847, "grad_norm": 2.8584237098693848, "learning_rate": 1.9728022866753668e-05, "loss": 0.8233, "step": 4646 }, { "epoch": 0.758662911717889, "grad_norm": 2.834808111190796, "learning_rate": 1.9727874334683345e-05, "loss": 0.8046, "step": 4647 }, { "epoch": 0.7588261703603935, "grad_norm": 2.9559826850891113, "learning_rate": 1.9727725762625402e-05, "loss": 0.6642, "step": 4648 }, { "epoch": 0.7589894290028978, "grad_norm": 2.9578661918640137, "learning_rate": 1.9727577150580453e-05, "loss": 0.7774, "step": 4649 }, { "epoch": 0.7591526876454022, "grad_norm": 2.562744379043579, "learning_rate": 1.9727428498549105e-05, "loss": 0.7968, "step": 4650 }, { "epoch": 0.7593159462879067, "grad_norm": 3.0116560459136963, "learning_rate": 1.9727279806531975e-05, "loss": 0.8667, "step": 4651 }, { "epoch": 0.759479204930411, "grad_norm": 2.7177014350891113, "learning_rate": 1.9727131074529668e-05, "loss": 0.6859, "step": 4652 }, { "epoch": 0.7596424635729154, "grad_norm": 2.5460197925567627, "learning_rate": 1.9726982302542798e-05, "loss": 0.6943, "step": 4653 }, { "epoch": 0.7598057222154198, "grad_norm": 2.931213855743408, "learning_rate": 1.972683349057198e-05, "loss": 0.7383, "step": 4654 }, { "epoch": 0.7599689808579242, "grad_norm": 3.1818058490753174, "learning_rate": 1.9726684638617816e-05, "loss": 0.9733, "step": 4655 }, { "epoch": 0.7601322395004285, "grad_norm": 3.125507354736328, "learning_rate": 1.972653574668093e-05, "loss": 0.9046, "step": 4656 }, { "epoch": 0.760295498142933, "grad_norm": 2.861382007598877, "learning_rate": 1.9726386814761927e-05, "loss": 0.9308, "step": 4657 }, { "epoch": 0.7604587567854373, "grad_norm": 2.88645076751709, "learning_rate": 1.972623784286142e-05, "loss": 0.8372, "step": 4658 }, { "epoch": 0.7606220154279417, "grad_norm": 3.0539379119873047, "learning_rate": 1.972608883098002e-05, "loss": 0.892, "step": 4659 }, { "epoch": 0.760785274070446, "grad_norm": 3.036149501800537, "learning_rate": 1.9725939779118344e-05, "loss": 0.8484, "step": 4660 }, { "epoch": 0.7609485327129505, "grad_norm": 3.299076557159424, "learning_rate": 1.9725790687277e-05, "loss": 0.7638, "step": 4661 }, { "epoch": 0.7611117913554549, "grad_norm": 2.8815395832061768, "learning_rate": 1.9725641555456602e-05, "loss": 0.9246, "step": 4662 }, { "epoch": 0.7612750499979593, "grad_norm": 2.7091939449310303, "learning_rate": 1.9725492383657767e-05, "loss": 0.7448, "step": 4663 }, { "epoch": 0.7614383086404637, "grad_norm": 2.972435474395752, "learning_rate": 1.9725343171881105e-05, "loss": 0.8611, "step": 4664 }, { "epoch": 0.761601567282968, "grad_norm": 3.416217565536499, "learning_rate": 1.9725193920127225e-05, "loss": 0.9457, "step": 4665 }, { "epoch": 0.7617648259254725, "grad_norm": 3.538658380508423, "learning_rate": 1.972504462839675e-05, "loss": 1.0024, "step": 4666 }, { "epoch": 0.7619280845679768, "grad_norm": 2.7346396446228027, "learning_rate": 1.9724895296690286e-05, "loss": 0.8856, "step": 4667 }, { "epoch": 0.7620913432104812, "grad_norm": 2.971402645111084, "learning_rate": 1.972474592500845e-05, "loss": 0.7661, "step": 4668 }, { "epoch": 0.7622546018529855, "grad_norm": 3.0064022541046143, "learning_rate": 1.9724596513351856e-05, "loss": 0.9666, "step": 4669 }, { "epoch": 0.76241786049549, "grad_norm": 2.890916347503662, "learning_rate": 1.9724447061721118e-05, "loss": 0.7716, "step": 4670 }, { "epoch": 0.7625811191379943, "grad_norm": 3.435814380645752, "learning_rate": 1.9724297570116852e-05, "loss": 1.0143, "step": 4671 }, { "epoch": 0.7627443777804988, "grad_norm": 2.572913408279419, "learning_rate": 1.9724148038539665e-05, "loss": 0.7086, "step": 4672 }, { "epoch": 0.7629076364230032, "grad_norm": 3.011331081390381, "learning_rate": 1.972399846699018e-05, "loss": 0.9014, "step": 4673 }, { "epoch": 0.7630708950655075, "grad_norm": 2.8813672065734863, "learning_rate": 1.972384885546901e-05, "loss": 0.7208, "step": 4674 }, { "epoch": 0.763234153708012, "grad_norm": 2.7405543327331543, "learning_rate": 1.9723699203976768e-05, "loss": 0.844, "step": 4675 }, { "epoch": 0.7633974123505163, "grad_norm": 2.7918550968170166, "learning_rate": 1.972354951251407e-05, "loss": 0.8144, "step": 4676 }, { "epoch": 0.7635606709930207, "grad_norm": 2.7523512840270996, "learning_rate": 1.972339978108153e-05, "loss": 0.7105, "step": 4677 }, { "epoch": 0.763723929635525, "grad_norm": 3.974275827407837, "learning_rate": 1.972325000967977e-05, "loss": 0.8866, "step": 4678 }, { "epoch": 0.7638871882780295, "grad_norm": 3.354917287826538, "learning_rate": 1.9723100198309394e-05, "loss": 0.8625, "step": 4679 }, { "epoch": 0.7640504469205338, "grad_norm": 2.554011821746826, "learning_rate": 1.9722950346971032e-05, "loss": 0.6601, "step": 4680 }, { "epoch": 0.7642137055630382, "grad_norm": 2.98124361038208, "learning_rate": 1.9722800455665284e-05, "loss": 0.8604, "step": 4681 }, { "epoch": 0.7643769642055427, "grad_norm": 2.4968769550323486, "learning_rate": 1.972265052439278e-05, "loss": 0.7518, "step": 4682 }, { "epoch": 0.764540222848047, "grad_norm": 3.0315818786621094, "learning_rate": 1.972250055315413e-05, "loss": 0.9017, "step": 4683 }, { "epoch": 0.7647034814905515, "grad_norm": 3.0198252201080322, "learning_rate": 1.972235054194995e-05, "loss": 0.9195, "step": 4684 }, { "epoch": 0.7648667401330558, "grad_norm": 2.679103136062622, "learning_rate": 1.9722200490780863e-05, "loss": 0.7471, "step": 4685 }, { "epoch": 0.7650299987755602, "grad_norm": 3.515249729156494, "learning_rate": 1.9722050399647473e-05, "loss": 0.953, "step": 4686 }, { "epoch": 0.7651932574180645, "grad_norm": 3.1638848781585693, "learning_rate": 1.9721900268550412e-05, "loss": 0.9296, "step": 4687 }, { "epoch": 0.765356516060569, "grad_norm": 2.605095863342285, "learning_rate": 1.9721750097490285e-05, "loss": 0.8605, "step": 4688 }, { "epoch": 0.7655197747030733, "grad_norm": 2.487682580947876, "learning_rate": 1.9721599886467716e-05, "loss": 0.721, "step": 4689 }, { "epoch": 0.7656830333455777, "grad_norm": 2.6693665981292725, "learning_rate": 1.9721449635483322e-05, "loss": 0.8054, "step": 4690 }, { "epoch": 0.7658462919880821, "grad_norm": 3.0715410709381104, "learning_rate": 1.972129934453772e-05, "loss": 0.8602, "step": 4691 }, { "epoch": 0.7660095506305865, "grad_norm": 2.968975067138672, "learning_rate": 1.9721149013631522e-05, "loss": 0.8536, "step": 4692 }, { "epoch": 0.766172809273091, "grad_norm": 3.643099069595337, "learning_rate": 1.9720998642765356e-05, "loss": 1.0929, "step": 4693 }, { "epoch": 0.7663360679155953, "grad_norm": 2.460289239883423, "learning_rate": 1.9720848231939834e-05, "loss": 0.5955, "step": 4694 }, { "epoch": 0.7664993265580997, "grad_norm": 2.8343307971954346, "learning_rate": 1.9720697781155574e-05, "loss": 0.8646, "step": 4695 }, { "epoch": 0.766662585200604, "grad_norm": 2.7531609535217285, "learning_rate": 1.9720547290413195e-05, "loss": 0.7873, "step": 4696 }, { "epoch": 0.7668258438431085, "grad_norm": 2.635080575942993, "learning_rate": 1.972039675971332e-05, "loss": 0.7822, "step": 4697 }, { "epoch": 0.7669891024856128, "grad_norm": 3.287172317504883, "learning_rate": 1.9720246189056562e-05, "loss": 0.8572, "step": 4698 }, { "epoch": 0.7671523611281172, "grad_norm": 3.080866575241089, "learning_rate": 1.9720095578443544e-05, "loss": 0.7787, "step": 4699 }, { "epoch": 0.7673156197706216, "grad_norm": 2.4449620246887207, "learning_rate": 1.971994492787488e-05, "loss": 0.757, "step": 4700 }, { "epoch": 0.767478878413126, "grad_norm": 3.1549696922302246, "learning_rate": 1.9719794237351196e-05, "loss": 0.9159, "step": 4701 }, { "epoch": 0.7676421370556303, "grad_norm": 2.984269618988037, "learning_rate": 1.971964350687311e-05, "loss": 0.8389, "step": 4702 }, { "epoch": 0.7678053956981348, "grad_norm": 3.281257152557373, "learning_rate": 1.9719492736441234e-05, "loss": 0.8962, "step": 4703 }, { "epoch": 0.7679686543406392, "grad_norm": 3.019685983657837, "learning_rate": 1.9719341926056193e-05, "loss": 0.8114, "step": 4704 }, { "epoch": 0.7681319129831435, "grad_norm": 3.1256136894226074, "learning_rate": 1.971919107571861e-05, "loss": 0.823, "step": 4705 }, { "epoch": 0.768295171625648, "grad_norm": 3.095655679702759, "learning_rate": 1.9719040185429103e-05, "loss": 0.8636, "step": 4706 }, { "epoch": 0.7684584302681523, "grad_norm": 2.572111129760742, "learning_rate": 1.971888925518829e-05, "loss": 0.6585, "step": 4707 }, { "epoch": 0.7686216889106567, "grad_norm": 3.2600247859954834, "learning_rate": 1.9718738284996797e-05, "loss": 0.8321, "step": 4708 }, { "epoch": 0.7687849475531611, "grad_norm": 2.71035099029541, "learning_rate": 1.9718587274855238e-05, "loss": 0.7061, "step": 4709 }, { "epoch": 0.7689482061956655, "grad_norm": 3.2977287769317627, "learning_rate": 1.9718436224764234e-05, "loss": 0.8151, "step": 4710 }, { "epoch": 0.7691114648381698, "grad_norm": 2.8070669174194336, "learning_rate": 1.971828513472441e-05, "loss": 0.7026, "step": 4711 }, { "epoch": 0.7692747234806743, "grad_norm": 2.9807004928588867, "learning_rate": 1.9718134004736384e-05, "loss": 0.8409, "step": 4712 }, { "epoch": 0.7694379821231786, "grad_norm": 2.894676923751831, "learning_rate": 1.9717982834800783e-05, "loss": 0.7134, "step": 4713 }, { "epoch": 0.769601240765683, "grad_norm": 3.6453282833099365, "learning_rate": 1.9717831624918223e-05, "loss": 0.8211, "step": 4714 }, { "epoch": 0.7697644994081875, "grad_norm": 2.6339447498321533, "learning_rate": 1.9717680375089324e-05, "loss": 0.6437, "step": 4715 }, { "epoch": 0.7699277580506918, "grad_norm": 2.869224786758423, "learning_rate": 1.9717529085314712e-05, "loss": 0.8662, "step": 4716 }, { "epoch": 0.7700910166931962, "grad_norm": 2.974276542663574, "learning_rate": 1.9717377755595004e-05, "loss": 0.7983, "step": 4717 }, { "epoch": 0.7702542753357006, "grad_norm": 2.918203353881836, "learning_rate": 1.971722638593083e-05, "loss": 0.8494, "step": 4718 }, { "epoch": 0.770417533978205, "grad_norm": 3.418139934539795, "learning_rate": 1.9717074976322803e-05, "loss": 0.7955, "step": 4719 }, { "epoch": 0.7705807926207093, "grad_norm": 3.4519190788269043, "learning_rate": 1.9716923526771552e-05, "loss": 0.9844, "step": 4720 }, { "epoch": 0.7707440512632138, "grad_norm": 3.195607900619507, "learning_rate": 1.9716772037277697e-05, "loss": 0.8492, "step": 4721 }, { "epoch": 0.7709073099057181, "grad_norm": 2.882479429244995, "learning_rate": 1.971662050784186e-05, "loss": 0.7841, "step": 4722 }, { "epoch": 0.7710705685482225, "grad_norm": 3.2786152362823486, "learning_rate": 1.971646893846467e-05, "loss": 1.1184, "step": 4723 }, { "epoch": 0.7712338271907269, "grad_norm": 3.239333152770996, "learning_rate": 1.971631732914674e-05, "loss": 0.9331, "step": 4724 }, { "epoch": 0.7713970858332313, "grad_norm": 2.4649734497070312, "learning_rate": 1.9716165679888702e-05, "loss": 0.7042, "step": 4725 }, { "epoch": 0.7715603444757357, "grad_norm": 2.827240228652954, "learning_rate": 1.9716013990691173e-05, "loss": 0.7768, "step": 4726 }, { "epoch": 0.7717236031182401, "grad_norm": 3.2281007766723633, "learning_rate": 1.971586226155478e-05, "loss": 0.9307, "step": 4727 }, { "epoch": 0.7718868617607445, "grad_norm": 2.6158177852630615, "learning_rate": 1.9715710492480147e-05, "loss": 0.7377, "step": 4728 }, { "epoch": 0.7720501204032488, "grad_norm": 2.966662645339966, "learning_rate": 1.9715558683467893e-05, "loss": 0.7462, "step": 4729 }, { "epoch": 0.7722133790457533, "grad_norm": 3.00727915763855, "learning_rate": 1.971540683451865e-05, "loss": 0.8997, "step": 4730 }, { "epoch": 0.7723766376882576, "grad_norm": 2.435283660888672, "learning_rate": 1.9715254945633035e-05, "loss": 0.7246, "step": 4731 }, { "epoch": 0.772539896330762, "grad_norm": 2.9273157119750977, "learning_rate": 1.9715103016811678e-05, "loss": 1.0547, "step": 4732 }, { "epoch": 0.7727031549732664, "grad_norm": 2.911414384841919, "learning_rate": 1.97149510480552e-05, "loss": 0.899, "step": 4733 }, { "epoch": 0.7728664136157708, "grad_norm": 3.0877058506011963, "learning_rate": 1.9714799039364227e-05, "loss": 0.8761, "step": 4734 }, { "epoch": 0.7730296722582751, "grad_norm": 3.100616216659546, "learning_rate": 1.971464699073938e-05, "loss": 0.9514, "step": 4735 }, { "epoch": 0.7731929309007796, "grad_norm": 2.602221727371216, "learning_rate": 1.971449490218129e-05, "loss": 0.7289, "step": 4736 }, { "epoch": 0.773356189543284, "grad_norm": 2.8497931957244873, "learning_rate": 1.971434277369058e-05, "loss": 0.9326, "step": 4737 }, { "epoch": 0.7735194481857883, "grad_norm": 3.3046035766601562, "learning_rate": 1.9714190605267875e-05, "loss": 0.798, "step": 4738 }, { "epoch": 0.7736827068282928, "grad_norm": 3.0945513248443604, "learning_rate": 1.9714038396913797e-05, "loss": 0.9448, "step": 4739 }, { "epoch": 0.7738459654707971, "grad_norm": 3.1851437091827393, "learning_rate": 1.971388614862898e-05, "loss": 1.1098, "step": 4740 }, { "epoch": 0.7740092241133015, "grad_norm": 2.8660154342651367, "learning_rate": 1.9713733860414043e-05, "loss": 0.8411, "step": 4741 }, { "epoch": 0.7741724827558059, "grad_norm": 3.3839197158813477, "learning_rate": 1.9713581532269614e-05, "loss": 1.0617, "step": 4742 }, { "epoch": 0.7743357413983103, "grad_norm": 2.6857268810272217, "learning_rate": 1.971342916419632e-05, "loss": 0.8211, "step": 4743 }, { "epoch": 0.7744990000408146, "grad_norm": 2.928813934326172, "learning_rate": 1.9713276756194783e-05, "loss": 0.9675, "step": 4744 }, { "epoch": 0.7746622586833191, "grad_norm": 2.222456455230713, "learning_rate": 1.9713124308265635e-05, "loss": 0.6374, "step": 4745 }, { "epoch": 0.7748255173258234, "grad_norm": 3.220747232437134, "learning_rate": 1.9712971820409502e-05, "loss": 1.0419, "step": 4746 }, { "epoch": 0.7749887759683278, "grad_norm": 3.082876443862915, "learning_rate": 1.9712819292627007e-05, "loss": 0.91, "step": 4747 }, { "epoch": 0.7751520346108323, "grad_norm": 3.262737274169922, "learning_rate": 1.971266672491878e-05, "loss": 0.928, "step": 4748 }, { "epoch": 0.7753152932533366, "grad_norm": 2.677565813064575, "learning_rate": 1.9712514117285447e-05, "loss": 0.8604, "step": 4749 }, { "epoch": 0.775478551895841, "grad_norm": 2.7867469787597656, "learning_rate": 1.971236146972764e-05, "loss": 0.8082, "step": 4750 }, { "epoch": 0.7756418105383454, "grad_norm": 2.6727049350738525, "learning_rate": 1.9712208782245978e-05, "loss": 0.7995, "step": 4751 }, { "epoch": 0.7758050691808498, "grad_norm": 2.656630754470825, "learning_rate": 1.9712056054841094e-05, "loss": 0.7226, "step": 4752 }, { "epoch": 0.7759683278233541, "grad_norm": 3.4746482372283936, "learning_rate": 1.9711903287513616e-05, "loss": 0.8061, "step": 4753 }, { "epoch": 0.7761315864658586, "grad_norm": 2.759033203125, "learning_rate": 1.9711750480264167e-05, "loss": 0.7731, "step": 4754 }, { "epoch": 0.7762948451083629, "grad_norm": 2.6023366451263428, "learning_rate": 1.9711597633093385e-05, "loss": 0.7628, "step": 4755 }, { "epoch": 0.7764581037508673, "grad_norm": 3.133256196975708, "learning_rate": 1.971144474600189e-05, "loss": 0.8047, "step": 4756 }, { "epoch": 0.7766213623933717, "grad_norm": 3.1878747940063477, "learning_rate": 1.971129181899031e-05, "loss": 1.1102, "step": 4757 }, { "epoch": 0.7767846210358761, "grad_norm": 3.1100497245788574, "learning_rate": 1.9711138852059277e-05, "loss": 1.0354, "step": 4758 }, { "epoch": 0.7769478796783805, "grad_norm": 3.857506036758423, "learning_rate": 1.971098584520942e-05, "loss": 0.989, "step": 4759 }, { "epoch": 0.7771111383208849, "grad_norm": 3.2673091888427734, "learning_rate": 1.9710832798441366e-05, "loss": 1.0472, "step": 4760 }, { "epoch": 0.7772743969633893, "grad_norm": 2.705951452255249, "learning_rate": 1.9710679711755748e-05, "loss": 0.7232, "step": 4761 }, { "epoch": 0.7774376556058936, "grad_norm": 2.8354811668395996, "learning_rate": 1.9710526585153187e-05, "loss": 0.8544, "step": 4762 }, { "epoch": 0.7776009142483981, "grad_norm": 2.9027085304260254, "learning_rate": 1.9710373418634324e-05, "loss": 0.8807, "step": 4763 }, { "epoch": 0.7777641728909024, "grad_norm": 2.9610559940338135, "learning_rate": 1.971022021219978e-05, "loss": 0.7273, "step": 4764 }, { "epoch": 0.7779274315334068, "grad_norm": 3.0686590671539307, "learning_rate": 1.971006696585019e-05, "loss": 0.8507, "step": 4765 }, { "epoch": 0.7780906901759111, "grad_norm": 2.9157509803771973, "learning_rate": 1.9709913679586173e-05, "loss": 0.8254, "step": 4766 }, { "epoch": 0.7782539488184156, "grad_norm": 2.921471118927002, "learning_rate": 1.9709760353408373e-05, "loss": 0.7943, "step": 4767 }, { "epoch": 0.7784172074609199, "grad_norm": 3.0661559104919434, "learning_rate": 1.9709606987317414e-05, "loss": 0.8707, "step": 4768 }, { "epoch": 0.7785804661034244, "grad_norm": 2.7380082607269287, "learning_rate": 1.9709453581313927e-05, "loss": 0.6753, "step": 4769 }, { "epoch": 0.7787437247459288, "grad_norm": 2.6612818241119385, "learning_rate": 1.9709300135398543e-05, "loss": 0.7789, "step": 4770 }, { "epoch": 0.7789069833884331, "grad_norm": 3.402494430541992, "learning_rate": 1.970914664957189e-05, "loss": 0.9613, "step": 4771 }, { "epoch": 0.7790702420309376, "grad_norm": 3.3129830360412598, "learning_rate": 1.9708993123834602e-05, "loss": 1.1556, "step": 4772 }, { "epoch": 0.7792335006734419, "grad_norm": 3.079021453857422, "learning_rate": 1.9708839558187313e-05, "loss": 0.8525, "step": 4773 }, { "epoch": 0.7793967593159463, "grad_norm": 2.451061487197876, "learning_rate": 1.9708685952630646e-05, "loss": 0.6513, "step": 4774 }, { "epoch": 0.7795600179584506, "grad_norm": 2.350382089614868, "learning_rate": 1.9708532307165236e-05, "loss": 0.7295, "step": 4775 }, { "epoch": 0.7797232766009551, "grad_norm": 2.918069839477539, "learning_rate": 1.970837862179172e-05, "loss": 0.8001, "step": 4776 }, { "epoch": 0.7798865352434594, "grad_norm": 3.0859835147857666, "learning_rate": 1.9708224896510725e-05, "loss": 0.8057, "step": 4777 }, { "epoch": 0.7800497938859638, "grad_norm": 2.7211155891418457, "learning_rate": 1.9708071131322883e-05, "loss": 0.7865, "step": 4778 }, { "epoch": 0.7802130525284682, "grad_norm": 2.7718286514282227, "learning_rate": 1.9707917326228822e-05, "loss": 0.8077, "step": 4779 }, { "epoch": 0.7803763111709726, "grad_norm": 2.6208739280700684, "learning_rate": 1.9707763481229182e-05, "loss": 0.7215, "step": 4780 }, { "epoch": 0.780539569813477, "grad_norm": 2.932391405105591, "learning_rate": 1.9707609596324594e-05, "loss": 0.7761, "step": 4781 }, { "epoch": 0.7807028284559814, "grad_norm": 3.1277565956115723, "learning_rate": 1.9707455671515683e-05, "loss": 0.9138, "step": 4782 }, { "epoch": 0.7808660870984858, "grad_norm": 3.072155475616455, "learning_rate": 1.970730170680309e-05, "loss": 0.9107, "step": 4783 }, { "epoch": 0.7810293457409901, "grad_norm": 3.433004140853882, "learning_rate": 1.9707147702187445e-05, "loss": 0.9897, "step": 4784 }, { "epoch": 0.7811926043834946, "grad_norm": 2.832782506942749, "learning_rate": 1.9706993657669384e-05, "loss": 0.8925, "step": 4785 }, { "epoch": 0.7813558630259989, "grad_norm": 2.811690330505371, "learning_rate": 1.9706839573249533e-05, "loss": 0.7777, "step": 4786 }, { "epoch": 0.7815191216685033, "grad_norm": 3.750073194503784, "learning_rate": 1.9706685448928534e-05, "loss": 0.8162, "step": 4787 }, { "epoch": 0.7816823803110077, "grad_norm": 2.709095001220703, "learning_rate": 1.9706531284707015e-05, "loss": 0.8731, "step": 4788 }, { "epoch": 0.7818456389535121, "grad_norm": 3.272259473800659, "learning_rate": 1.970637708058561e-05, "loss": 0.8855, "step": 4789 }, { "epoch": 0.7820088975960164, "grad_norm": 3.667264699935913, "learning_rate": 1.9706222836564953e-05, "loss": 0.9951, "step": 4790 }, { "epoch": 0.7821721562385209, "grad_norm": 2.9188666343688965, "learning_rate": 1.9706068552645677e-05, "loss": 0.7394, "step": 4791 }, { "epoch": 0.7823354148810253, "grad_norm": 4.34838342666626, "learning_rate": 1.9705914228828423e-05, "loss": 0.8724, "step": 4792 }, { "epoch": 0.7824986735235296, "grad_norm": 2.7353360652923584, "learning_rate": 1.970575986511382e-05, "loss": 0.8239, "step": 4793 }, { "epoch": 0.7826619321660341, "grad_norm": 2.6925652027130127, "learning_rate": 1.97056054615025e-05, "loss": 0.7015, "step": 4794 }, { "epoch": 0.7828251908085384, "grad_norm": 3.082111120223999, "learning_rate": 1.9705451017995102e-05, "loss": 0.8799, "step": 4795 }, { "epoch": 0.7829884494510428, "grad_norm": 3.048891305923462, "learning_rate": 1.970529653459226e-05, "loss": 0.9144, "step": 4796 }, { "epoch": 0.7831517080935472, "grad_norm": 2.414896011352539, "learning_rate": 1.9705142011294605e-05, "loss": 0.6363, "step": 4797 }, { "epoch": 0.7833149667360516, "grad_norm": 2.644235849380493, "learning_rate": 1.970498744810278e-05, "loss": 0.7156, "step": 4798 }, { "epoch": 0.7834782253785559, "grad_norm": 3.3062326908111572, "learning_rate": 1.9704832845017418e-05, "loss": 0.8496, "step": 4799 }, { "epoch": 0.7836414840210604, "grad_norm": 2.866882801055908, "learning_rate": 1.9704678202039148e-05, "loss": 0.8111, "step": 4800 }, { "epoch": 0.7838047426635647, "grad_norm": 2.629066228866577, "learning_rate": 1.970452351916861e-05, "loss": 0.8488, "step": 4801 }, { "epoch": 0.7839680013060691, "grad_norm": 3.2000722885131836, "learning_rate": 1.9704368796406445e-05, "loss": 1.0925, "step": 4802 }, { "epoch": 0.7841312599485736, "grad_norm": 3.492006778717041, "learning_rate": 1.9704214033753282e-05, "loss": 0.961, "step": 4803 }, { "epoch": 0.7842945185910779, "grad_norm": 2.432926893234253, "learning_rate": 1.9704059231209757e-05, "loss": 0.7221, "step": 4804 }, { "epoch": 0.7844577772335823, "grad_norm": 2.8461177349090576, "learning_rate": 1.9703904388776512e-05, "loss": 0.9245, "step": 4805 }, { "epoch": 0.7846210358760867, "grad_norm": 3.1014301776885986, "learning_rate": 1.970374950645418e-05, "loss": 0.8807, "step": 4806 }, { "epoch": 0.7847842945185911, "grad_norm": 2.8576886653900146, "learning_rate": 1.9703594584243394e-05, "loss": 0.9502, "step": 4807 }, { "epoch": 0.7849475531610954, "grad_norm": 2.53722882270813, "learning_rate": 1.97034396221448e-05, "loss": 0.8463, "step": 4808 }, { "epoch": 0.7851108118035999, "grad_norm": 2.845552444458008, "learning_rate": 1.9703284620159026e-05, "loss": 0.825, "step": 4809 }, { "epoch": 0.7852740704461042, "grad_norm": 2.8877906799316406, "learning_rate": 1.9703129578286714e-05, "loss": 0.911, "step": 4810 }, { "epoch": 0.7854373290886086, "grad_norm": 2.9124844074249268, "learning_rate": 1.97029744965285e-05, "loss": 0.795, "step": 4811 }, { "epoch": 0.785600587731113, "grad_norm": 3.4227089881896973, "learning_rate": 1.970281937488502e-05, "loss": 0.9118, "step": 4812 }, { "epoch": 0.7857638463736174, "grad_norm": 2.739997625350952, "learning_rate": 1.9702664213356915e-05, "loss": 0.7411, "step": 4813 }, { "epoch": 0.7859271050161218, "grad_norm": 2.4214859008789062, "learning_rate": 1.9702509011944822e-05, "loss": 0.7581, "step": 4814 }, { "epoch": 0.7860903636586262, "grad_norm": 2.8720974922180176, "learning_rate": 1.9702353770649377e-05, "loss": 0.7807, "step": 4815 }, { "epoch": 0.7862536223011306, "grad_norm": 2.966137409210205, "learning_rate": 1.970219848947122e-05, "loss": 0.9437, "step": 4816 }, { "epoch": 0.7864168809436349, "grad_norm": 2.9888086318969727, "learning_rate": 1.9702043168410987e-05, "loss": 0.8022, "step": 4817 }, { "epoch": 0.7865801395861394, "grad_norm": 3.1939189434051514, "learning_rate": 1.970188780746932e-05, "loss": 0.8386, "step": 4818 }, { "epoch": 0.7867433982286437, "grad_norm": 3.0313949584960938, "learning_rate": 1.9701732406646853e-05, "loss": 0.9252, "step": 4819 }, { "epoch": 0.7869066568711481, "grad_norm": 3.1291918754577637, "learning_rate": 1.970157696594423e-05, "loss": 0.8828, "step": 4820 }, { "epoch": 0.7870699155136525, "grad_norm": 2.5133306980133057, "learning_rate": 1.9701421485362084e-05, "loss": 0.7465, "step": 4821 }, { "epoch": 0.7872331741561569, "grad_norm": 2.9796063899993896, "learning_rate": 1.970126596490106e-05, "loss": 0.8004, "step": 4822 }, { "epoch": 0.7873964327986612, "grad_norm": 2.98349666595459, "learning_rate": 1.9701110404561795e-05, "loss": 0.88, "step": 4823 }, { "epoch": 0.7875596914411657, "grad_norm": 3.0009965896606445, "learning_rate": 1.9700954804344927e-05, "loss": 0.7725, "step": 4824 }, { "epoch": 0.7877229500836701, "grad_norm": 3.084545612335205, "learning_rate": 1.9700799164251096e-05, "loss": 0.8629, "step": 4825 }, { "epoch": 0.7878862087261744, "grad_norm": 3.175812244415283, "learning_rate": 1.9700643484280944e-05, "loss": 1.0224, "step": 4826 }, { "epoch": 0.7880494673686789, "grad_norm": 2.8502700328826904, "learning_rate": 1.970048776443511e-05, "loss": 0.7625, "step": 4827 }, { "epoch": 0.7882127260111832, "grad_norm": 2.3216164112091064, "learning_rate": 1.9700332004714232e-05, "loss": 0.7156, "step": 4828 }, { "epoch": 0.7883759846536876, "grad_norm": 2.7164340019226074, "learning_rate": 1.970017620511895e-05, "loss": 0.779, "step": 4829 }, { "epoch": 0.788539243296192, "grad_norm": 2.852827787399292, "learning_rate": 1.9700020365649913e-05, "loss": 0.8143, "step": 4830 }, { "epoch": 0.7887025019386964, "grad_norm": 2.7626705169677734, "learning_rate": 1.969986448630775e-05, "loss": 0.8335, "step": 4831 }, { "epoch": 0.7888657605812007, "grad_norm": 2.732985496520996, "learning_rate": 1.969970856709311e-05, "loss": 0.8899, "step": 4832 }, { "epoch": 0.7890290192237052, "grad_norm": 3.3221871852874756, "learning_rate": 1.969955260800663e-05, "loss": 0.9452, "step": 4833 }, { "epoch": 0.7891922778662095, "grad_norm": 3.5037810802459717, "learning_rate": 1.969939660904895e-05, "loss": 1.025, "step": 4834 }, { "epoch": 0.7893555365087139, "grad_norm": 2.6510791778564453, "learning_rate": 1.969924057022071e-05, "loss": 0.6884, "step": 4835 }, { "epoch": 0.7895187951512184, "grad_norm": 3.02824068069458, "learning_rate": 1.969908449152256e-05, "loss": 0.8713, "step": 4836 }, { "epoch": 0.7896820537937227, "grad_norm": 3.184926748275757, "learning_rate": 1.9698928372955134e-05, "loss": 0.9902, "step": 4837 }, { "epoch": 0.7898453124362271, "grad_norm": 3.005657911300659, "learning_rate": 1.9698772214519075e-05, "loss": 0.9728, "step": 4838 }, { "epoch": 0.7900085710787315, "grad_norm": 3.1309452056884766, "learning_rate": 1.9698616016215025e-05, "loss": 1.1124, "step": 4839 }, { "epoch": 0.7901718297212359, "grad_norm": 2.887946367263794, "learning_rate": 1.9698459778043627e-05, "loss": 0.8154, "step": 4840 }, { "epoch": 0.7903350883637402, "grad_norm": 2.283249616622925, "learning_rate": 1.9698303500005523e-05, "loss": 0.6291, "step": 4841 }, { "epoch": 0.7904983470062447, "grad_norm": 2.632122039794922, "learning_rate": 1.9698147182101356e-05, "loss": 0.8861, "step": 4842 }, { "epoch": 0.790661605648749, "grad_norm": 3.391664505004883, "learning_rate": 1.969799082433177e-05, "loss": 1.0042, "step": 4843 }, { "epoch": 0.7908248642912534, "grad_norm": 2.8446223735809326, "learning_rate": 1.9697834426697404e-05, "loss": 0.8617, "step": 4844 }, { "epoch": 0.7909881229337578, "grad_norm": 3.4989402294158936, "learning_rate": 1.96976779891989e-05, "loss": 1.0745, "step": 4845 }, { "epoch": 0.7911513815762622, "grad_norm": 3.177138328552246, "learning_rate": 1.9697521511836907e-05, "loss": 1.0802, "step": 4846 }, { "epoch": 0.7913146402187666, "grad_norm": 2.550302267074585, "learning_rate": 1.969736499461206e-05, "loss": 0.6829, "step": 4847 }, { "epoch": 0.791477898861271, "grad_norm": 2.9546148777008057, "learning_rate": 1.9697208437525013e-05, "loss": 0.9058, "step": 4848 }, { "epoch": 0.7916411575037754, "grad_norm": 3.1028754711151123, "learning_rate": 1.9697051840576402e-05, "loss": 0.8804, "step": 4849 }, { "epoch": 0.7918044161462797, "grad_norm": 2.666532039642334, "learning_rate": 1.969689520376687e-05, "loss": 0.7896, "step": 4850 }, { "epoch": 0.7919676747887842, "grad_norm": 3.0705184936523438, "learning_rate": 1.9696738527097064e-05, "loss": 0.8159, "step": 4851 }, { "epoch": 0.7921309334312885, "grad_norm": 2.900324821472168, "learning_rate": 1.969658181056763e-05, "loss": 0.9967, "step": 4852 }, { "epoch": 0.7922941920737929, "grad_norm": 2.9089667797088623, "learning_rate": 1.9696425054179207e-05, "loss": 0.9653, "step": 4853 }, { "epoch": 0.7924574507162973, "grad_norm": 2.584885835647583, "learning_rate": 1.969626825793244e-05, "loss": 0.8734, "step": 4854 }, { "epoch": 0.7926207093588017, "grad_norm": 3.033998489379883, "learning_rate": 1.969611142182798e-05, "loss": 0.8742, "step": 4855 }, { "epoch": 0.792783968001306, "grad_norm": 2.793329954147339, "learning_rate": 1.9695954545866466e-05, "loss": 0.8868, "step": 4856 }, { "epoch": 0.7929472266438105, "grad_norm": 2.5971767902374268, "learning_rate": 1.9695797630048545e-05, "loss": 0.6456, "step": 4857 }, { "epoch": 0.7931104852863149, "grad_norm": 3.487159490585327, "learning_rate": 1.9695640674374857e-05, "loss": 1.0178, "step": 4858 }, { "epoch": 0.7932737439288192, "grad_norm": 2.425628662109375, "learning_rate": 1.9695483678846053e-05, "loss": 0.7412, "step": 4859 }, { "epoch": 0.7934370025713237, "grad_norm": 3.0237576961517334, "learning_rate": 1.9695326643462777e-05, "loss": 0.825, "step": 4860 }, { "epoch": 0.793600261213828, "grad_norm": 2.653154134750366, "learning_rate": 1.9695169568225675e-05, "loss": 0.7812, "step": 4861 }, { "epoch": 0.7937635198563324, "grad_norm": 3.0746841430664062, "learning_rate": 1.9695012453135393e-05, "loss": 0.8146, "step": 4862 }, { "epoch": 0.7939267784988367, "grad_norm": 3.0315358638763428, "learning_rate": 1.9694855298192572e-05, "loss": 0.8989, "step": 4863 }, { "epoch": 0.7940900371413412, "grad_norm": 3.1020498275756836, "learning_rate": 1.969469810339786e-05, "loss": 0.8861, "step": 4864 }, { "epoch": 0.7942532957838455, "grad_norm": 3.076432228088379, "learning_rate": 1.969454086875191e-05, "loss": 0.8044, "step": 4865 }, { "epoch": 0.79441655442635, "grad_norm": 3.1369292736053467, "learning_rate": 1.969438359425536e-05, "loss": 0.7452, "step": 4866 }, { "epoch": 0.7945798130688543, "grad_norm": 3.078744888305664, "learning_rate": 1.969422627990886e-05, "loss": 0.7819, "step": 4867 }, { "epoch": 0.7947430717113587, "grad_norm": 2.571061611175537, "learning_rate": 1.9694068925713056e-05, "loss": 0.8241, "step": 4868 }, { "epoch": 0.7949063303538632, "grad_norm": 3.8642451763153076, "learning_rate": 1.9693911531668596e-05, "loss": 0.9621, "step": 4869 }, { "epoch": 0.7950695889963675, "grad_norm": 2.966740608215332, "learning_rate": 1.969375409777613e-05, "loss": 0.8797, "step": 4870 }, { "epoch": 0.7952328476388719, "grad_norm": 2.658071756362915, "learning_rate": 1.9693596624036294e-05, "loss": 0.6938, "step": 4871 }, { "epoch": 0.7953961062813762, "grad_norm": 3.4041249752044678, "learning_rate": 1.9693439110449746e-05, "loss": 0.7834, "step": 4872 }, { "epoch": 0.7955593649238807, "grad_norm": 2.8757784366607666, "learning_rate": 1.969328155701713e-05, "loss": 0.8306, "step": 4873 }, { "epoch": 0.795722623566385, "grad_norm": 2.828058958053589, "learning_rate": 1.9693123963739094e-05, "loss": 0.76, "step": 4874 }, { "epoch": 0.7958858822088895, "grad_norm": 3.1442062854766846, "learning_rate": 1.9692966330616285e-05, "loss": 0.8189, "step": 4875 }, { "epoch": 0.7960491408513938, "grad_norm": 2.853790760040283, "learning_rate": 1.969280865764935e-05, "loss": 0.7012, "step": 4876 }, { "epoch": 0.7962123994938982, "grad_norm": 3.3299319744110107, "learning_rate": 1.969265094483894e-05, "loss": 0.8508, "step": 4877 }, { "epoch": 0.7963756581364025, "grad_norm": 3.316946029663086, "learning_rate": 1.96924931921857e-05, "loss": 0.8379, "step": 4878 }, { "epoch": 0.796538916778907, "grad_norm": 3.3426101207733154, "learning_rate": 1.9692335399690284e-05, "loss": 0.8866, "step": 4879 }, { "epoch": 0.7967021754214114, "grad_norm": 3.454881429672241, "learning_rate": 1.9692177567353332e-05, "loss": 0.9273, "step": 4880 }, { "epoch": 0.7968654340639157, "grad_norm": 2.4873549938201904, "learning_rate": 1.96920196951755e-05, "loss": 0.7318, "step": 4881 }, { "epoch": 0.7970286927064202, "grad_norm": 2.504392385482788, "learning_rate": 1.9691861783157434e-05, "loss": 0.7491, "step": 4882 }, { "epoch": 0.7971919513489245, "grad_norm": 3.1505372524261475, "learning_rate": 1.9691703831299786e-05, "loss": 0.8557, "step": 4883 }, { "epoch": 0.797355209991429, "grad_norm": 2.8173983097076416, "learning_rate": 1.9691545839603204e-05, "loss": 0.697, "step": 4884 }, { "epoch": 0.7975184686339333, "grad_norm": 3.0409154891967773, "learning_rate": 1.969138780806833e-05, "loss": 0.9628, "step": 4885 }, { "epoch": 0.7976817272764377, "grad_norm": 2.890205144882202, "learning_rate": 1.9691229736695828e-05, "loss": 0.8457, "step": 4886 }, { "epoch": 0.797844985918942, "grad_norm": 2.938734292984009, "learning_rate": 1.9691071625486336e-05, "loss": 0.8612, "step": 4887 }, { "epoch": 0.7980082445614465, "grad_norm": 3.0590462684631348, "learning_rate": 1.9690913474440508e-05, "loss": 1.0436, "step": 4888 }, { "epoch": 0.7981715032039508, "grad_norm": 2.8551394939422607, "learning_rate": 1.9690755283558992e-05, "loss": 0.9112, "step": 4889 }, { "epoch": 0.7983347618464552, "grad_norm": 2.7822511196136475, "learning_rate": 1.9690597052842448e-05, "loss": 0.7924, "step": 4890 }, { "epoch": 0.7984980204889597, "grad_norm": 2.5000362396240234, "learning_rate": 1.969043878229151e-05, "loss": 0.632, "step": 4891 }, { "epoch": 0.798661279131464, "grad_norm": 2.4901106357574463, "learning_rate": 1.969028047190684e-05, "loss": 0.7289, "step": 4892 }, { "epoch": 0.7988245377739684, "grad_norm": 2.6856772899627686, "learning_rate": 1.9690122121689087e-05, "loss": 0.9344, "step": 4893 }, { "epoch": 0.7989877964164728, "grad_norm": 2.6865792274475098, "learning_rate": 1.96899637316389e-05, "loss": 0.8642, "step": 4894 }, { "epoch": 0.7991510550589772, "grad_norm": 3.015941858291626, "learning_rate": 1.968980530175693e-05, "loss": 0.8999, "step": 4895 }, { "epoch": 0.7993143137014815, "grad_norm": 2.4052977561950684, "learning_rate": 1.9689646832043833e-05, "loss": 0.7909, "step": 4896 }, { "epoch": 0.799477572343986, "grad_norm": 2.6271629333496094, "learning_rate": 1.968948832250025e-05, "loss": 0.9087, "step": 4897 }, { "epoch": 0.7996408309864903, "grad_norm": 2.4007487297058105, "learning_rate": 1.9689329773126843e-05, "loss": 0.7383, "step": 4898 }, { "epoch": 0.7998040896289947, "grad_norm": 3.1165478229522705, "learning_rate": 1.968917118392426e-05, "loss": 1.0589, "step": 4899 }, { "epoch": 0.7999673482714991, "grad_norm": 2.912405490875244, "learning_rate": 1.9689012554893154e-05, "loss": 0.8173, "step": 4900 }, { "epoch": 0.8001306069140035, "grad_norm": 2.2962582111358643, "learning_rate": 1.9688853886034175e-05, "loss": 0.5989, "step": 4901 }, { "epoch": 0.8002938655565079, "grad_norm": 2.638514518737793, "learning_rate": 1.9688695177347977e-05, "loss": 0.7468, "step": 4902 }, { "epoch": 0.8004571241990123, "grad_norm": 3.106320858001709, "learning_rate": 1.968853642883521e-05, "loss": 0.9656, "step": 4903 }, { "epoch": 0.8006203828415167, "grad_norm": 3.0665841102600098, "learning_rate": 1.9688377640496526e-05, "loss": 0.8871, "step": 4904 }, { "epoch": 0.800783641484021, "grad_norm": 3.647657871246338, "learning_rate": 1.9688218812332584e-05, "loss": 0.9588, "step": 4905 }, { "epoch": 0.8009469001265255, "grad_norm": 2.918325662612915, "learning_rate": 1.9688059944344033e-05, "loss": 0.8392, "step": 4906 }, { "epoch": 0.8011101587690298, "grad_norm": 3.0009655952453613, "learning_rate": 1.9687901036531522e-05, "loss": 0.8564, "step": 4907 }, { "epoch": 0.8012734174115342, "grad_norm": 2.9791526794433594, "learning_rate": 1.968774208889571e-05, "loss": 0.8615, "step": 4908 }, { "epoch": 0.8014366760540386, "grad_norm": 2.845475912094116, "learning_rate": 1.968758310143725e-05, "loss": 0.8875, "step": 4909 }, { "epoch": 0.801599934696543, "grad_norm": 2.87274432182312, "learning_rate": 1.968742407415679e-05, "loss": 0.859, "step": 4910 }, { "epoch": 0.8017631933390473, "grad_norm": 2.4237821102142334, "learning_rate": 1.9687265007054986e-05, "loss": 0.8981, "step": 4911 }, { "epoch": 0.8019264519815518, "grad_norm": 2.6555793285369873, "learning_rate": 1.9687105900132498e-05, "loss": 0.7378, "step": 4912 }, { "epoch": 0.8020897106240562, "grad_norm": 2.70582914352417, "learning_rate": 1.9686946753389974e-05, "loss": 0.8395, "step": 4913 }, { "epoch": 0.8022529692665605, "grad_norm": 2.327043294906616, "learning_rate": 1.968678756682807e-05, "loss": 0.7201, "step": 4914 }, { "epoch": 0.802416227909065, "grad_norm": 3.096003532409668, "learning_rate": 1.968662834044744e-05, "loss": 0.8762, "step": 4915 }, { "epoch": 0.8025794865515693, "grad_norm": 2.9239203929901123, "learning_rate": 1.9686469074248737e-05, "loss": 0.807, "step": 4916 }, { "epoch": 0.8027427451940737, "grad_norm": 3.0472934246063232, "learning_rate": 1.968630976823262e-05, "loss": 0.7255, "step": 4917 }, { "epoch": 0.8029060038365781, "grad_norm": 2.7278528213500977, "learning_rate": 1.968615042239974e-05, "loss": 0.7923, "step": 4918 }, { "epoch": 0.8030692624790825, "grad_norm": 2.705674171447754, "learning_rate": 1.968599103675075e-05, "loss": 0.818, "step": 4919 }, { "epoch": 0.8032325211215868, "grad_norm": 3.08219051361084, "learning_rate": 1.9685831611286312e-05, "loss": 0.9971, "step": 4920 }, { "epoch": 0.8033957797640913, "grad_norm": 2.3404321670532227, "learning_rate": 1.9685672146007078e-05, "loss": 0.7097, "step": 4921 }, { "epoch": 0.8035590384065956, "grad_norm": 3.149963140487671, "learning_rate": 1.96855126409137e-05, "loss": 0.8596, "step": 4922 }, { "epoch": 0.8037222970491, "grad_norm": 3.1269850730895996, "learning_rate": 1.968535309600684e-05, "loss": 0.943, "step": 4923 }, { "epoch": 0.8038855556916045, "grad_norm": 2.4993982315063477, "learning_rate": 1.968519351128715e-05, "loss": 0.5733, "step": 4924 }, { "epoch": 0.8040488143341088, "grad_norm": 3.0482826232910156, "learning_rate": 1.968503388675528e-05, "loss": 0.912, "step": 4925 }, { "epoch": 0.8042120729766132, "grad_norm": 3.0919182300567627, "learning_rate": 1.96848742224119e-05, "loss": 0.8261, "step": 4926 }, { "epoch": 0.8043753316191176, "grad_norm": 3.27392315864563, "learning_rate": 1.9684714518257657e-05, "loss": 0.7976, "step": 4927 }, { "epoch": 0.804538590261622, "grad_norm": 3.0519628524780273, "learning_rate": 1.968455477429321e-05, "loss": 0.8293, "step": 4928 }, { "epoch": 0.8047018489041263, "grad_norm": 2.909759998321533, "learning_rate": 1.9684394990519215e-05, "loss": 0.8179, "step": 4929 }, { "epoch": 0.8048651075466308, "grad_norm": 2.7773091793060303, "learning_rate": 1.968423516693633e-05, "loss": 0.8545, "step": 4930 }, { "epoch": 0.8050283661891351, "grad_norm": 2.9992787837982178, "learning_rate": 1.9684075303545205e-05, "loss": 0.8512, "step": 4931 }, { "epoch": 0.8051916248316395, "grad_norm": 2.992008924484253, "learning_rate": 1.9683915400346508e-05, "loss": 0.7323, "step": 4932 }, { "epoch": 0.805354883474144, "grad_norm": 2.7475430965423584, "learning_rate": 1.968375545734089e-05, "loss": 0.6968, "step": 4933 }, { "epoch": 0.8055181421166483, "grad_norm": 3.323331117630005, "learning_rate": 1.9683595474529015e-05, "loss": 1.0201, "step": 4934 }, { "epoch": 0.8056814007591527, "grad_norm": 2.6945083141326904, "learning_rate": 1.9683435451911526e-05, "loss": 0.7415, "step": 4935 }, { "epoch": 0.8058446594016571, "grad_norm": 2.926893949508667, "learning_rate": 1.9683275389489097e-05, "loss": 0.829, "step": 4936 }, { "epoch": 0.8060079180441615, "grad_norm": 2.9545278549194336, "learning_rate": 1.968311528726238e-05, "loss": 0.9518, "step": 4937 }, { "epoch": 0.8061711766866658, "grad_norm": 2.9485459327697754, "learning_rate": 1.9682955145232027e-05, "loss": 0.8232, "step": 4938 }, { "epoch": 0.8063344353291703, "grad_norm": 2.790372610092163, "learning_rate": 1.9682794963398707e-05, "loss": 0.8508, "step": 4939 }, { "epoch": 0.8064976939716746, "grad_norm": 2.8872594833374023, "learning_rate": 1.968263474176307e-05, "loss": 0.8682, "step": 4940 }, { "epoch": 0.806660952614179, "grad_norm": 2.763976573944092, "learning_rate": 1.9682474480325776e-05, "loss": 0.7278, "step": 4941 }, { "epoch": 0.8068242112566834, "grad_norm": 2.9177005290985107, "learning_rate": 1.968231417908749e-05, "loss": 0.8501, "step": 4942 }, { "epoch": 0.8069874698991878, "grad_norm": 3.589872121810913, "learning_rate": 1.9682153838048866e-05, "loss": 0.8744, "step": 4943 }, { "epoch": 0.8071507285416922, "grad_norm": 2.8716559410095215, "learning_rate": 1.968199345721056e-05, "loss": 0.8192, "step": 4944 }, { "epoch": 0.8073139871841966, "grad_norm": 3.059208631515503, "learning_rate": 1.9681833036573238e-05, "loss": 0.8851, "step": 4945 }, { "epoch": 0.807477245826701, "grad_norm": 2.9672493934631348, "learning_rate": 1.9681672576137552e-05, "loss": 0.9232, "step": 4946 }, { "epoch": 0.8076405044692053, "grad_norm": 3.1783697605133057, "learning_rate": 1.9681512075904168e-05, "loss": 0.863, "step": 4947 }, { "epoch": 0.8078037631117098, "grad_norm": 3.1197409629821777, "learning_rate": 1.9681351535873744e-05, "loss": 0.7479, "step": 4948 }, { "epoch": 0.8079670217542141, "grad_norm": 2.6715290546417236, "learning_rate": 1.9681190956046938e-05, "loss": 0.9431, "step": 4949 }, { "epoch": 0.8081302803967185, "grad_norm": 2.836576461791992, "learning_rate": 1.9681030336424416e-05, "loss": 0.9655, "step": 4950 }, { "epoch": 0.8082935390392229, "grad_norm": 2.5810983180999756, "learning_rate": 1.968086967700683e-05, "loss": 0.7211, "step": 4951 }, { "epoch": 0.8084567976817273, "grad_norm": 2.802873373031616, "learning_rate": 1.9680708977794846e-05, "loss": 0.9239, "step": 4952 }, { "epoch": 0.8086200563242316, "grad_norm": 2.6664044857025146, "learning_rate": 1.968054823878912e-05, "loss": 0.7785, "step": 4953 }, { "epoch": 0.808783314966736, "grad_norm": 2.895920991897583, "learning_rate": 1.9680387459990315e-05, "loss": 0.9131, "step": 4954 }, { "epoch": 0.8089465736092405, "grad_norm": 2.900550365447998, "learning_rate": 1.9680226641399097e-05, "loss": 0.8984, "step": 4955 }, { "epoch": 0.8091098322517448, "grad_norm": 2.715928792953491, "learning_rate": 1.968006578301612e-05, "loss": 0.8135, "step": 4956 }, { "epoch": 0.8092730908942493, "grad_norm": 2.251127243041992, "learning_rate": 1.9679904884842047e-05, "loss": 0.6176, "step": 4957 }, { "epoch": 0.8094363495367536, "grad_norm": 2.401334762573242, "learning_rate": 1.967974394687754e-05, "loss": 0.6413, "step": 4958 }, { "epoch": 0.809599608179258, "grad_norm": 2.8440427780151367, "learning_rate": 1.9679582969123264e-05, "loss": 0.847, "step": 4959 }, { "epoch": 0.8097628668217624, "grad_norm": 2.8503949642181396, "learning_rate": 1.9679421951579873e-05, "loss": 0.7731, "step": 4960 }, { "epoch": 0.8099261254642668, "grad_norm": 2.7983322143554688, "learning_rate": 1.9679260894248035e-05, "loss": 0.7472, "step": 4961 }, { "epoch": 0.8100893841067711, "grad_norm": 2.9432241916656494, "learning_rate": 1.967909979712841e-05, "loss": 0.8542, "step": 4962 }, { "epoch": 0.8102526427492756, "grad_norm": 2.675917387008667, "learning_rate": 1.9678938660221663e-05, "loss": 0.7507, "step": 4963 }, { "epoch": 0.8104159013917799, "grad_norm": 2.564946174621582, "learning_rate": 1.9678777483528452e-05, "loss": 0.7372, "step": 4964 }, { "epoch": 0.8105791600342843, "grad_norm": 2.9620041847229004, "learning_rate": 1.967861626704944e-05, "loss": 0.8856, "step": 4965 }, { "epoch": 0.8107424186767888, "grad_norm": 3.2268624305725098, "learning_rate": 1.9678455010785292e-05, "loss": 0.8587, "step": 4966 }, { "epoch": 0.8109056773192931, "grad_norm": 2.6595823764801025, "learning_rate": 1.967829371473667e-05, "loss": 0.763, "step": 4967 }, { "epoch": 0.8110689359617975, "grad_norm": 2.6303598880767822, "learning_rate": 1.9678132378904236e-05, "loss": 0.7083, "step": 4968 }, { "epoch": 0.8112321946043018, "grad_norm": 3.5315802097320557, "learning_rate": 1.9677971003288657e-05, "loss": 0.8708, "step": 4969 }, { "epoch": 0.8113954532468063, "grad_norm": 3.1999409198760986, "learning_rate": 1.9677809587890594e-05, "loss": 0.8488, "step": 4970 }, { "epoch": 0.8115587118893106, "grad_norm": 2.7635657787323, "learning_rate": 1.9677648132710704e-05, "loss": 0.7504, "step": 4971 }, { "epoch": 0.811721970531815, "grad_norm": 3.0637006759643555, "learning_rate": 1.967748663774966e-05, "loss": 0.7513, "step": 4972 }, { "epoch": 0.8118852291743194, "grad_norm": 3.2268495559692383, "learning_rate": 1.9677325103008124e-05, "loss": 0.859, "step": 4973 }, { "epoch": 0.8120484878168238, "grad_norm": 3.4261348247528076, "learning_rate": 1.967716352848676e-05, "loss": 1.0654, "step": 4974 }, { "epoch": 0.8122117464593281, "grad_norm": 3.8086395263671875, "learning_rate": 1.9677001914186225e-05, "loss": 0.7849, "step": 4975 }, { "epoch": 0.8123750051018326, "grad_norm": 3.2047107219696045, "learning_rate": 1.9676840260107196e-05, "loss": 1.0622, "step": 4976 }, { "epoch": 0.812538263744337, "grad_norm": 3.076155662536621, "learning_rate": 1.9676678566250328e-05, "loss": 0.9023, "step": 4977 }, { "epoch": 0.8127015223868413, "grad_norm": 3.145596504211426, "learning_rate": 1.9676516832616288e-05, "loss": 1.026, "step": 4978 }, { "epoch": 0.8128647810293458, "grad_norm": 2.873523235321045, "learning_rate": 1.967635505920574e-05, "loss": 0.7914, "step": 4979 }, { "epoch": 0.8130280396718501, "grad_norm": 3.0483500957489014, "learning_rate": 1.967619324601935e-05, "loss": 0.9014, "step": 4980 }, { "epoch": 0.8131912983143545, "grad_norm": 2.756852626800537, "learning_rate": 1.9676031393057783e-05, "loss": 0.9011, "step": 4981 }, { "epoch": 0.8133545569568589, "grad_norm": 3.251920223236084, "learning_rate": 1.9675869500321705e-05, "loss": 0.9424, "step": 4982 }, { "epoch": 0.8135178155993633, "grad_norm": 2.8940742015838623, "learning_rate": 1.9675707567811783e-05, "loss": 0.737, "step": 4983 }, { "epoch": 0.8136810742418676, "grad_norm": 2.887834072113037, "learning_rate": 1.967554559552868e-05, "loss": 0.8744, "step": 4984 }, { "epoch": 0.8138443328843721, "grad_norm": 3.0869805812835693, "learning_rate": 1.9675383583473064e-05, "loss": 0.9669, "step": 4985 }, { "epoch": 0.8140075915268764, "grad_norm": 2.353659152984619, "learning_rate": 1.9675221531645598e-05, "loss": 0.7391, "step": 4986 }, { "epoch": 0.8141708501693808, "grad_norm": 2.7837157249450684, "learning_rate": 1.9675059440046947e-05, "loss": 0.8134, "step": 4987 }, { "epoch": 0.8143341088118853, "grad_norm": 3.2399446964263916, "learning_rate": 1.967489730867778e-05, "loss": 0.9545, "step": 4988 }, { "epoch": 0.8144973674543896, "grad_norm": 2.817091703414917, "learning_rate": 1.9674735137538766e-05, "loss": 0.8367, "step": 4989 }, { "epoch": 0.814660626096894, "grad_norm": 2.8126420974731445, "learning_rate": 1.9674572926630568e-05, "loss": 0.8937, "step": 4990 }, { "epoch": 0.8148238847393984, "grad_norm": 2.5966312885284424, "learning_rate": 1.9674410675953854e-05, "loss": 0.8491, "step": 4991 }, { "epoch": 0.8149871433819028, "grad_norm": 3.394932270050049, "learning_rate": 1.9674248385509292e-05, "loss": 1.0296, "step": 4992 }, { "epoch": 0.8151504020244071, "grad_norm": 2.633849859237671, "learning_rate": 1.9674086055297543e-05, "loss": 0.7506, "step": 4993 }, { "epoch": 0.8153136606669116, "grad_norm": 2.3758084774017334, "learning_rate": 1.9673923685319285e-05, "loss": 0.7526, "step": 4994 }, { "epoch": 0.8154769193094159, "grad_norm": 2.8646528720855713, "learning_rate": 1.9673761275575178e-05, "loss": 0.8892, "step": 4995 }, { "epoch": 0.8156401779519203, "grad_norm": 2.9104671478271484, "learning_rate": 1.967359882606589e-05, "loss": 0.9609, "step": 4996 }, { "epoch": 0.8158034365944247, "grad_norm": 2.7134194374084473, "learning_rate": 1.967343633679209e-05, "loss": 0.8225, "step": 4997 }, { "epoch": 0.8159666952369291, "grad_norm": 2.5426437854766846, "learning_rate": 1.9673273807754443e-05, "loss": 0.7264, "step": 4998 }, { "epoch": 0.8161299538794335, "grad_norm": 2.5240256786346436, "learning_rate": 1.9673111238953625e-05, "loss": 0.7445, "step": 4999 }, { "epoch": 0.8162932125219379, "grad_norm": 3.0618398189544678, "learning_rate": 1.9672948630390296e-05, "loss": 0.8196, "step": 5000 }, { "epoch": 0.8164564711644423, "grad_norm": 2.8585216999053955, "learning_rate": 1.967278598206513e-05, "loss": 0.8539, "step": 5001 }, { "epoch": 0.8166197298069466, "grad_norm": 2.420274257659912, "learning_rate": 1.9672623293978788e-05, "loss": 0.695, "step": 5002 }, { "epoch": 0.8167829884494511, "grad_norm": 3.2272136211395264, "learning_rate": 1.967246056613195e-05, "loss": 0.9154, "step": 5003 }, { "epoch": 0.8169462470919554, "grad_norm": 2.727327346801758, "learning_rate": 1.9672297798525278e-05, "loss": 0.7131, "step": 5004 }, { "epoch": 0.8171095057344598, "grad_norm": 2.9885613918304443, "learning_rate": 1.967213499115944e-05, "loss": 0.8178, "step": 5005 }, { "epoch": 0.8172727643769642, "grad_norm": 3.1358797550201416, "learning_rate": 1.9671972144035107e-05, "loss": 0.8507, "step": 5006 }, { "epoch": 0.8174360230194686, "grad_norm": 2.899588108062744, "learning_rate": 1.967180925715295e-05, "loss": 0.7531, "step": 5007 }, { "epoch": 0.8175992816619729, "grad_norm": 3.076209783554077, "learning_rate": 1.9671646330513637e-05, "loss": 0.9204, "step": 5008 }, { "epoch": 0.8177625403044774, "grad_norm": 2.871664047241211, "learning_rate": 1.9671483364117837e-05, "loss": 0.8248, "step": 5009 }, { "epoch": 0.8179257989469818, "grad_norm": 2.6003668308258057, "learning_rate": 1.967132035796622e-05, "loss": 0.7511, "step": 5010 }, { "epoch": 0.8180890575894861, "grad_norm": 3.1626062393188477, "learning_rate": 1.9671157312059458e-05, "loss": 0.7656, "step": 5011 }, { "epoch": 0.8182523162319906, "grad_norm": 2.959486484527588, "learning_rate": 1.967099422639822e-05, "loss": 0.7844, "step": 5012 }, { "epoch": 0.8184155748744949, "grad_norm": 3.1578781604766846, "learning_rate": 1.967083110098318e-05, "loss": 0.726, "step": 5013 }, { "epoch": 0.8185788335169993, "grad_norm": 2.9134328365325928, "learning_rate": 1.9670667935815e-05, "loss": 0.8001, "step": 5014 }, { "epoch": 0.8187420921595037, "grad_norm": 3.1952412128448486, "learning_rate": 1.9670504730894357e-05, "loss": 0.8841, "step": 5015 }, { "epoch": 0.8189053508020081, "grad_norm": 3.5687100887298584, "learning_rate": 1.967034148622192e-05, "loss": 1.0349, "step": 5016 }, { "epoch": 0.8190686094445124, "grad_norm": 3.7433085441589355, "learning_rate": 1.9670178201798363e-05, "loss": 1.0152, "step": 5017 }, { "epoch": 0.8192318680870169, "grad_norm": 3.096665143966675, "learning_rate": 1.9670014877624353e-05, "loss": 0.7497, "step": 5018 }, { "epoch": 0.8193951267295212, "grad_norm": 2.634091377258301, "learning_rate": 1.9669851513700565e-05, "loss": 0.6938, "step": 5019 }, { "epoch": 0.8195583853720256, "grad_norm": 2.868907928466797, "learning_rate": 1.9669688110027665e-05, "loss": 0.8468, "step": 5020 }, { "epoch": 0.8197216440145301, "grad_norm": 3.0882225036621094, "learning_rate": 1.966952466660633e-05, "loss": 0.8449, "step": 5021 }, { "epoch": 0.8198849026570344, "grad_norm": 3.2654359340667725, "learning_rate": 1.966936118343723e-05, "loss": 0.9154, "step": 5022 }, { "epoch": 0.8200481612995388, "grad_norm": 2.9886505603790283, "learning_rate": 1.966919766052104e-05, "loss": 0.9967, "step": 5023 }, { "epoch": 0.8202114199420432, "grad_norm": 3.0694093704223633, "learning_rate": 1.9669034097858425e-05, "loss": 0.8445, "step": 5024 }, { "epoch": 0.8203746785845476, "grad_norm": 2.6164093017578125, "learning_rate": 1.9668870495450064e-05, "loss": 0.8056, "step": 5025 }, { "epoch": 0.8205379372270519, "grad_norm": 2.613515615463257, "learning_rate": 1.966870685329663e-05, "loss": 0.6898, "step": 5026 }, { "epoch": 0.8207011958695564, "grad_norm": 2.900273561477661, "learning_rate": 1.966854317139879e-05, "loss": 0.7702, "step": 5027 }, { "epoch": 0.8208644545120607, "grad_norm": 2.884030342102051, "learning_rate": 1.966837944975722e-05, "loss": 0.8944, "step": 5028 }, { "epoch": 0.8210277131545651, "grad_norm": 2.82549786567688, "learning_rate": 1.9668215688372594e-05, "loss": 0.8936, "step": 5029 }, { "epoch": 0.8211909717970695, "grad_norm": 2.7710351943969727, "learning_rate": 1.9668051887245584e-05, "loss": 0.858, "step": 5030 }, { "epoch": 0.8213542304395739, "grad_norm": 2.6393208503723145, "learning_rate": 1.9667888046376862e-05, "loss": 0.7422, "step": 5031 }, { "epoch": 0.8215174890820783, "grad_norm": 2.8506863117218018, "learning_rate": 1.9667724165767103e-05, "loss": 0.8168, "step": 5032 }, { "epoch": 0.8216807477245827, "grad_norm": 3.2983360290527344, "learning_rate": 1.966756024541698e-05, "loss": 0.73, "step": 5033 }, { "epoch": 0.8218440063670871, "grad_norm": 3.6419472694396973, "learning_rate": 1.9667396285327168e-05, "loss": 0.9857, "step": 5034 }, { "epoch": 0.8220072650095914, "grad_norm": 2.8359787464141846, "learning_rate": 1.9667232285498338e-05, "loss": 0.8798, "step": 5035 }, { "epoch": 0.8221705236520959, "grad_norm": 2.9776408672332764, "learning_rate": 1.966706824593117e-05, "loss": 0.7875, "step": 5036 }, { "epoch": 0.8223337822946002, "grad_norm": 2.5022270679473877, "learning_rate": 1.9666904166626333e-05, "loss": 0.8913, "step": 5037 }, { "epoch": 0.8224970409371046, "grad_norm": 2.918330669403076, "learning_rate": 1.9666740047584504e-05, "loss": 0.8553, "step": 5038 }, { "epoch": 0.822660299579609, "grad_norm": 3.1415743827819824, "learning_rate": 1.9666575888806356e-05, "loss": 0.6705, "step": 5039 }, { "epoch": 0.8228235582221134, "grad_norm": 2.339108467102051, "learning_rate": 1.9666411690292565e-05, "loss": 0.6852, "step": 5040 }, { "epoch": 0.8229868168646177, "grad_norm": 2.991703987121582, "learning_rate": 1.9666247452043805e-05, "loss": 0.9366, "step": 5041 }, { "epoch": 0.8231500755071222, "grad_norm": 2.745403289794922, "learning_rate": 1.966608317406075e-05, "loss": 0.7308, "step": 5042 }, { "epoch": 0.8233133341496266, "grad_norm": 2.899928331375122, "learning_rate": 1.966591885634408e-05, "loss": 0.7164, "step": 5043 }, { "epoch": 0.8234765927921309, "grad_norm": 2.777038812637329, "learning_rate": 1.9665754498894466e-05, "loss": 0.6916, "step": 5044 }, { "epoch": 0.8236398514346354, "grad_norm": 2.910935163497925, "learning_rate": 1.9665590101712583e-05, "loss": 0.8714, "step": 5045 }, { "epoch": 0.8238031100771397, "grad_norm": 3.4032320976257324, "learning_rate": 1.9665425664799113e-05, "loss": 0.9756, "step": 5046 }, { "epoch": 0.8239663687196441, "grad_norm": 3.4189064502716064, "learning_rate": 1.966526118815472e-05, "loss": 1.4617, "step": 5047 }, { "epoch": 0.8241296273621485, "grad_norm": 2.9261465072631836, "learning_rate": 1.9665096671780097e-05, "loss": 0.826, "step": 5048 }, { "epoch": 0.8242928860046529, "grad_norm": 2.591804027557373, "learning_rate": 1.9664932115675908e-05, "loss": 0.6863, "step": 5049 }, { "epoch": 0.8244561446471572, "grad_norm": 2.839372396469116, "learning_rate": 1.966476751984283e-05, "loss": 0.8705, "step": 5050 }, { "epoch": 0.8246194032896617, "grad_norm": 3.0228216648101807, "learning_rate": 1.9664602884281547e-05, "loss": 0.8079, "step": 5051 }, { "epoch": 0.824782661932166, "grad_norm": 2.843194007873535, "learning_rate": 1.9664438208992727e-05, "loss": 0.8682, "step": 5052 }, { "epoch": 0.8249459205746704, "grad_norm": 3.140451192855835, "learning_rate": 1.9664273493977052e-05, "loss": 0.8279, "step": 5053 }, { "epoch": 0.8251091792171749, "grad_norm": 2.640894651412964, "learning_rate": 1.9664108739235196e-05, "loss": 0.7814, "step": 5054 }, { "epoch": 0.8252724378596792, "grad_norm": 2.480558156967163, "learning_rate": 1.966394394476784e-05, "loss": 0.8126, "step": 5055 }, { "epoch": 0.8254356965021836, "grad_norm": 2.7000086307525635, "learning_rate": 1.9663779110575657e-05, "loss": 0.7069, "step": 5056 }, { "epoch": 0.825598955144688, "grad_norm": 2.6367828845977783, "learning_rate": 1.966361423665933e-05, "loss": 0.746, "step": 5057 }, { "epoch": 0.8257622137871924, "grad_norm": 3.365098714828491, "learning_rate": 1.966344932301953e-05, "loss": 1.0444, "step": 5058 }, { "epoch": 0.8259254724296967, "grad_norm": 2.5116782188415527, "learning_rate": 1.9663284369656943e-05, "loss": 0.7724, "step": 5059 }, { "epoch": 0.8260887310722012, "grad_norm": 2.979548454284668, "learning_rate": 1.966311937657224e-05, "loss": 0.8541, "step": 5060 }, { "epoch": 0.8262519897147055, "grad_norm": 2.9045770168304443, "learning_rate": 1.9662954343766105e-05, "loss": 0.8569, "step": 5061 }, { "epoch": 0.8264152483572099, "grad_norm": 2.940519094467163, "learning_rate": 1.9662789271239212e-05, "loss": 0.8006, "step": 5062 }, { "epoch": 0.8265785069997142, "grad_norm": 3.63330078125, "learning_rate": 1.966262415899224e-05, "loss": 0.9455, "step": 5063 }, { "epoch": 0.8267417656422187, "grad_norm": 2.63108229637146, "learning_rate": 1.966245900702587e-05, "loss": 0.7196, "step": 5064 }, { "epoch": 0.8269050242847231, "grad_norm": 2.885942220687866, "learning_rate": 1.9662293815340776e-05, "loss": 0.8023, "step": 5065 }, { "epoch": 0.8270682829272274, "grad_norm": 2.6281259059906006, "learning_rate": 1.9662128583937642e-05, "loss": 0.7401, "step": 5066 }, { "epoch": 0.8272315415697319, "grad_norm": 2.7333590984344482, "learning_rate": 1.966196331281715e-05, "loss": 0.7874, "step": 5067 }, { "epoch": 0.8273948002122362, "grad_norm": 2.964607000350952, "learning_rate": 1.966179800197997e-05, "loss": 0.8679, "step": 5068 }, { "epoch": 0.8275580588547407, "grad_norm": 3.1410515308380127, "learning_rate": 1.966163265142679e-05, "loss": 0.9392, "step": 5069 }, { "epoch": 0.827721317497245, "grad_norm": 2.35909366607666, "learning_rate": 1.9661467261158284e-05, "loss": 0.6381, "step": 5070 }, { "epoch": 0.8278845761397494, "grad_norm": 2.990948438644409, "learning_rate": 1.9661301831175136e-05, "loss": 0.9059, "step": 5071 }, { "epoch": 0.8280478347822537, "grad_norm": 2.7034764289855957, "learning_rate": 1.966113636147802e-05, "loss": 0.7172, "step": 5072 }, { "epoch": 0.8282110934247582, "grad_norm": 2.654712200164795, "learning_rate": 1.9660970852067628e-05, "loss": 0.6578, "step": 5073 }, { "epoch": 0.8283743520672625, "grad_norm": 3.4070889949798584, "learning_rate": 1.9660805302944627e-05, "loss": 0.8572, "step": 5074 }, { "epoch": 0.828537610709767, "grad_norm": 2.957638740539551, "learning_rate": 1.9660639714109706e-05, "loss": 0.9171, "step": 5075 }, { "epoch": 0.8287008693522714, "grad_norm": 2.857614040374756, "learning_rate": 1.966047408556354e-05, "loss": 0.8722, "step": 5076 }, { "epoch": 0.8288641279947757, "grad_norm": 2.651247978210449, "learning_rate": 1.9660308417306815e-05, "loss": 0.7349, "step": 5077 }, { "epoch": 0.8290273866372802, "grad_norm": 3.2630391120910645, "learning_rate": 1.966014270934021e-05, "loss": 0.8522, "step": 5078 }, { "epoch": 0.8291906452797845, "grad_norm": 3.111570358276367, "learning_rate": 1.9659976961664405e-05, "loss": 0.9052, "step": 5079 }, { "epoch": 0.8293539039222889, "grad_norm": 2.8052423000335693, "learning_rate": 1.9659811174280083e-05, "loss": 0.8615, "step": 5080 }, { "epoch": 0.8295171625647932, "grad_norm": 2.881025552749634, "learning_rate": 1.9659645347187923e-05, "loss": 0.8766, "step": 5081 }, { "epoch": 0.8296804212072977, "grad_norm": 2.74733829498291, "learning_rate": 1.9659479480388607e-05, "loss": 0.822, "step": 5082 }, { "epoch": 0.829843679849802, "grad_norm": 2.66963791847229, "learning_rate": 1.9659313573882822e-05, "loss": 0.7038, "step": 5083 }, { "epoch": 0.8300069384923064, "grad_norm": 2.6319119930267334, "learning_rate": 1.9659147627671246e-05, "loss": 0.7925, "step": 5084 }, { "epoch": 0.8301701971348108, "grad_norm": 3.0745797157287598, "learning_rate": 1.965898164175456e-05, "loss": 0.9786, "step": 5085 }, { "epoch": 0.8303334557773152, "grad_norm": 2.8262107372283936, "learning_rate": 1.965881561613345e-05, "loss": 0.6944, "step": 5086 }, { "epoch": 0.8304967144198196, "grad_norm": 3.013928174972534, "learning_rate": 1.9658649550808594e-05, "loss": 0.7279, "step": 5087 }, { "epoch": 0.830659973062324, "grad_norm": 2.927022695541382, "learning_rate": 1.9658483445780675e-05, "loss": 0.7714, "step": 5088 }, { "epoch": 0.8308232317048284, "grad_norm": 2.4687108993530273, "learning_rate": 1.965831730105038e-05, "loss": 0.6354, "step": 5089 }, { "epoch": 0.8309864903473327, "grad_norm": 3.021855354309082, "learning_rate": 1.965815111661839e-05, "loss": 0.8859, "step": 5090 }, { "epoch": 0.8311497489898372, "grad_norm": 3.0656545162200928, "learning_rate": 1.9657984892485386e-05, "loss": 0.8637, "step": 5091 }, { "epoch": 0.8313130076323415, "grad_norm": 2.5389719009399414, "learning_rate": 1.965781862865205e-05, "loss": 0.7037, "step": 5092 }, { "epoch": 0.8314762662748459, "grad_norm": 2.707263231277466, "learning_rate": 1.9657652325119074e-05, "loss": 0.8202, "step": 5093 }, { "epoch": 0.8316395249173503, "grad_norm": 2.996371030807495, "learning_rate": 1.9657485981887133e-05, "loss": 0.8657, "step": 5094 }, { "epoch": 0.8318027835598547, "grad_norm": 2.322956085205078, "learning_rate": 1.9657319598956913e-05, "loss": 0.6159, "step": 5095 }, { "epoch": 0.831966042202359, "grad_norm": 2.867182731628418, "learning_rate": 1.9657153176329102e-05, "loss": 0.7069, "step": 5096 }, { "epoch": 0.8321293008448635, "grad_norm": 2.9820284843444824, "learning_rate": 1.9656986714004377e-05, "loss": 0.711, "step": 5097 }, { "epoch": 0.8322925594873679, "grad_norm": 2.846219301223755, "learning_rate": 1.9656820211983428e-05, "loss": 0.703, "step": 5098 }, { "epoch": 0.8324558181298722, "grad_norm": 3.0827672481536865, "learning_rate": 1.9656653670266935e-05, "loss": 0.8058, "step": 5099 }, { "epoch": 0.8326190767723767, "grad_norm": 2.650102376937866, "learning_rate": 1.965648708885559e-05, "loss": 0.7007, "step": 5100 }, { "epoch": 0.832782335414881, "grad_norm": 2.7112441062927246, "learning_rate": 1.965632046775007e-05, "loss": 0.7975, "step": 5101 }, { "epoch": 0.8329455940573854, "grad_norm": 2.7529385089874268, "learning_rate": 1.9656153806951065e-05, "loss": 0.8611, "step": 5102 }, { "epoch": 0.8331088526998898, "grad_norm": 2.679729700088501, "learning_rate": 1.9655987106459252e-05, "loss": 0.7703, "step": 5103 }, { "epoch": 0.8332721113423942, "grad_norm": 3.3367724418640137, "learning_rate": 1.9655820366275326e-05, "loss": 1.0379, "step": 5104 }, { "epoch": 0.8334353699848985, "grad_norm": 2.8258423805236816, "learning_rate": 1.965565358639997e-05, "loss": 0.8053, "step": 5105 }, { "epoch": 0.833598628627403, "grad_norm": 2.841602325439453, "learning_rate": 1.9655486766833866e-05, "loss": 0.7209, "step": 5106 }, { "epoch": 0.8337618872699073, "grad_norm": 3.2200324535369873, "learning_rate": 1.96553199075777e-05, "loss": 0.918, "step": 5107 }, { "epoch": 0.8339251459124117, "grad_norm": 3.4131526947021484, "learning_rate": 1.9655153008632163e-05, "loss": 0.8821, "step": 5108 }, { "epoch": 0.8340884045549162, "grad_norm": 3.371530532836914, "learning_rate": 1.9654986069997938e-05, "loss": 1.0107, "step": 5109 }, { "epoch": 0.8342516631974205, "grad_norm": 3.122516632080078, "learning_rate": 1.9654819091675706e-05, "loss": 0.7897, "step": 5110 }, { "epoch": 0.8344149218399249, "grad_norm": 2.7488508224487305, "learning_rate": 1.9654652073666163e-05, "loss": 0.8012, "step": 5111 }, { "epoch": 0.8345781804824293, "grad_norm": 2.862701416015625, "learning_rate": 1.965448501596999e-05, "loss": 0.7887, "step": 5112 }, { "epoch": 0.8347414391249337, "grad_norm": 2.5071861743927, "learning_rate": 1.9654317918587874e-05, "loss": 0.7569, "step": 5113 }, { "epoch": 0.834904697767438, "grad_norm": 2.4797463417053223, "learning_rate": 1.96541507815205e-05, "loss": 0.7528, "step": 5114 }, { "epoch": 0.8350679564099425, "grad_norm": 3.3180432319641113, "learning_rate": 1.965398360476856e-05, "loss": 0.838, "step": 5115 }, { "epoch": 0.8352312150524468, "grad_norm": 2.4893460273742676, "learning_rate": 1.965381638833274e-05, "loss": 0.6892, "step": 5116 }, { "epoch": 0.8353944736949512, "grad_norm": 2.9045822620391846, "learning_rate": 1.9653649132213727e-05, "loss": 0.9794, "step": 5117 }, { "epoch": 0.8355577323374556, "grad_norm": 2.611928701400757, "learning_rate": 1.9653481836412203e-05, "loss": 0.8837, "step": 5118 }, { "epoch": 0.83572099097996, "grad_norm": 2.9157121181488037, "learning_rate": 1.9653314500928863e-05, "loss": 0.8603, "step": 5119 }, { "epoch": 0.8358842496224644, "grad_norm": 2.684481143951416, "learning_rate": 1.965314712576439e-05, "loss": 0.8123, "step": 5120 }, { "epoch": 0.8360475082649688, "grad_norm": 2.873217821121216, "learning_rate": 1.9652979710919475e-05, "loss": 0.8425, "step": 5121 }, { "epoch": 0.8362107669074732, "grad_norm": 2.752962112426758, "learning_rate": 1.9652812256394805e-05, "loss": 0.8151, "step": 5122 }, { "epoch": 0.8363740255499775, "grad_norm": 2.7990825176239014, "learning_rate": 1.9652644762191074e-05, "loss": 0.7382, "step": 5123 }, { "epoch": 0.836537284192482, "grad_norm": 2.3942997455596924, "learning_rate": 1.9652477228308958e-05, "loss": 0.6691, "step": 5124 }, { "epoch": 0.8367005428349863, "grad_norm": 2.8924243450164795, "learning_rate": 1.9652309654749156e-05, "loss": 0.995, "step": 5125 }, { "epoch": 0.8368638014774907, "grad_norm": 2.824268341064453, "learning_rate": 1.9652142041512356e-05, "loss": 0.7232, "step": 5126 }, { "epoch": 0.8370270601199951, "grad_norm": 2.3854246139526367, "learning_rate": 1.965197438859924e-05, "loss": 0.7287, "step": 5127 }, { "epoch": 0.8371903187624995, "grad_norm": 3.557485342025757, "learning_rate": 1.9651806696010503e-05, "loss": 0.9592, "step": 5128 }, { "epoch": 0.8373535774050038, "grad_norm": 3.113501787185669, "learning_rate": 1.9651638963746834e-05, "loss": 0.985, "step": 5129 }, { "epoch": 0.8375168360475083, "grad_norm": 3.0721683502197266, "learning_rate": 1.9651471191808924e-05, "loss": 0.7813, "step": 5130 }, { "epoch": 0.8376800946900127, "grad_norm": 2.610952377319336, "learning_rate": 1.9651303380197455e-05, "loss": 0.6592, "step": 5131 }, { "epoch": 0.837843353332517, "grad_norm": 2.61875319480896, "learning_rate": 1.9651135528913127e-05, "loss": 0.7289, "step": 5132 }, { "epoch": 0.8380066119750215, "grad_norm": 2.9127495288848877, "learning_rate": 1.9650967637956625e-05, "loss": 0.8474, "step": 5133 }, { "epoch": 0.8381698706175258, "grad_norm": 3.5226633548736572, "learning_rate": 1.9650799707328634e-05, "loss": 1.0063, "step": 5134 }, { "epoch": 0.8383331292600302, "grad_norm": 2.759152889251709, "learning_rate": 1.9650631737029852e-05, "loss": 0.9741, "step": 5135 }, { "epoch": 0.8384963879025346, "grad_norm": 2.6239051818847656, "learning_rate": 1.965046372706097e-05, "loss": 0.6831, "step": 5136 }, { "epoch": 0.838659646545039, "grad_norm": 2.7051217555999756, "learning_rate": 1.965029567742267e-05, "loss": 0.7465, "step": 5137 }, { "epoch": 0.8388229051875433, "grad_norm": 3.521212100982666, "learning_rate": 1.9650127588115654e-05, "loss": 0.9271, "step": 5138 }, { "epoch": 0.8389861638300478, "grad_norm": 2.9162890911102295, "learning_rate": 1.9649959459140604e-05, "loss": 0.8041, "step": 5139 }, { "epoch": 0.8391494224725521, "grad_norm": 2.3757684230804443, "learning_rate": 1.9649791290498214e-05, "loss": 0.7677, "step": 5140 }, { "epoch": 0.8393126811150565, "grad_norm": 2.543856382369995, "learning_rate": 1.9649623082189178e-05, "loss": 0.6493, "step": 5141 }, { "epoch": 0.839475939757561, "grad_norm": 2.173098564147949, "learning_rate": 1.9649454834214184e-05, "loss": 0.5336, "step": 5142 }, { "epoch": 0.8396391984000653, "grad_norm": 3.2211320400238037, "learning_rate": 1.9649286546573925e-05, "loss": 0.8905, "step": 5143 }, { "epoch": 0.8398024570425697, "grad_norm": 2.8706226348876953, "learning_rate": 1.9649118219269092e-05, "loss": 0.9375, "step": 5144 }, { "epoch": 0.839965715685074, "grad_norm": 3.2940964698791504, "learning_rate": 1.964894985230038e-05, "loss": 0.833, "step": 5145 }, { "epoch": 0.8401289743275785, "grad_norm": 3.052159309387207, "learning_rate": 1.9648781445668474e-05, "loss": 0.8473, "step": 5146 }, { "epoch": 0.8402922329700828, "grad_norm": 2.646920680999756, "learning_rate": 1.9648612999374072e-05, "loss": 0.7419, "step": 5147 }, { "epoch": 0.8404554916125873, "grad_norm": 2.9398505687713623, "learning_rate": 1.9648444513417867e-05, "loss": 0.7566, "step": 5148 }, { "epoch": 0.8406187502550916, "grad_norm": 2.6337850093841553, "learning_rate": 1.9648275987800548e-05, "loss": 0.7086, "step": 5149 }, { "epoch": 0.840782008897596, "grad_norm": 3.1177971363067627, "learning_rate": 1.964810742252281e-05, "loss": 0.9145, "step": 5150 }, { "epoch": 0.8409452675401003, "grad_norm": 2.881619691848755, "learning_rate": 1.9647938817585348e-05, "loss": 0.9093, "step": 5151 }, { "epoch": 0.8411085261826048, "grad_norm": 3.2238309383392334, "learning_rate": 1.964777017298885e-05, "loss": 0.8834, "step": 5152 }, { "epoch": 0.8412717848251092, "grad_norm": 3.154601573944092, "learning_rate": 1.9647601488734013e-05, "loss": 0.9827, "step": 5153 }, { "epoch": 0.8414350434676136, "grad_norm": 2.9696121215820312, "learning_rate": 1.9647432764821527e-05, "loss": 0.8165, "step": 5154 }, { "epoch": 0.841598302110118, "grad_norm": 2.7793962955474854, "learning_rate": 1.964726400125209e-05, "loss": 0.818, "step": 5155 }, { "epoch": 0.8417615607526223, "grad_norm": 2.716958522796631, "learning_rate": 1.9647095198026393e-05, "loss": 0.8209, "step": 5156 }, { "epoch": 0.8419248193951268, "grad_norm": 2.9793384075164795, "learning_rate": 1.964692635514513e-05, "loss": 0.9352, "step": 5157 }, { "epoch": 0.8420880780376311, "grad_norm": 2.728221893310547, "learning_rate": 1.9646757472608998e-05, "loss": 0.9378, "step": 5158 }, { "epoch": 0.8422513366801355, "grad_norm": 3.145582914352417, "learning_rate": 1.9646588550418685e-05, "loss": 0.9057, "step": 5159 }, { "epoch": 0.8424145953226398, "grad_norm": 3.082665205001831, "learning_rate": 1.964641958857489e-05, "loss": 0.9018, "step": 5160 }, { "epoch": 0.8425778539651443, "grad_norm": 2.9318125247955322, "learning_rate": 1.9646250587078307e-05, "loss": 0.9493, "step": 5161 }, { "epoch": 0.8427411126076486, "grad_norm": 2.2350387573242188, "learning_rate": 1.9646081545929627e-05, "loss": 0.6603, "step": 5162 }, { "epoch": 0.842904371250153, "grad_norm": 2.764965772628784, "learning_rate": 1.964591246512955e-05, "loss": 0.7822, "step": 5163 }, { "epoch": 0.8430676298926575, "grad_norm": 2.5481138229370117, "learning_rate": 1.9645743344678772e-05, "loss": 0.7315, "step": 5164 }, { "epoch": 0.8432308885351618, "grad_norm": 2.3535642623901367, "learning_rate": 1.9645574184577982e-05, "loss": 0.671, "step": 5165 }, { "epoch": 0.8433941471776663, "grad_norm": 3.0003530979156494, "learning_rate": 1.9645404984827882e-05, "loss": 0.8507, "step": 5166 }, { "epoch": 0.8435574058201706, "grad_norm": 2.86672043800354, "learning_rate": 1.964523574542916e-05, "loss": 0.8765, "step": 5167 }, { "epoch": 0.843720664462675, "grad_norm": 2.5017521381378174, "learning_rate": 1.9645066466382517e-05, "loss": 0.7419, "step": 5168 }, { "epoch": 0.8438839231051793, "grad_norm": 2.87660813331604, "learning_rate": 1.964489714768865e-05, "loss": 0.9856, "step": 5169 }, { "epoch": 0.8440471817476838, "grad_norm": 2.7169783115386963, "learning_rate": 1.964472778934825e-05, "loss": 0.7228, "step": 5170 }, { "epoch": 0.8442104403901881, "grad_norm": 2.8499855995178223, "learning_rate": 1.9644558391362015e-05, "loss": 0.8252, "step": 5171 }, { "epoch": 0.8443736990326925, "grad_norm": 2.961883544921875, "learning_rate": 1.9644388953730647e-05, "loss": 0.7716, "step": 5172 }, { "epoch": 0.8445369576751969, "grad_norm": 2.9870493412017822, "learning_rate": 1.9644219476454833e-05, "loss": 0.8495, "step": 5173 }, { "epoch": 0.8447002163177013, "grad_norm": 2.5719921588897705, "learning_rate": 1.9644049959535275e-05, "loss": 0.6697, "step": 5174 }, { "epoch": 0.8448634749602058, "grad_norm": 2.6040377616882324, "learning_rate": 1.964388040297267e-05, "loss": 0.7595, "step": 5175 }, { "epoch": 0.8450267336027101, "grad_norm": 2.9678239822387695, "learning_rate": 1.9643710806767715e-05, "loss": 0.935, "step": 5176 }, { "epoch": 0.8451899922452145, "grad_norm": 2.9900388717651367, "learning_rate": 1.9643541170921106e-05, "loss": 0.7445, "step": 5177 }, { "epoch": 0.8453532508877188, "grad_norm": 3.274646043777466, "learning_rate": 1.964337149543354e-05, "loss": 0.9577, "step": 5178 }, { "epoch": 0.8455165095302233, "grad_norm": 3.113849401473999, "learning_rate": 1.9643201780305716e-05, "loss": 0.8741, "step": 5179 }, { "epoch": 0.8456797681727276, "grad_norm": 2.7872955799102783, "learning_rate": 1.964303202553833e-05, "loss": 0.7914, "step": 5180 }, { "epoch": 0.845843026815232, "grad_norm": 2.55851149559021, "learning_rate": 1.964286223113208e-05, "loss": 0.606, "step": 5181 }, { "epoch": 0.8460062854577364, "grad_norm": 3.103510618209839, "learning_rate": 1.9642692397087666e-05, "loss": 0.9009, "step": 5182 }, { "epoch": 0.8461695441002408, "grad_norm": 2.9056649208068848, "learning_rate": 1.9642522523405782e-05, "loss": 0.78, "step": 5183 }, { "epoch": 0.8463328027427452, "grad_norm": 3.095651626586914, "learning_rate": 1.964235261008713e-05, "loss": 0.7755, "step": 5184 }, { "epoch": 0.8464960613852496, "grad_norm": 2.8193163871765137, "learning_rate": 1.964218265713241e-05, "loss": 0.7841, "step": 5185 }, { "epoch": 0.846659320027754, "grad_norm": 2.9241349697113037, "learning_rate": 1.9642012664542313e-05, "loss": 0.7113, "step": 5186 }, { "epoch": 0.8468225786702583, "grad_norm": 2.8434245586395264, "learning_rate": 1.9641842632317546e-05, "loss": 0.8991, "step": 5187 }, { "epoch": 0.8469858373127628, "grad_norm": 2.9946682453155518, "learning_rate": 1.9641672560458803e-05, "loss": 0.9065, "step": 5188 }, { "epoch": 0.8471490959552671, "grad_norm": 2.7993786334991455, "learning_rate": 1.964150244896679e-05, "loss": 0.8115, "step": 5189 }, { "epoch": 0.8473123545977715, "grad_norm": 2.964542865753174, "learning_rate": 1.9641332297842194e-05, "loss": 0.9198, "step": 5190 }, { "epoch": 0.8474756132402759, "grad_norm": 2.8650999069213867, "learning_rate": 1.9641162107085724e-05, "loss": 0.8023, "step": 5191 }, { "epoch": 0.8476388718827803, "grad_norm": 2.892061710357666, "learning_rate": 1.9640991876698077e-05, "loss": 0.8163, "step": 5192 }, { "epoch": 0.8478021305252846, "grad_norm": 2.876312017440796, "learning_rate": 1.9640821606679953e-05, "loss": 0.8438, "step": 5193 }, { "epoch": 0.8479653891677891, "grad_norm": 2.642976760864258, "learning_rate": 1.9640651297032048e-05, "loss": 0.723, "step": 5194 }, { "epoch": 0.8481286478102935, "grad_norm": 3.000091791152954, "learning_rate": 1.9640480947755073e-05, "loss": 0.6967, "step": 5195 }, { "epoch": 0.8482919064527978, "grad_norm": 2.847036361694336, "learning_rate": 1.964031055884972e-05, "loss": 0.9302, "step": 5196 }, { "epoch": 0.8484551650953023, "grad_norm": 3.1030564308166504, "learning_rate": 1.9640140130316684e-05, "loss": 0.8336, "step": 5197 }, { "epoch": 0.8486184237378066, "grad_norm": 2.237313747406006, "learning_rate": 1.9639969662156673e-05, "loss": 0.793, "step": 5198 }, { "epoch": 0.848781682380311, "grad_norm": 2.154623031616211, "learning_rate": 1.9639799154370394e-05, "loss": 0.6144, "step": 5199 }, { "epoch": 0.8489449410228154, "grad_norm": 2.529526948928833, "learning_rate": 1.9639628606958535e-05, "loss": 0.7911, "step": 5200 }, { "epoch": 0.8491081996653198, "grad_norm": 3.0510356426239014, "learning_rate": 1.96394580199218e-05, "loss": 0.9763, "step": 5201 }, { "epoch": 0.8492714583078241, "grad_norm": 3.374208688735962, "learning_rate": 1.96392873932609e-05, "loss": 0.9989, "step": 5202 }, { "epoch": 0.8494347169503286, "grad_norm": 3.0879900455474854, "learning_rate": 1.9639116726976527e-05, "loss": 0.9875, "step": 5203 }, { "epoch": 0.8495979755928329, "grad_norm": 2.5776567459106445, "learning_rate": 1.963894602106938e-05, "loss": 0.847, "step": 5204 }, { "epoch": 0.8497612342353373, "grad_norm": 2.644099473953247, "learning_rate": 1.963877527554017e-05, "loss": 0.7956, "step": 5205 }, { "epoch": 0.8499244928778418, "grad_norm": 2.9806268215179443, "learning_rate": 1.9638604490389596e-05, "loss": 0.8586, "step": 5206 }, { "epoch": 0.8500877515203461, "grad_norm": 2.9881181716918945, "learning_rate": 1.9638433665618356e-05, "loss": 0.9283, "step": 5207 }, { "epoch": 0.8502510101628505, "grad_norm": 2.849547863006592, "learning_rate": 1.9638262801227154e-05, "loss": 0.7631, "step": 5208 }, { "epoch": 0.8504142688053549, "grad_norm": 2.572023391723633, "learning_rate": 1.963809189721669e-05, "loss": 0.6943, "step": 5209 }, { "epoch": 0.8505775274478593, "grad_norm": 2.51076340675354, "learning_rate": 1.9637920953587675e-05, "loss": 0.7306, "step": 5210 }, { "epoch": 0.8507407860903636, "grad_norm": 2.830197811126709, "learning_rate": 1.9637749970340806e-05, "loss": 0.7537, "step": 5211 }, { "epoch": 0.8509040447328681, "grad_norm": 2.82411789894104, "learning_rate": 1.9637578947476784e-05, "loss": 0.9072, "step": 5212 }, { "epoch": 0.8510673033753724, "grad_norm": 2.5762805938720703, "learning_rate": 1.9637407884996312e-05, "loss": 0.7857, "step": 5213 }, { "epoch": 0.8512305620178768, "grad_norm": 2.9817819595336914, "learning_rate": 1.96372367829001e-05, "loss": 0.9482, "step": 5214 }, { "epoch": 0.8513938206603812, "grad_norm": 3.0431933403015137, "learning_rate": 1.9637065641188843e-05, "loss": 0.8723, "step": 5215 }, { "epoch": 0.8515570793028856, "grad_norm": 2.81748104095459, "learning_rate": 1.963689445986325e-05, "loss": 0.8386, "step": 5216 }, { "epoch": 0.85172033794539, "grad_norm": 3.6147730350494385, "learning_rate": 1.963672323892402e-05, "loss": 1.0527, "step": 5217 }, { "epoch": 0.8518835965878944, "grad_norm": 2.875941514968872, "learning_rate": 1.963655197837186e-05, "loss": 0.7794, "step": 5218 }, { "epoch": 0.8520468552303988, "grad_norm": 3.196443557739258, "learning_rate": 1.9636380678207476e-05, "loss": 0.9085, "step": 5219 }, { "epoch": 0.8522101138729031, "grad_norm": 2.6051888465881348, "learning_rate": 1.9636209338431568e-05, "loss": 0.704, "step": 5220 }, { "epoch": 0.8523733725154076, "grad_norm": 2.8062679767608643, "learning_rate": 1.9636037959044843e-05, "loss": 0.8571, "step": 5221 }, { "epoch": 0.8525366311579119, "grad_norm": 3.0469272136688232, "learning_rate": 1.9635866540048007e-05, "loss": 0.773, "step": 5222 }, { "epoch": 0.8526998898004163, "grad_norm": 2.810662269592285, "learning_rate": 1.9635695081441756e-05, "loss": 0.7833, "step": 5223 }, { "epoch": 0.8528631484429207, "grad_norm": 2.443366765975952, "learning_rate": 1.9635523583226804e-05, "loss": 0.731, "step": 5224 }, { "epoch": 0.8530264070854251, "grad_norm": 2.726872444152832, "learning_rate": 1.9635352045403853e-05, "loss": 0.8244, "step": 5225 }, { "epoch": 0.8531896657279294, "grad_norm": 2.801720142364502, "learning_rate": 1.9635180467973608e-05, "loss": 0.8077, "step": 5226 }, { "epoch": 0.8533529243704339, "grad_norm": 3.0947625637054443, "learning_rate": 1.9635008850936776e-05, "loss": 0.8347, "step": 5227 }, { "epoch": 0.8535161830129383, "grad_norm": 2.6387171745300293, "learning_rate": 1.9634837194294056e-05, "loss": 0.6684, "step": 5228 }, { "epoch": 0.8536794416554426, "grad_norm": 2.9011332988739014, "learning_rate": 1.9634665498046162e-05, "loss": 0.8018, "step": 5229 }, { "epoch": 0.8538427002979471, "grad_norm": 2.809203863143921, "learning_rate": 1.9634493762193796e-05, "loss": 0.8353, "step": 5230 }, { "epoch": 0.8540059589404514, "grad_norm": 2.724071502685547, "learning_rate": 1.9634321986737662e-05, "loss": 0.8306, "step": 5231 }, { "epoch": 0.8541692175829558, "grad_norm": 2.54190731048584, "learning_rate": 1.9634150171678466e-05, "loss": 0.6525, "step": 5232 }, { "epoch": 0.8543324762254602, "grad_norm": 3.0114786624908447, "learning_rate": 1.9633978317016922e-05, "loss": 0.9106, "step": 5233 }, { "epoch": 0.8544957348679646, "grad_norm": 3.1189210414886475, "learning_rate": 1.963380642275373e-05, "loss": 0.9771, "step": 5234 }, { "epoch": 0.8546589935104689, "grad_norm": 2.9210691452026367, "learning_rate": 1.9633634488889596e-05, "loss": 0.7189, "step": 5235 }, { "epoch": 0.8548222521529734, "grad_norm": 2.4949746131896973, "learning_rate": 1.963346251542523e-05, "loss": 0.5651, "step": 5236 }, { "epoch": 0.8549855107954777, "grad_norm": 2.6440558433532715, "learning_rate": 1.9633290502361336e-05, "loss": 0.7541, "step": 5237 }, { "epoch": 0.8551487694379821, "grad_norm": 2.7475874423980713, "learning_rate": 1.9633118449698617e-05, "loss": 0.7167, "step": 5238 }, { "epoch": 0.8553120280804866, "grad_norm": 2.710836887359619, "learning_rate": 1.9632946357437796e-05, "loss": 0.7076, "step": 5239 }, { "epoch": 0.8554752867229909, "grad_norm": 2.9779045581817627, "learning_rate": 1.963277422557956e-05, "loss": 0.8163, "step": 5240 }, { "epoch": 0.8556385453654953, "grad_norm": 2.6819562911987305, "learning_rate": 1.963260205412463e-05, "loss": 0.7016, "step": 5241 }, { "epoch": 0.8558018040079997, "grad_norm": 2.9117774963378906, "learning_rate": 1.9632429843073713e-05, "loss": 0.9223, "step": 5242 }, { "epoch": 0.8559650626505041, "grad_norm": 2.817441940307617, "learning_rate": 1.963225759242751e-05, "loss": 0.8072, "step": 5243 }, { "epoch": 0.8561283212930084, "grad_norm": 2.540325880050659, "learning_rate": 1.9632085302186736e-05, "loss": 0.6182, "step": 5244 }, { "epoch": 0.8562915799355129, "grad_norm": 2.782149076461792, "learning_rate": 1.9631912972352096e-05, "loss": 0.7471, "step": 5245 }, { "epoch": 0.8564548385780172, "grad_norm": 2.8126728534698486, "learning_rate": 1.96317406029243e-05, "loss": 0.7772, "step": 5246 }, { "epoch": 0.8566180972205216, "grad_norm": 2.682323932647705, "learning_rate": 1.9631568193904052e-05, "loss": 0.7557, "step": 5247 }, { "epoch": 0.856781355863026, "grad_norm": 2.664532423019409, "learning_rate": 1.9631395745292062e-05, "loss": 0.7133, "step": 5248 }, { "epoch": 0.8569446145055304, "grad_norm": 4.735931396484375, "learning_rate": 1.9631223257089047e-05, "loss": 0.9467, "step": 5249 }, { "epoch": 0.8571078731480348, "grad_norm": 2.3342092037200928, "learning_rate": 1.9631050729295705e-05, "loss": 0.7397, "step": 5250 }, { "epoch": 0.8572711317905392, "grad_norm": 2.871518611907959, "learning_rate": 1.9630878161912752e-05, "loss": 0.7185, "step": 5251 }, { "epoch": 0.8574343904330436, "grad_norm": 2.985565662384033, "learning_rate": 1.9630705554940898e-05, "loss": 0.8806, "step": 5252 }, { "epoch": 0.8575976490755479, "grad_norm": 2.9435102939605713, "learning_rate": 1.9630532908380845e-05, "loss": 0.7584, "step": 5253 }, { "epoch": 0.8577609077180524, "grad_norm": 2.9143240451812744, "learning_rate": 1.9630360222233312e-05, "loss": 0.8941, "step": 5254 }, { "epoch": 0.8579241663605567, "grad_norm": 2.6021251678466797, "learning_rate": 1.9630187496499e-05, "loss": 0.753, "step": 5255 }, { "epoch": 0.8580874250030611, "grad_norm": 2.9062912464141846, "learning_rate": 1.9630014731178625e-05, "loss": 0.8263, "step": 5256 }, { "epoch": 0.8582506836455654, "grad_norm": 2.647996664047241, "learning_rate": 1.9629841926272898e-05, "loss": 0.7242, "step": 5257 }, { "epoch": 0.8584139422880699, "grad_norm": 3.026707887649536, "learning_rate": 1.9629669081782527e-05, "loss": 0.6998, "step": 5258 }, { "epoch": 0.8585772009305742, "grad_norm": 3.181084156036377, "learning_rate": 1.962949619770822e-05, "loss": 1.0317, "step": 5259 }, { "epoch": 0.8587404595730787, "grad_norm": 2.9309241771698, "learning_rate": 1.962932327405069e-05, "loss": 0.7949, "step": 5260 }, { "epoch": 0.8589037182155831, "grad_norm": 3.580526113510132, "learning_rate": 1.962915031081065e-05, "loss": 1.0341, "step": 5261 }, { "epoch": 0.8590669768580874, "grad_norm": 3.1185014247894287, "learning_rate": 1.962897730798881e-05, "loss": 0.8904, "step": 5262 }, { "epoch": 0.8592302355005919, "grad_norm": 2.8899502754211426, "learning_rate": 1.9628804265585878e-05, "loss": 0.8615, "step": 5263 }, { "epoch": 0.8593934941430962, "grad_norm": 2.9559690952301025, "learning_rate": 1.962863118360257e-05, "loss": 0.7416, "step": 5264 }, { "epoch": 0.8595567527856006, "grad_norm": 3.08679461479187, "learning_rate": 1.9628458062039592e-05, "loss": 0.8496, "step": 5265 }, { "epoch": 0.859720011428105, "grad_norm": 3.154738187789917, "learning_rate": 1.962828490089766e-05, "loss": 0.8702, "step": 5266 }, { "epoch": 0.8598832700706094, "grad_norm": 2.7138173580169678, "learning_rate": 1.962811170017748e-05, "loss": 0.8591, "step": 5267 }, { "epoch": 0.8600465287131137, "grad_norm": 3.114870071411133, "learning_rate": 1.962793845987977e-05, "loss": 0.839, "step": 5268 }, { "epoch": 0.8602097873556181, "grad_norm": 2.7429187297821045, "learning_rate": 1.9627765180005248e-05, "loss": 0.7082, "step": 5269 }, { "epoch": 0.8603730459981225, "grad_norm": 2.7834668159484863, "learning_rate": 1.9627591860554612e-05, "loss": 0.7325, "step": 5270 }, { "epoch": 0.8605363046406269, "grad_norm": 2.6780765056610107, "learning_rate": 1.962741850152858e-05, "loss": 0.7478, "step": 5271 }, { "epoch": 0.8606995632831314, "grad_norm": 3.1064746379852295, "learning_rate": 1.962724510292787e-05, "loss": 0.9341, "step": 5272 }, { "epoch": 0.8608628219256357, "grad_norm": 2.527226209640503, "learning_rate": 1.9627071664753186e-05, "loss": 0.7913, "step": 5273 }, { "epoch": 0.8610260805681401, "grad_norm": 2.575195789337158, "learning_rate": 1.962689818700525e-05, "loss": 0.8362, "step": 5274 }, { "epoch": 0.8611893392106444, "grad_norm": 2.490283727645874, "learning_rate": 1.962672466968477e-05, "loss": 0.7445, "step": 5275 }, { "epoch": 0.8613525978531489, "grad_norm": 2.551907539367676, "learning_rate": 1.9626551112792458e-05, "loss": 0.7642, "step": 5276 }, { "epoch": 0.8615158564956532, "grad_norm": 2.7525508403778076, "learning_rate": 1.962637751632903e-05, "loss": 0.8731, "step": 5277 }, { "epoch": 0.8616791151381576, "grad_norm": 2.21828556060791, "learning_rate": 1.96262038802952e-05, "loss": 0.7049, "step": 5278 }, { "epoch": 0.861842373780662, "grad_norm": 2.617957353591919, "learning_rate": 1.9626030204691678e-05, "loss": 0.8807, "step": 5279 }, { "epoch": 0.8620056324231664, "grad_norm": 3.2312893867492676, "learning_rate": 1.9625856489519183e-05, "loss": 0.9223, "step": 5280 }, { "epoch": 0.8621688910656707, "grad_norm": 2.09525990486145, "learning_rate": 1.9625682734778424e-05, "loss": 0.5947, "step": 5281 }, { "epoch": 0.8623321497081752, "grad_norm": 3.4807329177856445, "learning_rate": 1.962550894047012e-05, "loss": 0.9623, "step": 5282 }, { "epoch": 0.8624954083506796, "grad_norm": 2.7328295707702637, "learning_rate": 1.9625335106594986e-05, "loss": 0.7272, "step": 5283 }, { "epoch": 0.8626586669931839, "grad_norm": 2.7777576446533203, "learning_rate": 1.9625161233153726e-05, "loss": 0.8241, "step": 5284 }, { "epoch": 0.8628219256356884, "grad_norm": 2.8044536113739014, "learning_rate": 1.962498732014707e-05, "loss": 0.7102, "step": 5285 }, { "epoch": 0.8629851842781927, "grad_norm": 3.0627236366271973, "learning_rate": 1.9624813367575722e-05, "loss": 0.8277, "step": 5286 }, { "epoch": 0.8631484429206971, "grad_norm": 2.7723398208618164, "learning_rate": 1.9624639375440403e-05, "loss": 0.7707, "step": 5287 }, { "epoch": 0.8633117015632015, "grad_norm": 2.826007843017578, "learning_rate": 1.9624465343741822e-05, "loss": 0.7901, "step": 5288 }, { "epoch": 0.8634749602057059, "grad_norm": 2.3491015434265137, "learning_rate": 1.96242912724807e-05, "loss": 0.5832, "step": 5289 }, { "epoch": 0.8636382188482102, "grad_norm": 3.2928154468536377, "learning_rate": 1.9624117161657753e-05, "loss": 0.8791, "step": 5290 }, { "epoch": 0.8638014774907147, "grad_norm": 3.1190359592437744, "learning_rate": 1.962394301127369e-05, "loss": 0.832, "step": 5291 }, { "epoch": 0.863964736133219, "grad_norm": 3.722743511199951, "learning_rate": 1.9623768821329234e-05, "loss": 0.8322, "step": 5292 }, { "epoch": 0.8641279947757234, "grad_norm": 2.7198688983917236, "learning_rate": 1.9623594591825098e-05, "loss": 0.7376, "step": 5293 }, { "epoch": 0.8642912534182279, "grad_norm": 2.9857897758483887, "learning_rate": 1.9623420322761997e-05, "loss": 0.8299, "step": 5294 }, { "epoch": 0.8644545120607322, "grad_norm": 2.909968376159668, "learning_rate": 1.962324601414065e-05, "loss": 0.8462, "step": 5295 }, { "epoch": 0.8646177707032366, "grad_norm": 2.672696590423584, "learning_rate": 1.962307166596177e-05, "loss": 0.837, "step": 5296 }, { "epoch": 0.864781029345741, "grad_norm": 4.124011039733887, "learning_rate": 1.962289727822608e-05, "loss": 0.9258, "step": 5297 }, { "epoch": 0.8649442879882454, "grad_norm": 3.000046730041504, "learning_rate": 1.962272285093429e-05, "loss": 0.8315, "step": 5298 }, { "epoch": 0.8651075466307497, "grad_norm": 2.718372344970703, "learning_rate": 1.9622548384087122e-05, "loss": 0.7491, "step": 5299 }, { "epoch": 0.8652708052732542, "grad_norm": 2.6690566539764404, "learning_rate": 1.962237387768529e-05, "loss": 0.663, "step": 5300 }, { "epoch": 0.8654340639157585, "grad_norm": 2.963486671447754, "learning_rate": 1.962219933172951e-05, "loss": 0.7914, "step": 5301 }, { "epoch": 0.8655973225582629, "grad_norm": 2.6321163177490234, "learning_rate": 1.9622024746220507e-05, "loss": 0.7708, "step": 5302 }, { "epoch": 0.8657605812007673, "grad_norm": 2.991973400115967, "learning_rate": 1.962185012115899e-05, "loss": 0.7822, "step": 5303 }, { "epoch": 0.8659238398432717, "grad_norm": 2.463435173034668, "learning_rate": 1.962167545654568e-05, "loss": 0.7043, "step": 5304 }, { "epoch": 0.8660870984857761, "grad_norm": 2.8788371086120605, "learning_rate": 1.9621500752381296e-05, "loss": 0.7389, "step": 5305 }, { "epoch": 0.8662503571282805, "grad_norm": 3.0023202896118164, "learning_rate": 1.9621326008666556e-05, "loss": 0.8786, "step": 5306 }, { "epoch": 0.8664136157707849, "grad_norm": 2.943434953689575, "learning_rate": 1.9621151225402176e-05, "loss": 0.8081, "step": 5307 }, { "epoch": 0.8665768744132892, "grad_norm": 2.6855275630950928, "learning_rate": 1.962097640258888e-05, "loss": 0.7925, "step": 5308 }, { "epoch": 0.8667401330557937, "grad_norm": 2.836057662963867, "learning_rate": 1.9620801540227378e-05, "loss": 0.6601, "step": 5309 }, { "epoch": 0.866903391698298, "grad_norm": 2.7541487216949463, "learning_rate": 1.9620626638318396e-05, "loss": 0.8772, "step": 5310 }, { "epoch": 0.8670666503408024, "grad_norm": 2.467010498046875, "learning_rate": 1.962045169686265e-05, "loss": 0.6437, "step": 5311 }, { "epoch": 0.8672299089833068, "grad_norm": 2.9586100578308105, "learning_rate": 1.962027671586086e-05, "loss": 0.8342, "step": 5312 }, { "epoch": 0.8673931676258112, "grad_norm": 2.706913471221924, "learning_rate": 1.9620101695313746e-05, "loss": 0.8302, "step": 5313 }, { "epoch": 0.8675564262683155, "grad_norm": 2.9053173065185547, "learning_rate": 1.9619926635222028e-05, "loss": 0.7779, "step": 5314 }, { "epoch": 0.86771968491082, "grad_norm": 3.217681646347046, "learning_rate": 1.9619751535586418e-05, "loss": 0.7557, "step": 5315 }, { "epoch": 0.8678829435533244, "grad_norm": 2.879211664199829, "learning_rate": 1.9619576396407644e-05, "loss": 0.805, "step": 5316 }, { "epoch": 0.8680462021958287, "grad_norm": 2.7714860439300537, "learning_rate": 1.9619401217686427e-05, "loss": 0.8411, "step": 5317 }, { "epoch": 0.8682094608383332, "grad_norm": 3.072133779525757, "learning_rate": 1.961922599942348e-05, "loss": 1.0375, "step": 5318 }, { "epoch": 0.8683727194808375, "grad_norm": 3.8416693210601807, "learning_rate": 1.961905074161953e-05, "loss": 0.9866, "step": 5319 }, { "epoch": 0.8685359781233419, "grad_norm": 3.4265835285186768, "learning_rate": 1.9618875444275294e-05, "loss": 1.0647, "step": 5320 }, { "epoch": 0.8686992367658463, "grad_norm": 3.2600207328796387, "learning_rate": 1.961870010739149e-05, "loss": 1.0916, "step": 5321 }, { "epoch": 0.8688624954083507, "grad_norm": 3.021677255630493, "learning_rate": 1.9618524730968848e-05, "loss": 0.8017, "step": 5322 }, { "epoch": 0.869025754050855, "grad_norm": 2.6352646350860596, "learning_rate": 1.961834931500808e-05, "loss": 0.7393, "step": 5323 }, { "epoch": 0.8691890126933595, "grad_norm": 3.3291423320770264, "learning_rate": 1.961817385950991e-05, "loss": 0.7078, "step": 5324 }, { "epoch": 0.8693522713358638, "grad_norm": 2.7999393939971924, "learning_rate": 1.9617998364475054e-05, "loss": 0.7995, "step": 5325 }, { "epoch": 0.8695155299783682, "grad_norm": 2.883540630340576, "learning_rate": 1.9617822829904244e-05, "loss": 0.8967, "step": 5326 }, { "epoch": 0.8696787886208727, "grad_norm": 2.618311643600464, "learning_rate": 1.9617647255798192e-05, "loss": 0.7593, "step": 5327 }, { "epoch": 0.869842047263377, "grad_norm": 2.7134172916412354, "learning_rate": 1.961747164215763e-05, "loss": 0.8045, "step": 5328 }, { "epoch": 0.8700053059058814, "grad_norm": 2.7313485145568848, "learning_rate": 1.9617295988983268e-05, "loss": 0.723, "step": 5329 }, { "epoch": 0.8701685645483858, "grad_norm": 2.8104753494262695, "learning_rate": 1.9617120296275832e-05, "loss": 0.8895, "step": 5330 }, { "epoch": 0.8703318231908902, "grad_norm": 2.5566606521606445, "learning_rate": 1.961694456403605e-05, "loss": 0.6423, "step": 5331 }, { "epoch": 0.8704950818333945, "grad_norm": 3.0252015590667725, "learning_rate": 1.961676879226464e-05, "loss": 0.7409, "step": 5332 }, { "epoch": 0.870658340475899, "grad_norm": 2.802703380584717, "learning_rate": 1.9616592980962323e-05, "loss": 0.8116, "step": 5333 }, { "epoch": 0.8708215991184033, "grad_norm": 2.8698182106018066, "learning_rate": 1.9616417130129825e-05, "loss": 1.002, "step": 5334 }, { "epoch": 0.8709848577609077, "grad_norm": 2.598947286605835, "learning_rate": 1.9616241239767867e-05, "loss": 0.7638, "step": 5335 }, { "epoch": 0.871148116403412, "grad_norm": 2.217221975326538, "learning_rate": 1.9616065309877172e-05, "loss": 0.6283, "step": 5336 }, { "epoch": 0.8713113750459165, "grad_norm": 2.843820333480835, "learning_rate": 1.9615889340458462e-05, "loss": 0.839, "step": 5337 }, { "epoch": 0.8714746336884209, "grad_norm": 2.8420989513397217, "learning_rate": 1.9615713331512465e-05, "loss": 0.8947, "step": 5338 }, { "epoch": 0.8716378923309253, "grad_norm": 2.8168859481811523, "learning_rate": 1.9615537283039898e-05, "loss": 0.8348, "step": 5339 }, { "epoch": 0.8718011509734297, "grad_norm": 3.1207735538482666, "learning_rate": 1.961536119504149e-05, "loss": 1.0043, "step": 5340 }, { "epoch": 0.871964409615934, "grad_norm": 2.9493207931518555, "learning_rate": 1.9615185067517962e-05, "loss": 0.9467, "step": 5341 }, { "epoch": 0.8721276682584385, "grad_norm": 2.787151575088501, "learning_rate": 1.961500890047004e-05, "loss": 0.7119, "step": 5342 }, { "epoch": 0.8722909269009428, "grad_norm": 2.802440881729126, "learning_rate": 1.961483269389845e-05, "loss": 0.8938, "step": 5343 }, { "epoch": 0.8724541855434472, "grad_norm": 2.7219741344451904, "learning_rate": 1.9614656447803907e-05, "loss": 0.8602, "step": 5344 }, { "epoch": 0.8726174441859516, "grad_norm": 3.522479772567749, "learning_rate": 1.9614480162187143e-05, "loss": 0.854, "step": 5345 }, { "epoch": 0.872780702828456, "grad_norm": 3.1348955631256104, "learning_rate": 1.961430383704888e-05, "loss": 0.788, "step": 5346 }, { "epoch": 0.8729439614709603, "grad_norm": 3.2490503787994385, "learning_rate": 1.9614127472389847e-05, "loss": 1.4543, "step": 5347 }, { "epoch": 0.8731072201134648, "grad_norm": 3.3190667629241943, "learning_rate": 1.9613951068210765e-05, "loss": 0.9171, "step": 5348 }, { "epoch": 0.8732704787559692, "grad_norm": 3.0931811332702637, "learning_rate": 1.961377462451236e-05, "loss": 0.9084, "step": 5349 }, { "epoch": 0.8734337373984735, "grad_norm": 2.706578254699707, "learning_rate": 1.961359814129536e-05, "loss": 0.6456, "step": 5350 }, { "epoch": 0.873596996040978, "grad_norm": 2.379058599472046, "learning_rate": 1.961342161856049e-05, "loss": 0.6989, "step": 5351 }, { "epoch": 0.8737602546834823, "grad_norm": 2.8019180297851562, "learning_rate": 1.961324505630847e-05, "loss": 0.7623, "step": 5352 }, { "epoch": 0.8739235133259867, "grad_norm": 2.496758460998535, "learning_rate": 1.961306845454003e-05, "loss": 0.8394, "step": 5353 }, { "epoch": 0.874086771968491, "grad_norm": 3.1127798557281494, "learning_rate": 1.9612891813255898e-05, "loss": 0.9599, "step": 5354 }, { "epoch": 0.8742500306109955, "grad_norm": 2.774886131286621, "learning_rate": 1.9612715132456794e-05, "loss": 0.9126, "step": 5355 }, { "epoch": 0.8744132892534998, "grad_norm": 2.9210119247436523, "learning_rate": 1.9612538412143447e-05, "loss": 0.7856, "step": 5356 }, { "epoch": 0.8745765478960043, "grad_norm": 2.522470474243164, "learning_rate": 1.961236165231659e-05, "loss": 0.7192, "step": 5357 }, { "epoch": 0.8747398065385086, "grad_norm": 2.78171706199646, "learning_rate": 1.9612184852976938e-05, "loss": 0.8856, "step": 5358 }, { "epoch": 0.874903065181013, "grad_norm": 2.6407010555267334, "learning_rate": 1.9612008014125226e-05, "loss": 0.708, "step": 5359 }, { "epoch": 0.8750663238235175, "grad_norm": 3.014101505279541, "learning_rate": 1.9611831135762175e-05, "loss": 0.8648, "step": 5360 }, { "epoch": 0.8752295824660218, "grad_norm": 2.9701156616210938, "learning_rate": 1.961165421788852e-05, "loss": 0.8392, "step": 5361 }, { "epoch": 0.8753928411085262, "grad_norm": 2.730180025100708, "learning_rate": 1.9611477260504984e-05, "loss": 0.7349, "step": 5362 }, { "epoch": 0.8755560997510305, "grad_norm": 2.6288774013519287, "learning_rate": 1.9611300263612293e-05, "loss": 0.811, "step": 5363 }, { "epoch": 0.875719358393535, "grad_norm": 3.0817365646362305, "learning_rate": 1.9611123227211176e-05, "loss": 0.9251, "step": 5364 }, { "epoch": 0.8758826170360393, "grad_norm": 2.587792158126831, "learning_rate": 1.961094615130236e-05, "loss": 0.7659, "step": 5365 }, { "epoch": 0.8760458756785438, "grad_norm": 2.832576274871826, "learning_rate": 1.961076903588657e-05, "loss": 0.75, "step": 5366 }, { "epoch": 0.8762091343210481, "grad_norm": 3.4125170707702637, "learning_rate": 1.9610591880964542e-05, "loss": 0.97, "step": 5367 }, { "epoch": 0.8763723929635525, "grad_norm": 2.9758031368255615, "learning_rate": 1.9610414686536998e-05, "loss": 0.9064, "step": 5368 }, { "epoch": 0.8765356516060568, "grad_norm": 2.439192295074463, "learning_rate": 1.9610237452604668e-05, "loss": 0.6581, "step": 5369 }, { "epoch": 0.8766989102485613, "grad_norm": 2.6731350421905518, "learning_rate": 1.9610060179168283e-05, "loss": 0.7547, "step": 5370 }, { "epoch": 0.8768621688910657, "grad_norm": 3.125786304473877, "learning_rate": 1.9609882866228568e-05, "loss": 0.9099, "step": 5371 }, { "epoch": 0.87702542753357, "grad_norm": 2.571415424346924, "learning_rate": 1.9609705513786252e-05, "loss": 0.8967, "step": 5372 }, { "epoch": 0.8771886861760745, "grad_norm": 2.910525321960449, "learning_rate": 1.9609528121842067e-05, "loss": 0.6487, "step": 5373 }, { "epoch": 0.8773519448185788, "grad_norm": 2.5054759979248047, "learning_rate": 1.9609350690396737e-05, "loss": 0.7057, "step": 5374 }, { "epoch": 0.8775152034610832, "grad_norm": 3.186866044998169, "learning_rate": 1.9609173219450998e-05, "loss": 0.9302, "step": 5375 }, { "epoch": 0.8776784621035876, "grad_norm": 2.9908084869384766, "learning_rate": 1.9608995709005575e-05, "loss": 0.7999, "step": 5376 }, { "epoch": 0.877841720746092, "grad_norm": 2.8550264835357666, "learning_rate": 1.9608818159061197e-05, "loss": 0.9071, "step": 5377 }, { "epoch": 0.8780049793885963, "grad_norm": 2.649775266647339, "learning_rate": 1.9608640569618598e-05, "loss": 0.7774, "step": 5378 }, { "epoch": 0.8781682380311008, "grad_norm": 2.846808910369873, "learning_rate": 1.9608462940678507e-05, "loss": 0.7574, "step": 5379 }, { "epoch": 0.8783314966736051, "grad_norm": 2.7951276302337646, "learning_rate": 1.9608285272241653e-05, "loss": 0.9742, "step": 5380 }, { "epoch": 0.8784947553161095, "grad_norm": 2.9760327339172363, "learning_rate": 1.9608107564308764e-05, "loss": 0.8456, "step": 5381 }, { "epoch": 0.878658013958614, "grad_norm": 2.6567838191986084, "learning_rate": 1.9607929816880572e-05, "loss": 0.8292, "step": 5382 }, { "epoch": 0.8788212726011183, "grad_norm": 3.0234341621398926, "learning_rate": 1.960775202995781e-05, "loss": 0.8207, "step": 5383 }, { "epoch": 0.8789845312436227, "grad_norm": 3.110701322555542, "learning_rate": 1.9607574203541208e-05, "loss": 0.9573, "step": 5384 }, { "epoch": 0.8791477898861271, "grad_norm": 2.870213508605957, "learning_rate": 1.9607396337631494e-05, "loss": 0.8362, "step": 5385 }, { "epoch": 0.8793110485286315, "grad_norm": 2.980884313583374, "learning_rate": 1.9607218432229404e-05, "loss": 0.9157, "step": 5386 }, { "epoch": 0.8794743071711358, "grad_norm": 2.881187677383423, "learning_rate": 1.9607040487335666e-05, "loss": 0.8319, "step": 5387 }, { "epoch": 0.8796375658136403, "grad_norm": 2.33768630027771, "learning_rate": 1.9606862502951013e-05, "loss": 0.6946, "step": 5388 }, { "epoch": 0.8798008244561446, "grad_norm": 2.856215476989746, "learning_rate": 1.9606684479076176e-05, "loss": 0.8201, "step": 5389 }, { "epoch": 0.879964083098649, "grad_norm": 3.145249843597412, "learning_rate": 1.960650641571188e-05, "loss": 0.9154, "step": 5390 }, { "epoch": 0.8801273417411534, "grad_norm": 2.787055253982544, "learning_rate": 1.960632831285887e-05, "loss": 0.8181, "step": 5391 }, { "epoch": 0.8802906003836578, "grad_norm": 2.656064748764038, "learning_rate": 1.960615017051787e-05, "loss": 0.7775, "step": 5392 }, { "epoch": 0.8804538590261622, "grad_norm": 3.036881685256958, "learning_rate": 1.9605971988689615e-05, "loss": 0.7894, "step": 5393 }, { "epoch": 0.8806171176686666, "grad_norm": 2.9856483936309814, "learning_rate": 1.9605793767374834e-05, "loss": 0.8741, "step": 5394 }, { "epoch": 0.880780376311171, "grad_norm": 3.0911471843719482, "learning_rate": 1.9605615506574263e-05, "loss": 0.933, "step": 5395 }, { "epoch": 0.8809436349536753, "grad_norm": 2.769207239151001, "learning_rate": 1.9605437206288635e-05, "loss": 0.8416, "step": 5396 }, { "epoch": 0.8811068935961798, "grad_norm": 2.1859147548675537, "learning_rate": 1.9605258866518678e-05, "loss": 0.657, "step": 5397 }, { "epoch": 0.8812701522386841, "grad_norm": 3.098895788192749, "learning_rate": 1.960508048726513e-05, "loss": 0.9238, "step": 5398 }, { "epoch": 0.8814334108811885, "grad_norm": 2.4494073390960693, "learning_rate": 1.9604902068528728e-05, "loss": 0.7726, "step": 5399 }, { "epoch": 0.8815966695236929, "grad_norm": 2.232076406478882, "learning_rate": 1.9604723610310195e-05, "loss": 0.5959, "step": 5400 }, { "epoch": 0.8817599281661973, "grad_norm": 2.5213165283203125, "learning_rate": 1.960454511261027e-05, "loss": 0.8842, "step": 5401 }, { "epoch": 0.8819231868087016, "grad_norm": 3.0127220153808594, "learning_rate": 1.9604366575429687e-05, "loss": 0.8986, "step": 5402 }, { "epoch": 0.8820864454512061, "grad_norm": 2.908712387084961, "learning_rate": 1.960418799876918e-05, "loss": 0.8867, "step": 5403 }, { "epoch": 0.8822497040937105, "grad_norm": 2.1948187351226807, "learning_rate": 1.9604009382629484e-05, "loss": 0.6556, "step": 5404 }, { "epoch": 0.8824129627362148, "grad_norm": 3.446448564529419, "learning_rate": 1.9603830727011332e-05, "loss": 0.711, "step": 5405 }, { "epoch": 0.8825762213787193, "grad_norm": 3.5165224075317383, "learning_rate": 1.9603652031915458e-05, "loss": 0.7614, "step": 5406 }, { "epoch": 0.8827394800212236, "grad_norm": 2.7425129413604736, "learning_rate": 1.9603473297342594e-05, "loss": 0.8276, "step": 5407 }, { "epoch": 0.882902738663728, "grad_norm": 2.806771993637085, "learning_rate": 1.9603294523293478e-05, "loss": 0.8559, "step": 5408 }, { "epoch": 0.8830659973062324, "grad_norm": 3.216759443283081, "learning_rate": 1.960311570976885e-05, "loss": 0.9284, "step": 5409 }, { "epoch": 0.8832292559487368, "grad_norm": 2.513277053833008, "learning_rate": 1.9602936856769432e-05, "loss": 0.7865, "step": 5410 }, { "epoch": 0.8833925145912411, "grad_norm": 2.854607343673706, "learning_rate": 1.960275796429597e-05, "loss": 0.753, "step": 5411 }, { "epoch": 0.8835557732337456, "grad_norm": 2.9817276000976562, "learning_rate": 1.9602579032349195e-05, "loss": 0.8298, "step": 5412 }, { "epoch": 0.8837190318762499, "grad_norm": 2.8130524158477783, "learning_rate": 1.9602400060929845e-05, "loss": 0.85, "step": 5413 }, { "epoch": 0.8838822905187543, "grad_norm": 2.6367857456207275, "learning_rate": 1.9602221050038652e-05, "loss": 0.7529, "step": 5414 }, { "epoch": 0.8840455491612588, "grad_norm": 2.804323673248291, "learning_rate": 1.9602041999676357e-05, "loss": 0.8483, "step": 5415 }, { "epoch": 0.8842088078037631, "grad_norm": 2.9654061794281006, "learning_rate": 1.960186290984369e-05, "loss": 0.844, "step": 5416 }, { "epoch": 0.8843720664462675, "grad_norm": 3.316039562225342, "learning_rate": 1.9601683780541392e-05, "loss": 0.8384, "step": 5417 }, { "epoch": 0.8845353250887719, "grad_norm": 2.8931822776794434, "learning_rate": 1.9601504611770196e-05, "loss": 0.8802, "step": 5418 }, { "epoch": 0.8846985837312763, "grad_norm": 2.6531834602355957, "learning_rate": 1.9601325403530843e-05, "loss": 0.7715, "step": 5419 }, { "epoch": 0.8848618423737806, "grad_norm": 2.898303508758545, "learning_rate": 1.9601146155824064e-05, "loss": 0.9559, "step": 5420 }, { "epoch": 0.8850251010162851, "grad_norm": 3.0014970302581787, "learning_rate": 1.9600966868650598e-05, "loss": 0.9278, "step": 5421 }, { "epoch": 0.8851883596587894, "grad_norm": 2.920422315597534, "learning_rate": 1.9600787542011184e-05, "loss": 0.7561, "step": 5422 }, { "epoch": 0.8853516183012938, "grad_norm": 2.9059219360351562, "learning_rate": 1.9600608175906556e-05, "loss": 0.8028, "step": 5423 }, { "epoch": 0.8855148769437982, "grad_norm": 3.398510217666626, "learning_rate": 1.9600428770337454e-05, "loss": 0.9553, "step": 5424 }, { "epoch": 0.8856781355863026, "grad_norm": 2.5540506839752197, "learning_rate": 1.9600249325304616e-05, "loss": 0.6862, "step": 5425 }, { "epoch": 0.885841394228807, "grad_norm": 2.521778106689453, "learning_rate": 1.9600069840808777e-05, "loss": 0.8292, "step": 5426 }, { "epoch": 0.8860046528713114, "grad_norm": 2.897866725921631, "learning_rate": 1.9599890316850673e-05, "loss": 0.8526, "step": 5427 }, { "epoch": 0.8861679115138158, "grad_norm": 2.500546932220459, "learning_rate": 1.959971075343105e-05, "loss": 0.6012, "step": 5428 }, { "epoch": 0.8863311701563201, "grad_norm": 2.617635726928711, "learning_rate": 1.9599531150550636e-05, "loss": 0.6992, "step": 5429 }, { "epoch": 0.8864944287988246, "grad_norm": 2.805748701095581, "learning_rate": 1.9599351508210177e-05, "loss": 0.8215, "step": 5430 }, { "epoch": 0.8866576874413289, "grad_norm": 2.555077314376831, "learning_rate": 1.9599171826410406e-05, "loss": 0.6675, "step": 5431 }, { "epoch": 0.8868209460838333, "grad_norm": 2.683465003967285, "learning_rate": 1.9598992105152067e-05, "loss": 0.7743, "step": 5432 }, { "epoch": 0.8869842047263377, "grad_norm": 2.927147626876831, "learning_rate": 1.9598812344435894e-05, "loss": 0.8455, "step": 5433 }, { "epoch": 0.8871474633688421, "grad_norm": 2.847968578338623, "learning_rate": 1.959863254426263e-05, "loss": 0.8759, "step": 5434 }, { "epoch": 0.8873107220113465, "grad_norm": 2.57542085647583, "learning_rate": 1.959845270463301e-05, "loss": 0.6647, "step": 5435 }, { "epoch": 0.8874739806538509, "grad_norm": 2.857163190841675, "learning_rate": 1.959827282554778e-05, "loss": 0.9282, "step": 5436 }, { "epoch": 0.8876372392963553, "grad_norm": 2.8223230838775635, "learning_rate": 1.9598092907007667e-05, "loss": 0.8471, "step": 5437 }, { "epoch": 0.8878004979388596, "grad_norm": 2.957291841506958, "learning_rate": 1.959791294901342e-05, "loss": 0.8365, "step": 5438 }, { "epoch": 0.8879637565813641, "grad_norm": 2.4641668796539307, "learning_rate": 1.9597732951565783e-05, "loss": 0.6632, "step": 5439 }, { "epoch": 0.8881270152238684, "grad_norm": 3.018480062484741, "learning_rate": 1.9597552914665484e-05, "loss": 0.8752, "step": 5440 }, { "epoch": 0.8882902738663728, "grad_norm": 3.105272054672241, "learning_rate": 1.959737283831327e-05, "loss": 0.7967, "step": 5441 }, { "epoch": 0.8884535325088772, "grad_norm": 2.9689996242523193, "learning_rate": 1.9597192722509882e-05, "loss": 0.7766, "step": 5442 }, { "epoch": 0.8886167911513816, "grad_norm": 2.59950590133667, "learning_rate": 1.959701256725606e-05, "loss": 0.7222, "step": 5443 }, { "epoch": 0.8887800497938859, "grad_norm": 2.975555419921875, "learning_rate": 1.959683237255254e-05, "loss": 0.8531, "step": 5444 }, { "epoch": 0.8889433084363904, "grad_norm": 2.9805734157562256, "learning_rate": 1.9596652138400064e-05, "loss": 0.7681, "step": 5445 }, { "epoch": 0.8891065670788948, "grad_norm": 2.737729072570801, "learning_rate": 1.9596471864799377e-05, "loss": 0.797, "step": 5446 }, { "epoch": 0.8892698257213991, "grad_norm": 2.8164994716644287, "learning_rate": 1.959629155175122e-05, "loss": 0.9052, "step": 5447 }, { "epoch": 0.8894330843639036, "grad_norm": 2.7863097190856934, "learning_rate": 1.959611119925633e-05, "loss": 0.8241, "step": 5448 }, { "epoch": 0.8895963430064079, "grad_norm": 2.6735610961914062, "learning_rate": 1.959593080731545e-05, "loss": 0.7098, "step": 5449 }, { "epoch": 0.8897596016489123, "grad_norm": 2.9869627952575684, "learning_rate": 1.9595750375929322e-05, "loss": 0.8711, "step": 5450 }, { "epoch": 0.8899228602914167, "grad_norm": 2.7780117988586426, "learning_rate": 1.9595569905098687e-05, "loss": 0.8601, "step": 5451 }, { "epoch": 0.8900861189339211, "grad_norm": 2.7795653343200684, "learning_rate": 1.959538939482429e-05, "loss": 0.6783, "step": 5452 }, { "epoch": 0.8902493775764254, "grad_norm": 3.1774535179138184, "learning_rate": 1.9595208845106866e-05, "loss": 0.8289, "step": 5453 }, { "epoch": 0.8904126362189299, "grad_norm": 2.7823989391326904, "learning_rate": 1.959502825594716e-05, "loss": 0.7556, "step": 5454 }, { "epoch": 0.8905758948614342, "grad_norm": 2.898620128631592, "learning_rate": 1.9594847627345922e-05, "loss": 0.8399, "step": 5455 }, { "epoch": 0.8907391535039386, "grad_norm": 2.8340768814086914, "learning_rate": 1.9594666959303886e-05, "loss": 0.6897, "step": 5456 }, { "epoch": 0.8909024121464431, "grad_norm": 2.6453447341918945, "learning_rate": 1.9594486251821796e-05, "loss": 0.6334, "step": 5457 }, { "epoch": 0.8910656707889474, "grad_norm": 2.872319221496582, "learning_rate": 1.95943055049004e-05, "loss": 0.8591, "step": 5458 }, { "epoch": 0.8912289294314518, "grad_norm": 3.123926877975464, "learning_rate": 1.959412471854043e-05, "loss": 0.9741, "step": 5459 }, { "epoch": 0.8913921880739561, "grad_norm": 2.718156576156616, "learning_rate": 1.959394389274264e-05, "loss": 0.7879, "step": 5460 }, { "epoch": 0.8915554467164606, "grad_norm": 2.8839707374572754, "learning_rate": 1.959376302750777e-05, "loss": 0.7969, "step": 5461 }, { "epoch": 0.8917187053589649, "grad_norm": 2.6405813694000244, "learning_rate": 1.959358212283656e-05, "loss": 0.7156, "step": 5462 }, { "epoch": 0.8918819640014694, "grad_norm": 2.4264166355133057, "learning_rate": 1.9593401178729756e-05, "loss": 0.7077, "step": 5463 }, { "epoch": 0.8920452226439737, "grad_norm": 2.571911096572876, "learning_rate": 1.9593220195188106e-05, "loss": 0.7673, "step": 5464 }, { "epoch": 0.8922084812864781, "grad_norm": 2.8399791717529297, "learning_rate": 1.9593039172212346e-05, "loss": 0.8788, "step": 5465 }, { "epoch": 0.8923717399289824, "grad_norm": 2.4618003368377686, "learning_rate": 1.9592858109803227e-05, "loss": 0.6883, "step": 5466 }, { "epoch": 0.8925349985714869, "grad_norm": 2.461454391479492, "learning_rate": 1.959267700796149e-05, "loss": 0.7341, "step": 5467 }, { "epoch": 0.8926982572139913, "grad_norm": 3.3680381774902344, "learning_rate": 1.959249586668788e-05, "loss": 0.8795, "step": 5468 }, { "epoch": 0.8928615158564956, "grad_norm": 3.126370429992676, "learning_rate": 1.959231468598314e-05, "loss": 0.7996, "step": 5469 }, { "epoch": 0.8930247744990001, "grad_norm": 2.8774449825286865, "learning_rate": 1.959213346584802e-05, "loss": 0.8333, "step": 5470 }, { "epoch": 0.8931880331415044, "grad_norm": 3.123535633087158, "learning_rate": 1.9591952206283257e-05, "loss": 0.9616, "step": 5471 }, { "epoch": 0.8933512917840088, "grad_norm": 2.584472894668579, "learning_rate": 1.9591770907289603e-05, "loss": 0.7803, "step": 5472 }, { "epoch": 0.8935145504265132, "grad_norm": 2.585986852645874, "learning_rate": 1.9591589568867802e-05, "loss": 0.7948, "step": 5473 }, { "epoch": 0.8936778090690176, "grad_norm": 3.010815382003784, "learning_rate": 1.9591408191018597e-05, "loss": 0.7897, "step": 5474 }, { "epoch": 0.8938410677115219, "grad_norm": 2.5731077194213867, "learning_rate": 1.959122677374273e-05, "loss": 0.7571, "step": 5475 }, { "epoch": 0.8940043263540264, "grad_norm": 2.8708159923553467, "learning_rate": 1.9591045317040955e-05, "loss": 0.8261, "step": 5476 }, { "epoch": 0.8941675849965307, "grad_norm": 2.492424726486206, "learning_rate": 1.9590863820914013e-05, "loss": 0.7209, "step": 5477 }, { "epoch": 0.8943308436390351, "grad_norm": 2.7699637413024902, "learning_rate": 1.9590682285362655e-05, "loss": 0.8129, "step": 5478 }, { "epoch": 0.8944941022815396, "grad_norm": 2.6790716648101807, "learning_rate": 1.959050071038762e-05, "loss": 0.7506, "step": 5479 }, { "epoch": 0.8946573609240439, "grad_norm": 2.8874032497406006, "learning_rate": 1.959031909598966e-05, "loss": 0.7492, "step": 5480 }, { "epoch": 0.8948206195665483, "grad_norm": 2.506643295288086, "learning_rate": 1.9590137442169517e-05, "loss": 0.7021, "step": 5481 }, { "epoch": 0.8949838782090527, "grad_norm": 2.7915422916412354, "learning_rate": 1.958995574892794e-05, "loss": 0.8679, "step": 5482 }, { "epoch": 0.8951471368515571, "grad_norm": 3.613109588623047, "learning_rate": 1.9589774016265678e-05, "loss": 0.95, "step": 5483 }, { "epoch": 0.8953103954940614, "grad_norm": 3.102045774459839, "learning_rate": 1.9589592244183476e-05, "loss": 0.7376, "step": 5484 }, { "epoch": 0.8954736541365659, "grad_norm": 3.1826961040496826, "learning_rate": 1.958941043268208e-05, "loss": 0.9214, "step": 5485 }, { "epoch": 0.8956369127790702, "grad_norm": 2.9403557777404785, "learning_rate": 1.9589228581762237e-05, "loss": 0.8491, "step": 5486 }, { "epoch": 0.8958001714215746, "grad_norm": 2.429375410079956, "learning_rate": 1.95890466914247e-05, "loss": 0.6888, "step": 5487 }, { "epoch": 0.895963430064079, "grad_norm": 2.970271348953247, "learning_rate": 1.9588864761670213e-05, "loss": 0.9674, "step": 5488 }, { "epoch": 0.8961266887065834, "grad_norm": 2.7113492488861084, "learning_rate": 1.958868279249952e-05, "loss": 0.8098, "step": 5489 }, { "epoch": 0.8962899473490878, "grad_norm": 2.5094995498657227, "learning_rate": 1.9588500783913374e-05, "loss": 0.6096, "step": 5490 }, { "epoch": 0.8964532059915922, "grad_norm": 2.675766706466675, "learning_rate": 1.9588318735912524e-05, "loss": 0.7439, "step": 5491 }, { "epoch": 0.8966164646340966, "grad_norm": 3.1530182361602783, "learning_rate": 1.9588136648497713e-05, "loss": 0.7857, "step": 5492 }, { "epoch": 0.8967797232766009, "grad_norm": 3.0250864028930664, "learning_rate": 1.9587954521669693e-05, "loss": 0.849, "step": 5493 }, { "epoch": 0.8969429819191054, "grad_norm": 2.660182476043701, "learning_rate": 1.9587772355429213e-05, "loss": 0.7293, "step": 5494 }, { "epoch": 0.8971062405616097, "grad_norm": 2.863732099533081, "learning_rate": 1.9587590149777026e-05, "loss": 0.851, "step": 5495 }, { "epoch": 0.8972694992041141, "grad_norm": 2.7574050426483154, "learning_rate": 1.958740790471387e-05, "loss": 0.8018, "step": 5496 }, { "epoch": 0.8974327578466185, "grad_norm": 2.9702134132385254, "learning_rate": 1.95872256202405e-05, "loss": 0.7055, "step": 5497 }, { "epoch": 0.8975960164891229, "grad_norm": 3.0629749298095703, "learning_rate": 1.958704329635767e-05, "loss": 1.0248, "step": 5498 }, { "epoch": 0.8977592751316272, "grad_norm": 3.4304065704345703, "learning_rate": 1.9586860933066122e-05, "loss": 0.9636, "step": 5499 }, { "epoch": 0.8979225337741317, "grad_norm": 2.8729395866394043, "learning_rate": 1.9586678530366607e-05, "loss": 0.835, "step": 5500 }, { "epoch": 0.8980857924166361, "grad_norm": 3.1038925647735596, "learning_rate": 1.958649608825988e-05, "loss": 0.6932, "step": 5501 }, { "epoch": 0.8982490510591404, "grad_norm": 2.613825559616089, "learning_rate": 1.9586313606746686e-05, "loss": 0.8548, "step": 5502 }, { "epoch": 0.8984123097016449, "grad_norm": 2.9343032836914062, "learning_rate": 1.9586131085827775e-05, "loss": 0.7466, "step": 5503 }, { "epoch": 0.8985755683441492, "grad_norm": 3.091461658477783, "learning_rate": 1.9585948525503902e-05, "loss": 0.8172, "step": 5504 }, { "epoch": 0.8987388269866536, "grad_norm": 2.5989511013031006, "learning_rate": 1.9585765925775812e-05, "loss": 0.6475, "step": 5505 }, { "epoch": 0.898902085629158, "grad_norm": 2.630589485168457, "learning_rate": 1.9585583286644255e-05, "loss": 0.8029, "step": 5506 }, { "epoch": 0.8990653442716624, "grad_norm": 2.5409889221191406, "learning_rate": 1.958540060810999e-05, "loss": 0.7135, "step": 5507 }, { "epoch": 0.8992286029141667, "grad_norm": 2.262665271759033, "learning_rate": 1.958521789017376e-05, "loss": 0.6428, "step": 5508 }, { "epoch": 0.8993918615566712, "grad_norm": 2.809109926223755, "learning_rate": 1.958503513283632e-05, "loss": 0.9146, "step": 5509 }, { "epoch": 0.8995551201991755, "grad_norm": 2.544059991836548, "learning_rate": 1.958485233609842e-05, "loss": 0.8046, "step": 5510 }, { "epoch": 0.8997183788416799, "grad_norm": 2.9615561962127686, "learning_rate": 1.9584669499960805e-05, "loss": 0.8853, "step": 5511 }, { "epoch": 0.8998816374841844, "grad_norm": 2.850168466567993, "learning_rate": 1.958448662442424e-05, "loss": 0.9308, "step": 5512 }, { "epoch": 0.9000448961266887, "grad_norm": 2.6913700103759766, "learning_rate": 1.9584303709489467e-05, "loss": 0.7433, "step": 5513 }, { "epoch": 0.9002081547691931, "grad_norm": 2.829540491104126, "learning_rate": 1.958412075515724e-05, "loss": 0.7242, "step": 5514 }, { "epoch": 0.9003714134116975, "grad_norm": 3.080601453781128, "learning_rate": 1.9583937761428313e-05, "loss": 0.8801, "step": 5515 }, { "epoch": 0.9005346720542019, "grad_norm": 2.793313980102539, "learning_rate": 1.9583754728303436e-05, "loss": 0.762, "step": 5516 }, { "epoch": 0.9006979306967062, "grad_norm": 2.769217014312744, "learning_rate": 1.958357165578336e-05, "loss": 0.7502, "step": 5517 }, { "epoch": 0.9008611893392107, "grad_norm": 2.774498224258423, "learning_rate": 1.9583388543868844e-05, "loss": 0.7859, "step": 5518 }, { "epoch": 0.901024447981715, "grad_norm": 2.8758809566497803, "learning_rate": 1.9583205392560634e-05, "loss": 0.9409, "step": 5519 }, { "epoch": 0.9011877066242194, "grad_norm": 2.3982133865356445, "learning_rate": 1.9583022201859487e-05, "loss": 0.7567, "step": 5520 }, { "epoch": 0.9013509652667238, "grad_norm": 2.504084348678589, "learning_rate": 1.958283897176615e-05, "loss": 0.66, "step": 5521 }, { "epoch": 0.9015142239092282, "grad_norm": 3.112046241760254, "learning_rate": 1.958265570228138e-05, "loss": 0.8605, "step": 5522 }, { "epoch": 0.9016774825517326, "grad_norm": 2.5863215923309326, "learning_rate": 1.9582472393405937e-05, "loss": 0.7967, "step": 5523 }, { "epoch": 0.901840741194237, "grad_norm": 3.6803996562957764, "learning_rate": 1.9582289045140565e-05, "loss": 1.0137, "step": 5524 }, { "epoch": 0.9020039998367414, "grad_norm": 2.374979257583618, "learning_rate": 1.958210565748602e-05, "loss": 0.6408, "step": 5525 }, { "epoch": 0.9021672584792457, "grad_norm": 2.942241907119751, "learning_rate": 1.9581922230443055e-05, "loss": 0.7608, "step": 5526 }, { "epoch": 0.9023305171217502, "grad_norm": 3.2577626705169678, "learning_rate": 1.958173876401243e-05, "loss": 0.7869, "step": 5527 }, { "epoch": 0.9024937757642545, "grad_norm": 3.6008377075195312, "learning_rate": 1.958155525819489e-05, "loss": 1.5311, "step": 5528 }, { "epoch": 0.9026570344067589, "grad_norm": 2.8928792476654053, "learning_rate": 1.95813717129912e-05, "loss": 0.8132, "step": 5529 }, { "epoch": 0.9028202930492633, "grad_norm": 3.5331459045410156, "learning_rate": 1.9581188128402106e-05, "loss": 0.9806, "step": 5530 }, { "epoch": 0.9029835516917677, "grad_norm": 2.915354013442993, "learning_rate": 1.9581004504428366e-05, "loss": 0.7431, "step": 5531 }, { "epoch": 0.903146810334272, "grad_norm": 3.2659053802490234, "learning_rate": 1.9580820841070732e-05, "loss": 0.8515, "step": 5532 }, { "epoch": 0.9033100689767765, "grad_norm": 3.06435489654541, "learning_rate": 1.9580637138329965e-05, "loss": 0.8685, "step": 5533 }, { "epoch": 0.9034733276192809, "grad_norm": 3.3398659229278564, "learning_rate": 1.9580453396206815e-05, "loss": 0.9489, "step": 5534 }, { "epoch": 0.9036365862617852, "grad_norm": 2.6983489990234375, "learning_rate": 1.9580269614702033e-05, "loss": 0.8232, "step": 5535 }, { "epoch": 0.9037998449042897, "grad_norm": 3.3574860095977783, "learning_rate": 1.958008579381639e-05, "loss": 1.0162, "step": 5536 }, { "epoch": 0.903963103546794, "grad_norm": 2.8178420066833496, "learning_rate": 1.9579901933550625e-05, "loss": 0.7924, "step": 5537 }, { "epoch": 0.9041263621892984, "grad_norm": 2.7160909175872803, "learning_rate": 1.9579718033905503e-05, "loss": 0.9168, "step": 5538 }, { "epoch": 0.9042896208318028, "grad_norm": 3.0861401557922363, "learning_rate": 1.9579534094881775e-05, "loss": 0.9528, "step": 5539 }, { "epoch": 0.9044528794743072, "grad_norm": 2.4826297760009766, "learning_rate": 1.95793501164802e-05, "loss": 0.6773, "step": 5540 }, { "epoch": 0.9046161381168115, "grad_norm": 2.8870205879211426, "learning_rate": 1.9579166098701535e-05, "loss": 0.7114, "step": 5541 }, { "epoch": 0.904779396759316, "grad_norm": 2.421368360519409, "learning_rate": 1.9578982041546533e-05, "loss": 0.6446, "step": 5542 }, { "epoch": 0.9049426554018203, "grad_norm": 2.803398370742798, "learning_rate": 1.9578797945015958e-05, "loss": 0.9202, "step": 5543 }, { "epoch": 0.9051059140443247, "grad_norm": 2.6065266132354736, "learning_rate": 1.9578613809110555e-05, "loss": 0.7735, "step": 5544 }, { "epoch": 0.9052691726868292, "grad_norm": 2.3749167919158936, "learning_rate": 1.9578429633831093e-05, "loss": 0.7114, "step": 5545 }, { "epoch": 0.9054324313293335, "grad_norm": 2.518254518508911, "learning_rate": 1.957824541917832e-05, "loss": 0.8414, "step": 5546 }, { "epoch": 0.9055956899718379, "grad_norm": 2.761619806289673, "learning_rate": 1.9578061165152998e-05, "loss": 0.7375, "step": 5547 }, { "epoch": 0.9057589486143423, "grad_norm": 2.808704376220703, "learning_rate": 1.9577876871755883e-05, "loss": 0.7965, "step": 5548 }, { "epoch": 0.9059222072568467, "grad_norm": 2.6775050163269043, "learning_rate": 1.9577692538987734e-05, "loss": 0.79, "step": 5549 }, { "epoch": 0.906085465899351, "grad_norm": 2.648430824279785, "learning_rate": 1.9577508166849308e-05, "loss": 0.7414, "step": 5550 }, { "epoch": 0.9062487245418555, "grad_norm": 2.651960611343384, "learning_rate": 1.957732375534136e-05, "loss": 0.6793, "step": 5551 }, { "epoch": 0.9064119831843598, "grad_norm": 2.41679048538208, "learning_rate": 1.957713930446465e-05, "loss": 0.6486, "step": 5552 }, { "epoch": 0.9065752418268642, "grad_norm": 3.4659574031829834, "learning_rate": 1.9576954814219938e-05, "loss": 1.026, "step": 5553 }, { "epoch": 0.9067385004693685, "grad_norm": 2.8189027309417725, "learning_rate": 1.957677028460798e-05, "loss": 0.9494, "step": 5554 }, { "epoch": 0.906901759111873, "grad_norm": 3.022552013397217, "learning_rate": 1.957658571562954e-05, "loss": 0.821, "step": 5555 }, { "epoch": 0.9070650177543774, "grad_norm": 2.740189790725708, "learning_rate": 1.9576401107285363e-05, "loss": 0.657, "step": 5556 }, { "epoch": 0.9072282763968817, "grad_norm": 3.092046022415161, "learning_rate": 1.9576216459576222e-05, "loss": 0.8117, "step": 5557 }, { "epoch": 0.9073915350393862, "grad_norm": 3.0647685527801514, "learning_rate": 1.9576031772502872e-05, "loss": 0.9224, "step": 5558 }, { "epoch": 0.9075547936818905, "grad_norm": 3.507911205291748, "learning_rate": 1.957584704606607e-05, "loss": 1.1508, "step": 5559 }, { "epoch": 0.907718052324395, "grad_norm": 2.9504222869873047, "learning_rate": 1.957566228026658e-05, "loss": 0.866, "step": 5560 }, { "epoch": 0.9078813109668993, "grad_norm": 2.783106803894043, "learning_rate": 1.9575477475105154e-05, "loss": 0.6878, "step": 5561 }, { "epoch": 0.9080445696094037, "grad_norm": 2.888779878616333, "learning_rate": 1.9575292630582558e-05, "loss": 0.7972, "step": 5562 }, { "epoch": 0.908207828251908, "grad_norm": 3.142540693283081, "learning_rate": 1.9575107746699548e-05, "loss": 1.1019, "step": 5563 }, { "epoch": 0.9083710868944125, "grad_norm": 2.8446969985961914, "learning_rate": 1.9574922823456884e-05, "loss": 0.8159, "step": 5564 }, { "epoch": 0.9085343455369168, "grad_norm": 3.262817144393921, "learning_rate": 1.957473786085533e-05, "loss": 0.8582, "step": 5565 }, { "epoch": 0.9086976041794212, "grad_norm": 2.9262642860412598, "learning_rate": 1.9574552858895646e-05, "loss": 0.8034, "step": 5566 }, { "epoch": 0.9088608628219257, "grad_norm": 3.7486507892608643, "learning_rate": 1.957436781757859e-05, "loss": 0.9682, "step": 5567 }, { "epoch": 0.90902412146443, "grad_norm": 3.17718243598938, "learning_rate": 1.957418273690492e-05, "loss": 0.9624, "step": 5568 }, { "epoch": 0.9091873801069345, "grad_norm": 2.5748531818389893, "learning_rate": 1.95739976168754e-05, "loss": 0.7086, "step": 5569 }, { "epoch": 0.9093506387494388, "grad_norm": 2.9902596473693848, "learning_rate": 1.9573812457490794e-05, "loss": 0.8032, "step": 5570 }, { "epoch": 0.9095138973919432, "grad_norm": 2.887502908706665, "learning_rate": 1.9573627258751857e-05, "loss": 0.9422, "step": 5571 }, { "epoch": 0.9096771560344475, "grad_norm": 2.750377893447876, "learning_rate": 1.9573442020659353e-05, "loss": 0.851, "step": 5572 }, { "epoch": 0.909840414676952, "grad_norm": 2.686127185821533, "learning_rate": 1.9573256743214048e-05, "loss": 0.7346, "step": 5573 }, { "epoch": 0.9100036733194563, "grad_norm": 2.3153138160705566, "learning_rate": 1.9573071426416695e-05, "loss": 0.6732, "step": 5574 }, { "epoch": 0.9101669319619607, "grad_norm": 2.641282320022583, "learning_rate": 1.957288607026806e-05, "loss": 0.6931, "step": 5575 }, { "epoch": 0.9103301906044651, "grad_norm": 2.5710160732269287, "learning_rate": 1.957270067476891e-05, "loss": 0.7704, "step": 5576 }, { "epoch": 0.9104934492469695, "grad_norm": 2.2676520347595215, "learning_rate": 1.9572515239919996e-05, "loss": 0.6397, "step": 5577 }, { "epoch": 0.910656707889474, "grad_norm": 2.3467745780944824, "learning_rate": 1.957232976572209e-05, "loss": 0.6816, "step": 5578 }, { "epoch": 0.9108199665319783, "grad_norm": 3.0248868465423584, "learning_rate": 1.957214425217595e-05, "loss": 0.9277, "step": 5579 }, { "epoch": 0.9109832251744827, "grad_norm": 2.599008321762085, "learning_rate": 1.957195869928234e-05, "loss": 0.7237, "step": 5580 }, { "epoch": 0.911146483816987, "grad_norm": 2.9950177669525146, "learning_rate": 1.957177310704202e-05, "loss": 0.7509, "step": 5581 }, { "epoch": 0.9113097424594915, "grad_norm": 2.6955301761627197, "learning_rate": 1.9571587475455757e-05, "loss": 0.7726, "step": 5582 }, { "epoch": 0.9114730011019958, "grad_norm": 2.7650458812713623, "learning_rate": 1.9571401804524314e-05, "loss": 0.7533, "step": 5583 }, { "epoch": 0.9116362597445002, "grad_norm": 2.9259164333343506, "learning_rate": 1.957121609424845e-05, "loss": 0.7408, "step": 5584 }, { "epoch": 0.9117995183870046, "grad_norm": 2.822924852371216, "learning_rate": 1.957103034462893e-05, "loss": 0.8606, "step": 5585 }, { "epoch": 0.911962777029509, "grad_norm": 2.5563998222351074, "learning_rate": 1.9570844555666518e-05, "loss": 0.6975, "step": 5586 }, { "epoch": 0.9121260356720133, "grad_norm": 3.4093170166015625, "learning_rate": 1.9570658727361978e-05, "loss": 0.912, "step": 5587 }, { "epoch": 0.9122892943145178, "grad_norm": 2.777082681655884, "learning_rate": 1.957047285971607e-05, "loss": 0.7479, "step": 5588 }, { "epoch": 0.9124525529570222, "grad_norm": 2.8854591846466064, "learning_rate": 1.957028695272957e-05, "loss": 0.7731, "step": 5589 }, { "epoch": 0.9126158115995265, "grad_norm": 2.865962266921997, "learning_rate": 1.9570101006403227e-05, "loss": 0.7996, "step": 5590 }, { "epoch": 0.912779070242031, "grad_norm": 3.080955982208252, "learning_rate": 1.9569915020737818e-05, "loss": 0.782, "step": 5591 }, { "epoch": 0.9129423288845353, "grad_norm": 2.779315948486328, "learning_rate": 1.95697289957341e-05, "loss": 0.6891, "step": 5592 }, { "epoch": 0.9131055875270397, "grad_norm": 3.321413516998291, "learning_rate": 1.956954293139284e-05, "loss": 0.8545, "step": 5593 }, { "epoch": 0.9132688461695441, "grad_norm": 2.811283588409424, "learning_rate": 1.95693568277148e-05, "loss": 0.8602, "step": 5594 }, { "epoch": 0.9134321048120485, "grad_norm": 3.0590028762817383, "learning_rate": 1.956917068470075e-05, "loss": 0.8465, "step": 5595 }, { "epoch": 0.9135953634545528, "grad_norm": 2.323430299758911, "learning_rate": 1.956898450235145e-05, "loss": 0.7436, "step": 5596 }, { "epoch": 0.9137586220970573, "grad_norm": 2.68053936958313, "learning_rate": 1.956879828066767e-05, "loss": 0.724, "step": 5597 }, { "epoch": 0.9139218807395616, "grad_norm": 2.851963520050049, "learning_rate": 1.9568612019650172e-05, "loss": 0.9567, "step": 5598 }, { "epoch": 0.914085139382066, "grad_norm": 3.038557767868042, "learning_rate": 1.9568425719299724e-05, "loss": 0.7062, "step": 5599 }, { "epoch": 0.9142483980245705, "grad_norm": 2.900242567062378, "learning_rate": 1.956823937961709e-05, "loss": 0.7535, "step": 5600 }, { "epoch": 0.9144116566670748, "grad_norm": 2.287036657333374, "learning_rate": 1.9568053000603035e-05, "loss": 0.6265, "step": 5601 }, { "epoch": 0.9145749153095792, "grad_norm": 3.3307979106903076, "learning_rate": 1.9567866582258332e-05, "loss": 1.0209, "step": 5602 }, { "epoch": 0.9147381739520836, "grad_norm": 3.0032358169555664, "learning_rate": 1.9567680124583738e-05, "loss": 0.8537, "step": 5603 }, { "epoch": 0.914901432594588, "grad_norm": 2.7140116691589355, "learning_rate": 1.956749362758003e-05, "loss": 0.8479, "step": 5604 }, { "epoch": 0.9150646912370923, "grad_norm": 3.0236997604370117, "learning_rate": 1.956730709124796e-05, "loss": 0.8421, "step": 5605 }, { "epoch": 0.9152279498795968, "grad_norm": 3.169405221939087, "learning_rate": 1.9567120515588307e-05, "loss": 1.1707, "step": 5606 }, { "epoch": 0.9153912085221011, "grad_norm": 2.8420374393463135, "learning_rate": 1.9566933900601836e-05, "loss": 0.7591, "step": 5607 }, { "epoch": 0.9155544671646055, "grad_norm": 2.451683521270752, "learning_rate": 1.956674724628931e-05, "loss": 0.7314, "step": 5608 }, { "epoch": 0.9157177258071099, "grad_norm": 2.9704504013061523, "learning_rate": 1.9566560552651495e-05, "loss": 0.8385, "step": 5609 }, { "epoch": 0.9158809844496143, "grad_norm": 2.2262561321258545, "learning_rate": 1.9566373819689164e-05, "loss": 0.5449, "step": 5610 }, { "epoch": 0.9160442430921187, "grad_norm": 2.8477959632873535, "learning_rate": 1.9566187047403084e-05, "loss": 0.8245, "step": 5611 }, { "epoch": 0.9162075017346231, "grad_norm": 2.7677559852600098, "learning_rate": 1.9566000235794022e-05, "loss": 0.8362, "step": 5612 }, { "epoch": 0.9163707603771275, "grad_norm": 2.8405139446258545, "learning_rate": 1.9565813384862746e-05, "loss": 0.7507, "step": 5613 }, { "epoch": 0.9165340190196318, "grad_norm": 2.811365842819214, "learning_rate": 1.9565626494610018e-05, "loss": 0.6752, "step": 5614 }, { "epoch": 0.9166972776621363, "grad_norm": 2.896758556365967, "learning_rate": 1.9565439565036615e-05, "loss": 0.746, "step": 5615 }, { "epoch": 0.9168605363046406, "grad_norm": 3.066608428955078, "learning_rate": 1.9565252596143302e-05, "loss": 0.8212, "step": 5616 }, { "epoch": 0.917023794947145, "grad_norm": 2.736166477203369, "learning_rate": 1.9565065587930845e-05, "loss": 0.7597, "step": 5617 }, { "epoch": 0.9171870535896494, "grad_norm": 2.6040663719177246, "learning_rate": 1.9564878540400017e-05, "loss": 0.8447, "step": 5618 }, { "epoch": 0.9173503122321538, "grad_norm": 3.2323358058929443, "learning_rate": 1.9564691453551586e-05, "loss": 0.7753, "step": 5619 }, { "epoch": 0.9175135708746581, "grad_norm": 2.782388687133789, "learning_rate": 1.9564504327386318e-05, "loss": 0.6505, "step": 5620 }, { "epoch": 0.9176768295171626, "grad_norm": 3.4720916748046875, "learning_rate": 1.9564317161904983e-05, "loss": 1.0214, "step": 5621 }, { "epoch": 0.917840088159667, "grad_norm": 2.865452289581299, "learning_rate": 1.9564129957108356e-05, "loss": 0.8645, "step": 5622 }, { "epoch": 0.9180033468021713, "grad_norm": 2.438814401626587, "learning_rate": 1.9563942712997196e-05, "loss": 0.7808, "step": 5623 }, { "epoch": 0.9181666054446758, "grad_norm": 3.129944324493408, "learning_rate": 1.9563755429572286e-05, "loss": 0.9433, "step": 5624 }, { "epoch": 0.9183298640871801, "grad_norm": 3.0588624477386475, "learning_rate": 1.9563568106834385e-05, "loss": 0.8476, "step": 5625 }, { "epoch": 0.9184931227296845, "grad_norm": 2.8604555130004883, "learning_rate": 1.9563380744784265e-05, "loss": 0.8336, "step": 5626 }, { "epoch": 0.9186563813721889, "grad_norm": 2.7200558185577393, "learning_rate": 1.95631933434227e-05, "loss": 0.8143, "step": 5627 }, { "epoch": 0.9188196400146933, "grad_norm": 2.4511146545410156, "learning_rate": 1.9563005902750455e-05, "loss": 0.6223, "step": 5628 }, { "epoch": 0.9189828986571976, "grad_norm": 2.26181697845459, "learning_rate": 1.9562818422768307e-05, "loss": 0.6173, "step": 5629 }, { "epoch": 0.9191461572997021, "grad_norm": 2.96232533454895, "learning_rate": 1.956263090347702e-05, "loss": 0.7367, "step": 5630 }, { "epoch": 0.9193094159422064, "grad_norm": 2.785001754760742, "learning_rate": 1.9562443344877374e-05, "loss": 0.835, "step": 5631 }, { "epoch": 0.9194726745847108, "grad_norm": 2.8117294311523438, "learning_rate": 1.956225574697013e-05, "loss": 0.7465, "step": 5632 }, { "epoch": 0.9196359332272153, "grad_norm": 2.8134992122650146, "learning_rate": 1.9562068109756066e-05, "loss": 0.9332, "step": 5633 }, { "epoch": 0.9197991918697196, "grad_norm": 2.757462739944458, "learning_rate": 1.9561880433235948e-05, "loss": 0.8006, "step": 5634 }, { "epoch": 0.919962450512224, "grad_norm": 2.7139947414398193, "learning_rate": 1.956169271741055e-05, "loss": 0.7713, "step": 5635 }, { "epoch": 0.9201257091547284, "grad_norm": 2.8149940967559814, "learning_rate": 1.9561504962280643e-05, "loss": 0.7353, "step": 5636 }, { "epoch": 0.9202889677972328, "grad_norm": 2.762359142303467, "learning_rate": 1.9561317167847004e-05, "loss": 0.7331, "step": 5637 }, { "epoch": 0.9204522264397371, "grad_norm": 2.683053731918335, "learning_rate": 1.9561129334110395e-05, "loss": 0.6818, "step": 5638 }, { "epoch": 0.9206154850822416, "grad_norm": 2.961857795715332, "learning_rate": 1.9560941461071597e-05, "loss": 0.8897, "step": 5639 }, { "epoch": 0.9207787437247459, "grad_norm": 2.4706153869628906, "learning_rate": 1.9560753548731374e-05, "loss": 0.5779, "step": 5640 }, { "epoch": 0.9209420023672503, "grad_norm": 2.6190261840820312, "learning_rate": 1.956056559709051e-05, "loss": 0.6954, "step": 5641 }, { "epoch": 0.9211052610097547, "grad_norm": 2.848379373550415, "learning_rate": 1.9560377606149768e-05, "loss": 0.7741, "step": 5642 }, { "epoch": 0.9212685196522591, "grad_norm": 2.951927423477173, "learning_rate": 1.956018957590992e-05, "loss": 0.9659, "step": 5643 }, { "epoch": 0.9214317782947635, "grad_norm": 2.753061056137085, "learning_rate": 1.9560001506371748e-05, "loss": 0.7446, "step": 5644 }, { "epoch": 0.9215950369372679, "grad_norm": 2.917720317840576, "learning_rate": 1.955981339753602e-05, "loss": 0.7164, "step": 5645 }, { "epoch": 0.9217582955797723, "grad_norm": 3.0082671642303467, "learning_rate": 1.9559625249403505e-05, "loss": 0.9619, "step": 5646 }, { "epoch": 0.9219215542222766, "grad_norm": 2.4795825481414795, "learning_rate": 1.955943706197498e-05, "loss": 0.6567, "step": 5647 }, { "epoch": 0.9220848128647811, "grad_norm": 2.626917839050293, "learning_rate": 1.9559248835251222e-05, "loss": 0.7658, "step": 5648 }, { "epoch": 0.9222480715072854, "grad_norm": 3.5287554264068604, "learning_rate": 1.9559060569232996e-05, "loss": 1.0793, "step": 5649 }, { "epoch": 0.9224113301497898, "grad_norm": 3.0580530166625977, "learning_rate": 1.9558872263921087e-05, "loss": 0.7965, "step": 5650 }, { "epoch": 0.9225745887922941, "grad_norm": 2.8056752681732178, "learning_rate": 1.9558683919316262e-05, "loss": 0.6763, "step": 5651 }, { "epoch": 0.9227378474347986, "grad_norm": 2.832345724105835, "learning_rate": 1.9558495535419296e-05, "loss": 0.854, "step": 5652 }, { "epoch": 0.9229011060773029, "grad_norm": 2.7360312938690186, "learning_rate": 1.9558307112230966e-05, "loss": 0.621, "step": 5653 }, { "epoch": 0.9230643647198074, "grad_norm": 2.609152317047119, "learning_rate": 1.9558118649752043e-05, "loss": 0.7212, "step": 5654 }, { "epoch": 0.9232276233623118, "grad_norm": 2.737295627593994, "learning_rate": 1.9557930147983303e-05, "loss": 0.6951, "step": 5655 }, { "epoch": 0.9233908820048161, "grad_norm": 2.8434247970581055, "learning_rate": 1.9557741606925524e-05, "loss": 0.8844, "step": 5656 }, { "epoch": 0.9235541406473206, "grad_norm": 3.1209607124328613, "learning_rate": 1.9557553026579472e-05, "loss": 0.8735, "step": 5657 }, { "epoch": 0.9237173992898249, "grad_norm": 2.7033181190490723, "learning_rate": 1.9557364406945933e-05, "loss": 0.6772, "step": 5658 }, { "epoch": 0.9238806579323293, "grad_norm": 2.708242416381836, "learning_rate": 1.955717574802568e-05, "loss": 0.765, "step": 5659 }, { "epoch": 0.9240439165748336, "grad_norm": 3.236107587814331, "learning_rate": 1.955698704981948e-05, "loss": 0.8673, "step": 5660 }, { "epoch": 0.9242071752173381, "grad_norm": 3.832427501678467, "learning_rate": 1.955679831232812e-05, "loss": 0.9533, "step": 5661 }, { "epoch": 0.9243704338598424, "grad_norm": 2.9751198291778564, "learning_rate": 1.9556609535552366e-05, "loss": 0.9064, "step": 5662 }, { "epoch": 0.9245336925023468, "grad_norm": 2.9805679321289062, "learning_rate": 1.9556420719493004e-05, "loss": 0.8745, "step": 5663 }, { "epoch": 0.9246969511448512, "grad_norm": 2.5602762699127197, "learning_rate": 1.95562318641508e-05, "loss": 0.6892, "step": 5664 }, { "epoch": 0.9248602097873556, "grad_norm": 2.8991000652313232, "learning_rate": 1.9556042969526538e-05, "loss": 0.8456, "step": 5665 }, { "epoch": 0.92502346842986, "grad_norm": 2.759749174118042, "learning_rate": 1.9555854035620993e-05, "loss": 0.6474, "step": 5666 }, { "epoch": 0.9251867270723644, "grad_norm": 3.239359140396118, "learning_rate": 1.9555665062434935e-05, "loss": 1.8586, "step": 5667 }, { "epoch": 0.9253499857148688, "grad_norm": 2.956148386001587, "learning_rate": 1.955547604996915e-05, "loss": 0.9646, "step": 5668 }, { "epoch": 0.9255132443573731, "grad_norm": 2.8337087631225586, "learning_rate": 1.9555286998224412e-05, "loss": 0.8707, "step": 5669 }, { "epoch": 0.9256765029998776, "grad_norm": 2.7832579612731934, "learning_rate": 1.9555097907201496e-05, "loss": 0.7923, "step": 5670 }, { "epoch": 0.9258397616423819, "grad_norm": 2.4796743392944336, "learning_rate": 1.9554908776901184e-05, "loss": 0.8055, "step": 5671 }, { "epoch": 0.9260030202848863, "grad_norm": 2.854797124862671, "learning_rate": 1.9554719607324242e-05, "loss": 0.9529, "step": 5672 }, { "epoch": 0.9261662789273907, "grad_norm": 2.9757397174835205, "learning_rate": 1.955453039847146e-05, "loss": 0.9992, "step": 5673 }, { "epoch": 0.9263295375698951, "grad_norm": 2.71282958984375, "learning_rate": 1.955434115034361e-05, "loss": 0.8863, "step": 5674 }, { "epoch": 0.9264927962123994, "grad_norm": 2.5766711235046387, "learning_rate": 1.9554151862941472e-05, "loss": 0.8608, "step": 5675 }, { "epoch": 0.9266560548549039, "grad_norm": 3.0703091621398926, "learning_rate": 1.9553962536265824e-05, "loss": 0.7999, "step": 5676 }, { "epoch": 0.9268193134974083, "grad_norm": 2.718123197555542, "learning_rate": 1.9553773170317447e-05, "loss": 0.8252, "step": 5677 }, { "epoch": 0.9269825721399126, "grad_norm": 2.6843671798706055, "learning_rate": 1.955358376509711e-05, "loss": 0.7623, "step": 5678 }, { "epoch": 0.9271458307824171, "grad_norm": 2.8116257190704346, "learning_rate": 1.9553394320605598e-05, "loss": 0.7849, "step": 5679 }, { "epoch": 0.9273090894249214, "grad_norm": 2.784721612930298, "learning_rate": 1.9553204836843692e-05, "loss": 0.6138, "step": 5680 }, { "epoch": 0.9274723480674258, "grad_norm": 2.7404606342315674, "learning_rate": 1.9553015313812166e-05, "loss": 0.8343, "step": 5681 }, { "epoch": 0.9276356067099302, "grad_norm": 2.9711763858795166, "learning_rate": 1.95528257515118e-05, "loss": 0.922, "step": 5682 }, { "epoch": 0.9277988653524346, "grad_norm": 2.931119203567505, "learning_rate": 1.9552636149943377e-05, "loss": 0.7766, "step": 5683 }, { "epoch": 0.9279621239949389, "grad_norm": 2.581129789352417, "learning_rate": 1.9552446509107674e-05, "loss": 0.7322, "step": 5684 }, { "epoch": 0.9281253826374434, "grad_norm": 2.877168893814087, "learning_rate": 1.955225682900547e-05, "loss": 0.8444, "step": 5685 }, { "epoch": 0.9282886412799478, "grad_norm": 2.504040002822876, "learning_rate": 1.9552067109637544e-05, "loss": 0.675, "step": 5686 }, { "epoch": 0.9284518999224521, "grad_norm": 2.7773807048797607, "learning_rate": 1.955187735100468e-05, "loss": 0.9239, "step": 5687 }, { "epoch": 0.9286151585649566, "grad_norm": 3.0484023094177246, "learning_rate": 1.955168755310765e-05, "loss": 0.9004, "step": 5688 }, { "epoch": 0.9287784172074609, "grad_norm": 2.5157830715179443, "learning_rate": 1.9551497715947242e-05, "loss": 0.7188, "step": 5689 }, { "epoch": 0.9289416758499653, "grad_norm": 2.744951009750366, "learning_rate": 1.9551307839524233e-05, "loss": 0.6591, "step": 5690 }, { "epoch": 0.9291049344924697, "grad_norm": 2.546724557876587, "learning_rate": 1.9551117923839404e-05, "loss": 0.7263, "step": 5691 }, { "epoch": 0.9292681931349741, "grad_norm": 3.1068379878997803, "learning_rate": 1.9550927968893534e-05, "loss": 0.8402, "step": 5692 }, { "epoch": 0.9294314517774784, "grad_norm": 2.909552812576294, "learning_rate": 1.955073797468741e-05, "loss": 0.8335, "step": 5693 }, { "epoch": 0.9295947104199829, "grad_norm": 2.707043170928955, "learning_rate": 1.9550547941221805e-05, "loss": 0.8364, "step": 5694 }, { "epoch": 0.9297579690624872, "grad_norm": 3.038806200027466, "learning_rate": 1.9550357868497505e-05, "loss": 0.8348, "step": 5695 }, { "epoch": 0.9299212277049916, "grad_norm": 2.7457380294799805, "learning_rate": 1.9550167756515287e-05, "loss": 0.8312, "step": 5696 }, { "epoch": 0.9300844863474961, "grad_norm": 2.591547966003418, "learning_rate": 1.954997760527594e-05, "loss": 0.6631, "step": 5697 }, { "epoch": 0.9302477449900004, "grad_norm": 2.9373281002044678, "learning_rate": 1.954978741478024e-05, "loss": 0.7565, "step": 5698 }, { "epoch": 0.9304110036325048, "grad_norm": 2.87072491645813, "learning_rate": 1.9549597185028965e-05, "loss": 0.6341, "step": 5699 }, { "epoch": 0.9305742622750092, "grad_norm": 2.8435628414154053, "learning_rate": 1.9549406916022906e-05, "loss": 0.8688, "step": 5700 }, { "epoch": 0.9307375209175136, "grad_norm": 2.9112706184387207, "learning_rate": 1.9549216607762843e-05, "loss": 0.8389, "step": 5701 }, { "epoch": 0.9309007795600179, "grad_norm": 2.880882978439331, "learning_rate": 1.954902626024955e-05, "loss": 0.8021, "step": 5702 }, { "epoch": 0.9310640382025224, "grad_norm": 2.8344132900238037, "learning_rate": 1.9548835873483822e-05, "loss": 0.7767, "step": 5703 }, { "epoch": 0.9312272968450267, "grad_norm": 2.474585771560669, "learning_rate": 1.9548645447466433e-05, "loss": 0.6943, "step": 5704 }, { "epoch": 0.9313905554875311, "grad_norm": 3.0479233264923096, "learning_rate": 1.9548454982198167e-05, "loss": 0.9388, "step": 5705 }, { "epoch": 0.9315538141300355, "grad_norm": 3.1902267932891846, "learning_rate": 1.9548264477679808e-05, "loss": 0.7192, "step": 5706 }, { "epoch": 0.9317170727725399, "grad_norm": 3.1087005138397217, "learning_rate": 1.954807393391214e-05, "loss": 0.8834, "step": 5707 }, { "epoch": 0.9318803314150443, "grad_norm": 2.734511137008667, "learning_rate": 1.9547883350895944e-05, "loss": 0.7847, "step": 5708 }, { "epoch": 0.9320435900575487, "grad_norm": 2.817858934402466, "learning_rate": 1.9547692728632006e-05, "loss": 0.727, "step": 5709 }, { "epoch": 0.9322068487000531, "grad_norm": 2.5328516960144043, "learning_rate": 1.9547502067121108e-05, "loss": 0.6611, "step": 5710 }, { "epoch": 0.9323701073425574, "grad_norm": 2.780348062515259, "learning_rate": 1.9547311366364035e-05, "loss": 0.7973, "step": 5711 }, { "epoch": 0.9325333659850619, "grad_norm": 3.471705198287964, "learning_rate": 1.9547120626361572e-05, "loss": 0.8567, "step": 5712 }, { "epoch": 0.9326966246275662, "grad_norm": 2.8028404712677, "learning_rate": 1.9546929847114496e-05, "loss": 0.7247, "step": 5713 }, { "epoch": 0.9328598832700706, "grad_norm": 2.9211666584014893, "learning_rate": 1.95467390286236e-05, "loss": 0.7822, "step": 5714 }, { "epoch": 0.933023141912575, "grad_norm": 2.5802817344665527, "learning_rate": 1.954654817088966e-05, "loss": 0.6674, "step": 5715 }, { "epoch": 0.9331864005550794, "grad_norm": 2.7980611324310303, "learning_rate": 1.954635727391347e-05, "loss": 0.8252, "step": 5716 }, { "epoch": 0.9333496591975837, "grad_norm": 2.871015787124634, "learning_rate": 1.954616633769581e-05, "loss": 0.6137, "step": 5717 }, { "epoch": 0.9335129178400882, "grad_norm": 3.311380386352539, "learning_rate": 1.9545975362237462e-05, "loss": 0.8881, "step": 5718 }, { "epoch": 0.9336761764825926, "grad_norm": 2.891993284225464, "learning_rate": 1.9545784347539215e-05, "loss": 0.7972, "step": 5719 }, { "epoch": 0.9338394351250969, "grad_norm": 3.036623954772949, "learning_rate": 1.9545593293601852e-05, "loss": 0.9153, "step": 5720 }, { "epoch": 0.9340026937676014, "grad_norm": 3.0458076000213623, "learning_rate": 1.954540220042616e-05, "loss": 0.848, "step": 5721 }, { "epoch": 0.9341659524101057, "grad_norm": 2.734255790710449, "learning_rate": 1.9545211068012924e-05, "loss": 0.8248, "step": 5722 }, { "epoch": 0.9343292110526101, "grad_norm": 2.8303635120391846, "learning_rate": 1.9545019896362932e-05, "loss": 0.7602, "step": 5723 }, { "epoch": 0.9344924696951145, "grad_norm": 2.448288679122925, "learning_rate": 1.9544828685476964e-05, "loss": 0.755, "step": 5724 }, { "epoch": 0.9346557283376189, "grad_norm": 2.6608095169067383, "learning_rate": 1.954463743535581e-05, "loss": 0.7936, "step": 5725 }, { "epoch": 0.9348189869801232, "grad_norm": 2.9005179405212402, "learning_rate": 1.9544446146000255e-05, "loss": 0.7457, "step": 5726 }, { "epoch": 0.9349822456226277, "grad_norm": 2.6783814430236816, "learning_rate": 1.9544254817411086e-05, "loss": 0.7652, "step": 5727 }, { "epoch": 0.935145504265132, "grad_norm": 2.273106813430786, "learning_rate": 1.954406344958909e-05, "loss": 0.6167, "step": 5728 }, { "epoch": 0.9353087629076364, "grad_norm": 2.747105836868286, "learning_rate": 1.9543872042535055e-05, "loss": 0.8001, "step": 5729 }, { "epoch": 0.9354720215501409, "grad_norm": 2.4703521728515625, "learning_rate": 1.9543680596249764e-05, "loss": 0.6448, "step": 5730 }, { "epoch": 0.9356352801926452, "grad_norm": 2.799198627471924, "learning_rate": 1.9543489110734003e-05, "loss": 0.8536, "step": 5731 }, { "epoch": 0.9357985388351496, "grad_norm": 2.6038990020751953, "learning_rate": 1.9543297585988562e-05, "loss": 0.7919, "step": 5732 }, { "epoch": 0.935961797477654, "grad_norm": 3.4281005859375, "learning_rate": 1.954310602201423e-05, "loss": 0.924, "step": 5733 }, { "epoch": 0.9361250561201584, "grad_norm": 2.7191162109375, "learning_rate": 1.9542914418811792e-05, "loss": 0.8722, "step": 5734 }, { "epoch": 0.9362883147626627, "grad_norm": 3.2231569290161133, "learning_rate": 1.9542722776382043e-05, "loss": 1.0544, "step": 5735 }, { "epoch": 0.9364515734051672, "grad_norm": 2.6994121074676514, "learning_rate": 1.9542531094725757e-05, "loss": 0.8515, "step": 5736 }, { "epoch": 0.9366148320476715, "grad_norm": 3.0011777877807617, "learning_rate": 1.954233937384373e-05, "loss": 0.865, "step": 5737 }, { "epoch": 0.9367780906901759, "grad_norm": 2.94162654876709, "learning_rate": 1.9542147613736745e-05, "loss": 0.7816, "step": 5738 }, { "epoch": 0.9369413493326803, "grad_norm": 3.109212636947632, "learning_rate": 1.9541955814405598e-05, "loss": 1.0112, "step": 5739 }, { "epoch": 0.9371046079751847, "grad_norm": 2.9026947021484375, "learning_rate": 1.9541763975851075e-05, "loss": 0.7795, "step": 5740 }, { "epoch": 0.9372678666176891, "grad_norm": 2.5528411865234375, "learning_rate": 1.954157209807396e-05, "loss": 0.6747, "step": 5741 }, { "epoch": 0.9374311252601935, "grad_norm": 2.542067766189575, "learning_rate": 1.954138018107505e-05, "loss": 0.7525, "step": 5742 }, { "epoch": 0.9375943839026979, "grad_norm": 2.2401537895202637, "learning_rate": 1.9541188224855124e-05, "loss": 0.6778, "step": 5743 }, { "epoch": 0.9377576425452022, "grad_norm": 2.893120288848877, "learning_rate": 1.9540996229414978e-05, "loss": 0.8026, "step": 5744 }, { "epoch": 0.9379209011877067, "grad_norm": 3.3598287105560303, "learning_rate": 1.9540804194755397e-05, "loss": 0.8982, "step": 5745 }, { "epoch": 0.938084159830211, "grad_norm": 2.630099058151245, "learning_rate": 1.9540612120877175e-05, "loss": 0.7447, "step": 5746 }, { "epoch": 0.9382474184727154, "grad_norm": 2.7554259300231934, "learning_rate": 1.9540420007781097e-05, "loss": 0.8689, "step": 5747 }, { "epoch": 0.9384106771152197, "grad_norm": 2.5386345386505127, "learning_rate": 1.9540227855467956e-05, "loss": 0.7816, "step": 5748 }, { "epoch": 0.9385739357577242, "grad_norm": 3.476591110229492, "learning_rate": 1.9540035663938537e-05, "loss": 0.81, "step": 5749 }, { "epoch": 0.9387371944002285, "grad_norm": 2.928373336791992, "learning_rate": 1.953984343319364e-05, "loss": 0.9259, "step": 5750 }, { "epoch": 0.938900453042733, "grad_norm": 2.546591281890869, "learning_rate": 1.9539651163234045e-05, "loss": 0.7325, "step": 5751 }, { "epoch": 0.9390637116852374, "grad_norm": 3.1142537593841553, "learning_rate": 1.953945885406055e-05, "loss": 0.9427, "step": 5752 }, { "epoch": 0.9392269703277417, "grad_norm": 2.3607912063598633, "learning_rate": 1.9539266505673938e-05, "loss": 0.5591, "step": 5753 }, { "epoch": 0.9393902289702462, "grad_norm": 3.4500789642333984, "learning_rate": 1.9539074118075003e-05, "loss": 0.966, "step": 5754 }, { "epoch": 0.9395534876127505, "grad_norm": 2.776552438735962, "learning_rate": 1.953888169126454e-05, "loss": 0.7847, "step": 5755 }, { "epoch": 0.9397167462552549, "grad_norm": 2.8165557384490967, "learning_rate": 1.953868922524333e-05, "loss": 0.7787, "step": 5756 }, { "epoch": 0.9398800048977592, "grad_norm": 3.159376859664917, "learning_rate": 1.9538496720012173e-05, "loss": 0.9049, "step": 5757 }, { "epoch": 0.9400432635402637, "grad_norm": 3.0515120029449463, "learning_rate": 1.953830417557186e-05, "loss": 0.8607, "step": 5758 }, { "epoch": 0.940206522182768, "grad_norm": 2.8047735691070557, "learning_rate": 1.953811159192318e-05, "loss": 0.7838, "step": 5759 }, { "epoch": 0.9403697808252724, "grad_norm": 2.8837878704071045, "learning_rate": 1.9537918969066923e-05, "loss": 0.9414, "step": 5760 }, { "epoch": 0.9405330394677768, "grad_norm": 2.8246068954467773, "learning_rate": 1.9537726307003885e-05, "loss": 0.7994, "step": 5761 }, { "epoch": 0.9406962981102812, "grad_norm": 2.633850574493408, "learning_rate": 1.953753360573485e-05, "loss": 0.7419, "step": 5762 }, { "epoch": 0.9408595567527857, "grad_norm": 2.896256923675537, "learning_rate": 1.9537340865260618e-05, "loss": 0.8859, "step": 5763 }, { "epoch": 0.94102281539529, "grad_norm": 2.988276720046997, "learning_rate": 1.953714808558198e-05, "loss": 1.049, "step": 5764 }, { "epoch": 0.9411860740377944, "grad_norm": 2.818098783493042, "learning_rate": 1.9536955266699728e-05, "loss": 0.9303, "step": 5765 }, { "epoch": 0.9413493326802987, "grad_norm": 2.7586240768432617, "learning_rate": 1.9536762408614655e-05, "loss": 0.78, "step": 5766 }, { "epoch": 0.9415125913228032, "grad_norm": 2.950310468673706, "learning_rate": 1.953656951132755e-05, "loss": 0.7533, "step": 5767 }, { "epoch": 0.9416758499653075, "grad_norm": 2.832411766052246, "learning_rate": 1.9536376574839214e-05, "loss": 0.7575, "step": 5768 }, { "epoch": 0.941839108607812, "grad_norm": 2.7688608169555664, "learning_rate": 1.953618359915043e-05, "loss": 0.7382, "step": 5769 }, { "epoch": 0.9420023672503163, "grad_norm": 2.508366823196411, "learning_rate": 1.9535990584261995e-05, "loss": 0.6781, "step": 5770 }, { "epoch": 0.9421656258928207, "grad_norm": 2.9687399864196777, "learning_rate": 1.9535797530174705e-05, "loss": 0.8204, "step": 5771 }, { "epoch": 0.942328884535325, "grad_norm": 2.6953415870666504, "learning_rate": 1.9535604436889353e-05, "loss": 0.6889, "step": 5772 }, { "epoch": 0.9424921431778295, "grad_norm": 3.0283401012420654, "learning_rate": 1.953541130440673e-05, "loss": 0.8366, "step": 5773 }, { "epoch": 0.9426554018203339, "grad_norm": 3.050121545791626, "learning_rate": 1.9535218132727635e-05, "loss": 0.8527, "step": 5774 }, { "epoch": 0.9428186604628382, "grad_norm": 2.686037540435791, "learning_rate": 1.9535024921852856e-05, "loss": 0.6533, "step": 5775 }, { "epoch": 0.9429819191053427, "grad_norm": 2.908951997756958, "learning_rate": 1.953483167178319e-05, "loss": 0.8407, "step": 5776 }, { "epoch": 0.943145177747847, "grad_norm": 2.6645913124084473, "learning_rate": 1.953463838251943e-05, "loss": 0.9746, "step": 5777 }, { "epoch": 0.9433084363903514, "grad_norm": 2.6145308017730713, "learning_rate": 1.9534445054062373e-05, "loss": 0.8641, "step": 5778 }, { "epoch": 0.9434716950328558, "grad_norm": 3.5889132022857666, "learning_rate": 1.9534251686412813e-05, "loss": 1.2311, "step": 5779 }, { "epoch": 0.9436349536753602, "grad_norm": 2.0230376720428467, "learning_rate": 1.9534058279571544e-05, "loss": 0.551, "step": 5780 }, { "epoch": 0.9437982123178645, "grad_norm": 2.6072444915771484, "learning_rate": 1.953386483353936e-05, "loss": 0.8021, "step": 5781 }, { "epoch": 0.943961470960369, "grad_norm": 2.7175536155700684, "learning_rate": 1.953367134831706e-05, "loss": 0.7755, "step": 5782 }, { "epoch": 0.9441247296028733, "grad_norm": 2.945958137512207, "learning_rate": 1.9533477823905437e-05, "loss": 0.9607, "step": 5783 }, { "epoch": 0.9442879882453777, "grad_norm": 2.9862284660339355, "learning_rate": 1.9533284260305288e-05, "loss": 0.8426, "step": 5784 }, { "epoch": 0.9444512468878822, "grad_norm": 2.6266376972198486, "learning_rate": 1.9533090657517403e-05, "loss": 0.7776, "step": 5785 }, { "epoch": 0.9446145055303865, "grad_norm": 2.82328462600708, "learning_rate": 1.9532897015542583e-05, "loss": 0.8641, "step": 5786 }, { "epoch": 0.9447777641728909, "grad_norm": 2.896713972091675, "learning_rate": 1.9532703334381623e-05, "loss": 0.9633, "step": 5787 }, { "epoch": 0.9449410228153953, "grad_norm": 2.799433708190918, "learning_rate": 1.9532509614035323e-05, "loss": 0.7573, "step": 5788 }, { "epoch": 0.9451042814578997, "grad_norm": 3.0168533325195312, "learning_rate": 1.9532315854504473e-05, "loss": 0.8349, "step": 5789 }, { "epoch": 0.945267540100404, "grad_norm": 2.5006282329559326, "learning_rate": 1.9532122055789865e-05, "loss": 0.6812, "step": 5790 }, { "epoch": 0.9454307987429085, "grad_norm": 2.749506711959839, "learning_rate": 1.9531928217892312e-05, "loss": 0.649, "step": 5791 }, { "epoch": 0.9455940573854128, "grad_norm": 2.9376955032348633, "learning_rate": 1.9531734340812594e-05, "loss": 0.8415, "step": 5792 }, { "epoch": 0.9457573160279172, "grad_norm": 3.119025230407715, "learning_rate": 1.953154042455152e-05, "loss": 0.9049, "step": 5793 }, { "epoch": 0.9459205746704216, "grad_norm": 3.1292285919189453, "learning_rate": 1.9531346469109885e-05, "loss": 0.8021, "step": 5794 }, { "epoch": 0.946083833312926, "grad_norm": 2.7937052249908447, "learning_rate": 1.9531152474488476e-05, "loss": 0.7833, "step": 5795 }, { "epoch": 0.9462470919554304, "grad_norm": 2.8726186752319336, "learning_rate": 1.9530958440688103e-05, "loss": 0.7683, "step": 5796 }, { "epoch": 0.9464103505979348, "grad_norm": 3.204672336578369, "learning_rate": 1.953076436770956e-05, "loss": 0.9037, "step": 5797 }, { "epoch": 0.9465736092404392, "grad_norm": 2.7853899002075195, "learning_rate": 1.9530570255553642e-05, "loss": 0.9023, "step": 5798 }, { "epoch": 0.9467368678829435, "grad_norm": 3.029601812362671, "learning_rate": 1.9530376104221146e-05, "loss": 1.7261, "step": 5799 }, { "epoch": 0.946900126525448, "grad_norm": 2.9915289878845215, "learning_rate": 1.9530181913712875e-05, "loss": 0.7637, "step": 5800 }, { "epoch": 0.9470633851679523, "grad_norm": 2.7046780586242676, "learning_rate": 1.9529987684029624e-05, "loss": 0.7652, "step": 5801 }, { "epoch": 0.9472266438104567, "grad_norm": 2.725620746612549, "learning_rate": 1.952979341517219e-05, "loss": 0.8477, "step": 5802 }, { "epoch": 0.9473899024529611, "grad_norm": 2.951577663421631, "learning_rate": 1.9529599107141374e-05, "loss": 0.7441, "step": 5803 }, { "epoch": 0.9475531610954655, "grad_norm": 2.77909779548645, "learning_rate": 1.9529404759937978e-05, "loss": 0.7349, "step": 5804 }, { "epoch": 0.9477164197379698, "grad_norm": 2.861891746520996, "learning_rate": 1.9529210373562794e-05, "loss": 0.7567, "step": 5805 }, { "epoch": 0.9478796783804743, "grad_norm": 2.7322299480438232, "learning_rate": 1.9529015948016625e-05, "loss": 0.7664, "step": 5806 }, { "epoch": 0.9480429370229787, "grad_norm": 2.9219818115234375, "learning_rate": 1.952882148330027e-05, "loss": 0.8926, "step": 5807 }, { "epoch": 0.948206195665483, "grad_norm": 3.6594409942626953, "learning_rate": 1.952862697941453e-05, "loss": 0.9529, "step": 5808 }, { "epoch": 0.9483694543079875, "grad_norm": 2.596379518508911, "learning_rate": 1.95284324363602e-05, "loss": 0.7376, "step": 5809 }, { "epoch": 0.9485327129504918, "grad_norm": 3.0290403366088867, "learning_rate": 1.952823785413808e-05, "loss": 0.8555, "step": 5810 }, { "epoch": 0.9486959715929962, "grad_norm": 2.7266087532043457, "learning_rate": 1.9528043232748978e-05, "loss": 0.7509, "step": 5811 }, { "epoch": 0.9488592302355006, "grad_norm": 2.752300262451172, "learning_rate": 1.952784857219368e-05, "loss": 0.7759, "step": 5812 }, { "epoch": 0.949022488878005, "grad_norm": 2.804672956466675, "learning_rate": 1.9527653872473e-05, "loss": 0.7897, "step": 5813 }, { "epoch": 0.9491857475205093, "grad_norm": 2.515216588973999, "learning_rate": 1.952745913358773e-05, "loss": 0.6787, "step": 5814 }, { "epoch": 0.9493490061630138, "grad_norm": 3.049837350845337, "learning_rate": 1.9527264355538675e-05, "loss": 0.8925, "step": 5815 }, { "epoch": 0.9495122648055181, "grad_norm": 2.1543898582458496, "learning_rate": 1.9527069538326633e-05, "loss": 0.6046, "step": 5816 }, { "epoch": 0.9496755234480225, "grad_norm": 2.280987501144409, "learning_rate": 1.9526874681952406e-05, "loss": 0.5981, "step": 5817 }, { "epoch": 0.949838782090527, "grad_norm": 2.6431541442871094, "learning_rate": 1.9526679786416793e-05, "loss": 0.7329, "step": 5818 }, { "epoch": 0.9500020407330313, "grad_norm": 3.0214836597442627, "learning_rate": 1.9526484851720597e-05, "loss": 0.8355, "step": 5819 }, { "epoch": 0.9501652993755357, "grad_norm": 2.527813196182251, "learning_rate": 1.952628987786462e-05, "loss": 0.6399, "step": 5820 }, { "epoch": 0.9503285580180401, "grad_norm": 2.6852316856384277, "learning_rate": 1.952609486484966e-05, "loss": 0.8658, "step": 5821 }, { "epoch": 0.9504918166605445, "grad_norm": 2.890267848968506, "learning_rate": 1.9525899812676525e-05, "loss": 0.8674, "step": 5822 }, { "epoch": 0.9506550753030488, "grad_norm": 2.6360950469970703, "learning_rate": 1.952570472134601e-05, "loss": 0.791, "step": 5823 }, { "epoch": 0.9508183339455533, "grad_norm": 2.9043519496917725, "learning_rate": 1.952550959085892e-05, "loss": 0.8467, "step": 5824 }, { "epoch": 0.9509815925880576, "grad_norm": 2.5567266941070557, "learning_rate": 1.9525314421216057e-05, "loss": 0.8358, "step": 5825 }, { "epoch": 0.951144851230562, "grad_norm": 2.760530471801758, "learning_rate": 1.9525119212418224e-05, "loss": 0.8405, "step": 5826 }, { "epoch": 0.9513081098730664, "grad_norm": 2.7648401260375977, "learning_rate": 1.952492396446622e-05, "loss": 0.7522, "step": 5827 }, { "epoch": 0.9514713685155708, "grad_norm": 2.750999927520752, "learning_rate": 1.952472867736085e-05, "loss": 0.8354, "step": 5828 }, { "epoch": 0.9516346271580752, "grad_norm": 3.107997179031372, "learning_rate": 1.952453335110292e-05, "loss": 0.7717, "step": 5829 }, { "epoch": 0.9517978858005796, "grad_norm": 3.5016205310821533, "learning_rate": 1.952433798569323e-05, "loss": 1.2213, "step": 5830 }, { "epoch": 0.951961144443084, "grad_norm": 2.884601593017578, "learning_rate": 1.952414258113258e-05, "loss": 0.806, "step": 5831 }, { "epoch": 0.9521244030855883, "grad_norm": 2.7481582164764404, "learning_rate": 1.9523947137421778e-05, "loss": 0.7487, "step": 5832 }, { "epoch": 0.9522876617280928, "grad_norm": 2.2686855792999268, "learning_rate": 1.9523751654561624e-05, "loss": 0.6112, "step": 5833 }, { "epoch": 0.9524509203705971, "grad_norm": 2.4875006675720215, "learning_rate": 1.9523556132552925e-05, "loss": 0.711, "step": 5834 }, { "epoch": 0.9526141790131015, "grad_norm": 2.6523044109344482, "learning_rate": 1.952336057139648e-05, "loss": 0.8338, "step": 5835 }, { "epoch": 0.9527774376556059, "grad_norm": 2.749328136444092, "learning_rate": 1.95231649710931e-05, "loss": 0.7708, "step": 5836 }, { "epoch": 0.9529406962981103, "grad_norm": 2.6227993965148926, "learning_rate": 1.9522969331643578e-05, "loss": 0.7306, "step": 5837 }, { "epoch": 0.9531039549406146, "grad_norm": 2.779400110244751, "learning_rate": 1.9522773653048733e-05, "loss": 0.8604, "step": 5838 }, { "epoch": 0.953267213583119, "grad_norm": 2.943279504776001, "learning_rate": 1.9522577935309358e-05, "loss": 0.7765, "step": 5839 }, { "epoch": 0.9534304722256235, "grad_norm": 2.567296028137207, "learning_rate": 1.952238217842626e-05, "loss": 0.733, "step": 5840 }, { "epoch": 0.9535937308681278, "grad_norm": 2.8739986419677734, "learning_rate": 1.9522186382400243e-05, "loss": 0.8387, "step": 5841 }, { "epoch": 0.9537569895106323, "grad_norm": 2.6374573707580566, "learning_rate": 1.9521990547232113e-05, "loss": 0.8612, "step": 5842 }, { "epoch": 0.9539202481531366, "grad_norm": 2.3270225524902344, "learning_rate": 1.952179467292268e-05, "loss": 0.6579, "step": 5843 }, { "epoch": 0.954083506795641, "grad_norm": 2.4444634914398193, "learning_rate": 1.952159875947274e-05, "loss": 0.6305, "step": 5844 }, { "epoch": 0.9542467654381454, "grad_norm": 3.1010191440582275, "learning_rate": 1.9521402806883106e-05, "loss": 0.9215, "step": 5845 }, { "epoch": 0.9544100240806498, "grad_norm": 2.6669132709503174, "learning_rate": 1.952120681515458e-05, "loss": 0.792, "step": 5846 }, { "epoch": 0.9545732827231541, "grad_norm": 2.380455255508423, "learning_rate": 1.9521010784287964e-05, "loss": 0.6962, "step": 5847 }, { "epoch": 0.9547365413656586, "grad_norm": 2.7839267253875732, "learning_rate": 1.952081471428407e-05, "loss": 0.6358, "step": 5848 }, { "epoch": 0.9548998000081629, "grad_norm": 2.57110333442688, "learning_rate": 1.9520618605143702e-05, "loss": 0.7104, "step": 5849 }, { "epoch": 0.9550630586506673, "grad_norm": 3.166125774383545, "learning_rate": 1.9520422456867668e-05, "loss": 1.1033, "step": 5850 }, { "epoch": 0.9552263172931718, "grad_norm": 2.4926812648773193, "learning_rate": 1.9520226269456767e-05, "loss": 0.7146, "step": 5851 }, { "epoch": 0.9553895759356761, "grad_norm": 2.77881121635437, "learning_rate": 1.9520030042911812e-05, "loss": 0.739, "step": 5852 }, { "epoch": 0.9555528345781805, "grad_norm": 2.8643457889556885, "learning_rate": 1.9519833777233613e-05, "loss": 0.9423, "step": 5853 }, { "epoch": 0.9557160932206848, "grad_norm": 2.9867470264434814, "learning_rate": 1.9519637472422967e-05, "loss": 0.8669, "step": 5854 }, { "epoch": 0.9558793518631893, "grad_norm": 2.9722745418548584, "learning_rate": 1.951944112848069e-05, "loss": 0.7525, "step": 5855 }, { "epoch": 0.9560426105056936, "grad_norm": 2.5046660900115967, "learning_rate": 1.9519244745407582e-05, "loss": 0.6679, "step": 5856 }, { "epoch": 0.956205869148198, "grad_norm": 3.0713233947753906, "learning_rate": 1.9519048323204454e-05, "loss": 0.8601, "step": 5857 }, { "epoch": 0.9563691277907024, "grad_norm": 2.450089693069458, "learning_rate": 1.951885186187211e-05, "loss": 0.7172, "step": 5858 }, { "epoch": 0.9565323864332068, "grad_norm": 2.886470079421997, "learning_rate": 1.9518655361411364e-05, "loss": 0.9098, "step": 5859 }, { "epoch": 0.9566956450757111, "grad_norm": 2.9311928749084473, "learning_rate": 1.9518458821823022e-05, "loss": 0.8428, "step": 5860 }, { "epoch": 0.9568589037182156, "grad_norm": 3.2897069454193115, "learning_rate": 1.9518262243107884e-05, "loss": 0.8903, "step": 5861 }, { "epoch": 0.95702216236072, "grad_norm": 2.4474315643310547, "learning_rate": 1.9518065625266765e-05, "loss": 0.6077, "step": 5862 }, { "epoch": 0.9571854210032243, "grad_norm": 2.5171523094177246, "learning_rate": 1.9517868968300476e-05, "loss": 0.5896, "step": 5863 }, { "epoch": 0.9573486796457288, "grad_norm": 2.8439579010009766, "learning_rate": 1.951767227220982e-05, "loss": 0.9576, "step": 5864 }, { "epoch": 0.9575119382882331, "grad_norm": 2.7801673412323, "learning_rate": 1.9517475536995604e-05, "loss": 0.7228, "step": 5865 }, { "epoch": 0.9576751969307375, "grad_norm": 2.6858367919921875, "learning_rate": 1.9517278762658644e-05, "loss": 0.7987, "step": 5866 }, { "epoch": 0.9578384555732419, "grad_norm": 2.777312755584717, "learning_rate": 1.9517081949199742e-05, "loss": 0.7254, "step": 5867 }, { "epoch": 0.9580017142157463, "grad_norm": 3.1325697898864746, "learning_rate": 1.9516885096619712e-05, "loss": 1.0562, "step": 5868 }, { "epoch": 0.9581649728582506, "grad_norm": 2.873220443725586, "learning_rate": 1.951668820491936e-05, "loss": 0.7267, "step": 5869 }, { "epoch": 0.9583282315007551, "grad_norm": 2.9008285999298096, "learning_rate": 1.9516491274099496e-05, "loss": 0.7294, "step": 5870 }, { "epoch": 0.9584914901432594, "grad_norm": 2.8408000469207764, "learning_rate": 1.9516294304160927e-05, "loss": 0.9072, "step": 5871 }, { "epoch": 0.9586547487857638, "grad_norm": 2.6049633026123047, "learning_rate": 1.951609729510447e-05, "loss": 0.6172, "step": 5872 }, { "epoch": 0.9588180074282683, "grad_norm": 3.8597304821014404, "learning_rate": 1.9515900246930925e-05, "loss": 1.0505, "step": 5873 }, { "epoch": 0.9589812660707726, "grad_norm": 2.4806175231933594, "learning_rate": 1.9515703159641114e-05, "loss": 0.6136, "step": 5874 }, { "epoch": 0.959144524713277, "grad_norm": 3.017202377319336, "learning_rate": 1.9515506033235834e-05, "loss": 0.8089, "step": 5875 }, { "epoch": 0.9593077833557814, "grad_norm": 2.9067673683166504, "learning_rate": 1.9515308867715904e-05, "loss": 0.826, "step": 5876 }, { "epoch": 0.9594710419982858, "grad_norm": 2.5350043773651123, "learning_rate": 1.9515111663082137e-05, "loss": 0.7648, "step": 5877 }, { "epoch": 0.9596343006407901, "grad_norm": 2.840157985687256, "learning_rate": 1.9514914419335333e-05, "loss": 0.8274, "step": 5878 }, { "epoch": 0.9597975592832946, "grad_norm": 2.8200418949127197, "learning_rate": 1.9514717136476308e-05, "loss": 0.7007, "step": 5879 }, { "epoch": 0.9599608179257989, "grad_norm": 2.8414480686187744, "learning_rate": 1.9514519814505875e-05, "loss": 0.8886, "step": 5880 }, { "epoch": 0.9601240765683033, "grad_norm": 2.735478162765503, "learning_rate": 1.9514322453424845e-05, "loss": 0.7112, "step": 5881 }, { "epoch": 0.9602873352108077, "grad_norm": 2.579983949661255, "learning_rate": 1.9514125053234027e-05, "loss": 0.7383, "step": 5882 }, { "epoch": 0.9604505938533121, "grad_norm": 2.9304039478302, "learning_rate": 1.9513927613934234e-05, "loss": 0.8597, "step": 5883 }, { "epoch": 0.9606138524958165, "grad_norm": 2.5904533863067627, "learning_rate": 1.9513730135526276e-05, "loss": 0.7573, "step": 5884 }, { "epoch": 0.9607771111383209, "grad_norm": 2.7045493125915527, "learning_rate": 1.9513532618010966e-05, "loss": 0.7749, "step": 5885 }, { "epoch": 0.9609403697808253, "grad_norm": 2.4100382328033447, "learning_rate": 1.9513335061389117e-05, "loss": 0.66, "step": 5886 }, { "epoch": 0.9611036284233296, "grad_norm": 2.5213470458984375, "learning_rate": 1.951313746566154e-05, "loss": 0.8286, "step": 5887 }, { "epoch": 0.9612668870658341, "grad_norm": 2.5660388469696045, "learning_rate": 1.9512939830829042e-05, "loss": 0.7251, "step": 5888 }, { "epoch": 0.9614301457083384, "grad_norm": 2.769382953643799, "learning_rate": 1.9512742156892445e-05, "loss": 0.7587, "step": 5889 }, { "epoch": 0.9615934043508428, "grad_norm": 2.4229862689971924, "learning_rate": 1.9512544443852556e-05, "loss": 0.7234, "step": 5890 }, { "epoch": 0.9617566629933472, "grad_norm": 2.8556010723114014, "learning_rate": 1.951234669171019e-05, "loss": 0.9878, "step": 5891 }, { "epoch": 0.9619199216358516, "grad_norm": 2.41485333442688, "learning_rate": 1.9512148900466158e-05, "loss": 0.7133, "step": 5892 }, { "epoch": 0.9620831802783559, "grad_norm": 2.584794282913208, "learning_rate": 1.9511951070121272e-05, "loss": 0.8634, "step": 5893 }, { "epoch": 0.9622464389208604, "grad_norm": 2.790391445159912, "learning_rate": 1.951175320067635e-05, "loss": 0.7192, "step": 5894 }, { "epoch": 0.9624096975633648, "grad_norm": 3.1761536598205566, "learning_rate": 1.9511555292132195e-05, "loss": 0.944, "step": 5895 }, { "epoch": 0.9625729562058691, "grad_norm": 2.60888934135437, "learning_rate": 1.9511357344489635e-05, "loss": 0.7017, "step": 5896 }, { "epoch": 0.9627362148483736, "grad_norm": 2.2528531551361084, "learning_rate": 1.9511159357749473e-05, "loss": 0.6687, "step": 5897 }, { "epoch": 0.9628994734908779, "grad_norm": 3.060858726501465, "learning_rate": 1.9510961331912527e-05, "loss": 0.898, "step": 5898 }, { "epoch": 0.9630627321333823, "grad_norm": 2.6433324813842773, "learning_rate": 1.951076326697961e-05, "loss": 0.7875, "step": 5899 }, { "epoch": 0.9632259907758867, "grad_norm": 3.1119816303253174, "learning_rate": 1.9510565162951538e-05, "loss": 0.8843, "step": 5900 }, { "epoch": 0.9633892494183911, "grad_norm": 2.899332284927368, "learning_rate": 1.951036701982912e-05, "loss": 0.8054, "step": 5901 }, { "epoch": 0.9635525080608954, "grad_norm": 3.1045172214508057, "learning_rate": 1.9510168837613176e-05, "loss": 0.8245, "step": 5902 }, { "epoch": 0.9637157667033999, "grad_norm": 2.711574077606201, "learning_rate": 1.9509970616304522e-05, "loss": 0.8296, "step": 5903 }, { "epoch": 0.9638790253459042, "grad_norm": 2.7155394554138184, "learning_rate": 1.9509772355903967e-05, "loss": 0.7305, "step": 5904 }, { "epoch": 0.9640422839884086, "grad_norm": 3.3541736602783203, "learning_rate": 1.950957405641233e-05, "loss": 0.9366, "step": 5905 }, { "epoch": 0.9642055426309131, "grad_norm": 2.669050455093384, "learning_rate": 1.9509375717830422e-05, "loss": 0.6682, "step": 5906 }, { "epoch": 0.9643688012734174, "grad_norm": 2.873943328857422, "learning_rate": 1.9509177340159063e-05, "loss": 0.6997, "step": 5907 }, { "epoch": 0.9645320599159218, "grad_norm": 2.6477839946746826, "learning_rate": 1.950897892339907e-05, "loss": 0.8242, "step": 5908 }, { "epoch": 0.9646953185584262, "grad_norm": 2.721341371536255, "learning_rate": 1.950878046755125e-05, "loss": 0.8465, "step": 5909 }, { "epoch": 0.9648585772009306, "grad_norm": 3.156907320022583, "learning_rate": 1.9508581972616423e-05, "loss": 0.9153, "step": 5910 }, { "epoch": 0.9650218358434349, "grad_norm": 2.8642969131469727, "learning_rate": 1.950838343859541e-05, "loss": 0.925, "step": 5911 }, { "epoch": 0.9651850944859394, "grad_norm": 2.956144094467163, "learning_rate": 1.9508184865489023e-05, "loss": 0.7982, "step": 5912 }, { "epoch": 0.9653483531284437, "grad_norm": 3.1532182693481445, "learning_rate": 1.9507986253298076e-05, "loss": 0.7885, "step": 5913 }, { "epoch": 0.9655116117709481, "grad_norm": 2.6243953704833984, "learning_rate": 1.9507787602023386e-05, "loss": 0.7016, "step": 5914 }, { "epoch": 0.9656748704134525, "grad_norm": 2.6870994567871094, "learning_rate": 1.9507588911665776e-05, "loss": 0.7883, "step": 5915 }, { "epoch": 0.9658381290559569, "grad_norm": 3.6439337730407715, "learning_rate": 1.9507390182226055e-05, "loss": 0.898, "step": 5916 }, { "epoch": 0.9660013876984613, "grad_norm": 2.892869472503662, "learning_rate": 1.950719141370504e-05, "loss": 0.8353, "step": 5917 }, { "epoch": 0.9661646463409657, "grad_norm": 2.7037224769592285, "learning_rate": 1.950699260610356e-05, "loss": 0.7046, "step": 5918 }, { "epoch": 0.9663279049834701, "grad_norm": 2.4859442710876465, "learning_rate": 1.9506793759422417e-05, "loss": 0.617, "step": 5919 }, { "epoch": 0.9664911636259744, "grad_norm": 2.5026333332061768, "learning_rate": 1.9506594873662434e-05, "loss": 0.7576, "step": 5920 }, { "epoch": 0.9666544222684789, "grad_norm": 3.278474807739258, "learning_rate": 1.9506395948824427e-05, "loss": 0.9304, "step": 5921 }, { "epoch": 0.9668176809109832, "grad_norm": 3.0247244834899902, "learning_rate": 1.9506196984909222e-05, "loss": 0.9332, "step": 5922 }, { "epoch": 0.9669809395534876, "grad_norm": 2.4110381603240967, "learning_rate": 1.9505997981917626e-05, "loss": 0.6568, "step": 5923 }, { "epoch": 0.967144198195992, "grad_norm": 2.8120980262756348, "learning_rate": 1.9505798939850465e-05, "loss": 0.8692, "step": 5924 }, { "epoch": 0.9673074568384964, "grad_norm": 2.5512683391571045, "learning_rate": 1.9505599858708553e-05, "loss": 0.6348, "step": 5925 }, { "epoch": 0.9674707154810007, "grad_norm": 2.5249271392822266, "learning_rate": 1.9505400738492707e-05, "loss": 0.719, "step": 5926 }, { "epoch": 0.9676339741235052, "grad_norm": 2.8768818378448486, "learning_rate": 1.950520157920375e-05, "loss": 0.9225, "step": 5927 }, { "epoch": 0.9677972327660096, "grad_norm": 2.514298439025879, "learning_rate": 1.9505002380842496e-05, "loss": 0.8014, "step": 5928 }, { "epoch": 0.9679604914085139, "grad_norm": 2.977496862411499, "learning_rate": 1.950480314340977e-05, "loss": 0.8498, "step": 5929 }, { "epoch": 0.9681237500510184, "grad_norm": 2.9042177200317383, "learning_rate": 1.9504603866906384e-05, "loss": 0.8325, "step": 5930 }, { "epoch": 0.9682870086935227, "grad_norm": 3.1091086864471436, "learning_rate": 1.9504404551333163e-05, "loss": 0.9194, "step": 5931 }, { "epoch": 0.9684502673360271, "grad_norm": 2.3380489349365234, "learning_rate": 1.9504205196690922e-05, "loss": 0.6955, "step": 5932 }, { "epoch": 0.9686135259785315, "grad_norm": 2.6545214653015137, "learning_rate": 1.950400580298048e-05, "loss": 0.7506, "step": 5933 }, { "epoch": 0.9687767846210359, "grad_norm": 2.5714800357818604, "learning_rate": 1.9503806370202666e-05, "loss": 0.7409, "step": 5934 }, { "epoch": 0.9689400432635402, "grad_norm": 2.800703525543213, "learning_rate": 1.9503606898358286e-05, "loss": 0.9349, "step": 5935 }, { "epoch": 0.9691033019060447, "grad_norm": 2.4534177780151367, "learning_rate": 1.9503407387448167e-05, "loss": 0.7176, "step": 5936 }, { "epoch": 0.9692665605485491, "grad_norm": 2.815142869949341, "learning_rate": 1.950320783747313e-05, "loss": 0.8196, "step": 5937 }, { "epoch": 0.9694298191910534, "grad_norm": 3.160766124725342, "learning_rate": 1.9503008248433996e-05, "loss": 0.8439, "step": 5938 }, { "epoch": 0.9695930778335579, "grad_norm": 2.534231185913086, "learning_rate": 1.950280862033158e-05, "loss": 0.6285, "step": 5939 }, { "epoch": 0.9697563364760622, "grad_norm": 2.4344992637634277, "learning_rate": 1.9502608953166708e-05, "loss": 0.6701, "step": 5940 }, { "epoch": 0.9699195951185666, "grad_norm": 2.7836060523986816, "learning_rate": 1.9502409246940197e-05, "loss": 0.8574, "step": 5941 }, { "epoch": 0.970082853761071, "grad_norm": 2.8940770626068115, "learning_rate": 1.950220950165287e-05, "loss": 0.7817, "step": 5942 }, { "epoch": 0.9702461124035754, "grad_norm": 3.01581072807312, "learning_rate": 1.950200971730555e-05, "loss": 0.9091, "step": 5943 }, { "epoch": 0.9704093710460797, "grad_norm": 2.8800716400146484, "learning_rate": 1.950180989389905e-05, "loss": 0.9518, "step": 5944 }, { "epoch": 0.9705726296885842, "grad_norm": 3.3590211868286133, "learning_rate": 1.9501610031434203e-05, "loss": 1.0899, "step": 5945 }, { "epoch": 0.9707358883310885, "grad_norm": 2.2410898208618164, "learning_rate": 1.9501410129911825e-05, "loss": 0.7074, "step": 5946 }, { "epoch": 0.9708991469735929, "grad_norm": 2.974198579788208, "learning_rate": 1.9501210189332734e-05, "loss": 0.8955, "step": 5947 }, { "epoch": 0.9710624056160974, "grad_norm": 3.0487186908721924, "learning_rate": 1.950101020969776e-05, "loss": 0.7181, "step": 5948 }, { "epoch": 0.9712256642586017, "grad_norm": 2.9983344078063965, "learning_rate": 1.9500810191007717e-05, "loss": 0.9527, "step": 5949 }, { "epoch": 0.9713889229011061, "grad_norm": 2.7688446044921875, "learning_rate": 1.9500610133263434e-05, "loss": 0.8855, "step": 5950 }, { "epoch": 0.9715521815436104, "grad_norm": 3.2186179161071777, "learning_rate": 1.9500410036465725e-05, "loss": 0.9476, "step": 5951 }, { "epoch": 0.9717154401861149, "grad_norm": 2.8763747215270996, "learning_rate": 1.9500209900615423e-05, "loss": 0.6456, "step": 5952 }, { "epoch": 0.9718786988286192, "grad_norm": 3.1023080348968506, "learning_rate": 1.9500009725713344e-05, "loss": 0.7548, "step": 5953 }, { "epoch": 0.9720419574711237, "grad_norm": 2.469557046890259, "learning_rate": 1.949980951176031e-05, "loss": 0.6461, "step": 5954 }, { "epoch": 0.972205216113628, "grad_norm": 2.9637703895568848, "learning_rate": 1.9499609258757147e-05, "loss": 0.9163, "step": 5955 }, { "epoch": 0.9723684747561324, "grad_norm": 2.3141298294067383, "learning_rate": 1.949940896670468e-05, "loss": 0.67, "step": 5956 }, { "epoch": 0.9725317333986367, "grad_norm": 3.1355652809143066, "learning_rate": 1.949920863560373e-05, "loss": 0.797, "step": 5957 }, { "epoch": 0.9726949920411412, "grad_norm": 2.895251989364624, "learning_rate": 1.9499008265455114e-05, "loss": 0.7605, "step": 5958 }, { "epoch": 0.9728582506836456, "grad_norm": 2.9321229457855225, "learning_rate": 1.9498807856259667e-05, "loss": 0.7374, "step": 5959 }, { "epoch": 0.97302150932615, "grad_norm": 2.369396448135376, "learning_rate": 1.9498607408018206e-05, "loss": 0.7075, "step": 5960 }, { "epoch": 0.9731847679686544, "grad_norm": 3.2616400718688965, "learning_rate": 1.9498406920731555e-05, "loss": 0.9862, "step": 5961 }, { "epoch": 0.9733480266111587, "grad_norm": 3.0563619136810303, "learning_rate": 1.949820639440054e-05, "loss": 0.7778, "step": 5962 }, { "epoch": 0.9735112852536632, "grad_norm": 2.7571403980255127, "learning_rate": 1.9498005829025985e-05, "loss": 0.8108, "step": 5963 }, { "epoch": 0.9736745438961675, "grad_norm": 2.7412610054016113, "learning_rate": 1.9497805224608718e-05, "loss": 0.7237, "step": 5964 }, { "epoch": 0.9738378025386719, "grad_norm": 2.6651458740234375, "learning_rate": 1.9497604581149558e-05, "loss": 0.6972, "step": 5965 }, { "epoch": 0.9740010611811762, "grad_norm": 2.3990232944488525, "learning_rate": 1.9497403898649328e-05, "loss": 0.6975, "step": 5966 }, { "epoch": 0.9741643198236807, "grad_norm": 2.8131492137908936, "learning_rate": 1.9497203177108862e-05, "loss": 0.7606, "step": 5967 }, { "epoch": 0.974327578466185, "grad_norm": 3.0856611728668213, "learning_rate": 1.949700241652898e-05, "loss": 0.9068, "step": 5968 }, { "epoch": 0.9744908371086894, "grad_norm": 2.593914270401001, "learning_rate": 1.9496801616910503e-05, "loss": 0.7654, "step": 5969 }, { "epoch": 0.9746540957511939, "grad_norm": 2.9095895290374756, "learning_rate": 1.9496600778254264e-05, "loss": 0.8238, "step": 5970 }, { "epoch": 0.9748173543936982, "grad_norm": 2.6571712493896484, "learning_rate": 1.949639990056108e-05, "loss": 0.7752, "step": 5971 }, { "epoch": 0.9749806130362026, "grad_norm": 3.169746160507202, "learning_rate": 1.949619898383179e-05, "loss": 0.8593, "step": 5972 }, { "epoch": 0.975143871678707, "grad_norm": 3.1708712577819824, "learning_rate": 1.9495998028067205e-05, "loss": 0.8926, "step": 5973 }, { "epoch": 0.9753071303212114, "grad_norm": 2.4917142391204834, "learning_rate": 1.949579703326816e-05, "loss": 0.6678, "step": 5974 }, { "epoch": 0.9754703889637157, "grad_norm": 2.946939706802368, "learning_rate": 1.9495595999435475e-05, "loss": 0.7278, "step": 5975 }, { "epoch": 0.9756336476062202, "grad_norm": 3.0282890796661377, "learning_rate": 1.9495394926569985e-05, "loss": 0.9995, "step": 5976 }, { "epoch": 0.9757969062487245, "grad_norm": 2.454667091369629, "learning_rate": 1.9495193814672514e-05, "loss": 0.6761, "step": 5977 }, { "epoch": 0.9759601648912289, "grad_norm": 2.8564529418945312, "learning_rate": 1.9494992663743885e-05, "loss": 0.7269, "step": 5978 }, { "epoch": 0.9761234235337333, "grad_norm": 2.6360669136047363, "learning_rate": 1.9494791473784922e-05, "loss": 0.7122, "step": 5979 }, { "epoch": 0.9762866821762377, "grad_norm": 2.6424143314361572, "learning_rate": 1.949459024479646e-05, "loss": 0.5733, "step": 5980 }, { "epoch": 0.9764499408187421, "grad_norm": 2.67240309715271, "learning_rate": 1.949438897677932e-05, "loss": 0.7768, "step": 5981 }, { "epoch": 0.9766131994612465, "grad_norm": 2.795182466506958, "learning_rate": 1.9494187669734333e-05, "loss": 0.7329, "step": 5982 }, { "epoch": 0.9767764581037509, "grad_norm": 3.090789556503296, "learning_rate": 1.949398632366233e-05, "loss": 0.8337, "step": 5983 }, { "epoch": 0.9769397167462552, "grad_norm": 2.771162986755371, "learning_rate": 1.949378493856413e-05, "loss": 0.6354, "step": 5984 }, { "epoch": 0.9771029753887597, "grad_norm": 3.4214439392089844, "learning_rate": 1.9493583514440564e-05, "loss": 0.9544, "step": 5985 }, { "epoch": 0.977266234031264, "grad_norm": 2.637240409851074, "learning_rate": 1.9493382051292462e-05, "loss": 0.6856, "step": 5986 }, { "epoch": 0.9774294926737684, "grad_norm": 2.8674685955047607, "learning_rate": 1.9493180549120652e-05, "loss": 0.8606, "step": 5987 }, { "epoch": 0.9775927513162728, "grad_norm": 3.1319773197174072, "learning_rate": 1.949297900792596e-05, "loss": 0.8502, "step": 5988 }, { "epoch": 0.9777560099587772, "grad_norm": 2.8647592067718506, "learning_rate": 1.9492777427709217e-05, "loss": 0.907, "step": 5989 }, { "epoch": 0.9779192686012815, "grad_norm": 2.6205291748046875, "learning_rate": 1.949257580847125e-05, "loss": 0.6866, "step": 5990 }, { "epoch": 0.978082527243786, "grad_norm": 3.3402023315429688, "learning_rate": 1.9492374150212886e-05, "loss": 0.9184, "step": 5991 }, { "epoch": 0.9782457858862904, "grad_norm": 2.9545810222625732, "learning_rate": 1.9492172452934957e-05, "loss": 0.7869, "step": 5992 }, { "epoch": 0.9784090445287947, "grad_norm": 2.641571521759033, "learning_rate": 1.9491970716638294e-05, "loss": 0.7382, "step": 5993 }, { "epoch": 0.9785723031712992, "grad_norm": 2.2995309829711914, "learning_rate": 1.949176894132372e-05, "loss": 0.6913, "step": 5994 }, { "epoch": 0.9787355618138035, "grad_norm": 2.522523880004883, "learning_rate": 1.949156712699207e-05, "loss": 0.8556, "step": 5995 }, { "epoch": 0.9788988204563079, "grad_norm": 2.7447235584259033, "learning_rate": 1.949136527364417e-05, "loss": 0.7413, "step": 5996 }, { "epoch": 0.9790620790988123, "grad_norm": 2.6163885593414307, "learning_rate": 1.9491163381280852e-05, "loss": 0.8034, "step": 5997 }, { "epoch": 0.9792253377413167, "grad_norm": 2.417247772216797, "learning_rate": 1.9490961449902946e-05, "loss": 0.7253, "step": 5998 }, { "epoch": 0.979388596383821, "grad_norm": 2.8196263313293457, "learning_rate": 1.949075947951128e-05, "loss": 0.7902, "step": 5999 }, { "epoch": 0.9795518550263255, "grad_norm": 2.3490257263183594, "learning_rate": 1.949055747010669e-05, "loss": 0.6665, "step": 6000 }, { "epoch": 0.9797151136688298, "grad_norm": 2.7474875450134277, "learning_rate": 1.9490355421689995e-05, "loss": 0.7292, "step": 6001 }, { "epoch": 0.9798783723113342, "grad_norm": 2.550469398498535, "learning_rate": 1.9490153334262037e-05, "loss": 0.6886, "step": 6002 }, { "epoch": 0.9800416309538387, "grad_norm": 2.8687825202941895, "learning_rate": 1.9489951207823637e-05, "loss": 0.8682, "step": 6003 }, { "epoch": 0.980204889596343, "grad_norm": 3.4928669929504395, "learning_rate": 1.948974904237564e-05, "loss": 0.9529, "step": 6004 }, { "epoch": 0.9803681482388474, "grad_norm": 2.4815382957458496, "learning_rate": 1.948954683791886e-05, "loss": 0.6999, "step": 6005 }, { "epoch": 0.9805314068813518, "grad_norm": 2.54062557220459, "learning_rate": 1.948934459445414e-05, "loss": 0.7291, "step": 6006 }, { "epoch": 0.9806946655238562, "grad_norm": 2.9906368255615234, "learning_rate": 1.9489142311982303e-05, "loss": 0.851, "step": 6007 }, { "epoch": 0.9808579241663605, "grad_norm": 2.714017391204834, "learning_rate": 1.948893999050419e-05, "loss": 0.7105, "step": 6008 }, { "epoch": 0.981021182808865, "grad_norm": 3.013676881790161, "learning_rate": 1.9488737630020626e-05, "loss": 0.751, "step": 6009 }, { "epoch": 0.9811844414513693, "grad_norm": 2.339500904083252, "learning_rate": 1.948853523053244e-05, "loss": 0.6062, "step": 6010 }, { "epoch": 0.9813477000938737, "grad_norm": 3.177302598953247, "learning_rate": 1.9488332792040474e-05, "loss": 0.7538, "step": 6011 }, { "epoch": 0.9815109587363781, "grad_norm": 2.4812214374542236, "learning_rate": 1.9488130314545556e-05, "loss": 0.8075, "step": 6012 }, { "epoch": 0.9816742173788825, "grad_norm": 2.2458035945892334, "learning_rate": 1.948792779804851e-05, "loss": 0.6512, "step": 6013 }, { "epoch": 0.9818374760213869, "grad_norm": 3.053258180618286, "learning_rate": 1.9487725242550183e-05, "loss": 0.9401, "step": 6014 }, { "epoch": 0.9820007346638913, "grad_norm": 2.5412466526031494, "learning_rate": 1.9487522648051396e-05, "loss": 0.7655, "step": 6015 }, { "epoch": 0.9821639933063957, "grad_norm": 2.508671283721924, "learning_rate": 1.9487320014552985e-05, "loss": 0.6519, "step": 6016 }, { "epoch": 0.9823272519489, "grad_norm": 2.9585065841674805, "learning_rate": 1.9487117342055785e-05, "loss": 0.625, "step": 6017 }, { "epoch": 0.9824905105914045, "grad_norm": 2.975106716156006, "learning_rate": 1.9486914630560626e-05, "loss": 0.8097, "step": 6018 }, { "epoch": 0.9826537692339088, "grad_norm": 2.924079656600952, "learning_rate": 1.9486711880068348e-05, "loss": 0.9303, "step": 6019 }, { "epoch": 0.9828170278764132, "grad_norm": 2.8876354694366455, "learning_rate": 1.9486509090579777e-05, "loss": 0.7977, "step": 6020 }, { "epoch": 0.9829802865189176, "grad_norm": 3.206570625305176, "learning_rate": 1.9486306262095748e-05, "loss": 0.8736, "step": 6021 }, { "epoch": 0.983143545161422, "grad_norm": 2.4575133323669434, "learning_rate": 1.9486103394617096e-05, "loss": 0.7811, "step": 6022 }, { "epoch": 0.9833068038039263, "grad_norm": 3.259888172149658, "learning_rate": 1.9485900488144656e-05, "loss": 0.9375, "step": 6023 }, { "epoch": 0.9834700624464308, "grad_norm": 2.8123419284820557, "learning_rate": 1.948569754267926e-05, "loss": 0.7445, "step": 6024 }, { "epoch": 0.9836333210889352, "grad_norm": 3.1317577362060547, "learning_rate": 1.9485494558221745e-05, "loss": 0.8074, "step": 6025 }, { "epoch": 0.9837965797314395, "grad_norm": 3.131296396255493, "learning_rate": 1.9485291534772943e-05, "loss": 1.0964, "step": 6026 }, { "epoch": 0.983959838373944, "grad_norm": 2.685976266860962, "learning_rate": 1.9485088472333687e-05, "loss": 0.909, "step": 6027 }, { "epoch": 0.9841230970164483, "grad_norm": 3.047581911087036, "learning_rate": 1.9484885370904814e-05, "loss": 0.9602, "step": 6028 }, { "epoch": 0.9842863556589527, "grad_norm": 2.968869924545288, "learning_rate": 1.9484682230487163e-05, "loss": 0.9782, "step": 6029 }, { "epoch": 0.984449614301457, "grad_norm": 2.828160285949707, "learning_rate": 1.948447905108156e-05, "loss": 0.7843, "step": 6030 }, { "epoch": 0.9846128729439615, "grad_norm": 2.174215078353882, "learning_rate": 1.9484275832688845e-05, "loss": 0.5881, "step": 6031 }, { "epoch": 0.9847761315864658, "grad_norm": 2.675225019454956, "learning_rate": 1.9484072575309855e-05, "loss": 0.9177, "step": 6032 }, { "epoch": 0.9849393902289703, "grad_norm": 2.3234498500823975, "learning_rate": 1.9483869278945426e-05, "loss": 0.6581, "step": 6033 }, { "epoch": 0.9851026488714746, "grad_norm": 2.6424098014831543, "learning_rate": 1.9483665943596386e-05, "loss": 0.7263, "step": 6034 }, { "epoch": 0.985265907513979, "grad_norm": 2.313417434692383, "learning_rate": 1.948346256926358e-05, "loss": 0.6882, "step": 6035 }, { "epoch": 0.9854291661564835, "grad_norm": 2.784231424331665, "learning_rate": 1.9483259155947837e-05, "loss": 0.7412, "step": 6036 }, { "epoch": 0.9855924247989878, "grad_norm": 2.8142523765563965, "learning_rate": 1.9483055703650002e-05, "loss": 0.8427, "step": 6037 }, { "epoch": 0.9857556834414922, "grad_norm": 2.5107967853546143, "learning_rate": 1.94828522123709e-05, "loss": 0.7381, "step": 6038 }, { "epoch": 0.9859189420839966, "grad_norm": 3.0500383377075195, "learning_rate": 1.9482648682111377e-05, "loss": 0.9649, "step": 6039 }, { "epoch": 0.986082200726501, "grad_norm": 2.866809368133545, "learning_rate": 1.9482445112872265e-05, "loss": 0.7685, "step": 6040 }, { "epoch": 0.9862454593690053, "grad_norm": 2.914456367492676, "learning_rate": 1.94822415046544e-05, "loss": 0.7377, "step": 6041 }, { "epoch": 0.9864087180115098, "grad_norm": 2.394482374191284, "learning_rate": 1.9482037857458624e-05, "loss": 0.6617, "step": 6042 }, { "epoch": 0.9865719766540141, "grad_norm": 2.4830832481384277, "learning_rate": 1.948183417128577e-05, "loss": 0.7333, "step": 6043 }, { "epoch": 0.9867352352965185, "grad_norm": 2.6716196537017822, "learning_rate": 1.9481630446136674e-05, "loss": 0.7191, "step": 6044 }, { "epoch": 0.9868984939390228, "grad_norm": 3.3042352199554443, "learning_rate": 1.9481426682012173e-05, "loss": 1.0359, "step": 6045 }, { "epoch": 0.9870617525815273, "grad_norm": 2.5838541984558105, "learning_rate": 1.9481222878913112e-05, "loss": 0.7283, "step": 6046 }, { "epoch": 0.9872250112240317, "grad_norm": 2.6350858211517334, "learning_rate": 1.948101903684032e-05, "loss": 0.7598, "step": 6047 }, { "epoch": 0.987388269866536, "grad_norm": 2.5369749069213867, "learning_rate": 1.9480815155794644e-05, "loss": 0.6781, "step": 6048 }, { "epoch": 0.9875515285090405, "grad_norm": 2.7590115070343018, "learning_rate": 1.948061123577691e-05, "loss": 0.7119, "step": 6049 }, { "epoch": 0.9877147871515448, "grad_norm": 2.7505736351013184, "learning_rate": 1.9480407276787968e-05, "loss": 0.6494, "step": 6050 }, { "epoch": 0.9878780457940493, "grad_norm": 2.6756811141967773, "learning_rate": 1.9480203278828652e-05, "loss": 0.6781, "step": 6051 }, { "epoch": 0.9880413044365536, "grad_norm": 2.7601141929626465, "learning_rate": 1.9479999241899796e-05, "loss": 0.7027, "step": 6052 }, { "epoch": 0.988204563079058, "grad_norm": 2.691448926925659, "learning_rate": 1.9479795166002245e-05, "loss": 0.7509, "step": 6053 }, { "epoch": 0.9883678217215623, "grad_norm": 2.588062286376953, "learning_rate": 1.9479591051136834e-05, "loss": 0.7939, "step": 6054 }, { "epoch": 0.9885310803640668, "grad_norm": 2.5308351516723633, "learning_rate": 1.9479386897304407e-05, "loss": 0.7564, "step": 6055 }, { "epoch": 0.9886943390065711, "grad_norm": 3.301252841949463, "learning_rate": 1.9479182704505795e-05, "loss": 0.9754, "step": 6056 }, { "epoch": 0.9888575976490755, "grad_norm": 2.971860885620117, "learning_rate": 1.9478978472741845e-05, "loss": 0.9812, "step": 6057 }, { "epoch": 0.98902085629158, "grad_norm": 2.7815592288970947, "learning_rate": 1.9478774202013393e-05, "loss": 0.9517, "step": 6058 }, { "epoch": 0.9891841149340843, "grad_norm": 2.8373727798461914, "learning_rate": 1.947856989232128e-05, "loss": 0.752, "step": 6059 }, { "epoch": 0.9893473735765888, "grad_norm": 2.8545117378234863, "learning_rate": 1.9478365543666346e-05, "loss": 0.7933, "step": 6060 }, { "epoch": 0.9895106322190931, "grad_norm": 2.561969518661499, "learning_rate": 1.9478161156049428e-05, "loss": 0.8187, "step": 6061 }, { "epoch": 0.9896738908615975, "grad_norm": 3.062631368637085, "learning_rate": 1.947795672947137e-05, "loss": 0.902, "step": 6062 }, { "epoch": 0.9898371495041018, "grad_norm": 2.8747901916503906, "learning_rate": 1.947775226393301e-05, "loss": 0.8647, "step": 6063 }, { "epoch": 0.9900004081466063, "grad_norm": 2.809968948364258, "learning_rate": 1.947754775943519e-05, "loss": 0.6858, "step": 6064 }, { "epoch": 0.9901636667891106, "grad_norm": 3.110830068588257, "learning_rate": 1.947734321597875e-05, "loss": 0.8538, "step": 6065 }, { "epoch": 0.990326925431615, "grad_norm": 2.460669755935669, "learning_rate": 1.9477138633564528e-05, "loss": 0.7366, "step": 6066 }, { "epoch": 0.9904901840741194, "grad_norm": 2.830713987350464, "learning_rate": 1.947693401219337e-05, "loss": 0.8502, "step": 6067 }, { "epoch": 0.9906534427166238, "grad_norm": 2.6890640258789062, "learning_rate": 1.9476729351866118e-05, "loss": 0.7678, "step": 6068 }, { "epoch": 0.9908167013591282, "grad_norm": 3.4024574756622314, "learning_rate": 1.9476524652583606e-05, "loss": 0.8315, "step": 6069 }, { "epoch": 0.9909799600016326, "grad_norm": 2.845172643661499, "learning_rate": 1.9476319914346676e-05, "loss": 0.897, "step": 6070 }, { "epoch": 0.991143218644137, "grad_norm": 3.182246446609497, "learning_rate": 1.947611513715618e-05, "loss": 0.8918, "step": 6071 }, { "epoch": 0.9913064772866413, "grad_norm": 3.142709255218506, "learning_rate": 1.9475910321012946e-05, "loss": 0.8128, "step": 6072 }, { "epoch": 0.9914697359291458, "grad_norm": 3.0443930625915527, "learning_rate": 1.947570546591783e-05, "loss": 0.8395, "step": 6073 }, { "epoch": 0.9916329945716501, "grad_norm": 2.320504665374756, "learning_rate": 1.9475500571871662e-05, "loss": 0.6794, "step": 6074 }, { "epoch": 0.9917962532141545, "grad_norm": 2.60103702545166, "learning_rate": 1.947529563887529e-05, "loss": 0.7552, "step": 6075 }, { "epoch": 0.9919595118566589, "grad_norm": 2.9405858516693115, "learning_rate": 1.9475090666929553e-05, "loss": 0.8582, "step": 6076 }, { "epoch": 0.9921227704991633, "grad_norm": 2.8213047981262207, "learning_rate": 1.94748856560353e-05, "loss": 0.7794, "step": 6077 }, { "epoch": 0.9922860291416676, "grad_norm": 2.9855332374572754, "learning_rate": 1.947468060619337e-05, "loss": 0.985, "step": 6078 }, { "epoch": 0.9924492877841721, "grad_norm": 2.842679738998413, "learning_rate": 1.9474475517404604e-05, "loss": 0.7949, "step": 6079 }, { "epoch": 0.9926125464266765, "grad_norm": 2.3946011066436768, "learning_rate": 1.9474270389669846e-05, "loss": 0.6241, "step": 6080 }, { "epoch": 0.9927758050691808, "grad_norm": 2.5371315479278564, "learning_rate": 1.947406522298994e-05, "loss": 0.6702, "step": 6081 }, { "epoch": 0.9929390637116853, "grad_norm": 2.7121200561523438, "learning_rate": 1.9473860017365728e-05, "loss": 0.7148, "step": 6082 }, { "epoch": 0.9931023223541896, "grad_norm": 2.0770609378814697, "learning_rate": 1.9473654772798055e-05, "loss": 0.6003, "step": 6083 }, { "epoch": 0.993265580996694, "grad_norm": 3.0289077758789062, "learning_rate": 1.947344948928777e-05, "loss": 0.6793, "step": 6084 }, { "epoch": 0.9934288396391984, "grad_norm": 2.4370949268341064, "learning_rate": 1.9473244166835704e-05, "loss": 0.6958, "step": 6085 }, { "epoch": 0.9935920982817028, "grad_norm": 2.935652732849121, "learning_rate": 1.9473038805442713e-05, "loss": 0.9502, "step": 6086 }, { "epoch": 0.9937553569242071, "grad_norm": 3.0953657627105713, "learning_rate": 1.9472833405109632e-05, "loss": 0.927, "step": 6087 }, { "epoch": 0.9939186155667116, "grad_norm": 3.3061676025390625, "learning_rate": 1.9472627965837315e-05, "loss": 0.9563, "step": 6088 }, { "epoch": 0.9940818742092159, "grad_norm": 2.693873643875122, "learning_rate": 1.9472422487626595e-05, "loss": 0.7003, "step": 6089 }, { "epoch": 0.9942451328517203, "grad_norm": 2.9937376976013184, "learning_rate": 1.947221697047833e-05, "loss": 0.8379, "step": 6090 }, { "epoch": 0.9944083914942248, "grad_norm": 2.8667027950286865, "learning_rate": 1.9472011414393352e-05, "loss": 0.8221, "step": 6091 }, { "epoch": 0.9945716501367291, "grad_norm": 2.791395664215088, "learning_rate": 1.9471805819372516e-05, "loss": 0.7476, "step": 6092 }, { "epoch": 0.9947349087792335, "grad_norm": 2.8067429065704346, "learning_rate": 1.947160018541666e-05, "loss": 0.8594, "step": 6093 }, { "epoch": 0.9948981674217379, "grad_norm": 2.937155246734619, "learning_rate": 1.9471394512526634e-05, "loss": 0.831, "step": 6094 }, { "epoch": 0.9950614260642423, "grad_norm": 3.203615665435791, "learning_rate": 1.947118880070328e-05, "loss": 0.8506, "step": 6095 }, { "epoch": 0.9952246847067466, "grad_norm": 3.198080062866211, "learning_rate": 1.9470983049947446e-05, "loss": 0.9695, "step": 6096 }, { "epoch": 0.9953879433492511, "grad_norm": 2.996363878250122, "learning_rate": 1.9470777260259974e-05, "loss": 0.8446, "step": 6097 }, { "epoch": 0.9955512019917554, "grad_norm": 3.1817312240600586, "learning_rate": 1.9470571431641715e-05, "loss": 0.9897, "step": 6098 }, { "epoch": 0.9957144606342598, "grad_norm": 2.4945743083953857, "learning_rate": 1.9470365564093514e-05, "loss": 0.6184, "step": 6099 }, { "epoch": 0.9958777192767642, "grad_norm": 2.4972641468048096, "learning_rate": 1.9470159657616214e-05, "loss": 0.6158, "step": 6100 }, { "epoch": 0.9960409779192686, "grad_norm": 2.406414270401001, "learning_rate": 1.9469953712210665e-05, "loss": 0.678, "step": 6101 }, { "epoch": 0.996204236561773, "grad_norm": 2.903740882873535, "learning_rate": 1.9469747727877712e-05, "loss": 0.8666, "step": 6102 }, { "epoch": 0.9963674952042774, "grad_norm": 2.940117597579956, "learning_rate": 1.94695417046182e-05, "loss": 0.8279, "step": 6103 }, { "epoch": 0.9965307538467818, "grad_norm": 2.8312807083129883, "learning_rate": 1.946933564243298e-05, "loss": 0.7028, "step": 6104 }, { "epoch": 0.9966940124892861, "grad_norm": 2.687431573867798, "learning_rate": 1.9469129541322896e-05, "loss": 0.8244, "step": 6105 }, { "epoch": 0.9968572711317906, "grad_norm": 2.4623730182647705, "learning_rate": 1.9468923401288796e-05, "loss": 0.7427, "step": 6106 }, { "epoch": 0.9970205297742949, "grad_norm": 2.435673713684082, "learning_rate": 1.9468717222331524e-05, "loss": 0.7237, "step": 6107 }, { "epoch": 0.9971837884167993, "grad_norm": 2.4611849784851074, "learning_rate": 1.946851100445194e-05, "loss": 0.8013, "step": 6108 }, { "epoch": 0.9973470470593037, "grad_norm": 2.8912353515625, "learning_rate": 1.9468304747650872e-05, "loss": 0.7851, "step": 6109 }, { "epoch": 0.9975103057018081, "grad_norm": 3.0224480628967285, "learning_rate": 1.9468098451929183e-05, "loss": 0.8267, "step": 6110 }, { "epoch": 0.9976735643443124, "grad_norm": 3.103308916091919, "learning_rate": 1.9467892117287716e-05, "loss": 0.9295, "step": 6111 }, { "epoch": 0.9978368229868169, "grad_norm": 2.549581289291382, "learning_rate": 1.946768574372732e-05, "loss": 0.7464, "step": 6112 }, { "epoch": 0.9980000816293213, "grad_norm": 3.064728021621704, "learning_rate": 1.946747933124884e-05, "loss": 0.7019, "step": 6113 }, { "epoch": 0.9981633402718256, "grad_norm": 2.5637147426605225, "learning_rate": 1.9467272879853132e-05, "loss": 0.709, "step": 6114 }, { "epoch": 0.9983265989143301, "grad_norm": 2.7766757011413574, "learning_rate": 1.946706638954104e-05, "loss": 0.748, "step": 6115 }, { "epoch": 0.9984898575568344, "grad_norm": 3.030160903930664, "learning_rate": 1.9466859860313407e-05, "loss": 0.8626, "step": 6116 }, { "epoch": 0.9986531161993388, "grad_norm": 2.619102716445923, "learning_rate": 1.9466653292171092e-05, "loss": 0.7165, "step": 6117 }, { "epoch": 0.9988163748418432, "grad_norm": 2.6897220611572266, "learning_rate": 1.946644668511494e-05, "loss": 0.7962, "step": 6118 }, { "epoch": 0.9989796334843476, "grad_norm": 2.8649537563323975, "learning_rate": 1.94662400391458e-05, "loss": 0.8979, "step": 6119 }, { "epoch": 0.9991428921268519, "grad_norm": 2.8838367462158203, "learning_rate": 1.946603335426452e-05, "loss": 0.7851, "step": 6120 }, { "epoch": 0.9993061507693564, "grad_norm": 2.6064138412475586, "learning_rate": 1.9465826630471953e-05, "loss": 0.8132, "step": 6121 }, { "epoch": 0.9994694094118607, "grad_norm": 2.6791160106658936, "learning_rate": 1.9465619867768945e-05, "loss": 0.7289, "step": 6122 }, { "epoch": 0.9996326680543651, "grad_norm": 2.368741512298584, "learning_rate": 1.946541306615635e-05, "loss": 0.6165, "step": 6123 }, { "epoch": 0.9997959266968696, "grad_norm": 2.657670736312866, "learning_rate": 1.9465206225635013e-05, "loss": 0.6757, "step": 6124 }, { "epoch": 0.9999591853393739, "grad_norm": 2.6460158824920654, "learning_rate": 1.946499934620579e-05, "loss": 0.7392, "step": 6125 }, { "epoch": 1.0, "grad_norm": 6.502165794372559, "learning_rate": 1.9464792427869527e-05, "loss": 1.0672, "step": 6126 }, { "epoch": 1.0001632586425044, "grad_norm": 2.5621933937072754, "learning_rate": 1.9464585470627076e-05, "loss": 0.6008, "step": 6127 }, { "epoch": 1.0003265172850089, "grad_norm": 2.723024368286133, "learning_rate": 1.9464378474479288e-05, "loss": 0.7371, "step": 6128 }, { "epoch": 1.000489775927513, "grad_norm": 3.303851842880249, "learning_rate": 1.9464171439427018e-05, "loss": 0.8585, "step": 6129 }, { "epoch": 1.0006530345700175, "grad_norm": 3.6589338779449463, "learning_rate": 1.9463964365471105e-05, "loss": 0.8525, "step": 6130 }, { "epoch": 1.000816293212522, "grad_norm": 2.546863555908203, "learning_rate": 1.9463757252612415e-05, "loss": 0.7281, "step": 6131 }, { "epoch": 1.0009795518550264, "grad_norm": 2.713782787322998, "learning_rate": 1.946355010085179e-05, "loss": 0.6441, "step": 6132 }, { "epoch": 1.0011428104975306, "grad_norm": 2.7806081771850586, "learning_rate": 1.9463342910190084e-05, "loss": 0.6415, "step": 6133 }, { "epoch": 1.001306069140035, "grad_norm": 2.969183921813965, "learning_rate": 1.9463135680628147e-05, "loss": 0.9056, "step": 6134 }, { "epoch": 1.0014693277825395, "grad_norm": 2.865473985671997, "learning_rate": 1.946292841216683e-05, "loss": 0.795, "step": 6135 }, { "epoch": 1.001632586425044, "grad_norm": 3.773432493209839, "learning_rate": 1.9462721104806995e-05, "loss": 0.9076, "step": 6136 }, { "epoch": 1.0017958450675484, "grad_norm": 2.8259263038635254, "learning_rate": 1.9462513758549484e-05, "loss": 0.7495, "step": 6137 }, { "epoch": 1.0019591037100526, "grad_norm": 2.762739896774292, "learning_rate": 1.946230637339515e-05, "loss": 0.8014, "step": 6138 }, { "epoch": 1.002122362352557, "grad_norm": 2.9500699043273926, "learning_rate": 1.9462098949344846e-05, "loss": 0.7879, "step": 6139 }, { "epoch": 1.0022856209950615, "grad_norm": 2.8053719997406006, "learning_rate": 1.946189148639943e-05, "loss": 0.5952, "step": 6140 }, { "epoch": 1.002448879637566, "grad_norm": 2.599480628967285, "learning_rate": 1.9461683984559748e-05, "loss": 0.6996, "step": 6141 }, { "epoch": 1.0026121382800701, "grad_norm": 2.6541566848754883, "learning_rate": 1.9461476443826658e-05, "loss": 0.6869, "step": 6142 }, { "epoch": 1.0027753969225746, "grad_norm": 3.304792642593384, "learning_rate": 1.946126886420101e-05, "loss": 0.8787, "step": 6143 }, { "epoch": 1.002938655565079, "grad_norm": 2.8977560997009277, "learning_rate": 1.9461061245683657e-05, "loss": 0.781, "step": 6144 }, { "epoch": 1.0031019142075834, "grad_norm": 3.0256476402282715, "learning_rate": 1.9460853588275454e-05, "loss": 0.8331, "step": 6145 }, { "epoch": 1.0032651728500876, "grad_norm": 2.4471356868743896, "learning_rate": 1.9460645891977255e-05, "loss": 0.5916, "step": 6146 }, { "epoch": 1.003428431492592, "grad_norm": 2.551454782485962, "learning_rate": 1.9460438156789913e-05, "loss": 0.7037, "step": 6147 }, { "epoch": 1.0035916901350965, "grad_norm": 2.809514045715332, "learning_rate": 1.9460230382714282e-05, "loss": 0.7783, "step": 6148 }, { "epoch": 1.003754948777601, "grad_norm": 4.021465301513672, "learning_rate": 1.9460022569751217e-05, "loss": 0.719, "step": 6149 }, { "epoch": 1.0039182074201054, "grad_norm": 2.9349536895751953, "learning_rate": 1.945981471790157e-05, "loss": 0.7815, "step": 6150 }, { "epoch": 1.0040814660626096, "grad_norm": 2.573564052581787, "learning_rate": 1.94596068271662e-05, "loss": 0.7178, "step": 6151 }, { "epoch": 1.004244724705114, "grad_norm": 3.0805678367614746, "learning_rate": 1.945939889754595e-05, "loss": 0.7107, "step": 6152 }, { "epoch": 1.0044079833476185, "grad_norm": 3.3304026126861572, "learning_rate": 1.945919092904169e-05, "loss": 0.9762, "step": 6153 }, { "epoch": 1.004571241990123, "grad_norm": 3.758829355239868, "learning_rate": 1.9458982921654265e-05, "loss": 0.8376, "step": 6154 }, { "epoch": 1.0047345006326271, "grad_norm": 2.7811102867126465, "learning_rate": 1.9458774875384532e-05, "loss": 0.7946, "step": 6155 }, { "epoch": 1.0048977592751316, "grad_norm": 2.657419443130493, "learning_rate": 1.945856679023335e-05, "loss": 0.7258, "step": 6156 }, { "epoch": 1.005061017917636, "grad_norm": 2.9926321506500244, "learning_rate": 1.945835866620157e-05, "loss": 0.7591, "step": 6157 }, { "epoch": 1.0052242765601405, "grad_norm": 2.7442996501922607, "learning_rate": 1.9458150503290046e-05, "loss": 0.6954, "step": 6158 }, { "epoch": 1.005387535202645, "grad_norm": 2.8879024982452393, "learning_rate": 1.9457942301499643e-05, "loss": 0.7916, "step": 6159 }, { "epoch": 1.0055507938451491, "grad_norm": 2.5844178199768066, "learning_rate": 1.9457734060831207e-05, "loss": 0.6852, "step": 6160 }, { "epoch": 1.0057140524876536, "grad_norm": 3.0878047943115234, "learning_rate": 1.9457525781285596e-05, "loss": 0.7424, "step": 6161 }, { "epoch": 1.005877311130158, "grad_norm": 3.1499428749084473, "learning_rate": 1.945731746286367e-05, "loss": 0.7151, "step": 6162 }, { "epoch": 1.0060405697726624, "grad_norm": 2.383063793182373, "learning_rate": 1.945710910556628e-05, "loss": 0.5968, "step": 6163 }, { "epoch": 1.0062038284151666, "grad_norm": 2.818511962890625, "learning_rate": 1.945690070939429e-05, "loss": 0.799, "step": 6164 }, { "epoch": 1.006367087057671, "grad_norm": 2.653716564178467, "learning_rate": 1.9456692274348548e-05, "loss": 0.7748, "step": 6165 }, { "epoch": 1.0065303457001755, "grad_norm": 2.824570894241333, "learning_rate": 1.945648380042992e-05, "loss": 0.7167, "step": 6166 }, { "epoch": 1.00669360434268, "grad_norm": 2.9922540187835693, "learning_rate": 1.9456275287639253e-05, "loss": 0.7438, "step": 6167 }, { "epoch": 1.0068568629851842, "grad_norm": 2.682572364807129, "learning_rate": 1.945606673597741e-05, "loss": 0.6216, "step": 6168 }, { "epoch": 1.0070201216276886, "grad_norm": 2.964632511138916, "learning_rate": 1.9455858145445247e-05, "loss": 0.6582, "step": 6169 }, { "epoch": 1.007183380270193, "grad_norm": 2.3570849895477295, "learning_rate": 1.9455649516043623e-05, "loss": 0.6862, "step": 6170 }, { "epoch": 1.0073466389126975, "grad_norm": 2.7258856296539307, "learning_rate": 1.945544084777339e-05, "loss": 0.8121, "step": 6171 }, { "epoch": 1.007509897555202, "grad_norm": 2.8764448165893555, "learning_rate": 1.9455232140635415e-05, "loss": 0.8442, "step": 6172 }, { "epoch": 1.0076731561977061, "grad_norm": 2.410538673400879, "learning_rate": 1.945502339463055e-05, "loss": 0.7055, "step": 6173 }, { "epoch": 1.0078364148402106, "grad_norm": 2.722874641418457, "learning_rate": 1.9454814609759653e-05, "loss": 0.8147, "step": 6174 }, { "epoch": 1.007999673482715, "grad_norm": 2.810610055923462, "learning_rate": 1.9454605786023582e-05, "loss": 0.6233, "step": 6175 }, { "epoch": 1.0081629321252195, "grad_norm": 3.0399112701416016, "learning_rate": 1.9454396923423197e-05, "loss": 0.7293, "step": 6176 }, { "epoch": 1.0083261907677237, "grad_norm": 2.965813159942627, "learning_rate": 1.9454188021959358e-05, "loss": 0.7931, "step": 6177 }, { "epoch": 1.0084894494102281, "grad_norm": 2.990636110305786, "learning_rate": 1.9453979081632922e-05, "loss": 0.7027, "step": 6178 }, { "epoch": 1.0086527080527325, "grad_norm": 2.9929676055908203, "learning_rate": 1.9453770102444743e-05, "loss": 0.6869, "step": 6179 }, { "epoch": 1.008815966695237, "grad_norm": 2.80962872505188, "learning_rate": 1.945356108439569e-05, "loss": 0.7563, "step": 6180 }, { "epoch": 1.0089792253377414, "grad_norm": 3.1182432174682617, "learning_rate": 1.9453352027486617e-05, "loss": 0.7329, "step": 6181 }, { "epoch": 1.0091424839802456, "grad_norm": 2.7585246562957764, "learning_rate": 1.945314293171838e-05, "loss": 0.638, "step": 6182 }, { "epoch": 1.00930574262275, "grad_norm": 2.4797704219818115, "learning_rate": 1.9452933797091846e-05, "loss": 0.5304, "step": 6183 }, { "epoch": 1.0094690012652545, "grad_norm": 2.801663637161255, "learning_rate": 1.9452724623607868e-05, "loss": 0.7047, "step": 6184 }, { "epoch": 1.009632259907759, "grad_norm": 2.4133403301239014, "learning_rate": 1.9452515411267308e-05, "loss": 0.5762, "step": 6185 }, { "epoch": 1.0097955185502632, "grad_norm": 2.909909963607788, "learning_rate": 1.9452306160071026e-05, "loss": 0.9216, "step": 6186 }, { "epoch": 1.0099587771927676, "grad_norm": 3.01108980178833, "learning_rate": 1.9452096870019882e-05, "loss": 0.6566, "step": 6187 }, { "epoch": 1.010122035835272, "grad_norm": 2.3768818378448486, "learning_rate": 1.945188754111474e-05, "loss": 0.6528, "step": 6188 }, { "epoch": 1.0102852944777765, "grad_norm": 2.661942481994629, "learning_rate": 1.9451678173356455e-05, "loss": 0.6591, "step": 6189 }, { "epoch": 1.0104485531202807, "grad_norm": 2.9505956172943115, "learning_rate": 1.945146876674589e-05, "loss": 0.7554, "step": 6190 }, { "epoch": 1.0106118117627851, "grad_norm": 2.7089710235595703, "learning_rate": 1.9451259321283906e-05, "loss": 0.6986, "step": 6191 }, { "epoch": 1.0107750704052896, "grad_norm": 3.100635528564453, "learning_rate": 1.9451049836971366e-05, "loss": 0.8697, "step": 6192 }, { "epoch": 1.010938329047794, "grad_norm": 2.665205717086792, "learning_rate": 1.9450840313809125e-05, "loss": 0.686, "step": 6193 }, { "epoch": 1.0111015876902985, "grad_norm": 2.511538028717041, "learning_rate": 1.945063075179805e-05, "loss": 0.6653, "step": 6194 }, { "epoch": 1.0112648463328027, "grad_norm": 2.7299747467041016, "learning_rate": 1.9450421150939e-05, "loss": 0.6906, "step": 6195 }, { "epoch": 1.011428104975307, "grad_norm": 2.3868987560272217, "learning_rate": 1.9450211511232836e-05, "loss": 0.6415, "step": 6196 }, { "epoch": 1.0115913636178115, "grad_norm": 3.1747374534606934, "learning_rate": 1.9450001832680422e-05, "loss": 0.8164, "step": 6197 }, { "epoch": 1.011754622260316, "grad_norm": 2.83841609954834, "learning_rate": 1.944979211528262e-05, "loss": 0.7154, "step": 6198 }, { "epoch": 1.0119178809028202, "grad_norm": 3.1931076049804688, "learning_rate": 1.944958235904029e-05, "loss": 0.8722, "step": 6199 }, { "epoch": 1.0120811395453246, "grad_norm": 2.975133180618286, "learning_rate": 1.9449372563954293e-05, "loss": 0.8326, "step": 6200 }, { "epoch": 1.012244398187829, "grad_norm": 2.876207113265991, "learning_rate": 1.9449162730025496e-05, "loss": 0.7689, "step": 6201 }, { "epoch": 1.0124076568303335, "grad_norm": 3.3513576984405518, "learning_rate": 1.9448952857254756e-05, "loss": 0.8896, "step": 6202 }, { "epoch": 1.012570915472838, "grad_norm": 3.103562116622925, "learning_rate": 1.944874294564294e-05, "loss": 0.6402, "step": 6203 }, { "epoch": 1.0127341741153422, "grad_norm": 2.9255645275115967, "learning_rate": 1.944853299519091e-05, "loss": 0.7337, "step": 6204 }, { "epoch": 1.0128974327578466, "grad_norm": 2.766101360321045, "learning_rate": 1.944832300589953e-05, "loss": 0.641, "step": 6205 }, { "epoch": 1.013060691400351, "grad_norm": 2.9667344093322754, "learning_rate": 1.944811297776966e-05, "loss": 0.782, "step": 6206 }, { "epoch": 1.0132239500428555, "grad_norm": 2.956205368041992, "learning_rate": 1.9447902910802168e-05, "loss": 0.7528, "step": 6207 }, { "epoch": 1.0133872086853597, "grad_norm": 3.3722126483917236, "learning_rate": 1.9447692804997912e-05, "loss": 0.8466, "step": 6208 }, { "epoch": 1.0135504673278641, "grad_norm": 2.6990087032318115, "learning_rate": 1.944748266035776e-05, "loss": 0.6819, "step": 6209 }, { "epoch": 1.0137137259703686, "grad_norm": 2.604466676712036, "learning_rate": 1.944727247688257e-05, "loss": 0.6472, "step": 6210 }, { "epoch": 1.013876984612873, "grad_norm": 2.7172484397888184, "learning_rate": 1.9447062254573216e-05, "loss": 0.8691, "step": 6211 }, { "epoch": 1.0140402432553772, "grad_norm": 2.549086570739746, "learning_rate": 1.944685199343055e-05, "loss": 0.6401, "step": 6212 }, { "epoch": 1.0142035018978817, "grad_norm": 2.8768930435180664, "learning_rate": 1.9446641693455445e-05, "loss": 0.643, "step": 6213 }, { "epoch": 1.014366760540386, "grad_norm": 2.7019193172454834, "learning_rate": 1.9446431354648764e-05, "loss": 0.7326, "step": 6214 }, { "epoch": 1.0145300191828905, "grad_norm": 2.718830108642578, "learning_rate": 1.944622097701137e-05, "loss": 0.6655, "step": 6215 }, { "epoch": 1.014693277825395, "grad_norm": 2.6735970973968506, "learning_rate": 1.9446010560544128e-05, "loss": 0.698, "step": 6216 }, { "epoch": 1.0148565364678992, "grad_norm": 2.642322063446045, "learning_rate": 1.9445800105247902e-05, "loss": 0.6586, "step": 6217 }, { "epoch": 1.0150197951104036, "grad_norm": 3.042269229888916, "learning_rate": 1.944558961112356e-05, "loss": 0.7633, "step": 6218 }, { "epoch": 1.015183053752908, "grad_norm": 2.411966562271118, "learning_rate": 1.9445379078171967e-05, "loss": 0.6043, "step": 6219 }, { "epoch": 1.0153463123954125, "grad_norm": 2.9817585945129395, "learning_rate": 1.9445168506393986e-05, "loss": 0.7114, "step": 6220 }, { "epoch": 1.0155095710379167, "grad_norm": 2.802732467651367, "learning_rate": 1.9444957895790485e-05, "loss": 0.6211, "step": 6221 }, { "epoch": 1.0156728296804212, "grad_norm": 3.2564854621887207, "learning_rate": 1.9444747246362327e-05, "loss": 0.8383, "step": 6222 }, { "epoch": 1.0158360883229256, "grad_norm": 3.4316725730895996, "learning_rate": 1.944453655811038e-05, "loss": 0.8519, "step": 6223 }, { "epoch": 1.01599934696543, "grad_norm": 4.268990993499756, "learning_rate": 1.9444325831035508e-05, "loss": 0.8202, "step": 6224 }, { "epoch": 1.0161626056079345, "grad_norm": 2.9718055725097656, "learning_rate": 1.944411506513858e-05, "loss": 0.8401, "step": 6225 }, { "epoch": 1.0163258642504387, "grad_norm": 3.400467872619629, "learning_rate": 1.944390426042046e-05, "loss": 0.6487, "step": 6226 }, { "epoch": 1.0164891228929431, "grad_norm": 3.0652356147766113, "learning_rate": 1.9443693416882016e-05, "loss": 0.6775, "step": 6227 }, { "epoch": 1.0166523815354476, "grad_norm": 2.3946475982666016, "learning_rate": 1.9443482534524114e-05, "loss": 0.5227, "step": 6228 }, { "epoch": 1.016815640177952, "grad_norm": 2.6646928787231445, "learning_rate": 1.944327161334762e-05, "loss": 0.699, "step": 6229 }, { "epoch": 1.0169788988204562, "grad_norm": 3.2948083877563477, "learning_rate": 1.9443060653353406e-05, "loss": 0.8142, "step": 6230 }, { "epoch": 1.0171421574629607, "grad_norm": 3.1621670722961426, "learning_rate": 1.9442849654542332e-05, "loss": 0.7711, "step": 6231 }, { "epoch": 1.017305416105465, "grad_norm": 3.0824785232543945, "learning_rate": 1.9442638616915268e-05, "loss": 0.7677, "step": 6232 }, { "epoch": 1.0174686747479695, "grad_norm": 2.630182981491089, "learning_rate": 1.9442427540473085e-05, "loss": 0.7127, "step": 6233 }, { "epoch": 1.0176319333904738, "grad_norm": 2.3272457122802734, "learning_rate": 1.9442216425216644e-05, "loss": 0.5921, "step": 6234 }, { "epoch": 1.0177951920329782, "grad_norm": 3.023667812347412, "learning_rate": 1.944200527114682e-05, "loss": 0.8176, "step": 6235 }, { "epoch": 1.0179584506754826, "grad_norm": 2.800137996673584, "learning_rate": 1.9441794078264477e-05, "loss": 0.7225, "step": 6236 }, { "epoch": 1.018121709317987, "grad_norm": 3.8829362392425537, "learning_rate": 1.9441582846570482e-05, "loss": 0.8164, "step": 6237 }, { "epoch": 1.0182849679604915, "grad_norm": 2.768293857574463, "learning_rate": 1.9441371576065705e-05, "loss": 0.7392, "step": 6238 }, { "epoch": 1.0184482266029957, "grad_norm": 2.854807138442993, "learning_rate": 1.9441160266751014e-05, "loss": 0.8422, "step": 6239 }, { "epoch": 1.0186114852455002, "grad_norm": 2.90812087059021, "learning_rate": 1.9440948918627282e-05, "loss": 0.6738, "step": 6240 }, { "epoch": 1.0187747438880046, "grad_norm": 2.8774187564849854, "learning_rate": 1.9440737531695374e-05, "loss": 0.742, "step": 6241 }, { "epoch": 1.018938002530509, "grad_norm": 2.8965506553649902, "learning_rate": 1.9440526105956153e-05, "loss": 0.6177, "step": 6242 }, { "epoch": 1.0191012611730133, "grad_norm": 2.861787796020508, "learning_rate": 1.94403146414105e-05, "loss": 0.7621, "step": 6243 }, { "epoch": 1.0192645198155177, "grad_norm": 2.8015918731689453, "learning_rate": 1.9440103138059274e-05, "loss": 0.6776, "step": 6244 }, { "epoch": 1.0194277784580221, "grad_norm": 2.5468130111694336, "learning_rate": 1.943989159590335e-05, "loss": 0.6595, "step": 6245 }, { "epoch": 1.0195910371005266, "grad_norm": 2.867241621017456, "learning_rate": 1.9439680014943597e-05, "loss": 0.694, "step": 6246 }, { "epoch": 1.019754295743031, "grad_norm": 2.546616792678833, "learning_rate": 1.9439468395180882e-05, "loss": 0.5827, "step": 6247 }, { "epoch": 1.0199175543855352, "grad_norm": 2.770612955093384, "learning_rate": 1.9439256736616075e-05, "loss": 0.6612, "step": 6248 }, { "epoch": 1.0200808130280397, "grad_norm": 3.167037010192871, "learning_rate": 1.9439045039250054e-05, "loss": 0.8776, "step": 6249 }, { "epoch": 1.020244071670544, "grad_norm": 3.197441816329956, "learning_rate": 1.9438833303083677e-05, "loss": 0.8087, "step": 6250 }, { "epoch": 1.0204073303130485, "grad_norm": 2.8403077125549316, "learning_rate": 1.9438621528117824e-05, "loss": 0.7547, "step": 6251 }, { "epoch": 1.0205705889555527, "grad_norm": 2.9649159908294678, "learning_rate": 1.943840971435336e-05, "loss": 0.864, "step": 6252 }, { "epoch": 1.0207338475980572, "grad_norm": 3.960470199584961, "learning_rate": 1.9438197861791156e-05, "loss": 0.8206, "step": 6253 }, { "epoch": 1.0208971062405616, "grad_norm": 2.673330068588257, "learning_rate": 1.9437985970432087e-05, "loss": 0.6759, "step": 6254 }, { "epoch": 1.021060364883066, "grad_norm": 2.934893846511841, "learning_rate": 1.943777404027702e-05, "loss": 0.7121, "step": 6255 }, { "epoch": 1.0212236235255703, "grad_norm": 2.7413759231567383, "learning_rate": 1.9437562071326826e-05, "loss": 0.6706, "step": 6256 }, { "epoch": 1.0213868821680747, "grad_norm": 3.0483651161193848, "learning_rate": 1.9437350063582384e-05, "loss": 0.6429, "step": 6257 }, { "epoch": 1.0215501408105792, "grad_norm": 3.4669349193573, "learning_rate": 1.9437138017044554e-05, "loss": 0.9276, "step": 6258 }, { "epoch": 1.0217133994530836, "grad_norm": 3.273160696029663, "learning_rate": 1.9436925931714214e-05, "loss": 0.8562, "step": 6259 }, { "epoch": 1.021876658095588, "grad_norm": 3.08355450630188, "learning_rate": 1.9436713807592236e-05, "loss": 0.7338, "step": 6260 }, { "epoch": 1.0220399167380922, "grad_norm": 2.902233600616455, "learning_rate": 1.9436501644679492e-05, "loss": 0.6738, "step": 6261 }, { "epoch": 1.0222031753805967, "grad_norm": 3.054476499557495, "learning_rate": 1.943628944297685e-05, "loss": 0.6737, "step": 6262 }, { "epoch": 1.0223664340231011, "grad_norm": 2.308645725250244, "learning_rate": 1.9436077202485187e-05, "loss": 0.5531, "step": 6263 }, { "epoch": 1.0225296926656056, "grad_norm": 2.930659770965576, "learning_rate": 1.9435864923205373e-05, "loss": 0.7916, "step": 6264 }, { "epoch": 1.0226929513081098, "grad_norm": 3.0983998775482178, "learning_rate": 1.943565260513828e-05, "loss": 0.735, "step": 6265 }, { "epoch": 1.0228562099506142, "grad_norm": 2.9242587089538574, "learning_rate": 1.9435440248284784e-05, "loss": 0.7398, "step": 6266 }, { "epoch": 1.0230194685931187, "grad_norm": 3.134957790374756, "learning_rate": 1.9435227852645754e-05, "loss": 0.7774, "step": 6267 }, { "epoch": 1.023182727235623, "grad_norm": 3.5037145614624023, "learning_rate": 1.9435015418222064e-05, "loss": 0.9931, "step": 6268 }, { "epoch": 1.0233459858781275, "grad_norm": 3.0435750484466553, "learning_rate": 1.943480294501459e-05, "loss": 0.7138, "step": 6269 }, { "epoch": 1.0235092445206317, "grad_norm": 2.963257312774658, "learning_rate": 1.9434590433024206e-05, "loss": 0.6364, "step": 6270 }, { "epoch": 1.0236725031631362, "grad_norm": 2.974839925765991, "learning_rate": 1.943437788225178e-05, "loss": 0.8162, "step": 6271 }, { "epoch": 1.0238357618056406, "grad_norm": 3.1162328720092773, "learning_rate": 1.943416529269819e-05, "loss": 0.7591, "step": 6272 }, { "epoch": 1.023999020448145, "grad_norm": 3.1078548431396484, "learning_rate": 1.9433952664364308e-05, "loss": 0.7666, "step": 6273 }, { "epoch": 1.0241622790906493, "grad_norm": 2.8126206398010254, "learning_rate": 1.9433739997251008e-05, "loss": 0.7177, "step": 6274 }, { "epoch": 1.0243255377331537, "grad_norm": 2.8066351413726807, "learning_rate": 1.9433527291359168e-05, "loss": 0.7487, "step": 6275 }, { "epoch": 1.0244887963756582, "grad_norm": 3.4001128673553467, "learning_rate": 1.9433314546689658e-05, "loss": 0.89, "step": 6276 }, { "epoch": 1.0246520550181626, "grad_norm": 2.918592691421509, "learning_rate": 1.9433101763243355e-05, "loss": 0.7167, "step": 6277 }, { "epoch": 1.0248153136606668, "grad_norm": 2.7793991565704346, "learning_rate": 1.9432888941021128e-05, "loss": 0.7344, "step": 6278 }, { "epoch": 1.0249785723031712, "grad_norm": 2.8304927349090576, "learning_rate": 1.943267608002386e-05, "loss": 0.7901, "step": 6279 }, { "epoch": 1.0251418309456757, "grad_norm": 2.836421251296997, "learning_rate": 1.9432463180252422e-05, "loss": 0.7681, "step": 6280 }, { "epoch": 1.0253050895881801, "grad_norm": 2.761324644088745, "learning_rate": 1.9432250241707692e-05, "loss": 0.7136, "step": 6281 }, { "epoch": 1.0254683482306846, "grad_norm": 2.6182773113250732, "learning_rate": 1.943203726439054e-05, "loss": 0.7088, "step": 6282 }, { "epoch": 1.0256316068731888, "grad_norm": 2.30389666557312, "learning_rate": 1.9431824248301842e-05, "loss": 0.6374, "step": 6283 }, { "epoch": 1.0257948655156932, "grad_norm": 2.689687728881836, "learning_rate": 1.943161119344248e-05, "loss": 0.6967, "step": 6284 }, { "epoch": 1.0259581241581976, "grad_norm": 2.508890390396118, "learning_rate": 1.9431398099813322e-05, "loss": 0.6129, "step": 6285 }, { "epoch": 1.026121382800702, "grad_norm": 2.8639028072357178, "learning_rate": 1.943118496741525e-05, "loss": 0.8037, "step": 6286 }, { "epoch": 1.0262846414432063, "grad_norm": 2.560697317123413, "learning_rate": 1.943097179624914e-05, "loss": 0.731, "step": 6287 }, { "epoch": 1.0264479000857107, "grad_norm": 2.7338321208953857, "learning_rate": 1.9430758586315863e-05, "loss": 0.7617, "step": 6288 }, { "epoch": 1.0266111587282152, "grad_norm": 2.687406063079834, "learning_rate": 1.9430545337616297e-05, "loss": 0.7253, "step": 6289 }, { "epoch": 1.0267744173707196, "grad_norm": 2.3725197315216064, "learning_rate": 1.9430332050151323e-05, "loss": 0.5914, "step": 6290 }, { "epoch": 1.026937676013224, "grad_norm": 2.2148966789245605, "learning_rate": 1.9430118723921812e-05, "loss": 0.5497, "step": 6291 }, { "epoch": 1.0271009346557283, "grad_norm": 3.149733066558838, "learning_rate": 1.9429905358928648e-05, "loss": 0.8235, "step": 6292 }, { "epoch": 1.0272641932982327, "grad_norm": 2.998157262802124, "learning_rate": 1.94296919551727e-05, "loss": 0.7976, "step": 6293 }, { "epoch": 1.0274274519407371, "grad_norm": 2.5534889698028564, "learning_rate": 1.942947851265485e-05, "loss": 0.6118, "step": 6294 }, { "epoch": 1.0275907105832416, "grad_norm": 3.1815803050994873, "learning_rate": 1.9429265031375975e-05, "loss": 0.7993, "step": 6295 }, { "epoch": 1.0277539692257458, "grad_norm": 2.785928249359131, "learning_rate": 1.942905151133695e-05, "loss": 0.7033, "step": 6296 }, { "epoch": 1.0279172278682502, "grad_norm": 2.7237741947174072, "learning_rate": 1.9428837952538657e-05, "loss": 0.7291, "step": 6297 }, { "epoch": 1.0280804865107547, "grad_norm": 2.9848015308380127, "learning_rate": 1.9428624354981972e-05, "loss": 0.8049, "step": 6298 }, { "epoch": 1.0282437451532591, "grad_norm": 2.7766849994659424, "learning_rate": 1.9428410718667766e-05, "loss": 0.8006, "step": 6299 }, { "epoch": 1.0284070037957633, "grad_norm": 3.227585554122925, "learning_rate": 1.942819704359693e-05, "loss": 0.7999, "step": 6300 }, { "epoch": 1.0285702624382678, "grad_norm": 2.894432783126831, "learning_rate": 1.9427983329770334e-05, "loss": 0.8272, "step": 6301 }, { "epoch": 1.0287335210807722, "grad_norm": 2.8632073402404785, "learning_rate": 1.942776957718886e-05, "loss": 0.7155, "step": 6302 }, { "epoch": 1.0288967797232766, "grad_norm": 3.2092676162719727, "learning_rate": 1.9427555785853384e-05, "loss": 0.7738, "step": 6303 }, { "epoch": 1.029060038365781, "grad_norm": 2.964409112930298, "learning_rate": 1.9427341955764786e-05, "loss": 0.831, "step": 6304 }, { "epoch": 1.0292232970082853, "grad_norm": 3.1429738998413086, "learning_rate": 1.9427128086923946e-05, "loss": 0.7976, "step": 6305 }, { "epoch": 1.0293865556507897, "grad_norm": 3.2370052337646484, "learning_rate": 1.942691417933174e-05, "loss": 0.819, "step": 6306 }, { "epoch": 1.0295498142932942, "grad_norm": 2.6547553539276123, "learning_rate": 1.9426700232989048e-05, "loss": 0.6663, "step": 6307 }, { "epoch": 1.0297130729357986, "grad_norm": 2.866209030151367, "learning_rate": 1.9426486247896755e-05, "loss": 0.7349, "step": 6308 }, { "epoch": 1.0298763315783028, "grad_norm": 2.7975735664367676, "learning_rate": 1.9426272224055736e-05, "loss": 0.7539, "step": 6309 }, { "epoch": 1.0300395902208073, "grad_norm": 3.0666556358337402, "learning_rate": 1.9426058161466867e-05, "loss": 0.9545, "step": 6310 }, { "epoch": 1.0302028488633117, "grad_norm": 2.484081506729126, "learning_rate": 1.9425844060131035e-05, "loss": 0.5949, "step": 6311 }, { "epoch": 1.0303661075058161, "grad_norm": 2.767813205718994, "learning_rate": 1.9425629920049117e-05, "loss": 0.6961, "step": 6312 }, { "epoch": 1.0305293661483206, "grad_norm": 3.3267734050750732, "learning_rate": 1.9425415741221994e-05, "loss": 0.8554, "step": 6313 }, { "epoch": 1.0306926247908248, "grad_norm": 2.616835117340088, "learning_rate": 1.9425201523650545e-05, "loss": 0.725, "step": 6314 }, { "epoch": 1.0308558834333292, "grad_norm": 3.580709218978882, "learning_rate": 1.9424987267335652e-05, "loss": 0.864, "step": 6315 }, { "epoch": 1.0310191420758337, "grad_norm": 2.8645901679992676, "learning_rate": 1.9424772972278198e-05, "loss": 0.7137, "step": 6316 }, { "epoch": 1.031182400718338, "grad_norm": 3.483738899230957, "learning_rate": 1.9424558638479055e-05, "loss": 0.7493, "step": 6317 }, { "epoch": 1.0313456593608423, "grad_norm": 3.2228338718414307, "learning_rate": 1.9424344265939116e-05, "loss": 0.9561, "step": 6318 }, { "epoch": 1.0315089180033468, "grad_norm": 2.8154616355895996, "learning_rate": 1.942412985465925e-05, "loss": 0.649, "step": 6319 }, { "epoch": 1.0316721766458512, "grad_norm": 3.0350182056427, "learning_rate": 1.942391540464035e-05, "loss": 0.6935, "step": 6320 }, { "epoch": 1.0318354352883556, "grad_norm": 3.2948079109191895, "learning_rate": 1.942370091588329e-05, "loss": 0.6231, "step": 6321 }, { "epoch": 1.03199869393086, "grad_norm": 2.7749083042144775, "learning_rate": 1.9423486388388953e-05, "loss": 0.8347, "step": 6322 }, { "epoch": 1.0321619525733643, "grad_norm": 2.753307819366455, "learning_rate": 1.9423271822158224e-05, "loss": 0.824, "step": 6323 }, { "epoch": 1.0323252112158687, "grad_norm": 2.96871018409729, "learning_rate": 1.942305721719198e-05, "loss": 0.9005, "step": 6324 }, { "epoch": 1.0324884698583732, "grad_norm": 3.0821800231933594, "learning_rate": 1.942284257349111e-05, "loss": 0.8278, "step": 6325 }, { "epoch": 1.0326517285008776, "grad_norm": 3.0907340049743652, "learning_rate": 1.942262789105649e-05, "loss": 0.7604, "step": 6326 }, { "epoch": 1.0328149871433818, "grad_norm": 2.6506478786468506, "learning_rate": 1.9422413169889003e-05, "loss": 0.6619, "step": 6327 }, { "epoch": 1.0329782457858863, "grad_norm": 2.4282164573669434, "learning_rate": 1.9422198409989533e-05, "loss": 0.7131, "step": 6328 }, { "epoch": 1.0331415044283907, "grad_norm": 2.518099546432495, "learning_rate": 1.9421983611358967e-05, "loss": 0.7171, "step": 6329 }, { "epoch": 1.0333047630708951, "grad_norm": 2.76648211479187, "learning_rate": 1.942176877399818e-05, "loss": 0.6642, "step": 6330 }, { "epoch": 1.0334680217133994, "grad_norm": 2.8911256790161133, "learning_rate": 1.9421553897908062e-05, "loss": 0.7214, "step": 6331 }, { "epoch": 1.0336312803559038, "grad_norm": 2.6680967807769775, "learning_rate": 1.9421338983089492e-05, "loss": 0.7005, "step": 6332 }, { "epoch": 1.0337945389984082, "grad_norm": 2.853219509124756, "learning_rate": 1.9421124029543356e-05, "loss": 0.794, "step": 6333 }, { "epoch": 1.0339577976409127, "grad_norm": 2.4769725799560547, "learning_rate": 1.942090903727053e-05, "loss": 0.5848, "step": 6334 }, { "epoch": 1.034121056283417, "grad_norm": 2.7559139728546143, "learning_rate": 1.9420694006271915e-05, "loss": 0.7155, "step": 6335 }, { "epoch": 1.0342843149259213, "grad_norm": 2.967522382736206, "learning_rate": 1.942047893654838e-05, "loss": 0.9335, "step": 6336 }, { "epoch": 1.0344475735684258, "grad_norm": 2.434950351715088, "learning_rate": 1.942026382810081e-05, "loss": 0.648, "step": 6337 }, { "epoch": 1.0346108322109302, "grad_norm": 2.685605525970459, "learning_rate": 1.9420048680930096e-05, "loss": 0.6832, "step": 6338 }, { "epoch": 1.0347740908534346, "grad_norm": 2.7690393924713135, "learning_rate": 1.9419833495037114e-05, "loss": 0.799, "step": 6339 }, { "epoch": 1.0349373494959389, "grad_norm": 2.6157827377319336, "learning_rate": 1.941961827042276e-05, "loss": 0.8768, "step": 6340 }, { "epoch": 1.0351006081384433, "grad_norm": 2.95131516456604, "learning_rate": 1.9419403007087908e-05, "loss": 0.7543, "step": 6341 }, { "epoch": 1.0352638667809477, "grad_norm": 3.6104698181152344, "learning_rate": 1.9419187705033447e-05, "loss": 0.9373, "step": 6342 }, { "epoch": 1.0354271254234522, "grad_norm": 2.771965980529785, "learning_rate": 1.9418972364260266e-05, "loss": 0.8755, "step": 6343 }, { "epoch": 1.0355903840659564, "grad_norm": 2.8482015132904053, "learning_rate": 1.941875698476924e-05, "loss": 0.7508, "step": 6344 }, { "epoch": 1.0357536427084608, "grad_norm": 2.914853572845459, "learning_rate": 1.9418541566561263e-05, "loss": 0.6702, "step": 6345 }, { "epoch": 1.0359169013509653, "grad_norm": 3.0339231491088867, "learning_rate": 1.941832610963722e-05, "loss": 0.8011, "step": 6346 }, { "epoch": 1.0360801599934697, "grad_norm": 3.0560708045959473, "learning_rate": 1.9418110613997992e-05, "loss": 0.7564, "step": 6347 }, { "epoch": 1.0362434186359741, "grad_norm": 3.4070186614990234, "learning_rate": 1.9417895079644467e-05, "loss": 0.8511, "step": 6348 }, { "epoch": 1.0364066772784783, "grad_norm": 2.9086594581604004, "learning_rate": 1.9417679506577535e-05, "loss": 0.7844, "step": 6349 }, { "epoch": 1.0365699359209828, "grad_norm": 2.743495464324951, "learning_rate": 1.9417463894798078e-05, "loss": 0.6882, "step": 6350 }, { "epoch": 1.0367331945634872, "grad_norm": 3.20906662940979, "learning_rate": 1.941724824430698e-05, "loss": 0.8066, "step": 6351 }, { "epoch": 1.0368964532059917, "grad_norm": 2.6905136108398438, "learning_rate": 1.9417032555105134e-05, "loss": 0.672, "step": 6352 }, { "epoch": 1.0370597118484959, "grad_norm": 3.133999824523926, "learning_rate": 1.941681682719342e-05, "loss": 0.7998, "step": 6353 }, { "epoch": 1.0372229704910003, "grad_norm": 3.1088004112243652, "learning_rate": 1.9416601060572728e-05, "loss": 0.7313, "step": 6354 }, { "epoch": 1.0373862291335048, "grad_norm": 2.6001763343811035, "learning_rate": 1.9416385255243944e-05, "loss": 0.6816, "step": 6355 }, { "epoch": 1.0375494877760092, "grad_norm": 3.0914106369018555, "learning_rate": 1.9416169411207957e-05, "loss": 0.8054, "step": 6356 }, { "epoch": 1.0377127464185136, "grad_norm": 2.3817529678344727, "learning_rate": 1.9415953528465655e-05, "loss": 0.6694, "step": 6357 }, { "epoch": 1.0378760050610178, "grad_norm": 2.9112868309020996, "learning_rate": 1.941573760701792e-05, "loss": 0.7811, "step": 6358 }, { "epoch": 1.0380392637035223, "grad_norm": 2.586466073989868, "learning_rate": 1.9415521646865646e-05, "loss": 0.6805, "step": 6359 }, { "epoch": 1.0382025223460267, "grad_norm": 2.870588541030884, "learning_rate": 1.9415305648009716e-05, "loss": 0.734, "step": 6360 }, { "epoch": 1.0383657809885312, "grad_norm": 3.0551838874816895, "learning_rate": 1.941508961045102e-05, "loss": 0.9626, "step": 6361 }, { "epoch": 1.0385290396310354, "grad_norm": 2.605088710784912, "learning_rate": 1.9414873534190445e-05, "loss": 0.6896, "step": 6362 }, { "epoch": 1.0386922982735398, "grad_norm": 2.698319673538208, "learning_rate": 1.9414657419228882e-05, "loss": 0.687, "step": 6363 }, { "epoch": 1.0388555569160443, "grad_norm": 2.124525547027588, "learning_rate": 1.9414441265567215e-05, "loss": 0.523, "step": 6364 }, { "epoch": 1.0390188155585487, "grad_norm": 2.9077396392822266, "learning_rate": 1.9414225073206337e-05, "loss": 0.802, "step": 6365 }, { "epoch": 1.0391820742010531, "grad_norm": 2.603912353515625, "learning_rate": 1.9414008842147135e-05, "loss": 0.6816, "step": 6366 }, { "epoch": 1.0393453328435573, "grad_norm": 5.040230751037598, "learning_rate": 1.941379257239049e-05, "loss": 0.7721, "step": 6367 }, { "epoch": 1.0395085914860618, "grad_norm": 2.6408255100250244, "learning_rate": 1.9413576263937307e-05, "loss": 0.7571, "step": 6368 }, { "epoch": 1.0396718501285662, "grad_norm": 2.3332061767578125, "learning_rate": 1.9413359916788463e-05, "loss": 0.6676, "step": 6369 }, { "epoch": 1.0398351087710707, "grad_norm": 3.2793667316436768, "learning_rate": 1.9413143530944852e-05, "loss": 0.8584, "step": 6370 }, { "epoch": 1.0399983674135749, "grad_norm": 2.745293617248535, "learning_rate": 1.941292710640736e-05, "loss": 0.7892, "step": 6371 }, { "epoch": 1.0401616260560793, "grad_norm": 2.826108694076538, "learning_rate": 1.9412710643176883e-05, "loss": 0.7886, "step": 6372 }, { "epoch": 1.0403248846985838, "grad_norm": 2.806939125061035, "learning_rate": 1.9412494141254302e-05, "loss": 0.7864, "step": 6373 }, { "epoch": 1.0404881433410882, "grad_norm": 2.4622087478637695, "learning_rate": 1.9412277600640513e-05, "loss": 0.6525, "step": 6374 }, { "epoch": 1.0406514019835924, "grad_norm": 2.6856160163879395, "learning_rate": 1.9412061021336404e-05, "loss": 0.6954, "step": 6375 }, { "epoch": 1.0408146606260968, "grad_norm": 2.836193799972534, "learning_rate": 1.941184440334287e-05, "loss": 0.8257, "step": 6376 }, { "epoch": 1.0409779192686013, "grad_norm": 2.218841314315796, "learning_rate": 1.9411627746660796e-05, "loss": 0.6168, "step": 6377 }, { "epoch": 1.0411411779111057, "grad_norm": 2.5852417945861816, "learning_rate": 1.9411411051291072e-05, "loss": 0.614, "step": 6378 }, { "epoch": 1.0413044365536102, "grad_norm": 2.4192469120025635, "learning_rate": 1.9411194317234593e-05, "loss": 0.6514, "step": 6379 }, { "epoch": 1.0414676951961144, "grad_norm": 3.2027337551116943, "learning_rate": 1.9410977544492248e-05, "loss": 0.9607, "step": 6380 }, { "epoch": 1.0416309538386188, "grad_norm": 2.895900249481201, "learning_rate": 1.9410760733064923e-05, "loss": 0.6649, "step": 6381 }, { "epoch": 1.0417942124811232, "grad_norm": 2.5838370323181152, "learning_rate": 1.9410543882953518e-05, "loss": 0.7044, "step": 6382 }, { "epoch": 1.0419574711236277, "grad_norm": 3.6960391998291016, "learning_rate": 1.9410326994158922e-05, "loss": 0.8938, "step": 6383 }, { "epoch": 1.042120729766132, "grad_norm": 2.937156915664673, "learning_rate": 1.941011006668202e-05, "loss": 0.6842, "step": 6384 }, { "epoch": 1.0422839884086363, "grad_norm": 2.9068658351898193, "learning_rate": 1.9409893100523715e-05, "loss": 0.7634, "step": 6385 }, { "epoch": 1.0424472470511408, "grad_norm": 2.6821224689483643, "learning_rate": 1.940967609568489e-05, "loss": 0.6694, "step": 6386 }, { "epoch": 1.0426105056936452, "grad_norm": 3.338974952697754, "learning_rate": 1.9409459052166437e-05, "loss": 0.8662, "step": 6387 }, { "epoch": 1.0427737643361494, "grad_norm": 2.9919612407684326, "learning_rate": 1.9409241969969255e-05, "loss": 0.673, "step": 6388 }, { "epoch": 1.0429370229786539, "grad_norm": 3.2973580360412598, "learning_rate": 1.9409024849094228e-05, "loss": 0.7717, "step": 6389 }, { "epoch": 1.0431002816211583, "grad_norm": 2.8091676235198975, "learning_rate": 1.9408807689542257e-05, "loss": 0.7019, "step": 6390 }, { "epoch": 1.0432635402636627, "grad_norm": 3.2140278816223145, "learning_rate": 1.940859049131423e-05, "loss": 0.86, "step": 6391 }, { "epoch": 1.0434267989061672, "grad_norm": 3.0547704696655273, "learning_rate": 1.9408373254411036e-05, "loss": 0.6034, "step": 6392 }, { "epoch": 1.0435900575486714, "grad_norm": 2.72973895072937, "learning_rate": 1.9408155978833573e-05, "loss": 0.704, "step": 6393 }, { "epoch": 1.0437533161911758, "grad_norm": 3.1679515838623047, "learning_rate": 1.9407938664582736e-05, "loss": 0.7871, "step": 6394 }, { "epoch": 1.0439165748336803, "grad_norm": 2.828636646270752, "learning_rate": 1.9407721311659415e-05, "loss": 0.781, "step": 6395 }, { "epoch": 1.0440798334761847, "grad_norm": 2.983654260635376, "learning_rate": 1.9407503920064502e-05, "loss": 0.8089, "step": 6396 }, { "epoch": 1.044243092118689, "grad_norm": 2.5285592079162598, "learning_rate": 1.940728648979889e-05, "loss": 0.6659, "step": 6397 }, { "epoch": 1.0444063507611934, "grad_norm": 3.242260217666626, "learning_rate": 1.940706902086348e-05, "loss": 0.7233, "step": 6398 }, { "epoch": 1.0445696094036978, "grad_norm": 2.6088051795959473, "learning_rate": 1.9406851513259162e-05, "loss": 0.7158, "step": 6399 }, { "epoch": 1.0447328680462022, "grad_norm": 3.17697811126709, "learning_rate": 1.9406633966986828e-05, "loss": 0.8809, "step": 6400 }, { "epoch": 1.0448961266887067, "grad_norm": 2.9861905574798584, "learning_rate": 1.9406416382047372e-05, "loss": 0.8296, "step": 6401 }, { "epoch": 1.045059385331211, "grad_norm": 3.339890480041504, "learning_rate": 1.9406198758441692e-05, "loss": 0.7768, "step": 6402 }, { "epoch": 1.0452226439737153, "grad_norm": 2.588907480239868, "learning_rate": 1.940598109617068e-05, "loss": 0.7361, "step": 6403 }, { "epoch": 1.0453859026162198, "grad_norm": 3.5497069358825684, "learning_rate": 1.940576339523523e-05, "loss": 0.8795, "step": 6404 }, { "epoch": 1.0455491612587242, "grad_norm": 3.164293050765991, "learning_rate": 1.940554565563624e-05, "loss": 0.9133, "step": 6405 }, { "epoch": 1.0457124199012284, "grad_norm": 2.612067222595215, "learning_rate": 1.9405327877374602e-05, "loss": 0.8238, "step": 6406 }, { "epoch": 1.0458756785437329, "grad_norm": 2.8516926765441895, "learning_rate": 1.9405110060451212e-05, "loss": 0.8653, "step": 6407 }, { "epoch": 1.0460389371862373, "grad_norm": 2.815227746963501, "learning_rate": 1.940489220486697e-05, "loss": 0.8384, "step": 6408 }, { "epoch": 1.0462021958287417, "grad_norm": 2.1935079097747803, "learning_rate": 1.9404674310622763e-05, "loss": 0.5557, "step": 6409 }, { "epoch": 1.0463654544712462, "grad_norm": 2.6309821605682373, "learning_rate": 1.9404456377719493e-05, "loss": 0.6824, "step": 6410 }, { "epoch": 1.0465287131137504, "grad_norm": 2.758307695388794, "learning_rate": 1.9404238406158055e-05, "loss": 0.6501, "step": 6411 }, { "epoch": 1.0466919717562548, "grad_norm": 3.2099313735961914, "learning_rate": 1.940402039593934e-05, "loss": 0.7443, "step": 6412 }, { "epoch": 1.0468552303987593, "grad_norm": 3.0858747959136963, "learning_rate": 1.940380234706425e-05, "loss": 0.773, "step": 6413 }, { "epoch": 1.0470184890412637, "grad_norm": 3.8967244625091553, "learning_rate": 1.940358425953368e-05, "loss": 0.9361, "step": 6414 }, { "epoch": 1.047181747683768, "grad_norm": 3.13344669342041, "learning_rate": 1.9403366133348527e-05, "loss": 0.8036, "step": 6415 }, { "epoch": 1.0473450063262724, "grad_norm": 2.609896183013916, "learning_rate": 1.9403147968509682e-05, "loss": 0.7239, "step": 6416 }, { "epoch": 1.0475082649687768, "grad_norm": 2.725155830383301, "learning_rate": 1.9402929765018053e-05, "loss": 0.7931, "step": 6417 }, { "epoch": 1.0476715236112812, "grad_norm": 3.13685941696167, "learning_rate": 1.9402711522874526e-05, "loss": 0.8635, "step": 6418 }, { "epoch": 1.0478347822537855, "grad_norm": 2.9032747745513916, "learning_rate": 1.9402493242080004e-05, "loss": 0.7776, "step": 6419 }, { "epoch": 1.04799804089629, "grad_norm": 2.7277982234954834, "learning_rate": 1.940227492263538e-05, "loss": 0.6954, "step": 6420 }, { "epoch": 1.0481612995387943, "grad_norm": 2.707754611968994, "learning_rate": 1.9402056564541555e-05, "loss": 0.6944, "step": 6421 }, { "epoch": 1.0483245581812988, "grad_norm": 3.2348806858062744, "learning_rate": 1.9401838167799428e-05, "loss": 0.6979, "step": 6422 }, { "epoch": 1.0484878168238032, "grad_norm": 2.607245683670044, "learning_rate": 1.940161973240989e-05, "loss": 0.5603, "step": 6423 }, { "epoch": 1.0486510754663074, "grad_norm": 2.9992170333862305, "learning_rate": 1.9401401258373848e-05, "loss": 0.7151, "step": 6424 }, { "epoch": 1.0488143341088119, "grad_norm": 3.5459415912628174, "learning_rate": 1.940118274569219e-05, "loss": 0.9561, "step": 6425 }, { "epoch": 1.0489775927513163, "grad_norm": 2.696218967437744, "learning_rate": 1.9400964194365826e-05, "loss": 0.7406, "step": 6426 }, { "epoch": 1.0491408513938207, "grad_norm": 3.129322052001953, "learning_rate": 1.9400745604395642e-05, "loss": 0.921, "step": 6427 }, { "epoch": 1.049304110036325, "grad_norm": 4.768367767333984, "learning_rate": 1.9400526975782544e-05, "loss": 0.8009, "step": 6428 }, { "epoch": 1.0494673686788294, "grad_norm": 2.8905019760131836, "learning_rate": 1.9400308308527434e-05, "loss": 0.7769, "step": 6429 }, { "epoch": 1.0496306273213338, "grad_norm": 2.6017343997955322, "learning_rate": 1.94000896026312e-05, "loss": 0.6039, "step": 6430 }, { "epoch": 1.0497938859638383, "grad_norm": 2.9733543395996094, "learning_rate": 1.9399870858094752e-05, "loss": 0.8004, "step": 6431 }, { "epoch": 1.0499571446063425, "grad_norm": 3.0921249389648438, "learning_rate": 1.939965207491898e-05, "loss": 0.8591, "step": 6432 }, { "epoch": 1.050120403248847, "grad_norm": 3.0158002376556396, "learning_rate": 1.9399433253104788e-05, "loss": 0.8077, "step": 6433 }, { "epoch": 1.0502836618913514, "grad_norm": 3.1590914726257324, "learning_rate": 1.9399214392653077e-05, "loss": 0.8792, "step": 6434 }, { "epoch": 1.0504469205338558, "grad_norm": 2.8895950317382812, "learning_rate": 1.9398995493564745e-05, "loss": 0.7166, "step": 6435 }, { "epoch": 1.0506101791763602, "grad_norm": 2.4258158206939697, "learning_rate": 1.939877655584069e-05, "loss": 0.7097, "step": 6436 }, { "epoch": 1.0507734378188645, "grad_norm": 2.398707389831543, "learning_rate": 1.9398557579481812e-05, "loss": 0.647, "step": 6437 }, { "epoch": 1.050936696461369, "grad_norm": 2.536439895629883, "learning_rate": 1.9398338564489015e-05, "loss": 0.6808, "step": 6438 }, { "epoch": 1.0510999551038733, "grad_norm": 2.601456880569458, "learning_rate": 1.9398119510863197e-05, "loss": 0.6505, "step": 6439 }, { "epoch": 1.0512632137463778, "grad_norm": 2.6422629356384277, "learning_rate": 1.9397900418605257e-05, "loss": 0.6183, "step": 6440 }, { "epoch": 1.051426472388882, "grad_norm": 2.6638689041137695, "learning_rate": 1.93976812877161e-05, "loss": 0.6855, "step": 6441 }, { "epoch": 1.0515897310313864, "grad_norm": 3.2069714069366455, "learning_rate": 1.9397462118196622e-05, "loss": 0.6751, "step": 6442 }, { "epoch": 1.0517529896738909, "grad_norm": 2.6123294830322266, "learning_rate": 1.9397242910047726e-05, "loss": 0.7178, "step": 6443 }, { "epoch": 1.0519162483163953, "grad_norm": 2.648123264312744, "learning_rate": 1.9397023663270312e-05, "loss": 0.7087, "step": 6444 }, { "epoch": 1.0520795069588997, "grad_norm": 2.9202332496643066, "learning_rate": 1.9396804377865282e-05, "loss": 0.8883, "step": 6445 }, { "epoch": 1.052242765601404, "grad_norm": 2.68113374710083, "learning_rate": 1.9396585053833537e-05, "loss": 0.6225, "step": 6446 }, { "epoch": 1.0524060242439084, "grad_norm": 2.793936252593994, "learning_rate": 1.939636569117598e-05, "loss": 0.7648, "step": 6447 }, { "epoch": 1.0525692828864128, "grad_norm": 2.7955732345581055, "learning_rate": 1.939614628989351e-05, "loss": 0.7465, "step": 6448 }, { "epoch": 1.0527325415289173, "grad_norm": 2.713823080062866, "learning_rate": 1.9395926849987032e-05, "loss": 0.7125, "step": 6449 }, { "epoch": 1.0528958001714215, "grad_norm": 2.657031536102295, "learning_rate": 1.9395707371457443e-05, "loss": 0.7124, "step": 6450 }, { "epoch": 1.053059058813926, "grad_norm": 2.5613675117492676, "learning_rate": 1.9395487854305655e-05, "loss": 0.5957, "step": 6451 }, { "epoch": 1.0532223174564304, "grad_norm": 2.4175519943237305, "learning_rate": 1.939526829853256e-05, "loss": 0.6034, "step": 6452 }, { "epoch": 1.0533855760989348, "grad_norm": 2.615114212036133, "learning_rate": 1.9395048704139063e-05, "loss": 0.658, "step": 6453 }, { "epoch": 1.0535488347414392, "grad_norm": 2.89875864982605, "learning_rate": 1.9394829071126074e-05, "loss": 0.7425, "step": 6454 }, { "epoch": 1.0537120933839434, "grad_norm": 2.6599721908569336, "learning_rate": 1.9394609399494487e-05, "loss": 0.7776, "step": 6455 }, { "epoch": 1.0538753520264479, "grad_norm": 2.9197676181793213, "learning_rate": 1.939438968924521e-05, "loss": 0.7464, "step": 6456 }, { "epoch": 1.0540386106689523, "grad_norm": 3.413301706314087, "learning_rate": 1.939416994037914e-05, "loss": 0.8276, "step": 6457 }, { "epoch": 1.0542018693114568, "grad_norm": 2.626971483230591, "learning_rate": 1.9393950152897186e-05, "loss": 0.7167, "step": 6458 }, { "epoch": 1.054365127953961, "grad_norm": 2.547001361846924, "learning_rate": 1.9393730326800252e-05, "loss": 0.5881, "step": 6459 }, { "epoch": 1.0545283865964654, "grad_norm": 3.225968837738037, "learning_rate": 1.9393510462089237e-05, "loss": 0.864, "step": 6460 }, { "epoch": 1.0546916452389699, "grad_norm": 2.9256205558776855, "learning_rate": 1.939329055876505e-05, "loss": 0.6834, "step": 6461 }, { "epoch": 1.0548549038814743, "grad_norm": 2.596557855606079, "learning_rate": 1.939307061682859e-05, "loss": 0.6355, "step": 6462 }, { "epoch": 1.0550181625239785, "grad_norm": 2.841796636581421, "learning_rate": 1.9392850636280764e-05, "loss": 0.6531, "step": 6463 }, { "epoch": 1.055181421166483, "grad_norm": 2.630561113357544, "learning_rate": 1.939263061712248e-05, "loss": 0.6439, "step": 6464 }, { "epoch": 1.0553446798089874, "grad_norm": 2.4486000537872314, "learning_rate": 1.939241055935463e-05, "loss": 0.6044, "step": 6465 }, { "epoch": 1.0555079384514918, "grad_norm": 3.0323634147644043, "learning_rate": 1.9392190462978132e-05, "loss": 0.7865, "step": 6466 }, { "epoch": 1.0556711970939963, "grad_norm": 3.0437614917755127, "learning_rate": 1.9391970327993886e-05, "loss": 0.6598, "step": 6467 }, { "epoch": 1.0558344557365005, "grad_norm": 2.831400156021118, "learning_rate": 1.9391750154402796e-05, "loss": 0.6671, "step": 6468 }, { "epoch": 1.055997714379005, "grad_norm": 2.786672830581665, "learning_rate": 1.9391529942205764e-05, "loss": 0.6929, "step": 6469 }, { "epoch": 1.0561609730215094, "grad_norm": 3.280756950378418, "learning_rate": 1.9391309691403704e-05, "loss": 0.775, "step": 6470 }, { "epoch": 1.0563242316640138, "grad_norm": 2.4252841472625732, "learning_rate": 1.939108940199751e-05, "loss": 0.6874, "step": 6471 }, { "epoch": 1.056487490306518, "grad_norm": 3.1766746044158936, "learning_rate": 1.9390869073988097e-05, "loss": 0.7971, "step": 6472 }, { "epoch": 1.0566507489490224, "grad_norm": 2.7985446453094482, "learning_rate": 1.9390648707376366e-05, "loss": 0.7038, "step": 6473 }, { "epoch": 1.0568140075915269, "grad_norm": 2.8468658924102783, "learning_rate": 1.9390428302163225e-05, "loss": 0.7274, "step": 6474 }, { "epoch": 1.0569772662340313, "grad_norm": 2.6940765380859375, "learning_rate": 1.939020785834958e-05, "loss": 0.7597, "step": 6475 }, { "epoch": 1.0571405248765358, "grad_norm": 3.5321927070617676, "learning_rate": 1.9389987375936333e-05, "loss": 0.9614, "step": 6476 }, { "epoch": 1.05730378351904, "grad_norm": 3.1566684246063232, "learning_rate": 1.9389766854924396e-05, "loss": 0.8774, "step": 6477 }, { "epoch": 1.0574670421615444, "grad_norm": 2.675985097885132, "learning_rate": 1.9389546295314673e-05, "loss": 0.6684, "step": 6478 }, { "epoch": 1.0576303008040489, "grad_norm": 2.959427833557129, "learning_rate": 1.938932569710807e-05, "loss": 0.7122, "step": 6479 }, { "epoch": 1.0577935594465533, "grad_norm": 2.9132721424102783, "learning_rate": 1.9389105060305494e-05, "loss": 0.779, "step": 6480 }, { "epoch": 1.0579568180890575, "grad_norm": 3.1552252769470215, "learning_rate": 1.938888438490785e-05, "loss": 0.8237, "step": 6481 }, { "epoch": 1.058120076731562, "grad_norm": 3.1032721996307373, "learning_rate": 1.9388663670916053e-05, "loss": 0.789, "step": 6482 }, { "epoch": 1.0582833353740664, "grad_norm": 2.8475708961486816, "learning_rate": 1.9388442918331e-05, "loss": 0.7474, "step": 6483 }, { "epoch": 1.0584465940165708, "grad_norm": 2.583258867263794, "learning_rate": 1.9388222127153607e-05, "loss": 0.6921, "step": 6484 }, { "epoch": 1.058609852659075, "grad_norm": 2.823122024536133, "learning_rate": 1.9388001297384775e-05, "loss": 0.6681, "step": 6485 }, { "epoch": 1.0587731113015795, "grad_norm": 3.0564351081848145, "learning_rate": 1.9387780429025416e-05, "loss": 0.7722, "step": 6486 }, { "epoch": 1.058936369944084, "grad_norm": 2.426116943359375, "learning_rate": 1.9387559522076436e-05, "loss": 0.6528, "step": 6487 }, { "epoch": 1.0590996285865883, "grad_norm": 2.662154197692871, "learning_rate": 1.9387338576538743e-05, "loss": 0.7849, "step": 6488 }, { "epoch": 1.0592628872290928, "grad_norm": 2.550926685333252, "learning_rate": 1.9387117592413247e-05, "loss": 0.6366, "step": 6489 }, { "epoch": 1.059426145871597, "grad_norm": 2.1710076332092285, "learning_rate": 1.9386896569700853e-05, "loss": 0.5352, "step": 6490 }, { "epoch": 1.0595894045141014, "grad_norm": 3.041471481323242, "learning_rate": 1.9386675508402474e-05, "loss": 0.5813, "step": 6491 }, { "epoch": 1.0597526631566059, "grad_norm": 2.7493417263031006, "learning_rate": 1.9386454408519012e-05, "loss": 0.8455, "step": 6492 }, { "epoch": 1.0599159217991103, "grad_norm": 2.5078771114349365, "learning_rate": 1.9386233270051384e-05, "loss": 0.5746, "step": 6493 }, { "epoch": 1.0600791804416145, "grad_norm": 3.0383858680725098, "learning_rate": 1.9386012093000494e-05, "loss": 0.6947, "step": 6494 }, { "epoch": 1.060242439084119, "grad_norm": 3.021545648574829, "learning_rate": 1.9385790877367253e-05, "loss": 0.8427, "step": 6495 }, { "epoch": 1.0604056977266234, "grad_norm": 2.8326878547668457, "learning_rate": 1.9385569623152573e-05, "loss": 0.7031, "step": 6496 }, { "epoch": 1.0605689563691278, "grad_norm": 3.209537982940674, "learning_rate": 1.9385348330357354e-05, "loss": 0.8796, "step": 6497 }, { "epoch": 1.0607322150116323, "grad_norm": 3.261169195175171, "learning_rate": 1.9385126998982512e-05, "loss": 0.8135, "step": 6498 }, { "epoch": 1.0608954736541365, "grad_norm": 2.7986323833465576, "learning_rate": 1.9384905629028962e-05, "loss": 0.6915, "step": 6499 }, { "epoch": 1.061058732296641, "grad_norm": 2.953045129776001, "learning_rate": 1.9384684220497605e-05, "loss": 0.7034, "step": 6500 }, { "epoch": 1.0612219909391454, "grad_norm": 2.8868350982666016, "learning_rate": 1.9384462773389355e-05, "loss": 0.6381, "step": 6501 }, { "epoch": 1.0613852495816498, "grad_norm": 2.9818103313446045, "learning_rate": 1.9384241287705127e-05, "loss": 0.6878, "step": 6502 }, { "epoch": 1.061548508224154, "grad_norm": 3.0382659435272217, "learning_rate": 1.938401976344582e-05, "loss": 0.7797, "step": 6503 }, { "epoch": 1.0617117668666585, "grad_norm": 3.153297185897827, "learning_rate": 1.9383798200612354e-05, "loss": 0.726, "step": 6504 }, { "epoch": 1.061875025509163, "grad_norm": 2.8519697189331055, "learning_rate": 1.9383576599205635e-05, "loss": 0.8075, "step": 6505 }, { "epoch": 1.0620382841516673, "grad_norm": 2.9032983779907227, "learning_rate": 1.9383354959226577e-05, "loss": 0.725, "step": 6506 }, { "epoch": 1.0622015427941716, "grad_norm": 2.6035208702087402, "learning_rate": 1.938313328067609e-05, "loss": 0.7883, "step": 6507 }, { "epoch": 1.062364801436676, "grad_norm": 3.001574993133545, "learning_rate": 1.9382911563555086e-05, "loss": 0.8165, "step": 6508 }, { "epoch": 1.0625280600791804, "grad_norm": 2.702911376953125, "learning_rate": 1.9382689807864473e-05, "loss": 0.734, "step": 6509 }, { "epoch": 1.0626913187216849, "grad_norm": 2.5971269607543945, "learning_rate": 1.9382468013605167e-05, "loss": 0.5941, "step": 6510 }, { "epoch": 1.0628545773641893, "grad_norm": 3.1474757194519043, "learning_rate": 1.9382246180778076e-05, "loss": 0.8185, "step": 6511 }, { "epoch": 1.0630178360066935, "grad_norm": 2.9285531044006348, "learning_rate": 1.9382024309384114e-05, "loss": 0.7142, "step": 6512 }, { "epoch": 1.063181094649198, "grad_norm": 2.9656944274902344, "learning_rate": 1.9381802399424194e-05, "loss": 0.9006, "step": 6513 }, { "epoch": 1.0633443532917024, "grad_norm": 2.7786993980407715, "learning_rate": 1.9381580450899226e-05, "loss": 0.8349, "step": 6514 }, { "epoch": 1.0635076119342068, "grad_norm": 3.0017073154449463, "learning_rate": 1.9381358463810125e-05, "loss": 0.7401, "step": 6515 }, { "epoch": 1.063670870576711, "grad_norm": 3.2755649089813232, "learning_rate": 1.93811364381578e-05, "loss": 0.8833, "step": 6516 }, { "epoch": 1.0638341292192155, "grad_norm": 2.690579414367676, "learning_rate": 1.9380914373943163e-05, "loss": 0.6276, "step": 6517 }, { "epoch": 1.06399738786172, "grad_norm": 2.852717161178589, "learning_rate": 1.9380692271167133e-05, "loss": 0.8804, "step": 6518 }, { "epoch": 1.0641606465042244, "grad_norm": 3.1643810272216797, "learning_rate": 1.9380470129830618e-05, "loss": 0.8772, "step": 6519 }, { "epoch": 1.0643239051467286, "grad_norm": 3.4099395275115967, "learning_rate": 1.9380247949934535e-05, "loss": 0.8908, "step": 6520 }, { "epoch": 1.064487163789233, "grad_norm": 2.489696979522705, "learning_rate": 1.938002573147979e-05, "loss": 0.7529, "step": 6521 }, { "epoch": 1.0646504224317375, "grad_norm": 2.5413455963134766, "learning_rate": 1.9379803474467303e-05, "loss": 0.7157, "step": 6522 }, { "epoch": 1.064813681074242, "grad_norm": 2.739827871322632, "learning_rate": 1.9379581178897985e-05, "loss": 0.726, "step": 6523 }, { "epoch": 1.0649769397167463, "grad_norm": 2.498171806335449, "learning_rate": 1.937935884477275e-05, "loss": 0.6568, "step": 6524 }, { "epoch": 1.0651401983592506, "grad_norm": 2.4744017124176025, "learning_rate": 1.9379136472092516e-05, "loss": 0.6895, "step": 6525 }, { "epoch": 1.065303457001755, "grad_norm": 2.6385440826416016, "learning_rate": 1.937891406085819e-05, "loss": 0.7701, "step": 6526 }, { "epoch": 1.0654667156442594, "grad_norm": 3.0421183109283447, "learning_rate": 1.937869161107069e-05, "loss": 0.7963, "step": 6527 }, { "epoch": 1.0656299742867639, "grad_norm": 2.5307154655456543, "learning_rate": 1.9378469122730933e-05, "loss": 0.7417, "step": 6528 }, { "epoch": 1.065793232929268, "grad_norm": 2.615708827972412, "learning_rate": 1.9378246595839827e-05, "loss": 0.76, "step": 6529 }, { "epoch": 1.0659564915717725, "grad_norm": 2.768946409225464, "learning_rate": 1.9378024030398293e-05, "loss": 0.7065, "step": 6530 }, { "epoch": 1.066119750214277, "grad_norm": 3.1468136310577393, "learning_rate": 1.937780142640724e-05, "loss": 1.0208, "step": 6531 }, { "epoch": 1.0662830088567814, "grad_norm": 2.59779691696167, "learning_rate": 1.937757878386759e-05, "loss": 0.6879, "step": 6532 }, { "epoch": 1.0664462674992858, "grad_norm": 2.8880012035369873, "learning_rate": 1.937735610278025e-05, "loss": 0.7552, "step": 6533 }, { "epoch": 1.06660952614179, "grad_norm": 2.5266644954681396, "learning_rate": 1.9377133383146143e-05, "loss": 0.5643, "step": 6534 }, { "epoch": 1.0667727847842945, "grad_norm": 2.8954901695251465, "learning_rate": 1.9376910624966182e-05, "loss": 0.7801, "step": 6535 }, { "epoch": 1.066936043426799, "grad_norm": 2.8386118412017822, "learning_rate": 1.9376687828241282e-05, "loss": 0.7725, "step": 6536 }, { "epoch": 1.0670993020693034, "grad_norm": 3.0982491970062256, "learning_rate": 1.9376464992972358e-05, "loss": 0.6829, "step": 6537 }, { "epoch": 1.0672625607118076, "grad_norm": 2.864013910293579, "learning_rate": 1.9376242119160328e-05, "loss": 0.7575, "step": 6538 }, { "epoch": 1.067425819354312, "grad_norm": 2.5705466270446777, "learning_rate": 1.9376019206806104e-05, "loss": 0.6541, "step": 6539 }, { "epoch": 1.0675890779968165, "grad_norm": 2.8204598426818848, "learning_rate": 1.937579625591061e-05, "loss": 0.8938, "step": 6540 }, { "epoch": 1.067752336639321, "grad_norm": 2.734575033187866, "learning_rate": 1.9375573266474753e-05, "loss": 0.7396, "step": 6541 }, { "epoch": 1.0679155952818253, "grad_norm": 2.6080875396728516, "learning_rate": 1.9375350238499457e-05, "loss": 0.7835, "step": 6542 }, { "epoch": 1.0680788539243296, "grad_norm": 3.0738718509674072, "learning_rate": 1.9375127171985636e-05, "loss": 0.7901, "step": 6543 }, { "epoch": 1.068242112566834, "grad_norm": 2.8777167797088623, "learning_rate": 1.9374904066934208e-05, "loss": 0.6777, "step": 6544 }, { "epoch": 1.0684053712093384, "grad_norm": 2.7813143730163574, "learning_rate": 1.937468092334609e-05, "loss": 0.6723, "step": 6545 }, { "epoch": 1.0685686298518429, "grad_norm": 2.5282161235809326, "learning_rate": 1.9374457741222196e-05, "loss": 0.7582, "step": 6546 }, { "epoch": 1.068731888494347, "grad_norm": 2.631681442260742, "learning_rate": 1.937423452056345e-05, "loss": 0.6597, "step": 6547 }, { "epoch": 1.0688951471368515, "grad_norm": 3.2907259464263916, "learning_rate": 1.937401126137076e-05, "loss": 0.8163, "step": 6548 }, { "epoch": 1.069058405779356, "grad_norm": 2.761472702026367, "learning_rate": 1.9373787963645052e-05, "loss": 0.6562, "step": 6549 }, { "epoch": 1.0692216644218604, "grad_norm": 2.411534547805786, "learning_rate": 1.9373564627387243e-05, "loss": 0.6121, "step": 6550 }, { "epoch": 1.0693849230643648, "grad_norm": 2.726848602294922, "learning_rate": 1.9373341252598244e-05, "loss": 0.6511, "step": 6551 }, { "epoch": 1.069548181706869, "grad_norm": 2.385592222213745, "learning_rate": 1.937311783927898e-05, "loss": 0.6261, "step": 6552 }, { "epoch": 1.0697114403493735, "grad_norm": 3.085615396499634, "learning_rate": 1.9372894387430372e-05, "loss": 0.8943, "step": 6553 }, { "epoch": 1.069874698991878, "grad_norm": 2.5389628410339355, "learning_rate": 1.9372670897053332e-05, "loss": 0.6477, "step": 6554 }, { "epoch": 1.0700379576343824, "grad_norm": 3.033397674560547, "learning_rate": 1.9372447368148782e-05, "loss": 0.7868, "step": 6555 }, { "epoch": 1.0702012162768866, "grad_norm": 2.9703495502471924, "learning_rate": 1.9372223800717637e-05, "loss": 0.7131, "step": 6556 }, { "epoch": 1.070364474919391, "grad_norm": 2.5804030895233154, "learning_rate": 1.937200019476082e-05, "loss": 0.689, "step": 6557 }, { "epoch": 1.0705277335618955, "grad_norm": 2.761305332183838, "learning_rate": 1.937177655027925e-05, "loss": 0.7137, "step": 6558 }, { "epoch": 1.0706909922044, "grad_norm": 2.9518065452575684, "learning_rate": 1.9371552867273844e-05, "loss": 0.8179, "step": 6559 }, { "epoch": 1.070854250846904, "grad_norm": 2.2935516834259033, "learning_rate": 1.937132914574552e-05, "loss": 0.574, "step": 6560 }, { "epoch": 1.0710175094894085, "grad_norm": 3.2357401847839355, "learning_rate": 1.9371105385695208e-05, "loss": 0.7814, "step": 6561 }, { "epoch": 1.071180768131913, "grad_norm": 3.1823763847351074, "learning_rate": 1.9370881587123817e-05, "loss": 0.8593, "step": 6562 }, { "epoch": 1.0713440267744174, "grad_norm": 2.5032379627227783, "learning_rate": 1.937065775003227e-05, "loss": 0.5816, "step": 6563 }, { "epoch": 1.0715072854169219, "grad_norm": 2.703444242477417, "learning_rate": 1.9370433874421487e-05, "loss": 0.6071, "step": 6564 }, { "epoch": 1.071670544059426, "grad_norm": 3.0096724033355713, "learning_rate": 1.9370209960292387e-05, "loss": 0.7966, "step": 6565 }, { "epoch": 1.0718338027019305, "grad_norm": 2.8512215614318848, "learning_rate": 1.9369986007645893e-05, "loss": 0.6959, "step": 6566 }, { "epoch": 1.071997061344435, "grad_norm": 2.8070576190948486, "learning_rate": 1.9369762016482924e-05, "loss": 0.6697, "step": 6567 }, { "epoch": 1.0721603199869394, "grad_norm": 2.9621224403381348, "learning_rate": 1.9369537986804404e-05, "loss": 0.808, "step": 6568 }, { "epoch": 1.0723235786294436, "grad_norm": 2.79364013671875, "learning_rate": 1.936931391861125e-05, "loss": 0.693, "step": 6569 }, { "epoch": 1.072486837271948, "grad_norm": 2.7395522594451904, "learning_rate": 1.9369089811904384e-05, "loss": 0.831, "step": 6570 }, { "epoch": 1.0726500959144525, "grad_norm": 3.263563394546509, "learning_rate": 1.9368865666684726e-05, "loss": 0.765, "step": 6571 }, { "epoch": 1.072813354556957, "grad_norm": 2.7465431690216064, "learning_rate": 1.93686414829532e-05, "loss": 0.7089, "step": 6572 }, { "epoch": 1.0729766131994611, "grad_norm": 2.5364174842834473, "learning_rate": 1.9368417260710726e-05, "loss": 0.6433, "step": 6573 }, { "epoch": 1.0731398718419656, "grad_norm": 2.722513437271118, "learning_rate": 1.9368192999958227e-05, "loss": 0.782, "step": 6574 }, { "epoch": 1.07330313048447, "grad_norm": 2.540560483932495, "learning_rate": 1.9367968700696626e-05, "loss": 0.7183, "step": 6575 }, { "epoch": 1.0734663891269745, "grad_norm": 2.816556453704834, "learning_rate": 1.9367744362926836e-05, "loss": 0.6687, "step": 6576 }, { "epoch": 1.073629647769479, "grad_norm": 3.2089908123016357, "learning_rate": 1.9367519986649794e-05, "loss": 0.9177, "step": 6577 }, { "epoch": 1.073792906411983, "grad_norm": 2.8883163928985596, "learning_rate": 1.936729557186641e-05, "loss": 0.6582, "step": 6578 }, { "epoch": 1.0739561650544875, "grad_norm": 2.9128143787384033, "learning_rate": 1.9367071118577613e-05, "loss": 0.721, "step": 6579 }, { "epoch": 1.074119423696992, "grad_norm": 2.829834222793579, "learning_rate": 1.936684662678432e-05, "loss": 0.8085, "step": 6580 }, { "epoch": 1.0742826823394964, "grad_norm": 3.2156028747558594, "learning_rate": 1.9366622096487463e-05, "loss": 0.7432, "step": 6581 }, { "epoch": 1.0744459409820006, "grad_norm": 3.2832748889923096, "learning_rate": 1.9366397527687953e-05, "loss": 0.6094, "step": 6582 }, { "epoch": 1.074609199624505, "grad_norm": 2.3036394119262695, "learning_rate": 1.9366172920386727e-05, "loss": 0.5406, "step": 6583 }, { "epoch": 1.0747724582670095, "grad_norm": 2.816941499710083, "learning_rate": 1.9365948274584695e-05, "loss": 0.7602, "step": 6584 }, { "epoch": 1.074935716909514, "grad_norm": 3.172194480895996, "learning_rate": 1.9365723590282786e-05, "loss": 0.8576, "step": 6585 }, { "epoch": 1.0750989755520184, "grad_norm": 2.5578269958496094, "learning_rate": 1.9365498867481926e-05, "loss": 0.692, "step": 6586 }, { "epoch": 1.0752622341945226, "grad_norm": 2.9155871868133545, "learning_rate": 1.9365274106183035e-05, "loss": 0.7866, "step": 6587 }, { "epoch": 1.075425492837027, "grad_norm": 3.043234348297119, "learning_rate": 1.936504930638704e-05, "loss": 0.7192, "step": 6588 }, { "epoch": 1.0755887514795315, "grad_norm": 2.4573137760162354, "learning_rate": 1.9364824468094863e-05, "loss": 0.5612, "step": 6589 }, { "epoch": 1.075752010122036, "grad_norm": 2.8008804321289062, "learning_rate": 1.9364599591307427e-05, "loss": 0.7481, "step": 6590 }, { "epoch": 1.0759152687645401, "grad_norm": 3.227071523666382, "learning_rate": 1.9364374676025658e-05, "loss": 0.679, "step": 6591 }, { "epoch": 1.0760785274070446, "grad_norm": 2.8510076999664307, "learning_rate": 1.9364149722250483e-05, "loss": 0.7176, "step": 6592 }, { "epoch": 1.076241786049549, "grad_norm": 2.9656219482421875, "learning_rate": 1.9363924729982825e-05, "loss": 0.8134, "step": 6593 }, { "epoch": 1.0764050446920534, "grad_norm": 2.734549045562744, "learning_rate": 1.9363699699223607e-05, "loss": 0.6121, "step": 6594 }, { "epoch": 1.0765683033345579, "grad_norm": 3.074525833129883, "learning_rate": 1.9363474629973752e-05, "loss": 0.7664, "step": 6595 }, { "epoch": 1.076731561977062, "grad_norm": 2.8683948516845703, "learning_rate": 1.9363249522234195e-05, "loss": 0.7841, "step": 6596 }, { "epoch": 1.0768948206195665, "grad_norm": 2.777660608291626, "learning_rate": 1.9363024376005846e-05, "loss": 0.6777, "step": 6597 }, { "epoch": 1.077058079262071, "grad_norm": 2.997394323348999, "learning_rate": 1.9362799191289646e-05, "loss": 0.774, "step": 6598 }, { "epoch": 1.0772213379045754, "grad_norm": 2.3336434364318848, "learning_rate": 1.9362573968086515e-05, "loss": 0.5292, "step": 6599 }, { "epoch": 1.0773845965470796, "grad_norm": 2.5106842517852783, "learning_rate": 1.9362348706397374e-05, "loss": 0.567, "step": 6600 }, { "epoch": 1.077547855189584, "grad_norm": 2.634387969970703, "learning_rate": 1.9362123406223153e-05, "loss": 0.6744, "step": 6601 }, { "epoch": 1.0777111138320885, "grad_norm": 2.8846683502197266, "learning_rate": 1.936189806756478e-05, "loss": 0.8152, "step": 6602 }, { "epoch": 1.077874372474593, "grad_norm": 2.8994529247283936, "learning_rate": 1.9361672690423177e-05, "loss": 0.7348, "step": 6603 }, { "epoch": 1.0780376311170972, "grad_norm": 2.3988139629364014, "learning_rate": 1.9361447274799273e-05, "loss": 0.5519, "step": 6604 }, { "epoch": 1.0782008897596016, "grad_norm": 2.9400510787963867, "learning_rate": 1.936122182069399e-05, "loss": 0.8271, "step": 6605 }, { "epoch": 1.078364148402106, "grad_norm": 2.958667278289795, "learning_rate": 1.9360996328108267e-05, "loss": 0.65, "step": 6606 }, { "epoch": 1.0785274070446105, "grad_norm": 2.6924493312835693, "learning_rate": 1.9360770797043015e-05, "loss": 0.7096, "step": 6607 }, { "epoch": 1.078690665687115, "grad_norm": 2.6813459396362305, "learning_rate": 1.9360545227499174e-05, "loss": 0.6765, "step": 6608 }, { "epoch": 1.0788539243296191, "grad_norm": 3.0596725940704346, "learning_rate": 1.9360319619477665e-05, "loss": 0.7344, "step": 6609 }, { "epoch": 1.0790171829721236, "grad_norm": 2.546032190322876, "learning_rate": 1.9360093972979413e-05, "loss": 0.6456, "step": 6610 }, { "epoch": 1.079180441614628, "grad_norm": 2.9555046558380127, "learning_rate": 1.935986828800535e-05, "loss": 0.8176, "step": 6611 }, { "epoch": 1.0793437002571324, "grad_norm": 2.970360040664673, "learning_rate": 1.9359642564556406e-05, "loss": 0.7441, "step": 6612 }, { "epoch": 1.0795069588996367, "grad_norm": 2.8953235149383545, "learning_rate": 1.9359416802633503e-05, "loss": 0.8113, "step": 6613 }, { "epoch": 1.079670217542141, "grad_norm": 3.1019372940063477, "learning_rate": 1.9359191002237574e-05, "loss": 0.8431, "step": 6614 }, { "epoch": 1.0798334761846455, "grad_norm": 3.1414756774902344, "learning_rate": 1.935896516336954e-05, "loss": 0.783, "step": 6615 }, { "epoch": 1.07999673482715, "grad_norm": 2.713582992553711, "learning_rate": 1.9358739286030343e-05, "loss": 0.6531, "step": 6616 }, { "epoch": 1.0801599934696542, "grad_norm": 2.886852741241455, "learning_rate": 1.9358513370220896e-05, "loss": 0.714, "step": 6617 }, { "epoch": 1.0803232521121586, "grad_norm": 2.6218950748443604, "learning_rate": 1.935828741594213e-05, "loss": 0.628, "step": 6618 }, { "epoch": 1.080486510754663, "grad_norm": 2.4160046577453613, "learning_rate": 1.9358061423194988e-05, "loss": 0.7426, "step": 6619 }, { "epoch": 1.0806497693971675, "grad_norm": 2.8288590908050537, "learning_rate": 1.9357835391980382e-05, "loss": 0.7432, "step": 6620 }, { "epoch": 1.080813028039672, "grad_norm": 2.8156559467315674, "learning_rate": 1.9357609322299252e-05, "loss": 0.6553, "step": 6621 }, { "epoch": 1.0809762866821762, "grad_norm": 2.6871986389160156, "learning_rate": 1.935738321415252e-05, "loss": 0.6947, "step": 6622 }, { "epoch": 1.0811395453246806, "grad_norm": 3.1232144832611084, "learning_rate": 1.9357157067541124e-05, "loss": 0.7729, "step": 6623 }, { "epoch": 1.081302803967185, "grad_norm": 2.9014859199523926, "learning_rate": 1.9356930882465982e-05, "loss": 0.887, "step": 6624 }, { "epoch": 1.0814660626096895, "grad_norm": 3.045844554901123, "learning_rate": 1.9356704658928036e-05, "loss": 0.8224, "step": 6625 }, { "epoch": 1.0816293212521937, "grad_norm": 2.8164491653442383, "learning_rate": 1.9356478396928208e-05, "loss": 0.8776, "step": 6626 }, { "epoch": 1.0817925798946981, "grad_norm": 3.2200441360473633, "learning_rate": 1.935625209646743e-05, "loss": 0.8066, "step": 6627 }, { "epoch": 1.0819558385372026, "grad_norm": 3.206001043319702, "learning_rate": 1.935602575754663e-05, "loss": 0.7759, "step": 6628 }, { "epoch": 1.082119097179707, "grad_norm": 2.5380704402923584, "learning_rate": 1.9355799380166743e-05, "loss": 0.5685, "step": 6629 }, { "epoch": 1.0822823558222114, "grad_norm": 3.0850675106048584, "learning_rate": 1.93555729643287e-05, "loss": 0.8074, "step": 6630 }, { "epoch": 1.0824456144647157, "grad_norm": 3.0070838928222656, "learning_rate": 1.9355346510033425e-05, "loss": 0.7772, "step": 6631 }, { "epoch": 1.08260887310722, "grad_norm": 2.6825242042541504, "learning_rate": 1.9355120017281856e-05, "loss": 0.7004, "step": 6632 }, { "epoch": 1.0827721317497245, "grad_norm": 2.7953948974609375, "learning_rate": 1.935489348607492e-05, "loss": 0.6415, "step": 6633 }, { "epoch": 1.082935390392229, "grad_norm": 2.662219285964966, "learning_rate": 1.9354666916413548e-05, "loss": 0.7791, "step": 6634 }, { "epoch": 1.0830986490347332, "grad_norm": 3.177739143371582, "learning_rate": 1.9354440308298676e-05, "loss": 0.8661, "step": 6635 }, { "epoch": 1.0832619076772376, "grad_norm": 2.746035099029541, "learning_rate": 1.935421366173123e-05, "loss": 0.7226, "step": 6636 }, { "epoch": 1.083425166319742, "grad_norm": 3.064955711364746, "learning_rate": 1.9353986976712143e-05, "loss": 0.8499, "step": 6637 }, { "epoch": 1.0835884249622465, "grad_norm": 2.7164130210876465, "learning_rate": 1.9353760253242348e-05, "loss": 0.6823, "step": 6638 }, { "epoch": 1.083751683604751, "grad_norm": 2.9740049839019775, "learning_rate": 1.9353533491322776e-05, "loss": 0.7266, "step": 6639 }, { "epoch": 1.0839149422472552, "grad_norm": 2.8581364154815674, "learning_rate": 1.935330669095436e-05, "loss": 0.7313, "step": 6640 }, { "epoch": 1.0840782008897596, "grad_norm": 2.743962287902832, "learning_rate": 1.9353079852138034e-05, "loss": 0.6968, "step": 6641 }, { "epoch": 1.084241459532264, "grad_norm": 3.004457950592041, "learning_rate": 1.9352852974874726e-05, "loss": 0.811, "step": 6642 }, { "epoch": 1.0844047181747685, "grad_norm": 2.810910224914551, "learning_rate": 1.935262605916537e-05, "loss": 0.7174, "step": 6643 }, { "epoch": 1.0845679768172727, "grad_norm": 3.0553886890411377, "learning_rate": 1.93523991050109e-05, "loss": 0.8109, "step": 6644 }, { "epoch": 1.0847312354597771, "grad_norm": 3.0226380825042725, "learning_rate": 1.9352172112412254e-05, "loss": 0.829, "step": 6645 }, { "epoch": 1.0848944941022816, "grad_norm": 2.6571953296661377, "learning_rate": 1.9351945081370352e-05, "loss": 0.6611, "step": 6646 }, { "epoch": 1.085057752744786, "grad_norm": 2.6940948963165283, "learning_rate": 1.935171801188614e-05, "loss": 0.7015, "step": 6647 }, { "epoch": 1.0852210113872902, "grad_norm": 2.5039608478546143, "learning_rate": 1.9351490903960546e-05, "loss": 0.6009, "step": 6648 }, { "epoch": 1.0853842700297947, "grad_norm": 2.7744040489196777, "learning_rate": 1.9351263757594502e-05, "loss": 0.726, "step": 6649 }, { "epoch": 1.085547528672299, "grad_norm": 2.87314510345459, "learning_rate": 1.9351036572788948e-05, "loss": 0.8671, "step": 6650 }, { "epoch": 1.0857107873148035, "grad_norm": 2.5590360164642334, "learning_rate": 1.9350809349544808e-05, "loss": 0.7016, "step": 6651 }, { "epoch": 1.085874045957308, "grad_norm": 2.198742628097534, "learning_rate": 1.9350582087863024e-05, "loss": 0.6257, "step": 6652 }, { "epoch": 1.0860373045998122, "grad_norm": 3.1738545894622803, "learning_rate": 1.9350354787744528e-05, "loss": 0.8214, "step": 6653 }, { "epoch": 1.0862005632423166, "grad_norm": 2.773066520690918, "learning_rate": 1.9350127449190256e-05, "loss": 0.8002, "step": 6654 }, { "epoch": 1.086363821884821, "grad_norm": 2.8173866271972656, "learning_rate": 1.9349900072201135e-05, "loss": 0.7727, "step": 6655 }, { "epoch": 1.0865270805273255, "grad_norm": 2.653247356414795, "learning_rate": 1.9349672656778108e-05, "loss": 0.7059, "step": 6656 }, { "epoch": 1.0866903391698297, "grad_norm": 2.464216947555542, "learning_rate": 1.934944520292211e-05, "loss": 0.5558, "step": 6657 }, { "epoch": 1.0868535978123341, "grad_norm": 3.4296457767486572, "learning_rate": 1.934921771063407e-05, "loss": 0.8825, "step": 6658 }, { "epoch": 1.0870168564548386, "grad_norm": 2.6554205417633057, "learning_rate": 1.9348990179914928e-05, "loss": 0.6941, "step": 6659 }, { "epoch": 1.087180115097343, "grad_norm": 2.628450632095337, "learning_rate": 1.9348762610765615e-05, "loss": 0.6027, "step": 6660 }, { "epoch": 1.0873433737398472, "grad_norm": 2.5226337909698486, "learning_rate": 1.9348535003187072e-05, "loss": 0.6259, "step": 6661 }, { "epoch": 1.0875066323823517, "grad_norm": 2.678126811981201, "learning_rate": 1.934830735718023e-05, "loss": 0.9456, "step": 6662 }, { "epoch": 1.0876698910248561, "grad_norm": 3.0778818130493164, "learning_rate": 1.9348079672746026e-05, "loss": 0.8128, "step": 6663 }, { "epoch": 1.0878331496673606, "grad_norm": 2.424777030944824, "learning_rate": 1.9347851949885398e-05, "loss": 0.5359, "step": 6664 }, { "epoch": 1.087996408309865, "grad_norm": 2.948277711868286, "learning_rate": 1.934762418859928e-05, "loss": 0.949, "step": 6665 }, { "epoch": 1.0881596669523692, "grad_norm": 3.0511839389801025, "learning_rate": 1.9347396388888608e-05, "loss": 0.8474, "step": 6666 }, { "epoch": 1.0883229255948736, "grad_norm": 2.5841665267944336, "learning_rate": 1.9347168550754316e-05, "loss": 0.631, "step": 6667 }, { "epoch": 1.088486184237378, "grad_norm": 2.4009125232696533, "learning_rate": 1.9346940674197347e-05, "loss": 0.5819, "step": 6668 }, { "epoch": 1.0886494428798825, "grad_norm": 3.000653028488159, "learning_rate": 1.934671275921863e-05, "loss": 0.8779, "step": 6669 }, { "epoch": 1.0888127015223867, "grad_norm": 2.138201951980591, "learning_rate": 1.934648480581911e-05, "loss": 0.48, "step": 6670 }, { "epoch": 1.0889759601648912, "grad_norm": 3.192903995513916, "learning_rate": 1.934625681399972e-05, "loss": 0.8405, "step": 6671 }, { "epoch": 1.0891392188073956, "grad_norm": 2.739826202392578, "learning_rate": 1.93460287837614e-05, "loss": 0.6632, "step": 6672 }, { "epoch": 1.0893024774499, "grad_norm": 3.506943941116333, "learning_rate": 1.934580071510508e-05, "loss": 1.1337, "step": 6673 }, { "epoch": 1.0894657360924045, "grad_norm": 2.6256725788116455, "learning_rate": 1.93455726080317e-05, "loss": 0.7262, "step": 6674 }, { "epoch": 1.0896289947349087, "grad_norm": 2.827730655670166, "learning_rate": 1.9345344462542205e-05, "loss": 0.6873, "step": 6675 }, { "epoch": 1.0897922533774131, "grad_norm": 3.047639846801758, "learning_rate": 1.9345116278637528e-05, "loss": 0.6867, "step": 6676 }, { "epoch": 1.0899555120199176, "grad_norm": 2.8565165996551514, "learning_rate": 1.9344888056318606e-05, "loss": 0.8216, "step": 6677 }, { "epoch": 1.090118770662422, "grad_norm": 2.640373468399048, "learning_rate": 1.9344659795586374e-05, "loss": 0.6919, "step": 6678 }, { "epoch": 1.0902820293049262, "grad_norm": 3.0154941082000732, "learning_rate": 1.9344431496441774e-05, "loss": 0.7701, "step": 6679 }, { "epoch": 1.0904452879474307, "grad_norm": 3.030074119567871, "learning_rate": 1.934420315888575e-05, "loss": 0.7163, "step": 6680 }, { "epoch": 1.0906085465899351, "grad_norm": 2.6477746963500977, "learning_rate": 1.9343974782919233e-05, "loss": 0.7649, "step": 6681 }, { "epoch": 1.0907718052324396, "grad_norm": 2.7474522590637207, "learning_rate": 1.9343746368543163e-05, "loss": 0.7442, "step": 6682 }, { "epoch": 1.090935063874944, "grad_norm": 3.1089348793029785, "learning_rate": 1.9343517915758482e-05, "loss": 0.7292, "step": 6683 }, { "epoch": 1.0910983225174482, "grad_norm": 2.2573416233062744, "learning_rate": 1.9343289424566122e-05, "loss": 0.614, "step": 6684 }, { "epoch": 1.0912615811599526, "grad_norm": 2.7480671405792236, "learning_rate": 1.9343060894967028e-05, "loss": 0.7646, "step": 6685 }, { "epoch": 1.091424839802457, "grad_norm": 2.9147121906280518, "learning_rate": 1.9342832326962142e-05, "loss": 0.7414, "step": 6686 }, { "epoch": 1.0915880984449615, "grad_norm": 2.768444299697876, "learning_rate": 1.9342603720552397e-05, "loss": 0.6891, "step": 6687 }, { "epoch": 1.0917513570874657, "grad_norm": 2.8893871307373047, "learning_rate": 1.934237507573874e-05, "loss": 0.7259, "step": 6688 }, { "epoch": 1.0919146157299702, "grad_norm": 2.6011714935302734, "learning_rate": 1.9342146392522103e-05, "loss": 0.5978, "step": 6689 }, { "epoch": 1.0920778743724746, "grad_norm": 2.9700076580047607, "learning_rate": 1.9341917670903432e-05, "loss": 0.7425, "step": 6690 }, { "epoch": 1.092241133014979, "grad_norm": 2.6437902450561523, "learning_rate": 1.9341688910883665e-05, "loss": 0.5735, "step": 6691 }, { "epoch": 1.0924043916574833, "grad_norm": 2.97845721244812, "learning_rate": 1.934146011246374e-05, "loss": 0.822, "step": 6692 }, { "epoch": 1.0925676502999877, "grad_norm": 2.663619041442871, "learning_rate": 1.93412312756446e-05, "loss": 0.6479, "step": 6693 }, { "epoch": 1.0927309089424921, "grad_norm": 3.0785696506500244, "learning_rate": 1.9341002400427188e-05, "loss": 0.8565, "step": 6694 }, { "epoch": 1.0928941675849966, "grad_norm": 2.506707191467285, "learning_rate": 1.934077348681244e-05, "loss": 0.6583, "step": 6695 }, { "epoch": 1.093057426227501, "grad_norm": 2.719759941101074, "learning_rate": 1.9340544534801298e-05, "loss": 0.6965, "step": 6696 }, { "epoch": 1.0932206848700052, "grad_norm": 2.8408634662628174, "learning_rate": 1.9340315544394705e-05, "loss": 0.7605, "step": 6697 }, { "epoch": 1.0933839435125097, "grad_norm": 2.6817867755889893, "learning_rate": 1.9340086515593603e-05, "loss": 0.8134, "step": 6698 }, { "epoch": 1.093547202155014, "grad_norm": 2.7542710304260254, "learning_rate": 1.9339857448398934e-05, "loss": 0.7357, "step": 6699 }, { "epoch": 1.0937104607975185, "grad_norm": 2.818941831588745, "learning_rate": 1.9339628342811635e-05, "loss": 0.744, "step": 6700 }, { "epoch": 1.0938737194400228, "grad_norm": 3.141939163208008, "learning_rate": 1.933939919883265e-05, "loss": 0.7568, "step": 6701 }, { "epoch": 1.0940369780825272, "grad_norm": 2.816509962081909, "learning_rate": 1.9339170016462923e-05, "loss": 0.69, "step": 6702 }, { "epoch": 1.0942002367250316, "grad_norm": 3.1721229553222656, "learning_rate": 1.9338940795703393e-05, "loss": 0.8262, "step": 6703 }, { "epoch": 1.094363495367536, "grad_norm": 2.876526117324829, "learning_rate": 1.9338711536555004e-05, "loss": 0.7955, "step": 6704 }, { "epoch": 1.0945267540100403, "grad_norm": 3.16575026512146, "learning_rate": 1.93384822390187e-05, "loss": 0.8078, "step": 6705 }, { "epoch": 1.0946900126525447, "grad_norm": 2.9136741161346436, "learning_rate": 1.9338252903095418e-05, "loss": 0.6848, "step": 6706 }, { "epoch": 1.0948532712950492, "grad_norm": 3.0387141704559326, "learning_rate": 1.9338023528786108e-05, "loss": 0.7896, "step": 6707 }, { "epoch": 1.0950165299375536, "grad_norm": 2.9381933212280273, "learning_rate": 1.9337794116091707e-05, "loss": 0.839, "step": 6708 }, { "epoch": 1.095179788580058, "grad_norm": 3.2968504428863525, "learning_rate": 1.933756466501316e-05, "loss": 0.7895, "step": 6709 }, { "epoch": 1.0953430472225623, "grad_norm": 3.1340537071228027, "learning_rate": 1.933733517555141e-05, "loss": 0.7887, "step": 6710 }, { "epoch": 1.0955063058650667, "grad_norm": 2.859785556793213, "learning_rate": 1.9337105647707404e-05, "loss": 0.7358, "step": 6711 }, { "epoch": 1.0956695645075711, "grad_norm": 3.312166213989258, "learning_rate": 1.933687608148208e-05, "loss": 1.5337, "step": 6712 }, { "epoch": 1.0958328231500756, "grad_norm": 2.859609842300415, "learning_rate": 1.9336646476876388e-05, "loss": 0.7369, "step": 6713 }, { "epoch": 1.0959960817925798, "grad_norm": 3.2975337505340576, "learning_rate": 1.9336416833891262e-05, "loss": 1.3638, "step": 6714 }, { "epoch": 1.0961593404350842, "grad_norm": 2.336594820022583, "learning_rate": 1.9336187152527656e-05, "loss": 0.5383, "step": 6715 }, { "epoch": 1.0963225990775887, "grad_norm": 2.61124587059021, "learning_rate": 1.933595743278651e-05, "loss": 0.5621, "step": 6716 }, { "epoch": 1.096485857720093, "grad_norm": 2.955134630203247, "learning_rate": 1.9335727674668762e-05, "loss": 0.7833, "step": 6717 }, { "epoch": 1.0966491163625975, "grad_norm": 2.3890562057495117, "learning_rate": 1.933549787817537e-05, "loss": 0.5889, "step": 6718 }, { "epoch": 1.0968123750051018, "grad_norm": 3.139092206954956, "learning_rate": 1.9335268043307267e-05, "loss": 0.6968, "step": 6719 }, { "epoch": 1.0969756336476062, "grad_norm": 3.3276052474975586, "learning_rate": 1.9335038170065403e-05, "loss": 0.8554, "step": 6720 }, { "epoch": 1.0971388922901106, "grad_norm": 2.598130941390991, "learning_rate": 1.933480825845072e-05, "loss": 0.6568, "step": 6721 }, { "epoch": 1.097302150932615, "grad_norm": 3.24367094039917, "learning_rate": 1.933457830846417e-05, "loss": 0.7951, "step": 6722 }, { "epoch": 1.0974654095751193, "grad_norm": 3.015122413635254, "learning_rate": 1.933434832010669e-05, "loss": 0.7691, "step": 6723 }, { "epoch": 1.0976286682176237, "grad_norm": 3.124943256378174, "learning_rate": 1.933411829337923e-05, "loss": 0.8685, "step": 6724 }, { "epoch": 1.0977919268601282, "grad_norm": 2.927210569381714, "learning_rate": 1.933388822828273e-05, "loss": 0.7396, "step": 6725 }, { "epoch": 1.0979551855026326, "grad_norm": 2.7060248851776123, "learning_rate": 1.9333658124818148e-05, "loss": 0.6892, "step": 6726 }, { "epoch": 1.098118444145137, "grad_norm": 2.6610467433929443, "learning_rate": 1.9333427982986412e-05, "loss": 0.6313, "step": 6727 }, { "epoch": 1.0982817027876413, "grad_norm": 2.599790573120117, "learning_rate": 1.9333197802788484e-05, "loss": 0.5893, "step": 6728 }, { "epoch": 1.0984449614301457, "grad_norm": 2.898056745529175, "learning_rate": 1.9332967584225304e-05, "loss": 0.7795, "step": 6729 }, { "epoch": 1.0986082200726501, "grad_norm": 2.7643895149230957, "learning_rate": 1.9332737327297817e-05, "loss": 0.6862, "step": 6730 }, { "epoch": 1.0987714787151546, "grad_norm": 2.745208501815796, "learning_rate": 1.933250703200697e-05, "loss": 0.6382, "step": 6731 }, { "epoch": 1.0989347373576588, "grad_norm": 2.9796159267425537, "learning_rate": 1.933227669835371e-05, "loss": 0.8893, "step": 6732 }, { "epoch": 1.0990979960001632, "grad_norm": 2.790482759475708, "learning_rate": 1.9332046326338985e-05, "loss": 0.7137, "step": 6733 }, { "epoch": 1.0992612546426677, "grad_norm": 3.2873263359069824, "learning_rate": 1.9331815915963744e-05, "loss": 0.7946, "step": 6734 }, { "epoch": 1.099424513285172, "grad_norm": 3.2225770950317383, "learning_rate": 1.933158546722893e-05, "loss": 0.6849, "step": 6735 }, { "epoch": 1.0995877719276765, "grad_norm": 3.4450523853302, "learning_rate": 1.933135498013549e-05, "loss": 0.8999, "step": 6736 }, { "epoch": 1.0997510305701808, "grad_norm": 2.520951747894287, "learning_rate": 1.9331124454684373e-05, "loss": 0.6014, "step": 6737 }, { "epoch": 1.0999142892126852, "grad_norm": 2.974919319152832, "learning_rate": 1.933089389087653e-05, "loss": 0.8051, "step": 6738 }, { "epoch": 1.1000775478551896, "grad_norm": 3.2679386138916016, "learning_rate": 1.9330663288712905e-05, "loss": 0.8114, "step": 6739 }, { "epoch": 1.100240806497694, "grad_norm": 2.9966466426849365, "learning_rate": 1.9330432648194444e-05, "loss": 0.6311, "step": 6740 }, { "epoch": 1.1004040651401983, "grad_norm": 3.2237093448638916, "learning_rate": 1.93302019693221e-05, "loss": 0.7605, "step": 6741 }, { "epoch": 1.1005673237827027, "grad_norm": 3.023855686187744, "learning_rate": 1.9329971252096812e-05, "loss": 0.7788, "step": 6742 }, { "epoch": 1.1007305824252072, "grad_norm": 2.8048222064971924, "learning_rate": 1.932974049651954e-05, "loss": 0.7527, "step": 6743 }, { "epoch": 1.1008938410677116, "grad_norm": 3.444096088409424, "learning_rate": 1.932950970259123e-05, "loss": 0.7504, "step": 6744 }, { "epoch": 1.1010570997102158, "grad_norm": 2.759235382080078, "learning_rate": 1.9329278870312826e-05, "loss": 0.7086, "step": 6745 }, { "epoch": 1.1012203583527203, "grad_norm": 2.5843026638031006, "learning_rate": 1.9329047999685282e-05, "loss": 0.6754, "step": 6746 }, { "epoch": 1.1013836169952247, "grad_norm": 3.484410285949707, "learning_rate": 1.932881709070954e-05, "loss": 0.8559, "step": 6747 }, { "epoch": 1.1015468756377291, "grad_norm": 3.242699384689331, "learning_rate": 1.9328586143386555e-05, "loss": 0.8476, "step": 6748 }, { "epoch": 1.1017101342802333, "grad_norm": 3.086320638656616, "learning_rate": 1.9328355157717277e-05, "loss": 0.8057, "step": 6749 }, { "epoch": 1.1018733929227378, "grad_norm": 3.1965551376342773, "learning_rate": 1.932812413370265e-05, "loss": 0.7315, "step": 6750 }, { "epoch": 1.1020366515652422, "grad_norm": 3.492873191833496, "learning_rate": 1.9327893071343626e-05, "loss": 0.7765, "step": 6751 }, { "epoch": 1.1021999102077467, "grad_norm": 2.8753316402435303, "learning_rate": 1.9327661970641158e-05, "loss": 0.7288, "step": 6752 }, { "epoch": 1.102363168850251, "grad_norm": 2.9499919414520264, "learning_rate": 1.9327430831596198e-05, "loss": 0.7779, "step": 6753 }, { "epoch": 1.1025264274927553, "grad_norm": 2.842264413833618, "learning_rate": 1.9327199654209686e-05, "loss": 0.7513, "step": 6754 }, { "epoch": 1.1026896861352598, "grad_norm": 2.488279342651367, "learning_rate": 1.932696843848258e-05, "loss": 0.6982, "step": 6755 }, { "epoch": 1.1028529447777642, "grad_norm": 3.2505109310150146, "learning_rate": 1.9326737184415827e-05, "loss": 0.8232, "step": 6756 }, { "epoch": 1.1030162034202686, "grad_norm": 2.5714423656463623, "learning_rate": 1.9326505892010382e-05, "loss": 0.5836, "step": 6757 }, { "epoch": 1.1031794620627728, "grad_norm": 2.8830361366271973, "learning_rate": 1.932627456126719e-05, "loss": 0.7048, "step": 6758 }, { "epoch": 1.1033427207052773, "grad_norm": 2.7509992122650146, "learning_rate": 1.9326043192187204e-05, "loss": 0.7204, "step": 6759 }, { "epoch": 1.1035059793477817, "grad_norm": 3.0191965103149414, "learning_rate": 1.932581178477138e-05, "loss": 0.772, "step": 6760 }, { "epoch": 1.1036692379902862, "grad_norm": 2.7150704860687256, "learning_rate": 1.9325580339020663e-05, "loss": 0.702, "step": 6761 }, { "epoch": 1.1038324966327906, "grad_norm": 3.227113962173462, "learning_rate": 1.9325348854936006e-05, "loss": 0.7597, "step": 6762 }, { "epoch": 1.1039957552752948, "grad_norm": 3.004821300506592, "learning_rate": 1.932511733251836e-05, "loss": 0.7192, "step": 6763 }, { "epoch": 1.1041590139177992, "grad_norm": 3.261112928390503, "learning_rate": 1.932488577176868e-05, "loss": 0.7545, "step": 6764 }, { "epoch": 1.1043222725603037, "grad_norm": 3.2431528568267822, "learning_rate": 1.9324654172687913e-05, "loss": 0.9213, "step": 6765 }, { "epoch": 1.1044855312028081, "grad_norm": 2.9009101390838623, "learning_rate": 1.9324422535277017e-05, "loss": 0.6936, "step": 6766 }, { "epoch": 1.1046487898453123, "grad_norm": 3.033156633377075, "learning_rate": 1.932419085953694e-05, "loss": 0.7153, "step": 6767 }, { "epoch": 1.1048120484878168, "grad_norm": 3.284745216369629, "learning_rate": 1.9323959145468633e-05, "loss": 0.9041, "step": 6768 }, { "epoch": 1.1049753071303212, "grad_norm": 2.811438798904419, "learning_rate": 1.9323727393073052e-05, "loss": 0.6391, "step": 6769 }, { "epoch": 1.1051385657728257, "grad_norm": 2.539389133453369, "learning_rate": 1.932349560235115e-05, "loss": 0.6876, "step": 6770 }, { "epoch": 1.10530182441533, "grad_norm": 2.6861519813537598, "learning_rate": 1.9323263773303876e-05, "loss": 0.787, "step": 6771 }, { "epoch": 1.1054650830578343, "grad_norm": 2.653421401977539, "learning_rate": 1.9323031905932186e-05, "loss": 0.7709, "step": 6772 }, { "epoch": 1.1056283417003387, "grad_norm": 2.600219249725342, "learning_rate": 1.932280000023703e-05, "loss": 0.7201, "step": 6773 }, { "epoch": 1.1057916003428432, "grad_norm": 2.5745794773101807, "learning_rate": 1.9322568056219365e-05, "loss": 0.6801, "step": 6774 }, { "epoch": 1.1059548589853476, "grad_norm": 2.53790283203125, "learning_rate": 1.9322336073880143e-05, "loss": 0.6947, "step": 6775 }, { "epoch": 1.1061181176278518, "grad_norm": 2.5001378059387207, "learning_rate": 1.9322104053220317e-05, "loss": 0.5489, "step": 6776 }, { "epoch": 1.1062813762703563, "grad_norm": 2.8200080394744873, "learning_rate": 1.9321871994240842e-05, "loss": 0.7347, "step": 6777 }, { "epoch": 1.1064446349128607, "grad_norm": 2.9650678634643555, "learning_rate": 1.932163989694267e-05, "loss": 0.6507, "step": 6778 }, { "epoch": 1.1066078935553652, "grad_norm": 3.62650203704834, "learning_rate": 1.932140776132676e-05, "loss": 0.9056, "step": 6779 }, { "epoch": 1.1067711521978696, "grad_norm": 2.8140709400177, "learning_rate": 1.9321175587394058e-05, "loss": 0.8015, "step": 6780 }, { "epoch": 1.1069344108403738, "grad_norm": 3.1466431617736816, "learning_rate": 1.9320943375145523e-05, "loss": 0.7131, "step": 6781 }, { "epoch": 1.1070976694828782, "grad_norm": 2.85532808303833, "learning_rate": 1.932071112458211e-05, "loss": 0.6258, "step": 6782 }, { "epoch": 1.1072609281253827, "grad_norm": 3.2128186225891113, "learning_rate": 1.9320478835704775e-05, "loss": 0.7079, "step": 6783 }, { "epoch": 1.1074241867678871, "grad_norm": 3.2626309394836426, "learning_rate": 1.932024650851447e-05, "loss": 0.707, "step": 6784 }, { "epoch": 1.1075874454103913, "grad_norm": 2.7419533729553223, "learning_rate": 1.9320014143012148e-05, "loss": 0.7593, "step": 6785 }, { "epoch": 1.1077507040528958, "grad_norm": 3.1839351654052734, "learning_rate": 1.9319781739198767e-05, "loss": 0.9157, "step": 6786 }, { "epoch": 1.1079139626954002, "grad_norm": 2.9697976112365723, "learning_rate": 1.9319549297075286e-05, "loss": 0.7346, "step": 6787 }, { "epoch": 1.1080772213379046, "grad_norm": 2.9501121044158936, "learning_rate": 1.9319316816642656e-05, "loss": 0.7512, "step": 6788 }, { "epoch": 1.1082404799804089, "grad_norm": 2.582237482070923, "learning_rate": 1.9319084297901832e-05, "loss": 0.6697, "step": 6789 }, { "epoch": 1.1084037386229133, "grad_norm": 3.2565977573394775, "learning_rate": 1.931885174085377e-05, "loss": 1.5454, "step": 6790 }, { "epoch": 1.1085669972654177, "grad_norm": 2.934535026550293, "learning_rate": 1.9318619145499434e-05, "loss": 0.7992, "step": 6791 }, { "epoch": 1.1087302559079222, "grad_norm": 3.0548524856567383, "learning_rate": 1.9318386511839767e-05, "loss": 0.8012, "step": 6792 }, { "epoch": 1.1088935145504264, "grad_norm": 3.2061498165130615, "learning_rate": 1.9318153839875735e-05, "loss": 0.9341, "step": 6793 }, { "epoch": 1.1090567731929308, "grad_norm": 2.7239482402801514, "learning_rate": 1.9317921129608287e-05, "loss": 0.8078, "step": 6794 }, { "epoch": 1.1092200318354353, "grad_norm": 2.6014976501464844, "learning_rate": 1.9317688381038388e-05, "loss": 0.7616, "step": 6795 }, { "epoch": 1.1093832904779397, "grad_norm": 2.6385550498962402, "learning_rate": 1.9317455594166988e-05, "loss": 0.692, "step": 6796 }, { "epoch": 1.1095465491204441, "grad_norm": 2.4754183292388916, "learning_rate": 1.9317222768995045e-05, "loss": 0.6445, "step": 6797 }, { "epoch": 1.1097098077629484, "grad_norm": 3.1299428939819336, "learning_rate": 1.931698990552352e-05, "loss": 0.8807, "step": 6798 }, { "epoch": 1.1098730664054528, "grad_norm": 2.940572738647461, "learning_rate": 1.9316757003753366e-05, "loss": 0.8263, "step": 6799 }, { "epoch": 1.1100363250479572, "grad_norm": 3.234410285949707, "learning_rate": 1.9316524063685544e-05, "loss": 0.6747, "step": 6800 }, { "epoch": 1.1101995836904617, "grad_norm": 2.5866239070892334, "learning_rate": 1.9316291085321007e-05, "loss": 0.6951, "step": 6801 }, { "epoch": 1.110362842332966, "grad_norm": 2.491813898086548, "learning_rate": 1.9316058068660716e-05, "loss": 0.6831, "step": 6802 }, { "epoch": 1.1105261009754703, "grad_norm": 2.3088293075561523, "learning_rate": 1.9315825013705627e-05, "loss": 0.5351, "step": 6803 }, { "epoch": 1.1106893596179748, "grad_norm": 2.633516550064087, "learning_rate": 1.93155919204567e-05, "loss": 0.6896, "step": 6804 }, { "epoch": 1.1108526182604792, "grad_norm": 3.1253502368927, "learning_rate": 1.9315358788914893e-05, "loss": 0.84, "step": 6805 }, { "epoch": 1.1110158769029836, "grad_norm": 3.0916781425476074, "learning_rate": 1.931512561908116e-05, "loss": 0.6588, "step": 6806 }, { "epoch": 1.1111791355454879, "grad_norm": 3.0652410984039307, "learning_rate": 1.9314892410956468e-05, "loss": 0.8022, "step": 6807 }, { "epoch": 1.1113423941879923, "grad_norm": 2.628451108932495, "learning_rate": 1.9314659164541765e-05, "loss": 0.8446, "step": 6808 }, { "epoch": 1.1115056528304967, "grad_norm": 2.580735683441162, "learning_rate": 1.931442587983802e-05, "loss": 0.5896, "step": 6809 }, { "epoch": 1.1116689114730012, "grad_norm": 2.578547239303589, "learning_rate": 1.9314192556846185e-05, "loss": 0.6426, "step": 6810 }, { "epoch": 1.1118321701155054, "grad_norm": 2.792090892791748, "learning_rate": 1.9313959195567222e-05, "loss": 0.7713, "step": 6811 }, { "epoch": 1.1119954287580098, "grad_norm": 3.1137828826904297, "learning_rate": 1.9313725796002088e-05, "loss": 0.7292, "step": 6812 }, { "epoch": 1.1121586874005143, "grad_norm": 2.7279136180877686, "learning_rate": 1.9313492358151746e-05, "loss": 0.7475, "step": 6813 }, { "epoch": 1.1123219460430187, "grad_norm": 2.9578487873077393, "learning_rate": 1.9313258882017155e-05, "loss": 0.7668, "step": 6814 }, { "epoch": 1.1124852046855231, "grad_norm": 3.234952211380005, "learning_rate": 1.931302536759927e-05, "loss": 0.7454, "step": 6815 }, { "epoch": 1.1126484633280274, "grad_norm": 2.957094669342041, "learning_rate": 1.9312791814899056e-05, "loss": 0.7775, "step": 6816 }, { "epoch": 1.1128117219705318, "grad_norm": 2.5121824741363525, "learning_rate": 1.931255822391747e-05, "loss": 0.7365, "step": 6817 }, { "epoch": 1.1129749806130362, "grad_norm": 2.8809099197387695, "learning_rate": 1.9312324594655476e-05, "loss": 0.7895, "step": 6818 }, { "epoch": 1.1131382392555407, "grad_norm": 3.385761022567749, "learning_rate": 1.931209092711403e-05, "loss": 0.8156, "step": 6819 }, { "epoch": 1.113301497898045, "grad_norm": 3.0418529510498047, "learning_rate": 1.9311857221294096e-05, "loss": 0.7844, "step": 6820 }, { "epoch": 1.1134647565405493, "grad_norm": 2.55759596824646, "learning_rate": 1.931162347719663e-05, "loss": 0.5803, "step": 6821 }, { "epoch": 1.1136280151830538, "grad_norm": 3.0948400497436523, "learning_rate": 1.9311389694822596e-05, "loss": 0.9197, "step": 6822 }, { "epoch": 1.1137912738255582, "grad_norm": 2.8326497077941895, "learning_rate": 1.931115587417296e-05, "loss": 0.7635, "step": 6823 }, { "epoch": 1.1139545324680626, "grad_norm": 2.3699214458465576, "learning_rate": 1.9310922015248675e-05, "loss": 0.567, "step": 6824 }, { "epoch": 1.1141177911105669, "grad_norm": 3.001748561859131, "learning_rate": 1.9310688118050702e-05, "loss": 0.7075, "step": 6825 }, { "epoch": 1.1142810497530713, "grad_norm": 2.72063946723938, "learning_rate": 1.931045418258001e-05, "loss": 0.7724, "step": 6826 }, { "epoch": 1.1144443083955757, "grad_norm": 2.323694944381714, "learning_rate": 1.9310220208837557e-05, "loss": 0.5466, "step": 6827 }, { "epoch": 1.1146075670380802, "grad_norm": 2.4236159324645996, "learning_rate": 1.9309986196824302e-05, "loss": 0.6208, "step": 6828 }, { "epoch": 1.1147708256805844, "grad_norm": 2.5446419715881348, "learning_rate": 1.930975214654121e-05, "loss": 0.634, "step": 6829 }, { "epoch": 1.1149340843230888, "grad_norm": 2.813753366470337, "learning_rate": 1.930951805798924e-05, "loss": 0.7449, "step": 6830 }, { "epoch": 1.1150973429655933, "grad_norm": 2.4141578674316406, "learning_rate": 1.930928393116936e-05, "loss": 0.6552, "step": 6831 }, { "epoch": 1.1152606016080977, "grad_norm": 2.7942049503326416, "learning_rate": 1.9309049766082525e-05, "loss": 0.7416, "step": 6832 }, { "epoch": 1.115423860250602, "grad_norm": 2.6987667083740234, "learning_rate": 1.9308815562729705e-05, "loss": 0.6846, "step": 6833 }, { "epoch": 1.1155871188931064, "grad_norm": 3.106450319290161, "learning_rate": 1.930858132111186e-05, "loss": 0.8389, "step": 6834 }, { "epoch": 1.1157503775356108, "grad_norm": 2.779250383377075, "learning_rate": 1.9308347041229947e-05, "loss": 0.706, "step": 6835 }, { "epoch": 1.1159136361781152, "grad_norm": 2.923375129699707, "learning_rate": 1.9308112723084937e-05, "loss": 0.6327, "step": 6836 }, { "epoch": 1.1160768948206197, "grad_norm": 3.009840726852417, "learning_rate": 1.930787836667779e-05, "loss": 0.8133, "step": 6837 }, { "epoch": 1.1162401534631239, "grad_norm": 2.9073383808135986, "learning_rate": 1.930764397200947e-05, "loss": 0.8277, "step": 6838 }, { "epoch": 1.1164034121056283, "grad_norm": 3.2463951110839844, "learning_rate": 1.930740953908094e-05, "loss": 0.8638, "step": 6839 }, { "epoch": 1.1165666707481328, "grad_norm": 3.3685081005096436, "learning_rate": 1.9307175067893166e-05, "loss": 0.7442, "step": 6840 }, { "epoch": 1.1167299293906372, "grad_norm": 2.8399770259857178, "learning_rate": 1.9306940558447107e-05, "loss": 0.6247, "step": 6841 }, { "epoch": 1.1168931880331414, "grad_norm": 2.7832324504852295, "learning_rate": 1.9306706010743728e-05, "loss": 0.5442, "step": 6842 }, { "epoch": 1.1170564466756459, "grad_norm": 2.154639720916748, "learning_rate": 1.9306471424783997e-05, "loss": 0.4827, "step": 6843 }, { "epoch": 1.1172197053181503, "grad_norm": 2.7557756900787354, "learning_rate": 1.9306236800568877e-05, "loss": 0.6346, "step": 6844 }, { "epoch": 1.1173829639606547, "grad_norm": 2.9683587551116943, "learning_rate": 1.930600213809933e-05, "loss": 0.772, "step": 6845 }, { "epoch": 1.117546222603159, "grad_norm": 2.892526149749756, "learning_rate": 1.9305767437376322e-05, "loss": 0.7514, "step": 6846 }, { "epoch": 1.1177094812456634, "grad_norm": 3.605558156967163, "learning_rate": 1.9305532698400818e-05, "loss": 0.9295, "step": 6847 }, { "epoch": 1.1178727398881678, "grad_norm": 3.00273060798645, "learning_rate": 1.9305297921173783e-05, "loss": 0.6604, "step": 6848 }, { "epoch": 1.1180359985306723, "grad_norm": 3.2297234535217285, "learning_rate": 1.930506310569618e-05, "loss": 0.875, "step": 6849 }, { "epoch": 1.1181992571731767, "grad_norm": 2.483048439025879, "learning_rate": 1.930482825196898e-05, "loss": 0.5794, "step": 6850 }, { "epoch": 1.118362515815681, "grad_norm": 3.032435655593872, "learning_rate": 1.9304593359993142e-05, "loss": 0.6223, "step": 6851 }, { "epoch": 1.1185257744581854, "grad_norm": 2.7010676860809326, "learning_rate": 1.9304358429769636e-05, "loss": 0.6574, "step": 6852 }, { "epoch": 1.1186890331006898, "grad_norm": 3.3554978370666504, "learning_rate": 1.9304123461299422e-05, "loss": 0.9396, "step": 6853 }, { "epoch": 1.1188522917431942, "grad_norm": 3.319605588912964, "learning_rate": 1.9303888454583474e-05, "loss": 0.7572, "step": 6854 }, { "epoch": 1.1190155503856984, "grad_norm": 2.8106040954589844, "learning_rate": 1.9303653409622752e-05, "loss": 0.8009, "step": 6855 }, { "epoch": 1.1191788090282029, "grad_norm": 2.7611026763916016, "learning_rate": 1.9303418326418222e-05, "loss": 0.7293, "step": 6856 }, { "epoch": 1.1193420676707073, "grad_norm": 3.492717742919922, "learning_rate": 1.9303183204970852e-05, "loss": 0.9067, "step": 6857 }, { "epoch": 1.1195053263132118, "grad_norm": 2.821260690689087, "learning_rate": 1.930294804528161e-05, "loss": 0.713, "step": 6858 }, { "epoch": 1.1196685849557162, "grad_norm": 3.3848226070404053, "learning_rate": 1.9302712847351462e-05, "loss": 0.8936, "step": 6859 }, { "epoch": 1.1198318435982204, "grad_norm": 2.8764383792877197, "learning_rate": 1.9302477611181377e-05, "loss": 0.7579, "step": 6860 }, { "epoch": 1.1199951022407248, "grad_norm": 2.5501229763031006, "learning_rate": 1.9302242336772315e-05, "loss": 0.6011, "step": 6861 }, { "epoch": 1.1201583608832293, "grad_norm": 2.6362667083740234, "learning_rate": 1.930200702412525e-05, "loss": 0.6663, "step": 6862 }, { "epoch": 1.1203216195257337, "grad_norm": 2.554530620574951, "learning_rate": 1.9301771673241142e-05, "loss": 0.6474, "step": 6863 }, { "epoch": 1.120484878168238, "grad_norm": 2.932246208190918, "learning_rate": 1.9301536284120966e-05, "loss": 0.7677, "step": 6864 }, { "epoch": 1.1206481368107424, "grad_norm": 2.808897018432617, "learning_rate": 1.9301300856765682e-05, "loss": 0.6088, "step": 6865 }, { "epoch": 1.1208113954532468, "grad_norm": 2.97680401802063, "learning_rate": 1.930106539117627e-05, "loss": 0.7077, "step": 6866 }, { "epoch": 1.1209746540957513, "grad_norm": 2.303032159805298, "learning_rate": 1.9300829887353683e-05, "loss": 0.5533, "step": 6867 }, { "epoch": 1.1211379127382557, "grad_norm": 2.9683265686035156, "learning_rate": 1.9300594345298903e-05, "loss": 0.7968, "step": 6868 }, { "epoch": 1.12130117138076, "grad_norm": 2.88626766204834, "learning_rate": 1.9300358765012888e-05, "loss": 0.771, "step": 6869 }, { "epoch": 1.1214644300232643, "grad_norm": 2.624547243118286, "learning_rate": 1.930012314649661e-05, "loss": 0.7565, "step": 6870 }, { "epoch": 1.1216276886657688, "grad_norm": 3.050722360610962, "learning_rate": 1.9299887489751037e-05, "loss": 0.7398, "step": 6871 }, { "epoch": 1.1217909473082732, "grad_norm": 2.915609836578369, "learning_rate": 1.929965179477714e-05, "loss": 1.4537, "step": 6872 }, { "epoch": 1.1219542059507774, "grad_norm": 3.073269844055176, "learning_rate": 1.9299416061575884e-05, "loss": 0.9249, "step": 6873 }, { "epoch": 1.1221174645932819, "grad_norm": 2.6262428760528564, "learning_rate": 1.929918029014824e-05, "loss": 0.6654, "step": 6874 }, { "epoch": 1.1222807232357863, "grad_norm": 2.9665660858154297, "learning_rate": 1.9298944480495177e-05, "loss": 0.8165, "step": 6875 }, { "epoch": 1.1224439818782908, "grad_norm": 2.5101962089538574, "learning_rate": 1.9298708632617667e-05, "loss": 0.6527, "step": 6876 }, { "epoch": 1.122607240520795, "grad_norm": 3.1297287940979004, "learning_rate": 1.9298472746516674e-05, "loss": 0.8112, "step": 6877 }, { "epoch": 1.1227704991632994, "grad_norm": 2.803662061691284, "learning_rate": 1.929823682219317e-05, "loss": 0.5701, "step": 6878 }, { "epoch": 1.1229337578058038, "grad_norm": 2.892777919769287, "learning_rate": 1.9298000859648128e-05, "loss": 0.6266, "step": 6879 }, { "epoch": 1.1230970164483083, "grad_norm": 1.7786797285079956, "learning_rate": 1.9297764858882516e-05, "loss": 0.4398, "step": 6880 }, { "epoch": 1.1232602750908127, "grad_norm": 2.9275381565093994, "learning_rate": 1.92975288198973e-05, "loss": 0.6517, "step": 6881 }, { "epoch": 1.123423533733317, "grad_norm": 2.5542123317718506, "learning_rate": 1.9297292742693457e-05, "loss": 0.6671, "step": 6882 }, { "epoch": 1.1235867923758214, "grad_norm": 3.393659830093384, "learning_rate": 1.929705662727195e-05, "loss": 0.8164, "step": 6883 }, { "epoch": 1.1237500510183258, "grad_norm": 3.522843360900879, "learning_rate": 1.929682047363376e-05, "loss": 0.8435, "step": 6884 }, { "epoch": 1.1239133096608303, "grad_norm": 2.6768743991851807, "learning_rate": 1.9296584281779846e-05, "loss": 0.7206, "step": 6885 }, { "epoch": 1.1240765683033345, "grad_norm": 3.080888509750366, "learning_rate": 1.929634805171119e-05, "loss": 0.7605, "step": 6886 }, { "epoch": 1.124239826945839, "grad_norm": 3.1583173274993896, "learning_rate": 1.9296111783428752e-05, "loss": 0.7962, "step": 6887 }, { "epoch": 1.1244030855883433, "grad_norm": 3.1497597694396973, "learning_rate": 1.929587547693351e-05, "loss": 0.7211, "step": 6888 }, { "epoch": 1.1245663442308478, "grad_norm": 3.148310422897339, "learning_rate": 1.9295639132226433e-05, "loss": 0.6733, "step": 6889 }, { "epoch": 1.124729602873352, "grad_norm": 2.6103687286376953, "learning_rate": 1.9295402749308497e-05, "loss": 0.6232, "step": 6890 }, { "epoch": 1.1248928615158564, "grad_norm": 3.322821855545044, "learning_rate": 1.9295166328180667e-05, "loss": 0.7826, "step": 6891 }, { "epoch": 1.1250561201583609, "grad_norm": 3.120305299758911, "learning_rate": 1.929492986884392e-05, "loss": 0.6966, "step": 6892 }, { "epoch": 1.1252193788008653, "grad_norm": 2.439821720123291, "learning_rate": 1.929469337129922e-05, "loss": 0.6233, "step": 6893 }, { "epoch": 1.1253826374433697, "grad_norm": 2.575885057449341, "learning_rate": 1.9294456835547552e-05, "loss": 0.7173, "step": 6894 }, { "epoch": 1.125545896085874, "grad_norm": 3.0561461448669434, "learning_rate": 1.9294220261589883e-05, "loss": 0.8032, "step": 6895 }, { "epoch": 1.1257091547283784, "grad_norm": 3.115222692489624, "learning_rate": 1.929398364942718e-05, "loss": 0.8184, "step": 6896 }, { "epoch": 1.1258724133708828, "grad_norm": 3.015512704849243, "learning_rate": 1.929374699906042e-05, "loss": 0.6334, "step": 6897 }, { "epoch": 1.1260356720133873, "grad_norm": 3.318962574005127, "learning_rate": 1.9293510310490574e-05, "loss": 0.8967, "step": 6898 }, { "epoch": 1.1261989306558915, "grad_norm": 2.586252450942993, "learning_rate": 1.9293273583718617e-05, "loss": 0.6043, "step": 6899 }, { "epoch": 1.126362189298396, "grad_norm": 2.8899343013763428, "learning_rate": 1.929303681874552e-05, "loss": 0.6963, "step": 6900 }, { "epoch": 1.1265254479409004, "grad_norm": 2.6993165016174316, "learning_rate": 1.9292800015572263e-05, "loss": 0.6978, "step": 6901 }, { "epoch": 1.1266887065834048, "grad_norm": 2.7513587474823, "learning_rate": 1.929256317419981e-05, "loss": 0.7295, "step": 6902 }, { "epoch": 1.1268519652259092, "grad_norm": 2.9901325702667236, "learning_rate": 1.9292326294629138e-05, "loss": 0.7823, "step": 6903 }, { "epoch": 1.1270152238684135, "grad_norm": 2.9756717681884766, "learning_rate": 1.9292089376861222e-05, "loss": 0.8001, "step": 6904 }, { "epoch": 1.127178482510918, "grad_norm": 2.8741612434387207, "learning_rate": 1.929185242089704e-05, "loss": 0.715, "step": 6905 }, { "epoch": 1.1273417411534223, "grad_norm": 2.800717830657959, "learning_rate": 1.9291615426737555e-05, "loss": 0.758, "step": 6906 }, { "epoch": 1.1275049997959268, "grad_norm": 2.447589159011841, "learning_rate": 1.9291378394383747e-05, "loss": 0.6061, "step": 6907 }, { "epoch": 1.127668258438431, "grad_norm": 2.807523727416992, "learning_rate": 1.9291141323836594e-05, "loss": 0.7372, "step": 6908 }, { "epoch": 1.1278315170809354, "grad_norm": 2.75557541847229, "learning_rate": 1.9290904215097067e-05, "loss": 0.6832, "step": 6909 }, { "epoch": 1.1279947757234399, "grad_norm": 2.3628625869750977, "learning_rate": 1.9290667068166143e-05, "loss": 0.626, "step": 6910 }, { "epoch": 1.1281580343659443, "grad_norm": 3.130530595779419, "learning_rate": 1.9290429883044794e-05, "loss": 0.605, "step": 6911 }, { "epoch": 1.1283212930084487, "grad_norm": 2.893962860107422, "learning_rate": 1.929019265973399e-05, "loss": 0.7065, "step": 6912 }, { "epoch": 1.128484551650953, "grad_norm": 2.6747944355010986, "learning_rate": 1.928995539823472e-05, "loss": 0.6232, "step": 6913 }, { "epoch": 1.1286478102934574, "grad_norm": 2.824758768081665, "learning_rate": 1.9289718098547945e-05, "loss": 0.6704, "step": 6914 }, { "epoch": 1.1288110689359618, "grad_norm": 3.077641248703003, "learning_rate": 1.928948076067465e-05, "loss": 0.8132, "step": 6915 }, { "epoch": 1.1289743275784663, "grad_norm": 3.1289937496185303, "learning_rate": 1.9289243384615807e-05, "loss": 0.8221, "step": 6916 }, { "epoch": 1.1291375862209705, "grad_norm": 2.819154739379883, "learning_rate": 1.928900597037239e-05, "loss": 0.752, "step": 6917 }, { "epoch": 1.129300844863475, "grad_norm": 2.8071045875549316, "learning_rate": 1.9288768517945377e-05, "loss": 0.7039, "step": 6918 }, { "epoch": 1.1294641035059794, "grad_norm": 3.1551005840301514, "learning_rate": 1.9288531027335744e-05, "loss": 0.6367, "step": 6919 }, { "epoch": 1.1296273621484838, "grad_norm": 3.288383722305298, "learning_rate": 1.9288293498544467e-05, "loss": 0.8535, "step": 6920 }, { "epoch": 1.1297906207909882, "grad_norm": 3.1483986377716064, "learning_rate": 1.9288055931572523e-05, "loss": 0.7218, "step": 6921 }, { "epoch": 1.1299538794334925, "grad_norm": 3.1345064640045166, "learning_rate": 1.928781832642089e-05, "loss": 0.7631, "step": 6922 }, { "epoch": 1.130117138075997, "grad_norm": 2.6736176013946533, "learning_rate": 1.9287580683090544e-05, "loss": 0.5964, "step": 6923 }, { "epoch": 1.1302803967185013, "grad_norm": 2.8949201107025146, "learning_rate": 1.928734300158246e-05, "loss": 0.7061, "step": 6924 }, { "epoch": 1.1304436553610056, "grad_norm": 2.4904496669769287, "learning_rate": 1.9287105281897614e-05, "loss": 0.6554, "step": 6925 }, { "epoch": 1.13060691400351, "grad_norm": 2.7281088829040527, "learning_rate": 1.9286867524036987e-05, "loss": 0.6974, "step": 6926 }, { "epoch": 1.1307701726460144, "grad_norm": 2.826512575149536, "learning_rate": 1.928662972800155e-05, "loss": 0.6909, "step": 6927 }, { "epoch": 1.1309334312885189, "grad_norm": 2.764139175415039, "learning_rate": 1.9286391893792287e-05, "loss": 0.711, "step": 6928 }, { "epoch": 1.1310966899310233, "grad_norm": 2.8342466354370117, "learning_rate": 1.9286154021410177e-05, "loss": 0.709, "step": 6929 }, { "epoch": 1.1312599485735275, "grad_norm": 2.874277353286743, "learning_rate": 1.9285916110856187e-05, "loss": 0.7029, "step": 6930 }, { "epoch": 1.131423207216032, "grad_norm": 2.9369266033172607, "learning_rate": 1.928567816213131e-05, "loss": 0.6862, "step": 6931 }, { "epoch": 1.1315864658585364, "grad_norm": 2.6465115547180176, "learning_rate": 1.928544017523651e-05, "loss": 0.6377, "step": 6932 }, { "epoch": 1.1317497245010408, "grad_norm": 2.6545920372009277, "learning_rate": 1.9285202150172774e-05, "loss": 0.7186, "step": 6933 }, { "epoch": 1.131912983143545, "grad_norm": 3.1266684532165527, "learning_rate": 1.9284964086941082e-05, "loss": 0.8639, "step": 6934 }, { "epoch": 1.1320762417860495, "grad_norm": 2.9855496883392334, "learning_rate": 1.9284725985542404e-05, "loss": 0.703, "step": 6935 }, { "epoch": 1.132239500428554, "grad_norm": 3.0392072200775146, "learning_rate": 1.9284487845977723e-05, "loss": 0.7031, "step": 6936 }, { "epoch": 1.1324027590710584, "grad_norm": 3.0771172046661377, "learning_rate": 1.9284249668248017e-05, "loss": 0.8503, "step": 6937 }, { "epoch": 1.1325660177135628, "grad_norm": 3.144012928009033, "learning_rate": 1.928401145235427e-05, "loss": 0.6968, "step": 6938 }, { "epoch": 1.132729276356067, "grad_norm": 3.0841588973999023, "learning_rate": 1.9283773198297456e-05, "loss": 0.7622, "step": 6939 }, { "epoch": 1.1328925349985715, "grad_norm": 3.0527632236480713, "learning_rate": 1.9283534906078552e-05, "loss": 0.6591, "step": 6940 }, { "epoch": 1.133055793641076, "grad_norm": 2.4530398845672607, "learning_rate": 1.9283296575698548e-05, "loss": 0.6004, "step": 6941 }, { "epoch": 1.1332190522835803, "grad_norm": 2.756951332092285, "learning_rate": 1.9283058207158414e-05, "loss": 0.6877, "step": 6942 }, { "epoch": 1.1333823109260845, "grad_norm": 2.2753961086273193, "learning_rate": 1.9282819800459135e-05, "loss": 0.5366, "step": 6943 }, { "epoch": 1.133545569568589, "grad_norm": 2.8878602981567383, "learning_rate": 1.9282581355601686e-05, "loss": 0.7273, "step": 6944 }, { "epoch": 1.1337088282110934, "grad_norm": 3.0584359169006348, "learning_rate": 1.9282342872587052e-05, "loss": 0.8607, "step": 6945 }, { "epoch": 1.1338720868535979, "grad_norm": 3.064523220062256, "learning_rate": 1.928210435141621e-05, "loss": 0.7604, "step": 6946 }, { "epoch": 1.1340353454961023, "grad_norm": 3.1301186084747314, "learning_rate": 1.9281865792090145e-05, "loss": 0.8887, "step": 6947 }, { "epoch": 1.1341986041386065, "grad_norm": 2.7102980613708496, "learning_rate": 1.928162719460983e-05, "loss": 0.7426, "step": 6948 }, { "epoch": 1.134361862781111, "grad_norm": 3.0100250244140625, "learning_rate": 1.928138855897625e-05, "loss": 0.7902, "step": 6949 }, { "epoch": 1.1345251214236154, "grad_norm": 2.8703203201293945, "learning_rate": 1.928114988519039e-05, "loss": 0.7379, "step": 6950 }, { "epoch": 1.1346883800661198, "grad_norm": 2.73097825050354, "learning_rate": 1.9280911173253227e-05, "loss": 0.7046, "step": 6951 }, { "epoch": 1.134851638708624, "grad_norm": 2.777482509613037, "learning_rate": 1.928067242316574e-05, "loss": 0.6787, "step": 6952 }, { "epoch": 1.1350148973511285, "grad_norm": 2.726041078567505, "learning_rate": 1.9280433634928917e-05, "loss": 0.7156, "step": 6953 }, { "epoch": 1.135178155993633, "grad_norm": 3.1899971961975098, "learning_rate": 1.9280194808543732e-05, "loss": 0.6846, "step": 6954 }, { "epoch": 1.1353414146361374, "grad_norm": 2.6497087478637695, "learning_rate": 1.927995594401117e-05, "loss": 0.7188, "step": 6955 }, { "epoch": 1.1355046732786418, "grad_norm": 2.48061466217041, "learning_rate": 1.9279717041332217e-05, "loss": 0.6123, "step": 6956 }, { "epoch": 1.135667931921146, "grad_norm": 2.886735677719116, "learning_rate": 1.927947810050785e-05, "loss": 0.7399, "step": 6957 }, { "epoch": 1.1358311905636505, "grad_norm": 2.882314443588257, "learning_rate": 1.927923912153905e-05, "loss": 0.7284, "step": 6958 }, { "epoch": 1.1359944492061549, "grad_norm": 2.4926583766937256, "learning_rate": 1.9279000104426802e-05, "loss": 0.6585, "step": 6959 }, { "epoch": 1.1361577078486593, "grad_norm": 2.5690650939941406, "learning_rate": 1.9278761049172088e-05, "loss": 0.7564, "step": 6960 }, { "epoch": 1.1363209664911635, "grad_norm": 2.901519536972046, "learning_rate": 1.9278521955775893e-05, "loss": 0.7482, "step": 6961 }, { "epoch": 1.136484225133668, "grad_norm": 3.2501046657562256, "learning_rate": 1.9278282824239195e-05, "loss": 0.7209, "step": 6962 }, { "epoch": 1.1366474837761724, "grad_norm": 2.6677353382110596, "learning_rate": 1.9278043654562984e-05, "loss": 0.6877, "step": 6963 }, { "epoch": 1.1368107424186769, "grad_norm": 3.1315345764160156, "learning_rate": 1.9277804446748237e-05, "loss": 0.78, "step": 6964 }, { "epoch": 1.1369740010611813, "grad_norm": 2.786649465560913, "learning_rate": 1.9277565200795937e-05, "loss": 0.6797, "step": 6965 }, { "epoch": 1.1371372597036855, "grad_norm": 2.7521846294403076, "learning_rate": 1.927732591670707e-05, "loss": 0.7844, "step": 6966 }, { "epoch": 1.13730051834619, "grad_norm": 2.6852056980133057, "learning_rate": 1.9277086594482624e-05, "loss": 0.7753, "step": 6967 }, { "epoch": 1.1374637769886944, "grad_norm": 2.507106065750122, "learning_rate": 1.9276847234123573e-05, "loss": 0.693, "step": 6968 }, { "epoch": 1.1376270356311986, "grad_norm": 2.701167345046997, "learning_rate": 1.9276607835630905e-05, "loss": 0.6857, "step": 6969 }, { "epoch": 1.137790294273703, "grad_norm": 2.58359956741333, "learning_rate": 1.927636839900561e-05, "loss": 0.691, "step": 6970 }, { "epoch": 1.1379535529162075, "grad_norm": 2.933990001678467, "learning_rate": 1.9276128924248664e-05, "loss": 0.6837, "step": 6971 }, { "epoch": 1.138116811558712, "grad_norm": 2.756664514541626, "learning_rate": 1.9275889411361052e-05, "loss": 0.6351, "step": 6972 }, { "epoch": 1.1382800702012164, "grad_norm": 2.242706775665283, "learning_rate": 1.9275649860343768e-05, "loss": 0.4805, "step": 6973 }, { "epoch": 1.1384433288437206, "grad_norm": 3.1959054470062256, "learning_rate": 1.9275410271197784e-05, "loss": 0.8099, "step": 6974 }, { "epoch": 1.138606587486225, "grad_norm": 3.149646043777466, "learning_rate": 1.9275170643924092e-05, "loss": 0.8904, "step": 6975 }, { "epoch": 1.1387698461287294, "grad_norm": 3.0212531089782715, "learning_rate": 1.927493097852368e-05, "loss": 0.7256, "step": 6976 }, { "epoch": 1.1389331047712339, "grad_norm": 2.725729465484619, "learning_rate": 1.9274691274997523e-05, "loss": 0.7138, "step": 6977 }, { "epoch": 1.139096363413738, "grad_norm": 2.435837507247925, "learning_rate": 1.9274451533346617e-05, "loss": 0.6415, "step": 6978 }, { "epoch": 1.1392596220562425, "grad_norm": 3.484365463256836, "learning_rate": 1.9274211753571938e-05, "loss": 0.9148, "step": 6979 }, { "epoch": 1.139422880698747, "grad_norm": 3.3944058418273926, "learning_rate": 1.927397193567448e-05, "loss": 0.7006, "step": 6980 }, { "epoch": 1.1395861393412514, "grad_norm": 3.3083600997924805, "learning_rate": 1.927373207965522e-05, "loss": 0.7586, "step": 6981 }, { "epoch": 1.1397493979837559, "grad_norm": 3.0474002361297607, "learning_rate": 1.9273492185515156e-05, "loss": 0.7876, "step": 6982 }, { "epoch": 1.13991265662626, "grad_norm": 2.851358652114868, "learning_rate": 1.9273252253255264e-05, "loss": 0.7205, "step": 6983 }, { "epoch": 1.1400759152687645, "grad_norm": 2.7502520084381104, "learning_rate": 1.9273012282876532e-05, "loss": 0.6741, "step": 6984 }, { "epoch": 1.140239173911269, "grad_norm": 2.9350759983062744, "learning_rate": 1.927277227437995e-05, "loss": 0.6275, "step": 6985 }, { "epoch": 1.1404024325537734, "grad_norm": 2.563237428665161, "learning_rate": 1.9272532227766505e-05, "loss": 0.6411, "step": 6986 }, { "epoch": 1.1405656911962776, "grad_norm": 2.4978108406066895, "learning_rate": 1.9272292143037176e-05, "loss": 0.692, "step": 6987 }, { "epoch": 1.140728949838782, "grad_norm": 2.7889516353607178, "learning_rate": 1.927205202019296e-05, "loss": 0.6534, "step": 6988 }, { "epoch": 1.1408922084812865, "grad_norm": 2.4578912258148193, "learning_rate": 1.9271811859234834e-05, "loss": 0.6279, "step": 6989 }, { "epoch": 1.141055467123791, "grad_norm": 2.928154706954956, "learning_rate": 1.9271571660163795e-05, "loss": 0.7016, "step": 6990 }, { "epoch": 1.1412187257662953, "grad_norm": 3.1830270290374756, "learning_rate": 1.9271331422980825e-05, "loss": 0.9021, "step": 6991 }, { "epoch": 1.1413819844087996, "grad_norm": 3.2897415161132812, "learning_rate": 1.9271091147686908e-05, "loss": 0.9715, "step": 6992 }, { "epoch": 1.141545243051304, "grad_norm": 2.6703763008117676, "learning_rate": 1.927085083428304e-05, "loss": 0.6383, "step": 6993 }, { "epoch": 1.1417085016938084, "grad_norm": 2.90759015083313, "learning_rate": 1.9270610482770205e-05, "loss": 0.7299, "step": 6994 }, { "epoch": 1.1418717603363129, "grad_norm": 2.6274170875549316, "learning_rate": 1.927037009314939e-05, "loss": 0.7053, "step": 6995 }, { "epoch": 1.142035018978817, "grad_norm": 2.5230963230133057, "learning_rate": 1.9270129665421585e-05, "loss": 0.6669, "step": 6996 }, { "epoch": 1.1421982776213215, "grad_norm": 2.6382970809936523, "learning_rate": 1.9269889199587776e-05, "loss": 0.7675, "step": 6997 }, { "epoch": 1.142361536263826, "grad_norm": 2.4293103218078613, "learning_rate": 1.9269648695648953e-05, "loss": 0.5537, "step": 6998 }, { "epoch": 1.1425247949063304, "grad_norm": 2.9906005859375, "learning_rate": 1.9269408153606105e-05, "loss": 0.6699, "step": 6999 }, { "epoch": 1.1426880535488348, "grad_norm": 2.8834898471832275, "learning_rate": 1.926916757346022e-05, "loss": 0.7697, "step": 7000 }, { "epoch": 1.142851312191339, "grad_norm": 2.742938280105591, "learning_rate": 1.9268926955212288e-05, "loss": 0.7347, "step": 7001 }, { "epoch": 1.1430145708338435, "grad_norm": 2.486811399459839, "learning_rate": 1.9268686298863292e-05, "loss": 0.6197, "step": 7002 }, { "epoch": 1.143177829476348, "grad_norm": 2.7424914836883545, "learning_rate": 1.9268445604414234e-05, "loss": 0.7168, "step": 7003 }, { "epoch": 1.1433410881188524, "grad_norm": 2.719062328338623, "learning_rate": 1.926820487186609e-05, "loss": 0.639, "step": 7004 }, { "epoch": 1.1435043467613566, "grad_norm": 2.574153184890747, "learning_rate": 1.926796410121986e-05, "loss": 0.6827, "step": 7005 }, { "epoch": 1.143667605403861, "grad_norm": 2.733081579208374, "learning_rate": 1.9267723292476524e-05, "loss": 0.6655, "step": 7006 }, { "epoch": 1.1438308640463655, "grad_norm": 2.834355354309082, "learning_rate": 1.926748244563708e-05, "loss": 0.7007, "step": 7007 }, { "epoch": 1.14399412268887, "grad_norm": 2.9096696376800537, "learning_rate": 1.9267241560702516e-05, "loss": 0.7902, "step": 7008 }, { "epoch": 1.1441573813313743, "grad_norm": 3.352511405944824, "learning_rate": 1.926700063767382e-05, "loss": 0.787, "step": 7009 }, { "epoch": 1.1443206399738786, "grad_norm": 2.8445327281951904, "learning_rate": 1.9266759676551982e-05, "loss": 0.6468, "step": 7010 }, { "epoch": 1.144483898616383, "grad_norm": 3.123473644256592, "learning_rate": 1.9266518677337992e-05, "loss": 0.8379, "step": 7011 }, { "epoch": 1.1446471572588874, "grad_norm": 2.7681214809417725, "learning_rate": 1.926627764003285e-05, "loss": 0.6906, "step": 7012 }, { "epoch": 1.1448104159013919, "grad_norm": 3.1981019973754883, "learning_rate": 1.9266036564637533e-05, "loss": 0.8674, "step": 7013 }, { "epoch": 1.144973674543896, "grad_norm": 2.541308641433716, "learning_rate": 1.9265795451153042e-05, "loss": 0.7102, "step": 7014 }, { "epoch": 1.1451369331864005, "grad_norm": 2.48966908454895, "learning_rate": 1.9265554299580366e-05, "loss": 0.6613, "step": 7015 }, { "epoch": 1.145300191828905, "grad_norm": 2.6996042728424072, "learning_rate": 1.9265313109920488e-05, "loss": 0.6702, "step": 7016 }, { "epoch": 1.1454634504714094, "grad_norm": 2.683258533477783, "learning_rate": 1.9265071882174413e-05, "loss": 0.6437, "step": 7017 }, { "epoch": 1.1456267091139136, "grad_norm": 2.661176919937134, "learning_rate": 1.9264830616343125e-05, "loss": 0.666, "step": 7018 }, { "epoch": 1.145789967756418, "grad_norm": 3.0383479595184326, "learning_rate": 1.9264589312427616e-05, "loss": 0.767, "step": 7019 }, { "epoch": 1.1459532263989225, "grad_norm": 2.9818122386932373, "learning_rate": 1.9264347970428876e-05, "loss": 0.7243, "step": 7020 }, { "epoch": 1.146116485041427, "grad_norm": 3.1126091480255127, "learning_rate": 1.92641065903479e-05, "loss": 0.7949, "step": 7021 }, { "epoch": 1.1462797436839312, "grad_norm": 2.765655994415283, "learning_rate": 1.9263865172185685e-05, "loss": 0.7401, "step": 7022 }, { "epoch": 1.1464430023264356, "grad_norm": 2.6660728454589844, "learning_rate": 1.9263623715943214e-05, "loss": 0.7241, "step": 7023 }, { "epoch": 1.14660626096894, "grad_norm": 3.1011552810668945, "learning_rate": 1.926338222162148e-05, "loss": 0.7531, "step": 7024 }, { "epoch": 1.1467695196114445, "grad_norm": 3.0235602855682373, "learning_rate": 1.9263140689221488e-05, "loss": 0.7476, "step": 7025 }, { "epoch": 1.146932778253949, "grad_norm": 3.1285769939422607, "learning_rate": 1.926289911874422e-05, "loss": 0.7127, "step": 7026 }, { "epoch": 1.1470960368964531, "grad_norm": 3.413921356201172, "learning_rate": 1.926265751019067e-05, "loss": 0.911, "step": 7027 }, { "epoch": 1.1472592955389576, "grad_norm": 3.275717258453369, "learning_rate": 1.926241586356183e-05, "loss": 0.6877, "step": 7028 }, { "epoch": 1.147422554181462, "grad_norm": 2.910360097885132, "learning_rate": 1.92621741788587e-05, "loss": 0.7546, "step": 7029 }, { "epoch": 1.1475858128239664, "grad_norm": 2.555135726928711, "learning_rate": 1.9261932456082265e-05, "loss": 0.6704, "step": 7030 }, { "epoch": 1.1477490714664706, "grad_norm": 3.118466377258301, "learning_rate": 1.9261690695233525e-05, "loss": 0.8513, "step": 7031 }, { "epoch": 1.147912330108975, "grad_norm": 3.337634325027466, "learning_rate": 1.9261448896313473e-05, "loss": 0.8748, "step": 7032 }, { "epoch": 1.1480755887514795, "grad_norm": 3.1215546131134033, "learning_rate": 1.92612070593231e-05, "loss": 0.8562, "step": 7033 }, { "epoch": 1.148238847393984, "grad_norm": 2.7792186737060547, "learning_rate": 1.9260965184263403e-05, "loss": 0.8061, "step": 7034 }, { "epoch": 1.1484021060364884, "grad_norm": 2.8929967880249023, "learning_rate": 1.926072327113537e-05, "loss": 0.7426, "step": 7035 }, { "epoch": 1.1485653646789926, "grad_norm": 2.807011365890503, "learning_rate": 1.926048131994001e-05, "loss": 0.7753, "step": 7036 }, { "epoch": 1.148728623321497, "grad_norm": 3.1596338748931885, "learning_rate": 1.92602393306783e-05, "loss": 0.799, "step": 7037 }, { "epoch": 1.1488918819640015, "grad_norm": 2.8000755310058594, "learning_rate": 1.9259997303351247e-05, "loss": 0.8341, "step": 7038 }, { "epoch": 1.149055140606506, "grad_norm": 2.847473382949829, "learning_rate": 1.925975523795984e-05, "loss": 0.688, "step": 7039 }, { "epoch": 1.1492183992490101, "grad_norm": 3.0179386138916016, "learning_rate": 1.9259513134505078e-05, "loss": 0.8332, "step": 7040 }, { "epoch": 1.1493816578915146, "grad_norm": 2.819434881210327, "learning_rate": 1.9259270992987947e-05, "loss": 0.6912, "step": 7041 }, { "epoch": 1.149544916534019, "grad_norm": 2.929020404815674, "learning_rate": 1.9259028813409456e-05, "loss": 0.6445, "step": 7042 }, { "epoch": 1.1497081751765235, "grad_norm": 2.7230591773986816, "learning_rate": 1.9258786595770592e-05, "loss": 0.8713, "step": 7043 }, { "epoch": 1.149871433819028, "grad_norm": 2.7566680908203125, "learning_rate": 1.9258544340072353e-05, "loss": 0.8764, "step": 7044 }, { "epoch": 1.1500346924615321, "grad_norm": 2.627713680267334, "learning_rate": 1.925830204631573e-05, "loss": 0.7274, "step": 7045 }, { "epoch": 1.1501979511040366, "grad_norm": 2.8377327919006348, "learning_rate": 1.925805971450173e-05, "loss": 0.7083, "step": 7046 }, { "epoch": 1.150361209746541, "grad_norm": 2.811715602874756, "learning_rate": 1.9257817344631337e-05, "loss": 0.7885, "step": 7047 }, { "epoch": 1.1505244683890454, "grad_norm": 2.617140054702759, "learning_rate": 1.9257574936705553e-05, "loss": 0.7739, "step": 7048 }, { "epoch": 1.1506877270315496, "grad_norm": 3.0253045558929443, "learning_rate": 1.9257332490725377e-05, "loss": 0.7192, "step": 7049 }, { "epoch": 1.150850985674054, "grad_norm": 2.6679413318634033, "learning_rate": 1.92570900066918e-05, "loss": 0.8077, "step": 7050 }, { "epoch": 1.1510142443165585, "grad_norm": 3.058009147644043, "learning_rate": 1.925684748460582e-05, "loss": 0.7569, "step": 7051 }, { "epoch": 1.151177502959063, "grad_norm": 2.544325351715088, "learning_rate": 1.925660492446844e-05, "loss": 0.7447, "step": 7052 }, { "epoch": 1.1513407616015674, "grad_norm": 2.6611363887786865, "learning_rate": 1.925636232628065e-05, "loss": 0.7231, "step": 7053 }, { "epoch": 1.1515040202440716, "grad_norm": 2.3876230716705322, "learning_rate": 1.925611969004345e-05, "loss": 0.726, "step": 7054 }, { "epoch": 1.151667278886576, "grad_norm": 2.5273056030273438, "learning_rate": 1.9255877015757835e-05, "loss": 0.687, "step": 7055 }, { "epoch": 1.1518305375290805, "grad_norm": 3.2171764373779297, "learning_rate": 1.9255634303424805e-05, "loss": 0.7949, "step": 7056 }, { "epoch": 1.151993796171585, "grad_norm": 2.435823678970337, "learning_rate": 1.925539155304536e-05, "loss": 0.674, "step": 7057 }, { "epoch": 1.1521570548140891, "grad_norm": 2.5629520416259766, "learning_rate": 1.9255148764620493e-05, "loss": 0.6409, "step": 7058 }, { "epoch": 1.1523203134565936, "grad_norm": 2.929425001144409, "learning_rate": 1.9254905938151203e-05, "loss": 0.8226, "step": 7059 }, { "epoch": 1.152483572099098, "grad_norm": 2.6447319984436035, "learning_rate": 1.9254663073638492e-05, "loss": 0.6425, "step": 7060 }, { "epoch": 1.1526468307416025, "grad_norm": 2.9209742546081543, "learning_rate": 1.9254420171083353e-05, "loss": 0.7255, "step": 7061 }, { "epoch": 1.152810089384107, "grad_norm": 3.0779340267181396, "learning_rate": 1.925417723048679e-05, "loss": 0.8061, "step": 7062 }, { "epoch": 1.1529733480266111, "grad_norm": 2.645261287689209, "learning_rate": 1.92539342518498e-05, "loss": 0.7465, "step": 7063 }, { "epoch": 1.1531366066691155, "grad_norm": 3.3048603534698486, "learning_rate": 1.9253691235173375e-05, "loss": 0.8308, "step": 7064 }, { "epoch": 1.15329986531162, "grad_norm": 3.5958621501922607, "learning_rate": 1.9253448180458522e-05, "loss": 0.8995, "step": 7065 }, { "epoch": 1.1534631239541242, "grad_norm": 3.0111372470855713, "learning_rate": 1.9253205087706238e-05, "loss": 0.7385, "step": 7066 }, { "epoch": 1.1536263825966286, "grad_norm": 2.9798574447631836, "learning_rate": 1.9252961956917522e-05, "loss": 0.7653, "step": 7067 }, { "epoch": 1.153789641239133, "grad_norm": 2.6891815662384033, "learning_rate": 1.925271878809337e-05, "loss": 0.7104, "step": 7068 }, { "epoch": 1.1539528998816375, "grad_norm": 2.8317019939422607, "learning_rate": 1.925247558123479e-05, "loss": 0.7547, "step": 7069 }, { "epoch": 1.154116158524142, "grad_norm": 2.8240246772766113, "learning_rate": 1.9252232336342773e-05, "loss": 0.7127, "step": 7070 }, { "epoch": 1.1542794171666462, "grad_norm": 2.944122552871704, "learning_rate": 1.9251989053418324e-05, "loss": 0.9, "step": 7071 }, { "epoch": 1.1544426758091506, "grad_norm": 2.8420116901397705, "learning_rate": 1.925174573246244e-05, "loss": 0.6139, "step": 7072 }, { "epoch": 1.154605934451655, "grad_norm": 2.796602487564087, "learning_rate": 1.9251502373476128e-05, "loss": 0.7265, "step": 7073 }, { "epoch": 1.1547691930941595, "grad_norm": 2.9442920684814453, "learning_rate": 1.9251258976460376e-05, "loss": 0.7167, "step": 7074 }, { "epoch": 1.1549324517366637, "grad_norm": 3.1086463928222656, "learning_rate": 1.9251015541416196e-05, "loss": 0.8507, "step": 7075 }, { "epoch": 1.1550957103791681, "grad_norm": 2.9856667518615723, "learning_rate": 1.925077206834458e-05, "loss": 0.7343, "step": 7076 }, { "epoch": 1.1552589690216726, "grad_norm": 2.5539767742156982, "learning_rate": 1.9250528557246538e-05, "loss": 0.5906, "step": 7077 }, { "epoch": 1.155422227664177, "grad_norm": 2.709982395172119, "learning_rate": 1.9250285008123063e-05, "loss": 0.6156, "step": 7078 }, { "epoch": 1.1555854863066815, "grad_norm": 2.7849812507629395, "learning_rate": 1.925004142097516e-05, "loss": 0.7124, "step": 7079 }, { "epoch": 1.1557487449491857, "grad_norm": 2.9380154609680176, "learning_rate": 1.924979779580383e-05, "loss": 0.7876, "step": 7080 }, { "epoch": 1.15591200359169, "grad_norm": 2.61957049369812, "learning_rate": 1.9249554132610072e-05, "loss": 0.7703, "step": 7081 }, { "epoch": 1.1560752622341945, "grad_norm": 2.4348487854003906, "learning_rate": 1.9249310431394893e-05, "loss": 0.6389, "step": 7082 }, { "epoch": 1.156238520876699, "grad_norm": 2.4292514324188232, "learning_rate": 1.9249066692159287e-05, "loss": 0.6949, "step": 7083 }, { "epoch": 1.1564017795192032, "grad_norm": 3.1548166275024414, "learning_rate": 1.9248822914904265e-05, "loss": 0.6907, "step": 7084 }, { "epoch": 1.1565650381617076, "grad_norm": 2.8859195709228516, "learning_rate": 1.924857909963082e-05, "loss": 0.6673, "step": 7085 }, { "epoch": 1.156728296804212, "grad_norm": 2.2798690795898438, "learning_rate": 1.924833524633996e-05, "loss": 0.4969, "step": 7086 }, { "epoch": 1.1568915554467165, "grad_norm": 2.760176181793213, "learning_rate": 1.924809135503269e-05, "loss": 0.7224, "step": 7087 }, { "epoch": 1.157054814089221, "grad_norm": 2.5177290439605713, "learning_rate": 1.9247847425710002e-05, "loss": 0.6021, "step": 7088 }, { "epoch": 1.1572180727317252, "grad_norm": 2.9175350666046143, "learning_rate": 1.9247603458372906e-05, "loss": 0.7848, "step": 7089 }, { "epoch": 1.1573813313742296, "grad_norm": 2.692246198654175, "learning_rate": 1.9247359453022408e-05, "loss": 0.7373, "step": 7090 }, { "epoch": 1.157544590016734, "grad_norm": 2.695026159286499, "learning_rate": 1.9247115409659504e-05, "loss": 0.6987, "step": 7091 }, { "epoch": 1.1577078486592385, "grad_norm": 2.849781036376953, "learning_rate": 1.92468713282852e-05, "loss": 0.8446, "step": 7092 }, { "epoch": 1.1578711073017427, "grad_norm": 2.752875328063965, "learning_rate": 1.92466272089005e-05, "loss": 0.8762, "step": 7093 }, { "epoch": 1.1580343659442471, "grad_norm": 3.0987701416015625, "learning_rate": 1.924638305150641e-05, "loss": 0.8627, "step": 7094 }, { "epoch": 1.1581976245867516, "grad_norm": 3.6136627197265625, "learning_rate": 1.9246138856103927e-05, "loss": 0.9658, "step": 7095 }, { "epoch": 1.158360883229256, "grad_norm": 2.919220209121704, "learning_rate": 1.924589462269406e-05, "loss": 0.8229, "step": 7096 }, { "epoch": 1.1585241418717604, "grad_norm": 2.4707608222961426, "learning_rate": 1.9245650351277812e-05, "loss": 0.5365, "step": 7097 }, { "epoch": 1.1586874005142647, "grad_norm": 2.850250005722046, "learning_rate": 1.9245406041856184e-05, "loss": 0.6649, "step": 7098 }, { "epoch": 1.158850659156769, "grad_norm": 3.541367292404175, "learning_rate": 1.9245161694430187e-05, "loss": 0.7877, "step": 7099 }, { "epoch": 1.1590139177992735, "grad_norm": 2.5310018062591553, "learning_rate": 1.9244917309000817e-05, "loss": 0.6048, "step": 7100 }, { "epoch": 1.159177176441778, "grad_norm": 2.421928882598877, "learning_rate": 1.9244672885569083e-05, "loss": 0.6312, "step": 7101 }, { "epoch": 1.1593404350842822, "grad_norm": 3.039600372314453, "learning_rate": 1.924442842413599e-05, "loss": 0.8045, "step": 7102 }, { "epoch": 1.1595036937267866, "grad_norm": 2.600914478302002, "learning_rate": 1.9244183924702543e-05, "loss": 0.7539, "step": 7103 }, { "epoch": 1.159666952369291, "grad_norm": 2.529710054397583, "learning_rate": 1.924393938726975e-05, "loss": 0.7384, "step": 7104 }, { "epoch": 1.1598302110117955, "grad_norm": 2.935839891433716, "learning_rate": 1.924369481183861e-05, "loss": 0.7803, "step": 7105 }, { "epoch": 1.1599934696543, "grad_norm": 2.7059450149536133, "learning_rate": 1.924345019841013e-05, "loss": 0.6446, "step": 7106 }, { "epoch": 1.1601567282968042, "grad_norm": 2.5808920860290527, "learning_rate": 1.9243205546985315e-05, "loss": 0.6248, "step": 7107 }, { "epoch": 1.1603199869393086, "grad_norm": 2.6545748710632324, "learning_rate": 1.9242960857565178e-05, "loss": 0.6949, "step": 7108 }, { "epoch": 1.160483245581813, "grad_norm": 3.1271932125091553, "learning_rate": 1.924271613015071e-05, "loss": 0.841, "step": 7109 }, { "epoch": 1.1606465042243173, "grad_norm": 2.9675564765930176, "learning_rate": 1.9242471364742932e-05, "loss": 0.7302, "step": 7110 }, { "epoch": 1.1608097628668217, "grad_norm": 2.3091254234313965, "learning_rate": 1.9242226561342844e-05, "loss": 0.6174, "step": 7111 }, { "epoch": 1.1609730215093261, "grad_norm": 3.5849034786224365, "learning_rate": 1.924198171995145e-05, "loss": 0.8806, "step": 7112 }, { "epoch": 1.1611362801518306, "grad_norm": 2.7458951473236084, "learning_rate": 1.924173684056976e-05, "loss": 0.7574, "step": 7113 }, { "epoch": 1.161299538794335, "grad_norm": 2.9075629711151123, "learning_rate": 1.924149192319878e-05, "loss": 0.7947, "step": 7114 }, { "epoch": 1.1614627974368392, "grad_norm": 3.0466508865356445, "learning_rate": 1.9241246967839514e-05, "loss": 0.7628, "step": 7115 }, { "epoch": 1.1616260560793437, "grad_norm": 3.930722713470459, "learning_rate": 1.9241001974492978e-05, "loss": 0.8797, "step": 7116 }, { "epoch": 1.161789314721848, "grad_norm": 3.01228928565979, "learning_rate": 1.9240756943160165e-05, "loss": 0.7269, "step": 7117 }, { "epoch": 1.1619525733643525, "grad_norm": 2.936753273010254, "learning_rate": 1.9240511873842092e-05, "loss": 0.6744, "step": 7118 }, { "epoch": 1.1621158320068568, "grad_norm": 2.4547572135925293, "learning_rate": 1.9240266766539765e-05, "loss": 0.5446, "step": 7119 }, { "epoch": 1.1622790906493612, "grad_norm": 2.5012564659118652, "learning_rate": 1.9240021621254187e-05, "loss": 0.6169, "step": 7120 }, { "epoch": 1.1624423492918656, "grad_norm": 2.860569715499878, "learning_rate": 1.9239776437986374e-05, "loss": 0.7226, "step": 7121 }, { "epoch": 1.16260560793437, "grad_norm": 2.4643571376800537, "learning_rate": 1.9239531216737326e-05, "loss": 0.6086, "step": 7122 }, { "epoch": 1.1627688665768745, "grad_norm": 2.86350417137146, "learning_rate": 1.9239285957508052e-05, "loss": 0.642, "step": 7123 }, { "epoch": 1.1629321252193787, "grad_norm": 2.9204578399658203, "learning_rate": 1.9239040660299566e-05, "loss": 0.7034, "step": 7124 }, { "epoch": 1.1630953838618832, "grad_norm": 3.275055408477783, "learning_rate": 1.9238795325112867e-05, "loss": 0.7097, "step": 7125 }, { "epoch": 1.1632586425043876, "grad_norm": 3.2557406425476074, "learning_rate": 1.9238549951948975e-05, "loss": 0.9427, "step": 7126 }, { "epoch": 1.163421901146892, "grad_norm": 3.090203285217285, "learning_rate": 1.923830454080889e-05, "loss": 0.7339, "step": 7127 }, { "epoch": 1.1635851597893963, "grad_norm": 2.9899420738220215, "learning_rate": 1.9238059091693623e-05, "loss": 0.7318, "step": 7128 }, { "epoch": 1.1637484184319007, "grad_norm": 2.5115749835968018, "learning_rate": 1.9237813604604186e-05, "loss": 0.6096, "step": 7129 }, { "epoch": 1.1639116770744051, "grad_norm": 2.872370958328247, "learning_rate": 1.923756807954158e-05, "loss": 0.7578, "step": 7130 }, { "epoch": 1.1640749357169096, "grad_norm": 2.5576400756835938, "learning_rate": 1.9237322516506823e-05, "loss": 0.6657, "step": 7131 }, { "epoch": 1.164238194359414, "grad_norm": 2.734827756881714, "learning_rate": 1.9237076915500925e-05, "loss": 0.6764, "step": 7132 }, { "epoch": 1.1644014530019182, "grad_norm": 2.837106227874756, "learning_rate": 1.9236831276524883e-05, "loss": 0.7244, "step": 7133 }, { "epoch": 1.1645647116444227, "grad_norm": 2.573322296142578, "learning_rate": 1.9236585599579723e-05, "loss": 0.6414, "step": 7134 }, { "epoch": 1.164727970286927, "grad_norm": 2.8867621421813965, "learning_rate": 1.9236339884666442e-05, "loss": 0.6606, "step": 7135 }, { "epoch": 1.1648912289294315, "grad_norm": 2.590827465057373, "learning_rate": 1.923609413178606e-05, "loss": 0.7401, "step": 7136 }, { "epoch": 1.1650544875719357, "grad_norm": 3.075345516204834, "learning_rate": 1.923584834093958e-05, "loss": 0.7364, "step": 7137 }, { "epoch": 1.1652177462144402, "grad_norm": 3.0186564922332764, "learning_rate": 1.9235602512128014e-05, "loss": 0.744, "step": 7138 }, { "epoch": 1.1653810048569446, "grad_norm": 2.5110461711883545, "learning_rate": 1.9235356645352375e-05, "loss": 0.666, "step": 7139 }, { "epoch": 1.165544263499449, "grad_norm": 2.9105658531188965, "learning_rate": 1.9235110740613668e-05, "loss": 0.7984, "step": 7140 }, { "epoch": 1.1657075221419535, "grad_norm": 2.690260171890259, "learning_rate": 1.9234864797912913e-05, "loss": 0.6107, "step": 7141 }, { "epoch": 1.1658707807844577, "grad_norm": 2.3555026054382324, "learning_rate": 1.9234618817251113e-05, "loss": 0.6082, "step": 7142 }, { "epoch": 1.1660340394269622, "grad_norm": 2.6591031551361084, "learning_rate": 1.9234372798629283e-05, "loss": 0.665, "step": 7143 }, { "epoch": 1.1661972980694666, "grad_norm": 3.127490997314453, "learning_rate": 1.9234126742048435e-05, "loss": 0.835, "step": 7144 }, { "epoch": 1.166360556711971, "grad_norm": 3.0739638805389404, "learning_rate": 1.9233880647509577e-05, "loss": 0.6826, "step": 7145 }, { "epoch": 1.1665238153544752, "grad_norm": 2.406947374343872, "learning_rate": 1.9233634515013723e-05, "loss": 0.5833, "step": 7146 }, { "epoch": 1.1666870739969797, "grad_norm": 2.7305736541748047, "learning_rate": 1.923338834456188e-05, "loss": 0.6324, "step": 7147 }, { "epoch": 1.1668503326394841, "grad_norm": 2.62190318107605, "learning_rate": 1.9233142136155073e-05, "loss": 0.7181, "step": 7148 }, { "epoch": 1.1670135912819886, "grad_norm": 2.8724074363708496, "learning_rate": 1.9232895889794297e-05, "loss": 0.7263, "step": 7149 }, { "epoch": 1.167176849924493, "grad_norm": 3.5707638263702393, "learning_rate": 1.9232649605480574e-05, "loss": 0.7217, "step": 7150 }, { "epoch": 1.1673401085669972, "grad_norm": 2.3833937644958496, "learning_rate": 1.9232403283214916e-05, "loss": 0.6366, "step": 7151 }, { "epoch": 1.1675033672095017, "grad_norm": 2.622889518737793, "learning_rate": 1.9232156922998336e-05, "loss": 0.6914, "step": 7152 }, { "epoch": 1.167666625852006, "grad_norm": 2.5778608322143555, "learning_rate": 1.923191052483184e-05, "loss": 0.6436, "step": 7153 }, { "epoch": 1.1678298844945103, "grad_norm": 2.4157497882843018, "learning_rate": 1.9231664088716448e-05, "loss": 0.6251, "step": 7154 }, { "epoch": 1.1679931431370147, "grad_norm": 2.4105923175811768, "learning_rate": 1.923141761465317e-05, "loss": 0.6369, "step": 7155 }, { "epoch": 1.1681564017795192, "grad_norm": 2.9354605674743652, "learning_rate": 1.9231171102643023e-05, "loss": 0.7247, "step": 7156 }, { "epoch": 1.1683196604220236, "grad_norm": 2.90049147605896, "learning_rate": 1.9230924552687018e-05, "loss": 0.713, "step": 7157 }, { "epoch": 1.168482919064528, "grad_norm": 2.701944589614868, "learning_rate": 1.9230677964786162e-05, "loss": 0.7341, "step": 7158 }, { "epoch": 1.1686461777070323, "grad_norm": 3.69744873046875, "learning_rate": 1.9230431338941477e-05, "loss": 0.8933, "step": 7159 }, { "epoch": 1.1688094363495367, "grad_norm": 3.04203724861145, "learning_rate": 1.9230184675153974e-05, "loss": 0.6511, "step": 7160 }, { "epoch": 1.1689726949920412, "grad_norm": 2.76979660987854, "learning_rate": 1.9229937973424667e-05, "loss": 0.6447, "step": 7161 }, { "epoch": 1.1691359536345456, "grad_norm": 2.470132350921631, "learning_rate": 1.9229691233754573e-05, "loss": 0.5068, "step": 7162 }, { "epoch": 1.1692992122770498, "grad_norm": 3.2748265266418457, "learning_rate": 1.92294444561447e-05, "loss": 0.8031, "step": 7163 }, { "epoch": 1.1694624709195542, "grad_norm": 2.8661859035491943, "learning_rate": 1.9229197640596067e-05, "loss": 0.7251, "step": 7164 }, { "epoch": 1.1696257295620587, "grad_norm": 2.8905234336853027, "learning_rate": 1.9228950787109684e-05, "loss": 0.6932, "step": 7165 }, { "epoch": 1.1697889882045631, "grad_norm": 3.1711015701293945, "learning_rate": 1.9228703895686577e-05, "loss": 0.8553, "step": 7166 }, { "epoch": 1.1699522468470676, "grad_norm": 2.647613525390625, "learning_rate": 1.9228456966327743e-05, "loss": 0.5917, "step": 7167 }, { "epoch": 1.1701155054895718, "grad_norm": 2.708564043045044, "learning_rate": 1.9228209999034214e-05, "loss": 0.6387, "step": 7168 }, { "epoch": 1.1702787641320762, "grad_norm": 2.5493404865264893, "learning_rate": 1.9227962993806998e-05, "loss": 0.5568, "step": 7169 }, { "epoch": 1.1704420227745806, "grad_norm": 2.8255295753479004, "learning_rate": 1.9227715950647106e-05, "loss": 0.6315, "step": 7170 }, { "epoch": 1.170605281417085, "grad_norm": 2.5979034900665283, "learning_rate": 1.9227468869555564e-05, "loss": 0.6385, "step": 7171 }, { "epoch": 1.1707685400595893, "grad_norm": 3.1569790840148926, "learning_rate": 1.922722175053338e-05, "loss": 0.7348, "step": 7172 }, { "epoch": 1.1709317987020937, "grad_norm": 2.841447353363037, "learning_rate": 1.922697459358157e-05, "loss": 0.6353, "step": 7173 }, { "epoch": 1.1710950573445982, "grad_norm": 2.979381799697876, "learning_rate": 1.922672739870115e-05, "loss": 0.6702, "step": 7174 }, { "epoch": 1.1712583159871026, "grad_norm": 2.5551788806915283, "learning_rate": 1.922648016589314e-05, "loss": 0.6393, "step": 7175 }, { "epoch": 1.171421574629607, "grad_norm": 2.537391185760498, "learning_rate": 1.922623289515855e-05, "loss": 0.6197, "step": 7176 }, { "epoch": 1.1715848332721113, "grad_norm": 2.901660442352295, "learning_rate": 1.9225985586498407e-05, "loss": 0.7096, "step": 7177 }, { "epoch": 1.1717480919146157, "grad_norm": 2.99902081489563, "learning_rate": 1.9225738239913716e-05, "loss": 0.8181, "step": 7178 }, { "epoch": 1.1719113505571201, "grad_norm": 3.1051926612854004, "learning_rate": 1.92254908554055e-05, "loss": 0.767, "step": 7179 }, { "epoch": 1.1720746091996246, "grad_norm": 2.7944729328155518, "learning_rate": 1.9225243432974777e-05, "loss": 0.6839, "step": 7180 }, { "epoch": 1.1722378678421288, "grad_norm": 3.0348258018493652, "learning_rate": 1.922499597262256e-05, "loss": 0.8393, "step": 7181 }, { "epoch": 1.1724011264846332, "grad_norm": 2.8821375370025635, "learning_rate": 1.9224748474349865e-05, "loss": 0.7688, "step": 7182 }, { "epoch": 1.1725643851271377, "grad_norm": 3.0784175395965576, "learning_rate": 1.9224500938157716e-05, "loss": 0.692, "step": 7183 }, { "epoch": 1.1727276437696421, "grad_norm": 2.4669902324676514, "learning_rate": 1.9224253364047124e-05, "loss": 0.5994, "step": 7184 }, { "epoch": 1.1728909024121466, "grad_norm": 2.91676664352417, "learning_rate": 1.922400575201911e-05, "loss": 0.8008, "step": 7185 }, { "epoch": 1.1730541610546508, "grad_norm": 2.815297842025757, "learning_rate": 1.9223758102074696e-05, "loss": 0.6841, "step": 7186 }, { "epoch": 1.1732174196971552, "grad_norm": 2.757657289505005, "learning_rate": 1.922351041421489e-05, "loss": 0.6715, "step": 7187 }, { "epoch": 1.1733806783396596, "grad_norm": 3.0093154907226562, "learning_rate": 1.9223262688440714e-05, "loss": 0.7394, "step": 7188 }, { "epoch": 1.173543936982164, "grad_norm": 2.846205711364746, "learning_rate": 1.922301492475319e-05, "loss": 0.7766, "step": 7189 }, { "epoch": 1.1737071956246683, "grad_norm": 2.826772689819336, "learning_rate": 1.9222767123153337e-05, "loss": 0.8705, "step": 7190 }, { "epoch": 1.1738704542671727, "grad_norm": 2.9278769493103027, "learning_rate": 1.922251928364217e-05, "loss": 0.6739, "step": 7191 }, { "epoch": 1.1740337129096772, "grad_norm": 2.4154610633850098, "learning_rate": 1.9222271406220706e-05, "loss": 0.6641, "step": 7192 }, { "epoch": 1.1741969715521816, "grad_norm": 2.585373640060425, "learning_rate": 1.922202349088997e-05, "loss": 0.5517, "step": 7193 }, { "epoch": 1.174360230194686, "grad_norm": 2.6100854873657227, "learning_rate": 1.922177553765097e-05, "loss": 0.6896, "step": 7194 }, { "epoch": 1.1745234888371903, "grad_norm": 2.7817063331604004, "learning_rate": 1.922152754650474e-05, "loss": 0.7336, "step": 7195 }, { "epoch": 1.1746867474796947, "grad_norm": 2.547304630279541, "learning_rate": 1.9221279517452293e-05, "loss": 0.7059, "step": 7196 }, { "epoch": 1.1748500061221991, "grad_norm": 2.5134453773498535, "learning_rate": 1.922103145049464e-05, "loss": 0.7638, "step": 7197 }, { "epoch": 1.1750132647647034, "grad_norm": 2.6186633110046387, "learning_rate": 1.9220783345632815e-05, "loss": 0.7308, "step": 7198 }, { "epoch": 1.1751765234072078, "grad_norm": 3.0020971298217773, "learning_rate": 1.9220535202867832e-05, "loss": 0.6368, "step": 7199 }, { "epoch": 1.1753397820497122, "grad_norm": 2.469527006149292, "learning_rate": 1.9220287022200707e-05, "loss": 0.6768, "step": 7200 }, { "epoch": 1.1755030406922167, "grad_norm": 2.2092204093933105, "learning_rate": 1.9220038803632466e-05, "loss": 0.5959, "step": 7201 }, { "epoch": 1.175666299334721, "grad_norm": 2.9300789833068848, "learning_rate": 1.9219790547164122e-05, "loss": 0.7314, "step": 7202 }, { "epoch": 1.1758295579772253, "grad_norm": 3.181309700012207, "learning_rate": 1.9219542252796705e-05, "loss": 0.6845, "step": 7203 }, { "epoch": 1.1759928166197298, "grad_norm": 2.9485416412353516, "learning_rate": 1.921929392053123e-05, "loss": 0.7698, "step": 7204 }, { "epoch": 1.1761560752622342, "grad_norm": 2.5065841674804688, "learning_rate": 1.9219045550368724e-05, "loss": 0.6187, "step": 7205 }, { "epoch": 1.1763193339047386, "grad_norm": 2.670994281768799, "learning_rate": 1.9218797142310193e-05, "loss": 0.789, "step": 7206 }, { "epoch": 1.1764825925472429, "grad_norm": 3.312346935272217, "learning_rate": 1.9218548696356672e-05, "loss": 0.6982, "step": 7207 }, { "epoch": 1.1766458511897473, "grad_norm": 3.1810660362243652, "learning_rate": 1.921830021250918e-05, "loss": 0.7912, "step": 7208 }, { "epoch": 1.1768091098322517, "grad_norm": 3.117018699645996, "learning_rate": 1.921805169076874e-05, "loss": 0.7346, "step": 7209 }, { "epoch": 1.1769723684747562, "grad_norm": 3.095189332962036, "learning_rate": 1.9217803131136363e-05, "loss": 0.8798, "step": 7210 }, { "epoch": 1.1771356271172606, "grad_norm": 3.13330078125, "learning_rate": 1.9217554533613082e-05, "loss": 0.8201, "step": 7211 }, { "epoch": 1.1772988857597648, "grad_norm": 2.981398344039917, "learning_rate": 1.9217305898199914e-05, "loss": 0.706, "step": 7212 }, { "epoch": 1.1774621444022693, "grad_norm": 2.8811113834381104, "learning_rate": 1.9217057224897878e-05, "loss": 0.5983, "step": 7213 }, { "epoch": 1.1776254030447737, "grad_norm": 2.7091386318206787, "learning_rate": 1.9216808513708004e-05, "loss": 0.6937, "step": 7214 }, { "epoch": 1.1777886616872781, "grad_norm": 2.6364059448242188, "learning_rate": 1.921655976463131e-05, "loss": 0.681, "step": 7215 }, { "epoch": 1.1779519203297824, "grad_norm": 2.7223968505859375, "learning_rate": 1.921631097766882e-05, "loss": 0.6383, "step": 7216 }, { "epoch": 1.1781151789722868, "grad_norm": 3.1321372985839844, "learning_rate": 1.9216062152821556e-05, "loss": 0.7038, "step": 7217 }, { "epoch": 1.1782784376147912, "grad_norm": 2.7450361251831055, "learning_rate": 1.9215813290090538e-05, "loss": 0.6932, "step": 7218 }, { "epoch": 1.1784416962572957, "grad_norm": 2.8053672313690186, "learning_rate": 1.9215564389476792e-05, "loss": 0.7251, "step": 7219 }, { "epoch": 1.1786049548998, "grad_norm": 3.013401508331299, "learning_rate": 1.9215315450981337e-05, "loss": 0.7682, "step": 7220 }, { "epoch": 1.1787682135423043, "grad_norm": 2.724453926086426, "learning_rate": 1.9215066474605204e-05, "loss": 0.7962, "step": 7221 }, { "epoch": 1.1789314721848088, "grad_norm": 2.8656387329101562, "learning_rate": 1.9214817460349414e-05, "loss": 0.694, "step": 7222 }, { "epoch": 1.1790947308273132, "grad_norm": 2.954951047897339, "learning_rate": 1.9214568408214986e-05, "loss": 0.7244, "step": 7223 }, { "epoch": 1.1792579894698176, "grad_norm": 3.1392219066619873, "learning_rate": 1.9214319318202948e-05, "loss": 0.7414, "step": 7224 }, { "epoch": 1.1794212481123219, "grad_norm": 2.6351935863494873, "learning_rate": 1.921407019031432e-05, "loss": 0.6749, "step": 7225 }, { "epoch": 1.1795845067548263, "grad_norm": 2.846510648727417, "learning_rate": 1.921382102455013e-05, "loss": 0.7542, "step": 7226 }, { "epoch": 1.1797477653973307, "grad_norm": 3.042698383331299, "learning_rate": 1.9213571820911402e-05, "loss": 0.6387, "step": 7227 }, { "epoch": 1.1799110240398352, "grad_norm": 3.0564913749694824, "learning_rate": 1.9213322579399157e-05, "loss": 0.719, "step": 7228 }, { "epoch": 1.1800742826823396, "grad_norm": 3.4942843914031982, "learning_rate": 1.9213073300014425e-05, "loss": 0.823, "step": 7229 }, { "epoch": 1.1802375413248438, "grad_norm": 2.9543769359588623, "learning_rate": 1.9212823982758227e-05, "loss": 0.6376, "step": 7230 }, { "epoch": 1.1804007999673483, "grad_norm": 2.9048781394958496, "learning_rate": 1.9212574627631585e-05, "loss": 0.6957, "step": 7231 }, { "epoch": 1.1805640586098527, "grad_norm": 3.3352596759796143, "learning_rate": 1.921232523463553e-05, "loss": 0.7329, "step": 7232 }, { "epoch": 1.1807273172523571, "grad_norm": 3.154846668243408, "learning_rate": 1.9212075803771085e-05, "loss": 0.7978, "step": 7233 }, { "epoch": 1.1808905758948613, "grad_norm": 2.795020818710327, "learning_rate": 1.9211826335039273e-05, "loss": 0.6955, "step": 7234 }, { "epoch": 1.1810538345373658, "grad_norm": 3.0664255619049072, "learning_rate": 1.921157682844112e-05, "loss": 0.8269, "step": 7235 }, { "epoch": 1.1812170931798702, "grad_norm": 2.989920139312744, "learning_rate": 1.9211327283977656e-05, "loss": 0.7266, "step": 7236 }, { "epoch": 1.1813803518223747, "grad_norm": 2.6432995796203613, "learning_rate": 1.9211077701649903e-05, "loss": 0.6464, "step": 7237 }, { "epoch": 1.181543610464879, "grad_norm": 2.765723466873169, "learning_rate": 1.9210828081458887e-05, "loss": 0.5741, "step": 7238 }, { "epoch": 1.1817068691073833, "grad_norm": 3.465468168258667, "learning_rate": 1.9210578423405636e-05, "loss": 0.8565, "step": 7239 }, { "epoch": 1.1818701277498878, "grad_norm": 3.0978190898895264, "learning_rate": 1.9210328727491175e-05, "loss": 0.7076, "step": 7240 }, { "epoch": 1.1820333863923922, "grad_norm": 2.8732736110687256, "learning_rate": 1.9210078993716527e-05, "loss": 0.6227, "step": 7241 }, { "epoch": 1.1821966450348966, "grad_norm": 2.978782892227173, "learning_rate": 1.9209829222082724e-05, "loss": 0.6213, "step": 7242 }, { "epoch": 1.1823599036774008, "grad_norm": 2.5751070976257324, "learning_rate": 1.920957941259079e-05, "loss": 0.5931, "step": 7243 }, { "epoch": 1.1825231623199053, "grad_norm": 2.7885279655456543, "learning_rate": 1.9209329565241754e-05, "loss": 0.6867, "step": 7244 }, { "epoch": 1.1826864209624097, "grad_norm": 2.609163999557495, "learning_rate": 1.9209079680036642e-05, "loss": 0.7702, "step": 7245 }, { "epoch": 1.1828496796049142, "grad_norm": 2.536207675933838, "learning_rate": 1.920882975697648e-05, "loss": 0.6238, "step": 7246 }, { "epoch": 1.1830129382474184, "grad_norm": 2.371635675430298, "learning_rate": 1.9208579796062296e-05, "loss": 0.638, "step": 7247 }, { "epoch": 1.1831761968899228, "grad_norm": 2.9354677200317383, "learning_rate": 1.9208329797295115e-05, "loss": 0.8793, "step": 7248 }, { "epoch": 1.1833394555324273, "grad_norm": 3.1639864444732666, "learning_rate": 1.920807976067597e-05, "loss": 0.6954, "step": 7249 }, { "epoch": 1.1835027141749317, "grad_norm": 2.778263568878174, "learning_rate": 1.9207829686205882e-05, "loss": 0.7118, "step": 7250 }, { "epoch": 1.183665972817436, "grad_norm": 2.8718020915985107, "learning_rate": 1.9207579573885887e-05, "loss": 0.7611, "step": 7251 }, { "epoch": 1.1838292314599403, "grad_norm": 3.2046947479248047, "learning_rate": 1.9207329423717008e-05, "loss": 0.799, "step": 7252 }, { "epoch": 1.1839924901024448, "grad_norm": 2.4452691078186035, "learning_rate": 1.9207079235700277e-05, "loss": 0.5386, "step": 7253 }, { "epoch": 1.1841557487449492, "grad_norm": 2.7030551433563232, "learning_rate": 1.9206829009836715e-05, "loss": 0.6094, "step": 7254 }, { "epoch": 1.1843190073874537, "grad_norm": 2.8923206329345703, "learning_rate": 1.920657874612736e-05, "loss": 0.6763, "step": 7255 }, { "epoch": 1.1844822660299579, "grad_norm": 2.5908520221710205, "learning_rate": 1.9206328444573234e-05, "loss": 0.7125, "step": 7256 }, { "epoch": 1.1846455246724623, "grad_norm": 3.082576036453247, "learning_rate": 1.920607810517537e-05, "loss": 0.7554, "step": 7257 }, { "epoch": 1.1848087833149668, "grad_norm": 2.648062229156494, "learning_rate": 1.920582772793479e-05, "loss": 0.679, "step": 7258 }, { "epoch": 1.1849720419574712, "grad_norm": 2.9544379711151123, "learning_rate": 1.9205577312852532e-05, "loss": 0.6595, "step": 7259 }, { "epoch": 1.1851353005999754, "grad_norm": 3.1519246101379395, "learning_rate": 1.9205326859929622e-05, "loss": 0.7772, "step": 7260 }, { "epoch": 1.1852985592424798, "grad_norm": 2.973468542098999, "learning_rate": 1.9205076369167087e-05, "loss": 0.6396, "step": 7261 }, { "epoch": 1.1854618178849843, "grad_norm": 2.6391425132751465, "learning_rate": 1.920482584056596e-05, "loss": 0.6262, "step": 7262 }, { "epoch": 1.1856250765274887, "grad_norm": 2.7129344940185547, "learning_rate": 1.920457527412727e-05, "loss": 0.5998, "step": 7263 }, { "epoch": 1.1857883351699932, "grad_norm": 3.478609085083008, "learning_rate": 1.920432466985205e-05, "loss": 0.7534, "step": 7264 }, { "epoch": 1.1859515938124974, "grad_norm": 3.0973970890045166, "learning_rate": 1.920407402774132e-05, "loss": 0.862, "step": 7265 }, { "epoch": 1.1861148524550018, "grad_norm": 3.4001944065093994, "learning_rate": 1.920382334779612e-05, "loss": 0.7228, "step": 7266 }, { "epoch": 1.1862781110975062, "grad_norm": 3.0442163944244385, "learning_rate": 1.920357263001748e-05, "loss": 0.7315, "step": 7267 }, { "epoch": 1.1864413697400107, "grad_norm": 2.675078868865967, "learning_rate": 1.9203321874406423e-05, "loss": 0.6989, "step": 7268 }, { "epoch": 1.186604628382515, "grad_norm": 2.8125076293945312, "learning_rate": 1.920307108096399e-05, "loss": 0.7078, "step": 7269 }, { "epoch": 1.1867678870250193, "grad_norm": 2.8589372634887695, "learning_rate": 1.92028202496912e-05, "loss": 0.6646, "step": 7270 }, { "epoch": 1.1869311456675238, "grad_norm": 3.199906587600708, "learning_rate": 1.9202569380589098e-05, "loss": 0.6705, "step": 7271 }, { "epoch": 1.1870944043100282, "grad_norm": 2.4682130813598633, "learning_rate": 1.9202318473658707e-05, "loss": 0.6143, "step": 7272 }, { "epoch": 1.1872576629525327, "grad_norm": 2.464200973510742, "learning_rate": 1.9202067528901057e-05, "loss": 0.6533, "step": 7273 }, { "epoch": 1.1874209215950369, "grad_norm": 2.724949836730957, "learning_rate": 1.9201816546317183e-05, "loss": 0.5649, "step": 7274 }, { "epoch": 1.1875841802375413, "grad_norm": 2.618117570877075, "learning_rate": 1.9201565525908114e-05, "loss": 0.679, "step": 7275 }, { "epoch": 1.1877474388800457, "grad_norm": 2.610116958618164, "learning_rate": 1.9201314467674887e-05, "loss": 0.6981, "step": 7276 }, { "epoch": 1.1879106975225502, "grad_norm": 2.857206344604492, "learning_rate": 1.9201063371618528e-05, "loss": 0.7041, "step": 7277 }, { "epoch": 1.1880739561650544, "grad_norm": 2.900266408920288, "learning_rate": 1.920081223774007e-05, "loss": 0.6303, "step": 7278 }, { "epoch": 1.1882372148075588, "grad_norm": 2.3774871826171875, "learning_rate": 1.9200561066040554e-05, "loss": 0.582, "step": 7279 }, { "epoch": 1.1884004734500633, "grad_norm": 2.5422120094299316, "learning_rate": 1.9200309856520998e-05, "loss": 0.613, "step": 7280 }, { "epoch": 1.1885637320925677, "grad_norm": 3.015773057937622, "learning_rate": 1.9200058609182444e-05, "loss": 0.7162, "step": 7281 }, { "epoch": 1.1887269907350722, "grad_norm": 2.900010108947754, "learning_rate": 1.9199807324025926e-05, "loss": 0.6403, "step": 7282 }, { "epoch": 1.1888902493775764, "grad_norm": 2.2079741954803467, "learning_rate": 1.9199556001052474e-05, "loss": 0.5635, "step": 7283 }, { "epoch": 1.1890535080200808, "grad_norm": 2.5590670108795166, "learning_rate": 1.9199304640263116e-05, "loss": 0.6442, "step": 7284 }, { "epoch": 1.1892167666625852, "grad_norm": 2.3389129638671875, "learning_rate": 1.9199053241658895e-05, "loss": 0.5373, "step": 7285 }, { "epoch": 1.1893800253050897, "grad_norm": 2.979280471801758, "learning_rate": 1.919880180524084e-05, "loss": 0.7133, "step": 7286 }, { "epoch": 1.189543283947594, "grad_norm": 2.962704658508301, "learning_rate": 1.9198550331009982e-05, "loss": 0.6885, "step": 7287 }, { "epoch": 1.1897065425900983, "grad_norm": 2.985896587371826, "learning_rate": 1.9198298818967354e-05, "loss": 0.7571, "step": 7288 }, { "epoch": 1.1898698012326028, "grad_norm": 2.450269937515259, "learning_rate": 1.9198047269114e-05, "loss": 0.6976, "step": 7289 }, { "epoch": 1.1900330598751072, "grad_norm": 2.745884656906128, "learning_rate": 1.919779568145094e-05, "loss": 0.7265, "step": 7290 }, { "epoch": 1.1901963185176114, "grad_norm": 3.5347321033477783, "learning_rate": 1.919754405597922e-05, "loss": 0.8493, "step": 7291 }, { "epoch": 1.1903595771601159, "grad_norm": 3.5059902667999268, "learning_rate": 1.9197292392699865e-05, "loss": 0.8065, "step": 7292 }, { "epoch": 1.1905228358026203, "grad_norm": 3.7617874145507812, "learning_rate": 1.9197040691613916e-05, "loss": 0.7845, "step": 7293 }, { "epoch": 1.1906860944451247, "grad_norm": 2.675544023513794, "learning_rate": 1.9196788952722404e-05, "loss": 0.6807, "step": 7294 }, { "epoch": 1.190849353087629, "grad_norm": 2.5856945514678955, "learning_rate": 1.9196537176026368e-05, "loss": 0.6056, "step": 7295 }, { "epoch": 1.1910126117301334, "grad_norm": 3.171307325363159, "learning_rate": 1.919628536152684e-05, "loss": 0.6246, "step": 7296 }, { "epoch": 1.1911758703726378, "grad_norm": 2.4002721309661865, "learning_rate": 1.919603350922485e-05, "loss": 0.6173, "step": 7297 }, { "epoch": 1.1913391290151423, "grad_norm": 2.437587022781372, "learning_rate": 1.9195781619121445e-05, "loss": 0.6298, "step": 7298 }, { "epoch": 1.1915023876576467, "grad_norm": 2.7607412338256836, "learning_rate": 1.919552969121765e-05, "loss": 0.6756, "step": 7299 }, { "epoch": 1.191665646300151, "grad_norm": 2.994581460952759, "learning_rate": 1.919527772551451e-05, "loss": 0.7847, "step": 7300 }, { "epoch": 1.1918289049426554, "grad_norm": 3.3732991218566895, "learning_rate": 1.919502572201305e-05, "loss": 0.8261, "step": 7301 }, { "epoch": 1.1919921635851598, "grad_norm": 2.640727996826172, "learning_rate": 1.9194773680714312e-05, "loss": 0.7179, "step": 7302 }, { "epoch": 1.1921554222276642, "grad_norm": 2.5203471183776855, "learning_rate": 1.9194521601619335e-05, "loss": 0.6316, "step": 7303 }, { "epoch": 1.1923186808701685, "grad_norm": 2.892315149307251, "learning_rate": 1.9194269484729148e-05, "loss": 0.8335, "step": 7304 }, { "epoch": 1.192481939512673, "grad_norm": 3.039628028869629, "learning_rate": 1.9194017330044788e-05, "loss": 0.8108, "step": 7305 }, { "epoch": 1.1926451981551773, "grad_norm": 2.5109188556671143, "learning_rate": 1.91937651375673e-05, "loss": 0.5959, "step": 7306 }, { "epoch": 1.1928084567976818, "grad_norm": 3.009509801864624, "learning_rate": 1.9193512907297713e-05, "loss": 0.7648, "step": 7307 }, { "epoch": 1.1929717154401862, "grad_norm": 2.9765167236328125, "learning_rate": 1.9193260639237065e-05, "loss": 0.6926, "step": 7308 }, { "epoch": 1.1931349740826904, "grad_norm": 3.1187610626220703, "learning_rate": 1.9193008333386394e-05, "loss": 0.8203, "step": 7309 }, { "epoch": 1.1932982327251949, "grad_norm": 2.6584548950195312, "learning_rate": 1.9192755989746737e-05, "loss": 0.6988, "step": 7310 }, { "epoch": 1.1934614913676993, "grad_norm": 2.9537179470062256, "learning_rate": 1.919250360831913e-05, "loss": 0.6494, "step": 7311 }, { "epoch": 1.1936247500102037, "grad_norm": 3.4196290969848633, "learning_rate": 1.9192251189104617e-05, "loss": 0.8349, "step": 7312 }, { "epoch": 1.193788008652708, "grad_norm": 2.5488877296447754, "learning_rate": 1.9191998732104227e-05, "loss": 0.7141, "step": 7313 }, { "epoch": 1.1939512672952124, "grad_norm": 2.962634325027466, "learning_rate": 1.9191746237319e-05, "loss": 0.694, "step": 7314 }, { "epoch": 1.1941145259377168, "grad_norm": 3.1195356845855713, "learning_rate": 1.9191493704749974e-05, "loss": 0.895, "step": 7315 }, { "epoch": 1.1942777845802213, "grad_norm": 2.7967662811279297, "learning_rate": 1.919124113439819e-05, "loss": 0.7134, "step": 7316 }, { "epoch": 1.1944410432227257, "grad_norm": 2.7076973915100098, "learning_rate": 1.9190988526264687e-05, "loss": 0.7042, "step": 7317 }, { "epoch": 1.19460430186523, "grad_norm": 3.079448699951172, "learning_rate": 1.91907358803505e-05, "loss": 0.9292, "step": 7318 }, { "epoch": 1.1947675605077344, "grad_norm": 2.6192848682403564, "learning_rate": 1.919048319665666e-05, "loss": 0.7387, "step": 7319 }, { "epoch": 1.1949308191502388, "grad_norm": 2.8401732444763184, "learning_rate": 1.9190230475184223e-05, "loss": 0.6702, "step": 7320 }, { "epoch": 1.1950940777927432, "grad_norm": 3.2597086429595947, "learning_rate": 1.9189977715934214e-05, "loss": 0.7623, "step": 7321 }, { "epoch": 1.1952573364352475, "grad_norm": 3.1653218269348145, "learning_rate": 1.918972491890768e-05, "loss": 0.7761, "step": 7322 }, { "epoch": 1.195420595077752, "grad_norm": 2.4484996795654297, "learning_rate": 1.9189472084105655e-05, "loss": 0.6493, "step": 7323 }, { "epoch": 1.1955838537202563, "grad_norm": 2.764674663543701, "learning_rate": 1.9189219211529177e-05, "loss": 0.7158, "step": 7324 }, { "epoch": 1.1957471123627608, "grad_norm": 2.7173166275024414, "learning_rate": 1.9188966301179294e-05, "loss": 0.5508, "step": 7325 }, { "epoch": 1.1959103710052652, "grad_norm": 3.1219050884246826, "learning_rate": 1.9188713353057038e-05, "loss": 0.8277, "step": 7326 }, { "epoch": 1.1960736296477694, "grad_norm": 2.6095612049102783, "learning_rate": 1.918846036716345e-05, "loss": 0.6818, "step": 7327 }, { "epoch": 1.1962368882902739, "grad_norm": 2.830270528793335, "learning_rate": 1.918820734349957e-05, "loss": 0.7724, "step": 7328 }, { "epoch": 1.1964001469327783, "grad_norm": 2.608617067337036, "learning_rate": 1.9187954282066445e-05, "loss": 0.7731, "step": 7329 }, { "epoch": 1.1965634055752827, "grad_norm": 3.04038405418396, "learning_rate": 1.9187701182865105e-05, "loss": 0.7691, "step": 7330 }, { "epoch": 1.196726664217787, "grad_norm": 3.0292367935180664, "learning_rate": 1.9187448045896592e-05, "loss": 0.717, "step": 7331 }, { "epoch": 1.1968899228602914, "grad_norm": 3.2371790409088135, "learning_rate": 1.9187194871161953e-05, "loss": 0.8669, "step": 7332 }, { "epoch": 1.1970531815027958, "grad_norm": 2.528959035873413, "learning_rate": 1.9186941658662225e-05, "loss": 0.6188, "step": 7333 }, { "epoch": 1.1972164401453003, "grad_norm": 2.7411396503448486, "learning_rate": 1.9186688408398448e-05, "loss": 0.7617, "step": 7334 }, { "epoch": 1.1973796987878047, "grad_norm": 3.1577250957489014, "learning_rate": 1.918643512037166e-05, "loss": 0.8848, "step": 7335 }, { "epoch": 1.197542957430309, "grad_norm": 2.6671500205993652, "learning_rate": 1.9186181794582913e-05, "loss": 0.6754, "step": 7336 }, { "epoch": 1.1977062160728134, "grad_norm": 3.066068410873413, "learning_rate": 1.9185928431033236e-05, "loss": 0.7096, "step": 7337 }, { "epoch": 1.1978694747153178, "grad_norm": 3.2628636360168457, "learning_rate": 1.9185675029723677e-05, "loss": 0.8161, "step": 7338 }, { "epoch": 1.198032733357822, "grad_norm": 2.9131712913513184, "learning_rate": 1.9185421590655276e-05, "loss": 0.6192, "step": 7339 }, { "epoch": 1.1981959920003264, "grad_norm": 2.8488149642944336, "learning_rate": 1.9185168113829078e-05, "loss": 0.6995, "step": 7340 }, { "epoch": 1.1983592506428309, "grad_norm": 2.8620965480804443, "learning_rate": 1.918491459924612e-05, "loss": 0.7503, "step": 7341 }, { "epoch": 1.1985225092853353, "grad_norm": 2.797484874725342, "learning_rate": 1.9184661046907447e-05, "loss": 0.6704, "step": 7342 }, { "epoch": 1.1986857679278398, "grad_norm": 2.6484594345092773, "learning_rate": 1.91844074568141e-05, "loss": 0.7604, "step": 7343 }, { "epoch": 1.198849026570344, "grad_norm": 2.182870388031006, "learning_rate": 1.9184153828967123e-05, "loss": 0.5336, "step": 7344 }, { "epoch": 1.1990122852128484, "grad_norm": 2.911072254180908, "learning_rate": 1.9183900163367557e-05, "loss": 0.7701, "step": 7345 }, { "epoch": 1.1991755438553529, "grad_norm": 2.872819662094116, "learning_rate": 1.9183646460016444e-05, "loss": 0.7299, "step": 7346 }, { "epoch": 1.1993388024978573, "grad_norm": 3.46225905418396, "learning_rate": 1.9183392718914828e-05, "loss": 0.8402, "step": 7347 }, { "epoch": 1.1995020611403615, "grad_norm": 2.9659652709960938, "learning_rate": 1.918313894006375e-05, "loss": 0.8136, "step": 7348 }, { "epoch": 1.199665319782866, "grad_norm": 2.7493462562561035, "learning_rate": 1.9182885123464262e-05, "loss": 0.6545, "step": 7349 }, { "epoch": 1.1998285784253704, "grad_norm": 2.897085189819336, "learning_rate": 1.9182631269117397e-05, "loss": 0.6888, "step": 7350 }, { "epoch": 1.1999918370678748, "grad_norm": 3.0362842082977295, "learning_rate": 1.9182377377024202e-05, "loss": 0.9088, "step": 7351 }, { "epoch": 1.2001550957103793, "grad_norm": 3.098952293395996, "learning_rate": 1.918212344718572e-05, "loss": 0.8777, "step": 7352 }, { "epoch": 1.2003183543528835, "grad_norm": 2.484461545944214, "learning_rate": 1.9181869479602996e-05, "loss": 0.5958, "step": 7353 }, { "epoch": 1.200481612995388, "grad_norm": 2.981297492980957, "learning_rate": 1.9181615474277075e-05, "loss": 0.7696, "step": 7354 }, { "epoch": 1.2006448716378924, "grad_norm": 3.2159416675567627, "learning_rate": 1.9181361431209e-05, "loss": 0.7805, "step": 7355 }, { "epoch": 1.2008081302803968, "grad_norm": 2.913609743118286, "learning_rate": 1.918110735039981e-05, "loss": 0.7482, "step": 7356 }, { "epoch": 1.200971388922901, "grad_norm": 2.5901055335998535, "learning_rate": 1.918085323185056e-05, "loss": 0.6474, "step": 7357 }, { "epoch": 1.2011346475654054, "grad_norm": 2.707705497741699, "learning_rate": 1.9180599075562286e-05, "loss": 0.7037, "step": 7358 }, { "epoch": 1.2012979062079099, "grad_norm": 2.6456189155578613, "learning_rate": 1.918034488153604e-05, "loss": 0.6585, "step": 7359 }, { "epoch": 1.2014611648504143, "grad_norm": 3.824739933013916, "learning_rate": 1.9180090649772857e-05, "loss": 0.8187, "step": 7360 }, { "epoch": 1.2016244234929188, "grad_norm": 2.5219671726226807, "learning_rate": 1.917983638027379e-05, "loss": 0.5638, "step": 7361 }, { "epoch": 1.201787682135423, "grad_norm": 2.6455445289611816, "learning_rate": 1.917958207303988e-05, "loss": 0.6938, "step": 7362 }, { "epoch": 1.2019509407779274, "grad_norm": 2.389348268508911, "learning_rate": 1.9179327728072176e-05, "loss": 0.6583, "step": 7363 }, { "epoch": 1.2021141994204319, "grad_norm": 2.83415150642395, "learning_rate": 1.9179073345371725e-05, "loss": 0.6477, "step": 7364 }, { "epoch": 1.2022774580629363, "grad_norm": 2.752835750579834, "learning_rate": 1.9178818924939568e-05, "loss": 0.7684, "step": 7365 }, { "epoch": 1.2024407167054405, "grad_norm": 2.6832549571990967, "learning_rate": 1.9178564466776748e-05, "loss": 0.6649, "step": 7366 }, { "epoch": 1.202603975347945, "grad_norm": 2.840108633041382, "learning_rate": 1.917830997088432e-05, "loss": 0.7486, "step": 7367 }, { "epoch": 1.2027672339904494, "grad_norm": 2.3898348808288574, "learning_rate": 1.9178055437263322e-05, "loss": 0.6015, "step": 7368 }, { "epoch": 1.2029304926329538, "grad_norm": 2.6699306964874268, "learning_rate": 1.9177800865914804e-05, "loss": 0.81, "step": 7369 }, { "epoch": 1.2030937512754583, "grad_norm": 2.9305474758148193, "learning_rate": 1.9177546256839814e-05, "loss": 0.8602, "step": 7370 }, { "epoch": 1.2032570099179625, "grad_norm": 2.8999075889587402, "learning_rate": 1.9177291610039396e-05, "loss": 0.8326, "step": 7371 }, { "epoch": 1.203420268560467, "grad_norm": 2.825328826904297, "learning_rate": 1.9177036925514597e-05, "loss": 0.6025, "step": 7372 }, { "epoch": 1.2035835272029713, "grad_norm": 2.696398973464966, "learning_rate": 1.9176782203266464e-05, "loss": 0.776, "step": 7373 }, { "epoch": 1.2037467858454758, "grad_norm": 2.5168349742889404, "learning_rate": 1.9176527443296044e-05, "loss": 0.7105, "step": 7374 }, { "epoch": 1.20391004448798, "grad_norm": 2.851233959197998, "learning_rate": 1.9176272645604387e-05, "loss": 1.1891, "step": 7375 }, { "epoch": 1.2040733031304844, "grad_norm": 2.796424388885498, "learning_rate": 1.9176017810192537e-05, "loss": 0.6814, "step": 7376 }, { "epoch": 1.2042365617729889, "grad_norm": 3.3244779109954834, "learning_rate": 1.917576293706154e-05, "loss": 0.9292, "step": 7377 }, { "epoch": 1.2043998204154933, "grad_norm": 2.8154258728027344, "learning_rate": 1.917550802621245e-05, "loss": 0.65, "step": 7378 }, { "epoch": 1.2045630790579978, "grad_norm": 3.355581283569336, "learning_rate": 1.917525307764631e-05, "loss": 0.6965, "step": 7379 }, { "epoch": 1.204726337700502, "grad_norm": 3.1151208877563477, "learning_rate": 1.917499809136417e-05, "loss": 0.885, "step": 7380 }, { "epoch": 1.2048895963430064, "grad_norm": 3.027163028717041, "learning_rate": 1.9174743067367077e-05, "loss": 0.8344, "step": 7381 }, { "epoch": 1.2050528549855108, "grad_norm": 2.645533561706543, "learning_rate": 1.9174488005656077e-05, "loss": 0.6515, "step": 7382 }, { "epoch": 1.205216113628015, "grad_norm": 2.8870131969451904, "learning_rate": 1.9174232906232223e-05, "loss": 0.6437, "step": 7383 }, { "epoch": 1.2053793722705195, "grad_norm": 2.662870407104492, "learning_rate": 1.917397776909656e-05, "loss": 0.6307, "step": 7384 }, { "epoch": 1.205542630913024, "grad_norm": 2.9119181632995605, "learning_rate": 1.9173722594250143e-05, "loss": 0.7038, "step": 7385 }, { "epoch": 1.2057058895555284, "grad_norm": 3.1000380516052246, "learning_rate": 1.9173467381694014e-05, "loss": 0.7175, "step": 7386 }, { "epoch": 1.2058691481980328, "grad_norm": 3.5425798892974854, "learning_rate": 1.9173212131429224e-05, "loss": 0.7066, "step": 7387 }, { "epoch": 1.206032406840537, "grad_norm": 2.539461135864258, "learning_rate": 1.9172956843456822e-05, "loss": 0.6382, "step": 7388 }, { "epoch": 1.2061956654830415, "grad_norm": 2.8895366191864014, "learning_rate": 1.9172701517777857e-05, "loss": 0.7848, "step": 7389 }, { "epoch": 1.206358924125546, "grad_norm": 2.873497486114502, "learning_rate": 1.917244615439338e-05, "loss": 0.65, "step": 7390 }, { "epoch": 1.2065221827680503, "grad_norm": 2.92941951751709, "learning_rate": 1.9172190753304444e-05, "loss": 0.8022, "step": 7391 }, { "epoch": 1.2066854414105546, "grad_norm": 2.8723058700561523, "learning_rate": 1.9171935314512093e-05, "loss": 0.7866, "step": 7392 }, { "epoch": 1.206848700053059, "grad_norm": 2.894171714782715, "learning_rate": 1.9171679838017377e-05, "loss": 0.7356, "step": 7393 }, { "epoch": 1.2070119586955634, "grad_norm": 2.9866209030151367, "learning_rate": 1.917142432382135e-05, "loss": 0.839, "step": 7394 }, { "epoch": 1.2071752173380679, "grad_norm": 2.5851495265960693, "learning_rate": 1.917116877192506e-05, "loss": 0.5912, "step": 7395 }, { "epoch": 1.2073384759805723, "grad_norm": 2.9191863536834717, "learning_rate": 1.917091318232956e-05, "loss": 0.8395, "step": 7396 }, { "epoch": 1.2075017346230765, "grad_norm": 3.136575698852539, "learning_rate": 1.9170657555035898e-05, "loss": 0.8072, "step": 7397 }, { "epoch": 1.207664993265581, "grad_norm": 3.122739315032959, "learning_rate": 1.9170401890045126e-05, "loss": 0.8499, "step": 7398 }, { "epoch": 1.2078282519080854, "grad_norm": 3.0754098892211914, "learning_rate": 1.9170146187358295e-05, "loss": 0.8357, "step": 7399 }, { "epoch": 1.2079915105505898, "grad_norm": 2.3456008434295654, "learning_rate": 1.9169890446976454e-05, "loss": 0.5871, "step": 7400 }, { "epoch": 1.208154769193094, "grad_norm": 3.09975528717041, "learning_rate": 1.9169634668900655e-05, "loss": 0.6233, "step": 7401 }, { "epoch": 1.2083180278355985, "grad_norm": 2.9104561805725098, "learning_rate": 1.9169378853131953e-05, "loss": 0.7092, "step": 7402 }, { "epoch": 1.208481286478103, "grad_norm": 3.085637092590332, "learning_rate": 1.9169122999671394e-05, "loss": 0.8802, "step": 7403 }, { "epoch": 1.2086445451206074, "grad_norm": 2.3812525272369385, "learning_rate": 1.9168867108520033e-05, "loss": 0.6887, "step": 7404 }, { "epoch": 1.2088078037631118, "grad_norm": 2.755352020263672, "learning_rate": 1.9168611179678925e-05, "loss": 0.845, "step": 7405 }, { "epoch": 1.208971062405616, "grad_norm": 2.580472230911255, "learning_rate": 1.9168355213149114e-05, "loss": 0.5966, "step": 7406 }, { "epoch": 1.2091343210481205, "grad_norm": 2.242255210876465, "learning_rate": 1.916809920893166e-05, "loss": 0.677, "step": 7407 }, { "epoch": 1.209297579690625, "grad_norm": 2.6714365482330322, "learning_rate": 1.916784316702761e-05, "loss": 0.7184, "step": 7408 }, { "epoch": 1.2094608383331293, "grad_norm": 2.4972453117370605, "learning_rate": 1.916758708743802e-05, "loss": 0.7478, "step": 7409 }, { "epoch": 1.2096240969756336, "grad_norm": 2.5914788246154785, "learning_rate": 1.916733097016394e-05, "loss": 0.7122, "step": 7410 }, { "epoch": 1.209787355618138, "grad_norm": 2.8784093856811523, "learning_rate": 1.9167074815206423e-05, "loss": 0.8206, "step": 7411 }, { "epoch": 1.2099506142606424, "grad_norm": 2.8577780723571777, "learning_rate": 1.9166818622566523e-05, "loss": 0.7441, "step": 7412 }, { "epoch": 1.2101138729031469, "grad_norm": 2.475825071334839, "learning_rate": 1.9166562392245294e-05, "loss": 0.7235, "step": 7413 }, { "epoch": 1.2102771315456513, "grad_norm": 2.885033369064331, "learning_rate": 1.916630612424379e-05, "loss": 0.7804, "step": 7414 }, { "epoch": 1.2104403901881555, "grad_norm": 2.811523675918579, "learning_rate": 1.916604981856306e-05, "loss": 0.6168, "step": 7415 }, { "epoch": 1.21060364883066, "grad_norm": 2.941545248031616, "learning_rate": 1.916579347520416e-05, "loss": 0.7671, "step": 7416 }, { "epoch": 1.2107669074731644, "grad_norm": 3.4706552028656006, "learning_rate": 1.9165537094168142e-05, "loss": 0.9093, "step": 7417 }, { "epoch": 1.2109301661156688, "grad_norm": 2.422344207763672, "learning_rate": 1.9165280675456066e-05, "loss": 0.6226, "step": 7418 }, { "epoch": 1.211093424758173, "grad_norm": 3.1052417755126953, "learning_rate": 1.916502421906898e-05, "loss": 0.7587, "step": 7419 }, { "epoch": 1.2112566834006775, "grad_norm": 2.62026309967041, "learning_rate": 1.9164767725007944e-05, "loss": 0.7082, "step": 7420 }, { "epoch": 1.211419942043182, "grad_norm": 3.6970467567443848, "learning_rate": 1.9164511193274005e-05, "loss": 0.933, "step": 7421 }, { "epoch": 1.2115832006856864, "grad_norm": 2.9392690658569336, "learning_rate": 1.916425462386822e-05, "loss": 0.7671, "step": 7422 }, { "epoch": 1.2117464593281908, "grad_norm": 3.006056070327759, "learning_rate": 1.9163998016791645e-05, "loss": 0.7057, "step": 7423 }, { "epoch": 1.211909717970695, "grad_norm": 2.509382486343384, "learning_rate": 1.9163741372045334e-05, "loss": 0.6044, "step": 7424 }, { "epoch": 1.2120729766131995, "grad_norm": 2.662985324859619, "learning_rate": 1.9163484689630346e-05, "loss": 0.6748, "step": 7425 }, { "epoch": 1.212236235255704, "grad_norm": 2.594966411590576, "learning_rate": 1.916322796954773e-05, "loss": 0.5921, "step": 7426 }, { "epoch": 1.2123994938982081, "grad_norm": 2.9738616943359375, "learning_rate": 1.9162971211798542e-05, "loss": 0.7763, "step": 7427 }, { "epoch": 1.2125627525407126, "grad_norm": 2.476832628250122, "learning_rate": 1.9162714416383843e-05, "loss": 0.6321, "step": 7428 }, { "epoch": 1.212726011183217, "grad_norm": 3.2747631072998047, "learning_rate": 1.9162457583304683e-05, "loss": 0.812, "step": 7429 }, { "epoch": 1.2128892698257214, "grad_norm": 3.078556776046753, "learning_rate": 1.9162200712562117e-05, "loss": 0.6655, "step": 7430 }, { "epoch": 1.2130525284682259, "grad_norm": 3.1629528999328613, "learning_rate": 1.9161943804157208e-05, "loss": 0.8135, "step": 7431 }, { "epoch": 1.21321578711073, "grad_norm": 2.6625189781188965, "learning_rate": 1.9161686858091005e-05, "loss": 0.5795, "step": 7432 }, { "epoch": 1.2133790457532345, "grad_norm": 3.161017656326294, "learning_rate": 1.9161429874364564e-05, "loss": 0.8426, "step": 7433 }, { "epoch": 1.213542304395739, "grad_norm": 3.1360554695129395, "learning_rate": 1.9161172852978948e-05, "loss": 0.7798, "step": 7434 }, { "epoch": 1.2137055630382434, "grad_norm": 2.5029215812683105, "learning_rate": 1.9160915793935212e-05, "loss": 0.6568, "step": 7435 }, { "epoch": 1.2138688216807476, "grad_norm": 3.2154338359832764, "learning_rate": 1.9160658697234408e-05, "loss": 0.7552, "step": 7436 }, { "epoch": 1.214032080323252, "grad_norm": 2.810652256011963, "learning_rate": 1.916040156287759e-05, "loss": 0.7082, "step": 7437 }, { "epoch": 1.2141953389657565, "grad_norm": 3.0411272048950195, "learning_rate": 1.9160144390865824e-05, "loss": 0.9056, "step": 7438 }, { "epoch": 1.214358597608261, "grad_norm": 3.0967612266540527, "learning_rate": 1.9159887181200165e-05, "loss": 0.8194, "step": 7439 }, { "epoch": 1.2145218562507654, "grad_norm": 2.8769140243530273, "learning_rate": 1.9159629933881666e-05, "loss": 0.7462, "step": 7440 }, { "epoch": 1.2146851148932696, "grad_norm": 2.678316593170166, "learning_rate": 1.915937264891139e-05, "loss": 0.7337, "step": 7441 }, { "epoch": 1.214848373535774, "grad_norm": 2.4734623432159424, "learning_rate": 1.9159115326290388e-05, "loss": 0.6509, "step": 7442 }, { "epoch": 1.2150116321782785, "grad_norm": 2.555598735809326, "learning_rate": 1.9158857966019724e-05, "loss": 0.7394, "step": 7443 }, { "epoch": 1.215174890820783, "grad_norm": 2.576235294342041, "learning_rate": 1.915860056810045e-05, "loss": 0.6912, "step": 7444 }, { "epoch": 1.215338149463287, "grad_norm": 2.366938829421997, "learning_rate": 1.9158343132533633e-05, "loss": 0.5842, "step": 7445 }, { "epoch": 1.2155014081057915, "grad_norm": 2.826927900314331, "learning_rate": 1.9158085659320317e-05, "loss": 0.6569, "step": 7446 }, { "epoch": 1.215664666748296, "grad_norm": 3.0128018856048584, "learning_rate": 1.9157828148461577e-05, "loss": 0.6397, "step": 7447 }, { "epoch": 1.2158279253908004, "grad_norm": 2.8669815063476562, "learning_rate": 1.915757059995846e-05, "loss": 0.7321, "step": 7448 }, { "epoch": 1.2159911840333049, "grad_norm": 3.3191471099853516, "learning_rate": 1.915731301381203e-05, "loss": 0.7807, "step": 7449 }, { "epoch": 1.216154442675809, "grad_norm": 2.908216953277588, "learning_rate": 1.915705539002334e-05, "loss": 0.8019, "step": 7450 }, { "epoch": 1.2163177013183135, "grad_norm": 2.946338653564453, "learning_rate": 1.9156797728593456e-05, "loss": 0.8689, "step": 7451 }, { "epoch": 1.216480959960818, "grad_norm": 2.778053045272827, "learning_rate": 1.9156540029523433e-05, "loss": 0.7072, "step": 7452 }, { "epoch": 1.2166442186033224, "grad_norm": 3.154496908187866, "learning_rate": 1.915628229281433e-05, "loss": 0.7933, "step": 7453 }, { "epoch": 1.2168074772458266, "grad_norm": 2.8372442722320557, "learning_rate": 1.915602451846721e-05, "loss": 0.6768, "step": 7454 }, { "epoch": 1.216970735888331, "grad_norm": 2.4792234897613525, "learning_rate": 1.915576670648313e-05, "loss": 0.5872, "step": 7455 }, { "epoch": 1.2171339945308355, "grad_norm": 3.3785979747772217, "learning_rate": 1.9155508856863153e-05, "loss": 0.8438, "step": 7456 }, { "epoch": 1.21729725317334, "grad_norm": 2.467895269393921, "learning_rate": 1.915525096960833e-05, "loss": 0.714, "step": 7457 }, { "epoch": 1.2174605118158444, "grad_norm": 2.599025249481201, "learning_rate": 1.915499304471973e-05, "loss": 0.6139, "step": 7458 }, { "epoch": 1.2176237704583486, "grad_norm": 2.77363657951355, "learning_rate": 1.9154735082198412e-05, "loss": 0.7164, "step": 7459 }, { "epoch": 1.217787029100853, "grad_norm": 2.5460782051086426, "learning_rate": 1.9154477082045436e-05, "loss": 0.6641, "step": 7460 }, { "epoch": 1.2179502877433575, "grad_norm": 2.621593952178955, "learning_rate": 1.9154219044261858e-05, "loss": 0.7391, "step": 7461 }, { "epoch": 1.218113546385862, "grad_norm": 2.809398651123047, "learning_rate": 1.9153960968848744e-05, "loss": 1.2193, "step": 7462 }, { "epoch": 1.218276805028366, "grad_norm": 2.8564610481262207, "learning_rate": 1.915370285580715e-05, "loss": 0.8377, "step": 7463 }, { "epoch": 1.2184400636708705, "grad_norm": 2.6951372623443604, "learning_rate": 1.9153444705138146e-05, "loss": 0.6487, "step": 7464 }, { "epoch": 1.218603322313375, "grad_norm": 2.6421055793762207, "learning_rate": 1.915318651684278e-05, "loss": 0.6761, "step": 7465 }, { "epoch": 1.2187665809558794, "grad_norm": 3.6915667057037354, "learning_rate": 1.9152928290922123e-05, "loss": 0.9339, "step": 7466 }, { "epoch": 1.2189298395983839, "grad_norm": 3.147141218185425, "learning_rate": 1.9152670027377235e-05, "loss": 0.7412, "step": 7467 }, { "epoch": 1.219093098240888, "grad_norm": 2.7352404594421387, "learning_rate": 1.9152411726209176e-05, "loss": 0.6957, "step": 7468 }, { "epoch": 1.2192563568833925, "grad_norm": 2.603994607925415, "learning_rate": 1.915215338741901e-05, "loss": 0.5812, "step": 7469 }, { "epoch": 1.219419615525897, "grad_norm": 3.1395652294158936, "learning_rate": 1.9151895011007795e-05, "loss": 0.7045, "step": 7470 }, { "epoch": 1.2195828741684012, "grad_norm": 2.9711782932281494, "learning_rate": 1.9151636596976593e-05, "loss": 0.8293, "step": 7471 }, { "epoch": 1.2197461328109056, "grad_norm": 2.528693437576294, "learning_rate": 1.9151378145326474e-05, "loss": 0.5853, "step": 7472 }, { "epoch": 1.21990939145341, "grad_norm": 3.248002767562866, "learning_rate": 1.9151119656058492e-05, "loss": 0.7781, "step": 7473 }, { "epoch": 1.2200726500959145, "grad_norm": 2.765568971633911, "learning_rate": 1.9150861129173714e-05, "loss": 0.6817, "step": 7474 }, { "epoch": 1.220235908738419, "grad_norm": 2.9376370906829834, "learning_rate": 1.91506025646732e-05, "loss": 0.7285, "step": 7475 }, { "epoch": 1.2203991673809231, "grad_norm": 2.251577138900757, "learning_rate": 1.9150343962558015e-05, "loss": 0.5555, "step": 7476 }, { "epoch": 1.2205624260234276, "grad_norm": 2.8131306171417236, "learning_rate": 1.915008532282922e-05, "loss": 0.7482, "step": 7477 }, { "epoch": 1.220725684665932, "grad_norm": 2.7560558319091797, "learning_rate": 1.914982664548788e-05, "loss": 0.78, "step": 7478 }, { "epoch": 1.2208889433084364, "grad_norm": 3.240859031677246, "learning_rate": 1.9149567930535058e-05, "loss": 0.7245, "step": 7479 }, { "epoch": 1.2210522019509407, "grad_norm": 2.388425827026367, "learning_rate": 1.9149309177971815e-05, "loss": 0.6577, "step": 7480 }, { "epoch": 1.221215460593445, "grad_norm": 3.2676472663879395, "learning_rate": 1.9149050387799218e-05, "loss": 0.8259, "step": 7481 }, { "epoch": 1.2213787192359495, "grad_norm": 2.704741954803467, "learning_rate": 1.914879156001833e-05, "loss": 0.8247, "step": 7482 }, { "epoch": 1.221541977878454, "grad_norm": 2.481215476989746, "learning_rate": 1.9148532694630214e-05, "loss": 0.6457, "step": 7483 }, { "epoch": 1.2217052365209584, "grad_norm": 2.19291090965271, "learning_rate": 1.9148273791635932e-05, "loss": 0.5476, "step": 7484 }, { "epoch": 1.2218684951634626, "grad_norm": 2.7663516998291016, "learning_rate": 1.9148014851036552e-05, "loss": 0.6965, "step": 7485 }, { "epoch": 1.222031753805967, "grad_norm": 3.1536097526550293, "learning_rate": 1.914775587283314e-05, "loss": 0.7051, "step": 7486 }, { "epoch": 1.2221950124484715, "grad_norm": 2.8776235580444336, "learning_rate": 1.914749685702676e-05, "loss": 0.7451, "step": 7487 }, { "epoch": 1.222358271090976, "grad_norm": 2.9212965965270996, "learning_rate": 1.9147237803618466e-05, "loss": 0.6797, "step": 7488 }, { "epoch": 1.2225215297334802, "grad_norm": 3.0045578479766846, "learning_rate": 1.9146978712609335e-05, "loss": 0.6834, "step": 7489 }, { "epoch": 1.2226847883759846, "grad_norm": 3.1737523078918457, "learning_rate": 1.9146719584000428e-05, "loss": 0.7702, "step": 7490 }, { "epoch": 1.222848047018489, "grad_norm": 2.3912622928619385, "learning_rate": 1.9146460417792812e-05, "loss": 0.6091, "step": 7491 }, { "epoch": 1.2230113056609935, "grad_norm": 2.659025192260742, "learning_rate": 1.9146201213987552e-05, "loss": 0.7139, "step": 7492 }, { "epoch": 1.223174564303498, "grad_norm": 2.946444272994995, "learning_rate": 1.914594197258571e-05, "loss": 0.8366, "step": 7493 }, { "epoch": 1.2233378229460021, "grad_norm": 3.0114200115203857, "learning_rate": 1.9145682693588354e-05, "loss": 0.6811, "step": 7494 }, { "epoch": 1.2235010815885066, "grad_norm": 3.088569402694702, "learning_rate": 1.914542337699655e-05, "loss": 0.7966, "step": 7495 }, { "epoch": 1.223664340231011, "grad_norm": 2.6154561042785645, "learning_rate": 1.9145164022811365e-05, "loss": 0.5905, "step": 7496 }, { "epoch": 1.2238275988735154, "grad_norm": 2.6878721714019775, "learning_rate": 1.9144904631033865e-05, "loss": 0.7117, "step": 7497 }, { "epoch": 1.2239908575160197, "grad_norm": 2.8732848167419434, "learning_rate": 1.9144645201665114e-05, "loss": 0.8056, "step": 7498 }, { "epoch": 1.224154116158524, "grad_norm": 2.6154091358184814, "learning_rate": 1.914438573470618e-05, "loss": 0.5958, "step": 7499 }, { "epoch": 1.2243173748010285, "grad_norm": 3.0448551177978516, "learning_rate": 1.9144126230158127e-05, "loss": 0.7401, "step": 7500 }, { "epoch": 1.224480633443533, "grad_norm": 2.9207396507263184, "learning_rate": 1.9143866688022025e-05, "loss": 0.7775, "step": 7501 }, { "epoch": 1.2246438920860374, "grad_norm": 2.666490077972412, "learning_rate": 1.914360710829894e-05, "loss": 0.7726, "step": 7502 }, { "epoch": 1.2248071507285416, "grad_norm": 2.750173807144165, "learning_rate": 1.9143347490989938e-05, "loss": 0.7137, "step": 7503 }, { "epoch": 1.224970409371046, "grad_norm": 2.5744009017944336, "learning_rate": 1.9143087836096086e-05, "loss": 0.6657, "step": 7504 }, { "epoch": 1.2251336680135505, "grad_norm": 3.0290019512176514, "learning_rate": 1.9142828143618454e-05, "loss": 0.6384, "step": 7505 }, { "epoch": 1.225296926656055, "grad_norm": 2.479745864868164, "learning_rate": 1.9142568413558108e-05, "loss": 0.5673, "step": 7506 }, { "epoch": 1.2254601852985592, "grad_norm": 2.5830421447753906, "learning_rate": 1.9142308645916114e-05, "loss": 0.6366, "step": 7507 }, { "epoch": 1.2256234439410636, "grad_norm": 3.0484120845794678, "learning_rate": 1.914204884069354e-05, "loss": 0.7436, "step": 7508 }, { "epoch": 1.225786702583568, "grad_norm": 2.9798619747161865, "learning_rate": 1.914178899789146e-05, "loss": 0.855, "step": 7509 }, { "epoch": 1.2259499612260725, "grad_norm": 3.2377214431762695, "learning_rate": 1.9141529117510933e-05, "loss": 0.7088, "step": 7510 }, { "epoch": 1.226113219868577, "grad_norm": 2.6978752613067627, "learning_rate": 1.9141269199553035e-05, "loss": 0.7373, "step": 7511 }, { "epoch": 1.2262764785110811, "grad_norm": 2.482017755508423, "learning_rate": 1.9141009244018828e-05, "loss": 0.7079, "step": 7512 }, { "epoch": 1.2264397371535856, "grad_norm": 3.4169540405273438, "learning_rate": 1.9140749250909386e-05, "loss": 0.6988, "step": 7513 }, { "epoch": 1.22660299579609, "grad_norm": 2.8970463275909424, "learning_rate": 1.9140489220225772e-05, "loss": 0.6419, "step": 7514 }, { "epoch": 1.2267662544385944, "grad_norm": 2.925877094268799, "learning_rate": 1.914022915196906e-05, "loss": 0.6889, "step": 7515 }, { "epoch": 1.2269295130810987, "grad_norm": 3.1179380416870117, "learning_rate": 1.913996904614032e-05, "loss": 0.8202, "step": 7516 }, { "epoch": 1.227092771723603, "grad_norm": 2.553084373474121, "learning_rate": 1.913970890274061e-05, "loss": 0.6499, "step": 7517 }, { "epoch": 1.2272560303661075, "grad_norm": 3.0513103008270264, "learning_rate": 1.9139448721771018e-05, "loss": 0.8133, "step": 7518 }, { "epoch": 1.227419289008612, "grad_norm": 3.1390902996063232, "learning_rate": 1.9139188503232596e-05, "loss": 0.9399, "step": 7519 }, { "epoch": 1.2275825476511162, "grad_norm": 3.083735942840576, "learning_rate": 1.9138928247126423e-05, "loss": 0.7395, "step": 7520 }, { "epoch": 1.2277458062936206, "grad_norm": 2.4522206783294678, "learning_rate": 1.913866795345357e-05, "loss": 0.59, "step": 7521 }, { "epoch": 1.227909064936125, "grad_norm": 2.6320552825927734, "learning_rate": 1.9138407622215104e-05, "loss": 0.691, "step": 7522 }, { "epoch": 1.2280723235786295, "grad_norm": 2.6115119457244873, "learning_rate": 1.913814725341209e-05, "loss": 0.6979, "step": 7523 }, { "epoch": 1.2282355822211337, "grad_norm": 3.319380044937134, "learning_rate": 1.9137886847045606e-05, "loss": 0.8642, "step": 7524 }, { "epoch": 1.2283988408636382, "grad_norm": 3.109221935272217, "learning_rate": 1.913762640311672e-05, "loss": 1.1598, "step": 7525 }, { "epoch": 1.2285620995061426, "grad_norm": 3.150343656539917, "learning_rate": 1.9137365921626502e-05, "loss": 0.7585, "step": 7526 }, { "epoch": 1.228725358148647, "grad_norm": 2.60140323638916, "learning_rate": 1.913710540257602e-05, "loss": 0.6978, "step": 7527 }, { "epoch": 1.2288886167911515, "grad_norm": 2.604243278503418, "learning_rate": 1.913684484596635e-05, "loss": 0.7552, "step": 7528 }, { "epoch": 1.2290518754336557, "grad_norm": 2.967548370361328, "learning_rate": 1.9136584251798564e-05, "loss": 0.7399, "step": 7529 }, { "epoch": 1.2292151340761601, "grad_norm": 2.782249689102173, "learning_rate": 1.9136323620073723e-05, "loss": 0.6715, "step": 7530 }, { "epoch": 1.2293783927186646, "grad_norm": 2.575449228286743, "learning_rate": 1.9136062950792912e-05, "loss": 0.6493, "step": 7531 }, { "epoch": 1.229541651361169, "grad_norm": 2.67608380317688, "learning_rate": 1.913580224395719e-05, "loss": 0.6223, "step": 7532 }, { "epoch": 1.2297049100036732, "grad_norm": 2.891711950302124, "learning_rate": 1.913554149956764e-05, "loss": 0.8063, "step": 7533 }, { "epoch": 1.2298681686461777, "grad_norm": 3.537970781326294, "learning_rate": 1.9135280717625325e-05, "loss": 0.7158, "step": 7534 }, { "epoch": 1.230031427288682, "grad_norm": 3.1092188358306885, "learning_rate": 1.9135019898131322e-05, "loss": 0.6028, "step": 7535 }, { "epoch": 1.2301946859311865, "grad_norm": 2.5739381313323975, "learning_rate": 1.91347590410867e-05, "loss": 0.7479, "step": 7536 }, { "epoch": 1.230357944573691, "grad_norm": 2.5090997219085693, "learning_rate": 1.9134498146492535e-05, "loss": 0.6501, "step": 7537 }, { "epoch": 1.2305212032161952, "grad_norm": 2.57847261428833, "learning_rate": 1.9134237214349895e-05, "loss": 0.6303, "step": 7538 }, { "epoch": 1.2306844618586996, "grad_norm": 3.4482228755950928, "learning_rate": 1.9133976244659854e-05, "loss": 0.8678, "step": 7539 }, { "epoch": 1.230847720501204, "grad_norm": 3.0112664699554443, "learning_rate": 1.9133715237423488e-05, "loss": 0.7626, "step": 7540 }, { "epoch": 1.2310109791437085, "grad_norm": 2.4352970123291016, "learning_rate": 1.913345419264187e-05, "loss": 0.6272, "step": 7541 }, { "epoch": 1.2311742377862127, "grad_norm": 3.2147347927093506, "learning_rate": 1.913319311031607e-05, "loss": 0.7748, "step": 7542 }, { "epoch": 1.2313374964287171, "grad_norm": 3.021759033203125, "learning_rate": 1.9132931990447158e-05, "loss": 0.8202, "step": 7543 }, { "epoch": 1.2315007550712216, "grad_norm": 2.6196842193603516, "learning_rate": 1.913267083303621e-05, "loss": 0.7173, "step": 7544 }, { "epoch": 1.231664013713726, "grad_norm": 2.5892536640167236, "learning_rate": 1.9132409638084306e-05, "loss": 0.7664, "step": 7545 }, { "epoch": 1.2318272723562305, "grad_norm": 2.652632474899292, "learning_rate": 1.913214840559251e-05, "loss": 0.6282, "step": 7546 }, { "epoch": 1.2319905309987347, "grad_norm": 2.926814079284668, "learning_rate": 1.91318871355619e-05, "loss": 0.8163, "step": 7547 }, { "epoch": 1.2321537896412391, "grad_norm": 3.0898356437683105, "learning_rate": 1.9131625827993554e-05, "loss": 0.7654, "step": 7548 }, { "epoch": 1.2323170482837436, "grad_norm": 3.27728271484375, "learning_rate": 1.913136448288854e-05, "loss": 0.9009, "step": 7549 }, { "epoch": 1.232480306926248, "grad_norm": 3.0942888259887695, "learning_rate": 1.9131103100247934e-05, "loss": 0.8187, "step": 7550 }, { "epoch": 1.2326435655687522, "grad_norm": 2.7509982585906982, "learning_rate": 1.913084168007281e-05, "loss": 0.8572, "step": 7551 }, { "epoch": 1.2328068242112566, "grad_norm": 2.5341262817382812, "learning_rate": 1.913058022236425e-05, "loss": 0.6487, "step": 7552 }, { "epoch": 1.232970082853761, "grad_norm": 2.9720489978790283, "learning_rate": 1.9130318727123312e-05, "loss": 0.7722, "step": 7553 }, { "epoch": 1.2331333414962655, "grad_norm": 2.5056614875793457, "learning_rate": 1.9130057194351087e-05, "loss": 0.6872, "step": 7554 }, { "epoch": 1.23329660013877, "grad_norm": 3.0719950199127197, "learning_rate": 1.9129795624048645e-05, "loss": 0.8972, "step": 7555 }, { "epoch": 1.2334598587812742, "grad_norm": 2.8293228149414062, "learning_rate": 1.9129534016217062e-05, "loss": 0.6711, "step": 7556 }, { "epoch": 1.2336231174237786, "grad_norm": 2.811235189437866, "learning_rate": 1.9129272370857408e-05, "loss": 0.7604, "step": 7557 }, { "epoch": 1.233786376066283, "grad_norm": 3.380380868911743, "learning_rate": 1.9129010687970765e-05, "loss": 0.7977, "step": 7558 }, { "epoch": 1.2339496347087875, "grad_norm": 2.6652610301971436, "learning_rate": 1.9128748967558204e-05, "loss": 0.6798, "step": 7559 }, { "epoch": 1.2341128933512917, "grad_norm": 2.8564066886901855, "learning_rate": 1.9128487209620803e-05, "loss": 0.799, "step": 7560 }, { "epoch": 1.2342761519937961, "grad_norm": 2.4405064582824707, "learning_rate": 1.912822541415964e-05, "loss": 0.6428, "step": 7561 }, { "epoch": 1.2344394106363006, "grad_norm": 2.627445697784424, "learning_rate": 1.912796358117579e-05, "loss": 0.6691, "step": 7562 }, { "epoch": 1.234602669278805, "grad_norm": 2.8881144523620605, "learning_rate": 1.9127701710670322e-05, "loss": 0.7926, "step": 7563 }, { "epoch": 1.2347659279213095, "grad_norm": 2.2779617309570312, "learning_rate": 1.9127439802644324e-05, "loss": 0.5473, "step": 7564 }, { "epoch": 1.2349291865638137, "grad_norm": 2.4456913471221924, "learning_rate": 1.912717785709887e-05, "loss": 0.684, "step": 7565 }, { "epoch": 1.2350924452063181, "grad_norm": 2.7791495323181152, "learning_rate": 1.912691587403503e-05, "loss": 0.7651, "step": 7566 }, { "epoch": 1.2352557038488226, "grad_norm": 2.6182291507720947, "learning_rate": 1.9126653853453886e-05, "loss": 0.7903, "step": 7567 }, { "epoch": 1.2354189624913268, "grad_norm": 2.9695277214050293, "learning_rate": 1.9126391795356513e-05, "loss": 0.7463, "step": 7568 }, { "epoch": 1.2355822211338312, "grad_norm": 2.8923768997192383, "learning_rate": 1.9126129699743993e-05, "loss": 0.8202, "step": 7569 }, { "epoch": 1.2357454797763356, "grad_norm": 3.2010395526885986, "learning_rate": 1.9125867566617397e-05, "loss": 1.3923, "step": 7570 }, { "epoch": 1.23590873841884, "grad_norm": 3.374661922454834, "learning_rate": 1.9125605395977805e-05, "loss": 0.9439, "step": 7571 }, { "epoch": 1.2360719970613445, "grad_norm": 2.8081278800964355, "learning_rate": 1.9125343187826296e-05, "loss": 0.7243, "step": 7572 }, { "epoch": 1.2362352557038487, "grad_norm": 3.325204849243164, "learning_rate": 1.9125080942163946e-05, "loss": 0.8436, "step": 7573 }, { "epoch": 1.2363985143463532, "grad_norm": 3.1099491119384766, "learning_rate": 1.9124818658991832e-05, "loss": 0.7033, "step": 7574 }, { "epoch": 1.2365617729888576, "grad_norm": 3.172102928161621, "learning_rate": 1.9124556338311037e-05, "loss": 0.8132, "step": 7575 }, { "epoch": 1.236725031631362, "grad_norm": 2.889798164367676, "learning_rate": 1.9124293980122637e-05, "loss": 0.7346, "step": 7576 }, { "epoch": 1.2368882902738663, "grad_norm": 2.6413815021514893, "learning_rate": 1.912403158442771e-05, "loss": 0.6505, "step": 7577 }, { "epoch": 1.2370515489163707, "grad_norm": 3.5464999675750732, "learning_rate": 1.9123769151227328e-05, "loss": 0.7931, "step": 7578 }, { "epoch": 1.2372148075588751, "grad_norm": 2.733956813812256, "learning_rate": 1.9123506680522584e-05, "loss": 0.6885, "step": 7579 }, { "epoch": 1.2373780662013796, "grad_norm": 2.6730527877807617, "learning_rate": 1.9123244172314546e-05, "loss": 0.6793, "step": 7580 }, { "epoch": 1.237541324843884, "grad_norm": 3.162238836288452, "learning_rate": 1.9122981626604296e-05, "loss": 0.8331, "step": 7581 }, { "epoch": 1.2377045834863882, "grad_norm": 3.0741381645202637, "learning_rate": 1.9122719043392913e-05, "loss": 0.7557, "step": 7582 }, { "epoch": 1.2378678421288927, "grad_norm": 3.2087326049804688, "learning_rate": 1.9122456422681477e-05, "loss": 0.7514, "step": 7583 }, { "epoch": 1.238031100771397, "grad_norm": 2.623100519180298, "learning_rate": 1.9122193764471066e-05, "loss": 0.7607, "step": 7584 }, { "epoch": 1.2381943594139015, "grad_norm": 2.716179370880127, "learning_rate": 1.9121931068762764e-05, "loss": 0.6514, "step": 7585 }, { "epoch": 1.2383576180564058, "grad_norm": 3.104093074798584, "learning_rate": 1.9121668335557644e-05, "loss": 0.775, "step": 7586 }, { "epoch": 1.2385208766989102, "grad_norm": 2.922088384628296, "learning_rate": 1.912140556485679e-05, "loss": 0.7576, "step": 7587 }, { "epoch": 1.2386841353414146, "grad_norm": 2.858189344406128, "learning_rate": 1.9121142756661285e-05, "loss": 0.7591, "step": 7588 }, { "epoch": 1.238847393983919, "grad_norm": 2.911897897720337, "learning_rate": 1.9120879910972206e-05, "loss": 0.8169, "step": 7589 }, { "epoch": 1.2390106526264235, "grad_norm": 3.182332992553711, "learning_rate": 1.912061702779063e-05, "loss": 0.7797, "step": 7590 }, { "epoch": 1.2391739112689277, "grad_norm": 2.5808465480804443, "learning_rate": 1.912035410711764e-05, "loss": 0.6661, "step": 7591 }, { "epoch": 1.2393371699114322, "grad_norm": 3.5595850944519043, "learning_rate": 1.9120091148954324e-05, "loss": 0.9538, "step": 7592 }, { "epoch": 1.2395004285539366, "grad_norm": 3.007307529449463, "learning_rate": 1.9119828153301755e-05, "loss": 1.3325, "step": 7593 }, { "epoch": 1.239663687196441, "grad_norm": 2.8789737224578857, "learning_rate": 1.9119565120161015e-05, "loss": 0.8286, "step": 7594 }, { "epoch": 1.2398269458389453, "grad_norm": 2.6391701698303223, "learning_rate": 1.911930204953319e-05, "loss": 0.6994, "step": 7595 }, { "epoch": 1.2399902044814497, "grad_norm": 3.1958673000335693, "learning_rate": 1.9119038941419355e-05, "loss": 0.7543, "step": 7596 }, { "epoch": 1.2401534631239541, "grad_norm": 2.707451581954956, "learning_rate": 1.9118775795820592e-05, "loss": 0.7811, "step": 7597 }, { "epoch": 1.2403167217664586, "grad_norm": 3.0305898189544678, "learning_rate": 1.9118512612737986e-05, "loss": 0.8432, "step": 7598 }, { "epoch": 1.240479980408963, "grad_norm": 2.7913906574249268, "learning_rate": 1.911824939217262e-05, "loss": 0.6239, "step": 7599 }, { "epoch": 1.2406432390514672, "grad_norm": 2.948521137237549, "learning_rate": 1.911798613412557e-05, "loss": 0.7263, "step": 7600 }, { "epoch": 1.2408064976939717, "grad_norm": 2.5945560932159424, "learning_rate": 1.9117722838597924e-05, "loss": 0.7043, "step": 7601 }, { "epoch": 1.240969756336476, "grad_norm": 2.6337931156158447, "learning_rate": 1.9117459505590763e-05, "loss": 0.7037, "step": 7602 }, { "epoch": 1.2411330149789805, "grad_norm": 2.610835313796997, "learning_rate": 1.911719613510517e-05, "loss": 0.7719, "step": 7603 }, { "epoch": 1.2412962736214848, "grad_norm": 2.444760799407959, "learning_rate": 1.911693272714222e-05, "loss": 0.7262, "step": 7604 }, { "epoch": 1.2414595322639892, "grad_norm": 2.8555214405059814, "learning_rate": 1.9116669281703007e-05, "loss": 0.8016, "step": 7605 }, { "epoch": 1.2416227909064936, "grad_norm": 2.687502145767212, "learning_rate": 1.911640579878861e-05, "loss": 0.7445, "step": 7606 }, { "epoch": 1.241786049548998, "grad_norm": 2.6430602073669434, "learning_rate": 1.911614227840011e-05, "loss": 0.7475, "step": 7607 }, { "epoch": 1.2419493081915025, "grad_norm": 2.419600009918213, "learning_rate": 1.911587872053859e-05, "loss": 0.6608, "step": 7608 }, { "epoch": 1.2421125668340067, "grad_norm": 3.223205804824829, "learning_rate": 1.9115615125205135e-05, "loss": 0.8825, "step": 7609 }, { "epoch": 1.2422758254765112, "grad_norm": 2.5565056800842285, "learning_rate": 1.9115351492400828e-05, "loss": 0.644, "step": 7610 }, { "epoch": 1.2424390841190156, "grad_norm": 2.7178943157196045, "learning_rate": 1.9115087822126754e-05, "loss": 0.777, "step": 7611 }, { "epoch": 1.2426023427615198, "grad_norm": 2.469158887863159, "learning_rate": 1.9114824114383994e-05, "loss": 0.6216, "step": 7612 }, { "epoch": 1.2427656014040243, "grad_norm": 2.4141359329223633, "learning_rate": 1.9114560369173637e-05, "loss": 0.7026, "step": 7613 }, { "epoch": 1.2429288600465287, "grad_norm": 3.3731184005737305, "learning_rate": 1.911429658649676e-05, "loss": 0.7677, "step": 7614 }, { "epoch": 1.2430921186890331, "grad_norm": 2.8550870418548584, "learning_rate": 1.9114032766354453e-05, "loss": 0.7565, "step": 7615 }, { "epoch": 1.2432553773315376, "grad_norm": 2.710547685623169, "learning_rate": 1.91137689087478e-05, "loss": 0.7018, "step": 7616 }, { "epoch": 1.2434186359740418, "grad_norm": 2.88620924949646, "learning_rate": 1.9113505013677884e-05, "loss": 0.7397, "step": 7617 }, { "epoch": 1.2435818946165462, "grad_norm": 3.068784475326538, "learning_rate": 1.9113241081145788e-05, "loss": 0.7813, "step": 7618 }, { "epoch": 1.2437451532590507, "grad_norm": 2.4076240062713623, "learning_rate": 1.91129771111526e-05, "loss": 0.6131, "step": 7619 }, { "epoch": 1.243908411901555, "grad_norm": 2.6099698543548584, "learning_rate": 1.9112713103699404e-05, "loss": 0.6674, "step": 7620 }, { "epoch": 1.2440716705440593, "grad_norm": 2.9871745109558105, "learning_rate": 1.9112449058787286e-05, "loss": 0.726, "step": 7621 }, { "epoch": 1.2442349291865638, "grad_norm": 2.9617621898651123, "learning_rate": 1.911218497641733e-05, "loss": 0.7634, "step": 7622 }, { "epoch": 1.2443981878290682, "grad_norm": 2.5491559505462646, "learning_rate": 1.9111920856590624e-05, "loss": 0.7446, "step": 7623 }, { "epoch": 1.2445614464715726, "grad_norm": 2.7029576301574707, "learning_rate": 1.9111656699308248e-05, "loss": 0.7812, "step": 7624 }, { "epoch": 1.244724705114077, "grad_norm": 2.429060935974121, "learning_rate": 1.9111392504571295e-05, "loss": 0.5718, "step": 7625 }, { "epoch": 1.2448879637565813, "grad_norm": 2.9852638244628906, "learning_rate": 1.911112827238085e-05, "loss": 0.7318, "step": 7626 }, { "epoch": 1.2450512223990857, "grad_norm": 2.3895745277404785, "learning_rate": 1.9110864002737992e-05, "loss": 0.5478, "step": 7627 }, { "epoch": 1.2452144810415902, "grad_norm": 2.7397618293762207, "learning_rate": 1.9110599695643816e-05, "loss": 0.6618, "step": 7628 }, { "epoch": 1.2453777396840946, "grad_norm": 2.885089159011841, "learning_rate": 1.9110335351099403e-05, "loss": 0.6501, "step": 7629 }, { "epoch": 1.2455409983265988, "grad_norm": 4.277817249298096, "learning_rate": 1.911007096910584e-05, "loss": 0.6751, "step": 7630 }, { "epoch": 1.2457042569691033, "grad_norm": 2.9173693656921387, "learning_rate": 1.9109806549664217e-05, "loss": 0.7335, "step": 7631 }, { "epoch": 1.2458675156116077, "grad_norm": 2.6981849670410156, "learning_rate": 1.910954209277562e-05, "loss": 0.7009, "step": 7632 }, { "epoch": 1.2460307742541121, "grad_norm": 3.018573522567749, "learning_rate": 1.9109277598441134e-05, "loss": 0.8193, "step": 7633 }, { "epoch": 1.2461940328966166, "grad_norm": 2.424771547317505, "learning_rate": 1.9109013066661847e-05, "loss": 0.6194, "step": 7634 }, { "epoch": 1.2463572915391208, "grad_norm": 3.0731558799743652, "learning_rate": 1.910874849743885e-05, "loss": 0.7999, "step": 7635 }, { "epoch": 1.2465205501816252, "grad_norm": 2.6494734287261963, "learning_rate": 1.9108483890773222e-05, "loss": 0.6192, "step": 7636 }, { "epoch": 1.2466838088241297, "grad_norm": 3.019928455352783, "learning_rate": 1.9108219246666057e-05, "loss": 0.8192, "step": 7637 }, { "epoch": 1.246847067466634, "grad_norm": 2.2180497646331787, "learning_rate": 1.9107954565118446e-05, "loss": 0.555, "step": 7638 }, { "epoch": 1.2470103261091383, "grad_norm": 2.6110167503356934, "learning_rate": 1.9107689846131467e-05, "loss": 0.6892, "step": 7639 }, { "epoch": 1.2471735847516427, "grad_norm": 2.780158519744873, "learning_rate": 1.9107425089706218e-05, "loss": 0.6399, "step": 7640 }, { "epoch": 1.2473368433941472, "grad_norm": 2.771613121032715, "learning_rate": 1.9107160295843787e-05, "loss": 0.6472, "step": 7641 }, { "epoch": 1.2475001020366516, "grad_norm": 2.6698546409606934, "learning_rate": 1.9106895464545253e-05, "loss": 0.7066, "step": 7642 }, { "epoch": 1.247663360679156, "grad_norm": 3.469646692276001, "learning_rate": 1.910663059581171e-05, "loss": 0.9048, "step": 7643 }, { "epoch": 1.2478266193216603, "grad_norm": 3.388007640838623, "learning_rate": 1.910636568964425e-05, "loss": 0.8215, "step": 7644 }, { "epoch": 1.2479898779641647, "grad_norm": 2.947131633758545, "learning_rate": 1.910610074604396e-05, "loss": 0.7327, "step": 7645 }, { "epoch": 1.2481531366066692, "grad_norm": 3.3171794414520264, "learning_rate": 1.9105835765011926e-05, "loss": 1.292, "step": 7646 }, { "epoch": 1.2483163952491736, "grad_norm": 2.770348072052002, "learning_rate": 1.910557074654924e-05, "loss": 0.8163, "step": 7647 }, { "epoch": 1.2484796538916778, "grad_norm": 3.1773715019226074, "learning_rate": 1.910530569065699e-05, "loss": 0.7157, "step": 7648 }, { "epoch": 1.2486429125341822, "grad_norm": 2.930039644241333, "learning_rate": 1.9105040597336268e-05, "loss": 0.8511, "step": 7649 }, { "epoch": 1.2488061711766867, "grad_norm": 2.9879860877990723, "learning_rate": 1.9104775466588162e-05, "loss": 0.8472, "step": 7650 }, { "epoch": 1.2489694298191911, "grad_norm": 2.40463924407959, "learning_rate": 1.910451029841376e-05, "loss": 0.6794, "step": 7651 }, { "epoch": 1.2491326884616956, "grad_norm": 3.3824973106384277, "learning_rate": 1.9104245092814156e-05, "loss": 0.8901, "step": 7652 }, { "epoch": 1.2492959471041998, "grad_norm": 2.428809642791748, "learning_rate": 1.9103979849790437e-05, "loss": 0.671, "step": 7653 }, { "epoch": 1.2494592057467042, "grad_norm": 2.2726962566375732, "learning_rate": 1.9103714569343695e-05, "loss": 0.6946, "step": 7654 }, { "epoch": 1.2496224643892087, "grad_norm": 2.4882493019104004, "learning_rate": 1.910344925147502e-05, "loss": 0.6791, "step": 7655 }, { "epoch": 1.2497857230317129, "grad_norm": 3.2738685607910156, "learning_rate": 1.91031838961855e-05, "loss": 0.8737, "step": 7656 }, { "epoch": 1.2499489816742173, "grad_norm": 2.763889789581299, "learning_rate": 1.9102918503476234e-05, "loss": 0.7357, "step": 7657 }, { "epoch": 1.2501122403167217, "grad_norm": 2.5574140548706055, "learning_rate": 1.9102653073348304e-05, "loss": 0.6273, "step": 7658 }, { "epoch": 1.2502754989592262, "grad_norm": 2.799668312072754, "learning_rate": 1.9102387605802802e-05, "loss": 0.8232, "step": 7659 }, { "epoch": 1.2504387576017306, "grad_norm": 2.719632148742676, "learning_rate": 1.9102122100840826e-05, "loss": 0.7247, "step": 7660 }, { "epoch": 1.250602016244235, "grad_norm": 2.818910598754883, "learning_rate": 1.910185655846346e-05, "loss": 0.7327, "step": 7661 }, { "epoch": 1.2507652748867393, "grad_norm": 2.8770687580108643, "learning_rate": 1.91015909786718e-05, "loss": 0.7789, "step": 7662 }, { "epoch": 1.2509285335292437, "grad_norm": 2.6922593116760254, "learning_rate": 1.9101325361466934e-05, "loss": 0.7585, "step": 7663 }, { "epoch": 1.2510917921717482, "grad_norm": 2.6539857387542725, "learning_rate": 1.9101059706849957e-05, "loss": 0.6342, "step": 7664 }, { "epoch": 1.2512550508142524, "grad_norm": 2.565580129623413, "learning_rate": 1.910079401482196e-05, "loss": 0.6975, "step": 7665 }, { "epoch": 1.2514183094567568, "grad_norm": 2.979135513305664, "learning_rate": 1.9100528285384036e-05, "loss": 0.717, "step": 7666 }, { "epoch": 1.2515815680992612, "grad_norm": 2.611142873764038, "learning_rate": 1.9100262518537277e-05, "loss": 0.6318, "step": 7667 }, { "epoch": 1.2517448267417657, "grad_norm": 3.170079469680786, "learning_rate": 1.9099996714282775e-05, "loss": 0.8951, "step": 7668 }, { "epoch": 1.2519080853842701, "grad_norm": 2.5715444087982178, "learning_rate": 1.909973087262162e-05, "loss": 0.6223, "step": 7669 }, { "epoch": 1.2520713440267743, "grad_norm": 2.4257404804229736, "learning_rate": 1.909946499355491e-05, "loss": 0.6137, "step": 7670 }, { "epoch": 1.2522346026692788, "grad_norm": 3.126343250274658, "learning_rate": 1.9099199077083738e-05, "loss": 0.7774, "step": 7671 }, { "epoch": 1.2523978613117832, "grad_norm": 3.001565456390381, "learning_rate": 1.909893312320919e-05, "loss": 0.7611, "step": 7672 }, { "epoch": 1.2525611199542876, "grad_norm": 3.200861692428589, "learning_rate": 1.909866713193236e-05, "loss": 0.8047, "step": 7673 }, { "epoch": 1.2527243785967919, "grad_norm": 3.386380195617676, "learning_rate": 1.9098401103254352e-05, "loss": 0.8348, "step": 7674 }, { "epoch": 1.2528876372392963, "grad_norm": 3.020071029663086, "learning_rate": 1.9098135037176254e-05, "loss": 0.7022, "step": 7675 }, { "epoch": 1.2530508958818007, "grad_norm": 3.1802313327789307, "learning_rate": 1.909786893369915e-05, "loss": 0.8598, "step": 7676 }, { "epoch": 1.2532141545243052, "grad_norm": 2.8282485008239746, "learning_rate": 1.909760279282415e-05, "loss": 0.7488, "step": 7677 }, { "epoch": 1.2533774131668096, "grad_norm": 3.3774919509887695, "learning_rate": 1.9097336614552337e-05, "loss": 0.8602, "step": 7678 }, { "epoch": 1.2535406718093138, "grad_norm": 2.970787763595581, "learning_rate": 1.9097070398884808e-05, "loss": 0.7127, "step": 7679 }, { "epoch": 1.2537039304518183, "grad_norm": 2.4978525638580322, "learning_rate": 1.9096804145822655e-05, "loss": 0.628, "step": 7680 }, { "epoch": 1.2538671890943227, "grad_norm": 2.976747751235962, "learning_rate": 1.9096537855366976e-05, "loss": 0.6871, "step": 7681 }, { "epoch": 1.2540304477368271, "grad_norm": 2.722971200942993, "learning_rate": 1.9096271527518868e-05, "loss": 0.7651, "step": 7682 }, { "epoch": 1.2541937063793314, "grad_norm": 3.111269950866699, "learning_rate": 1.9096005162279422e-05, "loss": 0.8605, "step": 7683 }, { "epoch": 1.2543569650218358, "grad_norm": 2.6812708377838135, "learning_rate": 1.909573875964973e-05, "loss": 0.6568, "step": 7684 }, { "epoch": 1.2545202236643402, "grad_norm": 2.3929364681243896, "learning_rate": 1.9095472319630892e-05, "loss": 0.6694, "step": 7685 }, { "epoch": 1.2546834823068447, "grad_norm": 2.7755401134490967, "learning_rate": 1.9095205842224006e-05, "loss": 0.7391, "step": 7686 }, { "epoch": 1.2548467409493491, "grad_norm": 3.2050933837890625, "learning_rate": 1.9094939327430156e-05, "loss": 0.8776, "step": 7687 }, { "epoch": 1.2550099995918533, "grad_norm": 2.490244150161743, "learning_rate": 1.9094672775250448e-05, "loss": 0.6292, "step": 7688 }, { "epoch": 1.2551732582343578, "grad_norm": 3.3478498458862305, "learning_rate": 1.9094406185685974e-05, "loss": 0.837, "step": 7689 }, { "epoch": 1.2553365168768622, "grad_norm": 2.386256694793701, "learning_rate": 1.909413955873783e-05, "loss": 0.641, "step": 7690 }, { "epoch": 1.2554997755193664, "grad_norm": 3.2810473442077637, "learning_rate": 1.9093872894407114e-05, "loss": 0.7188, "step": 7691 }, { "epoch": 1.2556630341618709, "grad_norm": 2.9457664489746094, "learning_rate": 1.909360619269492e-05, "loss": 0.6883, "step": 7692 }, { "epoch": 1.2558262928043753, "grad_norm": 3.052734613418579, "learning_rate": 1.9093339453602344e-05, "loss": 0.7361, "step": 7693 }, { "epoch": 1.2559895514468797, "grad_norm": 2.437438488006592, "learning_rate": 1.9093072677130483e-05, "loss": 0.6236, "step": 7694 }, { "epoch": 1.2561528100893842, "grad_norm": 2.554438591003418, "learning_rate": 1.909280586328043e-05, "loss": 0.6209, "step": 7695 }, { "epoch": 1.2563160687318886, "grad_norm": 2.5502054691314697, "learning_rate": 1.909253901205329e-05, "loss": 0.6693, "step": 7696 }, { "epoch": 1.2564793273743928, "grad_norm": 2.9864156246185303, "learning_rate": 1.9092272123450156e-05, "loss": 0.7769, "step": 7697 }, { "epoch": 1.2566425860168973, "grad_norm": 2.703667402267456, "learning_rate": 1.909200519747212e-05, "loss": 0.8149, "step": 7698 }, { "epoch": 1.2568058446594017, "grad_norm": 2.7916815280914307, "learning_rate": 1.909173823412029e-05, "loss": 0.7015, "step": 7699 }, { "epoch": 1.256969103301906, "grad_norm": 3.117810010910034, "learning_rate": 1.909147123339575e-05, "loss": 0.8416, "step": 7700 }, { "epoch": 1.2571323619444104, "grad_norm": 3.09555721282959, "learning_rate": 1.909120419529961e-05, "loss": 0.7225, "step": 7701 }, { "epoch": 1.2572956205869148, "grad_norm": 2.496025562286377, "learning_rate": 1.9090937119832958e-05, "loss": 0.6072, "step": 7702 }, { "epoch": 1.2574588792294192, "grad_norm": 2.64020037651062, "learning_rate": 1.90906700069969e-05, "loss": 0.7194, "step": 7703 }, { "epoch": 1.2576221378719237, "grad_norm": 2.639573574066162, "learning_rate": 1.909040285679253e-05, "loss": 0.6183, "step": 7704 }, { "epoch": 1.2577853965144281, "grad_norm": 2.96073055267334, "learning_rate": 1.9090135669220943e-05, "loss": 0.7231, "step": 7705 }, { "epoch": 1.2579486551569323, "grad_norm": 2.505906581878662, "learning_rate": 1.9089868444283242e-05, "loss": 0.6293, "step": 7706 }, { "epoch": 1.2581119137994368, "grad_norm": 2.8467252254486084, "learning_rate": 1.908960118198053e-05, "loss": 0.7542, "step": 7707 }, { "epoch": 1.2582751724419412, "grad_norm": 3.169301986694336, "learning_rate": 1.9089333882313893e-05, "loss": 0.8592, "step": 7708 }, { "epoch": 1.2584384310844454, "grad_norm": 3.1599085330963135, "learning_rate": 1.9089066545284436e-05, "loss": 0.7328, "step": 7709 }, { "epoch": 1.2586016897269499, "grad_norm": 2.9699110984802246, "learning_rate": 1.908879917089326e-05, "loss": 0.6573, "step": 7710 }, { "epoch": 1.2587649483694543, "grad_norm": 2.7831106185913086, "learning_rate": 1.9088531759141465e-05, "loss": 0.7138, "step": 7711 }, { "epoch": 1.2589282070119587, "grad_norm": 2.8573250770568848, "learning_rate": 1.9088264310030145e-05, "loss": 0.6471, "step": 7712 }, { "epoch": 1.2590914656544632, "grad_norm": 3.0701863765716553, "learning_rate": 1.9087996823560404e-05, "loss": 0.8554, "step": 7713 }, { "epoch": 1.2592547242969676, "grad_norm": 2.9650917053222656, "learning_rate": 1.9087729299733338e-05, "loss": 0.7364, "step": 7714 }, { "epoch": 1.2594179829394718, "grad_norm": 2.8284764289855957, "learning_rate": 1.908746173855005e-05, "loss": 0.8303, "step": 7715 }, { "epoch": 1.2595812415819763, "grad_norm": 2.895660877227783, "learning_rate": 1.9087194140011632e-05, "loss": 0.6502, "step": 7716 }, { "epoch": 1.2597445002244807, "grad_norm": 2.8011209964752197, "learning_rate": 1.90869265041192e-05, "loss": 0.6902, "step": 7717 }, { "epoch": 1.259907758866985, "grad_norm": 3.158583164215088, "learning_rate": 1.9086658830873835e-05, "loss": 0.7456, "step": 7718 }, { "epoch": 1.2600710175094894, "grad_norm": 2.9304730892181396, "learning_rate": 1.9086391120276652e-05, "loss": 0.6886, "step": 7719 }, { "epoch": 1.2602342761519938, "grad_norm": 2.60298490524292, "learning_rate": 1.9086123372328748e-05, "loss": 0.6415, "step": 7720 }, { "epoch": 1.2603975347944982, "grad_norm": 2.5770223140716553, "learning_rate": 1.9085855587031217e-05, "loss": 0.5465, "step": 7721 }, { "epoch": 1.2605607934370027, "grad_norm": 2.8012144565582275, "learning_rate": 1.9085587764385164e-05, "loss": 0.6894, "step": 7722 }, { "epoch": 1.2607240520795069, "grad_norm": 2.416428565979004, "learning_rate": 1.9085319904391695e-05, "loss": 0.6227, "step": 7723 }, { "epoch": 1.2608873107220113, "grad_norm": 3.016737461090088, "learning_rate": 1.90850520070519e-05, "loss": 0.7087, "step": 7724 }, { "epoch": 1.2610505693645158, "grad_norm": 2.3094077110290527, "learning_rate": 1.9084784072366897e-05, "loss": 0.5427, "step": 7725 }, { "epoch": 1.2612138280070202, "grad_norm": 2.694988965988159, "learning_rate": 1.9084516100337768e-05, "loss": 0.6672, "step": 7726 }, { "epoch": 1.2613770866495244, "grad_norm": 3.224515438079834, "learning_rate": 1.9084248090965623e-05, "loss": 0.8769, "step": 7727 }, { "epoch": 1.2615403452920289, "grad_norm": 2.290627956390381, "learning_rate": 1.908398004425157e-05, "loss": 0.6021, "step": 7728 }, { "epoch": 1.2617036039345333, "grad_norm": 2.9517745971679688, "learning_rate": 1.9083711960196703e-05, "loss": 0.7409, "step": 7729 }, { "epoch": 1.2618668625770377, "grad_norm": 2.491177797317505, "learning_rate": 1.9083443838802127e-05, "loss": 0.6551, "step": 7730 }, { "epoch": 1.2620301212195422, "grad_norm": 2.37190580368042, "learning_rate": 1.908317568006894e-05, "loss": 0.6872, "step": 7731 }, { "epoch": 1.2621933798620464, "grad_norm": 2.6178994178771973, "learning_rate": 1.908290748399825e-05, "loss": 0.7395, "step": 7732 }, { "epoch": 1.2623566385045508, "grad_norm": 3.2919857501983643, "learning_rate": 1.9082639250591156e-05, "loss": 0.8083, "step": 7733 }, { "epoch": 1.2625198971470553, "grad_norm": 2.9490888118743896, "learning_rate": 1.908237097984876e-05, "loss": 0.6507, "step": 7734 }, { "epoch": 1.2626831557895595, "grad_norm": 3.131563186645508, "learning_rate": 1.9082102671772168e-05, "loss": 0.8431, "step": 7735 }, { "epoch": 1.262846414432064, "grad_norm": 3.4879045486450195, "learning_rate": 1.9081834326362486e-05, "loss": 0.6914, "step": 7736 }, { "epoch": 1.2630096730745684, "grad_norm": 3.364062547683716, "learning_rate": 1.9081565943620807e-05, "loss": 0.795, "step": 7737 }, { "epoch": 1.2631729317170728, "grad_norm": 2.864616632461548, "learning_rate": 1.908129752354824e-05, "loss": 0.7439, "step": 7738 }, { "epoch": 1.2633361903595772, "grad_norm": 3.0219345092773438, "learning_rate": 1.908102906614589e-05, "loss": 0.7368, "step": 7739 }, { "epoch": 1.2634994490020817, "grad_norm": 2.480545997619629, "learning_rate": 1.9080760571414853e-05, "loss": 0.6826, "step": 7740 }, { "epoch": 1.2636627076445859, "grad_norm": 2.928072214126587, "learning_rate": 1.9080492039356244e-05, "loss": 0.9108, "step": 7741 }, { "epoch": 1.2638259662870903, "grad_norm": 2.8679912090301514, "learning_rate": 1.908022346997116e-05, "loss": 0.8202, "step": 7742 }, { "epoch": 1.2639892249295948, "grad_norm": 2.8133385181427, "learning_rate": 1.9079954863260704e-05, "loss": 0.6998, "step": 7743 }, { "epoch": 1.264152483572099, "grad_norm": 3.1380960941314697, "learning_rate": 1.9079686219225982e-05, "loss": 1.285, "step": 7744 }, { "epoch": 1.2643157422146034, "grad_norm": 2.7672431468963623, "learning_rate": 1.90794175378681e-05, "loss": 0.7831, "step": 7745 }, { "epoch": 1.2644790008571078, "grad_norm": 2.27591609954834, "learning_rate": 1.907914881918816e-05, "loss": 0.5625, "step": 7746 }, { "epoch": 1.2646422594996123, "grad_norm": 2.8159587383270264, "learning_rate": 1.907888006318727e-05, "loss": 0.7289, "step": 7747 }, { "epoch": 1.2648055181421167, "grad_norm": 3.0947210788726807, "learning_rate": 1.9078611269866527e-05, "loss": 0.7465, "step": 7748 }, { "epoch": 1.2649687767846212, "grad_norm": 2.8701071739196777, "learning_rate": 1.9078342439227042e-05, "loss": 0.8695, "step": 7749 }, { "epoch": 1.2651320354271254, "grad_norm": 2.5936219692230225, "learning_rate": 1.9078073571269922e-05, "loss": 0.5626, "step": 7750 }, { "epoch": 1.2652952940696298, "grad_norm": 2.7000043392181396, "learning_rate": 1.907780466599627e-05, "loss": 0.7203, "step": 7751 }, { "epoch": 1.2654585527121343, "grad_norm": 2.376465320587158, "learning_rate": 1.9077535723407188e-05, "loss": 0.5855, "step": 7752 }, { "epoch": 1.2656218113546385, "grad_norm": 2.457768201828003, "learning_rate": 1.9077266743503788e-05, "loss": 0.5623, "step": 7753 }, { "epoch": 1.265785069997143, "grad_norm": 2.6449592113494873, "learning_rate": 1.9076997726287166e-05, "loss": 0.7064, "step": 7754 }, { "epoch": 1.2659483286396473, "grad_norm": 3.013856887817383, "learning_rate": 1.907672867175844e-05, "loss": 0.8075, "step": 7755 }, { "epoch": 1.2661115872821518, "grad_norm": 2.8492958545684814, "learning_rate": 1.9076459579918706e-05, "loss": 0.6657, "step": 7756 }, { "epoch": 1.2662748459246562, "grad_norm": 3.039903163909912, "learning_rate": 1.9076190450769072e-05, "loss": 0.7826, "step": 7757 }, { "epoch": 1.2664381045671607, "grad_norm": 2.5588529109954834, "learning_rate": 1.9075921284310653e-05, "loss": 0.7158, "step": 7758 }, { "epoch": 1.2666013632096649, "grad_norm": 2.854156970977783, "learning_rate": 1.9075652080544542e-05, "loss": 0.7165, "step": 7759 }, { "epoch": 1.2667646218521693, "grad_norm": 2.707341432571411, "learning_rate": 1.9075382839471858e-05, "loss": 0.6926, "step": 7760 }, { "epoch": 1.2669278804946738, "grad_norm": 3.324941396713257, "learning_rate": 1.9075113561093698e-05, "loss": 1.4834, "step": 7761 }, { "epoch": 1.267091139137178, "grad_norm": 2.9125146865844727, "learning_rate": 1.907484424541117e-05, "loss": 1.6415, "step": 7762 }, { "epoch": 1.2672543977796824, "grad_norm": 3.8244380950927734, "learning_rate": 1.907457489242539e-05, "loss": 0.9907, "step": 7763 }, { "epoch": 1.2674176564221868, "grad_norm": 2.5473601818084717, "learning_rate": 1.9074305502137457e-05, "loss": 0.7448, "step": 7764 }, { "epoch": 1.2675809150646913, "grad_norm": 3.1485350131988525, "learning_rate": 1.907403607454848e-05, "loss": 0.802, "step": 7765 }, { "epoch": 1.2677441737071957, "grad_norm": 4.025180816650391, "learning_rate": 1.9073766609659564e-05, "loss": 0.8367, "step": 7766 }, { "epoch": 1.2679074323497, "grad_norm": 2.5998971462249756, "learning_rate": 1.9073497107471823e-05, "loss": 0.5092, "step": 7767 }, { "epoch": 1.2680706909922044, "grad_norm": 2.9350945949554443, "learning_rate": 1.9073227567986362e-05, "loss": 0.7216, "step": 7768 }, { "epoch": 1.2682339496347088, "grad_norm": 2.9601755142211914, "learning_rate": 1.9072957991204285e-05, "loss": 0.8101, "step": 7769 }, { "epoch": 1.2683972082772133, "grad_norm": 2.8004307746887207, "learning_rate": 1.907268837712671e-05, "loss": 0.6896, "step": 7770 }, { "epoch": 1.2685604669197175, "grad_norm": 2.512932538986206, "learning_rate": 1.9072418725754732e-05, "loss": 0.6811, "step": 7771 }, { "epoch": 1.268723725562222, "grad_norm": 2.2753357887268066, "learning_rate": 1.9072149037089473e-05, "loss": 0.5379, "step": 7772 }, { "epoch": 1.2688869842047263, "grad_norm": 3.090141773223877, "learning_rate": 1.9071879311132032e-05, "loss": 0.8442, "step": 7773 }, { "epoch": 1.2690502428472308, "grad_norm": 2.465485095977783, "learning_rate": 1.907160954788352e-05, "loss": 0.6469, "step": 7774 }, { "epoch": 1.2692135014897352, "grad_norm": 2.628340244293213, "learning_rate": 1.9071339747345045e-05, "loss": 0.7012, "step": 7775 }, { "epoch": 1.2693767601322394, "grad_norm": 2.755981683731079, "learning_rate": 1.9071069909517717e-05, "loss": 0.6539, "step": 7776 }, { "epoch": 1.2695400187747439, "grad_norm": 2.2212016582489014, "learning_rate": 1.9070800034402647e-05, "loss": 0.6107, "step": 7777 }, { "epoch": 1.2697032774172483, "grad_norm": 2.6539182662963867, "learning_rate": 1.9070530122000946e-05, "loss": 0.5948, "step": 7778 }, { "epoch": 1.2698665360597525, "grad_norm": 2.853976011276245, "learning_rate": 1.9070260172313718e-05, "loss": 0.7773, "step": 7779 }, { "epoch": 1.270029794702257, "grad_norm": 2.9053738117218018, "learning_rate": 1.9069990185342076e-05, "loss": 0.765, "step": 7780 }, { "epoch": 1.2701930533447614, "grad_norm": 2.695101737976074, "learning_rate": 1.906972016108713e-05, "loss": 0.6249, "step": 7781 }, { "epoch": 1.2703563119872658, "grad_norm": 2.6766505241394043, "learning_rate": 1.9069450099549987e-05, "loss": 0.6405, "step": 7782 }, { "epoch": 1.2705195706297703, "grad_norm": 2.555734634399414, "learning_rate": 1.906918000073176e-05, "loss": 0.6093, "step": 7783 }, { "epoch": 1.2706828292722747, "grad_norm": 2.9289278984069824, "learning_rate": 1.9068909864633558e-05, "loss": 0.5832, "step": 7784 }, { "epoch": 1.270846087914779, "grad_norm": 3.2362136840820312, "learning_rate": 1.9068639691256493e-05, "loss": 0.6661, "step": 7785 }, { "epoch": 1.2710093465572834, "grad_norm": 3.3331356048583984, "learning_rate": 1.9068369480601677e-05, "loss": 0.8544, "step": 7786 }, { "epoch": 1.2711726051997878, "grad_norm": 2.7387900352478027, "learning_rate": 1.9068099232670215e-05, "loss": 0.6427, "step": 7787 }, { "epoch": 1.271335863842292, "grad_norm": 2.8579728603363037, "learning_rate": 1.9067828947463222e-05, "loss": 0.5558, "step": 7788 }, { "epoch": 1.2714991224847965, "grad_norm": 2.7831923961639404, "learning_rate": 1.9067558624981805e-05, "loss": 0.6782, "step": 7789 }, { "epoch": 1.271662381127301, "grad_norm": 2.6714727878570557, "learning_rate": 1.9067288265227083e-05, "loss": 0.6267, "step": 7790 }, { "epoch": 1.2718256397698053, "grad_norm": 2.943289041519165, "learning_rate": 1.906701786820016e-05, "loss": 0.725, "step": 7791 }, { "epoch": 1.2719888984123098, "grad_norm": 3.1005146503448486, "learning_rate": 1.906674743390215e-05, "loss": 0.7164, "step": 7792 }, { "epoch": 1.2721521570548142, "grad_norm": 3.2816028594970703, "learning_rate": 1.9066476962334168e-05, "loss": 0.7556, "step": 7793 }, { "epoch": 1.2723154156973184, "grad_norm": 3.0574769973754883, "learning_rate": 1.9066206453497317e-05, "loss": 0.7531, "step": 7794 }, { "epoch": 1.2724786743398229, "grad_norm": 3.092674970626831, "learning_rate": 1.9065935907392717e-05, "loss": 0.6606, "step": 7795 }, { "epoch": 1.2726419329823273, "grad_norm": 2.289698362350464, "learning_rate": 1.906566532402148e-05, "loss": 0.5813, "step": 7796 }, { "epoch": 1.2728051916248315, "grad_norm": 2.574268102645874, "learning_rate": 1.9065394703384717e-05, "loss": 0.6802, "step": 7797 }, { "epoch": 1.272968450267336, "grad_norm": 2.8571789264678955, "learning_rate": 1.9065124045483535e-05, "loss": 0.6497, "step": 7798 }, { "epoch": 1.2731317089098404, "grad_norm": 2.6562275886535645, "learning_rate": 1.9064853350319053e-05, "loss": 0.7119, "step": 7799 }, { "epoch": 1.2732949675523448, "grad_norm": 3.0540664196014404, "learning_rate": 1.9064582617892383e-05, "loss": 0.6696, "step": 7800 }, { "epoch": 1.2734582261948493, "grad_norm": 2.762774705886841, "learning_rate": 1.9064311848204634e-05, "loss": 0.6876, "step": 7801 }, { "epoch": 1.2736214848373537, "grad_norm": 2.430539608001709, "learning_rate": 1.9064041041256923e-05, "loss": 0.6933, "step": 7802 }, { "epoch": 1.273784743479858, "grad_norm": 2.814669132232666, "learning_rate": 1.9063770197050363e-05, "loss": 0.9327, "step": 7803 }, { "epoch": 1.2739480021223624, "grad_norm": 2.7109556198120117, "learning_rate": 1.9063499315586064e-05, "loss": 0.7136, "step": 7804 }, { "epoch": 1.2741112607648668, "grad_norm": 2.6399803161621094, "learning_rate": 1.9063228396865145e-05, "loss": 0.6107, "step": 7805 }, { "epoch": 1.274274519407371, "grad_norm": 2.720888137817383, "learning_rate": 1.9062957440888712e-05, "loss": 0.6796, "step": 7806 }, { "epoch": 1.2744377780498755, "grad_norm": 2.4832239151000977, "learning_rate": 1.9062686447657886e-05, "loss": 0.5566, "step": 7807 }, { "epoch": 1.27460103669238, "grad_norm": 2.7477524280548096, "learning_rate": 1.906241541717378e-05, "loss": 0.7152, "step": 7808 }, { "epoch": 1.2747642953348843, "grad_norm": 2.916264772415161, "learning_rate": 1.9062144349437498e-05, "loss": 0.5641, "step": 7809 }, { "epoch": 1.2749275539773888, "grad_norm": 2.79193377494812, "learning_rate": 1.906187324445017e-05, "loss": 0.6623, "step": 7810 }, { "epoch": 1.275090812619893, "grad_norm": 2.5921175479888916, "learning_rate": 1.9061602102212898e-05, "loss": 0.6998, "step": 7811 }, { "epoch": 1.2752540712623974, "grad_norm": 3.100646734237671, "learning_rate": 1.9061330922726805e-05, "loss": 0.8643, "step": 7812 }, { "epoch": 1.2754173299049019, "grad_norm": 2.7846152782440186, "learning_rate": 1.9061059705993e-05, "loss": 0.7042, "step": 7813 }, { "epoch": 1.2755805885474063, "grad_norm": 2.540712833404541, "learning_rate": 1.9060788452012603e-05, "loss": 0.6543, "step": 7814 }, { "epoch": 1.2757438471899105, "grad_norm": 3.0438811779022217, "learning_rate": 1.9060517160786723e-05, "loss": 0.7878, "step": 7815 }, { "epoch": 1.275907105832415, "grad_norm": 3.0960519313812256, "learning_rate": 1.906024583231648e-05, "loss": 0.8734, "step": 7816 }, { "epoch": 1.2760703644749194, "grad_norm": 2.7581403255462646, "learning_rate": 1.9059974466602982e-05, "loss": 0.6671, "step": 7817 }, { "epoch": 1.2762336231174238, "grad_norm": 2.636047124862671, "learning_rate": 1.9059703063647358e-05, "loss": 0.652, "step": 7818 }, { "epoch": 1.2763968817599283, "grad_norm": 3.2462711334228516, "learning_rate": 1.905943162345071e-05, "loss": 0.8445, "step": 7819 }, { "epoch": 1.2765601404024325, "grad_norm": 2.7776741981506348, "learning_rate": 1.905916014601416e-05, "loss": 0.6964, "step": 7820 }, { "epoch": 1.276723399044937, "grad_norm": 2.6724886894226074, "learning_rate": 1.9058888631338828e-05, "loss": 0.6457, "step": 7821 }, { "epoch": 1.2768866576874414, "grad_norm": 2.648681163787842, "learning_rate": 1.905861707942582e-05, "loss": 0.6364, "step": 7822 }, { "epoch": 1.2770499163299456, "grad_norm": 2.5624823570251465, "learning_rate": 1.905834549027626e-05, "loss": 0.644, "step": 7823 }, { "epoch": 1.27721317497245, "grad_norm": 2.9017927646636963, "learning_rate": 1.9058073863891264e-05, "loss": 0.6512, "step": 7824 }, { "epoch": 1.2773764336149545, "grad_norm": 3.165364980697632, "learning_rate": 1.9057802200271943e-05, "loss": 0.7751, "step": 7825 }, { "epoch": 1.277539692257459, "grad_norm": 2.692514657974243, "learning_rate": 1.9057530499419415e-05, "loss": 0.7336, "step": 7826 }, { "epoch": 1.2777029508999633, "grad_norm": 2.9022457599639893, "learning_rate": 1.9057258761334805e-05, "loss": 0.8811, "step": 7827 }, { "epoch": 1.2778662095424678, "grad_norm": 3.2952187061309814, "learning_rate": 1.9056986986019225e-05, "loss": 1.4016, "step": 7828 }, { "epoch": 1.278029468184972, "grad_norm": 2.829596996307373, "learning_rate": 1.905671517347379e-05, "loss": 0.7221, "step": 7829 }, { "epoch": 1.2781927268274764, "grad_norm": 3.0845210552215576, "learning_rate": 1.905644332369961e-05, "loss": 0.6932, "step": 7830 }, { "epoch": 1.2783559854699809, "grad_norm": 2.749657154083252, "learning_rate": 1.905617143669782e-05, "loss": 0.7489, "step": 7831 }, { "epoch": 1.278519244112485, "grad_norm": 2.934802293777466, "learning_rate": 1.9055899512469528e-05, "loss": 0.6209, "step": 7832 }, { "epoch": 1.2786825027549895, "grad_norm": 2.6710028648376465, "learning_rate": 1.9055627551015852e-05, "loss": 0.6705, "step": 7833 }, { "epoch": 1.278845761397494, "grad_norm": 2.7956347465515137, "learning_rate": 1.905535555233791e-05, "loss": 0.761, "step": 7834 }, { "epoch": 1.2790090200399984, "grad_norm": 2.3375117778778076, "learning_rate": 1.905508351643682e-05, "loss": 0.73, "step": 7835 }, { "epoch": 1.2791722786825028, "grad_norm": 2.7696731090545654, "learning_rate": 1.90548114433137e-05, "loss": 0.6784, "step": 7836 }, { "epoch": 1.2793355373250073, "grad_norm": 2.754210948944092, "learning_rate": 1.9054539332969673e-05, "loss": 0.6243, "step": 7837 }, { "epoch": 1.2794987959675115, "grad_norm": 2.2881996631622314, "learning_rate": 1.905426718540585e-05, "loss": 0.5506, "step": 7838 }, { "epoch": 1.279662054610016, "grad_norm": 2.7885477542877197, "learning_rate": 1.9053995000623356e-05, "loss": 0.8903, "step": 7839 }, { "epoch": 1.2798253132525204, "grad_norm": 2.8318686485290527, "learning_rate": 1.9053722778623304e-05, "loss": 0.7291, "step": 7840 }, { "epoch": 1.2799885718950246, "grad_norm": 3.0254740715026855, "learning_rate": 1.905345051940682e-05, "loss": 0.7416, "step": 7841 }, { "epoch": 1.280151830537529, "grad_norm": 3.3510165214538574, "learning_rate": 1.905317822297502e-05, "loss": 0.8285, "step": 7842 }, { "epoch": 1.2803150891800334, "grad_norm": 3.042635917663574, "learning_rate": 1.9052905889329018e-05, "loss": 0.7846, "step": 7843 }, { "epoch": 1.2804783478225379, "grad_norm": 3.1776046752929688, "learning_rate": 1.905263351846994e-05, "loss": 0.7174, "step": 7844 }, { "epoch": 1.2806416064650423, "grad_norm": 2.438131332397461, "learning_rate": 1.9052361110398904e-05, "loss": 0.6208, "step": 7845 }, { "epoch": 1.2808048651075468, "grad_norm": 3.2457568645477295, "learning_rate": 1.905208866511703e-05, "loss": 0.8347, "step": 7846 }, { "epoch": 1.280968123750051, "grad_norm": 3.0216586589813232, "learning_rate": 1.9051816182625436e-05, "loss": 0.6653, "step": 7847 }, { "epoch": 1.2811313823925554, "grad_norm": 2.4764909744262695, "learning_rate": 1.905154366292525e-05, "loss": 0.6385, "step": 7848 }, { "epoch": 1.2812946410350599, "grad_norm": 2.9901504516601562, "learning_rate": 1.905127110601758e-05, "loss": 0.6955, "step": 7849 }, { "epoch": 1.281457899677564, "grad_norm": 2.879608154296875, "learning_rate": 1.905099851190355e-05, "loss": 0.7134, "step": 7850 }, { "epoch": 1.2816211583200685, "grad_norm": 2.77852201461792, "learning_rate": 1.905072588058429e-05, "loss": 0.7391, "step": 7851 }, { "epoch": 1.281784416962573, "grad_norm": 2.8452529907226562, "learning_rate": 1.9050453212060907e-05, "loss": 0.6952, "step": 7852 }, { "epoch": 1.2819476756050774, "grad_norm": 3.2792582511901855, "learning_rate": 1.9050180506334532e-05, "loss": 0.8435, "step": 7853 }, { "epoch": 1.2821109342475818, "grad_norm": 2.4910717010498047, "learning_rate": 1.904990776340628e-05, "loss": 0.6166, "step": 7854 }, { "epoch": 1.282274192890086, "grad_norm": 3.520930290222168, "learning_rate": 1.9049634983277273e-05, "loss": 0.705, "step": 7855 }, { "epoch": 1.2824374515325905, "grad_norm": 2.4354538917541504, "learning_rate": 1.904936216594864e-05, "loss": 0.6616, "step": 7856 }, { "epoch": 1.282600710175095, "grad_norm": 2.830845832824707, "learning_rate": 1.904908931142149e-05, "loss": 0.6335, "step": 7857 }, { "epoch": 1.2827639688175994, "grad_norm": 3.0573010444641113, "learning_rate": 1.9048816419696955e-05, "loss": 0.7883, "step": 7858 }, { "epoch": 1.2829272274601036, "grad_norm": 2.9313838481903076, "learning_rate": 1.9048543490776148e-05, "loss": 0.728, "step": 7859 }, { "epoch": 1.283090486102608, "grad_norm": 2.4294869899749756, "learning_rate": 1.9048270524660197e-05, "loss": 0.5978, "step": 7860 }, { "epoch": 1.2832537447451124, "grad_norm": 3.1198534965515137, "learning_rate": 1.9047997521350222e-05, "loss": 0.7453, "step": 7861 }, { "epoch": 1.2834170033876169, "grad_norm": 2.976077079772949, "learning_rate": 1.9047724480847346e-05, "loss": 0.7949, "step": 7862 }, { "epoch": 1.2835802620301213, "grad_norm": 2.5468404293060303, "learning_rate": 1.9047451403152693e-05, "loss": 0.6137, "step": 7863 }, { "epoch": 1.2837435206726255, "grad_norm": 2.7675726413726807, "learning_rate": 1.904717828826738e-05, "loss": 0.6394, "step": 7864 }, { "epoch": 1.28390677931513, "grad_norm": 2.9473392963409424, "learning_rate": 1.9046905136192536e-05, "loss": 0.7392, "step": 7865 }, { "epoch": 1.2840700379576344, "grad_norm": 2.1691412925720215, "learning_rate": 1.9046631946929285e-05, "loss": 0.4776, "step": 7866 }, { "epoch": 1.2842332966001389, "grad_norm": 2.6116111278533936, "learning_rate": 1.904635872047874e-05, "loss": 0.7119, "step": 7867 }, { "epoch": 1.284396555242643, "grad_norm": 3.1738250255584717, "learning_rate": 1.904608545684203e-05, "loss": 0.7528, "step": 7868 }, { "epoch": 1.2845598138851475, "grad_norm": 2.8769452571868896, "learning_rate": 1.904581215602028e-05, "loss": 0.7299, "step": 7869 }, { "epoch": 1.284723072527652, "grad_norm": 2.7768101692199707, "learning_rate": 1.904553881801461e-05, "loss": 0.7106, "step": 7870 }, { "epoch": 1.2848863311701564, "grad_norm": 2.7895090579986572, "learning_rate": 1.9045265442826148e-05, "loss": 0.7066, "step": 7871 }, { "epoch": 1.2850495898126608, "grad_norm": 3.39233660697937, "learning_rate": 1.9044992030456014e-05, "loss": 0.941, "step": 7872 }, { "epoch": 1.285212848455165, "grad_norm": 2.827281951904297, "learning_rate": 1.9044718580905336e-05, "loss": 0.7491, "step": 7873 }, { "epoch": 1.2853761070976695, "grad_norm": 3.4564287662506104, "learning_rate": 1.9044445094175234e-05, "loss": 0.7558, "step": 7874 }, { "epoch": 1.285539365740174, "grad_norm": 2.7922887802124023, "learning_rate": 1.904417157026683e-05, "loss": 0.7042, "step": 7875 }, { "epoch": 1.2857026243826781, "grad_norm": 3.038754940032959, "learning_rate": 1.9043898009181255e-05, "loss": 0.7119, "step": 7876 }, { "epoch": 1.2858658830251826, "grad_norm": 3.009800434112549, "learning_rate": 1.9043624410919627e-05, "loss": 0.6946, "step": 7877 }, { "epoch": 1.286029141667687, "grad_norm": 3.467630624771118, "learning_rate": 1.9043350775483076e-05, "loss": 0.8592, "step": 7878 }, { "epoch": 1.2861924003101914, "grad_norm": 3.117810010910034, "learning_rate": 1.9043077102872725e-05, "loss": 0.6742, "step": 7879 }, { "epoch": 1.2863556589526959, "grad_norm": 3.31772518157959, "learning_rate": 1.90428033930897e-05, "loss": 0.7772, "step": 7880 }, { "epoch": 1.2865189175952003, "grad_norm": 2.77201771736145, "learning_rate": 1.904252964613512e-05, "loss": 0.7715, "step": 7881 }, { "epoch": 1.2866821762377045, "grad_norm": 2.965019941329956, "learning_rate": 1.904225586201012e-05, "loss": 0.8196, "step": 7882 }, { "epoch": 1.286845434880209, "grad_norm": 2.507441520690918, "learning_rate": 1.904198204071582e-05, "loss": 0.6751, "step": 7883 }, { "epoch": 1.2870086935227134, "grad_norm": 2.418356418609619, "learning_rate": 1.904170818225334e-05, "loss": 0.7516, "step": 7884 }, { "epoch": 1.2871719521652176, "grad_norm": 3.161771059036255, "learning_rate": 1.904143428662382e-05, "loss": 0.7293, "step": 7885 }, { "epoch": 1.287335210807722, "grad_norm": 3.2863869667053223, "learning_rate": 1.9041160353828373e-05, "loss": 0.885, "step": 7886 }, { "epoch": 1.2874984694502265, "grad_norm": 2.617231607437134, "learning_rate": 1.9040886383868133e-05, "loss": 0.6772, "step": 7887 }, { "epoch": 1.287661728092731, "grad_norm": 2.511538028717041, "learning_rate": 1.9040612376744217e-05, "loss": 0.7105, "step": 7888 }, { "epoch": 1.2878249867352354, "grad_norm": 3.0345749855041504, "learning_rate": 1.904033833245776e-05, "loss": 0.7296, "step": 7889 }, { "epoch": 1.2879882453777398, "grad_norm": 3.1500794887542725, "learning_rate": 1.9040064251009885e-05, "loss": 0.8264, "step": 7890 }, { "epoch": 1.288151504020244, "grad_norm": 2.591465473175049, "learning_rate": 1.9039790132401722e-05, "loss": 0.6189, "step": 7891 }, { "epoch": 1.2883147626627485, "grad_norm": 2.820983648300171, "learning_rate": 1.9039515976634396e-05, "loss": 0.7412, "step": 7892 }, { "epoch": 1.288478021305253, "grad_norm": 2.5439724922180176, "learning_rate": 1.903924178370903e-05, "loss": 0.7485, "step": 7893 }, { "epoch": 1.2886412799477571, "grad_norm": 2.3335564136505127, "learning_rate": 1.9038967553626753e-05, "loss": 0.5974, "step": 7894 }, { "epoch": 1.2888045385902616, "grad_norm": 2.9245219230651855, "learning_rate": 1.90386932863887e-05, "loss": 0.7337, "step": 7895 }, { "epoch": 1.288967797232766, "grad_norm": 3.044827699661255, "learning_rate": 1.9038418981995982e-05, "loss": 0.686, "step": 7896 }, { "epoch": 1.2891310558752704, "grad_norm": 2.441513776779175, "learning_rate": 1.9038144640449743e-05, "loss": 0.6712, "step": 7897 }, { "epoch": 1.2892943145177749, "grad_norm": 3.413270950317383, "learning_rate": 1.9037870261751105e-05, "loss": 0.8523, "step": 7898 }, { "epoch": 1.289457573160279, "grad_norm": 2.666276454925537, "learning_rate": 1.903759584590119e-05, "loss": 0.7391, "step": 7899 }, { "epoch": 1.2896208318027835, "grad_norm": 2.7218449115753174, "learning_rate": 1.9037321392901134e-05, "loss": 0.7513, "step": 7900 }, { "epoch": 1.289784090445288, "grad_norm": 2.6672286987304688, "learning_rate": 1.9037046902752062e-05, "loss": 0.6363, "step": 7901 }, { "epoch": 1.2899473490877924, "grad_norm": 2.6820225715637207, "learning_rate": 1.9036772375455106e-05, "loss": 0.5657, "step": 7902 }, { "epoch": 1.2901106077302966, "grad_norm": 2.5906014442443848, "learning_rate": 1.903649781101139e-05, "loss": 0.6066, "step": 7903 }, { "epoch": 1.290273866372801, "grad_norm": 3.4118764400482178, "learning_rate": 1.9036223209422034e-05, "loss": 0.7837, "step": 7904 }, { "epoch": 1.2904371250153055, "grad_norm": 2.6182966232299805, "learning_rate": 1.9035948570688187e-05, "loss": 0.6001, "step": 7905 }, { "epoch": 1.29060038365781, "grad_norm": 4.076473236083984, "learning_rate": 1.9035673894810963e-05, "loss": 0.8991, "step": 7906 }, { "epoch": 1.2907636423003144, "grad_norm": 3.142991065979004, "learning_rate": 1.9035399181791497e-05, "loss": 0.7345, "step": 7907 }, { "epoch": 1.2909269009428186, "grad_norm": 2.6604979038238525, "learning_rate": 1.9035124431630916e-05, "loss": 0.6741, "step": 7908 }, { "epoch": 1.291090159585323, "grad_norm": 2.9787027835845947, "learning_rate": 1.903484964433035e-05, "loss": 0.6939, "step": 7909 }, { "epoch": 1.2912534182278275, "grad_norm": 2.453565835952759, "learning_rate": 1.9034574819890926e-05, "loss": 0.6345, "step": 7910 }, { "epoch": 1.291416676870332, "grad_norm": 2.3637404441833496, "learning_rate": 1.903429995831378e-05, "loss": 0.5571, "step": 7911 }, { "epoch": 1.2915799355128361, "grad_norm": 2.421027183532715, "learning_rate": 1.903402505960004e-05, "loss": 0.625, "step": 7912 }, { "epoch": 1.2917431941553406, "grad_norm": 2.5285048484802246, "learning_rate": 1.903375012375083e-05, "loss": 0.653, "step": 7913 }, { "epoch": 1.291906452797845, "grad_norm": 2.753702402114868, "learning_rate": 1.9033475150767285e-05, "loss": 0.7194, "step": 7914 }, { "epoch": 1.2920697114403494, "grad_norm": 3.1320693492889404, "learning_rate": 1.9033200140650535e-05, "loss": 1.4493, "step": 7915 }, { "epoch": 1.2922329700828539, "grad_norm": 2.9104626178741455, "learning_rate": 1.903292509340171e-05, "loss": 0.806, "step": 7916 }, { "epoch": 1.292396228725358, "grad_norm": 3.1813416481018066, "learning_rate": 1.9032650009021943e-05, "loss": 0.823, "step": 7917 }, { "epoch": 1.2925594873678625, "grad_norm": 2.5831151008605957, "learning_rate": 1.903237488751236e-05, "loss": 0.6013, "step": 7918 }, { "epoch": 1.292722746010367, "grad_norm": 2.8608710765838623, "learning_rate": 1.9032099728874095e-05, "loss": 0.6645, "step": 7919 }, { "epoch": 1.2928860046528712, "grad_norm": 2.6182639598846436, "learning_rate": 1.903182453310828e-05, "loss": 0.633, "step": 7920 }, { "epoch": 1.2930492632953756, "grad_norm": 2.6522786617279053, "learning_rate": 1.9031549300216042e-05, "loss": 0.5605, "step": 7921 }, { "epoch": 1.29321252193788, "grad_norm": 2.9280102252960205, "learning_rate": 1.9031274030198518e-05, "loss": 0.8206, "step": 7922 }, { "epoch": 1.2933757805803845, "grad_norm": 2.6778621673583984, "learning_rate": 1.9030998723056833e-05, "loss": 0.7524, "step": 7923 }, { "epoch": 1.293539039222889, "grad_norm": 3.13012957572937, "learning_rate": 1.9030723378792128e-05, "loss": 0.7204, "step": 7924 }, { "epoch": 1.2937022978653934, "grad_norm": 3.1328694820404053, "learning_rate": 1.9030447997405522e-05, "loss": 0.7282, "step": 7925 }, { "epoch": 1.2938655565078976, "grad_norm": 2.8548784255981445, "learning_rate": 1.9030172578898155e-05, "loss": 0.645, "step": 7926 }, { "epoch": 1.294028815150402, "grad_norm": 3.130197286605835, "learning_rate": 1.9029897123271165e-05, "loss": 0.7377, "step": 7927 }, { "epoch": 1.2941920737929065, "grad_norm": 2.624157190322876, "learning_rate": 1.9029621630525674e-05, "loss": 0.5527, "step": 7928 }, { "epoch": 1.2943553324354107, "grad_norm": 2.582610845565796, "learning_rate": 1.9029346100662812e-05, "loss": 0.7098, "step": 7929 }, { "epoch": 1.2945185910779151, "grad_norm": 3.23158597946167, "learning_rate": 1.9029070533683725e-05, "loss": 0.9759, "step": 7930 }, { "epoch": 1.2946818497204196, "grad_norm": 2.747173309326172, "learning_rate": 1.902879492958954e-05, "loss": 0.7503, "step": 7931 }, { "epoch": 1.294845108362924, "grad_norm": 3.0206503868103027, "learning_rate": 1.902851928838138e-05, "loss": 0.6945, "step": 7932 }, { "epoch": 1.2950083670054284, "grad_norm": 2.7653932571411133, "learning_rate": 1.9028243610060392e-05, "loss": 0.7357, "step": 7933 }, { "epoch": 1.2951716256479329, "grad_norm": 3.1086838245391846, "learning_rate": 1.9027967894627703e-05, "loss": 0.7668, "step": 7934 }, { "epoch": 1.295334884290437, "grad_norm": 2.8319263458251953, "learning_rate": 1.9027692142084446e-05, "loss": 0.7413, "step": 7935 }, { "epoch": 1.2954981429329415, "grad_norm": 3.295945644378662, "learning_rate": 1.9027416352431755e-05, "loss": 0.9477, "step": 7936 }, { "epoch": 1.295661401575446, "grad_norm": 2.6393330097198486, "learning_rate": 1.9027140525670767e-05, "loss": 0.696, "step": 7937 }, { "epoch": 1.2958246602179502, "grad_norm": 2.4997291564941406, "learning_rate": 1.9026864661802607e-05, "loss": 0.5495, "step": 7938 }, { "epoch": 1.2959879188604546, "grad_norm": 2.717301607131958, "learning_rate": 1.902658876082842e-05, "loss": 0.6172, "step": 7939 }, { "epoch": 1.296151177502959, "grad_norm": 2.873406410217285, "learning_rate": 1.9026312822749333e-05, "loss": 0.594, "step": 7940 }, { "epoch": 1.2963144361454635, "grad_norm": 2.788275957107544, "learning_rate": 1.9026036847566485e-05, "loss": 0.6793, "step": 7941 }, { "epoch": 1.296477694787968, "grad_norm": 2.6042709350585938, "learning_rate": 1.9025760835281003e-05, "loss": 0.6477, "step": 7942 }, { "epoch": 1.2966409534304721, "grad_norm": 3.349609851837158, "learning_rate": 1.9025484785894027e-05, "loss": 0.9007, "step": 7943 }, { "epoch": 1.2968042120729766, "grad_norm": 2.8316781520843506, "learning_rate": 1.9025208699406693e-05, "loss": 0.8635, "step": 7944 }, { "epoch": 1.296967470715481, "grad_norm": 3.010915994644165, "learning_rate": 1.9024932575820135e-05, "loss": 0.6809, "step": 7945 }, { "epoch": 1.2971307293579855, "grad_norm": 3.4151721000671387, "learning_rate": 1.9024656415135486e-05, "loss": 0.8141, "step": 7946 }, { "epoch": 1.2972939880004897, "grad_norm": 2.3833818435668945, "learning_rate": 1.902438021735388e-05, "loss": 0.5328, "step": 7947 }, { "epoch": 1.297457246642994, "grad_norm": 2.8162338733673096, "learning_rate": 1.9024103982476458e-05, "loss": 0.6176, "step": 7948 }, { "epoch": 1.2976205052854985, "grad_norm": 2.766007661819458, "learning_rate": 1.902382771050435e-05, "loss": 0.7216, "step": 7949 }, { "epoch": 1.297783763928003, "grad_norm": 2.785282611846924, "learning_rate": 1.9023551401438693e-05, "loss": 0.744, "step": 7950 }, { "epoch": 1.2979470225705074, "grad_norm": 2.7588133811950684, "learning_rate": 1.9023275055280622e-05, "loss": 0.6455, "step": 7951 }, { "epoch": 1.2981102812130116, "grad_norm": 2.6083285808563232, "learning_rate": 1.902299867203128e-05, "loss": 0.7058, "step": 7952 }, { "epoch": 1.298273539855516, "grad_norm": 3.024470090866089, "learning_rate": 1.9022722251691795e-05, "loss": 0.7772, "step": 7953 }, { "epoch": 1.2984367984980205, "grad_norm": 2.7865207195281982, "learning_rate": 1.9022445794263302e-05, "loss": 0.6835, "step": 7954 }, { "epoch": 1.298600057140525, "grad_norm": 2.185546875, "learning_rate": 1.9022169299746943e-05, "loss": 0.5484, "step": 7955 }, { "epoch": 1.2987633157830292, "grad_norm": 3.2854807376861572, "learning_rate": 1.9021892768143856e-05, "loss": 0.8897, "step": 7956 }, { "epoch": 1.2989265744255336, "grad_norm": 3.3794491291046143, "learning_rate": 1.9021616199455173e-05, "loss": 0.8025, "step": 7957 }, { "epoch": 1.299089833068038, "grad_norm": 2.544565200805664, "learning_rate": 1.902133959368203e-05, "loss": 0.7122, "step": 7958 }, { "epoch": 1.2992530917105425, "grad_norm": 2.755850076675415, "learning_rate": 1.9021062950825565e-05, "loss": 0.6342, "step": 7959 }, { "epoch": 1.299416350353047, "grad_norm": 2.5901851654052734, "learning_rate": 1.902078627088692e-05, "loss": 0.6731, "step": 7960 }, { "epoch": 1.2995796089955511, "grad_norm": 2.602220058441162, "learning_rate": 1.902050955386723e-05, "loss": 0.6174, "step": 7961 }, { "epoch": 1.2997428676380556, "grad_norm": 2.875816822052002, "learning_rate": 1.902023279976763e-05, "loss": 0.7431, "step": 7962 }, { "epoch": 1.29990612628056, "grad_norm": 3.0655014514923096, "learning_rate": 1.9019956008589257e-05, "loss": 0.8422, "step": 7963 }, { "epoch": 1.3000693849230642, "grad_norm": 3.512575387954712, "learning_rate": 1.9019679180333253e-05, "loss": 0.8539, "step": 7964 }, { "epoch": 1.3002326435655687, "grad_norm": 3.2399988174438477, "learning_rate": 1.9019402315000753e-05, "loss": 0.7825, "step": 7965 }, { "epoch": 1.300395902208073, "grad_norm": 2.853407621383667, "learning_rate": 1.9019125412592893e-05, "loss": 0.7788, "step": 7966 }, { "epoch": 1.3005591608505775, "grad_norm": 2.936326742172241, "learning_rate": 1.9018848473110818e-05, "loss": 0.6438, "step": 7967 }, { "epoch": 1.300722419493082, "grad_norm": 2.4372963905334473, "learning_rate": 1.901857149655566e-05, "loss": 0.6955, "step": 7968 }, { "epoch": 1.3008856781355864, "grad_norm": 2.820272445678711, "learning_rate": 1.901829448292856e-05, "loss": 0.761, "step": 7969 }, { "epoch": 1.3010489367780906, "grad_norm": 2.6757378578186035, "learning_rate": 1.9018017432230657e-05, "loss": 0.6754, "step": 7970 }, { "epoch": 1.301212195420595, "grad_norm": 3.323580026626587, "learning_rate": 1.901774034446309e-05, "loss": 0.8598, "step": 7971 }, { "epoch": 1.3013754540630995, "grad_norm": 2.4753401279449463, "learning_rate": 1.9017463219626998e-05, "loss": 0.6076, "step": 7972 }, { "epoch": 1.3015387127056037, "grad_norm": 2.58343768119812, "learning_rate": 1.901718605772352e-05, "loss": 0.6578, "step": 7973 }, { "epoch": 1.3017019713481082, "grad_norm": 2.619093656539917, "learning_rate": 1.9016908858753792e-05, "loss": 0.6294, "step": 7974 }, { "epoch": 1.3018652299906126, "grad_norm": 2.816650867462158, "learning_rate": 1.9016631622718963e-05, "loss": 0.6733, "step": 7975 }, { "epoch": 1.302028488633117, "grad_norm": 2.8973331451416016, "learning_rate": 1.9016354349620158e-05, "loss": 0.6671, "step": 7976 }, { "epoch": 1.3021917472756215, "grad_norm": 2.743603467941284, "learning_rate": 1.9016077039458528e-05, "loss": 0.7012, "step": 7977 }, { "epoch": 1.302355005918126, "grad_norm": 2.8576226234436035, "learning_rate": 1.9015799692235212e-05, "loss": 0.6544, "step": 7978 }, { "epoch": 1.3025182645606301, "grad_norm": 2.6299188137054443, "learning_rate": 1.9015522307951345e-05, "loss": 0.6227, "step": 7979 }, { "epoch": 1.3026815232031346, "grad_norm": 3.0807230472564697, "learning_rate": 1.9015244886608072e-05, "loss": 0.7925, "step": 7980 }, { "epoch": 1.302844781845639, "grad_norm": 2.7063517570495605, "learning_rate": 1.901496742820653e-05, "loss": 0.6968, "step": 7981 }, { "epoch": 1.3030080404881432, "grad_norm": 2.7362730503082275, "learning_rate": 1.901468993274786e-05, "loss": 0.6282, "step": 7982 }, { "epoch": 1.3031712991306477, "grad_norm": 2.8259854316711426, "learning_rate": 1.9014412400233203e-05, "loss": 0.7375, "step": 7983 }, { "epoch": 1.303334557773152, "grad_norm": 3.026306629180908, "learning_rate": 1.9014134830663705e-05, "loss": 0.8554, "step": 7984 }, { "epoch": 1.3034978164156565, "grad_norm": 3.212449550628662, "learning_rate": 1.9013857224040496e-05, "loss": 0.7265, "step": 7985 }, { "epoch": 1.303661075058161, "grad_norm": 2.4378840923309326, "learning_rate": 1.901357958036473e-05, "loss": 0.6597, "step": 7986 }, { "epoch": 1.3038243337006654, "grad_norm": 2.7582783699035645, "learning_rate": 1.9013301899637535e-05, "loss": 0.7309, "step": 7987 }, { "epoch": 1.3039875923431696, "grad_norm": 3.086233139038086, "learning_rate": 1.9013024181860066e-05, "loss": 0.802, "step": 7988 }, { "epoch": 1.304150850985674, "grad_norm": 2.791490316390991, "learning_rate": 1.9012746427033454e-05, "loss": 0.6611, "step": 7989 }, { "epoch": 1.3043141096281785, "grad_norm": 2.8927059173583984, "learning_rate": 1.9012468635158844e-05, "loss": 0.7305, "step": 7990 }, { "epoch": 1.3044773682706827, "grad_norm": 2.888796806335449, "learning_rate": 1.9012190806237377e-05, "loss": 0.7028, "step": 7991 }, { "epoch": 1.3046406269131872, "grad_norm": 3.0169689655303955, "learning_rate": 1.90119129402702e-05, "loss": 0.7642, "step": 7992 }, { "epoch": 1.3048038855556916, "grad_norm": 2.6369147300720215, "learning_rate": 1.9011635037258454e-05, "loss": 0.6707, "step": 7993 }, { "epoch": 1.304967144198196, "grad_norm": 2.4592173099517822, "learning_rate": 1.9011357097203276e-05, "loss": 0.6711, "step": 7994 }, { "epoch": 1.3051304028407005, "grad_norm": 2.6874160766601562, "learning_rate": 1.901107912010581e-05, "loss": 0.6279, "step": 7995 }, { "epoch": 1.3052936614832047, "grad_norm": 3.243159294128418, "learning_rate": 1.9010801105967203e-05, "loss": 0.7258, "step": 7996 }, { "epoch": 1.3054569201257091, "grad_norm": 2.784231662750244, "learning_rate": 1.9010523054788595e-05, "loss": 0.7591, "step": 7997 }, { "epoch": 1.3056201787682136, "grad_norm": 2.429199457168579, "learning_rate": 1.9010244966571126e-05, "loss": 0.7399, "step": 7998 }, { "epoch": 1.305783437410718, "grad_norm": 2.630986452102661, "learning_rate": 1.9009966841315945e-05, "loss": 0.703, "step": 7999 }, { "epoch": 1.3059466960532222, "grad_norm": 3.1096627712249756, "learning_rate": 1.900968867902419e-05, "loss": 0.7096, "step": 8000 }, { "epoch": 1.3061099546957267, "grad_norm": 1.9560695886611938, "learning_rate": 1.9009410479697013e-05, "loss": 0.5353, "step": 8001 }, { "epoch": 1.306273213338231, "grad_norm": 2.797008991241455, "learning_rate": 1.9009132243335545e-05, "loss": 0.7379, "step": 8002 }, { "epoch": 1.3064364719807355, "grad_norm": 2.3196606636047363, "learning_rate": 1.900885396994094e-05, "loss": 0.5807, "step": 8003 }, { "epoch": 1.30659973062324, "grad_norm": 2.8300065994262695, "learning_rate": 1.9008575659514335e-05, "loss": 0.6825, "step": 8004 }, { "epoch": 1.3067629892657442, "grad_norm": 2.505856990814209, "learning_rate": 1.900829731205688e-05, "loss": 0.7685, "step": 8005 }, { "epoch": 1.3069262479082486, "grad_norm": 2.713503360748291, "learning_rate": 1.900801892756971e-05, "loss": 0.7185, "step": 8006 }, { "epoch": 1.307089506550753, "grad_norm": 2.7588465213775635, "learning_rate": 1.9007740506053983e-05, "loss": 0.6924, "step": 8007 }, { "epoch": 1.3072527651932573, "grad_norm": 2.8986129760742188, "learning_rate": 1.900746204751083e-05, "loss": 0.6204, "step": 8008 }, { "epoch": 1.3074160238357617, "grad_norm": 3.2863965034484863, "learning_rate": 1.9007183551941407e-05, "loss": 0.7766, "step": 8009 }, { "epoch": 1.3075792824782662, "grad_norm": 2.472825527191162, "learning_rate": 1.9006905019346852e-05, "loss": 0.5987, "step": 8010 }, { "epoch": 1.3077425411207706, "grad_norm": 2.7289299964904785, "learning_rate": 1.9006626449728308e-05, "loss": 0.6488, "step": 8011 }, { "epoch": 1.307905799763275, "grad_norm": 2.8153982162475586, "learning_rate": 1.900634784308693e-05, "loss": 0.6944, "step": 8012 }, { "epoch": 1.3080690584057795, "grad_norm": 3.0567874908447266, "learning_rate": 1.900606919942385e-05, "loss": 0.6828, "step": 8013 }, { "epoch": 1.3082323170482837, "grad_norm": 2.6785547733306885, "learning_rate": 1.9005790518740222e-05, "loss": 0.7249, "step": 8014 }, { "epoch": 1.3083955756907881, "grad_norm": 3.2554898262023926, "learning_rate": 1.9005511801037194e-05, "loss": 0.7843, "step": 8015 }, { "epoch": 1.3085588343332926, "grad_norm": 2.5867762565612793, "learning_rate": 1.9005233046315903e-05, "loss": 0.6749, "step": 8016 }, { "epoch": 1.3087220929757968, "grad_norm": 2.855616331100464, "learning_rate": 1.90049542545775e-05, "loss": 0.5951, "step": 8017 }, { "epoch": 1.3088853516183012, "grad_norm": 2.986034393310547, "learning_rate": 1.900467542582313e-05, "loss": 0.6753, "step": 8018 }, { "epoch": 1.3090486102608057, "grad_norm": 2.8028934001922607, "learning_rate": 1.9004396560053942e-05, "loss": 0.7251, "step": 8019 }, { "epoch": 1.30921186890331, "grad_norm": 2.5716235637664795, "learning_rate": 1.900411765727108e-05, "loss": 0.668, "step": 8020 }, { "epoch": 1.3093751275458145, "grad_norm": 2.661648988723755, "learning_rate": 1.9003838717475683e-05, "loss": 0.6212, "step": 8021 }, { "epoch": 1.309538386188319, "grad_norm": 3.023463249206543, "learning_rate": 1.9003559740668913e-05, "loss": 0.6562, "step": 8022 }, { "epoch": 1.3097016448308232, "grad_norm": 2.301593780517578, "learning_rate": 1.9003280726851905e-05, "loss": 0.5552, "step": 8023 }, { "epoch": 1.3098649034733276, "grad_norm": 3.090317487716675, "learning_rate": 1.9003001676025808e-05, "loss": 0.8694, "step": 8024 }, { "epoch": 1.310028162115832, "grad_norm": 2.9454808235168457, "learning_rate": 1.9002722588191775e-05, "loss": 0.6534, "step": 8025 }, { "epoch": 1.3101914207583363, "grad_norm": 2.6852657794952393, "learning_rate": 1.9002443463350943e-05, "loss": 0.8294, "step": 8026 }, { "epoch": 1.3103546794008407, "grad_norm": 2.640117645263672, "learning_rate": 1.900216430150447e-05, "loss": 0.6465, "step": 8027 }, { "epoch": 1.3105179380433452, "grad_norm": 2.7520358562469482, "learning_rate": 1.9001885102653496e-05, "loss": 0.6231, "step": 8028 }, { "epoch": 1.3106811966858496, "grad_norm": 3.100762367248535, "learning_rate": 1.9001605866799174e-05, "loss": 0.7276, "step": 8029 }, { "epoch": 1.310844455328354, "grad_norm": 3.241696357727051, "learning_rate": 1.900132659394265e-05, "loss": 0.8204, "step": 8030 }, { "epoch": 1.3110077139708585, "grad_norm": 3.075943946838379, "learning_rate": 1.9001047284085067e-05, "loss": 0.7014, "step": 8031 }, { "epoch": 1.3111709726133627, "grad_norm": 3.092805862426758, "learning_rate": 1.900076793722758e-05, "loss": 0.8216, "step": 8032 }, { "epoch": 1.3113342312558671, "grad_norm": 3.2266719341278076, "learning_rate": 1.9000488553371333e-05, "loss": 0.8213, "step": 8033 }, { "epoch": 1.3114974898983716, "grad_norm": 2.5277352333068848, "learning_rate": 1.9000209132517477e-05, "loss": 0.5379, "step": 8034 }, { "epoch": 1.3116607485408758, "grad_norm": 2.620842695236206, "learning_rate": 1.899992967466716e-05, "loss": 0.6329, "step": 8035 }, { "epoch": 1.3118240071833802, "grad_norm": 2.892455816268921, "learning_rate": 1.899965017982153e-05, "loss": 0.7691, "step": 8036 }, { "epoch": 1.3119872658258847, "grad_norm": 3.412794828414917, "learning_rate": 1.8999370647981737e-05, "loss": 0.8456, "step": 8037 }, { "epoch": 1.312150524468389, "grad_norm": 2.790858745574951, "learning_rate": 1.899909107914893e-05, "loss": 0.8202, "step": 8038 }, { "epoch": 1.3123137831108935, "grad_norm": 2.749234437942505, "learning_rate": 1.8998811473324256e-05, "loss": 0.7572, "step": 8039 }, { "epoch": 1.3124770417533977, "grad_norm": 2.5206964015960693, "learning_rate": 1.899853183050887e-05, "loss": 0.6022, "step": 8040 }, { "epoch": 1.3126403003959022, "grad_norm": 2.6953978538513184, "learning_rate": 1.899825215070391e-05, "loss": 0.746, "step": 8041 }, { "epoch": 1.3128035590384066, "grad_norm": 2.5500338077545166, "learning_rate": 1.8997972433910537e-05, "loss": 0.6217, "step": 8042 }, { "epoch": 1.312966817680911, "grad_norm": 2.7125661373138428, "learning_rate": 1.8997692680129897e-05, "loss": 0.7738, "step": 8043 }, { "epoch": 1.3131300763234153, "grad_norm": 2.585742235183716, "learning_rate": 1.8997412889363143e-05, "loss": 0.7065, "step": 8044 }, { "epoch": 1.3132933349659197, "grad_norm": 2.6241774559020996, "learning_rate": 1.8997133061611416e-05, "loss": 0.7551, "step": 8045 }, { "epoch": 1.3134565936084241, "grad_norm": 2.7556703090667725, "learning_rate": 1.8996853196875874e-05, "loss": 0.7206, "step": 8046 }, { "epoch": 1.3136198522509286, "grad_norm": 2.6639912128448486, "learning_rate": 1.8996573295157666e-05, "loss": 0.611, "step": 8047 }, { "epoch": 1.313783110893433, "grad_norm": 3.0851852893829346, "learning_rate": 1.8996293356457943e-05, "loss": 0.8691, "step": 8048 }, { "epoch": 1.3139463695359372, "grad_norm": 2.7599165439605713, "learning_rate": 1.899601338077786e-05, "loss": 0.7376, "step": 8049 }, { "epoch": 1.3141096281784417, "grad_norm": 2.406552314758301, "learning_rate": 1.8995733368118556e-05, "loss": 0.6527, "step": 8050 }, { "epoch": 1.3142728868209461, "grad_norm": 3.2422051429748535, "learning_rate": 1.899545331848119e-05, "loss": 0.7555, "step": 8051 }, { "epoch": 1.3144361454634503, "grad_norm": 2.6262335777282715, "learning_rate": 1.899517323186691e-05, "loss": 0.6396, "step": 8052 }, { "epoch": 1.3145994041059548, "grad_norm": 2.7468791007995605, "learning_rate": 1.8994893108276873e-05, "loss": 0.7535, "step": 8053 }, { "epoch": 1.3147626627484592, "grad_norm": 3.0823349952697754, "learning_rate": 1.8994612947712228e-05, "loss": 0.7742, "step": 8054 }, { "epoch": 1.3149259213909636, "grad_norm": 4.224412441253662, "learning_rate": 1.8994332750174123e-05, "loss": 0.7939, "step": 8055 }, { "epoch": 1.315089180033468, "grad_norm": 2.6930932998657227, "learning_rate": 1.899405251566371e-05, "loss": 0.6762, "step": 8056 }, { "epoch": 1.3152524386759725, "grad_norm": 2.7805676460266113, "learning_rate": 1.8993772244182148e-05, "loss": 0.7858, "step": 8057 }, { "epoch": 1.3154156973184767, "grad_norm": 2.8286027908325195, "learning_rate": 1.899349193573058e-05, "loss": 0.7021, "step": 8058 }, { "epoch": 1.3155789559609812, "grad_norm": 2.5546646118164062, "learning_rate": 1.8993211590310166e-05, "loss": 0.668, "step": 8059 }, { "epoch": 1.3157422146034856, "grad_norm": 2.5941221714019775, "learning_rate": 1.8992931207922056e-05, "loss": 0.6735, "step": 8060 }, { "epoch": 1.3159054732459898, "grad_norm": 2.719545602798462, "learning_rate": 1.8992650788567398e-05, "loss": 0.5891, "step": 8061 }, { "epoch": 1.3160687318884943, "grad_norm": 3.0313663482666016, "learning_rate": 1.8992370332247348e-05, "loss": 0.7366, "step": 8062 }, { "epoch": 1.3162319905309987, "grad_norm": 2.5183145999908447, "learning_rate": 1.899208983896306e-05, "loss": 0.6497, "step": 8063 }, { "epoch": 1.3163952491735031, "grad_norm": 2.4645886421203613, "learning_rate": 1.8991809308715687e-05, "loss": 0.592, "step": 8064 }, { "epoch": 1.3165585078160076, "grad_norm": 2.9044573307037354, "learning_rate": 1.899152874150638e-05, "loss": 0.7442, "step": 8065 }, { "epoch": 1.316721766458512, "grad_norm": 2.6423113346099854, "learning_rate": 1.8991248137336296e-05, "loss": 0.7042, "step": 8066 }, { "epoch": 1.3168850251010162, "grad_norm": 2.8258745670318604, "learning_rate": 1.8990967496206583e-05, "loss": 0.6525, "step": 8067 }, { "epoch": 1.3170482837435207, "grad_norm": 2.8294677734375, "learning_rate": 1.89906868181184e-05, "loss": 0.7508, "step": 8068 }, { "epoch": 1.3172115423860251, "grad_norm": 2.655168056488037, "learning_rate": 1.8990406103072895e-05, "loss": 0.6262, "step": 8069 }, { "epoch": 1.3173748010285293, "grad_norm": 2.60839581489563, "learning_rate": 1.8990125351071228e-05, "loss": 0.6963, "step": 8070 }, { "epoch": 1.3175380596710338, "grad_norm": 2.8547959327697754, "learning_rate": 1.8989844562114546e-05, "loss": 0.7628, "step": 8071 }, { "epoch": 1.3177013183135382, "grad_norm": 2.4257354736328125, "learning_rate": 1.898956373620401e-05, "loss": 0.5355, "step": 8072 }, { "epoch": 1.3178645769560426, "grad_norm": 2.7440435886383057, "learning_rate": 1.8989282873340773e-05, "loss": 0.6627, "step": 8073 }, { "epoch": 1.318027835598547, "grad_norm": 2.5561065673828125, "learning_rate": 1.8989001973525988e-05, "loss": 0.6591, "step": 8074 }, { "epoch": 1.3181910942410515, "grad_norm": 2.6091225147247314, "learning_rate": 1.898872103676081e-05, "loss": 0.5657, "step": 8075 }, { "epoch": 1.3183543528835557, "grad_norm": 2.787532329559326, "learning_rate": 1.8988440063046396e-05, "loss": 0.6434, "step": 8076 }, { "epoch": 1.3185176115260602, "grad_norm": 3.428478956222534, "learning_rate": 1.8988159052383896e-05, "loss": 0.8624, "step": 8077 }, { "epoch": 1.3186808701685646, "grad_norm": 3.2094945907592773, "learning_rate": 1.898787800477447e-05, "loss": 0.8328, "step": 8078 }, { "epoch": 1.3188441288110688, "grad_norm": 2.7856383323669434, "learning_rate": 1.898759692021927e-05, "loss": 0.6632, "step": 8079 }, { "epoch": 1.3190073874535733, "grad_norm": 2.8925387859344482, "learning_rate": 1.8987315798719453e-05, "loss": 0.652, "step": 8080 }, { "epoch": 1.3191706460960777, "grad_norm": 2.713245391845703, "learning_rate": 1.8987034640276175e-05, "loss": 0.674, "step": 8081 }, { "epoch": 1.3193339047385821, "grad_norm": 2.376438617706299, "learning_rate": 1.898675344489059e-05, "loss": 0.5939, "step": 8082 }, { "epoch": 1.3194971633810866, "grad_norm": 2.8848633766174316, "learning_rate": 1.8986472212563852e-05, "loss": 0.701, "step": 8083 }, { "epoch": 1.3196604220235908, "grad_norm": 3.283165693283081, "learning_rate": 1.8986190943297124e-05, "loss": 0.8134, "step": 8084 }, { "epoch": 1.3198236806660952, "grad_norm": 2.5451385974884033, "learning_rate": 1.8985909637091557e-05, "loss": 0.635, "step": 8085 }, { "epoch": 1.3199869393085997, "grad_norm": 2.8022589683532715, "learning_rate": 1.898562829394831e-05, "loss": 0.6105, "step": 8086 }, { "epoch": 1.320150197951104, "grad_norm": 2.447192907333374, "learning_rate": 1.8985346913868538e-05, "loss": 0.6908, "step": 8087 }, { "epoch": 1.3203134565936083, "grad_norm": 2.6424784660339355, "learning_rate": 1.8985065496853396e-05, "loss": 0.7135, "step": 8088 }, { "epoch": 1.3204767152361128, "grad_norm": 2.772066116333008, "learning_rate": 1.8984784042904043e-05, "loss": 0.6688, "step": 8089 }, { "epoch": 1.3206399738786172, "grad_norm": 2.7499804496765137, "learning_rate": 1.8984502552021634e-05, "loss": 0.7169, "step": 8090 }, { "epoch": 1.3208032325211216, "grad_norm": 2.6960997581481934, "learning_rate": 1.898422102420733e-05, "loss": 0.6481, "step": 8091 }, { "epoch": 1.320966491163626, "grad_norm": 3.226083517074585, "learning_rate": 1.8983939459462285e-05, "loss": 0.809, "step": 8092 }, { "epoch": 1.3211297498061303, "grad_norm": 3.16208815574646, "learning_rate": 1.8983657857787655e-05, "loss": 0.7638, "step": 8093 }, { "epoch": 1.3212930084486347, "grad_norm": 2.9920859336853027, "learning_rate": 1.89833762191846e-05, "loss": 0.8737, "step": 8094 }, { "epoch": 1.3214562670911392, "grad_norm": 2.9448964595794678, "learning_rate": 1.8983094543654282e-05, "loss": 0.7937, "step": 8095 }, { "epoch": 1.3216195257336436, "grad_norm": 3.1719257831573486, "learning_rate": 1.898281283119785e-05, "loss": 0.7892, "step": 8096 }, { "epoch": 1.3217827843761478, "grad_norm": 3.049502372741699, "learning_rate": 1.898253108181647e-05, "loss": 0.7981, "step": 8097 }, { "epoch": 1.3219460430186523, "grad_norm": 2.9095568656921387, "learning_rate": 1.898224929551129e-05, "loss": 0.6868, "step": 8098 }, { "epoch": 1.3221093016611567, "grad_norm": 2.910248041152954, "learning_rate": 1.898196747228348e-05, "loss": 0.7055, "step": 8099 }, { "epoch": 1.3222725603036611, "grad_norm": 2.9557433128356934, "learning_rate": 1.898168561213419e-05, "loss": 0.7716, "step": 8100 }, { "epoch": 1.3224358189461656, "grad_norm": 2.8374154567718506, "learning_rate": 1.8981403715064586e-05, "loss": 0.7459, "step": 8101 }, { "epoch": 1.3225990775886698, "grad_norm": 3.2336697578430176, "learning_rate": 1.898112178107582e-05, "loss": 0.7135, "step": 8102 }, { "epoch": 1.3227623362311742, "grad_norm": 3.08484148979187, "learning_rate": 1.8980839810169053e-05, "loss": 0.7454, "step": 8103 }, { "epoch": 1.3229255948736787, "grad_norm": 2.724121570587158, "learning_rate": 1.898055780234545e-05, "loss": 0.7249, "step": 8104 }, { "epoch": 1.3230888535161829, "grad_norm": 2.876770496368408, "learning_rate": 1.8980275757606157e-05, "loss": 0.6662, "step": 8105 }, { "epoch": 1.3232521121586873, "grad_norm": 2.5253403186798096, "learning_rate": 1.8979993675952345e-05, "loss": 0.6788, "step": 8106 }, { "epoch": 1.3234153708011918, "grad_norm": 2.437805414199829, "learning_rate": 1.897971155738517e-05, "loss": 0.5491, "step": 8107 }, { "epoch": 1.3235786294436962, "grad_norm": 2.930899143218994, "learning_rate": 1.8979429401905793e-05, "loss": 0.71, "step": 8108 }, { "epoch": 1.3237418880862006, "grad_norm": 2.670578956604004, "learning_rate": 1.897914720951537e-05, "loss": 0.7236, "step": 8109 }, { "epoch": 1.323905146728705, "grad_norm": 2.509127378463745, "learning_rate": 1.8978864980215062e-05, "loss": 0.5846, "step": 8110 }, { "epoch": 1.3240684053712093, "grad_norm": 3.2014968395233154, "learning_rate": 1.8978582714006032e-05, "loss": 1.2258, "step": 8111 }, { "epoch": 1.3242316640137137, "grad_norm": 2.918616533279419, "learning_rate": 1.897830041088944e-05, "loss": 0.8054, "step": 8112 }, { "epoch": 1.3243949226562182, "grad_norm": 2.8289058208465576, "learning_rate": 1.8978018070866442e-05, "loss": 0.7294, "step": 8113 }, { "epoch": 1.3245581812987224, "grad_norm": 2.9359538555145264, "learning_rate": 1.8977735693938205e-05, "loss": 0.7067, "step": 8114 }, { "epoch": 1.3247214399412268, "grad_norm": 3.1379072666168213, "learning_rate": 1.8977453280105885e-05, "loss": 0.7265, "step": 8115 }, { "epoch": 1.3248846985837313, "grad_norm": 2.6469807624816895, "learning_rate": 1.8977170829370647e-05, "loss": 0.6622, "step": 8116 }, { "epoch": 1.3250479572262357, "grad_norm": 2.794642925262451, "learning_rate": 1.8976888341733645e-05, "loss": 0.5863, "step": 8117 }, { "epoch": 1.3252112158687401, "grad_norm": 2.673889636993408, "learning_rate": 1.8976605817196047e-05, "loss": 0.7182, "step": 8118 }, { "epoch": 1.3253744745112446, "grad_norm": 2.709360122680664, "learning_rate": 1.8976323255759013e-05, "loss": 0.7359, "step": 8119 }, { "epoch": 1.3255377331537488, "grad_norm": 2.559645414352417, "learning_rate": 1.8976040657423702e-05, "loss": 0.6961, "step": 8120 }, { "epoch": 1.3257009917962532, "grad_norm": 2.6725544929504395, "learning_rate": 1.8975758022191274e-05, "loss": 0.7379, "step": 8121 }, { "epoch": 1.3258642504387577, "grad_norm": 2.5543010234832764, "learning_rate": 1.89754753500629e-05, "loss": 0.755, "step": 8122 }, { "epoch": 1.3260275090812619, "grad_norm": 2.762486219406128, "learning_rate": 1.897519264103973e-05, "loss": 0.7019, "step": 8123 }, { "epoch": 1.3261907677237663, "grad_norm": 3.1598715782165527, "learning_rate": 1.897490989512294e-05, "loss": 0.8352, "step": 8124 }, { "epoch": 1.3263540263662708, "grad_norm": 2.7824230194091797, "learning_rate": 1.897462711231368e-05, "loss": 0.8215, "step": 8125 }, { "epoch": 1.3265172850087752, "grad_norm": 2.8561792373657227, "learning_rate": 1.8974344292613115e-05, "loss": 0.6546, "step": 8126 }, { "epoch": 1.3266805436512796, "grad_norm": 3.3631043434143066, "learning_rate": 1.8974061436022412e-05, "loss": 0.8227, "step": 8127 }, { "epoch": 1.3268438022937838, "grad_norm": 2.9669604301452637, "learning_rate": 1.8973778542542727e-05, "loss": 0.8046, "step": 8128 }, { "epoch": 1.3270070609362883, "grad_norm": 2.834625482559204, "learning_rate": 1.897349561217523e-05, "loss": 0.7932, "step": 8129 }, { "epoch": 1.3271703195787927, "grad_norm": 2.262582540512085, "learning_rate": 1.897321264492108e-05, "loss": 0.5638, "step": 8130 }, { "epoch": 1.3273335782212972, "grad_norm": 2.729724884033203, "learning_rate": 1.897292964078144e-05, "loss": 0.777, "step": 8131 }, { "epoch": 1.3274968368638014, "grad_norm": 2.4713504314422607, "learning_rate": 1.8972646599757478e-05, "loss": 0.7461, "step": 8132 }, { "epoch": 1.3276600955063058, "grad_norm": 2.4183402061462402, "learning_rate": 1.897236352185035e-05, "loss": 0.6367, "step": 8133 }, { "epoch": 1.3278233541488103, "grad_norm": 2.3942649364471436, "learning_rate": 1.8972080407061225e-05, "loss": 0.5751, "step": 8134 }, { "epoch": 1.3279866127913147, "grad_norm": 2.734299421310425, "learning_rate": 1.8971797255391267e-05, "loss": 0.7548, "step": 8135 }, { "epoch": 1.3281498714338191, "grad_norm": 2.769465684890747, "learning_rate": 1.8971514066841634e-05, "loss": 0.7501, "step": 8136 }, { "epoch": 1.3283131300763233, "grad_norm": 2.9236669540405273, "learning_rate": 1.8971230841413498e-05, "loss": 0.6525, "step": 8137 }, { "epoch": 1.3284763887188278, "grad_norm": 2.785173177719116, "learning_rate": 1.897094757910802e-05, "loss": 0.6407, "step": 8138 }, { "epoch": 1.3286396473613322, "grad_norm": 2.8830807209014893, "learning_rate": 1.897066427992636e-05, "loss": 0.8285, "step": 8139 }, { "epoch": 1.3288029060038367, "grad_norm": 2.7359445095062256, "learning_rate": 1.897038094386969e-05, "loss": 0.7405, "step": 8140 }, { "epoch": 1.3289661646463409, "grad_norm": 3.1556761264801025, "learning_rate": 1.8970097570939168e-05, "loss": 0.7837, "step": 8141 }, { "epoch": 1.3291294232888453, "grad_norm": 2.691507339477539, "learning_rate": 1.8969814161135964e-05, "loss": 0.7408, "step": 8142 }, { "epoch": 1.3292926819313498, "grad_norm": 2.888721466064453, "learning_rate": 1.896953071446124e-05, "loss": 0.7843, "step": 8143 }, { "epoch": 1.3294559405738542, "grad_norm": 2.304502010345459, "learning_rate": 1.8969247230916164e-05, "loss": 0.7003, "step": 8144 }, { "epoch": 1.3296191992163586, "grad_norm": 2.578629493713379, "learning_rate": 1.8968963710501895e-05, "loss": 0.7097, "step": 8145 }, { "epoch": 1.3297824578588628, "grad_norm": 2.691492795944214, "learning_rate": 1.896868015321961e-05, "loss": 0.6348, "step": 8146 }, { "epoch": 1.3299457165013673, "grad_norm": 2.8596293926239014, "learning_rate": 1.896839655907046e-05, "loss": 0.7359, "step": 8147 }, { "epoch": 1.3301089751438717, "grad_norm": 2.4438154697418213, "learning_rate": 1.896811292805562e-05, "loss": 0.6149, "step": 8148 }, { "epoch": 1.330272233786376, "grad_norm": 2.4902915954589844, "learning_rate": 1.8967829260176257e-05, "loss": 0.6497, "step": 8149 }, { "epoch": 1.3304354924288804, "grad_norm": 3.0662224292755127, "learning_rate": 1.8967545555433535e-05, "loss": 0.8588, "step": 8150 }, { "epoch": 1.3305987510713848, "grad_norm": 2.627701759338379, "learning_rate": 1.8967261813828612e-05, "loss": 0.7019, "step": 8151 }, { "epoch": 1.3307620097138892, "grad_norm": 2.793806314468384, "learning_rate": 1.8966978035362666e-05, "loss": 0.7339, "step": 8152 }, { "epoch": 1.3309252683563937, "grad_norm": 2.72922945022583, "learning_rate": 1.8966694220036862e-05, "loss": 0.5844, "step": 8153 }, { "epoch": 1.3310885269988981, "grad_norm": 2.888066053390503, "learning_rate": 1.896641036785236e-05, "loss": 0.799, "step": 8154 }, { "epoch": 1.3312517856414023, "grad_norm": 2.7725470066070557, "learning_rate": 1.8966126478810333e-05, "loss": 0.7495, "step": 8155 }, { "epoch": 1.3314150442839068, "grad_norm": 3.3477416038513184, "learning_rate": 1.8965842552911943e-05, "loss": 0.7777, "step": 8156 }, { "epoch": 1.3315783029264112, "grad_norm": 2.784010410308838, "learning_rate": 1.8965558590158363e-05, "loss": 0.6493, "step": 8157 }, { "epoch": 1.3317415615689154, "grad_norm": 3.286820411682129, "learning_rate": 1.8965274590550753e-05, "loss": 0.7527, "step": 8158 }, { "epoch": 1.3319048202114199, "grad_norm": 2.1303439140319824, "learning_rate": 1.8964990554090287e-05, "loss": 0.5718, "step": 8159 }, { "epoch": 1.3320680788539243, "grad_norm": 2.988176107406616, "learning_rate": 1.896470648077813e-05, "loss": 0.7669, "step": 8160 }, { "epoch": 1.3322313374964287, "grad_norm": 2.763371706008911, "learning_rate": 1.896442237061545e-05, "loss": 0.7786, "step": 8161 }, { "epoch": 1.3323945961389332, "grad_norm": 3.2292635440826416, "learning_rate": 1.896413822360341e-05, "loss": 0.6987, "step": 8162 }, { "epoch": 1.3325578547814376, "grad_norm": 2.343916893005371, "learning_rate": 1.896385403974319e-05, "loss": 0.6131, "step": 8163 }, { "epoch": 1.3327211134239418, "grad_norm": 2.5902364253997803, "learning_rate": 1.8963569819035948e-05, "loss": 0.5727, "step": 8164 }, { "epoch": 1.3328843720664463, "grad_norm": 3.4122557640075684, "learning_rate": 1.8963285561482854e-05, "loss": 0.8071, "step": 8165 }, { "epoch": 1.3330476307089507, "grad_norm": 2.5982391834259033, "learning_rate": 1.8963001267085078e-05, "loss": 0.6738, "step": 8166 }, { "epoch": 1.333210889351455, "grad_norm": 3.0185582637786865, "learning_rate": 1.8962716935843787e-05, "loss": 0.7441, "step": 8167 }, { "epoch": 1.3333741479939594, "grad_norm": 2.877983808517456, "learning_rate": 1.896243256776015e-05, "loss": 0.6502, "step": 8168 }, { "epoch": 1.3335374066364638, "grad_norm": 2.886627674102783, "learning_rate": 1.8962148162835342e-05, "loss": 0.7269, "step": 8169 }, { "epoch": 1.3337006652789682, "grad_norm": 2.9919989109039307, "learning_rate": 1.8961863721070523e-05, "loss": 0.7647, "step": 8170 }, { "epoch": 1.3338639239214727, "grad_norm": 2.8383171558380127, "learning_rate": 1.8961579242466865e-05, "loss": 0.7503, "step": 8171 }, { "epoch": 1.334027182563977, "grad_norm": 3.590848684310913, "learning_rate": 1.896129472702554e-05, "loss": 0.6675, "step": 8172 }, { "epoch": 1.3341904412064813, "grad_norm": 2.8717637062072754, "learning_rate": 1.896101017474772e-05, "loss": 0.7277, "step": 8173 }, { "epoch": 1.3343536998489858, "grad_norm": 2.9507038593292236, "learning_rate": 1.8960725585634566e-05, "loss": 0.7585, "step": 8174 }, { "epoch": 1.3345169584914902, "grad_norm": 2.5119831562042236, "learning_rate": 1.8960440959687254e-05, "loss": 0.65, "step": 8175 }, { "epoch": 1.3346802171339944, "grad_norm": 2.562009334564209, "learning_rate": 1.896015629690695e-05, "loss": 0.6787, "step": 8176 }, { "epoch": 1.3348434757764989, "grad_norm": 2.983302354812622, "learning_rate": 1.895987159729483e-05, "loss": 0.9025, "step": 8177 }, { "epoch": 1.3350067344190033, "grad_norm": 2.8659613132476807, "learning_rate": 1.895958686085206e-05, "loss": 0.7102, "step": 8178 }, { "epoch": 1.3351699930615077, "grad_norm": 2.5951054096221924, "learning_rate": 1.895930208757981e-05, "loss": 0.6789, "step": 8179 }, { "epoch": 1.3353332517040122, "grad_norm": 2.3961193561553955, "learning_rate": 1.8959017277479258e-05, "loss": 0.6311, "step": 8180 }, { "epoch": 1.3354965103465164, "grad_norm": 3.0552256107330322, "learning_rate": 1.8958732430551566e-05, "loss": 0.7443, "step": 8181 }, { "epoch": 1.3356597689890208, "grad_norm": 2.915079116821289, "learning_rate": 1.8958447546797904e-05, "loss": 0.6091, "step": 8182 }, { "epoch": 1.3358230276315253, "grad_norm": 2.6196556091308594, "learning_rate": 1.895816262621945e-05, "loss": 0.6372, "step": 8183 }, { "epoch": 1.3359862862740297, "grad_norm": 2.6043591499328613, "learning_rate": 1.895787766881737e-05, "loss": 0.7435, "step": 8184 }, { "epoch": 1.336149544916534, "grad_norm": 2.9840164184570312, "learning_rate": 1.895759267459284e-05, "loss": 0.8347, "step": 8185 }, { "epoch": 1.3363128035590384, "grad_norm": 2.738055944442749, "learning_rate": 1.895730764354703e-05, "loss": 0.8041, "step": 8186 }, { "epoch": 1.3364760622015428, "grad_norm": 2.3541736602783203, "learning_rate": 1.8957022575681106e-05, "loss": 0.5759, "step": 8187 }, { "epoch": 1.3366393208440472, "grad_norm": 2.658982276916504, "learning_rate": 1.8956737470996244e-05, "loss": 0.6943, "step": 8188 }, { "epoch": 1.3368025794865517, "grad_norm": 3.2454607486724854, "learning_rate": 1.895645232949362e-05, "loss": 0.7732, "step": 8189 }, { "epoch": 1.336965838129056, "grad_norm": 2.848289966583252, "learning_rate": 1.89561671511744e-05, "loss": 0.6434, "step": 8190 }, { "epoch": 1.3371290967715603, "grad_norm": 2.6478166580200195, "learning_rate": 1.8955881936039763e-05, "loss": 0.6438, "step": 8191 }, { "epoch": 1.3372923554140648, "grad_norm": 3.244590997695923, "learning_rate": 1.895559668409087e-05, "loss": 0.8012, "step": 8192 }, { "epoch": 1.337455614056569, "grad_norm": 2.8347771167755127, "learning_rate": 1.8955311395328906e-05, "loss": 0.8103, "step": 8193 }, { "epoch": 1.3376188726990734, "grad_norm": 2.944425582885742, "learning_rate": 1.8955026069755035e-05, "loss": 0.7028, "step": 8194 }, { "epoch": 1.3377821313415779, "grad_norm": 2.6253445148468018, "learning_rate": 1.8954740707370438e-05, "loss": 0.6355, "step": 8195 }, { "epoch": 1.3379453899840823, "grad_norm": 2.841785430908203, "learning_rate": 1.8954455308176278e-05, "loss": 0.6955, "step": 8196 }, { "epoch": 1.3381086486265867, "grad_norm": 2.8686585426330566, "learning_rate": 1.8954169872173736e-05, "loss": 0.7364, "step": 8197 }, { "epoch": 1.3382719072690912, "grad_norm": 3.1213741302490234, "learning_rate": 1.895388439936398e-05, "loss": 0.733, "step": 8198 }, { "epoch": 1.3384351659115954, "grad_norm": 2.571274518966675, "learning_rate": 1.8953598889748187e-05, "loss": 0.6106, "step": 8199 }, { "epoch": 1.3385984245540998, "grad_norm": 3.042329788208008, "learning_rate": 1.895331334332753e-05, "loss": 0.8958, "step": 8200 }, { "epoch": 1.3387616831966043, "grad_norm": 3.1196770668029785, "learning_rate": 1.8953027760103186e-05, "loss": 0.7511, "step": 8201 }, { "epoch": 1.3389249418391085, "grad_norm": 3.1897106170654297, "learning_rate": 1.8952742140076323e-05, "loss": 0.7842, "step": 8202 }, { "epoch": 1.339088200481613, "grad_norm": 2.605950117111206, "learning_rate": 1.8952456483248117e-05, "loss": 0.6741, "step": 8203 }, { "epoch": 1.3392514591241174, "grad_norm": 2.876009941101074, "learning_rate": 1.8952170789619745e-05, "loss": 0.6656, "step": 8204 }, { "epoch": 1.3394147177666218, "grad_norm": 2.9147908687591553, "learning_rate": 1.895188505919238e-05, "loss": 0.6999, "step": 8205 }, { "epoch": 1.3395779764091262, "grad_norm": 3.201530933380127, "learning_rate": 1.895159929196719e-05, "loss": 0.832, "step": 8206 }, { "epoch": 1.3397412350516307, "grad_norm": 2.536986827850342, "learning_rate": 1.895131348794536e-05, "loss": 0.6661, "step": 8207 }, { "epoch": 1.339904493694135, "grad_norm": 2.565808057785034, "learning_rate": 1.8951027647128058e-05, "loss": 0.6795, "step": 8208 }, { "epoch": 1.3400677523366393, "grad_norm": 2.9769554138183594, "learning_rate": 1.895074176951646e-05, "loss": 0.7724, "step": 8209 }, { "epoch": 1.3402310109791438, "grad_norm": 3.7295501232147217, "learning_rate": 1.8950455855111745e-05, "loss": 0.813, "step": 8210 }, { "epoch": 1.340394269621648, "grad_norm": 3.123659372329712, "learning_rate": 1.8950169903915084e-05, "loss": 0.851, "step": 8211 }, { "epoch": 1.3405575282641524, "grad_norm": 2.7976279258728027, "learning_rate": 1.8949883915927656e-05, "loss": 0.6589, "step": 8212 }, { "epoch": 1.3407207869066569, "grad_norm": 3.1406280994415283, "learning_rate": 1.8949597891150635e-05, "loss": 0.9143, "step": 8213 }, { "epoch": 1.3408840455491613, "grad_norm": 2.888327121734619, "learning_rate": 1.8949311829585192e-05, "loss": 0.6036, "step": 8214 }, { "epoch": 1.3410473041916657, "grad_norm": 2.575357437133789, "learning_rate": 1.8949025731232514e-05, "loss": 0.5991, "step": 8215 }, { "epoch": 1.3412105628341702, "grad_norm": 2.7364554405212402, "learning_rate": 1.8948739596093765e-05, "loss": 0.6289, "step": 8216 }, { "epoch": 1.3413738214766744, "grad_norm": 2.7391836643218994, "learning_rate": 1.8948453424170127e-05, "loss": 0.6025, "step": 8217 }, { "epoch": 1.3415370801191788, "grad_norm": 2.7881951332092285, "learning_rate": 1.894816721546278e-05, "loss": 0.7273, "step": 8218 }, { "epoch": 1.3417003387616833, "grad_norm": 2.8083553314208984, "learning_rate": 1.8947880969972892e-05, "loss": 0.766, "step": 8219 }, { "epoch": 1.3418635974041875, "grad_norm": 3.336195707321167, "learning_rate": 1.8947594687701644e-05, "loss": 0.7839, "step": 8220 }, { "epoch": 1.342026856046692, "grad_norm": 2.769914388656616, "learning_rate": 1.8947308368650215e-05, "loss": 0.7521, "step": 8221 }, { "epoch": 1.3421901146891964, "grad_norm": 2.700037956237793, "learning_rate": 1.894702201281978e-05, "loss": 0.6809, "step": 8222 }, { "epoch": 1.3423533733317008, "grad_norm": 2.80789852142334, "learning_rate": 1.8946735620211515e-05, "loss": 0.6736, "step": 8223 }, { "epoch": 1.3425166319742052, "grad_norm": 2.378117561340332, "learning_rate": 1.8946449190826595e-05, "loss": 0.6113, "step": 8224 }, { "epoch": 1.3426798906167094, "grad_norm": 2.7230756282806396, "learning_rate": 1.8946162724666205e-05, "loss": 0.6464, "step": 8225 }, { "epoch": 1.3428431492592139, "grad_norm": 2.3713736534118652, "learning_rate": 1.8945876221731516e-05, "loss": 0.5573, "step": 8226 }, { "epoch": 1.3430064079017183, "grad_norm": 2.3883981704711914, "learning_rate": 1.894558968202371e-05, "loss": 0.6194, "step": 8227 }, { "epoch": 1.3431696665442228, "grad_norm": 2.7045223712921143, "learning_rate": 1.894530310554396e-05, "loss": 0.6821, "step": 8228 }, { "epoch": 1.343332925186727, "grad_norm": 2.8465099334716797, "learning_rate": 1.8945016492293448e-05, "loss": 0.6848, "step": 8229 }, { "epoch": 1.3434961838292314, "grad_norm": 2.346789598464966, "learning_rate": 1.8944729842273348e-05, "loss": 0.6117, "step": 8230 }, { "epoch": 1.3436594424717359, "grad_norm": 3.617506742477417, "learning_rate": 1.8944443155484843e-05, "loss": 0.5845, "step": 8231 }, { "epoch": 1.3438227011142403, "grad_norm": 2.921186923980713, "learning_rate": 1.894415643192911e-05, "loss": 0.6814, "step": 8232 }, { "epoch": 1.3439859597567447, "grad_norm": 2.7355902194976807, "learning_rate": 1.8943869671607327e-05, "loss": 0.6332, "step": 8233 }, { "epoch": 1.344149218399249, "grad_norm": 2.798969268798828, "learning_rate": 1.894358287452067e-05, "loss": 0.7163, "step": 8234 }, { "epoch": 1.3443124770417534, "grad_norm": 2.66408371925354, "learning_rate": 1.8943296040670325e-05, "loss": 0.6635, "step": 8235 }, { "epoch": 1.3444757356842578, "grad_norm": 2.932858943939209, "learning_rate": 1.8943009170057466e-05, "loss": 0.8327, "step": 8236 }, { "epoch": 1.344638994326762, "grad_norm": 4.572553634643555, "learning_rate": 1.894272226268327e-05, "loss": 0.8581, "step": 8237 }, { "epoch": 1.3448022529692665, "grad_norm": 2.7943899631500244, "learning_rate": 1.8942435318548928e-05, "loss": 0.6386, "step": 8238 }, { "epoch": 1.344965511611771, "grad_norm": 3.4349822998046875, "learning_rate": 1.8942148337655604e-05, "loss": 0.7718, "step": 8239 }, { "epoch": 1.3451287702542754, "grad_norm": 3.2218921184539795, "learning_rate": 1.8941861320004483e-05, "loss": 0.7693, "step": 8240 }, { "epoch": 1.3452920288967798, "grad_norm": 3.0104191303253174, "learning_rate": 1.8941574265596753e-05, "loss": 0.7349, "step": 8241 }, { "epoch": 1.3454552875392842, "grad_norm": 3.172394037246704, "learning_rate": 1.8941287174433583e-05, "loss": 0.7886, "step": 8242 }, { "epoch": 1.3456185461817884, "grad_norm": 2.855799674987793, "learning_rate": 1.8941000046516156e-05, "loss": 0.6754, "step": 8243 }, { "epoch": 1.3457818048242929, "grad_norm": 2.8361637592315674, "learning_rate": 1.894071288184566e-05, "loss": 0.7233, "step": 8244 }, { "epoch": 1.3459450634667973, "grad_norm": 2.859874963760376, "learning_rate": 1.8940425680423265e-05, "loss": 0.7554, "step": 8245 }, { "epoch": 1.3461083221093015, "grad_norm": 2.7085764408111572, "learning_rate": 1.8940138442250155e-05, "loss": 0.5901, "step": 8246 }, { "epoch": 1.346271580751806, "grad_norm": 2.177539587020874, "learning_rate": 1.8939851167327514e-05, "loss": 0.5466, "step": 8247 }, { "epoch": 1.3464348393943104, "grad_norm": 2.7431912422180176, "learning_rate": 1.893956385565652e-05, "loss": 0.6457, "step": 8248 }, { "epoch": 1.3465980980368149, "grad_norm": 3.245037794113159, "learning_rate": 1.8939276507238353e-05, "loss": 0.8418, "step": 8249 }, { "epoch": 1.3467613566793193, "grad_norm": 2.792088508605957, "learning_rate": 1.8938989122074195e-05, "loss": 0.6438, "step": 8250 }, { "epoch": 1.3469246153218237, "grad_norm": 3.175767660140991, "learning_rate": 1.8938701700165232e-05, "loss": 0.9462, "step": 8251 }, { "epoch": 1.347087873964328, "grad_norm": 2.504232883453369, "learning_rate": 1.893841424151264e-05, "loss": 0.6284, "step": 8252 }, { "epoch": 1.3472511326068324, "grad_norm": 2.6145148277282715, "learning_rate": 1.89381267461176e-05, "loss": 0.6405, "step": 8253 }, { "epoch": 1.3474143912493368, "grad_norm": 2.8003695011138916, "learning_rate": 1.89378392139813e-05, "loss": 0.5903, "step": 8254 }, { "epoch": 1.347577649891841, "grad_norm": 3.370084047317505, "learning_rate": 1.8937551645104913e-05, "loss": 0.7797, "step": 8255 }, { "epoch": 1.3477409085343455, "grad_norm": 3.02734112739563, "learning_rate": 1.893726403948963e-05, "loss": 0.7798, "step": 8256 }, { "epoch": 1.34790416717685, "grad_norm": 2.386806011199951, "learning_rate": 1.8936976397136625e-05, "loss": 0.6152, "step": 8257 }, { "epoch": 1.3480674258193543, "grad_norm": 2.40883469581604, "learning_rate": 1.893668871804709e-05, "loss": 0.5537, "step": 8258 }, { "epoch": 1.3482306844618588, "grad_norm": 2.866230010986328, "learning_rate": 1.8936401002222198e-05, "loss": 0.7318, "step": 8259 }, { "epoch": 1.3483939431043632, "grad_norm": 2.9761364459991455, "learning_rate": 1.8936113249663136e-05, "loss": 0.8423, "step": 8260 }, { "epoch": 1.3485572017468674, "grad_norm": 2.5564305782318115, "learning_rate": 1.8935825460371088e-05, "loss": 0.5827, "step": 8261 }, { "epoch": 1.3487204603893719, "grad_norm": 2.945173501968384, "learning_rate": 1.8935537634347233e-05, "loss": 0.8063, "step": 8262 }, { "epoch": 1.3488837190318763, "grad_norm": 2.92295241355896, "learning_rate": 1.893524977159276e-05, "loss": 0.8272, "step": 8263 }, { "epoch": 1.3490469776743805, "grad_norm": 2.586294651031494, "learning_rate": 1.893496187210885e-05, "loss": 0.6837, "step": 8264 }, { "epoch": 1.349210236316885, "grad_norm": 3.2050557136535645, "learning_rate": 1.8934673935896684e-05, "loss": 0.7635, "step": 8265 }, { "epoch": 1.3493734949593894, "grad_norm": 3.312912940979004, "learning_rate": 1.8934385962957448e-05, "loss": 0.741, "step": 8266 }, { "epoch": 1.3495367536018938, "grad_norm": 2.7885046005249023, "learning_rate": 1.893409795329232e-05, "loss": 0.7408, "step": 8267 }, { "epoch": 1.3497000122443983, "grad_norm": 2.802541732788086, "learning_rate": 1.8933809906902492e-05, "loss": 0.7674, "step": 8268 }, { "epoch": 1.3498632708869025, "grad_norm": 3.0419983863830566, "learning_rate": 1.893352182378915e-05, "loss": 0.794, "step": 8269 }, { "epoch": 1.350026529529407, "grad_norm": 3.0097591876983643, "learning_rate": 1.8933233703953467e-05, "loss": 0.8427, "step": 8270 }, { "epoch": 1.3501897881719114, "grad_norm": 3.2408807277679443, "learning_rate": 1.8932945547396634e-05, "loss": 1.5042, "step": 8271 }, { "epoch": 1.3503530468144158, "grad_norm": 2.662381887435913, "learning_rate": 1.8932657354119835e-05, "loss": 0.7187, "step": 8272 }, { "epoch": 1.35051630545692, "grad_norm": 3.2355401515960693, "learning_rate": 1.8932369124124253e-05, "loss": 0.848, "step": 8273 }, { "epoch": 1.3506795640994245, "grad_norm": 2.7734622955322266, "learning_rate": 1.8932080857411076e-05, "loss": 0.6931, "step": 8274 }, { "epoch": 1.350842822741929, "grad_norm": 2.281505584716797, "learning_rate": 1.893179255398149e-05, "loss": 0.5631, "step": 8275 }, { "epoch": 1.3510060813844333, "grad_norm": 2.3078572750091553, "learning_rate": 1.8931504213836677e-05, "loss": 0.5956, "step": 8276 }, { "epoch": 1.3511693400269378, "grad_norm": 3.202514410018921, "learning_rate": 1.8931215836977816e-05, "loss": 0.869, "step": 8277 }, { "epoch": 1.351332598669442, "grad_norm": 2.528916120529175, "learning_rate": 1.8930927423406108e-05, "loss": 0.68, "step": 8278 }, { "epoch": 1.3514958573119464, "grad_norm": 2.6816320419311523, "learning_rate": 1.8930638973122724e-05, "loss": 0.7403, "step": 8279 }, { "epoch": 1.3516591159544509, "grad_norm": 2.8360347747802734, "learning_rate": 1.8930350486128855e-05, "loss": 0.6799, "step": 8280 }, { "epoch": 1.351822374596955, "grad_norm": 2.7034237384796143, "learning_rate": 1.893006196242569e-05, "loss": 0.6583, "step": 8281 }, { "epoch": 1.3519856332394595, "grad_norm": 2.764892339706421, "learning_rate": 1.892977340201441e-05, "loss": 0.8019, "step": 8282 }, { "epoch": 1.352148891881964, "grad_norm": 2.661581516265869, "learning_rate": 1.8929484804896204e-05, "loss": 0.8141, "step": 8283 }, { "epoch": 1.3523121505244684, "grad_norm": 2.518897294998169, "learning_rate": 1.892919617107226e-05, "loss": 0.7988, "step": 8284 }, { "epoch": 1.3524754091669728, "grad_norm": 2.562554359436035, "learning_rate": 1.892890750054376e-05, "loss": 0.6778, "step": 8285 }, { "epoch": 1.3526386678094773, "grad_norm": 3.275195598602295, "learning_rate": 1.892861879331189e-05, "loss": 0.8364, "step": 8286 }, { "epoch": 1.3528019264519815, "grad_norm": 2.4640371799468994, "learning_rate": 1.8928330049377845e-05, "loss": 0.6315, "step": 8287 }, { "epoch": 1.352965185094486, "grad_norm": 2.7239439487457275, "learning_rate": 1.8928041268742806e-05, "loss": 0.7823, "step": 8288 }, { "epoch": 1.3531284437369904, "grad_norm": 2.8894219398498535, "learning_rate": 1.8927752451407958e-05, "loss": 0.7497, "step": 8289 }, { "epoch": 1.3532917023794946, "grad_norm": 2.826451063156128, "learning_rate": 1.8927463597374492e-05, "loss": 0.7635, "step": 8290 }, { "epoch": 1.353454961021999, "grad_norm": 2.846165418624878, "learning_rate": 1.8927174706643592e-05, "loss": 0.7857, "step": 8291 }, { "epoch": 1.3536182196645035, "grad_norm": 2.636091709136963, "learning_rate": 1.892688577921645e-05, "loss": 0.5849, "step": 8292 }, { "epoch": 1.353781478307008, "grad_norm": 2.4409115314483643, "learning_rate": 1.892659681509425e-05, "loss": 0.7213, "step": 8293 }, { "epoch": 1.3539447369495123, "grad_norm": 3.031759262084961, "learning_rate": 1.892630781427818e-05, "loss": 0.8489, "step": 8294 }, { "epoch": 1.3541079955920168, "grad_norm": 2.8017797470092773, "learning_rate": 1.892601877676943e-05, "loss": 0.7593, "step": 8295 }, { "epoch": 1.354271254234521, "grad_norm": 3.092027425765991, "learning_rate": 1.8925729702569188e-05, "loss": 0.7887, "step": 8296 }, { "epoch": 1.3544345128770254, "grad_norm": 2.68420672416687, "learning_rate": 1.8925440591678642e-05, "loss": 0.6321, "step": 8297 }, { "epoch": 1.3545977715195299, "grad_norm": 2.807166576385498, "learning_rate": 1.892515144409898e-05, "loss": 0.6888, "step": 8298 }, { "epoch": 1.354761030162034, "grad_norm": 3.2647528648376465, "learning_rate": 1.892486225983139e-05, "loss": 0.8752, "step": 8299 }, { "epoch": 1.3549242888045385, "grad_norm": 2.6359097957611084, "learning_rate": 1.8924573038877062e-05, "loss": 0.7149, "step": 8300 }, { "epoch": 1.355087547447043, "grad_norm": 2.7024362087249756, "learning_rate": 1.892428378123718e-05, "loss": 0.6496, "step": 8301 }, { "epoch": 1.3552508060895474, "grad_norm": 2.7523646354675293, "learning_rate": 1.8923994486912943e-05, "loss": 0.6968, "step": 8302 }, { "epoch": 1.3554140647320518, "grad_norm": 2.9256997108459473, "learning_rate": 1.8923705155905528e-05, "loss": 0.7202, "step": 8303 }, { "epoch": 1.3555773233745563, "grad_norm": 2.632603645324707, "learning_rate": 1.8923415788216135e-05, "loss": 0.8026, "step": 8304 }, { "epoch": 1.3557405820170605, "grad_norm": 3.2983639240264893, "learning_rate": 1.8923126383845946e-05, "loss": 0.7761, "step": 8305 }, { "epoch": 1.355903840659565, "grad_norm": 2.36279559135437, "learning_rate": 1.8922836942796157e-05, "loss": 0.6398, "step": 8306 }, { "epoch": 1.3560670993020694, "grad_norm": 3.2427847385406494, "learning_rate": 1.8922547465067955e-05, "loss": 0.9074, "step": 8307 }, { "epoch": 1.3562303579445736, "grad_norm": 2.7321279048919678, "learning_rate": 1.8922257950662528e-05, "loss": 0.7133, "step": 8308 }, { "epoch": 1.356393616587078, "grad_norm": 3.012573003768921, "learning_rate": 1.892196839958107e-05, "loss": 0.7891, "step": 8309 }, { "epoch": 1.3565568752295825, "grad_norm": 3.0880486965179443, "learning_rate": 1.892167881182476e-05, "loss": 0.828, "step": 8310 }, { "epoch": 1.356720133872087, "grad_norm": 2.8935272693634033, "learning_rate": 1.8921389187394806e-05, "loss": 0.6753, "step": 8311 }, { "epoch": 1.3568833925145913, "grad_norm": 2.8184773921966553, "learning_rate": 1.8921099526292387e-05, "loss": 0.7049, "step": 8312 }, { "epoch": 1.3570466511570956, "grad_norm": 2.874321937561035, "learning_rate": 1.89208098285187e-05, "loss": 0.7902, "step": 8313 }, { "epoch": 1.3572099097996, "grad_norm": 2.8795197010040283, "learning_rate": 1.892052009407493e-05, "loss": 0.8514, "step": 8314 }, { "epoch": 1.3573731684421044, "grad_norm": 2.994408369064331, "learning_rate": 1.8920230322962266e-05, "loss": 0.8044, "step": 8315 }, { "epoch": 1.3575364270846089, "grad_norm": 3.0486550331115723, "learning_rate": 1.8919940515181906e-05, "loss": 0.8088, "step": 8316 }, { "epoch": 1.357699685727113, "grad_norm": 2.796269655227661, "learning_rate": 1.8919650670735038e-05, "loss": 0.703, "step": 8317 }, { "epoch": 1.3578629443696175, "grad_norm": 2.675048589706421, "learning_rate": 1.8919360789622854e-05, "loss": 0.6104, "step": 8318 }, { "epoch": 1.358026203012122, "grad_norm": 2.772949695587158, "learning_rate": 1.891907087184655e-05, "loss": 0.7529, "step": 8319 }, { "epoch": 1.3581894616546264, "grad_norm": 2.852071762084961, "learning_rate": 1.891878091740731e-05, "loss": 0.8093, "step": 8320 }, { "epoch": 1.3583527202971308, "grad_norm": 2.8923401832580566, "learning_rate": 1.891849092630633e-05, "loss": 0.712, "step": 8321 }, { "epoch": 1.358515978939635, "grad_norm": 2.8811891078948975, "learning_rate": 1.89182008985448e-05, "loss": 0.8287, "step": 8322 }, { "epoch": 1.3586792375821395, "grad_norm": 2.3163154125213623, "learning_rate": 1.8917910834123912e-05, "loss": 0.73, "step": 8323 }, { "epoch": 1.358842496224644, "grad_norm": 2.6167542934417725, "learning_rate": 1.8917620733044865e-05, "loss": 0.7091, "step": 8324 }, { "epoch": 1.3590057548671481, "grad_norm": 3.0148892402648926, "learning_rate": 1.891733059530884e-05, "loss": 0.7956, "step": 8325 }, { "epoch": 1.3591690135096526, "grad_norm": 2.926006317138672, "learning_rate": 1.8917040420917043e-05, "loss": 0.8651, "step": 8326 }, { "epoch": 1.359332272152157, "grad_norm": 2.187819004058838, "learning_rate": 1.891675020987066e-05, "loss": 0.6273, "step": 8327 }, { "epoch": 1.3594955307946615, "grad_norm": 2.75946307182312, "learning_rate": 1.8916459962170876e-05, "loss": 0.7462, "step": 8328 }, { "epoch": 1.359658789437166, "grad_norm": 2.73612904548645, "learning_rate": 1.8916169677818897e-05, "loss": 0.6416, "step": 8329 }, { "epoch": 1.3598220480796703, "grad_norm": 2.449676513671875, "learning_rate": 1.891587935681591e-05, "loss": 0.6302, "step": 8330 }, { "epoch": 1.3599853067221745, "grad_norm": 2.4926352500915527, "learning_rate": 1.8915588999163113e-05, "loss": 0.6337, "step": 8331 }, { "epoch": 1.360148565364679, "grad_norm": 2.3752734661102295, "learning_rate": 1.891529860486169e-05, "loss": 0.6001, "step": 8332 }, { "epoch": 1.3603118240071834, "grad_norm": 3.286881446838379, "learning_rate": 1.8915008173912845e-05, "loss": 0.7321, "step": 8333 }, { "epoch": 1.3604750826496876, "grad_norm": 2.6606242656707764, "learning_rate": 1.8914717706317766e-05, "loss": 0.6022, "step": 8334 }, { "epoch": 1.360638341292192, "grad_norm": 2.7676639556884766, "learning_rate": 1.8914427202077653e-05, "loss": 0.7224, "step": 8335 }, { "epoch": 1.3608015999346965, "grad_norm": 2.7740073204040527, "learning_rate": 1.891413666119369e-05, "loss": 0.8058, "step": 8336 }, { "epoch": 1.360964858577201, "grad_norm": 2.882866144180298, "learning_rate": 1.8913846083667076e-05, "loss": 0.7506, "step": 8337 }, { "epoch": 1.3611281172197054, "grad_norm": 2.7368528842926025, "learning_rate": 1.891355546949901e-05, "loss": 0.7726, "step": 8338 }, { "epoch": 1.3612913758622098, "grad_norm": 2.67439603805542, "learning_rate": 1.8913264818690685e-05, "loss": 0.7001, "step": 8339 }, { "epoch": 1.361454634504714, "grad_norm": 2.5171711444854736, "learning_rate": 1.8912974131243292e-05, "loss": 0.6662, "step": 8340 }, { "epoch": 1.3616178931472185, "grad_norm": 2.68222713470459, "learning_rate": 1.8912683407158027e-05, "loss": 0.6825, "step": 8341 }, { "epoch": 1.361781151789723, "grad_norm": 2.8259365558624268, "learning_rate": 1.8912392646436087e-05, "loss": 0.6112, "step": 8342 }, { "epoch": 1.3619444104322271, "grad_norm": 2.5214850902557373, "learning_rate": 1.8912101849078665e-05, "loss": 0.6239, "step": 8343 }, { "epoch": 1.3621076690747316, "grad_norm": 2.7558090686798096, "learning_rate": 1.891181101508696e-05, "loss": 0.6884, "step": 8344 }, { "epoch": 1.362270927717236, "grad_norm": 2.8862740993499756, "learning_rate": 1.8911520144462162e-05, "loss": 0.6996, "step": 8345 }, { "epoch": 1.3624341863597405, "grad_norm": 3.2348875999450684, "learning_rate": 1.891122923720547e-05, "loss": 0.8275, "step": 8346 }, { "epoch": 1.362597445002245, "grad_norm": 2.8455095291137695, "learning_rate": 1.891093829331808e-05, "loss": 0.7821, "step": 8347 }, { "epoch": 1.3627607036447493, "grad_norm": 2.6231918334960938, "learning_rate": 1.891064731280119e-05, "loss": 0.7012, "step": 8348 }, { "epoch": 1.3629239622872535, "grad_norm": 2.330949068069458, "learning_rate": 1.891035629565599e-05, "loss": 0.5485, "step": 8349 }, { "epoch": 1.363087220929758, "grad_norm": 3.1361279487609863, "learning_rate": 1.891006524188368e-05, "loss": 0.8389, "step": 8350 }, { "epoch": 1.3632504795722624, "grad_norm": 3.330838203430176, "learning_rate": 1.8909774151485458e-05, "loss": 0.8484, "step": 8351 }, { "epoch": 1.3634137382147666, "grad_norm": 2.4260597229003906, "learning_rate": 1.8909483024462517e-05, "loss": 0.634, "step": 8352 }, { "epoch": 1.363576996857271, "grad_norm": 2.8827133178710938, "learning_rate": 1.8909191860816058e-05, "loss": 0.7465, "step": 8353 }, { "epoch": 1.3637402554997755, "grad_norm": 3.0160632133483887, "learning_rate": 1.890890066054727e-05, "loss": 0.8161, "step": 8354 }, { "epoch": 1.36390351414228, "grad_norm": 2.419806718826294, "learning_rate": 1.890860942365736e-05, "loss": 0.6414, "step": 8355 }, { "epoch": 1.3640667727847844, "grad_norm": 2.6865806579589844, "learning_rate": 1.890831815014752e-05, "loss": 0.7214, "step": 8356 }, { "epoch": 1.3642300314272886, "grad_norm": 3.316115617752075, "learning_rate": 1.8908026840018946e-05, "loss": 0.8301, "step": 8357 }, { "epoch": 1.364393290069793, "grad_norm": 3.2343664169311523, "learning_rate": 1.890773549327284e-05, "loss": 0.8059, "step": 8358 }, { "epoch": 1.3645565487122975, "grad_norm": 2.8142738342285156, "learning_rate": 1.8907444109910394e-05, "loss": 0.7423, "step": 8359 }, { "epoch": 1.364719807354802, "grad_norm": 3.1471750736236572, "learning_rate": 1.8907152689932808e-05, "loss": 0.7682, "step": 8360 }, { "epoch": 1.3648830659973061, "grad_norm": 2.12528395652771, "learning_rate": 1.8906861233341284e-05, "loss": 0.503, "step": 8361 }, { "epoch": 1.3650463246398106, "grad_norm": 2.698014974594116, "learning_rate": 1.8906569740137015e-05, "loss": 0.6378, "step": 8362 }, { "epoch": 1.365209583282315, "grad_norm": 2.3077588081359863, "learning_rate": 1.89062782103212e-05, "loss": 0.5508, "step": 8363 }, { "epoch": 1.3653728419248194, "grad_norm": 2.7538537979125977, "learning_rate": 1.8905986643895043e-05, "loss": 0.709, "step": 8364 }, { "epoch": 1.3655361005673239, "grad_norm": 2.647500991821289, "learning_rate": 1.890569504085973e-05, "loss": 0.749, "step": 8365 }, { "epoch": 1.365699359209828, "grad_norm": 3.019109010696411, "learning_rate": 1.8905403401216473e-05, "loss": 0.7261, "step": 8366 }, { "epoch": 1.3658626178523325, "grad_norm": 2.6145577430725098, "learning_rate": 1.8905111724966467e-05, "loss": 0.6982, "step": 8367 }, { "epoch": 1.366025876494837, "grad_norm": 3.1563990116119385, "learning_rate": 1.89048200121109e-05, "loss": 0.83, "step": 8368 }, { "epoch": 1.3661891351373414, "grad_norm": 2.4725522994995117, "learning_rate": 1.890452826265099e-05, "loss": 0.5507, "step": 8369 }, { "epoch": 1.3663523937798456, "grad_norm": 2.3779404163360596, "learning_rate": 1.8904236476587922e-05, "loss": 0.6284, "step": 8370 }, { "epoch": 1.36651565242235, "grad_norm": 3.168231248855591, "learning_rate": 1.89039446539229e-05, "loss": 0.8579, "step": 8371 }, { "epoch": 1.3666789110648545, "grad_norm": 3.1980926990509033, "learning_rate": 1.8903652794657124e-05, "loss": 0.7594, "step": 8372 }, { "epoch": 1.366842169707359, "grad_norm": 2.467371702194214, "learning_rate": 1.8903360898791793e-05, "loss": 0.5763, "step": 8373 }, { "epoch": 1.3670054283498634, "grad_norm": 2.451739549636841, "learning_rate": 1.890306896632811e-05, "loss": 0.6548, "step": 8374 }, { "epoch": 1.3671686869923676, "grad_norm": 3.0123536586761475, "learning_rate": 1.890277699726727e-05, "loss": 0.6937, "step": 8375 }, { "epoch": 1.367331945634872, "grad_norm": 2.9398584365844727, "learning_rate": 1.8902484991610474e-05, "loss": 0.6172, "step": 8376 }, { "epoch": 1.3674952042773765, "grad_norm": 2.6973066329956055, "learning_rate": 1.8902192949358927e-05, "loss": 0.6326, "step": 8377 }, { "epoch": 1.3676584629198807, "grad_norm": 2.6235857009887695, "learning_rate": 1.890190087051382e-05, "loss": 0.6255, "step": 8378 }, { "epoch": 1.3678217215623851, "grad_norm": 2.923318386077881, "learning_rate": 1.8901608755076368e-05, "loss": 0.7597, "step": 8379 }, { "epoch": 1.3679849802048896, "grad_norm": 2.8422834873199463, "learning_rate": 1.890131660304776e-05, "loss": 0.6518, "step": 8380 }, { "epoch": 1.368148238847394, "grad_norm": 3.0083155632019043, "learning_rate": 1.89010244144292e-05, "loss": 0.8828, "step": 8381 }, { "epoch": 1.3683114974898984, "grad_norm": 2.907857894897461, "learning_rate": 1.890073218922189e-05, "loss": 0.7635, "step": 8382 }, { "epoch": 1.3684747561324029, "grad_norm": 2.7794225215911865, "learning_rate": 1.8900439927427033e-05, "loss": 0.6359, "step": 8383 }, { "epoch": 1.368638014774907, "grad_norm": 2.9349427223205566, "learning_rate": 1.8900147629045824e-05, "loss": 0.8677, "step": 8384 }, { "epoch": 1.3688012734174115, "grad_norm": 2.9371535778045654, "learning_rate": 1.8899855294079474e-05, "loss": 0.6982, "step": 8385 }, { "epoch": 1.368964532059916, "grad_norm": 3.0296270847320557, "learning_rate": 1.8899562922529174e-05, "loss": 0.6839, "step": 8386 }, { "epoch": 1.3691277907024202, "grad_norm": 2.441498279571533, "learning_rate": 1.8899270514396134e-05, "loss": 0.6038, "step": 8387 }, { "epoch": 1.3692910493449246, "grad_norm": 2.7490670680999756, "learning_rate": 1.889897806968155e-05, "loss": 0.7133, "step": 8388 }, { "epoch": 1.369454307987429, "grad_norm": 2.677941083908081, "learning_rate": 1.889868558838663e-05, "loss": 0.6911, "step": 8389 }, { "epoch": 1.3696175666299335, "grad_norm": 2.5154645442962646, "learning_rate": 1.889839307051257e-05, "loss": 0.6634, "step": 8390 }, { "epoch": 1.369780825272438, "grad_norm": 2.8850691318511963, "learning_rate": 1.8898100516060584e-05, "loss": 0.6745, "step": 8391 }, { "epoch": 1.3699440839149424, "grad_norm": 2.9155113697052, "learning_rate": 1.8897807925031864e-05, "loss": 0.8602, "step": 8392 }, { "epoch": 1.3701073425574466, "grad_norm": 2.5144753456115723, "learning_rate": 1.8897515297427614e-05, "loss": 0.7191, "step": 8393 }, { "epoch": 1.370270601199951, "grad_norm": 2.75892972946167, "learning_rate": 1.8897222633249037e-05, "loss": 0.6513, "step": 8394 }, { "epoch": 1.3704338598424555, "grad_norm": 2.9012043476104736, "learning_rate": 1.889692993249734e-05, "loss": 0.7281, "step": 8395 }, { "epoch": 1.3705971184849597, "grad_norm": 2.614051103591919, "learning_rate": 1.889663719517372e-05, "loss": 0.7163, "step": 8396 }, { "epoch": 1.3707603771274641, "grad_norm": 2.610856533050537, "learning_rate": 1.8896344421279386e-05, "loss": 0.623, "step": 8397 }, { "epoch": 1.3709236357699686, "grad_norm": 3.119405508041382, "learning_rate": 1.889605161081554e-05, "loss": 0.6923, "step": 8398 }, { "epoch": 1.371086894412473, "grad_norm": 2.6889328956604004, "learning_rate": 1.8895758763783383e-05, "loss": 0.6658, "step": 8399 }, { "epoch": 1.3712501530549774, "grad_norm": 2.5907771587371826, "learning_rate": 1.889546588018412e-05, "loss": 0.6014, "step": 8400 }, { "epoch": 1.3714134116974817, "grad_norm": 2.6590073108673096, "learning_rate": 1.889517296001896e-05, "loss": 0.6925, "step": 8401 }, { "epoch": 1.371576670339986, "grad_norm": 2.9191091060638428, "learning_rate": 1.8894880003289096e-05, "loss": 0.7075, "step": 8402 }, { "epoch": 1.3717399289824905, "grad_norm": 2.3979315757751465, "learning_rate": 1.8894587009995742e-05, "loss": 0.592, "step": 8403 }, { "epoch": 1.371903187624995, "grad_norm": 2.9156556129455566, "learning_rate": 1.8894293980140103e-05, "loss": 0.7457, "step": 8404 }, { "epoch": 1.3720664462674992, "grad_norm": 2.7674527168273926, "learning_rate": 1.8894000913723377e-05, "loss": 0.7092, "step": 8405 }, { "epoch": 1.3722297049100036, "grad_norm": 2.7916195392608643, "learning_rate": 1.889370781074677e-05, "loss": 0.6912, "step": 8406 }, { "epoch": 1.372392963552508, "grad_norm": 2.648811101913452, "learning_rate": 1.889341467121149e-05, "loss": 0.7228, "step": 8407 }, { "epoch": 1.3725562221950125, "grad_norm": 3.0431060791015625, "learning_rate": 1.889312149511874e-05, "loss": 1.6298, "step": 8408 }, { "epoch": 1.372719480837517, "grad_norm": 2.5499343872070312, "learning_rate": 1.8892828282469726e-05, "loss": 0.6065, "step": 8409 }, { "epoch": 1.3728827394800212, "grad_norm": 3.0914547443389893, "learning_rate": 1.8892535033265652e-05, "loss": 0.7156, "step": 8410 }, { "epoch": 1.3730459981225256, "grad_norm": 2.3901240825653076, "learning_rate": 1.889224174750773e-05, "loss": 0.6874, "step": 8411 }, { "epoch": 1.37320925676503, "grad_norm": 2.3973159790039062, "learning_rate": 1.8891948425197152e-05, "loss": 0.5971, "step": 8412 }, { "epoch": 1.3733725154075345, "grad_norm": 2.354442834854126, "learning_rate": 1.8891655066335132e-05, "loss": 0.5778, "step": 8413 }, { "epoch": 1.3735357740500387, "grad_norm": 2.7276246547698975, "learning_rate": 1.8891361670922878e-05, "loss": 0.6605, "step": 8414 }, { "epoch": 1.3736990326925431, "grad_norm": 2.3706769943237305, "learning_rate": 1.8891068238961593e-05, "loss": 0.5338, "step": 8415 }, { "epoch": 1.3738622913350476, "grad_norm": 2.9482288360595703, "learning_rate": 1.8890774770452486e-05, "loss": 0.7215, "step": 8416 }, { "epoch": 1.374025549977552, "grad_norm": 2.6633903980255127, "learning_rate": 1.8890481265396757e-05, "loss": 0.658, "step": 8417 }, { "epoch": 1.3741888086200564, "grad_norm": 2.8104352951049805, "learning_rate": 1.889018772379562e-05, "loss": 0.7056, "step": 8418 }, { "epoch": 1.3743520672625607, "grad_norm": 2.998866081237793, "learning_rate": 1.8889894145650277e-05, "loss": 0.8816, "step": 8419 }, { "epoch": 1.374515325905065, "grad_norm": 2.194789409637451, "learning_rate": 1.8889600530961935e-05, "loss": 0.5419, "step": 8420 }, { "epoch": 1.3746785845475695, "grad_norm": 3.088984251022339, "learning_rate": 1.8889306879731802e-05, "loss": 0.768, "step": 8421 }, { "epoch": 1.3748418431900737, "grad_norm": 2.7944352626800537, "learning_rate": 1.8889013191961085e-05, "loss": 0.7775, "step": 8422 }, { "epoch": 1.3750051018325782, "grad_norm": 3.533947706222534, "learning_rate": 1.888871946765099e-05, "loss": 0.9107, "step": 8423 }, { "epoch": 1.3751683604750826, "grad_norm": 3.0043158531188965, "learning_rate": 1.8888425706802728e-05, "loss": 0.7839, "step": 8424 }, { "epoch": 1.375331619117587, "grad_norm": 2.659721612930298, "learning_rate": 1.8888131909417504e-05, "loss": 0.7195, "step": 8425 }, { "epoch": 1.3754948777600915, "grad_norm": 2.742246389389038, "learning_rate": 1.8887838075496524e-05, "loss": 0.6471, "step": 8426 }, { "epoch": 1.375658136402596, "grad_norm": 2.70233416557312, "learning_rate": 1.8887544205041e-05, "loss": 0.6698, "step": 8427 }, { "epoch": 1.3758213950451001, "grad_norm": 2.7151339054107666, "learning_rate": 1.8887250298052135e-05, "loss": 0.6714, "step": 8428 }, { "epoch": 1.3759846536876046, "grad_norm": 3.0988926887512207, "learning_rate": 1.888695635453114e-05, "loss": 0.7682, "step": 8429 }, { "epoch": 1.376147912330109, "grad_norm": 2.2736856937408447, "learning_rate": 1.8886662374479225e-05, "loss": 0.557, "step": 8430 }, { "epoch": 1.3763111709726132, "grad_norm": 2.9008848667144775, "learning_rate": 1.88863683578976e-05, "loss": 0.7588, "step": 8431 }, { "epoch": 1.3764744296151177, "grad_norm": 2.314098834991455, "learning_rate": 1.8886074304787466e-05, "loss": 0.5657, "step": 8432 }, { "epoch": 1.3766376882576221, "grad_norm": 2.703071117401123, "learning_rate": 1.8885780215150035e-05, "loss": 0.669, "step": 8433 }, { "epoch": 1.3768009469001266, "grad_norm": 2.4868953227996826, "learning_rate": 1.8885486088986516e-05, "loss": 0.6451, "step": 8434 }, { "epoch": 1.376964205542631, "grad_norm": 2.5971245765686035, "learning_rate": 1.888519192629812e-05, "loss": 0.7245, "step": 8435 }, { "epoch": 1.3771274641851354, "grad_norm": 2.6209757328033447, "learning_rate": 1.8884897727086056e-05, "loss": 0.6471, "step": 8436 }, { "epoch": 1.3772907228276396, "grad_norm": 2.428284168243408, "learning_rate": 1.888460349135153e-05, "loss": 0.6057, "step": 8437 }, { "epoch": 1.377453981470144, "grad_norm": 2.42647385597229, "learning_rate": 1.8884309219095754e-05, "loss": 0.5638, "step": 8438 }, { "epoch": 1.3776172401126485, "grad_norm": 2.719484806060791, "learning_rate": 1.888401491031994e-05, "loss": 0.627, "step": 8439 }, { "epoch": 1.3777804987551527, "grad_norm": 3.1455414295196533, "learning_rate": 1.8883720565025296e-05, "loss": 0.7875, "step": 8440 }, { "epoch": 1.3779437573976572, "grad_norm": 2.2405126094818115, "learning_rate": 1.888342618321303e-05, "loss": 0.5585, "step": 8441 }, { "epoch": 1.3781070160401616, "grad_norm": 2.9904985427856445, "learning_rate": 1.888313176488435e-05, "loss": 0.7277, "step": 8442 }, { "epoch": 1.378270274682666, "grad_norm": 2.7848598957061768, "learning_rate": 1.8882837310040475e-05, "loss": 0.6557, "step": 8443 }, { "epoch": 1.3784335333251705, "grad_norm": 2.772747039794922, "learning_rate": 1.8882542818682608e-05, "loss": 0.6542, "step": 8444 }, { "epoch": 1.3785967919676747, "grad_norm": 2.9547276496887207, "learning_rate": 1.888224829081196e-05, "loss": 0.8217, "step": 8445 }, { "epoch": 1.3787600506101791, "grad_norm": 2.5041496753692627, "learning_rate": 1.888195372642974e-05, "loss": 0.6111, "step": 8446 }, { "epoch": 1.3789233092526836, "grad_norm": 3.3523566722869873, "learning_rate": 1.8881659125537168e-05, "loss": 0.7809, "step": 8447 }, { "epoch": 1.379086567895188, "grad_norm": 1.994910478591919, "learning_rate": 1.8881364488135448e-05, "loss": 0.4309, "step": 8448 }, { "epoch": 1.3792498265376922, "grad_norm": 3.2685232162475586, "learning_rate": 1.888106981422579e-05, "loss": 0.8112, "step": 8449 }, { "epoch": 1.3794130851801967, "grad_norm": 3.108224391937256, "learning_rate": 1.888077510380941e-05, "loss": 0.7659, "step": 8450 }, { "epoch": 1.3795763438227011, "grad_norm": 3.034177780151367, "learning_rate": 1.8880480356887512e-05, "loss": 0.7458, "step": 8451 }, { "epoch": 1.3797396024652056, "grad_norm": 2.9543116092681885, "learning_rate": 1.8880185573461317e-05, "loss": 0.7175, "step": 8452 }, { "epoch": 1.37990286110771, "grad_norm": 2.4799251556396484, "learning_rate": 1.887989075353203e-05, "loss": 0.5583, "step": 8453 }, { "epoch": 1.3800661197502142, "grad_norm": 3.0323729515075684, "learning_rate": 1.8879595897100866e-05, "loss": 0.7796, "step": 8454 }, { "epoch": 1.3802293783927186, "grad_norm": 2.3962178230285645, "learning_rate": 1.887930100416904e-05, "loss": 0.6931, "step": 8455 }, { "epoch": 1.380392637035223, "grad_norm": 2.982229471206665, "learning_rate": 1.8879006074737752e-05, "loss": 0.8505, "step": 8456 }, { "epoch": 1.3805558956777275, "grad_norm": 3.1808066368103027, "learning_rate": 1.887871110880823e-05, "loss": 0.7934, "step": 8457 }, { "epoch": 1.3807191543202317, "grad_norm": 3.4097182750701904, "learning_rate": 1.8878416106381672e-05, "loss": 0.8875, "step": 8458 }, { "epoch": 1.3808824129627362, "grad_norm": 2.6339480876922607, "learning_rate": 1.8878121067459302e-05, "loss": 0.7397, "step": 8459 }, { "epoch": 1.3810456716052406, "grad_norm": 2.8112149238586426, "learning_rate": 1.887782599204233e-05, "loss": 0.6917, "step": 8460 }, { "epoch": 1.381208930247745, "grad_norm": 3.0408613681793213, "learning_rate": 1.8877530880131968e-05, "loss": 0.8154, "step": 8461 }, { "epoch": 1.3813721888902495, "grad_norm": 2.9151523113250732, "learning_rate": 1.8877235731729427e-05, "loss": 0.8475, "step": 8462 }, { "epoch": 1.3815354475327537, "grad_norm": 2.708369731903076, "learning_rate": 1.887694054683592e-05, "loss": 0.7174, "step": 8463 }, { "epoch": 1.3816987061752581, "grad_norm": 2.6972947120666504, "learning_rate": 1.8876645325452665e-05, "loss": 0.7155, "step": 8464 }, { "epoch": 1.3818619648177626, "grad_norm": 2.255972146987915, "learning_rate": 1.887635006758087e-05, "loss": 0.649, "step": 8465 }, { "epoch": 1.3820252234602668, "grad_norm": 2.769896984100342, "learning_rate": 1.8876054773221755e-05, "loss": 0.7484, "step": 8466 }, { "epoch": 1.3821884821027712, "grad_norm": 2.2400662899017334, "learning_rate": 1.887575944237653e-05, "loss": 0.5345, "step": 8467 }, { "epoch": 1.3823517407452757, "grad_norm": 2.646021604537964, "learning_rate": 1.8875464075046408e-05, "loss": 0.7294, "step": 8468 }, { "epoch": 1.38251499938778, "grad_norm": 2.7809016704559326, "learning_rate": 1.8875168671232605e-05, "loss": 0.7359, "step": 8469 }, { "epoch": 1.3826782580302845, "grad_norm": 2.540039300918579, "learning_rate": 1.8874873230936334e-05, "loss": 0.6798, "step": 8470 }, { "epoch": 1.382841516672789, "grad_norm": 2.803168535232544, "learning_rate": 1.8874577754158808e-05, "loss": 0.7619, "step": 8471 }, { "epoch": 1.3830047753152932, "grad_norm": 2.9294495582580566, "learning_rate": 1.887428224090125e-05, "loss": 0.7884, "step": 8472 }, { "epoch": 1.3831680339577976, "grad_norm": 2.7166943550109863, "learning_rate": 1.8873986691164866e-05, "loss": 0.6831, "step": 8473 }, { "epoch": 1.383331292600302, "grad_norm": 3.111940383911133, "learning_rate": 1.887369110495087e-05, "loss": 0.8666, "step": 8474 }, { "epoch": 1.3834945512428063, "grad_norm": 3.0369110107421875, "learning_rate": 1.8873395482260486e-05, "loss": 0.8202, "step": 8475 }, { "epoch": 1.3836578098853107, "grad_norm": 2.6863620281219482, "learning_rate": 1.8873099823094916e-05, "loss": 0.7253, "step": 8476 }, { "epoch": 1.3838210685278152, "grad_norm": 2.4025087356567383, "learning_rate": 1.887280412745539e-05, "loss": 0.6591, "step": 8477 }, { "epoch": 1.3839843271703196, "grad_norm": 2.6435799598693848, "learning_rate": 1.8872508395343115e-05, "loss": 0.8862, "step": 8478 }, { "epoch": 1.384147585812824, "grad_norm": 2.5300586223602295, "learning_rate": 1.8872212626759307e-05, "loss": 0.7558, "step": 8479 }, { "epoch": 1.3843108444553285, "grad_norm": 2.681403398513794, "learning_rate": 1.8871916821705187e-05, "loss": 0.6524, "step": 8480 }, { "epoch": 1.3844741030978327, "grad_norm": 2.798856496810913, "learning_rate": 1.887162098018196e-05, "loss": 0.8166, "step": 8481 }, { "epoch": 1.3846373617403371, "grad_norm": 2.57326078414917, "learning_rate": 1.8871325102190854e-05, "loss": 0.5675, "step": 8482 }, { "epoch": 1.3848006203828416, "grad_norm": 2.2110204696655273, "learning_rate": 1.8871029187733078e-05, "loss": 0.5844, "step": 8483 }, { "epoch": 1.3849638790253458, "grad_norm": 2.356299638748169, "learning_rate": 1.8870733236809852e-05, "loss": 0.6192, "step": 8484 }, { "epoch": 1.3851271376678502, "grad_norm": 2.746338367462158, "learning_rate": 1.887043724942239e-05, "loss": 0.7302, "step": 8485 }, { "epoch": 1.3852903963103547, "grad_norm": 2.9820291996002197, "learning_rate": 1.887014122557191e-05, "loss": 0.8424, "step": 8486 }, { "epoch": 1.385453654952859, "grad_norm": 2.239903688430786, "learning_rate": 1.886984516525963e-05, "loss": 0.5402, "step": 8487 }, { "epoch": 1.3856169135953635, "grad_norm": 2.437025785446167, "learning_rate": 1.8869549068486764e-05, "loss": 0.6907, "step": 8488 }, { "epoch": 1.385780172237868, "grad_norm": 3.024690866470337, "learning_rate": 1.8869252935254533e-05, "loss": 0.7921, "step": 8489 }, { "epoch": 1.3859434308803722, "grad_norm": 2.7944676876068115, "learning_rate": 1.886895676556415e-05, "loss": 0.6864, "step": 8490 }, { "epoch": 1.3861066895228766, "grad_norm": 2.4637954235076904, "learning_rate": 1.8868660559416837e-05, "loss": 0.6031, "step": 8491 }, { "epoch": 1.386269948165381, "grad_norm": 3.20927357673645, "learning_rate": 1.8868364316813807e-05, "loss": 0.845, "step": 8492 }, { "epoch": 1.3864332068078853, "grad_norm": 2.8736228942871094, "learning_rate": 1.8868068037756282e-05, "loss": 0.6963, "step": 8493 }, { "epoch": 1.3865964654503897, "grad_norm": 2.839934825897217, "learning_rate": 1.8867771722245475e-05, "loss": 0.7925, "step": 8494 }, { "epoch": 1.3867597240928942, "grad_norm": 2.959754228591919, "learning_rate": 1.8867475370282608e-05, "loss": 0.7573, "step": 8495 }, { "epoch": 1.3869229827353986, "grad_norm": 2.9485561847686768, "learning_rate": 1.8867178981868898e-05, "loss": 0.7272, "step": 8496 }, { "epoch": 1.387086241377903, "grad_norm": 3.037363290786743, "learning_rate": 1.8866882557005567e-05, "loss": 0.7594, "step": 8497 }, { "epoch": 1.3872495000204073, "grad_norm": 3.2662851810455322, "learning_rate": 1.8866586095693827e-05, "loss": 0.7613, "step": 8498 }, { "epoch": 1.3874127586629117, "grad_norm": 2.9510574340820312, "learning_rate": 1.88662895979349e-05, "loss": 0.6935, "step": 8499 }, { "epoch": 1.3875760173054161, "grad_norm": 2.7797486782073975, "learning_rate": 1.8865993063730003e-05, "loss": 0.6627, "step": 8500 }, { "epoch": 1.3877392759479206, "grad_norm": 3.0808708667755127, "learning_rate": 1.8865696493080356e-05, "loss": 0.6261, "step": 8501 }, { "epoch": 1.3879025345904248, "grad_norm": 2.733208179473877, "learning_rate": 1.886539988598718e-05, "loss": 0.7327, "step": 8502 }, { "epoch": 1.3880657932329292, "grad_norm": 2.5634477138519287, "learning_rate": 1.886510324245169e-05, "loss": 0.6265, "step": 8503 }, { "epoch": 1.3882290518754337, "grad_norm": 2.6619086265563965, "learning_rate": 1.886480656247511e-05, "loss": 0.7579, "step": 8504 }, { "epoch": 1.388392310517938, "grad_norm": 2.5265145301818848, "learning_rate": 1.886450984605866e-05, "loss": 0.6369, "step": 8505 }, { "epoch": 1.3885555691604425, "grad_norm": 2.7177698612213135, "learning_rate": 1.8864213093203552e-05, "loss": 0.5909, "step": 8506 }, { "epoch": 1.3887188278029468, "grad_norm": 2.920650005340576, "learning_rate": 1.8863916303911015e-05, "loss": 0.7324, "step": 8507 }, { "epoch": 1.3888820864454512, "grad_norm": 2.9039552211761475, "learning_rate": 1.886361947818226e-05, "loss": 0.9337, "step": 8508 }, { "epoch": 1.3890453450879556, "grad_norm": 2.909712553024292, "learning_rate": 1.8863322616018517e-05, "loss": 0.7212, "step": 8509 }, { "epoch": 1.3892086037304598, "grad_norm": 3.1437325477600098, "learning_rate": 1.8863025717421003e-05, "loss": 0.9021, "step": 8510 }, { "epoch": 1.3893718623729643, "grad_norm": 2.909698247909546, "learning_rate": 1.8862728782390935e-05, "loss": 0.7943, "step": 8511 }, { "epoch": 1.3895351210154687, "grad_norm": 2.841998338699341, "learning_rate": 1.8862431810929532e-05, "loss": 0.7305, "step": 8512 }, { "epoch": 1.3896983796579732, "grad_norm": 2.4090099334716797, "learning_rate": 1.8862134803038022e-05, "loss": 0.5512, "step": 8513 }, { "epoch": 1.3898616383004776, "grad_norm": 3.6829893589019775, "learning_rate": 1.886183775871762e-05, "loss": 0.8618, "step": 8514 }, { "epoch": 1.390024896942982, "grad_norm": 3.1856560707092285, "learning_rate": 1.8861540677969554e-05, "loss": 0.8377, "step": 8515 }, { "epoch": 1.3901881555854863, "grad_norm": 2.7987005710601807, "learning_rate": 1.886124356079503e-05, "loss": 0.6349, "step": 8516 }, { "epoch": 1.3903514142279907, "grad_norm": 2.5911896228790283, "learning_rate": 1.8860946407195287e-05, "loss": 0.6784, "step": 8517 }, { "epoch": 1.3905146728704951, "grad_norm": 2.716963768005371, "learning_rate": 1.886064921717154e-05, "loss": 0.6789, "step": 8518 }, { "epoch": 1.3906779315129993, "grad_norm": 2.5144236087799072, "learning_rate": 1.886035199072501e-05, "loss": 0.5965, "step": 8519 }, { "epoch": 1.3908411901555038, "grad_norm": 2.948464870452881, "learning_rate": 1.8860054727856916e-05, "loss": 0.6793, "step": 8520 }, { "epoch": 1.3910044487980082, "grad_norm": 2.5296363830566406, "learning_rate": 1.8859757428568484e-05, "loss": 0.6369, "step": 8521 }, { "epoch": 1.3911677074405127, "grad_norm": 2.761904239654541, "learning_rate": 1.8859460092860932e-05, "loss": 0.7547, "step": 8522 }, { "epoch": 1.391330966083017, "grad_norm": 2.518857717514038, "learning_rate": 1.8859162720735486e-05, "loss": 0.6838, "step": 8523 }, { "epoch": 1.3914942247255215, "grad_norm": 3.153944969177246, "learning_rate": 1.8858865312193367e-05, "loss": 0.7924, "step": 8524 }, { "epoch": 1.3916574833680257, "grad_norm": 2.721893072128296, "learning_rate": 1.88585678672358e-05, "loss": 0.8231, "step": 8525 }, { "epoch": 1.3918207420105302, "grad_norm": 2.672147274017334, "learning_rate": 1.8858270385864002e-05, "loss": 0.6972, "step": 8526 }, { "epoch": 1.3919840006530346, "grad_norm": 2.3867881298065186, "learning_rate": 1.8857972868079203e-05, "loss": 0.6955, "step": 8527 }, { "epoch": 1.3921472592955388, "grad_norm": 2.5978376865386963, "learning_rate": 1.885767531388262e-05, "loss": 0.764, "step": 8528 }, { "epoch": 1.3923105179380433, "grad_norm": 2.762538433074951, "learning_rate": 1.885737772327548e-05, "loss": 0.7625, "step": 8529 }, { "epoch": 1.3924737765805477, "grad_norm": 3.0697388648986816, "learning_rate": 1.8857080096259004e-05, "loss": 0.8015, "step": 8530 }, { "epoch": 1.3926370352230522, "grad_norm": 2.9291794300079346, "learning_rate": 1.8856782432834415e-05, "loss": 0.7363, "step": 8531 }, { "epoch": 1.3928002938655566, "grad_norm": 3.0327556133270264, "learning_rate": 1.8856484733002937e-05, "loss": 0.7661, "step": 8532 }, { "epoch": 1.392963552508061, "grad_norm": 2.8313095569610596, "learning_rate": 1.8856186996765796e-05, "loss": 0.6497, "step": 8533 }, { "epoch": 1.3931268111505652, "grad_norm": 2.9722015857696533, "learning_rate": 1.8855889224124217e-05, "loss": 0.8899, "step": 8534 }, { "epoch": 1.3932900697930697, "grad_norm": 3.074253559112549, "learning_rate": 1.8855591415079417e-05, "loss": 0.915, "step": 8535 }, { "epoch": 1.3934533284355741, "grad_norm": 2.5378034114837646, "learning_rate": 1.8855293569632628e-05, "loss": 0.5965, "step": 8536 }, { "epoch": 1.3936165870780783, "grad_norm": 2.5355100631713867, "learning_rate": 1.8854995687785066e-05, "loss": 0.6941, "step": 8537 }, { "epoch": 1.3937798457205828, "grad_norm": 2.4936177730560303, "learning_rate": 1.8854697769537963e-05, "loss": 0.715, "step": 8538 }, { "epoch": 1.3939431043630872, "grad_norm": 2.82589054107666, "learning_rate": 1.885439981489254e-05, "loss": 0.7503, "step": 8539 }, { "epoch": 1.3941063630055917, "grad_norm": 2.6027309894561768, "learning_rate": 1.8854101823850027e-05, "loss": 0.6668, "step": 8540 }, { "epoch": 1.394269621648096, "grad_norm": 2.820791244506836, "learning_rate": 1.885380379641164e-05, "loss": 0.7303, "step": 8541 }, { "epoch": 1.3944328802906003, "grad_norm": 2.662839651107788, "learning_rate": 1.8853505732578608e-05, "loss": 0.6968, "step": 8542 }, { "epoch": 1.3945961389331047, "grad_norm": 2.7177066802978516, "learning_rate": 1.8853207632352162e-05, "loss": 0.6434, "step": 8543 }, { "epoch": 1.3947593975756092, "grad_norm": 2.508180618286133, "learning_rate": 1.885290949573352e-05, "loss": 0.6453, "step": 8544 }, { "epoch": 1.3949226562181136, "grad_norm": 3.290388584136963, "learning_rate": 1.885261132272391e-05, "loss": 0.7726, "step": 8545 }, { "epoch": 1.3950859148606178, "grad_norm": 2.6294827461242676, "learning_rate": 1.8852313113324553e-05, "loss": 0.674, "step": 8546 }, { "epoch": 1.3952491735031223, "grad_norm": 2.4772472381591797, "learning_rate": 1.8852014867536684e-05, "loss": 0.6568, "step": 8547 }, { "epoch": 1.3954124321456267, "grad_norm": 3.3747496604919434, "learning_rate": 1.8851716585361524e-05, "loss": 0.8001, "step": 8548 }, { "epoch": 1.3955756907881312, "grad_norm": 2.5978715419769287, "learning_rate": 1.8851418266800298e-05, "loss": 0.5457, "step": 8549 }, { "epoch": 1.3957389494306356, "grad_norm": 2.58613920211792, "learning_rate": 1.8851119911854236e-05, "loss": 0.6776, "step": 8550 }, { "epoch": 1.3959022080731398, "grad_norm": 2.7795445919036865, "learning_rate": 1.8850821520524562e-05, "loss": 0.7464, "step": 8551 }, { "epoch": 1.3960654667156442, "grad_norm": 2.7252933979034424, "learning_rate": 1.88505230928125e-05, "loss": 0.6806, "step": 8552 }, { "epoch": 1.3962287253581487, "grad_norm": 2.9896092414855957, "learning_rate": 1.8850224628719282e-05, "loss": 0.7862, "step": 8553 }, { "epoch": 1.396391984000653, "grad_norm": 2.7848398685455322, "learning_rate": 1.884992612824613e-05, "loss": 0.6738, "step": 8554 }, { "epoch": 1.3965552426431573, "grad_norm": 2.164189338684082, "learning_rate": 1.8849627591394274e-05, "loss": 0.5339, "step": 8555 }, { "epoch": 1.3967185012856618, "grad_norm": 2.8585197925567627, "learning_rate": 1.8849329018164942e-05, "loss": 0.7223, "step": 8556 }, { "epoch": 1.3968817599281662, "grad_norm": 3.1064116954803467, "learning_rate": 1.884903040855936e-05, "loss": 0.7563, "step": 8557 }, { "epoch": 1.3970450185706706, "grad_norm": 3.001209259033203, "learning_rate": 1.8848731762578754e-05, "loss": 0.6995, "step": 8558 }, { "epoch": 1.397208277213175, "grad_norm": 2.237896203994751, "learning_rate": 1.884843308022435e-05, "loss": 0.5393, "step": 8559 }, { "epoch": 1.3973715358556793, "grad_norm": 3.6330718994140625, "learning_rate": 1.8848134361497385e-05, "loss": 0.967, "step": 8560 }, { "epoch": 1.3975347944981837, "grad_norm": 3.089303493499756, "learning_rate": 1.8847835606399078e-05, "loss": 0.7318, "step": 8561 }, { "epoch": 1.3976980531406882, "grad_norm": 3.1955392360687256, "learning_rate": 1.884753681493066e-05, "loss": 0.795, "step": 8562 }, { "epoch": 1.3978613117831924, "grad_norm": 2.710101366043091, "learning_rate": 1.8847237987093358e-05, "loss": 0.7391, "step": 8563 }, { "epoch": 1.3980245704256968, "grad_norm": 2.488191604614258, "learning_rate": 1.88469391228884e-05, "loss": 0.6246, "step": 8564 }, { "epoch": 1.3981878290682013, "grad_norm": 2.696638584136963, "learning_rate": 1.8846640222317017e-05, "loss": 0.6994, "step": 8565 }, { "epoch": 1.3983510877107057, "grad_norm": 2.3325510025024414, "learning_rate": 1.884634128538044e-05, "loss": 0.5428, "step": 8566 }, { "epoch": 1.3985143463532101, "grad_norm": 3.050022840499878, "learning_rate": 1.8846042312079895e-05, "loss": 0.8069, "step": 8567 }, { "epoch": 1.3986776049957146, "grad_norm": 2.596053123474121, "learning_rate": 1.8845743302416603e-05, "loss": 0.683, "step": 8568 }, { "epoch": 1.3988408636382188, "grad_norm": 2.9019711017608643, "learning_rate": 1.8845444256391806e-05, "loss": 0.7681, "step": 8569 }, { "epoch": 1.3990041222807232, "grad_norm": 2.779907703399658, "learning_rate": 1.8845145174006724e-05, "loss": 0.7214, "step": 8570 }, { "epoch": 1.3991673809232277, "grad_norm": 3.4939022064208984, "learning_rate": 1.884484605526259e-05, "loss": 0.9089, "step": 8571 }, { "epoch": 1.399330639565732, "grad_norm": 2.7769765853881836, "learning_rate": 1.8844546900160637e-05, "loss": 0.7369, "step": 8572 }, { "epoch": 1.3994938982082363, "grad_norm": 2.9855782985687256, "learning_rate": 1.884424770870209e-05, "loss": 0.9358, "step": 8573 }, { "epoch": 1.3996571568507408, "grad_norm": 2.824948310852051, "learning_rate": 1.884394848088818e-05, "loss": 0.8195, "step": 8574 }, { "epoch": 1.3998204154932452, "grad_norm": 2.609586715698242, "learning_rate": 1.8843649216720137e-05, "loss": 0.6855, "step": 8575 }, { "epoch": 1.3999836741357496, "grad_norm": 3.1142489910125732, "learning_rate": 1.884334991619919e-05, "loss": 0.9032, "step": 8576 }, { "epoch": 1.400146932778254, "grad_norm": 2.5075175762176514, "learning_rate": 1.8843050579326574e-05, "loss": 0.6754, "step": 8577 }, { "epoch": 1.4003101914207583, "grad_norm": 2.395791530609131, "learning_rate": 1.8842751206103514e-05, "loss": 0.6516, "step": 8578 }, { "epoch": 1.4004734500632627, "grad_norm": 2.668419361114502, "learning_rate": 1.884245179653124e-05, "loss": 0.6623, "step": 8579 }, { "epoch": 1.4006367087057672, "grad_norm": 3.230081081390381, "learning_rate": 1.8842152350610992e-05, "loss": 0.6592, "step": 8580 }, { "epoch": 1.4007999673482714, "grad_norm": 3.1059484481811523, "learning_rate": 1.884185286834399e-05, "loss": 1.2001, "step": 8581 }, { "epoch": 1.4009632259907758, "grad_norm": 2.995939254760742, "learning_rate": 1.8841553349731468e-05, "loss": 0.7927, "step": 8582 }, { "epoch": 1.4011264846332803, "grad_norm": 2.168890953063965, "learning_rate": 1.8841253794774667e-05, "loss": 0.5677, "step": 8583 }, { "epoch": 1.4012897432757847, "grad_norm": 2.684898853302002, "learning_rate": 1.88409542034748e-05, "loss": 0.6221, "step": 8584 }, { "epoch": 1.4014530019182891, "grad_norm": 2.8229193687438965, "learning_rate": 1.8840654575833114e-05, "loss": 0.7759, "step": 8585 }, { "epoch": 1.4016162605607934, "grad_norm": 2.615192174911499, "learning_rate": 1.8840354911850833e-05, "loss": 0.766, "step": 8586 }, { "epoch": 1.4017795192032978, "grad_norm": 3.018200159072876, "learning_rate": 1.8840055211529192e-05, "loss": 0.8195, "step": 8587 }, { "epoch": 1.4019427778458022, "grad_norm": 2.58854341506958, "learning_rate": 1.8839755474869424e-05, "loss": 0.6638, "step": 8588 }, { "epoch": 1.4021060364883067, "grad_norm": 3.0839407444000244, "learning_rate": 1.8839455701872756e-05, "loss": 0.8915, "step": 8589 }, { "epoch": 1.4022692951308109, "grad_norm": 2.910457134246826, "learning_rate": 1.8839155892540427e-05, "loss": 0.606, "step": 8590 }, { "epoch": 1.4024325537733153, "grad_norm": 2.907970905303955, "learning_rate": 1.8838856046873658e-05, "loss": 0.8029, "step": 8591 }, { "epoch": 1.4025958124158198, "grad_norm": 2.656824827194214, "learning_rate": 1.88385561648737e-05, "loss": 0.6875, "step": 8592 }, { "epoch": 1.4027590710583242, "grad_norm": 2.996393918991089, "learning_rate": 1.8838256246541765e-05, "loss": 0.7899, "step": 8593 }, { "epoch": 1.4029223297008286, "grad_norm": 3.0317842960357666, "learning_rate": 1.88379562918791e-05, "loss": 0.8154, "step": 8594 }, { "epoch": 1.4030855883433329, "grad_norm": 2.958562135696411, "learning_rate": 1.8837656300886937e-05, "loss": 0.7797, "step": 8595 }, { "epoch": 1.4032488469858373, "grad_norm": 2.8991100788116455, "learning_rate": 1.8837356273566502e-05, "loss": 0.7337, "step": 8596 }, { "epoch": 1.4034121056283417, "grad_norm": 2.4666740894317627, "learning_rate": 1.8837056209919033e-05, "loss": 0.6136, "step": 8597 }, { "epoch": 1.4035753642708462, "grad_norm": 2.639909505844116, "learning_rate": 1.8836756109945763e-05, "loss": 0.7391, "step": 8598 }, { "epoch": 1.4037386229133504, "grad_norm": 2.534090280532837, "learning_rate": 1.8836455973647925e-05, "loss": 0.5782, "step": 8599 }, { "epoch": 1.4039018815558548, "grad_norm": 2.2721474170684814, "learning_rate": 1.8836155801026754e-05, "loss": 0.6569, "step": 8600 }, { "epoch": 1.4040651401983593, "grad_norm": 2.4946751594543457, "learning_rate": 1.883585559208348e-05, "loss": 0.6268, "step": 8601 }, { "epoch": 1.4042283988408637, "grad_norm": 2.6452603340148926, "learning_rate": 1.8835555346819344e-05, "loss": 0.6026, "step": 8602 }, { "epoch": 1.4043916574833681, "grad_norm": 2.561840295791626, "learning_rate": 1.8835255065235577e-05, "loss": 0.607, "step": 8603 }, { "epoch": 1.4045549161258724, "grad_norm": 2.82814621925354, "learning_rate": 1.8834954747333408e-05, "loss": 0.6704, "step": 8604 }, { "epoch": 1.4047181747683768, "grad_norm": 2.9040606021881104, "learning_rate": 1.883465439311408e-05, "loss": 0.7642, "step": 8605 }, { "epoch": 1.4048814334108812, "grad_norm": 2.428584575653076, "learning_rate": 1.883435400257882e-05, "loss": 0.5995, "step": 8606 }, { "epoch": 1.4050446920533854, "grad_norm": 2.6603076457977295, "learning_rate": 1.883405357572887e-05, "loss": 0.6255, "step": 8607 }, { "epoch": 1.4052079506958899, "grad_norm": 2.7704951763153076, "learning_rate": 1.883375311256546e-05, "loss": 0.7397, "step": 8608 }, { "epoch": 1.4053712093383943, "grad_norm": 3.166130781173706, "learning_rate": 1.8833452613089827e-05, "loss": 1.584, "step": 8609 }, { "epoch": 1.4055344679808988, "grad_norm": 2.5739428997039795, "learning_rate": 1.8833152077303205e-05, "loss": 0.6705, "step": 8610 }, { "epoch": 1.4056977266234032, "grad_norm": 2.4276034832000732, "learning_rate": 1.883285150520683e-05, "loss": 0.654, "step": 8611 }, { "epoch": 1.4058609852659076, "grad_norm": 3.3149795532226562, "learning_rate": 1.8832550896801937e-05, "loss": 0.7406, "step": 8612 }, { "epoch": 1.4060242439084119, "grad_norm": 2.6424832344055176, "learning_rate": 1.8832250252089764e-05, "loss": 0.5808, "step": 8613 }, { "epoch": 1.4061875025509163, "grad_norm": 2.6348769664764404, "learning_rate": 1.8831949571071547e-05, "loss": 0.6407, "step": 8614 }, { "epoch": 1.4063507611934207, "grad_norm": 2.5174660682678223, "learning_rate": 1.8831648853748515e-05, "loss": 0.6558, "step": 8615 }, { "epoch": 1.406514019835925, "grad_norm": 3.0452065467834473, "learning_rate": 1.8831348100121912e-05, "loss": 0.8729, "step": 8616 }, { "epoch": 1.4066772784784294, "grad_norm": 2.9811317920684814, "learning_rate": 1.8831047310192974e-05, "loss": 0.6595, "step": 8617 }, { "epoch": 1.4068405371209338, "grad_norm": 2.3867669105529785, "learning_rate": 1.8830746483962935e-05, "loss": 0.6353, "step": 8618 }, { "epoch": 1.4070037957634383, "grad_norm": 2.855348587036133, "learning_rate": 1.8830445621433028e-05, "loss": 0.7099, "step": 8619 }, { "epoch": 1.4071670544059427, "grad_norm": 2.3926358222961426, "learning_rate": 1.8830144722604493e-05, "loss": 0.6119, "step": 8620 }, { "epoch": 1.4073303130484471, "grad_norm": 3.588916301727295, "learning_rate": 1.8829843787478574e-05, "loss": 0.6365, "step": 8621 }, { "epoch": 1.4074935716909514, "grad_norm": 3.062690496444702, "learning_rate": 1.8829542816056493e-05, "loss": 0.7937, "step": 8622 }, { "epoch": 1.4076568303334558, "grad_norm": 2.916654586791992, "learning_rate": 1.8829241808339498e-05, "loss": 0.7529, "step": 8623 }, { "epoch": 1.4078200889759602, "grad_norm": 3.0998690128326416, "learning_rate": 1.8828940764328828e-05, "loss": 0.6984, "step": 8624 }, { "epoch": 1.4079833476184644, "grad_norm": 2.7267496585845947, "learning_rate": 1.882863968402571e-05, "loss": 0.6929, "step": 8625 }, { "epoch": 1.4081466062609689, "grad_norm": 2.271296739578247, "learning_rate": 1.882833856743139e-05, "loss": 0.5602, "step": 8626 }, { "epoch": 1.4083098649034733, "grad_norm": 2.7429816722869873, "learning_rate": 1.8828037414547107e-05, "loss": 0.7899, "step": 8627 }, { "epoch": 1.4084731235459778, "grad_norm": 2.42940616607666, "learning_rate": 1.8827736225374093e-05, "loss": 0.6093, "step": 8628 }, { "epoch": 1.4086363821884822, "grad_norm": 2.612596035003662, "learning_rate": 1.8827434999913587e-05, "loss": 0.7035, "step": 8629 }, { "epoch": 1.4087996408309864, "grad_norm": 2.9540374279022217, "learning_rate": 1.8827133738166833e-05, "loss": 0.7503, "step": 8630 }, { "epoch": 1.4089628994734908, "grad_norm": 2.7409181594848633, "learning_rate": 1.8826832440135062e-05, "loss": 0.6768, "step": 8631 }, { "epoch": 1.4091261581159953, "grad_norm": 2.5875377655029297, "learning_rate": 1.8826531105819516e-05, "loss": 0.7116, "step": 8632 }, { "epoch": 1.4092894167584997, "grad_norm": 2.536280393600464, "learning_rate": 1.8826229735221436e-05, "loss": 0.5773, "step": 8633 }, { "epoch": 1.409452675401004, "grad_norm": 2.339614152908325, "learning_rate": 1.8825928328342055e-05, "loss": 0.5635, "step": 8634 }, { "epoch": 1.4096159340435084, "grad_norm": 2.6203196048736572, "learning_rate": 1.8825626885182616e-05, "loss": 0.7697, "step": 8635 }, { "epoch": 1.4097791926860128, "grad_norm": 2.6809403896331787, "learning_rate": 1.882532540574436e-05, "loss": 0.5534, "step": 8636 }, { "epoch": 1.4099424513285173, "grad_norm": 3.0466196537017822, "learning_rate": 1.8825023890028522e-05, "loss": 0.7671, "step": 8637 }, { "epoch": 1.4101057099710217, "grad_norm": 2.723019599914551, "learning_rate": 1.8824722338036344e-05, "loss": 0.7177, "step": 8638 }, { "epoch": 1.410268968613526, "grad_norm": 2.9166648387908936, "learning_rate": 1.8824420749769062e-05, "loss": 0.6192, "step": 8639 }, { "epoch": 1.4104322272560303, "grad_norm": 2.413499593734741, "learning_rate": 1.882411912522792e-05, "loss": 0.655, "step": 8640 }, { "epoch": 1.4105954858985348, "grad_norm": 2.573892116546631, "learning_rate": 1.8823817464414156e-05, "loss": 0.585, "step": 8641 }, { "epoch": 1.4107587445410392, "grad_norm": 2.813671827316284, "learning_rate": 1.8823515767329012e-05, "loss": 0.728, "step": 8642 }, { "epoch": 1.4109220031835434, "grad_norm": 2.7484467029571533, "learning_rate": 1.8823214033973724e-05, "loss": 0.6463, "step": 8643 }, { "epoch": 1.4110852618260479, "grad_norm": 2.549499988555908, "learning_rate": 1.8822912264349535e-05, "loss": 0.6704, "step": 8644 }, { "epoch": 1.4112485204685523, "grad_norm": 2.653170108795166, "learning_rate": 1.8822610458457683e-05, "loss": 0.6064, "step": 8645 }, { "epoch": 1.4114117791110568, "grad_norm": 2.8693900108337402, "learning_rate": 1.8822308616299417e-05, "loss": 0.6477, "step": 8646 }, { "epoch": 1.4115750377535612, "grad_norm": 3.01271390914917, "learning_rate": 1.8822006737875967e-05, "loss": 0.7657, "step": 8647 }, { "epoch": 1.4117382963960654, "grad_norm": 3.3409762382507324, "learning_rate": 1.8821704823188577e-05, "loss": 0.7876, "step": 8648 }, { "epoch": 1.4119015550385698, "grad_norm": 3.16965913772583, "learning_rate": 1.8821402872238494e-05, "loss": 0.7825, "step": 8649 }, { "epoch": 1.4120648136810743, "grad_norm": 2.8239192962646484, "learning_rate": 1.882110088502695e-05, "loss": 0.6704, "step": 8650 }, { "epoch": 1.4122280723235785, "grad_norm": 2.739197015762329, "learning_rate": 1.8820798861555194e-05, "loss": 0.7079, "step": 8651 }, { "epoch": 1.412391330966083, "grad_norm": 2.7248566150665283, "learning_rate": 1.8820496801824464e-05, "loss": 0.678, "step": 8652 }, { "epoch": 1.4125545896085874, "grad_norm": 3.149986982345581, "learning_rate": 1.8820194705836004e-05, "loss": 0.7997, "step": 8653 }, { "epoch": 1.4127178482510918, "grad_norm": 3.011143445968628, "learning_rate": 1.881989257359105e-05, "loss": 0.8404, "step": 8654 }, { "epoch": 1.4128811068935963, "grad_norm": 2.8372371196746826, "learning_rate": 1.8819590405090852e-05, "loss": 0.662, "step": 8655 }, { "epoch": 1.4130443655361007, "grad_norm": 3.084411382675171, "learning_rate": 1.8819288200336646e-05, "loss": 0.803, "step": 8656 }, { "epoch": 1.413207624178605, "grad_norm": 2.9445836544036865, "learning_rate": 1.8818985959329678e-05, "loss": 1.4627, "step": 8657 }, { "epoch": 1.4133708828211093, "grad_norm": 2.618727207183838, "learning_rate": 1.8818683682071187e-05, "loss": 0.6969, "step": 8658 }, { "epoch": 1.4135341414636138, "grad_norm": 2.7914371490478516, "learning_rate": 1.8818381368562417e-05, "loss": 0.7404, "step": 8659 }, { "epoch": 1.413697400106118, "grad_norm": 2.743372917175293, "learning_rate": 1.881807901880461e-05, "loss": 0.7341, "step": 8660 }, { "epoch": 1.4138606587486224, "grad_norm": 2.9014766216278076, "learning_rate": 1.8817776632799015e-05, "loss": 0.8119, "step": 8661 }, { "epoch": 1.4140239173911269, "grad_norm": 2.9703361988067627, "learning_rate": 1.881747421054686e-05, "loss": 0.8781, "step": 8662 }, { "epoch": 1.4141871760336313, "grad_norm": 3.1261496543884277, "learning_rate": 1.8817171752049407e-05, "loss": 0.9038, "step": 8663 }, { "epoch": 1.4143504346761357, "grad_norm": 3.0098702907562256, "learning_rate": 1.881686925730789e-05, "loss": 0.8388, "step": 8664 }, { "epoch": 1.4145136933186402, "grad_norm": 2.7841508388519287, "learning_rate": 1.8816566726323548e-05, "loss": 0.8106, "step": 8665 }, { "epoch": 1.4146769519611444, "grad_norm": 2.148336410522461, "learning_rate": 1.881626415909763e-05, "loss": 0.6978, "step": 8666 }, { "epoch": 1.4148402106036488, "grad_norm": 2.100830078125, "learning_rate": 1.881596155563138e-05, "loss": 0.4558, "step": 8667 }, { "epoch": 1.4150034692461533, "grad_norm": 2.5967986583709717, "learning_rate": 1.8815658915926042e-05, "loss": 0.7723, "step": 8668 }, { "epoch": 1.4151667278886575, "grad_norm": 3.2272844314575195, "learning_rate": 1.881535623998286e-05, "loss": 0.7275, "step": 8669 }, { "epoch": 1.415329986531162, "grad_norm": 3.059480667114258, "learning_rate": 1.8815053527803075e-05, "loss": 0.7473, "step": 8670 }, { "epoch": 1.4154932451736664, "grad_norm": 2.7528769969940186, "learning_rate": 1.8814750779387935e-05, "loss": 0.7285, "step": 8671 }, { "epoch": 1.4156565038161708, "grad_norm": 2.7002980709075928, "learning_rate": 1.8814447994738677e-05, "loss": 0.7068, "step": 8672 }, { "epoch": 1.4158197624586752, "grad_norm": 2.7387373447418213, "learning_rate": 1.8814145173856558e-05, "loss": 0.7871, "step": 8673 }, { "epoch": 1.4159830211011795, "grad_norm": 2.5665535926818848, "learning_rate": 1.8813842316742815e-05, "loss": 0.6783, "step": 8674 }, { "epoch": 1.416146279743684, "grad_norm": 2.945019483566284, "learning_rate": 1.8813539423398694e-05, "loss": 0.6914, "step": 8675 }, { "epoch": 1.4163095383861883, "grad_norm": 2.721745491027832, "learning_rate": 1.881323649382544e-05, "loss": 0.6943, "step": 8676 }, { "epoch": 1.4164727970286928, "grad_norm": 2.225780963897705, "learning_rate": 1.88129335280243e-05, "loss": 0.5469, "step": 8677 }, { "epoch": 1.416636055671197, "grad_norm": 2.867408037185669, "learning_rate": 1.8812630525996515e-05, "loss": 0.813, "step": 8678 }, { "epoch": 1.4167993143137014, "grad_norm": 2.9703409671783447, "learning_rate": 1.8812327487743336e-05, "loss": 0.7257, "step": 8679 }, { "epoch": 1.4169625729562059, "grad_norm": 3.379660129547119, "learning_rate": 1.8812024413266007e-05, "loss": 0.6702, "step": 8680 }, { "epoch": 1.4171258315987103, "grad_norm": 2.9383397102355957, "learning_rate": 1.881172130256577e-05, "loss": 0.7055, "step": 8681 }, { "epoch": 1.4172890902412147, "grad_norm": 2.9300811290740967, "learning_rate": 1.8811418155643876e-05, "loss": 0.8215, "step": 8682 }, { "epoch": 1.417452348883719, "grad_norm": 2.6951818466186523, "learning_rate": 1.881111497250157e-05, "loss": 0.6181, "step": 8683 }, { "epoch": 1.4176156075262234, "grad_norm": 3.0128846168518066, "learning_rate": 1.8810811753140093e-05, "loss": 0.7441, "step": 8684 }, { "epoch": 1.4177788661687278, "grad_norm": 3.0546979904174805, "learning_rate": 1.88105084975607e-05, "loss": 0.9402, "step": 8685 }, { "epoch": 1.4179421248112323, "grad_norm": 2.76751708984375, "learning_rate": 1.8810205205764634e-05, "loss": 0.7724, "step": 8686 }, { "epoch": 1.4181053834537365, "grad_norm": 2.7059028148651123, "learning_rate": 1.880990187775314e-05, "loss": 0.7659, "step": 8687 }, { "epoch": 1.418268642096241, "grad_norm": 2.4345455169677734, "learning_rate": 1.880959851352746e-05, "loss": 0.6191, "step": 8688 }, { "epoch": 1.4184319007387454, "grad_norm": 2.267857789993286, "learning_rate": 1.8809295113088855e-05, "loss": 0.6347, "step": 8689 }, { "epoch": 1.4185951593812498, "grad_norm": 2.3083102703094482, "learning_rate": 1.880899167643856e-05, "loss": 0.5956, "step": 8690 }, { "epoch": 1.4187584180237542, "grad_norm": 3.130315065383911, "learning_rate": 1.880868820357783e-05, "loss": 0.783, "step": 8691 }, { "epoch": 1.4189216766662585, "grad_norm": 3.2144129276275635, "learning_rate": 1.8808384694507905e-05, "loss": 0.8777, "step": 8692 }, { "epoch": 1.419084935308763, "grad_norm": 2.6372640132904053, "learning_rate": 1.8808081149230036e-05, "loss": 0.6521, "step": 8693 }, { "epoch": 1.4192481939512673, "grad_norm": 2.2453603744506836, "learning_rate": 1.8807777567745473e-05, "loss": 0.5257, "step": 8694 }, { "epoch": 1.4194114525937715, "grad_norm": 2.95646071434021, "learning_rate": 1.8807473950055466e-05, "loss": 0.7005, "step": 8695 }, { "epoch": 1.419574711236276, "grad_norm": 2.6209232807159424, "learning_rate": 1.8807170296161255e-05, "loss": 0.5453, "step": 8696 }, { "epoch": 1.4197379698787804, "grad_norm": 2.7686498165130615, "learning_rate": 1.8806866606064093e-05, "loss": 0.7048, "step": 8697 }, { "epoch": 1.4199012285212849, "grad_norm": 2.6195430755615234, "learning_rate": 1.8806562879765228e-05, "loss": 0.6935, "step": 8698 }, { "epoch": 1.4200644871637893, "grad_norm": 2.5327632427215576, "learning_rate": 1.8806259117265905e-05, "loss": 0.5816, "step": 8699 }, { "epoch": 1.4202277458062937, "grad_norm": 2.751572847366333, "learning_rate": 1.880595531856738e-05, "loss": 0.6128, "step": 8700 }, { "epoch": 1.420391004448798, "grad_norm": 3.0064499378204346, "learning_rate": 1.8805651483670898e-05, "loss": 0.7833, "step": 8701 }, { "epoch": 1.4205542630913024, "grad_norm": 3.026139736175537, "learning_rate": 1.8805347612577707e-05, "loss": 0.792, "step": 8702 }, { "epoch": 1.4207175217338068, "grad_norm": 2.49340558052063, "learning_rate": 1.8805043705289056e-05, "loss": 0.6232, "step": 8703 }, { "epoch": 1.420880780376311, "grad_norm": 2.758068799972534, "learning_rate": 1.8804739761806196e-05, "loss": 0.6903, "step": 8704 }, { "epoch": 1.4210440390188155, "grad_norm": 2.471005916595459, "learning_rate": 1.8804435782130377e-05, "loss": 0.6211, "step": 8705 }, { "epoch": 1.42120729766132, "grad_norm": 2.644603729248047, "learning_rate": 1.8804131766262843e-05, "loss": 0.6129, "step": 8706 }, { "epoch": 1.4213705563038244, "grad_norm": 2.870488166809082, "learning_rate": 1.880382771420485e-05, "loss": 0.6763, "step": 8707 }, { "epoch": 1.4215338149463288, "grad_norm": 2.2915022373199463, "learning_rate": 1.8803523625957646e-05, "loss": 0.5733, "step": 8708 }, { "epoch": 1.4216970735888332, "grad_norm": 2.9802303314208984, "learning_rate": 1.880321950152248e-05, "loss": 0.7752, "step": 8709 }, { "epoch": 1.4218603322313375, "grad_norm": 2.8670296669006348, "learning_rate": 1.88029153409006e-05, "loss": 0.7088, "step": 8710 }, { "epoch": 1.422023590873842, "grad_norm": 2.3348047733306885, "learning_rate": 1.8802611144093263e-05, "loss": 0.6093, "step": 8711 }, { "epoch": 1.4221868495163463, "grad_norm": 2.6923177242279053, "learning_rate": 1.8802306911101714e-05, "loss": 0.6607, "step": 8712 }, { "epoch": 1.4223501081588505, "grad_norm": 2.7808785438537598, "learning_rate": 1.8802002641927205e-05, "loss": 0.7019, "step": 8713 }, { "epoch": 1.422513366801355, "grad_norm": 2.687640428543091, "learning_rate": 1.8801698336570986e-05, "loss": 0.5752, "step": 8714 }, { "epoch": 1.4226766254438594, "grad_norm": 3.4330835342407227, "learning_rate": 1.8801393995034306e-05, "loss": 0.8319, "step": 8715 }, { "epoch": 1.4228398840863639, "grad_norm": 2.5958099365234375, "learning_rate": 1.8801089617318424e-05, "loss": 0.7185, "step": 8716 }, { "epoch": 1.4230031427288683, "grad_norm": 2.940826892852783, "learning_rate": 1.880078520342458e-05, "loss": 0.7395, "step": 8717 }, { "epoch": 1.4231664013713727, "grad_norm": 2.7776925563812256, "learning_rate": 1.8800480753354037e-05, "loss": 0.6568, "step": 8718 }, { "epoch": 1.423329660013877, "grad_norm": 2.8028814792633057, "learning_rate": 1.880017626710804e-05, "loss": 0.7088, "step": 8719 }, { "epoch": 1.4234929186563814, "grad_norm": 3.1424076557159424, "learning_rate": 1.879987174468784e-05, "loss": 0.7171, "step": 8720 }, { "epoch": 1.4236561772988858, "grad_norm": 2.8624894618988037, "learning_rate": 1.879956718609469e-05, "loss": 0.6667, "step": 8721 }, { "epoch": 1.42381943594139, "grad_norm": 2.7020821571350098, "learning_rate": 1.879926259132984e-05, "loss": 0.7238, "step": 8722 }, { "epoch": 1.4239826945838945, "grad_norm": 3.3650832176208496, "learning_rate": 1.8798957960394544e-05, "loss": 0.7257, "step": 8723 }, { "epoch": 1.424145953226399, "grad_norm": 2.9828243255615234, "learning_rate": 1.8798653293290055e-05, "loss": 0.7484, "step": 8724 }, { "epoch": 1.4243092118689034, "grad_norm": 2.375664234161377, "learning_rate": 1.879834859001763e-05, "loss": 0.5446, "step": 8725 }, { "epoch": 1.4244724705114078, "grad_norm": 3.0310494899749756, "learning_rate": 1.879804385057851e-05, "loss": 0.7353, "step": 8726 }, { "epoch": 1.424635729153912, "grad_norm": 2.591787576675415, "learning_rate": 1.8797739074973954e-05, "loss": 0.6579, "step": 8727 }, { "epoch": 1.4247989877964164, "grad_norm": 3.055140733718872, "learning_rate": 1.8797434263205216e-05, "loss": 0.7026, "step": 8728 }, { "epoch": 1.4249622464389209, "grad_norm": 2.7225677967071533, "learning_rate": 1.8797129415273548e-05, "loss": 0.6384, "step": 8729 }, { "epoch": 1.4251255050814253, "grad_norm": 2.3177597522735596, "learning_rate": 1.8796824531180204e-05, "loss": 0.6318, "step": 8730 }, { "epoch": 1.4252887637239295, "grad_norm": 2.5855231285095215, "learning_rate": 1.8796519610926434e-05, "loss": 0.6973, "step": 8731 }, { "epoch": 1.425452022366434, "grad_norm": 2.6904265880584717, "learning_rate": 1.8796214654513495e-05, "loss": 0.7749, "step": 8732 }, { "epoch": 1.4256152810089384, "grad_norm": 2.6741950511932373, "learning_rate": 1.8795909661942637e-05, "loss": 0.7206, "step": 8733 }, { "epoch": 1.4257785396514429, "grad_norm": 3.140730619430542, "learning_rate": 1.8795604633215117e-05, "loss": 0.8696, "step": 8734 }, { "epoch": 1.4259417982939473, "grad_norm": 2.821223020553589, "learning_rate": 1.8795299568332182e-05, "loss": 0.6969, "step": 8735 }, { "epoch": 1.4261050569364515, "grad_norm": 2.904172658920288, "learning_rate": 1.87949944672951e-05, "loss": 0.6025, "step": 8736 }, { "epoch": 1.426268315578956, "grad_norm": 2.836186408996582, "learning_rate": 1.8794689330105112e-05, "loss": 0.748, "step": 8737 }, { "epoch": 1.4264315742214604, "grad_norm": 2.8146684169769287, "learning_rate": 1.8794384156763478e-05, "loss": 0.7167, "step": 8738 }, { "epoch": 1.4265948328639646, "grad_norm": 2.604687452316284, "learning_rate": 1.8794078947271453e-05, "loss": 0.6051, "step": 8739 }, { "epoch": 1.426758091506469, "grad_norm": 2.7622921466827393, "learning_rate": 1.8793773701630288e-05, "loss": 0.6432, "step": 8740 }, { "epoch": 1.4269213501489735, "grad_norm": 3.152235507965088, "learning_rate": 1.879346841984124e-05, "loss": 0.7472, "step": 8741 }, { "epoch": 1.427084608791478, "grad_norm": 2.7675445079803467, "learning_rate": 1.8793163101905562e-05, "loss": 0.7679, "step": 8742 }, { "epoch": 1.4272478674339824, "grad_norm": 2.6843528747558594, "learning_rate": 1.8792857747824515e-05, "loss": 0.7682, "step": 8743 }, { "epoch": 1.4274111260764868, "grad_norm": 2.9448580741882324, "learning_rate": 1.879255235759935e-05, "loss": 0.7461, "step": 8744 }, { "epoch": 1.427574384718991, "grad_norm": 2.8524973392486572, "learning_rate": 1.8792246931231317e-05, "loss": 0.68, "step": 8745 }, { "epoch": 1.4277376433614954, "grad_norm": 3.077694892883301, "learning_rate": 1.879194146872168e-05, "loss": 0.6947, "step": 8746 }, { "epoch": 1.4279009020039999, "grad_norm": 2.9013893604278564, "learning_rate": 1.879163597007169e-05, "loss": 0.6865, "step": 8747 }, { "epoch": 1.428064160646504, "grad_norm": 2.6556806564331055, "learning_rate": 1.8791330435282608e-05, "loss": 0.6458, "step": 8748 }, { "epoch": 1.4282274192890085, "grad_norm": 2.625720262527466, "learning_rate": 1.879102486435568e-05, "loss": 0.7408, "step": 8749 }, { "epoch": 1.428390677931513, "grad_norm": 2.7467284202575684, "learning_rate": 1.8790719257292175e-05, "loss": 0.7078, "step": 8750 }, { "epoch": 1.4285539365740174, "grad_norm": 2.875896453857422, "learning_rate": 1.8790413614093338e-05, "loss": 0.7263, "step": 8751 }, { "epoch": 1.4287171952165219, "grad_norm": 3.038170337677002, "learning_rate": 1.879010793476043e-05, "loss": 0.789, "step": 8752 }, { "epoch": 1.4288804538590263, "grad_norm": 2.182317018508911, "learning_rate": 1.878980221929471e-05, "loss": 0.5145, "step": 8753 }, { "epoch": 1.4290437125015305, "grad_norm": 2.191910982131958, "learning_rate": 1.8789496467697427e-05, "loss": 0.5845, "step": 8754 }, { "epoch": 1.429206971144035, "grad_norm": 3.0802676677703857, "learning_rate": 1.878919067996985e-05, "loss": 0.8244, "step": 8755 }, { "epoch": 1.4293702297865394, "grad_norm": 2.3353238105773926, "learning_rate": 1.8788884856113224e-05, "loss": 0.5539, "step": 8756 }, { "epoch": 1.4295334884290436, "grad_norm": 2.773850679397583, "learning_rate": 1.8788578996128813e-05, "loss": 0.7091, "step": 8757 }, { "epoch": 1.429696747071548, "grad_norm": 2.6555991172790527, "learning_rate": 1.8788273100017868e-05, "loss": 0.6792, "step": 8758 }, { "epoch": 1.4298600057140525, "grad_norm": 2.7535204887390137, "learning_rate": 1.8787967167781655e-05, "loss": 0.721, "step": 8759 }, { "epoch": 1.430023264356557, "grad_norm": 2.6779274940490723, "learning_rate": 1.878766119942143e-05, "loss": 0.6823, "step": 8760 }, { "epoch": 1.4301865229990613, "grad_norm": 2.756649971008301, "learning_rate": 1.8787355194938442e-05, "loss": 0.7675, "step": 8761 }, { "epoch": 1.4303497816415658, "grad_norm": 2.685103416442871, "learning_rate": 1.8787049154333956e-05, "loss": 0.7385, "step": 8762 }, { "epoch": 1.43051304028407, "grad_norm": 2.759631872177124, "learning_rate": 1.878674307760923e-05, "loss": 0.6993, "step": 8763 }, { "epoch": 1.4306762989265744, "grad_norm": 2.871577024459839, "learning_rate": 1.878643696476552e-05, "loss": 0.896, "step": 8764 }, { "epoch": 1.4308395575690789, "grad_norm": 4.268462657928467, "learning_rate": 1.8786130815804088e-05, "loss": 0.6632, "step": 8765 }, { "epoch": 1.431002816211583, "grad_norm": 2.280686855316162, "learning_rate": 1.878582463072619e-05, "loss": 0.569, "step": 8766 }, { "epoch": 1.4311660748540875, "grad_norm": 2.611825466156006, "learning_rate": 1.878551840953308e-05, "loss": 0.6987, "step": 8767 }, { "epoch": 1.431329333496592, "grad_norm": 2.57963490486145, "learning_rate": 1.878521215222602e-05, "loss": 0.7059, "step": 8768 }, { "epoch": 1.4314925921390964, "grad_norm": 2.723409652709961, "learning_rate": 1.8784905858806276e-05, "loss": 0.6153, "step": 8769 }, { "epoch": 1.4316558507816008, "grad_norm": 2.5436484813690186, "learning_rate": 1.87845995292751e-05, "loss": 0.6448, "step": 8770 }, { "epoch": 1.431819109424105, "grad_norm": 2.747922420501709, "learning_rate": 1.878429316363375e-05, "loss": 0.7137, "step": 8771 }, { "epoch": 1.4319823680666095, "grad_norm": 2.3509910106658936, "learning_rate": 1.878398676188349e-05, "loss": 0.6145, "step": 8772 }, { "epoch": 1.432145626709114, "grad_norm": 2.7001876831054688, "learning_rate": 1.8783680324025576e-05, "loss": 0.6978, "step": 8773 }, { "epoch": 1.4323088853516184, "grad_norm": 2.6055047512054443, "learning_rate": 1.878337385006127e-05, "loss": 0.6529, "step": 8774 }, { "epoch": 1.4324721439941226, "grad_norm": 3.3639185428619385, "learning_rate": 1.8783067339991828e-05, "loss": 0.8934, "step": 8775 }, { "epoch": 1.432635402636627, "grad_norm": 3.0130345821380615, "learning_rate": 1.8782760793818515e-05, "loss": 0.7126, "step": 8776 }, { "epoch": 1.4327986612791315, "grad_norm": 2.586925983428955, "learning_rate": 1.878245421154259e-05, "loss": 0.6174, "step": 8777 }, { "epoch": 1.432961919921636, "grad_norm": 2.843609571456909, "learning_rate": 1.878214759316531e-05, "loss": 0.7244, "step": 8778 }, { "epoch": 1.4331251785641403, "grad_norm": 3.216601848602295, "learning_rate": 1.8781840938687933e-05, "loss": 0.7251, "step": 8779 }, { "epoch": 1.4332884372066446, "grad_norm": 2.881101608276367, "learning_rate": 1.878153424811173e-05, "loss": 0.705, "step": 8780 }, { "epoch": 1.433451695849149, "grad_norm": 3.2497498989105225, "learning_rate": 1.878122752143795e-05, "loss": 0.8622, "step": 8781 }, { "epoch": 1.4336149544916534, "grad_norm": 3.083629608154297, "learning_rate": 1.8780920758667868e-05, "loss": 0.69, "step": 8782 }, { "epoch": 1.4337782131341577, "grad_norm": 2.3843677043914795, "learning_rate": 1.878061395980273e-05, "loss": 0.6465, "step": 8783 }, { "epoch": 1.433941471776662, "grad_norm": 2.702585458755493, "learning_rate": 1.8780307124843803e-05, "loss": 0.5928, "step": 8784 }, { "epoch": 1.4341047304191665, "grad_norm": 2.8745899200439453, "learning_rate": 1.8780000253792355e-05, "loss": 0.7159, "step": 8785 }, { "epoch": 1.434267989061671, "grad_norm": 2.7348484992980957, "learning_rate": 1.8779693346649633e-05, "loss": 0.6245, "step": 8786 }, { "epoch": 1.4344312477041754, "grad_norm": 2.886167287826538, "learning_rate": 1.8779386403416914e-05, "loss": 0.7327, "step": 8787 }, { "epoch": 1.4345945063466798, "grad_norm": 2.5848751068115234, "learning_rate": 1.8779079424095448e-05, "loss": 0.6621, "step": 8788 }, { "epoch": 1.434757764989184, "grad_norm": 2.4414384365081787, "learning_rate": 1.8778772408686503e-05, "loss": 0.6551, "step": 8789 }, { "epoch": 1.4349210236316885, "grad_norm": 2.474813938140869, "learning_rate": 1.877846535719134e-05, "loss": 0.6426, "step": 8790 }, { "epoch": 1.435084282274193, "grad_norm": 3.081155776977539, "learning_rate": 1.877815826961122e-05, "loss": 0.7799, "step": 8791 }, { "epoch": 1.4352475409166972, "grad_norm": 2.43243670463562, "learning_rate": 1.8777851145947403e-05, "loss": 0.6242, "step": 8792 }, { "epoch": 1.4354107995592016, "grad_norm": 2.7942183017730713, "learning_rate": 1.877754398620116e-05, "loss": 0.7897, "step": 8793 }, { "epoch": 1.435574058201706, "grad_norm": 2.569085121154785, "learning_rate": 1.8777236790373743e-05, "loss": 0.631, "step": 8794 }, { "epoch": 1.4357373168442105, "grad_norm": 3.389893054962158, "learning_rate": 1.8776929558466423e-05, "loss": 0.8428, "step": 8795 }, { "epoch": 1.435900575486715, "grad_norm": 2.3873634338378906, "learning_rate": 1.877662229048046e-05, "loss": 0.6085, "step": 8796 }, { "epoch": 1.4360638341292193, "grad_norm": 2.7575342655181885, "learning_rate": 1.877631498641711e-05, "loss": 0.8058, "step": 8797 }, { "epoch": 1.4362270927717236, "grad_norm": 2.58410906791687, "learning_rate": 1.877600764627765e-05, "loss": 0.6714, "step": 8798 }, { "epoch": 1.436390351414228, "grad_norm": 2.6712913513183594, "learning_rate": 1.8775700270063333e-05, "loss": 0.6243, "step": 8799 }, { "epoch": 1.4365536100567324, "grad_norm": 2.881248712539673, "learning_rate": 1.877539285777543e-05, "loss": 0.7471, "step": 8800 }, { "epoch": 1.4367168686992366, "grad_norm": 3.240130662918091, "learning_rate": 1.87750854094152e-05, "loss": 0.8071, "step": 8801 }, { "epoch": 1.436880127341741, "grad_norm": 2.8532700538635254, "learning_rate": 1.8774777924983904e-05, "loss": 0.78, "step": 8802 }, { "epoch": 1.4370433859842455, "grad_norm": 2.5562214851379395, "learning_rate": 1.877447040448281e-05, "loss": 0.6614, "step": 8803 }, { "epoch": 1.43720664462675, "grad_norm": 2.848268985748291, "learning_rate": 1.877416284791318e-05, "loss": 0.7052, "step": 8804 }, { "epoch": 1.4373699032692544, "grad_norm": 2.669454336166382, "learning_rate": 1.8773855255276283e-05, "loss": 0.7106, "step": 8805 }, { "epoch": 1.4375331619117588, "grad_norm": 2.774531364440918, "learning_rate": 1.877354762657338e-05, "loss": 0.7466, "step": 8806 }, { "epoch": 1.437696420554263, "grad_norm": 2.6665446758270264, "learning_rate": 1.8773239961805734e-05, "loss": 0.7123, "step": 8807 }, { "epoch": 1.4378596791967675, "grad_norm": 2.7723193168640137, "learning_rate": 1.877293226097461e-05, "loss": 0.7387, "step": 8808 }, { "epoch": 1.438022937839272, "grad_norm": 3.108091354370117, "learning_rate": 1.8772624524081276e-05, "loss": 0.9008, "step": 8809 }, { "epoch": 1.4381861964817761, "grad_norm": 2.8785884380340576, "learning_rate": 1.8772316751126995e-05, "loss": 0.7571, "step": 8810 }, { "epoch": 1.4383494551242806, "grad_norm": 2.8688971996307373, "learning_rate": 1.877200894211303e-05, "loss": 0.7603, "step": 8811 }, { "epoch": 1.438512713766785, "grad_norm": 2.4840633869171143, "learning_rate": 1.877170109704065e-05, "loss": 0.594, "step": 8812 }, { "epoch": 1.4386759724092895, "grad_norm": 2.5850718021392822, "learning_rate": 1.877139321591112e-05, "loss": 0.6, "step": 8813 }, { "epoch": 1.438839231051794, "grad_norm": 3.4185667037963867, "learning_rate": 1.87710852987257e-05, "loss": 0.7981, "step": 8814 }, { "epoch": 1.4390024896942981, "grad_norm": 2.9086437225341797, "learning_rate": 1.8770777345485668e-05, "loss": 0.7911, "step": 8815 }, { "epoch": 1.4391657483368026, "grad_norm": 2.7563228607177734, "learning_rate": 1.8770469356192277e-05, "loss": 0.649, "step": 8816 }, { "epoch": 1.439329006979307, "grad_norm": 2.889101028442383, "learning_rate": 1.87701613308468e-05, "loss": 0.6077, "step": 8817 }, { "epoch": 1.4394922656218114, "grad_norm": 2.767441749572754, "learning_rate": 1.8769853269450497e-05, "loss": 0.797, "step": 8818 }, { "epoch": 1.4396555242643156, "grad_norm": 2.5909783840179443, "learning_rate": 1.8769545172004643e-05, "loss": 0.6465, "step": 8819 }, { "epoch": 1.43981878290682, "grad_norm": 2.842961072921753, "learning_rate": 1.8769237038510503e-05, "loss": 0.6765, "step": 8820 }, { "epoch": 1.4399820415493245, "grad_norm": 2.4220938682556152, "learning_rate": 1.8768928868969334e-05, "loss": 0.6323, "step": 8821 }, { "epoch": 1.440145300191829, "grad_norm": 2.7970430850982666, "learning_rate": 1.8768620663382414e-05, "loss": 0.7349, "step": 8822 }, { "epoch": 1.4403085588343334, "grad_norm": 2.8872334957122803, "learning_rate": 1.8768312421751004e-05, "loss": 0.7905, "step": 8823 }, { "epoch": 1.4404718174768376, "grad_norm": 2.8430185317993164, "learning_rate": 1.876800414407637e-05, "loss": 0.6888, "step": 8824 }, { "epoch": 1.440635076119342, "grad_norm": 2.9152491092681885, "learning_rate": 1.8767695830359784e-05, "loss": 0.6834, "step": 8825 }, { "epoch": 1.4407983347618465, "grad_norm": 2.916917562484741, "learning_rate": 1.8767387480602514e-05, "loss": 0.7222, "step": 8826 }, { "epoch": 1.4409615934043507, "grad_norm": 2.4463424682617188, "learning_rate": 1.8767079094805817e-05, "loss": 0.6118, "step": 8827 }, { "epoch": 1.4411248520468551, "grad_norm": 2.8631398677825928, "learning_rate": 1.8766770672970976e-05, "loss": 0.6554, "step": 8828 }, { "epoch": 1.4412881106893596, "grad_norm": 2.8245530128479004, "learning_rate": 1.8766462215099246e-05, "loss": 0.6541, "step": 8829 }, { "epoch": 1.441451369331864, "grad_norm": 2.8673977851867676, "learning_rate": 1.8766153721191903e-05, "loss": 0.7066, "step": 8830 }, { "epoch": 1.4416146279743685, "grad_norm": 2.7506656646728516, "learning_rate": 1.8765845191250213e-05, "loss": 0.5889, "step": 8831 }, { "epoch": 1.441777886616873, "grad_norm": 3.275385618209839, "learning_rate": 1.876553662527544e-05, "loss": 0.7608, "step": 8832 }, { "epoch": 1.441941145259377, "grad_norm": 2.806813955307007, "learning_rate": 1.8765228023268857e-05, "loss": 0.7603, "step": 8833 }, { "epoch": 1.4421044039018815, "grad_norm": 3.066680908203125, "learning_rate": 1.876491938523173e-05, "loss": 0.6789, "step": 8834 }, { "epoch": 1.442267662544386, "grad_norm": 2.96063494682312, "learning_rate": 1.8764610711165333e-05, "loss": 0.7115, "step": 8835 }, { "epoch": 1.4424309211868902, "grad_norm": 2.6303226947784424, "learning_rate": 1.8764302001070925e-05, "loss": 0.6258, "step": 8836 }, { "epoch": 1.4425941798293946, "grad_norm": 3.120736598968506, "learning_rate": 1.8763993254949785e-05, "loss": 0.7613, "step": 8837 }, { "epoch": 1.442757438471899, "grad_norm": 2.9336447715759277, "learning_rate": 1.8763684472803177e-05, "loss": 0.7256, "step": 8838 }, { "epoch": 1.4429206971144035, "grad_norm": 2.9579384326934814, "learning_rate": 1.8763375654632373e-05, "loss": 0.6721, "step": 8839 }, { "epoch": 1.443083955756908, "grad_norm": 2.313920021057129, "learning_rate": 1.8763066800438638e-05, "loss": 0.6494, "step": 8840 }, { "epoch": 1.4432472143994124, "grad_norm": 3.461108446121216, "learning_rate": 1.8762757910223243e-05, "loss": 0.8464, "step": 8841 }, { "epoch": 1.4434104730419166, "grad_norm": 2.467200756072998, "learning_rate": 1.8762448983987465e-05, "loss": 0.6169, "step": 8842 }, { "epoch": 1.443573731684421, "grad_norm": 2.4202322959899902, "learning_rate": 1.876214002173256e-05, "loss": 0.6222, "step": 8843 }, { "epoch": 1.4437369903269255, "grad_norm": 2.667564868927002, "learning_rate": 1.8761831023459813e-05, "loss": 0.6458, "step": 8844 }, { "epoch": 1.4439002489694297, "grad_norm": 2.474978446960449, "learning_rate": 1.876152198917048e-05, "loss": 0.621, "step": 8845 }, { "epoch": 1.4440635076119341, "grad_norm": 2.823281764984131, "learning_rate": 1.8761212918865846e-05, "loss": 0.7786, "step": 8846 }, { "epoch": 1.4442267662544386, "grad_norm": 3.1709084510803223, "learning_rate": 1.876090381254717e-05, "loss": 0.8251, "step": 8847 }, { "epoch": 1.444390024896943, "grad_norm": 2.4516232013702393, "learning_rate": 1.8760594670215726e-05, "loss": 0.5386, "step": 8848 }, { "epoch": 1.4445532835394475, "grad_norm": 2.9289207458496094, "learning_rate": 1.8760285491872788e-05, "loss": 0.7241, "step": 8849 }, { "epoch": 1.444716542181952, "grad_norm": 2.612760543823242, "learning_rate": 1.875997627751962e-05, "loss": 0.5764, "step": 8850 }, { "epoch": 1.444879800824456, "grad_norm": 2.9735043048858643, "learning_rate": 1.8759667027157498e-05, "loss": 0.6463, "step": 8851 }, { "epoch": 1.4450430594669605, "grad_norm": 3.0690300464630127, "learning_rate": 1.8759357740787695e-05, "loss": 0.8797, "step": 8852 }, { "epoch": 1.445206318109465, "grad_norm": 2.618190288543701, "learning_rate": 1.875904841841148e-05, "loss": 0.605, "step": 8853 }, { "epoch": 1.4453695767519692, "grad_norm": 2.618384838104248, "learning_rate": 1.8758739060030123e-05, "loss": 0.5792, "step": 8854 }, { "epoch": 1.4455328353944736, "grad_norm": 2.3219563961029053, "learning_rate": 1.8758429665644896e-05, "loss": 0.6045, "step": 8855 }, { "epoch": 1.445696094036978, "grad_norm": 2.5618579387664795, "learning_rate": 1.8758120235257073e-05, "loss": 0.5718, "step": 8856 }, { "epoch": 1.4458593526794825, "grad_norm": 2.5431172847747803, "learning_rate": 1.8757810768867926e-05, "loss": 0.6692, "step": 8857 }, { "epoch": 1.446022611321987, "grad_norm": 2.7768468856811523, "learning_rate": 1.8757501266478724e-05, "loss": 0.7296, "step": 8858 }, { "epoch": 1.4461858699644912, "grad_norm": 2.998157024383545, "learning_rate": 1.875719172809074e-05, "loss": 0.8284, "step": 8859 }, { "epoch": 1.4463491286069956, "grad_norm": 2.8362605571746826, "learning_rate": 1.875688215370525e-05, "loss": 0.6522, "step": 8860 }, { "epoch": 1.4465123872495, "grad_norm": 3.1892611980438232, "learning_rate": 1.875657254332352e-05, "loss": 0.8322, "step": 8861 }, { "epoch": 1.4466756458920045, "grad_norm": 2.817430019378662, "learning_rate": 1.875626289694683e-05, "loss": 0.6453, "step": 8862 }, { "epoch": 1.4468389045345087, "grad_norm": 2.672071933746338, "learning_rate": 1.8755953214576454e-05, "loss": 0.6946, "step": 8863 }, { "epoch": 1.4470021631770131, "grad_norm": 3.227264881134033, "learning_rate": 1.875564349621365e-05, "loss": 0.714, "step": 8864 }, { "epoch": 1.4471654218195176, "grad_norm": 2.6855571269989014, "learning_rate": 1.875533374185971e-05, "loss": 0.6557, "step": 8865 }, { "epoch": 1.447328680462022, "grad_norm": 2.986248016357422, "learning_rate": 1.8755023951515897e-05, "loss": 0.7081, "step": 8866 }, { "epoch": 1.4474919391045264, "grad_norm": 3.1154372692108154, "learning_rate": 1.8754714125183484e-05, "loss": 0.7827, "step": 8867 }, { "epoch": 1.4476551977470307, "grad_norm": 2.545712947845459, "learning_rate": 1.875440426286375e-05, "loss": 0.6119, "step": 8868 }, { "epoch": 1.447818456389535, "grad_norm": 2.9407873153686523, "learning_rate": 1.8754094364557964e-05, "loss": 0.7318, "step": 8869 }, { "epoch": 1.4479817150320395, "grad_norm": 2.8174970149993896, "learning_rate": 1.87537844302674e-05, "loss": 0.7087, "step": 8870 }, { "epoch": 1.448144973674544, "grad_norm": 3.339479684829712, "learning_rate": 1.8753474459993336e-05, "loss": 0.8433, "step": 8871 }, { "epoch": 1.4483082323170482, "grad_norm": 2.6763715744018555, "learning_rate": 1.8753164453737044e-05, "loss": 0.6577, "step": 8872 }, { "epoch": 1.4484714909595526, "grad_norm": 2.38642954826355, "learning_rate": 1.8752854411499796e-05, "loss": 0.5635, "step": 8873 }, { "epoch": 1.448634749602057, "grad_norm": 2.758486270904541, "learning_rate": 1.8752544333282868e-05, "loss": 0.6319, "step": 8874 }, { "epoch": 1.4487980082445615, "grad_norm": 2.1484534740448, "learning_rate": 1.8752234219087538e-05, "loss": 0.4653, "step": 8875 }, { "epoch": 1.448961266887066, "grad_norm": 2.628416061401367, "learning_rate": 1.8751924068915076e-05, "loss": 0.7132, "step": 8876 }, { "epoch": 1.4491245255295702, "grad_norm": 2.547182321548462, "learning_rate": 1.875161388276676e-05, "loss": 0.6753, "step": 8877 }, { "epoch": 1.4492877841720746, "grad_norm": 2.8103792667388916, "learning_rate": 1.8751303660643865e-05, "loss": 0.8747, "step": 8878 }, { "epoch": 1.449451042814579, "grad_norm": 1.9919852018356323, "learning_rate": 1.8750993402547663e-05, "loss": 0.5449, "step": 8879 }, { "epoch": 1.4496143014570833, "grad_norm": 2.179137945175171, "learning_rate": 1.875068310847943e-05, "loss": 0.5961, "step": 8880 }, { "epoch": 1.4497775600995877, "grad_norm": 2.8977267742156982, "learning_rate": 1.8750372778440445e-05, "loss": 0.8019, "step": 8881 }, { "epoch": 1.4499408187420921, "grad_norm": 3.0227622985839844, "learning_rate": 1.8750062412431985e-05, "loss": 0.6939, "step": 8882 }, { "epoch": 1.4501040773845966, "grad_norm": 2.9497687816619873, "learning_rate": 1.8749752010455317e-05, "loss": 0.7656, "step": 8883 }, { "epoch": 1.450267336027101, "grad_norm": 3.4316580295562744, "learning_rate": 1.8749441572511723e-05, "loss": 0.7391, "step": 8884 }, { "epoch": 1.4504305946696054, "grad_norm": 2.8727288246154785, "learning_rate": 1.8749131098602483e-05, "loss": 0.7772, "step": 8885 }, { "epoch": 1.4505938533121097, "grad_norm": 2.7573883533477783, "learning_rate": 1.8748820588728865e-05, "loss": 0.704, "step": 8886 }, { "epoch": 1.450757111954614, "grad_norm": 2.76261305809021, "learning_rate": 1.874851004289215e-05, "loss": 0.6961, "step": 8887 }, { "epoch": 1.4509203705971185, "grad_norm": 2.7693066596984863, "learning_rate": 1.874819946109361e-05, "loss": 0.7519, "step": 8888 }, { "epoch": 1.4510836292396228, "grad_norm": 2.974341630935669, "learning_rate": 1.8747888843334528e-05, "loss": 0.7853, "step": 8889 }, { "epoch": 1.4512468878821272, "grad_norm": 2.994544506072998, "learning_rate": 1.874757818961618e-05, "loss": 0.8434, "step": 8890 }, { "epoch": 1.4514101465246316, "grad_norm": 2.736112117767334, "learning_rate": 1.8747267499939837e-05, "loss": 0.6146, "step": 8891 }, { "epoch": 1.451573405167136, "grad_norm": 2.513166666030884, "learning_rate": 1.8746956774306785e-05, "loss": 0.6318, "step": 8892 }, { "epoch": 1.4517366638096405, "grad_norm": 2.988250255584717, "learning_rate": 1.8746646012718295e-05, "loss": 0.8187, "step": 8893 }, { "epoch": 1.451899922452145, "grad_norm": 2.8971428871154785, "learning_rate": 1.8746335215175645e-05, "loss": 0.9092, "step": 8894 }, { "epoch": 1.4520631810946492, "grad_norm": 2.4901669025421143, "learning_rate": 1.8746024381680112e-05, "loss": 0.6683, "step": 8895 }, { "epoch": 1.4522264397371536, "grad_norm": 2.927525520324707, "learning_rate": 1.8745713512232978e-05, "loss": 0.6573, "step": 8896 }, { "epoch": 1.452389698379658, "grad_norm": 2.8086464405059814, "learning_rate": 1.8745402606835515e-05, "loss": 0.7651, "step": 8897 }, { "epoch": 1.4525529570221622, "grad_norm": 2.307202100753784, "learning_rate": 1.874509166548901e-05, "loss": 0.5393, "step": 8898 }, { "epoch": 1.4527162156646667, "grad_norm": 2.6966588497161865, "learning_rate": 1.874478068819473e-05, "loss": 0.7062, "step": 8899 }, { "epoch": 1.4528794743071711, "grad_norm": 2.8679816722869873, "learning_rate": 1.8744469674953957e-05, "loss": 0.6744, "step": 8900 }, { "epoch": 1.4530427329496756, "grad_norm": 2.89109206199646, "learning_rate": 1.8744158625767976e-05, "loss": 0.7166, "step": 8901 }, { "epoch": 1.45320599159218, "grad_norm": 2.6869475841522217, "learning_rate": 1.8743847540638057e-05, "loss": 0.6581, "step": 8902 }, { "epoch": 1.4533692502346842, "grad_norm": 3.1060609817504883, "learning_rate": 1.8743536419565485e-05, "loss": 0.7274, "step": 8903 }, { "epoch": 1.4535325088771887, "grad_norm": 2.846013069152832, "learning_rate": 1.8743225262551535e-05, "loss": 0.6963, "step": 8904 }, { "epoch": 1.453695767519693, "grad_norm": 2.795283317565918, "learning_rate": 1.8742914069597487e-05, "loss": 0.8145, "step": 8905 }, { "epoch": 1.4538590261621975, "grad_norm": 2.8546671867370605, "learning_rate": 1.874260284070462e-05, "loss": 0.8084, "step": 8906 }, { "epoch": 1.4540222848047017, "grad_norm": 2.9376931190490723, "learning_rate": 1.8742291575874215e-05, "loss": 0.6675, "step": 8907 }, { "epoch": 1.4541855434472062, "grad_norm": 2.5751800537109375, "learning_rate": 1.8741980275107553e-05, "loss": 0.645, "step": 8908 }, { "epoch": 1.4543488020897106, "grad_norm": 2.924793243408203, "learning_rate": 1.8741668938405908e-05, "loss": 0.6997, "step": 8909 }, { "epoch": 1.454512060732215, "grad_norm": 2.576450824737549, "learning_rate": 1.874135756577056e-05, "loss": 0.627, "step": 8910 }, { "epoch": 1.4546753193747195, "grad_norm": 2.551053762435913, "learning_rate": 1.8741046157202798e-05, "loss": 0.6373, "step": 8911 }, { "epoch": 1.4548385780172237, "grad_norm": 2.855924606323242, "learning_rate": 1.874073471270389e-05, "loss": 0.7928, "step": 8912 }, { "epoch": 1.4550018366597282, "grad_norm": 2.3212215900421143, "learning_rate": 1.8740423232275127e-05, "loss": 0.6261, "step": 8913 }, { "epoch": 1.4551650953022326, "grad_norm": 2.347390651702881, "learning_rate": 1.874011171591778e-05, "loss": 0.54, "step": 8914 }, { "epoch": 1.455328353944737, "grad_norm": 2.513314962387085, "learning_rate": 1.8739800163633138e-05, "loss": 0.6298, "step": 8915 }, { "epoch": 1.4554916125872412, "grad_norm": 2.808607578277588, "learning_rate": 1.8739488575422474e-05, "loss": 0.7123, "step": 8916 }, { "epoch": 1.4556548712297457, "grad_norm": 2.7780797481536865, "learning_rate": 1.8739176951287073e-05, "loss": 0.7233, "step": 8917 }, { "epoch": 1.4558181298722501, "grad_norm": 2.945086717605591, "learning_rate": 1.873886529122822e-05, "loss": 0.8515, "step": 8918 }, { "epoch": 1.4559813885147546, "grad_norm": 3.076748847961426, "learning_rate": 1.8738553595247188e-05, "loss": 0.9197, "step": 8919 }, { "epoch": 1.456144647157259, "grad_norm": 2.410794258117676, "learning_rate": 1.8738241863345262e-05, "loss": 0.5367, "step": 8920 }, { "epoch": 1.4563079057997632, "grad_norm": 3.262564182281494, "learning_rate": 1.8737930095523718e-05, "loss": 0.83, "step": 8921 }, { "epoch": 1.4564711644422677, "grad_norm": 2.504798650741577, "learning_rate": 1.873761829178385e-05, "loss": 0.6043, "step": 8922 }, { "epoch": 1.456634423084772, "grad_norm": 2.8717987537384033, "learning_rate": 1.873730645212693e-05, "loss": 0.7138, "step": 8923 }, { "epoch": 1.4567976817272763, "grad_norm": 3.010824680328369, "learning_rate": 1.873699457655424e-05, "loss": 0.7056, "step": 8924 }, { "epoch": 1.4569609403697807, "grad_norm": 2.7076313495635986, "learning_rate": 1.873668266506707e-05, "loss": 0.6743, "step": 8925 }, { "epoch": 1.4571241990122852, "grad_norm": 2.949047088623047, "learning_rate": 1.8736370717666693e-05, "loss": 0.6791, "step": 8926 }, { "epoch": 1.4572874576547896, "grad_norm": 2.5845847129821777, "learning_rate": 1.8736058734354397e-05, "loss": 0.7299, "step": 8927 }, { "epoch": 1.457450716297294, "grad_norm": 3.0799107551574707, "learning_rate": 1.873574671513146e-05, "loss": 0.8598, "step": 8928 }, { "epoch": 1.4576139749397985, "grad_norm": 2.7183074951171875, "learning_rate": 1.873543465999917e-05, "loss": 0.8059, "step": 8929 }, { "epoch": 1.4577772335823027, "grad_norm": 2.659329891204834, "learning_rate": 1.87351225689588e-05, "loss": 0.6913, "step": 8930 }, { "epoch": 1.4579404922248071, "grad_norm": 3.17417573928833, "learning_rate": 1.873481044201164e-05, "loss": 0.7454, "step": 8931 }, { "epoch": 1.4581037508673116, "grad_norm": 2.5181260108947754, "learning_rate": 1.873449827915898e-05, "loss": 0.5391, "step": 8932 }, { "epoch": 1.4582670095098158, "grad_norm": 3.0067834854125977, "learning_rate": 1.873418608040209e-05, "loss": 0.8172, "step": 8933 }, { "epoch": 1.4584302681523202, "grad_norm": 2.6633312702178955, "learning_rate": 1.8733873845742262e-05, "loss": 0.7107, "step": 8934 }, { "epoch": 1.4585935267948247, "grad_norm": 2.7388486862182617, "learning_rate": 1.8733561575180772e-05, "loss": 0.6241, "step": 8935 }, { "epoch": 1.4587567854373291, "grad_norm": 2.777090549468994, "learning_rate": 1.873324926871891e-05, "loss": 0.6779, "step": 8936 }, { "epoch": 1.4589200440798336, "grad_norm": 2.760826349258423, "learning_rate": 1.8732936926357962e-05, "loss": 0.763, "step": 8937 }, { "epoch": 1.459083302722338, "grad_norm": 2.6597862243652344, "learning_rate": 1.8732624548099204e-05, "loss": 0.6213, "step": 8938 }, { "epoch": 1.4592465613648422, "grad_norm": 2.4420580863952637, "learning_rate": 1.8732312133943925e-05, "loss": 0.6071, "step": 8939 }, { "epoch": 1.4594098200073466, "grad_norm": 2.4907102584838867, "learning_rate": 1.8731999683893406e-05, "loss": 0.6145, "step": 8940 }, { "epoch": 1.459573078649851, "grad_norm": 2.295746088027954, "learning_rate": 1.8731687197948937e-05, "loss": 0.6149, "step": 8941 }, { "epoch": 1.4597363372923553, "grad_norm": 3.0132839679718018, "learning_rate": 1.8731374676111796e-05, "loss": 0.7975, "step": 8942 }, { "epoch": 1.4598995959348597, "grad_norm": 2.3746018409729004, "learning_rate": 1.873106211838327e-05, "loss": 0.5803, "step": 8943 }, { "epoch": 1.4600628545773642, "grad_norm": 2.7739181518554688, "learning_rate": 1.8730749524764647e-05, "loss": 0.71, "step": 8944 }, { "epoch": 1.4602261132198686, "grad_norm": 2.7664308547973633, "learning_rate": 1.8730436895257207e-05, "loss": 0.6916, "step": 8945 }, { "epoch": 1.460389371862373, "grad_norm": 2.830911636352539, "learning_rate": 1.8730124229862242e-05, "loss": 0.7567, "step": 8946 }, { "epoch": 1.4605526305048773, "grad_norm": 2.6976969242095947, "learning_rate": 1.8729811528581026e-05, "loss": 0.6494, "step": 8947 }, { "epoch": 1.4607158891473817, "grad_norm": 2.8708043098449707, "learning_rate": 1.8729498791414855e-05, "loss": 0.7527, "step": 8948 }, { "epoch": 1.4608791477898861, "grad_norm": 2.2081098556518555, "learning_rate": 1.872918601836501e-05, "loss": 0.5846, "step": 8949 }, { "epoch": 1.4610424064323906, "grad_norm": 2.4160525798797607, "learning_rate": 1.8728873209432778e-05, "loss": 0.5503, "step": 8950 }, { "epoch": 1.4612056650748948, "grad_norm": 3.506385087966919, "learning_rate": 1.8728560364619443e-05, "loss": 0.8952, "step": 8951 }, { "epoch": 1.4613689237173992, "grad_norm": 2.4490442276000977, "learning_rate": 1.8728247483926294e-05, "loss": 0.5563, "step": 8952 }, { "epoch": 1.4615321823599037, "grad_norm": 3.1201610565185547, "learning_rate": 1.8727934567354615e-05, "loss": 0.826, "step": 8953 }, { "epoch": 1.4616954410024081, "grad_norm": 2.574164390563965, "learning_rate": 1.8727621614905693e-05, "loss": 0.6272, "step": 8954 }, { "epoch": 1.4618586996449126, "grad_norm": 3.0545666217803955, "learning_rate": 1.8727308626580813e-05, "loss": 0.781, "step": 8955 }, { "epoch": 1.4620219582874168, "grad_norm": 2.8604111671447754, "learning_rate": 1.8726995602381262e-05, "loss": 0.7213, "step": 8956 }, { "epoch": 1.4621852169299212, "grad_norm": 2.726022243499756, "learning_rate": 1.872668254230833e-05, "loss": 0.7218, "step": 8957 }, { "epoch": 1.4623484755724256, "grad_norm": 2.9582467079162598, "learning_rate": 1.8726369446363296e-05, "loss": 0.7258, "step": 8958 }, { "epoch": 1.46251173421493, "grad_norm": 2.101804256439209, "learning_rate": 1.8726056314547457e-05, "loss": 0.5529, "step": 8959 }, { "epoch": 1.4626749928574343, "grad_norm": 2.455571413040161, "learning_rate": 1.8725743146862093e-05, "loss": 0.6112, "step": 8960 }, { "epoch": 1.4628382514999387, "grad_norm": 3.0459537506103516, "learning_rate": 1.8725429943308496e-05, "loss": 0.7258, "step": 8961 }, { "epoch": 1.4630015101424432, "grad_norm": 2.886909008026123, "learning_rate": 1.872511670388795e-05, "loss": 0.7176, "step": 8962 }, { "epoch": 1.4631647687849476, "grad_norm": 2.66650390625, "learning_rate": 1.872480342860174e-05, "loss": 0.6445, "step": 8963 }, { "epoch": 1.463328027427452, "grad_norm": 2.666135787963867, "learning_rate": 1.8724490117451163e-05, "loss": 0.61, "step": 8964 }, { "epoch": 1.4634912860699563, "grad_norm": 3.0341238975524902, "learning_rate": 1.87241767704375e-05, "loss": 0.7347, "step": 8965 }, { "epoch": 1.4636545447124607, "grad_norm": 2.9875826835632324, "learning_rate": 1.872386338756204e-05, "loss": 0.7155, "step": 8966 }, { "epoch": 1.4638178033549651, "grad_norm": 2.6458370685577393, "learning_rate": 1.8723549968826072e-05, "loss": 0.6474, "step": 8967 }, { "epoch": 1.4639810619974694, "grad_norm": 2.8187801837921143, "learning_rate": 1.8723236514230882e-05, "loss": 0.7372, "step": 8968 }, { "epoch": 1.4641443206399738, "grad_norm": 2.8644907474517822, "learning_rate": 1.8722923023777763e-05, "loss": 0.7229, "step": 8969 }, { "epoch": 1.4643075792824782, "grad_norm": 2.8115105628967285, "learning_rate": 1.8722609497468e-05, "loss": 0.7174, "step": 8970 }, { "epoch": 1.4644708379249827, "grad_norm": 3.1124532222747803, "learning_rate": 1.8722295935302886e-05, "loss": 0.7219, "step": 8971 }, { "epoch": 1.464634096567487, "grad_norm": 2.92691707611084, "learning_rate": 1.87219823372837e-05, "loss": 0.7588, "step": 8972 }, { "epoch": 1.4647973552099915, "grad_norm": 2.717709541320801, "learning_rate": 1.8721668703411745e-05, "loss": 0.7847, "step": 8973 }, { "epoch": 1.4649606138524958, "grad_norm": 2.9148147106170654, "learning_rate": 1.87213550336883e-05, "loss": 0.6923, "step": 8974 }, { "epoch": 1.4651238724950002, "grad_norm": 2.862379789352417, "learning_rate": 1.8721041328114656e-05, "loss": 0.7884, "step": 8975 }, { "epoch": 1.4652871311375046, "grad_norm": 2.8710994720458984, "learning_rate": 1.8720727586692108e-05, "loss": 0.726, "step": 8976 }, { "epoch": 1.4654503897800089, "grad_norm": 2.890040636062622, "learning_rate": 1.872041380942194e-05, "loss": 0.666, "step": 8977 }, { "epoch": 1.4656136484225133, "grad_norm": 2.578061819076538, "learning_rate": 1.8720099996305446e-05, "loss": 0.6509, "step": 8978 }, { "epoch": 1.4657769070650177, "grad_norm": 2.7899169921875, "learning_rate": 1.871978614734391e-05, "loss": 0.6094, "step": 8979 }, { "epoch": 1.4659401657075222, "grad_norm": 3.119946002960205, "learning_rate": 1.8719472262538624e-05, "loss": 0.6991, "step": 8980 }, { "epoch": 1.4661034243500266, "grad_norm": 3.4608049392700195, "learning_rate": 1.8719158341890884e-05, "loss": 0.7096, "step": 8981 }, { "epoch": 1.466266682992531, "grad_norm": 2.6798183917999268, "learning_rate": 1.8718844385401975e-05, "loss": 0.6896, "step": 8982 }, { "epoch": 1.4664299416350353, "grad_norm": 2.2292966842651367, "learning_rate": 1.8718530393073187e-05, "loss": 0.5228, "step": 8983 }, { "epoch": 1.4665932002775397, "grad_norm": 3.000948667526245, "learning_rate": 1.8718216364905816e-05, "loss": 0.7275, "step": 8984 }, { "epoch": 1.4667564589200441, "grad_norm": 2.8863935470581055, "learning_rate": 1.871790230090115e-05, "loss": 0.7435, "step": 8985 }, { "epoch": 1.4669197175625484, "grad_norm": 3.003655195236206, "learning_rate": 1.8717588201060474e-05, "loss": 0.8614, "step": 8986 }, { "epoch": 1.4670829762050528, "grad_norm": 3.083264112472534, "learning_rate": 1.8717274065385092e-05, "loss": 0.6784, "step": 8987 }, { "epoch": 1.4672462348475572, "grad_norm": 2.812582015991211, "learning_rate": 1.8716959893876283e-05, "loss": 0.7701, "step": 8988 }, { "epoch": 1.4674094934900617, "grad_norm": 3.0732455253601074, "learning_rate": 1.8716645686535345e-05, "loss": 0.775, "step": 8989 }, { "epoch": 1.467572752132566, "grad_norm": 2.9174365997314453, "learning_rate": 1.8716331443363564e-05, "loss": 0.7277, "step": 8990 }, { "epoch": 1.4677360107750705, "grad_norm": 2.9098470211029053, "learning_rate": 1.8716017164362242e-05, "loss": 0.6992, "step": 8991 }, { "epoch": 1.4678992694175748, "grad_norm": 2.55149507522583, "learning_rate": 1.8715702849532663e-05, "loss": 0.6972, "step": 8992 }, { "epoch": 1.4680625280600792, "grad_norm": 2.7467291355133057, "learning_rate": 1.871538849887612e-05, "loss": 0.7329, "step": 8993 }, { "epoch": 1.4682257867025836, "grad_norm": 2.5509564876556396, "learning_rate": 1.87150741123939e-05, "loss": 0.6538, "step": 8994 }, { "epoch": 1.4683890453450879, "grad_norm": 2.8336942195892334, "learning_rate": 1.8714759690087307e-05, "loss": 0.8578, "step": 8995 }, { "epoch": 1.4685523039875923, "grad_norm": 2.5888993740081787, "learning_rate": 1.8714445231957626e-05, "loss": 0.7034, "step": 8996 }, { "epoch": 1.4687155626300967, "grad_norm": 2.5092062950134277, "learning_rate": 1.8714130738006152e-05, "loss": 0.5772, "step": 8997 }, { "epoch": 1.4688788212726012, "grad_norm": 3.394679069519043, "learning_rate": 1.8713816208234177e-05, "loss": 0.7827, "step": 8998 }, { "epoch": 1.4690420799151056, "grad_norm": 2.610619068145752, "learning_rate": 1.871350164264299e-05, "loss": 0.7478, "step": 8999 }, { "epoch": 1.4692053385576098, "grad_norm": 2.508166790008545, "learning_rate": 1.8713187041233896e-05, "loss": 0.584, "step": 9000 }, { "epoch": 1.4693685972001143, "grad_norm": 3.044116497039795, "learning_rate": 1.871287240400817e-05, "loss": 0.9531, "step": 9001 }, { "epoch": 1.4695318558426187, "grad_norm": 2.875784158706665, "learning_rate": 1.8712557730967126e-05, "loss": 0.6853, "step": 9002 }, { "epoch": 1.4696951144851231, "grad_norm": 2.8608574867248535, "learning_rate": 1.871224302211204e-05, "loss": 0.7644, "step": 9003 }, { "epoch": 1.4698583731276273, "grad_norm": 2.449650764465332, "learning_rate": 1.8711928277444215e-05, "loss": 0.7019, "step": 9004 }, { "epoch": 1.4700216317701318, "grad_norm": 2.9502177238464355, "learning_rate": 1.8711613496964945e-05, "loss": 0.912, "step": 9005 }, { "epoch": 1.4701848904126362, "grad_norm": 2.6874184608459473, "learning_rate": 1.8711298680675517e-05, "loss": 0.6463, "step": 9006 }, { "epoch": 1.4703481490551407, "grad_norm": 2.8953630924224854, "learning_rate": 1.871098382857723e-05, "loss": 0.663, "step": 9007 }, { "epoch": 1.470511407697645, "grad_norm": 2.7868754863739014, "learning_rate": 1.871066894067138e-05, "loss": 0.6784, "step": 9008 }, { "epoch": 1.4706746663401493, "grad_norm": 2.2092788219451904, "learning_rate": 1.8710354016959257e-05, "loss": 0.5902, "step": 9009 }, { "epoch": 1.4708379249826538, "grad_norm": 2.5960395336151123, "learning_rate": 1.8710039057442158e-05, "loss": 0.6923, "step": 9010 }, { "epoch": 1.4710011836251582, "grad_norm": 3.0296754837036133, "learning_rate": 1.8709724062121376e-05, "loss": 0.9071, "step": 9011 }, { "epoch": 1.4711644422676624, "grad_norm": 2.8574061393737793, "learning_rate": 1.870940903099821e-05, "loss": 0.7537, "step": 9012 }, { "epoch": 1.4713277009101668, "grad_norm": 2.7529380321502686, "learning_rate": 1.870909396407395e-05, "loss": 0.7365, "step": 9013 }, { "epoch": 1.4714909595526713, "grad_norm": 3.008723497390747, "learning_rate": 1.8708778861349894e-05, "loss": 0.6549, "step": 9014 }, { "epoch": 1.4716542181951757, "grad_norm": 3.0066423416137695, "learning_rate": 1.8708463722827337e-05, "loss": 0.9106, "step": 9015 }, { "epoch": 1.4718174768376802, "grad_norm": 3.6018543243408203, "learning_rate": 1.8708148548507572e-05, "loss": 0.7489, "step": 9016 }, { "epoch": 1.4719807354801846, "grad_norm": 2.6922545433044434, "learning_rate": 1.8707833338391896e-05, "loss": 0.7212, "step": 9017 }, { "epoch": 1.4721439941226888, "grad_norm": 2.5723605155944824, "learning_rate": 1.8707518092481608e-05, "loss": 0.6468, "step": 9018 }, { "epoch": 1.4723072527651933, "grad_norm": 3.095410108566284, "learning_rate": 1.8707202810778e-05, "loss": 0.7891, "step": 9019 }, { "epoch": 1.4724705114076977, "grad_norm": 3.0366079807281494, "learning_rate": 1.870688749328237e-05, "loss": 0.852, "step": 9020 }, { "epoch": 1.472633770050202, "grad_norm": 2.9731686115264893, "learning_rate": 1.8706572139996015e-05, "loss": 0.6754, "step": 9021 }, { "epoch": 1.4727970286927063, "grad_norm": 2.6767873764038086, "learning_rate": 1.8706256750920224e-05, "loss": 0.608, "step": 9022 }, { "epoch": 1.4729602873352108, "grad_norm": 2.5867152214050293, "learning_rate": 1.8705941326056302e-05, "loss": 0.6719, "step": 9023 }, { "epoch": 1.4731235459777152, "grad_norm": 2.596259593963623, "learning_rate": 1.870562586540554e-05, "loss": 0.6967, "step": 9024 }, { "epoch": 1.4732868046202197, "grad_norm": 2.6440494060516357, "learning_rate": 1.8705310368969242e-05, "loss": 0.6937, "step": 9025 }, { "epoch": 1.473450063262724, "grad_norm": 3.0491065979003906, "learning_rate": 1.8704994836748695e-05, "loss": 0.8253, "step": 9026 }, { "epoch": 1.4736133219052283, "grad_norm": 2.4876277446746826, "learning_rate": 1.8704679268745204e-05, "loss": 0.6659, "step": 9027 }, { "epoch": 1.4737765805477328, "grad_norm": 3.010560989379883, "learning_rate": 1.8704363664960063e-05, "loss": 0.802, "step": 9028 }, { "epoch": 1.4739398391902372, "grad_norm": 3.157153367996216, "learning_rate": 1.870404802539457e-05, "loss": 0.8369, "step": 9029 }, { "epoch": 1.4741030978327414, "grad_norm": 2.628541946411133, "learning_rate": 1.8703732350050022e-05, "loss": 0.6409, "step": 9030 }, { "epoch": 1.4742663564752458, "grad_norm": 2.918520212173462, "learning_rate": 1.8703416638927714e-05, "loss": 0.768, "step": 9031 }, { "epoch": 1.4744296151177503, "grad_norm": 2.629163980484009, "learning_rate": 1.870310089202895e-05, "loss": 0.6313, "step": 9032 }, { "epoch": 1.4745928737602547, "grad_norm": 3.161059617996216, "learning_rate": 1.8702785109355025e-05, "loss": 0.8412, "step": 9033 }, { "epoch": 1.4747561324027592, "grad_norm": 3.32006573677063, "learning_rate": 1.8702469290907236e-05, "loss": 0.7635, "step": 9034 }, { "epoch": 1.4749193910452636, "grad_norm": 2.88002872467041, "learning_rate": 1.8702153436686877e-05, "loss": 0.6639, "step": 9035 }, { "epoch": 1.4750826496877678, "grad_norm": 3.046957492828369, "learning_rate": 1.870183754669526e-05, "loss": 0.8369, "step": 9036 }, { "epoch": 1.4752459083302722, "grad_norm": 2.8493812084198, "learning_rate": 1.8701521620933668e-05, "loss": 0.8377, "step": 9037 }, { "epoch": 1.4754091669727767, "grad_norm": 2.7162630558013916, "learning_rate": 1.8701205659403406e-05, "loss": 0.8468, "step": 9038 }, { "epoch": 1.475572425615281, "grad_norm": 2.339230537414551, "learning_rate": 1.8700889662105776e-05, "loss": 0.6463, "step": 9039 }, { "epoch": 1.4757356842577853, "grad_norm": 3.0225698947906494, "learning_rate": 1.8700573629042074e-05, "loss": 0.8729, "step": 9040 }, { "epoch": 1.4758989429002898, "grad_norm": 2.5733416080474854, "learning_rate": 1.87002575602136e-05, "loss": 0.7097, "step": 9041 }, { "epoch": 1.4760622015427942, "grad_norm": 2.4115073680877686, "learning_rate": 1.8699941455621647e-05, "loss": 0.6255, "step": 9042 }, { "epoch": 1.4762254601852987, "grad_norm": 2.9573557376861572, "learning_rate": 1.8699625315267524e-05, "loss": 0.8342, "step": 9043 }, { "epoch": 1.4763887188278029, "grad_norm": 2.5731284618377686, "learning_rate": 1.8699309139152524e-05, "loss": 0.6715, "step": 9044 }, { "epoch": 1.4765519774703073, "grad_norm": 2.425985813140869, "learning_rate": 1.869899292727795e-05, "loss": 0.7325, "step": 9045 }, { "epoch": 1.4767152361128117, "grad_norm": 2.8986005783081055, "learning_rate": 1.8698676679645104e-05, "loss": 0.7651, "step": 9046 }, { "epoch": 1.4768784947553162, "grad_norm": 2.747736930847168, "learning_rate": 1.8698360396255278e-05, "loss": 0.7882, "step": 9047 }, { "epoch": 1.4770417533978204, "grad_norm": 2.293044328689575, "learning_rate": 1.869804407710978e-05, "loss": 0.5543, "step": 9048 }, { "epoch": 1.4772050120403248, "grad_norm": 3.2852256298065186, "learning_rate": 1.8697727722209903e-05, "loss": 0.6992, "step": 9049 }, { "epoch": 1.4773682706828293, "grad_norm": 2.6571733951568604, "learning_rate": 1.8697411331556958e-05, "loss": 0.7379, "step": 9050 }, { "epoch": 1.4775315293253337, "grad_norm": 2.775416851043701, "learning_rate": 1.8697094905152233e-05, "loss": 0.7659, "step": 9051 }, { "epoch": 1.4776947879678382, "grad_norm": 2.8230273723602295, "learning_rate": 1.8696778442997036e-05, "loss": 0.8885, "step": 9052 }, { "epoch": 1.4778580466103424, "grad_norm": 2.708627223968506, "learning_rate": 1.869646194509267e-05, "loss": 0.7609, "step": 9053 }, { "epoch": 1.4780213052528468, "grad_norm": 2.339343547821045, "learning_rate": 1.869614541144043e-05, "loss": 0.5858, "step": 9054 }, { "epoch": 1.4781845638953512, "grad_norm": 2.7960853576660156, "learning_rate": 1.8695828842041617e-05, "loss": 0.7896, "step": 9055 }, { "epoch": 1.4783478225378555, "grad_norm": 2.595285654067993, "learning_rate": 1.8695512236897538e-05, "loss": 0.6945, "step": 9056 }, { "epoch": 1.47851108118036, "grad_norm": 2.514214515686035, "learning_rate": 1.8695195596009495e-05, "loss": 0.6852, "step": 9057 }, { "epoch": 1.4786743398228643, "grad_norm": 2.524182081222534, "learning_rate": 1.8694878919378783e-05, "loss": 0.7179, "step": 9058 }, { "epoch": 1.4788375984653688, "grad_norm": 2.712602376937866, "learning_rate": 1.869456220700671e-05, "loss": 0.7178, "step": 9059 }, { "epoch": 1.4790008571078732, "grad_norm": 2.5661439895629883, "learning_rate": 1.8694245458894568e-05, "loss": 0.6567, "step": 9060 }, { "epoch": 1.4791641157503777, "grad_norm": 2.6178770065307617, "learning_rate": 1.869392867504367e-05, "loss": 0.5863, "step": 9061 }, { "epoch": 1.4793273743928819, "grad_norm": 3.096959114074707, "learning_rate": 1.8693611855455314e-05, "loss": 0.8004, "step": 9062 }, { "epoch": 1.4794906330353863, "grad_norm": 2.9915409088134766, "learning_rate": 1.86932950001308e-05, "loss": 0.7006, "step": 9063 }, { "epoch": 1.4796538916778907, "grad_norm": 2.406081438064575, "learning_rate": 1.869297810907144e-05, "loss": 0.5931, "step": 9064 }, { "epoch": 1.479817150320395, "grad_norm": 2.2493410110473633, "learning_rate": 1.8692661182278524e-05, "loss": 0.575, "step": 9065 }, { "epoch": 1.4799804089628994, "grad_norm": 2.7632555961608887, "learning_rate": 1.8692344219753358e-05, "loss": 0.774, "step": 9066 }, { "epoch": 1.4801436676054038, "grad_norm": 2.663769006729126, "learning_rate": 1.869202722149725e-05, "loss": 0.6892, "step": 9067 }, { "epoch": 1.4803069262479083, "grad_norm": 3.345625638961792, "learning_rate": 1.86917101875115e-05, "loss": 0.7677, "step": 9068 }, { "epoch": 1.4804701848904127, "grad_norm": 2.9501984119415283, "learning_rate": 1.869139311779741e-05, "loss": 0.8338, "step": 9069 }, { "epoch": 1.4806334435329171, "grad_norm": 3.0113799571990967, "learning_rate": 1.8691076012356285e-05, "loss": 0.7614, "step": 9070 }, { "epoch": 1.4807967021754214, "grad_norm": 3.0570199489593506, "learning_rate": 1.8690758871189428e-05, "loss": 0.7352, "step": 9071 }, { "epoch": 1.4809599608179258, "grad_norm": 2.790678024291992, "learning_rate": 1.8690441694298143e-05, "loss": 0.768, "step": 9072 }, { "epoch": 1.4811232194604302, "grad_norm": 3.033236503601074, "learning_rate": 1.8690124481683735e-05, "loss": 0.9428, "step": 9073 }, { "epoch": 1.4812864781029345, "grad_norm": 2.5565333366394043, "learning_rate": 1.8689807233347505e-05, "loss": 0.7755, "step": 9074 }, { "epoch": 1.481449736745439, "grad_norm": 3.067319631576538, "learning_rate": 1.8689489949290764e-05, "loss": 0.7884, "step": 9075 }, { "epoch": 1.4816129953879433, "grad_norm": 2.630197048187256, "learning_rate": 1.8689172629514806e-05, "loss": 0.6342, "step": 9076 }, { "epoch": 1.4817762540304478, "grad_norm": 3.331796169281006, "learning_rate": 1.868885527402094e-05, "loss": 0.8513, "step": 9077 }, { "epoch": 1.4819395126729522, "grad_norm": 2.61523699760437, "learning_rate": 1.868853788281047e-05, "loss": 0.7472, "step": 9078 }, { "epoch": 1.4821027713154566, "grad_norm": 2.4648172855377197, "learning_rate": 1.8688220455884702e-05, "loss": 0.5561, "step": 9079 }, { "epoch": 1.4822660299579609, "grad_norm": 2.9264843463897705, "learning_rate": 1.8687902993244945e-05, "loss": 0.6486, "step": 9080 }, { "epoch": 1.4824292886004653, "grad_norm": 3.153245449066162, "learning_rate": 1.8687585494892494e-05, "loss": 0.7441, "step": 9081 }, { "epoch": 1.4825925472429697, "grad_norm": 2.6611735820770264, "learning_rate": 1.8687267960828662e-05, "loss": 0.6764, "step": 9082 }, { "epoch": 1.482755805885474, "grad_norm": 2.799720048904419, "learning_rate": 1.8686950391054748e-05, "loss": 0.6969, "step": 9083 }, { "epoch": 1.4829190645279784, "grad_norm": 2.958038091659546, "learning_rate": 1.8686632785572066e-05, "loss": 0.7724, "step": 9084 }, { "epoch": 1.4830823231704828, "grad_norm": 2.3558738231658936, "learning_rate": 1.8686315144381914e-05, "loss": 0.5524, "step": 9085 }, { "epoch": 1.4832455818129873, "grad_norm": 2.7113771438598633, "learning_rate": 1.86859974674856e-05, "loss": 0.6367, "step": 9086 }, { "epoch": 1.4834088404554917, "grad_norm": 2.5427870750427246, "learning_rate": 1.8685679754884432e-05, "loss": 0.637, "step": 9087 }, { "epoch": 1.483572099097996, "grad_norm": 2.9513931274414062, "learning_rate": 1.8685362006579716e-05, "loss": 0.8032, "step": 9088 }, { "epoch": 1.4837353577405004, "grad_norm": 2.7875068187713623, "learning_rate": 1.8685044222572752e-05, "loss": 1.354, "step": 9089 }, { "epoch": 1.4838986163830048, "grad_norm": 2.091982126235962, "learning_rate": 1.868472640286485e-05, "loss": 0.6188, "step": 9090 }, { "epoch": 1.4840618750255092, "grad_norm": 2.610659599304199, "learning_rate": 1.868440854745732e-05, "loss": 0.6324, "step": 9091 }, { "epoch": 1.4842251336680135, "grad_norm": 2.7120540142059326, "learning_rate": 1.8684090656351464e-05, "loss": 0.6664, "step": 9092 }, { "epoch": 1.484388392310518, "grad_norm": 2.7380318641662598, "learning_rate": 1.8683772729548592e-05, "loss": 0.6731, "step": 9093 }, { "epoch": 1.4845516509530223, "grad_norm": 2.661198616027832, "learning_rate": 1.868345476705001e-05, "loss": 0.6961, "step": 9094 }, { "epoch": 1.4847149095955268, "grad_norm": 3.011173963546753, "learning_rate": 1.8683136768857023e-05, "loss": 0.8634, "step": 9095 }, { "epoch": 1.4848781682380312, "grad_norm": 2.7878990173339844, "learning_rate": 1.8682818734970938e-05, "loss": 0.688, "step": 9096 }, { "epoch": 1.4850414268805354, "grad_norm": 2.242277145385742, "learning_rate": 1.8682500665393064e-05, "loss": 0.4915, "step": 9097 }, { "epoch": 1.4852046855230399, "grad_norm": 2.9294919967651367, "learning_rate": 1.868218256012471e-05, "loss": 0.7094, "step": 9098 }, { "epoch": 1.4853679441655443, "grad_norm": 3.400613784790039, "learning_rate": 1.8681864419167183e-05, "loss": 0.8944, "step": 9099 }, { "epoch": 1.4855312028080487, "grad_norm": 2.9332423210144043, "learning_rate": 1.8681546242521785e-05, "loss": 0.753, "step": 9100 }, { "epoch": 1.485694461450553, "grad_norm": 3.2379229068756104, "learning_rate": 1.868122803018983e-05, "loss": 0.725, "step": 9101 }, { "epoch": 1.4858577200930574, "grad_norm": 2.775562286376953, "learning_rate": 1.8680909782172626e-05, "loss": 0.6265, "step": 9102 }, { "epoch": 1.4860209787355618, "grad_norm": 2.8914942741394043, "learning_rate": 1.868059149847148e-05, "loss": 0.924, "step": 9103 }, { "epoch": 1.4861842373780663, "grad_norm": 2.7203402519226074, "learning_rate": 1.86802731790877e-05, "loss": 0.6756, "step": 9104 }, { "epoch": 1.4863474960205707, "grad_norm": 2.5586235523223877, "learning_rate": 1.8679954824022594e-05, "loss": 0.5786, "step": 9105 }, { "epoch": 1.486510754663075, "grad_norm": 3.615809440612793, "learning_rate": 1.8679636433277473e-05, "loss": 0.7427, "step": 9106 }, { "epoch": 1.4866740133055794, "grad_norm": 2.9381356239318848, "learning_rate": 1.867931800685364e-05, "loss": 0.8118, "step": 9107 }, { "epoch": 1.4868372719480838, "grad_norm": 2.4733524322509766, "learning_rate": 1.867899954475241e-05, "loss": 0.6067, "step": 9108 }, { "epoch": 1.487000530590588, "grad_norm": 2.708075761795044, "learning_rate": 1.867868104697509e-05, "loss": 0.6737, "step": 9109 }, { "epoch": 1.4871637892330924, "grad_norm": 2.980031728744507, "learning_rate": 1.867836251352299e-05, "loss": 0.7092, "step": 9110 }, { "epoch": 1.4873270478755969, "grad_norm": 2.6701722145080566, "learning_rate": 1.867804394439742e-05, "loss": 0.6713, "step": 9111 }, { "epoch": 1.4874903065181013, "grad_norm": 2.33882999420166, "learning_rate": 1.8677725339599684e-05, "loss": 0.5718, "step": 9112 }, { "epoch": 1.4876535651606058, "grad_norm": 2.9486184120178223, "learning_rate": 1.86774066991311e-05, "loss": 0.7432, "step": 9113 }, { "epoch": 1.4878168238031102, "grad_norm": 3.0163702964782715, "learning_rate": 1.8677088022992972e-05, "loss": 0.7712, "step": 9114 }, { "epoch": 1.4879800824456144, "grad_norm": 2.9226605892181396, "learning_rate": 1.867676931118661e-05, "loss": 0.8497, "step": 9115 }, { "epoch": 1.4881433410881189, "grad_norm": 2.279125690460205, "learning_rate": 1.8676450563713328e-05, "loss": 0.5334, "step": 9116 }, { "epoch": 1.4883065997306233, "grad_norm": 2.9135549068450928, "learning_rate": 1.867613178057443e-05, "loss": 0.7815, "step": 9117 }, { "epoch": 1.4884698583731275, "grad_norm": 3.4165990352630615, "learning_rate": 1.8675812961771235e-05, "loss": 0.9787, "step": 9118 }, { "epoch": 1.488633117015632, "grad_norm": 2.963934898376465, "learning_rate": 1.8675494107305045e-05, "loss": 0.6602, "step": 9119 }, { "epoch": 1.4887963756581364, "grad_norm": 3.070918560028076, "learning_rate": 1.8675175217177176e-05, "loss": 0.8337, "step": 9120 }, { "epoch": 1.4889596343006408, "grad_norm": 3.0833919048309326, "learning_rate": 1.867485629138894e-05, "loss": 0.8402, "step": 9121 }, { "epoch": 1.4891228929431453, "grad_norm": 2.5668442249298096, "learning_rate": 1.867453732994164e-05, "loss": 0.6357, "step": 9122 }, { "epoch": 1.4892861515856497, "grad_norm": 2.365528106689453, "learning_rate": 1.8674218332836597e-05, "loss": 0.5588, "step": 9123 }, { "epoch": 1.489449410228154, "grad_norm": 2.9423227310180664, "learning_rate": 1.8673899300075118e-05, "loss": 0.7393, "step": 9124 }, { "epoch": 1.4896126688706584, "grad_norm": 2.8065483570098877, "learning_rate": 1.867358023165851e-05, "loss": 0.8244, "step": 9125 }, { "epoch": 1.4897759275131628, "grad_norm": 3.110910415649414, "learning_rate": 1.867326112758809e-05, "loss": 0.9273, "step": 9126 }, { "epoch": 1.489939186155667, "grad_norm": 2.611293077468872, "learning_rate": 1.8672941987865173e-05, "loss": 0.6259, "step": 9127 }, { "epoch": 1.4901024447981714, "grad_norm": 2.8989996910095215, "learning_rate": 1.867262281249106e-05, "loss": 0.8691, "step": 9128 }, { "epoch": 1.4902657034406759, "grad_norm": 2.86999773979187, "learning_rate": 1.8672303601467073e-05, "loss": 0.6818, "step": 9129 }, { "epoch": 1.4904289620831803, "grad_norm": 2.5032055377960205, "learning_rate": 1.8671984354794522e-05, "loss": 0.5885, "step": 9130 }, { "epoch": 1.4905922207256848, "grad_norm": 3.1497602462768555, "learning_rate": 1.8671665072474714e-05, "loss": 0.8685, "step": 9131 }, { "epoch": 1.490755479368189, "grad_norm": 2.6231770515441895, "learning_rate": 1.867134575450897e-05, "loss": 0.6023, "step": 9132 }, { "epoch": 1.4909187380106934, "grad_norm": 2.813837766647339, "learning_rate": 1.8671026400898592e-05, "loss": 0.7777, "step": 9133 }, { "epoch": 1.4910819966531978, "grad_norm": 2.9699223041534424, "learning_rate": 1.86707070116449e-05, "loss": 0.676, "step": 9134 }, { "epoch": 1.4912452552957023, "grad_norm": 3.1505370140075684, "learning_rate": 1.8670387586749207e-05, "loss": 0.863, "step": 9135 }, { "epoch": 1.4914085139382065, "grad_norm": 3.0468056201934814, "learning_rate": 1.8670068126212827e-05, "loss": 0.7499, "step": 9136 }, { "epoch": 1.491571772580711, "grad_norm": 3.136169195175171, "learning_rate": 1.866974863003707e-05, "loss": 0.8765, "step": 9137 }, { "epoch": 1.4917350312232154, "grad_norm": 3.747854232788086, "learning_rate": 1.8669429098223246e-05, "loss": 0.8367, "step": 9138 }, { "epoch": 1.4918982898657198, "grad_norm": 2.402216672897339, "learning_rate": 1.8669109530772673e-05, "loss": 0.5338, "step": 9139 }, { "epoch": 1.4920615485082243, "grad_norm": 2.648803234100342, "learning_rate": 1.866878992768667e-05, "loss": 0.6285, "step": 9140 }, { "epoch": 1.4922248071507285, "grad_norm": 2.7133772373199463, "learning_rate": 1.8668470288966537e-05, "loss": 0.5882, "step": 9141 }, { "epoch": 1.492388065793233, "grad_norm": 2.724733352661133, "learning_rate": 1.86681506146136e-05, "loss": 0.7814, "step": 9142 }, { "epoch": 1.4925513244357373, "grad_norm": 2.530137062072754, "learning_rate": 1.8667830904629168e-05, "loss": 0.5093, "step": 9143 }, { "epoch": 1.4927145830782418, "grad_norm": 2.6555793285369873, "learning_rate": 1.8667511159014556e-05, "loss": 0.69, "step": 9144 }, { "epoch": 1.492877841720746, "grad_norm": 3.2642769813537598, "learning_rate": 1.866719137777108e-05, "loss": 0.9069, "step": 9145 }, { "epoch": 1.4930411003632504, "grad_norm": 2.5728278160095215, "learning_rate": 1.866687156090005e-05, "loss": 0.5988, "step": 9146 }, { "epoch": 1.4932043590057549, "grad_norm": 2.5978903770446777, "learning_rate": 1.8666551708402786e-05, "loss": 0.6748, "step": 9147 }, { "epoch": 1.4933676176482593, "grad_norm": 2.9240450859069824, "learning_rate": 1.86662318202806e-05, "loss": 0.8085, "step": 9148 }, { "epoch": 1.4935308762907638, "grad_norm": 2.6336307525634766, "learning_rate": 1.8665911896534807e-05, "loss": 0.7041, "step": 9149 }, { "epoch": 1.493694134933268, "grad_norm": 2.948453426361084, "learning_rate": 1.866559193716672e-05, "loss": 0.7284, "step": 9150 }, { "epoch": 1.4938573935757724, "grad_norm": 2.7290380001068115, "learning_rate": 1.866527194217766e-05, "loss": 0.7138, "step": 9151 }, { "epoch": 1.4940206522182768, "grad_norm": 2.8914167881011963, "learning_rate": 1.866495191156894e-05, "loss": 0.7674, "step": 9152 }, { "epoch": 1.494183910860781, "grad_norm": 2.93035626411438, "learning_rate": 1.8664631845341872e-05, "loss": 0.7541, "step": 9153 }, { "epoch": 1.4943471695032855, "grad_norm": 2.6660916805267334, "learning_rate": 1.8664311743497773e-05, "loss": 0.7062, "step": 9154 }, { "epoch": 1.49451042814579, "grad_norm": 2.9500346183776855, "learning_rate": 1.8663991606037964e-05, "loss": 0.7555, "step": 9155 }, { "epoch": 1.4946736867882944, "grad_norm": 2.292527914047241, "learning_rate": 1.8663671432963754e-05, "loss": 0.5572, "step": 9156 }, { "epoch": 1.4948369454307988, "grad_norm": 2.8056859970092773, "learning_rate": 1.8663351224276463e-05, "loss": 0.6829, "step": 9157 }, { "epoch": 1.4950002040733033, "grad_norm": 3.164186954498291, "learning_rate": 1.8663030979977407e-05, "loss": 0.8125, "step": 9158 }, { "epoch": 1.4951634627158075, "grad_norm": 2.8309717178344727, "learning_rate": 1.86627107000679e-05, "loss": 0.7166, "step": 9159 }, { "epoch": 1.495326721358312, "grad_norm": 2.8472647666931152, "learning_rate": 1.866239038454926e-05, "loss": 0.8335, "step": 9160 }, { "epoch": 1.4954899800008163, "grad_norm": 2.930634021759033, "learning_rate": 1.8662070033422805e-05, "loss": 0.7693, "step": 9161 }, { "epoch": 1.4956532386433206, "grad_norm": 3.2618892192840576, "learning_rate": 1.866174964668985e-05, "loss": 0.8222, "step": 9162 }, { "epoch": 1.495816497285825, "grad_norm": 2.884629011154175, "learning_rate": 1.8661429224351716e-05, "loss": 0.6899, "step": 9163 }, { "epoch": 1.4959797559283294, "grad_norm": 2.8275985717773438, "learning_rate": 1.8661108766409714e-05, "loss": 0.6505, "step": 9164 }, { "epoch": 1.4961430145708339, "grad_norm": 2.7622156143188477, "learning_rate": 1.8660788272865165e-05, "loss": 0.6864, "step": 9165 }, { "epoch": 1.4963062732133383, "grad_norm": 2.4674813747406006, "learning_rate": 1.8660467743719385e-05, "loss": 0.7479, "step": 9166 }, { "epoch": 1.4964695318558427, "grad_norm": 2.633479356765747, "learning_rate": 1.8660147178973694e-05, "loss": 0.7121, "step": 9167 }, { "epoch": 1.496632790498347, "grad_norm": 2.8674941062927246, "learning_rate": 1.8659826578629404e-05, "loss": 0.7945, "step": 9168 }, { "epoch": 1.4967960491408514, "grad_norm": 2.516115665435791, "learning_rate": 1.865950594268784e-05, "loss": 0.6855, "step": 9169 }, { "epoch": 1.4969593077833558, "grad_norm": 2.6442360877990723, "learning_rate": 1.865918527115032e-05, "loss": 0.6433, "step": 9170 }, { "epoch": 1.49712256642586, "grad_norm": 2.768008232116699, "learning_rate": 1.8658864564018152e-05, "loss": 0.7478, "step": 9171 }, { "epoch": 1.4972858250683645, "grad_norm": 2.921614170074463, "learning_rate": 1.8658543821292668e-05, "loss": 0.8152, "step": 9172 }, { "epoch": 1.497449083710869, "grad_norm": 2.2955687046051025, "learning_rate": 1.8658223042975175e-05, "loss": 0.5836, "step": 9173 }, { "epoch": 1.4976123423533734, "grad_norm": 2.4888386726379395, "learning_rate": 1.8657902229066998e-05, "loss": 0.6347, "step": 9174 }, { "epoch": 1.4977756009958778, "grad_norm": 2.6872060298919678, "learning_rate": 1.8657581379569454e-05, "loss": 0.7582, "step": 9175 }, { "epoch": 1.497938859638382, "grad_norm": 2.7899575233459473, "learning_rate": 1.865726049448386e-05, "loss": 0.7172, "step": 9176 }, { "epoch": 1.4981021182808865, "grad_norm": 2.5989279747009277, "learning_rate": 1.865693957381154e-05, "loss": 0.6538, "step": 9177 }, { "epoch": 1.498265376923391, "grad_norm": 2.622036933898926, "learning_rate": 1.865661861755381e-05, "loss": 0.6626, "step": 9178 }, { "epoch": 1.4984286355658953, "grad_norm": 2.5576353073120117, "learning_rate": 1.865629762571199e-05, "loss": 0.6166, "step": 9179 }, { "epoch": 1.4985918942083996, "grad_norm": 2.637287139892578, "learning_rate": 1.8655976598287394e-05, "loss": 0.6527, "step": 9180 }, { "epoch": 1.498755152850904, "grad_norm": 2.4463627338409424, "learning_rate": 1.865565553528135e-05, "loss": 0.6209, "step": 9181 }, { "epoch": 1.4989184114934084, "grad_norm": 3.0875773429870605, "learning_rate": 1.865533443669518e-05, "loss": 0.8075, "step": 9182 }, { "epoch": 1.4990816701359129, "grad_norm": 2.7322306632995605, "learning_rate": 1.8655013302530193e-05, "loss": 0.6812, "step": 9183 }, { "epoch": 1.4992449287784173, "grad_norm": 2.6726624965667725, "learning_rate": 1.8654692132787713e-05, "loss": 0.7335, "step": 9184 }, { "epoch": 1.4994081874209215, "grad_norm": 2.2334744930267334, "learning_rate": 1.8654370927469063e-05, "loss": 0.6215, "step": 9185 }, { "epoch": 1.499571446063426, "grad_norm": 3.184939384460449, "learning_rate": 1.8654049686575564e-05, "loss": 0.7885, "step": 9186 }, { "epoch": 1.4997347047059304, "grad_norm": 2.5712506771087646, "learning_rate": 1.8653728410108532e-05, "loss": 0.6216, "step": 9187 }, { "epoch": 1.4998979633484348, "grad_norm": 2.888770580291748, "learning_rate": 1.865340709806929e-05, "loss": 0.806, "step": 9188 }, { "epoch": 1.500061221990939, "grad_norm": 2.558239221572876, "learning_rate": 1.865308575045916e-05, "loss": 0.6304, "step": 9189 }, { "epoch": 1.5002244806334435, "grad_norm": 2.4310574531555176, "learning_rate": 1.8652764367279463e-05, "loss": 0.6332, "step": 9190 }, { "epoch": 1.500387739275948, "grad_norm": 2.7358477115631104, "learning_rate": 1.8652442948531516e-05, "loss": 0.724, "step": 9191 }, { "epoch": 1.5005509979184524, "grad_norm": 2.501014471054077, "learning_rate": 1.8652121494216645e-05, "loss": 0.666, "step": 9192 }, { "epoch": 1.5007142565609568, "grad_norm": 2.5387237071990967, "learning_rate": 1.8651800004336168e-05, "loss": 0.7233, "step": 9193 }, { "epoch": 1.5008775152034612, "grad_norm": 3.234555959701538, "learning_rate": 1.865147847889141e-05, "loss": 0.9043, "step": 9194 }, { "epoch": 1.5010407738459655, "grad_norm": 2.9806735515594482, "learning_rate": 1.8651156917883687e-05, "loss": 0.7794, "step": 9195 }, { "epoch": 1.50120403248847, "grad_norm": 2.788295030593872, "learning_rate": 1.865083532131433e-05, "loss": 0.7099, "step": 9196 }, { "epoch": 1.5013672911309741, "grad_norm": 2.9321463108062744, "learning_rate": 1.865051368918465e-05, "loss": 0.808, "step": 9197 }, { "epoch": 1.5015305497734786, "grad_norm": 2.4005560874938965, "learning_rate": 1.865019202149598e-05, "loss": 0.5788, "step": 9198 }, { "epoch": 1.501693808415983, "grad_norm": 3.0338926315307617, "learning_rate": 1.8649870318249635e-05, "loss": 0.8347, "step": 9199 }, { "epoch": 1.5018570670584874, "grad_norm": 2.68597412109375, "learning_rate": 1.8649548579446938e-05, "loss": 0.6604, "step": 9200 }, { "epoch": 1.5020203257009919, "grad_norm": 2.6724817752838135, "learning_rate": 1.8649226805089213e-05, "loss": 0.6657, "step": 9201 }, { "epoch": 1.5021835843434963, "grad_norm": 3.239668607711792, "learning_rate": 1.8648904995177784e-05, "loss": 0.7026, "step": 9202 }, { "epoch": 1.5023468429860005, "grad_norm": 2.344754219055176, "learning_rate": 1.864858314971397e-05, "loss": 0.5954, "step": 9203 }, { "epoch": 1.502510101628505, "grad_norm": 2.728114604949951, "learning_rate": 1.8648261268699097e-05, "loss": 0.5645, "step": 9204 }, { "epoch": 1.5026733602710094, "grad_norm": 3.176290512084961, "learning_rate": 1.8647939352134488e-05, "loss": 0.7737, "step": 9205 }, { "epoch": 1.5028366189135136, "grad_norm": 2.3003039360046387, "learning_rate": 1.8647617400021464e-05, "loss": 0.5836, "step": 9206 }, { "epoch": 1.502999877556018, "grad_norm": 2.7325522899627686, "learning_rate": 1.864729541236135e-05, "loss": 0.6289, "step": 9207 }, { "epoch": 1.5031631361985225, "grad_norm": 2.688162326812744, "learning_rate": 1.8646973389155477e-05, "loss": 0.7942, "step": 9208 }, { "epoch": 1.503326394841027, "grad_norm": 3.144920825958252, "learning_rate": 1.8646651330405155e-05, "loss": 0.716, "step": 9209 }, { "epoch": 1.5034896534835314, "grad_norm": 2.6775155067443848, "learning_rate": 1.8646329236111715e-05, "loss": 0.6829, "step": 9210 }, { "epoch": 1.5036529121260358, "grad_norm": 2.7627549171447754, "learning_rate": 1.8646007106276482e-05, "loss": 0.7855, "step": 9211 }, { "epoch": 1.50381617076854, "grad_norm": 2.6401898860931396, "learning_rate": 1.8645684940900776e-05, "loss": 0.6537, "step": 9212 }, { "epoch": 1.5039794294110445, "grad_norm": 3.0514214038848877, "learning_rate": 1.8645362739985925e-05, "loss": 0.7242, "step": 9213 }, { "epoch": 1.5041426880535487, "grad_norm": 2.976684093475342, "learning_rate": 1.8645040503533248e-05, "loss": 0.6836, "step": 9214 }, { "epoch": 1.504305946696053, "grad_norm": 3.121053695678711, "learning_rate": 1.864471823154408e-05, "loss": 0.7331, "step": 9215 }, { "epoch": 1.5044692053385575, "grad_norm": 3.263472557067871, "learning_rate": 1.8644395924019738e-05, "loss": 0.9382, "step": 9216 }, { "epoch": 1.504632463981062, "grad_norm": 3.3032782077789307, "learning_rate": 1.8644073580961548e-05, "loss": 0.7298, "step": 9217 }, { "epoch": 1.5047957226235664, "grad_norm": 2.983933925628662, "learning_rate": 1.8643751202370833e-05, "loss": 0.8839, "step": 9218 }, { "epoch": 1.5049589812660709, "grad_norm": 2.9528112411499023, "learning_rate": 1.8643428788248923e-05, "loss": 0.7577, "step": 9219 }, { "epoch": 1.5051222399085753, "grad_norm": 3.212893486022949, "learning_rate": 1.8643106338597142e-05, "loss": 0.8879, "step": 9220 }, { "epoch": 1.5052854985510795, "grad_norm": 2.755972146987915, "learning_rate": 1.8642783853416815e-05, "loss": 0.8611, "step": 9221 }, { "epoch": 1.505448757193584, "grad_norm": 2.7370989322662354, "learning_rate": 1.8642461332709266e-05, "loss": 0.6315, "step": 9222 }, { "epoch": 1.5056120158360882, "grad_norm": 2.8441178798675537, "learning_rate": 1.864213877647582e-05, "loss": 0.7204, "step": 9223 }, { "epoch": 1.5057752744785926, "grad_norm": 2.8206357955932617, "learning_rate": 1.8641816184717803e-05, "loss": 0.7632, "step": 9224 }, { "epoch": 1.505938533121097, "grad_norm": 2.8106324672698975, "learning_rate": 1.864149355743655e-05, "loss": 0.7158, "step": 9225 }, { "epoch": 1.5061017917636015, "grad_norm": 2.9259328842163086, "learning_rate": 1.8641170894633375e-05, "loss": 0.7806, "step": 9226 }, { "epoch": 1.506265050406106, "grad_norm": 2.9716804027557373, "learning_rate": 1.864084819630961e-05, "loss": 0.8388, "step": 9227 }, { "epoch": 1.5064283090486104, "grad_norm": 2.6047749519348145, "learning_rate": 1.864052546246658e-05, "loss": 0.657, "step": 9228 }, { "epoch": 1.5065915676911148, "grad_norm": 2.907050371170044, "learning_rate": 1.864020269310561e-05, "loss": 0.828, "step": 9229 }, { "epoch": 1.506754826333619, "grad_norm": 2.9653868675231934, "learning_rate": 1.8639879888228036e-05, "loss": 0.7882, "step": 9230 }, { "epoch": 1.5069180849761235, "grad_norm": 2.9958364963531494, "learning_rate": 1.8639557047835172e-05, "loss": 0.7789, "step": 9231 }, { "epoch": 1.5070813436186277, "grad_norm": 2.6706173419952393, "learning_rate": 1.8639234171928355e-05, "loss": 0.7286, "step": 9232 }, { "epoch": 1.507244602261132, "grad_norm": 2.9349846839904785, "learning_rate": 1.863891126050891e-05, "loss": 0.7059, "step": 9233 }, { "epoch": 1.5074078609036365, "grad_norm": 2.6172852516174316, "learning_rate": 1.8638588313578155e-05, "loss": 0.6853, "step": 9234 }, { "epoch": 1.507571119546141, "grad_norm": 2.745065689086914, "learning_rate": 1.8638265331137432e-05, "loss": 0.7369, "step": 9235 }, { "epoch": 1.5077343781886454, "grad_norm": 2.328627347946167, "learning_rate": 1.863794231318806e-05, "loss": 0.5757, "step": 9236 }, { "epoch": 1.5078976368311499, "grad_norm": 2.919429302215576, "learning_rate": 1.863761925973137e-05, "loss": 0.7531, "step": 9237 }, { "epoch": 1.5080608954736543, "grad_norm": 2.637178897857666, "learning_rate": 1.8637296170768687e-05, "loss": 0.727, "step": 9238 }, { "epoch": 1.5082241541161585, "grad_norm": 2.98940110206604, "learning_rate": 1.8636973046301342e-05, "loss": 0.8173, "step": 9239 }, { "epoch": 1.508387412758663, "grad_norm": 2.6424713134765625, "learning_rate": 1.8636649886330663e-05, "loss": 0.7376, "step": 9240 }, { "epoch": 1.5085506714011672, "grad_norm": 3.3350841999053955, "learning_rate": 1.8636326690857976e-05, "loss": 0.8807, "step": 9241 }, { "epoch": 1.5087139300436716, "grad_norm": 3.030978202819824, "learning_rate": 1.863600345988461e-05, "loss": 0.8369, "step": 9242 }, { "epoch": 1.508877188686176, "grad_norm": 3.4779884815216064, "learning_rate": 1.8635680193411897e-05, "loss": 0.7981, "step": 9243 }, { "epoch": 1.5090404473286805, "grad_norm": 2.4634647369384766, "learning_rate": 1.8635356891441158e-05, "loss": 0.6646, "step": 9244 }, { "epoch": 1.509203705971185, "grad_norm": 2.47048282623291, "learning_rate": 1.8635033553973734e-05, "loss": 0.6098, "step": 9245 }, { "epoch": 1.5093669646136894, "grad_norm": 2.5040931701660156, "learning_rate": 1.8634710181010948e-05, "loss": 0.701, "step": 9246 }, { "epoch": 1.5095302232561936, "grad_norm": 2.3029327392578125, "learning_rate": 1.8634386772554124e-05, "loss": 0.5705, "step": 9247 }, { "epoch": 1.509693481898698, "grad_norm": 2.4118292331695557, "learning_rate": 1.8634063328604596e-05, "loss": 0.7343, "step": 9248 }, { "epoch": 1.5098567405412024, "grad_norm": 2.809892177581787, "learning_rate": 1.8633739849163697e-05, "loss": 0.7273, "step": 9249 }, { "epoch": 1.5100199991837067, "grad_norm": 2.63207745552063, "learning_rate": 1.8633416334232754e-05, "loss": 0.6416, "step": 9250 }, { "epoch": 1.510183257826211, "grad_norm": 3.165287733078003, "learning_rate": 1.8633092783813092e-05, "loss": 0.7915, "step": 9251 }, { "epoch": 1.5103465164687155, "grad_norm": 3.0396602153778076, "learning_rate": 1.863276919790605e-05, "loss": 0.7346, "step": 9252 }, { "epoch": 1.51050977511122, "grad_norm": 2.302644729614258, "learning_rate": 1.8632445576512953e-05, "loss": 0.5109, "step": 9253 }, { "epoch": 1.5106730337537244, "grad_norm": 2.672527313232422, "learning_rate": 1.863212191963513e-05, "loss": 0.7002, "step": 9254 }, { "epoch": 1.5108362923962289, "grad_norm": 2.596280097961426, "learning_rate": 1.8631798227273917e-05, "loss": 0.6563, "step": 9255 }, { "epoch": 1.510999551038733, "grad_norm": 3.057222366333008, "learning_rate": 1.8631474499430634e-05, "loss": 0.6565, "step": 9256 }, { "epoch": 1.5111628096812375, "grad_norm": 2.67219614982605, "learning_rate": 1.8631150736106626e-05, "loss": 0.7534, "step": 9257 }, { "epoch": 1.5113260683237417, "grad_norm": 3.3333194255828857, "learning_rate": 1.8630826937303213e-05, "loss": 0.7933, "step": 9258 }, { "epoch": 1.5114893269662462, "grad_norm": 2.212672233581543, "learning_rate": 1.863050310302173e-05, "loss": 0.6413, "step": 9259 }, { "epoch": 1.5116525856087506, "grad_norm": 2.4337167739868164, "learning_rate": 1.8630179233263505e-05, "loss": 0.6322, "step": 9260 }, { "epoch": 1.511815844251255, "grad_norm": 2.839536190032959, "learning_rate": 1.8629855328029873e-05, "loss": 0.7805, "step": 9261 }, { "epoch": 1.5119791028937595, "grad_norm": 3.2087297439575195, "learning_rate": 1.8629531387322166e-05, "loss": 0.7558, "step": 9262 }, { "epoch": 1.512142361536264, "grad_norm": 2.3540000915527344, "learning_rate": 1.8629207411141712e-05, "loss": 0.6226, "step": 9263 }, { "epoch": 1.5123056201787684, "grad_norm": 3.2428879737854004, "learning_rate": 1.8628883399489846e-05, "loss": 0.8137, "step": 9264 }, { "epoch": 1.5124688788212726, "grad_norm": 2.772559404373169, "learning_rate": 1.8628559352367896e-05, "loss": 0.5918, "step": 9265 }, { "epoch": 1.512632137463777, "grad_norm": 2.4803779125213623, "learning_rate": 1.86282352697772e-05, "loss": 0.6423, "step": 9266 }, { "epoch": 1.5127953961062812, "grad_norm": 2.5939927101135254, "learning_rate": 1.8627911151719086e-05, "loss": 0.7393, "step": 9267 }, { "epoch": 1.5129586547487857, "grad_norm": 3.1048996448516846, "learning_rate": 1.8627586998194887e-05, "loss": 0.7338, "step": 9268 }, { "epoch": 1.51312191339129, "grad_norm": 2.909416913986206, "learning_rate": 1.8627262809205936e-05, "loss": 0.696, "step": 9269 }, { "epoch": 1.5132851720337945, "grad_norm": 2.507389545440674, "learning_rate": 1.8626938584753563e-05, "loss": 0.6841, "step": 9270 }, { "epoch": 1.513448430676299, "grad_norm": 2.710510492324829, "learning_rate": 1.8626614324839104e-05, "loss": 0.6775, "step": 9271 }, { "epoch": 1.5136116893188034, "grad_norm": 2.750370502471924, "learning_rate": 1.8626290029463893e-05, "loss": 0.656, "step": 9272 }, { "epoch": 1.5137749479613078, "grad_norm": 2.7553772926330566, "learning_rate": 1.8625965698629256e-05, "loss": 0.7184, "step": 9273 }, { "epoch": 1.513938206603812, "grad_norm": 3.086207389831543, "learning_rate": 1.8625641332336533e-05, "loss": 0.8413, "step": 9274 }, { "epoch": 1.5141014652463165, "grad_norm": 2.341918706893921, "learning_rate": 1.862531693058706e-05, "loss": 0.6155, "step": 9275 }, { "epoch": 1.5142647238888207, "grad_norm": 3.149909019470215, "learning_rate": 1.862499249338216e-05, "loss": 0.8326, "step": 9276 }, { "epoch": 1.5144279825313252, "grad_norm": 3.353158473968506, "learning_rate": 1.8624668020723177e-05, "loss": 0.9333, "step": 9277 }, { "epoch": 1.5145912411738296, "grad_norm": 2.805481433868408, "learning_rate": 1.862434351261144e-05, "loss": 0.8123, "step": 9278 }, { "epoch": 1.514754499816334, "grad_norm": 2.5712010860443115, "learning_rate": 1.862401896904828e-05, "loss": 0.5894, "step": 9279 }, { "epoch": 1.5149177584588385, "grad_norm": 3.2022275924682617, "learning_rate": 1.8623694390035036e-05, "loss": 0.7361, "step": 9280 }, { "epoch": 1.515081017101343, "grad_norm": 2.7615559101104736, "learning_rate": 1.862336977557304e-05, "loss": 0.6973, "step": 9281 }, { "epoch": 1.5152442757438473, "grad_norm": 3.3119146823883057, "learning_rate": 1.8623045125663627e-05, "loss": 0.8889, "step": 9282 }, { "epoch": 1.5154075343863516, "grad_norm": 3.6826324462890625, "learning_rate": 1.8622720440308135e-05, "loss": 0.7394, "step": 9283 }, { "epoch": 1.515570793028856, "grad_norm": 3.0519652366638184, "learning_rate": 1.8622395719507895e-05, "loss": 0.8116, "step": 9284 }, { "epoch": 1.5157340516713602, "grad_norm": 3.0872793197631836, "learning_rate": 1.8622070963264235e-05, "loss": 0.7138, "step": 9285 }, { "epoch": 1.5158973103138647, "grad_norm": 2.7727954387664795, "learning_rate": 1.8621746171578505e-05, "loss": 0.78, "step": 9286 }, { "epoch": 1.516060568956369, "grad_norm": 2.8541207313537598, "learning_rate": 1.8621421344452028e-05, "loss": 0.7828, "step": 9287 }, { "epoch": 1.5162238275988735, "grad_norm": 2.3723018169403076, "learning_rate": 1.8621096481886144e-05, "loss": 0.6351, "step": 9288 }, { "epoch": 1.516387086241378, "grad_norm": 2.996140480041504, "learning_rate": 1.8620771583882186e-05, "loss": 0.7598, "step": 9289 }, { "epoch": 1.5165503448838824, "grad_norm": 2.56722354888916, "learning_rate": 1.8620446650441494e-05, "loss": 0.6607, "step": 9290 }, { "epoch": 1.5167136035263868, "grad_norm": 2.9995291233062744, "learning_rate": 1.86201216815654e-05, "loss": 0.6368, "step": 9291 }, { "epoch": 1.516876862168891, "grad_norm": 3.2778525352478027, "learning_rate": 1.861979667725524e-05, "loss": 0.7422, "step": 9292 }, { "epoch": 1.5170401208113955, "grad_norm": 3.4398980140686035, "learning_rate": 1.861947163751235e-05, "loss": 0.8336, "step": 9293 }, { "epoch": 1.5172033794538997, "grad_norm": 2.596383571624756, "learning_rate": 1.8619146562338067e-05, "loss": 0.6698, "step": 9294 }, { "epoch": 1.5173666380964042, "grad_norm": 2.7601397037506104, "learning_rate": 1.8618821451733726e-05, "loss": 0.6632, "step": 9295 }, { "epoch": 1.5175298967389086, "grad_norm": 3.361623764038086, "learning_rate": 1.8618496305700666e-05, "loss": 0.6591, "step": 9296 }, { "epoch": 1.517693155381413, "grad_norm": 2.7674005031585693, "learning_rate": 1.861817112424022e-05, "loss": 0.7081, "step": 9297 }, { "epoch": 1.5178564140239175, "grad_norm": 2.4839909076690674, "learning_rate": 1.8617845907353727e-05, "loss": 0.6624, "step": 9298 }, { "epoch": 1.518019672666422, "grad_norm": 2.7572832107543945, "learning_rate": 1.861752065504253e-05, "loss": 0.7463, "step": 9299 }, { "epoch": 1.5181829313089261, "grad_norm": 2.8000643253326416, "learning_rate": 1.861719536730795e-05, "loss": 0.7176, "step": 9300 }, { "epoch": 1.5183461899514306, "grad_norm": 3.003608226776123, "learning_rate": 1.861687004415134e-05, "loss": 0.7247, "step": 9301 }, { "epoch": 1.518509448593935, "grad_norm": 2.5853793621063232, "learning_rate": 1.8616544685574028e-05, "loss": 0.5958, "step": 9302 }, { "epoch": 1.5186727072364392, "grad_norm": 2.4370858669281006, "learning_rate": 1.861621929157736e-05, "loss": 0.5564, "step": 9303 }, { "epoch": 1.5188359658789437, "grad_norm": 2.7085864543914795, "learning_rate": 1.8615893862162663e-05, "loss": 0.7053, "step": 9304 }, { "epoch": 1.518999224521448, "grad_norm": 2.6337599754333496, "learning_rate": 1.8615568397331278e-05, "loss": 0.6273, "step": 9305 }, { "epoch": 1.5191624831639525, "grad_norm": 2.560192108154297, "learning_rate": 1.8615242897084543e-05, "loss": 0.6027, "step": 9306 }, { "epoch": 1.519325741806457, "grad_norm": 2.251573324203491, "learning_rate": 1.8614917361423802e-05, "loss": 0.5019, "step": 9307 }, { "epoch": 1.5194890004489614, "grad_norm": 2.916224956512451, "learning_rate": 1.861459179035039e-05, "loss": 0.733, "step": 9308 }, { "epoch": 1.5196522590914656, "grad_norm": 2.5395689010620117, "learning_rate": 1.861426618386564e-05, "loss": 0.565, "step": 9309 }, { "epoch": 1.51981551773397, "grad_norm": 3.0791540145874023, "learning_rate": 1.8613940541970898e-05, "loss": 0.7933, "step": 9310 }, { "epoch": 1.5199787763764743, "grad_norm": 2.6735265254974365, "learning_rate": 1.8613614864667498e-05, "loss": 0.7541, "step": 9311 }, { "epoch": 1.5201420350189787, "grad_norm": 2.7436363697052, "learning_rate": 1.8613289151956776e-05, "loss": 0.6987, "step": 9312 }, { "epoch": 1.5203052936614831, "grad_norm": 2.4496705532073975, "learning_rate": 1.8612963403840076e-05, "loss": 0.6846, "step": 9313 }, { "epoch": 1.5204685523039876, "grad_norm": 2.731778383255005, "learning_rate": 1.8612637620318738e-05, "loss": 0.7318, "step": 9314 }, { "epoch": 1.520631810946492, "grad_norm": 2.5780770778656006, "learning_rate": 1.86123118013941e-05, "loss": 0.7536, "step": 9315 }, { "epoch": 1.5207950695889965, "grad_norm": 3.310823440551758, "learning_rate": 1.8611985947067497e-05, "loss": 0.9346, "step": 9316 }, { "epoch": 1.520958328231501, "grad_norm": 2.736619234085083, "learning_rate": 1.8611660057340272e-05, "loss": 0.6867, "step": 9317 }, { "epoch": 1.5211215868740051, "grad_norm": 2.2850890159606934, "learning_rate": 1.8611334132213765e-05, "loss": 0.5422, "step": 9318 }, { "epoch": 1.5212848455165096, "grad_norm": 3.411101818084717, "learning_rate": 1.8611008171689317e-05, "loss": 0.8051, "step": 9319 }, { "epoch": 1.5214481041590138, "grad_norm": 2.8878629207611084, "learning_rate": 1.861068217576826e-05, "loss": 0.705, "step": 9320 }, { "epoch": 1.5216113628015182, "grad_norm": 2.9574103355407715, "learning_rate": 1.8610356144451945e-05, "loss": 0.6542, "step": 9321 }, { "epoch": 1.5217746214440226, "grad_norm": 3.376436233520508, "learning_rate": 1.8610030077741706e-05, "loss": 0.7307, "step": 9322 }, { "epoch": 1.521937880086527, "grad_norm": 3.002340078353882, "learning_rate": 1.8609703975638885e-05, "loss": 0.7629, "step": 9323 }, { "epoch": 1.5221011387290315, "grad_norm": 2.802225112915039, "learning_rate": 1.860937783814482e-05, "loss": 0.7872, "step": 9324 }, { "epoch": 1.522264397371536, "grad_norm": 3.4844653606414795, "learning_rate": 1.8609051665260853e-05, "loss": 0.7573, "step": 9325 }, { "epoch": 1.5224276560140404, "grad_norm": 2.4472758769989014, "learning_rate": 1.860872545698833e-05, "loss": 0.5564, "step": 9326 }, { "epoch": 1.5225909146565446, "grad_norm": 2.715528964996338, "learning_rate": 1.8608399213328582e-05, "loss": 0.7241, "step": 9327 }, { "epoch": 1.522754173299049, "grad_norm": 2.806821584701538, "learning_rate": 1.860807293428296e-05, "loss": 0.7685, "step": 9328 }, { "epoch": 1.5229174319415533, "grad_norm": 3.6247987747192383, "learning_rate": 1.86077466198528e-05, "loss": 0.7465, "step": 9329 }, { "epoch": 1.5230806905840577, "grad_norm": 2.8686351776123047, "learning_rate": 1.860742027003944e-05, "loss": 0.7357, "step": 9330 }, { "epoch": 1.5232439492265621, "grad_norm": 2.6739492416381836, "learning_rate": 1.8607093884844225e-05, "loss": 0.6183, "step": 9331 }, { "epoch": 1.5234072078690666, "grad_norm": 2.75515079498291, "learning_rate": 1.8606767464268502e-05, "loss": 0.7007, "step": 9332 }, { "epoch": 1.523570466511571, "grad_norm": 2.256499767303467, "learning_rate": 1.8606441008313602e-05, "loss": 0.5273, "step": 9333 }, { "epoch": 1.5237337251540755, "grad_norm": 2.5225796699523926, "learning_rate": 1.8606114516980877e-05, "loss": 0.6423, "step": 9334 }, { "epoch": 1.52389698379658, "grad_norm": 2.6453664302825928, "learning_rate": 1.8605787990271662e-05, "loss": 0.7613, "step": 9335 }, { "epoch": 1.5240602424390841, "grad_norm": 2.60720157623291, "learning_rate": 1.8605461428187305e-05, "loss": 0.73, "step": 9336 }, { "epoch": 1.5242235010815885, "grad_norm": 2.774188995361328, "learning_rate": 1.8605134830729144e-05, "loss": 0.7055, "step": 9337 }, { "epoch": 1.5243867597240928, "grad_norm": 3.593440532684326, "learning_rate": 1.860480819789852e-05, "loss": 1.0222, "step": 9338 }, { "epoch": 1.5245500183665972, "grad_norm": 3.095750570297241, "learning_rate": 1.8604481529696786e-05, "loss": 0.85, "step": 9339 }, { "epoch": 1.5247132770091016, "grad_norm": 2.350046157836914, "learning_rate": 1.8604154826125272e-05, "loss": 0.565, "step": 9340 }, { "epoch": 1.524876535651606, "grad_norm": 2.833019733428955, "learning_rate": 1.8603828087185328e-05, "loss": 0.7462, "step": 9341 }, { "epoch": 1.5250397942941105, "grad_norm": 2.4955906867980957, "learning_rate": 1.8603501312878296e-05, "loss": 0.6867, "step": 9342 }, { "epoch": 1.525203052936615, "grad_norm": 2.854644298553467, "learning_rate": 1.8603174503205517e-05, "loss": 0.746, "step": 9343 }, { "epoch": 1.5253663115791192, "grad_norm": 2.4947664737701416, "learning_rate": 1.8602847658168337e-05, "loss": 0.673, "step": 9344 }, { "epoch": 1.5255295702216236, "grad_norm": 2.367520332336426, "learning_rate": 1.8602520777768097e-05, "loss": 0.5878, "step": 9345 }, { "epoch": 1.525692828864128, "grad_norm": 2.669440984725952, "learning_rate": 1.8602193862006146e-05, "loss": 0.5879, "step": 9346 }, { "epoch": 1.5258560875066323, "grad_norm": 2.9617600440979004, "learning_rate": 1.8601866910883818e-05, "loss": 0.7133, "step": 9347 }, { "epoch": 1.5260193461491367, "grad_norm": 2.503727912902832, "learning_rate": 1.860153992440247e-05, "loss": 0.5914, "step": 9348 }, { "epoch": 1.5261826047916411, "grad_norm": 2.707307815551758, "learning_rate": 1.8601212902563438e-05, "loss": 0.7181, "step": 9349 }, { "epoch": 1.5263458634341456, "grad_norm": 3.6990437507629395, "learning_rate": 1.8600885845368066e-05, "loss": 0.6426, "step": 9350 }, { "epoch": 1.52650912207665, "grad_norm": 2.539522409439087, "learning_rate": 1.8600558752817697e-05, "loss": 0.5829, "step": 9351 }, { "epoch": 1.5266723807191545, "grad_norm": 2.757676839828491, "learning_rate": 1.860023162491368e-05, "loss": 0.7095, "step": 9352 }, { "epoch": 1.5268356393616587, "grad_norm": 3.579657793045044, "learning_rate": 1.8599904461657363e-05, "loss": 0.8454, "step": 9353 }, { "epoch": 1.526998898004163, "grad_norm": 2.9971251487731934, "learning_rate": 1.859957726305008e-05, "loss": 0.6763, "step": 9354 }, { "epoch": 1.5271621566466673, "grad_norm": 2.887512445449829, "learning_rate": 1.8599250029093186e-05, "loss": 0.8169, "step": 9355 }, { "epoch": 1.5273254152891718, "grad_norm": 2.837921142578125, "learning_rate": 1.859892275978802e-05, "loss": 0.6452, "step": 9356 }, { "epoch": 1.5274886739316762, "grad_norm": 3.09299898147583, "learning_rate": 1.8598595455135934e-05, "loss": 0.7309, "step": 9357 }, { "epoch": 1.5276519325741806, "grad_norm": 2.79347562789917, "learning_rate": 1.8598268115138263e-05, "loss": 0.7553, "step": 9358 }, { "epoch": 1.527815191216685, "grad_norm": 2.708665132522583, "learning_rate": 1.859794073979636e-05, "loss": 0.6843, "step": 9359 }, { "epoch": 1.5279784498591895, "grad_norm": 3.2760508060455322, "learning_rate": 1.8597613329111567e-05, "loss": 0.802, "step": 9360 }, { "epoch": 1.528141708501694, "grad_norm": 2.8811442852020264, "learning_rate": 1.8597285883085236e-05, "loss": 0.8021, "step": 9361 }, { "epoch": 1.5283049671441982, "grad_norm": 2.700460433959961, "learning_rate": 1.8596958401718708e-05, "loss": 0.6377, "step": 9362 }, { "epoch": 1.5284682257867026, "grad_norm": 2.7702078819274902, "learning_rate": 1.8596630885013328e-05, "loss": 0.8248, "step": 9363 }, { "epoch": 1.5286314844292068, "grad_norm": 2.5732438564300537, "learning_rate": 1.859630333297045e-05, "loss": 0.7102, "step": 9364 }, { "epoch": 1.5287947430717113, "grad_norm": 2.763381242752075, "learning_rate": 1.8595975745591412e-05, "loss": 0.7507, "step": 9365 }, { "epoch": 1.5289580017142157, "grad_norm": 2.6541616916656494, "learning_rate": 1.8595648122877562e-05, "loss": 0.8165, "step": 9366 }, { "epoch": 1.5291212603567201, "grad_norm": 2.7532780170440674, "learning_rate": 1.8595320464830247e-05, "loss": 0.799, "step": 9367 }, { "epoch": 1.5292845189992246, "grad_norm": 2.5661396980285645, "learning_rate": 1.8594992771450817e-05, "loss": 0.6259, "step": 9368 }, { "epoch": 1.529447777641729, "grad_norm": 2.7812652587890625, "learning_rate": 1.8594665042740616e-05, "loss": 0.7781, "step": 9369 }, { "epoch": 1.5296110362842334, "grad_norm": 2.7640669345855713, "learning_rate": 1.8594337278700996e-05, "loss": 0.7, "step": 9370 }, { "epoch": 1.5297742949267377, "grad_norm": 2.1216773986816406, "learning_rate": 1.85940094793333e-05, "loss": 0.5597, "step": 9371 }, { "epoch": 1.529937553569242, "grad_norm": 2.890735626220703, "learning_rate": 1.859368164463887e-05, "loss": 0.7402, "step": 9372 }, { "epoch": 1.5301008122117463, "grad_norm": 2.6338484287261963, "learning_rate": 1.8593353774619066e-05, "loss": 0.7149, "step": 9373 }, { "epoch": 1.5302640708542508, "grad_norm": 2.7050771713256836, "learning_rate": 1.8593025869275228e-05, "loss": 0.6893, "step": 9374 }, { "epoch": 1.5304273294967552, "grad_norm": 2.29565167427063, "learning_rate": 1.8592697928608702e-05, "loss": 0.6239, "step": 9375 }, { "epoch": 1.5305905881392596, "grad_norm": 2.9577014446258545, "learning_rate": 1.8592369952620842e-05, "loss": 0.71, "step": 9376 }, { "epoch": 1.530753846781764, "grad_norm": 2.746600866317749, "learning_rate": 1.8592041941312996e-05, "loss": 0.7858, "step": 9377 }, { "epoch": 1.5309171054242685, "grad_norm": 2.583402395248413, "learning_rate": 1.859171389468651e-05, "loss": 0.7124, "step": 9378 }, { "epoch": 1.531080364066773, "grad_norm": 2.5811519622802734, "learning_rate": 1.8591385812742724e-05, "loss": 0.7909, "step": 9379 }, { "epoch": 1.5312436227092772, "grad_norm": 2.533362865447998, "learning_rate": 1.8591057695483003e-05, "loss": 0.7104, "step": 9380 }, { "epoch": 1.5314068813517816, "grad_norm": 3.1263513565063477, "learning_rate": 1.8590729542908685e-05, "loss": 0.6351, "step": 9381 }, { "epoch": 1.5315701399942858, "grad_norm": 2.756408929824829, "learning_rate": 1.859040135502112e-05, "loss": 0.6822, "step": 9382 }, { "epoch": 1.5317333986367903, "grad_norm": 3.137238025665283, "learning_rate": 1.8590073131821664e-05, "loss": 0.803, "step": 9383 }, { "epoch": 1.5318966572792947, "grad_norm": 2.3393466472625732, "learning_rate": 1.8589744873311656e-05, "loss": 0.5645, "step": 9384 }, { "epoch": 1.5320599159217991, "grad_norm": 2.7032461166381836, "learning_rate": 1.8589416579492453e-05, "loss": 0.7462, "step": 9385 }, { "epoch": 1.5322231745643036, "grad_norm": 2.5201151371002197, "learning_rate": 1.85890882503654e-05, "loss": 0.6856, "step": 9386 }, { "epoch": 1.532386433206808, "grad_norm": 2.4771509170532227, "learning_rate": 1.858875988593185e-05, "loss": 0.5886, "step": 9387 }, { "epoch": 1.5325496918493122, "grad_norm": 2.329859733581543, "learning_rate": 1.858843148619315e-05, "loss": 0.6236, "step": 9388 }, { "epoch": 1.5327129504918167, "grad_norm": 2.613740921020508, "learning_rate": 1.858810305115065e-05, "loss": 0.6289, "step": 9389 }, { "epoch": 1.532876209134321, "grad_norm": 2.836327314376831, "learning_rate": 1.8587774580805705e-05, "loss": 0.6822, "step": 9390 }, { "epoch": 1.5330394677768253, "grad_norm": 2.5725135803222656, "learning_rate": 1.8587446075159656e-05, "loss": 0.6371, "step": 9391 }, { "epoch": 1.5332027264193298, "grad_norm": 2.848466634750366, "learning_rate": 1.8587117534213865e-05, "loss": 0.7901, "step": 9392 }, { "epoch": 1.5333659850618342, "grad_norm": 2.1507222652435303, "learning_rate": 1.8586788957969672e-05, "loss": 0.6491, "step": 9393 }, { "epoch": 1.5335292437043386, "grad_norm": 2.8195154666900635, "learning_rate": 1.858646034642843e-05, "loss": 0.6907, "step": 9394 }, { "epoch": 1.533692502346843, "grad_norm": 2.9088101387023926, "learning_rate": 1.8586131699591497e-05, "loss": 0.7676, "step": 9395 }, { "epoch": 1.5338557609893475, "grad_norm": 2.72780179977417, "learning_rate": 1.8585803017460214e-05, "loss": 0.6642, "step": 9396 }, { "epoch": 1.5340190196318517, "grad_norm": 3.049321413040161, "learning_rate": 1.858547430003594e-05, "loss": 0.7773, "step": 9397 }, { "epoch": 1.5341822782743562, "grad_norm": 2.5537970066070557, "learning_rate": 1.858514554732002e-05, "loss": 0.6657, "step": 9398 }, { "epoch": 1.5343455369168604, "grad_norm": 2.6661758422851562, "learning_rate": 1.8584816759313815e-05, "loss": 0.733, "step": 9399 }, { "epoch": 1.5345087955593648, "grad_norm": 2.636075496673584, "learning_rate": 1.8584487936018663e-05, "loss": 0.6222, "step": 9400 }, { "epoch": 1.5346720542018693, "grad_norm": 3.064256191253662, "learning_rate": 1.8584159077435923e-05, "loss": 0.7525, "step": 9401 }, { "epoch": 1.5348353128443737, "grad_norm": 3.280506134033203, "learning_rate": 1.858383018356695e-05, "loss": 0.7332, "step": 9402 }, { "epoch": 1.5349985714868781, "grad_norm": 3.066049814224243, "learning_rate": 1.858350125441309e-05, "loss": 0.6739, "step": 9403 }, { "epoch": 1.5351618301293826, "grad_norm": 2.873490571975708, "learning_rate": 1.8583172289975697e-05, "loss": 0.6353, "step": 9404 }, { "epoch": 1.535325088771887, "grad_norm": 2.941038131713867, "learning_rate": 1.8582843290256126e-05, "loss": 0.779, "step": 9405 }, { "epoch": 1.5354883474143912, "grad_norm": 3.420165538787842, "learning_rate": 1.8582514255255725e-05, "loss": 0.7111, "step": 9406 }, { "epoch": 1.5356516060568957, "grad_norm": 2.8803701400756836, "learning_rate": 1.858218518497585e-05, "loss": 0.6286, "step": 9407 }, { "epoch": 1.5358148646993999, "grad_norm": 2.3307557106018066, "learning_rate": 1.8581856079417852e-05, "loss": 0.5772, "step": 9408 }, { "epoch": 1.5359781233419043, "grad_norm": 3.2477056980133057, "learning_rate": 1.8581526938583083e-05, "loss": 0.7304, "step": 9409 }, { "epoch": 1.5361413819844087, "grad_norm": 2.6648781299591064, "learning_rate": 1.8581197762472895e-05, "loss": 0.6526, "step": 9410 }, { "epoch": 1.5363046406269132, "grad_norm": 2.878110885620117, "learning_rate": 1.8580868551088647e-05, "loss": 0.7915, "step": 9411 }, { "epoch": 1.5364678992694176, "grad_norm": 2.568455457687378, "learning_rate": 1.8580539304431687e-05, "loss": 0.707, "step": 9412 }, { "epoch": 1.536631157911922, "grad_norm": 2.5451509952545166, "learning_rate": 1.8580210022503368e-05, "loss": 0.7293, "step": 9413 }, { "epoch": 1.5367944165544265, "grad_norm": 2.4095191955566406, "learning_rate": 1.857988070530505e-05, "loss": 0.4787, "step": 9414 }, { "epoch": 1.5369576751969307, "grad_norm": 2.056401252746582, "learning_rate": 1.857955135283808e-05, "loss": 0.6537, "step": 9415 }, { "epoch": 1.5371209338394352, "grad_norm": 3.220693588256836, "learning_rate": 1.857922196510381e-05, "loss": 0.799, "step": 9416 }, { "epoch": 1.5372841924819394, "grad_norm": 2.5382277965545654, "learning_rate": 1.8578892542103604e-05, "loss": 0.5898, "step": 9417 }, { "epoch": 1.5374474511244438, "grad_norm": 2.6737558841705322, "learning_rate": 1.8578563083838807e-05, "loss": 0.767, "step": 9418 }, { "epoch": 1.5376107097669482, "grad_norm": 2.9580488204956055, "learning_rate": 1.8578233590310778e-05, "loss": 0.8831, "step": 9419 }, { "epoch": 1.5377739684094527, "grad_norm": 2.692779302597046, "learning_rate": 1.857790406152087e-05, "loss": 0.6825, "step": 9420 }, { "epoch": 1.5379372270519571, "grad_norm": 2.753382682800293, "learning_rate": 1.8577574497470438e-05, "loss": 0.6424, "step": 9421 }, { "epoch": 1.5381004856944616, "grad_norm": 3.1019387245178223, "learning_rate": 1.857724489816083e-05, "loss": 0.7713, "step": 9422 }, { "epoch": 1.538263744336966, "grad_norm": 3.4572770595550537, "learning_rate": 1.8576915263593413e-05, "loss": 0.8696, "step": 9423 }, { "epoch": 1.5384270029794702, "grad_norm": 2.6719884872436523, "learning_rate": 1.8576585593769537e-05, "loss": 0.6735, "step": 9424 }, { "epoch": 1.5385902616219747, "grad_norm": 2.5219976902008057, "learning_rate": 1.857625588869055e-05, "loss": 0.6176, "step": 9425 }, { "epoch": 1.5387535202644789, "grad_norm": 2.735931396484375, "learning_rate": 1.8575926148357818e-05, "loss": 0.6529, "step": 9426 }, { "epoch": 1.5389167789069833, "grad_norm": 2.597564697265625, "learning_rate": 1.857559637277269e-05, "loss": 0.6279, "step": 9427 }, { "epoch": 1.5390800375494877, "grad_norm": 3.0183510780334473, "learning_rate": 1.8575266561936526e-05, "loss": 0.6895, "step": 9428 }, { "epoch": 1.5392432961919922, "grad_norm": 2.2917492389678955, "learning_rate": 1.8574936715850677e-05, "loss": 0.5437, "step": 9429 }, { "epoch": 1.5394065548344966, "grad_norm": 2.8712337017059326, "learning_rate": 1.8574606834516497e-05, "loss": 0.7223, "step": 9430 }, { "epoch": 1.539569813477001, "grad_norm": 2.9533638954162598, "learning_rate": 1.857427691793535e-05, "loss": 0.7489, "step": 9431 }, { "epoch": 1.5397330721195053, "grad_norm": 2.658045768737793, "learning_rate": 1.8573946966108593e-05, "loss": 0.6787, "step": 9432 }, { "epoch": 1.5398963307620097, "grad_norm": 3.1381523609161377, "learning_rate": 1.8573616979037572e-05, "loss": 0.6274, "step": 9433 }, { "epoch": 1.5400595894045142, "grad_norm": 2.7031683921813965, "learning_rate": 1.857328695672365e-05, "loss": 0.6608, "step": 9434 }, { "epoch": 1.5402228480470184, "grad_norm": 2.4839391708374023, "learning_rate": 1.857295689916818e-05, "loss": 0.6068, "step": 9435 }, { "epoch": 1.5403861066895228, "grad_norm": 2.302731513977051, "learning_rate": 1.8572626806372523e-05, "loss": 0.6106, "step": 9436 }, { "epoch": 1.5405493653320272, "grad_norm": 2.4043891429901123, "learning_rate": 1.8572296678338036e-05, "loss": 0.6701, "step": 9437 }, { "epoch": 1.5407126239745317, "grad_norm": 2.866072654724121, "learning_rate": 1.857196651506607e-05, "loss": 0.7858, "step": 9438 }, { "epoch": 1.5408758826170361, "grad_norm": 3.313674211502075, "learning_rate": 1.8571636316557993e-05, "loss": 0.7545, "step": 9439 }, { "epoch": 1.5410391412595406, "grad_norm": 2.636220932006836, "learning_rate": 1.857130608281515e-05, "loss": 0.7324, "step": 9440 }, { "epoch": 1.5412023999020448, "grad_norm": 3.008319854736328, "learning_rate": 1.8570975813838906e-05, "loss": 0.7299, "step": 9441 }, { "epoch": 1.5413656585445492, "grad_norm": 2.8363423347473145, "learning_rate": 1.8570645509630618e-05, "loss": 0.7191, "step": 9442 }, { "epoch": 1.5415289171870534, "grad_norm": 3.0460126399993896, "learning_rate": 1.8570315170191642e-05, "loss": 0.708, "step": 9443 }, { "epoch": 1.5416921758295579, "grad_norm": 2.739581346511841, "learning_rate": 1.8569984795523335e-05, "loss": 0.6514, "step": 9444 }, { "epoch": 1.5418554344720623, "grad_norm": 2.5884530544281006, "learning_rate": 1.8569654385627057e-05, "loss": 0.6397, "step": 9445 }, { "epoch": 1.5420186931145667, "grad_norm": 2.5704336166381836, "learning_rate": 1.8569323940504166e-05, "loss": 0.7064, "step": 9446 }, { "epoch": 1.5421819517570712, "grad_norm": 2.9297547340393066, "learning_rate": 1.856899346015602e-05, "loss": 0.6744, "step": 9447 }, { "epoch": 1.5423452103995756, "grad_norm": 2.620554208755493, "learning_rate": 1.8568662944583977e-05, "loss": 0.6323, "step": 9448 }, { "epoch": 1.54250846904208, "grad_norm": 3.0192039012908936, "learning_rate": 1.8568332393789396e-05, "loss": 0.8244, "step": 9449 }, { "epoch": 1.5426717276845843, "grad_norm": 3.478667974472046, "learning_rate": 1.8568001807773636e-05, "loss": 0.8821, "step": 9450 }, { "epoch": 1.5428349863270887, "grad_norm": 2.5592575073242188, "learning_rate": 1.8567671186538057e-05, "loss": 0.6012, "step": 9451 }, { "epoch": 1.542998244969593, "grad_norm": 2.174921751022339, "learning_rate": 1.8567340530084017e-05, "loss": 0.5638, "step": 9452 }, { "epoch": 1.5431615036120974, "grad_norm": 3.1245908737182617, "learning_rate": 1.8567009838412874e-05, "loss": 0.7022, "step": 9453 }, { "epoch": 1.5433247622546018, "grad_norm": 3.0620715618133545, "learning_rate": 1.8566679111525988e-05, "loss": 0.843, "step": 9454 }, { "epoch": 1.5434880208971062, "grad_norm": 2.6108314990997314, "learning_rate": 1.856634834942472e-05, "loss": 0.6292, "step": 9455 }, { "epoch": 1.5436512795396107, "grad_norm": 2.5004491806030273, "learning_rate": 1.856601755211043e-05, "loss": 0.6458, "step": 9456 }, { "epoch": 1.5438145381821151, "grad_norm": 2.2600841522216797, "learning_rate": 1.856568671958447e-05, "loss": 0.4973, "step": 9457 }, { "epoch": 1.5439777968246196, "grad_norm": 2.8047709465026855, "learning_rate": 1.856535585184821e-05, "loss": 0.7543, "step": 9458 }, { "epoch": 1.5441410554671238, "grad_norm": 2.997295379638672, "learning_rate": 1.856502494890301e-05, "loss": 0.8095, "step": 9459 }, { "epoch": 1.5443043141096282, "grad_norm": 2.7562713623046875, "learning_rate": 1.8564694010750223e-05, "loss": 0.7504, "step": 9460 }, { "epoch": 1.5444675727521324, "grad_norm": 2.5873520374298096, "learning_rate": 1.8564363037391212e-05, "loss": 0.6836, "step": 9461 }, { "epoch": 1.5446308313946369, "grad_norm": 2.686915636062622, "learning_rate": 1.856403202882734e-05, "loss": 0.7679, "step": 9462 }, { "epoch": 1.5447940900371413, "grad_norm": 2.8571650981903076, "learning_rate": 1.8563700985059968e-05, "loss": 0.6945, "step": 9463 }, { "epoch": 1.5449573486796457, "grad_norm": 2.5951595306396484, "learning_rate": 1.856336990609045e-05, "loss": 0.6748, "step": 9464 }, { "epoch": 1.5451206073221502, "grad_norm": 3.366947650909424, "learning_rate": 1.8563038791920155e-05, "loss": 0.8941, "step": 9465 }, { "epoch": 1.5452838659646546, "grad_norm": 2.4071927070617676, "learning_rate": 1.8562707642550442e-05, "loss": 0.6628, "step": 9466 }, { "epoch": 1.545447124607159, "grad_norm": 2.9241206645965576, "learning_rate": 1.8562376457982666e-05, "loss": 0.7062, "step": 9467 }, { "epoch": 1.5456103832496633, "grad_norm": 2.5278658866882324, "learning_rate": 1.8562045238218197e-05, "loss": 0.6756, "step": 9468 }, { "epoch": 1.5457736418921677, "grad_norm": 3.0680079460144043, "learning_rate": 1.8561713983258394e-05, "loss": 0.8775, "step": 9469 }, { "epoch": 1.545936900534672, "grad_norm": 2.72617244720459, "learning_rate": 1.8561382693104617e-05, "loss": 0.7355, "step": 9470 }, { "epoch": 1.5461001591771764, "grad_norm": 2.258880853652954, "learning_rate": 1.8561051367758226e-05, "loss": 0.5017, "step": 9471 }, { "epoch": 1.5462634178196808, "grad_norm": 2.662060022354126, "learning_rate": 1.8560720007220588e-05, "loss": 0.6591, "step": 9472 }, { "epoch": 1.5464266764621852, "grad_norm": 2.8290834426879883, "learning_rate": 1.8560388611493064e-05, "loss": 0.7089, "step": 9473 }, { "epoch": 1.5465899351046897, "grad_norm": 2.5618972778320312, "learning_rate": 1.856005718057701e-05, "loss": 0.6298, "step": 9474 }, { "epoch": 1.546753193747194, "grad_norm": 2.9525697231292725, "learning_rate": 1.8559725714473798e-05, "loss": 0.7712, "step": 9475 }, { "epoch": 1.5469164523896983, "grad_norm": 2.7429535388946533, "learning_rate": 1.8559394213184783e-05, "loss": 0.6109, "step": 9476 }, { "epoch": 1.5470797110322028, "grad_norm": 2.7039382457733154, "learning_rate": 1.855906267671133e-05, "loss": 0.808, "step": 9477 }, { "epoch": 1.5472429696747072, "grad_norm": 2.6224632263183594, "learning_rate": 1.8558731105054803e-05, "loss": 0.8055, "step": 9478 }, { "epoch": 1.5474062283172114, "grad_norm": 2.366337299346924, "learning_rate": 1.8558399498216566e-05, "loss": 0.5648, "step": 9479 }, { "epoch": 1.5475694869597159, "grad_norm": 2.791149854660034, "learning_rate": 1.855806785619798e-05, "loss": 0.7886, "step": 9480 }, { "epoch": 1.5477327456022203, "grad_norm": 2.9897336959838867, "learning_rate": 1.8557736179000405e-05, "loss": 0.763, "step": 9481 }, { "epoch": 1.5478960042447247, "grad_norm": 3.0593321323394775, "learning_rate": 1.855740446662521e-05, "loss": 0.857, "step": 9482 }, { "epoch": 1.5480592628872292, "grad_norm": 3.04656720161438, "learning_rate": 1.8557072719073756e-05, "loss": 0.7786, "step": 9483 }, { "epoch": 1.5482225215297336, "grad_norm": 2.72415828704834, "learning_rate": 1.855674093634741e-05, "loss": 0.6712, "step": 9484 }, { "epoch": 1.5483857801722378, "grad_norm": 3.215003252029419, "learning_rate": 1.855640911844753e-05, "loss": 0.9512, "step": 9485 }, { "epoch": 1.5485490388147423, "grad_norm": 2.6747913360595703, "learning_rate": 1.8556077265375484e-05, "loss": 0.638, "step": 9486 }, { "epoch": 1.5487122974572465, "grad_norm": 2.88873291015625, "learning_rate": 1.8555745377132633e-05, "loss": 0.6743, "step": 9487 }, { "epoch": 1.548875556099751, "grad_norm": 2.7317004203796387, "learning_rate": 1.8555413453720345e-05, "loss": 0.7215, "step": 9488 }, { "epoch": 1.5490388147422554, "grad_norm": 2.437176465988159, "learning_rate": 1.8555081495139983e-05, "loss": 0.6284, "step": 9489 }, { "epoch": 1.5492020733847598, "grad_norm": 2.8494997024536133, "learning_rate": 1.855474950139291e-05, "loss": 0.6695, "step": 9490 }, { "epoch": 1.5493653320272642, "grad_norm": 2.751739978790283, "learning_rate": 1.8554417472480493e-05, "loss": 0.7104, "step": 9491 }, { "epoch": 1.5495285906697687, "grad_norm": 2.9232285022735596, "learning_rate": 1.8554085408404096e-05, "loss": 0.674, "step": 9492 }, { "epoch": 1.549691849312273, "grad_norm": 2.810300827026367, "learning_rate": 1.8553753309165084e-05, "loss": 0.5834, "step": 9493 }, { "epoch": 1.5498551079547773, "grad_norm": 2.941209554672241, "learning_rate": 1.8553421174764823e-05, "loss": 0.7282, "step": 9494 }, { "epoch": 1.5500183665972818, "grad_norm": 2.6890265941619873, "learning_rate": 1.8553089005204675e-05, "loss": 0.6818, "step": 9495 }, { "epoch": 1.550181625239786, "grad_norm": 2.685011386871338, "learning_rate": 1.855275680048601e-05, "loss": 0.669, "step": 9496 }, { "epoch": 1.5503448838822904, "grad_norm": 2.9811806678771973, "learning_rate": 1.855242456061019e-05, "loss": 0.7085, "step": 9497 }, { "epoch": 1.5505081425247949, "grad_norm": 3.389681577682495, "learning_rate": 1.855209228557858e-05, "loss": 0.8255, "step": 9498 }, { "epoch": 1.5506714011672993, "grad_norm": 2.9489588737487793, "learning_rate": 1.855175997539255e-05, "loss": 0.7057, "step": 9499 }, { "epoch": 1.5508346598098037, "grad_norm": 3.295605182647705, "learning_rate": 1.8551427630053464e-05, "loss": 0.8047, "step": 9500 }, { "epoch": 1.5509979184523082, "grad_norm": 3.0322766304016113, "learning_rate": 1.8551095249562685e-05, "loss": 0.7435, "step": 9501 }, { "epoch": 1.5511611770948126, "grad_norm": 3.0456020832061768, "learning_rate": 1.8550762833921587e-05, "loss": 0.7998, "step": 9502 }, { "epoch": 1.5513244357373168, "grad_norm": 2.6837873458862305, "learning_rate": 1.855043038313153e-05, "loss": 0.6621, "step": 9503 }, { "epoch": 1.5514876943798213, "grad_norm": 2.5656607151031494, "learning_rate": 1.855009789719388e-05, "loss": 0.7324, "step": 9504 }, { "epoch": 1.5516509530223255, "grad_norm": 2.978372573852539, "learning_rate": 1.854976537611001e-05, "loss": 0.749, "step": 9505 }, { "epoch": 1.55181421166483, "grad_norm": 2.6487669944763184, "learning_rate": 1.854943281988128e-05, "loss": 0.6985, "step": 9506 }, { "epoch": 1.5519774703073344, "grad_norm": 2.4602715969085693, "learning_rate": 1.854910022850906e-05, "loss": 0.6252, "step": 9507 }, { "epoch": 1.5521407289498388, "grad_norm": 3.149824857711792, "learning_rate": 1.8548767601994716e-05, "loss": 0.7593, "step": 9508 }, { "epoch": 1.5523039875923432, "grad_norm": 2.614194393157959, "learning_rate": 1.854843494033962e-05, "loss": 0.6609, "step": 9509 }, { "epoch": 1.5524672462348477, "grad_norm": 2.5998623371124268, "learning_rate": 1.8548102243545135e-05, "loss": 0.7357, "step": 9510 }, { "epoch": 1.552630504877352, "grad_norm": 2.8014490604400635, "learning_rate": 1.8547769511612624e-05, "loss": 0.788, "step": 9511 }, { "epoch": 1.5527937635198563, "grad_norm": 2.2770814895629883, "learning_rate": 1.8547436744543468e-05, "loss": 0.5206, "step": 9512 }, { "epoch": 1.5529570221623608, "grad_norm": 2.7218101024627686, "learning_rate": 1.8547103942339023e-05, "loss": 0.6747, "step": 9513 }, { "epoch": 1.553120280804865, "grad_norm": 2.24733829498291, "learning_rate": 1.854677110500066e-05, "loss": 0.5573, "step": 9514 }, { "epoch": 1.5532835394473694, "grad_norm": 2.879807233810425, "learning_rate": 1.854643823252975e-05, "loss": 0.7782, "step": 9515 }, { "epoch": 1.5534467980898738, "grad_norm": 3.0260846614837646, "learning_rate": 1.8546105324927658e-05, "loss": 0.7942, "step": 9516 }, { "epoch": 1.5536100567323783, "grad_norm": 2.9472463130950928, "learning_rate": 1.8545772382195758e-05, "loss": 0.7828, "step": 9517 }, { "epoch": 1.5537733153748827, "grad_norm": 2.32773756980896, "learning_rate": 1.854543940433541e-05, "loss": 0.5671, "step": 9518 }, { "epoch": 1.5539365740173872, "grad_norm": 2.516798257827759, "learning_rate": 1.8545106391347988e-05, "loss": 0.6007, "step": 9519 }, { "epoch": 1.5540998326598914, "grad_norm": 2.810875654220581, "learning_rate": 1.8544773343234862e-05, "loss": 0.5897, "step": 9520 }, { "epoch": 1.5542630913023958, "grad_norm": 2.6869540214538574, "learning_rate": 1.8544440259997398e-05, "loss": 0.6488, "step": 9521 }, { "epoch": 1.5544263499449003, "grad_norm": 2.8129935264587402, "learning_rate": 1.8544107141636963e-05, "loss": 0.7464, "step": 9522 }, { "epoch": 1.5545896085874045, "grad_norm": 2.420344352722168, "learning_rate": 1.8543773988154935e-05, "loss": 0.5206, "step": 9523 }, { "epoch": 1.554752867229909, "grad_norm": 2.7147176265716553, "learning_rate": 1.8543440799552677e-05, "loss": 0.6348, "step": 9524 }, { "epoch": 1.5549161258724133, "grad_norm": 2.4353394508361816, "learning_rate": 1.854310757583156e-05, "loss": 0.5968, "step": 9525 }, { "epoch": 1.5550793845149178, "grad_norm": 3.0293707847595215, "learning_rate": 1.8542774316992953e-05, "loss": 0.7476, "step": 9526 }, { "epoch": 1.5552426431574222, "grad_norm": 3.2510244846343994, "learning_rate": 1.8542441023038228e-05, "loss": 0.8185, "step": 9527 }, { "epoch": 1.5554059017999267, "grad_norm": 3.2470529079437256, "learning_rate": 1.854210769396875e-05, "loss": 0.8707, "step": 9528 }, { "epoch": 1.5555691604424309, "grad_norm": 2.899961233139038, "learning_rate": 1.8541774329785896e-05, "loss": 0.7644, "step": 9529 }, { "epoch": 1.5557324190849353, "grad_norm": 3.213425874710083, "learning_rate": 1.854144093049103e-05, "loss": 0.7673, "step": 9530 }, { "epoch": 1.5558956777274395, "grad_norm": 2.7586071491241455, "learning_rate": 1.8541107496085527e-05, "loss": 0.7539, "step": 9531 }, { "epoch": 1.556058936369944, "grad_norm": 3.240288257598877, "learning_rate": 1.854077402657076e-05, "loss": 0.8551, "step": 9532 }, { "epoch": 1.5562221950124484, "grad_norm": 2.717820167541504, "learning_rate": 1.854044052194809e-05, "loss": 0.7099, "step": 9533 }, { "epoch": 1.5563854536549528, "grad_norm": 2.5193228721618652, "learning_rate": 1.8540106982218896e-05, "loss": 0.5212, "step": 9534 }, { "epoch": 1.5565487122974573, "grad_norm": 3.3435137271881104, "learning_rate": 1.853977340738455e-05, "loss": 0.6817, "step": 9535 }, { "epoch": 1.5567119709399617, "grad_norm": 2.9653003215789795, "learning_rate": 1.8539439797446416e-05, "loss": 0.7794, "step": 9536 }, { "epoch": 1.5568752295824662, "grad_norm": 3.059617519378662, "learning_rate": 1.853910615240587e-05, "loss": 0.7957, "step": 9537 }, { "epoch": 1.5570384882249704, "grad_norm": 3.0290019512176514, "learning_rate": 1.8538772472264283e-05, "loss": 0.7478, "step": 9538 }, { "epoch": 1.5572017468674748, "grad_norm": 2.775496482849121, "learning_rate": 1.8538438757023028e-05, "loss": 0.7132, "step": 9539 }, { "epoch": 1.557365005509979, "grad_norm": 2.8753528594970703, "learning_rate": 1.8538105006683474e-05, "loss": 0.6768, "step": 9540 }, { "epoch": 1.5575282641524835, "grad_norm": 3.1584300994873047, "learning_rate": 1.8537771221246993e-05, "loss": 0.919, "step": 9541 }, { "epoch": 1.557691522794988, "grad_norm": 3.115018129348755, "learning_rate": 1.853743740071496e-05, "loss": 0.8428, "step": 9542 }, { "epoch": 1.5578547814374923, "grad_norm": 2.5864953994750977, "learning_rate": 1.8537103545088748e-05, "loss": 0.7079, "step": 9543 }, { "epoch": 1.5580180400799968, "grad_norm": 2.286613702774048, "learning_rate": 1.8536769654369723e-05, "loss": 0.5818, "step": 9544 }, { "epoch": 1.5581812987225012, "grad_norm": 2.325591564178467, "learning_rate": 1.8536435728559262e-05, "loss": 0.6003, "step": 9545 }, { "epoch": 1.5583445573650057, "grad_norm": 2.6237826347351074, "learning_rate": 1.853610176765874e-05, "loss": 0.703, "step": 9546 }, { "epoch": 1.5585078160075099, "grad_norm": 2.620838165283203, "learning_rate": 1.8535767771669525e-05, "loss": 0.7069, "step": 9547 }, { "epoch": 1.5586710746500143, "grad_norm": 2.4660565853118896, "learning_rate": 1.853543374059299e-05, "loss": 0.6987, "step": 9548 }, { "epoch": 1.5588343332925185, "grad_norm": 2.7976572513580322, "learning_rate": 1.853509967443051e-05, "loss": 0.6636, "step": 9549 }, { "epoch": 1.558997591935023, "grad_norm": 2.6115782260894775, "learning_rate": 1.853476557318346e-05, "loss": 0.7255, "step": 9550 }, { "epoch": 1.5591608505775274, "grad_norm": 3.272778034210205, "learning_rate": 1.8534431436853212e-05, "loss": 0.8264, "step": 9551 }, { "epoch": 1.5593241092200318, "grad_norm": 2.703704833984375, "learning_rate": 1.8534097265441136e-05, "loss": 0.6193, "step": 9552 }, { "epoch": 1.5594873678625363, "grad_norm": 2.6800787448883057, "learning_rate": 1.853376305894861e-05, "loss": 0.606, "step": 9553 }, { "epoch": 1.5596506265050407, "grad_norm": 2.941765546798706, "learning_rate": 1.8533428817377007e-05, "loss": 0.7257, "step": 9554 }, { "epoch": 1.5598138851475452, "grad_norm": 2.683354139328003, "learning_rate": 1.85330945407277e-05, "loss": 0.6455, "step": 9555 }, { "epoch": 1.5599771437900494, "grad_norm": 2.4925756454467773, "learning_rate": 1.8532760229002066e-05, "loss": 0.623, "step": 9556 }, { "epoch": 1.5601404024325538, "grad_norm": 2.5728797912597656, "learning_rate": 1.853242588220147e-05, "loss": 0.5915, "step": 9557 }, { "epoch": 1.560303661075058, "grad_norm": 2.7715952396392822, "learning_rate": 1.8532091500327295e-05, "loss": 0.6204, "step": 9558 }, { "epoch": 1.5604669197175625, "grad_norm": 3.123725652694702, "learning_rate": 1.8531757083380917e-05, "loss": 0.8204, "step": 9559 }, { "epoch": 1.560630178360067, "grad_norm": 2.8862993717193604, "learning_rate": 1.8531422631363706e-05, "loss": 0.7616, "step": 9560 }, { "epoch": 1.5607934370025713, "grad_norm": 2.96451997756958, "learning_rate": 1.853108814427704e-05, "loss": 0.582, "step": 9561 }, { "epoch": 1.5609566956450758, "grad_norm": 2.9269351959228516, "learning_rate": 1.8530753622122288e-05, "loss": 0.7127, "step": 9562 }, { "epoch": 1.5611199542875802, "grad_norm": 3.035771369934082, "learning_rate": 1.853041906490083e-05, "loss": 0.7449, "step": 9563 }, { "epoch": 1.5612832129300847, "grad_norm": 2.85050892829895, "learning_rate": 1.853008447261404e-05, "loss": 0.6885, "step": 9564 }, { "epoch": 1.5614464715725889, "grad_norm": 2.7266290187835693, "learning_rate": 1.8529749845263293e-05, "loss": 0.8184, "step": 9565 }, { "epoch": 1.5616097302150933, "grad_norm": 2.7265076637268066, "learning_rate": 1.8529415182849968e-05, "loss": 0.6999, "step": 9566 }, { "epoch": 1.5617729888575975, "grad_norm": 2.3387563228607178, "learning_rate": 1.8529080485375436e-05, "loss": 0.6295, "step": 9567 }, { "epoch": 1.561936247500102, "grad_norm": 2.727926015853882, "learning_rate": 1.8528745752841075e-05, "loss": 0.6544, "step": 9568 }, { "epoch": 1.5620995061426064, "grad_norm": 2.6798582077026367, "learning_rate": 1.852841098524826e-05, "loss": 0.8022, "step": 9569 }, { "epoch": 1.5622627647851108, "grad_norm": 3.153455972671509, "learning_rate": 1.852807618259837e-05, "loss": 0.7289, "step": 9570 }, { "epoch": 1.5624260234276153, "grad_norm": 3.0537242889404297, "learning_rate": 1.8527741344892776e-05, "loss": 0.7284, "step": 9571 }, { "epoch": 1.5625892820701197, "grad_norm": 2.275728702545166, "learning_rate": 1.852740647213286e-05, "loss": 0.5997, "step": 9572 }, { "epoch": 1.562752540712624, "grad_norm": 2.7162387371063232, "learning_rate": 1.8527071564319992e-05, "loss": 0.6668, "step": 9573 }, { "epoch": 1.5629157993551284, "grad_norm": 2.9166853427886963, "learning_rate": 1.8526736621455556e-05, "loss": 0.6844, "step": 9574 }, { "epoch": 1.5630790579976328, "grad_norm": 2.93760085105896, "learning_rate": 1.8526401643540924e-05, "loss": 0.6638, "step": 9575 }, { "epoch": 1.563242316640137, "grad_norm": 3.4274675846099854, "learning_rate": 1.8526066630577475e-05, "loss": 0.8234, "step": 9576 }, { "epoch": 1.5634055752826415, "grad_norm": 3.21376371383667, "learning_rate": 1.8525731582566587e-05, "loss": 0.6755, "step": 9577 }, { "epoch": 1.563568833925146, "grad_norm": 2.365231513977051, "learning_rate": 1.852539649950963e-05, "loss": 0.5362, "step": 9578 }, { "epoch": 1.5637320925676503, "grad_norm": 2.080252170562744, "learning_rate": 1.8525061381407992e-05, "loss": 0.4982, "step": 9579 }, { "epoch": 1.5638953512101548, "grad_norm": 2.660510778427124, "learning_rate": 1.8524726228263047e-05, "loss": 0.6251, "step": 9580 }, { "epoch": 1.5640586098526592, "grad_norm": 2.7733190059661865, "learning_rate": 1.852439104007617e-05, "loss": 0.8339, "step": 9581 }, { "epoch": 1.5642218684951634, "grad_norm": 3.263810634613037, "learning_rate": 1.8524055816848736e-05, "loss": 0.7117, "step": 9582 }, { "epoch": 1.5643851271376679, "grad_norm": 2.519052743911743, "learning_rate": 1.8523720558582133e-05, "loss": 0.5874, "step": 9583 }, { "epoch": 1.564548385780172, "grad_norm": 2.6043412685394287, "learning_rate": 1.8523385265277727e-05, "loss": 0.6854, "step": 9584 }, { "epoch": 1.5647116444226765, "grad_norm": 2.642284631729126, "learning_rate": 1.852304993693691e-05, "loss": 0.7486, "step": 9585 }, { "epoch": 1.564874903065181, "grad_norm": 2.6762866973876953, "learning_rate": 1.852271457356105e-05, "loss": 0.6529, "step": 9586 }, { "epoch": 1.5650381617076854, "grad_norm": 2.355055093765259, "learning_rate": 1.8522379175151526e-05, "loss": 0.6202, "step": 9587 }, { "epoch": 1.5652014203501898, "grad_norm": 2.5801830291748047, "learning_rate": 1.852204374170972e-05, "loss": 0.7267, "step": 9588 }, { "epoch": 1.5653646789926943, "grad_norm": 2.653479814529419, "learning_rate": 1.852170827323701e-05, "loss": 0.7144, "step": 9589 }, { "epoch": 1.5655279376351987, "grad_norm": 3.1546385288238525, "learning_rate": 1.8521372769734776e-05, "loss": 0.8894, "step": 9590 }, { "epoch": 1.565691196277703, "grad_norm": 2.5513930320739746, "learning_rate": 1.8521037231204398e-05, "loss": 0.6534, "step": 9591 }, { "epoch": 1.5658544549202074, "grad_norm": 2.9782657623291016, "learning_rate": 1.8520701657647253e-05, "loss": 0.7379, "step": 9592 }, { "epoch": 1.5660177135627116, "grad_norm": 2.803708791732788, "learning_rate": 1.852036604906472e-05, "loss": 0.5985, "step": 9593 }, { "epoch": 1.566180972205216, "grad_norm": 3.135662078857422, "learning_rate": 1.8520030405458176e-05, "loss": 0.6836, "step": 9594 }, { "epoch": 1.5663442308477205, "grad_norm": 2.6121726036071777, "learning_rate": 1.8519694726829006e-05, "loss": 0.5728, "step": 9595 }, { "epoch": 1.566507489490225, "grad_norm": 2.601655960083008, "learning_rate": 1.851935901317859e-05, "loss": 0.5963, "step": 9596 }, { "epoch": 1.5666707481327293, "grad_norm": 2.9049501419067383, "learning_rate": 1.8519023264508304e-05, "loss": 0.6819, "step": 9597 }, { "epoch": 1.5668340067752338, "grad_norm": 1.8842782974243164, "learning_rate": 1.851868748081953e-05, "loss": 0.4473, "step": 9598 }, { "epoch": 1.5669972654177382, "grad_norm": 2.691620349884033, "learning_rate": 1.8518351662113652e-05, "loss": 0.6228, "step": 9599 }, { "epoch": 1.5671605240602424, "grad_norm": 2.2181127071380615, "learning_rate": 1.8518015808392045e-05, "loss": 0.5725, "step": 9600 }, { "epoch": 1.5673237827027469, "grad_norm": 2.50907826423645, "learning_rate": 1.8517679919656088e-05, "loss": 0.6294, "step": 9601 }, { "epoch": 1.567487041345251, "grad_norm": 2.8533835411071777, "learning_rate": 1.8517343995907167e-05, "loss": 0.7487, "step": 9602 }, { "epoch": 1.5676502999877555, "grad_norm": 2.6168503761291504, "learning_rate": 1.851700803714666e-05, "loss": 0.635, "step": 9603 }, { "epoch": 1.56781355863026, "grad_norm": 2.1010961532592773, "learning_rate": 1.8516672043375953e-05, "loss": 0.5521, "step": 9604 }, { "epoch": 1.5679768172727644, "grad_norm": 2.7945899963378906, "learning_rate": 1.851633601459642e-05, "loss": 0.6807, "step": 9605 }, { "epoch": 1.5681400759152688, "grad_norm": 2.463322639465332, "learning_rate": 1.8515999950809446e-05, "loss": 0.5505, "step": 9606 }, { "epoch": 1.5683033345577733, "grad_norm": 2.8828012943267822, "learning_rate": 1.8515663852016413e-05, "loss": 0.6106, "step": 9607 }, { "epoch": 1.5684665932002777, "grad_norm": 3.015273332595825, "learning_rate": 1.85153277182187e-05, "loss": 0.7208, "step": 9608 }, { "epoch": 1.568629851842782, "grad_norm": 4.971329212188721, "learning_rate": 1.851499154941769e-05, "loss": 0.6381, "step": 9609 }, { "epoch": 1.5687931104852864, "grad_norm": 2.8577382564544678, "learning_rate": 1.8514655345614766e-05, "loss": 0.7893, "step": 9610 }, { "epoch": 1.5689563691277906, "grad_norm": 2.5160233974456787, "learning_rate": 1.8514319106811304e-05, "loss": 0.5316, "step": 9611 }, { "epoch": 1.569119627770295, "grad_norm": 2.844679832458496, "learning_rate": 1.8513982833008697e-05, "loss": 0.6441, "step": 9612 }, { "epoch": 1.5692828864127994, "grad_norm": 2.8052761554718018, "learning_rate": 1.851364652420832e-05, "loss": 0.6672, "step": 9613 }, { "epoch": 1.5694461450553039, "grad_norm": 2.4343180656433105, "learning_rate": 1.8513310180411555e-05, "loss": 0.5543, "step": 9614 }, { "epoch": 1.5696094036978083, "grad_norm": 2.924612045288086, "learning_rate": 1.851297380161979e-05, "loss": 0.6645, "step": 9615 }, { "epoch": 1.5697726623403128, "grad_norm": 3.0627169609069824, "learning_rate": 1.8512637387834402e-05, "loss": 0.7343, "step": 9616 }, { "epoch": 1.569935920982817, "grad_norm": 2.6173524856567383, "learning_rate": 1.8512300939056774e-05, "loss": 0.7248, "step": 9617 }, { "epoch": 1.5700991796253214, "grad_norm": 2.505039930343628, "learning_rate": 1.8511964455288293e-05, "loss": 0.6405, "step": 9618 }, { "epoch": 1.5702624382678259, "grad_norm": 2.8704962730407715, "learning_rate": 1.851162793653034e-05, "loss": 0.6608, "step": 9619 }, { "epoch": 1.57042569691033, "grad_norm": 2.7614243030548096, "learning_rate": 1.85112913827843e-05, "loss": 0.6713, "step": 9620 }, { "epoch": 1.5705889555528345, "grad_norm": 2.8965137004852295, "learning_rate": 1.8510954794051552e-05, "loss": 0.6963, "step": 9621 }, { "epoch": 1.570752214195339, "grad_norm": 3.1240506172180176, "learning_rate": 1.8510618170333485e-05, "loss": 0.8117, "step": 9622 }, { "epoch": 1.5709154728378434, "grad_norm": 2.8942387104034424, "learning_rate": 1.8510281511631477e-05, "loss": 0.7083, "step": 9623 }, { "epoch": 1.5710787314803478, "grad_norm": 2.5030128955841064, "learning_rate": 1.850994481794692e-05, "loss": 0.5663, "step": 9624 }, { "epoch": 1.5712419901228523, "grad_norm": 2.7024917602539062, "learning_rate": 1.850960808928119e-05, "loss": 0.6094, "step": 9625 }, { "epoch": 1.5714052487653565, "grad_norm": 2.7417356967926025, "learning_rate": 1.8509271325635677e-05, "loss": 0.7968, "step": 9626 }, { "epoch": 1.571568507407861, "grad_norm": 2.517094373703003, "learning_rate": 1.8508934527011758e-05, "loss": 0.6872, "step": 9627 }, { "epoch": 1.5717317660503651, "grad_norm": 2.699000120162964, "learning_rate": 1.8508597693410826e-05, "loss": 0.8053, "step": 9628 }, { "epoch": 1.5718950246928696, "grad_norm": 2.7657253742218018, "learning_rate": 1.850826082483426e-05, "loss": 0.7254, "step": 9629 }, { "epoch": 1.572058283335374, "grad_norm": 3.065819263458252, "learning_rate": 1.8507923921283448e-05, "loss": 0.6962, "step": 9630 }, { "epoch": 1.5722215419778784, "grad_norm": 3.0363874435424805, "learning_rate": 1.8507586982759773e-05, "loss": 0.8213, "step": 9631 }, { "epoch": 1.5723848006203829, "grad_norm": 2.7938599586486816, "learning_rate": 1.8507250009264617e-05, "loss": 0.6138, "step": 9632 }, { "epoch": 1.5725480592628873, "grad_norm": 2.2466626167297363, "learning_rate": 1.850691300079937e-05, "loss": 0.5495, "step": 9633 }, { "epoch": 1.5727113179053918, "grad_norm": 3.0675885677337646, "learning_rate": 1.8506575957365418e-05, "loss": 0.7764, "step": 9634 }, { "epoch": 1.572874576547896, "grad_norm": 2.6563854217529297, "learning_rate": 1.8506238878964144e-05, "loss": 0.6469, "step": 9635 }, { "epoch": 1.5730378351904004, "grad_norm": 2.6515724658966064, "learning_rate": 1.850590176559693e-05, "loss": 0.633, "step": 9636 }, { "epoch": 1.5732010938329046, "grad_norm": 2.4077565670013428, "learning_rate": 1.850556461726517e-05, "loss": 0.5951, "step": 9637 }, { "epoch": 1.573364352475409, "grad_norm": 2.810105800628662, "learning_rate": 1.8505227433970243e-05, "loss": 0.7982, "step": 9638 }, { "epoch": 1.5735276111179135, "grad_norm": 3.52764892578125, "learning_rate": 1.8504890215713537e-05, "loss": 0.7671, "step": 9639 }, { "epoch": 1.573690869760418, "grad_norm": 2.9850268363952637, "learning_rate": 1.850455296249644e-05, "loss": 0.7908, "step": 9640 }, { "epoch": 1.5738541284029224, "grad_norm": 2.6182684898376465, "learning_rate": 1.850421567432034e-05, "loss": 0.6671, "step": 9641 }, { "epoch": 1.5740173870454268, "grad_norm": 3.7131993770599365, "learning_rate": 1.8503878351186617e-05, "loss": 0.917, "step": 9642 }, { "epoch": 1.5741806456879313, "grad_norm": 2.7896676063537598, "learning_rate": 1.8503540993096663e-05, "loss": 0.6999, "step": 9643 }, { "epoch": 1.5743439043304355, "grad_norm": 2.594702959060669, "learning_rate": 1.8503203600051863e-05, "loss": 0.6674, "step": 9644 }, { "epoch": 1.57450716297294, "grad_norm": 2.5810399055480957, "learning_rate": 1.85028661720536e-05, "loss": 0.6288, "step": 9645 }, { "epoch": 1.5746704216154441, "grad_norm": 3.0804662704467773, "learning_rate": 1.850252870910327e-05, "loss": 0.7774, "step": 9646 }, { "epoch": 1.5748336802579486, "grad_norm": 2.7801513671875, "learning_rate": 1.850219121120225e-05, "loss": 0.7669, "step": 9647 }, { "epoch": 1.574996938900453, "grad_norm": 2.9139583110809326, "learning_rate": 1.8501853678351934e-05, "loss": 0.7342, "step": 9648 }, { "epoch": 1.5751601975429574, "grad_norm": 2.161100149154663, "learning_rate": 1.8501516110553706e-05, "loss": 0.4958, "step": 9649 }, { "epoch": 1.5753234561854619, "grad_norm": 1.934377908706665, "learning_rate": 1.8501178507808962e-05, "loss": 0.4903, "step": 9650 }, { "epoch": 1.5754867148279663, "grad_norm": 2.5151331424713135, "learning_rate": 1.8500840870119078e-05, "loss": 0.579, "step": 9651 }, { "epoch": 1.5756499734704708, "grad_norm": 2.3597967624664307, "learning_rate": 1.8500503197485445e-05, "loss": 0.6591, "step": 9652 }, { "epoch": 1.575813232112975, "grad_norm": 2.633655548095703, "learning_rate": 1.8500165489909458e-05, "loss": 0.5917, "step": 9653 }, { "epoch": 1.5759764907554794, "grad_norm": 2.5097715854644775, "learning_rate": 1.84998277473925e-05, "loss": 0.7029, "step": 9654 }, { "epoch": 1.5761397493979836, "grad_norm": 2.8751604557037354, "learning_rate": 1.849948996993596e-05, "loss": 0.7888, "step": 9655 }, { "epoch": 1.576303008040488, "grad_norm": 2.600362777709961, "learning_rate": 1.849915215754122e-05, "loss": 0.6272, "step": 9656 }, { "epoch": 1.5764662666829925, "grad_norm": 2.7513256072998047, "learning_rate": 1.8498814310209677e-05, "loss": 0.6721, "step": 9657 }, { "epoch": 1.576629525325497, "grad_norm": 2.7734529972076416, "learning_rate": 1.8498476427942722e-05, "loss": 0.6631, "step": 9658 }, { "epoch": 1.5767927839680014, "grad_norm": 2.373002052307129, "learning_rate": 1.8498138510741737e-05, "loss": 0.5903, "step": 9659 }, { "epoch": 1.5769560426105058, "grad_norm": 2.3363184928894043, "learning_rate": 1.8497800558608113e-05, "loss": 0.5528, "step": 9660 }, { "epoch": 1.57711930125301, "grad_norm": 2.431281328201294, "learning_rate": 1.849746257154324e-05, "loss": 0.6072, "step": 9661 }, { "epoch": 1.5772825598955145, "grad_norm": 2.612631320953369, "learning_rate": 1.8497124549548503e-05, "loss": 0.631, "step": 9662 }, { "epoch": 1.577445818538019, "grad_norm": 3.07309889793396, "learning_rate": 1.84967864926253e-05, "loss": 0.6401, "step": 9663 }, { "epoch": 1.5776090771805231, "grad_norm": 2.847990036010742, "learning_rate": 1.8496448400775017e-05, "loss": 0.7001, "step": 9664 }, { "epoch": 1.5777723358230276, "grad_norm": 2.639427661895752, "learning_rate": 1.849611027399904e-05, "loss": 0.587, "step": 9665 }, { "epoch": 1.577935594465532, "grad_norm": 2.7771670818328857, "learning_rate": 1.849577211229876e-05, "loss": 0.7231, "step": 9666 }, { "epoch": 1.5780988531080364, "grad_norm": 3.2617971897125244, "learning_rate": 1.8495433915675575e-05, "loss": 0.6177, "step": 9667 }, { "epoch": 1.5782621117505409, "grad_norm": 2.9478490352630615, "learning_rate": 1.8495095684130864e-05, "loss": 0.7068, "step": 9668 }, { "epoch": 1.5784253703930453, "grad_norm": 3.4098165035247803, "learning_rate": 1.8494757417666024e-05, "loss": 0.9705, "step": 9669 }, { "epoch": 1.5785886290355495, "grad_norm": 2.912846565246582, "learning_rate": 1.8494419116282444e-05, "loss": 0.7285, "step": 9670 }, { "epoch": 1.578751887678054, "grad_norm": 2.8307483196258545, "learning_rate": 1.8494080779981512e-05, "loss": 0.8467, "step": 9671 }, { "epoch": 1.5789151463205582, "grad_norm": 3.524796962738037, "learning_rate": 1.8493742408764623e-05, "loss": 0.8629, "step": 9672 }, { "epoch": 1.5790784049630626, "grad_norm": 2.9293782711029053, "learning_rate": 1.8493404002633167e-05, "loss": 0.6978, "step": 9673 }, { "epoch": 1.579241663605567, "grad_norm": 3.2084271907806396, "learning_rate": 1.849306556158853e-05, "loss": 0.7208, "step": 9674 }, { "epoch": 1.5794049222480715, "grad_norm": 2.688478469848633, "learning_rate": 1.8492727085632114e-05, "loss": 0.6496, "step": 9675 }, { "epoch": 1.579568180890576, "grad_norm": 2.6461904048919678, "learning_rate": 1.8492388574765302e-05, "loss": 0.6722, "step": 9676 }, { "epoch": 1.5797314395330804, "grad_norm": 2.5354325771331787, "learning_rate": 1.8492050028989484e-05, "loss": 0.7026, "step": 9677 }, { "epoch": 1.5798946981755848, "grad_norm": 3.222039222717285, "learning_rate": 1.8491711448306056e-05, "loss": 0.8998, "step": 9678 }, { "epoch": 1.580057956818089, "grad_norm": 2.8970463275909424, "learning_rate": 1.849137283271641e-05, "loss": 0.7033, "step": 9679 }, { "epoch": 1.5802212154605935, "grad_norm": 2.7910332679748535, "learning_rate": 1.849103418222194e-05, "loss": 0.8081, "step": 9680 }, { "epoch": 1.5803844741030977, "grad_norm": 2.914552688598633, "learning_rate": 1.849069549682403e-05, "loss": 0.753, "step": 9681 }, { "epoch": 1.5805477327456021, "grad_norm": 2.3734357357025146, "learning_rate": 1.8490356776524073e-05, "loss": 0.5976, "step": 9682 }, { "epoch": 1.5807109913881066, "grad_norm": 2.386587142944336, "learning_rate": 1.8490018021323472e-05, "loss": 0.601, "step": 9683 }, { "epoch": 1.580874250030611, "grad_norm": 3.114729404449463, "learning_rate": 1.848967923122361e-05, "loss": 0.8291, "step": 9684 }, { "epoch": 1.5810375086731154, "grad_norm": 2.8613710403442383, "learning_rate": 1.8489340406225884e-05, "loss": 0.7374, "step": 9685 }, { "epoch": 1.5812007673156199, "grad_norm": 2.6629161834716797, "learning_rate": 1.8489001546331682e-05, "loss": 0.6509, "step": 9686 }, { "epoch": 1.5813640259581243, "grad_norm": 2.3418805599212646, "learning_rate": 1.8488662651542402e-05, "loss": 0.69, "step": 9687 }, { "epoch": 1.5815272846006285, "grad_norm": 2.397838592529297, "learning_rate": 1.8488323721859435e-05, "loss": 0.6064, "step": 9688 }, { "epoch": 1.581690543243133, "grad_norm": 2.6462619304656982, "learning_rate": 1.8487984757284175e-05, "loss": 0.6555, "step": 9689 }, { "epoch": 1.5818538018856372, "grad_norm": 2.75067400932312, "learning_rate": 1.8487645757818015e-05, "loss": 0.7424, "step": 9690 }, { "epoch": 1.5820170605281416, "grad_norm": 2.6779839992523193, "learning_rate": 1.8487306723462344e-05, "loss": 0.7467, "step": 9691 }, { "epoch": 1.582180319170646, "grad_norm": 2.655726432800293, "learning_rate": 1.8486967654218566e-05, "loss": 0.6718, "step": 9692 }, { "epoch": 1.5823435778131505, "grad_norm": 2.7162532806396484, "learning_rate": 1.8486628550088065e-05, "loss": 0.7279, "step": 9693 }, { "epoch": 1.582506836455655, "grad_norm": 2.283888339996338, "learning_rate": 1.848628941107224e-05, "loss": 0.5419, "step": 9694 }, { "epoch": 1.5826700950981594, "grad_norm": 2.754581928253174, "learning_rate": 1.8485950237172483e-05, "loss": 0.5778, "step": 9695 }, { "epoch": 1.5828333537406638, "grad_norm": 2.688795566558838, "learning_rate": 1.8485611028390188e-05, "loss": 0.643, "step": 9696 }, { "epoch": 1.582996612383168, "grad_norm": 3.0771682262420654, "learning_rate": 1.8485271784726747e-05, "loss": 0.7546, "step": 9697 }, { "epoch": 1.5831598710256725, "grad_norm": 3.0471558570861816, "learning_rate": 1.8484932506183563e-05, "loss": 0.8542, "step": 9698 }, { "epoch": 1.5833231296681767, "grad_norm": 2.5062954425811768, "learning_rate": 1.8484593192762025e-05, "loss": 0.5447, "step": 9699 }, { "epoch": 1.5834863883106811, "grad_norm": 2.498892068862915, "learning_rate": 1.8484253844463527e-05, "loss": 0.6231, "step": 9700 }, { "epoch": 1.5836496469531856, "grad_norm": 2.913785934448242, "learning_rate": 1.8483914461289463e-05, "loss": 0.7235, "step": 9701 }, { "epoch": 1.58381290559569, "grad_norm": 2.694182872772217, "learning_rate": 1.8483575043241234e-05, "loss": 0.7485, "step": 9702 }, { "epoch": 1.5839761642381944, "grad_norm": 2.6136302947998047, "learning_rate": 1.8483235590320228e-05, "loss": 0.6758, "step": 9703 }, { "epoch": 1.5841394228806989, "grad_norm": 2.9038069248199463, "learning_rate": 1.8482896102527844e-05, "loss": 0.7529, "step": 9704 }, { "epoch": 1.584302681523203, "grad_norm": 2.6820976734161377, "learning_rate": 1.848255657986548e-05, "loss": 0.6389, "step": 9705 }, { "epoch": 1.5844659401657075, "grad_norm": 2.7465317249298096, "learning_rate": 1.8482217022334525e-05, "loss": 0.7484, "step": 9706 }, { "epoch": 1.584629198808212, "grad_norm": 2.6831233501434326, "learning_rate": 1.848187742993638e-05, "loss": 0.7067, "step": 9707 }, { "epoch": 1.5847924574507162, "grad_norm": 3.015145778656006, "learning_rate": 1.8481537802672443e-05, "loss": 0.7777, "step": 9708 }, { "epoch": 1.5849557160932206, "grad_norm": 2.7456836700439453, "learning_rate": 1.8481198140544104e-05, "loss": 0.7135, "step": 9709 }, { "epoch": 1.585118974735725, "grad_norm": 3.5668392181396484, "learning_rate": 1.8480858443552763e-05, "loss": 0.8733, "step": 9710 }, { "epoch": 1.5852822333782295, "grad_norm": 2.912895679473877, "learning_rate": 1.8480518711699813e-05, "loss": 0.6989, "step": 9711 }, { "epoch": 1.585445492020734, "grad_norm": 2.8335869312286377, "learning_rate": 1.8480178944986656e-05, "loss": 0.6789, "step": 9712 }, { "epoch": 1.5856087506632384, "grad_norm": 2.3913071155548096, "learning_rate": 1.847983914341468e-05, "loss": 0.6109, "step": 9713 }, { "epoch": 1.5857720093057426, "grad_norm": 2.9139602184295654, "learning_rate": 1.847949930698529e-05, "loss": 0.7678, "step": 9714 }, { "epoch": 1.585935267948247, "grad_norm": 3.09609317779541, "learning_rate": 1.8479159435699883e-05, "loss": 0.7777, "step": 9715 }, { "epoch": 1.5860985265907512, "grad_norm": 2.8512561321258545, "learning_rate": 1.847881952955985e-05, "loss": 0.6196, "step": 9716 }, { "epoch": 1.5862617852332557, "grad_norm": 2.7900753021240234, "learning_rate": 1.8478479588566595e-05, "loss": 0.7134, "step": 9717 }, { "epoch": 1.58642504387576, "grad_norm": 3.24993634223938, "learning_rate": 1.8478139612721505e-05, "loss": 0.8891, "step": 9718 }, { "epoch": 1.5865883025182645, "grad_norm": 2.865652322769165, "learning_rate": 1.847779960202599e-05, "loss": 0.6875, "step": 9719 }, { "epoch": 1.586751561160769, "grad_norm": 2.641613245010376, "learning_rate": 1.847745955648144e-05, "loss": 0.6936, "step": 9720 }, { "epoch": 1.5869148198032734, "grad_norm": 2.634474754333496, "learning_rate": 1.8477119476089256e-05, "loss": 0.663, "step": 9721 }, { "epoch": 1.5870780784457779, "grad_norm": 2.6977782249450684, "learning_rate": 1.8476779360850833e-05, "loss": 0.7745, "step": 9722 }, { "epoch": 1.587241337088282, "grad_norm": 2.4392905235290527, "learning_rate": 1.8476439210767572e-05, "loss": 0.6422, "step": 9723 }, { "epoch": 1.5874045957307865, "grad_norm": 2.634849786758423, "learning_rate": 1.847609902584087e-05, "loss": 0.6933, "step": 9724 }, { "epoch": 1.5875678543732907, "grad_norm": 2.41943097114563, "learning_rate": 1.8475758806072127e-05, "loss": 0.6136, "step": 9725 }, { "epoch": 1.5877311130157952, "grad_norm": 2.7689661979675293, "learning_rate": 1.8475418551462737e-05, "loss": 0.6507, "step": 9726 }, { "epoch": 1.5878943716582996, "grad_norm": 2.9401800632476807, "learning_rate": 1.8475078262014103e-05, "loss": 0.7161, "step": 9727 }, { "epoch": 1.588057630300804, "grad_norm": 2.54299259185791, "learning_rate": 1.8474737937727624e-05, "loss": 0.6526, "step": 9728 }, { "epoch": 1.5882208889433085, "grad_norm": 2.744746446609497, "learning_rate": 1.8474397578604695e-05, "loss": 0.724, "step": 9729 }, { "epoch": 1.588384147585813, "grad_norm": 3.0541880130767822, "learning_rate": 1.8474057184646717e-05, "loss": 0.7476, "step": 9730 }, { "epoch": 1.5885474062283174, "grad_norm": 2.691657781600952, "learning_rate": 1.847371675585509e-05, "loss": 0.6637, "step": 9731 }, { "epoch": 1.5887106648708216, "grad_norm": 3.1336405277252197, "learning_rate": 1.8473376292231215e-05, "loss": 0.9332, "step": 9732 }, { "epoch": 1.588873923513326, "grad_norm": 2.504350185394287, "learning_rate": 1.847303579377649e-05, "loss": 0.6146, "step": 9733 }, { "epoch": 1.5890371821558302, "grad_norm": 2.5372202396392822, "learning_rate": 1.847269526049231e-05, "loss": 0.7975, "step": 9734 }, { "epoch": 1.5892004407983347, "grad_norm": 3.0420351028442383, "learning_rate": 1.847235469238008e-05, "loss": 0.7145, "step": 9735 }, { "epoch": 1.589363699440839, "grad_norm": 2.8972675800323486, "learning_rate": 1.84720140894412e-05, "loss": 0.7264, "step": 9736 }, { "epoch": 1.5895269580833435, "grad_norm": 2.3804097175598145, "learning_rate": 1.847167345167707e-05, "loss": 0.6133, "step": 9737 }, { "epoch": 1.589690216725848, "grad_norm": 3.102524995803833, "learning_rate": 1.8471332779089092e-05, "loss": 0.809, "step": 9738 }, { "epoch": 1.5898534753683524, "grad_norm": 2.218897581100464, "learning_rate": 1.847099207167866e-05, "loss": 0.6018, "step": 9739 }, { "epoch": 1.5900167340108569, "grad_norm": 2.6743130683898926, "learning_rate": 1.847065132944718e-05, "loss": 0.7864, "step": 9740 }, { "epoch": 1.590179992653361, "grad_norm": 2.5497169494628906, "learning_rate": 1.847031055239605e-05, "loss": 0.6877, "step": 9741 }, { "epoch": 1.5903432512958655, "grad_norm": 3.067747116088867, "learning_rate": 1.8469969740526668e-05, "loss": 0.6985, "step": 9742 }, { "epoch": 1.5905065099383697, "grad_norm": 2.6899592876434326, "learning_rate": 1.8469628893840442e-05, "loss": 0.7035, "step": 9743 }, { "epoch": 1.5906697685808742, "grad_norm": 2.86630916595459, "learning_rate": 1.8469288012338773e-05, "loss": 0.8255, "step": 9744 }, { "epoch": 1.5908330272233786, "grad_norm": 2.7257156372070312, "learning_rate": 1.8468947096023054e-05, "loss": 0.7486, "step": 9745 }, { "epoch": 1.590996285865883, "grad_norm": 2.350172519683838, "learning_rate": 1.846860614489469e-05, "loss": 0.5813, "step": 9746 }, { "epoch": 1.5911595445083875, "grad_norm": 3.019585132598877, "learning_rate": 1.846826515895509e-05, "loss": 0.8001, "step": 9747 }, { "epoch": 1.591322803150892, "grad_norm": 2.5753774642944336, "learning_rate": 1.8467924138205645e-05, "loss": 0.7011, "step": 9748 }, { "epoch": 1.5914860617933961, "grad_norm": 3.0446197986602783, "learning_rate": 1.8467583082647765e-05, "loss": 0.6905, "step": 9749 }, { "epoch": 1.5916493204359006, "grad_norm": 2.7794177532196045, "learning_rate": 1.8467241992282842e-05, "loss": 0.6605, "step": 9750 }, { "epoch": 1.591812579078405, "grad_norm": 2.8180015087127686, "learning_rate": 1.846690086711229e-05, "loss": 0.6187, "step": 9751 }, { "epoch": 1.5919758377209092, "grad_norm": 2.652275323867798, "learning_rate": 1.8466559707137502e-05, "loss": 0.6627, "step": 9752 }, { "epoch": 1.5921390963634137, "grad_norm": 2.85196852684021, "learning_rate": 1.8466218512359888e-05, "loss": 0.7324, "step": 9753 }, { "epoch": 1.592302355005918, "grad_norm": 2.5587985515594482, "learning_rate": 1.846587728278084e-05, "loss": 0.6378, "step": 9754 }, { "epoch": 1.5924656136484225, "grad_norm": 2.978450298309326, "learning_rate": 1.8465536018401774e-05, "loss": 0.8042, "step": 9755 }, { "epoch": 1.592628872290927, "grad_norm": 3.0709924697875977, "learning_rate": 1.8465194719224082e-05, "loss": 0.7533, "step": 9756 }, { "epoch": 1.5927921309334314, "grad_norm": 2.5195906162261963, "learning_rate": 1.8464853385249173e-05, "loss": 0.5359, "step": 9757 }, { "epoch": 1.5929553895759356, "grad_norm": 2.989854574203491, "learning_rate": 1.8464512016478445e-05, "loss": 0.7507, "step": 9758 }, { "epoch": 1.59311864821844, "grad_norm": 2.6986191272735596, "learning_rate": 1.846417061291331e-05, "loss": 0.5606, "step": 9759 }, { "epoch": 1.5932819068609443, "grad_norm": 2.432709217071533, "learning_rate": 1.8463829174555158e-05, "loss": 0.5989, "step": 9760 }, { "epoch": 1.5934451655034487, "grad_norm": 2.470811367034912, "learning_rate": 1.8463487701405407e-05, "loss": 0.6139, "step": 9761 }, { "epoch": 1.5936084241459532, "grad_norm": 2.559080123901367, "learning_rate": 1.846314619346545e-05, "loss": 0.6011, "step": 9762 }, { "epoch": 1.5937716827884576, "grad_norm": 2.825549840927124, "learning_rate": 1.8462804650736697e-05, "loss": 0.7414, "step": 9763 }, { "epoch": 1.593934941430962, "grad_norm": 2.6949639320373535, "learning_rate": 1.8462463073220547e-05, "loss": 0.6839, "step": 9764 }, { "epoch": 1.5940982000734665, "grad_norm": 2.3784754276275635, "learning_rate": 1.8462121460918407e-05, "loss": 0.654, "step": 9765 }, { "epoch": 1.594261458715971, "grad_norm": 2.241790533065796, "learning_rate": 1.846177981383168e-05, "loss": 0.5807, "step": 9766 }, { "epoch": 1.5944247173584751, "grad_norm": 2.9643170833587646, "learning_rate": 1.8461438131961777e-05, "loss": 0.6585, "step": 9767 }, { "epoch": 1.5945879760009796, "grad_norm": 2.6341049671173096, "learning_rate": 1.846109641531009e-05, "loss": 0.7049, "step": 9768 }, { "epoch": 1.5947512346434838, "grad_norm": 2.93481183052063, "learning_rate": 1.8460754663878036e-05, "loss": 0.7129, "step": 9769 }, { "epoch": 1.5949144932859882, "grad_norm": 2.4645426273345947, "learning_rate": 1.846041287766701e-05, "loss": 0.5097, "step": 9770 }, { "epoch": 1.5950777519284927, "grad_norm": 3.0663435459136963, "learning_rate": 1.8460071056678424e-05, "loss": 0.6841, "step": 9771 }, { "epoch": 1.595241010570997, "grad_norm": 3.0205888748168945, "learning_rate": 1.845972920091368e-05, "loss": 0.7509, "step": 9772 }, { "epoch": 1.5954042692135015, "grad_norm": 2.602243185043335, "learning_rate": 1.8459387310374182e-05, "loss": 0.6446, "step": 9773 }, { "epoch": 1.595567527856006, "grad_norm": 2.825289726257324, "learning_rate": 1.8459045385061337e-05, "loss": 0.6914, "step": 9774 }, { "epoch": 1.5957307864985104, "grad_norm": 2.732008695602417, "learning_rate": 1.8458703424976553e-05, "loss": 0.7152, "step": 9775 }, { "epoch": 1.5958940451410146, "grad_norm": 3.385089159011841, "learning_rate": 1.845836143012123e-05, "loss": 1.32, "step": 9776 }, { "epoch": 1.596057303783519, "grad_norm": 2.744431734085083, "learning_rate": 1.8458019400496778e-05, "loss": 0.6944, "step": 9777 }, { "epoch": 1.5962205624260233, "grad_norm": 2.6293091773986816, "learning_rate": 1.8457677336104606e-05, "loss": 0.6047, "step": 9778 }, { "epoch": 1.5963838210685277, "grad_norm": 2.8335912227630615, "learning_rate": 1.845733523694611e-05, "loss": 0.6793, "step": 9779 }, { "epoch": 1.5965470797110322, "grad_norm": 2.7877142429351807, "learning_rate": 1.8456993103022704e-05, "loss": 0.7749, "step": 9780 }, { "epoch": 1.5967103383535366, "grad_norm": 3.001767635345459, "learning_rate": 1.8456650934335795e-05, "loss": 0.7752, "step": 9781 }, { "epoch": 1.596873596996041, "grad_norm": 2.6289565563201904, "learning_rate": 1.8456308730886783e-05, "loss": 0.669, "step": 9782 }, { "epoch": 1.5970368556385455, "grad_norm": 2.832653522491455, "learning_rate": 1.845596649267708e-05, "loss": 0.6291, "step": 9783 }, { "epoch": 1.59720011428105, "grad_norm": 2.5273241996765137, "learning_rate": 1.8455624219708097e-05, "loss": 0.629, "step": 9784 }, { "epoch": 1.5973633729235541, "grad_norm": 3.148083448410034, "learning_rate": 1.8455281911981232e-05, "loss": 0.9528, "step": 9785 }, { "epoch": 1.5975266315660586, "grad_norm": 2.2801871299743652, "learning_rate": 1.8454939569497898e-05, "loss": 0.5734, "step": 9786 }, { "epoch": 1.5976898902085628, "grad_norm": 2.976893424987793, "learning_rate": 1.8454597192259494e-05, "loss": 0.86, "step": 9787 }, { "epoch": 1.5978531488510672, "grad_norm": 2.6187119483947754, "learning_rate": 1.8454254780267437e-05, "loss": 0.707, "step": 9788 }, { "epoch": 1.5980164074935717, "grad_norm": 2.363149881362915, "learning_rate": 1.845391233352313e-05, "loss": 0.6024, "step": 9789 }, { "epoch": 1.598179666136076, "grad_norm": 2.631471872329712, "learning_rate": 1.8453569852027983e-05, "loss": 0.7551, "step": 9790 }, { "epoch": 1.5983429247785805, "grad_norm": 2.566864013671875, "learning_rate": 1.84532273357834e-05, "loss": 0.745, "step": 9791 }, { "epoch": 1.598506183421085, "grad_norm": 2.3022685050964355, "learning_rate": 1.845288478479079e-05, "loss": 0.519, "step": 9792 }, { "epoch": 1.5986694420635894, "grad_norm": 2.358394145965576, "learning_rate": 1.845254219905157e-05, "loss": 0.6391, "step": 9793 }, { "epoch": 1.5988327007060936, "grad_norm": 2.4024040699005127, "learning_rate": 1.8452199578567135e-05, "loss": 0.665, "step": 9794 }, { "epoch": 1.598995959348598, "grad_norm": 2.6218395233154297, "learning_rate": 1.84518569233389e-05, "loss": 0.7216, "step": 9795 }, { "epoch": 1.5991592179911023, "grad_norm": 2.129014492034912, "learning_rate": 1.845151423336827e-05, "loss": 0.6243, "step": 9796 }, { "epoch": 1.5993224766336067, "grad_norm": 2.9803249835968018, "learning_rate": 1.845117150865666e-05, "loss": 0.7043, "step": 9797 }, { "epoch": 1.5994857352761112, "grad_norm": 2.720381259918213, "learning_rate": 1.8450828749205473e-05, "loss": 0.6831, "step": 9798 }, { "epoch": 1.5996489939186156, "grad_norm": 2.647115707397461, "learning_rate": 1.845048595501612e-05, "loss": 0.7357, "step": 9799 }, { "epoch": 1.59981225256112, "grad_norm": 2.642951488494873, "learning_rate": 1.8450143126090015e-05, "loss": 0.6807, "step": 9800 }, { "epoch": 1.5999755112036245, "grad_norm": 2.1996757984161377, "learning_rate": 1.8449800262428558e-05, "loss": 0.546, "step": 9801 }, { "epoch": 1.6001387698461287, "grad_norm": 2.6040031909942627, "learning_rate": 1.844945736403316e-05, "loss": 0.7123, "step": 9802 }, { "epoch": 1.6003020284886331, "grad_norm": 2.9592738151550293, "learning_rate": 1.8449114430905238e-05, "loss": 0.8953, "step": 9803 }, { "epoch": 1.6004652871311376, "grad_norm": 2.930233955383301, "learning_rate": 1.8448771463046196e-05, "loss": 0.7156, "step": 9804 }, { "epoch": 1.6006285457736418, "grad_norm": 2.676173448562622, "learning_rate": 1.844842846045744e-05, "loss": 0.6196, "step": 9805 }, { "epoch": 1.6007918044161462, "grad_norm": 2.337907075881958, "learning_rate": 1.844808542314039e-05, "loss": 0.6316, "step": 9806 }, { "epoch": 1.6009550630586507, "grad_norm": 2.5244312286376953, "learning_rate": 1.844774235109645e-05, "loss": 0.5997, "step": 9807 }, { "epoch": 1.601118321701155, "grad_norm": 2.9479780197143555, "learning_rate": 1.8447399244327033e-05, "loss": 0.6651, "step": 9808 }, { "epoch": 1.6012815803436595, "grad_norm": 3.054053783416748, "learning_rate": 1.844705610283354e-05, "loss": 0.8126, "step": 9809 }, { "epoch": 1.601444838986164, "grad_norm": 3.0586280822753906, "learning_rate": 1.8446712926617395e-05, "loss": 0.719, "step": 9810 }, { "epoch": 1.6016080976286682, "grad_norm": 2.47894287109375, "learning_rate": 1.8446369715680003e-05, "loss": 0.558, "step": 9811 }, { "epoch": 1.6017713562711726, "grad_norm": 2.3388376235961914, "learning_rate": 1.8446026470022767e-05, "loss": 0.5781, "step": 9812 }, { "epoch": 1.6019346149136768, "grad_norm": 2.3896124362945557, "learning_rate": 1.8445683189647115e-05, "loss": 0.5523, "step": 9813 }, { "epoch": 1.6020978735561813, "grad_norm": 2.5041277408599854, "learning_rate": 1.8445339874554445e-05, "loss": 0.6187, "step": 9814 }, { "epoch": 1.6022611321986857, "grad_norm": 2.5197134017944336, "learning_rate": 1.844499652474617e-05, "loss": 0.7039, "step": 9815 }, { "epoch": 1.6024243908411901, "grad_norm": 2.2553446292877197, "learning_rate": 1.8444653140223704e-05, "loss": 0.6291, "step": 9816 }, { "epoch": 1.6025876494836946, "grad_norm": 3.0493247509002686, "learning_rate": 1.8444309720988455e-05, "loss": 0.7737, "step": 9817 }, { "epoch": 1.602750908126199, "grad_norm": 3.2906291484832764, "learning_rate": 1.8443966267041843e-05, "loss": 0.8486, "step": 9818 }, { "epoch": 1.6029141667687035, "grad_norm": 2.794926643371582, "learning_rate": 1.8443622778385268e-05, "loss": 0.6876, "step": 9819 }, { "epoch": 1.6030774254112077, "grad_norm": 2.5535573959350586, "learning_rate": 1.8443279255020153e-05, "loss": 0.6169, "step": 9820 }, { "epoch": 1.6032406840537121, "grad_norm": 2.793100595474243, "learning_rate": 1.8442935696947903e-05, "loss": 0.7054, "step": 9821 }, { "epoch": 1.6034039426962163, "grad_norm": 2.5471227169036865, "learning_rate": 1.844259210416993e-05, "loss": 0.6269, "step": 9822 }, { "epoch": 1.6035672013387208, "grad_norm": 2.881882667541504, "learning_rate": 1.844224847668765e-05, "loss": 0.7059, "step": 9823 }, { "epoch": 1.6037304599812252, "grad_norm": 2.9463648796081543, "learning_rate": 1.844190481450248e-05, "loss": 0.7786, "step": 9824 }, { "epoch": 1.6038937186237296, "grad_norm": 2.777559757232666, "learning_rate": 1.844156111761582e-05, "loss": 0.7527, "step": 9825 }, { "epoch": 1.604056977266234, "grad_norm": 2.254713773727417, "learning_rate": 1.844121738602909e-05, "loss": 0.535, "step": 9826 }, { "epoch": 1.6042202359087385, "grad_norm": 2.6306986808776855, "learning_rate": 1.8440873619743705e-05, "loss": 0.6276, "step": 9827 }, { "epoch": 1.604383494551243, "grad_norm": 3.3280210494995117, "learning_rate": 1.8440529818761078e-05, "loss": 0.7735, "step": 9828 }, { "epoch": 1.6045467531937472, "grad_norm": 2.64890718460083, "learning_rate": 1.8440185983082616e-05, "loss": 0.7401, "step": 9829 }, { "epoch": 1.6047100118362516, "grad_norm": 2.3649282455444336, "learning_rate": 1.8439842112709742e-05, "loss": 0.5929, "step": 9830 }, { "epoch": 1.6048732704787558, "grad_norm": 2.5815939903259277, "learning_rate": 1.843949820764386e-05, "loss": 0.6373, "step": 9831 }, { "epoch": 1.6050365291212603, "grad_norm": 2.6008753776550293, "learning_rate": 1.8439154267886383e-05, "loss": 0.7438, "step": 9832 }, { "epoch": 1.6051997877637647, "grad_norm": 2.2726175785064697, "learning_rate": 1.8438810293438733e-05, "loss": 0.5511, "step": 9833 }, { "epoch": 1.6053630464062691, "grad_norm": 2.9012928009033203, "learning_rate": 1.843846628430232e-05, "loss": 0.6526, "step": 9834 }, { "epoch": 1.6055263050487736, "grad_norm": 2.6131129264831543, "learning_rate": 1.843812224047856e-05, "loss": 0.6318, "step": 9835 }, { "epoch": 1.605689563691278, "grad_norm": 2.783357620239258, "learning_rate": 1.8437778161968866e-05, "loss": 0.5968, "step": 9836 }, { "epoch": 1.6058528223337825, "grad_norm": 2.3541746139526367, "learning_rate": 1.843743404877465e-05, "loss": 0.5955, "step": 9837 }, { "epoch": 1.6060160809762867, "grad_norm": 2.9205708503723145, "learning_rate": 1.843708990089733e-05, "loss": 0.7821, "step": 9838 }, { "epoch": 1.6061793396187911, "grad_norm": 2.859710931777954, "learning_rate": 1.843674571833832e-05, "loss": 0.714, "step": 9839 }, { "epoch": 1.6063425982612953, "grad_norm": 2.9925427436828613, "learning_rate": 1.8436401501099035e-05, "loss": 0.7538, "step": 9840 }, { "epoch": 1.6065058569037998, "grad_norm": 3.2286741733551025, "learning_rate": 1.8436057249180885e-05, "loss": 0.7311, "step": 9841 }, { "epoch": 1.6066691155463042, "grad_norm": 2.8984477519989014, "learning_rate": 1.8435712962585294e-05, "loss": 0.827, "step": 9842 }, { "epoch": 1.6068323741888086, "grad_norm": 2.445122241973877, "learning_rate": 1.8435368641313668e-05, "loss": 0.4709, "step": 9843 }, { "epoch": 1.606995632831313, "grad_norm": 2.7069284915924072, "learning_rate": 1.8435024285367427e-05, "loss": 0.6795, "step": 9844 }, { "epoch": 1.6071588914738175, "grad_norm": 3.420598030090332, "learning_rate": 1.8434679894747988e-05, "loss": 0.9347, "step": 9845 }, { "epoch": 1.6073221501163217, "grad_norm": 2.820997953414917, "learning_rate": 1.8434335469456762e-05, "loss": 0.6592, "step": 9846 }, { "epoch": 1.6074854087588262, "grad_norm": 2.4675843715667725, "learning_rate": 1.843399100949517e-05, "loss": 0.6857, "step": 9847 }, { "epoch": 1.6076486674013306, "grad_norm": 2.851980209350586, "learning_rate": 1.8433646514864625e-05, "loss": 0.7691, "step": 9848 }, { "epoch": 1.6078119260438348, "grad_norm": 2.7024405002593994, "learning_rate": 1.8433301985566542e-05, "loss": 0.6699, "step": 9849 }, { "epoch": 1.6079751846863393, "grad_norm": 2.9880568981170654, "learning_rate": 1.8432957421602342e-05, "loss": 0.7163, "step": 9850 }, { "epoch": 1.6081384433288437, "grad_norm": 2.8481197357177734, "learning_rate": 1.8432612822973437e-05, "loss": 0.6868, "step": 9851 }, { "epoch": 1.6083017019713481, "grad_norm": 2.5535504817962646, "learning_rate": 1.8432268189681247e-05, "loss": 0.6684, "step": 9852 }, { "epoch": 1.6084649606138526, "grad_norm": 2.943460464477539, "learning_rate": 1.843192352172718e-05, "loss": 0.8426, "step": 9853 }, { "epoch": 1.608628219256357, "grad_norm": 2.602778911590576, "learning_rate": 1.8431578819112666e-05, "loss": 0.7656, "step": 9854 }, { "epoch": 1.6087914778988612, "grad_norm": 2.7942299842834473, "learning_rate": 1.843123408183911e-05, "loss": 0.7323, "step": 9855 }, { "epoch": 1.6089547365413657, "grad_norm": 2.54101824760437, "learning_rate": 1.843088930990794e-05, "loss": 0.7204, "step": 9856 }, { "epoch": 1.6091179951838699, "grad_norm": 2.5860331058502197, "learning_rate": 1.8430544503320563e-05, "loss": 0.6168, "step": 9857 }, { "epoch": 1.6092812538263743, "grad_norm": 2.376504421234131, "learning_rate": 1.84301996620784e-05, "loss": 0.6229, "step": 9858 }, { "epoch": 1.6094445124688788, "grad_norm": 2.6657052040100098, "learning_rate": 1.8429854786182875e-05, "loss": 0.7335, "step": 9859 }, { "epoch": 1.6096077711113832, "grad_norm": 2.7318501472473145, "learning_rate": 1.8429509875635395e-05, "loss": 0.7136, "step": 9860 }, { "epoch": 1.6097710297538876, "grad_norm": 2.724975109100342, "learning_rate": 1.8429164930437386e-05, "loss": 0.5655, "step": 9861 }, { "epoch": 1.609934288396392, "grad_norm": 3.0944032669067383, "learning_rate": 1.842881995059026e-05, "loss": 0.8694, "step": 9862 }, { "epoch": 1.6100975470388965, "grad_norm": 2.339808702468872, "learning_rate": 1.842847493609544e-05, "loss": 0.5867, "step": 9863 }, { "epoch": 1.6102608056814007, "grad_norm": 3.165966749191284, "learning_rate": 1.842812988695434e-05, "loss": 0.9407, "step": 9864 }, { "epoch": 1.6104240643239052, "grad_norm": 2.5013155937194824, "learning_rate": 1.842778480316838e-05, "loss": 0.6842, "step": 9865 }, { "epoch": 1.6105873229664094, "grad_norm": 2.7348196506500244, "learning_rate": 1.842743968473898e-05, "loss": 0.8707, "step": 9866 }, { "epoch": 1.6107505816089138, "grad_norm": 2.723392963409424, "learning_rate": 1.842709453166756e-05, "loss": 0.7244, "step": 9867 }, { "epoch": 1.6109138402514183, "grad_norm": 2.265810966491699, "learning_rate": 1.8426749343955535e-05, "loss": 0.5308, "step": 9868 }, { "epoch": 1.6110770988939227, "grad_norm": 2.4542124271392822, "learning_rate": 1.8426404121604324e-05, "loss": 0.6105, "step": 9869 }, { "epoch": 1.6112403575364271, "grad_norm": 2.412761926651001, "learning_rate": 1.8426058864615347e-05, "loss": 0.6366, "step": 9870 }, { "epoch": 1.6114036161789316, "grad_norm": 2.535468339920044, "learning_rate": 1.8425713572990024e-05, "loss": 0.6996, "step": 9871 }, { "epoch": 1.611566874821436, "grad_norm": 3.052272081375122, "learning_rate": 1.8425368246729778e-05, "loss": 0.7421, "step": 9872 }, { "epoch": 1.6117301334639402, "grad_norm": 3.0529184341430664, "learning_rate": 1.8425022885836022e-05, "loss": 0.8154, "step": 9873 }, { "epoch": 1.6118933921064447, "grad_norm": 3.041011333465576, "learning_rate": 1.8424677490310175e-05, "loss": 0.7206, "step": 9874 }, { "epoch": 1.6120566507489489, "grad_norm": 3.074288845062256, "learning_rate": 1.8424332060153664e-05, "loss": 0.8203, "step": 9875 }, { "epoch": 1.6122199093914533, "grad_norm": 2.8362791538238525, "learning_rate": 1.8423986595367905e-05, "loss": 0.7919, "step": 9876 }, { "epoch": 1.6123831680339578, "grad_norm": 2.470174551010132, "learning_rate": 1.8423641095954315e-05, "loss": 0.6741, "step": 9877 }, { "epoch": 1.6125464266764622, "grad_norm": 2.823734760284424, "learning_rate": 1.8423295561914315e-05, "loss": 0.7758, "step": 9878 }, { "epoch": 1.6127096853189666, "grad_norm": 2.9765803813934326, "learning_rate": 1.8422949993249337e-05, "loss": 0.837, "step": 9879 }, { "epoch": 1.612872943961471, "grad_norm": 2.789991617202759, "learning_rate": 1.8422604389960782e-05, "loss": 0.7727, "step": 9880 }, { "epoch": 1.6130362026039755, "grad_norm": 2.812394857406616, "learning_rate": 1.8422258752050088e-05, "loss": 0.7077, "step": 9881 }, { "epoch": 1.6131994612464797, "grad_norm": 2.5545804500579834, "learning_rate": 1.8421913079518663e-05, "loss": 0.7095, "step": 9882 }, { "epoch": 1.6133627198889842, "grad_norm": 2.749659776687622, "learning_rate": 1.8421567372367935e-05, "loss": 0.6397, "step": 9883 }, { "epoch": 1.6135259785314884, "grad_norm": 2.9897420406341553, "learning_rate": 1.8421221630599326e-05, "loss": 0.7287, "step": 9884 }, { "epoch": 1.6136892371739928, "grad_norm": 2.5975852012634277, "learning_rate": 1.8420875854214252e-05, "loss": 0.6445, "step": 9885 }, { "epoch": 1.6138524958164973, "grad_norm": 2.9479541778564453, "learning_rate": 1.8420530043214137e-05, "loss": 0.7398, "step": 9886 }, { "epoch": 1.6140157544590017, "grad_norm": 2.478689670562744, "learning_rate": 1.8420184197600403e-05, "loss": 0.5792, "step": 9887 }, { "epoch": 1.6141790131015061, "grad_norm": 2.6234123706817627, "learning_rate": 1.841983831737447e-05, "loss": 0.712, "step": 9888 }, { "epoch": 1.6143422717440106, "grad_norm": 3.4297640323638916, "learning_rate": 1.841949240253776e-05, "loss": 0.9494, "step": 9889 }, { "epoch": 1.6145055303865148, "grad_norm": 3.3359923362731934, "learning_rate": 1.8419146453091703e-05, "loss": 0.8055, "step": 9890 }, { "epoch": 1.6146687890290192, "grad_norm": 2.435471296310425, "learning_rate": 1.841880046903771e-05, "loss": 0.7521, "step": 9891 }, { "epoch": 1.6148320476715237, "grad_norm": 2.595961332321167, "learning_rate": 1.8418454450377204e-05, "loss": 0.6937, "step": 9892 }, { "epoch": 1.6149953063140279, "grad_norm": 2.908890724182129, "learning_rate": 1.8418108397111615e-05, "loss": 0.714, "step": 9893 }, { "epoch": 1.6151585649565323, "grad_norm": 2.289076328277588, "learning_rate": 1.841776230924236e-05, "loss": 0.4886, "step": 9894 }, { "epoch": 1.6153218235990368, "grad_norm": 2.1705448627471924, "learning_rate": 1.8417416186770858e-05, "loss": 0.5777, "step": 9895 }, { "epoch": 1.6154850822415412, "grad_norm": 2.251519203186035, "learning_rate": 1.8417070029698542e-05, "loss": 0.6152, "step": 9896 }, { "epoch": 1.6156483408840456, "grad_norm": 2.898639440536499, "learning_rate": 1.8416723838026828e-05, "loss": 0.8371, "step": 9897 }, { "epoch": 1.61581159952655, "grad_norm": 2.5165176391601562, "learning_rate": 1.8416377611757138e-05, "loss": 0.6504, "step": 9898 }, { "epoch": 1.6159748581690543, "grad_norm": 2.3043129444122314, "learning_rate": 1.84160313508909e-05, "loss": 0.6378, "step": 9899 }, { "epoch": 1.6161381168115587, "grad_norm": 2.644026279449463, "learning_rate": 1.8415685055429534e-05, "loss": 0.7485, "step": 9900 }, { "epoch": 1.616301375454063, "grad_norm": 2.6029608249664307, "learning_rate": 1.8415338725374466e-05, "loss": 0.7745, "step": 9901 }, { "epoch": 1.6164646340965674, "grad_norm": 2.694364547729492, "learning_rate": 1.8414992360727114e-05, "loss": 0.7615, "step": 9902 }, { "epoch": 1.6166278927390718, "grad_norm": 2.368433952331543, "learning_rate": 1.841464596148891e-05, "loss": 0.6004, "step": 9903 }, { "epoch": 1.6167911513815763, "grad_norm": 3.0695559978485107, "learning_rate": 1.8414299527661273e-05, "loss": 0.7722, "step": 9904 }, { "epoch": 1.6169544100240807, "grad_norm": 2.997990369796753, "learning_rate": 1.8413953059245626e-05, "loss": 0.8255, "step": 9905 }, { "epoch": 1.6171176686665851, "grad_norm": 3.1900105476379395, "learning_rate": 1.8413606556243398e-05, "loss": 0.8177, "step": 9906 }, { "epoch": 1.6172809273090896, "grad_norm": 2.462012529373169, "learning_rate": 1.841326001865601e-05, "loss": 0.5554, "step": 9907 }, { "epoch": 1.6174441859515938, "grad_norm": 3.2652852535247803, "learning_rate": 1.841291344648488e-05, "loss": 0.8221, "step": 9908 }, { "epoch": 1.6176074445940982, "grad_norm": 2.5453131198883057, "learning_rate": 1.8412566839731446e-05, "loss": 0.6238, "step": 9909 }, { "epoch": 1.6177707032366024, "grad_norm": 3.062697649002075, "learning_rate": 1.8412220198397127e-05, "loss": 0.6699, "step": 9910 }, { "epoch": 1.6179339618791069, "grad_norm": 2.5614118576049805, "learning_rate": 1.8411873522483344e-05, "loss": 0.6108, "step": 9911 }, { "epoch": 1.6180972205216113, "grad_norm": 3.3055951595306396, "learning_rate": 1.8411526811991526e-05, "loss": 0.879, "step": 9912 }, { "epoch": 1.6182604791641158, "grad_norm": 2.477754592895508, "learning_rate": 1.84111800669231e-05, "loss": 0.569, "step": 9913 }, { "epoch": 1.6184237378066202, "grad_norm": 2.5199198722839355, "learning_rate": 1.8410833287279485e-05, "loss": 0.6654, "step": 9914 }, { "epoch": 1.6185869964491246, "grad_norm": 2.877474069595337, "learning_rate": 1.8410486473062112e-05, "loss": 0.7049, "step": 9915 }, { "epoch": 1.618750255091629, "grad_norm": 2.8892529010772705, "learning_rate": 1.8410139624272405e-05, "loss": 0.6628, "step": 9916 }, { "epoch": 1.6189135137341333, "grad_norm": 3.0500855445861816, "learning_rate": 1.840979274091179e-05, "loss": 0.5844, "step": 9917 }, { "epoch": 1.6190767723766377, "grad_norm": 2.671443223953247, "learning_rate": 1.8409445822981694e-05, "loss": 0.6794, "step": 9918 }, { "epoch": 1.619240031019142, "grad_norm": 2.880211114883423, "learning_rate": 1.840909887048354e-05, "loss": 0.6955, "step": 9919 }, { "epoch": 1.6194032896616464, "grad_norm": 2.389251708984375, "learning_rate": 1.8408751883418753e-05, "loss": 0.5283, "step": 9920 }, { "epoch": 1.6195665483041508, "grad_norm": 2.797762155532837, "learning_rate": 1.8408404861788765e-05, "loss": 0.6663, "step": 9921 }, { "epoch": 1.6197298069466552, "grad_norm": 3.2945711612701416, "learning_rate": 1.8408057805594998e-05, "loss": 0.8824, "step": 9922 }, { "epoch": 1.6198930655891597, "grad_norm": 2.5252881050109863, "learning_rate": 1.840771071483888e-05, "loss": 0.6139, "step": 9923 }, { "epoch": 1.6200563242316641, "grad_norm": 2.418144941329956, "learning_rate": 1.8407363589521844e-05, "loss": 0.6274, "step": 9924 }, { "epoch": 1.6202195828741686, "grad_norm": 2.4366207122802734, "learning_rate": 1.8407016429645305e-05, "loss": 0.712, "step": 9925 }, { "epoch": 1.6203828415166728, "grad_norm": 2.7769734859466553, "learning_rate": 1.8406669235210695e-05, "loss": 0.7437, "step": 9926 }, { "epoch": 1.6205461001591772, "grad_norm": 2.446730852127075, "learning_rate": 1.8406322006219445e-05, "loss": 0.6423, "step": 9927 }, { "epoch": 1.6207093588016814, "grad_norm": 2.2879271507263184, "learning_rate": 1.8405974742672978e-05, "loss": 0.5833, "step": 9928 }, { "epoch": 1.6208726174441859, "grad_norm": 2.8605902194976807, "learning_rate": 1.8405627444572723e-05, "loss": 0.7022, "step": 9929 }, { "epoch": 1.6210358760866903, "grad_norm": 2.5872714519500732, "learning_rate": 1.8405280111920107e-05, "loss": 0.6627, "step": 9930 }, { "epoch": 1.6211991347291947, "grad_norm": 2.810159921646118, "learning_rate": 1.840493274471656e-05, "loss": 0.7697, "step": 9931 }, { "epoch": 1.6213623933716992, "grad_norm": 2.4989936351776123, "learning_rate": 1.8404585342963507e-05, "loss": 0.5773, "step": 9932 }, { "epoch": 1.6215256520142036, "grad_norm": 2.948584794998169, "learning_rate": 1.8404237906662376e-05, "loss": 0.8436, "step": 9933 }, { "epoch": 1.6216889106567078, "grad_norm": 2.879362106323242, "learning_rate": 1.84038904358146e-05, "loss": 0.767, "step": 9934 }, { "epoch": 1.6218521692992123, "grad_norm": 2.851813554763794, "learning_rate": 1.8403542930421602e-05, "loss": 0.6912, "step": 9935 }, { "epoch": 1.6220154279417167, "grad_norm": 3.0709993839263916, "learning_rate": 1.8403195390484808e-05, "loss": 0.8751, "step": 9936 }, { "epoch": 1.622178686584221, "grad_norm": 2.233062267303467, "learning_rate": 1.8402847816005654e-05, "loss": 0.6741, "step": 9937 }, { "epoch": 1.6223419452267254, "grad_norm": 2.896531105041504, "learning_rate": 1.8402500206985567e-05, "loss": 0.7164, "step": 9938 }, { "epoch": 1.6225052038692298, "grad_norm": 2.3510236740112305, "learning_rate": 1.840215256342597e-05, "loss": 0.5374, "step": 9939 }, { "epoch": 1.6226684625117342, "grad_norm": 2.4754796028137207, "learning_rate": 1.84018048853283e-05, "loss": 0.6039, "step": 9940 }, { "epoch": 1.6228317211542387, "grad_norm": 2.715977907180786, "learning_rate": 1.8401457172693984e-05, "loss": 0.6183, "step": 9941 }, { "epoch": 1.6229949797967431, "grad_norm": 3.089266777038574, "learning_rate": 1.8401109425524447e-05, "loss": 0.7521, "step": 9942 }, { "epoch": 1.6231582384392473, "grad_norm": 2.535752296447754, "learning_rate": 1.8400761643821122e-05, "loss": 0.6453, "step": 9943 }, { "epoch": 1.6233214970817518, "grad_norm": 2.1762173175811768, "learning_rate": 1.840041382758544e-05, "loss": 0.5921, "step": 9944 }, { "epoch": 1.623484755724256, "grad_norm": 2.795144557952881, "learning_rate": 1.8400065976818826e-05, "loss": 0.6667, "step": 9945 }, { "epoch": 1.6236480143667604, "grad_norm": 3.2105660438537598, "learning_rate": 1.8399718091522715e-05, "loss": 0.7689, "step": 9946 }, { "epoch": 1.6238112730092649, "grad_norm": 2.8045096397399902, "learning_rate": 1.8399370171698532e-05, "loss": 0.5692, "step": 9947 }, { "epoch": 1.6239745316517693, "grad_norm": 2.3185176849365234, "learning_rate": 1.839902221734771e-05, "loss": 0.5839, "step": 9948 }, { "epoch": 1.6241377902942737, "grad_norm": 2.7122583389282227, "learning_rate": 1.839867422847168e-05, "loss": 0.7323, "step": 9949 }, { "epoch": 1.6243010489367782, "grad_norm": 2.6131677627563477, "learning_rate": 1.8398326205071875e-05, "loss": 0.6421, "step": 9950 }, { "epoch": 1.6244643075792826, "grad_norm": 3.0188958644866943, "learning_rate": 1.8397978147149717e-05, "loss": 0.9222, "step": 9951 }, { "epoch": 1.6246275662217868, "grad_norm": 3.2642862796783447, "learning_rate": 1.8397630054706643e-05, "loss": 1.4152, "step": 9952 }, { "epoch": 1.6247908248642913, "grad_norm": 2.610060691833496, "learning_rate": 1.8397281927744083e-05, "loss": 0.8065, "step": 9953 }, { "epoch": 1.6249540835067955, "grad_norm": 2.8523144721984863, "learning_rate": 1.839693376626347e-05, "loss": 1.0193, "step": 9954 }, { "epoch": 1.6251173421493, "grad_norm": 2.8802130222320557, "learning_rate": 1.839658557026623e-05, "loss": 0.6642, "step": 9955 }, { "epoch": 1.6252806007918044, "grad_norm": 2.6292624473571777, "learning_rate": 1.83962373397538e-05, "loss": 0.6622, "step": 9956 }, { "epoch": 1.6254438594343088, "grad_norm": 2.076988697052002, "learning_rate": 1.8395889074727606e-05, "loss": 0.5003, "step": 9957 }, { "epoch": 1.6256071180768132, "grad_norm": 2.605739116668701, "learning_rate": 1.839554077518908e-05, "loss": 0.6602, "step": 9958 }, { "epoch": 1.6257703767193177, "grad_norm": 2.9736547470092773, "learning_rate": 1.8395192441139663e-05, "loss": 0.8397, "step": 9959 }, { "epoch": 1.6259336353618221, "grad_norm": 3.348750591278076, "learning_rate": 1.8394844072580772e-05, "loss": 0.9617, "step": 9960 }, { "epoch": 1.6260968940043263, "grad_norm": 2.3417725563049316, "learning_rate": 1.8394495669513853e-05, "loss": 0.613, "step": 9961 }, { "epoch": 1.6262601526468308, "grad_norm": 2.5466530323028564, "learning_rate": 1.839414723194033e-05, "loss": 0.6553, "step": 9962 }, { "epoch": 1.626423411289335, "grad_norm": 2.6071786880493164, "learning_rate": 1.8393798759861637e-05, "loss": 0.7126, "step": 9963 }, { "epoch": 1.6265866699318394, "grad_norm": 2.5587856769561768, "learning_rate": 1.8393450253279205e-05, "loss": 0.5999, "step": 9964 }, { "epoch": 1.6267499285743439, "grad_norm": 3.027336359024048, "learning_rate": 1.839310171219447e-05, "loss": 0.7374, "step": 9965 }, { "epoch": 1.6269131872168483, "grad_norm": 3.2248220443725586, "learning_rate": 1.8392753136608866e-05, "loss": 0.5183, "step": 9966 }, { "epoch": 1.6270764458593527, "grad_norm": 2.8337535858154297, "learning_rate": 1.8392404526523816e-05, "loss": 0.7626, "step": 9967 }, { "epoch": 1.6272397045018572, "grad_norm": 2.7331888675689697, "learning_rate": 1.8392055881940767e-05, "loss": 0.6715, "step": 9968 }, { "epoch": 1.6274029631443616, "grad_norm": 2.8361268043518066, "learning_rate": 1.8391707202861143e-05, "loss": 0.7599, "step": 9969 }, { "epoch": 1.6275662217868658, "grad_norm": 3.001166582107544, "learning_rate": 1.8391358489286376e-05, "loss": 0.7173, "step": 9970 }, { "epoch": 1.6277294804293703, "grad_norm": 2.7394073009490967, "learning_rate": 1.8391009741217907e-05, "loss": 0.6887, "step": 9971 }, { "epoch": 1.6278927390718745, "grad_norm": 3.1386263370513916, "learning_rate": 1.8390660958657162e-05, "loss": 0.8225, "step": 9972 }, { "epoch": 1.628055997714379, "grad_norm": 2.777771472930908, "learning_rate": 1.839031214160558e-05, "loss": 0.7126, "step": 9973 }, { "epoch": 1.6282192563568834, "grad_norm": 3.195676326751709, "learning_rate": 1.8389963290064594e-05, "loss": 1.3724, "step": 9974 }, { "epoch": 1.6283825149993878, "grad_norm": 2.844430446624756, "learning_rate": 1.8389614404035633e-05, "loss": 0.6626, "step": 9975 }, { "epoch": 1.6285457736418922, "grad_norm": 3.0177555084228516, "learning_rate": 1.838926548352014e-05, "loss": 0.8329, "step": 9976 }, { "epoch": 1.6287090322843967, "grad_norm": 3.1075916290283203, "learning_rate": 1.838891652851954e-05, "loss": 0.8217, "step": 9977 }, { "epoch": 1.628872290926901, "grad_norm": 2.714592933654785, "learning_rate": 1.8388567539035273e-05, "loss": 0.6841, "step": 9978 }, { "epoch": 1.6290355495694053, "grad_norm": 1.9628615379333496, "learning_rate": 1.8388218515068776e-05, "loss": 0.5018, "step": 9979 }, { "epoch": 1.6291988082119098, "grad_norm": 2.637895345687866, "learning_rate": 1.8387869456621475e-05, "loss": 0.647, "step": 9980 }, { "epoch": 1.629362066854414, "grad_norm": 2.50113844871521, "learning_rate": 1.8387520363694813e-05, "loss": 0.6351, "step": 9981 }, { "epoch": 1.6295253254969184, "grad_norm": 2.9133718013763428, "learning_rate": 1.8387171236290218e-05, "loss": 0.7915, "step": 9982 }, { "epoch": 1.6296885841394229, "grad_norm": 2.9589643478393555, "learning_rate": 1.8386822074409132e-05, "loss": 0.8588, "step": 9983 }, { "epoch": 1.6298518427819273, "grad_norm": 3.1760339736938477, "learning_rate": 1.838647287805299e-05, "loss": 0.7013, "step": 9984 }, { "epoch": 1.6300151014244317, "grad_norm": 2.736907482147217, "learning_rate": 1.8386123647223218e-05, "loss": 0.6284, "step": 9985 }, { "epoch": 1.6301783600669362, "grad_norm": 2.4431753158569336, "learning_rate": 1.8385774381921262e-05, "loss": 0.613, "step": 9986 }, { "epoch": 1.6303416187094404, "grad_norm": 2.488502025604248, "learning_rate": 1.8385425082148556e-05, "loss": 0.6546, "step": 9987 }, { "epoch": 1.6305048773519448, "grad_norm": 2.627031087875366, "learning_rate": 1.8385075747906527e-05, "loss": 0.88, "step": 9988 }, { "epoch": 1.630668135994449, "grad_norm": 2.616274118423462, "learning_rate": 1.8384726379196625e-05, "loss": 0.7312, "step": 9989 }, { "epoch": 1.6308313946369535, "grad_norm": 2.7001543045043945, "learning_rate": 1.8384376976020277e-05, "loss": 0.5414, "step": 9990 }, { "epoch": 1.630994653279458, "grad_norm": 2.546851873397827, "learning_rate": 1.8384027538378918e-05, "loss": 0.6734, "step": 9991 }, { "epoch": 1.6311579119219624, "grad_norm": 2.3067476749420166, "learning_rate": 1.838367806627399e-05, "loss": 0.4992, "step": 9992 }, { "epoch": 1.6313211705644668, "grad_norm": 2.8875322341918945, "learning_rate": 1.8383328559706928e-05, "loss": 0.5988, "step": 9993 }, { "epoch": 1.6314844292069712, "grad_norm": 3.1054210662841797, "learning_rate": 1.838297901867917e-05, "loss": 0.7048, "step": 9994 }, { "epoch": 1.6316476878494757, "grad_norm": 2.7816507816314697, "learning_rate": 1.8382629443192143e-05, "loss": 0.7641, "step": 9995 }, { "epoch": 1.6318109464919799, "grad_norm": 2.804454803466797, "learning_rate": 1.8382279833247297e-05, "loss": 0.6937, "step": 9996 }, { "epoch": 1.6319742051344843, "grad_norm": 2.5247368812561035, "learning_rate": 1.8381930188846063e-05, "loss": 0.5981, "step": 9997 }, { "epoch": 1.6321374637769885, "grad_norm": 2.682345151901245, "learning_rate": 1.8381580509989876e-05, "loss": 0.6962, "step": 9998 }, { "epoch": 1.632300722419493, "grad_norm": 3.1064341068267822, "learning_rate": 1.838123079668018e-05, "loss": 0.7271, "step": 9999 }, { "epoch": 1.6324639810619974, "grad_norm": 3.025204658508301, "learning_rate": 1.8380881048918406e-05, "loss": 0.5889, "step": 10000 }, { "epoch": 1.6326272397045019, "grad_norm": 2.750318765640259, "learning_rate": 1.8380531266706e-05, "loss": 0.7624, "step": 10001 }, { "epoch": 1.6327904983470063, "grad_norm": 3.4014196395874023, "learning_rate": 1.838018145004439e-05, "loss": 0.7272, "step": 10002 }, { "epoch": 1.6329537569895107, "grad_norm": 2.468620777130127, "learning_rate": 1.837983159893502e-05, "loss": 0.6292, "step": 10003 }, { "epoch": 1.6331170156320152, "grad_norm": 2.414870023727417, "learning_rate": 1.8379481713379324e-05, "loss": 0.5833, "step": 10004 }, { "epoch": 1.6332802742745194, "grad_norm": 2.315347194671631, "learning_rate": 1.8379131793378743e-05, "loss": 0.5537, "step": 10005 }, { "epoch": 1.6334435329170238, "grad_norm": 2.2007155418395996, "learning_rate": 1.8378781838934715e-05, "loss": 0.4971, "step": 10006 }, { "epoch": 1.633606791559528, "grad_norm": 3.006070137023926, "learning_rate": 1.8378431850048684e-05, "loss": 0.7716, "step": 10007 }, { "epoch": 1.6337700502020325, "grad_norm": 2.534623384475708, "learning_rate": 1.8378081826722077e-05, "loss": 0.6488, "step": 10008 }, { "epoch": 1.633933308844537, "grad_norm": 3.3188893795013428, "learning_rate": 1.837773176895634e-05, "loss": 0.7615, "step": 10009 }, { "epoch": 1.6340965674870414, "grad_norm": 2.9687423706054688, "learning_rate": 1.8377381676752914e-05, "loss": 0.7109, "step": 10010 }, { "epoch": 1.6342598261295458, "grad_norm": 2.3405516147613525, "learning_rate": 1.8377031550113234e-05, "loss": 0.7069, "step": 10011 }, { "epoch": 1.6344230847720502, "grad_norm": 2.781630516052246, "learning_rate": 1.837668138903874e-05, "loss": 0.6683, "step": 10012 }, { "epoch": 1.6345863434145547, "grad_norm": 2.9970946311950684, "learning_rate": 1.837633119353087e-05, "loss": 0.8159, "step": 10013 }, { "epoch": 1.6347496020570589, "grad_norm": 3.3355839252471924, "learning_rate": 1.8375980963591068e-05, "loss": 0.8367, "step": 10014 }, { "epoch": 1.6349128606995633, "grad_norm": 2.3416454792022705, "learning_rate": 1.837563069922077e-05, "loss": 0.4015, "step": 10015 }, { "epoch": 1.6350761193420675, "grad_norm": 2.5177550315856934, "learning_rate": 1.837528040042142e-05, "loss": 0.7501, "step": 10016 }, { "epoch": 1.635239377984572, "grad_norm": 2.4590563774108887, "learning_rate": 1.837493006719445e-05, "loss": 0.6853, "step": 10017 }, { "epoch": 1.6354026366270764, "grad_norm": 2.5370912551879883, "learning_rate": 1.8374579699541307e-05, "loss": 0.6008, "step": 10018 }, { "epoch": 1.6355658952695808, "grad_norm": 2.4773623943328857, "learning_rate": 1.837422929746343e-05, "loss": 0.592, "step": 10019 }, { "epoch": 1.6357291539120853, "grad_norm": 3.0041587352752686, "learning_rate": 1.8373878860962255e-05, "loss": 0.8252, "step": 10020 }, { "epoch": 1.6358924125545897, "grad_norm": 2.949995279312134, "learning_rate": 1.8373528390039227e-05, "loss": 0.8335, "step": 10021 }, { "epoch": 1.636055671197094, "grad_norm": 2.4528236389160156, "learning_rate": 1.837317788469579e-05, "loss": 0.6066, "step": 10022 }, { "epoch": 1.6362189298395984, "grad_norm": 2.662097930908203, "learning_rate": 1.8372827344933373e-05, "loss": 0.6284, "step": 10023 }, { "epoch": 1.6363821884821028, "grad_norm": 2.876938581466675, "learning_rate": 1.8372476770753427e-05, "loss": 0.7552, "step": 10024 }, { "epoch": 1.636545447124607, "grad_norm": 2.6739158630371094, "learning_rate": 1.8372126162157394e-05, "loss": 0.7511, "step": 10025 }, { "epoch": 1.6367087057671115, "grad_norm": 2.8659162521362305, "learning_rate": 1.837177551914671e-05, "loss": 0.6738, "step": 10026 }, { "epoch": 1.636871964409616, "grad_norm": 2.543823719024658, "learning_rate": 1.8371424841722817e-05, "loss": 0.6889, "step": 10027 }, { "epoch": 1.6370352230521203, "grad_norm": 2.362072229385376, "learning_rate": 1.837107412988716e-05, "loss": 0.6143, "step": 10028 }, { "epoch": 1.6371984816946248, "grad_norm": 2.7272138595581055, "learning_rate": 1.8370723383641174e-05, "loss": 0.7087, "step": 10029 }, { "epoch": 1.6373617403371292, "grad_norm": 3.0139060020446777, "learning_rate": 1.8370372602986305e-05, "loss": 0.7885, "step": 10030 }, { "epoch": 1.6375249989796334, "grad_norm": 2.5209131240844727, "learning_rate": 1.8370021787923996e-05, "loss": 0.6229, "step": 10031 }, { "epoch": 1.6376882576221379, "grad_norm": 2.4004628658294678, "learning_rate": 1.836967093845569e-05, "loss": 0.6137, "step": 10032 }, { "epoch": 1.637851516264642, "grad_norm": 3.1695539951324463, "learning_rate": 1.8369320054582825e-05, "loss": 0.8519, "step": 10033 }, { "epoch": 1.6380147749071465, "grad_norm": 2.7293474674224854, "learning_rate": 1.8368969136306844e-05, "loss": 0.6606, "step": 10034 }, { "epoch": 1.638178033549651, "grad_norm": 2.702458143234253, "learning_rate": 1.8368618183629196e-05, "loss": 0.6962, "step": 10035 }, { "epoch": 1.6383412921921554, "grad_norm": 2.6020872592926025, "learning_rate": 1.8368267196551313e-05, "loss": 0.6347, "step": 10036 }, { "epoch": 1.6385045508346598, "grad_norm": 2.7158548831939697, "learning_rate": 1.8367916175074647e-05, "loss": 0.706, "step": 10037 }, { "epoch": 1.6386678094771643, "grad_norm": 2.574604034423828, "learning_rate": 1.8367565119200637e-05, "loss": 0.6214, "step": 10038 }, { "epoch": 1.6388310681196687, "grad_norm": 2.8297278881073, "learning_rate": 1.8367214028930723e-05, "loss": 0.6472, "step": 10039 }, { "epoch": 1.638994326762173, "grad_norm": 2.705944299697876, "learning_rate": 1.8366862904266355e-05, "loss": 0.649, "step": 10040 }, { "epoch": 1.6391575854046774, "grad_norm": 2.594714879989624, "learning_rate": 1.8366511745208973e-05, "loss": 0.7222, "step": 10041 }, { "epoch": 1.6393208440471816, "grad_norm": 2.931807279586792, "learning_rate": 1.836616055176002e-05, "loss": 0.8872, "step": 10042 }, { "epoch": 1.639484102689686, "grad_norm": 2.308946371078491, "learning_rate": 1.8365809323920934e-05, "loss": 0.5331, "step": 10043 }, { "epoch": 1.6396473613321905, "grad_norm": 2.967707395553589, "learning_rate": 1.8365458061693174e-05, "loss": 0.7818, "step": 10044 }, { "epoch": 1.639810619974695, "grad_norm": 2.4083566665649414, "learning_rate": 1.836510676507817e-05, "loss": 0.5507, "step": 10045 }, { "epoch": 1.6399738786171993, "grad_norm": 2.9095077514648438, "learning_rate": 1.836475543407737e-05, "loss": 0.6545, "step": 10046 }, { "epoch": 1.6401371372597038, "grad_norm": 3.2851130962371826, "learning_rate": 1.836440406869222e-05, "loss": 0.7853, "step": 10047 }, { "epoch": 1.6403003959022082, "grad_norm": 2.70876145362854, "learning_rate": 1.8364052668924163e-05, "loss": 0.695, "step": 10048 }, { "epoch": 1.6404636545447124, "grad_norm": 2.5738723278045654, "learning_rate": 1.8363701234774644e-05, "loss": 0.6916, "step": 10049 }, { "epoch": 1.6406269131872169, "grad_norm": 3.0414345264434814, "learning_rate": 1.8363349766245108e-05, "loss": 0.8027, "step": 10050 }, { "epoch": 1.640790171829721, "grad_norm": 2.7152140140533447, "learning_rate": 1.8362998263337e-05, "loss": 0.7089, "step": 10051 }, { "epoch": 1.6409534304722255, "grad_norm": 2.6891250610351562, "learning_rate": 1.8362646726051762e-05, "loss": 0.6606, "step": 10052 }, { "epoch": 1.64111668911473, "grad_norm": 2.8018581867218018, "learning_rate": 1.8362295154390838e-05, "loss": 0.6311, "step": 10053 }, { "epoch": 1.6412799477572344, "grad_norm": 2.567225694656372, "learning_rate": 1.836194354835568e-05, "loss": 0.5539, "step": 10054 }, { "epoch": 1.6414432063997388, "grad_norm": 2.9783973693847656, "learning_rate": 1.836159190794773e-05, "loss": 0.6468, "step": 10055 }, { "epoch": 1.6416064650422433, "grad_norm": 2.813953161239624, "learning_rate": 1.836124023316843e-05, "loss": 0.8048, "step": 10056 }, { "epoch": 1.6417697236847477, "grad_norm": 2.6557204723358154, "learning_rate": 1.8360888524019233e-05, "loss": 0.6771, "step": 10057 }, { "epoch": 1.641932982327252, "grad_norm": 3.176697015762329, "learning_rate": 1.8360536780501575e-05, "loss": 0.6987, "step": 10058 }, { "epoch": 1.6420962409697564, "grad_norm": 3.2345335483551025, "learning_rate": 1.836018500261691e-05, "loss": 0.6927, "step": 10059 }, { "epoch": 1.6422594996122606, "grad_norm": 2.731343984603882, "learning_rate": 1.8359833190366684e-05, "loss": 0.6923, "step": 10060 }, { "epoch": 1.642422758254765, "grad_norm": 2.6576287746429443, "learning_rate": 1.835948134375234e-05, "loss": 0.6598, "step": 10061 }, { "epoch": 1.6425860168972695, "grad_norm": 3.0418851375579834, "learning_rate": 1.835912946277532e-05, "loss": 0.7775, "step": 10062 }, { "epoch": 1.642749275539774, "grad_norm": 2.4210188388824463, "learning_rate": 1.8358777547437077e-05, "loss": 0.592, "step": 10063 }, { "epoch": 1.6429125341822783, "grad_norm": 2.9553983211517334, "learning_rate": 1.8358425597739057e-05, "loss": 0.8173, "step": 10064 }, { "epoch": 1.6430757928247828, "grad_norm": 2.9411065578460693, "learning_rate": 1.8358073613682705e-05, "loss": 0.6969, "step": 10065 }, { "epoch": 1.6432390514672872, "grad_norm": 2.99692702293396, "learning_rate": 1.8357721595269466e-05, "loss": 0.8256, "step": 10066 }, { "epoch": 1.6434023101097914, "grad_norm": 2.4564836025238037, "learning_rate": 1.8357369542500794e-05, "loss": 0.6496, "step": 10067 }, { "epoch": 1.6435655687522959, "grad_norm": 2.5998237133026123, "learning_rate": 1.8357017455378127e-05, "loss": 0.7441, "step": 10068 }, { "epoch": 1.6437288273948, "grad_norm": 2.3422813415527344, "learning_rate": 1.835666533390292e-05, "loss": 0.6404, "step": 10069 }, { "epoch": 1.6438920860373045, "grad_norm": 3.2960410118103027, "learning_rate": 1.8356313178076613e-05, "loss": 0.7854, "step": 10070 }, { "epoch": 1.644055344679809, "grad_norm": 2.7791740894317627, "learning_rate": 1.835596098790066e-05, "loss": 0.7445, "step": 10071 }, { "epoch": 1.6442186033223134, "grad_norm": 3.2029285430908203, "learning_rate": 1.835560876337651e-05, "loss": 0.7544, "step": 10072 }, { "epoch": 1.6443818619648178, "grad_norm": 2.7982139587402344, "learning_rate": 1.8355256504505603e-05, "loss": 0.7318, "step": 10073 }, { "epoch": 1.6445451206073223, "grad_norm": 2.208833694458008, "learning_rate": 1.8354904211289394e-05, "loss": 0.5232, "step": 10074 }, { "epoch": 1.6447083792498265, "grad_norm": 2.61232590675354, "learning_rate": 1.8354551883729326e-05, "loss": 0.7274, "step": 10075 }, { "epoch": 1.644871637892331, "grad_norm": 2.7542879581451416, "learning_rate": 1.8354199521826853e-05, "loss": 0.7824, "step": 10076 }, { "epoch": 1.6450348965348354, "grad_norm": 2.1756176948547363, "learning_rate": 1.8353847125583418e-05, "loss": 0.5475, "step": 10077 }, { "epoch": 1.6451981551773396, "grad_norm": 2.3065025806427, "learning_rate": 1.8353494695000474e-05, "loss": 0.6785, "step": 10078 }, { "epoch": 1.645361413819844, "grad_norm": 2.257988214492798, "learning_rate": 1.8353142230079465e-05, "loss": 0.6062, "step": 10079 }, { "epoch": 1.6455246724623485, "grad_norm": 2.7128705978393555, "learning_rate": 1.8352789730821844e-05, "loss": 0.7199, "step": 10080 }, { "epoch": 1.645687931104853, "grad_norm": 2.7158279418945312, "learning_rate": 1.8352437197229055e-05, "loss": 0.6858, "step": 10081 }, { "epoch": 1.6458511897473573, "grad_norm": 2.5735878944396973, "learning_rate": 1.8352084629302552e-05, "loss": 0.7253, "step": 10082 }, { "epoch": 1.6460144483898618, "grad_norm": 2.202291965484619, "learning_rate": 1.8351732027043783e-05, "loss": 0.516, "step": 10083 }, { "epoch": 1.646177707032366, "grad_norm": 2.9032235145568848, "learning_rate": 1.8351379390454204e-05, "loss": 0.7199, "step": 10084 }, { "epoch": 1.6463409656748704, "grad_norm": 2.9209184646606445, "learning_rate": 1.835102671953525e-05, "loss": 0.6861, "step": 10085 }, { "epoch": 1.6465042243173746, "grad_norm": 2.02030348777771, "learning_rate": 1.8350674014288378e-05, "loss": 0.5556, "step": 10086 }, { "epoch": 1.646667482959879, "grad_norm": 2.916887044906616, "learning_rate": 1.835032127471504e-05, "loss": 0.6925, "step": 10087 }, { "epoch": 1.6468307416023835, "grad_norm": 2.399406671524048, "learning_rate": 1.8349968500816685e-05, "loss": 0.6397, "step": 10088 }, { "epoch": 1.646994000244888, "grad_norm": 3.1773862838745117, "learning_rate": 1.8349615692594764e-05, "loss": 0.7701, "step": 10089 }, { "epoch": 1.6471572588873924, "grad_norm": 3.199249267578125, "learning_rate": 1.8349262850050726e-05, "loss": 0.8569, "step": 10090 }, { "epoch": 1.6473205175298968, "grad_norm": 2.5726306438446045, "learning_rate": 1.8348909973186017e-05, "loss": 0.5725, "step": 10091 }, { "epoch": 1.6474837761724013, "grad_norm": 2.781855583190918, "learning_rate": 1.8348557062002095e-05, "loss": 0.7324, "step": 10092 }, { "epoch": 1.6476470348149055, "grad_norm": 2.488312005996704, "learning_rate": 1.8348204116500406e-05, "loss": 0.6205, "step": 10093 }, { "epoch": 1.64781029345741, "grad_norm": 2.6430208683013916, "learning_rate": 1.83478511366824e-05, "loss": 0.6677, "step": 10094 }, { "epoch": 1.6479735520999141, "grad_norm": 2.7220652103424072, "learning_rate": 1.834749812254953e-05, "loss": 0.6743, "step": 10095 }, { "epoch": 1.6481368107424186, "grad_norm": 2.732815980911255, "learning_rate": 1.8347145074103252e-05, "loss": 0.7787, "step": 10096 }, { "epoch": 1.648300069384923, "grad_norm": 2.9066381454467773, "learning_rate": 1.834679199134501e-05, "loss": 0.7005, "step": 10097 }, { "epoch": 1.6484633280274275, "grad_norm": 2.490208148956299, "learning_rate": 1.834643887427626e-05, "loss": 0.6672, "step": 10098 }, { "epoch": 1.648626586669932, "grad_norm": 2.8933980464935303, "learning_rate": 1.834608572289845e-05, "loss": 0.7515, "step": 10099 }, { "epoch": 1.6487898453124363, "grad_norm": 3.180310010910034, "learning_rate": 1.834573253721303e-05, "loss": 0.9978, "step": 10100 }, { "epoch": 1.6489531039549408, "grad_norm": 2.698495864868164, "learning_rate": 1.8345379317221453e-05, "loss": 0.7415, "step": 10101 }, { "epoch": 1.649116362597445, "grad_norm": 2.707850456237793, "learning_rate": 1.834502606292518e-05, "loss": 0.7341, "step": 10102 }, { "epoch": 1.6492796212399494, "grad_norm": 2.963730573654175, "learning_rate": 1.8344672774325652e-05, "loss": 0.6853, "step": 10103 }, { "epoch": 1.6494428798824536, "grad_norm": 2.098029375076294, "learning_rate": 1.8344319451424324e-05, "loss": 0.4925, "step": 10104 }, { "epoch": 1.649606138524958, "grad_norm": 2.7659597396850586, "learning_rate": 1.834396609422265e-05, "loss": 0.7561, "step": 10105 }, { "epoch": 1.6497693971674625, "grad_norm": 3.2215394973754883, "learning_rate": 1.8343612702722085e-05, "loss": 0.9259, "step": 10106 }, { "epoch": 1.649932655809967, "grad_norm": 2.5794007778167725, "learning_rate": 1.834325927692408e-05, "loss": 0.6376, "step": 10107 }, { "epoch": 1.6500959144524714, "grad_norm": 2.350627899169922, "learning_rate": 1.834290581683008e-05, "loss": 0.6282, "step": 10108 }, { "epoch": 1.6502591730949758, "grad_norm": 2.91231632232666, "learning_rate": 1.8342552322441545e-05, "loss": 0.779, "step": 10109 }, { "epoch": 1.6504224317374803, "grad_norm": 2.9960198402404785, "learning_rate": 1.834219879375993e-05, "loss": 0.9004, "step": 10110 }, { "epoch": 1.6505856903799845, "grad_norm": 2.5800631046295166, "learning_rate": 1.8341845230786687e-05, "loss": 0.6157, "step": 10111 }, { "epoch": 1.650748949022489, "grad_norm": 2.5056753158569336, "learning_rate": 1.8341491633523266e-05, "loss": 0.6348, "step": 10112 }, { "epoch": 1.6509122076649931, "grad_norm": 3.0094707012176514, "learning_rate": 1.8341138001971127e-05, "loss": 0.727, "step": 10113 }, { "epoch": 1.6510754663074976, "grad_norm": 2.641752004623413, "learning_rate": 1.8340784336131715e-05, "loss": 0.6976, "step": 10114 }, { "epoch": 1.651238724950002, "grad_norm": 2.563997268676758, "learning_rate": 1.8340430636006486e-05, "loss": 0.7153, "step": 10115 }, { "epoch": 1.6514019835925065, "grad_norm": 2.757068395614624, "learning_rate": 1.83400769015969e-05, "loss": 0.714, "step": 10116 }, { "epoch": 1.6515652422350109, "grad_norm": 2.8571345806121826, "learning_rate": 1.8339723132904405e-05, "loss": 0.6017, "step": 10117 }, { "epoch": 1.6517285008775153, "grad_norm": 2.3614699840545654, "learning_rate": 1.8339369329930457e-05, "loss": 0.7362, "step": 10118 }, { "epoch": 1.6518917595200195, "grad_norm": 2.500253677368164, "learning_rate": 1.8339015492676513e-05, "loss": 0.7144, "step": 10119 }, { "epoch": 1.652055018162524, "grad_norm": 2.42452073097229, "learning_rate": 1.8338661621144023e-05, "loss": 0.6407, "step": 10120 }, { "epoch": 1.6522182768050284, "grad_norm": 2.1801998615264893, "learning_rate": 1.8338307715334445e-05, "loss": 0.5992, "step": 10121 }, { "epoch": 1.6523815354475326, "grad_norm": 2.621584892272949, "learning_rate": 1.833795377524923e-05, "loss": 0.7328, "step": 10122 }, { "epoch": 1.652544794090037, "grad_norm": 2.9975664615631104, "learning_rate": 1.8337599800889837e-05, "loss": 0.7716, "step": 10123 }, { "epoch": 1.6527080527325415, "grad_norm": 2.9644742012023926, "learning_rate": 1.833724579225772e-05, "loss": 0.8281, "step": 10124 }, { "epoch": 1.652871311375046, "grad_norm": 2.6558704376220703, "learning_rate": 1.8336891749354337e-05, "loss": 0.7092, "step": 10125 }, { "epoch": 1.6530345700175504, "grad_norm": 2.7910141944885254, "learning_rate": 1.8336537672181133e-05, "loss": 0.8795, "step": 10126 }, { "epoch": 1.6531978286600548, "grad_norm": 2.5253894329071045, "learning_rate": 1.8336183560739574e-05, "loss": 0.6626, "step": 10127 }, { "epoch": 1.653361087302559, "grad_norm": 2.7156121730804443, "learning_rate": 1.8335829415031112e-05, "loss": 0.7543, "step": 10128 }, { "epoch": 1.6535243459450635, "grad_norm": 2.625936985015869, "learning_rate": 1.8335475235057202e-05, "loss": 0.6935, "step": 10129 }, { "epoch": 1.6536876045875677, "grad_norm": 3.010042428970337, "learning_rate": 1.8335121020819298e-05, "loss": 0.8468, "step": 10130 }, { "epoch": 1.6538508632300721, "grad_norm": 2.434666872024536, "learning_rate": 1.8334766772318864e-05, "loss": 0.5496, "step": 10131 }, { "epoch": 1.6540141218725766, "grad_norm": 3.0636417865753174, "learning_rate": 1.833441248955735e-05, "loss": 0.7614, "step": 10132 }, { "epoch": 1.654177380515081, "grad_norm": 2.773179531097412, "learning_rate": 1.8334058172536213e-05, "loss": 0.799, "step": 10133 }, { "epoch": 1.6543406391575854, "grad_norm": 2.9195139408111572, "learning_rate": 1.8333703821256906e-05, "loss": 0.7741, "step": 10134 }, { "epoch": 1.6545038978000899, "grad_norm": 2.802999973297119, "learning_rate": 1.8333349435720894e-05, "loss": 0.843, "step": 10135 }, { "epoch": 1.6546671564425943, "grad_norm": 2.5710456371307373, "learning_rate": 1.833299501592963e-05, "loss": 0.5742, "step": 10136 }, { "epoch": 1.6548304150850985, "grad_norm": 2.8509483337402344, "learning_rate": 1.8332640561884565e-05, "loss": 0.6557, "step": 10137 }, { "epoch": 1.654993673727603, "grad_norm": 2.834552764892578, "learning_rate": 1.8332286073587165e-05, "loss": 0.6811, "step": 10138 }, { "epoch": 1.6551569323701072, "grad_norm": 2.3137848377227783, "learning_rate": 1.833193155103888e-05, "loss": 0.5073, "step": 10139 }, { "epoch": 1.6553201910126116, "grad_norm": 2.800691843032837, "learning_rate": 1.8331576994241175e-05, "loss": 0.6571, "step": 10140 }, { "epoch": 1.655483449655116, "grad_norm": 2.886993408203125, "learning_rate": 1.8331222403195498e-05, "loss": 0.8285, "step": 10141 }, { "epoch": 1.6556467082976205, "grad_norm": 2.6543326377868652, "learning_rate": 1.8330867777903313e-05, "loss": 0.7054, "step": 10142 }, { "epoch": 1.655809966940125, "grad_norm": 2.805417776107788, "learning_rate": 1.8330513118366077e-05, "loss": 0.7262, "step": 10143 }, { "epoch": 1.6559732255826294, "grad_norm": 2.9306020736694336, "learning_rate": 1.8330158424585247e-05, "loss": 0.7283, "step": 10144 }, { "epoch": 1.6561364842251338, "grad_norm": 3.1138222217559814, "learning_rate": 1.832980369656228e-05, "loss": 0.6972, "step": 10145 }, { "epoch": 1.656299742867638, "grad_norm": 2.8584132194519043, "learning_rate": 1.8329448934298638e-05, "loss": 0.7331, "step": 10146 }, { "epoch": 1.6564630015101425, "grad_norm": 2.725146770477295, "learning_rate": 1.8329094137795775e-05, "loss": 0.6239, "step": 10147 }, { "epoch": 1.6566262601526467, "grad_norm": 2.771106481552124, "learning_rate": 1.832873930705515e-05, "loss": 0.694, "step": 10148 }, { "epoch": 1.6567895187951511, "grad_norm": 2.813325881958008, "learning_rate": 1.8328384442078223e-05, "loss": 0.6037, "step": 10149 }, { "epoch": 1.6569527774376556, "grad_norm": 3.0393030643463135, "learning_rate": 1.8328029542866456e-05, "loss": 0.768, "step": 10150 }, { "epoch": 1.65711603608016, "grad_norm": 3.196545124053955, "learning_rate": 1.83276746094213e-05, "loss": 0.679, "step": 10151 }, { "epoch": 1.6572792947226644, "grad_norm": 2.567396640777588, "learning_rate": 1.8327319641744218e-05, "loss": 0.6357, "step": 10152 }, { "epoch": 1.6574425533651689, "grad_norm": 2.7144477367401123, "learning_rate": 1.832696463983667e-05, "loss": 0.6455, "step": 10153 }, { "epoch": 1.6576058120076733, "grad_norm": 2.7777788639068604, "learning_rate": 1.8326609603700114e-05, "loss": 0.7535, "step": 10154 }, { "epoch": 1.6577690706501775, "grad_norm": 2.7246906757354736, "learning_rate": 1.832625453333601e-05, "loss": 0.6442, "step": 10155 }, { "epoch": 1.657932329292682, "grad_norm": 2.7059006690979004, "learning_rate": 1.8325899428745822e-05, "loss": 0.7486, "step": 10156 }, { "epoch": 1.6580955879351862, "grad_norm": 3.1811418533325195, "learning_rate": 1.8325544289931e-05, "loss": 0.6533, "step": 10157 }, { "epoch": 1.6582588465776906, "grad_norm": 2.415322780609131, "learning_rate": 1.832518911689301e-05, "loss": 0.6526, "step": 10158 }, { "epoch": 1.658422105220195, "grad_norm": 2.4004364013671875, "learning_rate": 1.832483390963331e-05, "loss": 0.6221, "step": 10159 }, { "epoch": 1.6585853638626995, "grad_norm": 2.465479612350464, "learning_rate": 1.832447866815337e-05, "loss": 0.6487, "step": 10160 }, { "epoch": 1.658748622505204, "grad_norm": 2.6480138301849365, "learning_rate": 1.8324123392454633e-05, "loss": 0.7027, "step": 10161 }, { "epoch": 1.6589118811477084, "grad_norm": 2.362645387649536, "learning_rate": 1.8323768082538568e-05, "loss": 0.5648, "step": 10162 }, { "epoch": 1.6590751397902126, "grad_norm": 2.178225040435791, "learning_rate": 1.8323412738406638e-05, "loss": 0.4671, "step": 10163 }, { "epoch": 1.659238398432717, "grad_norm": 2.3425862789154053, "learning_rate": 1.83230573600603e-05, "loss": 0.5798, "step": 10164 }, { "epoch": 1.6594016570752215, "grad_norm": 2.9391965866088867, "learning_rate": 1.8322701947501015e-05, "loss": 0.6899, "step": 10165 }, { "epoch": 1.6595649157177257, "grad_norm": 3.0249006748199463, "learning_rate": 1.8322346500730247e-05, "loss": 0.8055, "step": 10166 }, { "epoch": 1.6597281743602301, "grad_norm": 2.904587745666504, "learning_rate": 1.832199101974945e-05, "loss": 0.7925, "step": 10167 }, { "epoch": 1.6598914330027346, "grad_norm": 3.17964506149292, "learning_rate": 1.8321635504560095e-05, "loss": 0.8856, "step": 10168 }, { "epoch": 1.660054691645239, "grad_norm": 2.924006938934326, "learning_rate": 1.8321279955163638e-05, "loss": 0.7872, "step": 10169 }, { "epoch": 1.6602179502877434, "grad_norm": 3.126732110977173, "learning_rate": 1.832092437156154e-05, "loss": 0.7245, "step": 10170 }, { "epoch": 1.6603812089302479, "grad_norm": 3.210590362548828, "learning_rate": 1.8320568753755265e-05, "loss": 0.6895, "step": 10171 }, { "epoch": 1.660544467572752, "grad_norm": 2.518587112426758, "learning_rate": 1.8320213101746275e-05, "loss": 0.6052, "step": 10172 }, { "epoch": 1.6607077262152565, "grad_norm": 3.116184711456299, "learning_rate": 1.8319857415536027e-05, "loss": 0.6503, "step": 10173 }, { "epoch": 1.6608709848577607, "grad_norm": 3.1145315170288086, "learning_rate": 1.831950169512599e-05, "loss": 0.6947, "step": 10174 }, { "epoch": 1.6610342435002652, "grad_norm": 3.341094970703125, "learning_rate": 1.831914594051762e-05, "loss": 0.7144, "step": 10175 }, { "epoch": 1.6611975021427696, "grad_norm": 3.030120611190796, "learning_rate": 1.8318790151712384e-05, "loss": 0.7686, "step": 10176 }, { "epoch": 1.661360760785274, "grad_norm": 2.8985724449157715, "learning_rate": 1.8318434328711742e-05, "loss": 0.7091, "step": 10177 }, { "epoch": 1.6615240194277785, "grad_norm": 2.8219449520111084, "learning_rate": 1.831807847151716e-05, "loss": 0.6058, "step": 10178 }, { "epoch": 1.661687278070283, "grad_norm": 2.603365659713745, "learning_rate": 1.8317722580130097e-05, "loss": 0.7452, "step": 10179 }, { "epoch": 1.6618505367127874, "grad_norm": 3.026724338531494, "learning_rate": 1.8317366654552017e-05, "loss": 0.7832, "step": 10180 }, { "epoch": 1.6620137953552916, "grad_norm": 2.4982149600982666, "learning_rate": 1.831701069478438e-05, "loss": 0.6323, "step": 10181 }, { "epoch": 1.662177053997796, "grad_norm": 2.8522884845733643, "learning_rate": 1.8316654700828658e-05, "loss": 0.7046, "step": 10182 }, { "epoch": 1.6623403126403002, "grad_norm": 2.428089141845703, "learning_rate": 1.8316298672686306e-05, "loss": 0.5905, "step": 10183 }, { "epoch": 1.6625035712828047, "grad_norm": 2.3828015327453613, "learning_rate": 1.831594261035879e-05, "loss": 0.574, "step": 10184 }, { "epoch": 1.6626668299253091, "grad_norm": 3.0942416191101074, "learning_rate": 1.8315586513847573e-05, "loss": 0.7714, "step": 10185 }, { "epoch": 1.6628300885678136, "grad_norm": 2.59639310836792, "learning_rate": 1.831523038315412e-05, "loss": 0.5957, "step": 10186 }, { "epoch": 1.662993347210318, "grad_norm": 2.55249285697937, "learning_rate": 1.8314874218279898e-05, "loss": 0.6574, "step": 10187 }, { "epoch": 1.6631566058528224, "grad_norm": 3.0001001358032227, "learning_rate": 1.8314518019226365e-05, "loss": 0.7654, "step": 10188 }, { "epoch": 1.6633198644953269, "grad_norm": 2.5438852310180664, "learning_rate": 1.831416178599499e-05, "loss": 0.6569, "step": 10189 }, { "epoch": 1.663483123137831, "grad_norm": 2.8339767456054688, "learning_rate": 1.8313805518587232e-05, "loss": 0.7824, "step": 10190 }, { "epoch": 1.6636463817803355, "grad_norm": 2.935854434967041, "learning_rate": 1.8313449217004563e-05, "loss": 0.6563, "step": 10191 }, { "epoch": 1.6638096404228397, "grad_norm": 2.5393824577331543, "learning_rate": 1.831309288124844e-05, "loss": 0.7211, "step": 10192 }, { "epoch": 1.6639728990653442, "grad_norm": 2.6239964962005615, "learning_rate": 1.8312736511320333e-05, "loss": 0.7268, "step": 10193 }, { "epoch": 1.6641361577078486, "grad_norm": 2.807136297225952, "learning_rate": 1.8312380107221703e-05, "loss": 0.7051, "step": 10194 }, { "epoch": 1.664299416350353, "grad_norm": 2.822035551071167, "learning_rate": 1.8312023668954018e-05, "loss": 0.7525, "step": 10195 }, { "epoch": 1.6644626749928575, "grad_norm": 2.5662083625793457, "learning_rate": 1.8311667196518745e-05, "loss": 0.7416, "step": 10196 }, { "epoch": 1.664625933635362, "grad_norm": 2.5946907997131348, "learning_rate": 1.8311310689917344e-05, "loss": 0.6346, "step": 10197 }, { "epoch": 1.6647891922778664, "grad_norm": 2.4596481323242188, "learning_rate": 1.831095414915128e-05, "loss": 0.6877, "step": 10198 }, { "epoch": 1.6649524509203706, "grad_norm": 2.4897401332855225, "learning_rate": 1.8310597574222028e-05, "loss": 0.6797, "step": 10199 }, { "epoch": 1.665115709562875, "grad_norm": 2.742469549179077, "learning_rate": 1.831024096513104e-05, "loss": 0.7611, "step": 10200 }, { "epoch": 1.6652789682053792, "grad_norm": 3.2672548294067383, "learning_rate": 1.8309884321879793e-05, "loss": 0.8658, "step": 10201 }, { "epoch": 1.6654422268478837, "grad_norm": 2.6304848194122314, "learning_rate": 1.830952764446975e-05, "loss": 0.7683, "step": 10202 }, { "epoch": 1.6656054854903881, "grad_norm": 2.854593515396118, "learning_rate": 1.8309170932902378e-05, "loss": 0.8476, "step": 10203 }, { "epoch": 1.6657687441328926, "grad_norm": 2.8110668659210205, "learning_rate": 1.830881418717914e-05, "loss": 0.768, "step": 10204 }, { "epoch": 1.665932002775397, "grad_norm": 2.6434545516967773, "learning_rate": 1.8308457407301503e-05, "loss": 0.6902, "step": 10205 }, { "epoch": 1.6660952614179014, "grad_norm": 2.1173954010009766, "learning_rate": 1.8308100593270938e-05, "loss": 0.5037, "step": 10206 }, { "epoch": 1.6662585200604056, "grad_norm": 2.3176889419555664, "learning_rate": 1.8307743745088905e-05, "loss": 0.6613, "step": 10207 }, { "epoch": 1.66642177870291, "grad_norm": 2.7167587280273438, "learning_rate": 1.8307386862756876e-05, "loss": 0.7017, "step": 10208 }, { "epoch": 1.6665850373454145, "grad_norm": 2.545382499694824, "learning_rate": 1.8307029946276317e-05, "loss": 0.6076, "step": 10209 }, { "epoch": 1.6667482959879187, "grad_norm": 2.8386850357055664, "learning_rate": 1.8306672995648695e-05, "loss": 0.715, "step": 10210 }, { "epoch": 1.6669115546304232, "grad_norm": 3.227504014968872, "learning_rate": 1.8306316010875474e-05, "loss": 0.9258, "step": 10211 }, { "epoch": 1.6670748132729276, "grad_norm": 2.6856937408447266, "learning_rate": 1.830595899195813e-05, "loss": 0.6445, "step": 10212 }, { "epoch": 1.667238071915432, "grad_norm": 2.548586130142212, "learning_rate": 1.8305601938898118e-05, "loss": 0.7029, "step": 10213 }, { "epoch": 1.6674013305579365, "grad_norm": 2.6882083415985107, "learning_rate": 1.8305244851696917e-05, "loss": 0.7498, "step": 10214 }, { "epoch": 1.667564589200441, "grad_norm": 2.5188188552856445, "learning_rate": 1.8304887730355988e-05, "loss": 0.5292, "step": 10215 }, { "epoch": 1.6677278478429451, "grad_norm": 1.931257724761963, "learning_rate": 1.8304530574876805e-05, "loss": 0.4736, "step": 10216 }, { "epoch": 1.6678911064854496, "grad_norm": 3.186424493789673, "learning_rate": 1.830417338526083e-05, "loss": 0.8063, "step": 10217 }, { "epoch": 1.6680543651279538, "grad_norm": 2.591657876968384, "learning_rate": 1.830381616150953e-05, "loss": 0.6835, "step": 10218 }, { "epoch": 1.6682176237704582, "grad_norm": 2.179396867752075, "learning_rate": 1.8303458903624384e-05, "loss": 0.5067, "step": 10219 }, { "epoch": 1.6683808824129627, "grad_norm": 2.2745797634124756, "learning_rate": 1.830310161160685e-05, "loss": 0.5483, "step": 10220 }, { "epoch": 1.6685441410554671, "grad_norm": 2.8786559104919434, "learning_rate": 1.83027442854584e-05, "loss": 0.8108, "step": 10221 }, { "epoch": 1.6687073996979715, "grad_norm": 2.8025143146514893, "learning_rate": 1.8302386925180506e-05, "loss": 0.6918, "step": 10222 }, { "epoch": 1.668870658340476, "grad_norm": 2.3867616653442383, "learning_rate": 1.830202953077463e-05, "loss": 0.5955, "step": 10223 }, { "epoch": 1.6690339169829804, "grad_norm": 3.058929920196533, "learning_rate": 1.8301672102242252e-05, "loss": 0.776, "step": 10224 }, { "epoch": 1.6691971756254846, "grad_norm": 3.027909755706787, "learning_rate": 1.830131463958483e-05, "loss": 0.8022, "step": 10225 }, { "epoch": 1.669360434267989, "grad_norm": 2.653193950653076, "learning_rate": 1.8300957142803838e-05, "loss": 0.6334, "step": 10226 }, { "epoch": 1.6695236929104933, "grad_norm": 2.7755126953125, "learning_rate": 1.8300599611900748e-05, "loss": 0.745, "step": 10227 }, { "epoch": 1.6696869515529977, "grad_norm": 2.769493341445923, "learning_rate": 1.8300242046877026e-05, "loss": 0.6947, "step": 10228 }, { "epoch": 1.6698502101955022, "grad_norm": 3.0791869163513184, "learning_rate": 1.8299884447734142e-05, "loss": 0.7116, "step": 10229 }, { "epoch": 1.6700134688380066, "grad_norm": 2.3903443813323975, "learning_rate": 1.829952681447357e-05, "loss": 0.5714, "step": 10230 }, { "epoch": 1.670176727480511, "grad_norm": 2.75107741355896, "learning_rate": 1.8299169147096773e-05, "loss": 0.6664, "step": 10231 }, { "epoch": 1.6703399861230155, "grad_norm": 2.512622356414795, "learning_rate": 1.8298811445605228e-05, "loss": 0.5372, "step": 10232 }, { "epoch": 1.67050324476552, "grad_norm": 2.6228339672088623, "learning_rate": 1.82984537100004e-05, "loss": 0.6922, "step": 10233 }, { "epoch": 1.6706665034080241, "grad_norm": 2.6725714206695557, "learning_rate": 1.8298095940283763e-05, "loss": 0.5617, "step": 10234 }, { "epoch": 1.6708297620505286, "grad_norm": 2.902787685394287, "learning_rate": 1.829773813645679e-05, "loss": 0.6638, "step": 10235 }, { "epoch": 1.6709930206930328, "grad_norm": 2.9313266277313232, "learning_rate": 1.8297380298520947e-05, "loss": 0.8175, "step": 10236 }, { "epoch": 1.6711562793355372, "grad_norm": 2.66817569732666, "learning_rate": 1.8297022426477706e-05, "loss": 0.6754, "step": 10237 }, { "epoch": 1.6713195379780417, "grad_norm": 2.7018637657165527, "learning_rate": 1.829666452032854e-05, "loss": 0.6372, "step": 10238 }, { "epoch": 1.671482796620546, "grad_norm": 2.6545698642730713, "learning_rate": 1.8296306580074918e-05, "loss": 0.5954, "step": 10239 }, { "epoch": 1.6716460552630505, "grad_norm": 2.4600253105163574, "learning_rate": 1.8295948605718316e-05, "loss": 0.5876, "step": 10240 }, { "epoch": 1.671809313905555, "grad_norm": 2.511183261871338, "learning_rate": 1.8295590597260194e-05, "loss": 0.7002, "step": 10241 }, { "epoch": 1.6719725725480594, "grad_norm": 2.5139105319976807, "learning_rate": 1.8295232554702038e-05, "loss": 0.6215, "step": 10242 }, { "epoch": 1.6721358311905636, "grad_norm": 2.4204297065734863, "learning_rate": 1.829487447804531e-05, "loss": 0.5483, "step": 10243 }, { "epoch": 1.672299089833068, "grad_norm": 2.8583195209503174, "learning_rate": 1.8294516367291487e-05, "loss": 0.6708, "step": 10244 }, { "epoch": 1.6724623484755723, "grad_norm": 3.1032540798187256, "learning_rate": 1.8294158222442036e-05, "loss": 0.8092, "step": 10245 }, { "epoch": 1.6726256071180767, "grad_norm": 2.707120895385742, "learning_rate": 1.8293800043498436e-05, "loss": 0.8182, "step": 10246 }, { "epoch": 1.6727888657605812, "grad_norm": 2.98707914352417, "learning_rate": 1.8293441830462153e-05, "loss": 0.7215, "step": 10247 }, { "epoch": 1.6729521244030856, "grad_norm": 3.119662284851074, "learning_rate": 1.8293083583334663e-05, "loss": 0.7344, "step": 10248 }, { "epoch": 1.67311538304559, "grad_norm": 2.3731608390808105, "learning_rate": 1.829272530211744e-05, "loss": 0.5461, "step": 10249 }, { "epoch": 1.6732786416880945, "grad_norm": 2.7419047355651855, "learning_rate": 1.8292366986811952e-05, "loss": 0.7445, "step": 10250 }, { "epoch": 1.6734419003305987, "grad_norm": 2.9063830375671387, "learning_rate": 1.8292008637419675e-05, "loss": 0.7002, "step": 10251 }, { "epoch": 1.6736051589731031, "grad_norm": 2.5819554328918457, "learning_rate": 1.8291650253942083e-05, "loss": 0.6607, "step": 10252 }, { "epoch": 1.6737684176156076, "grad_norm": 2.206228733062744, "learning_rate": 1.8291291836380645e-05, "loss": 0.5102, "step": 10253 }, { "epoch": 1.6739316762581118, "grad_norm": 2.680915594100952, "learning_rate": 1.8290933384736837e-05, "loss": 0.567, "step": 10254 }, { "epoch": 1.6740949349006162, "grad_norm": 2.6586735248565674, "learning_rate": 1.8290574899012137e-05, "loss": 0.5873, "step": 10255 }, { "epoch": 1.6742581935431207, "grad_norm": 3.423280715942383, "learning_rate": 1.8290216379208008e-05, "loss": 0.8787, "step": 10256 }, { "epoch": 1.674421452185625, "grad_norm": 2.782606601715088, "learning_rate": 1.8289857825325934e-05, "loss": 0.7331, "step": 10257 }, { "epoch": 1.6745847108281295, "grad_norm": 2.5110390186309814, "learning_rate": 1.8289499237367382e-05, "loss": 0.6014, "step": 10258 }, { "epoch": 1.674747969470634, "grad_norm": 2.897966146469116, "learning_rate": 1.828914061533383e-05, "loss": 0.7801, "step": 10259 }, { "epoch": 1.6749112281131382, "grad_norm": 2.825716018676758, "learning_rate": 1.8288781959226752e-05, "loss": 0.7169, "step": 10260 }, { "epoch": 1.6750744867556426, "grad_norm": 2.484865427017212, "learning_rate": 1.828842326904762e-05, "loss": 0.6191, "step": 10261 }, { "epoch": 1.6752377453981468, "grad_norm": 2.77955961227417, "learning_rate": 1.828806454479791e-05, "loss": 0.725, "step": 10262 }, { "epoch": 1.6754010040406513, "grad_norm": 2.4745709896087646, "learning_rate": 1.82877057864791e-05, "loss": 0.6304, "step": 10263 }, { "epoch": 1.6755642626831557, "grad_norm": 2.4029083251953125, "learning_rate": 1.8287346994092652e-05, "loss": 0.6232, "step": 10264 }, { "epoch": 1.6757275213256602, "grad_norm": 2.317009210586548, "learning_rate": 1.8286988167640055e-05, "loss": 0.6088, "step": 10265 }, { "epoch": 1.6758907799681646, "grad_norm": 2.317446708679199, "learning_rate": 1.828662930712278e-05, "loss": 0.5634, "step": 10266 }, { "epoch": 1.676054038610669, "grad_norm": 3.189326524734497, "learning_rate": 1.82862704125423e-05, "loss": 0.8288, "step": 10267 }, { "epoch": 1.6762172972531735, "grad_norm": 2.620542287826538, "learning_rate": 1.8285911483900088e-05, "loss": 0.6052, "step": 10268 }, { "epoch": 1.6763805558956777, "grad_norm": 2.3672540187835693, "learning_rate": 1.8285552521197626e-05, "loss": 0.5383, "step": 10269 }, { "epoch": 1.6765438145381821, "grad_norm": 2.995645523071289, "learning_rate": 1.8285193524436383e-05, "loss": 0.6307, "step": 10270 }, { "epoch": 1.6767070731806863, "grad_norm": 2.6289379596710205, "learning_rate": 1.8284834493617843e-05, "loss": 0.6539, "step": 10271 }, { "epoch": 1.6768703318231908, "grad_norm": 1.9615973234176636, "learning_rate": 1.8284475428743472e-05, "loss": 0.5373, "step": 10272 }, { "epoch": 1.6770335904656952, "grad_norm": 2.5992493629455566, "learning_rate": 1.8284116329814755e-05, "loss": 0.6401, "step": 10273 }, { "epoch": 1.6771968491081997, "grad_norm": 2.2097723484039307, "learning_rate": 1.828375719683316e-05, "loss": 0.5872, "step": 10274 }, { "epoch": 1.677360107750704, "grad_norm": 2.3037025928497314, "learning_rate": 1.8283398029800167e-05, "loss": 0.629, "step": 10275 }, { "epoch": 1.6775233663932085, "grad_norm": 3.1985437870025635, "learning_rate": 1.8283038828717254e-05, "loss": 0.7553, "step": 10276 }, { "epoch": 1.677686625035713, "grad_norm": 2.5666399002075195, "learning_rate": 1.8282679593585896e-05, "loss": 0.61, "step": 10277 }, { "epoch": 1.6778498836782172, "grad_norm": 3.008589029312134, "learning_rate": 1.828232032440757e-05, "loss": 0.6571, "step": 10278 }, { "epoch": 1.6780131423207216, "grad_norm": 2.204558849334717, "learning_rate": 1.828196102118375e-05, "loss": 0.5451, "step": 10279 }, { "epoch": 1.6781764009632258, "grad_norm": 2.4438796043395996, "learning_rate": 1.8281601683915917e-05, "loss": 0.5961, "step": 10280 }, { "epoch": 1.6783396596057303, "grad_norm": 2.3661394119262695, "learning_rate": 1.8281242312605548e-05, "loss": 0.577, "step": 10281 }, { "epoch": 1.6785029182482347, "grad_norm": 2.8761587142944336, "learning_rate": 1.8280882907254117e-05, "loss": 0.7293, "step": 10282 }, { "epoch": 1.6786661768907392, "grad_norm": 3.2029170989990234, "learning_rate": 1.8280523467863106e-05, "loss": 0.7766, "step": 10283 }, { "epoch": 1.6788294355332436, "grad_norm": 2.6571364402770996, "learning_rate": 1.8280163994433984e-05, "loss": 0.5884, "step": 10284 }, { "epoch": 1.678992694175748, "grad_norm": 2.9023048877716064, "learning_rate": 1.827980448696824e-05, "loss": 0.7259, "step": 10285 }, { "epoch": 1.6791559528182525, "grad_norm": 2.3250787258148193, "learning_rate": 1.8279444945467344e-05, "loss": 0.6205, "step": 10286 }, { "epoch": 1.6793192114607567, "grad_norm": 2.6549670696258545, "learning_rate": 1.8279085369932775e-05, "loss": 0.6779, "step": 10287 }, { "epoch": 1.6794824701032611, "grad_norm": 3.3132457733154297, "learning_rate": 1.8278725760366013e-05, "loss": 0.799, "step": 10288 }, { "epoch": 1.6796457287457653, "grad_norm": 2.8906748294830322, "learning_rate": 1.8278366116768535e-05, "loss": 0.6515, "step": 10289 }, { "epoch": 1.6798089873882698, "grad_norm": 2.5249078273773193, "learning_rate": 1.8278006439141822e-05, "loss": 0.6171, "step": 10290 }, { "epoch": 1.6799722460307742, "grad_norm": 3.062549591064453, "learning_rate": 1.8277646727487347e-05, "loss": 0.8198, "step": 10291 }, { "epoch": 1.6801355046732787, "grad_norm": 2.887026309967041, "learning_rate": 1.8277286981806596e-05, "loss": 0.8205, "step": 10292 }, { "epoch": 1.680298763315783, "grad_norm": 2.8419394493103027, "learning_rate": 1.827692720210104e-05, "loss": 0.7577, "step": 10293 }, { "epoch": 1.6804620219582875, "grad_norm": 2.7038235664367676, "learning_rate": 1.8276567388372162e-05, "loss": 0.6283, "step": 10294 }, { "epoch": 1.680625280600792, "grad_norm": 2.744109630584717, "learning_rate": 1.8276207540621444e-05, "loss": 0.6568, "step": 10295 }, { "epoch": 1.6807885392432962, "grad_norm": 2.5676395893096924, "learning_rate": 1.827584765885036e-05, "loss": 0.6965, "step": 10296 }, { "epoch": 1.6809517978858006, "grad_norm": 2.5808470249176025, "learning_rate": 1.8275487743060387e-05, "loss": 0.6343, "step": 10297 }, { "epoch": 1.6811150565283048, "grad_norm": 2.3364031314849854, "learning_rate": 1.8275127793253014e-05, "loss": 0.5899, "step": 10298 }, { "epoch": 1.6812783151708093, "grad_norm": 2.7200582027435303, "learning_rate": 1.8274767809429712e-05, "loss": 0.7078, "step": 10299 }, { "epoch": 1.6814415738133137, "grad_norm": 2.9078073501586914, "learning_rate": 1.8274407791591966e-05, "loss": 0.6543, "step": 10300 }, { "epoch": 1.6816048324558182, "grad_norm": 2.7012507915496826, "learning_rate": 1.8274047739741252e-05, "loss": 0.7869, "step": 10301 }, { "epoch": 1.6817680910983226, "grad_norm": 3.113887071609497, "learning_rate": 1.8273687653879056e-05, "loss": 0.818, "step": 10302 }, { "epoch": 1.681931349740827, "grad_norm": 2.6642348766326904, "learning_rate": 1.827332753400685e-05, "loss": 0.6561, "step": 10303 }, { "epoch": 1.6820946083833312, "grad_norm": 2.8686957359313965, "learning_rate": 1.8272967380126122e-05, "loss": 0.751, "step": 10304 }, { "epoch": 1.6822578670258357, "grad_norm": 2.9312744140625, "learning_rate": 1.8272607192238347e-05, "loss": 0.6775, "step": 10305 }, { "epoch": 1.6824211256683401, "grad_norm": 2.474080801010132, "learning_rate": 1.8272246970345007e-05, "loss": 0.6531, "step": 10306 }, { "epoch": 1.6825843843108443, "grad_norm": 2.2392890453338623, "learning_rate": 1.8271886714447583e-05, "loss": 0.5658, "step": 10307 }, { "epoch": 1.6827476429533488, "grad_norm": 2.9229421615600586, "learning_rate": 1.8271526424547557e-05, "loss": 0.9363, "step": 10308 }, { "epoch": 1.6829109015958532, "grad_norm": 2.804502248764038, "learning_rate": 1.827116610064641e-05, "loss": 0.7251, "step": 10309 }, { "epoch": 1.6830741602383577, "grad_norm": 2.834397077560425, "learning_rate": 1.827080574274562e-05, "loss": 0.5725, "step": 10310 }, { "epoch": 1.683237418880862, "grad_norm": 2.8306398391723633, "learning_rate": 1.827044535084667e-05, "loss": 0.7065, "step": 10311 }, { "epoch": 1.6834006775233665, "grad_norm": 3.032447576522827, "learning_rate": 1.8270084924951047e-05, "loss": 0.8054, "step": 10312 }, { "epoch": 1.6835639361658707, "grad_norm": 2.708172559738159, "learning_rate": 1.8269724465060222e-05, "loss": 0.7858, "step": 10313 }, { "epoch": 1.6837271948083752, "grad_norm": 2.508587598800659, "learning_rate": 1.8269363971175685e-05, "loss": 0.7314, "step": 10314 }, { "epoch": 1.6838904534508794, "grad_norm": 2.6118602752685547, "learning_rate": 1.8269003443298917e-05, "loss": 0.7812, "step": 10315 }, { "epoch": 1.6840537120933838, "grad_norm": 2.83154296875, "learning_rate": 1.8268642881431394e-05, "loss": 0.8616, "step": 10316 }, { "epoch": 1.6842169707358883, "grad_norm": 2.9314968585968018, "learning_rate": 1.8268282285574607e-05, "loss": 0.783, "step": 10317 }, { "epoch": 1.6843802293783927, "grad_norm": 2.8867366313934326, "learning_rate": 1.826792165573003e-05, "loss": 0.7592, "step": 10318 }, { "epoch": 1.6845434880208972, "grad_norm": 2.751295328140259, "learning_rate": 1.826756099189915e-05, "loss": 0.8635, "step": 10319 }, { "epoch": 1.6847067466634016, "grad_norm": 2.7941620349884033, "learning_rate": 1.8267200294083446e-05, "loss": 0.763, "step": 10320 }, { "epoch": 1.684870005305906, "grad_norm": 2.800610065460205, "learning_rate": 1.8266839562284407e-05, "loss": 0.698, "step": 10321 }, { "epoch": 1.6850332639484102, "grad_norm": 2.399418830871582, "learning_rate": 1.8266478796503514e-05, "loss": 0.601, "step": 10322 }, { "epoch": 1.6851965225909147, "grad_norm": 2.753103494644165, "learning_rate": 1.826611799674224e-05, "loss": 0.7057, "step": 10323 }, { "epoch": 1.685359781233419, "grad_norm": 2.583047389984131, "learning_rate": 1.826575716300208e-05, "loss": 0.7028, "step": 10324 }, { "epoch": 1.6855230398759233, "grad_norm": 2.948561668395996, "learning_rate": 1.8265396295284517e-05, "loss": 0.7803, "step": 10325 }, { "epoch": 1.6856862985184278, "grad_norm": 2.091073751449585, "learning_rate": 1.826503539359103e-05, "loss": 0.575, "step": 10326 }, { "epoch": 1.6858495571609322, "grad_norm": 2.393738031387329, "learning_rate": 1.8264674457923098e-05, "loss": 0.5986, "step": 10327 }, { "epoch": 1.6860128158034366, "grad_norm": 2.2690858840942383, "learning_rate": 1.8264313488282215e-05, "loss": 0.6274, "step": 10328 }, { "epoch": 1.686176074445941, "grad_norm": 2.8724539279937744, "learning_rate": 1.826395248466986e-05, "loss": 0.7187, "step": 10329 }, { "epoch": 1.6863393330884455, "grad_norm": 3.355295419692993, "learning_rate": 1.8263591447087515e-05, "loss": 0.6838, "step": 10330 }, { "epoch": 1.6865025917309497, "grad_norm": 2.847877025604248, "learning_rate": 1.8263230375536665e-05, "loss": 0.7783, "step": 10331 }, { "epoch": 1.6866658503734542, "grad_norm": 2.4574735164642334, "learning_rate": 1.8262869270018795e-05, "loss": 0.6923, "step": 10332 }, { "epoch": 1.6868291090159584, "grad_norm": 2.7744860649108887, "learning_rate": 1.8262508130535392e-05, "loss": 0.7721, "step": 10333 }, { "epoch": 1.6869923676584628, "grad_norm": 2.7801949977874756, "learning_rate": 1.8262146957087936e-05, "loss": 0.6457, "step": 10334 }, { "epoch": 1.6871556263009673, "grad_norm": 3.0796077251434326, "learning_rate": 1.8261785749677915e-05, "loss": 0.7138, "step": 10335 }, { "epoch": 1.6873188849434717, "grad_norm": 2.9539427757263184, "learning_rate": 1.826142450830681e-05, "loss": 0.7678, "step": 10336 }, { "epoch": 1.6874821435859761, "grad_norm": 2.291146755218506, "learning_rate": 1.826106323297611e-05, "loss": 0.5723, "step": 10337 }, { "epoch": 1.6876454022284806, "grad_norm": 2.8825604915618896, "learning_rate": 1.82607019236873e-05, "loss": 0.7956, "step": 10338 }, { "epoch": 1.687808660870985, "grad_norm": 2.9299681186676025, "learning_rate": 1.826034058044186e-05, "loss": 0.6979, "step": 10339 }, { "epoch": 1.6879719195134892, "grad_norm": 2.68410587310791, "learning_rate": 1.825997920324128e-05, "loss": 0.6638, "step": 10340 }, { "epoch": 1.6881351781559937, "grad_norm": 2.9121055603027344, "learning_rate": 1.8259617792087045e-05, "loss": 0.743, "step": 10341 }, { "epoch": 1.688298436798498, "grad_norm": 3.045332670211792, "learning_rate": 1.825925634698064e-05, "loss": 0.9592, "step": 10342 }, { "epoch": 1.6884616954410023, "grad_norm": 2.79718017578125, "learning_rate": 1.825889486792355e-05, "loss": 0.7317, "step": 10343 }, { "epoch": 1.6886249540835068, "grad_norm": 2.4263973236083984, "learning_rate": 1.8258533354917266e-05, "loss": 0.6375, "step": 10344 }, { "epoch": 1.6887882127260112, "grad_norm": 2.8036916255950928, "learning_rate": 1.8258171807963263e-05, "loss": 0.6977, "step": 10345 }, { "epoch": 1.6889514713685156, "grad_norm": 3.040740966796875, "learning_rate": 1.825781022706304e-05, "loss": 0.6772, "step": 10346 }, { "epoch": 1.68911473001102, "grad_norm": 2.6536853313446045, "learning_rate": 1.825744861221807e-05, "loss": 0.7229, "step": 10347 }, { "epoch": 1.6892779886535243, "grad_norm": 2.2972023487091064, "learning_rate": 1.8257086963429853e-05, "loss": 0.6159, "step": 10348 }, { "epoch": 1.6894412472960287, "grad_norm": 3.036940574645996, "learning_rate": 1.825672528069987e-05, "loss": 0.8134, "step": 10349 }, { "epoch": 1.6896045059385332, "grad_norm": 3.1737349033355713, "learning_rate": 1.8256363564029603e-05, "loss": 0.7692, "step": 10350 }, { "epoch": 1.6897677645810374, "grad_norm": 2.4805619716644287, "learning_rate": 1.8256001813420544e-05, "loss": 0.649, "step": 10351 }, { "epoch": 1.6899310232235418, "grad_norm": 2.5775527954101562, "learning_rate": 1.825564002887418e-05, "loss": 0.6612, "step": 10352 }, { "epoch": 1.6900942818660463, "grad_norm": 2.570235013961792, "learning_rate": 1.8255278210391994e-05, "loss": 0.6948, "step": 10353 }, { "epoch": 1.6902575405085507, "grad_norm": 2.979097604751587, "learning_rate": 1.825491635797548e-05, "loss": 0.7664, "step": 10354 }, { "epoch": 1.6904207991510551, "grad_norm": 2.677476406097412, "learning_rate": 1.8254554471626123e-05, "loss": 0.6014, "step": 10355 }, { "epoch": 1.6905840577935596, "grad_norm": 2.484952449798584, "learning_rate": 1.8254192551345408e-05, "loss": 0.643, "step": 10356 }, { "epoch": 1.6907473164360638, "grad_norm": 2.9833667278289795, "learning_rate": 1.8253830597134823e-05, "loss": 0.6755, "step": 10357 }, { "epoch": 1.6909105750785682, "grad_norm": 2.8397254943847656, "learning_rate": 1.8253468608995855e-05, "loss": 0.6762, "step": 10358 }, { "epoch": 1.6910738337210725, "grad_norm": 2.534119129180908, "learning_rate": 1.825310658693e-05, "loss": 0.6786, "step": 10359 }, { "epoch": 1.6912370923635769, "grad_norm": 2.4582362174987793, "learning_rate": 1.8252744530938736e-05, "loss": 0.6663, "step": 10360 }, { "epoch": 1.6914003510060813, "grad_norm": 2.734365701675415, "learning_rate": 1.8252382441023556e-05, "loss": 0.7743, "step": 10361 }, { "epoch": 1.6915636096485858, "grad_norm": 2.4538214206695557, "learning_rate": 1.8252020317185946e-05, "loss": 0.6656, "step": 10362 }, { "epoch": 1.6917268682910902, "grad_norm": 2.626892566680908, "learning_rate": 1.8251658159427398e-05, "loss": 0.6182, "step": 10363 }, { "epoch": 1.6918901269335946, "grad_norm": 2.368403673171997, "learning_rate": 1.82512959677494e-05, "loss": 0.5835, "step": 10364 }, { "epoch": 1.692053385576099, "grad_norm": 2.5461854934692383, "learning_rate": 1.8250933742153438e-05, "loss": 0.6562, "step": 10365 }, { "epoch": 1.6922166442186033, "grad_norm": 2.7478697299957275, "learning_rate": 1.8250571482641005e-05, "loss": 0.6661, "step": 10366 }, { "epoch": 1.6923799028611077, "grad_norm": 2.8464088439941406, "learning_rate": 1.8250209189213586e-05, "loss": 0.7033, "step": 10367 }, { "epoch": 1.692543161503612, "grad_norm": 3.010603666305542, "learning_rate": 1.824984686187267e-05, "loss": 0.7829, "step": 10368 }, { "epoch": 1.6927064201461164, "grad_norm": 2.653127908706665, "learning_rate": 1.8249484500619753e-05, "loss": 0.6333, "step": 10369 }, { "epoch": 1.6928696787886208, "grad_norm": 2.691561698913574, "learning_rate": 1.824912210545632e-05, "loss": 0.7359, "step": 10370 }, { "epoch": 1.6930329374311253, "grad_norm": 2.315438985824585, "learning_rate": 1.824875967638386e-05, "loss": 0.6295, "step": 10371 }, { "epoch": 1.6931961960736297, "grad_norm": 2.6384012699127197, "learning_rate": 1.8248397213403864e-05, "loss": 0.6091, "step": 10372 }, { "epoch": 1.6933594547161341, "grad_norm": 2.8492660522460938, "learning_rate": 1.8248034716517823e-05, "loss": 0.7308, "step": 10373 }, { "epoch": 1.6935227133586386, "grad_norm": 2.892357110977173, "learning_rate": 1.8247672185727222e-05, "loss": 0.6512, "step": 10374 }, { "epoch": 1.6936859720011428, "grad_norm": 2.123422145843506, "learning_rate": 1.824730962103356e-05, "loss": 0.5219, "step": 10375 }, { "epoch": 1.6938492306436472, "grad_norm": 3.0090768337249756, "learning_rate": 1.8246947022438318e-05, "loss": 1.2777, "step": 10376 }, { "epoch": 1.6940124892861514, "grad_norm": 2.534484386444092, "learning_rate": 1.8246584389942993e-05, "loss": 0.6001, "step": 10377 }, { "epoch": 1.6941757479286559, "grad_norm": 3.0147695541381836, "learning_rate": 1.824622172354907e-05, "loss": 0.859, "step": 10378 }, { "epoch": 1.6943390065711603, "grad_norm": 2.7155723571777344, "learning_rate": 1.8245859023258046e-05, "loss": 0.6628, "step": 10379 }, { "epoch": 1.6945022652136648, "grad_norm": 2.694000720977783, "learning_rate": 1.824549628907141e-05, "loss": 0.71, "step": 10380 }, { "epoch": 1.6946655238561692, "grad_norm": 2.864255905151367, "learning_rate": 1.824513352099065e-05, "loss": 0.7699, "step": 10381 }, { "epoch": 1.6948287824986736, "grad_norm": 2.9117209911346436, "learning_rate": 1.824477071901726e-05, "loss": 0.6898, "step": 10382 }, { "epoch": 1.694992041141178, "grad_norm": 3.0062386989593506, "learning_rate": 1.824440788315273e-05, "loss": 0.7696, "step": 10383 }, { "epoch": 1.6951552997836823, "grad_norm": 2.6996426582336426, "learning_rate": 1.8244045013398553e-05, "loss": 0.785, "step": 10384 }, { "epoch": 1.6953185584261867, "grad_norm": 2.8349344730377197, "learning_rate": 1.8243682109756222e-05, "loss": 0.7087, "step": 10385 }, { "epoch": 1.695481817068691, "grad_norm": 2.576838731765747, "learning_rate": 1.8243319172227223e-05, "loss": 0.6423, "step": 10386 }, { "epoch": 1.6956450757111954, "grad_norm": 2.65277099609375, "learning_rate": 1.8242956200813054e-05, "loss": 0.6371, "step": 10387 }, { "epoch": 1.6958083343536998, "grad_norm": 2.5479955673217773, "learning_rate": 1.8242593195515202e-05, "loss": 0.6339, "step": 10388 }, { "epoch": 1.6959715929962043, "grad_norm": 2.6849327087402344, "learning_rate": 1.8242230156335165e-05, "loss": 0.6121, "step": 10389 }, { "epoch": 1.6961348516387087, "grad_norm": 2.2332355976104736, "learning_rate": 1.824186708327443e-05, "loss": 0.5791, "step": 10390 }, { "epoch": 1.6962981102812131, "grad_norm": 2.9494707584381104, "learning_rate": 1.8241503976334495e-05, "loss": 0.6487, "step": 10391 }, { "epoch": 1.6964613689237173, "grad_norm": 3.288738489151001, "learning_rate": 1.8241140835516843e-05, "loss": 0.7177, "step": 10392 }, { "epoch": 1.6966246275662218, "grad_norm": 2.9798340797424316, "learning_rate": 1.8240777660822976e-05, "loss": 0.6733, "step": 10393 }, { "epoch": 1.6967878862087262, "grad_norm": 2.9293649196624756, "learning_rate": 1.8240414452254385e-05, "loss": 0.6915, "step": 10394 }, { "epoch": 1.6969511448512304, "grad_norm": 2.570845127105713, "learning_rate": 1.824005120981256e-05, "loss": 0.6765, "step": 10395 }, { "epoch": 1.6971144034937349, "grad_norm": 3.0966172218322754, "learning_rate": 1.8239687933498994e-05, "loss": 0.8004, "step": 10396 }, { "epoch": 1.6972776621362393, "grad_norm": 3.126969575881958, "learning_rate": 1.8239324623315187e-05, "loss": 0.7142, "step": 10397 }, { "epoch": 1.6974409207787438, "grad_norm": 2.7353384494781494, "learning_rate": 1.8238961279262625e-05, "loss": 0.7226, "step": 10398 }, { "epoch": 1.6976041794212482, "grad_norm": 2.3882737159729004, "learning_rate": 1.8238597901342803e-05, "loss": 0.6348, "step": 10399 }, { "epoch": 1.6977674380637526, "grad_norm": 2.9904208183288574, "learning_rate": 1.8238234489557217e-05, "loss": 0.7401, "step": 10400 }, { "epoch": 1.6979306967062568, "grad_norm": 2.8525378704071045, "learning_rate": 1.823787104390736e-05, "loss": 0.7661, "step": 10401 }, { "epoch": 1.6980939553487613, "grad_norm": 2.646672248840332, "learning_rate": 1.8237507564394724e-05, "loss": 0.7087, "step": 10402 }, { "epoch": 1.6982572139912655, "grad_norm": 2.4225680828094482, "learning_rate": 1.8237144051020808e-05, "loss": 0.6241, "step": 10403 }, { "epoch": 1.69842047263377, "grad_norm": 2.7387990951538086, "learning_rate": 1.8236780503787103e-05, "loss": 0.7489, "step": 10404 }, { "epoch": 1.6985837312762744, "grad_norm": 2.6116013526916504, "learning_rate": 1.82364169226951e-05, "loss": 0.6864, "step": 10405 }, { "epoch": 1.6987469899187788, "grad_norm": 2.2865333557128906, "learning_rate": 1.82360533077463e-05, "loss": 0.5825, "step": 10406 }, { "epoch": 1.6989102485612833, "grad_norm": 2.4180827140808105, "learning_rate": 1.8235689658942192e-05, "loss": 0.6226, "step": 10407 }, { "epoch": 1.6990735072037877, "grad_norm": 2.299065351486206, "learning_rate": 1.8235325976284276e-05, "loss": 0.5979, "step": 10408 }, { "epoch": 1.6992367658462921, "grad_norm": 2.96256685256958, "learning_rate": 1.8234962259774044e-05, "loss": 0.7477, "step": 10409 }, { "epoch": 1.6994000244887963, "grad_norm": 2.7290520668029785, "learning_rate": 1.823459850941299e-05, "loss": 0.557, "step": 10410 }, { "epoch": 1.6995632831313008, "grad_norm": 2.2077462673187256, "learning_rate": 1.823423472520261e-05, "loss": 0.621, "step": 10411 }, { "epoch": 1.699726541773805, "grad_norm": 2.835935592651367, "learning_rate": 1.8233870907144405e-05, "loss": 0.6632, "step": 10412 }, { "epoch": 1.6998898004163094, "grad_norm": 3.144707441329956, "learning_rate": 1.8233507055239867e-05, "loss": 0.7572, "step": 10413 }, { "epoch": 1.7000530590588139, "grad_norm": 2.733081817626953, "learning_rate": 1.8233143169490484e-05, "loss": 0.7642, "step": 10414 }, { "epoch": 1.7002163177013183, "grad_norm": 2.801734209060669, "learning_rate": 1.823277924989776e-05, "loss": 0.6741, "step": 10415 }, { "epoch": 1.7003795763438228, "grad_norm": 2.2085390090942383, "learning_rate": 1.8232415296463188e-05, "loss": 0.5373, "step": 10416 }, { "epoch": 1.7005428349863272, "grad_norm": 2.8013482093811035, "learning_rate": 1.823205130918827e-05, "loss": 0.6692, "step": 10417 }, { "epoch": 1.7007060936288316, "grad_norm": 2.9993631839752197, "learning_rate": 1.8231687288074494e-05, "loss": 0.6929, "step": 10418 }, { "epoch": 1.7008693522713358, "grad_norm": 2.983661413192749, "learning_rate": 1.8231323233123358e-05, "loss": 0.7323, "step": 10419 }, { "epoch": 1.7010326109138403, "grad_norm": 2.82857608795166, "learning_rate": 1.8230959144336366e-05, "loss": 0.6349, "step": 10420 }, { "epoch": 1.7011958695563445, "grad_norm": 2.5495786666870117, "learning_rate": 1.8230595021715002e-05, "loss": 0.696, "step": 10421 }, { "epoch": 1.701359128198849, "grad_norm": 2.812620162963867, "learning_rate": 1.8230230865260777e-05, "loss": 0.8162, "step": 10422 }, { "epoch": 1.7015223868413534, "grad_norm": 2.742047071456909, "learning_rate": 1.8229866674975175e-05, "loss": 0.8062, "step": 10423 }, { "epoch": 1.7016856454838578, "grad_norm": 2.4217166900634766, "learning_rate": 1.82295024508597e-05, "loss": 0.6758, "step": 10424 }, { "epoch": 1.7018489041263622, "grad_norm": 2.346122980117798, "learning_rate": 1.822913819291585e-05, "loss": 0.5437, "step": 10425 }, { "epoch": 1.7020121627688667, "grad_norm": 2.4758071899414062, "learning_rate": 1.822877390114512e-05, "loss": 0.6975, "step": 10426 }, { "epoch": 1.7021754214113711, "grad_norm": 2.6591851711273193, "learning_rate": 1.8228409575549006e-05, "loss": 0.7523, "step": 10427 }, { "epoch": 1.7023386800538753, "grad_norm": 2.700993537902832, "learning_rate": 1.8228045216129007e-05, "loss": 0.7039, "step": 10428 }, { "epoch": 1.7025019386963798, "grad_norm": 2.80761981010437, "learning_rate": 1.822768082288662e-05, "loss": 0.6217, "step": 10429 }, { "epoch": 1.702665197338884, "grad_norm": 2.90718150138855, "learning_rate": 1.8227316395823346e-05, "loss": 0.7073, "step": 10430 }, { "epoch": 1.7028284559813884, "grad_norm": 3.008730173110962, "learning_rate": 1.822695193494068e-05, "loss": 0.7868, "step": 10431 }, { "epoch": 1.7029917146238929, "grad_norm": 2.893646001815796, "learning_rate": 1.822658744024012e-05, "loss": 0.641, "step": 10432 }, { "epoch": 1.7031549732663973, "grad_norm": 2.9159927368164062, "learning_rate": 1.8226222911723166e-05, "loss": 0.775, "step": 10433 }, { "epoch": 1.7033182319089017, "grad_norm": 2.822744131088257, "learning_rate": 1.822585834939132e-05, "loss": 0.6742, "step": 10434 }, { "epoch": 1.7034814905514062, "grad_norm": 2.9593496322631836, "learning_rate": 1.8225493753246072e-05, "loss": 0.7056, "step": 10435 }, { "epoch": 1.7036447491939104, "grad_norm": 2.529871940612793, "learning_rate": 1.8225129123288924e-05, "loss": 0.5946, "step": 10436 }, { "epoch": 1.7038080078364148, "grad_norm": 2.0208210945129395, "learning_rate": 1.8224764459521375e-05, "loss": 0.4719, "step": 10437 }, { "epoch": 1.7039712664789193, "grad_norm": 3.064671039581299, "learning_rate": 1.8224399761944932e-05, "loss": 0.7109, "step": 10438 }, { "epoch": 1.7041345251214235, "grad_norm": 2.182340383529663, "learning_rate": 1.822403503056108e-05, "loss": 0.592, "step": 10439 }, { "epoch": 1.704297783763928, "grad_norm": 2.5561091899871826, "learning_rate": 1.822367026537133e-05, "loss": 0.6399, "step": 10440 }, { "epoch": 1.7044610424064324, "grad_norm": 2.964576005935669, "learning_rate": 1.8223305466377173e-05, "loss": 0.7348, "step": 10441 }, { "epoch": 1.7046243010489368, "grad_norm": 3.791140079498291, "learning_rate": 1.8222940633580113e-05, "loss": 0.9566, "step": 10442 }, { "epoch": 1.7047875596914412, "grad_norm": 3.064162492752075, "learning_rate": 1.8222575766981648e-05, "loss": 0.9054, "step": 10443 }, { "epoch": 1.7049508183339457, "grad_norm": 2.3319284915924072, "learning_rate": 1.8222210866583285e-05, "loss": 0.6508, "step": 10444 }, { "epoch": 1.70511407697645, "grad_norm": 2.443966865539551, "learning_rate": 1.822184593238651e-05, "loss": 0.6002, "step": 10445 }, { "epoch": 1.7052773356189543, "grad_norm": 2.3453564643859863, "learning_rate": 1.822148096439284e-05, "loss": 0.6033, "step": 10446 }, { "epoch": 1.7054405942614586, "grad_norm": 2.226585865020752, "learning_rate": 1.8221115962603757e-05, "loss": 0.5215, "step": 10447 }, { "epoch": 1.705603852903963, "grad_norm": 3.2067935466766357, "learning_rate": 1.8220750927020776e-05, "loss": 0.7512, "step": 10448 }, { "epoch": 1.7057671115464674, "grad_norm": 2.6477768421173096, "learning_rate": 1.8220385857645387e-05, "loss": 0.5436, "step": 10449 }, { "epoch": 1.7059303701889719, "grad_norm": 3.0598533153533936, "learning_rate": 1.8220020754479104e-05, "loss": 0.6613, "step": 10450 }, { "epoch": 1.7060936288314763, "grad_norm": 2.7943227291107178, "learning_rate": 1.8219655617523412e-05, "loss": 0.5749, "step": 10451 }, { "epoch": 1.7062568874739807, "grad_norm": 3.0438687801361084, "learning_rate": 1.821929044677982e-05, "loss": 0.7521, "step": 10452 }, { "epoch": 1.7064201461164852, "grad_norm": 2.6952741146087646, "learning_rate": 1.821892524224983e-05, "loss": 0.5716, "step": 10453 }, { "epoch": 1.7065834047589894, "grad_norm": 2.4877541065216064, "learning_rate": 1.821856000393494e-05, "loss": 0.6598, "step": 10454 }, { "epoch": 1.7067466634014938, "grad_norm": 2.796811103820801, "learning_rate": 1.8218194731836658e-05, "loss": 0.7528, "step": 10455 }, { "epoch": 1.706909922043998, "grad_norm": 3.184401035308838, "learning_rate": 1.8217829425956477e-05, "loss": 0.8336, "step": 10456 }, { "epoch": 1.7070731806865025, "grad_norm": 2.9693520069122314, "learning_rate": 1.8217464086295904e-05, "loss": 0.837, "step": 10457 }, { "epoch": 1.707236439329007, "grad_norm": 2.483509063720703, "learning_rate": 1.8217098712856437e-05, "loss": 0.6478, "step": 10458 }, { "epoch": 1.7073996979715114, "grad_norm": 2.864332437515259, "learning_rate": 1.8216733305639582e-05, "loss": 0.7532, "step": 10459 }, { "epoch": 1.7075629566140158, "grad_norm": 2.703737497329712, "learning_rate": 1.821636786464684e-05, "loss": 0.6733, "step": 10460 }, { "epoch": 1.7077262152565202, "grad_norm": 2.8509140014648438, "learning_rate": 1.8216002389879706e-05, "loss": 0.7344, "step": 10461 }, { "epoch": 1.7078894738990247, "grad_norm": 3.059779405593872, "learning_rate": 1.8215636881339693e-05, "loss": 0.7602, "step": 10462 }, { "epoch": 1.708052732541529, "grad_norm": 2.343916416168213, "learning_rate": 1.82152713390283e-05, "loss": 0.6096, "step": 10463 }, { "epoch": 1.7082159911840333, "grad_norm": 2.51387357711792, "learning_rate": 1.8214905762947025e-05, "loss": 0.6357, "step": 10464 }, { "epoch": 1.7083792498265375, "grad_norm": 2.908318519592285, "learning_rate": 1.8214540153097376e-05, "loss": 0.7783, "step": 10465 }, { "epoch": 1.708542508469042, "grad_norm": 3.20054292678833, "learning_rate": 1.8214174509480856e-05, "loss": 0.8941, "step": 10466 }, { "epoch": 1.7087057671115464, "grad_norm": 2.710050582885742, "learning_rate": 1.8213808832098964e-05, "loss": 0.5482, "step": 10467 }, { "epoch": 1.7088690257540509, "grad_norm": 3.2774059772491455, "learning_rate": 1.8213443120953207e-05, "loss": 0.8609, "step": 10468 }, { "epoch": 1.7090322843965553, "grad_norm": 2.624598979949951, "learning_rate": 1.821307737604509e-05, "loss": 0.6434, "step": 10469 }, { "epoch": 1.7091955430390597, "grad_norm": 2.0689589977264404, "learning_rate": 1.8212711597376104e-05, "loss": 0.5643, "step": 10470 }, { "epoch": 1.7093588016815642, "grad_norm": 2.90669846534729, "learning_rate": 1.8212345784947768e-05, "loss": 0.7456, "step": 10471 }, { "epoch": 1.7095220603240684, "grad_norm": 2.828418493270874, "learning_rate": 1.8211979938761578e-05, "loss": 0.6565, "step": 10472 }, { "epoch": 1.7096853189665728, "grad_norm": 2.7296183109283447, "learning_rate": 1.8211614058819037e-05, "loss": 0.7498, "step": 10473 }, { "epoch": 1.709848577609077, "grad_norm": 2.4540855884552, "learning_rate": 1.8211248145121658e-05, "loss": 0.6263, "step": 10474 }, { "epoch": 1.7100118362515815, "grad_norm": 2.381239891052246, "learning_rate": 1.8210882197670933e-05, "loss": 0.6603, "step": 10475 }, { "epoch": 1.710175094894086, "grad_norm": 2.945368766784668, "learning_rate": 1.8210516216468372e-05, "loss": 0.7256, "step": 10476 }, { "epoch": 1.7103383535365904, "grad_norm": 2.365487575531006, "learning_rate": 1.8210150201515478e-05, "loss": 0.6542, "step": 10477 }, { "epoch": 1.7105016121790948, "grad_norm": 2.815030574798584, "learning_rate": 1.820978415281376e-05, "loss": 0.6906, "step": 10478 }, { "epoch": 1.7106648708215992, "grad_norm": 2.6076645851135254, "learning_rate": 1.8209418070364716e-05, "loss": 0.6281, "step": 10479 }, { "epoch": 1.7108281294641035, "grad_norm": 2.930741310119629, "learning_rate": 1.8209051954169856e-05, "loss": 0.7672, "step": 10480 }, { "epoch": 1.710991388106608, "grad_norm": 2.4737279415130615, "learning_rate": 1.8208685804230685e-05, "loss": 0.6302, "step": 10481 }, { "epoch": 1.7111546467491123, "grad_norm": 2.9819493293762207, "learning_rate": 1.8208319620548702e-05, "loss": 0.8432, "step": 10482 }, { "epoch": 1.7113179053916165, "grad_norm": 2.328336477279663, "learning_rate": 1.8207953403125416e-05, "loss": 0.6154, "step": 10483 }, { "epoch": 1.711481164034121, "grad_norm": 2.678887128829956, "learning_rate": 1.8207587151962335e-05, "loss": 0.668, "step": 10484 }, { "epoch": 1.7116444226766254, "grad_norm": 2.9434449672698975, "learning_rate": 1.8207220867060966e-05, "loss": 0.7547, "step": 10485 }, { "epoch": 1.7118076813191299, "grad_norm": 3.0247080326080322, "learning_rate": 1.820685454842281e-05, "loss": 0.8505, "step": 10486 }, { "epoch": 1.7119709399616343, "grad_norm": 2.8433308601379395, "learning_rate": 1.8206488196049367e-05, "loss": 0.6907, "step": 10487 }, { "epoch": 1.7121341986041387, "grad_norm": 2.814053535461426, "learning_rate": 1.8206121809942153e-05, "loss": 0.6981, "step": 10488 }, { "epoch": 1.712297457246643, "grad_norm": 2.778472900390625, "learning_rate": 1.8205755390102675e-05, "loss": 0.813, "step": 10489 }, { "epoch": 1.7124607158891474, "grad_norm": 2.7632131576538086, "learning_rate": 1.820538893653243e-05, "loss": 0.7294, "step": 10490 }, { "epoch": 1.7126239745316516, "grad_norm": 2.4582772254943848, "learning_rate": 1.8205022449232932e-05, "loss": 0.6899, "step": 10491 }, { "epoch": 1.712787233174156, "grad_norm": 2.815387487411499, "learning_rate": 1.8204655928205686e-05, "loss": 0.7465, "step": 10492 }, { "epoch": 1.7129504918166605, "grad_norm": 3.1214330196380615, "learning_rate": 1.8204289373452196e-05, "loss": 0.8, "step": 10493 }, { "epoch": 1.713113750459165, "grad_norm": 2.45896315574646, "learning_rate": 1.8203922784973968e-05, "loss": 0.6565, "step": 10494 }, { "epoch": 1.7132770091016694, "grad_norm": 2.5387203693389893, "learning_rate": 1.8203556162772515e-05, "loss": 0.7223, "step": 10495 }, { "epoch": 1.7134402677441738, "grad_norm": 2.523813486099243, "learning_rate": 1.820318950684934e-05, "loss": 0.7441, "step": 10496 }, { "epoch": 1.7136035263866782, "grad_norm": 2.6565492153167725, "learning_rate": 1.8202822817205947e-05, "loss": 0.6436, "step": 10497 }, { "epoch": 1.7137667850291824, "grad_norm": 2.3450443744659424, "learning_rate": 1.8202456093843848e-05, "loss": 0.5805, "step": 10498 }, { "epoch": 1.7139300436716869, "grad_norm": 2.727644920349121, "learning_rate": 1.820208933676455e-05, "loss": 0.6983, "step": 10499 }, { "epoch": 1.714093302314191, "grad_norm": 2.3434319496154785, "learning_rate": 1.820172254596956e-05, "loss": 0.6136, "step": 10500 }, { "epoch": 1.7142565609566955, "grad_norm": 3.00980806350708, "learning_rate": 1.8201355721460386e-05, "loss": 0.7219, "step": 10501 }, { "epoch": 1.7144198195992, "grad_norm": 2.864729642868042, "learning_rate": 1.8200988863238532e-05, "loss": 0.7731, "step": 10502 }, { "epoch": 1.7145830782417044, "grad_norm": 2.5382087230682373, "learning_rate": 1.8200621971305516e-05, "loss": 0.6618, "step": 10503 }, { "epoch": 1.7147463368842089, "grad_norm": 2.6168487071990967, "learning_rate": 1.8200255045662834e-05, "loss": 0.7329, "step": 10504 }, { "epoch": 1.7149095955267133, "grad_norm": 2.7046685218811035, "learning_rate": 1.8199888086312002e-05, "loss": 0.6324, "step": 10505 }, { "epoch": 1.7150728541692177, "grad_norm": 2.403400182723999, "learning_rate": 1.8199521093254524e-05, "loss": 0.6167, "step": 10506 }, { "epoch": 1.715236112811722, "grad_norm": 2.65069842338562, "learning_rate": 1.8199154066491915e-05, "loss": 0.6076, "step": 10507 }, { "epoch": 1.7153993714542264, "grad_norm": 2.396763801574707, "learning_rate": 1.8198787006025674e-05, "loss": 0.5264, "step": 10508 }, { "epoch": 1.7155626300967306, "grad_norm": 2.8965156078338623, "learning_rate": 1.819841991185732e-05, "loss": 0.6114, "step": 10509 }, { "epoch": 1.715725888739235, "grad_norm": 2.9121434688568115, "learning_rate": 1.819805278398836e-05, "loss": 0.6804, "step": 10510 }, { "epoch": 1.7158891473817395, "grad_norm": 3.343538761138916, "learning_rate": 1.8197685622420294e-05, "loss": 0.7327, "step": 10511 }, { "epoch": 1.716052406024244, "grad_norm": 3.2381138801574707, "learning_rate": 1.819731842715464e-05, "loss": 0.6537, "step": 10512 }, { "epoch": 1.7162156646667484, "grad_norm": 2.605278253555298, "learning_rate": 1.8196951198192904e-05, "loss": 0.5954, "step": 10513 }, { "epoch": 1.7163789233092528, "grad_norm": 2.394136905670166, "learning_rate": 1.81965839355366e-05, "loss": 0.539, "step": 10514 }, { "epoch": 1.7165421819517572, "grad_norm": 3.067005157470703, "learning_rate": 1.819621663918723e-05, "loss": 0.7448, "step": 10515 }, { "epoch": 1.7167054405942614, "grad_norm": 2.9508116245269775, "learning_rate": 1.819584930914631e-05, "loss": 0.7108, "step": 10516 }, { "epoch": 1.7168686992367659, "grad_norm": 2.4116322994232178, "learning_rate": 1.8195481945415353e-05, "loss": 0.571, "step": 10517 }, { "epoch": 1.71703195787927, "grad_norm": 3.089818239212036, "learning_rate": 1.819511454799586e-05, "loss": 0.7447, "step": 10518 }, { "epoch": 1.7171952165217745, "grad_norm": 2.989240884780884, "learning_rate": 1.8194747116889346e-05, "loss": 0.7112, "step": 10519 }, { "epoch": 1.717358475164279, "grad_norm": 2.9432544708251953, "learning_rate": 1.819437965209732e-05, "loss": 0.7934, "step": 10520 }, { "epoch": 1.7175217338067834, "grad_norm": 2.749046802520752, "learning_rate": 1.8194012153621295e-05, "loss": 0.6028, "step": 10521 }, { "epoch": 1.7176849924492879, "grad_norm": 2.7589828968048096, "learning_rate": 1.819364462146278e-05, "loss": 0.681, "step": 10522 }, { "epoch": 1.7178482510917923, "grad_norm": 2.7891652584075928, "learning_rate": 1.8193277055623283e-05, "loss": 0.6097, "step": 10523 }, { "epoch": 1.7180115097342965, "grad_norm": 2.503390073776245, "learning_rate": 1.8192909456104323e-05, "loss": 0.6997, "step": 10524 }, { "epoch": 1.718174768376801, "grad_norm": 2.6996963024139404, "learning_rate": 1.81925418229074e-05, "loss": 1.0084, "step": 10525 }, { "epoch": 1.7183380270193054, "grad_norm": 2.366084575653076, "learning_rate": 1.8192174156034034e-05, "loss": 0.5931, "step": 10526 }, { "epoch": 1.7185012856618096, "grad_norm": 3.268460988998413, "learning_rate": 1.819180645548573e-05, "loss": 0.8873, "step": 10527 }, { "epoch": 1.718664544304314, "grad_norm": 2.390932083129883, "learning_rate": 1.819143872126401e-05, "loss": 0.6335, "step": 10528 }, { "epoch": 1.7188278029468185, "grad_norm": 2.996098756790161, "learning_rate": 1.8191070953370372e-05, "loss": 0.8027, "step": 10529 }, { "epoch": 1.718991061589323, "grad_norm": 3.0836641788482666, "learning_rate": 1.8190703151806336e-05, "loss": 0.725, "step": 10530 }, { "epoch": 1.7191543202318273, "grad_norm": 2.6616930961608887, "learning_rate": 1.8190335316573414e-05, "loss": 0.7455, "step": 10531 }, { "epoch": 1.7193175788743318, "grad_norm": 2.50254225730896, "learning_rate": 1.8189967447673115e-05, "loss": 0.5674, "step": 10532 }, { "epoch": 1.719480837516836, "grad_norm": 2.9123547077178955, "learning_rate": 1.8189599545106952e-05, "loss": 0.6888, "step": 10533 }, { "epoch": 1.7196440961593404, "grad_norm": 3.0854289531707764, "learning_rate": 1.8189231608876437e-05, "loss": 0.7787, "step": 10534 }, { "epoch": 1.7198073548018447, "grad_norm": 3.570526123046875, "learning_rate": 1.8188863638983085e-05, "loss": 0.8226, "step": 10535 }, { "epoch": 1.719970613444349, "grad_norm": 3.031564712524414, "learning_rate": 1.8188495635428406e-05, "loss": 0.8303, "step": 10536 }, { "epoch": 1.7201338720868535, "grad_norm": 2.549464225769043, "learning_rate": 1.8188127598213912e-05, "loss": 0.6315, "step": 10537 }, { "epoch": 1.720297130729358, "grad_norm": 2.4383468627929688, "learning_rate": 1.8187759527341117e-05, "loss": 0.6744, "step": 10538 }, { "epoch": 1.7204603893718624, "grad_norm": 2.658910036087036, "learning_rate": 1.8187391422811536e-05, "loss": 0.608, "step": 10539 }, { "epoch": 1.7206236480143668, "grad_norm": 2.9068031311035156, "learning_rate": 1.818702328462668e-05, "loss": 0.7998, "step": 10540 }, { "epoch": 1.7207869066568713, "grad_norm": 2.504706621170044, "learning_rate": 1.8186655112788065e-05, "loss": 0.658, "step": 10541 }, { "epoch": 1.7209501652993755, "grad_norm": 2.4317855834960938, "learning_rate": 1.8186286907297197e-05, "loss": 0.6755, "step": 10542 }, { "epoch": 1.72111342394188, "grad_norm": 2.8638370037078857, "learning_rate": 1.8185918668155595e-05, "loss": 0.6759, "step": 10543 }, { "epoch": 1.7212766825843842, "grad_norm": 2.6731743812561035, "learning_rate": 1.818555039536478e-05, "loss": 0.6917, "step": 10544 }, { "epoch": 1.7214399412268886, "grad_norm": 2.255882740020752, "learning_rate": 1.8185182088926246e-05, "loss": 0.6008, "step": 10545 }, { "epoch": 1.721603199869393, "grad_norm": 2.807093620300293, "learning_rate": 1.818481374884153e-05, "loss": 0.6848, "step": 10546 }, { "epoch": 1.7217664585118975, "grad_norm": 3.1778507232666016, "learning_rate": 1.818444537511213e-05, "loss": 0.707, "step": 10547 }, { "epoch": 1.721929717154402, "grad_norm": 2.757622718811035, "learning_rate": 1.8184076967739566e-05, "loss": 0.6475, "step": 10548 }, { "epoch": 1.7220929757969063, "grad_norm": 2.955118179321289, "learning_rate": 1.8183708526725353e-05, "loss": 0.764, "step": 10549 }, { "epoch": 1.7222562344394108, "grad_norm": 2.8157172203063965, "learning_rate": 1.8183340052071e-05, "loss": 0.6496, "step": 10550 }, { "epoch": 1.722419493081915, "grad_norm": 2.552492618560791, "learning_rate": 1.818297154377803e-05, "loss": 0.6202, "step": 10551 }, { "epoch": 1.7225827517244194, "grad_norm": 2.4691057205200195, "learning_rate": 1.8182603001847954e-05, "loss": 0.7816, "step": 10552 }, { "epoch": 1.7227460103669237, "grad_norm": 2.6993346214294434, "learning_rate": 1.8182234426282287e-05, "loss": 0.7024, "step": 10553 }, { "epoch": 1.722909269009428, "grad_norm": 2.9695680141448975, "learning_rate": 1.818186581708254e-05, "loss": 0.7859, "step": 10554 }, { "epoch": 1.7230725276519325, "grad_norm": 2.962811231613159, "learning_rate": 1.8181497174250236e-05, "loss": 0.7822, "step": 10555 }, { "epoch": 1.723235786294437, "grad_norm": 3.0435492992401123, "learning_rate": 1.8181128497786885e-05, "loss": 0.803, "step": 10556 }, { "epoch": 1.7233990449369414, "grad_norm": 2.5740206241607666, "learning_rate": 1.8180759787694005e-05, "loss": 0.6294, "step": 10557 }, { "epoch": 1.7235623035794458, "grad_norm": 2.8006412982940674, "learning_rate": 1.818039104397311e-05, "loss": 0.7112, "step": 10558 }, { "epoch": 1.7237255622219503, "grad_norm": 2.851191997528076, "learning_rate": 1.8180022266625713e-05, "loss": 0.7612, "step": 10559 }, { "epoch": 1.7238888208644545, "grad_norm": 2.68534779548645, "learning_rate": 1.817965345565334e-05, "loss": 0.7453, "step": 10560 }, { "epoch": 1.724052079506959, "grad_norm": 2.2659647464752197, "learning_rate": 1.8179284611057497e-05, "loss": 0.6626, "step": 10561 }, { "epoch": 1.7242153381494632, "grad_norm": 2.3745665550231934, "learning_rate": 1.81789157328397e-05, "loss": 0.5894, "step": 10562 }, { "epoch": 1.7243785967919676, "grad_norm": 3.2223544120788574, "learning_rate": 1.817854682100147e-05, "loss": 1.6169, "step": 10563 }, { "epoch": 1.724541855434472, "grad_norm": 2.2304296493530273, "learning_rate": 1.8178177875544326e-05, "loss": 0.5416, "step": 10564 }, { "epoch": 1.7247051140769765, "grad_norm": 2.9037322998046875, "learning_rate": 1.8177808896469777e-05, "loss": 0.6831, "step": 10565 }, { "epoch": 1.724868372719481, "grad_norm": 2.7529525756835938, "learning_rate": 1.8177439883779344e-05, "loss": 0.6415, "step": 10566 }, { "epoch": 1.7250316313619853, "grad_norm": 2.33233642578125, "learning_rate": 1.8177070837474543e-05, "loss": 0.6476, "step": 10567 }, { "epoch": 1.7251948900044898, "grad_norm": 2.474780559539795, "learning_rate": 1.8176701757556897e-05, "loss": 0.6283, "step": 10568 }, { "epoch": 1.725358148646994, "grad_norm": 2.4413487911224365, "learning_rate": 1.8176332644027913e-05, "loss": 0.715, "step": 10569 }, { "epoch": 1.7255214072894984, "grad_norm": 2.752354383468628, "learning_rate": 1.8175963496889113e-05, "loss": 0.688, "step": 10570 }, { "epoch": 1.7256846659320026, "grad_norm": 3.332756757736206, "learning_rate": 1.8175594316142013e-05, "loss": 0.7717, "step": 10571 }, { "epoch": 1.725847924574507, "grad_norm": 2.7804930210113525, "learning_rate": 1.8175225101788136e-05, "loss": 0.7312, "step": 10572 }, { "epoch": 1.7260111832170115, "grad_norm": 2.818180561065674, "learning_rate": 1.817485585382899e-05, "loss": 0.7695, "step": 10573 }, { "epoch": 1.726174441859516, "grad_norm": 2.634239912033081, "learning_rate": 1.81744865722661e-05, "loss": 0.5949, "step": 10574 }, { "epoch": 1.7263377005020204, "grad_norm": 2.585789442062378, "learning_rate": 1.8174117257100984e-05, "loss": 0.6307, "step": 10575 }, { "epoch": 1.7265009591445248, "grad_norm": 2.650339365005493, "learning_rate": 1.8173747908335158e-05, "loss": 0.6474, "step": 10576 }, { "epoch": 1.726664217787029, "grad_norm": 2.842315912246704, "learning_rate": 1.817337852597014e-05, "loss": 0.7915, "step": 10577 }, { "epoch": 1.7268274764295335, "grad_norm": 2.390831708908081, "learning_rate": 1.817300911000745e-05, "loss": 0.6298, "step": 10578 }, { "epoch": 1.726990735072038, "grad_norm": 2.9501638412475586, "learning_rate": 1.8172639660448607e-05, "loss": 0.6903, "step": 10579 }, { "epoch": 1.7271539937145421, "grad_norm": 2.8069376945495605, "learning_rate": 1.8172270177295126e-05, "loss": 0.7708, "step": 10580 }, { "epoch": 1.7273172523570466, "grad_norm": 2.5015249252319336, "learning_rate": 1.8171900660548528e-05, "loss": 0.7266, "step": 10581 }, { "epoch": 1.727480510999551, "grad_norm": 2.6944897174835205, "learning_rate": 1.8171531110210332e-05, "loss": 0.6668, "step": 10582 }, { "epoch": 1.7276437696420555, "grad_norm": 2.750420093536377, "learning_rate": 1.8171161526282058e-05, "loss": 0.7952, "step": 10583 }, { "epoch": 1.72780702828456, "grad_norm": 2.4140994548797607, "learning_rate": 1.8170791908765224e-05, "loss": 0.6781, "step": 10584 }, { "epoch": 1.7279702869270643, "grad_norm": 2.1788904666900635, "learning_rate": 1.8170422257661353e-05, "loss": 0.5303, "step": 10585 }, { "epoch": 1.7281335455695686, "grad_norm": 2.5156612396240234, "learning_rate": 1.8170052572971957e-05, "loss": 0.5219, "step": 10586 }, { "epoch": 1.728296804212073, "grad_norm": 2.2962284088134766, "learning_rate": 1.816968285469856e-05, "loss": 0.612, "step": 10587 }, { "epoch": 1.7284600628545772, "grad_norm": 3.294252872467041, "learning_rate": 1.8169313102842683e-05, "loss": 0.7585, "step": 10588 }, { "epoch": 1.7286233214970816, "grad_norm": 2.4406073093414307, "learning_rate": 1.8168943317405843e-05, "loss": 0.5843, "step": 10589 }, { "epoch": 1.728786580139586, "grad_norm": 2.2465593814849854, "learning_rate": 1.8168573498389565e-05, "loss": 0.5043, "step": 10590 }, { "epoch": 1.7289498387820905, "grad_norm": 2.48833966255188, "learning_rate": 1.816820364579536e-05, "loss": 0.6223, "step": 10591 }, { "epoch": 1.729113097424595, "grad_norm": 2.389479398727417, "learning_rate": 1.8167833759624757e-05, "loss": 0.5539, "step": 10592 }, { "epoch": 1.7292763560670994, "grad_norm": 2.542100429534912, "learning_rate": 1.8167463839879277e-05, "loss": 0.6681, "step": 10593 }, { "epoch": 1.7294396147096038, "grad_norm": 2.7557942867279053, "learning_rate": 1.8167093886560433e-05, "loss": 0.6901, "step": 10594 }, { "epoch": 1.729602873352108, "grad_norm": 2.98325252532959, "learning_rate": 1.816672389966975e-05, "loss": 0.7806, "step": 10595 }, { "epoch": 1.7297661319946125, "grad_norm": 2.962266206741333, "learning_rate": 1.8166353879208748e-05, "loss": 0.6945, "step": 10596 }, { "epoch": 1.7299293906371167, "grad_norm": 2.8028604984283447, "learning_rate": 1.8165983825178954e-05, "loss": 0.6157, "step": 10597 }, { "epoch": 1.7300926492796211, "grad_norm": 3.093064069747925, "learning_rate": 1.8165613737581877e-05, "loss": 0.7425, "step": 10598 }, { "epoch": 1.7302559079221256, "grad_norm": 2.836890459060669, "learning_rate": 1.816524361641905e-05, "loss": 0.6685, "step": 10599 }, { "epoch": 1.73041916656463, "grad_norm": 2.538573741912842, "learning_rate": 1.8164873461691987e-05, "loss": 0.5432, "step": 10600 }, { "epoch": 1.7305824252071345, "grad_norm": 2.422616720199585, "learning_rate": 1.8164503273402215e-05, "loss": 0.5672, "step": 10601 }, { "epoch": 1.730745683849639, "grad_norm": 2.9581594467163086, "learning_rate": 1.816413305155125e-05, "loss": 0.7367, "step": 10602 }, { "epoch": 1.7309089424921433, "grad_norm": 2.275428295135498, "learning_rate": 1.816376279614062e-05, "loss": 0.556, "step": 10603 }, { "epoch": 1.7310722011346475, "grad_norm": 2.7798287868499756, "learning_rate": 1.816339250717184e-05, "loss": 0.6554, "step": 10604 }, { "epoch": 1.731235459777152, "grad_norm": 2.380749464035034, "learning_rate": 1.8163022184646436e-05, "loss": 0.6101, "step": 10605 }, { "epoch": 1.7313987184196562, "grad_norm": 2.8496954441070557, "learning_rate": 1.8162651828565936e-05, "loss": 0.5345, "step": 10606 }, { "epoch": 1.7315619770621606, "grad_norm": 3.0856306552886963, "learning_rate": 1.8162281438931852e-05, "loss": 0.6537, "step": 10607 }, { "epoch": 1.731725235704665, "grad_norm": 2.8827223777770996, "learning_rate": 1.816191101574571e-05, "loss": 0.8008, "step": 10608 }, { "epoch": 1.7318884943471695, "grad_norm": 2.8068182468414307, "learning_rate": 1.8161540559009038e-05, "loss": 0.7194, "step": 10609 }, { "epoch": 1.732051752989674, "grad_norm": 2.5369198322296143, "learning_rate": 1.816117006872335e-05, "loss": 0.5433, "step": 10610 }, { "epoch": 1.7322150116321784, "grad_norm": 2.8545939922332764, "learning_rate": 1.8160799544890174e-05, "loss": 0.7158, "step": 10611 }, { "epoch": 1.7323782702746828, "grad_norm": 3.2879116535186768, "learning_rate": 1.8160428987511034e-05, "loss": 0.7561, "step": 10612 }, { "epoch": 1.732541528917187, "grad_norm": 3.391739845275879, "learning_rate": 1.816005839658745e-05, "loss": 0.8863, "step": 10613 }, { "epoch": 1.7327047875596915, "grad_norm": 2.773914337158203, "learning_rate": 1.815968777212095e-05, "loss": 0.6788, "step": 10614 }, { "epoch": 1.7328680462021957, "grad_norm": 2.545114040374756, "learning_rate": 1.8159317114113052e-05, "loss": 0.6452, "step": 10615 }, { "epoch": 1.7330313048447001, "grad_norm": 2.614619493484497, "learning_rate": 1.8158946422565282e-05, "loss": 0.6913, "step": 10616 }, { "epoch": 1.7331945634872046, "grad_norm": 2.610891103744507, "learning_rate": 1.8158575697479168e-05, "loss": 0.6237, "step": 10617 }, { "epoch": 1.733357822129709, "grad_norm": 2.71730637550354, "learning_rate": 1.8158204938856224e-05, "loss": 0.6698, "step": 10618 }, { "epoch": 1.7335210807722135, "grad_norm": 2.934720754623413, "learning_rate": 1.8157834146697982e-05, "loss": 0.7904, "step": 10619 }, { "epoch": 1.733684339414718, "grad_norm": 2.521754026412964, "learning_rate": 1.8157463321005967e-05, "loss": 0.7312, "step": 10620 }, { "epoch": 1.733847598057222, "grad_norm": 2.7809343338012695, "learning_rate": 1.81570924617817e-05, "loss": 0.7153, "step": 10621 }, { "epoch": 1.7340108566997265, "grad_norm": 3.0789029598236084, "learning_rate": 1.8156721569026704e-05, "loss": 0.7652, "step": 10622 }, { "epoch": 1.734174115342231, "grad_norm": 2.3948752880096436, "learning_rate": 1.815635064274251e-05, "loss": 0.5634, "step": 10623 }, { "epoch": 1.7343373739847352, "grad_norm": 2.8312020301818848, "learning_rate": 1.815597968293063e-05, "loss": 0.7823, "step": 10624 }, { "epoch": 1.7345006326272396, "grad_norm": 3.1521944999694824, "learning_rate": 1.8155608689592604e-05, "loss": 0.7026, "step": 10625 }, { "epoch": 1.734663891269744, "grad_norm": 2.5055174827575684, "learning_rate": 1.8155237662729948e-05, "loss": 0.5463, "step": 10626 }, { "epoch": 1.7348271499122485, "grad_norm": 2.7068402767181396, "learning_rate": 1.815486660234419e-05, "loss": 0.6466, "step": 10627 }, { "epoch": 1.734990408554753, "grad_norm": 2.446692705154419, "learning_rate": 1.8154495508436853e-05, "loss": 0.5155, "step": 10628 }, { "epoch": 1.7351536671972574, "grad_norm": 2.655106782913208, "learning_rate": 1.8154124381009465e-05, "loss": 0.6271, "step": 10629 }, { "epoch": 1.7353169258397616, "grad_norm": 3.3635265827178955, "learning_rate": 1.815375322006355e-05, "loss": 0.7499, "step": 10630 }, { "epoch": 1.735480184482266, "grad_norm": 2.472553253173828, "learning_rate": 1.8153382025600638e-05, "loss": 0.6316, "step": 10631 }, { "epoch": 1.7356434431247703, "grad_norm": 2.906128168106079, "learning_rate": 1.8153010797622247e-05, "loss": 0.7457, "step": 10632 }, { "epoch": 1.7358067017672747, "grad_norm": 3.013794422149658, "learning_rate": 1.8152639536129907e-05, "loss": 0.6973, "step": 10633 }, { "epoch": 1.7359699604097791, "grad_norm": 2.5547189712524414, "learning_rate": 1.8152268241125145e-05, "loss": 0.5984, "step": 10634 }, { "epoch": 1.7361332190522836, "grad_norm": 3.036761999130249, "learning_rate": 1.8151896912609486e-05, "loss": 0.8864, "step": 10635 }, { "epoch": 1.736296477694788, "grad_norm": 2.491412401199341, "learning_rate": 1.815152555058446e-05, "loss": 0.6805, "step": 10636 }, { "epoch": 1.7364597363372924, "grad_norm": 2.9877099990844727, "learning_rate": 1.8151154155051587e-05, "loss": 0.7107, "step": 10637 }, { "epoch": 1.7366229949797969, "grad_norm": 2.951756000518799, "learning_rate": 1.8150782726012396e-05, "loss": 0.7466, "step": 10638 }, { "epoch": 1.736786253622301, "grad_norm": 2.6211113929748535, "learning_rate": 1.815041126346842e-05, "loss": 0.6876, "step": 10639 }, { "epoch": 1.7369495122648055, "grad_norm": 1.7892152070999146, "learning_rate": 1.815003976742118e-05, "loss": 0.4833, "step": 10640 }, { "epoch": 1.7371127709073098, "grad_norm": 2.4170024394989014, "learning_rate": 1.81496682378722e-05, "loss": 0.5341, "step": 10641 }, { "epoch": 1.7372760295498142, "grad_norm": 2.5067648887634277, "learning_rate": 1.8149296674823012e-05, "loss": 0.5947, "step": 10642 }, { "epoch": 1.7374392881923186, "grad_norm": 2.5705716609954834, "learning_rate": 1.8148925078275145e-05, "loss": 0.5541, "step": 10643 }, { "epoch": 1.737602546834823, "grad_norm": 2.59364914894104, "learning_rate": 1.8148553448230123e-05, "loss": 0.6371, "step": 10644 }, { "epoch": 1.7377658054773275, "grad_norm": 2.341602325439453, "learning_rate": 1.8148181784689474e-05, "loss": 0.6388, "step": 10645 }, { "epoch": 1.737929064119832, "grad_norm": 2.4804880619049072, "learning_rate": 1.8147810087654727e-05, "loss": 0.6517, "step": 10646 }, { "epoch": 1.7380923227623364, "grad_norm": 2.8148183822631836, "learning_rate": 1.814743835712741e-05, "loss": 0.6006, "step": 10647 }, { "epoch": 1.7382555814048406, "grad_norm": 2.7656795978546143, "learning_rate": 1.814706659310905e-05, "loss": 0.6643, "step": 10648 }, { "epoch": 1.738418840047345, "grad_norm": 2.8849127292633057, "learning_rate": 1.8146694795601175e-05, "loss": 0.743, "step": 10649 }, { "epoch": 1.7385820986898493, "grad_norm": 2.834649085998535, "learning_rate": 1.814632296460531e-05, "loss": 0.7419, "step": 10650 }, { "epoch": 1.7387453573323537, "grad_norm": 2.4432239532470703, "learning_rate": 1.8145951100122992e-05, "loss": 0.5927, "step": 10651 }, { "epoch": 1.7389086159748581, "grad_norm": 2.6591804027557373, "learning_rate": 1.8145579202155743e-05, "loss": 0.7468, "step": 10652 }, { "epoch": 1.7390718746173626, "grad_norm": 3.0139942169189453, "learning_rate": 1.8145207270705095e-05, "loss": 0.7628, "step": 10653 }, { "epoch": 1.739235133259867, "grad_norm": 2.8617665767669678, "learning_rate": 1.8144835305772574e-05, "loss": 0.6623, "step": 10654 }, { "epoch": 1.7393983919023714, "grad_norm": 2.4337234497070312, "learning_rate": 1.8144463307359713e-05, "loss": 0.6616, "step": 10655 }, { "epoch": 1.7395616505448759, "grad_norm": 3.0358145236968994, "learning_rate": 1.814409127546804e-05, "loss": 0.7859, "step": 10656 }, { "epoch": 1.73972490918738, "grad_norm": 2.933053493499756, "learning_rate": 1.8143719210099078e-05, "loss": 0.727, "step": 10657 }, { "epoch": 1.7398881678298845, "grad_norm": 2.9172778129577637, "learning_rate": 1.814334711125436e-05, "loss": 0.8335, "step": 10658 }, { "epoch": 1.7400514264723888, "grad_norm": 2.8269917964935303, "learning_rate": 1.8142974978935423e-05, "loss": 0.695, "step": 10659 }, { "epoch": 1.7402146851148932, "grad_norm": 2.7320399284362793, "learning_rate": 1.8142602813143786e-05, "loss": 0.7122, "step": 10660 }, { "epoch": 1.7403779437573976, "grad_norm": 2.7213242053985596, "learning_rate": 1.8142230613880985e-05, "loss": 0.6762, "step": 10661 }, { "epoch": 1.740541202399902, "grad_norm": 2.4190824031829834, "learning_rate": 1.814185838114855e-05, "loss": 0.6199, "step": 10662 }, { "epoch": 1.7407044610424065, "grad_norm": 2.7588043212890625, "learning_rate": 1.814148611494801e-05, "loss": 0.6587, "step": 10663 }, { "epoch": 1.740867719684911, "grad_norm": 2.763164520263672, "learning_rate": 1.8141113815280894e-05, "loss": 0.6688, "step": 10664 }, { "epoch": 1.7410309783274152, "grad_norm": 2.424816131591797, "learning_rate": 1.814074148214873e-05, "loss": 0.6892, "step": 10665 }, { "epoch": 1.7411942369699196, "grad_norm": 2.783625841140747, "learning_rate": 1.8140369115553053e-05, "loss": 0.5589, "step": 10666 }, { "epoch": 1.741357495612424, "grad_norm": 2.437469482421875, "learning_rate": 1.8139996715495393e-05, "loss": 0.6499, "step": 10667 }, { "epoch": 1.7415207542549282, "grad_norm": 2.7020750045776367, "learning_rate": 1.8139624281977282e-05, "loss": 0.6715, "step": 10668 }, { "epoch": 1.7416840128974327, "grad_norm": 2.3923773765563965, "learning_rate": 1.8139251815000245e-05, "loss": 0.5799, "step": 10669 }, { "epoch": 1.7418472715399371, "grad_norm": 2.58081316947937, "learning_rate": 1.813887931456582e-05, "loss": 0.6603, "step": 10670 }, { "epoch": 1.7420105301824416, "grad_norm": 2.4937546253204346, "learning_rate": 1.8138506780675535e-05, "loss": 0.6503, "step": 10671 }, { "epoch": 1.742173788824946, "grad_norm": 2.4924087524414062, "learning_rate": 1.813813421333092e-05, "loss": 0.6259, "step": 10672 }, { "epoch": 1.7423370474674504, "grad_norm": 3.002241849899292, "learning_rate": 1.813776161253351e-05, "loss": 0.7951, "step": 10673 }, { "epoch": 1.7425003061099547, "grad_norm": 2.740894079208374, "learning_rate": 1.8137388978284837e-05, "loss": 0.6047, "step": 10674 }, { "epoch": 1.742663564752459, "grad_norm": 2.510815382003784, "learning_rate": 1.8137016310586422e-05, "loss": 0.6769, "step": 10675 }, { "epoch": 1.7428268233949633, "grad_norm": 2.9545204639434814, "learning_rate": 1.8136643609439815e-05, "loss": 0.6862, "step": 10676 }, { "epoch": 1.7429900820374677, "grad_norm": 3.100780487060547, "learning_rate": 1.8136270874846534e-05, "loss": 0.8526, "step": 10677 }, { "epoch": 1.7431533406799722, "grad_norm": 2.4512569904327393, "learning_rate": 1.813589810680812e-05, "loss": 0.6737, "step": 10678 }, { "epoch": 1.7433165993224766, "grad_norm": 2.9501357078552246, "learning_rate": 1.8135525305326095e-05, "loss": 0.7799, "step": 10679 }, { "epoch": 1.743479857964981, "grad_norm": 2.12256121635437, "learning_rate": 1.8135152470401998e-05, "loss": 0.5352, "step": 10680 }, { "epoch": 1.7436431166074855, "grad_norm": 2.6542880535125732, "learning_rate": 1.8134779602037365e-05, "loss": 0.6847, "step": 10681 }, { "epoch": 1.74380637524999, "grad_norm": 2.624211072921753, "learning_rate": 1.813440670023372e-05, "loss": 0.6986, "step": 10682 }, { "epoch": 1.7439696338924942, "grad_norm": 2.689145565032959, "learning_rate": 1.8134033764992605e-05, "loss": 0.7602, "step": 10683 }, { "epoch": 1.7441328925349986, "grad_norm": 2.64774227142334, "learning_rate": 1.8133660796315548e-05, "loss": 0.6468, "step": 10684 }, { "epoch": 1.7442961511775028, "grad_norm": 2.696708917617798, "learning_rate": 1.8133287794204077e-05, "loss": 0.645, "step": 10685 }, { "epoch": 1.7444594098200072, "grad_norm": 2.423246145248413, "learning_rate": 1.8132914758659735e-05, "loss": 0.6129, "step": 10686 }, { "epoch": 1.7446226684625117, "grad_norm": 2.2092971801757812, "learning_rate": 1.8132541689684053e-05, "loss": 0.5488, "step": 10687 }, { "epoch": 1.7447859271050161, "grad_norm": 2.608914613723755, "learning_rate": 1.8132168587278562e-05, "loss": 0.5803, "step": 10688 }, { "epoch": 1.7449491857475206, "grad_norm": 2.540327548980713, "learning_rate": 1.8131795451444794e-05, "loss": 0.64, "step": 10689 }, { "epoch": 1.745112444390025, "grad_norm": 2.3557188510894775, "learning_rate": 1.8131422282184287e-05, "loss": 0.59, "step": 10690 }, { "epoch": 1.7452757030325294, "grad_norm": 2.6688709259033203, "learning_rate": 1.8131049079498576e-05, "loss": 0.7031, "step": 10691 }, { "epoch": 1.7454389616750337, "grad_norm": 2.902440071105957, "learning_rate": 1.813067584338919e-05, "loss": 0.8879, "step": 10692 }, { "epoch": 1.745602220317538, "grad_norm": 2.4661989212036133, "learning_rate": 1.8130302573857666e-05, "loss": 0.638, "step": 10693 }, { "epoch": 1.7457654789600423, "grad_norm": 2.5268125534057617, "learning_rate": 1.8129929270905538e-05, "loss": 0.5666, "step": 10694 }, { "epoch": 1.7459287376025467, "grad_norm": 3.0790624618530273, "learning_rate": 1.812955593453434e-05, "loss": 0.7826, "step": 10695 }, { "epoch": 1.7460919962450512, "grad_norm": 2.778430700302124, "learning_rate": 1.8129182564745607e-05, "loss": 0.5572, "step": 10696 }, { "epoch": 1.7462552548875556, "grad_norm": 3.1347742080688477, "learning_rate": 1.8128809161540875e-05, "loss": 0.7713, "step": 10697 }, { "epoch": 1.74641851353006, "grad_norm": 3.3883004188537598, "learning_rate": 1.812843572492168e-05, "loss": 0.6987, "step": 10698 }, { "epoch": 1.7465817721725645, "grad_norm": 2.6875052452087402, "learning_rate": 1.812806225488955e-05, "loss": 0.6499, "step": 10699 }, { "epoch": 1.746745030815069, "grad_norm": 2.6039767265319824, "learning_rate": 1.8127688751446028e-05, "loss": 0.6633, "step": 10700 }, { "epoch": 1.7469082894575731, "grad_norm": 2.957683563232422, "learning_rate": 1.8127315214592646e-05, "loss": 0.7516, "step": 10701 }, { "epoch": 1.7470715481000776, "grad_norm": 2.509491205215454, "learning_rate": 1.812694164433094e-05, "loss": 0.616, "step": 10702 }, { "epoch": 1.7472348067425818, "grad_norm": 2.683875560760498, "learning_rate": 1.812656804066245e-05, "loss": 0.7481, "step": 10703 }, { "epoch": 1.7473980653850862, "grad_norm": 2.592395305633545, "learning_rate": 1.81261944035887e-05, "loss": 0.6713, "step": 10704 }, { "epoch": 1.7475613240275907, "grad_norm": 3.078935384750366, "learning_rate": 1.8125820733111237e-05, "loss": 0.7485, "step": 10705 }, { "epoch": 1.7477245826700951, "grad_norm": 3.050002098083496, "learning_rate": 1.8125447029231594e-05, "loss": 0.7864, "step": 10706 }, { "epoch": 1.7478878413125996, "grad_norm": 2.7894392013549805, "learning_rate": 1.8125073291951305e-05, "loss": 0.777, "step": 10707 }, { "epoch": 1.748051099955104, "grad_norm": 3.058943510055542, "learning_rate": 1.8124699521271906e-05, "loss": 0.7478, "step": 10708 }, { "epoch": 1.7482143585976082, "grad_norm": 2.5487287044525146, "learning_rate": 1.8124325717194936e-05, "loss": 0.7248, "step": 10709 }, { "epoch": 1.7483776172401126, "grad_norm": 2.5841000080108643, "learning_rate": 1.812395187972193e-05, "loss": 0.6636, "step": 10710 }, { "epoch": 1.748540875882617, "grad_norm": 2.8529083728790283, "learning_rate": 1.812357800885443e-05, "loss": 0.611, "step": 10711 }, { "epoch": 1.7487041345251213, "grad_norm": 2.6091442108154297, "learning_rate": 1.8123204104593967e-05, "loss": 0.8045, "step": 10712 }, { "epoch": 1.7488673931676257, "grad_norm": 2.7736830711364746, "learning_rate": 1.8122830166942073e-05, "loss": 0.798, "step": 10713 }, { "epoch": 1.7490306518101302, "grad_norm": 2.4812657833099365, "learning_rate": 1.8122456195900297e-05, "loss": 0.602, "step": 10714 }, { "epoch": 1.7491939104526346, "grad_norm": 2.708850145339966, "learning_rate": 1.812208219147017e-05, "loss": 0.7534, "step": 10715 }, { "epoch": 1.749357169095139, "grad_norm": 3.091353178024292, "learning_rate": 1.812170815365323e-05, "loss": 0.7544, "step": 10716 }, { "epoch": 1.7495204277376435, "grad_norm": 2.7648279666900635, "learning_rate": 1.8121334082451012e-05, "loss": 0.8463, "step": 10717 }, { "epoch": 1.7496836863801477, "grad_norm": 2.4510505199432373, "learning_rate": 1.812095997786506e-05, "loss": 0.6722, "step": 10718 }, { "epoch": 1.7498469450226521, "grad_norm": 2.7311527729034424, "learning_rate": 1.8120585839896904e-05, "loss": 0.8072, "step": 10719 }, { "epoch": 1.7500102036651564, "grad_norm": 2.4106249809265137, "learning_rate": 1.812021166854809e-05, "loss": 0.588, "step": 10720 }, { "epoch": 1.7501734623076608, "grad_norm": 2.6672780513763428, "learning_rate": 1.8119837463820148e-05, "loss": 0.6367, "step": 10721 }, { "epoch": 1.7503367209501652, "grad_norm": 2.8230350017547607, "learning_rate": 1.811946322571462e-05, "loss": 0.7885, "step": 10722 }, { "epoch": 1.7504999795926697, "grad_norm": 2.3421292304992676, "learning_rate": 1.811908895423305e-05, "loss": 0.5382, "step": 10723 }, { "epoch": 1.7506632382351741, "grad_norm": 2.749061346054077, "learning_rate": 1.8118714649376963e-05, "loss": 0.651, "step": 10724 }, { "epoch": 1.7508264968776786, "grad_norm": 2.2819838523864746, "learning_rate": 1.811834031114791e-05, "loss": 0.5615, "step": 10725 }, { "epoch": 1.750989755520183, "grad_norm": 2.1256825923919678, "learning_rate": 1.8117965939547427e-05, "loss": 0.6071, "step": 10726 }, { "epoch": 1.7511530141626872, "grad_norm": 2.493809938430786, "learning_rate": 1.8117591534577048e-05, "loss": 0.6416, "step": 10727 }, { "epoch": 1.7513162728051916, "grad_norm": 3.1291112899780273, "learning_rate": 1.8117217096238318e-05, "loss": 0.8156, "step": 10728 }, { "epoch": 1.7514795314476959, "grad_norm": 2.5381598472595215, "learning_rate": 1.811684262453277e-05, "loss": 0.6906, "step": 10729 }, { "epoch": 1.7516427900902003, "grad_norm": 2.938978672027588, "learning_rate": 1.8116468119461953e-05, "loss": 0.6922, "step": 10730 }, { "epoch": 1.7518060487327047, "grad_norm": 2.4397363662719727, "learning_rate": 1.8116093581027395e-05, "loss": 0.6425, "step": 10731 }, { "epoch": 1.7519693073752092, "grad_norm": 2.500598430633545, "learning_rate": 1.8115719009230642e-05, "loss": 0.6651, "step": 10732 }, { "epoch": 1.7521325660177136, "grad_norm": 3.0602474212646484, "learning_rate": 1.8115344404073235e-05, "loss": 0.6956, "step": 10733 }, { "epoch": 1.752295824660218, "grad_norm": 2.674821138381958, "learning_rate": 1.811496976555671e-05, "loss": 0.7652, "step": 10734 }, { "epoch": 1.7524590833027225, "grad_norm": 3.0526273250579834, "learning_rate": 1.8114595093682607e-05, "loss": 0.7144, "step": 10735 }, { "epoch": 1.7526223419452267, "grad_norm": 2.610291004180908, "learning_rate": 1.811422038845247e-05, "loss": 0.7158, "step": 10736 }, { "epoch": 1.7527856005877311, "grad_norm": 2.1491827964782715, "learning_rate": 1.8113845649867837e-05, "loss": 0.5327, "step": 10737 }, { "epoch": 1.7529488592302354, "grad_norm": 2.4697985649108887, "learning_rate": 1.8113470877930246e-05, "loss": 0.6416, "step": 10738 }, { "epoch": 1.7531121178727398, "grad_norm": 2.5627236366271973, "learning_rate": 1.811309607264124e-05, "loss": 0.6651, "step": 10739 }, { "epoch": 1.7532753765152442, "grad_norm": 2.535726308822632, "learning_rate": 1.811272123400236e-05, "loss": 0.7609, "step": 10740 }, { "epoch": 1.7534386351577487, "grad_norm": 2.991330862045288, "learning_rate": 1.8112346362015145e-05, "loss": 0.6966, "step": 10741 }, { "epoch": 1.753601893800253, "grad_norm": 2.480323553085327, "learning_rate": 1.811197145668114e-05, "loss": 0.533, "step": 10742 }, { "epoch": 1.7537651524427575, "grad_norm": 3.1893482208251953, "learning_rate": 1.811159651800188e-05, "loss": 0.8505, "step": 10743 }, { "epoch": 1.753928411085262, "grad_norm": 2.904301166534424, "learning_rate": 1.8111221545978913e-05, "loss": 0.6271, "step": 10744 }, { "epoch": 1.7540916697277662, "grad_norm": 2.6974284648895264, "learning_rate": 1.8110846540613776e-05, "loss": 0.6465, "step": 10745 }, { "epoch": 1.7542549283702706, "grad_norm": 2.4847664833068848, "learning_rate": 1.811047150190801e-05, "loss": 0.5734, "step": 10746 }, { "epoch": 1.7544181870127749, "grad_norm": 3.2149829864501953, "learning_rate": 1.8110096429863163e-05, "loss": 0.6482, "step": 10747 }, { "epoch": 1.7545814456552793, "grad_norm": 2.7865915298461914, "learning_rate": 1.8109721324480766e-05, "loss": 0.5932, "step": 10748 }, { "epoch": 1.7547447042977837, "grad_norm": 2.5119781494140625, "learning_rate": 1.8109346185762367e-05, "loss": 0.6012, "step": 10749 }, { "epoch": 1.7549079629402882, "grad_norm": 2.534918785095215, "learning_rate": 1.8108971013709512e-05, "loss": 0.6418, "step": 10750 }, { "epoch": 1.7550712215827926, "grad_norm": 2.0746169090270996, "learning_rate": 1.8108595808323736e-05, "loss": 0.4725, "step": 10751 }, { "epoch": 1.755234480225297, "grad_norm": 2.8316688537597656, "learning_rate": 1.8108220569606586e-05, "loss": 0.6662, "step": 10752 }, { "epoch": 1.7553977388678013, "grad_norm": 3.230590581893921, "learning_rate": 1.81078452975596e-05, "loss": 0.7698, "step": 10753 }, { "epoch": 1.7555609975103057, "grad_norm": 2.992037296295166, "learning_rate": 1.810746999218433e-05, "loss": 0.6786, "step": 10754 }, { "epoch": 1.7557242561528101, "grad_norm": 2.7532153129577637, "learning_rate": 1.8107094653482305e-05, "loss": 0.5998, "step": 10755 }, { "epoch": 1.7558875147953144, "grad_norm": 2.5006844997406006, "learning_rate": 1.810671928145508e-05, "loss": 0.5782, "step": 10756 }, { "epoch": 1.7560507734378188, "grad_norm": 3.2773501873016357, "learning_rate": 1.810634387610419e-05, "loss": 0.7905, "step": 10757 }, { "epoch": 1.7562140320803232, "grad_norm": 2.4861485958099365, "learning_rate": 1.810596843743118e-05, "loss": 0.6338, "step": 10758 }, { "epoch": 1.7563772907228277, "grad_norm": 3.1022324562072754, "learning_rate": 1.81055929654376e-05, "loss": 0.6828, "step": 10759 }, { "epoch": 1.756540549365332, "grad_norm": 3.094133138656616, "learning_rate": 1.810521746012498e-05, "loss": 0.7549, "step": 10760 }, { "epoch": 1.7567038080078365, "grad_norm": 2.701463222503662, "learning_rate": 1.8104841921494877e-05, "loss": 0.6174, "step": 10761 }, { "epoch": 1.7568670666503408, "grad_norm": 2.633782148361206, "learning_rate": 1.810446634954883e-05, "loss": 0.5812, "step": 10762 }, { "epoch": 1.7570303252928452, "grad_norm": 2.5178489685058594, "learning_rate": 1.8104090744288377e-05, "loss": 0.6351, "step": 10763 }, { "epoch": 1.7571935839353494, "grad_norm": 2.5207457542419434, "learning_rate": 1.8103715105715074e-05, "loss": 0.6448, "step": 10764 }, { "epoch": 1.7573568425778539, "grad_norm": 2.7024881839752197, "learning_rate": 1.8103339433830455e-05, "loss": 0.6199, "step": 10765 }, { "epoch": 1.7575201012203583, "grad_norm": 2.703946828842163, "learning_rate": 1.8102963728636066e-05, "loss": 0.7985, "step": 10766 }, { "epoch": 1.7576833598628627, "grad_norm": 2.3470630645751953, "learning_rate": 1.8102587990133454e-05, "loss": 0.5142, "step": 10767 }, { "epoch": 1.7578466185053672, "grad_norm": 2.4123923778533936, "learning_rate": 1.810221221832416e-05, "loss": 0.729, "step": 10768 }, { "epoch": 1.7580098771478716, "grad_norm": 2.8919122219085693, "learning_rate": 1.810183641320973e-05, "loss": 0.6941, "step": 10769 }, { "epoch": 1.758173135790376, "grad_norm": 2.423733949661255, "learning_rate": 1.8101460574791716e-05, "loss": 0.6435, "step": 10770 }, { "epoch": 1.7583363944328803, "grad_norm": 2.65240740776062, "learning_rate": 1.8101084703071653e-05, "loss": 0.6841, "step": 10771 }, { "epoch": 1.7584996530753847, "grad_norm": 3.2546751499176025, "learning_rate": 1.810070879805109e-05, "loss": 0.7842, "step": 10772 }, { "epoch": 1.758662911717889, "grad_norm": 3.0075907707214355, "learning_rate": 1.810033285973157e-05, "loss": 0.7903, "step": 10773 }, { "epoch": 1.7588261703603933, "grad_norm": 2.66400408744812, "learning_rate": 1.8099956888114644e-05, "loss": 0.7531, "step": 10774 }, { "epoch": 1.7589894290028978, "grad_norm": 3.0303266048431396, "learning_rate": 1.809958088320185e-05, "loss": 0.7023, "step": 10775 }, { "epoch": 1.7591526876454022, "grad_norm": 2.9006779193878174, "learning_rate": 1.809920484499474e-05, "loss": 0.865, "step": 10776 }, { "epoch": 1.7593159462879067, "grad_norm": 2.822209358215332, "learning_rate": 1.8098828773494855e-05, "loss": 0.6847, "step": 10777 }, { "epoch": 1.759479204930411, "grad_norm": 3.0306882858276367, "learning_rate": 1.8098452668703745e-05, "loss": 0.7147, "step": 10778 }, { "epoch": 1.7596424635729155, "grad_norm": 2.5981075763702393, "learning_rate": 1.8098076530622953e-05, "loss": 0.6567, "step": 10779 }, { "epoch": 1.7598057222154198, "grad_norm": 2.7579190731048584, "learning_rate": 1.8097700359254027e-05, "loss": 0.726, "step": 10780 }, { "epoch": 1.7599689808579242, "grad_norm": 2.955479860305786, "learning_rate": 1.809732415459851e-05, "loss": 0.6094, "step": 10781 }, { "epoch": 1.7601322395004284, "grad_norm": 2.4629664421081543, "learning_rate": 1.8096947916657952e-05, "loss": 0.5491, "step": 10782 }, { "epoch": 1.7602954981429328, "grad_norm": 2.5600407123565674, "learning_rate": 1.80965716454339e-05, "loss": 0.6586, "step": 10783 }, { "epoch": 1.7604587567854373, "grad_norm": 2.8343210220336914, "learning_rate": 1.80961953409279e-05, "loss": 0.6887, "step": 10784 }, { "epoch": 1.7606220154279417, "grad_norm": 2.670247793197632, "learning_rate": 1.8095819003141495e-05, "loss": 0.6626, "step": 10785 }, { "epoch": 1.7607852740704462, "grad_norm": 3.011786937713623, "learning_rate": 1.8095442632076237e-05, "loss": 0.692, "step": 10786 }, { "epoch": 1.7609485327129506, "grad_norm": 3.178424119949341, "learning_rate": 1.809506622773367e-05, "loss": 0.7621, "step": 10787 }, { "epoch": 1.761111791355455, "grad_norm": 2.741487503051758, "learning_rate": 1.8094689790115343e-05, "loss": 0.6303, "step": 10788 }, { "epoch": 1.7612750499979593, "grad_norm": 2.60890793800354, "learning_rate": 1.8094313319222804e-05, "loss": 0.6597, "step": 10789 }, { "epoch": 1.7614383086404637, "grad_norm": 2.8360483646392822, "learning_rate": 1.8093936815057594e-05, "loss": 0.7214, "step": 10790 }, { "epoch": 1.761601567282968, "grad_norm": 2.515789747238159, "learning_rate": 1.8093560277621272e-05, "loss": 0.6328, "step": 10791 }, { "epoch": 1.7617648259254723, "grad_norm": 2.7273216247558594, "learning_rate": 1.809318370691538e-05, "loss": 0.6675, "step": 10792 }, { "epoch": 1.7619280845679768, "grad_norm": 3.2944865226745605, "learning_rate": 1.8092807102941462e-05, "loss": 0.8246, "step": 10793 }, { "epoch": 1.7620913432104812, "grad_norm": 2.936260938644409, "learning_rate": 1.809243046570107e-05, "loss": 0.8117, "step": 10794 }, { "epoch": 1.7622546018529857, "grad_norm": 2.4966578483581543, "learning_rate": 1.8092053795195755e-05, "loss": 0.5869, "step": 10795 }, { "epoch": 1.76241786049549, "grad_norm": 2.8615620136260986, "learning_rate": 1.809167709142706e-05, "loss": 0.6861, "step": 10796 }, { "epoch": 1.7625811191379943, "grad_norm": 2.4721713066101074, "learning_rate": 1.8091300354396536e-05, "loss": 0.6192, "step": 10797 }, { "epoch": 1.7627443777804988, "grad_norm": 2.495159149169922, "learning_rate": 1.809092358410573e-05, "loss": 0.6068, "step": 10798 }, { "epoch": 1.7629076364230032, "grad_norm": 2.859839677810669, "learning_rate": 1.8090546780556194e-05, "loss": 0.8177, "step": 10799 }, { "epoch": 1.7630708950655074, "grad_norm": 3.43328595161438, "learning_rate": 1.8090169943749477e-05, "loss": 0.6227, "step": 10800 }, { "epoch": 1.7632341537080118, "grad_norm": 2.1349403858184814, "learning_rate": 1.808979307368712e-05, "loss": 0.5517, "step": 10801 }, { "epoch": 1.7633974123505163, "grad_norm": 2.7109382152557373, "learning_rate": 1.8089416170370686e-05, "loss": 0.7035, "step": 10802 }, { "epoch": 1.7635606709930207, "grad_norm": 2.923712730407715, "learning_rate": 1.8089039233801713e-05, "loss": 0.7485, "step": 10803 }, { "epoch": 1.7637239296355252, "grad_norm": 2.5294528007507324, "learning_rate": 1.8088662263981756e-05, "loss": 0.7216, "step": 10804 }, { "epoch": 1.7638871882780296, "grad_norm": 2.6609227657318115, "learning_rate": 1.8088285260912358e-05, "loss": 0.6179, "step": 10805 }, { "epoch": 1.7640504469205338, "grad_norm": 2.6159753799438477, "learning_rate": 1.8087908224595077e-05, "loss": 0.7359, "step": 10806 }, { "epoch": 1.7642137055630382, "grad_norm": 2.04645037651062, "learning_rate": 1.808753115503146e-05, "loss": 0.4544, "step": 10807 }, { "epoch": 1.7643769642055427, "grad_norm": 2.9155588150024414, "learning_rate": 1.8087154052223053e-05, "loss": 0.7398, "step": 10808 }, { "epoch": 1.764540222848047, "grad_norm": 3.1315720081329346, "learning_rate": 1.8086776916171414e-05, "loss": 0.7489, "step": 10809 }, { "epoch": 1.7647034814905513, "grad_norm": 3.047457218170166, "learning_rate": 1.808639974687808e-05, "loss": 0.6581, "step": 10810 }, { "epoch": 1.7648667401330558, "grad_norm": 2.31390643119812, "learning_rate": 1.8086022544344622e-05, "loss": 0.5969, "step": 10811 }, { "epoch": 1.7650299987755602, "grad_norm": 2.693934679031372, "learning_rate": 1.808564530857257e-05, "loss": 0.6507, "step": 10812 }, { "epoch": 1.7651932574180647, "grad_norm": 3.142040491104126, "learning_rate": 1.8085268039563486e-05, "loss": 0.8731, "step": 10813 }, { "epoch": 1.765356516060569, "grad_norm": 2.3710315227508545, "learning_rate": 1.8084890737318917e-05, "loss": 0.6174, "step": 10814 }, { "epoch": 1.7655197747030733, "grad_norm": 2.3721823692321777, "learning_rate": 1.8084513401840418e-05, "loss": 0.5622, "step": 10815 }, { "epoch": 1.7656830333455777, "grad_norm": 2.5600996017456055, "learning_rate": 1.8084136033129532e-05, "loss": 0.5674, "step": 10816 }, { "epoch": 1.765846291988082, "grad_norm": 2.888411283493042, "learning_rate": 1.8083758631187817e-05, "loss": 0.6705, "step": 10817 }, { "epoch": 1.7660095506305864, "grad_norm": 2.1641032695770264, "learning_rate": 1.8083381196016826e-05, "loss": 0.5242, "step": 10818 }, { "epoch": 1.7661728092730908, "grad_norm": 2.634894847869873, "learning_rate": 1.8083003727618104e-05, "loss": 0.6437, "step": 10819 }, { "epoch": 1.7663360679155953, "grad_norm": 2.725332736968994, "learning_rate": 1.8082626225993208e-05, "loss": 0.5837, "step": 10820 }, { "epoch": 1.7664993265580997, "grad_norm": 2.5445756912231445, "learning_rate": 1.8082248691143685e-05, "loss": 0.5624, "step": 10821 }, { "epoch": 1.7666625852006042, "grad_norm": 2.8154990673065186, "learning_rate": 1.808187112307109e-05, "loss": 0.8081, "step": 10822 }, { "epoch": 1.7668258438431086, "grad_norm": 3.1989855766296387, "learning_rate": 1.8081493521776975e-05, "loss": 0.8078, "step": 10823 }, { "epoch": 1.7669891024856128, "grad_norm": 2.636119842529297, "learning_rate": 1.808111588726289e-05, "loss": 0.6673, "step": 10824 }, { "epoch": 1.7671523611281172, "grad_norm": 2.8335328102111816, "learning_rate": 1.8080738219530393e-05, "loss": 0.6541, "step": 10825 }, { "epoch": 1.7673156197706215, "grad_norm": 2.8113045692443848, "learning_rate": 1.808036051858103e-05, "loss": 0.7633, "step": 10826 }, { "epoch": 1.767478878413126, "grad_norm": 3.082119941711426, "learning_rate": 1.8079982784416356e-05, "loss": 0.7515, "step": 10827 }, { "epoch": 1.7676421370556303, "grad_norm": 2.754561424255371, "learning_rate": 1.807960501703792e-05, "loss": 0.6932, "step": 10828 }, { "epoch": 1.7678053956981348, "grad_norm": 2.915161371231079, "learning_rate": 1.8079227216447284e-05, "loss": 0.7103, "step": 10829 }, { "epoch": 1.7679686543406392, "grad_norm": 2.746495008468628, "learning_rate": 1.8078849382645997e-05, "loss": 0.6825, "step": 10830 }, { "epoch": 1.7681319129831436, "grad_norm": 2.6377573013305664, "learning_rate": 1.8078471515635607e-05, "loss": 0.6572, "step": 10831 }, { "epoch": 1.768295171625648, "grad_norm": 2.592273712158203, "learning_rate": 1.8078093615417673e-05, "loss": 0.7353, "step": 10832 }, { "epoch": 1.7684584302681523, "grad_norm": 2.2812325954437256, "learning_rate": 1.807771568199374e-05, "loss": 0.5483, "step": 10833 }, { "epoch": 1.7686216889106567, "grad_norm": 2.8554089069366455, "learning_rate": 1.8077337715365374e-05, "loss": 0.6782, "step": 10834 }, { "epoch": 1.768784947553161, "grad_norm": 2.4321868419647217, "learning_rate": 1.8076959715534122e-05, "loss": 0.636, "step": 10835 }, { "epoch": 1.7689482061956654, "grad_norm": 2.7123167514801025, "learning_rate": 1.807658168250154e-05, "loss": 0.6017, "step": 10836 }, { "epoch": 1.7691114648381698, "grad_norm": 2.8850343227386475, "learning_rate": 1.8076203616269178e-05, "loss": 0.5945, "step": 10837 }, { "epoch": 1.7692747234806743, "grad_norm": 2.6279821395874023, "learning_rate": 1.8075825516838592e-05, "loss": 0.6271, "step": 10838 }, { "epoch": 1.7694379821231787, "grad_norm": 2.31215500831604, "learning_rate": 1.8075447384211338e-05, "loss": 0.5728, "step": 10839 }, { "epoch": 1.7696012407656831, "grad_norm": 2.6965229511260986, "learning_rate": 1.8075069218388964e-05, "loss": 0.6294, "step": 10840 }, { "epoch": 1.7697644994081876, "grad_norm": 2.6406261920928955, "learning_rate": 1.8074691019373036e-05, "loss": 0.6979, "step": 10841 }, { "epoch": 1.7699277580506918, "grad_norm": 2.6493265628814697, "learning_rate": 1.8074312787165103e-05, "loss": 0.6713, "step": 10842 }, { "epoch": 1.7700910166931962, "grad_norm": 2.7696094512939453, "learning_rate": 1.8073934521766715e-05, "loss": 0.705, "step": 10843 }, { "epoch": 1.7702542753357005, "grad_norm": 3.4328665733337402, "learning_rate": 1.8073556223179428e-05, "loss": 0.7061, "step": 10844 }, { "epoch": 1.770417533978205, "grad_norm": 2.5484049320220947, "learning_rate": 1.8073177891404804e-05, "loss": 0.5851, "step": 10845 }, { "epoch": 1.7705807926207093, "grad_norm": 2.8350658416748047, "learning_rate": 1.80727995264444e-05, "loss": 0.7829, "step": 10846 }, { "epoch": 1.7707440512632138, "grad_norm": 3.1051113605499268, "learning_rate": 1.8072421128299756e-05, "loss": 0.7435, "step": 10847 }, { "epoch": 1.7709073099057182, "grad_norm": 2.9049346446990967, "learning_rate": 1.807204269697244e-05, "loss": 0.7802, "step": 10848 }, { "epoch": 1.7710705685482226, "grad_norm": 2.5041792392730713, "learning_rate": 1.8071664232464005e-05, "loss": 0.6658, "step": 10849 }, { "epoch": 1.7712338271907269, "grad_norm": 2.8113274574279785, "learning_rate": 1.8071285734776006e-05, "loss": 0.6304, "step": 10850 }, { "epoch": 1.7713970858332313, "grad_norm": 2.9493749141693115, "learning_rate": 1.8070907203909998e-05, "loss": 0.7755, "step": 10851 }, { "epoch": 1.7715603444757357, "grad_norm": 3.2685632705688477, "learning_rate": 1.807052863986754e-05, "loss": 0.7101, "step": 10852 }, { "epoch": 1.77172360311824, "grad_norm": 2.460268259048462, "learning_rate": 1.8070150042650184e-05, "loss": 0.5998, "step": 10853 }, { "epoch": 1.7718868617607444, "grad_norm": 2.349306344985962, "learning_rate": 1.8069771412259485e-05, "loss": 0.5144, "step": 10854 }, { "epoch": 1.7720501204032488, "grad_norm": 2.801358699798584, "learning_rate": 1.806939274869701e-05, "loss": 0.7604, "step": 10855 }, { "epoch": 1.7722133790457533, "grad_norm": 2.638704299926758, "learning_rate": 1.8069014051964307e-05, "loss": 0.611, "step": 10856 }, { "epoch": 1.7723766376882577, "grad_norm": 2.5562825202941895, "learning_rate": 1.8068635322062934e-05, "loss": 0.6429, "step": 10857 }, { "epoch": 1.7725398963307621, "grad_norm": 2.9198803901672363, "learning_rate": 1.8068256558994444e-05, "loss": 0.7927, "step": 10858 }, { "epoch": 1.7727031549732664, "grad_norm": 2.7672946453094482, "learning_rate": 1.80678777627604e-05, "loss": 0.7443, "step": 10859 }, { "epoch": 1.7728664136157708, "grad_norm": 2.849008321762085, "learning_rate": 1.8067498933362356e-05, "loss": 0.8668, "step": 10860 }, { "epoch": 1.773029672258275, "grad_norm": 2.699244737625122, "learning_rate": 1.8067120070801875e-05, "loss": 0.6518, "step": 10861 }, { "epoch": 1.7731929309007795, "grad_norm": 2.863043785095215, "learning_rate": 1.8066741175080504e-05, "loss": 0.7731, "step": 10862 }, { "epoch": 1.773356189543284, "grad_norm": 2.7915875911712646, "learning_rate": 1.8066362246199806e-05, "loss": 0.7098, "step": 10863 }, { "epoch": 1.7735194481857883, "grad_norm": 2.8199596405029297, "learning_rate": 1.8065983284161342e-05, "loss": 0.6619, "step": 10864 }, { "epoch": 1.7736827068282928, "grad_norm": 2.693427562713623, "learning_rate": 1.8065604288966662e-05, "loss": 0.7009, "step": 10865 }, { "epoch": 1.7738459654707972, "grad_norm": 3.1518425941467285, "learning_rate": 1.8065225260617333e-05, "loss": 0.9164, "step": 10866 }, { "epoch": 1.7740092241133016, "grad_norm": 2.6152725219726562, "learning_rate": 1.8064846199114904e-05, "loss": 0.7034, "step": 10867 }, { "epoch": 1.7741724827558059, "grad_norm": 2.6273016929626465, "learning_rate": 1.806446710446094e-05, "loss": 0.7555, "step": 10868 }, { "epoch": 1.7743357413983103, "grad_norm": 3.18400502204895, "learning_rate": 1.8064087976656998e-05, "loss": 0.8392, "step": 10869 }, { "epoch": 1.7744990000408145, "grad_norm": 2.528150796890259, "learning_rate": 1.8063708815704634e-05, "loss": 0.702, "step": 10870 }, { "epoch": 1.774662258683319, "grad_norm": 2.794968366622925, "learning_rate": 1.8063329621605404e-05, "loss": 0.7157, "step": 10871 }, { "epoch": 1.7748255173258234, "grad_norm": 2.668846845626831, "learning_rate": 1.8062950394360877e-05, "loss": 0.7505, "step": 10872 }, { "epoch": 1.7749887759683278, "grad_norm": 2.611023426055908, "learning_rate": 1.8062571133972602e-05, "loss": 0.6557, "step": 10873 }, { "epoch": 1.7751520346108323, "grad_norm": 2.7219879627227783, "learning_rate": 1.8062191840442143e-05, "loss": 0.6686, "step": 10874 }, { "epoch": 1.7753152932533367, "grad_norm": 3.0679404735565186, "learning_rate": 1.8061812513771056e-05, "loss": 0.8658, "step": 10875 }, { "epoch": 1.7754785518958411, "grad_norm": 2.828272819519043, "learning_rate": 1.8061433153960903e-05, "loss": 0.8329, "step": 10876 }, { "epoch": 1.7756418105383454, "grad_norm": 2.4585344791412354, "learning_rate": 1.8061053761013242e-05, "loss": 0.7028, "step": 10877 }, { "epoch": 1.7758050691808498, "grad_norm": 2.4422686100006104, "learning_rate": 1.8060674334929635e-05, "loss": 0.6409, "step": 10878 }, { "epoch": 1.775968327823354, "grad_norm": 2.337218999862671, "learning_rate": 1.8060294875711637e-05, "loss": 0.6207, "step": 10879 }, { "epoch": 1.7761315864658584, "grad_norm": 2.169645309448242, "learning_rate": 1.8059915383360807e-05, "loss": 0.5491, "step": 10880 }, { "epoch": 1.7762948451083629, "grad_norm": 2.6955301761627197, "learning_rate": 1.8059535857878714e-05, "loss": 0.7432, "step": 10881 }, { "epoch": 1.7764581037508673, "grad_norm": 2.5083582401275635, "learning_rate": 1.805915629926691e-05, "loss": 0.5929, "step": 10882 }, { "epoch": 1.7766213623933718, "grad_norm": 2.6350929737091064, "learning_rate": 1.8058776707526958e-05, "loss": 0.7094, "step": 10883 }, { "epoch": 1.7767846210358762, "grad_norm": 3.219205379486084, "learning_rate": 1.8058397082660415e-05, "loss": 0.7543, "step": 10884 }, { "epoch": 1.7769478796783806, "grad_norm": 2.8639485836029053, "learning_rate": 1.8058017424668847e-05, "loss": 0.7436, "step": 10885 }, { "epoch": 1.7771111383208849, "grad_norm": 2.6469385623931885, "learning_rate": 1.805763773355381e-05, "loss": 0.6982, "step": 10886 }, { "epoch": 1.7772743969633893, "grad_norm": 2.315563917160034, "learning_rate": 1.805725800931687e-05, "loss": 0.6539, "step": 10887 }, { "epoch": 1.7774376556058935, "grad_norm": 2.863172769546509, "learning_rate": 1.8056878251959586e-05, "loss": 0.7505, "step": 10888 }, { "epoch": 1.777600914248398, "grad_norm": 2.5567626953125, "learning_rate": 1.8056498461483513e-05, "loss": 0.6846, "step": 10889 }, { "epoch": 1.7777641728909024, "grad_norm": 2.2569267749786377, "learning_rate": 1.805611863789022e-05, "loss": 0.5469, "step": 10890 }, { "epoch": 1.7779274315334068, "grad_norm": 2.788057327270508, "learning_rate": 1.8055738781181263e-05, "loss": 0.7623, "step": 10891 }, { "epoch": 1.7780906901759113, "grad_norm": 2.8093137741088867, "learning_rate": 1.8055358891358207e-05, "loss": 0.7089, "step": 10892 }, { "epoch": 1.7782539488184157, "grad_norm": 2.666720390319824, "learning_rate": 1.8054978968422613e-05, "loss": 0.7285, "step": 10893 }, { "epoch": 1.77841720746092, "grad_norm": 2.6049461364746094, "learning_rate": 1.8054599012376038e-05, "loss": 0.7351, "step": 10894 }, { "epoch": 1.7785804661034244, "grad_norm": 2.724257469177246, "learning_rate": 1.8054219023220052e-05, "loss": 0.6568, "step": 10895 }, { "epoch": 1.7787437247459288, "grad_norm": 2.2789320945739746, "learning_rate": 1.8053839000956212e-05, "loss": 0.4773, "step": 10896 }, { "epoch": 1.778906983388433, "grad_norm": 2.4184629917144775, "learning_rate": 1.8053458945586082e-05, "loss": 0.5952, "step": 10897 }, { "epoch": 1.7790702420309374, "grad_norm": 3.2098500728607178, "learning_rate": 1.8053078857111218e-05, "loss": 0.7458, "step": 10898 }, { "epoch": 1.7792335006734419, "grad_norm": 2.7043330669403076, "learning_rate": 1.8052698735533195e-05, "loss": 0.6444, "step": 10899 }, { "epoch": 1.7793967593159463, "grad_norm": 2.9148037433624268, "learning_rate": 1.8052318580853564e-05, "loss": 0.8009, "step": 10900 }, { "epoch": 1.7795600179584508, "grad_norm": 2.9739503860473633, "learning_rate": 1.8051938393073892e-05, "loss": 0.7912, "step": 10901 }, { "epoch": 1.7797232766009552, "grad_norm": 2.9470770359039307, "learning_rate": 1.8051558172195744e-05, "loss": 0.7616, "step": 10902 }, { "epoch": 1.7798865352434594, "grad_norm": 2.6763012409210205, "learning_rate": 1.8051177918220677e-05, "loss": 0.6074, "step": 10903 }, { "epoch": 1.7800497938859638, "grad_norm": 2.446276903152466, "learning_rate": 1.805079763115026e-05, "loss": 0.6859, "step": 10904 }, { "epoch": 1.780213052528468, "grad_norm": 2.715930461883545, "learning_rate": 1.8050417310986052e-05, "loss": 0.7214, "step": 10905 }, { "epoch": 1.7803763111709725, "grad_norm": 3.1940226554870605, "learning_rate": 1.805003695772962e-05, "loss": 0.8343, "step": 10906 }, { "epoch": 1.780539569813477, "grad_norm": 2.430889844894409, "learning_rate": 1.8049656571382525e-05, "loss": 0.6468, "step": 10907 }, { "epoch": 1.7807028284559814, "grad_norm": 3.111654281616211, "learning_rate": 1.804927615194633e-05, "loss": 0.7977, "step": 10908 }, { "epoch": 1.7808660870984858, "grad_norm": 2.3790555000305176, "learning_rate": 1.8048895699422605e-05, "loss": 0.4771, "step": 10909 }, { "epoch": 1.7810293457409903, "grad_norm": 2.5697133541107178, "learning_rate": 1.8048515213812904e-05, "loss": 0.6315, "step": 10910 }, { "epoch": 1.7811926043834947, "grad_norm": 2.8080503940582275, "learning_rate": 1.8048134695118795e-05, "loss": 0.6644, "step": 10911 }, { "epoch": 1.781355863025999, "grad_norm": 2.0597009658813477, "learning_rate": 1.8047754143341845e-05, "loss": 0.5432, "step": 10912 }, { "epoch": 1.7815191216685033, "grad_norm": 2.552443027496338, "learning_rate": 1.8047373558483617e-05, "loss": 0.5972, "step": 10913 }, { "epoch": 1.7816823803110076, "grad_norm": 2.9915199279785156, "learning_rate": 1.804699294054568e-05, "loss": 0.6981, "step": 10914 }, { "epoch": 1.781845638953512, "grad_norm": 2.5014126300811768, "learning_rate": 1.8046612289529587e-05, "loss": 0.597, "step": 10915 }, { "epoch": 1.7820088975960164, "grad_norm": 2.357233762741089, "learning_rate": 1.804623160543691e-05, "loss": 0.6529, "step": 10916 }, { "epoch": 1.7821721562385209, "grad_norm": 2.8522891998291016, "learning_rate": 1.8045850888269214e-05, "loss": 0.5905, "step": 10917 }, { "epoch": 1.7823354148810253, "grad_norm": 2.557481527328491, "learning_rate": 1.8045470138028064e-05, "loss": 0.7024, "step": 10918 }, { "epoch": 1.7824986735235298, "grad_norm": 2.827230215072632, "learning_rate": 1.804508935471502e-05, "loss": 0.7521, "step": 10919 }, { "epoch": 1.7826619321660342, "grad_norm": 3.2852652072906494, "learning_rate": 1.8044708538331656e-05, "loss": 1.5576, "step": 10920 }, { "epoch": 1.7828251908085384, "grad_norm": 2.768883228302002, "learning_rate": 1.8044327688879533e-05, "loss": 0.7027, "step": 10921 }, { "epoch": 1.7829884494510428, "grad_norm": 2.943873167037964, "learning_rate": 1.804394680636021e-05, "loss": 0.7244, "step": 10922 }, { "epoch": 1.783151708093547, "grad_norm": 2.6334023475646973, "learning_rate": 1.804356589077527e-05, "loss": 0.7101, "step": 10923 }, { "epoch": 1.7833149667360515, "grad_norm": 2.847510814666748, "learning_rate": 1.804318494212626e-05, "loss": 0.7066, "step": 10924 }, { "epoch": 1.783478225378556, "grad_norm": 2.5498104095458984, "learning_rate": 1.8042803960414756e-05, "loss": 0.6465, "step": 10925 }, { "epoch": 1.7836414840210604, "grad_norm": 2.3058762550354004, "learning_rate": 1.8042422945642323e-05, "loss": 0.5723, "step": 10926 }, { "epoch": 1.7838047426635648, "grad_norm": 2.445432424545288, "learning_rate": 1.8042041897810522e-05, "loss": 0.7119, "step": 10927 }, { "epoch": 1.7839680013060693, "grad_norm": 2.850736379623413, "learning_rate": 1.8041660816920924e-05, "loss": 0.7588, "step": 10928 }, { "epoch": 1.7841312599485737, "grad_norm": 2.412374496459961, "learning_rate": 1.8041279702975098e-05, "loss": 0.606, "step": 10929 }, { "epoch": 1.784294518591078, "grad_norm": 2.293098211288452, "learning_rate": 1.8040898555974605e-05, "loss": 0.5786, "step": 10930 }, { "epoch": 1.7844577772335823, "grad_norm": 3.0281152725219727, "learning_rate": 1.8040517375921016e-05, "loss": 0.7632, "step": 10931 }, { "epoch": 1.7846210358760866, "grad_norm": 2.9645538330078125, "learning_rate": 1.8040136162815893e-05, "loss": 0.8136, "step": 10932 }, { "epoch": 1.784784294518591, "grad_norm": 2.813953399658203, "learning_rate": 1.803975491666081e-05, "loss": 0.5581, "step": 10933 }, { "epoch": 1.7849475531610954, "grad_norm": 2.862424612045288, "learning_rate": 1.803937363745733e-05, "loss": 0.8723, "step": 10934 }, { "epoch": 1.7851108118035999, "grad_norm": 2.4323625564575195, "learning_rate": 1.803899232520702e-05, "loss": 0.6613, "step": 10935 }, { "epoch": 1.7852740704461043, "grad_norm": 2.0485875606536865, "learning_rate": 1.8038610979911445e-05, "loss": 0.4439, "step": 10936 }, { "epoch": 1.7854373290886087, "grad_norm": 2.7820680141448975, "learning_rate": 1.803822960157218e-05, "loss": 0.6931, "step": 10937 }, { "epoch": 1.785600587731113, "grad_norm": 2.996718168258667, "learning_rate": 1.8037848190190786e-05, "loss": 0.6675, "step": 10938 }, { "epoch": 1.7857638463736174, "grad_norm": 2.6688804626464844, "learning_rate": 1.8037466745768832e-05, "loss": 0.6481, "step": 10939 }, { "epoch": 1.7859271050161218, "grad_norm": 2.502774715423584, "learning_rate": 1.8037085268307887e-05, "loss": 0.6057, "step": 10940 }, { "epoch": 1.786090363658626, "grad_norm": 2.6298911571502686, "learning_rate": 1.803670375780952e-05, "loss": 0.7439, "step": 10941 }, { "epoch": 1.7862536223011305, "grad_norm": 2.8410627841949463, "learning_rate": 1.8036322214275303e-05, "loss": 0.6883, "step": 10942 }, { "epoch": 1.786416880943635, "grad_norm": 2.3384933471679688, "learning_rate": 1.8035940637706793e-05, "loss": 0.5769, "step": 10943 }, { "epoch": 1.7865801395861394, "grad_norm": 2.5712435245513916, "learning_rate": 1.8035559028105568e-05, "loss": 0.6956, "step": 10944 }, { "epoch": 1.7867433982286438, "grad_norm": 1.9630342721939087, "learning_rate": 1.80351773854732e-05, "loss": 0.5367, "step": 10945 }, { "epoch": 1.7869066568711482, "grad_norm": 2.8978090286254883, "learning_rate": 1.803479570981124e-05, "loss": 0.8132, "step": 10946 }, { "epoch": 1.7870699155136525, "grad_norm": 2.4580438137054443, "learning_rate": 1.8034414001121278e-05, "loss": 0.6612, "step": 10947 }, { "epoch": 1.787233174156157, "grad_norm": 2.6433420181274414, "learning_rate": 1.803403225940487e-05, "loss": 0.614, "step": 10948 }, { "epoch": 1.7873964327986611, "grad_norm": 2.946260690689087, "learning_rate": 1.803365048466359e-05, "loss": 0.8504, "step": 10949 }, { "epoch": 1.7875596914411656, "grad_norm": 2.357917308807373, "learning_rate": 1.803326867689901e-05, "loss": 0.5996, "step": 10950 }, { "epoch": 1.78772295008367, "grad_norm": 3.0208961963653564, "learning_rate": 1.803288683611269e-05, "loss": 0.6875, "step": 10951 }, { "epoch": 1.7878862087261744, "grad_norm": 2.7043089866638184, "learning_rate": 1.8032504962306205e-05, "loss": 0.6599, "step": 10952 }, { "epoch": 1.7880494673686789, "grad_norm": 2.9827632904052734, "learning_rate": 1.8032123055481128e-05, "loss": 0.7664, "step": 10953 }, { "epoch": 1.7882127260111833, "grad_norm": 3.199084520339966, "learning_rate": 1.8031741115639026e-05, "loss": 0.8374, "step": 10954 }, { "epoch": 1.7883759846536877, "grad_norm": 2.648078441619873, "learning_rate": 1.8031359142781467e-05, "loss": 0.6172, "step": 10955 }, { "epoch": 1.788539243296192, "grad_norm": 2.602015495300293, "learning_rate": 1.8030977136910022e-05, "loss": 0.7276, "step": 10956 }, { "epoch": 1.7887025019386964, "grad_norm": 3.1504087448120117, "learning_rate": 1.8030595098026265e-05, "loss": 0.6702, "step": 10957 }, { "epoch": 1.7888657605812006, "grad_norm": 2.6063194274902344, "learning_rate": 1.8030213026131767e-05, "loss": 0.7277, "step": 10958 }, { "epoch": 1.789029019223705, "grad_norm": 2.3992271423339844, "learning_rate": 1.802983092122809e-05, "loss": 0.5929, "step": 10959 }, { "epoch": 1.7891922778662095, "grad_norm": 3.1890928745269775, "learning_rate": 1.8029448783316813e-05, "loss": 0.8417, "step": 10960 }, { "epoch": 1.789355536508714, "grad_norm": 2.607588768005371, "learning_rate": 1.8029066612399502e-05, "loss": 0.7, "step": 10961 }, { "epoch": 1.7895187951512184, "grad_norm": 2.5989134311676025, "learning_rate": 1.802868440847773e-05, "loss": 0.6726, "step": 10962 }, { "epoch": 1.7896820537937228, "grad_norm": 2.875173568725586, "learning_rate": 1.8028302171553073e-05, "loss": 0.7063, "step": 10963 }, { "epoch": 1.7898453124362272, "grad_norm": 2.602008581161499, "learning_rate": 1.802791990162709e-05, "loss": 0.6724, "step": 10964 }, { "epoch": 1.7900085710787315, "grad_norm": 2.2751598358154297, "learning_rate": 1.8027537598701364e-05, "loss": 0.6029, "step": 10965 }, { "epoch": 1.790171829721236, "grad_norm": 2.5121381282806396, "learning_rate": 1.802715526277746e-05, "loss": 0.7182, "step": 10966 }, { "epoch": 1.7903350883637401, "grad_norm": 2.6718947887420654, "learning_rate": 1.8026772893856954e-05, "loss": 0.7056, "step": 10967 }, { "epoch": 1.7904983470062446, "grad_norm": 2.405573844909668, "learning_rate": 1.8026390491941415e-05, "loss": 0.6996, "step": 10968 }, { "epoch": 1.790661605648749, "grad_norm": 2.9404101371765137, "learning_rate": 1.802600805703241e-05, "loss": 0.7541, "step": 10969 }, { "epoch": 1.7908248642912534, "grad_norm": 2.9903483390808105, "learning_rate": 1.8025625589131524e-05, "loss": 0.7277, "step": 10970 }, { "epoch": 1.7909881229337579, "grad_norm": 2.244567632675171, "learning_rate": 1.8025243088240317e-05, "loss": 0.5039, "step": 10971 }, { "epoch": 1.7911513815762623, "grad_norm": 2.2101900577545166, "learning_rate": 1.802486055436037e-05, "loss": 0.5974, "step": 10972 }, { "epoch": 1.7913146402187667, "grad_norm": 2.586977005004883, "learning_rate": 1.802447798749325e-05, "loss": 0.6347, "step": 10973 }, { "epoch": 1.791477898861271, "grad_norm": 3.103623390197754, "learning_rate": 1.8024095387640527e-05, "loss": 0.7082, "step": 10974 }, { "epoch": 1.7916411575037754, "grad_norm": 2.5228519439697266, "learning_rate": 1.8023712754803783e-05, "loss": 0.6217, "step": 10975 }, { "epoch": 1.7918044161462796, "grad_norm": 2.3435702323913574, "learning_rate": 1.802333008898458e-05, "loss": 0.5442, "step": 10976 }, { "epoch": 1.791967674788784, "grad_norm": 2.1232693195343018, "learning_rate": 1.80229473901845e-05, "loss": 0.5597, "step": 10977 }, { "epoch": 1.7921309334312885, "grad_norm": 2.9474618434906006, "learning_rate": 1.8022564658405114e-05, "loss": 0.8115, "step": 10978 }, { "epoch": 1.792294192073793, "grad_norm": 2.6968021392822266, "learning_rate": 1.8022181893647996e-05, "loss": 0.6426, "step": 10979 }, { "epoch": 1.7924574507162974, "grad_norm": 2.356992483139038, "learning_rate": 1.802179909591471e-05, "loss": 0.6296, "step": 10980 }, { "epoch": 1.7926207093588018, "grad_norm": 2.3717684745788574, "learning_rate": 1.8021416265206843e-05, "loss": 0.6174, "step": 10981 }, { "epoch": 1.792783968001306, "grad_norm": 2.7185933589935303, "learning_rate": 1.802103340152596e-05, "loss": 0.7152, "step": 10982 }, { "epoch": 1.7929472266438105, "grad_norm": 2.567230224609375, "learning_rate": 1.8020650504873634e-05, "loss": 0.655, "step": 10983 }, { "epoch": 1.793110485286315, "grad_norm": 2.3909974098205566, "learning_rate": 1.8020267575251446e-05, "loss": 0.5321, "step": 10984 }, { "epoch": 1.793273743928819, "grad_norm": 2.911884069442749, "learning_rate": 1.8019884612660967e-05, "loss": 0.7836, "step": 10985 }, { "epoch": 1.7934370025713235, "grad_norm": 2.544603109359741, "learning_rate": 1.801950161710377e-05, "loss": 0.6045, "step": 10986 }, { "epoch": 1.793600261213828, "grad_norm": 2.3580985069274902, "learning_rate": 1.801911858858143e-05, "loss": 0.5491, "step": 10987 }, { "epoch": 1.7937635198563324, "grad_norm": 2.8026788234710693, "learning_rate": 1.801873552709552e-05, "loss": 0.5797, "step": 10988 }, { "epoch": 1.7939267784988369, "grad_norm": 2.867023468017578, "learning_rate": 1.801835243264762e-05, "loss": 0.6665, "step": 10989 }, { "epoch": 1.7940900371413413, "grad_norm": 2.6193618774414062, "learning_rate": 1.8017969305239298e-05, "loss": 0.6053, "step": 10990 }, { "epoch": 1.7942532957838455, "grad_norm": 2.340510845184326, "learning_rate": 1.801758614487213e-05, "loss": 0.5488, "step": 10991 }, { "epoch": 1.79441655442635, "grad_norm": 2.6656785011291504, "learning_rate": 1.8017202951547695e-05, "loss": 0.5707, "step": 10992 }, { "epoch": 1.7945798130688542, "grad_norm": 2.7485177516937256, "learning_rate": 1.8016819725267565e-05, "loss": 0.6946, "step": 10993 }, { "epoch": 1.7947430717113586, "grad_norm": 3.2363626956939697, "learning_rate": 1.8016436466033317e-05, "loss": 0.7816, "step": 10994 }, { "epoch": 1.794906330353863, "grad_norm": 3.1671578884124756, "learning_rate": 1.801605317384653e-05, "loss": 0.8174, "step": 10995 }, { "epoch": 1.7950695889963675, "grad_norm": 2.924758195877075, "learning_rate": 1.8015669848708768e-05, "loss": 0.7497, "step": 10996 }, { "epoch": 1.795232847638872, "grad_norm": 2.1708052158355713, "learning_rate": 1.8015286490621617e-05, "loss": 0.4959, "step": 10997 }, { "epoch": 1.7953961062813764, "grad_norm": 2.4495902061462402, "learning_rate": 1.801490309958665e-05, "loss": 0.7082, "step": 10998 }, { "epoch": 1.7955593649238808, "grad_norm": 2.6572999954223633, "learning_rate": 1.801451967560544e-05, "loss": 0.7328, "step": 10999 }, { "epoch": 1.795722623566385, "grad_norm": 2.9887843132019043, "learning_rate": 1.8014136218679566e-05, "loss": 1.4217, "step": 11000 }, { "epoch": 1.7958858822088895, "grad_norm": 2.802950382232666, "learning_rate": 1.8013752728810606e-05, "loss": 0.696, "step": 11001 }, { "epoch": 1.7960491408513937, "grad_norm": 2.6729464530944824, "learning_rate": 1.801336920600014e-05, "loss": 0.6891, "step": 11002 }, { "epoch": 1.796212399493898, "grad_norm": 2.274736166000366, "learning_rate": 1.801298565024973e-05, "loss": 0.6625, "step": 11003 }, { "epoch": 1.7963756581364025, "grad_norm": 2.927856922149658, "learning_rate": 1.8012602061560965e-05, "loss": 0.7181, "step": 11004 }, { "epoch": 1.796538916778907, "grad_norm": 3.3696584701538086, "learning_rate": 1.8012218439935417e-05, "loss": 0.5137, "step": 11005 }, { "epoch": 1.7967021754214114, "grad_norm": 2.3409078121185303, "learning_rate": 1.8011834785374667e-05, "loss": 0.5685, "step": 11006 }, { "epoch": 1.7968654340639159, "grad_norm": 2.549712896347046, "learning_rate": 1.801145109788029e-05, "loss": 0.6573, "step": 11007 }, { "epoch": 1.7970286927064203, "grad_norm": 2.7131714820861816, "learning_rate": 1.801106737745386e-05, "loss": 0.7263, "step": 11008 }, { "epoch": 1.7971919513489245, "grad_norm": 2.922346830368042, "learning_rate": 1.801068362409696e-05, "loss": 0.7001, "step": 11009 }, { "epoch": 1.797355209991429, "grad_norm": 3.029648780822754, "learning_rate": 1.801029983781116e-05, "loss": 0.7083, "step": 11010 }, { "epoch": 1.7975184686339332, "grad_norm": 2.472928762435913, "learning_rate": 1.8009916018598047e-05, "loss": 0.6356, "step": 11011 }, { "epoch": 1.7976817272764376, "grad_norm": 2.7225399017333984, "learning_rate": 1.8009532166459188e-05, "loss": 0.6183, "step": 11012 }, { "epoch": 1.797844985918942, "grad_norm": 3.1300852298736572, "learning_rate": 1.8009148281396168e-05, "loss": 0.6928, "step": 11013 }, { "epoch": 1.7980082445614465, "grad_norm": 2.9812662601470947, "learning_rate": 1.800876436341057e-05, "loss": 0.7268, "step": 11014 }, { "epoch": 1.798171503203951, "grad_norm": 2.297525644302368, "learning_rate": 1.8008380412503958e-05, "loss": 0.5254, "step": 11015 }, { "epoch": 1.7983347618464554, "grad_norm": 3.154414415359497, "learning_rate": 1.800799642867792e-05, "loss": 0.5778, "step": 11016 }, { "epoch": 1.7984980204889598, "grad_norm": 2.770193099975586, "learning_rate": 1.8007612411934033e-05, "loss": 0.7641, "step": 11017 }, { "epoch": 1.798661279131464, "grad_norm": 2.3011980056762695, "learning_rate": 1.800722836227388e-05, "loss": 0.5305, "step": 11018 }, { "epoch": 1.7988245377739684, "grad_norm": 2.214940309524536, "learning_rate": 1.8006844279699026e-05, "loss": 0.4632, "step": 11019 }, { "epoch": 1.7989877964164727, "grad_norm": 2.6465916633605957, "learning_rate": 1.8006460164211064e-05, "loss": 0.6292, "step": 11020 }, { "epoch": 1.799151055058977, "grad_norm": 2.8232920169830322, "learning_rate": 1.8006076015811565e-05, "loss": 0.6963, "step": 11021 }, { "epoch": 1.7993143137014815, "grad_norm": 2.1729893684387207, "learning_rate": 1.8005691834502113e-05, "loss": 0.5425, "step": 11022 }, { "epoch": 1.799477572343986, "grad_norm": 2.6432321071624756, "learning_rate": 1.800530762028428e-05, "loss": 0.6534, "step": 11023 }, { "epoch": 1.7996408309864904, "grad_norm": 2.86683988571167, "learning_rate": 1.8004923373159657e-05, "loss": 0.7225, "step": 11024 }, { "epoch": 1.7998040896289949, "grad_norm": 2.5954277515411377, "learning_rate": 1.8004539093129812e-05, "loss": 0.5891, "step": 11025 }, { "epoch": 1.799967348271499, "grad_norm": 3.0920510292053223, "learning_rate": 1.800415478019633e-05, "loss": 0.7474, "step": 11026 }, { "epoch": 1.8001306069140035, "grad_norm": 2.4841487407684326, "learning_rate": 1.8003770434360792e-05, "loss": 0.5474, "step": 11027 }, { "epoch": 1.800293865556508, "grad_norm": 2.4891366958618164, "learning_rate": 1.8003386055624775e-05, "loss": 0.6226, "step": 11028 }, { "epoch": 1.8004571241990122, "grad_norm": 3.1860508918762207, "learning_rate": 1.8003001643989856e-05, "loss": 0.6442, "step": 11029 }, { "epoch": 1.8006203828415166, "grad_norm": 2.3544585704803467, "learning_rate": 1.8002617199457627e-05, "loss": 0.6154, "step": 11030 }, { "epoch": 1.800783641484021, "grad_norm": 2.630864381790161, "learning_rate": 1.8002232722029654e-05, "loss": 0.74, "step": 11031 }, { "epoch": 1.8009469001265255, "grad_norm": 2.790964126586914, "learning_rate": 1.8001848211707526e-05, "loss": 0.7262, "step": 11032 }, { "epoch": 1.80111015876903, "grad_norm": 2.345942974090576, "learning_rate": 1.800146366849282e-05, "loss": 0.6236, "step": 11033 }, { "epoch": 1.8012734174115343, "grad_norm": 2.516511917114258, "learning_rate": 1.8001079092387122e-05, "loss": 0.6264, "step": 11034 }, { "epoch": 1.8014366760540386, "grad_norm": 2.868854522705078, "learning_rate": 1.8000694483392e-05, "loss": 0.7898, "step": 11035 }, { "epoch": 1.801599934696543, "grad_norm": 2.7955734729766846, "learning_rate": 1.8000309841509055e-05, "loss": 0.7094, "step": 11036 }, { "epoch": 1.8017631933390472, "grad_norm": 2.7084970474243164, "learning_rate": 1.7999925166739854e-05, "loss": 0.679, "step": 11037 }, { "epoch": 1.8019264519815517, "grad_norm": 2.5431442260742188, "learning_rate": 1.799954045908598e-05, "loss": 0.6363, "step": 11038 }, { "epoch": 1.802089710624056, "grad_norm": 4.431112289428711, "learning_rate": 1.7999155718549012e-05, "loss": 0.7005, "step": 11039 }, { "epoch": 1.8022529692665605, "grad_norm": 2.5727972984313965, "learning_rate": 1.7998770945130542e-05, "loss": 0.6751, "step": 11040 }, { "epoch": 1.802416227909065, "grad_norm": 2.468172788619995, "learning_rate": 1.799838613883214e-05, "loss": 0.5972, "step": 11041 }, { "epoch": 1.8025794865515694, "grad_norm": 2.3970460891723633, "learning_rate": 1.7998001299655397e-05, "loss": 0.5772, "step": 11042 }, { "epoch": 1.8027427451940738, "grad_norm": 2.25048565864563, "learning_rate": 1.7997616427601885e-05, "loss": 0.5423, "step": 11043 }, { "epoch": 1.802906003836578, "grad_norm": 2.6209278106689453, "learning_rate": 1.7997231522673196e-05, "loss": 0.7201, "step": 11044 }, { "epoch": 1.8030692624790825, "grad_norm": 2.3853259086608887, "learning_rate": 1.799684658487091e-05, "loss": 0.6024, "step": 11045 }, { "epoch": 1.8032325211215867, "grad_norm": 2.8058080673217773, "learning_rate": 1.79964616141966e-05, "loss": 0.7257, "step": 11046 }, { "epoch": 1.8033957797640912, "grad_norm": 3.203246831893921, "learning_rate": 1.799607661065186e-05, "loss": 0.7684, "step": 11047 }, { "epoch": 1.8035590384065956, "grad_norm": 2.909680128097534, "learning_rate": 1.799569157423827e-05, "loss": 0.6556, "step": 11048 }, { "epoch": 1.8037222970491, "grad_norm": 3.1485424041748047, "learning_rate": 1.7995306504957407e-05, "loss": 0.877, "step": 11049 }, { "epoch": 1.8038855556916045, "grad_norm": 2.5037972927093506, "learning_rate": 1.7994921402810862e-05, "loss": 0.6393, "step": 11050 }, { "epoch": 1.804048814334109, "grad_norm": 2.5890297889709473, "learning_rate": 1.799453626780021e-05, "loss": 0.7249, "step": 11051 }, { "epoch": 1.8042120729766133, "grad_norm": 2.647909641265869, "learning_rate": 1.7994151099927042e-05, "loss": 0.6936, "step": 11052 }, { "epoch": 1.8043753316191176, "grad_norm": 2.0694475173950195, "learning_rate": 1.7993765899192936e-05, "loss": 0.4677, "step": 11053 }, { "epoch": 1.804538590261622, "grad_norm": 3.07737398147583, "learning_rate": 1.7993380665599477e-05, "loss": 0.719, "step": 11054 }, { "epoch": 1.8047018489041262, "grad_norm": 3.129343032836914, "learning_rate": 1.7992995399148245e-05, "loss": 0.8185, "step": 11055 }, { "epoch": 1.8048651075466307, "grad_norm": 2.7886195182800293, "learning_rate": 1.799261009984083e-05, "loss": 0.6192, "step": 11056 }, { "epoch": 1.805028366189135, "grad_norm": 2.795182466506958, "learning_rate": 1.799222476767881e-05, "loss": 0.7789, "step": 11057 }, { "epoch": 1.8051916248316395, "grad_norm": 2.7057137489318848, "learning_rate": 1.7991839402663776e-05, "loss": 0.6658, "step": 11058 }, { "epoch": 1.805354883474144, "grad_norm": 2.57523512840271, "learning_rate": 1.7991454004797306e-05, "loss": 0.646, "step": 11059 }, { "epoch": 1.8055181421166484, "grad_norm": 2.6229629516601562, "learning_rate": 1.799106857408099e-05, "loss": 0.6919, "step": 11060 }, { "epoch": 1.8056814007591528, "grad_norm": 2.872715950012207, "learning_rate": 1.7990683110516404e-05, "loss": 0.6077, "step": 11061 }, { "epoch": 1.805844659401657, "grad_norm": 3.1048483848571777, "learning_rate": 1.7990297614105136e-05, "loss": 0.7511, "step": 11062 }, { "epoch": 1.8060079180441615, "grad_norm": 3.1843161582946777, "learning_rate": 1.7989912084848772e-05, "loss": 0.9124, "step": 11063 }, { "epoch": 1.8061711766866657, "grad_norm": 2.8689587116241455, "learning_rate": 1.7989526522748896e-05, "loss": 0.7845, "step": 11064 }, { "epoch": 1.8063344353291702, "grad_norm": 2.5204684734344482, "learning_rate": 1.79891409278071e-05, "loss": 0.6321, "step": 11065 }, { "epoch": 1.8064976939716746, "grad_norm": 2.856515884399414, "learning_rate": 1.7988755300024955e-05, "loss": 0.5743, "step": 11066 }, { "epoch": 1.806660952614179, "grad_norm": 2.659907102584839, "learning_rate": 1.7988369639404054e-05, "loss": 0.6235, "step": 11067 }, { "epoch": 1.8068242112566835, "grad_norm": 2.9247570037841797, "learning_rate": 1.7987983945945983e-05, "loss": 0.6942, "step": 11068 }, { "epoch": 1.806987469899188, "grad_norm": 2.6649603843688965, "learning_rate": 1.7987598219652328e-05, "loss": 0.7168, "step": 11069 }, { "epoch": 1.8071507285416923, "grad_norm": 3.181313991546631, "learning_rate": 1.798721246052467e-05, "loss": 0.8689, "step": 11070 }, { "epoch": 1.8073139871841966, "grad_norm": 3.409605026245117, "learning_rate": 1.79868266685646e-05, "loss": 0.61, "step": 11071 }, { "epoch": 1.807477245826701, "grad_norm": 2.439100980758667, "learning_rate": 1.79864408437737e-05, "loss": 0.632, "step": 11072 }, { "epoch": 1.8076405044692052, "grad_norm": 2.8688976764678955, "learning_rate": 1.7986054986153557e-05, "loss": 0.7238, "step": 11073 }, { "epoch": 1.8078037631117096, "grad_norm": 2.9494426250457764, "learning_rate": 1.798566909570576e-05, "loss": 0.7926, "step": 11074 }, { "epoch": 1.807967021754214, "grad_norm": 2.3386855125427246, "learning_rate": 1.798528317243189e-05, "loss": 0.6098, "step": 11075 }, { "epoch": 1.8081302803967185, "grad_norm": 2.8036916255950928, "learning_rate": 1.7984897216333536e-05, "loss": 0.7492, "step": 11076 }, { "epoch": 1.808293539039223, "grad_norm": 2.8011314868927, "learning_rate": 1.7984511227412284e-05, "loss": 0.6768, "step": 11077 }, { "epoch": 1.8084567976817274, "grad_norm": 2.6164088249206543, "learning_rate": 1.7984125205669725e-05, "loss": 0.6309, "step": 11078 }, { "epoch": 1.8086200563242316, "grad_norm": 2.660437822341919, "learning_rate": 1.7983739151107437e-05, "loss": 0.6789, "step": 11079 }, { "epoch": 1.808783314966736, "grad_norm": 2.523172378540039, "learning_rate": 1.7983353063727014e-05, "loss": 0.6291, "step": 11080 }, { "epoch": 1.8089465736092405, "grad_norm": 2.238722085952759, "learning_rate": 1.798296694353004e-05, "loss": 0.6199, "step": 11081 }, { "epoch": 1.8091098322517447, "grad_norm": 2.9799740314483643, "learning_rate": 1.7982580790518108e-05, "loss": 0.8192, "step": 11082 }, { "epoch": 1.8092730908942491, "grad_norm": 2.62701153755188, "learning_rate": 1.7982194604692798e-05, "loss": 0.7489, "step": 11083 }, { "epoch": 1.8094363495367536, "grad_norm": 2.40544056892395, "learning_rate": 1.79818083860557e-05, "loss": 0.6218, "step": 11084 }, { "epoch": 1.809599608179258, "grad_norm": 2.3113489151000977, "learning_rate": 1.7981422134608403e-05, "loss": 0.6009, "step": 11085 }, { "epoch": 1.8097628668217625, "grad_norm": 2.6209495067596436, "learning_rate": 1.7981035850352488e-05, "loss": 0.6602, "step": 11086 }, { "epoch": 1.809926125464267, "grad_norm": 2.381103992462158, "learning_rate": 1.798064953328955e-05, "loss": 0.6278, "step": 11087 }, { "epoch": 1.8100893841067711, "grad_norm": 2.472977876663208, "learning_rate": 1.798026318342118e-05, "loss": 0.7105, "step": 11088 }, { "epoch": 1.8102526427492756, "grad_norm": 2.567157030105591, "learning_rate": 1.7979876800748958e-05, "loss": 0.6707, "step": 11089 }, { "epoch": 1.8104159013917798, "grad_norm": 2.4864501953125, "learning_rate": 1.7979490385274473e-05, "loss": 0.5881, "step": 11090 }, { "epoch": 1.8105791600342842, "grad_norm": 3.0138676166534424, "learning_rate": 1.797910393699932e-05, "loss": 0.7975, "step": 11091 }, { "epoch": 1.8107424186767886, "grad_norm": 2.442289113998413, "learning_rate": 1.7978717455925083e-05, "loss": 0.6177, "step": 11092 }, { "epoch": 1.810905677319293, "grad_norm": 3.032517433166504, "learning_rate": 1.797833094205335e-05, "loss": 0.702, "step": 11093 }, { "epoch": 1.8110689359617975, "grad_norm": 2.860154628753662, "learning_rate": 1.7977944395385713e-05, "loss": 0.6271, "step": 11094 }, { "epoch": 1.811232194604302, "grad_norm": 3.0691733360290527, "learning_rate": 1.7977557815923756e-05, "loss": 0.6619, "step": 11095 }, { "epoch": 1.8113954532468064, "grad_norm": 2.633286952972412, "learning_rate": 1.7977171203669074e-05, "loss": 0.6549, "step": 11096 }, { "epoch": 1.8115587118893106, "grad_norm": 2.2744295597076416, "learning_rate": 1.7976784558623253e-05, "loss": 0.534, "step": 11097 }, { "epoch": 1.811721970531815, "grad_norm": 2.848240852355957, "learning_rate": 1.797639788078788e-05, "loss": 0.6101, "step": 11098 }, { "epoch": 1.8118852291743193, "grad_norm": 3.17600679397583, "learning_rate": 1.797601117016455e-05, "loss": 0.6945, "step": 11099 }, { "epoch": 1.8120484878168237, "grad_norm": 2.6150200366973877, "learning_rate": 1.7975624426754847e-05, "loss": 0.6493, "step": 11100 }, { "epoch": 1.8122117464593281, "grad_norm": 2.8791697025299072, "learning_rate": 1.7975237650560363e-05, "loss": 0.6545, "step": 11101 }, { "epoch": 1.8123750051018326, "grad_norm": 2.8469302654266357, "learning_rate": 1.7974850841582693e-05, "loss": 0.7107, "step": 11102 }, { "epoch": 1.812538263744337, "grad_norm": 2.305812120437622, "learning_rate": 1.797446399982342e-05, "loss": 0.6207, "step": 11103 }, { "epoch": 1.8127015223868415, "grad_norm": 2.6254773139953613, "learning_rate": 1.7974077125284137e-05, "loss": 0.5642, "step": 11104 }, { "epoch": 1.812864781029346, "grad_norm": 2.206228017807007, "learning_rate": 1.7973690217966432e-05, "loss": 0.4302, "step": 11105 }, { "epoch": 1.8130280396718501, "grad_norm": 3.0599265098571777, "learning_rate": 1.79733032778719e-05, "loss": 0.7216, "step": 11106 }, { "epoch": 1.8131912983143545, "grad_norm": 2.2995662689208984, "learning_rate": 1.7972916305002127e-05, "loss": 0.6329, "step": 11107 }, { "epoch": 1.8133545569568588, "grad_norm": 3.2592639923095703, "learning_rate": 1.7972529299358707e-05, "loss": 0.8152, "step": 11108 }, { "epoch": 1.8135178155993632, "grad_norm": 2.9323086738586426, "learning_rate": 1.797214226094323e-05, "loss": 0.7387, "step": 11109 }, { "epoch": 1.8136810742418676, "grad_norm": 2.5474607944488525, "learning_rate": 1.7971755189757284e-05, "loss": 0.5998, "step": 11110 }, { "epoch": 1.813844332884372, "grad_norm": 2.659512758255005, "learning_rate": 1.7971368085802464e-05, "loss": 0.7867, "step": 11111 }, { "epoch": 1.8140075915268765, "grad_norm": 2.472731113433838, "learning_rate": 1.797098094908036e-05, "loss": 0.6804, "step": 11112 }, { "epoch": 1.814170850169381, "grad_norm": 2.9906091690063477, "learning_rate": 1.797059377959256e-05, "loss": 0.7652, "step": 11113 }, { "epoch": 1.8143341088118854, "grad_norm": 2.7502260208129883, "learning_rate": 1.7970206577340665e-05, "loss": 0.7615, "step": 11114 }, { "epoch": 1.8144973674543896, "grad_norm": 2.573897123336792, "learning_rate": 1.7969819342326252e-05, "loss": 0.6101, "step": 11115 }, { "epoch": 1.814660626096894, "grad_norm": 2.514094591140747, "learning_rate": 1.7969432074550926e-05, "loss": 0.5751, "step": 11116 }, { "epoch": 1.8148238847393983, "grad_norm": 2.746950387954712, "learning_rate": 1.7969044774016272e-05, "loss": 0.7535, "step": 11117 }, { "epoch": 1.8149871433819027, "grad_norm": 3.3745458126068115, "learning_rate": 1.7968657440723886e-05, "loss": 0.8545, "step": 11118 }, { "epoch": 1.8151504020244071, "grad_norm": 2.6330406665802, "learning_rate": 1.796827007467536e-05, "loss": 0.6078, "step": 11119 }, { "epoch": 1.8153136606669116, "grad_norm": 2.2417614459991455, "learning_rate": 1.796788267587228e-05, "loss": 0.5719, "step": 11120 }, { "epoch": 1.815476919309416, "grad_norm": 2.6550979614257812, "learning_rate": 1.7967495244316245e-05, "loss": 0.7185, "step": 11121 }, { "epoch": 1.8156401779519205, "grad_norm": 2.934705972671509, "learning_rate": 1.7967107780008848e-05, "loss": 0.7935, "step": 11122 }, { "epoch": 1.8158034365944247, "grad_norm": 2.4435718059539795, "learning_rate": 1.7966720282951675e-05, "loss": 0.6649, "step": 11123 }, { "epoch": 1.815966695236929, "grad_norm": 2.777608633041382, "learning_rate": 1.7966332753146327e-05, "loss": 0.7553, "step": 11124 }, { "epoch": 1.8161299538794335, "grad_norm": 2.464632749557495, "learning_rate": 1.796594519059439e-05, "loss": 0.5635, "step": 11125 }, { "epoch": 1.8162932125219378, "grad_norm": 2.8464417457580566, "learning_rate": 1.796555759529746e-05, "loss": 0.6615, "step": 11126 }, { "epoch": 1.8164564711644422, "grad_norm": 2.980410099029541, "learning_rate": 1.796516996725713e-05, "loss": 0.7925, "step": 11127 }, { "epoch": 1.8166197298069466, "grad_norm": 2.549622058868408, "learning_rate": 1.7964782306474998e-05, "loss": 0.595, "step": 11128 }, { "epoch": 1.816782988449451, "grad_norm": 2.6956779956817627, "learning_rate": 1.796439461295265e-05, "loss": 0.7387, "step": 11129 }, { "epoch": 1.8169462470919555, "grad_norm": 2.9896373748779297, "learning_rate": 1.7964006886691683e-05, "loss": 0.7224, "step": 11130 }, { "epoch": 1.81710950573446, "grad_norm": 2.61714768409729, "learning_rate": 1.7963619127693694e-05, "loss": 0.6871, "step": 11131 }, { "epoch": 1.8172727643769642, "grad_norm": 2.6176645755767822, "learning_rate": 1.796323133596027e-05, "loss": 0.6108, "step": 11132 }, { "epoch": 1.8174360230194686, "grad_norm": 2.34067702293396, "learning_rate": 1.796284351149301e-05, "loss": 0.6616, "step": 11133 }, { "epoch": 1.8175992816619728, "grad_norm": 2.590588092803955, "learning_rate": 1.7962455654293507e-05, "loss": 0.6882, "step": 11134 }, { "epoch": 1.8177625403044773, "grad_norm": 2.5646870136260986, "learning_rate": 1.7962067764363357e-05, "loss": 0.6681, "step": 11135 }, { "epoch": 1.8179257989469817, "grad_norm": 2.38187837600708, "learning_rate": 1.796167984170415e-05, "loss": 0.6313, "step": 11136 }, { "epoch": 1.8180890575894861, "grad_norm": 2.5638816356658936, "learning_rate": 1.7961291886317487e-05, "loss": 0.7057, "step": 11137 }, { "epoch": 1.8182523162319906, "grad_norm": 2.639411449432373, "learning_rate": 1.7960903898204956e-05, "loss": 0.7941, "step": 11138 }, { "epoch": 1.818415574874495, "grad_norm": 2.4709854125976562, "learning_rate": 1.7960515877368158e-05, "loss": 0.6771, "step": 11139 }, { "epoch": 1.8185788335169994, "grad_norm": 2.2792038917541504, "learning_rate": 1.796012782380868e-05, "loss": 0.5747, "step": 11140 }, { "epoch": 1.8187420921595037, "grad_norm": 2.5638391971588135, "learning_rate": 1.7959739737528128e-05, "loss": 0.5803, "step": 11141 }, { "epoch": 1.818905350802008, "grad_norm": 2.6874399185180664, "learning_rate": 1.795935161852809e-05, "loss": 0.6991, "step": 11142 }, { "epoch": 1.8190686094445123, "grad_norm": 2.6324119567871094, "learning_rate": 1.795896346681016e-05, "loss": 0.6725, "step": 11143 }, { "epoch": 1.8192318680870168, "grad_norm": 2.5868465900421143, "learning_rate": 1.795857528237594e-05, "loss": 0.6359, "step": 11144 }, { "epoch": 1.8193951267295212, "grad_norm": 3.2756662368774414, "learning_rate": 1.795818706522702e-05, "loss": 0.8045, "step": 11145 }, { "epoch": 1.8195583853720256, "grad_norm": 2.7129592895507812, "learning_rate": 1.7957798815364998e-05, "loss": 0.753, "step": 11146 }, { "epoch": 1.81972164401453, "grad_norm": 2.839277744293213, "learning_rate": 1.7957410532791472e-05, "loss": 0.749, "step": 11147 }, { "epoch": 1.8198849026570345, "grad_norm": 2.742070198059082, "learning_rate": 1.795702221750803e-05, "loss": 0.7463, "step": 11148 }, { "epoch": 1.820048161299539, "grad_norm": 3.345060348510742, "learning_rate": 1.795663386951628e-05, "loss": 0.9276, "step": 11149 }, { "epoch": 1.8202114199420432, "grad_norm": 2.6124308109283447, "learning_rate": 1.795624548881781e-05, "loss": 0.6343, "step": 11150 }, { "epoch": 1.8203746785845476, "grad_norm": 2.415097951889038, "learning_rate": 1.795585707541422e-05, "loss": 0.5531, "step": 11151 }, { "epoch": 1.8205379372270518, "grad_norm": 2.4495747089385986, "learning_rate": 1.795546862930711e-05, "loss": 0.6769, "step": 11152 }, { "epoch": 1.8207011958695563, "grad_norm": 2.568830966949463, "learning_rate": 1.7955080150498065e-05, "loss": 0.7169, "step": 11153 }, { "epoch": 1.8208644545120607, "grad_norm": 2.9729039669036865, "learning_rate": 1.7954691638988694e-05, "loss": 0.7872, "step": 11154 }, { "epoch": 1.8210277131545651, "grad_norm": 2.9865782260894775, "learning_rate": 1.7954303094780588e-05, "loss": 0.651, "step": 11155 }, { "epoch": 1.8211909717970696, "grad_norm": 2.330066680908203, "learning_rate": 1.7953914517875344e-05, "loss": 0.5855, "step": 11156 }, { "epoch": 1.821354230439574, "grad_norm": 2.813880443572998, "learning_rate": 1.7953525908274563e-05, "loss": 0.687, "step": 11157 }, { "epoch": 1.8215174890820784, "grad_norm": 2.531094551086426, "learning_rate": 1.795313726597984e-05, "loss": 0.706, "step": 11158 }, { "epoch": 1.8216807477245827, "grad_norm": 2.5862433910369873, "learning_rate": 1.795274859099277e-05, "loss": 0.6856, "step": 11159 }, { "epoch": 1.821844006367087, "grad_norm": 3.148869514465332, "learning_rate": 1.7952359883314955e-05, "loss": 0.7604, "step": 11160 }, { "epoch": 1.8220072650095913, "grad_norm": 2.5136799812316895, "learning_rate": 1.795197114294799e-05, "loss": 0.6625, "step": 11161 }, { "epoch": 1.8221705236520958, "grad_norm": 2.3972740173339844, "learning_rate": 1.7951582369893478e-05, "loss": 0.5803, "step": 11162 }, { "epoch": 1.8223337822946002, "grad_norm": 2.7972097396850586, "learning_rate": 1.7951193564153012e-05, "loss": 0.6382, "step": 11163 }, { "epoch": 1.8224970409371046, "grad_norm": 2.812549591064453, "learning_rate": 1.795080472572819e-05, "loss": 0.6667, "step": 11164 }, { "epoch": 1.822660299579609, "grad_norm": 2.6263818740844727, "learning_rate": 1.7950415854620612e-05, "loss": 0.6398, "step": 11165 }, { "epoch": 1.8228235582221135, "grad_norm": 2.7110087871551514, "learning_rate": 1.795002695083188e-05, "loss": 0.6487, "step": 11166 }, { "epoch": 1.8229868168646177, "grad_norm": 2.7066164016723633, "learning_rate": 1.7949638014363584e-05, "loss": 0.7686, "step": 11167 }, { "epoch": 1.8231500755071222, "grad_norm": 2.5873348712921143, "learning_rate": 1.794924904521733e-05, "loss": 0.7735, "step": 11168 }, { "epoch": 1.8233133341496266, "grad_norm": 3.2211484909057617, "learning_rate": 1.7948860043394715e-05, "loss": 0.8791, "step": 11169 }, { "epoch": 1.8234765927921308, "grad_norm": 2.965590476989746, "learning_rate": 1.794847100889734e-05, "loss": 0.6736, "step": 11170 }, { "epoch": 1.8236398514346353, "grad_norm": 2.548043966293335, "learning_rate": 1.7948081941726796e-05, "loss": 0.679, "step": 11171 }, { "epoch": 1.8238031100771397, "grad_norm": 2.0163090229034424, "learning_rate": 1.7947692841884694e-05, "loss": 0.5055, "step": 11172 }, { "epoch": 1.8239663687196441, "grad_norm": 2.649075746536255, "learning_rate": 1.7947303709372622e-05, "loss": 0.6894, "step": 11173 }, { "epoch": 1.8241296273621486, "grad_norm": 2.495842218399048, "learning_rate": 1.794691454419219e-05, "loss": 0.6834, "step": 11174 }, { "epoch": 1.824292886004653, "grad_norm": 2.782982587814331, "learning_rate": 1.7946525346344993e-05, "loss": 0.6304, "step": 11175 }, { "epoch": 1.8244561446471572, "grad_norm": 3.3407418727874756, "learning_rate": 1.7946136115832632e-05, "loss": 0.8508, "step": 11176 }, { "epoch": 1.8246194032896617, "grad_norm": 2.8442912101745605, "learning_rate": 1.7945746852656703e-05, "loss": 0.759, "step": 11177 }, { "epoch": 1.8247826619321659, "grad_norm": 2.3303678035736084, "learning_rate": 1.794535755681881e-05, "loss": 0.5442, "step": 11178 }, { "epoch": 1.8249459205746703, "grad_norm": 2.356363296508789, "learning_rate": 1.794496822832055e-05, "loss": 0.5718, "step": 11179 }, { "epoch": 1.8251091792171747, "grad_norm": 3.147785186767578, "learning_rate": 1.794457886716353e-05, "loss": 0.7317, "step": 11180 }, { "epoch": 1.8252724378596792, "grad_norm": 3.012359380722046, "learning_rate": 1.794418947334934e-05, "loss": 0.807, "step": 11181 }, { "epoch": 1.8254356965021836, "grad_norm": 2.4511773586273193, "learning_rate": 1.7943800046879593e-05, "loss": 0.5723, "step": 11182 }, { "epoch": 1.825598955144688, "grad_norm": 2.405163526535034, "learning_rate": 1.794341058775588e-05, "loss": 0.6223, "step": 11183 }, { "epoch": 1.8257622137871925, "grad_norm": 2.5518887042999268, "learning_rate": 1.7943021095979805e-05, "loss": 0.6948, "step": 11184 }, { "epoch": 1.8259254724296967, "grad_norm": 2.835186243057251, "learning_rate": 1.7942631571552972e-05, "loss": 0.7648, "step": 11185 }, { "epoch": 1.8260887310722012, "grad_norm": 2.787708044052124, "learning_rate": 1.794224201447698e-05, "loss": 1.0604, "step": 11186 }, { "epoch": 1.8262519897147054, "grad_norm": 3.1421871185302734, "learning_rate": 1.7941852424753427e-05, "loss": 0.7963, "step": 11187 }, { "epoch": 1.8264152483572098, "grad_norm": 1.906891942024231, "learning_rate": 1.7941462802383916e-05, "loss": 0.4473, "step": 11188 }, { "epoch": 1.8265785069997142, "grad_norm": 2.7799315452575684, "learning_rate": 1.7941073147370054e-05, "loss": 0.682, "step": 11189 }, { "epoch": 1.8267417656422187, "grad_norm": 2.7958731651306152, "learning_rate": 1.794068345971344e-05, "loss": 0.7213, "step": 11190 }, { "epoch": 1.8269050242847231, "grad_norm": 3.0752177238464355, "learning_rate": 1.7940293739415672e-05, "loss": 0.7492, "step": 11191 }, { "epoch": 1.8270682829272276, "grad_norm": 2.9796535968780518, "learning_rate": 1.7939903986478354e-05, "loss": 0.7233, "step": 11192 }, { "epoch": 1.827231541569732, "grad_norm": 2.7075867652893066, "learning_rate": 1.793951420090309e-05, "loss": 0.6808, "step": 11193 }, { "epoch": 1.8273948002122362, "grad_norm": 2.447194814682007, "learning_rate": 1.7939124382691483e-05, "loss": 0.6473, "step": 11194 }, { "epoch": 1.8275580588547407, "grad_norm": 2.6527180671691895, "learning_rate": 1.793873453184513e-05, "loss": 0.5771, "step": 11195 }, { "epoch": 1.8277213174972449, "grad_norm": 2.674363374710083, "learning_rate": 1.793834464836564e-05, "loss": 0.7075, "step": 11196 }, { "epoch": 1.8278845761397493, "grad_norm": 2.8311357498168945, "learning_rate": 1.793795473225461e-05, "loss": 0.6041, "step": 11197 }, { "epoch": 1.8280478347822537, "grad_norm": 2.3454947471618652, "learning_rate": 1.7937564783513646e-05, "loss": 0.6139, "step": 11198 }, { "epoch": 1.8282110934247582, "grad_norm": 2.9996955394744873, "learning_rate": 1.7937174802144352e-05, "loss": 0.6901, "step": 11199 }, { "epoch": 1.8283743520672626, "grad_norm": 2.9717156887054443, "learning_rate": 1.793678478814833e-05, "loss": 0.6705, "step": 11200 }, { "epoch": 1.828537610709767, "grad_norm": 3.3923628330230713, "learning_rate": 1.7936394741527177e-05, "loss": 0.7747, "step": 11201 }, { "epoch": 1.8287008693522715, "grad_norm": 3.1777780055999756, "learning_rate": 1.7936004662282505e-05, "loss": 0.7396, "step": 11202 }, { "epoch": 1.8288641279947757, "grad_norm": 3.076955556869507, "learning_rate": 1.7935614550415917e-05, "loss": 0.6886, "step": 11203 }, { "epoch": 1.8290273866372802, "grad_norm": 2.86271071434021, "learning_rate": 1.7935224405929013e-05, "loss": 0.6563, "step": 11204 }, { "epoch": 1.8291906452797844, "grad_norm": 2.836228609085083, "learning_rate": 1.7934834228823396e-05, "loss": 0.5949, "step": 11205 }, { "epoch": 1.8293539039222888, "grad_norm": 2.738938570022583, "learning_rate": 1.7934444019100674e-05, "loss": 0.7648, "step": 11206 }, { "epoch": 1.8295171625647932, "grad_norm": 2.6640636920928955, "learning_rate": 1.7934053776762447e-05, "loss": 0.6098, "step": 11207 }, { "epoch": 1.8296804212072977, "grad_norm": 2.502089023590088, "learning_rate": 1.7933663501810317e-05, "loss": 0.6435, "step": 11208 }, { "epoch": 1.8298436798498021, "grad_norm": 3.025938034057617, "learning_rate": 1.7933273194245898e-05, "loss": 0.7051, "step": 11209 }, { "epoch": 1.8300069384923066, "grad_norm": 3.180481195449829, "learning_rate": 1.7932882854070785e-05, "loss": 0.6746, "step": 11210 }, { "epoch": 1.8301701971348108, "grad_norm": 3.18444561958313, "learning_rate": 1.7932492481286587e-05, "loss": 0.7647, "step": 11211 }, { "epoch": 1.8303334557773152, "grad_norm": 3.1942028999328613, "learning_rate": 1.7932102075894907e-05, "loss": 0.7585, "step": 11212 }, { "epoch": 1.8304967144198196, "grad_norm": 2.6749348640441895, "learning_rate": 1.7931711637897355e-05, "loss": 0.7222, "step": 11213 }, { "epoch": 1.8306599730623239, "grad_norm": 2.7567949295043945, "learning_rate": 1.7931321167295526e-05, "loss": 0.7352, "step": 11214 }, { "epoch": 1.8308232317048283, "grad_norm": 2.419182538986206, "learning_rate": 1.7930930664091032e-05, "loss": 0.5675, "step": 11215 }, { "epoch": 1.8309864903473327, "grad_norm": 2.7299985885620117, "learning_rate": 1.7930540128285478e-05, "loss": 0.659, "step": 11216 }, { "epoch": 1.8311497489898372, "grad_norm": 2.711498737335205, "learning_rate": 1.7930149559880464e-05, "loss": 0.6709, "step": 11217 }, { "epoch": 1.8313130076323416, "grad_norm": 2.8639209270477295, "learning_rate": 1.7929758958877604e-05, "loss": 0.645, "step": 11218 }, { "epoch": 1.831476266274846, "grad_norm": 2.967555522918701, "learning_rate": 1.79293683252785e-05, "loss": 0.7891, "step": 11219 }, { "epoch": 1.8316395249173503, "grad_norm": 2.716221809387207, "learning_rate": 1.7928977659084753e-05, "loss": 0.6568, "step": 11220 }, { "epoch": 1.8318027835598547, "grad_norm": 2.5137200355529785, "learning_rate": 1.7928586960297974e-05, "loss": 0.6543, "step": 11221 }, { "epoch": 1.831966042202359, "grad_norm": 2.7193474769592285, "learning_rate": 1.792819622891977e-05, "loss": 0.6768, "step": 11222 }, { "epoch": 1.8321293008448634, "grad_norm": 2.765474319458008, "learning_rate": 1.792780546495174e-05, "loss": 0.7593, "step": 11223 }, { "epoch": 1.8322925594873678, "grad_norm": 2.7326438426971436, "learning_rate": 1.7927414668395503e-05, "loss": 0.7393, "step": 11224 }, { "epoch": 1.8324558181298722, "grad_norm": 2.616318702697754, "learning_rate": 1.7927023839252654e-05, "loss": 0.6804, "step": 11225 }, { "epoch": 1.8326190767723767, "grad_norm": 2.507575750350952, "learning_rate": 1.79266329775248e-05, "loss": 0.6038, "step": 11226 }, { "epoch": 1.8327823354148811, "grad_norm": 2.7317304611206055, "learning_rate": 1.7926242083213553e-05, "loss": 0.6895, "step": 11227 }, { "epoch": 1.8329455940573856, "grad_norm": 2.8298726081848145, "learning_rate": 1.7925851156320516e-05, "loss": 0.6901, "step": 11228 }, { "epoch": 1.8331088526998898, "grad_norm": 2.4809720516204834, "learning_rate": 1.7925460196847303e-05, "loss": 0.782, "step": 11229 }, { "epoch": 1.8332721113423942, "grad_norm": 2.4486947059631348, "learning_rate": 1.792506920479551e-05, "loss": 0.6474, "step": 11230 }, { "epoch": 1.8334353699848984, "grad_norm": 2.4014461040496826, "learning_rate": 1.7924678180166752e-05, "loss": 0.5767, "step": 11231 }, { "epoch": 1.8335986286274029, "grad_norm": 2.4264400005340576, "learning_rate": 1.7924287122962636e-05, "loss": 0.6268, "step": 11232 }, { "epoch": 1.8337618872699073, "grad_norm": 2.4912662506103516, "learning_rate": 1.7923896033184767e-05, "loss": 0.6218, "step": 11233 }, { "epoch": 1.8339251459124117, "grad_norm": 3.0703840255737305, "learning_rate": 1.792350491083475e-05, "loss": 0.754, "step": 11234 }, { "epoch": 1.8340884045549162, "grad_norm": 2.7414722442626953, "learning_rate": 1.79231137559142e-05, "loss": 0.6724, "step": 11235 }, { "epoch": 1.8342516631974206, "grad_norm": 2.603376865386963, "learning_rate": 1.792272256842472e-05, "loss": 0.5977, "step": 11236 }, { "epoch": 1.834414921839925, "grad_norm": 2.9882116317749023, "learning_rate": 1.792233134836792e-05, "loss": 0.7958, "step": 11237 }, { "epoch": 1.8345781804824293, "grad_norm": 2.529189348220825, "learning_rate": 1.7921940095745407e-05, "loss": 0.5862, "step": 11238 }, { "epoch": 1.8347414391249337, "grad_norm": 2.5550172328948975, "learning_rate": 1.7921548810558786e-05, "loss": 0.6357, "step": 11239 }, { "epoch": 1.834904697767438, "grad_norm": 2.8688414096832275, "learning_rate": 1.7921157492809674e-05, "loss": 0.6181, "step": 11240 }, { "epoch": 1.8350679564099424, "grad_norm": 3.4275472164154053, "learning_rate": 1.7920766142499673e-05, "loss": 0.8339, "step": 11241 }, { "epoch": 1.8352312150524468, "grad_norm": 2.498112440109253, "learning_rate": 1.792037475963039e-05, "loss": 0.6927, "step": 11242 }, { "epoch": 1.8353944736949512, "grad_norm": 2.497478723526001, "learning_rate": 1.791998334420344e-05, "loss": 0.6847, "step": 11243 }, { "epoch": 1.8355577323374557, "grad_norm": 2.4279778003692627, "learning_rate": 1.791959189622043e-05, "loss": 0.6029, "step": 11244 }, { "epoch": 1.83572099097996, "grad_norm": 2.593226432800293, "learning_rate": 1.7919200415682967e-05, "loss": 0.5592, "step": 11245 }, { "epoch": 1.8358842496224645, "grad_norm": 2.3662943840026855, "learning_rate": 1.791880890259266e-05, "loss": 0.6075, "step": 11246 }, { "epoch": 1.8360475082649688, "grad_norm": 2.6166391372680664, "learning_rate": 1.791841735695112e-05, "loss": 0.7214, "step": 11247 }, { "epoch": 1.8362107669074732, "grad_norm": 2.8478803634643555, "learning_rate": 1.791802577875996e-05, "loss": 0.6882, "step": 11248 }, { "epoch": 1.8363740255499774, "grad_norm": 2.7171175479888916, "learning_rate": 1.791763416802078e-05, "loss": 0.6771, "step": 11249 }, { "epoch": 1.8365372841924819, "grad_norm": 3.4544906616210938, "learning_rate": 1.79172425247352e-05, "loss": 0.8921, "step": 11250 }, { "epoch": 1.8367005428349863, "grad_norm": 2.828463554382324, "learning_rate": 1.7916850848904823e-05, "loss": 0.69, "step": 11251 }, { "epoch": 1.8368638014774907, "grad_norm": 2.4045674800872803, "learning_rate": 1.791645914053126e-05, "loss": 0.5215, "step": 11252 }, { "epoch": 1.8370270601199952, "grad_norm": 2.3195548057556152, "learning_rate": 1.7916067399616127e-05, "loss": 0.5524, "step": 11253 }, { "epoch": 1.8371903187624996, "grad_norm": 2.709200143814087, "learning_rate": 1.791567562616103e-05, "loss": 0.5906, "step": 11254 }, { "epoch": 1.8373535774050038, "grad_norm": 2.888507843017578, "learning_rate": 1.7915283820167574e-05, "loss": 0.7221, "step": 11255 }, { "epoch": 1.8375168360475083, "grad_norm": 2.4339962005615234, "learning_rate": 1.7914891981637378e-05, "loss": 0.603, "step": 11256 }, { "epoch": 1.8376800946900127, "grad_norm": 2.667064905166626, "learning_rate": 1.7914500110572053e-05, "loss": 0.5666, "step": 11257 }, { "epoch": 1.837843353332517, "grad_norm": 2.842517375946045, "learning_rate": 1.7914108206973206e-05, "loss": 0.5731, "step": 11258 }, { "epoch": 1.8380066119750214, "grad_norm": 3.1501307487487793, "learning_rate": 1.7913716270842442e-05, "loss": 0.821, "step": 11259 }, { "epoch": 1.8381698706175258, "grad_norm": 3.0181961059570312, "learning_rate": 1.791332430218138e-05, "loss": 0.6247, "step": 11260 }, { "epoch": 1.8383331292600302, "grad_norm": 3.007843494415283, "learning_rate": 1.7912932300991633e-05, "loss": 0.7009, "step": 11261 }, { "epoch": 1.8384963879025347, "grad_norm": 2.881265640258789, "learning_rate": 1.791254026727481e-05, "loss": 0.6539, "step": 11262 }, { "epoch": 1.838659646545039, "grad_norm": 2.9894251823425293, "learning_rate": 1.791214820103252e-05, "loss": 0.7215, "step": 11263 }, { "epoch": 1.8388229051875433, "grad_norm": 2.5337789058685303, "learning_rate": 1.7911756102266377e-05, "loss": 0.567, "step": 11264 }, { "epoch": 1.8389861638300478, "grad_norm": 2.625401496887207, "learning_rate": 1.7911363970977992e-05, "loss": 0.5751, "step": 11265 }, { "epoch": 1.839149422472552, "grad_norm": 2.689699411392212, "learning_rate": 1.7910971807168977e-05, "loss": 0.7276, "step": 11266 }, { "epoch": 1.8393126811150564, "grad_norm": 2.815098762512207, "learning_rate": 1.7910579610840944e-05, "loss": 0.5546, "step": 11267 }, { "epoch": 1.8394759397575609, "grad_norm": 2.9860763549804688, "learning_rate": 1.7910187381995506e-05, "loss": 0.6622, "step": 11268 }, { "epoch": 1.8396391984000653, "grad_norm": 3.242849349975586, "learning_rate": 1.7909795120634275e-05, "loss": 0.7126, "step": 11269 }, { "epoch": 1.8398024570425697, "grad_norm": 2.830890417098999, "learning_rate": 1.790940282675886e-05, "loss": 0.6939, "step": 11270 }, { "epoch": 1.8399657156850742, "grad_norm": 3.032491445541382, "learning_rate": 1.7909010500370878e-05, "loss": 0.694, "step": 11271 }, { "epoch": 1.8401289743275786, "grad_norm": 3.4295268058776855, "learning_rate": 1.790861814147194e-05, "loss": 0.8955, "step": 11272 }, { "epoch": 1.8402922329700828, "grad_norm": 2.9041013717651367, "learning_rate": 1.790822575006366e-05, "loss": 0.6983, "step": 11273 }, { "epoch": 1.8404554916125873, "grad_norm": 2.7372970581054688, "learning_rate": 1.790783332614765e-05, "loss": 0.7631, "step": 11274 }, { "epoch": 1.8406187502550915, "grad_norm": 2.521235466003418, "learning_rate": 1.7907440869725522e-05, "loss": 0.5643, "step": 11275 }, { "epoch": 1.840782008897596, "grad_norm": 2.8096585273742676, "learning_rate": 1.790704838079889e-05, "loss": 0.6444, "step": 11276 }, { "epoch": 1.8409452675401003, "grad_norm": 2.4757189750671387, "learning_rate": 1.7906655859369365e-05, "loss": 0.6597, "step": 11277 }, { "epoch": 1.8411085261826048, "grad_norm": 2.581805467605591, "learning_rate": 1.790626330543857e-05, "loss": 0.5588, "step": 11278 }, { "epoch": 1.8412717848251092, "grad_norm": 2.522963762283325, "learning_rate": 1.7905870719008107e-05, "loss": 0.6379, "step": 11279 }, { "epoch": 1.8414350434676137, "grad_norm": 2.4981772899627686, "learning_rate": 1.79054781000796e-05, "loss": 0.6578, "step": 11280 }, { "epoch": 1.841598302110118, "grad_norm": 2.73123836517334, "learning_rate": 1.7905085448654654e-05, "loss": 0.7138, "step": 11281 }, { "epoch": 1.8417615607526223, "grad_norm": 2.5858428478240967, "learning_rate": 1.7904692764734887e-05, "loss": 0.6056, "step": 11282 }, { "epoch": 1.8419248193951268, "grad_norm": 2.743999481201172, "learning_rate": 1.790430004832191e-05, "loss": 0.6489, "step": 11283 }, { "epoch": 1.842088078037631, "grad_norm": 2.225083112716675, "learning_rate": 1.7903907299417343e-05, "loss": 0.5932, "step": 11284 }, { "epoch": 1.8422513366801354, "grad_norm": 2.6709234714508057, "learning_rate": 1.7903514518022796e-05, "loss": 0.7502, "step": 11285 }, { "epoch": 1.8424145953226398, "grad_norm": 2.349945545196533, "learning_rate": 1.7903121704139884e-05, "loss": 0.5638, "step": 11286 }, { "epoch": 1.8425778539651443, "grad_norm": 2.708409547805786, "learning_rate": 1.790272885777023e-05, "loss": 0.7066, "step": 11287 }, { "epoch": 1.8427411126076487, "grad_norm": 2.441549062728882, "learning_rate": 1.7902335978915434e-05, "loss": 0.5851, "step": 11288 }, { "epoch": 1.8429043712501532, "grad_norm": 2.873406410217285, "learning_rate": 1.7901943067577122e-05, "loss": 0.7194, "step": 11289 }, { "epoch": 1.8430676298926576, "grad_norm": 3.156191349029541, "learning_rate": 1.7901550123756906e-05, "loss": 1.5027, "step": 11290 }, { "epoch": 1.8432308885351618, "grad_norm": 2.5085225105285645, "learning_rate": 1.7901157147456398e-05, "loss": 0.5854, "step": 11291 }, { "epoch": 1.8433941471776663, "grad_norm": 2.69199538230896, "learning_rate": 1.790076413867722e-05, "loss": 0.6814, "step": 11292 }, { "epoch": 1.8435574058201705, "grad_norm": 2.5676801204681396, "learning_rate": 1.7900371097420982e-05, "loss": 0.7279, "step": 11293 }, { "epoch": 1.843720664462675, "grad_norm": 2.762195110321045, "learning_rate": 1.78999780236893e-05, "loss": 0.6691, "step": 11294 }, { "epoch": 1.8438839231051793, "grad_norm": 2.9046387672424316, "learning_rate": 1.7899584917483798e-05, "loss": 0.6984, "step": 11295 }, { "epoch": 1.8440471817476838, "grad_norm": 2.658771276473999, "learning_rate": 1.7899191778806078e-05, "loss": 0.6875, "step": 11296 }, { "epoch": 1.8442104403901882, "grad_norm": 2.684835195541382, "learning_rate": 1.7898798607657767e-05, "loss": 0.6295, "step": 11297 }, { "epoch": 1.8443736990326927, "grad_norm": 3.0934743881225586, "learning_rate": 1.7898405404040477e-05, "loss": 0.7516, "step": 11298 }, { "epoch": 1.8445369576751969, "grad_norm": 2.152068614959717, "learning_rate": 1.7898012167955822e-05, "loss": 0.5011, "step": 11299 }, { "epoch": 1.8447002163177013, "grad_norm": 3.3937549591064453, "learning_rate": 1.7897618899405423e-05, "loss": 0.871, "step": 11300 }, { "epoch": 1.8448634749602058, "grad_norm": 2.679180145263672, "learning_rate": 1.78972255983909e-05, "loss": 0.6616, "step": 11301 }, { "epoch": 1.84502673360271, "grad_norm": 3.0221471786499023, "learning_rate": 1.7896832264913856e-05, "loss": 0.8068, "step": 11302 }, { "epoch": 1.8451899922452144, "grad_norm": 2.6296732425689697, "learning_rate": 1.7896438898975922e-05, "loss": 0.7417, "step": 11303 }, { "epoch": 1.8453532508877188, "grad_norm": 2.793846607208252, "learning_rate": 1.7896045500578707e-05, "loss": 0.682, "step": 11304 }, { "epoch": 1.8455165095302233, "grad_norm": 3.161071300506592, "learning_rate": 1.789565206972383e-05, "loss": 0.7557, "step": 11305 }, { "epoch": 1.8456797681727277, "grad_norm": 2.119922637939453, "learning_rate": 1.789525860641291e-05, "loss": 0.5687, "step": 11306 }, { "epoch": 1.8458430268152322, "grad_norm": 3.0385375022888184, "learning_rate": 1.7894865110647563e-05, "loss": 0.7406, "step": 11307 }, { "epoch": 1.8460062854577364, "grad_norm": 2.4759669303894043, "learning_rate": 1.7894471582429406e-05, "loss": 0.6335, "step": 11308 }, { "epoch": 1.8461695441002408, "grad_norm": 2.3854892253875732, "learning_rate": 1.7894078021760055e-05, "loss": 0.6533, "step": 11309 }, { "epoch": 1.8463328027427452, "grad_norm": 3.287144660949707, "learning_rate": 1.7893684428641132e-05, "loss": 0.8591, "step": 11310 }, { "epoch": 1.8464960613852495, "grad_norm": 2.7441632747650146, "learning_rate": 1.789329080307425e-05, "loss": 0.6942, "step": 11311 }, { "epoch": 1.846659320027754, "grad_norm": 2.7810628414154053, "learning_rate": 1.7892897145061038e-05, "loss": 0.6471, "step": 11312 }, { "epoch": 1.8468225786702583, "grad_norm": 2.8986525535583496, "learning_rate": 1.78925034546031e-05, "loss": 0.6719, "step": 11313 }, { "epoch": 1.8469858373127628, "grad_norm": 2.3311972618103027, "learning_rate": 1.789210973170206e-05, "loss": 0.5582, "step": 11314 }, { "epoch": 1.8471490959552672, "grad_norm": 2.5241360664367676, "learning_rate": 1.789171597635954e-05, "loss": 0.6686, "step": 11315 }, { "epoch": 1.8473123545977717, "grad_norm": 2.8473048210144043, "learning_rate": 1.7891322188577157e-05, "loss": 0.7953, "step": 11316 }, { "epoch": 1.8474756132402759, "grad_norm": 2.624704360961914, "learning_rate": 1.7890928368356524e-05, "loss": 0.5929, "step": 11317 }, { "epoch": 1.8476388718827803, "grad_norm": 2.7115676403045654, "learning_rate": 1.7890534515699266e-05, "loss": 0.6164, "step": 11318 }, { "epoch": 1.8478021305252845, "grad_norm": 2.8120620250701904, "learning_rate": 1.7890140630607e-05, "loss": 0.7181, "step": 11319 }, { "epoch": 1.847965389167789, "grad_norm": 2.4739816188812256, "learning_rate": 1.7889746713081343e-05, "loss": 0.6153, "step": 11320 }, { "epoch": 1.8481286478102934, "grad_norm": 2.5452218055725098, "learning_rate": 1.7889352763123916e-05, "loss": 0.5816, "step": 11321 }, { "epoch": 1.8482919064527978, "grad_norm": 2.597444772720337, "learning_rate": 1.7888958780736342e-05, "loss": 0.704, "step": 11322 }, { "epoch": 1.8484551650953023, "grad_norm": 2.9759926795959473, "learning_rate": 1.7888564765920237e-05, "loss": 0.8068, "step": 11323 }, { "epoch": 1.8486184237378067, "grad_norm": 2.8671700954437256, "learning_rate": 1.788817071867722e-05, "loss": 0.6898, "step": 11324 }, { "epoch": 1.8487816823803112, "grad_norm": 2.3730173110961914, "learning_rate": 1.7887776639008912e-05, "loss": 0.699, "step": 11325 }, { "epoch": 1.8489449410228154, "grad_norm": 2.6492629051208496, "learning_rate": 1.788738252691693e-05, "loss": 0.6793, "step": 11326 }, { "epoch": 1.8491081996653198, "grad_norm": 2.510725498199463, "learning_rate": 1.78869883824029e-05, "loss": 0.7255, "step": 11327 }, { "epoch": 1.849271458307824, "grad_norm": 2.4732677936553955, "learning_rate": 1.7886594205468437e-05, "loss": 0.6682, "step": 11328 }, { "epoch": 1.8494347169503285, "grad_norm": 2.3242175579071045, "learning_rate": 1.7886199996115166e-05, "loss": 0.5558, "step": 11329 }, { "epoch": 1.849597975592833, "grad_norm": 2.378079652786255, "learning_rate": 1.78858057543447e-05, "loss": 0.6108, "step": 11330 }, { "epoch": 1.8497612342353373, "grad_norm": 2.42267107963562, "learning_rate": 1.7885411480158668e-05, "loss": 0.666, "step": 11331 }, { "epoch": 1.8499244928778418, "grad_norm": 3.0736355781555176, "learning_rate": 1.7885017173558686e-05, "loss": 0.7848, "step": 11332 }, { "epoch": 1.8500877515203462, "grad_norm": 2.9690115451812744, "learning_rate": 1.7884622834546376e-05, "loss": 0.7529, "step": 11333 }, { "epoch": 1.8502510101628507, "grad_norm": 2.430668830871582, "learning_rate": 1.7884228463123355e-05, "loss": 0.6014, "step": 11334 }, { "epoch": 1.8504142688053549, "grad_norm": 2.9935717582702637, "learning_rate": 1.7883834059291252e-05, "loss": 0.5835, "step": 11335 }, { "epoch": 1.8505775274478593, "grad_norm": 2.0164811611175537, "learning_rate": 1.7883439623051685e-05, "loss": 0.4499, "step": 11336 }, { "epoch": 1.8507407860903635, "grad_norm": 2.296889066696167, "learning_rate": 1.7883045154406274e-05, "loss": 0.5775, "step": 11337 }, { "epoch": 1.850904044732868, "grad_norm": 2.9771065711975098, "learning_rate": 1.788265065335664e-05, "loss": 0.6884, "step": 11338 }, { "epoch": 1.8510673033753724, "grad_norm": 2.676314115524292, "learning_rate": 1.78822561199044e-05, "loss": 0.6667, "step": 11339 }, { "epoch": 1.8512305620178768, "grad_norm": 2.206437826156616, "learning_rate": 1.788186155405119e-05, "loss": 0.4986, "step": 11340 }, { "epoch": 1.8513938206603813, "grad_norm": 2.2815282344818115, "learning_rate": 1.7881466955798617e-05, "loss": 0.6224, "step": 11341 }, { "epoch": 1.8515570793028857, "grad_norm": 2.720066785812378, "learning_rate": 1.7881072325148312e-05, "loss": 0.7739, "step": 11342 }, { "epoch": 1.8517203379453901, "grad_norm": 2.344831943511963, "learning_rate": 1.7880677662101894e-05, "loss": 0.55, "step": 11343 }, { "epoch": 1.8518835965878944, "grad_norm": 2.484586000442505, "learning_rate": 1.7880282966660986e-05, "loss": 0.6412, "step": 11344 }, { "epoch": 1.8520468552303988, "grad_norm": 2.857329845428467, "learning_rate": 1.7879888238827213e-05, "loss": 0.7382, "step": 11345 }, { "epoch": 1.852210113872903, "grad_norm": 2.246513605117798, "learning_rate": 1.7879493478602192e-05, "loss": 0.5597, "step": 11346 }, { "epoch": 1.8523733725154075, "grad_norm": 2.5801351070404053, "learning_rate": 1.787909868598755e-05, "loss": 0.5837, "step": 11347 }, { "epoch": 1.852536631157912, "grad_norm": 2.4867970943450928, "learning_rate": 1.7878703860984905e-05, "loss": 0.5752, "step": 11348 }, { "epoch": 1.8526998898004163, "grad_norm": 2.3817007541656494, "learning_rate": 1.7878309003595888e-05, "loss": 0.5825, "step": 11349 }, { "epoch": 1.8528631484429208, "grad_norm": 2.887450695037842, "learning_rate": 1.7877914113822114e-05, "loss": 0.7741, "step": 11350 }, { "epoch": 1.8530264070854252, "grad_norm": 2.8263492584228516, "learning_rate": 1.787751919166521e-05, "loss": 0.7635, "step": 11351 }, { "epoch": 1.8531896657279294, "grad_norm": 2.7444214820861816, "learning_rate": 1.78771242371268e-05, "loss": 0.6706, "step": 11352 }, { "epoch": 1.8533529243704339, "grad_norm": 2.474038600921631, "learning_rate": 1.7876729250208505e-05, "loss": 0.5482, "step": 11353 }, { "epoch": 1.8535161830129383, "grad_norm": 2.7581818103790283, "learning_rate": 1.7876334230911952e-05, "loss": 0.6518, "step": 11354 }, { "epoch": 1.8536794416554425, "grad_norm": 2.406073570251465, "learning_rate": 1.7875939179238764e-05, "loss": 0.5895, "step": 11355 }, { "epoch": 1.853842700297947, "grad_norm": 2.612971782684326, "learning_rate": 1.787554409519056e-05, "loss": 0.5751, "step": 11356 }, { "epoch": 1.8540059589404514, "grad_norm": 2.527407169342041, "learning_rate": 1.7875148978768972e-05, "loss": 0.6398, "step": 11357 }, { "epoch": 1.8541692175829558, "grad_norm": 2.9929819107055664, "learning_rate": 1.7874753829975623e-05, "loss": 0.8014, "step": 11358 }, { "epoch": 1.8543324762254603, "grad_norm": 2.590022087097168, "learning_rate": 1.7874358648812128e-05, "loss": 0.619, "step": 11359 }, { "epoch": 1.8544957348679647, "grad_norm": 2.7657411098480225, "learning_rate": 1.7873963435280122e-05, "loss": 0.5693, "step": 11360 }, { "epoch": 1.854658993510469, "grad_norm": 2.789696216583252, "learning_rate": 1.7873568189381222e-05, "loss": 0.7359, "step": 11361 }, { "epoch": 1.8548222521529734, "grad_norm": 2.715191125869751, "learning_rate": 1.7873172911117058e-05, "loss": 0.6179, "step": 11362 }, { "epoch": 1.8549855107954776, "grad_norm": 2.785820722579956, "learning_rate": 1.7872777600489252e-05, "loss": 0.6665, "step": 11363 }, { "epoch": 1.855148769437982, "grad_norm": 3.0090725421905518, "learning_rate": 1.787238225749943e-05, "loss": 0.7589, "step": 11364 }, { "epoch": 1.8553120280804865, "grad_norm": 3.0763659477233887, "learning_rate": 1.787198688214922e-05, "loss": 0.6881, "step": 11365 }, { "epoch": 1.855475286722991, "grad_norm": 3.6992433071136475, "learning_rate": 1.7871591474440242e-05, "loss": 0.8274, "step": 11366 }, { "epoch": 1.8556385453654953, "grad_norm": 2.72353196144104, "learning_rate": 1.7871196034374127e-05, "loss": 0.7254, "step": 11367 }, { "epoch": 1.8558018040079998, "grad_norm": 2.6052634716033936, "learning_rate": 1.7870800561952494e-05, "loss": 0.6539, "step": 11368 }, { "epoch": 1.8559650626505042, "grad_norm": 2.644888162612915, "learning_rate": 1.7870405057176968e-05, "loss": 0.7244, "step": 11369 }, { "epoch": 1.8561283212930084, "grad_norm": 2.589536190032959, "learning_rate": 1.7870009520049184e-05, "loss": 0.7413, "step": 11370 }, { "epoch": 1.8562915799355129, "grad_norm": 2.4000296592712402, "learning_rate": 1.7869613950570763e-05, "loss": 0.603, "step": 11371 }, { "epoch": 1.856454838578017, "grad_norm": 2.791964530944824, "learning_rate": 1.786921834874333e-05, "loss": 0.686, "step": 11372 }, { "epoch": 1.8566180972205215, "grad_norm": 2.681129217147827, "learning_rate": 1.786882271456851e-05, "loss": 0.7723, "step": 11373 }, { "epoch": 1.856781355863026, "grad_norm": 2.313169479370117, "learning_rate": 1.786842704804793e-05, "loss": 0.6289, "step": 11374 }, { "epoch": 1.8569446145055304, "grad_norm": 2.3441975116729736, "learning_rate": 1.786803134918322e-05, "loss": 0.6603, "step": 11375 }, { "epoch": 1.8571078731480348, "grad_norm": 2.597036361694336, "learning_rate": 1.7867635617976e-05, "loss": 0.7352, "step": 11376 }, { "epoch": 1.8572711317905393, "grad_norm": 2.417897939682007, "learning_rate": 1.7867239854427908e-05, "loss": 0.5841, "step": 11377 }, { "epoch": 1.8574343904330437, "grad_norm": 2.5868282318115234, "learning_rate": 1.786684405854056e-05, "loss": 0.6345, "step": 11378 }, { "epoch": 1.857597649075548, "grad_norm": 2.451349973678589, "learning_rate": 1.7866448230315584e-05, "loss": 0.5604, "step": 11379 }, { "epoch": 1.8577609077180524, "grad_norm": 2.9232239723205566, "learning_rate": 1.7866052369754615e-05, "loss": 0.7373, "step": 11380 }, { "epoch": 1.8579241663605566, "grad_norm": 2.300185441970825, "learning_rate": 1.786565647685927e-05, "loss": 0.5824, "step": 11381 }, { "epoch": 1.858087425003061, "grad_norm": 3.4589731693267822, "learning_rate": 1.7865260551631183e-05, "loss": 0.8634, "step": 11382 }, { "epoch": 1.8582506836455654, "grad_norm": 2.9087021350860596, "learning_rate": 1.786486459407198e-05, "loss": 0.7384, "step": 11383 }, { "epoch": 1.8584139422880699, "grad_norm": 2.8296284675598145, "learning_rate": 1.786446860418329e-05, "loss": 0.7148, "step": 11384 }, { "epoch": 1.8585772009305743, "grad_norm": 2.5877208709716797, "learning_rate": 1.7864072581966737e-05, "loss": 0.6719, "step": 11385 }, { "epoch": 1.8587404595730788, "grad_norm": 2.6356656551361084, "learning_rate": 1.7863676527423947e-05, "loss": 0.6465, "step": 11386 }, { "epoch": 1.8589037182155832, "grad_norm": 2.0398595333099365, "learning_rate": 1.7863280440556558e-05, "loss": 0.544, "step": 11387 }, { "epoch": 1.8590669768580874, "grad_norm": 2.7859270572662354, "learning_rate": 1.786288432136619e-05, "loss": 0.7743, "step": 11388 }, { "epoch": 1.8592302355005919, "grad_norm": 2.812268018722534, "learning_rate": 1.7862488169854474e-05, "loss": 0.6683, "step": 11389 }, { "epoch": 1.859393494143096, "grad_norm": 3.01802134513855, "learning_rate": 1.786209198602304e-05, "loss": 0.8014, "step": 11390 }, { "epoch": 1.8595567527856005, "grad_norm": 2.6847310066223145, "learning_rate": 1.786169576987351e-05, "loss": 0.6634, "step": 11391 }, { "epoch": 1.859720011428105, "grad_norm": 2.58626389503479, "learning_rate": 1.786129952140752e-05, "loss": 0.726, "step": 11392 }, { "epoch": 1.8598832700706094, "grad_norm": 2.8840982913970947, "learning_rate": 1.7860903240626697e-05, "loss": 0.6945, "step": 11393 }, { "epoch": 1.8600465287131138, "grad_norm": 2.536571979522705, "learning_rate": 1.7860506927532667e-05, "loss": 0.5909, "step": 11394 }, { "epoch": 1.8602097873556183, "grad_norm": 3.266772508621216, "learning_rate": 1.786011058212706e-05, "loss": 0.8379, "step": 11395 }, { "epoch": 1.8603730459981225, "grad_norm": 2.6766393184661865, "learning_rate": 1.785971420441151e-05, "loss": 0.661, "step": 11396 }, { "epoch": 1.860536304640627, "grad_norm": 1.8433122634887695, "learning_rate": 1.785931779438764e-05, "loss": 0.4477, "step": 11397 }, { "epoch": 1.8606995632831314, "grad_norm": 2.4199061393737793, "learning_rate": 1.7858921352057085e-05, "loss": 0.5795, "step": 11398 }, { "epoch": 1.8608628219256356, "grad_norm": 2.849760055541992, "learning_rate": 1.785852487742147e-05, "loss": 0.7278, "step": 11399 }, { "epoch": 1.86102608056814, "grad_norm": 2.3426015377044678, "learning_rate": 1.7858128370482427e-05, "loss": 0.5915, "step": 11400 }, { "epoch": 1.8611893392106444, "grad_norm": 2.5450422763824463, "learning_rate": 1.785773183124158e-05, "loss": 0.5794, "step": 11401 }, { "epoch": 1.8613525978531489, "grad_norm": 3.439868450164795, "learning_rate": 1.7857335259700574e-05, "loss": 0.9561, "step": 11402 }, { "epoch": 1.8615158564956533, "grad_norm": 2.644044876098633, "learning_rate": 1.7856938655861022e-05, "loss": 0.624, "step": 11403 }, { "epoch": 1.8616791151381578, "grad_norm": 2.4283030033111572, "learning_rate": 1.7856542019724567e-05, "loss": 0.6565, "step": 11404 }, { "epoch": 1.861842373780662, "grad_norm": 2.956352710723877, "learning_rate": 1.785614535129283e-05, "loss": 0.7012, "step": 11405 }, { "epoch": 1.8620056324231664, "grad_norm": 2.7111239433288574, "learning_rate": 1.785574865056745e-05, "loss": 0.6165, "step": 11406 }, { "epoch": 1.8621688910656706, "grad_norm": 2.706774950027466, "learning_rate": 1.785535191755005e-05, "loss": 0.5863, "step": 11407 }, { "epoch": 1.862332149708175, "grad_norm": 2.6820054054260254, "learning_rate": 1.7854955152242268e-05, "loss": 0.7429, "step": 11408 }, { "epoch": 1.8624954083506795, "grad_norm": 3.017245054244995, "learning_rate": 1.785455835464573e-05, "loss": 0.7117, "step": 11409 }, { "epoch": 1.862658666993184, "grad_norm": 2.8066179752349854, "learning_rate": 1.7854161524762067e-05, "loss": 0.7322, "step": 11410 }, { "epoch": 1.8628219256356884, "grad_norm": 2.380951166152954, "learning_rate": 1.7853764662592912e-05, "loss": 0.5474, "step": 11411 }, { "epoch": 1.8629851842781928, "grad_norm": 2.785851001739502, "learning_rate": 1.7853367768139896e-05, "loss": 0.7308, "step": 11412 }, { "epoch": 1.8631484429206973, "grad_norm": 2.3198015689849854, "learning_rate": 1.7852970841404652e-05, "loss": 0.5359, "step": 11413 }, { "epoch": 1.8633117015632015, "grad_norm": 2.4059760570526123, "learning_rate": 1.785257388238881e-05, "loss": 0.5522, "step": 11414 }, { "epoch": 1.863474960205706, "grad_norm": 2.3154749870300293, "learning_rate": 1.7852176891094e-05, "loss": 0.5569, "step": 11415 }, { "epoch": 1.8636382188482101, "grad_norm": 2.9894495010375977, "learning_rate": 1.7851779867521858e-05, "loss": 0.5468, "step": 11416 }, { "epoch": 1.8638014774907146, "grad_norm": 2.6624505519866943, "learning_rate": 1.7851382811674008e-05, "loss": 0.5695, "step": 11417 }, { "epoch": 1.863964736133219, "grad_norm": 2.5507187843322754, "learning_rate": 1.7850985723552096e-05, "loss": 0.6586, "step": 11418 }, { "epoch": 1.8641279947757234, "grad_norm": 2.295806646347046, "learning_rate": 1.785058860315774e-05, "loss": 0.5808, "step": 11419 }, { "epoch": 1.8642912534182279, "grad_norm": 2.6378729343414307, "learning_rate": 1.7850191450492582e-05, "loss": 0.7176, "step": 11420 }, { "epoch": 1.8644545120607323, "grad_norm": 2.3582983016967773, "learning_rate": 1.784979426555825e-05, "loss": 0.6502, "step": 11421 }, { "epoch": 1.8646177707032368, "grad_norm": 2.9376578330993652, "learning_rate": 1.784939704835638e-05, "loss": 0.8012, "step": 11422 }, { "epoch": 1.864781029345741, "grad_norm": 2.6764376163482666, "learning_rate": 1.7848999798888596e-05, "loss": 0.6341, "step": 11423 }, { "epoch": 1.8649442879882454, "grad_norm": 2.608736276626587, "learning_rate": 1.7848602517156545e-05, "loss": 0.6626, "step": 11424 }, { "epoch": 1.8651075466307496, "grad_norm": 2.630279779434204, "learning_rate": 1.7848205203161852e-05, "loss": 0.6659, "step": 11425 }, { "epoch": 1.865270805273254, "grad_norm": 2.8400566577911377, "learning_rate": 1.7847807856906144e-05, "loss": 0.6549, "step": 11426 }, { "epoch": 1.8654340639157585, "grad_norm": 2.511655807495117, "learning_rate": 1.7847410478391068e-05, "loss": 0.5919, "step": 11427 }, { "epoch": 1.865597322558263, "grad_norm": 2.7856383323669434, "learning_rate": 1.784701306761825e-05, "loss": 0.6627, "step": 11428 }, { "epoch": 1.8657605812007674, "grad_norm": 2.1750762462615967, "learning_rate": 1.7846615624589323e-05, "loss": 0.5755, "step": 11429 }, { "epoch": 1.8659238398432718, "grad_norm": 3.042781114578247, "learning_rate": 1.784621814930592e-05, "loss": 0.8236, "step": 11430 }, { "epoch": 1.8660870984857763, "grad_norm": 3.1390087604522705, "learning_rate": 1.784582064176968e-05, "loss": 0.6898, "step": 11431 }, { "epoch": 1.8662503571282805, "grad_norm": 3.1758971214294434, "learning_rate": 1.7845423101982235e-05, "loss": 0.7272, "step": 11432 }, { "epoch": 1.866413615770785, "grad_norm": 2.517773389816284, "learning_rate": 1.7845025529945213e-05, "loss": 0.6782, "step": 11433 }, { "epoch": 1.8665768744132891, "grad_norm": 2.7432703971862793, "learning_rate": 1.7844627925660253e-05, "loss": 0.7581, "step": 11434 }, { "epoch": 1.8667401330557936, "grad_norm": 2.6466169357299805, "learning_rate": 1.7844230289128997e-05, "loss": 0.7812, "step": 11435 }, { "epoch": 1.866903391698298, "grad_norm": 2.3000636100769043, "learning_rate": 1.7843832620353064e-05, "loss": 0.5529, "step": 11436 }, { "epoch": 1.8670666503408024, "grad_norm": 2.8353750705718994, "learning_rate": 1.7843434919334103e-05, "loss": 0.6908, "step": 11437 }, { "epoch": 1.8672299089833069, "grad_norm": 2.72548246383667, "learning_rate": 1.784303718607374e-05, "loss": 0.77, "step": 11438 }, { "epoch": 1.8673931676258113, "grad_norm": 2.563445568084717, "learning_rate": 1.7842639420573614e-05, "loss": 0.6226, "step": 11439 }, { "epoch": 1.8675564262683155, "grad_norm": 3.1167163848876953, "learning_rate": 1.7842241622835356e-05, "loss": 0.6686, "step": 11440 }, { "epoch": 1.86771968491082, "grad_norm": 2.8028836250305176, "learning_rate": 1.7841843792860606e-05, "loss": 0.6709, "step": 11441 }, { "epoch": 1.8678829435533244, "grad_norm": 2.556903123855591, "learning_rate": 1.7841445930650994e-05, "loss": 0.6072, "step": 11442 }, { "epoch": 1.8680462021958286, "grad_norm": 3.1697874069213867, "learning_rate": 1.7841048036208164e-05, "loss": 0.791, "step": 11443 }, { "epoch": 1.868209460838333, "grad_norm": 3.0588362216949463, "learning_rate": 1.7840650109533744e-05, "loss": 0.7167, "step": 11444 }, { "epoch": 1.8683727194808375, "grad_norm": 2.3605899810791016, "learning_rate": 1.784025215062937e-05, "loss": 0.5309, "step": 11445 }, { "epoch": 1.868535978123342, "grad_norm": 2.789999008178711, "learning_rate": 1.7839854159496683e-05, "loss": 0.5942, "step": 11446 }, { "epoch": 1.8686992367658464, "grad_norm": 2.7740142345428467, "learning_rate": 1.783945613613731e-05, "loss": 0.5752, "step": 11447 }, { "epoch": 1.8688624954083508, "grad_norm": 2.9298226833343506, "learning_rate": 1.78390580805529e-05, "loss": 0.6526, "step": 11448 }, { "epoch": 1.869025754050855, "grad_norm": 2.6803786754608154, "learning_rate": 1.7838659992745078e-05, "loss": 0.6131, "step": 11449 }, { "epoch": 1.8691890126933595, "grad_norm": 2.8849282264709473, "learning_rate": 1.7838261872715486e-05, "loss": 0.7048, "step": 11450 }, { "epoch": 1.8693522713358637, "grad_norm": 2.6164915561676025, "learning_rate": 1.783786372046576e-05, "loss": 0.6262, "step": 11451 }, { "epoch": 1.8695155299783681, "grad_norm": 3.3124446868896484, "learning_rate": 1.7837465535997537e-05, "loss": 0.8947, "step": 11452 }, { "epoch": 1.8696787886208726, "grad_norm": 2.5786218643188477, "learning_rate": 1.7837067319312446e-05, "loss": 0.6093, "step": 11453 }, { "epoch": 1.869842047263377, "grad_norm": 2.8552818298339844, "learning_rate": 1.7836669070412135e-05, "loss": 0.6797, "step": 11454 }, { "epoch": 1.8700053059058814, "grad_norm": 2.835348606109619, "learning_rate": 1.7836270789298238e-05, "loss": 0.6993, "step": 11455 }, { "epoch": 1.8701685645483859, "grad_norm": 2.446035146713257, "learning_rate": 1.783587247597239e-05, "loss": 0.5782, "step": 11456 }, { "epoch": 1.8703318231908903, "grad_norm": 3.3701207637786865, "learning_rate": 1.7835474130436228e-05, "loss": 0.9303, "step": 11457 }, { "epoch": 1.8704950818333945, "grad_norm": 2.794038772583008, "learning_rate": 1.783507575269139e-05, "loss": 0.6352, "step": 11458 }, { "epoch": 1.870658340475899, "grad_norm": 2.6083123683929443, "learning_rate": 1.7834677342739516e-05, "loss": 0.5749, "step": 11459 }, { "epoch": 1.8708215991184032, "grad_norm": 3.014730930328369, "learning_rate": 1.7834278900582238e-05, "loss": 0.7217, "step": 11460 }, { "epoch": 1.8709848577609076, "grad_norm": 2.535703420639038, "learning_rate": 1.7833880426221203e-05, "loss": 0.6721, "step": 11461 }, { "epoch": 1.871148116403412, "grad_norm": 2.7189977169036865, "learning_rate": 1.783348191965804e-05, "loss": 0.6893, "step": 11462 }, { "epoch": 1.8713113750459165, "grad_norm": 2.772012948989868, "learning_rate": 1.783308338089439e-05, "loss": 0.6747, "step": 11463 }, { "epoch": 1.871474633688421, "grad_norm": 2.890803098678589, "learning_rate": 1.7832684809931894e-05, "loss": 0.6904, "step": 11464 }, { "epoch": 1.8716378923309254, "grad_norm": 2.6471426486968994, "learning_rate": 1.7832286206772188e-05, "loss": 0.7405, "step": 11465 }, { "epoch": 1.8718011509734298, "grad_norm": 2.3830273151397705, "learning_rate": 1.783188757141691e-05, "loss": 0.4927, "step": 11466 }, { "epoch": 1.871964409615934, "grad_norm": 3.0553946495056152, "learning_rate": 1.78314889038677e-05, "loss": 0.8476, "step": 11467 }, { "epoch": 1.8721276682584385, "grad_norm": 2.8927574157714844, "learning_rate": 1.7831090204126194e-05, "loss": 0.7077, "step": 11468 }, { "epoch": 1.8722909269009427, "grad_norm": 2.9299488067626953, "learning_rate": 1.7830691472194038e-05, "loss": 0.6994, "step": 11469 }, { "epoch": 1.8724541855434471, "grad_norm": 3.1664438247680664, "learning_rate": 1.783029270807286e-05, "loss": 0.6814, "step": 11470 }, { "epoch": 1.8726174441859516, "grad_norm": 2.728924036026001, "learning_rate": 1.782989391176431e-05, "loss": 0.6489, "step": 11471 }, { "epoch": 1.872780702828456, "grad_norm": 2.8184926509857178, "learning_rate": 1.7829495083270022e-05, "loss": 0.7577, "step": 11472 }, { "epoch": 1.8729439614709604, "grad_norm": 2.8053359985351562, "learning_rate": 1.7829096222591637e-05, "loss": 0.7154, "step": 11473 }, { "epoch": 1.8731072201134649, "grad_norm": 2.777589797973633, "learning_rate": 1.782869732973079e-05, "loss": 0.6621, "step": 11474 }, { "epoch": 1.8732704787559693, "grad_norm": 3.0745115280151367, "learning_rate": 1.7828298404689128e-05, "loss": 0.7024, "step": 11475 }, { "epoch": 1.8734337373984735, "grad_norm": 3.0754199028015137, "learning_rate": 1.7827899447468285e-05, "loss": 0.7588, "step": 11476 }, { "epoch": 1.873596996040978, "grad_norm": 2.4115517139434814, "learning_rate": 1.7827500458069904e-05, "loss": 0.6025, "step": 11477 }, { "epoch": 1.8737602546834822, "grad_norm": 2.7637102603912354, "learning_rate": 1.7827101436495624e-05, "loss": 0.8233, "step": 11478 }, { "epoch": 1.8739235133259866, "grad_norm": 2.8087832927703857, "learning_rate": 1.7826702382747086e-05, "loss": 0.6361, "step": 11479 }, { "epoch": 1.874086771968491, "grad_norm": 2.5669455528259277, "learning_rate": 1.7826303296825926e-05, "loss": 0.681, "step": 11480 }, { "epoch": 1.8742500306109955, "grad_norm": 2.63183331489563, "learning_rate": 1.7825904178733794e-05, "loss": 0.6952, "step": 11481 }, { "epoch": 1.8744132892535, "grad_norm": 2.62042236328125, "learning_rate": 1.7825505028472322e-05, "loss": 0.7272, "step": 11482 }, { "epoch": 1.8745765478960044, "grad_norm": 2.0328259468078613, "learning_rate": 1.7825105846043156e-05, "loss": 0.5002, "step": 11483 }, { "epoch": 1.8747398065385086, "grad_norm": 2.747002601623535, "learning_rate": 1.782470663144793e-05, "loss": 0.7108, "step": 11484 }, { "epoch": 1.874903065181013, "grad_norm": 2.2663955688476562, "learning_rate": 1.7824307384688294e-05, "loss": 0.5369, "step": 11485 }, { "epoch": 1.8750663238235175, "grad_norm": 2.648134708404541, "learning_rate": 1.7823908105765883e-05, "loss": 0.668, "step": 11486 }, { "epoch": 1.8752295824660217, "grad_norm": 2.925515651702881, "learning_rate": 1.7823508794682342e-05, "loss": 0.6303, "step": 11487 }, { "epoch": 1.875392841108526, "grad_norm": 2.6408092975616455, "learning_rate": 1.7823109451439308e-05, "loss": 0.7106, "step": 11488 }, { "epoch": 1.8755560997510305, "grad_norm": 2.652420997619629, "learning_rate": 1.7822710076038424e-05, "loss": 0.7353, "step": 11489 }, { "epoch": 1.875719358393535, "grad_norm": 2.9481935501098633, "learning_rate": 1.7822310668481334e-05, "loss": 0.69, "step": 11490 }, { "epoch": 1.8758826170360394, "grad_norm": 2.528242349624634, "learning_rate": 1.782191122876968e-05, "loss": 0.6495, "step": 11491 }, { "epoch": 1.8760458756785439, "grad_norm": 2.878757953643799, "learning_rate": 1.78215117569051e-05, "loss": 0.7509, "step": 11492 }, { "epoch": 1.876209134321048, "grad_norm": 2.8085899353027344, "learning_rate": 1.7821112252889243e-05, "loss": 0.751, "step": 11493 }, { "epoch": 1.8763723929635525, "grad_norm": 2.8539741039276123, "learning_rate": 1.782071271672374e-05, "loss": 0.7353, "step": 11494 }, { "epoch": 1.8765356516060567, "grad_norm": 2.3704447746276855, "learning_rate": 1.7820313148410244e-05, "loss": 0.5807, "step": 11495 }, { "epoch": 1.8766989102485612, "grad_norm": 2.494720697402954, "learning_rate": 1.7819913547950395e-05, "loss": 0.6162, "step": 11496 }, { "epoch": 1.8768621688910656, "grad_norm": 2.855987071990967, "learning_rate": 1.781951391534583e-05, "loss": 0.6624, "step": 11497 }, { "epoch": 1.87702542753357, "grad_norm": 2.227299690246582, "learning_rate": 1.78191142505982e-05, "loss": 0.6538, "step": 11498 }, { "epoch": 1.8771886861760745, "grad_norm": 2.530287265777588, "learning_rate": 1.7818714553709142e-05, "loss": 0.6966, "step": 11499 }, { "epoch": 1.877351944818579, "grad_norm": 2.481170892715454, "learning_rate": 1.78183148246803e-05, "loss": 0.7436, "step": 11500 }, { "epoch": 1.8775152034610834, "grad_norm": 2.7096967697143555, "learning_rate": 1.7817915063513317e-05, "loss": 0.7677, "step": 11501 }, { "epoch": 1.8776784621035876, "grad_norm": 2.3829872608184814, "learning_rate": 1.781751527020984e-05, "loss": 0.6066, "step": 11502 }, { "epoch": 1.877841720746092, "grad_norm": 2.572340250015259, "learning_rate": 1.7817115444771505e-05, "loss": 0.6047, "step": 11503 }, { "epoch": 1.8780049793885962, "grad_norm": 2.113408327102661, "learning_rate": 1.7816715587199966e-05, "loss": 0.5426, "step": 11504 }, { "epoch": 1.8781682380311007, "grad_norm": 2.7903528213500977, "learning_rate": 1.7816315697496858e-05, "loss": 0.6634, "step": 11505 }, { "epoch": 1.878331496673605, "grad_norm": 1.8433257341384888, "learning_rate": 1.781591577566383e-05, "loss": 0.4438, "step": 11506 }, { "epoch": 1.8784947553161095, "grad_norm": 2.772733449935913, "learning_rate": 1.7815515821702522e-05, "loss": 0.7819, "step": 11507 }, { "epoch": 1.878658013958614, "grad_norm": 2.687434196472168, "learning_rate": 1.7815115835614576e-05, "loss": 0.706, "step": 11508 }, { "epoch": 1.8788212726011184, "grad_norm": 2.483288049697876, "learning_rate": 1.7814715817401643e-05, "loss": 0.5864, "step": 11509 }, { "epoch": 1.8789845312436229, "grad_norm": 2.7732608318328857, "learning_rate": 1.7814315767065364e-05, "loss": 0.6472, "step": 11510 }, { "epoch": 1.879147789886127, "grad_norm": 2.7975270748138428, "learning_rate": 1.7813915684607383e-05, "loss": 0.6665, "step": 11511 }, { "epoch": 1.8793110485286315, "grad_norm": 2.5693325996398926, "learning_rate": 1.7813515570029344e-05, "loss": 0.6702, "step": 11512 }, { "epoch": 1.8794743071711357, "grad_norm": 2.8784844875335693, "learning_rate": 1.7813115423332896e-05, "loss": 0.6827, "step": 11513 }, { "epoch": 1.8796375658136402, "grad_norm": 2.6823666095733643, "learning_rate": 1.7812715244519676e-05, "loss": 0.6573, "step": 11514 }, { "epoch": 1.8798008244561446, "grad_norm": 2.631204128265381, "learning_rate": 1.7812315033591338e-05, "loss": 0.7089, "step": 11515 }, { "epoch": 1.879964083098649, "grad_norm": 2.5026803016662598, "learning_rate": 1.781191479054952e-05, "loss": 0.5644, "step": 11516 }, { "epoch": 1.8801273417411535, "grad_norm": 2.431051015853882, "learning_rate": 1.781151451539587e-05, "loss": 0.5983, "step": 11517 }, { "epoch": 1.880290600383658, "grad_norm": 3.1446609497070312, "learning_rate": 1.7811114208132036e-05, "loss": 0.8234, "step": 11518 }, { "epoch": 1.8804538590261624, "grad_norm": 2.4583027362823486, "learning_rate": 1.7810713868759662e-05, "loss": 0.6911, "step": 11519 }, { "epoch": 1.8806171176686666, "grad_norm": 2.6697239875793457, "learning_rate": 1.781031349728039e-05, "loss": 0.6804, "step": 11520 }, { "epoch": 1.880780376311171, "grad_norm": 2.689476251602173, "learning_rate": 1.7809913093695867e-05, "loss": 0.6722, "step": 11521 }, { "epoch": 1.8809436349536752, "grad_norm": 2.482783079147339, "learning_rate": 1.7809512658007742e-05, "loss": 0.6377, "step": 11522 }, { "epoch": 1.8811068935961797, "grad_norm": 3.1306967735290527, "learning_rate": 1.780911219021766e-05, "loss": 0.8698, "step": 11523 }, { "epoch": 1.881270152238684, "grad_norm": 2.553988456726074, "learning_rate": 1.7808711690327266e-05, "loss": 0.6942, "step": 11524 }, { "epoch": 1.8814334108811885, "grad_norm": 2.875913381576538, "learning_rate": 1.7808311158338204e-05, "loss": 0.678, "step": 11525 }, { "epoch": 1.881596669523693, "grad_norm": 2.9514200687408447, "learning_rate": 1.7807910594252126e-05, "loss": 0.7854, "step": 11526 }, { "epoch": 1.8817599281661974, "grad_norm": 2.553740978240967, "learning_rate": 1.7807509998070675e-05, "loss": 0.6949, "step": 11527 }, { "epoch": 1.8819231868087016, "grad_norm": 3.2070977687835693, "learning_rate": 1.7807109369795495e-05, "loss": 0.7745, "step": 11528 }, { "epoch": 1.882086445451206, "grad_norm": 2.915862798690796, "learning_rate": 1.7806708709428246e-05, "loss": 0.7306, "step": 11529 }, { "epoch": 1.8822497040937105, "grad_norm": 2.3883726596832275, "learning_rate": 1.7806308016970557e-05, "loss": 0.7062, "step": 11530 }, { "epoch": 1.8824129627362147, "grad_norm": 2.4111571311950684, "learning_rate": 1.7805907292424086e-05, "loss": 0.581, "step": 11531 }, { "epoch": 1.8825762213787192, "grad_norm": 2.213008165359497, "learning_rate": 1.7805506535790478e-05, "loss": 0.5201, "step": 11532 }, { "epoch": 1.8827394800212236, "grad_norm": 2.6417019367218018, "learning_rate": 1.7805105747071378e-05, "loss": 0.7167, "step": 11533 }, { "epoch": 1.882902738663728, "grad_norm": 2.660675048828125, "learning_rate": 1.7804704926268434e-05, "loss": 0.6736, "step": 11534 }, { "epoch": 1.8830659973062325, "grad_norm": 2.6787021160125732, "learning_rate": 1.7804304073383298e-05, "loss": 0.6912, "step": 11535 }, { "epoch": 1.883229255948737, "grad_norm": 2.433321475982666, "learning_rate": 1.7803903188417616e-05, "loss": 0.654, "step": 11536 }, { "epoch": 1.8833925145912411, "grad_norm": 2.441941022872925, "learning_rate": 1.780350227137303e-05, "loss": 0.6831, "step": 11537 }, { "epoch": 1.8835557732337456, "grad_norm": 2.409844160079956, "learning_rate": 1.78031013222512e-05, "loss": 0.6066, "step": 11538 }, { "epoch": 1.8837190318762498, "grad_norm": 2.550246238708496, "learning_rate": 1.780270034105376e-05, "loss": 0.6912, "step": 11539 }, { "epoch": 1.8838822905187542, "grad_norm": 2.778653144836426, "learning_rate": 1.780229932778237e-05, "loss": 0.6694, "step": 11540 }, { "epoch": 1.8840455491612587, "grad_norm": 2.4149651527404785, "learning_rate": 1.7801898282438668e-05, "loss": 0.6894, "step": 11541 }, { "epoch": 1.884208807803763, "grad_norm": 2.41375732421875, "learning_rate": 1.7801497205024315e-05, "loss": 0.6005, "step": 11542 }, { "epoch": 1.8843720664462675, "grad_norm": 2.7683935165405273, "learning_rate": 1.780109609554095e-05, "loss": 0.6264, "step": 11543 }, { "epoch": 1.884535325088772, "grad_norm": 2.2773895263671875, "learning_rate": 1.780069495399022e-05, "loss": 0.5948, "step": 11544 }, { "epoch": 1.8846985837312764, "grad_norm": 2.4905471801757812, "learning_rate": 1.7800293780373782e-05, "loss": 0.5389, "step": 11545 }, { "epoch": 1.8848618423737806, "grad_norm": 2.4340980052948, "learning_rate": 1.779989257469328e-05, "loss": 0.6496, "step": 11546 }, { "epoch": 1.885025101016285, "grad_norm": 2.306299924850464, "learning_rate": 1.779949133695037e-05, "loss": 0.5264, "step": 11547 }, { "epoch": 1.8851883596587893, "grad_norm": 2.6476902961730957, "learning_rate": 1.7799090067146693e-05, "loss": 0.6042, "step": 11548 }, { "epoch": 1.8853516183012937, "grad_norm": 1.9309256076812744, "learning_rate": 1.77986887652839e-05, "loss": 0.4843, "step": 11549 }, { "epoch": 1.8855148769437982, "grad_norm": 2.595625877380371, "learning_rate": 1.7798287431363644e-05, "loss": 0.6867, "step": 11550 }, { "epoch": 1.8856781355863026, "grad_norm": 2.518697500228882, "learning_rate": 1.7797886065387575e-05, "loss": 0.6458, "step": 11551 }, { "epoch": 1.885841394228807, "grad_norm": 2.670013189315796, "learning_rate": 1.779748466735734e-05, "loss": 0.7572, "step": 11552 }, { "epoch": 1.8860046528713115, "grad_norm": 2.9732751846313477, "learning_rate": 1.7797083237274586e-05, "loss": 0.8117, "step": 11553 }, { "epoch": 1.886167911513816, "grad_norm": 2.305497407913208, "learning_rate": 1.7796681775140968e-05, "loss": 0.5834, "step": 11554 }, { "epoch": 1.8863311701563201, "grad_norm": 2.5515549182891846, "learning_rate": 1.7796280280958135e-05, "loss": 0.649, "step": 11555 }, { "epoch": 1.8864944287988246, "grad_norm": 3.0175137519836426, "learning_rate": 1.7795878754727742e-05, "loss": 0.8427, "step": 11556 }, { "epoch": 1.8866576874413288, "grad_norm": 2.7554054260253906, "learning_rate": 1.7795477196451433e-05, "loss": 0.6742, "step": 11557 }, { "epoch": 1.8868209460838332, "grad_norm": 2.40444016456604, "learning_rate": 1.7795075606130858e-05, "loss": 0.6535, "step": 11558 }, { "epoch": 1.8869842047263377, "grad_norm": 1.9314090013504028, "learning_rate": 1.779467398376767e-05, "loss": 0.5536, "step": 11559 }, { "epoch": 1.887147463368842, "grad_norm": 2.7242190837860107, "learning_rate": 1.7794272329363527e-05, "loss": 0.6827, "step": 11560 }, { "epoch": 1.8873107220113465, "grad_norm": 3.3070781230926514, "learning_rate": 1.7793870642920068e-05, "loss": 0.7382, "step": 11561 }, { "epoch": 1.887473980653851, "grad_norm": 2.5408496856689453, "learning_rate": 1.7793468924438952e-05, "loss": 0.6616, "step": 11562 }, { "epoch": 1.8876372392963554, "grad_norm": 2.479846477508545, "learning_rate": 1.7793067173921828e-05, "loss": 0.6319, "step": 11563 }, { "epoch": 1.8878004979388596, "grad_norm": 2.591151475906372, "learning_rate": 1.7792665391370344e-05, "loss": 0.5313, "step": 11564 }, { "epoch": 1.887963756581364, "grad_norm": 2.5383386611938477, "learning_rate": 1.7792263576786158e-05, "loss": 0.6353, "step": 11565 }, { "epoch": 1.8881270152238683, "grad_norm": 2.0678837299346924, "learning_rate": 1.779186173017092e-05, "loss": 0.4916, "step": 11566 }, { "epoch": 1.8882902738663727, "grad_norm": 2.8374826908111572, "learning_rate": 1.779145985152628e-05, "loss": 0.5689, "step": 11567 }, { "epoch": 1.8884535325088772, "grad_norm": 3.0651607513427734, "learning_rate": 1.779105794085389e-05, "loss": 0.8079, "step": 11568 }, { "epoch": 1.8886167911513816, "grad_norm": 2.505903720855713, "learning_rate": 1.77906559981554e-05, "loss": 0.5957, "step": 11569 }, { "epoch": 1.888780049793886, "grad_norm": 2.759746789932251, "learning_rate": 1.7790254023432467e-05, "loss": 0.6035, "step": 11570 }, { "epoch": 1.8889433084363905, "grad_norm": 2.4866092205047607, "learning_rate": 1.7789852016686745e-05, "loss": 0.6067, "step": 11571 }, { "epoch": 1.889106567078895, "grad_norm": 3.1418001651763916, "learning_rate": 1.7789449977919873e-05, "loss": 0.6757, "step": 11572 }, { "epoch": 1.8892698257213991, "grad_norm": 3.5998003482818604, "learning_rate": 1.778904790713352e-05, "loss": 0.8251, "step": 11573 }, { "epoch": 1.8894330843639036, "grad_norm": 2.601787567138672, "learning_rate": 1.7788645804329335e-05, "loss": 0.6246, "step": 11574 }, { "epoch": 1.8895963430064078, "grad_norm": 2.1882500648498535, "learning_rate": 1.778824366950896e-05, "loss": 0.5719, "step": 11575 }, { "epoch": 1.8897596016489122, "grad_norm": 2.6286895275115967, "learning_rate": 1.7787841502674066e-05, "loss": 0.7157, "step": 11576 }, { "epoch": 1.8899228602914167, "grad_norm": 2.810940742492676, "learning_rate": 1.778743930382629e-05, "loss": 0.6775, "step": 11577 }, { "epoch": 1.890086118933921, "grad_norm": 2.8857898712158203, "learning_rate": 1.778703707296729e-05, "loss": 0.6883, "step": 11578 }, { "epoch": 1.8902493775764255, "grad_norm": 2.706348419189453, "learning_rate": 1.7786634810098726e-05, "loss": 0.7618, "step": 11579 }, { "epoch": 1.89041263621893, "grad_norm": 3.043564796447754, "learning_rate": 1.7786232515222244e-05, "loss": 0.7026, "step": 11580 }, { "epoch": 1.8905758948614342, "grad_norm": 2.446991205215454, "learning_rate": 1.7785830188339497e-05, "loss": 0.5733, "step": 11581 }, { "epoch": 1.8907391535039386, "grad_norm": 2.3722760677337646, "learning_rate": 1.778542782945215e-05, "loss": 0.7146, "step": 11582 }, { "epoch": 1.890902412146443, "grad_norm": 2.5758886337280273, "learning_rate": 1.7785025438561844e-05, "loss": 0.6475, "step": 11583 }, { "epoch": 1.8910656707889473, "grad_norm": 2.8184125423431396, "learning_rate": 1.7784623015670237e-05, "loss": 0.6145, "step": 11584 }, { "epoch": 1.8912289294314517, "grad_norm": 2.464829444885254, "learning_rate": 1.7784220560778986e-05, "loss": 0.6559, "step": 11585 }, { "epoch": 1.8913921880739561, "grad_norm": 3.0659356117248535, "learning_rate": 1.778381807388974e-05, "loss": 0.7496, "step": 11586 }, { "epoch": 1.8915554467164606, "grad_norm": 2.428579807281494, "learning_rate": 1.7783415555004164e-05, "loss": 0.6114, "step": 11587 }, { "epoch": 1.891718705358965, "grad_norm": 2.8998594284057617, "learning_rate": 1.7783013004123896e-05, "loss": 0.7279, "step": 11588 }, { "epoch": 1.8918819640014695, "grad_norm": 2.498514413833618, "learning_rate": 1.778261042125061e-05, "loss": 0.5991, "step": 11589 }, { "epoch": 1.8920452226439737, "grad_norm": 2.7467234134674072, "learning_rate": 1.7782207806385943e-05, "loss": 0.7709, "step": 11590 }, { "epoch": 1.8922084812864781, "grad_norm": 2.9729583263397217, "learning_rate": 1.778180515953156e-05, "loss": 0.7721, "step": 11591 }, { "epoch": 1.8923717399289823, "grad_norm": 2.7851033210754395, "learning_rate": 1.778140248068912e-05, "loss": 0.7109, "step": 11592 }, { "epoch": 1.8925349985714868, "grad_norm": 2.8217668533325195, "learning_rate": 1.778099976986027e-05, "loss": 0.753, "step": 11593 }, { "epoch": 1.8926982572139912, "grad_norm": 2.6858837604522705, "learning_rate": 1.7780597027046666e-05, "loss": 0.7176, "step": 11594 }, { "epoch": 1.8928615158564956, "grad_norm": 3.2064576148986816, "learning_rate": 1.7780194252249966e-05, "loss": 0.6938, "step": 11595 }, { "epoch": 1.893024774499, "grad_norm": 2.1108782291412354, "learning_rate": 1.777979144547183e-05, "loss": 0.5496, "step": 11596 }, { "epoch": 1.8931880331415045, "grad_norm": 3.0134341716766357, "learning_rate": 1.77793886067139e-05, "loss": 0.8118, "step": 11597 }, { "epoch": 1.893351291784009, "grad_norm": 3.123913288116455, "learning_rate": 1.777898573597785e-05, "loss": 0.7757, "step": 11598 }, { "epoch": 1.8935145504265132, "grad_norm": 2.442246198654175, "learning_rate": 1.7778582833265318e-05, "loss": 0.6287, "step": 11599 }, { "epoch": 1.8936778090690176, "grad_norm": 2.440213203430176, "learning_rate": 1.7778179898577973e-05, "loss": 0.6655, "step": 11600 }, { "epoch": 1.8938410677115218, "grad_norm": 2.3408455848693848, "learning_rate": 1.7777776931917472e-05, "loss": 0.5556, "step": 11601 }, { "epoch": 1.8940043263540263, "grad_norm": 2.77349853515625, "learning_rate": 1.777737393328546e-05, "loss": 0.7524, "step": 11602 }, { "epoch": 1.8941675849965307, "grad_norm": 2.665858507156372, "learning_rate": 1.7776970902683605e-05, "loss": 0.6975, "step": 11603 }, { "epoch": 1.8943308436390351, "grad_norm": 3.0643749237060547, "learning_rate": 1.777656784011356e-05, "loss": 0.7136, "step": 11604 }, { "epoch": 1.8944941022815396, "grad_norm": 2.6848959922790527, "learning_rate": 1.7776164745576976e-05, "loss": 0.6416, "step": 11605 }, { "epoch": 1.894657360924044, "grad_norm": 2.794130563735962, "learning_rate": 1.7775761619075517e-05, "loss": 0.7056, "step": 11606 }, { "epoch": 1.8948206195665485, "grad_norm": 3.122002601623535, "learning_rate": 1.777535846061084e-05, "loss": 0.7631, "step": 11607 }, { "epoch": 1.8949838782090527, "grad_norm": 2.637021064758301, "learning_rate": 1.77749552701846e-05, "loss": 0.5993, "step": 11608 }, { "epoch": 1.8951471368515571, "grad_norm": 2.769122362136841, "learning_rate": 1.7774552047798455e-05, "loss": 0.7835, "step": 11609 }, { "epoch": 1.8953103954940613, "grad_norm": 2.7894678115844727, "learning_rate": 1.777414879345406e-05, "loss": 0.7396, "step": 11610 }, { "epoch": 1.8954736541365658, "grad_norm": 2.6384990215301514, "learning_rate": 1.777374550715307e-05, "loss": 0.709, "step": 11611 }, { "epoch": 1.8956369127790702, "grad_norm": 2.7890067100524902, "learning_rate": 1.7773342188897156e-05, "loss": 0.7848, "step": 11612 }, { "epoch": 1.8958001714215746, "grad_norm": 2.7404658794403076, "learning_rate": 1.7772938838687966e-05, "loss": 0.6367, "step": 11613 }, { "epoch": 1.895963430064079, "grad_norm": 2.8833203315734863, "learning_rate": 1.777253545652716e-05, "loss": 0.7684, "step": 11614 }, { "epoch": 1.8961266887065835, "grad_norm": 2.6108200550079346, "learning_rate": 1.777213204241639e-05, "loss": 0.6558, "step": 11615 }, { "epoch": 1.896289947349088, "grad_norm": 2.4681806564331055, "learning_rate": 1.7771728596357326e-05, "loss": 0.605, "step": 11616 }, { "epoch": 1.8964532059915922, "grad_norm": 2.878981351852417, "learning_rate": 1.7771325118351616e-05, "loss": 0.6766, "step": 11617 }, { "epoch": 1.8966164646340966, "grad_norm": 3.0033791065216064, "learning_rate": 1.7770921608400927e-05, "loss": 0.7872, "step": 11618 }, { "epoch": 1.8967797232766008, "grad_norm": 2.566655397415161, "learning_rate": 1.777051806650691e-05, "loss": 0.7087, "step": 11619 }, { "epoch": 1.8969429819191053, "grad_norm": 2.6613616943359375, "learning_rate": 1.7770114492671228e-05, "loss": 0.7384, "step": 11620 }, { "epoch": 1.8971062405616097, "grad_norm": 2.424229145050049, "learning_rate": 1.776971088689554e-05, "loss": 0.6337, "step": 11621 }, { "epoch": 1.8972694992041141, "grad_norm": 3.2994887828826904, "learning_rate": 1.7769307249181503e-05, "loss": 0.933, "step": 11622 }, { "epoch": 1.8974327578466186, "grad_norm": 2.7463366985321045, "learning_rate": 1.7768903579530778e-05, "loss": 0.5854, "step": 11623 }, { "epoch": 1.897596016489123, "grad_norm": 2.54933500289917, "learning_rate": 1.7768499877945023e-05, "loss": 0.5829, "step": 11624 }, { "epoch": 1.8977592751316272, "grad_norm": 2.5627012252807617, "learning_rate": 1.7768096144425903e-05, "loss": 0.7167, "step": 11625 }, { "epoch": 1.8979225337741317, "grad_norm": 2.4189233779907227, "learning_rate": 1.776769237897507e-05, "loss": 0.5811, "step": 11626 }, { "epoch": 1.898085792416636, "grad_norm": 2.3697078227996826, "learning_rate": 1.7767288581594183e-05, "loss": 0.6069, "step": 11627 }, { "epoch": 1.8982490510591403, "grad_norm": 2.622650623321533, "learning_rate": 1.776688475228491e-05, "loss": 0.6408, "step": 11628 }, { "epoch": 1.8984123097016448, "grad_norm": 2.295779228210449, "learning_rate": 1.7766480891048905e-05, "loss": 0.5876, "step": 11629 }, { "epoch": 1.8985755683441492, "grad_norm": 2.479215145111084, "learning_rate": 1.776607699788783e-05, "loss": 0.6514, "step": 11630 }, { "epoch": 1.8987388269866536, "grad_norm": 2.4367589950561523, "learning_rate": 1.7765673072803347e-05, "loss": 0.6226, "step": 11631 }, { "epoch": 1.898902085629158, "grad_norm": 3.1172080039978027, "learning_rate": 1.776526911579711e-05, "loss": 0.7433, "step": 11632 }, { "epoch": 1.8990653442716625, "grad_norm": 2.26238751411438, "learning_rate": 1.7764865126870788e-05, "loss": 0.54, "step": 11633 }, { "epoch": 1.8992286029141667, "grad_norm": 2.6572251319885254, "learning_rate": 1.7764461106026037e-05, "loss": 0.6619, "step": 11634 }, { "epoch": 1.8993918615566712, "grad_norm": 2.6051149368286133, "learning_rate": 1.7764057053264516e-05, "loss": 0.67, "step": 11635 }, { "epoch": 1.8995551201991754, "grad_norm": 2.4776132106781006, "learning_rate": 1.7763652968587887e-05, "loss": 0.671, "step": 11636 }, { "epoch": 1.8997183788416798, "grad_norm": 2.394479990005493, "learning_rate": 1.7763248851997815e-05, "loss": 0.6414, "step": 11637 }, { "epoch": 1.8998816374841843, "grad_norm": 2.987867593765259, "learning_rate": 1.7762844703495956e-05, "loss": 0.6724, "step": 11638 }, { "epoch": 1.9000448961266887, "grad_norm": 2.7362895011901855, "learning_rate": 1.7762440523083974e-05, "loss": 0.6757, "step": 11639 }, { "epoch": 1.9002081547691931, "grad_norm": 2.7432355880737305, "learning_rate": 1.7762036310763533e-05, "loss": 0.7621, "step": 11640 }, { "epoch": 1.9003714134116976, "grad_norm": 2.6565330028533936, "learning_rate": 1.7761632066536288e-05, "loss": 0.6958, "step": 11641 }, { "epoch": 1.900534672054202, "grad_norm": 2.961188793182373, "learning_rate": 1.7761227790403905e-05, "loss": 0.6927, "step": 11642 }, { "epoch": 1.9006979306967062, "grad_norm": 2.2586584091186523, "learning_rate": 1.7760823482368046e-05, "loss": 0.6011, "step": 11643 }, { "epoch": 1.9008611893392107, "grad_norm": 2.4921441078186035, "learning_rate": 1.776041914243037e-05, "loss": 0.6318, "step": 11644 }, { "epoch": 1.9010244479817149, "grad_norm": 2.833909034729004, "learning_rate": 1.7760014770592545e-05, "loss": 0.7133, "step": 11645 }, { "epoch": 1.9011877066242193, "grad_norm": 2.761728525161743, "learning_rate": 1.7759610366856227e-05, "loss": 0.6521, "step": 11646 }, { "epoch": 1.9013509652667238, "grad_norm": 2.779428482055664, "learning_rate": 1.7759205931223084e-05, "loss": 0.6937, "step": 11647 }, { "epoch": 1.9015142239092282, "grad_norm": 2.6379237174987793, "learning_rate": 1.7758801463694773e-05, "loss": 0.6429, "step": 11648 }, { "epoch": 1.9016774825517326, "grad_norm": 2.15179181098938, "learning_rate": 1.7758396964272958e-05, "loss": 0.5688, "step": 11649 }, { "epoch": 1.901840741194237, "grad_norm": 2.9660470485687256, "learning_rate": 1.7757992432959302e-05, "loss": 0.6796, "step": 11650 }, { "epoch": 1.9020039998367415, "grad_norm": 3.0938618183135986, "learning_rate": 1.7757587869755472e-05, "loss": 0.723, "step": 11651 }, { "epoch": 1.9021672584792457, "grad_norm": 2.164005756378174, "learning_rate": 1.7757183274663122e-05, "loss": 0.478, "step": 11652 }, { "epoch": 1.9023305171217502, "grad_norm": 2.647274971008301, "learning_rate": 1.7756778647683925e-05, "loss": 0.6731, "step": 11653 }, { "epoch": 1.9024937757642544, "grad_norm": 2.750781297683716, "learning_rate": 1.7756373988819538e-05, "loss": 0.6688, "step": 11654 }, { "epoch": 1.9026570344067588, "grad_norm": 2.5923991203308105, "learning_rate": 1.775596929807163e-05, "loss": 0.6427, "step": 11655 }, { "epoch": 1.9028202930492633, "grad_norm": 3.30178165435791, "learning_rate": 1.7755564575441856e-05, "loss": 0.8574, "step": 11656 }, { "epoch": 1.9029835516917677, "grad_norm": 2.5148606300354004, "learning_rate": 1.7755159820931888e-05, "loss": 0.6526, "step": 11657 }, { "epoch": 1.9031468103342721, "grad_norm": 2.7653772830963135, "learning_rate": 1.775475503454338e-05, "loss": 0.7391, "step": 11658 }, { "epoch": 1.9033100689767766, "grad_norm": 2.4752705097198486, "learning_rate": 1.775435021627801e-05, "loss": 0.5847, "step": 11659 }, { "epoch": 1.903473327619281, "grad_norm": 2.7498114109039307, "learning_rate": 1.7753945366137428e-05, "loss": 0.6114, "step": 11660 }, { "epoch": 1.9036365862617852, "grad_norm": 2.858182668685913, "learning_rate": 1.775354048412331e-05, "loss": 0.7538, "step": 11661 }, { "epoch": 1.9037998449042897, "grad_norm": 2.461416482925415, "learning_rate": 1.775313557023731e-05, "loss": 0.6199, "step": 11662 }, { "epoch": 1.9039631035467939, "grad_norm": 2.6188275814056396, "learning_rate": 1.7752730624481097e-05, "loss": 0.7735, "step": 11663 }, { "epoch": 1.9041263621892983, "grad_norm": 2.543379306793213, "learning_rate": 1.7752325646856342e-05, "loss": 0.6465, "step": 11664 }, { "epoch": 1.9042896208318028, "grad_norm": 2.883223056793213, "learning_rate": 1.77519206373647e-05, "loss": 0.6829, "step": 11665 }, { "epoch": 1.9044528794743072, "grad_norm": 3.0378711223602295, "learning_rate": 1.7751515596007837e-05, "loss": 0.6789, "step": 11666 }, { "epoch": 1.9046161381168116, "grad_norm": 3.252854108810425, "learning_rate": 1.775111052278742e-05, "loss": 0.8272, "step": 11667 }, { "epoch": 1.904779396759316, "grad_norm": 2.498116970062256, "learning_rate": 1.775070541770512e-05, "loss": 0.6551, "step": 11668 }, { "epoch": 1.9049426554018203, "grad_norm": 2.7907466888427734, "learning_rate": 1.7750300280762594e-05, "loss": 0.6252, "step": 11669 }, { "epoch": 1.9051059140443247, "grad_norm": 2.6657216548919678, "learning_rate": 1.7749895111961507e-05, "loss": 0.6731, "step": 11670 }, { "epoch": 1.9052691726868292, "grad_norm": 2.671912908554077, "learning_rate": 1.774948991130353e-05, "loss": 0.662, "step": 11671 }, { "epoch": 1.9054324313293334, "grad_norm": 2.83267879486084, "learning_rate": 1.7749084678790326e-05, "loss": 0.7251, "step": 11672 }, { "epoch": 1.9055956899718378, "grad_norm": 3.0852386951446533, "learning_rate": 1.7748679414423562e-05, "loss": 0.6715, "step": 11673 }, { "epoch": 1.9057589486143423, "grad_norm": 2.2097702026367188, "learning_rate": 1.77482741182049e-05, "loss": 0.5913, "step": 11674 }, { "epoch": 1.9059222072568467, "grad_norm": 3.0608928203582764, "learning_rate": 1.7747868790136012e-05, "loss": 0.9085, "step": 11675 }, { "epoch": 1.9060854658993511, "grad_norm": 2.742319345474243, "learning_rate": 1.774746343021856e-05, "loss": 0.6138, "step": 11676 }, { "epoch": 1.9062487245418556, "grad_norm": 2.7918245792388916, "learning_rate": 1.774705803845421e-05, "loss": 0.7404, "step": 11677 }, { "epoch": 1.9064119831843598, "grad_norm": 2.850397825241089, "learning_rate": 1.7746652614844627e-05, "loss": 0.7237, "step": 11678 }, { "epoch": 1.9065752418268642, "grad_norm": 2.4478566646575928, "learning_rate": 1.7746247159391487e-05, "loss": 0.654, "step": 11679 }, { "epoch": 1.9067385004693684, "grad_norm": 2.988851547241211, "learning_rate": 1.7745841672096443e-05, "loss": 0.7607, "step": 11680 }, { "epoch": 1.9069017591118729, "grad_norm": 2.1442341804504395, "learning_rate": 1.7745436152961177e-05, "loss": 0.6769, "step": 11681 }, { "epoch": 1.9070650177543773, "grad_norm": 2.448863983154297, "learning_rate": 1.7745030601987338e-05, "loss": 0.6432, "step": 11682 }, { "epoch": 1.9072282763968817, "grad_norm": 2.3591275215148926, "learning_rate": 1.774462501917661e-05, "loss": 0.6441, "step": 11683 }, { "epoch": 1.9073915350393862, "grad_norm": 2.2502591609954834, "learning_rate": 1.774421940453065e-05, "loss": 0.6196, "step": 11684 }, { "epoch": 1.9075547936818906, "grad_norm": 2.5738611221313477, "learning_rate": 1.7743813758051126e-05, "loss": 0.6986, "step": 11685 }, { "epoch": 1.907718052324395, "grad_norm": 2.4641544818878174, "learning_rate": 1.774340807973971e-05, "loss": 0.6804, "step": 11686 }, { "epoch": 1.9078813109668993, "grad_norm": 2.102938413619995, "learning_rate": 1.7743002369598068e-05, "loss": 0.5207, "step": 11687 }, { "epoch": 1.9080445696094037, "grad_norm": 2.5055949687957764, "learning_rate": 1.7742596627627867e-05, "loss": 0.5025, "step": 11688 }, { "epoch": 1.908207828251908, "grad_norm": 2.6348748207092285, "learning_rate": 1.7742190853830773e-05, "loss": 0.7127, "step": 11689 }, { "epoch": 1.9083710868944124, "grad_norm": 2.6973636150360107, "learning_rate": 1.774178504820846e-05, "loss": 0.6894, "step": 11690 }, { "epoch": 1.9085343455369168, "grad_norm": 2.6037533283233643, "learning_rate": 1.7741379210762585e-05, "loss": 0.6365, "step": 11691 }, { "epoch": 1.9086976041794212, "grad_norm": 2.45979380607605, "learning_rate": 1.7740973341494825e-05, "loss": 0.6238, "step": 11692 }, { "epoch": 1.9088608628219257, "grad_norm": 2.5668795108795166, "learning_rate": 1.774056744040685e-05, "loss": 0.6717, "step": 11693 }, { "epoch": 1.9090241214644301, "grad_norm": 2.986609697341919, "learning_rate": 1.7740161507500326e-05, "loss": 0.7548, "step": 11694 }, { "epoch": 1.9091873801069346, "grad_norm": 2.7301456928253174, "learning_rate": 1.7739755542776916e-05, "loss": 0.6851, "step": 11695 }, { "epoch": 1.9093506387494388, "grad_norm": 2.956474781036377, "learning_rate": 1.7739349546238295e-05, "loss": 0.8034, "step": 11696 }, { "epoch": 1.9095138973919432, "grad_norm": 2.7580482959747314, "learning_rate": 1.7738943517886134e-05, "loss": 0.797, "step": 11697 }, { "epoch": 1.9096771560344474, "grad_norm": 3.0324084758758545, "learning_rate": 1.7738537457722094e-05, "loss": 0.654, "step": 11698 }, { "epoch": 1.9098404146769519, "grad_norm": 2.709319591522217, "learning_rate": 1.773813136574785e-05, "loss": 0.7648, "step": 11699 }, { "epoch": 1.9100036733194563, "grad_norm": 2.5823347568511963, "learning_rate": 1.7737725241965068e-05, "loss": 0.6848, "step": 11700 }, { "epoch": 1.9101669319619607, "grad_norm": 2.785719633102417, "learning_rate": 1.773731908637542e-05, "loss": 0.6669, "step": 11701 }, { "epoch": 1.9103301906044652, "grad_norm": 2.3604910373687744, "learning_rate": 1.7736912898980576e-05, "loss": 0.5825, "step": 11702 }, { "epoch": 1.9104934492469696, "grad_norm": 2.586315393447876, "learning_rate": 1.7736506679782208e-05, "loss": 0.5988, "step": 11703 }, { "epoch": 1.910656707889474, "grad_norm": 2.505979299545288, "learning_rate": 1.7736100428781977e-05, "loss": 0.5342, "step": 11704 }, { "epoch": 1.9108199665319783, "grad_norm": 2.7032103538513184, "learning_rate": 1.773569414598156e-05, "loss": 0.658, "step": 11705 }, { "epoch": 1.9109832251744827, "grad_norm": 2.395533323287964, "learning_rate": 1.7735287831382625e-05, "loss": 0.5555, "step": 11706 }, { "epoch": 1.911146483816987, "grad_norm": 2.089360237121582, "learning_rate": 1.773488148498684e-05, "loss": 0.5957, "step": 11707 }, { "epoch": 1.9113097424594914, "grad_norm": 3.057466983795166, "learning_rate": 1.7734475106795882e-05, "loss": 0.8684, "step": 11708 }, { "epoch": 1.9114730011019958, "grad_norm": 2.7352359294891357, "learning_rate": 1.7734068696811416e-05, "loss": 0.7781, "step": 11709 }, { "epoch": 1.9116362597445002, "grad_norm": 2.4725918769836426, "learning_rate": 1.7733662255035114e-05, "loss": 0.59, "step": 11710 }, { "epoch": 1.9117995183870047, "grad_norm": 2.298213005065918, "learning_rate": 1.7733255781468645e-05, "loss": 0.582, "step": 11711 }, { "epoch": 1.9119627770295091, "grad_norm": 2.8507819175720215, "learning_rate": 1.773284927611368e-05, "loss": 0.8231, "step": 11712 }, { "epoch": 1.9121260356720133, "grad_norm": 2.712907314300537, "learning_rate": 1.7732442738971897e-05, "loss": 0.6838, "step": 11713 }, { "epoch": 1.9122892943145178, "grad_norm": 2.48366117477417, "learning_rate": 1.773203617004496e-05, "loss": 0.5976, "step": 11714 }, { "epoch": 1.9124525529570222, "grad_norm": 2.4604244232177734, "learning_rate": 1.7731629569334538e-05, "loss": 0.6522, "step": 11715 }, { "epoch": 1.9126158115995264, "grad_norm": 3.1113646030426025, "learning_rate": 1.7731222936842313e-05, "loss": 0.8956, "step": 11716 }, { "epoch": 1.9127790702420309, "grad_norm": 2.79837965965271, "learning_rate": 1.7730816272569944e-05, "loss": 0.6873, "step": 11717 }, { "epoch": 1.9129423288845353, "grad_norm": 3.1334431171417236, "learning_rate": 1.773040957651911e-05, "loss": 1.6749, "step": 11718 }, { "epoch": 1.9131055875270397, "grad_norm": 2.43703556060791, "learning_rate": 1.7730002848691482e-05, "loss": 0.6344, "step": 11719 }, { "epoch": 1.9132688461695442, "grad_norm": 2.6618969440460205, "learning_rate": 1.772959608908873e-05, "loss": 0.5764, "step": 11720 }, { "epoch": 1.9134321048120486, "grad_norm": 2.7673916816711426, "learning_rate": 1.7729189297712528e-05, "loss": 0.6471, "step": 11721 }, { "epoch": 1.9135953634545528, "grad_norm": 2.9044065475463867, "learning_rate": 1.7728782474564547e-05, "loss": 0.772, "step": 11722 }, { "epoch": 1.9137586220970573, "grad_norm": 2.2647550106048584, "learning_rate": 1.7728375619646457e-05, "loss": 0.5895, "step": 11723 }, { "epoch": 1.9139218807395615, "grad_norm": 2.8238117694854736, "learning_rate": 1.7727968732959937e-05, "loss": 0.8355, "step": 11724 }, { "epoch": 1.914085139382066, "grad_norm": 2.4911904335021973, "learning_rate": 1.7727561814506655e-05, "loss": 0.6323, "step": 11725 }, { "epoch": 1.9142483980245704, "grad_norm": 3.0365538597106934, "learning_rate": 1.7727154864288282e-05, "loss": 0.7575, "step": 11726 }, { "epoch": 1.9144116566670748, "grad_norm": 2.3678789138793945, "learning_rate": 1.7726747882306495e-05, "loss": 0.5241, "step": 11727 }, { "epoch": 1.9145749153095792, "grad_norm": 2.7609031200408936, "learning_rate": 1.7726340868562966e-05, "loss": 0.5806, "step": 11728 }, { "epoch": 1.9147381739520837, "grad_norm": 3.4768078327178955, "learning_rate": 1.7725933823059366e-05, "loss": 0.9064, "step": 11729 }, { "epoch": 1.9149014325945881, "grad_norm": 2.9625277519226074, "learning_rate": 1.7725526745797368e-05, "loss": 0.7724, "step": 11730 }, { "epoch": 1.9150646912370923, "grad_norm": 2.9861958026885986, "learning_rate": 1.7725119636778644e-05, "loss": 0.8963, "step": 11731 }, { "epoch": 1.9152279498795968, "grad_norm": 2.6730594635009766, "learning_rate": 1.7724712496004875e-05, "loss": 0.6486, "step": 11732 }, { "epoch": 1.915391208522101, "grad_norm": 2.8227667808532715, "learning_rate": 1.772430532347773e-05, "loss": 0.7598, "step": 11733 }, { "epoch": 1.9155544671646054, "grad_norm": 2.4964423179626465, "learning_rate": 1.772389811919888e-05, "loss": 0.562, "step": 11734 }, { "epoch": 1.9157177258071099, "grad_norm": 2.574455738067627, "learning_rate": 1.7723490883170002e-05, "loss": 0.7179, "step": 11735 }, { "epoch": 1.9158809844496143, "grad_norm": 2.5895678997039795, "learning_rate": 1.772308361539277e-05, "loss": 0.6772, "step": 11736 }, { "epoch": 1.9160442430921187, "grad_norm": 2.4512853622436523, "learning_rate": 1.7722676315868855e-05, "loss": 0.6654, "step": 11737 }, { "epoch": 1.9162075017346232, "grad_norm": 2.5652923583984375, "learning_rate": 1.7722268984599938e-05, "loss": 0.6822, "step": 11738 }, { "epoch": 1.9163707603771276, "grad_norm": 3.048042058944702, "learning_rate": 1.7721861621587685e-05, "loss": 0.8609, "step": 11739 }, { "epoch": 1.9165340190196318, "grad_norm": 2.5731418132781982, "learning_rate": 1.7721454226833777e-05, "loss": 0.6499, "step": 11740 }, { "epoch": 1.9166972776621363, "grad_norm": 3.3147923946380615, "learning_rate": 1.7721046800339884e-05, "loss": 0.8473, "step": 11741 }, { "epoch": 1.9168605363046405, "grad_norm": 2.730316162109375, "learning_rate": 1.7720639342107688e-05, "loss": 0.7515, "step": 11742 }, { "epoch": 1.917023794947145, "grad_norm": 2.8853907585144043, "learning_rate": 1.7720231852138853e-05, "loss": 0.7347, "step": 11743 }, { "epoch": 1.9171870535896494, "grad_norm": 2.640430212020874, "learning_rate": 1.7719824330435062e-05, "loss": 0.5865, "step": 11744 }, { "epoch": 1.9173503122321538, "grad_norm": 2.486452579498291, "learning_rate": 1.7719416776997992e-05, "loss": 0.5822, "step": 11745 }, { "epoch": 1.9175135708746582, "grad_norm": 2.3776681423187256, "learning_rate": 1.771900919182931e-05, "loss": 0.5986, "step": 11746 }, { "epoch": 1.9176768295171627, "grad_norm": 2.0374014377593994, "learning_rate": 1.7718601574930697e-05, "loss": 0.5815, "step": 11747 }, { "epoch": 1.9178400881596671, "grad_norm": 2.360504627227783, "learning_rate": 1.7718193926303823e-05, "loss": 0.5241, "step": 11748 }, { "epoch": 1.9180033468021713, "grad_norm": 2.6438629627227783, "learning_rate": 1.7717786245950375e-05, "loss": 0.6288, "step": 11749 }, { "epoch": 1.9181666054446758, "grad_norm": 2.564310312271118, "learning_rate": 1.771737853387202e-05, "loss": 0.6632, "step": 11750 }, { "epoch": 1.91832986408718, "grad_norm": 2.8016960620880127, "learning_rate": 1.7716970790070433e-05, "loss": 0.6761, "step": 11751 }, { "epoch": 1.9184931227296844, "grad_norm": 2.9820609092712402, "learning_rate": 1.7716563014547297e-05, "loss": 0.8741, "step": 11752 }, { "epoch": 1.9186563813721889, "grad_norm": 2.7623660564422607, "learning_rate": 1.7716155207304282e-05, "loss": 0.7269, "step": 11753 }, { "epoch": 1.9188196400146933, "grad_norm": 2.2317769527435303, "learning_rate": 1.7715747368343067e-05, "loss": 0.4854, "step": 11754 }, { "epoch": 1.9189828986571977, "grad_norm": 2.483624219894409, "learning_rate": 1.771533949766533e-05, "loss": 0.6351, "step": 11755 }, { "epoch": 1.9191461572997022, "grad_norm": 2.4510810375213623, "learning_rate": 1.7714931595272743e-05, "loss": 0.6508, "step": 11756 }, { "epoch": 1.9193094159422064, "grad_norm": 2.451406955718994, "learning_rate": 1.7714523661166984e-05, "loss": 0.6728, "step": 11757 }, { "epoch": 1.9194726745847108, "grad_norm": 3.249088764190674, "learning_rate": 1.771411569534973e-05, "loss": 0.8304, "step": 11758 }, { "epoch": 1.9196359332272153, "grad_norm": 2.4805519580841064, "learning_rate": 1.7713707697822664e-05, "loss": 0.6059, "step": 11759 }, { "epoch": 1.9197991918697195, "grad_norm": 3.3909010887145996, "learning_rate": 1.771329966858746e-05, "loss": 0.877, "step": 11760 }, { "epoch": 1.919962450512224, "grad_norm": 3.1396210193634033, "learning_rate": 1.7712891607645786e-05, "loss": 0.6738, "step": 11761 }, { "epoch": 1.9201257091547284, "grad_norm": 3.2069578170776367, "learning_rate": 1.7712483514999332e-05, "loss": 0.7077, "step": 11762 }, { "epoch": 1.9202889677972328, "grad_norm": 2.9240047931671143, "learning_rate": 1.771207539064977e-05, "loss": 0.8278, "step": 11763 }, { "epoch": 1.9204522264397372, "grad_norm": 2.9102394580841064, "learning_rate": 1.7711667234598777e-05, "loss": 0.6405, "step": 11764 }, { "epoch": 1.9206154850822417, "grad_norm": 3.306788682937622, "learning_rate": 1.7711259046848032e-05, "loss": 0.7628, "step": 11765 }, { "epoch": 1.9207787437247459, "grad_norm": 2.6731953620910645, "learning_rate": 1.7710850827399215e-05, "loss": 0.6598, "step": 11766 }, { "epoch": 1.9209420023672503, "grad_norm": 2.5026190280914307, "learning_rate": 1.7710442576253997e-05, "loss": 0.6026, "step": 11767 }, { "epoch": 1.9211052610097545, "grad_norm": 2.894418954849243, "learning_rate": 1.7710034293414064e-05, "loss": 0.7166, "step": 11768 }, { "epoch": 1.921268519652259, "grad_norm": 2.483402967453003, "learning_rate": 1.7709625978881092e-05, "loss": 0.6739, "step": 11769 }, { "epoch": 1.9214317782947634, "grad_norm": 2.453699827194214, "learning_rate": 1.7709217632656758e-05, "loss": 0.6753, "step": 11770 }, { "epoch": 1.9215950369372679, "grad_norm": 2.5453474521636963, "learning_rate": 1.7708809254742743e-05, "loss": 0.7208, "step": 11771 }, { "epoch": 1.9217582955797723, "grad_norm": 2.2754812240600586, "learning_rate": 1.770840084514072e-05, "loss": 0.5782, "step": 11772 }, { "epoch": 1.9219215542222767, "grad_norm": 2.721463918685913, "learning_rate": 1.7707992403852375e-05, "loss": 0.7092, "step": 11773 }, { "epoch": 1.9220848128647812, "grad_norm": 2.592320442199707, "learning_rate": 1.7707583930879383e-05, "loss": 0.5916, "step": 11774 }, { "epoch": 1.9222480715072854, "grad_norm": 2.5465409755706787, "learning_rate": 1.7707175426223422e-05, "loss": 0.6469, "step": 11775 }, { "epoch": 1.9224113301497898, "grad_norm": 2.330007791519165, "learning_rate": 1.7706766889886175e-05, "loss": 0.661, "step": 11776 }, { "epoch": 1.922574588792294, "grad_norm": 2.474586248397827, "learning_rate": 1.770635832186932e-05, "loss": 0.5997, "step": 11777 }, { "epoch": 1.9227378474347985, "grad_norm": 2.2437143325805664, "learning_rate": 1.7705949722174534e-05, "loss": 0.6149, "step": 11778 }, { "epoch": 1.922901106077303, "grad_norm": 2.761012315750122, "learning_rate": 1.77055410908035e-05, "loss": 0.7207, "step": 11779 }, { "epoch": 1.9230643647198074, "grad_norm": 2.6630971431732178, "learning_rate": 1.7705132427757895e-05, "loss": 0.6443, "step": 11780 }, { "epoch": 1.9232276233623118, "grad_norm": 2.171481132507324, "learning_rate": 1.77047237330394e-05, "loss": 0.5567, "step": 11781 }, { "epoch": 1.9233908820048162, "grad_norm": 2.4304120540618896, "learning_rate": 1.7704315006649693e-05, "loss": 0.6775, "step": 11782 }, { "epoch": 1.9235541406473207, "grad_norm": 2.637377977371216, "learning_rate": 1.770390624859046e-05, "loss": 0.6495, "step": 11783 }, { "epoch": 1.9237173992898249, "grad_norm": 2.372664213180542, "learning_rate": 1.7703497458863375e-05, "loss": 0.6644, "step": 11784 }, { "epoch": 1.9238806579323293, "grad_norm": 2.585465908050537, "learning_rate": 1.7703088637470122e-05, "loss": 0.6861, "step": 11785 }, { "epoch": 1.9240439165748335, "grad_norm": 3.272815704345703, "learning_rate": 1.770267978441238e-05, "loss": 0.7516, "step": 11786 }, { "epoch": 1.924207175217338, "grad_norm": 2.8091092109680176, "learning_rate": 1.7702270899691826e-05, "loss": 0.7505, "step": 11787 }, { "epoch": 1.9243704338598424, "grad_norm": 3.181842088699341, "learning_rate": 1.770186198331015e-05, "loss": 0.7968, "step": 11788 }, { "epoch": 1.9245336925023468, "grad_norm": 2.8110389709472656, "learning_rate": 1.7701453035269027e-05, "loss": 0.6716, "step": 11789 }, { "epoch": 1.9246969511448513, "grad_norm": 2.902290105819702, "learning_rate": 1.7701044055570134e-05, "loss": 0.6973, "step": 11790 }, { "epoch": 1.9248602097873557, "grad_norm": 2.9655439853668213, "learning_rate": 1.770063504421516e-05, "loss": 0.7851, "step": 11791 }, { "epoch": 1.9250234684298602, "grad_norm": 2.9825034141540527, "learning_rate": 1.7700226001205783e-05, "loss": 0.828, "step": 11792 }, { "epoch": 1.9251867270723644, "grad_norm": 2.819035053253174, "learning_rate": 1.7699816926543686e-05, "loss": 0.7189, "step": 11793 }, { "epoch": 1.9253499857148688, "grad_norm": 3.041175603866577, "learning_rate": 1.769940782023055e-05, "loss": 0.8047, "step": 11794 }, { "epoch": 1.925513244357373, "grad_norm": 2.4605143070220947, "learning_rate": 1.769899868226805e-05, "loss": 0.6162, "step": 11795 }, { "epoch": 1.9256765029998775, "grad_norm": 2.4763216972351074, "learning_rate": 1.769858951265788e-05, "loss": 0.5823, "step": 11796 }, { "epoch": 1.925839761642382, "grad_norm": 2.4932973384857178, "learning_rate": 1.769818031140171e-05, "loss": 0.6241, "step": 11797 }, { "epoch": 1.9260030202848863, "grad_norm": 2.9635002613067627, "learning_rate": 1.769777107850123e-05, "loss": 0.6688, "step": 11798 }, { "epoch": 1.9261662789273908, "grad_norm": 2.792771577835083, "learning_rate": 1.7697361813958118e-05, "loss": 0.6856, "step": 11799 }, { "epoch": 1.9263295375698952, "grad_norm": 2.8548238277435303, "learning_rate": 1.769695251777406e-05, "loss": 0.7155, "step": 11800 }, { "epoch": 1.9264927962123994, "grad_norm": 2.9471335411071777, "learning_rate": 1.7696543189950737e-05, "loss": 0.7608, "step": 11801 }, { "epoch": 1.9266560548549039, "grad_norm": 2.679034471511841, "learning_rate": 1.769613383048983e-05, "loss": 0.6821, "step": 11802 }, { "epoch": 1.9268193134974083, "grad_norm": 3.0369925498962402, "learning_rate": 1.7695724439393025e-05, "loss": 0.7976, "step": 11803 }, { "epoch": 1.9269825721399125, "grad_norm": 2.6359541416168213, "learning_rate": 1.7695315016662e-05, "loss": 0.6386, "step": 11804 }, { "epoch": 1.927145830782417, "grad_norm": 2.327054977416992, "learning_rate": 1.769490556229844e-05, "loss": 0.6438, "step": 11805 }, { "epoch": 1.9273090894249214, "grad_norm": 2.62280011177063, "learning_rate": 1.7694496076304032e-05, "loss": 0.6745, "step": 11806 }, { "epoch": 1.9274723480674258, "grad_norm": 2.911940097808838, "learning_rate": 1.769408655868045e-05, "loss": 0.6882, "step": 11807 }, { "epoch": 1.9276356067099303, "grad_norm": 2.4014697074890137, "learning_rate": 1.7693677009429388e-05, "loss": 0.6195, "step": 11808 }, { "epoch": 1.9277988653524347, "grad_norm": 3.0635507106781006, "learning_rate": 1.7693267428552527e-05, "loss": 0.7721, "step": 11809 }, { "epoch": 1.927962123994939, "grad_norm": 2.981771945953369, "learning_rate": 1.7692857816051545e-05, "loss": 0.7818, "step": 11810 }, { "epoch": 1.9281253826374434, "grad_norm": 3.174818754196167, "learning_rate": 1.769244817192813e-05, "loss": 0.8336, "step": 11811 }, { "epoch": 1.9282886412799478, "grad_norm": 2.6367456912994385, "learning_rate": 1.7692038496183965e-05, "loss": 0.6554, "step": 11812 }, { "epoch": 1.928451899922452, "grad_norm": 2.6157116889953613, "learning_rate": 1.7691628788820735e-05, "loss": 0.6258, "step": 11813 }, { "epoch": 1.9286151585649565, "grad_norm": 2.5741262435913086, "learning_rate": 1.7691219049840126e-05, "loss": 0.6611, "step": 11814 }, { "epoch": 1.928778417207461, "grad_norm": 2.9135732650756836, "learning_rate": 1.7690809279243813e-05, "loss": 0.7146, "step": 11815 }, { "epoch": 1.9289416758499653, "grad_norm": 2.6288671493530273, "learning_rate": 1.7690399477033492e-05, "loss": 0.6538, "step": 11816 }, { "epoch": 1.9291049344924698, "grad_norm": 2.3157808780670166, "learning_rate": 1.768998964321084e-05, "loss": 0.6111, "step": 11817 }, { "epoch": 1.9292681931349742, "grad_norm": 2.7079806327819824, "learning_rate": 1.7689579777777548e-05, "loss": 0.7237, "step": 11818 }, { "epoch": 1.9294314517774784, "grad_norm": 3.3746628761291504, "learning_rate": 1.7689169880735292e-05, "loss": 0.7686, "step": 11819 }, { "epoch": 1.9295947104199829, "grad_norm": 2.6168034076690674, "learning_rate": 1.7688759952085765e-05, "loss": 0.7175, "step": 11820 }, { "epoch": 1.929757969062487, "grad_norm": 2.6772634983062744, "learning_rate": 1.768834999183065e-05, "loss": 0.7098, "step": 11821 }, { "epoch": 1.9299212277049915, "grad_norm": 2.787949562072754, "learning_rate": 1.768793999997163e-05, "loss": 0.5306, "step": 11822 }, { "epoch": 1.930084486347496, "grad_norm": 2.7075791358947754, "learning_rate": 1.7687529976510392e-05, "loss": 0.6636, "step": 11823 }, { "epoch": 1.9302477449900004, "grad_norm": 2.52683162689209, "learning_rate": 1.768711992144862e-05, "loss": 0.6177, "step": 11824 }, { "epoch": 1.9304110036325048, "grad_norm": 3.043185234069824, "learning_rate": 1.7686709834788e-05, "loss": 0.6658, "step": 11825 }, { "epoch": 1.9305742622750093, "grad_norm": 2.6155526638031006, "learning_rate": 1.768629971653022e-05, "loss": 0.6331, "step": 11826 }, { "epoch": 1.9307375209175137, "grad_norm": 2.968890905380249, "learning_rate": 1.7685889566676964e-05, "loss": 0.7176, "step": 11827 }, { "epoch": 1.930900779560018, "grad_norm": 2.504303455352783, "learning_rate": 1.7685479385229918e-05, "loss": 0.58, "step": 11828 }, { "epoch": 1.9310640382025224, "grad_norm": 2.2493069171905518, "learning_rate": 1.7685069172190766e-05, "loss": 0.5252, "step": 11829 }, { "epoch": 1.9312272968450266, "grad_norm": 2.8819141387939453, "learning_rate": 1.7684658927561202e-05, "loss": 0.5623, "step": 11830 }, { "epoch": 1.931390555487531, "grad_norm": 2.860858917236328, "learning_rate": 1.7684248651342902e-05, "loss": 0.6888, "step": 11831 }, { "epoch": 1.9315538141300355, "grad_norm": 2.55509877204895, "learning_rate": 1.7683838343537558e-05, "loss": 0.6167, "step": 11832 }, { "epoch": 1.93171707277254, "grad_norm": 2.4031503200531006, "learning_rate": 1.7683428004146857e-05, "loss": 0.5423, "step": 11833 }, { "epoch": 1.9318803314150443, "grad_norm": 2.503476619720459, "learning_rate": 1.7683017633172483e-05, "loss": 0.6835, "step": 11834 }, { "epoch": 1.9320435900575488, "grad_norm": 2.7266435623168945, "learning_rate": 1.7682607230616126e-05, "loss": 0.6758, "step": 11835 }, { "epoch": 1.9322068487000532, "grad_norm": 2.4172983169555664, "learning_rate": 1.768219679647947e-05, "loss": 0.5481, "step": 11836 }, { "epoch": 1.9323701073425574, "grad_norm": 2.513385057449341, "learning_rate": 1.7681786330764207e-05, "loss": 0.5819, "step": 11837 }, { "epoch": 1.9325333659850619, "grad_norm": 2.1829464435577393, "learning_rate": 1.7681375833472022e-05, "loss": 0.661, "step": 11838 }, { "epoch": 1.932696624627566, "grad_norm": 2.7410614490509033, "learning_rate": 1.7680965304604594e-05, "loss": 0.7419, "step": 11839 }, { "epoch": 1.9328598832700705, "grad_norm": 2.7951784133911133, "learning_rate": 1.7680554744163624e-05, "loss": 0.6865, "step": 11840 }, { "epoch": 1.933023141912575, "grad_norm": 2.3450286388397217, "learning_rate": 1.7680144152150794e-05, "loss": 0.5623, "step": 11841 }, { "epoch": 1.9331864005550794, "grad_norm": 2.241151809692383, "learning_rate": 1.7679733528567786e-05, "loss": 0.5562, "step": 11842 }, { "epoch": 1.9333496591975838, "grad_norm": 2.6665778160095215, "learning_rate": 1.7679322873416298e-05, "loss": 0.6425, "step": 11843 }, { "epoch": 1.9335129178400883, "grad_norm": 3.0145821571350098, "learning_rate": 1.7678912186698012e-05, "loss": 0.6466, "step": 11844 }, { "epoch": 1.9336761764825927, "grad_norm": 2.789365768432617, "learning_rate": 1.767850146841462e-05, "loss": 0.6911, "step": 11845 }, { "epoch": 1.933839435125097, "grad_norm": 2.8197174072265625, "learning_rate": 1.7678090718567806e-05, "loss": 0.7227, "step": 11846 }, { "epoch": 1.9340026937676014, "grad_norm": 2.783963918685913, "learning_rate": 1.7677679937159256e-05, "loss": 0.6351, "step": 11847 }, { "epoch": 1.9341659524101056, "grad_norm": 3.0275676250457764, "learning_rate": 1.767726912419067e-05, "loss": 0.7694, "step": 11848 }, { "epoch": 1.93432921105261, "grad_norm": 3.174347400665283, "learning_rate": 1.767685827966372e-05, "loss": 0.7208, "step": 11849 }, { "epoch": 1.9344924696951145, "grad_norm": 3.099480152130127, "learning_rate": 1.7676447403580114e-05, "loss": 0.7721, "step": 11850 }, { "epoch": 1.934655728337619, "grad_norm": 3.0771102905273438, "learning_rate": 1.7676036495941528e-05, "loss": 0.5874, "step": 11851 }, { "epoch": 1.9348189869801233, "grad_norm": 2.70017409324646, "learning_rate": 1.7675625556749653e-05, "loss": 0.5846, "step": 11852 }, { "epoch": 1.9349822456226278, "grad_norm": 2.2240407466888428, "learning_rate": 1.767521458600618e-05, "loss": 0.5581, "step": 11853 }, { "epoch": 1.935145504265132, "grad_norm": 3.044700860977173, "learning_rate": 1.76748035837128e-05, "loss": 0.6272, "step": 11854 }, { "epoch": 1.9353087629076364, "grad_norm": 2.655392646789551, "learning_rate": 1.7674392549871197e-05, "loss": 0.6184, "step": 11855 }, { "epoch": 1.9354720215501409, "grad_norm": 2.984567642211914, "learning_rate": 1.7673981484483068e-05, "loss": 0.7327, "step": 11856 }, { "epoch": 1.935635280192645, "grad_norm": 2.536105155944824, "learning_rate": 1.7673570387550097e-05, "loss": 0.5687, "step": 11857 }, { "epoch": 1.9357985388351495, "grad_norm": 2.533585548400879, "learning_rate": 1.767315925907397e-05, "loss": 0.6443, "step": 11858 }, { "epoch": 1.935961797477654, "grad_norm": 2.5534539222717285, "learning_rate": 1.7672748099056392e-05, "loss": 0.7073, "step": 11859 }, { "epoch": 1.9361250561201584, "grad_norm": 2.8333728313446045, "learning_rate": 1.767233690749904e-05, "loss": 0.8652, "step": 11860 }, { "epoch": 1.9362883147626628, "grad_norm": 3.136460065841675, "learning_rate": 1.7671925684403605e-05, "loss": 0.6893, "step": 11861 }, { "epoch": 1.9364515734051673, "grad_norm": 2.5031092166900635, "learning_rate": 1.7671514429771785e-05, "loss": 0.5943, "step": 11862 }, { "epoch": 1.9366148320476715, "grad_norm": 3.080050230026245, "learning_rate": 1.7671103143605264e-05, "loss": 0.8206, "step": 11863 }, { "epoch": 1.936778090690176, "grad_norm": 2.633619546890259, "learning_rate": 1.7670691825905732e-05, "loss": 0.6091, "step": 11864 }, { "epoch": 1.9369413493326801, "grad_norm": 2.486271858215332, "learning_rate": 1.7670280476674887e-05, "loss": 0.5461, "step": 11865 }, { "epoch": 1.9371046079751846, "grad_norm": 2.187610149383545, "learning_rate": 1.7669869095914413e-05, "loss": 0.5151, "step": 11866 }, { "epoch": 1.937267866617689, "grad_norm": 2.452120542526245, "learning_rate": 1.7669457683626004e-05, "loss": 0.6069, "step": 11867 }, { "epoch": 1.9374311252601935, "grad_norm": 2.631207227706909, "learning_rate": 1.7669046239811347e-05, "loss": 0.6424, "step": 11868 }, { "epoch": 1.937594383902698, "grad_norm": 2.43172550201416, "learning_rate": 1.766863476447214e-05, "loss": 0.6847, "step": 11869 }, { "epoch": 1.9377576425452023, "grad_norm": 2.7188618183135986, "learning_rate": 1.7668223257610073e-05, "loss": 0.7003, "step": 11870 }, { "epoch": 1.9379209011877068, "grad_norm": 2.508470296859741, "learning_rate": 1.7667811719226833e-05, "loss": 0.6374, "step": 11871 }, { "epoch": 1.938084159830211, "grad_norm": 2.6684255599975586, "learning_rate": 1.7667400149324116e-05, "loss": 0.7943, "step": 11872 }, { "epoch": 1.9382474184727154, "grad_norm": 2.4235270023345947, "learning_rate": 1.7666988547903607e-05, "loss": 0.66, "step": 11873 }, { "epoch": 1.9384106771152196, "grad_norm": 2.4717257022857666, "learning_rate": 1.7666576914967008e-05, "loss": 0.5195, "step": 11874 }, { "epoch": 1.938573935757724, "grad_norm": 2.382760763168335, "learning_rate": 1.7666165250516006e-05, "loss": 0.5954, "step": 11875 }, { "epoch": 1.9387371944002285, "grad_norm": 2.532949924468994, "learning_rate": 1.766575355455229e-05, "loss": 0.6339, "step": 11876 }, { "epoch": 1.938900453042733, "grad_norm": 2.734076976776123, "learning_rate": 1.7665341827077556e-05, "loss": 0.7165, "step": 11877 }, { "epoch": 1.9390637116852374, "grad_norm": 2.6538119316101074, "learning_rate": 1.76649300680935e-05, "loss": 0.6671, "step": 11878 }, { "epoch": 1.9392269703277418, "grad_norm": 2.1383140087127686, "learning_rate": 1.766451827760181e-05, "loss": 0.5886, "step": 11879 }, { "epoch": 1.9393902289702463, "grad_norm": 2.5549416542053223, "learning_rate": 1.7664106455604178e-05, "loss": 0.6261, "step": 11880 }, { "epoch": 1.9395534876127505, "grad_norm": 2.3220160007476807, "learning_rate": 1.7663694602102297e-05, "loss": 0.5777, "step": 11881 }, { "epoch": 1.939716746255255, "grad_norm": 2.8452532291412354, "learning_rate": 1.7663282717097864e-05, "loss": 0.6951, "step": 11882 }, { "epoch": 1.9398800048977591, "grad_norm": 2.317495107650757, "learning_rate": 1.7662870800592568e-05, "loss": 0.6299, "step": 11883 }, { "epoch": 1.9400432635402636, "grad_norm": 2.4511308670043945, "learning_rate": 1.7662458852588102e-05, "loss": 0.6425, "step": 11884 }, { "epoch": 1.940206522182768, "grad_norm": 2.3594789505004883, "learning_rate": 1.7662046873086163e-05, "loss": 0.5937, "step": 11885 }, { "epoch": 1.9403697808252724, "grad_norm": 3.056867837905884, "learning_rate": 1.7661634862088443e-05, "loss": 0.765, "step": 11886 }, { "epoch": 1.9405330394677769, "grad_norm": 2.5195071697235107, "learning_rate": 1.7661222819596635e-05, "loss": 0.7289, "step": 11887 }, { "epoch": 1.9406962981102813, "grad_norm": 2.5914883613586426, "learning_rate": 1.766081074561243e-05, "loss": 0.7143, "step": 11888 }, { "epoch": 1.9408595567527858, "grad_norm": 3.03218936920166, "learning_rate": 1.766039864013753e-05, "loss": 0.782, "step": 11889 }, { "epoch": 1.94102281539529, "grad_norm": 2.4077391624450684, "learning_rate": 1.7659986503173616e-05, "loss": 0.5962, "step": 11890 }, { "epoch": 1.9411860740377944, "grad_norm": 2.8845512866973877, "learning_rate": 1.7659574334722394e-05, "loss": 0.7916, "step": 11891 }, { "epoch": 1.9413493326802986, "grad_norm": 2.6166374683380127, "learning_rate": 1.7659162134785556e-05, "loss": 0.6702, "step": 11892 }, { "epoch": 1.941512591322803, "grad_norm": 2.5339508056640625, "learning_rate": 1.7658749903364794e-05, "loss": 0.5663, "step": 11893 }, { "epoch": 1.9416758499653075, "grad_norm": 2.396829128265381, "learning_rate": 1.76583376404618e-05, "loss": 0.6122, "step": 11894 }, { "epoch": 1.941839108607812, "grad_norm": 2.5194287300109863, "learning_rate": 1.7657925346078273e-05, "loss": 0.5187, "step": 11895 }, { "epoch": 1.9420023672503164, "grad_norm": 2.788884162902832, "learning_rate": 1.765751302021591e-05, "loss": 0.5927, "step": 11896 }, { "epoch": 1.9421656258928208, "grad_norm": 2.637883424758911, "learning_rate": 1.7657100662876398e-05, "loss": 0.723, "step": 11897 }, { "epoch": 1.942328884535325, "grad_norm": 2.5956332683563232, "learning_rate": 1.7656688274061434e-05, "loss": 0.6429, "step": 11898 }, { "epoch": 1.9424921431778295, "grad_norm": 2.886781930923462, "learning_rate": 1.765627585377272e-05, "loss": 0.7233, "step": 11899 }, { "epoch": 1.942655401820334, "grad_norm": 2.9833362102508545, "learning_rate": 1.7655863402011947e-05, "loss": 0.8486, "step": 11900 }, { "epoch": 1.9428186604628381, "grad_norm": 2.900963306427002, "learning_rate": 1.7655450918780813e-05, "loss": 0.7207, "step": 11901 }, { "epoch": 1.9429819191053426, "grad_norm": 2.9177048206329346, "learning_rate": 1.7655038404081005e-05, "loss": 0.6448, "step": 11902 }, { "epoch": 1.943145177747847, "grad_norm": 2.5462758541107178, "learning_rate": 1.7654625857914228e-05, "loss": 0.7439, "step": 11903 }, { "epoch": 1.9433084363903514, "grad_norm": 3.19283127784729, "learning_rate": 1.7654213280282176e-05, "loss": 0.7417, "step": 11904 }, { "epoch": 1.9434716950328559, "grad_norm": 2.79825758934021, "learning_rate": 1.765380067118654e-05, "loss": 0.6936, "step": 11905 }, { "epoch": 1.9436349536753603, "grad_norm": 2.64332914352417, "learning_rate": 1.765338803062902e-05, "loss": 0.6837, "step": 11906 }, { "epoch": 1.9437982123178645, "grad_norm": 2.7212040424346924, "learning_rate": 1.7652975358611314e-05, "loss": 0.6023, "step": 11907 }, { "epoch": 1.943961470960369, "grad_norm": 2.7650814056396484, "learning_rate": 1.7652562655135113e-05, "loss": 0.5413, "step": 11908 }, { "epoch": 1.9441247296028732, "grad_norm": 2.7074334621429443, "learning_rate": 1.7652149920202117e-05, "loss": 0.5576, "step": 11909 }, { "epoch": 1.9442879882453776, "grad_norm": 2.6569643020629883, "learning_rate": 1.7651737153814024e-05, "loss": 0.73, "step": 11910 }, { "epoch": 1.944451246887882, "grad_norm": 2.8228743076324463, "learning_rate": 1.7651324355972525e-05, "loss": 0.6249, "step": 11911 }, { "epoch": 1.9446145055303865, "grad_norm": 2.856529712677002, "learning_rate": 1.7650911526679328e-05, "loss": 0.6904, "step": 11912 }, { "epoch": 1.944777764172891, "grad_norm": 2.9505553245544434, "learning_rate": 1.7650498665936115e-05, "loss": 0.8995, "step": 11913 }, { "epoch": 1.9449410228153954, "grad_norm": 2.482351541519165, "learning_rate": 1.7650085773744595e-05, "loss": 0.6195, "step": 11914 }, { "epoch": 1.9451042814578998, "grad_norm": 2.301793336868286, "learning_rate": 1.764967285010646e-05, "loss": 0.5931, "step": 11915 }, { "epoch": 1.945267540100404, "grad_norm": 2.7211060523986816, "learning_rate": 1.7649259895023407e-05, "loss": 0.665, "step": 11916 }, { "epoch": 1.9454307987429085, "grad_norm": 2.0345449447631836, "learning_rate": 1.7648846908497136e-05, "loss": 0.5875, "step": 11917 }, { "epoch": 1.9455940573854127, "grad_norm": 3.0945637226104736, "learning_rate": 1.7648433890529345e-05, "loss": 0.7062, "step": 11918 }, { "epoch": 1.9457573160279171, "grad_norm": 3.074495553970337, "learning_rate": 1.7648020841121727e-05, "loss": 0.8135, "step": 11919 }, { "epoch": 1.9459205746704216, "grad_norm": 3.0365238189697266, "learning_rate": 1.7647607760275987e-05, "loss": 0.6593, "step": 11920 }, { "epoch": 1.946083833312926, "grad_norm": 2.7531445026397705, "learning_rate": 1.7647194647993814e-05, "loss": 0.7331, "step": 11921 }, { "epoch": 1.9462470919554304, "grad_norm": 2.3623132705688477, "learning_rate": 1.7646781504276915e-05, "loss": 0.6263, "step": 11922 }, { "epoch": 1.9464103505979349, "grad_norm": 3.1676557064056396, "learning_rate": 1.7646368329126985e-05, "loss": 0.8131, "step": 11923 }, { "epoch": 1.9465736092404393, "grad_norm": 2.8864197731018066, "learning_rate": 1.7645955122545722e-05, "loss": 0.7792, "step": 11924 }, { "epoch": 1.9467368678829435, "grad_norm": 2.2703921794891357, "learning_rate": 1.7645541884534824e-05, "loss": 0.5462, "step": 11925 }, { "epoch": 1.946900126525448, "grad_norm": 2.6714296340942383, "learning_rate": 1.7645128615095988e-05, "loss": 0.6777, "step": 11926 }, { "epoch": 1.9470633851679522, "grad_norm": 2.841604471206665, "learning_rate": 1.764471531423092e-05, "loss": 0.7577, "step": 11927 }, { "epoch": 1.9472266438104566, "grad_norm": 3.164663553237915, "learning_rate": 1.764430198194131e-05, "loss": 0.8386, "step": 11928 }, { "epoch": 1.947389902452961, "grad_norm": 1.943120002746582, "learning_rate": 1.764388861822886e-05, "loss": 0.5102, "step": 11929 }, { "epoch": 1.9475531610954655, "grad_norm": 2.6663079261779785, "learning_rate": 1.764347522309527e-05, "loss": 0.7034, "step": 11930 }, { "epoch": 1.94771641973797, "grad_norm": 2.0848371982574463, "learning_rate": 1.7643061796542246e-05, "loss": 0.518, "step": 11931 }, { "epoch": 1.9478796783804744, "grad_norm": 2.408860206604004, "learning_rate": 1.7642648338571474e-05, "loss": 0.6079, "step": 11932 }, { "epoch": 1.9480429370229788, "grad_norm": 2.644580364227295, "learning_rate": 1.7642234849184663e-05, "loss": 0.6089, "step": 11933 }, { "epoch": 1.948206195665483, "grad_norm": 2.5696005821228027, "learning_rate": 1.764182132838351e-05, "loss": 0.7395, "step": 11934 }, { "epoch": 1.9483694543079875, "grad_norm": 2.846179723739624, "learning_rate": 1.7641407776169716e-05, "loss": 0.681, "step": 11935 }, { "epoch": 1.9485327129504917, "grad_norm": 2.6756153106689453, "learning_rate": 1.7640994192544978e-05, "loss": 0.6958, "step": 11936 }, { "epoch": 1.9486959715929961, "grad_norm": 2.8484129905700684, "learning_rate": 1.7640580577511e-05, "loss": 0.6583, "step": 11937 }, { "epoch": 1.9488592302355006, "grad_norm": 2.549887180328369, "learning_rate": 1.764016693106948e-05, "loss": 0.613, "step": 11938 }, { "epoch": 1.949022488878005, "grad_norm": 3.0177695751190186, "learning_rate": 1.7639753253222117e-05, "loss": 0.7186, "step": 11939 }, { "epoch": 1.9491857475205094, "grad_norm": 2.803183078765869, "learning_rate": 1.7639339543970616e-05, "loss": 0.6651, "step": 11940 }, { "epoch": 1.9493490061630139, "grad_norm": 3.239957094192505, "learning_rate": 1.7638925803316672e-05, "loss": 0.6693, "step": 11941 }, { "epoch": 1.949512264805518, "grad_norm": 2.7474617958068848, "learning_rate": 1.763851203126199e-05, "loss": 0.6529, "step": 11942 }, { "epoch": 1.9496755234480225, "grad_norm": 2.5609264373779297, "learning_rate": 1.763809822780827e-05, "loss": 0.6983, "step": 11943 }, { "epoch": 1.949838782090527, "grad_norm": 2.762641191482544, "learning_rate": 1.7637684392957207e-05, "loss": 0.6831, "step": 11944 }, { "epoch": 1.9500020407330312, "grad_norm": 2.3952715396881104, "learning_rate": 1.7637270526710514e-05, "loss": 0.5352, "step": 11945 }, { "epoch": 1.9501652993755356, "grad_norm": 2.7497622966766357, "learning_rate": 1.7636856629069883e-05, "loss": 0.6691, "step": 11946 }, { "epoch": 1.95032855801804, "grad_norm": 2.995715618133545, "learning_rate": 1.7636442700037014e-05, "loss": 0.7299, "step": 11947 }, { "epoch": 1.9504918166605445, "grad_norm": 3.2059686183929443, "learning_rate": 1.763602873961362e-05, "loss": 0.7629, "step": 11948 }, { "epoch": 1.950655075303049, "grad_norm": 2.316293239593506, "learning_rate": 1.763561474780139e-05, "loss": 0.5039, "step": 11949 }, { "epoch": 1.9508183339455534, "grad_norm": 2.7341184616088867, "learning_rate": 1.7635200724602036e-05, "loss": 0.6772, "step": 11950 }, { "epoch": 1.9509815925880576, "grad_norm": 2.43882155418396, "learning_rate": 1.763478667001725e-05, "loss": 0.6344, "step": 11951 }, { "epoch": 1.951144851230562, "grad_norm": 2.686473846435547, "learning_rate": 1.7634372584048738e-05, "loss": 0.6712, "step": 11952 }, { "epoch": 1.9513081098730662, "grad_norm": 3.354982614517212, "learning_rate": 1.7633958466698206e-05, "loss": 0.7136, "step": 11953 }, { "epoch": 1.9514713685155707, "grad_norm": 2.621112585067749, "learning_rate": 1.7633544317967353e-05, "loss": 0.6894, "step": 11954 }, { "epoch": 1.9516346271580751, "grad_norm": 3.222621440887451, "learning_rate": 1.7633130137857883e-05, "loss": 0.8345, "step": 11955 }, { "epoch": 1.9517978858005796, "grad_norm": 2.936866283416748, "learning_rate": 1.7632715926371494e-05, "loss": 0.6866, "step": 11956 }, { "epoch": 1.951961144443084, "grad_norm": 2.9434945583343506, "learning_rate": 1.7632301683509893e-05, "loss": 0.8337, "step": 11957 }, { "epoch": 1.9521244030855884, "grad_norm": 2.672823429107666, "learning_rate": 1.763188740927478e-05, "loss": 0.6817, "step": 11958 }, { "epoch": 1.9522876617280929, "grad_norm": 3.0888867378234863, "learning_rate": 1.7631473103667863e-05, "loss": 0.8355, "step": 11959 }, { "epoch": 1.952450920370597, "grad_norm": 2.5502657890319824, "learning_rate": 1.7631058766690842e-05, "loss": 0.6013, "step": 11960 }, { "epoch": 1.9526141790131015, "grad_norm": 2.6932578086853027, "learning_rate": 1.7630644398345416e-05, "loss": 0.7675, "step": 11961 }, { "epoch": 1.9527774376556057, "grad_norm": 2.4771077632904053, "learning_rate": 1.7630229998633293e-05, "loss": 0.6952, "step": 11962 }, { "epoch": 1.9529406962981102, "grad_norm": 2.783872365951538, "learning_rate": 1.7629815567556177e-05, "loss": 0.6962, "step": 11963 }, { "epoch": 1.9531039549406146, "grad_norm": 2.164550304412842, "learning_rate": 1.7629401105115773e-05, "loss": 0.5266, "step": 11964 }, { "epoch": 1.953267213583119, "grad_norm": 2.435302257537842, "learning_rate": 1.762898661131378e-05, "loss": 0.7515, "step": 11965 }, { "epoch": 1.9534304722256235, "grad_norm": 2.5853099822998047, "learning_rate": 1.76285720861519e-05, "loss": 0.6486, "step": 11966 }, { "epoch": 1.953593730868128, "grad_norm": 2.777373790740967, "learning_rate": 1.7628157529631844e-05, "loss": 0.6813, "step": 11967 }, { "epoch": 1.9537569895106324, "grad_norm": 2.7897145748138428, "learning_rate": 1.7627742941755312e-05, "loss": 0.7082, "step": 11968 }, { "epoch": 1.9539202481531366, "grad_norm": 2.6098251342773438, "learning_rate": 1.762732832252401e-05, "loss": 0.6705, "step": 11969 }, { "epoch": 1.954083506795641, "grad_norm": 2.4928948879241943, "learning_rate": 1.762691367193964e-05, "loss": 0.6747, "step": 11970 }, { "epoch": 1.9542467654381452, "grad_norm": 2.6894259452819824, "learning_rate": 1.762649899000391e-05, "loss": 0.7201, "step": 11971 }, { "epoch": 1.9544100240806497, "grad_norm": 2.6130247116088867, "learning_rate": 1.762608427671852e-05, "loss": 0.6444, "step": 11972 }, { "epoch": 1.9545732827231541, "grad_norm": 2.314439296722412, "learning_rate": 1.7625669532085177e-05, "loss": 0.589, "step": 11973 }, { "epoch": 1.9547365413656586, "grad_norm": 2.2777152061462402, "learning_rate": 1.7625254756105586e-05, "loss": 0.5892, "step": 11974 }, { "epoch": 1.954899800008163, "grad_norm": 2.442965030670166, "learning_rate": 1.7624839948781453e-05, "loss": 0.6608, "step": 11975 }, { "epoch": 1.9550630586506674, "grad_norm": 2.5798158645629883, "learning_rate": 1.762442511011448e-05, "loss": 0.5878, "step": 11976 }, { "epoch": 1.9552263172931719, "grad_norm": 2.5321364402770996, "learning_rate": 1.7624010240106376e-05, "loss": 0.7299, "step": 11977 }, { "epoch": 1.955389575935676, "grad_norm": 2.627979278564453, "learning_rate": 1.7623595338758845e-05, "loss": 0.6355, "step": 11978 }, { "epoch": 1.9555528345781805, "grad_norm": 2.4296741485595703, "learning_rate": 1.762318040607359e-05, "loss": 0.6018, "step": 11979 }, { "epoch": 1.9557160932206847, "grad_norm": 2.371401309967041, "learning_rate": 1.762276544205232e-05, "loss": 0.5988, "step": 11980 }, { "epoch": 1.9558793518631892, "grad_norm": 2.4817111492156982, "learning_rate": 1.7622350446696742e-05, "loss": 0.6953, "step": 11981 }, { "epoch": 1.9560426105056936, "grad_norm": 2.504472255706787, "learning_rate": 1.762193542000856e-05, "loss": 0.5406, "step": 11982 }, { "epoch": 1.956205869148198, "grad_norm": 2.5434582233428955, "learning_rate": 1.7621520361989476e-05, "loss": 0.5628, "step": 11983 }, { "epoch": 1.9563691277907025, "grad_norm": 2.359229564666748, "learning_rate": 1.76211052726412e-05, "loss": 0.6279, "step": 11984 }, { "epoch": 1.956532386433207, "grad_norm": 2.837627410888672, "learning_rate": 1.7620690151965437e-05, "loss": 0.6844, "step": 11985 }, { "epoch": 1.9566956450757111, "grad_norm": 2.7123336791992188, "learning_rate": 1.7620274999963896e-05, "loss": 0.6896, "step": 11986 }, { "epoch": 1.9568589037182156, "grad_norm": 2.845358371734619, "learning_rate": 1.7619859816638283e-05, "loss": 0.6817, "step": 11987 }, { "epoch": 1.95702216236072, "grad_norm": 2.5719046592712402, "learning_rate": 1.76194446019903e-05, "loss": 0.6707, "step": 11988 }, { "epoch": 1.9571854210032242, "grad_norm": 2.4674105644226074, "learning_rate": 1.761902935602166e-05, "loss": 0.556, "step": 11989 }, { "epoch": 1.9573486796457287, "grad_norm": 2.718611717224121, "learning_rate": 1.761861407873407e-05, "loss": 0.7323, "step": 11990 }, { "epoch": 1.957511938288233, "grad_norm": 3.2131574153900146, "learning_rate": 1.7618198770129228e-05, "loss": 0.7675, "step": 11991 }, { "epoch": 1.9576751969307375, "grad_norm": 2.678898572921753, "learning_rate": 1.7617783430208853e-05, "loss": 0.8235, "step": 11992 }, { "epoch": 1.957838455573242, "grad_norm": 2.756131410598755, "learning_rate": 1.7617368058974643e-05, "loss": 0.7091, "step": 11993 }, { "epoch": 1.9580017142157464, "grad_norm": 2.8343491554260254, "learning_rate": 1.761695265642831e-05, "loss": 0.6668, "step": 11994 }, { "epoch": 1.9581649728582506, "grad_norm": 3.1839282512664795, "learning_rate": 1.7616537222571562e-05, "loss": 0.6999, "step": 11995 }, { "epoch": 1.958328231500755, "grad_norm": 2.8630542755126953, "learning_rate": 1.76161217574061e-05, "loss": 0.681, "step": 11996 }, { "epoch": 1.9584914901432593, "grad_norm": 2.509202003479004, "learning_rate": 1.7615706260933644e-05, "loss": 0.5275, "step": 11997 }, { "epoch": 1.9586547487857637, "grad_norm": 2.6556105613708496, "learning_rate": 1.7615290733155893e-05, "loss": 0.598, "step": 11998 }, { "epoch": 1.9588180074282682, "grad_norm": 3.1140081882476807, "learning_rate": 1.761487517407456e-05, "loss": 0.8478, "step": 11999 }, { "epoch": 1.9589812660707726, "grad_norm": 2.0963709354400635, "learning_rate": 1.7614459583691346e-05, "loss": 0.5637, "step": 12000 }, { "epoch": 1.959144524713277, "grad_norm": 2.4640374183654785, "learning_rate": 1.7614043962007965e-05, "loss": 0.6276, "step": 12001 }, { "epoch": 1.9593077833557815, "grad_norm": 3.3647964000701904, "learning_rate": 1.7613628309026124e-05, "loss": 0.7873, "step": 12002 }, { "epoch": 1.959471041998286, "grad_norm": 2.646768808364868, "learning_rate": 1.7613212624747533e-05, "loss": 0.6489, "step": 12003 }, { "epoch": 1.9596343006407901, "grad_norm": 2.3800535202026367, "learning_rate": 1.76127969091739e-05, "loss": 0.5901, "step": 12004 }, { "epoch": 1.9597975592832946, "grad_norm": 2.792384147644043, "learning_rate": 1.7612381162306934e-05, "loss": 0.6871, "step": 12005 }, { "epoch": 1.9599608179257988, "grad_norm": 1.8733333349227905, "learning_rate": 1.761196538414834e-05, "loss": 0.5356, "step": 12006 }, { "epoch": 1.9601240765683032, "grad_norm": 3.040530204772949, "learning_rate": 1.7611549574699833e-05, "loss": 0.7522, "step": 12007 }, { "epoch": 1.9602873352108077, "grad_norm": 3.09272837638855, "learning_rate": 1.761113373396312e-05, "loss": 0.8397, "step": 12008 }, { "epoch": 1.960450593853312, "grad_norm": 2.7703819274902344, "learning_rate": 1.761071786193991e-05, "loss": 0.6729, "step": 12009 }, { "epoch": 1.9606138524958165, "grad_norm": 2.8557894229888916, "learning_rate": 1.7610301958631912e-05, "loss": 0.6558, "step": 12010 }, { "epoch": 1.960777111138321, "grad_norm": 2.8084607124328613, "learning_rate": 1.7609886024040835e-05, "loss": 0.8867, "step": 12011 }, { "epoch": 1.9609403697808254, "grad_norm": 2.7019317150115967, "learning_rate": 1.7609470058168395e-05, "loss": 0.7034, "step": 12012 }, { "epoch": 1.9611036284233296, "grad_norm": 3.044682025909424, "learning_rate": 1.760905406101629e-05, "loss": 0.6919, "step": 12013 }, { "epoch": 1.961266887065834, "grad_norm": 2.572880268096924, "learning_rate": 1.7608638032586242e-05, "loss": 0.6498, "step": 12014 }, { "epoch": 1.9614301457083383, "grad_norm": 2.087789535522461, "learning_rate": 1.7608221972879955e-05, "loss": 0.4947, "step": 12015 }, { "epoch": 1.9615934043508427, "grad_norm": 3.012298583984375, "learning_rate": 1.760780588189914e-05, "loss": 0.6621, "step": 12016 }, { "epoch": 1.9617566629933472, "grad_norm": 2.3793044090270996, "learning_rate": 1.7607389759645507e-05, "loss": 0.5578, "step": 12017 }, { "epoch": 1.9619199216358516, "grad_norm": 2.5935633182525635, "learning_rate": 1.7606973606120767e-05, "loss": 0.7495, "step": 12018 }, { "epoch": 1.962083180278356, "grad_norm": 2.8032214641571045, "learning_rate": 1.7606557421326633e-05, "loss": 0.7832, "step": 12019 }, { "epoch": 1.9622464389208605, "grad_norm": 2.711714029312134, "learning_rate": 1.760614120526481e-05, "loss": 0.7, "step": 12020 }, { "epoch": 1.962409697563365, "grad_norm": 2.4969284534454346, "learning_rate": 1.7605724957937016e-05, "loss": 0.5424, "step": 12021 }, { "epoch": 1.9625729562058691, "grad_norm": 3.013216495513916, "learning_rate": 1.7605308679344958e-05, "loss": 0.8311, "step": 12022 }, { "epoch": 1.9627362148483736, "grad_norm": 2.8461480140686035, "learning_rate": 1.7604892369490344e-05, "loss": 0.7043, "step": 12023 }, { "epoch": 1.9628994734908778, "grad_norm": 2.5910160541534424, "learning_rate": 1.7604476028374894e-05, "loss": 0.6958, "step": 12024 }, { "epoch": 1.9630627321333822, "grad_norm": 2.4395742416381836, "learning_rate": 1.7604059656000313e-05, "loss": 0.6836, "step": 12025 }, { "epoch": 1.9632259907758867, "grad_norm": 2.571056604385376, "learning_rate": 1.760364325236831e-05, "loss": 0.6384, "step": 12026 }, { "epoch": 1.963389249418391, "grad_norm": 2.3007915019989014, "learning_rate": 1.7603226817480607e-05, "loss": 0.5951, "step": 12027 }, { "epoch": 1.9635525080608955, "grad_norm": 2.8503873348236084, "learning_rate": 1.7602810351338907e-05, "loss": 0.6948, "step": 12028 }, { "epoch": 1.9637157667034, "grad_norm": 2.705613136291504, "learning_rate": 1.7602393853944923e-05, "loss": 0.7037, "step": 12029 }, { "epoch": 1.9638790253459042, "grad_norm": 2.7856457233428955, "learning_rate": 1.7601977325300366e-05, "loss": 0.7741, "step": 12030 }, { "epoch": 1.9640422839884086, "grad_norm": 2.2972869873046875, "learning_rate": 1.7601560765406955e-05, "loss": 0.4949, "step": 12031 }, { "epoch": 1.964205542630913, "grad_norm": 2.8626418113708496, "learning_rate": 1.7601144174266397e-05, "loss": 0.748, "step": 12032 }, { "epoch": 1.9643688012734173, "grad_norm": 2.554797410964966, "learning_rate": 1.7600727551880406e-05, "loss": 0.6349, "step": 12033 }, { "epoch": 1.9645320599159217, "grad_norm": 2.8603367805480957, "learning_rate": 1.7600310898250694e-05, "loss": 0.8396, "step": 12034 }, { "epoch": 1.9646953185584262, "grad_norm": 2.6172916889190674, "learning_rate": 1.759989421337897e-05, "loss": 0.5946, "step": 12035 }, { "epoch": 1.9648585772009306, "grad_norm": 3.113806962966919, "learning_rate": 1.759947749726696e-05, "loss": 0.7684, "step": 12036 }, { "epoch": 1.965021835843435, "grad_norm": 2.4631190299987793, "learning_rate": 1.7599060749916357e-05, "loss": 0.6171, "step": 12037 }, { "epoch": 1.9651850944859395, "grad_norm": 2.5231122970581055, "learning_rate": 1.7598643971328892e-05, "loss": 0.6476, "step": 12038 }, { "epoch": 1.9653483531284437, "grad_norm": 2.5548603534698486, "learning_rate": 1.7598227161506267e-05, "loss": 0.8168, "step": 12039 }, { "epoch": 1.9655116117709481, "grad_norm": 2.0190374851226807, "learning_rate": 1.75978103204502e-05, "loss": 0.543, "step": 12040 }, { "epoch": 1.9656748704134523, "grad_norm": 2.2438178062438965, "learning_rate": 1.7597393448162404e-05, "loss": 0.6336, "step": 12041 }, { "epoch": 1.9658381290559568, "grad_norm": 2.670109510421753, "learning_rate": 1.759697654464459e-05, "loss": 0.7343, "step": 12042 }, { "epoch": 1.9660013876984612, "grad_norm": 2.608768939971924, "learning_rate": 1.7596559609898476e-05, "loss": 0.6375, "step": 12043 }, { "epoch": 1.9661646463409657, "grad_norm": 2.6128151416778564, "learning_rate": 1.7596142643925774e-05, "loss": 0.6049, "step": 12044 }, { "epoch": 1.96632790498347, "grad_norm": 2.572969436645508, "learning_rate": 1.7595725646728195e-05, "loss": 0.5818, "step": 12045 }, { "epoch": 1.9664911636259745, "grad_norm": 2.3934848308563232, "learning_rate": 1.759530861830746e-05, "loss": 0.5956, "step": 12046 }, { "epoch": 1.966654422268479, "grad_norm": 3.1827504634857178, "learning_rate": 1.7594891558665278e-05, "loss": 0.7992, "step": 12047 }, { "epoch": 1.9668176809109832, "grad_norm": 2.435295820236206, "learning_rate": 1.7594474467803366e-05, "loss": 0.5951, "step": 12048 }, { "epoch": 1.9669809395534876, "grad_norm": 2.6313705444335938, "learning_rate": 1.7594057345723433e-05, "loss": 0.5886, "step": 12049 }, { "epoch": 1.9671441981959918, "grad_norm": 2.5695180892944336, "learning_rate": 1.75936401924272e-05, "loss": 0.636, "step": 12050 }, { "epoch": 1.9673074568384963, "grad_norm": 2.5502421855926514, "learning_rate": 1.759322300791638e-05, "loss": 0.7261, "step": 12051 }, { "epoch": 1.9674707154810007, "grad_norm": 2.5027058124542236, "learning_rate": 1.7592805792192686e-05, "loss": 0.6575, "step": 12052 }, { "epoch": 1.9676339741235052, "grad_norm": 2.1830766201019287, "learning_rate": 1.7592388545257837e-05, "loss": 0.5245, "step": 12053 }, { "epoch": 1.9677972327660096, "grad_norm": 2.41361927986145, "learning_rate": 1.7591971267113542e-05, "loss": 0.7058, "step": 12054 }, { "epoch": 1.967960491408514, "grad_norm": 2.93876576423645, "learning_rate": 1.7591553957761523e-05, "loss": 0.6817, "step": 12055 }, { "epoch": 1.9681237500510185, "grad_norm": 2.7769908905029297, "learning_rate": 1.759113661720349e-05, "loss": 0.6988, "step": 12056 }, { "epoch": 1.9682870086935227, "grad_norm": 3.0635879039764404, "learning_rate": 1.7590719245441162e-05, "loss": 0.6522, "step": 12057 }, { "epoch": 1.9684502673360271, "grad_norm": 2.7850661277770996, "learning_rate": 1.7590301842476255e-05, "loss": 0.6531, "step": 12058 }, { "epoch": 1.9686135259785313, "grad_norm": 2.8024821281433105, "learning_rate": 1.758988440831048e-05, "loss": 0.551, "step": 12059 }, { "epoch": 1.9687767846210358, "grad_norm": 2.660290241241455, "learning_rate": 1.7589466942945556e-05, "loss": 0.5836, "step": 12060 }, { "epoch": 1.9689400432635402, "grad_norm": 2.6901192665100098, "learning_rate": 1.7589049446383203e-05, "loss": 0.7025, "step": 12061 }, { "epoch": 1.9691033019060447, "grad_norm": 2.5369951725006104, "learning_rate": 1.758863191862513e-05, "loss": 0.5855, "step": 12062 }, { "epoch": 1.969266560548549, "grad_norm": 2.286048650741577, "learning_rate": 1.7588214359673056e-05, "loss": 0.6338, "step": 12063 }, { "epoch": 1.9694298191910535, "grad_norm": 2.6361546516418457, "learning_rate": 1.75877967695287e-05, "loss": 0.6487, "step": 12064 }, { "epoch": 1.969593077833558, "grad_norm": 2.6221303939819336, "learning_rate": 1.7587379148193777e-05, "loss": 0.6609, "step": 12065 }, { "epoch": 1.9697563364760622, "grad_norm": 2.9541683197021484, "learning_rate": 1.7586961495670002e-05, "loss": 0.7798, "step": 12066 }, { "epoch": 1.9699195951185666, "grad_norm": 2.288243293762207, "learning_rate": 1.758654381195909e-05, "loss": 0.597, "step": 12067 }, { "epoch": 1.9700828537610708, "grad_norm": 2.611830711364746, "learning_rate": 1.7586126097062764e-05, "loss": 0.6798, "step": 12068 }, { "epoch": 1.9702461124035753, "grad_norm": 2.3338329792022705, "learning_rate": 1.758570835098274e-05, "loss": 0.5859, "step": 12069 }, { "epoch": 1.9704093710460797, "grad_norm": 2.302747964859009, "learning_rate": 1.7585290573720733e-05, "loss": 0.5917, "step": 12070 }, { "epoch": 1.9705726296885842, "grad_norm": 1.9022709131240845, "learning_rate": 1.7584872765278457e-05, "loss": 0.5134, "step": 12071 }, { "epoch": 1.9707358883310886, "grad_norm": 2.5986275672912598, "learning_rate": 1.7584454925657637e-05, "loss": 0.6069, "step": 12072 }, { "epoch": 1.970899146973593, "grad_norm": 3.162972927093506, "learning_rate": 1.7584037054859987e-05, "loss": 0.7379, "step": 12073 }, { "epoch": 1.9710624056160975, "grad_norm": 2.635038375854492, "learning_rate": 1.7583619152887222e-05, "loss": 0.7772, "step": 12074 }, { "epoch": 1.9712256642586017, "grad_norm": 2.844475269317627, "learning_rate": 1.758320121974106e-05, "loss": 0.8134, "step": 12075 }, { "epoch": 1.9713889229011061, "grad_norm": 2.7567849159240723, "learning_rate": 1.7582783255423223e-05, "loss": 0.6598, "step": 12076 }, { "epoch": 1.9715521815436103, "grad_norm": 2.727587938308716, "learning_rate": 1.758236525993543e-05, "loss": 0.7547, "step": 12077 }, { "epoch": 1.9717154401861148, "grad_norm": 2.9520187377929688, "learning_rate": 1.7581947233279396e-05, "loss": 0.6946, "step": 12078 }, { "epoch": 1.9718786988286192, "grad_norm": 2.5693819522857666, "learning_rate": 1.7581529175456834e-05, "loss": 0.612, "step": 12079 }, { "epoch": 1.9720419574711237, "grad_norm": 2.9624242782592773, "learning_rate": 1.7581111086469477e-05, "loss": 0.7694, "step": 12080 }, { "epoch": 1.972205216113628, "grad_norm": 2.674731969833374, "learning_rate": 1.7580692966319026e-05, "loss": 0.7026, "step": 12081 }, { "epoch": 1.9723684747561325, "grad_norm": 2.9291675090789795, "learning_rate": 1.7580274815007215e-05, "loss": 0.7985, "step": 12082 }, { "epoch": 1.9725317333986367, "grad_norm": 3.0872652530670166, "learning_rate": 1.7579856632535755e-05, "loss": 0.7838, "step": 12083 }, { "epoch": 1.9726949920411412, "grad_norm": 3.33174729347229, "learning_rate": 1.7579438418906365e-05, "loss": 0.9073, "step": 12084 }, { "epoch": 1.9728582506836456, "grad_norm": 2.7599711418151855, "learning_rate": 1.7579020174120765e-05, "loss": 0.643, "step": 12085 }, { "epoch": 1.9730215093261498, "grad_norm": 2.8781144618988037, "learning_rate": 1.7578601898180677e-05, "loss": 0.753, "step": 12086 }, { "epoch": 1.9731847679686543, "grad_norm": 2.340437412261963, "learning_rate": 1.7578183591087818e-05, "loss": 0.6054, "step": 12087 }, { "epoch": 1.9733480266111587, "grad_norm": 2.8692400455474854, "learning_rate": 1.757776525284391e-05, "loss": 0.7464, "step": 12088 }, { "epoch": 1.9735112852536632, "grad_norm": 2.7486512660980225, "learning_rate": 1.7577346883450666e-05, "loss": 0.7191, "step": 12089 }, { "epoch": 1.9736745438961676, "grad_norm": 2.887176036834717, "learning_rate": 1.757692848290981e-05, "loss": 0.7419, "step": 12090 }, { "epoch": 1.973837802538672, "grad_norm": 2.6457300186157227, "learning_rate": 1.7576510051223065e-05, "loss": 0.8291, "step": 12091 }, { "epoch": 1.9740010611811762, "grad_norm": 3.1624414920806885, "learning_rate": 1.7576091588392146e-05, "loss": 1.5698, "step": 12092 }, { "epoch": 1.9741643198236807, "grad_norm": 2.387134313583374, "learning_rate": 1.757567309441878e-05, "loss": 0.6189, "step": 12093 }, { "epoch": 1.974327578466185, "grad_norm": 2.8825011253356934, "learning_rate": 1.7575254569304676e-05, "loss": 0.8013, "step": 12094 }, { "epoch": 1.9744908371086893, "grad_norm": 2.227158308029175, "learning_rate": 1.7574836013051563e-05, "loss": 0.7335, "step": 12095 }, { "epoch": 1.9746540957511938, "grad_norm": 2.74216365814209, "learning_rate": 1.757441742566116e-05, "loss": 0.7403, "step": 12096 }, { "epoch": 1.9748173543936982, "grad_norm": 2.6723527908325195, "learning_rate": 1.7573998807135188e-05, "loss": 0.7327, "step": 12097 }, { "epoch": 1.9749806130362026, "grad_norm": 2.435622453689575, "learning_rate": 1.7573580157475364e-05, "loss": 0.7201, "step": 12098 }, { "epoch": 1.975143871678707, "grad_norm": 3.1269445419311523, "learning_rate": 1.7573161476683413e-05, "loss": 0.7002, "step": 12099 }, { "epoch": 1.9753071303212115, "grad_norm": 2.7910315990448, "learning_rate": 1.7572742764761054e-05, "loss": 0.724, "step": 12100 }, { "epoch": 1.9754703889637157, "grad_norm": 3.106945753097534, "learning_rate": 1.7572324021710014e-05, "loss": 1.6118, "step": 12101 }, { "epoch": 1.9756336476062202, "grad_norm": 2.619128942489624, "learning_rate": 1.7571905247532003e-05, "loss": 0.6492, "step": 12102 }, { "epoch": 1.9757969062487244, "grad_norm": 2.8980634212493896, "learning_rate": 1.757148644222875e-05, "loss": 0.7334, "step": 12103 }, { "epoch": 1.9759601648912288, "grad_norm": 2.5978448390960693, "learning_rate": 1.7571067605801974e-05, "loss": 0.6247, "step": 12104 }, { "epoch": 1.9761234235337333, "grad_norm": 2.7288100719451904, "learning_rate": 1.7570648738253404e-05, "loss": 0.6566, "step": 12105 }, { "epoch": 1.9762866821762377, "grad_norm": 2.4183242321014404, "learning_rate": 1.757022983958475e-05, "loss": 0.6237, "step": 12106 }, { "epoch": 1.9764499408187421, "grad_norm": 2.9436001777648926, "learning_rate": 1.7569810909797743e-05, "loss": 0.6861, "step": 12107 }, { "epoch": 1.9766131994612466, "grad_norm": 2.825894832611084, "learning_rate": 1.7569391948894097e-05, "loss": 0.6876, "step": 12108 }, { "epoch": 1.976776458103751, "grad_norm": 2.5618138313293457, "learning_rate": 1.756897295687554e-05, "loss": 0.6921, "step": 12109 }, { "epoch": 1.9769397167462552, "grad_norm": 2.662909746170044, "learning_rate": 1.7568553933743796e-05, "loss": 0.6502, "step": 12110 }, { "epoch": 1.9771029753887597, "grad_norm": 2.634357452392578, "learning_rate": 1.7568134879500586e-05, "loss": 0.6614, "step": 12111 }, { "epoch": 1.977266234031264, "grad_norm": 2.36606502532959, "learning_rate": 1.7567715794147628e-05, "loss": 0.5422, "step": 12112 }, { "epoch": 1.9774294926737683, "grad_norm": 2.427814483642578, "learning_rate": 1.7567296677686646e-05, "loss": 0.6585, "step": 12113 }, { "epoch": 1.9775927513162728, "grad_norm": 2.920056104660034, "learning_rate": 1.7566877530119368e-05, "loss": 0.6462, "step": 12114 }, { "epoch": 1.9777560099587772, "grad_norm": 2.2980728149414062, "learning_rate": 1.7566458351447513e-05, "loss": 0.5603, "step": 12115 }, { "epoch": 1.9779192686012816, "grad_norm": 2.6546833515167236, "learning_rate": 1.7566039141672804e-05, "loss": 0.6283, "step": 12116 }, { "epoch": 1.978082527243786, "grad_norm": 2.834392786026001, "learning_rate": 1.7565619900796966e-05, "loss": 0.722, "step": 12117 }, { "epoch": 1.9782457858862905, "grad_norm": 2.6462738513946533, "learning_rate": 1.756520062882172e-05, "loss": 0.7208, "step": 12118 }, { "epoch": 1.9784090445287947, "grad_norm": 2.8136637210845947, "learning_rate": 1.7564781325748793e-05, "loss": 0.6815, "step": 12119 }, { "epoch": 1.9785723031712992, "grad_norm": 2.445249557495117, "learning_rate": 1.7564361991579906e-05, "loss": 0.5423, "step": 12120 }, { "epoch": 1.9787355618138034, "grad_norm": 2.402857780456543, "learning_rate": 1.7563942626316783e-05, "loss": 0.6354, "step": 12121 }, { "epoch": 1.9788988204563078, "grad_norm": 2.367924928665161, "learning_rate": 1.7563523229961146e-05, "loss": 0.6416, "step": 12122 }, { "epoch": 1.9790620790988123, "grad_norm": 2.627997636795044, "learning_rate": 1.756310380251472e-05, "loss": 0.6396, "step": 12123 }, { "epoch": 1.9792253377413167, "grad_norm": 2.643982410430908, "learning_rate": 1.7562684343979236e-05, "loss": 0.7383, "step": 12124 }, { "epoch": 1.9793885963838211, "grad_norm": 2.7238597869873047, "learning_rate": 1.7562264854356405e-05, "loss": 0.5667, "step": 12125 }, { "epoch": 1.9795518550263256, "grad_norm": 2.761627197265625, "learning_rate": 1.7561845333647963e-05, "loss": 0.6671, "step": 12126 }, { "epoch": 1.9797151136688298, "grad_norm": 2.575209856033325, "learning_rate": 1.7561425781855628e-05, "loss": 0.5652, "step": 12127 }, { "epoch": 1.9798783723113342, "grad_norm": 2.7470083236694336, "learning_rate": 1.756100619898113e-05, "loss": 0.7531, "step": 12128 }, { "epoch": 1.9800416309538387, "grad_norm": 2.5621421337127686, "learning_rate": 1.7560586585026186e-05, "loss": 0.6478, "step": 12129 }, { "epoch": 1.9802048895963429, "grad_norm": 3.025161027908325, "learning_rate": 1.7560166939992528e-05, "loss": 0.7169, "step": 12130 }, { "epoch": 1.9803681482388473, "grad_norm": 2.6301445960998535, "learning_rate": 1.755974726388188e-05, "loss": 0.6877, "step": 12131 }, { "epoch": 1.9805314068813518, "grad_norm": 2.7342708110809326, "learning_rate": 1.7559327556695966e-05, "loss": 0.5818, "step": 12132 }, { "epoch": 1.9806946655238562, "grad_norm": 3.0012807846069336, "learning_rate": 1.7558907818436506e-05, "loss": 0.8922, "step": 12133 }, { "epoch": 1.9808579241663606, "grad_norm": 2.5246472358703613, "learning_rate": 1.7558488049105233e-05, "loss": 0.5994, "step": 12134 }, { "epoch": 1.981021182808865, "grad_norm": 2.6143765449523926, "learning_rate": 1.755806824870387e-05, "loss": 0.6533, "step": 12135 }, { "epoch": 1.9811844414513693, "grad_norm": 2.9239680767059326, "learning_rate": 1.755764841723414e-05, "loss": 0.8122, "step": 12136 }, { "epoch": 1.9813477000938737, "grad_norm": 3.106320858001709, "learning_rate": 1.7557228554697772e-05, "loss": 0.7058, "step": 12137 }, { "epoch": 1.981510958736378, "grad_norm": 2.610727548599243, "learning_rate": 1.7556808661096492e-05, "loss": 0.67, "step": 12138 }, { "epoch": 1.9816742173788824, "grad_norm": 2.1498682498931885, "learning_rate": 1.7556388736432027e-05, "loss": 0.5183, "step": 12139 }, { "epoch": 1.9818374760213868, "grad_norm": 2.2281577587127686, "learning_rate": 1.7555968780706095e-05, "loss": 0.527, "step": 12140 }, { "epoch": 1.9820007346638913, "grad_norm": 2.1562607288360596, "learning_rate": 1.7555548793920434e-05, "loss": 0.5189, "step": 12141 }, { "epoch": 1.9821639933063957, "grad_norm": 2.5173494815826416, "learning_rate": 1.7555128776076764e-05, "loss": 0.6557, "step": 12142 }, { "epoch": 1.9823272519489001, "grad_norm": 2.6833841800689697, "learning_rate": 1.755470872717681e-05, "loss": 0.7082, "step": 12143 }, { "epoch": 1.9824905105914046, "grad_norm": 2.5753695964813232, "learning_rate": 1.7554288647222303e-05, "loss": 0.6098, "step": 12144 }, { "epoch": 1.9826537692339088, "grad_norm": 2.792543411254883, "learning_rate": 1.7553868536214967e-05, "loss": 0.5919, "step": 12145 }, { "epoch": 1.9828170278764132, "grad_norm": 2.7776334285736084, "learning_rate": 1.755344839415653e-05, "loss": 0.6798, "step": 12146 }, { "epoch": 1.9829802865189174, "grad_norm": 2.793336868286133, "learning_rate": 1.7553028221048717e-05, "loss": 0.7367, "step": 12147 }, { "epoch": 1.9831435451614219, "grad_norm": 2.7825517654418945, "learning_rate": 1.755260801689326e-05, "loss": 0.7632, "step": 12148 }, { "epoch": 1.9833068038039263, "grad_norm": 3.4718174934387207, "learning_rate": 1.7552187781691883e-05, "loss": 0.9761, "step": 12149 }, { "epoch": 1.9834700624464308, "grad_norm": 2.560408353805542, "learning_rate": 1.755176751544631e-05, "loss": 0.5867, "step": 12150 }, { "epoch": 1.9836333210889352, "grad_norm": 2.7742137908935547, "learning_rate": 1.7551347218158277e-05, "loss": 0.683, "step": 12151 }, { "epoch": 1.9837965797314396, "grad_norm": 2.9070727825164795, "learning_rate": 1.7550926889829507e-05, "loss": 0.7387, "step": 12152 }, { "epoch": 1.983959838373944, "grad_norm": 2.8236255645751953, "learning_rate": 1.7550506530461722e-05, "loss": 0.7286, "step": 12153 }, { "epoch": 1.9841230970164483, "grad_norm": 2.20995831489563, "learning_rate": 1.7550086140056662e-05, "loss": 0.5481, "step": 12154 }, { "epoch": 1.9842863556589527, "grad_norm": 2.5283730030059814, "learning_rate": 1.7549665718616044e-05, "loss": 0.5913, "step": 12155 }, { "epoch": 1.984449614301457, "grad_norm": 2.611589193344116, "learning_rate": 1.7549245266141602e-05, "loss": 0.6375, "step": 12156 }, { "epoch": 1.9846128729439614, "grad_norm": 2.920276165008545, "learning_rate": 1.7548824782635065e-05, "loss": 0.6575, "step": 12157 }, { "epoch": 1.9847761315864658, "grad_norm": 2.642720937728882, "learning_rate": 1.754840426809816e-05, "loss": 0.5673, "step": 12158 }, { "epoch": 1.9849393902289703, "grad_norm": 2.7767534255981445, "learning_rate": 1.7547983722532615e-05, "loss": 0.7444, "step": 12159 }, { "epoch": 1.9851026488714747, "grad_norm": 2.897470712661743, "learning_rate": 1.7547563145940158e-05, "loss": 0.6701, "step": 12160 }, { "epoch": 1.9852659075139791, "grad_norm": 2.9499239921569824, "learning_rate": 1.7547142538322518e-05, "loss": 0.6404, "step": 12161 }, { "epoch": 1.9854291661564836, "grad_norm": 2.8104019165039062, "learning_rate": 1.754672189968143e-05, "loss": 0.6566, "step": 12162 }, { "epoch": 1.9855924247989878, "grad_norm": 2.401344060897827, "learning_rate": 1.7546301230018608e-05, "loss": 0.5602, "step": 12163 }, { "epoch": 1.9857556834414922, "grad_norm": 3.149435520172119, "learning_rate": 1.7545880529335798e-05, "loss": 0.7287, "step": 12164 }, { "epoch": 1.9859189420839964, "grad_norm": 3.15726637840271, "learning_rate": 1.7545459797634722e-05, "loss": 0.8042, "step": 12165 }, { "epoch": 1.9860822007265009, "grad_norm": 2.683525562286377, "learning_rate": 1.754503903491711e-05, "loss": 0.6711, "step": 12166 }, { "epoch": 1.9862454593690053, "grad_norm": 2.5891947746276855, "learning_rate": 1.754461824118469e-05, "loss": 0.6903, "step": 12167 }, { "epoch": 1.9864087180115098, "grad_norm": 2.813776969909668, "learning_rate": 1.7544197416439194e-05, "loss": 0.6599, "step": 12168 }, { "epoch": 1.9865719766540142, "grad_norm": 2.8571579456329346, "learning_rate": 1.754377656068235e-05, "loss": 0.8475, "step": 12169 }, { "epoch": 1.9867352352965186, "grad_norm": 2.5346343517303467, "learning_rate": 1.754335567391589e-05, "loss": 0.6691, "step": 12170 }, { "epoch": 1.9868984939390228, "grad_norm": 2.556727170944214, "learning_rate": 1.7542934756141542e-05, "loss": 0.5683, "step": 12171 }, { "epoch": 1.9870617525815273, "grad_norm": 2.900946617126465, "learning_rate": 1.754251380736104e-05, "loss": 0.6428, "step": 12172 }, { "epoch": 1.9872250112240317, "grad_norm": 2.5946669578552246, "learning_rate": 1.754209282757611e-05, "loss": 0.6009, "step": 12173 }, { "epoch": 1.987388269866536, "grad_norm": 2.8680336475372314, "learning_rate": 1.7541671816788483e-05, "loss": 0.8013, "step": 12174 }, { "epoch": 1.9875515285090404, "grad_norm": 2.736485719680786, "learning_rate": 1.7541250774999894e-05, "loss": 0.6833, "step": 12175 }, { "epoch": 1.9877147871515448, "grad_norm": 2.4378368854522705, "learning_rate": 1.754082970221207e-05, "loss": 0.6529, "step": 12176 }, { "epoch": 1.9878780457940493, "grad_norm": 3.0211055278778076, "learning_rate": 1.754040859842674e-05, "loss": 0.6951, "step": 12177 }, { "epoch": 1.9880413044365537, "grad_norm": 2.8634514808654785, "learning_rate": 1.7539987463645638e-05, "loss": 0.7886, "step": 12178 }, { "epoch": 1.9882045630790581, "grad_norm": 2.832732677459717, "learning_rate": 1.7539566297870494e-05, "loss": 0.7639, "step": 12179 }, { "epoch": 1.9883678217215623, "grad_norm": 2.3563106060028076, "learning_rate": 1.7539145101103044e-05, "loss": 0.4959, "step": 12180 }, { "epoch": 1.9885310803640668, "grad_norm": 2.503061056137085, "learning_rate": 1.753872387334501e-05, "loss": 0.646, "step": 12181 }, { "epoch": 1.988694339006571, "grad_norm": 2.821950912475586, "learning_rate": 1.753830261459813e-05, "loss": 0.7253, "step": 12182 }, { "epoch": 1.9888575976490754, "grad_norm": 2.7054383754730225, "learning_rate": 1.7537881324864137e-05, "loss": 0.8093, "step": 12183 }, { "epoch": 1.9890208562915799, "grad_norm": 3.4885318279266357, "learning_rate": 1.753746000414476e-05, "loss": 0.7421, "step": 12184 }, { "epoch": 1.9891841149340843, "grad_norm": 2.4064183235168457, "learning_rate": 1.753703865244173e-05, "loss": 0.6547, "step": 12185 }, { "epoch": 1.9893473735765888, "grad_norm": 2.2761542797088623, "learning_rate": 1.7536617269756775e-05, "loss": 0.535, "step": 12186 }, { "epoch": 1.9895106322190932, "grad_norm": 2.2695488929748535, "learning_rate": 1.753619585609164e-05, "loss": 0.5647, "step": 12187 }, { "epoch": 1.9896738908615976, "grad_norm": 2.3010363578796387, "learning_rate": 1.7535774411448048e-05, "loss": 0.6009, "step": 12188 }, { "epoch": 1.9898371495041018, "grad_norm": 2.4973363876342773, "learning_rate": 1.753535293582773e-05, "loss": 0.634, "step": 12189 }, { "epoch": 1.9900004081466063, "grad_norm": 2.9357621669769287, "learning_rate": 1.7534931429232423e-05, "loss": 0.6186, "step": 12190 }, { "epoch": 1.9901636667891105, "grad_norm": 2.9279587268829346, "learning_rate": 1.753450989166386e-05, "loss": 0.7587, "step": 12191 }, { "epoch": 1.990326925431615, "grad_norm": 2.599456310272217, "learning_rate": 1.7534088323123766e-05, "loss": 0.6002, "step": 12192 }, { "epoch": 1.9904901840741194, "grad_norm": 2.2984232902526855, "learning_rate": 1.7533666723613882e-05, "loss": 0.526, "step": 12193 }, { "epoch": 1.9906534427166238, "grad_norm": 2.744194269180298, "learning_rate": 1.753324509313594e-05, "loss": 0.8491, "step": 12194 }, { "epoch": 1.9908167013591282, "grad_norm": 2.4992048740386963, "learning_rate": 1.7532823431691675e-05, "loss": 0.6593, "step": 12195 }, { "epoch": 1.9909799600016327, "grad_norm": 2.623464345932007, "learning_rate": 1.7532401739282812e-05, "loss": 0.6338, "step": 12196 }, { "epoch": 1.9911432186441371, "grad_norm": 2.292076826095581, "learning_rate": 1.7531980015911093e-05, "loss": 0.5044, "step": 12197 }, { "epoch": 1.9913064772866413, "grad_norm": 2.75026798248291, "learning_rate": 1.7531558261578247e-05, "loss": 0.6655, "step": 12198 }, { "epoch": 1.9914697359291458, "grad_norm": 2.5221729278564453, "learning_rate": 1.7531136476286007e-05, "loss": 0.6267, "step": 12199 }, { "epoch": 1.99163299457165, "grad_norm": 2.726578950881958, "learning_rate": 1.7530714660036112e-05, "loss": 0.7031, "step": 12200 }, { "epoch": 1.9917962532141544, "grad_norm": 2.6723079681396484, "learning_rate": 1.753029281283029e-05, "loss": 0.6532, "step": 12201 }, { "epoch": 1.9919595118566589, "grad_norm": 2.3709325790405273, "learning_rate": 1.752987093467028e-05, "loss": 0.6181, "step": 12202 }, { "epoch": 1.9921227704991633, "grad_norm": 2.584027051925659, "learning_rate": 1.7529449025557812e-05, "loss": 0.7028, "step": 12203 }, { "epoch": 1.9922860291416677, "grad_norm": 2.379640817642212, "learning_rate": 1.752902708549462e-05, "loss": 0.6193, "step": 12204 }, { "epoch": 1.9924492877841722, "grad_norm": 3.076338291168213, "learning_rate": 1.7528605114482443e-05, "loss": 0.7435, "step": 12205 }, { "epoch": 1.9926125464266766, "grad_norm": 2.6474454402923584, "learning_rate": 1.7528183112523014e-05, "loss": 0.7668, "step": 12206 }, { "epoch": 1.9927758050691808, "grad_norm": 2.2785236835479736, "learning_rate": 1.7527761079618066e-05, "loss": 0.6034, "step": 12207 }, { "epoch": 1.9929390637116853, "grad_norm": 2.8489747047424316, "learning_rate": 1.7527339015769335e-05, "loss": 0.5941, "step": 12208 }, { "epoch": 1.9931023223541895, "grad_norm": 2.914196252822876, "learning_rate": 1.7526916920978555e-05, "loss": 0.7161, "step": 12209 }, { "epoch": 1.993265580996694, "grad_norm": 2.7780921459198, "learning_rate": 1.7526494795247463e-05, "loss": 0.6355, "step": 12210 }, { "epoch": 1.9934288396391984, "grad_norm": 2.554511785507202, "learning_rate": 1.752607263857779e-05, "loss": 0.6024, "step": 12211 }, { "epoch": 1.9935920982817028, "grad_norm": 3.2686188220977783, "learning_rate": 1.7525650450971275e-05, "loss": 0.871, "step": 12212 }, { "epoch": 1.9937553569242072, "grad_norm": 3.05698561668396, "learning_rate": 1.7525228232429655e-05, "loss": 0.7315, "step": 12213 }, { "epoch": 1.9939186155667117, "grad_norm": 2.5032618045806885, "learning_rate": 1.7524805982954663e-05, "loss": 0.5883, "step": 12214 }, { "epoch": 1.994081874209216, "grad_norm": 2.7305874824523926, "learning_rate": 1.7524383702548034e-05, "loss": 0.7491, "step": 12215 }, { "epoch": 1.9942451328517203, "grad_norm": 2.1276772022247314, "learning_rate": 1.7523961391211505e-05, "loss": 0.4551, "step": 12216 }, { "epoch": 1.9944083914942248, "grad_norm": 2.6125998497009277, "learning_rate": 1.7523539048946808e-05, "loss": 0.7396, "step": 12217 }, { "epoch": 1.994571650136729, "grad_norm": 2.3025190830230713, "learning_rate": 1.7523116675755688e-05, "loss": 0.5376, "step": 12218 }, { "epoch": 1.9947349087792334, "grad_norm": 2.5419552326202393, "learning_rate": 1.7522694271639876e-05, "loss": 0.6325, "step": 12219 }, { "epoch": 1.9948981674217379, "grad_norm": 2.9069693088531494, "learning_rate": 1.7522271836601104e-05, "loss": 0.6717, "step": 12220 }, { "epoch": 1.9950614260642423, "grad_norm": 2.5840790271759033, "learning_rate": 1.7521849370641116e-05, "loss": 0.5724, "step": 12221 }, { "epoch": 1.9952246847067467, "grad_norm": 2.4065496921539307, "learning_rate": 1.7521426873761644e-05, "loss": 0.5814, "step": 12222 }, { "epoch": 1.9953879433492512, "grad_norm": 2.7300002574920654, "learning_rate": 1.752100434596443e-05, "loss": 0.6858, "step": 12223 }, { "epoch": 1.9955512019917554, "grad_norm": 2.838958501815796, "learning_rate": 1.75205817872512e-05, "loss": 0.5573, "step": 12224 }, { "epoch": 1.9957144606342598, "grad_norm": 2.2974131107330322, "learning_rate": 1.7520159197623703e-05, "loss": 0.6258, "step": 12225 }, { "epoch": 1.995877719276764, "grad_norm": 2.5043110847473145, "learning_rate": 1.7519736577083672e-05, "loss": 0.6076, "step": 12226 }, { "epoch": 1.9960409779192685, "grad_norm": 2.4573988914489746, "learning_rate": 1.751931392563284e-05, "loss": 0.6945, "step": 12227 }, { "epoch": 1.996204236561773, "grad_norm": 2.9730517864227295, "learning_rate": 1.751889124327295e-05, "loss": 0.736, "step": 12228 }, { "epoch": 1.9963674952042774, "grad_norm": 2.6386680603027344, "learning_rate": 1.7518468530005734e-05, "loss": 0.6991, "step": 12229 }, { "epoch": 1.9965307538467818, "grad_norm": 3.362578868865967, "learning_rate": 1.7518045785832937e-05, "loss": 0.842, "step": 12230 }, { "epoch": 1.9966940124892862, "grad_norm": 2.9505600929260254, "learning_rate": 1.7517623010756288e-05, "loss": 0.7273, "step": 12231 }, { "epoch": 1.9968572711317907, "grad_norm": 2.91526460647583, "learning_rate": 1.7517200204777532e-05, "loss": 0.6819, "step": 12232 }, { "epoch": 1.997020529774295, "grad_norm": 2.4323973655700684, "learning_rate": 1.7516777367898405e-05, "loss": 0.6798, "step": 12233 }, { "epoch": 1.9971837884167993, "grad_norm": 3.1382381916046143, "learning_rate": 1.7516354500120644e-05, "loss": 0.8478, "step": 12234 }, { "epoch": 1.9973470470593035, "grad_norm": 2.8479466438293457, "learning_rate": 1.7515931601445987e-05, "loss": 0.7774, "step": 12235 }, { "epoch": 1.997510305701808, "grad_norm": 2.7918202877044678, "learning_rate": 1.7515508671876173e-05, "loss": 0.6831, "step": 12236 }, { "epoch": 1.9976735643443124, "grad_norm": 2.988391399383545, "learning_rate": 1.7515085711412944e-05, "loss": 1.1685, "step": 12237 }, { "epoch": 1.9978368229868169, "grad_norm": 2.832094192504883, "learning_rate": 1.7514662720058032e-05, "loss": 0.5953, "step": 12238 }, { "epoch": 1.9980000816293213, "grad_norm": 3.1359758377075195, "learning_rate": 1.751423969781318e-05, "loss": 0.815, "step": 12239 }, { "epoch": 1.9981633402718257, "grad_norm": 2.7415921688079834, "learning_rate": 1.7513816644680126e-05, "loss": 0.7979, "step": 12240 }, { "epoch": 1.9983265989143302, "grad_norm": 2.7695071697235107, "learning_rate": 1.7513393560660606e-05, "loss": 0.7649, "step": 12241 }, { "epoch": 1.9984898575568344, "grad_norm": 2.614295721054077, "learning_rate": 1.751297044575637e-05, "loss": 0.8158, "step": 12242 }, { "epoch": 1.9986531161993388, "grad_norm": 2.589231014251709, "learning_rate": 1.751254729996914e-05, "loss": 0.5927, "step": 12243 }, { "epoch": 1.998816374841843, "grad_norm": 1.999716877937317, "learning_rate": 1.751212412330067e-05, "loss": 0.487, "step": 12244 }, { "epoch": 1.9989796334843475, "grad_norm": 2.7044479846954346, "learning_rate": 1.7511700915752693e-05, "loss": 0.8173, "step": 12245 }, { "epoch": 1.999142892126852, "grad_norm": 2.3959970474243164, "learning_rate": 1.751127767732695e-05, "loss": 0.5884, "step": 12246 }, { "epoch": 1.9993061507693564, "grad_norm": 2.6301894187927246, "learning_rate": 1.751085440802518e-05, "loss": 0.7554, "step": 12247 }, { "epoch": 1.9994694094118608, "grad_norm": 2.4454421997070312, "learning_rate": 1.7510431107849123e-05, "loss": 0.6298, "step": 12248 }, { "epoch": 1.9996326680543652, "grad_norm": 2.887280225753784, "learning_rate": 1.7510007776800525e-05, "loss": 0.7385, "step": 12249 }, { "epoch": 1.9997959266968697, "grad_norm": 2.1852662563323975, "learning_rate": 1.7509584414881114e-05, "loss": 0.5752, "step": 12250 }, { "epoch": 1.999959185339374, "grad_norm": 2.5532610416412354, "learning_rate": 1.750916102209264e-05, "loss": 0.663, "step": 12251 }, { "epoch": 2.0, "grad_norm": 6.106594562530518, "learning_rate": 1.750873759843684e-05, "loss": 1.0414, "step": 12252 }, { "epoch": 2.0001632586425044, "grad_norm": 2.54601788520813, "learning_rate": 1.7508314143915456e-05, "loss": 0.6887, "step": 12253 }, { "epoch": 2.000326517285009, "grad_norm": 2.384610652923584, "learning_rate": 1.7507890658530225e-05, "loss": 0.6082, "step": 12254 }, { "epoch": 2.0004897759275133, "grad_norm": 2.2587199211120605, "learning_rate": 1.750746714228289e-05, "loss": 0.5575, "step": 12255 }, { "epoch": 2.0006530345700178, "grad_norm": 1.7518222332000732, "learning_rate": 1.7507043595175196e-05, "loss": 0.4879, "step": 12256 }, { "epoch": 2.0008162932125217, "grad_norm": 2.1285061836242676, "learning_rate": 1.750662001720888e-05, "loss": 0.5765, "step": 12257 }, { "epoch": 2.000979551855026, "grad_norm": 2.490168809890747, "learning_rate": 1.7506196408385684e-05, "loss": 0.7325, "step": 12258 }, { "epoch": 2.0011428104975306, "grad_norm": 2.756220579147339, "learning_rate": 1.7505772768707348e-05, "loss": 0.7732, "step": 12259 }, { "epoch": 2.001306069140035, "grad_norm": 2.1337170600891113, "learning_rate": 1.7505349098175612e-05, "loss": 0.6235, "step": 12260 }, { "epoch": 2.0014693277825395, "grad_norm": 2.6894707679748535, "learning_rate": 1.7504925396792222e-05, "loss": 0.6193, "step": 12261 }, { "epoch": 2.001632586425044, "grad_norm": 2.481069326400757, "learning_rate": 1.7504501664558917e-05, "loss": 0.7082, "step": 12262 }, { "epoch": 2.0017958450675484, "grad_norm": 3.223480224609375, "learning_rate": 1.7504077901477438e-05, "loss": 0.7012, "step": 12263 }, { "epoch": 2.001959103710053, "grad_norm": 2.5588300228118896, "learning_rate": 1.750365410754953e-05, "loss": 0.6255, "step": 12264 }, { "epoch": 2.0021223623525572, "grad_norm": 2.268970012664795, "learning_rate": 1.7503230282776932e-05, "loss": 0.5115, "step": 12265 }, { "epoch": 2.0022856209950612, "grad_norm": 2.275127410888672, "learning_rate": 1.750280642716139e-05, "loss": 0.4905, "step": 12266 }, { "epoch": 2.0024488796375657, "grad_norm": 2.404773235321045, "learning_rate": 1.7502382540704642e-05, "loss": 0.4816, "step": 12267 }, { "epoch": 2.00261213828007, "grad_norm": 2.73836088180542, "learning_rate": 1.750195862340843e-05, "loss": 0.6113, "step": 12268 }, { "epoch": 2.0027753969225746, "grad_norm": 2.850665330886841, "learning_rate": 1.7501534675274504e-05, "loss": 0.8036, "step": 12269 }, { "epoch": 2.002938655565079, "grad_norm": 2.817324638366699, "learning_rate": 1.7501110696304598e-05, "loss": 0.6766, "step": 12270 }, { "epoch": 2.0031019142075834, "grad_norm": 2.598843574523926, "learning_rate": 1.750068668650046e-05, "loss": 0.6218, "step": 12271 }, { "epoch": 2.003265172850088, "grad_norm": 2.200711250305176, "learning_rate": 1.7500262645863827e-05, "loss": 0.5682, "step": 12272 }, { "epoch": 2.0034284314925923, "grad_norm": 2.544966459274292, "learning_rate": 1.7499838574396452e-05, "loss": 0.553, "step": 12273 }, { "epoch": 2.0035916901350967, "grad_norm": 2.593032121658325, "learning_rate": 1.7499414472100066e-05, "loss": 0.6396, "step": 12274 }, { "epoch": 2.0037549487776007, "grad_norm": 3.0544731616973877, "learning_rate": 1.7498990338976425e-05, "loss": 0.6895, "step": 12275 }, { "epoch": 2.003918207420105, "grad_norm": 2.5722978115081787, "learning_rate": 1.7498566175027265e-05, "loss": 0.5091, "step": 12276 }, { "epoch": 2.0040814660626096, "grad_norm": 2.5325803756713867, "learning_rate": 1.749814198025433e-05, "loss": 0.6381, "step": 12277 }, { "epoch": 2.004244724705114, "grad_norm": 2.486769437789917, "learning_rate": 1.7497717754659366e-05, "loss": 0.5824, "step": 12278 }, { "epoch": 2.0044079833476185, "grad_norm": 2.8271896839141846, "learning_rate": 1.7497293498244112e-05, "loss": 0.5704, "step": 12279 }, { "epoch": 2.004571241990123, "grad_norm": 2.507114887237549, "learning_rate": 1.7496869211010317e-05, "loss": 0.5522, "step": 12280 }, { "epoch": 2.0047345006326274, "grad_norm": 2.6363790035247803, "learning_rate": 1.7496444892959722e-05, "loss": 0.5418, "step": 12281 }, { "epoch": 2.004897759275132, "grad_norm": 2.7641866207122803, "learning_rate": 1.7496020544094078e-05, "loss": 0.617, "step": 12282 }, { "epoch": 2.005061017917636, "grad_norm": 2.711552619934082, "learning_rate": 1.7495596164415117e-05, "loss": 0.5346, "step": 12283 }, { "epoch": 2.0052242765601402, "grad_norm": 2.6882143020629883, "learning_rate": 1.7495171753924594e-05, "loss": 0.544, "step": 12284 }, { "epoch": 2.0053875352026447, "grad_norm": 2.4834976196289062, "learning_rate": 1.749474731262425e-05, "loss": 0.5283, "step": 12285 }, { "epoch": 2.005550793845149, "grad_norm": 2.851736307144165, "learning_rate": 1.7494322840515828e-05, "loss": 0.5718, "step": 12286 }, { "epoch": 2.0057140524876536, "grad_norm": 2.637258768081665, "learning_rate": 1.7493898337601078e-05, "loss": 0.5831, "step": 12287 }, { "epoch": 2.005877311130158, "grad_norm": 2.5137948989868164, "learning_rate": 1.7493473803881742e-05, "loss": 0.4952, "step": 12288 }, { "epoch": 2.0060405697726624, "grad_norm": 3.686662435531616, "learning_rate": 1.7493049239359562e-05, "loss": 0.6454, "step": 12289 }, { "epoch": 2.006203828415167, "grad_norm": 2.538965940475464, "learning_rate": 1.7492624644036285e-05, "loss": 0.5532, "step": 12290 }, { "epoch": 2.0063670870576713, "grad_norm": 3.061511754989624, "learning_rate": 1.7492200017913658e-05, "loss": 0.6233, "step": 12291 }, { "epoch": 2.0065303457001753, "grad_norm": 3.0766963958740234, "learning_rate": 1.7491775360993427e-05, "loss": 0.6645, "step": 12292 }, { "epoch": 2.0066936043426797, "grad_norm": 2.501868724822998, "learning_rate": 1.7491350673277335e-05, "loss": 0.5161, "step": 12293 }, { "epoch": 2.006856862985184, "grad_norm": 2.5402939319610596, "learning_rate": 1.749092595476713e-05, "loss": 0.6285, "step": 12294 }, { "epoch": 2.0070201216276886, "grad_norm": 2.6776375770568848, "learning_rate": 1.749050120546456e-05, "loss": 0.5508, "step": 12295 }, { "epoch": 2.007183380270193, "grad_norm": 2.9396862983703613, "learning_rate": 1.7490076425371362e-05, "loss": 0.7014, "step": 12296 }, { "epoch": 2.0073466389126975, "grad_norm": 2.4962241649627686, "learning_rate": 1.748965161448929e-05, "loss": 0.5769, "step": 12297 }, { "epoch": 2.007509897555202, "grad_norm": 2.7307920455932617, "learning_rate": 1.748922677282009e-05, "loss": 0.5383, "step": 12298 }, { "epoch": 2.0076731561977064, "grad_norm": 2.56459641456604, "learning_rate": 1.7488801900365503e-05, "loss": 0.5359, "step": 12299 }, { "epoch": 2.007836414840211, "grad_norm": 2.8706297874450684, "learning_rate": 1.748837699712728e-05, "loss": 0.6178, "step": 12300 }, { "epoch": 2.007999673482715, "grad_norm": 3.0793983936309814, "learning_rate": 1.7487952063107175e-05, "loss": 0.8398, "step": 12301 }, { "epoch": 2.0081629321252192, "grad_norm": 2.7035319805145264, "learning_rate": 1.7487527098306917e-05, "loss": 0.555, "step": 12302 }, { "epoch": 2.0083261907677237, "grad_norm": 3.014213800430298, "learning_rate": 1.7487102102728264e-05, "loss": 0.5832, "step": 12303 }, { "epoch": 2.008489449410228, "grad_norm": 2.2897541522979736, "learning_rate": 1.7486677076372963e-05, "loss": 0.5327, "step": 12304 }, { "epoch": 2.0086527080527325, "grad_norm": 2.8456227779388428, "learning_rate": 1.748625201924276e-05, "loss": 0.7069, "step": 12305 }, { "epoch": 2.008815966695237, "grad_norm": 2.7012951374053955, "learning_rate": 1.74858269313394e-05, "loss": 0.6249, "step": 12306 }, { "epoch": 2.0089792253377414, "grad_norm": 2.5457046031951904, "learning_rate": 1.7485401812664633e-05, "loss": 0.5053, "step": 12307 }, { "epoch": 2.009142483980246, "grad_norm": 2.263807535171509, "learning_rate": 1.7484976663220205e-05, "loss": 0.4464, "step": 12308 }, { "epoch": 2.0093057426227503, "grad_norm": 3.2343509197235107, "learning_rate": 1.7484551483007864e-05, "loss": 0.7093, "step": 12309 }, { "epoch": 2.0094690012652543, "grad_norm": 2.749582529067993, "learning_rate": 1.748412627202936e-05, "loss": 0.6131, "step": 12310 }, { "epoch": 2.0096322599077587, "grad_norm": 3.546461343765259, "learning_rate": 1.7483701030286438e-05, "loss": 0.5934, "step": 12311 }, { "epoch": 2.009795518550263, "grad_norm": 2.681900978088379, "learning_rate": 1.7483275757780846e-05, "loss": 0.567, "step": 12312 }, { "epoch": 2.0099587771927676, "grad_norm": 2.670567274093628, "learning_rate": 1.7482850454514334e-05, "loss": 0.5172, "step": 12313 }, { "epoch": 2.010122035835272, "grad_norm": 3.3048548698425293, "learning_rate": 1.7482425120488647e-05, "loss": 0.7407, "step": 12314 }, { "epoch": 2.0102852944777765, "grad_norm": 3.672640085220337, "learning_rate": 1.748199975570554e-05, "loss": 0.8008, "step": 12315 }, { "epoch": 2.010448553120281, "grad_norm": 2.836791753768921, "learning_rate": 1.7481574360166752e-05, "loss": 0.6012, "step": 12316 }, { "epoch": 2.0106118117627854, "grad_norm": 2.8513381481170654, "learning_rate": 1.748114893387404e-05, "loss": 0.6409, "step": 12317 }, { "epoch": 2.01077507040529, "grad_norm": 3.0087897777557373, "learning_rate": 1.7480723476829147e-05, "loss": 0.5828, "step": 12318 }, { "epoch": 2.010938329047794, "grad_norm": 2.570667266845703, "learning_rate": 1.7480297989033824e-05, "loss": 0.5358, "step": 12319 }, { "epoch": 2.0111015876902982, "grad_norm": 2.7510695457458496, "learning_rate": 1.7479872470489826e-05, "loss": 0.578, "step": 12320 }, { "epoch": 2.0112648463328027, "grad_norm": 2.813387393951416, "learning_rate": 1.747944692119889e-05, "loss": 0.6925, "step": 12321 }, { "epoch": 2.011428104975307, "grad_norm": 2.6383097171783447, "learning_rate": 1.7479021341162778e-05, "loss": 0.5534, "step": 12322 }, { "epoch": 2.0115913636178115, "grad_norm": 2.9180145263671875, "learning_rate": 1.747859573038323e-05, "loss": 0.567, "step": 12323 }, { "epoch": 2.011754622260316, "grad_norm": 2.441068410873413, "learning_rate": 1.7478170088862e-05, "loss": 0.6324, "step": 12324 }, { "epoch": 2.0119178809028204, "grad_norm": 2.469135284423828, "learning_rate": 1.7477744416600833e-05, "loss": 0.5064, "step": 12325 }, { "epoch": 2.012081139545325, "grad_norm": 2.6022136211395264, "learning_rate": 1.747731871360149e-05, "loss": 0.5395, "step": 12326 }, { "epoch": 2.012244398187829, "grad_norm": 3.1055264472961426, "learning_rate": 1.7476892979865704e-05, "loss": 0.6283, "step": 12327 }, { "epoch": 2.0124076568303333, "grad_norm": 2.8253960609436035, "learning_rate": 1.7476467215395238e-05, "loss": 0.5093, "step": 12328 }, { "epoch": 2.0125709154728377, "grad_norm": 2.7686688899993896, "learning_rate": 1.747604142019184e-05, "loss": 0.5732, "step": 12329 }, { "epoch": 2.012734174115342, "grad_norm": 2.7140605449676514, "learning_rate": 1.747561559425726e-05, "loss": 0.6353, "step": 12330 }, { "epoch": 2.0128974327578466, "grad_norm": 3.278217315673828, "learning_rate": 1.7475189737593243e-05, "loss": 0.7053, "step": 12331 }, { "epoch": 2.013060691400351, "grad_norm": 2.5041322708129883, "learning_rate": 1.7474763850201545e-05, "loss": 0.5691, "step": 12332 }, { "epoch": 2.0132239500428555, "grad_norm": 2.8094890117645264, "learning_rate": 1.7474337932083916e-05, "loss": 0.606, "step": 12333 }, { "epoch": 2.01338720868536, "grad_norm": 3.0312139987945557, "learning_rate": 1.7473911983242105e-05, "loss": 0.7225, "step": 12334 }, { "epoch": 2.0135504673278644, "grad_norm": 2.7843728065490723, "learning_rate": 1.747348600367786e-05, "loss": 0.6393, "step": 12335 }, { "epoch": 2.0137137259703684, "grad_norm": 3.0083510875701904, "learning_rate": 1.747305999339294e-05, "loss": 0.591, "step": 12336 }, { "epoch": 2.013876984612873, "grad_norm": 2.862088680267334, "learning_rate": 1.7472633952389097e-05, "loss": 0.6617, "step": 12337 }, { "epoch": 2.0140402432553772, "grad_norm": 3.2497289180755615, "learning_rate": 1.747220788066807e-05, "loss": 0.7085, "step": 12338 }, { "epoch": 2.0142035018978817, "grad_norm": 2.2265162467956543, "learning_rate": 1.747178177823162e-05, "loss": 0.476, "step": 12339 }, { "epoch": 2.014366760540386, "grad_norm": 3.043325662612915, "learning_rate": 1.74713556450815e-05, "loss": 0.6037, "step": 12340 }, { "epoch": 2.0145300191828905, "grad_norm": 2.9235496520996094, "learning_rate": 1.7470929481219453e-05, "loss": 0.6491, "step": 12341 }, { "epoch": 2.014693277825395, "grad_norm": 2.896059513092041, "learning_rate": 1.747050328664724e-05, "loss": 0.5978, "step": 12342 }, { "epoch": 2.0148565364678994, "grad_norm": 2.4975013732910156, "learning_rate": 1.7470077061366603e-05, "loss": 0.5225, "step": 12343 }, { "epoch": 2.015019795110404, "grad_norm": 2.4593889713287354, "learning_rate": 1.7469650805379306e-05, "loss": 0.4989, "step": 12344 }, { "epoch": 2.015183053752908, "grad_norm": 2.7840888500213623, "learning_rate": 1.7469224518687094e-05, "loss": 0.6256, "step": 12345 }, { "epoch": 2.0153463123954123, "grad_norm": 2.749541997909546, "learning_rate": 1.7468798201291717e-05, "loss": 0.5322, "step": 12346 }, { "epoch": 2.0155095710379167, "grad_norm": 2.8556902408599854, "learning_rate": 1.7468371853194933e-05, "loss": 0.5686, "step": 12347 }, { "epoch": 2.015672829680421, "grad_norm": 2.7453739643096924, "learning_rate": 1.746794547439849e-05, "loss": 0.5498, "step": 12348 }, { "epoch": 2.0158360883229256, "grad_norm": 2.327538251876831, "learning_rate": 1.7467519064904147e-05, "loss": 0.4707, "step": 12349 }, { "epoch": 2.01599934696543, "grad_norm": 3.0898964405059814, "learning_rate": 1.7467092624713652e-05, "loss": 0.5875, "step": 12350 }, { "epoch": 2.0161626056079345, "grad_norm": 2.5210483074188232, "learning_rate": 1.7466666153828756e-05, "loss": 0.532, "step": 12351 }, { "epoch": 2.016325864250439, "grad_norm": 3.075040340423584, "learning_rate": 1.7466239652251218e-05, "loss": 0.5331, "step": 12352 }, { "epoch": 2.0164891228929434, "grad_norm": 3.2099311351776123, "learning_rate": 1.7465813119982783e-05, "loss": 0.629, "step": 12353 }, { "epoch": 2.0166523815354473, "grad_norm": 2.453733205795288, "learning_rate": 1.7465386557025216e-05, "loss": 0.5274, "step": 12354 }, { "epoch": 2.016815640177952, "grad_norm": 3.243844985961914, "learning_rate": 1.746495996338026e-05, "loss": 0.62, "step": 12355 }, { "epoch": 2.0169788988204562, "grad_norm": 2.8170506954193115, "learning_rate": 1.7464533339049673e-05, "loss": 0.641, "step": 12356 }, { "epoch": 2.0171421574629607, "grad_norm": 3.1807994842529297, "learning_rate": 1.7464106684035206e-05, "loss": 0.6682, "step": 12357 }, { "epoch": 2.017305416105465, "grad_norm": 2.9553091526031494, "learning_rate": 1.7463679998338618e-05, "loss": 0.5609, "step": 12358 }, { "epoch": 2.0174686747479695, "grad_norm": 2.7903337478637695, "learning_rate": 1.7463253281961656e-05, "loss": 0.557, "step": 12359 }, { "epoch": 2.017631933390474, "grad_norm": 2.567357301712036, "learning_rate": 1.746282653490608e-05, "loss": 0.4988, "step": 12360 }, { "epoch": 2.0177951920329784, "grad_norm": 2.9419052600860596, "learning_rate": 1.7462399757173646e-05, "loss": 1.3391, "step": 12361 }, { "epoch": 2.017958450675483, "grad_norm": 3.148350238800049, "learning_rate": 1.74619729487661e-05, "loss": 0.8218, "step": 12362 }, { "epoch": 2.018121709317987, "grad_norm": 2.8556337356567383, "learning_rate": 1.7461546109685198e-05, "loss": 0.588, "step": 12363 }, { "epoch": 2.0182849679604913, "grad_norm": 2.4347143173217773, "learning_rate": 1.7461119239932704e-05, "loss": 0.5422, "step": 12364 }, { "epoch": 2.0184482266029957, "grad_norm": 2.5087711811065674, "learning_rate": 1.746069233951036e-05, "loss": 0.4758, "step": 12365 }, { "epoch": 2.0186114852455, "grad_norm": 2.49981689453125, "learning_rate": 1.746026540841993e-05, "loss": 0.5471, "step": 12366 }, { "epoch": 2.0187747438880046, "grad_norm": 2.649789333343506, "learning_rate": 1.7459838446663166e-05, "loss": 0.5227, "step": 12367 }, { "epoch": 2.018938002530509, "grad_norm": 2.9273428916931152, "learning_rate": 1.7459411454241822e-05, "loss": 0.6243, "step": 12368 }, { "epoch": 2.0191012611730135, "grad_norm": 2.566497564315796, "learning_rate": 1.7458984431157656e-05, "loss": 0.5948, "step": 12369 }, { "epoch": 2.019264519815518, "grad_norm": 3.2424843311309814, "learning_rate": 1.745855737741242e-05, "loss": 0.6837, "step": 12370 }, { "epoch": 2.019427778458022, "grad_norm": 2.6347005367279053, "learning_rate": 1.745813029300787e-05, "loss": 0.5573, "step": 12371 }, { "epoch": 2.0195910371005263, "grad_norm": 2.410764694213867, "learning_rate": 1.7457703177945765e-05, "loss": 0.4575, "step": 12372 }, { "epoch": 2.019754295743031, "grad_norm": 2.736855983734131, "learning_rate": 1.7457276032227856e-05, "loss": 0.5476, "step": 12373 }, { "epoch": 2.019917554385535, "grad_norm": 3.2066709995269775, "learning_rate": 1.74568488558559e-05, "loss": 0.5901, "step": 12374 }, { "epoch": 2.0200808130280397, "grad_norm": 3.046391010284424, "learning_rate": 1.7456421648831658e-05, "loss": 0.6589, "step": 12375 }, { "epoch": 2.020244071670544, "grad_norm": 2.3602561950683594, "learning_rate": 1.745599441115688e-05, "loss": 0.4757, "step": 12376 }, { "epoch": 2.0204073303130485, "grad_norm": 3.0896658897399902, "learning_rate": 1.7455567142833322e-05, "loss": 0.6656, "step": 12377 }, { "epoch": 2.020570588955553, "grad_norm": 2.9552083015441895, "learning_rate": 1.745513984386275e-05, "loss": 0.7157, "step": 12378 }, { "epoch": 2.0207338475980574, "grad_norm": 2.8717148303985596, "learning_rate": 1.7454712514246903e-05, "loss": 0.5547, "step": 12379 }, { "epoch": 2.0208971062405614, "grad_norm": 2.6121933460235596, "learning_rate": 1.7454285153987553e-05, "loss": 0.5724, "step": 12380 }, { "epoch": 2.021060364883066, "grad_norm": 2.832324743270874, "learning_rate": 1.7453857763086454e-05, "loss": 0.5893, "step": 12381 }, { "epoch": 2.0212236235255703, "grad_norm": 3.0467748641967773, "learning_rate": 1.7453430341545355e-05, "loss": 0.5476, "step": 12382 }, { "epoch": 2.0213868821680747, "grad_norm": 2.6485660076141357, "learning_rate": 1.7453002889366023e-05, "loss": 0.5796, "step": 12383 }, { "epoch": 2.021550140810579, "grad_norm": 2.4829773902893066, "learning_rate": 1.7452575406550205e-05, "loss": 0.5255, "step": 12384 }, { "epoch": 2.0217133994530836, "grad_norm": 3.194545269012451, "learning_rate": 1.7452147893099668e-05, "loss": 0.5566, "step": 12385 }, { "epoch": 2.021876658095588, "grad_norm": 3.1001157760620117, "learning_rate": 1.7451720349016164e-05, "loss": 1.6974, "step": 12386 }, { "epoch": 2.0220399167380925, "grad_norm": 2.6810054779052734, "learning_rate": 1.745129277430145e-05, "loss": 0.6007, "step": 12387 }, { "epoch": 2.022203175380597, "grad_norm": 2.571451425552368, "learning_rate": 1.7450865168957286e-05, "loss": 0.5804, "step": 12388 }, { "epoch": 2.022366434023101, "grad_norm": 2.8117194175720215, "learning_rate": 1.745043753298543e-05, "loss": 0.6787, "step": 12389 }, { "epoch": 2.0225296926656053, "grad_norm": 2.5683934688568115, "learning_rate": 1.7450009866387635e-05, "loss": 0.6095, "step": 12390 }, { "epoch": 2.0226929513081098, "grad_norm": 2.7229061126708984, "learning_rate": 1.7449582169165667e-05, "loss": 0.6449, "step": 12391 }, { "epoch": 2.022856209950614, "grad_norm": 2.450406312942505, "learning_rate": 1.7449154441321275e-05, "loss": 0.5241, "step": 12392 }, { "epoch": 2.0230194685931187, "grad_norm": 3.080275297164917, "learning_rate": 1.7448726682856226e-05, "loss": 0.6782, "step": 12393 }, { "epoch": 2.023182727235623, "grad_norm": 2.4620909690856934, "learning_rate": 1.7448298893772267e-05, "loss": 0.5573, "step": 12394 }, { "epoch": 2.0233459858781275, "grad_norm": 2.6903436183929443, "learning_rate": 1.744787107407117e-05, "loss": 0.5797, "step": 12395 }, { "epoch": 2.023509244520632, "grad_norm": 2.7415027618408203, "learning_rate": 1.744744322375469e-05, "loss": 0.4788, "step": 12396 }, { "epoch": 2.0236725031631364, "grad_norm": 3.208588123321533, "learning_rate": 1.7447015342824576e-05, "loss": 0.6587, "step": 12397 }, { "epoch": 2.0238357618056404, "grad_norm": 2.762887954711914, "learning_rate": 1.7446587431282598e-05, "loss": 0.6423, "step": 12398 }, { "epoch": 2.023999020448145, "grad_norm": 3.3518149852752686, "learning_rate": 1.744615948913051e-05, "loss": 0.7419, "step": 12399 }, { "epoch": 2.0241622790906493, "grad_norm": 2.7177093029022217, "learning_rate": 1.744573151637007e-05, "loss": 0.5803, "step": 12400 }, { "epoch": 2.0243255377331537, "grad_norm": 2.9713854789733887, "learning_rate": 1.7445303513003043e-05, "loss": 0.6196, "step": 12401 }, { "epoch": 2.024488796375658, "grad_norm": 2.5258262157440186, "learning_rate": 1.7444875479031184e-05, "loss": 0.5263, "step": 12402 }, { "epoch": 2.0246520550181626, "grad_norm": 2.777301073074341, "learning_rate": 1.744444741445625e-05, "loss": 0.5999, "step": 12403 }, { "epoch": 2.024815313660667, "grad_norm": 2.95021653175354, "learning_rate": 1.7444019319280006e-05, "loss": 0.6202, "step": 12404 }, { "epoch": 2.0249785723031715, "grad_norm": 3.04333758354187, "learning_rate": 1.7443591193504208e-05, "loss": 0.5046, "step": 12405 }, { "epoch": 2.025141830945676, "grad_norm": 3.0690155029296875, "learning_rate": 1.7443163037130616e-05, "loss": 0.6716, "step": 12406 }, { "epoch": 2.02530508958818, "grad_norm": 2.439528703689575, "learning_rate": 1.7442734850160994e-05, "loss": 0.4816, "step": 12407 }, { "epoch": 2.0254683482306843, "grad_norm": 2.5259909629821777, "learning_rate": 1.7442306632597097e-05, "loss": 0.4046, "step": 12408 }, { "epoch": 2.0256316068731888, "grad_norm": 2.6380512714385986, "learning_rate": 1.7441878384440692e-05, "loss": 0.4286, "step": 12409 }, { "epoch": 2.025794865515693, "grad_norm": 2.344895124435425, "learning_rate": 1.7441450105693532e-05, "loss": 0.5075, "step": 12410 }, { "epoch": 2.0259581241581976, "grad_norm": 2.996617555618286, "learning_rate": 1.744102179635738e-05, "loss": 0.5092, "step": 12411 }, { "epoch": 2.026121382800702, "grad_norm": 3.124732732772827, "learning_rate": 1.7440593456433998e-05, "loss": 0.5407, "step": 12412 }, { "epoch": 2.0262846414432065, "grad_norm": 2.900883913040161, "learning_rate": 1.7440165085925144e-05, "loss": 0.5198, "step": 12413 }, { "epoch": 2.026447900085711, "grad_norm": 3.3532700538635254, "learning_rate": 1.7439736684832582e-05, "loss": 0.6479, "step": 12414 }, { "epoch": 2.026611158728215, "grad_norm": 3.3783199787139893, "learning_rate": 1.7439308253158073e-05, "loss": 0.6043, "step": 12415 }, { "epoch": 2.0267744173707194, "grad_norm": 2.758920669555664, "learning_rate": 1.7438879790903376e-05, "loss": 0.5219, "step": 12416 }, { "epoch": 2.026937676013224, "grad_norm": 2.8675122261047363, "learning_rate": 1.7438451298070252e-05, "loss": 0.5318, "step": 12417 }, { "epoch": 2.0271009346557283, "grad_norm": 2.6935789585113525, "learning_rate": 1.7438022774660465e-05, "loss": 0.5858, "step": 12418 }, { "epoch": 2.0272641932982327, "grad_norm": 2.948309898376465, "learning_rate": 1.7437594220675773e-05, "loss": 0.6672, "step": 12419 }, { "epoch": 2.027427451940737, "grad_norm": 2.830263376235962, "learning_rate": 1.7437165636117943e-05, "loss": 0.5476, "step": 12420 }, { "epoch": 2.0275907105832416, "grad_norm": 2.4737985134124756, "learning_rate": 1.7436737020988727e-05, "loss": 0.4587, "step": 12421 }, { "epoch": 2.027753969225746, "grad_norm": 2.576425075531006, "learning_rate": 1.74363083752899e-05, "loss": 0.5476, "step": 12422 }, { "epoch": 2.0279172278682505, "grad_norm": 2.74765944480896, "learning_rate": 1.7435879699023212e-05, "loss": 0.5577, "step": 12423 }, { "epoch": 2.0280804865107545, "grad_norm": 2.753699779510498, "learning_rate": 1.7435450992190436e-05, "loss": 0.5679, "step": 12424 }, { "epoch": 2.028243745153259, "grad_norm": 3.0572900772094727, "learning_rate": 1.7435022254793323e-05, "loss": 0.6496, "step": 12425 }, { "epoch": 2.0284070037957633, "grad_norm": 2.8076295852661133, "learning_rate": 1.743459348683364e-05, "loss": 0.5731, "step": 12426 }, { "epoch": 2.0285702624382678, "grad_norm": 2.5660593509674072, "learning_rate": 1.7434164688313157e-05, "loss": 0.5232, "step": 12427 }, { "epoch": 2.028733521080772, "grad_norm": 2.6791563034057617, "learning_rate": 1.7433735859233625e-05, "loss": 0.6122, "step": 12428 }, { "epoch": 2.0288967797232766, "grad_norm": 2.8812918663024902, "learning_rate": 1.7433306999596813e-05, "loss": 0.6142, "step": 12429 }, { "epoch": 2.029060038365781, "grad_norm": 2.3399364948272705, "learning_rate": 1.7432878109404483e-05, "loss": 0.4705, "step": 12430 }, { "epoch": 2.0292232970082855, "grad_norm": 2.619732141494751, "learning_rate": 1.7432449188658398e-05, "loss": 0.5732, "step": 12431 }, { "epoch": 2.02938655565079, "grad_norm": 2.6776537895202637, "learning_rate": 1.7432020237360322e-05, "loss": 0.5121, "step": 12432 }, { "epoch": 2.029549814293294, "grad_norm": 2.954876184463501, "learning_rate": 1.7431591255512015e-05, "loss": 0.6102, "step": 12433 }, { "epoch": 2.0297130729357984, "grad_norm": 2.4032063484191895, "learning_rate": 1.7431162243115244e-05, "loss": 0.4817, "step": 12434 }, { "epoch": 2.029876331578303, "grad_norm": 3.1737256050109863, "learning_rate": 1.7430733200171767e-05, "loss": 0.6689, "step": 12435 }, { "epoch": 2.0300395902208073, "grad_norm": 2.8064842224121094, "learning_rate": 1.7430304126683358e-05, "loss": 0.5239, "step": 12436 }, { "epoch": 2.0302028488633117, "grad_norm": 5.6394782066345215, "learning_rate": 1.742987502265177e-05, "loss": 0.6251, "step": 12437 }, { "epoch": 2.030366107505816, "grad_norm": 3.34909725189209, "learning_rate": 1.7429445888078774e-05, "loss": 0.6608, "step": 12438 }, { "epoch": 2.0305293661483206, "grad_norm": 2.8156626224517822, "learning_rate": 1.7429016722966126e-05, "loss": 0.5417, "step": 12439 }, { "epoch": 2.030692624790825, "grad_norm": 2.982494354248047, "learning_rate": 1.74285875273156e-05, "loss": 0.5952, "step": 12440 }, { "epoch": 2.0308558834333295, "grad_norm": 2.769404172897339, "learning_rate": 1.7428158301128956e-05, "loss": 0.5174, "step": 12441 }, { "epoch": 2.0310191420758335, "grad_norm": 2.6481235027313232, "learning_rate": 1.7427729044407954e-05, "loss": 0.5599, "step": 12442 }, { "epoch": 2.031182400718338, "grad_norm": 2.4031007289886475, "learning_rate": 1.7427299757154368e-05, "loss": 0.5224, "step": 12443 }, { "epoch": 2.0313456593608423, "grad_norm": 3.2586355209350586, "learning_rate": 1.742687043936995e-05, "loss": 0.573, "step": 12444 }, { "epoch": 2.0315089180033468, "grad_norm": 2.9970197677612305, "learning_rate": 1.742644109105648e-05, "loss": 0.626, "step": 12445 }, { "epoch": 2.031672176645851, "grad_norm": 2.58634877204895, "learning_rate": 1.742601171221571e-05, "loss": 0.5297, "step": 12446 }, { "epoch": 2.0318354352883556, "grad_norm": 3.1005799770355225, "learning_rate": 1.7425582302849412e-05, "loss": 0.5767, "step": 12447 }, { "epoch": 2.03199869393086, "grad_norm": 2.842254877090454, "learning_rate": 1.7425152862959348e-05, "loss": 0.5766, "step": 12448 }, { "epoch": 2.0321619525733645, "grad_norm": 2.609954833984375, "learning_rate": 1.7424723392547284e-05, "loss": 0.5144, "step": 12449 }, { "epoch": 2.032325211215869, "grad_norm": 2.7836544513702393, "learning_rate": 1.7424293891614988e-05, "loss": 0.6759, "step": 12450 }, { "epoch": 2.032488469858373, "grad_norm": 2.744158983230591, "learning_rate": 1.742386436016422e-05, "loss": 0.5553, "step": 12451 }, { "epoch": 2.0326517285008774, "grad_norm": 2.762359619140625, "learning_rate": 1.742343479819675e-05, "loss": 0.4491, "step": 12452 }, { "epoch": 2.032814987143382, "grad_norm": 3.20906662940979, "learning_rate": 1.7423005205714344e-05, "loss": 0.6492, "step": 12453 }, { "epoch": 2.0329782457858863, "grad_norm": 2.2321391105651855, "learning_rate": 1.7422575582718762e-05, "loss": 0.5579, "step": 12454 }, { "epoch": 2.0331415044283907, "grad_norm": 3.193023204803467, "learning_rate": 1.742214592921178e-05, "loss": 0.6188, "step": 12455 }, { "epoch": 2.033304763070895, "grad_norm": 3.3901150226593018, "learning_rate": 1.7421716245195154e-05, "loss": 0.572, "step": 12456 }, { "epoch": 2.0334680217133996, "grad_norm": 2.6272780895233154, "learning_rate": 1.7421286530670658e-05, "loss": 0.499, "step": 12457 }, { "epoch": 2.033631280355904, "grad_norm": 3.0956616401672363, "learning_rate": 1.7420856785640054e-05, "loss": 0.6323, "step": 12458 }, { "epoch": 2.033794538998408, "grad_norm": 2.433002471923828, "learning_rate": 1.742042701010511e-05, "loss": 0.4954, "step": 12459 }, { "epoch": 2.0339577976409124, "grad_norm": 2.725111246109009, "learning_rate": 1.7419997204067592e-05, "loss": 0.5627, "step": 12460 }, { "epoch": 2.034121056283417, "grad_norm": 3.1993420124053955, "learning_rate": 1.7419567367529268e-05, "loss": 0.5879, "step": 12461 }, { "epoch": 2.0342843149259213, "grad_norm": 2.539320945739746, "learning_rate": 1.7419137500491906e-05, "loss": 0.5138, "step": 12462 }, { "epoch": 2.0344475735684258, "grad_norm": 3.4469754695892334, "learning_rate": 1.741870760295727e-05, "loss": 0.7149, "step": 12463 }, { "epoch": 2.03461083221093, "grad_norm": 3.316706657409668, "learning_rate": 1.7418277674927127e-05, "loss": 0.7452, "step": 12464 }, { "epoch": 2.0347740908534346, "grad_norm": 2.7656378746032715, "learning_rate": 1.7417847716403248e-05, "loss": 0.5544, "step": 12465 }, { "epoch": 2.034937349495939, "grad_norm": 2.3888301849365234, "learning_rate": 1.7417417727387392e-05, "loss": 0.4797, "step": 12466 }, { "epoch": 2.0351006081384435, "grad_norm": 2.919074535369873, "learning_rate": 1.741698770788134e-05, "loss": 0.6335, "step": 12467 }, { "epoch": 2.0352638667809475, "grad_norm": 2.2019219398498535, "learning_rate": 1.741655765788685e-05, "loss": 0.4784, "step": 12468 }, { "epoch": 2.035427125423452, "grad_norm": 2.85058331489563, "learning_rate": 1.741612757740569e-05, "loss": 0.602, "step": 12469 }, { "epoch": 2.0355903840659564, "grad_norm": 3.267136812210083, "learning_rate": 1.741569746643963e-05, "loss": 0.6628, "step": 12470 }, { "epoch": 2.035753642708461, "grad_norm": 2.9609766006469727, "learning_rate": 1.7415267324990437e-05, "loss": 0.5313, "step": 12471 }, { "epoch": 2.0359169013509653, "grad_norm": 2.527400493621826, "learning_rate": 1.741483715305988e-05, "loss": 0.5104, "step": 12472 }, { "epoch": 2.0360801599934697, "grad_norm": 2.7136504650115967, "learning_rate": 1.741440695064973e-05, "loss": 0.5745, "step": 12473 }, { "epoch": 2.036243418635974, "grad_norm": 3.0764243602752686, "learning_rate": 1.741397671776175e-05, "loss": 0.6377, "step": 12474 }, { "epoch": 2.0364066772784786, "grad_norm": 2.872257709503174, "learning_rate": 1.7413546454397714e-05, "loss": 0.6082, "step": 12475 }, { "epoch": 2.036569935920983, "grad_norm": 3.178328275680542, "learning_rate": 1.7413116160559386e-05, "loss": 0.652, "step": 12476 }, { "epoch": 2.036733194563487, "grad_norm": 2.6409265995025635, "learning_rate": 1.7412685836248536e-05, "loss": 0.5138, "step": 12477 }, { "epoch": 2.0368964532059914, "grad_norm": 2.7792632579803467, "learning_rate": 1.7412255481466937e-05, "loss": 0.6237, "step": 12478 }, { "epoch": 2.037059711848496, "grad_norm": 3.1744048595428467, "learning_rate": 1.741182509621635e-05, "loss": 0.7815, "step": 12479 }, { "epoch": 2.0372229704910003, "grad_norm": 3.1494557857513428, "learning_rate": 1.741139468049855e-05, "loss": 0.5281, "step": 12480 }, { "epoch": 2.0373862291335048, "grad_norm": 3.227570056915283, "learning_rate": 1.7410964234315305e-05, "loss": 0.6669, "step": 12481 }, { "epoch": 2.037549487776009, "grad_norm": 2.3245296478271484, "learning_rate": 1.7410533757668385e-05, "loss": 0.5035, "step": 12482 }, { "epoch": 2.0377127464185136, "grad_norm": 2.7014529705047607, "learning_rate": 1.741010325055956e-05, "loss": 0.5887, "step": 12483 }, { "epoch": 2.037876005061018, "grad_norm": 2.857391834259033, "learning_rate": 1.7409672712990597e-05, "loss": 0.6706, "step": 12484 }, { "epoch": 2.0380392637035225, "grad_norm": 2.7448599338531494, "learning_rate": 1.740924214496327e-05, "loss": 0.5188, "step": 12485 }, { "epoch": 2.0382025223460265, "grad_norm": 2.126972198486328, "learning_rate": 1.740881154647934e-05, "loss": 0.3956, "step": 12486 }, { "epoch": 2.038365780988531, "grad_norm": 2.6345067024230957, "learning_rate": 1.740838091754059e-05, "loss": 0.5213, "step": 12487 }, { "epoch": 2.0385290396310354, "grad_norm": 2.683643341064453, "learning_rate": 1.7407950258148778e-05, "loss": 0.5192, "step": 12488 }, { "epoch": 2.03869229827354, "grad_norm": 3.2154722213745117, "learning_rate": 1.740751956830568e-05, "loss": 0.5507, "step": 12489 }, { "epoch": 2.0388555569160443, "grad_norm": 2.5612571239471436, "learning_rate": 1.740708884801307e-05, "loss": 0.5497, "step": 12490 }, { "epoch": 2.0390188155585487, "grad_norm": 3.2169604301452637, "learning_rate": 1.7406658097272714e-05, "loss": 0.626, "step": 12491 }, { "epoch": 2.039182074201053, "grad_norm": 3.2897913455963135, "learning_rate": 1.740622731608638e-05, "loss": 0.5285, "step": 12492 }, { "epoch": 2.0393453328435576, "grad_norm": 2.774582862854004, "learning_rate": 1.7405796504455846e-05, "loss": 0.575, "step": 12493 }, { "epoch": 2.039508591486062, "grad_norm": 2.756455183029175, "learning_rate": 1.7405365662382877e-05, "loss": 0.5572, "step": 12494 }, { "epoch": 2.039671850128566, "grad_norm": 2.722454309463501, "learning_rate": 1.7404934789869248e-05, "loss": 0.5581, "step": 12495 }, { "epoch": 2.0398351087710704, "grad_norm": 3.097294569015503, "learning_rate": 1.7404503886916724e-05, "loss": 0.5829, "step": 12496 }, { "epoch": 2.039998367413575, "grad_norm": 2.5587728023529053, "learning_rate": 1.7404072953527084e-05, "loss": 0.5034, "step": 12497 }, { "epoch": 2.0401616260560793, "grad_norm": 2.9887008666992188, "learning_rate": 1.7403641989702094e-05, "loss": 0.6241, "step": 12498 }, { "epoch": 2.0403248846985838, "grad_norm": 2.5739641189575195, "learning_rate": 1.740321099544353e-05, "loss": 0.4903, "step": 12499 }, { "epoch": 2.040488143341088, "grad_norm": 3.1429848670959473, "learning_rate": 1.7402779970753156e-05, "loss": 0.5264, "step": 12500 }, { "epoch": 2.0406514019835926, "grad_norm": 2.706345319747925, "learning_rate": 1.7402348915632756e-05, "loss": 0.5816, "step": 12501 }, { "epoch": 2.040814660626097, "grad_norm": 2.9791226387023926, "learning_rate": 1.740191783008409e-05, "loss": 0.6696, "step": 12502 }, { "epoch": 2.0409779192686015, "grad_norm": 2.9957802295684814, "learning_rate": 1.7401486714108932e-05, "loss": 0.604, "step": 12503 }, { "epoch": 2.0411411779111055, "grad_norm": 2.91165828704834, "learning_rate": 1.740105556770906e-05, "loss": 0.6584, "step": 12504 }, { "epoch": 2.04130443655361, "grad_norm": 2.5970420837402344, "learning_rate": 1.7400624390886243e-05, "loss": 0.5617, "step": 12505 }, { "epoch": 2.0414676951961144, "grad_norm": 2.6727969646453857, "learning_rate": 1.7400193183642253e-05, "loss": 0.6197, "step": 12506 }, { "epoch": 2.041630953838619, "grad_norm": 2.375014543533325, "learning_rate": 1.7399761945978863e-05, "loss": 0.556, "step": 12507 }, { "epoch": 2.0417942124811232, "grad_norm": 2.5695271492004395, "learning_rate": 1.739933067789785e-05, "loss": 0.5575, "step": 12508 }, { "epoch": 2.0419574711236277, "grad_norm": 2.4938833713531494, "learning_rate": 1.7398899379400976e-05, "loss": 0.4988, "step": 12509 }, { "epoch": 2.042120729766132, "grad_norm": 3.18878173828125, "learning_rate": 1.7398468050490023e-05, "loss": 0.6217, "step": 12510 }, { "epoch": 2.0422839884086366, "grad_norm": 2.726706027984619, "learning_rate": 1.7398036691166762e-05, "loss": 0.6386, "step": 12511 }, { "epoch": 2.0424472470511406, "grad_norm": 2.5883102416992188, "learning_rate": 1.7397605301432966e-05, "loss": 0.5178, "step": 12512 }, { "epoch": 2.042610505693645, "grad_norm": 2.27166748046875, "learning_rate": 1.7397173881290408e-05, "loss": 0.4659, "step": 12513 }, { "epoch": 2.0427737643361494, "grad_norm": 2.49833083152771, "learning_rate": 1.7396742430740862e-05, "loss": 0.5358, "step": 12514 }, { "epoch": 2.042937022978654, "grad_norm": 2.945167303085327, "learning_rate": 1.73963109497861e-05, "loss": 0.5856, "step": 12515 }, { "epoch": 2.0431002816211583, "grad_norm": 3.0812625885009766, "learning_rate": 1.7395879438427896e-05, "loss": 1.1343, "step": 12516 }, { "epoch": 2.0432635402636627, "grad_norm": 2.6031668186187744, "learning_rate": 1.7395447896668022e-05, "loss": 0.4598, "step": 12517 }, { "epoch": 2.043426798906167, "grad_norm": 2.2200968265533447, "learning_rate": 1.739501632450826e-05, "loss": 0.4682, "step": 12518 }, { "epoch": 2.0435900575486716, "grad_norm": 2.4443764686584473, "learning_rate": 1.7394584721950373e-05, "loss": 0.5215, "step": 12519 }, { "epoch": 2.043753316191176, "grad_norm": 2.512950897216797, "learning_rate": 1.739415308899614e-05, "loss": 0.5999, "step": 12520 }, { "epoch": 2.04391657483368, "grad_norm": 2.4516475200653076, "learning_rate": 1.7393721425647337e-05, "loss": 0.4113, "step": 12521 }, { "epoch": 2.0440798334761845, "grad_norm": 2.676990032196045, "learning_rate": 1.7393289731905742e-05, "loss": 0.5722, "step": 12522 }, { "epoch": 2.044243092118689, "grad_norm": 2.952603816986084, "learning_rate": 1.739285800777312e-05, "loss": 0.6424, "step": 12523 }, { "epoch": 2.0444063507611934, "grad_norm": 2.802257537841797, "learning_rate": 1.7392426253251248e-05, "loss": 0.5151, "step": 12524 }, { "epoch": 2.044569609403698, "grad_norm": 2.6589653491973877, "learning_rate": 1.7391994468341907e-05, "loss": 0.5139, "step": 12525 }, { "epoch": 2.0447328680462022, "grad_norm": 2.9069418907165527, "learning_rate": 1.7391562653046865e-05, "loss": 0.5648, "step": 12526 }, { "epoch": 2.0448961266887067, "grad_norm": 3.1586496829986572, "learning_rate": 1.73911308073679e-05, "loss": 0.6609, "step": 12527 }, { "epoch": 2.045059385331211, "grad_norm": 3.2806761264801025, "learning_rate": 1.739069893130679e-05, "loss": 0.5488, "step": 12528 }, { "epoch": 2.0452226439737156, "grad_norm": 3.6181986331939697, "learning_rate": 1.7390267024865308e-05, "loss": 0.6148, "step": 12529 }, { "epoch": 2.0453859026162196, "grad_norm": 3.1149473190307617, "learning_rate": 1.7389835088045222e-05, "loss": 0.6006, "step": 12530 }, { "epoch": 2.045549161258724, "grad_norm": 3.025561809539795, "learning_rate": 1.7389403120848323e-05, "loss": 0.6453, "step": 12531 }, { "epoch": 2.0457124199012284, "grad_norm": 2.821462631225586, "learning_rate": 1.7388971123276372e-05, "loss": 0.6, "step": 12532 }, { "epoch": 2.045875678543733, "grad_norm": 2.9322824478149414, "learning_rate": 1.7388539095331154e-05, "loss": 0.7069, "step": 12533 }, { "epoch": 2.0460389371862373, "grad_norm": 2.7825584411621094, "learning_rate": 1.7388107037014438e-05, "loss": 0.5639, "step": 12534 }, { "epoch": 2.0462021958287417, "grad_norm": 3.40573787689209, "learning_rate": 1.738767494832801e-05, "loss": 0.6716, "step": 12535 }, { "epoch": 2.046365454471246, "grad_norm": 2.8783209323883057, "learning_rate": 1.7387242829273634e-05, "loss": 0.6268, "step": 12536 }, { "epoch": 2.0465287131137506, "grad_norm": 2.661639928817749, "learning_rate": 1.7386810679853094e-05, "loss": 0.5441, "step": 12537 }, { "epoch": 2.046691971756255, "grad_norm": 3.0646681785583496, "learning_rate": 1.7386378500068164e-05, "loss": 0.5404, "step": 12538 }, { "epoch": 2.046855230398759, "grad_norm": 2.9794559478759766, "learning_rate": 1.7385946289920623e-05, "loss": 0.5354, "step": 12539 }, { "epoch": 2.0470184890412635, "grad_norm": 2.316577434539795, "learning_rate": 1.7385514049412243e-05, "loss": 0.5108, "step": 12540 }, { "epoch": 2.047181747683768, "grad_norm": 3.288389205932617, "learning_rate": 1.7385081778544803e-05, "loss": 0.6164, "step": 12541 }, { "epoch": 2.0473450063262724, "grad_norm": 3.277421236038208, "learning_rate": 1.738464947732008e-05, "loss": 0.5554, "step": 12542 }, { "epoch": 2.047508264968777, "grad_norm": 2.700469493865967, "learning_rate": 1.7384217145739857e-05, "loss": 0.5264, "step": 12543 }, { "epoch": 2.0476715236112812, "grad_norm": 2.7951295375823975, "learning_rate": 1.73837847838059e-05, "loss": 0.6763, "step": 12544 }, { "epoch": 2.0478347822537857, "grad_norm": 2.690345525741577, "learning_rate": 1.7383352391519993e-05, "loss": 0.4556, "step": 12545 }, { "epoch": 2.04799804089629, "grad_norm": 2.61702036857605, "learning_rate": 1.7382919968883913e-05, "loss": 0.5137, "step": 12546 }, { "epoch": 2.048161299538794, "grad_norm": 3.1408088207244873, "learning_rate": 1.7382487515899436e-05, "loss": 0.6969, "step": 12547 }, { "epoch": 2.0483245581812985, "grad_norm": 2.8411552906036377, "learning_rate": 1.7382055032568343e-05, "loss": 0.5839, "step": 12548 }, { "epoch": 2.048487816823803, "grad_norm": 2.859879970550537, "learning_rate": 1.7381622518892407e-05, "loss": 0.6511, "step": 12549 }, { "epoch": 2.0486510754663074, "grad_norm": 2.702735424041748, "learning_rate": 1.738118997487341e-05, "loss": 0.5883, "step": 12550 }, { "epoch": 2.048814334108812, "grad_norm": 2.465805768966675, "learning_rate": 1.7380757400513123e-05, "loss": 0.484, "step": 12551 }, { "epoch": 2.0489775927513163, "grad_norm": 2.854607582092285, "learning_rate": 1.7380324795813335e-05, "loss": 0.5493, "step": 12552 }, { "epoch": 2.0491408513938207, "grad_norm": 3.27693510055542, "learning_rate": 1.7379892160775816e-05, "loss": 0.6265, "step": 12553 }, { "epoch": 2.049304110036325, "grad_norm": 3.261916399002075, "learning_rate": 1.7379459495402347e-05, "loss": 0.737, "step": 12554 }, { "epoch": 2.0494673686788296, "grad_norm": 3.0215530395507812, "learning_rate": 1.7379026799694707e-05, "loss": 0.6254, "step": 12555 }, { "epoch": 2.0496306273213336, "grad_norm": 3.3655362129211426, "learning_rate": 1.737859407365467e-05, "loss": 0.606, "step": 12556 }, { "epoch": 2.049793885963838, "grad_norm": 3.227003574371338, "learning_rate": 1.7378161317284026e-05, "loss": 0.69, "step": 12557 }, { "epoch": 2.0499571446063425, "grad_norm": 2.9248390197753906, "learning_rate": 1.7377728530584543e-05, "loss": 0.552, "step": 12558 }, { "epoch": 2.050120403248847, "grad_norm": 3.16125750541687, "learning_rate": 1.7377295713558004e-05, "loss": 0.6437, "step": 12559 }, { "epoch": 2.0502836618913514, "grad_norm": 3.269819736480713, "learning_rate": 1.7376862866206188e-05, "loss": 0.5864, "step": 12560 }, { "epoch": 2.050446920533856, "grad_norm": 2.9852633476257324, "learning_rate": 1.7376429988530873e-05, "loss": 0.6686, "step": 12561 }, { "epoch": 2.0506101791763602, "grad_norm": 2.580005407333374, "learning_rate": 1.7375997080533842e-05, "loss": 0.4854, "step": 12562 }, { "epoch": 2.0507734378188647, "grad_norm": 2.812957286834717, "learning_rate": 1.737556414221687e-05, "loss": 0.5719, "step": 12563 }, { "epoch": 2.050936696461369, "grad_norm": 2.7673180103302, "learning_rate": 1.737513117358174e-05, "loss": 0.6912, "step": 12564 }, { "epoch": 2.051099955103873, "grad_norm": 3.040127754211426, "learning_rate": 1.737469817463023e-05, "loss": 0.6258, "step": 12565 }, { "epoch": 2.0512632137463775, "grad_norm": 2.8237011432647705, "learning_rate": 1.737426514536412e-05, "loss": 0.5418, "step": 12566 }, { "epoch": 2.051426472388882, "grad_norm": 2.4619476795196533, "learning_rate": 1.7373832085785193e-05, "loss": 0.5038, "step": 12567 }, { "epoch": 2.0515897310313864, "grad_norm": 2.8037452697753906, "learning_rate": 1.7373398995895227e-05, "loss": 0.5861, "step": 12568 }, { "epoch": 2.051752989673891, "grad_norm": 3.178370237350464, "learning_rate": 1.7372965875696e-05, "loss": 0.6639, "step": 12569 }, { "epoch": 2.0519162483163953, "grad_norm": 2.7694766521453857, "learning_rate": 1.737253272518929e-05, "loss": 0.5437, "step": 12570 }, { "epoch": 2.0520795069588997, "grad_norm": 2.759462356567383, "learning_rate": 1.7372099544376887e-05, "loss": 0.5706, "step": 12571 }, { "epoch": 2.052242765601404, "grad_norm": 3.048884391784668, "learning_rate": 1.7371666333260565e-05, "loss": 0.6025, "step": 12572 }, { "epoch": 2.0524060242439086, "grad_norm": 2.9301350116729736, "learning_rate": 1.7371233091842106e-05, "loss": 0.5998, "step": 12573 }, { "epoch": 2.0525692828864126, "grad_norm": 2.5472769737243652, "learning_rate": 1.7370799820123294e-05, "loss": 0.4404, "step": 12574 }, { "epoch": 2.052732541528917, "grad_norm": 3.0131614208221436, "learning_rate": 1.7370366518105902e-05, "loss": 0.7584, "step": 12575 }, { "epoch": 2.0528958001714215, "grad_norm": 2.4179153442382812, "learning_rate": 1.736993318579172e-05, "loss": 0.5047, "step": 12576 }, { "epoch": 2.053059058813926, "grad_norm": 3.1527328491210938, "learning_rate": 1.7369499823182523e-05, "loss": 0.6587, "step": 12577 }, { "epoch": 2.0532223174564304, "grad_norm": 2.8128936290740967, "learning_rate": 1.7369066430280096e-05, "loss": 0.5204, "step": 12578 }, { "epoch": 2.053385576098935, "grad_norm": 4.142251491546631, "learning_rate": 1.7368633007086222e-05, "loss": 0.5503, "step": 12579 }, { "epoch": 2.0535488347414392, "grad_norm": 3.434277057647705, "learning_rate": 1.7368199553602677e-05, "loss": 0.5871, "step": 12580 }, { "epoch": 2.0537120933839437, "grad_norm": 2.7561604976654053, "learning_rate": 1.7367766069831246e-05, "loss": 0.5256, "step": 12581 }, { "epoch": 2.053875352026448, "grad_norm": 2.8380398750305176, "learning_rate": 1.7367332555773712e-05, "loss": 0.613, "step": 12582 }, { "epoch": 2.054038610668952, "grad_norm": 2.927183151245117, "learning_rate": 1.7366899011431853e-05, "loss": 0.6388, "step": 12583 }, { "epoch": 2.0542018693114565, "grad_norm": 3.1078269481658936, "learning_rate": 1.736646543680746e-05, "loss": 0.6343, "step": 12584 }, { "epoch": 2.054365127953961, "grad_norm": 2.738532066345215, "learning_rate": 1.7366031831902304e-05, "loss": 0.5349, "step": 12585 }, { "epoch": 2.0545283865964654, "grad_norm": 2.7546424865722656, "learning_rate": 1.7365598196718174e-05, "loss": 0.6107, "step": 12586 }, { "epoch": 2.05469164523897, "grad_norm": 2.3396966457366943, "learning_rate": 1.736516453125685e-05, "loss": 0.4502, "step": 12587 }, { "epoch": 2.0548549038814743, "grad_norm": 2.5593602657318115, "learning_rate": 1.7364730835520115e-05, "loss": 0.5214, "step": 12588 }, { "epoch": 2.0550181625239787, "grad_norm": 2.4379913806915283, "learning_rate": 1.7364297109509753e-05, "loss": 0.5038, "step": 12589 }, { "epoch": 2.055181421166483, "grad_norm": 2.7577221393585205, "learning_rate": 1.736386335322755e-05, "loss": 0.5118, "step": 12590 }, { "epoch": 2.0553446798089876, "grad_norm": 2.6580958366394043, "learning_rate": 1.736342956667528e-05, "loss": 0.4944, "step": 12591 }, { "epoch": 2.0555079384514916, "grad_norm": 2.9295871257781982, "learning_rate": 1.7362995749854734e-05, "loss": 0.6237, "step": 12592 }, { "epoch": 2.055671197093996, "grad_norm": 2.5009381771087646, "learning_rate": 1.736256190276769e-05, "loss": 0.5095, "step": 12593 }, { "epoch": 2.0558344557365005, "grad_norm": 2.782796621322632, "learning_rate": 1.736212802541594e-05, "loss": 0.5495, "step": 12594 }, { "epoch": 2.055997714379005, "grad_norm": 2.846365451812744, "learning_rate": 1.7361694117801257e-05, "loss": 0.6775, "step": 12595 }, { "epoch": 2.0561609730215094, "grad_norm": 2.5632777214050293, "learning_rate": 1.7361260179925427e-05, "loss": 0.5232, "step": 12596 }, { "epoch": 2.056324231664014, "grad_norm": 2.6608879566192627, "learning_rate": 1.736082621179024e-05, "loss": 0.531, "step": 12597 }, { "epoch": 2.0564874903065182, "grad_norm": 2.8472931385040283, "learning_rate": 1.7360392213397475e-05, "loss": 0.5324, "step": 12598 }, { "epoch": 2.0566507489490227, "grad_norm": 3.3626809120178223, "learning_rate": 1.7359958184748916e-05, "loss": 0.6932, "step": 12599 }, { "epoch": 2.0568140075915267, "grad_norm": 3.0045320987701416, "learning_rate": 1.7359524125846353e-05, "loss": 0.6356, "step": 12600 }, { "epoch": 2.056977266234031, "grad_norm": 2.6626474857330322, "learning_rate": 1.735909003669156e-05, "loss": 0.496, "step": 12601 }, { "epoch": 2.0571405248765355, "grad_norm": 3.208489418029785, "learning_rate": 1.7358655917286325e-05, "loss": 0.6841, "step": 12602 }, { "epoch": 2.05730378351904, "grad_norm": 2.708604097366333, "learning_rate": 1.7358221767632438e-05, "loss": 0.5433, "step": 12603 }, { "epoch": 2.0574670421615444, "grad_norm": 3.209265947341919, "learning_rate": 1.735778758773168e-05, "loss": 0.5741, "step": 12604 }, { "epoch": 2.057630300804049, "grad_norm": 3.3074638843536377, "learning_rate": 1.735735337758583e-05, "loss": 0.6525, "step": 12605 }, { "epoch": 2.0577935594465533, "grad_norm": 3.3128108978271484, "learning_rate": 1.7356919137196686e-05, "loss": 0.6245, "step": 12606 }, { "epoch": 2.0579568180890577, "grad_norm": 3.9417309761047363, "learning_rate": 1.735648486656602e-05, "loss": 0.7699, "step": 12607 }, { "epoch": 2.058120076731562, "grad_norm": 2.883862257003784, "learning_rate": 1.7356050565695627e-05, "loss": 0.5971, "step": 12608 }, { "epoch": 2.058283335374066, "grad_norm": 3.2048332691192627, "learning_rate": 1.7355616234587283e-05, "loss": 0.5953, "step": 12609 }, { "epoch": 2.0584465940165706, "grad_norm": 3.766388416290283, "learning_rate": 1.735518187324278e-05, "loss": 0.6556, "step": 12610 }, { "epoch": 2.058609852659075, "grad_norm": 3.074613094329834, "learning_rate": 1.73547474816639e-05, "loss": 0.5847, "step": 12611 }, { "epoch": 2.0587731113015795, "grad_norm": 2.9233038425445557, "learning_rate": 1.7354313059852432e-05, "loss": 0.5325, "step": 12612 }, { "epoch": 2.058936369944084, "grad_norm": 3.034245729446411, "learning_rate": 1.735387860781016e-05, "loss": 0.6141, "step": 12613 }, { "epoch": 2.0590996285865883, "grad_norm": 3.5684187412261963, "learning_rate": 1.735344412553887e-05, "loss": 0.6118, "step": 12614 }, { "epoch": 2.059262887229093, "grad_norm": 2.663642644882202, "learning_rate": 1.735300961304035e-05, "loss": 0.5376, "step": 12615 }, { "epoch": 2.0594261458715972, "grad_norm": 3.10274076461792, "learning_rate": 1.735257507031638e-05, "loss": 0.6351, "step": 12616 }, { "epoch": 2.0595894045141017, "grad_norm": 2.743539333343506, "learning_rate": 1.735214049736875e-05, "loss": 0.5416, "step": 12617 }, { "epoch": 2.0597526631566057, "grad_norm": 3.1485090255737305, "learning_rate": 1.7351705894199247e-05, "loss": 0.7198, "step": 12618 }, { "epoch": 2.05991592179911, "grad_norm": 2.9781341552734375, "learning_rate": 1.735127126080966e-05, "loss": 0.4745, "step": 12619 }, { "epoch": 2.0600791804416145, "grad_norm": 2.4043877124786377, "learning_rate": 1.7350836597201767e-05, "loss": 0.4657, "step": 12620 }, { "epoch": 2.060242439084119, "grad_norm": 3.014019012451172, "learning_rate": 1.7350401903377364e-05, "loss": 0.5839, "step": 12621 }, { "epoch": 2.0604056977266234, "grad_norm": 2.8024559020996094, "learning_rate": 1.7349967179338235e-05, "loss": 0.5479, "step": 12622 }, { "epoch": 2.060568956369128, "grad_norm": 3.2411277294158936, "learning_rate": 1.734953242508617e-05, "loss": 0.6467, "step": 12623 }, { "epoch": 2.0607322150116323, "grad_norm": 3.4933433532714844, "learning_rate": 1.7349097640622946e-05, "loss": 0.5453, "step": 12624 }, { "epoch": 2.0608954736541367, "grad_norm": 2.8888628482818604, "learning_rate": 1.7348662825950356e-05, "loss": 0.6078, "step": 12625 }, { "epoch": 2.061058732296641, "grad_norm": 2.7661099433898926, "learning_rate": 1.734822798107019e-05, "loss": 0.5713, "step": 12626 }, { "epoch": 2.061221990939145, "grad_norm": 2.495985269546509, "learning_rate": 1.7347793105984236e-05, "loss": 0.4935, "step": 12627 }, { "epoch": 2.0613852495816496, "grad_norm": 3.3738534450531006, "learning_rate": 1.7347358200694277e-05, "loss": 0.6987, "step": 12628 }, { "epoch": 2.061548508224154, "grad_norm": 3.0073163509368896, "learning_rate": 1.73469232652021e-05, "loss": 0.6229, "step": 12629 }, { "epoch": 2.0617117668666585, "grad_norm": 2.7898972034454346, "learning_rate": 1.73464882995095e-05, "loss": 0.58, "step": 12630 }, { "epoch": 2.061875025509163, "grad_norm": 2.6300177574157715, "learning_rate": 1.7346053303618256e-05, "loss": 0.5831, "step": 12631 }, { "epoch": 2.0620382841516673, "grad_norm": 3.302380084991455, "learning_rate": 1.7345618277530164e-05, "loss": 1.2639, "step": 12632 }, { "epoch": 2.062201542794172, "grad_norm": 2.5088586807250977, "learning_rate": 1.734518322124701e-05, "loss": 0.5481, "step": 12633 }, { "epoch": 2.062364801436676, "grad_norm": 2.6496946811676025, "learning_rate": 1.734474813477058e-05, "loss": 0.5728, "step": 12634 }, { "epoch": 2.06252806007918, "grad_norm": 3.094527244567871, "learning_rate": 1.7344313018102664e-05, "loss": 0.5989, "step": 12635 }, { "epoch": 2.0626913187216847, "grad_norm": 2.9237420558929443, "learning_rate": 1.7343877871245047e-05, "loss": 0.5881, "step": 12636 }, { "epoch": 2.062854577364189, "grad_norm": 3.01237154006958, "learning_rate": 1.7343442694199525e-05, "loss": 0.6274, "step": 12637 }, { "epoch": 2.0630178360066935, "grad_norm": 2.6950440406799316, "learning_rate": 1.7343007486967883e-05, "loss": 0.5285, "step": 12638 }, { "epoch": 2.063181094649198, "grad_norm": 2.8582727909088135, "learning_rate": 1.7342572249551907e-05, "loss": 0.6186, "step": 12639 }, { "epoch": 2.0633443532917024, "grad_norm": 2.9644603729248047, "learning_rate": 1.734213698195339e-05, "loss": 0.6449, "step": 12640 }, { "epoch": 2.063507611934207, "grad_norm": 3.142723560333252, "learning_rate": 1.7341701684174122e-05, "loss": 0.6579, "step": 12641 }, { "epoch": 2.0636708705767113, "grad_norm": 2.919760227203369, "learning_rate": 1.7341266356215887e-05, "loss": 0.6605, "step": 12642 }, { "epoch": 2.0638341292192157, "grad_norm": 3.193331718444824, "learning_rate": 1.734083099808048e-05, "loss": 0.5517, "step": 12643 }, { "epoch": 2.06399738786172, "grad_norm": 2.557814359664917, "learning_rate": 1.7340395609769692e-05, "loss": 0.5799, "step": 12644 }, { "epoch": 2.064160646504224, "grad_norm": 3.263476610183716, "learning_rate": 1.733996019128531e-05, "loss": 0.5852, "step": 12645 }, { "epoch": 2.0643239051467286, "grad_norm": 2.9768869876861572, "learning_rate": 1.7339524742629114e-05, "loss": 0.5674, "step": 12646 }, { "epoch": 2.064487163789233, "grad_norm": 3.1167423725128174, "learning_rate": 1.7339089263802907e-05, "loss": 0.5782, "step": 12647 }, { "epoch": 2.0646504224317375, "grad_norm": 2.923877239227295, "learning_rate": 1.733865375480848e-05, "loss": 0.5829, "step": 12648 }, { "epoch": 2.064813681074242, "grad_norm": 2.8381693363189697, "learning_rate": 1.7338218215647614e-05, "loss": 0.5815, "step": 12649 }, { "epoch": 2.0649769397167463, "grad_norm": 2.8597402572631836, "learning_rate": 1.7337782646322106e-05, "loss": 0.5274, "step": 12650 }, { "epoch": 2.065140198359251, "grad_norm": 3.086488723754883, "learning_rate": 1.7337347046833742e-05, "loss": 0.6341, "step": 12651 }, { "epoch": 2.065303457001755, "grad_norm": 3.117313861846924, "learning_rate": 1.7336911417184316e-05, "loss": 0.583, "step": 12652 }, { "epoch": 2.065466715644259, "grad_norm": 3.1980764865875244, "learning_rate": 1.733647575737562e-05, "loss": 0.7792, "step": 12653 }, { "epoch": 2.0656299742867636, "grad_norm": 2.720379114151001, "learning_rate": 1.7336040067409437e-05, "loss": 0.5765, "step": 12654 }, { "epoch": 2.065793232929268, "grad_norm": 2.7946038246154785, "learning_rate": 1.733560434728757e-05, "loss": 0.5481, "step": 12655 }, { "epoch": 2.0659564915717725, "grad_norm": 2.843369960784912, "learning_rate": 1.73351685970118e-05, "loss": 0.705, "step": 12656 }, { "epoch": 2.066119750214277, "grad_norm": 3.304990291595459, "learning_rate": 1.7334732816583918e-05, "loss": 0.5951, "step": 12657 }, { "epoch": 2.0662830088567814, "grad_norm": 2.7308218479156494, "learning_rate": 1.7334297006005724e-05, "loss": 0.5954, "step": 12658 }, { "epoch": 2.066446267499286, "grad_norm": 3.205947160720825, "learning_rate": 1.7333861165279e-05, "loss": 0.6382, "step": 12659 }, { "epoch": 2.0666095261417903, "grad_norm": 2.410499095916748, "learning_rate": 1.733342529440555e-05, "loss": 0.5321, "step": 12660 }, { "epoch": 2.0667727847842947, "grad_norm": 2.595820665359497, "learning_rate": 1.733298939338715e-05, "loss": 0.5154, "step": 12661 }, { "epoch": 2.0669360434267987, "grad_norm": 2.683582305908203, "learning_rate": 1.7332553462225604e-05, "loss": 0.449, "step": 12662 }, { "epoch": 2.067099302069303, "grad_norm": 2.6733181476593018, "learning_rate": 1.7332117500922698e-05, "loss": 0.5736, "step": 12663 }, { "epoch": 2.0672625607118076, "grad_norm": 2.9768483638763428, "learning_rate": 1.733168150948022e-05, "loss": 0.6007, "step": 12664 }, { "epoch": 2.067425819354312, "grad_norm": 2.9106438159942627, "learning_rate": 1.7331245487899974e-05, "loss": 0.5268, "step": 12665 }, { "epoch": 2.0675890779968165, "grad_norm": 3.016099691390991, "learning_rate": 1.7330809436183744e-05, "loss": 0.5828, "step": 12666 }, { "epoch": 2.067752336639321, "grad_norm": 2.793783664703369, "learning_rate": 1.733037335433333e-05, "loss": 0.56, "step": 12667 }, { "epoch": 2.0679155952818253, "grad_norm": 2.437998056411743, "learning_rate": 1.7329937242350508e-05, "loss": 0.4458, "step": 12668 }, { "epoch": 2.0680788539243298, "grad_norm": 3.101792097091675, "learning_rate": 1.732950110023709e-05, "loss": 0.6187, "step": 12669 }, { "epoch": 2.068242112566834, "grad_norm": 3.300938606262207, "learning_rate": 1.7329064927994858e-05, "loss": 0.5538, "step": 12670 }, { "epoch": 2.068405371209338, "grad_norm": 2.890350580215454, "learning_rate": 1.732862872562561e-05, "loss": 0.5724, "step": 12671 }, { "epoch": 2.0685686298518426, "grad_norm": 3.4967429637908936, "learning_rate": 1.7328192493131133e-05, "loss": 0.6415, "step": 12672 }, { "epoch": 2.068731888494347, "grad_norm": 3.1879024505615234, "learning_rate": 1.7327756230513223e-05, "loss": 0.6008, "step": 12673 }, { "epoch": 2.0688951471368515, "grad_norm": 3.0733094215393066, "learning_rate": 1.7327319937773677e-05, "loss": 0.6307, "step": 12674 }, { "epoch": 2.069058405779356, "grad_norm": 2.986625909805298, "learning_rate": 1.7326883614914283e-05, "loss": 0.6158, "step": 12675 }, { "epoch": 2.0692216644218604, "grad_norm": 2.5024123191833496, "learning_rate": 1.732644726193684e-05, "loss": 0.5011, "step": 12676 }, { "epoch": 2.069384923064365, "grad_norm": 2.5689046382904053, "learning_rate": 1.7326010878843138e-05, "loss": 0.5664, "step": 12677 }, { "epoch": 2.0695481817068693, "grad_norm": 3.3047282695770264, "learning_rate": 1.732557446563497e-05, "loss": 0.7886, "step": 12678 }, { "epoch": 2.0697114403493737, "grad_norm": 2.9494662284851074, "learning_rate": 1.7325138022314133e-05, "loss": 0.7048, "step": 12679 }, { "epoch": 2.0698746989918777, "grad_norm": 2.569295644760132, "learning_rate": 1.732470154888242e-05, "loss": 0.4685, "step": 12680 }, { "epoch": 2.070037957634382, "grad_norm": 2.884307622909546, "learning_rate": 1.7324265045341623e-05, "loss": 0.6204, "step": 12681 }, { "epoch": 2.0702012162768866, "grad_norm": 2.8442208766937256, "learning_rate": 1.732382851169354e-05, "loss": 0.6073, "step": 12682 }, { "epoch": 2.070364474919391, "grad_norm": 2.3921234607696533, "learning_rate": 1.732339194793996e-05, "loss": 0.4886, "step": 12683 }, { "epoch": 2.0705277335618955, "grad_norm": 3.4833626747131348, "learning_rate": 1.7322955354082686e-05, "loss": 0.6884, "step": 12684 }, { "epoch": 2.0706909922044, "grad_norm": 2.9465973377227783, "learning_rate": 1.7322518730123503e-05, "loss": 0.6697, "step": 12685 }, { "epoch": 2.0708542508469043, "grad_norm": 2.9098658561706543, "learning_rate": 1.732208207606421e-05, "loss": 0.5874, "step": 12686 }, { "epoch": 2.0710175094894088, "grad_norm": 2.9065098762512207, "learning_rate": 1.7321645391906607e-05, "loss": 0.5976, "step": 12687 }, { "epoch": 2.0711807681319128, "grad_norm": 3.080155849456787, "learning_rate": 1.732120867765248e-05, "loss": 0.6034, "step": 12688 }, { "epoch": 2.071344026774417, "grad_norm": 2.9294040203094482, "learning_rate": 1.732077193330363e-05, "loss": 0.6358, "step": 12689 }, { "epoch": 2.0715072854169216, "grad_norm": 3.0170881748199463, "learning_rate": 1.7320335158861852e-05, "loss": 0.6004, "step": 12690 }, { "epoch": 2.071670544059426, "grad_norm": 2.5953261852264404, "learning_rate": 1.731989835432894e-05, "loss": 0.5531, "step": 12691 }, { "epoch": 2.0718338027019305, "grad_norm": 2.7693099975585938, "learning_rate": 1.731946151970669e-05, "loss": 0.5946, "step": 12692 }, { "epoch": 2.071997061344435, "grad_norm": 3.0085718631744385, "learning_rate": 1.7319024654996897e-05, "loss": 0.6717, "step": 12693 }, { "epoch": 2.0721603199869394, "grad_norm": 2.5370192527770996, "learning_rate": 1.731858776020136e-05, "loss": 0.5482, "step": 12694 }, { "epoch": 2.072323578629444, "grad_norm": 4.4399094581604, "learning_rate": 1.7318150835321866e-05, "loss": 0.5758, "step": 12695 }, { "epoch": 2.0724868372719483, "grad_norm": 2.7652602195739746, "learning_rate": 1.731771388036022e-05, "loss": 0.5347, "step": 12696 }, { "epoch": 2.0726500959144523, "grad_norm": 2.69368839263916, "learning_rate": 1.731727689531822e-05, "loss": 0.5383, "step": 12697 }, { "epoch": 2.0728133545569567, "grad_norm": 2.4695310592651367, "learning_rate": 1.731683988019765e-05, "loss": 0.48, "step": 12698 }, { "epoch": 2.072976613199461, "grad_norm": 2.4270193576812744, "learning_rate": 1.7316402835000317e-05, "loss": 0.4916, "step": 12699 }, { "epoch": 2.0731398718419656, "grad_norm": 2.448477268218994, "learning_rate": 1.7315965759728014e-05, "loss": 0.5399, "step": 12700 }, { "epoch": 2.07330313048447, "grad_norm": 2.6404356956481934, "learning_rate": 1.731552865438254e-05, "loss": 0.5287, "step": 12701 }, { "epoch": 2.0734663891269745, "grad_norm": 2.627135992050171, "learning_rate": 1.731509151896569e-05, "loss": 0.5807, "step": 12702 }, { "epoch": 2.073629647769479, "grad_norm": 3.193544864654541, "learning_rate": 1.731465435347926e-05, "loss": 0.6016, "step": 12703 }, { "epoch": 2.0737929064119833, "grad_norm": 3.159553050994873, "learning_rate": 1.731421715792505e-05, "loss": 0.6576, "step": 12704 }, { "epoch": 2.0739561650544878, "grad_norm": 2.8064987659454346, "learning_rate": 1.731377993230485e-05, "loss": 0.6357, "step": 12705 }, { "epoch": 2.0741194236969918, "grad_norm": 2.5398406982421875, "learning_rate": 1.731334267662047e-05, "loss": 0.5083, "step": 12706 }, { "epoch": 2.074282682339496, "grad_norm": 2.7317118644714355, "learning_rate": 1.7312905390873693e-05, "loss": 0.6248, "step": 12707 }, { "epoch": 2.0744459409820006, "grad_norm": 3.440941333770752, "learning_rate": 1.7312468075066323e-05, "loss": 1.2007, "step": 12708 }, { "epoch": 2.074609199624505, "grad_norm": 3.169398307800293, "learning_rate": 1.7312030729200163e-05, "loss": 0.7441, "step": 12709 }, { "epoch": 2.0747724582670095, "grad_norm": 2.502610921859741, "learning_rate": 1.7311593353277005e-05, "loss": 0.4863, "step": 12710 }, { "epoch": 2.074935716909514, "grad_norm": 3.0548946857452393, "learning_rate": 1.7311155947298644e-05, "loss": 0.6089, "step": 12711 }, { "epoch": 2.0750989755520184, "grad_norm": 2.460028886795044, "learning_rate": 1.731071851126688e-05, "loss": 0.4244, "step": 12712 }, { "epoch": 2.075262234194523, "grad_norm": 2.0252792835235596, "learning_rate": 1.7310281045183516e-05, "loss": 0.404, "step": 12713 }, { "epoch": 2.0754254928370273, "grad_norm": 2.8093128204345703, "learning_rate": 1.730984354905035e-05, "loss": 0.5511, "step": 12714 }, { "epoch": 2.0755887514795313, "grad_norm": 2.6677780151367188, "learning_rate": 1.7309406022869172e-05, "loss": 0.5463, "step": 12715 }, { "epoch": 2.0757520101220357, "grad_norm": 2.3081166744232178, "learning_rate": 1.730896846664179e-05, "loss": 0.4725, "step": 12716 }, { "epoch": 2.07591526876454, "grad_norm": 2.7461392879486084, "learning_rate": 1.7308530880369993e-05, "loss": 0.5622, "step": 12717 }, { "epoch": 2.0760785274070446, "grad_norm": 2.9326729774475098, "learning_rate": 1.730809326405559e-05, "loss": 0.5549, "step": 12718 }, { "epoch": 2.076241786049549, "grad_norm": 2.897737503051758, "learning_rate": 1.7307655617700373e-05, "loss": 0.5579, "step": 12719 }, { "epoch": 2.0764050446920534, "grad_norm": 3.0029454231262207, "learning_rate": 1.7307217941306145e-05, "loss": 0.5167, "step": 12720 }, { "epoch": 2.076568303334558, "grad_norm": 3.109356641769409, "learning_rate": 1.73067802348747e-05, "loss": 0.5568, "step": 12721 }, { "epoch": 2.0767315619770623, "grad_norm": 2.487921714782715, "learning_rate": 1.7306342498407846e-05, "loss": 0.5773, "step": 12722 }, { "epoch": 2.0768948206195668, "grad_norm": 2.794912099838257, "learning_rate": 1.7305904731907374e-05, "loss": 0.6076, "step": 12723 }, { "epoch": 2.0770580792620708, "grad_norm": 2.867004871368408, "learning_rate": 1.7305466935375085e-05, "loss": 0.5758, "step": 12724 }, { "epoch": 2.077221337904575, "grad_norm": 2.761647939682007, "learning_rate": 1.7305029108812777e-05, "loss": 0.5753, "step": 12725 }, { "epoch": 2.0773845965470796, "grad_norm": 2.272822141647339, "learning_rate": 1.7304591252222257e-05, "loss": 0.4864, "step": 12726 }, { "epoch": 2.077547855189584, "grad_norm": 3.055262804031372, "learning_rate": 1.730415336560532e-05, "loss": 0.6514, "step": 12727 }, { "epoch": 2.0777111138320885, "grad_norm": 2.515157699584961, "learning_rate": 1.7303715448963765e-05, "loss": 0.5495, "step": 12728 }, { "epoch": 2.077874372474593, "grad_norm": 2.8691534996032715, "learning_rate": 1.7303277502299396e-05, "loss": 0.5605, "step": 12729 }, { "epoch": 2.0780376311170974, "grad_norm": 2.8998239040374756, "learning_rate": 1.730283952561401e-05, "loss": 0.5488, "step": 12730 }, { "epoch": 2.078200889759602, "grad_norm": 2.7422308921813965, "learning_rate": 1.7302401518909407e-05, "loss": 0.5516, "step": 12731 }, { "epoch": 2.0783641484021063, "grad_norm": 2.087099075317383, "learning_rate": 1.730196348218739e-05, "loss": 0.4502, "step": 12732 }, { "epoch": 2.0785274070446103, "grad_norm": 3.2372288703918457, "learning_rate": 1.7301525415449757e-05, "loss": 0.5674, "step": 12733 }, { "epoch": 2.0786906656871147, "grad_norm": 3.389711856842041, "learning_rate": 1.7301087318698312e-05, "loss": 0.7611, "step": 12734 }, { "epoch": 2.078853924329619, "grad_norm": 2.5418756008148193, "learning_rate": 1.730064919193485e-05, "loss": 0.4649, "step": 12735 }, { "epoch": 2.0790171829721236, "grad_norm": 3.3269379138946533, "learning_rate": 1.7300211035161182e-05, "loss": 0.6136, "step": 12736 }, { "epoch": 2.079180441614628, "grad_norm": 3.4303195476531982, "learning_rate": 1.7299772848379096e-05, "loss": 0.6954, "step": 12737 }, { "epoch": 2.0793437002571324, "grad_norm": 2.4788079261779785, "learning_rate": 1.7299334631590407e-05, "loss": 0.5524, "step": 12738 }, { "epoch": 2.079506958899637, "grad_norm": 3.0857410430908203, "learning_rate": 1.7298896384796907e-05, "loss": 0.6024, "step": 12739 }, { "epoch": 2.0796702175421413, "grad_norm": 3.0688793659210205, "learning_rate": 1.7298458108000396e-05, "loss": 0.5999, "step": 12740 }, { "epoch": 2.0798334761846453, "grad_norm": 2.925617218017578, "learning_rate": 1.7298019801202683e-05, "loss": 0.6341, "step": 12741 }, { "epoch": 2.0799967348271498, "grad_norm": 2.9471776485443115, "learning_rate": 1.7297581464405566e-05, "loss": 0.5732, "step": 12742 }, { "epoch": 2.080159993469654, "grad_norm": 2.8825032711029053, "learning_rate": 1.729714309761085e-05, "loss": 0.558, "step": 12743 }, { "epoch": 2.0803232521121586, "grad_norm": 2.6794817447662354, "learning_rate": 1.729670470082033e-05, "loss": 0.581, "step": 12744 }, { "epoch": 2.080486510754663, "grad_norm": 2.4297075271606445, "learning_rate": 1.7296266274035812e-05, "loss": 0.5038, "step": 12745 }, { "epoch": 2.0806497693971675, "grad_norm": 3.0202014446258545, "learning_rate": 1.72958278172591e-05, "loss": 0.6032, "step": 12746 }, { "epoch": 2.080813028039672, "grad_norm": 3.0425479412078857, "learning_rate": 1.7295389330491995e-05, "loss": 0.6018, "step": 12747 }, { "epoch": 2.0809762866821764, "grad_norm": 2.9462668895721436, "learning_rate": 1.72949508137363e-05, "loss": 0.5722, "step": 12748 }, { "epoch": 2.081139545324681, "grad_norm": 2.2694311141967773, "learning_rate": 1.7294512266993812e-05, "loss": 0.4346, "step": 12749 }, { "epoch": 2.081302803967185, "grad_norm": 3.031949996948242, "learning_rate": 1.7294073690266343e-05, "loss": 0.6366, "step": 12750 }, { "epoch": 2.0814660626096892, "grad_norm": 3.0911543369293213, "learning_rate": 1.729363508355569e-05, "loss": 0.6592, "step": 12751 }, { "epoch": 2.0816293212521937, "grad_norm": 2.8999433517456055, "learning_rate": 1.7293196446863658e-05, "loss": 0.5719, "step": 12752 }, { "epoch": 2.081792579894698, "grad_norm": 2.563465118408203, "learning_rate": 1.7292757780192047e-05, "loss": 0.4784, "step": 12753 }, { "epoch": 2.0819558385372026, "grad_norm": 2.6158149242401123, "learning_rate": 1.7292319083542666e-05, "loss": 0.4983, "step": 12754 }, { "epoch": 2.082119097179707, "grad_norm": 3.2252700328826904, "learning_rate": 1.7291880356917314e-05, "loss": 0.6479, "step": 12755 }, { "epoch": 2.0822823558222114, "grad_norm": 2.9171574115753174, "learning_rate": 1.729144160031779e-05, "loss": 0.6512, "step": 12756 }, { "epoch": 2.082445614464716, "grad_norm": 2.8544561862945557, "learning_rate": 1.7291002813745907e-05, "loss": 0.5211, "step": 12757 }, { "epoch": 2.0826088731072203, "grad_norm": 2.3914577960968018, "learning_rate": 1.7290563997203468e-05, "loss": 0.54, "step": 12758 }, { "epoch": 2.0827721317497243, "grad_norm": 2.980010509490967, "learning_rate": 1.7290125150692265e-05, "loss": 0.5779, "step": 12759 }, { "epoch": 2.0829353903922287, "grad_norm": 2.647876501083374, "learning_rate": 1.7289686274214116e-05, "loss": 0.5327, "step": 12760 }, { "epoch": 2.083098649034733, "grad_norm": 2.3072433471679688, "learning_rate": 1.728924736777082e-05, "loss": 0.4886, "step": 12761 }, { "epoch": 2.0832619076772376, "grad_norm": 2.5240907669067383, "learning_rate": 1.7288808431364178e-05, "loss": 0.5357, "step": 12762 }, { "epoch": 2.083425166319742, "grad_norm": 2.965798854827881, "learning_rate": 1.7288369464995996e-05, "loss": 0.6024, "step": 12763 }, { "epoch": 2.0835884249622465, "grad_norm": 3.2356820106506348, "learning_rate": 1.728793046866808e-05, "loss": 0.6292, "step": 12764 }, { "epoch": 2.083751683604751, "grad_norm": 3.0938780307769775, "learning_rate": 1.7287491442382234e-05, "loss": 0.5912, "step": 12765 }, { "epoch": 2.0839149422472554, "grad_norm": 2.9185750484466553, "learning_rate": 1.7287052386140262e-05, "loss": 0.7968, "step": 12766 }, { "epoch": 2.08407820088976, "grad_norm": 2.3811659812927246, "learning_rate": 1.728661329994397e-05, "loss": 0.5208, "step": 12767 }, { "epoch": 2.084241459532264, "grad_norm": 3.109482526779175, "learning_rate": 1.7286174183795165e-05, "loss": 0.6702, "step": 12768 }, { "epoch": 2.0844047181747682, "grad_norm": 3.132070779800415, "learning_rate": 1.7285735037695645e-05, "loss": 0.5315, "step": 12769 }, { "epoch": 2.0845679768172727, "grad_norm": 2.4399538040161133, "learning_rate": 1.728529586164722e-05, "loss": 0.4256, "step": 12770 }, { "epoch": 2.084731235459777, "grad_norm": 2.8046982288360596, "learning_rate": 1.7284856655651695e-05, "loss": 0.5751, "step": 12771 }, { "epoch": 2.0848944941022816, "grad_norm": 2.970193862915039, "learning_rate": 1.7284417419710873e-05, "loss": 0.706, "step": 12772 }, { "epoch": 2.085057752744786, "grad_norm": 2.6680116653442383, "learning_rate": 1.7283978153826563e-05, "loss": 0.622, "step": 12773 }, { "epoch": 2.0852210113872904, "grad_norm": 3.1584200859069824, "learning_rate": 1.728353885800057e-05, "loss": 0.7145, "step": 12774 }, { "epoch": 2.085384270029795, "grad_norm": 2.4844119548797607, "learning_rate": 1.72830995322347e-05, "loss": 0.4332, "step": 12775 }, { "epoch": 2.085547528672299, "grad_norm": 3.2462708950042725, "learning_rate": 1.7282660176530753e-05, "loss": 0.5963, "step": 12776 }, { "epoch": 2.0857107873148033, "grad_norm": 2.853137254714966, "learning_rate": 1.7282220790890546e-05, "loss": 0.6472, "step": 12777 }, { "epoch": 2.0858740459573077, "grad_norm": 3.3957483768463135, "learning_rate": 1.7281781375315873e-05, "loss": 0.6487, "step": 12778 }, { "epoch": 2.086037304599812, "grad_norm": 3.7501211166381836, "learning_rate": 1.7281341929808553e-05, "loss": 0.6273, "step": 12779 }, { "epoch": 2.0862005632423166, "grad_norm": 2.6716487407684326, "learning_rate": 1.7280902454370382e-05, "loss": 0.5795, "step": 12780 }, { "epoch": 2.086363821884821, "grad_norm": 3.044182538986206, "learning_rate": 1.728046294900317e-05, "loss": 0.5153, "step": 12781 }, { "epoch": 2.0865270805273255, "grad_norm": 2.8685667514801025, "learning_rate": 1.7280023413708727e-05, "loss": 0.5898, "step": 12782 }, { "epoch": 2.08669033916983, "grad_norm": 2.6038827896118164, "learning_rate": 1.727958384848885e-05, "loss": 0.4983, "step": 12783 }, { "epoch": 2.0868535978123344, "grad_norm": 2.6969423294067383, "learning_rate": 1.7279144253345358e-05, "loss": 0.4831, "step": 12784 }, { "epoch": 2.0870168564548384, "grad_norm": 2.984229564666748, "learning_rate": 1.7278704628280053e-05, "loss": 0.6119, "step": 12785 }, { "epoch": 2.087180115097343, "grad_norm": 2.863065481185913, "learning_rate": 1.727826497329474e-05, "loss": 0.5387, "step": 12786 }, { "epoch": 2.0873433737398472, "grad_norm": 2.644991159439087, "learning_rate": 1.7277825288391226e-05, "loss": 0.4957, "step": 12787 }, { "epoch": 2.0875066323823517, "grad_norm": 3.1477560997009277, "learning_rate": 1.7277385573571322e-05, "loss": 0.6045, "step": 12788 }, { "epoch": 2.087669891024856, "grad_norm": 2.7446298599243164, "learning_rate": 1.7276945828836833e-05, "loss": 0.555, "step": 12789 }, { "epoch": 2.0878331496673606, "grad_norm": 2.880035400390625, "learning_rate": 1.727650605418957e-05, "loss": 0.6119, "step": 12790 }, { "epoch": 2.087996408309865, "grad_norm": 2.6907949447631836, "learning_rate": 1.7276066249631336e-05, "loss": 0.5195, "step": 12791 }, { "epoch": 2.0881596669523694, "grad_norm": 3.008070945739746, "learning_rate": 1.727562641516394e-05, "loss": 0.769, "step": 12792 }, { "epoch": 2.088322925594874, "grad_norm": 2.7507593631744385, "learning_rate": 1.7275186550789193e-05, "loss": 0.5457, "step": 12793 }, { "epoch": 2.088486184237378, "grad_norm": 2.785268545150757, "learning_rate": 1.72747466565089e-05, "loss": 0.5399, "step": 12794 }, { "epoch": 2.0886494428798823, "grad_norm": 2.9567480087280273, "learning_rate": 1.7274306732324874e-05, "loss": 0.5744, "step": 12795 }, { "epoch": 2.0888127015223867, "grad_norm": 2.794402837753296, "learning_rate": 1.7273866778238914e-05, "loss": 0.593, "step": 12796 }, { "epoch": 2.088975960164891, "grad_norm": 3.472705364227295, "learning_rate": 1.7273426794252838e-05, "loss": 0.6325, "step": 12797 }, { "epoch": 2.0891392188073956, "grad_norm": 3.3495469093322754, "learning_rate": 1.727298678036845e-05, "loss": 0.6516, "step": 12798 }, { "epoch": 2.0893024774499, "grad_norm": 2.6039276123046875, "learning_rate": 1.727254673658756e-05, "loss": 0.6398, "step": 12799 }, { "epoch": 2.0894657360924045, "grad_norm": 3.209416627883911, "learning_rate": 1.7272106662911972e-05, "loss": 0.5795, "step": 12800 }, { "epoch": 2.089628994734909, "grad_norm": 3.1628220081329346, "learning_rate": 1.7271666559343505e-05, "loss": 0.6097, "step": 12801 }, { "epoch": 2.0897922533774134, "grad_norm": 2.618546485900879, "learning_rate": 1.7271226425883964e-05, "loss": 0.5758, "step": 12802 }, { "epoch": 2.0899555120199174, "grad_norm": 2.560617446899414, "learning_rate": 1.727078626253515e-05, "loss": 0.5232, "step": 12803 }, { "epoch": 2.090118770662422, "grad_norm": 3.017102003097534, "learning_rate": 1.7270346069298885e-05, "loss": 0.6775, "step": 12804 }, { "epoch": 2.0902820293049262, "grad_norm": 2.4885449409484863, "learning_rate": 1.726990584617697e-05, "loss": 0.5436, "step": 12805 }, { "epoch": 2.0904452879474307, "grad_norm": 3.093904495239258, "learning_rate": 1.7269465593171215e-05, "loss": 0.6801, "step": 12806 }, { "epoch": 2.090608546589935, "grad_norm": 3.087146759033203, "learning_rate": 1.7269025310283437e-05, "loss": 0.592, "step": 12807 }, { "epoch": 2.0907718052324396, "grad_norm": 2.9861326217651367, "learning_rate": 1.7268584997515437e-05, "loss": 0.6598, "step": 12808 }, { "epoch": 2.090935063874944, "grad_norm": 3.194234848022461, "learning_rate": 1.726814465486903e-05, "loss": 0.6113, "step": 12809 }, { "epoch": 2.0910983225174484, "grad_norm": 3.4568910598754883, "learning_rate": 1.7267704282346024e-05, "loss": 0.6354, "step": 12810 }, { "epoch": 2.091261581159953, "grad_norm": 2.9732813835144043, "learning_rate": 1.726726387994823e-05, "loss": 0.6459, "step": 12811 }, { "epoch": 2.091424839802457, "grad_norm": 3.09676194190979, "learning_rate": 1.726682344767746e-05, "loss": 0.6576, "step": 12812 }, { "epoch": 2.0915880984449613, "grad_norm": 2.8822593688964844, "learning_rate": 1.726638298553552e-05, "loss": 0.6329, "step": 12813 }, { "epoch": 2.0917513570874657, "grad_norm": 2.871588706970215, "learning_rate": 1.7265942493524223e-05, "loss": 0.6408, "step": 12814 }, { "epoch": 2.09191461572997, "grad_norm": 2.841153144836426, "learning_rate": 1.726550197164538e-05, "loss": 0.5894, "step": 12815 }, { "epoch": 2.0920778743724746, "grad_norm": 2.7120449542999268, "learning_rate": 1.7265061419900807e-05, "loss": 0.5172, "step": 12816 }, { "epoch": 2.092241133014979, "grad_norm": 3.3277077674865723, "learning_rate": 1.7264620838292305e-05, "loss": 0.6562, "step": 12817 }, { "epoch": 2.0924043916574835, "grad_norm": 3.0260486602783203, "learning_rate": 1.726418022682169e-05, "loss": 0.6503, "step": 12818 }, { "epoch": 2.092567650299988, "grad_norm": 3.5089263916015625, "learning_rate": 1.7263739585490775e-05, "loss": 0.6487, "step": 12819 }, { "epoch": 2.0927309089424924, "grad_norm": 2.657653331756592, "learning_rate": 1.726329891430137e-05, "loss": 0.5548, "step": 12820 }, { "epoch": 2.0928941675849964, "grad_norm": 3.193873167037964, "learning_rate": 1.726285821325528e-05, "loss": 0.6275, "step": 12821 }, { "epoch": 2.093057426227501, "grad_norm": 2.7210395336151123, "learning_rate": 1.726241748235433e-05, "loss": 0.5778, "step": 12822 }, { "epoch": 2.0932206848700052, "grad_norm": 2.5727922916412354, "learning_rate": 1.7261976721600317e-05, "loss": 0.5645, "step": 12823 }, { "epoch": 2.0933839435125097, "grad_norm": 3.062809467315674, "learning_rate": 1.7261535930995064e-05, "loss": 0.6509, "step": 12824 }, { "epoch": 2.093547202155014, "grad_norm": 2.934971570968628, "learning_rate": 1.7261095110540374e-05, "loss": 0.5732, "step": 12825 }, { "epoch": 2.0937104607975185, "grad_norm": 2.8811326026916504, "learning_rate": 1.7260654260238072e-05, "loss": 0.6981, "step": 12826 }, { "epoch": 2.093873719440023, "grad_norm": 2.4523820877075195, "learning_rate": 1.7260213380089955e-05, "loss": 0.5288, "step": 12827 }, { "epoch": 2.0940369780825274, "grad_norm": 2.61189866065979, "learning_rate": 1.7259772470097845e-05, "loss": 0.5074, "step": 12828 }, { "epoch": 2.0942002367250314, "grad_norm": 2.618311882019043, "learning_rate": 1.725933153026355e-05, "loss": 0.5927, "step": 12829 }, { "epoch": 2.094363495367536, "grad_norm": 3.549255132675171, "learning_rate": 1.725889056058889e-05, "loss": 0.7412, "step": 12830 }, { "epoch": 2.0945267540100403, "grad_norm": 2.7843170166015625, "learning_rate": 1.7258449561075662e-05, "loss": 0.5293, "step": 12831 }, { "epoch": 2.0946900126525447, "grad_norm": 3.2242982387542725, "learning_rate": 1.7258008531725697e-05, "loss": 0.569, "step": 12832 }, { "epoch": 2.094853271295049, "grad_norm": 3.1524910926818848, "learning_rate": 1.725756747254079e-05, "loss": 1.3153, "step": 12833 }, { "epoch": 2.0950165299375536, "grad_norm": 2.3901965618133545, "learning_rate": 1.7257126383522772e-05, "loss": 0.5331, "step": 12834 }, { "epoch": 2.095179788580058, "grad_norm": 2.655294418334961, "learning_rate": 1.7256685264673448e-05, "loss": 0.5702, "step": 12835 }, { "epoch": 2.0953430472225625, "grad_norm": 2.5653247833251953, "learning_rate": 1.7256244115994626e-05, "loss": 0.5421, "step": 12836 }, { "epoch": 2.095506305865067, "grad_norm": 3.245215654373169, "learning_rate": 1.7255802937488128e-05, "loss": 0.7504, "step": 12837 }, { "epoch": 2.095669564507571, "grad_norm": 3.198629140853882, "learning_rate": 1.725536172915576e-05, "loss": 0.7736, "step": 12838 }, { "epoch": 2.0958328231500754, "grad_norm": 3.2473955154418945, "learning_rate": 1.7254920490999346e-05, "loss": 0.5589, "step": 12839 }, { "epoch": 2.09599608179258, "grad_norm": 2.834374189376831, "learning_rate": 1.7254479223020687e-05, "loss": 0.499, "step": 12840 }, { "epoch": 2.0961593404350842, "grad_norm": 3.1033883094787598, "learning_rate": 1.7254037925221606e-05, "loss": 0.6154, "step": 12841 }, { "epoch": 2.0963225990775887, "grad_norm": 3.4261856079101562, "learning_rate": 1.7253596597603916e-05, "loss": 0.7039, "step": 12842 }, { "epoch": 2.096485857720093, "grad_norm": 3.3375117778778076, "learning_rate": 1.7253155240169426e-05, "loss": 0.8157, "step": 12843 }, { "epoch": 2.0966491163625975, "grad_norm": 2.851224422454834, "learning_rate": 1.7252713852919952e-05, "loss": 0.4879, "step": 12844 }, { "epoch": 2.096812375005102, "grad_norm": 3.004068374633789, "learning_rate": 1.725227243585731e-05, "loss": 0.5277, "step": 12845 }, { "epoch": 2.0969756336476064, "grad_norm": 2.862154006958008, "learning_rate": 1.725183098898332e-05, "loss": 0.6878, "step": 12846 }, { "epoch": 2.0971388922901104, "grad_norm": 2.9805068969726562, "learning_rate": 1.725138951229979e-05, "loss": 0.5697, "step": 12847 }, { "epoch": 2.097302150932615, "grad_norm": 2.682579755783081, "learning_rate": 1.725094800580853e-05, "loss": 0.5625, "step": 12848 }, { "epoch": 2.0974654095751193, "grad_norm": 2.704313278198242, "learning_rate": 1.725050646951137e-05, "loss": 0.664, "step": 12849 }, { "epoch": 2.0976286682176237, "grad_norm": 2.91880464553833, "learning_rate": 1.7250064903410106e-05, "loss": 0.5943, "step": 12850 }, { "epoch": 2.097791926860128, "grad_norm": 2.7961418628692627, "learning_rate": 1.7249623307506568e-05, "loss": 0.5236, "step": 12851 }, { "epoch": 2.0979551855026326, "grad_norm": 3.091017723083496, "learning_rate": 1.724918168180256e-05, "loss": 0.6077, "step": 12852 }, { "epoch": 2.098118444145137, "grad_norm": 2.3045501708984375, "learning_rate": 1.724874002629991e-05, "loss": 0.5341, "step": 12853 }, { "epoch": 2.0982817027876415, "grad_norm": 3.0083301067352295, "learning_rate": 1.7248298341000428e-05, "loss": 0.6068, "step": 12854 }, { "epoch": 2.098444961430146, "grad_norm": 2.8877696990966797, "learning_rate": 1.7247856625905924e-05, "loss": 0.625, "step": 12855 }, { "epoch": 2.09860822007265, "grad_norm": 3.034010648727417, "learning_rate": 1.7247414881018222e-05, "loss": 0.6444, "step": 12856 }, { "epoch": 2.0987714787151543, "grad_norm": 2.5934016704559326, "learning_rate": 1.7246973106339127e-05, "loss": 0.5241, "step": 12857 }, { "epoch": 2.098934737357659, "grad_norm": 2.8231091499328613, "learning_rate": 1.7246531301870467e-05, "loss": 0.5908, "step": 12858 }, { "epoch": 2.0990979960001632, "grad_norm": 3.0349881649017334, "learning_rate": 1.7246089467614053e-05, "loss": 0.5666, "step": 12859 }, { "epoch": 2.0992612546426677, "grad_norm": 2.9160685539245605, "learning_rate": 1.7245647603571702e-05, "loss": 0.5272, "step": 12860 }, { "epoch": 2.099424513285172, "grad_norm": 3.7239155769348145, "learning_rate": 1.7245205709745226e-05, "loss": 0.7378, "step": 12861 }, { "epoch": 2.0995877719276765, "grad_norm": 2.8965606689453125, "learning_rate": 1.724476378613645e-05, "loss": 0.5627, "step": 12862 }, { "epoch": 2.099751030570181, "grad_norm": 3.0633347034454346, "learning_rate": 1.7244321832747182e-05, "loss": 0.634, "step": 12863 }, { "epoch": 2.099914289212685, "grad_norm": 2.945538282394409, "learning_rate": 1.7243879849579242e-05, "loss": 0.5742, "step": 12864 }, { "epoch": 2.1000775478551894, "grad_norm": 2.7404608726501465, "learning_rate": 1.7243437836634452e-05, "loss": 0.613, "step": 12865 }, { "epoch": 2.100240806497694, "grad_norm": 2.8317551612854004, "learning_rate": 1.7242995793914617e-05, "loss": 0.5964, "step": 12866 }, { "epoch": 2.1004040651401983, "grad_norm": 3.263826370239258, "learning_rate": 1.7242553721421567e-05, "loss": 0.6422, "step": 12867 }, { "epoch": 2.1005673237827027, "grad_norm": 3.0784757137298584, "learning_rate": 1.7242111619157112e-05, "loss": 0.6597, "step": 12868 }, { "epoch": 2.100730582425207, "grad_norm": 2.799534559249878, "learning_rate": 1.724166948712307e-05, "loss": 0.5894, "step": 12869 }, { "epoch": 2.1008938410677116, "grad_norm": 2.7414872646331787, "learning_rate": 1.724122732532126e-05, "loss": 0.6453, "step": 12870 }, { "epoch": 2.101057099710216, "grad_norm": 3.053551435470581, "learning_rate": 1.7240785133753497e-05, "loss": 0.5249, "step": 12871 }, { "epoch": 2.1012203583527205, "grad_norm": 3.143535614013672, "learning_rate": 1.72403429124216e-05, "loss": 0.5536, "step": 12872 }, { "epoch": 2.101383616995225, "grad_norm": 3.051353931427002, "learning_rate": 1.723990066132739e-05, "loss": 0.5532, "step": 12873 }, { "epoch": 2.101546875637729, "grad_norm": 3.3333780765533447, "learning_rate": 1.7239458380472683e-05, "loss": 0.6286, "step": 12874 }, { "epoch": 2.1017101342802333, "grad_norm": 2.492706775665283, "learning_rate": 1.7239016069859292e-05, "loss": 0.4857, "step": 12875 }, { "epoch": 2.101873392922738, "grad_norm": 2.973721504211426, "learning_rate": 1.723857372948904e-05, "loss": 0.5523, "step": 12876 }, { "epoch": 2.102036651565242, "grad_norm": 2.83613657951355, "learning_rate": 1.7238131359363745e-05, "loss": 0.5621, "step": 12877 }, { "epoch": 2.1021999102077467, "grad_norm": 2.880786180496216, "learning_rate": 1.7237688959485227e-05, "loss": 0.5625, "step": 12878 }, { "epoch": 2.102363168850251, "grad_norm": 3.5521152019500732, "learning_rate": 1.72372465298553e-05, "loss": 0.6601, "step": 12879 }, { "epoch": 2.1025264274927555, "grad_norm": 3.1925437450408936, "learning_rate": 1.723680407047579e-05, "loss": 0.6923, "step": 12880 }, { "epoch": 2.10268968613526, "grad_norm": 2.650250196456909, "learning_rate": 1.7236361581348507e-05, "loss": 0.5579, "step": 12881 }, { "epoch": 2.102852944777764, "grad_norm": 2.5940845012664795, "learning_rate": 1.7235919062475274e-05, "loss": 0.5725, "step": 12882 }, { "epoch": 2.1030162034202684, "grad_norm": 3.1466550827026367, "learning_rate": 1.7235476513857913e-05, "loss": 0.6921, "step": 12883 }, { "epoch": 2.103179462062773, "grad_norm": 2.604487895965576, "learning_rate": 1.7235033935498234e-05, "loss": 0.5366, "step": 12884 }, { "epoch": 2.1033427207052773, "grad_norm": 2.913548231124878, "learning_rate": 1.7234591327398066e-05, "loss": 0.6405, "step": 12885 }, { "epoch": 2.1035059793477817, "grad_norm": 3.3620481491088867, "learning_rate": 1.7234148689559225e-05, "loss": 0.6758, "step": 12886 }, { "epoch": 2.103669237990286, "grad_norm": 2.5261833667755127, "learning_rate": 1.723370602198353e-05, "loss": 0.5168, "step": 12887 }, { "epoch": 2.1038324966327906, "grad_norm": 2.326774835586548, "learning_rate": 1.7233263324672803e-05, "loss": 0.511, "step": 12888 }, { "epoch": 2.103995755275295, "grad_norm": 3.4798474311828613, "learning_rate": 1.723282059762886e-05, "loss": 0.6349, "step": 12889 }, { "epoch": 2.1041590139177995, "grad_norm": 2.836937189102173, "learning_rate": 1.723237784085352e-05, "loss": 0.561, "step": 12890 }, { "epoch": 2.1043222725603035, "grad_norm": 3.1451375484466553, "learning_rate": 1.723193505434861e-05, "loss": 0.5543, "step": 12891 }, { "epoch": 2.104485531202808, "grad_norm": 2.660372495651245, "learning_rate": 1.7231492238115945e-05, "loss": 0.5505, "step": 12892 }, { "epoch": 2.1046487898453123, "grad_norm": 2.9141416549682617, "learning_rate": 1.7231049392157344e-05, "loss": 0.5715, "step": 12893 }, { "epoch": 2.104812048487817, "grad_norm": 2.8201191425323486, "learning_rate": 1.723060651647463e-05, "loss": 0.533, "step": 12894 }, { "epoch": 2.104975307130321, "grad_norm": 2.983450412750244, "learning_rate": 1.7230163611069624e-05, "loss": 0.6044, "step": 12895 }, { "epoch": 2.1051385657728257, "grad_norm": 3.010333776473999, "learning_rate": 1.7229720675944144e-05, "loss": 0.5629, "step": 12896 }, { "epoch": 2.10530182441533, "grad_norm": 3.1916234493255615, "learning_rate": 1.722927771110001e-05, "loss": 0.6946, "step": 12897 }, { "epoch": 2.1054650830578345, "grad_norm": 2.4800267219543457, "learning_rate": 1.7228834716539048e-05, "loss": 0.6338, "step": 12898 }, { "epoch": 2.105628341700339, "grad_norm": 2.9346139430999756, "learning_rate": 1.7228391692263078e-05, "loss": 0.6535, "step": 12899 }, { "epoch": 2.105791600342843, "grad_norm": 2.3650481700897217, "learning_rate": 1.7227948638273918e-05, "loss": 0.4657, "step": 12900 }, { "epoch": 2.1059548589853474, "grad_norm": 2.51212739944458, "learning_rate": 1.722750555457339e-05, "loss": 0.536, "step": 12901 }, { "epoch": 2.106118117627852, "grad_norm": 2.978788137435913, "learning_rate": 1.7227062441163313e-05, "loss": 0.6241, "step": 12902 }, { "epoch": 2.1062813762703563, "grad_norm": 2.7644407749176025, "learning_rate": 1.7226619298045514e-05, "loss": 0.6507, "step": 12903 }, { "epoch": 2.1064446349128607, "grad_norm": 2.6088807582855225, "learning_rate": 1.7226176125221813e-05, "loss": 0.4527, "step": 12904 }, { "epoch": 2.106607893555365, "grad_norm": 2.6382241249084473, "learning_rate": 1.7225732922694027e-05, "loss": 0.6383, "step": 12905 }, { "epoch": 2.1067711521978696, "grad_norm": 2.56321382522583, "learning_rate": 1.722528969046398e-05, "loss": 0.5648, "step": 12906 }, { "epoch": 2.106934410840374, "grad_norm": 3.0008864402770996, "learning_rate": 1.7224846428533498e-05, "loss": 0.6613, "step": 12907 }, { "epoch": 2.1070976694828785, "grad_norm": 2.57450532913208, "learning_rate": 1.7224403136904403e-05, "loss": 0.537, "step": 12908 }, { "epoch": 2.1072609281253825, "grad_norm": 2.478025436401367, "learning_rate": 1.7223959815578512e-05, "loss": 0.5105, "step": 12909 }, { "epoch": 2.107424186767887, "grad_norm": 2.211796522140503, "learning_rate": 1.722351646455765e-05, "loss": 0.4369, "step": 12910 }, { "epoch": 2.1075874454103913, "grad_norm": 2.9524192810058594, "learning_rate": 1.7223073083843638e-05, "loss": 0.6737, "step": 12911 }, { "epoch": 2.1077507040528958, "grad_norm": 2.958291530609131, "learning_rate": 1.72226296734383e-05, "loss": 0.6228, "step": 12912 }, { "epoch": 2.1079139626954, "grad_norm": 2.754754066467285, "learning_rate": 1.7222186233343463e-05, "loss": 0.5422, "step": 12913 }, { "epoch": 2.1080772213379046, "grad_norm": 2.504938840866089, "learning_rate": 1.7221742763560943e-05, "loss": 0.5254, "step": 12914 }, { "epoch": 2.108240479980409, "grad_norm": 2.630666971206665, "learning_rate": 1.7221299264092565e-05, "loss": 0.5175, "step": 12915 }, { "epoch": 2.1084037386229135, "grad_norm": 3.445892810821533, "learning_rate": 1.722085573494015e-05, "loss": 0.6565, "step": 12916 }, { "epoch": 2.1085669972654175, "grad_norm": 2.9788126945495605, "learning_rate": 1.7220412176105527e-05, "loss": 0.6262, "step": 12917 }, { "epoch": 2.108730255907922, "grad_norm": 2.7255327701568604, "learning_rate": 1.7219968587590513e-05, "loss": 0.5767, "step": 12918 }, { "epoch": 2.1088935145504264, "grad_norm": 3.3840489387512207, "learning_rate": 1.721952496939694e-05, "loss": 0.7011, "step": 12919 }, { "epoch": 2.109056773192931, "grad_norm": 3.088595390319824, "learning_rate": 1.721908132152662e-05, "loss": 0.6568, "step": 12920 }, { "epoch": 2.1092200318354353, "grad_norm": 3.0125460624694824, "learning_rate": 1.7218637643981385e-05, "loss": 0.6091, "step": 12921 }, { "epoch": 2.1093832904779397, "grad_norm": 2.9974939823150635, "learning_rate": 1.7218193936763055e-05, "loss": 0.5839, "step": 12922 }, { "epoch": 2.109546549120444, "grad_norm": 2.98903489112854, "learning_rate": 1.7217750199873456e-05, "loss": 0.7091, "step": 12923 }, { "epoch": 2.1097098077629486, "grad_norm": 2.8746371269226074, "learning_rate": 1.721730643331441e-05, "loss": 0.5515, "step": 12924 }, { "epoch": 2.109873066405453, "grad_norm": 2.9516193866729736, "learning_rate": 1.7216862637087744e-05, "loss": 0.5268, "step": 12925 }, { "epoch": 2.110036325047957, "grad_norm": 2.872947931289673, "learning_rate": 1.7216418811195284e-05, "loss": 0.6525, "step": 12926 }, { "epoch": 2.1101995836904615, "grad_norm": 2.8281753063201904, "learning_rate": 1.721597495563885e-05, "loss": 0.582, "step": 12927 }, { "epoch": 2.110362842332966, "grad_norm": 2.152728796005249, "learning_rate": 1.7215531070420262e-05, "loss": 0.505, "step": 12928 }, { "epoch": 2.1105261009754703, "grad_norm": 2.895195484161377, "learning_rate": 1.7215087155541353e-05, "loss": 0.6113, "step": 12929 }, { "epoch": 2.1106893596179748, "grad_norm": 2.8888015747070312, "learning_rate": 1.7214643211003948e-05, "loss": 0.5739, "step": 12930 }, { "epoch": 2.110852618260479, "grad_norm": 3.195516586303711, "learning_rate": 1.7214199236809866e-05, "loss": 0.6933, "step": 12931 }, { "epoch": 2.1110158769029836, "grad_norm": 2.472728967666626, "learning_rate": 1.7213755232960937e-05, "loss": 0.5011, "step": 12932 }, { "epoch": 2.111179135545488, "grad_norm": 3.0593836307525635, "learning_rate": 1.7213311199458983e-05, "loss": 0.6375, "step": 12933 }, { "epoch": 2.1113423941879925, "grad_norm": 2.8561763763427734, "learning_rate": 1.7212867136305828e-05, "loss": 0.54, "step": 12934 }, { "epoch": 2.1115056528304965, "grad_norm": 3.1389870643615723, "learning_rate": 1.7212423043503305e-05, "loss": 0.6713, "step": 12935 }, { "epoch": 2.111668911473001, "grad_norm": 3.38655424118042, "learning_rate": 1.721197892105323e-05, "loss": 0.5145, "step": 12936 }, { "epoch": 2.1118321701155054, "grad_norm": 2.990682363510132, "learning_rate": 1.7211534768957434e-05, "loss": 0.5546, "step": 12937 }, { "epoch": 2.11199542875801, "grad_norm": 3.1071808338165283, "learning_rate": 1.7211090587217742e-05, "loss": 0.6176, "step": 12938 }, { "epoch": 2.1121586874005143, "grad_norm": 2.980769395828247, "learning_rate": 1.7210646375835976e-05, "loss": 0.5681, "step": 12939 }, { "epoch": 2.1123219460430187, "grad_norm": 2.905806303024292, "learning_rate": 1.7210202134813973e-05, "loss": 0.6522, "step": 12940 }, { "epoch": 2.112485204685523, "grad_norm": 1.8820725679397583, "learning_rate": 1.7209757864153545e-05, "loss": 0.3494, "step": 12941 }, { "epoch": 2.1126484633280276, "grad_norm": 2.620581865310669, "learning_rate": 1.7209313563856528e-05, "loss": 0.5586, "step": 12942 }, { "epoch": 2.112811721970532, "grad_norm": 2.361424684524536, "learning_rate": 1.7208869233924742e-05, "loss": 0.5677, "step": 12943 }, { "epoch": 2.112974980613036, "grad_norm": 2.816701889038086, "learning_rate": 1.720842487436002e-05, "loss": 0.5822, "step": 12944 }, { "epoch": 2.1131382392555405, "grad_norm": 2.4587483406066895, "learning_rate": 1.7207980485164183e-05, "loss": 0.5355, "step": 12945 }, { "epoch": 2.113301497898045, "grad_norm": 2.8472836017608643, "learning_rate": 1.720753606633906e-05, "loss": 0.571, "step": 12946 }, { "epoch": 2.1134647565405493, "grad_norm": 3.308234691619873, "learning_rate": 1.720709161788648e-05, "loss": 0.6579, "step": 12947 }, { "epoch": 2.1136280151830538, "grad_norm": 3.0169425010681152, "learning_rate": 1.7206647139808267e-05, "loss": 0.6272, "step": 12948 }, { "epoch": 2.113791273825558, "grad_norm": 2.837094306945801, "learning_rate": 1.720620263210625e-05, "loss": 0.5597, "step": 12949 }, { "epoch": 2.1139545324680626, "grad_norm": 2.5946435928344727, "learning_rate": 1.7205758094782254e-05, "loss": 0.5738, "step": 12950 }, { "epoch": 2.114117791110567, "grad_norm": 2.588975191116333, "learning_rate": 1.7205313527838107e-05, "loss": 0.5141, "step": 12951 }, { "epoch": 2.1142810497530715, "grad_norm": 2.2100279331207275, "learning_rate": 1.7204868931275633e-05, "loss": 0.5078, "step": 12952 }, { "epoch": 2.1144443083955755, "grad_norm": 2.3231325149536133, "learning_rate": 1.7204424305096666e-05, "loss": 0.4866, "step": 12953 }, { "epoch": 2.11460756703808, "grad_norm": 2.8669490814208984, "learning_rate": 1.7203979649303032e-05, "loss": 0.5831, "step": 12954 }, { "epoch": 2.1147708256805844, "grad_norm": 3.0913479328155518, "learning_rate": 1.7203534963896557e-05, "loss": 0.6131, "step": 12955 }, { "epoch": 2.114934084323089, "grad_norm": 2.892207622528076, "learning_rate": 1.720309024887907e-05, "loss": 0.5174, "step": 12956 }, { "epoch": 2.1150973429655933, "grad_norm": 2.9371635913848877, "learning_rate": 1.7202645504252398e-05, "loss": 0.6687, "step": 12957 }, { "epoch": 2.1152606016080977, "grad_norm": 3.280890703201294, "learning_rate": 1.7202200730018374e-05, "loss": 0.6006, "step": 12958 }, { "epoch": 2.115423860250602, "grad_norm": 2.9029979705810547, "learning_rate": 1.7201755926178817e-05, "loss": 0.5684, "step": 12959 }, { "epoch": 2.1155871188931066, "grad_norm": 3.228069305419922, "learning_rate": 1.7201311092735563e-05, "loss": 0.6761, "step": 12960 }, { "epoch": 2.115750377535611, "grad_norm": 2.4103057384490967, "learning_rate": 1.720086622969044e-05, "loss": 0.4627, "step": 12961 }, { "epoch": 2.115913636178115, "grad_norm": 2.891437530517578, "learning_rate": 1.720042133704527e-05, "loss": 0.539, "step": 12962 }, { "epoch": 2.1160768948206194, "grad_norm": 2.791300058364868, "learning_rate": 1.7199976414801893e-05, "loss": 0.5726, "step": 12963 }, { "epoch": 2.116240153463124, "grad_norm": 3.021334648132324, "learning_rate": 1.719953146296213e-05, "loss": 0.5499, "step": 12964 }, { "epoch": 2.1164034121056283, "grad_norm": 3.006071090698242, "learning_rate": 1.7199086481527806e-05, "loss": 0.604, "step": 12965 }, { "epoch": 2.1165666707481328, "grad_norm": 2.956092357635498, "learning_rate": 1.719864147050076e-05, "loss": 0.6248, "step": 12966 }, { "epoch": 2.116729929390637, "grad_norm": 2.597639799118042, "learning_rate": 1.7198196429882817e-05, "loss": 0.5032, "step": 12967 }, { "epoch": 2.1168931880331416, "grad_norm": 2.8493311405181885, "learning_rate": 1.7197751359675806e-05, "loss": 0.6349, "step": 12968 }, { "epoch": 2.117056446675646, "grad_norm": 3.128696918487549, "learning_rate": 1.719730625988156e-05, "loss": 0.581, "step": 12969 }, { "epoch": 2.11721970531815, "grad_norm": 3.0785179138183594, "learning_rate": 1.7196861130501907e-05, "loss": 0.5479, "step": 12970 }, { "epoch": 2.1173829639606545, "grad_norm": 2.8373045921325684, "learning_rate": 1.7196415971538667e-05, "loss": 0.5729, "step": 12971 }, { "epoch": 2.117546222603159, "grad_norm": 2.725847005844116, "learning_rate": 1.7195970782993683e-05, "loss": 0.5455, "step": 12972 }, { "epoch": 2.1177094812456634, "grad_norm": 3.019247531890869, "learning_rate": 1.7195525564868783e-05, "loss": 0.5654, "step": 12973 }, { "epoch": 2.117872739888168, "grad_norm": 2.762345790863037, "learning_rate": 1.7195080317165788e-05, "loss": 0.4773, "step": 12974 }, { "epoch": 2.1180359985306723, "grad_norm": 2.175431728363037, "learning_rate": 1.719463503988654e-05, "loss": 0.4862, "step": 12975 }, { "epoch": 2.1181992571731767, "grad_norm": 2.8402791023254395, "learning_rate": 1.7194189733032862e-05, "loss": 0.5611, "step": 12976 }, { "epoch": 2.118362515815681, "grad_norm": 3.3504226207733154, "learning_rate": 1.7193744396606584e-05, "loss": 1.151, "step": 12977 }, { "epoch": 2.1185257744581856, "grad_norm": 2.6738946437835693, "learning_rate": 1.7193299030609542e-05, "loss": 0.5499, "step": 12978 }, { "epoch": 2.1186890331006896, "grad_norm": 3.223808765411377, "learning_rate": 1.7192853635043566e-05, "loss": 0.6519, "step": 12979 }, { "epoch": 2.118852291743194, "grad_norm": 3.0785863399505615, "learning_rate": 1.7192408209910482e-05, "loss": 0.5846, "step": 12980 }, { "epoch": 2.1190155503856984, "grad_norm": 2.8056201934814453, "learning_rate": 1.7191962755212123e-05, "loss": 0.5354, "step": 12981 }, { "epoch": 2.119178809028203, "grad_norm": 2.5409679412841797, "learning_rate": 1.7191517270950318e-05, "loss": 0.5047, "step": 12982 }, { "epoch": 2.1193420676707073, "grad_norm": 3.4151806831359863, "learning_rate": 1.7191071757126908e-05, "loss": 0.5789, "step": 12983 }, { "epoch": 2.1195053263132118, "grad_norm": 2.6701221466064453, "learning_rate": 1.719062621374371e-05, "loss": 0.5861, "step": 12984 }, { "epoch": 2.119668584955716, "grad_norm": 3.4793946743011475, "learning_rate": 1.7190180640802568e-05, "loss": 0.7649, "step": 12985 }, { "epoch": 2.1198318435982206, "grad_norm": 2.960695505142212, "learning_rate": 1.7189735038305308e-05, "loss": 0.561, "step": 12986 }, { "epoch": 2.119995102240725, "grad_norm": 2.5674753189086914, "learning_rate": 1.7189289406253764e-05, "loss": 0.6034, "step": 12987 }, { "epoch": 2.120158360883229, "grad_norm": 2.7141871452331543, "learning_rate": 1.7188843744649762e-05, "loss": 0.5683, "step": 12988 }, { "epoch": 2.1203216195257335, "grad_norm": 2.39648699760437, "learning_rate": 1.718839805349514e-05, "loss": 0.5083, "step": 12989 }, { "epoch": 2.120484878168238, "grad_norm": 2.7144036293029785, "learning_rate": 1.7187952332791728e-05, "loss": 0.5165, "step": 12990 }, { "epoch": 2.1206481368107424, "grad_norm": 2.4448516368865967, "learning_rate": 1.7187506582541354e-05, "loss": 0.5454, "step": 12991 }, { "epoch": 2.120811395453247, "grad_norm": 2.537052631378174, "learning_rate": 1.7187060802745857e-05, "loss": 0.6364, "step": 12992 }, { "epoch": 2.1209746540957513, "grad_norm": 2.5525660514831543, "learning_rate": 1.718661499340707e-05, "loss": 0.4692, "step": 12993 }, { "epoch": 2.1211379127382557, "grad_norm": 2.961239814758301, "learning_rate": 1.7186169154526816e-05, "loss": 0.567, "step": 12994 }, { "epoch": 2.12130117138076, "grad_norm": 3.1150712966918945, "learning_rate": 1.718572328610694e-05, "loss": 0.7126, "step": 12995 }, { "epoch": 2.1214644300232646, "grad_norm": 2.8862762451171875, "learning_rate": 1.7185277388149263e-05, "loss": 0.594, "step": 12996 }, { "epoch": 2.1216276886657686, "grad_norm": 2.7692947387695312, "learning_rate": 1.718483146065563e-05, "loss": 0.6043, "step": 12997 }, { "epoch": 2.121790947308273, "grad_norm": 2.527189016342163, "learning_rate": 1.718438550362786e-05, "loss": 0.5114, "step": 12998 }, { "epoch": 2.1219542059507774, "grad_norm": 2.858922004699707, "learning_rate": 1.71839395170678e-05, "loss": 0.5611, "step": 12999 }, { "epoch": 2.122117464593282, "grad_norm": 2.94515323638916, "learning_rate": 1.7183493500977277e-05, "loss": 0.5583, "step": 13000 }, { "epoch": 2.1222807232357863, "grad_norm": 2.793200731277466, "learning_rate": 1.7183047455358123e-05, "loss": 0.5858, "step": 13001 }, { "epoch": 2.1224439818782908, "grad_norm": 3.039393663406372, "learning_rate": 1.7182601380212175e-05, "loss": 0.6187, "step": 13002 }, { "epoch": 2.122607240520795, "grad_norm": 2.809619426727295, "learning_rate": 1.7182155275541264e-05, "loss": 0.5719, "step": 13003 }, { "epoch": 2.1227704991632996, "grad_norm": 2.5070767402648926, "learning_rate": 1.7181709141347224e-05, "loss": 0.5328, "step": 13004 }, { "epoch": 2.1229337578058036, "grad_norm": 2.686833143234253, "learning_rate": 1.718126297763189e-05, "loss": 0.461, "step": 13005 }, { "epoch": 2.123097016448308, "grad_norm": 2.8147733211517334, "learning_rate": 1.7180816784397097e-05, "loss": 0.6482, "step": 13006 }, { "epoch": 2.1232602750908125, "grad_norm": 3.4121851921081543, "learning_rate": 1.7180370561644674e-05, "loss": 0.5591, "step": 13007 }, { "epoch": 2.123423533733317, "grad_norm": 2.9349327087402344, "learning_rate": 1.717992430937646e-05, "loss": 0.5178, "step": 13008 }, { "epoch": 2.1235867923758214, "grad_norm": 2.9234933853149414, "learning_rate": 1.717947802759429e-05, "loss": 0.5911, "step": 13009 }, { "epoch": 2.123750051018326, "grad_norm": 2.969630718231201, "learning_rate": 1.7179031716299996e-05, "loss": 0.6355, "step": 13010 }, { "epoch": 2.1239133096608303, "grad_norm": 2.7100934982299805, "learning_rate": 1.7178585375495412e-05, "loss": 0.5156, "step": 13011 }, { "epoch": 2.1240765683033347, "grad_norm": 3.4070043563842773, "learning_rate": 1.7178139005182375e-05, "loss": 0.6832, "step": 13012 }, { "epoch": 2.124239826945839, "grad_norm": 2.347960948944092, "learning_rate": 1.717769260536272e-05, "loss": 0.4848, "step": 13013 }, { "epoch": 2.124403085588343, "grad_norm": 2.9016027450561523, "learning_rate": 1.7177246176038283e-05, "loss": 0.5312, "step": 13014 }, { "epoch": 2.1245663442308476, "grad_norm": 3.141680955886841, "learning_rate": 1.7176799717210893e-05, "loss": 0.5523, "step": 13015 }, { "epoch": 2.124729602873352, "grad_norm": 2.4872827529907227, "learning_rate": 1.717635322888239e-05, "loss": 0.4788, "step": 13016 }, { "epoch": 2.1248928615158564, "grad_norm": 2.5464539527893066, "learning_rate": 1.717590671105461e-05, "loss": 0.5751, "step": 13017 }, { "epoch": 2.125056120158361, "grad_norm": 2.5614166259765625, "learning_rate": 1.7175460163729384e-05, "loss": 0.5732, "step": 13018 }, { "epoch": 2.1252193788008653, "grad_norm": 2.7582671642303467, "learning_rate": 1.7175013586908553e-05, "loss": 0.5333, "step": 13019 }, { "epoch": 2.1253826374433697, "grad_norm": 2.9095590114593506, "learning_rate": 1.7174566980593953e-05, "loss": 0.5897, "step": 13020 }, { "epoch": 2.125545896085874, "grad_norm": 2.7303836345672607, "learning_rate": 1.7174120344787414e-05, "loss": 0.5855, "step": 13021 }, { "epoch": 2.1257091547283786, "grad_norm": 2.8531699180603027, "learning_rate": 1.7173673679490773e-05, "loss": 0.7099, "step": 13022 }, { "epoch": 2.1258724133708826, "grad_norm": 2.6834261417388916, "learning_rate": 1.7173226984705872e-05, "loss": 0.5949, "step": 13023 }, { "epoch": 2.126035672013387, "grad_norm": 3.0007174015045166, "learning_rate": 1.7172780260434545e-05, "loss": 0.6194, "step": 13024 }, { "epoch": 2.1261989306558915, "grad_norm": 2.798285722732544, "learning_rate": 1.717233350667862e-05, "loss": 0.5109, "step": 13025 }, { "epoch": 2.126362189298396, "grad_norm": 3.0082848072052, "learning_rate": 1.7171886723439946e-05, "loss": 0.5741, "step": 13026 }, { "epoch": 2.1265254479409004, "grad_norm": 2.831969976425171, "learning_rate": 1.7171439910720355e-05, "loss": 0.6145, "step": 13027 }, { "epoch": 2.126688706583405, "grad_norm": 2.5579748153686523, "learning_rate": 1.717099306852168e-05, "loss": 0.5765, "step": 13028 }, { "epoch": 2.1268519652259092, "grad_norm": 2.8682048320770264, "learning_rate": 1.7170546196845758e-05, "loss": 0.5419, "step": 13029 }, { "epoch": 2.1270152238684137, "grad_norm": 2.785625696182251, "learning_rate": 1.717009929569443e-05, "loss": 0.5456, "step": 13030 }, { "epoch": 2.127178482510918, "grad_norm": 2.904359817504883, "learning_rate": 1.7169652365069528e-05, "loss": 0.5591, "step": 13031 }, { "epoch": 2.127341741153422, "grad_norm": 2.8165504932403564, "learning_rate": 1.7169205404972898e-05, "loss": 0.641, "step": 13032 }, { "epoch": 2.1275049997959266, "grad_norm": 3.7044222354888916, "learning_rate": 1.716875841540637e-05, "loss": 0.6832, "step": 13033 }, { "epoch": 2.127668258438431, "grad_norm": 3.1535823345184326, "learning_rate": 1.716831139637178e-05, "loss": 0.6069, "step": 13034 }, { "epoch": 2.1278315170809354, "grad_norm": 2.434551239013672, "learning_rate": 1.716786434787097e-05, "loss": 0.4615, "step": 13035 }, { "epoch": 2.12799477572344, "grad_norm": 3.152899980545044, "learning_rate": 1.716741726990578e-05, "loss": 0.6791, "step": 13036 }, { "epoch": 2.1281580343659443, "grad_norm": 3.3222827911376953, "learning_rate": 1.7166970162478038e-05, "loss": 0.7847, "step": 13037 }, { "epoch": 2.1283212930084487, "grad_norm": 2.822303533554077, "learning_rate": 1.7166523025589592e-05, "loss": 0.589, "step": 13038 }, { "epoch": 2.128484551650953, "grad_norm": 3.3318030834198, "learning_rate": 1.7166075859242273e-05, "loss": 0.5809, "step": 13039 }, { "epoch": 2.128647810293457, "grad_norm": 2.611159086227417, "learning_rate": 1.7165628663437923e-05, "loss": 0.5408, "step": 13040 }, { "epoch": 2.1288110689359616, "grad_norm": 2.5732836723327637, "learning_rate": 1.716518143817838e-05, "loss": 0.5188, "step": 13041 }, { "epoch": 2.128974327578466, "grad_norm": 2.5137977600097656, "learning_rate": 1.7164734183465485e-05, "loss": 0.5365, "step": 13042 }, { "epoch": 2.1291375862209705, "grad_norm": 3.0678679943084717, "learning_rate": 1.7164286899301066e-05, "loss": 0.6516, "step": 13043 }, { "epoch": 2.129300844863475, "grad_norm": 3.279022455215454, "learning_rate": 1.7163839585686976e-05, "loss": 0.7037, "step": 13044 }, { "epoch": 2.1294641035059794, "grad_norm": 2.6681764125823975, "learning_rate": 1.716339224262504e-05, "loss": 0.5285, "step": 13045 }, { "epoch": 2.129627362148484, "grad_norm": 2.800431966781616, "learning_rate": 1.7162944870117107e-05, "loss": 0.5553, "step": 13046 }, { "epoch": 2.1297906207909882, "grad_norm": 3.084268808364868, "learning_rate": 1.7162497468165012e-05, "loss": 0.7158, "step": 13047 }, { "epoch": 2.1299538794334927, "grad_norm": 2.5893917083740234, "learning_rate": 1.716205003677059e-05, "loss": 0.5089, "step": 13048 }, { "epoch": 2.130117138075997, "grad_norm": 2.700685739517212, "learning_rate": 1.716160257593569e-05, "loss": 0.6086, "step": 13049 }, { "epoch": 2.130280396718501, "grad_norm": 2.745410919189453, "learning_rate": 1.7161155085662144e-05, "loss": 0.5163, "step": 13050 }, { "epoch": 2.1304436553610056, "grad_norm": 2.9737260341644287, "learning_rate": 1.716070756595179e-05, "loss": 0.6422, "step": 13051 }, { "epoch": 2.13060691400351, "grad_norm": 2.843040704727173, "learning_rate": 1.7160260016806482e-05, "loss": 0.7008, "step": 13052 }, { "epoch": 2.1307701726460144, "grad_norm": 2.5101709365844727, "learning_rate": 1.715981243822804e-05, "loss": 0.502, "step": 13053 }, { "epoch": 2.130933431288519, "grad_norm": 2.959214687347412, "learning_rate": 1.7159364830218312e-05, "loss": 0.6179, "step": 13054 }, { "epoch": 2.1310966899310233, "grad_norm": 2.5565195083618164, "learning_rate": 1.7158917192779145e-05, "loss": 0.5491, "step": 13055 }, { "epoch": 2.1312599485735277, "grad_norm": 2.3440093994140625, "learning_rate": 1.7158469525912366e-05, "loss": 0.4483, "step": 13056 }, { "epoch": 2.131423207216032, "grad_norm": 2.9421799182891846, "learning_rate": 1.715802182961982e-05, "loss": 0.6298, "step": 13057 }, { "epoch": 2.131586465858536, "grad_norm": 2.895650625228882, "learning_rate": 1.7157574103903355e-05, "loss": 0.5246, "step": 13058 }, { "epoch": 2.1317497245010406, "grad_norm": 2.7285168170928955, "learning_rate": 1.7157126348764806e-05, "loss": 0.6113, "step": 13059 }, { "epoch": 2.131912983143545, "grad_norm": 3.327528238296509, "learning_rate": 1.715667856420601e-05, "loss": 0.6228, "step": 13060 }, { "epoch": 2.1320762417860495, "grad_norm": 2.527371644973755, "learning_rate": 1.7156230750228813e-05, "loss": 0.5103, "step": 13061 }, { "epoch": 2.132239500428554, "grad_norm": 3.5931944847106934, "learning_rate": 1.715578290683505e-05, "loss": 0.628, "step": 13062 }, { "epoch": 2.1324027590710584, "grad_norm": 3.0939836502075195, "learning_rate": 1.7155335034026568e-05, "loss": 0.7344, "step": 13063 }, { "epoch": 2.132566017713563, "grad_norm": 2.781325101852417, "learning_rate": 1.7154887131805206e-05, "loss": 0.5538, "step": 13064 }, { "epoch": 2.1327292763560672, "grad_norm": 2.832670211791992, "learning_rate": 1.7154439200172804e-05, "loss": 0.5414, "step": 13065 }, { "epoch": 2.1328925349985717, "grad_norm": 2.794593334197998, "learning_rate": 1.71539912391312e-05, "loss": 0.5536, "step": 13066 }, { "epoch": 2.1330557936410757, "grad_norm": 2.648648262023926, "learning_rate": 1.7153543248682245e-05, "loss": 0.5425, "step": 13067 }, { "epoch": 2.13321905228358, "grad_norm": 2.473074197769165, "learning_rate": 1.7153095228827773e-05, "loss": 0.5573, "step": 13068 }, { "epoch": 2.1333823109260845, "grad_norm": 2.869717836380005, "learning_rate": 1.7152647179569627e-05, "loss": 0.5677, "step": 13069 }, { "epoch": 2.133545569568589, "grad_norm": 2.548490524291992, "learning_rate": 1.715219910090965e-05, "loss": 0.5611, "step": 13070 }, { "epoch": 2.1337088282110934, "grad_norm": 2.953206777572632, "learning_rate": 1.7151750992849683e-05, "loss": 0.7007, "step": 13071 }, { "epoch": 2.133872086853598, "grad_norm": 3.2518410682678223, "learning_rate": 1.715130285539157e-05, "loss": 0.6395, "step": 13072 }, { "epoch": 2.1340353454961023, "grad_norm": 3.0518174171447754, "learning_rate": 1.7150854688537145e-05, "loss": 0.623, "step": 13073 }, { "epoch": 2.1341986041386067, "grad_norm": 3.1377463340759277, "learning_rate": 1.7150406492288262e-05, "loss": 0.6365, "step": 13074 }, { "epoch": 2.134361862781111, "grad_norm": 3.089094638824463, "learning_rate": 1.7149958266646756e-05, "loss": 0.662, "step": 13075 }, { "epoch": 2.134525121423615, "grad_norm": 2.7523956298828125, "learning_rate": 1.714951001161447e-05, "loss": 0.5737, "step": 13076 }, { "epoch": 2.1346883800661196, "grad_norm": 3.2727065086364746, "learning_rate": 1.714906172719325e-05, "loss": 0.5736, "step": 13077 }, { "epoch": 2.134851638708624, "grad_norm": 2.679886817932129, "learning_rate": 1.7148613413384936e-05, "loss": 0.5293, "step": 13078 }, { "epoch": 2.1350148973511285, "grad_norm": 3.426436424255371, "learning_rate": 1.714816507019137e-05, "loss": 0.5605, "step": 13079 }, { "epoch": 2.135178155993633, "grad_norm": 2.766242504119873, "learning_rate": 1.71477166976144e-05, "loss": 0.5166, "step": 13080 }, { "epoch": 2.1353414146361374, "grad_norm": 2.863647222518921, "learning_rate": 1.7147268295655864e-05, "loss": 0.6013, "step": 13081 }, { "epoch": 2.135504673278642, "grad_norm": 3.204484701156616, "learning_rate": 1.7146819864317605e-05, "loss": 0.6346, "step": 13082 }, { "epoch": 2.1356679319211462, "grad_norm": 2.977943181991577, "learning_rate": 1.7146371403601472e-05, "loss": 0.6862, "step": 13083 }, { "epoch": 2.1358311905636507, "grad_norm": 2.9612374305725098, "learning_rate": 1.7145922913509304e-05, "loss": 0.6741, "step": 13084 }, { "epoch": 2.1359944492061547, "grad_norm": 3.109053134918213, "learning_rate": 1.714547439404294e-05, "loss": 0.7392, "step": 13085 }, { "epoch": 2.136157707848659, "grad_norm": 2.7903733253479004, "learning_rate": 1.714502584520423e-05, "loss": 0.6028, "step": 13086 }, { "epoch": 2.1363209664911635, "grad_norm": 2.682581663131714, "learning_rate": 1.7144577266995023e-05, "loss": 0.6039, "step": 13087 }, { "epoch": 2.136484225133668, "grad_norm": 3.2743980884552, "learning_rate": 1.7144128659417152e-05, "loss": 0.729, "step": 13088 }, { "epoch": 2.1366474837761724, "grad_norm": 2.489755630493164, "learning_rate": 1.7143680022472467e-05, "loss": 0.5336, "step": 13089 }, { "epoch": 2.136810742418677, "grad_norm": 2.9348247051239014, "learning_rate": 1.714323135616281e-05, "loss": 0.614, "step": 13090 }, { "epoch": 2.1369740010611813, "grad_norm": 3.361407995223999, "learning_rate": 1.7142782660490026e-05, "loss": 0.6324, "step": 13091 }, { "epoch": 2.1371372597036857, "grad_norm": 2.2916533946990967, "learning_rate": 1.714233393545596e-05, "loss": 0.5093, "step": 13092 }, { "epoch": 2.1373005183461897, "grad_norm": 3.1401302814483643, "learning_rate": 1.7141885181062456e-05, "loss": 0.6223, "step": 13093 }, { "epoch": 2.137463776988694, "grad_norm": 2.405374050140381, "learning_rate": 1.7141436397311358e-05, "loss": 0.4655, "step": 13094 }, { "epoch": 2.1376270356311986, "grad_norm": 3.2795891761779785, "learning_rate": 1.7140987584204514e-05, "loss": 0.6312, "step": 13095 }, { "epoch": 2.137790294273703, "grad_norm": 2.7685513496398926, "learning_rate": 1.7140538741743764e-05, "loss": 0.5506, "step": 13096 }, { "epoch": 2.1379535529162075, "grad_norm": 3.3213071823120117, "learning_rate": 1.7140089869930957e-05, "loss": 0.5791, "step": 13097 }, { "epoch": 2.138116811558712, "grad_norm": 3.069965124130249, "learning_rate": 1.7139640968767933e-05, "loss": 0.6118, "step": 13098 }, { "epoch": 2.1382800702012164, "grad_norm": 2.868027448654175, "learning_rate": 1.7139192038256546e-05, "loss": 0.6301, "step": 13099 }, { "epoch": 2.138443328843721, "grad_norm": 2.7568626403808594, "learning_rate": 1.7138743078398632e-05, "loss": 0.5679, "step": 13100 }, { "epoch": 2.1386065874862252, "grad_norm": 3.1795358657836914, "learning_rate": 1.713829408919604e-05, "loss": 0.6481, "step": 13101 }, { "epoch": 2.1387698461287297, "grad_norm": 2.5528616905212402, "learning_rate": 1.7137845070650622e-05, "loss": 0.5233, "step": 13102 }, { "epoch": 2.1389331047712337, "grad_norm": 2.566093921661377, "learning_rate": 1.7137396022764216e-05, "loss": 0.5563, "step": 13103 }, { "epoch": 2.139096363413738, "grad_norm": 2.808198928833008, "learning_rate": 1.713694694553867e-05, "loss": 0.556, "step": 13104 }, { "epoch": 2.1392596220562425, "grad_norm": 3.3445615768432617, "learning_rate": 1.7136497838975827e-05, "loss": 0.6463, "step": 13105 }, { "epoch": 2.139422880698747, "grad_norm": 2.6309595108032227, "learning_rate": 1.7136048703077537e-05, "loss": 0.5229, "step": 13106 }, { "epoch": 2.1395861393412514, "grad_norm": 2.384093761444092, "learning_rate": 1.713559953784565e-05, "loss": 0.4878, "step": 13107 }, { "epoch": 2.139749397983756, "grad_norm": 2.8388495445251465, "learning_rate": 1.7135150343282e-05, "loss": 0.4996, "step": 13108 }, { "epoch": 2.1399126566262603, "grad_norm": 2.9571335315704346, "learning_rate": 1.7134701119388447e-05, "loss": 0.6242, "step": 13109 }, { "epoch": 2.1400759152687647, "grad_norm": 2.7773492336273193, "learning_rate": 1.713425186616683e-05, "loss": 0.5403, "step": 13110 }, { "epoch": 2.1402391739112687, "grad_norm": 3.0758533477783203, "learning_rate": 1.7133802583618996e-05, "loss": 0.65, "step": 13111 }, { "epoch": 2.140402432553773, "grad_norm": 2.5343875885009766, "learning_rate": 1.7133353271746796e-05, "loss": 0.5486, "step": 13112 }, { "epoch": 2.1405656911962776, "grad_norm": 3.0750186443328857, "learning_rate": 1.713290393055207e-05, "loss": 0.6065, "step": 13113 }, { "epoch": 2.140728949838782, "grad_norm": 2.215026617050171, "learning_rate": 1.7132454560036673e-05, "loss": 0.455, "step": 13114 }, { "epoch": 2.1408922084812865, "grad_norm": 2.7296226024627686, "learning_rate": 1.713200516020245e-05, "loss": 0.5276, "step": 13115 }, { "epoch": 2.141055467123791, "grad_norm": 3.1549789905548096, "learning_rate": 1.7131555731051244e-05, "loss": 0.5988, "step": 13116 }, { "epoch": 2.1412187257662953, "grad_norm": 2.734663486480713, "learning_rate": 1.7131106272584905e-05, "loss": 0.5482, "step": 13117 }, { "epoch": 2.1413819844088, "grad_norm": 2.8247241973876953, "learning_rate": 1.713065678480528e-05, "loss": 0.5931, "step": 13118 }, { "epoch": 2.1415452430513042, "grad_norm": 2.7957956790924072, "learning_rate": 1.713020726771422e-05, "loss": 0.56, "step": 13119 }, { "epoch": 2.141708501693808, "grad_norm": 2.849489450454712, "learning_rate": 1.712975772131357e-05, "loss": 0.6168, "step": 13120 }, { "epoch": 2.1418717603363127, "grad_norm": 2.4228227138519287, "learning_rate": 1.7129308145605174e-05, "loss": 0.4448, "step": 13121 }, { "epoch": 2.142035018978817, "grad_norm": 3.8135457038879395, "learning_rate": 1.712885854059089e-05, "loss": 0.6855, "step": 13122 }, { "epoch": 2.1421982776213215, "grad_norm": 2.714707612991333, "learning_rate": 1.7128408906272557e-05, "loss": 0.5375, "step": 13123 }, { "epoch": 2.142361536263826, "grad_norm": 2.878091812133789, "learning_rate": 1.7127959242652026e-05, "loss": 0.531, "step": 13124 }, { "epoch": 2.1425247949063304, "grad_norm": 3.3860297203063965, "learning_rate": 1.712750954973115e-05, "loss": 0.5657, "step": 13125 }, { "epoch": 2.142688053548835, "grad_norm": 2.6279618740081787, "learning_rate": 1.7127059827511767e-05, "loss": 0.5365, "step": 13126 }, { "epoch": 2.1428513121913393, "grad_norm": 3.0128443241119385, "learning_rate": 1.712661007599574e-05, "loss": 0.5281, "step": 13127 }, { "epoch": 2.1430145708338437, "grad_norm": 2.836092948913574, "learning_rate": 1.7126160295184903e-05, "loss": 0.6464, "step": 13128 }, { "epoch": 2.1431778294763477, "grad_norm": 2.9027042388916016, "learning_rate": 1.7125710485081114e-05, "loss": 0.5806, "step": 13129 }, { "epoch": 2.143341088118852, "grad_norm": 3.1385490894317627, "learning_rate": 1.712526064568622e-05, "loss": 0.678, "step": 13130 }, { "epoch": 2.1435043467613566, "grad_norm": 3.202087163925171, "learning_rate": 1.7124810777002072e-05, "loss": 0.5799, "step": 13131 }, { "epoch": 2.143667605403861, "grad_norm": 2.540536880493164, "learning_rate": 1.7124360879030517e-05, "loss": 0.5227, "step": 13132 }, { "epoch": 2.1438308640463655, "grad_norm": 2.801518678665161, "learning_rate": 1.7123910951773404e-05, "loss": 0.6394, "step": 13133 }, { "epoch": 2.14399412268887, "grad_norm": 3.3242735862731934, "learning_rate": 1.7123460995232583e-05, "loss": 0.6861, "step": 13134 }, { "epoch": 2.1441573813313743, "grad_norm": 2.4723970890045166, "learning_rate": 1.71230110094099e-05, "loss": 0.4461, "step": 13135 }, { "epoch": 2.144320639973879, "grad_norm": 3.4242634773254395, "learning_rate": 1.712256099430721e-05, "loss": 0.7321, "step": 13136 }, { "epoch": 2.144483898616383, "grad_norm": 2.3643102645874023, "learning_rate": 1.7122110949926363e-05, "loss": 0.4339, "step": 13137 }, { "epoch": 2.144647157258887, "grad_norm": 2.757450819015503, "learning_rate": 1.7121660876269207e-05, "loss": 0.5251, "step": 13138 }, { "epoch": 2.1448104159013917, "grad_norm": 3.1030161380767822, "learning_rate": 1.7121210773337593e-05, "loss": 0.6526, "step": 13139 }, { "epoch": 2.144973674543896, "grad_norm": 2.9487931728363037, "learning_rate": 1.712076064113337e-05, "loss": 0.5669, "step": 13140 }, { "epoch": 2.1451369331864005, "grad_norm": 2.2878167629241943, "learning_rate": 1.7120310479658387e-05, "loss": 0.5564, "step": 13141 }, { "epoch": 2.145300191828905, "grad_norm": 3.63090443611145, "learning_rate": 1.7119860288914498e-05, "loss": 0.7312, "step": 13142 }, { "epoch": 2.1454634504714094, "grad_norm": 3.1977062225341797, "learning_rate": 1.711941006890355e-05, "loss": 0.6627, "step": 13143 }, { "epoch": 2.145626709113914, "grad_norm": 2.6647422313690186, "learning_rate": 1.7118959819627397e-05, "loss": 0.4711, "step": 13144 }, { "epoch": 2.1457899677564183, "grad_norm": 2.583312511444092, "learning_rate": 1.7118509541087886e-05, "loss": 0.5667, "step": 13145 }, { "epoch": 2.1459532263989223, "grad_norm": 3.2161927223205566, "learning_rate": 1.7118059233286876e-05, "loss": 0.6037, "step": 13146 }, { "epoch": 2.1461164850414267, "grad_norm": 2.9096429347991943, "learning_rate": 1.7117608896226204e-05, "loss": 0.5291, "step": 13147 }, { "epoch": 2.146279743683931, "grad_norm": 2.8973309993743896, "learning_rate": 1.7117158529907736e-05, "loss": 0.5226, "step": 13148 }, { "epoch": 2.1464430023264356, "grad_norm": 3.421848773956299, "learning_rate": 1.7116708134333313e-05, "loss": 0.6614, "step": 13149 }, { "epoch": 2.14660626096894, "grad_norm": 3.0140163898468018, "learning_rate": 1.7116257709504795e-05, "loss": 0.5827, "step": 13150 }, { "epoch": 2.1467695196114445, "grad_norm": 2.923762559890747, "learning_rate": 1.711580725542402e-05, "loss": 0.7176, "step": 13151 }, { "epoch": 2.146932778253949, "grad_norm": 2.26840877532959, "learning_rate": 1.7115356772092858e-05, "loss": 0.4667, "step": 13152 }, { "epoch": 2.1470960368964533, "grad_norm": 2.2137207984924316, "learning_rate": 1.7114906259513145e-05, "loss": 0.4833, "step": 13153 }, { "epoch": 2.147259295538958, "grad_norm": 3.136054754257202, "learning_rate": 1.7114455717686737e-05, "loss": 0.6994, "step": 13154 }, { "epoch": 2.1474225541814618, "grad_norm": 1.9402189254760742, "learning_rate": 1.7114005146615494e-05, "loss": 0.4145, "step": 13155 }, { "epoch": 2.147585812823966, "grad_norm": 3.919492721557617, "learning_rate": 1.7113554546301257e-05, "loss": 0.6534, "step": 13156 }, { "epoch": 2.1477490714664706, "grad_norm": 2.623941659927368, "learning_rate": 1.7113103916745885e-05, "loss": 0.507, "step": 13157 }, { "epoch": 2.147912330108975, "grad_norm": 2.7726097106933594, "learning_rate": 1.711265325795123e-05, "loss": 0.5921, "step": 13158 }, { "epoch": 2.1480755887514795, "grad_norm": 2.960232734680176, "learning_rate": 1.711220256991914e-05, "loss": 0.6356, "step": 13159 }, { "epoch": 2.148238847393984, "grad_norm": 3.0871715545654297, "learning_rate": 1.7111751852651476e-05, "loss": 0.603, "step": 13160 }, { "epoch": 2.1484021060364884, "grad_norm": 2.5562829971313477, "learning_rate": 1.7111301106150084e-05, "loss": 0.4732, "step": 13161 }, { "epoch": 2.148565364678993, "grad_norm": 3.224040985107422, "learning_rate": 1.711085033041681e-05, "loss": 0.5826, "step": 13162 }, { "epoch": 2.1487286233214973, "grad_norm": 2.446830987930298, "learning_rate": 1.7110399525453526e-05, "loss": 0.4432, "step": 13163 }, { "epoch": 2.1488918819640013, "grad_norm": 2.7175850868225098, "learning_rate": 1.7109948691262072e-05, "loss": 0.5855, "step": 13164 }, { "epoch": 2.1490551406065057, "grad_norm": 2.546678304672241, "learning_rate": 1.71094978278443e-05, "loss": 0.571, "step": 13165 }, { "epoch": 2.14921839924901, "grad_norm": 3.088312864303589, "learning_rate": 1.7109046935202068e-05, "loss": 0.5946, "step": 13166 }, { "epoch": 2.1493816578915146, "grad_norm": 2.7086730003356934, "learning_rate": 1.710859601333723e-05, "loss": 0.5417, "step": 13167 }, { "epoch": 2.149544916534019, "grad_norm": 2.895498037338257, "learning_rate": 1.710814506225164e-05, "loss": 0.6238, "step": 13168 }, { "epoch": 2.1497081751765235, "grad_norm": 2.955596923828125, "learning_rate": 1.7107694081947147e-05, "loss": 0.6294, "step": 13169 }, { "epoch": 2.149871433819028, "grad_norm": 3.4399633407592773, "learning_rate": 1.7107243072425602e-05, "loss": 0.6846, "step": 13170 }, { "epoch": 2.1500346924615323, "grad_norm": 2.6995251178741455, "learning_rate": 1.7106792033688874e-05, "loss": 0.5601, "step": 13171 }, { "epoch": 2.1501979511040368, "grad_norm": 2.621718168258667, "learning_rate": 1.7106340965738803e-05, "loss": 0.4793, "step": 13172 }, { "epoch": 2.1503612097465408, "grad_norm": 3.111351728439331, "learning_rate": 1.7105889868577245e-05, "loss": 0.6199, "step": 13173 }, { "epoch": 2.150524468389045, "grad_norm": 2.715630054473877, "learning_rate": 1.7105438742206062e-05, "loss": 0.6011, "step": 13174 }, { "epoch": 2.1506877270315496, "grad_norm": 2.9107654094696045, "learning_rate": 1.71049875866271e-05, "loss": 0.5658, "step": 13175 }, { "epoch": 2.150850985674054, "grad_norm": 3.3071084022521973, "learning_rate": 1.710453640184222e-05, "loss": 0.6906, "step": 13176 }, { "epoch": 2.1510142443165585, "grad_norm": 3.0520074367523193, "learning_rate": 1.710408518785327e-05, "loss": 0.641, "step": 13177 }, { "epoch": 2.151177502959063, "grad_norm": 2.8636205196380615, "learning_rate": 1.710363394466211e-05, "loss": 0.638, "step": 13178 }, { "epoch": 2.1513407616015674, "grad_norm": 2.703197956085205, "learning_rate": 1.7103182672270592e-05, "loss": 0.494, "step": 13179 }, { "epoch": 2.151504020244072, "grad_norm": 3.6369638442993164, "learning_rate": 1.710273137068057e-05, "loss": 0.757, "step": 13180 }, { "epoch": 2.151667278886576, "grad_norm": 3.059213876724243, "learning_rate": 1.7102280039893906e-05, "loss": 0.6291, "step": 13181 }, { "epoch": 2.1518305375290803, "grad_norm": 3.2025578022003174, "learning_rate": 1.7101828679912448e-05, "loss": 0.6171, "step": 13182 }, { "epoch": 2.1519937961715847, "grad_norm": 2.9794583320617676, "learning_rate": 1.7101377290738054e-05, "loss": 0.5627, "step": 13183 }, { "epoch": 2.152157054814089, "grad_norm": 2.6855857372283936, "learning_rate": 1.710092587237258e-05, "loss": 0.665, "step": 13184 }, { "epoch": 2.1523203134565936, "grad_norm": 2.6666440963745117, "learning_rate": 1.710047442481788e-05, "loss": 0.6738, "step": 13185 }, { "epoch": 2.152483572099098, "grad_norm": 2.9547932147979736, "learning_rate": 1.7100022948075815e-05, "loss": 0.5908, "step": 13186 }, { "epoch": 2.1526468307416025, "grad_norm": 2.3790807723999023, "learning_rate": 1.709957144214823e-05, "loss": 0.5014, "step": 13187 }, { "epoch": 2.152810089384107, "grad_norm": 3.34116530418396, "learning_rate": 1.709911990703699e-05, "loss": 0.5818, "step": 13188 }, { "epoch": 2.1529733480266113, "grad_norm": 3.0051217079162598, "learning_rate": 1.7098668342743952e-05, "loss": 0.6012, "step": 13189 }, { "epoch": 2.1531366066691158, "grad_norm": 3.3804538249969482, "learning_rate": 1.7098216749270965e-05, "loss": 0.6457, "step": 13190 }, { "epoch": 2.1532998653116198, "grad_norm": 3.4339194297790527, "learning_rate": 1.709776512661989e-05, "loss": 0.7163, "step": 13191 }, { "epoch": 2.153463123954124, "grad_norm": 2.882957935333252, "learning_rate": 1.7097313474792584e-05, "loss": 0.6461, "step": 13192 }, { "epoch": 2.1536263825966286, "grad_norm": 3.9510860443115234, "learning_rate": 1.7096861793790905e-05, "loss": 0.746, "step": 13193 }, { "epoch": 2.153789641239133, "grad_norm": 3.391867160797119, "learning_rate": 1.7096410083616702e-05, "loss": 0.6212, "step": 13194 }, { "epoch": 2.1539528998816375, "grad_norm": 3.0655314922332764, "learning_rate": 1.7095958344271837e-05, "loss": 0.5265, "step": 13195 }, { "epoch": 2.154116158524142, "grad_norm": 2.8466129302978516, "learning_rate": 1.7095506575758166e-05, "loss": 0.5384, "step": 13196 }, { "epoch": 2.1542794171666464, "grad_norm": 2.6500113010406494, "learning_rate": 1.709505477807755e-05, "loss": 0.5339, "step": 13197 }, { "epoch": 2.154442675809151, "grad_norm": 2.6431000232696533, "learning_rate": 1.7094602951231843e-05, "loss": 0.589, "step": 13198 }, { "epoch": 2.154605934451655, "grad_norm": 2.6736669540405273, "learning_rate": 1.70941510952229e-05, "loss": 0.5742, "step": 13199 }, { "epoch": 2.1547691930941593, "grad_norm": 2.9318301677703857, "learning_rate": 1.709369921005258e-05, "loss": 0.5268, "step": 13200 }, { "epoch": 2.1549324517366637, "grad_norm": 2.5982351303100586, "learning_rate": 1.709324729572274e-05, "loss": 0.5275, "step": 13201 }, { "epoch": 2.155095710379168, "grad_norm": 2.6354026794433594, "learning_rate": 1.7092795352235242e-05, "loss": 0.5184, "step": 13202 }, { "epoch": 2.1552589690216726, "grad_norm": 2.7642858028411865, "learning_rate": 1.7092343379591937e-05, "loss": 0.559, "step": 13203 }, { "epoch": 2.155422227664177, "grad_norm": 2.7108314037323, "learning_rate": 1.709189137779469e-05, "loss": 0.5396, "step": 13204 }, { "epoch": 2.1555854863066815, "grad_norm": 2.587405204772949, "learning_rate": 1.7091439346845352e-05, "loss": 0.5553, "step": 13205 }, { "epoch": 2.155748744949186, "grad_norm": 2.803762197494507, "learning_rate": 1.709098728674579e-05, "loss": 0.5355, "step": 13206 }, { "epoch": 2.1559120035916903, "grad_norm": 2.8144190311431885, "learning_rate": 1.7090535197497852e-05, "loss": 0.5748, "step": 13207 }, { "epoch": 2.1560752622341943, "grad_norm": 3.0110509395599365, "learning_rate": 1.7090083079103402e-05, "loss": 0.588, "step": 13208 }, { "epoch": 2.1562385208766988, "grad_norm": 3.113881826400757, "learning_rate": 1.7089630931564296e-05, "loss": 0.6616, "step": 13209 }, { "epoch": 2.156401779519203, "grad_norm": 2.7067580223083496, "learning_rate": 1.70891787548824e-05, "loss": 0.5906, "step": 13210 }, { "epoch": 2.1565650381617076, "grad_norm": 3.4508020877838135, "learning_rate": 1.7088726549059563e-05, "loss": 0.709, "step": 13211 }, { "epoch": 2.156728296804212, "grad_norm": 2.603447675704956, "learning_rate": 1.7088274314097644e-05, "loss": 0.6261, "step": 13212 }, { "epoch": 2.1568915554467165, "grad_norm": 3.4145143032073975, "learning_rate": 1.708782204999851e-05, "loss": 0.6132, "step": 13213 }, { "epoch": 2.157054814089221, "grad_norm": 3.1056125164031982, "learning_rate": 1.7087369756764016e-05, "loss": 0.6547, "step": 13214 }, { "epoch": 2.1572180727317254, "grad_norm": 2.9975924491882324, "learning_rate": 1.7086917434396023e-05, "loss": 0.581, "step": 13215 }, { "epoch": 2.15738133137423, "grad_norm": 3.4130513668060303, "learning_rate": 1.7086465082896384e-05, "loss": 0.6375, "step": 13216 }, { "epoch": 2.157544590016734, "grad_norm": 2.76921010017395, "learning_rate": 1.7086012702266965e-05, "loss": 0.5392, "step": 13217 }, { "epoch": 2.1577078486592383, "grad_norm": 3.0816686153411865, "learning_rate": 1.7085560292509623e-05, "loss": 0.5847, "step": 13218 }, { "epoch": 2.1578711073017427, "grad_norm": 3.253441095352173, "learning_rate": 1.708510785362622e-05, "loss": 0.5977, "step": 13219 }, { "epoch": 2.158034365944247, "grad_norm": 2.6251251697540283, "learning_rate": 1.7084655385618613e-05, "loss": 0.4888, "step": 13220 }, { "epoch": 2.1581976245867516, "grad_norm": 3.352052927017212, "learning_rate": 1.708420288848866e-05, "loss": 0.7799, "step": 13221 }, { "epoch": 2.158360883229256, "grad_norm": 2.590256690979004, "learning_rate": 1.7083750362238228e-05, "loss": 0.4635, "step": 13222 }, { "epoch": 2.1585241418717604, "grad_norm": 2.7932980060577393, "learning_rate": 1.708329780686917e-05, "loss": 0.5464, "step": 13223 }, { "epoch": 2.158687400514265, "grad_norm": 2.9252381324768066, "learning_rate": 1.7082845222383352e-05, "loss": 0.5965, "step": 13224 }, { "epoch": 2.1588506591567693, "grad_norm": 3.4859471321105957, "learning_rate": 1.708239260878263e-05, "loss": 0.5538, "step": 13225 }, { "epoch": 2.1590139177992733, "grad_norm": 2.99135160446167, "learning_rate": 1.7081939966068866e-05, "loss": 0.5948, "step": 13226 }, { "epoch": 2.1591771764417778, "grad_norm": 3.158679246902466, "learning_rate": 1.708148729424392e-05, "loss": 0.6715, "step": 13227 }, { "epoch": 2.159340435084282, "grad_norm": 2.6702873706817627, "learning_rate": 1.7081034593309657e-05, "loss": 0.6098, "step": 13228 }, { "epoch": 2.1595036937267866, "grad_norm": 2.5206515789031982, "learning_rate": 1.708058186326793e-05, "loss": 0.4804, "step": 13229 }, { "epoch": 2.159666952369291, "grad_norm": 2.282064914703369, "learning_rate": 1.708012910412061e-05, "loss": 0.4535, "step": 13230 }, { "epoch": 2.1598302110117955, "grad_norm": 3.279287815093994, "learning_rate": 1.707967631586955e-05, "loss": 0.617, "step": 13231 }, { "epoch": 2.1599934696543, "grad_norm": 2.479464292526245, "learning_rate": 1.707922349851661e-05, "loss": 0.5866, "step": 13232 }, { "epoch": 2.1601567282968044, "grad_norm": 2.309208631515503, "learning_rate": 1.707877065206366e-05, "loss": 0.4333, "step": 13233 }, { "epoch": 2.1603199869393084, "grad_norm": 2.596971273422241, "learning_rate": 1.7078317776512557e-05, "loss": 0.5417, "step": 13234 }, { "epoch": 2.160483245581813, "grad_norm": 2.409395456314087, "learning_rate": 1.707786487186516e-05, "loss": 0.5508, "step": 13235 }, { "epoch": 2.1606465042243173, "grad_norm": 2.7434542179107666, "learning_rate": 1.7077411938123335e-05, "loss": 0.5463, "step": 13236 }, { "epoch": 2.1608097628668217, "grad_norm": 2.7343952655792236, "learning_rate": 1.7076958975288943e-05, "loss": 0.5171, "step": 13237 }, { "epoch": 2.160973021509326, "grad_norm": 2.987072229385376, "learning_rate": 1.707650598336384e-05, "loss": 0.6568, "step": 13238 }, { "epoch": 2.1611362801518306, "grad_norm": 2.738960027694702, "learning_rate": 1.7076052962349896e-05, "loss": 0.5683, "step": 13239 }, { "epoch": 2.161299538794335, "grad_norm": 2.6409552097320557, "learning_rate": 1.7075599912248972e-05, "loss": 0.5346, "step": 13240 }, { "epoch": 2.1614627974368394, "grad_norm": 3.061189889907837, "learning_rate": 1.7075146833062922e-05, "loss": 0.628, "step": 13241 }, { "epoch": 2.161626056079344, "grad_norm": 2.9241859912872314, "learning_rate": 1.7074693724793623e-05, "loss": 0.682, "step": 13242 }, { "epoch": 2.1617893147218483, "grad_norm": 2.9266650676727295, "learning_rate": 1.7074240587442925e-05, "loss": 0.5553, "step": 13243 }, { "epoch": 2.1619525733643523, "grad_norm": 2.8913488388061523, "learning_rate": 1.70737874210127e-05, "loss": 0.6549, "step": 13244 }, { "epoch": 2.1621158320068568, "grad_norm": 3.202084541320801, "learning_rate": 1.70733342255048e-05, "loss": 0.6593, "step": 13245 }, { "epoch": 2.162279090649361, "grad_norm": 2.7983720302581787, "learning_rate": 1.7072881000921093e-05, "loss": 0.6044, "step": 13246 }, { "epoch": 2.1624423492918656, "grad_norm": 2.8469605445861816, "learning_rate": 1.7072427747263444e-05, "loss": 0.5458, "step": 13247 }, { "epoch": 2.16260560793437, "grad_norm": 2.7686431407928467, "learning_rate": 1.707197446453372e-05, "loss": 0.6415, "step": 13248 }, { "epoch": 2.1627688665768745, "grad_norm": 3.3106212615966797, "learning_rate": 1.7071521152733774e-05, "loss": 0.6657, "step": 13249 }, { "epoch": 2.162932125219379, "grad_norm": 2.961059808731079, "learning_rate": 1.7071067811865477e-05, "loss": 0.558, "step": 13250 }, { "epoch": 2.1630953838618834, "grad_norm": 2.81998610496521, "learning_rate": 1.707061444193069e-05, "loss": 0.5197, "step": 13251 }, { "epoch": 2.1632586425043874, "grad_norm": 3.2154502868652344, "learning_rate": 1.7070161042931273e-05, "loss": 0.5548, "step": 13252 }, { "epoch": 2.163421901146892, "grad_norm": 2.7166244983673096, "learning_rate": 1.7069707614869103e-05, "loss": 0.5607, "step": 13253 }, { "epoch": 2.1635851597893963, "grad_norm": 2.605905771255493, "learning_rate": 1.7069254157746026e-05, "loss": 0.4972, "step": 13254 }, { "epoch": 2.1637484184319007, "grad_norm": 3.501430034637451, "learning_rate": 1.7068800671563918e-05, "loss": 0.6228, "step": 13255 }, { "epoch": 2.163911677074405, "grad_norm": 3.3829903602600098, "learning_rate": 1.706834715632464e-05, "loss": 0.6476, "step": 13256 }, { "epoch": 2.1640749357169096, "grad_norm": 3.038691997528076, "learning_rate": 1.7067893612030053e-05, "loss": 0.7292, "step": 13257 }, { "epoch": 2.164238194359414, "grad_norm": 3.2640159130096436, "learning_rate": 1.7067440038682028e-05, "loss": 1.1219, "step": 13258 }, { "epoch": 2.1644014530019184, "grad_norm": 2.8600456714630127, "learning_rate": 1.7066986436282422e-05, "loss": 0.6584, "step": 13259 }, { "epoch": 2.164564711644423, "grad_norm": 2.936385154724121, "learning_rate": 1.7066532804833103e-05, "loss": 0.5826, "step": 13260 }, { "epoch": 2.164727970286927, "grad_norm": 3.485377073287964, "learning_rate": 1.7066079144335937e-05, "loss": 0.6351, "step": 13261 }, { "epoch": 2.1648912289294313, "grad_norm": 2.7991232872009277, "learning_rate": 1.706562545479279e-05, "loss": 0.5816, "step": 13262 }, { "epoch": 2.1650544875719357, "grad_norm": 3.427140235900879, "learning_rate": 1.706517173620552e-05, "loss": 0.6523, "step": 13263 }, { "epoch": 2.16521774621444, "grad_norm": 2.3451366424560547, "learning_rate": 1.7064717988576004e-05, "loss": 0.5039, "step": 13264 }, { "epoch": 2.1653810048569446, "grad_norm": 2.8454840183258057, "learning_rate": 1.706426421190609e-05, "loss": 0.5756, "step": 13265 }, { "epoch": 2.165544263499449, "grad_norm": 2.396172046661377, "learning_rate": 1.7063810406197663e-05, "loss": 0.4825, "step": 13266 }, { "epoch": 2.1657075221419535, "grad_norm": 2.654219150543213, "learning_rate": 1.7063356571452573e-05, "loss": 0.5222, "step": 13267 }, { "epoch": 2.165870780784458, "grad_norm": 2.807271957397461, "learning_rate": 1.7062902707672694e-05, "loss": 0.5269, "step": 13268 }, { "epoch": 2.166034039426962, "grad_norm": 2.9642748832702637, "learning_rate": 1.7062448814859887e-05, "loss": 0.632, "step": 13269 }, { "epoch": 2.1661972980694664, "grad_norm": 2.5225346088409424, "learning_rate": 1.7061994893016022e-05, "loss": 0.4513, "step": 13270 }, { "epoch": 2.166360556711971, "grad_norm": 1.8554539680480957, "learning_rate": 1.706154094214296e-05, "loss": 0.4054, "step": 13271 }, { "epoch": 2.1665238153544752, "grad_norm": 2.7288575172424316, "learning_rate": 1.706108696224257e-05, "loss": 0.6365, "step": 13272 }, { "epoch": 2.1666870739969797, "grad_norm": 3.0791094303131104, "learning_rate": 1.706063295331672e-05, "loss": 0.5957, "step": 13273 }, { "epoch": 2.166850332639484, "grad_norm": 2.7923173904418945, "learning_rate": 1.706017891536727e-05, "loss": 0.5235, "step": 13274 }, { "epoch": 2.1670135912819886, "grad_norm": 2.1536498069763184, "learning_rate": 1.7059724848396092e-05, "loss": 0.4835, "step": 13275 }, { "epoch": 2.167176849924493, "grad_norm": 3.3017101287841797, "learning_rate": 1.705927075240505e-05, "loss": 0.9932, "step": 13276 }, { "epoch": 2.1673401085669974, "grad_norm": 2.827683448791504, "learning_rate": 1.7058816627396017e-05, "loss": 0.5402, "step": 13277 }, { "epoch": 2.167503367209502, "grad_norm": 2.653902769088745, "learning_rate": 1.705836247337085e-05, "loss": 0.5582, "step": 13278 }, { "epoch": 2.167666625852006, "grad_norm": 2.7424938678741455, "learning_rate": 1.705790829033142e-05, "loss": 0.5189, "step": 13279 }, { "epoch": 2.1678298844945103, "grad_norm": 3.093827486038208, "learning_rate": 1.705745407827959e-05, "loss": 0.6158, "step": 13280 }, { "epoch": 2.1679931431370147, "grad_norm": 2.5608842372894287, "learning_rate": 1.7056999837217235e-05, "loss": 0.5005, "step": 13281 }, { "epoch": 2.168156401779519, "grad_norm": 2.793029308319092, "learning_rate": 1.7056545567146222e-05, "loss": 0.673, "step": 13282 }, { "epoch": 2.1683196604220236, "grad_norm": 2.959167003631592, "learning_rate": 1.705609126806841e-05, "loss": 0.5863, "step": 13283 }, { "epoch": 2.168482919064528, "grad_norm": 2.691140651702881, "learning_rate": 1.7055636939985674e-05, "loss": 0.5971, "step": 13284 }, { "epoch": 2.1686461777070325, "grad_norm": 2.7411913871765137, "learning_rate": 1.7055182582899875e-05, "loss": 0.4299, "step": 13285 }, { "epoch": 2.168809436349537, "grad_norm": 2.9804141521453857, "learning_rate": 1.7054728196812887e-05, "loss": 0.6427, "step": 13286 }, { "epoch": 2.168972694992041, "grad_norm": 3.0552337169647217, "learning_rate": 1.7054273781726575e-05, "loss": 0.6055, "step": 13287 }, { "epoch": 2.1691359536345454, "grad_norm": 3.3933844566345215, "learning_rate": 1.7053819337642808e-05, "loss": 0.5891, "step": 13288 }, { "epoch": 2.16929921227705, "grad_norm": 2.9136316776275635, "learning_rate": 1.705336486456345e-05, "loss": 0.4928, "step": 13289 }, { "epoch": 2.1694624709195542, "grad_norm": 2.2675626277923584, "learning_rate": 1.7052910362490377e-05, "loss": 0.4142, "step": 13290 }, { "epoch": 2.1696257295620587, "grad_norm": 2.980792760848999, "learning_rate": 1.7052455831425447e-05, "loss": 0.6351, "step": 13291 }, { "epoch": 2.169788988204563, "grad_norm": 2.299598455429077, "learning_rate": 1.705200127137054e-05, "loss": 0.4307, "step": 13292 }, { "epoch": 2.1699522468470676, "grad_norm": 3.0600829124450684, "learning_rate": 1.7051546682327513e-05, "loss": 0.5967, "step": 13293 }, { "epoch": 2.170115505489572, "grad_norm": 2.8458900451660156, "learning_rate": 1.7051092064298242e-05, "loss": 0.517, "step": 13294 }, { "epoch": 2.1702787641320764, "grad_norm": 3.3532321453094482, "learning_rate": 1.70506374172846e-05, "loss": 0.7025, "step": 13295 }, { "epoch": 2.1704420227745804, "grad_norm": 2.580601215362549, "learning_rate": 1.7050182741288438e-05, "loss": 0.5391, "step": 13296 }, { "epoch": 2.170605281417085, "grad_norm": 2.8209753036499023, "learning_rate": 1.7049728036311646e-05, "loss": 0.5668, "step": 13297 }, { "epoch": 2.1707685400595893, "grad_norm": 2.90944766998291, "learning_rate": 1.704927330235608e-05, "loss": 0.5007, "step": 13298 }, { "epoch": 2.1709317987020937, "grad_norm": 2.904244899749756, "learning_rate": 1.7048818539423616e-05, "loss": 0.555, "step": 13299 }, { "epoch": 2.171095057344598, "grad_norm": 2.6719274520874023, "learning_rate": 1.704836374751612e-05, "loss": 0.6073, "step": 13300 }, { "epoch": 2.1712583159871026, "grad_norm": 3.1070070266723633, "learning_rate": 1.704790892663546e-05, "loss": 0.595, "step": 13301 }, { "epoch": 2.171421574629607, "grad_norm": 3.2855401039123535, "learning_rate": 1.704745407678351e-05, "loss": 0.6499, "step": 13302 }, { "epoch": 2.1715848332721115, "grad_norm": 3.6903586387634277, "learning_rate": 1.704699919796214e-05, "loss": 0.7257, "step": 13303 }, { "epoch": 2.171748091914616, "grad_norm": 3.6481873989105225, "learning_rate": 1.7046544290173207e-05, "loss": 0.7991, "step": 13304 }, { "epoch": 2.17191135055712, "grad_norm": 2.934483766555786, "learning_rate": 1.70460893534186e-05, "loss": 0.6412, "step": 13305 }, { "epoch": 2.1720746091996244, "grad_norm": 3.1629745960235596, "learning_rate": 1.7045634387700175e-05, "loss": 0.5906, "step": 13306 }, { "epoch": 2.172237867842129, "grad_norm": 3.392754316329956, "learning_rate": 1.704517939301981e-05, "loss": 0.6821, "step": 13307 }, { "epoch": 2.1724011264846332, "grad_norm": 3.1888439655303955, "learning_rate": 1.7044724369379374e-05, "loss": 0.6495, "step": 13308 }, { "epoch": 2.1725643851271377, "grad_norm": 3.008425712585449, "learning_rate": 1.7044269316780738e-05, "loss": 0.5429, "step": 13309 }, { "epoch": 2.172727643769642, "grad_norm": 3.1227564811706543, "learning_rate": 1.7043814235225765e-05, "loss": 0.5398, "step": 13310 }, { "epoch": 2.1728909024121466, "grad_norm": 2.9499199390411377, "learning_rate": 1.7043359124716333e-05, "loss": 0.6186, "step": 13311 }, { "epoch": 2.173054161054651, "grad_norm": 2.890824556350708, "learning_rate": 1.7042903985254315e-05, "loss": 0.598, "step": 13312 }, { "epoch": 2.1732174196971554, "grad_norm": 2.994884729385376, "learning_rate": 1.7042448816841575e-05, "loss": 0.6653, "step": 13313 }, { "epoch": 2.1733806783396594, "grad_norm": 2.6741549968719482, "learning_rate": 1.7041993619479987e-05, "loss": 0.5625, "step": 13314 }, { "epoch": 2.173543936982164, "grad_norm": 2.97932505607605, "learning_rate": 1.7041538393171423e-05, "loss": 0.6188, "step": 13315 }, { "epoch": 2.1737071956246683, "grad_norm": 2.8171279430389404, "learning_rate": 1.7041083137917755e-05, "loss": 0.6333, "step": 13316 }, { "epoch": 2.1738704542671727, "grad_norm": 2.895601749420166, "learning_rate": 1.7040627853720853e-05, "loss": 0.5887, "step": 13317 }, { "epoch": 2.174033712909677, "grad_norm": 3.152219772338867, "learning_rate": 1.7040172540582583e-05, "loss": 0.6649, "step": 13318 }, { "epoch": 2.1741969715521816, "grad_norm": 3.067359447479248, "learning_rate": 1.7039717198504826e-05, "loss": 0.6521, "step": 13319 }, { "epoch": 2.174360230194686, "grad_norm": 2.809683322906494, "learning_rate": 1.7039261827489452e-05, "loss": 0.5737, "step": 13320 }, { "epoch": 2.1745234888371905, "grad_norm": 2.9548497200012207, "learning_rate": 1.703880642753833e-05, "loss": 0.6203, "step": 13321 }, { "epoch": 2.1746867474796945, "grad_norm": 2.9390172958374023, "learning_rate": 1.7038350998653328e-05, "loss": 0.6299, "step": 13322 }, { "epoch": 2.174850006122199, "grad_norm": 2.620199680328369, "learning_rate": 1.7037895540836325e-05, "loss": 0.5495, "step": 13323 }, { "epoch": 2.1750132647647034, "grad_norm": 2.894913673400879, "learning_rate": 1.7037440054089193e-05, "loss": 0.6226, "step": 13324 }, { "epoch": 2.175176523407208, "grad_norm": 2.824420690536499, "learning_rate": 1.70369845384138e-05, "loss": 0.5444, "step": 13325 }, { "epoch": 2.1753397820497122, "grad_norm": 2.8049588203430176, "learning_rate": 1.703652899381202e-05, "loss": 0.5157, "step": 13326 }, { "epoch": 2.1755030406922167, "grad_norm": 3.696117877960205, "learning_rate": 1.7036073420285725e-05, "loss": 0.8155, "step": 13327 }, { "epoch": 2.175666299334721, "grad_norm": 3.0665714740753174, "learning_rate": 1.7035617817836792e-05, "loss": 0.6701, "step": 13328 }, { "epoch": 2.1758295579772255, "grad_norm": 3.1305618286132812, "learning_rate": 1.703516218646709e-05, "loss": 0.6576, "step": 13329 }, { "epoch": 2.17599281661973, "grad_norm": 2.686703681945801, "learning_rate": 1.7034706526178494e-05, "loss": 0.6071, "step": 13330 }, { "epoch": 2.1761560752622344, "grad_norm": 2.2611122131347656, "learning_rate": 1.7034250836972873e-05, "loss": 0.4679, "step": 13331 }, { "epoch": 2.1763193339047384, "grad_norm": 3.1801164150238037, "learning_rate": 1.7033795118852105e-05, "loss": 0.6068, "step": 13332 }, { "epoch": 2.176482592547243, "grad_norm": 2.9036407470703125, "learning_rate": 1.7033339371818063e-05, "loss": 0.7095, "step": 13333 }, { "epoch": 2.1766458511897473, "grad_norm": 3.2633843421936035, "learning_rate": 1.7032883595872616e-05, "loss": 0.7029, "step": 13334 }, { "epoch": 2.1768091098322517, "grad_norm": 3.265347957611084, "learning_rate": 1.703242779101764e-05, "loss": 0.6839, "step": 13335 }, { "epoch": 2.176972368474756, "grad_norm": 3.4307479858398438, "learning_rate": 1.7031971957255006e-05, "loss": 1.283, "step": 13336 }, { "epoch": 2.1771356271172606, "grad_norm": 2.9776487350463867, "learning_rate": 1.7031516094586595e-05, "loss": 0.5305, "step": 13337 }, { "epoch": 2.177298885759765, "grad_norm": 2.9363815784454346, "learning_rate": 1.7031060203014273e-05, "loss": 0.5175, "step": 13338 }, { "epoch": 2.1774621444022695, "grad_norm": 2.6780846118927, "learning_rate": 1.7030604282539922e-05, "loss": 0.6055, "step": 13339 }, { "epoch": 2.1776254030447735, "grad_norm": 2.582887887954712, "learning_rate": 1.7030148333165407e-05, "loss": 0.6186, "step": 13340 }, { "epoch": 2.177788661687278, "grad_norm": 2.7789549827575684, "learning_rate": 1.702969235489261e-05, "loss": 0.6367, "step": 13341 }, { "epoch": 2.1779519203297824, "grad_norm": 2.732267141342163, "learning_rate": 1.70292363477234e-05, "loss": 0.5576, "step": 13342 }, { "epoch": 2.178115178972287, "grad_norm": 3.19101619720459, "learning_rate": 1.7028780311659655e-05, "loss": 0.5603, "step": 13343 }, { "epoch": 2.1782784376147912, "grad_norm": 2.3382551670074463, "learning_rate": 1.702832424670325e-05, "loss": 0.4658, "step": 13344 }, { "epoch": 2.1784416962572957, "grad_norm": 2.9814608097076416, "learning_rate": 1.702786815285605e-05, "loss": 0.5708, "step": 13345 }, { "epoch": 2.1786049548998, "grad_norm": 2.787196397781372, "learning_rate": 1.7027412030119945e-05, "loss": 0.5283, "step": 13346 }, { "epoch": 2.1787682135423045, "grad_norm": 3.149423122406006, "learning_rate": 1.70269558784968e-05, "loss": 0.6166, "step": 13347 }, { "epoch": 2.178931472184809, "grad_norm": 2.779938220977783, "learning_rate": 1.7026499697988496e-05, "loss": 0.5342, "step": 13348 }, { "epoch": 2.179094730827313, "grad_norm": 2.888117551803589, "learning_rate": 1.70260434885969e-05, "loss": 0.627, "step": 13349 }, { "epoch": 2.1792579894698174, "grad_norm": 2.639679431915283, "learning_rate": 1.7025587250323894e-05, "loss": 0.5037, "step": 13350 }, { "epoch": 2.179421248112322, "grad_norm": 2.474273920059204, "learning_rate": 1.702513098317135e-05, "loss": 0.5355, "step": 13351 }, { "epoch": 2.1795845067548263, "grad_norm": 3.141974449157715, "learning_rate": 1.702467468714115e-05, "loss": 0.6411, "step": 13352 }, { "epoch": 2.1797477653973307, "grad_norm": 2.9073164463043213, "learning_rate": 1.7024218362235162e-05, "loss": 0.6646, "step": 13353 }, { "epoch": 2.179911024039835, "grad_norm": 2.73479962348938, "learning_rate": 1.7023762008455263e-05, "loss": 0.5322, "step": 13354 }, { "epoch": 2.1800742826823396, "grad_norm": 2.7553069591522217, "learning_rate": 1.702330562580333e-05, "loss": 0.5035, "step": 13355 }, { "epoch": 2.180237541324844, "grad_norm": 3.342305898666382, "learning_rate": 1.7022849214281246e-05, "loss": 0.9967, "step": 13356 }, { "epoch": 2.180400799967348, "grad_norm": 2.546051025390625, "learning_rate": 1.7022392773890876e-05, "loss": 0.5463, "step": 13357 }, { "epoch": 2.1805640586098525, "grad_norm": 2.660677909851074, "learning_rate": 1.70219363046341e-05, "loss": 0.5385, "step": 13358 }, { "epoch": 2.180727317252357, "grad_norm": 2.709897518157959, "learning_rate": 1.7021479806512797e-05, "loss": 0.573, "step": 13359 }, { "epoch": 2.1808905758948613, "grad_norm": 3.059319019317627, "learning_rate": 1.702102327952884e-05, "loss": 0.6785, "step": 13360 }, { "epoch": 2.181053834537366, "grad_norm": 2.6646668910980225, "learning_rate": 1.702056672368411e-05, "loss": 0.5175, "step": 13361 }, { "epoch": 2.1812170931798702, "grad_norm": 3.2608838081359863, "learning_rate": 1.702011013898048e-05, "loss": 0.6265, "step": 13362 }, { "epoch": 2.1813803518223747, "grad_norm": 2.7429111003875732, "learning_rate": 1.7019653525419827e-05, "loss": 0.6294, "step": 13363 }, { "epoch": 2.181543610464879, "grad_norm": 2.80080246925354, "learning_rate": 1.701919688300403e-05, "loss": 0.5704, "step": 13364 }, { "epoch": 2.1817068691073835, "grad_norm": 2.621110439300537, "learning_rate": 1.7018740211734967e-05, "loss": 0.6156, "step": 13365 }, { "epoch": 2.181870127749888, "grad_norm": 2.8760342597961426, "learning_rate": 1.701828351161451e-05, "loss": 0.6539, "step": 13366 }, { "epoch": 2.182033386392392, "grad_norm": 3.2385594844818115, "learning_rate": 1.701782678264454e-05, "loss": 0.6383, "step": 13367 }, { "epoch": 2.1821966450348964, "grad_norm": 2.741986036300659, "learning_rate": 1.7017370024826934e-05, "loss": 0.5595, "step": 13368 }, { "epoch": 2.182359903677401, "grad_norm": 2.8232829570770264, "learning_rate": 1.701691323816357e-05, "loss": 0.5165, "step": 13369 }, { "epoch": 2.1825231623199053, "grad_norm": 2.674389362335205, "learning_rate": 1.7016456422656327e-05, "loss": 0.5546, "step": 13370 }, { "epoch": 2.1826864209624097, "grad_norm": 2.861215353012085, "learning_rate": 1.7015999578307078e-05, "loss": 0.536, "step": 13371 }, { "epoch": 2.182849679604914, "grad_norm": 2.5975615978240967, "learning_rate": 1.7015542705117708e-05, "loss": 0.5102, "step": 13372 }, { "epoch": 2.1830129382474186, "grad_norm": 3.267972946166992, "learning_rate": 1.7015085803090086e-05, "loss": 0.6114, "step": 13373 }, { "epoch": 2.183176196889923, "grad_norm": 3.559647798538208, "learning_rate": 1.7014628872226097e-05, "loss": 0.6682, "step": 13374 }, { "epoch": 2.183339455532427, "grad_norm": 3.7848002910614014, "learning_rate": 1.7014171912527616e-05, "loss": 0.7789, "step": 13375 }, { "epoch": 2.1835027141749315, "grad_norm": 3.153341054916382, "learning_rate": 1.7013714923996527e-05, "loss": 0.6226, "step": 13376 }, { "epoch": 2.183665972817436, "grad_norm": 2.855329751968384, "learning_rate": 1.7013257906634703e-05, "loss": 0.5864, "step": 13377 }, { "epoch": 2.1838292314599403, "grad_norm": 3.491489887237549, "learning_rate": 1.701280086044402e-05, "loss": 0.6578, "step": 13378 }, { "epoch": 2.183992490102445, "grad_norm": 3.2989697456359863, "learning_rate": 1.7012343785426365e-05, "loss": 0.5934, "step": 13379 }, { "epoch": 2.184155748744949, "grad_norm": 2.7704341411590576, "learning_rate": 1.7011886681583608e-05, "loss": 0.5874, "step": 13380 }, { "epoch": 2.1843190073874537, "grad_norm": 2.9727532863616943, "learning_rate": 1.701142954891764e-05, "loss": 0.5686, "step": 13381 }, { "epoch": 2.184482266029958, "grad_norm": 2.9406473636627197, "learning_rate": 1.7010972387430326e-05, "loss": 0.6119, "step": 13382 }, { "epoch": 2.1846455246724625, "grad_norm": 2.3300552368164062, "learning_rate": 1.701051519712355e-05, "loss": 0.4176, "step": 13383 }, { "epoch": 2.1848087833149665, "grad_norm": 2.730846643447876, "learning_rate": 1.70100579779992e-05, "loss": 0.5247, "step": 13384 }, { "epoch": 2.184972041957471, "grad_norm": 2.469841480255127, "learning_rate": 1.7009600730059145e-05, "loss": 0.5821, "step": 13385 }, { "epoch": 2.1851353005999754, "grad_norm": 3.000807046890259, "learning_rate": 1.700914345330527e-05, "loss": 0.5461, "step": 13386 }, { "epoch": 2.18529855924248, "grad_norm": 3.103527545928955, "learning_rate": 1.700868614773945e-05, "loss": 0.5652, "step": 13387 }, { "epoch": 2.1854618178849843, "grad_norm": 2.7404580116271973, "learning_rate": 1.700822881336357e-05, "loss": 0.573, "step": 13388 }, { "epoch": 2.1856250765274887, "grad_norm": 2.7294387817382812, "learning_rate": 1.7007771450179506e-05, "loss": 0.4661, "step": 13389 }, { "epoch": 2.185788335169993, "grad_norm": 2.4492459297180176, "learning_rate": 1.7007314058189138e-05, "loss": 0.4782, "step": 13390 }, { "epoch": 2.1859515938124976, "grad_norm": 3.2826449871063232, "learning_rate": 1.7006856637394354e-05, "loss": 0.5992, "step": 13391 }, { "epoch": 2.186114852455002, "grad_norm": 3.1161813735961914, "learning_rate": 1.7006399187797024e-05, "loss": 0.5959, "step": 13392 }, { "epoch": 2.186278111097506, "grad_norm": 2.703303098678589, "learning_rate": 1.7005941709399033e-05, "loss": 0.4551, "step": 13393 }, { "epoch": 2.1864413697400105, "grad_norm": 2.7775156497955322, "learning_rate": 1.700548420220226e-05, "loss": 0.584, "step": 13394 }, { "epoch": 2.186604628382515, "grad_norm": 3.184865951538086, "learning_rate": 1.700502666620859e-05, "loss": 0.6597, "step": 13395 }, { "epoch": 2.1867678870250193, "grad_norm": 2.6141440868377686, "learning_rate": 1.7004569101419895e-05, "loss": 0.5467, "step": 13396 }, { "epoch": 2.186931145667524, "grad_norm": 3.175748825073242, "learning_rate": 1.7004111507838067e-05, "loss": 0.5776, "step": 13397 }, { "epoch": 2.187094404310028, "grad_norm": 2.7686402797698975, "learning_rate": 1.7003653885464976e-05, "loss": 0.5256, "step": 13398 }, { "epoch": 2.1872576629525327, "grad_norm": 2.819413185119629, "learning_rate": 1.700319623430251e-05, "loss": 0.6194, "step": 13399 }, { "epoch": 2.187420921595037, "grad_norm": 3.0434887409210205, "learning_rate": 1.700273855435255e-05, "loss": 0.6027, "step": 13400 }, { "epoch": 2.1875841802375415, "grad_norm": 2.8847603797912598, "learning_rate": 1.700228084561698e-05, "loss": 1.0991, "step": 13401 }, { "epoch": 2.1877474388800455, "grad_norm": 2.716547727584839, "learning_rate": 1.700182310809767e-05, "loss": 0.5695, "step": 13402 }, { "epoch": 2.18791069752255, "grad_norm": 2.5566394329071045, "learning_rate": 1.7001365341796514e-05, "loss": 0.515, "step": 13403 }, { "epoch": 2.1880739561650544, "grad_norm": 3.1100807189941406, "learning_rate": 1.7000907546715387e-05, "loss": 0.5607, "step": 13404 }, { "epoch": 2.188237214807559, "grad_norm": 2.970355272293091, "learning_rate": 1.700044972285617e-05, "loss": 0.6856, "step": 13405 }, { "epoch": 2.1884004734500633, "grad_norm": 2.9225783348083496, "learning_rate": 1.699999187022075e-05, "loss": 0.6016, "step": 13406 }, { "epoch": 2.1885637320925677, "grad_norm": 3.130213737487793, "learning_rate": 1.6999533988811008e-05, "loss": 0.5182, "step": 13407 }, { "epoch": 2.188726990735072, "grad_norm": 3.2487475872039795, "learning_rate": 1.6999076078628824e-05, "loss": 0.6005, "step": 13408 }, { "epoch": 2.1888902493775766, "grad_norm": 2.640287399291992, "learning_rate": 1.6998618139676078e-05, "loss": 0.6023, "step": 13409 }, { "epoch": 2.1890535080200806, "grad_norm": 2.918207883834839, "learning_rate": 1.6998160171954658e-05, "loss": 0.6105, "step": 13410 }, { "epoch": 2.189216766662585, "grad_norm": 3.1134095191955566, "learning_rate": 1.6997702175466446e-05, "loss": 0.5775, "step": 13411 }, { "epoch": 2.1893800253050895, "grad_norm": 3.2143657207489014, "learning_rate": 1.699724415021332e-05, "loss": 0.6394, "step": 13412 }, { "epoch": 2.189543283947594, "grad_norm": 2.828260660171509, "learning_rate": 1.6996786096197167e-05, "loss": 0.5882, "step": 13413 }, { "epoch": 2.1897065425900983, "grad_norm": 3.2555899620056152, "learning_rate": 1.6996328013419863e-05, "loss": 0.6416, "step": 13414 }, { "epoch": 2.1898698012326028, "grad_norm": 3.1615970134735107, "learning_rate": 1.6995869901883304e-05, "loss": 0.6454, "step": 13415 }, { "epoch": 2.190033059875107, "grad_norm": 2.917726516723633, "learning_rate": 1.6995411761589363e-05, "loss": 0.5846, "step": 13416 }, { "epoch": 2.1901963185176117, "grad_norm": 2.9719176292419434, "learning_rate": 1.6994953592539923e-05, "loss": 0.5645, "step": 13417 }, { "epoch": 2.190359577160116, "grad_norm": 3.076420545578003, "learning_rate": 1.6994495394736873e-05, "loss": 0.7132, "step": 13418 }, { "epoch": 2.1905228358026205, "grad_norm": 3.14223313331604, "learning_rate": 1.6994037168182093e-05, "loss": 0.6907, "step": 13419 }, { "epoch": 2.1906860944451245, "grad_norm": 2.7253284454345703, "learning_rate": 1.6993578912877463e-05, "loss": 0.5792, "step": 13420 }, { "epoch": 2.190849353087629, "grad_norm": 2.4357845783233643, "learning_rate": 1.6993120628824876e-05, "loss": 0.5315, "step": 13421 }, { "epoch": 2.1910126117301334, "grad_norm": 2.6089746952056885, "learning_rate": 1.699266231602621e-05, "loss": 0.6174, "step": 13422 }, { "epoch": 2.191175870372638, "grad_norm": 2.725400924682617, "learning_rate": 1.6992203974483347e-05, "loss": 0.5257, "step": 13423 }, { "epoch": 2.1913391290151423, "grad_norm": 2.9822702407836914, "learning_rate": 1.6991745604198175e-05, "loss": 0.6844, "step": 13424 }, { "epoch": 2.1915023876576467, "grad_norm": 2.7204718589782715, "learning_rate": 1.6991287205172575e-05, "loss": 0.5097, "step": 13425 }, { "epoch": 2.191665646300151, "grad_norm": 3.1068480014801025, "learning_rate": 1.6990828777408438e-05, "loss": 0.623, "step": 13426 }, { "epoch": 2.1918289049426556, "grad_norm": 2.7404189109802246, "learning_rate": 1.6990370320907638e-05, "loss": 0.5277, "step": 13427 }, { "epoch": 2.1919921635851596, "grad_norm": 2.983152389526367, "learning_rate": 1.698991183567207e-05, "loss": 0.6428, "step": 13428 }, { "epoch": 2.192155422227664, "grad_norm": 2.446579694747925, "learning_rate": 1.6989453321703613e-05, "loss": 0.5759, "step": 13429 }, { "epoch": 2.1923186808701685, "grad_norm": 3.151862621307373, "learning_rate": 1.698899477900415e-05, "loss": 0.7445, "step": 13430 }, { "epoch": 2.192481939512673, "grad_norm": 2.9667482376098633, "learning_rate": 1.6988536207575568e-05, "loss": 0.5754, "step": 13431 }, { "epoch": 2.1926451981551773, "grad_norm": 2.746648073196411, "learning_rate": 1.6988077607419752e-05, "loss": 0.6014, "step": 13432 }, { "epoch": 2.1928084567976818, "grad_norm": 2.859806776046753, "learning_rate": 1.6987618978538592e-05, "loss": 0.6689, "step": 13433 }, { "epoch": 2.192971715440186, "grad_norm": 2.9726929664611816, "learning_rate": 1.6987160320933966e-05, "loss": 0.7126, "step": 13434 }, { "epoch": 2.1931349740826906, "grad_norm": 2.7685418128967285, "learning_rate": 1.6986701634607764e-05, "loss": 0.6128, "step": 13435 }, { "epoch": 2.193298232725195, "grad_norm": 2.614297389984131, "learning_rate": 1.6986242919561866e-05, "loss": 0.5245, "step": 13436 }, { "epoch": 2.193461491367699, "grad_norm": 2.9124410152435303, "learning_rate": 1.6985784175798167e-05, "loss": 0.6528, "step": 13437 }, { "epoch": 2.1936247500102035, "grad_norm": 2.3597211837768555, "learning_rate": 1.6985325403318543e-05, "loss": 0.5115, "step": 13438 }, { "epoch": 2.193788008652708, "grad_norm": 3.0405917167663574, "learning_rate": 1.6984866602124884e-05, "loss": 0.6345, "step": 13439 }, { "epoch": 2.1939512672952124, "grad_norm": 3.22255802154541, "learning_rate": 1.6984407772219076e-05, "loss": 0.5889, "step": 13440 }, { "epoch": 2.194114525937717, "grad_norm": 3.098299026489258, "learning_rate": 1.6983948913603008e-05, "loss": 0.5772, "step": 13441 }, { "epoch": 2.1942777845802213, "grad_norm": 2.7192723751068115, "learning_rate": 1.6983490026278557e-05, "loss": 0.5809, "step": 13442 }, { "epoch": 2.1944410432227257, "grad_norm": 3.032986640930176, "learning_rate": 1.698303111024762e-05, "loss": 0.5774, "step": 13443 }, { "epoch": 2.19460430186523, "grad_norm": 2.877873420715332, "learning_rate": 1.698257216551208e-05, "loss": 0.5469, "step": 13444 }, { "epoch": 2.1947675605077346, "grad_norm": 3.0878825187683105, "learning_rate": 1.6982113192073818e-05, "loss": 0.616, "step": 13445 }, { "epoch": 2.1949308191502386, "grad_norm": 2.9799046516418457, "learning_rate": 1.698165418993473e-05, "loss": 0.6671, "step": 13446 }, { "epoch": 2.195094077792743, "grad_norm": 3.446016550064087, "learning_rate": 1.6981195159096696e-05, "loss": 0.7099, "step": 13447 }, { "epoch": 2.1952573364352475, "grad_norm": 2.9180638790130615, "learning_rate": 1.69807360995616e-05, "loss": 0.6404, "step": 13448 }, { "epoch": 2.195420595077752, "grad_norm": 2.7654097080230713, "learning_rate": 1.698027701133134e-05, "loss": 0.5658, "step": 13449 }, { "epoch": 2.1955838537202563, "grad_norm": 2.7048332691192627, "learning_rate": 1.6979817894407794e-05, "loss": 0.5436, "step": 13450 }, { "epoch": 2.1957471123627608, "grad_norm": 3.225975513458252, "learning_rate": 1.6979358748792854e-05, "loss": 0.6575, "step": 13451 }, { "epoch": 2.195910371005265, "grad_norm": 3.372483968734741, "learning_rate": 1.6978899574488402e-05, "loss": 0.5388, "step": 13452 }, { "epoch": 2.1960736296477696, "grad_norm": 2.5049548149108887, "learning_rate": 1.697844037149633e-05, "loss": 0.5238, "step": 13453 }, { "epoch": 2.196236888290274, "grad_norm": 3.104367733001709, "learning_rate": 1.697798113981853e-05, "loss": 0.5863, "step": 13454 }, { "epoch": 2.196400146932778, "grad_norm": 2.622570514678955, "learning_rate": 1.697752187945688e-05, "loss": 0.5173, "step": 13455 }, { "epoch": 2.1965634055752825, "grad_norm": 2.721682071685791, "learning_rate": 1.697706259041327e-05, "loss": 0.4732, "step": 13456 }, { "epoch": 2.196726664217787, "grad_norm": 2.8207240104675293, "learning_rate": 1.6976603272689593e-05, "loss": 0.5428, "step": 13457 }, { "epoch": 2.1968899228602914, "grad_norm": 2.451415777206421, "learning_rate": 1.6976143926287736e-05, "loss": 0.5801, "step": 13458 }, { "epoch": 2.197053181502796, "grad_norm": 2.7629899978637695, "learning_rate": 1.6975684551209584e-05, "loss": 0.6531, "step": 13459 }, { "epoch": 2.1972164401453003, "grad_norm": 2.5257744789123535, "learning_rate": 1.6975225147457026e-05, "loss": 0.4519, "step": 13460 }, { "epoch": 2.1973796987878047, "grad_norm": 2.677699565887451, "learning_rate": 1.6974765715031953e-05, "loss": 0.5517, "step": 13461 }, { "epoch": 2.197542957430309, "grad_norm": 2.8141095638275146, "learning_rate": 1.6974306253936247e-05, "loss": 0.5453, "step": 13462 }, { "epoch": 2.197706216072813, "grad_norm": 2.770315170288086, "learning_rate": 1.6973846764171806e-05, "loss": 0.5696, "step": 13463 }, { "epoch": 2.1978694747153176, "grad_norm": 2.535856008529663, "learning_rate": 1.6973387245740515e-05, "loss": 0.5459, "step": 13464 }, { "epoch": 2.198032733357822, "grad_norm": 2.532456398010254, "learning_rate": 1.6972927698644256e-05, "loss": 0.6333, "step": 13465 }, { "epoch": 2.1981959920003264, "grad_norm": 3.1375651359558105, "learning_rate": 1.697246812288493e-05, "loss": 0.551, "step": 13466 }, { "epoch": 2.198359250642831, "grad_norm": 3.1102354526519775, "learning_rate": 1.697200851846442e-05, "loss": 0.5406, "step": 13467 }, { "epoch": 2.1985225092853353, "grad_norm": 2.762284994125366, "learning_rate": 1.6971548885384613e-05, "loss": 0.4558, "step": 13468 }, { "epoch": 2.1986857679278398, "grad_norm": 2.6233034133911133, "learning_rate": 1.6971089223647402e-05, "loss": 0.4908, "step": 13469 }, { "epoch": 2.198849026570344, "grad_norm": 2.399075984954834, "learning_rate": 1.6970629533254673e-05, "loss": 0.5324, "step": 13470 }, { "epoch": 2.1990122852128486, "grad_norm": 3.0737483501434326, "learning_rate": 1.6970169814208324e-05, "loss": 0.6599, "step": 13471 }, { "epoch": 2.199175543855353, "grad_norm": 3.199902057647705, "learning_rate": 1.6969710066510235e-05, "loss": 0.7169, "step": 13472 }, { "epoch": 2.199338802497857, "grad_norm": 2.5784242153167725, "learning_rate": 1.6969250290162304e-05, "loss": 0.5666, "step": 13473 }, { "epoch": 2.1995020611403615, "grad_norm": 3.855799913406372, "learning_rate": 1.696879048516641e-05, "loss": 0.7432, "step": 13474 }, { "epoch": 2.199665319782866, "grad_norm": 2.21146559715271, "learning_rate": 1.696833065152445e-05, "loss": 0.4912, "step": 13475 }, { "epoch": 2.1998285784253704, "grad_norm": 3.26236891746521, "learning_rate": 1.6967870789238316e-05, "loss": 0.6459, "step": 13476 }, { "epoch": 2.199991837067875, "grad_norm": 3.2292990684509277, "learning_rate": 1.6967410898309896e-05, "loss": 0.6094, "step": 13477 }, { "epoch": 2.2001550957103793, "grad_norm": 3.0214483737945557, "learning_rate": 1.6966950978741084e-05, "loss": 0.591, "step": 13478 }, { "epoch": 2.2003183543528837, "grad_norm": 2.3340628147125244, "learning_rate": 1.6966491030533765e-05, "loss": 0.4599, "step": 13479 }, { "epoch": 2.200481612995388, "grad_norm": 3.0896170139312744, "learning_rate": 1.696603105368983e-05, "loss": 0.628, "step": 13480 }, { "epoch": 2.200644871637892, "grad_norm": 2.9554078578948975, "learning_rate": 1.696557104821117e-05, "loss": 0.549, "step": 13481 }, { "epoch": 2.2008081302803966, "grad_norm": 2.8332343101501465, "learning_rate": 1.696511101409968e-05, "loss": 0.489, "step": 13482 }, { "epoch": 2.200971388922901, "grad_norm": 2.544363260269165, "learning_rate": 1.6964650951357252e-05, "loss": 0.4778, "step": 13483 }, { "epoch": 2.2011346475654054, "grad_norm": 2.184246778488159, "learning_rate": 1.696419085998577e-05, "loss": 0.3833, "step": 13484 }, { "epoch": 2.20129790620791, "grad_norm": 2.3440544605255127, "learning_rate": 1.696373073998713e-05, "loss": 0.4432, "step": 13485 }, { "epoch": 2.2014611648504143, "grad_norm": 2.8018901348114014, "learning_rate": 1.696327059136322e-05, "loss": 0.5301, "step": 13486 }, { "epoch": 2.2016244234929188, "grad_norm": 2.7442357540130615, "learning_rate": 1.6962810414115936e-05, "loss": 0.577, "step": 13487 }, { "epoch": 2.201787682135423, "grad_norm": 2.7972474098205566, "learning_rate": 1.696235020824717e-05, "loss": 0.5958, "step": 13488 }, { "epoch": 2.2019509407779276, "grad_norm": 3.138298988342285, "learning_rate": 1.6961889973758808e-05, "loss": 0.6197, "step": 13489 }, { "epoch": 2.2021141994204316, "grad_norm": 2.9870474338531494, "learning_rate": 1.6961429710652744e-05, "loss": 0.58, "step": 13490 }, { "epoch": 2.202277458062936, "grad_norm": 3.1022379398345947, "learning_rate": 1.6960969418930875e-05, "loss": 0.615, "step": 13491 }, { "epoch": 2.2024407167054405, "grad_norm": 3.0076076984405518, "learning_rate": 1.6960509098595085e-05, "loss": 0.6668, "step": 13492 }, { "epoch": 2.202603975347945, "grad_norm": 3.027587652206421, "learning_rate": 1.6960048749647277e-05, "loss": 0.5401, "step": 13493 }, { "epoch": 2.2027672339904494, "grad_norm": 3.059583902359009, "learning_rate": 1.6959588372089332e-05, "loss": 0.5819, "step": 13494 }, { "epoch": 2.202930492632954, "grad_norm": 3.304225206375122, "learning_rate": 1.6959127965923144e-05, "loss": 0.6864, "step": 13495 }, { "epoch": 2.2030937512754583, "grad_norm": 2.864018201828003, "learning_rate": 1.695866753115061e-05, "loss": 0.5043, "step": 13496 }, { "epoch": 2.2032570099179627, "grad_norm": 2.6666672229766846, "learning_rate": 1.6958207067773626e-05, "loss": 0.6239, "step": 13497 }, { "epoch": 2.2034202685604667, "grad_norm": 2.9029479026794434, "learning_rate": 1.6957746575794076e-05, "loss": 0.6246, "step": 13498 }, { "epoch": 2.203583527202971, "grad_norm": 3.2154805660247803, "learning_rate": 1.695728605521386e-05, "loss": 0.6752, "step": 13499 }, { "epoch": 2.2037467858454756, "grad_norm": 2.622145891189575, "learning_rate": 1.6956825506034866e-05, "loss": 0.5654, "step": 13500 }, { "epoch": 2.20391004448798, "grad_norm": 2.6718645095825195, "learning_rate": 1.695636492825899e-05, "loss": 0.5516, "step": 13501 }, { "epoch": 2.2040733031304844, "grad_norm": 2.5007004737854004, "learning_rate": 1.6955904321888125e-05, "loss": 0.4799, "step": 13502 }, { "epoch": 2.204236561772989, "grad_norm": 3.080686569213867, "learning_rate": 1.6955443686924162e-05, "loss": 0.6177, "step": 13503 }, { "epoch": 2.2043998204154933, "grad_norm": 3.0394301414489746, "learning_rate": 1.6954983023368995e-05, "loss": 0.6186, "step": 13504 }, { "epoch": 2.2045630790579978, "grad_norm": 2.5415737628936768, "learning_rate": 1.695452233122452e-05, "loss": 0.5951, "step": 13505 }, { "epoch": 2.204726337700502, "grad_norm": 2.7678472995758057, "learning_rate": 1.695406161049263e-05, "loss": 0.5973, "step": 13506 }, { "epoch": 2.2048895963430066, "grad_norm": 2.969162702560425, "learning_rate": 1.6953600861175223e-05, "loss": 0.6016, "step": 13507 }, { "epoch": 2.2050528549855106, "grad_norm": 2.9278695583343506, "learning_rate": 1.6953140083274186e-05, "loss": 0.577, "step": 13508 }, { "epoch": 2.205216113628015, "grad_norm": 2.1928606033325195, "learning_rate": 1.6952679276791414e-05, "loss": 0.4344, "step": 13509 }, { "epoch": 2.2053793722705195, "grad_norm": 2.825338840484619, "learning_rate": 1.6952218441728803e-05, "loss": 0.621, "step": 13510 }, { "epoch": 2.205542630913024, "grad_norm": 2.4710702896118164, "learning_rate": 1.6951757578088245e-05, "loss": 0.5319, "step": 13511 }, { "epoch": 2.2057058895555284, "grad_norm": 3.2602803707122803, "learning_rate": 1.695129668587164e-05, "loss": 0.5926, "step": 13512 }, { "epoch": 2.205869148198033, "grad_norm": 2.840270519256592, "learning_rate": 1.6950835765080876e-05, "loss": 0.6726, "step": 13513 }, { "epoch": 2.2060324068405373, "grad_norm": 2.695962429046631, "learning_rate": 1.6950374815717853e-05, "loss": 0.4906, "step": 13514 }, { "epoch": 2.2061956654830417, "grad_norm": 2.496447801589966, "learning_rate": 1.6949913837784463e-05, "loss": 0.4776, "step": 13515 }, { "epoch": 2.2063589241255457, "grad_norm": 3.0278968811035156, "learning_rate": 1.6949452831282603e-05, "loss": 0.5826, "step": 13516 }, { "epoch": 2.20652218276805, "grad_norm": 2.6743004322052, "learning_rate": 1.6948991796214164e-05, "loss": 0.6209, "step": 13517 }, { "epoch": 2.2066854414105546, "grad_norm": 2.901540517807007, "learning_rate": 1.6948530732581047e-05, "loss": 0.5933, "step": 13518 }, { "epoch": 2.206848700053059, "grad_norm": 3.398642063140869, "learning_rate": 1.6948069640385138e-05, "loss": 0.6094, "step": 13519 }, { "epoch": 2.2070119586955634, "grad_norm": 3.1100265979766846, "learning_rate": 1.6947608519628345e-05, "loss": 0.5335, "step": 13520 }, { "epoch": 2.207175217338068, "grad_norm": 2.715651750564575, "learning_rate": 1.694714737031255e-05, "loss": 0.5329, "step": 13521 }, { "epoch": 2.2073384759805723, "grad_norm": 2.7650632858276367, "learning_rate": 1.694668619243966e-05, "loss": 0.532, "step": 13522 }, { "epoch": 2.2075017346230768, "grad_norm": 3.0702106952667236, "learning_rate": 1.6946224986011568e-05, "loss": 0.5731, "step": 13523 }, { "epoch": 2.207664993265581, "grad_norm": 2.8476498126983643, "learning_rate": 1.6945763751030164e-05, "loss": 0.5544, "step": 13524 }, { "epoch": 2.207828251908085, "grad_norm": 3.1432759761810303, "learning_rate": 1.694530248749735e-05, "loss": 0.6904, "step": 13525 }, { "epoch": 2.2079915105505896, "grad_norm": 2.8326194286346436, "learning_rate": 1.6944841195415017e-05, "loss": 0.5438, "step": 13526 }, { "epoch": 2.208154769193094, "grad_norm": 2.76023006439209, "learning_rate": 1.694437987478507e-05, "loss": 0.5883, "step": 13527 }, { "epoch": 2.2083180278355985, "grad_norm": 2.735363721847534, "learning_rate": 1.6943918525609393e-05, "loss": 0.563, "step": 13528 }, { "epoch": 2.208481286478103, "grad_norm": 3.4110827445983887, "learning_rate": 1.6943457147889893e-05, "loss": 0.646, "step": 13529 }, { "epoch": 2.2086445451206074, "grad_norm": 3.0116167068481445, "learning_rate": 1.694299574162846e-05, "loss": 0.5358, "step": 13530 }, { "epoch": 2.208807803763112, "grad_norm": 3.135617256164551, "learning_rate": 1.694253430682699e-05, "loss": 0.5884, "step": 13531 }, { "epoch": 2.2089710624056162, "grad_norm": 3.1106042861938477, "learning_rate": 1.6942072843487388e-05, "loss": 0.6375, "step": 13532 }, { "epoch": 2.2091343210481207, "grad_norm": 3.146707773208618, "learning_rate": 1.6941611351611546e-05, "loss": 0.6567, "step": 13533 }, { "epoch": 2.2092975796906247, "grad_norm": 2.5123250484466553, "learning_rate": 1.694114983120136e-05, "loss": 0.5978, "step": 13534 }, { "epoch": 2.209460838333129, "grad_norm": 2.3352530002593994, "learning_rate": 1.694068828225873e-05, "loss": 0.529, "step": 13535 }, { "epoch": 2.2096240969756336, "grad_norm": 2.942063093185425, "learning_rate": 1.6940226704785546e-05, "loss": 0.6434, "step": 13536 }, { "epoch": 2.209787355618138, "grad_norm": 2.781494140625, "learning_rate": 1.6939765098783714e-05, "loss": 0.5786, "step": 13537 }, { "epoch": 2.2099506142606424, "grad_norm": 2.4520556926727295, "learning_rate": 1.6939303464255127e-05, "loss": 0.4537, "step": 13538 }, { "epoch": 2.210113872903147, "grad_norm": 2.888519287109375, "learning_rate": 1.6938841801201685e-05, "loss": 0.5451, "step": 13539 }, { "epoch": 2.2102771315456513, "grad_norm": 2.595036506652832, "learning_rate": 1.6938380109625283e-05, "loss": 0.5497, "step": 13540 }, { "epoch": 2.2104403901881557, "grad_norm": 2.6338584423065186, "learning_rate": 1.693791838952782e-05, "loss": 0.5817, "step": 13541 }, { "epoch": 2.21060364883066, "grad_norm": 2.918064594268799, "learning_rate": 1.6937456640911198e-05, "loss": 0.5745, "step": 13542 }, { "epoch": 2.210766907473164, "grad_norm": 2.5600039958953857, "learning_rate": 1.693699486377731e-05, "loss": 0.4842, "step": 13543 }, { "epoch": 2.2109301661156686, "grad_norm": 3.17305064201355, "learning_rate": 1.693653305812805e-05, "loss": 0.6275, "step": 13544 }, { "epoch": 2.211093424758173, "grad_norm": 2.7879912853240967, "learning_rate": 1.6936071223965326e-05, "loss": 0.5573, "step": 13545 }, { "epoch": 2.2112566834006775, "grad_norm": 3.037546157836914, "learning_rate": 1.6935609361291033e-05, "loss": 0.5633, "step": 13546 }, { "epoch": 2.211419942043182, "grad_norm": 2.817941188812256, "learning_rate": 1.693514747010707e-05, "loss": 0.5959, "step": 13547 }, { "epoch": 2.2115832006856864, "grad_norm": 2.680229425430298, "learning_rate": 1.693468555041533e-05, "loss": 0.5703, "step": 13548 }, { "epoch": 2.211746459328191, "grad_norm": 3.0210928916931152, "learning_rate": 1.6934223602217716e-05, "loss": 0.6494, "step": 13549 }, { "epoch": 2.2119097179706952, "grad_norm": 2.958019256591797, "learning_rate": 1.693376162551613e-05, "loss": 0.6232, "step": 13550 }, { "epoch": 2.2120729766131992, "grad_norm": 2.6793994903564453, "learning_rate": 1.693329962031247e-05, "loss": 0.4944, "step": 13551 }, { "epoch": 2.2122362352557037, "grad_norm": 3.068077802658081, "learning_rate": 1.693283758660863e-05, "loss": 0.6334, "step": 13552 }, { "epoch": 2.212399493898208, "grad_norm": 2.5919179916381836, "learning_rate": 1.6932375524406513e-05, "loss": 0.5357, "step": 13553 }, { "epoch": 2.2125627525407126, "grad_norm": 2.7171854972839355, "learning_rate": 1.6931913433708017e-05, "loss": 0.5858, "step": 13554 }, { "epoch": 2.212726011183217, "grad_norm": 2.4474985599517822, "learning_rate": 1.6931451314515044e-05, "loss": 0.5563, "step": 13555 }, { "epoch": 2.2128892698257214, "grad_norm": 2.855696201324463, "learning_rate": 1.6930989166829493e-05, "loss": 0.609, "step": 13556 }, { "epoch": 2.213052528468226, "grad_norm": 2.8433332443237305, "learning_rate": 1.693052699065326e-05, "loss": 0.6459, "step": 13557 }, { "epoch": 2.2132157871107303, "grad_norm": 3.4006168842315674, "learning_rate": 1.6930064785988246e-05, "loss": 0.6598, "step": 13558 }, { "epoch": 2.2133790457532347, "grad_norm": 2.0481035709381104, "learning_rate": 1.6929602552836355e-05, "loss": 0.4339, "step": 13559 }, { "epoch": 2.213542304395739, "grad_norm": 3.102578639984131, "learning_rate": 1.6929140291199485e-05, "loss": 0.6807, "step": 13560 }, { "epoch": 2.213705563038243, "grad_norm": 2.978646993637085, "learning_rate": 1.6928678001079536e-05, "loss": 0.5425, "step": 13561 }, { "epoch": 2.2138688216807476, "grad_norm": 2.8116676807403564, "learning_rate": 1.692821568247841e-05, "loss": 0.526, "step": 13562 }, { "epoch": 2.214032080323252, "grad_norm": 2.9770965576171875, "learning_rate": 1.6927753335398e-05, "loss": 0.6691, "step": 13563 }, { "epoch": 2.2141953389657565, "grad_norm": 2.635087490081787, "learning_rate": 1.692729095984021e-05, "loss": 0.5415, "step": 13564 }, { "epoch": 2.214358597608261, "grad_norm": 3.010706663131714, "learning_rate": 1.692682855580695e-05, "loss": 0.595, "step": 13565 }, { "epoch": 2.2145218562507654, "grad_norm": 2.6262259483337402, "learning_rate": 1.692636612330011e-05, "loss": 0.5598, "step": 13566 }, { "epoch": 2.21468511489327, "grad_norm": 3.37888765335083, "learning_rate": 1.6925903662321595e-05, "loss": 0.7396, "step": 13567 }, { "epoch": 2.2148483735357742, "grad_norm": 2.784250259399414, "learning_rate": 1.6925441172873303e-05, "loss": 0.5172, "step": 13568 }, { "epoch": 2.2150116321782782, "grad_norm": 3.212998628616333, "learning_rate": 1.692497865495714e-05, "loss": 0.5854, "step": 13569 }, { "epoch": 2.2151748908207827, "grad_norm": 2.646671772003174, "learning_rate": 1.6924516108575e-05, "loss": 0.5403, "step": 13570 }, { "epoch": 2.215338149463287, "grad_norm": 3.140223741531372, "learning_rate": 1.6924053533728793e-05, "loss": 0.7016, "step": 13571 }, { "epoch": 2.2155014081057915, "grad_norm": 2.7223105430603027, "learning_rate": 1.6923590930420418e-05, "loss": 0.5305, "step": 13572 }, { "epoch": 2.215664666748296, "grad_norm": 2.429994821548462, "learning_rate": 1.6923128298651773e-05, "loss": 0.543, "step": 13573 }, { "epoch": 2.2158279253908004, "grad_norm": 2.4743616580963135, "learning_rate": 1.692266563842476e-05, "loss": 0.4937, "step": 13574 }, { "epoch": 2.215991184033305, "grad_norm": 3.3889567852020264, "learning_rate": 1.6922202949741283e-05, "loss": 0.5906, "step": 13575 }, { "epoch": 2.2161544426758093, "grad_norm": 2.7762343883514404, "learning_rate": 1.6921740232603247e-05, "loss": 0.602, "step": 13576 }, { "epoch": 2.2163177013183137, "grad_norm": 2.6664235591888428, "learning_rate": 1.6921277487012547e-05, "loss": 0.5787, "step": 13577 }, { "epoch": 2.2164809599608177, "grad_norm": 2.9921987056732178, "learning_rate": 1.692081471297109e-05, "loss": 0.6002, "step": 13578 }, { "epoch": 2.216644218603322, "grad_norm": 2.8661656379699707, "learning_rate": 1.6920351910480778e-05, "loss": 0.6601, "step": 13579 }, { "epoch": 2.2168074772458266, "grad_norm": 2.4819345474243164, "learning_rate": 1.691988907954351e-05, "loss": 0.4817, "step": 13580 }, { "epoch": 2.216970735888331, "grad_norm": 2.726972818374634, "learning_rate": 1.6919426220161194e-05, "loss": 0.5218, "step": 13581 }, { "epoch": 2.2171339945308355, "grad_norm": 2.5625197887420654, "learning_rate": 1.6918963332335725e-05, "loss": 0.5515, "step": 13582 }, { "epoch": 2.21729725317334, "grad_norm": 2.941539764404297, "learning_rate": 1.6918500416069012e-05, "loss": 0.6476, "step": 13583 }, { "epoch": 2.2174605118158444, "grad_norm": 2.9263339042663574, "learning_rate": 1.6918037471362957e-05, "loss": 0.6457, "step": 13584 }, { "epoch": 2.217623770458349, "grad_norm": 2.650641441345215, "learning_rate": 1.691757449821946e-05, "loss": 0.5543, "step": 13585 }, { "epoch": 2.217787029100853, "grad_norm": 2.5305662155151367, "learning_rate": 1.6917111496640433e-05, "loss": 0.5066, "step": 13586 }, { "epoch": 2.2179502877433572, "grad_norm": 2.6395392417907715, "learning_rate": 1.6916648466627765e-05, "loss": 0.5295, "step": 13587 }, { "epoch": 2.2181135463858617, "grad_norm": 2.8602664470672607, "learning_rate": 1.691618540818337e-05, "loss": 0.5617, "step": 13588 }, { "epoch": 2.218276805028366, "grad_norm": 2.8731579780578613, "learning_rate": 1.6915722321309146e-05, "loss": 0.5944, "step": 13589 }, { "epoch": 2.2184400636708705, "grad_norm": 2.3984506130218506, "learning_rate": 1.6915259206007003e-05, "loss": 0.4884, "step": 13590 }, { "epoch": 2.218603322313375, "grad_norm": 2.4657962322235107, "learning_rate": 1.6914796062278838e-05, "loss": 0.5191, "step": 13591 }, { "epoch": 2.2187665809558794, "grad_norm": 3.1423916816711426, "learning_rate": 1.6914332890126557e-05, "loss": 0.5347, "step": 13592 }, { "epoch": 2.218929839598384, "grad_norm": 2.8912689685821533, "learning_rate": 1.6913869689552066e-05, "loss": 0.5734, "step": 13593 }, { "epoch": 2.2190930982408883, "grad_norm": 2.563101053237915, "learning_rate": 1.6913406460557268e-05, "loss": 0.5582, "step": 13594 }, { "epoch": 2.2192563568833927, "grad_norm": 2.7150938510894775, "learning_rate": 1.6912943203144064e-05, "loss": 0.5337, "step": 13595 }, { "epoch": 2.2194196155258967, "grad_norm": 2.778975009918213, "learning_rate": 1.6912479917314358e-05, "loss": 0.5595, "step": 13596 }, { "epoch": 2.219582874168401, "grad_norm": 3.031188726425171, "learning_rate": 1.6912016603070064e-05, "loss": 0.6182, "step": 13597 }, { "epoch": 2.2197461328109056, "grad_norm": 3.068042278289795, "learning_rate": 1.6911553260413077e-05, "loss": 0.5727, "step": 13598 }, { "epoch": 2.21990939145341, "grad_norm": 3.535973072052002, "learning_rate": 1.6911089889345304e-05, "loss": 0.6931, "step": 13599 }, { "epoch": 2.2200726500959145, "grad_norm": 2.297123670578003, "learning_rate": 1.691062648986865e-05, "loss": 0.4946, "step": 13600 }, { "epoch": 2.220235908738419, "grad_norm": 2.4912638664245605, "learning_rate": 1.6910163061985016e-05, "loss": 0.5309, "step": 13601 }, { "epoch": 2.2203991673809234, "grad_norm": 2.3885250091552734, "learning_rate": 1.6909699605696316e-05, "loss": 0.503, "step": 13602 }, { "epoch": 2.220562426023428, "grad_norm": 2.752647876739502, "learning_rate": 1.690923612100445e-05, "loss": 0.5282, "step": 13603 }, { "epoch": 2.220725684665932, "grad_norm": 2.9647233486175537, "learning_rate": 1.690877260791132e-05, "loss": 0.5894, "step": 13604 }, { "epoch": 2.2208889433084362, "grad_norm": 2.8866968154907227, "learning_rate": 1.690830906641884e-05, "loss": 0.5109, "step": 13605 }, { "epoch": 2.2210522019509407, "grad_norm": 3.253161907196045, "learning_rate": 1.6907845496528902e-05, "loss": 0.699, "step": 13606 }, { "epoch": 2.221215460593445, "grad_norm": 2.5527942180633545, "learning_rate": 1.6907381898243426e-05, "loss": 0.5274, "step": 13607 }, { "epoch": 2.2213787192359495, "grad_norm": 2.594130039215088, "learning_rate": 1.6906918271564308e-05, "loss": 0.4573, "step": 13608 }, { "epoch": 2.221541977878454, "grad_norm": 3.001920461654663, "learning_rate": 1.6906454616493458e-05, "loss": 0.563, "step": 13609 }, { "epoch": 2.2217052365209584, "grad_norm": 2.9299914836883545, "learning_rate": 1.690599093303278e-05, "loss": 0.5521, "step": 13610 }, { "epoch": 2.221868495163463, "grad_norm": 3.7846262454986572, "learning_rate": 1.6905527221184185e-05, "loss": 0.641, "step": 13611 }, { "epoch": 2.2220317538059673, "grad_norm": 2.29888916015625, "learning_rate": 1.690506348094957e-05, "loss": 0.4814, "step": 13612 }, { "epoch": 2.2221950124484713, "grad_norm": 3.0837509632110596, "learning_rate": 1.690459971233085e-05, "loss": 0.5521, "step": 13613 }, { "epoch": 2.2223582710909757, "grad_norm": 3.129523992538452, "learning_rate": 1.6904135915329924e-05, "loss": 0.6249, "step": 13614 }, { "epoch": 2.22252152973348, "grad_norm": 3.0899932384490967, "learning_rate": 1.6903672089948708e-05, "loss": 0.5679, "step": 13615 }, { "epoch": 2.2226847883759846, "grad_norm": 3.5812714099884033, "learning_rate": 1.6903208236189096e-05, "loss": 0.5247, "step": 13616 }, { "epoch": 2.222848047018489, "grad_norm": 2.60379695892334, "learning_rate": 1.6902744354053006e-05, "loss": 0.5252, "step": 13617 }, { "epoch": 2.2230113056609935, "grad_norm": 3.3758010864257812, "learning_rate": 1.6902280443542336e-05, "loss": 0.7369, "step": 13618 }, { "epoch": 2.223174564303498, "grad_norm": 2.6921730041503906, "learning_rate": 1.6901816504659e-05, "loss": 0.535, "step": 13619 }, { "epoch": 2.2233378229460024, "grad_norm": 3.1962099075317383, "learning_rate": 1.69013525374049e-05, "loss": 0.4923, "step": 13620 }, { "epoch": 2.223501081588507, "grad_norm": 2.8372960090637207, "learning_rate": 1.690088854178195e-05, "loss": 0.6327, "step": 13621 }, { "epoch": 2.223664340231011, "grad_norm": 3.4716572761535645, "learning_rate": 1.6900424517792054e-05, "loss": 0.6313, "step": 13622 }, { "epoch": 2.223827598873515, "grad_norm": 2.889599323272705, "learning_rate": 1.6899960465437115e-05, "loss": 0.5637, "step": 13623 }, { "epoch": 2.2239908575160197, "grad_norm": 2.800119400024414, "learning_rate": 1.6899496384719043e-05, "loss": 0.6868, "step": 13624 }, { "epoch": 2.224154116158524, "grad_norm": 3.0839195251464844, "learning_rate": 1.689903227563975e-05, "loss": 0.6154, "step": 13625 }, { "epoch": 2.2243173748010285, "grad_norm": 2.6975674629211426, "learning_rate": 1.689856813820114e-05, "loss": 0.5996, "step": 13626 }, { "epoch": 2.224480633443533, "grad_norm": 2.7825369834899902, "learning_rate": 1.6898103972405118e-05, "loss": 0.5576, "step": 13627 }, { "epoch": 2.2246438920860374, "grad_norm": 2.574880599975586, "learning_rate": 1.6897639778253596e-05, "loss": 0.4988, "step": 13628 }, { "epoch": 2.224807150728542, "grad_norm": 2.8980977535247803, "learning_rate": 1.6897175555748484e-05, "loss": 0.5678, "step": 13629 }, { "epoch": 2.2249704093710463, "grad_norm": 3.146543264389038, "learning_rate": 1.6896711304891685e-05, "loss": 1.0809, "step": 13630 }, { "epoch": 2.2251336680135503, "grad_norm": 3.0163748264312744, "learning_rate": 1.6896247025685113e-05, "loss": 0.5784, "step": 13631 }, { "epoch": 2.2252969266560547, "grad_norm": 2.9167866706848145, "learning_rate": 1.689578271813067e-05, "loss": 0.6697, "step": 13632 }, { "epoch": 2.225460185298559, "grad_norm": 2.8135364055633545, "learning_rate": 1.6895318382230268e-05, "loss": 0.5581, "step": 13633 }, { "epoch": 2.2256234439410636, "grad_norm": 2.3910043239593506, "learning_rate": 1.6894854017985817e-05, "loss": 0.4157, "step": 13634 }, { "epoch": 2.225786702583568, "grad_norm": 1.9231313467025757, "learning_rate": 1.6894389625399226e-05, "loss": 0.3228, "step": 13635 }, { "epoch": 2.2259499612260725, "grad_norm": 3.6544158458709717, "learning_rate": 1.68939252044724e-05, "loss": 0.8125, "step": 13636 }, { "epoch": 2.226113219868577, "grad_norm": 2.884580612182617, "learning_rate": 1.6893460755207255e-05, "loss": 0.6411, "step": 13637 }, { "epoch": 2.2262764785110813, "grad_norm": 2.973677635192871, "learning_rate": 1.6892996277605692e-05, "loss": 0.6487, "step": 13638 }, { "epoch": 2.2264397371535853, "grad_norm": 2.9233052730560303, "learning_rate": 1.6892531771669623e-05, "loss": 0.5724, "step": 13639 }, { "epoch": 2.22660299579609, "grad_norm": 3.5844345092773438, "learning_rate": 1.689206723740096e-05, "loss": 0.7523, "step": 13640 }, { "epoch": 2.226766254438594, "grad_norm": 2.766153335571289, "learning_rate": 1.689160267480161e-05, "loss": 0.5448, "step": 13641 }, { "epoch": 2.2269295130810987, "grad_norm": 2.9175891876220703, "learning_rate": 1.6891138083873486e-05, "loss": 0.6132, "step": 13642 }, { "epoch": 2.227092771723603, "grad_norm": 3.1323702335357666, "learning_rate": 1.6890673464618494e-05, "loss": 0.7026, "step": 13643 }, { "epoch": 2.2272560303661075, "grad_norm": 2.6569128036499023, "learning_rate": 1.6890208817038547e-05, "loss": 0.4642, "step": 13644 }, { "epoch": 2.227419289008612, "grad_norm": 2.5942368507385254, "learning_rate": 1.6889744141135553e-05, "loss": 0.5325, "step": 13645 }, { "epoch": 2.2275825476511164, "grad_norm": 3.104419231414795, "learning_rate": 1.688927943691142e-05, "loss": 0.978, "step": 13646 }, { "epoch": 2.227745806293621, "grad_norm": 3.4169483184814453, "learning_rate": 1.688881470436806e-05, "loss": 0.7835, "step": 13647 }, { "epoch": 2.2279090649361253, "grad_norm": 3.204975128173828, "learning_rate": 1.6888349943507387e-05, "loss": 0.5294, "step": 13648 }, { "epoch": 2.2280723235786293, "grad_norm": 3.008366107940674, "learning_rate": 1.6887885154331307e-05, "loss": 0.5523, "step": 13649 }, { "epoch": 2.2282355822211337, "grad_norm": 3.061232566833496, "learning_rate": 1.6887420336841732e-05, "loss": 0.6146, "step": 13650 }, { "epoch": 2.228398840863638, "grad_norm": 2.732998847961426, "learning_rate": 1.688695549104057e-05, "loss": 0.5646, "step": 13651 }, { "epoch": 2.2285620995061426, "grad_norm": 2.454972505569458, "learning_rate": 1.688649061692974e-05, "loss": 0.5517, "step": 13652 }, { "epoch": 2.228725358148647, "grad_norm": 2.8742756843566895, "learning_rate": 1.6886025714511142e-05, "loss": 0.6471, "step": 13653 }, { "epoch": 2.2288886167911515, "grad_norm": 2.9041130542755127, "learning_rate": 1.6885560783786695e-05, "loss": 0.6511, "step": 13654 }, { "epoch": 2.229051875433656, "grad_norm": 2.507870674133301, "learning_rate": 1.6885095824758307e-05, "loss": 0.5961, "step": 13655 }, { "epoch": 2.2292151340761603, "grad_norm": 3.229736328125, "learning_rate": 1.688463083742789e-05, "loss": 0.6401, "step": 13656 }, { "epoch": 2.2293783927186643, "grad_norm": 3.5594704151153564, "learning_rate": 1.6884165821797357e-05, "loss": 0.7261, "step": 13657 }, { "epoch": 2.2295416513611688, "grad_norm": 2.6563405990600586, "learning_rate": 1.6883700777868613e-05, "loss": 0.5636, "step": 13658 }, { "epoch": 2.229704910003673, "grad_norm": 3.1598806381225586, "learning_rate": 1.688323570564358e-05, "loss": 0.7238, "step": 13659 }, { "epoch": 2.2298681686461777, "grad_norm": 2.507533550262451, "learning_rate": 1.688277060512416e-05, "loss": 0.5361, "step": 13660 }, { "epoch": 2.230031427288682, "grad_norm": 4.979901313781738, "learning_rate": 1.688230547631227e-05, "loss": 0.5575, "step": 13661 }, { "epoch": 2.2301946859311865, "grad_norm": 2.7266645431518555, "learning_rate": 1.6881840319209822e-05, "loss": 0.609, "step": 13662 }, { "epoch": 2.230357944573691, "grad_norm": 2.891777753829956, "learning_rate": 1.6881375133818726e-05, "loss": 0.5664, "step": 13663 }, { "epoch": 2.2305212032161954, "grad_norm": 2.925347328186035, "learning_rate": 1.6880909920140897e-05, "loss": 0.9027, "step": 13664 }, { "epoch": 2.2306844618587, "grad_norm": 2.8615238666534424, "learning_rate": 1.6880444678178243e-05, "loss": 0.5889, "step": 13665 }, { "epoch": 2.230847720501204, "grad_norm": 3.2391529083251953, "learning_rate": 1.687997940793268e-05, "loss": 0.5987, "step": 13666 }, { "epoch": 2.2310109791437083, "grad_norm": 3.473963975906372, "learning_rate": 1.6879514109406118e-05, "loss": 0.6744, "step": 13667 }, { "epoch": 2.2311742377862127, "grad_norm": 3.2153897285461426, "learning_rate": 1.6879048782600472e-05, "loss": 0.6814, "step": 13668 }, { "epoch": 2.231337496428717, "grad_norm": 2.538789987564087, "learning_rate": 1.6878583427517655e-05, "loss": 0.5294, "step": 13669 }, { "epoch": 2.2315007550712216, "grad_norm": 2.264399766921997, "learning_rate": 1.6878118044159578e-05, "loss": 0.422, "step": 13670 }, { "epoch": 2.231664013713726, "grad_norm": 2.6333141326904297, "learning_rate": 1.687765263252816e-05, "loss": 0.5179, "step": 13671 }, { "epoch": 2.2318272723562305, "grad_norm": 3.2232253551483154, "learning_rate": 1.68771871926253e-05, "loss": 0.6244, "step": 13672 }, { "epoch": 2.231990530998735, "grad_norm": 2.552273750305176, "learning_rate": 1.6876721724452926e-05, "loss": 0.5608, "step": 13673 }, { "epoch": 2.2321537896412393, "grad_norm": 2.5679056644439697, "learning_rate": 1.6876256228012944e-05, "loss": 0.5077, "step": 13674 }, { "epoch": 2.2323170482837433, "grad_norm": 2.5943808555603027, "learning_rate": 1.687579070330727e-05, "loss": 0.5295, "step": 13675 }, { "epoch": 2.2324803069262478, "grad_norm": 2.3917272090911865, "learning_rate": 1.6875325150337817e-05, "loss": 0.4808, "step": 13676 }, { "epoch": 2.232643565568752, "grad_norm": 2.710362434387207, "learning_rate": 1.6874859569106497e-05, "loss": 0.5503, "step": 13677 }, { "epoch": 2.2328068242112566, "grad_norm": 2.8097939491271973, "learning_rate": 1.6874393959615223e-05, "loss": 0.5379, "step": 13678 }, { "epoch": 2.232970082853761, "grad_norm": 2.9180572032928467, "learning_rate": 1.6873928321865914e-05, "loss": 0.6021, "step": 13679 }, { "epoch": 2.2331333414962655, "grad_norm": 3.0996382236480713, "learning_rate": 1.687346265586048e-05, "loss": 0.5979, "step": 13680 }, { "epoch": 2.23329660013877, "grad_norm": 2.9855399131774902, "learning_rate": 1.687299696160084e-05, "loss": 0.5644, "step": 13681 }, { "epoch": 2.2334598587812744, "grad_norm": 3.173274517059326, "learning_rate": 1.68725312390889e-05, "loss": 0.559, "step": 13682 }, { "epoch": 2.233623117423779, "grad_norm": 2.913140058517456, "learning_rate": 1.6872065488326585e-05, "loss": 0.5354, "step": 13683 }, { "epoch": 2.233786376066283, "grad_norm": 3.005371570587158, "learning_rate": 1.6871599709315795e-05, "loss": 0.5352, "step": 13684 }, { "epoch": 2.2339496347087873, "grad_norm": 2.6594114303588867, "learning_rate": 1.687113390205846e-05, "loss": 0.537, "step": 13685 }, { "epoch": 2.2341128933512917, "grad_norm": 3.100599527359009, "learning_rate": 1.6870668066556485e-05, "loss": 0.5789, "step": 13686 }, { "epoch": 2.234276151993796, "grad_norm": 2.8393545150756836, "learning_rate": 1.687020220281179e-05, "loss": 0.4992, "step": 13687 }, { "epoch": 2.2344394106363006, "grad_norm": 2.435542106628418, "learning_rate": 1.6869736310826286e-05, "loss": 0.4853, "step": 13688 }, { "epoch": 2.234602669278805, "grad_norm": 3.329164981842041, "learning_rate": 1.6869270390601887e-05, "loss": 0.6472, "step": 13689 }, { "epoch": 2.2347659279213095, "grad_norm": 2.7643091678619385, "learning_rate": 1.6868804442140517e-05, "loss": 0.521, "step": 13690 }, { "epoch": 2.234929186563814, "grad_norm": 2.6422853469848633, "learning_rate": 1.6868338465444086e-05, "loss": 0.5517, "step": 13691 }, { "epoch": 2.235092445206318, "grad_norm": 2.811944007873535, "learning_rate": 1.6867872460514502e-05, "loss": 0.4978, "step": 13692 }, { "epoch": 2.2352557038488223, "grad_norm": 2.8682968616485596, "learning_rate": 1.6867406427353696e-05, "loss": 0.5646, "step": 13693 }, { "epoch": 2.2354189624913268, "grad_norm": 3.411984443664551, "learning_rate": 1.686694036596357e-05, "loss": 0.5152, "step": 13694 }, { "epoch": 2.235582221133831, "grad_norm": 3.1207947731018066, "learning_rate": 1.6866474276346045e-05, "loss": 0.5709, "step": 13695 }, { "epoch": 2.2357454797763356, "grad_norm": 2.8616931438446045, "learning_rate": 1.686600815850304e-05, "loss": 0.4986, "step": 13696 }, { "epoch": 2.23590873841884, "grad_norm": 3.4247560501098633, "learning_rate": 1.6865542012436464e-05, "loss": 0.6148, "step": 13697 }, { "epoch": 2.2360719970613445, "grad_norm": 3.0626814365386963, "learning_rate": 1.6865075838148243e-05, "loss": 0.5779, "step": 13698 }, { "epoch": 2.236235255703849, "grad_norm": 2.680692434310913, "learning_rate": 1.6864609635640282e-05, "loss": 0.5357, "step": 13699 }, { "epoch": 2.2363985143463534, "grad_norm": 3.0341532230377197, "learning_rate": 1.6864143404914506e-05, "loss": 0.6036, "step": 13700 }, { "epoch": 2.236561772988858, "grad_norm": 3.5143816471099854, "learning_rate": 1.6863677145972826e-05, "loss": 0.7166, "step": 13701 }, { "epoch": 2.236725031631362, "grad_norm": 2.7141029834747314, "learning_rate": 1.686321085881716e-05, "loss": 0.4869, "step": 13702 }, { "epoch": 2.2368882902738663, "grad_norm": 2.518935203552246, "learning_rate": 1.686274454344943e-05, "loss": 0.5191, "step": 13703 }, { "epoch": 2.2370515489163707, "grad_norm": 2.863180637359619, "learning_rate": 1.686227819987155e-05, "loss": 0.5505, "step": 13704 }, { "epoch": 2.237214807558875, "grad_norm": 3.3141071796417236, "learning_rate": 1.686181182808543e-05, "loss": 0.6957, "step": 13705 }, { "epoch": 2.2373780662013796, "grad_norm": 2.5093514919281006, "learning_rate": 1.6861345428092993e-05, "loss": 0.5181, "step": 13706 }, { "epoch": 2.237541324843884, "grad_norm": 2.5732579231262207, "learning_rate": 1.6860878999896156e-05, "loss": 0.5286, "step": 13707 }, { "epoch": 2.2377045834863885, "grad_norm": 2.9969592094421387, "learning_rate": 1.686041254349684e-05, "loss": 0.6033, "step": 13708 }, { "epoch": 2.237867842128893, "grad_norm": 2.795923948287964, "learning_rate": 1.6859946058896956e-05, "loss": 0.6046, "step": 13709 }, { "epoch": 2.238031100771397, "grad_norm": 2.512888193130493, "learning_rate": 1.685947954609842e-05, "loss": 0.551, "step": 13710 }, { "epoch": 2.2381943594139013, "grad_norm": 2.7377030849456787, "learning_rate": 1.6859013005103158e-05, "loss": 0.5705, "step": 13711 }, { "epoch": 2.2383576180564058, "grad_norm": 2.693423271179199, "learning_rate": 1.6858546435913082e-05, "loss": 0.613, "step": 13712 }, { "epoch": 2.23852087669891, "grad_norm": 3.079726219177246, "learning_rate": 1.685807983853011e-05, "loss": 0.5963, "step": 13713 }, { "epoch": 2.2386841353414146, "grad_norm": 2.727827787399292, "learning_rate": 1.6857613212956167e-05, "loss": 0.5789, "step": 13714 }, { "epoch": 2.238847393983919, "grad_norm": 3.150259017944336, "learning_rate": 1.685714655919316e-05, "loss": 0.6584, "step": 13715 }, { "epoch": 2.2390106526264235, "grad_norm": 3.5469589233398438, "learning_rate": 1.6856679877243018e-05, "loss": 0.6957, "step": 13716 }, { "epoch": 2.239173911268928, "grad_norm": 2.860775947570801, "learning_rate": 1.6856213167107646e-05, "loss": 0.6146, "step": 13717 }, { "epoch": 2.2393371699114324, "grad_norm": 3.1953673362731934, "learning_rate": 1.6855746428788977e-05, "loss": 0.6836, "step": 13718 }, { "epoch": 2.2395004285539364, "grad_norm": 2.7649402618408203, "learning_rate": 1.6855279662288922e-05, "loss": 0.5591, "step": 13719 }, { "epoch": 2.239663687196441, "grad_norm": 2.831819772720337, "learning_rate": 1.68548128676094e-05, "loss": 0.5164, "step": 13720 }, { "epoch": 2.2398269458389453, "grad_norm": 3.0123302936553955, "learning_rate": 1.685434604475233e-05, "loss": 0.6614, "step": 13721 }, { "epoch": 2.2399902044814497, "grad_norm": 3.2181172370910645, "learning_rate": 1.685387919371963e-05, "loss": 0.671, "step": 13722 }, { "epoch": 2.240153463123954, "grad_norm": 3.1346426010131836, "learning_rate": 1.6853412314513223e-05, "loss": 0.5962, "step": 13723 }, { "epoch": 2.2403167217664586, "grad_norm": 3.161695718765259, "learning_rate": 1.6852945407135028e-05, "loss": 0.6441, "step": 13724 }, { "epoch": 2.240479980408963, "grad_norm": 2.5207486152648926, "learning_rate": 1.685247847158696e-05, "loss": 0.4932, "step": 13725 }, { "epoch": 2.2406432390514675, "grad_norm": 3.3615875244140625, "learning_rate": 1.685201150787094e-05, "loss": 0.6207, "step": 13726 }, { "epoch": 2.2408064976939714, "grad_norm": 2.802469253540039, "learning_rate": 1.685154451598889e-05, "loss": 0.5533, "step": 13727 }, { "epoch": 2.240969756336476, "grad_norm": 3.290860652923584, "learning_rate": 1.6851077495942726e-05, "loss": 0.6877, "step": 13728 }, { "epoch": 2.2411330149789803, "grad_norm": 2.8467605113983154, "learning_rate": 1.6850610447734367e-05, "loss": 0.4927, "step": 13729 }, { "epoch": 2.2412962736214848, "grad_norm": 2.746211290359497, "learning_rate": 1.6850143371365737e-05, "loss": 0.536, "step": 13730 }, { "epoch": 2.241459532263989, "grad_norm": 3.428354263305664, "learning_rate": 1.6849676266838756e-05, "loss": 0.6758, "step": 13731 }, { "epoch": 2.2416227909064936, "grad_norm": 3.575150728225708, "learning_rate": 1.684920913415534e-05, "loss": 0.6247, "step": 13732 }, { "epoch": 2.241786049548998, "grad_norm": 3.3926961421966553, "learning_rate": 1.6848741973317414e-05, "loss": 0.6286, "step": 13733 }, { "epoch": 2.2419493081915025, "grad_norm": 3.0831458568573, "learning_rate": 1.6848274784326893e-05, "loss": 0.5351, "step": 13734 }, { "epoch": 2.242112566834007, "grad_norm": 2.877976179122925, "learning_rate": 1.68478075671857e-05, "loss": 0.646, "step": 13735 }, { "epoch": 2.2422758254765114, "grad_norm": 3.1447830200195312, "learning_rate": 1.684734032189576e-05, "loss": 0.6507, "step": 13736 }, { "epoch": 2.2424390841190154, "grad_norm": 2.6275477409362793, "learning_rate": 1.6846873048458984e-05, "loss": 0.5718, "step": 13737 }, { "epoch": 2.24260234276152, "grad_norm": 3.1651322841644287, "learning_rate": 1.68464057468773e-05, "loss": 0.6181, "step": 13738 }, { "epoch": 2.2427656014040243, "grad_norm": 2.7803869247436523, "learning_rate": 1.684593841715263e-05, "loss": 0.5319, "step": 13739 }, { "epoch": 2.2429288600465287, "grad_norm": 3.0382978916168213, "learning_rate": 1.684547105928689e-05, "loss": 0.6286, "step": 13740 }, { "epoch": 2.243092118689033, "grad_norm": 3.009819507598877, "learning_rate": 1.6845003673282e-05, "loss": 0.5931, "step": 13741 }, { "epoch": 2.2432553773315376, "grad_norm": 3.11899995803833, "learning_rate": 1.684453625913989e-05, "loss": 0.7402, "step": 13742 }, { "epoch": 2.243418635974042, "grad_norm": 3.351630449295044, "learning_rate": 1.6844068816862473e-05, "loss": 0.6944, "step": 13743 }, { "epoch": 2.2435818946165464, "grad_norm": 2.786025047302246, "learning_rate": 1.6843601346451673e-05, "loss": 0.6111, "step": 13744 }, { "epoch": 2.2437451532590504, "grad_norm": 3.024996280670166, "learning_rate": 1.6843133847909415e-05, "loss": 0.6205, "step": 13745 }, { "epoch": 2.243908411901555, "grad_norm": 3.0308573246002197, "learning_rate": 1.6842666321237615e-05, "loss": 0.5885, "step": 13746 }, { "epoch": 2.2440716705440593, "grad_norm": 2.7326838970184326, "learning_rate": 1.6842198766438197e-05, "loss": 0.5283, "step": 13747 }, { "epoch": 2.2442349291865638, "grad_norm": 2.852968692779541, "learning_rate": 1.6841731183513086e-05, "loss": 0.5551, "step": 13748 }, { "epoch": 2.244398187829068, "grad_norm": 2.822249174118042, "learning_rate": 1.68412635724642e-05, "loss": 0.4982, "step": 13749 }, { "epoch": 2.2445614464715726, "grad_norm": 3.33921217918396, "learning_rate": 1.6840795933293464e-05, "loss": 0.5801, "step": 13750 }, { "epoch": 2.244724705114077, "grad_norm": 2.689553737640381, "learning_rate": 1.6840328266002798e-05, "loss": 0.5522, "step": 13751 }, { "epoch": 2.2448879637565815, "grad_norm": 2.843365430831909, "learning_rate": 1.6839860570594124e-05, "loss": 0.5351, "step": 13752 }, { "epoch": 2.245051222399086, "grad_norm": 2.402888536453247, "learning_rate": 1.6839392847069368e-05, "loss": 0.5768, "step": 13753 }, { "epoch": 2.24521448104159, "grad_norm": 2.6497976779937744, "learning_rate": 1.683892509543045e-05, "loss": 0.5232, "step": 13754 }, { "epoch": 2.2453777396840944, "grad_norm": 2.7529191970825195, "learning_rate": 1.683845731567929e-05, "loss": 0.5033, "step": 13755 }, { "epoch": 2.245540998326599, "grad_norm": 2.384549856185913, "learning_rate": 1.6837989507817818e-05, "loss": 0.4845, "step": 13756 }, { "epoch": 2.2457042569691033, "grad_norm": 2.599411964416504, "learning_rate": 1.6837521671847953e-05, "loss": 0.5278, "step": 13757 }, { "epoch": 2.2458675156116077, "grad_norm": 3.241002321243286, "learning_rate": 1.6837053807771618e-05, "loss": 0.6262, "step": 13758 }, { "epoch": 2.246030774254112, "grad_norm": 2.6874544620513916, "learning_rate": 1.6836585915590733e-05, "loss": 0.5803, "step": 13759 }, { "epoch": 2.2461940328966166, "grad_norm": 3.1474666595458984, "learning_rate": 1.6836117995307226e-05, "loss": 0.6909, "step": 13760 }, { "epoch": 2.246357291539121, "grad_norm": 3.472987174987793, "learning_rate": 1.6835650046923022e-05, "loss": 0.9134, "step": 13761 }, { "epoch": 2.2465205501816254, "grad_norm": 2.8833391666412354, "learning_rate": 1.683518207044004e-05, "loss": 0.5484, "step": 13762 }, { "epoch": 2.2466838088241294, "grad_norm": 2.7861969470977783, "learning_rate": 1.6834714065860208e-05, "loss": 0.5221, "step": 13763 }, { "epoch": 2.246847067466634, "grad_norm": 2.9614784717559814, "learning_rate": 1.6834246033185444e-05, "loss": 0.6095, "step": 13764 }, { "epoch": 2.2470103261091383, "grad_norm": 3.658255100250244, "learning_rate": 1.6833777972417674e-05, "loss": 0.7276, "step": 13765 }, { "epoch": 2.2471735847516427, "grad_norm": 2.6303701400756836, "learning_rate": 1.683330988355883e-05, "loss": 0.4663, "step": 13766 }, { "epoch": 2.247336843394147, "grad_norm": 3.2939794063568115, "learning_rate": 1.6832841766610823e-05, "loss": 0.6328, "step": 13767 }, { "epoch": 2.2475001020366516, "grad_norm": 3.1980085372924805, "learning_rate": 1.6832373621575583e-05, "loss": 0.6472, "step": 13768 }, { "epoch": 2.247663360679156, "grad_norm": 3.79606556892395, "learning_rate": 1.6831905448455035e-05, "loss": 0.6974, "step": 13769 }, { "epoch": 2.2478266193216605, "grad_norm": 2.9764673709869385, "learning_rate": 1.6831437247251107e-05, "loss": 0.6642, "step": 13770 }, { "epoch": 2.247989877964165, "grad_norm": 2.767328977584839, "learning_rate": 1.683096901796572e-05, "loss": 0.5252, "step": 13771 }, { "epoch": 2.248153136606669, "grad_norm": 2.8151421546936035, "learning_rate": 1.6830500760600798e-05, "loss": 0.5835, "step": 13772 }, { "epoch": 2.2483163952491734, "grad_norm": 3.3492257595062256, "learning_rate": 1.6830032475158264e-05, "loss": 0.7117, "step": 13773 }, { "epoch": 2.248479653891678, "grad_norm": 2.641035795211792, "learning_rate": 1.6829564161640047e-05, "loss": 0.5191, "step": 13774 }, { "epoch": 2.2486429125341822, "grad_norm": 3.8290836811065674, "learning_rate": 1.682909582004807e-05, "loss": 0.7435, "step": 13775 }, { "epoch": 2.2488061711766867, "grad_norm": 2.815354824066162, "learning_rate": 1.6828627450384264e-05, "loss": 0.531, "step": 13776 }, { "epoch": 2.248969429819191, "grad_norm": 3.29402756690979, "learning_rate": 1.6828159052650542e-05, "loss": 0.5802, "step": 13777 }, { "epoch": 2.2491326884616956, "grad_norm": 3.111215114593506, "learning_rate": 1.6827690626848837e-05, "loss": 0.683, "step": 13778 }, { "epoch": 2.2492959471042, "grad_norm": 2.76237416267395, "learning_rate": 1.6827222172981076e-05, "loss": 0.5599, "step": 13779 }, { "epoch": 2.249459205746704, "grad_norm": 2.739659070968628, "learning_rate": 1.682675369104918e-05, "loss": 0.5649, "step": 13780 }, { "epoch": 2.2496224643892084, "grad_norm": 2.7441186904907227, "learning_rate": 1.6826285181055082e-05, "loss": 0.538, "step": 13781 }, { "epoch": 2.249785723031713, "grad_norm": 2.5084214210510254, "learning_rate": 1.6825816643000702e-05, "loss": 0.475, "step": 13782 }, { "epoch": 2.2499489816742173, "grad_norm": 2.9012234210968018, "learning_rate": 1.6825348076887963e-05, "loss": 0.5195, "step": 13783 }, { "epoch": 2.2501122403167217, "grad_norm": 2.9679629802703857, "learning_rate": 1.68248794827188e-05, "loss": 0.6308, "step": 13784 }, { "epoch": 2.250275498959226, "grad_norm": 2.4590532779693604, "learning_rate": 1.682441086049513e-05, "loss": 0.468, "step": 13785 }, { "epoch": 2.2504387576017306, "grad_norm": 3.2233505249023438, "learning_rate": 1.6823942210218888e-05, "loss": 0.7249, "step": 13786 }, { "epoch": 2.250602016244235, "grad_norm": 3.386469602584839, "learning_rate": 1.6823473531891997e-05, "loss": 0.668, "step": 13787 }, { "epoch": 2.2507652748867395, "grad_norm": 2.682084321975708, "learning_rate": 1.682300482551638e-05, "loss": 0.5657, "step": 13788 }, { "epoch": 2.250928533529244, "grad_norm": 2.6526126861572266, "learning_rate": 1.6822536091093967e-05, "loss": 0.516, "step": 13789 }, { "epoch": 2.251091792171748, "grad_norm": 2.627397298812866, "learning_rate": 1.6822067328626682e-05, "loss": 0.5981, "step": 13790 }, { "epoch": 2.2512550508142524, "grad_norm": 2.923495054244995, "learning_rate": 1.682159853811646e-05, "loss": 0.6497, "step": 13791 }, { "epoch": 2.251418309456757, "grad_norm": 2.512478828430176, "learning_rate": 1.6821129719565215e-05, "loss": 0.4435, "step": 13792 }, { "epoch": 2.2515815680992612, "grad_norm": 2.6759397983551025, "learning_rate": 1.6820660872974887e-05, "loss": 0.514, "step": 13793 }, { "epoch": 2.2517448267417657, "grad_norm": 3.096780776977539, "learning_rate": 1.6820191998347395e-05, "loss": 0.7098, "step": 13794 }, { "epoch": 2.25190808538427, "grad_norm": 2.472238540649414, "learning_rate": 1.6819723095684667e-05, "loss": 0.5409, "step": 13795 }, { "epoch": 2.2520713440267746, "grad_norm": 3.2439398765563965, "learning_rate": 1.6819254164988636e-05, "loss": 0.7521, "step": 13796 }, { "epoch": 2.252234602669279, "grad_norm": 3.3307223320007324, "learning_rate": 1.6818785206261225e-05, "loss": 0.7014, "step": 13797 }, { "epoch": 2.252397861311783, "grad_norm": 2.726857900619507, "learning_rate": 1.6818316219504365e-05, "loss": 0.5842, "step": 13798 }, { "epoch": 2.2525611199542874, "grad_norm": 3.051913022994995, "learning_rate": 1.6817847204719978e-05, "loss": 0.6823, "step": 13799 }, { "epoch": 2.252724378596792, "grad_norm": 2.822838544845581, "learning_rate": 1.6817378161909995e-05, "loss": 0.5314, "step": 13800 }, { "epoch": 2.2528876372392963, "grad_norm": 3.0369794368743896, "learning_rate": 1.6816909091076346e-05, "loss": 0.6625, "step": 13801 }, { "epoch": 2.2530508958818007, "grad_norm": 2.464242696762085, "learning_rate": 1.681643999222096e-05, "loss": 0.5436, "step": 13802 }, { "epoch": 2.253214154524305, "grad_norm": 2.455404758453369, "learning_rate": 1.681597086534576e-05, "loss": 0.4998, "step": 13803 }, { "epoch": 2.2533774131668096, "grad_norm": 2.8845560550689697, "learning_rate": 1.681550171045268e-05, "loss": 0.5913, "step": 13804 }, { "epoch": 2.253540671809314, "grad_norm": 2.6887712478637695, "learning_rate": 1.6815032527543644e-05, "loss": 0.5795, "step": 13805 }, { "epoch": 2.2537039304518185, "grad_norm": 2.5244388580322266, "learning_rate": 1.6814563316620586e-05, "loss": 0.5322, "step": 13806 }, { "epoch": 2.2538671890943225, "grad_norm": 2.5169668197631836, "learning_rate": 1.6814094077685426e-05, "loss": 0.5594, "step": 13807 }, { "epoch": 2.254030447736827, "grad_norm": 3.016918897628784, "learning_rate": 1.68136248107401e-05, "loss": 0.5621, "step": 13808 }, { "epoch": 2.2541937063793314, "grad_norm": 3.197925567626953, "learning_rate": 1.6813155515786538e-05, "loss": 0.7752, "step": 13809 }, { "epoch": 2.254356965021836, "grad_norm": 2.606905698776245, "learning_rate": 1.6812686192826665e-05, "loss": 0.5185, "step": 13810 }, { "epoch": 2.2545202236643402, "grad_norm": 2.899306297302246, "learning_rate": 1.681221684186241e-05, "loss": 0.5788, "step": 13811 }, { "epoch": 2.2546834823068447, "grad_norm": 2.4631855487823486, "learning_rate": 1.68117474628957e-05, "loss": 0.4437, "step": 13812 }, { "epoch": 2.254846740949349, "grad_norm": 2.8787453174591064, "learning_rate": 1.6811278055928477e-05, "loss": 0.4883, "step": 13813 }, { "epoch": 2.2550099995918536, "grad_norm": 2.572171211242676, "learning_rate": 1.6810808620962656e-05, "loss": 0.4951, "step": 13814 }, { "epoch": 2.2551732582343575, "grad_norm": 3.1757242679595947, "learning_rate": 1.6810339158000177e-05, "loss": 0.6455, "step": 13815 }, { "epoch": 2.255336516876862, "grad_norm": 2.5370333194732666, "learning_rate": 1.680986966704296e-05, "loss": 0.5118, "step": 13816 }, { "epoch": 2.2554997755193664, "grad_norm": 3.1319499015808105, "learning_rate": 1.6809400148092942e-05, "loss": 0.6441, "step": 13817 }, { "epoch": 2.255663034161871, "grad_norm": 2.9046876430511475, "learning_rate": 1.6808930601152052e-05, "loss": 0.5147, "step": 13818 }, { "epoch": 2.2558262928043753, "grad_norm": 3.438544511795044, "learning_rate": 1.680846102622222e-05, "loss": 0.6711, "step": 13819 }, { "epoch": 2.2559895514468797, "grad_norm": 3.0966432094573975, "learning_rate": 1.6807991423305374e-05, "loss": 0.5678, "step": 13820 }, { "epoch": 2.256152810089384, "grad_norm": 2.9440970420837402, "learning_rate": 1.6807521792403444e-05, "loss": 0.4936, "step": 13821 }, { "epoch": 2.2563160687318886, "grad_norm": 2.613349437713623, "learning_rate": 1.680705213351837e-05, "loss": 0.5164, "step": 13822 }, { "epoch": 2.256479327374393, "grad_norm": 3.1220147609710693, "learning_rate": 1.6806582446652067e-05, "loss": 0.6785, "step": 13823 }, { "epoch": 2.2566425860168975, "grad_norm": 2.1399149894714355, "learning_rate": 1.6806112731806476e-05, "loss": 0.4103, "step": 13824 }, { "epoch": 2.2568058446594015, "grad_norm": 2.363523244857788, "learning_rate": 1.6805642988983525e-05, "loss": 0.4513, "step": 13825 }, { "epoch": 2.256969103301906, "grad_norm": 2.9990193843841553, "learning_rate": 1.6805173218185146e-05, "loss": 0.6517, "step": 13826 }, { "epoch": 2.2571323619444104, "grad_norm": 2.4122121334075928, "learning_rate": 1.680470341941327e-05, "loss": 0.5767, "step": 13827 }, { "epoch": 2.257295620586915, "grad_norm": 2.951124429702759, "learning_rate": 1.680423359266983e-05, "loss": 0.6318, "step": 13828 }, { "epoch": 2.2574588792294192, "grad_norm": 3.1415674686431885, "learning_rate": 1.6803763737956753e-05, "loss": 0.5436, "step": 13829 }, { "epoch": 2.2576221378719237, "grad_norm": 2.9545652866363525, "learning_rate": 1.680329385527597e-05, "loss": 0.6193, "step": 13830 }, { "epoch": 2.257785396514428, "grad_norm": 2.717071294784546, "learning_rate": 1.6802823944629416e-05, "loss": 0.5855, "step": 13831 }, { "epoch": 2.2579486551569325, "grad_norm": 3.2583539485931396, "learning_rate": 1.680235400601902e-05, "loss": 0.5694, "step": 13832 }, { "epoch": 2.2581119137994365, "grad_norm": 2.565192699432373, "learning_rate": 1.680188403944672e-05, "loss": 0.4757, "step": 13833 }, { "epoch": 2.258275172441941, "grad_norm": 3.190037250518799, "learning_rate": 1.680141404491444e-05, "loss": 0.6744, "step": 13834 }, { "epoch": 2.2584384310844454, "grad_norm": 2.5308423042297363, "learning_rate": 1.6800944022424114e-05, "loss": 0.4572, "step": 13835 }, { "epoch": 2.25860168972695, "grad_norm": 2.7065012454986572, "learning_rate": 1.680047397197768e-05, "loss": 0.5039, "step": 13836 }, { "epoch": 2.2587649483694543, "grad_norm": 2.623307466506958, "learning_rate": 1.680000389357706e-05, "loss": 0.5433, "step": 13837 }, { "epoch": 2.2589282070119587, "grad_norm": 2.5298311710357666, "learning_rate": 1.6799533787224192e-05, "loss": 0.5159, "step": 13838 }, { "epoch": 2.259091465654463, "grad_norm": 2.98533296585083, "learning_rate": 1.679906365292101e-05, "loss": 0.612, "step": 13839 }, { "epoch": 2.2592547242969676, "grad_norm": 3.2682619094848633, "learning_rate": 1.6798593490669445e-05, "loss": 0.6401, "step": 13840 }, { "epoch": 2.259417982939472, "grad_norm": 2.837442398071289, "learning_rate": 1.6798123300471427e-05, "loss": 0.5519, "step": 13841 }, { "epoch": 2.2595812415819765, "grad_norm": 2.9461376667022705, "learning_rate": 1.6797653082328894e-05, "loss": 0.569, "step": 13842 }, { "epoch": 2.2597445002244805, "grad_norm": 3.245290994644165, "learning_rate": 1.6797182836243773e-05, "loss": 0.6024, "step": 13843 }, { "epoch": 2.259907758866985, "grad_norm": 2.7785356044769287, "learning_rate": 1.6796712562218004e-05, "loss": 0.5803, "step": 13844 }, { "epoch": 2.2600710175094894, "grad_norm": 2.852635622024536, "learning_rate": 1.679624226025351e-05, "loss": 0.5611, "step": 13845 }, { "epoch": 2.260234276151994, "grad_norm": 2.793168067932129, "learning_rate": 1.6795771930352238e-05, "loss": 0.5528, "step": 13846 }, { "epoch": 2.2603975347944982, "grad_norm": 2.8643088340759277, "learning_rate": 1.6795301572516106e-05, "loss": 0.5679, "step": 13847 }, { "epoch": 2.2605607934370027, "grad_norm": 3.164961338043213, "learning_rate": 1.679483118674706e-05, "loss": 0.7205, "step": 13848 }, { "epoch": 2.260724052079507, "grad_norm": 2.7067322731018066, "learning_rate": 1.679436077304703e-05, "loss": 0.539, "step": 13849 }, { "epoch": 2.260887310722011, "grad_norm": 3.3451156616210938, "learning_rate": 1.6793890331417942e-05, "loss": 0.5871, "step": 13850 }, { "epoch": 2.2610505693645155, "grad_norm": 2.235898017883301, "learning_rate": 1.679341986186174e-05, "loss": 0.5179, "step": 13851 }, { "epoch": 2.26121382800702, "grad_norm": 2.5083274841308594, "learning_rate": 1.6792949364380354e-05, "loss": 0.534, "step": 13852 }, { "epoch": 2.2613770866495244, "grad_norm": 3.3618881702423096, "learning_rate": 1.6792478838975718e-05, "loss": 0.6366, "step": 13853 }, { "epoch": 2.261540345292029, "grad_norm": 2.4960358142852783, "learning_rate": 1.6792008285649766e-05, "loss": 0.5419, "step": 13854 }, { "epoch": 2.2617036039345333, "grad_norm": 2.362539768218994, "learning_rate": 1.679153770440443e-05, "loss": 0.4801, "step": 13855 }, { "epoch": 2.2618668625770377, "grad_norm": 3.0761520862579346, "learning_rate": 1.6791067095241654e-05, "loss": 0.6328, "step": 13856 }, { "epoch": 2.262030121219542, "grad_norm": 2.577059268951416, "learning_rate": 1.6790596458163357e-05, "loss": 0.4453, "step": 13857 }, { "epoch": 2.2621933798620466, "grad_norm": 3.7013099193573, "learning_rate": 1.6790125793171486e-05, "loss": 0.719, "step": 13858 }, { "epoch": 2.262356638504551, "grad_norm": 2.6591076850891113, "learning_rate": 1.6789655100267972e-05, "loss": 0.5042, "step": 13859 }, { "epoch": 2.262519897147055, "grad_norm": 2.978466749191284, "learning_rate": 1.678918437945475e-05, "loss": 0.6004, "step": 13860 }, { "epoch": 2.2626831557895595, "grad_norm": 2.366600513458252, "learning_rate": 1.6788713630733756e-05, "loss": 0.5324, "step": 13861 }, { "epoch": 2.262846414432064, "grad_norm": 2.5359115600585938, "learning_rate": 1.678824285410692e-05, "loss": 0.4675, "step": 13862 }, { "epoch": 2.2630096730745684, "grad_norm": 2.8933351039886475, "learning_rate": 1.6787772049576183e-05, "loss": 0.6336, "step": 13863 }, { "epoch": 2.263172931717073, "grad_norm": 2.4094388484954834, "learning_rate": 1.678730121714348e-05, "loss": 0.4924, "step": 13864 }, { "epoch": 2.2633361903595772, "grad_norm": 2.991633892059326, "learning_rate": 1.678683035681074e-05, "loss": 0.5545, "step": 13865 }, { "epoch": 2.2634994490020817, "grad_norm": 2.83097767829895, "learning_rate": 1.6786359468579907e-05, "loss": 0.5487, "step": 13866 }, { "epoch": 2.263662707644586, "grad_norm": 3.031426191329956, "learning_rate": 1.6785888552452908e-05, "loss": 0.5892, "step": 13867 }, { "epoch": 2.26382596628709, "grad_norm": 2.994399309158325, "learning_rate": 1.6785417608431688e-05, "loss": 0.6146, "step": 13868 }, { "epoch": 2.2639892249295945, "grad_norm": 2.574585437774658, "learning_rate": 1.6784946636518177e-05, "loss": 0.5014, "step": 13869 }, { "epoch": 2.264152483572099, "grad_norm": 2.724961757659912, "learning_rate": 1.6784475636714314e-05, "loss": 0.568, "step": 13870 }, { "epoch": 2.2643157422146034, "grad_norm": 2.6487932205200195, "learning_rate": 1.678400460902203e-05, "loss": 0.5363, "step": 13871 }, { "epoch": 2.264479000857108, "grad_norm": 2.542227029800415, "learning_rate": 1.6783533553443264e-05, "loss": 0.5556, "step": 13872 }, { "epoch": 2.2646422594996123, "grad_norm": 2.889812707901001, "learning_rate": 1.6783062469979956e-05, "loss": 0.6228, "step": 13873 }, { "epoch": 2.2648055181421167, "grad_norm": 2.80519437789917, "learning_rate": 1.678259135863404e-05, "loss": 0.4884, "step": 13874 }, { "epoch": 2.264968776784621, "grad_norm": 2.9950616359710693, "learning_rate": 1.678212021940745e-05, "loss": 0.7202, "step": 13875 }, { "epoch": 2.2651320354271256, "grad_norm": 2.7340087890625, "learning_rate": 1.6781649052302126e-05, "loss": 0.6088, "step": 13876 }, { "epoch": 2.26529529406963, "grad_norm": 3.043034315109253, "learning_rate": 1.678117785732e-05, "loss": 0.6487, "step": 13877 }, { "epoch": 2.265458552712134, "grad_norm": 3.050919532775879, "learning_rate": 1.6780706634463016e-05, "loss": 0.6071, "step": 13878 }, { "epoch": 2.2656218113546385, "grad_norm": 3.2970192432403564, "learning_rate": 1.6780235383733106e-05, "loss": 0.6083, "step": 13879 }, { "epoch": 2.265785069997143, "grad_norm": 3.2913818359375, "learning_rate": 1.677976410513221e-05, "loss": 0.5737, "step": 13880 }, { "epoch": 2.2659483286396473, "grad_norm": 2.785322427749634, "learning_rate": 1.6779292798662262e-05, "loss": 0.6039, "step": 13881 }, { "epoch": 2.266111587282152, "grad_norm": 2.6473073959350586, "learning_rate": 1.6778821464325203e-05, "loss": 0.5971, "step": 13882 }, { "epoch": 2.2662748459246562, "grad_norm": 2.6675307750701904, "learning_rate": 1.6778350102122966e-05, "loss": 0.5124, "step": 13883 }, { "epoch": 2.2664381045671607, "grad_norm": 2.600759744644165, "learning_rate": 1.6777878712057492e-05, "loss": 0.502, "step": 13884 }, { "epoch": 2.266601363209665, "grad_norm": 2.9392919540405273, "learning_rate": 1.677740729413072e-05, "loss": 0.5755, "step": 13885 }, { "epoch": 2.266764621852169, "grad_norm": 2.888045072555542, "learning_rate": 1.6776935848344586e-05, "loss": 0.5658, "step": 13886 }, { "epoch": 2.2669278804946735, "grad_norm": 2.942110061645508, "learning_rate": 1.6776464374701026e-05, "loss": 0.5246, "step": 13887 }, { "epoch": 2.267091139137178, "grad_norm": 3.07873272895813, "learning_rate": 1.6775992873201977e-05, "loss": 0.6582, "step": 13888 }, { "epoch": 2.2672543977796824, "grad_norm": 2.6683237552642822, "learning_rate": 1.6775521343849382e-05, "loss": 0.5074, "step": 13889 }, { "epoch": 2.267417656422187, "grad_norm": 3.006699562072754, "learning_rate": 1.677504978664518e-05, "loss": 0.6266, "step": 13890 }, { "epoch": 2.2675809150646913, "grad_norm": 3.730762004852295, "learning_rate": 1.6774578201591304e-05, "loss": 0.6059, "step": 13891 }, { "epoch": 2.2677441737071957, "grad_norm": 2.8870925903320312, "learning_rate": 1.6774106588689693e-05, "loss": 0.5453, "step": 13892 }, { "epoch": 2.2679074323497, "grad_norm": 2.9481630325317383, "learning_rate": 1.677363494794229e-05, "loss": 0.5641, "step": 13893 }, { "epoch": 2.2680706909922046, "grad_norm": 2.760806083679199, "learning_rate": 1.6773163279351035e-05, "loss": 0.5778, "step": 13894 }, { "epoch": 2.268233949634709, "grad_norm": 3.2556464672088623, "learning_rate": 1.6772691582917857e-05, "loss": 0.6151, "step": 13895 }, { "epoch": 2.268397208277213, "grad_norm": 2.834883689880371, "learning_rate": 1.6772219858644706e-05, "loss": 0.6193, "step": 13896 }, { "epoch": 2.2685604669197175, "grad_norm": 2.592611789703369, "learning_rate": 1.677174810653351e-05, "loss": 0.4831, "step": 13897 }, { "epoch": 2.268723725562222, "grad_norm": 3.758427858352661, "learning_rate": 1.6771276326586222e-05, "loss": 0.8096, "step": 13898 }, { "epoch": 2.2688869842047263, "grad_norm": 2.7113547325134277, "learning_rate": 1.677080451880477e-05, "loss": 0.5006, "step": 13899 }, { "epoch": 2.269050242847231, "grad_norm": 2.8101539611816406, "learning_rate": 1.67703326831911e-05, "loss": 0.5538, "step": 13900 }, { "epoch": 2.269213501489735, "grad_norm": 2.787334442138672, "learning_rate": 1.6769860819747144e-05, "loss": 0.5326, "step": 13901 }, { "epoch": 2.2693767601322397, "grad_norm": 3.3409860134124756, "learning_rate": 1.6769388928474846e-05, "loss": 0.6678, "step": 13902 }, { "epoch": 2.2695400187747437, "grad_norm": 3.2008321285247803, "learning_rate": 1.676891700937615e-05, "loss": 0.5479, "step": 13903 }, { "epoch": 2.269703277417248, "grad_norm": 2.709949016571045, "learning_rate": 1.6768445062452993e-05, "loss": 0.5945, "step": 13904 }, { "epoch": 2.2698665360597525, "grad_norm": 2.62648868560791, "learning_rate": 1.676797308770731e-05, "loss": 0.533, "step": 13905 }, { "epoch": 2.270029794702257, "grad_norm": 2.9239230155944824, "learning_rate": 1.6767501085141046e-05, "loss": 0.5779, "step": 13906 }, { "epoch": 2.2701930533447614, "grad_norm": 2.5069353580474854, "learning_rate": 1.6767029054756143e-05, "loss": 0.5332, "step": 13907 }, { "epoch": 2.270356311987266, "grad_norm": 3.6004276275634766, "learning_rate": 1.676655699655454e-05, "loss": 0.6578, "step": 13908 }, { "epoch": 2.2705195706297703, "grad_norm": 3.7604150772094727, "learning_rate": 1.6766084910538173e-05, "loss": 0.7737, "step": 13909 }, { "epoch": 2.2706828292722747, "grad_norm": 2.316282272338867, "learning_rate": 1.6765612796708986e-05, "loss": 0.4831, "step": 13910 }, { "epoch": 2.270846087914779, "grad_norm": 3.4512014389038086, "learning_rate": 1.676514065506892e-05, "loss": 0.7215, "step": 13911 }, { "epoch": 2.2710093465572836, "grad_norm": 2.748257637023926, "learning_rate": 1.6764668485619914e-05, "loss": 0.6135, "step": 13912 }, { "epoch": 2.2711726051997876, "grad_norm": 3.2880661487579346, "learning_rate": 1.676419628836391e-05, "loss": 0.6193, "step": 13913 }, { "epoch": 2.271335863842292, "grad_norm": 3.113572359085083, "learning_rate": 1.6763724063302854e-05, "loss": 0.5793, "step": 13914 }, { "epoch": 2.2714991224847965, "grad_norm": 2.3839359283447266, "learning_rate": 1.676325181043868e-05, "loss": 0.5565, "step": 13915 }, { "epoch": 2.271662381127301, "grad_norm": 2.6235082149505615, "learning_rate": 1.676277952977333e-05, "loss": 0.5524, "step": 13916 }, { "epoch": 2.2718256397698053, "grad_norm": 3.2378218173980713, "learning_rate": 1.6762307221308743e-05, "loss": 0.6453, "step": 13917 }, { "epoch": 2.2719888984123098, "grad_norm": 2.4849557876586914, "learning_rate": 1.676183488504687e-05, "loss": 0.3888, "step": 13918 }, { "epoch": 2.272152157054814, "grad_norm": 3.4314143657684326, "learning_rate": 1.6761362520989645e-05, "loss": 0.5879, "step": 13919 }, { "epoch": 2.2723154156973187, "grad_norm": 3.2239723205566406, "learning_rate": 1.6760890129139012e-05, "loss": 0.6685, "step": 13920 }, { "epoch": 2.2724786743398226, "grad_norm": 3.08925199508667, "learning_rate": 1.6760417709496914e-05, "loss": 0.5548, "step": 13921 }, { "epoch": 2.272641932982327, "grad_norm": 3.267838954925537, "learning_rate": 1.675994526206529e-05, "loss": 0.7426, "step": 13922 }, { "epoch": 2.2728051916248315, "grad_norm": 2.5059287548065186, "learning_rate": 1.6759472786846087e-05, "loss": 0.4602, "step": 13923 }, { "epoch": 2.272968450267336, "grad_norm": 2.758918046951294, "learning_rate": 1.675900028384124e-05, "loss": 0.6004, "step": 13924 }, { "epoch": 2.2731317089098404, "grad_norm": 2.5139307975769043, "learning_rate": 1.6758527753052693e-05, "loss": 0.4841, "step": 13925 }, { "epoch": 2.273294967552345, "grad_norm": 2.604421377182007, "learning_rate": 1.6758055194482395e-05, "loss": 0.4571, "step": 13926 }, { "epoch": 2.2734582261948493, "grad_norm": 3.0419626235961914, "learning_rate": 1.6757582608132283e-05, "loss": 0.696, "step": 13927 }, { "epoch": 2.2736214848373537, "grad_norm": 2.805880308151245, "learning_rate": 1.6757109994004298e-05, "loss": 0.5088, "step": 13928 }, { "epoch": 2.273784743479858, "grad_norm": 2.885073184967041, "learning_rate": 1.6756637352100388e-05, "loss": 0.6156, "step": 13929 }, { "epoch": 2.2739480021223626, "grad_norm": 3.32953143119812, "learning_rate": 1.675616468242249e-05, "loss": 0.6394, "step": 13930 }, { "epoch": 2.2741112607648666, "grad_norm": 2.9928131103515625, "learning_rate": 1.6755691984972552e-05, "loss": 0.5811, "step": 13931 }, { "epoch": 2.274274519407371, "grad_norm": 2.5566625595092773, "learning_rate": 1.6755219259752517e-05, "loss": 0.5117, "step": 13932 }, { "epoch": 2.2744377780498755, "grad_norm": 2.551591634750366, "learning_rate": 1.675474650676432e-05, "loss": 0.562, "step": 13933 }, { "epoch": 2.27460103669238, "grad_norm": 2.7694313526153564, "learning_rate": 1.675427372600992e-05, "loss": 0.498, "step": 13934 }, { "epoch": 2.2747642953348843, "grad_norm": 2.5387022495269775, "learning_rate": 1.6753800917491244e-05, "loss": 0.4968, "step": 13935 }, { "epoch": 2.2749275539773888, "grad_norm": 2.7429630756378174, "learning_rate": 1.6753328081210244e-05, "loss": 0.5371, "step": 13936 }, { "epoch": 2.275090812619893, "grad_norm": 3.2218079566955566, "learning_rate": 1.6752855217168863e-05, "loss": 0.5541, "step": 13937 }, { "epoch": 2.275254071262397, "grad_norm": 3.0149505138397217, "learning_rate": 1.6752382325369048e-05, "loss": 0.6342, "step": 13938 }, { "epoch": 2.2754173299049016, "grad_norm": 2.4800920486450195, "learning_rate": 1.6751909405812736e-05, "loss": 0.4771, "step": 13939 }, { "epoch": 2.275580588547406, "grad_norm": 2.586897134780884, "learning_rate": 1.675143645850187e-05, "loss": 0.4632, "step": 13940 }, { "epoch": 2.2757438471899105, "grad_norm": 2.972412109375, "learning_rate": 1.67509634834384e-05, "loss": 0.5498, "step": 13941 }, { "epoch": 2.275907105832415, "grad_norm": 2.9393441677093506, "learning_rate": 1.675049048062427e-05, "loss": 0.5715, "step": 13942 }, { "epoch": 2.2760703644749194, "grad_norm": 3.146555185317993, "learning_rate": 1.6750017450061423e-05, "loss": 0.6613, "step": 13943 }, { "epoch": 2.276233623117424, "grad_norm": 2.631216049194336, "learning_rate": 1.67495443917518e-05, "loss": 0.5586, "step": 13944 }, { "epoch": 2.2763968817599283, "grad_norm": 2.729879379272461, "learning_rate": 1.6749071305697354e-05, "loss": 0.5606, "step": 13945 }, { "epoch": 2.2765601404024327, "grad_norm": 3.260209083557129, "learning_rate": 1.674859819190002e-05, "loss": 0.5931, "step": 13946 }, { "epoch": 2.276723399044937, "grad_norm": 2.65799880027771, "learning_rate": 1.6748125050361745e-05, "loss": 0.618, "step": 13947 }, { "epoch": 2.276886657687441, "grad_norm": 3.3584511280059814, "learning_rate": 1.674765188108448e-05, "loss": 0.5536, "step": 13948 }, { "epoch": 2.2770499163299456, "grad_norm": 2.881044864654541, "learning_rate": 1.6747178684070165e-05, "loss": 0.5438, "step": 13949 }, { "epoch": 2.27721317497245, "grad_norm": 2.4279978275299072, "learning_rate": 1.6746705459320746e-05, "loss": 0.4483, "step": 13950 }, { "epoch": 2.2773764336149545, "grad_norm": 2.6050610542297363, "learning_rate": 1.6746232206838168e-05, "loss": 0.5481, "step": 13951 }, { "epoch": 2.277539692257459, "grad_norm": 3.3151235580444336, "learning_rate": 1.6745758926624374e-05, "loss": 0.6158, "step": 13952 }, { "epoch": 2.2777029508999633, "grad_norm": 2.443978786468506, "learning_rate": 1.6745285618681317e-05, "loss": 0.495, "step": 13953 }, { "epoch": 2.2778662095424678, "grad_norm": 2.7794084548950195, "learning_rate": 1.6744812283010935e-05, "loss": 0.4942, "step": 13954 }, { "epoch": 2.278029468184972, "grad_norm": 2.466710090637207, "learning_rate": 1.6744338919615176e-05, "loss": 0.4672, "step": 13955 }, { "epoch": 2.278192726827476, "grad_norm": 3.255476713180542, "learning_rate": 1.674386552849599e-05, "loss": 0.7785, "step": 13956 }, { "epoch": 2.2783559854699806, "grad_norm": 2.7540743350982666, "learning_rate": 1.6743392109655318e-05, "loss": 0.6179, "step": 13957 }, { "epoch": 2.278519244112485, "grad_norm": 2.8934803009033203, "learning_rate": 1.6742918663095106e-05, "loss": 0.5406, "step": 13958 }, { "epoch": 2.2786825027549895, "grad_norm": 3.070301055908203, "learning_rate": 1.6742445188817302e-05, "loss": 0.6689, "step": 13959 }, { "epoch": 2.278845761397494, "grad_norm": 3.120593786239624, "learning_rate": 1.674197168682385e-05, "loss": 0.6412, "step": 13960 }, { "epoch": 2.2790090200399984, "grad_norm": 2.6791348457336426, "learning_rate": 1.67414981571167e-05, "loss": 0.5019, "step": 13961 }, { "epoch": 2.279172278682503, "grad_norm": 2.8684840202331543, "learning_rate": 1.67410245996978e-05, "loss": 0.5904, "step": 13962 }, { "epoch": 2.2793355373250073, "grad_norm": 2.9978339672088623, "learning_rate": 1.6740551014569085e-05, "loss": 0.6008, "step": 13963 }, { "epoch": 2.2794987959675117, "grad_norm": 3.1477530002593994, "learning_rate": 1.6740077401732517e-05, "loss": 0.6016, "step": 13964 }, { "epoch": 2.279662054610016, "grad_norm": 3.1034364700317383, "learning_rate": 1.6739603761190034e-05, "loss": 0.5635, "step": 13965 }, { "epoch": 2.27982531325252, "grad_norm": 2.7020456790924072, "learning_rate": 1.673913009294358e-05, "loss": 0.5148, "step": 13966 }, { "epoch": 2.2799885718950246, "grad_norm": 2.642817497253418, "learning_rate": 1.6738656396995116e-05, "loss": 0.571, "step": 13967 }, { "epoch": 2.280151830537529, "grad_norm": 3.129772424697876, "learning_rate": 1.6738182673346574e-05, "loss": 0.6061, "step": 13968 }, { "epoch": 2.2803150891800334, "grad_norm": 3.937397003173828, "learning_rate": 1.6737708921999906e-05, "loss": 0.6889, "step": 13969 }, { "epoch": 2.280478347822538, "grad_norm": 2.974916934967041, "learning_rate": 1.6737235142957066e-05, "loss": 0.6394, "step": 13970 }, { "epoch": 2.2806416064650423, "grad_norm": 2.9204750061035156, "learning_rate": 1.673676133621999e-05, "loss": 0.5971, "step": 13971 }, { "epoch": 2.2808048651075468, "grad_norm": 3.4863483905792236, "learning_rate": 1.6736287501790638e-05, "loss": 0.631, "step": 13972 }, { "epoch": 2.280968123750051, "grad_norm": 2.888223171234131, "learning_rate": 1.6735813639670946e-05, "loss": 0.655, "step": 13973 }, { "epoch": 2.281131382392555, "grad_norm": 3.02412486076355, "learning_rate": 1.6735339749862868e-05, "loss": 0.6478, "step": 13974 }, { "epoch": 2.2812946410350596, "grad_norm": 2.8101909160614014, "learning_rate": 1.6734865832368356e-05, "loss": 0.5926, "step": 13975 }, { "epoch": 2.281457899677564, "grad_norm": 2.9929819107055664, "learning_rate": 1.6734391887189352e-05, "loss": 0.6032, "step": 13976 }, { "epoch": 2.2816211583200685, "grad_norm": 2.7448952198028564, "learning_rate": 1.6733917914327803e-05, "loss": 0.5436, "step": 13977 }, { "epoch": 2.281784416962573, "grad_norm": 2.6984004974365234, "learning_rate": 1.6733443913785665e-05, "loss": 0.5568, "step": 13978 }, { "epoch": 2.2819476756050774, "grad_norm": 2.749812364578247, "learning_rate": 1.6732969885564878e-05, "loss": 0.567, "step": 13979 }, { "epoch": 2.282110934247582, "grad_norm": 2.7883682250976562, "learning_rate": 1.6732495829667395e-05, "loss": 0.5632, "step": 13980 }, { "epoch": 2.2822741928900863, "grad_norm": 2.91715145111084, "learning_rate": 1.6732021746095164e-05, "loss": 0.6065, "step": 13981 }, { "epoch": 2.2824374515325907, "grad_norm": 2.8538384437561035, "learning_rate": 1.6731547634850133e-05, "loss": 0.5456, "step": 13982 }, { "epoch": 2.282600710175095, "grad_norm": 2.78308367729187, "learning_rate": 1.6731073495934253e-05, "loss": 0.5625, "step": 13983 }, { "epoch": 2.282763968817599, "grad_norm": 2.9847118854522705, "learning_rate": 1.6730599329349472e-05, "loss": 0.655, "step": 13984 }, { "epoch": 2.2829272274601036, "grad_norm": 2.81990647315979, "learning_rate": 1.6730125135097736e-05, "loss": 0.5665, "step": 13985 }, { "epoch": 2.283090486102608, "grad_norm": 3.861663341522217, "learning_rate": 1.6729650913180996e-05, "loss": 0.7691, "step": 13986 }, { "epoch": 2.2832537447451124, "grad_norm": 2.9856631755828857, "learning_rate": 1.6729176663601207e-05, "loss": 0.6899, "step": 13987 }, { "epoch": 2.283417003387617, "grad_norm": 2.498131513595581, "learning_rate": 1.6728702386360306e-05, "loss": 0.5704, "step": 13988 }, { "epoch": 2.2835802620301213, "grad_norm": 3.1200172901153564, "learning_rate": 1.672822808146026e-05, "loss": 0.5775, "step": 13989 }, { "epoch": 2.2837435206726258, "grad_norm": 3.3349575996398926, "learning_rate": 1.6727753748903e-05, "loss": 0.6765, "step": 13990 }, { "epoch": 2.2839067793151298, "grad_norm": 2.742729425430298, "learning_rate": 1.6727279388690488e-05, "loss": 0.5947, "step": 13991 }, { "epoch": 2.284070037957634, "grad_norm": 2.7056055068969727, "learning_rate": 1.6726805000824672e-05, "loss": 0.5891, "step": 13992 }, { "epoch": 2.2842332966001386, "grad_norm": 2.7287437915802, "learning_rate": 1.67263305853075e-05, "loss": 0.5578, "step": 13993 }, { "epoch": 2.284396555242643, "grad_norm": 2.5952365398406982, "learning_rate": 1.6725856142140922e-05, "loss": 0.5221, "step": 13994 }, { "epoch": 2.2845598138851475, "grad_norm": 3.2093451023101807, "learning_rate": 1.672538167132689e-05, "loss": 0.5942, "step": 13995 }, { "epoch": 2.284723072527652, "grad_norm": 2.0337064266204834, "learning_rate": 1.672490717286735e-05, "loss": 0.4307, "step": 13996 }, { "epoch": 2.2848863311701564, "grad_norm": 2.691244602203369, "learning_rate": 1.672443264676426e-05, "loss": 0.4777, "step": 13997 }, { "epoch": 2.285049589812661, "grad_norm": 2.8350818157196045, "learning_rate": 1.6723958093019565e-05, "loss": 0.591, "step": 13998 }, { "epoch": 2.2852128484551653, "grad_norm": 3.1942954063415527, "learning_rate": 1.672348351163522e-05, "loss": 0.6561, "step": 13999 }, { "epoch": 2.2853761070976697, "grad_norm": 3.1547741889953613, "learning_rate": 1.672300890261317e-05, "loss": 0.6225, "step": 14000 }, { "epoch": 2.2855393657401737, "grad_norm": 3.2213525772094727, "learning_rate": 1.6722534265955366e-05, "loss": 0.7448, "step": 14001 }, { "epoch": 2.285702624382678, "grad_norm": 2.5476632118225098, "learning_rate": 1.672205960166377e-05, "loss": 0.5214, "step": 14002 }, { "epoch": 2.2858658830251826, "grad_norm": 2.9527907371520996, "learning_rate": 1.672158490974032e-05, "loss": 0.5698, "step": 14003 }, { "epoch": 2.286029141667687, "grad_norm": 2.9781439304351807, "learning_rate": 1.672111019018697e-05, "loss": 0.6084, "step": 14004 }, { "epoch": 2.2861924003101914, "grad_norm": 2.581273317337036, "learning_rate": 1.6720635443005678e-05, "loss": 0.6102, "step": 14005 }, { "epoch": 2.286355658952696, "grad_norm": 3.1134746074676514, "learning_rate": 1.6720160668198393e-05, "loss": 0.6135, "step": 14006 }, { "epoch": 2.2865189175952003, "grad_norm": 3.37333345413208, "learning_rate": 1.671968586576706e-05, "loss": 0.6311, "step": 14007 }, { "epoch": 2.2866821762377048, "grad_norm": 2.705549955368042, "learning_rate": 1.6719211035713637e-05, "loss": 0.4641, "step": 14008 }, { "epoch": 2.2868454348802087, "grad_norm": 3.393136978149414, "learning_rate": 1.671873617804008e-05, "loss": 0.6825, "step": 14009 }, { "epoch": 2.287008693522713, "grad_norm": 2.8962552547454834, "learning_rate": 1.6718261292748328e-05, "loss": 0.6884, "step": 14010 }, { "epoch": 2.2871719521652176, "grad_norm": 3.095752716064453, "learning_rate": 1.6717786379840344e-05, "loss": 0.6563, "step": 14011 }, { "epoch": 2.287335210807722, "grad_norm": 3.0471200942993164, "learning_rate": 1.6717311439318077e-05, "loss": 0.6333, "step": 14012 }, { "epoch": 2.2874984694502265, "grad_norm": 2.8850388526916504, "learning_rate": 1.6716836471183477e-05, "loss": 0.6445, "step": 14013 }, { "epoch": 2.287661728092731, "grad_norm": 2.947145938873291, "learning_rate": 1.67163614754385e-05, "loss": 0.6015, "step": 14014 }, { "epoch": 2.2878249867352354, "grad_norm": 2.66637921333313, "learning_rate": 1.6715886452085098e-05, "loss": 0.5099, "step": 14015 }, { "epoch": 2.28798824537774, "grad_norm": 3.1432220935821533, "learning_rate": 1.6715411401125225e-05, "loss": 0.4717, "step": 14016 }, { "epoch": 2.2881515040202443, "grad_norm": 2.654263734817505, "learning_rate": 1.6714936322560826e-05, "loss": 0.5586, "step": 14017 }, { "epoch": 2.2883147626627487, "grad_norm": 2.4908032417297363, "learning_rate": 1.6714461216393862e-05, "loss": 0.5502, "step": 14018 }, { "epoch": 2.2884780213052527, "grad_norm": 3.097913980484009, "learning_rate": 1.6713986082626283e-05, "loss": 0.6181, "step": 14019 }, { "epoch": 2.288641279947757, "grad_norm": 3.000149965286255, "learning_rate": 1.671351092126004e-05, "loss": 0.5539, "step": 14020 }, { "epoch": 2.2888045385902616, "grad_norm": 3.357780933380127, "learning_rate": 1.6713035732297093e-05, "loss": 0.6602, "step": 14021 }, { "epoch": 2.288967797232766, "grad_norm": 2.8908066749572754, "learning_rate": 1.6712560515739385e-05, "loss": 0.616, "step": 14022 }, { "epoch": 2.2891310558752704, "grad_norm": 2.535271644592285, "learning_rate": 1.671208527158888e-05, "loss": 0.4567, "step": 14023 }, { "epoch": 2.289294314517775, "grad_norm": 3.042012929916382, "learning_rate": 1.6711609999847526e-05, "loss": 0.5665, "step": 14024 }, { "epoch": 2.2894575731602793, "grad_norm": 2.9247725009918213, "learning_rate": 1.671113470051728e-05, "loss": 0.5891, "step": 14025 }, { "epoch": 2.2896208318027838, "grad_norm": 2.790757894515991, "learning_rate": 1.671065937360009e-05, "loss": 0.5875, "step": 14026 }, { "epoch": 2.2897840904452877, "grad_norm": 2.4047815799713135, "learning_rate": 1.6710184019097914e-05, "loss": 0.5146, "step": 14027 }, { "epoch": 2.289947349087792, "grad_norm": 3.145233631134033, "learning_rate": 1.6709708637012706e-05, "loss": 0.639, "step": 14028 }, { "epoch": 2.2901106077302966, "grad_norm": 2.6767032146453857, "learning_rate": 1.6709233227346414e-05, "loss": 0.5041, "step": 14029 }, { "epoch": 2.290273866372801, "grad_norm": 3.3197999000549316, "learning_rate": 1.6708757790101004e-05, "loss": 0.7527, "step": 14030 }, { "epoch": 2.2904371250153055, "grad_norm": 2.6209819316864014, "learning_rate": 1.670828232527842e-05, "loss": 0.5038, "step": 14031 }, { "epoch": 2.29060038365781, "grad_norm": 3.10192608833313, "learning_rate": 1.6707806832880625e-05, "loss": 0.5662, "step": 14032 }, { "epoch": 2.2907636423003144, "grad_norm": 2.7537598609924316, "learning_rate": 1.6707331312909566e-05, "loss": 0.5259, "step": 14033 }, { "epoch": 2.290926900942819, "grad_norm": 2.478226900100708, "learning_rate": 1.6706855765367202e-05, "loss": 0.5214, "step": 14034 }, { "epoch": 2.2910901595853232, "grad_norm": 2.9045684337615967, "learning_rate": 1.6706380190255484e-05, "loss": 0.7133, "step": 14035 }, { "epoch": 2.2912534182278272, "grad_norm": 3.2337307929992676, "learning_rate": 1.670590458757637e-05, "loss": 0.6714, "step": 14036 }, { "epoch": 2.2914166768703317, "grad_norm": 2.84397029876709, "learning_rate": 1.6705428957331817e-05, "loss": 0.5613, "step": 14037 }, { "epoch": 2.291579935512836, "grad_norm": 3.448765516281128, "learning_rate": 1.6704953299523774e-05, "loss": 0.7253, "step": 14038 }, { "epoch": 2.2917431941553406, "grad_norm": 2.4074580669403076, "learning_rate": 1.6704477614154204e-05, "loss": 0.5662, "step": 14039 }, { "epoch": 2.291906452797845, "grad_norm": 3.4882233142852783, "learning_rate": 1.6704001901225052e-05, "loss": 0.6631, "step": 14040 }, { "epoch": 2.2920697114403494, "grad_norm": 2.7811505794525146, "learning_rate": 1.6703526160738286e-05, "loss": 0.584, "step": 14041 }, { "epoch": 2.292232970082854, "grad_norm": 2.7113893032073975, "learning_rate": 1.670305039269585e-05, "loss": 0.5188, "step": 14042 }, { "epoch": 2.2923962287253583, "grad_norm": 3.122438430786133, "learning_rate": 1.670257459709971e-05, "loss": 0.7659, "step": 14043 }, { "epoch": 2.2925594873678623, "grad_norm": 2.213672399520874, "learning_rate": 1.670209877395181e-05, "loss": 0.3901, "step": 14044 }, { "epoch": 2.2927227460103667, "grad_norm": 2.964289903640747, "learning_rate": 1.6701622923254118e-05, "loss": 0.6582, "step": 14045 }, { "epoch": 2.292886004652871, "grad_norm": 2.8795368671417236, "learning_rate": 1.6701147045008583e-05, "loss": 0.5392, "step": 14046 }, { "epoch": 2.2930492632953756, "grad_norm": 3.1277852058410645, "learning_rate": 1.670067113921716e-05, "loss": 0.5491, "step": 14047 }, { "epoch": 2.29321252193788, "grad_norm": 2.8996269702911377, "learning_rate": 1.6700195205881813e-05, "loss": 0.636, "step": 14048 }, { "epoch": 2.2933757805803845, "grad_norm": 3.0067429542541504, "learning_rate": 1.669971924500449e-05, "loss": 0.7611, "step": 14049 }, { "epoch": 2.293539039222889, "grad_norm": 2.4959802627563477, "learning_rate": 1.6699243256587156e-05, "loss": 0.5225, "step": 14050 }, { "epoch": 2.2937022978653934, "grad_norm": 2.5259501934051514, "learning_rate": 1.6698767240631757e-05, "loss": 0.5431, "step": 14051 }, { "epoch": 2.293865556507898, "grad_norm": 3.1841230392456055, "learning_rate": 1.6698291197140255e-05, "loss": 0.6685, "step": 14052 }, { "epoch": 2.2940288151504022, "grad_norm": 2.7356784343719482, "learning_rate": 1.669781512611461e-05, "loss": 0.5632, "step": 14053 }, { "epoch": 2.2941920737929062, "grad_norm": 3.0042262077331543, "learning_rate": 1.6697339027556774e-05, "loss": 0.6173, "step": 14054 }, { "epoch": 2.2943553324354107, "grad_norm": 2.5835018157958984, "learning_rate": 1.669686290146871e-05, "loss": 0.5493, "step": 14055 }, { "epoch": 2.294518591077915, "grad_norm": 2.6305699348449707, "learning_rate": 1.6696386747852365e-05, "loss": 0.4724, "step": 14056 }, { "epoch": 2.2946818497204196, "grad_norm": 2.8598012924194336, "learning_rate": 1.669591056670971e-05, "loss": 0.5376, "step": 14057 }, { "epoch": 2.294845108362924, "grad_norm": 3.143735647201538, "learning_rate": 1.669543435804269e-05, "loss": 0.5928, "step": 14058 }, { "epoch": 2.2950083670054284, "grad_norm": 2.780514717102051, "learning_rate": 1.6694958121853266e-05, "loss": 0.5353, "step": 14059 }, { "epoch": 2.295171625647933, "grad_norm": 2.894050121307373, "learning_rate": 1.6694481858143403e-05, "loss": 0.5316, "step": 14060 }, { "epoch": 2.2953348842904373, "grad_norm": 2.7016892433166504, "learning_rate": 1.669400556691505e-05, "loss": 0.5053, "step": 14061 }, { "epoch": 2.2954981429329413, "grad_norm": 2.9531748294830322, "learning_rate": 1.669352924817017e-05, "loss": 0.6883, "step": 14062 }, { "epoch": 2.2956614015754457, "grad_norm": 3.2367031574249268, "learning_rate": 1.6693052901910714e-05, "loss": 0.5272, "step": 14063 }, { "epoch": 2.29582466021795, "grad_norm": 2.9748520851135254, "learning_rate": 1.6692576528138644e-05, "loss": 0.6329, "step": 14064 }, { "epoch": 2.2959879188604546, "grad_norm": 2.6529648303985596, "learning_rate": 1.6692100126855922e-05, "loss": 0.4785, "step": 14065 }, { "epoch": 2.296151177502959, "grad_norm": 2.591996669769287, "learning_rate": 1.6691623698064505e-05, "loss": 0.5983, "step": 14066 }, { "epoch": 2.2963144361454635, "grad_norm": 2.683340072631836, "learning_rate": 1.6691147241766346e-05, "loss": 0.5512, "step": 14067 }, { "epoch": 2.296477694787968, "grad_norm": 3.0969064235687256, "learning_rate": 1.669067075796341e-05, "loss": 0.6368, "step": 14068 }, { "epoch": 2.2966409534304724, "grad_norm": 3.096855878829956, "learning_rate": 1.6690194246657653e-05, "loss": 0.6169, "step": 14069 }, { "epoch": 2.296804212072977, "grad_norm": 3.050569534301758, "learning_rate": 1.6689717707851035e-05, "loss": 0.6914, "step": 14070 }, { "epoch": 2.2969674707154812, "grad_norm": 2.9525649547576904, "learning_rate": 1.668924114154551e-05, "loss": 0.6167, "step": 14071 }, { "epoch": 2.2971307293579852, "grad_norm": 3.16290020942688, "learning_rate": 1.668876454774304e-05, "loss": 0.6341, "step": 14072 }, { "epoch": 2.2972939880004897, "grad_norm": 2.7691845893859863, "learning_rate": 1.6688287926445585e-05, "loss": 0.5321, "step": 14073 }, { "epoch": 2.297457246642994, "grad_norm": 2.7844271659851074, "learning_rate": 1.6687811277655106e-05, "loss": 0.5941, "step": 14074 }, { "epoch": 2.2976205052854985, "grad_norm": 2.532633066177368, "learning_rate": 1.668733460137356e-05, "loss": 0.4971, "step": 14075 }, { "epoch": 2.297783763928003, "grad_norm": 3.02264142036438, "learning_rate": 1.6686857897602905e-05, "loss": 0.6509, "step": 14076 }, { "epoch": 2.2979470225705074, "grad_norm": 3.171578884124756, "learning_rate": 1.6686381166345102e-05, "loss": 0.6642, "step": 14077 }, { "epoch": 2.298110281213012, "grad_norm": 3.1295323371887207, "learning_rate": 1.668590440760211e-05, "loss": 0.6382, "step": 14078 }, { "epoch": 2.298273539855516, "grad_norm": 2.74448299407959, "learning_rate": 1.668542762137589e-05, "loss": 0.5922, "step": 14079 }, { "epoch": 2.2984367984980203, "grad_norm": 2.805341958999634, "learning_rate": 1.6684950807668406e-05, "loss": 0.5405, "step": 14080 }, { "epoch": 2.2986000571405247, "grad_norm": 2.3303394317626953, "learning_rate": 1.668447396648161e-05, "loss": 0.51, "step": 14081 }, { "epoch": 2.298763315783029, "grad_norm": 3.0643198490142822, "learning_rate": 1.6683997097817465e-05, "loss": 0.5609, "step": 14082 }, { "epoch": 2.2989265744255336, "grad_norm": 3.058131694793701, "learning_rate": 1.6683520201677933e-05, "loss": 0.5751, "step": 14083 }, { "epoch": 2.299089833068038, "grad_norm": 3.014780044555664, "learning_rate": 1.6683043278064972e-05, "loss": 0.6872, "step": 14084 }, { "epoch": 2.2992530917105425, "grad_norm": 2.6011545658111572, "learning_rate": 1.6682566326980544e-05, "loss": 0.5522, "step": 14085 }, { "epoch": 2.299416350353047, "grad_norm": 3.153822422027588, "learning_rate": 1.6682089348426607e-05, "loss": 0.6045, "step": 14086 }, { "epoch": 2.2995796089955514, "grad_norm": 2.9593234062194824, "learning_rate": 1.6681612342405126e-05, "loss": 0.5413, "step": 14087 }, { "epoch": 2.299742867638056, "grad_norm": 3.000610589981079, "learning_rate": 1.668113530891806e-05, "loss": 0.5896, "step": 14088 }, { "epoch": 2.29990612628056, "grad_norm": 3.0309627056121826, "learning_rate": 1.6680658247967367e-05, "loss": 0.5594, "step": 14089 }, { "epoch": 2.3000693849230642, "grad_norm": 3.1666951179504395, "learning_rate": 1.6680181159555015e-05, "loss": 0.4778, "step": 14090 }, { "epoch": 2.3002326435655687, "grad_norm": 2.5627830028533936, "learning_rate": 1.667970404368296e-05, "loss": 0.5733, "step": 14091 }, { "epoch": 2.300395902208073, "grad_norm": 3.0807487964630127, "learning_rate": 1.6679226900353162e-05, "loss": 0.6969, "step": 14092 }, { "epoch": 2.3005591608505775, "grad_norm": 2.475252866744995, "learning_rate": 1.667874972956758e-05, "loss": 0.5594, "step": 14093 }, { "epoch": 2.300722419493082, "grad_norm": 2.7923245429992676, "learning_rate": 1.6678272531328185e-05, "loss": 0.4762, "step": 14094 }, { "epoch": 2.3008856781355864, "grad_norm": 2.898573160171509, "learning_rate": 1.6677795305636932e-05, "loss": 0.6434, "step": 14095 }, { "epoch": 2.301048936778091, "grad_norm": 3.632561683654785, "learning_rate": 1.6677318052495785e-05, "loss": 0.661, "step": 14096 }, { "epoch": 2.301212195420595, "grad_norm": 2.9627113342285156, "learning_rate": 1.6676840771906704e-05, "loss": 0.6352, "step": 14097 }, { "epoch": 2.3013754540630993, "grad_norm": 2.710036039352417, "learning_rate": 1.667636346387165e-05, "loss": 0.5586, "step": 14098 }, { "epoch": 2.3015387127056037, "grad_norm": 3.1633858680725098, "learning_rate": 1.6675886128392593e-05, "loss": 0.5094, "step": 14099 }, { "epoch": 2.301701971348108, "grad_norm": 3.1795709133148193, "learning_rate": 1.6675408765471482e-05, "loss": 0.6739, "step": 14100 }, { "epoch": 2.3018652299906126, "grad_norm": 3.1901822090148926, "learning_rate": 1.667493137511029e-05, "loss": 0.5928, "step": 14101 }, { "epoch": 2.302028488633117, "grad_norm": 2.734042167663574, "learning_rate": 1.667445395731097e-05, "loss": 0.5623, "step": 14102 }, { "epoch": 2.3021917472756215, "grad_norm": 2.830261707305908, "learning_rate": 1.6673976512075497e-05, "loss": 0.5167, "step": 14103 }, { "epoch": 2.302355005918126, "grad_norm": 3.3328943252563477, "learning_rate": 1.6673499039405822e-05, "loss": 0.5988, "step": 14104 }, { "epoch": 2.3025182645606304, "grad_norm": 3.188394546508789, "learning_rate": 1.6673021539303914e-05, "loss": 0.6307, "step": 14105 }, { "epoch": 2.302681523203135, "grad_norm": 2.790381908416748, "learning_rate": 1.6672544011771736e-05, "loss": 0.4658, "step": 14106 }, { "epoch": 2.302844781845639, "grad_norm": 3.5159802436828613, "learning_rate": 1.6672066456811244e-05, "loss": 0.611, "step": 14107 }, { "epoch": 2.3030080404881432, "grad_norm": 2.9428279399871826, "learning_rate": 1.6671588874424412e-05, "loss": 0.5725, "step": 14108 }, { "epoch": 2.3031712991306477, "grad_norm": 2.8668479919433594, "learning_rate": 1.6671111264613195e-05, "loss": 0.6442, "step": 14109 }, { "epoch": 2.303334557773152, "grad_norm": 2.8610713481903076, "learning_rate": 1.6670633627379557e-05, "loss": 0.5178, "step": 14110 }, { "epoch": 2.3034978164156565, "grad_norm": 3.059152364730835, "learning_rate": 1.6670155962725463e-05, "loss": 0.6487, "step": 14111 }, { "epoch": 2.303661075058161, "grad_norm": 3.2927136421203613, "learning_rate": 1.6669678270652875e-05, "loss": 0.6443, "step": 14112 }, { "epoch": 2.3038243337006654, "grad_norm": 3.525233030319214, "learning_rate": 1.6669200551163762e-05, "loss": 0.6755, "step": 14113 }, { "epoch": 2.30398759234317, "grad_norm": 2.310297966003418, "learning_rate": 1.6668722804260086e-05, "loss": 0.4712, "step": 14114 }, { "epoch": 2.304150850985674, "grad_norm": 2.89436936378479, "learning_rate": 1.66682450299438e-05, "loss": 0.5684, "step": 14115 }, { "epoch": 2.3043141096281783, "grad_norm": 3.286752223968506, "learning_rate": 1.6667767228216886e-05, "loss": 0.5947, "step": 14116 }, { "epoch": 2.3044773682706827, "grad_norm": 2.919832468032837, "learning_rate": 1.6667289399081293e-05, "loss": 0.4916, "step": 14117 }, { "epoch": 2.304640626913187, "grad_norm": 2.848874807357788, "learning_rate": 1.6666811542538993e-05, "loss": 0.5744, "step": 14118 }, { "epoch": 2.3048038855556916, "grad_norm": 2.6178789138793945, "learning_rate": 1.6666333658591945e-05, "loss": 0.5877, "step": 14119 }, { "epoch": 2.304967144198196, "grad_norm": 3.0225679874420166, "learning_rate": 1.6665855747242118e-05, "loss": 0.6551, "step": 14120 }, { "epoch": 2.3051304028407005, "grad_norm": 3.01564359664917, "learning_rate": 1.6665377808491476e-05, "loss": 0.6411, "step": 14121 }, { "epoch": 2.305293661483205, "grad_norm": 3.083561658859253, "learning_rate": 1.666489984234198e-05, "loss": 0.6183, "step": 14122 }, { "epoch": 2.3054569201257094, "grad_norm": 2.7160143852233887, "learning_rate": 1.6664421848795602e-05, "loss": 0.5375, "step": 14123 }, { "epoch": 2.305620178768214, "grad_norm": 3.396152973175049, "learning_rate": 1.66639438278543e-05, "loss": 0.6693, "step": 14124 }, { "epoch": 2.305783437410718, "grad_norm": 2.656625986099243, "learning_rate": 1.6663465779520042e-05, "loss": 0.5505, "step": 14125 }, { "epoch": 2.3059466960532222, "grad_norm": 2.9575116634368896, "learning_rate": 1.666298770379479e-05, "loss": 0.5582, "step": 14126 }, { "epoch": 2.3061099546957267, "grad_norm": 2.4642457962036133, "learning_rate": 1.6662509600680512e-05, "loss": 0.5127, "step": 14127 }, { "epoch": 2.306273213338231, "grad_norm": 3.3564863204956055, "learning_rate": 1.6662031470179175e-05, "loss": 0.5469, "step": 14128 }, { "epoch": 2.3064364719807355, "grad_norm": 3.5177457332611084, "learning_rate": 1.666155331229274e-05, "loss": 0.7053, "step": 14129 }, { "epoch": 2.30659973062324, "grad_norm": 2.93467116355896, "learning_rate": 1.6661075127023175e-05, "loss": 0.5612, "step": 14130 }, { "epoch": 2.3067629892657444, "grad_norm": 2.4512314796447754, "learning_rate": 1.6660596914372446e-05, "loss": 0.5233, "step": 14131 }, { "epoch": 2.3069262479082484, "grad_norm": 2.6173059940338135, "learning_rate": 1.666011867434252e-05, "loss": 0.5699, "step": 14132 }, { "epoch": 2.307089506550753, "grad_norm": 2.5210111141204834, "learning_rate": 1.6659640406935355e-05, "loss": 0.5278, "step": 14133 }, { "epoch": 2.3072527651932573, "grad_norm": 2.6022751331329346, "learning_rate": 1.665916211215293e-05, "loss": 0.5002, "step": 14134 }, { "epoch": 2.3074160238357617, "grad_norm": 2.67954421043396, "learning_rate": 1.6658683789997198e-05, "loss": 0.5379, "step": 14135 }, { "epoch": 2.307579282478266, "grad_norm": 2.9057776927948, "learning_rate": 1.6658205440470136e-05, "loss": 0.5734, "step": 14136 }, { "epoch": 2.3077425411207706, "grad_norm": 3.143083095550537, "learning_rate": 1.66577270635737e-05, "loss": 0.6511, "step": 14137 }, { "epoch": 2.307905799763275, "grad_norm": 2.705627918243408, "learning_rate": 1.665724865930987e-05, "loss": 0.5838, "step": 14138 }, { "epoch": 2.3080690584057795, "grad_norm": 2.705402135848999, "learning_rate": 1.6656770227680597e-05, "loss": 0.6153, "step": 14139 }, { "epoch": 2.308232317048284, "grad_norm": 2.731825113296509, "learning_rate": 1.665629176868786e-05, "loss": 0.5173, "step": 14140 }, { "epoch": 2.3083955756907883, "grad_norm": 2.786407947540283, "learning_rate": 1.6655813282333618e-05, "loss": 0.5881, "step": 14141 }, { "epoch": 2.3085588343332923, "grad_norm": 2.771726131439209, "learning_rate": 1.665533476861984e-05, "loss": 0.5898, "step": 14142 }, { "epoch": 2.308722092975797, "grad_norm": 3.239865303039551, "learning_rate": 1.6654856227548498e-05, "loss": 0.6612, "step": 14143 }, { "epoch": 2.308885351618301, "grad_norm": 2.6467642784118652, "learning_rate": 1.665437765912155e-05, "loss": 0.531, "step": 14144 }, { "epoch": 2.3090486102608057, "grad_norm": 2.7010231018066406, "learning_rate": 1.6653899063340972e-05, "loss": 0.4887, "step": 14145 }, { "epoch": 2.30921186890331, "grad_norm": 3.4710919857025146, "learning_rate": 1.6653420440208725e-05, "loss": 1.0856, "step": 14146 }, { "epoch": 2.3093751275458145, "grad_norm": 2.466738224029541, "learning_rate": 1.665294178972678e-05, "loss": 0.4905, "step": 14147 }, { "epoch": 2.309538386188319, "grad_norm": 2.719560384750366, "learning_rate": 1.6652463111897104e-05, "loss": 0.5248, "step": 14148 }, { "epoch": 2.3097016448308234, "grad_norm": 3.319824695587158, "learning_rate": 1.6651984406721662e-05, "loss": 0.6451, "step": 14149 }, { "epoch": 2.3098649034733274, "grad_norm": 2.571425437927246, "learning_rate": 1.6651505674202425e-05, "loss": 0.5554, "step": 14150 }, { "epoch": 2.310028162115832, "grad_norm": 2.670774459838867, "learning_rate": 1.6651026914341358e-05, "loss": 0.6043, "step": 14151 }, { "epoch": 2.3101914207583363, "grad_norm": 3.0216445922851562, "learning_rate": 1.6650548127140432e-05, "loss": 0.6246, "step": 14152 }, { "epoch": 2.3103546794008407, "grad_norm": 2.991173028945923, "learning_rate": 1.6650069312601616e-05, "loss": 0.5962, "step": 14153 }, { "epoch": 2.310517938043345, "grad_norm": 2.2324845790863037, "learning_rate": 1.6649590470726875e-05, "loss": 0.4493, "step": 14154 }, { "epoch": 2.3106811966858496, "grad_norm": 2.634402275085449, "learning_rate": 1.6649111601518177e-05, "loss": 0.5203, "step": 14155 }, { "epoch": 2.310844455328354, "grad_norm": 2.473578453063965, "learning_rate": 1.664863270497749e-05, "loss": 0.491, "step": 14156 }, { "epoch": 2.3110077139708585, "grad_norm": 3.0972023010253906, "learning_rate": 1.664815378110679e-05, "loss": 0.6231, "step": 14157 }, { "epoch": 2.311170972613363, "grad_norm": 3.2892861366271973, "learning_rate": 1.6647674829908037e-05, "loss": 0.703, "step": 14158 }, { "epoch": 2.3113342312558673, "grad_norm": 3.4488437175750732, "learning_rate": 1.6647195851383205e-05, "loss": 0.6376, "step": 14159 }, { "epoch": 2.3114974898983713, "grad_norm": 3.0810320377349854, "learning_rate": 1.664671684553426e-05, "loss": 0.5463, "step": 14160 }, { "epoch": 2.3116607485408758, "grad_norm": 3.221712350845337, "learning_rate": 1.6646237812363168e-05, "loss": 0.5373, "step": 14161 }, { "epoch": 2.31182400718338, "grad_norm": 2.907822847366333, "learning_rate": 1.6645758751871907e-05, "loss": 0.6855, "step": 14162 }, { "epoch": 2.3119872658258847, "grad_norm": 2.5233819484710693, "learning_rate": 1.6645279664062437e-05, "loss": 0.4778, "step": 14163 }, { "epoch": 2.312150524468389, "grad_norm": 2.9635815620422363, "learning_rate": 1.6644800548936734e-05, "loss": 0.5841, "step": 14164 }, { "epoch": 2.3123137831108935, "grad_norm": 2.8250346183776855, "learning_rate": 1.6644321406496763e-05, "loss": 0.5901, "step": 14165 }, { "epoch": 2.312477041753398, "grad_norm": 3.1702966690063477, "learning_rate": 1.66438422367445e-05, "loss": 0.6908, "step": 14166 }, { "epoch": 2.312640300395902, "grad_norm": 3.042529344558716, "learning_rate": 1.6643363039681904e-05, "loss": 0.674, "step": 14167 }, { "epoch": 2.3128035590384064, "grad_norm": 3.2014031410217285, "learning_rate": 1.6642883815310957e-05, "loss": 0.5768, "step": 14168 }, { "epoch": 2.312966817680911, "grad_norm": 3.486330270767212, "learning_rate": 1.664240456363362e-05, "loss": 0.6541, "step": 14169 }, { "epoch": 2.3131300763234153, "grad_norm": 2.837956428527832, "learning_rate": 1.6641925284651862e-05, "loss": 0.5801, "step": 14170 }, { "epoch": 2.3132933349659197, "grad_norm": 2.9184181690216064, "learning_rate": 1.664144597836766e-05, "loss": 0.5211, "step": 14171 }, { "epoch": 2.313456593608424, "grad_norm": 2.4068386554718018, "learning_rate": 1.6640966644782986e-05, "loss": 0.5141, "step": 14172 }, { "epoch": 2.3136198522509286, "grad_norm": 2.9016568660736084, "learning_rate": 1.66404872838998e-05, "loss": 0.6214, "step": 14173 }, { "epoch": 2.313783110893433, "grad_norm": 2.5508930683135986, "learning_rate": 1.664000789572008e-05, "loss": 0.4841, "step": 14174 }, { "epoch": 2.3139463695359375, "grad_norm": 2.886847972869873, "learning_rate": 1.6639528480245796e-05, "loss": 0.6484, "step": 14175 }, { "epoch": 2.314109628178442, "grad_norm": 2.270620107650757, "learning_rate": 1.663904903747892e-05, "loss": 0.4594, "step": 14176 }, { "epoch": 2.314272886820946, "grad_norm": 2.5498015880584717, "learning_rate": 1.6638569567421417e-05, "loss": 0.5542, "step": 14177 }, { "epoch": 2.3144361454634503, "grad_norm": 2.815855026245117, "learning_rate": 1.6638090070075258e-05, "loss": 0.4926, "step": 14178 }, { "epoch": 2.3145994041059548, "grad_norm": 3.385019302368164, "learning_rate": 1.6637610545442423e-05, "loss": 0.5522, "step": 14179 }, { "epoch": 2.314762662748459, "grad_norm": 2.531329870223999, "learning_rate": 1.6637130993524872e-05, "loss": 0.5336, "step": 14180 }, { "epoch": 2.3149259213909636, "grad_norm": 2.5362648963928223, "learning_rate": 1.6636651414324586e-05, "loss": 0.5445, "step": 14181 }, { "epoch": 2.315089180033468, "grad_norm": 3.284116744995117, "learning_rate": 1.663617180784353e-05, "loss": 0.741, "step": 14182 }, { "epoch": 2.3152524386759725, "grad_norm": 3.1027512550354004, "learning_rate": 1.6635692174083677e-05, "loss": 0.7067, "step": 14183 }, { "epoch": 2.315415697318477, "grad_norm": 3.154897928237915, "learning_rate": 1.6635212513047e-05, "loss": 0.6209, "step": 14184 }, { "epoch": 2.315578955960981, "grad_norm": 2.8327736854553223, "learning_rate": 1.6634732824735472e-05, "loss": 0.6328, "step": 14185 }, { "epoch": 2.3157422146034854, "grad_norm": 2.867949962615967, "learning_rate": 1.663425310915106e-05, "loss": 0.6053, "step": 14186 }, { "epoch": 2.31590547324599, "grad_norm": 2.8445873260498047, "learning_rate": 1.6633773366295738e-05, "loss": 0.5498, "step": 14187 }, { "epoch": 2.3160687318884943, "grad_norm": 2.729041337966919, "learning_rate": 1.6633293596171478e-05, "loss": 0.5211, "step": 14188 }, { "epoch": 2.3162319905309987, "grad_norm": 2.637040376663208, "learning_rate": 1.6632813798780255e-05, "loss": 0.5153, "step": 14189 }, { "epoch": 2.316395249173503, "grad_norm": 2.665609121322632, "learning_rate": 1.663233397412404e-05, "loss": 0.5785, "step": 14190 }, { "epoch": 2.3165585078160076, "grad_norm": 2.8566017150878906, "learning_rate": 1.66318541222048e-05, "loss": 0.545, "step": 14191 }, { "epoch": 2.316721766458512, "grad_norm": 2.8388936519622803, "learning_rate": 1.6631374243024517e-05, "loss": 0.5331, "step": 14192 }, { "epoch": 2.3168850251010165, "grad_norm": 3.590564489364624, "learning_rate": 1.6630894336585154e-05, "loss": 0.6322, "step": 14193 }, { "epoch": 2.317048283743521, "grad_norm": 2.740277051925659, "learning_rate": 1.6630414402888688e-05, "loss": 0.5462, "step": 14194 }, { "epoch": 2.317211542386025, "grad_norm": 2.916332483291626, "learning_rate": 1.6629934441937097e-05, "loss": 0.5618, "step": 14195 }, { "epoch": 2.3173748010285293, "grad_norm": 2.8957619667053223, "learning_rate": 1.6629454453732346e-05, "loss": 0.5648, "step": 14196 }, { "epoch": 2.3175380596710338, "grad_norm": 3.128734588623047, "learning_rate": 1.662897443827641e-05, "loss": 0.577, "step": 14197 }, { "epoch": 2.317701318313538, "grad_norm": 2.6785130500793457, "learning_rate": 1.6628494395571263e-05, "loss": 0.5238, "step": 14198 }, { "epoch": 2.3178645769560426, "grad_norm": 2.8416006565093994, "learning_rate": 1.6628014325618878e-05, "loss": 0.5796, "step": 14199 }, { "epoch": 2.318027835598547, "grad_norm": 3.5922560691833496, "learning_rate": 1.662753422842123e-05, "loss": 0.7047, "step": 14200 }, { "epoch": 2.3181910942410515, "grad_norm": 2.6608035564422607, "learning_rate": 1.6627054103980294e-05, "loss": 0.5092, "step": 14201 }, { "epoch": 2.318354352883556, "grad_norm": 2.8535661697387695, "learning_rate": 1.662657395229804e-05, "loss": 0.5653, "step": 14202 }, { "epoch": 2.31851761152606, "grad_norm": 3.1260924339294434, "learning_rate": 1.6626093773376437e-05, "loss": 0.7338, "step": 14203 }, { "epoch": 2.3186808701685644, "grad_norm": 3.0292930603027344, "learning_rate": 1.662561356721747e-05, "loss": 0.6358, "step": 14204 }, { "epoch": 2.318844128811069, "grad_norm": 3.3444252014160156, "learning_rate": 1.6625133333823106e-05, "loss": 0.6202, "step": 14205 }, { "epoch": 2.3190073874535733, "grad_norm": 3.221571445465088, "learning_rate": 1.6624653073195317e-05, "loss": 0.6734, "step": 14206 }, { "epoch": 2.3191706460960777, "grad_norm": 2.842344284057617, "learning_rate": 1.6624172785336086e-05, "loss": 0.6114, "step": 14207 }, { "epoch": 2.319333904738582, "grad_norm": 2.4454634189605713, "learning_rate": 1.6623692470247383e-05, "loss": 0.4435, "step": 14208 }, { "epoch": 2.3194971633810866, "grad_norm": 3.1903159618377686, "learning_rate": 1.6623212127931177e-05, "loss": 0.6033, "step": 14209 }, { "epoch": 2.319660422023591, "grad_norm": 2.966926336288452, "learning_rate": 1.662273175838945e-05, "loss": 0.5669, "step": 14210 }, { "epoch": 2.3198236806660955, "grad_norm": 2.7591934204101562, "learning_rate": 1.662225136162417e-05, "loss": 0.4815, "step": 14211 }, { "epoch": 2.3199869393086, "grad_norm": 2.635026216506958, "learning_rate": 1.6621770937637316e-05, "loss": 0.5093, "step": 14212 }, { "epoch": 2.320150197951104, "grad_norm": 2.5546631813049316, "learning_rate": 1.662129048643086e-05, "loss": 0.5579, "step": 14213 }, { "epoch": 2.3203134565936083, "grad_norm": 2.842465877532959, "learning_rate": 1.6620810008006785e-05, "loss": 0.5742, "step": 14214 }, { "epoch": 2.3204767152361128, "grad_norm": 2.8755805492401123, "learning_rate": 1.6620329502367055e-05, "loss": 0.5535, "step": 14215 }, { "epoch": 2.320639973878617, "grad_norm": 3.254488229751587, "learning_rate": 1.661984896951365e-05, "loss": 0.6435, "step": 14216 }, { "epoch": 2.3208032325211216, "grad_norm": 2.7793383598327637, "learning_rate": 1.6619368409448552e-05, "loss": 0.5308, "step": 14217 }, { "epoch": 2.320966491163626, "grad_norm": 3.459909200668335, "learning_rate": 1.6618887822173722e-05, "loss": 0.6281, "step": 14218 }, { "epoch": 2.3211297498061305, "grad_norm": 3.3408331871032715, "learning_rate": 1.6618407207691146e-05, "loss": 0.6618, "step": 14219 }, { "epoch": 2.3212930084486345, "grad_norm": 2.8517568111419678, "learning_rate": 1.6617926566002798e-05, "loss": 0.5774, "step": 14220 }, { "epoch": 2.321456267091139, "grad_norm": 2.931185245513916, "learning_rate": 1.6617445897110654e-05, "loss": 0.5523, "step": 14221 }, { "epoch": 2.3216195257336434, "grad_norm": 2.8720650672912598, "learning_rate": 1.6616965201016685e-05, "loss": 0.6383, "step": 14222 }, { "epoch": 2.321782784376148, "grad_norm": 2.3894155025482178, "learning_rate": 1.6616484477722874e-05, "loss": 0.4906, "step": 14223 }, { "epoch": 2.3219460430186523, "grad_norm": 3.2933645248413086, "learning_rate": 1.6616003727231192e-05, "loss": 0.6725, "step": 14224 }, { "epoch": 2.3221093016611567, "grad_norm": 3.104741096496582, "learning_rate": 1.661552294954362e-05, "loss": 0.5367, "step": 14225 }, { "epoch": 2.322272560303661, "grad_norm": 3.3857035636901855, "learning_rate": 1.6615042144662125e-05, "loss": 0.5992, "step": 14226 }, { "epoch": 2.3224358189461656, "grad_norm": 3.149108648300171, "learning_rate": 1.6614561312588695e-05, "loss": 0.6468, "step": 14227 }, { "epoch": 2.32259907758867, "grad_norm": 3.0622153282165527, "learning_rate": 1.6614080453325297e-05, "loss": 0.5902, "step": 14228 }, { "epoch": 2.3227623362311745, "grad_norm": 3.333143949508667, "learning_rate": 1.6613599566873914e-05, "loss": 0.673, "step": 14229 }, { "epoch": 2.3229255948736784, "grad_norm": 2.9215505123138428, "learning_rate": 1.661311865323652e-05, "loss": 0.7121, "step": 14230 }, { "epoch": 2.323088853516183, "grad_norm": 2.8927433490753174, "learning_rate": 1.6612637712415094e-05, "loss": 0.6085, "step": 14231 }, { "epoch": 2.3232521121586873, "grad_norm": 3.082702398300171, "learning_rate": 1.661215674441161e-05, "loss": 0.5913, "step": 14232 }, { "epoch": 2.3234153708011918, "grad_norm": 2.702749490737915, "learning_rate": 1.6611675749228046e-05, "loss": 0.5073, "step": 14233 }, { "epoch": 2.323578629443696, "grad_norm": 2.7019543647766113, "learning_rate": 1.6611194726866377e-05, "loss": 0.5203, "step": 14234 }, { "epoch": 2.3237418880862006, "grad_norm": 2.801506757736206, "learning_rate": 1.6610713677328584e-05, "loss": 0.4992, "step": 14235 }, { "epoch": 2.323905146728705, "grad_norm": 3.4774436950683594, "learning_rate": 1.6610232600616646e-05, "loss": 0.6567, "step": 14236 }, { "epoch": 2.3240684053712095, "grad_norm": 2.829674243927002, "learning_rate": 1.660975149673254e-05, "loss": 0.5068, "step": 14237 }, { "epoch": 2.3242316640137135, "grad_norm": 3.1101155281066895, "learning_rate": 1.6609270365678233e-05, "loss": 0.5482, "step": 14238 }, { "epoch": 2.324394922656218, "grad_norm": 3.273040294647217, "learning_rate": 1.660878920745572e-05, "loss": 0.588, "step": 14239 }, { "epoch": 2.3245581812987224, "grad_norm": 3.0074806213378906, "learning_rate": 1.660830802206696e-05, "loss": 0.6147, "step": 14240 }, { "epoch": 2.324721439941227, "grad_norm": 2.8643641471862793, "learning_rate": 1.6607826809513953e-05, "loss": 0.5819, "step": 14241 }, { "epoch": 2.3248846985837313, "grad_norm": 3.96301531791687, "learning_rate": 1.6607345569798656e-05, "loss": 0.6409, "step": 14242 }, { "epoch": 2.3250479572262357, "grad_norm": 3.0988426208496094, "learning_rate": 1.6606864302923058e-05, "loss": 0.6934, "step": 14243 }, { "epoch": 2.32521121586874, "grad_norm": 2.5563271045684814, "learning_rate": 1.6606383008889134e-05, "loss": 0.5788, "step": 14244 }, { "epoch": 2.3253744745112446, "grad_norm": 3.488797426223755, "learning_rate": 1.660590168769887e-05, "loss": 0.7371, "step": 14245 }, { "epoch": 2.325537733153749, "grad_norm": 3.231985092163086, "learning_rate": 1.6605420339354233e-05, "loss": 0.6188, "step": 14246 }, { "epoch": 2.3257009917962534, "grad_norm": 2.692551851272583, "learning_rate": 1.660493896385721e-05, "loss": 0.5737, "step": 14247 }, { "epoch": 2.3258642504387574, "grad_norm": 2.6593306064605713, "learning_rate": 1.6604457561209776e-05, "loss": 0.5593, "step": 14248 }, { "epoch": 2.326027509081262, "grad_norm": 3.042937755584717, "learning_rate": 1.6603976131413912e-05, "loss": 0.5277, "step": 14249 }, { "epoch": 2.3261907677237663, "grad_norm": 2.998298406600952, "learning_rate": 1.6603494674471595e-05, "loss": 0.5765, "step": 14250 }, { "epoch": 2.3263540263662708, "grad_norm": 2.523881435394287, "learning_rate": 1.6603013190384806e-05, "loss": 0.5901, "step": 14251 }, { "epoch": 2.326517285008775, "grad_norm": 2.8789749145507812, "learning_rate": 1.660253167915552e-05, "loss": 0.6122, "step": 14252 }, { "epoch": 2.3266805436512796, "grad_norm": 3.2767629623413086, "learning_rate": 1.660205014078572e-05, "loss": 0.6625, "step": 14253 }, { "epoch": 2.326843802293784, "grad_norm": 3.324138641357422, "learning_rate": 1.6601568575277388e-05, "loss": 0.6778, "step": 14254 }, { "epoch": 2.3270070609362885, "grad_norm": 2.5411603450775146, "learning_rate": 1.6601086982632498e-05, "loss": 0.5426, "step": 14255 }, { "epoch": 2.3271703195787925, "grad_norm": 3.429967164993286, "learning_rate": 1.660060536285303e-05, "loss": 0.635, "step": 14256 }, { "epoch": 2.327333578221297, "grad_norm": 2.9785361289978027, "learning_rate": 1.6600123715940972e-05, "loss": 0.5721, "step": 14257 }, { "epoch": 2.3274968368638014, "grad_norm": 2.88484787940979, "learning_rate": 1.659964204189829e-05, "loss": 0.5998, "step": 14258 }, { "epoch": 2.327660095506306, "grad_norm": 2.4240801334381104, "learning_rate": 1.6599160340726974e-05, "loss": 0.4914, "step": 14259 }, { "epoch": 2.3278233541488103, "grad_norm": 2.4147636890411377, "learning_rate": 1.6598678612429003e-05, "loss": 0.4971, "step": 14260 }, { "epoch": 2.3279866127913147, "grad_norm": 3.546583414077759, "learning_rate": 1.6598196857006356e-05, "loss": 0.6127, "step": 14261 }, { "epoch": 2.328149871433819, "grad_norm": 2.739485502243042, "learning_rate": 1.6597715074461013e-05, "loss": 0.5353, "step": 14262 }, { "epoch": 2.3283131300763236, "grad_norm": 2.4491126537323, "learning_rate": 1.6597233264794952e-05, "loss": 0.495, "step": 14263 }, { "epoch": 2.328476388718828, "grad_norm": 2.9765207767486572, "learning_rate": 1.659675142801016e-05, "loss": 0.6274, "step": 14264 }, { "epoch": 2.328639647361332, "grad_norm": 2.90356707572937, "learning_rate": 1.6596269564108612e-05, "loss": 0.5988, "step": 14265 }, { "epoch": 2.3288029060038364, "grad_norm": 3.3439576625823975, "learning_rate": 1.659578767309229e-05, "loss": 0.7824, "step": 14266 }, { "epoch": 2.328966164646341, "grad_norm": 3.1489202976226807, "learning_rate": 1.6595305754963177e-05, "loss": 0.6878, "step": 14267 }, { "epoch": 2.3291294232888453, "grad_norm": 2.8413262367248535, "learning_rate": 1.659482380972325e-05, "loss": 0.4999, "step": 14268 }, { "epoch": 2.3292926819313498, "grad_norm": 3.056419610977173, "learning_rate": 1.659434183737449e-05, "loss": 0.5844, "step": 14269 }, { "epoch": 2.329455940573854, "grad_norm": 3.6186909675598145, "learning_rate": 1.6593859837918888e-05, "loss": 0.6283, "step": 14270 }, { "epoch": 2.3296191992163586, "grad_norm": 3.0099055767059326, "learning_rate": 1.6593377811358413e-05, "loss": 0.6171, "step": 14271 }, { "epoch": 2.329782457858863, "grad_norm": 2.417884588241577, "learning_rate": 1.6592895757695052e-05, "loss": 0.4517, "step": 14272 }, { "epoch": 2.329945716501367, "grad_norm": 2.6830127239227295, "learning_rate": 1.6592413676930787e-05, "loss": 0.495, "step": 14273 }, { "epoch": 2.3301089751438715, "grad_norm": 2.7497053146362305, "learning_rate": 1.6591931569067597e-05, "loss": 0.5794, "step": 14274 }, { "epoch": 2.330272233786376, "grad_norm": 2.4506566524505615, "learning_rate": 1.6591449434107468e-05, "loss": 0.4946, "step": 14275 }, { "epoch": 2.3304354924288804, "grad_norm": 2.868039608001709, "learning_rate": 1.6590967272052377e-05, "loss": 0.5777, "step": 14276 }, { "epoch": 2.330598751071385, "grad_norm": 2.8062968254089355, "learning_rate": 1.659048508290431e-05, "loss": 0.5389, "step": 14277 }, { "epoch": 2.3307620097138892, "grad_norm": 2.7400453090667725, "learning_rate": 1.6590002866665247e-05, "loss": 0.6065, "step": 14278 }, { "epoch": 2.3309252683563937, "grad_norm": 2.543738603591919, "learning_rate": 1.6589520623337173e-05, "loss": 0.5791, "step": 14279 }, { "epoch": 2.331088526998898, "grad_norm": 2.9724535942077637, "learning_rate": 1.658903835292206e-05, "loss": 0.5337, "step": 14280 }, { "epoch": 2.3312517856414026, "grad_norm": 3.037998914718628, "learning_rate": 1.6588556055421907e-05, "loss": 0.5238, "step": 14281 }, { "epoch": 2.331415044283907, "grad_norm": 3.174002170562744, "learning_rate": 1.6588073730838683e-05, "loss": 0.6229, "step": 14282 }, { "epoch": 2.331578302926411, "grad_norm": 3.4292638301849365, "learning_rate": 1.6587591379174376e-05, "loss": 0.7265, "step": 14283 }, { "epoch": 2.3317415615689154, "grad_norm": 2.621281862258911, "learning_rate": 1.658710900043097e-05, "loss": 0.5255, "step": 14284 }, { "epoch": 2.33190482021142, "grad_norm": 2.7630250453948975, "learning_rate": 1.6586626594610447e-05, "loss": 0.5545, "step": 14285 }, { "epoch": 2.3320680788539243, "grad_norm": 2.88940167427063, "learning_rate": 1.658614416171479e-05, "loss": 0.6076, "step": 14286 }, { "epoch": 2.3322313374964287, "grad_norm": 2.8020122051239014, "learning_rate": 1.6585661701745973e-05, "loss": 0.5078, "step": 14287 }, { "epoch": 2.332394596138933, "grad_norm": 2.424948215484619, "learning_rate": 1.6585179214705994e-05, "loss": 0.4586, "step": 14288 }, { "epoch": 2.3325578547814376, "grad_norm": 3.1121809482574463, "learning_rate": 1.6584696700596827e-05, "loss": 0.6038, "step": 14289 }, { "epoch": 2.332721113423942, "grad_norm": 2.9039387702941895, "learning_rate": 1.6584214159420462e-05, "loss": 0.5472, "step": 14290 }, { "epoch": 2.332884372066446, "grad_norm": 2.3214306831359863, "learning_rate": 1.6583731591178876e-05, "loss": 0.4628, "step": 14291 }, { "epoch": 2.3330476307089505, "grad_norm": 2.86627459526062, "learning_rate": 1.6583248995874057e-05, "loss": 0.5421, "step": 14292 }, { "epoch": 2.333210889351455, "grad_norm": 2.5407817363739014, "learning_rate": 1.6582766373507987e-05, "loss": 0.4773, "step": 14293 }, { "epoch": 2.3333741479939594, "grad_norm": 2.7335169315338135, "learning_rate": 1.6582283724082648e-05, "loss": 0.5597, "step": 14294 }, { "epoch": 2.333537406636464, "grad_norm": 3.1518726348876953, "learning_rate": 1.6581801047600027e-05, "loss": 0.6892, "step": 14295 }, { "epoch": 2.3337006652789682, "grad_norm": 2.9642858505249023, "learning_rate": 1.658131834406211e-05, "loss": 0.5547, "step": 14296 }, { "epoch": 2.3338639239214727, "grad_norm": 2.7447032928466797, "learning_rate": 1.6580835613470878e-05, "loss": 0.526, "step": 14297 }, { "epoch": 2.334027182563977, "grad_norm": 3.0538582801818848, "learning_rate": 1.658035285582831e-05, "loss": 0.5977, "step": 14298 }, { "epoch": 2.3341904412064816, "grad_norm": 3.346606969833374, "learning_rate": 1.65798700711364e-05, "loss": 0.6444, "step": 14299 }, { "epoch": 2.334353699848986, "grad_norm": 2.765561580657959, "learning_rate": 1.657938725939713e-05, "loss": 0.5088, "step": 14300 }, { "epoch": 2.33451695849149, "grad_norm": 2.8428375720977783, "learning_rate": 1.6578904420612478e-05, "loss": 0.6021, "step": 14301 }, { "epoch": 2.3346802171339944, "grad_norm": 2.8205618858337402, "learning_rate": 1.657842155478444e-05, "loss": 0.5743, "step": 14302 }, { "epoch": 2.334843475776499, "grad_norm": 2.7865681648254395, "learning_rate": 1.657793866191499e-05, "loss": 0.5239, "step": 14303 }, { "epoch": 2.3350067344190033, "grad_norm": 3.023977756500244, "learning_rate": 1.6577455742006122e-05, "loss": 0.642, "step": 14304 }, { "epoch": 2.3351699930615077, "grad_norm": 2.9517436027526855, "learning_rate": 1.657697279505982e-05, "loss": 0.5575, "step": 14305 }, { "epoch": 2.335333251704012, "grad_norm": 2.548302412033081, "learning_rate": 1.6576489821078058e-05, "loss": 0.5374, "step": 14306 }, { "epoch": 2.3354965103465166, "grad_norm": 2.682955741882324, "learning_rate": 1.6576006820062834e-05, "loss": 0.5909, "step": 14307 }, { "epoch": 2.3356597689890206, "grad_norm": 2.6235134601593018, "learning_rate": 1.6575523792016128e-05, "loss": 0.5457, "step": 14308 }, { "epoch": 2.335823027631525, "grad_norm": 3.1795341968536377, "learning_rate": 1.6575040736939927e-05, "loss": 0.6605, "step": 14309 }, { "epoch": 2.3359862862740295, "grad_norm": 2.9658873081207275, "learning_rate": 1.6574557654836216e-05, "loss": 0.5494, "step": 14310 }, { "epoch": 2.336149544916534, "grad_norm": 2.4639034271240234, "learning_rate": 1.6574074545706983e-05, "loss": 0.5186, "step": 14311 }, { "epoch": 2.3363128035590384, "grad_norm": 2.466506242752075, "learning_rate": 1.6573591409554208e-05, "loss": 0.4739, "step": 14312 }, { "epoch": 2.336476062201543, "grad_norm": 2.75160551071167, "learning_rate": 1.6573108246379883e-05, "loss": 0.5453, "step": 14313 }, { "epoch": 2.3366393208440472, "grad_norm": 2.976304769515991, "learning_rate": 1.6572625056185992e-05, "loss": 0.6369, "step": 14314 }, { "epoch": 2.3368025794865517, "grad_norm": 3.1818418502807617, "learning_rate": 1.6572141838974522e-05, "loss": 0.674, "step": 14315 }, { "epoch": 2.336965838129056, "grad_norm": 2.322667121887207, "learning_rate": 1.6571658594747458e-05, "loss": 0.4178, "step": 14316 }, { "epoch": 2.3371290967715606, "grad_norm": 3.1749415397644043, "learning_rate": 1.657117532350679e-05, "loss": 0.6522, "step": 14317 }, { "epoch": 2.3372923554140645, "grad_norm": 2.6094372272491455, "learning_rate": 1.6570692025254493e-05, "loss": 0.5329, "step": 14318 }, { "epoch": 2.337455614056569, "grad_norm": 2.724041700363159, "learning_rate": 1.657020869999257e-05, "loss": 0.551, "step": 14319 }, { "epoch": 2.3376188726990734, "grad_norm": 2.8382818698883057, "learning_rate": 1.6569725347722996e-05, "loss": 0.5928, "step": 14320 }, { "epoch": 2.337782131341578, "grad_norm": 2.9991674423217773, "learning_rate": 1.6569241968447764e-05, "loss": 0.6568, "step": 14321 }, { "epoch": 2.3379453899840823, "grad_norm": 2.6263272762298584, "learning_rate": 1.6568758562168857e-05, "loss": 0.5375, "step": 14322 }, { "epoch": 2.3381086486265867, "grad_norm": 2.982919454574585, "learning_rate": 1.6568275128888264e-05, "loss": 0.6277, "step": 14323 }, { "epoch": 2.338271907269091, "grad_norm": 2.8162243366241455, "learning_rate": 1.6567791668607974e-05, "loss": 0.5589, "step": 14324 }, { "epoch": 2.3384351659115956, "grad_norm": 2.90927791595459, "learning_rate": 1.6567308181329972e-05, "loss": 0.6579, "step": 14325 }, { "epoch": 2.3385984245540996, "grad_norm": 2.527144432067871, "learning_rate": 1.656682466705624e-05, "loss": 0.5295, "step": 14326 }, { "epoch": 2.338761683196604, "grad_norm": 3.2615203857421875, "learning_rate": 1.656634112578878e-05, "loss": 0.6526, "step": 14327 }, { "epoch": 2.3389249418391085, "grad_norm": 2.880960702896118, "learning_rate": 1.6565857557529567e-05, "loss": 0.6381, "step": 14328 }, { "epoch": 2.339088200481613, "grad_norm": 2.9976212978363037, "learning_rate": 1.6565373962280594e-05, "loss": 0.6056, "step": 14329 }, { "epoch": 2.3392514591241174, "grad_norm": 2.9117038249969482, "learning_rate": 1.6564890340043844e-05, "loss": 0.6199, "step": 14330 }, { "epoch": 2.339414717766622, "grad_norm": 2.77673602104187, "learning_rate": 1.656440669082131e-05, "loss": 0.6013, "step": 14331 }, { "epoch": 2.3395779764091262, "grad_norm": 3.3232312202453613, "learning_rate": 1.6563923014614983e-05, "loss": 0.6536, "step": 14332 }, { "epoch": 2.3397412350516307, "grad_norm": 3.1164286136627197, "learning_rate": 1.6563439311426843e-05, "loss": 0.6989, "step": 14333 }, { "epoch": 2.339904493694135, "grad_norm": 3.223033905029297, "learning_rate": 1.6562955581258885e-05, "loss": 0.6291, "step": 14334 }, { "epoch": 2.3400677523366396, "grad_norm": 2.693474769592285, "learning_rate": 1.6562471824113093e-05, "loss": 0.5714, "step": 14335 }, { "epoch": 2.3402310109791435, "grad_norm": 2.905653476715088, "learning_rate": 1.656198803999146e-05, "loss": 0.5759, "step": 14336 }, { "epoch": 2.340394269621648, "grad_norm": 2.7277743816375732, "learning_rate": 1.6561504228895965e-05, "loss": 0.5745, "step": 14337 }, { "epoch": 2.3405575282641524, "grad_norm": 3.227177143096924, "learning_rate": 1.656102039082861e-05, "loss": 0.6635, "step": 14338 }, { "epoch": 2.340720786906657, "grad_norm": 3.5938711166381836, "learning_rate": 1.6560536525791376e-05, "loss": 0.7543, "step": 14339 }, { "epoch": 2.3408840455491613, "grad_norm": 2.958404302597046, "learning_rate": 1.6560052633786253e-05, "loss": 0.6641, "step": 14340 }, { "epoch": 2.3410473041916657, "grad_norm": 3.6482486724853516, "learning_rate": 1.655956871481523e-05, "loss": 0.7482, "step": 14341 }, { "epoch": 2.34121056283417, "grad_norm": 3.7064435482025146, "learning_rate": 1.65590847688803e-05, "loss": 0.5664, "step": 14342 }, { "epoch": 2.3413738214766746, "grad_norm": 2.7214741706848145, "learning_rate": 1.6558600795983445e-05, "loss": 0.505, "step": 14343 }, { "epoch": 2.3415370801191786, "grad_norm": 2.725787878036499, "learning_rate": 1.655811679612666e-05, "loss": 0.5755, "step": 14344 }, { "epoch": 2.341700338761683, "grad_norm": 3.2424874305725098, "learning_rate": 1.6557632769311935e-05, "loss": 0.7037, "step": 14345 }, { "epoch": 2.3418635974041875, "grad_norm": 3.2080533504486084, "learning_rate": 1.6557148715541256e-05, "loss": 0.726, "step": 14346 }, { "epoch": 2.342026856046692, "grad_norm": 2.770524263381958, "learning_rate": 1.6556664634816616e-05, "loss": 0.5637, "step": 14347 }, { "epoch": 2.3421901146891964, "grad_norm": 2.770881414413452, "learning_rate": 1.6556180527140002e-05, "loss": 0.5047, "step": 14348 }, { "epoch": 2.342353373331701, "grad_norm": 2.743042469024658, "learning_rate": 1.6555696392513408e-05, "loss": 0.7181, "step": 14349 }, { "epoch": 2.3425166319742052, "grad_norm": 2.627189874649048, "learning_rate": 1.655521223093882e-05, "loss": 0.5369, "step": 14350 }, { "epoch": 2.3426798906167097, "grad_norm": 3.070314884185791, "learning_rate": 1.6554728042418227e-05, "loss": 0.6036, "step": 14351 }, { "epoch": 2.342843149259214, "grad_norm": 2.9324889183044434, "learning_rate": 1.6554243826953624e-05, "loss": 0.5771, "step": 14352 }, { "epoch": 2.3430064079017185, "grad_norm": 2.7346863746643066, "learning_rate": 1.6553759584547e-05, "loss": 0.5131, "step": 14353 }, { "epoch": 2.3431696665442225, "grad_norm": 3.003481388092041, "learning_rate": 1.6553275315200344e-05, "loss": 0.5508, "step": 14354 }, { "epoch": 2.343332925186727, "grad_norm": 2.9447107315063477, "learning_rate": 1.655279101891565e-05, "loss": 0.5235, "step": 14355 }, { "epoch": 2.3434961838292314, "grad_norm": 2.740565061569214, "learning_rate": 1.6552306695694903e-05, "loss": 0.5207, "step": 14356 }, { "epoch": 2.343659442471736, "grad_norm": 3.018079996109009, "learning_rate": 1.65518223455401e-05, "loss": 0.5523, "step": 14357 }, { "epoch": 2.3438227011142403, "grad_norm": 2.644510269165039, "learning_rate": 1.655133796845323e-05, "loss": 0.5417, "step": 14358 }, { "epoch": 2.3439859597567447, "grad_norm": 2.4403631687164307, "learning_rate": 1.6550853564436284e-05, "loss": 0.4447, "step": 14359 }, { "epoch": 2.344149218399249, "grad_norm": 2.697782039642334, "learning_rate": 1.655036913349125e-05, "loss": 0.5608, "step": 14360 }, { "epoch": 2.344312477041753, "grad_norm": 2.786874771118164, "learning_rate": 1.6549884675620118e-05, "loss": 0.5786, "step": 14361 }, { "epoch": 2.3444757356842576, "grad_norm": 2.674928903579712, "learning_rate": 1.654940019082489e-05, "loss": 0.4597, "step": 14362 }, { "epoch": 2.344638994326762, "grad_norm": 3.21598219871521, "learning_rate": 1.6548915679107545e-05, "loss": 0.4872, "step": 14363 }, { "epoch": 2.3448022529692665, "grad_norm": 2.9490370750427246, "learning_rate": 1.6548431140470084e-05, "loss": 0.6236, "step": 14364 }, { "epoch": 2.344965511611771, "grad_norm": 2.726388931274414, "learning_rate": 1.6547946574914496e-05, "loss": 0.5094, "step": 14365 }, { "epoch": 2.3451287702542754, "grad_norm": 2.5999746322631836, "learning_rate": 1.6547461982442772e-05, "loss": 0.5288, "step": 14366 }, { "epoch": 2.34529202889678, "grad_norm": 3.1145715713500977, "learning_rate": 1.6546977363056905e-05, "loss": 0.6225, "step": 14367 }, { "epoch": 2.3454552875392842, "grad_norm": 3.1184494495391846, "learning_rate": 1.6546492716758883e-05, "loss": 0.5528, "step": 14368 }, { "epoch": 2.3456185461817887, "grad_norm": 2.6332895755767822, "learning_rate": 1.6546008043550703e-05, "loss": 0.4705, "step": 14369 }, { "epoch": 2.345781804824293, "grad_norm": 2.7447938919067383, "learning_rate": 1.6545523343434356e-05, "loss": 0.522, "step": 14370 }, { "epoch": 2.345945063466797, "grad_norm": 2.7819247245788574, "learning_rate": 1.654503861641183e-05, "loss": 0.5392, "step": 14371 }, { "epoch": 2.3461083221093015, "grad_norm": 3.2281460762023926, "learning_rate": 1.6544553862485128e-05, "loss": 0.5832, "step": 14372 }, { "epoch": 2.346271580751806, "grad_norm": 2.9354007244110107, "learning_rate": 1.654406908165623e-05, "loss": 0.5346, "step": 14373 }, { "epoch": 2.3464348393943104, "grad_norm": 2.9084911346435547, "learning_rate": 1.654358427392714e-05, "loss": 0.5, "step": 14374 }, { "epoch": 2.346598098036815, "grad_norm": 2.458022117614746, "learning_rate": 1.6543099439299847e-05, "loss": 0.5276, "step": 14375 }, { "epoch": 2.3467613566793193, "grad_norm": 3.0679266452789307, "learning_rate": 1.6542614577776337e-05, "loss": 0.5684, "step": 14376 }, { "epoch": 2.3469246153218237, "grad_norm": 2.6555161476135254, "learning_rate": 1.6542129689358613e-05, "loss": 0.5713, "step": 14377 }, { "epoch": 2.347087873964328, "grad_norm": 3.062265634536743, "learning_rate": 1.6541644774048663e-05, "loss": 0.6254, "step": 14378 }, { "epoch": 2.347251132606832, "grad_norm": 2.7858262062072754, "learning_rate": 1.654115983184848e-05, "loss": 0.5408, "step": 14379 }, { "epoch": 2.3474143912493366, "grad_norm": 2.7608675956726074, "learning_rate": 1.6540674862760063e-05, "loss": 0.5603, "step": 14380 }, { "epoch": 2.347577649891841, "grad_norm": 2.8119306564331055, "learning_rate": 1.6540189866785398e-05, "loss": 0.5554, "step": 14381 }, { "epoch": 2.3477409085343455, "grad_norm": 3.17116379737854, "learning_rate": 1.6539704843926482e-05, "loss": 0.7295, "step": 14382 }, { "epoch": 2.34790416717685, "grad_norm": 3.5252397060394287, "learning_rate": 1.653921979418531e-05, "loss": 0.6318, "step": 14383 }, { "epoch": 2.3480674258193543, "grad_norm": 2.5901317596435547, "learning_rate": 1.6538734717563874e-05, "loss": 0.4927, "step": 14384 }, { "epoch": 2.348230684461859, "grad_norm": 2.9843249320983887, "learning_rate": 1.6538249614064166e-05, "loss": 0.5862, "step": 14385 }, { "epoch": 2.3483939431043632, "grad_norm": 3.1744039058685303, "learning_rate": 1.6537764483688187e-05, "loss": 0.6126, "step": 14386 }, { "epoch": 2.3485572017468677, "grad_norm": 3.353271245956421, "learning_rate": 1.6537279326437923e-05, "loss": 0.6345, "step": 14387 }, { "epoch": 2.348720460389372, "grad_norm": 2.5629401206970215, "learning_rate": 1.6536794142315377e-05, "loss": 0.5481, "step": 14388 }, { "epoch": 2.348883719031876, "grad_norm": 3.2028796672821045, "learning_rate": 1.6536308931322532e-05, "loss": 0.6475, "step": 14389 }, { "epoch": 2.3490469776743805, "grad_norm": 2.811068296432495, "learning_rate": 1.6535823693461396e-05, "loss": 0.5992, "step": 14390 }, { "epoch": 2.349210236316885, "grad_norm": 2.8213326930999756, "learning_rate": 1.653533842873395e-05, "loss": 0.5992, "step": 14391 }, { "epoch": 2.3493734949593894, "grad_norm": 2.8552088737487793, "learning_rate": 1.6534853137142198e-05, "loss": 0.5468, "step": 14392 }, { "epoch": 2.349536753601894, "grad_norm": 2.6465840339660645, "learning_rate": 1.6534367818688137e-05, "loss": 0.5388, "step": 14393 }, { "epoch": 2.3497000122443983, "grad_norm": 2.6974527835845947, "learning_rate": 1.6533882473373753e-05, "loss": 0.5608, "step": 14394 }, { "epoch": 2.3498632708869027, "grad_norm": 2.85227370262146, "learning_rate": 1.6533397101201045e-05, "loss": 0.5129, "step": 14395 }, { "epoch": 2.3500265295294067, "grad_norm": 2.9378113746643066, "learning_rate": 1.6532911702172006e-05, "loss": 0.5333, "step": 14396 }, { "epoch": 2.350189788171911, "grad_norm": 2.5236546993255615, "learning_rate": 1.6532426276288638e-05, "loss": 0.5682, "step": 14397 }, { "epoch": 2.3503530468144156, "grad_norm": 2.9346225261688232, "learning_rate": 1.6531940823552936e-05, "loss": 0.6373, "step": 14398 }, { "epoch": 2.35051630545692, "grad_norm": 2.6443192958831787, "learning_rate": 1.6531455343966883e-05, "loss": 0.5279, "step": 14399 }, { "epoch": 2.3506795640994245, "grad_norm": 2.922973871231079, "learning_rate": 1.6530969837532487e-05, "loss": 0.6413, "step": 14400 }, { "epoch": 2.350842822741929, "grad_norm": 2.741238832473755, "learning_rate": 1.653048430425174e-05, "loss": 0.5748, "step": 14401 }, { "epoch": 2.3510060813844333, "grad_norm": 2.6440773010253906, "learning_rate": 1.6529998744126637e-05, "loss": 0.5814, "step": 14402 }, { "epoch": 2.351169340026938, "grad_norm": 2.9676482677459717, "learning_rate": 1.6529513157159178e-05, "loss": 0.7042, "step": 14403 }, { "epoch": 2.351332598669442, "grad_norm": 3.3383419513702393, "learning_rate": 1.6529027543351355e-05, "loss": 0.7101, "step": 14404 }, { "epoch": 2.3514958573119467, "grad_norm": 3.1115875244140625, "learning_rate": 1.652854190270516e-05, "loss": 0.5912, "step": 14405 }, { "epoch": 2.3516591159544507, "grad_norm": 2.8164284229278564, "learning_rate": 1.65280562352226e-05, "loss": 0.5848, "step": 14406 }, { "epoch": 2.351822374596955, "grad_norm": 2.4558887481689453, "learning_rate": 1.6527570540905667e-05, "loss": 0.4842, "step": 14407 }, { "epoch": 2.3519856332394595, "grad_norm": 3.0977749824523926, "learning_rate": 1.652708481975635e-05, "loss": 0.6169, "step": 14408 }, { "epoch": 2.352148891881964, "grad_norm": 2.774994373321533, "learning_rate": 1.6526599071776653e-05, "loss": 0.6465, "step": 14409 }, { "epoch": 2.3523121505244684, "grad_norm": 2.766411542892456, "learning_rate": 1.652611329696858e-05, "loss": 0.622, "step": 14410 }, { "epoch": 2.352475409166973, "grad_norm": 2.8524811267852783, "learning_rate": 1.652562749533411e-05, "loss": 0.5261, "step": 14411 }, { "epoch": 2.3526386678094773, "grad_norm": 3.1251330375671387, "learning_rate": 1.6525141666875254e-05, "loss": 0.5534, "step": 14412 }, { "epoch": 2.3528019264519817, "grad_norm": 2.7176291942596436, "learning_rate": 1.6524655811594e-05, "loss": 0.5833, "step": 14413 }, { "epoch": 2.3529651850944857, "grad_norm": 3.3347339630126953, "learning_rate": 1.6524169929492355e-05, "loss": 0.6131, "step": 14414 }, { "epoch": 2.35312844373699, "grad_norm": 2.661043882369995, "learning_rate": 1.6523684020572308e-05, "loss": 0.5545, "step": 14415 }, { "epoch": 2.3532917023794946, "grad_norm": 2.524610996246338, "learning_rate": 1.6523198084835858e-05, "loss": 0.5014, "step": 14416 }, { "epoch": 2.353454961021999, "grad_norm": 2.8097262382507324, "learning_rate": 1.6522712122285008e-05, "loss": 0.5919, "step": 14417 }, { "epoch": 2.3536182196645035, "grad_norm": 3.032127857208252, "learning_rate": 1.652222613292175e-05, "loss": 0.5946, "step": 14418 }, { "epoch": 2.353781478307008, "grad_norm": 2.284621000289917, "learning_rate": 1.6521740116748083e-05, "loss": 0.5163, "step": 14419 }, { "epoch": 2.3539447369495123, "grad_norm": 3.020906686782837, "learning_rate": 1.6521254073766004e-05, "loss": 0.6695, "step": 14420 }, { "epoch": 2.3541079955920168, "grad_norm": 2.8598008155822754, "learning_rate": 1.652076800397751e-05, "loss": 0.5967, "step": 14421 }, { "epoch": 2.354271254234521, "grad_norm": 3.0174646377563477, "learning_rate": 1.6520281907384604e-05, "loss": 0.5818, "step": 14422 }, { "epoch": 2.3544345128770257, "grad_norm": 2.531975746154785, "learning_rate": 1.6519795783989278e-05, "loss": 0.4876, "step": 14423 }, { "epoch": 2.3545977715195296, "grad_norm": 3.2565596103668213, "learning_rate": 1.6519309633793535e-05, "loss": 0.7301, "step": 14424 }, { "epoch": 2.354761030162034, "grad_norm": 3.502650499343872, "learning_rate": 1.6518823456799374e-05, "loss": 0.6936, "step": 14425 }, { "epoch": 2.3549242888045385, "grad_norm": 2.62068247795105, "learning_rate": 1.651833725300879e-05, "loss": 0.5236, "step": 14426 }, { "epoch": 2.355087547447043, "grad_norm": 2.766083002090454, "learning_rate": 1.651785102242378e-05, "loss": 0.6487, "step": 14427 }, { "epoch": 2.3552508060895474, "grad_norm": 2.722820281982422, "learning_rate": 1.6517364765046352e-05, "loss": 0.593, "step": 14428 }, { "epoch": 2.355414064732052, "grad_norm": 3.0627169609069824, "learning_rate": 1.6516878480878494e-05, "loss": 0.5562, "step": 14429 }, { "epoch": 2.3555773233745563, "grad_norm": 2.599858522415161, "learning_rate": 1.6516392169922207e-05, "loss": 0.5881, "step": 14430 }, { "epoch": 2.3557405820170607, "grad_norm": 2.858273506164551, "learning_rate": 1.6515905832179492e-05, "loss": 0.5733, "step": 14431 }, { "epoch": 2.3559038406595647, "grad_norm": 2.9900600910186768, "learning_rate": 1.6515419467652355e-05, "loss": 0.6291, "step": 14432 }, { "epoch": 2.356067099302069, "grad_norm": 3.0545129776000977, "learning_rate": 1.6514933076342785e-05, "loss": 0.6517, "step": 14433 }, { "epoch": 2.3562303579445736, "grad_norm": 3.2151267528533936, "learning_rate": 1.6514446658252785e-05, "loss": 0.6582, "step": 14434 }, { "epoch": 2.356393616587078, "grad_norm": 3.395721197128296, "learning_rate": 1.6513960213384352e-05, "loss": 0.7645, "step": 14435 }, { "epoch": 2.3565568752295825, "grad_norm": 3.041916608810425, "learning_rate": 1.651347374173949e-05, "loss": 0.521, "step": 14436 }, { "epoch": 2.356720133872087, "grad_norm": 3.1634445190429688, "learning_rate": 1.6512987243320203e-05, "loss": 0.6475, "step": 14437 }, { "epoch": 2.3568833925145913, "grad_norm": 3.3995747566223145, "learning_rate": 1.651250071812848e-05, "loss": 0.6668, "step": 14438 }, { "epoch": 2.3570466511570958, "grad_norm": 2.662245750427246, "learning_rate": 1.6512014166166325e-05, "loss": 0.5217, "step": 14439 }, { "epoch": 2.3572099097996, "grad_norm": 2.9666097164154053, "learning_rate": 1.6511527587435736e-05, "loss": 0.6038, "step": 14440 }, { "epoch": 2.3573731684421046, "grad_norm": 2.666743755340576, "learning_rate": 1.651104098193872e-05, "loss": 0.5126, "step": 14441 }, { "epoch": 2.3575364270846086, "grad_norm": 2.87916898727417, "learning_rate": 1.6510554349677273e-05, "loss": 0.5319, "step": 14442 }, { "epoch": 2.357699685727113, "grad_norm": 2.668712615966797, "learning_rate": 1.6510067690653392e-05, "loss": 0.6123, "step": 14443 }, { "epoch": 2.3578629443696175, "grad_norm": 2.742988109588623, "learning_rate": 1.6509581004869084e-05, "loss": 0.5946, "step": 14444 }, { "epoch": 2.358026203012122, "grad_norm": 2.17517352104187, "learning_rate": 1.6509094292326345e-05, "loss": 0.4961, "step": 14445 }, { "epoch": 2.3581894616546264, "grad_norm": 2.5515224933624268, "learning_rate": 1.6508607553027177e-05, "loss": 0.545, "step": 14446 }, { "epoch": 2.358352720297131, "grad_norm": 2.7857272624969482, "learning_rate": 1.650812078697358e-05, "loss": 0.5504, "step": 14447 }, { "epoch": 2.3585159789396353, "grad_norm": 3.2141199111938477, "learning_rate": 1.650763399416756e-05, "loss": 0.6815, "step": 14448 }, { "epoch": 2.3586792375821393, "grad_norm": 2.6934454441070557, "learning_rate": 1.650714717461111e-05, "loss": 0.5138, "step": 14449 }, { "epoch": 2.3588424962246437, "grad_norm": 2.717263698577881, "learning_rate": 1.6506660328306236e-05, "loss": 0.5238, "step": 14450 }, { "epoch": 2.359005754867148, "grad_norm": 2.4656713008880615, "learning_rate": 1.6506173455254938e-05, "loss": 0.4832, "step": 14451 }, { "epoch": 2.3591690135096526, "grad_norm": 3.0400776863098145, "learning_rate": 1.6505686555459218e-05, "loss": 0.5426, "step": 14452 }, { "epoch": 2.359332272152157, "grad_norm": 3.314872980117798, "learning_rate": 1.650519962892108e-05, "loss": 0.6498, "step": 14453 }, { "epoch": 2.3594955307946615, "grad_norm": 2.7731945514678955, "learning_rate": 1.650471267564252e-05, "loss": 0.5532, "step": 14454 }, { "epoch": 2.359658789437166, "grad_norm": 3.4777557849884033, "learning_rate": 1.650422569562554e-05, "loss": 0.778, "step": 14455 }, { "epoch": 2.3598220480796703, "grad_norm": 2.7770111560821533, "learning_rate": 1.650373868887215e-05, "loss": 0.4918, "step": 14456 }, { "epoch": 2.3599853067221748, "grad_norm": 2.1717724800109863, "learning_rate": 1.650325165538434e-05, "loss": 0.4948, "step": 14457 }, { "epoch": 2.360148565364679, "grad_norm": 2.5545425415039062, "learning_rate": 1.650276459516412e-05, "loss": 0.5324, "step": 14458 }, { "epoch": 2.360311824007183, "grad_norm": 2.9118497371673584, "learning_rate": 1.650227750821349e-05, "loss": 0.6873, "step": 14459 }, { "epoch": 2.3604750826496876, "grad_norm": 3.0134541988372803, "learning_rate": 1.6501790394534453e-05, "loss": 0.6087, "step": 14460 }, { "epoch": 2.360638341292192, "grad_norm": 3.3841185569763184, "learning_rate": 1.6501303254129012e-05, "loss": 0.6567, "step": 14461 }, { "epoch": 2.3608015999346965, "grad_norm": 2.966248035430908, "learning_rate": 1.6500816086999167e-05, "loss": 0.5926, "step": 14462 }, { "epoch": 2.360964858577201, "grad_norm": 3.2092928886413574, "learning_rate": 1.650032889314692e-05, "loss": 0.5549, "step": 14463 }, { "epoch": 2.3611281172197054, "grad_norm": 2.924265146255493, "learning_rate": 1.649984167257428e-05, "loss": 0.5771, "step": 14464 }, { "epoch": 2.36129137586221, "grad_norm": 3.023406982421875, "learning_rate": 1.649935442528324e-05, "loss": 0.5957, "step": 14465 }, { "epoch": 2.3614546345047143, "grad_norm": 3.043858766555786, "learning_rate": 1.6498867151275814e-05, "loss": 0.5563, "step": 14466 }, { "epoch": 2.3616178931472183, "grad_norm": 3.36409854888916, "learning_rate": 1.6498379850553992e-05, "loss": 0.4933, "step": 14467 }, { "epoch": 2.3617811517897227, "grad_norm": 2.6260786056518555, "learning_rate": 1.6497892523119788e-05, "loss": 0.4852, "step": 14468 }, { "epoch": 2.361944410432227, "grad_norm": 2.9702320098876953, "learning_rate": 1.64974051689752e-05, "loss": 0.5896, "step": 14469 }, { "epoch": 2.3621076690747316, "grad_norm": 2.5556955337524414, "learning_rate": 1.6496917788122234e-05, "loss": 0.5035, "step": 14470 }, { "epoch": 2.362270927717236, "grad_norm": 3.431469202041626, "learning_rate": 1.649643038056289e-05, "loss": 0.6921, "step": 14471 }, { "epoch": 2.3624341863597405, "grad_norm": 3.5427980422973633, "learning_rate": 1.649594294629918e-05, "loss": 0.6526, "step": 14472 }, { "epoch": 2.362597445002245, "grad_norm": 3.1616127490997314, "learning_rate": 1.6495455485333092e-05, "loss": 0.6394, "step": 14473 }, { "epoch": 2.3627607036447493, "grad_norm": 2.953273296356201, "learning_rate": 1.6494967997666647e-05, "loss": 0.5289, "step": 14474 }, { "epoch": 2.3629239622872538, "grad_norm": 2.5280284881591797, "learning_rate": 1.6494480483301836e-05, "loss": 0.5189, "step": 14475 }, { "epoch": 2.363087220929758, "grad_norm": 3.426344394683838, "learning_rate": 1.6493992942240673e-05, "loss": 1.3207, "step": 14476 }, { "epoch": 2.363250479572262, "grad_norm": 3.0307729244232178, "learning_rate": 1.6493505374485152e-05, "loss": 0.5916, "step": 14477 }, { "epoch": 2.3634137382147666, "grad_norm": 3.5971224308013916, "learning_rate": 1.6493017780037286e-05, "loss": 0.7342, "step": 14478 }, { "epoch": 2.363576996857271, "grad_norm": 2.573319435119629, "learning_rate": 1.6492530158899075e-05, "loss": 0.4815, "step": 14479 }, { "epoch": 2.3637402554997755, "grad_norm": 2.320479154586792, "learning_rate": 1.649204251107252e-05, "loss": 0.4906, "step": 14480 }, { "epoch": 2.36390351414228, "grad_norm": 3.5084285736083984, "learning_rate": 1.6491554836559636e-05, "loss": 0.6985, "step": 14481 }, { "epoch": 2.3640667727847844, "grad_norm": 2.918198347091675, "learning_rate": 1.6491067135362416e-05, "loss": 0.5899, "step": 14482 }, { "epoch": 2.364230031427289, "grad_norm": 2.820065498352051, "learning_rate": 1.6490579407482875e-05, "loss": 0.6349, "step": 14483 }, { "epoch": 2.3643932900697933, "grad_norm": 3.0082578659057617, "learning_rate": 1.649009165292301e-05, "loss": 0.6655, "step": 14484 }, { "epoch": 2.3645565487122973, "grad_norm": 2.4543087482452393, "learning_rate": 1.6489603871684825e-05, "loss": 0.477, "step": 14485 }, { "epoch": 2.3647198073548017, "grad_norm": 2.6434788703918457, "learning_rate": 1.6489116063770337e-05, "loss": 0.5869, "step": 14486 }, { "epoch": 2.364883065997306, "grad_norm": 2.790933847427368, "learning_rate": 1.6488628229181535e-05, "loss": 0.5988, "step": 14487 }, { "epoch": 2.3650463246398106, "grad_norm": 2.4481494426727295, "learning_rate": 1.6488140367920438e-05, "loss": 0.5058, "step": 14488 }, { "epoch": 2.365209583282315, "grad_norm": 2.608630895614624, "learning_rate": 1.648765247998904e-05, "loss": 0.5097, "step": 14489 }, { "epoch": 2.3653728419248194, "grad_norm": 3.2966578006744385, "learning_rate": 1.648716456538936e-05, "loss": 0.6834, "step": 14490 }, { "epoch": 2.365536100567324, "grad_norm": 3.1407458782196045, "learning_rate": 1.6486676624123393e-05, "loss": 0.5604, "step": 14491 }, { "epoch": 2.3656993592098283, "grad_norm": 3.083534002304077, "learning_rate": 1.6486188656193147e-05, "loss": 0.6517, "step": 14492 }, { "epoch": 2.3658626178523328, "grad_norm": 2.906611204147339, "learning_rate": 1.6485700661600627e-05, "loss": 0.5245, "step": 14493 }, { "epoch": 2.3660258764948368, "grad_norm": 2.90864634513855, "learning_rate": 1.648521264034784e-05, "loss": 0.6003, "step": 14494 }, { "epoch": 2.366189135137341, "grad_norm": 2.9335904121398926, "learning_rate": 1.6484724592436796e-05, "loss": 0.5373, "step": 14495 }, { "epoch": 2.3663523937798456, "grad_norm": 2.845062017440796, "learning_rate": 1.64842365178695e-05, "loss": 0.6225, "step": 14496 }, { "epoch": 2.36651565242235, "grad_norm": 2.7840640544891357, "learning_rate": 1.6483748416647952e-05, "loss": 0.5452, "step": 14497 }, { "epoch": 2.3666789110648545, "grad_norm": 3.192927122116089, "learning_rate": 1.648326028877416e-05, "loss": 0.6502, "step": 14498 }, { "epoch": 2.366842169707359, "grad_norm": 3.0012736320495605, "learning_rate": 1.648277213425014e-05, "loss": 0.6146, "step": 14499 }, { "epoch": 2.3670054283498634, "grad_norm": 2.9652934074401855, "learning_rate": 1.6482283953077887e-05, "loss": 0.6121, "step": 14500 }, { "epoch": 2.367168686992368, "grad_norm": 2.410919427871704, "learning_rate": 1.6481795745259413e-05, "loss": 0.5343, "step": 14501 }, { "epoch": 2.367331945634872, "grad_norm": 3.091266632080078, "learning_rate": 1.6481307510796724e-05, "loss": 0.6776, "step": 14502 }, { "epoch": 2.3674952042773763, "grad_norm": 2.9953808784484863, "learning_rate": 1.6480819249691827e-05, "loss": 0.5454, "step": 14503 }, { "epoch": 2.3676584629198807, "grad_norm": 2.7374370098114014, "learning_rate": 1.648033096194673e-05, "loss": 0.531, "step": 14504 }, { "epoch": 2.367821721562385, "grad_norm": 3.1753458976745605, "learning_rate": 1.6479842647563437e-05, "loss": 0.6576, "step": 14505 }, { "epoch": 2.3679849802048896, "grad_norm": 2.9102067947387695, "learning_rate": 1.6479354306543962e-05, "loss": 0.5982, "step": 14506 }, { "epoch": 2.368148238847394, "grad_norm": 2.737975835800171, "learning_rate": 1.6478865938890303e-05, "loss": 0.4692, "step": 14507 }, { "epoch": 2.3683114974898984, "grad_norm": 2.6598706245422363, "learning_rate": 1.6478377544604477e-05, "loss": 0.5533, "step": 14508 }, { "epoch": 2.368474756132403, "grad_norm": 2.9899508953094482, "learning_rate": 1.6477889123688484e-05, "loss": 0.6847, "step": 14509 }, { "epoch": 2.3686380147749073, "grad_norm": 2.7092859745025635, "learning_rate": 1.6477400676144334e-05, "loss": 0.6192, "step": 14510 }, { "epoch": 2.3688012734174118, "grad_norm": 2.846454620361328, "learning_rate": 1.647691220197404e-05, "loss": 0.4991, "step": 14511 }, { "epoch": 2.3689645320599158, "grad_norm": 2.9232609272003174, "learning_rate": 1.6476423701179603e-05, "loss": 0.5319, "step": 14512 }, { "epoch": 2.36912779070242, "grad_norm": 3.3550920486450195, "learning_rate": 1.6475935173763034e-05, "loss": 0.6169, "step": 14513 }, { "epoch": 2.3692910493449246, "grad_norm": 3.568557024002075, "learning_rate": 1.647544661972634e-05, "loss": 0.5622, "step": 14514 }, { "epoch": 2.369454307987429, "grad_norm": 2.8347532749176025, "learning_rate": 1.647495803907153e-05, "loss": 0.6326, "step": 14515 }, { "epoch": 2.3696175666299335, "grad_norm": 3.617464780807495, "learning_rate": 1.6474469431800613e-05, "loss": 0.7184, "step": 14516 }, { "epoch": 2.369780825272438, "grad_norm": 2.8147642612457275, "learning_rate": 1.6473980797915598e-05, "loss": 0.5924, "step": 14517 }, { "epoch": 2.3699440839149424, "grad_norm": 2.345144271850586, "learning_rate": 1.647349213741849e-05, "loss": 0.4478, "step": 14518 }, { "epoch": 2.370107342557447, "grad_norm": 2.9666881561279297, "learning_rate": 1.64730034503113e-05, "loss": 0.5079, "step": 14519 }, { "epoch": 2.370270601199951, "grad_norm": 2.80169415473938, "learning_rate": 1.6472514736596043e-05, "loss": 0.5703, "step": 14520 }, { "epoch": 2.3704338598424552, "grad_norm": 2.4865236282348633, "learning_rate": 1.6472025996274714e-05, "loss": 0.5693, "step": 14521 }, { "epoch": 2.3705971184849597, "grad_norm": 3.094482421875, "learning_rate": 1.6471537229349333e-05, "loss": 0.6245, "step": 14522 }, { "epoch": 2.370760377127464, "grad_norm": 2.923879384994507, "learning_rate": 1.6471048435821907e-05, "loss": 0.538, "step": 14523 }, { "epoch": 2.3709236357699686, "grad_norm": 3.128722667694092, "learning_rate": 1.6470559615694445e-05, "loss": 0.6108, "step": 14524 }, { "epoch": 2.371086894412473, "grad_norm": 3.2768898010253906, "learning_rate": 1.6470070768968956e-05, "loss": 0.5482, "step": 14525 }, { "epoch": 2.3712501530549774, "grad_norm": 2.5277762413024902, "learning_rate": 1.6469581895647445e-05, "loss": 0.4895, "step": 14526 }, { "epoch": 2.371413411697482, "grad_norm": 2.7313807010650635, "learning_rate": 1.646909299573193e-05, "loss": 0.5866, "step": 14527 }, { "epoch": 2.3715766703399863, "grad_norm": 3.053654193878174, "learning_rate": 1.6468604069224416e-05, "loss": 0.6607, "step": 14528 }, { "epoch": 2.3717399289824908, "grad_norm": 2.3588130474090576, "learning_rate": 1.646811511612691e-05, "loss": 0.5283, "step": 14529 }, { "epoch": 2.3719031876249947, "grad_norm": 2.990007162094116, "learning_rate": 1.6467626136441425e-05, "loss": 0.6648, "step": 14530 }, { "epoch": 2.372066446267499, "grad_norm": 2.569058895111084, "learning_rate": 1.6467137130169975e-05, "loss": 0.5583, "step": 14531 }, { "epoch": 2.3722297049100036, "grad_norm": 2.9524106979370117, "learning_rate": 1.6466648097314562e-05, "loss": 0.5749, "step": 14532 }, { "epoch": 2.372392963552508, "grad_norm": 2.9189093112945557, "learning_rate": 1.6466159037877202e-05, "loss": 0.5941, "step": 14533 }, { "epoch": 2.3725562221950125, "grad_norm": 2.4009766578674316, "learning_rate": 1.6465669951859908e-05, "loss": 0.4967, "step": 14534 }, { "epoch": 2.372719480837517, "grad_norm": 2.6948723793029785, "learning_rate": 1.646518083926468e-05, "loss": 0.5548, "step": 14535 }, { "epoch": 2.3728827394800214, "grad_norm": 2.994607448577881, "learning_rate": 1.6464691700093538e-05, "loss": 0.5666, "step": 14536 }, { "epoch": 2.3730459981225254, "grad_norm": 2.9849631786346436, "learning_rate": 1.6464202534348488e-05, "loss": 0.5123, "step": 14537 }, { "epoch": 2.37320925676503, "grad_norm": 2.940960168838501, "learning_rate": 1.6463713342031543e-05, "loss": 0.5718, "step": 14538 }, { "epoch": 2.3733725154075342, "grad_norm": 2.602442502975464, "learning_rate": 1.6463224123144714e-05, "loss": 0.5112, "step": 14539 }, { "epoch": 2.3735357740500387, "grad_norm": 3.0867819786071777, "learning_rate": 1.646273487769001e-05, "loss": 0.5179, "step": 14540 }, { "epoch": 2.373699032692543, "grad_norm": 2.662076711654663, "learning_rate": 1.646224560566944e-05, "loss": 0.4933, "step": 14541 }, { "epoch": 2.3738622913350476, "grad_norm": 2.6957039833068848, "learning_rate": 1.6461756307085024e-05, "loss": 0.4694, "step": 14542 }, { "epoch": 2.374025549977552, "grad_norm": 2.7086617946624756, "learning_rate": 1.6461266981938768e-05, "loss": 0.5044, "step": 14543 }, { "epoch": 2.3741888086200564, "grad_norm": 2.8639721870422363, "learning_rate": 1.646077763023268e-05, "loss": 0.5502, "step": 14544 }, { "epoch": 2.374352067262561, "grad_norm": 3.0185277462005615, "learning_rate": 1.6460288251968774e-05, "loss": 0.5296, "step": 14545 }, { "epoch": 2.3745153259050653, "grad_norm": 2.8525335788726807, "learning_rate": 1.6459798847149064e-05, "loss": 0.5801, "step": 14546 }, { "epoch": 2.3746785845475693, "grad_norm": 2.775263786315918, "learning_rate": 1.645930941577556e-05, "loss": 0.5312, "step": 14547 }, { "epoch": 2.3748418431900737, "grad_norm": 2.9170241355895996, "learning_rate": 1.6458819957850276e-05, "loss": 0.6688, "step": 14548 }, { "epoch": 2.375005101832578, "grad_norm": 3.2618987560272217, "learning_rate": 1.6458330473375218e-05, "loss": 0.6742, "step": 14549 }, { "epoch": 2.3751683604750826, "grad_norm": 2.7943155765533447, "learning_rate": 1.6457840962352403e-05, "loss": 0.564, "step": 14550 }, { "epoch": 2.375331619117587, "grad_norm": 3.031729221343994, "learning_rate": 1.6457351424783844e-05, "loss": 0.5431, "step": 14551 }, { "epoch": 2.3754948777600915, "grad_norm": 2.415478467941284, "learning_rate": 1.645686186067155e-05, "loss": 0.5231, "step": 14552 }, { "epoch": 2.375658136402596, "grad_norm": 3.1588213443756104, "learning_rate": 1.6456372270017536e-05, "loss": 0.6296, "step": 14553 }, { "epoch": 2.3758213950451004, "grad_norm": 2.8740739822387695, "learning_rate": 1.6455882652823817e-05, "loss": 0.5788, "step": 14554 }, { "epoch": 2.3759846536876044, "grad_norm": 2.6394498348236084, "learning_rate": 1.6455393009092396e-05, "loss": 0.5276, "step": 14555 }, { "epoch": 2.376147912330109, "grad_norm": 2.8352174758911133, "learning_rate": 1.6454903338825294e-05, "loss": 0.5815, "step": 14556 }, { "epoch": 2.3763111709726132, "grad_norm": 2.817577838897705, "learning_rate": 1.645441364202452e-05, "loss": 0.582, "step": 14557 }, { "epoch": 2.3764744296151177, "grad_norm": 3.3083279132843018, "learning_rate": 1.6453923918692092e-05, "loss": 0.5784, "step": 14558 }, { "epoch": 2.376637688257622, "grad_norm": 2.750979423522949, "learning_rate": 1.645343416883002e-05, "loss": 0.5843, "step": 14559 }, { "epoch": 2.3768009469001266, "grad_norm": 3.0443286895751953, "learning_rate": 1.6452944392440312e-05, "loss": 0.5706, "step": 14560 }, { "epoch": 2.376964205542631, "grad_norm": 2.98665189743042, "learning_rate": 1.645245458952499e-05, "loss": 0.5484, "step": 14561 }, { "epoch": 2.3771274641851354, "grad_norm": 2.5781164169311523, "learning_rate": 1.6451964760086065e-05, "loss": 0.559, "step": 14562 }, { "epoch": 2.37729072282764, "grad_norm": 3.30238676071167, "learning_rate": 1.6451474904125545e-05, "loss": 0.6299, "step": 14563 }, { "epoch": 2.3774539814701443, "grad_norm": 2.894385814666748, "learning_rate": 1.645098502164545e-05, "loss": 0.5821, "step": 14564 }, { "epoch": 2.3776172401126483, "grad_norm": 2.674186944961548, "learning_rate": 1.6450495112647793e-05, "loss": 0.5738, "step": 14565 }, { "epoch": 2.3777804987551527, "grad_norm": 2.755645751953125, "learning_rate": 1.6450005177134583e-05, "loss": 0.509, "step": 14566 }, { "epoch": 2.377943757397657, "grad_norm": 2.6140849590301514, "learning_rate": 1.644951521510784e-05, "loss": 0.5514, "step": 14567 }, { "epoch": 2.3781070160401616, "grad_norm": 2.501339912414551, "learning_rate": 1.6449025226569573e-05, "loss": 0.5164, "step": 14568 }, { "epoch": 2.378270274682666, "grad_norm": 2.5337746143341064, "learning_rate": 1.6448535211521802e-05, "loss": 0.5433, "step": 14569 }, { "epoch": 2.3784335333251705, "grad_norm": 3.0536324977874756, "learning_rate": 1.6448045169966535e-05, "loss": 0.6137, "step": 14570 }, { "epoch": 2.378596791967675, "grad_norm": 3.8407175540924072, "learning_rate": 1.6447555101905787e-05, "loss": 0.797, "step": 14571 }, { "epoch": 2.3787600506101794, "grad_norm": 2.7879388332366943, "learning_rate": 1.6447065007341575e-05, "loss": 0.5852, "step": 14572 }, { "epoch": 2.3789233092526834, "grad_norm": 2.841306447982788, "learning_rate": 1.6446574886275914e-05, "loss": 0.582, "step": 14573 }, { "epoch": 2.379086567895188, "grad_norm": 2.934053421020508, "learning_rate": 1.6446084738710818e-05, "loss": 0.5364, "step": 14574 }, { "epoch": 2.3792498265376922, "grad_norm": 3.1615145206451416, "learning_rate": 1.6445594564648305e-05, "loss": 0.7672, "step": 14575 }, { "epoch": 2.3794130851801967, "grad_norm": 2.567805290222168, "learning_rate": 1.6445104364090384e-05, "loss": 0.4984, "step": 14576 }, { "epoch": 2.379576343822701, "grad_norm": 3.3036608695983887, "learning_rate": 1.644461413703907e-05, "loss": 0.6769, "step": 14577 }, { "epoch": 2.3797396024652056, "grad_norm": 3.186088800430298, "learning_rate": 1.6444123883496384e-05, "loss": 0.7049, "step": 14578 }, { "epoch": 2.37990286110771, "grad_norm": 3.430372953414917, "learning_rate": 1.6443633603464334e-05, "loss": 0.6621, "step": 14579 }, { "epoch": 2.3800661197502144, "grad_norm": 3.2729432582855225, "learning_rate": 1.6443143296944946e-05, "loss": 0.6646, "step": 14580 }, { "epoch": 2.380229378392719, "grad_norm": 2.6581122875213623, "learning_rate": 1.6442652963940224e-05, "loss": 0.5592, "step": 14581 }, { "epoch": 2.380392637035223, "grad_norm": 3.3211660385131836, "learning_rate": 1.644216260445219e-05, "loss": 0.6143, "step": 14582 }, { "epoch": 2.3805558956777273, "grad_norm": 2.6511707305908203, "learning_rate": 1.6441672218482857e-05, "loss": 0.5818, "step": 14583 }, { "epoch": 2.3807191543202317, "grad_norm": 3.0404748916625977, "learning_rate": 1.644118180603424e-05, "loss": 0.5653, "step": 14584 }, { "epoch": 2.380882412962736, "grad_norm": 2.8203632831573486, "learning_rate": 1.644069136710836e-05, "loss": 0.5477, "step": 14585 }, { "epoch": 2.3810456716052406, "grad_norm": 2.27091121673584, "learning_rate": 1.6440200901707232e-05, "loss": 0.4561, "step": 14586 }, { "epoch": 2.381208930247745, "grad_norm": 3.021960973739624, "learning_rate": 1.6439710409832865e-05, "loss": 0.5392, "step": 14587 }, { "epoch": 2.3813721888902495, "grad_norm": 2.3696725368499756, "learning_rate": 1.643921989148728e-05, "loss": 0.5145, "step": 14588 }, { "epoch": 2.381535447532754, "grad_norm": 3.479137420654297, "learning_rate": 1.6438729346672498e-05, "loss": 0.5416, "step": 14589 }, { "epoch": 2.381698706175258, "grad_norm": 3.1949660778045654, "learning_rate": 1.6438238775390526e-05, "loss": 0.5463, "step": 14590 }, { "epoch": 2.3818619648177624, "grad_norm": 2.8924715518951416, "learning_rate": 1.643774817764339e-05, "loss": 0.5553, "step": 14591 }, { "epoch": 2.382025223460267, "grad_norm": 2.717456102371216, "learning_rate": 1.6437257553433098e-05, "loss": 0.5596, "step": 14592 }, { "epoch": 2.3821884821027712, "grad_norm": 2.9465584754943848, "learning_rate": 1.6436766902761673e-05, "loss": 0.5709, "step": 14593 }, { "epoch": 2.3823517407452757, "grad_norm": 3.0341951847076416, "learning_rate": 1.643627622563113e-05, "loss": 0.6638, "step": 14594 }, { "epoch": 2.38251499938778, "grad_norm": 3.2177915573120117, "learning_rate": 1.6435785522043483e-05, "loss": 0.6166, "step": 14595 }, { "epoch": 2.3826782580302845, "grad_norm": 2.4841792583465576, "learning_rate": 1.6435294792000754e-05, "loss": 0.473, "step": 14596 }, { "epoch": 2.382841516672789, "grad_norm": 2.957914352416992, "learning_rate": 1.643480403550496e-05, "loss": 0.6045, "step": 14597 }, { "epoch": 2.3830047753152934, "grad_norm": 3.0963480472564697, "learning_rate": 1.6434313252558115e-05, "loss": 0.654, "step": 14598 }, { "epoch": 2.383168033957798, "grad_norm": 3.104682445526123, "learning_rate": 1.6433822443162236e-05, "loss": 0.5835, "step": 14599 }, { "epoch": 2.383331292600302, "grad_norm": 3.4281399250030518, "learning_rate": 1.6433331607319342e-05, "loss": 0.5813, "step": 14600 }, { "epoch": 2.3834945512428063, "grad_norm": 3.585106372833252, "learning_rate": 1.6432840745031453e-05, "loss": 0.6397, "step": 14601 }, { "epoch": 2.3836578098853107, "grad_norm": 2.9409430027008057, "learning_rate": 1.643234985630058e-05, "loss": 0.5972, "step": 14602 }, { "epoch": 2.383821068527815, "grad_norm": 2.8240292072296143, "learning_rate": 1.6431858941128753e-05, "loss": 0.5492, "step": 14603 }, { "epoch": 2.3839843271703196, "grad_norm": 2.926391124725342, "learning_rate": 1.643136799951798e-05, "loss": 0.5738, "step": 14604 }, { "epoch": 2.384147585812824, "grad_norm": 2.9296376705169678, "learning_rate": 1.643087703147028e-05, "loss": 0.6749, "step": 14605 }, { "epoch": 2.3843108444553285, "grad_norm": 2.7902815341949463, "learning_rate": 1.6430386036987672e-05, "loss": 0.5137, "step": 14606 }, { "epoch": 2.384474103097833, "grad_norm": 2.5474750995635986, "learning_rate": 1.6429895016072175e-05, "loss": 0.5095, "step": 14607 }, { "epoch": 2.384637361740337, "grad_norm": 3.1983213424682617, "learning_rate": 1.6429403968725812e-05, "loss": 0.64, "step": 14608 }, { "epoch": 2.3848006203828414, "grad_norm": 2.7838919162750244, "learning_rate": 1.642891289495059e-05, "loss": 0.5776, "step": 14609 }, { "epoch": 2.384963879025346, "grad_norm": 2.6626551151275635, "learning_rate": 1.6428421794748542e-05, "loss": 0.538, "step": 14610 }, { "epoch": 2.3851271376678502, "grad_norm": 2.4858155250549316, "learning_rate": 1.6427930668121677e-05, "loss": 0.5227, "step": 14611 }, { "epoch": 2.3852903963103547, "grad_norm": 2.793239116668701, "learning_rate": 1.6427439515072016e-05, "loss": 0.6377, "step": 14612 }, { "epoch": 2.385453654952859, "grad_norm": 2.9165220260620117, "learning_rate": 1.6426948335601577e-05, "loss": 0.6147, "step": 14613 }, { "epoch": 2.3856169135953635, "grad_norm": 2.6443636417388916, "learning_rate": 1.6426457129712378e-05, "loss": 0.5253, "step": 14614 }, { "epoch": 2.385780172237868, "grad_norm": 2.3033394813537598, "learning_rate": 1.6425965897406443e-05, "loss": 0.5331, "step": 14615 }, { "epoch": 2.3859434308803724, "grad_norm": 2.5026001930236816, "learning_rate": 1.6425474638685786e-05, "loss": 0.4539, "step": 14616 }, { "epoch": 2.386106689522877, "grad_norm": 2.753547191619873, "learning_rate": 1.6424983353552433e-05, "loss": 0.5837, "step": 14617 }, { "epoch": 2.386269948165381, "grad_norm": 2.7276570796966553, "learning_rate": 1.64244920420084e-05, "loss": 0.6514, "step": 14618 }, { "epoch": 2.3864332068078853, "grad_norm": 3.4940240383148193, "learning_rate": 1.6424000704055704e-05, "loss": 0.8171, "step": 14619 }, { "epoch": 2.3865964654503897, "grad_norm": 2.8268704414367676, "learning_rate": 1.6423509339696364e-05, "loss": 0.5266, "step": 14620 }, { "epoch": 2.386759724092894, "grad_norm": 3.255701780319214, "learning_rate": 1.6423017948932405e-05, "loss": 0.7, "step": 14621 }, { "epoch": 2.3869229827353986, "grad_norm": 2.460789918899536, "learning_rate": 1.6422526531765846e-05, "loss": 0.4978, "step": 14622 }, { "epoch": 2.387086241377903, "grad_norm": 3.526489734649658, "learning_rate": 1.6422035088198704e-05, "loss": 0.6901, "step": 14623 }, { "epoch": 2.3872495000204075, "grad_norm": 2.36631441116333, "learning_rate": 1.6421543618233e-05, "loss": 0.507, "step": 14624 }, { "epoch": 2.3874127586629115, "grad_norm": 3.000779628753662, "learning_rate": 1.6421052121870755e-05, "loss": 0.5533, "step": 14625 }, { "epoch": 2.387576017305416, "grad_norm": 2.888855457305908, "learning_rate": 1.642056059911399e-05, "loss": 0.6555, "step": 14626 }, { "epoch": 2.3877392759479203, "grad_norm": 2.5938704013824463, "learning_rate": 1.6420069049964723e-05, "loss": 0.4398, "step": 14627 }, { "epoch": 2.387902534590425, "grad_norm": 2.602393388748169, "learning_rate": 1.641957747442498e-05, "loss": 0.5094, "step": 14628 }, { "epoch": 2.3880657932329292, "grad_norm": 2.6749963760375977, "learning_rate": 1.641908587249678e-05, "loss": 0.4838, "step": 14629 }, { "epoch": 2.3882290518754337, "grad_norm": 2.8823771476745605, "learning_rate": 1.6418594244182132e-05, "loss": 0.6201, "step": 14630 }, { "epoch": 2.388392310517938, "grad_norm": 3.2319142818450928, "learning_rate": 1.6418102589483073e-05, "loss": 0.6053, "step": 14631 }, { "epoch": 2.3885555691604425, "grad_norm": 2.896578550338745, "learning_rate": 1.641761090840162e-05, "loss": 0.6434, "step": 14632 }, { "epoch": 2.388718827802947, "grad_norm": 2.6709578037261963, "learning_rate": 1.6417119200939787e-05, "loss": 0.5871, "step": 14633 }, { "epoch": 2.3888820864454514, "grad_norm": 3.543257236480713, "learning_rate": 1.6416627467099604e-05, "loss": 0.5947, "step": 14634 }, { "epoch": 2.3890453450879554, "grad_norm": 2.8369250297546387, "learning_rate": 1.641613570688309e-05, "loss": 0.587, "step": 14635 }, { "epoch": 2.38920860373046, "grad_norm": 3.0264976024627686, "learning_rate": 1.6415643920292258e-05, "loss": 0.5504, "step": 14636 }, { "epoch": 2.3893718623729643, "grad_norm": 2.858675479888916, "learning_rate": 1.641515210732914e-05, "loss": 0.5185, "step": 14637 }, { "epoch": 2.3895351210154687, "grad_norm": 2.9828810691833496, "learning_rate": 1.6414660267995754e-05, "loss": 0.5297, "step": 14638 }, { "epoch": 2.389698379657973, "grad_norm": 2.7493603229522705, "learning_rate": 1.641416840229412e-05, "loss": 0.6264, "step": 14639 }, { "epoch": 2.3898616383004776, "grad_norm": 2.920818567276001, "learning_rate": 1.6413676510226263e-05, "loss": 0.594, "step": 14640 }, { "epoch": 2.390024896942982, "grad_norm": 2.7200989723205566, "learning_rate": 1.6413184591794202e-05, "loss": 0.6566, "step": 14641 }, { "epoch": 2.3901881555854865, "grad_norm": 2.4746079444885254, "learning_rate": 1.6412692646999965e-05, "loss": 0.5013, "step": 14642 }, { "epoch": 2.3903514142279905, "grad_norm": 2.4391818046569824, "learning_rate": 1.6412200675845568e-05, "loss": 0.5321, "step": 14643 }, { "epoch": 2.390514672870495, "grad_norm": 2.998906135559082, "learning_rate": 1.6411708678333033e-05, "loss": 0.5591, "step": 14644 }, { "epoch": 2.3906779315129993, "grad_norm": 2.2806859016418457, "learning_rate": 1.6411216654464384e-05, "loss": 0.4519, "step": 14645 }, { "epoch": 2.390841190155504, "grad_norm": 2.7346675395965576, "learning_rate": 1.6410724604241647e-05, "loss": 0.5464, "step": 14646 }, { "epoch": 2.391004448798008, "grad_norm": 3.0556275844573975, "learning_rate": 1.6410232527666844e-05, "loss": 0.5615, "step": 14647 }, { "epoch": 2.3911677074405127, "grad_norm": 3.3382315635681152, "learning_rate": 1.640974042474199e-05, "loss": 0.6622, "step": 14648 }, { "epoch": 2.391330966083017, "grad_norm": 2.928852081298828, "learning_rate": 1.6409248295469117e-05, "loss": 0.5782, "step": 14649 }, { "epoch": 2.3914942247255215, "grad_norm": 3.3087973594665527, "learning_rate": 1.6408756139850243e-05, "loss": 0.6114, "step": 14650 }, { "epoch": 2.391657483368026, "grad_norm": 3.1242682933807373, "learning_rate": 1.640826395788739e-05, "loss": 0.6006, "step": 14651 }, { "epoch": 2.3918207420105304, "grad_norm": 2.806736707687378, "learning_rate": 1.640777174958259e-05, "loss": 0.6308, "step": 14652 }, { "epoch": 2.3919840006530344, "grad_norm": 2.748126983642578, "learning_rate": 1.6407279514937855e-05, "loss": 0.5117, "step": 14653 }, { "epoch": 2.392147259295539, "grad_norm": 2.7667462825775146, "learning_rate": 1.6406787253955214e-05, "loss": 0.5274, "step": 14654 }, { "epoch": 2.3923105179380433, "grad_norm": 2.5293350219726562, "learning_rate": 1.6406294966636692e-05, "loss": 0.4375, "step": 14655 }, { "epoch": 2.3924737765805477, "grad_norm": 2.891087293624878, "learning_rate": 1.640580265298431e-05, "loss": 0.572, "step": 14656 }, { "epoch": 2.392637035223052, "grad_norm": 2.825514078140259, "learning_rate": 1.6405310313000088e-05, "loss": 0.5509, "step": 14657 }, { "epoch": 2.3928002938655566, "grad_norm": 3.4182040691375732, "learning_rate": 1.6404817946686057e-05, "loss": 0.7051, "step": 14658 }, { "epoch": 2.392963552508061, "grad_norm": 3.4646785259246826, "learning_rate": 1.6404325554044237e-05, "loss": 0.7145, "step": 14659 }, { "epoch": 2.3931268111505655, "grad_norm": 3.2487690448760986, "learning_rate": 1.6403833135076653e-05, "loss": 0.6275, "step": 14660 }, { "epoch": 2.3932900697930695, "grad_norm": 3.3970212936401367, "learning_rate": 1.6403340689785328e-05, "loss": 0.7619, "step": 14661 }, { "epoch": 2.393453328435574, "grad_norm": 3.4688644409179688, "learning_rate": 1.640284821817229e-05, "loss": 0.698, "step": 14662 }, { "epoch": 2.3936165870780783, "grad_norm": 2.976309299468994, "learning_rate": 1.640235572023956e-05, "loss": 0.5675, "step": 14663 }, { "epoch": 2.3937798457205828, "grad_norm": 2.8946406841278076, "learning_rate": 1.6401863195989162e-05, "loss": 0.5418, "step": 14664 }, { "epoch": 2.393943104363087, "grad_norm": 2.880079984664917, "learning_rate": 1.6401370645423124e-05, "loss": 0.6404, "step": 14665 }, { "epoch": 2.3941063630055917, "grad_norm": 2.45473575592041, "learning_rate": 1.6400878068543465e-05, "loss": 0.4891, "step": 14666 }, { "epoch": 2.394269621648096, "grad_norm": 2.9724392890930176, "learning_rate": 1.6400385465352216e-05, "loss": 0.574, "step": 14667 }, { "epoch": 2.3944328802906005, "grad_norm": 2.70634388923645, "learning_rate": 1.6399892835851398e-05, "loss": 0.586, "step": 14668 }, { "epoch": 2.394596138933105, "grad_norm": 2.859437942504883, "learning_rate": 1.6399400180043037e-05, "loss": 0.5138, "step": 14669 }, { "epoch": 2.3947593975756094, "grad_norm": 3.5269298553466797, "learning_rate": 1.6398907497929157e-05, "loss": 0.7717, "step": 14670 }, { "epoch": 2.3949226562181134, "grad_norm": 3.007075309753418, "learning_rate": 1.6398414789511784e-05, "loss": 0.7428, "step": 14671 }, { "epoch": 2.395085914860618, "grad_norm": 2.9464731216430664, "learning_rate": 1.639792205479295e-05, "loss": 0.6156, "step": 14672 }, { "epoch": 2.3952491735031223, "grad_norm": 2.472160577774048, "learning_rate": 1.6397429293774668e-05, "loss": 0.5027, "step": 14673 }, { "epoch": 2.3954124321456267, "grad_norm": 2.9675464630126953, "learning_rate": 1.639693650645897e-05, "loss": 0.6532, "step": 14674 }, { "epoch": 2.395575690788131, "grad_norm": 2.494311571121216, "learning_rate": 1.6396443692847884e-05, "loss": 0.4412, "step": 14675 }, { "epoch": 2.3957389494306356, "grad_norm": 2.2292118072509766, "learning_rate": 1.639595085294343e-05, "loss": 0.5597, "step": 14676 }, { "epoch": 2.39590220807314, "grad_norm": 2.506503105163574, "learning_rate": 1.6395457986747642e-05, "loss": 0.5607, "step": 14677 }, { "epoch": 2.396065466715644, "grad_norm": 2.69490385055542, "learning_rate": 1.6394965094262535e-05, "loss": 0.5592, "step": 14678 }, { "epoch": 2.3962287253581485, "grad_norm": 2.7629528045654297, "learning_rate": 1.6394472175490146e-05, "loss": 0.4875, "step": 14679 }, { "epoch": 2.396391984000653, "grad_norm": 2.942800760269165, "learning_rate": 1.6393979230432496e-05, "loss": 0.6594, "step": 14680 }, { "epoch": 2.3965552426431573, "grad_norm": 2.680194854736328, "learning_rate": 1.639348625909161e-05, "loss": 0.5496, "step": 14681 }, { "epoch": 2.3967185012856618, "grad_norm": 2.3812241554260254, "learning_rate": 1.6392993261469518e-05, "loss": 0.5325, "step": 14682 }, { "epoch": 2.396881759928166, "grad_norm": 3.2257378101348877, "learning_rate": 1.639250023756824e-05, "loss": 0.728, "step": 14683 }, { "epoch": 2.3970450185706706, "grad_norm": 2.644137144088745, "learning_rate": 1.6392007187389815e-05, "loss": 0.5438, "step": 14684 }, { "epoch": 2.397208277213175, "grad_norm": 3.3559014797210693, "learning_rate": 1.6391514110936256e-05, "loss": 0.6945, "step": 14685 }, { "epoch": 2.3973715358556795, "grad_norm": 3.047484874725342, "learning_rate": 1.63910210082096e-05, "loss": 0.6305, "step": 14686 }, { "epoch": 2.397534794498184, "grad_norm": 2.4875004291534424, "learning_rate": 1.6390527879211867e-05, "loss": 0.4909, "step": 14687 }, { "epoch": 2.397698053140688, "grad_norm": 3.1915040016174316, "learning_rate": 1.6390034723945088e-05, "loss": 0.6132, "step": 14688 }, { "epoch": 2.3978613117831924, "grad_norm": 2.6711487770080566, "learning_rate": 1.638954154241129e-05, "loss": 0.5233, "step": 14689 }, { "epoch": 2.398024570425697, "grad_norm": 2.6936423778533936, "learning_rate": 1.6389048334612495e-05, "loss": 0.5959, "step": 14690 }, { "epoch": 2.3981878290682013, "grad_norm": 2.7491555213928223, "learning_rate": 1.6388555100550737e-05, "loss": 0.5529, "step": 14691 }, { "epoch": 2.3983510877107057, "grad_norm": 2.6710495948791504, "learning_rate": 1.6388061840228044e-05, "loss": 0.5722, "step": 14692 }, { "epoch": 2.39851434635321, "grad_norm": 2.852585792541504, "learning_rate": 1.6387568553646435e-05, "loss": 0.6141, "step": 14693 }, { "epoch": 2.3986776049957146, "grad_norm": 2.723515748977661, "learning_rate": 1.638707524080795e-05, "loss": 0.4944, "step": 14694 }, { "epoch": 2.398840863638219, "grad_norm": 3.1733272075653076, "learning_rate": 1.6386581901714607e-05, "loss": 0.7276, "step": 14695 }, { "epoch": 2.399004122280723, "grad_norm": 2.6388633251190186, "learning_rate": 1.638608853636844e-05, "loss": 0.5239, "step": 14696 }, { "epoch": 2.3991673809232275, "grad_norm": 2.949002742767334, "learning_rate": 1.638559514477147e-05, "loss": 0.6765, "step": 14697 }, { "epoch": 2.399330639565732, "grad_norm": 3.344374656677246, "learning_rate": 1.6385101726925735e-05, "loss": 0.7059, "step": 14698 }, { "epoch": 2.3994938982082363, "grad_norm": 2.7156636714935303, "learning_rate": 1.6384608282833253e-05, "loss": 0.5515, "step": 14699 }, { "epoch": 2.3996571568507408, "grad_norm": 2.4519248008728027, "learning_rate": 1.6384114812496056e-05, "loss": 0.539, "step": 14700 }, { "epoch": 2.399820415493245, "grad_norm": 3.0764031410217285, "learning_rate": 1.6383621315916182e-05, "loss": 0.6423, "step": 14701 }, { "epoch": 2.3999836741357496, "grad_norm": 2.472027063369751, "learning_rate": 1.6383127793095644e-05, "loss": 0.5142, "step": 14702 }, { "epoch": 2.400146932778254, "grad_norm": 2.9305479526519775, "learning_rate": 1.6382634244036478e-05, "loss": 0.7154, "step": 14703 }, { "epoch": 2.4003101914207585, "grad_norm": 2.4149179458618164, "learning_rate": 1.6382140668740714e-05, "loss": 0.4832, "step": 14704 }, { "epoch": 2.400473450063263, "grad_norm": 3.096968173980713, "learning_rate": 1.638164706721038e-05, "loss": 0.667, "step": 14705 }, { "epoch": 2.400636708705767, "grad_norm": 2.764413595199585, "learning_rate": 1.6381153439447506e-05, "loss": 0.5165, "step": 14706 }, { "epoch": 2.4007999673482714, "grad_norm": 2.769392490386963, "learning_rate": 1.6380659785454115e-05, "loss": 0.6103, "step": 14707 }, { "epoch": 2.400963225990776, "grad_norm": 2.6813485622406006, "learning_rate": 1.6380166105232246e-05, "loss": 0.5814, "step": 14708 }, { "epoch": 2.4011264846332803, "grad_norm": 3.051419496536255, "learning_rate": 1.637967239878392e-05, "loss": 0.6534, "step": 14709 }, { "epoch": 2.4012897432757847, "grad_norm": 2.711867570877075, "learning_rate": 1.6379178666111172e-05, "loss": 0.5182, "step": 14710 }, { "epoch": 2.401453001918289, "grad_norm": 2.6314520835876465, "learning_rate": 1.6378684907216026e-05, "loss": 0.5582, "step": 14711 }, { "epoch": 2.4016162605607936, "grad_norm": 2.6627068519592285, "learning_rate": 1.6378191122100518e-05, "loss": 0.5418, "step": 14712 }, { "epoch": 2.4017795192032976, "grad_norm": 2.9754140377044678, "learning_rate": 1.6377697310766674e-05, "loss": 0.5328, "step": 14713 }, { "epoch": 2.401942777845802, "grad_norm": 2.9711883068084717, "learning_rate": 1.6377203473216526e-05, "loss": 0.6045, "step": 14714 }, { "epoch": 2.4021060364883065, "grad_norm": 2.706571340560913, "learning_rate": 1.6376709609452098e-05, "loss": 0.5041, "step": 14715 }, { "epoch": 2.402269295130811, "grad_norm": 3.3106040954589844, "learning_rate": 1.637621571947543e-05, "loss": 0.7247, "step": 14716 }, { "epoch": 2.4024325537733153, "grad_norm": 3.533867597579956, "learning_rate": 1.637572180328854e-05, "loss": 0.7236, "step": 14717 }, { "epoch": 2.4025958124158198, "grad_norm": 3.6218619346618652, "learning_rate": 1.6375227860893472e-05, "loss": 0.7108, "step": 14718 }, { "epoch": 2.402759071058324, "grad_norm": 2.54317307472229, "learning_rate": 1.637473389229225e-05, "loss": 0.4726, "step": 14719 }, { "epoch": 2.4029223297008286, "grad_norm": 3.1054179668426514, "learning_rate": 1.63742398974869e-05, "loss": 0.6184, "step": 14720 }, { "epoch": 2.403085588343333, "grad_norm": 2.801368474960327, "learning_rate": 1.6373745876479457e-05, "loss": 0.4693, "step": 14721 }, { "epoch": 2.4032488469858375, "grad_norm": 3.1115736961364746, "learning_rate": 1.637325182927195e-05, "loss": 0.6148, "step": 14722 }, { "epoch": 2.4034121056283415, "grad_norm": 2.902618646621704, "learning_rate": 1.6372757755866414e-05, "loss": 0.5942, "step": 14723 }, { "epoch": 2.403575364270846, "grad_norm": 2.877671957015991, "learning_rate": 1.6372263656264876e-05, "loss": 0.634, "step": 14724 }, { "epoch": 2.4037386229133504, "grad_norm": 2.8165132999420166, "learning_rate": 1.637176953046937e-05, "loss": 0.6197, "step": 14725 }, { "epoch": 2.403901881555855, "grad_norm": 2.843334197998047, "learning_rate": 1.6371275378481922e-05, "loss": 0.5308, "step": 14726 }, { "epoch": 2.4040651401983593, "grad_norm": 2.7814364433288574, "learning_rate": 1.637078120030457e-05, "loss": 0.4583, "step": 14727 }, { "epoch": 2.4042283988408637, "grad_norm": 2.8422505855560303, "learning_rate": 1.637028699593934e-05, "loss": 0.539, "step": 14728 }, { "epoch": 2.404391657483368, "grad_norm": 2.483309745788574, "learning_rate": 1.6369792765388267e-05, "loss": 0.4696, "step": 14729 }, { "epoch": 2.4045549161258726, "grad_norm": 2.085817337036133, "learning_rate": 1.636929850865338e-05, "loss": 0.4584, "step": 14730 }, { "epoch": 2.4047181747683766, "grad_norm": 2.693004608154297, "learning_rate": 1.6368804225736712e-05, "loss": 0.5126, "step": 14731 }, { "epoch": 2.404881433410881, "grad_norm": 3.013021469116211, "learning_rate": 1.6368309916640294e-05, "loss": 0.5895, "step": 14732 }, { "epoch": 2.4050446920533854, "grad_norm": 2.3413126468658447, "learning_rate": 1.636781558136616e-05, "loss": 0.4894, "step": 14733 }, { "epoch": 2.40520795069589, "grad_norm": 3.027487277984619, "learning_rate": 1.636732121991634e-05, "loss": 0.6682, "step": 14734 }, { "epoch": 2.4053712093383943, "grad_norm": 2.7132692337036133, "learning_rate": 1.6366826832292865e-05, "loss": 0.529, "step": 14735 }, { "epoch": 2.4055344679808988, "grad_norm": 2.9279139041900635, "learning_rate": 1.6366332418497773e-05, "loss": 0.5289, "step": 14736 }, { "epoch": 2.405697726623403, "grad_norm": 2.501444101333618, "learning_rate": 1.6365837978533087e-05, "loss": 0.4523, "step": 14737 }, { "epoch": 2.4058609852659076, "grad_norm": 2.767054557800293, "learning_rate": 1.636534351240085e-05, "loss": 0.5653, "step": 14738 }, { "epoch": 2.406024243908412, "grad_norm": 2.3809878826141357, "learning_rate": 1.6364849020103087e-05, "loss": 0.4569, "step": 14739 }, { "epoch": 2.4061875025509165, "grad_norm": 2.969322443008423, "learning_rate": 1.6364354501641835e-05, "loss": 0.6265, "step": 14740 }, { "epoch": 2.4063507611934205, "grad_norm": 3.1890299320220947, "learning_rate": 1.636385995701912e-05, "loss": 0.544, "step": 14741 }, { "epoch": 2.406514019835925, "grad_norm": 2.788583278656006, "learning_rate": 1.6363365386236982e-05, "loss": 0.5505, "step": 14742 }, { "epoch": 2.4066772784784294, "grad_norm": 2.371670961380005, "learning_rate": 1.6362870789297454e-05, "loss": 0.5319, "step": 14743 }, { "epoch": 2.406840537120934, "grad_norm": 3.0464608669281006, "learning_rate": 1.6362376166202564e-05, "loss": 0.6316, "step": 14744 }, { "epoch": 2.4070037957634383, "grad_norm": 3.458226442337036, "learning_rate": 1.636188151695435e-05, "loss": 0.672, "step": 14745 }, { "epoch": 2.4071670544059427, "grad_norm": 2.1673927307128906, "learning_rate": 1.6361386841554844e-05, "loss": 0.4546, "step": 14746 }, { "epoch": 2.407330313048447, "grad_norm": 3.1804895401000977, "learning_rate": 1.6360892140006072e-05, "loss": 0.6523, "step": 14747 }, { "epoch": 2.4074935716909516, "grad_norm": 2.8240606784820557, "learning_rate": 1.636039741231008e-05, "loss": 0.5304, "step": 14748 }, { "epoch": 2.4076568303334556, "grad_norm": 2.5273022651672363, "learning_rate": 1.6359902658468896e-05, "loss": 0.5561, "step": 14749 }, { "epoch": 2.40782008897596, "grad_norm": 3.2312963008880615, "learning_rate": 1.635940787848455e-05, "loss": 0.5322, "step": 14750 }, { "epoch": 2.4079833476184644, "grad_norm": 2.971235752105713, "learning_rate": 1.6358913072359086e-05, "loss": 0.6334, "step": 14751 }, { "epoch": 2.408146606260969, "grad_norm": 2.524667263031006, "learning_rate": 1.6358418240094526e-05, "loss": 0.5024, "step": 14752 }, { "epoch": 2.4083098649034733, "grad_norm": 2.7429893016815186, "learning_rate": 1.635792338169291e-05, "loss": 0.5688, "step": 14753 }, { "epoch": 2.4084731235459778, "grad_norm": 2.8921217918395996, "learning_rate": 1.6357428497156275e-05, "loss": 0.6126, "step": 14754 }, { "epoch": 2.408636382188482, "grad_norm": 3.013617753982544, "learning_rate": 1.635693358648665e-05, "loss": 0.5929, "step": 14755 }, { "epoch": 2.4087996408309866, "grad_norm": 3.013221263885498, "learning_rate": 1.635643864968607e-05, "loss": 0.6477, "step": 14756 }, { "epoch": 2.408962899473491, "grad_norm": 2.7394206523895264, "learning_rate": 1.635594368675657e-05, "loss": 0.5407, "step": 14757 }, { "epoch": 2.4091261581159955, "grad_norm": 2.665822744369507, "learning_rate": 1.6355448697700192e-05, "loss": 0.4913, "step": 14758 }, { "epoch": 2.4092894167584995, "grad_norm": 2.954681396484375, "learning_rate": 1.6354953682518956e-05, "loss": 0.654, "step": 14759 }, { "epoch": 2.409452675401004, "grad_norm": 2.6677615642547607, "learning_rate": 1.635445864121491e-05, "loss": 0.5663, "step": 14760 }, { "epoch": 2.4096159340435084, "grad_norm": 2.9947128295898438, "learning_rate": 1.6353963573790085e-05, "loss": 0.6249, "step": 14761 }, { "epoch": 2.409779192686013, "grad_norm": 2.6075589656829834, "learning_rate": 1.635346848024651e-05, "loss": 0.5137, "step": 14762 }, { "epoch": 2.4099424513285173, "grad_norm": 3.2166385650634766, "learning_rate": 1.635297336058623e-05, "loss": 0.659, "step": 14763 }, { "epoch": 2.4101057099710217, "grad_norm": 3.531719446182251, "learning_rate": 1.635247821481127e-05, "loss": 0.714, "step": 14764 }, { "epoch": 2.410268968613526, "grad_norm": 2.726083993911743, "learning_rate": 1.6351983042923675e-05, "loss": 0.5222, "step": 14765 }, { "epoch": 2.41043222725603, "grad_norm": 2.567356586456299, "learning_rate": 1.6351487844925477e-05, "loss": 0.4922, "step": 14766 }, { "epoch": 2.4105954858985346, "grad_norm": 2.590055465698242, "learning_rate": 1.6350992620818705e-05, "loss": 0.4876, "step": 14767 }, { "epoch": 2.410758744541039, "grad_norm": 3.4268240928649902, "learning_rate": 1.6350497370605406e-05, "loss": 0.7025, "step": 14768 }, { "epoch": 2.4109220031835434, "grad_norm": 3.3362417221069336, "learning_rate": 1.6350002094287608e-05, "loss": 0.6307, "step": 14769 }, { "epoch": 2.411085261826048, "grad_norm": 3.002856731414795, "learning_rate": 1.6349506791867348e-05, "loss": 0.5619, "step": 14770 }, { "epoch": 2.4112485204685523, "grad_norm": 2.9612255096435547, "learning_rate": 1.6349011463346665e-05, "loss": 0.5586, "step": 14771 }, { "epoch": 2.4114117791110568, "grad_norm": 2.7879793643951416, "learning_rate": 1.6348516108727594e-05, "loss": 0.548, "step": 14772 }, { "epoch": 2.411575037753561, "grad_norm": 2.9923083782196045, "learning_rate": 1.6348020728012168e-05, "loss": 0.5798, "step": 14773 }, { "epoch": 2.4117382963960656, "grad_norm": 2.7700388431549072, "learning_rate": 1.634752532120243e-05, "loss": 0.5693, "step": 14774 }, { "epoch": 2.41190155503857, "grad_norm": 2.83890438079834, "learning_rate": 1.6347029888300407e-05, "loss": 0.5431, "step": 14775 }, { "epoch": 2.412064813681074, "grad_norm": 2.4152214527130127, "learning_rate": 1.6346534429308142e-05, "loss": 0.4934, "step": 14776 }, { "epoch": 2.4122280723235785, "grad_norm": 3.0178956985473633, "learning_rate": 1.6346038944227674e-05, "loss": 0.5383, "step": 14777 }, { "epoch": 2.412391330966083, "grad_norm": 3.181438446044922, "learning_rate": 1.634554343306103e-05, "loss": 0.7881, "step": 14778 }, { "epoch": 2.4125545896085874, "grad_norm": 2.6822075843811035, "learning_rate": 1.6345047895810256e-05, "loss": 0.5232, "step": 14779 }, { "epoch": 2.412717848251092, "grad_norm": 3.4177310466766357, "learning_rate": 1.6344552332477387e-05, "loss": 0.6703, "step": 14780 }, { "epoch": 2.4128811068935963, "grad_norm": 2.6427528858184814, "learning_rate": 1.6344056743064462e-05, "loss": 0.541, "step": 14781 }, { "epoch": 2.4130443655361007, "grad_norm": 2.7014169692993164, "learning_rate": 1.634356112757351e-05, "loss": 0.5953, "step": 14782 }, { "epoch": 2.413207624178605, "grad_norm": 2.7526657581329346, "learning_rate": 1.6343065486006574e-05, "loss": 0.5883, "step": 14783 }, { "epoch": 2.413370882821109, "grad_norm": 2.7450292110443115, "learning_rate": 1.6342569818365693e-05, "loss": 0.6004, "step": 14784 }, { "epoch": 2.4135341414636136, "grad_norm": 2.71816086769104, "learning_rate": 1.6342074124652902e-05, "loss": 0.5056, "step": 14785 }, { "epoch": 2.413697400106118, "grad_norm": 2.886634588241577, "learning_rate": 1.634157840487024e-05, "loss": 0.6033, "step": 14786 }, { "epoch": 2.4138606587486224, "grad_norm": 3.0610828399658203, "learning_rate": 1.6341082659019737e-05, "loss": 0.54, "step": 14787 }, { "epoch": 2.414023917391127, "grad_norm": 2.9969630241394043, "learning_rate": 1.6340586887103446e-05, "loss": 0.564, "step": 14788 }, { "epoch": 2.4141871760336313, "grad_norm": 2.1591992378234863, "learning_rate": 1.6340091089123392e-05, "loss": 0.4693, "step": 14789 }, { "epoch": 2.4143504346761357, "grad_norm": 2.5829427242279053, "learning_rate": 1.633959526508162e-05, "loss": 0.4975, "step": 14790 }, { "epoch": 2.41451369331864, "grad_norm": 3.033076524734497, "learning_rate": 1.6339099414980162e-05, "loss": 0.6213, "step": 14791 }, { "epoch": 2.4146769519611446, "grad_norm": 2.8562192916870117, "learning_rate": 1.633860353882106e-05, "loss": 0.5794, "step": 14792 }, { "epoch": 2.414840210603649, "grad_norm": 2.5800883769989014, "learning_rate": 1.6338107636606356e-05, "loss": 0.5604, "step": 14793 }, { "epoch": 2.415003469246153, "grad_norm": 3.15099835395813, "learning_rate": 1.6337611708338083e-05, "loss": 0.6659, "step": 14794 }, { "epoch": 2.4151667278886575, "grad_norm": 2.3878679275512695, "learning_rate": 1.633711575401828e-05, "loss": 0.5077, "step": 14795 }, { "epoch": 2.415329986531162, "grad_norm": 3.292191982269287, "learning_rate": 1.6336619773648988e-05, "loss": 0.6261, "step": 14796 }, { "epoch": 2.4154932451736664, "grad_norm": 2.7590770721435547, "learning_rate": 1.6336123767232246e-05, "loss": 0.5463, "step": 14797 }, { "epoch": 2.415656503816171, "grad_norm": 2.5996522903442383, "learning_rate": 1.633562773477009e-05, "loss": 0.4525, "step": 14798 }, { "epoch": 2.4158197624586752, "grad_norm": 2.826472282409668, "learning_rate": 1.6335131676264563e-05, "loss": 0.5535, "step": 14799 }, { "epoch": 2.4159830211011797, "grad_norm": 2.681438684463501, "learning_rate": 1.63346355917177e-05, "loss": 0.5208, "step": 14800 }, { "epoch": 2.416146279743684, "grad_norm": 2.731473922729492, "learning_rate": 1.6334139481131543e-05, "loss": 0.5146, "step": 14801 }, { "epoch": 2.416309538386188, "grad_norm": 3.1469101905822754, "learning_rate": 1.633364334450813e-05, "loss": 0.6191, "step": 14802 }, { "epoch": 2.4164727970286926, "grad_norm": 2.519331693649292, "learning_rate": 1.6333147181849496e-05, "loss": 0.6067, "step": 14803 }, { "epoch": 2.416636055671197, "grad_norm": 2.905388832092285, "learning_rate": 1.6332650993157695e-05, "loss": 0.6266, "step": 14804 }, { "epoch": 2.4167993143137014, "grad_norm": 3.126063585281372, "learning_rate": 1.633215477843475e-05, "loss": 0.6833, "step": 14805 }, { "epoch": 2.416962572956206, "grad_norm": 2.4770009517669678, "learning_rate": 1.6331658537682708e-05, "loss": 0.5093, "step": 14806 }, { "epoch": 2.4171258315987103, "grad_norm": 2.783533811569214, "learning_rate": 1.633116227090361e-05, "loss": 0.5561, "step": 14807 }, { "epoch": 2.4172890902412147, "grad_norm": 2.5264806747436523, "learning_rate": 1.6330665978099495e-05, "loss": 0.4654, "step": 14808 }, { "epoch": 2.417452348883719, "grad_norm": 2.943784475326538, "learning_rate": 1.6330169659272404e-05, "loss": 0.5382, "step": 14809 }, { "epoch": 2.4176156075262236, "grad_norm": 3.158207654953003, "learning_rate": 1.6329673314424373e-05, "loss": 0.5793, "step": 14810 }, { "epoch": 2.4177788661687276, "grad_norm": 2.7069268226623535, "learning_rate": 1.6329176943557444e-05, "loss": 0.5157, "step": 14811 }, { "epoch": 2.417942124811232, "grad_norm": 3.080796480178833, "learning_rate": 1.6328680546673662e-05, "loss": 0.6554, "step": 14812 }, { "epoch": 2.4181053834537365, "grad_norm": 2.921480655670166, "learning_rate": 1.632818412377506e-05, "loss": 0.5923, "step": 14813 }, { "epoch": 2.418268642096241, "grad_norm": 2.9575095176696777, "learning_rate": 1.632768767486369e-05, "loss": 0.5882, "step": 14814 }, { "epoch": 2.4184319007387454, "grad_norm": 3.509843587875366, "learning_rate": 1.6327191199941575e-05, "loss": 0.673, "step": 14815 }, { "epoch": 2.41859515938125, "grad_norm": 3.2237496376037598, "learning_rate": 1.6326694699010773e-05, "loss": 0.6247, "step": 14816 }, { "epoch": 2.4187584180237542, "grad_norm": 2.4192566871643066, "learning_rate": 1.6326198172073314e-05, "loss": 0.4653, "step": 14817 }, { "epoch": 2.4189216766662587, "grad_norm": 3.0841329097747803, "learning_rate": 1.6325701619131246e-05, "loss": 0.5451, "step": 14818 }, { "epoch": 2.4190849353087627, "grad_norm": 2.8906519412994385, "learning_rate": 1.6325205040186605e-05, "loss": 0.6243, "step": 14819 }, { "epoch": 2.419248193951267, "grad_norm": 2.905463695526123, "learning_rate": 1.6324708435241437e-05, "loss": 0.6136, "step": 14820 }, { "epoch": 2.4194114525937715, "grad_norm": 2.8768973350524902, "learning_rate": 1.632421180429778e-05, "loss": 0.5444, "step": 14821 }, { "epoch": 2.419574711236276, "grad_norm": 2.6185781955718994, "learning_rate": 1.6323715147357673e-05, "loss": 0.5507, "step": 14822 }, { "epoch": 2.4197379698787804, "grad_norm": 2.736253261566162, "learning_rate": 1.6323218464423167e-05, "loss": 0.5114, "step": 14823 }, { "epoch": 2.419901228521285, "grad_norm": 2.809732675552368, "learning_rate": 1.6322721755496295e-05, "loss": 0.5837, "step": 14824 }, { "epoch": 2.4200644871637893, "grad_norm": 3.7790002822875977, "learning_rate": 1.63222250205791e-05, "loss": 0.8584, "step": 14825 }, { "epoch": 2.4202277458062937, "grad_norm": 2.711127281188965, "learning_rate": 1.6321728259673623e-05, "loss": 0.5615, "step": 14826 }, { "epoch": 2.420391004448798, "grad_norm": 2.4555256366729736, "learning_rate": 1.6321231472781913e-05, "loss": 0.5764, "step": 14827 }, { "epoch": 2.4205542630913026, "grad_norm": 2.8072619438171387, "learning_rate": 1.6320734659906006e-05, "loss": 0.5304, "step": 14828 }, { "epoch": 2.4207175217338066, "grad_norm": 2.2968716621398926, "learning_rate": 1.6320237821047943e-05, "loss": 0.466, "step": 14829 }, { "epoch": 2.420880780376311, "grad_norm": 3.320821523666382, "learning_rate": 1.631974095620977e-05, "loss": 0.6307, "step": 14830 }, { "epoch": 2.4210440390188155, "grad_norm": 3.452868700027466, "learning_rate": 1.6319244065393527e-05, "loss": 0.6003, "step": 14831 }, { "epoch": 2.42120729766132, "grad_norm": 2.889976978302002, "learning_rate": 1.631874714860126e-05, "loss": 0.5796, "step": 14832 }, { "epoch": 2.4213705563038244, "grad_norm": 2.7365355491638184, "learning_rate": 1.631825020583501e-05, "loss": 0.5854, "step": 14833 }, { "epoch": 2.421533814946329, "grad_norm": 3.2163140773773193, "learning_rate": 1.6317753237096817e-05, "loss": 0.6198, "step": 14834 }, { "epoch": 2.4216970735888332, "grad_norm": 2.5788633823394775, "learning_rate": 1.6317256242388727e-05, "loss": 0.5108, "step": 14835 }, { "epoch": 2.4218603322313377, "grad_norm": 2.656515598297119, "learning_rate": 1.6316759221712782e-05, "loss": 0.5308, "step": 14836 }, { "epoch": 2.4220235908738417, "grad_norm": 3.211005926132202, "learning_rate": 1.631626217507102e-05, "loss": 0.573, "step": 14837 }, { "epoch": 2.422186849516346, "grad_norm": 3.6870484352111816, "learning_rate": 1.6315765102465496e-05, "loss": 0.7634, "step": 14838 }, { "epoch": 2.4223501081588505, "grad_norm": 2.6025006771087646, "learning_rate": 1.6315268003898247e-05, "loss": 0.5742, "step": 14839 }, { "epoch": 2.422513366801355, "grad_norm": 2.814863920211792, "learning_rate": 1.6314770879371313e-05, "loss": 0.6074, "step": 14840 }, { "epoch": 2.4226766254438594, "grad_norm": 3.1415891647338867, "learning_rate": 1.631427372888674e-05, "loss": 0.6061, "step": 14841 }, { "epoch": 2.422839884086364, "grad_norm": 2.782198667526245, "learning_rate": 1.631377655244657e-05, "loss": 0.5709, "step": 14842 }, { "epoch": 2.4230031427288683, "grad_norm": 3.1356613636016846, "learning_rate": 1.6313279350052853e-05, "loss": 0.5564, "step": 14843 }, { "epoch": 2.4231664013713727, "grad_norm": 3.233567476272583, "learning_rate": 1.6312782121707628e-05, "loss": 0.5906, "step": 14844 }, { "epoch": 2.423329660013877, "grad_norm": 2.6348929405212402, "learning_rate": 1.631228486741294e-05, "loss": 0.5499, "step": 14845 }, { "epoch": 2.4234929186563816, "grad_norm": 2.9405975341796875, "learning_rate": 1.6311787587170828e-05, "loss": 0.6523, "step": 14846 }, { "epoch": 2.4236561772988856, "grad_norm": 2.8639719486236572, "learning_rate": 1.631129028098334e-05, "loss": 0.544, "step": 14847 }, { "epoch": 2.42381943594139, "grad_norm": 3.543769121170044, "learning_rate": 1.6310792948852526e-05, "loss": 0.6495, "step": 14848 }, { "epoch": 2.4239826945838945, "grad_norm": 2.825942039489746, "learning_rate": 1.631029559078042e-05, "loss": 0.6368, "step": 14849 }, { "epoch": 2.424145953226399, "grad_norm": 3.2315292358398438, "learning_rate": 1.6309798206769073e-05, "loss": 0.6445, "step": 14850 }, { "epoch": 2.4243092118689034, "grad_norm": 2.8091373443603516, "learning_rate": 1.6309300796820532e-05, "loss": 0.6141, "step": 14851 }, { "epoch": 2.424472470511408, "grad_norm": 2.491945266723633, "learning_rate": 1.6308803360936836e-05, "loss": 0.5257, "step": 14852 }, { "epoch": 2.4246357291539122, "grad_norm": 2.9751412868499756, "learning_rate": 1.630830589912003e-05, "loss": 0.5174, "step": 14853 }, { "epoch": 2.4247989877964162, "grad_norm": 2.925013780593872, "learning_rate": 1.630780841137216e-05, "loss": 0.6176, "step": 14854 }, { "epoch": 2.4249622464389207, "grad_norm": 2.364176034927368, "learning_rate": 1.6307310897695275e-05, "loss": 0.4489, "step": 14855 }, { "epoch": 2.425125505081425, "grad_norm": 2.592911720275879, "learning_rate": 1.630681335809141e-05, "loss": 0.4983, "step": 14856 }, { "epoch": 2.4252887637239295, "grad_norm": 2.6087558269500732, "learning_rate": 1.6306315792562624e-05, "loss": 0.5488, "step": 14857 }, { "epoch": 2.425452022366434, "grad_norm": 3.0527596473693848, "learning_rate": 1.630581820111095e-05, "loss": 0.6442, "step": 14858 }, { "epoch": 2.4256152810089384, "grad_norm": 2.822740077972412, "learning_rate": 1.6305320583738444e-05, "loss": 0.5947, "step": 14859 }, { "epoch": 2.425778539651443, "grad_norm": 3.119508743286133, "learning_rate": 1.630482294044714e-05, "loss": 0.6124, "step": 14860 }, { "epoch": 2.4259417982939473, "grad_norm": 3.1738460063934326, "learning_rate": 1.630432527123909e-05, "loss": 0.681, "step": 14861 }, { "epoch": 2.4261050569364517, "grad_norm": 2.830954074859619, "learning_rate": 1.630382757611634e-05, "loss": 0.5813, "step": 14862 }, { "epoch": 2.426268315578956, "grad_norm": 2.882807493209839, "learning_rate": 1.6303329855080936e-05, "loss": 0.5782, "step": 14863 }, { "epoch": 2.42643157422146, "grad_norm": 3.1955268383026123, "learning_rate": 1.6302832108134926e-05, "loss": 0.6929, "step": 14864 }, { "epoch": 2.4265948328639646, "grad_norm": 2.584789276123047, "learning_rate": 1.630233433528035e-05, "loss": 0.532, "step": 14865 }, { "epoch": 2.426758091506469, "grad_norm": 2.632236957550049, "learning_rate": 1.6301836536519257e-05, "loss": 0.5511, "step": 14866 }, { "epoch": 2.4269213501489735, "grad_norm": 2.7564642429351807, "learning_rate": 1.6301338711853695e-05, "loss": 0.5511, "step": 14867 }, { "epoch": 2.427084608791478, "grad_norm": 2.8377554416656494, "learning_rate": 1.6300840861285704e-05, "loss": 0.5951, "step": 14868 }, { "epoch": 2.4272478674339824, "grad_norm": 2.9955406188964844, "learning_rate": 1.6300342984817342e-05, "loss": 0.6307, "step": 14869 }, { "epoch": 2.427411126076487, "grad_norm": 2.8069117069244385, "learning_rate": 1.6299845082450644e-05, "loss": 0.604, "step": 14870 }, { "epoch": 2.4275743847189912, "grad_norm": 3.098939895629883, "learning_rate": 1.6299347154187667e-05, "loss": 0.5518, "step": 14871 }, { "epoch": 2.4277376433614952, "grad_norm": 2.565871477127075, "learning_rate": 1.6298849200030447e-05, "loss": 0.5159, "step": 14872 }, { "epoch": 2.4279009020039997, "grad_norm": 2.840510606765747, "learning_rate": 1.629835121998104e-05, "loss": 0.5925, "step": 14873 }, { "epoch": 2.428064160646504, "grad_norm": 3.1602654457092285, "learning_rate": 1.6297853214041485e-05, "loss": 0.6054, "step": 14874 }, { "epoch": 2.4282274192890085, "grad_norm": 3.1779441833496094, "learning_rate": 1.6297355182213837e-05, "loss": 0.5339, "step": 14875 }, { "epoch": 2.428390677931513, "grad_norm": 3.076957941055298, "learning_rate": 1.629685712450014e-05, "loss": 0.5902, "step": 14876 }, { "epoch": 2.4285539365740174, "grad_norm": 2.5673553943634033, "learning_rate": 1.629635904090244e-05, "loss": 0.5835, "step": 14877 }, { "epoch": 2.428717195216522, "grad_norm": 3.873156785964966, "learning_rate": 1.6295860931422787e-05, "loss": 0.6212, "step": 14878 }, { "epoch": 2.4288804538590263, "grad_norm": 3.109144687652588, "learning_rate": 1.6295362796063227e-05, "loss": 0.5822, "step": 14879 }, { "epoch": 2.4290437125015307, "grad_norm": 3.285005569458008, "learning_rate": 1.6294864634825804e-05, "loss": 0.6521, "step": 14880 }, { "epoch": 2.429206971144035, "grad_norm": 2.8437068462371826, "learning_rate": 1.629436644771257e-05, "loss": 0.5808, "step": 14881 }, { "epoch": 2.429370229786539, "grad_norm": 3.2226226329803467, "learning_rate": 1.6293868234725575e-05, "loss": 0.6435, "step": 14882 }, { "epoch": 2.4295334884290436, "grad_norm": 2.6185600757598877, "learning_rate": 1.6293369995866862e-05, "loss": 0.4963, "step": 14883 }, { "epoch": 2.429696747071548, "grad_norm": 2.809983253479004, "learning_rate": 1.6292871731138483e-05, "loss": 0.5445, "step": 14884 }, { "epoch": 2.4298600057140525, "grad_norm": 3.0664658546447754, "learning_rate": 1.6292373440542483e-05, "loss": 0.5948, "step": 14885 }, { "epoch": 2.430023264356557, "grad_norm": 2.747481346130371, "learning_rate": 1.6291875124080912e-05, "loss": 0.61, "step": 14886 }, { "epoch": 2.4301865229990613, "grad_norm": 2.9412295818328857, "learning_rate": 1.629137678175582e-05, "loss": 0.572, "step": 14887 }, { "epoch": 2.430349781641566, "grad_norm": 2.757157564163208, "learning_rate": 1.6290878413569254e-05, "loss": 0.6309, "step": 14888 }, { "epoch": 2.4305130402840702, "grad_norm": 2.248382329940796, "learning_rate": 1.629038001952326e-05, "loss": 0.5331, "step": 14889 }, { "epoch": 2.430676298926574, "grad_norm": 2.7834298610687256, "learning_rate": 1.6289881599619888e-05, "loss": 0.5603, "step": 14890 }, { "epoch": 2.4308395575690787, "grad_norm": 2.967742681503296, "learning_rate": 1.628938315386119e-05, "loss": 0.6555, "step": 14891 }, { "epoch": 2.431002816211583, "grad_norm": 2.197370767593384, "learning_rate": 1.628888468224921e-05, "loss": 0.4622, "step": 14892 }, { "epoch": 2.4311660748540875, "grad_norm": 2.726712465286255, "learning_rate": 1.6288386184786004e-05, "loss": 0.6756, "step": 14893 }, { "epoch": 2.431329333496592, "grad_norm": 2.5908167362213135, "learning_rate": 1.6287887661473614e-05, "loss": 0.5665, "step": 14894 }, { "epoch": 2.4314925921390964, "grad_norm": 2.5385193824768066, "learning_rate": 1.6287389112314095e-05, "loss": 0.594, "step": 14895 }, { "epoch": 2.431655850781601, "grad_norm": 3.021237373352051, "learning_rate": 1.6286890537309486e-05, "loss": 0.5386, "step": 14896 }, { "epoch": 2.4318191094241053, "grad_norm": 3.2926292419433594, "learning_rate": 1.628639193646185e-05, "loss": 0.6096, "step": 14897 }, { "epoch": 2.4319823680666097, "grad_norm": 3.1648104190826416, "learning_rate": 1.6285893309773234e-05, "loss": 0.6073, "step": 14898 }, { "epoch": 2.432145626709114, "grad_norm": 3.2037460803985596, "learning_rate": 1.6285394657245673e-05, "loss": 0.6431, "step": 14899 }, { "epoch": 2.432308885351618, "grad_norm": 3.2562201023101807, "learning_rate": 1.6284895978881235e-05, "loss": 0.611, "step": 14900 }, { "epoch": 2.4324721439941226, "grad_norm": 3.001375913619995, "learning_rate": 1.6284397274681964e-05, "loss": 0.6637, "step": 14901 }, { "epoch": 2.432635402636627, "grad_norm": 2.824004888534546, "learning_rate": 1.6283898544649906e-05, "loss": 0.5511, "step": 14902 }, { "epoch": 2.4327986612791315, "grad_norm": 3.824446678161621, "learning_rate": 1.6283399788787114e-05, "loss": 0.6227, "step": 14903 }, { "epoch": 2.432961919921636, "grad_norm": 3.175945281982422, "learning_rate": 1.628290100709564e-05, "loss": 0.6041, "step": 14904 }, { "epoch": 2.4331251785641403, "grad_norm": 2.7037713527679443, "learning_rate": 1.628240219957753e-05, "loss": 0.4811, "step": 14905 }, { "epoch": 2.433288437206645, "grad_norm": 2.8551735877990723, "learning_rate": 1.628190336623484e-05, "loss": 0.5568, "step": 14906 }, { "epoch": 2.4334516958491488, "grad_norm": 3.5962817668914795, "learning_rate": 1.6281404507069617e-05, "loss": 0.8174, "step": 14907 }, { "epoch": 2.433614954491653, "grad_norm": 2.863140106201172, "learning_rate": 1.628090562208391e-05, "loss": 0.4532, "step": 14908 }, { "epoch": 2.4337782131341577, "grad_norm": 3.4328114986419678, "learning_rate": 1.628040671127977e-05, "loss": 0.6153, "step": 14909 }, { "epoch": 2.433941471776662, "grad_norm": 3.6575729846954346, "learning_rate": 1.6279907774659253e-05, "loss": 0.6051, "step": 14910 }, { "epoch": 2.4341047304191665, "grad_norm": 3.0778167247772217, "learning_rate": 1.6279408812224403e-05, "loss": 0.6381, "step": 14911 }, { "epoch": 2.434267989061671, "grad_norm": 3.1212363243103027, "learning_rate": 1.6278909823977275e-05, "loss": 0.6298, "step": 14912 }, { "epoch": 2.4344312477041754, "grad_norm": 2.9602808952331543, "learning_rate": 1.627841080991992e-05, "loss": 0.6232, "step": 14913 }, { "epoch": 2.43459450634668, "grad_norm": 2.9893991947174072, "learning_rate": 1.6277911770054392e-05, "loss": 0.6972, "step": 14914 }, { "epoch": 2.4347577649891843, "grad_norm": 2.9360828399658203, "learning_rate": 1.6277412704382737e-05, "loss": 0.5359, "step": 14915 }, { "epoch": 2.4349210236316887, "grad_norm": 2.4041593074798584, "learning_rate": 1.6276913612907005e-05, "loss": 0.5052, "step": 14916 }, { "epoch": 2.4350842822741927, "grad_norm": 2.7656757831573486, "learning_rate": 1.6276414495629258e-05, "loss": 0.556, "step": 14917 }, { "epoch": 2.435247540916697, "grad_norm": 2.387092113494873, "learning_rate": 1.6275915352551534e-05, "loss": 0.481, "step": 14918 }, { "epoch": 2.4354107995592016, "grad_norm": 2.294616222381592, "learning_rate": 1.6275416183675897e-05, "loss": 0.4928, "step": 14919 }, { "epoch": 2.435574058201706, "grad_norm": 3.194373607635498, "learning_rate": 1.627491698900439e-05, "loss": 0.5955, "step": 14920 }, { "epoch": 2.4357373168442105, "grad_norm": 2.7130818367004395, "learning_rate": 1.6274417768539073e-05, "loss": 0.5473, "step": 14921 }, { "epoch": 2.435900575486715, "grad_norm": 3.437314510345459, "learning_rate": 1.6273918522281992e-05, "loss": 0.6714, "step": 14922 }, { "epoch": 2.4360638341292193, "grad_norm": 2.77972412109375, "learning_rate": 1.6273419250235197e-05, "loss": 0.5375, "step": 14923 }, { "epoch": 2.436227092771724, "grad_norm": 2.8427064418792725, "learning_rate": 1.627291995240075e-05, "loss": 0.5287, "step": 14924 }, { "epoch": 2.4363903514142278, "grad_norm": 3.245652675628662, "learning_rate": 1.6272420628780693e-05, "loss": 0.7362, "step": 14925 }, { "epoch": 2.436553610056732, "grad_norm": 3.16200852394104, "learning_rate": 1.6271921279377087e-05, "loss": 0.6146, "step": 14926 }, { "epoch": 2.4367168686992366, "grad_norm": 2.412132978439331, "learning_rate": 1.627142190419198e-05, "loss": 0.4562, "step": 14927 }, { "epoch": 2.436880127341741, "grad_norm": 3.062729835510254, "learning_rate": 1.6270922503227424e-05, "loss": 0.5061, "step": 14928 }, { "epoch": 2.4370433859842455, "grad_norm": 2.863938570022583, "learning_rate": 1.627042307648547e-05, "loss": 0.5646, "step": 14929 }, { "epoch": 2.43720664462675, "grad_norm": 2.7480902671813965, "learning_rate": 1.626992362396818e-05, "loss": 0.5479, "step": 14930 }, { "epoch": 2.4373699032692544, "grad_norm": 2.7357356548309326, "learning_rate": 1.62694241456776e-05, "loss": 0.5067, "step": 14931 }, { "epoch": 2.437533161911759, "grad_norm": 2.693370819091797, "learning_rate": 1.6268924641615787e-05, "loss": 0.4846, "step": 14932 }, { "epoch": 2.4376964205542633, "grad_norm": 2.6786961555480957, "learning_rate": 1.6268425111784788e-05, "loss": 0.4843, "step": 14933 }, { "epoch": 2.4378596791967677, "grad_norm": 2.715773344039917, "learning_rate": 1.6267925556186664e-05, "loss": 0.4926, "step": 14934 }, { "epoch": 2.4380229378392717, "grad_norm": 2.82719087600708, "learning_rate": 1.626742597482346e-05, "loss": 0.5382, "step": 14935 }, { "epoch": 2.438186196481776, "grad_norm": 3.3766114711761475, "learning_rate": 1.626692636769724e-05, "loss": 0.7129, "step": 14936 }, { "epoch": 2.4383494551242806, "grad_norm": 2.8356542587280273, "learning_rate": 1.626642673481005e-05, "loss": 0.565, "step": 14937 }, { "epoch": 2.438512713766785, "grad_norm": 3.0037732124328613, "learning_rate": 1.6265927076163942e-05, "loss": 0.6357, "step": 14938 }, { "epoch": 2.4386759724092895, "grad_norm": 3.458479881286621, "learning_rate": 1.6265427391760977e-05, "loss": 0.6871, "step": 14939 }, { "epoch": 2.438839231051794, "grad_norm": 2.9009130001068115, "learning_rate": 1.6264927681603206e-05, "loss": 0.5727, "step": 14940 }, { "epoch": 2.4390024896942983, "grad_norm": 2.4088902473449707, "learning_rate": 1.6264427945692682e-05, "loss": 0.5222, "step": 14941 }, { "epoch": 2.4391657483368023, "grad_norm": 2.873445510864258, "learning_rate": 1.6263928184031462e-05, "loss": 0.6135, "step": 14942 }, { "epoch": 2.4393290069793068, "grad_norm": 2.8414058685302734, "learning_rate": 1.6263428396621596e-05, "loss": 0.6081, "step": 14943 }, { "epoch": 2.439492265621811, "grad_norm": 2.2481889724731445, "learning_rate": 1.6262928583465144e-05, "loss": 0.4628, "step": 14944 }, { "epoch": 2.4396555242643156, "grad_norm": 3.079009771347046, "learning_rate": 1.6262428744564155e-05, "loss": 0.6506, "step": 14945 }, { "epoch": 2.43981878290682, "grad_norm": 3.122267723083496, "learning_rate": 1.6261928879920686e-05, "loss": 0.5807, "step": 14946 }, { "epoch": 2.4399820415493245, "grad_norm": 2.453404426574707, "learning_rate": 1.626142898953679e-05, "loss": 0.5061, "step": 14947 }, { "epoch": 2.440145300191829, "grad_norm": 2.878594160079956, "learning_rate": 1.6260929073414526e-05, "loss": 0.6027, "step": 14948 }, { "epoch": 2.4403085588343334, "grad_norm": 2.994535446166992, "learning_rate": 1.6260429131555947e-05, "loss": 0.5887, "step": 14949 }, { "epoch": 2.440471817476838, "grad_norm": 2.791778087615967, "learning_rate": 1.6259929163963108e-05, "loss": 0.5811, "step": 14950 }, { "epoch": 2.4406350761193423, "grad_norm": 2.643270969390869, "learning_rate": 1.6259429170638062e-05, "loss": 0.509, "step": 14951 }, { "epoch": 2.4407983347618463, "grad_norm": 2.7506954669952393, "learning_rate": 1.625892915158287e-05, "loss": 0.5773, "step": 14952 }, { "epoch": 2.4409615934043507, "grad_norm": 3.025068998336792, "learning_rate": 1.625842910679958e-05, "loss": 0.5149, "step": 14953 }, { "epoch": 2.441124852046855, "grad_norm": 3.1791248321533203, "learning_rate": 1.625792903629025e-05, "loss": 0.6207, "step": 14954 }, { "epoch": 2.4412881106893596, "grad_norm": 2.8170173168182373, "learning_rate": 1.625742894005694e-05, "loss": 0.5081, "step": 14955 }, { "epoch": 2.441451369331864, "grad_norm": 3.515345811843872, "learning_rate": 1.6256928818101704e-05, "loss": 0.6638, "step": 14956 }, { "epoch": 2.4416146279743685, "grad_norm": 2.9032578468322754, "learning_rate": 1.625642867042659e-05, "loss": 0.5602, "step": 14957 }, { "epoch": 2.441777886616873, "grad_norm": 2.7378878593444824, "learning_rate": 1.6255928497033663e-05, "loss": 0.5934, "step": 14958 }, { "epoch": 2.4419411452593773, "grad_norm": 2.703160524368286, "learning_rate": 1.6255428297924978e-05, "loss": 0.5241, "step": 14959 }, { "epoch": 2.4421044039018813, "grad_norm": 2.8322336673736572, "learning_rate": 1.6254928073102588e-05, "loss": 0.5585, "step": 14960 }, { "epoch": 2.4422676625443858, "grad_norm": 2.674673557281494, "learning_rate": 1.625442782256855e-05, "loss": 0.4938, "step": 14961 }, { "epoch": 2.44243092118689, "grad_norm": 3.09564208984375, "learning_rate": 1.6253927546324922e-05, "loss": 0.527, "step": 14962 }, { "epoch": 2.4425941798293946, "grad_norm": 2.8804757595062256, "learning_rate": 1.625342724437376e-05, "loss": 0.6215, "step": 14963 }, { "epoch": 2.442757438471899, "grad_norm": 2.698760747909546, "learning_rate": 1.6252926916717114e-05, "loss": 0.5871, "step": 14964 }, { "epoch": 2.4429206971144035, "grad_norm": 2.857388734817505, "learning_rate": 1.6252426563357054e-05, "loss": 0.5159, "step": 14965 }, { "epoch": 2.443083955756908, "grad_norm": 2.8547871112823486, "learning_rate": 1.6251926184295627e-05, "loss": 0.5372, "step": 14966 }, { "epoch": 2.4432472143994124, "grad_norm": 3.3808224201202393, "learning_rate": 1.625142577953489e-05, "loss": 0.6052, "step": 14967 }, { "epoch": 2.443410473041917, "grad_norm": 3.1510114669799805, "learning_rate": 1.6250925349076903e-05, "loss": 0.5855, "step": 14968 }, { "epoch": 2.4435737316844213, "grad_norm": 3.0344622135162354, "learning_rate": 1.625042489292372e-05, "loss": 0.5287, "step": 14969 }, { "epoch": 2.4437369903269253, "grad_norm": 2.9288442134857178, "learning_rate": 1.6249924411077406e-05, "loss": 0.59, "step": 14970 }, { "epoch": 2.4439002489694297, "grad_norm": 3.052018642425537, "learning_rate": 1.6249423903540013e-05, "loss": 0.6161, "step": 14971 }, { "epoch": 2.444063507611934, "grad_norm": 2.559856653213501, "learning_rate": 1.624892337031359e-05, "loss": 0.5594, "step": 14972 }, { "epoch": 2.4442267662544386, "grad_norm": 2.785388708114624, "learning_rate": 1.6248422811400208e-05, "loss": 0.5499, "step": 14973 }, { "epoch": 2.444390024896943, "grad_norm": 2.9463095664978027, "learning_rate": 1.624792222680192e-05, "loss": 0.5188, "step": 14974 }, { "epoch": 2.4445532835394475, "grad_norm": 2.79872727394104, "learning_rate": 1.624742161652078e-05, "loss": 0.688, "step": 14975 }, { "epoch": 2.444716542181952, "grad_norm": 2.3910531997680664, "learning_rate": 1.6246920980558852e-05, "loss": 0.4302, "step": 14976 }, { "epoch": 2.4448798008244563, "grad_norm": 2.8663251399993896, "learning_rate": 1.6246420318918186e-05, "loss": 0.637, "step": 14977 }, { "epoch": 2.4450430594669603, "grad_norm": 2.61633563041687, "learning_rate": 1.624591963160085e-05, "loss": 0.5376, "step": 14978 }, { "epoch": 2.4452063181094648, "grad_norm": 2.7142128944396973, "learning_rate": 1.6245418918608896e-05, "loss": 0.4834, "step": 14979 }, { "epoch": 2.445369576751969, "grad_norm": 3.2572288513183594, "learning_rate": 1.624491817994438e-05, "loss": 0.5803, "step": 14980 }, { "epoch": 2.4455328353944736, "grad_norm": 2.612241268157959, "learning_rate": 1.6244417415609363e-05, "loss": 0.5144, "step": 14981 }, { "epoch": 2.445696094036978, "grad_norm": 2.7054996490478516, "learning_rate": 1.6243916625605906e-05, "loss": 0.5474, "step": 14982 }, { "epoch": 2.4458593526794825, "grad_norm": 3.1931936740875244, "learning_rate": 1.6243415809936064e-05, "loss": 0.6852, "step": 14983 }, { "epoch": 2.446022611321987, "grad_norm": 3.2829806804656982, "learning_rate": 1.62429149686019e-05, "loss": 0.8634, "step": 14984 }, { "epoch": 2.4461858699644914, "grad_norm": 2.9960577487945557, "learning_rate": 1.6242414101605472e-05, "loss": 0.563, "step": 14985 }, { "epoch": 2.446349128606996, "grad_norm": 2.9029345512390137, "learning_rate": 1.624191320894883e-05, "loss": 0.5608, "step": 14986 }, { "epoch": 2.4465123872495003, "grad_norm": 2.7820560932159424, "learning_rate": 1.6241412290634043e-05, "loss": 0.4565, "step": 14987 }, { "epoch": 2.4466756458920043, "grad_norm": 2.934297561645508, "learning_rate": 1.6240911346663164e-05, "loss": 0.4616, "step": 14988 }, { "epoch": 2.4468389045345087, "grad_norm": 3.006664752960205, "learning_rate": 1.624041037703826e-05, "loss": 0.6716, "step": 14989 }, { "epoch": 2.447002163177013, "grad_norm": 3.366885185241699, "learning_rate": 1.623990938176138e-05, "loss": 0.6021, "step": 14990 }, { "epoch": 2.4471654218195176, "grad_norm": 3.0597128868103027, "learning_rate": 1.6239408360834593e-05, "loss": 0.6497, "step": 14991 }, { "epoch": 2.447328680462022, "grad_norm": 3.372274398803711, "learning_rate": 1.6238907314259954e-05, "loss": 0.6707, "step": 14992 }, { "epoch": 2.4474919391045264, "grad_norm": 2.6815688610076904, "learning_rate": 1.6238406242039516e-05, "loss": 0.5186, "step": 14993 }, { "epoch": 2.447655197747031, "grad_norm": 3.126368761062622, "learning_rate": 1.623790514417535e-05, "loss": 0.6779, "step": 14994 }, { "epoch": 2.447818456389535, "grad_norm": 3.7084426879882812, "learning_rate": 1.6237404020669517e-05, "loss": 0.6571, "step": 14995 }, { "epoch": 2.4479817150320393, "grad_norm": 2.783344030380249, "learning_rate": 1.6236902871524064e-05, "loss": 0.6591, "step": 14996 }, { "epoch": 2.4481449736745438, "grad_norm": 2.851670742034912, "learning_rate": 1.623640169674106e-05, "loss": 0.5611, "step": 14997 }, { "epoch": 2.448308232317048, "grad_norm": 2.688075542449951, "learning_rate": 1.6235900496322562e-05, "loss": 0.4908, "step": 14998 }, { "epoch": 2.4484714909595526, "grad_norm": 2.7261080741882324, "learning_rate": 1.6235399270270637e-05, "loss": 0.5059, "step": 14999 }, { "epoch": 2.448634749602057, "grad_norm": 2.8736624717712402, "learning_rate": 1.6234898018587336e-05, "loss": 0.5133, "step": 15000 }, { "epoch": 2.4487980082445615, "grad_norm": 2.6466238498687744, "learning_rate": 1.6234396741274725e-05, "loss": 0.5115, "step": 15001 }, { "epoch": 2.448961266887066, "grad_norm": 2.867250680923462, "learning_rate": 1.6233895438334865e-05, "loss": 0.623, "step": 15002 }, { "epoch": 2.4491245255295704, "grad_norm": 2.6884210109710693, "learning_rate": 1.623339410976981e-05, "loss": 0.5216, "step": 15003 }, { "epoch": 2.449287784172075, "grad_norm": 3.0501534938812256, "learning_rate": 1.623289275558163e-05, "loss": 0.5472, "step": 15004 }, { "epoch": 2.449451042814579, "grad_norm": 3.235363721847534, "learning_rate": 1.6232391375772377e-05, "loss": 0.619, "step": 15005 }, { "epoch": 2.4496143014570833, "grad_norm": 2.675100803375244, "learning_rate": 1.623188997034412e-05, "loss": 0.6125, "step": 15006 }, { "epoch": 2.4497775600995877, "grad_norm": 2.6239583492279053, "learning_rate": 1.6231388539298914e-05, "loss": 0.6133, "step": 15007 }, { "epoch": 2.449940818742092, "grad_norm": 2.7862164974212646, "learning_rate": 1.6230887082638823e-05, "loss": 0.628, "step": 15008 }, { "epoch": 2.4501040773845966, "grad_norm": 2.6519346237182617, "learning_rate": 1.6230385600365906e-05, "loss": 0.5825, "step": 15009 }, { "epoch": 2.450267336027101, "grad_norm": 2.6236400604248047, "learning_rate": 1.622988409248223e-05, "loss": 0.5893, "step": 15010 }, { "epoch": 2.4504305946696054, "grad_norm": 3.3944928646087646, "learning_rate": 1.622938255898985e-05, "loss": 0.6668, "step": 15011 }, { "epoch": 2.45059385331211, "grad_norm": 3.244354009628296, "learning_rate": 1.6228880999890836e-05, "loss": 0.6275, "step": 15012 }, { "epoch": 2.450757111954614, "grad_norm": 3.4879369735717773, "learning_rate": 1.6228379415187237e-05, "loss": 0.5904, "step": 15013 }, { "epoch": 2.4509203705971183, "grad_norm": 2.7230350971221924, "learning_rate": 1.6227877804881126e-05, "loss": 0.5607, "step": 15014 }, { "epoch": 2.4510836292396228, "grad_norm": 2.3067221641540527, "learning_rate": 1.6227376168974563e-05, "loss": 0.4977, "step": 15015 }, { "epoch": 2.451246887882127, "grad_norm": 2.721576452255249, "learning_rate": 1.6226874507469603e-05, "loss": 0.5527, "step": 15016 }, { "epoch": 2.4514101465246316, "grad_norm": 2.835693836212158, "learning_rate": 1.6226372820368314e-05, "loss": 0.6115, "step": 15017 }, { "epoch": 2.451573405167136, "grad_norm": 2.923607587814331, "learning_rate": 1.622587110767276e-05, "loss": 0.6817, "step": 15018 }, { "epoch": 2.4517366638096405, "grad_norm": 2.9197874069213867, "learning_rate": 1.6225369369385e-05, "loss": 0.6176, "step": 15019 }, { "epoch": 2.451899922452145, "grad_norm": 2.695194959640503, "learning_rate": 1.6224867605507094e-05, "loss": 0.5399, "step": 15020 }, { "epoch": 2.4520631810946494, "grad_norm": 2.4638099670410156, "learning_rate": 1.6224365816041114e-05, "loss": 0.5136, "step": 15021 }, { "epoch": 2.452226439737154, "grad_norm": 2.7226195335388184, "learning_rate": 1.622386400098911e-05, "loss": 0.5619, "step": 15022 }, { "epoch": 2.452389698379658, "grad_norm": 2.4052412509918213, "learning_rate": 1.6223362160353153e-05, "loss": 0.5288, "step": 15023 }, { "epoch": 2.4525529570221622, "grad_norm": 2.635510206222534, "learning_rate": 1.622286029413531e-05, "loss": 0.5652, "step": 15024 }, { "epoch": 2.4527162156646667, "grad_norm": 2.892179489135742, "learning_rate": 1.622235840233763e-05, "loss": 0.6054, "step": 15025 }, { "epoch": 2.452879474307171, "grad_norm": 3.669419050216675, "learning_rate": 1.622185648496219e-05, "loss": 0.6836, "step": 15026 }, { "epoch": 2.4530427329496756, "grad_norm": 2.9133002758026123, "learning_rate": 1.6221354542011044e-05, "loss": 0.5859, "step": 15027 }, { "epoch": 2.45320599159218, "grad_norm": 2.7509381771087646, "learning_rate": 1.622085257348626e-05, "loss": 0.5678, "step": 15028 }, { "epoch": 2.4533692502346844, "grad_norm": 3.0603699684143066, "learning_rate": 1.6220350579389897e-05, "loss": 0.5898, "step": 15029 }, { "epoch": 2.453532508877189, "grad_norm": 2.781621217727661, "learning_rate": 1.621984855972402e-05, "loss": 0.6092, "step": 15030 }, { "epoch": 2.453695767519693, "grad_norm": 3.2271320819854736, "learning_rate": 1.62193465144907e-05, "loss": 0.6334, "step": 15031 }, { "epoch": 2.4538590261621973, "grad_norm": 2.437469482421875, "learning_rate": 1.6218844443691994e-05, "loss": 0.5289, "step": 15032 }, { "epoch": 2.4540222848047017, "grad_norm": 2.565934181213379, "learning_rate": 1.6218342347329965e-05, "loss": 0.5764, "step": 15033 }, { "epoch": 2.454185543447206, "grad_norm": 2.743685245513916, "learning_rate": 1.621784022540668e-05, "loss": 0.6023, "step": 15034 }, { "epoch": 2.4543488020897106, "grad_norm": 2.2908387184143066, "learning_rate": 1.62173380779242e-05, "loss": 0.4343, "step": 15035 }, { "epoch": 2.454512060732215, "grad_norm": 2.8691699504852295, "learning_rate": 1.6216835904884592e-05, "loss": 0.5998, "step": 15036 }, { "epoch": 2.4546753193747195, "grad_norm": 2.734436511993408, "learning_rate": 1.621633370628992e-05, "loss": 0.5373, "step": 15037 }, { "epoch": 2.454838578017224, "grad_norm": 2.523240804672241, "learning_rate": 1.6215831482142243e-05, "loss": 0.5837, "step": 15038 }, { "epoch": 2.4550018366597284, "grad_norm": 2.963195562362671, "learning_rate": 1.6215329232443635e-05, "loss": 0.5634, "step": 15039 }, { "epoch": 2.4551650953022324, "grad_norm": 2.677903175354004, "learning_rate": 1.6214826957196152e-05, "loss": 0.5516, "step": 15040 }, { "epoch": 2.455328353944737, "grad_norm": 2.8349709510803223, "learning_rate": 1.6214324656401864e-05, "loss": 0.609, "step": 15041 }, { "epoch": 2.4554916125872412, "grad_norm": 2.8784852027893066, "learning_rate": 1.621382233006283e-05, "loss": 0.5339, "step": 15042 }, { "epoch": 2.4556548712297457, "grad_norm": 2.8575780391693115, "learning_rate": 1.6213319978181127e-05, "loss": 0.6101, "step": 15043 }, { "epoch": 2.45581812987225, "grad_norm": 2.1780624389648438, "learning_rate": 1.6212817600758804e-05, "loss": 0.4437, "step": 15044 }, { "epoch": 2.4559813885147546, "grad_norm": 3.3011221885681152, "learning_rate": 1.6212315197797936e-05, "loss": 0.6514, "step": 15045 }, { "epoch": 2.456144647157259, "grad_norm": 2.6958703994750977, "learning_rate": 1.6211812769300587e-05, "loss": 0.6361, "step": 15046 }, { "epoch": 2.4563079057997634, "grad_norm": 2.6704320907592773, "learning_rate": 1.621131031526882e-05, "loss": 0.4931, "step": 15047 }, { "epoch": 2.4564711644422674, "grad_norm": 2.5345520973205566, "learning_rate": 1.62108078357047e-05, "loss": 0.4666, "step": 15048 }, { "epoch": 2.456634423084772, "grad_norm": 3.321329355239868, "learning_rate": 1.62103053306103e-05, "loss": 0.5915, "step": 15049 }, { "epoch": 2.4567976817272763, "grad_norm": 2.736750364303589, "learning_rate": 1.6209802799987674e-05, "loss": 0.6403, "step": 15050 }, { "epoch": 2.4569609403697807, "grad_norm": 2.8272387981414795, "learning_rate": 1.6209300243838896e-05, "loss": 0.5915, "step": 15051 }, { "epoch": 2.457124199012285, "grad_norm": 2.7964954376220703, "learning_rate": 1.6208797662166026e-05, "loss": 0.5909, "step": 15052 }, { "epoch": 2.4572874576547896, "grad_norm": 2.832799196243286, "learning_rate": 1.6208295054971136e-05, "loss": 0.5971, "step": 15053 }, { "epoch": 2.457450716297294, "grad_norm": 2.63602876663208, "learning_rate": 1.620779242225629e-05, "loss": 0.5385, "step": 15054 }, { "epoch": 2.4576139749397985, "grad_norm": 3.0943081378936768, "learning_rate": 1.6207289764023552e-05, "loss": 0.5995, "step": 15055 }, { "epoch": 2.457777233582303, "grad_norm": 3.2650606632232666, "learning_rate": 1.6206787080274993e-05, "loss": 0.6364, "step": 15056 }, { "epoch": 2.4579404922248074, "grad_norm": 2.6059021949768066, "learning_rate": 1.6206284371012673e-05, "loss": 0.4337, "step": 15057 }, { "epoch": 2.4581037508673114, "grad_norm": 3.3849053382873535, "learning_rate": 1.6205781636238662e-05, "loss": 0.554, "step": 15058 }, { "epoch": 2.458267009509816, "grad_norm": 3.714195966720581, "learning_rate": 1.6205278875955025e-05, "loss": 0.7812, "step": 15059 }, { "epoch": 2.4584302681523202, "grad_norm": 3.101189613342285, "learning_rate": 1.620477609016383e-05, "loss": 0.6904, "step": 15060 }, { "epoch": 2.4585935267948247, "grad_norm": 2.72204327583313, "learning_rate": 1.6204273278867142e-05, "loss": 0.5534, "step": 15061 }, { "epoch": 2.458756785437329, "grad_norm": 2.804987668991089, "learning_rate": 1.620377044206703e-05, "loss": 0.4442, "step": 15062 }, { "epoch": 2.4589200440798336, "grad_norm": 4.044580459594727, "learning_rate": 1.6203267579765563e-05, "loss": 0.72, "step": 15063 }, { "epoch": 2.459083302722338, "grad_norm": 3.799400568008423, "learning_rate": 1.62027646919648e-05, "loss": 0.7982, "step": 15064 }, { "epoch": 2.4592465613648424, "grad_norm": 2.2751145362854004, "learning_rate": 1.620226177866682e-05, "loss": 0.4525, "step": 15065 }, { "epoch": 2.4594098200073464, "grad_norm": 3.478055715560913, "learning_rate": 1.620175883987368e-05, "loss": 0.6944, "step": 15066 }, { "epoch": 2.459573078649851, "grad_norm": 2.828429937362671, "learning_rate": 1.6201255875587448e-05, "loss": 0.5902, "step": 15067 }, { "epoch": 2.4597363372923553, "grad_norm": 3.133674383163452, "learning_rate": 1.62007528858102e-05, "loss": 0.5884, "step": 15068 }, { "epoch": 2.4598995959348597, "grad_norm": 2.8940892219543457, "learning_rate": 1.6200249870543995e-05, "loss": 0.586, "step": 15069 }, { "epoch": 2.460062854577364, "grad_norm": 2.6576812267303467, "learning_rate": 1.6199746829790906e-05, "loss": 0.5455, "step": 15070 }, { "epoch": 2.4602261132198686, "grad_norm": 2.7254016399383545, "learning_rate": 1.6199243763553002e-05, "loss": 0.4962, "step": 15071 }, { "epoch": 2.460389371862373, "grad_norm": 2.6455047130584717, "learning_rate": 1.619874067183234e-05, "loss": 0.5235, "step": 15072 }, { "epoch": 2.4605526305048775, "grad_norm": 2.6702420711517334, "learning_rate": 1.6198237554631002e-05, "loss": 0.6072, "step": 15073 }, { "epoch": 2.460715889147382, "grad_norm": 2.9344322681427, "learning_rate": 1.6197734411951047e-05, "loss": 0.6288, "step": 15074 }, { "epoch": 2.4608791477898864, "grad_norm": 3.3642141819000244, "learning_rate": 1.6197231243794544e-05, "loss": 0.6775, "step": 15075 }, { "epoch": 2.4610424064323904, "grad_norm": 2.8310155868530273, "learning_rate": 1.6196728050163568e-05, "loss": 0.4629, "step": 15076 }, { "epoch": 2.461205665074895, "grad_norm": 3.75500750541687, "learning_rate": 1.619622483106018e-05, "loss": 0.789, "step": 15077 }, { "epoch": 2.4613689237173992, "grad_norm": 3.1596860885620117, "learning_rate": 1.6195721586486452e-05, "loss": 0.7186, "step": 15078 }, { "epoch": 2.4615321823599037, "grad_norm": 3.3340389728546143, "learning_rate": 1.619521831644445e-05, "loss": 0.5587, "step": 15079 }, { "epoch": 2.461695441002408, "grad_norm": 2.813678026199341, "learning_rate": 1.619471502093625e-05, "loss": 0.5787, "step": 15080 }, { "epoch": 2.4618586996449126, "grad_norm": 2.6396725177764893, "learning_rate": 1.6194211699963913e-05, "loss": 0.6002, "step": 15081 }, { "epoch": 2.462021958287417, "grad_norm": 2.8515191078186035, "learning_rate": 1.6193708353529508e-05, "loss": 0.6256, "step": 15082 }, { "epoch": 2.462185216929921, "grad_norm": 2.978010654449463, "learning_rate": 1.6193204981635112e-05, "loss": 0.617, "step": 15083 }, { "epoch": 2.4623484755724254, "grad_norm": 2.9310719966888428, "learning_rate": 1.6192701584282783e-05, "loss": 0.6635, "step": 15084 }, { "epoch": 2.46251173421493, "grad_norm": 3.260631799697876, "learning_rate": 1.6192198161474602e-05, "loss": 0.6678, "step": 15085 }, { "epoch": 2.4626749928574343, "grad_norm": 2.619691848754883, "learning_rate": 1.6191694713212627e-05, "loss": 0.5046, "step": 15086 }, { "epoch": 2.4628382514999387, "grad_norm": 2.6809866428375244, "learning_rate": 1.6191191239498936e-05, "loss": 0.5395, "step": 15087 }, { "epoch": 2.463001510142443, "grad_norm": 2.9650306701660156, "learning_rate": 1.6190687740335597e-05, "loss": 0.548, "step": 15088 }, { "epoch": 2.4631647687849476, "grad_norm": 2.6314685344696045, "learning_rate": 1.6190184215724674e-05, "loss": 0.5279, "step": 15089 }, { "epoch": 2.463328027427452, "grad_norm": 2.7719171047210693, "learning_rate": 1.6189680665668245e-05, "loss": 0.5378, "step": 15090 }, { "epoch": 2.4634912860699565, "grad_norm": 2.5301620960235596, "learning_rate": 1.618917709016837e-05, "loss": 0.5215, "step": 15091 }, { "epoch": 2.463654544712461, "grad_norm": 2.591151475906372, "learning_rate": 1.618867348922713e-05, "loss": 0.5177, "step": 15092 }, { "epoch": 2.463817803354965, "grad_norm": 2.809492826461792, "learning_rate": 1.618816986284659e-05, "loss": 0.6162, "step": 15093 }, { "epoch": 2.4639810619974694, "grad_norm": 2.849571943283081, "learning_rate": 1.6187666211028817e-05, "loss": 0.4847, "step": 15094 }, { "epoch": 2.464144320639974, "grad_norm": 2.9043350219726562, "learning_rate": 1.618716253377589e-05, "loss": 0.5913, "step": 15095 }, { "epoch": 2.4643075792824782, "grad_norm": 2.8217852115631104, "learning_rate": 1.618665883108987e-05, "loss": 0.6399, "step": 15096 }, { "epoch": 2.4644708379249827, "grad_norm": 2.9135870933532715, "learning_rate": 1.6186155102972834e-05, "loss": 0.557, "step": 15097 }, { "epoch": 2.464634096567487, "grad_norm": 2.749678373336792, "learning_rate": 1.618565134942685e-05, "loss": 0.6738, "step": 15098 }, { "epoch": 2.4647973552099915, "grad_norm": 2.9445884227752686, "learning_rate": 1.6185147570453987e-05, "loss": 0.5895, "step": 15099 }, { "epoch": 2.464960613852496, "grad_norm": 2.7779693603515625, "learning_rate": 1.6184643766056315e-05, "loss": 0.5433, "step": 15100 }, { "epoch": 2.465123872495, "grad_norm": 3.0957112312316895, "learning_rate": 1.6184139936235915e-05, "loss": 0.6675, "step": 15101 }, { "epoch": 2.4652871311375044, "grad_norm": 2.629936456680298, "learning_rate": 1.6183636080994846e-05, "loss": 0.4791, "step": 15102 }, { "epoch": 2.465450389780009, "grad_norm": 2.15213680267334, "learning_rate": 1.6183132200335186e-05, "loss": 0.4501, "step": 15103 }, { "epoch": 2.4656136484225133, "grad_norm": 2.863041400909424, "learning_rate": 1.6182628294259002e-05, "loss": 0.6018, "step": 15104 }, { "epoch": 2.4657769070650177, "grad_norm": 3.0250658988952637, "learning_rate": 1.618212436276837e-05, "loss": 0.4859, "step": 15105 }, { "epoch": 2.465940165707522, "grad_norm": 2.9968111515045166, "learning_rate": 1.6181620405865356e-05, "loss": 0.5925, "step": 15106 }, { "epoch": 2.4661034243500266, "grad_norm": 3.635796308517456, "learning_rate": 1.6181116423552043e-05, "loss": 0.7142, "step": 15107 }, { "epoch": 2.466266682992531, "grad_norm": 2.961820363998413, "learning_rate": 1.6180612415830485e-05, "loss": 0.6324, "step": 15108 }, { "epoch": 2.4664299416350355, "grad_norm": 2.6575756072998047, "learning_rate": 1.6180108382702763e-05, "loss": 0.5668, "step": 15109 }, { "epoch": 2.46659320027754, "grad_norm": 3.126991033554077, "learning_rate": 1.6179604324170955e-05, "loss": 0.5907, "step": 15110 }, { "epoch": 2.466756458920044, "grad_norm": 2.7205560207366943, "learning_rate": 1.6179100240237123e-05, "loss": 0.4923, "step": 15111 }, { "epoch": 2.4669197175625484, "grad_norm": 3.702613592147827, "learning_rate": 1.6178596130903345e-05, "loss": 0.75, "step": 15112 }, { "epoch": 2.467082976205053, "grad_norm": 2.779045820236206, "learning_rate": 1.617809199617169e-05, "loss": 0.6469, "step": 15113 }, { "epoch": 2.4672462348475572, "grad_norm": 2.2306032180786133, "learning_rate": 1.6177587836044233e-05, "loss": 0.4597, "step": 15114 }, { "epoch": 2.4674094934900617, "grad_norm": 3.207979917526245, "learning_rate": 1.6177083650523043e-05, "loss": 0.5642, "step": 15115 }, { "epoch": 2.467572752132566, "grad_norm": 2.5234885215759277, "learning_rate": 1.6176579439610197e-05, "loss": 0.4959, "step": 15116 }, { "epoch": 2.4677360107750705, "grad_norm": 3.3528356552124023, "learning_rate": 1.6176075203307763e-05, "loss": 0.6553, "step": 15117 }, { "epoch": 2.467899269417575, "grad_norm": 3.1530323028564453, "learning_rate": 1.617557094161782e-05, "loss": 0.6308, "step": 15118 }, { "epoch": 2.468062528060079, "grad_norm": 3.402456045150757, "learning_rate": 1.617506665454243e-05, "loss": 0.6359, "step": 15119 }, { "epoch": 2.4682257867025834, "grad_norm": 2.50899600982666, "learning_rate": 1.6174562342083677e-05, "loss": 0.4909, "step": 15120 }, { "epoch": 2.468389045345088, "grad_norm": 2.7464771270751953, "learning_rate": 1.6174058004243628e-05, "loss": 0.5699, "step": 15121 }, { "epoch": 2.4685523039875923, "grad_norm": 2.865485906600952, "learning_rate": 1.6173553641024362e-05, "loss": 0.6301, "step": 15122 }, { "epoch": 2.4687155626300967, "grad_norm": 2.671278238296509, "learning_rate": 1.6173049252427944e-05, "loss": 0.4804, "step": 15123 }, { "epoch": 2.468878821272601, "grad_norm": 2.3890254497528076, "learning_rate": 1.6172544838456452e-05, "loss": 0.4705, "step": 15124 }, { "epoch": 2.4690420799151056, "grad_norm": 2.75603985786438, "learning_rate": 1.617204039911196e-05, "loss": 0.6135, "step": 15125 }, { "epoch": 2.46920533855761, "grad_norm": 2.5734941959381104, "learning_rate": 1.6171535934396537e-05, "loss": 0.4608, "step": 15126 }, { "epoch": 2.4693685972001145, "grad_norm": 3.0925421714782715, "learning_rate": 1.6171031444312264e-05, "loss": 0.6205, "step": 15127 }, { "epoch": 2.469531855842619, "grad_norm": 3.2158310413360596, "learning_rate": 1.617052692886121e-05, "loss": 0.603, "step": 15128 }, { "epoch": 2.469695114485123, "grad_norm": 2.9694037437438965, "learning_rate": 1.617002238804545e-05, "loss": 0.6142, "step": 15129 }, { "epoch": 2.4698583731276273, "grad_norm": 2.255885362625122, "learning_rate": 1.6169517821867054e-05, "loss": 0.4685, "step": 15130 }, { "epoch": 2.470021631770132, "grad_norm": 2.880419969558716, "learning_rate": 1.6169013230328102e-05, "loss": 0.6102, "step": 15131 }, { "epoch": 2.4701848904126362, "grad_norm": 2.7403440475463867, "learning_rate": 1.616850861343067e-05, "loss": 0.6102, "step": 15132 }, { "epoch": 2.4703481490551407, "grad_norm": 3.1382224559783936, "learning_rate": 1.6168003971176823e-05, "loss": 0.5831, "step": 15133 }, { "epoch": 2.470511407697645, "grad_norm": 3.002002000808716, "learning_rate": 1.6167499303568644e-05, "loss": 0.6002, "step": 15134 }, { "epoch": 2.4706746663401495, "grad_norm": 3.737642765045166, "learning_rate": 1.6166994610608203e-05, "loss": 0.7769, "step": 15135 }, { "epoch": 2.4708379249826535, "grad_norm": 2.5405006408691406, "learning_rate": 1.616648989229758e-05, "loss": 0.5674, "step": 15136 }, { "epoch": 2.471001183625158, "grad_norm": 2.5246012210845947, "learning_rate": 1.616598514863884e-05, "loss": 0.4854, "step": 15137 }, { "epoch": 2.4711644422676624, "grad_norm": 3.016866683959961, "learning_rate": 1.6165480379634063e-05, "loss": 0.5684, "step": 15138 }, { "epoch": 2.471327700910167, "grad_norm": 2.8612303733825684, "learning_rate": 1.6164975585285327e-05, "loss": 0.6049, "step": 15139 }, { "epoch": 2.4714909595526713, "grad_norm": 2.8837552070617676, "learning_rate": 1.61644707655947e-05, "loss": 0.5869, "step": 15140 }, { "epoch": 2.4716542181951757, "grad_norm": 2.8222222328186035, "learning_rate": 1.6163965920564268e-05, "loss": 0.6561, "step": 15141 }, { "epoch": 2.47181747683768, "grad_norm": 2.308342456817627, "learning_rate": 1.6163461050196093e-05, "loss": 0.4949, "step": 15142 }, { "epoch": 2.4719807354801846, "grad_norm": 2.772857666015625, "learning_rate": 1.616295615449226e-05, "loss": 0.5903, "step": 15143 }, { "epoch": 2.472143994122689, "grad_norm": 2.793175458908081, "learning_rate": 1.6162451233454843e-05, "loss": 0.5075, "step": 15144 }, { "epoch": 2.4723072527651935, "grad_norm": 2.957324981689453, "learning_rate": 1.616194628708591e-05, "loss": 0.5572, "step": 15145 }, { "epoch": 2.4724705114076975, "grad_norm": 3.048919200897217, "learning_rate": 1.616144131538755e-05, "loss": 0.7172, "step": 15146 }, { "epoch": 2.472633770050202, "grad_norm": 2.9864742755889893, "learning_rate": 1.6160936318361827e-05, "loss": 0.5643, "step": 15147 }, { "epoch": 2.4727970286927063, "grad_norm": 2.9782609939575195, "learning_rate": 1.6160431296010822e-05, "loss": 0.5974, "step": 15148 }, { "epoch": 2.472960287335211, "grad_norm": 2.9513494968414307, "learning_rate": 1.615992624833661e-05, "loss": 0.6268, "step": 15149 }, { "epoch": 2.473123545977715, "grad_norm": 3.387047052383423, "learning_rate": 1.6159421175341265e-05, "loss": 0.6717, "step": 15150 }, { "epoch": 2.4732868046202197, "grad_norm": 2.986475944519043, "learning_rate": 1.6158916077026868e-05, "loss": 0.6542, "step": 15151 }, { "epoch": 2.473450063262724, "grad_norm": 2.8245744705200195, "learning_rate": 1.6158410953395493e-05, "loss": 0.561, "step": 15152 }, { "epoch": 2.4736133219052285, "grad_norm": 2.6040384769439697, "learning_rate": 1.6157905804449217e-05, "loss": 0.6073, "step": 15153 }, { "epoch": 2.4737765805477325, "grad_norm": 3.008627414703369, "learning_rate": 1.6157400630190113e-05, "loss": 0.6343, "step": 15154 }, { "epoch": 2.473939839190237, "grad_norm": 2.6456429958343506, "learning_rate": 1.615689543062026e-05, "loss": 0.4875, "step": 15155 }, { "epoch": 2.4741030978327414, "grad_norm": 3.0645618438720703, "learning_rate": 1.6156390205741734e-05, "loss": 0.4992, "step": 15156 }, { "epoch": 2.474266356475246, "grad_norm": 3.6095762252807617, "learning_rate": 1.6155884955556613e-05, "loss": 0.7056, "step": 15157 }, { "epoch": 2.4744296151177503, "grad_norm": 2.4502828121185303, "learning_rate": 1.6155379680066975e-05, "loss": 0.5174, "step": 15158 }, { "epoch": 2.4745928737602547, "grad_norm": 2.8002114295959473, "learning_rate": 1.6154874379274895e-05, "loss": 0.5626, "step": 15159 }, { "epoch": 2.474756132402759, "grad_norm": 2.9353411197662354, "learning_rate": 1.615436905318245e-05, "loss": 0.5876, "step": 15160 }, { "epoch": 2.4749193910452636, "grad_norm": 2.125302314758301, "learning_rate": 1.6153863701791717e-05, "loss": 0.4599, "step": 15161 }, { "epoch": 2.475082649687768, "grad_norm": 3.0032451152801514, "learning_rate": 1.6153358325104776e-05, "loss": 0.5821, "step": 15162 }, { "epoch": 2.4752459083302725, "grad_norm": 2.7202260494232178, "learning_rate": 1.61528529231237e-05, "loss": 0.4826, "step": 15163 }, { "epoch": 2.4754091669727765, "grad_norm": 2.437588691711426, "learning_rate": 1.615234749585057e-05, "loss": 0.5393, "step": 15164 }, { "epoch": 2.475572425615281, "grad_norm": 2.5224483013153076, "learning_rate": 1.615184204328746e-05, "loss": 0.509, "step": 15165 }, { "epoch": 2.4757356842577853, "grad_norm": 3.168748140335083, "learning_rate": 1.6151336565436455e-05, "loss": 0.657, "step": 15166 }, { "epoch": 2.47589894290029, "grad_norm": 2.582310199737549, "learning_rate": 1.6150831062299625e-05, "loss": 0.4781, "step": 15167 }, { "epoch": 2.476062201542794, "grad_norm": 3.253166675567627, "learning_rate": 1.615032553387905e-05, "loss": 0.6687, "step": 15168 }, { "epoch": 2.4762254601852987, "grad_norm": 2.5530552864074707, "learning_rate": 1.614981998017681e-05, "loss": 0.4834, "step": 15169 }, { "epoch": 2.476388718827803, "grad_norm": 2.8270602226257324, "learning_rate": 1.6149314401194983e-05, "loss": 0.4836, "step": 15170 }, { "epoch": 2.476551977470307, "grad_norm": 2.9560091495513916, "learning_rate": 1.6148808796935647e-05, "loss": 0.5668, "step": 15171 }, { "epoch": 2.4767152361128115, "grad_norm": 2.858013868331909, "learning_rate": 1.614830316740088e-05, "loss": 0.5073, "step": 15172 }, { "epoch": 2.476878494755316, "grad_norm": 3.478738784790039, "learning_rate": 1.614779751259276e-05, "loss": 0.5496, "step": 15173 }, { "epoch": 2.4770417533978204, "grad_norm": 3.4239346981048584, "learning_rate": 1.6147291832513365e-05, "loss": 0.5916, "step": 15174 }, { "epoch": 2.477205012040325, "grad_norm": 2.97536563873291, "learning_rate": 1.6146786127164773e-05, "loss": 0.6604, "step": 15175 }, { "epoch": 2.4773682706828293, "grad_norm": 3.0380096435546875, "learning_rate": 1.614628039654907e-05, "loss": 0.5649, "step": 15176 }, { "epoch": 2.4775315293253337, "grad_norm": 3.083052635192871, "learning_rate": 1.614577464066832e-05, "loss": 0.6033, "step": 15177 }, { "epoch": 2.477694787967838, "grad_norm": 3.0525248050689697, "learning_rate": 1.614526885952462e-05, "loss": 0.6933, "step": 15178 }, { "epoch": 2.4778580466103426, "grad_norm": 3.1784732341766357, "learning_rate": 1.6144763053120035e-05, "loss": 0.6095, "step": 15179 }, { "epoch": 2.478021305252847, "grad_norm": 2.6528213024139404, "learning_rate": 1.614425722145665e-05, "loss": 0.5156, "step": 15180 }, { "epoch": 2.478184563895351, "grad_norm": 2.888235092163086, "learning_rate": 1.6143751364536545e-05, "loss": 0.566, "step": 15181 }, { "epoch": 2.4783478225378555, "grad_norm": 3.085268259048462, "learning_rate": 1.6143245482361795e-05, "loss": 0.5634, "step": 15182 }, { "epoch": 2.47851108118036, "grad_norm": 2.387540817260742, "learning_rate": 1.6142739574934486e-05, "loss": 0.5376, "step": 15183 }, { "epoch": 2.4786743398228643, "grad_norm": 3.2535109519958496, "learning_rate": 1.614223364225669e-05, "loss": 0.6198, "step": 15184 }, { "epoch": 2.4788375984653688, "grad_norm": 2.793558120727539, "learning_rate": 1.6141727684330494e-05, "loss": 0.5833, "step": 15185 }, { "epoch": 2.479000857107873, "grad_norm": 2.5852580070495605, "learning_rate": 1.6141221701157975e-05, "loss": 0.5502, "step": 15186 }, { "epoch": 2.4791641157503777, "grad_norm": 3.3656232357025146, "learning_rate": 1.614071569274121e-05, "loss": 0.7008, "step": 15187 }, { "epoch": 2.479327374392882, "grad_norm": 2.9558229446411133, "learning_rate": 1.6140209659082282e-05, "loss": 0.5833, "step": 15188 }, { "epoch": 2.479490633035386, "grad_norm": 3.065570831298828, "learning_rate": 1.6139703600183273e-05, "loss": 0.7095, "step": 15189 }, { "epoch": 2.4796538916778905, "grad_norm": 3.0572450160980225, "learning_rate": 1.613919751604626e-05, "loss": 0.5994, "step": 15190 }, { "epoch": 2.479817150320395, "grad_norm": 2.9872336387634277, "learning_rate": 1.6138691406673322e-05, "loss": 0.5986, "step": 15191 }, { "epoch": 2.4799804089628994, "grad_norm": 2.8586676120758057, "learning_rate": 1.6138185272066542e-05, "loss": 0.5029, "step": 15192 }, { "epoch": 2.480143667605404, "grad_norm": 3.2001519203186035, "learning_rate": 1.6137679112228002e-05, "loss": 0.7558, "step": 15193 }, { "epoch": 2.4803069262479083, "grad_norm": 3.0301871299743652, "learning_rate": 1.613717292715978e-05, "loss": 0.6206, "step": 15194 }, { "epoch": 2.4804701848904127, "grad_norm": 2.7282745838165283, "learning_rate": 1.6136666716863957e-05, "loss": 0.5716, "step": 15195 }, { "epoch": 2.480633443532917, "grad_norm": 2.79294490814209, "learning_rate": 1.6136160481342614e-05, "loss": 0.4796, "step": 15196 }, { "epoch": 2.4807967021754216, "grad_norm": 2.6313302516937256, "learning_rate": 1.613565422059783e-05, "loss": 0.4609, "step": 15197 }, { "epoch": 2.480959960817926, "grad_norm": 2.9176011085510254, "learning_rate": 1.6135147934631695e-05, "loss": 0.5576, "step": 15198 }, { "epoch": 2.48112321946043, "grad_norm": 3.244309663772583, "learning_rate": 1.6134641623446277e-05, "loss": 0.5783, "step": 15199 }, { "epoch": 2.4812864781029345, "grad_norm": 2.890979528427124, "learning_rate": 1.6134135287043668e-05, "loss": 0.5874, "step": 15200 }, { "epoch": 2.481449736745439, "grad_norm": 2.7552878856658936, "learning_rate": 1.6133628925425942e-05, "loss": 0.5248, "step": 15201 }, { "epoch": 2.4816129953879433, "grad_norm": 2.7814602851867676, "learning_rate": 1.6133122538595186e-05, "loss": 0.511, "step": 15202 }, { "epoch": 2.4817762540304478, "grad_norm": 2.781726121902466, "learning_rate": 1.6132616126553474e-05, "loss": 0.5868, "step": 15203 }, { "epoch": 2.481939512672952, "grad_norm": 2.474055767059326, "learning_rate": 1.61321096893029e-05, "loss": 0.5247, "step": 15204 }, { "epoch": 2.4821027713154566, "grad_norm": 2.9033362865448, "learning_rate": 1.6131603226845535e-05, "loss": 0.6304, "step": 15205 }, { "epoch": 2.482266029957961, "grad_norm": 3.0479304790496826, "learning_rate": 1.6131096739183465e-05, "loss": 0.6391, "step": 15206 }, { "epoch": 2.482429288600465, "grad_norm": 2.895348310470581, "learning_rate": 1.613059022631877e-05, "loss": 0.6247, "step": 15207 }, { "epoch": 2.4825925472429695, "grad_norm": 2.490791082382202, "learning_rate": 1.613008368825353e-05, "loss": 0.5158, "step": 15208 }, { "epoch": 2.482755805885474, "grad_norm": 2.5344719886779785, "learning_rate": 1.6129577124989838e-05, "loss": 0.5245, "step": 15209 }, { "epoch": 2.4829190645279784, "grad_norm": 3.030193328857422, "learning_rate": 1.6129070536529767e-05, "loss": 0.578, "step": 15210 }, { "epoch": 2.483082323170483, "grad_norm": 2.2842392921447754, "learning_rate": 1.61285639228754e-05, "loss": 0.4088, "step": 15211 }, { "epoch": 2.4832455818129873, "grad_norm": 2.8044779300689697, "learning_rate": 1.612805728402882e-05, "loss": 0.5512, "step": 15212 }, { "epoch": 2.4834088404554917, "grad_norm": 2.671653985977173, "learning_rate": 1.6127550619992113e-05, "loss": 0.4445, "step": 15213 }, { "epoch": 2.483572099097996, "grad_norm": 2.6714768409729004, "learning_rate": 1.6127043930767356e-05, "loss": 0.5225, "step": 15214 }, { "epoch": 2.4837353577405006, "grad_norm": 2.8429551124572754, "learning_rate": 1.6126537216356636e-05, "loss": 0.5823, "step": 15215 }, { "epoch": 2.483898616383005, "grad_norm": 2.7976624965667725, "learning_rate": 1.6126030476762035e-05, "loss": 0.5184, "step": 15216 }, { "epoch": 2.484061875025509, "grad_norm": 3.6549336910247803, "learning_rate": 1.6125523711985636e-05, "loss": 0.705, "step": 15217 }, { "epoch": 2.4842251336680135, "grad_norm": 3.172905921936035, "learning_rate": 1.6125016922029525e-05, "loss": 0.6168, "step": 15218 }, { "epoch": 2.484388392310518, "grad_norm": 2.5143680572509766, "learning_rate": 1.6124510106895777e-05, "loss": 0.5261, "step": 15219 }, { "epoch": 2.4845516509530223, "grad_norm": 2.7933554649353027, "learning_rate": 1.6124003266586485e-05, "loss": 0.5352, "step": 15220 }, { "epoch": 2.4847149095955268, "grad_norm": 3.563192129135132, "learning_rate": 1.6123496401103726e-05, "loss": 0.7002, "step": 15221 }, { "epoch": 2.484878168238031, "grad_norm": 2.694897413253784, "learning_rate": 1.6122989510449584e-05, "loss": 0.5847, "step": 15222 }, { "epoch": 2.4850414268805356, "grad_norm": 3.505608081817627, "learning_rate": 1.6122482594626142e-05, "loss": 0.7247, "step": 15223 }, { "epoch": 2.4852046855230396, "grad_norm": 2.7697861194610596, "learning_rate": 1.6121975653635492e-05, "loss": 0.5549, "step": 15224 }, { "epoch": 2.485367944165544, "grad_norm": 3.237240791320801, "learning_rate": 1.612146868747971e-05, "loss": 0.6404, "step": 15225 }, { "epoch": 2.4855312028080485, "grad_norm": 2.9960243701934814, "learning_rate": 1.6120961696160876e-05, "loss": 0.4513, "step": 15226 }, { "epoch": 2.485694461450553, "grad_norm": 3.7900679111480713, "learning_rate": 1.6120454679681087e-05, "loss": 0.7978, "step": 15227 }, { "epoch": 2.4858577200930574, "grad_norm": 2.6906211376190186, "learning_rate": 1.6119947638042418e-05, "loss": 0.4981, "step": 15228 }, { "epoch": 2.486020978735562, "grad_norm": 2.748228073120117, "learning_rate": 1.6119440571246954e-05, "loss": 0.5111, "step": 15229 }, { "epoch": 2.4861842373780663, "grad_norm": 3.1062381267547607, "learning_rate": 1.611893347929678e-05, "loss": 0.5721, "step": 15230 }, { "epoch": 2.4863474960205707, "grad_norm": 3.2378220558166504, "learning_rate": 1.6118426362193984e-05, "loss": 0.582, "step": 15231 }, { "epoch": 2.486510754663075, "grad_norm": 2.7465648651123047, "learning_rate": 1.6117919219940642e-05, "loss": 0.5266, "step": 15232 }, { "epoch": 2.4866740133055796, "grad_norm": 3.6330978870391846, "learning_rate": 1.6117412052538847e-05, "loss": 0.8126, "step": 15233 }, { "epoch": 2.4868372719480836, "grad_norm": 2.8382339477539062, "learning_rate": 1.6116904859990684e-05, "loss": 0.5834, "step": 15234 }, { "epoch": 2.487000530590588, "grad_norm": 2.477921485900879, "learning_rate": 1.6116397642298232e-05, "loss": 0.4695, "step": 15235 }, { "epoch": 2.4871637892330924, "grad_norm": 2.9730474948883057, "learning_rate": 1.611589039946358e-05, "loss": 0.6195, "step": 15236 }, { "epoch": 2.487327047875597, "grad_norm": 2.5987744331359863, "learning_rate": 1.611538313148881e-05, "loss": 0.5574, "step": 15237 }, { "epoch": 2.4874903065181013, "grad_norm": 3.528597593307495, "learning_rate": 1.611487583837601e-05, "loss": 0.6337, "step": 15238 }, { "epoch": 2.4876535651606058, "grad_norm": 2.9788031578063965, "learning_rate": 1.6114368520127265e-05, "loss": 0.6042, "step": 15239 }, { "epoch": 2.48781682380311, "grad_norm": 2.7612576484680176, "learning_rate": 1.6113861176744658e-05, "loss": 0.6048, "step": 15240 }, { "epoch": 2.4879800824456146, "grad_norm": 2.4544801712036133, "learning_rate": 1.6113353808230275e-05, "loss": 0.5019, "step": 15241 }, { "epoch": 2.4881433410881186, "grad_norm": 2.979597330093384, "learning_rate": 1.6112846414586204e-05, "loss": 0.6115, "step": 15242 }, { "epoch": 2.488306599730623, "grad_norm": 3.036883592605591, "learning_rate": 1.6112338995814534e-05, "loss": 0.66, "step": 15243 }, { "epoch": 2.4884698583731275, "grad_norm": 2.66117262840271, "learning_rate": 1.6111831551917343e-05, "loss": 0.5174, "step": 15244 }, { "epoch": 2.488633117015632, "grad_norm": 3.199802875518799, "learning_rate": 1.611132408289672e-05, "loss": 0.5324, "step": 15245 }, { "epoch": 2.4887963756581364, "grad_norm": 2.9739577770233154, "learning_rate": 1.611081658875475e-05, "loss": 0.6368, "step": 15246 }, { "epoch": 2.488959634300641, "grad_norm": 3.0192062854766846, "learning_rate": 1.6110309069493522e-05, "loss": 0.6083, "step": 15247 }, { "epoch": 2.4891228929431453, "grad_norm": 2.496670722961426, "learning_rate": 1.610980152511512e-05, "loss": 0.548, "step": 15248 }, { "epoch": 2.4892861515856497, "grad_norm": 2.5759940147399902, "learning_rate": 1.6109293955621635e-05, "loss": 0.4741, "step": 15249 }, { "epoch": 2.489449410228154, "grad_norm": 2.487941026687622, "learning_rate": 1.6108786361015145e-05, "loss": 0.47, "step": 15250 }, { "epoch": 2.4896126688706586, "grad_norm": 2.8389670848846436, "learning_rate": 1.610827874129774e-05, "loss": 0.6017, "step": 15251 }, { "epoch": 2.4897759275131626, "grad_norm": 3.2856435775756836, "learning_rate": 1.6107771096471512e-05, "loss": 0.7205, "step": 15252 }, { "epoch": 2.489939186155667, "grad_norm": 2.6433370113372803, "learning_rate": 1.610726342653854e-05, "loss": 0.4852, "step": 15253 }, { "epoch": 2.4901024447981714, "grad_norm": 2.621049165725708, "learning_rate": 1.6106755731500916e-05, "loss": 0.6239, "step": 15254 }, { "epoch": 2.490265703440676, "grad_norm": 2.904270887374878, "learning_rate": 1.6106248011360724e-05, "loss": 0.6052, "step": 15255 }, { "epoch": 2.4904289620831803, "grad_norm": 3.043858289718628, "learning_rate": 1.610574026612005e-05, "loss": 0.6121, "step": 15256 }, { "epoch": 2.4905922207256848, "grad_norm": 2.3767945766448975, "learning_rate": 1.6105232495780985e-05, "loss": 0.4595, "step": 15257 }, { "epoch": 2.490755479368189, "grad_norm": 3.670032262802124, "learning_rate": 1.6104724700345612e-05, "loss": 0.7304, "step": 15258 }, { "epoch": 2.4909187380106936, "grad_norm": 2.9357216358184814, "learning_rate": 1.6104216879816027e-05, "loss": 0.6005, "step": 15259 }, { "epoch": 2.4910819966531976, "grad_norm": 2.9215006828308105, "learning_rate": 1.6103709034194308e-05, "loss": 0.5844, "step": 15260 }, { "epoch": 2.491245255295702, "grad_norm": 2.5621819496154785, "learning_rate": 1.6103201163482544e-05, "loss": 0.4583, "step": 15261 }, { "epoch": 2.4914085139382065, "grad_norm": 2.521735191345215, "learning_rate": 1.610269326768283e-05, "loss": 0.5603, "step": 15262 }, { "epoch": 2.491571772580711, "grad_norm": 2.860640048980713, "learning_rate": 1.6102185346797242e-05, "loss": 0.5816, "step": 15263 }, { "epoch": 2.4917350312232154, "grad_norm": 3.2426390647888184, "learning_rate": 1.610167740082788e-05, "loss": 0.6384, "step": 15264 }, { "epoch": 2.49189828986572, "grad_norm": 2.5329692363739014, "learning_rate": 1.610116942977682e-05, "loss": 0.4962, "step": 15265 }, { "epoch": 2.4920615485082243, "grad_norm": 3.233289957046509, "learning_rate": 1.610066143364616e-05, "loss": 0.6728, "step": 15266 }, { "epoch": 2.4922248071507287, "grad_norm": 2.9437997341156006, "learning_rate": 1.6100153412437984e-05, "loss": 0.6233, "step": 15267 }, { "epoch": 2.492388065793233, "grad_norm": 3.26497220993042, "learning_rate": 1.609964536615438e-05, "loss": 0.5758, "step": 15268 }, { "epoch": 2.492551324435737, "grad_norm": 3.2813475131988525, "learning_rate": 1.6099137294797438e-05, "loss": 0.5044, "step": 15269 }, { "epoch": 2.4927145830782416, "grad_norm": 2.7900094985961914, "learning_rate": 1.6098629198369246e-05, "loss": 0.6218, "step": 15270 }, { "epoch": 2.492877841720746, "grad_norm": 2.648134469985962, "learning_rate": 1.6098121076871887e-05, "loss": 0.5119, "step": 15271 }, { "epoch": 2.4930411003632504, "grad_norm": 3.0206737518310547, "learning_rate": 1.6097612930307463e-05, "loss": 0.5682, "step": 15272 }, { "epoch": 2.493204359005755, "grad_norm": 3.243830680847168, "learning_rate": 1.609710475867805e-05, "loss": 0.5198, "step": 15273 }, { "epoch": 2.4933676176482593, "grad_norm": 2.98600697517395, "learning_rate": 1.6096596561985744e-05, "loss": 0.5888, "step": 15274 }, { "epoch": 2.4935308762907638, "grad_norm": 2.6267871856689453, "learning_rate": 1.6096088340232627e-05, "loss": 0.4719, "step": 15275 }, { "epoch": 2.493694134933268, "grad_norm": 3.1382062435150146, "learning_rate": 1.6095580093420796e-05, "loss": 0.5951, "step": 15276 }, { "epoch": 2.493857393575772, "grad_norm": 2.5018599033355713, "learning_rate": 1.6095071821552335e-05, "loss": 0.4973, "step": 15277 }, { "epoch": 2.4940206522182766, "grad_norm": 3.63826584815979, "learning_rate": 1.609456352462934e-05, "loss": 0.705, "step": 15278 }, { "epoch": 2.494183910860781, "grad_norm": 3.0675442218780518, "learning_rate": 1.6094055202653892e-05, "loss": 0.6178, "step": 15279 }, { "epoch": 2.4943471695032855, "grad_norm": 2.506105422973633, "learning_rate": 1.6093546855628085e-05, "loss": 0.5011, "step": 15280 }, { "epoch": 2.49451042814579, "grad_norm": 2.6050000190734863, "learning_rate": 1.6093038483554003e-05, "loss": 0.4936, "step": 15281 }, { "epoch": 2.4946736867882944, "grad_norm": 2.8002824783325195, "learning_rate": 1.6092530086433747e-05, "loss": 0.5578, "step": 15282 }, { "epoch": 2.494836945430799, "grad_norm": 2.9793965816497803, "learning_rate": 1.60920216642694e-05, "loss": 0.5864, "step": 15283 }, { "epoch": 2.4950002040733033, "grad_norm": 3.3324594497680664, "learning_rate": 1.6091513217063047e-05, "loss": 0.7207, "step": 15284 }, { "epoch": 2.4951634627158077, "grad_norm": 2.4599738121032715, "learning_rate": 1.609100474481679e-05, "loss": 0.4535, "step": 15285 }, { "epoch": 2.495326721358312, "grad_norm": 3.115234613418579, "learning_rate": 1.6090496247532707e-05, "loss": 0.5739, "step": 15286 }, { "epoch": 2.495489980000816, "grad_norm": 2.4977996349334717, "learning_rate": 1.6089987725212894e-05, "loss": 0.4959, "step": 15287 }, { "epoch": 2.4956532386433206, "grad_norm": 2.74963116645813, "learning_rate": 1.6089479177859445e-05, "loss": 0.5238, "step": 15288 }, { "epoch": 2.495816497285825, "grad_norm": 2.6366231441497803, "learning_rate": 1.6088970605474444e-05, "loss": 0.4891, "step": 15289 }, { "epoch": 2.4959797559283294, "grad_norm": 3.17602276802063, "learning_rate": 1.6088462008059986e-05, "loss": 0.5845, "step": 15290 }, { "epoch": 2.496143014570834, "grad_norm": 2.73999285697937, "learning_rate": 1.6087953385618153e-05, "loss": 0.5887, "step": 15291 }, { "epoch": 2.4963062732133383, "grad_norm": 2.3138482570648193, "learning_rate": 1.6087444738151048e-05, "loss": 0.5345, "step": 15292 }, { "epoch": 2.4964695318558427, "grad_norm": 2.7682106494903564, "learning_rate": 1.6086936065660756e-05, "loss": 0.4975, "step": 15293 }, { "epoch": 2.496632790498347, "grad_norm": 2.5713419914245605, "learning_rate": 1.608642736814937e-05, "loss": 0.5343, "step": 15294 }, { "epoch": 2.496796049140851, "grad_norm": 2.681379795074463, "learning_rate": 1.6085918645618975e-05, "loss": 0.5064, "step": 15295 }, { "epoch": 2.4969593077833556, "grad_norm": 2.7071688175201416, "learning_rate": 1.6085409898071668e-05, "loss": 0.5442, "step": 15296 }, { "epoch": 2.49712256642586, "grad_norm": 3.47717547416687, "learning_rate": 1.608490112550954e-05, "loss": 0.5871, "step": 15297 }, { "epoch": 2.4972858250683645, "grad_norm": 2.66532826423645, "learning_rate": 1.608439232793468e-05, "loss": 0.5934, "step": 15298 }, { "epoch": 2.497449083710869, "grad_norm": 2.3862740993499756, "learning_rate": 1.608388350534918e-05, "loss": 0.4126, "step": 15299 }, { "epoch": 2.4976123423533734, "grad_norm": 3.5218915939331055, "learning_rate": 1.6083374657755132e-05, "loss": 0.7158, "step": 15300 }, { "epoch": 2.497775600995878, "grad_norm": 2.849724769592285, "learning_rate": 1.6082865785154628e-05, "loss": 0.5921, "step": 15301 }, { "epoch": 2.4979388596383822, "grad_norm": 2.2723422050476074, "learning_rate": 1.608235688754976e-05, "loss": 0.4674, "step": 15302 }, { "epoch": 2.4981021182808867, "grad_norm": 2.9338629245758057, "learning_rate": 1.608184796494262e-05, "loss": 0.6017, "step": 15303 }, { "epoch": 2.498265376923391, "grad_norm": 3.143728733062744, "learning_rate": 1.60813390173353e-05, "loss": 0.6864, "step": 15304 }, { "epoch": 2.498428635565895, "grad_norm": 3.28078031539917, "learning_rate": 1.608083004472989e-05, "loss": 0.5726, "step": 15305 }, { "epoch": 2.4985918942083996, "grad_norm": 3.3327224254608154, "learning_rate": 1.6080321047128483e-05, "loss": 0.7448, "step": 15306 }, { "epoch": 2.498755152850904, "grad_norm": 2.9849600791931152, "learning_rate": 1.6079812024533175e-05, "loss": 0.6243, "step": 15307 }, { "epoch": 2.4989184114934084, "grad_norm": 3.3817288875579834, "learning_rate": 1.6079302976946055e-05, "loss": 0.6486, "step": 15308 }, { "epoch": 2.499081670135913, "grad_norm": 3.1524267196655273, "learning_rate": 1.6078793904369217e-05, "loss": 0.6202, "step": 15309 }, { "epoch": 2.4992449287784173, "grad_norm": 2.3739194869995117, "learning_rate": 1.6078284806804748e-05, "loss": 0.4499, "step": 15310 }, { "epoch": 2.4994081874209217, "grad_norm": 2.819856882095337, "learning_rate": 1.607777568425475e-05, "loss": 0.537, "step": 15311 }, { "epoch": 2.4995714460634257, "grad_norm": 3.0348119735717773, "learning_rate": 1.607726653672131e-05, "loss": 0.5783, "step": 15312 }, { "epoch": 2.49973470470593, "grad_norm": 3.0543177127838135, "learning_rate": 1.6076757364206523e-05, "loss": 0.6167, "step": 15313 }, { "epoch": 2.4998979633484346, "grad_norm": 2.6926541328430176, "learning_rate": 1.6076248166712477e-05, "loss": 0.5311, "step": 15314 }, { "epoch": 2.500061221990939, "grad_norm": 3.3910086154937744, "learning_rate": 1.6075738944241267e-05, "loss": 0.5915, "step": 15315 }, { "epoch": 2.5002244806334435, "grad_norm": 2.83768892288208, "learning_rate": 1.6075229696794995e-05, "loss": 0.5119, "step": 15316 }, { "epoch": 2.500387739275948, "grad_norm": 3.256270408630371, "learning_rate": 1.6074720424375746e-05, "loss": 0.5552, "step": 15317 }, { "epoch": 2.5005509979184524, "grad_norm": 2.1683871746063232, "learning_rate": 1.6074211126985614e-05, "loss": 0.4034, "step": 15318 }, { "epoch": 2.500714256560957, "grad_norm": 3.0172109603881836, "learning_rate": 1.6073701804626694e-05, "loss": 0.6031, "step": 15319 }, { "epoch": 2.5008775152034612, "grad_norm": 2.6664421558380127, "learning_rate": 1.607319245730108e-05, "loss": 0.544, "step": 15320 }, { "epoch": 2.5010407738459657, "grad_norm": 3.5190985202789307, "learning_rate": 1.6072683085010862e-05, "loss": 0.6544, "step": 15321 }, { "epoch": 2.50120403248847, "grad_norm": 2.8390557765960693, "learning_rate": 1.607217368775814e-05, "loss": 0.5133, "step": 15322 }, { "epoch": 2.501367291130974, "grad_norm": 3.180384635925293, "learning_rate": 1.6071664265545004e-05, "loss": 0.6264, "step": 15323 }, { "epoch": 2.5015305497734786, "grad_norm": 3.164165496826172, "learning_rate": 1.6071154818373547e-05, "loss": 0.5304, "step": 15324 }, { "epoch": 2.501693808415983, "grad_norm": 3.2021708488464355, "learning_rate": 1.6070645346245867e-05, "loss": 0.6419, "step": 15325 }, { "epoch": 2.5018570670584874, "grad_norm": 2.974979877471924, "learning_rate": 1.6070135849164058e-05, "loss": 0.6228, "step": 15326 }, { "epoch": 2.502020325700992, "grad_norm": 3.1494693756103516, "learning_rate": 1.606962632713021e-05, "loss": 0.6492, "step": 15327 }, { "epoch": 2.5021835843434963, "grad_norm": 3.8835017681121826, "learning_rate": 1.606911678014642e-05, "loss": 0.7433, "step": 15328 }, { "epoch": 2.5023468429860003, "grad_norm": 2.787299156188965, "learning_rate": 1.6068607208214783e-05, "loss": 0.5023, "step": 15329 }, { "epoch": 2.5025101016285047, "grad_norm": 2.981107234954834, "learning_rate": 1.6068097611337395e-05, "loss": 0.6254, "step": 15330 }, { "epoch": 2.502673360271009, "grad_norm": 3.1832962036132812, "learning_rate": 1.6067587989516343e-05, "loss": 0.5946, "step": 15331 }, { "epoch": 2.5028366189135136, "grad_norm": 2.9576303958892822, "learning_rate": 1.6067078342753736e-05, "loss": 0.5558, "step": 15332 }, { "epoch": 2.502999877556018, "grad_norm": 2.8381543159484863, "learning_rate": 1.6066568671051656e-05, "loss": 0.5853, "step": 15333 }, { "epoch": 2.5031631361985225, "grad_norm": 2.456049919128418, "learning_rate": 1.6066058974412205e-05, "loss": 0.522, "step": 15334 }, { "epoch": 2.503326394841027, "grad_norm": 2.847290277481079, "learning_rate": 1.6065549252837473e-05, "loss": 0.5398, "step": 15335 }, { "epoch": 2.5034896534835314, "grad_norm": 2.6454713344573975, "learning_rate": 1.606503950632956e-05, "loss": 0.5078, "step": 15336 }, { "epoch": 2.503652912126036, "grad_norm": 2.7812798023223877, "learning_rate": 1.606452973489056e-05, "loss": 0.537, "step": 15337 }, { "epoch": 2.5038161707685402, "grad_norm": 2.9756550788879395, "learning_rate": 1.606401993852257e-05, "loss": 0.5655, "step": 15338 }, { "epoch": 2.5039794294110447, "grad_norm": 3.21356463432312, "learning_rate": 1.606351011722768e-05, "loss": 0.8066, "step": 15339 }, { "epoch": 2.5041426880535487, "grad_norm": 2.940100908279419, "learning_rate": 1.606300027100799e-05, "loss": 0.5905, "step": 15340 }, { "epoch": 2.504305946696053, "grad_norm": 2.5101428031921387, "learning_rate": 1.60624903998656e-05, "loss": 0.5225, "step": 15341 }, { "epoch": 2.5044692053385575, "grad_norm": 2.820335626602173, "learning_rate": 1.6061980503802598e-05, "loss": 0.4951, "step": 15342 }, { "epoch": 2.504632463981062, "grad_norm": 3.30513334274292, "learning_rate": 1.606147058282108e-05, "loss": 0.644, "step": 15343 }, { "epoch": 2.5047957226235664, "grad_norm": 3.0550498962402344, "learning_rate": 1.606096063692315e-05, "loss": 0.6003, "step": 15344 }, { "epoch": 2.504958981266071, "grad_norm": 2.686002492904663, "learning_rate": 1.6060450666110897e-05, "loss": 0.5671, "step": 15345 }, { "epoch": 2.5051222399085753, "grad_norm": 2.4548051357269287, "learning_rate": 1.605994067038642e-05, "loss": 0.5576, "step": 15346 }, { "epoch": 2.5052854985510793, "grad_norm": 2.7252652645111084, "learning_rate": 1.6059430649751818e-05, "loss": 0.5809, "step": 15347 }, { "epoch": 2.5054487571935837, "grad_norm": 2.3070733547210693, "learning_rate": 1.6058920604209182e-05, "loss": 0.428, "step": 15348 }, { "epoch": 2.505612015836088, "grad_norm": 2.704761266708374, "learning_rate": 1.605841053376061e-05, "loss": 0.6337, "step": 15349 }, { "epoch": 2.5057752744785926, "grad_norm": 2.7240304946899414, "learning_rate": 1.60579004384082e-05, "loss": 0.6269, "step": 15350 }, { "epoch": 2.505938533121097, "grad_norm": 2.447913885116577, "learning_rate": 1.6057390318154048e-05, "loss": 0.4851, "step": 15351 }, { "epoch": 2.5061017917636015, "grad_norm": 2.8215296268463135, "learning_rate": 1.6056880173000256e-05, "loss": 0.6663, "step": 15352 }, { "epoch": 2.506265050406106, "grad_norm": 2.813678503036499, "learning_rate": 1.6056370002948914e-05, "loss": 0.5341, "step": 15353 }, { "epoch": 2.5064283090486104, "grad_norm": 3.4026544094085693, "learning_rate": 1.605585980800212e-05, "loss": 0.6478, "step": 15354 }, { "epoch": 2.506591567691115, "grad_norm": 2.8892107009887695, "learning_rate": 1.6055349588161978e-05, "loss": 0.6111, "step": 15355 }, { "epoch": 2.5067548263336192, "grad_norm": 2.5522468090057373, "learning_rate": 1.6054839343430577e-05, "loss": 0.5751, "step": 15356 }, { "epoch": 2.5069180849761237, "grad_norm": 3.368267774581909, "learning_rate": 1.6054329073810016e-05, "loss": 0.5962, "step": 15357 }, { "epoch": 2.5070813436186277, "grad_norm": 3.0293476581573486, "learning_rate": 1.60538187793024e-05, "loss": 0.6498, "step": 15358 }, { "epoch": 2.507244602261132, "grad_norm": 2.6961710453033447, "learning_rate": 1.6053308459909813e-05, "loss": 0.5364, "step": 15359 }, { "epoch": 2.5074078609036365, "grad_norm": 3.1466026306152344, "learning_rate": 1.6052798115634364e-05, "loss": 0.6629, "step": 15360 }, { "epoch": 2.507571119546141, "grad_norm": 2.7613322734832764, "learning_rate": 1.605228774647815e-05, "loss": 0.5811, "step": 15361 }, { "epoch": 2.5077343781886454, "grad_norm": 2.8802995681762695, "learning_rate": 1.6051777352443262e-05, "loss": 0.5291, "step": 15362 }, { "epoch": 2.50789763683115, "grad_norm": 2.9483602046966553, "learning_rate": 1.6051266933531806e-05, "loss": 0.5703, "step": 15363 }, { "epoch": 2.5080608954736543, "grad_norm": 2.6680023670196533, "learning_rate": 1.6050756489745873e-05, "loss": 0.5732, "step": 15364 }, { "epoch": 2.5082241541161583, "grad_norm": 3.55549693107605, "learning_rate": 1.6050246021087566e-05, "loss": 0.5707, "step": 15365 }, { "epoch": 2.5083874127586627, "grad_norm": 2.5439467430114746, "learning_rate": 1.6049735527558986e-05, "loss": 0.5747, "step": 15366 }, { "epoch": 2.508550671401167, "grad_norm": 2.4110231399536133, "learning_rate": 1.6049225009162227e-05, "loss": 0.4994, "step": 15367 }, { "epoch": 2.5087139300436716, "grad_norm": 3.2361574172973633, "learning_rate": 1.6048714465899386e-05, "loss": 0.6603, "step": 15368 }, { "epoch": 2.508877188686176, "grad_norm": 3.675060987472534, "learning_rate": 1.604820389777256e-05, "loss": 0.7484, "step": 15369 }, { "epoch": 2.5090404473286805, "grad_norm": 2.8006691932678223, "learning_rate": 1.6047693304783857e-05, "loss": 0.5727, "step": 15370 }, { "epoch": 2.509203705971185, "grad_norm": 2.608733654022217, "learning_rate": 1.604718268693537e-05, "loss": 0.5237, "step": 15371 }, { "epoch": 2.5093669646136894, "grad_norm": 3.184114694595337, "learning_rate": 1.6046672044229198e-05, "loss": 0.7371, "step": 15372 }, { "epoch": 2.509530223256194, "grad_norm": 3.0317907333374023, "learning_rate": 1.6046161376667436e-05, "loss": 0.6201, "step": 15373 }, { "epoch": 2.5096934818986982, "grad_norm": 2.8659496307373047, "learning_rate": 1.6045650684252193e-05, "loss": 0.6667, "step": 15374 }, { "epoch": 2.5098567405412027, "grad_norm": 2.9363133907318115, "learning_rate": 1.6045139966985562e-05, "loss": 0.5397, "step": 15375 }, { "epoch": 2.5100199991837067, "grad_norm": 3.0600545406341553, "learning_rate": 1.604462922486964e-05, "loss": 0.5338, "step": 15376 }, { "epoch": 2.510183257826211, "grad_norm": 2.876185178756714, "learning_rate": 1.6044118457906533e-05, "loss": 0.5462, "step": 15377 }, { "epoch": 2.5103465164687155, "grad_norm": 2.7069010734558105, "learning_rate": 1.6043607666098333e-05, "loss": 0.5588, "step": 15378 }, { "epoch": 2.51050977511122, "grad_norm": 3.0757899284362793, "learning_rate": 1.604309684944715e-05, "loss": 0.5701, "step": 15379 }, { "epoch": 2.5106730337537244, "grad_norm": 2.934262752532959, "learning_rate": 1.6042586007955075e-05, "loss": 0.5816, "step": 15380 }, { "epoch": 2.510836292396229, "grad_norm": 2.942927360534668, "learning_rate": 1.604207514162421e-05, "loss": 0.6137, "step": 15381 }, { "epoch": 2.510999551038733, "grad_norm": 2.5250766277313232, "learning_rate": 1.6041564250456655e-05, "loss": 0.5093, "step": 15382 }, { "epoch": 2.5111628096812373, "grad_norm": 3.2121880054473877, "learning_rate": 1.604105333445451e-05, "loss": 0.708, "step": 15383 }, { "epoch": 2.5113260683237417, "grad_norm": 2.610673427581787, "learning_rate": 1.6040542393619878e-05, "loss": 0.5233, "step": 15384 }, { "epoch": 2.511489326966246, "grad_norm": 3.5742828845977783, "learning_rate": 1.6040031427954856e-05, "loss": 0.7001, "step": 15385 }, { "epoch": 2.5116525856087506, "grad_norm": 2.924703359603882, "learning_rate": 1.6039520437461546e-05, "loss": 0.6919, "step": 15386 }, { "epoch": 2.511815844251255, "grad_norm": 2.8358614444732666, "learning_rate": 1.603900942214205e-05, "loss": 0.5896, "step": 15387 }, { "epoch": 2.5119791028937595, "grad_norm": 2.4159727096557617, "learning_rate": 1.6038498381998463e-05, "loss": 0.5089, "step": 15388 }, { "epoch": 2.512142361536264, "grad_norm": 2.7236008644104004, "learning_rate": 1.6037987317032888e-05, "loss": 0.501, "step": 15389 }, { "epoch": 2.5123056201787684, "grad_norm": 2.7853012084960938, "learning_rate": 1.6037476227247428e-05, "loss": 0.4824, "step": 15390 }, { "epoch": 2.512468878821273, "grad_norm": 2.3319621086120605, "learning_rate": 1.6036965112644188e-05, "loss": 0.6028, "step": 15391 }, { "epoch": 2.5126321374637772, "grad_norm": 3.0039873123168945, "learning_rate": 1.603645397322526e-05, "loss": 0.8836, "step": 15392 }, { "epoch": 2.512795396106281, "grad_norm": 2.8792710304260254, "learning_rate": 1.6035942808992747e-05, "loss": 0.5196, "step": 15393 }, { "epoch": 2.5129586547487857, "grad_norm": 2.944411516189575, "learning_rate": 1.6035431619948754e-05, "loss": 0.5327, "step": 15394 }, { "epoch": 2.51312191339129, "grad_norm": 3.3332114219665527, "learning_rate": 1.6034920406095384e-05, "loss": 0.6559, "step": 15395 }, { "epoch": 2.5132851720337945, "grad_norm": 2.297045946121216, "learning_rate": 1.6034409167434732e-05, "loss": 0.4919, "step": 15396 }, { "epoch": 2.513448430676299, "grad_norm": 2.563420534133911, "learning_rate": 1.6033897903968904e-05, "loss": 0.5818, "step": 15397 }, { "epoch": 2.5136116893188034, "grad_norm": 3.3160815238952637, "learning_rate": 1.60333866157e-05, "loss": 0.7197, "step": 15398 }, { "epoch": 2.513774947961308, "grad_norm": 3.1949656009674072, "learning_rate": 1.603287530263012e-05, "loss": 0.6734, "step": 15399 }, { "epoch": 2.513938206603812, "grad_norm": 2.928807497024536, "learning_rate": 1.6032363964761363e-05, "loss": 0.5169, "step": 15400 }, { "epoch": 2.5141014652463163, "grad_norm": 2.5131688117980957, "learning_rate": 1.6031852602095844e-05, "loss": 0.4967, "step": 15401 }, { "epoch": 2.5142647238888207, "grad_norm": 2.934555768966675, "learning_rate": 1.6031341214635654e-05, "loss": 0.5115, "step": 15402 }, { "epoch": 2.514427982531325, "grad_norm": 3.7065863609313965, "learning_rate": 1.6030829802382895e-05, "loss": 0.5841, "step": 15403 }, { "epoch": 2.5145912411738296, "grad_norm": 2.445366144180298, "learning_rate": 1.6030318365339673e-05, "loss": 0.4446, "step": 15404 }, { "epoch": 2.514754499816334, "grad_norm": 2.571981430053711, "learning_rate": 1.6029806903508093e-05, "loss": 0.4207, "step": 15405 }, { "epoch": 2.5149177584588385, "grad_norm": 3.0177536010742188, "learning_rate": 1.602929541689025e-05, "loss": 0.5716, "step": 15406 }, { "epoch": 2.515081017101343, "grad_norm": 2.8132073879241943, "learning_rate": 1.602878390548825e-05, "loss": 0.5263, "step": 15407 }, { "epoch": 2.5152442757438473, "grad_norm": 3.010838747024536, "learning_rate": 1.6028272369304194e-05, "loss": 0.5091, "step": 15408 }, { "epoch": 2.515407534386352, "grad_norm": 2.33864426612854, "learning_rate": 1.602776080834019e-05, "loss": 0.4558, "step": 15409 }, { "epoch": 2.5155707930288562, "grad_norm": 2.813612222671509, "learning_rate": 1.602724922259834e-05, "loss": 0.5756, "step": 15410 }, { "epoch": 2.51573405167136, "grad_norm": 2.7072744369506836, "learning_rate": 1.6026737612080737e-05, "loss": 0.5337, "step": 15411 }, { "epoch": 2.5158973103138647, "grad_norm": 2.6048247814178467, "learning_rate": 1.60262259767895e-05, "loss": 0.5153, "step": 15412 }, { "epoch": 2.516060568956369, "grad_norm": 2.745091676712036, "learning_rate": 1.6025714316726713e-05, "loss": 0.5583, "step": 15413 }, { "epoch": 2.5162238275988735, "grad_norm": 2.911787748336792, "learning_rate": 1.6025202631894496e-05, "loss": 0.5675, "step": 15414 }, { "epoch": 2.516387086241378, "grad_norm": 3.4205338954925537, "learning_rate": 1.6024690922294947e-05, "loss": 0.7355, "step": 15415 }, { "epoch": 2.5165503448838824, "grad_norm": 3.476832866668701, "learning_rate": 1.6024179187930167e-05, "loss": 0.6787, "step": 15416 }, { "epoch": 2.516713603526387, "grad_norm": 2.956555128097534, "learning_rate": 1.6023667428802258e-05, "loss": 0.5764, "step": 15417 }, { "epoch": 2.516876862168891, "grad_norm": 2.5297131538391113, "learning_rate": 1.602315564491333e-05, "loss": 0.5552, "step": 15418 }, { "epoch": 2.5170401208113953, "grad_norm": 2.9664852619171143, "learning_rate": 1.6022643836265484e-05, "loss": 0.4761, "step": 15419 }, { "epoch": 2.5172033794538997, "grad_norm": 3.128098249435425, "learning_rate": 1.6022132002860824e-05, "loss": 0.5758, "step": 15420 }, { "epoch": 2.517366638096404, "grad_norm": 2.5222697257995605, "learning_rate": 1.602162014470145e-05, "loss": 0.5064, "step": 15421 }, { "epoch": 2.5175298967389086, "grad_norm": 2.8487727642059326, "learning_rate": 1.602110826178947e-05, "loss": 0.6159, "step": 15422 }, { "epoch": 2.517693155381413, "grad_norm": 3.2694571018218994, "learning_rate": 1.602059635412699e-05, "loss": 0.707, "step": 15423 }, { "epoch": 2.5178564140239175, "grad_norm": 3.376487970352173, "learning_rate": 1.602008442171611e-05, "loss": 0.6993, "step": 15424 }, { "epoch": 2.518019672666422, "grad_norm": 2.709456205368042, "learning_rate": 1.601957246455894e-05, "loss": 0.456, "step": 15425 }, { "epoch": 2.5181829313089263, "grad_norm": 3.280872344970703, "learning_rate": 1.6019060482657572e-05, "loss": 0.5901, "step": 15426 }, { "epoch": 2.518346189951431, "grad_norm": 3.616096019744873, "learning_rate": 1.6018548476014126e-05, "loss": 0.6585, "step": 15427 }, { "epoch": 2.518509448593935, "grad_norm": 3.124972343444824, "learning_rate": 1.6018036444630694e-05, "loss": 0.6431, "step": 15428 }, { "epoch": 2.518672707236439, "grad_norm": 2.6404480934143066, "learning_rate": 1.601752438850939e-05, "loss": 0.4991, "step": 15429 }, { "epoch": 2.5188359658789437, "grad_norm": 2.8222272396087646, "learning_rate": 1.6017012307652316e-05, "loss": 0.5666, "step": 15430 }, { "epoch": 2.518999224521448, "grad_norm": 3.072413444519043, "learning_rate": 1.6016500202061578e-05, "loss": 0.6052, "step": 15431 }, { "epoch": 2.5191624831639525, "grad_norm": 2.777078628540039, "learning_rate": 1.601598807173928e-05, "loss": 0.5596, "step": 15432 }, { "epoch": 2.519325741806457, "grad_norm": 3.278740644454956, "learning_rate": 1.601547591668752e-05, "loss": 0.5722, "step": 15433 }, { "epoch": 2.5194890004489614, "grad_norm": 2.634779214859009, "learning_rate": 1.6014963736908415e-05, "loss": 0.5794, "step": 15434 }, { "epoch": 2.5196522590914654, "grad_norm": 3.1221325397491455, "learning_rate": 1.6014451532404067e-05, "loss": 0.5322, "step": 15435 }, { "epoch": 2.51981551773397, "grad_norm": 3.2428336143493652, "learning_rate": 1.6013939303176574e-05, "loss": 0.5382, "step": 15436 }, { "epoch": 2.5199787763764743, "grad_norm": 2.6052372455596924, "learning_rate": 1.601342704922805e-05, "loss": 0.5002, "step": 15437 }, { "epoch": 2.5201420350189787, "grad_norm": 2.688861608505249, "learning_rate": 1.60129147705606e-05, "loss": 0.5716, "step": 15438 }, { "epoch": 2.520305293661483, "grad_norm": 3.162116289138794, "learning_rate": 1.6012402467176324e-05, "loss": 0.5675, "step": 15439 }, { "epoch": 2.5204685523039876, "grad_norm": 2.805391550064087, "learning_rate": 1.6011890139077336e-05, "loss": 0.5858, "step": 15440 }, { "epoch": 2.520631810946492, "grad_norm": 2.8989205360412598, "learning_rate": 1.6011377786265737e-05, "loss": 0.6083, "step": 15441 }, { "epoch": 2.5207950695889965, "grad_norm": 2.9685139656066895, "learning_rate": 1.6010865408743633e-05, "loss": 0.5246, "step": 15442 }, { "epoch": 2.520958328231501, "grad_norm": 2.7169559001922607, "learning_rate": 1.6010353006513128e-05, "loss": 0.5905, "step": 15443 }, { "epoch": 2.5211215868740053, "grad_norm": 2.7259624004364014, "learning_rate": 1.6009840579576334e-05, "loss": 0.5158, "step": 15444 }, { "epoch": 2.5212848455165098, "grad_norm": 2.463038921356201, "learning_rate": 1.6009328127935354e-05, "loss": 0.5523, "step": 15445 }, { "epoch": 2.5214481041590138, "grad_norm": 2.79244327545166, "learning_rate": 1.6008815651592295e-05, "loss": 0.5874, "step": 15446 }, { "epoch": 2.521611362801518, "grad_norm": 2.401951313018799, "learning_rate": 1.6008303150549266e-05, "loss": 0.5349, "step": 15447 }, { "epoch": 2.5217746214440226, "grad_norm": 2.5317559242248535, "learning_rate": 1.6007790624808367e-05, "loss": 0.5783, "step": 15448 }, { "epoch": 2.521937880086527, "grad_norm": 3.168919324874878, "learning_rate": 1.6007278074371714e-05, "loss": 0.5615, "step": 15449 }, { "epoch": 2.5221011387290315, "grad_norm": 2.5686886310577393, "learning_rate": 1.6006765499241407e-05, "loss": 0.4408, "step": 15450 }, { "epoch": 2.522264397371536, "grad_norm": 2.8950178623199463, "learning_rate": 1.6006252899419555e-05, "loss": 0.547, "step": 15451 }, { "epoch": 2.5224276560140404, "grad_norm": 2.4491472244262695, "learning_rate": 1.6005740274908264e-05, "loss": 0.5156, "step": 15452 }, { "epoch": 2.5225909146565444, "grad_norm": 3.3092615604400635, "learning_rate": 1.6005227625709646e-05, "loss": 0.601, "step": 15453 }, { "epoch": 2.522754173299049, "grad_norm": 2.5698139667510986, "learning_rate": 1.60047149518258e-05, "loss": 0.4919, "step": 15454 }, { "epoch": 2.5229174319415533, "grad_norm": 2.5188491344451904, "learning_rate": 1.6004202253258844e-05, "loss": 0.4518, "step": 15455 }, { "epoch": 2.5230806905840577, "grad_norm": 2.823897123336792, "learning_rate": 1.6003689530010874e-05, "loss": 0.5407, "step": 15456 }, { "epoch": 2.523243949226562, "grad_norm": 3.071138858795166, "learning_rate": 1.6003176782084006e-05, "loss": 0.6389, "step": 15457 }, { "epoch": 2.5234072078690666, "grad_norm": 3.2082161903381348, "learning_rate": 1.6002664009480345e-05, "loss": 0.7215, "step": 15458 }, { "epoch": 2.523570466511571, "grad_norm": 2.4693286418914795, "learning_rate": 1.6002151212202e-05, "loss": 0.5158, "step": 15459 }, { "epoch": 2.5237337251540755, "grad_norm": 2.737116813659668, "learning_rate": 1.6001638390251074e-05, "loss": 0.5173, "step": 15460 }, { "epoch": 2.52389698379658, "grad_norm": 3.0392849445343018, "learning_rate": 1.6001125543629683e-05, "loss": 0.5391, "step": 15461 }, { "epoch": 2.5240602424390843, "grad_norm": 3.0848960876464844, "learning_rate": 1.6000612672339927e-05, "loss": 0.6153, "step": 15462 }, { "epoch": 2.5242235010815888, "grad_norm": 3.1239376068115234, "learning_rate": 1.600009977638392e-05, "loss": 0.7185, "step": 15463 }, { "epoch": 2.5243867597240928, "grad_norm": 3.1636853218078613, "learning_rate": 1.599958685576377e-05, "loss": 0.6134, "step": 15464 }, { "epoch": 2.524550018366597, "grad_norm": 3.3326234817504883, "learning_rate": 1.5999073910481583e-05, "loss": 0.7448, "step": 15465 }, { "epoch": 2.5247132770091016, "grad_norm": 2.4220197200775146, "learning_rate": 1.5998560940539466e-05, "loss": 0.5889, "step": 15466 }, { "epoch": 2.524876535651606, "grad_norm": 3.209763526916504, "learning_rate": 1.599804794593953e-05, "loss": 0.5766, "step": 15467 }, { "epoch": 2.5250397942941105, "grad_norm": 2.9974422454833984, "learning_rate": 1.5997534926683887e-05, "loss": 0.5297, "step": 15468 }, { "epoch": 2.525203052936615, "grad_norm": 2.606282949447632, "learning_rate": 1.599702188277464e-05, "loss": 0.4901, "step": 15469 }, { "epoch": 2.525366311579119, "grad_norm": 2.746185541152954, "learning_rate": 1.5996508814213904e-05, "loss": 0.4614, "step": 15470 }, { "epoch": 2.5255295702216234, "grad_norm": 2.823509454727173, "learning_rate": 1.5995995721003783e-05, "loss": 0.5244, "step": 15471 }, { "epoch": 2.525692828864128, "grad_norm": 2.554511547088623, "learning_rate": 1.5995482603146385e-05, "loss": 0.5459, "step": 15472 }, { "epoch": 2.5258560875066323, "grad_norm": 2.380450487136841, "learning_rate": 1.5994969460643822e-05, "loss": 0.4746, "step": 15473 }, { "epoch": 2.5260193461491367, "grad_norm": 2.897911548614502, "learning_rate": 1.5994456293498208e-05, "loss": 0.6403, "step": 15474 }, { "epoch": 2.526182604791641, "grad_norm": 3.005178213119507, "learning_rate": 1.5993943101711642e-05, "loss": 0.4969, "step": 15475 }, { "epoch": 2.5263458634341456, "grad_norm": 2.852540969848633, "learning_rate": 1.5993429885286244e-05, "loss": 0.5296, "step": 15476 }, { "epoch": 2.52650912207665, "grad_norm": 3.2714412212371826, "learning_rate": 1.5992916644224116e-05, "loss": 0.5234, "step": 15477 }, { "epoch": 2.5266723807191545, "grad_norm": 3.0589897632598877, "learning_rate": 1.599240337852737e-05, "loss": 0.6411, "step": 15478 }, { "epoch": 2.526835639361659, "grad_norm": 3.012855291366577, "learning_rate": 1.5991890088198117e-05, "loss": 0.6448, "step": 15479 }, { "epoch": 2.5269988980041633, "grad_norm": 2.545374870300293, "learning_rate": 1.5991376773238466e-05, "loss": 0.4781, "step": 15480 }, { "epoch": 2.5271621566466673, "grad_norm": 3.187039852142334, "learning_rate": 1.5990863433650527e-05, "loss": 0.6584, "step": 15481 }, { "epoch": 2.5273254152891718, "grad_norm": 2.394272804260254, "learning_rate": 1.5990350069436412e-05, "loss": 0.4834, "step": 15482 }, { "epoch": 2.527488673931676, "grad_norm": 2.812887668609619, "learning_rate": 1.598983668059823e-05, "loss": 0.5715, "step": 15483 }, { "epoch": 2.5276519325741806, "grad_norm": 2.7111120223999023, "learning_rate": 1.5989323267138094e-05, "loss": 0.5128, "step": 15484 }, { "epoch": 2.527815191216685, "grad_norm": 2.8929555416107178, "learning_rate": 1.5988809829058105e-05, "loss": 0.6099, "step": 15485 }, { "epoch": 2.5279784498591895, "grad_norm": 3.1484508514404297, "learning_rate": 1.5988296366360384e-05, "loss": 0.5727, "step": 15486 }, { "epoch": 2.528141708501694, "grad_norm": 3.229975938796997, "learning_rate": 1.5987782879047036e-05, "loss": 0.6348, "step": 15487 }, { "epoch": 2.528304967144198, "grad_norm": 3.1684138774871826, "learning_rate": 1.5987269367120173e-05, "loss": 0.6665, "step": 15488 }, { "epoch": 2.5284682257867024, "grad_norm": 2.748579740524292, "learning_rate": 1.598675583058191e-05, "loss": 0.5633, "step": 15489 }, { "epoch": 2.528631484429207, "grad_norm": 3.003066062927246, "learning_rate": 1.5986242269434355e-05, "loss": 0.6133, "step": 15490 }, { "epoch": 2.5287947430717113, "grad_norm": 2.7057912349700928, "learning_rate": 1.5985728683679613e-05, "loss": 0.5868, "step": 15491 }, { "epoch": 2.5289580017142157, "grad_norm": 3.126089572906494, "learning_rate": 1.5985215073319804e-05, "loss": 0.6757, "step": 15492 }, { "epoch": 2.52912126035672, "grad_norm": 3.0058629512786865, "learning_rate": 1.5984701438357032e-05, "loss": 0.6061, "step": 15493 }, { "epoch": 2.5292845189992246, "grad_norm": 2.2632579803466797, "learning_rate": 1.5984187778793418e-05, "loss": 0.4888, "step": 15494 }, { "epoch": 2.529447777641729, "grad_norm": 3.034031867980957, "learning_rate": 1.5983674094631063e-05, "loss": 0.6599, "step": 15495 }, { "epoch": 2.5296110362842334, "grad_norm": 2.574920892715454, "learning_rate": 1.5983160385872087e-05, "loss": 0.5455, "step": 15496 }, { "epoch": 2.529774294926738, "grad_norm": 2.5911433696746826, "learning_rate": 1.5982646652518595e-05, "loss": 0.4745, "step": 15497 }, { "epoch": 2.5299375535692423, "grad_norm": 3.2534830570220947, "learning_rate": 1.5982132894572703e-05, "loss": 0.7319, "step": 15498 }, { "epoch": 2.5301008122117463, "grad_norm": 3.052396297454834, "learning_rate": 1.598161911203652e-05, "loss": 0.6533, "step": 15499 }, { "epoch": 2.5302640708542508, "grad_norm": 3.0162458419799805, "learning_rate": 1.598110530491216e-05, "loss": 0.7078, "step": 15500 }, { "epoch": 2.530427329496755, "grad_norm": 2.7110493183135986, "learning_rate": 1.5980591473201735e-05, "loss": 0.5955, "step": 15501 }, { "epoch": 2.5305905881392596, "grad_norm": 2.936410665512085, "learning_rate": 1.5980077616907357e-05, "loss": 0.5784, "step": 15502 }, { "epoch": 2.530753846781764, "grad_norm": 2.9577386379241943, "learning_rate": 1.5979563736031137e-05, "loss": 0.6426, "step": 15503 }, { "epoch": 2.5309171054242685, "grad_norm": 2.5271103382110596, "learning_rate": 1.597904983057519e-05, "loss": 0.4802, "step": 15504 }, { "epoch": 2.531080364066773, "grad_norm": 2.827094554901123, "learning_rate": 1.5978535900541627e-05, "loss": 0.6017, "step": 15505 }, { "epoch": 2.531243622709277, "grad_norm": 3.126824378967285, "learning_rate": 1.5978021945932557e-05, "loss": 0.5378, "step": 15506 }, { "epoch": 2.5314068813517814, "grad_norm": 2.7656145095825195, "learning_rate": 1.59775079667501e-05, "loss": 0.58, "step": 15507 }, { "epoch": 2.531570139994286, "grad_norm": 2.8451175689697266, "learning_rate": 1.5976993962996363e-05, "loss": 0.5112, "step": 15508 }, { "epoch": 2.5317333986367903, "grad_norm": 2.770643711090088, "learning_rate": 1.5976479934673462e-05, "loss": 0.5867, "step": 15509 }, { "epoch": 2.5318966572792947, "grad_norm": 2.6826508045196533, "learning_rate": 1.5975965881783502e-05, "loss": 0.4893, "step": 15510 }, { "epoch": 2.532059915921799, "grad_norm": 2.816969394683838, "learning_rate": 1.597545180432861e-05, "loss": 0.5405, "step": 15511 }, { "epoch": 2.5322231745643036, "grad_norm": 2.3439860343933105, "learning_rate": 1.597493770231089e-05, "loss": 0.466, "step": 15512 }, { "epoch": 2.532386433206808, "grad_norm": 2.6516947746276855, "learning_rate": 1.597442357573246e-05, "loss": 0.5421, "step": 15513 }, { "epoch": 2.5325496918493124, "grad_norm": 2.6349010467529297, "learning_rate": 1.5973909424595425e-05, "loss": 0.5961, "step": 15514 }, { "epoch": 2.532712950491817, "grad_norm": 2.812523365020752, "learning_rate": 1.5973395248901907e-05, "loss": 0.4627, "step": 15515 }, { "epoch": 2.5328762091343213, "grad_norm": 2.7702224254608154, "learning_rate": 1.5972881048654015e-05, "loss": 0.5152, "step": 15516 }, { "epoch": 2.5330394677768253, "grad_norm": 2.077138900756836, "learning_rate": 1.5972366823853867e-05, "loss": 0.4857, "step": 15517 }, { "epoch": 2.5332027264193298, "grad_norm": 2.975705862045288, "learning_rate": 1.5971852574503573e-05, "loss": 0.5103, "step": 15518 }, { "epoch": 2.533365985061834, "grad_norm": 2.994903326034546, "learning_rate": 1.597133830060525e-05, "loss": 0.5625, "step": 15519 }, { "epoch": 2.5335292437043386, "grad_norm": 2.6768367290496826, "learning_rate": 1.5970824002161007e-05, "loss": 0.5297, "step": 15520 }, { "epoch": 2.533692502346843, "grad_norm": 3.017944097518921, "learning_rate": 1.5970309679172962e-05, "loss": 0.5355, "step": 15521 }, { "epoch": 2.5338557609893475, "grad_norm": 3.791764974594116, "learning_rate": 1.596979533164323e-05, "loss": 0.6227, "step": 15522 }, { "epoch": 2.5340190196318515, "grad_norm": 2.687425374984741, "learning_rate": 1.596928095957392e-05, "loss": 0.6088, "step": 15523 }, { "epoch": 2.534182278274356, "grad_norm": 2.927424907684326, "learning_rate": 1.5968766562967152e-05, "loss": 0.5086, "step": 15524 }, { "epoch": 2.5343455369168604, "grad_norm": 2.8664796352386475, "learning_rate": 1.5968252141825038e-05, "loss": 0.5973, "step": 15525 }, { "epoch": 2.534508795559365, "grad_norm": 2.5114850997924805, "learning_rate": 1.5967737696149695e-05, "loss": 0.526, "step": 15526 }, { "epoch": 2.5346720542018693, "grad_norm": 2.7146666049957275, "learning_rate": 1.596722322594323e-05, "loss": 0.4693, "step": 15527 }, { "epoch": 2.5348353128443737, "grad_norm": 3.350968360900879, "learning_rate": 1.596670873120777e-05, "loss": 0.6447, "step": 15528 }, { "epoch": 2.534998571486878, "grad_norm": 2.927713394165039, "learning_rate": 1.596619421194542e-05, "loss": 0.5179, "step": 15529 }, { "epoch": 2.5351618301293826, "grad_norm": 2.9701151847839355, "learning_rate": 1.59656796681583e-05, "loss": 0.5586, "step": 15530 }, { "epoch": 2.535325088771887, "grad_norm": 3.1607038974761963, "learning_rate": 1.596516509984852e-05, "loss": 0.504, "step": 15531 }, { "epoch": 2.5354883474143914, "grad_norm": 2.921319007873535, "learning_rate": 1.5964650507018204e-05, "loss": 0.5672, "step": 15532 }, { "epoch": 2.535651606056896, "grad_norm": 2.7049927711486816, "learning_rate": 1.596413588966946e-05, "loss": 0.5676, "step": 15533 }, { "epoch": 2.5358148646994, "grad_norm": 2.922106981277466, "learning_rate": 1.5963621247804403e-05, "loss": 0.642, "step": 15534 }, { "epoch": 2.5359781233419043, "grad_norm": 3.2068967819213867, "learning_rate": 1.5963106581425153e-05, "loss": 0.6215, "step": 15535 }, { "epoch": 2.5361413819844087, "grad_norm": 3.18778657913208, "learning_rate": 1.5962591890533822e-05, "loss": 0.5803, "step": 15536 }, { "epoch": 2.536304640626913, "grad_norm": 2.403027057647705, "learning_rate": 1.596207717513253e-05, "loss": 0.4719, "step": 15537 }, { "epoch": 2.5364678992694176, "grad_norm": 2.7294838428497314, "learning_rate": 1.5961562435223386e-05, "loss": 0.5899, "step": 15538 }, { "epoch": 2.536631157911922, "grad_norm": 2.961467981338501, "learning_rate": 1.5961047670808515e-05, "loss": 0.5036, "step": 15539 }, { "epoch": 2.5367944165544265, "grad_norm": 2.8817696571350098, "learning_rate": 1.5960532881890025e-05, "loss": 0.5709, "step": 15540 }, { "epoch": 2.5369576751969305, "grad_norm": 2.4527523517608643, "learning_rate": 1.5960018068470033e-05, "loss": 0.5066, "step": 15541 }, { "epoch": 2.537120933839435, "grad_norm": 2.783479690551758, "learning_rate": 1.5959503230550662e-05, "loss": 0.5037, "step": 15542 }, { "epoch": 2.5372841924819394, "grad_norm": 2.968554735183716, "learning_rate": 1.595898836813402e-05, "loss": 0.6261, "step": 15543 }, { "epoch": 2.537447451124444, "grad_norm": 2.7564260959625244, "learning_rate": 1.595847348122223e-05, "loss": 0.5805, "step": 15544 }, { "epoch": 2.5376107097669482, "grad_norm": 3.246347665786743, "learning_rate": 1.5957958569817397e-05, "loss": 0.633, "step": 15545 }, { "epoch": 2.5377739684094527, "grad_norm": 3.220644950866699, "learning_rate": 1.595744363392165e-05, "loss": 0.6496, "step": 15546 }, { "epoch": 2.537937227051957, "grad_norm": 3.521280288696289, "learning_rate": 1.595692867353711e-05, "loss": 0.7961, "step": 15547 }, { "epoch": 2.5381004856944616, "grad_norm": 2.837019205093384, "learning_rate": 1.5956413688665878e-05, "loss": 0.5969, "step": 15548 }, { "epoch": 2.538263744336966, "grad_norm": 3.023190975189209, "learning_rate": 1.595589867931008e-05, "loss": 0.614, "step": 15549 }, { "epoch": 2.5384270029794704, "grad_norm": 2.6140716075897217, "learning_rate": 1.5955383645471828e-05, "loss": 0.5645, "step": 15550 }, { "epoch": 2.538590261621975, "grad_norm": 2.930220603942871, "learning_rate": 1.595486858715325e-05, "loss": 0.572, "step": 15551 }, { "epoch": 2.538753520264479, "grad_norm": 2.821718215942383, "learning_rate": 1.5954353504356447e-05, "loss": 0.6154, "step": 15552 }, { "epoch": 2.5389167789069833, "grad_norm": 2.688890218734741, "learning_rate": 1.595383839708355e-05, "loss": 0.5313, "step": 15553 }, { "epoch": 2.5390800375494877, "grad_norm": 2.5936684608459473, "learning_rate": 1.595332326533667e-05, "loss": 0.5113, "step": 15554 }, { "epoch": 2.539243296191992, "grad_norm": 2.8188364505767822, "learning_rate": 1.5952808109117928e-05, "loss": 0.664, "step": 15555 }, { "epoch": 2.5394065548344966, "grad_norm": 3.1041147708892822, "learning_rate": 1.595229292842944e-05, "loss": 0.5971, "step": 15556 }, { "epoch": 2.539569813477001, "grad_norm": 3.238032579421997, "learning_rate": 1.595177772327332e-05, "loss": 0.6445, "step": 15557 }, { "epoch": 2.539733072119505, "grad_norm": 3.146773099899292, "learning_rate": 1.595126249365169e-05, "loss": 0.6076, "step": 15558 }, { "epoch": 2.5398963307620095, "grad_norm": 2.1126418113708496, "learning_rate": 1.5950747239566672e-05, "loss": 0.407, "step": 15559 }, { "epoch": 2.540059589404514, "grad_norm": 2.6307191848754883, "learning_rate": 1.5950231961020373e-05, "loss": 0.5646, "step": 15560 }, { "epoch": 2.5402228480470184, "grad_norm": 2.8269004821777344, "learning_rate": 1.594971665801492e-05, "loss": 0.5584, "step": 15561 }, { "epoch": 2.540386106689523, "grad_norm": 2.670819044113159, "learning_rate": 1.5949201330552425e-05, "loss": 0.5498, "step": 15562 }, { "epoch": 2.5405493653320272, "grad_norm": 2.5402297973632812, "learning_rate": 1.5948685978635014e-05, "loss": 0.4873, "step": 15563 }, { "epoch": 2.5407126239745317, "grad_norm": 2.875962734222412, "learning_rate": 1.59481706022648e-05, "loss": 0.6275, "step": 15564 }, { "epoch": 2.540875882617036, "grad_norm": 2.9547133445739746, "learning_rate": 1.59476552014439e-05, "loss": 0.5709, "step": 15565 }, { "epoch": 2.5410391412595406, "grad_norm": 3.5204524993896484, "learning_rate": 1.594713977617444e-05, "loss": 0.6058, "step": 15566 }, { "epoch": 2.541202399902045, "grad_norm": 3.1525611877441406, "learning_rate": 1.5946624326458532e-05, "loss": 0.5895, "step": 15567 }, { "epoch": 2.5413656585445494, "grad_norm": 2.786881923675537, "learning_rate": 1.5946108852298295e-05, "loss": 0.556, "step": 15568 }, { "epoch": 2.5415289171870534, "grad_norm": 2.8303375244140625, "learning_rate": 1.594559335369585e-05, "loss": 0.5286, "step": 15569 }, { "epoch": 2.541692175829558, "grad_norm": 2.8572869300842285, "learning_rate": 1.5945077830653317e-05, "loss": 0.5541, "step": 15570 }, { "epoch": 2.5418554344720623, "grad_norm": 2.927039384841919, "learning_rate": 1.5944562283172814e-05, "loss": 0.5996, "step": 15571 }, { "epoch": 2.5420186931145667, "grad_norm": 2.8898041248321533, "learning_rate": 1.594404671125646e-05, "loss": 0.5802, "step": 15572 }, { "epoch": 2.542181951757071, "grad_norm": 2.8883004188537598, "learning_rate": 1.594353111490637e-05, "loss": 0.5234, "step": 15573 }, { "epoch": 2.5423452103995756, "grad_norm": 3.0507442951202393, "learning_rate": 1.5943015494124673e-05, "loss": 0.6948, "step": 15574 }, { "epoch": 2.54250846904208, "grad_norm": 2.8617916107177734, "learning_rate": 1.5942499848913485e-05, "loss": 0.5735, "step": 15575 }, { "epoch": 2.542671727684584, "grad_norm": 2.791842460632324, "learning_rate": 1.594198417927492e-05, "loss": 0.5303, "step": 15576 }, { "epoch": 2.5428349863270885, "grad_norm": 2.487393856048584, "learning_rate": 1.5941468485211103e-05, "loss": 0.5671, "step": 15577 }, { "epoch": 2.542998244969593, "grad_norm": 2.988114356994629, "learning_rate": 1.594095276672415e-05, "loss": 0.6053, "step": 15578 }, { "epoch": 2.5431615036120974, "grad_norm": 3.3333489894866943, "learning_rate": 1.594043702381619e-05, "loss": 0.6094, "step": 15579 }, { "epoch": 2.543324762254602, "grad_norm": 2.610140562057495, "learning_rate": 1.593992125648933e-05, "loss": 0.4963, "step": 15580 }, { "epoch": 2.5434880208971062, "grad_norm": 2.4572830200195312, "learning_rate": 1.59394054647457e-05, "loss": 0.4919, "step": 15581 }, { "epoch": 2.5436512795396107, "grad_norm": 3.1023337841033936, "learning_rate": 1.5938889648587416e-05, "loss": 0.5805, "step": 15582 }, { "epoch": 2.543814538182115, "grad_norm": 2.945281982421875, "learning_rate": 1.59383738080166e-05, "loss": 0.6132, "step": 15583 }, { "epoch": 2.5439777968246196, "grad_norm": 3.0759682655334473, "learning_rate": 1.5937857943035368e-05, "loss": 0.5609, "step": 15584 }, { "epoch": 2.544141055467124, "grad_norm": 2.521930694580078, "learning_rate": 1.5937342053645845e-05, "loss": 0.5533, "step": 15585 }, { "epoch": 2.5443043141096284, "grad_norm": 3.0208256244659424, "learning_rate": 1.593682613985015e-05, "loss": 0.5897, "step": 15586 }, { "epoch": 2.5444675727521324, "grad_norm": 3.1413044929504395, "learning_rate": 1.5936310201650412e-05, "loss": 0.6092, "step": 15587 }, { "epoch": 2.544630831394637, "grad_norm": 2.9754838943481445, "learning_rate": 1.5935794239048735e-05, "loss": 0.5793, "step": 15588 }, { "epoch": 2.5447940900371413, "grad_norm": 3.220318078994751, "learning_rate": 1.5935278252047253e-05, "loss": 0.5984, "step": 15589 }, { "epoch": 2.5449573486796457, "grad_norm": 2.4565651416778564, "learning_rate": 1.5934762240648086e-05, "loss": 0.5678, "step": 15590 }, { "epoch": 2.54512060732215, "grad_norm": 2.589388847351074, "learning_rate": 1.593424620485335e-05, "loss": 0.4889, "step": 15591 }, { "epoch": 2.5452838659646546, "grad_norm": 2.4676969051361084, "learning_rate": 1.593373014466517e-05, "loss": 0.5196, "step": 15592 }, { "epoch": 2.545447124607159, "grad_norm": 2.438608169555664, "learning_rate": 1.593321406008566e-05, "loss": 0.4453, "step": 15593 }, { "epoch": 2.545610383249663, "grad_norm": 2.8935225009918213, "learning_rate": 1.5932697951116955e-05, "loss": 0.5201, "step": 15594 }, { "epoch": 2.5457736418921675, "grad_norm": 3.284449815750122, "learning_rate": 1.5932181817761163e-05, "loss": 0.7027, "step": 15595 }, { "epoch": 2.545936900534672, "grad_norm": 2.8987925052642822, "learning_rate": 1.5931665660020415e-05, "loss": 0.5604, "step": 15596 }, { "epoch": 2.5461001591771764, "grad_norm": 2.9452309608459473, "learning_rate": 1.5931149477896827e-05, "loss": 0.5405, "step": 15597 }, { "epoch": 2.546263417819681, "grad_norm": 3.1911978721618652, "learning_rate": 1.5930633271392524e-05, "loss": 0.6614, "step": 15598 }, { "epoch": 2.5464266764621852, "grad_norm": 2.940249443054199, "learning_rate": 1.5930117040509627e-05, "loss": 0.5494, "step": 15599 }, { "epoch": 2.5465899351046897, "grad_norm": 3.4324512481689453, "learning_rate": 1.592960078525026e-05, "loss": 0.567, "step": 15600 }, { "epoch": 2.546753193747194, "grad_norm": 2.6304118633270264, "learning_rate": 1.592908450561654e-05, "loss": 0.4993, "step": 15601 }, { "epoch": 2.5469164523896985, "grad_norm": 3.1051056385040283, "learning_rate": 1.5928568201610593e-05, "loss": 0.5319, "step": 15602 }, { "epoch": 2.547079711032203, "grad_norm": 2.479951858520508, "learning_rate": 1.5928051873234543e-05, "loss": 0.4382, "step": 15603 }, { "epoch": 2.5472429696747074, "grad_norm": 2.79563570022583, "learning_rate": 1.5927535520490506e-05, "loss": 0.5432, "step": 15604 }, { "epoch": 2.5474062283172114, "grad_norm": 2.8305482864379883, "learning_rate": 1.592701914338061e-05, "loss": 0.5183, "step": 15605 }, { "epoch": 2.547569486959716, "grad_norm": 2.4493720531463623, "learning_rate": 1.5926502741906983e-05, "loss": 0.4692, "step": 15606 }, { "epoch": 2.5477327456022203, "grad_norm": 2.8141226768493652, "learning_rate": 1.5925986316071735e-05, "loss": 0.6129, "step": 15607 }, { "epoch": 2.5478960042447247, "grad_norm": 3.3510963916778564, "learning_rate": 1.5925469865876995e-05, "loss": 1.3535, "step": 15608 }, { "epoch": 2.548059262887229, "grad_norm": 2.781989097595215, "learning_rate": 1.5924953391324884e-05, "loss": 0.5238, "step": 15609 }, { "epoch": 2.5482225215297336, "grad_norm": 2.623755931854248, "learning_rate": 1.592443689241753e-05, "loss": 0.5212, "step": 15610 }, { "epoch": 2.5483857801722376, "grad_norm": 2.6406450271606445, "learning_rate": 1.5923920369157053e-05, "loss": 0.6437, "step": 15611 }, { "epoch": 2.548549038814742, "grad_norm": 2.7153759002685547, "learning_rate": 1.5923403821545576e-05, "loss": 0.5469, "step": 15612 }, { "epoch": 2.5487122974572465, "grad_norm": 2.569007158279419, "learning_rate": 1.592288724958522e-05, "loss": 0.4603, "step": 15613 }, { "epoch": 2.548875556099751, "grad_norm": 2.141728639602661, "learning_rate": 1.592237065327811e-05, "loss": 0.4523, "step": 15614 }, { "epoch": 2.5490388147422554, "grad_norm": 3.237586259841919, "learning_rate": 1.5921854032626376e-05, "loss": 0.522, "step": 15615 }, { "epoch": 2.54920207338476, "grad_norm": 3.0000646114349365, "learning_rate": 1.5921337387632134e-05, "loss": 0.618, "step": 15616 }, { "epoch": 2.5493653320272642, "grad_norm": 3.0299532413482666, "learning_rate": 1.592082071829751e-05, "loss": 0.6671, "step": 15617 }, { "epoch": 2.5495285906697687, "grad_norm": 3.1211812496185303, "learning_rate": 1.5920304024624625e-05, "loss": 0.7947, "step": 15618 }, { "epoch": 2.549691849312273, "grad_norm": 2.7458362579345703, "learning_rate": 1.5919787306615607e-05, "loss": 0.6401, "step": 15619 }, { "epoch": 2.5498551079547775, "grad_norm": 2.7009029388427734, "learning_rate": 1.5919270564272583e-05, "loss": 0.589, "step": 15620 }, { "epoch": 2.550018366597282, "grad_norm": 2.6681149005889893, "learning_rate": 1.5918753797597667e-05, "loss": 0.5387, "step": 15621 }, { "epoch": 2.550181625239786, "grad_norm": 2.7429449558258057, "learning_rate": 1.591823700659299e-05, "loss": 0.488, "step": 15622 }, { "epoch": 2.5503448838822904, "grad_norm": 2.7653584480285645, "learning_rate": 1.5917720191260676e-05, "loss": 0.5707, "step": 15623 }, { "epoch": 2.550508142524795, "grad_norm": 2.680063247680664, "learning_rate": 1.591720335160285e-05, "loss": 0.5129, "step": 15624 }, { "epoch": 2.5506714011672993, "grad_norm": 3.1848690509796143, "learning_rate": 1.5916686487621636e-05, "loss": 0.6946, "step": 15625 }, { "epoch": 2.5508346598098037, "grad_norm": 3.0636579990386963, "learning_rate": 1.5916169599319155e-05, "loss": 0.5079, "step": 15626 }, { "epoch": 2.550997918452308, "grad_norm": 3.192417621612549, "learning_rate": 1.5915652686697538e-05, "loss": 0.533, "step": 15627 }, { "epoch": 2.5511611770948126, "grad_norm": 2.822068691253662, "learning_rate": 1.5915135749758903e-05, "loss": 0.5468, "step": 15628 }, { "epoch": 2.5513244357373166, "grad_norm": 2.5411882400512695, "learning_rate": 1.591461878850538e-05, "loss": 0.6192, "step": 15629 }, { "epoch": 2.551487694379821, "grad_norm": 2.8624510765075684, "learning_rate": 1.591410180293909e-05, "loss": 0.5675, "step": 15630 }, { "epoch": 2.5516509530223255, "grad_norm": 2.7964165210723877, "learning_rate": 1.5913584793062165e-05, "loss": 0.5932, "step": 15631 }, { "epoch": 2.55181421166483, "grad_norm": 2.3674063682556152, "learning_rate": 1.5913067758876724e-05, "loss": 0.4908, "step": 15632 }, { "epoch": 2.5519774703073344, "grad_norm": 2.5083563327789307, "learning_rate": 1.5912550700384894e-05, "loss": 0.4568, "step": 15633 }, { "epoch": 2.552140728949839, "grad_norm": 3.3335561752319336, "learning_rate": 1.59120336175888e-05, "loss": 0.6448, "step": 15634 }, { "epoch": 2.5523039875923432, "grad_norm": 3.186335563659668, "learning_rate": 1.591151651049057e-05, "loss": 0.6229, "step": 15635 }, { "epoch": 2.5524672462348477, "grad_norm": 2.9505512714385986, "learning_rate": 1.5910999379092328e-05, "loss": 0.6012, "step": 15636 }, { "epoch": 2.552630504877352, "grad_norm": 3.0017588138580322, "learning_rate": 1.59104822233962e-05, "loss": 0.6603, "step": 15637 }, { "epoch": 2.5527937635198565, "grad_norm": 2.8491125106811523, "learning_rate": 1.590996504340431e-05, "loss": 0.6063, "step": 15638 }, { "epoch": 2.552957022162361, "grad_norm": 2.321451425552368, "learning_rate": 1.5909447839118782e-05, "loss": 0.5247, "step": 15639 }, { "epoch": 2.553120280804865, "grad_norm": 2.492189884185791, "learning_rate": 1.590893061054175e-05, "loss": 0.4684, "step": 15640 }, { "epoch": 2.5532835394473694, "grad_norm": 2.856875419616699, "learning_rate": 1.5908413357675333e-05, "loss": 0.5529, "step": 15641 }, { "epoch": 2.553446798089874, "grad_norm": 2.5557518005371094, "learning_rate": 1.590789608052166e-05, "loss": 0.4694, "step": 15642 }, { "epoch": 2.5536100567323783, "grad_norm": 2.7810375690460205, "learning_rate": 1.5907378779082856e-05, "loss": 0.6573, "step": 15643 }, { "epoch": 2.5537733153748827, "grad_norm": 3.3113443851470947, "learning_rate": 1.5906861453361048e-05, "loss": 0.6673, "step": 15644 }, { "epoch": 2.553936574017387, "grad_norm": 2.9159481525421143, "learning_rate": 1.5906344103358366e-05, "loss": 0.604, "step": 15645 }, { "epoch": 2.554099832659891, "grad_norm": 3.173769474029541, "learning_rate": 1.5905826729076932e-05, "loss": 0.5947, "step": 15646 }, { "epoch": 2.5542630913023956, "grad_norm": 3.006438732147217, "learning_rate": 1.5905309330518876e-05, "loss": 0.5712, "step": 15647 }, { "epoch": 2.5544263499449, "grad_norm": 3.2315118312835693, "learning_rate": 1.590479190768632e-05, "loss": 0.6243, "step": 15648 }, { "epoch": 2.5545896085874045, "grad_norm": 3.7147769927978516, "learning_rate": 1.5904274460581397e-05, "loss": 0.8002, "step": 15649 }, { "epoch": 2.554752867229909, "grad_norm": 2.8888466358184814, "learning_rate": 1.5903756989206233e-05, "loss": 0.5926, "step": 15650 }, { "epoch": 2.5549161258724133, "grad_norm": 3.113105297088623, "learning_rate": 1.5903239493562948e-05, "loss": 0.6265, "step": 15651 }, { "epoch": 2.555079384514918, "grad_norm": 2.7863805294036865, "learning_rate": 1.5902721973653677e-05, "loss": 0.5636, "step": 15652 }, { "epoch": 2.5552426431574222, "grad_norm": 2.8386178016662598, "learning_rate": 1.5902204429480544e-05, "loss": 0.5085, "step": 15653 }, { "epoch": 2.5554059017999267, "grad_norm": 2.920224666595459, "learning_rate": 1.5901686861045676e-05, "loss": 0.6062, "step": 15654 }, { "epoch": 2.555569160442431, "grad_norm": 2.8326575756073, "learning_rate": 1.5901169268351203e-05, "loss": 0.5644, "step": 15655 }, { "epoch": 2.5557324190849355, "grad_norm": 2.5335824489593506, "learning_rate": 1.5900651651399254e-05, "loss": 0.5309, "step": 15656 }, { "epoch": 2.5558956777274395, "grad_norm": 2.8627662658691406, "learning_rate": 1.590013401019195e-05, "loss": 0.5854, "step": 15657 }, { "epoch": 2.556058936369944, "grad_norm": 2.7356865406036377, "learning_rate": 1.5899616344731425e-05, "loss": 0.523, "step": 15658 }, { "epoch": 2.5562221950124484, "grad_norm": 2.674192190170288, "learning_rate": 1.5899098655019803e-05, "loss": 0.4384, "step": 15659 }, { "epoch": 2.556385453654953, "grad_norm": 2.763798475265503, "learning_rate": 1.5898580941059218e-05, "loss": 0.5242, "step": 15660 }, { "epoch": 2.5565487122974573, "grad_norm": 2.555253267288208, "learning_rate": 1.5898063202851792e-05, "loss": 0.4763, "step": 15661 }, { "epoch": 2.5567119709399617, "grad_norm": 2.897761821746826, "learning_rate": 1.5897545440399654e-05, "loss": 0.653, "step": 15662 }, { "epoch": 2.556875229582466, "grad_norm": 2.4941394329071045, "learning_rate": 1.5897027653704932e-05, "loss": 0.4528, "step": 15663 }, { "epoch": 2.55703848822497, "grad_norm": 2.7916200160980225, "learning_rate": 1.589650984276976e-05, "loss": 0.5521, "step": 15664 }, { "epoch": 2.5572017468674746, "grad_norm": 3.0289292335510254, "learning_rate": 1.5895992007596262e-05, "loss": 0.5816, "step": 15665 }, { "epoch": 2.557365005509979, "grad_norm": 2.9774398803710938, "learning_rate": 1.5895474148186564e-05, "loss": 0.5663, "step": 15666 }, { "epoch": 2.5575282641524835, "grad_norm": 2.913137435913086, "learning_rate": 1.58949562645428e-05, "loss": 0.5404, "step": 15667 }, { "epoch": 2.557691522794988, "grad_norm": 2.950200319290161, "learning_rate": 1.5894438356667097e-05, "loss": 0.584, "step": 15668 }, { "epoch": 2.5578547814374923, "grad_norm": 3.031343460083008, "learning_rate": 1.5893920424561585e-05, "loss": 0.6025, "step": 15669 }, { "epoch": 2.558018040079997, "grad_norm": 3.320427894592285, "learning_rate": 1.589340246822839e-05, "loss": 0.7217, "step": 15670 }, { "epoch": 2.558181298722501, "grad_norm": 3.114600658416748, "learning_rate": 1.5892884487669642e-05, "loss": 0.5943, "step": 15671 }, { "epoch": 2.5583445573650057, "grad_norm": 3.06886625289917, "learning_rate": 1.5892366482887474e-05, "loss": 0.6377, "step": 15672 }, { "epoch": 2.55850781600751, "grad_norm": 3.0129525661468506, "learning_rate": 1.5891848453884008e-05, "loss": 0.6928, "step": 15673 }, { "epoch": 2.5586710746500145, "grad_norm": 2.7551379203796387, "learning_rate": 1.589133040066138e-05, "loss": 0.5149, "step": 15674 }, { "epoch": 2.5588343332925185, "grad_norm": 2.680293560028076, "learning_rate": 1.5890812323221715e-05, "loss": 0.5544, "step": 15675 }, { "epoch": 2.558997591935023, "grad_norm": 3.39955735206604, "learning_rate": 1.589029422156715e-05, "loss": 0.7732, "step": 15676 }, { "epoch": 2.5591608505775274, "grad_norm": 2.987386465072632, "learning_rate": 1.5889776095699804e-05, "loss": 0.4955, "step": 15677 }, { "epoch": 2.559324109220032, "grad_norm": 3.022918224334717, "learning_rate": 1.5889257945621815e-05, "loss": 0.59, "step": 15678 }, { "epoch": 2.5594873678625363, "grad_norm": 3.155518054962158, "learning_rate": 1.588873977133531e-05, "loss": 0.577, "step": 15679 }, { "epoch": 2.5596506265050407, "grad_norm": 3.097137212753296, "learning_rate": 1.588822157284242e-05, "loss": 0.6762, "step": 15680 }, { "epoch": 2.559813885147545, "grad_norm": 2.4432871341705322, "learning_rate": 1.5887703350145272e-05, "loss": 0.5289, "step": 15681 }, { "epoch": 2.559977143790049, "grad_norm": 3.0448122024536133, "learning_rate": 1.5887185103246e-05, "loss": 0.6095, "step": 15682 }, { "epoch": 2.5601404024325536, "grad_norm": 3.2562294006347656, "learning_rate": 1.5886666832146735e-05, "loss": 0.641, "step": 15683 }, { "epoch": 2.560303661075058, "grad_norm": 2.721606969833374, "learning_rate": 1.5886148536849606e-05, "loss": 0.5566, "step": 15684 }, { "epoch": 2.5604669197175625, "grad_norm": 2.6007235050201416, "learning_rate": 1.588563021735674e-05, "loss": 0.5833, "step": 15685 }, { "epoch": 2.560630178360067, "grad_norm": 3.0179102420806885, "learning_rate": 1.588511187367027e-05, "loss": 0.646, "step": 15686 }, { "epoch": 2.5607934370025713, "grad_norm": 3.411118507385254, "learning_rate": 1.5884593505792332e-05, "loss": 0.6342, "step": 15687 }, { "epoch": 2.5609566956450758, "grad_norm": 2.522927761077881, "learning_rate": 1.5884075113725047e-05, "loss": 0.5623, "step": 15688 }, { "epoch": 2.56111995428758, "grad_norm": 2.377925157546997, "learning_rate": 1.5883556697470552e-05, "loss": 0.4736, "step": 15689 }, { "epoch": 2.5612832129300847, "grad_norm": 3.0149974822998047, "learning_rate": 1.588303825703098e-05, "loss": 0.5993, "step": 15690 }, { "epoch": 2.561446471572589, "grad_norm": 2.9431753158569336, "learning_rate": 1.5882519792408454e-05, "loss": 0.6382, "step": 15691 }, { "epoch": 2.5616097302150935, "grad_norm": 3.0321381092071533, "learning_rate": 1.5882001303605113e-05, "loss": 0.7566, "step": 15692 }, { "epoch": 2.5617729888575975, "grad_norm": 3.0824217796325684, "learning_rate": 1.588148279062309e-05, "loss": 0.593, "step": 15693 }, { "epoch": 2.561936247500102, "grad_norm": 2.7533323764801025, "learning_rate": 1.5880964253464507e-05, "loss": 0.5686, "step": 15694 }, { "epoch": 2.5620995061426064, "grad_norm": 2.4372284412384033, "learning_rate": 1.58804456921315e-05, "loss": 0.519, "step": 15695 }, { "epoch": 2.562262764785111, "grad_norm": 2.84088397026062, "learning_rate": 1.5879927106626202e-05, "loss": 0.5216, "step": 15696 }, { "epoch": 2.5624260234276153, "grad_norm": 2.5363144874572754, "learning_rate": 1.5879408496950745e-05, "loss": 0.5815, "step": 15697 }, { "epoch": 2.5625892820701197, "grad_norm": 2.4885501861572266, "learning_rate": 1.5878889863107262e-05, "loss": 0.4679, "step": 15698 }, { "epoch": 2.5627525407126237, "grad_norm": 2.866522789001465, "learning_rate": 1.587837120509788e-05, "loss": 0.558, "step": 15699 }, { "epoch": 2.562915799355128, "grad_norm": 2.4896512031555176, "learning_rate": 1.5877852522924733e-05, "loss": 0.5153, "step": 15700 }, { "epoch": 2.5630790579976326, "grad_norm": 2.711503028869629, "learning_rate": 1.587733381658995e-05, "loss": 0.4677, "step": 15701 }, { "epoch": 2.563242316640137, "grad_norm": 3.184856414794922, "learning_rate": 1.5876815086095672e-05, "loss": 0.6845, "step": 15702 }, { "epoch": 2.5634055752826415, "grad_norm": 2.56231689453125, "learning_rate": 1.5876296331444028e-05, "loss": 0.5516, "step": 15703 }, { "epoch": 2.563568833925146, "grad_norm": 3.263138771057129, "learning_rate": 1.5875777552637148e-05, "loss": 0.6223, "step": 15704 }, { "epoch": 2.5637320925676503, "grad_norm": 3.388326644897461, "learning_rate": 1.587525874967716e-05, "loss": 0.6612, "step": 15705 }, { "epoch": 2.5638953512101548, "grad_norm": 3.402106285095215, "learning_rate": 1.5874739922566208e-05, "loss": 0.6561, "step": 15706 }, { "epoch": 2.564058609852659, "grad_norm": 3.1607666015625, "learning_rate": 1.5874221071306414e-05, "loss": 0.6909, "step": 15707 }, { "epoch": 2.5642218684951636, "grad_norm": 2.6454896926879883, "learning_rate": 1.5873702195899916e-05, "loss": 0.5265, "step": 15708 }, { "epoch": 2.564385127137668, "grad_norm": 3.2762839794158936, "learning_rate": 1.5873183296348848e-05, "loss": 0.6866, "step": 15709 }, { "epoch": 2.564548385780172, "grad_norm": 2.503110647201538, "learning_rate": 1.5872664372655343e-05, "loss": 0.5449, "step": 15710 }, { "epoch": 2.5647116444226765, "grad_norm": 3.162259817123413, "learning_rate": 1.5872145424821524e-05, "loss": 0.6463, "step": 15711 }, { "epoch": 2.564874903065181, "grad_norm": 2.9942047595977783, "learning_rate": 1.587162645284954e-05, "loss": 0.528, "step": 15712 }, { "epoch": 2.5650381617076854, "grad_norm": 2.738142251968384, "learning_rate": 1.5871107456741514e-05, "loss": 0.5018, "step": 15713 }, { "epoch": 2.56520142035019, "grad_norm": 2.7142140865325928, "learning_rate": 1.5870588436499582e-05, "loss": 0.5853, "step": 15714 }, { "epoch": 2.5653646789926943, "grad_norm": 2.874732732772827, "learning_rate": 1.587006939212588e-05, "loss": 0.5662, "step": 15715 }, { "epoch": 2.5655279376351987, "grad_norm": 2.5450925827026367, "learning_rate": 1.5869550323622535e-05, "loss": 0.5741, "step": 15716 }, { "epoch": 2.5656911962777027, "grad_norm": 2.6801223754882812, "learning_rate": 1.5869031230991687e-05, "loss": 0.5336, "step": 15717 }, { "epoch": 2.565854454920207, "grad_norm": 2.6368408203125, "learning_rate": 1.586851211423547e-05, "loss": 0.564, "step": 15718 }, { "epoch": 2.5660177135627116, "grad_norm": 2.9067189693450928, "learning_rate": 1.586799297335601e-05, "loss": 0.5632, "step": 15719 }, { "epoch": 2.566180972205216, "grad_norm": 2.8032307624816895, "learning_rate": 1.5867473808355454e-05, "loss": 0.6094, "step": 15720 }, { "epoch": 2.5663442308477205, "grad_norm": 2.5983943939208984, "learning_rate": 1.5866954619235925e-05, "loss": 0.4886, "step": 15721 }, { "epoch": 2.566507489490225, "grad_norm": 3.3335492610931396, "learning_rate": 1.586643540599956e-05, "loss": 0.6934, "step": 15722 }, { "epoch": 2.5666707481327293, "grad_norm": 2.6549739837646484, "learning_rate": 1.5865916168648495e-05, "loss": 0.5795, "step": 15723 }, { "epoch": 2.5668340067752338, "grad_norm": 3.442060708999634, "learning_rate": 1.5865396907184868e-05, "loss": 0.7347, "step": 15724 }, { "epoch": 2.566997265417738, "grad_norm": 3.048736572265625, "learning_rate": 1.58648776216108e-05, "loss": 0.5808, "step": 15725 }, { "epoch": 2.5671605240602426, "grad_norm": 2.910565137863159, "learning_rate": 1.586435831192844e-05, "loss": 0.5689, "step": 15726 }, { "epoch": 2.567323782702747, "grad_norm": 2.649531841278076, "learning_rate": 1.5863838978139918e-05, "loss": 0.5226, "step": 15727 }, { "epoch": 2.567487041345251, "grad_norm": 2.6832115650177, "learning_rate": 1.5863319620247366e-05, "loss": 0.5844, "step": 15728 }, { "epoch": 2.5676502999877555, "grad_norm": 2.376314401626587, "learning_rate": 1.5862800238252923e-05, "loss": 0.5169, "step": 15729 }, { "epoch": 2.56781355863026, "grad_norm": 3.1341969966888428, "learning_rate": 1.5862280832158718e-05, "loss": 0.6463, "step": 15730 }, { "epoch": 2.5679768172727644, "grad_norm": 2.590333938598633, "learning_rate": 1.5861761401966893e-05, "loss": 0.5721, "step": 15731 }, { "epoch": 2.568140075915269, "grad_norm": 2.7527811527252197, "learning_rate": 1.586124194767958e-05, "loss": 0.5544, "step": 15732 }, { "epoch": 2.5683033345577733, "grad_norm": 3.0972909927368164, "learning_rate": 1.5860722469298915e-05, "loss": 0.5651, "step": 15733 }, { "epoch": 2.5684665932002777, "grad_norm": 2.5398001670837402, "learning_rate": 1.5860202966827035e-05, "loss": 0.4799, "step": 15734 }, { "epoch": 2.5686298518427817, "grad_norm": 2.681824207305908, "learning_rate": 1.5859683440266066e-05, "loss": 0.567, "step": 15735 }, { "epoch": 2.568793110485286, "grad_norm": 3.2499301433563232, "learning_rate": 1.585916388961816e-05, "loss": 0.6761, "step": 15736 }, { "epoch": 2.5689563691277906, "grad_norm": 3.5157699584960938, "learning_rate": 1.5858644314885438e-05, "loss": 0.7338, "step": 15737 }, { "epoch": 2.569119627770295, "grad_norm": 2.8854031562805176, "learning_rate": 1.585812471607004e-05, "loss": 0.6013, "step": 15738 }, { "epoch": 2.5692828864127994, "grad_norm": 3.023226261138916, "learning_rate": 1.5857605093174108e-05, "loss": 0.5755, "step": 15739 }, { "epoch": 2.569446145055304, "grad_norm": 3.28991436958313, "learning_rate": 1.5857085446199772e-05, "loss": 0.6077, "step": 15740 }, { "epoch": 2.5696094036978083, "grad_norm": 2.6458897590637207, "learning_rate": 1.585656577514917e-05, "loss": 0.5405, "step": 15741 }, { "epoch": 2.5697726623403128, "grad_norm": 3.3130269050598145, "learning_rate": 1.585604608002443e-05, "loss": 0.6744, "step": 15742 }, { "epoch": 2.569935920982817, "grad_norm": 3.1288654804229736, "learning_rate": 1.585552636082771e-05, "loss": 0.573, "step": 15743 }, { "epoch": 2.5700991796253216, "grad_norm": 2.932492733001709, "learning_rate": 1.585500661756112e-05, "loss": 0.5517, "step": 15744 }, { "epoch": 2.570262438267826, "grad_norm": 2.910283327102661, "learning_rate": 1.5854486850226814e-05, "loss": 0.5556, "step": 15745 }, { "epoch": 2.57042569691033, "grad_norm": 2.9272782802581787, "learning_rate": 1.5853967058826923e-05, "loss": 0.5995, "step": 15746 }, { "epoch": 2.5705889555528345, "grad_norm": 3.1720387935638428, "learning_rate": 1.585344724336358e-05, "loss": 0.5788, "step": 15747 }, { "epoch": 2.570752214195339, "grad_norm": 2.7658016681671143, "learning_rate": 1.585292740383893e-05, "loss": 0.6236, "step": 15748 }, { "epoch": 2.5709154728378434, "grad_norm": 2.771047830581665, "learning_rate": 1.5852407540255103e-05, "loss": 0.54, "step": 15749 }, { "epoch": 2.571078731480348, "grad_norm": 2.7499277591705322, "learning_rate": 1.5851887652614238e-05, "loss": 0.5281, "step": 15750 }, { "epoch": 2.5712419901228523, "grad_norm": 2.681758403778076, "learning_rate": 1.5851367740918474e-05, "loss": 0.5332, "step": 15751 }, { "epoch": 2.5714052487653563, "grad_norm": 2.1021060943603516, "learning_rate": 1.5850847805169946e-05, "loss": 0.4322, "step": 15752 }, { "epoch": 2.5715685074078607, "grad_norm": 2.9523351192474365, "learning_rate": 1.5850327845370794e-05, "loss": 0.5714, "step": 15753 }, { "epoch": 2.571731766050365, "grad_norm": 2.751269817352295, "learning_rate": 1.584980786152315e-05, "loss": 0.5956, "step": 15754 }, { "epoch": 2.5718950246928696, "grad_norm": 3.220691204071045, "learning_rate": 1.5849287853629157e-05, "loss": 0.6453, "step": 15755 }, { "epoch": 2.572058283335374, "grad_norm": 2.4140207767486572, "learning_rate": 1.5848767821690947e-05, "loss": 0.6034, "step": 15756 }, { "epoch": 2.5722215419778784, "grad_norm": 2.731849431991577, "learning_rate": 1.5848247765710666e-05, "loss": 0.5527, "step": 15757 }, { "epoch": 2.572384800620383, "grad_norm": 3.016887903213501, "learning_rate": 1.5847727685690445e-05, "loss": 0.5659, "step": 15758 }, { "epoch": 2.5725480592628873, "grad_norm": 2.2877941131591797, "learning_rate": 1.584720758163242e-05, "loss": 0.4637, "step": 15759 }, { "epoch": 2.5727113179053918, "grad_norm": 2.7991957664489746, "learning_rate": 1.5846687453538736e-05, "loss": 0.6301, "step": 15760 }, { "epoch": 2.572874576547896, "grad_norm": 2.956998109817505, "learning_rate": 1.5846167301411525e-05, "loss": 0.6192, "step": 15761 }, { "epoch": 2.5730378351904006, "grad_norm": 2.894676923751831, "learning_rate": 1.584564712525293e-05, "loss": 0.6317, "step": 15762 }, { "epoch": 2.5732010938329046, "grad_norm": 2.832871437072754, "learning_rate": 1.5845126925065088e-05, "loss": 0.4946, "step": 15763 }, { "epoch": 2.573364352475409, "grad_norm": 2.8265295028686523, "learning_rate": 1.5844606700850135e-05, "loss": 0.5082, "step": 15764 }, { "epoch": 2.5735276111179135, "grad_norm": 2.6181609630584717, "learning_rate": 1.584408645261021e-05, "loss": 0.5318, "step": 15765 }, { "epoch": 2.573690869760418, "grad_norm": 2.6993327140808105, "learning_rate": 1.5843566180347455e-05, "loss": 0.4709, "step": 15766 }, { "epoch": 2.5738541284029224, "grad_norm": 3.246365547180176, "learning_rate": 1.5843045884064e-05, "loss": 0.6004, "step": 15767 }, { "epoch": 2.574017387045427, "grad_norm": 3.7005419731140137, "learning_rate": 1.5842525563761997e-05, "loss": 0.7134, "step": 15768 }, { "epoch": 2.5741806456879313, "grad_norm": 3.095392942428589, "learning_rate": 1.5842005219443572e-05, "loss": 0.5646, "step": 15769 }, { "epoch": 2.5743439043304353, "grad_norm": 3.0782501697540283, "learning_rate": 1.5841484851110872e-05, "loss": 0.571, "step": 15770 }, { "epoch": 2.5745071629729397, "grad_norm": 2.938829183578491, "learning_rate": 1.5840964458766032e-05, "loss": 0.6042, "step": 15771 }, { "epoch": 2.574670421615444, "grad_norm": 3.291740894317627, "learning_rate": 1.5840444042411192e-05, "loss": 0.6371, "step": 15772 }, { "epoch": 2.5748336802579486, "grad_norm": 2.7729403972625732, "learning_rate": 1.5839923602048495e-05, "loss": 0.6087, "step": 15773 }, { "epoch": 2.574996938900453, "grad_norm": 3.0797603130340576, "learning_rate": 1.5839403137680074e-05, "loss": 0.5135, "step": 15774 }, { "epoch": 2.5751601975429574, "grad_norm": 2.74008846282959, "learning_rate": 1.5838882649308074e-05, "loss": 0.546, "step": 15775 }, { "epoch": 2.575323456185462, "grad_norm": 2.279797315597534, "learning_rate": 1.5838362136934633e-05, "loss": 0.4695, "step": 15776 }, { "epoch": 2.5754867148279663, "grad_norm": 2.760199785232544, "learning_rate": 1.583784160056189e-05, "loss": 0.5889, "step": 15777 }, { "epoch": 2.5756499734704708, "grad_norm": 2.7259020805358887, "learning_rate": 1.583732104019198e-05, "loss": 0.5566, "step": 15778 }, { "epoch": 2.575813232112975, "grad_norm": 2.9146010875701904, "learning_rate": 1.583680045582705e-05, "loss": 0.5672, "step": 15779 }, { "epoch": 2.5759764907554796, "grad_norm": 2.941276788711548, "learning_rate": 1.5836279847469236e-05, "loss": 0.6272, "step": 15780 }, { "epoch": 2.5761397493979836, "grad_norm": 2.2999002933502197, "learning_rate": 1.583575921512068e-05, "loss": 0.468, "step": 15781 }, { "epoch": 2.576303008040488, "grad_norm": 2.7421822547912598, "learning_rate": 1.5835238558783523e-05, "loss": 0.5895, "step": 15782 }, { "epoch": 2.5764662666829925, "grad_norm": 3.4847121238708496, "learning_rate": 1.58347178784599e-05, "loss": 0.6506, "step": 15783 }, { "epoch": 2.576629525325497, "grad_norm": 2.412620782852173, "learning_rate": 1.583419717415196e-05, "loss": 0.4819, "step": 15784 }, { "epoch": 2.5767927839680014, "grad_norm": 3.036936044692993, "learning_rate": 1.5833676445861838e-05, "loss": 0.5697, "step": 15785 }, { "epoch": 2.576956042610506, "grad_norm": 2.4481117725372314, "learning_rate": 1.5833155693591668e-05, "loss": 0.51, "step": 15786 }, { "epoch": 2.57711930125301, "grad_norm": 2.6234419345855713, "learning_rate": 1.5832634917343605e-05, "loss": 0.509, "step": 15787 }, { "epoch": 2.5772825598955142, "grad_norm": 3.86741304397583, "learning_rate": 1.583211411711978e-05, "loss": 0.6557, "step": 15788 }, { "epoch": 2.5774458185380187, "grad_norm": 2.8865904808044434, "learning_rate": 1.5831593292922333e-05, "loss": 0.6205, "step": 15789 }, { "epoch": 2.577609077180523, "grad_norm": 3.3289895057678223, "learning_rate": 1.583107244475341e-05, "loss": 0.5614, "step": 15790 }, { "epoch": 2.5777723358230276, "grad_norm": 2.725668430328369, "learning_rate": 1.5830551572615153e-05, "loss": 0.536, "step": 15791 }, { "epoch": 2.577935594465532, "grad_norm": 2.4280383586883545, "learning_rate": 1.5830030676509695e-05, "loss": 0.4631, "step": 15792 }, { "epoch": 2.5780988531080364, "grad_norm": 2.480043888092041, "learning_rate": 1.5829509756439185e-05, "loss": 0.5074, "step": 15793 }, { "epoch": 2.578262111750541, "grad_norm": 2.9889140129089355, "learning_rate": 1.582898881240576e-05, "loss": 0.5781, "step": 15794 }, { "epoch": 2.5784253703930453, "grad_norm": 3.1059951782226562, "learning_rate": 1.5828467844411566e-05, "loss": 0.6749, "step": 15795 }, { "epoch": 2.5785886290355498, "grad_norm": 2.6678168773651123, "learning_rate": 1.582794685245874e-05, "loss": 0.4955, "step": 15796 }, { "epoch": 2.578751887678054, "grad_norm": 2.9145894050598145, "learning_rate": 1.5827425836549422e-05, "loss": 0.5207, "step": 15797 }, { "epoch": 2.578915146320558, "grad_norm": 2.6222262382507324, "learning_rate": 1.5826904796685763e-05, "loss": 0.5254, "step": 15798 }, { "epoch": 2.5790784049630626, "grad_norm": 2.6206233501434326, "learning_rate": 1.5826383732869896e-05, "loss": 0.4846, "step": 15799 }, { "epoch": 2.579241663605567, "grad_norm": 3.070037603378296, "learning_rate": 1.5825862645103962e-05, "loss": 0.6443, "step": 15800 }, { "epoch": 2.5794049222480715, "grad_norm": 2.904264450073242, "learning_rate": 1.582534153339011e-05, "loss": 0.5782, "step": 15801 }, { "epoch": 2.579568180890576, "grad_norm": 3.0443029403686523, "learning_rate": 1.582482039773048e-05, "loss": 0.6021, "step": 15802 }, { "epoch": 2.5797314395330804, "grad_norm": 2.9958841800689697, "learning_rate": 1.582429923812721e-05, "loss": 0.6537, "step": 15803 }, { "epoch": 2.579894698175585, "grad_norm": 2.893018960952759, "learning_rate": 1.5823778054582443e-05, "loss": 0.5566, "step": 15804 }, { "epoch": 2.580057956818089, "grad_norm": 2.9000720977783203, "learning_rate": 1.5823256847098326e-05, "loss": 0.5486, "step": 15805 }, { "epoch": 2.5802212154605932, "grad_norm": 3.1062369346618652, "learning_rate": 1.5822735615677e-05, "loss": 0.8859, "step": 15806 }, { "epoch": 2.5803844741030977, "grad_norm": 2.7053229808807373, "learning_rate": 1.5822214360320603e-05, "loss": 0.5035, "step": 15807 }, { "epoch": 2.580547732745602, "grad_norm": 2.550670623779297, "learning_rate": 1.5821693081031284e-05, "loss": 0.5223, "step": 15808 }, { "epoch": 2.5807109913881066, "grad_norm": 3.0690290927886963, "learning_rate": 1.5821171777811183e-05, "loss": 0.5545, "step": 15809 }, { "epoch": 2.580874250030611, "grad_norm": 2.397104263305664, "learning_rate": 1.5820650450662442e-05, "loss": 0.513, "step": 15810 }, { "epoch": 2.5810375086731154, "grad_norm": 2.3308165073394775, "learning_rate": 1.5820129099587206e-05, "loss": 0.5059, "step": 15811 }, { "epoch": 2.58120076731562, "grad_norm": 2.95875883102417, "learning_rate": 1.581960772458762e-05, "loss": 0.5752, "step": 15812 }, { "epoch": 2.5813640259581243, "grad_norm": 2.9736554622650146, "learning_rate": 1.581908632566582e-05, "loss": 0.5713, "step": 15813 }, { "epoch": 2.5815272846006287, "grad_norm": 2.5233049392700195, "learning_rate": 1.581856490282395e-05, "loss": 0.5418, "step": 15814 }, { "epoch": 2.581690543243133, "grad_norm": 2.626971483230591, "learning_rate": 1.581804345606416e-05, "loss": 0.5769, "step": 15815 }, { "epoch": 2.581853801885637, "grad_norm": 2.865525484085083, "learning_rate": 1.581752198538859e-05, "loss": 0.5854, "step": 15816 }, { "epoch": 2.5820170605281416, "grad_norm": 2.9242634773254395, "learning_rate": 1.5817000490799385e-05, "loss": 0.5203, "step": 15817 }, { "epoch": 2.582180319170646, "grad_norm": 2.870509386062622, "learning_rate": 1.5816478972298688e-05, "loss": 0.6027, "step": 15818 }, { "epoch": 2.5823435778131505, "grad_norm": 3.4637744426727295, "learning_rate": 1.581595742988864e-05, "loss": 0.687, "step": 15819 }, { "epoch": 2.582506836455655, "grad_norm": 2.2399094104766846, "learning_rate": 1.5815435863571388e-05, "loss": 0.4723, "step": 15820 }, { "epoch": 2.5826700950981594, "grad_norm": 3.3379740715026855, "learning_rate": 1.5814914273349077e-05, "loss": 0.5645, "step": 15821 }, { "epoch": 2.582833353740664, "grad_norm": 2.8858847618103027, "learning_rate": 1.5814392659223846e-05, "loss": 0.6592, "step": 15822 }, { "epoch": 2.582996612383168, "grad_norm": 2.5724680423736572, "learning_rate": 1.5813871021197845e-05, "loss": 0.423, "step": 15823 }, { "epoch": 2.5831598710256722, "grad_norm": 3.129789352416992, "learning_rate": 1.5813349359273213e-05, "loss": 0.6396, "step": 15824 }, { "epoch": 2.5833231296681767, "grad_norm": 3.070671796798706, "learning_rate": 1.5812827673452097e-05, "loss": 0.6682, "step": 15825 }, { "epoch": 2.583486388310681, "grad_norm": 2.607150077819824, "learning_rate": 1.5812305963736643e-05, "loss": 0.5354, "step": 15826 }, { "epoch": 2.5836496469531856, "grad_norm": 2.687081813812256, "learning_rate": 1.5811784230128992e-05, "loss": 0.6216, "step": 15827 }, { "epoch": 2.58381290559569, "grad_norm": 3.0202295780181885, "learning_rate": 1.581126247263129e-05, "loss": 0.528, "step": 15828 }, { "epoch": 2.5839761642381944, "grad_norm": 3.0937061309814453, "learning_rate": 1.5810740691245686e-05, "loss": 0.5829, "step": 15829 }, { "epoch": 2.584139422880699, "grad_norm": 3.1712403297424316, "learning_rate": 1.5810218885974317e-05, "loss": 0.5481, "step": 15830 }, { "epoch": 2.5843026815232033, "grad_norm": 2.5360772609710693, "learning_rate": 1.5809697056819337e-05, "loss": 0.5185, "step": 15831 }, { "epoch": 2.5844659401657077, "grad_norm": 2.879218101501465, "learning_rate": 1.5809175203782882e-05, "loss": 0.5488, "step": 15832 }, { "epoch": 2.584629198808212, "grad_norm": 2.635979175567627, "learning_rate": 1.5808653326867102e-05, "loss": 0.4616, "step": 15833 }, { "epoch": 2.584792457450716, "grad_norm": 2.88338565826416, "learning_rate": 1.5808131426074144e-05, "loss": 0.6691, "step": 15834 }, { "epoch": 2.5849557160932206, "grad_norm": 3.0269358158111572, "learning_rate": 1.5807609501406146e-05, "loss": 0.5896, "step": 15835 }, { "epoch": 2.585118974735725, "grad_norm": 2.445085287094116, "learning_rate": 1.5807087552865263e-05, "loss": 0.4979, "step": 15836 }, { "epoch": 2.5852822333782295, "grad_norm": 2.45625376701355, "learning_rate": 1.5806565580453633e-05, "loss": 0.4681, "step": 15837 }, { "epoch": 2.585445492020734, "grad_norm": 2.945485830307007, "learning_rate": 1.5806043584173404e-05, "loss": 0.6288, "step": 15838 }, { "epoch": 2.5856087506632384, "grad_norm": 2.68692946434021, "learning_rate": 1.580552156402672e-05, "loss": 0.4929, "step": 15839 }, { "epoch": 2.5857720093057424, "grad_norm": 2.6931045055389404, "learning_rate": 1.5804999520015735e-05, "loss": 0.5, "step": 15840 }, { "epoch": 2.585935267948247, "grad_norm": 2.9257404804229736, "learning_rate": 1.5804477452142587e-05, "loss": 0.522, "step": 15841 }, { "epoch": 2.5860985265907512, "grad_norm": 3.144087314605713, "learning_rate": 1.5803955360409426e-05, "loss": 0.6303, "step": 15842 }, { "epoch": 2.5862617852332557, "grad_norm": 2.573451519012451, "learning_rate": 1.580343324481839e-05, "loss": 0.5901, "step": 15843 }, { "epoch": 2.58642504387576, "grad_norm": 2.806211233139038, "learning_rate": 1.5802911105371635e-05, "loss": 0.5512, "step": 15844 }, { "epoch": 2.5865883025182645, "grad_norm": 2.7870562076568604, "learning_rate": 1.58023889420713e-05, "loss": 0.5384, "step": 15845 }, { "epoch": 2.586751561160769, "grad_norm": 2.510267972946167, "learning_rate": 1.5801866754919543e-05, "loss": 0.5535, "step": 15846 }, { "epoch": 2.5869148198032734, "grad_norm": 3.160024881362915, "learning_rate": 1.5801344543918495e-05, "loss": 0.6203, "step": 15847 }, { "epoch": 2.587078078445778, "grad_norm": 2.9480366706848145, "learning_rate": 1.5800822309070315e-05, "loss": 0.6312, "step": 15848 }, { "epoch": 2.5872413370882823, "grad_norm": 2.857520580291748, "learning_rate": 1.580030005037714e-05, "loss": 0.5761, "step": 15849 }, { "epoch": 2.5874045957307867, "grad_norm": 2.4956367015838623, "learning_rate": 1.579977776784112e-05, "loss": 0.5099, "step": 15850 }, { "epoch": 2.5875678543732907, "grad_norm": 3.2471506595611572, "learning_rate": 1.5799255461464414e-05, "loss": 0.5945, "step": 15851 }, { "epoch": 2.587731113015795, "grad_norm": 2.718384027481079, "learning_rate": 1.579873313124915e-05, "loss": 0.5777, "step": 15852 }, { "epoch": 2.5878943716582996, "grad_norm": 3.008117437362671, "learning_rate": 1.5798210777197488e-05, "loss": 0.5533, "step": 15853 }, { "epoch": 2.588057630300804, "grad_norm": 2.995067834854126, "learning_rate": 1.579768839931157e-05, "loss": 0.5765, "step": 15854 }, { "epoch": 2.5882208889433085, "grad_norm": 2.4858901500701904, "learning_rate": 1.5797165997593538e-05, "loss": 0.495, "step": 15855 }, { "epoch": 2.588384147585813, "grad_norm": 2.6571156978607178, "learning_rate": 1.5796643572045552e-05, "loss": 0.4995, "step": 15856 }, { "epoch": 2.5885474062283174, "grad_norm": 2.149855375289917, "learning_rate": 1.579612112266975e-05, "loss": 0.3963, "step": 15857 }, { "epoch": 2.5887106648708214, "grad_norm": 2.7781593799591064, "learning_rate": 1.5795598649468285e-05, "loss": 0.5474, "step": 15858 }, { "epoch": 2.588873923513326, "grad_norm": 3.2843823432922363, "learning_rate": 1.5795076152443303e-05, "loss": 0.6574, "step": 15859 }, { "epoch": 2.5890371821558302, "grad_norm": 3.003758192062378, "learning_rate": 1.5794553631596952e-05, "loss": 0.6339, "step": 15860 }, { "epoch": 2.5892004407983347, "grad_norm": 3.1192235946655273, "learning_rate": 1.5794031086931374e-05, "loss": 0.6627, "step": 15861 }, { "epoch": 2.589363699440839, "grad_norm": 2.5710949897766113, "learning_rate": 1.5793508518448727e-05, "loss": 0.4635, "step": 15862 }, { "epoch": 2.5895269580833435, "grad_norm": 2.5780110359191895, "learning_rate": 1.579298592615115e-05, "loss": 0.4472, "step": 15863 }, { "epoch": 2.589690216725848, "grad_norm": 3.4873123168945312, "learning_rate": 1.5792463310040798e-05, "loss": 0.6366, "step": 15864 }, { "epoch": 2.5898534753683524, "grad_norm": 2.2010703086853027, "learning_rate": 1.5791940670119813e-05, "loss": 0.4789, "step": 15865 }, { "epoch": 2.590016734010857, "grad_norm": 2.7605371475219727, "learning_rate": 1.579141800639035e-05, "loss": 0.5788, "step": 15866 }, { "epoch": 2.5901799926533613, "grad_norm": 3.3399205207824707, "learning_rate": 1.5790895318854556e-05, "loss": 0.6988, "step": 15867 }, { "epoch": 2.5903432512958657, "grad_norm": 3.0009467601776123, "learning_rate": 1.5790372607514575e-05, "loss": 0.5748, "step": 15868 }, { "epoch": 2.5905065099383697, "grad_norm": 2.7795207500457764, "learning_rate": 1.5789849872372558e-05, "loss": 0.5667, "step": 15869 }, { "epoch": 2.590669768580874, "grad_norm": 3.4438705444335938, "learning_rate": 1.5789327113430656e-05, "loss": 0.659, "step": 15870 }, { "epoch": 2.5908330272233786, "grad_norm": 2.6946396827697754, "learning_rate": 1.5788804330691015e-05, "loss": 0.5996, "step": 15871 }, { "epoch": 2.590996285865883, "grad_norm": 2.8127050399780273, "learning_rate": 1.5788281524155786e-05, "loss": 0.5888, "step": 15872 }, { "epoch": 2.5911595445083875, "grad_norm": 2.8713972568511963, "learning_rate": 1.578775869382712e-05, "loss": 0.5395, "step": 15873 }, { "epoch": 2.591322803150892, "grad_norm": 2.9339675903320312, "learning_rate": 1.578723583970716e-05, "loss": 0.6069, "step": 15874 }, { "epoch": 2.591486061793396, "grad_norm": 2.466607093811035, "learning_rate": 1.578671296179806e-05, "loss": 0.551, "step": 15875 }, { "epoch": 2.5916493204359003, "grad_norm": 4.282415390014648, "learning_rate": 1.5786190060101965e-05, "loss": 0.8885, "step": 15876 }, { "epoch": 2.591812579078405, "grad_norm": 3.254031181335449, "learning_rate": 1.5785667134621033e-05, "loss": 0.6013, "step": 15877 }, { "epoch": 2.5919758377209092, "grad_norm": 2.995675802230835, "learning_rate": 1.5785144185357405e-05, "loss": 0.5631, "step": 15878 }, { "epoch": 2.5921390963634137, "grad_norm": 3.469531774520874, "learning_rate": 1.5784621212313235e-05, "loss": 0.6566, "step": 15879 }, { "epoch": 2.592302355005918, "grad_norm": 2.748042583465576, "learning_rate": 1.578409821549067e-05, "loss": 0.5396, "step": 15880 }, { "epoch": 2.5924656136484225, "grad_norm": 2.8952887058258057, "learning_rate": 1.578357519489186e-05, "loss": 0.6121, "step": 15881 }, { "epoch": 2.592628872290927, "grad_norm": 3.1286351680755615, "learning_rate": 1.5783052150518958e-05, "loss": 0.5023, "step": 15882 }, { "epoch": 2.5927921309334314, "grad_norm": 2.957207441329956, "learning_rate": 1.5782529082374113e-05, "loss": 0.5397, "step": 15883 }, { "epoch": 2.592955389575936, "grad_norm": 2.264228105545044, "learning_rate": 1.578200599045947e-05, "loss": 0.4727, "step": 15884 }, { "epoch": 2.5931186482184403, "grad_norm": 3.18639874458313, "learning_rate": 1.5781482874777188e-05, "loss": 0.6609, "step": 15885 }, { "epoch": 2.5932819068609443, "grad_norm": 2.694938898086548, "learning_rate": 1.578095973532941e-05, "loss": 0.5224, "step": 15886 }, { "epoch": 2.5934451655034487, "grad_norm": 2.8175032138824463, "learning_rate": 1.5780436572118295e-05, "loss": 0.5833, "step": 15887 }, { "epoch": 2.593608424145953, "grad_norm": 2.9677491188049316, "learning_rate": 1.5779913385145983e-05, "loss": 0.5982, "step": 15888 }, { "epoch": 2.5937716827884576, "grad_norm": 2.5642991065979004, "learning_rate": 1.577939017441463e-05, "loss": 0.6553, "step": 15889 }, { "epoch": 2.593934941430962, "grad_norm": 2.945268392562866, "learning_rate": 1.577886693992639e-05, "loss": 0.5823, "step": 15890 }, { "epoch": 2.5940982000734665, "grad_norm": 3.1275854110717773, "learning_rate": 1.5778343681683407e-05, "loss": 0.6788, "step": 15891 }, { "epoch": 2.594261458715971, "grad_norm": 3.2390220165252686, "learning_rate": 1.5777820399687832e-05, "loss": 0.6219, "step": 15892 }, { "epoch": 2.594424717358475, "grad_norm": 3.080944538116455, "learning_rate": 1.5777297093941823e-05, "loss": 0.6578, "step": 15893 }, { "epoch": 2.5945879760009793, "grad_norm": 2.4027647972106934, "learning_rate": 1.5776773764447527e-05, "loss": 0.4799, "step": 15894 }, { "epoch": 2.594751234643484, "grad_norm": 3.1036489009857178, "learning_rate": 1.5776250411207096e-05, "loss": 0.6637, "step": 15895 }, { "epoch": 2.594914493285988, "grad_norm": 2.575961112976074, "learning_rate": 1.5775727034222675e-05, "loss": 0.5389, "step": 15896 }, { "epoch": 2.5950777519284927, "grad_norm": 2.8781206607818604, "learning_rate": 1.577520363349643e-05, "loss": 0.6472, "step": 15897 }, { "epoch": 2.595241010570997, "grad_norm": 3.242734670639038, "learning_rate": 1.5774680209030498e-05, "loss": 0.6179, "step": 15898 }, { "epoch": 2.5954042692135015, "grad_norm": 2.807757616043091, "learning_rate": 1.5774156760827035e-05, "loss": 0.5393, "step": 15899 }, { "epoch": 2.595567527856006, "grad_norm": 2.972064733505249, "learning_rate": 1.5773633288888198e-05, "loss": 0.5718, "step": 15900 }, { "epoch": 2.5957307864985104, "grad_norm": 2.6262130737304688, "learning_rate": 1.577310979321613e-05, "loss": 0.5837, "step": 15901 }, { "epoch": 2.595894045141015, "grad_norm": 2.750997304916382, "learning_rate": 1.5772586273812992e-05, "loss": 0.5231, "step": 15902 }, { "epoch": 2.5960573037835193, "grad_norm": 2.575993776321411, "learning_rate": 1.5772062730680928e-05, "loss": 0.4891, "step": 15903 }, { "epoch": 2.5962205624260233, "grad_norm": 2.859318733215332, "learning_rate": 1.5771539163822094e-05, "loss": 0.6361, "step": 15904 }, { "epoch": 2.5963838210685277, "grad_norm": 3.100778102874756, "learning_rate": 1.5771015573238644e-05, "loss": 0.6034, "step": 15905 }, { "epoch": 2.596547079711032, "grad_norm": 3.3599698543548584, "learning_rate": 1.5770491958932728e-05, "loss": 0.6011, "step": 15906 }, { "epoch": 2.5967103383535366, "grad_norm": 3.0640876293182373, "learning_rate": 1.5769968320906498e-05, "loss": 0.6779, "step": 15907 }, { "epoch": 2.596873596996041, "grad_norm": 3.042482376098633, "learning_rate": 1.5769444659162102e-05, "loss": 0.5788, "step": 15908 }, { "epoch": 2.5970368556385455, "grad_norm": 4.009059906005859, "learning_rate": 1.5768920973701705e-05, "loss": 0.5894, "step": 15909 }, { "epoch": 2.59720011428105, "grad_norm": 2.1814894676208496, "learning_rate": 1.576839726452745e-05, "loss": 0.4841, "step": 15910 }, { "epoch": 2.597363372923554, "grad_norm": 2.6016762256622314, "learning_rate": 1.576787353164149e-05, "loss": 0.4969, "step": 15911 }, { "epoch": 2.5975266315660583, "grad_norm": 2.9411118030548096, "learning_rate": 1.576734977504598e-05, "loss": 0.6377, "step": 15912 }, { "epoch": 2.597689890208563, "grad_norm": 2.7801949977874756, "learning_rate": 1.5766825994743074e-05, "loss": 0.5431, "step": 15913 }, { "epoch": 2.597853148851067, "grad_norm": 3.0590529441833496, "learning_rate": 1.5766302190734927e-05, "loss": 0.5888, "step": 15914 }, { "epoch": 2.5980164074935717, "grad_norm": 2.6939032077789307, "learning_rate": 1.576577836302368e-05, "loss": 0.4921, "step": 15915 }, { "epoch": 2.598179666136076, "grad_norm": 2.415151596069336, "learning_rate": 1.5765254511611502e-05, "loss": 0.4833, "step": 15916 }, { "epoch": 2.5983429247785805, "grad_norm": 3.2111196517944336, "learning_rate": 1.5764730636500537e-05, "loss": 0.6742, "step": 15917 }, { "epoch": 2.598506183421085, "grad_norm": 2.671994209289551, "learning_rate": 1.5764206737692944e-05, "loss": 0.5888, "step": 15918 }, { "epoch": 2.5986694420635894, "grad_norm": 2.786364793777466, "learning_rate": 1.576368281519087e-05, "loss": 0.5911, "step": 15919 }, { "epoch": 2.598832700706094, "grad_norm": 2.9225516319274902, "learning_rate": 1.5763158868996478e-05, "loss": 0.6443, "step": 15920 }, { "epoch": 2.5989959593485983, "grad_norm": 2.890904664993286, "learning_rate": 1.576263489911191e-05, "loss": 0.5661, "step": 15921 }, { "epoch": 2.5991592179911023, "grad_norm": 2.677562952041626, "learning_rate": 1.576211090553933e-05, "loss": 0.5497, "step": 15922 }, { "epoch": 2.5993224766336067, "grad_norm": 2.7621800899505615, "learning_rate": 1.5761586888280884e-05, "loss": 0.5907, "step": 15923 }, { "epoch": 2.599485735276111, "grad_norm": 2.7833852767944336, "learning_rate": 1.5761062847338733e-05, "loss": 0.5636, "step": 15924 }, { "epoch": 2.5996489939186156, "grad_norm": 3.0746090412139893, "learning_rate": 1.576053878271503e-05, "loss": 0.6146, "step": 15925 }, { "epoch": 2.59981225256112, "grad_norm": 2.0972044467926025, "learning_rate": 1.576001469441192e-05, "loss": 0.4946, "step": 15926 }, { "epoch": 2.5999755112036245, "grad_norm": 2.6410486698150635, "learning_rate": 1.575949058243157e-05, "loss": 0.5427, "step": 15927 }, { "epoch": 2.6001387698461285, "grad_norm": 2.1219472885131836, "learning_rate": 1.5758966446776127e-05, "loss": 0.478, "step": 15928 }, { "epoch": 2.600302028488633, "grad_norm": 3.0247786045074463, "learning_rate": 1.575844228744775e-05, "loss": 0.6934, "step": 15929 }, { "epoch": 2.6004652871311373, "grad_norm": 2.9249303340911865, "learning_rate": 1.5757918104448588e-05, "loss": 0.5765, "step": 15930 }, { "epoch": 2.6006285457736418, "grad_norm": 3.1182713508605957, "learning_rate": 1.57573938977808e-05, "loss": 0.5993, "step": 15931 }, { "epoch": 2.600791804416146, "grad_norm": 2.8848559856414795, "learning_rate": 1.575686966744654e-05, "loss": 0.5442, "step": 15932 }, { "epoch": 2.6009550630586507, "grad_norm": 2.566646099090576, "learning_rate": 1.575634541344796e-05, "loss": 0.5732, "step": 15933 }, { "epoch": 2.601118321701155, "grad_norm": 2.5240938663482666, "learning_rate": 1.575582113578722e-05, "loss": 0.5298, "step": 15934 }, { "epoch": 2.6012815803436595, "grad_norm": 2.8155910968780518, "learning_rate": 1.5755296834466472e-05, "loss": 0.6127, "step": 15935 }, { "epoch": 2.601444838986164, "grad_norm": 2.835352897644043, "learning_rate": 1.5754772509487875e-05, "loss": 0.5816, "step": 15936 }, { "epoch": 2.6016080976286684, "grad_norm": 2.4338223934173584, "learning_rate": 1.5754248160853576e-05, "loss": 0.4711, "step": 15937 }, { "epoch": 2.601771356271173, "grad_norm": 2.911125659942627, "learning_rate": 1.5753723788565737e-05, "loss": 0.6892, "step": 15938 }, { "epoch": 2.601934614913677, "grad_norm": 2.5334255695343018, "learning_rate": 1.5753199392626514e-05, "loss": 0.4877, "step": 15939 }, { "epoch": 2.6020978735561813, "grad_norm": 3.568227767944336, "learning_rate": 1.575267497303806e-05, "loss": 0.7021, "step": 15940 }, { "epoch": 2.6022611321986857, "grad_norm": 2.6779417991638184, "learning_rate": 1.575215052980253e-05, "loss": 0.5667, "step": 15941 }, { "epoch": 2.60242439084119, "grad_norm": 2.913377046585083, "learning_rate": 1.5751626062922084e-05, "loss": 0.5487, "step": 15942 }, { "epoch": 2.6025876494836946, "grad_norm": 3.1261706352233887, "learning_rate": 1.5751101572398874e-05, "loss": 0.5963, "step": 15943 }, { "epoch": 2.602750908126199, "grad_norm": 3.1482126712799072, "learning_rate": 1.575057705823506e-05, "loss": 0.5904, "step": 15944 }, { "epoch": 2.6029141667687035, "grad_norm": 2.684166669845581, "learning_rate": 1.575005252043279e-05, "loss": 0.5794, "step": 15945 }, { "epoch": 2.6030774254112075, "grad_norm": 3.0867934226989746, "learning_rate": 1.5749527958994222e-05, "loss": 0.611, "step": 15946 }, { "epoch": 2.603240684053712, "grad_norm": 2.679202079772949, "learning_rate": 1.574900337392152e-05, "loss": 0.546, "step": 15947 }, { "epoch": 2.6034039426962163, "grad_norm": 2.6830368041992188, "learning_rate": 1.574847876521684e-05, "loss": 0.5567, "step": 15948 }, { "epoch": 2.6035672013387208, "grad_norm": 2.9767253398895264, "learning_rate": 1.5747954132882332e-05, "loss": 0.5623, "step": 15949 }, { "epoch": 2.603730459981225, "grad_norm": 3.0018787384033203, "learning_rate": 1.574742947692015e-05, "loss": 0.6388, "step": 15950 }, { "epoch": 2.6038937186237296, "grad_norm": 2.137129783630371, "learning_rate": 1.574690479733246e-05, "loss": 0.4929, "step": 15951 }, { "epoch": 2.604056977266234, "grad_norm": 2.5662994384765625, "learning_rate": 1.574638009412141e-05, "loss": 0.4896, "step": 15952 }, { "epoch": 2.6042202359087385, "grad_norm": 2.6056106090545654, "learning_rate": 1.5745855367289168e-05, "loss": 0.5283, "step": 15953 }, { "epoch": 2.604383494551243, "grad_norm": 2.471590757369995, "learning_rate": 1.574533061683788e-05, "loss": 0.5097, "step": 15954 }, { "epoch": 2.6045467531937474, "grad_norm": 2.943563222885132, "learning_rate": 1.5744805842769708e-05, "loss": 0.5462, "step": 15955 }, { "epoch": 2.604710011836252, "grad_norm": 2.4921064376831055, "learning_rate": 1.574428104508681e-05, "loss": 0.4899, "step": 15956 }, { "epoch": 2.604873270478756, "grad_norm": 3.164637565612793, "learning_rate": 1.574375622379134e-05, "loss": 0.6395, "step": 15957 }, { "epoch": 2.6050365291212603, "grad_norm": 2.8217713832855225, "learning_rate": 1.5743231378885457e-05, "loss": 0.58, "step": 15958 }, { "epoch": 2.6051997877637647, "grad_norm": 2.9527747631073, "learning_rate": 1.5742706510371317e-05, "loss": 0.5969, "step": 15959 }, { "epoch": 2.605363046406269, "grad_norm": 2.810682773590088, "learning_rate": 1.574218161825108e-05, "loss": 0.5606, "step": 15960 }, { "epoch": 2.6055263050487736, "grad_norm": 2.7932095527648926, "learning_rate": 1.5741656702526906e-05, "loss": 0.4744, "step": 15961 }, { "epoch": 2.605689563691278, "grad_norm": 2.5904324054718018, "learning_rate": 1.5741131763200943e-05, "loss": 0.5882, "step": 15962 }, { "epoch": 2.6058528223337825, "grad_norm": 2.9270384311676025, "learning_rate": 1.574060680027536e-05, "loss": 0.5565, "step": 15963 }, { "epoch": 2.6060160809762865, "grad_norm": 2.6055233478546143, "learning_rate": 1.574008181375231e-05, "loss": 0.5929, "step": 15964 }, { "epoch": 2.606179339618791, "grad_norm": 2.574516773223877, "learning_rate": 1.5739556803633948e-05, "loss": 0.5272, "step": 15965 }, { "epoch": 2.6063425982612953, "grad_norm": 3.253281354904175, "learning_rate": 1.5739031769922437e-05, "loss": 0.5652, "step": 15966 }, { "epoch": 2.6065058569037998, "grad_norm": 2.585170269012451, "learning_rate": 1.5738506712619933e-05, "loss": 0.5333, "step": 15967 }, { "epoch": 2.606669115546304, "grad_norm": 3.110121488571167, "learning_rate": 1.5737981631728596e-05, "loss": 0.6471, "step": 15968 }, { "epoch": 2.6068323741888086, "grad_norm": 3.1752395629882812, "learning_rate": 1.5737456527250582e-05, "loss": 0.598, "step": 15969 }, { "epoch": 2.606995632831313, "grad_norm": 3.4303598403930664, "learning_rate": 1.573693139918805e-05, "loss": 0.7044, "step": 15970 }, { "epoch": 2.6071588914738175, "grad_norm": 2.8179430961608887, "learning_rate": 1.573640624754316e-05, "loss": 0.5481, "step": 15971 }, { "epoch": 2.607322150116322, "grad_norm": 2.285372495651245, "learning_rate": 1.573588107231807e-05, "loss": 0.4023, "step": 15972 }, { "epoch": 2.6074854087588264, "grad_norm": 2.7753591537475586, "learning_rate": 1.573535587351494e-05, "loss": 0.6026, "step": 15973 }, { "epoch": 2.607648667401331, "grad_norm": 3.030442714691162, "learning_rate": 1.5734830651135927e-05, "loss": 0.5855, "step": 15974 }, { "epoch": 2.607811926043835, "grad_norm": 3.2643790245056152, "learning_rate": 1.5734305405183187e-05, "loss": 0.5466, "step": 15975 }, { "epoch": 2.6079751846863393, "grad_norm": 3.31599760055542, "learning_rate": 1.5733780135658886e-05, "loss": 0.7343, "step": 15976 }, { "epoch": 2.6081384433288437, "grad_norm": 3.2725346088409424, "learning_rate": 1.573325484256518e-05, "loss": 0.644, "step": 15977 }, { "epoch": 2.608301701971348, "grad_norm": 2.8653645515441895, "learning_rate": 1.573272952590423e-05, "loss": 0.6011, "step": 15978 }, { "epoch": 2.6084649606138526, "grad_norm": 3.0041565895080566, "learning_rate": 1.573220418567819e-05, "loss": 0.5891, "step": 15979 }, { "epoch": 2.608628219256357, "grad_norm": 3.3367066383361816, "learning_rate": 1.5731678821889225e-05, "loss": 0.7105, "step": 15980 }, { "epoch": 2.608791477898861, "grad_norm": 2.8483614921569824, "learning_rate": 1.573115343453949e-05, "loss": 0.6213, "step": 15981 }, { "epoch": 2.6089547365413654, "grad_norm": 3.2422726154327393, "learning_rate": 1.573062802363115e-05, "loss": 0.634, "step": 15982 }, { "epoch": 2.60911799518387, "grad_norm": 3.8429617881774902, "learning_rate": 1.5730102589166363e-05, "loss": 0.7786, "step": 15983 }, { "epoch": 2.6092812538263743, "grad_norm": 2.881944417953491, "learning_rate": 1.5729577131147283e-05, "loss": 0.5585, "step": 15984 }, { "epoch": 2.6094445124688788, "grad_norm": 2.4549381732940674, "learning_rate": 1.572905164957608e-05, "loss": 0.5222, "step": 15985 }, { "epoch": 2.609607771111383, "grad_norm": 2.657151460647583, "learning_rate": 1.5728526144454908e-05, "loss": 0.4787, "step": 15986 }, { "epoch": 2.6097710297538876, "grad_norm": 2.914555549621582, "learning_rate": 1.5728000615785925e-05, "loss": 0.6425, "step": 15987 }, { "epoch": 2.609934288396392, "grad_norm": 3.0516459941864014, "learning_rate": 1.57274750635713e-05, "loss": 0.6634, "step": 15988 }, { "epoch": 2.6100975470388965, "grad_norm": 2.5080463886260986, "learning_rate": 1.572694948781318e-05, "loss": 0.5035, "step": 15989 }, { "epoch": 2.610260805681401, "grad_norm": 2.7079110145568848, "learning_rate": 1.572642388851374e-05, "loss": 0.5369, "step": 15990 }, { "epoch": 2.6104240643239054, "grad_norm": 2.89731764793396, "learning_rate": 1.572589826567513e-05, "loss": 0.6368, "step": 15991 }, { "epoch": 2.6105873229664094, "grad_norm": 3.2734384536743164, "learning_rate": 1.5725372619299514e-05, "loss": 0.6238, "step": 15992 }, { "epoch": 2.610750581608914, "grad_norm": 2.881875514984131, "learning_rate": 1.5724846949389054e-05, "loss": 0.6016, "step": 15993 }, { "epoch": 2.6109138402514183, "grad_norm": 2.6170244216918945, "learning_rate": 1.572432125594591e-05, "loss": 0.5535, "step": 15994 }, { "epoch": 2.6110770988939227, "grad_norm": 2.5601720809936523, "learning_rate": 1.5723795538972245e-05, "loss": 0.5734, "step": 15995 }, { "epoch": 2.611240357536427, "grad_norm": 2.8178892135620117, "learning_rate": 1.572326979847021e-05, "loss": 0.5886, "step": 15996 }, { "epoch": 2.6114036161789316, "grad_norm": 3.128312587738037, "learning_rate": 1.5722744034441984e-05, "loss": 0.5868, "step": 15997 }, { "epoch": 2.611566874821436, "grad_norm": 3.040944814682007, "learning_rate": 1.5722218246889714e-05, "loss": 0.6111, "step": 15998 }, { "epoch": 2.61173013346394, "grad_norm": 2.5108673572540283, "learning_rate": 1.5721692435815565e-05, "loss": 0.5054, "step": 15999 }, { "epoch": 2.6118933921064444, "grad_norm": 3.7855823040008545, "learning_rate": 1.5721166601221697e-05, "loss": 0.7685, "step": 16000 }, { "epoch": 2.612056650748949, "grad_norm": 3.082524538040161, "learning_rate": 1.5720640743110274e-05, "loss": 0.659, "step": 16001 }, { "epoch": 2.6122199093914533, "grad_norm": 2.6356253623962402, "learning_rate": 1.5720114861483462e-05, "loss": 0.4786, "step": 16002 }, { "epoch": 2.6123831680339578, "grad_norm": 2.3849525451660156, "learning_rate": 1.5719588956343415e-05, "loss": 0.4357, "step": 16003 }, { "epoch": 2.612546426676462, "grad_norm": 3.0831198692321777, "learning_rate": 1.5719063027692294e-05, "loss": 0.6989, "step": 16004 }, { "epoch": 2.6127096853189666, "grad_norm": 2.544755697250366, "learning_rate": 1.571853707553227e-05, "loss": 0.4918, "step": 16005 }, { "epoch": 2.612872943961471, "grad_norm": 2.7353110313415527, "learning_rate": 1.571801109986549e-05, "loss": 0.5118, "step": 16006 }, { "epoch": 2.6130362026039755, "grad_norm": 2.9051570892333984, "learning_rate": 1.5717485100694137e-05, "loss": 0.5548, "step": 16007 }, { "epoch": 2.61319946124648, "grad_norm": 2.614757537841797, "learning_rate": 1.5716959078020354e-05, "loss": 0.6013, "step": 16008 }, { "epoch": 2.6133627198889844, "grad_norm": 2.768673896789551, "learning_rate": 1.5716433031846312e-05, "loss": 0.589, "step": 16009 }, { "epoch": 2.6135259785314884, "grad_norm": 2.594788074493408, "learning_rate": 1.5715906962174177e-05, "loss": 0.5871, "step": 16010 }, { "epoch": 2.613689237173993, "grad_norm": 2.787440299987793, "learning_rate": 1.5715380869006104e-05, "loss": 0.6127, "step": 16011 }, { "epoch": 2.6138524958164973, "grad_norm": 2.823293685913086, "learning_rate": 1.5714854752344257e-05, "loss": 0.6389, "step": 16012 }, { "epoch": 2.6140157544590017, "grad_norm": 2.5912020206451416, "learning_rate": 1.57143286121908e-05, "loss": 0.453, "step": 16013 }, { "epoch": 2.614179013101506, "grad_norm": 2.6311194896698, "learning_rate": 1.5713802448547894e-05, "loss": 0.5227, "step": 16014 }, { "epoch": 2.6143422717440106, "grad_norm": 2.9637746810913086, "learning_rate": 1.5713276261417707e-05, "loss": 0.6025, "step": 16015 }, { "epoch": 2.6145055303865146, "grad_norm": 2.7887821197509766, "learning_rate": 1.57127500508024e-05, "loss": 0.6138, "step": 16016 }, { "epoch": 2.614668789029019, "grad_norm": 2.6407220363616943, "learning_rate": 1.571222381670413e-05, "loss": 0.5725, "step": 16017 }, { "epoch": 2.6148320476715234, "grad_norm": 2.999177932739258, "learning_rate": 1.571169755912507e-05, "loss": 0.5055, "step": 16018 }, { "epoch": 2.614995306314028, "grad_norm": 2.7643349170684814, "learning_rate": 1.5711171278067375e-05, "loss": 0.5581, "step": 16019 }, { "epoch": 2.6151585649565323, "grad_norm": 2.893883228302002, "learning_rate": 1.571064497353321e-05, "loss": 0.5747, "step": 16020 }, { "epoch": 2.6153218235990368, "grad_norm": 2.7558658123016357, "learning_rate": 1.571011864552474e-05, "loss": 0.4856, "step": 16021 }, { "epoch": 2.615485082241541, "grad_norm": 2.589045524597168, "learning_rate": 1.570959229404413e-05, "loss": 0.5009, "step": 16022 }, { "epoch": 2.6156483408840456, "grad_norm": 2.5732812881469727, "learning_rate": 1.5709065919093543e-05, "loss": 0.5256, "step": 16023 }, { "epoch": 2.61581159952655, "grad_norm": 2.2112882137298584, "learning_rate": 1.5708539520675138e-05, "loss": 0.4567, "step": 16024 }, { "epoch": 2.6159748581690545, "grad_norm": 3.16717791557312, "learning_rate": 1.5708013098791087e-05, "loss": 0.6487, "step": 16025 }, { "epoch": 2.616138116811559, "grad_norm": 3.1189780235290527, "learning_rate": 1.5707486653443543e-05, "loss": 0.5575, "step": 16026 }, { "epoch": 2.616301375454063, "grad_norm": 2.766995668411255, "learning_rate": 1.5706960184634684e-05, "loss": 0.5824, "step": 16027 }, { "epoch": 2.6164646340965674, "grad_norm": 2.8486759662628174, "learning_rate": 1.5706433692366662e-05, "loss": 0.4974, "step": 16028 }, { "epoch": 2.616627892739072, "grad_norm": 2.8112149238586426, "learning_rate": 1.5705907176641647e-05, "loss": 0.599, "step": 16029 }, { "epoch": 2.6167911513815763, "grad_norm": 2.827500104904175, "learning_rate": 1.5705380637461806e-05, "loss": 0.5554, "step": 16030 }, { "epoch": 2.6169544100240807, "grad_norm": 2.7092020511627197, "learning_rate": 1.5704854074829293e-05, "loss": 0.5228, "step": 16031 }, { "epoch": 2.617117668666585, "grad_norm": 3.4707388877868652, "learning_rate": 1.5704327488746286e-05, "loss": 0.6833, "step": 16032 }, { "epoch": 2.6172809273090896, "grad_norm": 3.1605730056762695, "learning_rate": 1.5703800879214934e-05, "loss": 0.5318, "step": 16033 }, { "epoch": 2.6174441859515936, "grad_norm": 3.066072940826416, "learning_rate": 1.5703274246237416e-05, "loss": 0.5772, "step": 16034 }, { "epoch": 2.617607444594098, "grad_norm": 2.8248536586761475, "learning_rate": 1.570274758981589e-05, "loss": 0.6168, "step": 16035 }, { "epoch": 2.6177707032366024, "grad_norm": 3.048333168029785, "learning_rate": 1.570222090995252e-05, "loss": 0.5185, "step": 16036 }, { "epoch": 2.617933961879107, "grad_norm": 2.8956305980682373, "learning_rate": 1.570169420664948e-05, "loss": 0.5493, "step": 16037 }, { "epoch": 2.6180972205216113, "grad_norm": 3.0799968242645264, "learning_rate": 1.570116747990892e-05, "loss": 0.7195, "step": 16038 }, { "epoch": 2.6182604791641158, "grad_norm": 2.942423105239868, "learning_rate": 1.5700640729733015e-05, "loss": 0.5485, "step": 16039 }, { "epoch": 2.61842373780662, "grad_norm": 3.0892016887664795, "learning_rate": 1.570011395612393e-05, "loss": 0.5787, "step": 16040 }, { "epoch": 2.6185869964491246, "grad_norm": 2.678035259246826, "learning_rate": 1.569958715908383e-05, "loss": 0.5546, "step": 16041 }, { "epoch": 2.618750255091629, "grad_norm": 3.011876344680786, "learning_rate": 1.569906033861488e-05, "loss": 0.594, "step": 16042 }, { "epoch": 2.6189135137341335, "grad_norm": 3.0094873905181885, "learning_rate": 1.5698533494719238e-05, "loss": 0.5353, "step": 16043 }, { "epoch": 2.619076772376638, "grad_norm": 2.771286964416504, "learning_rate": 1.5698006627399084e-05, "loss": 0.6333, "step": 16044 }, { "epoch": 2.619240031019142, "grad_norm": 2.5022785663604736, "learning_rate": 1.569747973665657e-05, "loss": 0.5296, "step": 16045 }, { "epoch": 2.6194032896616464, "grad_norm": 2.754133462905884, "learning_rate": 1.5696952822493875e-05, "loss": 0.5536, "step": 16046 }, { "epoch": 2.619566548304151, "grad_norm": 3.6535582542419434, "learning_rate": 1.5696425884913155e-05, "loss": 0.7194, "step": 16047 }, { "epoch": 2.6197298069466552, "grad_norm": 3.0708892345428467, "learning_rate": 1.5695898923916577e-05, "loss": 0.6093, "step": 16048 }, { "epoch": 2.6198930655891597, "grad_norm": 3.610100030899048, "learning_rate": 1.5695371939506312e-05, "loss": 0.6484, "step": 16049 }, { "epoch": 2.620056324231664, "grad_norm": 2.6786584854125977, "learning_rate": 1.569484493168452e-05, "loss": 0.5487, "step": 16050 }, { "epoch": 2.6202195828741686, "grad_norm": 2.4009809494018555, "learning_rate": 1.5694317900453377e-05, "loss": 0.4646, "step": 16051 }, { "epoch": 2.6203828415166726, "grad_norm": 3.6485917568206787, "learning_rate": 1.5693790845815037e-05, "loss": 0.6808, "step": 16052 }, { "epoch": 2.620546100159177, "grad_norm": 3.3768715858459473, "learning_rate": 1.5693263767771676e-05, "loss": 0.6478, "step": 16053 }, { "epoch": 2.6207093588016814, "grad_norm": 3.2524139881134033, "learning_rate": 1.5692736666325456e-05, "loss": 0.595, "step": 16054 }, { "epoch": 2.620872617444186, "grad_norm": 3.446322441101074, "learning_rate": 1.5692209541478547e-05, "loss": 0.7653, "step": 16055 }, { "epoch": 2.6210358760866903, "grad_norm": 2.5608415603637695, "learning_rate": 1.5691682393233113e-05, "loss": 0.526, "step": 16056 }, { "epoch": 2.6211991347291947, "grad_norm": 3.064873456954956, "learning_rate": 1.569115522159132e-05, "loss": 0.6293, "step": 16057 }, { "epoch": 2.621362393371699, "grad_norm": 2.998558282852173, "learning_rate": 1.569062802655534e-05, "loss": 0.6513, "step": 16058 }, { "epoch": 2.6215256520142036, "grad_norm": 2.653451919555664, "learning_rate": 1.5690100808127336e-05, "loss": 0.6432, "step": 16059 }, { "epoch": 2.621688910656708, "grad_norm": 3.0797119140625, "learning_rate": 1.5689573566309473e-05, "loss": 0.6304, "step": 16060 }, { "epoch": 2.6218521692992125, "grad_norm": 2.5493581295013428, "learning_rate": 1.5689046301103924e-05, "loss": 0.5721, "step": 16061 }, { "epoch": 2.622015427941717, "grad_norm": 2.858588695526123, "learning_rate": 1.5688519012512854e-05, "loss": 0.5536, "step": 16062 }, { "epoch": 2.622178686584221, "grad_norm": 2.848618507385254, "learning_rate": 1.568799170053843e-05, "loss": 0.5952, "step": 16063 }, { "epoch": 2.6223419452267254, "grad_norm": 2.742619037628174, "learning_rate": 1.5687464365182822e-05, "loss": 0.5378, "step": 16064 }, { "epoch": 2.62250520386923, "grad_norm": 2.7487378120422363, "learning_rate": 1.5686937006448193e-05, "loss": 0.4895, "step": 16065 }, { "epoch": 2.6226684625117342, "grad_norm": 2.291301727294922, "learning_rate": 1.5686409624336714e-05, "loss": 0.4572, "step": 16066 }, { "epoch": 2.6228317211542387, "grad_norm": 2.7075157165527344, "learning_rate": 1.568588221885055e-05, "loss": 0.5199, "step": 16067 }, { "epoch": 2.622994979796743, "grad_norm": 3.1491708755493164, "learning_rate": 1.5685354789991873e-05, "loss": 0.6179, "step": 16068 }, { "epoch": 2.623158238439247, "grad_norm": 2.6051740646362305, "learning_rate": 1.568482733776285e-05, "loss": 0.5286, "step": 16069 }, { "epoch": 2.6233214970817516, "grad_norm": 2.6344616413116455, "learning_rate": 1.5684299862165647e-05, "loss": 0.5788, "step": 16070 }, { "epoch": 2.623484755724256, "grad_norm": 3.10646390914917, "learning_rate": 1.5683772363202436e-05, "loss": 0.6933, "step": 16071 }, { "epoch": 2.6236480143667604, "grad_norm": 2.439910411834717, "learning_rate": 1.568324484087538e-05, "loss": 0.5554, "step": 16072 }, { "epoch": 2.623811273009265, "grad_norm": 2.815323829650879, "learning_rate": 1.568271729518665e-05, "loss": 0.5547, "step": 16073 }, { "epoch": 2.6239745316517693, "grad_norm": 2.9920947551727295, "learning_rate": 1.568218972613842e-05, "loss": 0.6426, "step": 16074 }, { "epoch": 2.6241377902942737, "grad_norm": 2.951927423477173, "learning_rate": 1.5681662133732847e-05, "loss": 0.5582, "step": 16075 }, { "epoch": 2.624301048936778, "grad_norm": 2.947483777999878, "learning_rate": 1.568113451797211e-05, "loss": 0.5519, "step": 16076 }, { "epoch": 2.6244643075792826, "grad_norm": 2.790745258331299, "learning_rate": 1.5680606878858374e-05, "loss": 0.5623, "step": 16077 }, { "epoch": 2.624627566221787, "grad_norm": 2.4662530422210693, "learning_rate": 1.568007921639381e-05, "loss": 0.4851, "step": 16078 }, { "epoch": 2.6247908248642915, "grad_norm": 2.8564209938049316, "learning_rate": 1.5679551530580582e-05, "loss": 0.5637, "step": 16079 }, { "epoch": 2.6249540835067955, "grad_norm": 2.889784336090088, "learning_rate": 1.5679023821420863e-05, "loss": 0.5448, "step": 16080 }, { "epoch": 2.6251173421493, "grad_norm": 2.9712915420532227, "learning_rate": 1.567849608891682e-05, "loss": 0.5925, "step": 16081 }, { "epoch": 2.6252806007918044, "grad_norm": 3.208000898361206, "learning_rate": 1.5677968333070625e-05, "loss": 0.5501, "step": 16082 }, { "epoch": 2.625443859434309, "grad_norm": 2.861172914505005, "learning_rate": 1.567744055388445e-05, "loss": 0.5875, "step": 16083 }, { "epoch": 2.6256071180768132, "grad_norm": 2.992921829223633, "learning_rate": 1.5676912751360456e-05, "loss": 0.6221, "step": 16084 }, { "epoch": 2.6257703767193177, "grad_norm": 2.613907814025879, "learning_rate": 1.567638492550082e-05, "loss": 0.5369, "step": 16085 }, { "epoch": 2.625933635361822, "grad_norm": 2.8543758392333984, "learning_rate": 1.5675857076307703e-05, "loss": 0.5004, "step": 16086 }, { "epoch": 2.626096894004326, "grad_norm": 2.4538557529449463, "learning_rate": 1.5675329203783288e-05, "loss": 0.4915, "step": 16087 }, { "epoch": 2.6262601526468305, "grad_norm": 2.366913080215454, "learning_rate": 1.567480130792973e-05, "loss": 0.4946, "step": 16088 }, { "epoch": 2.626423411289335, "grad_norm": 2.746054172515869, "learning_rate": 1.5674273388749216e-05, "loss": 0.4944, "step": 16089 }, { "epoch": 2.6265866699318394, "grad_norm": 2.553696870803833, "learning_rate": 1.56737454462439e-05, "loss": 0.4947, "step": 16090 }, { "epoch": 2.626749928574344, "grad_norm": 3.1054604053497314, "learning_rate": 1.567321748041596e-05, "loss": 0.5457, "step": 16091 }, { "epoch": 2.6269131872168483, "grad_norm": 2.9293887615203857, "learning_rate": 1.567268949126757e-05, "loss": 0.6411, "step": 16092 }, { "epoch": 2.6270764458593527, "grad_norm": 2.7913243770599365, "learning_rate": 1.567216147880089e-05, "loss": 0.4344, "step": 16093 }, { "epoch": 2.627239704501857, "grad_norm": 2.881101608276367, "learning_rate": 1.5671633443018094e-05, "loss": 0.6819, "step": 16094 }, { "epoch": 2.6274029631443616, "grad_norm": 2.8922245502471924, "learning_rate": 1.567110538392136e-05, "loss": 0.5471, "step": 16095 }, { "epoch": 2.627566221786866, "grad_norm": 2.9692599773406982, "learning_rate": 1.567057730151285e-05, "loss": 0.533, "step": 16096 }, { "epoch": 2.6277294804293705, "grad_norm": 2.614724636077881, "learning_rate": 1.567004919579474e-05, "loss": 0.5693, "step": 16097 }, { "epoch": 2.6278927390718745, "grad_norm": 2.9941370487213135, "learning_rate": 1.5669521066769195e-05, "loss": 0.6228, "step": 16098 }, { "epoch": 2.628055997714379, "grad_norm": 3.0153708457946777, "learning_rate": 1.5668992914438393e-05, "loss": 0.5952, "step": 16099 }, { "epoch": 2.6282192563568834, "grad_norm": 3.078760862350464, "learning_rate": 1.56684647388045e-05, "loss": 0.5942, "step": 16100 }, { "epoch": 2.628382514999388, "grad_norm": 2.999995231628418, "learning_rate": 1.566793653986969e-05, "loss": 0.5584, "step": 16101 }, { "epoch": 2.6285457736418922, "grad_norm": 2.783806562423706, "learning_rate": 1.5667408317636132e-05, "loss": 0.5131, "step": 16102 }, { "epoch": 2.6287090322843967, "grad_norm": 2.980015993118286, "learning_rate": 1.5666880072106e-05, "loss": 0.5453, "step": 16103 }, { "epoch": 2.6288722909269007, "grad_norm": 3.16812801361084, "learning_rate": 1.566635180328146e-05, "loss": 0.6009, "step": 16104 }, { "epoch": 2.629035549569405, "grad_norm": 2.5196118354797363, "learning_rate": 1.5665823511164693e-05, "loss": 0.4673, "step": 16105 }, { "epoch": 2.6291988082119095, "grad_norm": 3.258016586303711, "learning_rate": 1.5665295195757863e-05, "loss": 0.6513, "step": 16106 }, { "epoch": 2.629362066854414, "grad_norm": 3.0387349128723145, "learning_rate": 1.5664766857063142e-05, "loss": 0.5505, "step": 16107 }, { "epoch": 2.6295253254969184, "grad_norm": 2.6917192935943604, "learning_rate": 1.5664238495082703e-05, "loss": 0.5191, "step": 16108 }, { "epoch": 2.629688584139423, "grad_norm": 2.589064598083496, "learning_rate": 1.566371010981872e-05, "loss": 0.5571, "step": 16109 }, { "epoch": 2.6298518427819273, "grad_norm": 3.0777289867401123, "learning_rate": 1.5663181701273364e-05, "loss": 0.6176, "step": 16110 }, { "epoch": 2.6300151014244317, "grad_norm": 2.8012962341308594, "learning_rate": 1.5662653269448807e-05, "loss": 0.6394, "step": 16111 }, { "epoch": 2.630178360066936, "grad_norm": 2.4782919883728027, "learning_rate": 1.5662124814347216e-05, "loss": 0.4708, "step": 16112 }, { "epoch": 2.6303416187094406, "grad_norm": 2.6830952167510986, "learning_rate": 1.5661596335970772e-05, "loss": 0.5694, "step": 16113 }, { "epoch": 2.630504877351945, "grad_norm": 2.6187996864318848, "learning_rate": 1.566106783432164e-05, "loss": 0.6432, "step": 16114 }, { "epoch": 2.630668135994449, "grad_norm": 2.683021306991577, "learning_rate": 1.5660539309402e-05, "loss": 0.5221, "step": 16115 }, { "epoch": 2.6308313946369535, "grad_norm": 2.6337528228759766, "learning_rate": 1.566001076121402e-05, "loss": 0.5965, "step": 16116 }, { "epoch": 2.630994653279458, "grad_norm": 2.9008407592773438, "learning_rate": 1.565948218975987e-05, "loss": 0.616, "step": 16117 }, { "epoch": 2.6311579119219624, "grad_norm": 2.8950085639953613, "learning_rate": 1.5658953595041725e-05, "loss": 0.5652, "step": 16118 }, { "epoch": 2.631321170564467, "grad_norm": 2.7181472778320312, "learning_rate": 1.565842497706176e-05, "loss": 0.5287, "step": 16119 }, { "epoch": 2.6314844292069712, "grad_norm": 2.780224084854126, "learning_rate": 1.5657896335822147e-05, "loss": 0.5126, "step": 16120 }, { "epoch": 2.6316476878494757, "grad_norm": 3.0927836894989014, "learning_rate": 1.565736767132506e-05, "loss": 0.7106, "step": 16121 }, { "epoch": 2.6318109464919797, "grad_norm": 2.4128637313842773, "learning_rate": 1.5656838983572666e-05, "loss": 0.5308, "step": 16122 }, { "epoch": 2.631974205134484, "grad_norm": 2.5569510459899902, "learning_rate": 1.5656310272567144e-05, "loss": 0.5113, "step": 16123 }, { "epoch": 2.6321374637769885, "grad_norm": 2.5468814373016357, "learning_rate": 1.565578153831067e-05, "loss": 0.5183, "step": 16124 }, { "epoch": 2.632300722419493, "grad_norm": 2.9572324752807617, "learning_rate": 1.5655252780805414e-05, "loss": 0.5984, "step": 16125 }, { "epoch": 2.6324639810619974, "grad_norm": 3.431824207305908, "learning_rate": 1.5654724000053546e-05, "loss": 0.6756, "step": 16126 }, { "epoch": 2.632627239704502, "grad_norm": 3.4141557216644287, "learning_rate": 1.5654195196057244e-05, "loss": 0.7047, "step": 16127 }, { "epoch": 2.6327904983470063, "grad_norm": 2.2879440784454346, "learning_rate": 1.565366636881868e-05, "loss": 0.3962, "step": 16128 }, { "epoch": 2.6329537569895107, "grad_norm": 3.071260690689087, "learning_rate": 1.565313751834003e-05, "loss": 0.5823, "step": 16129 }, { "epoch": 2.633117015632015, "grad_norm": 2.929016590118408, "learning_rate": 1.5652608644623462e-05, "loss": 0.6338, "step": 16130 }, { "epoch": 2.6332802742745196, "grad_norm": 3.075929641723633, "learning_rate": 1.565207974767116e-05, "loss": 0.6352, "step": 16131 }, { "epoch": 2.633443532917024, "grad_norm": 2.8996968269348145, "learning_rate": 1.5651550827485288e-05, "loss": 0.5477, "step": 16132 }, { "epoch": 2.633606791559528, "grad_norm": 2.651474714279175, "learning_rate": 1.5651021884068023e-05, "loss": 0.5932, "step": 16133 }, { "epoch": 2.6337700502020325, "grad_norm": 2.8467860221862793, "learning_rate": 1.5650492917421548e-05, "loss": 0.5125, "step": 16134 }, { "epoch": 2.633933308844537, "grad_norm": 3.642706871032715, "learning_rate": 1.5649963927548023e-05, "loss": 0.718, "step": 16135 }, { "epoch": 2.6340965674870414, "grad_norm": 3.027461528778076, "learning_rate": 1.5649434914449634e-05, "loss": 0.6598, "step": 16136 }, { "epoch": 2.634259826129546, "grad_norm": 2.6306986808776855, "learning_rate": 1.5648905878128552e-05, "loss": 0.5518, "step": 16137 }, { "epoch": 2.6344230847720502, "grad_norm": 2.8354568481445312, "learning_rate": 1.5648376818586947e-05, "loss": 0.5217, "step": 16138 }, { "epoch": 2.6345863434145547, "grad_norm": 3.442659378051758, "learning_rate": 1.5647847735827003e-05, "loss": 0.6058, "step": 16139 }, { "epoch": 2.6347496020570587, "grad_norm": 3.3266427516937256, "learning_rate": 1.5647318629850885e-05, "loss": 0.717, "step": 16140 }, { "epoch": 2.634912860699563, "grad_norm": 2.7990643978118896, "learning_rate": 1.5646789500660772e-05, "loss": 0.5874, "step": 16141 }, { "epoch": 2.6350761193420675, "grad_norm": 2.7094295024871826, "learning_rate": 1.564626034825884e-05, "loss": 0.5489, "step": 16142 }, { "epoch": 2.635239377984572, "grad_norm": 2.434445858001709, "learning_rate": 1.5645731172647266e-05, "loss": 0.4277, "step": 16143 }, { "epoch": 2.6354026366270764, "grad_norm": 3.1214277744293213, "learning_rate": 1.5645201973828223e-05, "loss": 0.5837, "step": 16144 }, { "epoch": 2.635565895269581, "grad_norm": 3.4504878520965576, "learning_rate": 1.5644672751803885e-05, "loss": 0.6884, "step": 16145 }, { "epoch": 2.6357291539120853, "grad_norm": 3.361609935760498, "learning_rate": 1.564414350657643e-05, "loss": 0.607, "step": 16146 }, { "epoch": 2.6358924125545897, "grad_norm": 3.1417768001556396, "learning_rate": 1.5643614238148028e-05, "loss": 0.5122, "step": 16147 }, { "epoch": 2.636055671197094, "grad_norm": 3.4078621864318848, "learning_rate": 1.5643084946520862e-05, "loss": 0.5641, "step": 16148 }, { "epoch": 2.6362189298395986, "grad_norm": 2.6684906482696533, "learning_rate": 1.5642555631697106e-05, "loss": 0.5415, "step": 16149 }, { "epoch": 2.636382188482103, "grad_norm": 3.0384719371795654, "learning_rate": 1.5642026293678935e-05, "loss": 0.5926, "step": 16150 }, { "epoch": 2.636545447124607, "grad_norm": 3.4960222244262695, "learning_rate": 1.564149693246852e-05, "loss": 0.6532, "step": 16151 }, { "epoch": 2.6367087057671115, "grad_norm": 2.88771915435791, "learning_rate": 1.5640967548068045e-05, "loss": 0.5209, "step": 16152 }, { "epoch": 2.636871964409616, "grad_norm": 3.3312325477600098, "learning_rate": 1.564043814047968e-05, "loss": 0.6537, "step": 16153 }, { "epoch": 2.6370352230521203, "grad_norm": 3.457732915878296, "learning_rate": 1.5639908709705604e-05, "loss": 0.7353, "step": 16154 }, { "epoch": 2.637198481694625, "grad_norm": 2.9597935676574707, "learning_rate": 1.5639379255747992e-05, "loss": 0.568, "step": 16155 }, { "epoch": 2.6373617403371292, "grad_norm": 3.4980509281158447, "learning_rate": 1.5638849778609023e-05, "loss": 0.5934, "step": 16156 }, { "epoch": 2.637524998979633, "grad_norm": 3.297931432723999, "learning_rate": 1.5638320278290873e-05, "loss": 0.6717, "step": 16157 }, { "epoch": 2.6376882576221377, "grad_norm": 3.1114718914031982, "learning_rate": 1.5637790754795715e-05, "loss": 0.6224, "step": 16158 }, { "epoch": 2.637851516264642, "grad_norm": 3.4544384479522705, "learning_rate": 1.563726120812573e-05, "loss": 0.6415, "step": 16159 }, { "epoch": 2.6380147749071465, "grad_norm": 2.5801565647125244, "learning_rate": 1.563673163828309e-05, "loss": 0.5469, "step": 16160 }, { "epoch": 2.638178033549651, "grad_norm": 3.0110385417938232, "learning_rate": 1.5636202045269977e-05, "loss": 0.6123, "step": 16161 }, { "epoch": 2.6383412921921554, "grad_norm": 2.844363212585449, "learning_rate": 1.5635672429088565e-05, "loss": 0.5944, "step": 16162 }, { "epoch": 2.63850455083466, "grad_norm": 2.82719349861145, "learning_rate": 1.563514278974103e-05, "loss": 0.5218, "step": 16163 }, { "epoch": 2.6386678094771643, "grad_norm": 3.097468376159668, "learning_rate": 1.5634613127229553e-05, "loss": 0.5657, "step": 16164 }, { "epoch": 2.6388310681196687, "grad_norm": 3.245123863220215, "learning_rate": 1.563408344155631e-05, "loss": 0.687, "step": 16165 }, { "epoch": 2.638994326762173, "grad_norm": 2.5968289375305176, "learning_rate": 1.5633553732723473e-05, "loss": 0.4829, "step": 16166 }, { "epoch": 2.6391575854046776, "grad_norm": 2.8651018142700195, "learning_rate": 1.563302400073323e-05, "loss": 0.6124, "step": 16167 }, { "epoch": 2.6393208440471816, "grad_norm": 2.9503173828125, "learning_rate": 1.5632494245587747e-05, "loss": 0.6105, "step": 16168 }, { "epoch": 2.639484102689686, "grad_norm": 2.970113754272461, "learning_rate": 1.563196446728921e-05, "loss": 0.6266, "step": 16169 }, { "epoch": 2.6396473613321905, "grad_norm": 2.749344825744629, "learning_rate": 1.5631434665839793e-05, "loss": 0.5891, "step": 16170 }, { "epoch": 2.639810619974695, "grad_norm": 3.1888906955718994, "learning_rate": 1.5630904841241672e-05, "loss": 0.6908, "step": 16171 }, { "epoch": 2.6399738786171993, "grad_norm": 2.654916524887085, "learning_rate": 1.5630374993497027e-05, "loss": 0.5783, "step": 16172 }, { "epoch": 2.640137137259704, "grad_norm": 3.1508805751800537, "learning_rate": 1.562984512260804e-05, "loss": 0.5398, "step": 16173 }, { "epoch": 2.640300395902208, "grad_norm": 3.0345191955566406, "learning_rate": 1.5629315228576886e-05, "loss": 0.5938, "step": 16174 }, { "epoch": 2.640463654544712, "grad_norm": 2.668532609939575, "learning_rate": 1.5628785311405738e-05, "loss": 0.5088, "step": 16175 }, { "epoch": 2.6406269131872167, "grad_norm": 3.0173959732055664, "learning_rate": 1.5628255371096784e-05, "loss": 0.6168, "step": 16176 }, { "epoch": 2.640790171829721, "grad_norm": 3.5405123233795166, "learning_rate": 1.562772540765219e-05, "loss": 0.7082, "step": 16177 }, { "epoch": 2.6409534304722255, "grad_norm": 2.715792655944824, "learning_rate": 1.5627195421074154e-05, "loss": 0.5488, "step": 16178 }, { "epoch": 2.64111668911473, "grad_norm": 2.7219932079315186, "learning_rate": 1.5626665411364836e-05, "loss": 0.5168, "step": 16179 }, { "epoch": 2.6412799477572344, "grad_norm": 2.4105236530303955, "learning_rate": 1.562613537852642e-05, "loss": 0.554, "step": 16180 }, { "epoch": 2.641443206399739, "grad_norm": 2.9181830883026123, "learning_rate": 1.5625605322561085e-05, "loss": 0.5408, "step": 16181 }, { "epoch": 2.6416064650422433, "grad_norm": 2.5747575759887695, "learning_rate": 1.562507524347101e-05, "loss": 0.474, "step": 16182 }, { "epoch": 2.6417697236847477, "grad_norm": 2.6793341636657715, "learning_rate": 1.562454514125838e-05, "loss": 0.5456, "step": 16183 }, { "epoch": 2.641932982327252, "grad_norm": 2.7076222896575928, "learning_rate": 1.5624015015925364e-05, "loss": 0.5927, "step": 16184 }, { "epoch": 2.6420962409697566, "grad_norm": 3.7121174335479736, "learning_rate": 1.5623484867474146e-05, "loss": 0.6848, "step": 16185 }, { "epoch": 2.6422594996122606, "grad_norm": 3.3015222549438477, "learning_rate": 1.5622954695906908e-05, "loss": 0.6986, "step": 16186 }, { "epoch": 2.642422758254765, "grad_norm": 3.121563673019409, "learning_rate": 1.5622424501225824e-05, "loss": 0.6492, "step": 16187 }, { "epoch": 2.6425860168972695, "grad_norm": 3.0459823608398438, "learning_rate": 1.5621894283433078e-05, "loss": 0.4913, "step": 16188 }, { "epoch": 2.642749275539774, "grad_norm": 3.034569263458252, "learning_rate": 1.5621364042530846e-05, "loss": 0.637, "step": 16189 }, { "epoch": 2.6429125341822783, "grad_norm": 2.47361159324646, "learning_rate": 1.5620833778521306e-05, "loss": 0.4865, "step": 16190 }, { "epoch": 2.6430757928247828, "grad_norm": 2.886033773422241, "learning_rate": 1.5620303491406644e-05, "loss": 0.6383, "step": 16191 }, { "epoch": 2.643239051467287, "grad_norm": 2.773540735244751, "learning_rate": 1.5619773181189037e-05, "loss": 0.6453, "step": 16192 }, { "epoch": 2.643402310109791, "grad_norm": 2.5779309272766113, "learning_rate": 1.5619242847870662e-05, "loss": 0.4941, "step": 16193 }, { "epoch": 2.6435655687522956, "grad_norm": 2.927922010421753, "learning_rate": 1.5618712491453706e-05, "loss": 0.642, "step": 16194 }, { "epoch": 2.6437288273948, "grad_norm": 3.108492374420166, "learning_rate": 1.561818211194034e-05, "loss": 0.6489, "step": 16195 }, { "epoch": 2.6438920860373045, "grad_norm": 2.5638253688812256, "learning_rate": 1.561765170933275e-05, "loss": 0.5748, "step": 16196 }, { "epoch": 2.644055344679809, "grad_norm": 2.8988728523254395, "learning_rate": 1.5617121283633117e-05, "loss": 0.6066, "step": 16197 }, { "epoch": 2.6442186033223134, "grad_norm": 2.9018778800964355, "learning_rate": 1.561659083484362e-05, "loss": 0.5729, "step": 16198 }, { "epoch": 2.644381861964818, "grad_norm": 3.4657864570617676, "learning_rate": 1.5616060362966434e-05, "loss": 0.6787, "step": 16199 }, { "epoch": 2.6445451206073223, "grad_norm": 2.782231092453003, "learning_rate": 1.561552986800375e-05, "loss": 0.6171, "step": 16200 }, { "epoch": 2.6447083792498267, "grad_norm": 2.817578077316284, "learning_rate": 1.5614999349957736e-05, "loss": 0.5302, "step": 16201 }, { "epoch": 2.644871637892331, "grad_norm": 2.647726058959961, "learning_rate": 1.5614468808830586e-05, "loss": 0.5633, "step": 16202 }, { "epoch": 2.6450348965348356, "grad_norm": 2.4660584926605225, "learning_rate": 1.5613938244624474e-05, "loss": 0.497, "step": 16203 }, { "epoch": 2.6451981551773396, "grad_norm": 3.215200185775757, "learning_rate": 1.561340765734158e-05, "loss": 0.6592, "step": 16204 }, { "epoch": 2.645361413819844, "grad_norm": 2.56967830657959, "learning_rate": 1.561287704698409e-05, "loss": 0.4888, "step": 16205 }, { "epoch": 2.6455246724623485, "grad_norm": 2.968871831893921, "learning_rate": 1.5612346413554176e-05, "loss": 0.6086, "step": 16206 }, { "epoch": 2.645687931104853, "grad_norm": 3.181168794631958, "learning_rate": 1.561181575705403e-05, "loss": 0.9002, "step": 16207 }, { "epoch": 2.6458511897473573, "grad_norm": 2.9987568855285645, "learning_rate": 1.561128507748583e-05, "loss": 0.5511, "step": 16208 }, { "epoch": 2.6460144483898618, "grad_norm": 2.69291615486145, "learning_rate": 1.5610754374851748e-05, "loss": 0.4884, "step": 16209 }, { "epoch": 2.6461777070323658, "grad_norm": 2.8560774326324463, "learning_rate": 1.561022364915398e-05, "loss": 0.6103, "step": 16210 }, { "epoch": 2.64634096567487, "grad_norm": 1.9956538677215576, "learning_rate": 1.5609692900394694e-05, "loss": 0.3815, "step": 16211 }, { "epoch": 2.6465042243173746, "grad_norm": 2.894031047821045, "learning_rate": 1.5609162128576087e-05, "loss": 0.5956, "step": 16212 }, { "epoch": 2.646667482959879, "grad_norm": 2.9554004669189453, "learning_rate": 1.560863133370033e-05, "loss": 0.5179, "step": 16213 }, { "epoch": 2.6468307416023835, "grad_norm": 2.6143765449523926, "learning_rate": 1.5608100515769607e-05, "loss": 0.4997, "step": 16214 }, { "epoch": 2.646994000244888, "grad_norm": 2.884341239929199, "learning_rate": 1.56075696747861e-05, "loss": 0.5784, "step": 16215 }, { "epoch": 2.6471572588873924, "grad_norm": 2.919243574142456, "learning_rate": 1.5607038810751987e-05, "loss": 0.5886, "step": 16216 }, { "epoch": 2.647320517529897, "grad_norm": 2.4828553199768066, "learning_rate": 1.560650792366946e-05, "loss": 0.4873, "step": 16217 }, { "epoch": 2.6474837761724013, "grad_norm": 2.657397508621216, "learning_rate": 1.5605977013540696e-05, "loss": 0.524, "step": 16218 }, { "epoch": 2.6476470348149057, "grad_norm": 2.7777984142303467, "learning_rate": 1.5605446080367876e-05, "loss": 0.5689, "step": 16219 }, { "epoch": 2.64781029345741, "grad_norm": 2.754491090774536, "learning_rate": 1.560491512415318e-05, "loss": 0.5502, "step": 16220 }, { "epoch": 2.647973552099914, "grad_norm": 2.9681873321533203, "learning_rate": 1.5604384144898798e-05, "loss": 0.4866, "step": 16221 }, { "epoch": 2.6481368107424186, "grad_norm": 3.103724241256714, "learning_rate": 1.560385314260691e-05, "loss": 0.5349, "step": 16222 }, { "epoch": 2.648300069384923, "grad_norm": 2.663616418838501, "learning_rate": 1.5603322117279698e-05, "loss": 0.5119, "step": 16223 }, { "epoch": 2.6484633280274275, "grad_norm": 2.5160741806030273, "learning_rate": 1.5602791068919342e-05, "loss": 0.5396, "step": 16224 }, { "epoch": 2.648626586669932, "grad_norm": 2.9537627696990967, "learning_rate": 1.5602259997528028e-05, "loss": 0.5736, "step": 16225 }, { "epoch": 2.6487898453124363, "grad_norm": 2.407376527786255, "learning_rate": 1.560172890310794e-05, "loss": 0.457, "step": 16226 }, { "epoch": 2.6489531039549408, "grad_norm": 3.7944395542144775, "learning_rate": 1.560119778566126e-05, "loss": 0.6426, "step": 16227 }, { "epoch": 2.6491163625974448, "grad_norm": 2.751192331314087, "learning_rate": 1.560066664519017e-05, "loss": 0.575, "step": 16228 }, { "epoch": 2.649279621239949, "grad_norm": 2.4072697162628174, "learning_rate": 1.5600135481696855e-05, "loss": 0.4228, "step": 16229 }, { "epoch": 2.6494428798824536, "grad_norm": 2.2607970237731934, "learning_rate": 1.5599604295183494e-05, "loss": 0.4715, "step": 16230 }, { "epoch": 2.649606138524958, "grad_norm": 3.45436429977417, "learning_rate": 1.5599073085652277e-05, "loss": 0.698, "step": 16231 }, { "epoch": 2.6497693971674625, "grad_norm": 3.1676387786865234, "learning_rate": 1.5598541853105386e-05, "loss": 0.5812, "step": 16232 }, { "epoch": 2.649932655809967, "grad_norm": 2.6869924068450928, "learning_rate": 1.5598010597545003e-05, "loss": 0.5376, "step": 16233 }, { "epoch": 2.6500959144524714, "grad_norm": 2.493520498275757, "learning_rate": 1.5597479318973306e-05, "loss": 0.4683, "step": 16234 }, { "epoch": 2.650259173094976, "grad_norm": 2.6626498699188232, "learning_rate": 1.559694801739249e-05, "loss": 0.5151, "step": 16235 }, { "epoch": 2.6504224317374803, "grad_norm": 3.077784776687622, "learning_rate": 1.5596416692804737e-05, "loss": 0.5441, "step": 16236 }, { "epoch": 2.6505856903799847, "grad_norm": 3.08743953704834, "learning_rate": 1.5595885345212228e-05, "loss": 0.5942, "step": 16237 }, { "epoch": 2.650748949022489, "grad_norm": 3.1944384574890137, "learning_rate": 1.5595353974617144e-05, "loss": 0.558, "step": 16238 }, { "epoch": 2.650912207664993, "grad_norm": 3.542699098587036, "learning_rate": 1.5594822581021673e-05, "loss": 1.2996, "step": 16239 }, { "epoch": 2.6510754663074976, "grad_norm": 3.0984315872192383, "learning_rate": 1.5594291164427997e-05, "loss": 0.6117, "step": 16240 }, { "epoch": 2.651238724950002, "grad_norm": 3.032731056213379, "learning_rate": 1.5593759724838305e-05, "loss": 0.6652, "step": 16241 }, { "epoch": 2.6514019835925065, "grad_norm": 2.6523265838623047, "learning_rate": 1.559322826225478e-05, "loss": 0.4827, "step": 16242 }, { "epoch": 2.651565242235011, "grad_norm": 2.9318385124206543, "learning_rate": 1.55926967766796e-05, "loss": 0.5747, "step": 16243 }, { "epoch": 2.6517285008775153, "grad_norm": 3.170403242111206, "learning_rate": 1.559216526811496e-05, "loss": 0.5936, "step": 16244 }, { "epoch": 2.6518917595200193, "grad_norm": 2.9231889247894287, "learning_rate": 1.559163373656304e-05, "loss": 0.6773, "step": 16245 }, { "epoch": 2.6520550181625238, "grad_norm": 2.711998224258423, "learning_rate": 1.5591102182026024e-05, "loss": 0.6341, "step": 16246 }, { "epoch": 2.652218276805028, "grad_norm": 2.8362479209899902, "learning_rate": 1.55905706045061e-05, "loss": 0.5678, "step": 16247 }, { "epoch": 2.6523815354475326, "grad_norm": 2.47808575630188, "learning_rate": 1.559003900400545e-05, "loss": 0.4921, "step": 16248 }, { "epoch": 2.652544794090037, "grad_norm": 3.005603075027466, "learning_rate": 1.558950738052626e-05, "loss": 0.6796, "step": 16249 }, { "epoch": 2.6527080527325415, "grad_norm": 2.6446070671081543, "learning_rate": 1.5588975734070717e-05, "loss": 0.5236, "step": 16250 }, { "epoch": 2.652871311375046, "grad_norm": 2.7900404930114746, "learning_rate": 1.5588444064641003e-05, "loss": 0.6248, "step": 16251 }, { "epoch": 2.6530345700175504, "grad_norm": 2.796889543533325, "learning_rate": 1.5587912372239308e-05, "loss": 0.6037, "step": 16252 }, { "epoch": 2.653197828660055, "grad_norm": 2.4737753868103027, "learning_rate": 1.5587380656867814e-05, "loss": 0.4887, "step": 16253 }, { "epoch": 2.6533610873025593, "grad_norm": 2.0975735187530518, "learning_rate": 1.5586848918528707e-05, "loss": 0.4249, "step": 16254 }, { "epoch": 2.6535243459450637, "grad_norm": 3.127129077911377, "learning_rate": 1.5586317157224175e-05, "loss": 0.625, "step": 16255 }, { "epoch": 2.6536876045875677, "grad_norm": 2.7572226524353027, "learning_rate": 1.5585785372956404e-05, "loss": 0.5417, "step": 16256 }, { "epoch": 2.653850863230072, "grad_norm": 2.5204899311065674, "learning_rate": 1.5585253565727576e-05, "loss": 0.52, "step": 16257 }, { "epoch": 2.6540141218725766, "grad_norm": 2.869617462158203, "learning_rate": 1.5584721735539878e-05, "loss": 0.5404, "step": 16258 }, { "epoch": 2.654177380515081, "grad_norm": 2.8122310638427734, "learning_rate": 1.55841898823955e-05, "loss": 0.568, "step": 16259 }, { "epoch": 2.6543406391575854, "grad_norm": 2.916501998901367, "learning_rate": 1.5583658006296626e-05, "loss": 0.5789, "step": 16260 }, { "epoch": 2.65450389780009, "grad_norm": 2.6749978065490723, "learning_rate": 1.5583126107245445e-05, "loss": 0.4339, "step": 16261 }, { "epoch": 2.6546671564425943, "grad_norm": 2.708463668823242, "learning_rate": 1.5582594185244136e-05, "loss": 0.568, "step": 16262 }, { "epoch": 2.6548304150850983, "grad_norm": 3.0168330669403076, "learning_rate": 1.558206224029489e-05, "loss": 0.54, "step": 16263 }, { "epoch": 2.6549936737276028, "grad_norm": 2.4575552940368652, "learning_rate": 1.55815302723999e-05, "loss": 0.5556, "step": 16264 }, { "epoch": 2.655156932370107, "grad_norm": 2.7729992866516113, "learning_rate": 1.5580998281561338e-05, "loss": 0.526, "step": 16265 }, { "epoch": 2.6553201910126116, "grad_norm": 2.8465540409088135, "learning_rate": 1.5580466267781404e-05, "loss": 0.59, "step": 16266 }, { "epoch": 2.655483449655116, "grad_norm": 2.507246255874634, "learning_rate": 1.5579934231062283e-05, "loss": 0.4934, "step": 16267 }, { "epoch": 2.6556467082976205, "grad_norm": 2.523075819015503, "learning_rate": 1.5579402171406154e-05, "loss": 0.4957, "step": 16268 }, { "epoch": 2.655809966940125, "grad_norm": 2.551417350769043, "learning_rate": 1.5578870088815215e-05, "loss": 0.5244, "step": 16269 }, { "epoch": 2.6559732255826294, "grad_norm": 3.198385715484619, "learning_rate": 1.5578337983291642e-05, "loss": 0.6058, "step": 16270 }, { "epoch": 2.656136484225134, "grad_norm": 2.488832950592041, "learning_rate": 1.557780585483763e-05, "loss": 0.5518, "step": 16271 }, { "epoch": 2.6562997428676383, "grad_norm": 3.146652936935425, "learning_rate": 1.5577273703455367e-05, "loss": 0.71, "step": 16272 }, { "epoch": 2.6564630015101427, "grad_norm": 2.638579845428467, "learning_rate": 1.5576741529147034e-05, "loss": 0.5479, "step": 16273 }, { "epoch": 2.6566262601526467, "grad_norm": 2.71262788772583, "learning_rate": 1.5576209331914824e-05, "loss": 0.5952, "step": 16274 }, { "epoch": 2.656789518795151, "grad_norm": 3.3215208053588867, "learning_rate": 1.5575677111760923e-05, "loss": 0.7481, "step": 16275 }, { "epoch": 2.6569527774376556, "grad_norm": 3.1730480194091797, "learning_rate": 1.557514486868752e-05, "loss": 0.5433, "step": 16276 }, { "epoch": 2.65711603608016, "grad_norm": 3.301913022994995, "learning_rate": 1.5574612602696798e-05, "loss": 0.655, "step": 16277 }, { "epoch": 2.6572792947226644, "grad_norm": 3.2314724922180176, "learning_rate": 1.557408031379095e-05, "loss": 0.6314, "step": 16278 }, { "epoch": 2.657442553365169, "grad_norm": 3.2519373893737793, "learning_rate": 1.5573548001972164e-05, "loss": 0.4738, "step": 16279 }, { "epoch": 2.6576058120076733, "grad_norm": 2.7016372680664062, "learning_rate": 1.5573015667242627e-05, "loss": 0.6296, "step": 16280 }, { "epoch": 2.6577690706501773, "grad_norm": 2.5511598587036133, "learning_rate": 1.5572483309604526e-05, "loss": 0.4339, "step": 16281 }, { "epoch": 2.6579323292926818, "grad_norm": 2.7308080196380615, "learning_rate": 1.5571950929060047e-05, "loss": 0.5909, "step": 16282 }, { "epoch": 2.658095587935186, "grad_norm": 2.5559637546539307, "learning_rate": 1.5571418525611388e-05, "loss": 0.5286, "step": 16283 }, { "epoch": 2.6582588465776906, "grad_norm": 2.578308343887329, "learning_rate": 1.5570886099260727e-05, "loss": 0.5012, "step": 16284 }, { "epoch": 2.658422105220195, "grad_norm": 2.730623245239258, "learning_rate": 1.557035365001026e-05, "loss": 0.5841, "step": 16285 }, { "epoch": 2.6585853638626995, "grad_norm": 3.081723928451538, "learning_rate": 1.556982117786217e-05, "loss": 0.5909, "step": 16286 }, { "epoch": 2.658748622505204, "grad_norm": 3.5561418533325195, "learning_rate": 1.5569288682818647e-05, "loss": 0.6723, "step": 16287 }, { "epoch": 2.6589118811477084, "grad_norm": 3.629436492919922, "learning_rate": 1.556875616488188e-05, "loss": 0.6854, "step": 16288 }, { "epoch": 2.659075139790213, "grad_norm": 2.8483946323394775, "learning_rate": 1.5568223624054062e-05, "loss": 0.5742, "step": 16289 }, { "epoch": 2.6592383984327173, "grad_norm": 3.0681912899017334, "learning_rate": 1.556769106033738e-05, "loss": 0.5571, "step": 16290 }, { "epoch": 2.6594016570752217, "grad_norm": 3.2010934352874756, "learning_rate": 1.5567158473734022e-05, "loss": 0.6379, "step": 16291 }, { "epoch": 2.6595649157177257, "grad_norm": 3.08369779586792, "learning_rate": 1.5566625864246177e-05, "loss": 0.639, "step": 16292 }, { "epoch": 2.65972817436023, "grad_norm": 2.487142324447632, "learning_rate": 1.5566093231876032e-05, "loss": 0.4655, "step": 16293 }, { "epoch": 2.6598914330027346, "grad_norm": 2.6252031326293945, "learning_rate": 1.5565560576625786e-05, "loss": 0.5158, "step": 16294 }, { "epoch": 2.660054691645239, "grad_norm": 3.336672782897949, "learning_rate": 1.5565027898497617e-05, "loss": 0.592, "step": 16295 }, { "epoch": 2.6602179502877434, "grad_norm": 2.6640524864196777, "learning_rate": 1.5564495197493724e-05, "loss": 0.471, "step": 16296 }, { "epoch": 2.660381208930248, "grad_norm": 2.966660976409912, "learning_rate": 1.556396247361629e-05, "loss": 0.5653, "step": 16297 }, { "epoch": 2.660544467572752, "grad_norm": 3.0072414875030518, "learning_rate": 1.5563429726867504e-05, "loss": 0.6419, "step": 16298 }, { "epoch": 2.6607077262152563, "grad_norm": 2.5147945880889893, "learning_rate": 1.556289695724956e-05, "loss": 0.5634, "step": 16299 }, { "epoch": 2.6608709848577607, "grad_norm": 2.9507079124450684, "learning_rate": 1.556236416476465e-05, "loss": 0.6295, "step": 16300 }, { "epoch": 2.661034243500265, "grad_norm": 2.283752202987671, "learning_rate": 1.556183134941496e-05, "loss": 0.5111, "step": 16301 }, { "epoch": 2.6611975021427696, "grad_norm": 3.0340161323547363, "learning_rate": 1.556129851120268e-05, "loss": 0.6812, "step": 16302 }, { "epoch": 2.661360760785274, "grad_norm": 2.633164882659912, "learning_rate": 1.5560765650130004e-05, "loss": 0.5535, "step": 16303 }, { "epoch": 2.6615240194277785, "grad_norm": 2.602189302444458, "learning_rate": 1.5560232766199117e-05, "loss": 0.6273, "step": 16304 }, { "epoch": 2.661687278070283, "grad_norm": 2.623504638671875, "learning_rate": 1.5559699859412215e-05, "loss": 0.59, "step": 16305 }, { "epoch": 2.6618505367127874, "grad_norm": 2.813441514968872, "learning_rate": 1.555916692977149e-05, "loss": 0.5564, "step": 16306 }, { "epoch": 2.662013795355292, "grad_norm": 2.4129295349121094, "learning_rate": 1.5558633977279124e-05, "loss": 0.5511, "step": 16307 }, { "epoch": 2.6621770539977962, "grad_norm": 2.7456254959106445, "learning_rate": 1.5558101001937312e-05, "loss": 0.6371, "step": 16308 }, { "epoch": 2.6623403126403002, "grad_norm": 3.0241434574127197, "learning_rate": 1.5557568003748247e-05, "loss": 0.6299, "step": 16309 }, { "epoch": 2.6625035712828047, "grad_norm": 2.644054889678955, "learning_rate": 1.555703498271412e-05, "loss": 0.5955, "step": 16310 }, { "epoch": 2.662666829925309, "grad_norm": 2.7463204860687256, "learning_rate": 1.5556501938837118e-05, "loss": 0.5772, "step": 16311 }, { "epoch": 2.6628300885678136, "grad_norm": 3.227848529815674, "learning_rate": 1.5555968872119435e-05, "loss": 0.5609, "step": 16312 }, { "epoch": 2.662993347210318, "grad_norm": 2.999812602996826, "learning_rate": 1.5555435782563263e-05, "loss": 0.5034, "step": 16313 }, { "epoch": 2.6631566058528224, "grad_norm": 2.2677128314971924, "learning_rate": 1.5554902670170792e-05, "loss": 0.521, "step": 16314 }, { "epoch": 2.663319864495327, "grad_norm": 2.3942813873291016, "learning_rate": 1.5554369534944213e-05, "loss": 0.5115, "step": 16315 }, { "epoch": 2.663483123137831, "grad_norm": 2.8883631229400635, "learning_rate": 1.555383637688572e-05, "loss": 0.6434, "step": 16316 }, { "epoch": 2.6636463817803353, "grad_norm": 2.88547682762146, "learning_rate": 1.55533031959975e-05, "loss": 0.546, "step": 16317 }, { "epoch": 2.6638096404228397, "grad_norm": 2.9021897315979004, "learning_rate": 1.5552769992281747e-05, "loss": 0.6161, "step": 16318 }, { "epoch": 2.663972899065344, "grad_norm": 3.314342498779297, "learning_rate": 1.5552236765740656e-05, "loss": 0.6247, "step": 16319 }, { "epoch": 2.6641361577078486, "grad_norm": 3.1884055137634277, "learning_rate": 1.5551703516376418e-05, "loss": 0.633, "step": 16320 }, { "epoch": 2.664299416350353, "grad_norm": 3.107823610305786, "learning_rate": 1.555117024419122e-05, "loss": 0.6457, "step": 16321 }, { "epoch": 2.6644626749928575, "grad_norm": 2.786942720413208, "learning_rate": 1.5550636949187253e-05, "loss": 0.5333, "step": 16322 }, { "epoch": 2.664625933635362, "grad_norm": 2.555973768234253, "learning_rate": 1.5550103631366718e-05, "loss": 0.4904, "step": 16323 }, { "epoch": 2.6647891922778664, "grad_norm": 2.825822353363037, "learning_rate": 1.5549570290731803e-05, "loss": 0.5831, "step": 16324 }, { "epoch": 2.664952450920371, "grad_norm": 2.8462064266204834, "learning_rate": 1.5549036927284703e-05, "loss": 0.5345, "step": 16325 }, { "epoch": 2.6651157095628752, "grad_norm": 2.6852686405181885, "learning_rate": 1.55485035410276e-05, "loss": 0.4864, "step": 16326 }, { "epoch": 2.6652789682053792, "grad_norm": 2.6388356685638428, "learning_rate": 1.55479701319627e-05, "loss": 0.5717, "step": 16327 }, { "epoch": 2.6654422268478837, "grad_norm": 2.683117151260376, "learning_rate": 1.5547436700092187e-05, "loss": 0.5021, "step": 16328 }, { "epoch": 2.665605485490388, "grad_norm": 3.389694929122925, "learning_rate": 1.5546903245418256e-05, "loss": 0.6128, "step": 16329 }, { "epoch": 2.6657687441328926, "grad_norm": 3.2573461532592773, "learning_rate": 1.5546369767943105e-05, "loss": 0.5587, "step": 16330 }, { "epoch": 2.665932002775397, "grad_norm": 2.8244423866271973, "learning_rate": 1.5545836267668914e-05, "loss": 0.5121, "step": 16331 }, { "epoch": 2.6660952614179014, "grad_norm": 2.8263251781463623, "learning_rate": 1.554530274459789e-05, "loss": 0.5813, "step": 16332 }, { "epoch": 2.6662585200604054, "grad_norm": 2.8957881927490234, "learning_rate": 1.5544769198732218e-05, "loss": 0.4997, "step": 16333 }, { "epoch": 2.66642177870291, "grad_norm": 2.558558225631714, "learning_rate": 1.5544235630074093e-05, "loss": 0.5589, "step": 16334 }, { "epoch": 2.6665850373454143, "grad_norm": 3.002424478530884, "learning_rate": 1.5543702038625716e-05, "loss": 0.5648, "step": 16335 }, { "epoch": 2.6667482959879187, "grad_norm": 2.1873176097869873, "learning_rate": 1.5543168424389263e-05, "loss": 0.4816, "step": 16336 }, { "epoch": 2.666911554630423, "grad_norm": 3.3615784645080566, "learning_rate": 1.5542634787366942e-05, "loss": 0.5719, "step": 16337 }, { "epoch": 2.6670748132729276, "grad_norm": 3.0965933799743652, "learning_rate": 1.5542101127560944e-05, "loss": 0.5241, "step": 16338 }, { "epoch": 2.667238071915432, "grad_norm": 3.220642328262329, "learning_rate": 1.5541567444973458e-05, "loss": 0.7254, "step": 16339 }, { "epoch": 2.6674013305579365, "grad_norm": 3.3489975929260254, "learning_rate": 1.5541033739606682e-05, "loss": 0.6357, "step": 16340 }, { "epoch": 2.667564589200441, "grad_norm": 2.760362386703491, "learning_rate": 1.554050001146281e-05, "loss": 0.5674, "step": 16341 }, { "epoch": 2.6677278478429454, "grad_norm": 2.9694809913635254, "learning_rate": 1.5539966260544032e-05, "loss": 0.5802, "step": 16342 }, { "epoch": 2.66789110648545, "grad_norm": 2.7636361122131348, "learning_rate": 1.5539432486852542e-05, "loss": 0.5734, "step": 16343 }, { "epoch": 2.668054365127954, "grad_norm": 2.6330020427703857, "learning_rate": 1.5538898690390544e-05, "loss": 0.5161, "step": 16344 }, { "epoch": 2.6682176237704582, "grad_norm": 2.66505765914917, "learning_rate": 1.5538364871160218e-05, "loss": 0.5206, "step": 16345 }, { "epoch": 2.6683808824129627, "grad_norm": 2.8474318981170654, "learning_rate": 1.553783102916377e-05, "loss": 0.4736, "step": 16346 }, { "epoch": 2.668544141055467, "grad_norm": 2.916325092315674, "learning_rate": 1.553729716440339e-05, "loss": 0.6413, "step": 16347 }, { "epoch": 2.6687073996979715, "grad_norm": 2.7060654163360596, "learning_rate": 1.553676327688127e-05, "loss": 0.5291, "step": 16348 }, { "epoch": 2.668870658340476, "grad_norm": 3.2520432472229004, "learning_rate": 1.5536229366599605e-05, "loss": 0.6398, "step": 16349 }, { "epoch": 2.6690339169829804, "grad_norm": 3.0364842414855957, "learning_rate": 1.5535695433560594e-05, "loss": 0.5938, "step": 16350 }, { "epoch": 2.6691971756254844, "grad_norm": 3.1778006553649902, "learning_rate": 1.553516147776643e-05, "loss": 0.6164, "step": 16351 }, { "epoch": 2.669360434267989, "grad_norm": 3.113103151321411, "learning_rate": 1.5534627499219307e-05, "loss": 0.6132, "step": 16352 }, { "epoch": 2.6695236929104933, "grad_norm": 2.536858320236206, "learning_rate": 1.5534093497921415e-05, "loss": 0.5072, "step": 16353 }, { "epoch": 2.6696869515529977, "grad_norm": 2.454350471496582, "learning_rate": 1.553355947387496e-05, "loss": 0.4207, "step": 16354 }, { "epoch": 2.669850210195502, "grad_norm": 3.1833319664001465, "learning_rate": 1.553302542708213e-05, "loss": 0.6038, "step": 16355 }, { "epoch": 2.6700134688380066, "grad_norm": 3.1726162433624268, "learning_rate": 1.5532491357545122e-05, "loss": 0.615, "step": 16356 }, { "epoch": 2.670176727480511, "grad_norm": 3.112489700317383, "learning_rate": 1.5531957265266134e-05, "loss": 0.5724, "step": 16357 }, { "epoch": 2.6703399861230155, "grad_norm": 3.121645927429199, "learning_rate": 1.5531423150247353e-05, "loss": 0.5909, "step": 16358 }, { "epoch": 2.67050324476552, "grad_norm": 2.787158966064453, "learning_rate": 1.5530889012490985e-05, "loss": 0.5275, "step": 16359 }, { "epoch": 2.6706665034080244, "grad_norm": 2.8626720905303955, "learning_rate": 1.5530354851999217e-05, "loss": 0.6851, "step": 16360 }, { "epoch": 2.670829762050529, "grad_norm": 3.7639596462249756, "learning_rate": 1.5529820668774247e-05, "loss": 0.6946, "step": 16361 }, { "epoch": 2.670993020693033, "grad_norm": 3.007473945617676, "learning_rate": 1.5529286462818273e-05, "loss": 0.5376, "step": 16362 }, { "epoch": 2.6711562793355372, "grad_norm": 2.3726966381073, "learning_rate": 1.5528752234133495e-05, "loss": 0.4486, "step": 16363 }, { "epoch": 2.6713195379780417, "grad_norm": 3.0234851837158203, "learning_rate": 1.55282179827221e-05, "loss": 0.5984, "step": 16364 }, { "epoch": 2.671482796620546, "grad_norm": 2.7301175594329834, "learning_rate": 1.5527683708586294e-05, "loss": 0.5378, "step": 16365 }, { "epoch": 2.6716460552630505, "grad_norm": 2.6906678676605225, "learning_rate": 1.552714941172826e-05, "loss": 0.5749, "step": 16366 }, { "epoch": 2.671809313905555, "grad_norm": 3.1450185775756836, "learning_rate": 1.5526615092150204e-05, "loss": 0.655, "step": 16367 }, { "epoch": 2.6719725725480594, "grad_norm": 3.308785915374756, "learning_rate": 1.552608074985432e-05, "loss": 0.7083, "step": 16368 }, { "epoch": 2.6721358311905634, "grad_norm": 2.9169960021972656, "learning_rate": 1.5525546384842807e-05, "loss": 0.6056, "step": 16369 }, { "epoch": 2.672299089833068, "grad_norm": 2.4284863471984863, "learning_rate": 1.552501199711786e-05, "loss": 0.4846, "step": 16370 }, { "epoch": 2.6724623484755723, "grad_norm": 3.5078718662261963, "learning_rate": 1.552447758668167e-05, "loss": 0.6275, "step": 16371 }, { "epoch": 2.6726256071180767, "grad_norm": 3.4786031246185303, "learning_rate": 1.552394315353644e-05, "loss": 0.6305, "step": 16372 }, { "epoch": 2.672788865760581, "grad_norm": 3.6294198036193848, "learning_rate": 1.5523408697684365e-05, "loss": 0.68, "step": 16373 }, { "epoch": 2.6729521244030856, "grad_norm": 2.7490603923797607, "learning_rate": 1.5522874219127646e-05, "loss": 0.6132, "step": 16374 }, { "epoch": 2.67311538304559, "grad_norm": 2.845259666442871, "learning_rate": 1.5522339717868475e-05, "loss": 0.5977, "step": 16375 }, { "epoch": 2.6732786416880945, "grad_norm": 2.7411446571350098, "learning_rate": 1.552180519390905e-05, "loss": 0.557, "step": 16376 }, { "epoch": 2.673441900330599, "grad_norm": 2.7813727855682373, "learning_rate": 1.552127064725157e-05, "loss": 0.5887, "step": 16377 }, { "epoch": 2.6736051589731034, "grad_norm": 3.3810184001922607, "learning_rate": 1.552073607789823e-05, "loss": 0.7652, "step": 16378 }, { "epoch": 2.673768417615608, "grad_norm": 3.2582311630249023, "learning_rate": 1.552020148585123e-05, "loss": 0.6173, "step": 16379 }, { "epoch": 2.673931676258112, "grad_norm": 2.5974574089050293, "learning_rate": 1.5519666871112763e-05, "loss": 0.5347, "step": 16380 }, { "epoch": 2.6740949349006162, "grad_norm": 3.340080738067627, "learning_rate": 1.5519132233685033e-05, "loss": 0.6347, "step": 16381 }, { "epoch": 2.6742581935431207, "grad_norm": 2.6874725818634033, "learning_rate": 1.5518597573570233e-05, "loss": 0.5643, "step": 16382 }, { "epoch": 2.674421452185625, "grad_norm": 3.05090594291687, "learning_rate": 1.5518062890770563e-05, "loss": 0.6613, "step": 16383 }, { "epoch": 2.6745847108281295, "grad_norm": 3.4283199310302734, "learning_rate": 1.551752818528822e-05, "loss": 0.7187, "step": 16384 }, { "epoch": 2.674747969470634, "grad_norm": 2.7354626655578613, "learning_rate": 1.55169934571254e-05, "loss": 0.5832, "step": 16385 }, { "epoch": 2.674911228113138, "grad_norm": 3.351813554763794, "learning_rate": 1.5516458706284306e-05, "loss": 0.6733, "step": 16386 }, { "epoch": 2.6750744867556424, "grad_norm": 2.6991732120513916, "learning_rate": 1.551592393276713e-05, "loss": 0.6538, "step": 16387 }, { "epoch": 2.675237745398147, "grad_norm": 2.445279121398926, "learning_rate": 1.5515389136576074e-05, "loss": 0.503, "step": 16388 }, { "epoch": 2.6754010040406513, "grad_norm": 2.6962106227874756, "learning_rate": 1.5514854317713338e-05, "loss": 0.6109, "step": 16389 }, { "epoch": 2.6755642626831557, "grad_norm": 3.064117670059204, "learning_rate": 1.5514319476181117e-05, "loss": 0.6167, "step": 16390 }, { "epoch": 2.67572752132566, "grad_norm": 3.1186678409576416, "learning_rate": 1.5513784611981613e-05, "loss": 0.5571, "step": 16391 }, { "epoch": 2.6758907799681646, "grad_norm": 2.5140089988708496, "learning_rate": 1.5513249725117018e-05, "loss": 0.4934, "step": 16392 }, { "epoch": 2.676054038610669, "grad_norm": 2.737375497817993, "learning_rate": 1.5512714815589536e-05, "loss": 0.53, "step": 16393 }, { "epoch": 2.6762172972531735, "grad_norm": 3.1537251472473145, "learning_rate": 1.5512179883401367e-05, "loss": 0.651, "step": 16394 }, { "epoch": 2.676380555895678, "grad_norm": 2.6481754779815674, "learning_rate": 1.5511644928554708e-05, "loss": 0.6098, "step": 16395 }, { "epoch": 2.6765438145381824, "grad_norm": 2.4939351081848145, "learning_rate": 1.5511109951051757e-05, "loss": 0.5691, "step": 16396 }, { "epoch": 2.6767070731806863, "grad_norm": 3.0444905757904053, "learning_rate": 1.5510574950894713e-05, "loss": 0.5728, "step": 16397 }, { "epoch": 2.676870331823191, "grad_norm": 2.8108832836151123, "learning_rate": 1.551003992808578e-05, "loss": 0.554, "step": 16398 }, { "epoch": 2.6770335904656952, "grad_norm": 3.020848035812378, "learning_rate": 1.550950488262715e-05, "loss": 0.68, "step": 16399 }, { "epoch": 2.6771968491081997, "grad_norm": 2.8143310546875, "learning_rate": 1.5508969814521026e-05, "loss": 0.484, "step": 16400 }, { "epoch": 2.677360107750704, "grad_norm": 3.000713586807251, "learning_rate": 1.5508434723769608e-05, "loss": 0.6309, "step": 16401 }, { "epoch": 2.6775233663932085, "grad_norm": 3.099153757095337, "learning_rate": 1.5507899610375098e-05, "loss": 0.6311, "step": 16402 }, { "epoch": 2.677686625035713, "grad_norm": 2.8263301849365234, "learning_rate": 1.5507364474339685e-05, "loss": 0.518, "step": 16403 }, { "epoch": 2.677849883678217, "grad_norm": 2.991957426071167, "learning_rate": 1.5506829315665582e-05, "loss": 0.5841, "step": 16404 }, { "epoch": 2.6780131423207214, "grad_norm": 3.057537317276001, "learning_rate": 1.550629413435498e-05, "loss": 0.565, "step": 16405 }, { "epoch": 2.678176400963226, "grad_norm": 2.343275785446167, "learning_rate": 1.5505758930410085e-05, "loss": 0.5131, "step": 16406 }, { "epoch": 2.6783396596057303, "grad_norm": 2.8876125812530518, "learning_rate": 1.550522370383309e-05, "loss": 0.6234, "step": 16407 }, { "epoch": 2.6785029182482347, "grad_norm": 2.54817271232605, "learning_rate": 1.5504688454626203e-05, "loss": 0.5277, "step": 16408 }, { "epoch": 2.678666176890739, "grad_norm": 2.48752760887146, "learning_rate": 1.550415318279162e-05, "loss": 0.5533, "step": 16409 }, { "epoch": 2.6788294355332436, "grad_norm": 2.809605360031128, "learning_rate": 1.5503617888331538e-05, "loss": 0.5528, "step": 16410 }, { "epoch": 2.678992694175748, "grad_norm": 2.643404960632324, "learning_rate": 1.5503082571248162e-05, "loss": 0.5036, "step": 16411 }, { "epoch": 2.6791559528182525, "grad_norm": 2.543135166168213, "learning_rate": 1.550254723154369e-05, "loss": 0.5387, "step": 16412 }, { "epoch": 2.679319211460757, "grad_norm": 3.3947081565856934, "learning_rate": 1.550201186922033e-05, "loss": 0.6922, "step": 16413 }, { "epoch": 2.6794824701032613, "grad_norm": 2.560781478881836, "learning_rate": 1.5501476484280274e-05, "loss": 0.5521, "step": 16414 }, { "epoch": 2.6796457287457653, "grad_norm": 2.6884095668792725, "learning_rate": 1.5500941076725723e-05, "loss": 0.5252, "step": 16415 }, { "epoch": 2.67980898738827, "grad_norm": 3.114649534225464, "learning_rate": 1.5500405646558885e-05, "loss": 0.6417, "step": 16416 }, { "epoch": 2.679972246030774, "grad_norm": 3.7646372318267822, "learning_rate": 1.5499870193781953e-05, "loss": 0.742, "step": 16417 }, { "epoch": 2.6801355046732787, "grad_norm": 3.1192402839660645, "learning_rate": 1.5499334718397136e-05, "loss": 0.6226, "step": 16418 }, { "epoch": 2.680298763315783, "grad_norm": 2.2794649600982666, "learning_rate": 1.549879922040662e-05, "loss": 0.4714, "step": 16419 }, { "epoch": 2.6804620219582875, "grad_norm": 3.6175014972686768, "learning_rate": 1.5498263699812627e-05, "loss": 0.7018, "step": 16420 }, { "epoch": 2.680625280600792, "grad_norm": 3.0880424976348877, "learning_rate": 1.5497728156617343e-05, "loss": 0.6903, "step": 16421 }, { "epoch": 2.680788539243296, "grad_norm": 2.4406421184539795, "learning_rate": 1.5497192590822973e-05, "loss": 0.4709, "step": 16422 }, { "epoch": 2.6809517978858004, "grad_norm": 2.780255079269409, "learning_rate": 1.5496657002431726e-05, "loss": 0.5751, "step": 16423 }, { "epoch": 2.681115056528305, "grad_norm": 2.787224054336548, "learning_rate": 1.5496121391445793e-05, "loss": 0.5886, "step": 16424 }, { "epoch": 2.6812783151708093, "grad_norm": 2.5487349033355713, "learning_rate": 1.549558575786738e-05, "loss": 0.4886, "step": 16425 }, { "epoch": 2.6814415738133137, "grad_norm": 3.183317184448242, "learning_rate": 1.5495050101698692e-05, "loss": 0.6433, "step": 16426 }, { "epoch": 2.681604832455818, "grad_norm": 3.009544610977173, "learning_rate": 1.5494514422941925e-05, "loss": 0.5558, "step": 16427 }, { "epoch": 2.6817680910983226, "grad_norm": 2.765568733215332, "learning_rate": 1.5493978721599288e-05, "loss": 0.562, "step": 16428 }, { "epoch": 2.681931349740827, "grad_norm": 2.6766750812530518, "learning_rate": 1.5493442997672974e-05, "loss": 0.5328, "step": 16429 }, { "epoch": 2.6820946083833315, "grad_norm": 2.7316737174987793, "learning_rate": 1.5492907251165192e-05, "loss": 0.6328, "step": 16430 }, { "epoch": 2.682257867025836, "grad_norm": 3.250465154647827, "learning_rate": 1.5492371482078145e-05, "loss": 0.5787, "step": 16431 }, { "epoch": 2.6824211256683403, "grad_norm": 2.3475453853607178, "learning_rate": 1.5491835690414026e-05, "loss": 0.445, "step": 16432 }, { "epoch": 2.6825843843108443, "grad_norm": 2.476186752319336, "learning_rate": 1.549129987617505e-05, "loss": 0.6003, "step": 16433 }, { "epoch": 2.6827476429533488, "grad_norm": 2.1903276443481445, "learning_rate": 1.5490764039363416e-05, "loss": 0.5, "step": 16434 }, { "epoch": 2.682910901595853, "grad_norm": 2.825183391571045, "learning_rate": 1.549022817998132e-05, "loss": 0.5273, "step": 16435 }, { "epoch": 2.6830741602383577, "grad_norm": 3.0895516872406006, "learning_rate": 1.5489692298030968e-05, "loss": 0.5131, "step": 16436 }, { "epoch": 2.683237418880862, "grad_norm": 2.4106924533843994, "learning_rate": 1.5489156393514567e-05, "loss": 0.4986, "step": 16437 }, { "epoch": 2.6834006775233665, "grad_norm": 2.962160587310791, "learning_rate": 1.5488620466434316e-05, "loss": 0.5224, "step": 16438 }, { "epoch": 2.6835639361658705, "grad_norm": 2.784074544906616, "learning_rate": 1.5488084516792417e-05, "loss": 0.5386, "step": 16439 }, { "epoch": 2.683727194808375, "grad_norm": 3.182520866394043, "learning_rate": 1.5487548544591077e-05, "loss": 0.6416, "step": 16440 }, { "epoch": 2.6838904534508794, "grad_norm": 3.2717185020446777, "learning_rate": 1.5487012549832493e-05, "loss": 0.6545, "step": 16441 }, { "epoch": 2.684053712093384, "grad_norm": 2.7402868270874023, "learning_rate": 1.5486476532518876e-05, "loss": 0.5735, "step": 16442 }, { "epoch": 2.6842169707358883, "grad_norm": 2.3850908279418945, "learning_rate": 1.5485940492652424e-05, "loss": 0.4537, "step": 16443 }, { "epoch": 2.6843802293783927, "grad_norm": 2.9238297939300537, "learning_rate": 1.5485404430235346e-05, "loss": 0.6273, "step": 16444 }, { "epoch": 2.684543488020897, "grad_norm": 2.436375379562378, "learning_rate": 1.5484868345269837e-05, "loss": 0.5358, "step": 16445 }, { "epoch": 2.6847067466634016, "grad_norm": 3.2325265407562256, "learning_rate": 1.5484332237758107e-05, "loss": 0.6172, "step": 16446 }, { "epoch": 2.684870005305906, "grad_norm": 2.81272554397583, "learning_rate": 1.5483796107702355e-05, "loss": 0.4759, "step": 16447 }, { "epoch": 2.6850332639484105, "grad_norm": 2.5394153594970703, "learning_rate": 1.5483259955104793e-05, "loss": 0.4823, "step": 16448 }, { "epoch": 2.685196522590915, "grad_norm": 2.6944289207458496, "learning_rate": 1.5482723779967616e-05, "loss": 0.5712, "step": 16449 }, { "epoch": 2.685359781233419, "grad_norm": 2.5254955291748047, "learning_rate": 1.5482187582293033e-05, "loss": 0.5774, "step": 16450 }, { "epoch": 2.6855230398759233, "grad_norm": 2.4829773902893066, "learning_rate": 1.5481651362083248e-05, "loss": 0.4809, "step": 16451 }, { "epoch": 2.6856862985184278, "grad_norm": 2.5765562057495117, "learning_rate": 1.5481115119340465e-05, "loss": 0.5572, "step": 16452 }, { "epoch": 2.685849557160932, "grad_norm": 3.0075933933258057, "learning_rate": 1.5480578854066884e-05, "loss": 0.5796, "step": 16453 }, { "epoch": 2.6860128158034366, "grad_norm": 2.7709124088287354, "learning_rate": 1.5480042566264716e-05, "loss": 0.5688, "step": 16454 }, { "epoch": 2.686176074445941, "grad_norm": 2.6970551013946533, "learning_rate": 1.5479506255936158e-05, "loss": 0.584, "step": 16455 }, { "epoch": 2.6863393330884455, "grad_norm": 2.8216636180877686, "learning_rate": 1.547896992308342e-05, "loss": 0.6062, "step": 16456 }, { "epoch": 2.6865025917309495, "grad_norm": 2.416428327560425, "learning_rate": 1.547843356770871e-05, "loss": 0.5289, "step": 16457 }, { "epoch": 2.686665850373454, "grad_norm": 3.267054557800293, "learning_rate": 1.5477897189814223e-05, "loss": 0.5679, "step": 16458 }, { "epoch": 2.6868291090159584, "grad_norm": 2.8573381900787354, "learning_rate": 1.5477360789402173e-05, "loss": 0.5939, "step": 16459 }, { "epoch": 2.686992367658463, "grad_norm": 3.0682945251464844, "learning_rate": 1.5476824366474757e-05, "loss": 0.6543, "step": 16460 }, { "epoch": 2.6871556263009673, "grad_norm": 4.425723552703857, "learning_rate": 1.5476287921034185e-05, "loss": 0.5395, "step": 16461 }, { "epoch": 2.6873188849434717, "grad_norm": 3.1447830200195312, "learning_rate": 1.5475751453082665e-05, "loss": 0.6996, "step": 16462 }, { "epoch": 2.687482143585976, "grad_norm": 3.552396535873413, "learning_rate": 1.5475214962622396e-05, "loss": 0.6851, "step": 16463 }, { "epoch": 2.6876454022284806, "grad_norm": 2.941739559173584, "learning_rate": 1.547467844965558e-05, "loss": 0.6029, "step": 16464 }, { "epoch": 2.687808660870985, "grad_norm": 2.254260540008545, "learning_rate": 1.547414191418444e-05, "loss": 0.4634, "step": 16465 }, { "epoch": 2.6879719195134895, "grad_norm": 2.8283281326293945, "learning_rate": 1.547360535621116e-05, "loss": 0.5833, "step": 16466 }, { "epoch": 2.688135178155994, "grad_norm": 2.739614248275757, "learning_rate": 1.547306877573796e-05, "loss": 0.5466, "step": 16467 }, { "epoch": 2.688298436798498, "grad_norm": 2.8006112575531006, "learning_rate": 1.547253217276704e-05, "loss": 0.5943, "step": 16468 }, { "epoch": 2.6884616954410023, "grad_norm": 2.657222270965576, "learning_rate": 1.54719955473006e-05, "loss": 0.5817, "step": 16469 }, { "epoch": 2.6886249540835068, "grad_norm": 2.488339424133301, "learning_rate": 1.547145889934086e-05, "loss": 0.5466, "step": 16470 }, { "epoch": 2.688788212726011, "grad_norm": 2.4802324771881104, "learning_rate": 1.547092222889002e-05, "loss": 0.5408, "step": 16471 }, { "epoch": 2.6889514713685156, "grad_norm": 3.4867970943450928, "learning_rate": 1.5470385535950278e-05, "loss": 0.6828, "step": 16472 }, { "epoch": 2.68911473001102, "grad_norm": 2.698514461517334, "learning_rate": 1.5469848820523852e-05, "loss": 0.523, "step": 16473 }, { "epoch": 2.689277988653524, "grad_norm": 2.983975410461426, "learning_rate": 1.546931208261294e-05, "loss": 0.5728, "step": 16474 }, { "epoch": 2.6894412472960285, "grad_norm": 2.892430305480957, "learning_rate": 1.546877532221975e-05, "loss": 0.5189, "step": 16475 }, { "epoch": 2.689604505938533, "grad_norm": 2.5956385135650635, "learning_rate": 1.546823853934649e-05, "loss": 0.4775, "step": 16476 }, { "epoch": 2.6897677645810374, "grad_norm": 2.7520034313201904, "learning_rate": 1.546770173399537e-05, "loss": 0.5157, "step": 16477 }, { "epoch": 2.689931023223542, "grad_norm": 3.2948014736175537, "learning_rate": 1.546716490616859e-05, "loss": 0.5335, "step": 16478 }, { "epoch": 2.6900942818660463, "grad_norm": 2.8385097980499268, "learning_rate": 1.546662805586836e-05, "loss": 0.5254, "step": 16479 }, { "epoch": 2.6902575405085507, "grad_norm": 3.0770232677459717, "learning_rate": 1.5466091183096884e-05, "loss": 0.5312, "step": 16480 }, { "epoch": 2.690420799151055, "grad_norm": 3.760134696960449, "learning_rate": 1.5465554287856375e-05, "loss": 0.6904, "step": 16481 }, { "epoch": 2.6905840577935596, "grad_norm": 2.7868523597717285, "learning_rate": 1.5465017370149036e-05, "loss": 0.5156, "step": 16482 }, { "epoch": 2.690747316436064, "grad_norm": 3.046247959136963, "learning_rate": 1.546448042997707e-05, "loss": 0.5906, "step": 16483 }, { "epoch": 2.6909105750785685, "grad_norm": 3.1891915798187256, "learning_rate": 1.5463943467342694e-05, "loss": 0.5743, "step": 16484 }, { "epoch": 2.6910738337210725, "grad_norm": 3.000478506088257, "learning_rate": 1.5463406482248103e-05, "loss": 0.6325, "step": 16485 }, { "epoch": 2.691237092363577, "grad_norm": 3.4687583446502686, "learning_rate": 1.5462869474695518e-05, "loss": 0.6821, "step": 16486 }, { "epoch": 2.6914003510060813, "grad_norm": 3.18247389793396, "learning_rate": 1.5462332444687134e-05, "loss": 0.5719, "step": 16487 }, { "epoch": 2.6915636096485858, "grad_norm": 3.1900899410247803, "learning_rate": 1.5461795392225164e-05, "loss": 0.6229, "step": 16488 }, { "epoch": 2.69172686829109, "grad_norm": 3.0254290103912354, "learning_rate": 1.546125831731182e-05, "loss": 0.5604, "step": 16489 }, { "epoch": 2.6918901269335946, "grad_norm": 3.220353841781616, "learning_rate": 1.54607212199493e-05, "loss": 0.6128, "step": 16490 }, { "epoch": 2.692053385576099, "grad_norm": 2.5294737815856934, "learning_rate": 1.5460184100139817e-05, "loss": 0.4536, "step": 16491 }, { "epoch": 2.692216644218603, "grad_norm": 2.7585785388946533, "learning_rate": 1.5459646957885585e-05, "loss": 0.5538, "step": 16492 }, { "epoch": 2.6923799028611075, "grad_norm": 3.218029499053955, "learning_rate": 1.54591097931888e-05, "loss": 0.5728, "step": 16493 }, { "epoch": 2.692543161503612, "grad_norm": 3.033740758895874, "learning_rate": 1.5458572606051675e-05, "loss": 0.6771, "step": 16494 }, { "epoch": 2.6927064201461164, "grad_norm": 3.052478313446045, "learning_rate": 1.5458035396476427e-05, "loss": 0.6752, "step": 16495 }, { "epoch": 2.692869678788621, "grad_norm": 2.7338638305664062, "learning_rate": 1.545749816446525e-05, "loss": 0.5081, "step": 16496 }, { "epoch": 2.6930329374311253, "grad_norm": 3.022913932800293, "learning_rate": 1.545696091002036e-05, "loss": 0.6863, "step": 16497 }, { "epoch": 2.6931961960736297, "grad_norm": 3.3440346717834473, "learning_rate": 1.5456423633143965e-05, "loss": 0.5789, "step": 16498 }, { "epoch": 2.693359454716134, "grad_norm": 2.5566959381103516, "learning_rate": 1.5455886333838267e-05, "loss": 0.4872, "step": 16499 }, { "epoch": 2.6935227133586386, "grad_norm": 3.047107458114624, "learning_rate": 1.5455349012105488e-05, "loss": 0.6782, "step": 16500 }, { "epoch": 2.693685972001143, "grad_norm": 2.404853343963623, "learning_rate": 1.5454811667947827e-05, "loss": 0.4665, "step": 16501 }, { "epoch": 2.6938492306436475, "grad_norm": 2.7844367027282715, "learning_rate": 1.5454274301367497e-05, "loss": 0.587, "step": 16502 }, { "epoch": 2.6940124892861514, "grad_norm": 2.6062519550323486, "learning_rate": 1.54537369123667e-05, "loss": 0.5374, "step": 16503 }, { "epoch": 2.694175747928656, "grad_norm": 2.5497939586639404, "learning_rate": 1.5453199500947652e-05, "loss": 0.5031, "step": 16504 }, { "epoch": 2.6943390065711603, "grad_norm": 2.442147970199585, "learning_rate": 1.545266206711256e-05, "loss": 0.4194, "step": 16505 }, { "epoch": 2.6945022652136648, "grad_norm": 2.3413283824920654, "learning_rate": 1.5452124610863634e-05, "loss": 0.46, "step": 16506 }, { "epoch": 2.694665523856169, "grad_norm": 2.5561084747314453, "learning_rate": 1.545158713220308e-05, "loss": 0.5642, "step": 16507 }, { "epoch": 2.6948287824986736, "grad_norm": 3.1896674633026123, "learning_rate": 1.545104963113311e-05, "loss": 0.6049, "step": 16508 }, { "epoch": 2.694992041141178, "grad_norm": 2.3912174701690674, "learning_rate": 1.5450512107655936e-05, "loss": 0.4667, "step": 16509 }, { "epoch": 2.695155299783682, "grad_norm": 3.2308452129364014, "learning_rate": 1.5449974561773764e-05, "loss": 0.6282, "step": 16510 }, { "epoch": 2.6953185584261865, "grad_norm": 2.9119443893432617, "learning_rate": 1.5449436993488806e-05, "loss": 0.5413, "step": 16511 }, { "epoch": 2.695481817068691, "grad_norm": 2.9675042629241943, "learning_rate": 1.5448899402803267e-05, "loss": 0.5301, "step": 16512 }, { "epoch": 2.6956450757111954, "grad_norm": 2.5441362857818604, "learning_rate": 1.544836178971936e-05, "loss": 0.4855, "step": 16513 }, { "epoch": 2.6958083343537, "grad_norm": 2.9724082946777344, "learning_rate": 1.5447824154239296e-05, "loss": 0.5284, "step": 16514 }, { "epoch": 2.6959715929962043, "grad_norm": 2.660001277923584, "learning_rate": 1.5447286496365283e-05, "loss": 0.5203, "step": 16515 }, { "epoch": 2.6961348516387087, "grad_norm": 3.043905735015869, "learning_rate": 1.5446748816099538e-05, "loss": 0.6633, "step": 16516 }, { "epoch": 2.696298110281213, "grad_norm": 3.43380069732666, "learning_rate": 1.544621111344426e-05, "loss": 0.7202, "step": 16517 }, { "epoch": 2.6964613689237176, "grad_norm": 3.02339768409729, "learning_rate": 1.5445673388401662e-05, "loss": 0.5542, "step": 16518 }, { "epoch": 2.696624627566222, "grad_norm": 2.9865574836730957, "learning_rate": 1.544513564097396e-05, "loss": 0.5028, "step": 16519 }, { "epoch": 2.6967878862087264, "grad_norm": 3.0890698432922363, "learning_rate": 1.544459787116336e-05, "loss": 0.631, "step": 16520 }, { "epoch": 2.6969511448512304, "grad_norm": 3.0507240295410156, "learning_rate": 1.5444060078972077e-05, "loss": 0.652, "step": 16521 }, { "epoch": 2.697114403493735, "grad_norm": 2.3247358798980713, "learning_rate": 1.5443522264402316e-05, "loss": 0.4485, "step": 16522 }, { "epoch": 2.6972776621362393, "grad_norm": 2.9315547943115234, "learning_rate": 1.5442984427456292e-05, "loss": 0.5151, "step": 16523 }, { "epoch": 2.6974409207787438, "grad_norm": 2.8152108192443848, "learning_rate": 1.5442446568136215e-05, "loss": 0.5576, "step": 16524 }, { "epoch": 2.697604179421248, "grad_norm": 2.679827928543091, "learning_rate": 1.5441908686444293e-05, "loss": 0.4928, "step": 16525 }, { "epoch": 2.6977674380637526, "grad_norm": 3.004425287246704, "learning_rate": 1.544137078238274e-05, "loss": 0.5459, "step": 16526 }, { "epoch": 2.6979306967062566, "grad_norm": 2.9770874977111816, "learning_rate": 1.5440832855953767e-05, "loss": 0.5568, "step": 16527 }, { "epoch": 2.698093955348761, "grad_norm": 3.2030394077301025, "learning_rate": 1.5440294907159583e-05, "loss": 0.6224, "step": 16528 }, { "epoch": 2.6982572139912655, "grad_norm": 3.0636777877807617, "learning_rate": 1.5439756936002402e-05, "loss": 0.5925, "step": 16529 }, { "epoch": 2.69842047263377, "grad_norm": 3.2837350368499756, "learning_rate": 1.5439218942484435e-05, "loss": 0.5557, "step": 16530 }, { "epoch": 2.6985837312762744, "grad_norm": 2.7639501094818115, "learning_rate": 1.543868092660789e-05, "loss": 0.6061, "step": 16531 }, { "epoch": 2.698746989918779, "grad_norm": 2.966529369354248, "learning_rate": 1.5438142888374984e-05, "loss": 0.5898, "step": 16532 }, { "epoch": 2.6989102485612833, "grad_norm": 3.5155436992645264, "learning_rate": 1.5437604827787925e-05, "loss": 0.729, "step": 16533 }, { "epoch": 2.6990735072037877, "grad_norm": 3.304887533187866, "learning_rate": 1.5437066744848927e-05, "loss": 0.6691, "step": 16534 }, { "epoch": 2.699236765846292, "grad_norm": 3.184290647506714, "learning_rate": 1.54365286395602e-05, "loss": 0.6381, "step": 16535 }, { "epoch": 2.6994000244887966, "grad_norm": 3.120079517364502, "learning_rate": 1.5435990511923957e-05, "loss": 0.5636, "step": 16536 }, { "epoch": 2.699563283131301, "grad_norm": 2.9178500175476074, "learning_rate": 1.5435452361942406e-05, "loss": 0.5021, "step": 16537 }, { "epoch": 2.699726541773805, "grad_norm": 3.025999069213867, "learning_rate": 1.5434914189617766e-05, "loss": 0.6362, "step": 16538 }, { "epoch": 2.6998898004163094, "grad_norm": 2.752610921859741, "learning_rate": 1.5434375994952246e-05, "loss": 0.556, "step": 16539 }, { "epoch": 2.700053059058814, "grad_norm": 3.3153367042541504, "learning_rate": 1.5433837777948058e-05, "loss": 0.6543, "step": 16540 }, { "epoch": 2.7002163177013183, "grad_norm": 3.4467430114746094, "learning_rate": 1.5433299538607415e-05, "loss": 0.5905, "step": 16541 }, { "epoch": 2.7003795763438228, "grad_norm": 3.050940990447998, "learning_rate": 1.543276127693253e-05, "loss": 0.601, "step": 16542 }, { "epoch": 2.700542834986327, "grad_norm": 2.6100873947143555, "learning_rate": 1.5432222992925612e-05, "loss": 0.5105, "step": 16543 }, { "epoch": 2.7007060936288316, "grad_norm": 2.5746009349823, "learning_rate": 1.5431684686588877e-05, "loss": 0.5443, "step": 16544 }, { "epoch": 2.7008693522713356, "grad_norm": 2.6397509574890137, "learning_rate": 1.543114635792454e-05, "loss": 0.5986, "step": 16545 }, { "epoch": 2.70103261091384, "grad_norm": 2.9017889499664307, "learning_rate": 1.5430608006934807e-05, "loss": 0.5462, "step": 16546 }, { "epoch": 2.7011958695563445, "grad_norm": 2.9039955139160156, "learning_rate": 1.5430069633621897e-05, "loss": 0.5413, "step": 16547 }, { "epoch": 2.701359128198849, "grad_norm": 2.991748809814453, "learning_rate": 1.542953123798802e-05, "loss": 0.6165, "step": 16548 }, { "epoch": 2.7015223868413534, "grad_norm": 2.8886682987213135, "learning_rate": 1.542899282003539e-05, "loss": 0.5334, "step": 16549 }, { "epoch": 2.701685645483858, "grad_norm": 2.76145339012146, "learning_rate": 1.5428454379766223e-05, "loss": 0.5197, "step": 16550 }, { "epoch": 2.7018489041263622, "grad_norm": 3.2515993118286133, "learning_rate": 1.542791591718273e-05, "loss": 0.6449, "step": 16551 }, { "epoch": 2.7020121627688667, "grad_norm": 2.339501142501831, "learning_rate": 1.542737743228712e-05, "loss": 0.4918, "step": 16552 }, { "epoch": 2.702175421411371, "grad_norm": 2.78640079498291, "learning_rate": 1.542683892508162e-05, "loss": 0.5424, "step": 16553 }, { "epoch": 2.7023386800538756, "grad_norm": 3.1897246837615967, "learning_rate": 1.5426300395568423e-05, "loss": 0.6018, "step": 16554 }, { "epoch": 2.70250193869638, "grad_norm": 2.2484922409057617, "learning_rate": 1.5425761843749763e-05, "loss": 0.46, "step": 16555 }, { "epoch": 2.702665197338884, "grad_norm": 3.1671230792999268, "learning_rate": 1.5425223269627842e-05, "loss": 0.533, "step": 16556 }, { "epoch": 2.7028284559813884, "grad_norm": 3.571329355239868, "learning_rate": 1.5424684673204874e-05, "loss": 0.7192, "step": 16557 }, { "epoch": 2.702991714623893, "grad_norm": 2.994839906692505, "learning_rate": 1.5424146054483076e-05, "loss": 0.5786, "step": 16558 }, { "epoch": 2.7031549732663973, "grad_norm": 3.518559217453003, "learning_rate": 1.542360741346466e-05, "loss": 0.6192, "step": 16559 }, { "epoch": 2.7033182319089017, "grad_norm": 3.6131792068481445, "learning_rate": 1.5423068750151847e-05, "loss": 1.086, "step": 16560 }, { "epoch": 2.703481490551406, "grad_norm": 2.723065137863159, "learning_rate": 1.5422530064546844e-05, "loss": 0.5695, "step": 16561 }, { "epoch": 2.70364474919391, "grad_norm": 3.4786016941070557, "learning_rate": 1.5421991356651867e-05, "loss": 0.672, "step": 16562 }, { "epoch": 2.7038080078364146, "grad_norm": 2.8908772468566895, "learning_rate": 1.542145262646913e-05, "loss": 0.6396, "step": 16563 }, { "epoch": 2.703971266478919, "grad_norm": 2.5582737922668457, "learning_rate": 1.542091387400085e-05, "loss": 0.5438, "step": 16564 }, { "epoch": 2.7041345251214235, "grad_norm": 3.158236265182495, "learning_rate": 1.542037509924924e-05, "loss": 0.5196, "step": 16565 }, { "epoch": 2.704297783763928, "grad_norm": 3.004028558731079, "learning_rate": 1.5419836302216513e-05, "loss": 0.6287, "step": 16566 }, { "epoch": 2.7044610424064324, "grad_norm": 2.8332855701446533, "learning_rate": 1.5419297482904886e-05, "loss": 0.605, "step": 16567 }, { "epoch": 2.704624301048937, "grad_norm": 3.220527410507202, "learning_rate": 1.5418758641316573e-05, "loss": 0.6365, "step": 16568 }, { "epoch": 2.7047875596914412, "grad_norm": 2.3314497470855713, "learning_rate": 1.541821977745379e-05, "loss": 0.4768, "step": 16569 }, { "epoch": 2.7049508183339457, "grad_norm": 3.0522799491882324, "learning_rate": 1.5417680891318752e-05, "loss": 0.5751, "step": 16570 }, { "epoch": 2.70511407697645, "grad_norm": 2.6162328720092773, "learning_rate": 1.5417141982913672e-05, "loss": 0.5193, "step": 16571 }, { "epoch": 2.7052773356189546, "grad_norm": 2.2235684394836426, "learning_rate": 1.541660305224077e-05, "loss": 0.481, "step": 16572 }, { "epoch": 2.7054405942614586, "grad_norm": 2.3305256366729736, "learning_rate": 1.5416064099302254e-05, "loss": 0.4724, "step": 16573 }, { "epoch": 2.705603852903963, "grad_norm": 2.8755695819854736, "learning_rate": 1.5415525124100346e-05, "loss": 0.5597, "step": 16574 }, { "epoch": 2.7057671115464674, "grad_norm": 3.094917058944702, "learning_rate": 1.541498612663726e-05, "loss": 0.6579, "step": 16575 }, { "epoch": 2.705930370188972, "grad_norm": 2.9226880073547363, "learning_rate": 1.5414447106915207e-05, "loss": 0.597, "step": 16576 }, { "epoch": 2.7060936288314763, "grad_norm": 2.497739791870117, "learning_rate": 1.541390806493641e-05, "loss": 0.4904, "step": 16577 }, { "epoch": 2.7062568874739807, "grad_norm": 2.966066360473633, "learning_rate": 1.541336900070308e-05, "loss": 0.59, "step": 16578 }, { "epoch": 2.706420146116485, "grad_norm": 3.459608316421509, "learning_rate": 1.5412829914217434e-05, "loss": 1.2066, "step": 16579 }, { "epoch": 2.706583404758989, "grad_norm": 2.554961919784546, "learning_rate": 1.5412290805481686e-05, "loss": 0.4623, "step": 16580 }, { "epoch": 2.7067466634014936, "grad_norm": 3.3374054431915283, "learning_rate": 1.5411751674498058e-05, "loss": 0.617, "step": 16581 }, { "epoch": 2.706909922043998, "grad_norm": 2.9772403240203857, "learning_rate": 1.541121252126876e-05, "loss": 0.6143, "step": 16582 }, { "epoch": 2.7070731806865025, "grad_norm": 3.3350658416748047, "learning_rate": 1.541067334579601e-05, "loss": 0.5647, "step": 16583 }, { "epoch": 2.707236439329007, "grad_norm": 2.679370164871216, "learning_rate": 1.5410134148082027e-05, "loss": 0.4825, "step": 16584 }, { "epoch": 2.7073996979715114, "grad_norm": 2.3859190940856934, "learning_rate": 1.5409594928129026e-05, "loss": 0.5132, "step": 16585 }, { "epoch": 2.707562956614016, "grad_norm": 2.716226100921631, "learning_rate": 1.540905568593922e-05, "loss": 0.5205, "step": 16586 }, { "epoch": 2.7077262152565202, "grad_norm": 2.3464348316192627, "learning_rate": 1.5408516421514827e-05, "loss": 0.4617, "step": 16587 }, { "epoch": 2.7078894738990247, "grad_norm": 2.634490728378296, "learning_rate": 1.540797713485807e-05, "loss": 0.613, "step": 16588 }, { "epoch": 2.708052732541529, "grad_norm": 2.8535096645355225, "learning_rate": 1.540743782597116e-05, "loss": 0.5707, "step": 16589 }, { "epoch": 2.7082159911840336, "grad_norm": 2.480414867401123, "learning_rate": 1.5406898494856314e-05, "loss": 0.4898, "step": 16590 }, { "epoch": 2.7083792498265375, "grad_norm": 2.8840112686157227, "learning_rate": 1.540635914151575e-05, "loss": 0.6394, "step": 16591 }, { "epoch": 2.708542508469042, "grad_norm": 3.170133113861084, "learning_rate": 1.5405819765951683e-05, "loss": 0.6294, "step": 16592 }, { "epoch": 2.7087057671115464, "grad_norm": 3.045952558517456, "learning_rate": 1.5405280368166335e-05, "loss": 0.6685, "step": 16593 }, { "epoch": 2.708869025754051, "grad_norm": 2.983853340148926, "learning_rate": 1.540474094816192e-05, "loss": 0.6617, "step": 16594 }, { "epoch": 2.7090322843965553, "grad_norm": 2.934760808944702, "learning_rate": 1.5404201505940653e-05, "loss": 0.5529, "step": 16595 }, { "epoch": 2.7091955430390597, "grad_norm": 2.5465617179870605, "learning_rate": 1.5403662041504756e-05, "loss": 0.5385, "step": 16596 }, { "epoch": 2.709358801681564, "grad_norm": 2.7285614013671875, "learning_rate": 1.5403122554856445e-05, "loss": 0.4863, "step": 16597 }, { "epoch": 2.709522060324068, "grad_norm": 3.031893730163574, "learning_rate": 1.5402583045997935e-05, "loss": 0.7107, "step": 16598 }, { "epoch": 2.7096853189665726, "grad_norm": 2.9377801418304443, "learning_rate": 1.540204351493145e-05, "loss": 0.6147, "step": 16599 }, { "epoch": 2.709848577609077, "grad_norm": 3.0415384769439697, "learning_rate": 1.5401503961659202e-05, "loss": 0.5193, "step": 16600 }, { "epoch": 2.7100118362515815, "grad_norm": 2.244934558868408, "learning_rate": 1.5400964386183414e-05, "loss": 0.4745, "step": 16601 }, { "epoch": 2.710175094894086, "grad_norm": 2.952993869781494, "learning_rate": 1.5400424788506292e-05, "loss": 0.4962, "step": 16602 }, { "epoch": 2.7103383535365904, "grad_norm": 2.733489513397217, "learning_rate": 1.539988516863007e-05, "loss": 0.5262, "step": 16603 }, { "epoch": 2.710501612179095, "grad_norm": 3.151716470718384, "learning_rate": 1.539934552655696e-05, "loss": 0.6263, "step": 16604 }, { "epoch": 2.7106648708215992, "grad_norm": 3.120258092880249, "learning_rate": 1.5398805862289176e-05, "loss": 0.5737, "step": 16605 }, { "epoch": 2.7108281294641037, "grad_norm": 2.687854766845703, "learning_rate": 1.539826617582894e-05, "loss": 0.5196, "step": 16606 }, { "epoch": 2.710991388106608, "grad_norm": 3.0845184326171875, "learning_rate": 1.5397726467178472e-05, "loss": 0.6314, "step": 16607 }, { "epoch": 2.7111546467491126, "grad_norm": 3.275211811065674, "learning_rate": 1.5397186736339985e-05, "loss": 0.6064, "step": 16608 }, { "epoch": 2.7113179053916165, "grad_norm": 2.9381914138793945, "learning_rate": 1.5396646983315707e-05, "loss": 0.6014, "step": 16609 }, { "epoch": 2.711481164034121, "grad_norm": 3.2558095455169678, "learning_rate": 1.5396107208107846e-05, "loss": 0.5479, "step": 16610 }, { "epoch": 2.7116444226766254, "grad_norm": 3.1070077419281006, "learning_rate": 1.5395567410718628e-05, "loss": 0.5279, "step": 16611 }, { "epoch": 2.71180768131913, "grad_norm": 3.017460584640503, "learning_rate": 1.5395027591150272e-05, "loss": 0.5405, "step": 16612 }, { "epoch": 2.7119709399616343, "grad_norm": 2.924008846282959, "learning_rate": 1.5394487749404988e-05, "loss": 0.5508, "step": 16613 }, { "epoch": 2.7121341986041387, "grad_norm": 2.9434444904327393, "learning_rate": 1.5393947885485004e-05, "loss": 0.5966, "step": 16614 }, { "epoch": 2.7122974572466427, "grad_norm": 2.9570956230163574, "learning_rate": 1.539340799939254e-05, "loss": 0.5591, "step": 16615 }, { "epoch": 2.712460715889147, "grad_norm": 2.6635262966156006, "learning_rate": 1.5392868091129814e-05, "loss": 0.5523, "step": 16616 }, { "epoch": 2.7126239745316516, "grad_norm": 2.319316864013672, "learning_rate": 1.5392328160699038e-05, "loss": 0.441, "step": 16617 }, { "epoch": 2.712787233174156, "grad_norm": 2.7804970741271973, "learning_rate": 1.539178820810244e-05, "loss": 0.5218, "step": 16618 }, { "epoch": 2.7129504918166605, "grad_norm": 2.912294387817383, "learning_rate": 1.5391248233342236e-05, "loss": 0.5398, "step": 16619 }, { "epoch": 2.713113750459165, "grad_norm": 3.2105307579040527, "learning_rate": 1.5390708236420645e-05, "loss": 0.5383, "step": 16620 }, { "epoch": 2.7132770091016694, "grad_norm": 2.5633068084716797, "learning_rate": 1.5390168217339892e-05, "loss": 0.5748, "step": 16621 }, { "epoch": 2.713440267744174, "grad_norm": 2.7512407302856445, "learning_rate": 1.538962817610219e-05, "loss": 0.5443, "step": 16622 }, { "epoch": 2.7136035263866782, "grad_norm": 3.0707693099975586, "learning_rate": 1.538908811270976e-05, "loss": 0.6176, "step": 16623 }, { "epoch": 2.7137667850291827, "grad_norm": 3.085510730743408, "learning_rate": 1.5388548027164826e-05, "loss": 0.5639, "step": 16624 }, { "epoch": 2.713930043671687, "grad_norm": 3.0556535720825195, "learning_rate": 1.5388007919469604e-05, "loss": 0.6339, "step": 16625 }, { "epoch": 2.714093302314191, "grad_norm": 2.4436750411987305, "learning_rate": 1.5387467789626317e-05, "loss": 0.4866, "step": 16626 }, { "epoch": 2.7142565609566955, "grad_norm": 3.9786014556884766, "learning_rate": 1.5386927637637184e-05, "loss": 0.6599, "step": 16627 }, { "epoch": 2.7144198195992, "grad_norm": 2.584954023361206, "learning_rate": 1.5386387463504425e-05, "loss": 0.5675, "step": 16628 }, { "epoch": 2.7145830782417044, "grad_norm": 2.8378641605377197, "learning_rate": 1.538584726723026e-05, "loss": 0.5151, "step": 16629 }, { "epoch": 2.714746336884209, "grad_norm": 3.0135905742645264, "learning_rate": 1.5385307048816917e-05, "loss": 0.6084, "step": 16630 }, { "epoch": 2.7149095955267133, "grad_norm": 2.8059301376342773, "learning_rate": 1.5384766808266603e-05, "loss": 0.5042, "step": 16631 }, { "epoch": 2.7150728541692177, "grad_norm": 3.004760265350342, "learning_rate": 1.538422654558155e-05, "loss": 0.6328, "step": 16632 }, { "epoch": 2.7152361128117217, "grad_norm": 3.4032366275787354, "learning_rate": 1.5383686260763973e-05, "loss": 0.7099, "step": 16633 }, { "epoch": 2.715399371454226, "grad_norm": 3.0045690536499023, "learning_rate": 1.5383145953816095e-05, "loss": 0.5912, "step": 16634 }, { "epoch": 2.7155626300967306, "grad_norm": 2.8486921787261963, "learning_rate": 1.5382605624740136e-05, "loss": 0.6359, "step": 16635 }, { "epoch": 2.715725888739235, "grad_norm": 3.1222965717315674, "learning_rate": 1.5382065273538317e-05, "loss": 0.6673, "step": 16636 }, { "epoch": 2.7158891473817395, "grad_norm": 2.653404474258423, "learning_rate": 1.538152490021286e-05, "loss": 0.5175, "step": 16637 }, { "epoch": 2.716052406024244, "grad_norm": 2.1402595043182373, "learning_rate": 1.5380984504765992e-05, "loss": 0.4533, "step": 16638 }, { "epoch": 2.7162156646667484, "grad_norm": 3.0643184185028076, "learning_rate": 1.538044408719992e-05, "loss": 0.72, "step": 16639 }, { "epoch": 2.716378923309253, "grad_norm": 3.1673130989074707, "learning_rate": 1.5379903647516877e-05, "loss": 0.6806, "step": 16640 }, { "epoch": 2.7165421819517572, "grad_norm": 2.943312406539917, "learning_rate": 1.537936318571908e-05, "loss": 0.5138, "step": 16641 }, { "epoch": 2.7167054405942617, "grad_norm": 2.5009779930114746, "learning_rate": 1.5378822701808755e-05, "loss": 0.5258, "step": 16642 }, { "epoch": 2.716868699236766, "grad_norm": 2.7982568740844727, "learning_rate": 1.5378282195788124e-05, "loss": 0.6272, "step": 16643 }, { "epoch": 2.71703195787927, "grad_norm": 2.546705484390259, "learning_rate": 1.53777416676594e-05, "loss": 0.4409, "step": 16644 }, { "epoch": 2.7171952165217745, "grad_norm": 2.8379135131835938, "learning_rate": 1.5377201117424813e-05, "loss": 0.5851, "step": 16645 }, { "epoch": 2.717358475164279, "grad_norm": 2.779327154159546, "learning_rate": 1.5376660545086578e-05, "loss": 0.5624, "step": 16646 }, { "epoch": 2.7175217338067834, "grad_norm": 2.9132962226867676, "learning_rate": 1.5376119950646925e-05, "loss": 0.5812, "step": 16647 }, { "epoch": 2.717684992449288, "grad_norm": 3.010669469833374, "learning_rate": 1.537557933410808e-05, "loss": 0.635, "step": 16648 }, { "epoch": 2.7178482510917923, "grad_norm": 3.124183177947998, "learning_rate": 1.537503869547225e-05, "loss": 0.6012, "step": 16649 }, { "epoch": 2.7180115097342963, "grad_norm": 2.830939769744873, "learning_rate": 1.5374498034741664e-05, "loss": 0.6119, "step": 16650 }, { "epoch": 2.7181747683768007, "grad_norm": 2.6110329627990723, "learning_rate": 1.5373957351918552e-05, "loss": 0.5011, "step": 16651 }, { "epoch": 2.718338027019305, "grad_norm": 2.655085563659668, "learning_rate": 1.5373416647005125e-05, "loss": 0.5163, "step": 16652 }, { "epoch": 2.7185012856618096, "grad_norm": 2.5351617336273193, "learning_rate": 1.5372875920003617e-05, "loss": 0.5415, "step": 16653 }, { "epoch": 2.718664544304314, "grad_norm": 2.8309264183044434, "learning_rate": 1.537233517091624e-05, "loss": 0.5696, "step": 16654 }, { "epoch": 2.7188278029468185, "grad_norm": 3.147972583770752, "learning_rate": 1.5371794399745223e-05, "loss": 0.5887, "step": 16655 }, { "epoch": 2.718991061589323, "grad_norm": 2.7751340866088867, "learning_rate": 1.537125360649279e-05, "loss": 0.5392, "step": 16656 }, { "epoch": 2.7191543202318273, "grad_norm": 2.717322587966919, "learning_rate": 1.5370712791161158e-05, "loss": 0.5643, "step": 16657 }, { "epoch": 2.719317578874332, "grad_norm": 2.973829984664917, "learning_rate": 1.5370171953752554e-05, "loss": 0.5221, "step": 16658 }, { "epoch": 2.7194808375168362, "grad_norm": 3.0349619388580322, "learning_rate": 1.53696310942692e-05, "loss": 0.5852, "step": 16659 }, { "epoch": 2.7196440961593407, "grad_norm": 2.826582670211792, "learning_rate": 1.5369090212713325e-05, "loss": 0.6103, "step": 16660 }, { "epoch": 2.7198073548018447, "grad_norm": 2.9459497928619385, "learning_rate": 1.5368549309087145e-05, "loss": 0.4987, "step": 16661 }, { "epoch": 2.719970613444349, "grad_norm": 3.251816511154175, "learning_rate": 1.5368008383392887e-05, "loss": 0.664, "step": 16662 }, { "epoch": 2.7201338720868535, "grad_norm": 2.1224021911621094, "learning_rate": 1.536746743563277e-05, "loss": 0.422, "step": 16663 }, { "epoch": 2.720297130729358, "grad_norm": 2.725496292114258, "learning_rate": 1.5366926465809023e-05, "loss": 0.5173, "step": 16664 }, { "epoch": 2.7204603893718624, "grad_norm": 2.6590352058410645, "learning_rate": 1.536638547392387e-05, "loss": 0.5018, "step": 16665 }, { "epoch": 2.720623648014367, "grad_norm": 2.499624490737915, "learning_rate": 1.536584445997953e-05, "loss": 0.468, "step": 16666 }, { "epoch": 2.7207869066568713, "grad_norm": 3.1470935344696045, "learning_rate": 1.5365303423978227e-05, "loss": 0.563, "step": 16667 }, { "epoch": 2.7209501652993753, "grad_norm": 2.4504590034484863, "learning_rate": 1.5364762365922192e-05, "loss": 0.4543, "step": 16668 }, { "epoch": 2.7211134239418797, "grad_norm": 3.08823823928833, "learning_rate": 1.536422128581364e-05, "loss": 0.6269, "step": 16669 }, { "epoch": 2.721276682584384, "grad_norm": 3.0523059368133545, "learning_rate": 1.5363680183654807e-05, "loss": 0.6311, "step": 16670 }, { "epoch": 2.7214399412268886, "grad_norm": 3.4525842666625977, "learning_rate": 1.5363139059447905e-05, "loss": 0.705, "step": 16671 }, { "epoch": 2.721603199869393, "grad_norm": 2.9220194816589355, "learning_rate": 1.5362597913195164e-05, "loss": 0.6861, "step": 16672 }, { "epoch": 2.7217664585118975, "grad_norm": 3.5662553310394287, "learning_rate": 1.536205674489881e-05, "loss": 0.6006, "step": 16673 }, { "epoch": 2.721929717154402, "grad_norm": 2.2382004261016846, "learning_rate": 1.5361515554561062e-05, "loss": 0.4265, "step": 16674 }, { "epoch": 2.7220929757969063, "grad_norm": 2.856046438217163, "learning_rate": 1.536097434218415e-05, "loss": 0.5976, "step": 16675 }, { "epoch": 2.722256234439411, "grad_norm": 3.1025466918945312, "learning_rate": 1.5360433107770295e-05, "loss": 0.5763, "step": 16676 }, { "epoch": 2.722419493081915, "grad_norm": 2.487826108932495, "learning_rate": 1.5359891851321726e-05, "loss": 0.4848, "step": 16677 }, { "epoch": 2.7225827517244197, "grad_norm": 3.210315704345703, "learning_rate": 1.5359350572840668e-05, "loss": 0.6251, "step": 16678 }, { "epoch": 2.7227460103669237, "grad_norm": 2.8407270908355713, "learning_rate": 1.5358809272329337e-05, "loss": 0.5243, "step": 16679 }, { "epoch": 2.722909269009428, "grad_norm": 3.2314703464508057, "learning_rate": 1.5358267949789968e-05, "loss": 0.7133, "step": 16680 }, { "epoch": 2.7230725276519325, "grad_norm": 2.466110944747925, "learning_rate": 1.535772660522478e-05, "loss": 0.4305, "step": 16681 }, { "epoch": 2.723235786294437, "grad_norm": 2.775477886199951, "learning_rate": 1.5357185238636007e-05, "loss": 0.5234, "step": 16682 }, { "epoch": 2.7233990449369414, "grad_norm": 2.8954882621765137, "learning_rate": 1.5356643850025866e-05, "loss": 0.5839, "step": 16683 }, { "epoch": 2.723562303579446, "grad_norm": 2.7733869552612305, "learning_rate": 1.5356102439396582e-05, "loss": 0.537, "step": 16684 }, { "epoch": 2.7237255622219503, "grad_norm": 3.1044511795043945, "learning_rate": 1.535556100675038e-05, "loss": 0.6048, "step": 16685 }, { "epoch": 2.7238888208644543, "grad_norm": 2.9806272983551025, "learning_rate": 1.5355019552089495e-05, "loss": 0.6298, "step": 16686 }, { "epoch": 2.7240520795069587, "grad_norm": 2.8758633136749268, "learning_rate": 1.5354478075416148e-05, "loss": 0.62, "step": 16687 }, { "epoch": 2.724215338149463, "grad_norm": 2.597316265106201, "learning_rate": 1.535393657673256e-05, "loss": 0.5169, "step": 16688 }, { "epoch": 2.7243785967919676, "grad_norm": 2.8625504970550537, "learning_rate": 1.535339505604096e-05, "loss": 0.5621, "step": 16689 }, { "epoch": 2.724541855434472, "grad_norm": 3.3367092609405518, "learning_rate": 1.5352853513343574e-05, "loss": 1.1071, "step": 16690 }, { "epoch": 2.7247051140769765, "grad_norm": 2.715034008026123, "learning_rate": 1.535231194864263e-05, "loss": 0.579, "step": 16691 }, { "epoch": 2.724868372719481, "grad_norm": 3.1751275062561035, "learning_rate": 1.5351770361940355e-05, "loss": 0.5893, "step": 16692 }, { "epoch": 2.7250316313619853, "grad_norm": 2.3088884353637695, "learning_rate": 1.535122875323897e-05, "loss": 0.4681, "step": 16693 }, { "epoch": 2.72519489000449, "grad_norm": 2.707935094833374, "learning_rate": 1.5350687122540703e-05, "loss": 0.4661, "step": 16694 }, { "epoch": 2.725358148646994, "grad_norm": 2.9892354011535645, "learning_rate": 1.5350145469847784e-05, "loss": 0.5909, "step": 16695 }, { "epoch": 2.7255214072894987, "grad_norm": 2.7281107902526855, "learning_rate": 1.5349603795162433e-05, "loss": 0.5523, "step": 16696 }, { "epoch": 2.7256846659320026, "grad_norm": 2.7619879245758057, "learning_rate": 1.5349062098486887e-05, "loss": 0.4962, "step": 16697 }, { "epoch": 2.725847924574507, "grad_norm": 2.60660719871521, "learning_rate": 1.5348520379823362e-05, "loss": 0.4887, "step": 16698 }, { "epoch": 2.7260111832170115, "grad_norm": 2.8510665893554688, "learning_rate": 1.534797863917409e-05, "loss": 0.6251, "step": 16699 }, { "epoch": 2.726174441859516, "grad_norm": 2.790865898132324, "learning_rate": 1.5347436876541298e-05, "loss": 0.5302, "step": 16700 }, { "epoch": 2.7263377005020204, "grad_norm": 3.056701898574829, "learning_rate": 1.5346895091927212e-05, "loss": 0.5971, "step": 16701 }, { "epoch": 2.726500959144525, "grad_norm": 2.713376045227051, "learning_rate": 1.5346353285334057e-05, "loss": 0.6517, "step": 16702 }, { "epoch": 2.726664217787029, "grad_norm": 2.6310324668884277, "learning_rate": 1.5345811456764067e-05, "loss": 0.5158, "step": 16703 }, { "epoch": 2.7268274764295333, "grad_norm": 2.8999087810516357, "learning_rate": 1.5345269606219463e-05, "loss": 0.6062, "step": 16704 }, { "epoch": 2.7269907350720377, "grad_norm": 3.1344809532165527, "learning_rate": 1.5344727733702473e-05, "loss": 0.6167, "step": 16705 }, { "epoch": 2.727153993714542, "grad_norm": 3.555124044418335, "learning_rate": 1.5344185839215325e-05, "loss": 0.6376, "step": 16706 }, { "epoch": 2.7273172523570466, "grad_norm": 2.654276132583618, "learning_rate": 1.5343643922760248e-05, "loss": 0.4839, "step": 16707 }, { "epoch": 2.727480510999551, "grad_norm": 2.985588788986206, "learning_rate": 1.5343101984339466e-05, "loss": 0.5914, "step": 16708 }, { "epoch": 2.7276437696420555, "grad_norm": 2.9219448566436768, "learning_rate": 1.534256002395521e-05, "loss": 0.4806, "step": 16709 }, { "epoch": 2.72780702828456, "grad_norm": 2.6863818168640137, "learning_rate": 1.534201804160971e-05, "loss": 0.6172, "step": 16710 }, { "epoch": 2.7279702869270643, "grad_norm": 3.0066335201263428, "learning_rate": 1.534147603730519e-05, "loss": 0.5596, "step": 16711 }, { "epoch": 2.7281335455695688, "grad_norm": 2.9759156703948975, "learning_rate": 1.5340934011043877e-05, "loss": 0.6019, "step": 16712 }, { "epoch": 2.728296804212073, "grad_norm": 2.2000882625579834, "learning_rate": 1.5340391962828004e-05, "loss": 0.4682, "step": 16713 }, { "epoch": 2.728460062854577, "grad_norm": 2.7245399951934814, "learning_rate": 1.5339849892659792e-05, "loss": 0.5922, "step": 16714 }, { "epoch": 2.7286233214970816, "grad_norm": 3.2988245487213135, "learning_rate": 1.5339307800541477e-05, "loss": 0.5968, "step": 16715 }, { "epoch": 2.728786580139586, "grad_norm": 2.683234691619873, "learning_rate": 1.533876568647528e-05, "loss": 0.4924, "step": 16716 }, { "epoch": 2.7289498387820905, "grad_norm": 2.34244704246521, "learning_rate": 1.5338223550463437e-05, "loss": 0.5284, "step": 16717 }, { "epoch": 2.729113097424595, "grad_norm": 3.1968414783477783, "learning_rate": 1.533768139250817e-05, "loss": 1.1424, "step": 16718 }, { "epoch": 2.7292763560670994, "grad_norm": 3.2138941287994385, "learning_rate": 1.5337139212611713e-05, "loss": 0.5611, "step": 16719 }, { "epoch": 2.729439614709604, "grad_norm": 2.9154155254364014, "learning_rate": 1.5336597010776292e-05, "loss": 0.5692, "step": 16720 }, { "epoch": 2.729602873352108, "grad_norm": 2.5014593601226807, "learning_rate": 1.5336054787004137e-05, "loss": 0.5083, "step": 16721 }, { "epoch": 2.7297661319946123, "grad_norm": 2.486194372177124, "learning_rate": 1.5335512541297474e-05, "loss": 0.4487, "step": 16722 }, { "epoch": 2.7299293906371167, "grad_norm": 2.6573405265808105, "learning_rate": 1.533497027365853e-05, "loss": 0.4791, "step": 16723 }, { "epoch": 2.730092649279621, "grad_norm": 2.918530225753784, "learning_rate": 1.5334427984089536e-05, "loss": 0.526, "step": 16724 }, { "epoch": 2.7302559079221256, "grad_norm": 3.0511045455932617, "learning_rate": 1.533388567259273e-05, "loss": 0.5624, "step": 16725 }, { "epoch": 2.73041916656463, "grad_norm": 3.1490628719329834, "learning_rate": 1.533334333917033e-05, "loss": 0.6205, "step": 16726 }, { "epoch": 2.7305824252071345, "grad_norm": 3.201004981994629, "learning_rate": 1.533280098382457e-05, "loss": 0.6036, "step": 16727 }, { "epoch": 2.730745683849639, "grad_norm": 3.2410998344421387, "learning_rate": 1.5332258606557682e-05, "loss": 0.6809, "step": 16728 }, { "epoch": 2.7309089424921433, "grad_norm": 3.060678482055664, "learning_rate": 1.5331716207371888e-05, "loss": 0.5656, "step": 16729 }, { "epoch": 2.7310722011346478, "grad_norm": 2.8077340126037598, "learning_rate": 1.5331173786269422e-05, "loss": 0.5757, "step": 16730 }, { "epoch": 2.731235459777152, "grad_norm": 3.0296757221221924, "learning_rate": 1.533063134325252e-05, "loss": 0.6234, "step": 16731 }, { "epoch": 2.731398718419656, "grad_norm": 3.1093714237213135, "learning_rate": 1.5330088878323396e-05, "loss": 0.5867, "step": 16732 }, { "epoch": 2.7315619770621606, "grad_norm": 2.6914024353027344, "learning_rate": 1.5329546391484295e-05, "loss": 0.5846, "step": 16733 }, { "epoch": 2.731725235704665, "grad_norm": 3.012368679046631, "learning_rate": 1.5329003882737443e-05, "loss": 0.6075, "step": 16734 }, { "epoch": 2.7318884943471695, "grad_norm": 3.0717613697052, "learning_rate": 1.532846135208506e-05, "loss": 0.5576, "step": 16735 }, { "epoch": 2.732051752989674, "grad_norm": 3.0328896045684814, "learning_rate": 1.532791879952939e-05, "loss": 0.6414, "step": 16736 }, { "epoch": 2.7322150116321784, "grad_norm": 3.320619583129883, "learning_rate": 1.532737622507266e-05, "loss": 0.7558, "step": 16737 }, { "epoch": 2.732378270274683, "grad_norm": 3.0418596267700195, "learning_rate": 1.5326833628717094e-05, "loss": 0.5898, "step": 16738 }, { "epoch": 2.732541528917187, "grad_norm": 3.111318826675415, "learning_rate": 1.5326291010464927e-05, "loss": 0.6624, "step": 16739 }, { "epoch": 2.7327047875596913, "grad_norm": 2.9719345569610596, "learning_rate": 1.5325748370318386e-05, "loss": 0.5938, "step": 16740 }, { "epoch": 2.7328680462021957, "grad_norm": 3.221226692199707, "learning_rate": 1.5325205708279708e-05, "loss": 0.6569, "step": 16741 }, { "epoch": 2.7330313048447, "grad_norm": 2.864849805831909, "learning_rate": 1.532466302435112e-05, "loss": 0.623, "step": 16742 }, { "epoch": 2.7331945634872046, "grad_norm": 2.738837480545044, "learning_rate": 1.5324120318534854e-05, "loss": 0.522, "step": 16743 }, { "epoch": 2.733357822129709, "grad_norm": 2.674328565597534, "learning_rate": 1.532357759083314e-05, "loss": 0.5609, "step": 16744 }, { "epoch": 2.7335210807722135, "grad_norm": 2.6354379653930664, "learning_rate": 1.5323034841248202e-05, "loss": 0.4858, "step": 16745 }, { "epoch": 2.733684339414718, "grad_norm": 3.0422582626342773, "learning_rate": 1.5322492069782283e-05, "loss": 0.612, "step": 16746 }, { "epoch": 2.7338475980572223, "grad_norm": 2.662757635116577, "learning_rate": 1.5321949276437612e-05, "loss": 0.5658, "step": 16747 }, { "epoch": 2.7340108566997268, "grad_norm": 2.896869659423828, "learning_rate": 1.532140646121641e-05, "loss": 0.5086, "step": 16748 }, { "epoch": 2.734174115342231, "grad_norm": 2.628798246383667, "learning_rate": 1.532086362412092e-05, "loss": 0.5423, "step": 16749 }, { "epoch": 2.734337373984735, "grad_norm": 2.6245877742767334, "learning_rate": 1.5320320765153367e-05, "loss": 0.4703, "step": 16750 }, { "epoch": 2.7345006326272396, "grad_norm": 3.395138740539551, "learning_rate": 1.5319777884315986e-05, "loss": 0.6618, "step": 16751 }, { "epoch": 2.734663891269744, "grad_norm": 3.1315178871154785, "learning_rate": 1.5319234981611003e-05, "loss": 0.5396, "step": 16752 }, { "epoch": 2.7348271499122485, "grad_norm": 3.269984722137451, "learning_rate": 1.531869205704066e-05, "loss": 0.6383, "step": 16753 }, { "epoch": 2.734990408554753, "grad_norm": 2.6048030853271484, "learning_rate": 1.5318149110607174e-05, "loss": 0.5097, "step": 16754 }, { "epoch": 2.7351536671972574, "grad_norm": 2.7470078468322754, "learning_rate": 1.531760614231279e-05, "loss": 0.5486, "step": 16755 }, { "epoch": 2.7353169258397614, "grad_norm": 2.9277563095092773, "learning_rate": 1.5317063152159736e-05, "loss": 0.5698, "step": 16756 }, { "epoch": 2.735480184482266, "grad_norm": 2.395656108856201, "learning_rate": 1.531652014015024e-05, "loss": 0.4685, "step": 16757 }, { "epoch": 2.7356434431247703, "grad_norm": 3.0309338569641113, "learning_rate": 1.531597710628654e-05, "loss": 0.603, "step": 16758 }, { "epoch": 2.7358067017672747, "grad_norm": 2.426361560821533, "learning_rate": 1.5315434050570864e-05, "loss": 0.5585, "step": 16759 }, { "epoch": 2.735969960409779, "grad_norm": 2.389683246612549, "learning_rate": 1.5314890973005446e-05, "loss": 0.4466, "step": 16760 }, { "epoch": 2.7361332190522836, "grad_norm": 3.1939518451690674, "learning_rate": 1.531434787359252e-05, "loss": 0.5396, "step": 16761 }, { "epoch": 2.736296477694788, "grad_norm": 2.9236912727355957, "learning_rate": 1.5313804752334315e-05, "loss": 0.5192, "step": 16762 }, { "epoch": 2.7364597363372924, "grad_norm": 2.667177200317383, "learning_rate": 1.5313261609233063e-05, "loss": 0.5037, "step": 16763 }, { "epoch": 2.736622994979797, "grad_norm": 2.780951499938965, "learning_rate": 1.5312718444291e-05, "loss": 0.5033, "step": 16764 }, { "epoch": 2.7367862536223013, "grad_norm": 2.700939178466797, "learning_rate": 1.5312175257510356e-05, "loss": 0.5387, "step": 16765 }, { "epoch": 2.7369495122648058, "grad_norm": 3.300092935562134, "learning_rate": 1.531163204889337e-05, "loss": 0.7265, "step": 16766 }, { "epoch": 2.7371127709073098, "grad_norm": 3.2052459716796875, "learning_rate": 1.531108881844227e-05, "loss": 0.5553, "step": 16767 }, { "epoch": 2.737276029549814, "grad_norm": 3.7137959003448486, "learning_rate": 1.5310545566159283e-05, "loss": 0.6171, "step": 16768 }, { "epoch": 2.7374392881923186, "grad_norm": 3.2337052822113037, "learning_rate": 1.5310002292046653e-05, "loss": 0.586, "step": 16769 }, { "epoch": 2.737602546834823, "grad_norm": 2.5133652687072754, "learning_rate": 1.530945899610661e-05, "loss": 0.4234, "step": 16770 }, { "epoch": 2.7377658054773275, "grad_norm": 3.2226386070251465, "learning_rate": 1.5308915678341382e-05, "loss": 0.6045, "step": 16771 }, { "epoch": 2.737929064119832, "grad_norm": 2.8725619316101074, "learning_rate": 1.530837233875321e-05, "loss": 0.5379, "step": 16772 }, { "epoch": 2.7380923227623364, "grad_norm": 2.9633355140686035, "learning_rate": 1.530782897734432e-05, "loss": 0.6139, "step": 16773 }, { "epoch": 2.7382555814048404, "grad_norm": 3.1721842288970947, "learning_rate": 1.530728559411695e-05, "loss": 0.5642, "step": 16774 }, { "epoch": 2.738418840047345, "grad_norm": 2.858597993850708, "learning_rate": 1.5306742189073334e-05, "loss": 0.4759, "step": 16775 }, { "epoch": 2.7385820986898493, "grad_norm": 3.072427749633789, "learning_rate": 1.5306198762215707e-05, "loss": 0.6009, "step": 16776 }, { "epoch": 2.7387453573323537, "grad_norm": 2.4826295375823975, "learning_rate": 1.5305655313546296e-05, "loss": 0.525, "step": 16777 }, { "epoch": 2.738908615974858, "grad_norm": 3.087102174758911, "learning_rate": 1.5305111843067343e-05, "loss": 0.5998, "step": 16778 }, { "epoch": 2.7390718746173626, "grad_norm": 2.653052568435669, "learning_rate": 1.5304568350781075e-05, "loss": 0.5386, "step": 16779 }, { "epoch": 2.739235133259867, "grad_norm": 3.424910545349121, "learning_rate": 1.5304024836689733e-05, "loss": 0.6587, "step": 16780 }, { "epoch": 2.7393983919023714, "grad_norm": 3.1367523670196533, "learning_rate": 1.5303481300795547e-05, "loss": 0.6549, "step": 16781 }, { "epoch": 2.739561650544876, "grad_norm": 2.7042288780212402, "learning_rate": 1.530293774310075e-05, "loss": 0.5225, "step": 16782 }, { "epoch": 2.7397249091873803, "grad_norm": 2.5267186164855957, "learning_rate": 1.530239416360758e-05, "loss": 0.5103, "step": 16783 }, { "epoch": 2.7398881678298848, "grad_norm": 2.6078171730041504, "learning_rate": 1.5301850562318265e-05, "loss": 0.5645, "step": 16784 }, { "epoch": 2.7400514264723888, "grad_norm": 3.320551872253418, "learning_rate": 1.5301306939235054e-05, "loss": 0.6166, "step": 16785 }, { "epoch": 2.740214685114893, "grad_norm": 3.3117527961730957, "learning_rate": 1.5300763294360164e-05, "loss": 0.6808, "step": 16786 }, { "epoch": 2.7403779437573976, "grad_norm": 2.783005475997925, "learning_rate": 1.5300219627695837e-05, "loss": 0.5582, "step": 16787 }, { "epoch": 2.740541202399902, "grad_norm": 3.2518715858459473, "learning_rate": 1.5299675939244312e-05, "loss": 0.7055, "step": 16788 }, { "epoch": 2.7407044610424065, "grad_norm": 3.3782405853271484, "learning_rate": 1.529913222900782e-05, "loss": 0.6702, "step": 16789 }, { "epoch": 2.740867719684911, "grad_norm": 3.1529834270477295, "learning_rate": 1.52985884969886e-05, "loss": 0.6071, "step": 16790 }, { "epoch": 2.741030978327415, "grad_norm": 2.6808791160583496, "learning_rate": 1.5298044743188875e-05, "loss": 0.5211, "step": 16791 }, { "epoch": 2.7411942369699194, "grad_norm": 3.0498571395874023, "learning_rate": 1.5297500967610893e-05, "loss": 0.59, "step": 16792 }, { "epoch": 2.741357495612424, "grad_norm": 2.8319897651672363, "learning_rate": 1.5296957170256887e-05, "loss": 0.4916, "step": 16793 }, { "epoch": 2.7415207542549282, "grad_norm": 2.4302172660827637, "learning_rate": 1.5296413351129086e-05, "loss": 0.5283, "step": 16794 }, { "epoch": 2.7416840128974327, "grad_norm": 2.6982216835021973, "learning_rate": 1.529586951022973e-05, "loss": 0.562, "step": 16795 }, { "epoch": 2.741847271539937, "grad_norm": 3.299147367477417, "learning_rate": 1.5295325647561056e-05, "loss": 0.6516, "step": 16796 }, { "epoch": 2.7420105301824416, "grad_norm": 3.518368721008301, "learning_rate": 1.5294781763125297e-05, "loss": 0.6932, "step": 16797 }, { "epoch": 2.742173788824946, "grad_norm": 3.2820401191711426, "learning_rate": 1.529423785692469e-05, "loss": 0.5939, "step": 16798 }, { "epoch": 2.7423370474674504, "grad_norm": 2.6110424995422363, "learning_rate": 1.5293693928961474e-05, "loss": 0.5767, "step": 16799 }, { "epoch": 2.742500306109955, "grad_norm": 3.0031700134277344, "learning_rate": 1.5293149979237875e-05, "loss": 0.5768, "step": 16800 }, { "epoch": 2.7426635647524593, "grad_norm": 2.430924892425537, "learning_rate": 1.5292606007756138e-05, "loss": 0.473, "step": 16801 }, { "epoch": 2.7428268233949633, "grad_norm": 2.8147659301757812, "learning_rate": 1.5292062014518495e-05, "loss": 0.5042, "step": 16802 }, { "epoch": 2.7429900820374677, "grad_norm": 3.16740345954895, "learning_rate": 1.5291517999527183e-05, "loss": 0.6761, "step": 16803 }, { "epoch": 2.743153340679972, "grad_norm": 2.837610960006714, "learning_rate": 1.529097396278444e-05, "loss": 0.586, "step": 16804 }, { "epoch": 2.7433165993224766, "grad_norm": 2.528225898742676, "learning_rate": 1.52904299042925e-05, "loss": 0.5138, "step": 16805 }, { "epoch": 2.743479857964981, "grad_norm": 2.797553300857544, "learning_rate": 1.52898858240536e-05, "loss": 0.6478, "step": 16806 }, { "epoch": 2.7436431166074855, "grad_norm": 2.8569583892822266, "learning_rate": 1.5289341722069978e-05, "loss": 0.5827, "step": 16807 }, { "epoch": 2.74380637524999, "grad_norm": 3.3590896129608154, "learning_rate": 1.5288797598343867e-05, "loss": 0.5971, "step": 16808 }, { "epoch": 2.743969633892494, "grad_norm": 3.2192931175231934, "learning_rate": 1.5288253452877507e-05, "loss": 0.6957, "step": 16809 }, { "epoch": 2.7441328925349984, "grad_norm": 2.6091134548187256, "learning_rate": 1.5287709285673138e-05, "loss": 0.5389, "step": 16810 }, { "epoch": 2.744296151177503, "grad_norm": 2.5286219120025635, "learning_rate": 1.5287165096732985e-05, "loss": 0.4499, "step": 16811 }, { "epoch": 2.7444594098200072, "grad_norm": 3.6303083896636963, "learning_rate": 1.52866208860593e-05, "loss": 0.6771, "step": 16812 }, { "epoch": 2.7446226684625117, "grad_norm": 3.054813861846924, "learning_rate": 1.528607665365431e-05, "loss": 0.6348, "step": 16813 }, { "epoch": 2.744785927105016, "grad_norm": 2.4824154376983643, "learning_rate": 1.5285532399520256e-05, "loss": 0.5515, "step": 16814 }, { "epoch": 2.7449491857475206, "grad_norm": 3.782803773880005, "learning_rate": 1.528498812365937e-05, "loss": 0.7225, "step": 16815 }, { "epoch": 2.745112444390025, "grad_norm": 3.0161585807800293, "learning_rate": 1.52844438260739e-05, "loss": 0.6343, "step": 16816 }, { "epoch": 2.7452757030325294, "grad_norm": 3.079341411590576, "learning_rate": 1.528389950676607e-05, "loss": 0.6758, "step": 16817 }, { "epoch": 2.745438961675034, "grad_norm": 2.3888142108917236, "learning_rate": 1.5283355165738124e-05, "loss": 0.4755, "step": 16818 }, { "epoch": 2.7456022203175383, "grad_norm": 2.2962939739227295, "learning_rate": 1.528281080299231e-05, "loss": 0.4245, "step": 16819 }, { "epoch": 2.7457654789600423, "grad_norm": 3.142411470413208, "learning_rate": 1.5282266418530846e-05, "loss": 0.7361, "step": 16820 }, { "epoch": 2.7459287376025467, "grad_norm": 2.576925277709961, "learning_rate": 1.528172201235598e-05, "loss": 0.5281, "step": 16821 }, { "epoch": 2.746091996245051, "grad_norm": 2.407078504562378, "learning_rate": 1.528117758446995e-05, "loss": 0.5875, "step": 16822 }, { "epoch": 2.7462552548875556, "grad_norm": 3.103320598602295, "learning_rate": 1.5280633134874995e-05, "loss": 0.5968, "step": 16823 }, { "epoch": 2.74641851353006, "grad_norm": 3.061209201812744, "learning_rate": 1.5280088663573353e-05, "loss": 0.5631, "step": 16824 }, { "epoch": 2.7465817721725645, "grad_norm": 2.9294424057006836, "learning_rate": 1.5279544170567257e-05, "loss": 0.6173, "step": 16825 }, { "epoch": 2.746745030815069, "grad_norm": 3.160998582839966, "learning_rate": 1.5278999655858947e-05, "loss": 0.5941, "step": 16826 }, { "epoch": 2.746908289457573, "grad_norm": 2.8271079063415527, "learning_rate": 1.5278455119450666e-05, "loss": 0.5822, "step": 16827 }, { "epoch": 2.7470715481000774, "grad_norm": 3.024919271469116, "learning_rate": 1.5277910561344644e-05, "loss": 0.6371, "step": 16828 }, { "epoch": 2.747234806742582, "grad_norm": 2.7239232063293457, "learning_rate": 1.5277365981543133e-05, "loss": 0.5232, "step": 16829 }, { "epoch": 2.7473980653850862, "grad_norm": 3.1134822368621826, "learning_rate": 1.5276821380048355e-05, "loss": 0.6008, "step": 16830 }, { "epoch": 2.7475613240275907, "grad_norm": 3.048529624938965, "learning_rate": 1.5276276756862562e-05, "loss": 0.6068, "step": 16831 }, { "epoch": 2.747724582670095, "grad_norm": 2.947507858276367, "learning_rate": 1.5275732111987985e-05, "loss": 0.6425, "step": 16832 }, { "epoch": 2.7478878413125996, "grad_norm": 2.8063175678253174, "learning_rate": 1.5275187445426868e-05, "loss": 0.5956, "step": 16833 }, { "epoch": 2.748051099955104, "grad_norm": 2.239546537399292, "learning_rate": 1.5274642757181447e-05, "loss": 0.4777, "step": 16834 }, { "epoch": 2.7482143585976084, "grad_norm": 2.8478434085845947, "learning_rate": 1.5274098047253957e-05, "loss": 0.5497, "step": 16835 }, { "epoch": 2.748377617240113, "grad_norm": 3.1462645530700684, "learning_rate": 1.5273553315646645e-05, "loss": 0.5188, "step": 16836 }, { "epoch": 2.7485408758826173, "grad_norm": 3.0415706634521484, "learning_rate": 1.5273008562361748e-05, "loss": 0.6019, "step": 16837 }, { "epoch": 2.7487041345251213, "grad_norm": 3.3452816009521484, "learning_rate": 1.5272463787401498e-05, "loss": 0.6413, "step": 16838 }, { "epoch": 2.7488673931676257, "grad_norm": 3.109330892562866, "learning_rate": 1.5271918990768148e-05, "loss": 0.6389, "step": 16839 }, { "epoch": 2.74903065181013, "grad_norm": 2.512091875076294, "learning_rate": 1.5271374172463924e-05, "loss": 0.5359, "step": 16840 }, { "epoch": 2.7491939104526346, "grad_norm": 2.8420519828796387, "learning_rate": 1.527082933249107e-05, "loss": 0.6085, "step": 16841 }, { "epoch": 2.749357169095139, "grad_norm": 3.0666942596435547, "learning_rate": 1.5270284470851834e-05, "loss": 0.5051, "step": 16842 }, { "epoch": 2.7495204277376435, "grad_norm": 3.2929368019104004, "learning_rate": 1.5269739587548443e-05, "loss": 0.5908, "step": 16843 }, { "epoch": 2.7496836863801475, "grad_norm": 3.485431432723999, "learning_rate": 1.5269194682583143e-05, "loss": 0.6793, "step": 16844 }, { "epoch": 2.749846945022652, "grad_norm": 2.7471413612365723, "learning_rate": 1.5268649755958175e-05, "loss": 0.5824, "step": 16845 }, { "epoch": 2.7500102036651564, "grad_norm": 2.9236788749694824, "learning_rate": 1.5268104807675773e-05, "loss": 0.6194, "step": 16846 }, { "epoch": 2.750173462307661, "grad_norm": 3.047572374343872, "learning_rate": 1.5267559837738188e-05, "loss": 0.6556, "step": 16847 }, { "epoch": 2.7503367209501652, "grad_norm": 3.2054316997528076, "learning_rate": 1.526701484614765e-05, "loss": 0.6134, "step": 16848 }, { "epoch": 2.7504999795926697, "grad_norm": 2.916447877883911, "learning_rate": 1.52664698329064e-05, "loss": 0.5397, "step": 16849 }, { "epoch": 2.750663238235174, "grad_norm": 2.946450710296631, "learning_rate": 1.5265924798016683e-05, "loss": 0.5408, "step": 16850 }, { "epoch": 2.7508264968776786, "grad_norm": 2.4276671409606934, "learning_rate": 1.526537974148074e-05, "loss": 0.5529, "step": 16851 }, { "epoch": 2.750989755520183, "grad_norm": 2.8794212341308594, "learning_rate": 1.5264834663300808e-05, "loss": 0.5702, "step": 16852 }, { "epoch": 2.7511530141626874, "grad_norm": 2.5371010303497314, "learning_rate": 1.5264289563479126e-05, "loss": 0.558, "step": 16853 }, { "epoch": 2.751316272805192, "grad_norm": 2.869833469390869, "learning_rate": 1.526374444201794e-05, "loss": 0.5453, "step": 16854 }, { "epoch": 2.751479531447696, "grad_norm": 2.6469368934631348, "learning_rate": 1.526319929891949e-05, "loss": 0.4973, "step": 16855 }, { "epoch": 2.7516427900902003, "grad_norm": 3.4817025661468506, "learning_rate": 1.526265413418601e-05, "loss": 0.6765, "step": 16856 }, { "epoch": 2.7518060487327047, "grad_norm": 2.9081387519836426, "learning_rate": 1.5262108947819747e-05, "loss": 0.6541, "step": 16857 }, { "epoch": 2.751969307375209, "grad_norm": 2.6158909797668457, "learning_rate": 1.5261563739822945e-05, "loss": 0.488, "step": 16858 }, { "epoch": 2.7521325660177136, "grad_norm": 2.5675909519195557, "learning_rate": 1.526101851019784e-05, "loss": 0.4801, "step": 16859 }, { "epoch": 2.752295824660218, "grad_norm": 3.0345094203948975, "learning_rate": 1.5260473258946673e-05, "loss": 0.5931, "step": 16860 }, { "epoch": 2.7524590833027225, "grad_norm": 2.5927186012268066, "learning_rate": 1.5259927986071684e-05, "loss": 0.5562, "step": 16861 }, { "epoch": 2.7526223419452265, "grad_norm": 2.6761910915374756, "learning_rate": 1.5259382691575123e-05, "loss": 0.499, "step": 16862 }, { "epoch": 2.752785600587731, "grad_norm": 3.1100034713745117, "learning_rate": 1.5258837375459224e-05, "loss": 0.5746, "step": 16863 }, { "epoch": 2.7529488592302354, "grad_norm": 2.6403307914733887, "learning_rate": 1.5258292037726228e-05, "loss": 0.492, "step": 16864 }, { "epoch": 2.75311211787274, "grad_norm": 2.671900510787964, "learning_rate": 1.525774667837838e-05, "loss": 0.6188, "step": 16865 }, { "epoch": 2.7532753765152442, "grad_norm": 2.7539470195770264, "learning_rate": 1.5257201297417921e-05, "loss": 0.4763, "step": 16866 }, { "epoch": 2.7534386351577487, "grad_norm": 3.0457136631011963, "learning_rate": 1.5256655894847093e-05, "loss": 0.5773, "step": 16867 }, { "epoch": 2.753601893800253, "grad_norm": 2.9170100688934326, "learning_rate": 1.5256110470668137e-05, "loss": 0.5103, "step": 16868 }, { "epoch": 2.7537651524427575, "grad_norm": 2.8296382427215576, "learning_rate": 1.5255565024883295e-05, "loss": 0.5475, "step": 16869 }, { "epoch": 2.753928411085262, "grad_norm": 3.195911407470703, "learning_rate": 1.5255019557494808e-05, "loss": 0.6438, "step": 16870 }, { "epoch": 2.7540916697277664, "grad_norm": 2.9602696895599365, "learning_rate": 1.5254474068504923e-05, "loss": 0.5845, "step": 16871 }, { "epoch": 2.754254928370271, "grad_norm": 2.4671144485473633, "learning_rate": 1.5253928557915879e-05, "loss": 0.5117, "step": 16872 }, { "epoch": 2.754418187012775, "grad_norm": 3.6673529148101807, "learning_rate": 1.5253383025729917e-05, "loss": 0.5953, "step": 16873 }, { "epoch": 2.7545814456552793, "grad_norm": 2.559264659881592, "learning_rate": 1.5252837471949283e-05, "loss": 0.4964, "step": 16874 }, { "epoch": 2.7547447042977837, "grad_norm": 2.628627300262451, "learning_rate": 1.5252291896576214e-05, "loss": 0.4655, "step": 16875 }, { "epoch": 2.754907962940288, "grad_norm": 2.5466723442077637, "learning_rate": 1.5251746299612959e-05, "loss": 0.5484, "step": 16876 }, { "epoch": 2.7550712215827926, "grad_norm": 2.8508832454681396, "learning_rate": 1.5251200681061758e-05, "loss": 0.5901, "step": 16877 }, { "epoch": 2.755234480225297, "grad_norm": 2.893404722213745, "learning_rate": 1.5250655040924854e-05, "loss": 0.5574, "step": 16878 }, { "epoch": 2.755397738867801, "grad_norm": 3.0395610332489014, "learning_rate": 1.5250109379204487e-05, "loss": 0.6069, "step": 16879 }, { "epoch": 2.7555609975103055, "grad_norm": 3.070565700531006, "learning_rate": 1.5249563695902906e-05, "loss": 0.6301, "step": 16880 }, { "epoch": 2.75572425615281, "grad_norm": 2.7093207836151123, "learning_rate": 1.524901799102235e-05, "loss": 0.5752, "step": 16881 }, { "epoch": 2.7558875147953144, "grad_norm": 3.322451114654541, "learning_rate": 1.5248472264565059e-05, "loss": 0.6946, "step": 16882 }, { "epoch": 2.756050773437819, "grad_norm": 3.1057589054107666, "learning_rate": 1.5247926516533285e-05, "loss": 0.6636, "step": 16883 }, { "epoch": 2.7562140320803232, "grad_norm": 2.844264030456543, "learning_rate": 1.5247380746929268e-05, "loss": 0.4855, "step": 16884 }, { "epoch": 2.7563772907228277, "grad_norm": 2.932710886001587, "learning_rate": 1.5246834955755248e-05, "loss": 0.5386, "step": 16885 }, { "epoch": 2.756540549365332, "grad_norm": 2.6883931159973145, "learning_rate": 1.5246289143013468e-05, "loss": 0.4914, "step": 16886 }, { "epoch": 2.7567038080078365, "grad_norm": 3.3367016315460205, "learning_rate": 1.5245743308706175e-05, "loss": 0.6291, "step": 16887 }, { "epoch": 2.756867066650341, "grad_norm": 3.460935354232788, "learning_rate": 1.5245197452835616e-05, "loss": 0.728, "step": 16888 }, { "epoch": 2.7570303252928454, "grad_norm": 3.814389705657959, "learning_rate": 1.5244651575404029e-05, "loss": 0.756, "step": 16889 }, { "epoch": 2.7571935839353494, "grad_norm": 2.830475330352783, "learning_rate": 1.5244105676413658e-05, "loss": 0.5068, "step": 16890 }, { "epoch": 2.757356842577854, "grad_norm": 2.5056517124176025, "learning_rate": 1.5243559755866751e-05, "loss": 0.5021, "step": 16891 }, { "epoch": 2.7575201012203583, "grad_norm": 2.850045919418335, "learning_rate": 1.5243013813765549e-05, "loss": 0.4809, "step": 16892 }, { "epoch": 2.7576833598628627, "grad_norm": 3.8411595821380615, "learning_rate": 1.5242467850112296e-05, "loss": 0.7081, "step": 16893 }, { "epoch": 2.757846618505367, "grad_norm": 2.9705328941345215, "learning_rate": 1.5241921864909237e-05, "loss": 0.5909, "step": 16894 }, { "epoch": 2.7580098771478716, "grad_norm": 2.9975223541259766, "learning_rate": 1.5241375858158618e-05, "loss": 0.5779, "step": 16895 }, { "epoch": 2.758173135790376, "grad_norm": 2.992926597595215, "learning_rate": 1.524082982986268e-05, "loss": 0.5927, "step": 16896 }, { "epoch": 2.75833639443288, "grad_norm": 3.012969493865967, "learning_rate": 1.5240283780023672e-05, "loss": 0.6074, "step": 16897 }, { "epoch": 2.7584996530753845, "grad_norm": 3.492738723754883, "learning_rate": 1.5239737708643837e-05, "loss": 0.6228, "step": 16898 }, { "epoch": 2.758662911717889, "grad_norm": 2.6350831985473633, "learning_rate": 1.5239191615725415e-05, "loss": 0.5593, "step": 16899 }, { "epoch": 2.7588261703603933, "grad_norm": 3.1332499980926514, "learning_rate": 1.5238645501270654e-05, "loss": 0.6149, "step": 16900 }, { "epoch": 2.758989429002898, "grad_norm": 2.7075281143188477, "learning_rate": 1.5238099365281804e-05, "loss": 0.5125, "step": 16901 }, { "epoch": 2.7591526876454022, "grad_norm": 2.6973965167999268, "learning_rate": 1.5237553207761103e-05, "loss": 0.5799, "step": 16902 }, { "epoch": 2.7593159462879067, "grad_norm": 3.0121474266052246, "learning_rate": 1.52370070287108e-05, "loss": 0.7183, "step": 16903 }, { "epoch": 2.759479204930411, "grad_norm": 3.0428245067596436, "learning_rate": 1.5236460828133134e-05, "loss": 0.6335, "step": 16904 }, { "epoch": 2.7596424635729155, "grad_norm": 3.0078980922698975, "learning_rate": 1.523591460603036e-05, "loss": 0.6566, "step": 16905 }, { "epoch": 2.75980572221542, "grad_norm": 2.8780174255371094, "learning_rate": 1.5235368362404714e-05, "loss": 0.661, "step": 16906 }, { "epoch": 2.7599689808579244, "grad_norm": 2.5716259479522705, "learning_rate": 1.5234822097258448e-05, "loss": 0.6312, "step": 16907 }, { "epoch": 2.7601322395004284, "grad_norm": 3.59321665763855, "learning_rate": 1.5234275810593806e-05, "loss": 0.7315, "step": 16908 }, { "epoch": 2.760295498142933, "grad_norm": 2.922945261001587, "learning_rate": 1.5233729502413031e-05, "loss": 0.638, "step": 16909 }, { "epoch": 2.7604587567854373, "grad_norm": 2.8482823371887207, "learning_rate": 1.5233183172718372e-05, "loss": 0.537, "step": 16910 }, { "epoch": 2.7606220154279417, "grad_norm": 2.8269729614257812, "learning_rate": 1.5232636821512067e-05, "loss": 0.6605, "step": 16911 }, { "epoch": 2.760785274070446, "grad_norm": 2.747206211090088, "learning_rate": 1.5232090448796376e-05, "loss": 0.5513, "step": 16912 }, { "epoch": 2.7609485327129506, "grad_norm": 2.875951051712036, "learning_rate": 1.523154405457353e-05, "loss": 0.6302, "step": 16913 }, { "epoch": 2.761111791355455, "grad_norm": 2.8010499477386475, "learning_rate": 1.5230997638845787e-05, "loss": 0.5784, "step": 16914 }, { "epoch": 2.761275049997959, "grad_norm": 2.825922966003418, "learning_rate": 1.5230451201615383e-05, "loss": 0.5975, "step": 16915 }, { "epoch": 2.7614383086404635, "grad_norm": 3.0821332931518555, "learning_rate": 1.5229904742884572e-05, "loss": 0.6795, "step": 16916 }, { "epoch": 2.761601567282968, "grad_norm": 2.5155727863311768, "learning_rate": 1.5229358262655598e-05, "loss": 0.5499, "step": 16917 }, { "epoch": 2.7617648259254723, "grad_norm": 2.677164077758789, "learning_rate": 1.5228811760930704e-05, "loss": 0.5808, "step": 16918 }, { "epoch": 2.761928084567977, "grad_norm": 2.764368772506714, "learning_rate": 1.5228265237712143e-05, "loss": 0.5743, "step": 16919 }, { "epoch": 2.762091343210481, "grad_norm": 2.575603723526001, "learning_rate": 1.5227718693002156e-05, "loss": 0.5068, "step": 16920 }, { "epoch": 2.7622546018529857, "grad_norm": 2.8081114292144775, "learning_rate": 1.522717212680299e-05, "loss": 0.545, "step": 16921 }, { "epoch": 2.76241786049549, "grad_norm": 2.98808217048645, "learning_rate": 1.5226625539116895e-05, "loss": 0.6074, "step": 16922 }, { "epoch": 2.7625811191379945, "grad_norm": 2.8308205604553223, "learning_rate": 1.5226078929946115e-05, "loss": 0.6333, "step": 16923 }, { "epoch": 2.762744377780499, "grad_norm": 2.2726056575775146, "learning_rate": 1.5225532299292898e-05, "loss": 0.4765, "step": 16924 }, { "epoch": 2.7629076364230034, "grad_norm": 2.5003795623779297, "learning_rate": 1.5224985647159489e-05, "loss": 0.4741, "step": 16925 }, { "epoch": 2.7630708950655074, "grad_norm": 3.220536947250366, "learning_rate": 1.5224438973548141e-05, "loss": 0.6528, "step": 16926 }, { "epoch": 2.763234153708012, "grad_norm": 2.4255683422088623, "learning_rate": 1.5223892278461094e-05, "loss": 0.4947, "step": 16927 }, { "epoch": 2.7633974123505163, "grad_norm": 2.9183871746063232, "learning_rate": 1.5223345561900598e-05, "loss": 0.6025, "step": 16928 }, { "epoch": 2.7635606709930207, "grad_norm": 2.8210880756378174, "learning_rate": 1.5222798823868902e-05, "loss": 0.626, "step": 16929 }, { "epoch": 2.763723929635525, "grad_norm": 3.4583635330200195, "learning_rate": 1.5222252064368253e-05, "loss": 0.7007, "step": 16930 }, { "epoch": 2.7638871882780296, "grad_norm": 2.6385130882263184, "learning_rate": 1.5221705283400896e-05, "loss": 0.5621, "step": 16931 }, { "epoch": 2.7640504469205336, "grad_norm": 2.6666271686553955, "learning_rate": 1.5221158480969083e-05, "loss": 0.5345, "step": 16932 }, { "epoch": 2.764213705563038, "grad_norm": 2.6125543117523193, "learning_rate": 1.5220611657075055e-05, "loss": 0.4894, "step": 16933 }, { "epoch": 2.7643769642055425, "grad_norm": 3.0426740646362305, "learning_rate": 1.5220064811721067e-05, "loss": 0.59, "step": 16934 }, { "epoch": 2.764540222848047, "grad_norm": 3.02595853805542, "learning_rate": 1.5219517944909363e-05, "loss": 0.6248, "step": 16935 }, { "epoch": 2.7647034814905513, "grad_norm": 2.1219637393951416, "learning_rate": 1.521897105664219e-05, "loss": 0.5226, "step": 16936 }, { "epoch": 2.764866740133056, "grad_norm": 2.818281650543213, "learning_rate": 1.52184241469218e-05, "loss": 0.5891, "step": 16937 }, { "epoch": 2.76502999877556, "grad_norm": 3.015507221221924, "learning_rate": 1.5217877215750436e-05, "loss": 0.5635, "step": 16938 }, { "epoch": 2.7651932574180647, "grad_norm": 2.4339661598205566, "learning_rate": 1.521733026313035e-05, "loss": 0.4246, "step": 16939 }, { "epoch": 2.765356516060569, "grad_norm": 2.927485227584839, "learning_rate": 1.5216783289063788e-05, "loss": 0.5119, "step": 16940 }, { "epoch": 2.7655197747030735, "grad_norm": 3.084059476852417, "learning_rate": 1.5216236293553004e-05, "loss": 0.6133, "step": 16941 }, { "epoch": 2.765683033345578, "grad_norm": 3.1993560791015625, "learning_rate": 1.5215689276600241e-05, "loss": 0.6157, "step": 16942 }, { "epoch": 2.765846291988082, "grad_norm": 2.4466233253479004, "learning_rate": 1.5215142238207748e-05, "loss": 0.4418, "step": 16943 }, { "epoch": 2.7660095506305864, "grad_norm": 2.596236228942871, "learning_rate": 1.5214595178377769e-05, "loss": 0.4746, "step": 16944 }, { "epoch": 2.766172809273091, "grad_norm": 3.735445737838745, "learning_rate": 1.5214048097112563e-05, "loss": 0.6177, "step": 16945 }, { "epoch": 2.7663360679155953, "grad_norm": 2.8689358234405518, "learning_rate": 1.5213500994414377e-05, "loss": 0.5714, "step": 16946 }, { "epoch": 2.7664993265580997, "grad_norm": 2.573391914367676, "learning_rate": 1.5212953870285456e-05, "loss": 0.4667, "step": 16947 }, { "epoch": 2.766662585200604, "grad_norm": 3.6490166187286377, "learning_rate": 1.5212406724728049e-05, "loss": 0.654, "step": 16948 }, { "epoch": 2.7668258438431086, "grad_norm": 3.4507946968078613, "learning_rate": 1.5211859557744401e-05, "loss": 0.6322, "step": 16949 }, { "epoch": 2.7669891024856126, "grad_norm": 2.903230905532837, "learning_rate": 1.521131236933677e-05, "loss": 0.5019, "step": 16950 }, { "epoch": 2.767152361128117, "grad_norm": 3.1509056091308594, "learning_rate": 1.5210765159507403e-05, "loss": 0.5766, "step": 16951 }, { "epoch": 2.7673156197706215, "grad_norm": 3.218874454498291, "learning_rate": 1.521021792825855e-05, "loss": 0.6328, "step": 16952 }, { "epoch": 2.767478878413126, "grad_norm": 2.9676616191864014, "learning_rate": 1.5209670675592456e-05, "loss": 0.5826, "step": 16953 }, { "epoch": 2.7676421370556303, "grad_norm": 2.7433154582977295, "learning_rate": 1.5209123401511372e-05, "loss": 0.5147, "step": 16954 }, { "epoch": 2.7678053956981348, "grad_norm": 2.5361928939819336, "learning_rate": 1.5208576106017545e-05, "loss": 0.5779, "step": 16955 }, { "epoch": 2.767968654340639, "grad_norm": 3.2493417263031006, "learning_rate": 1.5208028789113237e-05, "loss": 0.5901, "step": 16956 }, { "epoch": 2.7681319129831436, "grad_norm": 3.0112218856811523, "learning_rate": 1.5207481450800683e-05, "loss": 0.6241, "step": 16957 }, { "epoch": 2.768295171625648, "grad_norm": 2.9723637104034424, "learning_rate": 1.5206934091082142e-05, "loss": 0.6627, "step": 16958 }, { "epoch": 2.7684584302681525, "grad_norm": 2.8419954776763916, "learning_rate": 1.520638670995986e-05, "loss": 0.4474, "step": 16959 }, { "epoch": 2.768621688910657, "grad_norm": 2.8548192977905273, "learning_rate": 1.5205839307436088e-05, "loss": 0.5908, "step": 16960 }, { "epoch": 2.768784947553161, "grad_norm": 2.946889638900757, "learning_rate": 1.5205291883513076e-05, "loss": 0.5451, "step": 16961 }, { "epoch": 2.7689482061956654, "grad_norm": 2.957219123840332, "learning_rate": 1.5204744438193073e-05, "loss": 0.5889, "step": 16962 }, { "epoch": 2.76911146483817, "grad_norm": 3.1601996421813965, "learning_rate": 1.5204196971478333e-05, "loss": 0.7369, "step": 16963 }, { "epoch": 2.7692747234806743, "grad_norm": 3.014659881591797, "learning_rate": 1.5203649483371105e-05, "loss": 0.6112, "step": 16964 }, { "epoch": 2.7694379821231787, "grad_norm": 2.685994863510132, "learning_rate": 1.5203101973873637e-05, "loss": 0.5381, "step": 16965 }, { "epoch": 2.769601240765683, "grad_norm": 2.916426420211792, "learning_rate": 1.5202554442988184e-05, "loss": 0.5931, "step": 16966 }, { "epoch": 2.7697644994081876, "grad_norm": 3.0631062984466553, "learning_rate": 1.5202006890716994e-05, "loss": 0.6044, "step": 16967 }, { "epoch": 2.7699277580506916, "grad_norm": 2.5889856815338135, "learning_rate": 1.5201459317062314e-05, "loss": 0.5417, "step": 16968 }, { "epoch": 2.770091016693196, "grad_norm": 3.029888153076172, "learning_rate": 1.52009117220264e-05, "loss": 0.5697, "step": 16969 }, { "epoch": 2.7702542753357005, "grad_norm": 2.7558064460754395, "learning_rate": 1.5200364105611504e-05, "loss": 0.5317, "step": 16970 }, { "epoch": 2.770417533978205, "grad_norm": 2.499300956726074, "learning_rate": 1.5199816467819875e-05, "loss": 0.4899, "step": 16971 }, { "epoch": 2.7705807926207093, "grad_norm": 2.87874698638916, "learning_rate": 1.5199268808653762e-05, "loss": 0.6656, "step": 16972 }, { "epoch": 2.7707440512632138, "grad_norm": 2.6724939346313477, "learning_rate": 1.5198721128115419e-05, "loss": 0.6085, "step": 16973 }, { "epoch": 2.770907309905718, "grad_norm": 2.609084367752075, "learning_rate": 1.5198173426207095e-05, "loss": 0.595, "step": 16974 }, { "epoch": 2.7710705685482226, "grad_norm": 3.214721918106079, "learning_rate": 1.5197625702931045e-05, "loss": 0.6516, "step": 16975 }, { "epoch": 2.771233827190727, "grad_norm": 2.7297041416168213, "learning_rate": 1.5197077958289516e-05, "loss": 0.5319, "step": 16976 }, { "epoch": 2.7713970858332315, "grad_norm": 2.607820749282837, "learning_rate": 1.5196530192284766e-05, "loss": 0.5497, "step": 16977 }, { "epoch": 2.771560344475736, "grad_norm": 3.1440560817718506, "learning_rate": 1.519598240491904e-05, "loss": 0.7472, "step": 16978 }, { "epoch": 2.77172360311824, "grad_norm": 2.2380564212799072, "learning_rate": 1.5195434596194592e-05, "loss": 0.4717, "step": 16979 }, { "epoch": 2.7718868617607444, "grad_norm": 2.8344995975494385, "learning_rate": 1.5194886766113675e-05, "loss": 0.5951, "step": 16980 }, { "epoch": 2.772050120403249, "grad_norm": 2.781970977783203, "learning_rate": 1.5194338914678538e-05, "loss": 0.5763, "step": 16981 }, { "epoch": 2.7722133790457533, "grad_norm": 2.47904109954834, "learning_rate": 1.5193791041891435e-05, "loss": 0.5023, "step": 16982 }, { "epoch": 2.7723766376882577, "grad_norm": 2.9300341606140137, "learning_rate": 1.519324314775462e-05, "loss": 0.6403, "step": 16983 }, { "epoch": 2.772539896330762, "grad_norm": 2.826711893081665, "learning_rate": 1.5192695232270343e-05, "loss": 0.6113, "step": 16984 }, { "epoch": 2.772703154973266, "grad_norm": 3.039586305618286, "learning_rate": 1.5192147295440855e-05, "loss": 0.6747, "step": 16985 }, { "epoch": 2.7728664136157706, "grad_norm": 3.2953341007232666, "learning_rate": 1.5191599337268414e-05, "loss": 0.7366, "step": 16986 }, { "epoch": 2.773029672258275, "grad_norm": 2.7548117637634277, "learning_rate": 1.5191051357755267e-05, "loss": 0.5033, "step": 16987 }, { "epoch": 2.7731929309007795, "grad_norm": 3.22985577583313, "learning_rate": 1.5190503356903666e-05, "loss": 0.5999, "step": 16988 }, { "epoch": 2.773356189543284, "grad_norm": 2.921877861022949, "learning_rate": 1.5189955334715864e-05, "loss": 0.6562, "step": 16989 }, { "epoch": 2.7735194481857883, "grad_norm": 2.1013479232788086, "learning_rate": 1.518940729119412e-05, "loss": 0.5154, "step": 16990 }, { "epoch": 2.7736827068282928, "grad_norm": 2.9218969345092773, "learning_rate": 1.5188859226340682e-05, "loss": 0.6613, "step": 16991 }, { "epoch": 2.773845965470797, "grad_norm": 2.9981672763824463, "learning_rate": 1.5188311140157801e-05, "loss": 0.5779, "step": 16992 }, { "epoch": 2.7740092241133016, "grad_norm": 2.5457100868225098, "learning_rate": 1.5187763032647729e-05, "loss": 0.5008, "step": 16993 }, { "epoch": 2.774172482755806, "grad_norm": 2.977545738220215, "learning_rate": 1.5187214903812726e-05, "loss": 0.6043, "step": 16994 }, { "epoch": 2.7743357413983105, "grad_norm": 2.460541248321533, "learning_rate": 1.5186666753655041e-05, "loss": 0.5633, "step": 16995 }, { "epoch": 2.7744990000408145, "grad_norm": 2.8535609245300293, "learning_rate": 1.5186118582176928e-05, "loss": 0.6287, "step": 16996 }, { "epoch": 2.774662258683319, "grad_norm": 2.4544458389282227, "learning_rate": 1.518557038938064e-05, "loss": 0.4989, "step": 16997 }, { "epoch": 2.7748255173258234, "grad_norm": 2.637503147125244, "learning_rate": 1.5185022175268426e-05, "loss": 0.5365, "step": 16998 }, { "epoch": 2.774988775968328, "grad_norm": 2.70967435836792, "learning_rate": 1.5184473939842545e-05, "loss": 0.55, "step": 16999 }, { "epoch": 2.7751520346108323, "grad_norm": 2.7124645709991455, "learning_rate": 1.5183925683105254e-05, "loss": 0.4837, "step": 17000 }, { "epoch": 2.7753152932533367, "grad_norm": 2.9350152015686035, "learning_rate": 1.5183377405058797e-05, "loss": 0.5327, "step": 17001 }, { "epoch": 2.775478551895841, "grad_norm": 3.210770606994629, "learning_rate": 1.5182829105705435e-05, "loss": 0.6852, "step": 17002 }, { "epoch": 2.775641810538345, "grad_norm": 2.276139736175537, "learning_rate": 1.5182280785047416e-05, "loss": 0.5058, "step": 17003 }, { "epoch": 2.7758050691808496, "grad_norm": 2.9212563037872314, "learning_rate": 1.5181732443087e-05, "loss": 0.5982, "step": 17004 }, { "epoch": 2.775968327823354, "grad_norm": 2.4952471256256104, "learning_rate": 1.518118407982644e-05, "loss": 0.5228, "step": 17005 }, { "epoch": 2.7761315864658584, "grad_norm": 3.24294114112854, "learning_rate": 1.518063569526799e-05, "loss": 0.6114, "step": 17006 }, { "epoch": 2.776294845108363, "grad_norm": 3.666658401489258, "learning_rate": 1.5180087289413899e-05, "loss": 0.732, "step": 17007 }, { "epoch": 2.7764581037508673, "grad_norm": 2.825228452682495, "learning_rate": 1.5179538862266425e-05, "loss": 0.5553, "step": 17008 }, { "epoch": 2.7766213623933718, "grad_norm": 2.7740635871887207, "learning_rate": 1.5178990413827825e-05, "loss": 0.5384, "step": 17009 }, { "epoch": 2.776784621035876, "grad_norm": 2.605466842651367, "learning_rate": 1.5178441944100349e-05, "loss": 0.5049, "step": 17010 }, { "epoch": 2.7769478796783806, "grad_norm": 2.9489824771881104, "learning_rate": 1.5177893453086255e-05, "loss": 0.5673, "step": 17011 }, { "epoch": 2.777111138320885, "grad_norm": 2.9814999103546143, "learning_rate": 1.5177344940787793e-05, "loss": 0.6235, "step": 17012 }, { "epoch": 2.7772743969633895, "grad_norm": 3.464017868041992, "learning_rate": 1.5176796407207226e-05, "loss": 0.7097, "step": 17013 }, { "epoch": 2.7774376556058935, "grad_norm": 2.482710123062134, "learning_rate": 1.5176247852346799e-05, "loss": 0.5556, "step": 17014 }, { "epoch": 2.777600914248398, "grad_norm": 3.2127506732940674, "learning_rate": 1.5175699276208776e-05, "loss": 0.5862, "step": 17015 }, { "epoch": 2.7777641728909024, "grad_norm": 2.9422390460968018, "learning_rate": 1.5175150678795402e-05, "loss": 0.4898, "step": 17016 }, { "epoch": 2.777927431533407, "grad_norm": 2.654155731201172, "learning_rate": 1.517460206010894e-05, "loss": 0.5224, "step": 17017 }, { "epoch": 2.7780906901759113, "grad_norm": 3.122312307357788, "learning_rate": 1.5174053420151644e-05, "loss": 0.5954, "step": 17018 }, { "epoch": 2.7782539488184157, "grad_norm": 2.671323776245117, "learning_rate": 1.517350475892577e-05, "loss": 0.5202, "step": 17019 }, { "epoch": 2.7784172074609197, "grad_norm": 3.018738031387329, "learning_rate": 1.517295607643357e-05, "loss": 0.6206, "step": 17020 }, { "epoch": 2.778580466103424, "grad_norm": 2.8384594917297363, "learning_rate": 1.5172407372677298e-05, "loss": 0.531, "step": 17021 }, { "epoch": 2.7787437247459286, "grad_norm": 2.9803967475891113, "learning_rate": 1.5171858647659214e-05, "loss": 0.6346, "step": 17022 }, { "epoch": 2.778906983388433, "grad_norm": 2.7411694526672363, "learning_rate": 1.5171309901381572e-05, "loss": 0.5718, "step": 17023 }, { "epoch": 2.7790702420309374, "grad_norm": 2.739774703979492, "learning_rate": 1.517076113384663e-05, "loss": 0.5736, "step": 17024 }, { "epoch": 2.779233500673442, "grad_norm": 3.2558159828186035, "learning_rate": 1.517021234505664e-05, "loss": 0.6668, "step": 17025 }, { "epoch": 2.7793967593159463, "grad_norm": 2.927950382232666, "learning_rate": 1.5169663535013857e-05, "loss": 0.5651, "step": 17026 }, { "epoch": 2.7795600179584508, "grad_norm": 2.4018495082855225, "learning_rate": 1.5169114703720543e-05, "loss": 0.4535, "step": 17027 }, { "epoch": 2.779723276600955, "grad_norm": 2.681628942489624, "learning_rate": 1.5168565851178945e-05, "loss": 0.5664, "step": 17028 }, { "epoch": 2.7798865352434596, "grad_norm": 2.836167812347412, "learning_rate": 1.516801697739133e-05, "loss": 0.5816, "step": 17029 }, { "epoch": 2.780049793885964, "grad_norm": 3.3513641357421875, "learning_rate": 1.5167468082359947e-05, "loss": 0.5481, "step": 17030 }, { "epoch": 2.780213052528468, "grad_norm": 2.8603415489196777, "learning_rate": 1.5166919166087053e-05, "loss": 0.545, "step": 17031 }, { "epoch": 2.7803763111709725, "grad_norm": 2.524059295654297, "learning_rate": 1.5166370228574904e-05, "loss": 0.5302, "step": 17032 }, { "epoch": 2.780539569813477, "grad_norm": 2.7962727546691895, "learning_rate": 1.5165821269825758e-05, "loss": 0.5441, "step": 17033 }, { "epoch": 2.7807028284559814, "grad_norm": 3.0452921390533447, "learning_rate": 1.5165272289841873e-05, "loss": 0.611, "step": 17034 }, { "epoch": 2.780866087098486, "grad_norm": 2.8056483268737793, "learning_rate": 1.5164723288625505e-05, "loss": 0.5079, "step": 17035 }, { "epoch": 2.7810293457409903, "grad_norm": 2.899635076522827, "learning_rate": 1.5164174266178908e-05, "loss": 0.5175, "step": 17036 }, { "epoch": 2.7811926043834947, "grad_norm": 3.098592519760132, "learning_rate": 1.516362522250434e-05, "loss": 0.5837, "step": 17037 }, { "epoch": 2.7813558630259987, "grad_norm": 2.79654860496521, "learning_rate": 1.5163076157604058e-05, "loss": 0.5244, "step": 17038 }, { "epoch": 2.781519121668503, "grad_norm": 3.111353635787964, "learning_rate": 1.5162527071480324e-05, "loss": 0.558, "step": 17039 }, { "epoch": 2.7816823803110076, "grad_norm": 2.8900372982025146, "learning_rate": 1.5161977964135388e-05, "loss": 0.5351, "step": 17040 }, { "epoch": 2.781845638953512, "grad_norm": 2.7196576595306396, "learning_rate": 1.5161428835571507e-05, "loss": 0.5757, "step": 17041 }, { "epoch": 2.7820088975960164, "grad_norm": 2.5504117012023926, "learning_rate": 1.5160879685790944e-05, "loss": 0.4381, "step": 17042 }, { "epoch": 2.782172156238521, "grad_norm": 3.2878966331481934, "learning_rate": 1.516033051479595e-05, "loss": 0.6917, "step": 17043 }, { "epoch": 2.7823354148810253, "grad_norm": 2.9497995376586914, "learning_rate": 1.5159781322588791e-05, "loss": 0.5491, "step": 17044 }, { "epoch": 2.7824986735235298, "grad_norm": 3.517360210418701, "learning_rate": 1.5159232109171715e-05, "loss": 0.6874, "step": 17045 }, { "epoch": 2.782661932166034, "grad_norm": 2.61650013923645, "learning_rate": 1.5158682874546987e-05, "loss": 0.4767, "step": 17046 }, { "epoch": 2.7828251908085386, "grad_norm": 2.4803569316864014, "learning_rate": 1.5158133618716858e-05, "loss": 0.5548, "step": 17047 }, { "epoch": 2.782988449451043, "grad_norm": 3.2979931831359863, "learning_rate": 1.515758434168359e-05, "loss": 0.6026, "step": 17048 }, { "epoch": 2.783151708093547, "grad_norm": 3.200223684310913, "learning_rate": 1.5157035043449444e-05, "loss": 0.6138, "step": 17049 }, { "epoch": 2.7833149667360515, "grad_norm": 2.48093318939209, "learning_rate": 1.5156485724016672e-05, "loss": 0.5081, "step": 17050 }, { "epoch": 2.783478225378556, "grad_norm": 2.6722779273986816, "learning_rate": 1.5155936383387532e-05, "loss": 0.5832, "step": 17051 }, { "epoch": 2.7836414840210604, "grad_norm": 3.647542715072632, "learning_rate": 1.5155387021564287e-05, "loss": 0.7136, "step": 17052 }, { "epoch": 2.783804742663565, "grad_norm": 3.0867276191711426, "learning_rate": 1.5154837638549191e-05, "loss": 0.6448, "step": 17053 }, { "epoch": 2.7839680013060693, "grad_norm": 2.9643828868865967, "learning_rate": 1.5154288234344505e-05, "loss": 0.5813, "step": 17054 }, { "epoch": 2.7841312599485737, "grad_norm": 2.726020574569702, "learning_rate": 1.5153738808952486e-05, "loss": 0.543, "step": 17055 }, { "epoch": 2.7842945185910777, "grad_norm": 2.684955596923828, "learning_rate": 1.5153189362375393e-05, "loss": 0.598, "step": 17056 }, { "epoch": 2.784457777233582, "grad_norm": 2.3797242641448975, "learning_rate": 1.5152639894615483e-05, "loss": 0.5285, "step": 17057 }, { "epoch": 2.7846210358760866, "grad_norm": 2.6073312759399414, "learning_rate": 1.515209040567502e-05, "loss": 0.565, "step": 17058 }, { "epoch": 2.784784294518591, "grad_norm": 2.913475275039673, "learning_rate": 1.5151540895556256e-05, "loss": 0.6111, "step": 17059 }, { "epoch": 2.7849475531610954, "grad_norm": 2.4050991535186768, "learning_rate": 1.5150991364261452e-05, "loss": 0.4198, "step": 17060 }, { "epoch": 2.7851108118036, "grad_norm": 2.78865122795105, "learning_rate": 1.5150441811792869e-05, "loss": 0.5454, "step": 17061 }, { "epoch": 2.7852740704461043, "grad_norm": 2.452110767364502, "learning_rate": 1.5149892238152761e-05, "loss": 0.5057, "step": 17062 }, { "epoch": 2.7854373290886087, "grad_norm": 2.8158926963806152, "learning_rate": 1.5149342643343393e-05, "loss": 0.5516, "step": 17063 }, { "epoch": 2.785600587731113, "grad_norm": 3.181481122970581, "learning_rate": 1.5148793027367022e-05, "loss": 0.6445, "step": 17064 }, { "epoch": 2.7857638463736176, "grad_norm": 2.790239095687866, "learning_rate": 1.5148243390225908e-05, "loss": 0.5118, "step": 17065 }, { "epoch": 2.785927105016122, "grad_norm": 3.15856671333313, "learning_rate": 1.5147693731922308e-05, "loss": 0.616, "step": 17066 }, { "epoch": 2.786090363658626, "grad_norm": 2.9769577980041504, "learning_rate": 1.5147144052458481e-05, "loss": 0.604, "step": 17067 }, { "epoch": 2.7862536223011305, "grad_norm": 3.223686695098877, "learning_rate": 1.5146594351836692e-05, "loss": 1.2465, "step": 17068 }, { "epoch": 2.786416880943635, "grad_norm": 2.891962766647339, "learning_rate": 1.5146044630059197e-05, "loss": 0.5984, "step": 17069 }, { "epoch": 2.7865801395861394, "grad_norm": 3.01124906539917, "learning_rate": 1.514549488712825e-05, "loss": 0.5497, "step": 17070 }, { "epoch": 2.786743398228644, "grad_norm": 2.7826852798461914, "learning_rate": 1.5144945123046123e-05, "loss": 0.6444, "step": 17071 }, { "epoch": 2.7869066568711482, "grad_norm": 2.673539161682129, "learning_rate": 1.5144395337815066e-05, "loss": 0.5626, "step": 17072 }, { "epoch": 2.7870699155136522, "grad_norm": 3.251070499420166, "learning_rate": 1.5143845531437342e-05, "loss": 0.6242, "step": 17073 }, { "epoch": 2.7872331741561567, "grad_norm": 3.0106968879699707, "learning_rate": 1.5143295703915214e-05, "loss": 0.6387, "step": 17074 }, { "epoch": 2.787396432798661, "grad_norm": 2.5073041915893555, "learning_rate": 1.5142745855250938e-05, "loss": 0.4973, "step": 17075 }, { "epoch": 2.7875596914411656, "grad_norm": 2.46047306060791, "learning_rate": 1.5142195985446772e-05, "loss": 0.5284, "step": 17076 }, { "epoch": 2.78772295008367, "grad_norm": 2.7738447189331055, "learning_rate": 1.5141646094504986e-05, "loss": 0.5389, "step": 17077 }, { "epoch": 2.7878862087261744, "grad_norm": 2.8887362480163574, "learning_rate": 1.5141096182427831e-05, "loss": 0.6019, "step": 17078 }, { "epoch": 2.788049467368679, "grad_norm": 2.9098973274230957, "learning_rate": 1.5140546249217574e-05, "loss": 0.5942, "step": 17079 }, { "epoch": 2.7882127260111833, "grad_norm": 2.69309401512146, "learning_rate": 1.5139996294876467e-05, "loss": 0.518, "step": 17080 }, { "epoch": 2.7883759846536877, "grad_norm": 2.626694917678833, "learning_rate": 1.5139446319406777e-05, "loss": 0.4885, "step": 17081 }, { "epoch": 2.788539243296192, "grad_norm": 3.009077310562134, "learning_rate": 1.5138896322810767e-05, "loss": 0.6419, "step": 17082 }, { "epoch": 2.7887025019386966, "grad_norm": 2.503843069076538, "learning_rate": 1.5138346305090693e-05, "loss": 0.485, "step": 17083 }, { "epoch": 2.7888657605812006, "grad_norm": 3.0516445636749268, "learning_rate": 1.5137796266248818e-05, "loss": 0.5885, "step": 17084 }, { "epoch": 2.789029019223705, "grad_norm": 3.2663357257843018, "learning_rate": 1.5137246206287404e-05, "loss": 0.5764, "step": 17085 }, { "epoch": 2.7891922778662095, "grad_norm": 2.9525914192199707, "learning_rate": 1.513669612520871e-05, "loss": 0.5148, "step": 17086 }, { "epoch": 2.789355536508714, "grad_norm": 3.233306884765625, "learning_rate": 1.5136146023014995e-05, "loss": 0.6057, "step": 17087 }, { "epoch": 2.7895187951512184, "grad_norm": 2.888986825942993, "learning_rate": 1.5135595899708526e-05, "loss": 0.5953, "step": 17088 }, { "epoch": 2.789682053793723, "grad_norm": 2.776492118835449, "learning_rate": 1.5135045755291562e-05, "loss": 0.5306, "step": 17089 }, { "epoch": 2.7898453124362272, "grad_norm": 2.8245174884796143, "learning_rate": 1.513449558976636e-05, "loss": 0.5274, "step": 17090 }, { "epoch": 2.7900085710787312, "grad_norm": 3.0816502571105957, "learning_rate": 1.5133945403135188e-05, "loss": 0.6516, "step": 17091 }, { "epoch": 2.7901718297212357, "grad_norm": 3.3758363723754883, "learning_rate": 1.5133395195400304e-05, "loss": 0.6199, "step": 17092 }, { "epoch": 2.79033508836374, "grad_norm": 3.016498327255249, "learning_rate": 1.513284496656397e-05, "loss": 0.7068, "step": 17093 }, { "epoch": 2.7904983470062446, "grad_norm": 3.0865228176116943, "learning_rate": 1.5132294716628453e-05, "loss": 0.5878, "step": 17094 }, { "epoch": 2.790661605648749, "grad_norm": 3.0484161376953125, "learning_rate": 1.5131744445596007e-05, "loss": 0.6835, "step": 17095 }, { "epoch": 2.7908248642912534, "grad_norm": 2.7865865230560303, "learning_rate": 1.5131194153468896e-05, "loss": 0.5546, "step": 17096 }, { "epoch": 2.790988122933758, "grad_norm": 3.1791329383850098, "learning_rate": 1.5130643840249385e-05, "loss": 0.6366, "step": 17097 }, { "epoch": 2.7911513815762623, "grad_norm": 3.1766932010650635, "learning_rate": 1.5130093505939734e-05, "loss": 0.6284, "step": 17098 }, { "epoch": 2.7913146402187667, "grad_norm": 3.1000568866729736, "learning_rate": 1.5129543150542208e-05, "loss": 0.5734, "step": 17099 }, { "epoch": 2.791477898861271, "grad_norm": 3.14634108543396, "learning_rate": 1.5128992774059063e-05, "loss": 0.6185, "step": 17100 }, { "epoch": 2.7916411575037756, "grad_norm": 2.8819875717163086, "learning_rate": 1.5128442376492568e-05, "loss": 0.5027, "step": 17101 }, { "epoch": 2.7918044161462796, "grad_norm": 3.308875322341919, "learning_rate": 1.5127891957844982e-05, "loss": 0.682, "step": 17102 }, { "epoch": 2.791967674788784, "grad_norm": 2.7787842750549316, "learning_rate": 1.5127341518118569e-05, "loss": 0.561, "step": 17103 }, { "epoch": 2.7921309334312885, "grad_norm": 3.343679428100586, "learning_rate": 1.512679105731559e-05, "loss": 0.6901, "step": 17104 }, { "epoch": 2.792294192073793, "grad_norm": 2.6501247882843018, "learning_rate": 1.512624057543831e-05, "loss": 0.558, "step": 17105 }, { "epoch": 2.7924574507162974, "grad_norm": 2.9086520671844482, "learning_rate": 1.512569007248899e-05, "loss": 0.6202, "step": 17106 }, { "epoch": 2.792620709358802, "grad_norm": 3.120307683944702, "learning_rate": 1.5125139548469893e-05, "loss": 0.6915, "step": 17107 }, { "epoch": 2.792783968001306, "grad_norm": 3.083550453186035, "learning_rate": 1.5124589003383282e-05, "loss": 0.6641, "step": 17108 }, { "epoch": 2.7929472266438102, "grad_norm": 2.8621666431427, "learning_rate": 1.5124038437231421e-05, "loss": 0.5152, "step": 17109 }, { "epoch": 2.7931104852863147, "grad_norm": 2.6701698303222656, "learning_rate": 1.5123487850016575e-05, "loss": 0.4886, "step": 17110 }, { "epoch": 2.793273743928819, "grad_norm": 2.7126309871673584, "learning_rate": 1.5122937241741004e-05, "loss": 0.5709, "step": 17111 }, { "epoch": 2.7934370025713235, "grad_norm": 2.519625663757324, "learning_rate": 1.5122386612406972e-05, "loss": 0.508, "step": 17112 }, { "epoch": 2.793600261213828, "grad_norm": 2.5352983474731445, "learning_rate": 1.5121835962016744e-05, "loss": 0.5514, "step": 17113 }, { "epoch": 2.7937635198563324, "grad_norm": 2.882704019546509, "learning_rate": 1.5121285290572578e-05, "loss": 0.6109, "step": 17114 }, { "epoch": 2.793926778498837, "grad_norm": 2.746141195297241, "learning_rate": 1.5120734598076747e-05, "loss": 0.6398, "step": 17115 }, { "epoch": 2.7940900371413413, "grad_norm": 2.8669614791870117, "learning_rate": 1.5120183884531507e-05, "loss": 0.6146, "step": 17116 }, { "epoch": 2.7942532957838457, "grad_norm": 2.2019975185394287, "learning_rate": 1.5119633149939127e-05, "loss": 0.4335, "step": 17117 }, { "epoch": 2.79441655442635, "grad_norm": 2.4225215911865234, "learning_rate": 1.5119082394301865e-05, "loss": 0.4786, "step": 17118 }, { "epoch": 2.794579813068854, "grad_norm": 2.7647266387939453, "learning_rate": 1.5118531617621992e-05, "loss": 0.5516, "step": 17119 }, { "epoch": 2.7947430717113586, "grad_norm": 2.6837286949157715, "learning_rate": 1.5117980819901762e-05, "loss": 0.5075, "step": 17120 }, { "epoch": 2.794906330353863, "grad_norm": 2.5172126293182373, "learning_rate": 1.5117430001143451e-05, "loss": 0.4436, "step": 17121 }, { "epoch": 2.7950695889963675, "grad_norm": 2.7088563442230225, "learning_rate": 1.5116879161349314e-05, "loss": 0.6083, "step": 17122 }, { "epoch": 2.795232847638872, "grad_norm": 3.3448078632354736, "learning_rate": 1.5116328300521623e-05, "loss": 0.7497, "step": 17123 }, { "epoch": 2.7953961062813764, "grad_norm": 2.747415065765381, "learning_rate": 1.5115777418662637e-05, "loss": 0.5736, "step": 17124 }, { "epoch": 2.795559364923881, "grad_norm": 2.981548547744751, "learning_rate": 1.511522651577462e-05, "loss": 0.5572, "step": 17125 }, { "epoch": 2.795722623566385, "grad_norm": 2.807478904724121, "learning_rate": 1.5114675591859835e-05, "loss": 0.5159, "step": 17126 }, { "epoch": 2.7958858822088892, "grad_norm": 2.8203699588775635, "learning_rate": 1.5114124646920557e-05, "loss": 0.5717, "step": 17127 }, { "epoch": 2.7960491408513937, "grad_norm": 2.9778378009796143, "learning_rate": 1.511357368095904e-05, "loss": 0.6325, "step": 17128 }, { "epoch": 2.796212399493898, "grad_norm": 3.1251227855682373, "learning_rate": 1.5113022693977556e-05, "loss": 0.5599, "step": 17129 }, { "epoch": 2.7963756581364025, "grad_norm": 2.829408645629883, "learning_rate": 1.511247168597836e-05, "loss": 0.5254, "step": 17130 }, { "epoch": 2.796538916778907, "grad_norm": 3.068920612335205, "learning_rate": 1.5111920656963726e-05, "loss": 0.6602, "step": 17131 }, { "epoch": 2.7967021754214114, "grad_norm": 2.744638442993164, "learning_rate": 1.511136960693592e-05, "loss": 0.4551, "step": 17132 }, { "epoch": 2.796865434063916, "grad_norm": 2.8996775150299072, "learning_rate": 1.5110818535897199e-05, "loss": 0.5522, "step": 17133 }, { "epoch": 2.7970286927064203, "grad_norm": 3.0779221057891846, "learning_rate": 1.5110267443849833e-05, "loss": 0.611, "step": 17134 }, { "epoch": 2.7971919513489247, "grad_norm": 2.672238349914551, "learning_rate": 1.5109716330796089e-05, "loss": 0.5505, "step": 17135 }, { "epoch": 2.797355209991429, "grad_norm": 2.4927682876586914, "learning_rate": 1.5109165196738232e-05, "loss": 0.4874, "step": 17136 }, { "epoch": 2.797518468633933, "grad_norm": 2.7599520683288574, "learning_rate": 1.5108614041678522e-05, "loss": 0.575, "step": 17137 }, { "epoch": 2.7976817272764376, "grad_norm": 2.3460140228271484, "learning_rate": 1.5108062865619232e-05, "loss": 0.4758, "step": 17138 }, { "epoch": 2.797844985918942, "grad_norm": 2.599806070327759, "learning_rate": 1.5107511668562621e-05, "loss": 0.5371, "step": 17139 }, { "epoch": 2.7980082445614465, "grad_norm": 2.6738507747650146, "learning_rate": 1.5106960450510961e-05, "loss": 0.7086, "step": 17140 }, { "epoch": 2.798171503203951, "grad_norm": 3.0315568447113037, "learning_rate": 1.5106409211466515e-05, "loss": 0.6019, "step": 17141 }, { "epoch": 2.7983347618464554, "grad_norm": 3.0070762634277344, "learning_rate": 1.5105857951431548e-05, "loss": 0.6186, "step": 17142 }, { "epoch": 2.79849802048896, "grad_norm": 2.5071442127227783, "learning_rate": 1.5105306670408327e-05, "loss": 0.5258, "step": 17143 }, { "epoch": 2.798661279131464, "grad_norm": 3.235853672027588, "learning_rate": 1.5104755368399118e-05, "loss": 0.7295, "step": 17144 }, { "epoch": 2.7988245377739682, "grad_norm": 2.9538965225219727, "learning_rate": 1.5104204045406188e-05, "loss": 0.5599, "step": 17145 }, { "epoch": 2.7989877964164727, "grad_norm": 2.4350030422210693, "learning_rate": 1.5103652701431801e-05, "loss": 0.4669, "step": 17146 }, { "epoch": 2.799151055058977, "grad_norm": 3.139166831970215, "learning_rate": 1.5103101336478223e-05, "loss": 0.6551, "step": 17147 }, { "epoch": 2.7993143137014815, "grad_norm": 2.9466521739959717, "learning_rate": 1.5102549950547726e-05, "loss": 0.5751, "step": 17148 }, { "epoch": 2.799477572343986, "grad_norm": 2.7405219078063965, "learning_rate": 1.5101998543642573e-05, "loss": 0.5812, "step": 17149 }, { "epoch": 2.7996408309864904, "grad_norm": 2.925008535385132, "learning_rate": 1.5101447115765027e-05, "loss": 0.5221, "step": 17150 }, { "epoch": 2.799804089628995, "grad_norm": 2.7713353633880615, "learning_rate": 1.5100895666917362e-05, "loss": 0.5631, "step": 17151 }, { "epoch": 2.7999673482714993, "grad_norm": 3.1342501640319824, "learning_rate": 1.510034419710184e-05, "loss": 0.5813, "step": 17152 }, { "epoch": 2.8001306069140037, "grad_norm": 2.500974178314209, "learning_rate": 1.5099792706320726e-05, "loss": 0.4423, "step": 17153 }, { "epoch": 2.800293865556508, "grad_norm": 3.922966957092285, "learning_rate": 1.509924119457629e-05, "loss": 0.7361, "step": 17154 }, { "epoch": 2.800457124199012, "grad_norm": 2.796208143234253, "learning_rate": 1.5098689661870803e-05, "loss": 0.5459, "step": 17155 }, { "epoch": 2.8006203828415166, "grad_norm": 2.7686166763305664, "learning_rate": 1.5098138108206525e-05, "loss": 0.5404, "step": 17156 }, { "epoch": 2.800783641484021, "grad_norm": 2.837106227874756, "learning_rate": 1.5097586533585728e-05, "loss": 0.6033, "step": 17157 }, { "epoch": 2.8009469001265255, "grad_norm": 3.686452627182007, "learning_rate": 1.5097034938010677e-05, "loss": 0.7, "step": 17158 }, { "epoch": 2.80111015876903, "grad_norm": 2.7118868827819824, "learning_rate": 1.5096483321483639e-05, "loss": 0.5346, "step": 17159 }, { "epoch": 2.8012734174115343, "grad_norm": 2.2010557651519775, "learning_rate": 1.5095931684006883e-05, "loss": 0.3996, "step": 17160 }, { "epoch": 2.8014366760540383, "grad_norm": 2.79530930519104, "learning_rate": 1.5095380025582675e-05, "loss": 0.5992, "step": 17161 }, { "epoch": 2.801599934696543, "grad_norm": 2.3557846546173096, "learning_rate": 1.5094828346213289e-05, "loss": 0.4851, "step": 17162 }, { "epoch": 2.801763193339047, "grad_norm": 2.6507272720336914, "learning_rate": 1.5094276645900983e-05, "loss": 0.5001, "step": 17163 }, { "epoch": 2.8019264519815517, "grad_norm": 3.2858526706695557, "learning_rate": 1.5093724924648028e-05, "loss": 0.7074, "step": 17164 }, { "epoch": 2.802089710624056, "grad_norm": 2.223935604095459, "learning_rate": 1.5093173182456693e-05, "loss": 0.4964, "step": 17165 }, { "epoch": 2.8022529692665605, "grad_norm": 2.992885112762451, "learning_rate": 1.5092621419329247e-05, "loss": 0.5625, "step": 17166 }, { "epoch": 2.802416227909065, "grad_norm": 3.105024814605713, "learning_rate": 1.509206963526796e-05, "loss": 0.5676, "step": 17167 }, { "epoch": 2.8025794865515694, "grad_norm": 3.5812039375305176, "learning_rate": 1.5091517830275098e-05, "loss": 0.6369, "step": 17168 }, { "epoch": 2.802742745194074, "grad_norm": 2.6583709716796875, "learning_rate": 1.5090966004352926e-05, "loss": 0.5126, "step": 17169 }, { "epoch": 2.8029060038365783, "grad_norm": 3.0198752880096436, "learning_rate": 1.5090414157503715e-05, "loss": 0.596, "step": 17170 }, { "epoch": 2.8030692624790827, "grad_norm": 3.0944595336914062, "learning_rate": 1.5089862289729732e-05, "loss": 0.6572, "step": 17171 }, { "epoch": 2.8032325211215867, "grad_norm": 3.18430757522583, "learning_rate": 1.5089310401033253e-05, "loss": 0.5604, "step": 17172 }, { "epoch": 2.803395779764091, "grad_norm": 2.8431363105773926, "learning_rate": 1.5088758491416536e-05, "loss": 0.62, "step": 17173 }, { "epoch": 2.8035590384065956, "grad_norm": 3.12111496925354, "learning_rate": 1.5088206560881854e-05, "loss": 0.6744, "step": 17174 }, { "epoch": 2.8037222970491, "grad_norm": 2.61710524559021, "learning_rate": 1.5087654609431475e-05, "loss": 0.4946, "step": 17175 }, { "epoch": 2.8038855556916045, "grad_norm": 3.4191620349884033, "learning_rate": 1.5087102637067675e-05, "loss": 0.7293, "step": 17176 }, { "epoch": 2.804048814334109, "grad_norm": 2.7613697052001953, "learning_rate": 1.508655064379271e-05, "loss": 0.6284, "step": 17177 }, { "epoch": 2.8042120729766133, "grad_norm": 3.5370564460754395, "learning_rate": 1.508599862960886e-05, "loss": 0.6969, "step": 17178 }, { "epoch": 2.8043753316191173, "grad_norm": 2.93424654006958, "learning_rate": 1.5085446594518389e-05, "loss": 0.6963, "step": 17179 }, { "epoch": 2.8045385902616218, "grad_norm": 3.205453395843506, "learning_rate": 1.5084894538523567e-05, "loss": 0.7207, "step": 17180 }, { "epoch": 2.804701848904126, "grad_norm": 2.5753121376037598, "learning_rate": 1.5084342461626665e-05, "loss": 0.5473, "step": 17181 }, { "epoch": 2.8048651075466307, "grad_norm": 2.5860748291015625, "learning_rate": 1.5083790363829948e-05, "loss": 0.5537, "step": 17182 }, { "epoch": 2.805028366189135, "grad_norm": 2.9441440105438232, "learning_rate": 1.508323824513569e-05, "loss": 0.5464, "step": 17183 }, { "epoch": 2.8051916248316395, "grad_norm": 2.6536946296691895, "learning_rate": 1.508268610554616e-05, "loss": 0.5073, "step": 17184 }, { "epoch": 2.805354883474144, "grad_norm": 2.80812668800354, "learning_rate": 1.5082133945063625e-05, "loss": 0.5264, "step": 17185 }, { "epoch": 2.8055181421166484, "grad_norm": 2.519360065460205, "learning_rate": 1.5081581763690358e-05, "loss": 0.552, "step": 17186 }, { "epoch": 2.805681400759153, "grad_norm": 2.9319796562194824, "learning_rate": 1.5081029561428626e-05, "loss": 0.6144, "step": 17187 }, { "epoch": 2.8058446594016573, "grad_norm": 2.8247129917144775, "learning_rate": 1.5080477338280701e-05, "loss": 0.529, "step": 17188 }, { "epoch": 2.8060079180441617, "grad_norm": 2.7500369548797607, "learning_rate": 1.5079925094248852e-05, "loss": 0.5428, "step": 17189 }, { "epoch": 2.8061711766866657, "grad_norm": 2.4158074855804443, "learning_rate": 1.5079372829335348e-05, "loss": 0.4454, "step": 17190 }, { "epoch": 2.80633443532917, "grad_norm": 2.7835187911987305, "learning_rate": 1.507882054354246e-05, "loss": 0.5232, "step": 17191 }, { "epoch": 2.8064976939716746, "grad_norm": 3.3342154026031494, "learning_rate": 1.507826823687246e-05, "loss": 0.6535, "step": 17192 }, { "epoch": 2.806660952614179, "grad_norm": 2.8132176399230957, "learning_rate": 1.5077715909327615e-05, "loss": 0.5518, "step": 17193 }, { "epoch": 2.8068242112566835, "grad_norm": 2.402186870574951, "learning_rate": 1.5077163560910197e-05, "loss": 0.4786, "step": 17194 }, { "epoch": 2.806987469899188, "grad_norm": 2.435875177383423, "learning_rate": 1.5076611191622479e-05, "loss": 0.4704, "step": 17195 }, { "epoch": 2.8071507285416923, "grad_norm": 2.5344583988189697, "learning_rate": 1.5076058801466732e-05, "loss": 0.5342, "step": 17196 }, { "epoch": 2.8073139871841963, "grad_norm": 2.932302951812744, "learning_rate": 1.5075506390445215e-05, "loss": 0.6664, "step": 17197 }, { "epoch": 2.8074772458267008, "grad_norm": 2.5727992057800293, "learning_rate": 1.5074953958560214e-05, "loss": 0.4694, "step": 17198 }, { "epoch": 2.807640504469205, "grad_norm": 2.883256435394287, "learning_rate": 1.5074401505813993e-05, "loss": 0.6099, "step": 17199 }, { "epoch": 2.8078037631117096, "grad_norm": 2.927917718887329, "learning_rate": 1.5073849032208823e-05, "loss": 0.6148, "step": 17200 }, { "epoch": 2.807967021754214, "grad_norm": 3.2653520107269287, "learning_rate": 1.5073296537746975e-05, "loss": 0.6612, "step": 17201 }, { "epoch": 2.8081302803967185, "grad_norm": 2.6475448608398438, "learning_rate": 1.5072744022430721e-05, "loss": 0.5147, "step": 17202 }, { "epoch": 2.808293539039223, "grad_norm": 3.1039018630981445, "learning_rate": 1.5072191486262333e-05, "loss": 0.5806, "step": 17203 }, { "epoch": 2.8084567976817274, "grad_norm": 2.70554518699646, "learning_rate": 1.5071638929244077e-05, "loss": 0.5062, "step": 17204 }, { "epoch": 2.808620056324232, "grad_norm": 3.1084961891174316, "learning_rate": 1.5071086351378232e-05, "loss": 0.5358, "step": 17205 }, { "epoch": 2.8087833149667363, "grad_norm": 2.743617296218872, "learning_rate": 1.5070533752667068e-05, "loss": 0.5264, "step": 17206 }, { "epoch": 2.8089465736092407, "grad_norm": 2.6954164505004883, "learning_rate": 1.506998113311285e-05, "loss": 0.5631, "step": 17207 }, { "epoch": 2.8091098322517447, "grad_norm": 2.7392990589141846, "learning_rate": 1.5069428492717854e-05, "loss": 0.5044, "step": 17208 }, { "epoch": 2.809273090894249, "grad_norm": 2.2457239627838135, "learning_rate": 1.5068875831484354e-05, "loss": 0.4243, "step": 17209 }, { "epoch": 2.8094363495367536, "grad_norm": 2.570758819580078, "learning_rate": 1.5068323149414617e-05, "loss": 0.5631, "step": 17210 }, { "epoch": 2.809599608179258, "grad_norm": 3.162783145904541, "learning_rate": 1.5067770446510919e-05, "loss": 0.7145, "step": 17211 }, { "epoch": 2.8097628668217625, "grad_norm": 2.925476551055908, "learning_rate": 1.5067217722775532e-05, "loss": 0.6267, "step": 17212 }, { "epoch": 2.809926125464267, "grad_norm": 3.300356388092041, "learning_rate": 1.5066664978210723e-05, "loss": 0.6188, "step": 17213 }, { "epoch": 2.810089384106771, "grad_norm": 2.690899133682251, "learning_rate": 1.5066112212818767e-05, "loss": 0.5456, "step": 17214 }, { "epoch": 2.8102526427492753, "grad_norm": 2.6054632663726807, "learning_rate": 1.5065559426601937e-05, "loss": 0.5848, "step": 17215 }, { "epoch": 2.8104159013917798, "grad_norm": 2.530275821685791, "learning_rate": 1.5065006619562507e-05, "loss": 0.5415, "step": 17216 }, { "epoch": 2.810579160034284, "grad_norm": 3.3332087993621826, "learning_rate": 1.5064453791702747e-05, "loss": 0.6857, "step": 17217 }, { "epoch": 2.8107424186767886, "grad_norm": 2.7526423931121826, "learning_rate": 1.5063900943024931e-05, "loss": 0.4556, "step": 17218 }, { "epoch": 2.810905677319293, "grad_norm": 2.948793888092041, "learning_rate": 1.5063348073531325e-05, "loss": 0.6111, "step": 17219 }, { "epoch": 2.8110689359617975, "grad_norm": 3.1360836029052734, "learning_rate": 1.5062795183224213e-05, "loss": 0.5997, "step": 17220 }, { "epoch": 2.811232194604302, "grad_norm": 2.7314560413360596, "learning_rate": 1.5062242272105861e-05, "loss": 0.6134, "step": 17221 }, { "epoch": 2.8113954532468064, "grad_norm": 2.568101167678833, "learning_rate": 1.506168934017854e-05, "loss": 0.54, "step": 17222 }, { "epoch": 2.811558711889311, "grad_norm": 2.8017499446868896, "learning_rate": 1.5061136387444526e-05, "loss": 0.5079, "step": 17223 }, { "epoch": 2.8117219705318153, "grad_norm": 2.446043014526367, "learning_rate": 1.5060583413906092e-05, "loss": 0.4844, "step": 17224 }, { "epoch": 2.8118852291743193, "grad_norm": 2.836674690246582, "learning_rate": 1.5060030419565507e-05, "loss": 0.6085, "step": 17225 }, { "epoch": 2.8120484878168237, "grad_norm": 2.312990665435791, "learning_rate": 1.5059477404425051e-05, "loss": 0.492, "step": 17226 }, { "epoch": 2.812211746459328, "grad_norm": 3.613452672958374, "learning_rate": 1.5058924368486994e-05, "loss": 0.587, "step": 17227 }, { "epoch": 2.8123750051018326, "grad_norm": 3.065977096557617, "learning_rate": 1.5058371311753607e-05, "loss": 0.6171, "step": 17228 }, { "epoch": 2.812538263744337, "grad_norm": 2.583122491836548, "learning_rate": 1.5057818234227166e-05, "loss": 0.5098, "step": 17229 }, { "epoch": 2.8127015223868415, "grad_norm": 2.5830512046813965, "learning_rate": 1.5057265135909946e-05, "loss": 0.5008, "step": 17230 }, { "epoch": 2.812864781029346, "grad_norm": 2.6887853145599365, "learning_rate": 1.5056712016804218e-05, "loss": 0.5015, "step": 17231 }, { "epoch": 2.81302803967185, "grad_norm": 2.654993772506714, "learning_rate": 1.5056158876912256e-05, "loss": 0.4868, "step": 17232 }, { "epoch": 2.8131912983143543, "grad_norm": 2.954354763031006, "learning_rate": 1.5055605716236334e-05, "loss": 0.6463, "step": 17233 }, { "epoch": 2.8133545569568588, "grad_norm": 2.8549537658691406, "learning_rate": 1.5055052534778725e-05, "loss": 0.5813, "step": 17234 }, { "epoch": 2.813517815599363, "grad_norm": 2.407156467437744, "learning_rate": 1.5054499332541705e-05, "loss": 0.531, "step": 17235 }, { "epoch": 2.8136810742418676, "grad_norm": 2.6985557079315186, "learning_rate": 1.5053946109527546e-05, "loss": 0.5136, "step": 17236 }, { "epoch": 2.813844332884372, "grad_norm": 3.0378952026367188, "learning_rate": 1.5053392865738523e-05, "loss": 0.6146, "step": 17237 }, { "epoch": 2.8140075915268765, "grad_norm": 2.647038459777832, "learning_rate": 1.5052839601176909e-05, "loss": 0.5406, "step": 17238 }, { "epoch": 2.814170850169381, "grad_norm": 2.8509483337402344, "learning_rate": 1.5052286315844981e-05, "loss": 0.5549, "step": 17239 }, { "epoch": 2.8143341088118854, "grad_norm": 2.5442144870758057, "learning_rate": 1.5051733009745013e-05, "loss": 0.4809, "step": 17240 }, { "epoch": 2.81449736745439, "grad_norm": 2.879300594329834, "learning_rate": 1.5051179682879275e-05, "loss": 0.5887, "step": 17241 }, { "epoch": 2.8146606260968943, "grad_norm": 2.9529831409454346, "learning_rate": 1.5050626335250047e-05, "loss": 0.5223, "step": 17242 }, { "epoch": 2.8148238847393983, "grad_norm": 2.857556104660034, "learning_rate": 1.5050072966859598e-05, "loss": 0.56, "step": 17243 }, { "epoch": 2.8149871433819027, "grad_norm": 2.7820560932159424, "learning_rate": 1.504951957771021e-05, "loss": 0.5595, "step": 17244 }, { "epoch": 2.815150402024407, "grad_norm": 2.811936616897583, "learning_rate": 1.5048966167804154e-05, "loss": 0.5128, "step": 17245 }, { "epoch": 2.8153136606669116, "grad_norm": 2.532238483428955, "learning_rate": 1.5048412737143698e-05, "loss": 0.5298, "step": 17246 }, { "epoch": 2.815476919309416, "grad_norm": 3.1507339477539062, "learning_rate": 1.5047859285731128e-05, "loss": 0.6449, "step": 17247 }, { "epoch": 2.8156401779519205, "grad_norm": 2.751556396484375, "learning_rate": 1.5047305813568716e-05, "loss": 0.538, "step": 17248 }, { "epoch": 2.8158034365944244, "grad_norm": 2.9078147411346436, "learning_rate": 1.5046752320658734e-05, "loss": 0.531, "step": 17249 }, { "epoch": 2.815966695236929, "grad_norm": 3.356280565261841, "learning_rate": 1.504619880700346e-05, "loss": 0.6112, "step": 17250 }, { "epoch": 2.8161299538794333, "grad_norm": 2.793596029281616, "learning_rate": 1.5045645272605166e-05, "loss": 0.4915, "step": 17251 }, { "epoch": 2.8162932125219378, "grad_norm": 2.8055520057678223, "learning_rate": 1.5045091717466127e-05, "loss": 0.5653, "step": 17252 }, { "epoch": 2.816456471164442, "grad_norm": 3.0447113513946533, "learning_rate": 1.5044538141588625e-05, "loss": 0.5819, "step": 17253 }, { "epoch": 2.8166197298069466, "grad_norm": 2.4808435440063477, "learning_rate": 1.5043984544974932e-05, "loss": 0.551, "step": 17254 }, { "epoch": 2.816782988449451, "grad_norm": 2.9464147090911865, "learning_rate": 1.5043430927627324e-05, "loss": 0.6059, "step": 17255 }, { "epoch": 2.8169462470919555, "grad_norm": 2.8067727088928223, "learning_rate": 1.5042877289548072e-05, "loss": 0.5004, "step": 17256 }, { "epoch": 2.81710950573446, "grad_norm": 3.143589735031128, "learning_rate": 1.5042323630739457e-05, "loss": 0.6153, "step": 17257 }, { "epoch": 2.8172727643769644, "grad_norm": 2.528974771499634, "learning_rate": 1.5041769951203753e-05, "loss": 0.4837, "step": 17258 }, { "epoch": 2.817436023019469, "grad_norm": 2.4824602603912354, "learning_rate": 1.5041216250943236e-05, "loss": 0.4822, "step": 17259 }, { "epoch": 2.817599281661973, "grad_norm": 2.445284366607666, "learning_rate": 1.5040662529960188e-05, "loss": 0.5169, "step": 17260 }, { "epoch": 2.8177625403044773, "grad_norm": 2.8714895248413086, "learning_rate": 1.5040108788256876e-05, "loss": 0.4982, "step": 17261 }, { "epoch": 2.8179257989469817, "grad_norm": 3.0303311347961426, "learning_rate": 1.5039555025835577e-05, "loss": 0.5765, "step": 17262 }, { "epoch": 2.818089057589486, "grad_norm": 3.908945083618164, "learning_rate": 1.503900124269857e-05, "loss": 0.6414, "step": 17263 }, { "epoch": 2.8182523162319906, "grad_norm": 3.048600196838379, "learning_rate": 1.5038447438848134e-05, "loss": 0.5411, "step": 17264 }, { "epoch": 2.818415574874495, "grad_norm": 3.12463116645813, "learning_rate": 1.5037893614286545e-05, "loss": 0.5278, "step": 17265 }, { "epoch": 2.8185788335169994, "grad_norm": 2.6882553100585938, "learning_rate": 1.5037339769016077e-05, "loss": 0.4959, "step": 17266 }, { "epoch": 2.8187420921595034, "grad_norm": 2.9654669761657715, "learning_rate": 1.5036785903039007e-05, "loss": 0.6229, "step": 17267 }, { "epoch": 2.818905350802008, "grad_norm": 3.4696619510650635, "learning_rate": 1.503623201635761e-05, "loss": 0.6862, "step": 17268 }, { "epoch": 2.8190686094445123, "grad_norm": 2.810877799987793, "learning_rate": 1.5035678108974167e-05, "loss": 0.5407, "step": 17269 }, { "epoch": 2.8192318680870168, "grad_norm": 2.993072986602783, "learning_rate": 1.5035124180890952e-05, "loss": 0.6054, "step": 17270 }, { "epoch": 2.819395126729521, "grad_norm": 3.4076526165008545, "learning_rate": 1.5034570232110241e-05, "loss": 0.6826, "step": 17271 }, { "epoch": 2.8195583853720256, "grad_norm": 2.7304270267486572, "learning_rate": 1.5034016262634314e-05, "loss": 0.5978, "step": 17272 }, { "epoch": 2.81972164401453, "grad_norm": 3.5159761905670166, "learning_rate": 1.503346227246545e-05, "loss": 0.7339, "step": 17273 }, { "epoch": 2.8198849026570345, "grad_norm": 3.270688772201538, "learning_rate": 1.5032908261605919e-05, "loss": 0.577, "step": 17274 }, { "epoch": 2.820048161299539, "grad_norm": 3.3934059143066406, "learning_rate": 1.5032354230058004e-05, "loss": 0.6335, "step": 17275 }, { "epoch": 2.8202114199420434, "grad_norm": 2.666013717651367, "learning_rate": 1.5031800177823978e-05, "loss": 0.596, "step": 17276 }, { "epoch": 2.820374678584548, "grad_norm": 2.5050814151763916, "learning_rate": 1.5031246104906125e-05, "loss": 0.4833, "step": 17277 }, { "epoch": 2.820537937227052, "grad_norm": 2.8982651233673096, "learning_rate": 1.5030692011306717e-05, "loss": 0.5867, "step": 17278 }, { "epoch": 2.8207011958695563, "grad_norm": 2.7485151290893555, "learning_rate": 1.5030137897028037e-05, "loss": 0.5104, "step": 17279 }, { "epoch": 2.8208644545120607, "grad_norm": 3.0604262351989746, "learning_rate": 1.5029583762072357e-05, "loss": 0.5182, "step": 17280 }, { "epoch": 2.821027713154565, "grad_norm": 3.553943157196045, "learning_rate": 1.5029029606441959e-05, "loss": 0.6, "step": 17281 }, { "epoch": 2.8211909717970696, "grad_norm": 2.606607675552368, "learning_rate": 1.5028475430139118e-05, "loss": 0.5052, "step": 17282 }, { "epoch": 2.821354230439574, "grad_norm": 2.6410744190216064, "learning_rate": 1.5027921233166112e-05, "loss": 0.479, "step": 17283 }, { "epoch": 2.8215174890820784, "grad_norm": 3.1382923126220703, "learning_rate": 1.5027367015525225e-05, "loss": 0.6159, "step": 17284 }, { "epoch": 2.8216807477245824, "grad_norm": 3.066896438598633, "learning_rate": 1.5026812777218722e-05, "loss": 0.5794, "step": 17285 }, { "epoch": 2.821844006367087, "grad_norm": 2.647094488143921, "learning_rate": 1.5026258518248895e-05, "loss": 0.5036, "step": 17286 }, { "epoch": 2.8220072650095913, "grad_norm": 2.2797977924346924, "learning_rate": 1.5025704238618018e-05, "loss": 0.5085, "step": 17287 }, { "epoch": 2.8221705236520958, "grad_norm": 2.6774728298187256, "learning_rate": 1.5025149938328368e-05, "loss": 0.5138, "step": 17288 }, { "epoch": 2.8223337822946, "grad_norm": 2.519569158554077, "learning_rate": 1.5024595617382225e-05, "loss": 0.5051, "step": 17289 }, { "epoch": 2.8224970409371046, "grad_norm": 2.823615789413452, "learning_rate": 1.5024041275781862e-05, "loss": 0.556, "step": 17290 }, { "epoch": 2.822660299579609, "grad_norm": 2.902364730834961, "learning_rate": 1.5023486913529568e-05, "loss": 0.5899, "step": 17291 }, { "epoch": 2.8228235582221135, "grad_norm": 3.023822069168091, "learning_rate": 1.5022932530627614e-05, "loss": 0.6317, "step": 17292 }, { "epoch": 2.822986816864618, "grad_norm": 2.3143327236175537, "learning_rate": 1.5022378127078278e-05, "loss": 0.5087, "step": 17293 }, { "epoch": 2.8231500755071224, "grad_norm": 2.8168866634368896, "learning_rate": 1.5021823702883849e-05, "loss": 0.5681, "step": 17294 }, { "epoch": 2.823313334149627, "grad_norm": 2.669171094894409, "learning_rate": 1.5021269258046593e-05, "loss": 0.5563, "step": 17295 }, { "epoch": 2.823476592792131, "grad_norm": 2.8244996070861816, "learning_rate": 1.5020714792568797e-05, "loss": 0.5098, "step": 17296 }, { "epoch": 2.8236398514346353, "grad_norm": 2.625375986099243, "learning_rate": 1.5020160306452735e-05, "loss": 0.5356, "step": 17297 }, { "epoch": 2.8238031100771397, "grad_norm": 2.901297092437744, "learning_rate": 1.5019605799700693e-05, "loss": 0.5789, "step": 17298 }, { "epoch": 2.823966368719644, "grad_norm": 3.497474193572998, "learning_rate": 1.5019051272314949e-05, "loss": 0.6321, "step": 17299 }, { "epoch": 2.8241296273621486, "grad_norm": 2.8071494102478027, "learning_rate": 1.5018496724297778e-05, "loss": 0.5899, "step": 17300 }, { "epoch": 2.824292886004653, "grad_norm": 3.0171144008636475, "learning_rate": 1.5017942155651461e-05, "loss": 0.5845, "step": 17301 }, { "epoch": 2.824456144647157, "grad_norm": 2.606834888458252, "learning_rate": 1.5017387566378276e-05, "loss": 0.4821, "step": 17302 }, { "epoch": 2.8246194032896614, "grad_norm": 2.996090888977051, "learning_rate": 1.501683295648051e-05, "loss": 0.5904, "step": 17303 }, { "epoch": 2.824782661932166, "grad_norm": 3.022848606109619, "learning_rate": 1.5016278325960439e-05, "loss": 0.5875, "step": 17304 }, { "epoch": 2.8249459205746703, "grad_norm": 3.049734115600586, "learning_rate": 1.501572367482034e-05, "loss": 0.5226, "step": 17305 }, { "epoch": 2.8251091792171747, "grad_norm": 2.2523887157440186, "learning_rate": 1.5015169003062493e-05, "loss": 0.4415, "step": 17306 }, { "epoch": 2.825272437859679, "grad_norm": 2.583075523376465, "learning_rate": 1.5014614310689181e-05, "loss": 0.4917, "step": 17307 }, { "epoch": 2.8254356965021836, "grad_norm": 3.515371799468994, "learning_rate": 1.5014059597702682e-05, "loss": 0.7223, "step": 17308 }, { "epoch": 2.825598955144688, "grad_norm": 2.791212558746338, "learning_rate": 1.501350486410528e-05, "loss": 0.6272, "step": 17309 }, { "epoch": 2.8257622137871925, "grad_norm": 2.6666250228881836, "learning_rate": 1.5012950109899252e-05, "loss": 0.5646, "step": 17310 }, { "epoch": 2.825925472429697, "grad_norm": 2.4654417037963867, "learning_rate": 1.5012395335086878e-05, "loss": 0.5211, "step": 17311 }, { "epoch": 2.8260887310722014, "grad_norm": 2.7339296340942383, "learning_rate": 1.5011840539670439e-05, "loss": 0.615, "step": 17312 }, { "epoch": 2.8262519897147054, "grad_norm": 2.538212299346924, "learning_rate": 1.5011285723652216e-05, "loss": 0.5149, "step": 17313 }, { "epoch": 2.82641524835721, "grad_norm": 3.375265598297119, "learning_rate": 1.501073088703449e-05, "loss": 0.5735, "step": 17314 }, { "epoch": 2.8265785069997142, "grad_norm": 2.8037314414978027, "learning_rate": 1.5010176029819543e-05, "loss": 0.5647, "step": 17315 }, { "epoch": 2.8267417656422187, "grad_norm": 2.828336477279663, "learning_rate": 1.5009621152009653e-05, "loss": 0.5834, "step": 17316 }, { "epoch": 2.826905024284723, "grad_norm": 2.745077610015869, "learning_rate": 1.5009066253607101e-05, "loss": 0.5215, "step": 17317 }, { "epoch": 2.8270682829272276, "grad_norm": 2.7972352504730225, "learning_rate": 1.5008511334614171e-05, "loss": 0.5635, "step": 17318 }, { "epoch": 2.827231541569732, "grad_norm": 2.5626471042633057, "learning_rate": 1.5007956395033138e-05, "loss": 0.5968, "step": 17319 }, { "epoch": 2.827394800212236, "grad_norm": 3.4034552574157715, "learning_rate": 1.500740143486629e-05, "loss": 0.5985, "step": 17320 }, { "epoch": 2.8275580588547404, "grad_norm": 2.3715906143188477, "learning_rate": 1.5006846454115906e-05, "loss": 0.4584, "step": 17321 }, { "epoch": 2.827721317497245, "grad_norm": 2.737243413925171, "learning_rate": 1.5006291452784267e-05, "loss": 0.4956, "step": 17322 }, { "epoch": 2.8278845761397493, "grad_norm": 3.2224831581115723, "learning_rate": 1.5005736430873654e-05, "loss": 0.5795, "step": 17323 }, { "epoch": 2.8280478347822537, "grad_norm": 2.8144965171813965, "learning_rate": 1.5005181388386348e-05, "loss": 0.6347, "step": 17324 }, { "epoch": 2.828211093424758, "grad_norm": 2.9819374084472656, "learning_rate": 1.5004626325324631e-05, "loss": 0.4933, "step": 17325 }, { "epoch": 2.8283743520672626, "grad_norm": 2.5481791496276855, "learning_rate": 1.5004071241690785e-05, "loss": 0.5762, "step": 17326 }, { "epoch": 2.828537610709767, "grad_norm": 2.8841495513916016, "learning_rate": 1.5003516137487091e-05, "loss": 0.5915, "step": 17327 }, { "epoch": 2.8287008693522715, "grad_norm": 2.6575522422790527, "learning_rate": 1.5002961012715835e-05, "loss": 0.5544, "step": 17328 }, { "epoch": 2.828864127994776, "grad_norm": 3.24993634223938, "learning_rate": 1.5002405867379291e-05, "loss": 0.6923, "step": 17329 }, { "epoch": 2.8290273866372804, "grad_norm": 3.2335007190704346, "learning_rate": 1.5001850701479747e-05, "loss": 0.5508, "step": 17330 }, { "epoch": 2.8291906452797844, "grad_norm": 2.836860179901123, "learning_rate": 1.5001295515019486e-05, "loss": 0.5843, "step": 17331 }, { "epoch": 2.829353903922289, "grad_norm": 3.2116150856018066, "learning_rate": 1.5000740308000783e-05, "loss": 0.6621, "step": 17332 }, { "epoch": 2.8295171625647932, "grad_norm": 2.574777126312256, "learning_rate": 1.5000185080425929e-05, "loss": 0.4785, "step": 17333 }, { "epoch": 2.8296804212072977, "grad_norm": 2.44332218170166, "learning_rate": 1.49996298322972e-05, "loss": 0.4581, "step": 17334 }, { "epoch": 2.829843679849802, "grad_norm": 2.792653799057007, "learning_rate": 1.4999074563616877e-05, "loss": 0.5767, "step": 17335 }, { "epoch": 2.8300069384923066, "grad_norm": 2.7633862495422363, "learning_rate": 1.499851927438725e-05, "loss": 0.5456, "step": 17336 }, { "epoch": 2.8301701971348106, "grad_norm": 3.073316812515259, "learning_rate": 1.4997963964610598e-05, "loss": 0.6477, "step": 17337 }, { "epoch": 2.830333455777315, "grad_norm": 3.5716772079467773, "learning_rate": 1.4997408634289206e-05, "loss": 0.7131, "step": 17338 }, { "epoch": 2.8304967144198194, "grad_norm": 2.9921138286590576, "learning_rate": 1.499685328342535e-05, "loss": 0.6451, "step": 17339 }, { "epoch": 2.830659973062324, "grad_norm": 3.0640931129455566, "learning_rate": 1.4996297912021316e-05, "loss": 0.6428, "step": 17340 }, { "epoch": 2.8308232317048283, "grad_norm": 2.9937500953674316, "learning_rate": 1.4995742520079391e-05, "loss": 0.6487, "step": 17341 }, { "epoch": 2.8309864903473327, "grad_norm": 2.779780864715576, "learning_rate": 1.4995187107601852e-05, "loss": 0.469, "step": 17342 }, { "epoch": 2.831149748989837, "grad_norm": 2.965606927871704, "learning_rate": 1.4994631674590987e-05, "loss": 0.6293, "step": 17343 }, { "epoch": 2.8313130076323416, "grad_norm": 3.2204294204711914, "learning_rate": 1.4994076221049079e-05, "loss": 0.5684, "step": 17344 }, { "epoch": 2.831476266274846, "grad_norm": 3.1415905952453613, "learning_rate": 1.4993520746978405e-05, "loss": 0.6363, "step": 17345 }, { "epoch": 2.8316395249173505, "grad_norm": 2.6887850761413574, "learning_rate": 1.4992965252381254e-05, "loss": 0.4559, "step": 17346 }, { "epoch": 2.831802783559855, "grad_norm": 3.1393048763275146, "learning_rate": 1.4992409737259907e-05, "loss": 0.6199, "step": 17347 }, { "epoch": 2.831966042202359, "grad_norm": 2.2617290019989014, "learning_rate": 1.4991854201616654e-05, "loss": 0.4531, "step": 17348 }, { "epoch": 2.8321293008448634, "grad_norm": 2.9871792793273926, "learning_rate": 1.4991298645453769e-05, "loss": 0.6235, "step": 17349 }, { "epoch": 2.832292559487368, "grad_norm": 2.7526462078094482, "learning_rate": 1.499074306877354e-05, "loss": 0.4989, "step": 17350 }, { "epoch": 2.8324558181298722, "grad_norm": 2.8944449424743652, "learning_rate": 1.4990187471578252e-05, "loss": 0.5617, "step": 17351 }, { "epoch": 2.8326190767723767, "grad_norm": 2.8817896842956543, "learning_rate": 1.4989631853870186e-05, "loss": 0.5957, "step": 17352 }, { "epoch": 2.832782335414881, "grad_norm": 2.598083257675171, "learning_rate": 1.498907621565163e-05, "loss": 0.5876, "step": 17353 }, { "epoch": 2.8329455940573856, "grad_norm": 3.272806406021118, "learning_rate": 1.4988520556924864e-05, "loss": 1.1418, "step": 17354 }, { "epoch": 2.8331088526998895, "grad_norm": 2.7943520545959473, "learning_rate": 1.4987964877692173e-05, "loss": 0.5616, "step": 17355 }, { "epoch": 2.833272111342394, "grad_norm": 3.008065938949585, "learning_rate": 1.4987409177955843e-05, "loss": 0.587, "step": 17356 }, { "epoch": 2.8334353699848984, "grad_norm": 2.5645804405212402, "learning_rate": 1.4986853457718158e-05, "loss": 0.5584, "step": 17357 }, { "epoch": 2.833598628627403, "grad_norm": 3.456773519515991, "learning_rate": 1.49862977169814e-05, "loss": 0.6908, "step": 17358 }, { "epoch": 2.8337618872699073, "grad_norm": 2.917261838912964, "learning_rate": 1.4985741955747854e-05, "loss": 0.5432, "step": 17359 }, { "epoch": 2.8339251459124117, "grad_norm": 2.5513012409210205, "learning_rate": 1.4985186174019805e-05, "loss": 0.5926, "step": 17360 }, { "epoch": 2.834088404554916, "grad_norm": 2.9155642986297607, "learning_rate": 1.498463037179954e-05, "loss": 0.6007, "step": 17361 }, { "epoch": 2.8342516631974206, "grad_norm": 2.986591100692749, "learning_rate": 1.4984074549089342e-05, "loss": 0.6071, "step": 17362 }, { "epoch": 2.834414921839925, "grad_norm": 2.460721969604492, "learning_rate": 1.4983518705891494e-05, "loss": 0.5206, "step": 17363 }, { "epoch": 2.8345781804824295, "grad_norm": 2.952040910720825, "learning_rate": 1.4982962842208285e-05, "loss": 0.5258, "step": 17364 }, { "epoch": 2.834741439124934, "grad_norm": 2.455471992492676, "learning_rate": 1.4982406958041992e-05, "loss": 0.5083, "step": 17365 }, { "epoch": 2.834904697767438, "grad_norm": 2.7035226821899414, "learning_rate": 1.498185105339491e-05, "loss": 0.5406, "step": 17366 }, { "epoch": 2.8350679564099424, "grad_norm": 3.0252623558044434, "learning_rate": 1.4981295128269318e-05, "loss": 0.6341, "step": 17367 }, { "epoch": 2.835231215052447, "grad_norm": 3.114839792251587, "learning_rate": 1.4980739182667502e-05, "loss": 0.5828, "step": 17368 }, { "epoch": 2.8353944736949512, "grad_norm": 2.872089385986328, "learning_rate": 1.4980183216591748e-05, "loss": 0.5187, "step": 17369 }, { "epoch": 2.8355577323374557, "grad_norm": 2.876797914505005, "learning_rate": 1.4979627230044342e-05, "loss": 0.517, "step": 17370 }, { "epoch": 2.83572099097996, "grad_norm": 3.093191623687744, "learning_rate": 1.4979071223027567e-05, "loss": 0.639, "step": 17371 }, { "epoch": 2.8358842496224645, "grad_norm": 2.8950092792510986, "learning_rate": 1.4978515195543716e-05, "loss": 0.5808, "step": 17372 }, { "epoch": 2.8360475082649685, "grad_norm": 3.4633665084838867, "learning_rate": 1.4977959147595063e-05, "loss": 0.698, "step": 17373 }, { "epoch": 2.836210766907473, "grad_norm": 2.761594772338867, "learning_rate": 1.49774030791839e-05, "loss": 0.5813, "step": 17374 }, { "epoch": 2.8363740255499774, "grad_norm": 3.6600940227508545, "learning_rate": 1.4976846990312515e-05, "loss": 0.6656, "step": 17375 }, { "epoch": 2.836537284192482, "grad_norm": 2.867968797683716, "learning_rate": 1.4976290880983188e-05, "loss": 0.5828, "step": 17376 }, { "epoch": 2.8367005428349863, "grad_norm": 3.2738053798675537, "learning_rate": 1.4975734751198212e-05, "loss": 0.6175, "step": 17377 }, { "epoch": 2.8368638014774907, "grad_norm": 2.7298247814178467, "learning_rate": 1.4975178600959868e-05, "loss": 0.5495, "step": 17378 }, { "epoch": 2.837027060119995, "grad_norm": 2.8499932289123535, "learning_rate": 1.497462243027044e-05, "loss": 0.501, "step": 17379 }, { "epoch": 2.8371903187624996, "grad_norm": 2.852038860321045, "learning_rate": 1.497406623913222e-05, "loss": 0.5844, "step": 17380 }, { "epoch": 2.837353577405004, "grad_norm": 2.991112232208252, "learning_rate": 1.4973510027547492e-05, "loss": 0.5742, "step": 17381 }, { "epoch": 2.8375168360475085, "grad_norm": 2.2654471397399902, "learning_rate": 1.4972953795518545e-05, "loss": 0.4518, "step": 17382 }, { "epoch": 2.837680094690013, "grad_norm": 3.0576605796813965, "learning_rate": 1.497239754304766e-05, "loss": 0.5065, "step": 17383 }, { "epoch": 2.837843353332517, "grad_norm": 2.458000898361206, "learning_rate": 1.4971841270137125e-05, "loss": 0.5193, "step": 17384 }, { "epoch": 2.8380066119750214, "grad_norm": 3.0618133544921875, "learning_rate": 1.497128497678923e-05, "loss": 0.5733, "step": 17385 }, { "epoch": 2.838169870617526, "grad_norm": 2.613971710205078, "learning_rate": 1.4970728663006258e-05, "loss": 0.4609, "step": 17386 }, { "epoch": 2.8383331292600302, "grad_norm": 2.812143325805664, "learning_rate": 1.49701723287905e-05, "loss": 0.5574, "step": 17387 }, { "epoch": 2.8384963879025347, "grad_norm": 2.614567279815674, "learning_rate": 1.4969615974144238e-05, "loss": 0.5901, "step": 17388 }, { "epoch": 2.838659646545039, "grad_norm": 2.789696455001831, "learning_rate": 1.4969059599069763e-05, "loss": 0.5213, "step": 17389 }, { "epoch": 2.838822905187543, "grad_norm": 3.551560163497925, "learning_rate": 1.4968503203569355e-05, "loss": 0.6709, "step": 17390 }, { "epoch": 2.8389861638300475, "grad_norm": 2.5054197311401367, "learning_rate": 1.4967946787645311e-05, "loss": 0.4896, "step": 17391 }, { "epoch": 2.839149422472552, "grad_norm": 2.479792356491089, "learning_rate": 1.4967390351299916e-05, "loss": 0.433, "step": 17392 }, { "epoch": 2.8393126811150564, "grad_norm": 3.7463064193725586, "learning_rate": 1.496683389453545e-05, "loss": 0.6392, "step": 17393 }, { "epoch": 2.839475939757561, "grad_norm": 3.377534866333008, "learning_rate": 1.4966277417354207e-05, "loss": 0.6373, "step": 17394 }, { "epoch": 2.8396391984000653, "grad_norm": 2.5454299449920654, "learning_rate": 1.4965720919758472e-05, "loss": 0.5123, "step": 17395 }, { "epoch": 2.8398024570425697, "grad_norm": 2.840700387954712, "learning_rate": 1.4965164401750534e-05, "loss": 0.5687, "step": 17396 }, { "epoch": 2.839965715685074, "grad_norm": 2.993060350418091, "learning_rate": 1.4964607863332683e-05, "loss": 0.5233, "step": 17397 }, { "epoch": 2.8401289743275786, "grad_norm": 2.3349387645721436, "learning_rate": 1.4964051304507198e-05, "loss": 0.5194, "step": 17398 }, { "epoch": 2.840292232970083, "grad_norm": 2.916910171508789, "learning_rate": 1.4963494725276378e-05, "loss": 0.5891, "step": 17399 }, { "epoch": 2.8404554916125875, "grad_norm": 3.1304569244384766, "learning_rate": 1.4962938125642504e-05, "loss": 0.6269, "step": 17400 }, { "epoch": 2.8406187502550915, "grad_norm": 3.6183812618255615, "learning_rate": 1.4962381505607862e-05, "loss": 0.6837, "step": 17401 }, { "epoch": 2.840782008897596, "grad_norm": 2.8576691150665283, "learning_rate": 1.4961824865174745e-05, "loss": 0.5078, "step": 17402 }, { "epoch": 2.8409452675401003, "grad_norm": 2.881162166595459, "learning_rate": 1.4961268204345441e-05, "loss": 0.6013, "step": 17403 }, { "epoch": 2.841108526182605, "grad_norm": 3.133490562438965, "learning_rate": 1.4960711523122236e-05, "loss": 0.5759, "step": 17404 }, { "epoch": 2.8412717848251092, "grad_norm": 2.9606430530548096, "learning_rate": 1.4960154821507418e-05, "loss": 0.5508, "step": 17405 }, { "epoch": 2.8414350434676137, "grad_norm": 2.7508490085601807, "learning_rate": 1.4959598099503276e-05, "loss": 0.5824, "step": 17406 }, { "epoch": 2.841598302110118, "grad_norm": 3.224848747253418, "learning_rate": 1.49590413571121e-05, "loss": 0.6724, "step": 17407 }, { "epoch": 2.841761560752622, "grad_norm": 2.0456182956695557, "learning_rate": 1.495848459433618e-05, "loss": 0.4263, "step": 17408 }, { "epoch": 2.8419248193951265, "grad_norm": 2.8468034267425537, "learning_rate": 1.4957927811177799e-05, "loss": 0.5602, "step": 17409 }, { "epoch": 2.842088078037631, "grad_norm": 3.009845733642578, "learning_rate": 1.495737100763925e-05, "loss": 0.5896, "step": 17410 }, { "epoch": 2.8422513366801354, "grad_norm": 3.03989839553833, "learning_rate": 1.495681418372282e-05, "loss": 0.6115, "step": 17411 }, { "epoch": 2.84241459532264, "grad_norm": 3.3689725399017334, "learning_rate": 1.4956257339430798e-05, "loss": 0.7296, "step": 17412 }, { "epoch": 2.8425778539651443, "grad_norm": 3.0156514644622803, "learning_rate": 1.4955700474765475e-05, "loss": 0.6181, "step": 17413 }, { "epoch": 2.8427411126076487, "grad_norm": 3.3032896518707275, "learning_rate": 1.4955143589729137e-05, "loss": 0.5835, "step": 17414 }, { "epoch": 2.842904371250153, "grad_norm": 2.818128824234009, "learning_rate": 1.4954586684324077e-05, "loss": 0.529, "step": 17415 }, { "epoch": 2.8430676298926576, "grad_norm": 3.3521170616149902, "learning_rate": 1.4954029758552582e-05, "loss": 0.6931, "step": 17416 }, { "epoch": 2.843230888535162, "grad_norm": 3.2407517433166504, "learning_rate": 1.4953472812416939e-05, "loss": 0.7304, "step": 17417 }, { "epoch": 2.8433941471776665, "grad_norm": 2.413970947265625, "learning_rate": 1.4952915845919441e-05, "loss": 0.5438, "step": 17418 }, { "epoch": 2.8435574058201705, "grad_norm": 2.961177349090576, "learning_rate": 1.4952358859062375e-05, "loss": 0.625, "step": 17419 }, { "epoch": 2.843720664462675, "grad_norm": 2.6786768436431885, "learning_rate": 1.495180185184803e-05, "loss": 0.5565, "step": 17420 }, { "epoch": 2.8438839231051793, "grad_norm": 2.6051011085510254, "learning_rate": 1.4951244824278704e-05, "loss": 0.5438, "step": 17421 }, { "epoch": 2.844047181747684, "grad_norm": 2.6537983417510986, "learning_rate": 1.4950687776356676e-05, "loss": 0.5092, "step": 17422 }, { "epoch": 2.844210440390188, "grad_norm": 2.7963368892669678, "learning_rate": 1.4950130708084238e-05, "loss": 0.6381, "step": 17423 }, { "epoch": 2.8443736990326927, "grad_norm": 2.9713220596313477, "learning_rate": 1.4949573619463685e-05, "loss": 0.5665, "step": 17424 }, { "epoch": 2.8445369576751967, "grad_norm": 2.8543992042541504, "learning_rate": 1.49490165104973e-05, "loss": 0.5679, "step": 17425 }, { "epoch": 2.844700216317701, "grad_norm": 3.028913736343384, "learning_rate": 1.4948459381187382e-05, "loss": 0.5835, "step": 17426 }, { "epoch": 2.8448634749602055, "grad_norm": 3.124907970428467, "learning_rate": 1.4947902231536214e-05, "loss": 0.6065, "step": 17427 }, { "epoch": 2.84502673360271, "grad_norm": 2.2851102352142334, "learning_rate": 1.4947345061546083e-05, "loss": 0.4945, "step": 17428 }, { "epoch": 2.8451899922452144, "grad_norm": 2.6553404331207275, "learning_rate": 1.494678787121929e-05, "loss": 0.5358, "step": 17429 }, { "epoch": 2.845353250887719, "grad_norm": 2.6484642028808594, "learning_rate": 1.4946230660558118e-05, "loss": 0.5185, "step": 17430 }, { "epoch": 2.8455165095302233, "grad_norm": 2.202601909637451, "learning_rate": 1.4945673429564864e-05, "loss": 0.4668, "step": 17431 }, { "epoch": 2.8456797681727277, "grad_norm": 3.2029683589935303, "learning_rate": 1.494511617824181e-05, "loss": 0.6549, "step": 17432 }, { "epoch": 2.845843026815232, "grad_norm": 2.9569339752197266, "learning_rate": 1.4944558906591252e-05, "loss": 0.5359, "step": 17433 }, { "epoch": 2.8460062854577366, "grad_norm": 3.184976100921631, "learning_rate": 1.4944001614615476e-05, "loss": 0.5455, "step": 17434 }, { "epoch": 2.846169544100241, "grad_norm": 2.7631351947784424, "learning_rate": 1.494344430231678e-05, "loss": 0.5391, "step": 17435 }, { "epoch": 2.8463328027427455, "grad_norm": 2.570103168487549, "learning_rate": 1.4942886969697447e-05, "loss": 0.5275, "step": 17436 }, { "epoch": 2.8464960613852495, "grad_norm": 3.024017572402954, "learning_rate": 1.4942329616759777e-05, "loss": 0.5939, "step": 17437 }, { "epoch": 2.846659320027754, "grad_norm": 2.5792980194091797, "learning_rate": 1.4941772243506055e-05, "loss": 0.505, "step": 17438 }, { "epoch": 2.8468225786702583, "grad_norm": 3.2925825119018555, "learning_rate": 1.494121484993857e-05, "loss": 0.6334, "step": 17439 }, { "epoch": 2.846985837312763, "grad_norm": 2.8493330478668213, "learning_rate": 1.4940657436059619e-05, "loss": 0.5598, "step": 17440 }, { "epoch": 2.847149095955267, "grad_norm": 2.72027850151062, "learning_rate": 1.494010000187149e-05, "loss": 0.4938, "step": 17441 }, { "epoch": 2.8473123545977717, "grad_norm": 3.2516086101531982, "learning_rate": 1.4939542547376475e-05, "loss": 0.6671, "step": 17442 }, { "epoch": 2.8474756132402756, "grad_norm": 2.414970874786377, "learning_rate": 1.4938985072576869e-05, "loss": 0.5077, "step": 17443 }, { "epoch": 2.84763887188278, "grad_norm": 3.26998233795166, "learning_rate": 1.4938427577474957e-05, "loss": 0.6669, "step": 17444 }, { "epoch": 2.8478021305252845, "grad_norm": 3.1218059062957764, "learning_rate": 1.4937870062073032e-05, "loss": 0.5879, "step": 17445 }, { "epoch": 2.847965389167789, "grad_norm": 2.68057918548584, "learning_rate": 1.4937312526373393e-05, "loss": 0.5636, "step": 17446 }, { "epoch": 2.8481286478102934, "grad_norm": 2.7803287506103516, "learning_rate": 1.4936754970378324e-05, "loss": 0.5816, "step": 17447 }, { "epoch": 2.848291906452798, "grad_norm": 2.898522138595581, "learning_rate": 1.4936197394090117e-05, "loss": 0.5195, "step": 17448 }, { "epoch": 2.8484551650953023, "grad_norm": 2.7952260971069336, "learning_rate": 1.493563979751107e-05, "loss": 0.5071, "step": 17449 }, { "epoch": 2.8486184237378067, "grad_norm": 3.2488105297088623, "learning_rate": 1.493508218064347e-05, "loss": 0.6658, "step": 17450 }, { "epoch": 2.848781682380311, "grad_norm": 2.335177183151245, "learning_rate": 1.493452454348961e-05, "loss": 0.434, "step": 17451 }, { "epoch": 2.8489449410228156, "grad_norm": 2.959456205368042, "learning_rate": 1.4933966886051784e-05, "loss": 0.5764, "step": 17452 }, { "epoch": 2.84910819966532, "grad_norm": 3.37800669670105, "learning_rate": 1.4933409208332283e-05, "loss": 0.6135, "step": 17453 }, { "epoch": 2.849271458307824, "grad_norm": 2.4981322288513184, "learning_rate": 1.49328515103334e-05, "loss": 0.557, "step": 17454 }, { "epoch": 2.8494347169503285, "grad_norm": 3.304044246673584, "learning_rate": 1.4932293792057426e-05, "loss": 0.6783, "step": 17455 }, { "epoch": 2.849597975592833, "grad_norm": 3.49410343170166, "learning_rate": 1.4931736053506654e-05, "loss": 0.6221, "step": 17456 }, { "epoch": 2.8497612342353373, "grad_norm": 2.800884485244751, "learning_rate": 1.4931178294683379e-05, "loss": 0.74, "step": 17457 }, { "epoch": 2.8499244928778418, "grad_norm": 2.938066244125366, "learning_rate": 1.493062051558989e-05, "loss": 0.627, "step": 17458 }, { "epoch": 2.850087751520346, "grad_norm": 2.773591995239258, "learning_rate": 1.4930062716228484e-05, "loss": 0.5507, "step": 17459 }, { "epoch": 2.8502510101628507, "grad_norm": 3.0868070125579834, "learning_rate": 1.4929504896601452e-05, "loss": 0.6406, "step": 17460 }, { "epoch": 2.8504142688053546, "grad_norm": 2.3104941844940186, "learning_rate": 1.4928947056711086e-05, "loss": 0.425, "step": 17461 }, { "epoch": 2.850577527447859, "grad_norm": 2.871527910232544, "learning_rate": 1.4928389196559678e-05, "loss": 0.5917, "step": 17462 }, { "epoch": 2.8507407860903635, "grad_norm": 2.9163501262664795, "learning_rate": 1.4927831316149524e-05, "loss": 0.5819, "step": 17463 }, { "epoch": 2.850904044732868, "grad_norm": 3.450523853302002, "learning_rate": 1.4927273415482916e-05, "loss": 0.8136, "step": 17464 }, { "epoch": 2.8510673033753724, "grad_norm": 2.8252761363983154, "learning_rate": 1.492671549456215e-05, "loss": 0.5581, "step": 17465 }, { "epoch": 2.851230562017877, "grad_norm": 2.664736032485962, "learning_rate": 1.4926157553389515e-05, "loss": 0.5842, "step": 17466 }, { "epoch": 2.8513938206603813, "grad_norm": 3.1381583213806152, "learning_rate": 1.4925599591967305e-05, "loss": 0.6311, "step": 17467 }, { "epoch": 2.8515570793028857, "grad_norm": 3.404693603515625, "learning_rate": 1.4925041610297815e-05, "loss": 0.7061, "step": 17468 }, { "epoch": 2.85172033794539, "grad_norm": 3.3495359420776367, "learning_rate": 1.492448360838334e-05, "loss": 0.7013, "step": 17469 }, { "epoch": 2.8518835965878946, "grad_norm": 2.908252000808716, "learning_rate": 1.4923925586226173e-05, "loss": 0.5635, "step": 17470 }, { "epoch": 2.852046855230399, "grad_norm": 2.7542667388916016, "learning_rate": 1.4923367543828608e-05, "loss": 0.6004, "step": 17471 }, { "epoch": 2.852210113872903, "grad_norm": 2.8762805461883545, "learning_rate": 1.4922809481192934e-05, "loss": 0.5724, "step": 17472 }, { "epoch": 2.8523733725154075, "grad_norm": 2.5410068035125732, "learning_rate": 1.4922251398321452e-05, "loss": 0.5403, "step": 17473 }, { "epoch": 2.852536631157912, "grad_norm": 3.0252513885498047, "learning_rate": 1.4921693295216453e-05, "loss": 0.6395, "step": 17474 }, { "epoch": 2.8526998898004163, "grad_norm": 2.6595351696014404, "learning_rate": 1.4921135171880232e-05, "loss": 0.5652, "step": 17475 }, { "epoch": 2.8528631484429208, "grad_norm": 2.765359878540039, "learning_rate": 1.4920577028315083e-05, "loss": 0.4941, "step": 17476 }, { "epoch": 2.853026407085425, "grad_norm": 3.2771060466766357, "learning_rate": 1.4920018864523295e-05, "loss": 0.6121, "step": 17477 }, { "epoch": 2.853189665727929, "grad_norm": 3.25710391998291, "learning_rate": 1.491946068050717e-05, "loss": 0.6949, "step": 17478 }, { "epoch": 2.8533529243704336, "grad_norm": 2.5176186561584473, "learning_rate": 1.4918902476268999e-05, "loss": 0.5515, "step": 17479 }, { "epoch": 2.853516183012938, "grad_norm": 2.833379030227661, "learning_rate": 1.491834425181108e-05, "loss": 0.6318, "step": 17480 }, { "epoch": 2.8536794416554425, "grad_norm": 2.8887712955474854, "learning_rate": 1.4917786007135703e-05, "loss": 0.5185, "step": 17481 }, { "epoch": 2.853842700297947, "grad_norm": 2.762834072113037, "learning_rate": 1.4917227742245165e-05, "loss": 0.569, "step": 17482 }, { "epoch": 2.8540059589404514, "grad_norm": 2.2758593559265137, "learning_rate": 1.4916669457141761e-05, "loss": 0.4718, "step": 17483 }, { "epoch": 2.854169217582956, "grad_norm": 3.4082956314086914, "learning_rate": 1.4916111151827784e-05, "loss": 0.6642, "step": 17484 }, { "epoch": 2.8543324762254603, "grad_norm": 3.0498499870300293, "learning_rate": 1.4915552826305533e-05, "loss": 0.5906, "step": 17485 }, { "epoch": 2.8544957348679647, "grad_norm": 2.40175724029541, "learning_rate": 1.4914994480577296e-05, "loss": 0.5058, "step": 17486 }, { "epoch": 2.854658993510469, "grad_norm": 3.1499061584472656, "learning_rate": 1.4914436114645377e-05, "loss": 0.562, "step": 17487 }, { "epoch": 2.8548222521529736, "grad_norm": 2.75888991355896, "learning_rate": 1.4913877728512062e-05, "loss": 0.5739, "step": 17488 }, { "epoch": 2.8549855107954776, "grad_norm": 3.2883522510528564, "learning_rate": 1.4913319322179656e-05, "loss": 0.5423, "step": 17489 }, { "epoch": 2.855148769437982, "grad_norm": 2.64717173576355, "learning_rate": 1.4912760895650446e-05, "loss": 0.5576, "step": 17490 }, { "epoch": 2.8553120280804865, "grad_norm": 2.7642736434936523, "learning_rate": 1.4912202448926733e-05, "loss": 0.5111, "step": 17491 }, { "epoch": 2.855475286722991, "grad_norm": 3.1128530502319336, "learning_rate": 1.4911643982010811e-05, "loss": 0.5845, "step": 17492 }, { "epoch": 2.8556385453654953, "grad_norm": 2.9559712409973145, "learning_rate": 1.4911085494904974e-05, "loss": 0.5639, "step": 17493 }, { "epoch": 2.8558018040079998, "grad_norm": 3.005939483642578, "learning_rate": 1.4910526987611518e-05, "loss": 0.4795, "step": 17494 }, { "epoch": 2.855965062650504, "grad_norm": 2.8123488426208496, "learning_rate": 1.4909968460132743e-05, "loss": 0.5625, "step": 17495 }, { "epoch": 2.856128321293008, "grad_norm": 2.69347882270813, "learning_rate": 1.4909409912470938e-05, "loss": 0.5116, "step": 17496 }, { "epoch": 2.8562915799355126, "grad_norm": 3.4893715381622314, "learning_rate": 1.4908851344628404e-05, "loss": 0.7014, "step": 17497 }, { "epoch": 2.856454838578017, "grad_norm": 3.0952308177948, "learning_rate": 1.4908292756607434e-05, "loss": 0.6296, "step": 17498 }, { "epoch": 2.8566180972205215, "grad_norm": 2.839812755584717, "learning_rate": 1.490773414841033e-05, "loss": 0.6326, "step": 17499 }, { "epoch": 2.856781355863026, "grad_norm": 2.445878744125366, "learning_rate": 1.4907175520039381e-05, "loss": 0.3839, "step": 17500 }, { "epoch": 2.8569446145055304, "grad_norm": 3.1704039573669434, "learning_rate": 1.4906616871496886e-05, "loss": 0.5736, "step": 17501 }, { "epoch": 2.857107873148035, "grad_norm": 3.319516658782959, "learning_rate": 1.4906058202785144e-05, "loss": 0.6492, "step": 17502 }, { "epoch": 2.8572711317905393, "grad_norm": 3.1714119911193848, "learning_rate": 1.4905499513906448e-05, "loss": 0.5808, "step": 17503 }, { "epoch": 2.8574343904330437, "grad_norm": 3.2498250007629395, "learning_rate": 1.4904940804863097e-05, "loss": 0.6216, "step": 17504 }, { "epoch": 2.857597649075548, "grad_norm": 2.726165294647217, "learning_rate": 1.4904382075657383e-05, "loss": 0.5083, "step": 17505 }, { "epoch": 2.8577609077180526, "grad_norm": 2.9342384338378906, "learning_rate": 1.4903823326291607e-05, "loss": 0.5712, "step": 17506 }, { "epoch": 2.8579241663605566, "grad_norm": 2.85678768157959, "learning_rate": 1.4903264556768066e-05, "loss": 0.6485, "step": 17507 }, { "epoch": 2.858087425003061, "grad_norm": 2.802246570587158, "learning_rate": 1.4902705767089057e-05, "loss": 0.4777, "step": 17508 }, { "epoch": 2.8582506836455654, "grad_norm": 2.802603006362915, "learning_rate": 1.4902146957256877e-05, "loss": 0.6203, "step": 17509 }, { "epoch": 2.85841394228807, "grad_norm": 2.708522081375122, "learning_rate": 1.490158812727382e-05, "loss": 0.5647, "step": 17510 }, { "epoch": 2.8585772009305743, "grad_norm": 3.029628038406372, "learning_rate": 1.4901029277142183e-05, "loss": 0.6021, "step": 17511 }, { "epoch": 2.8587404595730788, "grad_norm": 2.8158633708953857, "learning_rate": 1.4900470406864265e-05, "loss": 0.5735, "step": 17512 }, { "epoch": 2.858903718215583, "grad_norm": 3.1016592979431152, "learning_rate": 1.4899911516442367e-05, "loss": 0.5797, "step": 17513 }, { "epoch": 2.859066976858087, "grad_norm": 2.949410915374756, "learning_rate": 1.4899352605878783e-05, "loss": 0.5574, "step": 17514 }, { "epoch": 2.8592302355005916, "grad_norm": 2.8495893478393555, "learning_rate": 1.4898793675175808e-05, "loss": 0.5649, "step": 17515 }, { "epoch": 2.859393494143096, "grad_norm": 2.897979497909546, "learning_rate": 1.4898234724335744e-05, "loss": 0.6655, "step": 17516 }, { "epoch": 2.8595567527856005, "grad_norm": 2.5424726009368896, "learning_rate": 1.4897675753360883e-05, "loss": 0.5302, "step": 17517 }, { "epoch": 2.859720011428105, "grad_norm": 3.313244342803955, "learning_rate": 1.489711676225353e-05, "loss": 0.5748, "step": 17518 }, { "epoch": 2.8598832700706094, "grad_norm": 2.5901448726654053, "learning_rate": 1.4896557751015979e-05, "loss": 0.5814, "step": 17519 }, { "epoch": 2.860046528713114, "grad_norm": 2.9565227031707764, "learning_rate": 1.4895998719650526e-05, "loss": 0.5386, "step": 17520 }, { "epoch": 2.8602097873556183, "grad_norm": 2.9114325046539307, "learning_rate": 1.4895439668159472e-05, "loss": 0.6519, "step": 17521 }, { "epoch": 2.8603730459981227, "grad_norm": 2.755186080932617, "learning_rate": 1.4894880596545112e-05, "loss": 0.6211, "step": 17522 }, { "epoch": 2.860536304640627, "grad_norm": 2.714050531387329, "learning_rate": 1.4894321504809747e-05, "loss": 0.5389, "step": 17523 }, { "epoch": 2.8606995632831316, "grad_norm": 2.468426465988159, "learning_rate": 1.489376239295568e-05, "loss": 0.505, "step": 17524 }, { "epoch": 2.8608628219256356, "grad_norm": 2.398968458175659, "learning_rate": 1.4893203260985196e-05, "loss": 0.536, "step": 17525 }, { "epoch": 2.86102608056814, "grad_norm": 2.4736595153808594, "learning_rate": 1.4892644108900604e-05, "loss": 0.4985, "step": 17526 }, { "epoch": 2.8611893392106444, "grad_norm": 3.572280168533325, "learning_rate": 1.48920849367042e-05, "loss": 0.6772, "step": 17527 }, { "epoch": 2.861352597853149, "grad_norm": 2.6607322692871094, "learning_rate": 1.4891525744398282e-05, "loss": 0.5333, "step": 17528 }, { "epoch": 2.8615158564956533, "grad_norm": 2.776132106781006, "learning_rate": 1.4890966531985149e-05, "loss": 0.5796, "step": 17529 }, { "epoch": 2.8616791151381578, "grad_norm": 2.913447618484497, "learning_rate": 1.4890407299467097e-05, "loss": 0.5862, "step": 17530 }, { "epoch": 2.8618423737806618, "grad_norm": 2.9512696266174316, "learning_rate": 1.4889848046846428e-05, "loss": 0.5415, "step": 17531 }, { "epoch": 2.862005632423166, "grad_norm": 2.729323387145996, "learning_rate": 1.4889288774125444e-05, "loss": 0.6525, "step": 17532 }, { "epoch": 2.8621688910656706, "grad_norm": 2.645808696746826, "learning_rate": 1.4888729481306436e-05, "loss": 0.5399, "step": 17533 }, { "epoch": 2.862332149708175, "grad_norm": 2.8096725940704346, "learning_rate": 1.4888170168391707e-05, "loss": 0.5623, "step": 17534 }, { "epoch": 2.8624954083506795, "grad_norm": 2.953939437866211, "learning_rate": 1.4887610835383558e-05, "loss": 0.6027, "step": 17535 }, { "epoch": 2.862658666993184, "grad_norm": 3.0906851291656494, "learning_rate": 1.4887051482284285e-05, "loss": 0.5093, "step": 17536 }, { "epoch": 2.8628219256356884, "grad_norm": 2.6068100929260254, "learning_rate": 1.4886492109096189e-05, "loss": 0.5173, "step": 17537 }, { "epoch": 2.862985184278193, "grad_norm": 2.3557844161987305, "learning_rate": 1.4885932715821569e-05, "loss": 0.4793, "step": 17538 }, { "epoch": 2.8631484429206973, "grad_norm": 3.27185320854187, "learning_rate": 1.4885373302462724e-05, "loss": 0.6552, "step": 17539 }, { "epoch": 2.8633117015632017, "grad_norm": 3.5019173622131348, "learning_rate": 1.4884813869021954e-05, "loss": 0.97, "step": 17540 }, { "epoch": 2.863474960205706, "grad_norm": 2.5732944011688232, "learning_rate": 1.488425441550156e-05, "loss": 0.5087, "step": 17541 }, { "epoch": 2.86363821884821, "grad_norm": 2.791588068008423, "learning_rate": 1.488369494190384e-05, "loss": 0.511, "step": 17542 }, { "epoch": 2.8638014774907146, "grad_norm": 2.9154632091522217, "learning_rate": 1.4883135448231095e-05, "loss": 0.6493, "step": 17543 }, { "epoch": 2.863964736133219, "grad_norm": 2.9080443382263184, "learning_rate": 1.4882575934485623e-05, "loss": 0.5937, "step": 17544 }, { "epoch": 2.8641279947757234, "grad_norm": 2.3559935092926025, "learning_rate": 1.4882016400669725e-05, "loss": 0.4257, "step": 17545 }, { "epoch": 2.864291253418228, "grad_norm": 3.1184000968933105, "learning_rate": 1.48814568467857e-05, "loss": 0.6175, "step": 17546 }, { "epoch": 2.8644545120607323, "grad_norm": 2.457860231399536, "learning_rate": 1.488089727283585e-05, "loss": 0.4733, "step": 17547 }, { "epoch": 2.8646177707032368, "grad_norm": 3.336333990097046, "learning_rate": 1.4880337678822477e-05, "loss": 0.6585, "step": 17548 }, { "epoch": 2.8647810293457407, "grad_norm": 2.501126289367676, "learning_rate": 1.4879778064747874e-05, "loss": 0.47, "step": 17549 }, { "epoch": 2.864944287988245, "grad_norm": 2.8042516708374023, "learning_rate": 1.4879218430614346e-05, "loss": 0.685, "step": 17550 }, { "epoch": 2.8651075466307496, "grad_norm": 2.864231586456299, "learning_rate": 1.4878658776424198e-05, "loss": 0.4795, "step": 17551 }, { "epoch": 2.865270805273254, "grad_norm": 2.537881851196289, "learning_rate": 1.4878099102179722e-05, "loss": 0.5011, "step": 17552 }, { "epoch": 2.8654340639157585, "grad_norm": 3.1388635635375977, "learning_rate": 1.4877539407883226e-05, "loss": 0.5475, "step": 17553 }, { "epoch": 2.865597322558263, "grad_norm": 2.8346686363220215, "learning_rate": 1.4876979693537004e-05, "loss": 0.5648, "step": 17554 }, { "epoch": 2.8657605812007674, "grad_norm": 2.7548792362213135, "learning_rate": 1.487641995914336e-05, "loss": 0.458, "step": 17555 }, { "epoch": 2.865923839843272, "grad_norm": 2.82930588722229, "learning_rate": 1.4875860204704595e-05, "loss": 0.5098, "step": 17556 }, { "epoch": 2.8660870984857763, "grad_norm": 2.9732561111450195, "learning_rate": 1.4875300430223009e-05, "loss": 0.538, "step": 17557 }, { "epoch": 2.8662503571282807, "grad_norm": 3.3636600971221924, "learning_rate": 1.4874740635700908e-05, "loss": 0.6033, "step": 17558 }, { "epoch": 2.866413615770785, "grad_norm": 3.2680625915527344, "learning_rate": 1.4874180821140588e-05, "loss": 0.5573, "step": 17559 }, { "epoch": 2.866576874413289, "grad_norm": 3.235300064086914, "learning_rate": 1.4873620986544348e-05, "loss": 0.6698, "step": 17560 }, { "epoch": 2.8667401330557936, "grad_norm": 2.9327821731567383, "learning_rate": 1.4873061131914492e-05, "loss": 0.5447, "step": 17561 }, { "epoch": 2.866903391698298, "grad_norm": 2.882589101791382, "learning_rate": 1.4872501257253325e-05, "loss": 0.5504, "step": 17562 }, { "epoch": 2.8670666503408024, "grad_norm": 3.0809900760650635, "learning_rate": 1.4871941362563145e-05, "loss": 0.6161, "step": 17563 }, { "epoch": 2.867229908983307, "grad_norm": 3.067535877227783, "learning_rate": 1.4871381447846251e-05, "loss": 0.6599, "step": 17564 }, { "epoch": 2.8673931676258113, "grad_norm": 3.6544911861419678, "learning_rate": 1.4870821513104949e-05, "loss": 0.9774, "step": 17565 }, { "epoch": 2.8675564262683153, "grad_norm": 3.0233800411224365, "learning_rate": 1.4870261558341536e-05, "loss": 0.66, "step": 17566 }, { "epoch": 2.8677196849108197, "grad_norm": 2.2951107025146484, "learning_rate": 1.486970158355832e-05, "loss": 0.4405, "step": 17567 }, { "epoch": 2.867882943553324, "grad_norm": 3.0421063899993896, "learning_rate": 1.48691415887576e-05, "loss": 0.5154, "step": 17568 }, { "epoch": 2.8680462021958286, "grad_norm": 2.5248804092407227, "learning_rate": 1.4868581573941676e-05, "loss": 0.5099, "step": 17569 }, { "epoch": 2.868209460838333, "grad_norm": 2.783114194869995, "learning_rate": 1.4868021539112852e-05, "loss": 0.5874, "step": 17570 }, { "epoch": 2.8683727194808375, "grad_norm": 2.7691073417663574, "learning_rate": 1.4867461484273432e-05, "loss": 0.515, "step": 17571 }, { "epoch": 2.868535978123342, "grad_norm": 2.7994983196258545, "learning_rate": 1.4866901409425709e-05, "loss": 0.5159, "step": 17572 }, { "epoch": 2.8686992367658464, "grad_norm": 2.860807180404663, "learning_rate": 1.4866341314572e-05, "loss": 0.562, "step": 17573 }, { "epoch": 2.868862495408351, "grad_norm": 2.960993766784668, "learning_rate": 1.4865781199714593e-05, "loss": 0.616, "step": 17574 }, { "epoch": 2.8690257540508552, "grad_norm": 2.55664324760437, "learning_rate": 1.4865221064855799e-05, "loss": 0.5044, "step": 17575 }, { "epoch": 2.8691890126933597, "grad_norm": 2.744828462600708, "learning_rate": 1.486466090999792e-05, "loss": 0.6117, "step": 17576 }, { "epoch": 2.8693522713358637, "grad_norm": 2.7713828086853027, "learning_rate": 1.4864100735143255e-05, "loss": 0.519, "step": 17577 }, { "epoch": 2.869515529978368, "grad_norm": 2.661623954772949, "learning_rate": 1.4863540540294108e-05, "loss": 0.6251, "step": 17578 }, { "epoch": 2.8696787886208726, "grad_norm": 2.9774985313415527, "learning_rate": 1.4862980325452783e-05, "loss": 0.534, "step": 17579 }, { "epoch": 2.869842047263377, "grad_norm": 2.561377763748169, "learning_rate": 1.4862420090621583e-05, "loss": 0.5648, "step": 17580 }, { "epoch": 2.8700053059058814, "grad_norm": 2.866819381713867, "learning_rate": 1.486185983580281e-05, "loss": 0.5187, "step": 17581 }, { "epoch": 2.870168564548386, "grad_norm": 2.4924004077911377, "learning_rate": 1.4861299560998766e-05, "loss": 0.4683, "step": 17582 }, { "epoch": 2.8703318231908903, "grad_norm": 2.7007803916931152, "learning_rate": 1.4860739266211757e-05, "loss": 0.5304, "step": 17583 }, { "epoch": 2.8704950818333943, "grad_norm": 2.6591763496398926, "learning_rate": 1.4860178951444082e-05, "loss": 0.5756, "step": 17584 }, { "epoch": 2.8706583404758987, "grad_norm": 2.856252908706665, "learning_rate": 1.4859618616698046e-05, "loss": 0.6511, "step": 17585 }, { "epoch": 2.870821599118403, "grad_norm": 2.3972647190093994, "learning_rate": 1.4859058261975954e-05, "loss": 0.5293, "step": 17586 }, { "epoch": 2.8709848577609076, "grad_norm": 2.982065200805664, "learning_rate": 1.485849788728011e-05, "loss": 0.6167, "step": 17587 }, { "epoch": 2.871148116403412, "grad_norm": 2.9366424083709717, "learning_rate": 1.4857937492612814e-05, "loss": 0.5965, "step": 17588 }, { "epoch": 2.8713113750459165, "grad_norm": 2.526895523071289, "learning_rate": 1.4857377077976369e-05, "loss": 0.4678, "step": 17589 }, { "epoch": 2.871474633688421, "grad_norm": 2.786733865737915, "learning_rate": 1.4856816643373084e-05, "loss": 0.568, "step": 17590 }, { "epoch": 2.8716378923309254, "grad_norm": 3.009544849395752, "learning_rate": 1.4856256188805258e-05, "loss": 0.5481, "step": 17591 }, { "epoch": 2.87180115097343, "grad_norm": 2.847805976867676, "learning_rate": 1.48556957142752e-05, "loss": 0.506, "step": 17592 }, { "epoch": 2.8719644096159342, "grad_norm": 2.616316318511963, "learning_rate": 1.4855135219785206e-05, "loss": 0.5167, "step": 17593 }, { "epoch": 2.8721276682584387, "grad_norm": 3.1275899410247803, "learning_rate": 1.4854574705337588e-05, "loss": 0.6169, "step": 17594 }, { "epoch": 2.8722909269009427, "grad_norm": 2.3762245178222656, "learning_rate": 1.4854014170934642e-05, "loss": 0.4033, "step": 17595 }, { "epoch": 2.872454185543447, "grad_norm": 2.9934489727020264, "learning_rate": 1.4853453616578682e-05, "loss": 0.5929, "step": 17596 }, { "epoch": 2.8726174441859516, "grad_norm": 2.9830873012542725, "learning_rate": 1.4852893042272005e-05, "loss": 0.5132, "step": 17597 }, { "epoch": 2.872780702828456, "grad_norm": 3.629472494125366, "learning_rate": 1.4852332448016914e-05, "loss": 0.7237, "step": 17598 }, { "epoch": 2.8729439614709604, "grad_norm": 2.890840530395508, "learning_rate": 1.485177183381572e-05, "loss": 0.5149, "step": 17599 }, { "epoch": 2.873107220113465, "grad_norm": 3.168389081954956, "learning_rate": 1.485121119967072e-05, "loss": 0.6062, "step": 17600 }, { "epoch": 2.8732704787559693, "grad_norm": 2.7682933807373047, "learning_rate": 1.4850650545584226e-05, "loss": 0.549, "step": 17601 }, { "epoch": 2.8734337373984733, "grad_norm": 3.0186688899993896, "learning_rate": 1.4850089871558543e-05, "loss": 0.5075, "step": 17602 }, { "epoch": 2.8735969960409777, "grad_norm": 3.242039203643799, "learning_rate": 1.4849529177595965e-05, "loss": 0.6381, "step": 17603 }, { "epoch": 2.873760254683482, "grad_norm": 2.773420810699463, "learning_rate": 1.4848968463698806e-05, "loss": 0.5892, "step": 17604 }, { "epoch": 2.8739235133259866, "grad_norm": 2.427163600921631, "learning_rate": 1.4848407729869367e-05, "loss": 0.4686, "step": 17605 }, { "epoch": 2.874086771968491, "grad_norm": 2.526625871658325, "learning_rate": 1.4847846976109956e-05, "loss": 0.4423, "step": 17606 }, { "epoch": 2.8742500306109955, "grad_norm": 3.61364483833313, "learning_rate": 1.484728620242288e-05, "loss": 0.6879, "step": 17607 }, { "epoch": 2.8744132892535, "grad_norm": 3.116243600845337, "learning_rate": 1.4846725408810436e-05, "loss": 0.6208, "step": 17608 }, { "epoch": 2.8745765478960044, "grad_norm": 2.7107152938842773, "learning_rate": 1.4846164595274937e-05, "loss": 0.4851, "step": 17609 }, { "epoch": 2.874739806538509, "grad_norm": 3.1826634407043457, "learning_rate": 1.484560376181868e-05, "loss": 0.5872, "step": 17610 }, { "epoch": 2.8749030651810132, "grad_norm": 3.024261951446533, "learning_rate": 1.484504290844398e-05, "loss": 0.6325, "step": 17611 }, { "epoch": 2.8750663238235177, "grad_norm": 2.931196928024292, "learning_rate": 1.484448203515314e-05, "loss": 0.5674, "step": 17612 }, { "epoch": 2.8752295824660217, "grad_norm": 2.598132848739624, "learning_rate": 1.484392114194846e-05, "loss": 0.4831, "step": 17613 }, { "epoch": 2.875392841108526, "grad_norm": 2.9078147411346436, "learning_rate": 1.484336022883225e-05, "loss": 0.565, "step": 17614 }, { "epoch": 2.8755560997510305, "grad_norm": 2.894940137863159, "learning_rate": 1.4842799295806816e-05, "loss": 0.5486, "step": 17615 }, { "epoch": 2.875719358393535, "grad_norm": 2.9405767917633057, "learning_rate": 1.484223834287446e-05, "loss": 0.5623, "step": 17616 }, { "epoch": 2.8758826170360394, "grad_norm": 2.975743532180786, "learning_rate": 1.4841677370037493e-05, "loss": 0.7459, "step": 17617 }, { "epoch": 2.876045875678544, "grad_norm": 2.7306230068206787, "learning_rate": 1.4841116377298219e-05, "loss": 0.5582, "step": 17618 }, { "epoch": 2.876209134321048, "grad_norm": 2.162355899810791, "learning_rate": 1.4840555364658941e-05, "loss": 0.4311, "step": 17619 }, { "epoch": 2.8763723929635523, "grad_norm": 2.795177459716797, "learning_rate": 1.483999433212197e-05, "loss": 0.6349, "step": 17620 }, { "epoch": 2.8765356516060567, "grad_norm": 2.554272174835205, "learning_rate": 1.4839433279689608e-05, "loss": 0.5018, "step": 17621 }, { "epoch": 2.876698910248561, "grad_norm": 2.9526214599609375, "learning_rate": 1.4838872207364165e-05, "loss": 0.6052, "step": 17622 }, { "epoch": 2.8768621688910656, "grad_norm": 2.9560225009918213, "learning_rate": 1.4838311115147942e-05, "loss": 0.536, "step": 17623 }, { "epoch": 2.87702542753357, "grad_norm": 3.3526313304901123, "learning_rate": 1.4837750003043254e-05, "loss": 0.7473, "step": 17624 }, { "epoch": 2.8771886861760745, "grad_norm": 3.1115431785583496, "learning_rate": 1.4837188871052399e-05, "loss": 0.6595, "step": 17625 }, { "epoch": 2.877351944818579, "grad_norm": 3.1297781467437744, "learning_rate": 1.4836627719177688e-05, "loss": 0.6914, "step": 17626 }, { "epoch": 2.8775152034610834, "grad_norm": 2.9681262969970703, "learning_rate": 1.4836066547421428e-05, "loss": 0.5366, "step": 17627 }, { "epoch": 2.877678462103588, "grad_norm": 2.5499966144561768, "learning_rate": 1.4835505355785923e-05, "loss": 0.415, "step": 17628 }, { "epoch": 2.8778417207460922, "grad_norm": 3.543879985809326, "learning_rate": 1.483494414427348e-05, "loss": 0.64, "step": 17629 }, { "epoch": 2.8780049793885962, "grad_norm": 3.1988134384155273, "learning_rate": 1.4834382912886409e-05, "loss": 0.5198, "step": 17630 }, { "epoch": 2.8781682380311007, "grad_norm": 2.954294204711914, "learning_rate": 1.4833821661627018e-05, "loss": 0.6292, "step": 17631 }, { "epoch": 2.878331496673605, "grad_norm": 2.7839083671569824, "learning_rate": 1.4833260390497606e-05, "loss": 0.5245, "step": 17632 }, { "epoch": 2.8784947553161095, "grad_norm": 2.898293972015381, "learning_rate": 1.4832699099500488e-05, "loss": 0.6202, "step": 17633 }, { "epoch": 2.878658013958614, "grad_norm": 2.8569023609161377, "learning_rate": 1.4832137788637968e-05, "loss": 0.5221, "step": 17634 }, { "epoch": 2.8788212726011184, "grad_norm": 2.27197003364563, "learning_rate": 1.4831576457912355e-05, "loss": 0.471, "step": 17635 }, { "epoch": 2.878984531243623, "grad_norm": 2.818337917327881, "learning_rate": 1.483101510732596e-05, "loss": 0.5388, "step": 17636 }, { "epoch": 2.879147789886127, "grad_norm": 3.1489851474761963, "learning_rate": 1.4830453736881079e-05, "loss": 0.613, "step": 17637 }, { "epoch": 2.8793110485286313, "grad_norm": 3.7231507301330566, "learning_rate": 1.4829892346580029e-05, "loss": 0.5958, "step": 17638 }, { "epoch": 2.8794743071711357, "grad_norm": 2.82023024559021, "learning_rate": 1.4829330936425112e-05, "loss": 0.608, "step": 17639 }, { "epoch": 2.87963756581364, "grad_norm": 2.9132578372955322, "learning_rate": 1.4828769506418646e-05, "loss": 0.5828, "step": 17640 }, { "epoch": 2.8798008244561446, "grad_norm": 2.5297012329101562, "learning_rate": 1.4828208056562929e-05, "loss": 0.5856, "step": 17641 }, { "epoch": 2.879964083098649, "grad_norm": 2.9053826332092285, "learning_rate": 1.4827646586860272e-05, "loss": 0.5974, "step": 17642 }, { "epoch": 2.8801273417411535, "grad_norm": 2.7085037231445312, "learning_rate": 1.4827085097312979e-05, "loss": 0.5358, "step": 17643 }, { "epoch": 2.880290600383658, "grad_norm": 2.851666212081909, "learning_rate": 1.4826523587923365e-05, "loss": 0.5304, "step": 17644 }, { "epoch": 2.8804538590261624, "grad_norm": 3.562028169631958, "learning_rate": 1.4825962058693735e-05, "loss": 0.6824, "step": 17645 }, { "epoch": 2.880617117668667, "grad_norm": 2.7645492553710938, "learning_rate": 1.4825400509626397e-05, "loss": 0.5401, "step": 17646 }, { "epoch": 2.8807803763111712, "grad_norm": 2.672553062438965, "learning_rate": 1.4824838940723661e-05, "loss": 0.539, "step": 17647 }, { "epoch": 2.8809436349536752, "grad_norm": 2.7966115474700928, "learning_rate": 1.4824277351987833e-05, "loss": 0.6232, "step": 17648 }, { "epoch": 2.8811068935961797, "grad_norm": 2.291844606399536, "learning_rate": 1.4823715743421219e-05, "loss": 0.4993, "step": 17649 }, { "epoch": 2.881270152238684, "grad_norm": 2.225715160369873, "learning_rate": 1.4823154115026132e-05, "loss": 0.4518, "step": 17650 }, { "epoch": 2.8814334108811885, "grad_norm": 2.820513963699341, "learning_rate": 1.4822592466804886e-05, "loss": 0.6171, "step": 17651 }, { "epoch": 2.881596669523693, "grad_norm": 2.8516769409179688, "learning_rate": 1.4822030798759777e-05, "loss": 0.5299, "step": 17652 }, { "epoch": 2.8817599281661974, "grad_norm": 2.184359073638916, "learning_rate": 1.4821469110893124e-05, "loss": 0.4641, "step": 17653 }, { "epoch": 2.8819231868087014, "grad_norm": 2.96653413772583, "learning_rate": 1.4820907403207227e-05, "loss": 0.6602, "step": 17654 }, { "epoch": 2.882086445451206, "grad_norm": 3.023401975631714, "learning_rate": 1.4820345675704402e-05, "loss": 0.6589, "step": 17655 }, { "epoch": 2.8822497040937103, "grad_norm": 2.826662063598633, "learning_rate": 1.4819783928386959e-05, "loss": 0.5457, "step": 17656 }, { "epoch": 2.8824129627362147, "grad_norm": 2.707066535949707, "learning_rate": 1.4819222161257203e-05, "loss": 0.5185, "step": 17657 }, { "epoch": 2.882576221378719, "grad_norm": 3.0414490699768066, "learning_rate": 1.4818660374317444e-05, "loss": 0.6153, "step": 17658 }, { "epoch": 2.8827394800212236, "grad_norm": 2.6919808387756348, "learning_rate": 1.4818098567569992e-05, "loss": 0.5678, "step": 17659 }, { "epoch": 2.882902738663728, "grad_norm": 2.384134531021118, "learning_rate": 1.4817536741017153e-05, "loss": 0.4913, "step": 17660 }, { "epoch": 2.8830659973062325, "grad_norm": 2.9198408126831055, "learning_rate": 1.4816974894661242e-05, "loss": 0.6062, "step": 17661 }, { "epoch": 2.883229255948737, "grad_norm": 2.9356911182403564, "learning_rate": 1.4816413028504568e-05, "loss": 0.599, "step": 17662 }, { "epoch": 2.8833925145912414, "grad_norm": 3.1907575130462646, "learning_rate": 1.4815851142549437e-05, "loss": 0.5874, "step": 17663 }, { "epoch": 2.883555773233746, "grad_norm": 2.588968515396118, "learning_rate": 1.4815289236798159e-05, "loss": 0.4532, "step": 17664 }, { "epoch": 2.88371903187625, "grad_norm": 2.7373857498168945, "learning_rate": 1.4814727311253047e-05, "loss": 0.5053, "step": 17665 }, { "epoch": 2.883882290518754, "grad_norm": 2.5868585109710693, "learning_rate": 1.481416536591641e-05, "loss": 0.5364, "step": 17666 }, { "epoch": 2.8840455491612587, "grad_norm": 3.2303335666656494, "learning_rate": 1.4813603400790552e-05, "loss": 0.5727, "step": 17667 }, { "epoch": 2.884208807803763, "grad_norm": 2.813711166381836, "learning_rate": 1.4813041415877794e-05, "loss": 0.4993, "step": 17668 }, { "epoch": 2.8843720664462675, "grad_norm": 2.616687774658203, "learning_rate": 1.4812479411180437e-05, "loss": 0.5515, "step": 17669 }, { "epoch": 2.884535325088772, "grad_norm": 2.6813156604766846, "learning_rate": 1.4811917386700794e-05, "loss": 0.5932, "step": 17670 }, { "epoch": 2.8846985837312764, "grad_norm": 2.8413262367248535, "learning_rate": 1.4811355342441176e-05, "loss": 0.5597, "step": 17671 }, { "epoch": 2.8848618423737804, "grad_norm": 2.9275450706481934, "learning_rate": 1.4810793278403895e-05, "loss": 0.5586, "step": 17672 }, { "epoch": 2.885025101016285, "grad_norm": 3.0398716926574707, "learning_rate": 1.4810231194591256e-05, "loss": 0.55, "step": 17673 }, { "epoch": 2.8851883596587893, "grad_norm": 2.80772066116333, "learning_rate": 1.4809669091005574e-05, "loss": 0.5446, "step": 17674 }, { "epoch": 2.8853516183012937, "grad_norm": 3.0675230026245117, "learning_rate": 1.480910696764916e-05, "loss": 0.6083, "step": 17675 }, { "epoch": 2.885514876943798, "grad_norm": 3.1216320991516113, "learning_rate": 1.4808544824524322e-05, "loss": 0.5934, "step": 17676 }, { "epoch": 2.8856781355863026, "grad_norm": 3.4055216312408447, "learning_rate": 1.4807982661633371e-05, "loss": 0.6128, "step": 17677 }, { "epoch": 2.885841394228807, "grad_norm": 2.51214861869812, "learning_rate": 1.480742047897862e-05, "loss": 0.4499, "step": 17678 }, { "epoch": 2.8860046528713115, "grad_norm": 2.700289011001587, "learning_rate": 1.4806858276562379e-05, "loss": 0.5534, "step": 17679 }, { "epoch": 2.886167911513816, "grad_norm": 2.9410927295684814, "learning_rate": 1.4806296054386959e-05, "loss": 0.5064, "step": 17680 }, { "epoch": 2.8863311701563203, "grad_norm": 3.313030958175659, "learning_rate": 1.4805733812454668e-05, "loss": 0.605, "step": 17681 }, { "epoch": 2.886494428798825, "grad_norm": 3.0532472133636475, "learning_rate": 1.4805171550767821e-05, "loss": 0.6326, "step": 17682 }, { "epoch": 2.886657687441329, "grad_norm": 2.7544257640838623, "learning_rate": 1.4804609269328729e-05, "loss": 0.5252, "step": 17683 }, { "epoch": 2.886820946083833, "grad_norm": 2.814985990524292, "learning_rate": 1.4804046968139702e-05, "loss": 0.5576, "step": 17684 }, { "epoch": 2.8869842047263377, "grad_norm": 2.8028504848480225, "learning_rate": 1.4803484647203055e-05, "loss": 0.6285, "step": 17685 }, { "epoch": 2.887147463368842, "grad_norm": 3.3032689094543457, "learning_rate": 1.4802922306521095e-05, "loss": 0.5805, "step": 17686 }, { "epoch": 2.8873107220113465, "grad_norm": 2.887667179107666, "learning_rate": 1.4802359946096129e-05, "loss": 0.5769, "step": 17687 }, { "epoch": 2.887473980653851, "grad_norm": 2.4686315059661865, "learning_rate": 1.480179756593048e-05, "loss": 0.5145, "step": 17688 }, { "epoch": 2.8876372392963554, "grad_norm": 3.1214230060577393, "learning_rate": 1.4801235166026456e-05, "loss": 0.5679, "step": 17689 }, { "epoch": 2.8878004979388594, "grad_norm": 2.7102086544036865, "learning_rate": 1.4800672746386364e-05, "loss": 0.5142, "step": 17690 }, { "epoch": 2.887963756581364, "grad_norm": 2.2708053588867188, "learning_rate": 1.4800110307012521e-05, "loss": 0.443, "step": 17691 }, { "epoch": 2.8881270152238683, "grad_norm": 3.082376480102539, "learning_rate": 1.4799547847907234e-05, "loss": 0.5641, "step": 17692 }, { "epoch": 2.8882902738663727, "grad_norm": 2.380077838897705, "learning_rate": 1.4798985369072818e-05, "loss": 0.5133, "step": 17693 }, { "epoch": 2.888453532508877, "grad_norm": 3.0061049461364746, "learning_rate": 1.4798422870511588e-05, "loss": 0.556, "step": 17694 }, { "epoch": 2.8886167911513816, "grad_norm": 2.830209970474243, "learning_rate": 1.4797860352225854e-05, "loss": 0.4857, "step": 17695 }, { "epoch": 2.888780049793886, "grad_norm": 2.6076409816741943, "learning_rate": 1.4797297814217925e-05, "loss": 0.5574, "step": 17696 }, { "epoch": 2.8889433084363905, "grad_norm": 3.2043638229370117, "learning_rate": 1.4796735256490117e-05, "loss": 0.5367, "step": 17697 }, { "epoch": 2.889106567078895, "grad_norm": 3.268683433532715, "learning_rate": 1.4796172679044744e-05, "loss": 0.6044, "step": 17698 }, { "epoch": 2.8892698257213993, "grad_norm": 3.237809896469116, "learning_rate": 1.479561008188411e-05, "loss": 0.7041, "step": 17699 }, { "epoch": 2.889433084363904, "grad_norm": 2.96502685546875, "learning_rate": 1.479504746501054e-05, "loss": 0.6652, "step": 17700 }, { "epoch": 2.8895963430064078, "grad_norm": 2.807875156402588, "learning_rate": 1.4794484828426339e-05, "loss": 0.5155, "step": 17701 }, { "epoch": 2.889759601648912, "grad_norm": 3.1210663318634033, "learning_rate": 1.4793922172133821e-05, "loss": 0.686, "step": 17702 }, { "epoch": 2.8899228602914167, "grad_norm": 3.118460178375244, "learning_rate": 1.4793359496135298e-05, "loss": 0.5444, "step": 17703 }, { "epoch": 2.890086118933921, "grad_norm": 2.78043794631958, "learning_rate": 1.4792796800433083e-05, "loss": 0.5527, "step": 17704 }, { "epoch": 2.8902493775764255, "grad_norm": 2.8606815338134766, "learning_rate": 1.4792234085029494e-05, "loss": 0.5237, "step": 17705 }, { "epoch": 2.89041263621893, "grad_norm": 2.8877980709075928, "learning_rate": 1.4791671349926837e-05, "loss": 0.6331, "step": 17706 }, { "epoch": 2.890575894861434, "grad_norm": 2.61301326751709, "learning_rate": 1.4791108595127427e-05, "loss": 0.5126, "step": 17707 }, { "epoch": 2.8907391535039384, "grad_norm": 3.1723382472991943, "learning_rate": 1.4790545820633585e-05, "loss": 0.6834, "step": 17708 }, { "epoch": 2.890902412146443, "grad_norm": 3.115917205810547, "learning_rate": 1.4789983026447612e-05, "loss": 0.6621, "step": 17709 }, { "epoch": 2.8910656707889473, "grad_norm": 3.030592441558838, "learning_rate": 1.478942021257183e-05, "loss": 0.602, "step": 17710 }, { "epoch": 2.8912289294314517, "grad_norm": 2.7795865535736084, "learning_rate": 1.4788857379008547e-05, "loss": 0.5611, "step": 17711 }, { "epoch": 2.891392188073956, "grad_norm": 2.76413631439209, "learning_rate": 1.4788294525760083e-05, "loss": 0.5383, "step": 17712 }, { "epoch": 2.8915554467164606, "grad_norm": 3.1453073024749756, "learning_rate": 1.4787731652828747e-05, "loss": 0.5828, "step": 17713 }, { "epoch": 2.891718705358965, "grad_norm": 3.463864803314209, "learning_rate": 1.4787168760216855e-05, "loss": 0.6959, "step": 17714 }, { "epoch": 2.8918819640014695, "grad_norm": 2.7512965202331543, "learning_rate": 1.4786605847926718e-05, "loss": 0.6576, "step": 17715 }, { "epoch": 2.892045222643974, "grad_norm": 2.263604164123535, "learning_rate": 1.4786042915960655e-05, "loss": 0.4767, "step": 17716 }, { "epoch": 2.8922084812864783, "grad_norm": 2.8086040019989014, "learning_rate": 1.4785479964320973e-05, "loss": 0.4923, "step": 17717 }, { "epoch": 2.8923717399289823, "grad_norm": 3.239274024963379, "learning_rate": 1.478491699300999e-05, "loss": 0.6175, "step": 17718 }, { "epoch": 2.8925349985714868, "grad_norm": 3.022045612335205, "learning_rate": 1.4784354002030024e-05, "loss": 0.6285, "step": 17719 }, { "epoch": 2.892698257213991, "grad_norm": 2.8994219303131104, "learning_rate": 1.478379099138338e-05, "loss": 0.6331, "step": 17720 }, { "epoch": 2.8928615158564956, "grad_norm": 2.969170570373535, "learning_rate": 1.478322796107238e-05, "loss": 0.6734, "step": 17721 }, { "epoch": 2.893024774499, "grad_norm": 2.6714928150177, "learning_rate": 1.4782664911099336e-05, "loss": 0.5642, "step": 17722 }, { "epoch": 2.8931880331415045, "grad_norm": 3.0445306301116943, "learning_rate": 1.4782101841466563e-05, "loss": 0.6108, "step": 17723 }, { "epoch": 2.893351291784009, "grad_norm": 3.2251930236816406, "learning_rate": 1.4781538752176377e-05, "loss": 0.6092, "step": 17724 }, { "epoch": 2.893514550426513, "grad_norm": 3.0620410442352295, "learning_rate": 1.4780975643231089e-05, "loss": 0.5359, "step": 17725 }, { "epoch": 2.8936778090690174, "grad_norm": 2.8560519218444824, "learning_rate": 1.4780412514633012e-05, "loss": 0.5998, "step": 17726 }, { "epoch": 2.893841067711522, "grad_norm": 2.758812665939331, "learning_rate": 1.4779849366384467e-05, "loss": 0.4987, "step": 17727 }, { "epoch": 2.8940043263540263, "grad_norm": 2.3580617904663086, "learning_rate": 1.4779286198487766e-05, "loss": 0.4423, "step": 17728 }, { "epoch": 2.8941675849965307, "grad_norm": 2.521869659423828, "learning_rate": 1.4778723010945223e-05, "loss": 0.491, "step": 17729 }, { "epoch": 2.894330843639035, "grad_norm": 2.9847259521484375, "learning_rate": 1.4778159803759159e-05, "loss": 0.5347, "step": 17730 }, { "epoch": 2.8944941022815396, "grad_norm": 3.055663824081421, "learning_rate": 1.4777596576931877e-05, "loss": 0.6118, "step": 17731 }, { "epoch": 2.894657360924044, "grad_norm": 2.4522864818573, "learning_rate": 1.4777033330465703e-05, "loss": 0.4563, "step": 17732 }, { "epoch": 2.8948206195665485, "grad_norm": 2.9675145149230957, "learning_rate": 1.4776470064362947e-05, "loss": 0.5207, "step": 17733 }, { "epoch": 2.894983878209053, "grad_norm": 3.009631633758545, "learning_rate": 1.477590677862593e-05, "loss": 0.6077, "step": 17734 }, { "epoch": 2.8951471368515573, "grad_norm": 3.144859790802002, "learning_rate": 1.4775343473256962e-05, "loss": 0.5964, "step": 17735 }, { "epoch": 2.8953103954940613, "grad_norm": 3.1626133918762207, "learning_rate": 1.4774780148258359e-05, "loss": 0.5666, "step": 17736 }, { "epoch": 2.8954736541365658, "grad_norm": 2.4288156032562256, "learning_rate": 1.4774216803632435e-05, "loss": 0.468, "step": 17737 }, { "epoch": 2.89563691277907, "grad_norm": 3.133401870727539, "learning_rate": 1.4773653439381513e-05, "loss": 0.6073, "step": 17738 }, { "epoch": 2.8958001714215746, "grad_norm": 2.984757423400879, "learning_rate": 1.4773090055507904e-05, "loss": 0.573, "step": 17739 }, { "epoch": 2.895963430064079, "grad_norm": 3.2792842388153076, "learning_rate": 1.4772526652013924e-05, "loss": 0.6516, "step": 17740 }, { "epoch": 2.8961266887065835, "grad_norm": 3.0926363468170166, "learning_rate": 1.4771963228901886e-05, "loss": 0.6529, "step": 17741 }, { "epoch": 2.896289947349088, "grad_norm": 3.0209438800811768, "learning_rate": 1.477139978617411e-05, "loss": 0.6364, "step": 17742 }, { "epoch": 2.896453205991592, "grad_norm": 2.894688367843628, "learning_rate": 1.4770836323832911e-05, "loss": 0.4924, "step": 17743 }, { "epoch": 2.8966164646340964, "grad_norm": 3.1886589527130127, "learning_rate": 1.4770272841880608e-05, "loss": 0.669, "step": 17744 }, { "epoch": 2.896779723276601, "grad_norm": 2.818533182144165, "learning_rate": 1.4769709340319513e-05, "loss": 0.5361, "step": 17745 }, { "epoch": 2.8969429819191053, "grad_norm": 2.8913252353668213, "learning_rate": 1.4769145819151945e-05, "loss": 0.5406, "step": 17746 }, { "epoch": 2.8971062405616097, "grad_norm": 2.7037625312805176, "learning_rate": 1.4768582278380218e-05, "loss": 0.5307, "step": 17747 }, { "epoch": 2.897269499204114, "grad_norm": 2.8244192600250244, "learning_rate": 1.4768018718006647e-05, "loss": 0.5876, "step": 17748 }, { "epoch": 2.8974327578466186, "grad_norm": 3.465488910675049, "learning_rate": 1.4767455138033557e-05, "loss": 0.6987, "step": 17749 }, { "epoch": 2.897596016489123, "grad_norm": 2.9353575706481934, "learning_rate": 1.4766891538463255e-05, "loss": 0.4885, "step": 17750 }, { "epoch": 2.8977592751316275, "grad_norm": 2.8436272144317627, "learning_rate": 1.4766327919298065e-05, "loss": 0.4958, "step": 17751 }, { "epoch": 2.897922533774132, "grad_norm": 2.803889036178589, "learning_rate": 1.4765764280540297e-05, "loss": 0.5627, "step": 17752 }, { "epoch": 2.8980857924166363, "grad_norm": 3.1558456420898438, "learning_rate": 1.4765200622192275e-05, "loss": 0.6116, "step": 17753 }, { "epoch": 2.8982490510591403, "grad_norm": 3.4284892082214355, "learning_rate": 1.476463694425631e-05, "loss": 0.679, "step": 17754 }, { "epoch": 2.8984123097016448, "grad_norm": 3.152676820755005, "learning_rate": 1.4764073246734722e-05, "loss": 0.5108, "step": 17755 }, { "epoch": 2.898575568344149, "grad_norm": 2.9255001544952393, "learning_rate": 1.4763509529629829e-05, "loss": 0.6561, "step": 17756 }, { "epoch": 2.8987388269866536, "grad_norm": 3.3986525535583496, "learning_rate": 1.4762945792943946e-05, "loss": 0.6418, "step": 17757 }, { "epoch": 2.898902085629158, "grad_norm": 3.2120344638824463, "learning_rate": 1.4762382036679393e-05, "loss": 0.6272, "step": 17758 }, { "epoch": 2.8990653442716625, "grad_norm": 2.5344398021698, "learning_rate": 1.4761818260838486e-05, "loss": 0.5118, "step": 17759 }, { "epoch": 2.8992286029141665, "grad_norm": 2.240532398223877, "learning_rate": 1.4761254465423538e-05, "loss": 0.4809, "step": 17760 }, { "epoch": 2.899391861556671, "grad_norm": 2.5393576622009277, "learning_rate": 1.4760690650436873e-05, "loss": 0.5113, "step": 17761 }, { "epoch": 2.8995551201991754, "grad_norm": 2.4695279598236084, "learning_rate": 1.4760126815880806e-05, "loss": 0.4906, "step": 17762 }, { "epoch": 2.89971837884168, "grad_norm": 3.2191028594970703, "learning_rate": 1.4759562961757659e-05, "loss": 0.6348, "step": 17763 }, { "epoch": 2.8998816374841843, "grad_norm": 3.2203454971313477, "learning_rate": 1.475899908806974e-05, "loss": 0.6149, "step": 17764 }, { "epoch": 2.9000448961266887, "grad_norm": 3.053041934967041, "learning_rate": 1.4758435194819375e-05, "loss": 0.548, "step": 17765 }, { "epoch": 2.900208154769193, "grad_norm": 2.809368848800659, "learning_rate": 1.4757871282008882e-05, "loss": 0.5669, "step": 17766 }, { "epoch": 2.9003714134116976, "grad_norm": 2.89410138130188, "learning_rate": 1.4757307349640573e-05, "loss": 0.8689, "step": 17767 }, { "epoch": 2.900534672054202, "grad_norm": 2.698002815246582, "learning_rate": 1.4756743397716773e-05, "loss": 0.5091, "step": 17768 }, { "epoch": 2.9006979306967065, "grad_norm": 2.701171875, "learning_rate": 1.4756179426239797e-05, "loss": 0.4882, "step": 17769 }, { "epoch": 2.900861189339211, "grad_norm": 3.0893735885620117, "learning_rate": 1.4755615435211958e-05, "loss": 0.5886, "step": 17770 }, { "epoch": 2.901024447981715, "grad_norm": 2.50282621383667, "learning_rate": 1.4755051424635585e-05, "loss": 0.5178, "step": 17771 }, { "epoch": 2.9011877066242193, "grad_norm": 3.295369863510132, "learning_rate": 1.4754487394512988e-05, "loss": 0.6528, "step": 17772 }, { "epoch": 2.9013509652667238, "grad_norm": 3.791963815689087, "learning_rate": 1.4753923344846492e-05, "loss": 0.7355, "step": 17773 }, { "epoch": 2.901514223909228, "grad_norm": 2.7029666900634766, "learning_rate": 1.4753359275638412e-05, "loss": 0.4781, "step": 17774 }, { "epoch": 2.9016774825517326, "grad_norm": 3.1351122856140137, "learning_rate": 1.4752795186891062e-05, "loss": 0.6231, "step": 17775 }, { "epoch": 2.901840741194237, "grad_norm": 3.0486223697662354, "learning_rate": 1.4752231078606769e-05, "loss": 0.637, "step": 17776 }, { "epoch": 2.9020039998367415, "grad_norm": 2.6708006858825684, "learning_rate": 1.4751666950787848e-05, "loss": 0.5518, "step": 17777 }, { "epoch": 2.9021672584792455, "grad_norm": 3.328986644744873, "learning_rate": 1.4751102803436621e-05, "loss": 0.8615, "step": 17778 }, { "epoch": 2.90233051712175, "grad_norm": 2.992352247238159, "learning_rate": 1.47505386365554e-05, "loss": 0.6055, "step": 17779 }, { "epoch": 2.9024937757642544, "grad_norm": 2.90804123878479, "learning_rate": 1.4749974450146512e-05, "loss": 0.5743, "step": 17780 }, { "epoch": 2.902657034406759, "grad_norm": 2.529435157775879, "learning_rate": 1.4749410244212267e-05, "loss": 0.5513, "step": 17781 }, { "epoch": 2.9028202930492633, "grad_norm": 2.238703727722168, "learning_rate": 1.4748846018754996e-05, "loss": 0.4562, "step": 17782 }, { "epoch": 2.9029835516917677, "grad_norm": 2.9677584171295166, "learning_rate": 1.4748281773777012e-05, "loss": 0.6631, "step": 17783 }, { "epoch": 2.903146810334272, "grad_norm": 2.983144760131836, "learning_rate": 1.474771750928063e-05, "loss": 0.5354, "step": 17784 }, { "epoch": 2.9033100689767766, "grad_norm": 2.620819091796875, "learning_rate": 1.4747153225268178e-05, "loss": 0.5573, "step": 17785 }, { "epoch": 2.903473327619281, "grad_norm": 2.9555232524871826, "learning_rate": 1.474658892174197e-05, "loss": 0.5795, "step": 17786 }, { "epoch": 2.9036365862617854, "grad_norm": 3.0469870567321777, "learning_rate": 1.4746024598704325e-05, "loss": 0.6175, "step": 17787 }, { "epoch": 2.90379984490429, "grad_norm": 2.752802848815918, "learning_rate": 1.474546025615757e-05, "loss": 0.5482, "step": 17788 }, { "epoch": 2.903963103546794, "grad_norm": 3.2436676025390625, "learning_rate": 1.4744895894104016e-05, "loss": 0.6159, "step": 17789 }, { "epoch": 2.9041263621892983, "grad_norm": 2.6488444805145264, "learning_rate": 1.4744331512545988e-05, "loss": 0.5609, "step": 17790 }, { "epoch": 2.9042896208318028, "grad_norm": 3.1131298542022705, "learning_rate": 1.4743767111485805e-05, "loss": 0.6516, "step": 17791 }, { "epoch": 2.904452879474307, "grad_norm": 2.7174408435821533, "learning_rate": 1.4743202690925782e-05, "loss": 0.4956, "step": 17792 }, { "epoch": 2.9046161381168116, "grad_norm": 3.0394375324249268, "learning_rate": 1.4742638250868251e-05, "loss": 0.6257, "step": 17793 }, { "epoch": 2.904779396759316, "grad_norm": 3.134793281555176, "learning_rate": 1.4742073791315522e-05, "loss": 0.6833, "step": 17794 }, { "epoch": 2.90494265540182, "grad_norm": 2.6963610649108887, "learning_rate": 1.4741509312269916e-05, "loss": 0.5661, "step": 17795 }, { "epoch": 2.9051059140443245, "grad_norm": 2.786445140838623, "learning_rate": 1.4740944813733758e-05, "loss": 0.5247, "step": 17796 }, { "epoch": 2.905269172686829, "grad_norm": 2.849886417388916, "learning_rate": 1.4740380295709365e-05, "loss": 0.584, "step": 17797 }, { "epoch": 2.9054324313293334, "grad_norm": 2.7139859199523926, "learning_rate": 1.473981575819906e-05, "loss": 0.5028, "step": 17798 }, { "epoch": 2.905595689971838, "grad_norm": 2.4999232292175293, "learning_rate": 1.473925120120516e-05, "loss": 0.5317, "step": 17799 }, { "epoch": 2.9057589486143423, "grad_norm": 2.4791178703308105, "learning_rate": 1.4738686624729987e-05, "loss": 0.5472, "step": 17800 }, { "epoch": 2.9059222072568467, "grad_norm": 3.145883321762085, "learning_rate": 1.4738122028775865e-05, "loss": 0.5246, "step": 17801 }, { "epoch": 2.906085465899351, "grad_norm": 2.766200542449951, "learning_rate": 1.4737557413345112e-05, "loss": 0.5695, "step": 17802 }, { "epoch": 2.9062487245418556, "grad_norm": 2.989347219467163, "learning_rate": 1.473699277844005e-05, "loss": 0.6606, "step": 17803 }, { "epoch": 2.90641198318436, "grad_norm": 2.748359203338623, "learning_rate": 1.4736428124062998e-05, "loss": 0.4775, "step": 17804 }, { "epoch": 2.9065752418268644, "grad_norm": 3.0371835231781006, "learning_rate": 1.473586345021628e-05, "loss": 0.7023, "step": 17805 }, { "epoch": 2.9067385004693684, "grad_norm": 3.2086021900177, "learning_rate": 1.4735298756902215e-05, "loss": 0.5941, "step": 17806 }, { "epoch": 2.906901759111873, "grad_norm": 2.5082545280456543, "learning_rate": 1.4734734044123123e-05, "loss": 0.4713, "step": 17807 }, { "epoch": 2.9070650177543773, "grad_norm": 2.92170786857605, "learning_rate": 1.4734169311881328e-05, "loss": 0.6362, "step": 17808 }, { "epoch": 2.9072282763968817, "grad_norm": 2.7147672176361084, "learning_rate": 1.4733604560179151e-05, "loss": 0.6042, "step": 17809 }, { "epoch": 2.907391535039386, "grad_norm": 2.531022071838379, "learning_rate": 1.4733039789018913e-05, "loss": 0.5204, "step": 17810 }, { "epoch": 2.9075547936818906, "grad_norm": 2.749372959136963, "learning_rate": 1.4732474998402935e-05, "loss": 0.529, "step": 17811 }, { "epoch": 2.907718052324395, "grad_norm": 3.0748698711395264, "learning_rate": 1.4731910188333542e-05, "loss": 0.6201, "step": 17812 }, { "epoch": 2.907881310966899, "grad_norm": 3.0532195568084717, "learning_rate": 1.4731345358813052e-05, "loss": 0.5885, "step": 17813 }, { "epoch": 2.9080445696094035, "grad_norm": 2.5720911026000977, "learning_rate": 1.4730780509843783e-05, "loss": 0.5261, "step": 17814 }, { "epoch": 2.908207828251908, "grad_norm": 2.79765248298645, "learning_rate": 1.4730215641428065e-05, "loss": 0.531, "step": 17815 }, { "epoch": 2.9083710868944124, "grad_norm": 3.3835835456848145, "learning_rate": 1.4729650753568219e-05, "loss": 0.7176, "step": 17816 }, { "epoch": 2.908534345536917, "grad_norm": 2.9785706996917725, "learning_rate": 1.4729085846266564e-05, "loss": 0.5695, "step": 17817 }, { "epoch": 2.9086976041794212, "grad_norm": 3.2111563682556152, "learning_rate": 1.4728520919525421e-05, "loss": 0.6895, "step": 17818 }, { "epoch": 2.9088608628219257, "grad_norm": 2.632215738296509, "learning_rate": 1.4727955973347112e-05, "loss": 0.5613, "step": 17819 }, { "epoch": 2.90902412146443, "grad_norm": 2.8025295734405518, "learning_rate": 1.4727391007733962e-05, "loss": 0.6007, "step": 17820 }, { "epoch": 2.9091873801069346, "grad_norm": 2.9054596424102783, "learning_rate": 1.4726826022688295e-05, "loss": 0.5763, "step": 17821 }, { "epoch": 2.909350638749439, "grad_norm": 2.965096950531006, "learning_rate": 1.4726261018212431e-05, "loss": 0.5714, "step": 17822 }, { "epoch": 2.9095138973919434, "grad_norm": 2.915497303009033, "learning_rate": 1.4725695994308694e-05, "loss": 0.6318, "step": 17823 }, { "epoch": 2.9096771560344474, "grad_norm": 2.864168167114258, "learning_rate": 1.47251309509794e-05, "loss": 0.6339, "step": 17824 }, { "epoch": 2.909840414676952, "grad_norm": 2.984680414199829, "learning_rate": 1.4724565888226879e-05, "loss": 0.5923, "step": 17825 }, { "epoch": 2.9100036733194563, "grad_norm": 2.997684955596924, "learning_rate": 1.4724000806053453e-05, "loss": 0.6251, "step": 17826 }, { "epoch": 2.9101669319619607, "grad_norm": 2.6781156063079834, "learning_rate": 1.4723435704461443e-05, "loss": 0.5026, "step": 17827 }, { "epoch": 2.910330190604465, "grad_norm": 2.445364475250244, "learning_rate": 1.4722870583453173e-05, "loss": 0.4733, "step": 17828 }, { "epoch": 2.9104934492469696, "grad_norm": 3.1077170372009277, "learning_rate": 1.4722305443030964e-05, "loss": 0.5757, "step": 17829 }, { "epoch": 2.910656707889474, "grad_norm": 3.4674882888793945, "learning_rate": 1.472174028319714e-05, "loss": 0.9826, "step": 17830 }, { "epoch": 2.910819966531978, "grad_norm": 3.4114491939544678, "learning_rate": 1.4721175103954025e-05, "loss": 0.5215, "step": 17831 }, { "epoch": 2.9109832251744825, "grad_norm": 2.926436424255371, "learning_rate": 1.4720609905303944e-05, "loss": 0.5798, "step": 17832 }, { "epoch": 2.911146483816987, "grad_norm": 3.070335865020752, "learning_rate": 1.4720044687249218e-05, "loss": 0.5409, "step": 17833 }, { "epoch": 2.9113097424594914, "grad_norm": 3.0951826572418213, "learning_rate": 1.4719479449792167e-05, "loss": 0.7117, "step": 17834 }, { "epoch": 2.911473001101996, "grad_norm": 2.9647669792175293, "learning_rate": 1.471891419293512e-05, "loss": 0.5637, "step": 17835 }, { "epoch": 2.9116362597445002, "grad_norm": 3.092823028564453, "learning_rate": 1.4718348916680396e-05, "loss": 0.639, "step": 17836 }, { "epoch": 2.9117995183870047, "grad_norm": 2.6146152019500732, "learning_rate": 1.4717783621030325e-05, "loss": 0.4348, "step": 17837 }, { "epoch": 2.911962777029509, "grad_norm": 2.3829023838043213, "learning_rate": 1.4717218305987226e-05, "loss": 0.4522, "step": 17838 }, { "epoch": 2.9121260356720136, "grad_norm": 2.97463059425354, "learning_rate": 1.4716652971553424e-05, "loss": 0.671, "step": 17839 }, { "epoch": 2.912289294314518, "grad_norm": 2.8996379375457764, "learning_rate": 1.4716087617731243e-05, "loss": 0.6013, "step": 17840 }, { "epoch": 2.9124525529570224, "grad_norm": 3.105337381362915, "learning_rate": 1.4715522244523006e-05, "loss": 0.5312, "step": 17841 }, { "epoch": 2.9126158115995264, "grad_norm": 2.747220516204834, "learning_rate": 1.4714956851931037e-05, "loss": 0.6086, "step": 17842 }, { "epoch": 2.912779070242031, "grad_norm": 2.4073047637939453, "learning_rate": 1.471439143995766e-05, "loss": 0.4774, "step": 17843 }, { "epoch": 2.9129423288845353, "grad_norm": 2.4681551456451416, "learning_rate": 1.4713826008605202e-05, "loss": 0.4849, "step": 17844 }, { "epoch": 2.9131055875270397, "grad_norm": 2.7047743797302246, "learning_rate": 1.4713260557875983e-05, "loss": 0.5561, "step": 17845 }, { "epoch": 2.913268846169544, "grad_norm": 2.869823932647705, "learning_rate": 1.471269508777233e-05, "loss": 0.559, "step": 17846 }, { "epoch": 2.9134321048120486, "grad_norm": 2.685889959335327, "learning_rate": 1.4712129598296568e-05, "loss": 0.5763, "step": 17847 }, { "epoch": 2.9135953634545526, "grad_norm": 3.1112945079803467, "learning_rate": 1.4711564089451021e-05, "loss": 0.6092, "step": 17848 }, { "epoch": 2.913758622097057, "grad_norm": 2.9415900707244873, "learning_rate": 1.4710998561238012e-05, "loss": 0.5013, "step": 17849 }, { "epoch": 2.9139218807395615, "grad_norm": 2.6759612560272217, "learning_rate": 1.4710433013659866e-05, "loss": 0.4922, "step": 17850 }, { "epoch": 2.914085139382066, "grad_norm": 2.674996852874756, "learning_rate": 1.4709867446718911e-05, "loss": 0.5642, "step": 17851 }, { "epoch": 2.9142483980245704, "grad_norm": 2.861839771270752, "learning_rate": 1.4709301860417466e-05, "loss": 0.6053, "step": 17852 }, { "epoch": 2.914411656667075, "grad_norm": 2.7716267108917236, "learning_rate": 1.470873625475786e-05, "loss": 0.5924, "step": 17853 }, { "epoch": 2.9145749153095792, "grad_norm": 3.002053737640381, "learning_rate": 1.470817062974242e-05, "loss": 0.6016, "step": 17854 }, { "epoch": 2.9147381739520837, "grad_norm": 2.4679605960845947, "learning_rate": 1.4707604985373467e-05, "loss": 0.496, "step": 17855 }, { "epoch": 2.914901432594588, "grad_norm": 2.6349358558654785, "learning_rate": 1.470703932165333e-05, "loss": 0.4767, "step": 17856 }, { "epoch": 2.9150646912370926, "grad_norm": 3.243968963623047, "learning_rate": 1.4706473638584326e-05, "loss": 0.6561, "step": 17857 }, { "epoch": 2.915227949879597, "grad_norm": 2.5491480827331543, "learning_rate": 1.470590793616879e-05, "loss": 0.5531, "step": 17858 }, { "epoch": 2.915391208522101, "grad_norm": 2.499891519546509, "learning_rate": 1.470534221440904e-05, "loss": 0.5357, "step": 17859 }, { "epoch": 2.9155544671646054, "grad_norm": 3.056648015975952, "learning_rate": 1.4704776473307408e-05, "loss": 0.568, "step": 17860 }, { "epoch": 2.91571772580711, "grad_norm": 2.8392295837402344, "learning_rate": 1.4704210712866216e-05, "loss": 0.6343, "step": 17861 }, { "epoch": 2.9158809844496143, "grad_norm": 2.6250662803649902, "learning_rate": 1.4703644933087791e-05, "loss": 0.4909, "step": 17862 }, { "epoch": 2.9160442430921187, "grad_norm": 2.5198144912719727, "learning_rate": 1.4703079133974453e-05, "loss": 0.5439, "step": 17863 }, { "epoch": 2.916207501734623, "grad_norm": 2.706022262573242, "learning_rate": 1.4702513315528535e-05, "loss": 0.5372, "step": 17864 }, { "epoch": 2.9163707603771276, "grad_norm": 2.534353494644165, "learning_rate": 1.4701947477752363e-05, "loss": 0.541, "step": 17865 }, { "epoch": 2.9165340190196316, "grad_norm": 2.814833402633667, "learning_rate": 1.470138162064826e-05, "loss": 0.6055, "step": 17866 }, { "epoch": 2.916697277662136, "grad_norm": 2.7576394081115723, "learning_rate": 1.4700815744218551e-05, "loss": 0.5205, "step": 17867 }, { "epoch": 2.9168605363046405, "grad_norm": 2.731297016143799, "learning_rate": 1.4700249848465562e-05, "loss": 0.5338, "step": 17868 }, { "epoch": 2.917023794947145, "grad_norm": 2.761075019836426, "learning_rate": 1.469968393339162e-05, "loss": 0.6035, "step": 17869 }, { "epoch": 2.9171870535896494, "grad_norm": 3.2529406547546387, "learning_rate": 1.4699117998999055e-05, "loss": 0.74, "step": 17870 }, { "epoch": 2.917350312232154, "grad_norm": 2.835223913192749, "learning_rate": 1.4698552045290192e-05, "loss": 0.609, "step": 17871 }, { "epoch": 2.9175135708746582, "grad_norm": 2.055347204208374, "learning_rate": 1.4697986072267352e-05, "loss": 0.5012, "step": 17872 }, { "epoch": 2.9176768295171627, "grad_norm": 2.3810153007507324, "learning_rate": 1.4697420079932865e-05, "loss": 0.4448, "step": 17873 }, { "epoch": 2.917840088159667, "grad_norm": 3.3601856231689453, "learning_rate": 1.4696854068289059e-05, "loss": 0.7165, "step": 17874 }, { "epoch": 2.9180033468021715, "grad_norm": 2.607428789138794, "learning_rate": 1.4696288037338258e-05, "loss": 0.4943, "step": 17875 }, { "epoch": 2.918166605444676, "grad_norm": 2.3964877128601074, "learning_rate": 1.4695721987082792e-05, "loss": 0.5078, "step": 17876 }, { "epoch": 2.91832986408718, "grad_norm": 3.1132586002349854, "learning_rate": 1.4695155917524986e-05, "loss": 0.5867, "step": 17877 }, { "epoch": 2.9184931227296844, "grad_norm": 2.5397047996520996, "learning_rate": 1.4694589828667167e-05, "loss": 0.5759, "step": 17878 }, { "epoch": 2.918656381372189, "grad_norm": 2.6959476470947266, "learning_rate": 1.4694023720511661e-05, "loss": 0.5601, "step": 17879 }, { "epoch": 2.9188196400146933, "grad_norm": 2.5473780632019043, "learning_rate": 1.4693457593060796e-05, "loss": 0.5638, "step": 17880 }, { "epoch": 2.9189828986571977, "grad_norm": 3.11140513420105, "learning_rate": 1.46928914463169e-05, "loss": 0.6182, "step": 17881 }, { "epoch": 2.919146157299702, "grad_norm": 2.7911477088928223, "learning_rate": 1.4692325280282298e-05, "loss": 0.5924, "step": 17882 }, { "epoch": 2.919309415942206, "grad_norm": 2.2773571014404297, "learning_rate": 1.4691759094959319e-05, "loss": 0.4641, "step": 17883 }, { "epoch": 2.9194726745847106, "grad_norm": 3.177743434906006, "learning_rate": 1.4691192890350289e-05, "loss": 0.6099, "step": 17884 }, { "epoch": 2.919635933227215, "grad_norm": 2.9012444019317627, "learning_rate": 1.4690626666457536e-05, "loss": 0.5322, "step": 17885 }, { "epoch": 2.9197991918697195, "grad_norm": 2.606182098388672, "learning_rate": 1.469006042328339e-05, "loss": 0.4688, "step": 17886 }, { "epoch": 2.919962450512224, "grad_norm": 3.133094310760498, "learning_rate": 1.4689494160830175e-05, "loss": 0.5823, "step": 17887 }, { "epoch": 2.9201257091547284, "grad_norm": 2.6799066066741943, "learning_rate": 1.468892787910022e-05, "loss": 0.539, "step": 17888 }, { "epoch": 2.920288967797233, "grad_norm": 2.51285982131958, "learning_rate": 1.4688361578095856e-05, "loss": 0.4891, "step": 17889 }, { "epoch": 2.9204522264397372, "grad_norm": 2.6680407524108887, "learning_rate": 1.4687795257819406e-05, "loss": 0.4979, "step": 17890 }, { "epoch": 2.9206154850822417, "grad_norm": 3.122021198272705, "learning_rate": 1.46872289182732e-05, "loss": 0.6916, "step": 17891 }, { "epoch": 2.920778743724746, "grad_norm": 3.020662307739258, "learning_rate": 1.4686662559459563e-05, "loss": 0.5519, "step": 17892 }, { "epoch": 2.9209420023672505, "grad_norm": 2.785508871078491, "learning_rate": 1.4686096181380829e-05, "loss": 0.5612, "step": 17893 }, { "epoch": 2.9211052610097545, "grad_norm": 3.032397508621216, "learning_rate": 1.4685529784039323e-05, "loss": 0.4811, "step": 17894 }, { "epoch": 2.921268519652259, "grad_norm": 3.0993800163269043, "learning_rate": 1.4684963367437373e-05, "loss": 0.5915, "step": 17895 }, { "epoch": 2.9214317782947634, "grad_norm": 3.073004722595215, "learning_rate": 1.4684396931577307e-05, "loss": 0.5363, "step": 17896 }, { "epoch": 2.921595036937268, "grad_norm": 3.6775732040405273, "learning_rate": 1.4683830476461454e-05, "loss": 0.7785, "step": 17897 }, { "epoch": 2.9217582955797723, "grad_norm": 3.277874231338501, "learning_rate": 1.4683264002092143e-05, "loss": 0.6673, "step": 17898 }, { "epoch": 2.9219215542222767, "grad_norm": 2.596747875213623, "learning_rate": 1.4682697508471702e-05, "loss": 0.4979, "step": 17899 }, { "epoch": 2.922084812864781, "grad_norm": 3.28585147857666, "learning_rate": 1.468213099560246e-05, "loss": 0.6388, "step": 17900 }, { "epoch": 2.922248071507285, "grad_norm": 3.320909261703491, "learning_rate": 1.4681564463486745e-05, "loss": 0.6319, "step": 17901 }, { "epoch": 2.9224113301497896, "grad_norm": 3.1111900806427, "learning_rate": 1.4680997912126884e-05, "loss": 0.482, "step": 17902 }, { "epoch": 2.922574588792294, "grad_norm": 3.2171273231506348, "learning_rate": 1.468043134152521e-05, "loss": 0.5759, "step": 17903 }, { "epoch": 2.9227378474347985, "grad_norm": 2.866323947906494, "learning_rate": 1.4679864751684052e-05, "loss": 0.6764, "step": 17904 }, { "epoch": 2.922901106077303, "grad_norm": 2.882688522338867, "learning_rate": 1.4679298142605735e-05, "loss": 0.4911, "step": 17905 }, { "epoch": 2.9230643647198074, "grad_norm": 2.6832306385040283, "learning_rate": 1.4678731514292592e-05, "loss": 0.5007, "step": 17906 }, { "epoch": 2.923227623362312, "grad_norm": 3.0423898696899414, "learning_rate": 1.4678164866746949e-05, "loss": 0.5824, "step": 17907 }, { "epoch": 2.9233908820048162, "grad_norm": 2.979447364807129, "learning_rate": 1.4677598199971134e-05, "loss": 0.5666, "step": 17908 }, { "epoch": 2.9235541406473207, "grad_norm": 2.814373254776001, "learning_rate": 1.4677031513967483e-05, "loss": 0.5996, "step": 17909 }, { "epoch": 2.923717399289825, "grad_norm": 2.5335888862609863, "learning_rate": 1.4676464808738322e-05, "loss": 0.5653, "step": 17910 }, { "epoch": 2.9238806579323295, "grad_norm": 2.501638650894165, "learning_rate": 1.4675898084285978e-05, "loss": 0.4895, "step": 17911 }, { "epoch": 2.9240439165748335, "grad_norm": 3.143890142440796, "learning_rate": 1.4675331340612781e-05, "loss": 0.5026, "step": 17912 }, { "epoch": 2.924207175217338, "grad_norm": 3.133315086364746, "learning_rate": 1.4674764577721063e-05, "loss": 0.6496, "step": 17913 }, { "epoch": 2.9243704338598424, "grad_norm": 2.933445453643799, "learning_rate": 1.4674197795613154e-05, "loss": 0.5312, "step": 17914 }, { "epoch": 2.924533692502347, "grad_norm": 3.2295279502868652, "learning_rate": 1.4673630994291385e-05, "loss": 0.5847, "step": 17915 }, { "epoch": 2.9246969511448513, "grad_norm": 3.008631706237793, "learning_rate": 1.467306417375808e-05, "loss": 0.6692, "step": 17916 }, { "epoch": 2.9248602097873557, "grad_norm": 2.395327568054199, "learning_rate": 1.4672497334015575e-05, "loss": 0.4639, "step": 17917 }, { "epoch": 2.92502346842986, "grad_norm": 2.6508564949035645, "learning_rate": 1.4671930475066195e-05, "loss": 0.492, "step": 17918 }, { "epoch": 2.925186727072364, "grad_norm": 3.116530179977417, "learning_rate": 1.4671363596912273e-05, "loss": 0.5828, "step": 17919 }, { "epoch": 2.9253499857148686, "grad_norm": 3.062741994857788, "learning_rate": 1.4670796699556145e-05, "loss": 0.5881, "step": 17920 }, { "epoch": 2.925513244357373, "grad_norm": 2.9610471725463867, "learning_rate": 1.4670229783000131e-05, "loss": 0.6154, "step": 17921 }, { "epoch": 2.9256765029998775, "grad_norm": 2.7945988178253174, "learning_rate": 1.4669662847246566e-05, "loss": 0.5073, "step": 17922 }, { "epoch": 2.925839761642382, "grad_norm": 2.8626067638397217, "learning_rate": 1.466909589229778e-05, "loss": 0.6077, "step": 17923 }, { "epoch": 2.9260030202848863, "grad_norm": 2.510556936264038, "learning_rate": 1.4668528918156103e-05, "loss": 0.5074, "step": 17924 }, { "epoch": 2.926166278927391, "grad_norm": 3.093799591064453, "learning_rate": 1.466796192482387e-05, "loss": 0.6909, "step": 17925 }, { "epoch": 2.9263295375698952, "grad_norm": 3.1163113117218018, "learning_rate": 1.4667394912303405e-05, "loss": 0.5824, "step": 17926 }, { "epoch": 2.9264927962123997, "grad_norm": 2.748629570007324, "learning_rate": 1.4666827880597043e-05, "loss": 0.5036, "step": 17927 }, { "epoch": 2.926656054854904, "grad_norm": 3.1041014194488525, "learning_rate": 1.4666260829707113e-05, "loss": 0.6147, "step": 17928 }, { "epoch": 2.9268193134974085, "grad_norm": 2.928739547729492, "learning_rate": 1.4665693759635946e-05, "loss": 0.5863, "step": 17929 }, { "epoch": 2.9269825721399125, "grad_norm": 2.447749376296997, "learning_rate": 1.4665126670385877e-05, "loss": 0.4943, "step": 17930 }, { "epoch": 2.927145830782417, "grad_norm": 3.124830484390259, "learning_rate": 1.4664559561959231e-05, "loss": 0.588, "step": 17931 }, { "epoch": 2.9273090894249214, "grad_norm": 2.3680660724639893, "learning_rate": 1.4663992434358343e-05, "loss": 0.5154, "step": 17932 }, { "epoch": 2.927472348067426, "grad_norm": 3.2760488986968994, "learning_rate": 1.4663425287585542e-05, "loss": 0.6624, "step": 17933 }, { "epoch": 2.9276356067099303, "grad_norm": 3.107743740081787, "learning_rate": 1.466285812164316e-05, "loss": 0.6581, "step": 17934 }, { "epoch": 2.9277988653524347, "grad_norm": 2.8622164726257324, "learning_rate": 1.466229093653353e-05, "loss": 0.5542, "step": 17935 }, { "epoch": 2.9279621239949387, "grad_norm": 2.7146060466766357, "learning_rate": 1.4661723732258984e-05, "loss": 0.5418, "step": 17936 }, { "epoch": 2.928125382637443, "grad_norm": 2.710397720336914, "learning_rate": 1.466115650882185e-05, "loss": 0.6012, "step": 17937 }, { "epoch": 2.9282886412799476, "grad_norm": 3.281315326690674, "learning_rate": 1.4660589266224463e-05, "loss": 0.7241, "step": 17938 }, { "epoch": 2.928451899922452, "grad_norm": 3.0075137615203857, "learning_rate": 1.4660022004469153e-05, "loss": 0.6676, "step": 17939 }, { "epoch": 2.9286151585649565, "grad_norm": 2.6349408626556396, "learning_rate": 1.4659454723558249e-05, "loss": 0.5199, "step": 17940 }, { "epoch": 2.928778417207461, "grad_norm": 3.388850688934326, "learning_rate": 1.4658887423494089e-05, "loss": 0.7538, "step": 17941 }, { "epoch": 2.9289416758499653, "grad_norm": 3.1973209381103516, "learning_rate": 1.4658320104279002e-05, "loss": 0.6882, "step": 17942 }, { "epoch": 2.92910493449247, "grad_norm": 2.57468318939209, "learning_rate": 1.4657752765915318e-05, "loss": 0.5214, "step": 17943 }, { "epoch": 2.929268193134974, "grad_norm": 2.392148494720459, "learning_rate": 1.4657185408405375e-05, "loss": 0.5346, "step": 17944 }, { "epoch": 2.9294314517774787, "grad_norm": 2.582336664199829, "learning_rate": 1.46566180317515e-05, "loss": 0.5147, "step": 17945 }, { "epoch": 2.929594710419983, "grad_norm": 2.295212745666504, "learning_rate": 1.4656050635956023e-05, "loss": 0.424, "step": 17946 }, { "epoch": 2.929757969062487, "grad_norm": 3.006674289703369, "learning_rate": 1.4655483221021283e-05, "loss": 0.6357, "step": 17947 }, { "epoch": 2.9299212277049915, "grad_norm": 3.081965923309326, "learning_rate": 1.465491578694961e-05, "loss": 0.613, "step": 17948 }, { "epoch": 2.930084486347496, "grad_norm": 3.3781800270080566, "learning_rate": 1.4654348333743335e-05, "loss": 0.5763, "step": 17949 }, { "epoch": 2.9302477449900004, "grad_norm": 2.309530735015869, "learning_rate": 1.4653780861404791e-05, "loss": 0.5236, "step": 17950 }, { "epoch": 2.930411003632505, "grad_norm": 2.5788683891296387, "learning_rate": 1.4653213369936311e-05, "loss": 0.4619, "step": 17951 }, { "epoch": 2.9305742622750093, "grad_norm": 2.927248954772949, "learning_rate": 1.4652645859340227e-05, "loss": 0.6379, "step": 17952 }, { "epoch": 2.9307375209175137, "grad_norm": 3.0739779472351074, "learning_rate": 1.4652078329618875e-05, "loss": 0.5255, "step": 17953 }, { "epoch": 2.9309007795600177, "grad_norm": 2.644430637359619, "learning_rate": 1.4651510780774585e-05, "loss": 0.4583, "step": 17954 }, { "epoch": 2.931064038202522, "grad_norm": 2.4389901161193848, "learning_rate": 1.4650943212809693e-05, "loss": 0.5073, "step": 17955 }, { "epoch": 2.9312272968450266, "grad_norm": 3.1494247913360596, "learning_rate": 1.4650375625726524e-05, "loss": 0.6046, "step": 17956 }, { "epoch": 2.931390555487531, "grad_norm": 2.762261390686035, "learning_rate": 1.4649808019527418e-05, "loss": 0.5276, "step": 17957 }, { "epoch": 2.9315538141300355, "grad_norm": 2.9431519508361816, "learning_rate": 1.4649240394214709e-05, "loss": 0.5964, "step": 17958 }, { "epoch": 2.93171707277254, "grad_norm": 2.7887556552886963, "learning_rate": 1.4648672749790728e-05, "loss": 0.5575, "step": 17959 }, { "epoch": 2.9318803314150443, "grad_norm": 2.674576759338379, "learning_rate": 1.4648105086257808e-05, "loss": 0.527, "step": 17960 }, { "epoch": 2.9320435900575488, "grad_norm": 2.629720687866211, "learning_rate": 1.4647537403618283e-05, "loss": 0.4574, "step": 17961 }, { "epoch": 2.932206848700053, "grad_norm": 2.4930992126464844, "learning_rate": 1.4646969701874488e-05, "loss": 0.5527, "step": 17962 }, { "epoch": 2.9323701073425577, "grad_norm": 2.724189043045044, "learning_rate": 1.4646401981028752e-05, "loss": 0.4957, "step": 17963 }, { "epoch": 2.932533365985062, "grad_norm": 2.6177539825439453, "learning_rate": 1.4645834241083416e-05, "loss": 0.5106, "step": 17964 }, { "epoch": 2.932696624627566, "grad_norm": 2.3317620754241943, "learning_rate": 1.464526648204081e-05, "loss": 0.4954, "step": 17965 }, { "epoch": 2.9328598832700705, "grad_norm": 2.910452127456665, "learning_rate": 1.4644698703903265e-05, "loss": 0.4874, "step": 17966 }, { "epoch": 2.933023141912575, "grad_norm": 2.891859292984009, "learning_rate": 1.4644130906673114e-05, "loss": 0.552, "step": 17967 }, { "epoch": 2.9331864005550794, "grad_norm": 2.9232277870178223, "learning_rate": 1.46435630903527e-05, "loss": 0.5362, "step": 17968 }, { "epoch": 2.933349659197584, "grad_norm": 2.9866902828216553, "learning_rate": 1.464299525494435e-05, "loss": 0.5416, "step": 17969 }, { "epoch": 2.9335129178400883, "grad_norm": 2.924025058746338, "learning_rate": 1.46424274004504e-05, "loss": 0.5231, "step": 17970 }, { "epoch": 2.9336761764825927, "grad_norm": 2.5918469429016113, "learning_rate": 1.4641859526873183e-05, "loss": 0.4579, "step": 17971 }, { "epoch": 2.9338394351250967, "grad_norm": 2.6461145877838135, "learning_rate": 1.4641291634215034e-05, "loss": 0.5151, "step": 17972 }, { "epoch": 2.934002693767601, "grad_norm": 3.137211561203003, "learning_rate": 1.4640723722478289e-05, "loss": 0.6012, "step": 17973 }, { "epoch": 2.9341659524101056, "grad_norm": 2.689979314804077, "learning_rate": 1.4640155791665281e-05, "loss": 0.5392, "step": 17974 }, { "epoch": 2.93432921105261, "grad_norm": 2.585836172103882, "learning_rate": 1.4639587841778342e-05, "loss": 0.4753, "step": 17975 }, { "epoch": 2.9344924696951145, "grad_norm": 2.993760585784912, "learning_rate": 1.463901987281981e-05, "loss": 0.5224, "step": 17976 }, { "epoch": 2.934655728337619, "grad_norm": 2.8949453830718994, "learning_rate": 1.4638451884792022e-05, "loss": 0.5625, "step": 17977 }, { "epoch": 2.9348189869801233, "grad_norm": 3.7433063983917236, "learning_rate": 1.4637883877697308e-05, "loss": 0.8793, "step": 17978 }, { "epoch": 2.9349822456226278, "grad_norm": 2.9581363201141357, "learning_rate": 1.4637315851538003e-05, "loss": 0.615, "step": 17979 }, { "epoch": 2.935145504265132, "grad_norm": 2.8461453914642334, "learning_rate": 1.4636747806316445e-05, "loss": 0.5382, "step": 17980 }, { "epoch": 2.9353087629076366, "grad_norm": 2.699542760848999, "learning_rate": 1.4636179742034968e-05, "loss": 0.5696, "step": 17981 }, { "epoch": 2.935472021550141, "grad_norm": 2.8177592754364014, "learning_rate": 1.4635611658695906e-05, "loss": 0.4961, "step": 17982 }, { "epoch": 2.935635280192645, "grad_norm": 2.806450366973877, "learning_rate": 1.4635043556301596e-05, "loss": 0.5801, "step": 17983 }, { "epoch": 2.9357985388351495, "grad_norm": 2.2919766902923584, "learning_rate": 1.4634475434854369e-05, "loss": 0.551, "step": 17984 }, { "epoch": 2.935961797477654, "grad_norm": 2.138485908508301, "learning_rate": 1.4633907294356566e-05, "loss": 0.4793, "step": 17985 }, { "epoch": 2.9361250561201584, "grad_norm": 3.0735151767730713, "learning_rate": 1.463333913481052e-05, "loss": 0.5361, "step": 17986 }, { "epoch": 2.936288314762663, "grad_norm": 2.4574692249298096, "learning_rate": 1.4632770956218565e-05, "loss": 0.5613, "step": 17987 }, { "epoch": 2.9364515734051673, "grad_norm": 2.6907365322113037, "learning_rate": 1.463220275858304e-05, "loss": 0.6566, "step": 17988 }, { "epoch": 2.9366148320476713, "grad_norm": 2.912411689758301, "learning_rate": 1.4631634541906279e-05, "loss": 0.652, "step": 17989 }, { "epoch": 2.9367780906901757, "grad_norm": 2.2731363773345947, "learning_rate": 1.4631066306190613e-05, "loss": 0.4884, "step": 17990 }, { "epoch": 2.93694134933268, "grad_norm": 3.204174280166626, "learning_rate": 1.4630498051438384e-05, "loss": 0.6699, "step": 17991 }, { "epoch": 2.9371046079751846, "grad_norm": 2.446859836578369, "learning_rate": 1.4629929777651928e-05, "loss": 0.4827, "step": 17992 }, { "epoch": 2.937267866617689, "grad_norm": 2.922034502029419, "learning_rate": 1.4629361484833578e-05, "loss": 0.5589, "step": 17993 }, { "epoch": 2.9374311252601935, "grad_norm": 2.886240005493164, "learning_rate": 1.4628793172985672e-05, "loss": 0.5558, "step": 17994 }, { "epoch": 2.937594383902698, "grad_norm": 3.120474100112915, "learning_rate": 1.4628224842110541e-05, "loss": 0.6314, "step": 17995 }, { "epoch": 2.9377576425452023, "grad_norm": 3.004727602005005, "learning_rate": 1.4627656492210527e-05, "loss": 0.628, "step": 17996 }, { "epoch": 2.9379209011877068, "grad_norm": 2.6041557788848877, "learning_rate": 1.4627088123287964e-05, "loss": 0.5309, "step": 17997 }, { "epoch": 2.938084159830211, "grad_norm": 3.1318843364715576, "learning_rate": 1.4626519735345195e-05, "loss": 0.537, "step": 17998 }, { "epoch": 2.9382474184727156, "grad_norm": 3.2242753505706787, "learning_rate": 1.4625951328384544e-05, "loss": 0.6712, "step": 17999 }, { "epoch": 2.9384106771152196, "grad_norm": 3.0350775718688965, "learning_rate": 1.4625382902408356e-05, "loss": 0.5619, "step": 18000 }, { "epoch": 2.938573935757724, "grad_norm": 2.1466715335845947, "learning_rate": 1.4624814457418963e-05, "loss": 0.4176, "step": 18001 }, { "epoch": 2.9387371944002285, "grad_norm": 2.653052568435669, "learning_rate": 1.4624245993418704e-05, "loss": 0.5312, "step": 18002 }, { "epoch": 2.938900453042733, "grad_norm": 2.739769220352173, "learning_rate": 1.462367751040992e-05, "loss": 0.4915, "step": 18003 }, { "epoch": 2.9390637116852374, "grad_norm": 2.677269697189331, "learning_rate": 1.4623109008394942e-05, "loss": 0.5719, "step": 18004 }, { "epoch": 2.939226970327742, "grad_norm": 2.816645860671997, "learning_rate": 1.4622540487376107e-05, "loss": 0.5453, "step": 18005 }, { "epoch": 2.9393902289702463, "grad_norm": 2.7904200553894043, "learning_rate": 1.4621971947355755e-05, "loss": 0.5614, "step": 18006 }, { "epoch": 2.9395534876127503, "grad_norm": 3.047800302505493, "learning_rate": 1.4621403388336217e-05, "loss": 0.5928, "step": 18007 }, { "epoch": 2.9397167462552547, "grad_norm": 3.0599112510681152, "learning_rate": 1.4620834810319841e-05, "loss": 0.6341, "step": 18008 }, { "epoch": 2.939880004897759, "grad_norm": 3.149181365966797, "learning_rate": 1.4620266213308956e-05, "loss": 0.5719, "step": 18009 }, { "epoch": 2.9400432635402636, "grad_norm": 2.944241523742676, "learning_rate": 1.46196975973059e-05, "loss": 0.5886, "step": 18010 }, { "epoch": 2.940206522182768, "grad_norm": 2.788299798965454, "learning_rate": 1.4619128962313012e-05, "loss": 0.6097, "step": 18011 }, { "epoch": 2.9403697808252724, "grad_norm": 3.5107452869415283, "learning_rate": 1.461856030833263e-05, "loss": 0.6448, "step": 18012 }, { "epoch": 2.940533039467777, "grad_norm": 2.5148637294769287, "learning_rate": 1.4617991635367088e-05, "loss": 0.5358, "step": 18013 }, { "epoch": 2.9406962981102813, "grad_norm": 3.058589458465576, "learning_rate": 1.4617422943418729e-05, "loss": 0.5963, "step": 18014 }, { "epoch": 2.9408595567527858, "grad_norm": 2.9455223083496094, "learning_rate": 1.4616854232489885e-05, "loss": 0.5755, "step": 18015 }, { "epoch": 2.94102281539529, "grad_norm": 3.5070741176605225, "learning_rate": 1.4616285502582898e-05, "loss": 0.584, "step": 18016 }, { "epoch": 2.9411860740377946, "grad_norm": 1.9905191659927368, "learning_rate": 1.4615716753700105e-05, "loss": 0.416, "step": 18017 }, { "epoch": 2.9413493326802986, "grad_norm": 3.1373610496520996, "learning_rate": 1.4615147985843843e-05, "loss": 0.6581, "step": 18018 }, { "epoch": 2.941512591322803, "grad_norm": 2.4470622539520264, "learning_rate": 1.461457919901645e-05, "loss": 0.497, "step": 18019 }, { "epoch": 2.9416758499653075, "grad_norm": 2.7925074100494385, "learning_rate": 1.4614010393220263e-05, "loss": 0.5478, "step": 18020 }, { "epoch": 2.941839108607812, "grad_norm": 2.389432430267334, "learning_rate": 1.4613441568457623e-05, "loss": 0.4383, "step": 18021 }, { "epoch": 2.9420023672503164, "grad_norm": 3.2883079051971436, "learning_rate": 1.4612872724730867e-05, "loss": 0.554, "step": 18022 }, { "epoch": 2.942165625892821, "grad_norm": 2.942270517349243, "learning_rate": 1.4612303862042333e-05, "loss": 0.6023, "step": 18023 }, { "epoch": 2.942328884535325, "grad_norm": 3.106370210647583, "learning_rate": 1.4611734980394359e-05, "loss": 0.6226, "step": 18024 }, { "epoch": 2.9424921431778293, "grad_norm": 2.1204845905303955, "learning_rate": 1.4611166079789283e-05, "loss": 0.448, "step": 18025 }, { "epoch": 2.9426554018203337, "grad_norm": 2.933525800704956, "learning_rate": 1.4610597160229445e-05, "loss": 0.5875, "step": 18026 }, { "epoch": 2.942818660462838, "grad_norm": 3.344179391860962, "learning_rate": 1.4610028221717183e-05, "loss": 0.7766, "step": 18027 }, { "epoch": 2.9429819191053426, "grad_norm": 2.6617472171783447, "learning_rate": 1.4609459264254838e-05, "loss": 0.4927, "step": 18028 }, { "epoch": 2.943145177747847, "grad_norm": 3.000622272491455, "learning_rate": 1.4608890287844744e-05, "loss": 0.5707, "step": 18029 }, { "epoch": 2.9433084363903514, "grad_norm": 2.9670937061309814, "learning_rate": 1.460832129248924e-05, "loss": 0.5963, "step": 18030 }, { "epoch": 2.943471695032856, "grad_norm": 2.358203411102295, "learning_rate": 1.4607752278190671e-05, "loss": 0.4798, "step": 18031 }, { "epoch": 2.9436349536753603, "grad_norm": 2.259105920791626, "learning_rate": 1.460718324495137e-05, "loss": 0.3998, "step": 18032 }, { "epoch": 2.9437982123178648, "grad_norm": 2.653432607650757, "learning_rate": 1.4606614192773682e-05, "loss": 0.5349, "step": 18033 }, { "epoch": 2.943961470960369, "grad_norm": 2.911226987838745, "learning_rate": 1.460604512165994e-05, "loss": 0.5564, "step": 18034 }, { "epoch": 2.944124729602873, "grad_norm": 2.2323403358459473, "learning_rate": 1.4605476031612485e-05, "loss": 0.4973, "step": 18035 }, { "epoch": 2.9442879882453776, "grad_norm": 3.159269094467163, "learning_rate": 1.4604906922633658e-05, "loss": 0.6303, "step": 18036 }, { "epoch": 2.944451246887882, "grad_norm": 2.8564071655273438, "learning_rate": 1.4604337794725798e-05, "loss": 0.6645, "step": 18037 }, { "epoch": 2.9446145055303865, "grad_norm": 3.3627147674560547, "learning_rate": 1.4603768647891244e-05, "loss": 0.555, "step": 18038 }, { "epoch": 2.944777764172891, "grad_norm": 2.735652446746826, "learning_rate": 1.4603199482132333e-05, "loss": 0.5586, "step": 18039 }, { "epoch": 2.9449410228153954, "grad_norm": 3.469000816345215, "learning_rate": 1.4602630297451408e-05, "loss": 0.6639, "step": 18040 }, { "epoch": 2.9451042814579, "grad_norm": 2.4713032245635986, "learning_rate": 1.4602061093850807e-05, "loss": 0.4701, "step": 18041 }, { "epoch": 2.945267540100404, "grad_norm": 2.4549481868743896, "learning_rate": 1.4601491871332875e-05, "loss": 0.5444, "step": 18042 }, { "epoch": 2.9454307987429083, "grad_norm": 2.8973429203033447, "learning_rate": 1.4600922629899942e-05, "loss": 0.5933, "step": 18043 }, { "epoch": 2.9455940573854127, "grad_norm": 2.9403553009033203, "learning_rate": 1.4600353369554356e-05, "loss": 0.556, "step": 18044 }, { "epoch": 2.945757316027917, "grad_norm": 3.0994927883148193, "learning_rate": 1.4599784090298452e-05, "loss": 0.6395, "step": 18045 }, { "epoch": 2.9459205746704216, "grad_norm": 3.760101079940796, "learning_rate": 1.4599214792134574e-05, "loss": 0.6819, "step": 18046 }, { "epoch": 2.946083833312926, "grad_norm": 3.195919990539551, "learning_rate": 1.4598645475065065e-05, "loss": 0.5817, "step": 18047 }, { "epoch": 2.9462470919554304, "grad_norm": 3.037580728530884, "learning_rate": 1.4598076139092257e-05, "loss": 0.6275, "step": 18048 }, { "epoch": 2.946410350597935, "grad_norm": 3.1917030811309814, "learning_rate": 1.4597506784218493e-05, "loss": 0.6335, "step": 18049 }, { "epoch": 2.9465736092404393, "grad_norm": 3.1317875385284424, "learning_rate": 1.4596937410446117e-05, "loss": 0.5813, "step": 18050 }, { "epoch": 2.9467368678829438, "grad_norm": 3.1085853576660156, "learning_rate": 1.4596368017777462e-05, "loss": 0.917, "step": 18051 }, { "epoch": 2.946900126525448, "grad_norm": 2.8026511669158936, "learning_rate": 1.4595798606214882e-05, "loss": 0.6567, "step": 18052 }, { "epoch": 2.947063385167952, "grad_norm": 2.695295572280884, "learning_rate": 1.4595229175760703e-05, "loss": 0.5316, "step": 18053 }, { "epoch": 2.9472266438104566, "grad_norm": 3.140913724899292, "learning_rate": 1.4594659726417276e-05, "loss": 0.5909, "step": 18054 }, { "epoch": 2.947389902452961, "grad_norm": 3.1704368591308594, "learning_rate": 1.4594090258186934e-05, "loss": 0.6433, "step": 18055 }, { "epoch": 2.9475531610954655, "grad_norm": 2.6011741161346436, "learning_rate": 1.4593520771072023e-05, "loss": 0.5303, "step": 18056 }, { "epoch": 2.94771641973797, "grad_norm": 3.3624963760375977, "learning_rate": 1.4592951265074882e-05, "loss": 0.6254, "step": 18057 }, { "epoch": 2.9478796783804744, "grad_norm": 2.9015722274780273, "learning_rate": 1.4592381740197855e-05, "loss": 0.6343, "step": 18058 }, { "epoch": 2.948042937022979, "grad_norm": 3.1592843532562256, "learning_rate": 1.4591812196443277e-05, "loss": 0.6002, "step": 18059 }, { "epoch": 2.948206195665483, "grad_norm": 3.1813085079193115, "learning_rate": 1.4591242633813496e-05, "loss": 0.6711, "step": 18060 }, { "epoch": 2.9483694543079872, "grad_norm": 3.0052905082702637, "learning_rate": 1.459067305231085e-05, "loss": 0.5743, "step": 18061 }, { "epoch": 2.9485327129504917, "grad_norm": 3.563600778579712, "learning_rate": 1.4590103451937678e-05, "loss": 0.7684, "step": 18062 }, { "epoch": 2.948695971592996, "grad_norm": 3.1143317222595215, "learning_rate": 1.4589533832696325e-05, "loss": 0.6117, "step": 18063 }, { "epoch": 2.9488592302355006, "grad_norm": 2.897495746612549, "learning_rate": 1.4588964194589132e-05, "loss": 0.5565, "step": 18064 }, { "epoch": 2.949022488878005, "grad_norm": 2.8995230197906494, "learning_rate": 1.458839453761844e-05, "loss": 0.5256, "step": 18065 }, { "epoch": 2.9491857475205094, "grad_norm": 2.7824385166168213, "learning_rate": 1.4587824861786591e-05, "loss": 0.5355, "step": 18066 }, { "epoch": 2.949349006163014, "grad_norm": 2.215404987335205, "learning_rate": 1.4587255167095926e-05, "loss": 0.4157, "step": 18067 }, { "epoch": 2.9495122648055183, "grad_norm": 2.404714345932007, "learning_rate": 1.4586685453548785e-05, "loss": 0.5259, "step": 18068 }, { "epoch": 2.9496755234480228, "grad_norm": 2.4329018592834473, "learning_rate": 1.4586115721147516e-05, "loss": 0.5218, "step": 18069 }, { "epoch": 2.949838782090527, "grad_norm": 2.6720781326293945, "learning_rate": 1.4585545969894454e-05, "loss": 0.504, "step": 18070 }, { "epoch": 2.950002040733031, "grad_norm": 2.7990810871124268, "learning_rate": 1.4584976199791944e-05, "loss": 0.5594, "step": 18071 }, { "epoch": 2.9501652993755356, "grad_norm": 2.588329553604126, "learning_rate": 1.4584406410842328e-05, "loss": 0.5588, "step": 18072 }, { "epoch": 2.95032855801804, "grad_norm": 3.2584779262542725, "learning_rate": 1.4583836603047952e-05, "loss": 0.621, "step": 18073 }, { "epoch": 2.9504918166605445, "grad_norm": 2.748711109161377, "learning_rate": 1.4583266776411152e-05, "loss": 0.52, "step": 18074 }, { "epoch": 2.950655075303049, "grad_norm": 2.9481148719787598, "learning_rate": 1.4582696930934272e-05, "loss": 0.6239, "step": 18075 }, { "epoch": 2.9508183339455534, "grad_norm": 3.3976166248321533, "learning_rate": 1.4582127066619656e-05, "loss": 0.5859, "step": 18076 }, { "epoch": 2.9509815925880574, "grad_norm": 2.684767961502075, "learning_rate": 1.4581557183469647e-05, "loss": 0.5121, "step": 18077 }, { "epoch": 2.951144851230562, "grad_norm": 2.8906972408294678, "learning_rate": 1.4580987281486585e-05, "loss": 0.5542, "step": 18078 }, { "epoch": 2.9513081098730662, "grad_norm": 3.3765008449554443, "learning_rate": 1.4580417360672814e-05, "loss": 0.6416, "step": 18079 }, { "epoch": 2.9514713685155707, "grad_norm": 3.021291732788086, "learning_rate": 1.4579847421030677e-05, "loss": 0.5672, "step": 18080 }, { "epoch": 2.951634627158075, "grad_norm": 2.880089044570923, "learning_rate": 1.4579277462562516e-05, "loss": 0.4781, "step": 18081 }, { "epoch": 2.9517978858005796, "grad_norm": 2.737976551055908, "learning_rate": 1.4578707485270678e-05, "loss": 0.5306, "step": 18082 }, { "epoch": 2.951961144443084, "grad_norm": 2.6068453788757324, "learning_rate": 1.4578137489157498e-05, "loss": 0.5216, "step": 18083 }, { "epoch": 2.9521244030855884, "grad_norm": 3.1768314838409424, "learning_rate": 1.4577567474225323e-05, "loss": 0.6243, "step": 18084 }, { "epoch": 2.952287661728093, "grad_norm": 2.5011963844299316, "learning_rate": 1.4576997440476499e-05, "loss": 0.4547, "step": 18085 }, { "epoch": 2.9524509203705973, "grad_norm": 2.234008550643921, "learning_rate": 1.4576427387913369e-05, "loss": 0.4477, "step": 18086 }, { "epoch": 2.9526141790131017, "grad_norm": 2.4274673461914062, "learning_rate": 1.4575857316538269e-05, "loss": 0.4615, "step": 18087 }, { "epoch": 2.9527774376556057, "grad_norm": 3.3969035148620605, "learning_rate": 1.4575287226353552e-05, "loss": 0.6611, "step": 18088 }, { "epoch": 2.95294069629811, "grad_norm": 3.2979040145874023, "learning_rate": 1.4574717117361554e-05, "loss": 0.7079, "step": 18089 }, { "epoch": 2.9531039549406146, "grad_norm": 2.7175493240356445, "learning_rate": 1.4574146989564619e-05, "loss": 0.489, "step": 18090 }, { "epoch": 2.953267213583119, "grad_norm": 3.453770637512207, "learning_rate": 1.4573576842965098e-05, "loss": 0.6299, "step": 18091 }, { "epoch": 2.9534304722256235, "grad_norm": 3.40934419631958, "learning_rate": 1.4573006677565326e-05, "loss": 0.6843, "step": 18092 }, { "epoch": 2.953593730868128, "grad_norm": 3.6935226917266846, "learning_rate": 1.4572436493367652e-05, "loss": 0.813, "step": 18093 }, { "epoch": 2.9537569895106324, "grad_norm": 2.53244686126709, "learning_rate": 1.457186629037442e-05, "loss": 0.4925, "step": 18094 }, { "epoch": 2.9539202481531364, "grad_norm": 3.1830742359161377, "learning_rate": 1.4571296068587966e-05, "loss": 0.6245, "step": 18095 }, { "epoch": 2.954083506795641, "grad_norm": 2.831153154373169, "learning_rate": 1.4570725828010645e-05, "loss": 0.6075, "step": 18096 }, { "epoch": 2.9542467654381452, "grad_norm": 3.09244441986084, "learning_rate": 1.4570155568644795e-05, "loss": 0.5997, "step": 18097 }, { "epoch": 2.9544100240806497, "grad_norm": 3.404414653778076, "learning_rate": 1.456958529049276e-05, "loss": 0.6081, "step": 18098 }, { "epoch": 2.954573282723154, "grad_norm": 3.5454580783843994, "learning_rate": 1.4569014993556886e-05, "loss": 0.632, "step": 18099 }, { "epoch": 2.9547365413656586, "grad_norm": 2.8005831241607666, "learning_rate": 1.4568444677839517e-05, "loss": 0.5511, "step": 18100 }, { "epoch": 2.954899800008163, "grad_norm": 3.2655766010284424, "learning_rate": 1.4567874343342996e-05, "loss": 0.6128, "step": 18101 }, { "epoch": 2.9550630586506674, "grad_norm": 2.9795944690704346, "learning_rate": 1.456730399006967e-05, "loss": 0.5293, "step": 18102 }, { "epoch": 2.955226317293172, "grad_norm": 3.2743289470672607, "learning_rate": 1.456673361802188e-05, "loss": 0.673, "step": 18103 }, { "epoch": 2.9553895759356763, "grad_norm": 3.392521381378174, "learning_rate": 1.4566163227201973e-05, "loss": 0.5494, "step": 18104 }, { "epoch": 2.9555528345781807, "grad_norm": 2.537806272506714, "learning_rate": 1.4565592817612294e-05, "loss": 0.5352, "step": 18105 }, { "epoch": 2.9557160932206847, "grad_norm": 3.0039708614349365, "learning_rate": 1.4565022389255186e-05, "loss": 0.6054, "step": 18106 }, { "epoch": 2.955879351863189, "grad_norm": 3.2658870220184326, "learning_rate": 1.4564451942132996e-05, "loss": 0.7285, "step": 18107 }, { "epoch": 2.9560426105056936, "grad_norm": 2.608917713165283, "learning_rate": 1.4563881476248065e-05, "loss": 0.4963, "step": 18108 }, { "epoch": 2.956205869148198, "grad_norm": 3.174467086791992, "learning_rate": 1.4563310991602744e-05, "loss": 0.7372, "step": 18109 }, { "epoch": 2.9563691277907025, "grad_norm": 2.3111371994018555, "learning_rate": 1.4562740488199372e-05, "loss": 0.4963, "step": 18110 }, { "epoch": 2.956532386433207, "grad_norm": 2.9035980701446533, "learning_rate": 1.4562169966040297e-05, "loss": 0.5982, "step": 18111 }, { "epoch": 2.956695645075711, "grad_norm": 3.0035834312438965, "learning_rate": 1.4561599425127864e-05, "loss": 0.5922, "step": 18112 }, { "epoch": 2.9568589037182154, "grad_norm": 2.8710970878601074, "learning_rate": 1.456102886546442e-05, "loss": 0.5025, "step": 18113 }, { "epoch": 2.95702216236072, "grad_norm": 2.8186769485473633, "learning_rate": 1.4560458287052306e-05, "loss": 0.5474, "step": 18114 }, { "epoch": 2.9571854210032242, "grad_norm": 2.6380410194396973, "learning_rate": 1.455988768989387e-05, "loss": 0.5748, "step": 18115 }, { "epoch": 2.9573486796457287, "grad_norm": 2.6016900539398193, "learning_rate": 1.455931707399146e-05, "loss": 0.5467, "step": 18116 }, { "epoch": 2.957511938288233, "grad_norm": 3.11521577835083, "learning_rate": 1.4558746439347417e-05, "loss": 0.6487, "step": 18117 }, { "epoch": 2.9576751969307375, "grad_norm": 3.0325427055358887, "learning_rate": 1.4558175785964088e-05, "loss": 0.6308, "step": 18118 }, { "epoch": 2.957838455573242, "grad_norm": 2.9636058807373047, "learning_rate": 1.4557605113843823e-05, "loss": 0.577, "step": 18119 }, { "epoch": 2.9580017142157464, "grad_norm": 2.8688883781433105, "learning_rate": 1.4557034422988961e-05, "loss": 0.5727, "step": 18120 }, { "epoch": 2.958164972858251, "grad_norm": 2.516984701156616, "learning_rate": 1.4556463713401854e-05, "loss": 0.513, "step": 18121 }, { "epoch": 2.9583282315007553, "grad_norm": 2.9511265754699707, "learning_rate": 1.455589298508484e-05, "loss": 0.4939, "step": 18122 }, { "epoch": 2.9584914901432593, "grad_norm": 2.740650177001953, "learning_rate": 1.4555322238040275e-05, "loss": 0.5504, "step": 18123 }, { "epoch": 2.9586547487857637, "grad_norm": 3.5701043605804443, "learning_rate": 1.4554751472270498e-05, "loss": 0.7203, "step": 18124 }, { "epoch": 2.958818007428268, "grad_norm": 3.0901710987091064, "learning_rate": 1.4554180687777862e-05, "loss": 0.6652, "step": 18125 }, { "epoch": 2.9589812660707726, "grad_norm": 2.7384157180786133, "learning_rate": 1.4553609884564705e-05, "loss": 0.6, "step": 18126 }, { "epoch": 2.959144524713277, "grad_norm": 3.437622308731079, "learning_rate": 1.4553039062633378e-05, "loss": 0.7147, "step": 18127 }, { "epoch": 2.9593077833557815, "grad_norm": 3.4479947090148926, "learning_rate": 1.4552468221986225e-05, "loss": 0.6738, "step": 18128 }, { "epoch": 2.959471041998286, "grad_norm": 2.8468668460845947, "learning_rate": 1.4551897362625593e-05, "loss": 0.58, "step": 18129 }, { "epoch": 2.95963430064079, "grad_norm": 2.7294328212738037, "learning_rate": 1.4551326484553836e-05, "loss": 0.5486, "step": 18130 }, { "epoch": 2.9597975592832944, "grad_norm": 2.884150981903076, "learning_rate": 1.4550755587773289e-05, "loss": 0.5335, "step": 18131 }, { "epoch": 2.959960817925799, "grad_norm": 2.534641981124878, "learning_rate": 1.4550184672286304e-05, "loss": 0.5, "step": 18132 }, { "epoch": 2.9601240765683032, "grad_norm": 3.35176420211792, "learning_rate": 1.4549613738095231e-05, "loss": 0.5857, "step": 18133 }, { "epoch": 2.9602873352108077, "grad_norm": 2.6944644451141357, "learning_rate": 1.4549042785202413e-05, "loss": 0.5241, "step": 18134 }, { "epoch": 2.960450593853312, "grad_norm": 2.437905788421631, "learning_rate": 1.4548471813610196e-05, "loss": 0.4562, "step": 18135 }, { "epoch": 2.9606138524958165, "grad_norm": 3.017685651779175, "learning_rate": 1.454790082332093e-05, "loss": 0.5475, "step": 18136 }, { "epoch": 2.960777111138321, "grad_norm": 2.7049598693847656, "learning_rate": 1.4547329814336962e-05, "loss": 0.4811, "step": 18137 }, { "epoch": 2.9609403697808254, "grad_norm": 2.6579270362854004, "learning_rate": 1.4546758786660638e-05, "loss": 0.5371, "step": 18138 }, { "epoch": 2.96110362842333, "grad_norm": 2.9116578102111816, "learning_rate": 1.4546187740294301e-05, "loss": 0.5056, "step": 18139 }, { "epoch": 2.9612668870658343, "grad_norm": 2.62660551071167, "learning_rate": 1.4545616675240309e-05, "loss": 0.5341, "step": 18140 }, { "epoch": 2.9614301457083383, "grad_norm": 2.8121371269226074, "learning_rate": 1.4545045591501002e-05, "loss": 0.6057, "step": 18141 }, { "epoch": 2.9615934043508427, "grad_norm": 2.4723901748657227, "learning_rate": 1.4544474489078726e-05, "loss": 0.5189, "step": 18142 }, { "epoch": 2.961756662993347, "grad_norm": 3.015369176864624, "learning_rate": 1.4543903367975832e-05, "loss": 0.6617, "step": 18143 }, { "epoch": 2.9619199216358516, "grad_norm": 2.3426389694213867, "learning_rate": 1.4543332228194667e-05, "loss": 0.4811, "step": 18144 }, { "epoch": 2.962083180278356, "grad_norm": 2.903855800628662, "learning_rate": 1.454276106973758e-05, "loss": 0.5566, "step": 18145 }, { "epoch": 2.9622464389208605, "grad_norm": 2.565037488937378, "learning_rate": 1.4542189892606917e-05, "loss": 0.4768, "step": 18146 }, { "epoch": 2.962409697563365, "grad_norm": 2.7397265434265137, "learning_rate": 1.4541618696805026e-05, "loss": 0.5634, "step": 18147 }, { "epoch": 2.962572956205869, "grad_norm": 2.6284780502319336, "learning_rate": 1.4541047482334256e-05, "loss": 0.5402, "step": 18148 }, { "epoch": 2.9627362148483734, "grad_norm": 2.7698917388916016, "learning_rate": 1.4540476249196956e-05, "loss": 0.5149, "step": 18149 }, { "epoch": 2.962899473490878, "grad_norm": 3.1305880546569824, "learning_rate": 1.4539904997395468e-05, "loss": 0.6064, "step": 18150 }, { "epoch": 2.9630627321333822, "grad_norm": 2.643662452697754, "learning_rate": 1.453933372693215e-05, "loss": 0.5362, "step": 18151 }, { "epoch": 2.9632259907758867, "grad_norm": 2.8286452293395996, "learning_rate": 1.4538762437809341e-05, "loss": 0.6002, "step": 18152 }, { "epoch": 2.963389249418391, "grad_norm": 2.8525023460388184, "learning_rate": 1.4538191130029396e-05, "loss": 0.6163, "step": 18153 }, { "epoch": 2.9635525080608955, "grad_norm": 3.170927047729492, "learning_rate": 1.453761980359466e-05, "loss": 0.613, "step": 18154 }, { "epoch": 2.9637157667034, "grad_norm": 2.704608917236328, "learning_rate": 1.4537048458507484e-05, "loss": 0.5931, "step": 18155 }, { "epoch": 2.9638790253459044, "grad_norm": 2.9918715953826904, "learning_rate": 1.4536477094770211e-05, "loss": 0.5728, "step": 18156 }, { "epoch": 2.964042283988409, "grad_norm": 2.442122459411621, "learning_rate": 1.4535905712385196e-05, "loss": 0.478, "step": 18157 }, { "epoch": 2.9642055426309133, "grad_norm": 3.382598638534546, "learning_rate": 1.4535334311354787e-05, "loss": 0.6172, "step": 18158 }, { "epoch": 2.9643688012734173, "grad_norm": 2.6864304542541504, "learning_rate": 1.4534762891681329e-05, "loss": 0.5338, "step": 18159 }, { "epoch": 2.9645320599159217, "grad_norm": 2.943744659423828, "learning_rate": 1.4534191453367174e-05, "loss": 0.5746, "step": 18160 }, { "epoch": 2.964695318558426, "grad_norm": 2.9124884605407715, "learning_rate": 1.453361999641467e-05, "loss": 0.6245, "step": 18161 }, { "epoch": 2.9648585772009306, "grad_norm": 3.025879383087158, "learning_rate": 1.4533048520826166e-05, "loss": 0.5276, "step": 18162 }, { "epoch": 2.965021835843435, "grad_norm": 2.786565065383911, "learning_rate": 1.4532477026604008e-05, "loss": 0.4753, "step": 18163 }, { "epoch": 2.9651850944859395, "grad_norm": 2.735132932662964, "learning_rate": 1.4531905513750552e-05, "loss": 0.5053, "step": 18164 }, { "epoch": 2.9653483531284435, "grad_norm": 3.2666213512420654, "learning_rate": 1.4531333982268145e-05, "loss": 0.5258, "step": 18165 }, { "epoch": 2.965511611770948, "grad_norm": 2.588416814804077, "learning_rate": 1.4530762432159131e-05, "loss": 0.4595, "step": 18166 }, { "epoch": 2.9656748704134523, "grad_norm": 3.378288507461548, "learning_rate": 1.4530190863425865e-05, "loss": 0.5917, "step": 18167 }, { "epoch": 2.965838129055957, "grad_norm": 2.7328245639801025, "learning_rate": 1.4529619276070697e-05, "loss": 0.5728, "step": 18168 }, { "epoch": 2.9660013876984612, "grad_norm": 3.1600122451782227, "learning_rate": 1.452904767009597e-05, "loss": 0.598, "step": 18169 }, { "epoch": 2.9661646463409657, "grad_norm": 2.260838747024536, "learning_rate": 1.4528476045504042e-05, "loss": 0.5039, "step": 18170 }, { "epoch": 2.96632790498347, "grad_norm": 2.7691574096679688, "learning_rate": 1.452790440229726e-05, "loss": 0.5561, "step": 18171 }, { "epoch": 2.9664911636259745, "grad_norm": 3.808332920074463, "learning_rate": 1.452733274047797e-05, "loss": 0.6513, "step": 18172 }, { "epoch": 2.966654422268479, "grad_norm": 3.1227238178253174, "learning_rate": 1.4526761060048525e-05, "loss": 0.5771, "step": 18173 }, { "epoch": 2.9668176809109834, "grad_norm": 2.8125195503234863, "learning_rate": 1.4526189361011276e-05, "loss": 0.619, "step": 18174 }, { "epoch": 2.966980939553488, "grad_norm": 2.630542755126953, "learning_rate": 1.4525617643368568e-05, "loss": 0.5096, "step": 18175 }, { "epoch": 2.967144198195992, "grad_norm": 3.0324831008911133, "learning_rate": 1.452504590712276e-05, "loss": 0.6622, "step": 18176 }, { "epoch": 2.9673074568384963, "grad_norm": 2.8837337493896484, "learning_rate": 1.4524474152276192e-05, "loss": 0.5165, "step": 18177 }, { "epoch": 2.9674707154810007, "grad_norm": 2.2128043174743652, "learning_rate": 1.4523902378831219e-05, "loss": 0.4407, "step": 18178 }, { "epoch": 2.967633974123505, "grad_norm": 2.9398372173309326, "learning_rate": 1.4523330586790195e-05, "loss": 0.5412, "step": 18179 }, { "epoch": 2.9677972327660096, "grad_norm": 2.608757495880127, "learning_rate": 1.4522758776155466e-05, "loss": 0.485, "step": 18180 }, { "epoch": 2.967960491408514, "grad_norm": 3.4579918384552, "learning_rate": 1.4522186946929382e-05, "loss": 0.6681, "step": 18181 }, { "epoch": 2.9681237500510185, "grad_norm": 2.5066492557525635, "learning_rate": 1.4521615099114298e-05, "loss": 0.5042, "step": 18182 }, { "epoch": 2.9682870086935225, "grad_norm": 2.2367963790893555, "learning_rate": 1.4521043232712554e-05, "loss": 0.436, "step": 18183 }, { "epoch": 2.968450267336027, "grad_norm": 2.856673240661621, "learning_rate": 1.4520471347726517e-05, "loss": 0.5528, "step": 18184 }, { "epoch": 2.9686135259785313, "grad_norm": 3.217536449432373, "learning_rate": 1.4519899444158528e-05, "loss": 0.5406, "step": 18185 }, { "epoch": 2.968776784621036, "grad_norm": 3.1206963062286377, "learning_rate": 1.4519327522010937e-05, "loss": 0.5644, "step": 18186 }, { "epoch": 2.96894004326354, "grad_norm": 3.1235737800598145, "learning_rate": 1.4518755581286097e-05, "loss": 0.5279, "step": 18187 }, { "epoch": 2.9691033019060447, "grad_norm": 3.041325330734253, "learning_rate": 1.4518183621986359e-05, "loss": 0.6264, "step": 18188 }, { "epoch": 2.969266560548549, "grad_norm": 2.736851453781128, "learning_rate": 1.4517611644114075e-05, "loss": 0.5131, "step": 18189 }, { "epoch": 2.9694298191910535, "grad_norm": 3.3619418144226074, "learning_rate": 1.4517039647671593e-05, "loss": 0.6078, "step": 18190 }, { "epoch": 2.969593077833558, "grad_norm": 2.684824228286743, "learning_rate": 1.451646763266127e-05, "loss": 0.5068, "step": 18191 }, { "epoch": 2.9697563364760624, "grad_norm": 2.5499558448791504, "learning_rate": 1.4515895599085452e-05, "loss": 0.5058, "step": 18192 }, { "epoch": 2.969919595118567, "grad_norm": 2.8442633152008057, "learning_rate": 1.4515323546946492e-05, "loss": 0.5402, "step": 18193 }, { "epoch": 2.970082853761071, "grad_norm": 2.7385265827178955, "learning_rate": 1.4514751476246742e-05, "loss": 0.5345, "step": 18194 }, { "epoch": 2.9702461124035753, "grad_norm": 3.3972222805023193, "learning_rate": 1.4514179386988555e-05, "loss": 0.6466, "step": 18195 }, { "epoch": 2.9704093710460797, "grad_norm": 2.2269539833068848, "learning_rate": 1.451360727917428e-05, "loss": 0.4425, "step": 18196 }, { "epoch": 2.970572629688584, "grad_norm": 2.420760154724121, "learning_rate": 1.4513035152806269e-05, "loss": 0.5054, "step": 18197 }, { "epoch": 2.9707358883310886, "grad_norm": 3.1278653144836426, "learning_rate": 1.4512463007886877e-05, "loss": 0.6186, "step": 18198 }, { "epoch": 2.970899146973593, "grad_norm": 2.7186806201934814, "learning_rate": 1.4511890844418453e-05, "loss": 0.5801, "step": 18199 }, { "epoch": 2.9710624056160975, "grad_norm": 2.901992082595825, "learning_rate": 1.4511318662403347e-05, "loss": 0.5529, "step": 18200 }, { "epoch": 2.9712256642586015, "grad_norm": 3.2725770473480225, "learning_rate": 1.4510746461843914e-05, "loss": 0.5958, "step": 18201 }, { "epoch": 2.971388922901106, "grad_norm": 2.9461960792541504, "learning_rate": 1.4510174242742508e-05, "loss": 0.5739, "step": 18202 }, { "epoch": 2.9715521815436103, "grad_norm": 3.5129599571228027, "learning_rate": 1.4509602005101476e-05, "loss": 0.6948, "step": 18203 }, { "epoch": 2.9717154401861148, "grad_norm": 2.949559450149536, "learning_rate": 1.4509029748923176e-05, "loss": 0.4766, "step": 18204 }, { "epoch": 2.971878698828619, "grad_norm": 2.750722885131836, "learning_rate": 1.4508457474209953e-05, "loss": 0.4497, "step": 18205 }, { "epoch": 2.9720419574711237, "grad_norm": 2.838507652282715, "learning_rate": 1.4507885180964165e-05, "loss": 0.498, "step": 18206 }, { "epoch": 2.972205216113628, "grad_norm": 2.6391708850860596, "learning_rate": 1.4507312869188167e-05, "loss": 0.4824, "step": 18207 }, { "epoch": 2.9723684747561325, "grad_norm": 3.3141305446624756, "learning_rate": 1.4506740538884303e-05, "loss": 0.6881, "step": 18208 }, { "epoch": 2.972531733398637, "grad_norm": 2.7178380489349365, "learning_rate": 1.4506168190054934e-05, "loss": 0.5017, "step": 18209 }, { "epoch": 2.9726949920411414, "grad_norm": 2.5538363456726074, "learning_rate": 1.4505595822702405e-05, "loss": 0.5749, "step": 18210 }, { "epoch": 2.972858250683646, "grad_norm": 3.1223623752593994, "learning_rate": 1.4505023436829076e-05, "loss": 0.6769, "step": 18211 }, { "epoch": 2.97302150932615, "grad_norm": 3.024650812149048, "learning_rate": 1.4504451032437295e-05, "loss": 0.6379, "step": 18212 }, { "epoch": 2.9731847679686543, "grad_norm": 3.03266978263855, "learning_rate": 1.4503878609529418e-05, "loss": 0.6448, "step": 18213 }, { "epoch": 2.9733480266111587, "grad_norm": 3.3490116596221924, "learning_rate": 1.4503306168107799e-05, "loss": 0.684, "step": 18214 }, { "epoch": 2.973511285253663, "grad_norm": 2.367173194885254, "learning_rate": 1.4502733708174784e-05, "loss": 0.5135, "step": 18215 }, { "epoch": 2.9736745438961676, "grad_norm": 2.863417387008667, "learning_rate": 1.4502161229732731e-05, "loss": 0.5838, "step": 18216 }, { "epoch": 2.973837802538672, "grad_norm": 3.0592422485351562, "learning_rate": 1.4501588732783995e-05, "loss": 0.6226, "step": 18217 }, { "epoch": 2.974001061181176, "grad_norm": 3.352108955383301, "learning_rate": 1.4501016217330927e-05, "loss": 0.6765, "step": 18218 }, { "epoch": 2.9741643198236805, "grad_norm": 3.1782307624816895, "learning_rate": 1.4500443683375884e-05, "loss": 0.7075, "step": 18219 }, { "epoch": 2.974327578466185, "grad_norm": 3.657377004623413, "learning_rate": 1.4499871130921213e-05, "loss": 0.779, "step": 18220 }, { "epoch": 2.9744908371086893, "grad_norm": 3.001206874847412, "learning_rate": 1.4499298559969273e-05, "loss": 0.5789, "step": 18221 }, { "epoch": 2.9746540957511938, "grad_norm": 2.8564889430999756, "learning_rate": 1.4498725970522412e-05, "loss": 0.6369, "step": 18222 }, { "epoch": 2.974817354393698, "grad_norm": 2.9656870365142822, "learning_rate": 1.4498153362582991e-05, "loss": 0.4979, "step": 18223 }, { "epoch": 2.9749806130362026, "grad_norm": 3.0980918407440186, "learning_rate": 1.4497580736153358e-05, "loss": 0.6188, "step": 18224 }, { "epoch": 2.975143871678707, "grad_norm": 2.6561691761016846, "learning_rate": 1.4497008091235869e-05, "loss": 0.5336, "step": 18225 }, { "epoch": 2.9753071303212115, "grad_norm": 2.707153558731079, "learning_rate": 1.4496435427832877e-05, "loss": 0.5142, "step": 18226 }, { "epoch": 2.975470388963716, "grad_norm": 2.7837445735931396, "learning_rate": 1.4495862745946738e-05, "loss": 0.6484, "step": 18227 }, { "epoch": 2.9756336476062204, "grad_norm": 2.39536190032959, "learning_rate": 1.4495290045579806e-05, "loss": 0.5332, "step": 18228 }, { "epoch": 2.9757969062487244, "grad_norm": 3.1356043815612793, "learning_rate": 1.4494717326734433e-05, "loss": 0.5849, "step": 18229 }, { "epoch": 2.975960164891229, "grad_norm": 3.012637138366699, "learning_rate": 1.4494144589412973e-05, "loss": 0.5852, "step": 18230 }, { "epoch": 2.9761234235337333, "grad_norm": 3.041476011276245, "learning_rate": 1.4493571833617784e-05, "loss": 0.5686, "step": 18231 }, { "epoch": 2.9762866821762377, "grad_norm": 2.962289810180664, "learning_rate": 1.4492999059351216e-05, "loss": 0.5904, "step": 18232 }, { "epoch": 2.976449940818742, "grad_norm": 2.948843240737915, "learning_rate": 1.4492426266615624e-05, "loss": 0.4601, "step": 18233 }, { "epoch": 2.9766131994612466, "grad_norm": 3.173217296600342, "learning_rate": 1.4491853455413367e-05, "loss": 0.515, "step": 18234 }, { "epoch": 2.976776458103751, "grad_norm": 2.9585771560668945, "learning_rate": 1.4491280625746796e-05, "loss": 0.5097, "step": 18235 }, { "epoch": 2.976939716746255, "grad_norm": 2.6164863109588623, "learning_rate": 1.4490707777618264e-05, "loss": 0.5498, "step": 18236 }, { "epoch": 2.9771029753887595, "grad_norm": 3.1950812339782715, "learning_rate": 1.4490134911030129e-05, "loss": 0.5716, "step": 18237 }, { "epoch": 2.977266234031264, "grad_norm": 2.804053783416748, "learning_rate": 1.4489562025984747e-05, "loss": 0.5382, "step": 18238 }, { "epoch": 2.9774294926737683, "grad_norm": 3.0256662368774414, "learning_rate": 1.4488989122484469e-05, "loss": 0.6147, "step": 18239 }, { "epoch": 2.9775927513162728, "grad_norm": 2.949591875076294, "learning_rate": 1.4488416200531652e-05, "loss": 0.5307, "step": 18240 }, { "epoch": 2.977756009958777, "grad_norm": 2.9126617908477783, "learning_rate": 1.4487843260128651e-05, "loss": 0.5459, "step": 18241 }, { "epoch": 2.9779192686012816, "grad_norm": 3.511673927307129, "learning_rate": 1.448727030127782e-05, "loss": 0.7218, "step": 18242 }, { "epoch": 2.978082527243786, "grad_norm": 3.6036105155944824, "learning_rate": 1.4486697323981515e-05, "loss": 0.8664, "step": 18243 }, { "epoch": 2.9782457858862905, "grad_norm": 2.90258526802063, "learning_rate": 1.4486124328242093e-05, "loss": 0.6749, "step": 18244 }, { "epoch": 2.978409044528795, "grad_norm": 3.0731027126312256, "learning_rate": 1.4485551314061907e-05, "loss": 0.6043, "step": 18245 }, { "epoch": 2.9785723031712994, "grad_norm": 2.640897274017334, "learning_rate": 1.4484978281443311e-05, "loss": 0.542, "step": 18246 }, { "epoch": 2.9787355618138034, "grad_norm": 2.9830188751220703, "learning_rate": 1.4484405230388667e-05, "loss": 0.5902, "step": 18247 }, { "epoch": 2.978898820456308, "grad_norm": 2.905329942703247, "learning_rate": 1.4483832160900326e-05, "loss": 0.5891, "step": 18248 }, { "epoch": 2.9790620790988123, "grad_norm": 2.94799542427063, "learning_rate": 1.448325907298064e-05, "loss": 0.5916, "step": 18249 }, { "epoch": 2.9792253377413167, "grad_norm": 2.6085379123687744, "learning_rate": 1.448268596663197e-05, "loss": 0.553, "step": 18250 }, { "epoch": 2.979388596383821, "grad_norm": 3.033299446105957, "learning_rate": 1.4482112841856672e-05, "loss": 0.6804, "step": 18251 }, { "epoch": 2.9795518550263256, "grad_norm": 2.582508087158203, "learning_rate": 1.4481539698657102e-05, "loss": 0.6204, "step": 18252 }, { "epoch": 2.9797151136688296, "grad_norm": 2.460263967514038, "learning_rate": 1.4480966537035615e-05, "loss": 0.5144, "step": 18253 }, { "epoch": 2.979878372311334, "grad_norm": 2.6078603267669678, "learning_rate": 1.4480393356994562e-05, "loss": 0.5342, "step": 18254 }, { "epoch": 2.9800416309538384, "grad_norm": 2.6281418800354004, "learning_rate": 1.4479820158536307e-05, "loss": 0.494, "step": 18255 }, { "epoch": 2.980204889596343, "grad_norm": 2.6523680686950684, "learning_rate": 1.4479246941663202e-05, "loss": 0.5299, "step": 18256 }, { "epoch": 2.9803681482388473, "grad_norm": 2.655813217163086, "learning_rate": 1.4478673706377602e-05, "loss": 0.563, "step": 18257 }, { "epoch": 2.9805314068813518, "grad_norm": 3.2854559421539307, "learning_rate": 1.447810045268187e-05, "loss": 0.637, "step": 18258 }, { "epoch": 2.980694665523856, "grad_norm": 2.3749895095825195, "learning_rate": 1.4477527180578356e-05, "loss": 0.5381, "step": 18259 }, { "epoch": 2.9808579241663606, "grad_norm": 2.7329275608062744, "learning_rate": 1.4476953890069416e-05, "loss": 0.5515, "step": 18260 }, { "epoch": 2.981021182808865, "grad_norm": 2.9203665256500244, "learning_rate": 1.4476380581157412e-05, "loss": 0.6232, "step": 18261 }, { "epoch": 2.9811844414513695, "grad_norm": 2.509474754333496, "learning_rate": 1.4475807253844695e-05, "loss": 0.4723, "step": 18262 }, { "epoch": 2.981347700093874, "grad_norm": 2.4074385166168213, "learning_rate": 1.447523390813363e-05, "loss": 0.4921, "step": 18263 }, { "epoch": 2.981510958736378, "grad_norm": 2.7229719161987305, "learning_rate": 1.4474660544026564e-05, "loss": 0.5779, "step": 18264 }, { "epoch": 2.9816742173788824, "grad_norm": 3.343946695327759, "learning_rate": 1.4474087161525858e-05, "loss": 0.5361, "step": 18265 }, { "epoch": 2.981837476021387, "grad_norm": 3.147555112838745, "learning_rate": 1.4473513760633869e-05, "loss": 0.6058, "step": 18266 }, { "epoch": 2.9820007346638913, "grad_norm": 3.5477662086486816, "learning_rate": 1.4472940341352954e-05, "loss": 1.2474, "step": 18267 }, { "epoch": 2.9821639933063957, "grad_norm": 2.770585298538208, "learning_rate": 1.4472366903685471e-05, "loss": 0.5422, "step": 18268 }, { "epoch": 2.9823272519489, "grad_norm": 3.0801892280578613, "learning_rate": 1.4471793447633777e-05, "loss": 0.6045, "step": 18269 }, { "epoch": 2.9824905105914046, "grad_norm": 2.8279805183410645, "learning_rate": 1.4471219973200228e-05, "loss": 0.5427, "step": 18270 }, { "epoch": 2.9826537692339086, "grad_norm": 2.7256863117218018, "learning_rate": 1.4470646480387183e-05, "loss": 0.5829, "step": 18271 }, { "epoch": 2.982817027876413, "grad_norm": 3.0223731994628906, "learning_rate": 1.4470072969196998e-05, "loss": 0.6353, "step": 18272 }, { "epoch": 2.9829802865189174, "grad_norm": 2.0222408771514893, "learning_rate": 1.446949943963203e-05, "loss": 0.4065, "step": 18273 }, { "epoch": 2.983143545161422, "grad_norm": 2.659937620162964, "learning_rate": 1.4468925891694636e-05, "loss": 0.5509, "step": 18274 }, { "epoch": 2.9833068038039263, "grad_norm": 3.1058778762817383, "learning_rate": 1.4468352325387179e-05, "loss": 0.5875, "step": 18275 }, { "epoch": 2.9834700624464308, "grad_norm": 2.8888213634490967, "learning_rate": 1.4467778740712008e-05, "loss": 0.5921, "step": 18276 }, { "epoch": 2.983633321088935, "grad_norm": 3.0153863430023193, "learning_rate": 1.446720513767149e-05, "loss": 0.6573, "step": 18277 }, { "epoch": 2.9837965797314396, "grad_norm": 3.0139167308807373, "learning_rate": 1.4466631516267975e-05, "loss": 0.5718, "step": 18278 }, { "epoch": 2.983959838373944, "grad_norm": 3.3174145221710205, "learning_rate": 1.4466057876503827e-05, "loss": 0.6037, "step": 18279 }, { "epoch": 2.9841230970164485, "grad_norm": 2.1816887855529785, "learning_rate": 1.4465484218381401e-05, "loss": 0.4173, "step": 18280 }, { "epoch": 2.984286355658953, "grad_norm": 2.953115224838257, "learning_rate": 1.4464910541903055e-05, "loss": 0.594, "step": 18281 }, { "epoch": 2.984449614301457, "grad_norm": 3.2251954078674316, "learning_rate": 1.4464336847071148e-05, "loss": 0.6888, "step": 18282 }, { "epoch": 2.9846128729439614, "grad_norm": 3.0323691368103027, "learning_rate": 1.4463763133888036e-05, "loss": 0.6491, "step": 18283 }, { "epoch": 2.984776131586466, "grad_norm": 2.8730621337890625, "learning_rate": 1.4463189402356082e-05, "loss": 0.6073, "step": 18284 }, { "epoch": 2.9849393902289703, "grad_norm": 3.087341785430908, "learning_rate": 1.446261565247764e-05, "loss": 0.6466, "step": 18285 }, { "epoch": 2.9851026488714747, "grad_norm": 3.3125011920928955, "learning_rate": 1.4462041884255072e-05, "loss": 0.6998, "step": 18286 }, { "epoch": 2.985265907513979, "grad_norm": 2.653719902038574, "learning_rate": 1.4461468097690732e-05, "loss": 0.4853, "step": 18287 }, { "epoch": 2.9854291661564836, "grad_norm": 3.014770269393921, "learning_rate": 1.4460894292786982e-05, "loss": 0.5794, "step": 18288 }, { "epoch": 2.9855924247989876, "grad_norm": 3.0015814304351807, "learning_rate": 1.4460320469546181e-05, "loss": 0.6594, "step": 18289 }, { "epoch": 2.985755683441492, "grad_norm": 3.014997720718384, "learning_rate": 1.4459746627970685e-05, "loss": 0.5881, "step": 18290 }, { "epoch": 2.9859189420839964, "grad_norm": 3.404242992401123, "learning_rate": 1.4459172768062857e-05, "loss": 0.6366, "step": 18291 }, { "epoch": 2.986082200726501, "grad_norm": 2.7039637565612793, "learning_rate": 1.4458598889825055e-05, "loss": 0.5192, "step": 18292 }, { "epoch": 2.9862454593690053, "grad_norm": 2.464604377746582, "learning_rate": 1.445802499325963e-05, "loss": 0.5062, "step": 18293 }, { "epoch": 2.9864087180115098, "grad_norm": 3.0895042419433594, "learning_rate": 1.4457451078368951e-05, "loss": 0.5795, "step": 18294 }, { "epoch": 2.986571976654014, "grad_norm": 2.5636181831359863, "learning_rate": 1.4456877145155375e-05, "loss": 0.5527, "step": 18295 }, { "epoch": 2.9867352352965186, "grad_norm": 2.9124374389648438, "learning_rate": 1.445630319362126e-05, "loss": 0.6078, "step": 18296 }, { "epoch": 2.986898493939023, "grad_norm": 2.849790096282959, "learning_rate": 1.4455729223768966e-05, "loss": 0.5921, "step": 18297 }, { "epoch": 2.9870617525815275, "grad_norm": 3.7572145462036133, "learning_rate": 1.445515523560085e-05, "loss": 0.7504, "step": 18298 }, { "epoch": 2.987225011224032, "grad_norm": 2.555356025695801, "learning_rate": 1.4454581229119272e-05, "loss": 0.5688, "step": 18299 }, { "epoch": 2.987388269866536, "grad_norm": 2.732330322265625, "learning_rate": 1.4454007204326592e-05, "loss": 0.5904, "step": 18300 }, { "epoch": 2.9875515285090404, "grad_norm": 2.8697397708892822, "learning_rate": 1.4453433161225173e-05, "loss": 0.5174, "step": 18301 }, { "epoch": 2.987714787151545, "grad_norm": 2.845499277114868, "learning_rate": 1.445285909981737e-05, "loss": 0.5625, "step": 18302 }, { "epoch": 2.9878780457940493, "grad_norm": 2.547100782394409, "learning_rate": 1.4452285020105548e-05, "loss": 0.5048, "step": 18303 }, { "epoch": 2.9880413044365537, "grad_norm": 2.831958770751953, "learning_rate": 1.4451710922092057e-05, "loss": 0.5096, "step": 18304 }, { "epoch": 2.988204563079058, "grad_norm": 2.487825870513916, "learning_rate": 1.4451136805779266e-05, "loss": 0.4685, "step": 18305 }, { "epoch": 2.988367821721562, "grad_norm": 2.879509687423706, "learning_rate": 1.4450562671169535e-05, "loss": 0.6012, "step": 18306 }, { "epoch": 2.9885310803640666, "grad_norm": 2.6809306144714355, "learning_rate": 1.4449988518265221e-05, "loss": 0.5623, "step": 18307 }, { "epoch": 2.988694339006571, "grad_norm": 2.36917781829834, "learning_rate": 1.4449414347068685e-05, "loss": 0.5013, "step": 18308 }, { "epoch": 2.9888575976490754, "grad_norm": 2.9801442623138428, "learning_rate": 1.4448840157582283e-05, "loss": 0.4902, "step": 18309 }, { "epoch": 2.98902085629158, "grad_norm": 2.663961410522461, "learning_rate": 1.444826594980838e-05, "loss": 0.5291, "step": 18310 }, { "epoch": 2.9891841149340843, "grad_norm": 2.6609981060028076, "learning_rate": 1.4447691723749337e-05, "loss": 0.548, "step": 18311 }, { "epoch": 2.9893473735765888, "grad_norm": 3.267220973968506, "learning_rate": 1.4447117479407513e-05, "loss": 0.6611, "step": 18312 }, { "epoch": 2.989510632219093, "grad_norm": 2.73462176322937, "learning_rate": 1.4446543216785268e-05, "loss": 0.4776, "step": 18313 }, { "epoch": 2.9896738908615976, "grad_norm": 2.9879796504974365, "learning_rate": 1.444596893588496e-05, "loss": 0.626, "step": 18314 }, { "epoch": 2.989837149504102, "grad_norm": 2.756981611251831, "learning_rate": 1.4445394636708953e-05, "loss": 0.5883, "step": 18315 }, { "epoch": 2.9900004081466065, "grad_norm": 3.0149879455566406, "learning_rate": 1.4444820319259612e-05, "loss": 0.5026, "step": 18316 }, { "epoch": 2.9901636667891105, "grad_norm": 2.7843291759490967, "learning_rate": 1.4444245983539289e-05, "loss": 0.5448, "step": 18317 }, { "epoch": 2.990326925431615, "grad_norm": 3.2400906085968018, "learning_rate": 1.444367162955035e-05, "loss": 0.6846, "step": 18318 }, { "epoch": 2.9904901840741194, "grad_norm": 2.9180526733398438, "learning_rate": 1.4443097257295154e-05, "loss": 0.656, "step": 18319 }, { "epoch": 2.990653442716624, "grad_norm": 2.855130672454834, "learning_rate": 1.4442522866776063e-05, "loss": 0.6475, "step": 18320 }, { "epoch": 2.9908167013591282, "grad_norm": 2.9572157859802246, "learning_rate": 1.4441948457995439e-05, "loss": 0.5908, "step": 18321 }, { "epoch": 2.9909799600016327, "grad_norm": 2.8757030963897705, "learning_rate": 1.444137403095564e-05, "loss": 0.5825, "step": 18322 }, { "epoch": 2.991143218644137, "grad_norm": 2.8855221271514893, "learning_rate": 1.4440799585659032e-05, "loss": 0.6521, "step": 18323 }, { "epoch": 2.991306477286641, "grad_norm": 2.76009464263916, "learning_rate": 1.4440225122107971e-05, "loss": 0.5853, "step": 18324 }, { "epoch": 2.9914697359291456, "grad_norm": 2.5738110542297363, "learning_rate": 1.4439650640304822e-05, "loss": 0.5146, "step": 18325 }, { "epoch": 2.99163299457165, "grad_norm": 3.327641725540161, "learning_rate": 1.4439076140251947e-05, "loss": 0.5708, "step": 18326 }, { "epoch": 2.9917962532141544, "grad_norm": 2.9020771980285645, "learning_rate": 1.4438501621951706e-05, "loss": 0.5551, "step": 18327 }, { "epoch": 2.991959511856659, "grad_norm": 2.6140449047088623, "learning_rate": 1.443792708540646e-05, "loss": 0.4933, "step": 18328 }, { "epoch": 2.9921227704991633, "grad_norm": 2.5445330142974854, "learning_rate": 1.443735253061857e-05, "loss": 0.4037, "step": 18329 }, { "epoch": 2.9922860291416677, "grad_norm": 2.5932235717773438, "learning_rate": 1.4436777957590402e-05, "loss": 0.5297, "step": 18330 }, { "epoch": 2.992449287784172, "grad_norm": 2.5617244243621826, "learning_rate": 1.4436203366324313e-05, "loss": 0.479, "step": 18331 }, { "epoch": 2.9926125464266766, "grad_norm": 2.7613790035247803, "learning_rate": 1.4435628756822669e-05, "loss": 0.5937, "step": 18332 }, { "epoch": 2.992775805069181, "grad_norm": 3.3358514308929443, "learning_rate": 1.4435054129087829e-05, "loss": 0.6163, "step": 18333 }, { "epoch": 2.9929390637116855, "grad_norm": 2.8197903633117676, "learning_rate": 1.4434479483122155e-05, "loss": 0.5294, "step": 18334 }, { "epoch": 2.9931023223541895, "grad_norm": 2.1631975173950195, "learning_rate": 1.4433904818928013e-05, "loss": 0.4403, "step": 18335 }, { "epoch": 2.993265580996694, "grad_norm": 2.5595412254333496, "learning_rate": 1.4433330136507762e-05, "loss": 0.5377, "step": 18336 }, { "epoch": 2.9934288396391984, "grad_norm": 2.5751588344573975, "learning_rate": 1.4432755435863761e-05, "loss": 0.4994, "step": 18337 }, { "epoch": 2.993592098281703, "grad_norm": 2.707798719406128, "learning_rate": 1.4432180716998381e-05, "loss": 0.4991, "step": 18338 }, { "epoch": 2.9937553569242072, "grad_norm": 3.0661399364471436, "learning_rate": 1.4431605979913976e-05, "loss": 0.5183, "step": 18339 }, { "epoch": 2.9939186155667117, "grad_norm": 2.631415605545044, "learning_rate": 1.4431031224612915e-05, "loss": 0.5288, "step": 18340 }, { "epoch": 2.9940818742092157, "grad_norm": 2.896651268005371, "learning_rate": 1.4430456451097557e-05, "loss": 0.5577, "step": 18341 }, { "epoch": 2.99424513285172, "grad_norm": 3.1644701957702637, "learning_rate": 1.4429881659370265e-05, "loss": 0.5996, "step": 18342 }, { "epoch": 2.9944083914942246, "grad_norm": 2.591771364212036, "learning_rate": 1.44293068494334e-05, "loss": 0.5371, "step": 18343 }, { "epoch": 2.994571650136729, "grad_norm": 3.0863096714019775, "learning_rate": 1.4428732021289328e-05, "loss": 0.5833, "step": 18344 }, { "epoch": 2.9947349087792334, "grad_norm": 3.278611183166504, "learning_rate": 1.4428157174940414e-05, "loss": 0.6646, "step": 18345 }, { "epoch": 2.994898167421738, "grad_norm": 3.142956018447876, "learning_rate": 1.442758231038902e-05, "loss": 0.5761, "step": 18346 }, { "epoch": 2.9950614260642423, "grad_norm": 2.8431951999664307, "learning_rate": 1.44270074276375e-05, "loss": 0.5494, "step": 18347 }, { "epoch": 2.9952246847067467, "grad_norm": 2.751932382583618, "learning_rate": 1.4426432526688226e-05, "loss": 0.5116, "step": 18348 }, { "epoch": 2.995387943349251, "grad_norm": 3.277144193649292, "learning_rate": 1.442585760754356e-05, "loss": 0.4939, "step": 18349 }, { "epoch": 2.9955512019917556, "grad_norm": 2.6372742652893066, "learning_rate": 1.4425282670205864e-05, "loss": 0.5661, "step": 18350 }, { "epoch": 2.99571446063426, "grad_norm": 2.794126272201538, "learning_rate": 1.4424707714677507e-05, "loss": 0.5545, "step": 18351 }, { "epoch": 2.995877719276764, "grad_norm": 3.8945391178131104, "learning_rate": 1.4424132740960841e-05, "loss": 0.7009, "step": 18352 }, { "epoch": 2.9960409779192685, "grad_norm": 2.584380626678467, "learning_rate": 1.442355774905824e-05, "loss": 0.5221, "step": 18353 }, { "epoch": 2.996204236561773, "grad_norm": 2.466724157333374, "learning_rate": 1.4422982738972058e-05, "loss": 0.4421, "step": 18354 }, { "epoch": 2.9963674952042774, "grad_norm": 3.3935446739196777, "learning_rate": 1.4422407710704668e-05, "loss": 0.6652, "step": 18355 }, { "epoch": 2.996530753846782, "grad_norm": 3.005819797515869, "learning_rate": 1.442183266425843e-05, "loss": 0.5211, "step": 18356 }, { "epoch": 2.9966940124892862, "grad_norm": 3.195248603820801, "learning_rate": 1.4421257599635707e-05, "loss": 0.5484, "step": 18357 }, { "epoch": 2.9968572711317907, "grad_norm": 2.5187506675720215, "learning_rate": 1.4420682516838862e-05, "loss": 0.5299, "step": 18358 }, { "epoch": 2.9970205297742947, "grad_norm": 2.8046939373016357, "learning_rate": 1.4420107415870263e-05, "loss": 0.487, "step": 18359 }, { "epoch": 2.997183788416799, "grad_norm": 2.7437944412231445, "learning_rate": 1.4419532296732271e-05, "loss": 0.5254, "step": 18360 }, { "epoch": 2.9973470470593035, "grad_norm": 3.0159692764282227, "learning_rate": 1.441895715942725e-05, "loss": 0.5294, "step": 18361 }, { "epoch": 2.997510305701808, "grad_norm": 3.01320481300354, "learning_rate": 1.4418382003957563e-05, "loss": 0.6048, "step": 18362 }, { "epoch": 2.9976735643443124, "grad_norm": 2.976144790649414, "learning_rate": 1.4417806830325576e-05, "loss": 0.549, "step": 18363 }, { "epoch": 2.997836822986817, "grad_norm": 2.960007905960083, "learning_rate": 1.4417231638533656e-05, "loss": 0.6012, "step": 18364 }, { "epoch": 2.9980000816293213, "grad_norm": 2.6634521484375, "learning_rate": 1.4416656428584162e-05, "loss": 0.4918, "step": 18365 }, { "epoch": 2.9981633402718257, "grad_norm": 3.5252342224121094, "learning_rate": 1.4416081200479462e-05, "loss": 0.6321, "step": 18366 }, { "epoch": 2.99832659891433, "grad_norm": 2.7728757858276367, "learning_rate": 1.441550595422192e-05, "loss": 0.5397, "step": 18367 }, { "epoch": 2.9984898575568346, "grad_norm": 2.9252750873565674, "learning_rate": 1.44149306898139e-05, "loss": 0.6092, "step": 18368 }, { "epoch": 2.998653116199339, "grad_norm": 2.972339630126953, "learning_rate": 1.4414355407257765e-05, "loss": 0.5402, "step": 18369 }, { "epoch": 2.998816374841843, "grad_norm": 2.683494806289673, "learning_rate": 1.4413780106555885e-05, "loss": 0.4814, "step": 18370 }, { "epoch": 2.9989796334843475, "grad_norm": 2.786654472351074, "learning_rate": 1.4413204787710623e-05, "loss": 0.504, "step": 18371 }, { "epoch": 2.999142892126852, "grad_norm": 2.447704315185547, "learning_rate": 1.4412629450724338e-05, "loss": 0.4934, "step": 18372 }, { "epoch": 2.9993061507693564, "grad_norm": 2.9457597732543945, "learning_rate": 1.44120540955994e-05, "loss": 0.5153, "step": 18373 }, { "epoch": 2.999469409411861, "grad_norm": 3.0695717334747314, "learning_rate": 1.4411478722338177e-05, "loss": 0.6704, "step": 18374 }, { "epoch": 2.9996326680543652, "grad_norm": 3.1816298961639404, "learning_rate": 1.441090333094303e-05, "loss": 0.5625, "step": 18375 }, { "epoch": 2.9997959266968697, "grad_norm": 2.6105353832244873, "learning_rate": 1.4410327921416323e-05, "loss": 0.477, "step": 18376 }, { "epoch": 2.9999591853393737, "grad_norm": 2.6851084232330322, "learning_rate": 1.4409752493760422e-05, "loss": 0.541, "step": 18377 }, { "epoch": 3.0, "grad_norm": 8.269643783569336, "learning_rate": 1.4409177047977696e-05, "loss": 1.0424, "step": 18378 }, { "epoch": 3.0001632586425044, "grad_norm": 2.756442070007324, "learning_rate": 1.4408601584070507e-05, "loss": 0.7409, "step": 18379 }, { "epoch": 3.000326517285009, "grad_norm": 1.8388828039169312, "learning_rate": 1.4408026102041223e-05, "loss": 0.3788, "step": 18380 }, { "epoch": 3.0004897759275133, "grad_norm": 2.5175514221191406, "learning_rate": 1.4407450601892206e-05, "loss": 0.4216, "step": 18381 }, { "epoch": 3.0006530345700178, "grad_norm": 2.7021021842956543, "learning_rate": 1.4406875083625824e-05, "loss": 0.5917, "step": 18382 }, { "epoch": 3.0008162932125217, "grad_norm": 2.5709753036499023, "learning_rate": 1.4406299547244445e-05, "loss": 0.4716, "step": 18383 }, { "epoch": 3.000979551855026, "grad_norm": 2.1264779567718506, "learning_rate": 1.440572399275043e-05, "loss": 0.4406, "step": 18384 }, { "epoch": 3.0011428104975306, "grad_norm": 1.9198012351989746, "learning_rate": 1.4405148420146152e-05, "loss": 0.3795, "step": 18385 }, { "epoch": 3.001306069140035, "grad_norm": 2.4978201389312744, "learning_rate": 1.4404572829433967e-05, "loss": 0.4862, "step": 18386 }, { "epoch": 3.0014693277825395, "grad_norm": 2.678196430206299, "learning_rate": 1.4403997220616246e-05, "loss": 0.5362, "step": 18387 }, { "epoch": 3.001632586425044, "grad_norm": 2.546809673309326, "learning_rate": 1.4403421593695356e-05, "loss": 0.5701, "step": 18388 }, { "epoch": 3.0017958450675484, "grad_norm": 2.4543635845184326, "learning_rate": 1.4402845948673665e-05, "loss": 0.4607, "step": 18389 }, { "epoch": 3.001959103710053, "grad_norm": 2.5442163944244385, "learning_rate": 1.4402270285553537e-05, "loss": 0.4696, "step": 18390 }, { "epoch": 3.0021223623525572, "grad_norm": 2.824497699737549, "learning_rate": 1.4401694604337335e-05, "loss": 0.5228, "step": 18391 }, { "epoch": 3.0022856209950612, "grad_norm": 2.448873996734619, "learning_rate": 1.4401118905027429e-05, "loss": 0.4342, "step": 18392 }, { "epoch": 3.0024488796375657, "grad_norm": 2.8336222171783447, "learning_rate": 1.4400543187626187e-05, "loss": 0.4769, "step": 18393 }, { "epoch": 3.00261213828007, "grad_norm": 3.004315137863159, "learning_rate": 1.439996745213597e-05, "loss": 0.4829, "step": 18394 }, { "epoch": 3.0027753969225746, "grad_norm": 2.7420473098754883, "learning_rate": 1.4399391698559153e-05, "loss": 0.4779, "step": 18395 }, { "epoch": 3.002938655565079, "grad_norm": 3.2564499378204346, "learning_rate": 1.4398815926898096e-05, "loss": 1.0359, "step": 18396 }, { "epoch": 3.0031019142075834, "grad_norm": 2.8590478897094727, "learning_rate": 1.4398240137155168e-05, "loss": 0.4822, "step": 18397 }, { "epoch": 3.003265172850088, "grad_norm": 2.5533759593963623, "learning_rate": 1.4397664329332733e-05, "loss": 0.4618, "step": 18398 }, { "epoch": 3.0034284314925923, "grad_norm": 2.2358875274658203, "learning_rate": 1.4397088503433163e-05, "loss": 0.3873, "step": 18399 }, { "epoch": 3.0035916901350967, "grad_norm": 2.625086545944214, "learning_rate": 1.4396512659458824e-05, "loss": 0.478, "step": 18400 }, { "epoch": 3.0037549487776007, "grad_norm": 3.3507838249206543, "learning_rate": 1.4395936797412077e-05, "loss": 0.6201, "step": 18401 }, { "epoch": 3.003918207420105, "grad_norm": 2.9158787727355957, "learning_rate": 1.4395360917295298e-05, "loss": 0.4926, "step": 18402 }, { "epoch": 3.0040814660626096, "grad_norm": 2.3158297538757324, "learning_rate": 1.4394785019110846e-05, "loss": 0.3723, "step": 18403 }, { "epoch": 3.004244724705114, "grad_norm": 2.5024025440216064, "learning_rate": 1.4394209102861094e-05, "loss": 0.4155, "step": 18404 }, { "epoch": 3.0044079833476185, "grad_norm": 2.9364356994628906, "learning_rate": 1.4393633168548409e-05, "loss": 0.5284, "step": 18405 }, { "epoch": 3.004571241990123, "grad_norm": 2.522639036178589, "learning_rate": 1.4393057216175156e-05, "loss": 0.3901, "step": 18406 }, { "epoch": 3.0047345006326274, "grad_norm": 2.1771178245544434, "learning_rate": 1.4392481245743704e-05, "loss": 0.359, "step": 18407 }, { "epoch": 3.004897759275132, "grad_norm": 3.0298657417297363, "learning_rate": 1.4391905257256417e-05, "loss": 0.4415, "step": 18408 }, { "epoch": 3.005061017917636, "grad_norm": 3.3834211826324463, "learning_rate": 1.4391329250715669e-05, "loss": 0.5221, "step": 18409 }, { "epoch": 3.0052242765601402, "grad_norm": 2.6805102825164795, "learning_rate": 1.4390753226123825e-05, "loss": 0.4638, "step": 18410 }, { "epoch": 3.0053875352026447, "grad_norm": 3.208784341812134, "learning_rate": 1.439017718348325e-05, "loss": 0.4623, "step": 18411 }, { "epoch": 3.005550793845149, "grad_norm": 2.885582685470581, "learning_rate": 1.4389601122796318e-05, "loss": 0.43, "step": 18412 }, { "epoch": 3.0057140524876536, "grad_norm": 3.224773645401001, "learning_rate": 1.438902504406539e-05, "loss": 0.4691, "step": 18413 }, { "epoch": 3.005877311130158, "grad_norm": 3.223954200744629, "learning_rate": 1.438844894729284e-05, "loss": 0.4901, "step": 18414 }, { "epoch": 3.0060405697726624, "grad_norm": 2.812333106994629, "learning_rate": 1.438787283248103e-05, "loss": 0.4318, "step": 18415 }, { "epoch": 3.006203828415167, "grad_norm": 2.480283260345459, "learning_rate": 1.4387296699632334e-05, "loss": 0.3595, "step": 18416 }, { "epoch": 3.0063670870576713, "grad_norm": 2.507176399230957, "learning_rate": 1.4386720548749118e-05, "loss": 0.3922, "step": 18417 }, { "epoch": 3.0065303457001753, "grad_norm": 3.3996143341064453, "learning_rate": 1.438614437983375e-05, "loss": 0.4886, "step": 18418 }, { "epoch": 3.0066936043426797, "grad_norm": 2.6322829723358154, "learning_rate": 1.4385568192888597e-05, "loss": 0.3762, "step": 18419 }, { "epoch": 3.006856862985184, "grad_norm": 3.1345415115356445, "learning_rate": 1.438499198791603e-05, "loss": 0.4018, "step": 18420 }, { "epoch": 3.0070201216276886, "grad_norm": 3.853881359100342, "learning_rate": 1.4384415764918416e-05, "loss": 0.6382, "step": 18421 }, { "epoch": 3.007183380270193, "grad_norm": 3.730912685394287, "learning_rate": 1.4383839523898126e-05, "loss": 0.4851, "step": 18422 }, { "epoch": 3.0073466389126975, "grad_norm": 3.361684560775757, "learning_rate": 1.4383263264857525e-05, "loss": 0.5014, "step": 18423 }, { "epoch": 3.007509897555202, "grad_norm": 3.2259883880615234, "learning_rate": 1.4382686987798988e-05, "loss": 0.4802, "step": 18424 }, { "epoch": 3.0076731561977064, "grad_norm": 3.103200674057007, "learning_rate": 1.4382110692724876e-05, "loss": 0.4578, "step": 18425 }, { "epoch": 3.007836414840211, "grad_norm": 2.9160096645355225, "learning_rate": 1.438153437963756e-05, "loss": 0.546, "step": 18426 }, { "epoch": 3.007999673482715, "grad_norm": 2.536060094833374, "learning_rate": 1.4380958048539415e-05, "loss": 0.4014, "step": 18427 }, { "epoch": 3.0081629321252192, "grad_norm": 3.099653482437134, "learning_rate": 1.4380381699432801e-05, "loss": 0.5359, "step": 18428 }, { "epoch": 3.0083261907677237, "grad_norm": 3.8034863471984863, "learning_rate": 1.4379805332320098e-05, "loss": 0.5916, "step": 18429 }, { "epoch": 3.008489449410228, "grad_norm": 3.702253818511963, "learning_rate": 1.4379228947203664e-05, "loss": 0.488, "step": 18430 }, { "epoch": 3.0086527080527325, "grad_norm": 3.4092447757720947, "learning_rate": 1.4378652544085873e-05, "loss": 0.5127, "step": 18431 }, { "epoch": 3.008815966695237, "grad_norm": 2.844926595687866, "learning_rate": 1.4378076122969097e-05, "loss": 0.4476, "step": 18432 }, { "epoch": 3.0089792253377414, "grad_norm": 2.8263416290283203, "learning_rate": 1.4377499683855704e-05, "loss": 0.4211, "step": 18433 }, { "epoch": 3.009142483980246, "grad_norm": 3.806546688079834, "learning_rate": 1.4376923226748063e-05, "loss": 0.4713, "step": 18434 }, { "epoch": 3.0093057426227503, "grad_norm": 2.8224549293518066, "learning_rate": 1.4376346751648542e-05, "loss": 0.4077, "step": 18435 }, { "epoch": 3.0094690012652543, "grad_norm": 2.7343590259552, "learning_rate": 1.437577025855951e-05, "loss": 0.4241, "step": 18436 }, { "epoch": 3.0096322599077587, "grad_norm": 3.089855909347534, "learning_rate": 1.437519374748334e-05, "loss": 0.4014, "step": 18437 }, { "epoch": 3.009795518550263, "grad_norm": 3.143664836883545, "learning_rate": 1.4374617218422402e-05, "loss": 0.4604, "step": 18438 }, { "epoch": 3.0099587771927676, "grad_norm": 2.834413766860962, "learning_rate": 1.4374040671379066e-05, "loss": 0.4732, "step": 18439 }, { "epoch": 3.010122035835272, "grad_norm": 3.2388274669647217, "learning_rate": 1.4373464106355697e-05, "loss": 0.4826, "step": 18440 }, { "epoch": 3.0102852944777765, "grad_norm": 3.5034265518188477, "learning_rate": 1.437288752335467e-05, "loss": 0.531, "step": 18441 }, { "epoch": 3.010448553120281, "grad_norm": 3.67688250541687, "learning_rate": 1.437231092237835e-05, "loss": 0.5668, "step": 18442 }, { "epoch": 3.0106118117627854, "grad_norm": 3.2537131309509277, "learning_rate": 1.4371734303429114e-05, "loss": 0.4801, "step": 18443 }, { "epoch": 3.01077507040529, "grad_norm": 3.0109479427337646, "learning_rate": 1.437115766650933e-05, "loss": 0.4548, "step": 18444 }, { "epoch": 3.010938329047794, "grad_norm": 3.1633870601654053, "learning_rate": 1.4370581011621367e-05, "loss": 0.4299, "step": 18445 }, { "epoch": 3.0111015876902982, "grad_norm": 2.862190008163452, "learning_rate": 1.4370004338767596e-05, "loss": 0.4375, "step": 18446 }, { "epoch": 3.0112648463328027, "grad_norm": 3.263921022415161, "learning_rate": 1.4369427647950385e-05, "loss": 0.5381, "step": 18447 }, { "epoch": 3.011428104975307, "grad_norm": 3.0241541862487793, "learning_rate": 1.4368850939172108e-05, "loss": 0.4712, "step": 18448 }, { "epoch": 3.0115913636178115, "grad_norm": 2.7553892135620117, "learning_rate": 1.4368274212435135e-05, "loss": 0.4452, "step": 18449 }, { "epoch": 3.011754622260316, "grad_norm": 3.452253580093384, "learning_rate": 1.4367697467741834e-05, "loss": 0.4482, "step": 18450 }, { "epoch": 3.0119178809028204, "grad_norm": 3.009993314743042, "learning_rate": 1.4367120705094579e-05, "loss": 0.4413, "step": 18451 }, { "epoch": 3.012081139545325, "grad_norm": 3.1177923679351807, "learning_rate": 1.4366543924495741e-05, "loss": 0.488, "step": 18452 }, { "epoch": 3.012244398187829, "grad_norm": 2.725809097290039, "learning_rate": 1.4365967125947688e-05, "loss": 0.4212, "step": 18453 }, { "epoch": 3.0124076568303333, "grad_norm": 3.059879779815674, "learning_rate": 1.4365390309452795e-05, "loss": 0.4518, "step": 18454 }, { "epoch": 3.0125709154728377, "grad_norm": 3.1150853633880615, "learning_rate": 1.436481347501343e-05, "loss": 0.5512, "step": 18455 }, { "epoch": 3.012734174115342, "grad_norm": 3.1541521549224854, "learning_rate": 1.4364236622631963e-05, "loss": 0.4926, "step": 18456 }, { "epoch": 3.0128974327578466, "grad_norm": 2.843421220779419, "learning_rate": 1.436365975231077e-05, "loss": 0.4664, "step": 18457 }, { "epoch": 3.013060691400351, "grad_norm": 2.675002098083496, "learning_rate": 1.4363082864052218e-05, "loss": 0.4556, "step": 18458 }, { "epoch": 3.0132239500428555, "grad_norm": 3.5133907794952393, "learning_rate": 1.436250595785868e-05, "loss": 0.5534, "step": 18459 }, { "epoch": 3.01338720868536, "grad_norm": 3.057199478149414, "learning_rate": 1.4361929033732526e-05, "loss": 0.6057, "step": 18460 }, { "epoch": 3.0135504673278644, "grad_norm": 2.52078914642334, "learning_rate": 1.436135209167613e-05, "loss": 0.4382, "step": 18461 }, { "epoch": 3.0137137259703684, "grad_norm": 2.9169061183929443, "learning_rate": 1.4360775131691863e-05, "loss": 0.4844, "step": 18462 }, { "epoch": 3.013876984612873, "grad_norm": 3.226961612701416, "learning_rate": 1.4360198153782094e-05, "loss": 0.5249, "step": 18463 }, { "epoch": 3.0140402432553772, "grad_norm": 3.176288604736328, "learning_rate": 1.43596211579492e-05, "loss": 0.4825, "step": 18464 }, { "epoch": 3.0142035018978817, "grad_norm": 3.2679903507232666, "learning_rate": 1.4359044144195546e-05, "loss": 0.4829, "step": 18465 }, { "epoch": 3.014366760540386, "grad_norm": 3.7087979316711426, "learning_rate": 1.4358467112523509e-05, "loss": 0.4859, "step": 18466 }, { "epoch": 3.0145300191828905, "grad_norm": 3.5379457473754883, "learning_rate": 1.435789006293546e-05, "loss": 0.5085, "step": 18467 }, { "epoch": 3.014693277825395, "grad_norm": 2.748126268386841, "learning_rate": 1.4357312995433772e-05, "loss": 0.4621, "step": 18468 }, { "epoch": 3.0148565364678994, "grad_norm": 2.9531311988830566, "learning_rate": 1.4356735910020811e-05, "loss": 0.4676, "step": 18469 }, { "epoch": 3.015019795110404, "grad_norm": 2.6726067066192627, "learning_rate": 1.4356158806698956e-05, "loss": 0.4161, "step": 18470 }, { "epoch": 3.015183053752908, "grad_norm": 2.606337547302246, "learning_rate": 1.4355581685470576e-05, "loss": 0.4659, "step": 18471 }, { "epoch": 3.0153463123954123, "grad_norm": 2.686002731323242, "learning_rate": 1.4355004546338047e-05, "loss": 0.4171, "step": 18472 }, { "epoch": 3.0155095710379167, "grad_norm": 3.332625150680542, "learning_rate": 1.435442738930374e-05, "loss": 0.4868, "step": 18473 }, { "epoch": 3.015672829680421, "grad_norm": 2.789293050765991, "learning_rate": 1.4353850214370022e-05, "loss": 0.4425, "step": 18474 }, { "epoch": 3.0158360883229256, "grad_norm": 2.757478713989258, "learning_rate": 1.4353273021539268e-05, "loss": 0.4578, "step": 18475 }, { "epoch": 3.01599934696543, "grad_norm": 3.663492202758789, "learning_rate": 1.4352695810813858e-05, "loss": 0.4789, "step": 18476 }, { "epoch": 3.0161626056079345, "grad_norm": 2.768453598022461, "learning_rate": 1.4352118582196157e-05, "loss": 0.4551, "step": 18477 }, { "epoch": 3.016325864250439, "grad_norm": 2.7719995975494385, "learning_rate": 1.435154133568854e-05, "loss": 0.4344, "step": 18478 }, { "epoch": 3.0164891228929434, "grad_norm": 2.744828701019287, "learning_rate": 1.4350964071293381e-05, "loss": 0.4662, "step": 18479 }, { "epoch": 3.0166523815354473, "grad_norm": 3.188570261001587, "learning_rate": 1.4350386789013046e-05, "loss": 0.4929, "step": 18480 }, { "epoch": 3.016815640177952, "grad_norm": 2.996187925338745, "learning_rate": 1.434980948884992e-05, "loss": 0.4926, "step": 18481 }, { "epoch": 3.0169788988204562, "grad_norm": 3.2865793704986572, "learning_rate": 1.4349232170806366e-05, "loss": 0.4608, "step": 18482 }, { "epoch": 3.0171421574629607, "grad_norm": 2.9813766479492188, "learning_rate": 1.4348654834884763e-05, "loss": 0.4098, "step": 18483 }, { "epoch": 3.017305416105465, "grad_norm": 2.75622296333313, "learning_rate": 1.4348077481087481e-05, "loss": 0.537, "step": 18484 }, { "epoch": 3.0174686747479695, "grad_norm": 3.0954325199127197, "learning_rate": 1.4347500109416895e-05, "loss": 0.5209, "step": 18485 }, { "epoch": 3.017631933390474, "grad_norm": 3.3004486560821533, "learning_rate": 1.4346922719875374e-05, "loss": 0.5228, "step": 18486 }, { "epoch": 3.0177951920329784, "grad_norm": 2.989816904067993, "learning_rate": 1.4346345312465297e-05, "loss": 0.4209, "step": 18487 }, { "epoch": 3.017958450675483, "grad_norm": 3.1326308250427246, "learning_rate": 1.434576788718904e-05, "loss": 0.4451, "step": 18488 }, { "epoch": 3.018121709317987, "grad_norm": 3.28281307220459, "learning_rate": 1.434519044404897e-05, "loss": 0.5243, "step": 18489 }, { "epoch": 3.0182849679604913, "grad_norm": 2.502962827682495, "learning_rate": 1.434461298304746e-05, "loss": 0.4218, "step": 18490 }, { "epoch": 3.0184482266029957, "grad_norm": 2.9058284759521484, "learning_rate": 1.4344035504186886e-05, "loss": 0.4793, "step": 18491 }, { "epoch": 3.0186114852455, "grad_norm": 3.009641170501709, "learning_rate": 1.4343458007469625e-05, "loss": 0.4486, "step": 18492 }, { "epoch": 3.0187747438880046, "grad_norm": 3.295853614807129, "learning_rate": 1.4342880492898048e-05, "loss": 0.4949, "step": 18493 }, { "epoch": 3.018938002530509, "grad_norm": 2.627140522003174, "learning_rate": 1.4342302960474529e-05, "loss": 0.4154, "step": 18494 }, { "epoch": 3.0191012611730135, "grad_norm": 2.8662679195404053, "learning_rate": 1.4341725410201441e-05, "loss": 0.4886, "step": 18495 }, { "epoch": 3.019264519815518, "grad_norm": 2.8275158405303955, "learning_rate": 1.434114784208116e-05, "loss": 0.4885, "step": 18496 }, { "epoch": 3.019427778458022, "grad_norm": 3.37359356880188, "learning_rate": 1.434057025611606e-05, "loss": 0.473, "step": 18497 }, { "epoch": 3.0195910371005263, "grad_norm": 2.6936545372009277, "learning_rate": 1.4339992652308514e-05, "loss": 0.3788, "step": 18498 }, { "epoch": 3.019754295743031, "grad_norm": 3.350930690765381, "learning_rate": 1.4339415030660897e-05, "loss": 0.4779, "step": 18499 }, { "epoch": 3.019917554385535, "grad_norm": 3.1402857303619385, "learning_rate": 1.4338837391175582e-05, "loss": 0.5292, "step": 18500 }, { "epoch": 3.0200808130280397, "grad_norm": 3.3302969932556152, "learning_rate": 1.4338259733854947e-05, "loss": 0.5271, "step": 18501 }, { "epoch": 3.020244071670544, "grad_norm": 3.4707934856414795, "learning_rate": 1.4337682058701363e-05, "loss": 0.5037, "step": 18502 }, { "epoch": 3.0204073303130485, "grad_norm": 2.6188971996307373, "learning_rate": 1.4337104365717206e-05, "loss": 0.4105, "step": 18503 }, { "epoch": 3.020570588955553, "grad_norm": 3.0903193950653076, "learning_rate": 1.4336526654904852e-05, "loss": 0.5034, "step": 18504 }, { "epoch": 3.0207338475980574, "grad_norm": 2.686506986618042, "learning_rate": 1.4335948926266671e-05, "loss": 0.4395, "step": 18505 }, { "epoch": 3.0208971062405614, "grad_norm": 3.0256879329681396, "learning_rate": 1.4335371179805044e-05, "loss": 0.4968, "step": 18506 }, { "epoch": 3.021060364883066, "grad_norm": 3.269939661026001, "learning_rate": 1.4334793415522344e-05, "loss": 0.5235, "step": 18507 }, { "epoch": 3.0212236235255703, "grad_norm": 3.233807325363159, "learning_rate": 1.4334215633420945e-05, "loss": 0.3773, "step": 18508 }, { "epoch": 3.0213868821680747, "grad_norm": 4.191882133483887, "learning_rate": 1.433363783350322e-05, "loss": 0.4727, "step": 18509 }, { "epoch": 3.021550140810579, "grad_norm": 2.8718864917755127, "learning_rate": 1.4333060015771547e-05, "loss": 0.4061, "step": 18510 }, { "epoch": 3.0217133994530836, "grad_norm": 3.300894260406494, "learning_rate": 1.43324821802283e-05, "loss": 0.6485, "step": 18511 }, { "epoch": 3.021876658095588, "grad_norm": 2.6751210689544678, "learning_rate": 1.4331904326875856e-05, "loss": 0.4726, "step": 18512 }, { "epoch": 3.0220399167380925, "grad_norm": 3.077378034591675, "learning_rate": 1.4331326455716588e-05, "loss": 0.5362, "step": 18513 }, { "epoch": 3.022203175380597, "grad_norm": 3.933915376663208, "learning_rate": 1.4330748566752872e-05, "loss": 0.6306, "step": 18514 }, { "epoch": 3.022366434023101, "grad_norm": 2.9142942428588867, "learning_rate": 1.4330170659987083e-05, "loss": 0.4388, "step": 18515 }, { "epoch": 3.0225296926656053, "grad_norm": 3.0568835735321045, "learning_rate": 1.43295927354216e-05, "loss": 0.5061, "step": 18516 }, { "epoch": 3.0226929513081098, "grad_norm": 3.1178059577941895, "learning_rate": 1.4329014793058798e-05, "loss": 0.4466, "step": 18517 }, { "epoch": 3.022856209950614, "grad_norm": 3.1585919857025146, "learning_rate": 1.4328436832901046e-05, "loss": 0.4811, "step": 18518 }, { "epoch": 3.0230194685931187, "grad_norm": 2.5723001956939697, "learning_rate": 1.4327858854950724e-05, "loss": 0.3903, "step": 18519 }, { "epoch": 3.023182727235623, "grad_norm": 3.3165831565856934, "learning_rate": 1.4327280859210213e-05, "loss": 0.417, "step": 18520 }, { "epoch": 3.0233459858781275, "grad_norm": 2.306947708129883, "learning_rate": 1.432670284568188e-05, "loss": 0.3582, "step": 18521 }, { "epoch": 3.023509244520632, "grad_norm": 3.0062594413757324, "learning_rate": 1.4326124814368112e-05, "loss": 0.4513, "step": 18522 }, { "epoch": 3.0236725031631364, "grad_norm": 2.6749258041381836, "learning_rate": 1.4325546765271274e-05, "loss": 0.4289, "step": 18523 }, { "epoch": 3.0238357618056404, "grad_norm": 3.1458897590637207, "learning_rate": 1.4324968698393744e-05, "loss": 0.4296, "step": 18524 }, { "epoch": 3.023999020448145, "grad_norm": 3.0763165950775146, "learning_rate": 1.4324390613737902e-05, "loss": 0.5102, "step": 18525 }, { "epoch": 3.0241622790906493, "grad_norm": 3.2683534622192383, "learning_rate": 1.4323812511306125e-05, "loss": 0.491, "step": 18526 }, { "epoch": 3.0243255377331537, "grad_norm": 3.2741494178771973, "learning_rate": 1.4323234391100789e-05, "loss": 0.4862, "step": 18527 }, { "epoch": 3.024488796375658, "grad_norm": 2.9853880405426025, "learning_rate": 1.4322656253124265e-05, "loss": 0.4821, "step": 18528 }, { "epoch": 3.0246520550181626, "grad_norm": 2.868931770324707, "learning_rate": 1.4322078097378936e-05, "loss": 0.4836, "step": 18529 }, { "epoch": 3.024815313660667, "grad_norm": 2.6265900135040283, "learning_rate": 1.4321499923867173e-05, "loss": 0.4859, "step": 18530 }, { "epoch": 3.0249785723031715, "grad_norm": 3.104081392288208, "learning_rate": 1.4320921732591357e-05, "loss": 0.4416, "step": 18531 }, { "epoch": 3.025141830945676, "grad_norm": 3.0605037212371826, "learning_rate": 1.4320343523553865e-05, "loss": 0.418, "step": 18532 }, { "epoch": 3.02530508958818, "grad_norm": 3.525202512741089, "learning_rate": 1.431976529675707e-05, "loss": 0.5091, "step": 18533 }, { "epoch": 3.0254683482306843, "grad_norm": 3.3540852069854736, "learning_rate": 1.4319187052203353e-05, "loss": 0.5871, "step": 18534 }, { "epoch": 3.0256316068731888, "grad_norm": 3.187793493270874, "learning_rate": 1.4318608789895087e-05, "loss": 0.5097, "step": 18535 }, { "epoch": 3.025794865515693, "grad_norm": 2.7182531356811523, "learning_rate": 1.4318030509834653e-05, "loss": 0.5263, "step": 18536 }, { "epoch": 3.0259581241581976, "grad_norm": 3.0752058029174805, "learning_rate": 1.4317452212024425e-05, "loss": 0.4676, "step": 18537 }, { "epoch": 3.026121382800702, "grad_norm": 2.673119306564331, "learning_rate": 1.431687389646678e-05, "loss": 0.4171, "step": 18538 }, { "epoch": 3.0262846414432065, "grad_norm": 3.5093719959259033, "learning_rate": 1.4316295563164097e-05, "loss": 0.525, "step": 18539 }, { "epoch": 3.026447900085711, "grad_norm": 3.041940212249756, "learning_rate": 1.4315717212118753e-05, "loss": 0.4329, "step": 18540 }, { "epoch": 3.026611158728215, "grad_norm": 2.9920878410339355, "learning_rate": 1.4315138843333124e-05, "loss": 0.4544, "step": 18541 }, { "epoch": 3.0267744173707194, "grad_norm": 2.852386236190796, "learning_rate": 1.4314560456809592e-05, "loss": 0.4182, "step": 18542 }, { "epoch": 3.026937676013224, "grad_norm": 3.4637539386749268, "learning_rate": 1.4313982052550528e-05, "loss": 0.5373, "step": 18543 }, { "epoch": 3.0271009346557283, "grad_norm": 3.0596706867218018, "learning_rate": 1.4313403630558313e-05, "loss": 0.4532, "step": 18544 }, { "epoch": 3.0272641932982327, "grad_norm": 2.9071834087371826, "learning_rate": 1.4312825190835327e-05, "loss": 0.4391, "step": 18545 }, { "epoch": 3.027427451940737, "grad_norm": 2.962116003036499, "learning_rate": 1.4312246733383944e-05, "loss": 0.4447, "step": 18546 }, { "epoch": 3.0275907105832416, "grad_norm": 3.0650763511657715, "learning_rate": 1.4311668258206538e-05, "loss": 0.5345, "step": 18547 }, { "epoch": 3.027753969225746, "grad_norm": 2.499274730682373, "learning_rate": 1.4311089765305499e-05, "loss": 0.3966, "step": 18548 }, { "epoch": 3.0279172278682505, "grad_norm": 2.805772542953491, "learning_rate": 1.4310511254683195e-05, "loss": 0.4781, "step": 18549 }, { "epoch": 3.0280804865107545, "grad_norm": 2.8303017616271973, "learning_rate": 1.4309932726342007e-05, "loss": 0.4623, "step": 18550 }, { "epoch": 3.028243745153259, "grad_norm": 3.273749589920044, "learning_rate": 1.4309354180284313e-05, "loss": 0.4671, "step": 18551 }, { "epoch": 3.0284070037957633, "grad_norm": 2.8202006816864014, "learning_rate": 1.430877561651249e-05, "loss": 0.4787, "step": 18552 }, { "epoch": 3.0285702624382678, "grad_norm": 2.6990408897399902, "learning_rate": 1.4308197035028917e-05, "loss": 0.43, "step": 18553 }, { "epoch": 3.028733521080772, "grad_norm": 3.1171698570251465, "learning_rate": 1.4307618435835972e-05, "loss": 0.4639, "step": 18554 }, { "epoch": 3.0288967797232766, "grad_norm": 3.474665641784668, "learning_rate": 1.4307039818936036e-05, "loss": 0.4932, "step": 18555 }, { "epoch": 3.029060038365781, "grad_norm": 3.320780038833618, "learning_rate": 1.4306461184331489e-05, "loss": 0.4674, "step": 18556 }, { "epoch": 3.0292232970082855, "grad_norm": 2.647963285446167, "learning_rate": 1.43058825320247e-05, "loss": 0.3755, "step": 18557 }, { "epoch": 3.02938655565079, "grad_norm": 2.973240613937378, "learning_rate": 1.4305303862018058e-05, "loss": 0.504, "step": 18558 }, { "epoch": 3.029549814293294, "grad_norm": 2.7616333961486816, "learning_rate": 1.4304725174313936e-05, "loss": 0.437, "step": 18559 }, { "epoch": 3.0297130729357984, "grad_norm": 2.6076197624206543, "learning_rate": 1.4304146468914716e-05, "loss": 0.4019, "step": 18560 }, { "epoch": 3.029876331578303, "grad_norm": 2.5024502277374268, "learning_rate": 1.4303567745822772e-05, "loss": 0.422, "step": 18561 }, { "epoch": 3.0300395902208073, "grad_norm": 3.0673625469207764, "learning_rate": 1.4302989005040488e-05, "loss": 0.436, "step": 18562 }, { "epoch": 3.0302028488633117, "grad_norm": 3.1867644786834717, "learning_rate": 1.4302410246570237e-05, "loss": 0.44, "step": 18563 }, { "epoch": 3.030366107505816, "grad_norm": 2.7617430686950684, "learning_rate": 1.4301831470414407e-05, "loss": 0.4581, "step": 18564 }, { "epoch": 3.0305293661483206, "grad_norm": 4.167940139770508, "learning_rate": 1.430125267657537e-05, "loss": 0.5689, "step": 18565 }, { "epoch": 3.030692624790825, "grad_norm": 3.2712721824645996, "learning_rate": 1.4300673865055509e-05, "loss": 0.4912, "step": 18566 }, { "epoch": 3.0308558834333295, "grad_norm": 3.3941433429718018, "learning_rate": 1.4300095035857202e-05, "loss": 0.5129, "step": 18567 }, { "epoch": 3.0310191420758335, "grad_norm": 3.295241355895996, "learning_rate": 1.4299516188982827e-05, "loss": 0.4674, "step": 18568 }, { "epoch": 3.031182400718338, "grad_norm": 3.6560399532318115, "learning_rate": 1.429893732443476e-05, "loss": 0.5159, "step": 18569 }, { "epoch": 3.0313456593608423, "grad_norm": 3.128523826599121, "learning_rate": 1.429835844221539e-05, "loss": 0.4749, "step": 18570 }, { "epoch": 3.0315089180033468, "grad_norm": 3.480262041091919, "learning_rate": 1.4297779542327093e-05, "loss": 0.5225, "step": 18571 }, { "epoch": 3.031672176645851, "grad_norm": 3.471021890640259, "learning_rate": 1.4297200624772245e-05, "loss": 0.4646, "step": 18572 }, { "epoch": 3.0318354352883556, "grad_norm": 3.605396270751953, "learning_rate": 1.4296621689553228e-05, "loss": 0.5263, "step": 18573 }, { "epoch": 3.03199869393086, "grad_norm": 3.2653350830078125, "learning_rate": 1.4296042736672418e-05, "loss": 0.5058, "step": 18574 }, { "epoch": 3.0321619525733645, "grad_norm": 3.6865313053131104, "learning_rate": 1.42954637661322e-05, "loss": 0.468, "step": 18575 }, { "epoch": 3.032325211215869, "grad_norm": 3.5028820037841797, "learning_rate": 1.4294884777934956e-05, "loss": 0.4931, "step": 18576 }, { "epoch": 3.032488469858373, "grad_norm": 2.8854458332061768, "learning_rate": 1.429430577208306e-05, "loss": 0.4164, "step": 18577 }, { "epoch": 3.0326517285008774, "grad_norm": 3.155578374862671, "learning_rate": 1.4293726748578895e-05, "loss": 0.4186, "step": 18578 }, { "epoch": 3.032814987143382, "grad_norm": 2.6589810848236084, "learning_rate": 1.4293147707424842e-05, "loss": 0.3913, "step": 18579 }, { "epoch": 3.0329782457858863, "grad_norm": 3.4007983207702637, "learning_rate": 1.4292568648623276e-05, "loss": 0.5716, "step": 18580 }, { "epoch": 3.0331415044283907, "grad_norm": 2.6341001987457275, "learning_rate": 1.4291989572176583e-05, "loss": 0.389, "step": 18581 }, { "epoch": 3.033304763070895, "grad_norm": 3.221397638320923, "learning_rate": 1.4291410478087143e-05, "loss": 0.3995, "step": 18582 }, { "epoch": 3.0334680217133996, "grad_norm": 3.5479354858398438, "learning_rate": 1.4290831366357334e-05, "loss": 0.5098, "step": 18583 }, { "epoch": 3.033631280355904, "grad_norm": 3.187361478805542, "learning_rate": 1.4290252236989537e-05, "loss": 0.4675, "step": 18584 }, { "epoch": 3.033794538998408, "grad_norm": 3.470165729522705, "learning_rate": 1.4289673089986132e-05, "loss": 0.5041, "step": 18585 }, { "epoch": 3.0339577976409124, "grad_norm": 2.9247822761535645, "learning_rate": 1.4289093925349503e-05, "loss": 0.3891, "step": 18586 }, { "epoch": 3.034121056283417, "grad_norm": 2.6580393314361572, "learning_rate": 1.4288514743082027e-05, "loss": 0.4212, "step": 18587 }, { "epoch": 3.0342843149259213, "grad_norm": 3.6789653301239014, "learning_rate": 1.4287935543186086e-05, "loss": 0.5771, "step": 18588 }, { "epoch": 3.0344475735684258, "grad_norm": 2.9322521686553955, "learning_rate": 1.428735632566406e-05, "loss": 0.44, "step": 18589 }, { "epoch": 3.03461083221093, "grad_norm": 2.9295825958251953, "learning_rate": 1.4286777090518334e-05, "loss": 0.4535, "step": 18590 }, { "epoch": 3.0347740908534346, "grad_norm": 3.480799913406372, "learning_rate": 1.4286197837751286e-05, "loss": 0.5, "step": 18591 }, { "epoch": 3.034937349495939, "grad_norm": 2.920318365097046, "learning_rate": 1.4285618567365296e-05, "loss": 0.4679, "step": 18592 }, { "epoch": 3.0351006081384435, "grad_norm": 2.9061455726623535, "learning_rate": 1.4285039279362745e-05, "loss": 0.4857, "step": 18593 }, { "epoch": 3.0352638667809475, "grad_norm": 3.821403980255127, "learning_rate": 1.4284459973746016e-05, "loss": 0.5302, "step": 18594 }, { "epoch": 3.035427125423452, "grad_norm": 3.18898344039917, "learning_rate": 1.4283880650517493e-05, "loss": 0.4483, "step": 18595 }, { "epoch": 3.0355903840659564, "grad_norm": 2.9608235359191895, "learning_rate": 1.4283301309679549e-05, "loss": 0.4573, "step": 18596 }, { "epoch": 3.035753642708461, "grad_norm": 3.2704482078552246, "learning_rate": 1.4282721951234573e-05, "loss": 0.5129, "step": 18597 }, { "epoch": 3.0359169013509653, "grad_norm": 2.9463937282562256, "learning_rate": 1.4282142575184945e-05, "loss": 0.4535, "step": 18598 }, { "epoch": 3.0360801599934697, "grad_norm": 3.4465417861938477, "learning_rate": 1.4281563181533045e-05, "loss": 0.5046, "step": 18599 }, { "epoch": 3.036243418635974, "grad_norm": 3.0719385147094727, "learning_rate": 1.4280983770281258e-05, "loss": 0.5442, "step": 18600 }, { "epoch": 3.0364066772784786, "grad_norm": 2.9026989936828613, "learning_rate": 1.428040434143196e-05, "loss": 0.4059, "step": 18601 }, { "epoch": 3.036569935920983, "grad_norm": 2.627426862716675, "learning_rate": 1.4279824894987535e-05, "loss": 0.44, "step": 18602 }, { "epoch": 3.036733194563487, "grad_norm": 2.7275962829589844, "learning_rate": 1.4279245430950369e-05, "loss": 0.4315, "step": 18603 }, { "epoch": 3.0368964532059914, "grad_norm": 3.418731451034546, "learning_rate": 1.4278665949322838e-05, "loss": 0.5038, "step": 18604 }, { "epoch": 3.037059711848496, "grad_norm": 3.5971431732177734, "learning_rate": 1.4278086450107331e-05, "loss": 0.6414, "step": 18605 }, { "epoch": 3.0372229704910003, "grad_norm": 3.0839123725891113, "learning_rate": 1.4277506933306224e-05, "loss": 0.4888, "step": 18606 }, { "epoch": 3.0373862291335048, "grad_norm": 2.689892053604126, "learning_rate": 1.4276927398921899e-05, "loss": 0.4211, "step": 18607 }, { "epoch": 3.037549487776009, "grad_norm": 2.9805550575256348, "learning_rate": 1.4276347846956742e-05, "loss": 0.4796, "step": 18608 }, { "epoch": 3.0377127464185136, "grad_norm": 2.529620409011841, "learning_rate": 1.4275768277413133e-05, "loss": 0.4315, "step": 18609 }, { "epoch": 3.037876005061018, "grad_norm": 2.9461936950683594, "learning_rate": 1.4275188690293458e-05, "loss": 0.4791, "step": 18610 }, { "epoch": 3.0380392637035225, "grad_norm": 3.4897148609161377, "learning_rate": 1.4274609085600094e-05, "loss": 0.5425, "step": 18611 }, { "epoch": 3.0382025223460265, "grad_norm": 3.238492488861084, "learning_rate": 1.4274029463335427e-05, "loss": 0.4757, "step": 18612 }, { "epoch": 3.038365780988531, "grad_norm": 3.0637283325195312, "learning_rate": 1.4273449823501837e-05, "loss": 0.437, "step": 18613 }, { "epoch": 3.0385290396310354, "grad_norm": 2.8148884773254395, "learning_rate": 1.4272870166101709e-05, "loss": 0.4537, "step": 18614 }, { "epoch": 3.03869229827354, "grad_norm": 2.970301389694214, "learning_rate": 1.4272290491137426e-05, "loss": 0.4905, "step": 18615 }, { "epoch": 3.0388555569160443, "grad_norm": 3.515096426010132, "learning_rate": 1.4271710798611372e-05, "loss": 0.5564, "step": 18616 }, { "epoch": 3.0390188155585487, "grad_norm": 3.1028668880462646, "learning_rate": 1.4271131088525925e-05, "loss": 0.498, "step": 18617 }, { "epoch": 3.039182074201053, "grad_norm": 3.1362345218658447, "learning_rate": 1.4270551360883469e-05, "loss": 0.4168, "step": 18618 }, { "epoch": 3.0393453328435576, "grad_norm": 2.6394004821777344, "learning_rate": 1.426997161568639e-05, "loss": 0.4226, "step": 18619 }, { "epoch": 3.039508591486062, "grad_norm": 3.182126998901367, "learning_rate": 1.4269391852937075e-05, "loss": 0.5404, "step": 18620 }, { "epoch": 3.039671850128566, "grad_norm": 2.916271448135376, "learning_rate": 1.4268812072637899e-05, "loss": 0.4907, "step": 18621 }, { "epoch": 3.0398351087710704, "grad_norm": 3.3589751720428467, "learning_rate": 1.4268232274791247e-05, "loss": 0.4384, "step": 18622 }, { "epoch": 3.039998367413575, "grad_norm": 2.828599452972412, "learning_rate": 1.4267652459399506e-05, "loss": 0.4723, "step": 18623 }, { "epoch": 3.0401616260560793, "grad_norm": 2.5574426651000977, "learning_rate": 1.4267072626465056e-05, "loss": 0.4289, "step": 18624 }, { "epoch": 3.0403248846985838, "grad_norm": 3.332092761993408, "learning_rate": 1.4266492775990281e-05, "loss": 0.513, "step": 18625 }, { "epoch": 3.040488143341088, "grad_norm": 2.888519048690796, "learning_rate": 1.4265912907977565e-05, "loss": 0.4117, "step": 18626 }, { "epoch": 3.0406514019835926, "grad_norm": 2.71976637840271, "learning_rate": 1.4265333022429296e-05, "loss": 0.4512, "step": 18627 }, { "epoch": 3.040814660626097, "grad_norm": 2.990051031112671, "learning_rate": 1.4264753119347848e-05, "loss": 0.502, "step": 18628 }, { "epoch": 3.0409779192686015, "grad_norm": 2.8973748683929443, "learning_rate": 1.4264173198735614e-05, "loss": 0.4618, "step": 18629 }, { "epoch": 3.0411411779111055, "grad_norm": 3.008941411972046, "learning_rate": 1.426359326059497e-05, "loss": 0.4443, "step": 18630 }, { "epoch": 3.04130443655361, "grad_norm": 4.078730583190918, "learning_rate": 1.4263013304928308e-05, "loss": 0.5341, "step": 18631 }, { "epoch": 3.0414676951961144, "grad_norm": 3.0021636486053467, "learning_rate": 1.4262433331738005e-05, "loss": 0.4492, "step": 18632 }, { "epoch": 3.041630953838619, "grad_norm": 3.0773916244506836, "learning_rate": 1.426185334102645e-05, "loss": 0.4449, "step": 18633 }, { "epoch": 3.0417942124811232, "grad_norm": 2.613163471221924, "learning_rate": 1.4261273332796026e-05, "loss": 0.4104, "step": 18634 }, { "epoch": 3.0419574711236277, "grad_norm": 3.131901502609253, "learning_rate": 1.4260693307049116e-05, "loss": 0.4953, "step": 18635 }, { "epoch": 3.042120729766132, "grad_norm": 2.7358193397521973, "learning_rate": 1.4260113263788103e-05, "loss": 0.4107, "step": 18636 }, { "epoch": 3.0422839884086366, "grad_norm": 3.0945775508880615, "learning_rate": 1.4259533203015373e-05, "loss": 0.5604, "step": 18637 }, { "epoch": 3.0424472470511406, "grad_norm": 2.733401298522949, "learning_rate": 1.425895312473331e-05, "loss": 0.5092, "step": 18638 }, { "epoch": 3.042610505693645, "grad_norm": 3.1256027221679688, "learning_rate": 1.4258373028944303e-05, "loss": 0.4548, "step": 18639 }, { "epoch": 3.0427737643361494, "grad_norm": 3.0487782955169678, "learning_rate": 1.4257792915650728e-05, "loss": 0.4537, "step": 18640 }, { "epoch": 3.042937022978654, "grad_norm": 3.188533306121826, "learning_rate": 1.4257212784854975e-05, "loss": 0.4355, "step": 18641 }, { "epoch": 3.0431002816211583, "grad_norm": 2.5293469429016113, "learning_rate": 1.4256632636559428e-05, "loss": 0.3912, "step": 18642 }, { "epoch": 3.0432635402636627, "grad_norm": 2.998690605163574, "learning_rate": 1.4256052470766473e-05, "loss": 0.4956, "step": 18643 }, { "epoch": 3.043426798906167, "grad_norm": 3.0860676765441895, "learning_rate": 1.4255472287478494e-05, "loss": 0.4986, "step": 18644 }, { "epoch": 3.0435900575486716, "grad_norm": 3.0742695331573486, "learning_rate": 1.4254892086697872e-05, "loss": 0.3706, "step": 18645 }, { "epoch": 3.043753316191176, "grad_norm": 3.319403648376465, "learning_rate": 1.4254311868426994e-05, "loss": 0.4616, "step": 18646 }, { "epoch": 3.04391657483368, "grad_norm": 2.9991676807403564, "learning_rate": 1.4253731632668251e-05, "loss": 0.4228, "step": 18647 }, { "epoch": 3.0440798334761845, "grad_norm": 3.0091755390167236, "learning_rate": 1.425315137942402e-05, "loss": 0.4612, "step": 18648 }, { "epoch": 3.044243092118689, "grad_norm": 3.0086820125579834, "learning_rate": 1.4252571108696693e-05, "loss": 0.4244, "step": 18649 }, { "epoch": 3.0444063507611934, "grad_norm": 3.9804940223693848, "learning_rate": 1.4251990820488648e-05, "loss": 0.6049, "step": 18650 }, { "epoch": 3.044569609403698, "grad_norm": 3.1487653255462646, "learning_rate": 1.4251410514802276e-05, "loss": 0.5544, "step": 18651 }, { "epoch": 3.0447328680462022, "grad_norm": 2.727540969848633, "learning_rate": 1.4250830191639959e-05, "loss": 0.4157, "step": 18652 }, { "epoch": 3.0448961266887067, "grad_norm": 3.024445056915283, "learning_rate": 1.4250249851004087e-05, "loss": 0.4666, "step": 18653 }, { "epoch": 3.045059385331211, "grad_norm": 3.9524219036102295, "learning_rate": 1.4249669492897043e-05, "loss": 0.5618, "step": 18654 }, { "epoch": 3.0452226439737156, "grad_norm": 3.361536741256714, "learning_rate": 1.4249089117321213e-05, "loss": 0.5026, "step": 18655 }, { "epoch": 3.0453859026162196, "grad_norm": 2.685418128967285, "learning_rate": 1.4248508724278977e-05, "loss": 0.387, "step": 18656 }, { "epoch": 3.045549161258724, "grad_norm": 2.966543197631836, "learning_rate": 1.424792831377273e-05, "loss": 0.4802, "step": 18657 }, { "epoch": 3.0457124199012284, "grad_norm": 3.3744874000549316, "learning_rate": 1.4247347885804852e-05, "loss": 0.5139, "step": 18658 }, { "epoch": 3.045875678543733, "grad_norm": 2.4106392860412598, "learning_rate": 1.4246767440377734e-05, "loss": 0.4094, "step": 18659 }, { "epoch": 3.0460389371862373, "grad_norm": 3.437117576599121, "learning_rate": 1.4246186977493754e-05, "loss": 0.5155, "step": 18660 }, { "epoch": 3.0462021958287417, "grad_norm": 3.1715126037597656, "learning_rate": 1.4245606497155305e-05, "loss": 0.5219, "step": 18661 }, { "epoch": 3.046365454471246, "grad_norm": 3.279550313949585, "learning_rate": 1.424502599936477e-05, "loss": 0.5851, "step": 18662 }, { "epoch": 3.0465287131137506, "grad_norm": 2.697618007659912, "learning_rate": 1.4244445484124536e-05, "loss": 0.4508, "step": 18663 }, { "epoch": 3.046691971756255, "grad_norm": 3.0825889110565186, "learning_rate": 1.4243864951436989e-05, "loss": 0.4886, "step": 18664 }, { "epoch": 3.046855230398759, "grad_norm": 3.335045099258423, "learning_rate": 1.4243284401304518e-05, "loss": 0.514, "step": 18665 }, { "epoch": 3.0470184890412635, "grad_norm": 3.231125593185425, "learning_rate": 1.4242703833729505e-05, "loss": 0.4726, "step": 18666 }, { "epoch": 3.047181747683768, "grad_norm": 2.812246799468994, "learning_rate": 1.4242123248714338e-05, "loss": 0.4466, "step": 18667 }, { "epoch": 3.0473450063262724, "grad_norm": 3.1551153659820557, "learning_rate": 1.4241542646261404e-05, "loss": 0.5023, "step": 18668 }, { "epoch": 3.047508264968777, "grad_norm": 2.806795597076416, "learning_rate": 1.4240962026373093e-05, "loss": 0.4708, "step": 18669 }, { "epoch": 3.0476715236112812, "grad_norm": 3.4809927940368652, "learning_rate": 1.4240381389051785e-05, "loss": 0.5105, "step": 18670 }, { "epoch": 3.0478347822537857, "grad_norm": 2.645404577255249, "learning_rate": 1.4239800734299874e-05, "loss": 0.3649, "step": 18671 }, { "epoch": 3.04799804089629, "grad_norm": 3.0048775672912598, "learning_rate": 1.4239220062119738e-05, "loss": 0.4585, "step": 18672 }, { "epoch": 3.048161299538794, "grad_norm": 2.9969356060028076, "learning_rate": 1.4238639372513772e-05, "loss": 0.4808, "step": 18673 }, { "epoch": 3.0483245581812985, "grad_norm": 2.896204710006714, "learning_rate": 1.423805866548436e-05, "loss": 0.4911, "step": 18674 }, { "epoch": 3.048487816823803, "grad_norm": 2.306469440460205, "learning_rate": 1.4237477941033888e-05, "loss": 0.3859, "step": 18675 }, { "epoch": 3.0486510754663074, "grad_norm": 3.115548849105835, "learning_rate": 1.4236897199164745e-05, "loss": 0.4034, "step": 18676 }, { "epoch": 3.048814334108812, "grad_norm": 3.0888757705688477, "learning_rate": 1.4236316439879319e-05, "loss": 0.4668, "step": 18677 }, { "epoch": 3.0489775927513163, "grad_norm": 2.711982011795044, "learning_rate": 1.4235735663179993e-05, "loss": 0.3513, "step": 18678 }, { "epoch": 3.0491408513938207, "grad_norm": 2.863330364227295, "learning_rate": 1.423515486906916e-05, "loss": 0.4122, "step": 18679 }, { "epoch": 3.049304110036325, "grad_norm": 3.5929715633392334, "learning_rate": 1.4234574057549202e-05, "loss": 0.4996, "step": 18680 }, { "epoch": 3.0494673686788296, "grad_norm": 2.8964569568634033, "learning_rate": 1.4233993228622509e-05, "loss": 0.5155, "step": 18681 }, { "epoch": 3.0496306273213336, "grad_norm": 2.4693033695220947, "learning_rate": 1.423341238229147e-05, "loss": 0.3593, "step": 18682 }, { "epoch": 3.049793885963838, "grad_norm": 2.834777355194092, "learning_rate": 1.4232831518558471e-05, "loss": 0.4431, "step": 18683 }, { "epoch": 3.0499571446063425, "grad_norm": 3.2003462314605713, "learning_rate": 1.4232250637425897e-05, "loss": 0.5198, "step": 18684 }, { "epoch": 3.050120403248847, "grad_norm": 3.3299167156219482, "learning_rate": 1.423166973889614e-05, "loss": 0.5041, "step": 18685 }, { "epoch": 3.0502836618913514, "grad_norm": 3.1472935676574707, "learning_rate": 1.4231088822971591e-05, "loss": 0.475, "step": 18686 }, { "epoch": 3.050446920533856, "grad_norm": 2.943225622177124, "learning_rate": 1.4230507889654628e-05, "loss": 0.441, "step": 18687 }, { "epoch": 3.0506101791763602, "grad_norm": 2.7690789699554443, "learning_rate": 1.4229926938947649e-05, "loss": 0.4688, "step": 18688 }, { "epoch": 3.0507734378188647, "grad_norm": 2.758441686630249, "learning_rate": 1.4229345970853032e-05, "loss": 0.4569, "step": 18689 }, { "epoch": 3.050936696461369, "grad_norm": 3.7314069271087646, "learning_rate": 1.4228764985373175e-05, "loss": 0.4665, "step": 18690 }, { "epoch": 3.051099955103873, "grad_norm": 3.157069683074951, "learning_rate": 1.4228183982510462e-05, "loss": 0.4717, "step": 18691 }, { "epoch": 3.0512632137463775, "grad_norm": 3.2653746604919434, "learning_rate": 1.422760296226728e-05, "loss": 0.4607, "step": 18692 }, { "epoch": 3.051426472388882, "grad_norm": 3.4437410831451416, "learning_rate": 1.4227021924646019e-05, "loss": 0.4509, "step": 18693 }, { "epoch": 3.0515897310313864, "grad_norm": 3.4047937393188477, "learning_rate": 1.4226440869649067e-05, "loss": 0.4646, "step": 18694 }, { "epoch": 3.051752989673891, "grad_norm": 2.739069700241089, "learning_rate": 1.422585979727881e-05, "loss": 0.4609, "step": 18695 }, { "epoch": 3.0519162483163953, "grad_norm": 2.912759780883789, "learning_rate": 1.422527870753764e-05, "loss": 0.4714, "step": 18696 }, { "epoch": 3.0520795069588997, "grad_norm": 2.588618516921997, "learning_rate": 1.4224697600427946e-05, "loss": 0.4283, "step": 18697 }, { "epoch": 3.052242765601404, "grad_norm": 3.356898307800293, "learning_rate": 1.4224116475952116e-05, "loss": 0.5536, "step": 18698 }, { "epoch": 3.0524060242439086, "grad_norm": 2.8459880352020264, "learning_rate": 1.4223535334112537e-05, "loss": 0.4494, "step": 18699 }, { "epoch": 3.0525692828864126, "grad_norm": 3.3902218341827393, "learning_rate": 1.42229541749116e-05, "loss": 0.5538, "step": 18700 }, { "epoch": 3.052732541528917, "grad_norm": 3.718991279602051, "learning_rate": 1.422237299835169e-05, "loss": 0.5077, "step": 18701 }, { "epoch": 3.0528958001714215, "grad_norm": 3.5158441066741943, "learning_rate": 1.4221791804435202e-05, "loss": 0.593, "step": 18702 }, { "epoch": 3.053059058813926, "grad_norm": 2.99078106880188, "learning_rate": 1.4221210593164523e-05, "loss": 0.5038, "step": 18703 }, { "epoch": 3.0532223174564304, "grad_norm": 3.0454113483428955, "learning_rate": 1.422062936454204e-05, "loss": 0.5099, "step": 18704 }, { "epoch": 3.053385576098935, "grad_norm": 2.5717616081237793, "learning_rate": 1.4220048118570143e-05, "loss": 0.4622, "step": 18705 }, { "epoch": 3.0535488347414392, "grad_norm": 3.1117119789123535, "learning_rate": 1.421946685525122e-05, "loss": 0.4735, "step": 18706 }, { "epoch": 3.0537120933839437, "grad_norm": 3.4963138103485107, "learning_rate": 1.421888557458766e-05, "loss": 0.5551, "step": 18707 }, { "epoch": 3.053875352026448, "grad_norm": 2.69014835357666, "learning_rate": 1.421830427658186e-05, "loss": 0.4008, "step": 18708 }, { "epoch": 3.054038610668952, "grad_norm": 3.132766008377075, "learning_rate": 1.4217722961236202e-05, "loss": 0.4723, "step": 18709 }, { "epoch": 3.0542018693114565, "grad_norm": 3.773934841156006, "learning_rate": 1.4217141628553077e-05, "loss": 0.6017, "step": 18710 }, { "epoch": 3.054365127953961, "grad_norm": 2.8549866676330566, "learning_rate": 1.4216560278534876e-05, "loss": 0.4082, "step": 18711 }, { "epoch": 3.0545283865964654, "grad_norm": 2.2392897605895996, "learning_rate": 1.4215978911183986e-05, "loss": 0.3644, "step": 18712 }, { "epoch": 3.05469164523897, "grad_norm": 2.768251657485962, "learning_rate": 1.4215397526502801e-05, "loss": 0.4421, "step": 18713 }, { "epoch": 3.0548549038814743, "grad_norm": 2.748906135559082, "learning_rate": 1.4214816124493706e-05, "loss": 0.4019, "step": 18714 }, { "epoch": 3.0550181625239787, "grad_norm": 3.6409409046173096, "learning_rate": 1.4214234705159096e-05, "loss": 0.5037, "step": 18715 }, { "epoch": 3.055181421166483, "grad_norm": 3.2249109745025635, "learning_rate": 1.4213653268501356e-05, "loss": 0.4954, "step": 18716 }, { "epoch": 3.0553446798089876, "grad_norm": 3.1835198402404785, "learning_rate": 1.421307181452288e-05, "loss": 0.4781, "step": 18717 }, { "epoch": 3.0555079384514916, "grad_norm": 2.5102996826171875, "learning_rate": 1.4212490343226055e-05, "loss": 0.3692, "step": 18718 }, { "epoch": 3.055671197093996, "grad_norm": 2.8375420570373535, "learning_rate": 1.4211908854613274e-05, "loss": 0.4365, "step": 18719 }, { "epoch": 3.0558344557365005, "grad_norm": 2.4475395679473877, "learning_rate": 1.4211327348686925e-05, "loss": 0.3847, "step": 18720 }, { "epoch": 3.055997714379005, "grad_norm": 3.1269285678863525, "learning_rate": 1.4210745825449402e-05, "loss": 0.5324, "step": 18721 }, { "epoch": 3.0561609730215094, "grad_norm": 3.4922690391540527, "learning_rate": 1.4210164284903089e-05, "loss": 0.5428, "step": 18722 }, { "epoch": 3.056324231664014, "grad_norm": 2.6349904537200928, "learning_rate": 1.4209582727050384e-05, "loss": 0.3853, "step": 18723 }, { "epoch": 3.0564874903065182, "grad_norm": 3.3405487537384033, "learning_rate": 1.4209001151893673e-05, "loss": 0.4542, "step": 18724 }, { "epoch": 3.0566507489490227, "grad_norm": 4.1370038986206055, "learning_rate": 1.4208419559435348e-05, "loss": 0.5202, "step": 18725 }, { "epoch": 3.0568140075915267, "grad_norm": 2.9052700996398926, "learning_rate": 1.4207837949677797e-05, "loss": 0.4309, "step": 18726 }, { "epoch": 3.056977266234031, "grad_norm": 3.0752830505371094, "learning_rate": 1.4207256322623418e-05, "loss": 0.4255, "step": 18727 }, { "epoch": 3.0571405248765355, "grad_norm": 3.9026753902435303, "learning_rate": 1.420667467827459e-05, "loss": 0.5565, "step": 18728 }, { "epoch": 3.05730378351904, "grad_norm": 3.8946950435638428, "learning_rate": 1.4206093016633713e-05, "loss": 0.6164, "step": 18729 }, { "epoch": 3.0574670421615444, "grad_norm": 3.7354736328125, "learning_rate": 1.4205511337703178e-05, "loss": 0.463, "step": 18730 }, { "epoch": 3.057630300804049, "grad_norm": 2.8962149620056152, "learning_rate": 1.4204929641485372e-05, "loss": 0.3777, "step": 18731 }, { "epoch": 3.0577935594465533, "grad_norm": 3.261279344558716, "learning_rate": 1.420434792798269e-05, "loss": 0.4763, "step": 18732 }, { "epoch": 3.0579568180890577, "grad_norm": 2.867520570755005, "learning_rate": 1.4203766197197522e-05, "loss": 0.4336, "step": 18733 }, { "epoch": 3.058120076731562, "grad_norm": 3.0644960403442383, "learning_rate": 1.4203184449132252e-05, "loss": 0.4526, "step": 18734 }, { "epoch": 3.058283335374066, "grad_norm": 3.038972854614258, "learning_rate": 1.4202602683789284e-05, "loss": 0.4257, "step": 18735 }, { "epoch": 3.0584465940165706, "grad_norm": 3.25519061088562, "learning_rate": 1.4202020901171e-05, "loss": 0.4876, "step": 18736 }, { "epoch": 3.058609852659075, "grad_norm": 2.784400701522827, "learning_rate": 1.42014391012798e-05, "loss": 0.424, "step": 18737 }, { "epoch": 3.0587731113015795, "grad_norm": 2.9182467460632324, "learning_rate": 1.4200857284118067e-05, "loss": 0.4799, "step": 18738 }, { "epoch": 3.058936369944084, "grad_norm": 2.971224069595337, "learning_rate": 1.4200275449688193e-05, "loss": 0.4307, "step": 18739 }, { "epoch": 3.0590996285865883, "grad_norm": 3.460967779159546, "learning_rate": 1.4199693597992574e-05, "loss": 0.5159, "step": 18740 }, { "epoch": 3.059262887229093, "grad_norm": 3.0415704250335693, "learning_rate": 1.4199111729033602e-05, "loss": 0.436, "step": 18741 }, { "epoch": 3.0594261458715972, "grad_norm": 4.073334693908691, "learning_rate": 1.4198529842813668e-05, "loss": 0.4755, "step": 18742 }, { "epoch": 3.0595894045141017, "grad_norm": 3.082279920578003, "learning_rate": 1.4197947939335161e-05, "loss": 0.4976, "step": 18743 }, { "epoch": 3.0597526631566057, "grad_norm": 3.4999215602874756, "learning_rate": 1.4197366018600475e-05, "loss": 0.5037, "step": 18744 }, { "epoch": 3.05991592179911, "grad_norm": 3.194772481918335, "learning_rate": 1.4196784080612003e-05, "loss": 0.4173, "step": 18745 }, { "epoch": 3.0600791804416145, "grad_norm": 2.881946325302124, "learning_rate": 1.4196202125372137e-05, "loss": 0.4374, "step": 18746 }, { "epoch": 3.060242439084119, "grad_norm": 3.359187602996826, "learning_rate": 1.4195620152883269e-05, "loss": 0.5135, "step": 18747 }, { "epoch": 3.0604056977266234, "grad_norm": 3.1346912384033203, "learning_rate": 1.419503816314779e-05, "loss": 0.4668, "step": 18748 }, { "epoch": 3.060568956369128, "grad_norm": 2.6485443115234375, "learning_rate": 1.4194456156168092e-05, "loss": 0.4537, "step": 18749 }, { "epoch": 3.0607322150116323, "grad_norm": 3.141530752182007, "learning_rate": 1.419387413194657e-05, "loss": 0.4783, "step": 18750 }, { "epoch": 3.0608954736541367, "grad_norm": 3.376368522644043, "learning_rate": 1.4193292090485612e-05, "loss": 0.4863, "step": 18751 }, { "epoch": 3.061058732296641, "grad_norm": 3.0541703701019287, "learning_rate": 1.419271003178762e-05, "loss": 0.4699, "step": 18752 }, { "epoch": 3.061221990939145, "grad_norm": 3.2920496463775635, "learning_rate": 1.4192127955854975e-05, "loss": 0.5107, "step": 18753 }, { "epoch": 3.0613852495816496, "grad_norm": 2.8769283294677734, "learning_rate": 1.4191545862690074e-05, "loss": 0.4134, "step": 18754 }, { "epoch": 3.061548508224154, "grad_norm": 3.123060703277588, "learning_rate": 1.4190963752295315e-05, "loss": 0.4901, "step": 18755 }, { "epoch": 3.0617117668666585, "grad_norm": 3.438486099243164, "learning_rate": 1.4190381624673082e-05, "loss": 0.5194, "step": 18756 }, { "epoch": 3.061875025509163, "grad_norm": 2.9317755699157715, "learning_rate": 1.4189799479825777e-05, "loss": 0.5188, "step": 18757 }, { "epoch": 3.0620382841516673, "grad_norm": 3.090224504470825, "learning_rate": 1.4189217317755785e-05, "loss": 0.4962, "step": 18758 }, { "epoch": 3.062201542794172, "grad_norm": 2.7252979278564453, "learning_rate": 1.4188635138465505e-05, "loss": 0.4601, "step": 18759 }, { "epoch": 3.062364801436676, "grad_norm": 3.0405900478363037, "learning_rate": 1.4188052941957327e-05, "loss": 0.4479, "step": 18760 }, { "epoch": 3.06252806007918, "grad_norm": 3.2918128967285156, "learning_rate": 1.4187470728233643e-05, "loss": 0.5084, "step": 18761 }, { "epoch": 3.0626913187216847, "grad_norm": 3.324777364730835, "learning_rate": 1.4186888497296848e-05, "loss": 0.4829, "step": 18762 }, { "epoch": 3.062854577364189, "grad_norm": 2.8683996200561523, "learning_rate": 1.4186306249149335e-05, "loss": 0.3878, "step": 18763 }, { "epoch": 3.0630178360066935, "grad_norm": 3.4527862071990967, "learning_rate": 1.41857239837935e-05, "loss": 0.4846, "step": 18764 }, { "epoch": 3.063181094649198, "grad_norm": 3.2515053749084473, "learning_rate": 1.4185141701231732e-05, "loss": 0.5081, "step": 18765 }, { "epoch": 3.0633443532917024, "grad_norm": 3.6566617488861084, "learning_rate": 1.4184559401466428e-05, "loss": 0.53, "step": 18766 }, { "epoch": 3.063507611934207, "grad_norm": 3.402517080307007, "learning_rate": 1.418397708449998e-05, "loss": 0.4694, "step": 18767 }, { "epoch": 3.0636708705767113, "grad_norm": 2.9234778881073, "learning_rate": 1.4183394750334784e-05, "loss": 0.4265, "step": 18768 }, { "epoch": 3.0638341292192157, "grad_norm": 3.482401132583618, "learning_rate": 1.4182812398973228e-05, "loss": 0.5275, "step": 18769 }, { "epoch": 3.06399738786172, "grad_norm": 3.2284491062164307, "learning_rate": 1.4182230030417712e-05, "loss": 0.4146, "step": 18770 }, { "epoch": 3.064160646504224, "grad_norm": 3.178229331970215, "learning_rate": 1.418164764467063e-05, "loss": 0.4977, "step": 18771 }, { "epoch": 3.0643239051467286, "grad_norm": 2.6316018104553223, "learning_rate": 1.4181065241734368e-05, "loss": 0.3892, "step": 18772 }, { "epoch": 3.064487163789233, "grad_norm": 3.4054501056671143, "learning_rate": 1.418048282161133e-05, "loss": 0.5011, "step": 18773 }, { "epoch": 3.0646504224317375, "grad_norm": 2.5072262287139893, "learning_rate": 1.4179900384303901e-05, "loss": 0.3568, "step": 18774 }, { "epoch": 3.064813681074242, "grad_norm": 2.6989352703094482, "learning_rate": 1.4179317929814485e-05, "loss": 0.4001, "step": 18775 }, { "epoch": 3.0649769397167463, "grad_norm": 2.464562177658081, "learning_rate": 1.4178735458145472e-05, "loss": 0.3763, "step": 18776 }, { "epoch": 3.065140198359251, "grad_norm": 3.289794445037842, "learning_rate": 1.417815296929925e-05, "loss": 0.4624, "step": 18777 }, { "epoch": 3.065303457001755, "grad_norm": 2.8275701999664307, "learning_rate": 1.417757046327822e-05, "loss": 0.3796, "step": 18778 }, { "epoch": 3.065466715644259, "grad_norm": 2.897707462310791, "learning_rate": 1.4176987940084776e-05, "loss": 0.4613, "step": 18779 }, { "epoch": 3.0656299742867636, "grad_norm": 3.1598007678985596, "learning_rate": 1.4176405399721311e-05, "loss": 0.4605, "step": 18780 }, { "epoch": 3.065793232929268, "grad_norm": 3.413076877593994, "learning_rate": 1.4175822842190225e-05, "loss": 0.4903, "step": 18781 }, { "epoch": 3.0659564915717725, "grad_norm": 3.46307373046875, "learning_rate": 1.4175240267493903e-05, "loss": 0.4779, "step": 18782 }, { "epoch": 3.066119750214277, "grad_norm": 2.8512802124023438, "learning_rate": 1.4174657675634744e-05, "loss": 0.4975, "step": 18783 }, { "epoch": 3.0662830088567814, "grad_norm": 3.7724645137786865, "learning_rate": 1.4174075066615147e-05, "loss": 0.5731, "step": 18784 }, { "epoch": 3.066446267499286, "grad_norm": 3.2404372692108154, "learning_rate": 1.4173492440437502e-05, "loss": 0.4346, "step": 18785 }, { "epoch": 3.0666095261417903, "grad_norm": 2.789815902709961, "learning_rate": 1.4172909797104208e-05, "loss": 0.4168, "step": 18786 }, { "epoch": 3.0667727847842947, "grad_norm": 3.611342191696167, "learning_rate": 1.4172327136617656e-05, "loss": 0.5053, "step": 18787 }, { "epoch": 3.0669360434267987, "grad_norm": 4.328182220458984, "learning_rate": 1.4171744458980241e-05, "loss": 0.6057, "step": 18788 }, { "epoch": 3.067099302069303, "grad_norm": 3.074631690979004, "learning_rate": 1.4171161764194359e-05, "loss": 0.4341, "step": 18789 }, { "epoch": 3.0672625607118076, "grad_norm": 3.2346785068511963, "learning_rate": 1.4170579052262408e-05, "loss": 0.4806, "step": 18790 }, { "epoch": 3.067425819354312, "grad_norm": 3.0944151878356934, "learning_rate": 1.4169996323186784e-05, "loss": 0.4402, "step": 18791 }, { "epoch": 3.0675890779968165, "grad_norm": 3.6081879138946533, "learning_rate": 1.4169413576969874e-05, "loss": 0.5459, "step": 18792 }, { "epoch": 3.067752336639321, "grad_norm": 3.1996443271636963, "learning_rate": 1.4168830813614083e-05, "loss": 0.4533, "step": 18793 }, { "epoch": 3.0679155952818253, "grad_norm": 2.8717737197875977, "learning_rate": 1.4168248033121801e-05, "loss": 0.4391, "step": 18794 }, { "epoch": 3.0680788539243298, "grad_norm": 4.266391754150391, "learning_rate": 1.4167665235495425e-05, "loss": 0.6486, "step": 18795 }, { "epoch": 3.068242112566834, "grad_norm": 3.0470755100250244, "learning_rate": 1.4167082420737353e-05, "loss": 0.4815, "step": 18796 }, { "epoch": 3.068405371209338, "grad_norm": 3.4479565620422363, "learning_rate": 1.4166499588849976e-05, "loss": 0.4726, "step": 18797 }, { "epoch": 3.0685686298518426, "grad_norm": 2.651975154876709, "learning_rate": 1.4165916739835695e-05, "loss": 0.3401, "step": 18798 }, { "epoch": 3.068731888494347, "grad_norm": 2.7419474124908447, "learning_rate": 1.41653338736969e-05, "loss": 0.4491, "step": 18799 }, { "epoch": 3.0688951471368515, "grad_norm": 3.041548490524292, "learning_rate": 1.4164750990435991e-05, "loss": 0.4694, "step": 18800 }, { "epoch": 3.069058405779356, "grad_norm": 3.667640209197998, "learning_rate": 1.4164168090055365e-05, "loss": 0.4416, "step": 18801 }, { "epoch": 3.0692216644218604, "grad_norm": 2.7734975814819336, "learning_rate": 1.4163585172557417e-05, "loss": 0.468, "step": 18802 }, { "epoch": 3.069384923064365, "grad_norm": 2.9068944454193115, "learning_rate": 1.416300223794454e-05, "loss": 0.4351, "step": 18803 }, { "epoch": 3.0695481817068693, "grad_norm": 2.9435274600982666, "learning_rate": 1.4162419286219135e-05, "loss": 0.4072, "step": 18804 }, { "epoch": 3.0697114403493737, "grad_norm": 2.6841609477996826, "learning_rate": 1.4161836317383595e-05, "loss": 0.3878, "step": 18805 }, { "epoch": 3.0698746989918777, "grad_norm": 3.5290777683258057, "learning_rate": 1.4161253331440317e-05, "loss": 0.5824, "step": 18806 }, { "epoch": 3.070037957634382, "grad_norm": 2.987637996673584, "learning_rate": 1.4160670328391698e-05, "loss": 0.4687, "step": 18807 }, { "epoch": 3.0702012162768866, "grad_norm": 3.8371081352233887, "learning_rate": 1.4160087308240136e-05, "loss": 0.533, "step": 18808 }, { "epoch": 3.070364474919391, "grad_norm": 3.407780885696411, "learning_rate": 1.4159504270988025e-05, "loss": 0.4966, "step": 18809 }, { "epoch": 3.0705277335618955, "grad_norm": 2.8313724994659424, "learning_rate": 1.415892121663776e-05, "loss": 0.4264, "step": 18810 }, { "epoch": 3.0706909922044, "grad_norm": 3.0782594680786133, "learning_rate": 1.4158338145191744e-05, "loss": 0.4524, "step": 18811 }, { "epoch": 3.0708542508469043, "grad_norm": 2.7208142280578613, "learning_rate": 1.4157755056652368e-05, "loss": 0.4885, "step": 18812 }, { "epoch": 3.0710175094894088, "grad_norm": 2.7189364433288574, "learning_rate": 1.4157171951022031e-05, "loss": 0.4194, "step": 18813 }, { "epoch": 3.0711807681319128, "grad_norm": 3.841005563735962, "learning_rate": 1.4156588828303131e-05, "loss": 0.5458, "step": 18814 }, { "epoch": 3.071344026774417, "grad_norm": 3.1939990520477295, "learning_rate": 1.4156005688498065e-05, "loss": 0.5431, "step": 18815 }, { "epoch": 3.0715072854169216, "grad_norm": 3.6585376262664795, "learning_rate": 1.4155422531609225e-05, "loss": 0.4922, "step": 18816 }, { "epoch": 3.071670544059426, "grad_norm": 3.0089213848114014, "learning_rate": 1.4154839357639012e-05, "loss": 0.4121, "step": 18817 }, { "epoch": 3.0718338027019305, "grad_norm": 2.956740617752075, "learning_rate": 1.4154256166589826e-05, "loss": 0.4871, "step": 18818 }, { "epoch": 3.071997061344435, "grad_norm": 2.8191919326782227, "learning_rate": 1.415367295846406e-05, "loss": 0.4046, "step": 18819 }, { "epoch": 3.0721603199869394, "grad_norm": 3.3171331882476807, "learning_rate": 1.4153089733264117e-05, "loss": 0.4975, "step": 18820 }, { "epoch": 3.072323578629444, "grad_norm": 3.8503963947296143, "learning_rate": 1.4152506490992384e-05, "loss": 0.5628, "step": 18821 }, { "epoch": 3.0724868372719483, "grad_norm": 3.4935736656188965, "learning_rate": 1.4151923231651265e-05, "loss": 0.5362, "step": 18822 }, { "epoch": 3.0726500959144523, "grad_norm": 3.0240912437438965, "learning_rate": 1.4151339955243159e-05, "loss": 0.4574, "step": 18823 }, { "epoch": 3.0728133545569567, "grad_norm": 2.739072561264038, "learning_rate": 1.4150756661770463e-05, "loss": 0.4769, "step": 18824 }, { "epoch": 3.072976613199461, "grad_norm": 2.9674904346466064, "learning_rate": 1.4150173351235576e-05, "loss": 0.4236, "step": 18825 }, { "epoch": 3.0731398718419656, "grad_norm": 2.7106680870056152, "learning_rate": 1.4149590023640891e-05, "loss": 0.4404, "step": 18826 }, { "epoch": 3.07330313048447, "grad_norm": 3.290334939956665, "learning_rate": 1.4149006678988806e-05, "loss": 0.4839, "step": 18827 }, { "epoch": 3.0734663891269745, "grad_norm": 3.5236709117889404, "learning_rate": 1.4148423317281723e-05, "loss": 0.5585, "step": 18828 }, { "epoch": 3.073629647769479, "grad_norm": 3.824647903442383, "learning_rate": 1.4147839938522042e-05, "loss": 0.5631, "step": 18829 }, { "epoch": 3.0737929064119833, "grad_norm": 2.399052143096924, "learning_rate": 1.4147256542712153e-05, "loss": 0.4078, "step": 18830 }, { "epoch": 3.0739561650544878, "grad_norm": 3.2730836868286133, "learning_rate": 1.414667312985446e-05, "loss": 0.4628, "step": 18831 }, { "epoch": 3.0741194236969918, "grad_norm": 3.275012254714966, "learning_rate": 1.4146089699951358e-05, "loss": 0.4994, "step": 18832 }, { "epoch": 3.074282682339496, "grad_norm": 3.8912925720214844, "learning_rate": 1.4145506253005245e-05, "loss": 1.0052, "step": 18833 }, { "epoch": 3.0744459409820006, "grad_norm": 3.42220139503479, "learning_rate": 1.4144922789018525e-05, "loss": 0.5442, "step": 18834 }, { "epoch": 3.074609199624505, "grad_norm": 3.20243239402771, "learning_rate": 1.4144339307993591e-05, "loss": 0.4912, "step": 18835 }, { "epoch": 3.0747724582670095, "grad_norm": 2.98462176322937, "learning_rate": 1.4143755809932843e-05, "loss": 0.4856, "step": 18836 }, { "epoch": 3.074935716909514, "grad_norm": 3.0973594188690186, "learning_rate": 1.4143172294838681e-05, "loss": 0.4969, "step": 18837 }, { "epoch": 3.0750989755520184, "grad_norm": 3.8750510215759277, "learning_rate": 1.41425887627135e-05, "loss": 0.5374, "step": 18838 }, { "epoch": 3.075262234194523, "grad_norm": 2.9197380542755127, "learning_rate": 1.41420052135597e-05, "loss": 0.426, "step": 18839 }, { "epoch": 3.0754254928370273, "grad_norm": 3.058309316635132, "learning_rate": 1.4141421647379683e-05, "loss": 0.4376, "step": 18840 }, { "epoch": 3.0755887514795313, "grad_norm": 2.803919553756714, "learning_rate": 1.4140838064175843e-05, "loss": 0.4435, "step": 18841 }, { "epoch": 3.0757520101220357, "grad_norm": 3.2290005683898926, "learning_rate": 1.4140254463950584e-05, "loss": 0.5283, "step": 18842 }, { "epoch": 3.07591526876454, "grad_norm": 3.231017827987671, "learning_rate": 1.4139670846706302e-05, "loss": 0.4856, "step": 18843 }, { "epoch": 3.0760785274070446, "grad_norm": 2.961540460586548, "learning_rate": 1.4139087212445395e-05, "loss": 0.5149, "step": 18844 }, { "epoch": 3.076241786049549, "grad_norm": 3.0154361724853516, "learning_rate": 1.4138503561170263e-05, "loss": 0.435, "step": 18845 }, { "epoch": 3.0764050446920534, "grad_norm": 2.7776970863342285, "learning_rate": 1.4137919892883306e-05, "loss": 0.4454, "step": 18846 }, { "epoch": 3.076568303334558, "grad_norm": 2.871109962463379, "learning_rate": 1.4137336207586923e-05, "loss": 0.4961, "step": 18847 }, { "epoch": 3.0767315619770623, "grad_norm": 2.4825074672698975, "learning_rate": 1.413675250528351e-05, "loss": 0.4453, "step": 18848 }, { "epoch": 3.0768948206195668, "grad_norm": 3.1627774238586426, "learning_rate": 1.4136168785975475e-05, "loss": 0.4863, "step": 18849 }, { "epoch": 3.0770580792620708, "grad_norm": 2.954102039337158, "learning_rate": 1.4135585049665207e-05, "loss": 0.4716, "step": 18850 }, { "epoch": 3.077221337904575, "grad_norm": 2.794790029525757, "learning_rate": 1.4135001296355112e-05, "loss": 0.4859, "step": 18851 }, { "epoch": 3.0773845965470796, "grad_norm": 2.587211847305298, "learning_rate": 1.413441752604759e-05, "loss": 0.4539, "step": 18852 }, { "epoch": 3.077547855189584, "grad_norm": 3.1481549739837646, "learning_rate": 1.4133833738745037e-05, "loss": 0.5009, "step": 18853 }, { "epoch": 3.0777111138320885, "grad_norm": 3.513916254043579, "learning_rate": 1.4133249934449856e-05, "loss": 0.4837, "step": 18854 }, { "epoch": 3.077874372474593, "grad_norm": 2.812037229537964, "learning_rate": 1.4132666113164443e-05, "loss": 0.4742, "step": 18855 }, { "epoch": 3.0780376311170974, "grad_norm": 3.454542398452759, "learning_rate": 1.4132082274891201e-05, "loss": 0.4801, "step": 18856 }, { "epoch": 3.078200889759602, "grad_norm": 2.6259593963623047, "learning_rate": 1.413149841963253e-05, "loss": 0.4155, "step": 18857 }, { "epoch": 3.0783641484021063, "grad_norm": 3.2022483348846436, "learning_rate": 1.4130914547390828e-05, "loss": 0.503, "step": 18858 }, { "epoch": 3.0785274070446103, "grad_norm": 2.952521800994873, "learning_rate": 1.4130330658168498e-05, "loss": 0.5081, "step": 18859 }, { "epoch": 3.0786906656871147, "grad_norm": 2.931485176086426, "learning_rate": 1.4129746751967934e-05, "loss": 0.4594, "step": 18860 }, { "epoch": 3.078853924329619, "grad_norm": 2.892317295074463, "learning_rate": 1.4129162828791543e-05, "loss": 0.4527, "step": 18861 }, { "epoch": 3.0790171829721236, "grad_norm": 2.398013114929199, "learning_rate": 1.4128578888641723e-05, "loss": 0.3598, "step": 18862 }, { "epoch": 3.079180441614628, "grad_norm": 3.5511791706085205, "learning_rate": 1.4127994931520875e-05, "loss": 0.6326, "step": 18863 }, { "epoch": 3.0793437002571324, "grad_norm": 3.2085936069488525, "learning_rate": 1.4127410957431398e-05, "loss": 0.5144, "step": 18864 }, { "epoch": 3.079506958899637, "grad_norm": 2.7073004245758057, "learning_rate": 1.4126826966375693e-05, "loss": 0.4277, "step": 18865 }, { "epoch": 3.0796702175421413, "grad_norm": 3.936737298965454, "learning_rate": 1.4126242958356158e-05, "loss": 0.5812, "step": 18866 }, { "epoch": 3.0798334761846453, "grad_norm": 2.745218276977539, "learning_rate": 1.4125658933375198e-05, "loss": 0.4277, "step": 18867 }, { "epoch": 3.0799967348271498, "grad_norm": 2.8330814838409424, "learning_rate": 1.412507489143521e-05, "loss": 0.4398, "step": 18868 }, { "epoch": 3.080159993469654, "grad_norm": 2.9943771362304688, "learning_rate": 1.4124490832538602e-05, "loss": 0.5159, "step": 18869 }, { "epoch": 3.0803232521121586, "grad_norm": 2.674715280532837, "learning_rate": 1.4123906756687766e-05, "loss": 0.4779, "step": 18870 }, { "epoch": 3.080486510754663, "grad_norm": 3.008660316467285, "learning_rate": 1.4123322663885106e-05, "loss": 0.4775, "step": 18871 }, { "epoch": 3.0806497693971675, "grad_norm": 3.1717681884765625, "learning_rate": 1.4122738554133025e-05, "loss": 0.4887, "step": 18872 }, { "epoch": 3.080813028039672, "grad_norm": 3.1144442558288574, "learning_rate": 1.412215442743392e-05, "loss": 0.4145, "step": 18873 }, { "epoch": 3.0809762866821764, "grad_norm": 2.8867640495300293, "learning_rate": 1.41215702837902e-05, "loss": 0.4594, "step": 18874 }, { "epoch": 3.081139545324681, "grad_norm": 2.6929683685302734, "learning_rate": 1.4120986123204257e-05, "loss": 0.4108, "step": 18875 }, { "epoch": 3.081302803967185, "grad_norm": 2.5180344581604004, "learning_rate": 1.4120401945678496e-05, "loss": 0.393, "step": 18876 }, { "epoch": 3.0814660626096892, "grad_norm": 2.851924419403076, "learning_rate": 1.4119817751215316e-05, "loss": 0.4153, "step": 18877 }, { "epoch": 3.0816293212521937, "grad_norm": 2.8949227333068848, "learning_rate": 1.4119233539817125e-05, "loss": 0.4379, "step": 18878 }, { "epoch": 3.081792579894698, "grad_norm": 3.3052587509155273, "learning_rate": 1.411864931148632e-05, "loss": 0.5128, "step": 18879 }, { "epoch": 3.0819558385372026, "grad_norm": 3.1918885707855225, "learning_rate": 1.4118065066225304e-05, "loss": 0.4653, "step": 18880 }, { "epoch": 3.082119097179707, "grad_norm": 3.02071213722229, "learning_rate": 1.4117480804036474e-05, "loss": 0.5166, "step": 18881 }, { "epoch": 3.0822823558222114, "grad_norm": 3.692821979522705, "learning_rate": 1.4116896524922236e-05, "loss": 0.523, "step": 18882 }, { "epoch": 3.082445614464716, "grad_norm": 2.955765724182129, "learning_rate": 1.4116312228884992e-05, "loss": 0.4819, "step": 18883 }, { "epoch": 3.0826088731072203, "grad_norm": 3.7216012477874756, "learning_rate": 1.4115727915927143e-05, "loss": 0.4903, "step": 18884 }, { "epoch": 3.0827721317497243, "grad_norm": 3.2634389400482178, "learning_rate": 1.411514358605109e-05, "loss": 0.5752, "step": 18885 }, { "epoch": 3.0829353903922287, "grad_norm": 3.045813798904419, "learning_rate": 1.4114559239259235e-05, "loss": 0.4807, "step": 18886 }, { "epoch": 3.083098649034733, "grad_norm": 2.8335673809051514, "learning_rate": 1.4113974875553981e-05, "loss": 0.4323, "step": 18887 }, { "epoch": 3.0832619076772376, "grad_norm": 3.1333260536193848, "learning_rate": 1.4113390494937731e-05, "loss": 0.4408, "step": 18888 }, { "epoch": 3.083425166319742, "grad_norm": 2.789161443710327, "learning_rate": 1.4112806097412883e-05, "loss": 0.4675, "step": 18889 }, { "epoch": 3.0835884249622465, "grad_norm": 3.2160778045654297, "learning_rate": 1.4112221682981843e-05, "loss": 0.4541, "step": 18890 }, { "epoch": 3.083751683604751, "grad_norm": 2.7911484241485596, "learning_rate": 1.4111637251647014e-05, "loss": 0.4595, "step": 18891 }, { "epoch": 3.0839149422472554, "grad_norm": 3.6186435222625732, "learning_rate": 1.4111052803410796e-05, "loss": 0.4873, "step": 18892 }, { "epoch": 3.08407820088976, "grad_norm": 3.013465642929077, "learning_rate": 1.4110468338275591e-05, "loss": 0.4243, "step": 18893 }, { "epoch": 3.084241459532264, "grad_norm": 3.362658739089966, "learning_rate": 1.4109883856243804e-05, "loss": 0.4967, "step": 18894 }, { "epoch": 3.0844047181747682, "grad_norm": 3.1665050983428955, "learning_rate": 1.4109299357317836e-05, "loss": 0.4288, "step": 18895 }, { "epoch": 3.0845679768172727, "grad_norm": 3.339797258377075, "learning_rate": 1.410871484150009e-05, "loss": 0.4403, "step": 18896 }, { "epoch": 3.084731235459777, "grad_norm": 3.5160064697265625, "learning_rate": 1.4108130308792971e-05, "loss": 0.5672, "step": 18897 }, { "epoch": 3.0848944941022816, "grad_norm": 3.1632142066955566, "learning_rate": 1.4107545759198878e-05, "loss": 0.4942, "step": 18898 }, { "epoch": 3.085057752744786, "grad_norm": 2.410484552383423, "learning_rate": 1.4106961192720211e-05, "loss": 0.3758, "step": 18899 }, { "epoch": 3.0852210113872904, "grad_norm": 3.2040271759033203, "learning_rate": 1.4106376609359382e-05, "loss": 0.4821, "step": 18900 }, { "epoch": 3.085384270029795, "grad_norm": 2.6400699615478516, "learning_rate": 1.4105792009118789e-05, "loss": 0.4261, "step": 18901 }, { "epoch": 3.085547528672299, "grad_norm": 2.5089330673217773, "learning_rate": 1.4105207392000835e-05, "loss": 0.3928, "step": 18902 }, { "epoch": 3.0857107873148033, "grad_norm": 3.024639844894409, "learning_rate": 1.4104622758007925e-05, "loss": 0.5442, "step": 18903 }, { "epoch": 3.0858740459573077, "grad_norm": 2.901113510131836, "learning_rate": 1.4104038107142458e-05, "loss": 0.4669, "step": 18904 }, { "epoch": 3.086037304599812, "grad_norm": 3.2526237964630127, "learning_rate": 1.410345343940684e-05, "loss": 0.4833, "step": 18905 }, { "epoch": 3.0862005632423166, "grad_norm": 2.3578193187713623, "learning_rate": 1.4102868754803476e-05, "loss": 0.3815, "step": 18906 }, { "epoch": 3.086363821884821, "grad_norm": 3.416167974472046, "learning_rate": 1.4102284053334768e-05, "loss": 0.5165, "step": 18907 }, { "epoch": 3.0865270805273255, "grad_norm": 2.6106340885162354, "learning_rate": 1.4101699335003118e-05, "loss": 0.4204, "step": 18908 }, { "epoch": 3.08669033916983, "grad_norm": 3.020673990249634, "learning_rate": 1.4101114599810933e-05, "loss": 0.4134, "step": 18909 }, { "epoch": 3.0868535978123344, "grad_norm": 3.2341768741607666, "learning_rate": 1.4100529847760609e-05, "loss": 0.5162, "step": 18910 }, { "epoch": 3.0870168564548384, "grad_norm": 2.8600707054138184, "learning_rate": 1.4099945078854558e-05, "loss": 0.4624, "step": 18911 }, { "epoch": 3.087180115097343, "grad_norm": 2.71901798248291, "learning_rate": 1.4099360293095182e-05, "loss": 0.3945, "step": 18912 }, { "epoch": 3.0873433737398472, "grad_norm": 3.6752021312713623, "learning_rate": 1.4098775490484887e-05, "loss": 0.3356, "step": 18913 }, { "epoch": 3.0875066323823517, "grad_norm": 3.324627637863159, "learning_rate": 1.4098190671026068e-05, "loss": 0.461, "step": 18914 }, { "epoch": 3.087669891024856, "grad_norm": 2.3751776218414307, "learning_rate": 1.4097605834721138e-05, "loss": 0.4106, "step": 18915 }, { "epoch": 3.0878331496673606, "grad_norm": 3.019942045211792, "learning_rate": 1.4097020981572493e-05, "loss": 0.431, "step": 18916 }, { "epoch": 3.087996408309865, "grad_norm": 2.841878890991211, "learning_rate": 1.4096436111582545e-05, "loss": 0.4578, "step": 18917 }, { "epoch": 3.0881596669523694, "grad_norm": 2.642948627471924, "learning_rate": 1.4095851224753698e-05, "loss": 0.4033, "step": 18918 }, { "epoch": 3.088322925594874, "grad_norm": 2.859488010406494, "learning_rate": 1.4095266321088348e-05, "loss": 0.4149, "step": 18919 }, { "epoch": 3.088486184237378, "grad_norm": 3.425917625427246, "learning_rate": 1.4094681400588908e-05, "loss": 0.4747, "step": 18920 }, { "epoch": 3.0886494428798823, "grad_norm": 2.829080581665039, "learning_rate": 1.4094096463257776e-05, "loss": 0.4114, "step": 18921 }, { "epoch": 3.0888127015223867, "grad_norm": 3.686988592147827, "learning_rate": 1.4093511509097359e-05, "loss": 0.5666, "step": 18922 }, { "epoch": 3.088975960164891, "grad_norm": 3.3352108001708984, "learning_rate": 1.4092926538110065e-05, "loss": 0.4348, "step": 18923 }, { "epoch": 3.0891392188073956, "grad_norm": 3.9809229373931885, "learning_rate": 1.4092341550298296e-05, "loss": 0.5365, "step": 18924 }, { "epoch": 3.0893024774499, "grad_norm": 2.8011207580566406, "learning_rate": 1.4091756545664455e-05, "loss": 0.457, "step": 18925 }, { "epoch": 3.0894657360924045, "grad_norm": 3.394651174545288, "learning_rate": 1.4091171524210946e-05, "loss": 0.4561, "step": 18926 }, { "epoch": 3.089628994734909, "grad_norm": 3.5635828971862793, "learning_rate": 1.4090586485940175e-05, "loss": 0.499, "step": 18927 }, { "epoch": 3.0897922533774134, "grad_norm": 3.5848214626312256, "learning_rate": 1.409000143085455e-05, "loss": 0.4941, "step": 18928 }, { "epoch": 3.0899555120199174, "grad_norm": 3.419046401977539, "learning_rate": 1.4089416358956474e-05, "loss": 0.5497, "step": 18929 }, { "epoch": 3.090118770662422, "grad_norm": 2.8166158199310303, "learning_rate": 1.4088831270248352e-05, "loss": 0.3657, "step": 18930 }, { "epoch": 3.0902820293049262, "grad_norm": 2.968564510345459, "learning_rate": 1.4088246164732585e-05, "loss": 0.4879, "step": 18931 }, { "epoch": 3.0904452879474307, "grad_norm": 2.9547042846679688, "learning_rate": 1.4087661042411582e-05, "loss": 0.4722, "step": 18932 }, { "epoch": 3.090608546589935, "grad_norm": 3.5696380138397217, "learning_rate": 1.408707590328775e-05, "loss": 0.4652, "step": 18933 }, { "epoch": 3.0907718052324396, "grad_norm": 4.008166313171387, "learning_rate": 1.4086490747363492e-05, "loss": 0.583, "step": 18934 }, { "epoch": 3.090935063874944, "grad_norm": 3.288759469985962, "learning_rate": 1.4085905574641214e-05, "loss": 0.4102, "step": 18935 }, { "epoch": 3.0910983225174484, "grad_norm": 3.4203364849090576, "learning_rate": 1.408532038512332e-05, "loss": 0.5074, "step": 18936 }, { "epoch": 3.091261581159953, "grad_norm": 3.081007719039917, "learning_rate": 1.4084735178812216e-05, "loss": 0.4703, "step": 18937 }, { "epoch": 3.091424839802457, "grad_norm": 3.415208578109741, "learning_rate": 1.4084149955710309e-05, "loss": 0.4976, "step": 18938 }, { "epoch": 3.0915880984449613, "grad_norm": 3.2920567989349365, "learning_rate": 1.4083564715820003e-05, "loss": 0.4162, "step": 18939 }, { "epoch": 3.0917513570874657, "grad_norm": 2.8065927028656006, "learning_rate": 1.4082979459143706e-05, "loss": 0.4124, "step": 18940 }, { "epoch": 3.09191461572997, "grad_norm": 3.2582976818084717, "learning_rate": 1.4082394185683821e-05, "loss": 0.4414, "step": 18941 }, { "epoch": 3.0920778743724746, "grad_norm": 3.1277403831481934, "learning_rate": 1.4081808895442755e-05, "loss": 0.4776, "step": 18942 }, { "epoch": 3.092241133014979, "grad_norm": 3.319415330886841, "learning_rate": 1.4081223588422912e-05, "loss": 0.5135, "step": 18943 }, { "epoch": 3.0924043916574835, "grad_norm": 2.8007400035858154, "learning_rate": 1.4080638264626702e-05, "loss": 0.4248, "step": 18944 }, { "epoch": 3.092567650299988, "grad_norm": 3.3349854946136475, "learning_rate": 1.408005292405653e-05, "loss": 0.4628, "step": 18945 }, { "epoch": 3.0927309089424924, "grad_norm": 3.265505790710449, "learning_rate": 1.40794675667148e-05, "loss": 0.534, "step": 18946 }, { "epoch": 3.0928941675849964, "grad_norm": 3.3354971408843994, "learning_rate": 1.4078882192603919e-05, "loss": 0.5289, "step": 18947 }, { "epoch": 3.093057426227501, "grad_norm": 2.8252644538879395, "learning_rate": 1.4078296801726293e-05, "loss": 0.4151, "step": 18948 }, { "epoch": 3.0932206848700052, "grad_norm": 2.709214925765991, "learning_rate": 1.4077711394084328e-05, "loss": 0.4355, "step": 18949 }, { "epoch": 3.0933839435125097, "grad_norm": 3.7327499389648438, "learning_rate": 1.4077125969680433e-05, "loss": 0.6114, "step": 18950 }, { "epoch": 3.093547202155014, "grad_norm": 3.3826088905334473, "learning_rate": 1.4076540528517013e-05, "loss": 0.5655, "step": 18951 }, { "epoch": 3.0937104607975185, "grad_norm": 2.8944828510284424, "learning_rate": 1.4075955070596475e-05, "loss": 0.4065, "step": 18952 }, { "epoch": 3.093873719440023, "grad_norm": 3.1040401458740234, "learning_rate": 1.4075369595921222e-05, "loss": 0.4835, "step": 18953 }, { "epoch": 3.0940369780825274, "grad_norm": 3.6692042350769043, "learning_rate": 1.4074784104493661e-05, "loss": 0.5578, "step": 18954 }, { "epoch": 3.0942002367250314, "grad_norm": 3.2865488529205322, "learning_rate": 1.4074198596316207e-05, "loss": 0.4892, "step": 18955 }, { "epoch": 3.094363495367536, "grad_norm": 3.175299644470215, "learning_rate": 1.4073613071391256e-05, "loss": 0.5472, "step": 18956 }, { "epoch": 3.0945267540100403, "grad_norm": 3.1788668632507324, "learning_rate": 1.4073027529721227e-05, "loss": 0.5405, "step": 18957 }, { "epoch": 3.0946900126525447, "grad_norm": 2.7799530029296875, "learning_rate": 1.4072441971308513e-05, "loss": 0.3944, "step": 18958 }, { "epoch": 3.094853271295049, "grad_norm": 3.031879425048828, "learning_rate": 1.407185639615553e-05, "loss": 0.4825, "step": 18959 }, { "epoch": 3.0950165299375536, "grad_norm": 2.8067972660064697, "learning_rate": 1.407127080426468e-05, "loss": 0.4968, "step": 18960 }, { "epoch": 3.095179788580058, "grad_norm": 3.7187442779541016, "learning_rate": 1.4070685195638376e-05, "loss": 1.0225, "step": 18961 }, { "epoch": 3.0953430472225625, "grad_norm": 2.41605544090271, "learning_rate": 1.4070099570279026e-05, "loss": 0.3915, "step": 18962 }, { "epoch": 3.095506305865067, "grad_norm": 3.378720998764038, "learning_rate": 1.4069513928189028e-05, "loss": 0.4625, "step": 18963 }, { "epoch": 3.095669564507571, "grad_norm": 3.0554709434509277, "learning_rate": 1.4068928269370795e-05, "loss": 0.4733, "step": 18964 }, { "epoch": 3.0958328231500754, "grad_norm": 2.9139151573181152, "learning_rate": 1.4068342593826736e-05, "loss": 0.4132, "step": 18965 }, { "epoch": 3.09599608179258, "grad_norm": 3.3036062717437744, "learning_rate": 1.4067756901559256e-05, "loss": 0.5203, "step": 18966 }, { "epoch": 3.0961593404350842, "grad_norm": 2.858348846435547, "learning_rate": 1.4067171192570766e-05, "loss": 0.4589, "step": 18967 }, { "epoch": 3.0963225990775887, "grad_norm": 2.818657398223877, "learning_rate": 1.4066585466863667e-05, "loss": 0.4639, "step": 18968 }, { "epoch": 3.096485857720093, "grad_norm": 2.8211276531219482, "learning_rate": 1.4065999724440374e-05, "loss": 0.4474, "step": 18969 }, { "epoch": 3.0966491163625975, "grad_norm": 3.4651684761047363, "learning_rate": 1.406541396530329e-05, "loss": 0.5085, "step": 18970 }, { "epoch": 3.096812375005102, "grad_norm": 3.085153579711914, "learning_rate": 1.4064828189454822e-05, "loss": 0.4723, "step": 18971 }, { "epoch": 3.0969756336476064, "grad_norm": 2.8699066638946533, "learning_rate": 1.4064242396897382e-05, "loss": 0.4669, "step": 18972 }, { "epoch": 3.0971388922901104, "grad_norm": 2.979154586791992, "learning_rate": 1.4063656587633378e-05, "loss": 0.4431, "step": 18973 }, { "epoch": 3.097302150932615, "grad_norm": 3.3185603618621826, "learning_rate": 1.4063070761665213e-05, "loss": 0.4509, "step": 18974 }, { "epoch": 3.0974654095751193, "grad_norm": 3.377887010574341, "learning_rate": 1.40624849189953e-05, "loss": 0.6035, "step": 18975 }, { "epoch": 3.0976286682176237, "grad_norm": 3.5034141540527344, "learning_rate": 1.4061899059626045e-05, "loss": 0.5481, "step": 18976 }, { "epoch": 3.097791926860128, "grad_norm": 2.9101130962371826, "learning_rate": 1.4061313183559859e-05, "loss": 0.4384, "step": 18977 }, { "epoch": 3.0979551855026326, "grad_norm": 3.395374059677124, "learning_rate": 1.4060727290799145e-05, "loss": 0.4835, "step": 18978 }, { "epoch": 3.098118444145137, "grad_norm": 2.948826789855957, "learning_rate": 1.4060141381346315e-05, "loss": 0.452, "step": 18979 }, { "epoch": 3.0982817027876415, "grad_norm": 3.0338709354400635, "learning_rate": 1.4059555455203777e-05, "loss": 0.458, "step": 18980 }, { "epoch": 3.098444961430146, "grad_norm": 3.0864100456237793, "learning_rate": 1.405896951237394e-05, "loss": 0.4803, "step": 18981 }, { "epoch": 3.09860822007265, "grad_norm": 2.8751485347747803, "learning_rate": 1.4058383552859212e-05, "loss": 0.475, "step": 18982 }, { "epoch": 3.0987714787151543, "grad_norm": 3.329002618789673, "learning_rate": 1.4057797576662e-05, "loss": 0.5008, "step": 18983 }, { "epoch": 3.098934737357659, "grad_norm": 3.083294153213501, "learning_rate": 1.4057211583784716e-05, "loss": 0.4615, "step": 18984 }, { "epoch": 3.0990979960001632, "grad_norm": 3.036994218826294, "learning_rate": 1.4056625574229768e-05, "loss": 0.5054, "step": 18985 }, { "epoch": 3.0992612546426677, "grad_norm": 2.7964706420898438, "learning_rate": 1.4056039547999563e-05, "loss": 0.4692, "step": 18986 }, { "epoch": 3.099424513285172, "grad_norm": 2.8979270458221436, "learning_rate": 1.405545350509651e-05, "loss": 0.4266, "step": 18987 }, { "epoch": 3.0995877719276765, "grad_norm": 3.2655446529388428, "learning_rate": 1.405486744552302e-05, "loss": 0.4945, "step": 18988 }, { "epoch": 3.099751030570181, "grad_norm": 3.1892507076263428, "learning_rate": 1.40542813692815e-05, "loss": 0.5446, "step": 18989 }, { "epoch": 3.099914289212685, "grad_norm": 3.208556890487671, "learning_rate": 1.4053695276374362e-05, "loss": 0.4984, "step": 18990 }, { "epoch": 3.1000775478551894, "grad_norm": 3.0891036987304688, "learning_rate": 1.4053109166804014e-05, "loss": 0.4639, "step": 18991 }, { "epoch": 3.100240806497694, "grad_norm": 2.885927200317383, "learning_rate": 1.405252304057286e-05, "loss": 0.5727, "step": 18992 }, { "epoch": 3.1004040651401983, "grad_norm": 3.070199489593506, "learning_rate": 1.4051936897683319e-05, "loss": 0.5064, "step": 18993 }, { "epoch": 3.1005673237827027, "grad_norm": 2.802156448364258, "learning_rate": 1.4051350738137792e-05, "loss": 0.4313, "step": 18994 }, { "epoch": 3.100730582425207, "grad_norm": 3.073448896408081, "learning_rate": 1.4050764561938692e-05, "loss": 0.489, "step": 18995 }, { "epoch": 3.1008938410677116, "grad_norm": 3.778764009475708, "learning_rate": 1.4050178369088431e-05, "loss": 0.5432, "step": 18996 }, { "epoch": 3.101057099710216, "grad_norm": 3.5043606758117676, "learning_rate": 1.4049592159589417e-05, "loss": 0.7081, "step": 18997 }, { "epoch": 3.1012203583527205, "grad_norm": 2.7411115169525146, "learning_rate": 1.4049005933444053e-05, "loss": 0.4155, "step": 18998 }, { "epoch": 3.101383616995225, "grad_norm": 2.995792865753174, "learning_rate": 1.4048419690654757e-05, "loss": 0.4731, "step": 18999 }, { "epoch": 3.101546875637729, "grad_norm": 2.9287924766540527, "learning_rate": 1.4047833431223938e-05, "loss": 0.4617, "step": 19000 }, { "epoch": 3.1017101342802333, "grad_norm": 2.972133159637451, "learning_rate": 1.4047247155154007e-05, "loss": 0.4503, "step": 19001 }, { "epoch": 3.101873392922738, "grad_norm": 3.199821949005127, "learning_rate": 1.4046660862447366e-05, "loss": 0.4916, "step": 19002 }, { "epoch": 3.102036651565242, "grad_norm": 2.7619147300720215, "learning_rate": 1.4046074553106433e-05, "loss": 0.4616, "step": 19003 }, { "epoch": 3.1021999102077467, "grad_norm": 2.702977180480957, "learning_rate": 1.4045488227133613e-05, "loss": 0.4619, "step": 19004 }, { "epoch": 3.102363168850251, "grad_norm": 2.892648935317993, "learning_rate": 1.4044901884531319e-05, "loss": 0.449, "step": 19005 }, { "epoch": 3.1025264274927555, "grad_norm": 3.0722570419311523, "learning_rate": 1.4044315525301961e-05, "loss": 0.5201, "step": 19006 }, { "epoch": 3.10268968613526, "grad_norm": 2.946944236755371, "learning_rate": 1.404372914944795e-05, "loss": 0.4823, "step": 19007 }, { "epoch": 3.102852944777764, "grad_norm": 2.990299701690674, "learning_rate": 1.4043142756971694e-05, "loss": 0.5296, "step": 19008 }, { "epoch": 3.1030162034202684, "grad_norm": 2.658273458480835, "learning_rate": 1.4042556347875604e-05, "loss": 0.4772, "step": 19009 }, { "epoch": 3.103179462062773, "grad_norm": 2.891645669937134, "learning_rate": 1.4041969922162094e-05, "loss": 0.4725, "step": 19010 }, { "epoch": 3.1033427207052773, "grad_norm": 3.140850067138672, "learning_rate": 1.4041383479833571e-05, "loss": 0.4929, "step": 19011 }, { "epoch": 3.1035059793477817, "grad_norm": 2.6680328845977783, "learning_rate": 1.4040797020892445e-05, "loss": 0.4389, "step": 19012 }, { "epoch": 3.103669237990286, "grad_norm": 4.107207775115967, "learning_rate": 1.404021054534113e-05, "loss": 0.66, "step": 19013 }, { "epoch": 3.1038324966327906, "grad_norm": 2.957963228225708, "learning_rate": 1.4039624053182035e-05, "loss": 0.4473, "step": 19014 }, { "epoch": 3.103995755275295, "grad_norm": 2.9939374923706055, "learning_rate": 1.4039037544417569e-05, "loss": 0.3955, "step": 19015 }, { "epoch": 3.1041590139177995, "grad_norm": 3.0724313259124756, "learning_rate": 1.4038451019050146e-05, "loss": 0.4711, "step": 19016 }, { "epoch": 3.1043222725603035, "grad_norm": 3.0548691749572754, "learning_rate": 1.4037864477082175e-05, "loss": 0.4351, "step": 19017 }, { "epoch": 3.104485531202808, "grad_norm": 2.8329262733459473, "learning_rate": 1.4037277918516071e-05, "loss": 0.4354, "step": 19018 }, { "epoch": 3.1046487898453123, "grad_norm": 3.4414963722229004, "learning_rate": 1.4036691343354237e-05, "loss": 0.5605, "step": 19019 }, { "epoch": 3.104812048487817, "grad_norm": 3.3375484943389893, "learning_rate": 1.403610475159909e-05, "loss": 0.5007, "step": 19020 }, { "epoch": 3.104975307130321, "grad_norm": 2.739184617996216, "learning_rate": 1.4035518143253043e-05, "loss": 0.4074, "step": 19021 }, { "epoch": 3.1051385657728257, "grad_norm": 3.5434584617614746, "learning_rate": 1.4034931518318505e-05, "loss": 1.0957, "step": 19022 }, { "epoch": 3.10530182441533, "grad_norm": 3.3767385482788086, "learning_rate": 1.4034344876797886e-05, "loss": 0.522, "step": 19023 }, { "epoch": 3.1054650830578345, "grad_norm": 2.7425172328948975, "learning_rate": 1.4033758218693596e-05, "loss": 0.4574, "step": 19024 }, { "epoch": 3.105628341700339, "grad_norm": 3.4490654468536377, "learning_rate": 1.4033171544008053e-05, "loss": 0.4586, "step": 19025 }, { "epoch": 3.105791600342843, "grad_norm": 3.0814216136932373, "learning_rate": 1.4032584852743663e-05, "loss": 0.376, "step": 19026 }, { "epoch": 3.1059548589853474, "grad_norm": 3.1657803058624268, "learning_rate": 1.403199814490284e-05, "loss": 0.4758, "step": 19027 }, { "epoch": 3.106118117627852, "grad_norm": 3.2047343254089355, "learning_rate": 1.4031411420487993e-05, "loss": 0.508, "step": 19028 }, { "epoch": 3.1062813762703563, "grad_norm": 2.843670606613159, "learning_rate": 1.4030824679501538e-05, "loss": 0.4488, "step": 19029 }, { "epoch": 3.1064446349128607, "grad_norm": 3.0251762866973877, "learning_rate": 1.4030237921945884e-05, "loss": 0.4461, "step": 19030 }, { "epoch": 3.106607893555365, "grad_norm": 3.110478401184082, "learning_rate": 1.4029651147823443e-05, "loss": 0.4398, "step": 19031 }, { "epoch": 3.1067711521978696, "grad_norm": 2.3111321926116943, "learning_rate": 1.4029064357136628e-05, "loss": 0.3817, "step": 19032 }, { "epoch": 3.106934410840374, "grad_norm": 2.656008243560791, "learning_rate": 1.4028477549887852e-05, "loss": 0.4193, "step": 19033 }, { "epoch": 3.1070976694828785, "grad_norm": 3.492027521133423, "learning_rate": 1.4027890726079525e-05, "loss": 0.5101, "step": 19034 }, { "epoch": 3.1072609281253825, "grad_norm": 2.7788491249084473, "learning_rate": 1.402730388571406e-05, "loss": 0.4136, "step": 19035 }, { "epoch": 3.107424186767887, "grad_norm": 3.1310877799987793, "learning_rate": 1.4026717028793867e-05, "loss": 0.5719, "step": 19036 }, { "epoch": 3.1075874454103913, "grad_norm": 3.1301369667053223, "learning_rate": 1.4026130155321365e-05, "loss": 0.4908, "step": 19037 }, { "epoch": 3.1077507040528958, "grad_norm": 2.773063898086548, "learning_rate": 1.402554326529896e-05, "loss": 0.4272, "step": 19038 }, { "epoch": 3.1079139626954, "grad_norm": 2.9118146896362305, "learning_rate": 1.4024956358729067e-05, "loss": 0.4268, "step": 19039 }, { "epoch": 3.1080772213379046, "grad_norm": 3.7332499027252197, "learning_rate": 1.4024369435614102e-05, "loss": 0.5399, "step": 19040 }, { "epoch": 3.108240479980409, "grad_norm": 3.401890993118286, "learning_rate": 1.4023782495956471e-05, "loss": 0.5587, "step": 19041 }, { "epoch": 3.1084037386229135, "grad_norm": 3.024052143096924, "learning_rate": 1.4023195539758586e-05, "loss": 0.5058, "step": 19042 }, { "epoch": 3.1085669972654175, "grad_norm": 3.454981565475464, "learning_rate": 1.4022608567022866e-05, "loss": 0.5118, "step": 19043 }, { "epoch": 3.108730255907922, "grad_norm": 3.5209901332855225, "learning_rate": 1.4022021577751722e-05, "loss": 0.4695, "step": 19044 }, { "epoch": 3.1088935145504264, "grad_norm": 3.4556872844696045, "learning_rate": 1.4021434571947565e-05, "loss": 0.4954, "step": 19045 }, { "epoch": 3.109056773192931, "grad_norm": 3.438206911087036, "learning_rate": 1.402084754961281e-05, "loss": 0.4965, "step": 19046 }, { "epoch": 3.1092200318354353, "grad_norm": 3.6417477130889893, "learning_rate": 1.4020260510749868e-05, "loss": 0.5174, "step": 19047 }, { "epoch": 3.1093832904779397, "grad_norm": 2.3654279708862305, "learning_rate": 1.4019673455361152e-05, "loss": 0.361, "step": 19048 }, { "epoch": 3.109546549120444, "grad_norm": 2.7183997631073, "learning_rate": 1.4019086383449077e-05, "loss": 0.3908, "step": 19049 }, { "epoch": 3.1097098077629486, "grad_norm": 2.6668195724487305, "learning_rate": 1.4018499295016057e-05, "loss": 0.4443, "step": 19050 }, { "epoch": 3.109873066405453, "grad_norm": 3.2009479999542236, "learning_rate": 1.4017912190064503e-05, "loss": 0.4478, "step": 19051 }, { "epoch": 3.110036325047957, "grad_norm": 3.8061270713806152, "learning_rate": 1.4017325068596828e-05, "loss": 0.5473, "step": 19052 }, { "epoch": 3.1101995836904615, "grad_norm": 2.8060247898101807, "learning_rate": 1.4016737930615445e-05, "loss": 0.3831, "step": 19053 }, { "epoch": 3.110362842332966, "grad_norm": 3.341130256652832, "learning_rate": 1.4016150776122772e-05, "loss": 0.4877, "step": 19054 }, { "epoch": 3.1105261009754703, "grad_norm": 2.8862829208374023, "learning_rate": 1.401556360512122e-05, "loss": 0.4353, "step": 19055 }, { "epoch": 3.1106893596179748, "grad_norm": 2.649498462677002, "learning_rate": 1.4014976417613203e-05, "loss": 0.4019, "step": 19056 }, { "epoch": 3.110852618260479, "grad_norm": 3.3998756408691406, "learning_rate": 1.4014389213601129e-05, "loss": 0.5341, "step": 19057 }, { "epoch": 3.1110158769029836, "grad_norm": 3.2408487796783447, "learning_rate": 1.4013801993087422e-05, "loss": 0.4567, "step": 19058 }, { "epoch": 3.111179135545488, "grad_norm": 3.9908857345581055, "learning_rate": 1.4013214756074485e-05, "loss": 0.5902, "step": 19059 }, { "epoch": 3.1113423941879925, "grad_norm": 3.8500397205352783, "learning_rate": 1.4012627502564743e-05, "loss": 0.5304, "step": 19060 }, { "epoch": 3.1115056528304965, "grad_norm": 2.4865903854370117, "learning_rate": 1.4012040232560602e-05, "loss": 0.398, "step": 19061 }, { "epoch": 3.111668911473001, "grad_norm": 2.6201329231262207, "learning_rate": 1.4011452946064482e-05, "loss": 0.3811, "step": 19062 }, { "epoch": 3.1118321701155054, "grad_norm": 3.562016487121582, "learning_rate": 1.4010865643078788e-05, "loss": 0.5028, "step": 19063 }, { "epoch": 3.11199542875801, "grad_norm": 3.623302698135376, "learning_rate": 1.4010278323605944e-05, "loss": 0.4259, "step": 19064 }, { "epoch": 3.1121586874005143, "grad_norm": 3.4856419563293457, "learning_rate": 1.4009690987648358e-05, "loss": 0.5674, "step": 19065 }, { "epoch": 3.1123219460430187, "grad_norm": 2.51086688041687, "learning_rate": 1.4009103635208448e-05, "loss": 0.3917, "step": 19066 }, { "epoch": 3.112485204685523, "grad_norm": 3.045130729675293, "learning_rate": 1.4008516266288626e-05, "loss": 0.4446, "step": 19067 }, { "epoch": 3.1126484633280276, "grad_norm": 2.5259435176849365, "learning_rate": 1.4007928880891306e-05, "loss": 0.3865, "step": 19068 }, { "epoch": 3.112811721970532, "grad_norm": 2.583030939102173, "learning_rate": 1.4007341479018903e-05, "loss": 0.4589, "step": 19069 }, { "epoch": 3.112974980613036, "grad_norm": 3.0565438270568848, "learning_rate": 1.4006754060673834e-05, "loss": 0.5256, "step": 19070 }, { "epoch": 3.1131382392555405, "grad_norm": 2.9069302082061768, "learning_rate": 1.4006166625858513e-05, "loss": 0.4515, "step": 19071 }, { "epoch": 3.113301497898045, "grad_norm": 3.628491163253784, "learning_rate": 1.4005579174575352e-05, "loss": 0.5836, "step": 19072 }, { "epoch": 3.1134647565405493, "grad_norm": 3.302726984024048, "learning_rate": 1.400499170682677e-05, "loss": 0.553, "step": 19073 }, { "epoch": 3.1136280151830538, "grad_norm": 3.605168104171753, "learning_rate": 1.4004404222615175e-05, "loss": 0.5595, "step": 19074 }, { "epoch": 3.113791273825558, "grad_norm": 3.5498149394989014, "learning_rate": 1.400381672194299e-05, "loss": 0.5002, "step": 19075 }, { "epoch": 3.1139545324680626, "grad_norm": 3.0657296180725098, "learning_rate": 1.4003229204812625e-05, "loss": 0.5573, "step": 19076 }, { "epoch": 3.114117791110567, "grad_norm": 3.38877010345459, "learning_rate": 1.4002641671226496e-05, "loss": 0.5104, "step": 19077 }, { "epoch": 3.1142810497530715, "grad_norm": 3.0272247791290283, "learning_rate": 1.4002054121187018e-05, "loss": 0.4423, "step": 19078 }, { "epoch": 3.1144443083955755, "grad_norm": 3.0873260498046875, "learning_rate": 1.4001466554696608e-05, "loss": 0.465, "step": 19079 }, { "epoch": 3.11460756703808, "grad_norm": 3.229159116744995, "learning_rate": 1.4000878971757681e-05, "loss": 0.5113, "step": 19080 }, { "epoch": 3.1147708256805844, "grad_norm": 2.856825828552246, "learning_rate": 1.4000291372372647e-05, "loss": 0.4251, "step": 19081 }, { "epoch": 3.114934084323089, "grad_norm": 3.334029197692871, "learning_rate": 1.399970375654393e-05, "loss": 0.5188, "step": 19082 }, { "epoch": 3.1150973429655933, "grad_norm": 3.1198782920837402, "learning_rate": 1.399911612427394e-05, "loss": 0.4733, "step": 19083 }, { "epoch": 3.1152606016080977, "grad_norm": 2.6227617263793945, "learning_rate": 1.3998528475565095e-05, "loss": 0.3694, "step": 19084 }, { "epoch": 3.115423860250602, "grad_norm": 3.1697590351104736, "learning_rate": 1.3997940810419807e-05, "loss": 0.514, "step": 19085 }, { "epoch": 3.1155871188931066, "grad_norm": 2.2761523723602295, "learning_rate": 1.3997353128840492e-05, "loss": 0.3069, "step": 19086 }, { "epoch": 3.115750377535611, "grad_norm": 3.1191658973693848, "learning_rate": 1.3996765430829569e-05, "loss": 0.5046, "step": 19087 }, { "epoch": 3.115913636178115, "grad_norm": 3.6156208515167236, "learning_rate": 1.3996177716389454e-05, "loss": 0.5465, "step": 19088 }, { "epoch": 3.1160768948206194, "grad_norm": 3.5072505474090576, "learning_rate": 1.3995589985522563e-05, "loss": 0.4668, "step": 19089 }, { "epoch": 3.116240153463124, "grad_norm": 3.7170114517211914, "learning_rate": 1.3995002238231307e-05, "loss": 0.562, "step": 19090 }, { "epoch": 3.1164034121056283, "grad_norm": 3.575575590133667, "learning_rate": 1.3994414474518107e-05, "loss": 0.5665, "step": 19091 }, { "epoch": 3.1165666707481328, "grad_norm": 3.1977384090423584, "learning_rate": 1.3993826694385375e-05, "loss": 0.4829, "step": 19092 }, { "epoch": 3.116729929390637, "grad_norm": 2.916234254837036, "learning_rate": 1.3993238897835531e-05, "loss": 0.4712, "step": 19093 }, { "epoch": 3.1168931880331416, "grad_norm": 3.3504269123077393, "learning_rate": 1.3992651084870992e-05, "loss": 0.4382, "step": 19094 }, { "epoch": 3.117056446675646, "grad_norm": 3.105205774307251, "learning_rate": 1.399206325549417e-05, "loss": 0.4702, "step": 19095 }, { "epoch": 3.11721970531815, "grad_norm": 2.9304277896881104, "learning_rate": 1.3991475409707482e-05, "loss": 0.4539, "step": 19096 }, { "epoch": 3.1173829639606545, "grad_norm": 3.017014980316162, "learning_rate": 1.3990887547513347e-05, "loss": 0.4222, "step": 19097 }, { "epoch": 3.117546222603159, "grad_norm": 3.251382350921631, "learning_rate": 1.3990299668914178e-05, "loss": 0.422, "step": 19098 }, { "epoch": 3.1177094812456634, "grad_norm": 3.032386064529419, "learning_rate": 1.39897117739124e-05, "loss": 0.507, "step": 19099 }, { "epoch": 3.117872739888168, "grad_norm": 3.404719352722168, "learning_rate": 1.3989123862510419e-05, "loss": 0.5725, "step": 19100 }, { "epoch": 3.1180359985306723, "grad_norm": 2.9042727947235107, "learning_rate": 1.3988535934710657e-05, "loss": 0.4918, "step": 19101 }, { "epoch": 3.1181992571731767, "grad_norm": 3.2486751079559326, "learning_rate": 1.3987947990515526e-05, "loss": 0.4745, "step": 19102 }, { "epoch": 3.118362515815681, "grad_norm": 3.162451982498169, "learning_rate": 1.3987360029927451e-05, "loss": 0.5087, "step": 19103 }, { "epoch": 3.1185257744581856, "grad_norm": 3.3784399032592773, "learning_rate": 1.3986772052948844e-05, "loss": 0.4988, "step": 19104 }, { "epoch": 3.1186890331006896, "grad_norm": 3.227008819580078, "learning_rate": 1.3986184059582122e-05, "loss": 0.5294, "step": 19105 }, { "epoch": 3.118852291743194, "grad_norm": 2.7631618976593018, "learning_rate": 1.3985596049829701e-05, "loss": 0.4207, "step": 19106 }, { "epoch": 3.1190155503856984, "grad_norm": 3.245443820953369, "learning_rate": 1.3985008023694003e-05, "loss": 0.5331, "step": 19107 }, { "epoch": 3.119178809028203, "grad_norm": 3.034419298171997, "learning_rate": 1.398441998117744e-05, "loss": 0.4507, "step": 19108 }, { "epoch": 3.1193420676707073, "grad_norm": 3.673654794692993, "learning_rate": 1.3983831922282429e-05, "loss": 0.6187, "step": 19109 }, { "epoch": 3.1195053263132118, "grad_norm": 3.263200283050537, "learning_rate": 1.3983243847011391e-05, "loss": 0.473, "step": 19110 }, { "epoch": 3.119668584955716, "grad_norm": 2.45792293548584, "learning_rate": 1.3982655755366743e-05, "loss": 0.4106, "step": 19111 }, { "epoch": 3.1198318435982206, "grad_norm": 3.4875364303588867, "learning_rate": 1.3982067647350899e-05, "loss": 0.5211, "step": 19112 }, { "epoch": 3.119995102240725, "grad_norm": 2.978614568710327, "learning_rate": 1.3981479522966277e-05, "loss": 0.4826, "step": 19113 }, { "epoch": 3.120158360883229, "grad_norm": 3.3752260208129883, "learning_rate": 1.39808913822153e-05, "loss": 0.5243, "step": 19114 }, { "epoch": 3.1203216195257335, "grad_norm": 3.300945281982422, "learning_rate": 1.3980303225100378e-05, "loss": 0.497, "step": 19115 }, { "epoch": 3.120484878168238, "grad_norm": 3.065228223800659, "learning_rate": 1.3979715051623934e-05, "loss": 0.4615, "step": 19116 }, { "epoch": 3.1206481368107424, "grad_norm": 3.1410202980041504, "learning_rate": 1.3979126861788386e-05, "loss": 0.5164, "step": 19117 }, { "epoch": 3.120811395453247, "grad_norm": 2.832991600036621, "learning_rate": 1.3978538655596148e-05, "loss": 0.4679, "step": 19118 }, { "epoch": 3.1209746540957513, "grad_norm": 2.9236395359039307, "learning_rate": 1.3977950433049639e-05, "loss": 0.4591, "step": 19119 }, { "epoch": 3.1211379127382557, "grad_norm": 3.295163154602051, "learning_rate": 1.397736219415128e-05, "loss": 0.4863, "step": 19120 }, { "epoch": 3.12130117138076, "grad_norm": 3.547950506210327, "learning_rate": 1.3976773938903485e-05, "loss": 0.5878, "step": 19121 }, { "epoch": 3.1214644300232646, "grad_norm": 3.511704444885254, "learning_rate": 1.3976185667308676e-05, "loss": 0.4547, "step": 19122 }, { "epoch": 3.1216276886657686, "grad_norm": 2.7584683895111084, "learning_rate": 1.3975597379369266e-05, "loss": 0.483, "step": 19123 }, { "epoch": 3.121790947308273, "grad_norm": 2.644205093383789, "learning_rate": 1.397500907508768e-05, "loss": 0.4478, "step": 19124 }, { "epoch": 3.1219542059507774, "grad_norm": 2.7704391479492188, "learning_rate": 1.397442075446633e-05, "loss": 0.4545, "step": 19125 }, { "epoch": 3.122117464593282, "grad_norm": 3.2597484588623047, "learning_rate": 1.3973832417507636e-05, "loss": 0.4368, "step": 19126 }, { "epoch": 3.1222807232357863, "grad_norm": 3.0806543827056885, "learning_rate": 1.397324406421402e-05, "loss": 0.552, "step": 19127 }, { "epoch": 3.1224439818782908, "grad_norm": 3.147188663482666, "learning_rate": 1.39726556945879e-05, "loss": 0.4943, "step": 19128 }, { "epoch": 3.122607240520795, "grad_norm": 2.701763153076172, "learning_rate": 1.397206730863169e-05, "loss": 0.4251, "step": 19129 }, { "epoch": 3.1227704991632996, "grad_norm": 3.36301589012146, "learning_rate": 1.3971478906347806e-05, "loss": 0.4594, "step": 19130 }, { "epoch": 3.1229337578058036, "grad_norm": 3.320037603378296, "learning_rate": 1.3970890487738677e-05, "loss": 0.5081, "step": 19131 }, { "epoch": 3.123097016448308, "grad_norm": 2.84674072265625, "learning_rate": 1.3970302052806717e-05, "loss": 0.3823, "step": 19132 }, { "epoch": 3.1232602750908125, "grad_norm": 3.938885450363159, "learning_rate": 1.3969713601554344e-05, "loss": 0.5492, "step": 19133 }, { "epoch": 3.123423533733317, "grad_norm": 3.008119821548462, "learning_rate": 1.3969125133983978e-05, "loss": 0.4052, "step": 19134 }, { "epoch": 3.1235867923758214, "grad_norm": 3.5306930541992188, "learning_rate": 1.3968536650098035e-05, "loss": 0.4895, "step": 19135 }, { "epoch": 3.123750051018326, "grad_norm": 2.989643096923828, "learning_rate": 1.3967948149898936e-05, "loss": 0.4878, "step": 19136 }, { "epoch": 3.1239133096608303, "grad_norm": 3.1395139694213867, "learning_rate": 1.3967359633389102e-05, "loss": 0.5059, "step": 19137 }, { "epoch": 3.1240765683033347, "grad_norm": 3.147735834121704, "learning_rate": 1.3966771100570953e-05, "loss": 0.4515, "step": 19138 }, { "epoch": 3.124239826945839, "grad_norm": 3.284184694290161, "learning_rate": 1.3966182551446903e-05, "loss": 0.4457, "step": 19139 }, { "epoch": 3.124403085588343, "grad_norm": 3.553744077682495, "learning_rate": 1.3965593986019375e-05, "loss": 0.46, "step": 19140 }, { "epoch": 3.1245663442308476, "grad_norm": 3.576601028442383, "learning_rate": 1.3965005404290785e-05, "loss": 0.5934, "step": 19141 }, { "epoch": 3.124729602873352, "grad_norm": 3.706298828125, "learning_rate": 1.3964416806263556e-05, "loss": 0.5362, "step": 19142 }, { "epoch": 3.1248928615158564, "grad_norm": 2.8665194511413574, "learning_rate": 1.396382819194011e-05, "loss": 0.4967, "step": 19143 }, { "epoch": 3.125056120158361, "grad_norm": 3.0745460987091064, "learning_rate": 1.396323956132286e-05, "loss": 0.4562, "step": 19144 }, { "epoch": 3.1252193788008653, "grad_norm": 3.2337396144866943, "learning_rate": 1.396265091441423e-05, "loss": 0.4822, "step": 19145 }, { "epoch": 3.1253826374433697, "grad_norm": 2.9829068183898926, "learning_rate": 1.3962062251216636e-05, "loss": 0.4323, "step": 19146 }, { "epoch": 3.125545896085874, "grad_norm": 3.6717007160186768, "learning_rate": 1.39614735717325e-05, "loss": 0.5796, "step": 19147 }, { "epoch": 3.1257091547283786, "grad_norm": 3.2299110889434814, "learning_rate": 1.3960884875964245e-05, "loss": 0.5146, "step": 19148 }, { "epoch": 3.1258724133708826, "grad_norm": 3.7275185585021973, "learning_rate": 1.3960296163914287e-05, "loss": 0.572, "step": 19149 }, { "epoch": 3.126035672013387, "grad_norm": 2.5444717407226562, "learning_rate": 1.3959707435585045e-05, "loss": 0.3917, "step": 19150 }, { "epoch": 3.1261989306558915, "grad_norm": 3.134772777557373, "learning_rate": 1.3959118690978942e-05, "loss": 0.4927, "step": 19151 }, { "epoch": 3.126362189298396, "grad_norm": 2.917109966278076, "learning_rate": 1.3958529930098397e-05, "loss": 0.5052, "step": 19152 }, { "epoch": 3.1265254479409004, "grad_norm": 3.520374059677124, "learning_rate": 1.395794115294583e-05, "loss": 0.5458, "step": 19153 }, { "epoch": 3.126688706583405, "grad_norm": 2.4826033115386963, "learning_rate": 1.3957352359523663e-05, "loss": 0.4149, "step": 19154 }, { "epoch": 3.1268519652259092, "grad_norm": 2.9170596599578857, "learning_rate": 1.3956763549834313e-05, "loss": 0.4742, "step": 19155 }, { "epoch": 3.1270152238684137, "grad_norm": 3.1859354972839355, "learning_rate": 1.3956174723880202e-05, "loss": 0.4432, "step": 19156 }, { "epoch": 3.127178482510918, "grad_norm": 2.8512115478515625, "learning_rate": 1.3955585881663752e-05, "loss": 0.3893, "step": 19157 }, { "epoch": 3.127341741153422, "grad_norm": 3.0389723777770996, "learning_rate": 1.395499702318738e-05, "loss": 0.4666, "step": 19158 }, { "epoch": 3.1275049997959266, "grad_norm": 3.59716796875, "learning_rate": 1.3954408148453512e-05, "loss": 0.5842, "step": 19159 }, { "epoch": 3.127668258438431, "grad_norm": 3.4179017543792725, "learning_rate": 1.395381925746456e-05, "loss": 0.4568, "step": 19160 }, { "epoch": 3.1278315170809354, "grad_norm": 3.1212828159332275, "learning_rate": 1.3953230350222952e-05, "loss": 0.4774, "step": 19161 }, { "epoch": 3.12799477572344, "grad_norm": 3.4097585678100586, "learning_rate": 1.3952641426731109e-05, "loss": 0.4962, "step": 19162 }, { "epoch": 3.1281580343659443, "grad_norm": 3.014291524887085, "learning_rate": 1.3952052486991447e-05, "loss": 0.4674, "step": 19163 }, { "epoch": 3.1283212930084487, "grad_norm": 3.5856120586395264, "learning_rate": 1.3951463531006394e-05, "loss": 0.5147, "step": 19164 }, { "epoch": 3.128484551650953, "grad_norm": 3.0099315643310547, "learning_rate": 1.3950874558778363e-05, "loss": 0.433, "step": 19165 }, { "epoch": 3.128647810293457, "grad_norm": 3.2381973266601562, "learning_rate": 1.3950285570309779e-05, "loss": 0.512, "step": 19166 }, { "epoch": 3.1288110689359616, "grad_norm": 2.8623244762420654, "learning_rate": 1.3949696565603061e-05, "loss": 0.3577, "step": 19167 }, { "epoch": 3.128974327578466, "grad_norm": 3.500073194503784, "learning_rate": 1.3949107544660634e-05, "loss": 0.4948, "step": 19168 }, { "epoch": 3.1291375862209705, "grad_norm": 3.3252861499786377, "learning_rate": 1.3948518507484914e-05, "loss": 0.5384, "step": 19169 }, { "epoch": 3.129300844863475, "grad_norm": 3.1060807704925537, "learning_rate": 1.3947929454078329e-05, "loss": 0.462, "step": 19170 }, { "epoch": 3.1294641035059794, "grad_norm": 4.337953090667725, "learning_rate": 1.3947340384443295e-05, "loss": 1.0822, "step": 19171 }, { "epoch": 3.129627362148484, "grad_norm": 3.1076509952545166, "learning_rate": 1.3946751298582237e-05, "loss": 0.4312, "step": 19172 }, { "epoch": 3.1297906207909882, "grad_norm": 3.297116756439209, "learning_rate": 1.3946162196497573e-05, "loss": 0.4294, "step": 19173 }, { "epoch": 3.1299538794334927, "grad_norm": 2.477480173110962, "learning_rate": 1.3945573078191726e-05, "loss": 0.414, "step": 19174 }, { "epoch": 3.130117138075997, "grad_norm": 2.813093423843384, "learning_rate": 1.3944983943667116e-05, "loss": 0.4313, "step": 19175 }, { "epoch": 3.130280396718501, "grad_norm": 3.548003911972046, "learning_rate": 1.394439479292617e-05, "loss": 0.5212, "step": 19176 }, { "epoch": 3.1304436553610056, "grad_norm": 3.5450351238250732, "learning_rate": 1.3943805625971308e-05, "loss": 0.5535, "step": 19177 }, { "epoch": 3.13060691400351, "grad_norm": 2.946059465408325, "learning_rate": 1.3943216442804948e-05, "loss": 0.4744, "step": 19178 }, { "epoch": 3.1307701726460144, "grad_norm": 3.4534008502960205, "learning_rate": 1.3942627243429512e-05, "loss": 0.5476, "step": 19179 }, { "epoch": 3.130933431288519, "grad_norm": 3.084911346435547, "learning_rate": 1.3942038027847425e-05, "loss": 0.4254, "step": 19180 }, { "epoch": 3.1310966899310233, "grad_norm": 3.4672298431396484, "learning_rate": 1.3941448796061108e-05, "loss": 0.5734, "step": 19181 }, { "epoch": 3.1312599485735277, "grad_norm": 2.926316976547241, "learning_rate": 1.3940859548072987e-05, "loss": 0.4865, "step": 19182 }, { "epoch": 3.131423207216032, "grad_norm": 2.897096633911133, "learning_rate": 1.3940270283885478e-05, "loss": 0.4998, "step": 19183 }, { "epoch": 3.131586465858536, "grad_norm": 2.550222635269165, "learning_rate": 1.3939681003501005e-05, "loss": 0.3599, "step": 19184 }, { "epoch": 3.1317497245010406, "grad_norm": 3.6526970863342285, "learning_rate": 1.393909170692199e-05, "loss": 0.5114, "step": 19185 }, { "epoch": 3.131912983143545, "grad_norm": 3.532703161239624, "learning_rate": 1.3938502394150857e-05, "loss": 0.561, "step": 19186 }, { "epoch": 3.1320762417860495, "grad_norm": 3.482605218887329, "learning_rate": 1.393791306519003e-05, "loss": 0.5131, "step": 19187 }, { "epoch": 3.132239500428554, "grad_norm": 3.5306007862091064, "learning_rate": 1.3937323720041928e-05, "loss": 0.5489, "step": 19188 }, { "epoch": 3.1324027590710584, "grad_norm": 3.2617347240448, "learning_rate": 1.3936734358708974e-05, "loss": 0.4676, "step": 19189 }, { "epoch": 3.132566017713563, "grad_norm": 3.4920003414154053, "learning_rate": 1.3936144981193592e-05, "loss": 0.5966, "step": 19190 }, { "epoch": 3.1327292763560672, "grad_norm": 3.0105183124542236, "learning_rate": 1.3935555587498204e-05, "loss": 0.4569, "step": 19191 }, { "epoch": 3.1328925349985717, "grad_norm": 2.6462366580963135, "learning_rate": 1.3934966177625233e-05, "loss": 0.4084, "step": 19192 }, { "epoch": 3.1330557936410757, "grad_norm": 3.548978567123413, "learning_rate": 1.3934376751577104e-05, "loss": 0.5364, "step": 19193 }, { "epoch": 3.13321905228358, "grad_norm": 2.7488601207733154, "learning_rate": 1.3933787309356234e-05, "loss": 0.4685, "step": 19194 }, { "epoch": 3.1333823109260845, "grad_norm": 2.779733180999756, "learning_rate": 1.3933197850965053e-05, "loss": 0.425, "step": 19195 }, { "epoch": 3.133545569568589, "grad_norm": 3.517742156982422, "learning_rate": 1.3932608376405981e-05, "loss": 0.5592, "step": 19196 }, { "epoch": 3.1337088282110934, "grad_norm": 3.4703357219696045, "learning_rate": 1.393201888568144e-05, "loss": 0.4508, "step": 19197 }, { "epoch": 3.133872086853598, "grad_norm": 3.319817304611206, "learning_rate": 1.3931429378793852e-05, "loss": 0.4579, "step": 19198 }, { "epoch": 3.1340353454961023, "grad_norm": 3.401029348373413, "learning_rate": 1.3930839855745644e-05, "loss": 0.4963, "step": 19199 }, { "epoch": 3.1341986041386067, "grad_norm": 3.007427930831909, "learning_rate": 1.3930250316539237e-05, "loss": 0.4341, "step": 19200 }, { "epoch": 3.134361862781111, "grad_norm": 3.070230722427368, "learning_rate": 1.3929660761177055e-05, "loss": 0.3974, "step": 19201 }, { "epoch": 3.134525121423615, "grad_norm": 3.835297107696533, "learning_rate": 1.3929071189661523e-05, "loss": 0.49, "step": 19202 }, { "epoch": 3.1346883800661196, "grad_norm": 3.1332435607910156, "learning_rate": 1.392848160199506e-05, "loss": 0.4104, "step": 19203 }, { "epoch": 3.134851638708624, "grad_norm": 2.7319133281707764, "learning_rate": 1.3927891998180096e-05, "loss": 0.4545, "step": 19204 }, { "epoch": 3.1350148973511285, "grad_norm": 2.8151156902313232, "learning_rate": 1.3927302378219049e-05, "loss": 0.4483, "step": 19205 }, { "epoch": 3.135178155993633, "grad_norm": 3.4470765590667725, "learning_rate": 1.3926712742114345e-05, "loss": 0.4501, "step": 19206 }, { "epoch": 3.1353414146361374, "grad_norm": 3.029160976409912, "learning_rate": 1.3926123089868408e-05, "loss": 0.4465, "step": 19207 }, { "epoch": 3.135504673278642, "grad_norm": 2.5935161113739014, "learning_rate": 1.3925533421483663e-05, "loss": 0.4129, "step": 19208 }, { "epoch": 3.1356679319211462, "grad_norm": 2.8242311477661133, "learning_rate": 1.392494373696253e-05, "loss": 0.4347, "step": 19209 }, { "epoch": 3.1358311905636507, "grad_norm": 3.168905019760132, "learning_rate": 1.3924354036307436e-05, "loss": 0.5146, "step": 19210 }, { "epoch": 3.1359944492061547, "grad_norm": 3.4430394172668457, "learning_rate": 1.3923764319520804e-05, "loss": 0.479, "step": 19211 }, { "epoch": 3.136157707848659, "grad_norm": 3.4977715015411377, "learning_rate": 1.3923174586605061e-05, "loss": 0.4626, "step": 19212 }, { "epoch": 3.1363209664911635, "grad_norm": 3.87090802192688, "learning_rate": 1.3922584837562625e-05, "loss": 0.6055, "step": 19213 }, { "epoch": 3.136484225133668, "grad_norm": 3.5580968856811523, "learning_rate": 1.3921995072395926e-05, "loss": 0.562, "step": 19214 }, { "epoch": 3.1366474837761724, "grad_norm": 3.116252899169922, "learning_rate": 1.3921405291107386e-05, "loss": 0.4251, "step": 19215 }, { "epoch": 3.136810742418677, "grad_norm": 3.184206247329712, "learning_rate": 1.3920815493699428e-05, "loss": 0.4745, "step": 19216 }, { "epoch": 3.1369740010611813, "grad_norm": 2.7472941875457764, "learning_rate": 1.3920225680174481e-05, "loss": 0.4069, "step": 19217 }, { "epoch": 3.1371372597036857, "grad_norm": 3.4452407360076904, "learning_rate": 1.3919635850534965e-05, "loss": 0.4731, "step": 19218 }, { "epoch": 3.1373005183461897, "grad_norm": 3.096065044403076, "learning_rate": 1.3919046004783306e-05, "loss": 0.4927, "step": 19219 }, { "epoch": 3.137463776988694, "grad_norm": 2.9096908569335938, "learning_rate": 1.3918456142921928e-05, "loss": 0.4507, "step": 19220 }, { "epoch": 3.1376270356311986, "grad_norm": 3.384169578552246, "learning_rate": 1.3917866264953257e-05, "loss": 0.5319, "step": 19221 }, { "epoch": 3.137790294273703, "grad_norm": 3.957462787628174, "learning_rate": 1.3917276370879718e-05, "loss": 0.5328, "step": 19222 }, { "epoch": 3.1379535529162075, "grad_norm": 3.216085433959961, "learning_rate": 1.3916686460703734e-05, "loss": 0.4638, "step": 19223 }, { "epoch": 3.138116811558712, "grad_norm": 3.382558584213257, "learning_rate": 1.3916096534427728e-05, "loss": 0.4433, "step": 19224 }, { "epoch": 3.1382800702012164, "grad_norm": 3.480181932449341, "learning_rate": 1.3915506592054132e-05, "loss": 0.5442, "step": 19225 }, { "epoch": 3.138443328843721, "grad_norm": 2.8555986881256104, "learning_rate": 1.3914916633585368e-05, "loss": 0.3977, "step": 19226 }, { "epoch": 3.1386065874862252, "grad_norm": 3.8599560260772705, "learning_rate": 1.3914326659023855e-05, "loss": 0.6125, "step": 19227 }, { "epoch": 3.1387698461287297, "grad_norm": 3.082606792449951, "learning_rate": 1.3913736668372027e-05, "loss": 0.4993, "step": 19228 }, { "epoch": 3.1389331047712337, "grad_norm": 2.317253589630127, "learning_rate": 1.3913146661632304e-05, "loss": 0.3612, "step": 19229 }, { "epoch": 3.139096363413738, "grad_norm": 3.5125062465667725, "learning_rate": 1.3912556638807111e-05, "loss": 0.4956, "step": 19230 }, { "epoch": 3.1392596220562425, "grad_norm": 3.543374538421631, "learning_rate": 1.3911966599898878e-05, "loss": 0.5456, "step": 19231 }, { "epoch": 3.139422880698747, "grad_norm": 2.982949733734131, "learning_rate": 1.3911376544910025e-05, "loss": 0.485, "step": 19232 }, { "epoch": 3.1395861393412514, "grad_norm": 2.7382473945617676, "learning_rate": 1.3910786473842981e-05, "loss": 0.3941, "step": 19233 }, { "epoch": 3.139749397983756, "grad_norm": 3.3039655685424805, "learning_rate": 1.391019638670017e-05, "loss": 0.4857, "step": 19234 }, { "epoch": 3.1399126566262603, "grad_norm": 2.956632137298584, "learning_rate": 1.3909606283484019e-05, "loss": 0.4574, "step": 19235 }, { "epoch": 3.1400759152687647, "grad_norm": 2.629624128341675, "learning_rate": 1.3909016164196953e-05, "loss": 0.398, "step": 19236 }, { "epoch": 3.1402391739112687, "grad_norm": 3.771404266357422, "learning_rate": 1.3908426028841397e-05, "loss": 0.5265, "step": 19237 }, { "epoch": 3.140402432553773, "grad_norm": 2.8130252361297607, "learning_rate": 1.3907835877419778e-05, "loss": 0.3908, "step": 19238 }, { "epoch": 3.1405656911962776, "grad_norm": 2.5922210216522217, "learning_rate": 1.390724570993452e-05, "loss": 0.4473, "step": 19239 }, { "epoch": 3.140728949838782, "grad_norm": 3.5759522914886475, "learning_rate": 1.3906655526388051e-05, "loss": 0.5717, "step": 19240 }, { "epoch": 3.1408922084812865, "grad_norm": 2.6235570907592773, "learning_rate": 1.3906065326782797e-05, "loss": 0.4843, "step": 19241 }, { "epoch": 3.141055467123791, "grad_norm": 3.930410146713257, "learning_rate": 1.3905475111121183e-05, "loss": 0.5296, "step": 19242 }, { "epoch": 3.1412187257662953, "grad_norm": 3.7836992740631104, "learning_rate": 1.3904884879405634e-05, "loss": 0.537, "step": 19243 }, { "epoch": 3.1413819844088, "grad_norm": 3.4731829166412354, "learning_rate": 1.3904294631638578e-05, "loss": 0.5609, "step": 19244 }, { "epoch": 3.1415452430513042, "grad_norm": 3.2795815467834473, "learning_rate": 1.3903704367822441e-05, "loss": 0.5053, "step": 19245 }, { "epoch": 3.141708501693808, "grad_norm": 2.71091365814209, "learning_rate": 1.3903114087959651e-05, "loss": 0.4169, "step": 19246 }, { "epoch": 3.1418717603363127, "grad_norm": 2.9618945121765137, "learning_rate": 1.390252379205263e-05, "loss": 0.402, "step": 19247 }, { "epoch": 3.142035018978817, "grad_norm": 3.5815916061401367, "learning_rate": 1.390193348010381e-05, "loss": 0.5301, "step": 19248 }, { "epoch": 3.1421982776213215, "grad_norm": 2.722465753555298, "learning_rate": 1.3901343152115616e-05, "loss": 0.3832, "step": 19249 }, { "epoch": 3.142361536263826, "grad_norm": 3.175485849380493, "learning_rate": 1.390075280809047e-05, "loss": 0.4557, "step": 19250 }, { "epoch": 3.1425247949063304, "grad_norm": 3.5117459297180176, "learning_rate": 1.3900162448030802e-05, "loss": 0.5212, "step": 19251 }, { "epoch": 3.142688053548835, "grad_norm": 3.3281822204589844, "learning_rate": 1.3899572071939042e-05, "loss": 0.4758, "step": 19252 }, { "epoch": 3.1428513121913393, "grad_norm": 3.807953119277954, "learning_rate": 1.3898981679817612e-05, "loss": 0.5782, "step": 19253 }, { "epoch": 3.1430145708338437, "grad_norm": 3.179417371749878, "learning_rate": 1.3898391271668941e-05, "loss": 0.4825, "step": 19254 }, { "epoch": 3.1431778294763477, "grad_norm": 3.240248203277588, "learning_rate": 1.3897800847495455e-05, "loss": 0.5649, "step": 19255 }, { "epoch": 3.143341088118852, "grad_norm": 3.157564401626587, "learning_rate": 1.3897210407299584e-05, "loss": 0.4525, "step": 19256 }, { "epoch": 3.1435043467613566, "grad_norm": 3.368133306503296, "learning_rate": 1.3896619951083747e-05, "loss": 0.4873, "step": 19257 }, { "epoch": 3.143667605403861, "grad_norm": 3.6147446632385254, "learning_rate": 1.3896029478850381e-05, "loss": 0.5145, "step": 19258 }, { "epoch": 3.1438308640463655, "grad_norm": 3.0855472087860107, "learning_rate": 1.3895438990601906e-05, "loss": 0.4649, "step": 19259 }, { "epoch": 3.14399412268887, "grad_norm": 3.3350677490234375, "learning_rate": 1.3894848486340756e-05, "loss": 0.5659, "step": 19260 }, { "epoch": 3.1441573813313743, "grad_norm": 3.1736671924591064, "learning_rate": 1.3894257966069353e-05, "loss": 0.4697, "step": 19261 }, { "epoch": 3.144320639973879, "grad_norm": 2.4282963275909424, "learning_rate": 1.3893667429790123e-05, "loss": 0.375, "step": 19262 }, { "epoch": 3.144483898616383, "grad_norm": 3.0166983604431152, "learning_rate": 1.38930768775055e-05, "loss": 0.4618, "step": 19263 }, { "epoch": 3.144647157258887, "grad_norm": 3.024506092071533, "learning_rate": 1.3892486309217907e-05, "loss": 0.4657, "step": 19264 }, { "epoch": 3.1448104159013917, "grad_norm": 3.628120183944702, "learning_rate": 1.3891895724929773e-05, "loss": 0.5345, "step": 19265 }, { "epoch": 3.144973674543896, "grad_norm": 3.6713290214538574, "learning_rate": 1.3891305124643526e-05, "loss": 0.5696, "step": 19266 }, { "epoch": 3.1451369331864005, "grad_norm": 2.8136632442474365, "learning_rate": 1.3890714508361591e-05, "loss": 0.3855, "step": 19267 }, { "epoch": 3.145300191828905, "grad_norm": 2.8815114498138428, "learning_rate": 1.3890123876086397e-05, "loss": 0.4012, "step": 19268 }, { "epoch": 3.1454634504714094, "grad_norm": 2.85404634475708, "learning_rate": 1.3889533227820374e-05, "loss": 0.4317, "step": 19269 }, { "epoch": 3.145626709113914, "grad_norm": 2.952680826187134, "learning_rate": 1.388894256356595e-05, "loss": 0.4201, "step": 19270 }, { "epoch": 3.1457899677564183, "grad_norm": 3.6084654331207275, "learning_rate": 1.3888351883325551e-05, "loss": 0.451, "step": 19271 }, { "epoch": 3.1459532263989223, "grad_norm": 3.399081230163574, "learning_rate": 1.3887761187101603e-05, "loss": 0.4994, "step": 19272 }, { "epoch": 3.1461164850414267, "grad_norm": 2.1465790271759033, "learning_rate": 1.3887170474896538e-05, "loss": 0.3226, "step": 19273 }, { "epoch": 3.146279743683931, "grad_norm": 3.2479116916656494, "learning_rate": 1.3886579746712783e-05, "loss": 0.4427, "step": 19274 }, { "epoch": 3.1464430023264356, "grad_norm": 3.230229139328003, "learning_rate": 1.3885989002552766e-05, "loss": 0.4383, "step": 19275 }, { "epoch": 3.14660626096894, "grad_norm": 3.222586154937744, "learning_rate": 1.3885398242418919e-05, "loss": 0.4376, "step": 19276 }, { "epoch": 3.1467695196114445, "grad_norm": 3.401998996734619, "learning_rate": 1.3884807466313664e-05, "loss": 0.4621, "step": 19277 }, { "epoch": 3.146932778253949, "grad_norm": 3.2056546211242676, "learning_rate": 1.3884216674239431e-05, "loss": 0.4087, "step": 19278 }, { "epoch": 3.1470960368964533, "grad_norm": 3.3340375423431396, "learning_rate": 1.388362586619865e-05, "loss": 0.5645, "step": 19279 }, { "epoch": 3.147259295538958, "grad_norm": 3.6223983764648438, "learning_rate": 1.3883035042193753e-05, "loss": 0.4638, "step": 19280 }, { "epoch": 3.1474225541814618, "grad_norm": 3.524841785430908, "learning_rate": 1.3882444202227163e-05, "loss": 0.4733, "step": 19281 }, { "epoch": 3.147585812823966, "grad_norm": 3.1221542358398438, "learning_rate": 1.388185334630131e-05, "loss": 0.4796, "step": 19282 }, { "epoch": 3.1477490714664706, "grad_norm": 3.3532748222351074, "learning_rate": 1.3881262474418624e-05, "loss": 0.5086, "step": 19283 }, { "epoch": 3.147912330108975, "grad_norm": 3.7099709510803223, "learning_rate": 1.3880671586581536e-05, "loss": 0.5077, "step": 19284 }, { "epoch": 3.1480755887514795, "grad_norm": 3.0401039123535156, "learning_rate": 1.3880080682792469e-05, "loss": 0.4256, "step": 19285 }, { "epoch": 3.148238847393984, "grad_norm": 2.933462619781494, "learning_rate": 1.3879489763053857e-05, "loss": 0.4459, "step": 19286 }, { "epoch": 3.1484021060364884, "grad_norm": 2.678544044494629, "learning_rate": 1.3878898827368128e-05, "loss": 0.4613, "step": 19287 }, { "epoch": 3.148565364678993, "grad_norm": 2.9938089847564697, "learning_rate": 1.387830787573771e-05, "loss": 0.4725, "step": 19288 }, { "epoch": 3.1487286233214973, "grad_norm": 3.0641136169433594, "learning_rate": 1.3877716908165033e-05, "loss": 0.4843, "step": 19289 }, { "epoch": 3.1488918819640013, "grad_norm": 3.3410210609436035, "learning_rate": 1.3877125924652525e-05, "loss": 0.408, "step": 19290 }, { "epoch": 3.1490551406065057, "grad_norm": 3.2471790313720703, "learning_rate": 1.3876534925202617e-05, "loss": 0.4388, "step": 19291 }, { "epoch": 3.14921839924901, "grad_norm": 2.9579124450683594, "learning_rate": 1.3875943909817738e-05, "loss": 0.3872, "step": 19292 }, { "epoch": 3.1493816578915146, "grad_norm": 2.834275484085083, "learning_rate": 1.3875352878500316e-05, "loss": 0.4701, "step": 19293 }, { "epoch": 3.149544916534019, "grad_norm": 3.633971929550171, "learning_rate": 1.3874761831252782e-05, "loss": 0.5122, "step": 19294 }, { "epoch": 3.1497081751765235, "grad_norm": 3.525315046310425, "learning_rate": 1.3874170768077567e-05, "loss": 0.548, "step": 19295 }, { "epoch": 3.149871433819028, "grad_norm": 3.0580177307128906, "learning_rate": 1.3873579688977097e-05, "loss": 0.4265, "step": 19296 }, { "epoch": 3.1500346924615323, "grad_norm": 3.865584373474121, "learning_rate": 1.3872988593953802e-05, "loss": 0.5503, "step": 19297 }, { "epoch": 3.1501979511040368, "grad_norm": 3.3616015911102295, "learning_rate": 1.3872397483010116e-05, "loss": 0.4877, "step": 19298 }, { "epoch": 3.1503612097465408, "grad_norm": 2.797199010848999, "learning_rate": 1.3871806356148465e-05, "loss": 0.4635, "step": 19299 }, { "epoch": 3.150524468389045, "grad_norm": 3.1397645473480225, "learning_rate": 1.3871215213371284e-05, "loss": 0.4137, "step": 19300 }, { "epoch": 3.1506877270315496, "grad_norm": 2.9349253177642822, "learning_rate": 1.3870624054680994e-05, "loss": 0.4331, "step": 19301 }, { "epoch": 3.150850985674054, "grad_norm": 3.992382049560547, "learning_rate": 1.3870032880080031e-05, "loss": 0.5123, "step": 19302 }, { "epoch": 3.1510142443165585, "grad_norm": 3.782268762588501, "learning_rate": 1.3869441689570824e-05, "loss": 0.5967, "step": 19303 }, { "epoch": 3.151177502959063, "grad_norm": 3.6835360527038574, "learning_rate": 1.3868850483155804e-05, "loss": 0.5195, "step": 19304 }, { "epoch": 3.1513407616015674, "grad_norm": 3.2429771423339844, "learning_rate": 1.3868259260837403e-05, "loss": 0.4916, "step": 19305 }, { "epoch": 3.151504020244072, "grad_norm": 3.6292717456817627, "learning_rate": 1.3867668022618047e-05, "loss": 0.4653, "step": 19306 }, { "epoch": 3.151667278886576, "grad_norm": 3.484997034072876, "learning_rate": 1.3867076768500165e-05, "loss": 0.5308, "step": 19307 }, { "epoch": 3.1518305375290803, "grad_norm": 3.1489386558532715, "learning_rate": 1.3866485498486193e-05, "loss": 0.4767, "step": 19308 }, { "epoch": 3.1519937961715847, "grad_norm": 2.8674793243408203, "learning_rate": 1.3865894212578562e-05, "loss": 0.4307, "step": 19309 }, { "epoch": 3.152157054814089, "grad_norm": 4.038379669189453, "learning_rate": 1.3865302910779694e-05, "loss": 0.5481, "step": 19310 }, { "epoch": 3.1523203134565936, "grad_norm": 3.012037992477417, "learning_rate": 1.3864711593092029e-05, "loss": 0.4824, "step": 19311 }, { "epoch": 3.152483572099098, "grad_norm": 2.798804521560669, "learning_rate": 1.3864120259517991e-05, "loss": 0.4028, "step": 19312 }, { "epoch": 3.1526468307416025, "grad_norm": 3.167124032974243, "learning_rate": 1.3863528910060015e-05, "loss": 0.4855, "step": 19313 }, { "epoch": 3.152810089384107, "grad_norm": 3.0838029384613037, "learning_rate": 1.3862937544720532e-05, "loss": 0.4484, "step": 19314 }, { "epoch": 3.1529733480266113, "grad_norm": 2.8931236267089844, "learning_rate": 1.3862346163501972e-05, "loss": 0.4983, "step": 19315 }, { "epoch": 3.1531366066691158, "grad_norm": 3.0559778213500977, "learning_rate": 1.3861754766406763e-05, "loss": 0.4689, "step": 19316 }, { "epoch": 3.1532998653116198, "grad_norm": 2.331328868865967, "learning_rate": 1.3861163353437339e-05, "loss": 0.352, "step": 19317 }, { "epoch": 3.153463123954124, "grad_norm": 3.4827687740325928, "learning_rate": 1.3860571924596128e-05, "loss": 0.4803, "step": 19318 }, { "epoch": 3.1536263825966286, "grad_norm": 2.899827241897583, "learning_rate": 1.3859980479885569e-05, "loss": 0.519, "step": 19319 }, { "epoch": 3.153789641239133, "grad_norm": 2.7745614051818848, "learning_rate": 1.3859389019308085e-05, "loss": 0.4019, "step": 19320 }, { "epoch": 3.1539528998816375, "grad_norm": 3.670105218887329, "learning_rate": 1.385879754286611e-05, "loss": 0.5449, "step": 19321 }, { "epoch": 3.154116158524142, "grad_norm": 3.0268731117248535, "learning_rate": 1.3858206050562074e-05, "loss": 0.4507, "step": 19322 }, { "epoch": 3.1542794171666464, "grad_norm": 2.498896837234497, "learning_rate": 1.385761454239841e-05, "loss": 0.3918, "step": 19323 }, { "epoch": 3.154442675809151, "grad_norm": 3.1458847522735596, "learning_rate": 1.3857023018377551e-05, "loss": 0.4403, "step": 19324 }, { "epoch": 3.154605934451655, "grad_norm": 2.6301121711730957, "learning_rate": 1.3856431478501926e-05, "loss": 0.4386, "step": 19325 }, { "epoch": 3.1547691930941593, "grad_norm": 2.9521758556365967, "learning_rate": 1.3855839922773968e-05, "loss": 0.4155, "step": 19326 }, { "epoch": 3.1549324517366637, "grad_norm": 3.3879776000976562, "learning_rate": 1.3855248351196106e-05, "loss": 0.5032, "step": 19327 }, { "epoch": 3.155095710379168, "grad_norm": 2.8360707759857178, "learning_rate": 1.3854656763770775e-05, "loss": 0.4431, "step": 19328 }, { "epoch": 3.1552589690216726, "grad_norm": 2.904977798461914, "learning_rate": 1.3854065160500407e-05, "loss": 0.4774, "step": 19329 }, { "epoch": 3.155422227664177, "grad_norm": 3.211212635040283, "learning_rate": 1.385347354138743e-05, "loss": 0.5101, "step": 19330 }, { "epoch": 3.1555854863066815, "grad_norm": 3.0239689350128174, "learning_rate": 1.3852881906434279e-05, "loss": 0.4748, "step": 19331 }, { "epoch": 3.155748744949186, "grad_norm": 3.4014902114868164, "learning_rate": 1.3852290255643385e-05, "loss": 0.5327, "step": 19332 }, { "epoch": 3.1559120035916903, "grad_norm": 2.7747392654418945, "learning_rate": 1.3851698589017182e-05, "loss": 0.4182, "step": 19333 }, { "epoch": 3.1560752622341943, "grad_norm": 3.4504950046539307, "learning_rate": 1.3851106906558098e-05, "loss": 0.4894, "step": 19334 }, { "epoch": 3.1562385208766988, "grad_norm": 3.39721941947937, "learning_rate": 1.3850515208268571e-05, "loss": 0.5123, "step": 19335 }, { "epoch": 3.156401779519203, "grad_norm": 3.693747043609619, "learning_rate": 1.3849923494151028e-05, "loss": 0.5403, "step": 19336 }, { "epoch": 3.1565650381617076, "grad_norm": 3.3733441829681396, "learning_rate": 1.3849331764207904e-05, "loss": 0.4071, "step": 19337 }, { "epoch": 3.156728296804212, "grad_norm": 2.9042017459869385, "learning_rate": 1.3848740018441628e-05, "loss": 0.4703, "step": 19338 }, { "epoch": 3.1568915554467165, "grad_norm": 3.009493350982666, "learning_rate": 1.3848148256854637e-05, "loss": 0.52, "step": 19339 }, { "epoch": 3.157054814089221, "grad_norm": 2.8665268421173096, "learning_rate": 1.3847556479449362e-05, "loss": 0.4525, "step": 19340 }, { "epoch": 3.1572180727317254, "grad_norm": 3.10103440284729, "learning_rate": 1.3846964686228233e-05, "loss": 0.4916, "step": 19341 }, { "epoch": 3.15738133137423, "grad_norm": 3.1681063175201416, "learning_rate": 1.3846372877193685e-05, "loss": 0.5419, "step": 19342 }, { "epoch": 3.157544590016734, "grad_norm": 2.6064040660858154, "learning_rate": 1.3845781052348153e-05, "loss": 0.4005, "step": 19343 }, { "epoch": 3.1577078486592383, "grad_norm": 3.1037018299102783, "learning_rate": 1.3845189211694068e-05, "loss": 0.4332, "step": 19344 }, { "epoch": 3.1578711073017427, "grad_norm": 2.7830770015716553, "learning_rate": 1.384459735523386e-05, "loss": 0.4467, "step": 19345 }, { "epoch": 3.158034365944247, "grad_norm": 3.3807411193847656, "learning_rate": 1.3844005482969962e-05, "loss": 0.4515, "step": 19346 }, { "epoch": 3.1581976245867516, "grad_norm": 2.7700181007385254, "learning_rate": 1.3843413594904811e-05, "loss": 0.3822, "step": 19347 }, { "epoch": 3.158360883229256, "grad_norm": 3.2619400024414062, "learning_rate": 1.384282169104084e-05, "loss": 0.5838, "step": 19348 }, { "epoch": 3.1585241418717604, "grad_norm": 3.3569977283477783, "learning_rate": 1.3842229771380478e-05, "loss": 0.4987, "step": 19349 }, { "epoch": 3.158687400514265, "grad_norm": 3.2465784549713135, "learning_rate": 1.3841637835926161e-05, "loss": 0.472, "step": 19350 }, { "epoch": 3.1588506591567693, "grad_norm": 3.4379258155822754, "learning_rate": 1.3841045884680318e-05, "loss": 0.5542, "step": 19351 }, { "epoch": 3.1590139177992733, "grad_norm": 3.1335344314575195, "learning_rate": 1.3840453917645389e-05, "loss": 0.4754, "step": 19352 }, { "epoch": 3.1591771764417778, "grad_norm": 3.049471616744995, "learning_rate": 1.3839861934823807e-05, "loss": 0.4117, "step": 19353 }, { "epoch": 3.159340435084282, "grad_norm": 3.479020118713379, "learning_rate": 1.3839269936217997e-05, "loss": 0.4872, "step": 19354 }, { "epoch": 3.1595036937267866, "grad_norm": 3.58414363861084, "learning_rate": 1.3838677921830399e-05, "loss": 0.4673, "step": 19355 }, { "epoch": 3.159666952369291, "grad_norm": 3.199801206588745, "learning_rate": 1.3838085891663445e-05, "loss": 0.4578, "step": 19356 }, { "epoch": 3.1598302110117955, "grad_norm": 3.436429738998413, "learning_rate": 1.383749384571957e-05, "loss": 0.4531, "step": 19357 }, { "epoch": 3.1599934696543, "grad_norm": 3.1152875423431396, "learning_rate": 1.3836901784001211e-05, "loss": 0.4182, "step": 19358 }, { "epoch": 3.1601567282968044, "grad_norm": 3.332991600036621, "learning_rate": 1.3836309706510792e-05, "loss": 0.455, "step": 19359 }, { "epoch": 3.1603199869393084, "grad_norm": 3.2145135402679443, "learning_rate": 1.3835717613250755e-05, "loss": 0.4299, "step": 19360 }, { "epoch": 3.160483245581813, "grad_norm": 3.029621124267578, "learning_rate": 1.383512550422353e-05, "loss": 0.4761, "step": 19361 }, { "epoch": 3.1606465042243173, "grad_norm": 2.948026180267334, "learning_rate": 1.3834533379431551e-05, "loss": 0.4238, "step": 19362 }, { "epoch": 3.1608097628668217, "grad_norm": 3.2229554653167725, "learning_rate": 1.3833941238877257e-05, "loss": 0.4735, "step": 19363 }, { "epoch": 3.160973021509326, "grad_norm": 2.7280328273773193, "learning_rate": 1.3833349082563077e-05, "loss": 0.4011, "step": 19364 }, { "epoch": 3.1611362801518306, "grad_norm": 3.6612894535064697, "learning_rate": 1.3832756910491445e-05, "loss": 0.5437, "step": 19365 }, { "epoch": 3.161299538794335, "grad_norm": 3.051605701446533, "learning_rate": 1.3832164722664796e-05, "loss": 0.4682, "step": 19366 }, { "epoch": 3.1614627974368394, "grad_norm": 2.620988607406616, "learning_rate": 1.3831572519085566e-05, "loss": 0.3966, "step": 19367 }, { "epoch": 3.161626056079344, "grad_norm": 3.218404769897461, "learning_rate": 1.3830980299756189e-05, "loss": 0.4509, "step": 19368 }, { "epoch": 3.1617893147218483, "grad_norm": 2.9803030490875244, "learning_rate": 1.3830388064679096e-05, "loss": 0.4454, "step": 19369 }, { "epoch": 3.1619525733643523, "grad_norm": 3.0796029567718506, "learning_rate": 1.3829795813856726e-05, "loss": 0.4631, "step": 19370 }, { "epoch": 3.1621158320068568, "grad_norm": 3.4745023250579834, "learning_rate": 1.3829203547291512e-05, "loss": 0.5135, "step": 19371 }, { "epoch": 3.162279090649361, "grad_norm": 3.2785990238189697, "learning_rate": 1.3828611264985887e-05, "loss": 0.4859, "step": 19372 }, { "epoch": 3.1624423492918656, "grad_norm": 3.79667592048645, "learning_rate": 1.3828018966942287e-05, "loss": 0.4951, "step": 19373 }, { "epoch": 3.16260560793437, "grad_norm": 3.897559642791748, "learning_rate": 1.3827426653163145e-05, "loss": 0.5102, "step": 19374 }, { "epoch": 3.1627688665768745, "grad_norm": 3.486729860305786, "learning_rate": 1.3826834323650899e-05, "loss": 0.5041, "step": 19375 }, { "epoch": 3.162932125219379, "grad_norm": 3.284621238708496, "learning_rate": 1.3826241978407981e-05, "loss": 0.5445, "step": 19376 }, { "epoch": 3.1630953838618834, "grad_norm": 3.2026121616363525, "learning_rate": 1.382564961743683e-05, "loss": 0.4902, "step": 19377 }, { "epoch": 3.1632586425043874, "grad_norm": 3.186338424682617, "learning_rate": 1.3825057240739874e-05, "loss": 0.5451, "step": 19378 }, { "epoch": 3.163421901146892, "grad_norm": 3.686251640319824, "learning_rate": 1.3824464848319554e-05, "loss": 0.5191, "step": 19379 }, { "epoch": 3.1635851597893963, "grad_norm": 3.0284347534179688, "learning_rate": 1.3823872440178303e-05, "loss": 0.4568, "step": 19380 }, { "epoch": 3.1637484184319007, "grad_norm": 3.0679123401641846, "learning_rate": 1.3823280016318554e-05, "loss": 0.4397, "step": 19381 }, { "epoch": 3.163911677074405, "grad_norm": 3.5496151447296143, "learning_rate": 1.3822687576742748e-05, "loss": 0.5601, "step": 19382 }, { "epoch": 3.1640749357169096, "grad_norm": 3.261013984680176, "learning_rate": 1.3822095121453316e-05, "loss": 0.5188, "step": 19383 }, { "epoch": 3.164238194359414, "grad_norm": 2.724949598312378, "learning_rate": 1.3821502650452692e-05, "loss": 0.4293, "step": 19384 }, { "epoch": 3.1644014530019184, "grad_norm": 3.2308332920074463, "learning_rate": 1.3820910163743314e-05, "loss": 0.4608, "step": 19385 }, { "epoch": 3.164564711644423, "grad_norm": 3.0865657329559326, "learning_rate": 1.382031766132762e-05, "loss": 0.4426, "step": 19386 }, { "epoch": 3.164727970286927, "grad_norm": 3.388496160507202, "learning_rate": 1.3819725143208041e-05, "loss": 0.5408, "step": 19387 }, { "epoch": 3.1648912289294313, "grad_norm": 3.010249137878418, "learning_rate": 1.3819132609387016e-05, "loss": 0.4361, "step": 19388 }, { "epoch": 3.1650544875719357, "grad_norm": 2.8742053508758545, "learning_rate": 1.3818540059866973e-05, "loss": 0.4246, "step": 19389 }, { "epoch": 3.16521774621444, "grad_norm": 3.461219549179077, "learning_rate": 1.3817947494650359e-05, "loss": 0.443, "step": 19390 }, { "epoch": 3.1653810048569446, "grad_norm": 3.1689982414245605, "learning_rate": 1.3817354913739602e-05, "loss": 0.502, "step": 19391 }, { "epoch": 3.165544263499449, "grad_norm": 2.672175884246826, "learning_rate": 1.3816762317137142e-05, "loss": 0.3904, "step": 19392 }, { "epoch": 3.1657075221419535, "grad_norm": 3.0498318672180176, "learning_rate": 1.3816169704845414e-05, "loss": 0.5247, "step": 19393 }, { "epoch": 3.165870780784458, "grad_norm": 3.5064890384674072, "learning_rate": 1.3815577076866853e-05, "loss": 0.5388, "step": 19394 }, { "epoch": 3.166034039426962, "grad_norm": 3.0390026569366455, "learning_rate": 1.3814984433203893e-05, "loss": 0.5547, "step": 19395 }, { "epoch": 3.1661972980694664, "grad_norm": 2.9371373653411865, "learning_rate": 1.3814391773858974e-05, "loss": 0.4513, "step": 19396 }, { "epoch": 3.166360556711971, "grad_norm": 3.0130438804626465, "learning_rate": 1.381379909883453e-05, "loss": 0.4553, "step": 19397 }, { "epoch": 3.1665238153544752, "grad_norm": 2.7648277282714844, "learning_rate": 1.3813206408133e-05, "loss": 0.4679, "step": 19398 }, { "epoch": 3.1666870739969797, "grad_norm": 3.3065378665924072, "learning_rate": 1.3812613701756816e-05, "loss": 0.5447, "step": 19399 }, { "epoch": 3.166850332639484, "grad_norm": 3.0349764823913574, "learning_rate": 1.3812020979708418e-05, "loss": 0.4914, "step": 19400 }, { "epoch": 3.1670135912819886, "grad_norm": 2.714477300643921, "learning_rate": 1.381142824199024e-05, "loss": 0.4193, "step": 19401 }, { "epoch": 3.167176849924493, "grad_norm": 2.6043083667755127, "learning_rate": 1.381083548860472e-05, "loss": 0.3661, "step": 19402 }, { "epoch": 3.1673401085669974, "grad_norm": 2.944786787033081, "learning_rate": 1.3810242719554296e-05, "loss": 0.4274, "step": 19403 }, { "epoch": 3.167503367209502, "grad_norm": 3.2635767459869385, "learning_rate": 1.3809649934841402e-05, "loss": 0.5052, "step": 19404 }, { "epoch": 3.167666625852006, "grad_norm": 3.1410653591156006, "learning_rate": 1.3809057134468475e-05, "loss": 0.48, "step": 19405 }, { "epoch": 3.1678298844945103, "grad_norm": 3.104199171066284, "learning_rate": 1.380846431843795e-05, "loss": 0.5036, "step": 19406 }, { "epoch": 3.1679931431370147, "grad_norm": 3.3236899375915527, "learning_rate": 1.3807871486752272e-05, "loss": 0.5086, "step": 19407 }, { "epoch": 3.168156401779519, "grad_norm": 3.1329848766326904, "learning_rate": 1.3807278639413868e-05, "loss": 0.3921, "step": 19408 }, { "epoch": 3.1683196604220236, "grad_norm": 3.757582902908325, "learning_rate": 1.3806685776425179e-05, "loss": 0.5973, "step": 19409 }, { "epoch": 3.168482919064528, "grad_norm": 2.924752950668335, "learning_rate": 1.3806092897788643e-05, "loss": 0.4462, "step": 19410 }, { "epoch": 3.1686461777070325, "grad_norm": 2.792098045349121, "learning_rate": 1.3805500003506698e-05, "loss": 0.4054, "step": 19411 }, { "epoch": 3.168809436349537, "grad_norm": 3.3263144493103027, "learning_rate": 1.3804907093581779e-05, "loss": 0.4898, "step": 19412 }, { "epoch": 3.168972694992041, "grad_norm": 3.102403402328491, "learning_rate": 1.3804314168016321e-05, "loss": 0.4771, "step": 19413 }, { "epoch": 3.1691359536345454, "grad_norm": 3.4896960258483887, "learning_rate": 1.3803721226812766e-05, "loss": 0.4759, "step": 19414 }, { "epoch": 3.16929921227705, "grad_norm": 3.267589807510376, "learning_rate": 1.380312826997355e-05, "loss": 0.5087, "step": 19415 }, { "epoch": 3.1694624709195542, "grad_norm": 2.934627056121826, "learning_rate": 1.3802535297501108e-05, "loss": 0.419, "step": 19416 }, { "epoch": 3.1696257295620587, "grad_norm": 3.067260980606079, "learning_rate": 1.3801942309397878e-05, "loss": 0.4293, "step": 19417 }, { "epoch": 3.169788988204563, "grad_norm": 3.749145746231079, "learning_rate": 1.3801349305666301e-05, "loss": 0.5867, "step": 19418 }, { "epoch": 3.1699522468470676, "grad_norm": 3.2907586097717285, "learning_rate": 1.3800756286308811e-05, "loss": 0.4215, "step": 19419 }, { "epoch": 3.170115505489572, "grad_norm": 2.461421251296997, "learning_rate": 1.380016325132785e-05, "loss": 0.393, "step": 19420 }, { "epoch": 3.1702787641320764, "grad_norm": 3.609262704849243, "learning_rate": 1.379957020072585e-05, "loss": 0.4903, "step": 19421 }, { "epoch": 3.1704420227745804, "grad_norm": 3.51957631111145, "learning_rate": 1.379897713450525e-05, "loss": 0.4716, "step": 19422 }, { "epoch": 3.170605281417085, "grad_norm": 3.3506956100463867, "learning_rate": 1.3798384052668495e-05, "loss": 0.4081, "step": 19423 }, { "epoch": 3.1707685400595893, "grad_norm": 2.685702323913574, "learning_rate": 1.3797790955218014e-05, "loss": 0.3735, "step": 19424 }, { "epoch": 3.1709317987020937, "grad_norm": 3.9662089347839355, "learning_rate": 1.3797197842156247e-05, "loss": 0.5986, "step": 19425 }, { "epoch": 3.171095057344598, "grad_norm": 3.58266019821167, "learning_rate": 1.3796604713485635e-05, "loss": 0.4137, "step": 19426 }, { "epoch": 3.1712583159871026, "grad_norm": 2.8715031147003174, "learning_rate": 1.3796011569208614e-05, "loss": 0.456, "step": 19427 }, { "epoch": 3.171421574629607, "grad_norm": 3.4300174713134766, "learning_rate": 1.3795418409327624e-05, "loss": 0.4941, "step": 19428 }, { "epoch": 3.1715848332721115, "grad_norm": 3.279202699661255, "learning_rate": 1.3794825233845101e-05, "loss": 0.4648, "step": 19429 }, { "epoch": 3.171748091914616, "grad_norm": 2.6523098945617676, "learning_rate": 1.3794232042763485e-05, "loss": 0.458, "step": 19430 }, { "epoch": 3.17191135055712, "grad_norm": 3.6500370502471924, "learning_rate": 1.3793638836085213e-05, "loss": 0.5612, "step": 19431 }, { "epoch": 3.1720746091996244, "grad_norm": 2.5702805519104004, "learning_rate": 1.3793045613812728e-05, "loss": 0.3927, "step": 19432 }, { "epoch": 3.172237867842129, "grad_norm": 3.436835765838623, "learning_rate": 1.3792452375948458e-05, "loss": 0.4877, "step": 19433 }, { "epoch": 3.1724011264846332, "grad_norm": 2.9152326583862305, "learning_rate": 1.3791859122494853e-05, "loss": 0.4517, "step": 19434 }, { "epoch": 3.1725643851271377, "grad_norm": 3.5335495471954346, "learning_rate": 1.3791265853454345e-05, "loss": 0.5071, "step": 19435 }, { "epoch": 3.172727643769642, "grad_norm": 2.328120470046997, "learning_rate": 1.3790672568829375e-05, "loss": 0.4019, "step": 19436 }, { "epoch": 3.1728909024121466, "grad_norm": 3.0152010917663574, "learning_rate": 1.3790079268622382e-05, "loss": 0.449, "step": 19437 }, { "epoch": 3.173054161054651, "grad_norm": 3.3529012203216553, "learning_rate": 1.3789485952835805e-05, "loss": 0.4678, "step": 19438 }, { "epoch": 3.1732174196971554, "grad_norm": 3.358201742172241, "learning_rate": 1.378889262147208e-05, "loss": 0.5263, "step": 19439 }, { "epoch": 3.1733806783396594, "grad_norm": 2.2270596027374268, "learning_rate": 1.378829927453365e-05, "loss": 0.4104, "step": 19440 }, { "epoch": 3.173543936982164, "grad_norm": 3.007349967956543, "learning_rate": 1.378770591202295e-05, "loss": 0.4079, "step": 19441 }, { "epoch": 3.1737071956246683, "grad_norm": 2.671645402908325, "learning_rate": 1.3787112533942426e-05, "loss": 0.3562, "step": 19442 }, { "epoch": 3.1738704542671727, "grad_norm": 3.424144744873047, "learning_rate": 1.3786519140294507e-05, "loss": 0.4358, "step": 19443 }, { "epoch": 3.174033712909677, "grad_norm": 3.253551959991455, "learning_rate": 1.3785925731081638e-05, "loss": 0.4782, "step": 19444 }, { "epoch": 3.1741969715521816, "grad_norm": 3.357569456100464, "learning_rate": 1.3785332306306258e-05, "loss": 0.4904, "step": 19445 }, { "epoch": 3.174360230194686, "grad_norm": 4.215653419494629, "learning_rate": 1.3784738865970811e-05, "loss": 0.5698, "step": 19446 }, { "epoch": 3.1745234888371905, "grad_norm": 3.218761682510376, "learning_rate": 1.3784145410077727e-05, "loss": 0.4951, "step": 19447 }, { "epoch": 3.1746867474796945, "grad_norm": 3.7771689891815186, "learning_rate": 1.3783551938629451e-05, "loss": 0.6376, "step": 19448 }, { "epoch": 3.174850006122199, "grad_norm": 3.2024049758911133, "learning_rate": 1.3782958451628423e-05, "loss": 0.4867, "step": 19449 }, { "epoch": 3.1750132647647034, "grad_norm": 2.6798019409179688, "learning_rate": 1.3782364949077078e-05, "loss": 0.3828, "step": 19450 }, { "epoch": 3.175176523407208, "grad_norm": 3.6345725059509277, "learning_rate": 1.3781771430977861e-05, "loss": 0.4717, "step": 19451 }, { "epoch": 3.1753397820497122, "grad_norm": 2.4218461513519287, "learning_rate": 1.3781177897333211e-05, "loss": 0.3643, "step": 19452 }, { "epoch": 3.1755030406922167, "grad_norm": 3.072251796722412, "learning_rate": 1.3780584348145565e-05, "loss": 0.4186, "step": 19453 }, { "epoch": 3.175666299334721, "grad_norm": 3.2426698207855225, "learning_rate": 1.3779990783417366e-05, "loss": 0.4499, "step": 19454 }, { "epoch": 3.1758295579772255, "grad_norm": 3.17093825340271, "learning_rate": 1.377939720315105e-05, "loss": 0.4378, "step": 19455 }, { "epoch": 3.17599281661973, "grad_norm": 3.2158076763153076, "learning_rate": 1.3778803607349058e-05, "loss": 0.4809, "step": 19456 }, { "epoch": 3.1761560752622344, "grad_norm": 2.8631439208984375, "learning_rate": 1.3778209996013834e-05, "loss": 0.4091, "step": 19457 }, { "epoch": 3.1763193339047384, "grad_norm": 2.9700846672058105, "learning_rate": 1.3777616369147814e-05, "loss": 0.4203, "step": 19458 }, { "epoch": 3.176482592547243, "grad_norm": 3.1052072048187256, "learning_rate": 1.377702272675344e-05, "loss": 0.4617, "step": 19459 }, { "epoch": 3.1766458511897473, "grad_norm": 2.9744837284088135, "learning_rate": 1.377642906883315e-05, "loss": 0.3954, "step": 19460 }, { "epoch": 3.1768091098322517, "grad_norm": 2.766819477081299, "learning_rate": 1.3775835395389388e-05, "loss": 0.396, "step": 19461 }, { "epoch": 3.176972368474756, "grad_norm": 3.3832547664642334, "learning_rate": 1.377524170642459e-05, "loss": 0.5464, "step": 19462 }, { "epoch": 3.1771356271172606, "grad_norm": 3.1301615238189697, "learning_rate": 1.3774648001941203e-05, "loss": 0.4737, "step": 19463 }, { "epoch": 3.177298885759765, "grad_norm": 2.6480844020843506, "learning_rate": 1.377405428194166e-05, "loss": 0.3941, "step": 19464 }, { "epoch": 3.1774621444022695, "grad_norm": 2.9673922061920166, "learning_rate": 1.3773460546428408e-05, "loss": 0.4386, "step": 19465 }, { "epoch": 3.1776254030447735, "grad_norm": 2.4757862091064453, "learning_rate": 1.377286679540388e-05, "loss": 0.3676, "step": 19466 }, { "epoch": 3.177788661687278, "grad_norm": 3.4612340927124023, "learning_rate": 1.3772273028870525e-05, "loss": 0.5026, "step": 19467 }, { "epoch": 3.1779519203297824, "grad_norm": 2.9843947887420654, "learning_rate": 1.3771679246830778e-05, "loss": 0.432, "step": 19468 }, { "epoch": 3.178115178972287, "grad_norm": 3.1550559997558594, "learning_rate": 1.3771085449287083e-05, "loss": 0.4632, "step": 19469 }, { "epoch": 3.1782784376147912, "grad_norm": 3.21042537689209, "learning_rate": 1.377049163624188e-05, "loss": 0.4554, "step": 19470 }, { "epoch": 3.1784416962572957, "grad_norm": 3.2490134239196777, "learning_rate": 1.3769897807697608e-05, "loss": 0.444, "step": 19471 }, { "epoch": 3.1786049548998, "grad_norm": 3.182410478591919, "learning_rate": 1.3769303963656711e-05, "loss": 0.5191, "step": 19472 }, { "epoch": 3.1787682135423045, "grad_norm": 2.952540159225464, "learning_rate": 1.3768710104121628e-05, "loss": 0.4313, "step": 19473 }, { "epoch": 3.178931472184809, "grad_norm": 3.304337978363037, "learning_rate": 1.37681162290948e-05, "loss": 0.5178, "step": 19474 }, { "epoch": 3.179094730827313, "grad_norm": 4.404248237609863, "learning_rate": 1.3767522338578671e-05, "loss": 0.4743, "step": 19475 }, { "epoch": 3.1792579894698174, "grad_norm": 3.432497024536133, "learning_rate": 1.3766928432575681e-05, "loss": 0.5641, "step": 19476 }, { "epoch": 3.179421248112322, "grad_norm": 2.6594433784484863, "learning_rate": 1.3766334511088266e-05, "loss": 0.406, "step": 19477 }, { "epoch": 3.1795845067548263, "grad_norm": 2.953000068664551, "learning_rate": 1.3765740574118876e-05, "loss": 0.4029, "step": 19478 }, { "epoch": 3.1797477653973307, "grad_norm": 3.6462249755859375, "learning_rate": 1.3765146621669944e-05, "loss": 0.5385, "step": 19479 }, { "epoch": 3.179911024039835, "grad_norm": 3.491415023803711, "learning_rate": 1.376455265374392e-05, "loss": 0.4482, "step": 19480 }, { "epoch": 3.1800742826823396, "grad_norm": 3.2757985591888428, "learning_rate": 1.3763958670343242e-05, "loss": 0.4377, "step": 19481 }, { "epoch": 3.180237541324844, "grad_norm": 2.4338834285736084, "learning_rate": 1.3763364671470352e-05, "loss": 0.3926, "step": 19482 }, { "epoch": 3.180400799967348, "grad_norm": 3.2004506587982178, "learning_rate": 1.3762770657127687e-05, "loss": 0.4834, "step": 19483 }, { "epoch": 3.1805640586098525, "grad_norm": 3.7003042697906494, "learning_rate": 1.3762176627317691e-05, "loss": 0.5239, "step": 19484 }, { "epoch": 3.180727317252357, "grad_norm": 3.0179390907287598, "learning_rate": 1.3761582582042811e-05, "loss": 0.4044, "step": 19485 }, { "epoch": 3.1808905758948613, "grad_norm": 3.3888232707977295, "learning_rate": 1.3760988521305487e-05, "loss": 0.5454, "step": 19486 }, { "epoch": 3.181053834537366, "grad_norm": 2.7524161338806152, "learning_rate": 1.3760394445108158e-05, "loss": 0.3951, "step": 19487 }, { "epoch": 3.1812170931798702, "grad_norm": 3.2375593185424805, "learning_rate": 1.3759800353453268e-05, "loss": 0.4978, "step": 19488 }, { "epoch": 3.1813803518223747, "grad_norm": 3.1519205570220947, "learning_rate": 1.3759206246343253e-05, "loss": 0.46, "step": 19489 }, { "epoch": 3.181543610464879, "grad_norm": 2.907975435256958, "learning_rate": 1.3758612123780569e-05, "loss": 0.4318, "step": 19490 }, { "epoch": 3.1817068691073835, "grad_norm": 3.2824866771698, "learning_rate": 1.3758017985767645e-05, "loss": 0.4826, "step": 19491 }, { "epoch": 3.181870127749888, "grad_norm": 2.96297550201416, "learning_rate": 1.3757423832306926e-05, "loss": 0.4015, "step": 19492 }, { "epoch": 3.182033386392392, "grad_norm": 2.609302520751953, "learning_rate": 1.375682966340086e-05, "loss": 0.4149, "step": 19493 }, { "epoch": 3.1821966450348964, "grad_norm": 3.0456767082214355, "learning_rate": 1.3756235479051882e-05, "loss": 0.4849, "step": 19494 }, { "epoch": 3.182359903677401, "grad_norm": 3.6279165744781494, "learning_rate": 1.3755641279262444e-05, "loss": 0.4815, "step": 19495 }, { "epoch": 3.1825231623199053, "grad_norm": 3.4291224479675293, "learning_rate": 1.3755047064034979e-05, "loss": 0.5259, "step": 19496 }, { "epoch": 3.1826864209624097, "grad_norm": 2.5584707260131836, "learning_rate": 1.3754452833371934e-05, "loss": 0.3876, "step": 19497 }, { "epoch": 3.182849679604914, "grad_norm": 3.349515199661255, "learning_rate": 1.3753858587275753e-05, "loss": 0.5312, "step": 19498 }, { "epoch": 3.1830129382474186, "grad_norm": 2.9601964950561523, "learning_rate": 1.3753264325748874e-05, "loss": 0.4758, "step": 19499 }, { "epoch": 3.183176196889923, "grad_norm": 3.4271459579467773, "learning_rate": 1.3752670048793744e-05, "loss": 0.4742, "step": 19500 }, { "epoch": 3.183339455532427, "grad_norm": 3.2449400424957275, "learning_rate": 1.3752075756412804e-05, "loss": 0.5076, "step": 19501 }, { "epoch": 3.1835027141749315, "grad_norm": 3.906176805496216, "learning_rate": 1.3751481448608497e-05, "loss": 0.4992, "step": 19502 }, { "epoch": 3.183665972817436, "grad_norm": 3.421959638595581, "learning_rate": 1.3750887125383265e-05, "loss": 0.5354, "step": 19503 }, { "epoch": 3.1838292314599403, "grad_norm": 2.966932773590088, "learning_rate": 1.3750292786739554e-05, "loss": 0.4837, "step": 19504 }, { "epoch": 3.183992490102445, "grad_norm": 2.9877874851226807, "learning_rate": 1.3749698432679806e-05, "loss": 0.4389, "step": 19505 }, { "epoch": 3.184155748744949, "grad_norm": 3.1517953872680664, "learning_rate": 1.3749104063206464e-05, "loss": 0.414, "step": 19506 }, { "epoch": 3.1843190073874537, "grad_norm": 3.170168161392212, "learning_rate": 1.3748509678321969e-05, "loss": 0.4318, "step": 19507 }, { "epoch": 3.184482266029958, "grad_norm": 3.3106095790863037, "learning_rate": 1.3747915278028767e-05, "loss": 0.4829, "step": 19508 }, { "epoch": 3.1846455246724625, "grad_norm": 3.143361806869507, "learning_rate": 1.3747320862329301e-05, "loss": 0.4453, "step": 19509 }, { "epoch": 3.1848087833149665, "grad_norm": 3.739095687866211, "learning_rate": 1.3746726431226015e-05, "loss": 0.4662, "step": 19510 }, { "epoch": 3.184972041957471, "grad_norm": 2.9984915256500244, "learning_rate": 1.3746131984721346e-05, "loss": 0.4199, "step": 19511 }, { "epoch": 3.1851353005999754, "grad_norm": 2.6696584224700928, "learning_rate": 1.374553752281775e-05, "loss": 0.416, "step": 19512 }, { "epoch": 3.18529855924248, "grad_norm": 3.3234379291534424, "learning_rate": 1.3744943045517659e-05, "loss": 0.5459, "step": 19513 }, { "epoch": 3.1854618178849843, "grad_norm": 2.930182695388794, "learning_rate": 1.3744348552823522e-05, "loss": 0.4376, "step": 19514 }, { "epoch": 3.1856250765274887, "grad_norm": 3.467045783996582, "learning_rate": 1.3743754044737783e-05, "loss": 0.5585, "step": 19515 }, { "epoch": 3.185788335169993, "grad_norm": 3.1999752521514893, "learning_rate": 1.3743159521262883e-05, "loss": 0.5179, "step": 19516 }, { "epoch": 3.1859515938124976, "grad_norm": 2.924870014190674, "learning_rate": 1.374256498240127e-05, "loss": 0.3815, "step": 19517 }, { "epoch": 3.186114852455002, "grad_norm": 3.517235517501831, "learning_rate": 1.3741970428155383e-05, "loss": 0.5475, "step": 19518 }, { "epoch": 3.186278111097506, "grad_norm": 2.5984761714935303, "learning_rate": 1.374137585852767e-05, "loss": 0.4109, "step": 19519 }, { "epoch": 3.1864413697400105, "grad_norm": 3.5367045402526855, "learning_rate": 1.3740781273520573e-05, "loss": 0.5062, "step": 19520 }, { "epoch": 3.186604628382515, "grad_norm": 3.1941192150115967, "learning_rate": 1.3740186673136537e-05, "loss": 0.4339, "step": 19521 }, { "epoch": 3.1867678870250193, "grad_norm": 3.9055631160736084, "learning_rate": 1.3739592057378005e-05, "loss": 0.5111, "step": 19522 }, { "epoch": 3.186931145667524, "grad_norm": 3.031395435333252, "learning_rate": 1.3738997426247422e-05, "loss": 0.417, "step": 19523 }, { "epoch": 3.187094404310028, "grad_norm": 3.6679935455322266, "learning_rate": 1.3738402779747233e-05, "loss": 0.5596, "step": 19524 }, { "epoch": 3.1872576629525327, "grad_norm": 2.849057197570801, "learning_rate": 1.3737808117879886e-05, "loss": 0.3681, "step": 19525 }, { "epoch": 3.187420921595037, "grad_norm": 2.8264150619506836, "learning_rate": 1.3737213440647817e-05, "loss": 0.4284, "step": 19526 }, { "epoch": 3.1875841802375415, "grad_norm": 3.0576210021972656, "learning_rate": 1.3736618748053472e-05, "loss": 0.4639, "step": 19527 }, { "epoch": 3.1877474388800455, "grad_norm": 3.1679625511169434, "learning_rate": 1.3736024040099301e-05, "loss": 0.4656, "step": 19528 }, { "epoch": 3.18791069752255, "grad_norm": 3.088937520980835, "learning_rate": 1.3735429316787747e-05, "loss": 0.4721, "step": 19529 }, { "epoch": 3.1880739561650544, "grad_norm": 2.8231282234191895, "learning_rate": 1.3734834578121255e-05, "loss": 0.3806, "step": 19530 }, { "epoch": 3.188237214807559, "grad_norm": 3.3299272060394287, "learning_rate": 1.3734239824102264e-05, "loss": 0.4936, "step": 19531 }, { "epoch": 3.1884004734500633, "grad_norm": 3.490903615951538, "learning_rate": 1.3733645054733228e-05, "loss": 0.4318, "step": 19532 }, { "epoch": 3.1885637320925677, "grad_norm": 3.752730369567871, "learning_rate": 1.373305027001658e-05, "loss": 0.5625, "step": 19533 }, { "epoch": 3.188726990735072, "grad_norm": 3.494485378265381, "learning_rate": 1.3732455469954777e-05, "loss": 0.4775, "step": 19534 }, { "epoch": 3.1888902493775766, "grad_norm": 3.809258222579956, "learning_rate": 1.373186065455026e-05, "loss": 0.609, "step": 19535 }, { "epoch": 3.1890535080200806, "grad_norm": 3.270876884460449, "learning_rate": 1.373126582380547e-05, "loss": 0.556, "step": 19536 }, { "epoch": 3.189216766662585, "grad_norm": 2.7451462745666504, "learning_rate": 1.3730670977722855e-05, "loss": 0.3734, "step": 19537 }, { "epoch": 3.1893800253050895, "grad_norm": 3.7788634300231934, "learning_rate": 1.3730076116304861e-05, "loss": 0.5109, "step": 19538 }, { "epoch": 3.189543283947594, "grad_norm": 2.855191946029663, "learning_rate": 1.3729481239553934e-05, "loss": 0.5039, "step": 19539 }, { "epoch": 3.1897065425900983, "grad_norm": 2.7999444007873535, "learning_rate": 1.3728886347472517e-05, "loss": 0.38, "step": 19540 }, { "epoch": 3.1898698012326028, "grad_norm": 3.2628982067108154, "learning_rate": 1.3728291440063055e-05, "loss": 0.3883, "step": 19541 }, { "epoch": 3.190033059875107, "grad_norm": 3.6365342140197754, "learning_rate": 1.3727696517327995e-05, "loss": 0.527, "step": 19542 }, { "epoch": 3.1901963185176117, "grad_norm": 3.051945924758911, "learning_rate": 1.3727101579269784e-05, "loss": 0.468, "step": 19543 }, { "epoch": 3.190359577160116, "grad_norm": 3.2730813026428223, "learning_rate": 1.3726506625890862e-05, "loss": 0.4594, "step": 19544 }, { "epoch": 3.1905228358026205, "grad_norm": 2.7031121253967285, "learning_rate": 1.3725911657193682e-05, "loss": 0.4545, "step": 19545 }, { "epoch": 3.1906860944451245, "grad_norm": 3.11234188079834, "learning_rate": 1.3725316673180684e-05, "loss": 0.4057, "step": 19546 }, { "epoch": 3.190849353087629, "grad_norm": 3.5361709594726562, "learning_rate": 1.3724721673854317e-05, "loss": 0.5793, "step": 19547 }, { "epoch": 3.1910126117301334, "grad_norm": 3.0952067375183105, "learning_rate": 1.3724126659217026e-05, "loss": 0.4257, "step": 19548 }, { "epoch": 3.191175870372638, "grad_norm": 3.7362029552459717, "learning_rate": 1.3723531629271256e-05, "loss": 0.578, "step": 19549 }, { "epoch": 3.1913391290151423, "grad_norm": 2.8720545768737793, "learning_rate": 1.3722936584019453e-05, "loss": 0.4533, "step": 19550 }, { "epoch": 3.1915023876576467, "grad_norm": 3.1762914657592773, "learning_rate": 1.3722341523464064e-05, "loss": 0.5374, "step": 19551 }, { "epoch": 3.191665646300151, "grad_norm": 2.967371940612793, "learning_rate": 1.3721746447607534e-05, "loss": 0.5221, "step": 19552 }, { "epoch": 3.1918289049426556, "grad_norm": 3.066943407058716, "learning_rate": 1.372115135645231e-05, "loss": 0.4398, "step": 19553 }, { "epoch": 3.1919921635851596, "grad_norm": 3.225285768508911, "learning_rate": 1.3720556250000838e-05, "loss": 0.4798, "step": 19554 }, { "epoch": 3.192155422227664, "grad_norm": 3.310488700866699, "learning_rate": 1.3719961128255563e-05, "loss": 0.5062, "step": 19555 }, { "epoch": 3.1923186808701685, "grad_norm": 3.218207597732544, "learning_rate": 1.3719365991218933e-05, "loss": 0.4347, "step": 19556 }, { "epoch": 3.192481939512673, "grad_norm": 3.2851109504699707, "learning_rate": 1.3718770838893395e-05, "loss": 0.4585, "step": 19557 }, { "epoch": 3.1926451981551773, "grad_norm": 3.9377622604370117, "learning_rate": 1.3718175671281393e-05, "loss": 0.4435, "step": 19558 }, { "epoch": 3.1928084567976818, "grad_norm": 3.1113429069519043, "learning_rate": 1.3717580488385378e-05, "loss": 0.4898, "step": 19559 }, { "epoch": 3.192971715440186, "grad_norm": 3.018247365951538, "learning_rate": 1.3716985290207787e-05, "loss": 0.4737, "step": 19560 }, { "epoch": 3.1931349740826906, "grad_norm": 3.255955219268799, "learning_rate": 1.3716390076751079e-05, "loss": 0.4917, "step": 19561 }, { "epoch": 3.193298232725195, "grad_norm": 2.4713313579559326, "learning_rate": 1.3715794848017691e-05, "loss": 0.4186, "step": 19562 }, { "epoch": 3.193461491367699, "grad_norm": 2.8765902519226074, "learning_rate": 1.3715199604010075e-05, "loss": 0.4062, "step": 19563 }, { "epoch": 3.1936247500102035, "grad_norm": 3.5367558002471924, "learning_rate": 1.3714604344730678e-05, "loss": 0.499, "step": 19564 }, { "epoch": 3.193788008652708, "grad_norm": 3.241081953048706, "learning_rate": 1.3714009070181939e-05, "loss": 0.464, "step": 19565 }, { "epoch": 3.1939512672952124, "grad_norm": 2.9029362201690674, "learning_rate": 1.3713413780366317e-05, "loss": 0.4309, "step": 19566 }, { "epoch": 3.194114525937717, "grad_norm": 3.224933385848999, "learning_rate": 1.371281847528625e-05, "loss": 0.4706, "step": 19567 }, { "epoch": 3.1942777845802213, "grad_norm": 2.9049839973449707, "learning_rate": 1.3712223154944192e-05, "loss": 0.4545, "step": 19568 }, { "epoch": 3.1944410432227257, "grad_norm": 3.6385600566864014, "learning_rate": 1.3711627819342583e-05, "loss": 0.607, "step": 19569 }, { "epoch": 3.19460430186523, "grad_norm": 3.1558074951171875, "learning_rate": 1.3711032468483873e-05, "loss": 0.4251, "step": 19570 }, { "epoch": 3.1947675605077346, "grad_norm": 3.051395893096924, "learning_rate": 1.3710437102370511e-05, "loss": 0.4502, "step": 19571 }, { "epoch": 3.1949308191502386, "grad_norm": 4.128633499145508, "learning_rate": 1.3709841721004944e-05, "loss": 0.605, "step": 19572 }, { "epoch": 3.195094077792743, "grad_norm": 3.0117411613464355, "learning_rate": 1.3709246324389616e-05, "loss": 0.4537, "step": 19573 }, { "epoch": 3.1952573364352475, "grad_norm": 3.0395326614379883, "learning_rate": 1.3708650912526981e-05, "loss": 0.4665, "step": 19574 }, { "epoch": 3.195420595077752, "grad_norm": 3.26064133644104, "learning_rate": 1.370805548541948e-05, "loss": 0.5, "step": 19575 }, { "epoch": 3.1955838537202563, "grad_norm": 2.8437533378601074, "learning_rate": 1.3707460043069563e-05, "loss": 0.3882, "step": 19576 }, { "epoch": 3.1957471123627608, "grad_norm": 3.0982344150543213, "learning_rate": 1.3706864585479675e-05, "loss": 0.4356, "step": 19577 }, { "epoch": 3.195910371005265, "grad_norm": 2.953169345855713, "learning_rate": 1.3706269112652271e-05, "loss": 0.4533, "step": 19578 }, { "epoch": 3.1960736296477696, "grad_norm": 2.6470537185668945, "learning_rate": 1.3705673624589792e-05, "loss": 0.3933, "step": 19579 }, { "epoch": 3.196236888290274, "grad_norm": 3.01582407951355, "learning_rate": 1.370507812129469e-05, "loss": 0.464, "step": 19580 }, { "epoch": 3.196400146932778, "grad_norm": 2.989126205444336, "learning_rate": 1.370448260276941e-05, "loss": 0.3973, "step": 19581 }, { "epoch": 3.1965634055752825, "grad_norm": 2.7601678371429443, "learning_rate": 1.3703887069016397e-05, "loss": 0.3494, "step": 19582 }, { "epoch": 3.196726664217787, "grad_norm": 3.3647537231445312, "learning_rate": 1.3703291520038107e-05, "loss": 0.5239, "step": 19583 }, { "epoch": 3.1968899228602914, "grad_norm": 3.4273672103881836, "learning_rate": 1.3702695955836984e-05, "loss": 0.5517, "step": 19584 }, { "epoch": 3.197053181502796, "grad_norm": 3.0780632495880127, "learning_rate": 1.3702100376415474e-05, "loss": 0.4646, "step": 19585 }, { "epoch": 3.1972164401453003, "grad_norm": 3.4535722732543945, "learning_rate": 1.3701504781776026e-05, "loss": 0.5856, "step": 19586 }, { "epoch": 3.1973796987878047, "grad_norm": 3.3552167415618896, "learning_rate": 1.3700909171921092e-05, "loss": 0.4783, "step": 19587 }, { "epoch": 3.197542957430309, "grad_norm": 2.4248151779174805, "learning_rate": 1.370031354685312e-05, "loss": 0.3977, "step": 19588 }, { "epoch": 3.197706216072813, "grad_norm": 3.3295843601226807, "learning_rate": 1.3699717906574552e-05, "loss": 0.461, "step": 19589 }, { "epoch": 3.1978694747153176, "grad_norm": 3.1575064659118652, "learning_rate": 1.3699122251087843e-05, "loss": 0.5001, "step": 19590 }, { "epoch": 3.198032733357822, "grad_norm": 3.684641122817993, "learning_rate": 1.3698526580395438e-05, "loss": 0.5063, "step": 19591 }, { "epoch": 3.1981959920003264, "grad_norm": 2.9190452098846436, "learning_rate": 1.3697930894499786e-05, "loss": 0.435, "step": 19592 }, { "epoch": 3.198359250642831, "grad_norm": 3.4826548099517822, "learning_rate": 1.3697335193403339e-05, "loss": 0.4548, "step": 19593 }, { "epoch": 3.1985225092853353, "grad_norm": 3.449708938598633, "learning_rate": 1.369673947710854e-05, "loss": 0.4823, "step": 19594 }, { "epoch": 3.1986857679278398, "grad_norm": 3.3376526832580566, "learning_rate": 1.3696143745617842e-05, "loss": 0.5379, "step": 19595 }, { "epoch": 3.198849026570344, "grad_norm": 4.244711875915527, "learning_rate": 1.3695547998933693e-05, "loss": 0.518, "step": 19596 }, { "epoch": 3.1990122852128486, "grad_norm": 3.1373963356018066, "learning_rate": 1.3694952237058542e-05, "loss": 0.467, "step": 19597 }, { "epoch": 3.199175543855353, "grad_norm": 3.277344226837158, "learning_rate": 1.369435645999484e-05, "loss": 0.5174, "step": 19598 }, { "epoch": 3.199338802497857, "grad_norm": 3.6010630130767822, "learning_rate": 1.3693760667745029e-05, "loss": 0.5073, "step": 19599 }, { "epoch": 3.1995020611403615, "grad_norm": 2.6063339710235596, "learning_rate": 1.3693164860311565e-05, "loss": 0.419, "step": 19600 }, { "epoch": 3.199665319782866, "grad_norm": 3.309290885925293, "learning_rate": 1.3692569037696895e-05, "loss": 0.4869, "step": 19601 }, { "epoch": 3.1998285784253704, "grad_norm": 2.853283405303955, "learning_rate": 1.3691973199903465e-05, "loss": 0.4152, "step": 19602 }, { "epoch": 3.199991837067875, "grad_norm": 3.153945207595825, "learning_rate": 1.3691377346933732e-05, "loss": 0.4979, "step": 19603 }, { "epoch": 3.2001550957103793, "grad_norm": 2.3597919940948486, "learning_rate": 1.3690781478790136e-05, "loss": 0.3557, "step": 19604 }, { "epoch": 3.2003183543528837, "grad_norm": 3.1863880157470703, "learning_rate": 1.3690185595475133e-05, "loss": 0.4649, "step": 19605 }, { "epoch": 3.200481612995388, "grad_norm": 2.6449532508850098, "learning_rate": 1.3689589696991173e-05, "loss": 0.4334, "step": 19606 }, { "epoch": 3.200644871637892, "grad_norm": 3.6850643157958984, "learning_rate": 1.3688993783340698e-05, "loss": 0.7975, "step": 19607 }, { "epoch": 3.2008081302803966, "grad_norm": 2.96682071685791, "learning_rate": 1.3688397854526168e-05, "loss": 0.4665, "step": 19608 }, { "epoch": 3.200971388922901, "grad_norm": 3.097336769104004, "learning_rate": 1.3687801910550023e-05, "loss": 0.4806, "step": 19609 }, { "epoch": 3.2011346475654054, "grad_norm": 3.21297025680542, "learning_rate": 1.3687205951414717e-05, "loss": 0.4862, "step": 19610 }, { "epoch": 3.20129790620791, "grad_norm": 2.806734085083008, "learning_rate": 1.36866099771227e-05, "loss": 0.4145, "step": 19611 }, { "epoch": 3.2014611648504143, "grad_norm": 3.201080083847046, "learning_rate": 1.368601398767642e-05, "loss": 0.5228, "step": 19612 }, { "epoch": 3.2016244234929188, "grad_norm": 2.077641725540161, "learning_rate": 1.3685417983078334e-05, "loss": 0.3416, "step": 19613 }, { "epoch": 3.201787682135423, "grad_norm": 2.9429233074188232, "learning_rate": 1.3684821963330883e-05, "loss": 0.4476, "step": 19614 }, { "epoch": 3.2019509407779276, "grad_norm": 3.2880935668945312, "learning_rate": 1.3684225928436519e-05, "loss": 0.4779, "step": 19615 }, { "epoch": 3.2021141994204316, "grad_norm": 3.289548635482788, "learning_rate": 1.3683629878397693e-05, "loss": 0.5268, "step": 19616 }, { "epoch": 3.202277458062936, "grad_norm": 3.040581703186035, "learning_rate": 1.3683033813216856e-05, "loss": 0.4395, "step": 19617 }, { "epoch": 3.2024407167054405, "grad_norm": 3.1821861267089844, "learning_rate": 1.3682437732896461e-05, "loss": 0.509, "step": 19618 }, { "epoch": 3.202603975347945, "grad_norm": 2.992260694503784, "learning_rate": 1.3681841637438953e-05, "loss": 0.4608, "step": 19619 }, { "epoch": 3.2027672339904494, "grad_norm": 3.132580280303955, "learning_rate": 1.3681245526846782e-05, "loss": 0.4464, "step": 19620 }, { "epoch": 3.202930492632954, "grad_norm": 2.4066038131713867, "learning_rate": 1.3680649401122401e-05, "loss": 0.331, "step": 19621 }, { "epoch": 3.2030937512754583, "grad_norm": 2.6878929138183594, "learning_rate": 1.368005326026826e-05, "loss": 0.3881, "step": 19622 }, { "epoch": 3.2032570099179627, "grad_norm": 3.2628533840179443, "learning_rate": 1.3679457104286812e-05, "loss": 0.5065, "step": 19623 }, { "epoch": 3.2034202685604667, "grad_norm": 3.2023425102233887, "learning_rate": 1.3678860933180504e-05, "loss": 0.4714, "step": 19624 }, { "epoch": 3.203583527202971, "grad_norm": 3.167956829071045, "learning_rate": 1.3678264746951789e-05, "loss": 0.4094, "step": 19625 }, { "epoch": 3.2037467858454756, "grad_norm": 3.292349100112915, "learning_rate": 1.3677668545603114e-05, "loss": 0.5512, "step": 19626 }, { "epoch": 3.20391004448798, "grad_norm": 3.1353776454925537, "learning_rate": 1.3677072329136935e-05, "loss": 0.4015, "step": 19627 }, { "epoch": 3.2040733031304844, "grad_norm": 2.9567296504974365, "learning_rate": 1.36764760975557e-05, "loss": 0.4662, "step": 19628 }, { "epoch": 3.204236561772989, "grad_norm": 3.5421080589294434, "learning_rate": 1.3675879850861858e-05, "loss": 0.4367, "step": 19629 }, { "epoch": 3.2043998204154933, "grad_norm": 2.3726842403411865, "learning_rate": 1.3675283589057861e-05, "loss": 0.3659, "step": 19630 }, { "epoch": 3.2045630790579978, "grad_norm": 3.2899935245513916, "learning_rate": 1.3674687312146162e-05, "loss": 0.5053, "step": 19631 }, { "epoch": 3.204726337700502, "grad_norm": 3.121394395828247, "learning_rate": 1.3674091020129212e-05, "loss": 0.5422, "step": 19632 }, { "epoch": 3.2048895963430066, "grad_norm": 3.2093544006347656, "learning_rate": 1.367349471300946e-05, "loss": 0.5514, "step": 19633 }, { "epoch": 3.2050528549855106, "grad_norm": 2.645084857940674, "learning_rate": 1.367289839078936e-05, "loss": 0.3849, "step": 19634 }, { "epoch": 3.205216113628015, "grad_norm": 3.341428518295288, "learning_rate": 1.367230205347136e-05, "loss": 0.555, "step": 19635 }, { "epoch": 3.2053793722705195, "grad_norm": 2.9411165714263916, "learning_rate": 1.3671705701057913e-05, "loss": 0.4043, "step": 19636 }, { "epoch": 3.205542630913024, "grad_norm": 3.2506847381591797, "learning_rate": 1.3671109333551473e-05, "loss": 0.4689, "step": 19637 }, { "epoch": 3.2057058895555284, "grad_norm": 2.9456636905670166, "learning_rate": 1.3670512950954486e-05, "loss": 0.465, "step": 19638 }, { "epoch": 3.205869148198033, "grad_norm": 2.4965240955352783, "learning_rate": 1.3669916553269407e-05, "loss": 0.3784, "step": 19639 }, { "epoch": 3.2060324068405373, "grad_norm": 3.033233404159546, "learning_rate": 1.3669320140498686e-05, "loss": 0.4939, "step": 19640 }, { "epoch": 3.2061956654830417, "grad_norm": 2.7869887351989746, "learning_rate": 1.3668723712644776e-05, "loss": 0.4547, "step": 19641 }, { "epoch": 3.2063589241255457, "grad_norm": 2.8781299591064453, "learning_rate": 1.3668127269710128e-05, "loss": 0.4184, "step": 19642 }, { "epoch": 3.20652218276805, "grad_norm": 3.3445632457733154, "learning_rate": 1.3667530811697194e-05, "loss": 0.4175, "step": 19643 }, { "epoch": 3.2066854414105546, "grad_norm": 3.546600103378296, "learning_rate": 1.3666934338608427e-05, "loss": 0.535, "step": 19644 }, { "epoch": 3.206848700053059, "grad_norm": 3.321800470352173, "learning_rate": 1.3666337850446274e-05, "loss": 0.4912, "step": 19645 }, { "epoch": 3.2070119586955634, "grad_norm": 3.4036707878112793, "learning_rate": 1.3665741347213194e-05, "loss": 0.5022, "step": 19646 }, { "epoch": 3.207175217338068, "grad_norm": 3.014169454574585, "learning_rate": 1.3665144828911634e-05, "loss": 0.4734, "step": 19647 }, { "epoch": 3.2073384759805723, "grad_norm": 3.1855592727661133, "learning_rate": 1.3664548295544046e-05, "loss": 0.4366, "step": 19648 }, { "epoch": 3.2075017346230768, "grad_norm": 3.2391128540039062, "learning_rate": 1.3663951747112884e-05, "loss": 0.4828, "step": 19649 }, { "epoch": 3.207664993265581, "grad_norm": 2.5777547359466553, "learning_rate": 1.3663355183620604e-05, "loss": 0.3879, "step": 19650 }, { "epoch": 3.207828251908085, "grad_norm": 2.807612419128418, "learning_rate": 1.366275860506965e-05, "loss": 0.3626, "step": 19651 }, { "epoch": 3.2079915105505896, "grad_norm": 3.2477188110351562, "learning_rate": 1.366216201146248e-05, "loss": 0.479, "step": 19652 }, { "epoch": 3.208154769193094, "grad_norm": 3.279898166656494, "learning_rate": 1.3661565402801545e-05, "loss": 0.5374, "step": 19653 }, { "epoch": 3.2083180278355985, "grad_norm": 3.3483245372772217, "learning_rate": 1.3660968779089295e-05, "loss": 0.4647, "step": 19654 }, { "epoch": 3.208481286478103, "grad_norm": 3.4221599102020264, "learning_rate": 1.3660372140328186e-05, "loss": 0.554, "step": 19655 }, { "epoch": 3.2086445451206074, "grad_norm": 3.2257022857666016, "learning_rate": 1.365977548652067e-05, "loss": 0.4861, "step": 19656 }, { "epoch": 3.208807803763112, "grad_norm": 3.5649261474609375, "learning_rate": 1.3659178817669201e-05, "loss": 0.5294, "step": 19657 }, { "epoch": 3.2089710624056162, "grad_norm": 3.4341447353363037, "learning_rate": 1.3658582133776226e-05, "loss": 0.5018, "step": 19658 }, { "epoch": 3.2091343210481207, "grad_norm": 2.894771099090576, "learning_rate": 1.36579854348442e-05, "loss": 0.4094, "step": 19659 }, { "epoch": 3.2092975796906247, "grad_norm": 2.9740664958953857, "learning_rate": 1.365738872087558e-05, "loss": 0.3942, "step": 19660 }, { "epoch": 3.209460838333129, "grad_norm": 2.866230010986328, "learning_rate": 1.3656791991872815e-05, "loss": 0.4725, "step": 19661 }, { "epoch": 3.2096240969756336, "grad_norm": 3.72462797164917, "learning_rate": 1.3656195247838362e-05, "loss": 0.5248, "step": 19662 }, { "epoch": 3.209787355618138, "grad_norm": 3.3988022804260254, "learning_rate": 1.3655598488774668e-05, "loss": 0.4399, "step": 19663 }, { "epoch": 3.2099506142606424, "grad_norm": 3.224992275238037, "learning_rate": 1.3655001714684188e-05, "loss": 0.5358, "step": 19664 }, { "epoch": 3.210113872903147, "grad_norm": 2.004749298095703, "learning_rate": 1.3654404925569375e-05, "loss": 0.3185, "step": 19665 }, { "epoch": 3.2102771315456513, "grad_norm": 3.631106376647949, "learning_rate": 1.3653808121432688e-05, "loss": 0.6152, "step": 19666 }, { "epoch": 3.2104403901881557, "grad_norm": 4.121649265289307, "learning_rate": 1.3653211302276574e-05, "loss": 0.5461, "step": 19667 }, { "epoch": 3.21060364883066, "grad_norm": 2.851505994796753, "learning_rate": 1.3652614468103487e-05, "loss": 0.3759, "step": 19668 }, { "epoch": 3.210766907473164, "grad_norm": 3.265683174133301, "learning_rate": 1.365201761891588e-05, "loss": 0.5319, "step": 19669 }, { "epoch": 3.2109301661156686, "grad_norm": 3.0350182056427, "learning_rate": 1.365142075471621e-05, "loss": 0.3907, "step": 19670 }, { "epoch": 3.211093424758173, "grad_norm": 3.5096821784973145, "learning_rate": 1.3650823875506925e-05, "loss": 0.5294, "step": 19671 }, { "epoch": 3.2112566834006775, "grad_norm": 2.9978301525115967, "learning_rate": 1.3650226981290481e-05, "loss": 0.4618, "step": 19672 }, { "epoch": 3.211419942043182, "grad_norm": 3.4068591594696045, "learning_rate": 1.3649630072069335e-05, "loss": 0.5599, "step": 19673 }, { "epoch": 3.2115832006856864, "grad_norm": 3.4827349185943604, "learning_rate": 1.3649033147845938e-05, "loss": 0.4633, "step": 19674 }, { "epoch": 3.211746459328191, "grad_norm": 3.0300991535186768, "learning_rate": 1.3648436208622743e-05, "loss": 0.4598, "step": 19675 }, { "epoch": 3.2119097179706952, "grad_norm": 2.962094306945801, "learning_rate": 1.3647839254402202e-05, "loss": 0.4381, "step": 19676 }, { "epoch": 3.2120729766131992, "grad_norm": 2.922424554824829, "learning_rate": 1.3647242285186775e-05, "loss": 0.5065, "step": 19677 }, { "epoch": 3.2122362352557037, "grad_norm": 3.850908041000366, "learning_rate": 1.364664530097891e-05, "loss": 0.4795, "step": 19678 }, { "epoch": 3.212399493898208, "grad_norm": 2.8543314933776855, "learning_rate": 1.3646048301781064e-05, "loss": 0.4676, "step": 19679 }, { "epoch": 3.2125627525407126, "grad_norm": 3.026930570602417, "learning_rate": 1.3645451287595688e-05, "loss": 0.4645, "step": 19680 }, { "epoch": 3.212726011183217, "grad_norm": 3.1074204444885254, "learning_rate": 1.364485425842524e-05, "loss": 0.532, "step": 19681 }, { "epoch": 3.2128892698257214, "grad_norm": 4.303009033203125, "learning_rate": 1.3644257214272171e-05, "loss": 0.631, "step": 19682 }, { "epoch": 3.213052528468226, "grad_norm": 3.4094018936157227, "learning_rate": 1.364366015513894e-05, "loss": 0.53, "step": 19683 }, { "epoch": 3.2132157871107303, "grad_norm": 3.4701175689697266, "learning_rate": 1.3643063081027994e-05, "loss": 0.5607, "step": 19684 }, { "epoch": 3.2133790457532347, "grad_norm": 3.4277875423431396, "learning_rate": 1.3642465991941793e-05, "loss": 0.5004, "step": 19685 }, { "epoch": 3.213542304395739, "grad_norm": 3.0570104122161865, "learning_rate": 1.3641868887882791e-05, "loss": 0.4112, "step": 19686 }, { "epoch": 3.213705563038243, "grad_norm": 2.832559585571289, "learning_rate": 1.3641271768853441e-05, "loss": 0.4158, "step": 19687 }, { "epoch": 3.2138688216807476, "grad_norm": 2.9860072135925293, "learning_rate": 1.3640674634856195e-05, "loss": 0.5049, "step": 19688 }, { "epoch": 3.214032080323252, "grad_norm": 3.337998628616333, "learning_rate": 1.3640077485893512e-05, "loss": 0.4812, "step": 19689 }, { "epoch": 3.2141953389657565, "grad_norm": 3.231905221939087, "learning_rate": 1.3639480321967846e-05, "loss": 0.5871, "step": 19690 }, { "epoch": 3.214358597608261, "grad_norm": 3.0630171298980713, "learning_rate": 1.3638883143081652e-05, "loss": 0.472, "step": 19691 }, { "epoch": 3.2145218562507654, "grad_norm": 3.2573611736297607, "learning_rate": 1.3638285949237378e-05, "loss": 0.4165, "step": 19692 }, { "epoch": 3.21468511489327, "grad_norm": 2.87440824508667, "learning_rate": 1.3637688740437487e-05, "loss": 0.4363, "step": 19693 }, { "epoch": 3.2148483735357742, "grad_norm": 3.034510850906372, "learning_rate": 1.363709151668443e-05, "loss": 0.4673, "step": 19694 }, { "epoch": 3.2150116321782782, "grad_norm": 3.1704626083374023, "learning_rate": 1.3636494277980666e-05, "loss": 0.4928, "step": 19695 }, { "epoch": 3.2151748908207827, "grad_norm": 2.554598093032837, "learning_rate": 1.3635897024328646e-05, "loss": 0.4328, "step": 19696 }, { "epoch": 3.215338149463287, "grad_norm": 3.222377061843872, "learning_rate": 1.3635299755730826e-05, "loss": 0.4241, "step": 19697 }, { "epoch": 3.2155014081057915, "grad_norm": 3.3420937061309814, "learning_rate": 1.3634702472189659e-05, "loss": 0.4391, "step": 19698 }, { "epoch": 3.215664666748296, "grad_norm": 3.5711891651153564, "learning_rate": 1.3634105173707605e-05, "loss": 0.5768, "step": 19699 }, { "epoch": 3.2158279253908004, "grad_norm": 3.363431692123413, "learning_rate": 1.3633507860287116e-05, "loss": 0.4888, "step": 19700 }, { "epoch": 3.215991184033305, "grad_norm": 3.37715220451355, "learning_rate": 1.363291053193065e-05, "loss": 0.5621, "step": 19701 }, { "epoch": 3.2161544426758093, "grad_norm": 2.872476100921631, "learning_rate": 1.363231318864066e-05, "loss": 0.4155, "step": 19702 }, { "epoch": 3.2163177013183137, "grad_norm": 3.133544445037842, "learning_rate": 1.36317158304196e-05, "loss": 0.4592, "step": 19703 }, { "epoch": 3.2164809599608177, "grad_norm": 3.265395164489746, "learning_rate": 1.363111845726993e-05, "loss": 0.4545, "step": 19704 }, { "epoch": 3.216644218603322, "grad_norm": 3.551973342895508, "learning_rate": 1.36305210691941e-05, "loss": 0.4894, "step": 19705 }, { "epoch": 3.2168074772458266, "grad_norm": 3.2592711448669434, "learning_rate": 1.3629923666194573e-05, "loss": 0.4677, "step": 19706 }, { "epoch": 3.216970735888331, "grad_norm": 3.0043084621429443, "learning_rate": 1.3629326248273798e-05, "loss": 0.4766, "step": 19707 }, { "epoch": 3.2171339945308355, "grad_norm": 3.621795892715454, "learning_rate": 1.3628728815434233e-05, "loss": 0.5247, "step": 19708 }, { "epoch": 3.21729725317334, "grad_norm": 3.0877649784088135, "learning_rate": 1.3628131367678333e-05, "loss": 0.4828, "step": 19709 }, { "epoch": 3.2174605118158444, "grad_norm": 3.56363582611084, "learning_rate": 1.3627533905008556e-05, "loss": 0.5364, "step": 19710 }, { "epoch": 3.217623770458349, "grad_norm": 2.832930326461792, "learning_rate": 1.362693642742736e-05, "loss": 0.3723, "step": 19711 }, { "epoch": 3.217787029100853, "grad_norm": 3.2061357498168945, "learning_rate": 1.3626338934937194e-05, "loss": 0.4643, "step": 19712 }, { "epoch": 3.2179502877433572, "grad_norm": 3.278951644897461, "learning_rate": 1.362574142754052e-05, "loss": 0.4253, "step": 19713 }, { "epoch": 3.2181135463858617, "grad_norm": 3.0755906105041504, "learning_rate": 1.362514390523979e-05, "loss": 0.4525, "step": 19714 }, { "epoch": 3.218276805028366, "grad_norm": 3.531290054321289, "learning_rate": 1.3624546368037461e-05, "loss": 0.4878, "step": 19715 }, { "epoch": 3.2184400636708705, "grad_norm": 3.056273937225342, "learning_rate": 1.3623948815935994e-05, "loss": 0.4473, "step": 19716 }, { "epoch": 3.218603322313375, "grad_norm": 3.1872756481170654, "learning_rate": 1.3623351248937842e-05, "loss": 0.4595, "step": 19717 }, { "epoch": 3.2187665809558794, "grad_norm": 3.15649676322937, "learning_rate": 1.3622753667045459e-05, "loss": 0.4679, "step": 19718 }, { "epoch": 3.218929839598384, "grad_norm": 3.2050719261169434, "learning_rate": 1.3622156070261303e-05, "loss": 0.4433, "step": 19719 }, { "epoch": 3.2190930982408883, "grad_norm": 2.65214204788208, "learning_rate": 1.3621558458587834e-05, "loss": 0.4045, "step": 19720 }, { "epoch": 3.2192563568833927, "grad_norm": 2.6397900581359863, "learning_rate": 1.3620960832027504e-05, "loss": 0.4222, "step": 19721 }, { "epoch": 3.2194196155258967, "grad_norm": 2.5498671531677246, "learning_rate": 1.362036319058277e-05, "loss": 0.4338, "step": 19722 }, { "epoch": 3.219582874168401, "grad_norm": 2.857973098754883, "learning_rate": 1.3619765534256091e-05, "loss": 0.457, "step": 19723 }, { "epoch": 3.2197461328109056, "grad_norm": 3.1324033737182617, "learning_rate": 1.3619167863049923e-05, "loss": 0.5124, "step": 19724 }, { "epoch": 3.21990939145341, "grad_norm": 3.6018362045288086, "learning_rate": 1.3618570176966723e-05, "loss": 0.4699, "step": 19725 }, { "epoch": 3.2200726500959145, "grad_norm": 3.035205364227295, "learning_rate": 1.3617972476008948e-05, "loss": 0.392, "step": 19726 }, { "epoch": 3.220235908738419, "grad_norm": 3.2953076362609863, "learning_rate": 1.3617374760179051e-05, "loss": 0.4332, "step": 19727 }, { "epoch": 3.2203991673809234, "grad_norm": 3.5415308475494385, "learning_rate": 1.3616777029479496e-05, "loss": 0.5416, "step": 19728 }, { "epoch": 3.220562426023428, "grad_norm": 3.0239827632904053, "learning_rate": 1.3616179283912734e-05, "loss": 0.4351, "step": 19729 }, { "epoch": 3.220725684665932, "grad_norm": 3.29787015914917, "learning_rate": 1.3615581523481225e-05, "loss": 0.4674, "step": 19730 }, { "epoch": 3.2208889433084362, "grad_norm": 2.8355774879455566, "learning_rate": 1.3614983748187426e-05, "loss": 0.4286, "step": 19731 }, { "epoch": 3.2210522019509407, "grad_norm": 3.573683500289917, "learning_rate": 1.3614385958033793e-05, "loss": 0.5388, "step": 19732 }, { "epoch": 3.221215460593445, "grad_norm": 3.3848917484283447, "learning_rate": 1.3613788153022784e-05, "loss": 0.4679, "step": 19733 }, { "epoch": 3.2213787192359495, "grad_norm": 2.660492420196533, "learning_rate": 1.3613190333156855e-05, "loss": 0.4327, "step": 19734 }, { "epoch": 3.221541977878454, "grad_norm": 3.28784441947937, "learning_rate": 1.3612592498438471e-05, "loss": 0.4769, "step": 19735 }, { "epoch": 3.2217052365209584, "grad_norm": 3.221846103668213, "learning_rate": 1.3611994648870077e-05, "loss": 0.44, "step": 19736 }, { "epoch": 3.221868495163463, "grad_norm": 3.1578209400177, "learning_rate": 1.3611396784454138e-05, "loss": 0.443, "step": 19737 }, { "epoch": 3.2220317538059673, "grad_norm": 3.536339521408081, "learning_rate": 1.3610798905193112e-05, "loss": 0.5353, "step": 19738 }, { "epoch": 3.2221950124484713, "grad_norm": 2.9056975841522217, "learning_rate": 1.3610201011089455e-05, "loss": 0.3976, "step": 19739 }, { "epoch": 3.2223582710909757, "grad_norm": 3.2896809577941895, "learning_rate": 1.3609603102145625e-05, "loss": 0.4721, "step": 19740 }, { "epoch": 3.22252152973348, "grad_norm": 2.697641134262085, "learning_rate": 1.3609005178364081e-05, "loss": 0.4435, "step": 19741 }, { "epoch": 3.2226847883759846, "grad_norm": 2.8623270988464355, "learning_rate": 1.3608407239747274e-05, "loss": 0.4661, "step": 19742 }, { "epoch": 3.222848047018489, "grad_norm": 3.450681447982788, "learning_rate": 1.3607809286297672e-05, "loss": 0.5568, "step": 19743 }, { "epoch": 3.2230113056609935, "grad_norm": 3.060800552368164, "learning_rate": 1.3607211318017726e-05, "loss": 0.4753, "step": 19744 }, { "epoch": 3.223174564303498, "grad_norm": 3.4194118976593018, "learning_rate": 1.3606613334909899e-05, "loss": 0.476, "step": 19745 }, { "epoch": 3.2233378229460024, "grad_norm": 3.673647880554199, "learning_rate": 1.3606015336976646e-05, "loss": 0.4967, "step": 19746 }, { "epoch": 3.223501081588507, "grad_norm": 3.3491694927215576, "learning_rate": 1.3605417324220422e-05, "loss": 0.5113, "step": 19747 }, { "epoch": 3.223664340231011, "grad_norm": 2.581024408340454, "learning_rate": 1.360481929664369e-05, "loss": 0.4306, "step": 19748 }, { "epoch": 3.223827598873515, "grad_norm": 3.074660539627075, "learning_rate": 1.3604221254248908e-05, "loss": 0.476, "step": 19749 }, { "epoch": 3.2239908575160197, "grad_norm": 2.8586580753326416, "learning_rate": 1.3603623197038536e-05, "loss": 0.457, "step": 19750 }, { "epoch": 3.224154116158524, "grad_norm": 2.71848726272583, "learning_rate": 1.3603025125015028e-05, "loss": 0.3807, "step": 19751 }, { "epoch": 3.2243173748010285, "grad_norm": 2.900545835494995, "learning_rate": 1.3602427038180843e-05, "loss": 0.4132, "step": 19752 }, { "epoch": 3.224480633443533, "grad_norm": 3.2753937244415283, "learning_rate": 1.360182893653844e-05, "loss": 0.5087, "step": 19753 }, { "epoch": 3.2246438920860374, "grad_norm": 2.489014148712158, "learning_rate": 1.360123082009028e-05, "loss": 0.4129, "step": 19754 }, { "epoch": 3.224807150728542, "grad_norm": 3.0708796977996826, "learning_rate": 1.360063268883882e-05, "loss": 0.4752, "step": 19755 }, { "epoch": 3.2249704093710463, "grad_norm": 3.3530569076538086, "learning_rate": 1.3600034542786517e-05, "loss": 0.4779, "step": 19756 }, { "epoch": 3.2251336680135503, "grad_norm": 2.5562713146209717, "learning_rate": 1.3599436381935834e-05, "loss": 0.4454, "step": 19757 }, { "epoch": 3.2252969266560547, "grad_norm": 2.495393991470337, "learning_rate": 1.3598838206289225e-05, "loss": 0.3402, "step": 19758 }, { "epoch": 3.225460185298559, "grad_norm": 3.4525556564331055, "learning_rate": 1.359824001584915e-05, "loss": 0.4602, "step": 19759 }, { "epoch": 3.2256234439410636, "grad_norm": 3.2721176147460938, "learning_rate": 1.3597641810618071e-05, "loss": 0.5091, "step": 19760 }, { "epoch": 3.225786702583568, "grad_norm": 3.4586970806121826, "learning_rate": 1.3597043590598444e-05, "loss": 0.5003, "step": 19761 }, { "epoch": 3.2259499612260725, "grad_norm": 3.317091464996338, "learning_rate": 1.3596445355792729e-05, "loss": 0.525, "step": 19762 }, { "epoch": 3.226113219868577, "grad_norm": 2.8050355911254883, "learning_rate": 1.3595847106203386e-05, "loss": 0.4327, "step": 19763 }, { "epoch": 3.2262764785110813, "grad_norm": 3.8651986122131348, "learning_rate": 1.3595248841832873e-05, "loss": 0.4882, "step": 19764 }, { "epoch": 3.2264397371535853, "grad_norm": 3.7103867530822754, "learning_rate": 1.3594650562683648e-05, "loss": 0.5392, "step": 19765 }, { "epoch": 3.22660299579609, "grad_norm": 3.0996413230895996, "learning_rate": 1.3594052268758175e-05, "loss": 0.5348, "step": 19766 }, { "epoch": 3.226766254438594, "grad_norm": 2.9414865970611572, "learning_rate": 1.3593453960058909e-05, "loss": 0.4345, "step": 19767 }, { "epoch": 3.2269295130810987, "grad_norm": 2.717705249786377, "learning_rate": 1.359285563658831e-05, "loss": 0.4245, "step": 19768 }, { "epoch": 3.227092771723603, "grad_norm": 2.8891475200653076, "learning_rate": 1.3592257298348837e-05, "loss": 0.4772, "step": 19769 }, { "epoch": 3.2272560303661075, "grad_norm": 2.9338953495025635, "learning_rate": 1.3591658945342951e-05, "loss": 0.4397, "step": 19770 }, { "epoch": 3.227419289008612, "grad_norm": 3.023601531982422, "learning_rate": 1.3591060577573113e-05, "loss": 0.4049, "step": 19771 }, { "epoch": 3.2275825476511164, "grad_norm": 3.235581398010254, "learning_rate": 1.359046219504178e-05, "loss": 0.4562, "step": 19772 }, { "epoch": 3.227745806293621, "grad_norm": 2.695823907852173, "learning_rate": 1.3589863797751414e-05, "loss": 0.4273, "step": 19773 }, { "epoch": 3.2279090649361253, "grad_norm": 2.9954566955566406, "learning_rate": 1.358926538570447e-05, "loss": 0.4239, "step": 19774 }, { "epoch": 3.2280723235786293, "grad_norm": 3.482412576675415, "learning_rate": 1.3588666958903415e-05, "loss": 0.4907, "step": 19775 }, { "epoch": 3.2282355822211337, "grad_norm": 3.5105631351470947, "learning_rate": 1.3588068517350703e-05, "loss": 0.4379, "step": 19776 }, { "epoch": 3.228398840863638, "grad_norm": 3.7875874042510986, "learning_rate": 1.3587470061048798e-05, "loss": 0.4872, "step": 19777 }, { "epoch": 3.2285620995061426, "grad_norm": 4.199926853179932, "learning_rate": 1.3586871590000157e-05, "loss": 0.5419, "step": 19778 }, { "epoch": 3.228725358148647, "grad_norm": 3.5758674144744873, "learning_rate": 1.3586273104207242e-05, "loss": 0.5488, "step": 19779 }, { "epoch": 3.2288886167911515, "grad_norm": 2.988288164138794, "learning_rate": 1.3585674603672509e-05, "loss": 0.3883, "step": 19780 }, { "epoch": 3.229051875433656, "grad_norm": 3.765096426010132, "learning_rate": 1.3585076088398426e-05, "loss": 0.5663, "step": 19781 }, { "epoch": 3.2292151340761603, "grad_norm": 3.3124217987060547, "learning_rate": 1.3584477558387448e-05, "loss": 0.4758, "step": 19782 }, { "epoch": 3.2293783927186643, "grad_norm": 3.3347132205963135, "learning_rate": 1.3583879013642035e-05, "loss": 0.4437, "step": 19783 }, { "epoch": 3.2295416513611688, "grad_norm": 3.3667263984680176, "learning_rate": 1.3583280454164651e-05, "loss": 0.5047, "step": 19784 }, { "epoch": 3.229704910003673, "grad_norm": 2.768354654312134, "learning_rate": 1.3582681879957752e-05, "loss": 0.4336, "step": 19785 }, { "epoch": 3.2298681686461777, "grad_norm": 3.459963798522949, "learning_rate": 1.3582083291023797e-05, "loss": 0.5043, "step": 19786 }, { "epoch": 3.230031427288682, "grad_norm": 3.0090909004211426, "learning_rate": 1.3581484687365256e-05, "loss": 0.4278, "step": 19787 }, { "epoch": 3.2301946859311865, "grad_norm": 3.3799121379852295, "learning_rate": 1.3580886068984581e-05, "loss": 0.4664, "step": 19788 }, { "epoch": 3.230357944573691, "grad_norm": 3.106417179107666, "learning_rate": 1.3580287435884238e-05, "loss": 0.4628, "step": 19789 }, { "epoch": 3.2305212032161954, "grad_norm": 3.185014486312866, "learning_rate": 1.3579688788066685e-05, "loss": 0.4436, "step": 19790 }, { "epoch": 3.2306844618587, "grad_norm": 2.9719812870025635, "learning_rate": 1.3579090125534378e-05, "loss": 0.4505, "step": 19791 }, { "epoch": 3.230847720501204, "grad_norm": 2.939140796661377, "learning_rate": 1.3578491448289786e-05, "loss": 0.4763, "step": 19792 }, { "epoch": 3.2310109791437083, "grad_norm": 3.3047642707824707, "learning_rate": 1.3577892756335369e-05, "loss": 0.5151, "step": 19793 }, { "epoch": 3.2311742377862127, "grad_norm": 3.2202749252319336, "learning_rate": 1.3577294049673585e-05, "loss": 0.4774, "step": 19794 }, { "epoch": 3.231337496428717, "grad_norm": 3.445815086364746, "learning_rate": 1.3576695328306895e-05, "loss": 0.5399, "step": 19795 }, { "epoch": 3.2315007550712216, "grad_norm": 3.3234353065490723, "learning_rate": 1.3576096592237762e-05, "loss": 0.4654, "step": 19796 }, { "epoch": 3.231664013713726, "grad_norm": 3.6164937019348145, "learning_rate": 1.3575497841468641e-05, "loss": 0.5241, "step": 19797 }, { "epoch": 3.2318272723562305, "grad_norm": 2.922224521636963, "learning_rate": 1.3574899076002003e-05, "loss": 0.4352, "step": 19798 }, { "epoch": 3.231990530998735, "grad_norm": 3.156301498413086, "learning_rate": 1.3574300295840309e-05, "loss": 0.5003, "step": 19799 }, { "epoch": 3.2321537896412393, "grad_norm": 3.152076244354248, "learning_rate": 1.3573701500986012e-05, "loss": 0.5109, "step": 19800 }, { "epoch": 3.2323170482837433, "grad_norm": 2.9987668991088867, "learning_rate": 1.3573102691441575e-05, "loss": 0.4708, "step": 19801 }, { "epoch": 3.2324803069262478, "grad_norm": 3.5387513637542725, "learning_rate": 1.3572503867209466e-05, "loss": 0.4722, "step": 19802 }, { "epoch": 3.232643565568752, "grad_norm": 2.7128243446350098, "learning_rate": 1.3571905028292139e-05, "loss": 0.4429, "step": 19803 }, { "epoch": 3.2328068242112566, "grad_norm": 2.870387554168701, "learning_rate": 1.3571306174692063e-05, "loss": 0.4977, "step": 19804 }, { "epoch": 3.232970082853761, "grad_norm": 2.6366796493530273, "learning_rate": 1.3570707306411693e-05, "loss": 0.4334, "step": 19805 }, { "epoch": 3.2331333414962655, "grad_norm": 3.5902044773101807, "learning_rate": 1.3570108423453494e-05, "loss": 0.5009, "step": 19806 }, { "epoch": 3.23329660013877, "grad_norm": 3.4716897010803223, "learning_rate": 1.3569509525819928e-05, "loss": 0.4942, "step": 19807 }, { "epoch": 3.2334598587812744, "grad_norm": 2.7152836322784424, "learning_rate": 1.3568910613513455e-05, "loss": 0.4408, "step": 19808 }, { "epoch": 3.233623117423779, "grad_norm": 3.378093957901001, "learning_rate": 1.356831168653654e-05, "loss": 0.4599, "step": 19809 }, { "epoch": 3.233786376066283, "grad_norm": 3.1875762939453125, "learning_rate": 1.3567712744891641e-05, "loss": 0.4256, "step": 19810 }, { "epoch": 3.2339496347087873, "grad_norm": 3.1042544841766357, "learning_rate": 1.3567113788581223e-05, "loss": 0.4701, "step": 19811 }, { "epoch": 3.2341128933512917, "grad_norm": 3.4352192878723145, "learning_rate": 1.3566514817607747e-05, "loss": 0.5368, "step": 19812 }, { "epoch": 3.234276151993796, "grad_norm": 2.754528760910034, "learning_rate": 1.3565915831973675e-05, "loss": 0.422, "step": 19813 }, { "epoch": 3.2344394106363006, "grad_norm": 2.9329867362976074, "learning_rate": 1.356531683168147e-05, "loss": 0.3953, "step": 19814 }, { "epoch": 3.234602669278805, "grad_norm": 3.124119758605957, "learning_rate": 1.3564717816733595e-05, "loss": 0.4658, "step": 19815 }, { "epoch": 3.2347659279213095, "grad_norm": 3.182772636413574, "learning_rate": 1.3564118787132507e-05, "loss": 0.4577, "step": 19816 }, { "epoch": 3.234929186563814, "grad_norm": 3.143843412399292, "learning_rate": 1.3563519742880677e-05, "loss": 0.4851, "step": 19817 }, { "epoch": 3.235092445206318, "grad_norm": 3.076467990875244, "learning_rate": 1.356292068398056e-05, "loss": 0.4811, "step": 19818 }, { "epoch": 3.2352557038488223, "grad_norm": 3.0348801612854004, "learning_rate": 1.3562321610434624e-05, "loss": 0.4295, "step": 19819 }, { "epoch": 3.2354189624913268, "grad_norm": 3.7181599140167236, "learning_rate": 1.3561722522245326e-05, "loss": 0.9335, "step": 19820 }, { "epoch": 3.235582221133831, "grad_norm": 3.354613780975342, "learning_rate": 1.3561123419415134e-05, "loss": 0.5158, "step": 19821 }, { "epoch": 3.2357454797763356, "grad_norm": 3.2778377532958984, "learning_rate": 1.3560524301946508e-05, "loss": 0.5399, "step": 19822 }, { "epoch": 3.23590873841884, "grad_norm": 3.2693214416503906, "learning_rate": 1.3559925169841912e-05, "loss": 0.5074, "step": 19823 }, { "epoch": 3.2360719970613445, "grad_norm": 3.2507145404815674, "learning_rate": 1.3559326023103803e-05, "loss": 0.4057, "step": 19824 }, { "epoch": 3.236235255703849, "grad_norm": 2.8889946937561035, "learning_rate": 1.3558726861734653e-05, "loss": 0.405, "step": 19825 }, { "epoch": 3.2363985143463534, "grad_norm": 3.2008748054504395, "learning_rate": 1.3558127685736917e-05, "loss": 0.4051, "step": 19826 }, { "epoch": 3.236561772988858, "grad_norm": 3.145158290863037, "learning_rate": 1.3557528495113065e-05, "loss": 0.476, "step": 19827 }, { "epoch": 3.236725031631362, "grad_norm": 3.1091904640197754, "learning_rate": 1.3556929289865557e-05, "loss": 0.4078, "step": 19828 }, { "epoch": 3.2368882902738663, "grad_norm": 3.8411505222320557, "learning_rate": 1.3556330069996854e-05, "loss": 0.535, "step": 19829 }, { "epoch": 3.2370515489163707, "grad_norm": 3.1989643573760986, "learning_rate": 1.355573083550942e-05, "loss": 0.4375, "step": 19830 }, { "epoch": 3.237214807558875, "grad_norm": 3.230668306350708, "learning_rate": 1.355513158640572e-05, "loss": 0.4058, "step": 19831 }, { "epoch": 3.2373780662013796, "grad_norm": 3.4263041019439697, "learning_rate": 1.3554532322688219e-05, "loss": 0.5712, "step": 19832 }, { "epoch": 3.237541324843884, "grad_norm": 2.8731439113616943, "learning_rate": 1.3553933044359377e-05, "loss": 0.4503, "step": 19833 }, { "epoch": 3.2377045834863885, "grad_norm": 3.806628704071045, "learning_rate": 1.3553333751421659e-05, "loss": 0.5344, "step": 19834 }, { "epoch": 3.237867842128893, "grad_norm": 2.714235544204712, "learning_rate": 1.3552734443877522e-05, "loss": 0.4367, "step": 19835 }, { "epoch": 3.238031100771397, "grad_norm": 3.6207337379455566, "learning_rate": 1.3552135121729437e-05, "loss": 0.4848, "step": 19836 }, { "epoch": 3.2381943594139013, "grad_norm": 2.8922924995422363, "learning_rate": 1.355153578497987e-05, "loss": 0.4003, "step": 19837 }, { "epoch": 3.2383576180564058, "grad_norm": 3.669330596923828, "learning_rate": 1.3550936433631278e-05, "loss": 0.5372, "step": 19838 }, { "epoch": 3.23852087669891, "grad_norm": 3.0661425590515137, "learning_rate": 1.3550337067686128e-05, "loss": 0.4502, "step": 19839 }, { "epoch": 3.2386841353414146, "grad_norm": 4.141526699066162, "learning_rate": 1.3549737687146882e-05, "loss": 1.17, "step": 19840 }, { "epoch": 3.238847393983919, "grad_norm": 3.020320177078247, "learning_rate": 1.3549138292016002e-05, "loss": 0.4615, "step": 19841 }, { "epoch": 3.2390106526264235, "grad_norm": 3.640049457550049, "learning_rate": 1.3548538882295959e-05, "loss": 0.4703, "step": 19842 }, { "epoch": 3.239173911268928, "grad_norm": 4.278726100921631, "learning_rate": 1.3547939457989211e-05, "loss": 0.469, "step": 19843 }, { "epoch": 3.2393371699114324, "grad_norm": 3.757465362548828, "learning_rate": 1.3547340019098226e-05, "loss": 0.5661, "step": 19844 }, { "epoch": 3.2395004285539364, "grad_norm": 2.947321891784668, "learning_rate": 1.3546740565625463e-05, "loss": 0.3987, "step": 19845 }, { "epoch": 3.239663687196441, "grad_norm": 3.632117986679077, "learning_rate": 1.3546141097573389e-05, "loss": 0.5716, "step": 19846 }, { "epoch": 3.2398269458389453, "grad_norm": 3.037329912185669, "learning_rate": 1.3545541614944466e-05, "loss": 0.4862, "step": 19847 }, { "epoch": 3.2399902044814497, "grad_norm": 3.1511998176574707, "learning_rate": 1.3544942117741164e-05, "loss": 0.4971, "step": 19848 }, { "epoch": 3.240153463123954, "grad_norm": 2.7681798934936523, "learning_rate": 1.354434260596594e-05, "loss": 0.4625, "step": 19849 }, { "epoch": 3.2403167217664586, "grad_norm": 3.8588926792144775, "learning_rate": 1.3543743079621266e-05, "loss": 0.48, "step": 19850 }, { "epoch": 3.240479980408963, "grad_norm": 2.934091091156006, "learning_rate": 1.3543143538709597e-05, "loss": 0.4034, "step": 19851 }, { "epoch": 3.2406432390514675, "grad_norm": 2.8953137397766113, "learning_rate": 1.3542543983233408e-05, "loss": 0.4301, "step": 19852 }, { "epoch": 3.2408064976939714, "grad_norm": 4.129308700561523, "learning_rate": 1.3541944413195154e-05, "loss": 0.5361, "step": 19853 }, { "epoch": 3.240969756336476, "grad_norm": 3.7356669902801514, "learning_rate": 1.3541344828597306e-05, "loss": 0.5212, "step": 19854 }, { "epoch": 3.2411330149789803, "grad_norm": 3.5875089168548584, "learning_rate": 1.3540745229442327e-05, "loss": 0.5484, "step": 19855 }, { "epoch": 3.2412962736214848, "grad_norm": 2.98866605758667, "learning_rate": 1.354014561573268e-05, "loss": 0.4227, "step": 19856 }, { "epoch": 3.241459532263989, "grad_norm": 3.039397954940796, "learning_rate": 1.3539545987470833e-05, "loss": 0.4647, "step": 19857 }, { "epoch": 3.2416227909064936, "grad_norm": 3.15433931350708, "learning_rate": 1.3538946344659247e-05, "loss": 0.4223, "step": 19858 }, { "epoch": 3.241786049548998, "grad_norm": 2.916790008544922, "learning_rate": 1.353834668730039e-05, "loss": 0.4503, "step": 19859 }, { "epoch": 3.2419493081915025, "grad_norm": 3.345853805541992, "learning_rate": 1.3537747015396726e-05, "loss": 0.4303, "step": 19860 }, { "epoch": 3.242112566834007, "grad_norm": 3.209108591079712, "learning_rate": 1.353714732895072e-05, "loss": 0.4873, "step": 19861 }, { "epoch": 3.2422758254765114, "grad_norm": 4.044816970825195, "learning_rate": 1.3536547627964835e-05, "loss": 0.8611, "step": 19862 }, { "epoch": 3.2424390841190154, "grad_norm": 2.964752674102783, "learning_rate": 1.3535947912441538e-05, "loss": 0.4632, "step": 19863 }, { "epoch": 3.24260234276152, "grad_norm": 2.673668146133423, "learning_rate": 1.3535348182383297e-05, "loss": 0.4228, "step": 19864 }, { "epoch": 3.2427656014040243, "grad_norm": 3.438711404800415, "learning_rate": 1.3534748437792573e-05, "loss": 0.4583, "step": 19865 }, { "epoch": 3.2429288600465287, "grad_norm": 2.448890209197998, "learning_rate": 1.3534148678671833e-05, "loss": 0.3761, "step": 19866 }, { "epoch": 3.243092118689033, "grad_norm": 3.5003674030303955, "learning_rate": 1.3533548905023543e-05, "loss": 0.5071, "step": 19867 }, { "epoch": 3.2432553773315376, "grad_norm": 3.3858635425567627, "learning_rate": 1.3532949116850165e-05, "loss": 0.4973, "step": 19868 }, { "epoch": 3.243418635974042, "grad_norm": 3.6545982360839844, "learning_rate": 1.353234931415417e-05, "loss": 0.5823, "step": 19869 }, { "epoch": 3.2435818946165464, "grad_norm": 3.0814802646636963, "learning_rate": 1.353174949693802e-05, "loss": 0.5169, "step": 19870 }, { "epoch": 3.2437451532590504, "grad_norm": 3.2112302780151367, "learning_rate": 1.3531149665204182e-05, "loss": 0.436, "step": 19871 }, { "epoch": 3.243908411901555, "grad_norm": 3.711638927459717, "learning_rate": 1.353054981895512e-05, "loss": 0.5721, "step": 19872 }, { "epoch": 3.2440716705440593, "grad_norm": 3.2026917934417725, "learning_rate": 1.3529949958193302e-05, "loss": 0.4321, "step": 19873 }, { "epoch": 3.2442349291865638, "grad_norm": 2.598029136657715, "learning_rate": 1.3529350082921191e-05, "loss": 0.3918, "step": 19874 }, { "epoch": 3.244398187829068, "grad_norm": 2.9802489280700684, "learning_rate": 1.3528750193141255e-05, "loss": 0.449, "step": 19875 }, { "epoch": 3.2445614464715726, "grad_norm": 3.3674020767211914, "learning_rate": 1.3528150288855959e-05, "loss": 0.4897, "step": 19876 }, { "epoch": 3.244724705114077, "grad_norm": 3.0798418521881104, "learning_rate": 1.3527550370067772e-05, "loss": 0.4789, "step": 19877 }, { "epoch": 3.2448879637565815, "grad_norm": 3.0636518001556396, "learning_rate": 1.3526950436779156e-05, "loss": 0.4165, "step": 19878 }, { "epoch": 3.245051222399086, "grad_norm": 3.3033223152160645, "learning_rate": 1.352635048899258e-05, "loss": 0.4708, "step": 19879 }, { "epoch": 3.24521448104159, "grad_norm": 3.4551072120666504, "learning_rate": 1.3525750526710501e-05, "loss": 0.5093, "step": 19880 }, { "epoch": 3.2453777396840944, "grad_norm": 3.320449113845825, "learning_rate": 1.35251505499354e-05, "loss": 0.5195, "step": 19881 }, { "epoch": 3.245540998326599, "grad_norm": 3.3414669036865234, "learning_rate": 1.3524550558669738e-05, "loss": 0.4964, "step": 19882 }, { "epoch": 3.2457042569691033, "grad_norm": 3.880923271179199, "learning_rate": 1.3523950552915974e-05, "loss": 0.5232, "step": 19883 }, { "epoch": 3.2458675156116077, "grad_norm": 3.1946303844451904, "learning_rate": 1.3523350532676582e-05, "loss": 0.4997, "step": 19884 }, { "epoch": 3.246030774254112, "grad_norm": 3.532526731491089, "learning_rate": 1.3522750497954024e-05, "loss": 0.5925, "step": 19885 }, { "epoch": 3.2461940328966166, "grad_norm": 3.9634315967559814, "learning_rate": 1.3522150448750773e-05, "loss": 0.6036, "step": 19886 }, { "epoch": 3.246357291539121, "grad_norm": 2.9537904262542725, "learning_rate": 1.352155038506929e-05, "loss": 0.3982, "step": 19887 }, { "epoch": 3.2465205501816254, "grad_norm": 3.3149633407592773, "learning_rate": 1.3520950306912044e-05, "loss": 0.5066, "step": 19888 }, { "epoch": 3.2466838088241294, "grad_norm": 3.0013489723205566, "learning_rate": 1.3520350214281499e-05, "loss": 0.5095, "step": 19889 }, { "epoch": 3.246847067466634, "grad_norm": 3.326390027999878, "learning_rate": 1.3519750107180126e-05, "loss": 0.5157, "step": 19890 }, { "epoch": 3.2470103261091383, "grad_norm": 3.21610164642334, "learning_rate": 1.3519149985610385e-05, "loss": 0.5402, "step": 19891 }, { "epoch": 3.2471735847516427, "grad_norm": 3.1928865909576416, "learning_rate": 1.3518549849574751e-05, "loss": 0.4809, "step": 19892 }, { "epoch": 3.247336843394147, "grad_norm": 3.4328064918518066, "learning_rate": 1.3517949699075686e-05, "loss": 0.595, "step": 19893 }, { "epoch": 3.2475001020366516, "grad_norm": 2.759208917617798, "learning_rate": 1.3517349534115659e-05, "loss": 0.4476, "step": 19894 }, { "epoch": 3.247663360679156, "grad_norm": 2.7688517570495605, "learning_rate": 1.3516749354697133e-05, "loss": 0.3706, "step": 19895 }, { "epoch": 3.2478266193216605, "grad_norm": 3.027967691421509, "learning_rate": 1.3516149160822583e-05, "loss": 0.4059, "step": 19896 }, { "epoch": 3.247989877964165, "grad_norm": 3.8123888969421387, "learning_rate": 1.3515548952494467e-05, "loss": 0.5853, "step": 19897 }, { "epoch": 3.248153136606669, "grad_norm": 3.55969500541687, "learning_rate": 1.3514948729715258e-05, "loss": 0.5309, "step": 19898 }, { "epoch": 3.2483163952491734, "grad_norm": 3.0746006965637207, "learning_rate": 1.3514348492487424e-05, "loss": 0.498, "step": 19899 }, { "epoch": 3.248479653891678, "grad_norm": 3.6098875999450684, "learning_rate": 1.3513748240813429e-05, "loss": 0.5758, "step": 19900 }, { "epoch": 3.2486429125341822, "grad_norm": 3.3183341026306152, "learning_rate": 1.3513147974695742e-05, "loss": 0.486, "step": 19901 }, { "epoch": 3.2488061711766867, "grad_norm": 2.6098811626434326, "learning_rate": 1.351254769413683e-05, "loss": 0.4335, "step": 19902 }, { "epoch": 3.248969429819191, "grad_norm": 3.571392297744751, "learning_rate": 1.351194739913916e-05, "loss": 0.5671, "step": 19903 }, { "epoch": 3.2491326884616956, "grad_norm": 3.369659900665283, "learning_rate": 1.3511347089705203e-05, "loss": 0.4652, "step": 19904 }, { "epoch": 3.2492959471042, "grad_norm": 3.261963129043579, "learning_rate": 1.3510746765837421e-05, "loss": 0.4862, "step": 19905 }, { "epoch": 3.249459205746704, "grad_norm": 3.0943479537963867, "learning_rate": 1.3510146427538285e-05, "loss": 0.5591, "step": 19906 }, { "epoch": 3.2496224643892084, "grad_norm": 3.2906618118286133, "learning_rate": 1.3509546074810262e-05, "loss": 0.5046, "step": 19907 }, { "epoch": 3.249785723031713, "grad_norm": 2.456496238708496, "learning_rate": 1.3508945707655822e-05, "loss": 0.4205, "step": 19908 }, { "epoch": 3.2499489816742173, "grad_norm": 3.040476083755493, "learning_rate": 1.3508345326077433e-05, "loss": 0.4836, "step": 19909 }, { "epoch": 3.2501122403167217, "grad_norm": 2.9332096576690674, "learning_rate": 1.3507744930077555e-05, "loss": 0.403, "step": 19910 }, { "epoch": 3.250275498959226, "grad_norm": 3.223588466644287, "learning_rate": 1.350714451965867e-05, "loss": 0.4655, "step": 19911 }, { "epoch": 3.2504387576017306, "grad_norm": 2.936476469039917, "learning_rate": 1.350654409482323e-05, "loss": 0.4492, "step": 19912 }, { "epoch": 3.250602016244235, "grad_norm": 2.6880321502685547, "learning_rate": 1.3505943655573716e-05, "loss": 0.4433, "step": 19913 }, { "epoch": 3.2507652748867395, "grad_norm": 2.8284804821014404, "learning_rate": 1.350534320191259e-05, "loss": 0.4501, "step": 19914 }, { "epoch": 3.250928533529244, "grad_norm": 2.921265125274658, "learning_rate": 1.3504742733842325e-05, "loss": 0.4006, "step": 19915 }, { "epoch": 3.251091792171748, "grad_norm": 3.6916704177856445, "learning_rate": 1.3504142251365384e-05, "loss": 0.5067, "step": 19916 }, { "epoch": 3.2512550508142524, "grad_norm": 3.404189348220825, "learning_rate": 1.3503541754484238e-05, "loss": 0.53, "step": 19917 }, { "epoch": 3.251418309456757, "grad_norm": 2.712909698486328, "learning_rate": 1.3502941243201351e-05, "loss": 0.4862, "step": 19918 }, { "epoch": 3.2515815680992612, "grad_norm": 3.1139752864837646, "learning_rate": 1.3502340717519196e-05, "loss": 0.4987, "step": 19919 }, { "epoch": 3.2517448267417657, "grad_norm": 3.007054567337036, "learning_rate": 1.3501740177440244e-05, "loss": 0.4048, "step": 19920 }, { "epoch": 3.25190808538427, "grad_norm": 2.775754690170288, "learning_rate": 1.350113962296696e-05, "loss": 0.4208, "step": 19921 }, { "epoch": 3.2520713440267746, "grad_norm": 3.156620502471924, "learning_rate": 1.3500539054101811e-05, "loss": 0.4507, "step": 19922 }, { "epoch": 3.252234602669279, "grad_norm": 2.8279707431793213, "learning_rate": 1.3499938470847269e-05, "loss": 0.403, "step": 19923 }, { "epoch": 3.252397861311783, "grad_norm": 2.9929115772247314, "learning_rate": 1.34993378732058e-05, "loss": 0.4236, "step": 19924 }, { "epoch": 3.2525611199542874, "grad_norm": 3.7422738075256348, "learning_rate": 1.3498737261179875e-05, "loss": 0.4676, "step": 19925 }, { "epoch": 3.252724378596792, "grad_norm": 2.5810205936431885, "learning_rate": 1.3498136634771963e-05, "loss": 0.3822, "step": 19926 }, { "epoch": 3.2528876372392963, "grad_norm": 3.8504815101623535, "learning_rate": 1.3497535993984534e-05, "loss": 0.4784, "step": 19927 }, { "epoch": 3.2530508958818007, "grad_norm": 3.1241815090179443, "learning_rate": 1.3496935338820052e-05, "loss": 0.4887, "step": 19928 }, { "epoch": 3.253214154524305, "grad_norm": 2.936783790588379, "learning_rate": 1.3496334669280989e-05, "loss": 0.4469, "step": 19929 }, { "epoch": 3.2533774131668096, "grad_norm": 2.8403804302215576, "learning_rate": 1.3495733985369815e-05, "loss": 0.3746, "step": 19930 }, { "epoch": 3.253540671809314, "grad_norm": 3.3924121856689453, "learning_rate": 1.3495133287089001e-05, "loss": 0.5229, "step": 19931 }, { "epoch": 3.2537039304518185, "grad_norm": 3.6499149799346924, "learning_rate": 1.3494532574441013e-05, "loss": 0.538, "step": 19932 }, { "epoch": 3.2538671890943225, "grad_norm": 3.3428611755371094, "learning_rate": 1.3493931847428318e-05, "loss": 0.4809, "step": 19933 }, { "epoch": 3.254030447736827, "grad_norm": 3.4769253730773926, "learning_rate": 1.3493331106053392e-05, "loss": 0.5715, "step": 19934 }, { "epoch": 3.2541937063793314, "grad_norm": 3.434058666229248, "learning_rate": 1.3492730350318698e-05, "loss": 0.4883, "step": 19935 }, { "epoch": 3.254356965021836, "grad_norm": 2.959507465362549, "learning_rate": 1.349212958022671e-05, "loss": 0.4353, "step": 19936 }, { "epoch": 3.2545202236643402, "grad_norm": 3.5058395862579346, "learning_rate": 1.3491528795779896e-05, "loss": 0.4275, "step": 19937 }, { "epoch": 3.2546834823068447, "grad_norm": 2.395862102508545, "learning_rate": 1.3490927996980725e-05, "loss": 0.3721, "step": 19938 }, { "epoch": 3.254846740949349, "grad_norm": 3.1423821449279785, "learning_rate": 1.3490327183831666e-05, "loss": 0.3812, "step": 19939 }, { "epoch": 3.2550099995918536, "grad_norm": 3.1388442516326904, "learning_rate": 1.348972635633519e-05, "loss": 0.4729, "step": 19940 }, { "epoch": 3.2551732582343575, "grad_norm": 3.1651451587677, "learning_rate": 1.3489125514493771e-05, "loss": 0.5037, "step": 19941 }, { "epoch": 3.255336516876862, "grad_norm": 3.0270955562591553, "learning_rate": 1.348852465830987e-05, "loss": 0.4283, "step": 19942 }, { "epoch": 3.2554997755193664, "grad_norm": 3.7797179222106934, "learning_rate": 1.3487923787785964e-05, "loss": 0.5311, "step": 19943 }, { "epoch": 3.255663034161871, "grad_norm": 3.762667417526245, "learning_rate": 1.3487322902924519e-05, "loss": 0.4619, "step": 19944 }, { "epoch": 3.2558262928043753, "grad_norm": 2.6201748847961426, "learning_rate": 1.3486722003728005e-05, "loss": 0.4011, "step": 19945 }, { "epoch": 3.2559895514468797, "grad_norm": 3.1306192874908447, "learning_rate": 1.3486121090198894e-05, "loss": 0.4799, "step": 19946 }, { "epoch": 3.256152810089384, "grad_norm": 2.9649689197540283, "learning_rate": 1.3485520162339656e-05, "loss": 0.366, "step": 19947 }, { "epoch": 3.2563160687318886, "grad_norm": 3.249157667160034, "learning_rate": 1.3484919220152764e-05, "loss": 0.5257, "step": 19948 }, { "epoch": 3.256479327374393, "grad_norm": 2.7957890033721924, "learning_rate": 1.3484318263640682e-05, "loss": 0.4488, "step": 19949 }, { "epoch": 3.2566425860168975, "grad_norm": 2.8151907920837402, "learning_rate": 1.3483717292805882e-05, "loss": 0.4202, "step": 19950 }, { "epoch": 3.2568058446594015, "grad_norm": 3.6503546237945557, "learning_rate": 1.3483116307650837e-05, "loss": 0.4663, "step": 19951 }, { "epoch": 3.256969103301906, "grad_norm": 3.4035463333129883, "learning_rate": 1.3482515308178014e-05, "loss": 0.5543, "step": 19952 }, { "epoch": 3.2571323619444104, "grad_norm": 2.9996368885040283, "learning_rate": 1.3481914294389888e-05, "loss": 0.4447, "step": 19953 }, { "epoch": 3.257295620586915, "grad_norm": 3.22267746925354, "learning_rate": 1.3481313266288927e-05, "loss": 0.4811, "step": 19954 }, { "epoch": 3.2574588792294192, "grad_norm": 2.8145101070404053, "learning_rate": 1.3480712223877604e-05, "loss": 0.4453, "step": 19955 }, { "epoch": 3.2576221378719237, "grad_norm": 3.264238119125366, "learning_rate": 1.3480111167158381e-05, "loss": 0.4711, "step": 19956 }, { "epoch": 3.257785396514428, "grad_norm": 3.0222182273864746, "learning_rate": 1.347951009613374e-05, "loss": 0.4209, "step": 19957 }, { "epoch": 3.2579486551569325, "grad_norm": 3.753300428390503, "learning_rate": 1.3478909010806145e-05, "loss": 0.4655, "step": 19958 }, { "epoch": 3.2581119137994365, "grad_norm": 2.52048659324646, "learning_rate": 1.347830791117807e-05, "loss": 0.4182, "step": 19959 }, { "epoch": 3.258275172441941, "grad_norm": 3.2409121990203857, "learning_rate": 1.3477706797251985e-05, "loss": 0.5005, "step": 19960 }, { "epoch": 3.2584384310844454, "grad_norm": 3.3279974460601807, "learning_rate": 1.347710566903036e-05, "loss": 0.4332, "step": 19961 }, { "epoch": 3.25860168972695, "grad_norm": 3.5357911586761475, "learning_rate": 1.3476504526515663e-05, "loss": 0.4797, "step": 19962 }, { "epoch": 3.2587649483694543, "grad_norm": 3.0870249271392822, "learning_rate": 1.347590336971037e-05, "loss": 0.4564, "step": 19963 }, { "epoch": 3.2589282070119587, "grad_norm": 2.651137113571167, "learning_rate": 1.3475302198616953e-05, "loss": 0.367, "step": 19964 }, { "epoch": 3.259091465654463, "grad_norm": 2.9620184898376465, "learning_rate": 1.347470101323788e-05, "loss": 0.4536, "step": 19965 }, { "epoch": 3.2592547242969676, "grad_norm": 2.482125759124756, "learning_rate": 1.3474099813575624e-05, "loss": 0.3702, "step": 19966 }, { "epoch": 3.259417982939472, "grad_norm": 2.8776564598083496, "learning_rate": 1.3473498599632655e-05, "loss": 0.4575, "step": 19967 }, { "epoch": 3.2595812415819765, "grad_norm": 3.7075631618499756, "learning_rate": 1.3472897371411441e-05, "loss": 0.7789, "step": 19968 }, { "epoch": 3.2597445002244805, "grad_norm": 3.4699411392211914, "learning_rate": 1.3472296128914461e-05, "loss": 0.4961, "step": 19969 }, { "epoch": 3.259907758866985, "grad_norm": 2.991935968399048, "learning_rate": 1.3471694872144188e-05, "loss": 0.4755, "step": 19970 }, { "epoch": 3.2600710175094894, "grad_norm": 3.364656925201416, "learning_rate": 1.347109360110308e-05, "loss": 0.5472, "step": 19971 }, { "epoch": 3.260234276151994, "grad_norm": 3.255904197692871, "learning_rate": 1.3470492315793623e-05, "loss": 0.5274, "step": 19972 }, { "epoch": 3.2603975347944982, "grad_norm": 3.489335298538208, "learning_rate": 1.3469891016218275e-05, "loss": 0.5533, "step": 19973 }, { "epoch": 3.2605607934370027, "grad_norm": 3.2940611839294434, "learning_rate": 1.346928970237952e-05, "loss": 0.5144, "step": 19974 }, { "epoch": 3.260724052079507, "grad_norm": 3.540403366088867, "learning_rate": 1.3468688374279827e-05, "loss": 0.5194, "step": 19975 }, { "epoch": 3.260887310722011, "grad_norm": 3.271087646484375, "learning_rate": 1.3468087031921662e-05, "loss": 0.4969, "step": 19976 }, { "epoch": 3.2610505693645155, "grad_norm": 3.199110269546509, "learning_rate": 1.3467485675307502e-05, "loss": 0.4167, "step": 19977 }, { "epoch": 3.26121382800702, "grad_norm": 3.5306930541992188, "learning_rate": 1.3466884304439817e-05, "loss": 0.5622, "step": 19978 }, { "epoch": 3.2613770866495244, "grad_norm": 3.1878323554992676, "learning_rate": 1.346628291932108e-05, "loss": 0.4801, "step": 19979 }, { "epoch": 3.261540345292029, "grad_norm": 2.9414212703704834, "learning_rate": 1.3465681519953763e-05, "loss": 0.4064, "step": 19980 }, { "epoch": 3.2617036039345333, "grad_norm": 3.0623228549957275, "learning_rate": 1.3465080106340338e-05, "loss": 0.4063, "step": 19981 }, { "epoch": 3.2618668625770377, "grad_norm": 3.121837854385376, "learning_rate": 1.3464478678483277e-05, "loss": 0.4228, "step": 19982 }, { "epoch": 3.262030121219542, "grad_norm": 3.474536180496216, "learning_rate": 1.3463877236385053e-05, "loss": 0.5293, "step": 19983 }, { "epoch": 3.2621933798620466, "grad_norm": 3.144975185394287, "learning_rate": 1.3463275780048137e-05, "loss": 0.4909, "step": 19984 }, { "epoch": 3.262356638504551, "grad_norm": 2.8498823642730713, "learning_rate": 1.3462674309475001e-05, "loss": 0.4682, "step": 19985 }, { "epoch": 3.262519897147055, "grad_norm": 2.7654645442962646, "learning_rate": 1.346207282466812e-05, "loss": 0.4179, "step": 19986 }, { "epoch": 3.2626831557895595, "grad_norm": 3.627699613571167, "learning_rate": 1.3461471325629965e-05, "loss": 0.5377, "step": 19987 }, { "epoch": 3.262846414432064, "grad_norm": 3.394320487976074, "learning_rate": 1.3460869812363006e-05, "loss": 0.5206, "step": 19988 }, { "epoch": 3.2630096730745684, "grad_norm": 2.488452434539795, "learning_rate": 1.3460268284869723e-05, "loss": 0.4032, "step": 19989 }, { "epoch": 3.263172931717073, "grad_norm": 3.1656954288482666, "learning_rate": 1.3459666743152577e-05, "loss": 0.4944, "step": 19990 }, { "epoch": 3.2633361903595772, "grad_norm": 2.889002799987793, "learning_rate": 1.3459065187214052e-05, "loss": 0.441, "step": 19991 }, { "epoch": 3.2634994490020817, "grad_norm": 2.846169948577881, "learning_rate": 1.3458463617056614e-05, "loss": 0.4018, "step": 19992 }, { "epoch": 3.263662707644586, "grad_norm": 3.1748287677764893, "learning_rate": 1.3457862032682739e-05, "loss": 0.429, "step": 19993 }, { "epoch": 3.26382596628709, "grad_norm": 3.3959550857543945, "learning_rate": 1.3457260434094902e-05, "loss": 0.5197, "step": 19994 }, { "epoch": 3.2639892249295945, "grad_norm": 3.6438801288604736, "learning_rate": 1.3456658821295564e-05, "loss": 0.4766, "step": 19995 }, { "epoch": 3.264152483572099, "grad_norm": 3.206360101699829, "learning_rate": 1.3456057194287215e-05, "loss": 0.5148, "step": 19996 }, { "epoch": 3.2643157422146034, "grad_norm": 3.391834259033203, "learning_rate": 1.3455455553072316e-05, "loss": 0.5133, "step": 19997 }, { "epoch": 3.264479000857108, "grad_norm": 3.6306591033935547, "learning_rate": 1.3454853897653346e-05, "loss": 0.5043, "step": 19998 }, { "epoch": 3.2646422594996123, "grad_norm": 2.678083658218384, "learning_rate": 1.3454252228032778e-05, "loss": 0.4038, "step": 19999 }, { "epoch": 3.2648055181421167, "grad_norm": 3.3872618675231934, "learning_rate": 1.3453650544213078e-05, "loss": 0.4992, "step": 20000 }, { "epoch": 3.264968776784621, "grad_norm": 3.6786413192749023, "learning_rate": 1.3453048846196729e-05, "loss": 0.4794, "step": 20001 }, { "epoch": 3.2651320354271256, "grad_norm": 2.881394386291504, "learning_rate": 1.3452447133986194e-05, "loss": 0.4161, "step": 20002 }, { "epoch": 3.26529529406963, "grad_norm": 3.0433123111724854, "learning_rate": 1.3451845407583959e-05, "loss": 0.4193, "step": 20003 }, { "epoch": 3.265458552712134, "grad_norm": 3.185621738433838, "learning_rate": 1.3451243666992491e-05, "loss": 0.5585, "step": 20004 }, { "epoch": 3.2656218113546385, "grad_norm": 3.334712505340576, "learning_rate": 1.3450641912214262e-05, "loss": 0.5004, "step": 20005 }, { "epoch": 3.265785069997143, "grad_norm": 3.4963815212249756, "learning_rate": 1.3450040143251743e-05, "loss": 0.4906, "step": 20006 }, { "epoch": 3.2659483286396473, "grad_norm": 2.9686551094055176, "learning_rate": 1.3449438360107414e-05, "loss": 0.4039, "step": 20007 }, { "epoch": 3.266111587282152, "grad_norm": 3.1998488903045654, "learning_rate": 1.3448836562783746e-05, "loss": 0.4647, "step": 20008 }, { "epoch": 3.2662748459246562, "grad_norm": 3.4396746158599854, "learning_rate": 1.3448234751283217e-05, "loss": 0.4934, "step": 20009 }, { "epoch": 3.2664381045671607, "grad_norm": 3.260671377182007, "learning_rate": 1.3447632925608296e-05, "loss": 0.5203, "step": 20010 }, { "epoch": 3.266601363209665, "grad_norm": 3.4349544048309326, "learning_rate": 1.3447031085761456e-05, "loss": 0.5288, "step": 20011 }, { "epoch": 3.266764621852169, "grad_norm": 2.8605797290802, "learning_rate": 1.344642923174517e-05, "loss": 0.3861, "step": 20012 }, { "epoch": 3.2669278804946735, "grad_norm": 3.107823610305786, "learning_rate": 1.3445827363561918e-05, "loss": 0.4227, "step": 20013 }, { "epoch": 3.267091139137178, "grad_norm": 2.862391710281372, "learning_rate": 1.3445225481214172e-05, "loss": 0.4573, "step": 20014 }, { "epoch": 3.2672543977796824, "grad_norm": 3.834601879119873, "learning_rate": 1.3444623584704405e-05, "loss": 0.4838, "step": 20015 }, { "epoch": 3.267417656422187, "grad_norm": 3.5849146842956543, "learning_rate": 1.344402167403509e-05, "loss": 0.521, "step": 20016 }, { "epoch": 3.2675809150646913, "grad_norm": 2.934417963027954, "learning_rate": 1.34434197492087e-05, "loss": 0.455, "step": 20017 }, { "epoch": 3.2677441737071957, "grad_norm": 3.1313300132751465, "learning_rate": 1.3442817810227713e-05, "loss": 0.4714, "step": 20018 }, { "epoch": 3.2679074323497, "grad_norm": 2.877417802810669, "learning_rate": 1.3442215857094605e-05, "loss": 0.4324, "step": 20019 }, { "epoch": 3.2680706909922046, "grad_norm": 2.9511892795562744, "learning_rate": 1.3441613889811844e-05, "loss": 0.4272, "step": 20020 }, { "epoch": 3.268233949634709, "grad_norm": 3.398350238800049, "learning_rate": 1.344101190838191e-05, "loss": 0.5366, "step": 20021 }, { "epoch": 3.268397208277213, "grad_norm": 2.5186681747436523, "learning_rate": 1.3440409912807275e-05, "loss": 0.372, "step": 20022 }, { "epoch": 3.2685604669197175, "grad_norm": 3.2311737537384033, "learning_rate": 1.343980790309041e-05, "loss": 0.4665, "step": 20023 }, { "epoch": 3.268723725562222, "grad_norm": 3.618689775466919, "learning_rate": 1.3439205879233798e-05, "loss": 0.5934, "step": 20024 }, { "epoch": 3.2688869842047263, "grad_norm": 3.5806901454925537, "learning_rate": 1.3438603841239908e-05, "loss": 0.4561, "step": 20025 }, { "epoch": 3.269050242847231, "grad_norm": 3.401341438293457, "learning_rate": 1.3438001789111217e-05, "loss": 0.4404, "step": 20026 }, { "epoch": 3.269213501489735, "grad_norm": 3.116734027862549, "learning_rate": 1.3437399722850197e-05, "loss": 0.4501, "step": 20027 }, { "epoch": 3.2693767601322397, "grad_norm": 3.378812074661255, "learning_rate": 1.3436797642459325e-05, "loss": 0.5289, "step": 20028 }, { "epoch": 3.2695400187747437, "grad_norm": 2.8661305904388428, "learning_rate": 1.3436195547941075e-05, "loss": 0.4431, "step": 20029 }, { "epoch": 3.269703277417248, "grad_norm": 2.983614683151245, "learning_rate": 1.3435593439297925e-05, "loss": 0.4754, "step": 20030 }, { "epoch": 3.2698665360597525, "grad_norm": 2.703634738922119, "learning_rate": 1.3434991316532346e-05, "loss": 0.4382, "step": 20031 }, { "epoch": 3.270029794702257, "grad_norm": 2.489429235458374, "learning_rate": 1.3434389179646816e-05, "loss": 0.363, "step": 20032 }, { "epoch": 3.2701930533447614, "grad_norm": 3.160801410675049, "learning_rate": 1.3433787028643808e-05, "loss": 0.435, "step": 20033 }, { "epoch": 3.270356311987266, "grad_norm": 3.487002372741699, "learning_rate": 1.3433184863525797e-05, "loss": 0.5381, "step": 20034 }, { "epoch": 3.2705195706297703, "grad_norm": 3.2354815006256104, "learning_rate": 1.3432582684295262e-05, "loss": 0.5114, "step": 20035 }, { "epoch": 3.2706828292722747, "grad_norm": 3.4618823528289795, "learning_rate": 1.3431980490954672e-05, "loss": 0.5629, "step": 20036 }, { "epoch": 3.270846087914779, "grad_norm": 3.1569974422454834, "learning_rate": 1.343137828350651e-05, "loss": 0.4733, "step": 20037 }, { "epoch": 3.2710093465572836, "grad_norm": 2.967167854309082, "learning_rate": 1.3430776061953246e-05, "loss": 0.417, "step": 20038 }, { "epoch": 3.2711726051997876, "grad_norm": 3.680455446243286, "learning_rate": 1.3430173826297355e-05, "loss": 0.5496, "step": 20039 }, { "epoch": 3.271335863842292, "grad_norm": 3.2328479290008545, "learning_rate": 1.3429571576541315e-05, "loss": 0.5697, "step": 20040 }, { "epoch": 3.2714991224847965, "grad_norm": 3.5806784629821777, "learning_rate": 1.3428969312687603e-05, "loss": 0.5238, "step": 20041 }, { "epoch": 3.271662381127301, "grad_norm": 3.8624823093414307, "learning_rate": 1.3428367034738691e-05, "loss": 0.687, "step": 20042 }, { "epoch": 3.2718256397698053, "grad_norm": 2.8891589641571045, "learning_rate": 1.3427764742697062e-05, "loss": 0.5052, "step": 20043 }, { "epoch": 3.2719888984123098, "grad_norm": 3.112952470779419, "learning_rate": 1.3427162436565179e-05, "loss": 0.4123, "step": 20044 }, { "epoch": 3.272152157054814, "grad_norm": 3.1840028762817383, "learning_rate": 1.342656011634553e-05, "loss": 0.4858, "step": 20045 }, { "epoch": 3.2723154156973187, "grad_norm": 3.4235751628875732, "learning_rate": 1.3425957782040586e-05, "loss": 0.4967, "step": 20046 }, { "epoch": 3.2724786743398226, "grad_norm": 3.6805291175842285, "learning_rate": 1.3425355433652823e-05, "loss": 0.5051, "step": 20047 }, { "epoch": 3.272641932982327, "grad_norm": 3.3159101009368896, "learning_rate": 1.3424753071184717e-05, "loss": 0.5177, "step": 20048 }, { "epoch": 3.2728051916248315, "grad_norm": 3.004194736480713, "learning_rate": 1.3424150694638744e-05, "loss": 0.4188, "step": 20049 }, { "epoch": 3.272968450267336, "grad_norm": 2.5126454830169678, "learning_rate": 1.342354830401738e-05, "loss": 0.3969, "step": 20050 }, { "epoch": 3.2731317089098404, "grad_norm": 3.3804073333740234, "learning_rate": 1.3422945899323101e-05, "loss": 0.4943, "step": 20051 }, { "epoch": 3.273294967552345, "grad_norm": 2.777881383895874, "learning_rate": 1.3422343480558384e-05, "loss": 0.403, "step": 20052 }, { "epoch": 3.2734582261948493, "grad_norm": 2.638123035430908, "learning_rate": 1.3421741047725707e-05, "loss": 0.4189, "step": 20053 }, { "epoch": 3.2736214848373537, "grad_norm": 3.3546974658966064, "learning_rate": 1.3421138600827544e-05, "loss": 0.4974, "step": 20054 }, { "epoch": 3.273784743479858, "grad_norm": 2.7470703125, "learning_rate": 1.3420536139866371e-05, "loss": 0.3796, "step": 20055 }, { "epoch": 3.2739480021223626, "grad_norm": 2.6697866916656494, "learning_rate": 1.3419933664844665e-05, "loss": 0.4139, "step": 20056 }, { "epoch": 3.2741112607648666, "grad_norm": 3.1424167156219482, "learning_rate": 1.3419331175764905e-05, "loss": 0.5931, "step": 20057 }, { "epoch": 3.274274519407371, "grad_norm": 3.166727066040039, "learning_rate": 1.3418728672629565e-05, "loss": 0.4999, "step": 20058 }, { "epoch": 3.2744377780498755, "grad_norm": 2.9920291900634766, "learning_rate": 1.3418126155441122e-05, "loss": 0.4786, "step": 20059 }, { "epoch": 3.27460103669238, "grad_norm": 3.4738399982452393, "learning_rate": 1.3417523624202053e-05, "loss": 0.5092, "step": 20060 }, { "epoch": 3.2747642953348843, "grad_norm": 2.7055623531341553, "learning_rate": 1.3416921078914835e-05, "loss": 0.4474, "step": 20061 }, { "epoch": 3.2749275539773888, "grad_norm": 4.366565227508545, "learning_rate": 1.3416318519581943e-05, "loss": 0.5469, "step": 20062 }, { "epoch": 3.275090812619893, "grad_norm": 3.078676462173462, "learning_rate": 1.3415715946205859e-05, "loss": 0.4857, "step": 20063 }, { "epoch": 3.275254071262397, "grad_norm": 3.1790049076080322, "learning_rate": 1.3415113358789054e-05, "loss": 0.5621, "step": 20064 }, { "epoch": 3.2754173299049016, "grad_norm": 3.2083375453948975, "learning_rate": 1.3414510757334009e-05, "loss": 0.4586, "step": 20065 }, { "epoch": 3.275580588547406, "grad_norm": 3.2327613830566406, "learning_rate": 1.3413908141843198e-05, "loss": 0.5436, "step": 20066 }, { "epoch": 3.2757438471899105, "grad_norm": 3.4388370513916016, "learning_rate": 1.3413305512319099e-05, "loss": 0.5528, "step": 20067 }, { "epoch": 3.275907105832415, "grad_norm": 3.2377374172210693, "learning_rate": 1.3412702868764194e-05, "loss": 0.5147, "step": 20068 }, { "epoch": 3.2760703644749194, "grad_norm": 2.6854701042175293, "learning_rate": 1.3412100211180952e-05, "loss": 0.389, "step": 20069 }, { "epoch": 3.276233623117424, "grad_norm": 3.5063304901123047, "learning_rate": 1.3411497539571856e-05, "loss": 0.515, "step": 20070 }, { "epoch": 3.2763968817599283, "grad_norm": 2.9826009273529053, "learning_rate": 1.3410894853939382e-05, "loss": 0.4544, "step": 20071 }, { "epoch": 3.2765601404024327, "grad_norm": 3.2908055782318115, "learning_rate": 1.3410292154286007e-05, "loss": 0.4864, "step": 20072 }, { "epoch": 3.276723399044937, "grad_norm": 3.6269142627716064, "learning_rate": 1.340968944061421e-05, "loss": 0.6155, "step": 20073 }, { "epoch": 3.276886657687441, "grad_norm": 3.072094678878784, "learning_rate": 1.3409086712926463e-05, "loss": 0.4656, "step": 20074 }, { "epoch": 3.2770499163299456, "grad_norm": 4.027831077575684, "learning_rate": 1.340848397122525e-05, "loss": 0.5567, "step": 20075 }, { "epoch": 3.27721317497245, "grad_norm": 2.8927905559539795, "learning_rate": 1.3407881215513048e-05, "loss": 0.4244, "step": 20076 }, { "epoch": 3.2773764336149545, "grad_norm": 2.8374297618865967, "learning_rate": 1.3407278445792332e-05, "loss": 0.403, "step": 20077 }, { "epoch": 3.277539692257459, "grad_norm": 3.8274693489074707, "learning_rate": 1.340667566206558e-05, "loss": 1.0034, "step": 20078 }, { "epoch": 3.2777029508999633, "grad_norm": 3.243194580078125, "learning_rate": 1.3406072864335271e-05, "loss": 0.4811, "step": 20079 }, { "epoch": 3.2778662095424678, "grad_norm": 2.757004499435425, "learning_rate": 1.3405470052603884e-05, "loss": 0.4337, "step": 20080 }, { "epoch": 3.278029468184972, "grad_norm": 3.2306034564971924, "learning_rate": 1.3404867226873895e-05, "loss": 0.4651, "step": 20081 }, { "epoch": 3.278192726827476, "grad_norm": 2.960418462753296, "learning_rate": 1.3404264387147782e-05, "loss": 0.4504, "step": 20082 }, { "epoch": 3.2783559854699806, "grad_norm": 2.826927900314331, "learning_rate": 1.3403661533428023e-05, "loss": 0.4099, "step": 20083 }, { "epoch": 3.278519244112485, "grad_norm": 3.022535562515259, "learning_rate": 1.3403058665717097e-05, "loss": 0.4636, "step": 20084 }, { "epoch": 3.2786825027549895, "grad_norm": 3.6174376010894775, "learning_rate": 1.340245578401748e-05, "loss": 0.5242, "step": 20085 }, { "epoch": 3.278845761397494, "grad_norm": 3.330873966217041, "learning_rate": 1.3401852888331655e-05, "loss": 0.5369, "step": 20086 }, { "epoch": 3.2790090200399984, "grad_norm": 3.568190097808838, "learning_rate": 1.3401249978662098e-05, "loss": 0.5298, "step": 20087 }, { "epoch": 3.279172278682503, "grad_norm": 2.6179451942443848, "learning_rate": 1.3400647055011285e-05, "loss": 0.4507, "step": 20088 }, { "epoch": 3.2793355373250073, "grad_norm": 2.5161032676696777, "learning_rate": 1.3400044117381695e-05, "loss": 0.4412, "step": 20089 }, { "epoch": 3.2794987959675117, "grad_norm": 2.77866792678833, "learning_rate": 1.3399441165775808e-05, "loss": 0.3963, "step": 20090 }, { "epoch": 3.279662054610016, "grad_norm": 2.8430168628692627, "learning_rate": 1.3398838200196103e-05, "loss": 0.4182, "step": 20091 }, { "epoch": 3.27982531325252, "grad_norm": 2.9713363647460938, "learning_rate": 1.339823522064506e-05, "loss": 0.4188, "step": 20092 }, { "epoch": 3.2799885718950246, "grad_norm": 3.5357816219329834, "learning_rate": 1.339763222712515e-05, "loss": 0.5667, "step": 20093 }, { "epoch": 3.280151830537529, "grad_norm": 2.9815115928649902, "learning_rate": 1.3397029219638857e-05, "loss": 0.516, "step": 20094 }, { "epoch": 3.2803150891800334, "grad_norm": 3.187945604324341, "learning_rate": 1.3396426198188661e-05, "loss": 0.4506, "step": 20095 }, { "epoch": 3.280478347822538, "grad_norm": 3.971503496170044, "learning_rate": 1.339582316277704e-05, "loss": 0.5485, "step": 20096 }, { "epoch": 3.2806416064650423, "grad_norm": 2.0891354084014893, "learning_rate": 1.3395220113406475e-05, "loss": 0.3049, "step": 20097 }, { "epoch": 3.2808048651075468, "grad_norm": 3.6733243465423584, "learning_rate": 1.339461705007944e-05, "loss": 0.5392, "step": 20098 }, { "epoch": 3.280968123750051, "grad_norm": 2.8120908737182617, "learning_rate": 1.3394013972798414e-05, "loss": 0.4181, "step": 20099 }, { "epoch": 3.281131382392555, "grad_norm": 3.6181681156158447, "learning_rate": 1.3393410881565878e-05, "loss": 0.5189, "step": 20100 }, { "epoch": 3.2812946410350596, "grad_norm": 3.3222239017486572, "learning_rate": 1.3392807776384312e-05, "loss": 0.5259, "step": 20101 }, { "epoch": 3.281457899677564, "grad_norm": 3.1985995769500732, "learning_rate": 1.3392204657256196e-05, "loss": 0.6077, "step": 20102 }, { "epoch": 3.2816211583200685, "grad_norm": 2.555500030517578, "learning_rate": 1.3391601524184008e-05, "loss": 0.4056, "step": 20103 }, { "epoch": 3.281784416962573, "grad_norm": 3.468564987182617, "learning_rate": 1.3390998377170226e-05, "loss": 0.4648, "step": 20104 }, { "epoch": 3.2819476756050774, "grad_norm": 3.0374884605407715, "learning_rate": 1.3390395216217326e-05, "loss": 0.5027, "step": 20105 }, { "epoch": 3.282110934247582, "grad_norm": 3.415224552154541, "learning_rate": 1.3389792041327794e-05, "loss": 0.4505, "step": 20106 }, { "epoch": 3.2822741928900863, "grad_norm": 3.0849931240081787, "learning_rate": 1.3389188852504109e-05, "loss": 0.4238, "step": 20107 }, { "epoch": 3.2824374515325907, "grad_norm": 3.253502130508423, "learning_rate": 1.3388585649748748e-05, "loss": 0.5634, "step": 20108 }, { "epoch": 3.282600710175095, "grad_norm": 3.1719844341278076, "learning_rate": 1.338798243306419e-05, "loss": 0.4796, "step": 20109 }, { "epoch": 3.282763968817599, "grad_norm": 2.821591854095459, "learning_rate": 1.3387379202452917e-05, "loss": 0.4207, "step": 20110 }, { "epoch": 3.2829272274601036, "grad_norm": 3.825505495071411, "learning_rate": 1.3386775957917402e-05, "loss": 0.542, "step": 20111 }, { "epoch": 3.283090486102608, "grad_norm": 3.608583688735962, "learning_rate": 1.3386172699460135e-05, "loss": 0.7885, "step": 20112 }, { "epoch": 3.2832537447451124, "grad_norm": 3.6057369709014893, "learning_rate": 1.3385569427083592e-05, "loss": 0.549, "step": 20113 }, { "epoch": 3.283417003387617, "grad_norm": 3.2405812740325928, "learning_rate": 1.3384966140790247e-05, "loss": 0.4346, "step": 20114 }, { "epoch": 3.2835802620301213, "grad_norm": 3.0607333183288574, "learning_rate": 1.3384362840582586e-05, "loss": 0.5, "step": 20115 }, { "epoch": 3.2837435206726258, "grad_norm": 3.7672696113586426, "learning_rate": 1.3383759526463088e-05, "loss": 0.4801, "step": 20116 }, { "epoch": 3.2839067793151298, "grad_norm": 3.218693494796753, "learning_rate": 1.3383156198434231e-05, "loss": 0.4365, "step": 20117 }, { "epoch": 3.284070037957634, "grad_norm": 3.25901460647583, "learning_rate": 1.3382552856498496e-05, "loss": 0.4487, "step": 20118 }, { "epoch": 3.2842332966001386, "grad_norm": 3.898179769515991, "learning_rate": 1.3381949500658365e-05, "loss": 0.527, "step": 20119 }, { "epoch": 3.284396555242643, "grad_norm": 3.0970678329467773, "learning_rate": 1.3381346130916316e-05, "loss": 0.4692, "step": 20120 }, { "epoch": 3.2845598138851475, "grad_norm": 3.2419183254241943, "learning_rate": 1.3380742747274828e-05, "loss": 0.4851, "step": 20121 }, { "epoch": 3.284723072527652, "grad_norm": 3.449781894683838, "learning_rate": 1.3380139349736385e-05, "loss": 0.4521, "step": 20122 }, { "epoch": 3.2848863311701564, "grad_norm": 2.82429838180542, "learning_rate": 1.3379535938303465e-05, "loss": 0.4705, "step": 20123 }, { "epoch": 3.285049589812661, "grad_norm": 3.838923215866089, "learning_rate": 1.337893251297855e-05, "loss": 0.5165, "step": 20124 }, { "epoch": 3.2852128484551653, "grad_norm": 2.979285717010498, "learning_rate": 1.3378329073764118e-05, "loss": 0.3855, "step": 20125 }, { "epoch": 3.2853761070976697, "grad_norm": 3.273672342300415, "learning_rate": 1.3377725620662654e-05, "loss": 0.4847, "step": 20126 }, { "epoch": 3.2855393657401737, "grad_norm": 2.934845447540283, "learning_rate": 1.337712215367663e-05, "loss": 0.3842, "step": 20127 }, { "epoch": 3.285702624382678, "grad_norm": 2.6774260997772217, "learning_rate": 1.3376518672808533e-05, "loss": 0.4268, "step": 20128 }, { "epoch": 3.2858658830251826, "grad_norm": 3.3628578186035156, "learning_rate": 1.3375915178060845e-05, "loss": 0.5214, "step": 20129 }, { "epoch": 3.286029141667687, "grad_norm": 2.892089366912842, "learning_rate": 1.3375311669436044e-05, "loss": 0.3841, "step": 20130 }, { "epoch": 3.2861924003101914, "grad_norm": 3.2449018955230713, "learning_rate": 1.3374708146936612e-05, "loss": 0.4514, "step": 20131 }, { "epoch": 3.286355658952696, "grad_norm": 3.6590213775634766, "learning_rate": 1.3374104610565029e-05, "loss": 0.9125, "step": 20132 }, { "epoch": 3.2865189175952003, "grad_norm": 3.1087563037872314, "learning_rate": 1.3373501060323773e-05, "loss": 0.4254, "step": 20133 }, { "epoch": 3.2866821762377048, "grad_norm": 3.0875742435455322, "learning_rate": 1.337289749621533e-05, "loss": 0.4657, "step": 20134 }, { "epoch": 3.2868454348802087, "grad_norm": 2.9067165851593018, "learning_rate": 1.3372293918242179e-05, "loss": 0.4651, "step": 20135 }, { "epoch": 3.287008693522713, "grad_norm": 3.0405194759368896, "learning_rate": 1.3371690326406803e-05, "loss": 0.4112, "step": 20136 }, { "epoch": 3.2871719521652176, "grad_norm": 3.4585773944854736, "learning_rate": 1.337108672071168e-05, "loss": 0.5327, "step": 20137 }, { "epoch": 3.287335210807722, "grad_norm": 3.052062749862671, "learning_rate": 1.3370483101159289e-05, "loss": 0.4756, "step": 20138 }, { "epoch": 3.2874984694502265, "grad_norm": 3.2589659690856934, "learning_rate": 1.3369879467752117e-05, "loss": 0.5032, "step": 20139 }, { "epoch": 3.287661728092731, "grad_norm": 2.9217333793640137, "learning_rate": 1.3369275820492643e-05, "loss": 0.3924, "step": 20140 }, { "epoch": 3.2878249867352354, "grad_norm": 2.6279072761535645, "learning_rate": 1.3368672159383348e-05, "loss": 0.4397, "step": 20141 }, { "epoch": 3.28798824537774, "grad_norm": 3.1035068035125732, "learning_rate": 1.3368068484426715e-05, "loss": 0.5001, "step": 20142 }, { "epoch": 3.2881515040202443, "grad_norm": 3.266065835952759, "learning_rate": 1.3367464795625225e-05, "loss": 0.5139, "step": 20143 }, { "epoch": 3.2883147626627487, "grad_norm": 3.1492371559143066, "learning_rate": 1.3366861092981354e-05, "loss": 0.5132, "step": 20144 }, { "epoch": 3.2884780213052527, "grad_norm": 3.6329147815704346, "learning_rate": 1.3366257376497592e-05, "loss": 0.4786, "step": 20145 }, { "epoch": 3.288641279947757, "grad_norm": 3.244279146194458, "learning_rate": 1.3365653646176417e-05, "loss": 0.4885, "step": 20146 }, { "epoch": 3.2888045385902616, "grad_norm": 3.270606517791748, "learning_rate": 1.3365049902020311e-05, "loss": 0.4297, "step": 20147 }, { "epoch": 3.288967797232766, "grad_norm": 3.0856776237487793, "learning_rate": 1.3364446144031755e-05, "loss": 0.4874, "step": 20148 }, { "epoch": 3.2891310558752704, "grad_norm": 3.1779329776763916, "learning_rate": 1.3363842372213232e-05, "loss": 0.4286, "step": 20149 }, { "epoch": 3.289294314517775, "grad_norm": 2.9905712604522705, "learning_rate": 1.336323858656722e-05, "loss": 0.4633, "step": 20150 }, { "epoch": 3.2894575731602793, "grad_norm": 2.858313798904419, "learning_rate": 1.3362634787096208e-05, "loss": 0.4224, "step": 20151 }, { "epoch": 3.2896208318027838, "grad_norm": 2.233785390853882, "learning_rate": 1.336203097380267e-05, "loss": 0.3683, "step": 20152 }, { "epoch": 3.2897840904452877, "grad_norm": 3.117450714111328, "learning_rate": 1.3361427146689097e-05, "loss": 0.5359, "step": 20153 }, { "epoch": 3.289947349087792, "grad_norm": 3.4654505252838135, "learning_rate": 1.3360823305757964e-05, "loss": 0.5211, "step": 20154 }, { "epoch": 3.2901106077302966, "grad_norm": 2.942640542984009, "learning_rate": 1.3360219451011753e-05, "loss": 0.4443, "step": 20155 }, { "epoch": 3.290273866372801, "grad_norm": 3.3766276836395264, "learning_rate": 1.335961558245295e-05, "loss": 0.5474, "step": 20156 }, { "epoch": 3.2904371250153055, "grad_norm": 3.620584487915039, "learning_rate": 1.335901170008404e-05, "loss": 0.518, "step": 20157 }, { "epoch": 3.29060038365781, "grad_norm": 3.2211263179779053, "learning_rate": 1.3358407803907496e-05, "loss": 0.4385, "step": 20158 }, { "epoch": 3.2907636423003144, "grad_norm": 2.323751449584961, "learning_rate": 1.3357803893925807e-05, "loss": 0.3858, "step": 20159 }, { "epoch": 3.290926900942819, "grad_norm": 3.0928163528442383, "learning_rate": 1.3357199970141457e-05, "loss": 0.4588, "step": 20160 }, { "epoch": 3.2910901595853232, "grad_norm": 3.308314800262451, "learning_rate": 1.3356596032556923e-05, "loss": 0.4588, "step": 20161 }, { "epoch": 3.2912534182278272, "grad_norm": 2.7867114543914795, "learning_rate": 1.335599208117469e-05, "loss": 0.4137, "step": 20162 }, { "epoch": 3.2914166768703317, "grad_norm": 3.360182046890259, "learning_rate": 1.335538811599724e-05, "loss": 0.5528, "step": 20163 }, { "epoch": 3.291579935512836, "grad_norm": 3.4750139713287354, "learning_rate": 1.3354784137027057e-05, "loss": 0.5147, "step": 20164 }, { "epoch": 3.2917431941553406, "grad_norm": 3.0945682525634766, "learning_rate": 1.3354180144266625e-05, "loss": 0.4646, "step": 20165 }, { "epoch": 3.291906452797845, "grad_norm": 2.908267021179199, "learning_rate": 1.3353576137718424e-05, "loss": 0.5128, "step": 20166 }, { "epoch": 3.2920697114403494, "grad_norm": 3.669739246368408, "learning_rate": 1.3352972117384937e-05, "loss": 0.6908, "step": 20167 }, { "epoch": 3.292232970082854, "grad_norm": 3.0249276161193848, "learning_rate": 1.3352368083268648e-05, "loss": 0.4975, "step": 20168 }, { "epoch": 3.2923962287253583, "grad_norm": 3.4519588947296143, "learning_rate": 1.335176403537204e-05, "loss": 0.4513, "step": 20169 }, { "epoch": 3.2925594873678623, "grad_norm": 3.317293882369995, "learning_rate": 1.3351159973697598e-05, "loss": 0.4874, "step": 20170 }, { "epoch": 3.2927227460103667, "grad_norm": 2.7271945476531982, "learning_rate": 1.3350555898247799e-05, "loss": 0.4506, "step": 20171 }, { "epoch": 3.292886004652871, "grad_norm": 3.396461009979248, "learning_rate": 1.334995180902513e-05, "loss": 0.5479, "step": 20172 }, { "epoch": 3.2930492632953756, "grad_norm": 3.316774845123291, "learning_rate": 1.3349347706032075e-05, "loss": 0.4707, "step": 20173 }, { "epoch": 3.29321252193788, "grad_norm": 3.7115213871002197, "learning_rate": 1.3348743589271115e-05, "loss": 0.4407, "step": 20174 }, { "epoch": 3.2933757805803845, "grad_norm": 3.607377052307129, "learning_rate": 1.334813945874474e-05, "loss": 0.472, "step": 20175 }, { "epoch": 3.293539039222889, "grad_norm": 3.3526954650878906, "learning_rate": 1.3347535314455425e-05, "loss": 0.538, "step": 20176 }, { "epoch": 3.2937022978653934, "grad_norm": 3.604198455810547, "learning_rate": 1.3346931156405651e-05, "loss": 0.608, "step": 20177 }, { "epoch": 3.293865556507898, "grad_norm": 3.8005499839782715, "learning_rate": 1.334632698459791e-05, "loss": 0.52, "step": 20178 }, { "epoch": 3.2940288151504022, "grad_norm": 3.0134332180023193, "learning_rate": 1.3345722799034684e-05, "loss": 0.4071, "step": 20179 }, { "epoch": 3.2941920737929062, "grad_norm": 3.308011770248413, "learning_rate": 1.3345118599718456e-05, "loss": 0.4632, "step": 20180 }, { "epoch": 3.2943553324354107, "grad_norm": 3.2819440364837646, "learning_rate": 1.3344514386651708e-05, "loss": 0.4656, "step": 20181 }, { "epoch": 3.294518591077915, "grad_norm": 3.7904176712036133, "learning_rate": 1.334391015983692e-05, "loss": 0.5947, "step": 20182 }, { "epoch": 3.2946818497204196, "grad_norm": 3.103039503097534, "learning_rate": 1.3343305919276584e-05, "loss": 0.414, "step": 20183 }, { "epoch": 3.294845108362924, "grad_norm": 3.1354639530181885, "learning_rate": 1.334270166497318e-05, "loss": 0.5371, "step": 20184 }, { "epoch": 3.2950083670054284, "grad_norm": 2.663113594055176, "learning_rate": 1.334209739692919e-05, "loss": 0.4508, "step": 20185 }, { "epoch": 3.295171625647933, "grad_norm": 2.8645646572113037, "learning_rate": 1.3341493115147099e-05, "loss": 0.4737, "step": 20186 }, { "epoch": 3.2953348842904373, "grad_norm": 2.8120622634887695, "learning_rate": 1.3340888819629394e-05, "loss": 0.4274, "step": 20187 }, { "epoch": 3.2954981429329413, "grad_norm": 3.157599925994873, "learning_rate": 1.3340284510378552e-05, "loss": 0.448, "step": 20188 }, { "epoch": 3.2956614015754457, "grad_norm": 3.1366078853607178, "learning_rate": 1.3339680187397063e-05, "loss": 0.4943, "step": 20189 }, { "epoch": 3.29582466021795, "grad_norm": 3.1001081466674805, "learning_rate": 1.3339075850687416e-05, "loss": 0.4414, "step": 20190 }, { "epoch": 3.2959879188604546, "grad_norm": 2.6063332557678223, "learning_rate": 1.3338471500252084e-05, "loss": 0.4116, "step": 20191 }, { "epoch": 3.296151177502959, "grad_norm": 3.518932819366455, "learning_rate": 1.3337867136093556e-05, "loss": 0.4544, "step": 20192 }, { "epoch": 3.2963144361454635, "grad_norm": 3.661977767944336, "learning_rate": 1.3337262758214317e-05, "loss": 0.565, "step": 20193 }, { "epoch": 3.296477694787968, "grad_norm": 4.18003511428833, "learning_rate": 1.3336658366616848e-05, "loss": 0.5783, "step": 20194 }, { "epoch": 3.2966409534304724, "grad_norm": 3.8810484409332275, "learning_rate": 1.3336053961303644e-05, "loss": 0.6046, "step": 20195 }, { "epoch": 3.296804212072977, "grad_norm": 3.294235944747925, "learning_rate": 1.3335449542277176e-05, "loss": 0.4862, "step": 20196 }, { "epoch": 3.2969674707154812, "grad_norm": 2.7067625522613525, "learning_rate": 1.3334845109539935e-05, "loss": 0.4372, "step": 20197 }, { "epoch": 3.2971307293579852, "grad_norm": 3.911188840866089, "learning_rate": 1.3334240663094406e-05, "loss": 0.5437, "step": 20198 }, { "epoch": 3.2972939880004897, "grad_norm": 3.214668035507202, "learning_rate": 1.333363620294307e-05, "loss": 0.5194, "step": 20199 }, { "epoch": 3.297457246642994, "grad_norm": 3.046592950820923, "learning_rate": 1.333303172908842e-05, "loss": 0.4659, "step": 20200 }, { "epoch": 3.2976205052854985, "grad_norm": 3.2823901176452637, "learning_rate": 1.3332427241532932e-05, "loss": 0.5176, "step": 20201 }, { "epoch": 3.297783763928003, "grad_norm": 3.2774224281311035, "learning_rate": 1.3331822740279093e-05, "loss": 0.5325, "step": 20202 }, { "epoch": 3.2979470225705074, "grad_norm": 3.710686683654785, "learning_rate": 1.333121822532939e-05, "loss": 0.5768, "step": 20203 }, { "epoch": 3.298110281213012, "grad_norm": 3.4225423336029053, "learning_rate": 1.3330613696686305e-05, "loss": 0.5217, "step": 20204 }, { "epoch": 3.298273539855516, "grad_norm": 3.763448476791382, "learning_rate": 1.3330009154352326e-05, "loss": 0.52, "step": 20205 }, { "epoch": 3.2984367984980203, "grad_norm": 2.9319145679473877, "learning_rate": 1.3329404598329938e-05, "loss": 0.4751, "step": 20206 }, { "epoch": 3.2986000571405247, "grad_norm": 3.200531482696533, "learning_rate": 1.3328800028621623e-05, "loss": 0.8616, "step": 20207 }, { "epoch": 3.298763315783029, "grad_norm": 4.4032158851623535, "learning_rate": 1.3328195445229869e-05, "loss": 0.4879, "step": 20208 }, { "epoch": 3.2989265744255336, "grad_norm": 3.1328885555267334, "learning_rate": 1.3327590848157158e-05, "loss": 0.4571, "step": 20209 }, { "epoch": 3.299089833068038, "grad_norm": 3.9939117431640625, "learning_rate": 1.332698623740598e-05, "loss": 0.4998, "step": 20210 }, { "epoch": 3.2992530917105425, "grad_norm": 2.761502981185913, "learning_rate": 1.3326381612978816e-05, "loss": 0.3823, "step": 20211 }, { "epoch": 3.299416350353047, "grad_norm": 3.1986849308013916, "learning_rate": 1.3325776974878156e-05, "loss": 0.4528, "step": 20212 }, { "epoch": 3.2995796089955514, "grad_norm": 2.8597421646118164, "learning_rate": 1.3325172323106478e-05, "loss": 0.4399, "step": 20213 }, { "epoch": 3.299742867638056, "grad_norm": 2.8124632835388184, "learning_rate": 1.3324567657666276e-05, "loss": 0.3683, "step": 20214 }, { "epoch": 3.29990612628056, "grad_norm": 2.9471778869628906, "learning_rate": 1.3323962978560033e-05, "loss": 0.4788, "step": 20215 }, { "epoch": 3.3000693849230642, "grad_norm": 3.155385732650757, "learning_rate": 1.332335828579023e-05, "loss": 0.4959, "step": 20216 }, { "epoch": 3.3002326435655687, "grad_norm": 3.3433072566986084, "learning_rate": 1.3322753579359357e-05, "loss": 0.4654, "step": 20217 }, { "epoch": 3.300395902208073, "grad_norm": 2.9529683589935303, "learning_rate": 1.3322148859269899e-05, "loss": 0.4528, "step": 20218 }, { "epoch": 3.3005591608505775, "grad_norm": 3.249473810195923, "learning_rate": 1.3321544125524341e-05, "loss": 0.4602, "step": 20219 }, { "epoch": 3.300722419493082, "grad_norm": 3.4922399520874023, "learning_rate": 1.332093937812517e-05, "loss": 0.5458, "step": 20220 }, { "epoch": 3.3008856781355864, "grad_norm": 3.1491384506225586, "learning_rate": 1.332033461707487e-05, "loss": 0.4047, "step": 20221 }, { "epoch": 3.301048936778091, "grad_norm": 3.65194034576416, "learning_rate": 1.3319729842375928e-05, "loss": 0.5085, "step": 20222 }, { "epoch": 3.301212195420595, "grad_norm": 4.04867696762085, "learning_rate": 1.331912505403083e-05, "loss": 0.5274, "step": 20223 }, { "epoch": 3.3013754540630993, "grad_norm": 3.405489444732666, "learning_rate": 1.3318520252042066e-05, "loss": 0.518, "step": 20224 }, { "epoch": 3.3015387127056037, "grad_norm": 3.2779059410095215, "learning_rate": 1.3317915436412116e-05, "loss": 0.462, "step": 20225 }, { "epoch": 3.301701971348108, "grad_norm": 3.045044422149658, "learning_rate": 1.3317310607143465e-05, "loss": 0.4528, "step": 20226 }, { "epoch": 3.3018652299906126, "grad_norm": 2.9256935119628906, "learning_rate": 1.3316705764238607e-05, "loss": 0.4684, "step": 20227 }, { "epoch": 3.302028488633117, "grad_norm": 3.5011892318725586, "learning_rate": 1.3316100907700023e-05, "loss": 0.5311, "step": 20228 }, { "epoch": 3.3021917472756215, "grad_norm": 3.5695409774780273, "learning_rate": 1.3315496037530201e-05, "loss": 0.5246, "step": 20229 }, { "epoch": 3.302355005918126, "grad_norm": 3.2844676971435547, "learning_rate": 1.3314891153731625e-05, "loss": 0.4514, "step": 20230 }, { "epoch": 3.3025182645606304, "grad_norm": 3.6529717445373535, "learning_rate": 1.3314286256306783e-05, "loss": 0.5138, "step": 20231 }, { "epoch": 3.302681523203135, "grad_norm": 3.235107421875, "learning_rate": 1.331368134525816e-05, "loss": 0.4612, "step": 20232 }, { "epoch": 3.302844781845639, "grad_norm": 3.081282615661621, "learning_rate": 1.3313076420588248e-05, "loss": 0.46, "step": 20233 }, { "epoch": 3.3030080404881432, "grad_norm": 2.9925801753997803, "learning_rate": 1.331247148229953e-05, "loss": 0.4558, "step": 20234 }, { "epoch": 3.3031712991306477, "grad_norm": 2.956479072570801, "learning_rate": 1.331186653039449e-05, "loss": 0.4392, "step": 20235 }, { "epoch": 3.303334557773152, "grad_norm": 2.966857671737671, "learning_rate": 1.331126156487562e-05, "loss": 0.45, "step": 20236 }, { "epoch": 3.3034978164156565, "grad_norm": 2.995553493499756, "learning_rate": 1.33106565857454e-05, "loss": 0.4583, "step": 20237 }, { "epoch": 3.303661075058161, "grad_norm": 2.746983766555786, "learning_rate": 1.331005159300632e-05, "loss": 0.4776, "step": 20238 }, { "epoch": 3.3038243337006654, "grad_norm": 3.3655498027801514, "learning_rate": 1.3309446586660875e-05, "loss": 0.5367, "step": 20239 }, { "epoch": 3.30398759234317, "grad_norm": 3.5386807918548584, "learning_rate": 1.330884156671154e-05, "loss": 0.4913, "step": 20240 }, { "epoch": 3.304150850985674, "grad_norm": 3.8381857872009277, "learning_rate": 1.3308236533160807e-05, "loss": 0.5219, "step": 20241 }, { "epoch": 3.3043141096281783, "grad_norm": 3.236085891723633, "learning_rate": 1.3307631486011163e-05, "loss": 0.4946, "step": 20242 }, { "epoch": 3.3044773682706827, "grad_norm": 3.2144081592559814, "learning_rate": 1.3307026425265094e-05, "loss": 0.4825, "step": 20243 }, { "epoch": 3.304640626913187, "grad_norm": 3.2453854084014893, "learning_rate": 1.3306421350925089e-05, "loss": 0.4655, "step": 20244 }, { "epoch": 3.3048038855556916, "grad_norm": 3.223959445953369, "learning_rate": 1.3305816262993636e-05, "loss": 0.4237, "step": 20245 }, { "epoch": 3.304967144198196, "grad_norm": 2.8643696308135986, "learning_rate": 1.3305211161473217e-05, "loss": 0.4244, "step": 20246 }, { "epoch": 3.3051304028407005, "grad_norm": 3.235731840133667, "learning_rate": 1.3304606046366324e-05, "loss": 0.449, "step": 20247 }, { "epoch": 3.305293661483205, "grad_norm": 3.2557051181793213, "learning_rate": 1.3304000917675446e-05, "loss": 0.4365, "step": 20248 }, { "epoch": 3.3054569201257094, "grad_norm": 3.476609468460083, "learning_rate": 1.3303395775403066e-05, "loss": 0.5301, "step": 20249 }, { "epoch": 3.305620178768214, "grad_norm": 3.0958163738250732, "learning_rate": 1.3302790619551673e-05, "loss": 0.3967, "step": 20250 }, { "epoch": 3.305783437410718, "grad_norm": 2.8203020095825195, "learning_rate": 1.3302185450123754e-05, "loss": 0.3861, "step": 20251 }, { "epoch": 3.3059466960532222, "grad_norm": 3.488858938217163, "learning_rate": 1.33015802671218e-05, "loss": 0.493, "step": 20252 }, { "epoch": 3.3061099546957267, "grad_norm": 3.365403652191162, "learning_rate": 1.3300975070548294e-05, "loss": 0.4813, "step": 20253 }, { "epoch": 3.306273213338231, "grad_norm": 2.5344319343566895, "learning_rate": 1.3300369860405726e-05, "loss": 0.4413, "step": 20254 }, { "epoch": 3.3064364719807355, "grad_norm": 2.7664904594421387, "learning_rate": 1.3299764636696587e-05, "loss": 0.4365, "step": 20255 }, { "epoch": 3.30659973062324, "grad_norm": 2.9117178916931152, "learning_rate": 1.3299159399423356e-05, "loss": 0.4259, "step": 20256 }, { "epoch": 3.3067629892657444, "grad_norm": 3.2987568378448486, "learning_rate": 1.329855414858853e-05, "loss": 0.5897, "step": 20257 }, { "epoch": 3.3069262479082484, "grad_norm": 3.4101481437683105, "learning_rate": 1.3297948884194591e-05, "loss": 0.4841, "step": 20258 }, { "epoch": 3.307089506550753, "grad_norm": 3.1412410736083984, "learning_rate": 1.3297343606244033e-05, "loss": 0.445, "step": 20259 }, { "epoch": 3.3072527651932573, "grad_norm": 3.0923097133636475, "learning_rate": 1.3296738314739339e-05, "loss": 0.4518, "step": 20260 }, { "epoch": 3.3074160238357617, "grad_norm": 3.638009548187256, "learning_rate": 1.3296133009683e-05, "loss": 0.467, "step": 20261 }, { "epoch": 3.307579282478266, "grad_norm": 2.9926609992980957, "learning_rate": 1.3295527691077499e-05, "loss": 0.4565, "step": 20262 }, { "epoch": 3.3077425411207706, "grad_norm": 2.5069351196289062, "learning_rate": 1.3294922358925333e-05, "loss": 0.3883, "step": 20263 }, { "epoch": 3.307905799763275, "grad_norm": 3.7258458137512207, "learning_rate": 1.3294317013228982e-05, "loss": 0.4773, "step": 20264 }, { "epoch": 3.3080690584057795, "grad_norm": 3.075918197631836, "learning_rate": 1.3293711653990935e-05, "loss": 0.423, "step": 20265 }, { "epoch": 3.308232317048284, "grad_norm": 3.369981527328491, "learning_rate": 1.3293106281213688e-05, "loss": 0.423, "step": 20266 }, { "epoch": 3.3083955756907883, "grad_norm": 3.2890007495880127, "learning_rate": 1.329250089489972e-05, "loss": 0.4628, "step": 20267 }, { "epoch": 3.3085588343332923, "grad_norm": 2.6232118606567383, "learning_rate": 1.329189549505153e-05, "loss": 0.3656, "step": 20268 }, { "epoch": 3.308722092975797, "grad_norm": 3.4711763858795166, "learning_rate": 1.3291290081671598e-05, "loss": 0.4941, "step": 20269 }, { "epoch": 3.308885351618301, "grad_norm": 3.6993696689605713, "learning_rate": 1.3290684654762415e-05, "loss": 0.5341, "step": 20270 }, { "epoch": 3.3090486102608057, "grad_norm": 3.1565487384796143, "learning_rate": 1.3290079214326465e-05, "loss": 0.4138, "step": 20271 }, { "epoch": 3.30921186890331, "grad_norm": 3.514949321746826, "learning_rate": 1.3289473760366249e-05, "loss": 0.4774, "step": 20272 }, { "epoch": 3.3093751275458145, "grad_norm": 3.6158740520477295, "learning_rate": 1.3288868292884247e-05, "loss": 0.5087, "step": 20273 }, { "epoch": 3.309538386188319, "grad_norm": 3.7257392406463623, "learning_rate": 1.3288262811882949e-05, "loss": 0.5698, "step": 20274 }, { "epoch": 3.3097016448308234, "grad_norm": 4.002843856811523, "learning_rate": 1.3287657317364842e-05, "loss": 0.5742, "step": 20275 }, { "epoch": 3.3098649034733274, "grad_norm": 3.4397013187408447, "learning_rate": 1.3287051809332417e-05, "loss": 0.5396, "step": 20276 }, { "epoch": 3.310028162115832, "grad_norm": 3.468596935272217, "learning_rate": 1.3286446287788164e-05, "loss": 0.4612, "step": 20277 }, { "epoch": 3.3101914207583363, "grad_norm": 2.9452497959136963, "learning_rate": 1.3285840752734574e-05, "loss": 0.3769, "step": 20278 }, { "epoch": 3.3103546794008407, "grad_norm": 3.4146618843078613, "learning_rate": 1.3285235204174131e-05, "loss": 0.5095, "step": 20279 }, { "epoch": 3.310517938043345, "grad_norm": 3.9889259338378906, "learning_rate": 1.3284629642109325e-05, "loss": 0.4882, "step": 20280 }, { "epoch": 3.3106811966858496, "grad_norm": 2.882305860519409, "learning_rate": 1.3284024066542652e-05, "loss": 0.4794, "step": 20281 }, { "epoch": 3.310844455328354, "grad_norm": 2.990696907043457, "learning_rate": 1.3283418477476589e-05, "loss": 0.4248, "step": 20282 }, { "epoch": 3.3110077139708585, "grad_norm": 3.2484233379364014, "learning_rate": 1.3282812874913637e-05, "loss": 0.4246, "step": 20283 }, { "epoch": 3.311170972613363, "grad_norm": 3.775848150253296, "learning_rate": 1.3282207258856281e-05, "loss": 0.5695, "step": 20284 }, { "epoch": 3.3113342312558673, "grad_norm": 3.301172971725464, "learning_rate": 1.3281601629307009e-05, "loss": 0.4292, "step": 20285 }, { "epoch": 3.3114974898983713, "grad_norm": 2.939081907272339, "learning_rate": 1.3280995986268313e-05, "loss": 0.4776, "step": 20286 }, { "epoch": 3.3116607485408758, "grad_norm": 2.916480302810669, "learning_rate": 1.3280390329742678e-05, "loss": 0.4793, "step": 20287 }, { "epoch": 3.31182400718338, "grad_norm": 2.78767466545105, "learning_rate": 1.32797846597326e-05, "loss": 0.4182, "step": 20288 }, { "epoch": 3.3119872658258847, "grad_norm": 4.187313556671143, "learning_rate": 1.3279178976240566e-05, "loss": 0.5606, "step": 20289 }, { "epoch": 3.312150524468389, "grad_norm": 3.219465970993042, "learning_rate": 1.3278573279269065e-05, "loss": 0.5135, "step": 20290 }, { "epoch": 3.3123137831108935, "grad_norm": 3.0636448860168457, "learning_rate": 1.3277967568820587e-05, "loss": 0.4263, "step": 20291 }, { "epoch": 3.312477041753398, "grad_norm": 3.0577797889709473, "learning_rate": 1.327736184489762e-05, "loss": 0.4673, "step": 20292 }, { "epoch": 3.312640300395902, "grad_norm": 3.282118797302246, "learning_rate": 1.3276756107502658e-05, "loss": 0.4688, "step": 20293 }, { "epoch": 3.3128035590384064, "grad_norm": 3.6026954650878906, "learning_rate": 1.327615035663819e-05, "loss": 0.5177, "step": 20294 }, { "epoch": 3.312966817680911, "grad_norm": 3.0784778594970703, "learning_rate": 1.3275544592306701e-05, "loss": 0.4277, "step": 20295 }, { "epoch": 3.3131300763234153, "grad_norm": 3.3743643760681152, "learning_rate": 1.327493881451069e-05, "loss": 0.4868, "step": 20296 }, { "epoch": 3.3132933349659197, "grad_norm": 3.1469738483428955, "learning_rate": 1.327433302325264e-05, "loss": 0.4556, "step": 20297 }, { "epoch": 3.313456593608424, "grad_norm": 3.654223918914795, "learning_rate": 1.3273727218535043e-05, "loss": 0.505, "step": 20298 }, { "epoch": 3.3136198522509286, "grad_norm": 3.406203031539917, "learning_rate": 1.327312140036039e-05, "loss": 0.437, "step": 20299 }, { "epoch": 3.313783110893433, "grad_norm": 2.746736526489258, "learning_rate": 1.327251556873117e-05, "loss": 0.412, "step": 20300 }, { "epoch": 3.3139463695359375, "grad_norm": 2.7587532997131348, "learning_rate": 1.3271909723649875e-05, "loss": 0.3826, "step": 20301 }, { "epoch": 3.314109628178442, "grad_norm": 2.7949705123901367, "learning_rate": 1.3271303865118994e-05, "loss": 0.4299, "step": 20302 }, { "epoch": 3.314272886820946, "grad_norm": 3.990457057952881, "learning_rate": 1.3270697993141018e-05, "loss": 0.5884, "step": 20303 }, { "epoch": 3.3144361454634503, "grad_norm": 2.8627090454101562, "learning_rate": 1.3270092107718437e-05, "loss": 0.5058, "step": 20304 }, { "epoch": 3.3145994041059548, "grad_norm": 3.343712091445923, "learning_rate": 1.3269486208853743e-05, "loss": 0.4355, "step": 20305 }, { "epoch": 3.314762662748459, "grad_norm": 3.985999345779419, "learning_rate": 1.3268880296549424e-05, "loss": 0.6308, "step": 20306 }, { "epoch": 3.3149259213909636, "grad_norm": 3.2093751430511475, "learning_rate": 1.3268274370807978e-05, "loss": 0.5043, "step": 20307 }, { "epoch": 3.315089180033468, "grad_norm": 3.609753370285034, "learning_rate": 1.3267668431631885e-05, "loss": 0.533, "step": 20308 }, { "epoch": 3.3152524386759725, "grad_norm": 2.5167250633239746, "learning_rate": 1.326706247902364e-05, "loss": 0.3822, "step": 20309 }, { "epoch": 3.315415697318477, "grad_norm": 2.7741897106170654, "learning_rate": 1.3266456512985739e-05, "loss": 0.4071, "step": 20310 }, { "epoch": 3.315578955960981, "grad_norm": 3.303283214569092, "learning_rate": 1.3265850533520666e-05, "loss": 0.4907, "step": 20311 }, { "epoch": 3.3157422146034854, "grad_norm": 3.1718878746032715, "learning_rate": 1.3265244540630917e-05, "loss": 0.4799, "step": 20312 }, { "epoch": 3.31590547324599, "grad_norm": 3.4306552410125732, "learning_rate": 1.3264638534318978e-05, "loss": 0.481, "step": 20313 }, { "epoch": 3.3160687318884943, "grad_norm": 3.0736629962921143, "learning_rate": 1.3264032514587343e-05, "loss": 0.463, "step": 20314 }, { "epoch": 3.3162319905309987, "grad_norm": 3.210404872894287, "learning_rate": 1.3263426481438501e-05, "loss": 0.4446, "step": 20315 }, { "epoch": 3.316395249173503, "grad_norm": 2.9318220615386963, "learning_rate": 1.3262820434874947e-05, "loss": 0.4191, "step": 20316 }, { "epoch": 3.3165585078160076, "grad_norm": 3.4726181030273438, "learning_rate": 1.3262214374899174e-05, "loss": 0.593, "step": 20317 }, { "epoch": 3.316721766458512, "grad_norm": 2.6866252422332764, "learning_rate": 1.3261608301513667e-05, "loss": 0.406, "step": 20318 }, { "epoch": 3.3168850251010165, "grad_norm": 3.0612285137176514, "learning_rate": 1.3261002214720917e-05, "loss": 0.4419, "step": 20319 }, { "epoch": 3.317048283743521, "grad_norm": 3.1303439140319824, "learning_rate": 1.326039611452342e-05, "loss": 0.4853, "step": 20320 }, { "epoch": 3.317211542386025, "grad_norm": 3.040731191635132, "learning_rate": 1.3259790000923665e-05, "loss": 0.4327, "step": 20321 }, { "epoch": 3.3173748010285293, "grad_norm": 3.2118115425109863, "learning_rate": 1.3259183873924146e-05, "loss": 0.4636, "step": 20322 }, { "epoch": 3.3175380596710338, "grad_norm": 2.8483588695526123, "learning_rate": 1.3258577733527352e-05, "loss": 0.4337, "step": 20323 }, { "epoch": 3.317701318313538, "grad_norm": 2.974384307861328, "learning_rate": 1.3257971579735776e-05, "loss": 0.4431, "step": 20324 }, { "epoch": 3.3178645769560426, "grad_norm": 3.524557113647461, "learning_rate": 1.3257365412551905e-05, "loss": 0.5624, "step": 20325 }, { "epoch": 3.318027835598547, "grad_norm": 2.8805043697357178, "learning_rate": 1.3256759231978237e-05, "loss": 0.4344, "step": 20326 }, { "epoch": 3.3181910942410515, "grad_norm": 3.29655385017395, "learning_rate": 1.3256153038017265e-05, "loss": 0.4772, "step": 20327 }, { "epoch": 3.318354352883556, "grad_norm": 3.349807024002075, "learning_rate": 1.3255546830671475e-05, "loss": 0.4719, "step": 20328 }, { "epoch": 3.31851761152606, "grad_norm": 2.9618496894836426, "learning_rate": 1.325494060994336e-05, "loss": 0.4281, "step": 20329 }, { "epoch": 3.3186808701685644, "grad_norm": 4.074008941650391, "learning_rate": 1.3254334375835414e-05, "loss": 0.5775, "step": 20330 }, { "epoch": 3.318844128811069, "grad_norm": 3.101086378097534, "learning_rate": 1.325372812835013e-05, "loss": 0.4282, "step": 20331 }, { "epoch": 3.3190073874535733, "grad_norm": 3.0930652618408203, "learning_rate": 1.3253121867489995e-05, "loss": 0.4321, "step": 20332 }, { "epoch": 3.3191706460960777, "grad_norm": 3.3366856575012207, "learning_rate": 1.3252515593257507e-05, "loss": 0.6018, "step": 20333 }, { "epoch": 3.319333904738582, "grad_norm": 2.6952571868896484, "learning_rate": 1.3251909305655155e-05, "loss": 0.3907, "step": 20334 }, { "epoch": 3.3194971633810866, "grad_norm": 2.6591553688049316, "learning_rate": 1.325130300468543e-05, "loss": 0.4294, "step": 20335 }, { "epoch": 3.319660422023591, "grad_norm": 2.723435878753662, "learning_rate": 1.3250696690350828e-05, "loss": 0.4262, "step": 20336 }, { "epoch": 3.3198236806660955, "grad_norm": 3.7031948566436768, "learning_rate": 1.3250090362653837e-05, "loss": 0.5072, "step": 20337 }, { "epoch": 3.3199869393086, "grad_norm": 3.0773062705993652, "learning_rate": 1.3249484021596955e-05, "loss": 0.4154, "step": 20338 }, { "epoch": 3.320150197951104, "grad_norm": 3.6056325435638428, "learning_rate": 1.3248877667182668e-05, "loss": 0.5323, "step": 20339 }, { "epoch": 3.3203134565936083, "grad_norm": 3.129716157913208, "learning_rate": 1.3248271299413476e-05, "loss": 0.443, "step": 20340 }, { "epoch": 3.3204767152361128, "grad_norm": 2.291065216064453, "learning_rate": 1.3247664918291862e-05, "loss": 0.3688, "step": 20341 }, { "epoch": 3.320639973878617, "grad_norm": 2.5873496532440186, "learning_rate": 1.3247058523820327e-05, "loss": 0.3651, "step": 20342 }, { "epoch": 3.3208032325211216, "grad_norm": 3.5985827445983887, "learning_rate": 1.324645211600136e-05, "loss": 0.4618, "step": 20343 }, { "epoch": 3.320966491163626, "grad_norm": 3.0347816944122314, "learning_rate": 1.3245845694837453e-05, "loss": 0.4434, "step": 20344 }, { "epoch": 3.3211297498061305, "grad_norm": 2.896211624145508, "learning_rate": 1.32452392603311e-05, "loss": 0.429, "step": 20345 }, { "epoch": 3.3212930084486345, "grad_norm": 3.3121085166931152, "learning_rate": 1.3244632812484795e-05, "loss": 0.4586, "step": 20346 }, { "epoch": 3.321456267091139, "grad_norm": 3.278790235519409, "learning_rate": 1.324402635130103e-05, "loss": 0.4765, "step": 20347 }, { "epoch": 3.3216195257336434, "grad_norm": 3.496870279312134, "learning_rate": 1.3243419876782296e-05, "loss": 0.4789, "step": 20348 }, { "epoch": 3.321782784376148, "grad_norm": 2.940321922302246, "learning_rate": 1.3242813388931089e-05, "loss": 0.4822, "step": 20349 }, { "epoch": 3.3219460430186523, "grad_norm": 3.178834915161133, "learning_rate": 1.32422068877499e-05, "loss": 0.4408, "step": 20350 }, { "epoch": 3.3221093016611567, "grad_norm": 3.0684802532196045, "learning_rate": 1.3241600373241226e-05, "loss": 0.4403, "step": 20351 }, { "epoch": 3.322272560303661, "grad_norm": 2.979098320007324, "learning_rate": 1.3240993845407552e-05, "loss": 0.4437, "step": 20352 }, { "epoch": 3.3224358189461656, "grad_norm": 2.691284656524658, "learning_rate": 1.3240387304251376e-05, "loss": 0.3773, "step": 20353 }, { "epoch": 3.32259907758867, "grad_norm": 3.047917366027832, "learning_rate": 1.3239780749775193e-05, "loss": 0.4937, "step": 20354 }, { "epoch": 3.3227623362311745, "grad_norm": 3.6856749057769775, "learning_rate": 1.3239174181981496e-05, "loss": 0.608, "step": 20355 }, { "epoch": 3.3229255948736784, "grad_norm": 2.5654942989349365, "learning_rate": 1.3238567600872776e-05, "loss": 0.3732, "step": 20356 }, { "epoch": 3.323088853516183, "grad_norm": 3.235417127609253, "learning_rate": 1.3237961006451527e-05, "loss": 0.4495, "step": 20357 }, { "epoch": 3.3232521121586873, "grad_norm": 3.4933102130889893, "learning_rate": 1.3237354398720244e-05, "loss": 0.4501, "step": 20358 }, { "epoch": 3.3234153708011918, "grad_norm": 3.054953098297119, "learning_rate": 1.3236747777681415e-05, "loss": 0.475, "step": 20359 }, { "epoch": 3.323578629443696, "grad_norm": 3.12031626701355, "learning_rate": 1.3236141143337542e-05, "loss": 0.4977, "step": 20360 }, { "epoch": 3.3237418880862006, "grad_norm": 2.7557952404022217, "learning_rate": 1.3235534495691118e-05, "loss": 0.4309, "step": 20361 }, { "epoch": 3.323905146728705, "grad_norm": 2.775475025177002, "learning_rate": 1.3234927834744628e-05, "loss": 0.3893, "step": 20362 }, { "epoch": 3.3240684053712095, "grad_norm": 3.224097728729248, "learning_rate": 1.3234321160500574e-05, "loss": 0.4374, "step": 20363 }, { "epoch": 3.3242316640137135, "grad_norm": 3.050978899002075, "learning_rate": 1.3233714472961446e-05, "loss": 0.5106, "step": 20364 }, { "epoch": 3.324394922656218, "grad_norm": 2.427551507949829, "learning_rate": 1.3233107772129737e-05, "loss": 0.4258, "step": 20365 }, { "epoch": 3.3245581812987224, "grad_norm": 2.657278537750244, "learning_rate": 1.3232501058007947e-05, "loss": 0.4561, "step": 20366 }, { "epoch": 3.324721439941227, "grad_norm": 3.103956460952759, "learning_rate": 1.3231894330598563e-05, "loss": 0.4471, "step": 20367 }, { "epoch": 3.3248846985837313, "grad_norm": 3.070413589477539, "learning_rate": 1.3231287589904081e-05, "loss": 0.3954, "step": 20368 }, { "epoch": 3.3250479572262357, "grad_norm": 3.309755563735962, "learning_rate": 1.3230680835926997e-05, "loss": 0.5206, "step": 20369 }, { "epoch": 3.32521121586874, "grad_norm": 3.0045268535614014, "learning_rate": 1.3230074068669802e-05, "loss": 0.4111, "step": 20370 }, { "epoch": 3.3253744745112446, "grad_norm": 2.9817962646484375, "learning_rate": 1.3229467288134997e-05, "loss": 0.5186, "step": 20371 }, { "epoch": 3.325537733153749, "grad_norm": 2.459367513656616, "learning_rate": 1.3228860494325067e-05, "loss": 0.4074, "step": 20372 }, { "epoch": 3.3257009917962534, "grad_norm": 3.8064608573913574, "learning_rate": 1.3228253687242511e-05, "loss": 0.504, "step": 20373 }, { "epoch": 3.3258642504387574, "grad_norm": 2.9322967529296875, "learning_rate": 1.3227646866889825e-05, "loss": 0.4583, "step": 20374 }, { "epoch": 3.326027509081262, "grad_norm": 3.2360715866088867, "learning_rate": 1.3227040033269501e-05, "loss": 0.4281, "step": 20375 }, { "epoch": 3.3261907677237663, "grad_norm": 3.3000898361206055, "learning_rate": 1.3226433186384033e-05, "loss": 0.379, "step": 20376 }, { "epoch": 3.3263540263662708, "grad_norm": 3.15998911857605, "learning_rate": 1.3225826326235918e-05, "loss": 0.4779, "step": 20377 }, { "epoch": 3.326517285008775, "grad_norm": 3.697899103164673, "learning_rate": 1.3225219452827646e-05, "loss": 0.5091, "step": 20378 }, { "epoch": 3.3266805436512796, "grad_norm": 3.231335163116455, "learning_rate": 1.322461256616172e-05, "loss": 0.4597, "step": 20379 }, { "epoch": 3.326843802293784, "grad_norm": 3.5594122409820557, "learning_rate": 1.3224005666240625e-05, "loss": 0.4987, "step": 20380 }, { "epoch": 3.3270070609362885, "grad_norm": 3.4969422817230225, "learning_rate": 1.322339875306686e-05, "loss": 0.4969, "step": 20381 }, { "epoch": 3.3271703195787925, "grad_norm": 3.648496627807617, "learning_rate": 1.3222791826642923e-05, "loss": 0.5691, "step": 20382 }, { "epoch": 3.327333578221297, "grad_norm": 3.98970890045166, "learning_rate": 1.3222184886971301e-05, "loss": 0.5993, "step": 20383 }, { "epoch": 3.3274968368638014, "grad_norm": 3.341841459274292, "learning_rate": 1.3221577934054498e-05, "loss": 0.452, "step": 20384 }, { "epoch": 3.327660095506306, "grad_norm": 3.475191116333008, "learning_rate": 1.3220970967895002e-05, "loss": 0.5038, "step": 20385 }, { "epoch": 3.3278233541488103, "grad_norm": 3.6326904296875, "learning_rate": 1.322036398849531e-05, "loss": 0.5307, "step": 20386 }, { "epoch": 3.3279866127913147, "grad_norm": 4.062152862548828, "learning_rate": 1.321975699585792e-05, "loss": 0.5219, "step": 20387 }, { "epoch": 3.328149871433819, "grad_norm": 3.0216665267944336, "learning_rate": 1.3219149989985325e-05, "loss": 0.4384, "step": 20388 }, { "epoch": 3.3283131300763236, "grad_norm": 3.833239793777466, "learning_rate": 1.3218542970880017e-05, "loss": 0.5734, "step": 20389 }, { "epoch": 3.328476388718828, "grad_norm": 4.091146469116211, "learning_rate": 1.3217935938544498e-05, "loss": 0.6047, "step": 20390 }, { "epoch": 3.328639647361332, "grad_norm": 3.019029140472412, "learning_rate": 1.3217328892981254e-05, "loss": 0.4786, "step": 20391 }, { "epoch": 3.3288029060038364, "grad_norm": 2.918217658996582, "learning_rate": 1.321672183419279e-05, "loss": 0.4015, "step": 20392 }, { "epoch": 3.328966164646341, "grad_norm": 3.8104512691497803, "learning_rate": 1.3216114762181595e-05, "loss": 0.5256, "step": 20393 }, { "epoch": 3.3291294232888453, "grad_norm": 3.4667694568634033, "learning_rate": 1.3215507676950168e-05, "loss": 0.4377, "step": 20394 }, { "epoch": 3.3292926819313498, "grad_norm": 3.3009567260742188, "learning_rate": 1.3214900578501004e-05, "loss": 0.4723, "step": 20395 }, { "epoch": 3.329455940573854, "grad_norm": 3.015211582183838, "learning_rate": 1.3214293466836599e-05, "loss": 0.4753, "step": 20396 }, { "epoch": 3.3296191992163586, "grad_norm": 3.389545440673828, "learning_rate": 1.3213686341959442e-05, "loss": 0.5093, "step": 20397 }, { "epoch": 3.329782457858863, "grad_norm": 2.6920461654663086, "learning_rate": 1.3213079203872036e-05, "loss": 0.411, "step": 20398 }, { "epoch": 3.329945716501367, "grad_norm": 2.8080639839172363, "learning_rate": 1.3212472052576874e-05, "loss": 0.4301, "step": 20399 }, { "epoch": 3.3301089751438715, "grad_norm": 2.9472827911376953, "learning_rate": 1.3211864888076458e-05, "loss": 0.5505, "step": 20400 }, { "epoch": 3.330272233786376, "grad_norm": 3.0576417446136475, "learning_rate": 1.3211257710373272e-05, "loss": 0.4399, "step": 20401 }, { "epoch": 3.3304354924288804, "grad_norm": 3.03926157951355, "learning_rate": 1.3210650519469821e-05, "loss": 0.4872, "step": 20402 }, { "epoch": 3.330598751071385, "grad_norm": 3.562934160232544, "learning_rate": 1.3210043315368594e-05, "loss": 0.39, "step": 20403 }, { "epoch": 3.3307620097138892, "grad_norm": 2.5403380393981934, "learning_rate": 1.3209436098072095e-05, "loss": 0.3658, "step": 20404 }, { "epoch": 3.3309252683563937, "grad_norm": 3.519495964050293, "learning_rate": 1.3208828867582817e-05, "loss": 0.501, "step": 20405 }, { "epoch": 3.331088526998898, "grad_norm": 2.9298815727233887, "learning_rate": 1.3208221623903255e-05, "loss": 0.4851, "step": 20406 }, { "epoch": 3.3312517856414026, "grad_norm": 3.4543113708496094, "learning_rate": 1.3207614367035904e-05, "loss": 0.4744, "step": 20407 }, { "epoch": 3.331415044283907, "grad_norm": 3.1935033798217773, "learning_rate": 1.3207007096983258e-05, "loss": 0.5041, "step": 20408 }, { "epoch": 3.331578302926411, "grad_norm": 2.9882636070251465, "learning_rate": 1.3206399813747819e-05, "loss": 0.4236, "step": 20409 }, { "epoch": 3.3317415615689154, "grad_norm": 2.8164286613464355, "learning_rate": 1.3205792517332086e-05, "loss": 0.4115, "step": 20410 }, { "epoch": 3.33190482021142, "grad_norm": 3.1284797191619873, "learning_rate": 1.3205185207738545e-05, "loss": 0.4779, "step": 20411 }, { "epoch": 3.3320680788539243, "grad_norm": 3.431095838546753, "learning_rate": 1.3204577884969699e-05, "loss": 0.5099, "step": 20412 }, { "epoch": 3.3322313374964287, "grad_norm": 3.459871530532837, "learning_rate": 1.3203970549028044e-05, "loss": 0.4897, "step": 20413 }, { "epoch": 3.332394596138933, "grad_norm": 3.807184934616089, "learning_rate": 1.3203363199916073e-05, "loss": 0.5075, "step": 20414 }, { "epoch": 3.3325578547814376, "grad_norm": 3.138381004333496, "learning_rate": 1.320275583763629e-05, "loss": 0.4696, "step": 20415 }, { "epoch": 3.332721113423942, "grad_norm": 2.9186367988586426, "learning_rate": 1.3202148462191185e-05, "loss": 0.445, "step": 20416 }, { "epoch": 3.332884372066446, "grad_norm": 3.9211995601654053, "learning_rate": 1.3201541073583258e-05, "loss": 0.4938, "step": 20417 }, { "epoch": 3.3330476307089505, "grad_norm": 3.1962485313415527, "learning_rate": 1.3200933671815002e-05, "loss": 0.4596, "step": 20418 }, { "epoch": 3.333210889351455, "grad_norm": 3.2070202827453613, "learning_rate": 1.3200326256888917e-05, "loss": 0.5048, "step": 20419 }, { "epoch": 3.3333741479939594, "grad_norm": 3.2838335037231445, "learning_rate": 1.3199718828807501e-05, "loss": 0.4157, "step": 20420 }, { "epoch": 3.333537406636464, "grad_norm": 2.616976261138916, "learning_rate": 1.3199111387573248e-05, "loss": 0.4044, "step": 20421 }, { "epoch": 3.3337006652789682, "grad_norm": 3.558887243270874, "learning_rate": 1.3198503933188655e-05, "loss": 0.5666, "step": 20422 }, { "epoch": 3.3338639239214727, "grad_norm": 2.794076442718506, "learning_rate": 1.319789646565622e-05, "loss": 0.4412, "step": 20423 }, { "epoch": 3.334027182563977, "grad_norm": 3.0348823070526123, "learning_rate": 1.3197288984978443e-05, "loss": 0.5397, "step": 20424 }, { "epoch": 3.3341904412064816, "grad_norm": 3.092175245285034, "learning_rate": 1.3196681491157816e-05, "loss": 0.4649, "step": 20425 }, { "epoch": 3.334353699848986, "grad_norm": 3.0948007106781006, "learning_rate": 1.319607398419684e-05, "loss": 0.4704, "step": 20426 }, { "epoch": 3.33451695849149, "grad_norm": 2.940093517303467, "learning_rate": 1.3195466464098008e-05, "loss": 0.4343, "step": 20427 }, { "epoch": 3.3346802171339944, "grad_norm": 2.9173591136932373, "learning_rate": 1.3194858930863822e-05, "loss": 0.4467, "step": 20428 }, { "epoch": 3.334843475776499, "grad_norm": 3.4820337295532227, "learning_rate": 1.3194251384496777e-05, "loss": 0.4706, "step": 20429 }, { "epoch": 3.3350067344190033, "grad_norm": 3.577399730682373, "learning_rate": 1.319364382499937e-05, "loss": 0.5573, "step": 20430 }, { "epoch": 3.3351699930615077, "grad_norm": 3.25667142868042, "learning_rate": 1.3193036252374101e-05, "loss": 0.4708, "step": 20431 }, { "epoch": 3.335333251704012, "grad_norm": 3.6648378372192383, "learning_rate": 1.3192428666623462e-05, "loss": 0.5776, "step": 20432 }, { "epoch": 3.3354965103465166, "grad_norm": 3.1210179328918457, "learning_rate": 1.3191821067749956e-05, "loss": 0.5225, "step": 20433 }, { "epoch": 3.3356597689890206, "grad_norm": 3.202880620956421, "learning_rate": 1.319121345575608e-05, "loss": 0.4869, "step": 20434 }, { "epoch": 3.335823027631525, "grad_norm": 3.299813747406006, "learning_rate": 1.3190605830644333e-05, "loss": 0.4975, "step": 20435 }, { "epoch": 3.3359862862740295, "grad_norm": 3.3882415294647217, "learning_rate": 1.3189998192417204e-05, "loss": 0.4906, "step": 20436 }, { "epoch": 3.336149544916534, "grad_norm": 2.7726407051086426, "learning_rate": 1.31893905410772e-05, "loss": 0.4238, "step": 20437 }, { "epoch": 3.3363128035590384, "grad_norm": 2.8713748455047607, "learning_rate": 1.3188782876626815e-05, "loss": 0.4203, "step": 20438 }, { "epoch": 3.336476062201543, "grad_norm": 3.380952835083008, "learning_rate": 1.3188175199068548e-05, "loss": 0.5045, "step": 20439 }, { "epoch": 3.3366393208440472, "grad_norm": 3.3536458015441895, "learning_rate": 1.31875675084049e-05, "loss": 0.4779, "step": 20440 }, { "epoch": 3.3368025794865517, "grad_norm": 3.417484998703003, "learning_rate": 1.318695980463836e-05, "loss": 0.4881, "step": 20441 }, { "epoch": 3.336965838129056, "grad_norm": 3.8345441818237305, "learning_rate": 1.3186352087771434e-05, "loss": 0.5696, "step": 20442 }, { "epoch": 3.3371290967715606, "grad_norm": 3.4418978691101074, "learning_rate": 1.3185744357806617e-05, "loss": 0.499, "step": 20443 }, { "epoch": 3.3372923554140645, "grad_norm": 2.9796392917633057, "learning_rate": 1.3185136614746412e-05, "loss": 0.4274, "step": 20444 }, { "epoch": 3.337455614056569, "grad_norm": 2.814166307449341, "learning_rate": 1.3184528858593311e-05, "loss": 0.4564, "step": 20445 }, { "epoch": 3.3376188726990734, "grad_norm": 3.2588589191436768, "learning_rate": 1.3183921089349811e-05, "loss": 0.4076, "step": 20446 }, { "epoch": 3.337782131341578, "grad_norm": 3.0417280197143555, "learning_rate": 1.3183313307018415e-05, "loss": 0.4511, "step": 20447 }, { "epoch": 3.3379453899840823, "grad_norm": 3.1699745655059814, "learning_rate": 1.318270551160162e-05, "loss": 0.4976, "step": 20448 }, { "epoch": 3.3381086486265867, "grad_norm": 3.1222646236419678, "learning_rate": 1.3182097703101926e-05, "loss": 0.4458, "step": 20449 }, { "epoch": 3.338271907269091, "grad_norm": 2.7759642601013184, "learning_rate": 1.3181489881521829e-05, "loss": 0.4213, "step": 20450 }, { "epoch": 3.3384351659115956, "grad_norm": 3.0586326122283936, "learning_rate": 1.318088204686383e-05, "loss": 0.4198, "step": 20451 }, { "epoch": 3.3385984245540996, "grad_norm": 3.0776619911193848, "learning_rate": 1.3180274199130424e-05, "loss": 0.4852, "step": 20452 }, { "epoch": 3.338761683196604, "grad_norm": 2.484868049621582, "learning_rate": 1.317966633832411e-05, "loss": 0.4408, "step": 20453 }, { "epoch": 3.3389249418391085, "grad_norm": 2.703899383544922, "learning_rate": 1.3179058464447392e-05, "loss": 0.4321, "step": 20454 }, { "epoch": 3.339088200481613, "grad_norm": 2.8860976696014404, "learning_rate": 1.3178450577502763e-05, "loss": 0.4943, "step": 20455 }, { "epoch": 3.3392514591241174, "grad_norm": 2.899872064590454, "learning_rate": 1.3177842677492724e-05, "loss": 0.43, "step": 20456 }, { "epoch": 3.339414717766622, "grad_norm": 3.324622631072998, "learning_rate": 1.3177234764419775e-05, "loss": 0.4978, "step": 20457 }, { "epoch": 3.3395779764091262, "grad_norm": 3.33660626411438, "learning_rate": 1.317662683828641e-05, "loss": 0.4934, "step": 20458 }, { "epoch": 3.3397412350516307, "grad_norm": 2.6076500415802, "learning_rate": 1.3176018899095136e-05, "loss": 0.4124, "step": 20459 }, { "epoch": 3.339904493694135, "grad_norm": 3.096788167953491, "learning_rate": 1.3175410946848446e-05, "loss": 0.4697, "step": 20460 }, { "epoch": 3.3400677523366396, "grad_norm": 3.4673197269439697, "learning_rate": 1.317480298154884e-05, "loss": 0.5229, "step": 20461 }, { "epoch": 3.3402310109791435, "grad_norm": 3.360476016998291, "learning_rate": 1.3174195003198818e-05, "loss": 0.4469, "step": 20462 }, { "epoch": 3.340394269621648, "grad_norm": 3.076777696609497, "learning_rate": 1.317358701180088e-05, "loss": 0.476, "step": 20463 }, { "epoch": 3.3405575282641524, "grad_norm": 2.7934579849243164, "learning_rate": 1.317297900735752e-05, "loss": 0.4914, "step": 20464 }, { "epoch": 3.340720786906657, "grad_norm": 3.188791036605835, "learning_rate": 1.3172370989871245e-05, "loss": 0.4721, "step": 20465 }, { "epoch": 3.3408840455491613, "grad_norm": 2.567772150039673, "learning_rate": 1.317176295934455e-05, "loss": 0.3948, "step": 20466 }, { "epoch": 3.3410473041916657, "grad_norm": 3.9920551776885986, "learning_rate": 1.3171154915779936e-05, "loss": 0.5389, "step": 20467 }, { "epoch": 3.34121056283417, "grad_norm": 2.999878168106079, "learning_rate": 1.31705468591799e-05, "loss": 0.4629, "step": 20468 }, { "epoch": 3.3413738214766746, "grad_norm": 3.514768362045288, "learning_rate": 1.3169938789546943e-05, "loss": 0.4924, "step": 20469 }, { "epoch": 3.3415370801191786, "grad_norm": 3.5500433444976807, "learning_rate": 1.3169330706883565e-05, "loss": 0.5482, "step": 20470 }, { "epoch": 3.341700338761683, "grad_norm": 3.089294672012329, "learning_rate": 1.3168722611192266e-05, "loss": 0.4546, "step": 20471 }, { "epoch": 3.3418635974041875, "grad_norm": 2.9605255126953125, "learning_rate": 1.3168114502475543e-05, "loss": 0.4263, "step": 20472 }, { "epoch": 3.342026856046692, "grad_norm": 3.5590784549713135, "learning_rate": 1.31675063807359e-05, "loss": 0.4954, "step": 20473 }, { "epoch": 3.3421901146891964, "grad_norm": 2.916736364364624, "learning_rate": 1.3166898245975834e-05, "loss": 0.4974, "step": 20474 }, { "epoch": 3.342353373331701, "grad_norm": 2.7514560222625732, "learning_rate": 1.3166290098197845e-05, "loss": 0.3954, "step": 20475 }, { "epoch": 3.3425166319742052, "grad_norm": 3.384892463684082, "learning_rate": 1.316568193740443e-05, "loss": 0.507, "step": 20476 }, { "epoch": 3.3426798906167097, "grad_norm": 3.015268564224243, "learning_rate": 1.3165073763598094e-05, "loss": 0.4825, "step": 20477 }, { "epoch": 3.342843149259214, "grad_norm": 3.1348040103912354, "learning_rate": 1.3164465576781336e-05, "loss": 0.7012, "step": 20478 }, { "epoch": 3.3430064079017185, "grad_norm": 2.978311538696289, "learning_rate": 1.3163857376956655e-05, "loss": 0.5052, "step": 20479 }, { "epoch": 3.3431696665442225, "grad_norm": 3.3574724197387695, "learning_rate": 1.3163249164126549e-05, "loss": 0.4238, "step": 20480 }, { "epoch": 3.343332925186727, "grad_norm": 3.545897960662842, "learning_rate": 1.316264093829352e-05, "loss": 0.6478, "step": 20481 }, { "epoch": 3.3434961838292314, "grad_norm": 2.6780800819396973, "learning_rate": 1.316203269946007e-05, "loss": 0.3779, "step": 20482 }, { "epoch": 3.343659442471736, "grad_norm": 3.503459930419922, "learning_rate": 1.3161424447628696e-05, "loss": 0.4948, "step": 20483 }, { "epoch": 3.3438227011142403, "grad_norm": 2.935373306274414, "learning_rate": 1.3160816182801904e-05, "loss": 0.4796, "step": 20484 }, { "epoch": 3.3439859597567447, "grad_norm": 3.411501884460449, "learning_rate": 1.3160207904982185e-05, "loss": 0.4307, "step": 20485 }, { "epoch": 3.344149218399249, "grad_norm": 2.457472324371338, "learning_rate": 1.3159599614172046e-05, "loss": 0.3581, "step": 20486 }, { "epoch": 3.344312477041753, "grad_norm": 3.041837215423584, "learning_rate": 1.3158991310373988e-05, "loss": 0.4735, "step": 20487 }, { "epoch": 3.3444757356842576, "grad_norm": 3.4065515995025635, "learning_rate": 1.3158382993590508e-05, "loss": 0.492, "step": 20488 }, { "epoch": 3.344638994326762, "grad_norm": 2.569849967956543, "learning_rate": 1.3157774663824109e-05, "loss": 0.4185, "step": 20489 }, { "epoch": 3.3448022529692665, "grad_norm": 2.827183961868286, "learning_rate": 1.3157166321077288e-05, "loss": 0.4186, "step": 20490 }, { "epoch": 3.344965511611771, "grad_norm": 3.19486665725708, "learning_rate": 1.3156557965352548e-05, "loss": 0.4402, "step": 20491 }, { "epoch": 3.3451287702542754, "grad_norm": 4.005911827087402, "learning_rate": 1.3155949596652393e-05, "loss": 0.489, "step": 20492 }, { "epoch": 3.34529202889678, "grad_norm": 3.3474233150482178, "learning_rate": 1.3155341214979323e-05, "loss": 0.4639, "step": 20493 }, { "epoch": 3.3454552875392842, "grad_norm": 2.7518627643585205, "learning_rate": 1.3154732820335833e-05, "loss": 0.4194, "step": 20494 }, { "epoch": 3.3456185461817887, "grad_norm": 2.843162775039673, "learning_rate": 1.315412441272443e-05, "loss": 0.4903, "step": 20495 }, { "epoch": 3.345781804824293, "grad_norm": 2.8930625915527344, "learning_rate": 1.3153515992147609e-05, "loss": 0.4436, "step": 20496 }, { "epoch": 3.345945063466797, "grad_norm": 3.063410758972168, "learning_rate": 1.3152907558607874e-05, "loss": 0.4759, "step": 20497 }, { "epoch": 3.3461083221093015, "grad_norm": 2.6151227951049805, "learning_rate": 1.3152299112107731e-05, "loss": 0.4354, "step": 20498 }, { "epoch": 3.346271580751806, "grad_norm": 3.2748653888702393, "learning_rate": 1.3151690652649676e-05, "loss": 0.4661, "step": 20499 }, { "epoch": 3.3464348393943104, "grad_norm": 3.363572835922241, "learning_rate": 1.315108218023621e-05, "loss": 0.4397, "step": 20500 }, { "epoch": 3.346598098036815, "grad_norm": 2.79217791557312, "learning_rate": 1.3150473694869834e-05, "loss": 0.4008, "step": 20501 }, { "epoch": 3.3467613566793193, "grad_norm": 4.395005702972412, "learning_rate": 1.3149865196553049e-05, "loss": 1.2678, "step": 20502 }, { "epoch": 3.3469246153218237, "grad_norm": 3.5669493675231934, "learning_rate": 1.3149256685288361e-05, "loss": 0.5368, "step": 20503 }, { "epoch": 3.347087873964328, "grad_norm": 3.0760231018066406, "learning_rate": 1.3148648161078266e-05, "loss": 0.4023, "step": 20504 }, { "epoch": 3.347251132606832, "grad_norm": 3.9271068572998047, "learning_rate": 1.3148039623925265e-05, "loss": 0.5259, "step": 20505 }, { "epoch": 3.3474143912493366, "grad_norm": 2.677027940750122, "learning_rate": 1.3147431073831864e-05, "loss": 0.4671, "step": 20506 }, { "epoch": 3.347577649891841, "grad_norm": 3.5123672485351562, "learning_rate": 1.3146822510800564e-05, "loss": 0.5493, "step": 20507 }, { "epoch": 3.3477409085343455, "grad_norm": 3.1211953163146973, "learning_rate": 1.3146213934833862e-05, "loss": 0.4462, "step": 20508 }, { "epoch": 3.34790416717685, "grad_norm": 3.525211811065674, "learning_rate": 1.3145605345934265e-05, "loss": 0.5394, "step": 20509 }, { "epoch": 3.3480674258193543, "grad_norm": 3.3282127380371094, "learning_rate": 1.3144996744104273e-05, "loss": 0.4597, "step": 20510 }, { "epoch": 3.348230684461859, "grad_norm": 2.850097894668579, "learning_rate": 1.3144388129346384e-05, "loss": 0.4589, "step": 20511 }, { "epoch": 3.3483939431043632, "grad_norm": 3.3005051612854004, "learning_rate": 1.3143779501663103e-05, "loss": 0.5106, "step": 20512 }, { "epoch": 3.3485572017468677, "grad_norm": 3.2480671405792236, "learning_rate": 1.3143170861056932e-05, "loss": 0.4659, "step": 20513 }, { "epoch": 3.348720460389372, "grad_norm": 3.582977056503296, "learning_rate": 1.3142562207530373e-05, "loss": 0.6007, "step": 20514 }, { "epoch": 3.348883719031876, "grad_norm": 3.4807920455932617, "learning_rate": 1.3141953541085928e-05, "loss": 0.4679, "step": 20515 }, { "epoch": 3.3490469776743805, "grad_norm": 3.2633705139160156, "learning_rate": 1.3141344861726095e-05, "loss": 0.4711, "step": 20516 }, { "epoch": 3.349210236316885, "grad_norm": 2.7393648624420166, "learning_rate": 1.3140736169453382e-05, "loss": 0.412, "step": 20517 }, { "epoch": 3.3493734949593894, "grad_norm": 3.405377149581909, "learning_rate": 1.3140127464270289e-05, "loss": 0.5223, "step": 20518 }, { "epoch": 3.349536753601894, "grad_norm": 2.7392237186431885, "learning_rate": 1.3139518746179319e-05, "loss": 0.4314, "step": 20519 }, { "epoch": 3.3497000122443983, "grad_norm": 3.081397771835327, "learning_rate": 1.313891001518297e-05, "loss": 0.4069, "step": 20520 }, { "epoch": 3.3498632708869027, "grad_norm": 3.800421714782715, "learning_rate": 1.3138301271283749e-05, "loss": 0.5288, "step": 20521 }, { "epoch": 3.3500265295294067, "grad_norm": 2.926055908203125, "learning_rate": 1.3137692514484156e-05, "loss": 0.4612, "step": 20522 }, { "epoch": 3.350189788171911, "grad_norm": 3.550858497619629, "learning_rate": 1.3137083744786694e-05, "loss": 0.4732, "step": 20523 }, { "epoch": 3.3503530468144156, "grad_norm": 3.358257293701172, "learning_rate": 1.3136474962193862e-05, "loss": 0.4953, "step": 20524 }, { "epoch": 3.35051630545692, "grad_norm": 2.9819629192352295, "learning_rate": 1.3135866166708169e-05, "loss": 0.5203, "step": 20525 }, { "epoch": 3.3506795640994245, "grad_norm": 2.7554333209991455, "learning_rate": 1.3135257358332115e-05, "loss": 0.4818, "step": 20526 }, { "epoch": 3.350842822741929, "grad_norm": 3.0448901653289795, "learning_rate": 1.3134648537068197e-05, "loss": 0.433, "step": 20527 }, { "epoch": 3.3510060813844333, "grad_norm": 3.0299508571624756, "learning_rate": 1.3134039702918931e-05, "loss": 0.4155, "step": 20528 }, { "epoch": 3.351169340026938, "grad_norm": 3.0642404556274414, "learning_rate": 1.3133430855886804e-05, "loss": 0.4964, "step": 20529 }, { "epoch": 3.351332598669442, "grad_norm": 3.067617893218994, "learning_rate": 1.3132821995974328e-05, "loss": 0.4315, "step": 20530 }, { "epoch": 3.3514958573119467, "grad_norm": 3.0723659992218018, "learning_rate": 1.3132213123184003e-05, "loss": 0.4484, "step": 20531 }, { "epoch": 3.3516591159544507, "grad_norm": 3.1247551441192627, "learning_rate": 1.3131604237518336e-05, "loss": 0.414, "step": 20532 }, { "epoch": 3.351822374596955, "grad_norm": 2.7805473804473877, "learning_rate": 1.3130995338979824e-05, "loss": 0.4143, "step": 20533 }, { "epoch": 3.3519856332394595, "grad_norm": 2.7049102783203125, "learning_rate": 1.3130386427570972e-05, "loss": 0.4016, "step": 20534 }, { "epoch": 3.352148891881964, "grad_norm": 3.1359527111053467, "learning_rate": 1.3129777503294282e-05, "loss": 0.4386, "step": 20535 }, { "epoch": 3.3523121505244684, "grad_norm": 2.730757713317871, "learning_rate": 1.312916856615226e-05, "loss": 0.4095, "step": 20536 }, { "epoch": 3.352475409166973, "grad_norm": 3.1314759254455566, "learning_rate": 1.312855961614741e-05, "loss": 0.47, "step": 20537 }, { "epoch": 3.3526386678094773, "grad_norm": 3.6252846717834473, "learning_rate": 1.3127950653282228e-05, "loss": 0.4953, "step": 20538 }, { "epoch": 3.3528019264519817, "grad_norm": 2.7143614292144775, "learning_rate": 1.3127341677559226e-05, "loss": 0.4235, "step": 20539 }, { "epoch": 3.3529651850944857, "grad_norm": 3.5298104286193848, "learning_rate": 1.31267326889809e-05, "loss": 0.5982, "step": 20540 }, { "epoch": 3.35312844373699, "grad_norm": 3.6981475353240967, "learning_rate": 1.3126123687549756e-05, "loss": 0.518, "step": 20541 }, { "epoch": 3.3532917023794946, "grad_norm": 3.503981113433838, "learning_rate": 1.3125514673268302e-05, "loss": 0.4826, "step": 20542 }, { "epoch": 3.353454961021999, "grad_norm": 3.3238766193389893, "learning_rate": 1.3124905646139037e-05, "loss": 0.5035, "step": 20543 }, { "epoch": 3.3536182196645035, "grad_norm": 3.209169626235962, "learning_rate": 1.3124296606164465e-05, "loss": 0.4936, "step": 20544 }, { "epoch": 3.353781478307008, "grad_norm": 2.8329312801361084, "learning_rate": 1.3123687553347086e-05, "loss": 0.3647, "step": 20545 }, { "epoch": 3.3539447369495123, "grad_norm": 3.1063969135284424, "learning_rate": 1.3123078487689407e-05, "loss": 0.4383, "step": 20546 }, { "epoch": 3.3541079955920168, "grad_norm": 2.8634400367736816, "learning_rate": 1.3122469409193935e-05, "loss": 0.4534, "step": 20547 }, { "epoch": 3.354271254234521, "grad_norm": 3.1086513996124268, "learning_rate": 1.3121860317863169e-05, "loss": 0.4876, "step": 20548 }, { "epoch": 3.3544345128770257, "grad_norm": 3.1143007278442383, "learning_rate": 1.3121251213699613e-05, "loss": 0.5297, "step": 20549 }, { "epoch": 3.3545977715195296, "grad_norm": 3.0104687213897705, "learning_rate": 1.3120642096705773e-05, "loss": 0.4764, "step": 20550 }, { "epoch": 3.354761030162034, "grad_norm": 3.202542781829834, "learning_rate": 1.3120032966884151e-05, "loss": 0.4824, "step": 20551 }, { "epoch": 3.3549242888045385, "grad_norm": 3.124997854232788, "learning_rate": 1.3119423824237249e-05, "loss": 0.4095, "step": 20552 }, { "epoch": 3.355087547447043, "grad_norm": 3.339022636413574, "learning_rate": 1.311881466876758e-05, "loss": 0.5234, "step": 20553 }, { "epoch": 3.3552508060895474, "grad_norm": 3.335303544998169, "learning_rate": 1.3118205500477636e-05, "loss": 0.5569, "step": 20554 }, { "epoch": 3.355414064732052, "grad_norm": 3.331613779067993, "learning_rate": 1.3117596319369928e-05, "loss": 0.4215, "step": 20555 }, { "epoch": 3.3555773233745563, "grad_norm": 3.6319708824157715, "learning_rate": 1.3116987125446958e-05, "loss": 0.5364, "step": 20556 }, { "epoch": 3.3557405820170607, "grad_norm": 2.8037049770355225, "learning_rate": 1.3116377918711232e-05, "loss": 0.3958, "step": 20557 }, { "epoch": 3.3559038406595647, "grad_norm": 3.482405424118042, "learning_rate": 1.3115768699165255e-05, "loss": 0.4544, "step": 20558 }, { "epoch": 3.356067099302069, "grad_norm": 3.481168031692505, "learning_rate": 1.3115159466811527e-05, "loss": 0.5298, "step": 20559 }, { "epoch": 3.3562303579445736, "grad_norm": 3.4681625366210938, "learning_rate": 1.3114550221652554e-05, "loss": 0.4974, "step": 20560 }, { "epoch": 3.356393616587078, "grad_norm": 2.9463160037994385, "learning_rate": 1.3113940963690844e-05, "loss": 0.4463, "step": 20561 }, { "epoch": 3.3565568752295825, "grad_norm": 3.0163278579711914, "learning_rate": 1.3113331692928894e-05, "loss": 0.4358, "step": 20562 }, { "epoch": 3.356720133872087, "grad_norm": 3.5075936317443848, "learning_rate": 1.3112722409369217e-05, "loss": 0.5206, "step": 20563 }, { "epoch": 3.3568833925145913, "grad_norm": 3.6630711555480957, "learning_rate": 1.311211311301431e-05, "loss": 0.533, "step": 20564 }, { "epoch": 3.3570466511570958, "grad_norm": 2.7247021198272705, "learning_rate": 1.3111503803866686e-05, "loss": 0.3816, "step": 20565 }, { "epoch": 3.3572099097996, "grad_norm": 3.03385853767395, "learning_rate": 1.3110894481928842e-05, "loss": 0.4323, "step": 20566 }, { "epoch": 3.3573731684421046, "grad_norm": 2.9907848834991455, "learning_rate": 1.3110285147203285e-05, "loss": 0.4247, "step": 20567 }, { "epoch": 3.3575364270846086, "grad_norm": 4.153687953948975, "learning_rate": 1.310967579969252e-05, "loss": 0.6591, "step": 20568 }, { "epoch": 3.357699685727113, "grad_norm": 3.740328311920166, "learning_rate": 1.3109066439399051e-05, "loss": 0.5982, "step": 20569 }, { "epoch": 3.3578629443696175, "grad_norm": 3.0265681743621826, "learning_rate": 1.3108457066325386e-05, "loss": 0.4355, "step": 20570 }, { "epoch": 3.358026203012122, "grad_norm": 3.6317355632781982, "learning_rate": 1.3107847680474029e-05, "loss": 0.5454, "step": 20571 }, { "epoch": 3.3581894616546264, "grad_norm": 2.5410776138305664, "learning_rate": 1.3107238281847483e-05, "loss": 0.417, "step": 20572 }, { "epoch": 3.358352720297131, "grad_norm": 3.1350200176239014, "learning_rate": 1.310662887044825e-05, "loss": 0.4365, "step": 20573 }, { "epoch": 3.3585159789396353, "grad_norm": 3.546335220336914, "learning_rate": 1.310601944627884e-05, "loss": 0.5264, "step": 20574 }, { "epoch": 3.3586792375821393, "grad_norm": 3.8315587043762207, "learning_rate": 1.310541000934176e-05, "loss": 0.6076, "step": 20575 }, { "epoch": 3.3588424962246437, "grad_norm": 3.0866732597351074, "learning_rate": 1.310480055963951e-05, "loss": 0.4385, "step": 20576 }, { "epoch": 3.359005754867148, "grad_norm": 2.7726030349731445, "learning_rate": 1.3104191097174599e-05, "loss": 0.402, "step": 20577 }, { "epoch": 3.3591690135096526, "grad_norm": 3.4537131786346436, "learning_rate": 1.3103581621949531e-05, "loss": 0.5493, "step": 20578 }, { "epoch": 3.359332272152157, "grad_norm": 3.593494415283203, "learning_rate": 1.3102972133966805e-05, "loss": 0.5469, "step": 20579 }, { "epoch": 3.3594955307946615, "grad_norm": 3.157968521118164, "learning_rate": 1.3102362633228936e-05, "loss": 0.4628, "step": 20580 }, { "epoch": 3.359658789437166, "grad_norm": 4.067269325256348, "learning_rate": 1.3101753119738428e-05, "loss": 0.544, "step": 20581 }, { "epoch": 3.3598220480796703, "grad_norm": 3.2908449172973633, "learning_rate": 1.310114359349778e-05, "loss": 0.4376, "step": 20582 }, { "epoch": 3.3599853067221748, "grad_norm": 3.041130781173706, "learning_rate": 1.3100534054509507e-05, "loss": 0.4353, "step": 20583 }, { "epoch": 3.360148565364679, "grad_norm": 2.94636869430542, "learning_rate": 1.3099924502776103e-05, "loss": 0.4355, "step": 20584 }, { "epoch": 3.360311824007183, "grad_norm": 2.902825355529785, "learning_rate": 1.309931493830008e-05, "loss": 0.4339, "step": 20585 }, { "epoch": 3.3604750826496876, "grad_norm": 2.9503440856933594, "learning_rate": 1.309870536108395e-05, "loss": 0.3931, "step": 20586 }, { "epoch": 3.360638341292192, "grad_norm": 2.3461716175079346, "learning_rate": 1.3098095771130208e-05, "loss": 0.3996, "step": 20587 }, { "epoch": 3.3608015999346965, "grad_norm": 3.8262832164764404, "learning_rate": 1.3097486168441364e-05, "loss": 0.5704, "step": 20588 }, { "epoch": 3.360964858577201, "grad_norm": 3.656026601791382, "learning_rate": 1.3096876553019926e-05, "loss": 0.4734, "step": 20589 }, { "epoch": 3.3611281172197054, "grad_norm": 4.077394485473633, "learning_rate": 1.3096266924868393e-05, "loss": 0.8782, "step": 20590 }, { "epoch": 3.36129137586221, "grad_norm": 3.6414990425109863, "learning_rate": 1.3095657283989281e-05, "loss": 0.522, "step": 20591 }, { "epoch": 3.3614546345047143, "grad_norm": 3.0439565181732178, "learning_rate": 1.3095047630385088e-05, "loss": 0.4876, "step": 20592 }, { "epoch": 3.3616178931472183, "grad_norm": 3.309283494949341, "learning_rate": 1.3094437964058324e-05, "loss": 0.4934, "step": 20593 }, { "epoch": 3.3617811517897227, "grad_norm": 2.6282739639282227, "learning_rate": 1.3093828285011494e-05, "loss": 0.4386, "step": 20594 }, { "epoch": 3.361944410432227, "grad_norm": 3.443605899810791, "learning_rate": 1.3093218593247105e-05, "loss": 0.474, "step": 20595 }, { "epoch": 3.3621076690747316, "grad_norm": 3.2771284580230713, "learning_rate": 1.3092608888767662e-05, "loss": 0.4998, "step": 20596 }, { "epoch": 3.362270927717236, "grad_norm": 3.8871238231658936, "learning_rate": 1.309199917157567e-05, "loss": 0.5763, "step": 20597 }, { "epoch": 3.3624341863597405, "grad_norm": 3.0681819915771484, "learning_rate": 1.309138944167364e-05, "loss": 0.5008, "step": 20598 }, { "epoch": 3.362597445002245, "grad_norm": 3.3487532138824463, "learning_rate": 1.3090779699064072e-05, "loss": 0.5907, "step": 20599 }, { "epoch": 3.3627607036447493, "grad_norm": 3.3369791507720947, "learning_rate": 1.3090169943749475e-05, "loss": 0.4562, "step": 20600 }, { "epoch": 3.3629239622872538, "grad_norm": 3.1166391372680664, "learning_rate": 1.3089560175732358e-05, "loss": 0.466, "step": 20601 }, { "epoch": 3.363087220929758, "grad_norm": 3.7836883068084717, "learning_rate": 1.3088950395015227e-05, "loss": 0.5734, "step": 20602 }, { "epoch": 3.363250479572262, "grad_norm": 3.0091612339019775, "learning_rate": 1.3088340601600584e-05, "loss": 0.4961, "step": 20603 }, { "epoch": 3.3634137382147666, "grad_norm": 3.458437442779541, "learning_rate": 1.3087730795490942e-05, "loss": 0.5008, "step": 20604 }, { "epoch": 3.363576996857271, "grad_norm": 3.9409892559051514, "learning_rate": 1.3087120976688806e-05, "loss": 0.6248, "step": 20605 }, { "epoch": 3.3637402554997755, "grad_norm": 3.6338813304901123, "learning_rate": 1.3086511145196676e-05, "loss": 0.5276, "step": 20606 }, { "epoch": 3.36390351414228, "grad_norm": 2.769972801208496, "learning_rate": 1.3085901301017068e-05, "loss": 0.4424, "step": 20607 }, { "epoch": 3.3640667727847844, "grad_norm": 3.0261847972869873, "learning_rate": 1.3085291444152483e-05, "loss": 0.5312, "step": 20608 }, { "epoch": 3.364230031427289, "grad_norm": 3.5796754360198975, "learning_rate": 1.3084681574605432e-05, "loss": 0.5238, "step": 20609 }, { "epoch": 3.3643932900697933, "grad_norm": 2.806347608566284, "learning_rate": 1.3084071692378419e-05, "loss": 0.4372, "step": 20610 }, { "epoch": 3.3645565487122973, "grad_norm": 3.582352876663208, "learning_rate": 1.3083461797473953e-05, "loss": 0.4876, "step": 20611 }, { "epoch": 3.3647198073548017, "grad_norm": 3.467020273208618, "learning_rate": 1.3082851889894536e-05, "loss": 0.5425, "step": 20612 }, { "epoch": 3.364883065997306, "grad_norm": 3.292027473449707, "learning_rate": 1.3082241969642681e-05, "loss": 0.5286, "step": 20613 }, { "epoch": 3.3650463246398106, "grad_norm": 2.8236756324768066, "learning_rate": 1.3081632036720895e-05, "loss": 0.4227, "step": 20614 }, { "epoch": 3.365209583282315, "grad_norm": 3.5830705165863037, "learning_rate": 1.3081022091131681e-05, "loss": 0.5362, "step": 20615 }, { "epoch": 3.3653728419248194, "grad_norm": 2.767718553543091, "learning_rate": 1.3080412132877552e-05, "loss": 0.4445, "step": 20616 }, { "epoch": 3.365536100567324, "grad_norm": 2.8602371215820312, "learning_rate": 1.3079802161961006e-05, "loss": 0.4349, "step": 20617 }, { "epoch": 3.3656993592098283, "grad_norm": 3.148796796798706, "learning_rate": 1.307919217838456e-05, "loss": 0.4782, "step": 20618 }, { "epoch": 3.3658626178523328, "grad_norm": 3.5922691822052, "learning_rate": 1.3078582182150716e-05, "loss": 0.4977, "step": 20619 }, { "epoch": 3.3660258764948368, "grad_norm": 3.5212554931640625, "learning_rate": 1.3077972173261983e-05, "loss": 0.518, "step": 20620 }, { "epoch": 3.366189135137341, "grad_norm": 2.9631447792053223, "learning_rate": 1.307736215172087e-05, "loss": 0.4404, "step": 20621 }, { "epoch": 3.3663523937798456, "grad_norm": 2.8629274368286133, "learning_rate": 1.3076752117529886e-05, "loss": 0.4739, "step": 20622 }, { "epoch": 3.36651565242235, "grad_norm": 2.9854068756103516, "learning_rate": 1.3076142070691532e-05, "loss": 0.4774, "step": 20623 }, { "epoch": 3.3666789110648545, "grad_norm": 2.8650100231170654, "learning_rate": 1.3075532011208319e-05, "loss": 0.4617, "step": 20624 }, { "epoch": 3.366842169707359, "grad_norm": 2.8154799938201904, "learning_rate": 1.3074921939082757e-05, "loss": 0.4771, "step": 20625 }, { "epoch": 3.3670054283498634, "grad_norm": 3.4104397296905518, "learning_rate": 1.3074311854317352e-05, "loss": 0.489, "step": 20626 }, { "epoch": 3.367168686992368, "grad_norm": 2.61375093460083, "learning_rate": 1.3073701756914613e-05, "loss": 0.418, "step": 20627 }, { "epoch": 3.367331945634872, "grad_norm": 3.5025088787078857, "learning_rate": 1.3073091646877043e-05, "loss": 0.5271, "step": 20628 }, { "epoch": 3.3674952042773763, "grad_norm": 3.1869723796844482, "learning_rate": 1.3072481524207153e-05, "loss": 0.4597, "step": 20629 }, { "epoch": 3.3676584629198807, "grad_norm": 4.289979934692383, "learning_rate": 1.3071871388907456e-05, "loss": 0.5953, "step": 20630 }, { "epoch": 3.367821721562385, "grad_norm": 2.955660820007324, "learning_rate": 1.3071261240980455e-05, "loss": 0.4561, "step": 20631 }, { "epoch": 3.3679849802048896, "grad_norm": 3.402818202972412, "learning_rate": 1.3070651080428656e-05, "loss": 0.5202, "step": 20632 }, { "epoch": 3.368148238847394, "grad_norm": 3.5266404151916504, "learning_rate": 1.307004090725457e-05, "loss": 0.5473, "step": 20633 }, { "epoch": 3.3683114974898984, "grad_norm": 3.386272668838501, "learning_rate": 1.3069430721460707e-05, "loss": 0.4325, "step": 20634 }, { "epoch": 3.368474756132403, "grad_norm": 3.5082223415374756, "learning_rate": 1.3068820523049573e-05, "loss": 0.4652, "step": 20635 }, { "epoch": 3.3686380147749073, "grad_norm": 3.1811130046844482, "learning_rate": 1.3068210312023677e-05, "loss": 0.5013, "step": 20636 }, { "epoch": 3.3688012734174118, "grad_norm": 3.556056022644043, "learning_rate": 1.3067600088385527e-05, "loss": 0.4712, "step": 20637 }, { "epoch": 3.3689645320599158, "grad_norm": 3.492827892303467, "learning_rate": 1.306698985213763e-05, "loss": 0.4471, "step": 20638 }, { "epoch": 3.36912779070242, "grad_norm": 3.806081771850586, "learning_rate": 1.3066379603282495e-05, "loss": 0.5533, "step": 20639 }, { "epoch": 3.3692910493449246, "grad_norm": 2.8969664573669434, "learning_rate": 1.3065769341822634e-05, "loss": 0.4021, "step": 20640 }, { "epoch": 3.369454307987429, "grad_norm": 3.419210433959961, "learning_rate": 1.3065159067760551e-05, "loss": 0.5194, "step": 20641 }, { "epoch": 3.3696175666299335, "grad_norm": 3.7263026237487793, "learning_rate": 1.3064548781098755e-05, "loss": 0.5713, "step": 20642 }, { "epoch": 3.369780825272438, "grad_norm": 3.1187429428100586, "learning_rate": 1.306393848183976e-05, "loss": 0.4275, "step": 20643 }, { "epoch": 3.3699440839149424, "grad_norm": 2.9303972721099854, "learning_rate": 1.3063328169986069e-05, "loss": 0.4749, "step": 20644 }, { "epoch": 3.370107342557447, "grad_norm": 3.6372146606445312, "learning_rate": 1.3062717845540192e-05, "loss": 0.5284, "step": 20645 }, { "epoch": 3.370270601199951, "grad_norm": 3.350681781768799, "learning_rate": 1.3062107508504638e-05, "loss": 0.5217, "step": 20646 }, { "epoch": 3.3704338598424552, "grad_norm": 2.9459757804870605, "learning_rate": 1.3061497158881917e-05, "loss": 0.4465, "step": 20647 }, { "epoch": 3.3705971184849597, "grad_norm": 3.691387414932251, "learning_rate": 1.3060886796674539e-05, "loss": 0.5663, "step": 20648 }, { "epoch": 3.370760377127464, "grad_norm": 3.127098560333252, "learning_rate": 1.306027642188501e-05, "loss": 0.4436, "step": 20649 }, { "epoch": 3.3709236357699686, "grad_norm": 2.5489747524261475, "learning_rate": 1.3059666034515839e-05, "loss": 0.3704, "step": 20650 }, { "epoch": 3.371086894412473, "grad_norm": 2.8590140342712402, "learning_rate": 1.3059055634569535e-05, "loss": 0.4269, "step": 20651 }, { "epoch": 3.3712501530549774, "grad_norm": 3.257765293121338, "learning_rate": 1.3058445222048613e-05, "loss": 0.4541, "step": 20652 }, { "epoch": 3.371413411697482, "grad_norm": 3.013187885284424, "learning_rate": 1.3057834796955574e-05, "loss": 0.5105, "step": 20653 }, { "epoch": 3.3715766703399863, "grad_norm": 3.4817073345184326, "learning_rate": 1.3057224359292931e-05, "loss": 0.5236, "step": 20654 }, { "epoch": 3.3717399289824908, "grad_norm": 3.246340274810791, "learning_rate": 1.3056613909063196e-05, "loss": 0.5134, "step": 20655 }, { "epoch": 3.3719031876249947, "grad_norm": 3.1076741218566895, "learning_rate": 1.305600344626887e-05, "loss": 0.4797, "step": 20656 }, { "epoch": 3.372066446267499, "grad_norm": 3.2610626220703125, "learning_rate": 1.305539297091247e-05, "loss": 0.5048, "step": 20657 }, { "epoch": 3.3722297049100036, "grad_norm": 3.4268624782562256, "learning_rate": 1.3054782482996504e-05, "loss": 0.5309, "step": 20658 }, { "epoch": 3.372392963552508, "grad_norm": 3.207686185836792, "learning_rate": 1.305417198252348e-05, "loss": 0.3814, "step": 20659 }, { "epoch": 3.3725562221950125, "grad_norm": 3.339564323425293, "learning_rate": 1.3053561469495912e-05, "loss": 0.4994, "step": 20660 }, { "epoch": 3.372719480837517, "grad_norm": 3.0934841632843018, "learning_rate": 1.30529509439163e-05, "loss": 0.4762, "step": 20661 }, { "epoch": 3.3728827394800214, "grad_norm": 3.578558921813965, "learning_rate": 1.3052340405787162e-05, "loss": 0.4816, "step": 20662 }, { "epoch": 3.3730459981225254, "grad_norm": 3.1666781902313232, "learning_rate": 1.3051729855111006e-05, "loss": 0.5737, "step": 20663 }, { "epoch": 3.37320925676503, "grad_norm": 3.3128952980041504, "learning_rate": 1.305111929189034e-05, "loss": 0.4484, "step": 20664 }, { "epoch": 3.3733725154075342, "grad_norm": 3.8038697242736816, "learning_rate": 1.3050508716127676e-05, "loss": 0.4634, "step": 20665 }, { "epoch": 3.3735357740500387, "grad_norm": 3.1195476055145264, "learning_rate": 1.3049898127825522e-05, "loss": 0.4734, "step": 20666 }, { "epoch": 3.373699032692543, "grad_norm": 2.5426242351531982, "learning_rate": 1.3049287526986385e-05, "loss": 0.4198, "step": 20667 }, { "epoch": 3.3738622913350476, "grad_norm": 2.8159821033477783, "learning_rate": 1.3048676913612781e-05, "loss": 0.4404, "step": 20668 }, { "epoch": 3.374025549977552, "grad_norm": 2.9687390327453613, "learning_rate": 1.3048066287707219e-05, "loss": 0.4524, "step": 20669 }, { "epoch": 3.3741888086200564, "grad_norm": 2.7382619380950928, "learning_rate": 1.3047455649272206e-05, "loss": 0.4013, "step": 20670 }, { "epoch": 3.374352067262561, "grad_norm": 2.9625918865203857, "learning_rate": 1.3046844998310256e-05, "loss": 0.4418, "step": 20671 }, { "epoch": 3.3745153259050653, "grad_norm": 2.7177798748016357, "learning_rate": 1.3046234334823872e-05, "loss": 0.4223, "step": 20672 }, { "epoch": 3.3746785845475693, "grad_norm": 3.0237348079681396, "learning_rate": 1.304562365881557e-05, "loss": 0.4134, "step": 20673 }, { "epoch": 3.3748418431900737, "grad_norm": 3.2789547443389893, "learning_rate": 1.304501297028786e-05, "loss": 0.5165, "step": 20674 }, { "epoch": 3.375005101832578, "grad_norm": 3.6407766342163086, "learning_rate": 1.3044402269243253e-05, "loss": 0.4894, "step": 20675 }, { "epoch": 3.3751683604750826, "grad_norm": 3.573903799057007, "learning_rate": 1.3043791555684255e-05, "loss": 0.4998, "step": 20676 }, { "epoch": 3.375331619117587, "grad_norm": 3.6389787197113037, "learning_rate": 1.3043180829613382e-05, "loss": 0.5526, "step": 20677 }, { "epoch": 3.3754948777600915, "grad_norm": 3.4755899906158447, "learning_rate": 1.304257009103314e-05, "loss": 0.5443, "step": 20678 }, { "epoch": 3.375658136402596, "grad_norm": 3.099210500717163, "learning_rate": 1.3041959339946039e-05, "loss": 0.4237, "step": 20679 }, { "epoch": 3.3758213950451004, "grad_norm": 3.0109646320343018, "learning_rate": 1.3041348576354595e-05, "loss": 0.5226, "step": 20680 }, { "epoch": 3.3759846536876044, "grad_norm": 2.970465898513794, "learning_rate": 1.3040737800261316e-05, "loss": 0.4627, "step": 20681 }, { "epoch": 3.376147912330109, "grad_norm": 3.2978873252868652, "learning_rate": 1.304012701166871e-05, "loss": 0.5155, "step": 20682 }, { "epoch": 3.3763111709726132, "grad_norm": 3.4304628372192383, "learning_rate": 1.303951621057929e-05, "loss": 0.4811, "step": 20683 }, { "epoch": 3.3764744296151177, "grad_norm": 3.0226922035217285, "learning_rate": 1.3038905396995567e-05, "loss": 0.4427, "step": 20684 }, { "epoch": 3.376637688257622, "grad_norm": 2.877026081085205, "learning_rate": 1.3038294570920051e-05, "loss": 0.4938, "step": 20685 }, { "epoch": 3.3768009469001266, "grad_norm": 3.1166558265686035, "learning_rate": 1.3037683732355254e-05, "loss": 0.4788, "step": 20686 }, { "epoch": 3.376964205542631, "grad_norm": 3.8509607315063477, "learning_rate": 1.3037072881303688e-05, "loss": 0.5938, "step": 20687 }, { "epoch": 3.3771274641851354, "grad_norm": 2.862112522125244, "learning_rate": 1.3036462017767858e-05, "loss": 0.4421, "step": 20688 }, { "epoch": 3.37729072282764, "grad_norm": 3.4915497303009033, "learning_rate": 1.3035851141750282e-05, "loss": 0.5856, "step": 20689 }, { "epoch": 3.3774539814701443, "grad_norm": 3.2582826614379883, "learning_rate": 1.3035240253253467e-05, "loss": 0.4843, "step": 20690 }, { "epoch": 3.3776172401126483, "grad_norm": 3.320809841156006, "learning_rate": 1.3034629352279927e-05, "loss": 0.5226, "step": 20691 }, { "epoch": 3.3777804987551527, "grad_norm": 2.8609230518341064, "learning_rate": 1.303401843883217e-05, "loss": 0.4854, "step": 20692 }, { "epoch": 3.377943757397657, "grad_norm": 3.8635635375976562, "learning_rate": 1.303340751291271e-05, "loss": 0.4236, "step": 20693 }, { "epoch": 3.3781070160401616, "grad_norm": 2.8361196517944336, "learning_rate": 1.3032796574524056e-05, "loss": 0.4299, "step": 20694 }, { "epoch": 3.378270274682666, "grad_norm": 3.099778652191162, "learning_rate": 1.303218562366872e-05, "loss": 0.4741, "step": 20695 }, { "epoch": 3.3784335333251705, "grad_norm": 3.2170419692993164, "learning_rate": 1.3031574660349216e-05, "loss": 0.4614, "step": 20696 }, { "epoch": 3.378596791967675, "grad_norm": 3.4061312675476074, "learning_rate": 1.3030963684568052e-05, "loss": 0.526, "step": 20697 }, { "epoch": 3.3787600506101794, "grad_norm": 3.5127573013305664, "learning_rate": 1.3030352696327741e-05, "loss": 0.5189, "step": 20698 }, { "epoch": 3.3789233092526834, "grad_norm": 2.979522228240967, "learning_rate": 1.3029741695630796e-05, "loss": 0.3957, "step": 20699 }, { "epoch": 3.379086567895188, "grad_norm": 3.8983261585235596, "learning_rate": 1.3029130682479722e-05, "loss": 0.5571, "step": 20700 }, { "epoch": 3.3792498265376922, "grad_norm": 2.9586851596832275, "learning_rate": 1.3028519656877038e-05, "loss": 0.4593, "step": 20701 }, { "epoch": 3.3794130851801967, "grad_norm": 3.0202584266662598, "learning_rate": 1.3027908618825256e-05, "loss": 0.4746, "step": 20702 }, { "epoch": 3.379576343822701, "grad_norm": 3.0971481800079346, "learning_rate": 1.3027297568326881e-05, "loss": 0.5063, "step": 20703 }, { "epoch": 3.3797396024652056, "grad_norm": 3.328263282775879, "learning_rate": 1.3026686505384434e-05, "loss": 0.4397, "step": 20704 }, { "epoch": 3.37990286110771, "grad_norm": 2.9213247299194336, "learning_rate": 1.3026075430000418e-05, "loss": 0.3955, "step": 20705 }, { "epoch": 3.3800661197502144, "grad_norm": 2.5756001472473145, "learning_rate": 1.3025464342177347e-05, "loss": 0.3958, "step": 20706 }, { "epoch": 3.380229378392719, "grad_norm": 2.873682737350464, "learning_rate": 1.3024853241917734e-05, "loss": 0.3947, "step": 20707 }, { "epoch": 3.380392637035223, "grad_norm": 3.8308887481689453, "learning_rate": 1.3024242129224093e-05, "loss": 0.4703, "step": 20708 }, { "epoch": 3.3805558956777273, "grad_norm": 3.4941015243530273, "learning_rate": 1.302363100409894e-05, "loss": 0.4418, "step": 20709 }, { "epoch": 3.3807191543202317, "grad_norm": 3.691650867462158, "learning_rate": 1.3023019866544777e-05, "loss": 0.4783, "step": 20710 }, { "epoch": 3.380882412962736, "grad_norm": 3.0031650066375732, "learning_rate": 1.302240871656412e-05, "loss": 0.4384, "step": 20711 }, { "epoch": 3.3810456716052406, "grad_norm": 3.592158555984497, "learning_rate": 1.3021797554159481e-05, "loss": 0.4271, "step": 20712 }, { "epoch": 3.381208930247745, "grad_norm": 3.1608808040618896, "learning_rate": 1.3021186379333375e-05, "loss": 0.5141, "step": 20713 }, { "epoch": 3.3813721888902495, "grad_norm": 3.2252418994903564, "learning_rate": 1.3020575192088314e-05, "loss": 0.5084, "step": 20714 }, { "epoch": 3.381535447532754, "grad_norm": 4.073662757873535, "learning_rate": 1.3019963992426808e-05, "loss": 0.6921, "step": 20715 }, { "epoch": 3.381698706175258, "grad_norm": 3.313253402709961, "learning_rate": 1.301935278035137e-05, "loss": 0.4707, "step": 20716 }, { "epoch": 3.3818619648177624, "grad_norm": 3.0338022708892822, "learning_rate": 1.301874155586451e-05, "loss": 0.4595, "step": 20717 }, { "epoch": 3.382025223460267, "grad_norm": 3.566479444503784, "learning_rate": 1.3018130318968747e-05, "loss": 0.4556, "step": 20718 }, { "epoch": 3.3821884821027712, "grad_norm": 3.3785765171051025, "learning_rate": 1.301751906966659e-05, "loss": 0.5383, "step": 20719 }, { "epoch": 3.3823517407452757, "grad_norm": 3.3423163890838623, "learning_rate": 1.301690780796055e-05, "loss": 0.5167, "step": 20720 }, { "epoch": 3.38251499938778, "grad_norm": 3.6544697284698486, "learning_rate": 1.301629653385314e-05, "loss": 0.5924, "step": 20721 }, { "epoch": 3.3826782580302845, "grad_norm": 3.64335560798645, "learning_rate": 1.3015685247346877e-05, "loss": 0.4956, "step": 20722 }, { "epoch": 3.382841516672789, "grad_norm": 3.134793519973755, "learning_rate": 1.301507394844427e-05, "loss": 0.4854, "step": 20723 }, { "epoch": 3.3830047753152934, "grad_norm": 3.2372474670410156, "learning_rate": 1.301446263714783e-05, "loss": 0.5131, "step": 20724 }, { "epoch": 3.383168033957798, "grad_norm": 3.4981091022491455, "learning_rate": 1.3013851313460073e-05, "loss": 0.5005, "step": 20725 }, { "epoch": 3.383331292600302, "grad_norm": 3.5103490352630615, "learning_rate": 1.3013239977383514e-05, "loss": 0.5706, "step": 20726 }, { "epoch": 3.3834945512428063, "grad_norm": 2.821171522140503, "learning_rate": 1.301262862892066e-05, "loss": 0.4425, "step": 20727 }, { "epoch": 3.3836578098853107, "grad_norm": 3.631455183029175, "learning_rate": 1.3012017268074029e-05, "loss": 0.5016, "step": 20728 }, { "epoch": 3.383821068527815, "grad_norm": 3.9114577770233154, "learning_rate": 1.3011405894846132e-05, "loss": 0.4867, "step": 20729 }, { "epoch": 3.3839843271703196, "grad_norm": 3.2149760723114014, "learning_rate": 1.3010794509239482e-05, "loss": 0.4189, "step": 20730 }, { "epoch": 3.384147585812824, "grad_norm": 3.377251148223877, "learning_rate": 1.3010183111256593e-05, "loss": 0.5081, "step": 20731 }, { "epoch": 3.3843108444553285, "grad_norm": 3.644392490386963, "learning_rate": 1.300957170089998e-05, "loss": 0.4741, "step": 20732 }, { "epoch": 3.384474103097833, "grad_norm": 3.0162527561187744, "learning_rate": 1.3008960278172151e-05, "loss": 0.3755, "step": 20733 }, { "epoch": 3.384637361740337, "grad_norm": 2.8565475940704346, "learning_rate": 1.3008348843075623e-05, "loss": 0.4415, "step": 20734 }, { "epoch": 3.3848006203828414, "grad_norm": 3.7794578075408936, "learning_rate": 1.300773739561291e-05, "loss": 0.5193, "step": 20735 }, { "epoch": 3.384963879025346, "grad_norm": 4.251117706298828, "learning_rate": 1.3007125935786523e-05, "loss": 0.6236, "step": 20736 }, { "epoch": 3.3851271376678502, "grad_norm": 3.1591989994049072, "learning_rate": 1.3006514463598979e-05, "loss": 0.4473, "step": 20737 }, { "epoch": 3.3852903963103547, "grad_norm": 3.6444947719573975, "learning_rate": 1.3005902979052784e-05, "loss": 0.5664, "step": 20738 }, { "epoch": 3.385453654952859, "grad_norm": 3.7113213539123535, "learning_rate": 1.3005291482150462e-05, "loss": 0.5638, "step": 20739 }, { "epoch": 3.3856169135953635, "grad_norm": 3.090019464492798, "learning_rate": 1.300467997289452e-05, "loss": 0.463, "step": 20740 }, { "epoch": 3.385780172237868, "grad_norm": 3.1389102935791016, "learning_rate": 1.3004068451287473e-05, "loss": 0.4586, "step": 20741 }, { "epoch": 3.3859434308803724, "grad_norm": 3.1993064880371094, "learning_rate": 1.3003456917331833e-05, "loss": 0.4952, "step": 20742 }, { "epoch": 3.386106689522877, "grad_norm": 3.2942662239074707, "learning_rate": 1.3002845371030121e-05, "loss": 0.5323, "step": 20743 }, { "epoch": 3.386269948165381, "grad_norm": 3.0150747299194336, "learning_rate": 1.3002233812384842e-05, "loss": 0.3924, "step": 20744 }, { "epoch": 3.3864332068078853, "grad_norm": 2.6725430488586426, "learning_rate": 1.3001622241398513e-05, "loss": 0.4244, "step": 20745 }, { "epoch": 3.3865964654503897, "grad_norm": 3.9736831188201904, "learning_rate": 1.300101065807365e-05, "loss": 0.5825, "step": 20746 }, { "epoch": 3.386759724092894, "grad_norm": 3.1875245571136475, "learning_rate": 1.3000399062412763e-05, "loss": 0.4474, "step": 20747 }, { "epoch": 3.3869229827353986, "grad_norm": 3.2223355770111084, "learning_rate": 1.2999787454418372e-05, "loss": 0.496, "step": 20748 }, { "epoch": 3.387086241377903, "grad_norm": 3.410764694213867, "learning_rate": 1.2999175834092985e-05, "loss": 0.438, "step": 20749 }, { "epoch": 3.3872495000204075, "grad_norm": 2.6770033836364746, "learning_rate": 1.2998564201439117e-05, "loss": 0.4559, "step": 20750 }, { "epoch": 3.3874127586629115, "grad_norm": 3.3874380588531494, "learning_rate": 1.2997952556459286e-05, "loss": 0.6181, "step": 20751 }, { "epoch": 3.387576017305416, "grad_norm": 3.6969144344329834, "learning_rate": 1.2997340899156003e-05, "loss": 0.518, "step": 20752 }, { "epoch": 3.3877392759479203, "grad_norm": 2.9111251831054688, "learning_rate": 1.2996729229531786e-05, "loss": 0.4923, "step": 20753 }, { "epoch": 3.387902534590425, "grad_norm": 3.0230681896209717, "learning_rate": 1.2996117547589147e-05, "loss": 0.5046, "step": 20754 }, { "epoch": 3.3880657932329292, "grad_norm": 2.837785482406616, "learning_rate": 1.2995505853330595e-05, "loss": 0.4696, "step": 20755 }, { "epoch": 3.3882290518754337, "grad_norm": 3.042581796646118, "learning_rate": 1.2994894146758651e-05, "loss": 0.4579, "step": 20756 }, { "epoch": 3.388392310517938, "grad_norm": 3.0198116302490234, "learning_rate": 1.2994282427875831e-05, "loss": 0.392, "step": 20757 }, { "epoch": 3.3885555691604425, "grad_norm": 3.1901345252990723, "learning_rate": 1.2993670696684647e-05, "loss": 0.5228, "step": 20758 }, { "epoch": 3.388718827802947, "grad_norm": 3.115888833999634, "learning_rate": 1.299305895318761e-05, "loss": 0.4798, "step": 20759 }, { "epoch": 3.3888820864454514, "grad_norm": 3.210118293762207, "learning_rate": 1.2992447197387238e-05, "loss": 0.4635, "step": 20760 }, { "epoch": 3.3890453450879554, "grad_norm": 3.459406614303589, "learning_rate": 1.2991835429286046e-05, "loss": 0.4477, "step": 20761 }, { "epoch": 3.38920860373046, "grad_norm": 2.9685680866241455, "learning_rate": 1.299122364888655e-05, "loss": 0.4642, "step": 20762 }, { "epoch": 3.3893718623729643, "grad_norm": 2.8081257343292236, "learning_rate": 1.2990611856191261e-05, "loss": 0.4619, "step": 20763 }, { "epoch": 3.3895351210154687, "grad_norm": 2.975982427597046, "learning_rate": 1.2990000051202695e-05, "loss": 0.4231, "step": 20764 }, { "epoch": 3.389698379657973, "grad_norm": 3.2957046031951904, "learning_rate": 1.2989388233923373e-05, "loss": 0.4479, "step": 20765 }, { "epoch": 3.3898616383004776, "grad_norm": 4.737444877624512, "learning_rate": 1.2988776404355798e-05, "loss": 0.5348, "step": 20766 }, { "epoch": 3.390024896942982, "grad_norm": 3.793001651763916, "learning_rate": 1.2988164562502497e-05, "loss": 0.5423, "step": 20767 }, { "epoch": 3.3901881555854865, "grad_norm": 3.3821797370910645, "learning_rate": 1.2987552708365976e-05, "loss": 0.5212, "step": 20768 }, { "epoch": 3.3903514142279905, "grad_norm": 4.043506145477295, "learning_rate": 1.2986940841948756e-05, "loss": 0.481, "step": 20769 }, { "epoch": 3.390514672870495, "grad_norm": 3.743666172027588, "learning_rate": 1.298632896325335e-05, "loss": 0.4724, "step": 20770 }, { "epoch": 3.3906779315129993, "grad_norm": 3.6319096088409424, "learning_rate": 1.2985717072282273e-05, "loss": 0.5038, "step": 20771 }, { "epoch": 3.390841190155504, "grad_norm": 3.236358880996704, "learning_rate": 1.2985105169038041e-05, "loss": 0.534, "step": 20772 }, { "epoch": 3.391004448798008, "grad_norm": 2.9674646854400635, "learning_rate": 1.2984493253523166e-05, "loss": 0.4606, "step": 20773 }, { "epoch": 3.3911677074405127, "grad_norm": 3.32485032081604, "learning_rate": 1.2983881325740168e-05, "loss": 0.5252, "step": 20774 }, { "epoch": 3.391330966083017, "grad_norm": 3.424290657043457, "learning_rate": 1.2983269385691562e-05, "loss": 0.4828, "step": 20775 }, { "epoch": 3.3914942247255215, "grad_norm": 3.330089807510376, "learning_rate": 1.2982657433379862e-05, "loss": 0.4449, "step": 20776 }, { "epoch": 3.391657483368026, "grad_norm": 3.30499005317688, "learning_rate": 1.2982045468807583e-05, "loss": 0.4718, "step": 20777 }, { "epoch": 3.3918207420105304, "grad_norm": 3.4226274490356445, "learning_rate": 1.298143349197724e-05, "loss": 0.4823, "step": 20778 }, { "epoch": 3.3919840006530344, "grad_norm": 3.3042547702789307, "learning_rate": 1.298082150289135e-05, "loss": 0.4552, "step": 20779 }, { "epoch": 3.392147259295539, "grad_norm": 3.0166099071502686, "learning_rate": 1.2980209501552428e-05, "loss": 0.4789, "step": 20780 }, { "epoch": 3.3923105179380433, "grad_norm": 3.4621269702911377, "learning_rate": 1.2979597487962991e-05, "loss": 0.5385, "step": 20781 }, { "epoch": 3.3924737765805477, "grad_norm": 3.499793767929077, "learning_rate": 1.2978985462125553e-05, "loss": 0.5396, "step": 20782 }, { "epoch": 3.392637035223052, "grad_norm": 3.1788833141326904, "learning_rate": 1.2978373424042632e-05, "loss": 0.4738, "step": 20783 }, { "epoch": 3.3928002938655566, "grad_norm": 3.3510584831237793, "learning_rate": 1.297776137371674e-05, "loss": 0.4075, "step": 20784 }, { "epoch": 3.392963552508061, "grad_norm": 3.5149991512298584, "learning_rate": 1.2977149311150398e-05, "loss": 0.5836, "step": 20785 }, { "epoch": 3.3931268111505655, "grad_norm": 3.6341936588287354, "learning_rate": 1.2976537236346119e-05, "loss": 0.5153, "step": 20786 }, { "epoch": 3.3932900697930695, "grad_norm": 3.3056437969207764, "learning_rate": 1.2975925149306419e-05, "loss": 0.4801, "step": 20787 }, { "epoch": 3.393453328435574, "grad_norm": 3.062199592590332, "learning_rate": 1.297531305003381e-05, "loss": 0.438, "step": 20788 }, { "epoch": 3.3936165870780783, "grad_norm": 2.9017717838287354, "learning_rate": 1.297470093853082e-05, "loss": 0.3704, "step": 20789 }, { "epoch": 3.3937798457205828, "grad_norm": 3.5152080059051514, "learning_rate": 1.2974088814799953e-05, "loss": 0.5121, "step": 20790 }, { "epoch": 3.393943104363087, "grad_norm": 3.0080649852752686, "learning_rate": 1.297347667884373e-05, "loss": 0.5174, "step": 20791 }, { "epoch": 3.3941063630055917, "grad_norm": 3.2737386226654053, "learning_rate": 1.297286453066467e-05, "loss": 0.4773, "step": 20792 }, { "epoch": 3.394269621648096, "grad_norm": 3.1534688472747803, "learning_rate": 1.2972252370265286e-05, "loss": 0.5409, "step": 20793 }, { "epoch": 3.3944328802906005, "grad_norm": 2.981290340423584, "learning_rate": 1.297164019764809e-05, "loss": 0.407, "step": 20794 }, { "epoch": 3.394596138933105, "grad_norm": 3.109248399734497, "learning_rate": 1.2971028012815607e-05, "loss": 0.4156, "step": 20795 }, { "epoch": 3.3947593975756094, "grad_norm": 3.3884220123291016, "learning_rate": 1.297041581577035e-05, "loss": 0.5152, "step": 20796 }, { "epoch": 3.3949226562181134, "grad_norm": 3.357271671295166, "learning_rate": 1.2969803606514837e-05, "loss": 0.5133, "step": 20797 }, { "epoch": 3.395085914860618, "grad_norm": 3.708686113357544, "learning_rate": 1.296919138505158e-05, "loss": 0.5597, "step": 20798 }, { "epoch": 3.3952491735031223, "grad_norm": 3.1853487491607666, "learning_rate": 1.2968579151383096e-05, "loss": 0.463, "step": 20799 }, { "epoch": 3.3954124321456267, "grad_norm": 3.3345253467559814, "learning_rate": 1.2967966905511906e-05, "loss": 0.4703, "step": 20800 }, { "epoch": 3.395575690788131, "grad_norm": 3.5632364749908447, "learning_rate": 1.2967354647440526e-05, "loss": 0.5193, "step": 20801 }, { "epoch": 3.3957389494306356, "grad_norm": 2.2598841190338135, "learning_rate": 1.2966742377171473e-05, "loss": 0.3552, "step": 20802 }, { "epoch": 3.39590220807314, "grad_norm": 3.5161609649658203, "learning_rate": 1.2966130094707258e-05, "loss": 0.4452, "step": 20803 }, { "epoch": 3.396065466715644, "grad_norm": 3.329695463180542, "learning_rate": 1.2965517800050406e-05, "loss": 0.454, "step": 20804 }, { "epoch": 3.3962287253581485, "grad_norm": 3.2563178539276123, "learning_rate": 1.2964905493203426e-05, "loss": 0.5215, "step": 20805 }, { "epoch": 3.396391984000653, "grad_norm": 3.653892993927002, "learning_rate": 1.2964293174168846e-05, "loss": 0.517, "step": 20806 }, { "epoch": 3.3965552426431573, "grad_norm": 3.517458438873291, "learning_rate": 1.2963680842949169e-05, "loss": 0.5585, "step": 20807 }, { "epoch": 3.3967185012856618, "grad_norm": 2.7518489360809326, "learning_rate": 1.2963068499546922e-05, "loss": 0.4295, "step": 20808 }, { "epoch": 3.396881759928166, "grad_norm": 2.8223185539245605, "learning_rate": 1.296245614396462e-05, "loss": 0.385, "step": 20809 }, { "epoch": 3.3970450185706706, "grad_norm": 2.972249746322632, "learning_rate": 1.2961843776204778e-05, "loss": 0.4486, "step": 20810 }, { "epoch": 3.397208277213175, "grad_norm": 2.913553237915039, "learning_rate": 1.2961231396269916e-05, "loss": 0.4379, "step": 20811 }, { "epoch": 3.3973715358556795, "grad_norm": 3.1663625240325928, "learning_rate": 1.2960619004162546e-05, "loss": 0.5448, "step": 20812 }, { "epoch": 3.397534794498184, "grad_norm": 3.2187929153442383, "learning_rate": 1.2960006599885194e-05, "loss": 0.5002, "step": 20813 }, { "epoch": 3.397698053140688, "grad_norm": 3.095996141433716, "learning_rate": 1.295939418344037e-05, "loss": 0.388, "step": 20814 }, { "epoch": 3.3978613117831924, "grad_norm": 3.355546236038208, "learning_rate": 1.2958781754830594e-05, "loss": 0.4302, "step": 20815 }, { "epoch": 3.398024570425697, "grad_norm": 3.037429094314575, "learning_rate": 1.2958169314058384e-05, "loss": 0.5026, "step": 20816 }, { "epoch": 3.3981878290682013, "grad_norm": 2.7129485607147217, "learning_rate": 1.2957556861126257e-05, "loss": 0.4073, "step": 20817 }, { "epoch": 3.3983510877107057, "grad_norm": 3.13436222076416, "learning_rate": 1.295694439603673e-05, "loss": 0.5104, "step": 20818 }, { "epoch": 3.39851434635321, "grad_norm": 3.67759370803833, "learning_rate": 1.2956331918792321e-05, "loss": 0.5726, "step": 20819 }, { "epoch": 3.3986776049957146, "grad_norm": 2.578308343887329, "learning_rate": 1.2955719429395549e-05, "loss": 0.3742, "step": 20820 }, { "epoch": 3.398840863638219, "grad_norm": 2.9283740520477295, "learning_rate": 1.2955106927848929e-05, "loss": 0.4527, "step": 20821 }, { "epoch": 3.399004122280723, "grad_norm": 3.3302974700927734, "learning_rate": 1.295449441415498e-05, "loss": 0.5329, "step": 20822 }, { "epoch": 3.3991673809232275, "grad_norm": 3.251924753189087, "learning_rate": 1.2953881888316221e-05, "loss": 0.459, "step": 20823 }, { "epoch": 3.399330639565732, "grad_norm": 2.3262391090393066, "learning_rate": 1.2953269350335169e-05, "loss": 0.3335, "step": 20824 }, { "epoch": 3.3994938982082363, "grad_norm": 2.8873348236083984, "learning_rate": 1.2952656800214343e-05, "loss": 0.4259, "step": 20825 }, { "epoch": 3.3996571568507408, "grad_norm": 3.0349457263946533, "learning_rate": 1.2952044237956259e-05, "loss": 0.4258, "step": 20826 }, { "epoch": 3.399820415493245, "grad_norm": 3.3892757892608643, "learning_rate": 1.2951431663563435e-05, "loss": 0.4928, "step": 20827 }, { "epoch": 3.3999836741357496, "grad_norm": 3.050168514251709, "learning_rate": 1.295081907703839e-05, "loss": 0.443, "step": 20828 }, { "epoch": 3.400146932778254, "grad_norm": 3.277022361755371, "learning_rate": 1.2950206478383643e-05, "loss": 0.4608, "step": 20829 }, { "epoch": 3.4003101914207585, "grad_norm": 3.1545493602752686, "learning_rate": 1.294959386760171e-05, "loss": 0.4689, "step": 20830 }, { "epoch": 3.400473450063263, "grad_norm": 2.725486993789673, "learning_rate": 1.2948981244695112e-05, "loss": 0.4202, "step": 20831 }, { "epoch": 3.400636708705767, "grad_norm": 3.0466055870056152, "learning_rate": 1.2948368609666362e-05, "loss": 0.4479, "step": 20832 }, { "epoch": 3.4007999673482714, "grad_norm": 3.745170831680298, "learning_rate": 1.2947755962517985e-05, "loss": 0.5496, "step": 20833 }, { "epoch": 3.400963225990776, "grad_norm": 3.561044931411743, "learning_rate": 1.2947143303252494e-05, "loss": 0.4414, "step": 20834 }, { "epoch": 3.4011264846332803, "grad_norm": 3.385885715484619, "learning_rate": 1.2946530631872412e-05, "loss": 0.4887, "step": 20835 }, { "epoch": 3.4012897432757847, "grad_norm": 3.688492774963379, "learning_rate": 1.2945917948380257e-05, "loss": 0.4706, "step": 20836 }, { "epoch": 3.401453001918289, "grad_norm": 3.379692554473877, "learning_rate": 1.2945305252778543e-05, "loss": 0.5111, "step": 20837 }, { "epoch": 3.4016162605607936, "grad_norm": 3.6835200786590576, "learning_rate": 1.294469254506979e-05, "loss": 0.4323, "step": 20838 }, { "epoch": 3.4017795192032976, "grad_norm": 3.601759672164917, "learning_rate": 1.2944079825256519e-05, "loss": 0.5124, "step": 20839 }, { "epoch": 3.401942777845802, "grad_norm": 3.3748767375946045, "learning_rate": 1.2943467093341247e-05, "loss": 0.4577, "step": 20840 }, { "epoch": 3.4021060364883065, "grad_norm": 2.885802984237671, "learning_rate": 1.2942854349326496e-05, "loss": 0.3668, "step": 20841 }, { "epoch": 3.402269295130811, "grad_norm": 3.5514848232269287, "learning_rate": 1.294224159321478e-05, "loss": 0.5699, "step": 20842 }, { "epoch": 3.4024325537733153, "grad_norm": 3.3771164417266846, "learning_rate": 1.2941628825008618e-05, "loss": 0.4646, "step": 20843 }, { "epoch": 3.4025958124158198, "grad_norm": 3.0030224323272705, "learning_rate": 1.2941016044710533e-05, "loss": 0.3959, "step": 20844 }, { "epoch": 3.402759071058324, "grad_norm": 2.9098100662231445, "learning_rate": 1.294040325232304e-05, "loss": 0.442, "step": 20845 }, { "epoch": 3.4029223297008286, "grad_norm": 3.346689462661743, "learning_rate": 1.2939790447848664e-05, "loss": 0.5021, "step": 20846 }, { "epoch": 3.403085588343333, "grad_norm": 3.582958221435547, "learning_rate": 1.2939177631289914e-05, "loss": 0.4766, "step": 20847 }, { "epoch": 3.4032488469858375, "grad_norm": 3.060605525970459, "learning_rate": 1.2938564802649315e-05, "loss": 0.393, "step": 20848 }, { "epoch": 3.4034121056283415, "grad_norm": 3.708831787109375, "learning_rate": 1.2937951961929387e-05, "loss": 0.4795, "step": 20849 }, { "epoch": 3.403575364270846, "grad_norm": 2.883579730987549, "learning_rate": 1.2937339109132649e-05, "loss": 0.4507, "step": 20850 }, { "epoch": 3.4037386229133504, "grad_norm": 2.744286298751831, "learning_rate": 1.2936726244261619e-05, "loss": 0.4384, "step": 20851 }, { "epoch": 3.403901881555855, "grad_norm": 2.770164966583252, "learning_rate": 1.2936113367318814e-05, "loss": 0.4446, "step": 20852 }, { "epoch": 3.4040651401983593, "grad_norm": 3.1394522190093994, "learning_rate": 1.2935500478306758e-05, "loss": 0.4964, "step": 20853 }, { "epoch": 3.4042283988408637, "grad_norm": 3.165339231491089, "learning_rate": 1.2934887577227966e-05, "loss": 0.4451, "step": 20854 }, { "epoch": 3.404391657483368, "grad_norm": 3.4642446041107178, "learning_rate": 1.293427466408496e-05, "loss": 0.497, "step": 20855 }, { "epoch": 3.4045549161258726, "grad_norm": 3.1653430461883545, "learning_rate": 1.2933661738880261e-05, "loss": 0.4543, "step": 20856 }, { "epoch": 3.4047181747683766, "grad_norm": 3.025933027267456, "learning_rate": 1.2933048801616383e-05, "loss": 0.4852, "step": 20857 }, { "epoch": 3.404881433410881, "grad_norm": 3.2745471000671387, "learning_rate": 1.293243585229585e-05, "loss": 0.5537, "step": 20858 }, { "epoch": 3.4050446920533854, "grad_norm": 3.189267158508301, "learning_rate": 1.2931822890921182e-05, "loss": 0.45, "step": 20859 }, { "epoch": 3.40520795069589, "grad_norm": 3.328559637069702, "learning_rate": 1.2931209917494896e-05, "loss": 0.4398, "step": 20860 }, { "epoch": 3.4053712093383943, "grad_norm": 3.5595593452453613, "learning_rate": 1.2930596932019512e-05, "loss": 0.5149, "step": 20861 }, { "epoch": 3.4055344679808988, "grad_norm": 3.7157294750213623, "learning_rate": 1.2929983934497549e-05, "loss": 0.5247, "step": 20862 }, { "epoch": 3.405697726623403, "grad_norm": 3.1780478954315186, "learning_rate": 1.2929370924931533e-05, "loss": 0.4439, "step": 20863 }, { "epoch": 3.4058609852659076, "grad_norm": 3.179349184036255, "learning_rate": 1.2928757903323977e-05, "loss": 0.4368, "step": 20864 }, { "epoch": 3.406024243908412, "grad_norm": 2.855839729309082, "learning_rate": 1.2928144869677404e-05, "loss": 0.4559, "step": 20865 }, { "epoch": 3.4061875025509165, "grad_norm": 3.6925787925720215, "learning_rate": 1.292753182399433e-05, "loss": 0.5368, "step": 20866 }, { "epoch": 3.4063507611934205, "grad_norm": 2.7242093086242676, "learning_rate": 1.292691876627728e-05, "loss": 0.4107, "step": 20867 }, { "epoch": 3.406514019835925, "grad_norm": 3.008054256439209, "learning_rate": 1.2926305696528774e-05, "loss": 0.406, "step": 20868 }, { "epoch": 3.4066772784784294, "grad_norm": 2.72033429145813, "learning_rate": 1.2925692614751328e-05, "loss": 0.3814, "step": 20869 }, { "epoch": 3.406840537120934, "grad_norm": 2.842291831970215, "learning_rate": 1.2925079520947465e-05, "loss": 0.3839, "step": 20870 }, { "epoch": 3.4070037957634383, "grad_norm": 4.13548469543457, "learning_rate": 1.2924466415119706e-05, "loss": 0.515, "step": 20871 }, { "epoch": 3.4071670544059427, "grad_norm": 3.192279577255249, "learning_rate": 1.2923853297270568e-05, "loss": 0.4192, "step": 20872 }, { "epoch": 3.407330313048447, "grad_norm": 2.8130388259887695, "learning_rate": 1.2923240167402573e-05, "loss": 0.3877, "step": 20873 }, { "epoch": 3.4074935716909516, "grad_norm": 2.7678492069244385, "learning_rate": 1.2922627025518243e-05, "loss": 0.4758, "step": 20874 }, { "epoch": 3.4076568303334556, "grad_norm": 2.908841133117676, "learning_rate": 1.2922013871620096e-05, "loss": 0.3731, "step": 20875 }, { "epoch": 3.40782008897596, "grad_norm": 2.8809971809387207, "learning_rate": 1.2921400705710651e-05, "loss": 0.4189, "step": 20876 }, { "epoch": 3.4079833476184644, "grad_norm": 3.5750832557678223, "learning_rate": 1.2920787527792433e-05, "loss": 0.5208, "step": 20877 }, { "epoch": 3.408146606260969, "grad_norm": 3.211008310317993, "learning_rate": 1.292017433786796e-05, "loss": 0.4209, "step": 20878 }, { "epoch": 3.4083098649034733, "grad_norm": 3.741441249847412, "learning_rate": 1.2919561135939753e-05, "loss": 0.5619, "step": 20879 }, { "epoch": 3.4084731235459778, "grad_norm": 3.7956440448760986, "learning_rate": 1.2918947922010336e-05, "loss": 0.5476, "step": 20880 }, { "epoch": 3.408636382188482, "grad_norm": 3.8648369312286377, "learning_rate": 1.2918334696082222e-05, "loss": 0.5978, "step": 20881 }, { "epoch": 3.4087996408309866, "grad_norm": 3.3076083660125732, "learning_rate": 1.2917721458157937e-05, "loss": 0.474, "step": 20882 }, { "epoch": 3.408962899473491, "grad_norm": 2.3687620162963867, "learning_rate": 1.291710820824e-05, "loss": 0.3385, "step": 20883 }, { "epoch": 3.4091261581159955, "grad_norm": 3.2945291996002197, "learning_rate": 1.2916494946330932e-05, "loss": 0.5073, "step": 20884 }, { "epoch": 3.4092894167584995, "grad_norm": 3.316819667816162, "learning_rate": 1.2915881672433259e-05, "loss": 0.4748, "step": 20885 }, { "epoch": 3.409452675401004, "grad_norm": 3.753235101699829, "learning_rate": 1.2915268386549495e-05, "loss": 0.5079, "step": 20886 }, { "epoch": 3.4096159340435084, "grad_norm": 3.55469012260437, "learning_rate": 1.2914655088682161e-05, "loss": 0.5041, "step": 20887 }, { "epoch": 3.409779192686013, "grad_norm": 3.861393690109253, "learning_rate": 1.2914041778833784e-05, "loss": 0.5116, "step": 20888 }, { "epoch": 3.4099424513285173, "grad_norm": 3.5841405391693115, "learning_rate": 1.2913428457006878e-05, "loss": 0.5136, "step": 20889 }, { "epoch": 3.4101057099710217, "grad_norm": 2.936128616333008, "learning_rate": 1.2912815123203972e-05, "loss": 0.414, "step": 20890 }, { "epoch": 3.410268968613526, "grad_norm": 3.652953863143921, "learning_rate": 1.291220177742758e-05, "loss": 0.4957, "step": 20891 }, { "epoch": 3.41043222725603, "grad_norm": 3.1000404357910156, "learning_rate": 1.2911588419680228e-05, "loss": 0.4769, "step": 20892 }, { "epoch": 3.4105954858985346, "grad_norm": 3.174997091293335, "learning_rate": 1.2910975049964431e-05, "loss": 0.4392, "step": 20893 }, { "epoch": 3.410758744541039, "grad_norm": 3.256892681121826, "learning_rate": 1.2910361668282718e-05, "loss": 0.4428, "step": 20894 }, { "epoch": 3.4109220031835434, "grad_norm": 4.081625461578369, "learning_rate": 1.290974827463761e-05, "loss": 0.5123, "step": 20895 }, { "epoch": 3.411085261826048, "grad_norm": 2.4816393852233887, "learning_rate": 1.290913486903162e-05, "loss": 0.3839, "step": 20896 }, { "epoch": 3.4112485204685523, "grad_norm": 3.267415761947632, "learning_rate": 1.290852145146728e-05, "loss": 0.4158, "step": 20897 }, { "epoch": 3.4114117791110568, "grad_norm": 2.9352259635925293, "learning_rate": 1.2907908021947104e-05, "loss": 0.4719, "step": 20898 }, { "epoch": 3.411575037753561, "grad_norm": 2.9201714992523193, "learning_rate": 1.2907294580473618e-05, "loss": 0.4441, "step": 20899 }, { "epoch": 3.4117382963960656, "grad_norm": 2.522378444671631, "learning_rate": 1.2906681127049339e-05, "loss": 0.4209, "step": 20900 }, { "epoch": 3.41190155503857, "grad_norm": 2.7847306728363037, "learning_rate": 1.2906067661676791e-05, "loss": 0.4354, "step": 20901 }, { "epoch": 3.412064813681074, "grad_norm": 3.1978673934936523, "learning_rate": 1.29054541843585e-05, "loss": 0.4912, "step": 20902 }, { "epoch": 3.4122280723235785, "grad_norm": 3.2278974056243896, "learning_rate": 1.2904840695096978e-05, "loss": 0.4758, "step": 20903 }, { "epoch": 3.412391330966083, "grad_norm": 3.2384774684906006, "learning_rate": 1.2904227193894759e-05, "loss": 0.4991, "step": 20904 }, { "epoch": 3.4125545896085874, "grad_norm": 3.601550817489624, "learning_rate": 1.2903613680754354e-05, "loss": 0.4914, "step": 20905 }, { "epoch": 3.412717848251092, "grad_norm": 2.813732147216797, "learning_rate": 1.2903000155678291e-05, "loss": 0.3929, "step": 20906 }, { "epoch": 3.4128811068935963, "grad_norm": 3.2673230171203613, "learning_rate": 1.2902386618669092e-05, "loss": 0.5123, "step": 20907 }, { "epoch": 3.4130443655361007, "grad_norm": 2.516570806503296, "learning_rate": 1.2901773069729275e-05, "loss": 0.3985, "step": 20908 }, { "epoch": 3.413207624178605, "grad_norm": 2.8677215576171875, "learning_rate": 1.2901159508861368e-05, "loss": 0.3429, "step": 20909 }, { "epoch": 3.413370882821109, "grad_norm": 3.2868239879608154, "learning_rate": 1.2900545936067888e-05, "loss": 0.5121, "step": 20910 }, { "epoch": 3.4135341414636136, "grad_norm": 3.5094802379608154, "learning_rate": 1.2899932351351358e-05, "loss": 0.4102, "step": 20911 }, { "epoch": 3.413697400106118, "grad_norm": 3.3111793994903564, "learning_rate": 1.2899318754714302e-05, "loss": 0.5257, "step": 20912 }, { "epoch": 3.4138606587486224, "grad_norm": 3.652500867843628, "learning_rate": 1.2898705146159241e-05, "loss": 0.5475, "step": 20913 }, { "epoch": 3.414023917391127, "grad_norm": 3.3810698986053467, "learning_rate": 1.2898091525688701e-05, "loss": 0.4195, "step": 20914 }, { "epoch": 3.4141871760336313, "grad_norm": 3.0965802669525146, "learning_rate": 1.2897477893305195e-05, "loss": 0.4066, "step": 20915 }, { "epoch": 3.4143504346761357, "grad_norm": 3.6949241161346436, "learning_rate": 1.2896864249011254e-05, "loss": 0.4699, "step": 20916 }, { "epoch": 3.41451369331864, "grad_norm": 2.875641345977783, "learning_rate": 1.2896250592809398e-05, "loss": 0.4268, "step": 20917 }, { "epoch": 3.4146769519611446, "grad_norm": 3.636441230773926, "learning_rate": 1.2895636924702151e-05, "loss": 0.5798, "step": 20918 }, { "epoch": 3.414840210603649, "grad_norm": 3.060328960418701, "learning_rate": 1.2895023244692034e-05, "loss": 0.4559, "step": 20919 }, { "epoch": 3.415003469246153, "grad_norm": 3.680495023727417, "learning_rate": 1.2894409552781566e-05, "loss": 0.5138, "step": 20920 }, { "epoch": 3.4151667278886575, "grad_norm": 4.016995429992676, "learning_rate": 1.2893795848973274e-05, "loss": 0.5239, "step": 20921 }, { "epoch": 3.415329986531162, "grad_norm": 3.0541677474975586, "learning_rate": 1.2893182133269683e-05, "loss": 0.4042, "step": 20922 }, { "epoch": 3.4154932451736664, "grad_norm": 2.9457385540008545, "learning_rate": 1.2892568405673311e-05, "loss": 0.4197, "step": 20923 }, { "epoch": 3.415656503816171, "grad_norm": 2.9266483783721924, "learning_rate": 1.2891954666186685e-05, "loss": 0.4362, "step": 20924 }, { "epoch": 3.4158197624586752, "grad_norm": 3.9877219200134277, "learning_rate": 1.2891340914812323e-05, "loss": 0.5339, "step": 20925 }, { "epoch": 3.4159830211011797, "grad_norm": 3.3563640117645264, "learning_rate": 1.2890727151552747e-05, "loss": 0.5122, "step": 20926 }, { "epoch": 3.416146279743684, "grad_norm": 3.066270112991333, "learning_rate": 1.2890113376410487e-05, "loss": 0.4778, "step": 20927 }, { "epoch": 3.416309538386188, "grad_norm": 3.2008328437805176, "learning_rate": 1.288949958938806e-05, "loss": 0.514, "step": 20928 }, { "epoch": 3.4164727970286926, "grad_norm": 3.1757824420928955, "learning_rate": 1.2888885790487998e-05, "loss": 0.419, "step": 20929 }, { "epoch": 3.416636055671197, "grad_norm": 3.0227723121643066, "learning_rate": 1.288827197971281e-05, "loss": 0.395, "step": 20930 }, { "epoch": 3.4167993143137014, "grad_norm": 3.2201130390167236, "learning_rate": 1.2887658157065028e-05, "loss": 0.4442, "step": 20931 }, { "epoch": 3.416962572956206, "grad_norm": 3.0775792598724365, "learning_rate": 1.2887044322547172e-05, "loss": 0.4824, "step": 20932 }, { "epoch": 3.4171258315987103, "grad_norm": 3.6281309127807617, "learning_rate": 1.2886430476161771e-05, "loss": 0.4883, "step": 20933 }, { "epoch": 3.4172890902412147, "grad_norm": 2.9270496368408203, "learning_rate": 1.2885816617911345e-05, "loss": 0.4411, "step": 20934 }, { "epoch": 3.417452348883719, "grad_norm": 2.993553638458252, "learning_rate": 1.2885202747798413e-05, "loss": 0.4336, "step": 20935 }, { "epoch": 3.4176156075262236, "grad_norm": 2.6422278881073, "learning_rate": 1.2884588865825502e-05, "loss": 0.4368, "step": 20936 }, { "epoch": 3.4177788661687276, "grad_norm": 3.178496837615967, "learning_rate": 1.2883974971995135e-05, "loss": 0.4495, "step": 20937 }, { "epoch": 3.417942124811232, "grad_norm": 3.1323156356811523, "learning_rate": 1.2883361066309838e-05, "loss": 0.5049, "step": 20938 }, { "epoch": 3.4181053834537365, "grad_norm": 2.8078699111938477, "learning_rate": 1.2882747148772133e-05, "loss": 0.4219, "step": 20939 }, { "epoch": 3.418268642096241, "grad_norm": 3.5510010719299316, "learning_rate": 1.288213321938454e-05, "loss": 0.4932, "step": 20940 }, { "epoch": 3.4184319007387454, "grad_norm": 3.7420096397399902, "learning_rate": 1.2881519278149588e-05, "loss": 0.5701, "step": 20941 }, { "epoch": 3.41859515938125, "grad_norm": 2.728947639465332, "learning_rate": 1.28809053250698e-05, "loss": 0.4442, "step": 20942 }, { "epoch": 3.4187584180237542, "grad_norm": 3.5424273014068604, "learning_rate": 1.2880291360147694e-05, "loss": 0.63, "step": 20943 }, { "epoch": 3.4189216766662587, "grad_norm": 3.07433819770813, "learning_rate": 1.2879677383385798e-05, "loss": 0.4798, "step": 20944 }, { "epoch": 3.4190849353087627, "grad_norm": 3.511523962020874, "learning_rate": 1.2879063394786638e-05, "loss": 0.567, "step": 20945 }, { "epoch": 3.419248193951267, "grad_norm": 3.4122557640075684, "learning_rate": 1.2878449394352734e-05, "loss": 0.4943, "step": 20946 }, { "epoch": 3.4194114525937715, "grad_norm": 2.843194007873535, "learning_rate": 1.2877835382086611e-05, "loss": 0.4212, "step": 20947 }, { "epoch": 3.419574711236276, "grad_norm": 3.2119810581207275, "learning_rate": 1.2877221357990794e-05, "loss": 0.4633, "step": 20948 }, { "epoch": 3.4197379698787804, "grad_norm": 3.558586359024048, "learning_rate": 1.2876607322067809e-05, "loss": 0.5351, "step": 20949 }, { "epoch": 3.419901228521285, "grad_norm": 3.2616636753082275, "learning_rate": 1.2875993274320173e-05, "loss": 0.4746, "step": 20950 }, { "epoch": 3.4200644871637893, "grad_norm": 2.8205153942108154, "learning_rate": 1.287537921475042e-05, "loss": 0.3967, "step": 20951 }, { "epoch": 3.4202277458062937, "grad_norm": 2.8109612464904785, "learning_rate": 1.2874765143361063e-05, "loss": 0.461, "step": 20952 }, { "epoch": 3.420391004448798, "grad_norm": 3.0201644897460938, "learning_rate": 1.2874151060154634e-05, "loss": 0.5038, "step": 20953 }, { "epoch": 3.4205542630913026, "grad_norm": 3.4959652423858643, "learning_rate": 1.2873536965133657e-05, "loss": 0.5428, "step": 20954 }, { "epoch": 3.4207175217338066, "grad_norm": 3.0750317573547363, "learning_rate": 1.2872922858300654e-05, "loss": 0.4983, "step": 20955 }, { "epoch": 3.420880780376311, "grad_norm": 3.4378838539123535, "learning_rate": 1.2872308739658149e-05, "loss": 0.4982, "step": 20956 }, { "epoch": 3.4210440390188155, "grad_norm": 3.0430707931518555, "learning_rate": 1.2871694609208667e-05, "loss": 0.4693, "step": 20957 }, { "epoch": 3.42120729766132, "grad_norm": 3.311253547668457, "learning_rate": 1.2871080466954735e-05, "loss": 0.4643, "step": 20958 }, { "epoch": 3.4213705563038244, "grad_norm": 3.307642698287964, "learning_rate": 1.2870466312898874e-05, "loss": 0.4898, "step": 20959 }, { "epoch": 3.421533814946329, "grad_norm": 3.0226376056671143, "learning_rate": 1.2869852147043607e-05, "loss": 0.4866, "step": 20960 }, { "epoch": 3.4216970735888332, "grad_norm": 3.3420629501342773, "learning_rate": 1.2869237969391464e-05, "loss": 0.4797, "step": 20961 }, { "epoch": 3.4218603322313377, "grad_norm": 3.3447647094726562, "learning_rate": 1.2868623779944968e-05, "loss": 0.5671, "step": 20962 }, { "epoch": 3.4220235908738417, "grad_norm": 2.790837049484253, "learning_rate": 1.2868009578706644e-05, "loss": 0.445, "step": 20963 }, { "epoch": 3.422186849516346, "grad_norm": 3.1719183921813965, "learning_rate": 1.2867395365679012e-05, "loss": 0.486, "step": 20964 }, { "epoch": 3.4223501081588505, "grad_norm": 3.255398988723755, "learning_rate": 1.2866781140864601e-05, "loss": 0.4162, "step": 20965 }, { "epoch": 3.422513366801355, "grad_norm": 3.3810534477233887, "learning_rate": 1.2866166904265936e-05, "loss": 0.5008, "step": 20966 }, { "epoch": 3.4226766254438594, "grad_norm": 3.389214515686035, "learning_rate": 1.286555265588554e-05, "loss": 0.5414, "step": 20967 }, { "epoch": 3.422839884086364, "grad_norm": 2.9623520374298096, "learning_rate": 1.2864938395725943e-05, "loss": 0.3977, "step": 20968 }, { "epoch": 3.4230031427288683, "grad_norm": 3.0294244289398193, "learning_rate": 1.2864324123789663e-05, "loss": 0.4844, "step": 20969 }, { "epoch": 3.4231664013713727, "grad_norm": 3.2048373222351074, "learning_rate": 1.2863709840079227e-05, "loss": 0.5101, "step": 20970 }, { "epoch": 3.423329660013877, "grad_norm": 3.760053873062134, "learning_rate": 1.2863095544597162e-05, "loss": 0.5545, "step": 20971 }, { "epoch": 3.4234929186563816, "grad_norm": 3.0454113483428955, "learning_rate": 1.2862481237345991e-05, "loss": 0.4481, "step": 20972 }, { "epoch": 3.4236561772988856, "grad_norm": 3.2390148639678955, "learning_rate": 1.2861866918328247e-05, "loss": 0.456, "step": 20973 }, { "epoch": 3.42381943594139, "grad_norm": 3.515501022338867, "learning_rate": 1.2861252587546442e-05, "loss": 0.5338, "step": 20974 }, { "epoch": 3.4239826945838945, "grad_norm": 2.9839565753936768, "learning_rate": 1.2860638245003112e-05, "loss": 0.4585, "step": 20975 }, { "epoch": 3.424145953226399, "grad_norm": 3.0060384273529053, "learning_rate": 1.2860023890700774e-05, "loss": 0.4621, "step": 20976 }, { "epoch": 3.4243092118689034, "grad_norm": 3.548158645629883, "learning_rate": 1.2859409524641959e-05, "loss": 0.5432, "step": 20977 }, { "epoch": 3.424472470511408, "grad_norm": 3.092813730239868, "learning_rate": 1.2858795146829195e-05, "loss": 0.4482, "step": 20978 }, { "epoch": 3.4246357291539122, "grad_norm": 2.8094780445098877, "learning_rate": 1.2858180757265002e-05, "loss": 0.4943, "step": 20979 }, { "epoch": 3.4247989877964162, "grad_norm": 3.8911101818084717, "learning_rate": 1.2857566355951905e-05, "loss": 0.5291, "step": 20980 }, { "epoch": 3.4249622464389207, "grad_norm": 3.769571542739868, "learning_rate": 1.285695194289243e-05, "loss": 0.5425, "step": 20981 }, { "epoch": 3.425125505081425, "grad_norm": 3.467799186706543, "learning_rate": 1.2856337518089105e-05, "loss": 0.5591, "step": 20982 }, { "epoch": 3.4252887637239295, "grad_norm": 3.210280179977417, "learning_rate": 1.2855723081544462e-05, "loss": 0.5009, "step": 20983 }, { "epoch": 3.425452022366434, "grad_norm": 3.0426039695739746, "learning_rate": 1.2855108633261013e-05, "loss": 0.4244, "step": 20984 }, { "epoch": 3.4256152810089384, "grad_norm": 2.6601879596710205, "learning_rate": 1.2854494173241295e-05, "loss": 0.4232, "step": 20985 }, { "epoch": 3.425778539651443, "grad_norm": 2.962557077407837, "learning_rate": 1.2853879701487823e-05, "loss": 0.4753, "step": 20986 }, { "epoch": 3.4259417982939473, "grad_norm": 2.894780397415161, "learning_rate": 1.2853265218003135e-05, "loss": 0.4605, "step": 20987 }, { "epoch": 3.4261050569364517, "grad_norm": 3.017688751220703, "learning_rate": 1.2852650722789748e-05, "loss": 0.3969, "step": 20988 }, { "epoch": 3.426268315578956, "grad_norm": 3.7849435806274414, "learning_rate": 1.2852036215850192e-05, "loss": 0.5365, "step": 20989 }, { "epoch": 3.42643157422146, "grad_norm": 3.1543309688568115, "learning_rate": 1.2851421697186992e-05, "loss": 0.4877, "step": 20990 }, { "epoch": 3.4265948328639646, "grad_norm": 4.139034271240234, "learning_rate": 1.2850807166802673e-05, "loss": 0.5974, "step": 20991 }, { "epoch": 3.426758091506469, "grad_norm": 3.8356001377105713, "learning_rate": 1.2850192624699762e-05, "loss": 0.5789, "step": 20992 }, { "epoch": 3.4269213501489735, "grad_norm": 2.8437483310699463, "learning_rate": 1.2849578070880786e-05, "loss": 0.4205, "step": 20993 }, { "epoch": 3.427084608791478, "grad_norm": 2.787975549697876, "learning_rate": 1.2848963505348273e-05, "loss": 0.4271, "step": 20994 }, { "epoch": 3.4272478674339824, "grad_norm": 2.497896671295166, "learning_rate": 1.2848348928104744e-05, "loss": 0.3775, "step": 20995 }, { "epoch": 3.427411126076487, "grad_norm": 3.3267955780029297, "learning_rate": 1.284773433915273e-05, "loss": 0.4495, "step": 20996 }, { "epoch": 3.4275743847189912, "grad_norm": 2.946406364440918, "learning_rate": 1.2847119738494753e-05, "loss": 0.4536, "step": 20997 }, { "epoch": 3.4277376433614952, "grad_norm": 3.291897773742676, "learning_rate": 1.2846505126133342e-05, "loss": 0.4888, "step": 20998 }, { "epoch": 3.4279009020039997, "grad_norm": 2.9188692569732666, "learning_rate": 1.2845890502071025e-05, "loss": 0.3966, "step": 20999 }, { "epoch": 3.428064160646504, "grad_norm": 2.5379996299743652, "learning_rate": 1.2845275866310325e-05, "loss": 0.4016, "step": 21000 }, { "epoch": 3.4282274192890085, "grad_norm": 3.288684606552124, "learning_rate": 1.2844661218853772e-05, "loss": 0.5012, "step": 21001 }, { "epoch": 3.428390677931513, "grad_norm": 3.231642484664917, "learning_rate": 1.2844046559703891e-05, "loss": 0.4579, "step": 21002 }, { "epoch": 3.4285539365740174, "grad_norm": 3.0180623531341553, "learning_rate": 1.2843431888863206e-05, "loss": 0.4377, "step": 21003 }, { "epoch": 3.428717195216522, "grad_norm": 3.75104022026062, "learning_rate": 1.2842817206334248e-05, "loss": 0.5079, "step": 21004 }, { "epoch": 3.4288804538590263, "grad_norm": 3.4476206302642822, "learning_rate": 1.2842202512119541e-05, "loss": 0.4908, "step": 21005 }, { "epoch": 3.4290437125015307, "grad_norm": 3.1263391971588135, "learning_rate": 1.2841587806221614e-05, "loss": 0.4767, "step": 21006 }, { "epoch": 3.429206971144035, "grad_norm": 2.9588825702667236, "learning_rate": 1.2840973088642995e-05, "loss": 0.4593, "step": 21007 }, { "epoch": 3.429370229786539, "grad_norm": 3.400402784347534, "learning_rate": 1.2840358359386203e-05, "loss": 0.4412, "step": 21008 }, { "epoch": 3.4295334884290436, "grad_norm": 3.219470739364624, "learning_rate": 1.283974361845377e-05, "loss": 0.5157, "step": 21009 }, { "epoch": 3.429696747071548, "grad_norm": 3.082263469696045, "learning_rate": 1.2839128865848228e-05, "loss": 0.5022, "step": 21010 }, { "epoch": 3.4298600057140525, "grad_norm": 2.8950107097625732, "learning_rate": 1.2838514101572096e-05, "loss": 0.3883, "step": 21011 }, { "epoch": 3.430023264356557, "grad_norm": 3.1357204914093018, "learning_rate": 1.2837899325627907e-05, "loss": 0.4141, "step": 21012 }, { "epoch": 3.4301865229990613, "grad_norm": 2.2906441688537598, "learning_rate": 1.2837284538018184e-05, "loss": 0.3034, "step": 21013 }, { "epoch": 3.430349781641566, "grad_norm": 3.8515172004699707, "learning_rate": 1.2836669738745454e-05, "loss": 0.5627, "step": 21014 }, { "epoch": 3.4305130402840702, "grad_norm": 3.111027479171753, "learning_rate": 1.2836054927812246e-05, "loss": 0.4915, "step": 21015 }, { "epoch": 3.430676298926574, "grad_norm": 2.8494672775268555, "learning_rate": 1.283544010522109e-05, "loss": 0.448, "step": 21016 }, { "epoch": 3.4308395575690787, "grad_norm": 3.0611469745635986, "learning_rate": 1.283482527097451e-05, "loss": 0.4829, "step": 21017 }, { "epoch": 3.431002816211583, "grad_norm": 3.759627103805542, "learning_rate": 1.2834210425075032e-05, "loss": 0.4271, "step": 21018 }, { "epoch": 3.4311660748540875, "grad_norm": 3.242279529571533, "learning_rate": 1.2833595567525185e-05, "loss": 0.4927, "step": 21019 }, { "epoch": 3.431329333496592, "grad_norm": 3.58687686920166, "learning_rate": 1.2832980698327495e-05, "loss": 0.5182, "step": 21020 }, { "epoch": 3.4314925921390964, "grad_norm": 3.6575841903686523, "learning_rate": 1.2832365817484493e-05, "loss": 0.4938, "step": 21021 }, { "epoch": 3.431655850781601, "grad_norm": 3.046753168106079, "learning_rate": 1.2831750924998708e-05, "loss": 0.4836, "step": 21022 }, { "epoch": 3.4318191094241053, "grad_norm": 3.570152997970581, "learning_rate": 1.2831136020872659e-05, "loss": 0.5084, "step": 21023 }, { "epoch": 3.4319823680666097, "grad_norm": 3.2944495677948, "learning_rate": 1.2830521105108882e-05, "loss": 0.501, "step": 21024 }, { "epoch": 3.432145626709114, "grad_norm": 3.0442776679992676, "learning_rate": 1.2829906177709896e-05, "loss": 0.4972, "step": 21025 }, { "epoch": 3.432308885351618, "grad_norm": 3.098090171813965, "learning_rate": 1.2829291238678238e-05, "loss": 0.4361, "step": 21026 }, { "epoch": 3.4324721439941226, "grad_norm": 2.946528434753418, "learning_rate": 1.2828676288016438e-05, "loss": 0.4362, "step": 21027 }, { "epoch": 3.432635402636627, "grad_norm": 2.9352142810821533, "learning_rate": 1.2828061325727011e-05, "loss": 0.4414, "step": 21028 }, { "epoch": 3.4327986612791315, "grad_norm": 2.8460588455200195, "learning_rate": 1.2827446351812493e-05, "loss": 0.4214, "step": 21029 }, { "epoch": 3.432961919921636, "grad_norm": 2.951979637145996, "learning_rate": 1.2826831366275409e-05, "loss": 0.4479, "step": 21030 }, { "epoch": 3.4331251785641403, "grad_norm": 3.492504835128784, "learning_rate": 1.282621636911829e-05, "loss": 0.5727, "step": 21031 }, { "epoch": 3.433288437206645, "grad_norm": 3.2581825256347656, "learning_rate": 1.2825601360343664e-05, "loss": 0.5129, "step": 21032 }, { "epoch": 3.4334516958491488, "grad_norm": 3.700737953186035, "learning_rate": 1.2824986339954058e-05, "loss": 0.5561, "step": 21033 }, { "epoch": 3.433614954491653, "grad_norm": 2.7983787059783936, "learning_rate": 1.2824371307951999e-05, "loss": 0.4775, "step": 21034 }, { "epoch": 3.4337782131341577, "grad_norm": 3.168177366256714, "learning_rate": 1.2823756264340015e-05, "loss": 0.5192, "step": 21035 }, { "epoch": 3.433941471776662, "grad_norm": 3.2445430755615234, "learning_rate": 1.2823141209120635e-05, "loss": 0.4154, "step": 21036 }, { "epoch": 3.4341047304191665, "grad_norm": 3.5343759059906006, "learning_rate": 1.282252614229639e-05, "loss": 0.48, "step": 21037 }, { "epoch": 3.434267989061671, "grad_norm": 3.227503538131714, "learning_rate": 1.2821911063869803e-05, "loss": 0.4594, "step": 21038 }, { "epoch": 3.4344312477041754, "grad_norm": 3.562091588973999, "learning_rate": 1.2821295973843409e-05, "loss": 0.4933, "step": 21039 }, { "epoch": 3.43459450634668, "grad_norm": 3.0460455417633057, "learning_rate": 1.2820680872219729e-05, "loss": 0.4861, "step": 21040 }, { "epoch": 3.4347577649891843, "grad_norm": 2.7873127460479736, "learning_rate": 1.2820065759001295e-05, "loss": 0.4735, "step": 21041 }, { "epoch": 3.4349210236316887, "grad_norm": 2.665905714035034, "learning_rate": 1.2819450634190638e-05, "loss": 0.4948, "step": 21042 }, { "epoch": 3.4350842822741927, "grad_norm": 2.616870403289795, "learning_rate": 1.2818835497790283e-05, "loss": 0.371, "step": 21043 }, { "epoch": 3.435247540916697, "grad_norm": 3.694033145904541, "learning_rate": 1.2818220349802757e-05, "loss": 0.5354, "step": 21044 }, { "epoch": 3.4354107995592016, "grad_norm": 3.7818729877471924, "learning_rate": 1.2817605190230596e-05, "loss": 0.4773, "step": 21045 }, { "epoch": 3.435574058201706, "grad_norm": 2.931028127670288, "learning_rate": 1.281699001907632e-05, "loss": 0.4802, "step": 21046 }, { "epoch": 3.4357373168442105, "grad_norm": 2.82926869392395, "learning_rate": 1.2816374836342462e-05, "loss": 0.4174, "step": 21047 }, { "epoch": 3.435900575486715, "grad_norm": 3.149536609649658, "learning_rate": 1.2815759642031553e-05, "loss": 0.3999, "step": 21048 }, { "epoch": 3.4360638341292193, "grad_norm": 3.2073802947998047, "learning_rate": 1.2815144436146118e-05, "loss": 0.4689, "step": 21049 }, { "epoch": 3.436227092771724, "grad_norm": 3.52864146232605, "learning_rate": 1.2814529218688688e-05, "loss": 0.4733, "step": 21050 }, { "epoch": 3.4363903514142278, "grad_norm": 2.6555917263031006, "learning_rate": 1.2813913989661791e-05, "loss": 0.4135, "step": 21051 }, { "epoch": 3.436553610056732, "grad_norm": 2.7644522190093994, "learning_rate": 1.2813298749067953e-05, "loss": 0.4566, "step": 21052 }, { "epoch": 3.4367168686992366, "grad_norm": 3.321174383163452, "learning_rate": 1.2812683496909709e-05, "loss": 0.4827, "step": 21053 }, { "epoch": 3.436880127341741, "grad_norm": 3.050262689590454, "learning_rate": 1.2812068233189585e-05, "loss": 0.448, "step": 21054 }, { "epoch": 3.4370433859842455, "grad_norm": 3.720886468887329, "learning_rate": 1.2811452957910109e-05, "loss": 0.5151, "step": 21055 }, { "epoch": 3.43720664462675, "grad_norm": 3.9389874935150146, "learning_rate": 1.2810837671073815e-05, "loss": 0.5489, "step": 21056 }, { "epoch": 3.4373699032692544, "grad_norm": 3.217520236968994, "learning_rate": 1.2810222372683226e-05, "loss": 0.4564, "step": 21057 }, { "epoch": 3.437533161911759, "grad_norm": 2.949385166168213, "learning_rate": 1.2809607062740871e-05, "loss": 0.4525, "step": 21058 }, { "epoch": 3.4376964205542633, "grad_norm": 3.333054542541504, "learning_rate": 1.2808991741249285e-05, "loss": 0.4195, "step": 21059 }, { "epoch": 3.4378596791967677, "grad_norm": 3.381220579147339, "learning_rate": 1.2808376408210997e-05, "loss": 0.4687, "step": 21060 }, { "epoch": 3.4380229378392717, "grad_norm": 3.296200752258301, "learning_rate": 1.2807761063628533e-05, "loss": 0.4522, "step": 21061 }, { "epoch": 3.438186196481776, "grad_norm": 3.4503231048583984, "learning_rate": 1.2807145707504423e-05, "loss": 0.5199, "step": 21062 }, { "epoch": 3.4383494551242806, "grad_norm": 2.6175122261047363, "learning_rate": 1.2806530339841196e-05, "loss": 0.4245, "step": 21063 }, { "epoch": 3.438512713766785, "grad_norm": 3.4017715454101562, "learning_rate": 1.280591496064138e-05, "loss": 0.5239, "step": 21064 }, { "epoch": 3.4386759724092895, "grad_norm": 3.1578168869018555, "learning_rate": 1.2805299569907512e-05, "loss": 0.4667, "step": 21065 }, { "epoch": 3.438839231051794, "grad_norm": 3.4021902084350586, "learning_rate": 1.2804684167642116e-05, "loss": 0.5286, "step": 21066 }, { "epoch": 3.4390024896942983, "grad_norm": 3.471676826477051, "learning_rate": 1.2804068753847721e-05, "loss": 0.5552, "step": 21067 }, { "epoch": 3.4391657483368023, "grad_norm": 3.3694775104522705, "learning_rate": 1.2803453328526858e-05, "loss": 0.4014, "step": 21068 }, { "epoch": 3.4393290069793068, "grad_norm": 2.4245738983154297, "learning_rate": 1.2802837891682058e-05, "loss": 0.3805, "step": 21069 }, { "epoch": 3.439492265621811, "grad_norm": 3.0270581245422363, "learning_rate": 1.2802222443315846e-05, "loss": 0.4444, "step": 21070 }, { "epoch": 3.4396555242643156, "grad_norm": 2.8116774559020996, "learning_rate": 1.280160698343076e-05, "loss": 0.3917, "step": 21071 }, { "epoch": 3.43981878290682, "grad_norm": 3.341334581375122, "learning_rate": 1.2800991512029326e-05, "loss": 0.4979, "step": 21072 }, { "epoch": 3.4399820415493245, "grad_norm": 3.243539333343506, "learning_rate": 1.2800376029114072e-05, "loss": 0.4397, "step": 21073 }, { "epoch": 3.440145300191829, "grad_norm": 3.443247079849243, "learning_rate": 1.2799760534687528e-05, "loss": 0.5349, "step": 21074 }, { "epoch": 3.4403085588343334, "grad_norm": 3.243375301361084, "learning_rate": 1.2799145028752227e-05, "loss": 0.4926, "step": 21075 }, { "epoch": 3.440471817476838, "grad_norm": 3.4881207942962646, "learning_rate": 1.2798529511310697e-05, "loss": 0.4992, "step": 21076 }, { "epoch": 3.4406350761193423, "grad_norm": 3.0110740661621094, "learning_rate": 1.279791398236547e-05, "loss": 0.435, "step": 21077 }, { "epoch": 3.4407983347618463, "grad_norm": 2.916285991668701, "learning_rate": 1.2797298441919076e-05, "loss": 0.4247, "step": 21078 }, { "epoch": 3.4409615934043507, "grad_norm": 2.924102544784546, "learning_rate": 1.2796682889974043e-05, "loss": 0.44, "step": 21079 }, { "epoch": 3.441124852046855, "grad_norm": 3.267159938812256, "learning_rate": 1.2796067326532901e-05, "loss": 0.4932, "step": 21080 }, { "epoch": 3.4412881106893596, "grad_norm": 3.309159278869629, "learning_rate": 1.2795451751598185e-05, "loss": 0.534, "step": 21081 }, { "epoch": 3.441451369331864, "grad_norm": 3.922178268432617, "learning_rate": 1.2794836165172423e-05, "loss": 0.5058, "step": 21082 }, { "epoch": 3.4416146279743685, "grad_norm": 3.388476848602295, "learning_rate": 1.279422056725814e-05, "loss": 0.5077, "step": 21083 }, { "epoch": 3.441777886616873, "grad_norm": 2.763460397720337, "learning_rate": 1.2793604957857874e-05, "loss": 0.3415, "step": 21084 }, { "epoch": 3.4419411452593773, "grad_norm": 3.2395172119140625, "learning_rate": 1.2792989336974153e-05, "loss": 0.485, "step": 21085 }, { "epoch": 3.4421044039018813, "grad_norm": 3.605405807495117, "learning_rate": 1.2792373704609508e-05, "loss": 0.5033, "step": 21086 }, { "epoch": 3.4422676625443858, "grad_norm": 3.610182523727417, "learning_rate": 1.2791758060766467e-05, "loss": 0.6123, "step": 21087 }, { "epoch": 3.44243092118689, "grad_norm": 3.1535227298736572, "learning_rate": 1.2791142405447565e-05, "loss": 0.4533, "step": 21088 }, { "epoch": 3.4425941798293946, "grad_norm": 3.5055792331695557, "learning_rate": 1.279052673865533e-05, "loss": 0.4779, "step": 21089 }, { "epoch": 3.442757438471899, "grad_norm": 3.424468755722046, "learning_rate": 1.2789911060392295e-05, "loss": 0.4914, "step": 21090 }, { "epoch": 3.4429206971144035, "grad_norm": 4.042475700378418, "learning_rate": 1.2789295370660985e-05, "loss": 0.5954, "step": 21091 }, { "epoch": 3.443083955756908, "grad_norm": 2.7980947494506836, "learning_rate": 1.2788679669463938e-05, "loss": 0.4249, "step": 21092 }, { "epoch": 3.4432472143994124, "grad_norm": 3.2116825580596924, "learning_rate": 1.278806395680368e-05, "loss": 0.5025, "step": 21093 }, { "epoch": 3.443410473041917, "grad_norm": 3.3850324153900146, "learning_rate": 1.2787448232682746e-05, "loss": 0.4842, "step": 21094 }, { "epoch": 3.4435737316844213, "grad_norm": 3.109654188156128, "learning_rate": 1.2786832497103666e-05, "loss": 0.4517, "step": 21095 }, { "epoch": 3.4437369903269253, "grad_norm": 3.1224753856658936, "learning_rate": 1.2786216750068968e-05, "loss": 0.5026, "step": 21096 }, { "epoch": 3.4439002489694297, "grad_norm": 3.3634016513824463, "learning_rate": 1.2785600991581181e-05, "loss": 0.5024, "step": 21097 }, { "epoch": 3.444063507611934, "grad_norm": 3.066995143890381, "learning_rate": 1.2784985221642843e-05, "loss": 0.456, "step": 21098 }, { "epoch": 3.4442267662544386, "grad_norm": 3.7224888801574707, "learning_rate": 1.2784369440256483e-05, "loss": 0.6165, "step": 21099 }, { "epoch": 3.444390024896943, "grad_norm": 2.6883864402770996, "learning_rate": 1.2783753647424635e-05, "loss": 0.3864, "step": 21100 }, { "epoch": 3.4445532835394475, "grad_norm": 3.7821102142333984, "learning_rate": 1.2783137843149822e-05, "loss": 0.5428, "step": 21101 }, { "epoch": 3.444716542181952, "grad_norm": 3.294003963470459, "learning_rate": 1.278252202743458e-05, "loss": 0.4362, "step": 21102 }, { "epoch": 3.4448798008244563, "grad_norm": 3.0412302017211914, "learning_rate": 1.2781906200281444e-05, "loss": 0.4727, "step": 21103 }, { "epoch": 3.4450430594669603, "grad_norm": 3.987426280975342, "learning_rate": 1.278129036169294e-05, "loss": 0.5328, "step": 21104 }, { "epoch": 3.4452063181094648, "grad_norm": 3.8384296894073486, "learning_rate": 1.2780674511671602e-05, "loss": 0.735, "step": 21105 }, { "epoch": 3.445369576751969, "grad_norm": 3.5288681983947754, "learning_rate": 1.2780058650219961e-05, "loss": 0.4953, "step": 21106 }, { "epoch": 3.4455328353944736, "grad_norm": 3.308579683303833, "learning_rate": 1.277944277734055e-05, "loss": 0.4739, "step": 21107 }, { "epoch": 3.445696094036978, "grad_norm": 3.5631768703460693, "learning_rate": 1.2778826893035896e-05, "loss": 0.5378, "step": 21108 }, { "epoch": 3.4458593526794825, "grad_norm": 4.067749977111816, "learning_rate": 1.2778210997308535e-05, "loss": 0.54, "step": 21109 }, { "epoch": 3.446022611321987, "grad_norm": 3.08219313621521, "learning_rate": 1.2777595090161002e-05, "loss": 0.4274, "step": 21110 }, { "epoch": 3.4461858699644914, "grad_norm": 2.5463051795959473, "learning_rate": 1.277697917159582e-05, "loss": 0.4183, "step": 21111 }, { "epoch": 3.446349128606996, "grad_norm": 4.360678195953369, "learning_rate": 1.2776363241615525e-05, "loss": 0.5347, "step": 21112 }, { "epoch": 3.4465123872495003, "grad_norm": 3.006289005279541, "learning_rate": 1.277574730022265e-05, "loss": 0.3901, "step": 21113 }, { "epoch": 3.4466756458920043, "grad_norm": 3.278961658477783, "learning_rate": 1.2775131347419725e-05, "loss": 0.4195, "step": 21114 }, { "epoch": 3.4468389045345087, "grad_norm": 3.911499500274658, "learning_rate": 1.2774515383209285e-05, "loss": 0.4701, "step": 21115 }, { "epoch": 3.447002163177013, "grad_norm": 3.077910900115967, "learning_rate": 1.2773899407593859e-05, "loss": 0.4883, "step": 21116 }, { "epoch": 3.4471654218195176, "grad_norm": 3.4356110095977783, "learning_rate": 1.2773283420575979e-05, "loss": 0.4375, "step": 21117 }, { "epoch": 3.447328680462022, "grad_norm": 3.267153263092041, "learning_rate": 1.2772667422158179e-05, "loss": 0.4366, "step": 21118 }, { "epoch": 3.4474919391045264, "grad_norm": 3.043452024459839, "learning_rate": 1.2772051412342988e-05, "loss": 0.4782, "step": 21119 }, { "epoch": 3.447655197747031, "grad_norm": 3.047466278076172, "learning_rate": 1.2771435391132944e-05, "loss": 0.4515, "step": 21120 }, { "epoch": 3.447818456389535, "grad_norm": 3.460042715072632, "learning_rate": 1.2770819358530573e-05, "loss": 0.5187, "step": 21121 }, { "epoch": 3.4479817150320393, "grad_norm": 3.2755908966064453, "learning_rate": 1.2770203314538412e-05, "loss": 0.4426, "step": 21122 }, { "epoch": 3.4481449736745438, "grad_norm": 3.3091824054718018, "learning_rate": 1.2769587259158992e-05, "loss": 0.4927, "step": 21123 }, { "epoch": 3.448308232317048, "grad_norm": 2.6353402137756348, "learning_rate": 1.2768971192394842e-05, "loss": 0.3804, "step": 21124 }, { "epoch": 3.4484714909595526, "grad_norm": 3.054180383682251, "learning_rate": 1.2768355114248493e-05, "loss": 0.4403, "step": 21125 }, { "epoch": 3.448634749602057, "grad_norm": 3.5897021293640137, "learning_rate": 1.2767739024722488e-05, "loss": 0.4182, "step": 21126 }, { "epoch": 3.4487980082445615, "grad_norm": 2.7901744842529297, "learning_rate": 1.276712292381935e-05, "loss": 0.3785, "step": 21127 }, { "epoch": 3.448961266887066, "grad_norm": 2.8282291889190674, "learning_rate": 1.2766506811541616e-05, "loss": 0.4042, "step": 21128 }, { "epoch": 3.4491245255295704, "grad_norm": 2.8431007862091064, "learning_rate": 1.2765890687891816e-05, "loss": 0.3931, "step": 21129 }, { "epoch": 3.449287784172075, "grad_norm": 3.1271889209747314, "learning_rate": 1.2765274552872484e-05, "loss": 0.4213, "step": 21130 }, { "epoch": 3.449451042814579, "grad_norm": 3.7663767337799072, "learning_rate": 1.2764658406486153e-05, "loss": 0.6001, "step": 21131 }, { "epoch": 3.4496143014570833, "grad_norm": 2.6179075241088867, "learning_rate": 1.2764042248735355e-05, "loss": 0.4629, "step": 21132 }, { "epoch": 3.4497775600995877, "grad_norm": 3.232696771621704, "learning_rate": 1.2763426079622623e-05, "loss": 0.508, "step": 21133 }, { "epoch": 3.449940818742092, "grad_norm": 3.5351691246032715, "learning_rate": 1.2762809899150492e-05, "loss": 0.5598, "step": 21134 }, { "epoch": 3.4501040773845966, "grad_norm": 3.0591118335723877, "learning_rate": 1.2762193707321489e-05, "loss": 0.4044, "step": 21135 }, { "epoch": 3.450267336027101, "grad_norm": 3.1055376529693604, "learning_rate": 1.2761577504138152e-05, "loss": 0.4725, "step": 21136 }, { "epoch": 3.4504305946696054, "grad_norm": 3.2342560291290283, "learning_rate": 1.2760961289603011e-05, "loss": 0.4651, "step": 21137 }, { "epoch": 3.45059385331211, "grad_norm": 3.7194836139678955, "learning_rate": 1.2760345063718603e-05, "loss": 0.5141, "step": 21138 }, { "epoch": 3.450757111954614, "grad_norm": 3.529452323913574, "learning_rate": 1.2759728826487461e-05, "loss": 0.4977, "step": 21139 }, { "epoch": 3.4509203705971183, "grad_norm": 3.4756247997283936, "learning_rate": 1.2759112577912112e-05, "loss": 0.5034, "step": 21140 }, { "epoch": 3.4510836292396228, "grad_norm": 3.6534299850463867, "learning_rate": 1.2758496317995093e-05, "loss": 0.6038, "step": 21141 }, { "epoch": 3.451246887882127, "grad_norm": 2.982233762741089, "learning_rate": 1.2757880046738938e-05, "loss": 0.4544, "step": 21142 }, { "epoch": 3.4514101465246316, "grad_norm": 3.0945699214935303, "learning_rate": 1.275726376414618e-05, "loss": 0.4744, "step": 21143 }, { "epoch": 3.451573405167136, "grad_norm": 3.217559337615967, "learning_rate": 1.2756647470219352e-05, "loss": 0.5177, "step": 21144 }, { "epoch": 3.4517366638096405, "grad_norm": 2.9638900756835938, "learning_rate": 1.2756031164960987e-05, "loss": 0.4259, "step": 21145 }, { "epoch": 3.451899922452145, "grad_norm": 3.167019844055176, "learning_rate": 1.2755414848373616e-05, "loss": 0.4996, "step": 21146 }, { "epoch": 3.4520631810946494, "grad_norm": 3.323291063308716, "learning_rate": 1.2754798520459777e-05, "loss": 0.4782, "step": 21147 }, { "epoch": 3.452226439737154, "grad_norm": 3.9804728031158447, "learning_rate": 1.2754182181222e-05, "loss": 0.5296, "step": 21148 }, { "epoch": 3.452389698379658, "grad_norm": 2.564645290374756, "learning_rate": 1.2753565830662825e-05, "loss": 0.4125, "step": 21149 }, { "epoch": 3.4525529570221622, "grad_norm": 3.2235629558563232, "learning_rate": 1.2752949468784776e-05, "loss": 0.4505, "step": 21150 }, { "epoch": 3.4527162156646667, "grad_norm": 2.724341869354248, "learning_rate": 1.2752333095590394e-05, "loss": 0.423, "step": 21151 }, { "epoch": 3.452879474307171, "grad_norm": 2.6797006130218506, "learning_rate": 1.2751716711082205e-05, "loss": 0.3971, "step": 21152 }, { "epoch": 3.4530427329496756, "grad_norm": 2.596446990966797, "learning_rate": 1.2751100315262751e-05, "loss": 0.4605, "step": 21153 }, { "epoch": 3.45320599159218, "grad_norm": 2.7134170532226562, "learning_rate": 1.2750483908134566e-05, "loss": 0.4196, "step": 21154 }, { "epoch": 3.4533692502346844, "grad_norm": 2.8694963455200195, "learning_rate": 1.2749867489700177e-05, "loss": 0.4067, "step": 21155 }, { "epoch": 3.453532508877189, "grad_norm": 3.488081693649292, "learning_rate": 1.274925105996212e-05, "loss": 0.5083, "step": 21156 }, { "epoch": 3.453695767519693, "grad_norm": 3.0836496353149414, "learning_rate": 1.274863461892293e-05, "loss": 0.5133, "step": 21157 }, { "epoch": 3.4538590261621973, "grad_norm": 2.608280658721924, "learning_rate": 1.274801816658514e-05, "loss": 0.428, "step": 21158 }, { "epoch": 3.4540222848047017, "grad_norm": 2.8997933864593506, "learning_rate": 1.2747401702951288e-05, "loss": 0.4559, "step": 21159 }, { "epoch": 3.454185543447206, "grad_norm": 2.331559181213379, "learning_rate": 1.2746785228023904e-05, "loss": 0.4165, "step": 21160 }, { "epoch": 3.4543488020897106, "grad_norm": 3.346949338912964, "learning_rate": 1.2746168741805522e-05, "loss": 0.4896, "step": 21161 }, { "epoch": 3.454512060732215, "grad_norm": 3.241283655166626, "learning_rate": 1.274555224429868e-05, "loss": 0.503, "step": 21162 }, { "epoch": 3.4546753193747195, "grad_norm": 4.054525852203369, "learning_rate": 1.2744935735505903e-05, "loss": 0.5396, "step": 21163 }, { "epoch": 3.454838578017224, "grad_norm": 3.5149190425872803, "learning_rate": 1.274431921542974e-05, "loss": 0.5064, "step": 21164 }, { "epoch": 3.4550018366597284, "grad_norm": 3.3189878463745117, "learning_rate": 1.2743702684072712e-05, "loss": 0.4263, "step": 21165 }, { "epoch": 3.4551650953022324, "grad_norm": 3.0494720935821533, "learning_rate": 1.274308614143736e-05, "loss": 0.4295, "step": 21166 }, { "epoch": 3.455328353944737, "grad_norm": 3.6225380897521973, "learning_rate": 1.2742469587526215e-05, "loss": 0.5447, "step": 21167 }, { "epoch": 3.4554916125872412, "grad_norm": 3.0165438652038574, "learning_rate": 1.2741853022341814e-05, "loss": 0.416, "step": 21168 }, { "epoch": 3.4556548712297457, "grad_norm": 3.2224884033203125, "learning_rate": 1.2741236445886691e-05, "loss": 0.4938, "step": 21169 }, { "epoch": 3.45581812987225, "grad_norm": 3.653174877166748, "learning_rate": 1.2740619858163379e-05, "loss": 0.3946, "step": 21170 }, { "epoch": 3.4559813885147546, "grad_norm": 2.8390979766845703, "learning_rate": 1.2740003259174414e-05, "loss": 0.3975, "step": 21171 }, { "epoch": 3.456144647157259, "grad_norm": 3.670668125152588, "learning_rate": 1.273938664892233e-05, "loss": 0.4327, "step": 21172 }, { "epoch": 3.4563079057997634, "grad_norm": 3.6669585704803467, "learning_rate": 1.2738770027409663e-05, "loss": 0.5938, "step": 21173 }, { "epoch": 3.4564711644422674, "grad_norm": 3.1991748809814453, "learning_rate": 1.2738153394638946e-05, "loss": 0.4201, "step": 21174 }, { "epoch": 3.456634423084772, "grad_norm": 3.426204204559326, "learning_rate": 1.2737536750612713e-05, "loss": 0.5457, "step": 21175 }, { "epoch": 3.4567976817272763, "grad_norm": 3.1436386108398438, "learning_rate": 1.2736920095333502e-05, "loss": 0.461, "step": 21176 }, { "epoch": 3.4569609403697807, "grad_norm": 3.183598518371582, "learning_rate": 1.2736303428803845e-05, "loss": 0.4188, "step": 21177 }, { "epoch": 3.457124199012285, "grad_norm": 3.8237483501434326, "learning_rate": 1.273568675102628e-05, "loss": 0.4951, "step": 21178 }, { "epoch": 3.4572874576547896, "grad_norm": 3.632157802581787, "learning_rate": 1.2735070062003334e-05, "loss": 0.4947, "step": 21179 }, { "epoch": 3.457450716297294, "grad_norm": 2.979438543319702, "learning_rate": 1.2734453361737553e-05, "loss": 0.4612, "step": 21180 }, { "epoch": 3.4576139749397985, "grad_norm": 2.533999443054199, "learning_rate": 1.2733836650231464e-05, "loss": 0.3767, "step": 21181 }, { "epoch": 3.457777233582303, "grad_norm": 3.4984214305877686, "learning_rate": 1.2733219927487605e-05, "loss": 0.5223, "step": 21182 }, { "epoch": 3.4579404922248074, "grad_norm": 2.794973373413086, "learning_rate": 1.2732603193508514e-05, "loss": 0.4232, "step": 21183 }, { "epoch": 3.4581037508673114, "grad_norm": 3.0075340270996094, "learning_rate": 1.2731986448296722e-05, "loss": 0.432, "step": 21184 }, { "epoch": 3.458267009509816, "grad_norm": 3.3257012367248535, "learning_rate": 1.2731369691854763e-05, "loss": 0.5005, "step": 21185 }, { "epoch": 3.4584302681523202, "grad_norm": 2.7581429481506348, "learning_rate": 1.2730752924185174e-05, "loss": 0.3901, "step": 21186 }, { "epoch": 3.4585935267948247, "grad_norm": 3.870152235031128, "learning_rate": 1.2730136145290493e-05, "loss": 0.4925, "step": 21187 }, { "epoch": 3.458756785437329, "grad_norm": 3.2197928428649902, "learning_rate": 1.2729519355173254e-05, "loss": 0.4658, "step": 21188 }, { "epoch": 3.4589200440798336, "grad_norm": 3.6622936725616455, "learning_rate": 1.272890255383599e-05, "loss": 0.4648, "step": 21189 }, { "epoch": 3.459083302722338, "grad_norm": 3.544600009918213, "learning_rate": 1.2728285741281238e-05, "loss": 0.54, "step": 21190 }, { "epoch": 3.4592465613648424, "grad_norm": 3.5465402603149414, "learning_rate": 1.2727668917511532e-05, "loss": 0.4576, "step": 21191 }, { "epoch": 3.4594098200073464, "grad_norm": 3.326798439025879, "learning_rate": 1.2727052082529413e-05, "loss": 0.4648, "step": 21192 }, { "epoch": 3.459573078649851, "grad_norm": 3.530290126800537, "learning_rate": 1.2726435236337413e-05, "loss": 0.5067, "step": 21193 }, { "epoch": 3.4597363372923553, "grad_norm": 3.2102749347686768, "learning_rate": 1.2725818378938063e-05, "loss": 0.5091, "step": 21194 }, { "epoch": 3.4598995959348597, "grad_norm": 3.6551513671875, "learning_rate": 1.2725201510333904e-05, "loss": 0.4861, "step": 21195 }, { "epoch": 3.460062854577364, "grad_norm": 3.154841899871826, "learning_rate": 1.272458463052747e-05, "loss": 0.5055, "step": 21196 }, { "epoch": 3.4602261132198686, "grad_norm": 4.090515613555908, "learning_rate": 1.2723967739521298e-05, "loss": 0.5616, "step": 21197 }, { "epoch": 3.460389371862373, "grad_norm": 3.2967450618743896, "learning_rate": 1.2723350837317927e-05, "loss": 0.4879, "step": 21198 }, { "epoch": 3.4605526305048775, "grad_norm": 2.8212907314300537, "learning_rate": 1.2722733923919886e-05, "loss": 0.3622, "step": 21199 }, { "epoch": 3.460715889147382, "grad_norm": 3.411614418029785, "learning_rate": 1.2722116999329712e-05, "loss": 0.5664, "step": 21200 }, { "epoch": 3.4608791477898864, "grad_norm": 3.799715280532837, "learning_rate": 1.2721500063549945e-05, "loss": 0.5299, "step": 21201 }, { "epoch": 3.4610424064323904, "grad_norm": 3.376995086669922, "learning_rate": 1.2720883116583118e-05, "loss": 0.5417, "step": 21202 }, { "epoch": 3.461205665074895, "grad_norm": 3.0478527545928955, "learning_rate": 1.272026615843177e-05, "loss": 0.4587, "step": 21203 }, { "epoch": 3.4613689237173992, "grad_norm": 3.013564348220825, "learning_rate": 1.2719649189098433e-05, "loss": 0.4717, "step": 21204 }, { "epoch": 3.4615321823599037, "grad_norm": 2.5243120193481445, "learning_rate": 1.2719032208585645e-05, "loss": 0.4114, "step": 21205 }, { "epoch": 3.461695441002408, "grad_norm": 3.582143783569336, "learning_rate": 1.2718415216895943e-05, "loss": 0.486, "step": 21206 }, { "epoch": 3.4618586996449126, "grad_norm": 2.9221413135528564, "learning_rate": 1.2717798214031863e-05, "loss": 0.4841, "step": 21207 }, { "epoch": 3.462021958287417, "grad_norm": 2.578476667404175, "learning_rate": 1.2717181199995938e-05, "loss": 0.3884, "step": 21208 }, { "epoch": 3.462185216929921, "grad_norm": 2.624521493911743, "learning_rate": 1.2716564174790711e-05, "loss": 0.4102, "step": 21209 }, { "epoch": 3.4623484755724254, "grad_norm": 2.644251585006714, "learning_rate": 1.2715947138418712e-05, "loss": 0.4026, "step": 21210 }, { "epoch": 3.46251173421493, "grad_norm": 3.2835028171539307, "learning_rate": 1.271533009088248e-05, "loss": 0.5665, "step": 21211 }, { "epoch": 3.4626749928574343, "grad_norm": 3.2117538452148438, "learning_rate": 1.2714713032184551e-05, "loss": 0.4784, "step": 21212 }, { "epoch": 3.4628382514999387, "grad_norm": 3.099864959716797, "learning_rate": 1.2714095962327464e-05, "loss": 0.4502, "step": 21213 }, { "epoch": 3.463001510142443, "grad_norm": 3.3088603019714355, "learning_rate": 1.2713478881313752e-05, "loss": 0.5265, "step": 21214 }, { "epoch": 3.4631647687849476, "grad_norm": 3.489435911178589, "learning_rate": 1.2712861789145951e-05, "loss": 0.5016, "step": 21215 }, { "epoch": 3.463328027427452, "grad_norm": 3.204941511154175, "learning_rate": 1.2712244685826603e-05, "loss": 0.4984, "step": 21216 }, { "epoch": 3.4634912860699565, "grad_norm": 3.0479822158813477, "learning_rate": 1.2711627571358239e-05, "loss": 0.4536, "step": 21217 }, { "epoch": 3.463654544712461, "grad_norm": 3.77986216545105, "learning_rate": 1.2711010445743397e-05, "loss": 0.5284, "step": 21218 }, { "epoch": 3.463817803354965, "grad_norm": 3.6293857097625732, "learning_rate": 1.2710393308984616e-05, "loss": 0.5376, "step": 21219 }, { "epoch": 3.4639810619974694, "grad_norm": 3.3811264038085938, "learning_rate": 1.2709776161084432e-05, "loss": 0.5127, "step": 21220 }, { "epoch": 3.464144320639974, "grad_norm": 3.2779314517974854, "learning_rate": 1.2709159002045381e-05, "loss": 0.469, "step": 21221 }, { "epoch": 3.4643075792824782, "grad_norm": 2.7716612815856934, "learning_rate": 1.2708541831870003e-05, "loss": 0.3726, "step": 21222 }, { "epoch": 3.4644708379249827, "grad_norm": 2.872624158859253, "learning_rate": 1.2707924650560827e-05, "loss": 0.446, "step": 21223 }, { "epoch": 3.464634096567487, "grad_norm": 3.507735013961792, "learning_rate": 1.2707307458120397e-05, "loss": 0.4976, "step": 21224 }, { "epoch": 3.4647973552099915, "grad_norm": 3.7941009998321533, "learning_rate": 1.270669025455125e-05, "loss": 0.5446, "step": 21225 }, { "epoch": 3.464960613852496, "grad_norm": 3.523477077484131, "learning_rate": 1.270607303985592e-05, "loss": 0.5162, "step": 21226 }, { "epoch": 3.465123872495, "grad_norm": 3.2700207233428955, "learning_rate": 1.2705455814036949e-05, "loss": 0.4959, "step": 21227 }, { "epoch": 3.4652871311375044, "grad_norm": 3.6799161434173584, "learning_rate": 1.2704838577096867e-05, "loss": 0.5075, "step": 21228 }, { "epoch": 3.465450389780009, "grad_norm": 3.5762908458709717, "learning_rate": 1.2704221329038214e-05, "loss": 0.455, "step": 21229 }, { "epoch": 3.4656136484225133, "grad_norm": 2.9922876358032227, "learning_rate": 1.2703604069863529e-05, "loss": 0.5264, "step": 21230 }, { "epoch": 3.4657769070650177, "grad_norm": 3.1196954250335693, "learning_rate": 1.270298679957535e-05, "loss": 0.4429, "step": 21231 }, { "epoch": 3.465940165707522, "grad_norm": 3.533207416534424, "learning_rate": 1.2702369518176215e-05, "loss": 0.5497, "step": 21232 }, { "epoch": 3.4661034243500266, "grad_norm": 3.593752384185791, "learning_rate": 1.2701752225668655e-05, "loss": 0.4911, "step": 21233 }, { "epoch": 3.466266682992531, "grad_norm": 3.0617716312408447, "learning_rate": 1.2701134922055212e-05, "loss": 0.4627, "step": 21234 }, { "epoch": 3.4664299416350355, "grad_norm": 3.715014934539795, "learning_rate": 1.2700517607338425e-05, "loss": 0.5321, "step": 21235 }, { "epoch": 3.46659320027754, "grad_norm": 2.68342924118042, "learning_rate": 1.269990028152083e-05, "loss": 0.4271, "step": 21236 }, { "epoch": 3.466756458920044, "grad_norm": 2.7576205730438232, "learning_rate": 1.2699282944604968e-05, "loss": 0.4102, "step": 21237 }, { "epoch": 3.4669197175625484, "grad_norm": 3.5212061405181885, "learning_rate": 1.2698665596593367e-05, "loss": 0.4757, "step": 21238 }, { "epoch": 3.467082976205053, "grad_norm": 3.2877674102783203, "learning_rate": 1.2698048237488571e-05, "loss": 0.4952, "step": 21239 }, { "epoch": 3.4672462348475572, "grad_norm": 3.5404863357543945, "learning_rate": 1.2697430867293118e-05, "loss": 0.5031, "step": 21240 }, { "epoch": 3.4674094934900617, "grad_norm": 3.2279927730560303, "learning_rate": 1.2696813486009547e-05, "loss": 0.4407, "step": 21241 }, { "epoch": 3.467572752132566, "grad_norm": 2.8273701667785645, "learning_rate": 1.2696196093640397e-05, "loss": 0.3815, "step": 21242 }, { "epoch": 3.4677360107750705, "grad_norm": 3.1752872467041016, "learning_rate": 1.2695578690188198e-05, "loss": 0.5258, "step": 21243 }, { "epoch": 3.467899269417575, "grad_norm": 2.635321617126465, "learning_rate": 1.2694961275655495e-05, "loss": 0.3794, "step": 21244 }, { "epoch": 3.468062528060079, "grad_norm": 3.2904248237609863, "learning_rate": 1.2694343850044821e-05, "loss": 0.4587, "step": 21245 }, { "epoch": 3.4682257867025834, "grad_norm": 3.605027675628662, "learning_rate": 1.2693726413358719e-05, "loss": 0.4489, "step": 21246 }, { "epoch": 3.468389045345088, "grad_norm": 3.382416009902954, "learning_rate": 1.2693108965599725e-05, "loss": 0.467, "step": 21247 }, { "epoch": 3.4685523039875923, "grad_norm": 2.7679336071014404, "learning_rate": 1.2692491506770378e-05, "loss": 0.423, "step": 21248 }, { "epoch": 3.4687155626300967, "grad_norm": 2.8894879817962646, "learning_rate": 1.2691874036873213e-05, "loss": 0.3992, "step": 21249 }, { "epoch": 3.468878821272601, "grad_norm": 2.9085476398468018, "learning_rate": 1.2691256555910769e-05, "loss": 0.4403, "step": 21250 }, { "epoch": 3.4690420799151056, "grad_norm": 2.980088233947754, "learning_rate": 1.2690639063885588e-05, "loss": 0.4727, "step": 21251 }, { "epoch": 3.46920533855761, "grad_norm": 2.9485650062561035, "learning_rate": 1.2690021560800205e-05, "loss": 0.4227, "step": 21252 }, { "epoch": 3.4693685972001145, "grad_norm": 3.6292026042938232, "learning_rate": 1.2689404046657162e-05, "loss": 0.5419, "step": 21253 }, { "epoch": 3.469531855842619, "grad_norm": 3.3859753608703613, "learning_rate": 1.2688786521458991e-05, "loss": 0.4987, "step": 21254 }, { "epoch": 3.469695114485123, "grad_norm": 2.9427528381347656, "learning_rate": 1.2688168985208233e-05, "loss": 0.4127, "step": 21255 }, { "epoch": 3.4698583731276273, "grad_norm": 2.532776355743408, "learning_rate": 1.268755143790743e-05, "loss": 0.4274, "step": 21256 }, { "epoch": 3.470021631770132, "grad_norm": 3.0439062118530273, "learning_rate": 1.2686933879559118e-05, "loss": 0.4196, "step": 21257 }, { "epoch": 3.4701848904126362, "grad_norm": 3.092546224594116, "learning_rate": 1.2686316310165831e-05, "loss": 0.4583, "step": 21258 }, { "epoch": 3.4703481490551407, "grad_norm": 2.9440691471099854, "learning_rate": 1.2685698729730116e-05, "loss": 0.4181, "step": 21259 }, { "epoch": 3.470511407697645, "grad_norm": 2.913585901260376, "learning_rate": 1.2685081138254506e-05, "loss": 0.4283, "step": 21260 }, { "epoch": 3.4706746663401495, "grad_norm": 3.502570629119873, "learning_rate": 1.2684463535741545e-05, "loss": 0.5487, "step": 21261 }, { "epoch": 3.4708379249826535, "grad_norm": 3.5698180198669434, "learning_rate": 1.2683845922193765e-05, "loss": 0.4827, "step": 21262 }, { "epoch": 3.471001183625158, "grad_norm": 3.395501136779785, "learning_rate": 1.2683228297613707e-05, "loss": 0.4347, "step": 21263 }, { "epoch": 3.4711644422676624, "grad_norm": 3.7428669929504395, "learning_rate": 1.2682610662003911e-05, "loss": 0.5205, "step": 21264 }, { "epoch": 3.471327700910167, "grad_norm": 3.722661256790161, "learning_rate": 1.2681993015366918e-05, "loss": 0.5422, "step": 21265 }, { "epoch": 3.4714909595526713, "grad_norm": 3.1610403060913086, "learning_rate": 1.2681375357705263e-05, "loss": 0.4793, "step": 21266 }, { "epoch": 3.4716542181951757, "grad_norm": 3.54518985748291, "learning_rate": 1.2680757689021483e-05, "loss": 0.4928, "step": 21267 }, { "epoch": 3.47181747683768, "grad_norm": 3.506079912185669, "learning_rate": 1.2680140009318125e-05, "loss": 0.4406, "step": 21268 }, { "epoch": 3.4719807354801846, "grad_norm": 3.9201338291168213, "learning_rate": 1.267952231859772e-05, "loss": 0.5648, "step": 21269 }, { "epoch": 3.472143994122689, "grad_norm": 3.731461524963379, "learning_rate": 1.2678904616862812e-05, "loss": 0.5428, "step": 21270 }, { "epoch": 3.4723072527651935, "grad_norm": 3.047109365463257, "learning_rate": 1.2678286904115945e-05, "loss": 0.4633, "step": 21271 }, { "epoch": 3.4724705114076975, "grad_norm": 2.901557445526123, "learning_rate": 1.2677669180359644e-05, "loss": 0.4461, "step": 21272 }, { "epoch": 3.472633770050202, "grad_norm": 3.54577898979187, "learning_rate": 1.2677051445596456e-05, "loss": 0.5318, "step": 21273 }, { "epoch": 3.4727970286927063, "grad_norm": 2.8481526374816895, "learning_rate": 1.267643369982892e-05, "loss": 0.4122, "step": 21274 }, { "epoch": 3.472960287335211, "grad_norm": 3.247173309326172, "learning_rate": 1.267581594305958e-05, "loss": 0.4583, "step": 21275 }, { "epoch": 3.473123545977715, "grad_norm": 3.6564369201660156, "learning_rate": 1.2675198175290971e-05, "loss": 0.5164, "step": 21276 }, { "epoch": 3.4732868046202197, "grad_norm": 3.187487840652466, "learning_rate": 1.2674580396525629e-05, "loss": 0.4483, "step": 21277 }, { "epoch": 3.473450063262724, "grad_norm": 3.151691436767578, "learning_rate": 1.26739626067661e-05, "loss": 0.5374, "step": 21278 }, { "epoch": 3.4736133219052285, "grad_norm": 2.917950391769409, "learning_rate": 1.2673344806014916e-05, "loss": 0.4281, "step": 21279 }, { "epoch": 3.4737765805477325, "grad_norm": 2.8989620208740234, "learning_rate": 1.2672726994274622e-05, "loss": 0.453, "step": 21280 }, { "epoch": 3.473939839190237, "grad_norm": 3.3101489543914795, "learning_rate": 1.2672109171547761e-05, "loss": 0.6234, "step": 21281 }, { "epoch": 3.4741030978327414, "grad_norm": 3.3145694732666016, "learning_rate": 1.2671491337836866e-05, "loss": 0.4962, "step": 21282 }, { "epoch": 3.474266356475246, "grad_norm": 4.215211868286133, "learning_rate": 1.2670873493144478e-05, "loss": 0.4635, "step": 21283 }, { "epoch": 3.4744296151177503, "grad_norm": 3.1391241550445557, "learning_rate": 1.2670255637473135e-05, "loss": 0.4279, "step": 21284 }, { "epoch": 3.4745928737602547, "grad_norm": 3.06312894821167, "learning_rate": 1.266963777082538e-05, "loss": 0.4604, "step": 21285 }, { "epoch": 3.474756132402759, "grad_norm": 3.2042832374572754, "learning_rate": 1.2669019893203758e-05, "loss": 0.4689, "step": 21286 }, { "epoch": 3.4749193910452636, "grad_norm": 3.2925612926483154, "learning_rate": 1.26684020046108e-05, "loss": 0.4875, "step": 21287 }, { "epoch": 3.475082649687768, "grad_norm": 3.092247247695923, "learning_rate": 1.2667784105049049e-05, "loss": 0.4605, "step": 21288 }, { "epoch": 3.4752459083302725, "grad_norm": 3.0720372200012207, "learning_rate": 1.2667166194521042e-05, "loss": 0.4702, "step": 21289 }, { "epoch": 3.4754091669727765, "grad_norm": 3.1532511711120605, "learning_rate": 1.2666548273029322e-05, "loss": 0.4435, "step": 21290 }, { "epoch": 3.475572425615281, "grad_norm": 3.2816390991210938, "learning_rate": 1.2665930340576433e-05, "loss": 0.5589, "step": 21291 }, { "epoch": 3.4757356842577853, "grad_norm": 3.2010550498962402, "learning_rate": 1.2665312397164908e-05, "loss": 0.4377, "step": 21292 }, { "epoch": 3.47589894290029, "grad_norm": 3.5687735080718994, "learning_rate": 1.2664694442797291e-05, "loss": 0.4826, "step": 21293 }, { "epoch": 3.476062201542794, "grad_norm": 2.842538833618164, "learning_rate": 1.2664076477476122e-05, "loss": 0.4146, "step": 21294 }, { "epoch": 3.4762254601852987, "grad_norm": 3.0899546146392822, "learning_rate": 1.266345850120394e-05, "loss": 0.4276, "step": 21295 }, { "epoch": 3.476388718827803, "grad_norm": 2.891603946685791, "learning_rate": 1.2662840513983285e-05, "loss": 0.4155, "step": 21296 }, { "epoch": 3.476551977470307, "grad_norm": 2.7066874504089355, "learning_rate": 1.2662222515816697e-05, "loss": 0.4291, "step": 21297 }, { "epoch": 3.4767152361128115, "grad_norm": 2.5850884914398193, "learning_rate": 1.2661604506706722e-05, "loss": 0.4033, "step": 21298 }, { "epoch": 3.476878494755316, "grad_norm": 3.465864419937134, "learning_rate": 1.2660986486655893e-05, "loss": 0.4455, "step": 21299 }, { "epoch": 3.4770417533978204, "grad_norm": 3.9362921714782715, "learning_rate": 1.2660368455666752e-05, "loss": 0.5858, "step": 21300 }, { "epoch": 3.477205012040325, "grad_norm": 3.0104308128356934, "learning_rate": 1.2659750413741842e-05, "loss": 0.415, "step": 21301 }, { "epoch": 3.4773682706828293, "grad_norm": 3.622643232345581, "learning_rate": 1.2659132360883703e-05, "loss": 0.5675, "step": 21302 }, { "epoch": 3.4775315293253337, "grad_norm": 2.78812837600708, "learning_rate": 1.2658514297094874e-05, "loss": 0.4109, "step": 21303 }, { "epoch": 3.477694787967838, "grad_norm": 2.9617550373077393, "learning_rate": 1.2657896222377897e-05, "loss": 0.4017, "step": 21304 }, { "epoch": 3.4778580466103426, "grad_norm": 3.269412040710449, "learning_rate": 1.2657278136735315e-05, "loss": 0.4437, "step": 21305 }, { "epoch": 3.478021305252847, "grad_norm": 3.110900402069092, "learning_rate": 1.2656660040169661e-05, "loss": 0.4563, "step": 21306 }, { "epoch": 3.478184563895351, "grad_norm": 2.3226001262664795, "learning_rate": 1.2656041932683483e-05, "loss": 0.3674, "step": 21307 }, { "epoch": 3.4783478225378555, "grad_norm": 3.0255465507507324, "learning_rate": 1.265542381427932e-05, "loss": 0.428, "step": 21308 }, { "epoch": 3.47851108118036, "grad_norm": 3.0481579303741455, "learning_rate": 1.2654805684959713e-05, "loss": 0.421, "step": 21309 }, { "epoch": 3.4786743398228643, "grad_norm": 3.1660053730010986, "learning_rate": 1.2654187544727203e-05, "loss": 0.4798, "step": 21310 }, { "epoch": 3.4788375984653688, "grad_norm": 3.464855909347534, "learning_rate": 1.2653569393584327e-05, "loss": 0.514, "step": 21311 }, { "epoch": 3.479000857107873, "grad_norm": 2.8273868560791016, "learning_rate": 1.2652951231533627e-05, "loss": 0.4043, "step": 21312 }, { "epoch": 3.4791641157503777, "grad_norm": 3.1145529747009277, "learning_rate": 1.2652333058577652e-05, "loss": 0.3958, "step": 21313 }, { "epoch": 3.479327374392882, "grad_norm": 3.2814886569976807, "learning_rate": 1.2651714874718935e-05, "loss": 0.4454, "step": 21314 }, { "epoch": 3.479490633035386, "grad_norm": 3.409170389175415, "learning_rate": 1.265109667996002e-05, "loss": 0.4824, "step": 21315 }, { "epoch": 3.4796538916778905, "grad_norm": 2.796891927719116, "learning_rate": 1.2650478474303448e-05, "loss": 0.4049, "step": 21316 }, { "epoch": 3.479817150320395, "grad_norm": 3.4991261959075928, "learning_rate": 1.2649860257751758e-05, "loss": 0.5157, "step": 21317 }, { "epoch": 3.4799804089628994, "grad_norm": 3.3220276832580566, "learning_rate": 1.2649242030307493e-05, "loss": 0.5337, "step": 21318 }, { "epoch": 3.480143667605404, "grad_norm": 3.578469753265381, "learning_rate": 1.2648623791973196e-05, "loss": 0.457, "step": 21319 }, { "epoch": 3.4803069262479083, "grad_norm": 3.335310697555542, "learning_rate": 1.2648005542751408e-05, "loss": 0.5316, "step": 21320 }, { "epoch": 3.4804701848904127, "grad_norm": 4.2400031089782715, "learning_rate": 1.2647387282644665e-05, "loss": 1.1331, "step": 21321 }, { "epoch": 3.480633443532917, "grad_norm": 3.863429307937622, "learning_rate": 1.2646769011655515e-05, "loss": 0.4983, "step": 21322 }, { "epoch": 3.4807967021754216, "grad_norm": 2.968869924545288, "learning_rate": 1.2646150729786495e-05, "loss": 0.4259, "step": 21323 }, { "epoch": 3.480959960817926, "grad_norm": 3.503908395767212, "learning_rate": 1.2645532437040151e-05, "loss": 0.4628, "step": 21324 }, { "epoch": 3.48112321946043, "grad_norm": 3.9550857543945312, "learning_rate": 1.2644914133419022e-05, "loss": 0.5125, "step": 21325 }, { "epoch": 3.4812864781029345, "grad_norm": 4.055231094360352, "learning_rate": 1.264429581892565e-05, "loss": 0.5166, "step": 21326 }, { "epoch": 3.481449736745439, "grad_norm": 3.136312484741211, "learning_rate": 1.2643677493562574e-05, "loss": 0.4203, "step": 21327 }, { "epoch": 3.4816129953879433, "grad_norm": 3.763162136077881, "learning_rate": 1.2643059157332339e-05, "loss": 0.4983, "step": 21328 }, { "epoch": 3.4817762540304478, "grad_norm": 3.8288605213165283, "learning_rate": 1.2642440810237484e-05, "loss": 0.4974, "step": 21329 }, { "epoch": 3.481939512672952, "grad_norm": 3.972090721130371, "learning_rate": 1.2641822452280557e-05, "loss": 0.4894, "step": 21330 }, { "epoch": 3.4821027713154566, "grad_norm": 3.379884958267212, "learning_rate": 1.2641204083464093e-05, "loss": 0.5044, "step": 21331 }, { "epoch": 3.482266029957961, "grad_norm": 3.089399576187134, "learning_rate": 1.2640585703790638e-05, "loss": 0.4959, "step": 21332 }, { "epoch": 3.482429288600465, "grad_norm": 3.189567804336548, "learning_rate": 1.263996731326273e-05, "loss": 0.4686, "step": 21333 }, { "epoch": 3.4825925472429695, "grad_norm": 3.2049670219421387, "learning_rate": 1.2639348911882912e-05, "loss": 0.4904, "step": 21334 }, { "epoch": 3.482755805885474, "grad_norm": 3.9438681602478027, "learning_rate": 1.2638730499653731e-05, "loss": 0.5804, "step": 21335 }, { "epoch": 3.4829190645279784, "grad_norm": 3.2576980590820312, "learning_rate": 1.2638112076577724e-05, "loss": 0.4967, "step": 21336 }, { "epoch": 3.483082323170483, "grad_norm": 2.8053674697875977, "learning_rate": 1.2637493642657432e-05, "loss": 0.4005, "step": 21337 }, { "epoch": 3.4832455818129873, "grad_norm": 3.2726738452911377, "learning_rate": 1.2636875197895402e-05, "loss": 0.5178, "step": 21338 }, { "epoch": 3.4834088404554917, "grad_norm": 3.078415632247925, "learning_rate": 1.2636256742294173e-05, "loss": 0.4742, "step": 21339 }, { "epoch": 3.483572099097996, "grad_norm": 3.5634326934814453, "learning_rate": 1.2635638275856288e-05, "loss": 0.5551, "step": 21340 }, { "epoch": 3.4837353577405006, "grad_norm": 2.9181113243103027, "learning_rate": 1.263501979858429e-05, "loss": 0.4197, "step": 21341 }, { "epoch": 3.483898616383005, "grad_norm": 3.1018691062927246, "learning_rate": 1.263440131048072e-05, "loss": 0.4263, "step": 21342 }, { "epoch": 3.484061875025509, "grad_norm": 3.138471841812134, "learning_rate": 1.2633782811548121e-05, "loss": 0.5257, "step": 21343 }, { "epoch": 3.4842251336680135, "grad_norm": 3.592661142349243, "learning_rate": 1.2633164301789034e-05, "loss": 0.4273, "step": 21344 }, { "epoch": 3.484388392310518, "grad_norm": 2.9545533657073975, "learning_rate": 1.2632545781206005e-05, "loss": 0.4356, "step": 21345 }, { "epoch": 3.4845516509530223, "grad_norm": 3.5768606662750244, "learning_rate": 1.2631927249801572e-05, "loss": 0.5099, "step": 21346 }, { "epoch": 3.4847149095955268, "grad_norm": 3.4189867973327637, "learning_rate": 1.2631308707578282e-05, "loss": 0.5312, "step": 21347 }, { "epoch": 3.484878168238031, "grad_norm": 3.1787827014923096, "learning_rate": 1.2630690154538675e-05, "loss": 0.4979, "step": 21348 }, { "epoch": 3.4850414268805356, "grad_norm": 3.757570743560791, "learning_rate": 1.2630071590685295e-05, "loss": 0.5695, "step": 21349 }, { "epoch": 3.4852046855230396, "grad_norm": 3.253499984741211, "learning_rate": 1.2629453016020681e-05, "loss": 0.3934, "step": 21350 }, { "epoch": 3.485367944165544, "grad_norm": 3.597241163253784, "learning_rate": 1.2628834430547379e-05, "loss": 0.545, "step": 21351 }, { "epoch": 3.4855312028080485, "grad_norm": 3.3268322944641113, "learning_rate": 1.2628215834267934e-05, "loss": 0.4587, "step": 21352 }, { "epoch": 3.485694461450553, "grad_norm": 3.3833253383636475, "learning_rate": 1.2627597227184883e-05, "loss": 0.4748, "step": 21353 }, { "epoch": 3.4858577200930574, "grad_norm": 3.4891228675842285, "learning_rate": 1.2626978609300777e-05, "loss": 0.5105, "step": 21354 }, { "epoch": 3.486020978735562, "grad_norm": 3.0792760848999023, "learning_rate": 1.2626359980618145e-05, "loss": 0.447, "step": 21355 }, { "epoch": 3.4861842373780663, "grad_norm": 2.9957435131073, "learning_rate": 1.2625741341139545e-05, "loss": 0.4387, "step": 21356 }, { "epoch": 3.4863474960205707, "grad_norm": 3.3536617755889893, "learning_rate": 1.2625122690867513e-05, "loss": 0.5721, "step": 21357 }, { "epoch": 3.486510754663075, "grad_norm": 3.323383092880249, "learning_rate": 1.2624504029804594e-05, "loss": 0.4973, "step": 21358 }, { "epoch": 3.4866740133055796, "grad_norm": 2.6375951766967773, "learning_rate": 1.2623885357953332e-05, "loss": 0.3816, "step": 21359 }, { "epoch": 3.4868372719480836, "grad_norm": 2.8953967094421387, "learning_rate": 1.2623266675316264e-05, "loss": 0.4576, "step": 21360 }, { "epoch": 3.487000530590588, "grad_norm": 3.3116445541381836, "learning_rate": 1.2622647981895934e-05, "loss": 0.4549, "step": 21361 }, { "epoch": 3.4871637892330924, "grad_norm": 3.119253158569336, "learning_rate": 1.2622029277694895e-05, "loss": 0.423, "step": 21362 }, { "epoch": 3.487327047875597, "grad_norm": 2.6621615886688232, "learning_rate": 1.262141056271568e-05, "loss": 0.414, "step": 21363 }, { "epoch": 3.4874903065181013, "grad_norm": 3.2918591499328613, "learning_rate": 1.262079183696084e-05, "loss": 0.5582, "step": 21364 }, { "epoch": 3.4876535651606058, "grad_norm": 2.682136058807373, "learning_rate": 1.2620173100432911e-05, "loss": 0.4185, "step": 21365 }, { "epoch": 3.48781682380311, "grad_norm": 3.289194345474243, "learning_rate": 1.261955435313444e-05, "loss": 0.4309, "step": 21366 }, { "epoch": 3.4879800824456146, "grad_norm": 2.74937105178833, "learning_rate": 1.2618935595067967e-05, "loss": 0.437, "step": 21367 }, { "epoch": 3.4881433410881186, "grad_norm": 2.9967541694641113, "learning_rate": 1.2618316826236043e-05, "loss": 0.4375, "step": 21368 }, { "epoch": 3.488306599730623, "grad_norm": 2.8064937591552734, "learning_rate": 1.261769804664121e-05, "loss": 0.4082, "step": 21369 }, { "epoch": 3.4884698583731275, "grad_norm": 3.1447572708129883, "learning_rate": 1.2617079256286002e-05, "loss": 0.4819, "step": 21370 }, { "epoch": 3.488633117015632, "grad_norm": 2.693549871444702, "learning_rate": 1.2616460455172974e-05, "loss": 0.43, "step": 21371 }, { "epoch": 3.4887963756581364, "grad_norm": 3.162313938140869, "learning_rate": 1.261584164330466e-05, "loss": 0.4681, "step": 21372 }, { "epoch": 3.488959634300641, "grad_norm": 3.6365411281585693, "learning_rate": 1.2615222820683613e-05, "loss": 0.5279, "step": 21373 }, { "epoch": 3.4891228929431453, "grad_norm": 3.3925764560699463, "learning_rate": 1.2614603987312372e-05, "loss": 0.4853, "step": 21374 }, { "epoch": 3.4892861515856497, "grad_norm": 3.8233203887939453, "learning_rate": 1.2613985143193483e-05, "loss": 0.572, "step": 21375 }, { "epoch": 3.489449410228154, "grad_norm": 3.361539840698242, "learning_rate": 1.2613366288329486e-05, "loss": 0.464, "step": 21376 }, { "epoch": 3.4896126688706586, "grad_norm": 2.623788356781006, "learning_rate": 1.2612747422722926e-05, "loss": 0.4029, "step": 21377 }, { "epoch": 3.4897759275131626, "grad_norm": 2.9078221321105957, "learning_rate": 1.2612128546376345e-05, "loss": 0.426, "step": 21378 }, { "epoch": 3.489939186155667, "grad_norm": 3.181316614151001, "learning_rate": 1.2611509659292298e-05, "loss": 0.4883, "step": 21379 }, { "epoch": 3.4901024447981714, "grad_norm": 3.5087714195251465, "learning_rate": 1.2610890761473315e-05, "loss": 0.516, "step": 21380 }, { "epoch": 3.490265703440676, "grad_norm": 3.61879825592041, "learning_rate": 1.261027185292195e-05, "loss": 0.4356, "step": 21381 }, { "epoch": 3.4904289620831803, "grad_norm": 3.714432954788208, "learning_rate": 1.2609652933640739e-05, "loss": 0.5357, "step": 21382 }, { "epoch": 3.4905922207256848, "grad_norm": 3.566540002822876, "learning_rate": 1.2609034003632232e-05, "loss": 0.5346, "step": 21383 }, { "epoch": 3.490755479368189, "grad_norm": 3.0618927478790283, "learning_rate": 1.2608415062898971e-05, "loss": 0.4498, "step": 21384 }, { "epoch": 3.4909187380106936, "grad_norm": 3.9024322032928467, "learning_rate": 1.26077961114435e-05, "loss": 0.4848, "step": 21385 }, { "epoch": 3.4910819966531976, "grad_norm": 3.712388038635254, "learning_rate": 1.2607177149268366e-05, "loss": 0.4752, "step": 21386 }, { "epoch": 3.491245255295702, "grad_norm": 3.8496477603912354, "learning_rate": 1.260655817637611e-05, "loss": 0.4466, "step": 21387 }, { "epoch": 3.4914085139382065, "grad_norm": 3.6972880363464355, "learning_rate": 1.2605939192769277e-05, "loss": 0.5504, "step": 21388 }, { "epoch": 3.491571772580711, "grad_norm": 3.638038158416748, "learning_rate": 1.2605320198450413e-05, "loss": 0.5581, "step": 21389 }, { "epoch": 3.4917350312232154, "grad_norm": 2.919705629348755, "learning_rate": 1.260470119342206e-05, "loss": 0.3813, "step": 21390 }, { "epoch": 3.49189828986572, "grad_norm": 3.1497082710266113, "learning_rate": 1.2604082177686764e-05, "loss": 0.4837, "step": 21391 }, { "epoch": 3.4920615485082243, "grad_norm": 3.0550782680511475, "learning_rate": 1.2603463151247071e-05, "loss": 0.4157, "step": 21392 }, { "epoch": 3.4922248071507287, "grad_norm": 3.2829129695892334, "learning_rate": 1.2602844114105522e-05, "loss": 0.457, "step": 21393 }, { "epoch": 3.492388065793233, "grad_norm": 3.274057149887085, "learning_rate": 1.2602225066264664e-05, "loss": 0.5086, "step": 21394 }, { "epoch": 3.492551324435737, "grad_norm": 2.5275611877441406, "learning_rate": 1.2601606007727042e-05, "loss": 0.3853, "step": 21395 }, { "epoch": 3.4927145830782416, "grad_norm": 4.182514667510986, "learning_rate": 1.2600986938495201e-05, "loss": 0.4897, "step": 21396 }, { "epoch": 3.492877841720746, "grad_norm": 3.2990448474884033, "learning_rate": 1.2600367858571683e-05, "loss": 0.4933, "step": 21397 }, { "epoch": 3.4930411003632504, "grad_norm": 2.9822723865509033, "learning_rate": 1.2599748767959039e-05, "loss": 0.4143, "step": 21398 }, { "epoch": 3.493204359005755, "grad_norm": 3.5594899654388428, "learning_rate": 1.2599129666659804e-05, "loss": 0.4738, "step": 21399 }, { "epoch": 3.4933676176482593, "grad_norm": 4.491088390350342, "learning_rate": 1.259851055467653e-05, "loss": 0.5958, "step": 21400 }, { "epoch": 3.4935308762907638, "grad_norm": 2.5587971210479736, "learning_rate": 1.2597891432011759e-05, "loss": 0.4045, "step": 21401 }, { "epoch": 3.493694134933268, "grad_norm": 3.926321268081665, "learning_rate": 1.2597272298668041e-05, "loss": 0.6397, "step": 21402 }, { "epoch": 3.493857393575772, "grad_norm": 3.2134902477264404, "learning_rate": 1.2596653154647917e-05, "loss": 0.4221, "step": 21403 }, { "epoch": 3.4940206522182766, "grad_norm": 3.0653493404388428, "learning_rate": 1.2596033999953932e-05, "loss": 0.4382, "step": 21404 }, { "epoch": 3.494183910860781, "grad_norm": 2.7702548503875732, "learning_rate": 1.2595414834588627e-05, "loss": 0.4142, "step": 21405 }, { "epoch": 3.4943471695032855, "grad_norm": 2.642580032348633, "learning_rate": 1.2594795658554554e-05, "loss": 0.3768, "step": 21406 }, { "epoch": 3.49451042814579, "grad_norm": 2.3788435459136963, "learning_rate": 1.2594176471854255e-05, "loss": 0.3639, "step": 21407 }, { "epoch": 3.4946736867882944, "grad_norm": 2.90773868560791, "learning_rate": 1.259355727449028e-05, "loss": 0.437, "step": 21408 }, { "epoch": 3.494836945430799, "grad_norm": 3.278449296951294, "learning_rate": 1.2592938066465171e-05, "loss": 0.4919, "step": 21409 }, { "epoch": 3.4950002040733033, "grad_norm": 2.611478328704834, "learning_rate": 1.259231884778147e-05, "loss": 0.4191, "step": 21410 }, { "epoch": 3.4951634627158077, "grad_norm": 3.0796289443969727, "learning_rate": 1.259169961844172e-05, "loss": 0.4826, "step": 21411 }, { "epoch": 3.495326721358312, "grad_norm": 3.2200546264648438, "learning_rate": 1.2591080378448477e-05, "loss": 0.4934, "step": 21412 }, { "epoch": 3.495489980000816, "grad_norm": 3.7690680027008057, "learning_rate": 1.2590461127804282e-05, "loss": 0.5377, "step": 21413 }, { "epoch": 3.4956532386433206, "grad_norm": 3.1315808296203613, "learning_rate": 1.258984186651168e-05, "loss": 0.3848, "step": 21414 }, { "epoch": 3.495816497285825, "grad_norm": 2.492267608642578, "learning_rate": 1.2589222594573212e-05, "loss": 0.3865, "step": 21415 }, { "epoch": 3.4959797559283294, "grad_norm": 3.053415536880493, "learning_rate": 1.2588603311991425e-05, "loss": 0.4742, "step": 21416 }, { "epoch": 3.496143014570834, "grad_norm": 3.2686963081359863, "learning_rate": 1.2587984018768873e-05, "loss": 0.4652, "step": 21417 }, { "epoch": 3.4963062732133383, "grad_norm": 3.1610376834869385, "learning_rate": 1.2587364714908095e-05, "loss": 0.5065, "step": 21418 }, { "epoch": 3.4964695318558427, "grad_norm": 3.1635422706604004, "learning_rate": 1.2586745400411638e-05, "loss": 0.4287, "step": 21419 }, { "epoch": 3.496632790498347, "grad_norm": 3.232013463973999, "learning_rate": 1.2586126075282046e-05, "loss": 0.494, "step": 21420 }, { "epoch": 3.496796049140851, "grad_norm": 3.312969923019409, "learning_rate": 1.2585506739521867e-05, "loss": 0.4562, "step": 21421 }, { "epoch": 3.4969593077833556, "grad_norm": 2.8653812408447266, "learning_rate": 1.2584887393133642e-05, "loss": 0.3953, "step": 21422 }, { "epoch": 3.49712256642586, "grad_norm": 3.6603269577026367, "learning_rate": 1.2584268036119926e-05, "loss": 0.4866, "step": 21423 }, { "epoch": 3.4972858250683645, "grad_norm": 3.5535569190979004, "learning_rate": 1.258364866848326e-05, "loss": 0.4527, "step": 21424 }, { "epoch": 3.497449083710869, "grad_norm": 4.185236930847168, "learning_rate": 1.2583029290226189e-05, "loss": 0.6288, "step": 21425 }, { "epoch": 3.4976123423533734, "grad_norm": 3.4584696292877197, "learning_rate": 1.2582409901351257e-05, "loss": 0.565, "step": 21426 }, { "epoch": 3.497775600995878, "grad_norm": 3.009779453277588, "learning_rate": 1.2581790501861017e-05, "loss": 0.4834, "step": 21427 }, { "epoch": 3.4979388596383822, "grad_norm": 3.5094566345214844, "learning_rate": 1.2581171091758008e-05, "loss": 0.5269, "step": 21428 }, { "epoch": 3.4981021182808867, "grad_norm": 3.049846649169922, "learning_rate": 1.258055167104478e-05, "loss": 0.4176, "step": 21429 }, { "epoch": 3.498265376923391, "grad_norm": 3.0011887550354004, "learning_rate": 1.2579932239723882e-05, "loss": 0.4425, "step": 21430 }, { "epoch": 3.498428635565895, "grad_norm": 3.2111551761627197, "learning_rate": 1.2579312797797854e-05, "loss": 0.4689, "step": 21431 }, { "epoch": 3.4985918942083996, "grad_norm": 3.3233344554901123, "learning_rate": 1.2578693345269243e-05, "loss": 0.5197, "step": 21432 }, { "epoch": 3.498755152850904, "grad_norm": 3.4571914672851562, "learning_rate": 1.25780738821406e-05, "loss": 0.4358, "step": 21433 }, { "epoch": 3.4989184114934084, "grad_norm": 3.1552064418792725, "learning_rate": 1.257745440841447e-05, "loss": 0.495, "step": 21434 }, { "epoch": 3.499081670135913, "grad_norm": 3.5263423919677734, "learning_rate": 1.2576834924093394e-05, "loss": 0.5435, "step": 21435 }, { "epoch": 3.4992449287784173, "grad_norm": 3.256430149078369, "learning_rate": 1.2576215429179925e-05, "loss": 0.4582, "step": 21436 }, { "epoch": 3.4994081874209217, "grad_norm": 3.5062601566314697, "learning_rate": 1.2575595923676607e-05, "loss": 0.5593, "step": 21437 }, { "epoch": 3.4995714460634257, "grad_norm": 3.8124051094055176, "learning_rate": 1.2574976407585986e-05, "loss": 0.5656, "step": 21438 }, { "epoch": 3.49973470470593, "grad_norm": 3.653456211090088, "learning_rate": 1.257435688091061e-05, "loss": 0.4909, "step": 21439 }, { "epoch": 3.4998979633484346, "grad_norm": 3.0928139686584473, "learning_rate": 1.2573737343653026e-05, "loss": 0.4556, "step": 21440 }, { "epoch": 3.500061221990939, "grad_norm": 3.2882981300354004, "learning_rate": 1.2573117795815777e-05, "loss": 0.5554, "step": 21441 }, { "epoch": 3.5002244806334435, "grad_norm": 2.906097888946533, "learning_rate": 1.2572498237401415e-05, "loss": 0.4652, "step": 21442 }, { "epoch": 3.500387739275948, "grad_norm": 3.2744216918945312, "learning_rate": 1.2571878668412483e-05, "loss": 0.4859, "step": 21443 }, { "epoch": 3.5005509979184524, "grad_norm": 2.604064464569092, "learning_rate": 1.2571259088851529e-05, "loss": 0.3859, "step": 21444 }, { "epoch": 3.500714256560957, "grad_norm": 2.8702101707458496, "learning_rate": 1.2570639498721098e-05, "loss": 0.4381, "step": 21445 }, { "epoch": 3.5008775152034612, "grad_norm": 2.929079294204712, "learning_rate": 1.257001989802374e-05, "loss": 0.3835, "step": 21446 }, { "epoch": 3.5010407738459657, "grad_norm": 2.7173678874969482, "learning_rate": 1.2569400286762003e-05, "loss": 0.433, "step": 21447 }, { "epoch": 3.50120403248847, "grad_norm": 3.377427816390991, "learning_rate": 1.2568780664938429e-05, "loss": 0.4922, "step": 21448 }, { "epoch": 3.501367291130974, "grad_norm": 3.0079002380371094, "learning_rate": 1.2568161032555567e-05, "loss": 0.3835, "step": 21449 }, { "epoch": 3.5015305497734786, "grad_norm": 3.2526793479919434, "learning_rate": 1.2567541389615965e-05, "loss": 0.4613, "step": 21450 }, { "epoch": 3.501693808415983, "grad_norm": 3.055581569671631, "learning_rate": 1.2566921736122171e-05, "loss": 0.4698, "step": 21451 }, { "epoch": 3.5018570670584874, "grad_norm": 3.0794906616210938, "learning_rate": 1.2566302072076733e-05, "loss": 0.4326, "step": 21452 }, { "epoch": 3.502020325700992, "grad_norm": 2.8306760787963867, "learning_rate": 1.2565682397482192e-05, "loss": 0.3836, "step": 21453 }, { "epoch": 3.5021835843434963, "grad_norm": 3.5100154876708984, "learning_rate": 1.2565062712341102e-05, "loss": 0.5002, "step": 21454 }, { "epoch": 3.5023468429860003, "grad_norm": 3.445498466491699, "learning_rate": 1.2564443016656006e-05, "loss": 0.5803, "step": 21455 }, { "epoch": 3.5025101016285047, "grad_norm": 2.7957115173339844, "learning_rate": 1.2563823310429455e-05, "loss": 0.3968, "step": 21456 }, { "epoch": 3.502673360271009, "grad_norm": 2.767329216003418, "learning_rate": 1.2563203593663995e-05, "loss": 0.5137, "step": 21457 }, { "epoch": 3.5028366189135136, "grad_norm": 2.7008543014526367, "learning_rate": 1.256258386636217e-05, "loss": 0.3829, "step": 21458 }, { "epoch": 3.502999877556018, "grad_norm": 3.476593017578125, "learning_rate": 1.2561964128526532e-05, "loss": 0.4751, "step": 21459 }, { "epoch": 3.5031631361985225, "grad_norm": 2.7798826694488525, "learning_rate": 1.2561344380159628e-05, "loss": 0.3855, "step": 21460 }, { "epoch": 3.503326394841027, "grad_norm": 2.9245517253875732, "learning_rate": 1.2560724621264e-05, "loss": 0.4287, "step": 21461 }, { "epoch": 3.5034896534835314, "grad_norm": 3.2833199501037598, "learning_rate": 1.2560104851842205e-05, "loss": 0.4615, "step": 21462 }, { "epoch": 3.503652912126036, "grad_norm": 2.893476963043213, "learning_rate": 1.2559485071896785e-05, "loss": 0.4586, "step": 21463 }, { "epoch": 3.5038161707685402, "grad_norm": 3.4734532833099365, "learning_rate": 1.2558865281430285e-05, "loss": 0.5363, "step": 21464 }, { "epoch": 3.5039794294110447, "grad_norm": 3.7251250743865967, "learning_rate": 1.2558245480445259e-05, "loss": 0.4493, "step": 21465 }, { "epoch": 3.5041426880535487, "grad_norm": 3.6513707637786865, "learning_rate": 1.255762566894425e-05, "loss": 0.4668, "step": 21466 }, { "epoch": 3.504305946696053, "grad_norm": 3.40351939201355, "learning_rate": 1.2557005846929808e-05, "loss": 0.4496, "step": 21467 }, { "epoch": 3.5044692053385575, "grad_norm": 3.384781837463379, "learning_rate": 1.2556386014404482e-05, "loss": 0.528, "step": 21468 }, { "epoch": 3.504632463981062, "grad_norm": 2.8542139530181885, "learning_rate": 1.2555766171370818e-05, "loss": 0.4657, "step": 21469 }, { "epoch": 3.5047957226235664, "grad_norm": 3.1567330360412598, "learning_rate": 1.2555146317831363e-05, "loss": 0.437, "step": 21470 }, { "epoch": 3.504958981266071, "grad_norm": 3.0418903827667236, "learning_rate": 1.2554526453788666e-05, "loss": 0.4003, "step": 21471 }, { "epoch": 3.5051222399085753, "grad_norm": 3.491455316543579, "learning_rate": 1.2553906579245277e-05, "loss": 0.4859, "step": 21472 }, { "epoch": 3.5052854985510793, "grad_norm": 2.9923946857452393, "learning_rate": 1.2553286694203742e-05, "loss": 0.45, "step": 21473 }, { "epoch": 3.5054487571935837, "grad_norm": 2.950181722640991, "learning_rate": 1.2552666798666608e-05, "loss": 0.4741, "step": 21474 }, { "epoch": 3.505612015836088, "grad_norm": 3.065556764602661, "learning_rate": 1.2552046892636427e-05, "loss": 0.4708, "step": 21475 }, { "epoch": 3.5057752744785926, "grad_norm": 2.994075059890747, "learning_rate": 1.2551426976115741e-05, "loss": 0.3987, "step": 21476 }, { "epoch": 3.505938533121097, "grad_norm": 2.7988879680633545, "learning_rate": 1.2550807049107107e-05, "loss": 0.4097, "step": 21477 }, { "epoch": 3.5061017917636015, "grad_norm": 2.88301157951355, "learning_rate": 1.2550187111613066e-05, "loss": 0.4064, "step": 21478 }, { "epoch": 3.506265050406106, "grad_norm": 3.310532569885254, "learning_rate": 1.254956716363617e-05, "loss": 0.4602, "step": 21479 }, { "epoch": 3.5064283090486104, "grad_norm": 3.332965135574341, "learning_rate": 1.2548947205178962e-05, "loss": 0.4654, "step": 21480 }, { "epoch": 3.506591567691115, "grad_norm": 3.682526111602783, "learning_rate": 1.2548327236243998e-05, "loss": 0.4869, "step": 21481 }, { "epoch": 3.5067548263336192, "grad_norm": 3.5725419521331787, "learning_rate": 1.2547707256833823e-05, "loss": 0.5448, "step": 21482 }, { "epoch": 3.5069180849761237, "grad_norm": 3.296111822128296, "learning_rate": 1.2547087266950983e-05, "loss": 0.4684, "step": 21483 }, { "epoch": 3.5070813436186277, "grad_norm": 2.845073938369751, "learning_rate": 1.2546467266598032e-05, "loss": 0.5166, "step": 21484 }, { "epoch": 3.507244602261132, "grad_norm": 3.2264838218688965, "learning_rate": 1.2545847255777514e-05, "loss": 0.405, "step": 21485 }, { "epoch": 3.5074078609036365, "grad_norm": 3.6833879947662354, "learning_rate": 1.2545227234491983e-05, "loss": 0.467, "step": 21486 }, { "epoch": 3.507571119546141, "grad_norm": 3.5205578804016113, "learning_rate": 1.254460720274398e-05, "loss": 0.4968, "step": 21487 }, { "epoch": 3.5077343781886454, "grad_norm": 2.8249263763427734, "learning_rate": 1.2543987160536055e-05, "loss": 0.4124, "step": 21488 }, { "epoch": 3.50789763683115, "grad_norm": 2.8915343284606934, "learning_rate": 1.2543367107870763e-05, "loss": 0.4402, "step": 21489 }, { "epoch": 3.5080608954736543, "grad_norm": 3.099780321121216, "learning_rate": 1.254274704475065e-05, "loss": 0.4932, "step": 21490 }, { "epoch": 3.5082241541161583, "grad_norm": 3.3955116271972656, "learning_rate": 1.2542126971178266e-05, "loss": 0.5163, "step": 21491 }, { "epoch": 3.5083874127586627, "grad_norm": 3.3188161849975586, "learning_rate": 1.2541506887156154e-05, "loss": 0.4975, "step": 21492 }, { "epoch": 3.508550671401167, "grad_norm": 4.148864269256592, "learning_rate": 1.2540886792686867e-05, "loss": 0.528, "step": 21493 }, { "epoch": 3.5087139300436716, "grad_norm": 2.6356723308563232, "learning_rate": 1.2540266687772951e-05, "loss": 0.4204, "step": 21494 }, { "epoch": 3.508877188686176, "grad_norm": 2.3875741958618164, "learning_rate": 1.2539646572416961e-05, "loss": 0.4009, "step": 21495 }, { "epoch": 3.5090404473286805, "grad_norm": 2.544668436050415, "learning_rate": 1.2539026446621448e-05, "loss": 0.3761, "step": 21496 }, { "epoch": 3.509203705971185, "grad_norm": 2.9547290802001953, "learning_rate": 1.2538406310388949e-05, "loss": 0.495, "step": 21497 }, { "epoch": 3.5093669646136894, "grad_norm": 3.41137433052063, "learning_rate": 1.2537786163722023e-05, "loss": 0.4787, "step": 21498 }, { "epoch": 3.509530223256194, "grad_norm": 3.0351192951202393, "learning_rate": 1.2537166006623214e-05, "loss": 0.4176, "step": 21499 }, { "epoch": 3.5096934818986982, "grad_norm": 4.0025553703308105, "learning_rate": 1.2536545839095074e-05, "loss": 0.5656, "step": 21500 }, { "epoch": 3.5098567405412027, "grad_norm": 3.0149121284484863, "learning_rate": 1.2535925661140156e-05, "loss": 0.4861, "step": 21501 }, { "epoch": 3.5100199991837067, "grad_norm": 3.5394668579101562, "learning_rate": 1.2535305472761001e-05, "loss": 0.5087, "step": 21502 }, { "epoch": 3.510183257826211, "grad_norm": 3.2304182052612305, "learning_rate": 1.2534685273960163e-05, "loss": 0.4687, "step": 21503 }, { "epoch": 3.5103465164687155, "grad_norm": 3.320599317550659, "learning_rate": 1.2534065064740194e-05, "loss": 0.4814, "step": 21504 }, { "epoch": 3.51050977511122, "grad_norm": 2.983856678009033, "learning_rate": 1.2533444845103635e-05, "loss": 0.4358, "step": 21505 }, { "epoch": 3.5106730337537244, "grad_norm": 3.4882853031158447, "learning_rate": 1.2532824615053046e-05, "loss": 0.4427, "step": 21506 }, { "epoch": 3.510836292396229, "grad_norm": 3.4811060428619385, "learning_rate": 1.2532204374590969e-05, "loss": 0.4972, "step": 21507 }, { "epoch": 3.510999551038733, "grad_norm": 3.166288137435913, "learning_rate": 1.2531584123719955e-05, "loss": 0.4477, "step": 21508 }, { "epoch": 3.5111628096812373, "grad_norm": 2.9659512042999268, "learning_rate": 1.2530963862442555e-05, "loss": 0.4673, "step": 21509 }, { "epoch": 3.5113260683237417, "grad_norm": 3.002042770385742, "learning_rate": 1.2530343590761318e-05, "loss": 0.4859, "step": 21510 }, { "epoch": 3.511489326966246, "grad_norm": 3.1717164516448975, "learning_rate": 1.2529723308678797e-05, "loss": 0.4649, "step": 21511 }, { "epoch": 3.5116525856087506, "grad_norm": 3.3075597286224365, "learning_rate": 1.2529103016197537e-05, "loss": 0.5154, "step": 21512 }, { "epoch": 3.511815844251255, "grad_norm": 3.7936646938323975, "learning_rate": 1.2528482713320089e-05, "loss": 0.5092, "step": 21513 }, { "epoch": 3.5119791028937595, "grad_norm": 3.2788314819335938, "learning_rate": 1.2527862400049002e-05, "loss": 0.4445, "step": 21514 }, { "epoch": 3.512142361536264, "grad_norm": 3.424886465072632, "learning_rate": 1.252724207638683e-05, "loss": 0.5092, "step": 21515 }, { "epoch": 3.5123056201787684, "grad_norm": 3.1733758449554443, "learning_rate": 1.2526621742336118e-05, "loss": 0.4265, "step": 21516 }, { "epoch": 3.512468878821273, "grad_norm": 2.9973788261413574, "learning_rate": 1.2526001397899418e-05, "loss": 0.443, "step": 21517 }, { "epoch": 3.5126321374637772, "grad_norm": 3.3758373260498047, "learning_rate": 1.2525381043079281e-05, "loss": 0.4662, "step": 21518 }, { "epoch": 3.512795396106281, "grad_norm": 2.791280508041382, "learning_rate": 1.2524760677878257e-05, "loss": 0.4699, "step": 21519 }, { "epoch": 3.5129586547487857, "grad_norm": 3.064034938812256, "learning_rate": 1.2524140302298893e-05, "loss": 0.4708, "step": 21520 }, { "epoch": 3.51312191339129, "grad_norm": 2.5472426414489746, "learning_rate": 1.2523519916343742e-05, "loss": 0.4123, "step": 21521 }, { "epoch": 3.5132851720337945, "grad_norm": 2.986485242843628, "learning_rate": 1.2522899520015355e-05, "loss": 0.4006, "step": 21522 }, { "epoch": 3.513448430676299, "grad_norm": 3.303694725036621, "learning_rate": 1.2522279113316277e-05, "loss": 0.5063, "step": 21523 }, { "epoch": 3.5136116893188034, "grad_norm": 3.5043163299560547, "learning_rate": 1.2521658696249066e-05, "loss": 0.4492, "step": 21524 }, { "epoch": 3.513774947961308, "grad_norm": 3.135967493057251, "learning_rate": 1.2521038268816268e-05, "loss": 0.5067, "step": 21525 }, { "epoch": 3.513938206603812, "grad_norm": 3.372775077819824, "learning_rate": 1.2520417831020431e-05, "loss": 0.4912, "step": 21526 }, { "epoch": 3.5141014652463163, "grad_norm": 2.962139368057251, "learning_rate": 1.251979738286411e-05, "loss": 0.4859, "step": 21527 }, { "epoch": 3.5142647238888207, "grad_norm": 3.4627621173858643, "learning_rate": 1.2519176924349852e-05, "loss": 0.5332, "step": 21528 }, { "epoch": 3.514427982531325, "grad_norm": 3.0659990310668945, "learning_rate": 1.251855645548021e-05, "loss": 0.4317, "step": 21529 }, { "epoch": 3.5145912411738296, "grad_norm": 4.036010265350342, "learning_rate": 1.2517935976257734e-05, "loss": 0.5416, "step": 21530 }, { "epoch": 3.514754499816334, "grad_norm": 3.176635980606079, "learning_rate": 1.2517315486684973e-05, "loss": 0.4778, "step": 21531 }, { "epoch": 3.5149177584588385, "grad_norm": 2.7185161113739014, "learning_rate": 1.2516694986764478e-05, "loss": 0.385, "step": 21532 }, { "epoch": 3.515081017101343, "grad_norm": 3.3033394813537598, "learning_rate": 1.25160744764988e-05, "loss": 0.5044, "step": 21533 }, { "epoch": 3.5152442757438473, "grad_norm": 3.7819511890411377, "learning_rate": 1.2515453955890491e-05, "loss": 0.4615, "step": 21534 }, { "epoch": 3.515407534386352, "grad_norm": 3.146141767501831, "learning_rate": 1.2514833424942104e-05, "loss": 0.4671, "step": 21535 }, { "epoch": 3.5155707930288562, "grad_norm": 3.7936794757843018, "learning_rate": 1.2514212883656181e-05, "loss": 0.579, "step": 21536 }, { "epoch": 3.51573405167136, "grad_norm": 2.871256113052368, "learning_rate": 1.2513592332035279e-05, "loss": 0.4348, "step": 21537 }, { "epoch": 3.5158973103138647, "grad_norm": 3.6713383197784424, "learning_rate": 1.2512971770081948e-05, "loss": 0.5911, "step": 21538 }, { "epoch": 3.516060568956369, "grad_norm": 3.248868227005005, "learning_rate": 1.251235119779874e-05, "loss": 0.452, "step": 21539 }, { "epoch": 3.5162238275988735, "grad_norm": 3.2520699501037598, "learning_rate": 1.2511730615188205e-05, "loss": 0.4452, "step": 21540 }, { "epoch": 3.516387086241378, "grad_norm": 3.6394147872924805, "learning_rate": 1.2511110022252894e-05, "loss": 0.5575, "step": 21541 }, { "epoch": 3.5165503448838824, "grad_norm": 3.5259876251220703, "learning_rate": 1.2510489418995357e-05, "loss": 0.4941, "step": 21542 }, { "epoch": 3.516713603526387, "grad_norm": 3.4717767238616943, "learning_rate": 1.2509868805418143e-05, "loss": 0.559, "step": 21543 }, { "epoch": 3.516876862168891, "grad_norm": 2.9207112789154053, "learning_rate": 1.250924818152381e-05, "loss": 0.4639, "step": 21544 }, { "epoch": 3.5170401208113953, "grad_norm": 2.5168755054473877, "learning_rate": 1.2508627547314909e-05, "loss": 0.3983, "step": 21545 }, { "epoch": 3.5172033794538997, "grad_norm": 2.7615323066711426, "learning_rate": 1.2508006902793981e-05, "loss": 0.44, "step": 21546 }, { "epoch": 3.517366638096404, "grad_norm": 2.803868055343628, "learning_rate": 1.2507386247963586e-05, "loss": 0.3932, "step": 21547 }, { "epoch": 3.5175298967389086, "grad_norm": 3.8032543659210205, "learning_rate": 1.2506765582826272e-05, "loss": 0.5537, "step": 21548 }, { "epoch": 3.517693155381413, "grad_norm": 3.507335901260376, "learning_rate": 1.250614490738459e-05, "loss": 0.4364, "step": 21549 }, { "epoch": 3.5178564140239175, "grad_norm": 3.3995115756988525, "learning_rate": 1.2505524221641097e-05, "loss": 0.4107, "step": 21550 }, { "epoch": 3.518019672666422, "grad_norm": 2.919835090637207, "learning_rate": 1.2504903525598338e-05, "loss": 0.4252, "step": 21551 }, { "epoch": 3.5181829313089263, "grad_norm": 3.4098317623138428, "learning_rate": 1.2504282819258866e-05, "loss": 0.4944, "step": 21552 }, { "epoch": 3.518346189951431, "grad_norm": 3.6654319763183594, "learning_rate": 1.250366210262523e-05, "loss": 0.4467, "step": 21553 }, { "epoch": 3.518509448593935, "grad_norm": 2.828603982925415, "learning_rate": 1.2503041375699987e-05, "loss": 0.4326, "step": 21554 }, { "epoch": 3.518672707236439, "grad_norm": 3.3781087398529053, "learning_rate": 1.250242063848569e-05, "loss": 0.466, "step": 21555 }, { "epoch": 3.5188359658789437, "grad_norm": 3.0644240379333496, "learning_rate": 1.250179989098488e-05, "loss": 0.4401, "step": 21556 }, { "epoch": 3.518999224521448, "grad_norm": 3.228928327560425, "learning_rate": 1.250117913320012e-05, "loss": 0.4538, "step": 21557 }, { "epoch": 3.5191624831639525, "grad_norm": 3.1740386486053467, "learning_rate": 1.2500558365133954e-05, "loss": 0.4557, "step": 21558 }, { "epoch": 3.519325741806457, "grad_norm": 2.523951530456543, "learning_rate": 1.2499937586788937e-05, "loss": 0.3625, "step": 21559 }, { "epoch": 3.5194890004489614, "grad_norm": 3.4103875160217285, "learning_rate": 1.2499316798167623e-05, "loss": 0.4856, "step": 21560 }, { "epoch": 3.5196522590914654, "grad_norm": 2.4634697437286377, "learning_rate": 1.2498695999272558e-05, "loss": 0.3644, "step": 21561 }, { "epoch": 3.51981551773397, "grad_norm": 3.5972092151641846, "learning_rate": 1.24980751901063e-05, "loss": 0.5429, "step": 21562 }, { "epoch": 3.5199787763764743, "grad_norm": 2.698484182357788, "learning_rate": 1.2497454370671399e-05, "loss": 0.3978, "step": 21563 }, { "epoch": 3.5201420350189787, "grad_norm": 3.856332540512085, "learning_rate": 1.2496833540970403e-05, "loss": 0.5339, "step": 21564 }, { "epoch": 3.520305293661483, "grad_norm": 2.6550345420837402, "learning_rate": 1.2496212701005868e-05, "loss": 0.4727, "step": 21565 }, { "epoch": 3.5204685523039876, "grad_norm": 3.7894704341888428, "learning_rate": 1.2495591850780346e-05, "loss": 0.5611, "step": 21566 }, { "epoch": 3.520631810946492, "grad_norm": 3.9693803787231445, "learning_rate": 1.2494970990296386e-05, "loss": 0.4646, "step": 21567 }, { "epoch": 3.5207950695889965, "grad_norm": 3.502139091491699, "learning_rate": 1.2494350119556546e-05, "loss": 0.5017, "step": 21568 }, { "epoch": 3.520958328231501, "grad_norm": 3.2493510246276855, "learning_rate": 1.2493729238563372e-05, "loss": 0.4904, "step": 21569 }, { "epoch": 3.5211215868740053, "grad_norm": 3.1232898235321045, "learning_rate": 1.249310834731942e-05, "loss": 0.4922, "step": 21570 }, { "epoch": 3.5212848455165098, "grad_norm": 3.2563531398773193, "learning_rate": 1.2492487445827239e-05, "loss": 0.5379, "step": 21571 }, { "epoch": 3.5214481041590138, "grad_norm": 2.974724531173706, "learning_rate": 1.2491866534089385e-05, "loss": 0.4654, "step": 21572 }, { "epoch": 3.521611362801518, "grad_norm": 2.9908719062805176, "learning_rate": 1.249124561210841e-05, "loss": 0.4151, "step": 21573 }, { "epoch": 3.5217746214440226, "grad_norm": 3.4857358932495117, "learning_rate": 1.2490624679886863e-05, "loss": 0.4593, "step": 21574 }, { "epoch": 3.521937880086527, "grad_norm": 3.0824220180511475, "learning_rate": 1.24900037374273e-05, "loss": 0.4565, "step": 21575 }, { "epoch": 3.5221011387290315, "grad_norm": 3.681143045425415, "learning_rate": 1.2489382784732266e-05, "loss": 0.4088, "step": 21576 }, { "epoch": 3.522264397371536, "grad_norm": 3.090803861618042, "learning_rate": 1.2488761821804325e-05, "loss": 0.4923, "step": 21577 }, { "epoch": 3.5224276560140404, "grad_norm": 2.884023427963257, "learning_rate": 1.2488140848646024e-05, "loss": 0.464, "step": 21578 }, { "epoch": 3.5225909146565444, "grad_norm": 3.208740472793579, "learning_rate": 1.2487519865259916e-05, "loss": 0.4313, "step": 21579 }, { "epoch": 3.522754173299049, "grad_norm": 3.1072158813476562, "learning_rate": 1.2486898871648552e-05, "loss": 0.4629, "step": 21580 }, { "epoch": 3.5229174319415533, "grad_norm": 3.0471479892730713, "learning_rate": 1.248627786781448e-05, "loss": 0.4266, "step": 21581 }, { "epoch": 3.5230806905840577, "grad_norm": 3.2221901416778564, "learning_rate": 1.2485656853760264e-05, "loss": 0.5015, "step": 21582 }, { "epoch": 3.523243949226562, "grad_norm": 3.0457422733306885, "learning_rate": 1.2485035829488453e-05, "loss": 0.5414, "step": 21583 }, { "epoch": 3.5234072078690666, "grad_norm": 3.673600912094116, "learning_rate": 1.2484414795001596e-05, "loss": 0.5046, "step": 21584 }, { "epoch": 3.523570466511571, "grad_norm": 3.1253738403320312, "learning_rate": 1.2483793750302249e-05, "loss": 0.4422, "step": 21585 }, { "epoch": 3.5237337251540755, "grad_norm": 4.334015846252441, "learning_rate": 1.2483172695392961e-05, "loss": 0.6844, "step": 21586 }, { "epoch": 3.52389698379658, "grad_norm": 3.0409271717071533, "learning_rate": 1.2482551630276287e-05, "loss": 0.4362, "step": 21587 }, { "epoch": 3.5240602424390843, "grad_norm": 3.2827653884887695, "learning_rate": 1.2481930554954783e-05, "loss": 0.5916, "step": 21588 }, { "epoch": 3.5242235010815888, "grad_norm": 4.164538860321045, "learning_rate": 1.2481309469431002e-05, "loss": 0.5384, "step": 21589 }, { "epoch": 3.5243867597240928, "grad_norm": 3.34993052482605, "learning_rate": 1.2480688373707492e-05, "loss": 0.4669, "step": 21590 }, { "epoch": 3.524550018366597, "grad_norm": 4.00654935836792, "learning_rate": 1.2480067267786809e-05, "loss": 0.5354, "step": 21591 }, { "epoch": 3.5247132770091016, "grad_norm": 3.8369481563568115, "learning_rate": 1.2479446151671505e-05, "loss": 0.5601, "step": 21592 }, { "epoch": 3.524876535651606, "grad_norm": 2.699171781539917, "learning_rate": 1.2478825025364132e-05, "loss": 0.361, "step": 21593 }, { "epoch": 3.5250397942941105, "grad_norm": 3.227705240249634, "learning_rate": 1.247820388886725e-05, "loss": 0.4212, "step": 21594 }, { "epoch": 3.525203052936615, "grad_norm": 4.024709701538086, "learning_rate": 1.2477582742183407e-05, "loss": 0.5286, "step": 21595 }, { "epoch": 3.525366311579119, "grad_norm": 2.6314821243286133, "learning_rate": 1.2476961585315155e-05, "loss": 0.378, "step": 21596 }, { "epoch": 3.5255295702216234, "grad_norm": 3.5499956607818604, "learning_rate": 1.247634041826505e-05, "loss": 0.5655, "step": 21597 }, { "epoch": 3.525692828864128, "grad_norm": 3.134709358215332, "learning_rate": 1.2475719241035643e-05, "loss": 0.4404, "step": 21598 }, { "epoch": 3.5258560875066323, "grad_norm": 3.5164246559143066, "learning_rate": 1.2475098053629493e-05, "loss": 0.5305, "step": 21599 }, { "epoch": 3.5260193461491367, "grad_norm": 3.863018035888672, "learning_rate": 1.2474476856049145e-05, "loss": 0.5203, "step": 21600 }, { "epoch": 3.526182604791641, "grad_norm": 3.938673734664917, "learning_rate": 1.2473855648297159e-05, "loss": 0.5278, "step": 21601 }, { "epoch": 3.5263458634341456, "grad_norm": 2.7974188327789307, "learning_rate": 1.2473234430376086e-05, "loss": 0.4361, "step": 21602 }, { "epoch": 3.52650912207665, "grad_norm": 3.067641258239746, "learning_rate": 1.2472613202288482e-05, "loss": 0.5115, "step": 21603 }, { "epoch": 3.5266723807191545, "grad_norm": 3.569171667098999, "learning_rate": 1.2471991964036895e-05, "loss": 0.4799, "step": 21604 }, { "epoch": 3.526835639361659, "grad_norm": 3.422240972518921, "learning_rate": 1.2471370715623887e-05, "loss": 0.4744, "step": 21605 }, { "epoch": 3.5269988980041633, "grad_norm": 3.1791350841522217, "learning_rate": 1.2470749457052005e-05, "loss": 0.4243, "step": 21606 }, { "epoch": 3.5271621566466673, "grad_norm": 3.072040319442749, "learning_rate": 1.2470128188323804e-05, "loss": 0.4586, "step": 21607 }, { "epoch": 3.5273254152891718, "grad_norm": 3.4375836849212646, "learning_rate": 1.246950690944184e-05, "loss": 0.4943, "step": 21608 }, { "epoch": 3.527488673931676, "grad_norm": 3.539917230606079, "learning_rate": 1.2468885620408666e-05, "loss": 0.5272, "step": 21609 }, { "epoch": 3.5276519325741806, "grad_norm": 4.317432880401611, "learning_rate": 1.2468264321226836e-05, "loss": 0.4809, "step": 21610 }, { "epoch": 3.527815191216685, "grad_norm": 3.0226430892944336, "learning_rate": 1.24676430118989e-05, "loss": 0.4339, "step": 21611 }, { "epoch": 3.5279784498591895, "grad_norm": 3.6958632469177246, "learning_rate": 1.2467021692427417e-05, "loss": 0.4794, "step": 21612 }, { "epoch": 3.528141708501694, "grad_norm": 3.1637349128723145, "learning_rate": 1.2466400362814938e-05, "loss": 0.5231, "step": 21613 }, { "epoch": 3.528304967144198, "grad_norm": 3.0402612686157227, "learning_rate": 1.2465779023064022e-05, "loss": 0.4734, "step": 21614 }, { "epoch": 3.5284682257867024, "grad_norm": 3.5495450496673584, "learning_rate": 1.2465157673177217e-05, "loss": 0.4072, "step": 21615 }, { "epoch": 3.528631484429207, "grad_norm": 3.1889712810516357, "learning_rate": 1.246453631315708e-05, "loss": 0.4746, "step": 21616 }, { "epoch": 3.5287947430717113, "grad_norm": 3.0796616077423096, "learning_rate": 1.2463914943006165e-05, "loss": 0.4709, "step": 21617 }, { "epoch": 3.5289580017142157, "grad_norm": 3.203824996948242, "learning_rate": 1.2463293562727025e-05, "loss": 0.4792, "step": 21618 }, { "epoch": 3.52912126035672, "grad_norm": 3.0710325241088867, "learning_rate": 1.2462672172322221e-05, "loss": 0.4701, "step": 21619 }, { "epoch": 3.5292845189992246, "grad_norm": 2.877957582473755, "learning_rate": 1.2462050771794294e-05, "loss": 0.3908, "step": 21620 }, { "epoch": 3.529447777641729, "grad_norm": 3.8652937412261963, "learning_rate": 1.2461429361145809e-05, "loss": 0.5347, "step": 21621 }, { "epoch": 3.5296110362842334, "grad_norm": 3.739201545715332, "learning_rate": 1.2460807940379316e-05, "loss": 0.5691, "step": 21622 }, { "epoch": 3.529774294926738, "grad_norm": 3.1254520416259766, "learning_rate": 1.2460186509497373e-05, "loss": 0.4877, "step": 21623 }, { "epoch": 3.5299375535692423, "grad_norm": 3.366196632385254, "learning_rate": 1.2459565068502532e-05, "loss": 0.4184, "step": 21624 }, { "epoch": 3.5301008122117463, "grad_norm": 2.7925353050231934, "learning_rate": 1.2458943617397346e-05, "loss": 0.4028, "step": 21625 }, { "epoch": 3.5302640708542508, "grad_norm": 3.2143139839172363, "learning_rate": 1.2458322156184372e-05, "loss": 0.4884, "step": 21626 }, { "epoch": 3.530427329496755, "grad_norm": 3.6751959323883057, "learning_rate": 1.2457700684866163e-05, "loss": 0.5791, "step": 21627 }, { "epoch": 3.5305905881392596, "grad_norm": 2.8001880645751953, "learning_rate": 1.245707920344528e-05, "loss": 0.429, "step": 21628 }, { "epoch": 3.530753846781764, "grad_norm": 3.035184621810913, "learning_rate": 1.2456457711924266e-05, "loss": 0.4323, "step": 21629 }, { "epoch": 3.5309171054242685, "grad_norm": 4.233023166656494, "learning_rate": 1.2455836210305682e-05, "loss": 0.613, "step": 21630 }, { "epoch": 3.531080364066773, "grad_norm": 3.077252149581909, "learning_rate": 1.2455214698592084e-05, "loss": 0.4303, "step": 21631 }, { "epoch": 3.531243622709277, "grad_norm": 3.55496883392334, "learning_rate": 1.2454593176786025e-05, "loss": 0.4827, "step": 21632 }, { "epoch": 3.5314068813517814, "grad_norm": 3.255134105682373, "learning_rate": 1.2453971644890064e-05, "loss": 0.4601, "step": 21633 }, { "epoch": 3.531570139994286, "grad_norm": 3.0584497451782227, "learning_rate": 1.245335010290675e-05, "loss": 0.4404, "step": 21634 }, { "epoch": 3.5317333986367903, "grad_norm": 2.9645137786865234, "learning_rate": 1.2452728550838638e-05, "loss": 0.4314, "step": 21635 }, { "epoch": 3.5318966572792947, "grad_norm": 3.6956138610839844, "learning_rate": 1.2452106988688286e-05, "loss": 0.4594, "step": 21636 }, { "epoch": 3.532059915921799, "grad_norm": 3.067669630050659, "learning_rate": 1.2451485416458246e-05, "loss": 0.5362, "step": 21637 }, { "epoch": 3.5322231745643036, "grad_norm": 3.0118401050567627, "learning_rate": 1.2450863834151079e-05, "loss": 0.5257, "step": 21638 }, { "epoch": 3.532386433206808, "grad_norm": 2.6648218631744385, "learning_rate": 1.2450242241769336e-05, "loss": 0.4092, "step": 21639 }, { "epoch": 3.5325496918493124, "grad_norm": 2.812967538833618, "learning_rate": 1.2449620639315569e-05, "loss": 0.4167, "step": 21640 }, { "epoch": 3.532712950491817, "grad_norm": 2.8900907039642334, "learning_rate": 1.244899902679234e-05, "loss": 0.3861, "step": 21641 }, { "epoch": 3.5328762091343213, "grad_norm": 3.363039016723633, "learning_rate": 1.24483774042022e-05, "loss": 0.5272, "step": 21642 }, { "epoch": 3.5330394677768253, "grad_norm": 3.1671979427337646, "learning_rate": 1.2447755771547702e-05, "loss": 0.5131, "step": 21643 }, { "epoch": 3.5332027264193298, "grad_norm": 3.1868386268615723, "learning_rate": 1.2447134128831407e-05, "loss": 0.4304, "step": 21644 }, { "epoch": 3.533365985061834, "grad_norm": 3.4683234691619873, "learning_rate": 1.2446512476055867e-05, "loss": 0.5133, "step": 21645 }, { "epoch": 3.5335292437043386, "grad_norm": 3.117877721786499, "learning_rate": 1.2445890813223637e-05, "loss": 0.4557, "step": 21646 }, { "epoch": 3.533692502346843, "grad_norm": 3.3945720195770264, "learning_rate": 1.2445269140337275e-05, "loss": 0.5416, "step": 21647 }, { "epoch": 3.5338557609893475, "grad_norm": 3.0255935192108154, "learning_rate": 1.2444647457399334e-05, "loss": 0.4217, "step": 21648 }, { "epoch": 3.5340190196318515, "grad_norm": 3.191335678100586, "learning_rate": 1.2444025764412373e-05, "loss": 0.482, "step": 21649 }, { "epoch": 3.534182278274356, "grad_norm": 2.946406602859497, "learning_rate": 1.2443404061378941e-05, "loss": 0.5268, "step": 21650 }, { "epoch": 3.5343455369168604, "grad_norm": 3.298417568206787, "learning_rate": 1.2442782348301601e-05, "loss": 0.5586, "step": 21651 }, { "epoch": 3.534508795559365, "grad_norm": 2.989105224609375, "learning_rate": 1.2442160625182902e-05, "loss": 0.5563, "step": 21652 }, { "epoch": 3.5346720542018693, "grad_norm": 3.208341121673584, "learning_rate": 1.2441538892025404e-05, "loss": 0.4222, "step": 21653 }, { "epoch": 3.5348353128443737, "grad_norm": 3.2591729164123535, "learning_rate": 1.2440917148831662e-05, "loss": 0.5483, "step": 21654 }, { "epoch": 3.534998571486878, "grad_norm": 2.8831005096435547, "learning_rate": 1.2440295395604232e-05, "loss": 0.3595, "step": 21655 }, { "epoch": 3.5351618301293826, "grad_norm": 3.169687509536743, "learning_rate": 1.2439673632345667e-05, "loss": 0.4402, "step": 21656 }, { "epoch": 3.535325088771887, "grad_norm": 3.974989175796509, "learning_rate": 1.2439051859058527e-05, "loss": 0.6109, "step": 21657 }, { "epoch": 3.5354883474143914, "grad_norm": 3.399545431137085, "learning_rate": 1.2438430075745365e-05, "loss": 0.476, "step": 21658 }, { "epoch": 3.535651606056896, "grad_norm": 2.7728307247161865, "learning_rate": 1.2437808282408737e-05, "loss": 0.4581, "step": 21659 }, { "epoch": 3.5358148646994, "grad_norm": 3.240971088409424, "learning_rate": 1.24371864790512e-05, "loss": 0.4921, "step": 21660 }, { "epoch": 3.5359781233419043, "grad_norm": 3.2804927825927734, "learning_rate": 1.2436564665675311e-05, "loss": 0.4498, "step": 21661 }, { "epoch": 3.5361413819844087, "grad_norm": 2.950063467025757, "learning_rate": 1.2435942842283622e-05, "loss": 0.4419, "step": 21662 }, { "epoch": 3.536304640626913, "grad_norm": 3.166217088699341, "learning_rate": 1.2435321008878696e-05, "loss": 0.5123, "step": 21663 }, { "epoch": 3.5364678992694176, "grad_norm": 3.180042028427124, "learning_rate": 1.243469916546308e-05, "loss": 0.4096, "step": 21664 }, { "epoch": 3.536631157911922, "grad_norm": 2.8642876148223877, "learning_rate": 1.2434077312039337e-05, "loss": 0.4438, "step": 21665 }, { "epoch": 3.5367944165544265, "grad_norm": 3.2066457271575928, "learning_rate": 1.2433455448610022e-05, "loss": 0.5186, "step": 21666 }, { "epoch": 3.5369576751969305, "grad_norm": 3.353271007537842, "learning_rate": 1.243283357517769e-05, "loss": 0.5261, "step": 21667 }, { "epoch": 3.537120933839435, "grad_norm": 3.5086231231689453, "learning_rate": 1.2432211691744897e-05, "loss": 0.538, "step": 21668 }, { "epoch": 3.5372841924819394, "grad_norm": 3.464599370956421, "learning_rate": 1.2431589798314204e-05, "loss": 0.4694, "step": 21669 }, { "epoch": 3.537447451124444, "grad_norm": 3.047999620437622, "learning_rate": 1.2430967894888157e-05, "loss": 0.4551, "step": 21670 }, { "epoch": 3.5376107097669482, "grad_norm": 2.9037394523620605, "learning_rate": 1.243034598146932e-05, "loss": 0.4266, "step": 21671 }, { "epoch": 3.5377739684094527, "grad_norm": 3.3833510875701904, "learning_rate": 1.2429724058060256e-05, "loss": 0.473, "step": 21672 }, { "epoch": 3.537937227051957, "grad_norm": 2.813246965408325, "learning_rate": 1.2429102124663505e-05, "loss": 0.4273, "step": 21673 }, { "epoch": 3.5381004856944616, "grad_norm": 2.6573588848114014, "learning_rate": 1.2428480181281637e-05, "loss": 0.4298, "step": 21674 }, { "epoch": 3.538263744336966, "grad_norm": 3.1724324226379395, "learning_rate": 1.2427858227917201e-05, "loss": 0.4579, "step": 21675 }, { "epoch": 3.5384270029794704, "grad_norm": 3.2260189056396484, "learning_rate": 1.2427236264572757e-05, "loss": 0.4983, "step": 21676 }, { "epoch": 3.538590261621975, "grad_norm": 3.215165138244629, "learning_rate": 1.2426614291250867e-05, "loss": 0.4701, "step": 21677 }, { "epoch": 3.538753520264479, "grad_norm": 3.7647907733917236, "learning_rate": 1.2425992307954075e-05, "loss": 0.5854, "step": 21678 }, { "epoch": 3.5389167789069833, "grad_norm": 4.071350574493408, "learning_rate": 1.2425370314684948e-05, "loss": 0.5563, "step": 21679 }, { "epoch": 3.5390800375494877, "grad_norm": 3.484656572341919, "learning_rate": 1.242474831144604e-05, "loss": 0.5342, "step": 21680 }, { "epoch": 3.539243296191992, "grad_norm": 2.9005157947540283, "learning_rate": 1.2424126298239902e-05, "loss": 0.4568, "step": 21681 }, { "epoch": 3.5394065548344966, "grad_norm": 3.474121570587158, "learning_rate": 1.2423504275069103e-05, "loss": 0.5094, "step": 21682 }, { "epoch": 3.539569813477001, "grad_norm": 3.0102264881134033, "learning_rate": 1.242288224193619e-05, "loss": 0.4666, "step": 21683 }, { "epoch": 3.539733072119505, "grad_norm": 3.5202701091766357, "learning_rate": 1.2422260198843722e-05, "loss": 0.5346, "step": 21684 }, { "epoch": 3.5398963307620095, "grad_norm": 3.4029147624969482, "learning_rate": 1.2421638145794259e-05, "loss": 0.5321, "step": 21685 }, { "epoch": 3.540059589404514, "grad_norm": 3.0361690521240234, "learning_rate": 1.2421016082790356e-05, "loss": 0.4661, "step": 21686 }, { "epoch": 3.5402228480470184, "grad_norm": 2.892080783843994, "learning_rate": 1.2420394009834568e-05, "loss": 0.4081, "step": 21687 }, { "epoch": 3.540386106689523, "grad_norm": 3.8134829998016357, "learning_rate": 1.2419771926929455e-05, "loss": 0.5391, "step": 21688 }, { "epoch": 3.5405493653320272, "grad_norm": 3.1545629501342773, "learning_rate": 1.2419149834077573e-05, "loss": 0.4708, "step": 21689 }, { "epoch": 3.5407126239745317, "grad_norm": 3.2634127140045166, "learning_rate": 1.241852773128148e-05, "loss": 0.4101, "step": 21690 }, { "epoch": 3.540875882617036, "grad_norm": 2.8771135807037354, "learning_rate": 1.2417905618543733e-05, "loss": 0.3832, "step": 21691 }, { "epoch": 3.5410391412595406, "grad_norm": 2.5758893489837646, "learning_rate": 1.2417283495866888e-05, "loss": 0.3843, "step": 21692 }, { "epoch": 3.541202399902045, "grad_norm": 3.078639507293701, "learning_rate": 1.2416661363253504e-05, "loss": 0.3948, "step": 21693 }, { "epoch": 3.5413656585445494, "grad_norm": 3.8361778259277344, "learning_rate": 1.2416039220706138e-05, "loss": 0.5424, "step": 21694 }, { "epoch": 3.5415289171870534, "grad_norm": 3.2139642238616943, "learning_rate": 1.2415417068227346e-05, "loss": 0.4416, "step": 21695 }, { "epoch": 3.541692175829558, "grad_norm": 4.061716556549072, "learning_rate": 1.2414794905819687e-05, "loss": 0.5389, "step": 21696 }, { "epoch": 3.5418554344720623, "grad_norm": 2.9509189128875732, "learning_rate": 1.2414172733485718e-05, "loss": 0.4459, "step": 21697 }, { "epoch": 3.5420186931145667, "grad_norm": 3.240140199661255, "learning_rate": 1.2413550551227997e-05, "loss": 0.4779, "step": 21698 }, { "epoch": 3.542181951757071, "grad_norm": 3.3454396724700928, "learning_rate": 1.2412928359049082e-05, "loss": 0.438, "step": 21699 }, { "epoch": 3.5423452103995756, "grad_norm": 2.9849960803985596, "learning_rate": 1.2412306156951525e-05, "loss": 0.374, "step": 21700 }, { "epoch": 3.54250846904208, "grad_norm": 3.3908531665802, "learning_rate": 1.2411683944937893e-05, "loss": 0.4993, "step": 21701 }, { "epoch": 3.542671727684584, "grad_norm": 2.6324243545532227, "learning_rate": 1.2411061723010738e-05, "loss": 0.3674, "step": 21702 }, { "epoch": 3.5428349863270885, "grad_norm": 3.163517951965332, "learning_rate": 1.2410439491172616e-05, "loss": 0.4581, "step": 21703 }, { "epoch": 3.542998244969593, "grad_norm": 3.4186201095581055, "learning_rate": 1.2409817249426091e-05, "loss": 0.4649, "step": 21704 }, { "epoch": 3.5431615036120974, "grad_norm": 3.162853717803955, "learning_rate": 1.2409194997773716e-05, "loss": 0.4458, "step": 21705 }, { "epoch": 3.543324762254602, "grad_norm": 3.1554601192474365, "learning_rate": 1.240857273621805e-05, "loss": 0.4895, "step": 21706 }, { "epoch": 3.5434880208971062, "grad_norm": 2.3661949634552, "learning_rate": 1.240795046476165e-05, "loss": 0.3085, "step": 21707 }, { "epoch": 3.5436512795396107, "grad_norm": 2.6687042713165283, "learning_rate": 1.2407328183407074e-05, "loss": 0.3831, "step": 21708 }, { "epoch": 3.543814538182115, "grad_norm": 3.168553590774536, "learning_rate": 1.2406705892156882e-05, "loss": 0.4946, "step": 21709 }, { "epoch": 3.5439777968246196, "grad_norm": 3.7789623737335205, "learning_rate": 1.240608359101363e-05, "loss": 0.8554, "step": 21710 }, { "epoch": 3.544141055467124, "grad_norm": 3.373377561569214, "learning_rate": 1.240546127997988e-05, "loss": 0.4552, "step": 21711 }, { "epoch": 3.5443043141096284, "grad_norm": 3.643866539001465, "learning_rate": 1.2404838959058187e-05, "loss": 0.4464, "step": 21712 }, { "epoch": 3.5444675727521324, "grad_norm": 3.883608818054199, "learning_rate": 1.2404216628251108e-05, "loss": 0.5985, "step": 21713 }, { "epoch": 3.544630831394637, "grad_norm": 3.819516181945801, "learning_rate": 1.2403594287561199e-05, "loss": 0.4586, "step": 21714 }, { "epoch": 3.5447940900371413, "grad_norm": 2.897019386291504, "learning_rate": 1.2402971936991025e-05, "loss": 0.4077, "step": 21715 }, { "epoch": 3.5449573486796457, "grad_norm": 3.2046151161193848, "learning_rate": 1.240234957654314e-05, "loss": 0.4899, "step": 21716 }, { "epoch": 3.54512060732215, "grad_norm": 3.5779335498809814, "learning_rate": 1.2401727206220102e-05, "loss": 0.4327, "step": 21717 }, { "epoch": 3.5452838659646546, "grad_norm": 3.0653247833251953, "learning_rate": 1.240110482602447e-05, "loss": 0.4717, "step": 21718 }, { "epoch": 3.545447124607159, "grad_norm": 2.887880802154541, "learning_rate": 1.2400482435958803e-05, "loss": 0.4025, "step": 21719 }, { "epoch": 3.545610383249663, "grad_norm": 3.2087202072143555, "learning_rate": 1.239986003602566e-05, "loss": 0.4976, "step": 21720 }, { "epoch": 3.5457736418921675, "grad_norm": 3.061408281326294, "learning_rate": 1.23992376262276e-05, "loss": 0.4561, "step": 21721 }, { "epoch": 3.545936900534672, "grad_norm": 2.7449560165405273, "learning_rate": 1.2398615206567178e-05, "loss": 0.471, "step": 21722 }, { "epoch": 3.5461001591771764, "grad_norm": 3.046426296234131, "learning_rate": 1.2397992777046954e-05, "loss": 0.5895, "step": 21723 }, { "epoch": 3.546263417819681, "grad_norm": 3.485301971435547, "learning_rate": 1.239737033766949e-05, "loss": 0.5016, "step": 21724 }, { "epoch": 3.5464266764621852, "grad_norm": 3.637296438217163, "learning_rate": 1.2396747888437337e-05, "loss": 0.5179, "step": 21725 }, { "epoch": 3.5465899351046897, "grad_norm": 3.755709648132324, "learning_rate": 1.2396125429353062e-05, "loss": 0.5144, "step": 21726 }, { "epoch": 3.546753193747194, "grad_norm": 3.1412901878356934, "learning_rate": 1.2395502960419221e-05, "loss": 0.4735, "step": 21727 }, { "epoch": 3.5469164523896985, "grad_norm": 3.1768362522125244, "learning_rate": 1.239488048163837e-05, "loss": 0.4619, "step": 21728 }, { "epoch": 3.547079711032203, "grad_norm": 4.209211349487305, "learning_rate": 1.239425799301307e-05, "loss": 0.5692, "step": 21729 }, { "epoch": 3.5472429696747074, "grad_norm": 3.5718419551849365, "learning_rate": 1.2393635494545878e-05, "loss": 0.4891, "step": 21730 }, { "epoch": 3.5474062283172114, "grad_norm": 3.5006608963012695, "learning_rate": 1.2393012986239357e-05, "loss": 0.5305, "step": 21731 }, { "epoch": 3.547569486959716, "grad_norm": 3.0419039726257324, "learning_rate": 1.2392390468096061e-05, "loss": 0.4413, "step": 21732 }, { "epoch": 3.5477327456022203, "grad_norm": 3.116905450820923, "learning_rate": 1.2391767940118551e-05, "loss": 0.4849, "step": 21733 }, { "epoch": 3.5478960042447247, "grad_norm": 3.4320833683013916, "learning_rate": 1.2391145402309388e-05, "loss": 0.5384, "step": 21734 }, { "epoch": 3.548059262887229, "grad_norm": 4.14099645614624, "learning_rate": 1.2390522854671127e-05, "loss": 0.5565, "step": 21735 }, { "epoch": 3.5482225215297336, "grad_norm": 2.9403254985809326, "learning_rate": 1.2389900297206332e-05, "loss": 0.4356, "step": 21736 }, { "epoch": 3.5483857801722376, "grad_norm": 3.464557409286499, "learning_rate": 1.2389277729917558e-05, "loss": 0.4902, "step": 21737 }, { "epoch": 3.548549038814742, "grad_norm": 2.8683931827545166, "learning_rate": 1.2388655152807365e-05, "loss": 0.4453, "step": 21738 }, { "epoch": 3.5487122974572465, "grad_norm": 2.86759090423584, "learning_rate": 1.2388032565878312e-05, "loss": 0.4782, "step": 21739 }, { "epoch": 3.548875556099751, "grad_norm": 2.9800169467926025, "learning_rate": 1.2387409969132959e-05, "loss": 0.4873, "step": 21740 }, { "epoch": 3.5490388147422554, "grad_norm": 3.510514974594116, "learning_rate": 1.2386787362573867e-05, "loss": 0.5497, "step": 21741 }, { "epoch": 3.54920207338476, "grad_norm": 3.1731832027435303, "learning_rate": 1.238616474620359e-05, "loss": 0.4955, "step": 21742 }, { "epoch": 3.5493653320272642, "grad_norm": 3.4724409580230713, "learning_rate": 1.2385542120024694e-05, "loss": 0.5268, "step": 21743 }, { "epoch": 3.5495285906697687, "grad_norm": 3.2130343914031982, "learning_rate": 1.2384919484039735e-05, "loss": 0.462, "step": 21744 }, { "epoch": 3.549691849312273, "grad_norm": 4.028531074523926, "learning_rate": 1.238429683825127e-05, "loss": 0.5355, "step": 21745 }, { "epoch": 3.5498551079547775, "grad_norm": 2.2145638465881348, "learning_rate": 1.2383674182661863e-05, "loss": 0.3803, "step": 21746 }, { "epoch": 3.550018366597282, "grad_norm": 3.147630214691162, "learning_rate": 1.2383051517274069e-05, "loss": 0.4601, "step": 21747 }, { "epoch": 3.550181625239786, "grad_norm": 2.7451136112213135, "learning_rate": 1.2382428842090453e-05, "loss": 0.4398, "step": 21748 }, { "epoch": 3.5503448838822904, "grad_norm": 2.6166181564331055, "learning_rate": 1.2381806157113572e-05, "loss": 0.3937, "step": 21749 }, { "epoch": 3.550508142524795, "grad_norm": 3.5033175945281982, "learning_rate": 1.2381183462345983e-05, "loss": 0.4416, "step": 21750 }, { "epoch": 3.5506714011672993, "grad_norm": 2.8709609508514404, "learning_rate": 1.2380560757790249e-05, "loss": 0.4309, "step": 21751 }, { "epoch": 3.5508346598098037, "grad_norm": 3.3011021614074707, "learning_rate": 1.2379938043448926e-05, "loss": 0.4855, "step": 21752 }, { "epoch": 3.550997918452308, "grad_norm": 3.337780714035034, "learning_rate": 1.237931531932458e-05, "loss": 0.422, "step": 21753 }, { "epoch": 3.5511611770948126, "grad_norm": 2.7860116958618164, "learning_rate": 1.2378692585419768e-05, "loss": 0.4314, "step": 21754 }, { "epoch": 3.5513244357373166, "grad_norm": 3.175352096557617, "learning_rate": 1.2378069841737044e-05, "loss": 0.4406, "step": 21755 }, { "epoch": 3.551487694379821, "grad_norm": 3.152644395828247, "learning_rate": 1.2377447088278978e-05, "loss": 0.4532, "step": 21756 }, { "epoch": 3.5516509530223255, "grad_norm": 3.108173370361328, "learning_rate": 1.2376824325048123e-05, "loss": 0.4835, "step": 21757 }, { "epoch": 3.55181421166483, "grad_norm": 3.143202543258667, "learning_rate": 1.2376201552047038e-05, "loss": 0.469, "step": 21758 }, { "epoch": 3.5519774703073344, "grad_norm": 3.7202095985412598, "learning_rate": 1.2375578769278288e-05, "loss": 0.5606, "step": 21759 }, { "epoch": 3.552140728949839, "grad_norm": 3.4593024253845215, "learning_rate": 1.2374955976744431e-05, "loss": 0.5372, "step": 21760 }, { "epoch": 3.5523039875923432, "grad_norm": 2.857234477996826, "learning_rate": 1.2374333174448026e-05, "loss": 0.4301, "step": 21761 }, { "epoch": 3.5524672462348477, "grad_norm": 2.9276275634765625, "learning_rate": 1.2373710362391634e-05, "loss": 0.4815, "step": 21762 }, { "epoch": 3.552630504877352, "grad_norm": 2.6982665061950684, "learning_rate": 1.2373087540577812e-05, "loss": 0.4053, "step": 21763 }, { "epoch": 3.5527937635198565, "grad_norm": 2.4773807525634766, "learning_rate": 1.2372464709009127e-05, "loss": 0.3594, "step": 21764 }, { "epoch": 3.552957022162361, "grad_norm": 3.7332041263580322, "learning_rate": 1.2371841867688137e-05, "loss": 0.4805, "step": 21765 }, { "epoch": 3.553120280804865, "grad_norm": 3.7408313751220703, "learning_rate": 1.2371219016617397e-05, "loss": 0.577, "step": 21766 }, { "epoch": 3.5532835394473694, "grad_norm": 3.5599989891052246, "learning_rate": 1.2370596155799473e-05, "loss": 0.4679, "step": 21767 }, { "epoch": 3.553446798089874, "grad_norm": 3.083387613296509, "learning_rate": 1.2369973285236923e-05, "loss": 0.4315, "step": 21768 }, { "epoch": 3.5536100567323783, "grad_norm": 2.426267147064209, "learning_rate": 1.2369350404932305e-05, "loss": 0.3789, "step": 21769 }, { "epoch": 3.5537733153748827, "grad_norm": 3.0783255100250244, "learning_rate": 1.2368727514888186e-05, "loss": 0.4279, "step": 21770 }, { "epoch": 3.553936574017387, "grad_norm": 3.41506028175354, "learning_rate": 1.2368104615107122e-05, "loss": 0.4865, "step": 21771 }, { "epoch": 3.554099832659891, "grad_norm": 3.5977187156677246, "learning_rate": 1.2367481705591673e-05, "loss": 0.4877, "step": 21772 }, { "epoch": 3.5542630913023956, "grad_norm": 3.448995590209961, "learning_rate": 1.23668587863444e-05, "loss": 0.4941, "step": 21773 }, { "epoch": 3.5544263499449, "grad_norm": 3.299741268157959, "learning_rate": 1.2366235857367865e-05, "loss": 0.4994, "step": 21774 }, { "epoch": 3.5545896085874045, "grad_norm": 3.0246105194091797, "learning_rate": 1.2365612918664626e-05, "loss": 0.4347, "step": 21775 }, { "epoch": 3.554752867229909, "grad_norm": 3.8223369121551514, "learning_rate": 1.236498997023725e-05, "loss": 0.4826, "step": 21776 }, { "epoch": 3.5549161258724133, "grad_norm": 3.3864359855651855, "learning_rate": 1.236436701208829e-05, "loss": 0.48, "step": 21777 }, { "epoch": 3.555079384514918, "grad_norm": 3.1076114177703857, "learning_rate": 1.2363744044220309e-05, "loss": 0.5085, "step": 21778 }, { "epoch": 3.5552426431574222, "grad_norm": 3.1943888664245605, "learning_rate": 1.236312106663587e-05, "loss": 0.4822, "step": 21779 }, { "epoch": 3.5554059017999267, "grad_norm": 2.9701554775238037, "learning_rate": 1.2362498079337533e-05, "loss": 0.4685, "step": 21780 }, { "epoch": 3.555569160442431, "grad_norm": 3.0121495723724365, "learning_rate": 1.236187508232786e-05, "loss": 0.4441, "step": 21781 }, { "epoch": 3.5557324190849355, "grad_norm": 3.51497745513916, "learning_rate": 1.2361252075609408e-05, "loss": 0.4544, "step": 21782 }, { "epoch": 3.5558956777274395, "grad_norm": 3.416757345199585, "learning_rate": 1.236062905918474e-05, "loss": 0.453, "step": 21783 }, { "epoch": 3.556058936369944, "grad_norm": 2.63712739944458, "learning_rate": 1.236000603305642e-05, "loss": 0.4037, "step": 21784 }, { "epoch": 3.5562221950124484, "grad_norm": 3.2341058254241943, "learning_rate": 1.2359382997227005e-05, "loss": 0.4821, "step": 21785 }, { "epoch": 3.556385453654953, "grad_norm": 3.0735628604888916, "learning_rate": 1.2358759951699056e-05, "loss": 0.4173, "step": 21786 }, { "epoch": 3.5565487122974573, "grad_norm": 3.421264410018921, "learning_rate": 1.2358136896475138e-05, "loss": 0.4184, "step": 21787 }, { "epoch": 3.5567119709399617, "grad_norm": 2.8042354583740234, "learning_rate": 1.2357513831557806e-05, "loss": 0.3898, "step": 21788 }, { "epoch": 3.556875229582466, "grad_norm": 3.7834339141845703, "learning_rate": 1.2356890756949627e-05, "loss": 0.5684, "step": 21789 }, { "epoch": 3.55703848822497, "grad_norm": 2.450028419494629, "learning_rate": 1.2356267672653162e-05, "loss": 0.3846, "step": 21790 }, { "epoch": 3.5572017468674746, "grad_norm": 3.2226498126983643, "learning_rate": 1.2355644578670967e-05, "loss": 0.4937, "step": 21791 }, { "epoch": 3.557365005509979, "grad_norm": 3.6137940883636475, "learning_rate": 1.2355021475005608e-05, "loss": 0.5708, "step": 21792 }, { "epoch": 3.5575282641524835, "grad_norm": 3.7385828495025635, "learning_rate": 1.2354398361659646e-05, "loss": 0.6065, "step": 21793 }, { "epoch": 3.557691522794988, "grad_norm": 3.1036794185638428, "learning_rate": 1.235377523863564e-05, "loss": 0.4461, "step": 21794 }, { "epoch": 3.5578547814374923, "grad_norm": 3.6095046997070312, "learning_rate": 1.2353152105936155e-05, "loss": 0.5645, "step": 21795 }, { "epoch": 3.558018040079997, "grad_norm": 3.344273805618286, "learning_rate": 1.2352528963563746e-05, "loss": 0.4763, "step": 21796 }, { "epoch": 3.558181298722501, "grad_norm": 3.046452045440674, "learning_rate": 1.235190581152098e-05, "loss": 0.5059, "step": 21797 }, { "epoch": 3.5583445573650057, "grad_norm": 3.1495096683502197, "learning_rate": 1.2351282649810418e-05, "loss": 0.4757, "step": 21798 }, { "epoch": 3.55850781600751, "grad_norm": 3.261573314666748, "learning_rate": 1.2350659478434622e-05, "loss": 0.4832, "step": 21799 }, { "epoch": 3.5586710746500145, "grad_norm": 3.3388330936431885, "learning_rate": 1.2350036297396153e-05, "loss": 0.4437, "step": 21800 }, { "epoch": 3.5588343332925185, "grad_norm": 3.2717366218566895, "learning_rate": 1.2349413106697572e-05, "loss": 0.4657, "step": 21801 }, { "epoch": 3.558997591935023, "grad_norm": 3.311481475830078, "learning_rate": 1.2348789906341436e-05, "loss": 0.4876, "step": 21802 }, { "epoch": 3.5591608505775274, "grad_norm": 2.2081961631774902, "learning_rate": 1.2348166696330314e-05, "loss": 0.3385, "step": 21803 }, { "epoch": 3.559324109220032, "grad_norm": 3.1004676818847656, "learning_rate": 1.234754347666677e-05, "loss": 0.4473, "step": 21804 }, { "epoch": 3.5594873678625363, "grad_norm": 3.1437389850616455, "learning_rate": 1.2346920247353358e-05, "loss": 0.512, "step": 21805 }, { "epoch": 3.5596506265050407, "grad_norm": 3.1644797325134277, "learning_rate": 1.2346297008392642e-05, "loss": 0.5381, "step": 21806 }, { "epoch": 3.559813885147545, "grad_norm": 3.1518900394439697, "learning_rate": 1.2345673759787184e-05, "loss": 0.4554, "step": 21807 }, { "epoch": 3.559977143790049, "grad_norm": 3.7268407344818115, "learning_rate": 1.2345050501539548e-05, "loss": 0.5183, "step": 21808 }, { "epoch": 3.5601404024325536, "grad_norm": 3.361133098602295, "learning_rate": 1.2344427233652299e-05, "loss": 0.5298, "step": 21809 }, { "epoch": 3.560303661075058, "grad_norm": 3.0752124786376953, "learning_rate": 1.234380395612799e-05, "loss": 0.4588, "step": 21810 }, { "epoch": 3.5604669197175625, "grad_norm": 3.3917524814605713, "learning_rate": 1.234318066896919e-05, "loss": 0.5172, "step": 21811 }, { "epoch": 3.560630178360067, "grad_norm": 3.1097185611724854, "learning_rate": 1.2342557372178459e-05, "loss": 0.5336, "step": 21812 }, { "epoch": 3.5607934370025713, "grad_norm": 3.409202814102173, "learning_rate": 1.2341934065758357e-05, "loss": 0.5426, "step": 21813 }, { "epoch": 3.5609566956450758, "grad_norm": 3.370504140853882, "learning_rate": 1.234131074971145e-05, "loss": 0.4764, "step": 21814 }, { "epoch": 3.56111995428758, "grad_norm": 3.5059707164764404, "learning_rate": 1.2340687424040299e-05, "loss": 0.4876, "step": 21815 }, { "epoch": 3.5612832129300847, "grad_norm": 3.6654417514801025, "learning_rate": 1.2340064088747466e-05, "loss": 0.5207, "step": 21816 }, { "epoch": 3.561446471572589, "grad_norm": 2.522279739379883, "learning_rate": 1.2339440743835512e-05, "loss": 0.4103, "step": 21817 }, { "epoch": 3.5616097302150935, "grad_norm": 3.362762212753296, "learning_rate": 1.2338817389307001e-05, "loss": 0.5748, "step": 21818 }, { "epoch": 3.5617729888575975, "grad_norm": 2.134884834289551, "learning_rate": 1.2338194025164493e-05, "loss": 0.3758, "step": 21819 }, { "epoch": 3.561936247500102, "grad_norm": 2.9017837047576904, "learning_rate": 1.2337570651410556e-05, "loss": 0.451, "step": 21820 }, { "epoch": 3.5620995061426064, "grad_norm": 3.715841293334961, "learning_rate": 1.2336947268047746e-05, "loss": 0.4911, "step": 21821 }, { "epoch": 3.562262764785111, "grad_norm": 3.0441884994506836, "learning_rate": 1.2336323875078629e-05, "loss": 0.4721, "step": 21822 }, { "epoch": 3.5624260234276153, "grad_norm": 4.441091537475586, "learning_rate": 1.2335700472505767e-05, "loss": 0.549, "step": 21823 }, { "epoch": 3.5625892820701197, "grad_norm": 2.756164312362671, "learning_rate": 1.2335077060331721e-05, "loss": 0.4522, "step": 21824 }, { "epoch": 3.5627525407126237, "grad_norm": 2.9858312606811523, "learning_rate": 1.2334453638559057e-05, "loss": 0.5054, "step": 21825 }, { "epoch": 3.562915799355128, "grad_norm": 3.2073235511779785, "learning_rate": 1.2333830207190333e-05, "loss": 0.4958, "step": 21826 }, { "epoch": 3.5630790579976326, "grad_norm": 3.034529685974121, "learning_rate": 1.2333206766228117e-05, "loss": 0.4322, "step": 21827 }, { "epoch": 3.563242316640137, "grad_norm": 2.8524832725524902, "learning_rate": 1.2332583315674966e-05, "loss": 0.4148, "step": 21828 }, { "epoch": 3.5634055752826415, "grad_norm": 3.9095098972320557, "learning_rate": 1.2331959855533446e-05, "loss": 0.5486, "step": 21829 }, { "epoch": 3.563568833925146, "grad_norm": 3.2260851860046387, "learning_rate": 1.2331336385806123e-05, "loss": 0.4775, "step": 21830 }, { "epoch": 3.5637320925676503, "grad_norm": 3.6291589736938477, "learning_rate": 1.2330712906495554e-05, "loss": 0.5262, "step": 21831 }, { "epoch": 3.5638953512101548, "grad_norm": 3.017524003982544, "learning_rate": 1.2330089417604305e-05, "loss": 0.434, "step": 21832 }, { "epoch": 3.564058609852659, "grad_norm": 3.2743401527404785, "learning_rate": 1.2329465919134938e-05, "loss": 0.5329, "step": 21833 }, { "epoch": 3.5642218684951636, "grad_norm": 3.1839427947998047, "learning_rate": 1.2328842411090017e-05, "loss": 0.4561, "step": 21834 }, { "epoch": 3.564385127137668, "grad_norm": 3.0024101734161377, "learning_rate": 1.2328218893472102e-05, "loss": 0.4677, "step": 21835 }, { "epoch": 3.564548385780172, "grad_norm": 3.0439884662628174, "learning_rate": 1.2327595366283758e-05, "loss": 0.4499, "step": 21836 }, { "epoch": 3.5647116444226765, "grad_norm": 3.7511019706726074, "learning_rate": 1.2326971829527549e-05, "loss": 0.5871, "step": 21837 }, { "epoch": 3.564874903065181, "grad_norm": 3.3284404277801514, "learning_rate": 1.232634828320604e-05, "loss": 0.5036, "step": 21838 }, { "epoch": 3.5650381617076854, "grad_norm": 2.940685749053955, "learning_rate": 1.2325724727321793e-05, "loss": 0.4158, "step": 21839 }, { "epoch": 3.56520142035019, "grad_norm": 2.995094060897827, "learning_rate": 1.2325101161877364e-05, "loss": 0.4708, "step": 21840 }, { "epoch": 3.5653646789926943, "grad_norm": 2.47636342048645, "learning_rate": 1.2324477586875325e-05, "loss": 0.3778, "step": 21841 }, { "epoch": 3.5655279376351987, "grad_norm": 3.2232162952423096, "learning_rate": 1.2323854002318236e-05, "loss": 0.526, "step": 21842 }, { "epoch": 3.5656911962777027, "grad_norm": 2.900097370147705, "learning_rate": 1.232323040820866e-05, "loss": 0.3805, "step": 21843 }, { "epoch": 3.565854454920207, "grad_norm": 3.4865188598632812, "learning_rate": 1.2322606804549164e-05, "loss": 0.4881, "step": 21844 }, { "epoch": 3.5660177135627116, "grad_norm": 2.746203899383545, "learning_rate": 1.2321983191342307e-05, "loss": 0.4937, "step": 21845 }, { "epoch": 3.566180972205216, "grad_norm": 2.7251203060150146, "learning_rate": 1.2321359568590649e-05, "loss": 0.4032, "step": 21846 }, { "epoch": 3.5663442308477205, "grad_norm": 2.929607629776001, "learning_rate": 1.2320735936296763e-05, "loss": 0.4517, "step": 21847 }, { "epoch": 3.566507489490225, "grad_norm": 3.4251906871795654, "learning_rate": 1.2320112294463212e-05, "loss": 0.596, "step": 21848 }, { "epoch": 3.5666707481327293, "grad_norm": 3.3733465671539307, "learning_rate": 1.231948864309255e-05, "loss": 0.4869, "step": 21849 }, { "epoch": 3.5668340067752338, "grad_norm": 3.6528539657592773, "learning_rate": 1.2318864982187347e-05, "loss": 0.5279, "step": 21850 }, { "epoch": 3.566997265417738, "grad_norm": 3.2629787921905518, "learning_rate": 1.2318241311750165e-05, "loss": 0.5382, "step": 21851 }, { "epoch": 3.5671605240602426, "grad_norm": 3.1094727516174316, "learning_rate": 1.2317617631783568e-05, "loss": 0.4197, "step": 21852 }, { "epoch": 3.567323782702747, "grad_norm": 3.088970422744751, "learning_rate": 1.231699394229012e-05, "loss": 0.4646, "step": 21853 }, { "epoch": 3.567487041345251, "grad_norm": 3.411862373352051, "learning_rate": 1.2316370243272386e-05, "loss": 0.5128, "step": 21854 }, { "epoch": 3.5676502999877555, "grad_norm": 3.308382272720337, "learning_rate": 1.2315746534732928e-05, "loss": 0.4945, "step": 21855 }, { "epoch": 3.56781355863026, "grad_norm": 3.0673599243164062, "learning_rate": 1.2315122816674313e-05, "loss": 0.4131, "step": 21856 }, { "epoch": 3.5679768172727644, "grad_norm": 3.797184705734253, "learning_rate": 1.2314499089099097e-05, "loss": 0.5455, "step": 21857 }, { "epoch": 3.568140075915269, "grad_norm": 3.36625337600708, "learning_rate": 1.2313875352009851e-05, "loss": 0.5205, "step": 21858 }, { "epoch": 3.5683033345577733, "grad_norm": 3.3495755195617676, "learning_rate": 1.231325160540914e-05, "loss": 0.5031, "step": 21859 }, { "epoch": 3.5684665932002777, "grad_norm": 3.3708035945892334, "learning_rate": 1.2312627849299523e-05, "loss": 0.4479, "step": 21860 }, { "epoch": 3.5686298518427817, "grad_norm": 2.716705083847046, "learning_rate": 1.2312004083683566e-05, "loss": 0.4091, "step": 21861 }, { "epoch": 3.568793110485286, "grad_norm": 3.257753849029541, "learning_rate": 1.2311380308563834e-05, "loss": 0.4777, "step": 21862 }, { "epoch": 3.5689563691277906, "grad_norm": 2.9988698959350586, "learning_rate": 1.231075652394289e-05, "loss": 0.4847, "step": 21863 }, { "epoch": 3.569119627770295, "grad_norm": 3.335390090942383, "learning_rate": 1.2310132729823298e-05, "loss": 0.4625, "step": 21864 }, { "epoch": 3.5692828864127994, "grad_norm": 2.9933667182922363, "learning_rate": 1.2309508926207622e-05, "loss": 0.3902, "step": 21865 }, { "epoch": 3.569446145055304, "grad_norm": 2.9545340538024902, "learning_rate": 1.230888511309843e-05, "loss": 0.4622, "step": 21866 }, { "epoch": 3.5696094036978083, "grad_norm": 3.3710832595825195, "learning_rate": 1.230826129049828e-05, "loss": 0.4306, "step": 21867 }, { "epoch": 3.5697726623403128, "grad_norm": 2.528414249420166, "learning_rate": 1.230763745840974e-05, "loss": 0.3377, "step": 21868 }, { "epoch": 3.569935920982817, "grad_norm": 2.9213991165161133, "learning_rate": 1.2307013616835376e-05, "loss": 0.3996, "step": 21869 }, { "epoch": 3.5700991796253216, "grad_norm": 2.462393283843994, "learning_rate": 1.2306389765777747e-05, "loss": 0.437, "step": 21870 }, { "epoch": 3.570262438267826, "grad_norm": 2.811753034591675, "learning_rate": 1.2305765905239423e-05, "loss": 0.4335, "step": 21871 }, { "epoch": 3.57042569691033, "grad_norm": 3.3163766860961914, "learning_rate": 1.2305142035222964e-05, "loss": 0.4116, "step": 21872 }, { "epoch": 3.5705889555528345, "grad_norm": 2.4660632610321045, "learning_rate": 1.2304518155730937e-05, "loss": 0.3955, "step": 21873 }, { "epoch": 3.570752214195339, "grad_norm": 3.1973702907562256, "learning_rate": 1.2303894266765908e-05, "loss": 0.5225, "step": 21874 }, { "epoch": 3.5709154728378434, "grad_norm": 3.3568637371063232, "learning_rate": 1.2303270368330438e-05, "loss": 0.5633, "step": 21875 }, { "epoch": 3.571078731480348, "grad_norm": 2.9518721103668213, "learning_rate": 1.2302646460427092e-05, "loss": 0.4381, "step": 21876 }, { "epoch": 3.5712419901228523, "grad_norm": 4.024949550628662, "learning_rate": 1.2302022543058439e-05, "loss": 0.5923, "step": 21877 }, { "epoch": 3.5714052487653563, "grad_norm": 3.795238733291626, "learning_rate": 1.2301398616227038e-05, "loss": 0.5096, "step": 21878 }, { "epoch": 3.5715685074078607, "grad_norm": 2.875703811645508, "learning_rate": 1.2300774679935456e-05, "loss": 0.4466, "step": 21879 }, { "epoch": 3.571731766050365, "grad_norm": 3.3738982677459717, "learning_rate": 1.2300150734186257e-05, "loss": 0.4695, "step": 21880 }, { "epoch": 3.5718950246928696, "grad_norm": 3.550393581390381, "learning_rate": 1.2299526778982011e-05, "loss": 0.4805, "step": 21881 }, { "epoch": 3.572058283335374, "grad_norm": 3.110215425491333, "learning_rate": 1.2298902814325277e-05, "loss": 0.4751, "step": 21882 }, { "epoch": 3.5722215419778784, "grad_norm": 3.3529915809631348, "learning_rate": 1.2298278840218623e-05, "loss": 0.4559, "step": 21883 }, { "epoch": 3.572384800620383, "grad_norm": 4.063692092895508, "learning_rate": 1.229765485666461e-05, "loss": 0.5452, "step": 21884 }, { "epoch": 3.5725480592628873, "grad_norm": 3.555590867996216, "learning_rate": 1.2297030863665804e-05, "loss": 0.5622, "step": 21885 }, { "epoch": 3.5727113179053918, "grad_norm": 3.0895297527313232, "learning_rate": 1.2296406861224774e-05, "loss": 0.4838, "step": 21886 }, { "epoch": 3.572874576547896, "grad_norm": 3.2734391689300537, "learning_rate": 1.2295782849344083e-05, "loss": 0.4396, "step": 21887 }, { "epoch": 3.5730378351904006, "grad_norm": 3.6465399265289307, "learning_rate": 1.2295158828026294e-05, "loss": 0.5619, "step": 21888 }, { "epoch": 3.5732010938329046, "grad_norm": 3.034400701522827, "learning_rate": 1.2294534797273977e-05, "loss": 0.4925, "step": 21889 }, { "epoch": 3.573364352475409, "grad_norm": 2.8208417892456055, "learning_rate": 1.2293910757089688e-05, "loss": 0.4426, "step": 21890 }, { "epoch": 3.5735276111179135, "grad_norm": 3.358614683151245, "learning_rate": 1.2293286707476e-05, "loss": 0.4987, "step": 21891 }, { "epoch": 3.573690869760418, "grad_norm": 2.343441963195801, "learning_rate": 1.2292662648435478e-05, "loss": 0.3932, "step": 21892 }, { "epoch": 3.5738541284029224, "grad_norm": 2.962852716445923, "learning_rate": 1.2292038579970687e-05, "loss": 0.4071, "step": 21893 }, { "epoch": 3.574017387045427, "grad_norm": 2.8491029739379883, "learning_rate": 1.229141450208419e-05, "loss": 0.3805, "step": 21894 }, { "epoch": 3.5741806456879313, "grad_norm": 3.2314517498016357, "learning_rate": 1.229079041477855e-05, "loss": 0.4214, "step": 21895 }, { "epoch": 3.5743439043304353, "grad_norm": 3.3396494388580322, "learning_rate": 1.2290166318056338e-05, "loss": 0.4616, "step": 21896 }, { "epoch": 3.5745071629729397, "grad_norm": 3.2859272956848145, "learning_rate": 1.2289542211920118e-05, "loss": 0.4648, "step": 21897 }, { "epoch": 3.574670421615444, "grad_norm": 2.8169288635253906, "learning_rate": 1.2288918096372454e-05, "loss": 0.4231, "step": 21898 }, { "epoch": 3.5748336802579486, "grad_norm": 2.843531608581543, "learning_rate": 1.228829397141591e-05, "loss": 0.4003, "step": 21899 }, { "epoch": 3.574996938900453, "grad_norm": 2.489874839782715, "learning_rate": 1.2287669837053055e-05, "loss": 0.3972, "step": 21900 }, { "epoch": 3.5751601975429574, "grad_norm": 2.7296290397644043, "learning_rate": 1.228704569328645e-05, "loss": 0.3635, "step": 21901 }, { "epoch": 3.575323456185462, "grad_norm": 3.6681535243988037, "learning_rate": 1.2286421540118669e-05, "loss": 0.5428, "step": 21902 }, { "epoch": 3.5754867148279663, "grad_norm": 3.343548536300659, "learning_rate": 1.2285797377552272e-05, "loss": 0.5686, "step": 21903 }, { "epoch": 3.5756499734704708, "grad_norm": 3.957515001296997, "learning_rate": 1.2285173205589823e-05, "loss": 0.5013, "step": 21904 }, { "epoch": 3.575813232112975, "grad_norm": 3.379840135574341, "learning_rate": 1.2284549024233887e-05, "loss": 0.5365, "step": 21905 }, { "epoch": 3.5759764907554796, "grad_norm": 3.4113590717315674, "learning_rate": 1.2283924833487037e-05, "loss": 0.5251, "step": 21906 }, { "epoch": 3.5761397493979836, "grad_norm": 3.188232898712158, "learning_rate": 1.2283300633351834e-05, "loss": 0.4516, "step": 21907 }, { "epoch": 3.576303008040488, "grad_norm": 2.8279526233673096, "learning_rate": 1.2282676423830841e-05, "loss": 0.4629, "step": 21908 }, { "epoch": 3.5764662666829925, "grad_norm": 3.4490244388580322, "learning_rate": 1.2282052204926629e-05, "loss": 0.4774, "step": 21909 }, { "epoch": 3.576629525325497, "grad_norm": 3.6975467205047607, "learning_rate": 1.2281427976641763e-05, "loss": 0.5779, "step": 21910 }, { "epoch": 3.5767927839680014, "grad_norm": 3.254134178161621, "learning_rate": 1.2280803738978808e-05, "loss": 0.4312, "step": 21911 }, { "epoch": 3.576956042610506, "grad_norm": 2.9987008571624756, "learning_rate": 1.228017949194033e-05, "loss": 0.3978, "step": 21912 }, { "epoch": 3.57711930125301, "grad_norm": 2.5130462646484375, "learning_rate": 1.2279555235528892e-05, "loss": 0.4268, "step": 21913 }, { "epoch": 3.5772825598955142, "grad_norm": 2.982316732406616, "learning_rate": 1.2278930969747066e-05, "loss": 0.4889, "step": 21914 }, { "epoch": 3.5774458185380187, "grad_norm": 2.6666903495788574, "learning_rate": 1.2278306694597415e-05, "loss": 0.4165, "step": 21915 }, { "epoch": 3.577609077180523, "grad_norm": 3.5868523120880127, "learning_rate": 1.2277682410082506e-05, "loss": 0.4572, "step": 21916 }, { "epoch": 3.5777723358230276, "grad_norm": 3.1259899139404297, "learning_rate": 1.2277058116204906e-05, "loss": 0.5013, "step": 21917 }, { "epoch": 3.577935594465532, "grad_norm": 3.3397247791290283, "learning_rate": 1.2276433812967176e-05, "loss": 0.5023, "step": 21918 }, { "epoch": 3.5780988531080364, "grad_norm": 3.282064199447632, "learning_rate": 1.227580950037189e-05, "loss": 0.554, "step": 21919 }, { "epoch": 3.578262111750541, "grad_norm": 2.732760190963745, "learning_rate": 1.2275185178421606e-05, "loss": 0.429, "step": 21920 }, { "epoch": 3.5784253703930453, "grad_norm": 2.9310219287872314, "learning_rate": 1.2274560847118899e-05, "loss": 0.4506, "step": 21921 }, { "epoch": 3.5785886290355498, "grad_norm": 3.211487293243408, "learning_rate": 1.227393650646633e-05, "loss": 0.478, "step": 21922 }, { "epoch": 3.578751887678054, "grad_norm": 3.3777554035186768, "learning_rate": 1.2273312156466466e-05, "loss": 0.5179, "step": 21923 }, { "epoch": 3.578915146320558, "grad_norm": 2.9821765422821045, "learning_rate": 1.2272687797121874e-05, "loss": 0.4891, "step": 21924 }, { "epoch": 3.5790784049630626, "grad_norm": 2.928558111190796, "learning_rate": 1.2272063428435122e-05, "loss": 0.4081, "step": 21925 }, { "epoch": 3.579241663605567, "grad_norm": 3.4102628231048584, "learning_rate": 1.2271439050408773e-05, "loss": 0.4985, "step": 21926 }, { "epoch": 3.5794049222480715, "grad_norm": 4.1408586502075195, "learning_rate": 1.2270814663045398e-05, "loss": 0.5523, "step": 21927 }, { "epoch": 3.579568180890576, "grad_norm": 3.7092247009277344, "learning_rate": 1.227019026634756e-05, "loss": 0.5298, "step": 21928 }, { "epoch": 3.5797314395330804, "grad_norm": 3.094794750213623, "learning_rate": 1.2269565860317827e-05, "loss": 0.5527, "step": 21929 }, { "epoch": 3.579894698175585, "grad_norm": 3.675830602645874, "learning_rate": 1.2268941444958766e-05, "loss": 0.5038, "step": 21930 }, { "epoch": 3.580057956818089, "grad_norm": 3.160303831100464, "learning_rate": 1.2268317020272945e-05, "loss": 0.451, "step": 21931 }, { "epoch": 3.5802212154605932, "grad_norm": 3.074375629425049, "learning_rate": 1.2267692586262927e-05, "loss": 0.491, "step": 21932 }, { "epoch": 3.5803844741030977, "grad_norm": 2.735076665878296, "learning_rate": 1.2267068142931283e-05, "loss": 0.449, "step": 21933 }, { "epoch": 3.580547732745602, "grad_norm": 3.234750747680664, "learning_rate": 1.2266443690280574e-05, "loss": 0.4471, "step": 21934 }, { "epoch": 3.5807109913881066, "grad_norm": 3.373717784881592, "learning_rate": 1.2265819228313374e-05, "loss": 0.5028, "step": 21935 }, { "epoch": 3.580874250030611, "grad_norm": 2.918322801589966, "learning_rate": 1.2265194757032247e-05, "loss": 0.4367, "step": 21936 }, { "epoch": 3.5810375086731154, "grad_norm": 3.5149900913238525, "learning_rate": 1.226457027643976e-05, "loss": 0.4635, "step": 21937 }, { "epoch": 3.58120076731562, "grad_norm": 3.328181743621826, "learning_rate": 1.2263945786538479e-05, "loss": 0.4663, "step": 21938 }, { "epoch": 3.5813640259581243, "grad_norm": 3.4503695964813232, "learning_rate": 1.226332128733097e-05, "loss": 0.5096, "step": 21939 }, { "epoch": 3.5815272846006287, "grad_norm": 2.7429800033569336, "learning_rate": 1.2262696778819801e-05, "loss": 0.4317, "step": 21940 }, { "epoch": 3.581690543243133, "grad_norm": 3.304522752761841, "learning_rate": 1.2262072261007545e-05, "loss": 0.4694, "step": 21941 }, { "epoch": 3.581853801885637, "grad_norm": 2.8440263271331787, "learning_rate": 1.226144773389676e-05, "loss": 0.444, "step": 21942 }, { "epoch": 3.5820170605281416, "grad_norm": 3.1849915981292725, "learning_rate": 1.226082319749002e-05, "loss": 0.5042, "step": 21943 }, { "epoch": 3.582180319170646, "grad_norm": 3.071604013442993, "learning_rate": 1.2260198651789886e-05, "loss": 0.5077, "step": 21944 }, { "epoch": 3.5823435778131505, "grad_norm": 3.0964972972869873, "learning_rate": 1.225957409679893e-05, "loss": 0.4328, "step": 21945 }, { "epoch": 3.582506836455655, "grad_norm": 3.754080057144165, "learning_rate": 1.225894953251972e-05, "loss": 0.4599, "step": 21946 }, { "epoch": 3.5826700950981594, "grad_norm": 3.7686803340911865, "learning_rate": 1.2258324958954819e-05, "loss": 0.4893, "step": 21947 }, { "epoch": 3.582833353740664, "grad_norm": 2.7098028659820557, "learning_rate": 1.2257700376106797e-05, "loss": 0.3583, "step": 21948 }, { "epoch": 3.582996612383168, "grad_norm": 4.123057842254639, "learning_rate": 1.2257075783978225e-05, "loss": 0.6261, "step": 21949 }, { "epoch": 3.5831598710256722, "grad_norm": 2.657195806503296, "learning_rate": 1.2256451182571663e-05, "loss": 0.4073, "step": 21950 }, { "epoch": 3.5833231296681767, "grad_norm": 2.9692983627319336, "learning_rate": 1.2255826571889685e-05, "loss": 0.4756, "step": 21951 }, { "epoch": 3.583486388310681, "grad_norm": 2.9667418003082275, "learning_rate": 1.2255201951934852e-05, "loss": 0.469, "step": 21952 }, { "epoch": 3.5836496469531856, "grad_norm": 3.7278311252593994, "learning_rate": 1.2254577322709737e-05, "loss": 0.5353, "step": 21953 }, { "epoch": 3.58381290559569, "grad_norm": 3.4700498580932617, "learning_rate": 1.2253952684216905e-05, "loss": 0.4582, "step": 21954 }, { "epoch": 3.5839761642381944, "grad_norm": 3.4901578426361084, "learning_rate": 1.2253328036458927e-05, "loss": 0.5415, "step": 21955 }, { "epoch": 3.584139422880699, "grad_norm": 2.8385467529296875, "learning_rate": 1.2252703379438367e-05, "loss": 0.4641, "step": 21956 }, { "epoch": 3.5843026815232033, "grad_norm": 4.156911373138428, "learning_rate": 1.2252078713157793e-05, "loss": 0.566, "step": 21957 }, { "epoch": 3.5844659401657077, "grad_norm": 2.791809558868408, "learning_rate": 1.2251454037619776e-05, "loss": 0.4535, "step": 21958 }, { "epoch": 3.584629198808212, "grad_norm": 2.8404757976531982, "learning_rate": 1.225082935282688e-05, "loss": 0.4473, "step": 21959 }, { "epoch": 3.584792457450716, "grad_norm": 3.78230619430542, "learning_rate": 1.2250204658781675e-05, "loss": 0.567, "step": 21960 }, { "epoch": 3.5849557160932206, "grad_norm": 3.371518611907959, "learning_rate": 1.2249579955486729e-05, "loss": 0.4457, "step": 21961 }, { "epoch": 3.585118974735725, "grad_norm": 2.523730516433716, "learning_rate": 1.2248955242944607e-05, "loss": 0.3944, "step": 21962 }, { "epoch": 3.5852822333782295, "grad_norm": 3.3949997425079346, "learning_rate": 1.2248330521157881e-05, "loss": 0.4975, "step": 21963 }, { "epoch": 3.585445492020734, "grad_norm": 3.180469512939453, "learning_rate": 1.2247705790129116e-05, "loss": 0.4771, "step": 21964 }, { "epoch": 3.5856087506632384, "grad_norm": 2.978438377380371, "learning_rate": 1.2247081049860882e-05, "loss": 0.4596, "step": 21965 }, { "epoch": 3.5857720093057424, "grad_norm": 2.8528614044189453, "learning_rate": 1.2246456300355749e-05, "loss": 0.4061, "step": 21966 }, { "epoch": 3.585935267948247, "grad_norm": 2.818910598754883, "learning_rate": 1.2245831541616277e-05, "loss": 0.4554, "step": 21967 }, { "epoch": 3.5860985265907512, "grad_norm": 2.626237392425537, "learning_rate": 1.2245206773645043e-05, "loss": 0.3926, "step": 21968 }, { "epoch": 3.5862617852332557, "grad_norm": 2.837966203689575, "learning_rate": 1.2244581996444612e-05, "loss": 0.4173, "step": 21969 }, { "epoch": 3.58642504387576, "grad_norm": 3.9236702919006348, "learning_rate": 1.224395721001755e-05, "loss": 0.5399, "step": 21970 }, { "epoch": 3.5865883025182645, "grad_norm": 2.7330832481384277, "learning_rate": 1.2243332414366428e-05, "loss": 0.4045, "step": 21971 }, { "epoch": 3.586751561160769, "grad_norm": 3.039693832397461, "learning_rate": 1.2242707609493814e-05, "loss": 0.4453, "step": 21972 }, { "epoch": 3.5869148198032734, "grad_norm": 3.674320936203003, "learning_rate": 1.2242082795402274e-05, "loss": 0.5746, "step": 21973 }, { "epoch": 3.587078078445778, "grad_norm": 3.170705556869507, "learning_rate": 1.2241457972094379e-05, "loss": 0.4975, "step": 21974 }, { "epoch": 3.5872413370882823, "grad_norm": 3.2434349060058594, "learning_rate": 1.2240833139572698e-05, "loss": 0.5172, "step": 21975 }, { "epoch": 3.5874045957307867, "grad_norm": 3.0868582725524902, "learning_rate": 1.22402082978398e-05, "loss": 0.5222, "step": 21976 }, { "epoch": 3.5875678543732907, "grad_norm": 3.4329354763031006, "learning_rate": 1.223958344689825e-05, "loss": 0.5798, "step": 21977 }, { "epoch": 3.587731113015795, "grad_norm": 3.1818878650665283, "learning_rate": 1.2238958586750613e-05, "loss": 0.4199, "step": 21978 }, { "epoch": 3.5878943716582996, "grad_norm": 3.5691559314727783, "learning_rate": 1.2238333717399467e-05, "loss": 0.5766, "step": 21979 }, { "epoch": 3.588057630300804, "grad_norm": 2.8804354667663574, "learning_rate": 1.2237708838847375e-05, "loss": 0.4367, "step": 21980 }, { "epoch": 3.5882208889433085, "grad_norm": 2.940066337585449, "learning_rate": 1.2237083951096912e-05, "loss": 0.4113, "step": 21981 }, { "epoch": 3.588384147585813, "grad_norm": 3.0444459915161133, "learning_rate": 1.2236459054150635e-05, "loss": 0.4021, "step": 21982 }, { "epoch": 3.5885474062283174, "grad_norm": 3.429321050643921, "learning_rate": 1.2235834148011124e-05, "loss": 0.5206, "step": 21983 }, { "epoch": 3.5887106648708214, "grad_norm": 3.5018513202667236, "learning_rate": 1.2235209232680937e-05, "loss": 0.5245, "step": 21984 }, { "epoch": 3.588873923513326, "grad_norm": 3.298112154006958, "learning_rate": 1.2234584308162656e-05, "loss": 0.4318, "step": 21985 }, { "epoch": 3.5890371821558302, "grad_norm": 3.2049357891082764, "learning_rate": 1.2233959374458839e-05, "loss": 0.4755, "step": 21986 }, { "epoch": 3.5892004407983347, "grad_norm": 3.0853750705718994, "learning_rate": 1.2233334431572058e-05, "loss": 0.431, "step": 21987 }, { "epoch": 3.589363699440839, "grad_norm": 2.92745304107666, "learning_rate": 1.2232709479504886e-05, "loss": 0.4159, "step": 21988 }, { "epoch": 3.5895269580833435, "grad_norm": 3.3426566123962402, "learning_rate": 1.223208451825988e-05, "loss": 0.4768, "step": 21989 }, { "epoch": 3.589690216725848, "grad_norm": 3.3779196739196777, "learning_rate": 1.2231459547839629e-05, "loss": 0.5197, "step": 21990 }, { "epoch": 3.5898534753683524, "grad_norm": 3.096092939376831, "learning_rate": 1.2230834568246684e-05, "loss": 0.4735, "step": 21991 }, { "epoch": 3.590016734010857, "grad_norm": 3.0982518196105957, "learning_rate": 1.2230209579483621e-05, "loss": 0.5033, "step": 21992 }, { "epoch": 3.5901799926533613, "grad_norm": 2.4614975452423096, "learning_rate": 1.2229584581553006e-05, "loss": 0.3704, "step": 21993 }, { "epoch": 3.5903432512958657, "grad_norm": 3.2688069343566895, "learning_rate": 1.2228959574457413e-05, "loss": 0.5002, "step": 21994 }, { "epoch": 3.5905065099383697, "grad_norm": 2.9105374813079834, "learning_rate": 1.222833455819941e-05, "loss": 0.4465, "step": 21995 }, { "epoch": 3.590669768580874, "grad_norm": 2.8651351928710938, "learning_rate": 1.2227709532781565e-05, "loss": 0.4358, "step": 21996 }, { "epoch": 3.5908330272233786, "grad_norm": 3.4265849590301514, "learning_rate": 1.2227084498206444e-05, "loss": 0.5218, "step": 21997 }, { "epoch": 3.590996285865883, "grad_norm": 3.1395328044891357, "learning_rate": 1.2226459454476622e-05, "loss": 0.4524, "step": 21998 }, { "epoch": 3.5911595445083875, "grad_norm": 3.3408374786376953, "learning_rate": 1.2225834401594667e-05, "loss": 0.5551, "step": 21999 }, { "epoch": 3.591322803150892, "grad_norm": 3.5848050117492676, "learning_rate": 1.2225209339563144e-05, "loss": 0.53, "step": 22000 }, { "epoch": 3.591486061793396, "grad_norm": 3.093785047531128, "learning_rate": 1.222458426838463e-05, "loss": 0.4116, "step": 22001 }, { "epoch": 3.5916493204359003, "grad_norm": 3.3739428520202637, "learning_rate": 1.2223959188061687e-05, "loss": 0.4938, "step": 22002 }, { "epoch": 3.591812579078405, "grad_norm": 3.2557849884033203, "learning_rate": 1.2223334098596888e-05, "loss": 0.4806, "step": 22003 }, { "epoch": 3.5919758377209092, "grad_norm": 3.20589280128479, "learning_rate": 1.2222708999992802e-05, "loss": 0.4723, "step": 22004 }, { "epoch": 3.5921390963634137, "grad_norm": 2.9611337184906006, "learning_rate": 1.2222083892251999e-05, "loss": 0.5083, "step": 22005 }, { "epoch": 3.592302355005918, "grad_norm": 3.492682933807373, "learning_rate": 1.2221458775377048e-05, "loss": 0.4993, "step": 22006 }, { "epoch": 3.5924656136484225, "grad_norm": 3.5414912700653076, "learning_rate": 1.2220833649370518e-05, "loss": 0.5998, "step": 22007 }, { "epoch": 3.592628872290927, "grad_norm": 3.2530205249786377, "learning_rate": 1.2220208514234982e-05, "loss": 0.4173, "step": 22008 }, { "epoch": 3.5927921309334314, "grad_norm": 2.7999045848846436, "learning_rate": 1.2219583369973003e-05, "loss": 0.4221, "step": 22009 }, { "epoch": 3.592955389575936, "grad_norm": 2.7856130599975586, "learning_rate": 1.221895821658716e-05, "loss": 0.4355, "step": 22010 }, { "epoch": 3.5931186482184403, "grad_norm": 2.913785457611084, "learning_rate": 1.2218333054080014e-05, "loss": 0.3833, "step": 22011 }, { "epoch": 3.5932819068609443, "grad_norm": 3.3357017040252686, "learning_rate": 1.2217707882454138e-05, "loss": 0.4754, "step": 22012 }, { "epoch": 3.5934451655034487, "grad_norm": 3.3183224201202393, "learning_rate": 1.2217082701712103e-05, "loss": 0.4381, "step": 22013 }, { "epoch": 3.593608424145953, "grad_norm": 2.9449520111083984, "learning_rate": 1.2216457511856479e-05, "loss": 0.4006, "step": 22014 }, { "epoch": 3.5937716827884576, "grad_norm": 3.0908615589141846, "learning_rate": 1.2215832312889837e-05, "loss": 0.4412, "step": 22015 }, { "epoch": 3.593934941430962, "grad_norm": 4.342841625213623, "learning_rate": 1.221520710481474e-05, "loss": 0.6746, "step": 22016 }, { "epoch": 3.5940982000734665, "grad_norm": 2.9130067825317383, "learning_rate": 1.2214581887633767e-05, "loss": 0.477, "step": 22017 }, { "epoch": 3.594261458715971, "grad_norm": 3.099473237991333, "learning_rate": 1.2213956661349481e-05, "loss": 0.5154, "step": 22018 }, { "epoch": 3.594424717358475, "grad_norm": 3.1053287982940674, "learning_rate": 1.2213331425964458e-05, "loss": 0.4514, "step": 22019 }, { "epoch": 3.5945879760009793, "grad_norm": 3.363436698913574, "learning_rate": 1.2212706181481267e-05, "loss": 0.4573, "step": 22020 }, { "epoch": 3.594751234643484, "grad_norm": 3.0418167114257812, "learning_rate": 1.2212080927902474e-05, "loss": 0.4894, "step": 22021 }, { "epoch": 3.594914493285988, "grad_norm": 2.937589168548584, "learning_rate": 1.2211455665230649e-05, "loss": 0.4715, "step": 22022 }, { "epoch": 3.5950777519284927, "grad_norm": 3.4784584045410156, "learning_rate": 1.2210830393468367e-05, "loss": 0.502, "step": 22023 }, { "epoch": 3.595241010570997, "grad_norm": 2.9987406730651855, "learning_rate": 1.2210205112618196e-05, "loss": 0.5035, "step": 22024 }, { "epoch": 3.5954042692135015, "grad_norm": 3.7544798851013184, "learning_rate": 1.2209579822682708e-05, "loss": 0.5804, "step": 22025 }, { "epoch": 3.595567527856006, "grad_norm": 3.2071852684020996, "learning_rate": 1.2208954523664471e-05, "loss": 0.4676, "step": 22026 }, { "epoch": 3.5957307864985104, "grad_norm": 2.83817195892334, "learning_rate": 1.2208329215566055e-05, "loss": 0.4312, "step": 22027 }, { "epoch": 3.595894045141015, "grad_norm": 3.1091666221618652, "learning_rate": 1.2207703898390029e-05, "loss": 0.4339, "step": 22028 }, { "epoch": 3.5960573037835193, "grad_norm": 3.534344434738159, "learning_rate": 1.2207078572138969e-05, "loss": 0.6009, "step": 22029 }, { "epoch": 3.5962205624260233, "grad_norm": 3.537219285964966, "learning_rate": 1.2206453236815446e-05, "loss": 0.4313, "step": 22030 }, { "epoch": 3.5963838210685277, "grad_norm": 2.812756299972534, "learning_rate": 1.2205827892422022e-05, "loss": 0.4015, "step": 22031 }, { "epoch": 3.596547079711032, "grad_norm": 3.3557116985321045, "learning_rate": 1.2205202538961272e-05, "loss": 0.519, "step": 22032 }, { "epoch": 3.5967103383535366, "grad_norm": 3.2399966716766357, "learning_rate": 1.2204577176435767e-05, "loss": 0.5078, "step": 22033 }, { "epoch": 3.596873596996041, "grad_norm": 3.6726200580596924, "learning_rate": 1.220395180484808e-05, "loss": 0.5228, "step": 22034 }, { "epoch": 3.5970368556385455, "grad_norm": 2.6352972984313965, "learning_rate": 1.2203326424200778e-05, "loss": 0.3808, "step": 22035 }, { "epoch": 3.59720011428105, "grad_norm": 3.3893120288848877, "learning_rate": 1.2202701034496434e-05, "loss": 0.513, "step": 22036 }, { "epoch": 3.597363372923554, "grad_norm": 3.646998643875122, "learning_rate": 1.2202075635737619e-05, "loss": 0.5584, "step": 22037 }, { "epoch": 3.5975266315660583, "grad_norm": 3.1334307193756104, "learning_rate": 1.2201450227926898e-05, "loss": 0.4106, "step": 22038 }, { "epoch": 3.597689890208563, "grad_norm": 3.5166115760803223, "learning_rate": 1.220082481106685e-05, "loss": 0.5081, "step": 22039 }, { "epoch": 3.597853148851067, "grad_norm": 2.7998616695404053, "learning_rate": 1.2200199385160041e-05, "loss": 0.3969, "step": 22040 }, { "epoch": 3.5980164074935717, "grad_norm": 3.246870756149292, "learning_rate": 1.2199573950209043e-05, "loss": 0.5009, "step": 22041 }, { "epoch": 3.598179666136076, "grad_norm": 2.979052782058716, "learning_rate": 1.2198948506216428e-05, "loss": 0.399, "step": 22042 }, { "epoch": 3.5983429247785805, "grad_norm": 2.978316068649292, "learning_rate": 1.2198323053184764e-05, "loss": 0.4523, "step": 22043 }, { "epoch": 3.598506183421085, "grad_norm": 2.5987510681152344, "learning_rate": 1.2197697591116627e-05, "loss": 0.3945, "step": 22044 }, { "epoch": 3.5986694420635894, "grad_norm": 3.401695728302002, "learning_rate": 1.2197072120014581e-05, "loss": 0.4768, "step": 22045 }, { "epoch": 3.598832700706094, "grad_norm": 3.277437925338745, "learning_rate": 1.2196446639881206e-05, "loss": 0.4434, "step": 22046 }, { "epoch": 3.5989959593485983, "grad_norm": 3.044809341430664, "learning_rate": 1.2195821150719064e-05, "loss": 0.4967, "step": 22047 }, { "epoch": 3.5991592179911023, "grad_norm": 3.5497210025787354, "learning_rate": 1.2195195652530734e-05, "loss": 0.5044, "step": 22048 }, { "epoch": 3.5993224766336067, "grad_norm": 3.1741087436676025, "learning_rate": 1.219457014531878e-05, "loss": 0.4646, "step": 22049 }, { "epoch": 3.599485735276111, "grad_norm": 3.497802257537842, "learning_rate": 1.2193944629085778e-05, "loss": 0.5515, "step": 22050 }, { "epoch": 3.5996489939186156, "grad_norm": 3.1621780395507812, "learning_rate": 1.21933191038343e-05, "loss": 0.458, "step": 22051 }, { "epoch": 3.59981225256112, "grad_norm": 3.1924660205841064, "learning_rate": 1.2192693569566914e-05, "loss": 0.4979, "step": 22052 }, { "epoch": 3.5999755112036245, "grad_norm": 2.5249826908111572, "learning_rate": 1.2192068026286192e-05, "loss": 0.4046, "step": 22053 }, { "epoch": 3.6001387698461285, "grad_norm": 3.1823084354400635, "learning_rate": 1.2191442473994707e-05, "loss": 0.4173, "step": 22054 }, { "epoch": 3.600302028488633, "grad_norm": 3.143615245819092, "learning_rate": 1.2190816912695027e-05, "loss": 0.4639, "step": 22055 }, { "epoch": 3.6004652871311373, "grad_norm": 3.392606735229492, "learning_rate": 1.2190191342389727e-05, "loss": 0.4866, "step": 22056 }, { "epoch": 3.6006285457736418, "grad_norm": 3.4444901943206787, "learning_rate": 1.2189565763081381e-05, "loss": 0.477, "step": 22057 }, { "epoch": 3.600791804416146, "grad_norm": 3.1271538734436035, "learning_rate": 1.2188940174772554e-05, "loss": 0.42, "step": 22058 }, { "epoch": 3.6009550630586507, "grad_norm": 2.868680953979492, "learning_rate": 1.2188314577465823e-05, "loss": 0.3852, "step": 22059 }, { "epoch": 3.601118321701155, "grad_norm": 2.754845142364502, "learning_rate": 1.2187688971163753e-05, "loss": 0.4246, "step": 22060 }, { "epoch": 3.6012815803436595, "grad_norm": 3.2552125453948975, "learning_rate": 1.218706335586892e-05, "loss": 0.4619, "step": 22061 }, { "epoch": 3.601444838986164, "grad_norm": 3.428400754928589, "learning_rate": 1.2186437731583898e-05, "loss": 0.512, "step": 22062 }, { "epoch": 3.6016080976286684, "grad_norm": 3.232245922088623, "learning_rate": 1.2185812098311254e-05, "loss": 0.4598, "step": 22063 }, { "epoch": 3.601771356271173, "grad_norm": 3.1305148601531982, "learning_rate": 1.2185186456053565e-05, "loss": 0.4173, "step": 22064 }, { "epoch": 3.601934614913677, "grad_norm": 3.4264075756073, "learning_rate": 1.2184560804813397e-05, "loss": 0.5393, "step": 22065 }, { "epoch": 3.6020978735561813, "grad_norm": 2.441178560256958, "learning_rate": 1.2183935144593324e-05, "loss": 0.3523, "step": 22066 }, { "epoch": 3.6022611321986857, "grad_norm": 3.5008537769317627, "learning_rate": 1.2183309475395917e-05, "loss": 0.4125, "step": 22067 }, { "epoch": 3.60242439084119, "grad_norm": 3.0463151931762695, "learning_rate": 1.218268379722375e-05, "loss": 0.4167, "step": 22068 }, { "epoch": 3.6025876494836946, "grad_norm": 2.884049892425537, "learning_rate": 1.21820581100794e-05, "loss": 0.4464, "step": 22069 }, { "epoch": 3.602750908126199, "grad_norm": 3.4022839069366455, "learning_rate": 1.2181432413965428e-05, "loss": 0.4321, "step": 22070 }, { "epoch": 3.6029141667687035, "grad_norm": 2.842432975769043, "learning_rate": 1.2180806708884411e-05, "loss": 0.4587, "step": 22071 }, { "epoch": 3.6030774254112075, "grad_norm": 3.1327412128448486, "learning_rate": 1.2180180994838919e-05, "loss": 0.3855, "step": 22072 }, { "epoch": 3.603240684053712, "grad_norm": 3.070307493209839, "learning_rate": 1.217955527183153e-05, "loss": 0.5393, "step": 22073 }, { "epoch": 3.6034039426962163, "grad_norm": 3.20111083984375, "learning_rate": 1.2178929539864811e-05, "loss": 0.5049, "step": 22074 }, { "epoch": 3.6035672013387208, "grad_norm": 3.0637197494506836, "learning_rate": 1.2178303798941335e-05, "loss": 0.4418, "step": 22075 }, { "epoch": 3.603730459981225, "grad_norm": 2.636953592300415, "learning_rate": 1.2177678049063677e-05, "loss": 0.3875, "step": 22076 }, { "epoch": 3.6038937186237296, "grad_norm": 3.073668956756592, "learning_rate": 1.2177052290234404e-05, "loss": 0.4021, "step": 22077 }, { "epoch": 3.604056977266234, "grad_norm": 3.5358684062957764, "learning_rate": 1.217642652245609e-05, "loss": 0.4972, "step": 22078 }, { "epoch": 3.6042202359087385, "grad_norm": 3.1959269046783447, "learning_rate": 1.2175800745731311e-05, "loss": 0.4412, "step": 22079 }, { "epoch": 3.604383494551243, "grad_norm": 3.4382107257843018, "learning_rate": 1.2175174960062637e-05, "loss": 0.5089, "step": 22080 }, { "epoch": 3.6045467531937474, "grad_norm": 3.58581280708313, "learning_rate": 1.2174549165452639e-05, "loss": 0.5379, "step": 22081 }, { "epoch": 3.604710011836252, "grad_norm": 2.9176506996154785, "learning_rate": 1.217392336190389e-05, "loss": 0.4893, "step": 22082 }, { "epoch": 3.604873270478756, "grad_norm": 3.340632915496826, "learning_rate": 1.2173297549418964e-05, "loss": 0.4565, "step": 22083 }, { "epoch": 3.6050365291212603, "grad_norm": 2.786168336868286, "learning_rate": 1.2172671728000429e-05, "loss": 0.4278, "step": 22084 }, { "epoch": 3.6051997877637647, "grad_norm": 3.6284213066101074, "learning_rate": 1.2172045897650863e-05, "loss": 0.4827, "step": 22085 }, { "epoch": 3.605363046406269, "grad_norm": 2.999739646911621, "learning_rate": 1.2171420058372838e-05, "loss": 0.4004, "step": 22086 }, { "epoch": 3.6055263050487736, "grad_norm": 3.6707260608673096, "learning_rate": 1.2170794210168926e-05, "loss": 0.6286, "step": 22087 }, { "epoch": 3.605689563691278, "grad_norm": 3.1230640411376953, "learning_rate": 1.2170168353041696e-05, "loss": 0.4371, "step": 22088 }, { "epoch": 3.6058528223337825, "grad_norm": 3.4180715084075928, "learning_rate": 1.2169542486993723e-05, "loss": 0.4926, "step": 22089 }, { "epoch": 3.6060160809762865, "grad_norm": 2.718904733657837, "learning_rate": 1.2168916612027581e-05, "loss": 0.3552, "step": 22090 }, { "epoch": 3.606179339618791, "grad_norm": 2.9117069244384766, "learning_rate": 1.2168290728145842e-05, "loss": 0.446, "step": 22091 }, { "epoch": 3.6063425982612953, "grad_norm": 2.8379533290863037, "learning_rate": 1.2167664835351079e-05, "loss": 0.4425, "step": 22092 }, { "epoch": 3.6065058569037998, "grad_norm": 3.9012091159820557, "learning_rate": 1.2167038933645862e-05, "loss": 0.5647, "step": 22093 }, { "epoch": 3.606669115546304, "grad_norm": 4.002235412597656, "learning_rate": 1.216641302303277e-05, "loss": 0.626, "step": 22094 }, { "epoch": 3.6068323741888086, "grad_norm": 3.4990944862365723, "learning_rate": 1.2165787103514371e-05, "loss": 0.4829, "step": 22095 }, { "epoch": 3.606995632831313, "grad_norm": 3.6944186687469482, "learning_rate": 1.2165161175093237e-05, "loss": 0.4774, "step": 22096 }, { "epoch": 3.6071588914738175, "grad_norm": 2.963505744934082, "learning_rate": 1.2164535237771944e-05, "loss": 0.4611, "step": 22097 }, { "epoch": 3.607322150116322, "grad_norm": 2.5887510776519775, "learning_rate": 1.2163909291553067e-05, "loss": 0.4489, "step": 22098 }, { "epoch": 3.6074854087588264, "grad_norm": 3.522569179534912, "learning_rate": 1.2163283336439172e-05, "loss": 0.5726, "step": 22099 }, { "epoch": 3.607648667401331, "grad_norm": 3.21390962600708, "learning_rate": 1.2162657372432835e-05, "loss": 0.4816, "step": 22100 }, { "epoch": 3.607811926043835, "grad_norm": 2.6423521041870117, "learning_rate": 1.2162031399536633e-05, "loss": 0.4295, "step": 22101 }, { "epoch": 3.6079751846863393, "grad_norm": 3.5741219520568848, "learning_rate": 1.2161405417753133e-05, "loss": 0.5023, "step": 22102 }, { "epoch": 3.6081384433288437, "grad_norm": 2.8119606971740723, "learning_rate": 1.2160779427084918e-05, "loss": 0.3993, "step": 22103 }, { "epoch": 3.608301701971348, "grad_norm": 2.8955063819885254, "learning_rate": 1.2160153427534551e-05, "loss": 0.4546, "step": 22104 }, { "epoch": 3.6084649606138526, "grad_norm": 2.582597017288208, "learning_rate": 1.2159527419104606e-05, "loss": 0.3911, "step": 22105 }, { "epoch": 3.608628219256357, "grad_norm": 2.7016592025756836, "learning_rate": 1.2158901401797662e-05, "loss": 0.4226, "step": 22106 }, { "epoch": 3.608791477898861, "grad_norm": 3.460362434387207, "learning_rate": 1.2158275375616288e-05, "loss": 0.5124, "step": 22107 }, { "epoch": 3.6089547365413654, "grad_norm": 3.3670341968536377, "learning_rate": 1.2157649340563062e-05, "loss": 0.4391, "step": 22108 }, { "epoch": 3.60911799518387, "grad_norm": 3.4013805389404297, "learning_rate": 1.2157023296640554e-05, "loss": 0.4996, "step": 22109 }, { "epoch": 3.6092812538263743, "grad_norm": 3.406315803527832, "learning_rate": 1.2156397243851334e-05, "loss": 0.5169, "step": 22110 }, { "epoch": 3.6094445124688788, "grad_norm": 2.896902084350586, "learning_rate": 1.2155771182197982e-05, "loss": 0.4121, "step": 22111 }, { "epoch": 3.609607771111383, "grad_norm": 2.5379951000213623, "learning_rate": 1.2155145111683066e-05, "loss": 0.3784, "step": 22112 }, { "epoch": 3.6097710297538876, "grad_norm": 4.2659010887146, "learning_rate": 1.2154519032309166e-05, "loss": 0.6209, "step": 22113 }, { "epoch": 3.609934288396392, "grad_norm": 3.346433162689209, "learning_rate": 1.215389294407885e-05, "loss": 0.4377, "step": 22114 }, { "epoch": 3.6100975470388965, "grad_norm": 3.3560876846313477, "learning_rate": 1.2153266846994695e-05, "loss": 0.4784, "step": 22115 }, { "epoch": 3.610260805681401, "grad_norm": 3.008676767349243, "learning_rate": 1.215264074105927e-05, "loss": 0.4332, "step": 22116 }, { "epoch": 3.6104240643239054, "grad_norm": 3.2464401721954346, "learning_rate": 1.2152014626275153e-05, "loss": 0.4544, "step": 22117 }, { "epoch": 3.6105873229664094, "grad_norm": 3.3236725330352783, "learning_rate": 1.2151388502644918e-05, "loss": 0.4642, "step": 22118 }, { "epoch": 3.610750581608914, "grad_norm": 2.887364625930786, "learning_rate": 1.2150762370171137e-05, "loss": 0.4005, "step": 22119 }, { "epoch": 3.6109138402514183, "grad_norm": 3.09941029548645, "learning_rate": 1.2150136228856382e-05, "loss": 0.5221, "step": 22120 }, { "epoch": 3.6110770988939227, "grad_norm": 3.345798969268799, "learning_rate": 1.2149510078703229e-05, "loss": 0.461, "step": 22121 }, { "epoch": 3.611240357536427, "grad_norm": 3.8786251544952393, "learning_rate": 1.2148883919714254e-05, "loss": 0.5336, "step": 22122 }, { "epoch": 3.6114036161789316, "grad_norm": 3.5955779552459717, "learning_rate": 1.2148257751892026e-05, "loss": 0.464, "step": 22123 }, { "epoch": 3.611566874821436, "grad_norm": 3.926919937133789, "learning_rate": 1.2147631575239123e-05, "loss": 0.5318, "step": 22124 }, { "epoch": 3.61173013346394, "grad_norm": 3.423210382461548, "learning_rate": 1.2147005389758117e-05, "loss": 0.4373, "step": 22125 }, { "epoch": 3.6118933921064444, "grad_norm": 3.2640817165374756, "learning_rate": 1.2146379195451582e-05, "loss": 0.4676, "step": 22126 }, { "epoch": 3.612056650748949, "grad_norm": 3.412883758544922, "learning_rate": 1.2145752992322093e-05, "loss": 0.5283, "step": 22127 }, { "epoch": 3.6122199093914533, "grad_norm": 3.2006731033325195, "learning_rate": 1.2145126780372225e-05, "loss": 0.5323, "step": 22128 }, { "epoch": 3.6123831680339578, "grad_norm": 2.8590545654296875, "learning_rate": 1.2144500559604548e-05, "loss": 0.4624, "step": 22129 }, { "epoch": 3.612546426676462, "grad_norm": 2.8957459926605225, "learning_rate": 1.214387433002164e-05, "loss": 0.4342, "step": 22130 }, { "epoch": 3.6127096853189666, "grad_norm": 2.7720963954925537, "learning_rate": 1.2143248091626077e-05, "loss": 0.4268, "step": 22131 }, { "epoch": 3.612872943961471, "grad_norm": 3.4440746307373047, "learning_rate": 1.2142621844420429e-05, "loss": 0.5017, "step": 22132 }, { "epoch": 3.6130362026039755, "grad_norm": 3.2038280963897705, "learning_rate": 1.214199558840727e-05, "loss": 0.5098, "step": 22133 }, { "epoch": 3.61319946124648, "grad_norm": 3.038161039352417, "learning_rate": 1.2141369323589177e-05, "loss": 0.4968, "step": 22134 }, { "epoch": 3.6133627198889844, "grad_norm": 3.622922658920288, "learning_rate": 1.2140743049968721e-05, "loss": 0.5356, "step": 22135 }, { "epoch": 3.6135259785314884, "grad_norm": 3.7963368892669678, "learning_rate": 1.2140116767548481e-05, "loss": 0.5035, "step": 22136 }, { "epoch": 3.613689237173993, "grad_norm": 3.4774844646453857, "learning_rate": 1.2139490476331028e-05, "loss": 0.5159, "step": 22137 }, { "epoch": 3.6138524958164973, "grad_norm": 3.296107530593872, "learning_rate": 1.213886417631894e-05, "loss": 0.5232, "step": 22138 }, { "epoch": 3.6140157544590017, "grad_norm": 3.0116262435913086, "learning_rate": 1.2138237867514786e-05, "loss": 0.4489, "step": 22139 }, { "epoch": 3.614179013101506, "grad_norm": 3.234412431716919, "learning_rate": 1.2137611549921147e-05, "loss": 0.4922, "step": 22140 }, { "epoch": 3.6143422717440106, "grad_norm": 3.1781044006347656, "learning_rate": 1.2136985223540592e-05, "loss": 0.468, "step": 22141 }, { "epoch": 3.6145055303865146, "grad_norm": 3.1000404357910156, "learning_rate": 1.2136358888375698e-05, "loss": 0.4043, "step": 22142 }, { "epoch": 3.614668789029019, "grad_norm": 3.219373941421509, "learning_rate": 1.2135732544429038e-05, "loss": 0.637, "step": 22143 }, { "epoch": 3.6148320476715234, "grad_norm": 3.824805498123169, "learning_rate": 1.2135106191703188e-05, "loss": 0.5423, "step": 22144 }, { "epoch": 3.614995306314028, "grad_norm": 3.0805675983428955, "learning_rate": 1.2134479830200723e-05, "loss": 0.501, "step": 22145 }, { "epoch": 3.6151585649565323, "grad_norm": 3.029419422149658, "learning_rate": 1.2133853459924217e-05, "loss": 0.4891, "step": 22146 }, { "epoch": 3.6153218235990368, "grad_norm": 3.6226675510406494, "learning_rate": 1.2133227080876249e-05, "loss": 0.5116, "step": 22147 }, { "epoch": 3.615485082241541, "grad_norm": 3.2953667640686035, "learning_rate": 1.2132600693059386e-05, "loss": 0.4769, "step": 22148 }, { "epoch": 3.6156483408840456, "grad_norm": 3.573798179626465, "learning_rate": 1.2131974296476206e-05, "loss": 0.5468, "step": 22149 }, { "epoch": 3.61581159952655, "grad_norm": 3.4906067848205566, "learning_rate": 1.2131347891129286e-05, "loss": 0.5407, "step": 22150 }, { "epoch": 3.6159748581690545, "grad_norm": 2.659274101257324, "learning_rate": 1.2130721477021198e-05, "loss": 0.3592, "step": 22151 }, { "epoch": 3.616138116811559, "grad_norm": 3.281709671020508, "learning_rate": 1.2130095054154525e-05, "loss": 0.4639, "step": 22152 }, { "epoch": 3.616301375454063, "grad_norm": 3.2122485637664795, "learning_rate": 1.212946862253183e-05, "loss": 0.4496, "step": 22153 }, { "epoch": 3.6164646340965674, "grad_norm": 2.940669059753418, "learning_rate": 1.2128842182155694e-05, "loss": 0.4234, "step": 22154 }, { "epoch": 3.616627892739072, "grad_norm": 2.95651912689209, "learning_rate": 1.2128215733028692e-05, "loss": 0.4318, "step": 22155 }, { "epoch": 3.6167911513815763, "grad_norm": 3.332751750946045, "learning_rate": 1.2127589275153397e-05, "loss": 0.502, "step": 22156 }, { "epoch": 3.6169544100240807, "grad_norm": 2.779151678085327, "learning_rate": 1.2126962808532391e-05, "loss": 0.4379, "step": 22157 }, { "epoch": 3.617117668666585, "grad_norm": 3.1079318523406982, "learning_rate": 1.2126336333168241e-05, "loss": 0.4868, "step": 22158 }, { "epoch": 3.6172809273090896, "grad_norm": 3.5928492546081543, "learning_rate": 1.2125709849063525e-05, "loss": 0.5465, "step": 22159 }, { "epoch": 3.6174441859515936, "grad_norm": 3.5839149951934814, "learning_rate": 1.2125083356220818e-05, "loss": 0.516, "step": 22160 }, { "epoch": 3.617607444594098, "grad_norm": 2.6436054706573486, "learning_rate": 1.2124456854642695e-05, "loss": 0.4543, "step": 22161 }, { "epoch": 3.6177707032366024, "grad_norm": 3.024562358856201, "learning_rate": 1.2123830344331735e-05, "loss": 0.4328, "step": 22162 }, { "epoch": 3.617933961879107, "grad_norm": 4.1258416175842285, "learning_rate": 1.2123203825290509e-05, "loss": 0.5021, "step": 22163 }, { "epoch": 3.6180972205216113, "grad_norm": 3.108062267303467, "learning_rate": 1.2122577297521593e-05, "loss": 0.4831, "step": 22164 }, { "epoch": 3.6182604791641158, "grad_norm": 2.9170496463775635, "learning_rate": 1.2121950761027563e-05, "loss": 0.4193, "step": 22165 }, { "epoch": 3.61842373780662, "grad_norm": 3.431248426437378, "learning_rate": 1.2121324215810997e-05, "loss": 0.5056, "step": 22166 }, { "epoch": 3.6185869964491246, "grad_norm": 3.851083755493164, "learning_rate": 1.2120697661874465e-05, "loss": 0.4575, "step": 22167 }, { "epoch": 3.618750255091629, "grad_norm": 3.2107033729553223, "learning_rate": 1.212007109922055e-05, "loss": 0.4304, "step": 22168 }, { "epoch": 3.6189135137341335, "grad_norm": 2.40047287940979, "learning_rate": 1.2119444527851818e-05, "loss": 0.3759, "step": 22169 }, { "epoch": 3.619076772376638, "grad_norm": 3.1940371990203857, "learning_rate": 1.2118817947770853e-05, "loss": 0.4762, "step": 22170 }, { "epoch": 3.619240031019142, "grad_norm": 3.0603814125061035, "learning_rate": 1.2118191358980228e-05, "loss": 0.4702, "step": 22171 }, { "epoch": 3.6194032896616464, "grad_norm": 2.662834405899048, "learning_rate": 1.2117564761482515e-05, "loss": 0.4489, "step": 22172 }, { "epoch": 3.619566548304151, "grad_norm": 3.145946502685547, "learning_rate": 1.2116938155280295e-05, "loss": 0.4356, "step": 22173 }, { "epoch": 3.6197298069466552, "grad_norm": 3.329423189163208, "learning_rate": 1.2116311540376142e-05, "loss": 0.4933, "step": 22174 }, { "epoch": 3.6198930655891597, "grad_norm": 3.63883113861084, "learning_rate": 1.211568491677263e-05, "loss": 0.494, "step": 22175 }, { "epoch": 3.620056324231664, "grad_norm": 3.0756540298461914, "learning_rate": 1.2115058284472336e-05, "loss": 0.4269, "step": 22176 }, { "epoch": 3.6202195828741686, "grad_norm": 2.9197914600372314, "learning_rate": 1.2114431643477837e-05, "loss": 0.4376, "step": 22177 }, { "epoch": 3.6203828415166726, "grad_norm": 3.020763397216797, "learning_rate": 1.2113804993791709e-05, "loss": 0.425, "step": 22178 }, { "epoch": 3.620546100159177, "grad_norm": 3.340785264968872, "learning_rate": 1.2113178335416525e-05, "loss": 0.5011, "step": 22179 }, { "epoch": 3.6207093588016814, "grad_norm": 3.8576505184173584, "learning_rate": 1.2112551668354863e-05, "loss": 0.5897, "step": 22180 }, { "epoch": 3.620872617444186, "grad_norm": 3.199519395828247, "learning_rate": 1.21119249926093e-05, "loss": 0.4157, "step": 22181 }, { "epoch": 3.6210358760866903, "grad_norm": 3.4641590118408203, "learning_rate": 1.211129830818241e-05, "loss": 0.4781, "step": 22182 }, { "epoch": 3.6211991347291947, "grad_norm": 4.800123691558838, "learning_rate": 1.211067161507677e-05, "loss": 0.474, "step": 22183 }, { "epoch": 3.621362393371699, "grad_norm": 3.1343703269958496, "learning_rate": 1.2110044913294955e-05, "loss": 0.4128, "step": 22184 }, { "epoch": 3.6215256520142036, "grad_norm": 3.032721519470215, "learning_rate": 1.2109418202839543e-05, "loss": 0.4527, "step": 22185 }, { "epoch": 3.621688910656708, "grad_norm": 3.260735034942627, "learning_rate": 1.2108791483713112e-05, "loss": 0.476, "step": 22186 }, { "epoch": 3.6218521692992125, "grad_norm": 2.7004802227020264, "learning_rate": 1.210816475591823e-05, "loss": 0.4119, "step": 22187 }, { "epoch": 3.622015427941717, "grad_norm": 3.1624085903167725, "learning_rate": 1.2107538019457481e-05, "loss": 0.4913, "step": 22188 }, { "epoch": 3.622178686584221, "grad_norm": 3.4222986698150635, "learning_rate": 1.2106911274333439e-05, "loss": 0.4656, "step": 22189 }, { "epoch": 3.6223419452267254, "grad_norm": 2.936065435409546, "learning_rate": 1.2106284520548681e-05, "loss": 0.4249, "step": 22190 }, { "epoch": 3.62250520386923, "grad_norm": 4.188549518585205, "learning_rate": 1.2105657758105785e-05, "loss": 0.5192, "step": 22191 }, { "epoch": 3.6226684625117342, "grad_norm": 4.019122123718262, "learning_rate": 1.2105030987007323e-05, "loss": 0.5507, "step": 22192 }, { "epoch": 3.6228317211542387, "grad_norm": 3.9035003185272217, "learning_rate": 1.210440420725587e-05, "loss": 0.5419, "step": 22193 }, { "epoch": 3.622994979796743, "grad_norm": 3.3208162784576416, "learning_rate": 1.2103777418854008e-05, "loss": 0.4844, "step": 22194 }, { "epoch": 3.623158238439247, "grad_norm": 3.017643690109253, "learning_rate": 1.2103150621804311e-05, "loss": 0.4419, "step": 22195 }, { "epoch": 3.6233214970817516, "grad_norm": 3.221558094024658, "learning_rate": 1.2102523816109361e-05, "loss": 0.5019, "step": 22196 }, { "epoch": 3.623484755724256, "grad_norm": 3.1120128631591797, "learning_rate": 1.2101897001771726e-05, "loss": 0.4544, "step": 22197 }, { "epoch": 3.6236480143667604, "grad_norm": 3.19490385055542, "learning_rate": 1.2101270178793983e-05, "loss": 0.4657, "step": 22198 }, { "epoch": 3.623811273009265, "grad_norm": 3.7196285724639893, "learning_rate": 1.2100643347178713e-05, "loss": 0.4688, "step": 22199 }, { "epoch": 3.6239745316517693, "grad_norm": 3.0983657836914062, "learning_rate": 1.2100016506928494e-05, "loss": 0.5095, "step": 22200 }, { "epoch": 3.6241377902942737, "grad_norm": 3.1984951496124268, "learning_rate": 1.2099389658045902e-05, "loss": 0.5066, "step": 22201 }, { "epoch": 3.624301048936778, "grad_norm": 3.5534512996673584, "learning_rate": 1.2098762800533508e-05, "loss": 0.5024, "step": 22202 }, { "epoch": 3.6244643075792826, "grad_norm": 3.3760573863983154, "learning_rate": 1.2098135934393894e-05, "loss": 0.4334, "step": 22203 }, { "epoch": 3.624627566221787, "grad_norm": 3.1380608081817627, "learning_rate": 1.209750905962963e-05, "loss": 0.3641, "step": 22204 }, { "epoch": 3.6247908248642915, "grad_norm": 3.5474307537078857, "learning_rate": 1.2096882176243306e-05, "loss": 0.4887, "step": 22205 }, { "epoch": 3.6249540835067955, "grad_norm": 3.0039196014404297, "learning_rate": 1.2096255284237487e-05, "loss": 0.4035, "step": 22206 }, { "epoch": 3.6251173421493, "grad_norm": 2.8728952407836914, "learning_rate": 1.2095628383614757e-05, "loss": 0.4234, "step": 22207 }, { "epoch": 3.6252806007918044, "grad_norm": 2.81801176071167, "learning_rate": 1.2095001474377688e-05, "loss": 0.3505, "step": 22208 }, { "epoch": 3.625443859434309, "grad_norm": 3.1623282432556152, "learning_rate": 1.2094374556528859e-05, "loss": 0.4526, "step": 22209 }, { "epoch": 3.6256071180768132, "grad_norm": 3.341895818710327, "learning_rate": 1.2093747630070844e-05, "loss": 0.4487, "step": 22210 }, { "epoch": 3.6257703767193177, "grad_norm": 3.7853991985321045, "learning_rate": 1.2093120695006228e-05, "loss": 0.5377, "step": 22211 }, { "epoch": 3.625933635361822, "grad_norm": 3.452826499938965, "learning_rate": 1.2092493751337585e-05, "loss": 0.5229, "step": 22212 }, { "epoch": 3.626096894004326, "grad_norm": 3.3948445320129395, "learning_rate": 1.2091866799067485e-05, "loss": 0.4928, "step": 22213 }, { "epoch": 3.6262601526468305, "grad_norm": 3.306288957595825, "learning_rate": 1.2091239838198513e-05, "loss": 0.4757, "step": 22214 }, { "epoch": 3.626423411289335, "grad_norm": 3.475621223449707, "learning_rate": 1.2090612868733242e-05, "loss": 0.4901, "step": 22215 }, { "epoch": 3.6265866699318394, "grad_norm": 2.9499692916870117, "learning_rate": 1.2089985890674251e-05, "loss": 0.436, "step": 22216 }, { "epoch": 3.626749928574344, "grad_norm": 3.4272284507751465, "learning_rate": 1.2089358904024117e-05, "loss": 0.4438, "step": 22217 }, { "epoch": 3.6269131872168483, "grad_norm": 3.6589395999908447, "learning_rate": 1.208873190878542e-05, "loss": 0.5271, "step": 22218 }, { "epoch": 3.6270764458593527, "grad_norm": 2.944474697113037, "learning_rate": 1.2088104904960734e-05, "loss": 0.3656, "step": 22219 }, { "epoch": 3.627239704501857, "grad_norm": 3.4012107849121094, "learning_rate": 1.2087477892552635e-05, "loss": 0.5132, "step": 22220 }, { "epoch": 3.6274029631443616, "grad_norm": 5.128030776977539, "learning_rate": 1.2086850871563705e-05, "loss": 0.4898, "step": 22221 }, { "epoch": 3.627566221786866, "grad_norm": 3.3925933837890625, "learning_rate": 1.2086223841996519e-05, "loss": 0.4265, "step": 22222 }, { "epoch": 3.6277294804293705, "grad_norm": 3.0294480323791504, "learning_rate": 1.2085596803853651e-05, "loss": 0.4449, "step": 22223 }, { "epoch": 3.6278927390718745, "grad_norm": 3.6758360862731934, "learning_rate": 1.2084969757137685e-05, "loss": 0.5531, "step": 22224 }, { "epoch": 3.628055997714379, "grad_norm": 3.548863410949707, "learning_rate": 1.2084342701851197e-05, "loss": 0.5401, "step": 22225 }, { "epoch": 3.6282192563568834, "grad_norm": 3.09497332572937, "learning_rate": 1.208371563799676e-05, "loss": 0.4615, "step": 22226 }, { "epoch": 3.628382514999388, "grad_norm": 3.0030558109283447, "learning_rate": 1.2083088565576957e-05, "loss": 0.4616, "step": 22227 }, { "epoch": 3.6285457736418922, "grad_norm": 3.101536750793457, "learning_rate": 1.2082461484594363e-05, "loss": 0.4631, "step": 22228 }, { "epoch": 3.6287090322843967, "grad_norm": 3.5953290462493896, "learning_rate": 1.2081834395051555e-05, "loss": 0.54, "step": 22229 }, { "epoch": 3.6288722909269007, "grad_norm": 3.2145018577575684, "learning_rate": 1.2081207296951116e-05, "loss": 0.4891, "step": 22230 }, { "epoch": 3.629035549569405, "grad_norm": 2.7607178688049316, "learning_rate": 1.2080580190295614e-05, "loss": 0.4113, "step": 22231 }, { "epoch": 3.6291988082119095, "grad_norm": 3.298215389251709, "learning_rate": 1.2079953075087634e-05, "loss": 0.4683, "step": 22232 }, { "epoch": 3.629362066854414, "grad_norm": 3.5265820026397705, "learning_rate": 1.2079325951329754e-05, "loss": 0.4806, "step": 22233 }, { "epoch": 3.6295253254969184, "grad_norm": 3.0037877559661865, "learning_rate": 1.2078698819024549e-05, "loss": 0.4844, "step": 22234 }, { "epoch": 3.629688584139423, "grad_norm": 3.220792055130005, "learning_rate": 1.2078071678174601e-05, "loss": 0.4818, "step": 22235 }, { "epoch": 3.6298518427819273, "grad_norm": 2.9259798526763916, "learning_rate": 1.2077444528782483e-05, "loss": 0.4447, "step": 22236 }, { "epoch": 3.6300151014244317, "grad_norm": 3.538154363632202, "learning_rate": 1.2076817370850771e-05, "loss": 0.4966, "step": 22237 }, { "epoch": 3.630178360066936, "grad_norm": 3.4848239421844482, "learning_rate": 1.207619020438205e-05, "loss": 0.4613, "step": 22238 }, { "epoch": 3.6303416187094406, "grad_norm": 3.0430972576141357, "learning_rate": 1.2075563029378896e-05, "loss": 0.4679, "step": 22239 }, { "epoch": 3.630504877351945, "grad_norm": 3.0423378944396973, "learning_rate": 1.2074935845843887e-05, "loss": 0.4833, "step": 22240 }, { "epoch": 3.630668135994449, "grad_norm": 3.3585727214813232, "learning_rate": 1.2074308653779598e-05, "loss": 0.4271, "step": 22241 }, { "epoch": 3.6308313946369535, "grad_norm": 3.3991215229034424, "learning_rate": 1.2073681453188611e-05, "loss": 0.496, "step": 22242 }, { "epoch": 3.630994653279458, "grad_norm": 2.787569761276245, "learning_rate": 1.2073054244073498e-05, "loss": 0.4203, "step": 22243 }, { "epoch": 3.6311579119219624, "grad_norm": 4.230159282684326, "learning_rate": 1.2072427026436846e-05, "loss": 0.5739, "step": 22244 }, { "epoch": 3.631321170564467, "grad_norm": 2.9919261932373047, "learning_rate": 1.2071799800281232e-05, "loss": 0.4677, "step": 22245 }, { "epoch": 3.6314844292069712, "grad_norm": 3.260313034057617, "learning_rate": 1.2071172565609226e-05, "loss": 0.4288, "step": 22246 }, { "epoch": 3.6316476878494757, "grad_norm": 3.6990764141082764, "learning_rate": 1.2070545322423415e-05, "loss": 0.9892, "step": 22247 }, { "epoch": 3.6318109464919797, "grad_norm": 3.0278167724609375, "learning_rate": 1.206991807072637e-05, "loss": 0.4441, "step": 22248 }, { "epoch": 3.631974205134484, "grad_norm": 2.6556129455566406, "learning_rate": 1.2069290810520675e-05, "loss": 0.418, "step": 22249 }, { "epoch": 3.6321374637769885, "grad_norm": 3.741081476211548, "learning_rate": 1.206866354180891e-05, "loss": 0.5887, "step": 22250 }, { "epoch": 3.632300722419493, "grad_norm": 2.714402675628662, "learning_rate": 1.2068036264593647e-05, "loss": 0.4535, "step": 22251 }, { "epoch": 3.6324639810619974, "grad_norm": 3.4602584838867188, "learning_rate": 1.206740897887747e-05, "loss": 0.4793, "step": 22252 }, { "epoch": 3.632627239704502, "grad_norm": 2.644233465194702, "learning_rate": 1.2066781684662954e-05, "loss": 0.4452, "step": 22253 }, { "epoch": 3.6327904983470063, "grad_norm": 3.069197416305542, "learning_rate": 1.2066154381952678e-05, "loss": 0.4991, "step": 22254 }, { "epoch": 3.6329537569895107, "grad_norm": 3.038670301437378, "learning_rate": 1.2065527070749224e-05, "loss": 0.49, "step": 22255 }, { "epoch": 3.633117015632015, "grad_norm": 3.5683248043060303, "learning_rate": 1.2064899751055165e-05, "loss": 0.388, "step": 22256 }, { "epoch": 3.6332802742745196, "grad_norm": 3.608625888824463, "learning_rate": 1.2064272422873085e-05, "loss": 0.5353, "step": 22257 }, { "epoch": 3.633443532917024, "grad_norm": 3.2501611709594727, "learning_rate": 1.2063645086205559e-05, "loss": 0.4674, "step": 22258 }, { "epoch": 3.633606791559528, "grad_norm": 3.2334086894989014, "learning_rate": 1.206301774105517e-05, "loss": 0.4945, "step": 22259 }, { "epoch": 3.6337700502020325, "grad_norm": 3.4006190299987793, "learning_rate": 1.2062390387424492e-05, "loss": 0.4675, "step": 22260 }, { "epoch": 3.633933308844537, "grad_norm": 3.5328409671783447, "learning_rate": 1.2061763025316105e-05, "loss": 0.4834, "step": 22261 }, { "epoch": 3.6340965674870414, "grad_norm": 3.243114709854126, "learning_rate": 1.206113565473259e-05, "loss": 0.4258, "step": 22262 }, { "epoch": 3.634259826129546, "grad_norm": 3.331080913543701, "learning_rate": 1.2060508275676527e-05, "loss": 0.4209, "step": 22263 }, { "epoch": 3.6344230847720502, "grad_norm": 3.191075086593628, "learning_rate": 1.2059880888150488e-05, "loss": 0.5072, "step": 22264 }, { "epoch": 3.6345863434145547, "grad_norm": 3.144685983657837, "learning_rate": 1.2059253492157057e-05, "loss": 0.425, "step": 22265 }, { "epoch": 3.6347496020570587, "grad_norm": 3.59053111076355, "learning_rate": 1.2058626087698814e-05, "loss": 0.5037, "step": 22266 }, { "epoch": 3.634912860699563, "grad_norm": 3.5745513439178467, "learning_rate": 1.2057998674778337e-05, "loss": 0.4505, "step": 22267 }, { "epoch": 3.6350761193420675, "grad_norm": 3.916750192642212, "learning_rate": 1.2057371253398205e-05, "loss": 0.6243, "step": 22268 }, { "epoch": 3.635239377984572, "grad_norm": 3.26114821434021, "learning_rate": 1.2056743823560995e-05, "loss": 0.4684, "step": 22269 }, { "epoch": 3.6354026366270764, "grad_norm": 3.8246710300445557, "learning_rate": 1.2056116385269287e-05, "loss": 0.6174, "step": 22270 }, { "epoch": 3.635565895269581, "grad_norm": 3.7968780994415283, "learning_rate": 1.205548893852566e-05, "loss": 0.5473, "step": 22271 }, { "epoch": 3.6357291539120853, "grad_norm": 2.717616319656372, "learning_rate": 1.2054861483332699e-05, "loss": 0.3876, "step": 22272 }, { "epoch": 3.6358924125545897, "grad_norm": 3.525956392288208, "learning_rate": 1.2054234019692976e-05, "loss": 0.4788, "step": 22273 }, { "epoch": 3.636055671197094, "grad_norm": 3.1732535362243652, "learning_rate": 1.2053606547609073e-05, "loss": 0.482, "step": 22274 }, { "epoch": 3.6362189298395986, "grad_norm": 4.255188465118408, "learning_rate": 1.2052979067083568e-05, "loss": 0.6057, "step": 22275 }, { "epoch": 3.636382188482103, "grad_norm": 3.165695905685425, "learning_rate": 1.2052351578119039e-05, "loss": 0.4699, "step": 22276 }, { "epoch": 3.636545447124607, "grad_norm": 3.3836984634399414, "learning_rate": 1.205172408071807e-05, "loss": 0.459, "step": 22277 }, { "epoch": 3.6367087057671115, "grad_norm": 3.215898036956787, "learning_rate": 1.2051096574883238e-05, "loss": 0.4701, "step": 22278 }, { "epoch": 3.636871964409616, "grad_norm": 2.9539828300476074, "learning_rate": 1.2050469060617125e-05, "loss": 0.3828, "step": 22279 }, { "epoch": 3.6370352230521203, "grad_norm": 2.514357566833496, "learning_rate": 1.2049841537922307e-05, "loss": 0.4151, "step": 22280 }, { "epoch": 3.637198481694625, "grad_norm": 3.0012593269348145, "learning_rate": 1.204921400680136e-05, "loss": 0.5345, "step": 22281 }, { "epoch": 3.6373617403371292, "grad_norm": 2.8712313175201416, "learning_rate": 1.204858646725687e-05, "loss": 0.4329, "step": 22282 }, { "epoch": 3.637524998979633, "grad_norm": 3.4859204292297363, "learning_rate": 1.2047958919291416e-05, "loss": 0.3977, "step": 22283 }, { "epoch": 3.6376882576221377, "grad_norm": 2.540386438369751, "learning_rate": 1.2047331362907577e-05, "loss": 0.4018, "step": 22284 }, { "epoch": 3.637851516264642, "grad_norm": 3.020350217819214, "learning_rate": 1.204670379810793e-05, "loss": 0.4836, "step": 22285 }, { "epoch": 3.6380147749071465, "grad_norm": 2.96844744682312, "learning_rate": 1.2046076224895058e-05, "loss": 0.4884, "step": 22286 }, { "epoch": 3.638178033549651, "grad_norm": 3.360574960708618, "learning_rate": 1.2045448643271536e-05, "loss": 0.5069, "step": 22287 }, { "epoch": 3.6383412921921554, "grad_norm": 3.1395304203033447, "learning_rate": 1.204482105323995e-05, "loss": 0.4932, "step": 22288 }, { "epoch": 3.63850455083466, "grad_norm": 3.4730896949768066, "learning_rate": 1.2044193454802877e-05, "loss": 0.4833, "step": 22289 }, { "epoch": 3.6386678094771643, "grad_norm": 3.7053561210632324, "learning_rate": 1.2043565847962895e-05, "loss": 0.4974, "step": 22290 }, { "epoch": 3.6388310681196687, "grad_norm": 3.080960750579834, "learning_rate": 1.2042938232722587e-05, "loss": 0.4164, "step": 22291 }, { "epoch": 3.638994326762173, "grad_norm": 3.877387523651123, "learning_rate": 1.2042310609084527e-05, "loss": 0.5693, "step": 22292 }, { "epoch": 3.6391575854046776, "grad_norm": 3.170414447784424, "learning_rate": 1.2041682977051303e-05, "loss": 0.4019, "step": 22293 }, { "epoch": 3.6393208440471816, "grad_norm": 2.9546103477478027, "learning_rate": 1.2041055336625494e-05, "loss": 0.4261, "step": 22294 }, { "epoch": 3.639484102689686, "grad_norm": 3.0939478874206543, "learning_rate": 1.2040427687809672e-05, "loss": 0.4725, "step": 22295 }, { "epoch": 3.6396473613321905, "grad_norm": 3.5108137130737305, "learning_rate": 1.2039800030606425e-05, "loss": 0.4549, "step": 22296 }, { "epoch": 3.639810619974695, "grad_norm": 3.0510714054107666, "learning_rate": 1.203917236501833e-05, "loss": 0.4272, "step": 22297 }, { "epoch": 3.6399738786171993, "grad_norm": 3.060387134552002, "learning_rate": 1.2038544691047965e-05, "loss": 0.4699, "step": 22298 }, { "epoch": 3.640137137259704, "grad_norm": 2.8212945461273193, "learning_rate": 1.2037917008697914e-05, "loss": 0.4341, "step": 22299 }, { "epoch": 3.640300395902208, "grad_norm": 3.149191379547119, "learning_rate": 1.2037289317970757e-05, "loss": 0.3752, "step": 22300 }, { "epoch": 3.640463654544712, "grad_norm": 3.0065860748291016, "learning_rate": 1.2036661618869073e-05, "loss": 0.4379, "step": 22301 }, { "epoch": 3.6406269131872167, "grad_norm": 3.433378219604492, "learning_rate": 1.203603391139544e-05, "loss": 0.4832, "step": 22302 }, { "epoch": 3.640790171829721, "grad_norm": 3.423813819885254, "learning_rate": 1.2035406195552441e-05, "loss": 0.4999, "step": 22303 }, { "epoch": 3.6409534304722255, "grad_norm": 3.7894532680511475, "learning_rate": 1.2034778471342656e-05, "loss": 0.5279, "step": 22304 }, { "epoch": 3.64111668911473, "grad_norm": 3.148616313934326, "learning_rate": 1.2034150738768666e-05, "loss": 0.4299, "step": 22305 }, { "epoch": 3.6412799477572344, "grad_norm": 3.403801202774048, "learning_rate": 1.203352299783305e-05, "loss": 0.5045, "step": 22306 }, { "epoch": 3.641443206399739, "grad_norm": 3.419003963470459, "learning_rate": 1.2032895248538387e-05, "loss": 0.4777, "step": 22307 }, { "epoch": 3.6416064650422433, "grad_norm": 3.2635297775268555, "learning_rate": 1.2032267490887262e-05, "loss": 0.3929, "step": 22308 }, { "epoch": 3.6417697236847477, "grad_norm": 3.387204647064209, "learning_rate": 1.203163972488225e-05, "loss": 0.4399, "step": 22309 }, { "epoch": 3.641932982327252, "grad_norm": 3.675827980041504, "learning_rate": 1.2031011950525935e-05, "loss": 0.5506, "step": 22310 }, { "epoch": 3.6420962409697566, "grad_norm": 3.754526138305664, "learning_rate": 1.2030384167820897e-05, "loss": 0.5711, "step": 22311 }, { "epoch": 3.6422594996122606, "grad_norm": 3.2942490577697754, "learning_rate": 1.2029756376769717e-05, "loss": 0.4655, "step": 22312 }, { "epoch": 3.642422758254765, "grad_norm": 3.3036046028137207, "learning_rate": 1.2029128577374975e-05, "loss": 0.4719, "step": 22313 }, { "epoch": 3.6425860168972695, "grad_norm": 3.3742783069610596, "learning_rate": 1.202850076963925e-05, "loss": 0.4898, "step": 22314 }, { "epoch": 3.642749275539774, "grad_norm": 3.6953394412994385, "learning_rate": 1.2027872953565125e-05, "loss": 0.4557, "step": 22315 }, { "epoch": 3.6429125341822783, "grad_norm": 3.32715106010437, "learning_rate": 1.2027245129155181e-05, "loss": 0.4338, "step": 22316 }, { "epoch": 3.6430757928247828, "grad_norm": 3.308134078979492, "learning_rate": 1.2026617296411998e-05, "loss": 0.4596, "step": 22317 }, { "epoch": 3.643239051467287, "grad_norm": 3.4211297035217285, "learning_rate": 1.2025989455338158e-05, "loss": 0.5571, "step": 22318 }, { "epoch": 3.643402310109791, "grad_norm": 2.8003790378570557, "learning_rate": 1.2025361605936234e-05, "loss": 0.3849, "step": 22319 }, { "epoch": 3.6435655687522956, "grad_norm": 3.6224772930145264, "learning_rate": 1.202473374820882e-05, "loss": 0.5537, "step": 22320 }, { "epoch": 3.6437288273948, "grad_norm": 3.379973888397217, "learning_rate": 1.2024105882158485e-05, "loss": 0.4853, "step": 22321 }, { "epoch": 3.6438920860373045, "grad_norm": 2.8769140243530273, "learning_rate": 1.2023478007787818e-05, "loss": 0.4148, "step": 22322 }, { "epoch": 3.644055344679809, "grad_norm": 2.85402250289917, "learning_rate": 1.2022850125099398e-05, "loss": 0.394, "step": 22323 }, { "epoch": 3.6442186033223134, "grad_norm": 4.146733283996582, "learning_rate": 1.2022222234095803e-05, "loss": 0.5503, "step": 22324 }, { "epoch": 3.644381861964818, "grad_norm": 2.8886518478393555, "learning_rate": 1.2021594334779615e-05, "loss": 0.3994, "step": 22325 }, { "epoch": 3.6445451206073223, "grad_norm": 4.127948760986328, "learning_rate": 1.2020966427153417e-05, "loss": 0.6036, "step": 22326 }, { "epoch": 3.6447083792498267, "grad_norm": 3.726524591445923, "learning_rate": 1.202033851121979e-05, "loss": 0.5296, "step": 22327 }, { "epoch": 3.644871637892331, "grad_norm": 3.09871506690979, "learning_rate": 1.2019710586981315e-05, "loss": 0.4474, "step": 22328 }, { "epoch": 3.6450348965348356, "grad_norm": 3.0454585552215576, "learning_rate": 1.201908265444057e-05, "loss": 0.4377, "step": 22329 }, { "epoch": 3.6451981551773396, "grad_norm": 2.860867738723755, "learning_rate": 1.2018454713600141e-05, "loss": 0.4804, "step": 22330 }, { "epoch": 3.645361413819844, "grad_norm": 3.1276657581329346, "learning_rate": 1.2017826764462603e-05, "loss": 0.4991, "step": 22331 }, { "epoch": 3.6455246724623485, "grad_norm": 3.3825085163116455, "learning_rate": 1.2017198807030543e-05, "loss": 0.5004, "step": 22332 }, { "epoch": 3.645687931104853, "grad_norm": 3.0988214015960693, "learning_rate": 1.2016570841306543e-05, "loss": 0.5024, "step": 22333 }, { "epoch": 3.6458511897473573, "grad_norm": 3.765659809112549, "learning_rate": 1.201594286729318e-05, "loss": 0.5714, "step": 22334 }, { "epoch": 3.6460144483898618, "grad_norm": 2.3884222507476807, "learning_rate": 1.2015314884993037e-05, "loss": 0.4266, "step": 22335 }, { "epoch": 3.6461777070323658, "grad_norm": 3.1898016929626465, "learning_rate": 1.2014686894408693e-05, "loss": 0.436, "step": 22336 }, { "epoch": 3.64634096567487, "grad_norm": 3.356579065322876, "learning_rate": 1.2014058895542735e-05, "loss": 0.4719, "step": 22337 }, { "epoch": 3.6465042243173746, "grad_norm": 3.2806968688964844, "learning_rate": 1.2013430888397742e-05, "loss": 0.4441, "step": 22338 }, { "epoch": 3.646667482959879, "grad_norm": 2.96724009513855, "learning_rate": 1.2012802872976293e-05, "loss": 0.5153, "step": 22339 }, { "epoch": 3.6468307416023835, "grad_norm": 2.4934799671173096, "learning_rate": 1.2012174849280972e-05, "loss": 0.4053, "step": 22340 }, { "epoch": 3.646994000244888, "grad_norm": 3.277859926223755, "learning_rate": 1.2011546817314358e-05, "loss": 0.4926, "step": 22341 }, { "epoch": 3.6471572588873924, "grad_norm": 3.3267385959625244, "learning_rate": 1.2010918777079037e-05, "loss": 0.5254, "step": 22342 }, { "epoch": 3.647320517529897, "grad_norm": 2.879626989364624, "learning_rate": 1.2010290728577589e-05, "loss": 0.3605, "step": 22343 }, { "epoch": 3.6474837761724013, "grad_norm": 2.927886486053467, "learning_rate": 1.2009662671812592e-05, "loss": 0.4459, "step": 22344 }, { "epoch": 3.6476470348149057, "grad_norm": 3.257310390472412, "learning_rate": 1.2009034606786634e-05, "loss": 0.4681, "step": 22345 }, { "epoch": 3.64781029345741, "grad_norm": 3.520824909210205, "learning_rate": 1.200840653350229e-05, "loss": 0.5006, "step": 22346 }, { "epoch": 3.647973552099914, "grad_norm": 2.9003279209136963, "learning_rate": 1.2007778451962146e-05, "loss": 0.4184, "step": 22347 }, { "epoch": 3.6481368107424186, "grad_norm": 2.9951977729797363, "learning_rate": 1.2007150362168785e-05, "loss": 0.4284, "step": 22348 }, { "epoch": 3.648300069384923, "grad_norm": 2.72666335105896, "learning_rate": 1.2006522264124784e-05, "loss": 0.4316, "step": 22349 }, { "epoch": 3.6484633280274275, "grad_norm": 3.180598497390747, "learning_rate": 1.200589415783273e-05, "loss": 0.435, "step": 22350 }, { "epoch": 3.648626586669932, "grad_norm": 3.4066367149353027, "learning_rate": 1.20052660432952e-05, "loss": 0.4488, "step": 22351 }, { "epoch": 3.6487898453124363, "grad_norm": 4.400996685028076, "learning_rate": 1.2004637920514781e-05, "loss": 0.4991, "step": 22352 }, { "epoch": 3.6489531039549408, "grad_norm": 2.9768309593200684, "learning_rate": 1.2004009789494052e-05, "loss": 0.4227, "step": 22353 }, { "epoch": 3.6491163625974448, "grad_norm": 3.2122578620910645, "learning_rate": 1.2003381650235593e-05, "loss": 0.4625, "step": 22354 }, { "epoch": 3.649279621239949, "grad_norm": 3.341952323913574, "learning_rate": 1.200275350274199e-05, "loss": 0.4114, "step": 22355 }, { "epoch": 3.6494428798824536, "grad_norm": 2.8911733627319336, "learning_rate": 1.2002125347015822e-05, "loss": 0.4399, "step": 22356 }, { "epoch": 3.649606138524958, "grad_norm": 2.9801926612854004, "learning_rate": 1.2001497183059678e-05, "loss": 0.4302, "step": 22357 }, { "epoch": 3.6497693971674625, "grad_norm": 2.9636144638061523, "learning_rate": 1.200086901087613e-05, "loss": 0.3667, "step": 22358 }, { "epoch": 3.649932655809967, "grad_norm": 3.3609156608581543, "learning_rate": 1.2000240830467766e-05, "loss": 0.4912, "step": 22359 }, { "epoch": 3.6500959144524714, "grad_norm": 3.042001247406006, "learning_rate": 1.1999612641837168e-05, "loss": 0.4301, "step": 22360 }, { "epoch": 3.650259173094976, "grad_norm": 3.3632349967956543, "learning_rate": 1.1998984444986917e-05, "loss": 0.4627, "step": 22361 }, { "epoch": 3.6504224317374803, "grad_norm": 3.2240631580352783, "learning_rate": 1.1998356239919597e-05, "loss": 0.4494, "step": 22362 }, { "epoch": 3.6505856903799847, "grad_norm": 3.138868570327759, "learning_rate": 1.1997728026637785e-05, "loss": 0.4191, "step": 22363 }, { "epoch": 3.650748949022489, "grad_norm": 3.9726722240448, "learning_rate": 1.1997099805144071e-05, "loss": 0.5981, "step": 22364 }, { "epoch": 3.650912207664993, "grad_norm": 3.857666492462158, "learning_rate": 1.1996471575441033e-05, "loss": 0.4485, "step": 22365 }, { "epoch": 3.6510754663074976, "grad_norm": 2.8671674728393555, "learning_rate": 1.1995843337531254e-05, "loss": 0.4111, "step": 22366 }, { "epoch": 3.651238724950002, "grad_norm": 2.9411532878875732, "learning_rate": 1.1995215091417318e-05, "loss": 0.4033, "step": 22367 }, { "epoch": 3.6514019835925065, "grad_norm": 3.0536320209503174, "learning_rate": 1.1994586837101806e-05, "loss": 0.4789, "step": 22368 }, { "epoch": 3.651565242235011, "grad_norm": 2.7132225036621094, "learning_rate": 1.1993958574587296e-05, "loss": 0.4461, "step": 22369 }, { "epoch": 3.6517285008775153, "grad_norm": 3.0990428924560547, "learning_rate": 1.199333030387638e-05, "loss": 0.4438, "step": 22370 }, { "epoch": 3.6518917595200193, "grad_norm": 3.6308953762054443, "learning_rate": 1.1992702024971634e-05, "loss": 0.5001, "step": 22371 }, { "epoch": 3.6520550181625238, "grad_norm": 4.117944240570068, "learning_rate": 1.1992073737875646e-05, "loss": 0.4438, "step": 22372 }, { "epoch": 3.652218276805028, "grad_norm": 3.1502268314361572, "learning_rate": 1.1991445442590991e-05, "loss": 0.5233, "step": 22373 }, { "epoch": 3.6523815354475326, "grad_norm": 3.3711633682250977, "learning_rate": 1.199081713912026e-05, "loss": 0.4751, "step": 22374 }, { "epoch": 3.652544794090037, "grad_norm": 3.1914267539978027, "learning_rate": 1.1990188827466025e-05, "loss": 0.461, "step": 22375 }, { "epoch": 3.6527080527325415, "grad_norm": 2.9550232887268066, "learning_rate": 1.1989560507630877e-05, "loss": 0.4237, "step": 22376 }, { "epoch": 3.652871311375046, "grad_norm": 3.7666327953338623, "learning_rate": 1.1988932179617401e-05, "loss": 0.4651, "step": 22377 }, { "epoch": 3.6530345700175504, "grad_norm": 3.0416460037231445, "learning_rate": 1.1988303843428173e-05, "loss": 0.4421, "step": 22378 }, { "epoch": 3.653197828660055, "grad_norm": 3.873072385787964, "learning_rate": 1.198767549906578e-05, "loss": 0.5988, "step": 22379 }, { "epoch": 3.6533610873025593, "grad_norm": 3.9783835411071777, "learning_rate": 1.19870471465328e-05, "loss": 0.4949, "step": 22380 }, { "epoch": 3.6535243459450637, "grad_norm": 3.252838611602783, "learning_rate": 1.1986418785831822e-05, "loss": 0.6006, "step": 22381 }, { "epoch": 3.6536876045875677, "grad_norm": 2.8442399501800537, "learning_rate": 1.1985790416965428e-05, "loss": 0.4255, "step": 22382 }, { "epoch": 3.653850863230072, "grad_norm": 2.570435047149658, "learning_rate": 1.1985162039936198e-05, "loss": 0.3654, "step": 22383 }, { "epoch": 3.6540141218725766, "grad_norm": 3.157989263534546, "learning_rate": 1.1984533654746718e-05, "loss": 0.4464, "step": 22384 }, { "epoch": 3.654177380515081, "grad_norm": 2.875689744949341, "learning_rate": 1.1983905261399566e-05, "loss": 0.3903, "step": 22385 }, { "epoch": 3.6543406391575854, "grad_norm": 3.780606508255005, "learning_rate": 1.198327685989733e-05, "loss": 0.5133, "step": 22386 }, { "epoch": 3.65450389780009, "grad_norm": 3.782701253890991, "learning_rate": 1.1982648450242593e-05, "loss": 0.5797, "step": 22387 }, { "epoch": 3.6546671564425943, "grad_norm": 3.314948797225952, "learning_rate": 1.1982020032437936e-05, "loss": 0.4683, "step": 22388 }, { "epoch": 3.6548304150850983, "grad_norm": 3.230105400085449, "learning_rate": 1.1981391606485943e-05, "loss": 0.4509, "step": 22389 }, { "epoch": 3.6549936737276028, "grad_norm": 2.945833683013916, "learning_rate": 1.1980763172389198e-05, "loss": 0.4525, "step": 22390 }, { "epoch": 3.655156932370107, "grad_norm": 3.1204357147216797, "learning_rate": 1.1980134730150282e-05, "loss": 0.4646, "step": 22391 }, { "epoch": 3.6553201910126116, "grad_norm": 2.6519782543182373, "learning_rate": 1.197950627977178e-05, "loss": 0.4159, "step": 22392 }, { "epoch": 3.655483449655116, "grad_norm": 3.6379387378692627, "learning_rate": 1.1978877821256276e-05, "loss": 0.5192, "step": 22393 }, { "epoch": 3.6556467082976205, "grad_norm": 3.6242165565490723, "learning_rate": 1.1978249354606353e-05, "loss": 0.5802, "step": 22394 }, { "epoch": 3.655809966940125, "grad_norm": 2.713780641555786, "learning_rate": 1.1977620879824591e-05, "loss": 0.3726, "step": 22395 }, { "epoch": 3.6559732255826294, "grad_norm": 2.6707658767700195, "learning_rate": 1.197699239691358e-05, "loss": 0.4212, "step": 22396 }, { "epoch": 3.656136484225134, "grad_norm": 3.0534684658050537, "learning_rate": 1.1976363905875899e-05, "loss": 0.4671, "step": 22397 }, { "epoch": 3.6562997428676383, "grad_norm": 2.4352612495422363, "learning_rate": 1.197573540671413e-05, "loss": 0.4093, "step": 22398 }, { "epoch": 3.6564630015101427, "grad_norm": 3.298588514328003, "learning_rate": 1.1975106899430862e-05, "loss": 0.5242, "step": 22399 }, { "epoch": 3.6566262601526467, "grad_norm": 2.921806573867798, "learning_rate": 1.1974478384028672e-05, "loss": 0.4314, "step": 22400 }, { "epoch": 3.656789518795151, "grad_norm": 2.6837990283966064, "learning_rate": 1.1973849860510152e-05, "loss": 0.4347, "step": 22401 }, { "epoch": 3.6569527774376556, "grad_norm": 3.0736358165740967, "learning_rate": 1.1973221328877874e-05, "loss": 0.4112, "step": 22402 }, { "epoch": 3.65711603608016, "grad_norm": 4.4239044189453125, "learning_rate": 1.1972592789134431e-05, "loss": 0.5983, "step": 22403 }, { "epoch": 3.6572792947226644, "grad_norm": 3.051578998565674, "learning_rate": 1.1971964241282403e-05, "loss": 0.4749, "step": 22404 }, { "epoch": 3.657442553365169, "grad_norm": 4.250287055969238, "learning_rate": 1.1971335685324377e-05, "loss": 0.5958, "step": 22405 }, { "epoch": 3.6576058120076733, "grad_norm": 3.2690858840942383, "learning_rate": 1.1970707121262934e-05, "loss": 0.4535, "step": 22406 }, { "epoch": 3.6577690706501773, "grad_norm": 3.809199810028076, "learning_rate": 1.1970078549100657e-05, "loss": 0.5367, "step": 22407 }, { "epoch": 3.6579323292926818, "grad_norm": 3.248500108718872, "learning_rate": 1.196944996884013e-05, "loss": 0.5097, "step": 22408 }, { "epoch": 3.658095587935186, "grad_norm": 3.0497095584869385, "learning_rate": 1.196882138048394e-05, "loss": 0.5403, "step": 22409 }, { "epoch": 3.6582588465776906, "grad_norm": 2.677208185195923, "learning_rate": 1.1968192784034667e-05, "loss": 0.4596, "step": 22410 }, { "epoch": 3.658422105220195, "grad_norm": 2.8935112953186035, "learning_rate": 1.19675641794949e-05, "loss": 0.4972, "step": 22411 }, { "epoch": 3.6585853638626995, "grad_norm": 3.0321409702301025, "learning_rate": 1.1966935566867215e-05, "loss": 0.5034, "step": 22412 }, { "epoch": 3.658748622505204, "grad_norm": 3.255011796951294, "learning_rate": 1.19663069461542e-05, "loss": 0.4272, "step": 22413 }, { "epoch": 3.6589118811477084, "grad_norm": 3.0019447803497314, "learning_rate": 1.1965678317358442e-05, "loss": 0.4348, "step": 22414 }, { "epoch": 3.659075139790213, "grad_norm": 2.815000534057617, "learning_rate": 1.1965049680482523e-05, "loss": 0.4208, "step": 22415 }, { "epoch": 3.6592383984327173, "grad_norm": 3.370455265045166, "learning_rate": 1.196442103552903e-05, "loss": 0.5291, "step": 22416 }, { "epoch": 3.6594016570752217, "grad_norm": 2.8506903648376465, "learning_rate": 1.1963792382500542e-05, "loss": 0.4624, "step": 22417 }, { "epoch": 3.6595649157177257, "grad_norm": 3.245924472808838, "learning_rate": 1.1963163721399643e-05, "loss": 0.4143, "step": 22418 }, { "epoch": 3.65972817436023, "grad_norm": 2.9247140884399414, "learning_rate": 1.1962535052228916e-05, "loss": 0.4333, "step": 22419 }, { "epoch": 3.6598914330027346, "grad_norm": 3.356942653656006, "learning_rate": 1.1961906374990952e-05, "loss": 0.5083, "step": 22420 }, { "epoch": 3.660054691645239, "grad_norm": 3.2955539226531982, "learning_rate": 1.1961277689688333e-05, "loss": 0.428, "step": 22421 }, { "epoch": 3.6602179502877434, "grad_norm": 3.224565267562866, "learning_rate": 1.1960648996323641e-05, "loss": 0.4979, "step": 22422 }, { "epoch": 3.660381208930248, "grad_norm": 3.4279537200927734, "learning_rate": 1.1960020294899461e-05, "loss": 0.4858, "step": 22423 }, { "epoch": 3.660544467572752, "grad_norm": 3.183116912841797, "learning_rate": 1.1959391585418378e-05, "loss": 0.4499, "step": 22424 }, { "epoch": 3.6607077262152563, "grad_norm": 3.176689863204956, "learning_rate": 1.1958762867882974e-05, "loss": 0.4047, "step": 22425 }, { "epoch": 3.6608709848577607, "grad_norm": 3.7814464569091797, "learning_rate": 1.195813414229584e-05, "loss": 0.4515, "step": 22426 }, { "epoch": 3.661034243500265, "grad_norm": 2.921605348587036, "learning_rate": 1.1957505408659554e-05, "loss": 0.3769, "step": 22427 }, { "epoch": 3.6611975021427696, "grad_norm": 3.5013427734375, "learning_rate": 1.19568766669767e-05, "loss": 0.4725, "step": 22428 }, { "epoch": 3.661360760785274, "grad_norm": 3.0596988201141357, "learning_rate": 1.1956247917249865e-05, "loss": 0.4758, "step": 22429 }, { "epoch": 3.6615240194277785, "grad_norm": 3.0045881271362305, "learning_rate": 1.1955619159481635e-05, "loss": 0.4878, "step": 22430 }, { "epoch": 3.661687278070283, "grad_norm": 2.93527889251709, "learning_rate": 1.1954990393674594e-05, "loss": 0.3888, "step": 22431 }, { "epoch": 3.6618505367127874, "grad_norm": 2.830429792404175, "learning_rate": 1.1954361619831325e-05, "loss": 0.4227, "step": 22432 }, { "epoch": 3.662013795355292, "grad_norm": 3.151015281677246, "learning_rate": 1.1953732837954412e-05, "loss": 0.4285, "step": 22433 }, { "epoch": 3.6621770539977962, "grad_norm": 3.673769474029541, "learning_rate": 1.1953104048046441e-05, "loss": 0.563, "step": 22434 }, { "epoch": 3.6623403126403002, "grad_norm": 3.426650285720825, "learning_rate": 1.1952475250110001e-05, "loss": 0.4655, "step": 22435 }, { "epoch": 3.6625035712828047, "grad_norm": 3.1478018760681152, "learning_rate": 1.1951846444147667e-05, "loss": 0.4351, "step": 22436 }, { "epoch": 3.662666829925309, "grad_norm": 3.5012259483337402, "learning_rate": 1.1951217630162033e-05, "loss": 0.5477, "step": 22437 }, { "epoch": 3.6628300885678136, "grad_norm": 3.061429500579834, "learning_rate": 1.195058880815568e-05, "loss": 0.4187, "step": 22438 }, { "epoch": 3.662993347210318, "grad_norm": 2.8373632431030273, "learning_rate": 1.194995997813119e-05, "loss": 0.3981, "step": 22439 }, { "epoch": 3.6631566058528224, "grad_norm": 2.888165235519409, "learning_rate": 1.1949331140091154e-05, "loss": 0.4852, "step": 22440 }, { "epoch": 3.663319864495327, "grad_norm": 2.8246302604675293, "learning_rate": 1.194870229403815e-05, "loss": 0.4634, "step": 22441 }, { "epoch": 3.663483123137831, "grad_norm": 3.8959414958953857, "learning_rate": 1.194807343997477e-05, "loss": 0.5219, "step": 22442 }, { "epoch": 3.6636463817803353, "grad_norm": 3.492793321609497, "learning_rate": 1.1947444577903596e-05, "loss": 0.4671, "step": 22443 }, { "epoch": 3.6638096404228397, "grad_norm": 3.310044527053833, "learning_rate": 1.1946815707827209e-05, "loss": 0.4588, "step": 22444 }, { "epoch": 3.663972899065344, "grad_norm": 3.02398943901062, "learning_rate": 1.1946186829748202e-05, "loss": 0.514, "step": 22445 }, { "epoch": 3.6641361577078486, "grad_norm": 3.138026475906372, "learning_rate": 1.1945557943669153e-05, "loss": 0.4611, "step": 22446 }, { "epoch": 3.664299416350353, "grad_norm": 3.2336173057556152, "learning_rate": 1.194492904959265e-05, "loss": 0.4987, "step": 22447 }, { "epoch": 3.6644626749928575, "grad_norm": 3.6401302814483643, "learning_rate": 1.1944300147521278e-05, "loss": 0.4706, "step": 22448 }, { "epoch": 3.664625933635362, "grad_norm": 3.3345844745635986, "learning_rate": 1.1943671237457624e-05, "loss": 0.4885, "step": 22449 }, { "epoch": 3.6647891922778664, "grad_norm": 3.4493181705474854, "learning_rate": 1.1943042319404272e-05, "loss": 0.5346, "step": 22450 }, { "epoch": 3.664952450920371, "grad_norm": 2.880178451538086, "learning_rate": 1.1942413393363803e-05, "loss": 0.4545, "step": 22451 }, { "epoch": 3.6651157095628752, "grad_norm": 3.3121705055236816, "learning_rate": 1.1941784459338807e-05, "loss": 0.4879, "step": 22452 }, { "epoch": 3.6652789682053792, "grad_norm": 3.0010814666748047, "learning_rate": 1.1941155517331871e-05, "loss": 0.4607, "step": 22453 }, { "epoch": 3.6654422268478837, "grad_norm": 3.1379003524780273, "learning_rate": 1.1940526567345577e-05, "loss": 0.4718, "step": 22454 }, { "epoch": 3.665605485490388, "grad_norm": 3.480984926223755, "learning_rate": 1.1939897609382512e-05, "loss": 0.5525, "step": 22455 }, { "epoch": 3.6657687441328926, "grad_norm": 3.3252556324005127, "learning_rate": 1.1939268643445256e-05, "loss": 0.4752, "step": 22456 }, { "epoch": 3.665932002775397, "grad_norm": 3.4569783210754395, "learning_rate": 1.19386396695364e-05, "loss": 0.4662, "step": 22457 }, { "epoch": 3.6660952614179014, "grad_norm": 3.0332541465759277, "learning_rate": 1.193801068765853e-05, "loss": 0.3953, "step": 22458 }, { "epoch": 3.6662585200604054, "grad_norm": 3.013347625732422, "learning_rate": 1.193738169781423e-05, "loss": 0.3696, "step": 22459 }, { "epoch": 3.66642177870291, "grad_norm": 3.3682873249053955, "learning_rate": 1.1936752700006087e-05, "loss": 0.4773, "step": 22460 }, { "epoch": 3.6665850373454143, "grad_norm": 2.4159188270568848, "learning_rate": 1.1936123694236683e-05, "loss": 0.4079, "step": 22461 }, { "epoch": 3.6667482959879187, "grad_norm": 3.4643096923828125, "learning_rate": 1.1935494680508606e-05, "loss": 0.4672, "step": 22462 }, { "epoch": 3.666911554630423, "grad_norm": 3.0321719646453857, "learning_rate": 1.1934865658824438e-05, "loss": 0.4535, "step": 22463 }, { "epoch": 3.6670748132729276, "grad_norm": 3.626330614089966, "learning_rate": 1.1934236629186771e-05, "loss": 0.5821, "step": 22464 }, { "epoch": 3.667238071915432, "grad_norm": 3.446842670440674, "learning_rate": 1.1933607591598188e-05, "loss": 0.5006, "step": 22465 }, { "epoch": 3.6674013305579365, "grad_norm": 3.4429171085357666, "learning_rate": 1.1932978546061274e-05, "loss": 0.5113, "step": 22466 }, { "epoch": 3.667564589200441, "grad_norm": 2.9322433471679688, "learning_rate": 1.1932349492578614e-05, "loss": 0.415, "step": 22467 }, { "epoch": 3.6677278478429454, "grad_norm": 3.1508278846740723, "learning_rate": 1.1931720431152795e-05, "loss": 0.5608, "step": 22468 }, { "epoch": 3.66789110648545, "grad_norm": 3.43269944190979, "learning_rate": 1.1931091361786402e-05, "loss": 0.4642, "step": 22469 }, { "epoch": 3.668054365127954, "grad_norm": 3.4596588611602783, "learning_rate": 1.1930462284482026e-05, "loss": 0.497, "step": 22470 }, { "epoch": 3.6682176237704582, "grad_norm": 3.1394100189208984, "learning_rate": 1.1929833199242242e-05, "loss": 0.4447, "step": 22471 }, { "epoch": 3.6683808824129627, "grad_norm": 3.637718915939331, "learning_rate": 1.1929204106069647e-05, "loss": 0.4368, "step": 22472 }, { "epoch": 3.668544141055467, "grad_norm": 4.324160575866699, "learning_rate": 1.192857500496682e-05, "loss": 0.7715, "step": 22473 }, { "epoch": 3.6687073996979715, "grad_norm": 2.731872320175171, "learning_rate": 1.1927945895936348e-05, "loss": 0.434, "step": 22474 }, { "epoch": 3.668870658340476, "grad_norm": 3.227600574493408, "learning_rate": 1.1927316778980825e-05, "loss": 0.4983, "step": 22475 }, { "epoch": 3.6690339169829804, "grad_norm": 3.4808151721954346, "learning_rate": 1.1926687654102825e-05, "loss": 0.5634, "step": 22476 }, { "epoch": 3.6691971756254844, "grad_norm": 3.580284595489502, "learning_rate": 1.192605852130494e-05, "loss": 0.528, "step": 22477 }, { "epoch": 3.669360434267989, "grad_norm": 2.964228391647339, "learning_rate": 1.1925429380589757e-05, "loss": 0.4503, "step": 22478 }, { "epoch": 3.6695236929104933, "grad_norm": 2.0618276596069336, "learning_rate": 1.1924800231959859e-05, "loss": 0.3297, "step": 22479 }, { "epoch": 3.6696869515529977, "grad_norm": 3.0305848121643066, "learning_rate": 1.1924171075417836e-05, "loss": 0.3933, "step": 22480 }, { "epoch": 3.669850210195502, "grad_norm": 3.3304829597473145, "learning_rate": 1.1923541910966272e-05, "loss": 0.5086, "step": 22481 }, { "epoch": 3.6700134688380066, "grad_norm": 3.561493396759033, "learning_rate": 1.1922912738607753e-05, "loss": 0.5252, "step": 22482 }, { "epoch": 3.670176727480511, "grad_norm": 3.3076698780059814, "learning_rate": 1.1922283558344865e-05, "loss": 0.4749, "step": 22483 }, { "epoch": 3.6703399861230155, "grad_norm": 3.228501081466675, "learning_rate": 1.1921654370180199e-05, "loss": 0.4903, "step": 22484 }, { "epoch": 3.67050324476552, "grad_norm": 3.3051254749298096, "learning_rate": 1.1921025174116331e-05, "loss": 0.4566, "step": 22485 }, { "epoch": 3.6706665034080244, "grad_norm": 2.809769868850708, "learning_rate": 1.1920395970155857e-05, "loss": 0.4021, "step": 22486 }, { "epoch": 3.670829762050529, "grad_norm": 3.1995656490325928, "learning_rate": 1.1919766758301363e-05, "loss": 0.5187, "step": 22487 }, { "epoch": 3.670993020693033, "grad_norm": 3.684014320373535, "learning_rate": 1.191913753855543e-05, "loss": 0.5142, "step": 22488 }, { "epoch": 3.6711562793355372, "grad_norm": 3.0841550827026367, "learning_rate": 1.191850831092065e-05, "loss": 0.4098, "step": 22489 }, { "epoch": 3.6713195379780417, "grad_norm": 3.4468088150024414, "learning_rate": 1.19178790753996e-05, "loss": 0.5058, "step": 22490 }, { "epoch": 3.671482796620546, "grad_norm": 3.480008602142334, "learning_rate": 1.191724983199488e-05, "loss": 0.4442, "step": 22491 }, { "epoch": 3.6716460552630505, "grad_norm": 2.886413097381592, "learning_rate": 1.1916620580709067e-05, "loss": 0.3706, "step": 22492 }, { "epoch": 3.671809313905555, "grad_norm": 3.379711151123047, "learning_rate": 1.1915991321544752e-05, "loss": 0.5033, "step": 22493 }, { "epoch": 3.6719725725480594, "grad_norm": 3.992396354675293, "learning_rate": 1.1915362054504521e-05, "loss": 0.5715, "step": 22494 }, { "epoch": 3.6721358311905634, "grad_norm": 2.91068434715271, "learning_rate": 1.1914732779590956e-05, "loss": 0.3892, "step": 22495 }, { "epoch": 3.672299089833068, "grad_norm": 3.1548702716827393, "learning_rate": 1.1914103496806648e-05, "loss": 0.424, "step": 22496 }, { "epoch": 3.6724623484755723, "grad_norm": 2.877908229827881, "learning_rate": 1.1913474206154185e-05, "loss": 0.4427, "step": 22497 }, { "epoch": 3.6726256071180767, "grad_norm": 3.2396647930145264, "learning_rate": 1.1912844907636151e-05, "loss": 0.4218, "step": 22498 }, { "epoch": 3.672788865760581, "grad_norm": 2.8201651573181152, "learning_rate": 1.1912215601255137e-05, "loss": 0.428, "step": 22499 }, { "epoch": 3.6729521244030856, "grad_norm": 3.6165051460266113, "learning_rate": 1.1911586287013726e-05, "loss": 0.4921, "step": 22500 }, { "epoch": 3.67311538304559, "grad_norm": 3.8128461837768555, "learning_rate": 1.1910956964914502e-05, "loss": 0.5592, "step": 22501 }, { "epoch": 3.6732786416880945, "grad_norm": 3.637249708175659, "learning_rate": 1.1910327634960057e-05, "loss": 0.5086, "step": 22502 }, { "epoch": 3.673441900330599, "grad_norm": 2.9455833435058594, "learning_rate": 1.1909698297152976e-05, "loss": 0.4234, "step": 22503 }, { "epoch": 3.6736051589731034, "grad_norm": 3.014124631881714, "learning_rate": 1.190906895149585e-05, "loss": 0.4386, "step": 22504 }, { "epoch": 3.673768417615608, "grad_norm": 3.2287638187408447, "learning_rate": 1.1908439597991262e-05, "loss": 0.4706, "step": 22505 }, { "epoch": 3.673931676258112, "grad_norm": 3.075305223464966, "learning_rate": 1.1907810236641795e-05, "loss": 0.4472, "step": 22506 }, { "epoch": 3.6740949349006162, "grad_norm": 3.383713483810425, "learning_rate": 1.1907180867450042e-05, "loss": 0.4822, "step": 22507 }, { "epoch": 3.6742581935431207, "grad_norm": 2.759449005126953, "learning_rate": 1.1906551490418588e-05, "loss": 0.4022, "step": 22508 }, { "epoch": 3.674421452185625, "grad_norm": 3.4274656772613525, "learning_rate": 1.1905922105550025e-05, "loss": 0.4654, "step": 22509 }, { "epoch": 3.6745847108281295, "grad_norm": 2.9182655811309814, "learning_rate": 1.1905292712846932e-05, "loss": 0.361, "step": 22510 }, { "epoch": 3.674747969470634, "grad_norm": 3.5863513946533203, "learning_rate": 1.1904663312311902e-05, "loss": 0.4394, "step": 22511 }, { "epoch": 3.674911228113138, "grad_norm": 3.2897419929504395, "learning_rate": 1.1904033903947519e-05, "loss": 0.529, "step": 22512 }, { "epoch": 3.6750744867556424, "grad_norm": 3.6214725971221924, "learning_rate": 1.190340448775637e-05, "loss": 0.5346, "step": 22513 }, { "epoch": 3.675237745398147, "grad_norm": 3.179226875305176, "learning_rate": 1.1902775063741048e-05, "loss": 0.449, "step": 22514 }, { "epoch": 3.6754010040406513, "grad_norm": 3.6721560955047607, "learning_rate": 1.1902145631904131e-05, "loss": 0.4801, "step": 22515 }, { "epoch": 3.6755642626831557, "grad_norm": 3.0271658897399902, "learning_rate": 1.1901516192248217e-05, "loss": 0.486, "step": 22516 }, { "epoch": 3.67572752132566, "grad_norm": 3.2003159523010254, "learning_rate": 1.1900886744775884e-05, "loss": 0.5106, "step": 22517 }, { "epoch": 3.6758907799681646, "grad_norm": 3.1753437519073486, "learning_rate": 1.1900257289489723e-05, "loss": 0.4817, "step": 22518 }, { "epoch": 3.676054038610669, "grad_norm": 2.8292181491851807, "learning_rate": 1.1899627826392326e-05, "loss": 0.3641, "step": 22519 }, { "epoch": 3.6762172972531735, "grad_norm": 3.2878575325012207, "learning_rate": 1.1898998355486274e-05, "loss": 0.4372, "step": 22520 }, { "epoch": 3.676380555895678, "grad_norm": 3.4163107872009277, "learning_rate": 1.1898368876774156e-05, "loss": 0.4241, "step": 22521 }, { "epoch": 3.6765438145381824, "grad_norm": 3.7464656829833984, "learning_rate": 1.189773939025856e-05, "loss": 0.8666, "step": 22522 }, { "epoch": 3.6767070731806863, "grad_norm": 3.4889917373657227, "learning_rate": 1.1897109895942076e-05, "loss": 0.4969, "step": 22523 }, { "epoch": 3.676870331823191, "grad_norm": 3.1730148792266846, "learning_rate": 1.1896480393827288e-05, "loss": 0.5068, "step": 22524 }, { "epoch": 3.6770335904656952, "grad_norm": 3.0761499404907227, "learning_rate": 1.1895850883916786e-05, "loss": 0.3941, "step": 22525 }, { "epoch": 3.6771968491081997, "grad_norm": 3.6640045642852783, "learning_rate": 1.1895221366213157e-05, "loss": 0.5346, "step": 22526 }, { "epoch": 3.677360107750704, "grad_norm": 3.994832992553711, "learning_rate": 1.1894591840718987e-05, "loss": 0.5524, "step": 22527 }, { "epoch": 3.6775233663932085, "grad_norm": 3.7055556774139404, "learning_rate": 1.1893962307436869e-05, "loss": 0.4664, "step": 22528 }, { "epoch": 3.677686625035713, "grad_norm": 3.1141891479492188, "learning_rate": 1.1893332766369382e-05, "loss": 0.377, "step": 22529 }, { "epoch": 3.677849883678217, "grad_norm": 2.696669340133667, "learning_rate": 1.1892703217519122e-05, "loss": 0.3843, "step": 22530 }, { "epoch": 3.6780131423207214, "grad_norm": 3.2616524696350098, "learning_rate": 1.1892073660888674e-05, "loss": 0.4834, "step": 22531 }, { "epoch": 3.678176400963226, "grad_norm": 4.023317337036133, "learning_rate": 1.1891444096480625e-05, "loss": 0.4997, "step": 22532 }, { "epoch": 3.6783396596057303, "grad_norm": 2.818706750869751, "learning_rate": 1.1890814524297565e-05, "loss": 0.4386, "step": 22533 }, { "epoch": 3.6785029182482347, "grad_norm": 3.3829729557037354, "learning_rate": 1.1890184944342076e-05, "loss": 0.4593, "step": 22534 }, { "epoch": 3.678666176890739, "grad_norm": 3.2367653846740723, "learning_rate": 1.1889555356616753e-05, "loss": 0.4455, "step": 22535 }, { "epoch": 3.6788294355332436, "grad_norm": 3.3256235122680664, "learning_rate": 1.1888925761124181e-05, "loss": 0.4411, "step": 22536 }, { "epoch": 3.678992694175748, "grad_norm": 3.9398651123046875, "learning_rate": 1.188829615786695e-05, "loss": 0.5595, "step": 22537 }, { "epoch": 3.6791559528182525, "grad_norm": 3.1639950275421143, "learning_rate": 1.1887666546847649e-05, "loss": 0.4637, "step": 22538 }, { "epoch": 3.679319211460757, "grad_norm": 3.4861114025115967, "learning_rate": 1.188703692806886e-05, "loss": 0.5285, "step": 22539 }, { "epoch": 3.6794824701032613, "grad_norm": 3.221813440322876, "learning_rate": 1.1886407301533171e-05, "loss": 0.4728, "step": 22540 }, { "epoch": 3.6796457287457653, "grad_norm": 3.6740150451660156, "learning_rate": 1.1885777667243178e-05, "loss": 0.5264, "step": 22541 }, { "epoch": 3.67980898738827, "grad_norm": 2.9760549068450928, "learning_rate": 1.1885148025201465e-05, "loss": 0.4303, "step": 22542 }, { "epoch": 3.679972246030774, "grad_norm": 3.1833131313323975, "learning_rate": 1.1884518375410624e-05, "loss": 0.4333, "step": 22543 }, { "epoch": 3.6801355046732787, "grad_norm": 3.4971020221710205, "learning_rate": 1.1883888717873235e-05, "loss": 0.5587, "step": 22544 }, { "epoch": 3.680298763315783, "grad_norm": 3.746544361114502, "learning_rate": 1.188325905259189e-05, "loss": 0.4449, "step": 22545 }, { "epoch": 3.6804620219582875, "grad_norm": 3.272974729537964, "learning_rate": 1.188262937956918e-05, "loss": 0.4779, "step": 22546 }, { "epoch": 3.680625280600792, "grad_norm": 2.6045405864715576, "learning_rate": 1.1881999698807689e-05, "loss": 0.4205, "step": 22547 }, { "epoch": 3.680788539243296, "grad_norm": 2.505237579345703, "learning_rate": 1.1881370010310015e-05, "loss": 0.4077, "step": 22548 }, { "epoch": 3.6809517978858004, "grad_norm": 3.476706027984619, "learning_rate": 1.1880740314078732e-05, "loss": 0.5747, "step": 22549 }, { "epoch": 3.681115056528305, "grad_norm": 2.724360227584839, "learning_rate": 1.1880110610116438e-05, "loss": 0.4365, "step": 22550 }, { "epoch": 3.6812783151708093, "grad_norm": 3.2375810146331787, "learning_rate": 1.1879480898425717e-05, "loss": 0.5085, "step": 22551 }, { "epoch": 3.6814415738133137, "grad_norm": 3.374915599822998, "learning_rate": 1.1878851179009163e-05, "loss": 0.5404, "step": 22552 }, { "epoch": 3.681604832455818, "grad_norm": 2.7534070014953613, "learning_rate": 1.187822145186936e-05, "loss": 0.3551, "step": 22553 }, { "epoch": 3.6817680910983226, "grad_norm": 3.2050490379333496, "learning_rate": 1.1877591717008899e-05, "loss": 0.4355, "step": 22554 }, { "epoch": 3.681931349740827, "grad_norm": 2.9557278156280518, "learning_rate": 1.1876961974430368e-05, "loss": 0.437, "step": 22555 }, { "epoch": 3.6820946083833315, "grad_norm": 3.021843910217285, "learning_rate": 1.187633222413635e-05, "loss": 0.4293, "step": 22556 }, { "epoch": 3.682257867025836, "grad_norm": 3.5417938232421875, "learning_rate": 1.1875702466129442e-05, "loss": 0.4784, "step": 22557 }, { "epoch": 3.6824211256683403, "grad_norm": 3.105511426925659, "learning_rate": 1.1875072700412231e-05, "loss": 0.475, "step": 22558 }, { "epoch": 3.6825843843108443, "grad_norm": 3.3957877159118652, "learning_rate": 1.1874442926987304e-05, "loss": 0.5333, "step": 22559 }, { "epoch": 3.6827476429533488, "grad_norm": 2.9543745517730713, "learning_rate": 1.187381314585725e-05, "loss": 0.4346, "step": 22560 }, { "epoch": 3.682910901595853, "grad_norm": 3.381028413772583, "learning_rate": 1.1873183357024653e-05, "loss": 0.527, "step": 22561 }, { "epoch": 3.6830741602383577, "grad_norm": 3.5184707641601562, "learning_rate": 1.1872553560492108e-05, "loss": 0.5395, "step": 22562 }, { "epoch": 3.683237418880862, "grad_norm": 3.159236192703247, "learning_rate": 1.1871923756262207e-05, "loss": 0.4452, "step": 22563 }, { "epoch": 3.6834006775233665, "grad_norm": 3.5053699016571045, "learning_rate": 1.1871293944337533e-05, "loss": 0.4939, "step": 22564 }, { "epoch": 3.6835639361658705, "grad_norm": 3.269893169403076, "learning_rate": 1.1870664124720675e-05, "loss": 0.4884, "step": 22565 }, { "epoch": 3.683727194808375, "grad_norm": 3.367945432662964, "learning_rate": 1.1870034297414221e-05, "loss": 0.4561, "step": 22566 }, { "epoch": 3.6838904534508794, "grad_norm": 3.246373414993286, "learning_rate": 1.1869404462420764e-05, "loss": 0.4331, "step": 22567 }, { "epoch": 3.684053712093384, "grad_norm": 3.070761203765869, "learning_rate": 1.186877461974289e-05, "loss": 0.388, "step": 22568 }, { "epoch": 3.6842169707358883, "grad_norm": 2.8048198223114014, "learning_rate": 1.1868144769383192e-05, "loss": 0.4023, "step": 22569 }, { "epoch": 3.6843802293783927, "grad_norm": 3.4517982006073, "learning_rate": 1.1867514911344254e-05, "loss": 0.5087, "step": 22570 }, { "epoch": 3.684543488020897, "grad_norm": 3.0512185096740723, "learning_rate": 1.186688504562867e-05, "loss": 0.4567, "step": 22571 }, { "epoch": 3.6847067466634016, "grad_norm": 3.1816470623016357, "learning_rate": 1.1866255172239022e-05, "loss": 0.5479, "step": 22572 }, { "epoch": 3.684870005305906, "grad_norm": 3.155454158782959, "learning_rate": 1.1865625291177906e-05, "loss": 0.474, "step": 22573 }, { "epoch": 3.6850332639484105, "grad_norm": 3.5198957920074463, "learning_rate": 1.186499540244791e-05, "loss": 0.4652, "step": 22574 }, { "epoch": 3.685196522590915, "grad_norm": 3.25297474861145, "learning_rate": 1.186436550605162e-05, "loss": 0.4664, "step": 22575 }, { "epoch": 3.685359781233419, "grad_norm": 3.6246702671051025, "learning_rate": 1.1863735601991629e-05, "loss": 0.5203, "step": 22576 }, { "epoch": 3.6855230398759233, "grad_norm": 3.5233190059661865, "learning_rate": 1.1863105690270527e-05, "loss": 0.4933, "step": 22577 }, { "epoch": 3.6856862985184278, "grad_norm": 3.052537441253662, "learning_rate": 1.1862475770890896e-05, "loss": 0.4481, "step": 22578 }, { "epoch": 3.685849557160932, "grad_norm": 3.43544340133667, "learning_rate": 1.186184584385533e-05, "loss": 0.4867, "step": 22579 }, { "epoch": 3.6860128158034366, "grad_norm": 3.023845672607422, "learning_rate": 1.186121590916642e-05, "loss": 0.4537, "step": 22580 }, { "epoch": 3.686176074445941, "grad_norm": 3.3667590618133545, "learning_rate": 1.1860585966826759e-05, "loss": 0.5581, "step": 22581 }, { "epoch": 3.6863393330884455, "grad_norm": 2.856571674346924, "learning_rate": 1.185995601683893e-05, "loss": 0.4086, "step": 22582 }, { "epoch": 3.6865025917309495, "grad_norm": 2.996195077896118, "learning_rate": 1.185932605920552e-05, "loss": 0.4105, "step": 22583 }, { "epoch": 3.686665850373454, "grad_norm": 3.478930950164795, "learning_rate": 1.1858696093929123e-05, "loss": 0.4345, "step": 22584 }, { "epoch": 3.6868291090159584, "grad_norm": 3.2734808921813965, "learning_rate": 1.185806612101233e-05, "loss": 0.4618, "step": 22585 }, { "epoch": 3.686992367658463, "grad_norm": 2.9958348274230957, "learning_rate": 1.185743614045773e-05, "loss": 0.34, "step": 22586 }, { "epoch": 3.6871556263009673, "grad_norm": 2.913985252380371, "learning_rate": 1.1856806152267912e-05, "loss": 0.4666, "step": 22587 }, { "epoch": 3.6873188849434717, "grad_norm": 2.8266396522521973, "learning_rate": 1.1856176156445462e-05, "loss": 0.4176, "step": 22588 }, { "epoch": 3.687482143585976, "grad_norm": 3.9362802505493164, "learning_rate": 1.1855546152992972e-05, "loss": 0.569, "step": 22589 }, { "epoch": 3.6876454022284806, "grad_norm": 2.979537010192871, "learning_rate": 1.1854916141913036e-05, "loss": 0.428, "step": 22590 }, { "epoch": 3.687808660870985, "grad_norm": 3.636190414428711, "learning_rate": 1.1854286123208236e-05, "loss": 0.3963, "step": 22591 }, { "epoch": 3.6879719195134895, "grad_norm": 4.170063018798828, "learning_rate": 1.1853656096881171e-05, "loss": 1.1567, "step": 22592 }, { "epoch": 3.688135178155994, "grad_norm": 2.711076259613037, "learning_rate": 1.1853026062934424e-05, "loss": 0.4537, "step": 22593 }, { "epoch": 3.688298436798498, "grad_norm": 3.1030936241149902, "learning_rate": 1.1852396021370585e-05, "loss": 0.439, "step": 22594 }, { "epoch": 3.6884616954410023, "grad_norm": 3.209995985031128, "learning_rate": 1.1851765972192245e-05, "loss": 0.5263, "step": 22595 }, { "epoch": 3.6886249540835068, "grad_norm": 3.506669521331787, "learning_rate": 1.1851135915401994e-05, "loss": 0.4879, "step": 22596 }, { "epoch": 3.688788212726011, "grad_norm": 3.1259653568267822, "learning_rate": 1.1850505851002425e-05, "loss": 0.4511, "step": 22597 }, { "epoch": 3.6889514713685156, "grad_norm": 3.0266292095184326, "learning_rate": 1.1849875778996125e-05, "loss": 0.414, "step": 22598 }, { "epoch": 3.68911473001102, "grad_norm": 2.9687280654907227, "learning_rate": 1.184924569938568e-05, "loss": 0.4316, "step": 22599 }, { "epoch": 3.689277988653524, "grad_norm": 3.1030590534210205, "learning_rate": 1.1848615612173689e-05, "loss": 0.4946, "step": 22600 }, { "epoch": 3.6894412472960285, "grad_norm": 3.893249988555908, "learning_rate": 1.1847985517362731e-05, "loss": 0.6941, "step": 22601 }, { "epoch": 3.689604505938533, "grad_norm": 3.4725711345672607, "learning_rate": 1.184735541495541e-05, "loss": 0.4435, "step": 22602 }, { "epoch": 3.6897677645810374, "grad_norm": 3.1285414695739746, "learning_rate": 1.1846725304954305e-05, "loss": 0.422, "step": 22603 }, { "epoch": 3.689931023223542, "grad_norm": 2.9213554859161377, "learning_rate": 1.184609518736201e-05, "loss": 0.3905, "step": 22604 }, { "epoch": 3.6900942818660463, "grad_norm": 3.649336099624634, "learning_rate": 1.1845465062181117e-05, "loss": 0.429, "step": 22605 }, { "epoch": 3.6902575405085507, "grad_norm": 3.312466859817505, "learning_rate": 1.1844834929414208e-05, "loss": 0.4495, "step": 22606 }, { "epoch": 3.690420799151055, "grad_norm": 3.3452718257904053, "learning_rate": 1.1844204789063884e-05, "loss": 0.486, "step": 22607 }, { "epoch": 3.6905840577935596, "grad_norm": 3.228666067123413, "learning_rate": 1.184357464113273e-05, "loss": 0.3719, "step": 22608 }, { "epoch": 3.690747316436064, "grad_norm": 3.1864583492279053, "learning_rate": 1.1842944485623335e-05, "loss": 0.4562, "step": 22609 }, { "epoch": 3.6909105750785685, "grad_norm": 3.666102409362793, "learning_rate": 1.1842314322538291e-05, "loss": 0.4791, "step": 22610 }, { "epoch": 3.6910738337210725, "grad_norm": 3.141004800796509, "learning_rate": 1.184168415188019e-05, "loss": 0.4838, "step": 22611 }, { "epoch": 3.691237092363577, "grad_norm": 3.7140297889709473, "learning_rate": 1.1841053973651619e-05, "loss": 0.5407, "step": 22612 }, { "epoch": 3.6914003510060813, "grad_norm": 3.2525179386138916, "learning_rate": 1.1840423787855174e-05, "loss": 0.4787, "step": 22613 }, { "epoch": 3.6915636096485858, "grad_norm": 2.9793343544006348, "learning_rate": 1.1839793594493439e-05, "loss": 0.4891, "step": 22614 }, { "epoch": 3.69172686829109, "grad_norm": 2.9254343509674072, "learning_rate": 1.1839163393569006e-05, "loss": 0.4607, "step": 22615 }, { "epoch": 3.6918901269335946, "grad_norm": 3.349165201187134, "learning_rate": 1.1838533185084468e-05, "loss": 0.4588, "step": 22616 }, { "epoch": 3.692053385576099, "grad_norm": 2.797759771347046, "learning_rate": 1.1837902969042413e-05, "loss": 0.4624, "step": 22617 }, { "epoch": 3.692216644218603, "grad_norm": 3.2089760303497314, "learning_rate": 1.1837272745445436e-05, "loss": 0.5508, "step": 22618 }, { "epoch": 3.6923799028611075, "grad_norm": 3.0688319206237793, "learning_rate": 1.1836642514296125e-05, "loss": 0.4706, "step": 22619 }, { "epoch": 3.692543161503612, "grad_norm": 2.947753429412842, "learning_rate": 1.1836012275597066e-05, "loss": 0.4193, "step": 22620 }, { "epoch": 3.6927064201461164, "grad_norm": 3.0869200229644775, "learning_rate": 1.183538202935086e-05, "loss": 0.5024, "step": 22621 }, { "epoch": 3.692869678788621, "grad_norm": 3.777691602706909, "learning_rate": 1.1834751775560083e-05, "loss": 0.5169, "step": 22622 }, { "epoch": 3.6930329374311253, "grad_norm": 3.133542776107788, "learning_rate": 1.183412151422734e-05, "loss": 0.391, "step": 22623 }, { "epoch": 3.6931961960736297, "grad_norm": 3.802510976791382, "learning_rate": 1.1833491245355212e-05, "loss": 0.5668, "step": 22624 }, { "epoch": 3.693359454716134, "grad_norm": 3.233001232147217, "learning_rate": 1.1832860968946298e-05, "loss": 0.5481, "step": 22625 }, { "epoch": 3.6935227133586386, "grad_norm": 3.0467112064361572, "learning_rate": 1.1832230685003185e-05, "loss": 0.4746, "step": 22626 }, { "epoch": 3.693685972001143, "grad_norm": 2.9901647567749023, "learning_rate": 1.1831600393528462e-05, "loss": 0.4148, "step": 22627 }, { "epoch": 3.6938492306436475, "grad_norm": 3.7594752311706543, "learning_rate": 1.1830970094524718e-05, "loss": 0.5515, "step": 22628 }, { "epoch": 3.6940124892861514, "grad_norm": 3.086984872817993, "learning_rate": 1.1830339787994552e-05, "loss": 0.4413, "step": 22629 }, { "epoch": 3.694175747928656, "grad_norm": 3.4347803592681885, "learning_rate": 1.1829709473940547e-05, "loss": 0.5302, "step": 22630 }, { "epoch": 3.6943390065711603, "grad_norm": 2.756527900695801, "learning_rate": 1.1829079152365302e-05, "loss": 0.3416, "step": 22631 }, { "epoch": 3.6945022652136648, "grad_norm": 3.5000956058502197, "learning_rate": 1.18284488232714e-05, "loss": 0.4476, "step": 22632 }, { "epoch": 3.694665523856169, "grad_norm": 2.8693087100982666, "learning_rate": 1.1827818486661431e-05, "loss": 0.4113, "step": 22633 }, { "epoch": 3.6948287824986736, "grad_norm": 3.1141703128814697, "learning_rate": 1.1827188142537996e-05, "loss": 0.527, "step": 22634 }, { "epoch": 3.694992041141178, "grad_norm": 2.849569320678711, "learning_rate": 1.1826557790903679e-05, "loss": 0.4196, "step": 22635 }, { "epoch": 3.695155299783682, "grad_norm": 3.580684185028076, "learning_rate": 1.1825927431761076e-05, "loss": 0.4769, "step": 22636 }, { "epoch": 3.6953185584261865, "grad_norm": 2.697615385055542, "learning_rate": 1.1825297065112771e-05, "loss": 0.4028, "step": 22637 }, { "epoch": 3.695481817068691, "grad_norm": 3.454606056213379, "learning_rate": 1.182466669096136e-05, "loss": 0.5365, "step": 22638 }, { "epoch": 3.6956450757111954, "grad_norm": 2.985246419906616, "learning_rate": 1.182403630930943e-05, "loss": 0.504, "step": 22639 }, { "epoch": 3.6958083343537, "grad_norm": 2.701735734939575, "learning_rate": 1.1823405920159576e-05, "loss": 0.4029, "step": 22640 }, { "epoch": 3.6959715929962043, "grad_norm": 3.8297555446624756, "learning_rate": 1.1822775523514393e-05, "loss": 0.5239, "step": 22641 }, { "epoch": 3.6961348516387087, "grad_norm": 2.693756103515625, "learning_rate": 1.1822145119376468e-05, "loss": 0.4476, "step": 22642 }, { "epoch": 3.696298110281213, "grad_norm": 2.9102749824523926, "learning_rate": 1.182151470774839e-05, "loss": 0.4134, "step": 22643 }, { "epoch": 3.6964613689237176, "grad_norm": 3.0833873748779297, "learning_rate": 1.1820884288632752e-05, "loss": 0.4801, "step": 22644 }, { "epoch": 3.696624627566222, "grad_norm": 2.7051820755004883, "learning_rate": 1.1820253862032146e-05, "loss": 0.4123, "step": 22645 }, { "epoch": 3.6967878862087264, "grad_norm": 3.0456464290618896, "learning_rate": 1.1819623427949167e-05, "loss": 0.4339, "step": 22646 }, { "epoch": 3.6969511448512304, "grad_norm": 3.831822395324707, "learning_rate": 1.1818992986386402e-05, "loss": 0.4918, "step": 22647 }, { "epoch": 3.697114403493735, "grad_norm": 3.129295825958252, "learning_rate": 1.1818362537346444e-05, "loss": 0.5145, "step": 22648 }, { "epoch": 3.6972776621362393, "grad_norm": 3.211845874786377, "learning_rate": 1.1817732080831883e-05, "loss": 0.4823, "step": 22649 }, { "epoch": 3.6974409207787438, "grad_norm": 3.743781805038452, "learning_rate": 1.1817101616845312e-05, "loss": 0.4925, "step": 22650 }, { "epoch": 3.697604179421248, "grad_norm": 3.887162685394287, "learning_rate": 1.1816471145389323e-05, "loss": 0.5713, "step": 22651 }, { "epoch": 3.6977674380637526, "grad_norm": 2.3283183574676514, "learning_rate": 1.1815840666466505e-05, "loss": 0.3335, "step": 22652 }, { "epoch": 3.6979306967062566, "grad_norm": 3.074110746383667, "learning_rate": 1.1815210180079454e-05, "loss": 0.4919, "step": 22653 }, { "epoch": 3.698093955348761, "grad_norm": 3.2406842708587646, "learning_rate": 1.1814579686230759e-05, "loss": 0.4829, "step": 22654 }, { "epoch": 3.6982572139912655, "grad_norm": 3.458103656768799, "learning_rate": 1.1813949184923011e-05, "loss": 0.4773, "step": 22655 }, { "epoch": 3.69842047263377, "grad_norm": 3.3639044761657715, "learning_rate": 1.1813318676158802e-05, "loss": 0.4957, "step": 22656 }, { "epoch": 3.6985837312762744, "grad_norm": 3.3748562335968018, "learning_rate": 1.1812688159940729e-05, "loss": 0.4731, "step": 22657 }, { "epoch": 3.698746989918779, "grad_norm": 3.5705058574676514, "learning_rate": 1.1812057636271374e-05, "loss": 0.4888, "step": 22658 }, { "epoch": 3.6989102485612833, "grad_norm": 3.033078670501709, "learning_rate": 1.1811427105153337e-05, "loss": 0.4581, "step": 22659 }, { "epoch": 3.6990735072037877, "grad_norm": 2.9973247051239014, "learning_rate": 1.1810796566589206e-05, "loss": 0.4594, "step": 22660 }, { "epoch": 3.699236765846292, "grad_norm": 3.414970874786377, "learning_rate": 1.1810166020581577e-05, "loss": 0.4668, "step": 22661 }, { "epoch": 3.6994000244887966, "grad_norm": 3.059593439102173, "learning_rate": 1.1809535467133038e-05, "loss": 0.496, "step": 22662 }, { "epoch": 3.699563283131301, "grad_norm": 3.3372769355773926, "learning_rate": 1.1808904906246179e-05, "loss": 0.5117, "step": 22663 }, { "epoch": 3.699726541773805, "grad_norm": 3.0115413665771484, "learning_rate": 1.1808274337923597e-05, "loss": 0.3977, "step": 22664 }, { "epoch": 3.6998898004163094, "grad_norm": 3.9410927295684814, "learning_rate": 1.1807643762167884e-05, "loss": 0.5503, "step": 22665 }, { "epoch": 3.700053059058814, "grad_norm": 3.0615851879119873, "learning_rate": 1.1807013178981626e-05, "loss": 0.4042, "step": 22666 }, { "epoch": 3.7002163177013183, "grad_norm": 2.8267767429351807, "learning_rate": 1.1806382588367422e-05, "loss": 0.408, "step": 22667 }, { "epoch": 3.7003795763438228, "grad_norm": 2.6545310020446777, "learning_rate": 1.1805751990327861e-05, "loss": 0.3736, "step": 22668 }, { "epoch": 3.700542834986327, "grad_norm": 2.586325168609619, "learning_rate": 1.1805121384865532e-05, "loss": 0.4041, "step": 22669 }, { "epoch": 3.7007060936288316, "grad_norm": 3.4658730030059814, "learning_rate": 1.1804490771983038e-05, "loss": 0.5727, "step": 22670 }, { "epoch": 3.7008693522713356, "grad_norm": 2.9637749195098877, "learning_rate": 1.1803860151682958e-05, "loss": 0.3972, "step": 22671 }, { "epoch": 3.70103261091384, "grad_norm": 3.2235801219940186, "learning_rate": 1.180322952396789e-05, "loss": 0.4692, "step": 22672 }, { "epoch": 3.7011958695563445, "grad_norm": 3.399561882019043, "learning_rate": 1.1802598888840426e-05, "loss": 0.441, "step": 22673 }, { "epoch": 3.701359128198849, "grad_norm": 3.8939173221588135, "learning_rate": 1.1801968246303162e-05, "loss": 0.5021, "step": 22674 }, { "epoch": 3.7015223868413534, "grad_norm": 3.310539960861206, "learning_rate": 1.1801337596358686e-05, "loss": 0.5026, "step": 22675 }, { "epoch": 3.701685645483858, "grad_norm": 3.176151752471924, "learning_rate": 1.180070693900959e-05, "loss": 0.4344, "step": 22676 }, { "epoch": 3.7018489041263622, "grad_norm": 2.9965291023254395, "learning_rate": 1.1800076274258467e-05, "loss": 0.4434, "step": 22677 }, { "epoch": 3.7020121627688667, "grad_norm": 3.4344072341918945, "learning_rate": 1.1799445602107911e-05, "loss": 0.4745, "step": 22678 }, { "epoch": 3.702175421411371, "grad_norm": 3.299617290496826, "learning_rate": 1.1798814922560513e-05, "loss": 0.4805, "step": 22679 }, { "epoch": 3.7023386800538756, "grad_norm": 2.9812204837799072, "learning_rate": 1.1798184235618868e-05, "loss": 0.522, "step": 22680 }, { "epoch": 3.70250193869638, "grad_norm": 3.073443651199341, "learning_rate": 1.1797553541285566e-05, "loss": 0.5093, "step": 22681 }, { "epoch": 3.702665197338884, "grad_norm": 2.7803337574005127, "learning_rate": 1.1796922839563202e-05, "loss": 0.4419, "step": 22682 }, { "epoch": 3.7028284559813884, "grad_norm": 3.288809061050415, "learning_rate": 1.1796292130454359e-05, "loss": 0.5052, "step": 22683 }, { "epoch": 3.702991714623893, "grad_norm": 3.0532710552215576, "learning_rate": 1.1795661413961643e-05, "loss": 0.3919, "step": 22684 }, { "epoch": 3.7031549732663973, "grad_norm": 3.1647608280181885, "learning_rate": 1.1795030690087644e-05, "loss": 0.4923, "step": 22685 }, { "epoch": 3.7033182319089017, "grad_norm": 3.338460922241211, "learning_rate": 1.1794399958834948e-05, "loss": 0.452, "step": 22686 }, { "epoch": 3.703481490551406, "grad_norm": 3.475766897201538, "learning_rate": 1.1793769220206151e-05, "loss": 0.5455, "step": 22687 }, { "epoch": 3.70364474919391, "grad_norm": 2.7266147136688232, "learning_rate": 1.1793138474203846e-05, "loss": 0.4046, "step": 22688 }, { "epoch": 3.7038080078364146, "grad_norm": 2.952638864517212, "learning_rate": 1.1792507720830625e-05, "loss": 0.5494, "step": 22689 }, { "epoch": 3.703971266478919, "grad_norm": 3.299487352371216, "learning_rate": 1.1791876960089087e-05, "loss": 0.4792, "step": 22690 }, { "epoch": 3.7041345251214235, "grad_norm": 2.9364778995513916, "learning_rate": 1.1791246191981814e-05, "loss": 0.4247, "step": 22691 }, { "epoch": 3.704297783763928, "grad_norm": 2.8728537559509277, "learning_rate": 1.1790615416511404e-05, "loss": 0.4053, "step": 22692 }, { "epoch": 3.7044610424064324, "grad_norm": 3.145040512084961, "learning_rate": 1.1789984633680454e-05, "loss": 0.4163, "step": 22693 }, { "epoch": 3.704624301048937, "grad_norm": 2.5751547813415527, "learning_rate": 1.1789353843491549e-05, "loss": 0.3889, "step": 22694 }, { "epoch": 3.7047875596914412, "grad_norm": 3.023693561553955, "learning_rate": 1.1788723045947288e-05, "loss": 0.4769, "step": 22695 }, { "epoch": 3.7049508183339457, "grad_norm": 2.5550429821014404, "learning_rate": 1.1788092241050262e-05, "loss": 0.3944, "step": 22696 }, { "epoch": 3.70511407697645, "grad_norm": 3.0793893337249756, "learning_rate": 1.1787461428803064e-05, "loss": 0.4585, "step": 22697 }, { "epoch": 3.7052773356189546, "grad_norm": 3.296928882598877, "learning_rate": 1.1786830609208287e-05, "loss": 0.4958, "step": 22698 }, { "epoch": 3.7054405942614586, "grad_norm": 3.077061414718628, "learning_rate": 1.1786199782268523e-05, "loss": 0.4649, "step": 22699 }, { "epoch": 3.705603852903963, "grad_norm": 3.5902786254882812, "learning_rate": 1.1785568947986368e-05, "loss": 0.5242, "step": 22700 }, { "epoch": 3.7057671115464674, "grad_norm": 2.7532098293304443, "learning_rate": 1.1784938106364413e-05, "loss": 0.3974, "step": 22701 }, { "epoch": 3.705930370188972, "grad_norm": 3.56317138671875, "learning_rate": 1.178430725740525e-05, "loss": 0.6087, "step": 22702 }, { "epoch": 3.7060936288314763, "grad_norm": 3.1567234992980957, "learning_rate": 1.1783676401111474e-05, "loss": 0.3959, "step": 22703 }, { "epoch": 3.7062568874739807, "grad_norm": 3.8478262424468994, "learning_rate": 1.1783045537485679e-05, "loss": 0.5998, "step": 22704 }, { "epoch": 3.706420146116485, "grad_norm": 2.959933280944824, "learning_rate": 1.1782414666530456e-05, "loss": 0.5067, "step": 22705 }, { "epoch": 3.706583404758989, "grad_norm": 3.634862184524536, "learning_rate": 1.17817837882484e-05, "loss": 0.4953, "step": 22706 }, { "epoch": 3.7067466634014936, "grad_norm": 3.4882724285125732, "learning_rate": 1.17811529026421e-05, "loss": 0.4968, "step": 22707 }, { "epoch": 3.706909922043998, "grad_norm": 3.0840988159179688, "learning_rate": 1.1780522009714159e-05, "loss": 0.455, "step": 22708 }, { "epoch": 3.7070731806865025, "grad_norm": 3.25905442237854, "learning_rate": 1.1779891109467161e-05, "loss": 0.479, "step": 22709 }, { "epoch": 3.707236439329007, "grad_norm": 3.254746675491333, "learning_rate": 1.1779260201903705e-05, "loss": 0.4676, "step": 22710 }, { "epoch": 3.7073996979715114, "grad_norm": 3.86344575881958, "learning_rate": 1.1778629287026378e-05, "loss": 0.4586, "step": 22711 }, { "epoch": 3.707562956614016, "grad_norm": 4.252876281738281, "learning_rate": 1.1777998364837781e-05, "loss": 0.5568, "step": 22712 }, { "epoch": 3.7077262152565202, "grad_norm": 3.0536811351776123, "learning_rate": 1.1777367435340503e-05, "loss": 0.4266, "step": 22713 }, { "epoch": 3.7078894738990247, "grad_norm": 3.1713473796844482, "learning_rate": 1.177673649853714e-05, "loss": 0.4395, "step": 22714 }, { "epoch": 3.708052732541529, "grad_norm": 3.2707624435424805, "learning_rate": 1.1776105554430284e-05, "loss": 0.4333, "step": 22715 }, { "epoch": 3.7082159911840336, "grad_norm": 3.0153656005859375, "learning_rate": 1.1775474603022525e-05, "loss": 0.479, "step": 22716 }, { "epoch": 3.7083792498265375, "grad_norm": 3.255624771118164, "learning_rate": 1.1774843644316462e-05, "loss": 0.4783, "step": 22717 }, { "epoch": 3.708542508469042, "grad_norm": 3.2891998291015625, "learning_rate": 1.177421267831469e-05, "loss": 0.4675, "step": 22718 }, { "epoch": 3.7087057671115464, "grad_norm": 3.1091878414154053, "learning_rate": 1.1773581705019796e-05, "loss": 0.4752, "step": 22719 }, { "epoch": 3.708869025754051, "grad_norm": 3.7583978176116943, "learning_rate": 1.177295072443438e-05, "loss": 0.4673, "step": 22720 }, { "epoch": 3.7090322843965553, "grad_norm": 4.043074131011963, "learning_rate": 1.1772319736561033e-05, "loss": 0.4805, "step": 22721 }, { "epoch": 3.7091955430390597, "grad_norm": 3.5213441848754883, "learning_rate": 1.1771688741402345e-05, "loss": 0.5501, "step": 22722 }, { "epoch": 3.709358801681564, "grad_norm": 3.1434249877929688, "learning_rate": 1.1771057738960915e-05, "loss": 0.474, "step": 22723 }, { "epoch": 3.709522060324068, "grad_norm": 3.3061652183532715, "learning_rate": 1.177042672923934e-05, "loss": 0.5251, "step": 22724 }, { "epoch": 3.7096853189665726, "grad_norm": 2.9725427627563477, "learning_rate": 1.1769795712240203e-05, "loss": 0.4207, "step": 22725 }, { "epoch": 3.709848577609077, "grad_norm": 2.793727159500122, "learning_rate": 1.1769164687966108e-05, "loss": 0.3988, "step": 22726 }, { "epoch": 3.7100118362515815, "grad_norm": 3.27154803276062, "learning_rate": 1.1768533656419642e-05, "loss": 0.5338, "step": 22727 }, { "epoch": 3.710175094894086, "grad_norm": 3.6844089031219482, "learning_rate": 1.1767902617603404e-05, "loss": 0.4698, "step": 22728 }, { "epoch": 3.7103383535365904, "grad_norm": 3.420440435409546, "learning_rate": 1.1767271571519986e-05, "loss": 0.4587, "step": 22729 }, { "epoch": 3.710501612179095, "grad_norm": 2.8885130882263184, "learning_rate": 1.1766640518171983e-05, "loss": 0.4181, "step": 22730 }, { "epoch": 3.7106648708215992, "grad_norm": 2.879004955291748, "learning_rate": 1.1766009457561987e-05, "loss": 0.511, "step": 22731 }, { "epoch": 3.7108281294641037, "grad_norm": 3.1476199626922607, "learning_rate": 1.1765378389692592e-05, "loss": 0.4621, "step": 22732 }, { "epoch": 3.710991388106608, "grad_norm": 3.3171885013580322, "learning_rate": 1.1764747314566391e-05, "loss": 0.5429, "step": 22733 }, { "epoch": 3.7111546467491126, "grad_norm": 3.6755802631378174, "learning_rate": 1.1764116232185985e-05, "loss": 0.6747, "step": 22734 }, { "epoch": 3.7113179053916165, "grad_norm": 3.6717007160186768, "learning_rate": 1.1763485142553958e-05, "loss": 0.5068, "step": 22735 }, { "epoch": 3.711481164034121, "grad_norm": 3.189331293106079, "learning_rate": 1.1762854045672914e-05, "loss": 0.4801, "step": 22736 }, { "epoch": 3.7116444226766254, "grad_norm": 4.051173210144043, "learning_rate": 1.176222294154544e-05, "loss": 0.5782, "step": 22737 }, { "epoch": 3.71180768131913, "grad_norm": 2.6880671977996826, "learning_rate": 1.1761591830174135e-05, "loss": 0.4354, "step": 22738 }, { "epoch": 3.7119709399616343, "grad_norm": 3.1560873985290527, "learning_rate": 1.1760960711561588e-05, "loss": 0.504, "step": 22739 }, { "epoch": 3.7121341986041387, "grad_norm": 2.895967483520508, "learning_rate": 1.1760329585710399e-05, "loss": 0.436, "step": 22740 }, { "epoch": 3.7122974572466427, "grad_norm": 3.180487871170044, "learning_rate": 1.175969845262316e-05, "loss": 0.5599, "step": 22741 }, { "epoch": 3.712460715889147, "grad_norm": 2.759356737136841, "learning_rate": 1.1759067312302464e-05, "loss": 0.4003, "step": 22742 }, { "epoch": 3.7126239745316516, "grad_norm": 3.6571497917175293, "learning_rate": 1.1758436164750906e-05, "loss": 0.4677, "step": 22743 }, { "epoch": 3.712787233174156, "grad_norm": 3.2569944858551025, "learning_rate": 1.1757805009971082e-05, "loss": 0.4178, "step": 22744 }, { "epoch": 3.7129504918166605, "grad_norm": 2.990891695022583, "learning_rate": 1.1757173847965586e-05, "loss": 0.4571, "step": 22745 }, { "epoch": 3.713113750459165, "grad_norm": 3.089279890060425, "learning_rate": 1.1756542678737011e-05, "loss": 0.457, "step": 22746 }, { "epoch": 3.7132770091016694, "grad_norm": 3.258854866027832, "learning_rate": 1.1755911502287951e-05, "loss": 0.41, "step": 22747 }, { "epoch": 3.713440267744174, "grad_norm": 2.8945112228393555, "learning_rate": 1.1755280318621004e-05, "loss": 0.4739, "step": 22748 }, { "epoch": 3.7136035263866782, "grad_norm": 3.578199625015259, "learning_rate": 1.175464912773876e-05, "loss": 0.5669, "step": 22749 }, { "epoch": 3.7137667850291827, "grad_norm": 3.222813129425049, "learning_rate": 1.1754017929643818e-05, "loss": 0.4171, "step": 22750 }, { "epoch": 3.713930043671687, "grad_norm": 3.6312713623046875, "learning_rate": 1.175338672433877e-05, "loss": 0.5354, "step": 22751 }, { "epoch": 3.714093302314191, "grad_norm": 3.2455787658691406, "learning_rate": 1.175275551182621e-05, "loss": 0.5335, "step": 22752 }, { "epoch": 3.7142565609566955, "grad_norm": 3.0819709300994873, "learning_rate": 1.1752124292108735e-05, "loss": 0.4805, "step": 22753 }, { "epoch": 3.7144198195992, "grad_norm": 3.088183879852295, "learning_rate": 1.175149306518894e-05, "loss": 0.4436, "step": 22754 }, { "epoch": 3.7145830782417044, "grad_norm": 3.3815255165100098, "learning_rate": 1.1750861831069415e-05, "loss": 0.4751, "step": 22755 }, { "epoch": 3.714746336884209, "grad_norm": 2.8959004878997803, "learning_rate": 1.1750230589752763e-05, "loss": 0.4612, "step": 22756 }, { "epoch": 3.7149095955267133, "grad_norm": 2.983259439468384, "learning_rate": 1.1749599341241569e-05, "loss": 0.4249, "step": 22757 }, { "epoch": 3.7150728541692177, "grad_norm": 2.902754783630371, "learning_rate": 1.1748968085538437e-05, "loss": 0.4408, "step": 22758 }, { "epoch": 3.7152361128117217, "grad_norm": 3.352607250213623, "learning_rate": 1.1748336822645956e-05, "loss": 0.4402, "step": 22759 }, { "epoch": 3.715399371454226, "grad_norm": 3.0759754180908203, "learning_rate": 1.174770555256672e-05, "loss": 0.4512, "step": 22760 }, { "epoch": 3.7155626300967306, "grad_norm": 3.0356791019439697, "learning_rate": 1.1747074275303328e-05, "loss": 0.44, "step": 22761 }, { "epoch": 3.715725888739235, "grad_norm": 2.8643815517425537, "learning_rate": 1.1746442990858374e-05, "loss": 0.4245, "step": 22762 }, { "epoch": 3.7158891473817395, "grad_norm": 3.3617496490478516, "learning_rate": 1.1745811699234453e-05, "loss": 0.4532, "step": 22763 }, { "epoch": 3.716052406024244, "grad_norm": 3.009408712387085, "learning_rate": 1.174518040043416e-05, "loss": 0.468, "step": 22764 }, { "epoch": 3.7162156646667484, "grad_norm": 3.032494068145752, "learning_rate": 1.1744549094460087e-05, "loss": 0.4181, "step": 22765 }, { "epoch": 3.716378923309253, "grad_norm": 2.6340267658233643, "learning_rate": 1.1743917781314828e-05, "loss": 0.4307, "step": 22766 }, { "epoch": 3.7165421819517572, "grad_norm": 2.7300477027893066, "learning_rate": 1.1743286461000987e-05, "loss": 0.3494, "step": 22767 }, { "epoch": 3.7167054405942617, "grad_norm": 3.616609811782837, "learning_rate": 1.1742655133521155e-05, "loss": 0.4792, "step": 22768 }, { "epoch": 3.716868699236766, "grad_norm": 3.8045692443847656, "learning_rate": 1.1742023798877921e-05, "loss": 0.5171, "step": 22769 }, { "epoch": 3.71703195787927, "grad_norm": 3.2353484630584717, "learning_rate": 1.1741392457073888e-05, "loss": 0.5144, "step": 22770 }, { "epoch": 3.7171952165217745, "grad_norm": 2.88910174369812, "learning_rate": 1.1740761108111644e-05, "loss": 0.4724, "step": 22771 }, { "epoch": 3.717358475164279, "grad_norm": 3.75418758392334, "learning_rate": 1.1740129751993791e-05, "loss": 0.6426, "step": 22772 }, { "epoch": 3.7175217338067834, "grad_norm": 2.9349327087402344, "learning_rate": 1.1739498388722924e-05, "loss": 0.3974, "step": 22773 }, { "epoch": 3.717684992449288, "grad_norm": 3.886366844177246, "learning_rate": 1.1738867018301634e-05, "loss": 0.533, "step": 22774 }, { "epoch": 3.7178482510917923, "grad_norm": 3.727217435836792, "learning_rate": 1.1738235640732518e-05, "loss": 0.4706, "step": 22775 }, { "epoch": 3.7180115097342963, "grad_norm": 2.774564027786255, "learning_rate": 1.1737604256018173e-05, "loss": 0.3964, "step": 22776 }, { "epoch": 3.7181747683768007, "grad_norm": 2.8594677448272705, "learning_rate": 1.173697286416119e-05, "loss": 0.392, "step": 22777 }, { "epoch": 3.718338027019305, "grad_norm": 3.109417200088501, "learning_rate": 1.173634146516417e-05, "loss": 0.5087, "step": 22778 }, { "epoch": 3.7185012856618096, "grad_norm": 3.0589230060577393, "learning_rate": 1.1735710059029705e-05, "loss": 0.432, "step": 22779 }, { "epoch": 3.718664544304314, "grad_norm": 2.9827189445495605, "learning_rate": 1.173507864576039e-05, "loss": 0.4349, "step": 22780 }, { "epoch": 3.7188278029468185, "grad_norm": 3.726963520050049, "learning_rate": 1.1734447225358827e-05, "loss": 0.571, "step": 22781 }, { "epoch": 3.718991061589323, "grad_norm": 3.5250775814056396, "learning_rate": 1.1733815797827601e-05, "loss": 0.4983, "step": 22782 }, { "epoch": 3.7191543202318273, "grad_norm": 2.699601173400879, "learning_rate": 1.1733184363169316e-05, "loss": 0.3481, "step": 22783 }, { "epoch": 3.719317578874332, "grad_norm": 3.1241369247436523, "learning_rate": 1.1732552921386562e-05, "loss": 0.444, "step": 22784 }, { "epoch": 3.7194808375168362, "grad_norm": 3.516012191772461, "learning_rate": 1.173192147248194e-05, "loss": 0.4817, "step": 22785 }, { "epoch": 3.7196440961593407, "grad_norm": 2.8830411434173584, "learning_rate": 1.1731290016458038e-05, "loss": 0.4475, "step": 22786 }, { "epoch": 3.7198073548018447, "grad_norm": 3.125528573989868, "learning_rate": 1.1730658553317462e-05, "loss": 0.3769, "step": 22787 }, { "epoch": 3.719970613444349, "grad_norm": 3.3617749214172363, "learning_rate": 1.1730027083062799e-05, "loss": 0.4444, "step": 22788 }, { "epoch": 3.7201338720868535, "grad_norm": 2.6935646533966064, "learning_rate": 1.172939560569665e-05, "loss": 0.412, "step": 22789 }, { "epoch": 3.720297130729358, "grad_norm": 3.5297389030456543, "learning_rate": 1.1728764121221606e-05, "loss": 0.4711, "step": 22790 }, { "epoch": 3.7204603893718624, "grad_norm": 2.763173818588257, "learning_rate": 1.1728132629640267e-05, "loss": 0.4335, "step": 22791 }, { "epoch": 3.720623648014367, "grad_norm": 2.9385311603546143, "learning_rate": 1.1727501130955227e-05, "loss": 0.4712, "step": 22792 }, { "epoch": 3.7207869066568713, "grad_norm": 3.2799248695373535, "learning_rate": 1.1726869625169082e-05, "loss": 0.4638, "step": 22793 }, { "epoch": 3.7209501652993753, "grad_norm": 3.2563555240631104, "learning_rate": 1.172623811228443e-05, "loss": 0.455, "step": 22794 }, { "epoch": 3.7211134239418797, "grad_norm": 3.523355722427368, "learning_rate": 1.1725606592303861e-05, "loss": 0.4143, "step": 22795 }, { "epoch": 3.721276682584384, "grad_norm": 3.4394922256469727, "learning_rate": 1.1724975065229977e-05, "loss": 0.5062, "step": 22796 }, { "epoch": 3.7214399412268886, "grad_norm": 3.560094118118286, "learning_rate": 1.1724343531065372e-05, "loss": 0.4651, "step": 22797 }, { "epoch": 3.721603199869393, "grad_norm": 3.6339056491851807, "learning_rate": 1.1723711989812642e-05, "loss": 0.5119, "step": 22798 }, { "epoch": 3.7217664585118975, "grad_norm": 3.515791893005371, "learning_rate": 1.1723080441474383e-05, "loss": 0.4934, "step": 22799 }, { "epoch": 3.721929717154402, "grad_norm": 2.6909687519073486, "learning_rate": 1.172244888605319e-05, "loss": 0.3701, "step": 22800 }, { "epoch": 3.7220929757969063, "grad_norm": 3.8568825721740723, "learning_rate": 1.1721817323551661e-05, "loss": 0.4436, "step": 22801 }, { "epoch": 3.722256234439411, "grad_norm": 3.1324334144592285, "learning_rate": 1.1721185753972393e-05, "loss": 0.4277, "step": 22802 }, { "epoch": 3.722419493081915, "grad_norm": 3.403428077697754, "learning_rate": 1.1720554177317977e-05, "loss": 0.442, "step": 22803 }, { "epoch": 3.7225827517244197, "grad_norm": 3.475149393081665, "learning_rate": 1.1719922593591012e-05, "loss": 0.5183, "step": 22804 }, { "epoch": 3.7227460103669237, "grad_norm": 2.687126636505127, "learning_rate": 1.1719291002794096e-05, "loss": 0.4326, "step": 22805 }, { "epoch": 3.722909269009428, "grad_norm": 3.1088600158691406, "learning_rate": 1.1718659404929824e-05, "loss": 0.4683, "step": 22806 }, { "epoch": 3.7230725276519325, "grad_norm": 2.994845151901245, "learning_rate": 1.1718027800000795e-05, "loss": 0.5166, "step": 22807 }, { "epoch": 3.723235786294437, "grad_norm": 3.3013505935668945, "learning_rate": 1.17173961880096e-05, "loss": 0.4468, "step": 22808 }, { "epoch": 3.7233990449369414, "grad_norm": 2.894338846206665, "learning_rate": 1.1716764568958837e-05, "loss": 0.4624, "step": 22809 }, { "epoch": 3.723562303579446, "grad_norm": 2.877389907836914, "learning_rate": 1.17161329428511e-05, "loss": 0.3954, "step": 22810 }, { "epoch": 3.7237255622219503, "grad_norm": 3.1499600410461426, "learning_rate": 1.1715501309688992e-05, "loss": 0.3967, "step": 22811 }, { "epoch": 3.7238888208644543, "grad_norm": 2.931368589401245, "learning_rate": 1.1714869669475108e-05, "loss": 0.4678, "step": 22812 }, { "epoch": 3.7240520795069587, "grad_norm": 3.2051737308502197, "learning_rate": 1.171423802221204e-05, "loss": 0.5043, "step": 22813 }, { "epoch": 3.724215338149463, "grad_norm": 3.038403034210205, "learning_rate": 1.1713606367902386e-05, "loss": 0.4219, "step": 22814 }, { "epoch": 3.7243785967919676, "grad_norm": 2.526083469390869, "learning_rate": 1.1712974706548743e-05, "loss": 0.385, "step": 22815 }, { "epoch": 3.724541855434472, "grad_norm": 3.6058449745178223, "learning_rate": 1.1712343038153709e-05, "loss": 0.532, "step": 22816 }, { "epoch": 3.7247051140769765, "grad_norm": 3.1940219402313232, "learning_rate": 1.1711711362719883e-05, "loss": 0.3869, "step": 22817 }, { "epoch": 3.724868372719481, "grad_norm": 2.9536001682281494, "learning_rate": 1.1711079680249852e-05, "loss": 0.4466, "step": 22818 }, { "epoch": 3.7250316313619853, "grad_norm": 4.114256381988525, "learning_rate": 1.1710447990746221e-05, "loss": 0.5767, "step": 22819 }, { "epoch": 3.72519489000449, "grad_norm": 2.5832085609436035, "learning_rate": 1.1709816294211583e-05, "loss": 0.4194, "step": 22820 }, { "epoch": 3.725358148646994, "grad_norm": 3.626430034637451, "learning_rate": 1.1709184590648535e-05, "loss": 0.5115, "step": 22821 }, { "epoch": 3.7255214072894987, "grad_norm": 3.487544059753418, "learning_rate": 1.1708552880059679e-05, "loss": 0.4942, "step": 22822 }, { "epoch": 3.7256846659320026, "grad_norm": 3.461902618408203, "learning_rate": 1.1707921162447605e-05, "loss": 0.4129, "step": 22823 }, { "epoch": 3.725847924574507, "grad_norm": 3.271972894668579, "learning_rate": 1.170728943781491e-05, "loss": 0.5419, "step": 22824 }, { "epoch": 3.7260111832170115, "grad_norm": 3.0987489223480225, "learning_rate": 1.1706657706164195e-05, "loss": 0.43, "step": 22825 }, { "epoch": 3.726174441859516, "grad_norm": 3.8401730060577393, "learning_rate": 1.1706025967498054e-05, "loss": 0.471, "step": 22826 }, { "epoch": 3.7263377005020204, "grad_norm": 3.1697962284088135, "learning_rate": 1.1705394221819084e-05, "loss": 0.5071, "step": 22827 }, { "epoch": 3.726500959144525, "grad_norm": 2.8358466625213623, "learning_rate": 1.1704762469129882e-05, "loss": 0.4133, "step": 22828 }, { "epoch": 3.726664217787029, "grad_norm": 3.28267240524292, "learning_rate": 1.1704130709433045e-05, "loss": 0.5095, "step": 22829 }, { "epoch": 3.7268274764295333, "grad_norm": 2.9786102771759033, "learning_rate": 1.170349894273117e-05, "loss": 0.453, "step": 22830 }, { "epoch": 3.7269907350720377, "grad_norm": 3.154632568359375, "learning_rate": 1.1702867169026857e-05, "loss": 0.4936, "step": 22831 }, { "epoch": 3.727153993714542, "grad_norm": 2.705165386199951, "learning_rate": 1.1702235388322699e-05, "loss": 0.3977, "step": 22832 }, { "epoch": 3.7273172523570466, "grad_norm": 3.1782474517822266, "learning_rate": 1.1701603600621291e-05, "loss": 0.4392, "step": 22833 }, { "epoch": 3.727480510999551, "grad_norm": 2.9322750568389893, "learning_rate": 1.1700971805925235e-05, "loss": 0.4262, "step": 22834 }, { "epoch": 3.7276437696420555, "grad_norm": 4.070138931274414, "learning_rate": 1.1700340004237128e-05, "loss": 0.6441, "step": 22835 }, { "epoch": 3.72780702828456, "grad_norm": 3.0234265327453613, "learning_rate": 1.1699708195559563e-05, "loss": 0.361, "step": 22836 }, { "epoch": 3.7279702869270643, "grad_norm": 3.8945510387420654, "learning_rate": 1.169907637989514e-05, "loss": 0.5409, "step": 22837 }, { "epoch": 3.7281335455695688, "grad_norm": 2.9768431186676025, "learning_rate": 1.1698444557246456e-05, "loss": 0.4053, "step": 22838 }, { "epoch": 3.728296804212073, "grad_norm": 3.1201934814453125, "learning_rate": 1.1697812727616107e-05, "loss": 0.4418, "step": 22839 }, { "epoch": 3.728460062854577, "grad_norm": 3.601911783218384, "learning_rate": 1.169718089100669e-05, "loss": 0.5216, "step": 22840 }, { "epoch": 3.7286233214970816, "grad_norm": 3.0982041358947754, "learning_rate": 1.1696549047420804e-05, "loss": 0.4416, "step": 22841 }, { "epoch": 3.728786580139586, "grad_norm": 3.157707929611206, "learning_rate": 1.1695917196861046e-05, "loss": 0.4561, "step": 22842 }, { "epoch": 3.7289498387820905, "grad_norm": 3.5031347274780273, "learning_rate": 1.1695285339330013e-05, "loss": 0.4909, "step": 22843 }, { "epoch": 3.729113097424595, "grad_norm": 3.3066365718841553, "learning_rate": 1.1694653474830302e-05, "loss": 0.4709, "step": 22844 }, { "epoch": 3.7292763560670994, "grad_norm": 3.5913326740264893, "learning_rate": 1.1694021603364512e-05, "loss": 0.5072, "step": 22845 }, { "epoch": 3.729439614709604, "grad_norm": 3.3714652061462402, "learning_rate": 1.1693389724935237e-05, "loss": 0.4696, "step": 22846 }, { "epoch": 3.729602873352108, "grad_norm": 3.853724241256714, "learning_rate": 1.1692757839545078e-05, "loss": 0.4622, "step": 22847 }, { "epoch": 3.7297661319946123, "grad_norm": 2.937481641769409, "learning_rate": 1.1692125947196626e-05, "loss": 0.4339, "step": 22848 }, { "epoch": 3.7299293906371167, "grad_norm": 2.731936454772949, "learning_rate": 1.1691494047892486e-05, "loss": 0.3686, "step": 22849 }, { "epoch": 3.730092649279621, "grad_norm": 3.15934157371521, "learning_rate": 1.1690862141635253e-05, "loss": 0.5206, "step": 22850 }, { "epoch": 3.7302559079221256, "grad_norm": 4.083537578582764, "learning_rate": 1.1690230228427527e-05, "loss": 0.4573, "step": 22851 }, { "epoch": 3.73041916656463, "grad_norm": 2.6173293590545654, "learning_rate": 1.1689598308271898e-05, "loss": 0.3962, "step": 22852 }, { "epoch": 3.7305824252071345, "grad_norm": 3.8162617683410645, "learning_rate": 1.1688966381170972e-05, "loss": 0.5359, "step": 22853 }, { "epoch": 3.730745683849639, "grad_norm": 3.339393377304077, "learning_rate": 1.1688334447127338e-05, "loss": 0.4713, "step": 22854 }, { "epoch": 3.7309089424921433, "grad_norm": 3.5156257152557373, "learning_rate": 1.1687702506143603e-05, "loss": 0.4377, "step": 22855 }, { "epoch": 3.7310722011346478, "grad_norm": 3.7341573238372803, "learning_rate": 1.1687070558222363e-05, "loss": 0.5349, "step": 22856 }, { "epoch": 3.731235459777152, "grad_norm": 3.100710391998291, "learning_rate": 1.1686438603366206e-05, "loss": 0.4598, "step": 22857 }, { "epoch": 3.731398718419656, "grad_norm": 3.1342244148254395, "learning_rate": 1.1685806641577742e-05, "loss": 0.4398, "step": 22858 }, { "epoch": 3.7315619770621606, "grad_norm": 3.1718034744262695, "learning_rate": 1.168517467285956e-05, "loss": 0.4875, "step": 22859 }, { "epoch": 3.731725235704665, "grad_norm": 3.660085916519165, "learning_rate": 1.168454269721426e-05, "loss": 0.4752, "step": 22860 }, { "epoch": 3.7318884943471695, "grad_norm": 3.195056438446045, "learning_rate": 1.1683910714644446e-05, "loss": 0.4914, "step": 22861 }, { "epoch": 3.732051752989674, "grad_norm": 3.0625932216644287, "learning_rate": 1.1683278725152708e-05, "loss": 0.486, "step": 22862 }, { "epoch": 3.7322150116321784, "grad_norm": 3.4687118530273438, "learning_rate": 1.1682646728741647e-05, "loss": 0.5293, "step": 22863 }, { "epoch": 3.732378270274683, "grad_norm": 2.5632436275482178, "learning_rate": 1.1682014725413859e-05, "loss": 0.4071, "step": 22864 }, { "epoch": 3.732541528917187, "grad_norm": 3.6561365127563477, "learning_rate": 1.1681382715171946e-05, "loss": 0.4871, "step": 22865 }, { "epoch": 3.7327047875596913, "grad_norm": 2.7542724609375, "learning_rate": 1.1680750698018503e-05, "loss": 0.3745, "step": 22866 }, { "epoch": 3.7328680462021957, "grad_norm": 3.311831474304199, "learning_rate": 1.168011867395613e-05, "loss": 0.4868, "step": 22867 }, { "epoch": 3.7330313048447, "grad_norm": 3.7838752269744873, "learning_rate": 1.167948664298742e-05, "loss": 0.509, "step": 22868 }, { "epoch": 3.7331945634872046, "grad_norm": 3.838348865509033, "learning_rate": 1.1678854605114978e-05, "loss": 0.4853, "step": 22869 }, { "epoch": 3.733357822129709, "grad_norm": 3.3487231731414795, "learning_rate": 1.1678222560341397e-05, "loss": 0.4629, "step": 22870 }, { "epoch": 3.7335210807722135, "grad_norm": 3.175469160079956, "learning_rate": 1.1677590508669276e-05, "loss": 0.4587, "step": 22871 }, { "epoch": 3.733684339414718, "grad_norm": 2.7486002445220947, "learning_rate": 1.1676958450101215e-05, "loss": 0.4229, "step": 22872 }, { "epoch": 3.7338475980572223, "grad_norm": 3.0683658123016357, "learning_rate": 1.1676326384639811e-05, "loss": 0.4088, "step": 22873 }, { "epoch": 3.7340108566997268, "grad_norm": 3.203730583190918, "learning_rate": 1.1675694312287662e-05, "loss": 0.4647, "step": 22874 }, { "epoch": 3.734174115342231, "grad_norm": 3.503610849380493, "learning_rate": 1.1675062233047365e-05, "loss": 0.5125, "step": 22875 }, { "epoch": 3.734337373984735, "grad_norm": 3.1846933364868164, "learning_rate": 1.167443014692152e-05, "loss": 0.5218, "step": 22876 }, { "epoch": 3.7345006326272396, "grad_norm": 2.940467357635498, "learning_rate": 1.1673798053912728e-05, "loss": 0.4724, "step": 22877 }, { "epoch": 3.734663891269744, "grad_norm": 3.071615695953369, "learning_rate": 1.1673165954023583e-05, "loss": 0.4494, "step": 22878 }, { "epoch": 3.7348271499122485, "grad_norm": 5.328210830688477, "learning_rate": 1.1672533847256682e-05, "loss": 0.5167, "step": 22879 }, { "epoch": 3.734990408554753, "grad_norm": 3.1990253925323486, "learning_rate": 1.1671901733614628e-05, "loss": 0.4683, "step": 22880 }, { "epoch": 3.7351536671972574, "grad_norm": 2.993757963180542, "learning_rate": 1.1671269613100016e-05, "loss": 0.4254, "step": 22881 }, { "epoch": 3.7353169258397614, "grad_norm": 3.1209309101104736, "learning_rate": 1.1670637485715446e-05, "loss": 0.4744, "step": 22882 }, { "epoch": 3.735480184482266, "grad_norm": 3.261796236038208, "learning_rate": 1.1670005351463518e-05, "loss": 0.4441, "step": 22883 }, { "epoch": 3.7356434431247703, "grad_norm": 3.392064332962036, "learning_rate": 1.1669373210346826e-05, "loss": 0.5489, "step": 22884 }, { "epoch": 3.7358067017672747, "grad_norm": 3.04836106300354, "learning_rate": 1.1668741062367972e-05, "loss": 0.4291, "step": 22885 }, { "epoch": 3.735969960409779, "grad_norm": 3.2435643672943115, "learning_rate": 1.1668108907529556e-05, "loss": 0.4631, "step": 22886 }, { "epoch": 3.7361332190522836, "grad_norm": 3.612046003341675, "learning_rate": 1.1667476745834173e-05, "loss": 0.5362, "step": 22887 }, { "epoch": 3.736296477694788, "grad_norm": 2.862278461456299, "learning_rate": 1.166684457728442e-05, "loss": 0.4101, "step": 22888 }, { "epoch": 3.7364597363372924, "grad_norm": 3.5514564514160156, "learning_rate": 1.16662124018829e-05, "loss": 0.5227, "step": 22889 }, { "epoch": 3.736622994979797, "grad_norm": 3.177337884902954, "learning_rate": 1.1665580219632209e-05, "loss": 0.4564, "step": 22890 }, { "epoch": 3.7367862536223013, "grad_norm": 3.5192863941192627, "learning_rate": 1.166494803053495e-05, "loss": 0.5765, "step": 22891 }, { "epoch": 3.7369495122648058, "grad_norm": 2.9762072563171387, "learning_rate": 1.1664315834593713e-05, "loss": 0.4947, "step": 22892 }, { "epoch": 3.7371127709073098, "grad_norm": 2.5802578926086426, "learning_rate": 1.1663683631811106e-05, "loss": 0.4185, "step": 22893 }, { "epoch": 3.737276029549814, "grad_norm": 2.8210976123809814, "learning_rate": 1.1663051422189725e-05, "loss": 0.4004, "step": 22894 }, { "epoch": 3.7374392881923186, "grad_norm": 2.610687017440796, "learning_rate": 1.1662419205732166e-05, "loss": 0.5134, "step": 22895 }, { "epoch": 3.737602546834823, "grad_norm": 3.062969923019409, "learning_rate": 1.1661786982441027e-05, "loss": 0.5235, "step": 22896 }, { "epoch": 3.7377658054773275, "grad_norm": 3.693546772003174, "learning_rate": 1.1661154752318913e-05, "loss": 0.5077, "step": 22897 }, { "epoch": 3.737929064119832, "grad_norm": 3.0234339237213135, "learning_rate": 1.1660522515368413e-05, "loss": 0.4495, "step": 22898 }, { "epoch": 3.7380923227623364, "grad_norm": 2.9992473125457764, "learning_rate": 1.1659890271592139e-05, "loss": 0.413, "step": 22899 }, { "epoch": 3.7382555814048404, "grad_norm": 3.3632869720458984, "learning_rate": 1.165925802099268e-05, "loss": 0.4933, "step": 22900 }, { "epoch": 3.738418840047345, "grad_norm": 3.1193439960479736, "learning_rate": 1.1658625763572637e-05, "loss": 0.458, "step": 22901 }, { "epoch": 3.7385820986898493, "grad_norm": 2.893000841140747, "learning_rate": 1.1657993499334613e-05, "loss": 0.4179, "step": 22902 }, { "epoch": 3.7387453573323537, "grad_norm": 2.365248918533325, "learning_rate": 1.1657361228281198e-05, "loss": 0.3711, "step": 22903 }, { "epoch": 3.738908615974858, "grad_norm": 3.4842283725738525, "learning_rate": 1.1656728950414999e-05, "loss": 0.5141, "step": 22904 }, { "epoch": 3.7390718746173626, "grad_norm": 3.4062676429748535, "learning_rate": 1.1656096665738617e-05, "loss": 0.4725, "step": 22905 }, { "epoch": 3.739235133259867, "grad_norm": 3.094895362854004, "learning_rate": 1.1655464374254644e-05, "loss": 0.498, "step": 22906 }, { "epoch": 3.7393983919023714, "grad_norm": 2.738527536392212, "learning_rate": 1.165483207596568e-05, "loss": 0.4236, "step": 22907 }, { "epoch": 3.739561650544876, "grad_norm": 3.4473283290863037, "learning_rate": 1.1654199770874328e-05, "loss": 0.4763, "step": 22908 }, { "epoch": 3.7397249091873803, "grad_norm": 3.645048141479492, "learning_rate": 1.1653567458983184e-05, "loss": 0.5011, "step": 22909 }, { "epoch": 3.7398881678298848, "grad_norm": 3.2885987758636475, "learning_rate": 1.1652935140294852e-05, "loss": 0.5533, "step": 22910 }, { "epoch": 3.7400514264723888, "grad_norm": 3.8638250827789307, "learning_rate": 1.1652302814811925e-05, "loss": 0.5197, "step": 22911 }, { "epoch": 3.740214685114893, "grad_norm": 3.309129238128662, "learning_rate": 1.1651670482537005e-05, "loss": 0.5239, "step": 22912 }, { "epoch": 3.7403779437573976, "grad_norm": 2.836646318435669, "learning_rate": 1.1651038143472693e-05, "loss": 0.3903, "step": 22913 }, { "epoch": 3.740541202399902, "grad_norm": 3.3951761722564697, "learning_rate": 1.1650405797621587e-05, "loss": 0.5553, "step": 22914 }, { "epoch": 3.7407044610424065, "grad_norm": 2.7276387214660645, "learning_rate": 1.1649773444986283e-05, "loss": 0.4114, "step": 22915 }, { "epoch": 3.740867719684911, "grad_norm": 3.5550341606140137, "learning_rate": 1.1649141085569385e-05, "loss": 0.4988, "step": 22916 }, { "epoch": 3.741030978327415, "grad_norm": 3.041595220565796, "learning_rate": 1.164850871937349e-05, "loss": 0.4827, "step": 22917 }, { "epoch": 3.7411942369699194, "grad_norm": 3.0788049697875977, "learning_rate": 1.1647876346401198e-05, "loss": 0.4544, "step": 22918 }, { "epoch": 3.741357495612424, "grad_norm": 3.5246822834014893, "learning_rate": 1.1647243966655111e-05, "loss": 0.5064, "step": 22919 }, { "epoch": 3.7415207542549282, "grad_norm": 3.1344316005706787, "learning_rate": 1.1646611580137824e-05, "loss": 0.4472, "step": 22920 }, { "epoch": 3.7416840128974327, "grad_norm": 2.6125144958496094, "learning_rate": 1.1645979186851939e-05, "loss": 0.4042, "step": 22921 }, { "epoch": 3.741847271539937, "grad_norm": 3.308046340942383, "learning_rate": 1.1645346786800054e-05, "loss": 0.4621, "step": 22922 }, { "epoch": 3.7420105301824416, "grad_norm": 3.225813627243042, "learning_rate": 1.1644714379984771e-05, "loss": 0.4571, "step": 22923 }, { "epoch": 3.742173788824946, "grad_norm": 3.3135390281677246, "learning_rate": 1.164408196640869e-05, "loss": 0.4822, "step": 22924 }, { "epoch": 3.7423370474674504, "grad_norm": 2.856374979019165, "learning_rate": 1.1643449546074406e-05, "loss": 0.424, "step": 22925 }, { "epoch": 3.742500306109955, "grad_norm": 2.9274024963378906, "learning_rate": 1.1642817118984522e-05, "loss": 0.4706, "step": 22926 }, { "epoch": 3.7426635647524593, "grad_norm": 3.268061637878418, "learning_rate": 1.1642184685141635e-05, "loss": 0.4552, "step": 22927 }, { "epoch": 3.7428268233949633, "grad_norm": 2.984788656234741, "learning_rate": 1.1641552244548348e-05, "loss": 0.401, "step": 22928 }, { "epoch": 3.7429900820374677, "grad_norm": 2.5789215564727783, "learning_rate": 1.1640919797207262e-05, "loss": 0.4447, "step": 22929 }, { "epoch": 3.743153340679972, "grad_norm": 3.3394088745117188, "learning_rate": 1.1640287343120975e-05, "loss": 0.4678, "step": 22930 }, { "epoch": 3.7433165993224766, "grad_norm": 3.1439342498779297, "learning_rate": 1.1639654882292082e-05, "loss": 0.5215, "step": 22931 }, { "epoch": 3.743479857964981, "grad_norm": 3.0891714096069336, "learning_rate": 1.163902241472319e-05, "loss": 0.4701, "step": 22932 }, { "epoch": 3.7436431166074855, "grad_norm": 3.433293581008911, "learning_rate": 1.1638389940416895e-05, "loss": 0.3736, "step": 22933 }, { "epoch": 3.74380637524999, "grad_norm": 3.421482801437378, "learning_rate": 1.1637757459375796e-05, "loss": 0.4799, "step": 22934 }, { "epoch": 3.743969633892494, "grad_norm": 3.7871060371398926, "learning_rate": 1.1637124971602498e-05, "loss": 0.4886, "step": 22935 }, { "epoch": 3.7441328925349984, "grad_norm": 3.54250168800354, "learning_rate": 1.1636492477099592e-05, "loss": 0.4803, "step": 22936 }, { "epoch": 3.744296151177503, "grad_norm": 4.052028179168701, "learning_rate": 1.1635859975869689e-05, "loss": 0.5598, "step": 22937 }, { "epoch": 3.7444594098200072, "grad_norm": 3.515043258666992, "learning_rate": 1.163522746791538e-05, "loss": 0.4863, "step": 22938 }, { "epoch": 3.7446226684625117, "grad_norm": 3.4355978965759277, "learning_rate": 1.1634594953239272e-05, "loss": 0.4945, "step": 22939 }, { "epoch": 3.744785927105016, "grad_norm": 3.477447271347046, "learning_rate": 1.1633962431843956e-05, "loss": 0.4359, "step": 22940 }, { "epoch": 3.7449491857475206, "grad_norm": 3.8660523891448975, "learning_rate": 1.163332990373204e-05, "loss": 0.591, "step": 22941 }, { "epoch": 3.745112444390025, "grad_norm": 3.4669253826141357, "learning_rate": 1.1632697368906121e-05, "loss": 0.4828, "step": 22942 }, { "epoch": 3.7452757030325294, "grad_norm": 3.5229098796844482, "learning_rate": 1.1632064827368801e-05, "loss": 0.5106, "step": 22943 }, { "epoch": 3.745438961675034, "grad_norm": 3.5407862663269043, "learning_rate": 1.1631432279122678e-05, "loss": 0.4893, "step": 22944 }, { "epoch": 3.7456022203175383, "grad_norm": 2.5701584815979004, "learning_rate": 1.1630799724170354e-05, "loss": 0.4073, "step": 22945 }, { "epoch": 3.7457654789600423, "grad_norm": 3.202051877975464, "learning_rate": 1.1630167162514425e-05, "loss": 0.4608, "step": 22946 }, { "epoch": 3.7459287376025467, "grad_norm": 3.931187391281128, "learning_rate": 1.1629534594157497e-05, "loss": 0.5159, "step": 22947 }, { "epoch": 3.746091996245051, "grad_norm": 2.6763083934783936, "learning_rate": 1.1628902019102167e-05, "loss": 0.4043, "step": 22948 }, { "epoch": 3.7462552548875556, "grad_norm": 2.338282346725464, "learning_rate": 1.1628269437351036e-05, "loss": 0.3881, "step": 22949 }, { "epoch": 3.74641851353006, "grad_norm": 2.512805223464966, "learning_rate": 1.1627636848906703e-05, "loss": 0.3669, "step": 22950 }, { "epoch": 3.7465817721725645, "grad_norm": 2.907639741897583, "learning_rate": 1.1627004253771772e-05, "loss": 0.4582, "step": 22951 }, { "epoch": 3.746745030815069, "grad_norm": 3.268355369567871, "learning_rate": 1.1626371651948839e-05, "loss": 0.5528, "step": 22952 }, { "epoch": 3.746908289457573, "grad_norm": 3.1225032806396484, "learning_rate": 1.1625739043440503e-05, "loss": 0.4586, "step": 22953 }, { "epoch": 3.7470715481000774, "grad_norm": 3.6204097270965576, "learning_rate": 1.1625106428249373e-05, "loss": 0.4983, "step": 22954 }, { "epoch": 3.747234806742582, "grad_norm": 3.111468553543091, "learning_rate": 1.1624473806378042e-05, "loss": 0.4646, "step": 22955 }, { "epoch": 3.7473980653850862, "grad_norm": 3.892843008041382, "learning_rate": 1.1623841177829113e-05, "loss": 0.5112, "step": 22956 }, { "epoch": 3.7475613240275907, "grad_norm": 2.6955854892730713, "learning_rate": 1.1623208542605187e-05, "loss": 0.3724, "step": 22957 }, { "epoch": 3.747724582670095, "grad_norm": 2.9187400341033936, "learning_rate": 1.162257590070886e-05, "loss": 0.3137, "step": 22958 }, { "epoch": 3.7478878413125996, "grad_norm": 2.8682825565338135, "learning_rate": 1.162194325214274e-05, "loss": 0.4643, "step": 22959 }, { "epoch": 3.748051099955104, "grad_norm": 3.1817679405212402, "learning_rate": 1.1621310596909423e-05, "loss": 0.4595, "step": 22960 }, { "epoch": 3.7482143585976084, "grad_norm": 3.4445183277130127, "learning_rate": 1.1620677935011509e-05, "loss": 0.5195, "step": 22961 }, { "epoch": 3.748377617240113, "grad_norm": 3.330784320831299, "learning_rate": 1.16200452664516e-05, "loss": 0.4197, "step": 22962 }, { "epoch": 3.7485408758826173, "grad_norm": 3.1101086139678955, "learning_rate": 1.1619412591232298e-05, "loss": 0.4149, "step": 22963 }, { "epoch": 3.7487041345251213, "grad_norm": 3.53068470954895, "learning_rate": 1.1618779909356202e-05, "loss": 0.5084, "step": 22964 }, { "epoch": 3.7488673931676257, "grad_norm": 3.704279899597168, "learning_rate": 1.1618147220825913e-05, "loss": 0.6058, "step": 22965 }, { "epoch": 3.74903065181013, "grad_norm": 3.542628288269043, "learning_rate": 1.161751452564403e-05, "loss": 0.4657, "step": 22966 }, { "epoch": 3.7491939104526346, "grad_norm": 4.2013258934021, "learning_rate": 1.1616881823813157e-05, "loss": 0.5103, "step": 22967 }, { "epoch": 3.749357169095139, "grad_norm": 3.446579694747925, "learning_rate": 1.1616249115335894e-05, "loss": 0.5201, "step": 22968 }, { "epoch": 3.7495204277376435, "grad_norm": 3.0029070377349854, "learning_rate": 1.161561640021484e-05, "loss": 0.4524, "step": 22969 }, { "epoch": 3.7496836863801475, "grad_norm": 3.420522451400757, "learning_rate": 1.1614983678452596e-05, "loss": 0.5277, "step": 22970 }, { "epoch": 3.749846945022652, "grad_norm": 2.6982972621917725, "learning_rate": 1.1614350950051768e-05, "loss": 0.426, "step": 22971 }, { "epoch": 3.7500102036651564, "grad_norm": 3.909926652908325, "learning_rate": 1.161371821501495e-05, "loss": 0.5158, "step": 22972 }, { "epoch": 3.750173462307661, "grad_norm": 3.476884603500366, "learning_rate": 1.1613085473344745e-05, "loss": 0.501, "step": 22973 }, { "epoch": 3.7503367209501652, "grad_norm": 2.96041202545166, "learning_rate": 1.1612452725043757e-05, "loss": 0.5074, "step": 22974 }, { "epoch": 3.7504999795926697, "grad_norm": 2.987745523452759, "learning_rate": 1.1611819970114581e-05, "loss": 0.5285, "step": 22975 }, { "epoch": 3.750663238235174, "grad_norm": 4.082622528076172, "learning_rate": 1.1611187208559824e-05, "loss": 0.6193, "step": 22976 }, { "epoch": 3.7508264968776786, "grad_norm": 3.0396196842193604, "learning_rate": 1.1610554440382085e-05, "loss": 0.4946, "step": 22977 }, { "epoch": 3.750989755520183, "grad_norm": 3.363384962081909, "learning_rate": 1.1609921665583965e-05, "loss": 0.4637, "step": 22978 }, { "epoch": 3.7511530141626874, "grad_norm": 3.438882350921631, "learning_rate": 1.1609288884168066e-05, "loss": 0.4943, "step": 22979 }, { "epoch": 3.751316272805192, "grad_norm": 2.5547521114349365, "learning_rate": 1.1608656096136985e-05, "loss": 0.3837, "step": 22980 }, { "epoch": 3.751479531447696, "grad_norm": 3.346174955368042, "learning_rate": 1.1608023301493328e-05, "loss": 0.4812, "step": 22981 }, { "epoch": 3.7516427900902003, "grad_norm": 2.9378085136413574, "learning_rate": 1.1607390500239694e-05, "loss": 0.3914, "step": 22982 }, { "epoch": 3.7518060487327047, "grad_norm": 3.000025510787964, "learning_rate": 1.1606757692378688e-05, "loss": 0.4312, "step": 22983 }, { "epoch": 3.751969307375209, "grad_norm": 3.765432119369507, "learning_rate": 1.1606124877912903e-05, "loss": 0.5822, "step": 22984 }, { "epoch": 3.7521325660177136, "grad_norm": 3.3728981018066406, "learning_rate": 1.1605492056844948e-05, "loss": 0.5176, "step": 22985 }, { "epoch": 3.752295824660218, "grad_norm": 3.1907010078430176, "learning_rate": 1.1604859229177416e-05, "loss": 0.4493, "step": 22986 }, { "epoch": 3.7524590833027225, "grad_norm": 4.353643894195557, "learning_rate": 1.1604226394912919e-05, "loss": 0.6196, "step": 22987 }, { "epoch": 3.7526223419452265, "grad_norm": 3.13503360748291, "learning_rate": 1.1603593554054054e-05, "loss": 0.4548, "step": 22988 }, { "epoch": 3.752785600587731, "grad_norm": 2.543182373046875, "learning_rate": 1.1602960706603419e-05, "loss": 0.435, "step": 22989 }, { "epoch": 3.7529488592302354, "grad_norm": 2.839663505554199, "learning_rate": 1.1602327852563618e-05, "loss": 0.3905, "step": 22990 }, { "epoch": 3.75311211787274, "grad_norm": 2.6456546783447266, "learning_rate": 1.1601694991937253e-05, "loss": 0.4022, "step": 22991 }, { "epoch": 3.7532753765152442, "grad_norm": 3.6057145595550537, "learning_rate": 1.160106212472692e-05, "loss": 0.5829, "step": 22992 }, { "epoch": 3.7534386351577487, "grad_norm": 3.1015195846557617, "learning_rate": 1.1600429250935233e-05, "loss": 0.4967, "step": 22993 }, { "epoch": 3.753601893800253, "grad_norm": 3.148345470428467, "learning_rate": 1.1599796370564783e-05, "loss": 0.5014, "step": 22994 }, { "epoch": 3.7537651524427575, "grad_norm": 3.3844311237335205, "learning_rate": 1.1599163483618172e-05, "loss": 0.4613, "step": 22995 }, { "epoch": 3.753928411085262, "grad_norm": 3.044172525405884, "learning_rate": 1.1598530590098003e-05, "loss": 0.3871, "step": 22996 }, { "epoch": 3.7540916697277664, "grad_norm": 3.2259092330932617, "learning_rate": 1.159789769000688e-05, "loss": 0.4792, "step": 22997 }, { "epoch": 3.754254928370271, "grad_norm": 3.0862903594970703, "learning_rate": 1.1597264783347404e-05, "loss": 0.583, "step": 22998 }, { "epoch": 3.754418187012775, "grad_norm": 3.1435399055480957, "learning_rate": 1.1596631870122174e-05, "loss": 0.5042, "step": 22999 }, { "epoch": 3.7545814456552793, "grad_norm": 3.6303775310516357, "learning_rate": 1.1595998950333794e-05, "loss": 0.9736, "step": 23000 }, { "epoch": 3.7547447042977837, "grad_norm": 3.5696399211883545, "learning_rate": 1.1595366023984864e-05, "loss": 0.5559, "step": 23001 }, { "epoch": 3.754907962940288, "grad_norm": 3.160451889038086, "learning_rate": 1.1594733091077988e-05, "loss": 0.4289, "step": 23002 }, { "epoch": 3.7550712215827926, "grad_norm": 3.421337604522705, "learning_rate": 1.1594100151615767e-05, "loss": 0.4182, "step": 23003 }, { "epoch": 3.755234480225297, "grad_norm": 2.992142915725708, "learning_rate": 1.1593467205600798e-05, "loss": 0.3933, "step": 23004 }, { "epoch": 3.755397738867801, "grad_norm": 2.6173880100250244, "learning_rate": 1.1592834253035692e-05, "loss": 0.4214, "step": 23005 }, { "epoch": 3.7555609975103055, "grad_norm": 3.0822865962982178, "learning_rate": 1.1592201293923045e-05, "loss": 0.4896, "step": 23006 }, { "epoch": 3.75572425615281, "grad_norm": 3.297405242919922, "learning_rate": 1.1591568328265459e-05, "loss": 0.4513, "step": 23007 }, { "epoch": 3.7558875147953144, "grad_norm": 3.5038623809814453, "learning_rate": 1.1590935356065536e-05, "loss": 0.4818, "step": 23008 }, { "epoch": 3.756050773437819, "grad_norm": 3.2057669162750244, "learning_rate": 1.1590302377325877e-05, "loss": 0.4448, "step": 23009 }, { "epoch": 3.7562140320803232, "grad_norm": 3.476142168045044, "learning_rate": 1.1589669392049088e-05, "loss": 0.5116, "step": 23010 }, { "epoch": 3.7563772907228277, "grad_norm": 3.5167174339294434, "learning_rate": 1.1589036400237767e-05, "loss": 0.5726, "step": 23011 }, { "epoch": 3.756540549365332, "grad_norm": 3.130751609802246, "learning_rate": 1.1588403401894518e-05, "loss": 0.4116, "step": 23012 }, { "epoch": 3.7567038080078365, "grad_norm": 3.1255784034729004, "learning_rate": 1.1587770397021943e-05, "loss": 0.4063, "step": 23013 }, { "epoch": 3.756867066650341, "grad_norm": 3.7651665210723877, "learning_rate": 1.1587137385622643e-05, "loss": 0.5877, "step": 23014 }, { "epoch": 3.7570303252928454, "grad_norm": 3.564509868621826, "learning_rate": 1.1586504367699219e-05, "loss": 0.5396, "step": 23015 }, { "epoch": 3.7571935839353494, "grad_norm": 3.5659735202789307, "learning_rate": 1.1585871343254275e-05, "loss": 0.5585, "step": 23016 }, { "epoch": 3.757356842577854, "grad_norm": 3.1205036640167236, "learning_rate": 1.1585238312290415e-05, "loss": 0.4251, "step": 23017 }, { "epoch": 3.7575201012203583, "grad_norm": 3.0658881664276123, "learning_rate": 1.1584605274810239e-05, "loss": 0.4758, "step": 23018 }, { "epoch": 3.7576833598628627, "grad_norm": 3.359565258026123, "learning_rate": 1.1583972230816346e-05, "loss": 0.4912, "step": 23019 }, { "epoch": 3.757846618505367, "grad_norm": 3.0931293964385986, "learning_rate": 1.1583339180311342e-05, "loss": 0.4087, "step": 23020 }, { "epoch": 3.7580098771478716, "grad_norm": 3.0292959213256836, "learning_rate": 1.1582706123297829e-05, "loss": 0.4536, "step": 23021 }, { "epoch": 3.758173135790376, "grad_norm": 3.269692897796631, "learning_rate": 1.158207305977841e-05, "loss": 0.5023, "step": 23022 }, { "epoch": 3.75833639443288, "grad_norm": 2.852524518966675, "learning_rate": 1.1581439989755685e-05, "loss": 0.4942, "step": 23023 }, { "epoch": 3.7584996530753845, "grad_norm": 3.429595708847046, "learning_rate": 1.1580806913232254e-05, "loss": 0.5205, "step": 23024 }, { "epoch": 3.758662911717889, "grad_norm": 3.635256052017212, "learning_rate": 1.1580173830210728e-05, "loss": 0.4815, "step": 23025 }, { "epoch": 3.7588261703603933, "grad_norm": 3.03078031539917, "learning_rate": 1.1579540740693703e-05, "loss": 0.4012, "step": 23026 }, { "epoch": 3.758989429002898, "grad_norm": 3.358778953552246, "learning_rate": 1.1578907644683779e-05, "loss": 0.5113, "step": 23027 }, { "epoch": 3.7591526876454022, "grad_norm": 3.2347118854522705, "learning_rate": 1.1578274542183568e-05, "loss": 0.4526, "step": 23028 }, { "epoch": 3.7593159462879067, "grad_norm": 3.7244396209716797, "learning_rate": 1.1577641433195661e-05, "loss": 0.4863, "step": 23029 }, { "epoch": 3.759479204930411, "grad_norm": 3.59452223777771, "learning_rate": 1.1577008317722666e-05, "loss": 0.4524, "step": 23030 }, { "epoch": 3.7596424635729155, "grad_norm": 3.2525503635406494, "learning_rate": 1.1576375195767186e-05, "loss": 0.4818, "step": 23031 }, { "epoch": 3.75980572221542, "grad_norm": 2.9206507205963135, "learning_rate": 1.1575742067331826e-05, "loss": 0.3993, "step": 23032 }, { "epoch": 3.7599689808579244, "grad_norm": 3.275388479232788, "learning_rate": 1.1575108932419183e-05, "loss": 0.4852, "step": 23033 }, { "epoch": 3.7601322395004284, "grad_norm": 3.023319721221924, "learning_rate": 1.1574475791031861e-05, "loss": 0.4598, "step": 23034 }, { "epoch": 3.760295498142933, "grad_norm": 3.3641421794891357, "learning_rate": 1.1573842643172463e-05, "loss": 0.5481, "step": 23035 }, { "epoch": 3.7604587567854373, "grad_norm": 3.823378562927246, "learning_rate": 1.1573209488843592e-05, "loss": 0.5184, "step": 23036 }, { "epoch": 3.7606220154279417, "grad_norm": 2.5851404666900635, "learning_rate": 1.1572576328047855e-05, "loss": 0.354, "step": 23037 }, { "epoch": 3.760785274070446, "grad_norm": 3.0685718059539795, "learning_rate": 1.1571943160787849e-05, "loss": 0.4917, "step": 23038 }, { "epoch": 3.7609485327129506, "grad_norm": 2.7583706378936768, "learning_rate": 1.1571309987066176e-05, "loss": 0.3976, "step": 23039 }, { "epoch": 3.761111791355455, "grad_norm": 3.4851701259613037, "learning_rate": 1.157067680688544e-05, "loss": 0.4777, "step": 23040 }, { "epoch": 3.761275049997959, "grad_norm": 3.328676700592041, "learning_rate": 1.1570043620248248e-05, "loss": 0.5351, "step": 23041 }, { "epoch": 3.7614383086404635, "grad_norm": 2.6733012199401855, "learning_rate": 1.15694104271572e-05, "loss": 0.4552, "step": 23042 }, { "epoch": 3.761601567282968, "grad_norm": 2.3997697830200195, "learning_rate": 1.1568777227614896e-05, "loss": 0.342, "step": 23043 }, { "epoch": 3.7617648259254723, "grad_norm": 3.532965898513794, "learning_rate": 1.1568144021623942e-05, "loss": 0.5153, "step": 23044 }, { "epoch": 3.761928084567977, "grad_norm": 3.6092281341552734, "learning_rate": 1.1567510809186939e-05, "loss": 0.5204, "step": 23045 }, { "epoch": 3.762091343210481, "grad_norm": 3.1097323894500732, "learning_rate": 1.1566877590306492e-05, "loss": 0.3863, "step": 23046 }, { "epoch": 3.7622546018529857, "grad_norm": 3.2711563110351562, "learning_rate": 1.1566244364985203e-05, "loss": 0.4512, "step": 23047 }, { "epoch": 3.76241786049549, "grad_norm": 3.79223370552063, "learning_rate": 1.1565611133225674e-05, "loss": 0.4767, "step": 23048 }, { "epoch": 3.7625811191379945, "grad_norm": 3.143838405609131, "learning_rate": 1.1564977895030509e-05, "loss": 0.3657, "step": 23049 }, { "epoch": 3.762744377780499, "grad_norm": 3.1297731399536133, "learning_rate": 1.156434465040231e-05, "loss": 0.4777, "step": 23050 }, { "epoch": 3.7629076364230034, "grad_norm": 3.0427920818328857, "learning_rate": 1.1563711399343682e-05, "loss": 0.3745, "step": 23051 }, { "epoch": 3.7630708950655074, "grad_norm": 3.5909488201141357, "learning_rate": 1.1563078141857227e-05, "loss": 0.5036, "step": 23052 }, { "epoch": 3.763234153708012, "grad_norm": 3.3826324939727783, "learning_rate": 1.1562444877945547e-05, "loss": 0.4596, "step": 23053 }, { "epoch": 3.7633974123505163, "grad_norm": 3.0207905769348145, "learning_rate": 1.1561811607611246e-05, "loss": 0.4602, "step": 23054 }, { "epoch": 3.7635606709930207, "grad_norm": 3.3629508018493652, "learning_rate": 1.1561178330856927e-05, "loss": 0.4264, "step": 23055 }, { "epoch": 3.763723929635525, "grad_norm": 3.3163137435913086, "learning_rate": 1.1560545047685197e-05, "loss": 0.4241, "step": 23056 }, { "epoch": 3.7638871882780296, "grad_norm": 3.0034546852111816, "learning_rate": 1.1559911758098651e-05, "loss": 0.4292, "step": 23057 }, { "epoch": 3.7640504469205336, "grad_norm": 3.36629581451416, "learning_rate": 1.15592784620999e-05, "loss": 0.4373, "step": 23058 }, { "epoch": 3.764213705563038, "grad_norm": 2.504873752593994, "learning_rate": 1.1558645159691541e-05, "loss": 0.361, "step": 23059 }, { "epoch": 3.7643769642055425, "grad_norm": 3.4855213165283203, "learning_rate": 1.1558011850876183e-05, "loss": 0.4424, "step": 23060 }, { "epoch": 3.764540222848047, "grad_norm": 3.1971139907836914, "learning_rate": 1.1557378535656425e-05, "loss": 0.4241, "step": 23061 }, { "epoch": 3.7647034814905513, "grad_norm": 2.7379636764526367, "learning_rate": 1.1556745214034874e-05, "loss": 0.3754, "step": 23062 }, { "epoch": 3.764866740133056, "grad_norm": 3.9457952976226807, "learning_rate": 1.1556111886014128e-05, "loss": 0.5079, "step": 23063 }, { "epoch": 3.76502999877556, "grad_norm": 4.248471260070801, "learning_rate": 1.1555478551596794e-05, "loss": 0.5953, "step": 23064 }, { "epoch": 3.7651932574180647, "grad_norm": 3.2287023067474365, "learning_rate": 1.1554845210785478e-05, "loss": 0.4271, "step": 23065 }, { "epoch": 3.765356516060569, "grad_norm": 3.473177909851074, "learning_rate": 1.1554211863582779e-05, "loss": 0.555, "step": 23066 }, { "epoch": 3.7655197747030735, "grad_norm": 2.676576852798462, "learning_rate": 1.1553578509991303e-05, "loss": 0.3584, "step": 23067 }, { "epoch": 3.765683033345578, "grad_norm": 3.2771718502044678, "learning_rate": 1.1552945150013652e-05, "loss": 0.4153, "step": 23068 }, { "epoch": 3.765846291988082, "grad_norm": 2.8562300205230713, "learning_rate": 1.1552311783652426e-05, "loss": 0.4539, "step": 23069 }, { "epoch": 3.7660095506305864, "grad_norm": 3.218348264694214, "learning_rate": 1.1551678410910236e-05, "loss": 0.4834, "step": 23070 }, { "epoch": 3.766172809273091, "grad_norm": 2.8326117992401123, "learning_rate": 1.155104503178968e-05, "loss": 0.4203, "step": 23071 }, { "epoch": 3.7663360679155953, "grad_norm": 3.0908255577087402, "learning_rate": 1.1550411646293366e-05, "loss": 0.4182, "step": 23072 }, { "epoch": 3.7664993265580997, "grad_norm": 3.5377864837646484, "learning_rate": 1.1549778254423895e-05, "loss": 0.4649, "step": 23073 }, { "epoch": 3.766662585200604, "grad_norm": 3.751195192337036, "learning_rate": 1.1549144856183867e-05, "loss": 0.4847, "step": 23074 }, { "epoch": 3.7668258438431086, "grad_norm": 3.2552356719970703, "learning_rate": 1.1548511451575892e-05, "loss": 0.4071, "step": 23075 }, { "epoch": 3.7669891024856126, "grad_norm": 3.4816296100616455, "learning_rate": 1.1547878040602573e-05, "loss": 0.491, "step": 23076 }, { "epoch": 3.767152361128117, "grad_norm": 3.2626821994781494, "learning_rate": 1.1547244623266511e-05, "loss": 0.4321, "step": 23077 }, { "epoch": 3.7673156197706215, "grad_norm": 3.551605224609375, "learning_rate": 1.1546611199570311e-05, "loss": 0.45, "step": 23078 }, { "epoch": 3.767478878413126, "grad_norm": 2.84184193611145, "learning_rate": 1.1545977769516574e-05, "loss": 0.3753, "step": 23079 }, { "epoch": 3.7676421370556303, "grad_norm": 3.8495402336120605, "learning_rate": 1.1545344333107904e-05, "loss": 0.5316, "step": 23080 }, { "epoch": 3.7678053956981348, "grad_norm": 2.9499778747558594, "learning_rate": 1.1544710890346913e-05, "loss": 0.4189, "step": 23081 }, { "epoch": 3.767968654340639, "grad_norm": 4.00391960144043, "learning_rate": 1.1544077441236197e-05, "loss": 0.5991, "step": 23082 }, { "epoch": 3.7681319129831436, "grad_norm": 3.4385337829589844, "learning_rate": 1.154344398577836e-05, "loss": 0.4667, "step": 23083 }, { "epoch": 3.768295171625648, "grad_norm": 3.9274063110351562, "learning_rate": 1.1542810523976007e-05, "loss": 0.5328, "step": 23084 }, { "epoch": 3.7684584302681525, "grad_norm": 3.2170097827911377, "learning_rate": 1.1542177055831745e-05, "loss": 0.4859, "step": 23085 }, { "epoch": 3.768621688910657, "grad_norm": 4.094166278839111, "learning_rate": 1.1541543581348174e-05, "loss": 0.5495, "step": 23086 }, { "epoch": 3.768784947553161, "grad_norm": 3.326895236968994, "learning_rate": 1.15409101005279e-05, "loss": 0.4359, "step": 23087 }, { "epoch": 3.7689482061956654, "grad_norm": 3.3632020950317383, "learning_rate": 1.1540276613373525e-05, "loss": 0.4428, "step": 23088 }, { "epoch": 3.76911146483817, "grad_norm": 3.538647413253784, "learning_rate": 1.1539643119887656e-05, "loss": 0.5211, "step": 23089 }, { "epoch": 3.7692747234806743, "grad_norm": 3.1112287044525146, "learning_rate": 1.1539009620072894e-05, "loss": 0.4081, "step": 23090 }, { "epoch": 3.7694379821231787, "grad_norm": 3.140604257583618, "learning_rate": 1.1538376113931846e-05, "loss": 0.4932, "step": 23091 }, { "epoch": 3.769601240765683, "grad_norm": 3.5850722789764404, "learning_rate": 1.1537742601467114e-05, "loss": 0.498, "step": 23092 }, { "epoch": 3.7697644994081876, "grad_norm": 2.994748592376709, "learning_rate": 1.1537109082681303e-05, "loss": 0.4527, "step": 23093 }, { "epoch": 3.7699277580506916, "grad_norm": 3.084238052368164, "learning_rate": 1.1536475557577016e-05, "loss": 0.6544, "step": 23094 }, { "epoch": 3.770091016693196, "grad_norm": 3.3374152183532715, "learning_rate": 1.153584202615686e-05, "loss": 0.4749, "step": 23095 }, { "epoch": 3.7702542753357005, "grad_norm": 2.8770751953125, "learning_rate": 1.1535208488423436e-05, "loss": 0.4038, "step": 23096 }, { "epoch": 3.770417533978205, "grad_norm": 3.4971609115600586, "learning_rate": 1.153457494437935e-05, "loss": 0.4656, "step": 23097 }, { "epoch": 3.7705807926207093, "grad_norm": 2.966829776763916, "learning_rate": 1.1533941394027207e-05, "loss": 0.4178, "step": 23098 }, { "epoch": 3.7707440512632138, "grad_norm": 2.936998128890991, "learning_rate": 1.1533307837369607e-05, "loss": 0.4118, "step": 23099 }, { "epoch": 3.770907309905718, "grad_norm": 2.749058485031128, "learning_rate": 1.1532674274409159e-05, "loss": 0.3594, "step": 23100 }, { "epoch": 3.7710705685482226, "grad_norm": 3.6752328872680664, "learning_rate": 1.1532040705148466e-05, "loss": 0.4975, "step": 23101 }, { "epoch": 3.771233827190727, "grad_norm": 3.555462598800659, "learning_rate": 1.1531407129590132e-05, "loss": 0.4656, "step": 23102 }, { "epoch": 3.7713970858332315, "grad_norm": 3.5052921772003174, "learning_rate": 1.1530773547736765e-05, "loss": 0.4788, "step": 23103 }, { "epoch": 3.771560344475736, "grad_norm": 2.91558837890625, "learning_rate": 1.153013995959096e-05, "loss": 0.4144, "step": 23104 }, { "epoch": 3.77172360311824, "grad_norm": 2.6447103023529053, "learning_rate": 1.1529506365155331e-05, "loss": 0.3731, "step": 23105 }, { "epoch": 3.7718868617607444, "grad_norm": 3.369180202484131, "learning_rate": 1.1528872764432482e-05, "loss": 0.4626, "step": 23106 }, { "epoch": 3.772050120403249, "grad_norm": 3.70898699760437, "learning_rate": 1.1528239157425008e-05, "loss": 0.5296, "step": 23107 }, { "epoch": 3.7722133790457533, "grad_norm": 3.6412763595581055, "learning_rate": 1.152760554413552e-05, "loss": 0.4485, "step": 23108 }, { "epoch": 3.7723766376882577, "grad_norm": 3.8073298931121826, "learning_rate": 1.1526971924566625e-05, "loss": 0.5136, "step": 23109 }, { "epoch": 3.772539896330762, "grad_norm": 3.967869520187378, "learning_rate": 1.1526338298720925e-05, "loss": 0.5664, "step": 23110 }, { "epoch": 3.772703154973266, "grad_norm": 2.9921281337738037, "learning_rate": 1.1525704666601028e-05, "loss": 0.4352, "step": 23111 }, { "epoch": 3.7728664136157706, "grad_norm": 3.2596840858459473, "learning_rate": 1.1525071028209531e-05, "loss": 0.5018, "step": 23112 }, { "epoch": 3.773029672258275, "grad_norm": 2.921196937561035, "learning_rate": 1.1524437383549042e-05, "loss": 0.418, "step": 23113 }, { "epoch": 3.7731929309007795, "grad_norm": 3.2004354000091553, "learning_rate": 1.1523803732622167e-05, "loss": 0.4392, "step": 23114 }, { "epoch": 3.773356189543284, "grad_norm": 3.533818006515503, "learning_rate": 1.152317007543151e-05, "loss": 0.5255, "step": 23115 }, { "epoch": 3.7735194481857883, "grad_norm": 4.045648574829102, "learning_rate": 1.1522536411979682e-05, "loss": 0.5652, "step": 23116 }, { "epoch": 3.7736827068282928, "grad_norm": 3.2631924152374268, "learning_rate": 1.1521902742269276e-05, "loss": 0.483, "step": 23117 }, { "epoch": 3.773845965470797, "grad_norm": 3.3135526180267334, "learning_rate": 1.1521269066302901e-05, "loss": 0.4977, "step": 23118 }, { "epoch": 3.7740092241133016, "grad_norm": 3.0491061210632324, "learning_rate": 1.1520635384083165e-05, "loss": 0.4452, "step": 23119 }, { "epoch": 3.774172482755806, "grad_norm": 2.8163950443267822, "learning_rate": 1.1520001695612675e-05, "loss": 0.4363, "step": 23120 }, { "epoch": 3.7743357413983105, "grad_norm": 3.206596851348877, "learning_rate": 1.1519368000894027e-05, "loss": 0.4664, "step": 23121 }, { "epoch": 3.7744990000408145, "grad_norm": 3.680506467819214, "learning_rate": 1.1518734299929832e-05, "loss": 0.601, "step": 23122 }, { "epoch": 3.774662258683319, "grad_norm": 2.9191172122955322, "learning_rate": 1.1518100592722696e-05, "loss": 0.4167, "step": 23123 }, { "epoch": 3.7748255173258234, "grad_norm": 3.522272825241089, "learning_rate": 1.1517466879275217e-05, "loss": 0.5024, "step": 23124 }, { "epoch": 3.774988775968328, "grad_norm": 2.695448875427246, "learning_rate": 1.1516833159590011e-05, "loss": 0.4388, "step": 23125 }, { "epoch": 3.7751520346108323, "grad_norm": 3.4280638694763184, "learning_rate": 1.1516199433669672e-05, "loss": 0.563, "step": 23126 }, { "epoch": 3.7753152932533367, "grad_norm": 3.487703561782837, "learning_rate": 1.1515565701516812e-05, "loss": 0.583, "step": 23127 }, { "epoch": 3.775478551895841, "grad_norm": 3.1625783443450928, "learning_rate": 1.151493196313403e-05, "loss": 0.4654, "step": 23128 }, { "epoch": 3.775641810538345, "grad_norm": 3.2970314025878906, "learning_rate": 1.1514298218523938e-05, "loss": 0.5211, "step": 23129 }, { "epoch": 3.7758050691808496, "grad_norm": 3.6992695331573486, "learning_rate": 1.1513664467689138e-05, "loss": 0.5473, "step": 23130 }, { "epoch": 3.775968327823354, "grad_norm": 3.998058319091797, "learning_rate": 1.1513030710632233e-05, "loss": 0.6059, "step": 23131 }, { "epoch": 3.7761315864658584, "grad_norm": 3.3703508377075195, "learning_rate": 1.1512396947355834e-05, "loss": 0.5089, "step": 23132 }, { "epoch": 3.776294845108363, "grad_norm": 3.298457145690918, "learning_rate": 1.1511763177862539e-05, "loss": 0.5052, "step": 23133 }, { "epoch": 3.7764581037508673, "grad_norm": 3.5142741203308105, "learning_rate": 1.1511129402154955e-05, "loss": 0.4507, "step": 23134 }, { "epoch": 3.7766213623933718, "grad_norm": 3.497089147567749, "learning_rate": 1.151049562023569e-05, "loss": 0.4693, "step": 23135 }, { "epoch": 3.776784621035876, "grad_norm": 3.1763789653778076, "learning_rate": 1.1509861832107347e-05, "loss": 0.4844, "step": 23136 }, { "epoch": 3.7769478796783806, "grad_norm": 3.3916096687316895, "learning_rate": 1.1509228037772535e-05, "loss": 0.5738, "step": 23137 }, { "epoch": 3.777111138320885, "grad_norm": 3.0783348083496094, "learning_rate": 1.1508594237233854e-05, "loss": 0.462, "step": 23138 }, { "epoch": 3.7772743969633895, "grad_norm": 3.454641819000244, "learning_rate": 1.1507960430493914e-05, "loss": 0.5001, "step": 23139 }, { "epoch": 3.7774376556058935, "grad_norm": 3.3063666820526123, "learning_rate": 1.1507326617555315e-05, "loss": 0.4688, "step": 23140 }, { "epoch": 3.777600914248398, "grad_norm": 3.079188823699951, "learning_rate": 1.1506692798420667e-05, "loss": 0.4479, "step": 23141 }, { "epoch": 3.7777641728909024, "grad_norm": 3.007321357727051, "learning_rate": 1.1506058973092575e-05, "loss": 0.4643, "step": 23142 }, { "epoch": 3.777927431533407, "grad_norm": 3.335157871246338, "learning_rate": 1.150542514157364e-05, "loss": 0.435, "step": 23143 }, { "epoch": 3.7780906901759113, "grad_norm": 3.08689284324646, "learning_rate": 1.1504791303866474e-05, "loss": 0.4571, "step": 23144 }, { "epoch": 3.7782539488184157, "grad_norm": 3.1366987228393555, "learning_rate": 1.1504157459973678e-05, "loss": 0.4746, "step": 23145 }, { "epoch": 3.7784172074609197, "grad_norm": 3.3815364837646484, "learning_rate": 1.1503523609897858e-05, "loss": 0.4263, "step": 23146 }, { "epoch": 3.778580466103424, "grad_norm": 2.959319829940796, "learning_rate": 1.1502889753641622e-05, "loss": 0.5311, "step": 23147 }, { "epoch": 3.7787437247459286, "grad_norm": 2.7540366649627686, "learning_rate": 1.1502255891207572e-05, "loss": 0.4669, "step": 23148 }, { "epoch": 3.778906983388433, "grad_norm": 2.91025447845459, "learning_rate": 1.1501622022598316e-05, "loss": 0.4329, "step": 23149 }, { "epoch": 3.7790702420309374, "grad_norm": 3.546990156173706, "learning_rate": 1.1500988147816461e-05, "loss": 0.5467, "step": 23150 }, { "epoch": 3.779233500673442, "grad_norm": 2.761263847351074, "learning_rate": 1.1500354266864605e-05, "loss": 0.4427, "step": 23151 }, { "epoch": 3.7793967593159463, "grad_norm": 2.881476402282715, "learning_rate": 1.1499720379745362e-05, "loss": 0.444, "step": 23152 }, { "epoch": 3.7795600179584508, "grad_norm": 3.5478744506835938, "learning_rate": 1.1499086486461335e-05, "loss": 0.5284, "step": 23153 }, { "epoch": 3.779723276600955, "grad_norm": 3.7622628211975098, "learning_rate": 1.1498452587015129e-05, "loss": 0.457, "step": 23154 }, { "epoch": 3.7798865352434596, "grad_norm": 3.3191018104553223, "learning_rate": 1.1497818681409352e-05, "loss": 0.42, "step": 23155 }, { "epoch": 3.780049793885964, "grad_norm": 3.319014072418213, "learning_rate": 1.1497184769646608e-05, "loss": 0.4953, "step": 23156 }, { "epoch": 3.780213052528468, "grad_norm": 3.7708051204681396, "learning_rate": 1.1496550851729499e-05, "loss": 0.5414, "step": 23157 }, { "epoch": 3.7803763111709725, "grad_norm": 2.8717379570007324, "learning_rate": 1.1495916927660638e-05, "loss": 0.4291, "step": 23158 }, { "epoch": 3.780539569813477, "grad_norm": 3.1600191593170166, "learning_rate": 1.1495282997442625e-05, "loss": 0.5057, "step": 23159 }, { "epoch": 3.7807028284559814, "grad_norm": 3.0140926837921143, "learning_rate": 1.1494649061078071e-05, "loss": 0.4794, "step": 23160 }, { "epoch": 3.780866087098486, "grad_norm": 2.946301221847534, "learning_rate": 1.149401511856958e-05, "loss": 0.4386, "step": 23161 }, { "epoch": 3.7810293457409903, "grad_norm": 2.7133212089538574, "learning_rate": 1.1493381169919751e-05, "loss": 0.4229, "step": 23162 }, { "epoch": 3.7811926043834947, "grad_norm": 3.810460090637207, "learning_rate": 1.14927472151312e-05, "loss": 0.5326, "step": 23163 }, { "epoch": 3.7813558630259987, "grad_norm": 3.468417167663574, "learning_rate": 1.1492113254206529e-05, "loss": 0.5179, "step": 23164 }, { "epoch": 3.781519121668503, "grad_norm": 3.5938117504119873, "learning_rate": 1.1491479287148344e-05, "loss": 0.5139, "step": 23165 }, { "epoch": 3.7816823803110076, "grad_norm": 3.0812458992004395, "learning_rate": 1.1490845313959248e-05, "loss": 0.4455, "step": 23166 }, { "epoch": 3.781845638953512, "grad_norm": 2.9912877082824707, "learning_rate": 1.1490211334641854e-05, "loss": 0.4512, "step": 23167 }, { "epoch": 3.7820088975960164, "grad_norm": 2.961057662963867, "learning_rate": 1.148957734919876e-05, "loss": 0.4246, "step": 23168 }, { "epoch": 3.782172156238521, "grad_norm": 3.881023645401001, "learning_rate": 1.1488943357632578e-05, "loss": 0.5392, "step": 23169 }, { "epoch": 3.7823354148810253, "grad_norm": 3.5002992153167725, "learning_rate": 1.1488309359945913e-05, "loss": 0.5485, "step": 23170 }, { "epoch": 3.7824986735235298, "grad_norm": 3.238285779953003, "learning_rate": 1.1487675356141367e-05, "loss": 0.5026, "step": 23171 }, { "epoch": 3.782661932166034, "grad_norm": 3.574751615524292, "learning_rate": 1.1487041346221552e-05, "loss": 0.5349, "step": 23172 }, { "epoch": 3.7828251908085386, "grad_norm": 3.2461800575256348, "learning_rate": 1.148640733018907e-05, "loss": 0.4711, "step": 23173 }, { "epoch": 3.782988449451043, "grad_norm": 3.2674691677093506, "learning_rate": 1.148577330804653e-05, "loss": 0.4882, "step": 23174 }, { "epoch": 3.783151708093547, "grad_norm": 2.7270777225494385, "learning_rate": 1.1485139279796537e-05, "loss": 0.4402, "step": 23175 }, { "epoch": 3.7833149667360515, "grad_norm": 3.744885206222534, "learning_rate": 1.1484505245441697e-05, "loss": 0.4157, "step": 23176 }, { "epoch": 3.783478225378556, "grad_norm": 3.8212099075317383, "learning_rate": 1.1483871204984615e-05, "loss": 0.4911, "step": 23177 }, { "epoch": 3.7836414840210604, "grad_norm": 3.383718252182007, "learning_rate": 1.14832371584279e-05, "loss": 0.4838, "step": 23178 }, { "epoch": 3.783804742663565, "grad_norm": 2.917121648788452, "learning_rate": 1.1482603105774156e-05, "loss": 0.4346, "step": 23179 }, { "epoch": 3.7839680013060693, "grad_norm": 3.6592490673065186, "learning_rate": 1.1481969047025993e-05, "loss": 0.4941, "step": 23180 }, { "epoch": 3.7841312599485737, "grad_norm": 3.1372323036193848, "learning_rate": 1.1481334982186012e-05, "loss": 0.4577, "step": 23181 }, { "epoch": 3.7842945185910777, "grad_norm": 3.257276773452759, "learning_rate": 1.1480700911256825e-05, "loss": 0.4689, "step": 23182 }, { "epoch": 3.784457777233582, "grad_norm": 3.3247127532958984, "learning_rate": 1.1480066834241034e-05, "loss": 0.5142, "step": 23183 }, { "epoch": 3.7846210358760866, "grad_norm": 3.2927966117858887, "learning_rate": 1.1479432751141247e-05, "loss": 0.4735, "step": 23184 }, { "epoch": 3.784784294518591, "grad_norm": 3.427504777908325, "learning_rate": 1.147879866196007e-05, "loss": 0.5424, "step": 23185 }, { "epoch": 3.7849475531610954, "grad_norm": 3.135429859161377, "learning_rate": 1.1478164566700112e-05, "loss": 0.4532, "step": 23186 }, { "epoch": 3.7851108118036, "grad_norm": 3.3124399185180664, "learning_rate": 1.1477530465363977e-05, "loss": 0.5018, "step": 23187 }, { "epoch": 3.7852740704461043, "grad_norm": 3.6916637420654297, "learning_rate": 1.1476896357954273e-05, "loss": 0.4992, "step": 23188 }, { "epoch": 3.7854373290886087, "grad_norm": 3.0641112327575684, "learning_rate": 1.1476262244473602e-05, "loss": 0.402, "step": 23189 }, { "epoch": 3.785600587731113, "grad_norm": 3.4398317337036133, "learning_rate": 1.1475628124924579e-05, "loss": 0.5097, "step": 23190 }, { "epoch": 3.7857638463736176, "grad_norm": 3.347337245941162, "learning_rate": 1.1474993999309803e-05, "loss": 0.4689, "step": 23191 }, { "epoch": 3.785927105016122, "grad_norm": 3.3380138874053955, "learning_rate": 1.1474359867631883e-05, "loss": 0.4736, "step": 23192 }, { "epoch": 3.786090363658626, "grad_norm": 2.734151840209961, "learning_rate": 1.1473725729893429e-05, "loss": 0.3951, "step": 23193 }, { "epoch": 3.7862536223011305, "grad_norm": 3.713081121444702, "learning_rate": 1.1473091586097045e-05, "loss": 0.5259, "step": 23194 }, { "epoch": 3.786416880943635, "grad_norm": 3.4154813289642334, "learning_rate": 1.1472457436245334e-05, "loss": 0.4729, "step": 23195 }, { "epoch": 3.7865801395861394, "grad_norm": 2.452410936355591, "learning_rate": 1.1471823280340911e-05, "loss": 0.3702, "step": 23196 }, { "epoch": 3.786743398228644, "grad_norm": 2.7758378982543945, "learning_rate": 1.1471189118386374e-05, "loss": 0.4348, "step": 23197 }, { "epoch": 3.7869066568711482, "grad_norm": 3.7967629432678223, "learning_rate": 1.1470554950384335e-05, "loss": 0.4872, "step": 23198 }, { "epoch": 3.7870699155136522, "grad_norm": 3.4733736515045166, "learning_rate": 1.1469920776337404e-05, "loss": 0.5139, "step": 23199 }, { "epoch": 3.7872331741561567, "grad_norm": 3.0341403484344482, "learning_rate": 1.1469286596248181e-05, "loss": 0.4722, "step": 23200 }, { "epoch": 3.787396432798661, "grad_norm": 2.8978233337402344, "learning_rate": 1.1468652410119275e-05, "loss": 0.4566, "step": 23201 }, { "epoch": 3.7875596914411656, "grad_norm": 3.1607108116149902, "learning_rate": 1.1468018217953292e-05, "loss": 0.4536, "step": 23202 }, { "epoch": 3.78772295008367, "grad_norm": 3.386359691619873, "learning_rate": 1.1467384019752843e-05, "loss": 0.4551, "step": 23203 }, { "epoch": 3.7878862087261744, "grad_norm": 3.560054302215576, "learning_rate": 1.1466749815520535e-05, "loss": 0.5015, "step": 23204 }, { "epoch": 3.788049467368679, "grad_norm": 3.3043346405029297, "learning_rate": 1.1466115605258968e-05, "loss": 0.532, "step": 23205 }, { "epoch": 3.7882127260111833, "grad_norm": 3.4416868686676025, "learning_rate": 1.1465481388970752e-05, "loss": 0.4531, "step": 23206 }, { "epoch": 3.7883759846536877, "grad_norm": 3.346748113632202, "learning_rate": 1.1464847166658497e-05, "loss": 0.4634, "step": 23207 }, { "epoch": 3.788539243296192, "grad_norm": 3.0302231311798096, "learning_rate": 1.146421293832481e-05, "loss": 0.4377, "step": 23208 }, { "epoch": 3.7887025019386966, "grad_norm": 2.9667704105377197, "learning_rate": 1.1463578703972297e-05, "loss": 0.4637, "step": 23209 }, { "epoch": 3.7888657605812006, "grad_norm": 3.424633502960205, "learning_rate": 1.1462944463603565e-05, "loss": 0.4829, "step": 23210 }, { "epoch": 3.789029019223705, "grad_norm": 2.4391040802001953, "learning_rate": 1.1462310217221218e-05, "loss": 0.3663, "step": 23211 }, { "epoch": 3.7891922778662095, "grad_norm": 3.760333299636841, "learning_rate": 1.1461675964827863e-05, "loss": 0.5595, "step": 23212 }, { "epoch": 3.789355536508714, "grad_norm": 3.017585277557373, "learning_rate": 1.1461041706426116e-05, "loss": 0.4606, "step": 23213 }, { "epoch": 3.7895187951512184, "grad_norm": 3.596808671951294, "learning_rate": 1.1460407442018577e-05, "loss": 0.5267, "step": 23214 }, { "epoch": 3.789682053793723, "grad_norm": 3.5455994606018066, "learning_rate": 1.1459773171607853e-05, "loss": 0.4516, "step": 23215 }, { "epoch": 3.7898453124362272, "grad_norm": 2.6706676483154297, "learning_rate": 1.1459138895196553e-05, "loss": 0.3988, "step": 23216 }, { "epoch": 3.7900085710787312, "grad_norm": 3.6258795261383057, "learning_rate": 1.1458504612787285e-05, "loss": 0.5173, "step": 23217 }, { "epoch": 3.7901718297212357, "grad_norm": 2.9176764488220215, "learning_rate": 1.1457870324382653e-05, "loss": 0.494, "step": 23218 }, { "epoch": 3.79033508836374, "grad_norm": 3.243410587310791, "learning_rate": 1.1457236029985267e-05, "loss": 0.4715, "step": 23219 }, { "epoch": 3.7904983470062446, "grad_norm": 3.863229513168335, "learning_rate": 1.1456601729597735e-05, "loss": 0.4863, "step": 23220 }, { "epoch": 3.790661605648749, "grad_norm": 3.3288979530334473, "learning_rate": 1.1455967423222665e-05, "loss": 0.4794, "step": 23221 }, { "epoch": 3.7908248642912534, "grad_norm": 3.59124493598938, "learning_rate": 1.1455333110862661e-05, "loss": 0.4786, "step": 23222 }, { "epoch": 3.790988122933758, "grad_norm": 3.4902796745300293, "learning_rate": 1.1454698792520331e-05, "loss": 0.4975, "step": 23223 }, { "epoch": 3.7911513815762623, "grad_norm": 3.1320693492889404, "learning_rate": 1.1454064468198284e-05, "loss": 0.4792, "step": 23224 }, { "epoch": 3.7913146402187667, "grad_norm": 2.814318895339966, "learning_rate": 1.1453430137899129e-05, "loss": 0.4689, "step": 23225 }, { "epoch": 3.791477898861271, "grad_norm": 3.8615851402282715, "learning_rate": 1.1452795801625469e-05, "loss": 0.5381, "step": 23226 }, { "epoch": 3.7916411575037756, "grad_norm": 3.5632739067077637, "learning_rate": 1.1452161459379917e-05, "loss": 0.5146, "step": 23227 }, { "epoch": 3.7918044161462796, "grad_norm": 3.186448335647583, "learning_rate": 1.1451527111165074e-05, "loss": 0.47, "step": 23228 }, { "epoch": 3.791967674788784, "grad_norm": 3.826368570327759, "learning_rate": 1.1450892756983554e-05, "loss": 0.5319, "step": 23229 }, { "epoch": 3.7921309334312885, "grad_norm": 2.7271721363067627, "learning_rate": 1.145025839683796e-05, "loss": 0.3815, "step": 23230 }, { "epoch": 3.792294192073793, "grad_norm": 3.252727508544922, "learning_rate": 1.1449624030730901e-05, "loss": 0.5251, "step": 23231 }, { "epoch": 3.7924574507162974, "grad_norm": 2.7604925632476807, "learning_rate": 1.1448989658664986e-05, "loss": 0.4271, "step": 23232 }, { "epoch": 3.792620709358802, "grad_norm": 3.2241485118865967, "learning_rate": 1.1448355280642823e-05, "loss": 0.4715, "step": 23233 }, { "epoch": 3.792783968001306, "grad_norm": 3.3203132152557373, "learning_rate": 1.1447720896667017e-05, "loss": 0.4457, "step": 23234 }, { "epoch": 3.7929472266438102, "grad_norm": 3.0718493461608887, "learning_rate": 1.1447086506740176e-05, "loss": 0.4556, "step": 23235 }, { "epoch": 3.7931104852863147, "grad_norm": 3.185969829559326, "learning_rate": 1.1446452110864907e-05, "loss": 0.487, "step": 23236 }, { "epoch": 3.793273743928819, "grad_norm": 3.282891035079956, "learning_rate": 1.1445817709043822e-05, "loss": 0.4161, "step": 23237 }, { "epoch": 3.7934370025713235, "grad_norm": 3.262314796447754, "learning_rate": 1.144518330127953e-05, "loss": 0.5098, "step": 23238 }, { "epoch": 3.793600261213828, "grad_norm": 3.4973337650299072, "learning_rate": 1.1444548887574629e-05, "loss": 0.4877, "step": 23239 }, { "epoch": 3.7937635198563324, "grad_norm": 3.3450746536254883, "learning_rate": 1.1443914467931736e-05, "loss": 0.4926, "step": 23240 }, { "epoch": 3.793926778498837, "grad_norm": 3.608353853225708, "learning_rate": 1.1443280042353455e-05, "loss": 0.4617, "step": 23241 }, { "epoch": 3.7940900371413413, "grad_norm": 3.2274930477142334, "learning_rate": 1.1442645610842395e-05, "loss": 0.4969, "step": 23242 }, { "epoch": 3.7942532957838457, "grad_norm": 3.757342576980591, "learning_rate": 1.1442011173401166e-05, "loss": 0.5497, "step": 23243 }, { "epoch": 3.79441655442635, "grad_norm": 3.175591230392456, "learning_rate": 1.1441376730032372e-05, "loss": 0.522, "step": 23244 }, { "epoch": 3.794579813068854, "grad_norm": 3.503873348236084, "learning_rate": 1.144074228073862e-05, "loss": 0.517, "step": 23245 }, { "epoch": 3.7947430717113586, "grad_norm": 3.0048489570617676, "learning_rate": 1.1440107825522522e-05, "loss": 0.4632, "step": 23246 }, { "epoch": 3.794906330353863, "grad_norm": 3.7050366401672363, "learning_rate": 1.1439473364386685e-05, "loss": 0.4854, "step": 23247 }, { "epoch": 3.7950695889963675, "grad_norm": 2.9071788787841797, "learning_rate": 1.1438838897333718e-05, "loss": 0.4016, "step": 23248 }, { "epoch": 3.795232847638872, "grad_norm": 3.5872368812561035, "learning_rate": 1.1438204424366227e-05, "loss": 0.4863, "step": 23249 }, { "epoch": 3.7953961062813764, "grad_norm": 3.2793948650360107, "learning_rate": 1.143756994548682e-05, "loss": 0.4442, "step": 23250 }, { "epoch": 3.795559364923881, "grad_norm": 3.2233779430389404, "learning_rate": 1.1436935460698103e-05, "loss": 0.5049, "step": 23251 }, { "epoch": 3.795722623566385, "grad_norm": 2.7356157302856445, "learning_rate": 1.143630097000269e-05, "loss": 0.3923, "step": 23252 }, { "epoch": 3.7958858822088892, "grad_norm": 2.943103790283203, "learning_rate": 1.1435666473403187e-05, "loss": 0.4216, "step": 23253 }, { "epoch": 3.7960491408513937, "grad_norm": 3.0568525791168213, "learning_rate": 1.14350319709022e-05, "loss": 0.4638, "step": 23254 }, { "epoch": 3.796212399493898, "grad_norm": 3.0783653259277344, "learning_rate": 1.1434397462502336e-05, "loss": 0.4754, "step": 23255 }, { "epoch": 3.7963756581364025, "grad_norm": 3.2453885078430176, "learning_rate": 1.1433762948206206e-05, "loss": 0.4404, "step": 23256 }, { "epoch": 3.796538916778907, "grad_norm": 2.2909340858459473, "learning_rate": 1.1433128428016421e-05, "loss": 0.3591, "step": 23257 }, { "epoch": 3.7967021754214114, "grad_norm": 2.8424298763275146, "learning_rate": 1.1432493901935587e-05, "loss": 0.4158, "step": 23258 }, { "epoch": 3.796865434063916, "grad_norm": 3.0953187942504883, "learning_rate": 1.1431859369966308e-05, "loss": 0.4394, "step": 23259 }, { "epoch": 3.7970286927064203, "grad_norm": 2.9952008724212646, "learning_rate": 1.1431224832111197e-05, "loss": 0.4644, "step": 23260 }, { "epoch": 3.7971919513489247, "grad_norm": 4.048952102661133, "learning_rate": 1.1430590288372861e-05, "loss": 0.5056, "step": 23261 }, { "epoch": 3.797355209991429, "grad_norm": 2.8114593029022217, "learning_rate": 1.1429955738753908e-05, "loss": 0.3981, "step": 23262 }, { "epoch": 3.797518468633933, "grad_norm": 3.3479747772216797, "learning_rate": 1.1429321183256947e-05, "loss": 0.5149, "step": 23263 }, { "epoch": 3.7976817272764376, "grad_norm": 2.4193150997161865, "learning_rate": 1.1428686621884588e-05, "loss": 0.385, "step": 23264 }, { "epoch": 3.797844985918942, "grad_norm": 3.5372207164764404, "learning_rate": 1.1428052054639436e-05, "loss": 0.5016, "step": 23265 }, { "epoch": 3.7980082445614465, "grad_norm": 3.410879373550415, "learning_rate": 1.1427417481524104e-05, "loss": 0.5282, "step": 23266 }, { "epoch": 3.798171503203951, "grad_norm": 3.7328176498413086, "learning_rate": 1.1426782902541194e-05, "loss": 0.5012, "step": 23267 }, { "epoch": 3.7983347618464554, "grad_norm": 3.2918734550476074, "learning_rate": 1.1426148317693319e-05, "loss": 0.4871, "step": 23268 }, { "epoch": 3.79849802048896, "grad_norm": 3.436224937438965, "learning_rate": 1.1425513726983088e-05, "loss": 0.5016, "step": 23269 }, { "epoch": 3.798661279131464, "grad_norm": 3.081299304962158, "learning_rate": 1.142487913041311e-05, "loss": 0.4638, "step": 23270 }, { "epoch": 3.7988245377739682, "grad_norm": 4.104404926300049, "learning_rate": 1.1424244527985989e-05, "loss": 0.5031, "step": 23271 }, { "epoch": 3.7989877964164727, "grad_norm": 3.2108652591705322, "learning_rate": 1.1423609919704337e-05, "loss": 0.4619, "step": 23272 }, { "epoch": 3.799151055058977, "grad_norm": 3.010507583618164, "learning_rate": 1.1422975305570763e-05, "loss": 0.4352, "step": 23273 }, { "epoch": 3.7993143137014815, "grad_norm": 3.4409313201904297, "learning_rate": 1.1422340685587873e-05, "loss": 0.5148, "step": 23274 }, { "epoch": 3.799477572343986, "grad_norm": 3.2863731384277344, "learning_rate": 1.142170605975828e-05, "loss": 0.5716, "step": 23275 }, { "epoch": 3.7996408309864904, "grad_norm": 3.252600908279419, "learning_rate": 1.1421071428084587e-05, "loss": 0.4677, "step": 23276 }, { "epoch": 3.799804089628995, "grad_norm": 2.7124266624450684, "learning_rate": 1.1420436790569411e-05, "loss": 0.407, "step": 23277 }, { "epoch": 3.7999673482714993, "grad_norm": 2.8219246864318848, "learning_rate": 1.141980214721535e-05, "loss": 0.4616, "step": 23278 }, { "epoch": 3.8001306069140037, "grad_norm": 3.317621946334839, "learning_rate": 1.141916749802502e-05, "loss": 0.5126, "step": 23279 }, { "epoch": 3.800293865556508, "grad_norm": 3.665595054626465, "learning_rate": 1.1418532843001032e-05, "loss": 0.6152, "step": 23280 }, { "epoch": 3.800457124199012, "grad_norm": 3.7742793560028076, "learning_rate": 1.1417898182145988e-05, "loss": 0.5255, "step": 23281 }, { "epoch": 3.8006203828415166, "grad_norm": 2.970231294631958, "learning_rate": 1.1417263515462502e-05, "loss": 0.4143, "step": 23282 }, { "epoch": 3.800783641484021, "grad_norm": 3.1420185565948486, "learning_rate": 1.1416628842953178e-05, "loss": 0.55, "step": 23283 }, { "epoch": 3.8009469001265255, "grad_norm": 3.31068754196167, "learning_rate": 1.141599416462063e-05, "loss": 0.4823, "step": 23284 }, { "epoch": 3.80111015876903, "grad_norm": 3.2578988075256348, "learning_rate": 1.1415359480467463e-05, "loss": 0.4525, "step": 23285 }, { "epoch": 3.8012734174115343, "grad_norm": 3.2555363178253174, "learning_rate": 1.1414724790496288e-05, "loss": 0.549, "step": 23286 }, { "epoch": 3.8014366760540383, "grad_norm": 3.0305497646331787, "learning_rate": 1.1414090094709716e-05, "loss": 0.4965, "step": 23287 }, { "epoch": 3.801599934696543, "grad_norm": 2.634877920150757, "learning_rate": 1.1413455393110351e-05, "loss": 0.4331, "step": 23288 }, { "epoch": 3.801763193339047, "grad_norm": 3.0839314460754395, "learning_rate": 1.1412820685700803e-05, "loss": 0.4207, "step": 23289 }, { "epoch": 3.8019264519815517, "grad_norm": 3.4819743633270264, "learning_rate": 1.1412185972483685e-05, "loss": 0.4914, "step": 23290 }, { "epoch": 3.802089710624056, "grad_norm": 3.558697462081909, "learning_rate": 1.1411551253461603e-05, "loss": 0.5011, "step": 23291 }, { "epoch": 3.8022529692665605, "grad_norm": 2.8823723793029785, "learning_rate": 1.141091652863717e-05, "loss": 0.4195, "step": 23292 }, { "epoch": 3.802416227909065, "grad_norm": 2.765558958053589, "learning_rate": 1.141028179801299e-05, "loss": 0.4319, "step": 23293 }, { "epoch": 3.8025794865515694, "grad_norm": 3.3384313583374023, "learning_rate": 1.1409647061591674e-05, "loss": 0.4417, "step": 23294 }, { "epoch": 3.802742745194074, "grad_norm": 3.261752128601074, "learning_rate": 1.1409012319375828e-05, "loss": 0.4862, "step": 23295 }, { "epoch": 3.8029060038365783, "grad_norm": 2.934488296508789, "learning_rate": 1.1408377571368066e-05, "loss": 0.419, "step": 23296 }, { "epoch": 3.8030692624790827, "grad_norm": 3.177093982696533, "learning_rate": 1.1407742817571e-05, "loss": 0.5007, "step": 23297 }, { "epoch": 3.8032325211215867, "grad_norm": 3.261331558227539, "learning_rate": 1.1407108057987228e-05, "loss": 0.4865, "step": 23298 }, { "epoch": 3.803395779764091, "grad_norm": 3.129502058029175, "learning_rate": 1.1406473292619371e-05, "loss": 0.504, "step": 23299 }, { "epoch": 3.8035590384065956, "grad_norm": 3.0761170387268066, "learning_rate": 1.140583852147003e-05, "loss": 0.4972, "step": 23300 }, { "epoch": 3.8037222970491, "grad_norm": 3.3374555110931396, "learning_rate": 1.1405203744541819e-05, "loss": 0.444, "step": 23301 }, { "epoch": 3.8038855556916045, "grad_norm": 3.157207727432251, "learning_rate": 1.1404568961837348e-05, "loss": 0.4304, "step": 23302 }, { "epoch": 3.804048814334109, "grad_norm": 3.453618049621582, "learning_rate": 1.1403934173359224e-05, "loss": 0.4851, "step": 23303 }, { "epoch": 3.8042120729766133, "grad_norm": 3.5209553241729736, "learning_rate": 1.1403299379110053e-05, "loss": 0.4614, "step": 23304 }, { "epoch": 3.8043753316191173, "grad_norm": 2.97568416595459, "learning_rate": 1.1402664579092453e-05, "loss": 0.4345, "step": 23305 }, { "epoch": 3.8045385902616218, "grad_norm": 3.3825316429138184, "learning_rate": 1.1402029773309025e-05, "loss": 0.5368, "step": 23306 }, { "epoch": 3.804701848904126, "grad_norm": 3.4866631031036377, "learning_rate": 1.1401394961762382e-05, "loss": 0.4682, "step": 23307 }, { "epoch": 3.8048651075466307, "grad_norm": 2.7616796493530273, "learning_rate": 1.1400760144455137e-05, "loss": 0.4044, "step": 23308 }, { "epoch": 3.805028366189135, "grad_norm": 4.3352484703063965, "learning_rate": 1.1400125321389892e-05, "loss": 0.4784, "step": 23309 }, { "epoch": 3.8051916248316395, "grad_norm": 3.345616579055786, "learning_rate": 1.1399490492569263e-05, "loss": 0.5069, "step": 23310 }, { "epoch": 3.805354883474144, "grad_norm": 3.0951719284057617, "learning_rate": 1.1398855657995855e-05, "loss": 0.4405, "step": 23311 }, { "epoch": 3.8055181421166484, "grad_norm": 2.9900145530700684, "learning_rate": 1.1398220817672282e-05, "loss": 0.4364, "step": 23312 }, { "epoch": 3.805681400759153, "grad_norm": 3.4905545711517334, "learning_rate": 1.1397585971601148e-05, "loss": 0.541, "step": 23313 }, { "epoch": 3.8058446594016573, "grad_norm": 3.0144591331481934, "learning_rate": 1.1396951119785069e-05, "loss": 0.4832, "step": 23314 }, { "epoch": 3.8060079180441617, "grad_norm": 3.5075979232788086, "learning_rate": 1.139631626222665e-05, "loss": 0.5175, "step": 23315 }, { "epoch": 3.8061711766866657, "grad_norm": 2.850292921066284, "learning_rate": 1.1395681398928503e-05, "loss": 0.4672, "step": 23316 }, { "epoch": 3.80633443532917, "grad_norm": 3.4076952934265137, "learning_rate": 1.1395046529893235e-05, "loss": 0.4436, "step": 23317 }, { "epoch": 3.8064976939716746, "grad_norm": 3.380481004714966, "learning_rate": 1.1394411655123458e-05, "loss": 0.4444, "step": 23318 }, { "epoch": 3.806660952614179, "grad_norm": 4.359226226806641, "learning_rate": 1.1393776774621781e-05, "loss": 0.5712, "step": 23319 }, { "epoch": 3.8068242112566835, "grad_norm": 2.9828155040740967, "learning_rate": 1.1393141888390815e-05, "loss": 0.414, "step": 23320 }, { "epoch": 3.806987469899188, "grad_norm": 2.936156749725342, "learning_rate": 1.139250699643317e-05, "loss": 0.4421, "step": 23321 }, { "epoch": 3.8071507285416923, "grad_norm": 3.08307147026062, "learning_rate": 1.1391872098751452e-05, "loss": 0.4451, "step": 23322 }, { "epoch": 3.8073139871841963, "grad_norm": 3.610842704772949, "learning_rate": 1.1391237195348273e-05, "loss": 0.4905, "step": 23323 }, { "epoch": 3.8074772458267008, "grad_norm": 3.1021032333374023, "learning_rate": 1.1390602286226244e-05, "loss": 0.4722, "step": 23324 }, { "epoch": 3.807640504469205, "grad_norm": 3.92445969581604, "learning_rate": 1.1389967371387975e-05, "loss": 0.5096, "step": 23325 }, { "epoch": 3.8078037631117096, "grad_norm": 3.4624557495117188, "learning_rate": 1.1389332450836077e-05, "loss": 0.46, "step": 23326 }, { "epoch": 3.807967021754214, "grad_norm": 3.2723171710968018, "learning_rate": 1.1388697524573154e-05, "loss": 0.4165, "step": 23327 }, { "epoch": 3.8081302803967185, "grad_norm": 3.4602057933807373, "learning_rate": 1.138806259260182e-05, "loss": 0.5099, "step": 23328 }, { "epoch": 3.808293539039223, "grad_norm": 2.5476863384246826, "learning_rate": 1.1387427654924685e-05, "loss": 0.3923, "step": 23329 }, { "epoch": 3.8084567976817274, "grad_norm": 2.2699496746063232, "learning_rate": 1.1386792711544361e-05, "loss": 0.3478, "step": 23330 }, { "epoch": 3.808620056324232, "grad_norm": 3.3565311431884766, "learning_rate": 1.1386157762463457e-05, "loss": 0.4075, "step": 23331 }, { "epoch": 3.8087833149667363, "grad_norm": 3.8491294384002686, "learning_rate": 1.1385522807684579e-05, "loss": 0.4948, "step": 23332 }, { "epoch": 3.8089465736092407, "grad_norm": 3.447542905807495, "learning_rate": 1.1384887847210337e-05, "loss": 0.4701, "step": 23333 }, { "epoch": 3.8091098322517447, "grad_norm": 3.6653213500976562, "learning_rate": 1.1384252881043347e-05, "loss": 0.4564, "step": 23334 }, { "epoch": 3.809273090894249, "grad_norm": 2.9532647132873535, "learning_rate": 1.1383617909186218e-05, "loss": 0.458, "step": 23335 }, { "epoch": 3.8094363495367536, "grad_norm": 3.057678699493408, "learning_rate": 1.1382982931641559e-05, "loss": 0.4062, "step": 23336 }, { "epoch": 3.809599608179258, "grad_norm": 2.837749481201172, "learning_rate": 1.1382347948411976e-05, "loss": 0.4514, "step": 23337 }, { "epoch": 3.8097628668217625, "grad_norm": 2.884763717651367, "learning_rate": 1.1381712959500084e-05, "loss": 0.4255, "step": 23338 }, { "epoch": 3.809926125464267, "grad_norm": 3.6494622230529785, "learning_rate": 1.1381077964908489e-05, "loss": 0.4945, "step": 23339 }, { "epoch": 3.810089384106771, "grad_norm": 3.6602063179016113, "learning_rate": 1.1380442964639806e-05, "loss": 0.5424, "step": 23340 }, { "epoch": 3.8102526427492753, "grad_norm": 3.2463228702545166, "learning_rate": 1.1379807958696647e-05, "loss": 0.4966, "step": 23341 }, { "epoch": 3.8104159013917798, "grad_norm": 3.448512077331543, "learning_rate": 1.1379172947081614e-05, "loss": 0.4957, "step": 23342 }, { "epoch": 3.810579160034284, "grad_norm": 3.055799722671509, "learning_rate": 1.1378537929797322e-05, "loss": 0.5106, "step": 23343 }, { "epoch": 3.8107424186767886, "grad_norm": 3.2254104614257812, "learning_rate": 1.137790290684638e-05, "loss": 0.4364, "step": 23344 }, { "epoch": 3.810905677319293, "grad_norm": 2.9002203941345215, "learning_rate": 1.1377267878231404e-05, "loss": 0.4066, "step": 23345 }, { "epoch": 3.8110689359617975, "grad_norm": 3.0336191654205322, "learning_rate": 1.1376632843954999e-05, "loss": 0.4973, "step": 23346 }, { "epoch": 3.811232194604302, "grad_norm": 3.8233416080474854, "learning_rate": 1.1375997804019772e-05, "loss": 0.6091, "step": 23347 }, { "epoch": 3.8113954532468064, "grad_norm": 2.818274736404419, "learning_rate": 1.1375362758428341e-05, "loss": 0.4107, "step": 23348 }, { "epoch": 3.811558711889311, "grad_norm": 3.5351476669311523, "learning_rate": 1.137472770718331e-05, "loss": 0.4844, "step": 23349 }, { "epoch": 3.8117219705318153, "grad_norm": 2.746422529220581, "learning_rate": 1.1374092650287297e-05, "loss": 0.3961, "step": 23350 }, { "epoch": 3.8118852291743193, "grad_norm": 3.5689704418182373, "learning_rate": 1.1373457587742904e-05, "loss": 0.4785, "step": 23351 }, { "epoch": 3.8120484878168237, "grad_norm": 3.393085241317749, "learning_rate": 1.1372822519552746e-05, "loss": 0.5061, "step": 23352 }, { "epoch": 3.812211746459328, "grad_norm": 3.0954198837280273, "learning_rate": 1.1372187445719435e-05, "loss": 0.4748, "step": 23353 }, { "epoch": 3.8123750051018326, "grad_norm": 3.4167299270629883, "learning_rate": 1.1371552366245578e-05, "loss": 0.4613, "step": 23354 }, { "epoch": 3.812538263744337, "grad_norm": 4.386897563934326, "learning_rate": 1.1370917281133789e-05, "loss": 0.5949, "step": 23355 }, { "epoch": 3.8127015223868415, "grad_norm": 3.7802765369415283, "learning_rate": 1.1370282190386675e-05, "loss": 0.5456, "step": 23356 }, { "epoch": 3.812864781029346, "grad_norm": 4.078874111175537, "learning_rate": 1.1369647094006848e-05, "loss": 0.5906, "step": 23357 }, { "epoch": 3.81302803967185, "grad_norm": 2.972954273223877, "learning_rate": 1.136901199199692e-05, "loss": 0.4666, "step": 23358 }, { "epoch": 3.8131912983143543, "grad_norm": 3.691157341003418, "learning_rate": 1.13683768843595e-05, "loss": 0.4863, "step": 23359 }, { "epoch": 3.8133545569568588, "grad_norm": 3.638882637023926, "learning_rate": 1.1367741771097197e-05, "loss": 0.578, "step": 23360 }, { "epoch": 3.813517815599363, "grad_norm": 2.9844038486480713, "learning_rate": 1.1367106652212626e-05, "loss": 0.4203, "step": 23361 }, { "epoch": 3.8136810742418676, "grad_norm": 3.0366928577423096, "learning_rate": 1.1366471527708398e-05, "loss": 0.3603, "step": 23362 }, { "epoch": 3.813844332884372, "grad_norm": 3.0094757080078125, "learning_rate": 1.136583639758712e-05, "loss": 0.4117, "step": 23363 }, { "epoch": 3.8140075915268765, "grad_norm": 3.615877866744995, "learning_rate": 1.1365201261851405e-05, "loss": 0.5074, "step": 23364 }, { "epoch": 3.814170850169381, "grad_norm": 3.2891082763671875, "learning_rate": 1.1364566120503864e-05, "loss": 0.4426, "step": 23365 }, { "epoch": 3.8143341088118854, "grad_norm": 3.72619366645813, "learning_rate": 1.1363930973547101e-05, "loss": 0.5145, "step": 23366 }, { "epoch": 3.81449736745439, "grad_norm": 3.539144992828369, "learning_rate": 1.1363295820983738e-05, "loss": 0.4889, "step": 23367 }, { "epoch": 3.8146606260968943, "grad_norm": 3.7823050022125244, "learning_rate": 1.1362660662816377e-05, "loss": 0.499, "step": 23368 }, { "epoch": 3.8148238847393983, "grad_norm": 3.3485913276672363, "learning_rate": 1.1362025499047637e-05, "loss": 0.5245, "step": 23369 }, { "epoch": 3.8149871433819027, "grad_norm": 3.3867907524108887, "learning_rate": 1.1361390329680124e-05, "loss": 0.4736, "step": 23370 }, { "epoch": 3.815150402024407, "grad_norm": 3.2346296310424805, "learning_rate": 1.1360755154716448e-05, "loss": 0.4775, "step": 23371 }, { "epoch": 3.8153136606669116, "grad_norm": 2.692107915878296, "learning_rate": 1.136011997415922e-05, "loss": 0.4077, "step": 23372 }, { "epoch": 3.815476919309416, "grad_norm": 3.3894002437591553, "learning_rate": 1.1359484788011053e-05, "loss": 0.564, "step": 23373 }, { "epoch": 3.8156401779519205, "grad_norm": 3.4638946056365967, "learning_rate": 1.135884959627456e-05, "loss": 0.464, "step": 23374 }, { "epoch": 3.8158034365944244, "grad_norm": 2.95524263381958, "learning_rate": 1.1358214398952348e-05, "loss": 0.4603, "step": 23375 }, { "epoch": 3.815966695236929, "grad_norm": 2.9519522190093994, "learning_rate": 1.1357579196047028e-05, "loss": 0.451, "step": 23376 }, { "epoch": 3.8161299538794333, "grad_norm": 3.07503080368042, "learning_rate": 1.1356943987561213e-05, "loss": 0.4929, "step": 23377 }, { "epoch": 3.8162932125219378, "grad_norm": 2.962567090988159, "learning_rate": 1.1356308773497513e-05, "loss": 0.3721, "step": 23378 }, { "epoch": 3.816456471164442, "grad_norm": 2.9899957180023193, "learning_rate": 1.1355673553858542e-05, "loss": 0.439, "step": 23379 }, { "epoch": 3.8166197298069466, "grad_norm": 3.0403964519500732, "learning_rate": 1.135503832864691e-05, "loss": 0.504, "step": 23380 }, { "epoch": 3.816782988449451, "grad_norm": 3.4553873538970947, "learning_rate": 1.1354403097865224e-05, "loss": 0.5094, "step": 23381 }, { "epoch": 3.8169462470919555, "grad_norm": 2.918562173843384, "learning_rate": 1.1353767861516099e-05, "loss": 0.3941, "step": 23382 }, { "epoch": 3.81710950573446, "grad_norm": 3.2285711765289307, "learning_rate": 1.1353132619602143e-05, "loss": 0.4931, "step": 23383 }, { "epoch": 3.8172727643769644, "grad_norm": 3.479140281677246, "learning_rate": 1.1352497372125972e-05, "loss": 0.5429, "step": 23384 }, { "epoch": 3.817436023019469, "grad_norm": 2.904447555541992, "learning_rate": 1.1351862119090198e-05, "loss": 0.4104, "step": 23385 }, { "epoch": 3.817599281661973, "grad_norm": 2.7947938442230225, "learning_rate": 1.1351226860497425e-05, "loss": 0.4508, "step": 23386 }, { "epoch": 3.8177625403044773, "grad_norm": 3.147939443588257, "learning_rate": 1.1350591596350272e-05, "loss": 0.4746, "step": 23387 }, { "epoch": 3.8179257989469817, "grad_norm": 3.0847363471984863, "learning_rate": 1.1349956326651344e-05, "loss": 0.3964, "step": 23388 }, { "epoch": 3.818089057589486, "grad_norm": 3.4445323944091797, "learning_rate": 1.1349321051403256e-05, "loss": 0.5123, "step": 23389 }, { "epoch": 3.8182523162319906, "grad_norm": 2.925628423690796, "learning_rate": 1.1348685770608621e-05, "loss": 0.4389, "step": 23390 }, { "epoch": 3.818415574874495, "grad_norm": 2.668478012084961, "learning_rate": 1.1348050484270046e-05, "loss": 0.3964, "step": 23391 }, { "epoch": 3.8185788335169994, "grad_norm": 2.5698177814483643, "learning_rate": 1.1347415192390146e-05, "loss": 0.4111, "step": 23392 }, { "epoch": 3.8187420921595034, "grad_norm": 3.2929999828338623, "learning_rate": 1.1346779894971526e-05, "loss": 0.4581, "step": 23393 }, { "epoch": 3.818905350802008, "grad_norm": 3.017808198928833, "learning_rate": 1.1346144592016807e-05, "loss": 0.4463, "step": 23394 }, { "epoch": 3.8190686094445123, "grad_norm": 3.0147242546081543, "learning_rate": 1.1345509283528595e-05, "loss": 0.4572, "step": 23395 }, { "epoch": 3.8192318680870168, "grad_norm": 3.5063822269439697, "learning_rate": 1.1344873969509502e-05, "loss": 0.5082, "step": 23396 }, { "epoch": 3.819395126729521, "grad_norm": 3.6590065956115723, "learning_rate": 1.1344238649962141e-05, "loss": 0.527, "step": 23397 }, { "epoch": 3.8195583853720256, "grad_norm": 2.967478036880493, "learning_rate": 1.134360332488912e-05, "loss": 0.4144, "step": 23398 }, { "epoch": 3.81972164401453, "grad_norm": 3.4550793170928955, "learning_rate": 1.1342967994293055e-05, "loss": 0.4379, "step": 23399 }, { "epoch": 3.8198849026570345, "grad_norm": 3.2810282707214355, "learning_rate": 1.1342332658176556e-05, "loss": 0.4637, "step": 23400 }, { "epoch": 3.820048161299539, "grad_norm": 3.355839252471924, "learning_rate": 1.1341697316542235e-05, "loss": 0.4047, "step": 23401 }, { "epoch": 3.8202114199420434, "grad_norm": 3.324037790298462, "learning_rate": 1.13410619693927e-05, "loss": 0.4438, "step": 23402 }, { "epoch": 3.820374678584548, "grad_norm": 3.490302801132202, "learning_rate": 1.134042661673057e-05, "loss": 0.489, "step": 23403 }, { "epoch": 3.820537937227052, "grad_norm": 3.5026373863220215, "learning_rate": 1.1339791258558448e-05, "loss": 0.4556, "step": 23404 }, { "epoch": 3.8207011958695563, "grad_norm": 3.4251904487609863, "learning_rate": 1.133915589487895e-05, "loss": 0.4692, "step": 23405 }, { "epoch": 3.8208644545120607, "grad_norm": 2.64054274559021, "learning_rate": 1.133852052569469e-05, "loss": 0.3706, "step": 23406 }, { "epoch": 3.821027713154565, "grad_norm": 3.002365827560425, "learning_rate": 1.1337885151008276e-05, "loss": 0.4704, "step": 23407 }, { "epoch": 3.8211909717970696, "grad_norm": 3.760749340057373, "learning_rate": 1.1337249770822323e-05, "loss": 0.4458, "step": 23408 }, { "epoch": 3.821354230439574, "grad_norm": 2.535613536834717, "learning_rate": 1.1336614385139441e-05, "loss": 0.3787, "step": 23409 }, { "epoch": 3.8215174890820784, "grad_norm": 3.3570737838745117, "learning_rate": 1.133597899396224e-05, "loss": 0.479, "step": 23410 }, { "epoch": 3.8216807477245824, "grad_norm": 3.3835995197296143, "learning_rate": 1.1335343597293335e-05, "loss": 0.4448, "step": 23411 }, { "epoch": 3.821844006367087, "grad_norm": 3.3918309211730957, "learning_rate": 1.1334708195135338e-05, "loss": 0.4505, "step": 23412 }, { "epoch": 3.8220072650095913, "grad_norm": 3.253993511199951, "learning_rate": 1.1334072787490857e-05, "loss": 0.4521, "step": 23413 }, { "epoch": 3.8221705236520958, "grad_norm": 3.3088154792785645, "learning_rate": 1.133343737436251e-05, "loss": 0.4675, "step": 23414 }, { "epoch": 3.8223337822946, "grad_norm": 2.7237982749938965, "learning_rate": 1.1332801955752901e-05, "loss": 0.39, "step": 23415 }, { "epoch": 3.8224970409371046, "grad_norm": 3.554030656814575, "learning_rate": 1.133216653166465e-05, "loss": 0.5284, "step": 23416 }, { "epoch": 3.822660299579609, "grad_norm": 3.2748680114746094, "learning_rate": 1.1331531102100363e-05, "loss": 0.4603, "step": 23417 }, { "epoch": 3.8228235582221135, "grad_norm": 3.4148404598236084, "learning_rate": 1.1330895667062656e-05, "loss": 0.5686, "step": 23418 }, { "epoch": 3.822986816864618, "grad_norm": 2.7706680297851562, "learning_rate": 1.1330260226554142e-05, "loss": 0.4362, "step": 23419 }, { "epoch": 3.8231500755071224, "grad_norm": 2.82122540473938, "learning_rate": 1.1329624780577427e-05, "loss": 0.4777, "step": 23420 }, { "epoch": 3.823313334149627, "grad_norm": 3.0417280197143555, "learning_rate": 1.1328989329135125e-05, "loss": 0.452, "step": 23421 }, { "epoch": 3.823476592792131, "grad_norm": 3.451733112335205, "learning_rate": 1.1328353872229851e-05, "loss": 0.5349, "step": 23422 }, { "epoch": 3.8236398514346353, "grad_norm": 3.626995325088501, "learning_rate": 1.1327718409864216e-05, "loss": 0.4622, "step": 23423 }, { "epoch": 3.8238031100771397, "grad_norm": 3.4974446296691895, "learning_rate": 1.1327082942040834e-05, "loss": 0.5873, "step": 23424 }, { "epoch": 3.823966368719644, "grad_norm": 3.151679277420044, "learning_rate": 1.1326447468762316e-05, "loss": 0.4048, "step": 23425 }, { "epoch": 3.8241296273621486, "grad_norm": 3.058439016342163, "learning_rate": 1.132581199003127e-05, "loss": 0.4379, "step": 23426 }, { "epoch": 3.824292886004653, "grad_norm": 3.7346572875976562, "learning_rate": 1.1325176505850311e-05, "loss": 0.499, "step": 23427 }, { "epoch": 3.824456144647157, "grad_norm": 3.4452035427093506, "learning_rate": 1.1324541016222053e-05, "loss": 0.5323, "step": 23428 }, { "epoch": 3.8246194032896614, "grad_norm": 3.2017626762390137, "learning_rate": 1.132390552114911e-05, "loss": 0.4571, "step": 23429 }, { "epoch": 3.824782661932166, "grad_norm": 2.9386188983917236, "learning_rate": 1.1323270020634087e-05, "loss": 0.4293, "step": 23430 }, { "epoch": 3.8249459205746703, "grad_norm": 3.476346492767334, "learning_rate": 1.1322634514679603e-05, "loss": 0.4563, "step": 23431 }, { "epoch": 3.8251091792171747, "grad_norm": 2.87652850151062, "learning_rate": 1.1321999003288267e-05, "loss": 0.4036, "step": 23432 }, { "epoch": 3.825272437859679, "grad_norm": 3.6222550868988037, "learning_rate": 1.132136348646269e-05, "loss": 0.5053, "step": 23433 }, { "epoch": 3.8254356965021836, "grad_norm": 3.490057945251465, "learning_rate": 1.1320727964205492e-05, "loss": 0.5737, "step": 23434 }, { "epoch": 3.825598955144688, "grad_norm": 2.6043267250061035, "learning_rate": 1.1320092436519276e-05, "loss": 0.4148, "step": 23435 }, { "epoch": 3.8257622137871925, "grad_norm": 3.5426695346832275, "learning_rate": 1.1319456903406661e-05, "loss": 0.4828, "step": 23436 }, { "epoch": 3.825925472429697, "grad_norm": 3.634831190109253, "learning_rate": 1.1318821364870253e-05, "loss": 0.4829, "step": 23437 }, { "epoch": 3.8260887310722014, "grad_norm": 3.1672070026397705, "learning_rate": 1.1318185820912672e-05, "loss": 0.5395, "step": 23438 }, { "epoch": 3.8262519897147054, "grad_norm": 3.0356032848358154, "learning_rate": 1.1317550271536526e-05, "loss": 0.4233, "step": 23439 }, { "epoch": 3.82641524835721, "grad_norm": 3.3947842121124268, "learning_rate": 1.1316914716744426e-05, "loss": 0.5658, "step": 23440 }, { "epoch": 3.8265785069997142, "grad_norm": 3.511674165725708, "learning_rate": 1.131627915653899e-05, "loss": 0.5179, "step": 23441 }, { "epoch": 3.8267417656422187, "grad_norm": 3.0129752159118652, "learning_rate": 1.1315643590922827e-05, "loss": 0.4517, "step": 23442 }, { "epoch": 3.826905024284723, "grad_norm": 2.9897847175598145, "learning_rate": 1.1315008019898548e-05, "loss": 0.3699, "step": 23443 }, { "epoch": 3.8270682829272276, "grad_norm": 3.7133631706237793, "learning_rate": 1.1314372443468768e-05, "loss": 0.4679, "step": 23444 }, { "epoch": 3.827231541569732, "grad_norm": 3.2196996212005615, "learning_rate": 1.1313736861636102e-05, "loss": 0.4664, "step": 23445 }, { "epoch": 3.827394800212236, "grad_norm": 3.242171287536621, "learning_rate": 1.1313101274403158e-05, "loss": 0.4738, "step": 23446 }, { "epoch": 3.8275580588547404, "grad_norm": 2.680453062057495, "learning_rate": 1.1312465681772548e-05, "loss": 0.4174, "step": 23447 }, { "epoch": 3.827721317497245, "grad_norm": 3.250889539718628, "learning_rate": 1.131183008374689e-05, "loss": 0.3867, "step": 23448 }, { "epoch": 3.8278845761397493, "grad_norm": 2.7010929584503174, "learning_rate": 1.1311194480328792e-05, "loss": 0.4159, "step": 23449 }, { "epoch": 3.8280478347822537, "grad_norm": 3.1100783348083496, "learning_rate": 1.1310558871520871e-05, "loss": 0.4914, "step": 23450 }, { "epoch": 3.828211093424758, "grad_norm": 3.0073461532592773, "learning_rate": 1.1309923257325737e-05, "loss": 0.4341, "step": 23451 }, { "epoch": 3.8283743520672626, "grad_norm": 3.8650126457214355, "learning_rate": 1.1309287637746003e-05, "loss": 0.5754, "step": 23452 }, { "epoch": 3.828537610709767, "grad_norm": 3.322033405303955, "learning_rate": 1.1308652012784283e-05, "loss": 0.5407, "step": 23453 }, { "epoch": 3.8287008693522715, "grad_norm": 3.5145304203033447, "learning_rate": 1.1308016382443183e-05, "loss": 0.5057, "step": 23454 }, { "epoch": 3.828864127994776, "grad_norm": 3.3112173080444336, "learning_rate": 1.1307380746725327e-05, "loss": 0.4531, "step": 23455 }, { "epoch": 3.8290273866372804, "grad_norm": 3.3085780143737793, "learning_rate": 1.130674510563332e-05, "loss": 0.4478, "step": 23456 }, { "epoch": 3.8291906452797844, "grad_norm": 3.748680830001831, "learning_rate": 1.1306109459169779e-05, "loss": 0.5727, "step": 23457 }, { "epoch": 3.829353903922289, "grad_norm": 3.105909585952759, "learning_rate": 1.1305473807337317e-05, "loss": 0.4226, "step": 23458 }, { "epoch": 3.8295171625647932, "grad_norm": 3.159775733947754, "learning_rate": 1.1304838150138541e-05, "loss": 0.4451, "step": 23459 }, { "epoch": 3.8296804212072977, "grad_norm": 3.553218364715576, "learning_rate": 1.1304202487576067e-05, "loss": 0.4857, "step": 23460 }, { "epoch": 3.829843679849802, "grad_norm": 2.877119302749634, "learning_rate": 1.1303566819652512e-05, "loss": 0.4198, "step": 23461 }, { "epoch": 3.8300069384923066, "grad_norm": 3.63031005859375, "learning_rate": 1.1302931146370486e-05, "loss": 0.5527, "step": 23462 }, { "epoch": 3.8301701971348106, "grad_norm": 3.3962063789367676, "learning_rate": 1.1302295467732605e-05, "loss": 0.511, "step": 23463 }, { "epoch": 3.830333455777315, "grad_norm": 3.129544496536255, "learning_rate": 1.1301659783741475e-05, "loss": 0.445, "step": 23464 }, { "epoch": 3.8304967144198194, "grad_norm": 3.6629583835601807, "learning_rate": 1.1301024094399711e-05, "loss": 0.5819, "step": 23465 }, { "epoch": 3.830659973062324, "grad_norm": 2.7764453887939453, "learning_rate": 1.1300388399709932e-05, "loss": 0.4277, "step": 23466 }, { "epoch": 3.8308232317048283, "grad_norm": 3.2367799282073975, "learning_rate": 1.1299752699674746e-05, "loss": 0.5056, "step": 23467 }, { "epoch": 3.8309864903473327, "grad_norm": 2.611768960952759, "learning_rate": 1.1299116994296768e-05, "loss": 0.4496, "step": 23468 }, { "epoch": 3.831149748989837, "grad_norm": 3.0933196544647217, "learning_rate": 1.1298481283578611e-05, "loss": 0.4417, "step": 23469 }, { "epoch": 3.8313130076323416, "grad_norm": 2.7814481258392334, "learning_rate": 1.1297845567522886e-05, "loss": 0.4387, "step": 23470 }, { "epoch": 3.831476266274846, "grad_norm": 3.5012893676757812, "learning_rate": 1.129720984613221e-05, "loss": 0.4884, "step": 23471 }, { "epoch": 3.8316395249173505, "grad_norm": 3.0587620735168457, "learning_rate": 1.129657411940919e-05, "loss": 0.4156, "step": 23472 }, { "epoch": 3.831802783559855, "grad_norm": 3.3675763607025146, "learning_rate": 1.1295938387356451e-05, "loss": 0.4482, "step": 23473 }, { "epoch": 3.831966042202359, "grad_norm": 3.310896635055542, "learning_rate": 1.1295302649976594e-05, "loss": 0.5028, "step": 23474 }, { "epoch": 3.8321293008448634, "grad_norm": 3.342055082321167, "learning_rate": 1.1294666907272236e-05, "loss": 0.4334, "step": 23475 }, { "epoch": 3.832292559487368, "grad_norm": 3.8883020877838135, "learning_rate": 1.1294031159245993e-05, "loss": 0.4235, "step": 23476 }, { "epoch": 3.8324558181298722, "grad_norm": 3.3693127632141113, "learning_rate": 1.1293395405900474e-05, "loss": 0.4206, "step": 23477 }, { "epoch": 3.8326190767723767, "grad_norm": 3.24594783782959, "learning_rate": 1.1292759647238298e-05, "loss": 0.5248, "step": 23478 }, { "epoch": 3.832782335414881, "grad_norm": 3.1728384494781494, "learning_rate": 1.1292123883262074e-05, "loss": 0.5115, "step": 23479 }, { "epoch": 3.8329455940573856, "grad_norm": 2.939425468444824, "learning_rate": 1.1291488113974416e-05, "loss": 0.4349, "step": 23480 }, { "epoch": 3.8331088526998895, "grad_norm": 3.310133218765259, "learning_rate": 1.1290852339377937e-05, "loss": 0.4849, "step": 23481 }, { "epoch": 3.833272111342394, "grad_norm": 3.4152960777282715, "learning_rate": 1.1290216559475256e-05, "loss": 0.521, "step": 23482 }, { "epoch": 3.8334353699848984, "grad_norm": 3.1457979679107666, "learning_rate": 1.1289580774268978e-05, "loss": 0.4605, "step": 23483 }, { "epoch": 3.833598628627403, "grad_norm": 2.9977035522460938, "learning_rate": 1.128894498376172e-05, "loss": 0.4611, "step": 23484 }, { "epoch": 3.8337618872699073, "grad_norm": 3.1485695838928223, "learning_rate": 1.1288309187956097e-05, "loss": 0.4754, "step": 23485 }, { "epoch": 3.8339251459124117, "grad_norm": 3.6848366260528564, "learning_rate": 1.1287673386854722e-05, "loss": 0.5008, "step": 23486 }, { "epoch": 3.834088404554916, "grad_norm": 3.5368120670318604, "learning_rate": 1.1287037580460207e-05, "loss": 0.498, "step": 23487 }, { "epoch": 3.8342516631974206, "grad_norm": 2.699840784072876, "learning_rate": 1.1286401768775169e-05, "loss": 0.4459, "step": 23488 }, { "epoch": 3.834414921839925, "grad_norm": 3.1446447372436523, "learning_rate": 1.1285765951802214e-05, "loss": 0.4577, "step": 23489 }, { "epoch": 3.8345781804824295, "grad_norm": 3.322470188140869, "learning_rate": 1.1285130129543964e-05, "loss": 0.5159, "step": 23490 }, { "epoch": 3.834741439124934, "grad_norm": 3.4168922901153564, "learning_rate": 1.128449430200303e-05, "loss": 0.4752, "step": 23491 }, { "epoch": 3.834904697767438, "grad_norm": 3.4367995262145996, "learning_rate": 1.1283858469182023e-05, "loss": 0.5184, "step": 23492 }, { "epoch": 3.8350679564099424, "grad_norm": 4.109549522399902, "learning_rate": 1.1283222631083558e-05, "loss": 0.564, "step": 23493 }, { "epoch": 3.835231215052447, "grad_norm": 4.036863803863525, "learning_rate": 1.1282586787710252e-05, "loss": 0.5258, "step": 23494 }, { "epoch": 3.8353944736949512, "grad_norm": 3.6015658378601074, "learning_rate": 1.1281950939064716e-05, "loss": 0.7413, "step": 23495 }, { "epoch": 3.8355577323374557, "grad_norm": 3.1617231369018555, "learning_rate": 1.1281315085149562e-05, "loss": 0.4682, "step": 23496 }, { "epoch": 3.83572099097996, "grad_norm": 3.197944402694702, "learning_rate": 1.1280679225967409e-05, "loss": 0.5195, "step": 23497 }, { "epoch": 3.8358842496224645, "grad_norm": 2.9925029277801514, "learning_rate": 1.128004336152086e-05, "loss": 0.4348, "step": 23498 }, { "epoch": 3.8360475082649685, "grad_norm": 3.0917747020721436, "learning_rate": 1.1279407491812542e-05, "loss": 0.4742, "step": 23499 }, { "epoch": 3.836210766907473, "grad_norm": 3.31577730178833, "learning_rate": 1.1278771616845061e-05, "loss": 0.4306, "step": 23500 }, { "epoch": 3.8363740255499774, "grad_norm": 2.4939064979553223, "learning_rate": 1.1278135736621032e-05, "loss": 0.3829, "step": 23501 }, { "epoch": 3.836537284192482, "grad_norm": 3.2555489540100098, "learning_rate": 1.1277499851143074e-05, "loss": 0.4523, "step": 23502 }, { "epoch": 3.8367005428349863, "grad_norm": 3.092517375946045, "learning_rate": 1.1276863960413793e-05, "loss": 0.4268, "step": 23503 }, { "epoch": 3.8368638014774907, "grad_norm": 2.422255754470825, "learning_rate": 1.1276228064435804e-05, "loss": 0.3704, "step": 23504 }, { "epoch": 3.837027060119995, "grad_norm": 3.3378329277038574, "learning_rate": 1.1275592163211727e-05, "loss": 0.5261, "step": 23505 }, { "epoch": 3.8371903187624996, "grad_norm": 2.8527731895446777, "learning_rate": 1.127495625674417e-05, "loss": 0.4249, "step": 23506 }, { "epoch": 3.837353577405004, "grad_norm": 3.5742928981781006, "learning_rate": 1.1274320345035752e-05, "loss": 0.4119, "step": 23507 }, { "epoch": 3.8375168360475085, "grad_norm": 2.604499578475952, "learning_rate": 1.1273684428089082e-05, "loss": 0.3831, "step": 23508 }, { "epoch": 3.837680094690013, "grad_norm": 3.3001909255981445, "learning_rate": 1.1273048505906775e-05, "loss": 0.4409, "step": 23509 }, { "epoch": 3.837843353332517, "grad_norm": 3.4097907543182373, "learning_rate": 1.127241257849145e-05, "loss": 0.4887, "step": 23510 }, { "epoch": 3.8380066119750214, "grad_norm": 2.6162750720977783, "learning_rate": 1.1271776645845714e-05, "loss": 0.3525, "step": 23511 }, { "epoch": 3.838169870617526, "grad_norm": 3.4531946182250977, "learning_rate": 1.1271140707972188e-05, "loss": 0.4845, "step": 23512 }, { "epoch": 3.8383331292600302, "grad_norm": 3.668405294418335, "learning_rate": 1.127050476487348e-05, "loss": 0.4814, "step": 23513 }, { "epoch": 3.8384963879025347, "grad_norm": 3.3188235759735107, "learning_rate": 1.1269868816552207e-05, "loss": 0.4713, "step": 23514 }, { "epoch": 3.838659646545039, "grad_norm": 3.5988306999206543, "learning_rate": 1.1269232863010982e-05, "loss": 0.4775, "step": 23515 }, { "epoch": 3.838822905187543, "grad_norm": 3.015118360519409, "learning_rate": 1.126859690425242e-05, "loss": 0.4593, "step": 23516 }, { "epoch": 3.8389861638300475, "grad_norm": 3.2786290645599365, "learning_rate": 1.1267960940279139e-05, "loss": 0.4535, "step": 23517 }, { "epoch": 3.839149422472552, "grad_norm": 3.007351875305176, "learning_rate": 1.1267324971093745e-05, "loss": 0.4236, "step": 23518 }, { "epoch": 3.8393126811150564, "grad_norm": 3.128448486328125, "learning_rate": 1.1266688996698858e-05, "loss": 0.478, "step": 23519 }, { "epoch": 3.839475939757561, "grad_norm": 3.221764087677002, "learning_rate": 1.1266053017097091e-05, "loss": 0.4863, "step": 23520 }, { "epoch": 3.8396391984000653, "grad_norm": 3.364872694015503, "learning_rate": 1.1265417032291055e-05, "loss": 0.476, "step": 23521 }, { "epoch": 3.8398024570425697, "grad_norm": 3.0610690116882324, "learning_rate": 1.1264781042283372e-05, "loss": 0.4512, "step": 23522 }, { "epoch": 3.839965715685074, "grad_norm": 3.69069504737854, "learning_rate": 1.126414504707665e-05, "loss": 0.5653, "step": 23523 }, { "epoch": 3.8401289743275786, "grad_norm": 2.7462024688720703, "learning_rate": 1.1263509046673507e-05, "loss": 0.3678, "step": 23524 }, { "epoch": 3.840292232970083, "grad_norm": 3.023935317993164, "learning_rate": 1.1262873041076552e-05, "loss": 0.5086, "step": 23525 }, { "epoch": 3.8404554916125875, "grad_norm": 3.861466884613037, "learning_rate": 1.12622370302884e-05, "loss": 0.4956, "step": 23526 }, { "epoch": 3.8406187502550915, "grad_norm": 3.419921398162842, "learning_rate": 1.1261601014311675e-05, "loss": 0.4851, "step": 23527 }, { "epoch": 3.840782008897596, "grad_norm": 2.766209125518799, "learning_rate": 1.1260964993148984e-05, "loss": 0.4423, "step": 23528 }, { "epoch": 3.8409452675401003, "grad_norm": 3.590435028076172, "learning_rate": 1.126032896680294e-05, "loss": 0.5387, "step": 23529 }, { "epoch": 3.841108526182605, "grad_norm": 3.539522886276245, "learning_rate": 1.1259692935276158e-05, "loss": 0.5063, "step": 23530 }, { "epoch": 3.8412717848251092, "grad_norm": 3.352961301803589, "learning_rate": 1.1259056898571256e-05, "loss": 0.4547, "step": 23531 }, { "epoch": 3.8414350434676137, "grad_norm": 3.073237657546997, "learning_rate": 1.1258420856690846e-05, "loss": 0.4494, "step": 23532 }, { "epoch": 3.841598302110118, "grad_norm": 3.1562230587005615, "learning_rate": 1.1257784809637543e-05, "loss": 0.4938, "step": 23533 }, { "epoch": 3.841761560752622, "grad_norm": 3.719102621078491, "learning_rate": 1.1257148757413962e-05, "loss": 0.522, "step": 23534 }, { "epoch": 3.8419248193951265, "grad_norm": 3.464150905609131, "learning_rate": 1.1256512700022716e-05, "loss": 0.5146, "step": 23535 }, { "epoch": 3.842088078037631, "grad_norm": 3.524850845336914, "learning_rate": 1.1255876637466421e-05, "loss": 0.5714, "step": 23536 }, { "epoch": 3.8422513366801354, "grad_norm": 3.081023931503296, "learning_rate": 1.1255240569747692e-05, "loss": 0.4276, "step": 23537 }, { "epoch": 3.84241459532264, "grad_norm": 3.5552010536193848, "learning_rate": 1.1254604496869144e-05, "loss": 0.518, "step": 23538 }, { "epoch": 3.8425778539651443, "grad_norm": 2.7273504734039307, "learning_rate": 1.125396841883339e-05, "loss": 0.4143, "step": 23539 }, { "epoch": 3.8427411126076487, "grad_norm": 3.4794859886169434, "learning_rate": 1.1253332335643043e-05, "loss": 0.4939, "step": 23540 }, { "epoch": 3.842904371250153, "grad_norm": 3.60486102104187, "learning_rate": 1.1252696247300726e-05, "loss": 0.5549, "step": 23541 }, { "epoch": 3.8430676298926576, "grad_norm": 3.0827767848968506, "learning_rate": 1.125206015380904e-05, "loss": 0.4078, "step": 23542 }, { "epoch": 3.843230888535162, "grad_norm": 2.491950273513794, "learning_rate": 1.125142405517061e-05, "loss": 0.3941, "step": 23543 }, { "epoch": 3.8433941471776665, "grad_norm": 3.4004528522491455, "learning_rate": 1.1250787951388049e-05, "loss": 0.5009, "step": 23544 }, { "epoch": 3.8435574058201705, "grad_norm": 2.8622148036956787, "learning_rate": 1.1250151842463972e-05, "loss": 0.4791, "step": 23545 }, { "epoch": 3.843720664462675, "grad_norm": 3.553992748260498, "learning_rate": 1.1249515728400995e-05, "loss": 0.5209, "step": 23546 }, { "epoch": 3.8438839231051793, "grad_norm": 3.1225569248199463, "learning_rate": 1.1248879609201726e-05, "loss": 0.479, "step": 23547 }, { "epoch": 3.844047181747684, "grad_norm": 3.3091611862182617, "learning_rate": 1.1248243484868786e-05, "loss": 0.4598, "step": 23548 }, { "epoch": 3.844210440390188, "grad_norm": 3.3716397285461426, "learning_rate": 1.1247607355404789e-05, "loss": 0.504, "step": 23549 }, { "epoch": 3.8443736990326927, "grad_norm": 3.3196215629577637, "learning_rate": 1.1246971220812348e-05, "loss": 0.4946, "step": 23550 }, { "epoch": 3.8445369576751967, "grad_norm": 2.7092843055725098, "learning_rate": 1.1246335081094082e-05, "loss": 0.4179, "step": 23551 }, { "epoch": 3.844700216317701, "grad_norm": 3.453096866607666, "learning_rate": 1.12456989362526e-05, "loss": 0.4645, "step": 23552 }, { "epoch": 3.8448634749602055, "grad_norm": 3.1044578552246094, "learning_rate": 1.124506278629052e-05, "loss": 0.4946, "step": 23553 }, { "epoch": 3.84502673360271, "grad_norm": 3.058300733566284, "learning_rate": 1.1244426631210458e-05, "loss": 0.4283, "step": 23554 }, { "epoch": 3.8451899922452144, "grad_norm": 3.0963656902313232, "learning_rate": 1.1243790471015029e-05, "loss": 0.4189, "step": 23555 }, { "epoch": 3.845353250887719, "grad_norm": 3.107954263687134, "learning_rate": 1.1243154305706847e-05, "loss": 0.4494, "step": 23556 }, { "epoch": 3.8455165095302233, "grad_norm": 2.9990432262420654, "learning_rate": 1.1242518135288526e-05, "loss": 0.4821, "step": 23557 }, { "epoch": 3.8456797681727277, "grad_norm": 3.1417949199676514, "learning_rate": 1.1241881959762683e-05, "loss": 0.5243, "step": 23558 }, { "epoch": 3.845843026815232, "grad_norm": 3.2634506225585938, "learning_rate": 1.1241245779131928e-05, "loss": 0.4576, "step": 23559 }, { "epoch": 3.8460062854577366, "grad_norm": 3.363722085952759, "learning_rate": 1.1240609593398886e-05, "loss": 0.4868, "step": 23560 }, { "epoch": 3.846169544100241, "grad_norm": 3.1584203243255615, "learning_rate": 1.1239973402566164e-05, "loss": 0.4431, "step": 23561 }, { "epoch": 3.8463328027427455, "grad_norm": 3.150027275085449, "learning_rate": 1.123933720663638e-05, "loss": 0.4824, "step": 23562 }, { "epoch": 3.8464960613852495, "grad_norm": 3.0602731704711914, "learning_rate": 1.123870100561215e-05, "loss": 0.4456, "step": 23563 }, { "epoch": 3.846659320027754, "grad_norm": 2.9096057415008545, "learning_rate": 1.1238064799496086e-05, "loss": 0.4537, "step": 23564 }, { "epoch": 3.8468225786702583, "grad_norm": 3.1111996173858643, "learning_rate": 1.1237428588290804e-05, "loss": 0.4546, "step": 23565 }, { "epoch": 3.846985837312763, "grad_norm": 3.208858013153076, "learning_rate": 1.1236792371998924e-05, "loss": 0.4795, "step": 23566 }, { "epoch": 3.847149095955267, "grad_norm": 2.778434991836548, "learning_rate": 1.1236156150623055e-05, "loss": 0.4493, "step": 23567 }, { "epoch": 3.8473123545977717, "grad_norm": 3.640676259994507, "learning_rate": 1.1235519924165814e-05, "loss": 0.5725, "step": 23568 }, { "epoch": 3.8474756132402756, "grad_norm": 3.5370397567749023, "learning_rate": 1.1234883692629818e-05, "loss": 0.5018, "step": 23569 }, { "epoch": 3.84763887188278, "grad_norm": 2.940722942352295, "learning_rate": 1.123424745601768e-05, "loss": 0.4633, "step": 23570 }, { "epoch": 3.8478021305252845, "grad_norm": 3.3785290718078613, "learning_rate": 1.1233611214332021e-05, "loss": 0.5169, "step": 23571 }, { "epoch": 3.847965389167789, "grad_norm": 3.3556835651397705, "learning_rate": 1.1232974967575449e-05, "loss": 0.5062, "step": 23572 }, { "epoch": 3.8481286478102934, "grad_norm": 3.194322347640991, "learning_rate": 1.1232338715750584e-05, "loss": 0.4287, "step": 23573 }, { "epoch": 3.848291906452798, "grad_norm": 3.808223247528076, "learning_rate": 1.1231702458860038e-05, "loss": 0.5354, "step": 23574 }, { "epoch": 3.8484551650953023, "grad_norm": 3.870037794113159, "learning_rate": 1.123106619690643e-05, "loss": 0.6197, "step": 23575 }, { "epoch": 3.8486184237378067, "grad_norm": 3.170401096343994, "learning_rate": 1.1230429929892373e-05, "loss": 0.46, "step": 23576 }, { "epoch": 3.848781682380311, "grad_norm": 3.1937551498413086, "learning_rate": 1.1229793657820482e-05, "loss": 0.5219, "step": 23577 }, { "epoch": 3.8489449410228156, "grad_norm": 3.7143847942352295, "learning_rate": 1.1229157380693376e-05, "loss": 0.5207, "step": 23578 }, { "epoch": 3.84910819966532, "grad_norm": 3.503129720687866, "learning_rate": 1.1228521098513667e-05, "loss": 0.5628, "step": 23579 }, { "epoch": 3.849271458307824, "grad_norm": 2.841552734375, "learning_rate": 1.1227884811283972e-05, "loss": 0.4021, "step": 23580 }, { "epoch": 3.8494347169503285, "grad_norm": 2.922508716583252, "learning_rate": 1.1227248519006904e-05, "loss": 0.4329, "step": 23581 }, { "epoch": 3.849597975592833, "grad_norm": 3.2699196338653564, "learning_rate": 1.1226612221685083e-05, "loss": 0.4432, "step": 23582 }, { "epoch": 3.8497612342353373, "grad_norm": 3.575038194656372, "learning_rate": 1.1225975919321121e-05, "loss": 0.5543, "step": 23583 }, { "epoch": 3.8499244928778418, "grad_norm": 3.323408842086792, "learning_rate": 1.1225339611917635e-05, "loss": 0.4569, "step": 23584 }, { "epoch": 3.850087751520346, "grad_norm": 3.275548219680786, "learning_rate": 1.1224703299477244e-05, "loss": 0.4007, "step": 23585 }, { "epoch": 3.8502510101628507, "grad_norm": 3.3975281715393066, "learning_rate": 1.1224066982002555e-05, "loss": 0.5485, "step": 23586 }, { "epoch": 3.8504142688053546, "grad_norm": 2.9609732627868652, "learning_rate": 1.122343065949619e-05, "loss": 0.4509, "step": 23587 }, { "epoch": 3.850577527447859, "grad_norm": 3.642110824584961, "learning_rate": 1.1222794331960768e-05, "loss": 0.4993, "step": 23588 }, { "epoch": 3.8507407860903635, "grad_norm": 2.770113468170166, "learning_rate": 1.1222157999398895e-05, "loss": 0.3954, "step": 23589 }, { "epoch": 3.850904044732868, "grad_norm": 3.2819721698760986, "learning_rate": 1.1221521661813198e-05, "loss": 0.4034, "step": 23590 }, { "epoch": 3.8510673033753724, "grad_norm": 2.719947099685669, "learning_rate": 1.122088531920628e-05, "loss": 0.3891, "step": 23591 }, { "epoch": 3.851230562017877, "grad_norm": 3.341914653778076, "learning_rate": 1.1220248971580764e-05, "loss": 0.4211, "step": 23592 }, { "epoch": 3.8513938206603813, "grad_norm": 3.745476245880127, "learning_rate": 1.1219612618939268e-05, "loss": 0.514, "step": 23593 }, { "epoch": 3.8515570793028857, "grad_norm": 3.253736734390259, "learning_rate": 1.1218976261284404e-05, "loss": 0.5231, "step": 23594 }, { "epoch": 3.85172033794539, "grad_norm": 3.658932685852051, "learning_rate": 1.1218339898618791e-05, "loss": 0.4874, "step": 23595 }, { "epoch": 3.8518835965878946, "grad_norm": 3.615015745162964, "learning_rate": 1.1217703530945042e-05, "loss": 0.4267, "step": 23596 }, { "epoch": 3.852046855230399, "grad_norm": 3.6290764808654785, "learning_rate": 1.1217067158265768e-05, "loss": 0.5663, "step": 23597 }, { "epoch": 3.852210113872903, "grad_norm": 2.879835605621338, "learning_rate": 1.1216430780583596e-05, "loss": 0.3659, "step": 23598 }, { "epoch": 3.8523733725154075, "grad_norm": 3.5478298664093018, "learning_rate": 1.1215794397901136e-05, "loss": 0.4575, "step": 23599 }, { "epoch": 3.852536631157912, "grad_norm": 3.102461099624634, "learning_rate": 1.1215158010221005e-05, "loss": 0.3967, "step": 23600 }, { "epoch": 3.8526998898004163, "grad_norm": 2.927091598510742, "learning_rate": 1.1214521617545818e-05, "loss": 0.4364, "step": 23601 }, { "epoch": 3.8528631484429208, "grad_norm": 2.9582653045654297, "learning_rate": 1.121388521987819e-05, "loss": 0.462, "step": 23602 }, { "epoch": 3.853026407085425, "grad_norm": 3.132411003112793, "learning_rate": 1.1213248817220734e-05, "loss": 0.4669, "step": 23603 }, { "epoch": 3.853189665727929, "grad_norm": 3.2031090259552, "learning_rate": 1.1212612409576076e-05, "loss": 0.48, "step": 23604 }, { "epoch": 3.8533529243704336, "grad_norm": 3.057701826095581, "learning_rate": 1.1211975996946827e-05, "loss": 0.4402, "step": 23605 }, { "epoch": 3.853516183012938, "grad_norm": 3.636366128921509, "learning_rate": 1.12113395793356e-05, "loss": 0.5571, "step": 23606 }, { "epoch": 3.8536794416554425, "grad_norm": 3.5947179794311523, "learning_rate": 1.1210703156745013e-05, "loss": 0.4917, "step": 23607 }, { "epoch": 3.853842700297947, "grad_norm": 3.508720874786377, "learning_rate": 1.1210066729177682e-05, "loss": 0.4772, "step": 23608 }, { "epoch": 3.8540059589404514, "grad_norm": 3.816891670227051, "learning_rate": 1.1209430296636224e-05, "loss": 0.4673, "step": 23609 }, { "epoch": 3.854169217582956, "grad_norm": 2.796966075897217, "learning_rate": 1.1208793859123258e-05, "loss": 0.3446, "step": 23610 }, { "epoch": 3.8543324762254603, "grad_norm": 2.8960108757019043, "learning_rate": 1.1208157416641391e-05, "loss": 0.3947, "step": 23611 }, { "epoch": 3.8544957348679647, "grad_norm": 3.2770371437072754, "learning_rate": 1.120752096919325e-05, "loss": 0.5121, "step": 23612 }, { "epoch": 3.854658993510469, "grad_norm": 3.0182316303253174, "learning_rate": 1.1206884516781444e-05, "loss": 0.3625, "step": 23613 }, { "epoch": 3.8548222521529736, "grad_norm": 3.463946580886841, "learning_rate": 1.1206248059408593e-05, "loss": 0.5285, "step": 23614 }, { "epoch": 3.8549855107954776, "grad_norm": 3.568118095397949, "learning_rate": 1.120561159707731e-05, "loss": 0.5179, "step": 23615 }, { "epoch": 3.855148769437982, "grad_norm": 3.6825082302093506, "learning_rate": 1.1204975129790215e-05, "loss": 0.4841, "step": 23616 }, { "epoch": 3.8553120280804865, "grad_norm": 3.30149507522583, "learning_rate": 1.120433865754992e-05, "loss": 0.5042, "step": 23617 }, { "epoch": 3.855475286722991, "grad_norm": 3.0467209815979004, "learning_rate": 1.1203702180359044e-05, "loss": 0.5005, "step": 23618 }, { "epoch": 3.8556385453654953, "grad_norm": 2.93982195854187, "learning_rate": 1.1203065698220204e-05, "loss": 0.4262, "step": 23619 }, { "epoch": 3.8558018040079998, "grad_norm": 3.667680025100708, "learning_rate": 1.1202429211136012e-05, "loss": 0.5126, "step": 23620 }, { "epoch": 3.855965062650504, "grad_norm": 3.576331377029419, "learning_rate": 1.120179271910909e-05, "loss": 0.4893, "step": 23621 }, { "epoch": 3.856128321293008, "grad_norm": 3.6680967807769775, "learning_rate": 1.1201156222142053e-05, "loss": 0.5171, "step": 23622 }, { "epoch": 3.8562915799355126, "grad_norm": 3.0961124897003174, "learning_rate": 1.1200519720237515e-05, "loss": 0.493, "step": 23623 }, { "epoch": 3.856454838578017, "grad_norm": 3.652585983276367, "learning_rate": 1.1199883213398092e-05, "loss": 0.3704, "step": 23624 }, { "epoch": 3.8566180972205215, "grad_norm": 2.9979987144470215, "learning_rate": 1.1199246701626405e-05, "loss": 0.3753, "step": 23625 }, { "epoch": 3.856781355863026, "grad_norm": 3.5831665992736816, "learning_rate": 1.1198610184925067e-05, "loss": 0.4655, "step": 23626 }, { "epoch": 3.8569446145055304, "grad_norm": 2.7474398612976074, "learning_rate": 1.1197973663296695e-05, "loss": 0.4269, "step": 23627 }, { "epoch": 3.857107873148035, "grad_norm": 2.725705862045288, "learning_rate": 1.1197337136743905e-05, "loss": 0.3961, "step": 23628 }, { "epoch": 3.8572711317905393, "grad_norm": 3.4675731658935547, "learning_rate": 1.1196700605269318e-05, "loss": 0.5168, "step": 23629 }, { "epoch": 3.8574343904330437, "grad_norm": 3.583008289337158, "learning_rate": 1.1196064068875541e-05, "loss": 0.4869, "step": 23630 }, { "epoch": 3.857597649075548, "grad_norm": 2.985499143600464, "learning_rate": 1.1195427527565196e-05, "loss": 0.4497, "step": 23631 }, { "epoch": 3.8577609077180526, "grad_norm": 3.548454999923706, "learning_rate": 1.1194790981340905e-05, "loss": 0.5848, "step": 23632 }, { "epoch": 3.8579241663605566, "grad_norm": 3.524778366088867, "learning_rate": 1.1194154430205275e-05, "loss": 0.5333, "step": 23633 }, { "epoch": 3.858087425003061, "grad_norm": 3.1610662937164307, "learning_rate": 1.1193517874160931e-05, "loss": 0.4528, "step": 23634 }, { "epoch": 3.8582506836455654, "grad_norm": 4.080728054046631, "learning_rate": 1.1192881313210484e-05, "loss": 0.6229, "step": 23635 }, { "epoch": 3.85841394228807, "grad_norm": 3.2844150066375732, "learning_rate": 1.119224474735655e-05, "loss": 0.5108, "step": 23636 }, { "epoch": 3.8585772009305743, "grad_norm": 3.5748791694641113, "learning_rate": 1.119160817660175e-05, "loss": 0.5325, "step": 23637 }, { "epoch": 3.8587404595730788, "grad_norm": 3.2063241004943848, "learning_rate": 1.11909716009487e-05, "loss": 0.487, "step": 23638 }, { "epoch": 3.858903718215583, "grad_norm": 3.7704427242279053, "learning_rate": 1.1190335020400014e-05, "loss": 0.496, "step": 23639 }, { "epoch": 3.859066976858087, "grad_norm": 3.5810465812683105, "learning_rate": 1.118969843495831e-05, "loss": 0.513, "step": 23640 }, { "epoch": 3.8592302355005916, "grad_norm": 2.576862096786499, "learning_rate": 1.1189061844626206e-05, "loss": 0.3817, "step": 23641 }, { "epoch": 3.859393494143096, "grad_norm": 3.226515769958496, "learning_rate": 1.1188425249406313e-05, "loss": 0.433, "step": 23642 }, { "epoch": 3.8595567527856005, "grad_norm": 3.4238765239715576, "learning_rate": 1.1187788649301256e-05, "loss": 0.4909, "step": 23643 }, { "epoch": 3.859720011428105, "grad_norm": 2.8192331790924072, "learning_rate": 1.1187152044313652e-05, "loss": 0.4042, "step": 23644 }, { "epoch": 3.8598832700706094, "grad_norm": 3.001079559326172, "learning_rate": 1.118651543444611e-05, "loss": 0.4534, "step": 23645 }, { "epoch": 3.860046528713114, "grad_norm": 3.5394396781921387, "learning_rate": 1.1185878819701253e-05, "loss": 0.5601, "step": 23646 }, { "epoch": 3.8602097873556183, "grad_norm": 3.5049240589141846, "learning_rate": 1.1185242200081694e-05, "loss": 0.5015, "step": 23647 }, { "epoch": 3.8603730459981227, "grad_norm": 2.964599847793579, "learning_rate": 1.1184605575590053e-05, "loss": 0.4382, "step": 23648 }, { "epoch": 3.860536304640627, "grad_norm": 3.129286050796509, "learning_rate": 1.1183968946228948e-05, "loss": 0.4197, "step": 23649 }, { "epoch": 3.8606995632831316, "grad_norm": 2.9062414169311523, "learning_rate": 1.1183332312000989e-05, "loss": 0.3906, "step": 23650 }, { "epoch": 3.8608628219256356, "grad_norm": 3.071744441986084, "learning_rate": 1.1182695672908801e-05, "loss": 0.494, "step": 23651 }, { "epoch": 3.86102608056814, "grad_norm": 2.6925671100616455, "learning_rate": 1.1182059028954997e-05, "loss": 0.4374, "step": 23652 }, { "epoch": 3.8611893392106444, "grad_norm": 3.192396879196167, "learning_rate": 1.1181422380142193e-05, "loss": 0.436, "step": 23653 }, { "epoch": 3.861352597853149, "grad_norm": 2.606323003768921, "learning_rate": 1.1180785726473012e-05, "loss": 0.4041, "step": 23654 }, { "epoch": 3.8615158564956533, "grad_norm": 3.005162477493286, "learning_rate": 1.1180149067950061e-05, "loss": 0.4059, "step": 23655 }, { "epoch": 3.8616791151381578, "grad_norm": 2.3436927795410156, "learning_rate": 1.1179512404575968e-05, "loss": 0.3593, "step": 23656 }, { "epoch": 3.8618423737806618, "grad_norm": 3.860847234725952, "learning_rate": 1.1178875736353341e-05, "loss": 0.5643, "step": 23657 }, { "epoch": 3.862005632423166, "grad_norm": 2.7919392585754395, "learning_rate": 1.1178239063284804e-05, "loss": 0.4446, "step": 23658 }, { "epoch": 3.8621688910656706, "grad_norm": 3.5965211391448975, "learning_rate": 1.1177602385372967e-05, "loss": 0.4434, "step": 23659 }, { "epoch": 3.862332149708175, "grad_norm": 3.358492374420166, "learning_rate": 1.1176965702620454e-05, "loss": 0.4955, "step": 23660 }, { "epoch": 3.8624954083506795, "grad_norm": 3.3319084644317627, "learning_rate": 1.117632901502988e-05, "loss": 0.4847, "step": 23661 }, { "epoch": 3.862658666993184, "grad_norm": 3.0286083221435547, "learning_rate": 1.1175692322603862e-05, "loss": 0.4598, "step": 23662 }, { "epoch": 3.8628219256356884, "grad_norm": 3.4253320693969727, "learning_rate": 1.1175055625345015e-05, "loss": 0.4993, "step": 23663 }, { "epoch": 3.862985184278193, "grad_norm": 3.297607421875, "learning_rate": 1.1174418923255958e-05, "loss": 0.4593, "step": 23664 }, { "epoch": 3.8631484429206973, "grad_norm": 2.4803154468536377, "learning_rate": 1.1173782216339308e-05, "loss": 0.378, "step": 23665 }, { "epoch": 3.8633117015632017, "grad_norm": 3.145648956298828, "learning_rate": 1.1173145504597684e-05, "loss": 0.477, "step": 23666 }, { "epoch": 3.863474960205706, "grad_norm": 3.3163962364196777, "learning_rate": 1.1172508788033703e-05, "loss": 0.4796, "step": 23667 }, { "epoch": 3.86363821884821, "grad_norm": 3.0588138103485107, "learning_rate": 1.1171872066649977e-05, "loss": 0.4716, "step": 23668 }, { "epoch": 3.8638014774907146, "grad_norm": 2.7974672317504883, "learning_rate": 1.1171235340449131e-05, "loss": 0.3971, "step": 23669 }, { "epoch": 3.863964736133219, "grad_norm": 2.8359389305114746, "learning_rate": 1.1170598609433779e-05, "loss": 0.403, "step": 23670 }, { "epoch": 3.8641279947757234, "grad_norm": 3.043905735015869, "learning_rate": 1.1169961873606536e-05, "loss": 0.4103, "step": 23671 }, { "epoch": 3.864291253418228, "grad_norm": 3.100600481033325, "learning_rate": 1.1169325132970022e-05, "loss": 0.4205, "step": 23672 }, { "epoch": 3.8644545120607323, "grad_norm": 2.8566126823425293, "learning_rate": 1.1168688387526856e-05, "loss": 0.3806, "step": 23673 }, { "epoch": 3.8646177707032368, "grad_norm": 3.3335702419281006, "learning_rate": 1.116805163727965e-05, "loss": 0.462, "step": 23674 }, { "epoch": 3.8647810293457407, "grad_norm": 3.770138740539551, "learning_rate": 1.1167414882231024e-05, "loss": 0.5061, "step": 23675 }, { "epoch": 3.864944287988245, "grad_norm": 4.023375034332275, "learning_rate": 1.1166778122383599e-05, "loss": 0.5545, "step": 23676 }, { "epoch": 3.8651075466307496, "grad_norm": 2.9276344776153564, "learning_rate": 1.116614135773999e-05, "loss": 0.4727, "step": 23677 }, { "epoch": 3.865270805273254, "grad_norm": 3.770402431488037, "learning_rate": 1.1165504588302816e-05, "loss": 0.5435, "step": 23678 }, { "epoch": 3.8654340639157585, "grad_norm": 2.641160249710083, "learning_rate": 1.1164867814074689e-05, "loss": 0.3787, "step": 23679 }, { "epoch": 3.865597322558263, "grad_norm": 3.1478607654571533, "learning_rate": 1.1164231035058228e-05, "loss": 0.4627, "step": 23680 }, { "epoch": 3.8657605812007674, "grad_norm": 3.281035900115967, "learning_rate": 1.1163594251256059e-05, "loss": 0.4685, "step": 23681 }, { "epoch": 3.865923839843272, "grad_norm": 3.044538736343384, "learning_rate": 1.1162957462670789e-05, "loss": 0.388, "step": 23682 }, { "epoch": 3.8660870984857763, "grad_norm": 4.154325485229492, "learning_rate": 1.1162320669305045e-05, "loss": 0.5211, "step": 23683 }, { "epoch": 3.8662503571282807, "grad_norm": 3.285069704055786, "learning_rate": 1.1161683871161435e-05, "loss": 0.4518, "step": 23684 }, { "epoch": 3.866413615770785, "grad_norm": 3.3079874515533447, "learning_rate": 1.1161047068242584e-05, "loss": 0.4546, "step": 23685 }, { "epoch": 3.866576874413289, "grad_norm": 3.0060179233551025, "learning_rate": 1.1160410260551102e-05, "loss": 0.4407, "step": 23686 }, { "epoch": 3.8667401330557936, "grad_norm": 3.6069998741149902, "learning_rate": 1.1159773448089615e-05, "loss": 0.454, "step": 23687 }, { "epoch": 3.866903391698298, "grad_norm": 3.790855646133423, "learning_rate": 1.1159136630860738e-05, "loss": 0.5212, "step": 23688 }, { "epoch": 3.8670666503408024, "grad_norm": 3.899348258972168, "learning_rate": 1.1158499808867087e-05, "loss": 0.5611, "step": 23689 }, { "epoch": 3.867229908983307, "grad_norm": 3.0307109355926514, "learning_rate": 1.115786298211128e-05, "loss": 0.4431, "step": 23690 }, { "epoch": 3.8673931676258113, "grad_norm": 3.587284803390503, "learning_rate": 1.1157226150595934e-05, "loss": 0.5083, "step": 23691 }, { "epoch": 3.8675564262683153, "grad_norm": 3.5912625789642334, "learning_rate": 1.1156589314323672e-05, "loss": 0.4557, "step": 23692 }, { "epoch": 3.8677196849108197, "grad_norm": 4.104507923126221, "learning_rate": 1.115595247329711e-05, "loss": 0.5043, "step": 23693 }, { "epoch": 3.867882943553324, "grad_norm": 3.0500638484954834, "learning_rate": 1.1155315627518859e-05, "loss": 0.4205, "step": 23694 }, { "epoch": 3.8680462021958286, "grad_norm": 3.4251389503479004, "learning_rate": 1.1154678776991544e-05, "loss": 0.4847, "step": 23695 }, { "epoch": 3.868209460838333, "grad_norm": 4.12372350692749, "learning_rate": 1.115404192171778e-05, "loss": 0.5552, "step": 23696 }, { "epoch": 3.8683727194808375, "grad_norm": 3.454704999923706, "learning_rate": 1.1153405061700181e-05, "loss": 0.3971, "step": 23697 }, { "epoch": 3.868535978123342, "grad_norm": 3.488718032836914, "learning_rate": 1.1152768196941378e-05, "loss": 0.4601, "step": 23698 }, { "epoch": 3.8686992367658464, "grad_norm": 3.8686275482177734, "learning_rate": 1.1152131327443975e-05, "loss": 0.4982, "step": 23699 }, { "epoch": 3.868862495408351, "grad_norm": 3.612847089767456, "learning_rate": 1.1151494453210596e-05, "loss": 0.5725, "step": 23700 }, { "epoch": 3.8690257540508552, "grad_norm": 2.964545488357544, "learning_rate": 1.1150857574243856e-05, "loss": 0.4881, "step": 23701 }, { "epoch": 3.8691890126933597, "grad_norm": 3.88502836227417, "learning_rate": 1.1150220690546378e-05, "loss": 0.5008, "step": 23702 }, { "epoch": 3.8693522713358637, "grad_norm": 2.8098185062408447, "learning_rate": 1.1149583802120776e-05, "loss": 0.4459, "step": 23703 }, { "epoch": 3.869515529978368, "grad_norm": 3.272305488586426, "learning_rate": 1.1148946908969669e-05, "loss": 0.4463, "step": 23704 }, { "epoch": 3.8696787886208726, "grad_norm": 2.993959426879883, "learning_rate": 1.1148310011095676e-05, "loss": 0.4497, "step": 23705 }, { "epoch": 3.869842047263377, "grad_norm": 3.266467332839966, "learning_rate": 1.1147673108501413e-05, "loss": 0.4786, "step": 23706 }, { "epoch": 3.8700053059058814, "grad_norm": 3.2802200317382812, "learning_rate": 1.11470362011895e-05, "loss": 0.4652, "step": 23707 }, { "epoch": 3.870168564548386, "grad_norm": 3.480062484741211, "learning_rate": 1.1146399289162553e-05, "loss": 0.4311, "step": 23708 }, { "epoch": 3.8703318231908903, "grad_norm": 3.4904932975769043, "learning_rate": 1.1145762372423192e-05, "loss": 0.5574, "step": 23709 }, { "epoch": 3.8704950818333943, "grad_norm": 3.228788137435913, "learning_rate": 1.1145125450974035e-05, "loss": 0.5119, "step": 23710 }, { "epoch": 3.8706583404758987, "grad_norm": 3.0730698108673096, "learning_rate": 1.1144488524817701e-05, "loss": 0.4284, "step": 23711 }, { "epoch": 3.870821599118403, "grad_norm": 2.9954416751861572, "learning_rate": 1.1143851593956807e-05, "loss": 0.4189, "step": 23712 }, { "epoch": 3.8709848577609076, "grad_norm": 3.259143114089966, "learning_rate": 1.1143214658393967e-05, "loss": 0.4789, "step": 23713 }, { "epoch": 3.871148116403412, "grad_norm": 3.7742393016815186, "learning_rate": 1.1142577718131806e-05, "loss": 0.5638, "step": 23714 }, { "epoch": 3.8713113750459165, "grad_norm": 2.929351329803467, "learning_rate": 1.1141940773172938e-05, "loss": 0.4308, "step": 23715 }, { "epoch": 3.871474633688421, "grad_norm": 2.958332061767578, "learning_rate": 1.1141303823519985e-05, "loss": 0.4295, "step": 23716 }, { "epoch": 3.8716378923309254, "grad_norm": 3.585885524749756, "learning_rate": 1.1140666869175563e-05, "loss": 0.5162, "step": 23717 }, { "epoch": 3.87180115097343, "grad_norm": 2.7376112937927246, "learning_rate": 1.1140029910142287e-05, "loss": 0.3865, "step": 23718 }, { "epoch": 3.8719644096159342, "grad_norm": 3.3032939434051514, "learning_rate": 1.113939294642278e-05, "loss": 0.4859, "step": 23719 }, { "epoch": 3.8721276682584387, "grad_norm": 3.154160737991333, "learning_rate": 1.1138755978019658e-05, "loss": 0.4368, "step": 23720 }, { "epoch": 3.8722909269009427, "grad_norm": 3.1353399753570557, "learning_rate": 1.1138119004935542e-05, "loss": 0.4598, "step": 23721 }, { "epoch": 3.872454185543447, "grad_norm": 3.099034070968628, "learning_rate": 1.113748202717305e-05, "loss": 0.4727, "step": 23722 }, { "epoch": 3.8726174441859516, "grad_norm": 3.189305305480957, "learning_rate": 1.1136845044734796e-05, "loss": 0.4116, "step": 23723 }, { "epoch": 3.872780702828456, "grad_norm": 2.6631252765655518, "learning_rate": 1.11362080576234e-05, "loss": 0.3979, "step": 23724 }, { "epoch": 3.8729439614709604, "grad_norm": 3.5485146045684814, "learning_rate": 1.1135571065841483e-05, "loss": 0.5017, "step": 23725 }, { "epoch": 3.873107220113465, "grad_norm": 3.1092820167541504, "learning_rate": 1.1134934069391664e-05, "loss": 0.4909, "step": 23726 }, { "epoch": 3.8732704787559693, "grad_norm": 2.8797404766082764, "learning_rate": 1.113429706827656e-05, "loss": 0.3939, "step": 23727 }, { "epoch": 3.8734337373984733, "grad_norm": 3.4737801551818848, "learning_rate": 1.1133660062498787e-05, "loss": 0.4994, "step": 23728 }, { "epoch": 3.8735969960409777, "grad_norm": 3.603365182876587, "learning_rate": 1.1133023052060965e-05, "loss": 0.4958, "step": 23729 }, { "epoch": 3.873760254683482, "grad_norm": 3.285994052886963, "learning_rate": 1.1132386036965713e-05, "loss": 0.4679, "step": 23730 }, { "epoch": 3.8739235133259866, "grad_norm": 3.0582664012908936, "learning_rate": 1.113174901721565e-05, "loss": 0.4338, "step": 23731 }, { "epoch": 3.874086771968491, "grad_norm": 3.2821738719940186, "learning_rate": 1.1131111992813397e-05, "loss": 0.5208, "step": 23732 }, { "epoch": 3.8742500306109955, "grad_norm": 2.922722101211548, "learning_rate": 1.1130474963761568e-05, "loss": 0.3771, "step": 23733 }, { "epoch": 3.8744132892535, "grad_norm": 3.333789825439453, "learning_rate": 1.1129837930062784e-05, "loss": 0.4962, "step": 23734 }, { "epoch": 3.8745765478960044, "grad_norm": 3.4380362033843994, "learning_rate": 1.1129200891719658e-05, "loss": 0.4561, "step": 23735 }, { "epoch": 3.874739806538509, "grad_norm": 3.735952615737915, "learning_rate": 1.1128563848734817e-05, "loss": 0.522, "step": 23736 }, { "epoch": 3.8749030651810132, "grad_norm": 3.940617084503174, "learning_rate": 1.1127926801110878e-05, "loss": 0.557, "step": 23737 }, { "epoch": 3.8750663238235177, "grad_norm": 2.923233985900879, "learning_rate": 1.1127289748850456e-05, "loss": 0.432, "step": 23738 }, { "epoch": 3.8752295824660217, "grad_norm": 3.9031453132629395, "learning_rate": 1.1126652691956172e-05, "loss": 0.4671, "step": 23739 }, { "epoch": 3.875392841108526, "grad_norm": 3.214578628540039, "learning_rate": 1.1126015630430645e-05, "loss": 0.4606, "step": 23740 }, { "epoch": 3.8755560997510305, "grad_norm": 3.5174560546875, "learning_rate": 1.1125378564276488e-05, "loss": 0.5674, "step": 23741 }, { "epoch": 3.875719358393535, "grad_norm": 3.087484359741211, "learning_rate": 1.1124741493496331e-05, "loss": 0.4256, "step": 23742 }, { "epoch": 3.8758826170360394, "grad_norm": 3.5726680755615234, "learning_rate": 1.1124104418092783e-05, "loss": 0.5305, "step": 23743 }, { "epoch": 3.876045875678544, "grad_norm": 3.7578887939453125, "learning_rate": 1.1123467338068468e-05, "loss": 0.4847, "step": 23744 }, { "epoch": 3.876209134321048, "grad_norm": 3.8359997272491455, "learning_rate": 1.1122830253426002e-05, "loss": 0.5506, "step": 23745 }, { "epoch": 3.8763723929635523, "grad_norm": 3.6013574600219727, "learning_rate": 1.1122193164168005e-05, "loss": 0.5347, "step": 23746 }, { "epoch": 3.8765356516060567, "grad_norm": 3.6218791007995605, "learning_rate": 1.1121556070297096e-05, "loss": 0.5365, "step": 23747 }, { "epoch": 3.876698910248561, "grad_norm": 3.322572708129883, "learning_rate": 1.1120918971815891e-05, "loss": 0.4639, "step": 23748 }, { "epoch": 3.8768621688910656, "grad_norm": 3.1378562450408936, "learning_rate": 1.1120281868727013e-05, "loss": 0.4722, "step": 23749 }, { "epoch": 3.87702542753357, "grad_norm": 2.5760858058929443, "learning_rate": 1.1119644761033079e-05, "loss": 0.377, "step": 23750 }, { "epoch": 3.8771886861760745, "grad_norm": 4.034341812133789, "learning_rate": 1.111900764873671e-05, "loss": 0.4934, "step": 23751 }, { "epoch": 3.877351944818579, "grad_norm": 3.4476373195648193, "learning_rate": 1.1118370531840522e-05, "loss": 0.4963, "step": 23752 }, { "epoch": 3.8775152034610834, "grad_norm": 3.2255659103393555, "learning_rate": 1.1117733410347133e-05, "loss": 0.4844, "step": 23753 }, { "epoch": 3.877678462103588, "grad_norm": 3.19162917137146, "learning_rate": 1.1117096284259168e-05, "loss": 0.4782, "step": 23754 }, { "epoch": 3.8778417207460922, "grad_norm": 3.390690803527832, "learning_rate": 1.1116459153579237e-05, "loss": 0.5559, "step": 23755 }, { "epoch": 3.8780049793885962, "grad_norm": 3.534346103668213, "learning_rate": 1.111582201830997e-05, "loss": 0.4884, "step": 23756 }, { "epoch": 3.8781682380311007, "grad_norm": 3.1863534450531006, "learning_rate": 1.1115184878453974e-05, "loss": 0.4806, "step": 23757 }, { "epoch": 3.878331496673605, "grad_norm": 3.0374512672424316, "learning_rate": 1.1114547734013878e-05, "loss": 0.4151, "step": 23758 }, { "epoch": 3.8784947553161095, "grad_norm": 3.4357378482818604, "learning_rate": 1.1113910584992296e-05, "loss": 0.4429, "step": 23759 }, { "epoch": 3.878658013958614, "grad_norm": 3.188408136367798, "learning_rate": 1.1113273431391849e-05, "loss": 0.4112, "step": 23760 }, { "epoch": 3.8788212726011184, "grad_norm": 2.4513957500457764, "learning_rate": 1.1112636273215156e-05, "loss": 0.3757, "step": 23761 }, { "epoch": 3.878984531243623, "grad_norm": 2.825850248336792, "learning_rate": 1.1111999110464831e-05, "loss": 0.4829, "step": 23762 }, { "epoch": 3.879147789886127, "grad_norm": 2.969280242919922, "learning_rate": 1.1111361943143499e-05, "loss": 0.4917, "step": 23763 }, { "epoch": 3.8793110485286313, "grad_norm": 3.7095906734466553, "learning_rate": 1.111072477125378e-05, "loss": 0.4528, "step": 23764 }, { "epoch": 3.8794743071711357, "grad_norm": 3.023122787475586, "learning_rate": 1.111008759479829e-05, "loss": 0.4152, "step": 23765 }, { "epoch": 3.87963756581364, "grad_norm": 3.2559988498687744, "learning_rate": 1.1109450413779653e-05, "loss": 0.5257, "step": 23766 }, { "epoch": 3.8798008244561446, "grad_norm": 2.7772505283355713, "learning_rate": 1.110881322820048e-05, "loss": 0.4122, "step": 23767 }, { "epoch": 3.879964083098649, "grad_norm": 3.28725004196167, "learning_rate": 1.1108176038063391e-05, "loss": 0.5094, "step": 23768 }, { "epoch": 3.8801273417411535, "grad_norm": 3.156034231185913, "learning_rate": 1.1107538843371013e-05, "loss": 0.4068, "step": 23769 }, { "epoch": 3.880290600383658, "grad_norm": 3.323732614517212, "learning_rate": 1.110690164412596e-05, "loss": 0.522, "step": 23770 }, { "epoch": 3.8804538590261624, "grad_norm": 3.221214771270752, "learning_rate": 1.1106264440330854e-05, "loss": 0.4617, "step": 23771 }, { "epoch": 3.880617117668667, "grad_norm": 3.63688588142395, "learning_rate": 1.1105627231988312e-05, "loss": 0.4019, "step": 23772 }, { "epoch": 3.8807803763111712, "grad_norm": 3.073432683944702, "learning_rate": 1.1104990019100954e-05, "loss": 0.4603, "step": 23773 }, { "epoch": 3.8809436349536752, "grad_norm": 3.0247530937194824, "learning_rate": 1.1104352801671396e-05, "loss": 0.3926, "step": 23774 }, { "epoch": 3.8811068935961797, "grad_norm": 3.0143771171569824, "learning_rate": 1.1103715579702264e-05, "loss": 0.4571, "step": 23775 }, { "epoch": 3.881270152238684, "grad_norm": 3.3688337802886963, "learning_rate": 1.1103078353196175e-05, "loss": 0.5135, "step": 23776 }, { "epoch": 3.8814334108811885, "grad_norm": 4.6778082847595215, "learning_rate": 1.1102441122155745e-05, "loss": 0.5884, "step": 23777 }, { "epoch": 3.881596669523693, "grad_norm": 2.779057025909424, "learning_rate": 1.1101803886583598e-05, "loss": 0.4178, "step": 23778 }, { "epoch": 3.8817599281661974, "grad_norm": 3.8064098358154297, "learning_rate": 1.1101166646482347e-05, "loss": 0.4977, "step": 23779 }, { "epoch": 3.8819231868087014, "grad_norm": 3.407487630844116, "learning_rate": 1.1100529401854618e-05, "loss": 0.5084, "step": 23780 }, { "epoch": 3.882086445451206, "grad_norm": 4.364026069641113, "learning_rate": 1.1099892152703032e-05, "loss": 0.5872, "step": 23781 }, { "epoch": 3.8822497040937103, "grad_norm": 3.0719363689422607, "learning_rate": 1.10992548990302e-05, "loss": 0.4544, "step": 23782 }, { "epoch": 3.8824129627362147, "grad_norm": 2.855332851409912, "learning_rate": 1.1098617640838747e-05, "loss": 0.3944, "step": 23783 }, { "epoch": 3.882576221378719, "grad_norm": 3.6350998878479004, "learning_rate": 1.1097980378131293e-05, "loss": 0.5295, "step": 23784 }, { "epoch": 3.8827394800212236, "grad_norm": 3.1250412464141846, "learning_rate": 1.1097343110910452e-05, "loss": 0.4179, "step": 23785 }, { "epoch": 3.882902738663728, "grad_norm": 3.7627670764923096, "learning_rate": 1.1096705839178853e-05, "loss": 0.5393, "step": 23786 }, { "epoch": 3.8830659973062325, "grad_norm": 2.8760151863098145, "learning_rate": 1.109606856293911e-05, "loss": 0.4333, "step": 23787 }, { "epoch": 3.883229255948737, "grad_norm": 3.9355509281158447, "learning_rate": 1.109543128219384e-05, "loss": 0.5635, "step": 23788 }, { "epoch": 3.8833925145912414, "grad_norm": 3.0765795707702637, "learning_rate": 1.1094793996945668e-05, "loss": 0.4932, "step": 23789 }, { "epoch": 3.883555773233746, "grad_norm": 3.3916947841644287, "learning_rate": 1.109415670719721e-05, "loss": 0.485, "step": 23790 }, { "epoch": 3.88371903187625, "grad_norm": 3.3924951553344727, "learning_rate": 1.1093519412951088e-05, "loss": 0.5562, "step": 23791 }, { "epoch": 3.883882290518754, "grad_norm": 2.592620849609375, "learning_rate": 1.109288211420992e-05, "loss": 0.3964, "step": 23792 }, { "epoch": 3.8840455491612587, "grad_norm": 2.851308584213257, "learning_rate": 1.1092244810976327e-05, "loss": 0.425, "step": 23793 }, { "epoch": 3.884208807803763, "grad_norm": 3.2374379634857178, "learning_rate": 1.1091607503252928e-05, "loss": 0.4588, "step": 23794 }, { "epoch": 3.8843720664462675, "grad_norm": 3.3325014114379883, "learning_rate": 1.109097019104234e-05, "loss": 0.539, "step": 23795 }, { "epoch": 3.884535325088772, "grad_norm": 3.2234184741973877, "learning_rate": 1.1090332874347189e-05, "loss": 0.5324, "step": 23796 }, { "epoch": 3.8846985837312764, "grad_norm": 3.220505952835083, "learning_rate": 1.108969555317009e-05, "loss": 0.4943, "step": 23797 }, { "epoch": 3.8848618423737804, "grad_norm": 2.9145514965057373, "learning_rate": 1.1089058227513663e-05, "loss": 0.4699, "step": 23798 }, { "epoch": 3.885025101016285, "grad_norm": 3.3898839950561523, "learning_rate": 1.108842089738053e-05, "loss": 0.5283, "step": 23799 }, { "epoch": 3.8851883596587893, "grad_norm": 3.202408790588379, "learning_rate": 1.108778356277331e-05, "loss": 0.5046, "step": 23800 }, { "epoch": 3.8853516183012937, "grad_norm": 3.4959747791290283, "learning_rate": 1.1087146223694622e-05, "loss": 0.4853, "step": 23801 }, { "epoch": 3.885514876943798, "grad_norm": 3.451660394668579, "learning_rate": 1.1086508880147087e-05, "loss": 0.5125, "step": 23802 }, { "epoch": 3.8856781355863026, "grad_norm": 3.224167585372925, "learning_rate": 1.1085871532133324e-05, "loss": 0.3984, "step": 23803 }, { "epoch": 3.885841394228807, "grad_norm": 3.434246778488159, "learning_rate": 1.108523417965595e-05, "loss": 0.4889, "step": 23804 }, { "epoch": 3.8860046528713115, "grad_norm": 3.1665382385253906, "learning_rate": 1.1084596822717596e-05, "loss": 0.4872, "step": 23805 }, { "epoch": 3.886167911513816, "grad_norm": 3.2075185775756836, "learning_rate": 1.1083959461320864e-05, "loss": 0.506, "step": 23806 }, { "epoch": 3.8863311701563203, "grad_norm": 3.244100570678711, "learning_rate": 1.108332209546839e-05, "loss": 0.4611, "step": 23807 }, { "epoch": 3.886494428798825, "grad_norm": 2.5799803733825684, "learning_rate": 1.1082684725162786e-05, "loss": 0.4566, "step": 23808 }, { "epoch": 3.886657687441329, "grad_norm": 3.6426024436950684, "learning_rate": 1.1082047350406675e-05, "loss": 0.7451, "step": 23809 }, { "epoch": 3.886820946083833, "grad_norm": 3.123229503631592, "learning_rate": 1.1081409971202677e-05, "loss": 0.4513, "step": 23810 }, { "epoch": 3.8869842047263377, "grad_norm": 3.7266128063201904, "learning_rate": 1.108077258755341e-05, "loss": 0.4711, "step": 23811 }, { "epoch": 3.887147463368842, "grad_norm": 2.8338263034820557, "learning_rate": 1.1080135199461492e-05, "loss": 0.391, "step": 23812 }, { "epoch": 3.8873107220113465, "grad_norm": 3.2243475914001465, "learning_rate": 1.1079497806929549e-05, "loss": 0.4892, "step": 23813 }, { "epoch": 3.887473980653851, "grad_norm": 2.8934266567230225, "learning_rate": 1.1078860409960196e-05, "loss": 0.4065, "step": 23814 }, { "epoch": 3.8876372392963554, "grad_norm": 2.9346134662628174, "learning_rate": 1.1078223008556059e-05, "loss": 0.4161, "step": 23815 }, { "epoch": 3.8878004979388594, "grad_norm": 2.188798427581787, "learning_rate": 1.107758560271975e-05, "loss": 0.3517, "step": 23816 }, { "epoch": 3.887963756581364, "grad_norm": 3.8992855548858643, "learning_rate": 1.1076948192453899e-05, "loss": 0.5297, "step": 23817 }, { "epoch": 3.8881270152238683, "grad_norm": 2.7537357807159424, "learning_rate": 1.1076310777761114e-05, "loss": 0.3787, "step": 23818 }, { "epoch": 3.8882902738663727, "grad_norm": 3.143148899078369, "learning_rate": 1.1075673358644025e-05, "loss": 0.4533, "step": 23819 }, { "epoch": 3.888453532508877, "grad_norm": 3.504706621170044, "learning_rate": 1.1075035935105252e-05, "loss": 0.492, "step": 23820 }, { "epoch": 3.8886167911513816, "grad_norm": 3.294233560562134, "learning_rate": 1.1074398507147407e-05, "loss": 0.4646, "step": 23821 }, { "epoch": 3.888780049793886, "grad_norm": 3.8343546390533447, "learning_rate": 1.107376107477312e-05, "loss": 0.5397, "step": 23822 }, { "epoch": 3.8889433084363905, "grad_norm": 2.6845040321350098, "learning_rate": 1.1073123637985004e-05, "loss": 0.4078, "step": 23823 }, { "epoch": 3.889106567078895, "grad_norm": 3.0819690227508545, "learning_rate": 1.1072486196785679e-05, "loss": 0.425, "step": 23824 }, { "epoch": 3.8892698257213993, "grad_norm": 2.6974737644195557, "learning_rate": 1.1071848751177774e-05, "loss": 0.4394, "step": 23825 }, { "epoch": 3.889433084363904, "grad_norm": 3.198117256164551, "learning_rate": 1.1071211301163903e-05, "loss": 0.4555, "step": 23826 }, { "epoch": 3.8895963430064078, "grad_norm": 3.2353856563568115, "learning_rate": 1.1070573846746684e-05, "loss": 0.521, "step": 23827 }, { "epoch": 3.889759601648912, "grad_norm": 2.9566049575805664, "learning_rate": 1.1069936387928739e-05, "loss": 0.4166, "step": 23828 }, { "epoch": 3.8899228602914167, "grad_norm": 3.34177565574646, "learning_rate": 1.1069298924712691e-05, "loss": 0.4859, "step": 23829 }, { "epoch": 3.890086118933921, "grad_norm": 3.8554751873016357, "learning_rate": 1.106866145710116e-05, "loss": 0.4929, "step": 23830 }, { "epoch": 3.8902493775764255, "grad_norm": 3.7392308712005615, "learning_rate": 1.1068023985096766e-05, "loss": 0.4719, "step": 23831 }, { "epoch": 3.89041263621893, "grad_norm": 3.7278008460998535, "learning_rate": 1.1067386508702127e-05, "loss": 0.4464, "step": 23832 }, { "epoch": 3.890575894861434, "grad_norm": 2.9994356632232666, "learning_rate": 1.1066749027919865e-05, "loss": 0.3649, "step": 23833 }, { "epoch": 3.8907391535039384, "grad_norm": 3.0416417121887207, "learning_rate": 1.10661115427526e-05, "loss": 0.4683, "step": 23834 }, { "epoch": 3.890902412146443, "grad_norm": 3.620756149291992, "learning_rate": 1.1065474053202956e-05, "loss": 0.5124, "step": 23835 }, { "epoch": 3.8910656707889473, "grad_norm": 3.050950765609741, "learning_rate": 1.1064836559273546e-05, "loss": 0.4358, "step": 23836 }, { "epoch": 3.8912289294314517, "grad_norm": 3.8222014904022217, "learning_rate": 1.1064199060966998e-05, "loss": 0.4983, "step": 23837 }, { "epoch": 3.891392188073956, "grad_norm": 3.683100461959839, "learning_rate": 1.106356155828593e-05, "loss": 0.4866, "step": 23838 }, { "epoch": 3.8915554467164606, "grad_norm": 2.917807102203369, "learning_rate": 1.106292405123296e-05, "loss": 0.4056, "step": 23839 }, { "epoch": 3.891718705358965, "grad_norm": 2.8002266883850098, "learning_rate": 1.1062286539810712e-05, "loss": 0.4091, "step": 23840 }, { "epoch": 3.8918819640014695, "grad_norm": 3.09948992729187, "learning_rate": 1.1061649024021804e-05, "loss": 0.5066, "step": 23841 }, { "epoch": 3.892045222643974, "grad_norm": 2.94427752494812, "learning_rate": 1.106101150386886e-05, "loss": 0.4542, "step": 23842 }, { "epoch": 3.8922084812864783, "grad_norm": 3.2410593032836914, "learning_rate": 1.1060373979354496e-05, "loss": 0.432, "step": 23843 }, { "epoch": 3.8923717399289823, "grad_norm": 3.201347827911377, "learning_rate": 1.1059736450481338e-05, "loss": 0.4704, "step": 23844 }, { "epoch": 3.8925349985714868, "grad_norm": 3.2237930297851562, "learning_rate": 1.1059098917251997e-05, "loss": 0.4768, "step": 23845 }, { "epoch": 3.892698257213991, "grad_norm": 2.842749834060669, "learning_rate": 1.1058461379669106e-05, "loss": 0.385, "step": 23846 }, { "epoch": 3.8928615158564956, "grad_norm": 3.2781760692596436, "learning_rate": 1.1057823837735277e-05, "loss": 0.4555, "step": 23847 }, { "epoch": 3.893024774499, "grad_norm": 3.246267557144165, "learning_rate": 1.1057186291453137e-05, "loss": 0.4966, "step": 23848 }, { "epoch": 3.8931880331415045, "grad_norm": 3.0551042556762695, "learning_rate": 1.1056548740825303e-05, "loss": 0.4971, "step": 23849 }, { "epoch": 3.893351291784009, "grad_norm": 3.247605800628662, "learning_rate": 1.1055911185854396e-05, "loss": 0.4299, "step": 23850 }, { "epoch": 3.893514550426513, "grad_norm": 2.8778836727142334, "learning_rate": 1.1055273626543032e-05, "loss": 0.4221, "step": 23851 }, { "epoch": 3.8936778090690174, "grad_norm": 2.681636333465576, "learning_rate": 1.105463606289384e-05, "loss": 0.3438, "step": 23852 }, { "epoch": 3.893841067711522, "grad_norm": 2.8817241191864014, "learning_rate": 1.1053998494909437e-05, "loss": 0.4197, "step": 23853 }, { "epoch": 3.8940043263540263, "grad_norm": 2.541916847229004, "learning_rate": 1.1053360922592447e-05, "loss": 0.3688, "step": 23854 }, { "epoch": 3.8941675849965307, "grad_norm": 3.3747875690460205, "learning_rate": 1.1052723345945484e-05, "loss": 0.4822, "step": 23855 }, { "epoch": 3.894330843639035, "grad_norm": 2.9093003273010254, "learning_rate": 1.1052085764971172e-05, "loss": 0.4323, "step": 23856 }, { "epoch": 3.8944941022815396, "grad_norm": 2.475705623626709, "learning_rate": 1.1051448179672134e-05, "loss": 0.3869, "step": 23857 }, { "epoch": 3.894657360924044, "grad_norm": 3.166743516921997, "learning_rate": 1.1050810590050993e-05, "loss": 0.4062, "step": 23858 }, { "epoch": 3.8948206195665485, "grad_norm": 3.2103970050811768, "learning_rate": 1.1050172996110366e-05, "loss": 0.4337, "step": 23859 }, { "epoch": 3.894983878209053, "grad_norm": 3.0607903003692627, "learning_rate": 1.1049535397852873e-05, "loss": 0.4224, "step": 23860 }, { "epoch": 3.8951471368515573, "grad_norm": 3.1729233264923096, "learning_rate": 1.1048897795281135e-05, "loss": 0.5406, "step": 23861 }, { "epoch": 3.8953103954940613, "grad_norm": 3.226773738861084, "learning_rate": 1.1048260188397772e-05, "loss": 0.4501, "step": 23862 }, { "epoch": 3.8954736541365658, "grad_norm": 2.7287161350250244, "learning_rate": 1.104762257720541e-05, "loss": 0.4235, "step": 23863 }, { "epoch": 3.89563691277907, "grad_norm": 3.4061267375946045, "learning_rate": 1.104698496170667e-05, "loss": 0.4995, "step": 23864 }, { "epoch": 3.8958001714215746, "grad_norm": 3.8084843158721924, "learning_rate": 1.1046347341904166e-05, "loss": 0.5445, "step": 23865 }, { "epoch": 3.895963430064079, "grad_norm": 3.569912910461426, "learning_rate": 1.1045709717800525e-05, "loss": 0.507, "step": 23866 }, { "epoch": 3.8961266887065835, "grad_norm": 3.7609236240386963, "learning_rate": 1.1045072089398365e-05, "loss": 0.5808, "step": 23867 }, { "epoch": 3.896289947349088, "grad_norm": 3.001481056213379, "learning_rate": 1.1044434456700307e-05, "loss": 0.4635, "step": 23868 }, { "epoch": 3.896453205991592, "grad_norm": 3.854498863220215, "learning_rate": 1.1043796819708979e-05, "loss": 0.4279, "step": 23869 }, { "epoch": 3.8966164646340964, "grad_norm": 4.286695957183838, "learning_rate": 1.1043159178426991e-05, "loss": 0.5727, "step": 23870 }, { "epoch": 3.896779723276601, "grad_norm": 3.0344555377960205, "learning_rate": 1.104252153285697e-05, "loss": 0.4368, "step": 23871 }, { "epoch": 3.8969429819191053, "grad_norm": 4.062828063964844, "learning_rate": 1.1041883883001541e-05, "loss": 0.5752, "step": 23872 }, { "epoch": 3.8971062405616097, "grad_norm": 2.7083568572998047, "learning_rate": 1.1041246228863314e-05, "loss": 0.3564, "step": 23873 }, { "epoch": 3.897269499204114, "grad_norm": 2.9489734172821045, "learning_rate": 1.1040608570444922e-05, "loss": 0.4802, "step": 23874 }, { "epoch": 3.8974327578466186, "grad_norm": 3.328824043273926, "learning_rate": 1.1039970907748981e-05, "loss": 0.4783, "step": 23875 }, { "epoch": 3.897596016489123, "grad_norm": 3.5498454570770264, "learning_rate": 1.1039333240778109e-05, "loss": 0.5381, "step": 23876 }, { "epoch": 3.8977592751316275, "grad_norm": 2.838247537612915, "learning_rate": 1.1038695569534935e-05, "loss": 0.3908, "step": 23877 }, { "epoch": 3.897922533774132, "grad_norm": 3.589426040649414, "learning_rate": 1.1038057894022072e-05, "loss": 0.4749, "step": 23878 }, { "epoch": 3.8980857924166363, "grad_norm": 3.146850824356079, "learning_rate": 1.1037420214242144e-05, "loss": 0.4462, "step": 23879 }, { "epoch": 3.8982490510591403, "grad_norm": 3.0156373977661133, "learning_rate": 1.1036782530197776e-05, "loss": 0.4922, "step": 23880 }, { "epoch": 3.8984123097016448, "grad_norm": 3.1957592964172363, "learning_rate": 1.1036144841891587e-05, "loss": 0.3996, "step": 23881 }, { "epoch": 3.898575568344149, "grad_norm": 3.691094398498535, "learning_rate": 1.1035507149326197e-05, "loss": 0.5184, "step": 23882 }, { "epoch": 3.8987388269866536, "grad_norm": 3.256836175918579, "learning_rate": 1.1034869452504227e-05, "loss": 0.4964, "step": 23883 }, { "epoch": 3.898902085629158, "grad_norm": 2.9780426025390625, "learning_rate": 1.10342317514283e-05, "loss": 0.4803, "step": 23884 }, { "epoch": 3.8990653442716625, "grad_norm": 2.5928940773010254, "learning_rate": 1.1033594046101038e-05, "loss": 0.3586, "step": 23885 }, { "epoch": 3.8992286029141665, "grad_norm": 3.792127847671509, "learning_rate": 1.1032956336525059e-05, "loss": 0.5627, "step": 23886 }, { "epoch": 3.899391861556671, "grad_norm": 3.581130266189575, "learning_rate": 1.1032318622702987e-05, "loss": 0.4647, "step": 23887 }, { "epoch": 3.8995551201991754, "grad_norm": 3.391331434249878, "learning_rate": 1.1031680904637442e-05, "loss": 0.5002, "step": 23888 }, { "epoch": 3.89971837884168, "grad_norm": 3.67703914642334, "learning_rate": 1.103104318233105e-05, "loss": 0.5024, "step": 23889 }, { "epoch": 3.8998816374841843, "grad_norm": 2.3362762928009033, "learning_rate": 1.1030405455786425e-05, "loss": 0.3428, "step": 23890 }, { "epoch": 3.9000448961266887, "grad_norm": 3.4052646160125732, "learning_rate": 1.1029767725006195e-05, "loss": 0.4674, "step": 23891 }, { "epoch": 3.900208154769193, "grad_norm": 3.0906057357788086, "learning_rate": 1.1029129989992978e-05, "loss": 0.4268, "step": 23892 }, { "epoch": 3.9003714134116976, "grad_norm": 3.184147357940674, "learning_rate": 1.1028492250749399e-05, "loss": 0.4773, "step": 23893 }, { "epoch": 3.900534672054202, "grad_norm": 3.8793740272521973, "learning_rate": 1.1027854507278073e-05, "loss": 0.509, "step": 23894 }, { "epoch": 3.9006979306967065, "grad_norm": 3.5952060222625732, "learning_rate": 1.1027216759581622e-05, "loss": 0.4531, "step": 23895 }, { "epoch": 3.900861189339211, "grad_norm": 3.521538496017456, "learning_rate": 1.1026579007662676e-05, "loss": 0.5041, "step": 23896 }, { "epoch": 3.901024447981715, "grad_norm": 3.382737398147583, "learning_rate": 1.1025941251523848e-05, "loss": 0.4284, "step": 23897 }, { "epoch": 3.9011877066242193, "grad_norm": 3.847463846206665, "learning_rate": 1.102530349116777e-05, "loss": 0.5882, "step": 23898 }, { "epoch": 3.9013509652667238, "grad_norm": 2.99959397315979, "learning_rate": 1.102466572659705e-05, "loss": 0.4061, "step": 23899 }, { "epoch": 3.901514223909228, "grad_norm": 3.0056700706481934, "learning_rate": 1.1024027957814313e-05, "loss": 0.3957, "step": 23900 }, { "epoch": 3.9016774825517326, "grad_norm": 3.1510825157165527, "learning_rate": 1.1023390184822189e-05, "loss": 0.5107, "step": 23901 }, { "epoch": 3.901840741194237, "grad_norm": 3.368908405303955, "learning_rate": 1.1022752407623294e-05, "loss": 0.4982, "step": 23902 }, { "epoch": 3.9020039998367415, "grad_norm": 3.563737392425537, "learning_rate": 1.1022114626220253e-05, "loss": 0.4905, "step": 23903 }, { "epoch": 3.9021672584792455, "grad_norm": 3.1081457138061523, "learning_rate": 1.1021476840615682e-05, "loss": 0.4609, "step": 23904 }, { "epoch": 3.90233051712175, "grad_norm": 3.127584934234619, "learning_rate": 1.1020839050812202e-05, "loss": 0.4886, "step": 23905 }, { "epoch": 3.9024937757642544, "grad_norm": 2.9020447731018066, "learning_rate": 1.1020201256812439e-05, "loss": 0.4895, "step": 23906 }, { "epoch": 3.902657034406759, "grad_norm": 2.8623580932617188, "learning_rate": 1.1019563458619016e-05, "loss": 0.4226, "step": 23907 }, { "epoch": 3.9028202930492633, "grad_norm": 3.3402481079101562, "learning_rate": 1.1018925656234553e-05, "loss": 0.5537, "step": 23908 }, { "epoch": 3.9029835516917677, "grad_norm": 3.5385308265686035, "learning_rate": 1.1018287849661674e-05, "loss": 0.5501, "step": 23909 }, { "epoch": 3.903146810334272, "grad_norm": 3.937657356262207, "learning_rate": 1.1017650038902995e-05, "loss": 0.5589, "step": 23910 }, { "epoch": 3.9033100689767766, "grad_norm": 3.510138511657715, "learning_rate": 1.1017012223961138e-05, "loss": 0.4829, "step": 23911 }, { "epoch": 3.903473327619281, "grad_norm": 3.0788838863372803, "learning_rate": 1.101637440483873e-05, "loss": 0.4479, "step": 23912 }, { "epoch": 3.9036365862617854, "grad_norm": 3.0611977577209473, "learning_rate": 1.1015736581538392e-05, "loss": 0.4723, "step": 23913 }, { "epoch": 3.90379984490429, "grad_norm": 3.63082218170166, "learning_rate": 1.1015098754062743e-05, "loss": 0.4789, "step": 23914 }, { "epoch": 3.903963103546794, "grad_norm": 3.8471665382385254, "learning_rate": 1.1014460922414407e-05, "loss": 0.4992, "step": 23915 }, { "epoch": 3.9041263621892983, "grad_norm": 3.250945806503296, "learning_rate": 1.1013823086596004e-05, "loss": 0.5299, "step": 23916 }, { "epoch": 3.9042896208318028, "grad_norm": 3.1389052867889404, "learning_rate": 1.1013185246610158e-05, "loss": 0.4927, "step": 23917 }, { "epoch": 3.904452879474307, "grad_norm": 3.2782273292541504, "learning_rate": 1.1012547402459492e-05, "loss": 0.4772, "step": 23918 }, { "epoch": 3.9046161381168116, "grad_norm": 3.255655288696289, "learning_rate": 1.1011909554146625e-05, "loss": 0.4958, "step": 23919 }, { "epoch": 3.904779396759316, "grad_norm": 3.0340588092803955, "learning_rate": 1.1011271701674177e-05, "loss": 0.4643, "step": 23920 }, { "epoch": 3.90494265540182, "grad_norm": 3.1764190196990967, "learning_rate": 1.1010633845044776e-05, "loss": 0.4832, "step": 23921 }, { "epoch": 3.9051059140443245, "grad_norm": 2.732461929321289, "learning_rate": 1.1009995984261039e-05, "loss": 0.4108, "step": 23922 }, { "epoch": 3.905269172686829, "grad_norm": 3.219465494155884, "learning_rate": 1.100935811932559e-05, "loss": 0.5074, "step": 23923 }, { "epoch": 3.9054324313293334, "grad_norm": 3.1742238998413086, "learning_rate": 1.1008720250241054e-05, "loss": 0.4326, "step": 23924 }, { "epoch": 3.905595689971838, "grad_norm": 3.3915610313415527, "learning_rate": 1.1008082377010045e-05, "loss": 0.4594, "step": 23925 }, { "epoch": 3.9057589486143423, "grad_norm": 3.2179598808288574, "learning_rate": 1.1007444499635194e-05, "loss": 0.4919, "step": 23926 }, { "epoch": 3.9059222072568467, "grad_norm": 3.572054147720337, "learning_rate": 1.1006806618119118e-05, "loss": 0.5065, "step": 23927 }, { "epoch": 3.906085465899351, "grad_norm": 3.693601131439209, "learning_rate": 1.100616873246444e-05, "loss": 0.5287, "step": 23928 }, { "epoch": 3.9062487245418556, "grad_norm": 3.8097503185272217, "learning_rate": 1.1005530842673782e-05, "loss": 0.5568, "step": 23929 }, { "epoch": 3.90641198318436, "grad_norm": 3.7913153171539307, "learning_rate": 1.1004892948749766e-05, "loss": 0.5185, "step": 23930 }, { "epoch": 3.9065752418268644, "grad_norm": 2.843017101287842, "learning_rate": 1.1004255050695014e-05, "loss": 0.4873, "step": 23931 }, { "epoch": 3.9067385004693684, "grad_norm": 2.910661220550537, "learning_rate": 1.1003617148512149e-05, "loss": 0.4692, "step": 23932 }, { "epoch": 3.906901759111873, "grad_norm": 2.9040234088897705, "learning_rate": 1.1002979242203795e-05, "loss": 0.4493, "step": 23933 }, { "epoch": 3.9070650177543773, "grad_norm": 4.032688617706299, "learning_rate": 1.100234133177257e-05, "loss": 0.966, "step": 23934 }, { "epoch": 3.9072282763968817, "grad_norm": 3.3154313564300537, "learning_rate": 1.10017034172211e-05, "loss": 0.504, "step": 23935 }, { "epoch": 3.907391535039386, "grad_norm": 3.537029504776001, "learning_rate": 1.1001065498552003e-05, "loss": 0.5639, "step": 23936 }, { "epoch": 3.9075547936818906, "grad_norm": 4.012239933013916, "learning_rate": 1.1000427575767908e-05, "loss": 0.5451, "step": 23937 }, { "epoch": 3.907718052324395, "grad_norm": 2.6204371452331543, "learning_rate": 1.0999789648871428e-05, "loss": 0.3905, "step": 23938 }, { "epoch": 3.907881310966899, "grad_norm": 2.944683313369751, "learning_rate": 1.099915171786519e-05, "loss": 0.4629, "step": 23939 }, { "epoch": 3.9080445696094035, "grad_norm": 3.8201699256896973, "learning_rate": 1.099851378275182e-05, "loss": 0.524, "step": 23940 }, { "epoch": 3.908207828251908, "grad_norm": 3.080784797668457, "learning_rate": 1.0997875843533936e-05, "loss": 0.4374, "step": 23941 }, { "epoch": 3.9083710868944124, "grad_norm": 3.424571990966797, "learning_rate": 1.0997237900214163e-05, "loss": 0.4371, "step": 23942 }, { "epoch": 3.908534345536917, "grad_norm": 2.713883638381958, "learning_rate": 1.0996599952795119e-05, "loss": 0.4394, "step": 23943 }, { "epoch": 3.9086976041794212, "grad_norm": 2.821453094482422, "learning_rate": 1.0995962001279429e-05, "loss": 0.4007, "step": 23944 }, { "epoch": 3.9088608628219257, "grad_norm": 2.989652156829834, "learning_rate": 1.0995324045669713e-05, "loss": 0.4522, "step": 23945 }, { "epoch": 3.90902412146443, "grad_norm": 3.2920916080474854, "learning_rate": 1.09946860859686e-05, "loss": 0.4461, "step": 23946 }, { "epoch": 3.9091873801069346, "grad_norm": 3.4418766498565674, "learning_rate": 1.0994048122178707e-05, "loss": 0.4754, "step": 23947 }, { "epoch": 3.909350638749439, "grad_norm": 2.509833335876465, "learning_rate": 1.0993410154302657e-05, "loss": 0.4452, "step": 23948 }, { "epoch": 3.9095138973919434, "grad_norm": 3.5577688217163086, "learning_rate": 1.0992772182343074e-05, "loss": 0.5445, "step": 23949 }, { "epoch": 3.9096771560344474, "grad_norm": 3.5720198154449463, "learning_rate": 1.0992134206302576e-05, "loss": 0.5628, "step": 23950 }, { "epoch": 3.909840414676952, "grad_norm": 3.2277116775512695, "learning_rate": 1.0991496226183791e-05, "loss": 0.5274, "step": 23951 }, { "epoch": 3.9100036733194563, "grad_norm": 3.445361852645874, "learning_rate": 1.0990858241989341e-05, "loss": 0.5894, "step": 23952 }, { "epoch": 3.9101669319619607, "grad_norm": 3.6742238998413086, "learning_rate": 1.0990220253721846e-05, "loss": 0.452, "step": 23953 }, { "epoch": 3.910330190604465, "grad_norm": 3.386247396469116, "learning_rate": 1.0989582261383928e-05, "loss": 0.5574, "step": 23954 }, { "epoch": 3.9104934492469696, "grad_norm": 3.518251895904541, "learning_rate": 1.0988944264978213e-05, "loss": 0.431, "step": 23955 }, { "epoch": 3.910656707889474, "grad_norm": 3.33937931060791, "learning_rate": 1.0988306264507318e-05, "loss": 0.4375, "step": 23956 }, { "epoch": 3.910819966531978, "grad_norm": 3.034923791885376, "learning_rate": 1.0987668259973874e-05, "loss": 0.4003, "step": 23957 }, { "epoch": 3.9109832251744825, "grad_norm": 2.644838809967041, "learning_rate": 1.0987030251380495e-05, "loss": 0.3983, "step": 23958 }, { "epoch": 3.911146483816987, "grad_norm": 2.9177920818328857, "learning_rate": 1.098639223872981e-05, "loss": 0.4164, "step": 23959 }, { "epoch": 3.9113097424594914, "grad_norm": 3.3554952144622803, "learning_rate": 1.0985754222024437e-05, "loss": 0.5103, "step": 23960 }, { "epoch": 3.911473001101996, "grad_norm": 2.9397902488708496, "learning_rate": 1.0985116201266998e-05, "loss": 0.4367, "step": 23961 }, { "epoch": 3.9116362597445002, "grad_norm": 3.057516574859619, "learning_rate": 1.0984478176460125e-05, "loss": 0.3873, "step": 23962 }, { "epoch": 3.9117995183870047, "grad_norm": 3.5474116802215576, "learning_rate": 1.098384014760643e-05, "loss": 0.5327, "step": 23963 }, { "epoch": 3.911962777029509, "grad_norm": 3.4662890434265137, "learning_rate": 1.098320211470854e-05, "loss": 0.4652, "step": 23964 }, { "epoch": 3.9121260356720136, "grad_norm": 3.3844051361083984, "learning_rate": 1.0982564077769079e-05, "loss": 0.5672, "step": 23965 }, { "epoch": 3.912289294314518, "grad_norm": 3.275455951690674, "learning_rate": 1.0981926036790664e-05, "loss": 0.4512, "step": 23966 }, { "epoch": 3.9124525529570224, "grad_norm": 3.0355005264282227, "learning_rate": 1.0981287991775927e-05, "loss": 0.4691, "step": 23967 }, { "epoch": 3.9126158115995264, "grad_norm": 3.4883463382720947, "learning_rate": 1.0980649942727482e-05, "loss": 0.4757, "step": 23968 }, { "epoch": 3.912779070242031, "grad_norm": 3.3808999061584473, "learning_rate": 1.0980011889647954e-05, "loss": 0.5678, "step": 23969 }, { "epoch": 3.9129423288845353, "grad_norm": 3.3053951263427734, "learning_rate": 1.097937383253997e-05, "loss": 0.4344, "step": 23970 }, { "epoch": 3.9131055875270397, "grad_norm": 3.593155860900879, "learning_rate": 1.097873577140615e-05, "loss": 0.488, "step": 23971 }, { "epoch": 3.913268846169544, "grad_norm": 3.4897685050964355, "learning_rate": 1.0978097706249118e-05, "loss": 0.475, "step": 23972 }, { "epoch": 3.9134321048120486, "grad_norm": 3.6417226791381836, "learning_rate": 1.0977459637071493e-05, "loss": 0.4911, "step": 23973 }, { "epoch": 3.9135953634545526, "grad_norm": 2.8318846225738525, "learning_rate": 1.0976821563875902e-05, "loss": 0.4263, "step": 23974 }, { "epoch": 3.913758622097057, "grad_norm": 3.190255880355835, "learning_rate": 1.0976183486664964e-05, "loss": 0.4468, "step": 23975 }, { "epoch": 3.9139218807395615, "grad_norm": 3.154503107070923, "learning_rate": 1.0975545405441307e-05, "loss": 0.4671, "step": 23976 }, { "epoch": 3.914085139382066, "grad_norm": 3.4881350994110107, "learning_rate": 1.0974907320207551e-05, "loss": 0.4503, "step": 23977 }, { "epoch": 3.9142483980245704, "grad_norm": 3.1722311973571777, "learning_rate": 1.0974269230966316e-05, "loss": 0.4162, "step": 23978 }, { "epoch": 3.914411656667075, "grad_norm": 3.2862274646759033, "learning_rate": 1.0973631137720233e-05, "loss": 0.4776, "step": 23979 }, { "epoch": 3.9145749153095792, "grad_norm": 2.761263847351074, "learning_rate": 1.0972993040471918e-05, "loss": 0.471, "step": 23980 }, { "epoch": 3.9147381739520837, "grad_norm": 3.2361655235290527, "learning_rate": 1.0972354939223997e-05, "loss": 0.5001, "step": 23981 }, { "epoch": 3.914901432594588, "grad_norm": 2.6156563758850098, "learning_rate": 1.097171683397909e-05, "loss": 0.3866, "step": 23982 }, { "epoch": 3.9150646912370926, "grad_norm": 2.7354135513305664, "learning_rate": 1.0971078724739822e-05, "loss": 0.4704, "step": 23983 }, { "epoch": 3.915227949879597, "grad_norm": 3.5751192569732666, "learning_rate": 1.0970440611508814e-05, "loss": 0.5197, "step": 23984 }, { "epoch": 3.915391208522101, "grad_norm": 3.125347375869751, "learning_rate": 1.0969802494288695e-05, "loss": 0.3913, "step": 23985 }, { "epoch": 3.9155544671646054, "grad_norm": 3.260697364807129, "learning_rate": 1.0969164373082084e-05, "loss": 0.5176, "step": 23986 }, { "epoch": 3.91571772580711, "grad_norm": 2.8350536823272705, "learning_rate": 1.0968526247891605e-05, "loss": 0.4742, "step": 23987 }, { "epoch": 3.9158809844496143, "grad_norm": 3.489203691482544, "learning_rate": 1.0967888118719875e-05, "loss": 0.4788, "step": 23988 }, { "epoch": 3.9160442430921187, "grad_norm": 3.1660101413726807, "learning_rate": 1.0967249985569526e-05, "loss": 0.4589, "step": 23989 }, { "epoch": 3.916207501734623, "grad_norm": 3.084162712097168, "learning_rate": 1.0966611848443175e-05, "loss": 0.478, "step": 23990 }, { "epoch": 3.9163707603771276, "grad_norm": 3.4646453857421875, "learning_rate": 1.0965973707343454e-05, "loss": 0.5583, "step": 23991 }, { "epoch": 3.9165340190196316, "grad_norm": 2.774221658706665, "learning_rate": 1.0965335562272977e-05, "loss": 0.4782, "step": 23992 }, { "epoch": 3.916697277662136, "grad_norm": 4.07681941986084, "learning_rate": 1.0964697413234366e-05, "loss": 0.5853, "step": 23993 }, { "epoch": 3.9168605363046405, "grad_norm": 3.4169886112213135, "learning_rate": 1.0964059260230251e-05, "loss": 0.4309, "step": 23994 }, { "epoch": 3.917023794947145, "grad_norm": 3.714607000350952, "learning_rate": 1.0963421103263252e-05, "loss": 0.5005, "step": 23995 }, { "epoch": 3.9171870535896494, "grad_norm": 3.2704381942749023, "learning_rate": 1.0962782942335994e-05, "loss": 0.4844, "step": 23996 }, { "epoch": 3.917350312232154, "grad_norm": 3.535738945007324, "learning_rate": 1.0962144777451098e-05, "loss": 0.4898, "step": 23997 }, { "epoch": 3.9175135708746582, "grad_norm": 3.8007588386535645, "learning_rate": 1.0961506608611187e-05, "loss": 1.0392, "step": 23998 }, { "epoch": 3.9176768295171627, "grad_norm": 3.359933614730835, "learning_rate": 1.0960868435818888e-05, "loss": 0.4947, "step": 23999 }, { "epoch": 3.917840088159667, "grad_norm": 3.4356191158294678, "learning_rate": 1.0960230259076819e-05, "loss": 0.4267, "step": 24000 }, { "epoch": 3.9180033468021715, "grad_norm": 3.234286069869995, "learning_rate": 1.095959207838761e-05, "loss": 0.4784, "step": 24001 }, { "epoch": 3.918166605444676, "grad_norm": 3.1356663703918457, "learning_rate": 1.0958953893753875e-05, "loss": 0.5101, "step": 24002 }, { "epoch": 3.91832986408718, "grad_norm": 2.8325140476226807, "learning_rate": 1.0958315705178245e-05, "loss": 0.4302, "step": 24003 }, { "epoch": 3.9184931227296844, "grad_norm": 3.156710147857666, "learning_rate": 1.0957677512663341e-05, "loss": 0.4295, "step": 24004 }, { "epoch": 3.918656381372189, "grad_norm": 3.3416929244995117, "learning_rate": 1.0957039316211786e-05, "loss": 0.4806, "step": 24005 }, { "epoch": 3.9188196400146933, "grad_norm": 3.6382791996002197, "learning_rate": 1.0956401115826206e-05, "loss": 0.5228, "step": 24006 }, { "epoch": 3.9189828986571977, "grad_norm": 3.6939032077789307, "learning_rate": 1.0955762911509218e-05, "loss": 0.5511, "step": 24007 }, { "epoch": 3.919146157299702, "grad_norm": 2.8504865169525146, "learning_rate": 1.0955124703263453e-05, "loss": 0.4256, "step": 24008 }, { "epoch": 3.919309415942206, "grad_norm": 3.1255099773406982, "learning_rate": 1.0954486491091529e-05, "loss": 0.4358, "step": 24009 }, { "epoch": 3.9194726745847106, "grad_norm": 3.2192671298980713, "learning_rate": 1.0953848274996073e-05, "loss": 0.5761, "step": 24010 }, { "epoch": 3.919635933227215, "grad_norm": 2.9331188201904297, "learning_rate": 1.0953210054979705e-05, "loss": 0.4403, "step": 24011 }, { "epoch": 3.9197991918697195, "grad_norm": 3.4239137172698975, "learning_rate": 1.0952571831045051e-05, "loss": 0.4515, "step": 24012 }, { "epoch": 3.919962450512224, "grad_norm": 2.7057178020477295, "learning_rate": 1.0951933603194735e-05, "loss": 0.3658, "step": 24013 }, { "epoch": 3.9201257091547284, "grad_norm": 3.585960626602173, "learning_rate": 1.0951295371431379e-05, "loss": 0.5949, "step": 24014 }, { "epoch": 3.920288967797233, "grad_norm": 2.9950125217437744, "learning_rate": 1.0950657135757607e-05, "loss": 0.4531, "step": 24015 }, { "epoch": 3.9204522264397372, "grad_norm": 3.404322862625122, "learning_rate": 1.0950018896176042e-05, "loss": 0.4535, "step": 24016 }, { "epoch": 3.9206154850822417, "grad_norm": 3.501415729522705, "learning_rate": 1.0949380652689307e-05, "loss": 0.4993, "step": 24017 }, { "epoch": 3.920778743724746, "grad_norm": 3.695722818374634, "learning_rate": 1.094874240530003e-05, "loss": 0.5155, "step": 24018 }, { "epoch": 3.9209420023672505, "grad_norm": 3.451991558074951, "learning_rate": 1.0948104154010827e-05, "loss": 0.505, "step": 24019 }, { "epoch": 3.9211052610097545, "grad_norm": 2.3249895572662354, "learning_rate": 1.094746589882433e-05, "loss": 0.4135, "step": 24020 }, { "epoch": 3.921268519652259, "grad_norm": 3.7241363525390625, "learning_rate": 1.0946827639743155e-05, "loss": 0.4938, "step": 24021 }, { "epoch": 3.9214317782947634, "grad_norm": 3.214449167251587, "learning_rate": 1.0946189376769932e-05, "loss": 0.5756, "step": 24022 }, { "epoch": 3.921595036937268, "grad_norm": 2.9335007667541504, "learning_rate": 1.094555110990728e-05, "loss": 0.4262, "step": 24023 }, { "epoch": 3.9217582955797723, "grad_norm": 3.1852264404296875, "learning_rate": 1.0944912839157825e-05, "loss": 0.4667, "step": 24024 }, { "epoch": 3.9219215542222767, "grad_norm": 2.965033531188965, "learning_rate": 1.0944274564524188e-05, "loss": 0.4327, "step": 24025 }, { "epoch": 3.922084812864781, "grad_norm": 3.6512601375579834, "learning_rate": 1.0943636286008999e-05, "loss": 0.4443, "step": 24026 }, { "epoch": 3.922248071507285, "grad_norm": 3.112938642501831, "learning_rate": 1.0942998003614874e-05, "loss": 0.4347, "step": 24027 }, { "epoch": 3.9224113301497896, "grad_norm": 2.777552843093872, "learning_rate": 1.0942359717344439e-05, "loss": 0.4384, "step": 24028 }, { "epoch": 3.922574588792294, "grad_norm": 3.6068568229675293, "learning_rate": 1.0941721427200323e-05, "loss": 0.6151, "step": 24029 }, { "epoch": 3.9227378474347985, "grad_norm": 3.1769447326660156, "learning_rate": 1.0941083133185146e-05, "loss": 0.4581, "step": 24030 }, { "epoch": 3.922901106077303, "grad_norm": 2.21886944770813, "learning_rate": 1.0940444835301529e-05, "loss": 0.3609, "step": 24031 }, { "epoch": 3.9230643647198074, "grad_norm": 3.287357807159424, "learning_rate": 1.09398065335521e-05, "loss": 0.5008, "step": 24032 }, { "epoch": 3.923227623362312, "grad_norm": 2.9525339603424072, "learning_rate": 1.093916822793948e-05, "loss": 0.4343, "step": 24033 }, { "epoch": 3.9233908820048162, "grad_norm": 2.754300355911255, "learning_rate": 1.0938529918466291e-05, "loss": 0.4577, "step": 24034 }, { "epoch": 3.9235541406473207, "grad_norm": 3.275273084640503, "learning_rate": 1.0937891605135167e-05, "loss": 0.4755, "step": 24035 }, { "epoch": 3.923717399289825, "grad_norm": 3.3229517936706543, "learning_rate": 1.0937253287948721e-05, "loss": 0.4008, "step": 24036 }, { "epoch": 3.9238806579323295, "grad_norm": 3.307333469390869, "learning_rate": 1.093661496690958e-05, "loss": 0.4252, "step": 24037 }, { "epoch": 3.9240439165748335, "grad_norm": 2.551093101501465, "learning_rate": 1.0935976642020367e-05, "loss": 0.3621, "step": 24038 }, { "epoch": 3.924207175217338, "grad_norm": 2.9616949558258057, "learning_rate": 1.093533831328371e-05, "loss": 0.4843, "step": 24039 }, { "epoch": 3.9243704338598424, "grad_norm": 3.674412488937378, "learning_rate": 1.0934699980702232e-05, "loss": 0.5609, "step": 24040 }, { "epoch": 3.924533692502347, "grad_norm": 3.109938144683838, "learning_rate": 1.0934061644278553e-05, "loss": 0.4599, "step": 24041 }, { "epoch": 3.9246969511448513, "grad_norm": 3.2317168712615967, "learning_rate": 1.0933423304015301e-05, "loss": 0.456, "step": 24042 }, { "epoch": 3.9248602097873557, "grad_norm": 3.197892427444458, "learning_rate": 1.0932784959915096e-05, "loss": 0.4654, "step": 24043 }, { "epoch": 3.92502346842986, "grad_norm": 3.6546030044555664, "learning_rate": 1.0932146611980563e-05, "loss": 0.5299, "step": 24044 }, { "epoch": 3.925186727072364, "grad_norm": 3.4797558784484863, "learning_rate": 1.0931508260214333e-05, "loss": 0.4871, "step": 24045 }, { "epoch": 3.9253499857148686, "grad_norm": 3.253873109817505, "learning_rate": 1.0930869904619018e-05, "loss": 0.4011, "step": 24046 }, { "epoch": 3.925513244357373, "grad_norm": 3.748584508895874, "learning_rate": 1.0930231545197251e-05, "loss": 0.5125, "step": 24047 }, { "epoch": 3.9256765029998775, "grad_norm": 3.4448440074920654, "learning_rate": 1.0929593181951654e-05, "loss": 0.4636, "step": 24048 }, { "epoch": 3.925839761642382, "grad_norm": 3.666524648666382, "learning_rate": 1.0928954814884851e-05, "loss": 0.5367, "step": 24049 }, { "epoch": 3.9260030202848863, "grad_norm": 3.4128963947296143, "learning_rate": 1.0928316443999462e-05, "loss": 0.4699, "step": 24050 }, { "epoch": 3.926166278927391, "grad_norm": 3.046236515045166, "learning_rate": 1.0927678069298117e-05, "loss": 0.5142, "step": 24051 }, { "epoch": 3.9263295375698952, "grad_norm": 3.0339090824127197, "learning_rate": 1.0927039690783438e-05, "loss": 0.4082, "step": 24052 }, { "epoch": 3.9264927962123997, "grad_norm": 3.9002387523651123, "learning_rate": 1.092640130845805e-05, "loss": 0.4841, "step": 24053 }, { "epoch": 3.926656054854904, "grad_norm": 3.2335829734802246, "learning_rate": 1.0925762922324575e-05, "loss": 0.5075, "step": 24054 }, { "epoch": 3.9268193134974085, "grad_norm": 2.999016523361206, "learning_rate": 1.0925124532385636e-05, "loss": 0.4573, "step": 24055 }, { "epoch": 3.9269825721399125, "grad_norm": 3.6296255588531494, "learning_rate": 1.0924486138643862e-05, "loss": 0.5019, "step": 24056 }, { "epoch": 3.927145830782417, "grad_norm": 3.8405871391296387, "learning_rate": 1.0923847741101874e-05, "loss": 0.4964, "step": 24057 }, { "epoch": 3.9273090894249214, "grad_norm": 3.164896011352539, "learning_rate": 1.0923209339762295e-05, "loss": 0.3971, "step": 24058 }, { "epoch": 3.927472348067426, "grad_norm": 3.00536847114563, "learning_rate": 1.0922570934627754e-05, "loss": 0.427, "step": 24059 }, { "epoch": 3.9276356067099303, "grad_norm": 4.009483337402344, "learning_rate": 1.0921932525700869e-05, "loss": 0.5207, "step": 24060 }, { "epoch": 3.9277988653524347, "grad_norm": 3.6607813835144043, "learning_rate": 1.0921294112984272e-05, "loss": 0.5357, "step": 24061 }, { "epoch": 3.9279621239949387, "grad_norm": 3.9461395740509033, "learning_rate": 1.0920655696480578e-05, "loss": 0.5102, "step": 24062 }, { "epoch": 3.928125382637443, "grad_norm": 3.6450624465942383, "learning_rate": 1.0920017276192418e-05, "loss": 0.4859, "step": 24063 }, { "epoch": 3.9282886412799476, "grad_norm": 2.999399185180664, "learning_rate": 1.0919378852122415e-05, "loss": 0.4804, "step": 24064 }, { "epoch": 3.928451899922452, "grad_norm": 3.389538049697876, "learning_rate": 1.0918740424273191e-05, "loss": 0.4477, "step": 24065 }, { "epoch": 3.9286151585649565, "grad_norm": 3.136355400085449, "learning_rate": 1.0918101992647375e-05, "loss": 0.437, "step": 24066 }, { "epoch": 3.928778417207461, "grad_norm": 3.557633399963379, "learning_rate": 1.0917463557247586e-05, "loss": 0.4709, "step": 24067 }, { "epoch": 3.9289416758499653, "grad_norm": 2.9515085220336914, "learning_rate": 1.091682511807645e-05, "loss": 0.46, "step": 24068 }, { "epoch": 3.92910493449247, "grad_norm": 3.224541664123535, "learning_rate": 1.0916186675136593e-05, "loss": 0.5294, "step": 24069 }, { "epoch": 3.929268193134974, "grad_norm": 3.1756832599639893, "learning_rate": 1.091554822843064e-05, "loss": 0.4195, "step": 24070 }, { "epoch": 3.9294314517774787, "grad_norm": 3.642592191696167, "learning_rate": 1.0914909777961213e-05, "loss": 0.4866, "step": 24071 }, { "epoch": 3.929594710419983, "grad_norm": 3.2446482181549072, "learning_rate": 1.0914271323730936e-05, "loss": 0.412, "step": 24072 }, { "epoch": 3.929757969062487, "grad_norm": 3.7033872604370117, "learning_rate": 1.0913632865742435e-05, "loss": 0.5479, "step": 24073 }, { "epoch": 3.9299212277049915, "grad_norm": 3.0658059120178223, "learning_rate": 1.0912994403998338e-05, "loss": 0.4547, "step": 24074 }, { "epoch": 3.930084486347496, "grad_norm": 3.840404987335205, "learning_rate": 1.0912355938501265e-05, "loss": 0.5572, "step": 24075 }, { "epoch": 3.9302477449900004, "grad_norm": 3.4425158500671387, "learning_rate": 1.0911717469253838e-05, "loss": 0.5169, "step": 24076 }, { "epoch": 3.930411003632505, "grad_norm": 3.464820384979248, "learning_rate": 1.0911078996258683e-05, "loss": 0.5401, "step": 24077 }, { "epoch": 3.9305742622750093, "grad_norm": 3.8062968254089355, "learning_rate": 1.091044051951843e-05, "loss": 0.4723, "step": 24078 }, { "epoch": 3.9307375209175137, "grad_norm": 2.780149459838867, "learning_rate": 1.0909802039035702e-05, "loss": 0.4581, "step": 24079 }, { "epoch": 3.9309007795600177, "grad_norm": 3.6550159454345703, "learning_rate": 1.0909163554813119e-05, "loss": 0.5754, "step": 24080 }, { "epoch": 3.931064038202522, "grad_norm": 3.608734369277954, "learning_rate": 1.0908525066853306e-05, "loss": 0.5073, "step": 24081 }, { "epoch": 3.9312272968450266, "grad_norm": 2.970210552215576, "learning_rate": 1.0907886575158888e-05, "loss": 0.3995, "step": 24082 }, { "epoch": 3.931390555487531, "grad_norm": 3.1805579662323, "learning_rate": 1.0907248079732495e-05, "loss": 0.527, "step": 24083 }, { "epoch": 3.9315538141300355, "grad_norm": 3.082573175430298, "learning_rate": 1.0906609580576748e-05, "loss": 0.4913, "step": 24084 }, { "epoch": 3.93171707277254, "grad_norm": 2.6152968406677246, "learning_rate": 1.090597107769427e-05, "loss": 0.3933, "step": 24085 }, { "epoch": 3.9318803314150443, "grad_norm": 3.5764007568359375, "learning_rate": 1.0905332571087688e-05, "loss": 0.498, "step": 24086 }, { "epoch": 3.9320435900575488, "grad_norm": 2.7242748737335205, "learning_rate": 1.0904694060759624e-05, "loss": 0.4573, "step": 24087 }, { "epoch": 3.932206848700053, "grad_norm": 3.5398612022399902, "learning_rate": 1.0904055546712704e-05, "loss": 0.4816, "step": 24088 }, { "epoch": 3.9323701073425577, "grad_norm": 3.5124807357788086, "learning_rate": 1.0903417028949554e-05, "loss": 0.4813, "step": 24089 }, { "epoch": 3.932533365985062, "grad_norm": 2.7315518856048584, "learning_rate": 1.09027785074728e-05, "loss": 0.4168, "step": 24090 }, { "epoch": 3.932696624627566, "grad_norm": 3.225149393081665, "learning_rate": 1.0902139982285061e-05, "loss": 0.5632, "step": 24091 }, { "epoch": 3.9328598832700705, "grad_norm": 3.5041470527648926, "learning_rate": 1.0901501453388968e-05, "loss": 0.4854, "step": 24092 }, { "epoch": 3.933023141912575, "grad_norm": 2.911445140838623, "learning_rate": 1.090086292078714e-05, "loss": 0.4602, "step": 24093 }, { "epoch": 3.9331864005550794, "grad_norm": 3.40773344039917, "learning_rate": 1.0900224384482205e-05, "loss": 0.4982, "step": 24094 }, { "epoch": 3.933349659197584, "grad_norm": 3.6034929752349854, "learning_rate": 1.089958584447679e-05, "loss": 0.5223, "step": 24095 }, { "epoch": 3.9335129178400883, "grad_norm": 3.5356924533843994, "learning_rate": 1.0898947300773514e-05, "loss": 0.4546, "step": 24096 }, { "epoch": 3.9336761764825927, "grad_norm": 3.52986478805542, "learning_rate": 1.0898308753375007e-05, "loss": 0.5517, "step": 24097 }, { "epoch": 3.9338394351250967, "grad_norm": 4.004855632781982, "learning_rate": 1.0897670202283892e-05, "loss": 0.5204, "step": 24098 }, { "epoch": 3.934002693767601, "grad_norm": 2.877727746963501, "learning_rate": 1.0897031647502794e-05, "loss": 0.4623, "step": 24099 }, { "epoch": 3.9341659524101056, "grad_norm": 3.091381311416626, "learning_rate": 1.0896393089034336e-05, "loss": 0.5019, "step": 24100 }, { "epoch": 3.93432921105261, "grad_norm": 3.2081451416015625, "learning_rate": 1.0895754526881147e-05, "loss": 0.4484, "step": 24101 }, { "epoch": 3.9344924696951145, "grad_norm": 3.4653663635253906, "learning_rate": 1.0895115961045848e-05, "loss": 0.4171, "step": 24102 }, { "epoch": 3.934655728337619, "grad_norm": 2.970984697341919, "learning_rate": 1.0894477391531064e-05, "loss": 0.4336, "step": 24103 }, { "epoch": 3.9348189869801233, "grad_norm": 4.4755730628967285, "learning_rate": 1.0893838818339421e-05, "loss": 0.659, "step": 24104 }, { "epoch": 3.9349822456226278, "grad_norm": 3.0360500812530518, "learning_rate": 1.0893200241473548e-05, "loss": 0.4545, "step": 24105 }, { "epoch": 3.935145504265132, "grad_norm": 3.0528483390808105, "learning_rate": 1.0892561660936063e-05, "loss": 0.4738, "step": 24106 }, { "epoch": 3.9353087629076366, "grad_norm": 2.7011473178863525, "learning_rate": 1.0891923076729594e-05, "loss": 0.4151, "step": 24107 }, { "epoch": 3.935472021550141, "grad_norm": 3.2790000438690186, "learning_rate": 1.0891284488856766e-05, "loss": 0.4157, "step": 24108 }, { "epoch": 3.935635280192645, "grad_norm": 2.7003605365753174, "learning_rate": 1.0890645897320204e-05, "loss": 0.3768, "step": 24109 }, { "epoch": 3.9357985388351495, "grad_norm": 3.721994400024414, "learning_rate": 1.0890007302122534e-05, "loss": 0.5662, "step": 24110 }, { "epoch": 3.935961797477654, "grad_norm": 3.2607014179229736, "learning_rate": 1.088936870326638e-05, "loss": 0.4464, "step": 24111 }, { "epoch": 3.9361250561201584, "grad_norm": 3.368436336517334, "learning_rate": 1.0888730100754367e-05, "loss": 0.4929, "step": 24112 }, { "epoch": 3.936288314762663, "grad_norm": 3.35821795463562, "learning_rate": 1.088809149458912e-05, "loss": 0.5049, "step": 24113 }, { "epoch": 3.9364515734051673, "grad_norm": 3.3472650051116943, "learning_rate": 1.0887452884773267e-05, "loss": 0.4363, "step": 24114 }, { "epoch": 3.9366148320476713, "grad_norm": 2.9037013053894043, "learning_rate": 1.0886814271309426e-05, "loss": 0.4324, "step": 24115 }, { "epoch": 3.9367780906901757, "grad_norm": 3.5768332481384277, "learning_rate": 1.0886175654200226e-05, "loss": 0.5536, "step": 24116 }, { "epoch": 3.93694134933268, "grad_norm": 3.567697286605835, "learning_rate": 1.0885537033448295e-05, "loss": 0.4541, "step": 24117 }, { "epoch": 3.9371046079751846, "grad_norm": 3.5855391025543213, "learning_rate": 1.0884898409056257e-05, "loss": 0.4376, "step": 24118 }, { "epoch": 3.937267866617689, "grad_norm": 3.0748889446258545, "learning_rate": 1.0884259781026734e-05, "loss": 0.4234, "step": 24119 }, { "epoch": 3.9374311252601935, "grad_norm": 3.56034779548645, "learning_rate": 1.088362114936235e-05, "loss": 0.5217, "step": 24120 }, { "epoch": 3.937594383902698, "grad_norm": 3.2267181873321533, "learning_rate": 1.0882982514065734e-05, "loss": 0.4501, "step": 24121 }, { "epoch": 3.9377576425452023, "grad_norm": 3.1421849727630615, "learning_rate": 1.0882343875139513e-05, "loss": 0.4978, "step": 24122 }, { "epoch": 3.9379209011877068, "grad_norm": 2.790255546569824, "learning_rate": 1.0881705232586311e-05, "loss": 0.4109, "step": 24123 }, { "epoch": 3.938084159830211, "grad_norm": 3.5739710330963135, "learning_rate": 1.0881066586408748e-05, "loss": 0.5179, "step": 24124 }, { "epoch": 3.9382474184727156, "grad_norm": 3.3577823638916016, "learning_rate": 1.0880427936609455e-05, "loss": 0.4989, "step": 24125 }, { "epoch": 3.9384106771152196, "grad_norm": 3.2080492973327637, "learning_rate": 1.0879789283191053e-05, "loss": 0.4468, "step": 24126 }, { "epoch": 3.938573935757724, "grad_norm": 3.1707544326782227, "learning_rate": 1.087915062615617e-05, "loss": 0.4973, "step": 24127 }, { "epoch": 3.9387371944002285, "grad_norm": 3.1925241947174072, "learning_rate": 1.0878511965507435e-05, "loss": 0.4153, "step": 24128 }, { "epoch": 3.938900453042733, "grad_norm": 3.7848992347717285, "learning_rate": 1.0877873301247463e-05, "loss": 0.4782, "step": 24129 }, { "epoch": 3.9390637116852374, "grad_norm": 4.005263328552246, "learning_rate": 1.0877234633378891e-05, "loss": 0.5686, "step": 24130 }, { "epoch": 3.939226970327742, "grad_norm": 2.8972434997558594, "learning_rate": 1.0876595961904335e-05, "loss": 0.4402, "step": 24131 }, { "epoch": 3.9393902289702463, "grad_norm": 3.2359039783477783, "learning_rate": 1.0875957286826421e-05, "loss": 0.4884, "step": 24132 }, { "epoch": 3.9395534876127503, "grad_norm": 3.409172296524048, "learning_rate": 1.0875318608147784e-05, "loss": 0.4835, "step": 24133 }, { "epoch": 3.9397167462552547, "grad_norm": 3.1608715057373047, "learning_rate": 1.0874679925871039e-05, "loss": 0.4324, "step": 24134 }, { "epoch": 3.939880004897759, "grad_norm": 3.365215539932251, "learning_rate": 1.0874041239998817e-05, "loss": 0.4259, "step": 24135 }, { "epoch": 3.9400432635402636, "grad_norm": 3.285398244857788, "learning_rate": 1.0873402550533738e-05, "loss": 0.4664, "step": 24136 }, { "epoch": 3.940206522182768, "grad_norm": 3.197275400161743, "learning_rate": 1.0872763857478435e-05, "loss": 0.4338, "step": 24137 }, { "epoch": 3.9403697808252724, "grad_norm": 3.5714330673217773, "learning_rate": 1.0872125160835528e-05, "loss": 0.4365, "step": 24138 }, { "epoch": 3.940533039467777, "grad_norm": 2.8215079307556152, "learning_rate": 1.087148646060764e-05, "loss": 0.4326, "step": 24139 }, { "epoch": 3.9406962981102813, "grad_norm": 3.682112455368042, "learning_rate": 1.0870847756797405e-05, "loss": 0.48, "step": 24140 }, { "epoch": 3.9408595567527858, "grad_norm": 4.289705753326416, "learning_rate": 1.0870209049407443e-05, "loss": 0.5832, "step": 24141 }, { "epoch": 3.94102281539529, "grad_norm": 3.3487634658813477, "learning_rate": 1.0869570338440379e-05, "loss": 0.4613, "step": 24142 }, { "epoch": 3.9411860740377946, "grad_norm": 2.978440523147583, "learning_rate": 1.086893162389884e-05, "loss": 0.4191, "step": 24143 }, { "epoch": 3.9413493326802986, "grad_norm": 3.8313255310058594, "learning_rate": 1.0868292905785452e-05, "loss": 0.4868, "step": 24144 }, { "epoch": 3.941512591322803, "grad_norm": 3.120073080062866, "learning_rate": 1.0867654184102838e-05, "loss": 0.4485, "step": 24145 }, { "epoch": 3.9416758499653075, "grad_norm": 3.5553200244903564, "learning_rate": 1.0867015458853627e-05, "loss": 0.4996, "step": 24146 }, { "epoch": 3.941839108607812, "grad_norm": 3.2289035320281982, "learning_rate": 1.0866376730040442e-05, "loss": 0.4325, "step": 24147 }, { "epoch": 3.9420023672503164, "grad_norm": 3.4052586555480957, "learning_rate": 1.0865737997665908e-05, "loss": 0.4357, "step": 24148 }, { "epoch": 3.942165625892821, "grad_norm": 3.3208298683166504, "learning_rate": 1.0865099261732655e-05, "loss": 0.4266, "step": 24149 }, { "epoch": 3.942328884535325, "grad_norm": 3.095226287841797, "learning_rate": 1.0864460522243303e-05, "loss": 0.4345, "step": 24150 }, { "epoch": 3.9424921431778293, "grad_norm": 3.4747304916381836, "learning_rate": 1.086382177920048e-05, "loss": 0.4851, "step": 24151 }, { "epoch": 3.9426554018203337, "grad_norm": 2.807861089706421, "learning_rate": 1.0863183032606814e-05, "loss": 0.3976, "step": 24152 }, { "epoch": 3.942818660462838, "grad_norm": 3.29213547706604, "learning_rate": 1.0862544282464929e-05, "loss": 0.4353, "step": 24153 }, { "epoch": 3.9429819191053426, "grad_norm": 3.1204841136932373, "learning_rate": 1.0861905528777448e-05, "loss": 0.4133, "step": 24154 }, { "epoch": 3.943145177747847, "grad_norm": 3.0762863159179688, "learning_rate": 1.0861266771546998e-05, "loss": 0.4552, "step": 24155 }, { "epoch": 3.9433084363903514, "grad_norm": 2.896615505218506, "learning_rate": 1.0860628010776208e-05, "loss": 0.4245, "step": 24156 }, { "epoch": 3.943471695032856, "grad_norm": 3.636828899383545, "learning_rate": 1.0859989246467701e-05, "loss": 0.5315, "step": 24157 }, { "epoch": 3.9436349536753603, "grad_norm": 4.011520862579346, "learning_rate": 1.0859350478624105e-05, "loss": 0.4909, "step": 24158 }, { "epoch": 3.9437982123178648, "grad_norm": 3.2735886573791504, "learning_rate": 1.0858711707248037e-05, "loss": 0.529, "step": 24159 }, { "epoch": 3.943961470960369, "grad_norm": 3.1948745250701904, "learning_rate": 1.0858072932342133e-05, "loss": 0.4566, "step": 24160 }, { "epoch": 3.944124729602873, "grad_norm": 3.2604708671569824, "learning_rate": 1.0857434153909017e-05, "loss": 0.4759, "step": 24161 }, { "epoch": 3.9442879882453776, "grad_norm": 3.710279703140259, "learning_rate": 1.0856795371951314e-05, "loss": 0.4783, "step": 24162 }, { "epoch": 3.944451246887882, "grad_norm": 4.012999534606934, "learning_rate": 1.0856156586471644e-05, "loss": 0.5125, "step": 24163 }, { "epoch": 3.9446145055303865, "grad_norm": 3.6355180740356445, "learning_rate": 1.085551779747264e-05, "loss": 0.4791, "step": 24164 }, { "epoch": 3.944777764172891, "grad_norm": 3.421832323074341, "learning_rate": 1.0854879004956924e-05, "loss": 0.5117, "step": 24165 }, { "epoch": 3.9449410228153954, "grad_norm": 2.7079710960388184, "learning_rate": 1.0854240208927125e-05, "loss": 0.4611, "step": 24166 }, { "epoch": 3.9451042814579, "grad_norm": 3.109095811843872, "learning_rate": 1.0853601409385868e-05, "loss": 0.4488, "step": 24167 }, { "epoch": 3.945267540100404, "grad_norm": 2.4842092990875244, "learning_rate": 1.0852962606335776e-05, "loss": 0.3746, "step": 24168 }, { "epoch": 3.9454307987429083, "grad_norm": 2.644224166870117, "learning_rate": 1.0852323799779477e-05, "loss": 0.3748, "step": 24169 }, { "epoch": 3.9455940573854127, "grad_norm": 2.988983631134033, "learning_rate": 1.0851684989719595e-05, "loss": 0.3768, "step": 24170 }, { "epoch": 3.945757316027917, "grad_norm": 3.2212932109832764, "learning_rate": 1.0851046176158759e-05, "loss": 0.4952, "step": 24171 }, { "epoch": 3.9459205746704216, "grad_norm": 2.9616589546203613, "learning_rate": 1.0850407359099595e-05, "loss": 0.4524, "step": 24172 }, { "epoch": 3.946083833312926, "grad_norm": 3.306213140487671, "learning_rate": 1.0849768538544727e-05, "loss": 0.4717, "step": 24173 }, { "epoch": 3.9462470919554304, "grad_norm": 3.424309730529785, "learning_rate": 1.0849129714496782e-05, "loss": 0.4742, "step": 24174 }, { "epoch": 3.946410350597935, "grad_norm": 3.387720823287964, "learning_rate": 1.0848490886958383e-05, "loss": 0.4183, "step": 24175 }, { "epoch": 3.9465736092404393, "grad_norm": 3.1220481395721436, "learning_rate": 1.0847852055932157e-05, "loss": 0.4118, "step": 24176 }, { "epoch": 3.9467368678829438, "grad_norm": 3.493321418762207, "learning_rate": 1.0847213221420735e-05, "loss": 0.4528, "step": 24177 }, { "epoch": 3.946900126525448, "grad_norm": 3.601738214492798, "learning_rate": 1.084657438342674e-05, "loss": 0.4366, "step": 24178 }, { "epoch": 3.947063385167952, "grad_norm": 3.0929133892059326, "learning_rate": 1.0845935541952794e-05, "loss": 0.4227, "step": 24179 }, { "epoch": 3.9472266438104566, "grad_norm": 2.871328592300415, "learning_rate": 1.0845296697001528e-05, "loss": 0.3853, "step": 24180 }, { "epoch": 3.947389902452961, "grad_norm": 3.20487642288208, "learning_rate": 1.0844657848575567e-05, "loss": 0.498, "step": 24181 }, { "epoch": 3.9475531610954655, "grad_norm": 3.514558792114258, "learning_rate": 1.0844018996677537e-05, "loss": 0.5099, "step": 24182 }, { "epoch": 3.94771641973797, "grad_norm": 3.228040933609009, "learning_rate": 1.0843380141310063e-05, "loss": 0.493, "step": 24183 }, { "epoch": 3.9478796783804744, "grad_norm": 3.222656726837158, "learning_rate": 1.084274128247577e-05, "loss": 0.527, "step": 24184 }, { "epoch": 3.948042937022979, "grad_norm": 2.8873159885406494, "learning_rate": 1.0842102420177288e-05, "loss": 0.3955, "step": 24185 }, { "epoch": 3.948206195665483, "grad_norm": 2.993830680847168, "learning_rate": 1.084146355441724e-05, "loss": 0.468, "step": 24186 }, { "epoch": 3.9483694543079872, "grad_norm": 3.2810373306274414, "learning_rate": 1.0840824685198252e-05, "loss": 0.4735, "step": 24187 }, { "epoch": 3.9485327129504917, "grad_norm": 2.950359582901001, "learning_rate": 1.0840185812522952e-05, "loss": 0.4393, "step": 24188 }, { "epoch": 3.948695971592996, "grad_norm": 2.932067632675171, "learning_rate": 1.0839546936393965e-05, "loss": 0.4207, "step": 24189 }, { "epoch": 3.9488592302355006, "grad_norm": 3.133042573928833, "learning_rate": 1.0838908056813919e-05, "loss": 0.4289, "step": 24190 }, { "epoch": 3.949022488878005, "grad_norm": 3.257275104522705, "learning_rate": 1.083826917378544e-05, "loss": 0.4236, "step": 24191 }, { "epoch": 3.9491857475205094, "grad_norm": 2.682572364807129, "learning_rate": 1.083763028731115e-05, "loss": 0.4165, "step": 24192 }, { "epoch": 3.949349006163014, "grad_norm": 3.150567054748535, "learning_rate": 1.0836991397393676e-05, "loss": 0.4493, "step": 24193 }, { "epoch": 3.9495122648055183, "grad_norm": 3.0220956802368164, "learning_rate": 1.083635250403565e-05, "loss": 0.4082, "step": 24194 }, { "epoch": 3.9496755234480228, "grad_norm": 2.887439250946045, "learning_rate": 1.0835713607239694e-05, "loss": 0.4, "step": 24195 }, { "epoch": 3.949838782090527, "grad_norm": 3.333768606185913, "learning_rate": 1.0835074707008434e-05, "loss": 0.4174, "step": 24196 }, { "epoch": 3.950002040733031, "grad_norm": 3.080446481704712, "learning_rate": 1.08344358033445e-05, "loss": 0.4245, "step": 24197 }, { "epoch": 3.9501652993755356, "grad_norm": 2.72355055809021, "learning_rate": 1.0833796896250512e-05, "loss": 0.4022, "step": 24198 }, { "epoch": 3.95032855801804, "grad_norm": 2.7463948726654053, "learning_rate": 1.0833157985729104e-05, "loss": 0.3858, "step": 24199 }, { "epoch": 3.9504918166605445, "grad_norm": 3.050189733505249, "learning_rate": 1.0832519071782895e-05, "loss": 0.4319, "step": 24200 }, { "epoch": 3.950655075303049, "grad_norm": 3.008354425430298, "learning_rate": 1.0831880154414514e-05, "loss": 0.4285, "step": 24201 }, { "epoch": 3.9508183339455534, "grad_norm": 3.3952503204345703, "learning_rate": 1.0831241233626592e-05, "loss": 0.5008, "step": 24202 }, { "epoch": 3.9509815925880574, "grad_norm": 3.3106307983398438, "learning_rate": 1.0830602309421745e-05, "loss": 0.4876, "step": 24203 }, { "epoch": 3.951144851230562, "grad_norm": 3.378061056137085, "learning_rate": 1.0829963381802608e-05, "loss": 0.4986, "step": 24204 }, { "epoch": 3.9513081098730662, "grad_norm": 3.2821476459503174, "learning_rate": 1.0829324450771806e-05, "loss": 0.4872, "step": 24205 }, { "epoch": 3.9514713685155707, "grad_norm": 3.2261223793029785, "learning_rate": 1.0828685516331965e-05, "loss": 0.4545, "step": 24206 }, { "epoch": 3.951634627158075, "grad_norm": 3.0178544521331787, "learning_rate": 1.0828046578485711e-05, "loss": 0.4226, "step": 24207 }, { "epoch": 3.9517978858005796, "grad_norm": 4.041083812713623, "learning_rate": 1.082740763723567e-05, "loss": 0.5778, "step": 24208 }, { "epoch": 3.951961144443084, "grad_norm": 3.741758108139038, "learning_rate": 1.0826768692584464e-05, "loss": 0.5121, "step": 24209 }, { "epoch": 3.9521244030855884, "grad_norm": 3.3581318855285645, "learning_rate": 1.0826129744534728e-05, "loss": 0.4697, "step": 24210 }, { "epoch": 3.952287661728093, "grad_norm": 2.9881787300109863, "learning_rate": 1.0825490793089086e-05, "loss": 0.4648, "step": 24211 }, { "epoch": 3.9524509203705973, "grad_norm": 3.974447011947632, "learning_rate": 1.082485183825016e-05, "loss": 0.5259, "step": 24212 }, { "epoch": 3.9526141790131017, "grad_norm": 3.672915458679199, "learning_rate": 1.0824212880020581e-05, "loss": 0.5045, "step": 24213 }, { "epoch": 3.9527774376556057, "grad_norm": 3.4567272663116455, "learning_rate": 1.0823573918402975e-05, "loss": 0.4863, "step": 24214 }, { "epoch": 3.95294069629811, "grad_norm": 3.1704227924346924, "learning_rate": 1.0822934953399962e-05, "loss": 0.3692, "step": 24215 }, { "epoch": 3.9531039549406146, "grad_norm": 3.0776803493499756, "learning_rate": 1.0822295985014182e-05, "loss": 0.4499, "step": 24216 }, { "epoch": 3.953267213583119, "grad_norm": 3.704869508743286, "learning_rate": 1.082165701324825e-05, "loss": 0.5301, "step": 24217 }, { "epoch": 3.9534304722256235, "grad_norm": 2.957099676132202, "learning_rate": 1.0821018038104796e-05, "loss": 0.4287, "step": 24218 }, { "epoch": 3.953593730868128, "grad_norm": 3.7631113529205322, "learning_rate": 1.0820379059586448e-05, "loss": 0.7659, "step": 24219 }, { "epoch": 3.9537569895106324, "grad_norm": 3.1848771572113037, "learning_rate": 1.0819740077695827e-05, "loss": 0.4597, "step": 24220 }, { "epoch": 3.9539202481531364, "grad_norm": 3.483656644821167, "learning_rate": 1.0819101092435571e-05, "loss": 0.5342, "step": 24221 }, { "epoch": 3.954083506795641, "grad_norm": 3.2668960094451904, "learning_rate": 1.0818462103808297e-05, "loss": 0.4659, "step": 24222 }, { "epoch": 3.9542467654381452, "grad_norm": 3.266974449157715, "learning_rate": 1.0817823111816633e-05, "loss": 0.4801, "step": 24223 }, { "epoch": 3.9544100240806497, "grad_norm": 3.9878652095794678, "learning_rate": 1.0817184116463207e-05, "loss": 0.5249, "step": 24224 }, { "epoch": 3.954573282723154, "grad_norm": 3.189664363861084, "learning_rate": 1.0816545117750648e-05, "loss": 0.4247, "step": 24225 }, { "epoch": 3.9547365413656586, "grad_norm": 3.727186918258667, "learning_rate": 1.0815906115681579e-05, "loss": 0.538, "step": 24226 }, { "epoch": 3.954899800008163, "grad_norm": 3.9158992767333984, "learning_rate": 1.0815267110258627e-05, "loss": 0.5459, "step": 24227 }, { "epoch": 3.9550630586506674, "grad_norm": 3.303440809249878, "learning_rate": 1.081462810148442e-05, "loss": 0.4578, "step": 24228 }, { "epoch": 3.955226317293172, "grad_norm": 2.9342823028564453, "learning_rate": 1.0813989089361585e-05, "loss": 0.3935, "step": 24229 }, { "epoch": 3.9553895759356763, "grad_norm": 3.2989706993103027, "learning_rate": 1.0813350073892747e-05, "loss": 0.4839, "step": 24230 }, { "epoch": 3.9555528345781807, "grad_norm": 2.796766996383667, "learning_rate": 1.0812711055080536e-05, "loss": 0.4324, "step": 24231 }, { "epoch": 3.9557160932206847, "grad_norm": 3.003425121307373, "learning_rate": 1.0812072032927577e-05, "loss": 0.4412, "step": 24232 }, { "epoch": 3.955879351863189, "grad_norm": 2.9541122913360596, "learning_rate": 1.0811433007436493e-05, "loss": 0.4074, "step": 24233 }, { "epoch": 3.9560426105056936, "grad_norm": 3.124753952026367, "learning_rate": 1.0810793978609918e-05, "loss": 0.467, "step": 24234 }, { "epoch": 3.956205869148198, "grad_norm": 3.442593574523926, "learning_rate": 1.0810154946450476e-05, "loss": 0.5243, "step": 24235 }, { "epoch": 3.9563691277907025, "grad_norm": 3.0763139724731445, "learning_rate": 1.0809515910960789e-05, "loss": 0.4613, "step": 24236 }, { "epoch": 3.956532386433207, "grad_norm": 2.9626963138580322, "learning_rate": 1.080887687214349e-05, "loss": 0.4241, "step": 24237 }, { "epoch": 3.956695645075711, "grad_norm": 3.39973521232605, "learning_rate": 1.0808237830001202e-05, "loss": 0.4228, "step": 24238 }, { "epoch": 3.9568589037182154, "grad_norm": 3.171621322631836, "learning_rate": 1.0807598784536555e-05, "loss": 0.453, "step": 24239 }, { "epoch": 3.95702216236072, "grad_norm": 2.707031726837158, "learning_rate": 1.0806959735752174e-05, "loss": 0.3789, "step": 24240 }, { "epoch": 3.9571854210032242, "grad_norm": 3.1418962478637695, "learning_rate": 1.0806320683650686e-05, "loss": 0.4523, "step": 24241 }, { "epoch": 3.9573486796457287, "grad_norm": 2.7341721057891846, "learning_rate": 1.0805681628234718e-05, "loss": 0.4268, "step": 24242 }, { "epoch": 3.957511938288233, "grad_norm": 2.976759195327759, "learning_rate": 1.0805042569506899e-05, "loss": 0.4342, "step": 24243 }, { "epoch": 3.9576751969307375, "grad_norm": 3.1886119842529297, "learning_rate": 1.0804403507469852e-05, "loss": 0.4838, "step": 24244 }, { "epoch": 3.957838455573242, "grad_norm": 3.025167226791382, "learning_rate": 1.0803764442126206e-05, "loss": 0.4527, "step": 24245 }, { "epoch": 3.9580017142157464, "grad_norm": 2.8535361289978027, "learning_rate": 1.0803125373478588e-05, "loss": 0.4782, "step": 24246 }, { "epoch": 3.958164972858251, "grad_norm": 3.5809273719787598, "learning_rate": 1.0802486301529625e-05, "loss": 0.5193, "step": 24247 }, { "epoch": 3.9583282315007553, "grad_norm": 2.887756586074829, "learning_rate": 1.0801847226281942e-05, "loss": 0.4546, "step": 24248 }, { "epoch": 3.9584914901432593, "grad_norm": 3.0203258991241455, "learning_rate": 1.0801208147738171e-05, "loss": 0.4445, "step": 24249 }, { "epoch": 3.9586547487857637, "grad_norm": 3.096181869506836, "learning_rate": 1.0800569065900935e-05, "loss": 0.5172, "step": 24250 }, { "epoch": 3.958818007428268, "grad_norm": 3.538411855697632, "learning_rate": 1.0799929980772864e-05, "loss": 0.5697, "step": 24251 }, { "epoch": 3.9589812660707726, "grad_norm": 3.306347608566284, "learning_rate": 1.079929089235658e-05, "loss": 0.447, "step": 24252 }, { "epoch": 3.959144524713277, "grad_norm": 3.0626769065856934, "learning_rate": 1.079865180065471e-05, "loss": 0.4814, "step": 24253 }, { "epoch": 3.9593077833557815, "grad_norm": 3.264455556869507, "learning_rate": 1.0798012705669887e-05, "loss": 0.524, "step": 24254 }, { "epoch": 3.959471041998286, "grad_norm": 3.234346866607666, "learning_rate": 1.0797373607404737e-05, "loss": 0.4469, "step": 24255 }, { "epoch": 3.95963430064079, "grad_norm": 3.406264305114746, "learning_rate": 1.0796734505861884e-05, "loss": 0.5084, "step": 24256 }, { "epoch": 3.9597975592832944, "grad_norm": 3.9008400440216064, "learning_rate": 1.0796095401043956e-05, "loss": 0.5636, "step": 24257 }, { "epoch": 3.959960817925799, "grad_norm": 2.7525463104248047, "learning_rate": 1.0795456292953578e-05, "loss": 0.3829, "step": 24258 }, { "epoch": 3.9601240765683032, "grad_norm": 3.2705042362213135, "learning_rate": 1.0794817181593383e-05, "loss": 0.508, "step": 24259 }, { "epoch": 3.9602873352108077, "grad_norm": 2.3592429161071777, "learning_rate": 1.0794178066965995e-05, "loss": 0.3498, "step": 24260 }, { "epoch": 3.960450593853312, "grad_norm": 3.321016788482666, "learning_rate": 1.0793538949074039e-05, "loss": 0.4322, "step": 24261 }, { "epoch": 3.9606138524958165, "grad_norm": 3.258164167404175, "learning_rate": 1.0792899827920142e-05, "loss": 0.4774, "step": 24262 }, { "epoch": 3.960777111138321, "grad_norm": 3.7120351791381836, "learning_rate": 1.0792260703506937e-05, "loss": 0.5109, "step": 24263 }, { "epoch": 3.9609403697808254, "grad_norm": 3.549335241317749, "learning_rate": 1.0791621575837044e-05, "loss": 0.5047, "step": 24264 }, { "epoch": 3.96110362842333, "grad_norm": 3.736299753189087, "learning_rate": 1.0790982444913096e-05, "loss": 0.5745, "step": 24265 }, { "epoch": 3.9612668870658343, "grad_norm": 3.4016356468200684, "learning_rate": 1.0790343310737718e-05, "loss": 0.4972, "step": 24266 }, { "epoch": 3.9614301457083383, "grad_norm": 3.746659994125366, "learning_rate": 1.0789704173313536e-05, "loss": 0.5359, "step": 24267 }, { "epoch": 3.9615934043508427, "grad_norm": 2.751211166381836, "learning_rate": 1.0789065032643178e-05, "loss": 0.3892, "step": 24268 }, { "epoch": 3.961756662993347, "grad_norm": 3.7692174911499023, "learning_rate": 1.0788425888729272e-05, "loss": 0.4845, "step": 24269 }, { "epoch": 3.9619199216358516, "grad_norm": 3.03605055809021, "learning_rate": 1.0787786741574447e-05, "loss": 0.4074, "step": 24270 }, { "epoch": 3.962083180278356, "grad_norm": 3.1994404792785645, "learning_rate": 1.0787147591181327e-05, "loss": 0.4684, "step": 24271 }, { "epoch": 3.9622464389208605, "grad_norm": 3.477935552597046, "learning_rate": 1.0786508437552539e-05, "loss": 0.5064, "step": 24272 }, { "epoch": 3.962409697563365, "grad_norm": 3.903963088989258, "learning_rate": 1.0785869280690713e-05, "loss": 0.5, "step": 24273 }, { "epoch": 3.962572956205869, "grad_norm": 3.365427017211914, "learning_rate": 1.0785230120598472e-05, "loss": 0.4656, "step": 24274 }, { "epoch": 3.9627362148483734, "grad_norm": 3.52144718170166, "learning_rate": 1.0784590957278452e-05, "loss": 0.4931, "step": 24275 }, { "epoch": 3.962899473490878, "grad_norm": 3.2470099925994873, "learning_rate": 1.078395179073327e-05, "loss": 0.4588, "step": 24276 }, { "epoch": 3.9630627321333822, "grad_norm": 4.056171417236328, "learning_rate": 1.0783312620965561e-05, "loss": 0.4685, "step": 24277 }, { "epoch": 3.9632259907758867, "grad_norm": 2.5187392234802246, "learning_rate": 1.0782673447977949e-05, "loss": 0.3854, "step": 24278 }, { "epoch": 3.963389249418391, "grad_norm": 4.070316314697266, "learning_rate": 1.0782034271773061e-05, "loss": 0.5058, "step": 24279 }, { "epoch": 3.9635525080608955, "grad_norm": 2.698538064956665, "learning_rate": 1.0781395092353526e-05, "loss": 0.4157, "step": 24280 }, { "epoch": 3.9637157667034, "grad_norm": 3.1055681705474854, "learning_rate": 1.0780755909721971e-05, "loss": 0.5114, "step": 24281 }, { "epoch": 3.9638790253459044, "grad_norm": 3.052759885787964, "learning_rate": 1.0780116723881024e-05, "loss": 0.4072, "step": 24282 }, { "epoch": 3.964042283988409, "grad_norm": 3.28227162361145, "learning_rate": 1.0779477534833311e-05, "loss": 0.4811, "step": 24283 }, { "epoch": 3.9642055426309133, "grad_norm": 3.836902618408203, "learning_rate": 1.077883834258146e-05, "loss": 0.5449, "step": 24284 }, { "epoch": 3.9643688012734173, "grad_norm": 3.5227394104003906, "learning_rate": 1.0778199147128102e-05, "loss": 0.5786, "step": 24285 }, { "epoch": 3.9645320599159217, "grad_norm": 3.141411542892456, "learning_rate": 1.0777559948475855e-05, "loss": 0.4283, "step": 24286 }, { "epoch": 3.964695318558426, "grad_norm": 3.8944742679595947, "learning_rate": 1.0776920746627356e-05, "loss": 0.5587, "step": 24287 }, { "epoch": 3.9648585772009306, "grad_norm": 3.031446933746338, "learning_rate": 1.077628154158523e-05, "loss": 0.3958, "step": 24288 }, { "epoch": 3.965021835843435, "grad_norm": 3.8250818252563477, "learning_rate": 1.07756423333521e-05, "loss": 0.4738, "step": 24289 }, { "epoch": 3.9651850944859395, "grad_norm": 3.148219347000122, "learning_rate": 1.0775003121930602e-05, "loss": 0.4372, "step": 24290 }, { "epoch": 3.9653483531284435, "grad_norm": 3.1051714420318604, "learning_rate": 1.0774363907323355e-05, "loss": 0.4169, "step": 24291 }, { "epoch": 3.965511611770948, "grad_norm": 2.720458507537842, "learning_rate": 1.0773724689532992e-05, "loss": 0.3759, "step": 24292 }, { "epoch": 3.9656748704134523, "grad_norm": 3.802614688873291, "learning_rate": 1.0773085468562139e-05, "loss": 0.9651, "step": 24293 }, { "epoch": 3.965838129055957, "grad_norm": 3.3200087547302246, "learning_rate": 1.0772446244413423e-05, "loss": 0.4686, "step": 24294 }, { "epoch": 3.9660013876984612, "grad_norm": 3.284668445587158, "learning_rate": 1.0771807017089476e-05, "loss": 0.4913, "step": 24295 }, { "epoch": 3.9661646463409657, "grad_norm": 2.721194267272949, "learning_rate": 1.0771167786592917e-05, "loss": 0.3735, "step": 24296 }, { "epoch": 3.96632790498347, "grad_norm": 3.2060868740081787, "learning_rate": 1.0770528552926379e-05, "loss": 0.4063, "step": 24297 }, { "epoch": 3.9664911636259745, "grad_norm": 3.3652231693267822, "learning_rate": 1.076988931609249e-05, "loss": 0.4901, "step": 24298 }, { "epoch": 3.966654422268479, "grad_norm": 2.9102776050567627, "learning_rate": 1.0769250076093878e-05, "loss": 0.4466, "step": 24299 }, { "epoch": 3.9668176809109834, "grad_norm": 3.486966371536255, "learning_rate": 1.0768610832933169e-05, "loss": 0.4903, "step": 24300 }, { "epoch": 3.966980939553488, "grad_norm": 3.33095121383667, "learning_rate": 1.076797158661299e-05, "loss": 0.4972, "step": 24301 }, { "epoch": 3.967144198195992, "grad_norm": 2.8440749645233154, "learning_rate": 1.076733233713597e-05, "loss": 0.403, "step": 24302 }, { "epoch": 3.9673074568384963, "grad_norm": 3.21478271484375, "learning_rate": 1.0766693084504734e-05, "loss": 0.4736, "step": 24303 }, { "epoch": 3.9674707154810007, "grad_norm": 2.606027841567993, "learning_rate": 1.0766053828721916e-05, "loss": 0.3757, "step": 24304 }, { "epoch": 3.967633974123505, "grad_norm": 3.328331708908081, "learning_rate": 1.0765414569790138e-05, "loss": 0.4463, "step": 24305 }, { "epoch": 3.9677972327660096, "grad_norm": 3.3317975997924805, "learning_rate": 1.076477530771203e-05, "loss": 0.4083, "step": 24306 }, { "epoch": 3.967960491408514, "grad_norm": 3.15994930267334, "learning_rate": 1.0764136042490221e-05, "loss": 0.4492, "step": 24307 }, { "epoch": 3.9681237500510185, "grad_norm": 3.1038851737976074, "learning_rate": 1.0763496774127333e-05, "loss": 0.4641, "step": 24308 }, { "epoch": 3.9682870086935225, "grad_norm": 3.0410056114196777, "learning_rate": 1.0762857502626002e-05, "loss": 0.4824, "step": 24309 }, { "epoch": 3.968450267336027, "grad_norm": 3.1382720470428467, "learning_rate": 1.0762218227988853e-05, "loss": 0.5156, "step": 24310 }, { "epoch": 3.9686135259785313, "grad_norm": 3.2939414978027344, "learning_rate": 1.076157895021851e-05, "loss": 0.4655, "step": 24311 }, { "epoch": 3.968776784621036, "grad_norm": 3.391784429550171, "learning_rate": 1.0760939669317606e-05, "loss": 0.481, "step": 24312 }, { "epoch": 3.96894004326354, "grad_norm": 3.1010794639587402, "learning_rate": 1.0760300385288763e-05, "loss": 0.4301, "step": 24313 }, { "epoch": 3.9691033019060447, "grad_norm": 3.875659704208374, "learning_rate": 1.0759661098134613e-05, "loss": 0.5097, "step": 24314 }, { "epoch": 3.969266560548549, "grad_norm": 3.5134878158569336, "learning_rate": 1.0759021807857784e-05, "loss": 0.4697, "step": 24315 }, { "epoch": 3.9694298191910535, "grad_norm": 2.9259297847747803, "learning_rate": 1.0758382514460903e-05, "loss": 0.3495, "step": 24316 }, { "epoch": 3.969593077833558, "grad_norm": 3.7604358196258545, "learning_rate": 1.0757743217946596e-05, "loss": 0.5596, "step": 24317 }, { "epoch": 3.9697563364760624, "grad_norm": 3.6163156032562256, "learning_rate": 1.0757103918317496e-05, "loss": 0.5108, "step": 24318 }, { "epoch": 3.969919595118567, "grad_norm": 3.7532176971435547, "learning_rate": 1.0756464615576225e-05, "loss": 0.5848, "step": 24319 }, { "epoch": 3.970082853761071, "grad_norm": 3.137101411819458, "learning_rate": 1.0755825309725417e-05, "loss": 0.4242, "step": 24320 }, { "epoch": 3.9702461124035753, "grad_norm": 2.964669704437256, "learning_rate": 1.0755186000767693e-05, "loss": 0.3947, "step": 24321 }, { "epoch": 3.9704093710460797, "grad_norm": 2.7384450435638428, "learning_rate": 1.0754546688705685e-05, "loss": 0.3831, "step": 24322 }, { "epoch": 3.970572629688584, "grad_norm": 3.4994025230407715, "learning_rate": 1.075390737354202e-05, "loss": 0.5197, "step": 24323 }, { "epoch": 3.9707358883310886, "grad_norm": 3.6489388942718506, "learning_rate": 1.0753268055279328e-05, "loss": 0.5116, "step": 24324 }, { "epoch": 3.970899146973593, "grad_norm": 2.971015214920044, "learning_rate": 1.0752628733920236e-05, "loss": 0.4288, "step": 24325 }, { "epoch": 3.9710624056160975, "grad_norm": 3.5419554710388184, "learning_rate": 1.075198940946737e-05, "loss": 0.4448, "step": 24326 }, { "epoch": 3.9712256642586015, "grad_norm": 3.5627315044403076, "learning_rate": 1.075135008192336e-05, "loss": 0.594, "step": 24327 }, { "epoch": 3.971388922901106, "grad_norm": 2.8620684146881104, "learning_rate": 1.0750710751290834e-05, "loss": 0.445, "step": 24328 }, { "epoch": 3.9715521815436103, "grad_norm": 3.6694064140319824, "learning_rate": 1.0750071417572421e-05, "loss": 0.4778, "step": 24329 }, { "epoch": 3.9717154401861148, "grad_norm": 3.956956624984741, "learning_rate": 1.0749432080770743e-05, "loss": 0.4749, "step": 24330 }, { "epoch": 3.971878698828619, "grad_norm": 3.0522620677948, "learning_rate": 1.0748792740888434e-05, "loss": 0.4531, "step": 24331 }, { "epoch": 3.9720419574711237, "grad_norm": 3.8763418197631836, "learning_rate": 1.0748153397928123e-05, "loss": 0.4658, "step": 24332 }, { "epoch": 3.972205216113628, "grad_norm": 3.387932538986206, "learning_rate": 1.0747514051892437e-05, "loss": 0.4499, "step": 24333 }, { "epoch": 3.9723684747561325, "grad_norm": 3.0262722969055176, "learning_rate": 1.0746874702784003e-05, "loss": 0.4196, "step": 24334 }, { "epoch": 3.972531733398637, "grad_norm": 3.588010311126709, "learning_rate": 1.0746235350605442e-05, "loss": 0.5089, "step": 24335 }, { "epoch": 3.9726949920411414, "grad_norm": 3.4504106044769287, "learning_rate": 1.0745595995359394e-05, "loss": 0.4972, "step": 24336 }, { "epoch": 3.972858250683646, "grad_norm": 3.5121026039123535, "learning_rate": 1.0744956637048485e-05, "loss": 0.4854, "step": 24337 }, { "epoch": 3.97302150932615, "grad_norm": 3.3434832096099854, "learning_rate": 1.0744317275675338e-05, "loss": 0.4514, "step": 24338 }, { "epoch": 3.9731847679686543, "grad_norm": 2.975311756134033, "learning_rate": 1.0743677911242587e-05, "loss": 0.4121, "step": 24339 }, { "epoch": 3.9733480266111587, "grad_norm": 3.299933433532715, "learning_rate": 1.0743038543752852e-05, "loss": 0.4567, "step": 24340 }, { "epoch": 3.973511285253663, "grad_norm": 3.1079955101013184, "learning_rate": 1.0742399173208766e-05, "loss": 0.4714, "step": 24341 }, { "epoch": 3.9736745438961676, "grad_norm": 3.2478339672088623, "learning_rate": 1.074175979961296e-05, "loss": 0.4358, "step": 24342 }, { "epoch": 3.973837802538672, "grad_norm": 3.5211262702941895, "learning_rate": 1.0741120422968058e-05, "loss": 0.4635, "step": 24343 }, { "epoch": 3.974001061181176, "grad_norm": 3.2994213104248047, "learning_rate": 1.0740481043276693e-05, "loss": 0.4935, "step": 24344 }, { "epoch": 3.9741643198236805, "grad_norm": 3.1328697204589844, "learning_rate": 1.0739841660541489e-05, "loss": 0.447, "step": 24345 }, { "epoch": 3.974327578466185, "grad_norm": 2.8343288898468018, "learning_rate": 1.0739202274765073e-05, "loss": 0.4082, "step": 24346 }, { "epoch": 3.9744908371086893, "grad_norm": 3.579146146774292, "learning_rate": 1.0738562885950074e-05, "loss": 0.508, "step": 24347 }, { "epoch": 3.9746540957511938, "grad_norm": 2.779330015182495, "learning_rate": 1.0737923494099127e-05, "loss": 0.4226, "step": 24348 }, { "epoch": 3.974817354393698, "grad_norm": 3.868516445159912, "learning_rate": 1.0737284099214854e-05, "loss": 0.4709, "step": 24349 }, { "epoch": 3.9749806130362026, "grad_norm": 2.706784963607788, "learning_rate": 1.0736644701299884e-05, "loss": 0.3659, "step": 24350 }, { "epoch": 3.975143871678707, "grad_norm": 3.7419118881225586, "learning_rate": 1.0736005300356842e-05, "loss": 0.5618, "step": 24351 }, { "epoch": 3.9753071303212115, "grad_norm": 3.0357422828674316, "learning_rate": 1.0735365896388361e-05, "loss": 0.4134, "step": 24352 }, { "epoch": 3.975470388963716, "grad_norm": 3.356475591659546, "learning_rate": 1.0734726489397074e-05, "loss": 0.4127, "step": 24353 }, { "epoch": 3.9756336476062204, "grad_norm": 3.699439764022827, "learning_rate": 1.07340870793856e-05, "loss": 0.4358, "step": 24354 }, { "epoch": 3.9757969062487244, "grad_norm": 3.5949480533599854, "learning_rate": 1.073344766635657e-05, "loss": 0.4892, "step": 24355 }, { "epoch": 3.975960164891229, "grad_norm": 3.1977431774139404, "learning_rate": 1.0732808250312617e-05, "loss": 0.3563, "step": 24356 }, { "epoch": 3.9761234235337333, "grad_norm": 3.211937189102173, "learning_rate": 1.0732168831256365e-05, "loss": 0.472, "step": 24357 }, { "epoch": 3.9762866821762377, "grad_norm": 3.522465229034424, "learning_rate": 1.073152940919044e-05, "loss": 0.4755, "step": 24358 }, { "epoch": 3.976449940818742, "grad_norm": 3.4557039737701416, "learning_rate": 1.0730889984117477e-05, "loss": 0.5232, "step": 24359 }, { "epoch": 3.9766131994612466, "grad_norm": 3.763383626937866, "learning_rate": 1.07302505560401e-05, "loss": 0.4827, "step": 24360 }, { "epoch": 3.976776458103751, "grad_norm": 2.5386593341827393, "learning_rate": 1.072961112496094e-05, "loss": 0.431, "step": 24361 }, { "epoch": 3.976939716746255, "grad_norm": 3.499922275543213, "learning_rate": 1.0728971690882624e-05, "loss": 0.4676, "step": 24362 }, { "epoch": 3.9771029753887595, "grad_norm": 3.39294695854187, "learning_rate": 1.0728332253807777e-05, "loss": 0.4588, "step": 24363 }, { "epoch": 3.977266234031264, "grad_norm": 2.643251895904541, "learning_rate": 1.0727692813739037e-05, "loss": 0.383, "step": 24364 }, { "epoch": 3.9774294926737683, "grad_norm": 3.1414742469787598, "learning_rate": 1.0727053370679023e-05, "loss": 0.4467, "step": 24365 }, { "epoch": 3.9775927513162728, "grad_norm": 3.6049001216888428, "learning_rate": 1.0726413924630367e-05, "loss": 0.508, "step": 24366 }, { "epoch": 3.977756009958777, "grad_norm": 2.9529812335968018, "learning_rate": 1.07257744755957e-05, "loss": 0.4052, "step": 24367 }, { "epoch": 3.9779192686012816, "grad_norm": 3.0140554904937744, "learning_rate": 1.0725135023577646e-05, "loss": 0.4352, "step": 24368 }, { "epoch": 3.978082527243786, "grad_norm": 3.4301581382751465, "learning_rate": 1.0724495568578836e-05, "loss": 0.3609, "step": 24369 }, { "epoch": 3.9782457858862905, "grad_norm": 3.3674726486206055, "learning_rate": 1.07238561106019e-05, "loss": 0.4473, "step": 24370 }, { "epoch": 3.978409044528795, "grad_norm": 3.2772886753082275, "learning_rate": 1.0723216649649464e-05, "loss": 0.509, "step": 24371 }, { "epoch": 3.9785723031712994, "grad_norm": 2.3527729511260986, "learning_rate": 1.0722577185724157e-05, "loss": 0.3858, "step": 24372 }, { "epoch": 3.9787355618138034, "grad_norm": 3.268906593322754, "learning_rate": 1.072193771882861e-05, "loss": 0.4686, "step": 24373 }, { "epoch": 3.978898820456308, "grad_norm": 3.511711359024048, "learning_rate": 1.0721298248965445e-05, "loss": 0.4362, "step": 24374 }, { "epoch": 3.9790620790988123, "grad_norm": 3.4047863483428955, "learning_rate": 1.0720658776137298e-05, "loss": 0.4564, "step": 24375 }, { "epoch": 3.9792253377413167, "grad_norm": 3.178645372390747, "learning_rate": 1.0720019300346793e-05, "loss": 0.4719, "step": 24376 }, { "epoch": 3.979388596383821, "grad_norm": 3.1164345741271973, "learning_rate": 1.0719379821596563e-05, "loss": 0.5303, "step": 24377 }, { "epoch": 3.9795518550263256, "grad_norm": 3.124690294265747, "learning_rate": 1.0718740339889235e-05, "loss": 0.4528, "step": 24378 }, { "epoch": 3.9797151136688296, "grad_norm": 3.235607862472534, "learning_rate": 1.0718100855227434e-05, "loss": 0.479, "step": 24379 }, { "epoch": 3.979878372311334, "grad_norm": 2.74981689453125, "learning_rate": 1.0717461367613794e-05, "loss": 0.3966, "step": 24380 }, { "epoch": 3.9800416309538384, "grad_norm": 3.0476884841918945, "learning_rate": 1.071682187705094e-05, "loss": 0.4505, "step": 24381 }, { "epoch": 3.980204889596343, "grad_norm": 3.6242427825927734, "learning_rate": 1.0716182383541502e-05, "loss": 0.5124, "step": 24382 }, { "epoch": 3.9803681482388473, "grad_norm": 3.1488280296325684, "learning_rate": 1.0715542887088111e-05, "loss": 0.4582, "step": 24383 }, { "epoch": 3.9805314068813518, "grad_norm": 2.93257999420166, "learning_rate": 1.0714903387693392e-05, "loss": 0.4542, "step": 24384 }, { "epoch": 3.980694665523856, "grad_norm": 3.3324127197265625, "learning_rate": 1.071426388535997e-05, "loss": 0.515, "step": 24385 }, { "epoch": 3.9808579241663606, "grad_norm": 2.9062752723693848, "learning_rate": 1.0713624380090484e-05, "loss": 0.4089, "step": 24386 }, { "epoch": 3.981021182808865, "grad_norm": 3.3702101707458496, "learning_rate": 1.0712984871887557e-05, "loss": 0.432, "step": 24387 }, { "epoch": 3.9811844414513695, "grad_norm": 3.8630499839782715, "learning_rate": 1.071234536075382e-05, "loss": 0.5109, "step": 24388 }, { "epoch": 3.981347700093874, "grad_norm": 4.356593608856201, "learning_rate": 1.0711705846691898e-05, "loss": 0.517, "step": 24389 }, { "epoch": 3.981510958736378, "grad_norm": 2.4709689617156982, "learning_rate": 1.0711066329704422e-05, "loss": 0.4065, "step": 24390 }, { "epoch": 3.9816742173788824, "grad_norm": 2.9196178913116455, "learning_rate": 1.0710426809794019e-05, "loss": 0.3973, "step": 24391 }, { "epoch": 3.981837476021387, "grad_norm": 3.201298475265503, "learning_rate": 1.0709787286963324e-05, "loss": 0.4819, "step": 24392 }, { "epoch": 3.9820007346638913, "grad_norm": 3.637300491333008, "learning_rate": 1.070914776121496e-05, "loss": 0.5025, "step": 24393 }, { "epoch": 3.9821639933063957, "grad_norm": 3.1440863609313965, "learning_rate": 1.0708508232551555e-05, "loss": 0.4223, "step": 24394 }, { "epoch": 3.9823272519489, "grad_norm": 3.347865343093872, "learning_rate": 1.0707868700975743e-05, "loss": 0.4433, "step": 24395 }, { "epoch": 3.9824905105914046, "grad_norm": 2.8227343559265137, "learning_rate": 1.0707229166490149e-05, "loss": 0.4437, "step": 24396 }, { "epoch": 3.9826537692339086, "grad_norm": 3.0681114196777344, "learning_rate": 1.07065896290974e-05, "loss": 0.4576, "step": 24397 }, { "epoch": 3.982817027876413, "grad_norm": 2.8380250930786133, "learning_rate": 1.0705950088800133e-05, "loss": 0.4213, "step": 24398 }, { "epoch": 3.9829802865189174, "grad_norm": 3.125793218612671, "learning_rate": 1.0705310545600968e-05, "loss": 0.4059, "step": 24399 }, { "epoch": 3.983143545161422, "grad_norm": 2.610295057296753, "learning_rate": 1.070467099950254e-05, "loss": 0.3553, "step": 24400 }, { "epoch": 3.9833068038039263, "grad_norm": 3.9998674392700195, "learning_rate": 1.0704031450507474e-05, "loss": 0.5981, "step": 24401 }, { "epoch": 3.9834700624464308, "grad_norm": 3.8786988258361816, "learning_rate": 1.0703391898618403e-05, "loss": 0.5519, "step": 24402 }, { "epoch": 3.983633321088935, "grad_norm": 3.365436553955078, "learning_rate": 1.070275234383795e-05, "loss": 0.5182, "step": 24403 }, { "epoch": 3.9837965797314396, "grad_norm": 3.2157418727874756, "learning_rate": 1.0702112786168751e-05, "loss": 0.4607, "step": 24404 }, { "epoch": 3.983959838373944, "grad_norm": 2.5486104488372803, "learning_rate": 1.0701473225613428e-05, "loss": 0.3754, "step": 24405 }, { "epoch": 3.9841230970164485, "grad_norm": 3.6479742527008057, "learning_rate": 1.0700833662174615e-05, "loss": 0.5653, "step": 24406 }, { "epoch": 3.984286355658953, "grad_norm": 3.8090498447418213, "learning_rate": 1.0700194095854939e-05, "loss": 0.5335, "step": 24407 }, { "epoch": 3.984449614301457, "grad_norm": 3.1929142475128174, "learning_rate": 1.069955452665703e-05, "loss": 0.477, "step": 24408 }, { "epoch": 3.9846128729439614, "grad_norm": 3.4638190269470215, "learning_rate": 1.0698914954583518e-05, "loss": 0.5658, "step": 24409 }, { "epoch": 3.984776131586466, "grad_norm": 3.4406211376190186, "learning_rate": 1.0698275379637028e-05, "loss": 0.5471, "step": 24410 }, { "epoch": 3.9849393902289703, "grad_norm": 3.085991621017456, "learning_rate": 1.0697635801820192e-05, "loss": 0.3956, "step": 24411 }, { "epoch": 3.9851026488714747, "grad_norm": 3.33720064163208, "learning_rate": 1.069699622113564e-05, "loss": 0.508, "step": 24412 }, { "epoch": 3.985265907513979, "grad_norm": 3.2111713886260986, "learning_rate": 1.0696356637585998e-05, "loss": 0.4483, "step": 24413 }, { "epoch": 3.9854291661564836, "grad_norm": 3.633580207824707, "learning_rate": 1.0695717051173897e-05, "loss": 0.4924, "step": 24414 }, { "epoch": 3.9855924247989876, "grad_norm": 2.8667447566986084, "learning_rate": 1.0695077461901968e-05, "loss": 0.4368, "step": 24415 }, { "epoch": 3.985755683441492, "grad_norm": 3.1439850330352783, "learning_rate": 1.0694437869772836e-05, "loss": 0.4488, "step": 24416 }, { "epoch": 3.9859189420839964, "grad_norm": 3.3581066131591797, "learning_rate": 1.0693798274789135e-05, "loss": 0.4487, "step": 24417 }, { "epoch": 3.986082200726501, "grad_norm": 3.6481804847717285, "learning_rate": 1.0693158676953488e-05, "loss": 0.5127, "step": 24418 }, { "epoch": 3.9862454593690053, "grad_norm": 3.817756175994873, "learning_rate": 1.0692519076268529e-05, "loss": 0.5083, "step": 24419 }, { "epoch": 3.9864087180115098, "grad_norm": 3.3263895511627197, "learning_rate": 1.0691879472736883e-05, "loss": 0.4917, "step": 24420 }, { "epoch": 3.986571976654014, "grad_norm": 3.6488518714904785, "learning_rate": 1.0691239866361186e-05, "loss": 0.5183, "step": 24421 }, { "epoch": 3.9867352352965186, "grad_norm": 2.93693470954895, "learning_rate": 1.0690600257144062e-05, "loss": 0.3933, "step": 24422 }, { "epoch": 3.986898493939023, "grad_norm": 3.8270580768585205, "learning_rate": 1.0689960645088136e-05, "loss": 0.5018, "step": 24423 }, { "epoch": 3.9870617525815275, "grad_norm": 3.351177453994751, "learning_rate": 1.0689321030196047e-05, "loss": 0.4983, "step": 24424 }, { "epoch": 3.987225011224032, "grad_norm": 3.5982730388641357, "learning_rate": 1.0688681412470418e-05, "loss": 0.5079, "step": 24425 }, { "epoch": 3.987388269866536, "grad_norm": 2.8014307022094727, "learning_rate": 1.0688041791913881e-05, "loss": 0.4154, "step": 24426 }, { "epoch": 3.9875515285090404, "grad_norm": 4.2419657707214355, "learning_rate": 1.0687402168529064e-05, "loss": 0.5062, "step": 24427 }, { "epoch": 3.987714787151545, "grad_norm": 3.772726535797119, "learning_rate": 1.0686762542318596e-05, "loss": 0.5077, "step": 24428 }, { "epoch": 3.9878780457940493, "grad_norm": 3.7627029418945312, "learning_rate": 1.0686122913285104e-05, "loss": 0.523, "step": 24429 }, { "epoch": 3.9880413044365537, "grad_norm": 3.3747212886810303, "learning_rate": 1.0685483281431221e-05, "loss": 0.5017, "step": 24430 }, { "epoch": 3.988204563079058, "grad_norm": 3.039555072784424, "learning_rate": 1.0684843646759577e-05, "loss": 0.5078, "step": 24431 }, { "epoch": 3.988367821721562, "grad_norm": 3.3221840858459473, "learning_rate": 1.0684204009272801e-05, "loss": 0.5131, "step": 24432 }, { "epoch": 3.9885310803640666, "grad_norm": 3.1212823390960693, "learning_rate": 1.0683564368973519e-05, "loss": 0.48, "step": 24433 }, { "epoch": 3.988694339006571, "grad_norm": 3.07211971282959, "learning_rate": 1.0682924725864359e-05, "loss": 0.4782, "step": 24434 }, { "epoch": 3.9888575976490754, "grad_norm": 3.6727070808410645, "learning_rate": 1.0682285079947953e-05, "loss": 0.5589, "step": 24435 }, { "epoch": 3.98902085629158, "grad_norm": 2.8573219776153564, "learning_rate": 1.0681645431226934e-05, "loss": 0.4077, "step": 24436 }, { "epoch": 3.9891841149340843, "grad_norm": 3.4132754802703857, "learning_rate": 1.0681005779703928e-05, "loss": 0.5362, "step": 24437 }, { "epoch": 3.9893473735765888, "grad_norm": 3.4098422527313232, "learning_rate": 1.0680366125381563e-05, "loss": 0.4804, "step": 24438 }, { "epoch": 3.989510632219093, "grad_norm": 3.1917901039123535, "learning_rate": 1.067972646826247e-05, "loss": 0.4735, "step": 24439 }, { "epoch": 3.9896738908615976, "grad_norm": 3.4810805320739746, "learning_rate": 1.0679086808349279e-05, "loss": 0.4849, "step": 24440 }, { "epoch": 3.989837149504102, "grad_norm": 3.5933401584625244, "learning_rate": 1.0678447145644618e-05, "loss": 0.4613, "step": 24441 }, { "epoch": 3.9900004081466065, "grad_norm": 3.380645513534546, "learning_rate": 1.0677807480151116e-05, "loss": 0.486, "step": 24442 }, { "epoch": 3.9901636667891105, "grad_norm": 3.754096746444702, "learning_rate": 1.0677167811871405e-05, "loss": 0.4364, "step": 24443 }, { "epoch": 3.990326925431615, "grad_norm": 3.4766361713409424, "learning_rate": 1.0676528140808111e-05, "loss": 0.497, "step": 24444 }, { "epoch": 3.9904901840741194, "grad_norm": 3.401336669921875, "learning_rate": 1.0675888466963866e-05, "loss": 0.4886, "step": 24445 }, { "epoch": 3.990653442716624, "grad_norm": 3.4978830814361572, "learning_rate": 1.06752487903413e-05, "loss": 0.9697, "step": 24446 }, { "epoch": 3.9908167013591282, "grad_norm": 2.8972389698028564, "learning_rate": 1.067460911094304e-05, "loss": 0.447, "step": 24447 }, { "epoch": 3.9909799600016327, "grad_norm": 3.009063482284546, "learning_rate": 1.0673969428771715e-05, "loss": 0.383, "step": 24448 }, { "epoch": 3.991143218644137, "grad_norm": 3.338270664215088, "learning_rate": 1.067332974382996e-05, "loss": 0.478, "step": 24449 }, { "epoch": 3.991306477286641, "grad_norm": 3.9248299598693848, "learning_rate": 1.0672690056120398e-05, "loss": 0.5144, "step": 24450 }, { "epoch": 3.9914697359291456, "grad_norm": 2.752211093902588, "learning_rate": 1.067205036564566e-05, "loss": 0.4033, "step": 24451 }, { "epoch": 3.99163299457165, "grad_norm": 3.3705832958221436, "learning_rate": 1.067141067240838e-05, "loss": 0.4571, "step": 24452 }, { "epoch": 3.9917962532141544, "grad_norm": 2.8659303188323975, "learning_rate": 1.0670770976411183e-05, "loss": 0.4191, "step": 24453 }, { "epoch": 3.991959511856659, "grad_norm": 2.98697829246521, "learning_rate": 1.0670131277656698e-05, "loss": 0.4138, "step": 24454 }, { "epoch": 3.9921227704991633, "grad_norm": 2.7764923572540283, "learning_rate": 1.0669491576147559e-05, "loss": 0.3857, "step": 24455 }, { "epoch": 3.9922860291416677, "grad_norm": 3.4134721755981445, "learning_rate": 1.0668851871886391e-05, "loss": 0.5142, "step": 24456 }, { "epoch": 3.992449287784172, "grad_norm": 3.4028878211975098, "learning_rate": 1.0668212164875829e-05, "loss": 0.5166, "step": 24457 }, { "epoch": 3.9926125464266766, "grad_norm": 3.440011978149414, "learning_rate": 1.0667572455118496e-05, "loss": 0.4886, "step": 24458 }, { "epoch": 3.992775805069181, "grad_norm": 3.570050001144409, "learning_rate": 1.0666932742617024e-05, "loss": 0.518, "step": 24459 }, { "epoch": 3.9929390637116855, "grad_norm": 3.1655914783477783, "learning_rate": 1.0666293027374044e-05, "loss": 0.3701, "step": 24460 }, { "epoch": 3.9931023223541895, "grad_norm": 3.1617255210876465, "learning_rate": 1.0665653309392187e-05, "loss": 0.4959, "step": 24461 }, { "epoch": 3.993265580996694, "grad_norm": 3.1919829845428467, "learning_rate": 1.0665013588674077e-05, "loss": 0.4416, "step": 24462 }, { "epoch": 3.9934288396391984, "grad_norm": 3.1561033725738525, "learning_rate": 1.066437386522235e-05, "loss": 0.4346, "step": 24463 }, { "epoch": 3.993592098281703, "grad_norm": 3.0119245052337646, "learning_rate": 1.0663734139039632e-05, "loss": 0.4363, "step": 24464 }, { "epoch": 3.9937553569242072, "grad_norm": 3.1668858528137207, "learning_rate": 1.0663094410128554e-05, "loss": 0.4296, "step": 24465 }, { "epoch": 3.9939186155667117, "grad_norm": 3.7309184074401855, "learning_rate": 1.0662454678491747e-05, "loss": 0.5545, "step": 24466 }, { "epoch": 3.9940818742092157, "grad_norm": 3.2695841789245605, "learning_rate": 1.0661814944131836e-05, "loss": 0.4463, "step": 24467 }, { "epoch": 3.99424513285172, "grad_norm": 2.8785653114318848, "learning_rate": 1.0661175207051453e-05, "loss": 0.4398, "step": 24468 }, { "epoch": 3.9944083914942246, "grad_norm": 3.3179876804351807, "learning_rate": 1.066053546725323e-05, "loss": 0.431, "step": 24469 }, { "epoch": 3.994571650136729, "grad_norm": 3.860767364501953, "learning_rate": 1.0659895724739795e-05, "loss": 0.4422, "step": 24470 }, { "epoch": 3.9947349087792334, "grad_norm": 4.194119930267334, "learning_rate": 1.065925597951378e-05, "loss": 0.5759, "step": 24471 }, { "epoch": 3.994898167421738, "grad_norm": 3.1742758750915527, "learning_rate": 1.0658616231577812e-05, "loss": 0.4436, "step": 24472 }, { "epoch": 3.9950614260642423, "grad_norm": 3.9043056964874268, "learning_rate": 1.0657976480934518e-05, "loss": 0.4842, "step": 24473 }, { "epoch": 3.9952246847067467, "grad_norm": 3.03635311126709, "learning_rate": 1.0657336727586532e-05, "loss": 0.4165, "step": 24474 }, { "epoch": 3.995387943349251, "grad_norm": 2.4811129570007324, "learning_rate": 1.0656696971536484e-05, "loss": 0.3934, "step": 24475 }, { "epoch": 3.9955512019917556, "grad_norm": 3.732914447784424, "learning_rate": 1.0656057212787006e-05, "loss": 0.5008, "step": 24476 }, { "epoch": 3.99571446063426, "grad_norm": 3.5227503776550293, "learning_rate": 1.0655417451340721e-05, "loss": 0.5061, "step": 24477 }, { "epoch": 3.995877719276764, "grad_norm": 3.24674391746521, "learning_rate": 1.0654777687200262e-05, "loss": 0.4639, "step": 24478 }, { "epoch": 3.9960409779192685, "grad_norm": 2.9052014350891113, "learning_rate": 1.0654137920368257e-05, "loss": 0.4292, "step": 24479 }, { "epoch": 3.996204236561773, "grad_norm": 3.555788993835449, "learning_rate": 1.0653498150847344e-05, "loss": 0.5998, "step": 24480 }, { "epoch": 3.9963674952042774, "grad_norm": 3.1666882038116455, "learning_rate": 1.0652858378640142e-05, "loss": 0.4668, "step": 24481 }, { "epoch": 3.996530753846782, "grad_norm": 3.4030959606170654, "learning_rate": 1.065221860374929e-05, "loss": 0.4465, "step": 24482 }, { "epoch": 3.9966940124892862, "grad_norm": 3.006477117538452, "learning_rate": 1.0651578826177408e-05, "loss": 0.4531, "step": 24483 }, { "epoch": 3.9968572711317907, "grad_norm": 3.260859966278076, "learning_rate": 1.0650939045927136e-05, "loss": 0.4407, "step": 24484 }, { "epoch": 3.9970205297742947, "grad_norm": 3.1313185691833496, "learning_rate": 1.0650299263001098e-05, "loss": 0.4399, "step": 24485 }, { "epoch": 3.997183788416799, "grad_norm": 3.150916814804077, "learning_rate": 1.0649659477401923e-05, "loss": 0.4273, "step": 24486 }, { "epoch": 3.9973470470593035, "grad_norm": 2.9408063888549805, "learning_rate": 1.0649019689132248e-05, "loss": 0.4019, "step": 24487 }, { "epoch": 3.997510305701808, "grad_norm": 3.5537006855010986, "learning_rate": 1.0648379898194693e-05, "loss": 0.5494, "step": 24488 }, { "epoch": 3.9976735643443124, "grad_norm": 3.0006299018859863, "learning_rate": 1.0647740104591897e-05, "loss": 0.4766, "step": 24489 }, { "epoch": 3.997836822986817, "grad_norm": 2.7988193035125732, "learning_rate": 1.0647100308326483e-05, "loss": 0.4063, "step": 24490 }, { "epoch": 3.9980000816293213, "grad_norm": 3.6558029651641846, "learning_rate": 1.0646460509401086e-05, "loss": 0.4961, "step": 24491 }, { "epoch": 3.9981633402718257, "grad_norm": 3.7276453971862793, "learning_rate": 1.0645820707818333e-05, "loss": 0.4761, "step": 24492 }, { "epoch": 3.99832659891433, "grad_norm": 4.034988880157471, "learning_rate": 1.0645180903580856e-05, "loss": 0.5036, "step": 24493 }, { "epoch": 3.9984898575568346, "grad_norm": 4.000712871551514, "learning_rate": 1.0644541096691283e-05, "loss": 0.5015, "step": 24494 }, { "epoch": 3.998653116199339, "grad_norm": 3.3867621421813965, "learning_rate": 1.0643901287152246e-05, "loss": 0.4494, "step": 24495 }, { "epoch": 3.998816374841843, "grad_norm": 3.0887720584869385, "learning_rate": 1.0643261474966371e-05, "loss": 0.3899, "step": 24496 }, { "epoch": 3.9989796334843475, "grad_norm": 3.6670427322387695, "learning_rate": 1.0642621660136294e-05, "loss": 0.486, "step": 24497 }, { "epoch": 3.999142892126852, "grad_norm": 3.605910539627075, "learning_rate": 1.0641981842664641e-05, "loss": 0.4844, "step": 24498 }, { "epoch": 3.9993061507693564, "grad_norm": 2.9685628414154053, "learning_rate": 1.0641342022554042e-05, "loss": 0.4347, "step": 24499 }, { "epoch": 3.999469409411861, "grad_norm": 3.5057408809661865, "learning_rate": 1.064070219980713e-05, "loss": 0.4404, "step": 24500 }, { "epoch": 3.9996326680543652, "grad_norm": 3.274388551712036, "learning_rate": 1.0640062374426531e-05, "loss": 0.4741, "step": 24501 }, { "epoch": 3.9997959266968697, "grad_norm": 3.2004058361053467, "learning_rate": 1.063942254641488e-05, "loss": 0.507, "step": 24502 }, { "epoch": 3.9999591853393737, "grad_norm": 3.3009538650512695, "learning_rate": 1.0638782715774802e-05, "loss": 0.4911, "step": 24503 }, { "epoch": 4.0, "grad_norm": 7.418438911437988, "learning_rate": 1.0638142882508932e-05, "loss": 0.6033, "step": 24504 }, { "epoch": 4.000163258642504, "grad_norm": 2.1571199893951416, "learning_rate": 1.0637503046619896e-05, "loss": 0.3726, "step": 24505 }, { "epoch": 4.000326517285009, "grad_norm": 3.018104076385498, "learning_rate": 1.0636863208110327e-05, "loss": 0.364, "step": 24506 }, { "epoch": 4.000489775927513, "grad_norm": 2.4309639930725098, "learning_rate": 1.0636223366982851e-05, "loss": 0.368, "step": 24507 }, { "epoch": 4.000653034570018, "grad_norm": 2.438462972640991, "learning_rate": 1.0635583523240102e-05, "loss": 0.3833, "step": 24508 }, { "epoch": 4.000816293212522, "grad_norm": 2.8406198024749756, "learning_rate": 1.063494367688471e-05, "loss": 0.454, "step": 24509 }, { "epoch": 4.000979551855027, "grad_norm": 2.84121036529541, "learning_rate": 1.0634303827919306e-05, "loss": 0.3762, "step": 24510 }, { "epoch": 4.001142810497531, "grad_norm": 2.347829580307007, "learning_rate": 1.0633663976346518e-05, "loss": 0.3946, "step": 24511 }, { "epoch": 4.0013060691400355, "grad_norm": 2.879659414291382, "learning_rate": 1.0633024122168972e-05, "loss": 0.4489, "step": 24512 }, { "epoch": 4.001469327782539, "grad_norm": 1.9113237857818604, "learning_rate": 1.0632384265389307e-05, "loss": 0.3145, "step": 24513 }, { "epoch": 4.0016325864250435, "grad_norm": 2.40588116645813, "learning_rate": 1.0631744406010146e-05, "loss": 0.3538, "step": 24514 }, { "epoch": 4.001795845067548, "grad_norm": 2.589895725250244, "learning_rate": 1.0631104544034127e-05, "loss": 0.4053, "step": 24515 }, { "epoch": 4.001959103710052, "grad_norm": 2.269712448120117, "learning_rate": 1.0630464679463873e-05, "loss": 0.3348, "step": 24516 }, { "epoch": 4.002122362352557, "grad_norm": 2.206408977508545, "learning_rate": 1.0629824812302015e-05, "loss": 0.2793, "step": 24517 }, { "epoch": 4.002285620995061, "grad_norm": 3.085353374481201, "learning_rate": 1.0629184942551185e-05, "loss": 0.4104, "step": 24518 }, { "epoch": 4.002448879637566, "grad_norm": 3.009425640106201, "learning_rate": 1.0628545070214017e-05, "loss": 0.3645, "step": 24519 }, { "epoch": 4.00261213828007, "grad_norm": 2.5795106887817383, "learning_rate": 1.0627905195293135e-05, "loss": 0.379, "step": 24520 }, { "epoch": 4.002775396922575, "grad_norm": 2.931988000869751, "learning_rate": 1.0627265317791173e-05, "loss": 0.3489, "step": 24521 }, { "epoch": 4.002938655565079, "grad_norm": 2.9329497814178467, "learning_rate": 1.062662543771076e-05, "loss": 0.4067, "step": 24522 }, { "epoch": 4.003101914207583, "grad_norm": 2.456892251968384, "learning_rate": 1.0625985555054522e-05, "loss": 0.3453, "step": 24523 }, { "epoch": 4.003265172850088, "grad_norm": 2.6123507022857666, "learning_rate": 1.0625345669825097e-05, "loss": 0.3397, "step": 24524 }, { "epoch": 4.003428431492592, "grad_norm": 2.872056484222412, "learning_rate": 1.0624705782025117e-05, "loss": 0.3848, "step": 24525 }, { "epoch": 4.003591690135097, "grad_norm": 3.081843137741089, "learning_rate": 1.0624065891657202e-05, "loss": 0.3649, "step": 24526 }, { "epoch": 4.003754948777601, "grad_norm": 3.0134317874908447, "learning_rate": 1.0623425998723989e-05, "loss": 0.3794, "step": 24527 }, { "epoch": 4.003918207420106, "grad_norm": 3.1922760009765625, "learning_rate": 1.0622786103228106e-05, "loss": 0.3631, "step": 24528 }, { "epoch": 4.00408146606261, "grad_norm": 2.752591609954834, "learning_rate": 1.0622146205172185e-05, "loss": 0.3427, "step": 24529 }, { "epoch": 4.0042447247051145, "grad_norm": 2.883697271347046, "learning_rate": 1.0621506304558858e-05, "loss": 0.341, "step": 24530 }, { "epoch": 4.004407983347618, "grad_norm": 3.18737530708313, "learning_rate": 1.0620866401390752e-05, "loss": 0.4061, "step": 24531 }, { "epoch": 4.0045712419901225, "grad_norm": 3.290219306945801, "learning_rate": 1.0620226495670497e-05, "loss": 0.3281, "step": 24532 }, { "epoch": 4.004734500632627, "grad_norm": 3.7609970569610596, "learning_rate": 1.0619586587400728e-05, "loss": 0.3202, "step": 24533 }, { "epoch": 4.004897759275131, "grad_norm": 3.1525442600250244, "learning_rate": 1.061894667658407e-05, "loss": 0.3873, "step": 24534 }, { "epoch": 4.005061017917636, "grad_norm": 3.5468127727508545, "learning_rate": 1.0618306763223156e-05, "loss": 0.3841, "step": 24535 }, { "epoch": 4.00522427656014, "grad_norm": 2.386059045791626, "learning_rate": 1.0617666847320619e-05, "loss": 0.3187, "step": 24536 }, { "epoch": 4.005387535202645, "grad_norm": 2.443470001220703, "learning_rate": 1.0617026928879084e-05, "loss": 0.2985, "step": 24537 }, { "epoch": 4.005550793845149, "grad_norm": 3.2735953330993652, "learning_rate": 1.0616387007901186e-05, "loss": 0.3824, "step": 24538 }, { "epoch": 4.0057140524876536, "grad_norm": 3.2124533653259277, "learning_rate": 1.0615747084389551e-05, "loss": 0.3608, "step": 24539 }, { "epoch": 4.005877311130158, "grad_norm": 2.9854042530059814, "learning_rate": 1.0615107158346816e-05, "loss": 0.3695, "step": 24540 }, { "epoch": 4.006040569772662, "grad_norm": 3.189786911010742, "learning_rate": 1.0614467229775605e-05, "loss": 0.3666, "step": 24541 }, { "epoch": 4.006203828415167, "grad_norm": 3.2210676670074463, "learning_rate": 1.0613827298678551e-05, "loss": 0.402, "step": 24542 }, { "epoch": 4.006367087057671, "grad_norm": 2.7448623180389404, "learning_rate": 1.0613187365058286e-05, "loss": 0.3354, "step": 24543 }, { "epoch": 4.006530345700176, "grad_norm": 2.927227258682251, "learning_rate": 1.061254742891744e-05, "loss": 0.3175, "step": 24544 }, { "epoch": 4.00669360434268, "grad_norm": 3.2586333751678467, "learning_rate": 1.0611907490258641e-05, "loss": 0.3637, "step": 24545 }, { "epoch": 4.006856862985185, "grad_norm": 2.768097162246704, "learning_rate": 1.0611267549084523e-05, "loss": 0.3025, "step": 24546 }, { "epoch": 4.007020121627689, "grad_norm": 3.667633295059204, "learning_rate": 1.0610627605397714e-05, "loss": 0.3776, "step": 24547 }, { "epoch": 4.0071833802701935, "grad_norm": 4.540948390960693, "learning_rate": 1.0609987659200846e-05, "loss": 0.4958, "step": 24548 }, { "epoch": 4.007346638912697, "grad_norm": 3.0551469326019287, "learning_rate": 1.0609347710496551e-05, "loss": 0.3515, "step": 24549 }, { "epoch": 4.0075098975552015, "grad_norm": 2.3712270259857178, "learning_rate": 1.0608707759287454e-05, "loss": 0.2735, "step": 24550 }, { "epoch": 4.007673156197706, "grad_norm": 3.64253306388855, "learning_rate": 1.060806780557619e-05, "loss": 0.344, "step": 24551 }, { "epoch": 4.00783641484021, "grad_norm": 3.6296308040618896, "learning_rate": 1.060742784936539e-05, "loss": 0.3413, "step": 24552 }, { "epoch": 4.007999673482715, "grad_norm": 4.0069193840026855, "learning_rate": 1.0606787890657681e-05, "loss": 0.4671, "step": 24553 }, { "epoch": 4.008162932125219, "grad_norm": 3.1747500896453857, "learning_rate": 1.0606147929455701e-05, "loss": 0.3572, "step": 24554 }, { "epoch": 4.008326190767724, "grad_norm": 3.1258065700531006, "learning_rate": 1.0605507965762073e-05, "loss": 0.3569, "step": 24555 }, { "epoch": 4.008489449410228, "grad_norm": 2.8882524967193604, "learning_rate": 1.060486799957943e-05, "loss": 0.3736, "step": 24556 }, { "epoch": 4.0086527080527325, "grad_norm": 3.57077693939209, "learning_rate": 1.0604228030910401e-05, "loss": 0.3678, "step": 24557 }, { "epoch": 4.008815966695237, "grad_norm": 3.948568344116211, "learning_rate": 1.060358805975762e-05, "loss": 0.3992, "step": 24558 }, { "epoch": 4.008979225337741, "grad_norm": 3.0284361839294434, "learning_rate": 1.0602948086123721e-05, "loss": 0.3349, "step": 24559 }, { "epoch": 4.009142483980246, "grad_norm": 2.9252755641937256, "learning_rate": 1.0602308110011328e-05, "loss": 0.3765, "step": 24560 }, { "epoch": 4.00930574262275, "grad_norm": 3.2194056510925293, "learning_rate": 1.0601668131423069e-05, "loss": 0.3453, "step": 24561 }, { "epoch": 4.009469001265255, "grad_norm": 3.4349794387817383, "learning_rate": 1.0601028150361583e-05, "loss": 0.4546, "step": 24562 }, { "epoch": 4.009632259907759, "grad_norm": 2.8115015029907227, "learning_rate": 1.0600388166829497e-05, "loss": 0.3124, "step": 24563 }, { "epoch": 4.009795518550264, "grad_norm": 4.059750556945801, "learning_rate": 1.0599748180829443e-05, "loss": 0.3894, "step": 24564 }, { "epoch": 4.009958777192768, "grad_norm": 4.019031524658203, "learning_rate": 1.0599108192364052e-05, "loss": 0.4244, "step": 24565 }, { "epoch": 4.010122035835272, "grad_norm": 2.976836681365967, "learning_rate": 1.059846820143595e-05, "loss": 0.3851, "step": 24566 }, { "epoch": 4.010285294477776, "grad_norm": 3.447071075439453, "learning_rate": 1.0597828208047769e-05, "loss": 0.3616, "step": 24567 }, { "epoch": 4.0104485531202805, "grad_norm": 4.278707504272461, "learning_rate": 1.0597188212202146e-05, "loss": 0.4583, "step": 24568 }, { "epoch": 4.010611811762785, "grad_norm": 4.0421462059021, "learning_rate": 1.059654821390171e-05, "loss": 0.4205, "step": 24569 }, { "epoch": 4.010775070405289, "grad_norm": 3.912564277648926, "learning_rate": 1.0595908213149087e-05, "loss": 0.3867, "step": 24570 }, { "epoch": 4.010938329047794, "grad_norm": 3.3977596759796143, "learning_rate": 1.059526820994691e-05, "loss": 0.3413, "step": 24571 }, { "epoch": 4.011101587690298, "grad_norm": 3.667057991027832, "learning_rate": 1.0594628204297808e-05, "loss": 0.3018, "step": 24572 }, { "epoch": 4.011264846332803, "grad_norm": 3.771975517272949, "learning_rate": 1.0593988196204416e-05, "loss": 0.3949, "step": 24573 }, { "epoch": 4.011428104975307, "grad_norm": 2.7924742698669434, "learning_rate": 1.0593348185669363e-05, "loss": 0.333, "step": 24574 }, { "epoch": 4.0115913636178115, "grad_norm": 3.283773899078369, "learning_rate": 1.0592708172695281e-05, "loss": 0.3752, "step": 24575 }, { "epoch": 4.011754622260316, "grad_norm": 3.105804443359375, "learning_rate": 1.0592068157284796e-05, "loss": 0.3241, "step": 24576 }, { "epoch": 4.01191788090282, "grad_norm": 3.905453681945801, "learning_rate": 1.0591428139440546e-05, "loss": 0.339, "step": 24577 }, { "epoch": 4.012081139545325, "grad_norm": 3.142273187637329, "learning_rate": 1.0590788119165156e-05, "loss": 0.3171, "step": 24578 }, { "epoch": 4.012244398187829, "grad_norm": 3.3198044300079346, "learning_rate": 1.0590148096461259e-05, "loss": 0.3622, "step": 24579 }, { "epoch": 4.012407656830334, "grad_norm": 4.1498942375183105, "learning_rate": 1.0589508071331487e-05, "loss": 0.3847, "step": 24580 }, { "epoch": 4.012570915472838, "grad_norm": 3.5784778594970703, "learning_rate": 1.0588868043778471e-05, "loss": 0.3813, "step": 24581 }, { "epoch": 4.012734174115343, "grad_norm": 3.9895682334899902, "learning_rate": 1.0588228013804838e-05, "loss": 0.4237, "step": 24582 }, { "epoch": 4.012897432757847, "grad_norm": 3.839099168777466, "learning_rate": 1.0587587981413223e-05, "loss": 0.4002, "step": 24583 }, { "epoch": 4.013060691400351, "grad_norm": 3.3032164573669434, "learning_rate": 1.0586947946606257e-05, "loss": 0.3385, "step": 24584 }, { "epoch": 4.013223950042855, "grad_norm": 3.5546536445617676, "learning_rate": 1.0586307909386568e-05, "loss": 0.3375, "step": 24585 }, { "epoch": 4.0133872086853595, "grad_norm": 3.585608720779419, "learning_rate": 1.0585667869756789e-05, "loss": 0.3879, "step": 24586 }, { "epoch": 4.013550467327864, "grad_norm": 3.406717538833618, "learning_rate": 1.0585027827719552e-05, "loss": 0.3615, "step": 24587 }, { "epoch": 4.013713725970368, "grad_norm": 2.681293249130249, "learning_rate": 1.0584387783277483e-05, "loss": 0.3275, "step": 24588 }, { "epoch": 4.013876984612873, "grad_norm": 3.1179544925689697, "learning_rate": 1.0583747736433218e-05, "loss": 0.3504, "step": 24589 }, { "epoch": 4.014040243255377, "grad_norm": 3.8356196880340576, "learning_rate": 1.058310768718939e-05, "loss": 0.4919, "step": 24590 }, { "epoch": 4.014203501897882, "grad_norm": 2.9130191802978516, "learning_rate": 1.0582467635548621e-05, "loss": 0.3674, "step": 24591 }, { "epoch": 4.014366760540386, "grad_norm": 4.181504249572754, "learning_rate": 1.0581827581513551e-05, "loss": 0.3779, "step": 24592 }, { "epoch": 4.0145300191828905, "grad_norm": 3.3842990398406982, "learning_rate": 1.0581187525086809e-05, "loss": 0.3692, "step": 24593 }, { "epoch": 4.014693277825395, "grad_norm": 3.840662717819214, "learning_rate": 1.058054746627102e-05, "loss": 0.3793, "step": 24594 }, { "epoch": 4.014856536467899, "grad_norm": 3.944195508956909, "learning_rate": 1.057990740506882e-05, "loss": 0.4304, "step": 24595 }, { "epoch": 4.015019795110404, "grad_norm": 2.5903160572052, "learning_rate": 1.0579267341482842e-05, "loss": 0.3183, "step": 24596 }, { "epoch": 4.015183053752908, "grad_norm": 2.9877421855926514, "learning_rate": 1.0578627275515714e-05, "loss": 0.3424, "step": 24597 }, { "epoch": 4.015346312395413, "grad_norm": 4.168334007263184, "learning_rate": 1.057798720717007e-05, "loss": 0.4109, "step": 24598 }, { "epoch": 4.015509571037917, "grad_norm": 3.8989243507385254, "learning_rate": 1.0577347136448537e-05, "loss": 0.4863, "step": 24599 }, { "epoch": 4.015672829680422, "grad_norm": 4.096686840057373, "learning_rate": 1.0576707063353745e-05, "loss": 0.3722, "step": 24600 }, { "epoch": 4.015836088322925, "grad_norm": 2.96215558052063, "learning_rate": 1.0576066987888332e-05, "loss": 0.3321, "step": 24601 }, { "epoch": 4.01599934696543, "grad_norm": 3.9169819355010986, "learning_rate": 1.0575426910054922e-05, "loss": 0.4251, "step": 24602 }, { "epoch": 4.016162605607934, "grad_norm": 2.9060449600219727, "learning_rate": 1.0574786829856153e-05, "loss": 0.3294, "step": 24603 }, { "epoch": 4.0163258642504385, "grad_norm": 3.3603134155273438, "learning_rate": 1.0574146747294652e-05, "loss": 0.3668, "step": 24604 }, { "epoch": 4.016489122892943, "grad_norm": 3.053076982498169, "learning_rate": 1.0573506662373045e-05, "loss": 0.3701, "step": 24605 }, { "epoch": 4.016652381535447, "grad_norm": 2.6658570766448975, "learning_rate": 1.0572866575093972e-05, "loss": 0.3343, "step": 24606 }, { "epoch": 4.016815640177952, "grad_norm": 3.585294723510742, "learning_rate": 1.0572226485460062e-05, "loss": 0.3345, "step": 24607 }, { "epoch": 4.016978898820456, "grad_norm": 2.8999738693237305, "learning_rate": 1.0571586393473947e-05, "loss": 0.3707, "step": 24608 }, { "epoch": 4.017142157462961, "grad_norm": 3.2681117057800293, "learning_rate": 1.0570946299138254e-05, "loss": 0.3437, "step": 24609 }, { "epoch": 4.017305416105465, "grad_norm": 3.2402665615081787, "learning_rate": 1.0570306202455615e-05, "loss": 0.3573, "step": 24610 }, { "epoch": 4.0174686747479695, "grad_norm": 3.491661787033081, "learning_rate": 1.0569666103428662e-05, "loss": 0.3738, "step": 24611 }, { "epoch": 4.017631933390474, "grad_norm": 3.4231107234954834, "learning_rate": 1.0569026002060028e-05, "loss": 0.3989, "step": 24612 }, { "epoch": 4.017795192032978, "grad_norm": 2.586791753768921, "learning_rate": 1.0568385898352345e-05, "loss": 0.2804, "step": 24613 }, { "epoch": 4.017958450675483, "grad_norm": 2.8691394329071045, "learning_rate": 1.056774579230824e-05, "loss": 0.3141, "step": 24614 }, { "epoch": 4.018121709317987, "grad_norm": 3.636446714401245, "learning_rate": 1.0567105683930346e-05, "loss": 0.4216, "step": 24615 }, { "epoch": 4.018284967960492, "grad_norm": 3.837094306945801, "learning_rate": 1.0566465573221296e-05, "loss": 0.4436, "step": 24616 }, { "epoch": 4.018448226602996, "grad_norm": 3.4013235569000244, "learning_rate": 1.0565825460183718e-05, "loss": 0.3959, "step": 24617 }, { "epoch": 4.018611485245501, "grad_norm": 2.872373580932617, "learning_rate": 1.0565185344820248e-05, "loss": 0.3077, "step": 24618 }, { "epoch": 4.018774743888004, "grad_norm": 3.602216958999634, "learning_rate": 1.0564545227133511e-05, "loss": 0.3815, "step": 24619 }, { "epoch": 4.018938002530509, "grad_norm": 3.4752063751220703, "learning_rate": 1.0563905107126145e-05, "loss": 0.3474, "step": 24620 }, { "epoch": 4.019101261173013, "grad_norm": 3.5513839721679688, "learning_rate": 1.0563264984800777e-05, "loss": 0.3605, "step": 24621 }, { "epoch": 4.0192645198155175, "grad_norm": 3.246534585952759, "learning_rate": 1.0562624860160039e-05, "loss": 0.2963, "step": 24622 }, { "epoch": 4.019427778458022, "grad_norm": 4.548970699310303, "learning_rate": 1.0561984733206564e-05, "loss": 0.4853, "step": 24623 }, { "epoch": 4.019591037100526, "grad_norm": 3.388936996459961, "learning_rate": 1.0561344603942977e-05, "loss": 0.4132, "step": 24624 }, { "epoch": 4.019754295743031, "grad_norm": 3.1184346675872803, "learning_rate": 1.0560704472371919e-05, "loss": 0.3401, "step": 24625 }, { "epoch": 4.019917554385535, "grad_norm": 3.3241937160491943, "learning_rate": 1.0560064338496016e-05, "loss": 0.3899, "step": 24626 }, { "epoch": 4.02008081302804, "grad_norm": 3.3283567428588867, "learning_rate": 1.0559424202317899e-05, "loss": 0.3667, "step": 24627 }, { "epoch": 4.020244071670544, "grad_norm": 3.1204748153686523, "learning_rate": 1.0558784063840202e-05, "loss": 0.3512, "step": 24628 }, { "epoch": 4.0204073303130485, "grad_norm": 3.670260190963745, "learning_rate": 1.0558143923065554e-05, "loss": 0.4463, "step": 24629 }, { "epoch": 4.020570588955553, "grad_norm": 3.2596371173858643, "learning_rate": 1.0557503779996585e-05, "loss": 0.4091, "step": 24630 }, { "epoch": 4.020733847598057, "grad_norm": 3.519704818725586, "learning_rate": 1.055686363463593e-05, "loss": 0.4408, "step": 24631 }, { "epoch": 4.020897106240562, "grad_norm": 3.494638204574585, "learning_rate": 1.055622348698622e-05, "loss": 0.3917, "step": 24632 }, { "epoch": 4.021060364883066, "grad_norm": 3.5407779216766357, "learning_rate": 1.0555583337050084e-05, "loss": 0.407, "step": 24633 }, { "epoch": 4.021223623525571, "grad_norm": 2.841320753097534, "learning_rate": 1.0554943184830155e-05, "loss": 0.3304, "step": 24634 }, { "epoch": 4.021386882168075, "grad_norm": 3.7909133434295654, "learning_rate": 1.0554303030329063e-05, "loss": 0.3559, "step": 24635 }, { "epoch": 4.02155014081058, "grad_norm": 2.8147597312927246, "learning_rate": 1.0553662873549442e-05, "loss": 0.3283, "step": 24636 }, { "epoch": 4.021713399453083, "grad_norm": 3.1691510677337646, "learning_rate": 1.0553022714493923e-05, "loss": 0.3559, "step": 24637 }, { "epoch": 4.021876658095588, "grad_norm": 3.0290687084198, "learning_rate": 1.0552382553165136e-05, "loss": 0.3842, "step": 24638 }, { "epoch": 4.022039916738092, "grad_norm": 3.442072629928589, "learning_rate": 1.0551742389565708e-05, "loss": 0.4226, "step": 24639 }, { "epoch": 4.0222031753805965, "grad_norm": 3.1235971450805664, "learning_rate": 1.0551102223698281e-05, "loss": 0.315, "step": 24640 }, { "epoch": 4.022366434023101, "grad_norm": 2.85560941696167, "learning_rate": 1.0550462055565478e-05, "loss": 0.3462, "step": 24641 }, { "epoch": 4.022529692665605, "grad_norm": 2.915719747543335, "learning_rate": 1.0549821885169938e-05, "loss": 0.3507, "step": 24642 }, { "epoch": 4.02269295130811, "grad_norm": 2.8343441486358643, "learning_rate": 1.0549181712514285e-05, "loss": 0.3254, "step": 24643 }, { "epoch": 4.022856209950614, "grad_norm": 2.981693744659424, "learning_rate": 1.0548541537601149e-05, "loss": 0.3611, "step": 24644 }, { "epoch": 4.023019468593119, "grad_norm": 2.8156163692474365, "learning_rate": 1.054790136043317e-05, "loss": 0.3113, "step": 24645 }, { "epoch": 4.023182727235623, "grad_norm": 3.4325790405273438, "learning_rate": 1.0547261181012975e-05, "loss": 0.3534, "step": 24646 }, { "epoch": 4.0233459858781275, "grad_norm": 3.356884717941284, "learning_rate": 1.0546620999343198e-05, "loss": 0.439, "step": 24647 }, { "epoch": 4.023509244520632, "grad_norm": 3.1762356758117676, "learning_rate": 1.0545980815426466e-05, "loss": 0.3692, "step": 24648 }, { "epoch": 4.023672503163136, "grad_norm": 3.057191848754883, "learning_rate": 1.0545340629265413e-05, "loss": 0.32, "step": 24649 }, { "epoch": 4.023835761805641, "grad_norm": 2.9275004863739014, "learning_rate": 1.0544700440862669e-05, "loss": 0.334, "step": 24650 }, { "epoch": 4.023999020448145, "grad_norm": 3.0544042587280273, "learning_rate": 1.0544060250220868e-05, "loss": 0.3719, "step": 24651 }, { "epoch": 4.02416227909065, "grad_norm": 3.99924373626709, "learning_rate": 1.0543420057342646e-05, "loss": 0.4282, "step": 24652 }, { "epoch": 4.024325537733154, "grad_norm": 2.6117186546325684, "learning_rate": 1.0542779862230625e-05, "loss": 0.3366, "step": 24653 }, { "epoch": 4.024488796375658, "grad_norm": 3.25954008102417, "learning_rate": 1.054213966488744e-05, "loss": 0.3677, "step": 24654 }, { "epoch": 4.024652055018162, "grad_norm": 2.894951581954956, "learning_rate": 1.0541499465315723e-05, "loss": 0.3367, "step": 24655 }, { "epoch": 4.024815313660667, "grad_norm": 2.8995511531829834, "learning_rate": 1.0540859263518108e-05, "loss": 0.3203, "step": 24656 }, { "epoch": 4.024978572303171, "grad_norm": 3.300313949584961, "learning_rate": 1.0540219059497227e-05, "loss": 0.3602, "step": 24657 }, { "epoch": 4.0251418309456755, "grad_norm": 3.82920503616333, "learning_rate": 1.0539578853255708e-05, "loss": 0.3611, "step": 24658 }, { "epoch": 4.02530508958818, "grad_norm": 3.2309370040893555, "learning_rate": 1.0538938644796181e-05, "loss": 0.3693, "step": 24659 }, { "epoch": 4.025468348230684, "grad_norm": 4.326601028442383, "learning_rate": 1.0538298434121284e-05, "loss": 0.4307, "step": 24660 }, { "epoch": 4.025631606873189, "grad_norm": 3.2835569381713867, "learning_rate": 1.0537658221233645e-05, "loss": 0.3616, "step": 24661 }, { "epoch": 4.025794865515693, "grad_norm": 3.917813539505005, "learning_rate": 1.0537018006135895e-05, "loss": 0.4307, "step": 24662 }, { "epoch": 4.025958124158198, "grad_norm": 3.6398727893829346, "learning_rate": 1.0536377788830667e-05, "loss": 0.4469, "step": 24663 }, { "epoch": 4.026121382800702, "grad_norm": 3.469642400741577, "learning_rate": 1.0535737569320593e-05, "loss": 0.357, "step": 24664 }, { "epoch": 4.0262846414432065, "grad_norm": 3.289055824279785, "learning_rate": 1.0535097347608303e-05, "loss": 0.347, "step": 24665 }, { "epoch": 4.026447900085711, "grad_norm": 3.394073009490967, "learning_rate": 1.053445712369643e-05, "loss": 0.3804, "step": 24666 }, { "epoch": 4.026611158728215, "grad_norm": 3.1824846267700195, "learning_rate": 1.0533816897587605e-05, "loss": 0.3625, "step": 24667 }, { "epoch": 4.02677441737072, "grad_norm": 3.379467248916626, "learning_rate": 1.0533176669284461e-05, "loss": 0.3962, "step": 24668 }, { "epoch": 4.026937676013224, "grad_norm": 3.181558609008789, "learning_rate": 1.0532536438789631e-05, "loss": 0.357, "step": 24669 }, { "epoch": 4.027100934655729, "grad_norm": 6.65605354309082, "learning_rate": 1.0531896206105743e-05, "loss": 0.4327, "step": 24670 }, { "epoch": 4.027264193298233, "grad_norm": 3.5571718215942383, "learning_rate": 1.053125597123543e-05, "loss": 0.3849, "step": 24671 }, { "epoch": 4.027427451940737, "grad_norm": 2.7910144329071045, "learning_rate": 1.0530615734181326e-05, "loss": 0.2738, "step": 24672 }, { "epoch": 4.027590710583241, "grad_norm": 3.457406520843506, "learning_rate": 1.0529975494946061e-05, "loss": 0.368, "step": 24673 }, { "epoch": 4.027753969225746, "grad_norm": 3.1212940216064453, "learning_rate": 1.0529335253532265e-05, "loss": 0.3351, "step": 24674 }, { "epoch": 4.02791722786825, "grad_norm": 3.2965011596679688, "learning_rate": 1.0528695009942574e-05, "loss": 0.4248, "step": 24675 }, { "epoch": 4.0280804865107545, "grad_norm": 3.3916585445404053, "learning_rate": 1.0528054764179616e-05, "loss": 0.42, "step": 24676 }, { "epoch": 4.028243745153259, "grad_norm": 3.226867437362671, "learning_rate": 1.0527414516246024e-05, "loss": 0.3715, "step": 24677 }, { "epoch": 4.028407003795763, "grad_norm": 3.1254165172576904, "learning_rate": 1.0526774266144431e-05, "loss": 0.3191, "step": 24678 }, { "epoch": 4.028570262438268, "grad_norm": 3.2755789756774902, "learning_rate": 1.0526134013877465e-05, "loss": 0.3518, "step": 24679 }, { "epoch": 4.028733521080772, "grad_norm": 2.7134647369384766, "learning_rate": 1.0525493759447764e-05, "loss": 0.2752, "step": 24680 }, { "epoch": 4.028896779723277, "grad_norm": 3.274651527404785, "learning_rate": 1.0524853502857956e-05, "loss": 0.3806, "step": 24681 }, { "epoch": 4.029060038365781, "grad_norm": 3.613520860671997, "learning_rate": 1.0524213244110673e-05, "loss": 0.3914, "step": 24682 }, { "epoch": 4.0292232970082855, "grad_norm": 3.2260093688964844, "learning_rate": 1.0523572983208547e-05, "loss": 0.3348, "step": 24683 }, { "epoch": 4.02938655565079, "grad_norm": 3.2017123699188232, "learning_rate": 1.0522932720154211e-05, "loss": 0.3292, "step": 24684 }, { "epoch": 4.029549814293294, "grad_norm": 3.0960710048675537, "learning_rate": 1.0522292454950294e-05, "loss": 0.3535, "step": 24685 }, { "epoch": 4.029713072935799, "grad_norm": 3.5738890171051025, "learning_rate": 1.0521652187599434e-05, "loss": 0.3571, "step": 24686 }, { "epoch": 4.029876331578303, "grad_norm": 3.3850629329681396, "learning_rate": 1.0521011918104256e-05, "loss": 0.331, "step": 24687 }, { "epoch": 4.030039590220808, "grad_norm": 3.773458957672119, "learning_rate": 1.0520371646467393e-05, "loss": 0.3727, "step": 24688 }, { "epoch": 4.030202848863311, "grad_norm": 4.378833770751953, "learning_rate": 1.0519731372691481e-05, "loss": 0.4584, "step": 24689 }, { "epoch": 4.030366107505816, "grad_norm": 3.0958292484283447, "learning_rate": 1.051909109677915e-05, "loss": 0.3705, "step": 24690 }, { "epoch": 4.03052936614832, "grad_norm": 3.8043999671936035, "learning_rate": 1.0518450818733032e-05, "loss": 0.4962, "step": 24691 }, { "epoch": 4.030692624790825, "grad_norm": 3.358344316482544, "learning_rate": 1.0517810538555757e-05, "loss": 0.3921, "step": 24692 }, { "epoch": 4.030855883433329, "grad_norm": 2.974468946456909, "learning_rate": 1.051717025624996e-05, "loss": 0.3154, "step": 24693 }, { "epoch": 4.0310191420758335, "grad_norm": 2.9015018939971924, "learning_rate": 1.0516529971818267e-05, "loss": 0.3142, "step": 24694 }, { "epoch": 4.031182400718338, "grad_norm": 2.8037943840026855, "learning_rate": 1.0515889685263316e-05, "loss": 0.3493, "step": 24695 }, { "epoch": 4.031345659360842, "grad_norm": 3.0537328720092773, "learning_rate": 1.051524939658774e-05, "loss": 0.3311, "step": 24696 }, { "epoch": 4.031508918003347, "grad_norm": 3.409949779510498, "learning_rate": 1.0514609105794168e-05, "loss": 0.3798, "step": 24697 }, { "epoch": 4.031672176645851, "grad_norm": 3.4132182598114014, "learning_rate": 1.0513968812885231e-05, "loss": 0.3613, "step": 24698 }, { "epoch": 4.031835435288356, "grad_norm": 3.0304372310638428, "learning_rate": 1.0513328517863562e-05, "loss": 0.3339, "step": 24699 }, { "epoch": 4.03199869393086, "grad_norm": 3.2690587043762207, "learning_rate": 1.0512688220731793e-05, "loss": 0.3427, "step": 24700 }, { "epoch": 4.0321619525733645, "grad_norm": 3.3465142250061035, "learning_rate": 1.0512047921492558e-05, "loss": 0.3605, "step": 24701 }, { "epoch": 4.032325211215869, "grad_norm": 3.1814792156219482, "learning_rate": 1.0511407620148489e-05, "loss": 0.3448, "step": 24702 }, { "epoch": 4.032488469858373, "grad_norm": 3.8384361267089844, "learning_rate": 1.0510767316702213e-05, "loss": 0.4287, "step": 24703 }, { "epoch": 4.032651728500878, "grad_norm": 3.037421703338623, "learning_rate": 1.0510127011156366e-05, "loss": 0.3629, "step": 24704 }, { "epoch": 4.032814987143382, "grad_norm": 3.4881389141082764, "learning_rate": 1.0509486703513577e-05, "loss": 0.3506, "step": 24705 }, { "epoch": 4.032978245785887, "grad_norm": 3.4042468070983887, "learning_rate": 1.0508846393776485e-05, "loss": 0.4033, "step": 24706 }, { "epoch": 4.03314150442839, "grad_norm": 3.961061716079712, "learning_rate": 1.0508206081947715e-05, "loss": 0.4417, "step": 24707 }, { "epoch": 4.033304763070895, "grad_norm": 2.8755834102630615, "learning_rate": 1.0507565768029903e-05, "loss": 0.3175, "step": 24708 }, { "epoch": 4.033468021713399, "grad_norm": 3.0544590950012207, "learning_rate": 1.0506925452025681e-05, "loss": 0.3802, "step": 24709 }, { "epoch": 4.033631280355904, "grad_norm": 2.881462574005127, "learning_rate": 1.0506285133937676e-05, "loss": 0.296, "step": 24710 }, { "epoch": 4.033794538998408, "grad_norm": 3.641366958618164, "learning_rate": 1.0505644813768527e-05, "loss": 0.387, "step": 24711 }, { "epoch": 4.0339577976409124, "grad_norm": 3.8834447860717773, "learning_rate": 1.0505004491520862e-05, "loss": 0.408, "step": 24712 }, { "epoch": 4.034121056283417, "grad_norm": 2.9311909675598145, "learning_rate": 1.0504364167197315e-05, "loss": 0.3672, "step": 24713 }, { "epoch": 4.034284314925921, "grad_norm": 3.5164952278137207, "learning_rate": 1.0503723840800516e-05, "loss": 0.3992, "step": 24714 }, { "epoch": 4.034447573568426, "grad_norm": 2.9831438064575195, "learning_rate": 1.0503083512333099e-05, "loss": 0.3509, "step": 24715 }, { "epoch": 4.03461083221093, "grad_norm": 3.396530866622925, "learning_rate": 1.0502443181797696e-05, "loss": 0.4267, "step": 24716 }, { "epoch": 4.034774090853435, "grad_norm": 4.068583965301514, "learning_rate": 1.050180284919694e-05, "loss": 0.3903, "step": 24717 }, { "epoch": 4.034937349495939, "grad_norm": 3.545768976211548, "learning_rate": 1.050116251453346e-05, "loss": 0.3987, "step": 24718 }, { "epoch": 4.0351006081384435, "grad_norm": 3.517775297164917, "learning_rate": 1.050052217780989e-05, "loss": 0.4254, "step": 24719 }, { "epoch": 4.035263866780948, "grad_norm": 3.166766405105591, "learning_rate": 1.0499881839028868e-05, "loss": 0.3718, "step": 24720 }, { "epoch": 4.035427125423452, "grad_norm": 4.265866756439209, "learning_rate": 1.049924149819301e-05, "loss": 0.3797, "step": 24721 }, { "epoch": 4.035590384065957, "grad_norm": 3.4764461517333984, "learning_rate": 1.0498601155304966e-05, "loss": 0.4111, "step": 24722 }, { "epoch": 4.035753642708461, "grad_norm": 2.4618752002716064, "learning_rate": 1.049796081036736e-05, "loss": 0.328, "step": 24723 }, { "epoch": 4.035916901350966, "grad_norm": 3.1726748943328857, "learning_rate": 1.0497320463382825e-05, "loss": 0.3919, "step": 24724 }, { "epoch": 4.036080159993469, "grad_norm": 3.523571491241455, "learning_rate": 1.0496680114353994e-05, "loss": 0.3532, "step": 24725 }, { "epoch": 4.036243418635974, "grad_norm": 3.028437614440918, "learning_rate": 1.0496039763283497e-05, "loss": 0.3408, "step": 24726 }, { "epoch": 4.036406677278478, "grad_norm": 3.1106150150299072, "learning_rate": 1.0495399410173968e-05, "loss": 0.3448, "step": 24727 }, { "epoch": 4.036569935920983, "grad_norm": 3.566593885421753, "learning_rate": 1.0494759055028039e-05, "loss": 0.4524, "step": 24728 }, { "epoch": 4.036733194563487, "grad_norm": 3.0370752811431885, "learning_rate": 1.049411869784834e-05, "loss": 0.3434, "step": 24729 }, { "epoch": 4.036896453205991, "grad_norm": 3.3376736640930176, "learning_rate": 1.0493478338637513e-05, "loss": 0.3599, "step": 24730 }, { "epoch": 4.037059711848496, "grad_norm": 3.454378128051758, "learning_rate": 1.0492837977398179e-05, "loss": 0.3886, "step": 24731 }, { "epoch": 4.037222970491, "grad_norm": 3.3290698528289795, "learning_rate": 1.0492197614132969e-05, "loss": 0.3531, "step": 24732 }, { "epoch": 4.037386229133505, "grad_norm": 3.1979219913482666, "learning_rate": 1.0491557248844526e-05, "loss": 0.4055, "step": 24733 }, { "epoch": 4.037549487776009, "grad_norm": 3.9266793727874756, "learning_rate": 1.0490916881535476e-05, "loss": 0.4065, "step": 24734 }, { "epoch": 4.037712746418514, "grad_norm": 4.26785135269165, "learning_rate": 1.0490276512208453e-05, "loss": 0.4015, "step": 24735 }, { "epoch": 4.037876005061018, "grad_norm": 2.851698160171509, "learning_rate": 1.0489636140866086e-05, "loss": 0.3081, "step": 24736 }, { "epoch": 4.0380392637035225, "grad_norm": 3.4070262908935547, "learning_rate": 1.0488995767511013e-05, "loss": 0.3536, "step": 24737 }, { "epoch": 4.038202522346027, "grad_norm": 4.01911735534668, "learning_rate": 1.0488355392145858e-05, "loss": 0.398, "step": 24738 }, { "epoch": 4.038365780988531, "grad_norm": 3.4107987880706787, "learning_rate": 1.0487715014773261e-05, "loss": 0.3654, "step": 24739 }, { "epoch": 4.038529039631036, "grad_norm": 3.5960848331451416, "learning_rate": 1.0487074635395854e-05, "loss": 0.3546, "step": 24740 }, { "epoch": 4.03869229827354, "grad_norm": 3.110778570175171, "learning_rate": 1.0486434254016265e-05, "loss": 0.391, "step": 24741 }, { "epoch": 4.038855556916044, "grad_norm": 2.957711935043335, "learning_rate": 1.0485793870637126e-05, "loss": 0.3869, "step": 24742 }, { "epoch": 4.039018815558548, "grad_norm": 4.100790977478027, "learning_rate": 1.0485153485261073e-05, "loss": 0.4327, "step": 24743 }, { "epoch": 4.039182074201053, "grad_norm": 3.6353201866149902, "learning_rate": 1.0484513097890739e-05, "loss": 0.376, "step": 24744 }, { "epoch": 4.039345332843557, "grad_norm": 3.6121904850006104, "learning_rate": 1.0483872708528755e-05, "loss": 0.4348, "step": 24745 }, { "epoch": 4.039508591486062, "grad_norm": 2.9240610599517822, "learning_rate": 1.0483232317177754e-05, "loss": 0.3223, "step": 24746 }, { "epoch": 4.039671850128566, "grad_norm": 3.138845443725586, "learning_rate": 1.0482591923840364e-05, "loss": 0.3376, "step": 24747 }, { "epoch": 4.03983510877107, "grad_norm": 2.347825527191162, "learning_rate": 1.0481951528519223e-05, "loss": 0.3359, "step": 24748 }, { "epoch": 4.039998367413575, "grad_norm": 3.457449197769165, "learning_rate": 1.0481311131216958e-05, "loss": 0.4278, "step": 24749 }, { "epoch": 4.040161626056079, "grad_norm": 3.27364444732666, "learning_rate": 1.0480670731936209e-05, "loss": 0.3909, "step": 24750 }, { "epoch": 4.040324884698584, "grad_norm": 3.556807518005371, "learning_rate": 1.0480030330679603e-05, "loss": 0.4157, "step": 24751 }, { "epoch": 4.040488143341088, "grad_norm": 3.2373807430267334, "learning_rate": 1.0479389927449772e-05, "loss": 0.3643, "step": 24752 }, { "epoch": 4.040651401983593, "grad_norm": 2.8609437942504883, "learning_rate": 1.0478749522249352e-05, "loss": 0.3781, "step": 24753 }, { "epoch": 4.040814660626097, "grad_norm": 3.2276012897491455, "learning_rate": 1.0478109115080973e-05, "loss": 0.3935, "step": 24754 }, { "epoch": 4.0409779192686015, "grad_norm": 3.560429811477661, "learning_rate": 1.0477468705947267e-05, "loss": 0.4006, "step": 24755 }, { "epoch": 4.041141177911106, "grad_norm": 3.282099962234497, "learning_rate": 1.0476828294850867e-05, "loss": 0.3892, "step": 24756 }, { "epoch": 4.04130443655361, "grad_norm": 3.085756301879883, "learning_rate": 1.0476187881794406e-05, "loss": 0.3294, "step": 24757 }, { "epoch": 4.041467695196115, "grad_norm": 3.0866806507110596, "learning_rate": 1.0475547466780518e-05, "loss": 0.37, "step": 24758 }, { "epoch": 4.041630953838619, "grad_norm": 2.8909647464752197, "learning_rate": 1.0474907049811834e-05, "loss": 0.3052, "step": 24759 }, { "epoch": 4.041794212481123, "grad_norm": 4.088273525238037, "learning_rate": 1.0474266630890986e-05, "loss": 0.3571, "step": 24760 }, { "epoch": 4.041957471123627, "grad_norm": 2.680417776107788, "learning_rate": 1.0473626210020605e-05, "loss": 0.318, "step": 24761 }, { "epoch": 4.042120729766132, "grad_norm": 2.6307384967803955, "learning_rate": 1.0472985787203327e-05, "loss": 0.3247, "step": 24762 }, { "epoch": 4.042283988408636, "grad_norm": 3.109044313430786, "learning_rate": 1.0472345362441782e-05, "loss": 0.3964, "step": 24763 }, { "epoch": 4.042447247051141, "grad_norm": 3.14123797416687, "learning_rate": 1.047170493573861e-05, "loss": 0.4108, "step": 24764 }, { "epoch": 4.042610505693645, "grad_norm": 3.2660388946533203, "learning_rate": 1.0471064507096427e-05, "loss": 0.3944, "step": 24765 }, { "epoch": 4.042773764336149, "grad_norm": 4.54545259475708, "learning_rate": 1.047042407651788e-05, "loss": 0.4261, "step": 24766 }, { "epoch": 4.042937022978654, "grad_norm": 3.6825997829437256, "learning_rate": 1.0469783644005599e-05, "loss": 0.356, "step": 24767 }, { "epoch": 4.043100281621158, "grad_norm": 2.593026638031006, "learning_rate": 1.0469143209562213e-05, "loss": 0.324, "step": 24768 }, { "epoch": 4.043263540263663, "grad_norm": 4.059370040893555, "learning_rate": 1.0468502773190358e-05, "loss": 0.4043, "step": 24769 }, { "epoch": 4.043426798906167, "grad_norm": 3.6844983100891113, "learning_rate": 1.0467862334892663e-05, "loss": 0.3473, "step": 24770 }, { "epoch": 4.043590057548672, "grad_norm": 3.4852294921875, "learning_rate": 1.0467221894671764e-05, "loss": 0.3677, "step": 24771 }, { "epoch": 4.043753316191176, "grad_norm": 4.43744421005249, "learning_rate": 1.0466581452530292e-05, "loss": 0.4204, "step": 24772 }, { "epoch": 4.0439165748336805, "grad_norm": 3.732515335083008, "learning_rate": 1.046594100847088e-05, "loss": 0.3456, "step": 24773 }, { "epoch": 4.044079833476185, "grad_norm": 4.389477729797363, "learning_rate": 1.0465300562496162e-05, "loss": 0.4289, "step": 24774 }, { "epoch": 4.044243092118689, "grad_norm": 3.065781593322754, "learning_rate": 1.0464660114608768e-05, "loss": 0.3228, "step": 24775 }, { "epoch": 4.044406350761194, "grad_norm": 3.015857219696045, "learning_rate": 1.0464019664811327e-05, "loss": 0.371, "step": 24776 }, { "epoch": 4.044569609403698, "grad_norm": 3.3983185291290283, "learning_rate": 1.046337921310648e-05, "loss": 0.3713, "step": 24777 }, { "epoch": 4.044732868046202, "grad_norm": 3.587584972381592, "learning_rate": 1.0462738759496855e-05, "loss": 0.3723, "step": 24778 }, { "epoch": 4.044896126688706, "grad_norm": 3.2600882053375244, "learning_rate": 1.0462098303985089e-05, "loss": 0.313, "step": 24779 }, { "epoch": 4.045059385331211, "grad_norm": 3.2982547283172607, "learning_rate": 1.046145784657381e-05, "loss": 0.3153, "step": 24780 }, { "epoch": 4.045222643973715, "grad_norm": 3.9458234310150146, "learning_rate": 1.0460817387265652e-05, "loss": 0.3468, "step": 24781 }, { "epoch": 4.0453859026162196, "grad_norm": 3.7636044025421143, "learning_rate": 1.0460176926063243e-05, "loss": 0.4331, "step": 24782 }, { "epoch": 4.045549161258724, "grad_norm": 3.6929643154144287, "learning_rate": 1.0459536462969224e-05, "loss": 0.3496, "step": 24783 }, { "epoch": 4.045712419901228, "grad_norm": 2.6863515377044678, "learning_rate": 1.0458895997986226e-05, "loss": 0.3324, "step": 24784 }, { "epoch": 4.045875678543733, "grad_norm": 4.286604404449463, "learning_rate": 1.0458255531116881e-05, "loss": 0.4538, "step": 24785 }, { "epoch": 4.046038937186237, "grad_norm": 3.3661248683929443, "learning_rate": 1.0457615062363818e-05, "loss": 0.3514, "step": 24786 }, { "epoch": 4.046202195828742, "grad_norm": 4.113919258117676, "learning_rate": 1.0456974591729672e-05, "loss": 0.3935, "step": 24787 }, { "epoch": 4.046365454471246, "grad_norm": 3.328263282775879, "learning_rate": 1.0456334119217073e-05, "loss": 0.3933, "step": 24788 }, { "epoch": 4.046528713113751, "grad_norm": 2.9219305515289307, "learning_rate": 1.0455693644828662e-05, "loss": 0.3858, "step": 24789 }, { "epoch": 4.046691971756255, "grad_norm": 3.5345864295959473, "learning_rate": 1.0455053168567064e-05, "loss": 0.3883, "step": 24790 }, { "epoch": 4.0468552303987595, "grad_norm": 3.4152846336364746, "learning_rate": 1.0454412690434914e-05, "loss": 0.3902, "step": 24791 }, { "epoch": 4.047018489041264, "grad_norm": 3.9930756092071533, "learning_rate": 1.0453772210434848e-05, "loss": 0.4054, "step": 24792 }, { "epoch": 4.047181747683768, "grad_norm": 2.794475793838501, "learning_rate": 1.045313172856949e-05, "loss": 0.3398, "step": 24793 }, { "epoch": 4.047345006326273, "grad_norm": 3.2497355937957764, "learning_rate": 1.0452491244841484e-05, "loss": 0.3841, "step": 24794 }, { "epoch": 4.047508264968776, "grad_norm": 4.503865718841553, "learning_rate": 1.0451850759253455e-05, "loss": 0.4905, "step": 24795 }, { "epoch": 4.047671523611281, "grad_norm": 3.279832124710083, "learning_rate": 1.045121027180804e-05, "loss": 0.348, "step": 24796 }, { "epoch": 4.047834782253785, "grad_norm": 3.14241099357605, "learning_rate": 1.0450569782507866e-05, "loss": 0.3236, "step": 24797 }, { "epoch": 4.04799804089629, "grad_norm": 2.8982503414154053, "learning_rate": 1.0449929291355574e-05, "loss": 0.358, "step": 24798 }, { "epoch": 4.048161299538794, "grad_norm": 3.511589527130127, "learning_rate": 1.044928879835379e-05, "loss": 0.418, "step": 24799 }, { "epoch": 4.0483245581812985, "grad_norm": 3.409376382827759, "learning_rate": 1.044864830350515e-05, "loss": 0.3367, "step": 24800 }, { "epoch": 4.048487816823803, "grad_norm": 3.7124602794647217, "learning_rate": 1.0448007806812289e-05, "loss": 0.3695, "step": 24801 }, { "epoch": 4.048651075466307, "grad_norm": 3.8167591094970703, "learning_rate": 1.0447367308277832e-05, "loss": 0.3721, "step": 24802 }, { "epoch": 4.048814334108812, "grad_norm": 3.2873425483703613, "learning_rate": 1.044672680790442e-05, "loss": 0.3535, "step": 24803 }, { "epoch": 4.048977592751316, "grad_norm": 3.075190782546997, "learning_rate": 1.0446086305694682e-05, "loss": 0.3636, "step": 24804 }, { "epoch": 4.049140851393821, "grad_norm": 3.7506728172302246, "learning_rate": 1.0445445801651253e-05, "loss": 0.4087, "step": 24805 }, { "epoch": 4.049304110036325, "grad_norm": 3.3732593059539795, "learning_rate": 1.0444805295776764e-05, "loss": 0.3844, "step": 24806 }, { "epoch": 4.04946736867883, "grad_norm": 3.669616460800171, "learning_rate": 1.0444164788073846e-05, "loss": 0.3818, "step": 24807 }, { "epoch": 4.049630627321334, "grad_norm": 2.7492613792419434, "learning_rate": 1.0443524278545138e-05, "loss": 0.3127, "step": 24808 }, { "epoch": 4.0497938859638385, "grad_norm": 3.080889940261841, "learning_rate": 1.0442883767193266e-05, "loss": 0.3244, "step": 24809 }, { "epoch": 4.049957144606343, "grad_norm": 2.979064464569092, "learning_rate": 1.0442243254020865e-05, "loss": 0.3534, "step": 24810 }, { "epoch": 4.050120403248847, "grad_norm": 3.0217740535736084, "learning_rate": 1.0441602739030572e-05, "loss": 0.3274, "step": 24811 }, { "epoch": 4.050283661891352, "grad_norm": 3.6514883041381836, "learning_rate": 1.0440962222225018e-05, "loss": 0.3563, "step": 24812 }, { "epoch": 4.050446920533855, "grad_norm": 3.5423266887664795, "learning_rate": 1.0440321703606833e-05, "loss": 0.3683, "step": 24813 }, { "epoch": 4.05061017917636, "grad_norm": 3.1451587677001953, "learning_rate": 1.043968118317865e-05, "loss": 0.3326, "step": 24814 }, { "epoch": 4.050773437818864, "grad_norm": 3.473740816116333, "learning_rate": 1.0439040660943104e-05, "loss": 0.3764, "step": 24815 }, { "epoch": 4.050936696461369, "grad_norm": 3.5208730697631836, "learning_rate": 1.0438400136902832e-05, "loss": 0.3959, "step": 24816 }, { "epoch": 4.051099955103873, "grad_norm": 3.1726579666137695, "learning_rate": 1.0437759611060459e-05, "loss": 0.3321, "step": 24817 }, { "epoch": 4.0512632137463775, "grad_norm": 3.208503246307373, "learning_rate": 1.0437119083418623e-05, "loss": 0.4121, "step": 24818 }, { "epoch": 4.051426472388882, "grad_norm": 3.785891056060791, "learning_rate": 1.0436478553979955e-05, "loss": 0.4026, "step": 24819 }, { "epoch": 4.051589731031386, "grad_norm": 3.099792003631592, "learning_rate": 1.0435838022747085e-05, "loss": 0.355, "step": 24820 }, { "epoch": 4.051752989673891, "grad_norm": 4.392889499664307, "learning_rate": 1.0435197489722652e-05, "loss": 0.3828, "step": 24821 }, { "epoch": 4.051916248316395, "grad_norm": 3.2576894760131836, "learning_rate": 1.0434556954909288e-05, "loss": 0.3298, "step": 24822 }, { "epoch": 4.0520795069589, "grad_norm": 3.1630911827087402, "learning_rate": 1.0433916418309624e-05, "loss": 0.3467, "step": 24823 }, { "epoch": 4.052242765601404, "grad_norm": 3.7749483585357666, "learning_rate": 1.0433275879926295e-05, "loss": 0.4444, "step": 24824 }, { "epoch": 4.052406024243909, "grad_norm": 3.9663288593292236, "learning_rate": 1.043263533976193e-05, "loss": 0.3935, "step": 24825 }, { "epoch": 4.052569282886413, "grad_norm": 3.835973024368286, "learning_rate": 1.043199479781916e-05, "loss": 0.3993, "step": 24826 }, { "epoch": 4.0527325415289175, "grad_norm": 3.3866682052612305, "learning_rate": 1.0431354254100627e-05, "loss": 0.3809, "step": 24827 }, { "epoch": 4.052895800171422, "grad_norm": 4.121541976928711, "learning_rate": 1.0430713708608963e-05, "loss": 0.378, "step": 24828 }, { "epoch": 4.053059058813926, "grad_norm": 3.793398141860962, "learning_rate": 1.0430073161346795e-05, "loss": 0.3424, "step": 24829 }, { "epoch": 4.05322231745643, "grad_norm": 3.619986057281494, "learning_rate": 1.0429432612316756e-05, "loss": 0.4044, "step": 24830 }, { "epoch": 4.053385576098934, "grad_norm": 3.109802722930908, "learning_rate": 1.0428792061521484e-05, "loss": 0.3413, "step": 24831 }, { "epoch": 4.053548834741439, "grad_norm": 3.7872769832611084, "learning_rate": 1.042815150896361e-05, "loss": 0.4047, "step": 24832 }, { "epoch": 4.053712093383943, "grad_norm": 3.355861186981201, "learning_rate": 1.0427510954645768e-05, "loss": 0.2982, "step": 24833 }, { "epoch": 4.053875352026448, "grad_norm": 3.0996954441070557, "learning_rate": 1.0426870398570587e-05, "loss": 0.3597, "step": 24834 }, { "epoch": 4.054038610668952, "grad_norm": 3.582017183303833, "learning_rate": 1.0426229840740705e-05, "loss": 0.4234, "step": 24835 }, { "epoch": 4.0542018693114565, "grad_norm": 3.5556790828704834, "learning_rate": 1.0425589281158752e-05, "loss": 0.4075, "step": 24836 }, { "epoch": 4.054365127953961, "grad_norm": 3.422356605529785, "learning_rate": 1.0424948719827359e-05, "loss": 0.3501, "step": 24837 }, { "epoch": 4.054528386596465, "grad_norm": 3.381883144378662, "learning_rate": 1.0424308156749167e-05, "loss": 0.3927, "step": 24838 }, { "epoch": 4.05469164523897, "grad_norm": 4.13486385345459, "learning_rate": 1.0423667591926805e-05, "loss": 0.3637, "step": 24839 }, { "epoch": 4.054854903881474, "grad_norm": 4.333697319030762, "learning_rate": 1.0423027025362901e-05, "loss": 0.4223, "step": 24840 }, { "epoch": 4.055018162523979, "grad_norm": 3.6646387577056885, "learning_rate": 1.0422386457060097e-05, "loss": 0.3859, "step": 24841 }, { "epoch": 4.055181421166483, "grad_norm": 4.1810526847839355, "learning_rate": 1.0421745887021019e-05, "loss": 0.7903, "step": 24842 }, { "epoch": 4.055344679808988, "grad_norm": 3.798673152923584, "learning_rate": 1.0421105315248303e-05, "loss": 0.4063, "step": 24843 }, { "epoch": 4.055507938451492, "grad_norm": 3.552159547805786, "learning_rate": 1.0420464741744585e-05, "loss": 0.4353, "step": 24844 }, { "epoch": 4.0556711970939965, "grad_norm": 3.202531576156616, "learning_rate": 1.0419824166512492e-05, "loss": 0.3666, "step": 24845 }, { "epoch": 4.055834455736501, "grad_norm": 4.490611553192139, "learning_rate": 1.0419183589554662e-05, "loss": 0.4592, "step": 24846 }, { "epoch": 4.055997714379005, "grad_norm": 2.809579849243164, "learning_rate": 1.0418543010873726e-05, "loss": 0.3594, "step": 24847 }, { "epoch": 4.056160973021509, "grad_norm": 3.674072027206421, "learning_rate": 1.0417902430472318e-05, "loss": 0.3755, "step": 24848 }, { "epoch": 4.056324231664013, "grad_norm": 3.3862321376800537, "learning_rate": 1.0417261848353068e-05, "loss": 0.3632, "step": 24849 }, { "epoch": 4.056487490306518, "grad_norm": 3.692272901535034, "learning_rate": 1.0416621264518617e-05, "loss": 0.45, "step": 24850 }, { "epoch": 4.056650748949022, "grad_norm": 3.04789400100708, "learning_rate": 1.0415980678971592e-05, "loss": 0.3521, "step": 24851 }, { "epoch": 4.056814007591527, "grad_norm": 3.738377332687378, "learning_rate": 1.0415340091714628e-05, "loss": 0.4091, "step": 24852 }, { "epoch": 4.056977266234031, "grad_norm": 3.3065731525421143, "learning_rate": 1.0414699502750354e-05, "loss": 0.3384, "step": 24853 }, { "epoch": 4.0571405248765355, "grad_norm": 3.639190435409546, "learning_rate": 1.041405891208141e-05, "loss": 0.4222, "step": 24854 }, { "epoch": 4.05730378351904, "grad_norm": 3.202787399291992, "learning_rate": 1.0413418319710423e-05, "loss": 0.3922, "step": 24855 }, { "epoch": 4.057467042161544, "grad_norm": 3.252784490585327, "learning_rate": 1.0412777725640031e-05, "loss": 0.3556, "step": 24856 }, { "epoch": 4.057630300804049, "grad_norm": 3.3857059478759766, "learning_rate": 1.0412137129872871e-05, "loss": 0.4168, "step": 24857 }, { "epoch": 4.057793559446553, "grad_norm": 3.74787974357605, "learning_rate": 1.0411496532411564e-05, "loss": 0.3681, "step": 24858 }, { "epoch": 4.057956818089058, "grad_norm": 3.226572036743164, "learning_rate": 1.041085593325875e-05, "loss": 0.3927, "step": 24859 }, { "epoch": 4.058120076731562, "grad_norm": 3.155132293701172, "learning_rate": 1.0410215332417065e-05, "loss": 0.332, "step": 24860 }, { "epoch": 4.058283335374067, "grad_norm": 2.849513530731201, "learning_rate": 1.0409574729889141e-05, "loss": 0.3599, "step": 24861 }, { "epoch": 4.058446594016571, "grad_norm": 3.8948440551757812, "learning_rate": 1.040893412567761e-05, "loss": 0.4239, "step": 24862 }, { "epoch": 4.0586098526590755, "grad_norm": 3.1084203720092773, "learning_rate": 1.0408293519785103e-05, "loss": 0.3347, "step": 24863 }, { "epoch": 4.05877311130158, "grad_norm": 3.0786027908325195, "learning_rate": 1.0407652912214254e-05, "loss": 0.3616, "step": 24864 }, { "epoch": 4.058936369944084, "grad_norm": 3.6193912029266357, "learning_rate": 1.0407012302967698e-05, "loss": 0.3813, "step": 24865 }, { "epoch": 4.059099628586588, "grad_norm": 3.3259336948394775, "learning_rate": 1.0406371692048071e-05, "loss": 0.4057, "step": 24866 }, { "epoch": 4.059262887229092, "grad_norm": 3.935919761657715, "learning_rate": 1.0405731079458005e-05, "loss": 0.3775, "step": 24867 }, { "epoch": 4.059426145871597, "grad_norm": 4.163828372955322, "learning_rate": 1.040509046520013e-05, "loss": 0.4081, "step": 24868 }, { "epoch": 4.059589404514101, "grad_norm": 3.4863576889038086, "learning_rate": 1.0404449849277079e-05, "loss": 0.3731, "step": 24869 }, { "epoch": 4.059752663156606, "grad_norm": 3.5277116298675537, "learning_rate": 1.0403809231691487e-05, "loss": 0.3804, "step": 24870 }, { "epoch": 4.05991592179911, "grad_norm": 3.7809839248657227, "learning_rate": 1.0403168612445988e-05, "loss": 0.3767, "step": 24871 }, { "epoch": 4.0600791804416145, "grad_norm": 3.787363290786743, "learning_rate": 1.0402527991543219e-05, "loss": 0.5649, "step": 24872 }, { "epoch": 4.060242439084119, "grad_norm": 2.837116241455078, "learning_rate": 1.0401887368985808e-05, "loss": 0.3436, "step": 24873 }, { "epoch": 4.060405697726623, "grad_norm": 3.9413468837738037, "learning_rate": 1.0401246744776388e-05, "loss": 0.4007, "step": 24874 }, { "epoch": 4.060568956369128, "grad_norm": 3.263575553894043, "learning_rate": 1.0400606118917593e-05, "loss": 0.3403, "step": 24875 }, { "epoch": 4.060732215011632, "grad_norm": 2.595427989959717, "learning_rate": 1.0399965491412058e-05, "loss": 0.3263, "step": 24876 }, { "epoch": 4.060895473654137, "grad_norm": 3.6080310344696045, "learning_rate": 1.039932486226242e-05, "loss": 0.3367, "step": 24877 }, { "epoch": 4.061058732296641, "grad_norm": 3.7849273681640625, "learning_rate": 1.0398684231471306e-05, "loss": 0.4391, "step": 24878 }, { "epoch": 4.061221990939146, "grad_norm": 3.8126471042633057, "learning_rate": 1.039804359904135e-05, "loss": 0.4216, "step": 24879 }, { "epoch": 4.06138524958165, "grad_norm": 3.7902069091796875, "learning_rate": 1.0397402964975187e-05, "loss": 0.367, "step": 24880 }, { "epoch": 4.0615485082241545, "grad_norm": 3.677860975265503, "learning_rate": 1.039676232927545e-05, "loss": 0.4064, "step": 24881 }, { "epoch": 4.061711766866659, "grad_norm": 3.3820271492004395, "learning_rate": 1.0396121691944777e-05, "loss": 0.3452, "step": 24882 }, { "epoch": 4.0618750255091625, "grad_norm": 3.410759925842285, "learning_rate": 1.0395481052985794e-05, "loss": 0.378, "step": 24883 }, { "epoch": 4.062038284151667, "grad_norm": 3.2906692028045654, "learning_rate": 1.039484041240114e-05, "loss": 0.3856, "step": 24884 }, { "epoch": 4.062201542794171, "grad_norm": 3.751437187194824, "learning_rate": 1.0394199770193442e-05, "loss": 0.3238, "step": 24885 }, { "epoch": 4.062364801436676, "grad_norm": 3.505798101425171, "learning_rate": 1.0393559126365343e-05, "loss": 0.3703, "step": 24886 }, { "epoch": 4.06252806007918, "grad_norm": 3.6228883266448975, "learning_rate": 1.0392918480919467e-05, "loss": 0.3784, "step": 24887 }, { "epoch": 4.062691318721685, "grad_norm": 3.4515111446380615, "learning_rate": 1.0392277833858453e-05, "loss": 0.3761, "step": 24888 }, { "epoch": 4.062854577364189, "grad_norm": 3.8676071166992188, "learning_rate": 1.0391637185184933e-05, "loss": 0.3649, "step": 24889 }, { "epoch": 4.0630178360066935, "grad_norm": 2.569533109664917, "learning_rate": 1.0390996534901538e-05, "loss": 0.3041, "step": 24890 }, { "epoch": 4.063181094649198, "grad_norm": 3.4798600673675537, "learning_rate": 1.0390355883010907e-05, "loss": 0.3502, "step": 24891 }, { "epoch": 4.063344353291702, "grad_norm": 3.3217391967773438, "learning_rate": 1.0389715229515668e-05, "loss": 0.3373, "step": 24892 }, { "epoch": 4.063507611934207, "grad_norm": 4.099194526672363, "learning_rate": 1.038907457441846e-05, "loss": 0.4374, "step": 24893 }, { "epoch": 4.063670870576711, "grad_norm": 3.4260504245758057, "learning_rate": 1.0388433917721911e-05, "loss": 0.3419, "step": 24894 }, { "epoch": 4.063834129219216, "grad_norm": 4.584232807159424, "learning_rate": 1.0387793259428658e-05, "loss": 0.4421, "step": 24895 }, { "epoch": 4.06399738786172, "grad_norm": 3.16788649559021, "learning_rate": 1.0387152599541334e-05, "loss": 0.3724, "step": 24896 }, { "epoch": 4.064160646504225, "grad_norm": 3.276446580886841, "learning_rate": 1.038651193806257e-05, "loss": 0.3778, "step": 24897 }, { "epoch": 4.064323905146729, "grad_norm": 3.203070878982544, "learning_rate": 1.0385871274995e-05, "loss": 0.3875, "step": 24898 }, { "epoch": 4.0644871637892335, "grad_norm": 3.8526954650878906, "learning_rate": 1.0385230610341261e-05, "loss": 0.3824, "step": 24899 }, { "epoch": 4.064650422431738, "grad_norm": 4.329887866973877, "learning_rate": 1.0384589944103984e-05, "loss": 0.4378, "step": 24900 }, { "epoch": 4.0648136810742415, "grad_norm": 3.7330431938171387, "learning_rate": 1.0383949276285804e-05, "loss": 0.3695, "step": 24901 }, { "epoch": 4.064976939716746, "grad_norm": 3.060580015182495, "learning_rate": 1.0383308606889355e-05, "loss": 0.3054, "step": 24902 }, { "epoch": 4.06514019835925, "grad_norm": 3.214141845703125, "learning_rate": 1.0382667935917264e-05, "loss": 0.3929, "step": 24903 }, { "epoch": 4.065303457001755, "grad_norm": 3.6263012886047363, "learning_rate": 1.0382027263372171e-05, "loss": 0.3577, "step": 24904 }, { "epoch": 4.065466715644259, "grad_norm": 4.389400482177734, "learning_rate": 1.038138658925671e-05, "loss": 0.3529, "step": 24905 }, { "epoch": 4.065629974286764, "grad_norm": 3.4511590003967285, "learning_rate": 1.0380745913573516e-05, "loss": 0.3476, "step": 24906 }, { "epoch": 4.065793232929268, "grad_norm": 3.704097032546997, "learning_rate": 1.0380105236325216e-05, "loss": 0.3402, "step": 24907 }, { "epoch": 4.0659564915717725, "grad_norm": 3.2948615550994873, "learning_rate": 1.0379464557514442e-05, "loss": 0.3668, "step": 24908 }, { "epoch": 4.066119750214277, "grad_norm": 3.1165504455566406, "learning_rate": 1.0378823877143838e-05, "loss": 0.3185, "step": 24909 }, { "epoch": 4.066283008856781, "grad_norm": 3.650904655456543, "learning_rate": 1.0378183195216033e-05, "loss": 0.4112, "step": 24910 }, { "epoch": 4.066446267499286, "grad_norm": 2.96651554107666, "learning_rate": 1.0377542511733657e-05, "loss": 0.3386, "step": 24911 }, { "epoch": 4.06660952614179, "grad_norm": 4.083104610443115, "learning_rate": 1.0376901826699349e-05, "loss": 0.4214, "step": 24912 }, { "epoch": 4.066772784784295, "grad_norm": 3.508948802947998, "learning_rate": 1.0376261140115737e-05, "loss": 0.373, "step": 24913 }, { "epoch": 4.066936043426799, "grad_norm": 3.1931498050689697, "learning_rate": 1.0375620451985455e-05, "loss": 0.3476, "step": 24914 }, { "epoch": 4.067099302069304, "grad_norm": 3.0974879264831543, "learning_rate": 1.0374979762311144e-05, "loss": 0.3329, "step": 24915 }, { "epoch": 4.067262560711808, "grad_norm": 3.2434239387512207, "learning_rate": 1.0374339071095434e-05, "loss": 0.4187, "step": 24916 }, { "epoch": 4.0674258193543125, "grad_norm": 2.9877147674560547, "learning_rate": 1.0373698378340952e-05, "loss": 0.3431, "step": 24917 }, { "epoch": 4.067589077996816, "grad_norm": 3.17149019241333, "learning_rate": 1.0373057684050343e-05, "loss": 0.3298, "step": 24918 }, { "epoch": 4.0677523366393205, "grad_norm": 3.5939407348632812, "learning_rate": 1.0372416988226231e-05, "loss": 0.4057, "step": 24919 }, { "epoch": 4.067915595281825, "grad_norm": 3.435957908630371, "learning_rate": 1.0371776290871252e-05, "loss": 0.3734, "step": 24920 }, { "epoch": 4.068078853924329, "grad_norm": 3.961127996444702, "learning_rate": 1.0371135591988044e-05, "loss": 0.36, "step": 24921 }, { "epoch": 4.068242112566834, "grad_norm": 3.70805287361145, "learning_rate": 1.0370494891579237e-05, "loss": 0.35, "step": 24922 }, { "epoch": 4.068405371209338, "grad_norm": 3.7436370849609375, "learning_rate": 1.0369854189647466e-05, "loss": 0.3953, "step": 24923 }, { "epoch": 4.068568629851843, "grad_norm": 3.2717039585113525, "learning_rate": 1.0369213486195361e-05, "loss": 0.3277, "step": 24924 }, { "epoch": 4.068731888494347, "grad_norm": 3.3565711975097656, "learning_rate": 1.0368572781225561e-05, "loss": 0.3363, "step": 24925 }, { "epoch": 4.0688951471368515, "grad_norm": 3.3120391368865967, "learning_rate": 1.0367932074740699e-05, "loss": 0.3701, "step": 24926 }, { "epoch": 4.069058405779356, "grad_norm": 3.204596996307373, "learning_rate": 1.0367291366743406e-05, "loss": 0.3398, "step": 24927 }, { "epoch": 4.06922166442186, "grad_norm": 5.212648868560791, "learning_rate": 1.0366650657236317e-05, "loss": 0.3798, "step": 24928 }, { "epoch": 4.069384923064365, "grad_norm": 3.8069896697998047, "learning_rate": 1.0366009946222064e-05, "loss": 0.4109, "step": 24929 }, { "epoch": 4.069548181706869, "grad_norm": 3.2432515621185303, "learning_rate": 1.0365369233703284e-05, "loss": 0.3633, "step": 24930 }, { "epoch": 4.069711440349374, "grad_norm": 3.19132399559021, "learning_rate": 1.0364728519682607e-05, "loss": 0.3548, "step": 24931 }, { "epoch": 4.069874698991878, "grad_norm": 2.9401252269744873, "learning_rate": 1.036408780416267e-05, "loss": 0.3313, "step": 24932 }, { "epoch": 4.070037957634383, "grad_norm": 3.717726707458496, "learning_rate": 1.0363447087146107e-05, "loss": 0.3773, "step": 24933 }, { "epoch": 4.070201216276887, "grad_norm": 3.7130987644195557, "learning_rate": 1.036280636863555e-05, "loss": 0.3753, "step": 24934 }, { "epoch": 4.0703644749193915, "grad_norm": 2.7107019424438477, "learning_rate": 1.0362165648633631e-05, "loss": 0.3221, "step": 24935 }, { "epoch": 4.070527733561895, "grad_norm": 3.443483591079712, "learning_rate": 1.0361524927142987e-05, "loss": 0.34, "step": 24936 }, { "epoch": 4.0706909922043994, "grad_norm": 4.261678695678711, "learning_rate": 1.036088420416625e-05, "loss": 0.4445, "step": 24937 }, { "epoch": 4.070854250846904, "grad_norm": 3.8099772930145264, "learning_rate": 1.0360243479706053e-05, "loss": 0.3977, "step": 24938 }, { "epoch": 4.071017509489408, "grad_norm": 3.2411344051361084, "learning_rate": 1.0359602753765035e-05, "loss": 0.3858, "step": 24939 }, { "epoch": 4.071180768131913, "grad_norm": 3.3368122577667236, "learning_rate": 1.0358962026345824e-05, "loss": 0.3345, "step": 24940 }, { "epoch": 4.071344026774417, "grad_norm": 2.7095375061035156, "learning_rate": 1.0358321297451055e-05, "loss": 0.3355, "step": 24941 }, { "epoch": 4.071507285416922, "grad_norm": 3.829533338546753, "learning_rate": 1.0357680567083362e-05, "loss": 0.394, "step": 24942 }, { "epoch": 4.071670544059426, "grad_norm": 3.5620920658111572, "learning_rate": 1.0357039835245379e-05, "loss": 0.3858, "step": 24943 }, { "epoch": 4.0718338027019305, "grad_norm": 3.301622152328491, "learning_rate": 1.0356399101939743e-05, "loss": 0.3798, "step": 24944 }, { "epoch": 4.071997061344435, "grad_norm": 3.6470611095428467, "learning_rate": 1.0355758367169084e-05, "loss": 0.3798, "step": 24945 }, { "epoch": 4.072160319986939, "grad_norm": 3.8640923500061035, "learning_rate": 1.0355117630936035e-05, "loss": 0.3656, "step": 24946 }, { "epoch": 4.072323578629444, "grad_norm": 3.649761915206909, "learning_rate": 1.0354476893243231e-05, "loss": 0.4631, "step": 24947 }, { "epoch": 4.072486837271948, "grad_norm": 3.3730502128601074, "learning_rate": 1.0353836154093307e-05, "loss": 0.412, "step": 24948 }, { "epoch": 4.072650095914453, "grad_norm": 3.8656415939331055, "learning_rate": 1.0353195413488897e-05, "loss": 0.405, "step": 24949 }, { "epoch": 4.072813354556957, "grad_norm": 3.606278419494629, "learning_rate": 1.0352554671432635e-05, "loss": 0.3808, "step": 24950 }, { "epoch": 4.072976613199462, "grad_norm": 3.6600775718688965, "learning_rate": 1.0351913927927152e-05, "loss": 0.4219, "step": 24951 }, { "epoch": 4.073139871841966, "grad_norm": 2.6165575981140137, "learning_rate": 1.0351273182975081e-05, "loss": 0.3018, "step": 24952 }, { "epoch": 4.0733031304844705, "grad_norm": 2.9261488914489746, "learning_rate": 1.0350632436579063e-05, "loss": 0.3307, "step": 24953 }, { "epoch": 4.073466389126974, "grad_norm": 3.1949164867401123, "learning_rate": 1.0349991688741725e-05, "loss": 0.3754, "step": 24954 }, { "epoch": 4.0736296477694784, "grad_norm": 3.328843116760254, "learning_rate": 1.0349350939465708e-05, "loss": 0.3939, "step": 24955 }, { "epoch": 4.073792906411983, "grad_norm": 3.967833995819092, "learning_rate": 1.0348710188753637e-05, "loss": 0.3969, "step": 24956 }, { "epoch": 4.073956165054487, "grad_norm": 3.4849257469177246, "learning_rate": 1.034806943660815e-05, "loss": 0.3661, "step": 24957 }, { "epoch": 4.074119423696992, "grad_norm": 3.225609064102173, "learning_rate": 1.0347428683031882e-05, "loss": 0.3555, "step": 24958 }, { "epoch": 4.074282682339496, "grad_norm": 3.5008418560028076, "learning_rate": 1.0346787928027464e-05, "loss": 0.4343, "step": 24959 }, { "epoch": 4.074445940982001, "grad_norm": 4.125208854675293, "learning_rate": 1.0346147171597536e-05, "loss": 0.4189, "step": 24960 }, { "epoch": 4.074609199624505, "grad_norm": 3.441025495529175, "learning_rate": 1.0345506413744726e-05, "loss": 0.3585, "step": 24961 }, { "epoch": 4.0747724582670095, "grad_norm": 3.9753634929656982, "learning_rate": 1.0344865654471668e-05, "loss": 0.4211, "step": 24962 }, { "epoch": 4.074935716909514, "grad_norm": 4.1857733726501465, "learning_rate": 1.0344224893780998e-05, "loss": 0.3977, "step": 24963 }, { "epoch": 4.075098975552018, "grad_norm": 3.834156036376953, "learning_rate": 1.0343584131675346e-05, "loss": 0.4525, "step": 24964 }, { "epoch": 4.075262234194523, "grad_norm": 3.3526291847229004, "learning_rate": 1.0342943368157356e-05, "loss": 0.4011, "step": 24965 }, { "epoch": 4.075425492837027, "grad_norm": 3.0564138889312744, "learning_rate": 1.0342302603229653e-05, "loss": 0.334, "step": 24966 }, { "epoch": 4.075588751479532, "grad_norm": 3.663482904434204, "learning_rate": 1.0341661836894872e-05, "loss": 0.3929, "step": 24967 }, { "epoch": 4.075752010122036, "grad_norm": 3.1620991230010986, "learning_rate": 1.0341021069155648e-05, "loss": 0.3857, "step": 24968 }, { "epoch": 4.075915268764541, "grad_norm": 3.045570135116577, "learning_rate": 1.0340380300014617e-05, "loss": 0.3462, "step": 24969 }, { "epoch": 4.076078527407045, "grad_norm": 3.0229685306549072, "learning_rate": 1.033973952947441e-05, "loss": 0.3385, "step": 24970 }, { "epoch": 4.076241786049549, "grad_norm": 3.888592481613159, "learning_rate": 1.0339098757537662e-05, "loss": 0.4193, "step": 24971 }, { "epoch": 4.076405044692053, "grad_norm": 3.552530527114868, "learning_rate": 1.0338457984207006e-05, "loss": 0.3727, "step": 24972 }, { "epoch": 4.076568303334557, "grad_norm": 4.155571460723877, "learning_rate": 1.033781720948508e-05, "loss": 0.3779, "step": 24973 }, { "epoch": 4.076731561977062, "grad_norm": 3.362746000289917, "learning_rate": 1.0337176433374512e-05, "loss": 0.4049, "step": 24974 }, { "epoch": 4.076894820619566, "grad_norm": 3.3580312728881836, "learning_rate": 1.0336535655877942e-05, "loss": 0.3751, "step": 24975 }, { "epoch": 4.077058079262071, "grad_norm": 3.3517262935638428, "learning_rate": 1.0335894876997999e-05, "loss": 0.3621, "step": 24976 }, { "epoch": 4.077221337904575, "grad_norm": 3.6742453575134277, "learning_rate": 1.0335254096737317e-05, "loss": 0.4349, "step": 24977 }, { "epoch": 4.07738459654708, "grad_norm": 2.7301366329193115, "learning_rate": 1.0334613315098536e-05, "loss": 0.3401, "step": 24978 }, { "epoch": 4.077547855189584, "grad_norm": 3.908998489379883, "learning_rate": 1.0333972532084284e-05, "loss": 0.4128, "step": 24979 }, { "epoch": 4.0777111138320885, "grad_norm": 3.3631725311279297, "learning_rate": 1.0333331747697197e-05, "loss": 0.362, "step": 24980 }, { "epoch": 4.077874372474593, "grad_norm": 3.7169477939605713, "learning_rate": 1.0332690961939908e-05, "loss": 0.3574, "step": 24981 }, { "epoch": 4.078037631117097, "grad_norm": 3.3575122356414795, "learning_rate": 1.0332050174815055e-05, "loss": 0.3814, "step": 24982 }, { "epoch": 4.078200889759602, "grad_norm": 3.986912488937378, "learning_rate": 1.0331409386325266e-05, "loss": 0.4372, "step": 24983 }, { "epoch": 4.078364148402106, "grad_norm": 3.933784008026123, "learning_rate": 1.0330768596473182e-05, "loss": 0.3994, "step": 24984 }, { "epoch": 4.078527407044611, "grad_norm": 3.1048598289489746, "learning_rate": 1.033012780526143e-05, "loss": 0.3452, "step": 24985 }, { "epoch": 4.078690665687115, "grad_norm": 3.435089588165283, "learning_rate": 1.0329487012692647e-05, "loss": 0.3957, "step": 24986 }, { "epoch": 4.07885392432962, "grad_norm": 3.4864416122436523, "learning_rate": 1.0328846218769468e-05, "loss": 0.3673, "step": 24987 }, { "epoch": 4.079017182972124, "grad_norm": 3.5391485691070557, "learning_rate": 1.0328205423494526e-05, "loss": 0.4473, "step": 24988 }, { "epoch": 4.079180441614628, "grad_norm": 3.4158942699432373, "learning_rate": 1.0327564626870459e-05, "loss": 0.3583, "step": 24989 }, { "epoch": 4.079343700257132, "grad_norm": 2.9413936138153076, "learning_rate": 1.0326923828899895e-05, "loss": 0.3472, "step": 24990 }, { "epoch": 4.079506958899636, "grad_norm": 2.495347499847412, "learning_rate": 1.0326283029585468e-05, "loss": 0.3144, "step": 24991 }, { "epoch": 4.079670217542141, "grad_norm": 2.7759199142456055, "learning_rate": 1.0325642228929817e-05, "loss": 0.3493, "step": 24992 }, { "epoch": 4.079833476184645, "grad_norm": 3.0162222385406494, "learning_rate": 1.0325001426935573e-05, "loss": 0.3556, "step": 24993 }, { "epoch": 4.07999673482715, "grad_norm": 4.068119525909424, "learning_rate": 1.0324360623605374e-05, "loss": 0.4513, "step": 24994 }, { "epoch": 4.080159993469654, "grad_norm": 3.4146645069122314, "learning_rate": 1.032371981894185e-05, "loss": 0.3163, "step": 24995 }, { "epoch": 4.080323252112159, "grad_norm": 4.114496231079102, "learning_rate": 1.0323079012947629e-05, "loss": 0.3849, "step": 24996 }, { "epoch": 4.080486510754663, "grad_norm": 3.627185344696045, "learning_rate": 1.0322438205625361e-05, "loss": 0.424, "step": 24997 }, { "epoch": 4.0806497693971675, "grad_norm": 4.157701015472412, "learning_rate": 1.0321797396977666e-05, "loss": 0.4093, "step": 24998 }, { "epoch": 4.080813028039672, "grad_norm": 3.9685630798339844, "learning_rate": 1.0321156587007188e-05, "loss": 0.3848, "step": 24999 }, { "epoch": 4.080976286682176, "grad_norm": 3.254021167755127, "learning_rate": 1.0320515775716556e-05, "loss": 0.3561, "step": 25000 }, { "epoch": 4.081139545324681, "grad_norm": 2.7033884525299072, "learning_rate": 1.03198749631084e-05, "loss": 0.2811, "step": 25001 }, { "epoch": 4.081302803967185, "grad_norm": 3.2173373699188232, "learning_rate": 1.0319234149185359e-05, "loss": 0.3625, "step": 25002 }, { "epoch": 4.08146606260969, "grad_norm": 3.9075636863708496, "learning_rate": 1.031859333395007e-05, "loss": 0.3783, "step": 25003 }, { "epoch": 4.081629321252194, "grad_norm": 3.470501184463501, "learning_rate": 1.0317952517405165e-05, "loss": 0.3548, "step": 25004 }, { "epoch": 4.081792579894699, "grad_norm": 3.125166893005371, "learning_rate": 1.0317311699553276e-05, "loss": 0.3606, "step": 25005 }, { "epoch": 4.081955838537203, "grad_norm": 3.3124806880950928, "learning_rate": 1.031667088039704e-05, "loss": 0.3628, "step": 25006 }, { "epoch": 4.082119097179707, "grad_norm": 3.3019654750823975, "learning_rate": 1.0316030059939087e-05, "loss": 0.3634, "step": 25007 }, { "epoch": 4.082282355822211, "grad_norm": 3.2526237964630127, "learning_rate": 1.0315389238182052e-05, "loss": 0.3008, "step": 25008 }, { "epoch": 4.082445614464715, "grad_norm": 3.871209144592285, "learning_rate": 1.0314748415128575e-05, "loss": 0.4065, "step": 25009 }, { "epoch": 4.08260887310722, "grad_norm": 2.8909800052642822, "learning_rate": 1.0314107590781284e-05, "loss": 0.3276, "step": 25010 }, { "epoch": 4.082772131749724, "grad_norm": 3.7668421268463135, "learning_rate": 1.0313466765142817e-05, "loss": 0.3379, "step": 25011 }, { "epoch": 4.082935390392229, "grad_norm": 3.3220269680023193, "learning_rate": 1.0312825938215804e-05, "loss": 0.379, "step": 25012 }, { "epoch": 4.083098649034733, "grad_norm": 3.7046291828155518, "learning_rate": 1.0312185110002883e-05, "loss": 0.4291, "step": 25013 }, { "epoch": 4.083261907677238, "grad_norm": 3.3831028938293457, "learning_rate": 1.0311544280506687e-05, "loss": 0.3478, "step": 25014 }, { "epoch": 4.083425166319742, "grad_norm": 3.4899539947509766, "learning_rate": 1.031090344972985e-05, "loss": 0.3691, "step": 25015 }, { "epoch": 4.0835884249622465, "grad_norm": 4.1432600021362305, "learning_rate": 1.0310262617675007e-05, "loss": 0.4746, "step": 25016 }, { "epoch": 4.083751683604751, "grad_norm": 4.325923919677734, "learning_rate": 1.030962178434479e-05, "loss": 0.4588, "step": 25017 }, { "epoch": 4.083914942247255, "grad_norm": 3.0213003158569336, "learning_rate": 1.0308980949741836e-05, "loss": 0.3074, "step": 25018 }, { "epoch": 4.08407820088976, "grad_norm": 3.5794482231140137, "learning_rate": 1.0308340113868777e-05, "loss": 0.3547, "step": 25019 }, { "epoch": 4.084241459532264, "grad_norm": 3.796066999435425, "learning_rate": 1.030769927672825e-05, "loss": 0.3631, "step": 25020 }, { "epoch": 4.084404718174769, "grad_norm": 3.678462266921997, "learning_rate": 1.0307058438322886e-05, "loss": 0.4452, "step": 25021 }, { "epoch": 4.084567976817273, "grad_norm": 2.7642247676849365, "learning_rate": 1.0306417598655321e-05, "loss": 0.3648, "step": 25022 }, { "epoch": 4.084731235459778, "grad_norm": 3.7135326862335205, "learning_rate": 1.030577675772819e-05, "loss": 0.3788, "step": 25023 }, { "epoch": 4.084894494102281, "grad_norm": 3.8587985038757324, "learning_rate": 1.0305135915544124e-05, "loss": 0.4638, "step": 25024 }, { "epoch": 4.0850577527447856, "grad_norm": 3.164862632751465, "learning_rate": 1.030449507210576e-05, "loss": 0.3507, "step": 25025 }, { "epoch": 4.08522101138729, "grad_norm": 3.598888397216797, "learning_rate": 1.0303854227415733e-05, "loss": 0.3719, "step": 25026 }, { "epoch": 4.085384270029794, "grad_norm": 3.2538058757781982, "learning_rate": 1.0303213381476675e-05, "loss": 0.3578, "step": 25027 }, { "epoch": 4.085547528672299, "grad_norm": 3.328186511993408, "learning_rate": 1.0302572534291224e-05, "loss": 0.3341, "step": 25028 }, { "epoch": 4.085710787314803, "grad_norm": 2.850738525390625, "learning_rate": 1.0301931685862009e-05, "loss": 0.3366, "step": 25029 }, { "epoch": 4.085874045957308, "grad_norm": 2.6250410079956055, "learning_rate": 1.0301290836191664e-05, "loss": 0.3094, "step": 25030 }, { "epoch": 4.086037304599812, "grad_norm": 3.8023176193237305, "learning_rate": 1.030064998528283e-05, "loss": 0.3708, "step": 25031 }, { "epoch": 4.086200563242317, "grad_norm": 2.381748914718628, "learning_rate": 1.0300009133138135e-05, "loss": 0.3345, "step": 25032 }, { "epoch": 4.086363821884821, "grad_norm": 3.183206796646118, "learning_rate": 1.029936827976022e-05, "loss": 0.3347, "step": 25033 }, { "epoch": 4.0865270805273255, "grad_norm": 3.2381038665771484, "learning_rate": 1.0298727425151714e-05, "loss": 0.4081, "step": 25034 }, { "epoch": 4.08669033916983, "grad_norm": 3.6530849933624268, "learning_rate": 1.029808656931525e-05, "loss": 0.3657, "step": 25035 }, { "epoch": 4.086853597812334, "grad_norm": 2.7355308532714844, "learning_rate": 1.0297445712253465e-05, "loss": 0.3317, "step": 25036 }, { "epoch": 4.087016856454839, "grad_norm": 3.4610843658447266, "learning_rate": 1.0296804853968992e-05, "loss": 0.4218, "step": 25037 }, { "epoch": 4.087180115097343, "grad_norm": 3.0888571739196777, "learning_rate": 1.029616399446447e-05, "loss": 0.3402, "step": 25038 }, { "epoch": 4.087343373739848, "grad_norm": 3.6423494815826416, "learning_rate": 1.0295523133742526e-05, "loss": 0.3989, "step": 25039 }, { "epoch": 4.087506632382352, "grad_norm": 3.411383628845215, "learning_rate": 1.02948822718058e-05, "loss": 0.3346, "step": 25040 }, { "epoch": 4.087669891024857, "grad_norm": 3.824856996536255, "learning_rate": 1.0294241408656921e-05, "loss": 0.4257, "step": 25041 }, { "epoch": 4.08783314966736, "grad_norm": 3.2677764892578125, "learning_rate": 1.029360054429853e-05, "loss": 0.3203, "step": 25042 }, { "epoch": 4.0879964083098645, "grad_norm": 3.190596103668213, "learning_rate": 1.0292959678733258e-05, "loss": 0.3561, "step": 25043 }, { "epoch": 4.088159666952369, "grad_norm": 3.492520809173584, "learning_rate": 1.0292318811963739e-05, "loss": 0.6674, "step": 25044 }, { "epoch": 4.088322925594873, "grad_norm": 3.376227617263794, "learning_rate": 1.0291677943992608e-05, "loss": 0.3624, "step": 25045 }, { "epoch": 4.088486184237378, "grad_norm": 2.8602144718170166, "learning_rate": 1.0291037074822496e-05, "loss": 0.321, "step": 25046 }, { "epoch": 4.088649442879882, "grad_norm": 3.6706702709198, "learning_rate": 1.0290396204456043e-05, "loss": 0.3679, "step": 25047 }, { "epoch": 4.088812701522387, "grad_norm": 3.079193353652954, "learning_rate": 1.0289755332895883e-05, "loss": 0.3708, "step": 25048 }, { "epoch": 4.088975960164891, "grad_norm": 2.843824625015259, "learning_rate": 1.0289114460144648e-05, "loss": 0.3165, "step": 25049 }, { "epoch": 4.089139218807396, "grad_norm": 2.5546348094940186, "learning_rate": 1.028847358620497e-05, "loss": 0.2729, "step": 25050 }, { "epoch": 4.0893024774499, "grad_norm": 2.899808645248413, "learning_rate": 1.0287832711079486e-05, "loss": 0.299, "step": 25051 }, { "epoch": 4.0894657360924045, "grad_norm": 3.4399352073669434, "learning_rate": 1.0287191834770832e-05, "loss": 0.3197, "step": 25052 }, { "epoch": 4.089628994734909, "grad_norm": 3.3084893226623535, "learning_rate": 1.028655095728164e-05, "loss": 0.3487, "step": 25053 }, { "epoch": 4.089792253377413, "grad_norm": 3.369499921798706, "learning_rate": 1.0285910078614546e-05, "loss": 0.3805, "step": 25054 }, { "epoch": 4.089955512019918, "grad_norm": 3.7961807250976562, "learning_rate": 1.0285269198772183e-05, "loss": 0.3378, "step": 25055 }, { "epoch": 4.090118770662422, "grad_norm": 3.3248376846313477, "learning_rate": 1.0284628317757186e-05, "loss": 0.3648, "step": 25056 }, { "epoch": 4.090282029304927, "grad_norm": 4.076923847198486, "learning_rate": 1.028398743557219e-05, "loss": 0.4067, "step": 25057 }, { "epoch": 4.090445287947431, "grad_norm": 3.833143711090088, "learning_rate": 1.0283346552219826e-05, "loss": 0.3839, "step": 25058 }, { "epoch": 4.090608546589935, "grad_norm": 3.200003147125244, "learning_rate": 1.0282705667702734e-05, "loss": 0.3828, "step": 25059 }, { "epoch": 4.090771805232439, "grad_norm": 3.9563169479370117, "learning_rate": 1.0282064782023547e-05, "loss": 0.4344, "step": 25060 }, { "epoch": 4.0909350638749435, "grad_norm": 3.3186182975769043, "learning_rate": 1.0281423895184894e-05, "loss": 0.3718, "step": 25061 }, { "epoch": 4.091098322517448, "grad_norm": 4.035313129425049, "learning_rate": 1.0280783007189416e-05, "loss": 0.3576, "step": 25062 }, { "epoch": 4.091261581159952, "grad_norm": 3.203073263168335, "learning_rate": 1.0280142118039747e-05, "loss": 0.334, "step": 25063 }, { "epoch": 4.091424839802457, "grad_norm": 3.6957592964172363, "learning_rate": 1.0279501227738518e-05, "loss": 0.4059, "step": 25064 }, { "epoch": 4.091588098444961, "grad_norm": 3.0067198276519775, "learning_rate": 1.0278860336288363e-05, "loss": 0.3528, "step": 25065 }, { "epoch": 4.091751357087466, "grad_norm": 3.3637726306915283, "learning_rate": 1.0278219443691922e-05, "loss": 0.3209, "step": 25066 }, { "epoch": 4.09191461572997, "grad_norm": 3.1801772117614746, "learning_rate": 1.0277578549951822e-05, "loss": 0.3265, "step": 25067 }, { "epoch": 4.092077874372475, "grad_norm": 2.976229190826416, "learning_rate": 1.0276937655070705e-05, "loss": 0.3608, "step": 25068 }, { "epoch": 4.092241133014979, "grad_norm": 3.620293378829956, "learning_rate": 1.0276296759051199e-05, "loss": 0.4378, "step": 25069 }, { "epoch": 4.0924043916574835, "grad_norm": 3.454479932785034, "learning_rate": 1.0275655861895944e-05, "loss": 0.3469, "step": 25070 }, { "epoch": 4.092567650299988, "grad_norm": 3.935750961303711, "learning_rate": 1.027501496360757e-05, "loss": 0.3722, "step": 25071 }, { "epoch": 4.092730908942492, "grad_norm": 3.299715042114258, "learning_rate": 1.0274374064188717e-05, "loss": 0.3368, "step": 25072 }, { "epoch": 4.092894167584997, "grad_norm": 3.8936784267425537, "learning_rate": 1.027373316364201e-05, "loss": 0.3875, "step": 25073 }, { "epoch": 4.093057426227501, "grad_norm": 3.3007972240448, "learning_rate": 1.0273092261970092e-05, "loss": 0.3793, "step": 25074 }, { "epoch": 4.093220684870006, "grad_norm": 2.7121903896331787, "learning_rate": 1.0272451359175595e-05, "loss": 0.2965, "step": 25075 }, { "epoch": 4.09338394351251, "grad_norm": 3.76328706741333, "learning_rate": 1.0271810455261154e-05, "loss": 0.3708, "step": 25076 }, { "epoch": 4.093547202155014, "grad_norm": 4.076155662536621, "learning_rate": 1.0271169550229404e-05, "loss": 0.4115, "step": 25077 }, { "epoch": 4.093710460797518, "grad_norm": 2.734718084335327, "learning_rate": 1.0270528644082975e-05, "loss": 0.3009, "step": 25078 }, { "epoch": 4.0938737194400225, "grad_norm": 3.995725631713867, "learning_rate": 1.0269887736824504e-05, "loss": 0.403, "step": 25079 }, { "epoch": 4.094036978082527, "grad_norm": 3.9760329723358154, "learning_rate": 1.026924682845663e-05, "loss": 0.3858, "step": 25080 }, { "epoch": 4.094200236725031, "grad_norm": 3.0277857780456543, "learning_rate": 1.0268605918981981e-05, "loss": 0.3878, "step": 25081 }, { "epoch": 4.094363495367536, "grad_norm": 3.241549015045166, "learning_rate": 1.0267965008403199e-05, "loss": 0.3814, "step": 25082 }, { "epoch": 4.09452675401004, "grad_norm": 3.257174253463745, "learning_rate": 1.026732409672291e-05, "loss": 0.3176, "step": 25083 }, { "epoch": 4.094690012652545, "grad_norm": 2.6905105113983154, "learning_rate": 1.0266683183943755e-05, "loss": 0.3358, "step": 25084 }, { "epoch": 4.094853271295049, "grad_norm": 2.833268880844116, "learning_rate": 1.0266042270068362e-05, "loss": 0.3338, "step": 25085 }, { "epoch": 4.095016529937554, "grad_norm": 3.120702028274536, "learning_rate": 1.0265401355099374e-05, "loss": 0.3337, "step": 25086 }, { "epoch": 4.095179788580058, "grad_norm": 4.2421369552612305, "learning_rate": 1.026476043903942e-05, "loss": 0.3626, "step": 25087 }, { "epoch": 4.0953430472225625, "grad_norm": 3.6700479984283447, "learning_rate": 1.0264119521891137e-05, "loss": 0.3627, "step": 25088 }, { "epoch": 4.095506305865067, "grad_norm": 2.8414740562438965, "learning_rate": 1.0263478603657155e-05, "loss": 0.3193, "step": 25089 }, { "epoch": 4.095669564507571, "grad_norm": 3.5886173248291016, "learning_rate": 1.0262837684340113e-05, "loss": 0.3745, "step": 25090 }, { "epoch": 4.095832823150076, "grad_norm": 3.6548290252685547, "learning_rate": 1.0262196763942646e-05, "loss": 0.3672, "step": 25091 }, { "epoch": 4.09599608179258, "grad_norm": 4.270911693572998, "learning_rate": 1.0261555842467388e-05, "loss": 0.4466, "step": 25092 }, { "epoch": 4.096159340435085, "grad_norm": 2.781463861465454, "learning_rate": 1.0260914919916972e-05, "loss": 0.3143, "step": 25093 }, { "epoch": 4.096322599077588, "grad_norm": 3.280388355255127, "learning_rate": 1.0260273996294032e-05, "loss": 0.3499, "step": 25094 }, { "epoch": 4.096485857720093, "grad_norm": 3.188925266265869, "learning_rate": 1.0259633071601206e-05, "loss": 0.3726, "step": 25095 }, { "epoch": 4.096649116362597, "grad_norm": 3.666822910308838, "learning_rate": 1.0258992145841122e-05, "loss": 0.4127, "step": 25096 }, { "epoch": 4.0968123750051015, "grad_norm": 4.490756034851074, "learning_rate": 1.0258351219016424e-05, "loss": 0.4042, "step": 25097 }, { "epoch": 4.096975633647606, "grad_norm": 3.552269220352173, "learning_rate": 1.025771029112974e-05, "loss": 0.4194, "step": 25098 }, { "epoch": 4.09713889229011, "grad_norm": 3.052401304244995, "learning_rate": 1.0257069362183705e-05, "loss": 0.3659, "step": 25099 }, { "epoch": 4.097302150932615, "grad_norm": 2.74033260345459, "learning_rate": 1.0256428432180955e-05, "loss": 0.3185, "step": 25100 }, { "epoch": 4.097465409575119, "grad_norm": 4.581640720367432, "learning_rate": 1.0255787501124126e-05, "loss": 0.4103, "step": 25101 }, { "epoch": 4.097628668217624, "grad_norm": 3.090221881866455, "learning_rate": 1.025514656901585e-05, "loss": 0.3429, "step": 25102 }, { "epoch": 4.097791926860128, "grad_norm": 3.4992566108703613, "learning_rate": 1.0254505635858765e-05, "loss": 0.346, "step": 25103 }, { "epoch": 4.097955185502633, "grad_norm": 3.193420886993408, "learning_rate": 1.0253864701655502e-05, "loss": 0.3878, "step": 25104 }, { "epoch": 4.098118444145137, "grad_norm": 3.5657989978790283, "learning_rate": 1.0253223766408697e-05, "loss": 0.3867, "step": 25105 }, { "epoch": 4.0982817027876415, "grad_norm": 4.218535423278809, "learning_rate": 1.0252582830120986e-05, "loss": 0.4482, "step": 25106 }, { "epoch": 4.098444961430146, "grad_norm": 2.84028959274292, "learning_rate": 1.0251941892794998e-05, "loss": 0.3515, "step": 25107 }, { "epoch": 4.09860822007265, "grad_norm": 3.338057279586792, "learning_rate": 1.0251300954433377e-05, "loss": 0.3901, "step": 25108 }, { "epoch": 4.098771478715155, "grad_norm": 2.8920738697052, "learning_rate": 1.0250660015038749e-05, "loss": 0.352, "step": 25109 }, { "epoch": 4.098934737357659, "grad_norm": 2.872352123260498, "learning_rate": 1.0250019074613755e-05, "loss": 0.3307, "step": 25110 }, { "epoch": 4.099097996000164, "grad_norm": 3.4897971153259277, "learning_rate": 1.0249378133161025e-05, "loss": 0.3951, "step": 25111 }, { "epoch": 4.099261254642667, "grad_norm": 2.921917676925659, "learning_rate": 1.0248737190683198e-05, "loss": 0.3463, "step": 25112 }, { "epoch": 4.099424513285172, "grad_norm": 4.215516567230225, "learning_rate": 1.0248096247182903e-05, "loss": 0.3807, "step": 25113 }, { "epoch": 4.099587771927676, "grad_norm": 3.6587069034576416, "learning_rate": 1.0247455302662782e-05, "loss": 0.3547, "step": 25114 }, { "epoch": 4.0997510305701805, "grad_norm": 3.53202223777771, "learning_rate": 1.0246814357125464e-05, "loss": 0.4178, "step": 25115 }, { "epoch": 4.099914289212685, "grad_norm": 3.6252026557922363, "learning_rate": 1.0246173410573586e-05, "loss": 0.3562, "step": 25116 }, { "epoch": 4.100077547855189, "grad_norm": 3.4354803562164307, "learning_rate": 1.0245532463009779e-05, "loss": 0.381, "step": 25117 }, { "epoch": 4.100240806497694, "grad_norm": 4.125627040863037, "learning_rate": 1.0244891514436683e-05, "loss": 0.3514, "step": 25118 }, { "epoch": 4.100404065140198, "grad_norm": 4.7768988609313965, "learning_rate": 1.024425056485693e-05, "loss": 0.4548, "step": 25119 }, { "epoch": 4.100567323782703, "grad_norm": 4.325094699859619, "learning_rate": 1.0243609614273156e-05, "loss": 0.4054, "step": 25120 }, { "epoch": 4.100730582425207, "grad_norm": 4.27567720413208, "learning_rate": 1.0242968662687996e-05, "loss": 0.4079, "step": 25121 }, { "epoch": 4.100893841067712, "grad_norm": 3.8804991245269775, "learning_rate": 1.0242327710104084e-05, "loss": 0.3974, "step": 25122 }, { "epoch": 4.101057099710216, "grad_norm": 3.5408341884613037, "learning_rate": 1.0241686756524047e-05, "loss": 0.4366, "step": 25123 }, { "epoch": 4.1012203583527205, "grad_norm": 3.6727511882781982, "learning_rate": 1.0241045801950534e-05, "loss": 0.4076, "step": 25124 }, { "epoch": 4.101383616995225, "grad_norm": 3.582808256149292, "learning_rate": 1.024040484638617e-05, "loss": 0.3869, "step": 25125 }, { "epoch": 4.101546875637729, "grad_norm": 2.6803855895996094, "learning_rate": 1.0239763889833597e-05, "loss": 0.3165, "step": 25126 }, { "epoch": 4.101710134280234, "grad_norm": 3.559558153152466, "learning_rate": 1.023912293229544e-05, "loss": 0.3617, "step": 25127 }, { "epoch": 4.101873392922738, "grad_norm": 3.7275898456573486, "learning_rate": 1.0238481973774341e-05, "loss": 0.4192, "step": 25128 }, { "epoch": 4.102036651565243, "grad_norm": 4.063982009887695, "learning_rate": 1.023784101427293e-05, "loss": 0.3703, "step": 25129 }, { "epoch": 4.102199910207746, "grad_norm": 3.070763111114502, "learning_rate": 1.0237200053793845e-05, "loss": 0.3127, "step": 25130 }, { "epoch": 4.102363168850251, "grad_norm": 3.1550052165985107, "learning_rate": 1.0236559092339724e-05, "loss": 0.3824, "step": 25131 }, { "epoch": 4.102526427492755, "grad_norm": 3.411705255508423, "learning_rate": 1.0235918129913196e-05, "loss": 0.3814, "step": 25132 }, { "epoch": 4.1026896861352595, "grad_norm": 3.235711097717285, "learning_rate": 1.0235277166516896e-05, "loss": 0.332, "step": 25133 }, { "epoch": 4.102852944777764, "grad_norm": 3.6560230255126953, "learning_rate": 1.0234636202153461e-05, "loss": 0.3819, "step": 25134 }, { "epoch": 4.103016203420268, "grad_norm": 2.8835480213165283, "learning_rate": 1.0233995236825527e-05, "loss": 0.3744, "step": 25135 }, { "epoch": 4.103179462062773, "grad_norm": 3.8193089962005615, "learning_rate": 1.0233354270535727e-05, "loss": 0.4482, "step": 25136 }, { "epoch": 4.103342720705277, "grad_norm": 3.2266862392425537, "learning_rate": 1.0232713303286694e-05, "loss": 0.3874, "step": 25137 }, { "epoch": 4.103505979347782, "grad_norm": 4.473047256469727, "learning_rate": 1.0232072335081065e-05, "loss": 0.4468, "step": 25138 }, { "epoch": 4.103669237990286, "grad_norm": 3.3468422889709473, "learning_rate": 1.0231431365921474e-05, "loss": 0.3711, "step": 25139 }, { "epoch": 4.103832496632791, "grad_norm": 3.8744306564331055, "learning_rate": 1.0230790395810554e-05, "loss": 0.4121, "step": 25140 }, { "epoch": 4.103995755275295, "grad_norm": 3.281137704849243, "learning_rate": 1.0230149424750946e-05, "loss": 0.405, "step": 25141 }, { "epoch": 4.1041590139177995, "grad_norm": 3.0775146484375, "learning_rate": 1.022950845274528e-05, "loss": 0.3201, "step": 25142 }, { "epoch": 4.104322272560304, "grad_norm": 2.9445765018463135, "learning_rate": 1.022886747979619e-05, "loss": 0.3483, "step": 25143 }, { "epoch": 4.104485531202808, "grad_norm": 3.432309865951538, "learning_rate": 1.022822650590631e-05, "loss": 0.3623, "step": 25144 }, { "epoch": 4.104648789845313, "grad_norm": 4.231047630310059, "learning_rate": 1.0227585531078281e-05, "loss": 0.4092, "step": 25145 }, { "epoch": 4.104812048487817, "grad_norm": 3.2661633491516113, "learning_rate": 1.0226944555314733e-05, "loss": 0.3218, "step": 25146 }, { "epoch": 4.104975307130321, "grad_norm": 3.285850763320923, "learning_rate": 1.02263035786183e-05, "loss": 0.3409, "step": 25147 }, { "epoch": 4.105138565772825, "grad_norm": 3.344583749771118, "learning_rate": 1.022566260099162e-05, "loss": 0.3784, "step": 25148 }, { "epoch": 4.10530182441533, "grad_norm": 3.9586379528045654, "learning_rate": 1.0225021622437323e-05, "loss": 0.4199, "step": 25149 }, { "epoch": 4.105465083057834, "grad_norm": 3.2088887691497803, "learning_rate": 1.0224380642958052e-05, "loss": 0.3406, "step": 25150 }, { "epoch": 4.1056283417003385, "grad_norm": 3.6065468788146973, "learning_rate": 1.0223739662556434e-05, "loss": 0.3999, "step": 25151 }, { "epoch": 4.105791600342843, "grad_norm": 4.144979476928711, "learning_rate": 1.0223098681235106e-05, "loss": 0.4266, "step": 25152 }, { "epoch": 4.105954858985347, "grad_norm": 3.9661765098571777, "learning_rate": 1.0222457698996706e-05, "loss": 0.383, "step": 25153 }, { "epoch": 4.106118117627852, "grad_norm": 2.984358310699463, "learning_rate": 1.0221816715843864e-05, "loss": 0.3415, "step": 25154 }, { "epoch": 4.106281376270356, "grad_norm": 2.8152177333831787, "learning_rate": 1.0221175731779218e-05, "loss": 0.3094, "step": 25155 }, { "epoch": 4.106444634912861, "grad_norm": 3.545593500137329, "learning_rate": 1.0220534746805403e-05, "loss": 0.3574, "step": 25156 }, { "epoch": 4.106607893555365, "grad_norm": 3.4571852684020996, "learning_rate": 1.0219893760925053e-05, "loss": 0.3581, "step": 25157 }, { "epoch": 4.10677115219787, "grad_norm": 4.850034713745117, "learning_rate": 1.0219252774140803e-05, "loss": 1.1051, "step": 25158 }, { "epoch": 4.106934410840374, "grad_norm": 3.359192371368408, "learning_rate": 1.0218611786455286e-05, "loss": 0.3699, "step": 25159 }, { "epoch": 4.1070976694828785, "grad_norm": 3.3111698627471924, "learning_rate": 1.0217970797871139e-05, "loss": 0.3592, "step": 25160 }, { "epoch": 4.107260928125383, "grad_norm": 3.0913708209991455, "learning_rate": 1.0217329808390996e-05, "loss": 0.4048, "step": 25161 }, { "epoch": 4.107424186767887, "grad_norm": 3.736856460571289, "learning_rate": 1.0216688818017494e-05, "loss": 0.4074, "step": 25162 }, { "epoch": 4.107587445410392, "grad_norm": 3.5827136039733887, "learning_rate": 1.0216047826753264e-05, "loss": 0.4088, "step": 25163 }, { "epoch": 4.107750704052896, "grad_norm": 3.553032398223877, "learning_rate": 1.0215406834600943e-05, "loss": 0.3674, "step": 25164 }, { "epoch": 4.1079139626954, "grad_norm": 3.0450384616851807, "learning_rate": 1.021476584156317e-05, "loss": 0.346, "step": 25165 }, { "epoch": 4.108077221337904, "grad_norm": 3.9519524574279785, "learning_rate": 1.0214124847642572e-05, "loss": 0.407, "step": 25166 }, { "epoch": 4.108240479980409, "grad_norm": 3.0741336345672607, "learning_rate": 1.0213483852841786e-05, "loss": 0.3659, "step": 25167 }, { "epoch": 4.108403738622913, "grad_norm": 3.8650927543640137, "learning_rate": 1.021284285716345e-05, "loss": 0.4381, "step": 25168 }, { "epoch": 4.1085669972654175, "grad_norm": 2.889376163482666, "learning_rate": 1.0212201860610197e-05, "loss": 0.3643, "step": 25169 }, { "epoch": 4.108730255907922, "grad_norm": 3.7238688468933105, "learning_rate": 1.0211560863184666e-05, "loss": 0.3937, "step": 25170 }, { "epoch": 4.108893514550426, "grad_norm": 2.9753594398498535, "learning_rate": 1.0210919864889485e-05, "loss": 0.3227, "step": 25171 }, { "epoch": 4.109056773192931, "grad_norm": 2.9443869590759277, "learning_rate": 1.0210278865727292e-05, "loss": 0.3566, "step": 25172 }, { "epoch": 4.109220031835435, "grad_norm": 3.37497878074646, "learning_rate": 1.020963786570072e-05, "loss": 0.3799, "step": 25173 }, { "epoch": 4.10938329047794, "grad_norm": 3.0853617191314697, "learning_rate": 1.020899686481241e-05, "loss": 0.3695, "step": 25174 }, { "epoch": 4.109546549120444, "grad_norm": 2.7107105255126953, "learning_rate": 1.0208355863064993e-05, "loss": 0.3129, "step": 25175 }, { "epoch": 4.109709807762949, "grad_norm": 3.0965914726257324, "learning_rate": 1.0207714860461102e-05, "loss": 0.4165, "step": 25176 }, { "epoch": 4.109873066405453, "grad_norm": 3.1811132431030273, "learning_rate": 1.0207073857003374e-05, "loss": 0.3759, "step": 25177 }, { "epoch": 4.1100363250479575, "grad_norm": 4.1061577796936035, "learning_rate": 1.0206432852694441e-05, "loss": 0.3984, "step": 25178 }, { "epoch": 4.110199583690462, "grad_norm": 3.7324488162994385, "learning_rate": 1.020579184753694e-05, "loss": 0.4199, "step": 25179 }, { "epoch": 4.110362842332966, "grad_norm": 3.8432772159576416, "learning_rate": 1.0205150841533513e-05, "loss": 0.4445, "step": 25180 }, { "epoch": 4.110526100975471, "grad_norm": 4.531417369842529, "learning_rate": 1.0204509834686785e-05, "loss": 0.4053, "step": 25181 }, { "epoch": 4.110689359617975, "grad_norm": 3.1075947284698486, "learning_rate": 1.0203868826999393e-05, "loss": 0.3424, "step": 25182 }, { "epoch": 4.110852618260479, "grad_norm": 3.408677577972412, "learning_rate": 1.0203227818473972e-05, "loss": 0.3727, "step": 25183 }, { "epoch": 4.111015876902983, "grad_norm": 3.992217779159546, "learning_rate": 1.020258680911316e-05, "loss": 0.4687, "step": 25184 }, { "epoch": 4.111179135545488, "grad_norm": 3.162062883377075, "learning_rate": 1.0201945798919593e-05, "loss": 0.3479, "step": 25185 }, { "epoch": 4.111342394187992, "grad_norm": 3.822251796722412, "learning_rate": 1.0201304787895898e-05, "loss": 0.4195, "step": 25186 }, { "epoch": 4.1115056528304965, "grad_norm": 3.7764363288879395, "learning_rate": 1.020066377604472e-05, "loss": 0.3893, "step": 25187 }, { "epoch": 4.111668911473001, "grad_norm": 3.47859787940979, "learning_rate": 1.0200022763368685e-05, "loss": 0.3643, "step": 25188 }, { "epoch": 4.111832170115505, "grad_norm": 3.5654637813568115, "learning_rate": 1.0199381749870432e-05, "loss": 0.3351, "step": 25189 }, { "epoch": 4.11199542875801, "grad_norm": 3.074368715286255, "learning_rate": 1.0198740735552597e-05, "loss": 0.3658, "step": 25190 }, { "epoch": 4.112158687400514, "grad_norm": 3.4278924465179443, "learning_rate": 1.0198099720417816e-05, "loss": 0.3487, "step": 25191 }, { "epoch": 4.112321946043019, "grad_norm": 2.5251781940460205, "learning_rate": 1.0197458704468719e-05, "loss": 0.3194, "step": 25192 }, { "epoch": 4.112485204685523, "grad_norm": 2.665803909301758, "learning_rate": 1.0196817687707946e-05, "loss": 0.3297, "step": 25193 }, { "epoch": 4.112648463328028, "grad_norm": 3.515550136566162, "learning_rate": 1.0196176670138128e-05, "loss": 0.459, "step": 25194 }, { "epoch": 4.112811721970532, "grad_norm": 3.4699113368988037, "learning_rate": 1.01955356517619e-05, "loss": 0.3865, "step": 25195 }, { "epoch": 4.1129749806130365, "grad_norm": 3.264993667602539, "learning_rate": 1.0194894632581903e-05, "loss": 0.4064, "step": 25196 }, { "epoch": 4.113138239255541, "grad_norm": 3.212883234024048, "learning_rate": 1.0194253612600765e-05, "loss": 0.394, "step": 25197 }, { "epoch": 4.113301497898045, "grad_norm": 3.152170419692993, "learning_rate": 1.0193612591821125e-05, "loss": 0.3743, "step": 25198 }, { "epoch": 4.11346475654055, "grad_norm": 3.5486292839050293, "learning_rate": 1.0192971570245615e-05, "loss": 0.3946, "step": 25199 }, { "epoch": 4.113628015183053, "grad_norm": 3.587872266769409, "learning_rate": 1.0192330547876871e-05, "loss": 0.4029, "step": 25200 }, { "epoch": 4.113791273825558, "grad_norm": 3.7453081607818604, "learning_rate": 1.0191689524717532e-05, "loss": 0.3979, "step": 25201 }, { "epoch": 4.113954532468062, "grad_norm": 3.355295181274414, "learning_rate": 1.0191048500770228e-05, "loss": 0.3579, "step": 25202 }, { "epoch": 4.114117791110567, "grad_norm": 3.7782175540924072, "learning_rate": 1.0190407476037598e-05, "loss": 0.3698, "step": 25203 }, { "epoch": 4.114281049753071, "grad_norm": 3.593515634536743, "learning_rate": 1.0189766450522272e-05, "loss": 0.3947, "step": 25204 }, { "epoch": 4.1144443083955755, "grad_norm": 3.1588544845581055, "learning_rate": 1.0189125424226889e-05, "loss": 0.3663, "step": 25205 }, { "epoch": 4.11460756703808, "grad_norm": 3.222888946533203, "learning_rate": 1.0188484397154083e-05, "loss": 0.3579, "step": 25206 }, { "epoch": 4.114770825680584, "grad_norm": 3.941452980041504, "learning_rate": 1.0187843369306487e-05, "loss": 0.4155, "step": 25207 }, { "epoch": 4.114934084323089, "grad_norm": 2.676253318786621, "learning_rate": 1.018720234068674e-05, "loss": 0.3526, "step": 25208 }, { "epoch": 4.115097342965593, "grad_norm": 3.1845102310180664, "learning_rate": 1.0186561311297476e-05, "loss": 0.3434, "step": 25209 }, { "epoch": 4.115260601608098, "grad_norm": 3.3046557903289795, "learning_rate": 1.0185920281141328e-05, "loss": 0.3745, "step": 25210 }, { "epoch": 4.115423860250602, "grad_norm": 3.6350088119506836, "learning_rate": 1.0185279250220927e-05, "loss": 0.4118, "step": 25211 }, { "epoch": 4.115587118893107, "grad_norm": 2.966169834136963, "learning_rate": 1.0184638218538918e-05, "loss": 0.3547, "step": 25212 }, { "epoch": 4.115750377535611, "grad_norm": 3.5847535133361816, "learning_rate": 1.0183997186097929e-05, "loss": 0.4435, "step": 25213 }, { "epoch": 4.1159136361781155, "grad_norm": 3.908595085144043, "learning_rate": 1.01833561529006e-05, "loss": 0.4117, "step": 25214 }, { "epoch": 4.11607689482062, "grad_norm": 3.3188719749450684, "learning_rate": 1.0182715118949563e-05, "loss": 0.4145, "step": 25215 }, { "epoch": 4.116240153463124, "grad_norm": 3.123011827468872, "learning_rate": 1.0182074084247451e-05, "loss": 0.3656, "step": 25216 }, { "epoch": 4.116403412105629, "grad_norm": 3.599533796310425, "learning_rate": 1.0181433048796898e-05, "loss": 0.3678, "step": 25217 }, { "epoch": 4.116566670748132, "grad_norm": 3.052414894104004, "learning_rate": 1.0180792012600544e-05, "loss": 0.2949, "step": 25218 }, { "epoch": 4.116729929390637, "grad_norm": 3.413123607635498, "learning_rate": 1.0180150975661027e-05, "loss": 0.3659, "step": 25219 }, { "epoch": 4.116893188033141, "grad_norm": 3.344944715499878, "learning_rate": 1.0179509937980973e-05, "loss": 0.3382, "step": 25220 }, { "epoch": 4.117056446675646, "grad_norm": 3.799701452255249, "learning_rate": 1.0178868899563023e-05, "loss": 0.4927, "step": 25221 }, { "epoch": 4.11721970531815, "grad_norm": 3.293466329574585, "learning_rate": 1.017822786040981e-05, "loss": 0.4153, "step": 25222 }, { "epoch": 4.1173829639606545, "grad_norm": 2.860534191131592, "learning_rate": 1.0177586820523968e-05, "loss": 0.3106, "step": 25223 }, { "epoch": 4.117546222603159, "grad_norm": 3.217808246612549, "learning_rate": 1.0176945779908138e-05, "loss": 0.3253, "step": 25224 }, { "epoch": 4.117709481245663, "grad_norm": 3.3878180980682373, "learning_rate": 1.0176304738564948e-05, "loss": 0.3461, "step": 25225 }, { "epoch": 4.117872739888168, "grad_norm": 3.726743459701538, "learning_rate": 1.0175663696497036e-05, "loss": 0.3586, "step": 25226 }, { "epoch": 4.118035998530672, "grad_norm": 2.876380443572998, "learning_rate": 1.0175022653707037e-05, "loss": 0.3615, "step": 25227 }, { "epoch": 4.118199257173177, "grad_norm": 3.065001964569092, "learning_rate": 1.0174381610197584e-05, "loss": 0.3955, "step": 25228 }, { "epoch": 4.118362515815681, "grad_norm": 3.4955756664276123, "learning_rate": 1.0173740565971317e-05, "loss": 0.4486, "step": 25229 }, { "epoch": 4.118525774458186, "grad_norm": 3.9134058952331543, "learning_rate": 1.0173099521030867e-05, "loss": 0.4406, "step": 25230 }, { "epoch": 4.11868903310069, "grad_norm": 4.345156669616699, "learning_rate": 1.0172458475378871e-05, "loss": 0.4866, "step": 25231 }, { "epoch": 4.1188522917431944, "grad_norm": 3.255432605743408, "learning_rate": 1.0171817429017962e-05, "loss": 0.3865, "step": 25232 }, { "epoch": 4.119015550385699, "grad_norm": 3.0489494800567627, "learning_rate": 1.0171176381950776e-05, "loss": 0.4463, "step": 25233 }, { "epoch": 4.119178809028203, "grad_norm": 3.36381459236145, "learning_rate": 1.017053533417995e-05, "loss": 0.3683, "step": 25234 }, { "epoch": 4.119342067670708, "grad_norm": 2.8388519287109375, "learning_rate": 1.0169894285708117e-05, "loss": 0.3407, "step": 25235 }, { "epoch": 4.119505326313211, "grad_norm": 3.338221788406372, "learning_rate": 1.0169253236537912e-05, "loss": 0.3385, "step": 25236 }, { "epoch": 4.119668584955716, "grad_norm": 3.522418737411499, "learning_rate": 1.0168612186671973e-05, "loss": 0.349, "step": 25237 }, { "epoch": 4.11983184359822, "grad_norm": 3.568596363067627, "learning_rate": 1.016797113611293e-05, "loss": 0.375, "step": 25238 }, { "epoch": 4.119995102240725, "grad_norm": 3.268765449523926, "learning_rate": 1.0167330084863423e-05, "loss": 0.3683, "step": 25239 }, { "epoch": 4.120158360883229, "grad_norm": 3.061699867248535, "learning_rate": 1.0166689032926085e-05, "loss": 0.323, "step": 25240 }, { "epoch": 4.1203216195257335, "grad_norm": 4.542595863342285, "learning_rate": 1.016604798030355e-05, "loss": 0.4448, "step": 25241 }, { "epoch": 4.120484878168238, "grad_norm": 3.22591495513916, "learning_rate": 1.0165406926998457e-05, "loss": 0.3688, "step": 25242 }, { "epoch": 4.120648136810742, "grad_norm": 3.618981122970581, "learning_rate": 1.0164765873013437e-05, "loss": 0.3714, "step": 25243 }, { "epoch": 4.120811395453247, "grad_norm": 3.569927453994751, "learning_rate": 1.0164124818351126e-05, "loss": 0.383, "step": 25244 }, { "epoch": 4.120974654095751, "grad_norm": 3.500669240951538, "learning_rate": 1.0163483763014162e-05, "loss": 0.3906, "step": 25245 }, { "epoch": 4.121137912738256, "grad_norm": 3.4224119186401367, "learning_rate": 1.0162842707005176e-05, "loss": 0.3286, "step": 25246 }, { "epoch": 4.12130117138076, "grad_norm": 3.488501787185669, "learning_rate": 1.0162201650326806e-05, "loss": 0.3686, "step": 25247 }, { "epoch": 4.121464430023265, "grad_norm": 2.8864994049072266, "learning_rate": 1.0161560592981688e-05, "loss": 0.3681, "step": 25248 }, { "epoch": 4.121627688665769, "grad_norm": 3.671908378601074, "learning_rate": 1.0160919534972455e-05, "loss": 0.3969, "step": 25249 }, { "epoch": 4.1217909473082734, "grad_norm": 3.2525265216827393, "learning_rate": 1.0160278476301739e-05, "loss": 0.3722, "step": 25250 }, { "epoch": 4.121954205950778, "grad_norm": 3.8087968826293945, "learning_rate": 1.0159637416972178e-05, "loss": 0.3813, "step": 25251 }, { "epoch": 4.122117464593282, "grad_norm": 3.1676366329193115, "learning_rate": 1.0158996356986413e-05, "loss": 0.3614, "step": 25252 }, { "epoch": 4.122280723235786, "grad_norm": 3.652458667755127, "learning_rate": 1.0158355296347073e-05, "loss": 0.3988, "step": 25253 }, { "epoch": 4.12244398187829, "grad_norm": 3.226377248764038, "learning_rate": 1.0157714235056792e-05, "loss": 0.3862, "step": 25254 }, { "epoch": 4.122607240520795, "grad_norm": 3.2711222171783447, "learning_rate": 1.0157073173118207e-05, "loss": 0.3565, "step": 25255 }, { "epoch": 4.122770499163299, "grad_norm": 3.374197483062744, "learning_rate": 1.0156432110533957e-05, "loss": 0.3958, "step": 25256 }, { "epoch": 4.122933757805804, "grad_norm": 3.765275478363037, "learning_rate": 1.0155791047306669e-05, "loss": 0.3668, "step": 25257 }, { "epoch": 4.123097016448308, "grad_norm": 3.730966806411743, "learning_rate": 1.0155149983438989e-05, "loss": 0.4617, "step": 25258 }, { "epoch": 4.1232602750908125, "grad_norm": 3.2428059577941895, "learning_rate": 1.0154508918933542e-05, "loss": 0.3198, "step": 25259 }, { "epoch": 4.123423533733317, "grad_norm": 2.9875705242156982, "learning_rate": 1.0153867853792967e-05, "loss": 0.3301, "step": 25260 }, { "epoch": 4.123586792375821, "grad_norm": 3.4075498580932617, "learning_rate": 1.0153226788019897e-05, "loss": 0.3446, "step": 25261 }, { "epoch": 4.123750051018326, "grad_norm": 3.3688104152679443, "learning_rate": 1.0152585721616972e-05, "loss": 0.4099, "step": 25262 }, { "epoch": 4.12391330966083, "grad_norm": 3.952451467514038, "learning_rate": 1.0151944654586827e-05, "loss": 0.4048, "step": 25263 }, { "epoch": 4.124076568303335, "grad_norm": 3.4395437240600586, "learning_rate": 1.0151303586932093e-05, "loss": 0.3856, "step": 25264 }, { "epoch": 4.124239826945839, "grad_norm": 2.962939500808716, "learning_rate": 1.0150662518655408e-05, "loss": 0.3355, "step": 25265 }, { "epoch": 4.124403085588344, "grad_norm": 4.443884372711182, "learning_rate": 1.0150021449759404e-05, "loss": 0.4568, "step": 25266 }, { "epoch": 4.124566344230848, "grad_norm": 3.252734422683716, "learning_rate": 1.0149380380246715e-05, "loss": 0.3626, "step": 25267 }, { "epoch": 4.124729602873352, "grad_norm": 3.466290235519409, "learning_rate": 1.0148739310119989e-05, "loss": 0.4551, "step": 25268 }, { "epoch": 4.124892861515857, "grad_norm": 3.040505886077881, "learning_rate": 1.0148098239381848e-05, "loss": 0.3649, "step": 25269 }, { "epoch": 4.12505612015836, "grad_norm": 3.63778018951416, "learning_rate": 1.014745716803493e-05, "loss": 0.3703, "step": 25270 }, { "epoch": 4.125219378800865, "grad_norm": 3.0144944190979004, "learning_rate": 1.014681609608187e-05, "loss": 0.3552, "step": 25271 }, { "epoch": 4.125382637443369, "grad_norm": 3.554784059524536, "learning_rate": 1.0146175023525305e-05, "loss": 0.3843, "step": 25272 }, { "epoch": 4.125545896085874, "grad_norm": 3.1149728298187256, "learning_rate": 1.0145533950367872e-05, "loss": 0.389, "step": 25273 }, { "epoch": 4.125709154728378, "grad_norm": 3.3687539100646973, "learning_rate": 1.0144892876612202e-05, "loss": 0.3799, "step": 25274 }, { "epoch": 4.125872413370883, "grad_norm": 3.548988103866577, "learning_rate": 1.0144251802260933e-05, "loss": 0.3793, "step": 25275 }, { "epoch": 4.126035672013387, "grad_norm": 3.839909791946411, "learning_rate": 1.01436107273167e-05, "loss": 0.4121, "step": 25276 }, { "epoch": 4.1261989306558915, "grad_norm": 4.023718357086182, "learning_rate": 1.0142969651782135e-05, "loss": 0.4479, "step": 25277 }, { "epoch": 4.126362189298396, "grad_norm": 3.499866008758545, "learning_rate": 1.0142328575659879e-05, "loss": 0.3746, "step": 25278 }, { "epoch": 4.1265254479409, "grad_norm": 3.054211378097534, "learning_rate": 1.0141687498952561e-05, "loss": 0.388, "step": 25279 }, { "epoch": 4.126688706583405, "grad_norm": 3.5844037532806396, "learning_rate": 1.014104642166282e-05, "loss": 0.4104, "step": 25280 }, { "epoch": 4.126851965225909, "grad_norm": 3.3033268451690674, "learning_rate": 1.0140405343793291e-05, "loss": 0.3748, "step": 25281 }, { "epoch": 4.127015223868414, "grad_norm": 3.3101236820220947, "learning_rate": 1.0139764265346607e-05, "loss": 0.3711, "step": 25282 }, { "epoch": 4.127178482510918, "grad_norm": 3.6068923473358154, "learning_rate": 1.0139123186325407e-05, "loss": 0.3607, "step": 25283 }, { "epoch": 4.127341741153423, "grad_norm": 3.2627973556518555, "learning_rate": 1.0138482106732323e-05, "loss": 0.3699, "step": 25284 }, { "epoch": 4.127504999795927, "grad_norm": 3.6472625732421875, "learning_rate": 1.0137841026569992e-05, "loss": 0.4071, "step": 25285 }, { "epoch": 4.127668258438431, "grad_norm": 3.947784900665283, "learning_rate": 1.0137199945841049e-05, "loss": 0.7781, "step": 25286 }, { "epoch": 4.127831517080936, "grad_norm": 3.390479326248169, "learning_rate": 1.0136558864548129e-05, "loss": 0.3519, "step": 25287 }, { "epoch": 4.12799477572344, "grad_norm": 3.329767942428589, "learning_rate": 1.0135917782693865e-05, "loss": 0.3701, "step": 25288 }, { "epoch": 4.128158034365944, "grad_norm": 3.1941049098968506, "learning_rate": 1.0135276700280896e-05, "loss": 0.3771, "step": 25289 }, { "epoch": 4.128321293008448, "grad_norm": 2.5410938262939453, "learning_rate": 1.0134635617311854e-05, "loss": 0.2874, "step": 25290 }, { "epoch": 4.128484551650953, "grad_norm": 3.4503884315490723, "learning_rate": 1.0133994533789377e-05, "loss": 0.3949, "step": 25291 }, { "epoch": 4.128647810293457, "grad_norm": 3.2011606693267822, "learning_rate": 1.01333534497161e-05, "loss": 0.4168, "step": 25292 }, { "epoch": 4.128811068935962, "grad_norm": 3.5804569721221924, "learning_rate": 1.013271236509466e-05, "loss": 0.3929, "step": 25293 }, { "epoch": 4.128974327578466, "grad_norm": 3.5259222984313965, "learning_rate": 1.0132071279927682e-05, "loss": 0.3874, "step": 25294 }, { "epoch": 4.1291375862209705, "grad_norm": 3.6873271465301514, "learning_rate": 1.0131430194217812e-05, "loss": 0.4272, "step": 25295 }, { "epoch": 4.129300844863475, "grad_norm": 3.5072224140167236, "learning_rate": 1.0130789107967684e-05, "loss": 0.3967, "step": 25296 }, { "epoch": 4.129464103505979, "grad_norm": 3.7697646617889404, "learning_rate": 1.0130148021179933e-05, "loss": 0.3659, "step": 25297 }, { "epoch": 4.129627362148484, "grad_norm": 3.3893327713012695, "learning_rate": 1.0129506933857189e-05, "loss": 0.3577, "step": 25298 }, { "epoch": 4.129790620790988, "grad_norm": 3.977874755859375, "learning_rate": 1.012886584600209e-05, "loss": 0.36, "step": 25299 }, { "epoch": 4.129953879433493, "grad_norm": 3.511056900024414, "learning_rate": 1.0128224757617272e-05, "loss": 0.332, "step": 25300 }, { "epoch": 4.130117138075997, "grad_norm": 2.7880661487579346, "learning_rate": 1.0127583668705374e-05, "loss": 0.3165, "step": 25301 }, { "epoch": 4.130280396718502, "grad_norm": 3.032735586166382, "learning_rate": 1.0126942579269029e-05, "loss": 0.367, "step": 25302 }, { "epoch": 4.130443655361006, "grad_norm": 4.4074625968933105, "learning_rate": 1.0126301489310868e-05, "loss": 0.4732, "step": 25303 }, { "epoch": 4.13060691400351, "grad_norm": 3.7512903213500977, "learning_rate": 1.0125660398833528e-05, "loss": 0.3744, "step": 25304 }, { "epoch": 4.130770172646015, "grad_norm": 3.2831497192382812, "learning_rate": 1.0125019307839645e-05, "loss": 0.3225, "step": 25305 }, { "epoch": 4.130933431288518, "grad_norm": 3.658618450164795, "learning_rate": 1.0124378216331856e-05, "loss": 0.372, "step": 25306 }, { "epoch": 4.131096689931023, "grad_norm": 3.244368314743042, "learning_rate": 1.0123737124312799e-05, "loss": 0.3924, "step": 25307 }, { "epoch": 4.131259948573527, "grad_norm": 3.3728129863739014, "learning_rate": 1.0123096031785103e-05, "loss": 0.352, "step": 25308 }, { "epoch": 4.131423207216032, "grad_norm": 3.7793612480163574, "learning_rate": 1.0122454938751405e-05, "loss": 0.509, "step": 25309 }, { "epoch": 4.131586465858536, "grad_norm": 3.2323732376098633, "learning_rate": 1.012181384521434e-05, "loss": 0.3856, "step": 25310 }, { "epoch": 4.131749724501041, "grad_norm": 3.2468743324279785, "learning_rate": 1.0121172751176542e-05, "loss": 0.3238, "step": 25311 }, { "epoch": 4.131912983143545, "grad_norm": 3.3724007606506348, "learning_rate": 1.0120531656640655e-05, "loss": 0.4288, "step": 25312 }, { "epoch": 4.1320762417860495, "grad_norm": 2.903409242630005, "learning_rate": 1.0119890561609304e-05, "loss": 0.334, "step": 25313 }, { "epoch": 4.132239500428554, "grad_norm": 3.186018466949463, "learning_rate": 1.0119249466085128e-05, "loss": 0.3825, "step": 25314 }, { "epoch": 4.132402759071058, "grad_norm": 3.649690628051758, "learning_rate": 1.0118608370070763e-05, "loss": 0.4198, "step": 25315 }, { "epoch": 4.132566017713563, "grad_norm": 3.0461339950561523, "learning_rate": 1.0117967273568843e-05, "loss": 0.3853, "step": 25316 }, { "epoch": 4.132729276356067, "grad_norm": 3.3848440647125244, "learning_rate": 1.0117326176582007e-05, "loss": 0.3137, "step": 25317 }, { "epoch": 4.132892534998572, "grad_norm": 3.337852954864502, "learning_rate": 1.0116685079112887e-05, "loss": 0.3951, "step": 25318 }, { "epoch": 4.133055793641076, "grad_norm": 3.3490872383117676, "learning_rate": 1.0116043981164116e-05, "loss": 0.3592, "step": 25319 }, { "epoch": 4.1332190522835806, "grad_norm": 3.463494300842285, "learning_rate": 1.0115402882738334e-05, "loss": 0.3935, "step": 25320 }, { "epoch": 4.133382310926085, "grad_norm": 4.141199111938477, "learning_rate": 1.0114761783838174e-05, "loss": 0.4396, "step": 25321 }, { "epoch": 4.133545569568589, "grad_norm": 3.9208948612213135, "learning_rate": 1.011412068446627e-05, "loss": 0.3734, "step": 25322 }, { "epoch": 4.133708828211093, "grad_norm": 3.0635392665863037, "learning_rate": 1.011347958462526e-05, "loss": 0.3024, "step": 25323 }, { "epoch": 4.133872086853597, "grad_norm": 2.6249337196350098, "learning_rate": 1.0112838484317779e-05, "loss": 0.3127, "step": 25324 }, { "epoch": 4.134035345496102, "grad_norm": 3.5877981185913086, "learning_rate": 1.011219738354646e-05, "loss": 0.3226, "step": 25325 }, { "epoch": 4.134198604138606, "grad_norm": 4.109586715698242, "learning_rate": 1.0111556282313941e-05, "loss": 0.4628, "step": 25326 }, { "epoch": 4.134361862781111, "grad_norm": 3.8102519512176514, "learning_rate": 1.0110915180622859e-05, "loss": 0.4097, "step": 25327 }, { "epoch": 4.134525121423615, "grad_norm": 3.7285563945770264, "learning_rate": 1.0110274078475842e-05, "loss": 0.4138, "step": 25328 }, { "epoch": 4.13468838006612, "grad_norm": 3.6209158897399902, "learning_rate": 1.0109632975875532e-05, "loss": 0.3857, "step": 25329 }, { "epoch": 4.134851638708624, "grad_norm": 3.627974033355713, "learning_rate": 1.0108991872824561e-05, "loss": 0.4195, "step": 25330 }, { "epoch": 4.1350148973511285, "grad_norm": 3.198209762573242, "learning_rate": 1.0108350769325565e-05, "loss": 0.3266, "step": 25331 }, { "epoch": 4.135178155993633, "grad_norm": 3.746628999710083, "learning_rate": 1.0107709665381183e-05, "loss": 0.3767, "step": 25332 }, { "epoch": 4.135341414636137, "grad_norm": 3.8547866344451904, "learning_rate": 1.0107068560994044e-05, "loss": 0.3974, "step": 25333 }, { "epoch": 4.135504673278642, "grad_norm": 3.4949350357055664, "learning_rate": 1.010642745616679e-05, "loss": 0.3646, "step": 25334 }, { "epoch": 4.135667931921146, "grad_norm": 3.080646276473999, "learning_rate": 1.010578635090205e-05, "loss": 0.3347, "step": 25335 }, { "epoch": 4.135831190563651, "grad_norm": 3.2991268634796143, "learning_rate": 1.0105145245202463e-05, "loss": 0.3462, "step": 25336 }, { "epoch": 4.135994449206155, "grad_norm": 3.955984115600586, "learning_rate": 1.0104504139070667e-05, "loss": 0.3881, "step": 25337 }, { "epoch": 4.1361577078486595, "grad_norm": 3.587089776992798, "learning_rate": 1.010386303250929e-05, "loss": 0.4314, "step": 25338 }, { "epoch": 4.136320966491164, "grad_norm": 3.259216547012329, "learning_rate": 1.0103221925520973e-05, "loss": 0.3729, "step": 25339 }, { "epoch": 4.136484225133668, "grad_norm": 3.6165904998779297, "learning_rate": 1.0102580818108347e-05, "loss": 0.382, "step": 25340 }, { "epoch": 4.136647483776172, "grad_norm": 3.313872814178467, "learning_rate": 1.010193971027405e-05, "loss": 0.3681, "step": 25341 }, { "epoch": 4.136810742418676, "grad_norm": 3.3893465995788574, "learning_rate": 1.0101298602020724e-05, "loss": 0.4047, "step": 25342 }, { "epoch": 4.136974001061181, "grad_norm": 3.160946846008301, "learning_rate": 1.010065749335099e-05, "loss": 0.3678, "step": 25343 }, { "epoch": 4.137137259703685, "grad_norm": 4.137792110443115, "learning_rate": 1.0100016384267493e-05, "loss": 0.4237, "step": 25344 }, { "epoch": 4.13730051834619, "grad_norm": 3.4435853958129883, "learning_rate": 1.0099375274772867e-05, "loss": 0.354, "step": 25345 }, { "epoch": 4.137463776988694, "grad_norm": 3.1325278282165527, "learning_rate": 1.009873416486975e-05, "loss": 0.3327, "step": 25346 }, { "epoch": 4.137627035631199, "grad_norm": 3.695359945297241, "learning_rate": 1.0098093054560772e-05, "loss": 0.4451, "step": 25347 }, { "epoch": 4.137790294273703, "grad_norm": 3.5578742027282715, "learning_rate": 1.0097451943848568e-05, "loss": 0.4159, "step": 25348 }, { "epoch": 4.1379535529162075, "grad_norm": 3.3576745986938477, "learning_rate": 1.0096810832735775e-05, "loss": 0.3499, "step": 25349 }, { "epoch": 4.138116811558712, "grad_norm": 3.286654233932495, "learning_rate": 1.0096169721225033e-05, "loss": 0.3864, "step": 25350 }, { "epoch": 4.138280070201216, "grad_norm": 3.812392234802246, "learning_rate": 1.0095528609318975e-05, "loss": 0.4345, "step": 25351 }, { "epoch": 4.138443328843721, "grad_norm": 3.6391420364379883, "learning_rate": 1.0094887497020231e-05, "loss": 0.3713, "step": 25352 }, { "epoch": 4.138606587486225, "grad_norm": 4.434593200683594, "learning_rate": 1.0094246384331444e-05, "loss": 0.4159, "step": 25353 }, { "epoch": 4.13876984612873, "grad_norm": 3.82488751411438, "learning_rate": 1.0093605271255242e-05, "loss": 0.3861, "step": 25354 }, { "epoch": 4.138933104771234, "grad_norm": 3.548646926879883, "learning_rate": 1.0092964157794262e-05, "loss": 0.3673, "step": 25355 }, { "epoch": 4.1390963634137385, "grad_norm": 3.1582536697387695, "learning_rate": 1.0092323043951148e-05, "loss": 0.3595, "step": 25356 }, { "epoch": 4.139259622056243, "grad_norm": 2.8652191162109375, "learning_rate": 1.0091681929728525e-05, "loss": 0.3676, "step": 25357 }, { "epoch": 4.139422880698747, "grad_norm": 3.4743528366088867, "learning_rate": 1.0091040815129034e-05, "loss": 0.3679, "step": 25358 }, { "epoch": 4.139586139341251, "grad_norm": 3.0346860885620117, "learning_rate": 1.0090399700155306e-05, "loss": 0.3309, "step": 25359 }, { "epoch": 4.139749397983755, "grad_norm": 3.392461061477661, "learning_rate": 1.008975858480998e-05, "loss": 0.3631, "step": 25360 }, { "epoch": 4.13991265662626, "grad_norm": 3.2844057083129883, "learning_rate": 1.0089117469095692e-05, "loss": 0.3309, "step": 25361 }, { "epoch": 4.140075915268764, "grad_norm": 3.6167666912078857, "learning_rate": 1.0088476353015075e-05, "loss": 0.3728, "step": 25362 }, { "epoch": 4.140239173911269, "grad_norm": 3.7369675636291504, "learning_rate": 1.0087835236570764e-05, "loss": 0.4133, "step": 25363 }, { "epoch": 4.140402432553773, "grad_norm": 3.8952877521514893, "learning_rate": 1.0087194119765396e-05, "loss": 0.4877, "step": 25364 }, { "epoch": 4.140565691196278, "grad_norm": 2.6582140922546387, "learning_rate": 1.0086553002601607e-05, "loss": 0.3245, "step": 25365 }, { "epoch": 4.140728949838782, "grad_norm": 3.341599941253662, "learning_rate": 1.008591188508203e-05, "loss": 0.3969, "step": 25366 }, { "epoch": 4.1408922084812865, "grad_norm": 3.417479991912842, "learning_rate": 1.00852707672093e-05, "loss": 0.3619, "step": 25367 }, { "epoch": 4.141055467123791, "grad_norm": 3.3500921726226807, "learning_rate": 1.0084629648986057e-05, "loss": 0.4024, "step": 25368 }, { "epoch": 4.141218725766295, "grad_norm": 3.5471158027648926, "learning_rate": 1.0083988530414934e-05, "loss": 0.3773, "step": 25369 }, { "epoch": 4.1413819844088, "grad_norm": 3.169048309326172, "learning_rate": 1.0083347411498563e-05, "loss": 0.3849, "step": 25370 }, { "epoch": 4.141545243051304, "grad_norm": 3.7500710487365723, "learning_rate": 1.0082706292239586e-05, "loss": 0.3929, "step": 25371 }, { "epoch": 4.141708501693809, "grad_norm": 3.166532516479492, "learning_rate": 1.0082065172640631e-05, "loss": 0.4022, "step": 25372 }, { "epoch": 4.141871760336313, "grad_norm": 2.8458251953125, "learning_rate": 1.0081424052704339e-05, "loss": 0.3447, "step": 25373 }, { "epoch": 4.1420350189788175, "grad_norm": 3.017512321472168, "learning_rate": 1.0080782932433343e-05, "loss": 0.3253, "step": 25374 }, { "epoch": 4.142198277621322, "grad_norm": 3.590214967727661, "learning_rate": 1.0080141811830277e-05, "loss": 0.3717, "step": 25375 }, { "epoch": 4.1423615362638255, "grad_norm": 4.224532604217529, "learning_rate": 1.007950069089778e-05, "loss": 0.4492, "step": 25376 }, { "epoch": 4.14252479490633, "grad_norm": 2.82572340965271, "learning_rate": 1.0078859569638485e-05, "loss": 0.3135, "step": 25377 }, { "epoch": 4.142688053548834, "grad_norm": 2.953143835067749, "learning_rate": 1.007821844805503e-05, "loss": 0.3769, "step": 25378 }, { "epoch": 4.142851312191339, "grad_norm": 3.7731473445892334, "learning_rate": 1.0077577326150047e-05, "loss": 0.3763, "step": 25379 }, { "epoch": 4.143014570833843, "grad_norm": 3.410054922103882, "learning_rate": 1.0076936203926174e-05, "loss": 0.3483, "step": 25380 }, { "epoch": 4.143177829476348, "grad_norm": 3.3210883140563965, "learning_rate": 1.0076295081386048e-05, "loss": 0.4238, "step": 25381 }, { "epoch": 4.143341088118852, "grad_norm": 3.316075563430786, "learning_rate": 1.0075653958532294e-05, "loss": 0.384, "step": 25382 }, { "epoch": 4.143504346761357, "grad_norm": 3.2955398559570312, "learning_rate": 1.007501283536756e-05, "loss": 0.3356, "step": 25383 }, { "epoch": 4.143667605403861, "grad_norm": 3.573782205581665, "learning_rate": 1.0074371711894477e-05, "loss": 0.3935, "step": 25384 }, { "epoch": 4.1438308640463655, "grad_norm": 3.3834104537963867, "learning_rate": 1.0073730588115677e-05, "loss": 0.3602, "step": 25385 }, { "epoch": 4.14399412268887, "grad_norm": 3.0561375617980957, "learning_rate": 1.0073089464033803e-05, "loss": 0.3033, "step": 25386 }, { "epoch": 4.144157381331374, "grad_norm": 2.5903847217559814, "learning_rate": 1.0072448339651482e-05, "loss": 0.3239, "step": 25387 }, { "epoch": 4.144320639973879, "grad_norm": 3.683793544769287, "learning_rate": 1.0071807214971354e-05, "loss": 0.3909, "step": 25388 }, { "epoch": 4.144483898616383, "grad_norm": 3.6431586742401123, "learning_rate": 1.0071166089996054e-05, "loss": 0.4516, "step": 25389 }, { "epoch": 4.144647157258888, "grad_norm": 3.171269655227661, "learning_rate": 1.007052496472822e-05, "loss": 0.3352, "step": 25390 }, { "epoch": 4.144810415901392, "grad_norm": 3.4033758640289307, "learning_rate": 1.0069883839170481e-05, "loss": 0.3626, "step": 25391 }, { "epoch": 4.1449736745438965, "grad_norm": 3.6385111808776855, "learning_rate": 1.0069242713325476e-05, "loss": 0.3772, "step": 25392 }, { "epoch": 4.145136933186401, "grad_norm": 2.7758443355560303, "learning_rate": 1.006860158719584e-05, "loss": 0.3554, "step": 25393 }, { "epoch": 4.1453001918289045, "grad_norm": 3.3767006397247314, "learning_rate": 1.0067960460784207e-05, "loss": 0.398, "step": 25394 }, { "epoch": 4.145463450471409, "grad_norm": 3.3040554523468018, "learning_rate": 1.006731933409322e-05, "loss": 0.3546, "step": 25395 }, { "epoch": 4.145626709113913, "grad_norm": 3.572070598602295, "learning_rate": 1.0066678207125507e-05, "loss": 0.3766, "step": 25396 }, { "epoch": 4.145789967756418, "grad_norm": 2.6184439659118652, "learning_rate": 1.0066037079883704e-05, "loss": 0.2913, "step": 25397 }, { "epoch": 4.145953226398922, "grad_norm": 3.3439879417419434, "learning_rate": 1.0065395952370446e-05, "loss": 0.3361, "step": 25398 }, { "epoch": 4.146116485041427, "grad_norm": 3.4123611450195312, "learning_rate": 1.0064754824588368e-05, "loss": 0.4057, "step": 25399 }, { "epoch": 4.146279743683931, "grad_norm": 3.1786584854125977, "learning_rate": 1.0064113696540112e-05, "loss": 0.3716, "step": 25400 }, { "epoch": 4.146443002326436, "grad_norm": 3.700835943222046, "learning_rate": 1.0063472568228307e-05, "loss": 0.3712, "step": 25401 }, { "epoch": 4.14660626096894, "grad_norm": 4.343169689178467, "learning_rate": 1.0062831439655591e-05, "loss": 0.435, "step": 25402 }, { "epoch": 4.1467695196114445, "grad_norm": 3.0818564891815186, "learning_rate": 1.0062190310824597e-05, "loss": 0.3576, "step": 25403 }, { "epoch": 4.146932778253949, "grad_norm": 3.365652561187744, "learning_rate": 1.0061549181737964e-05, "loss": 0.3428, "step": 25404 }, { "epoch": 4.147096036896453, "grad_norm": 3.143995523452759, "learning_rate": 1.0060908052398326e-05, "loss": 0.3674, "step": 25405 }, { "epoch": 4.147259295538958, "grad_norm": 3.495224714279175, "learning_rate": 1.0060266922808318e-05, "loss": 0.345, "step": 25406 }, { "epoch": 4.147422554181462, "grad_norm": 2.8899073600769043, "learning_rate": 1.0059625792970572e-05, "loss": 0.3435, "step": 25407 }, { "epoch": 4.147585812823967, "grad_norm": 3.0810675621032715, "learning_rate": 1.005898466288773e-05, "loss": 0.3201, "step": 25408 }, { "epoch": 4.147749071466471, "grad_norm": 3.4009921550750732, "learning_rate": 1.0058343532562422e-05, "loss": 0.344, "step": 25409 }, { "epoch": 4.1479123301089755, "grad_norm": 3.2415695190429688, "learning_rate": 1.0057702401997287e-05, "loss": 0.3551, "step": 25410 }, { "epoch": 4.14807558875148, "grad_norm": 3.7272839546203613, "learning_rate": 1.0057061271194961e-05, "loss": 0.3947, "step": 25411 }, { "epoch": 4.1482388473939835, "grad_norm": 2.8491599559783936, "learning_rate": 1.0056420140158075e-05, "loss": 0.2876, "step": 25412 }, { "epoch": 4.148402106036488, "grad_norm": 4.277873992919922, "learning_rate": 1.0055779008889269e-05, "loss": 0.4948, "step": 25413 }, { "epoch": 4.148565364678992, "grad_norm": 3.871215343475342, "learning_rate": 1.0055137877391175e-05, "loss": 0.3775, "step": 25414 }, { "epoch": 4.148728623321497, "grad_norm": 3.6004478931427, "learning_rate": 1.0054496745666432e-05, "loss": 0.3624, "step": 25415 }, { "epoch": 4.148891881964001, "grad_norm": 3.8199524879455566, "learning_rate": 1.0053855613717672e-05, "loss": 0.4414, "step": 25416 }, { "epoch": 4.149055140606506, "grad_norm": 3.473076581954956, "learning_rate": 1.0053214481547531e-05, "loss": 0.379, "step": 25417 }, { "epoch": 4.14921839924901, "grad_norm": 3.1964614391326904, "learning_rate": 1.0052573349158645e-05, "loss": 0.4374, "step": 25418 }, { "epoch": 4.149381657891515, "grad_norm": 3.2278566360473633, "learning_rate": 1.0051932216553653e-05, "loss": 0.3588, "step": 25419 }, { "epoch": 4.149544916534019, "grad_norm": 3.4480788707733154, "learning_rate": 1.0051291083735184e-05, "loss": 0.4071, "step": 25420 }, { "epoch": 4.1497081751765235, "grad_norm": 3.495093584060669, "learning_rate": 1.005064995070588e-05, "loss": 0.3512, "step": 25421 }, { "epoch": 4.149871433819028, "grad_norm": 3.8307371139526367, "learning_rate": 1.005000881746837e-05, "loss": 0.4015, "step": 25422 }, { "epoch": 4.150034692461532, "grad_norm": 3.890547037124634, "learning_rate": 1.0049367684025296e-05, "loss": 0.4315, "step": 25423 }, { "epoch": 4.150197951104037, "grad_norm": 4.467802047729492, "learning_rate": 1.0048726550379287e-05, "loss": 0.4031, "step": 25424 }, { "epoch": 4.150361209746541, "grad_norm": 3.847820997238159, "learning_rate": 1.0048085416532984e-05, "loss": 0.4033, "step": 25425 }, { "epoch": 4.150524468389046, "grad_norm": 3.1317577362060547, "learning_rate": 1.0047444282489017e-05, "loss": 0.3697, "step": 25426 }, { "epoch": 4.15068772703155, "grad_norm": 3.6470699310302734, "learning_rate": 1.0046803148250026e-05, "loss": 0.4025, "step": 25427 }, { "epoch": 4.1508509856740545, "grad_norm": 2.678720235824585, "learning_rate": 1.0046162013818646e-05, "loss": 0.2943, "step": 25428 }, { "epoch": 4.151014244316558, "grad_norm": 3.397516965866089, "learning_rate": 1.004552087919751e-05, "loss": 0.3933, "step": 25429 }, { "epoch": 4.1511775029590625, "grad_norm": 3.4113428592681885, "learning_rate": 1.0044879744389258e-05, "loss": 0.3697, "step": 25430 }, { "epoch": 4.151340761601567, "grad_norm": 3.7995967864990234, "learning_rate": 1.0044238609396519e-05, "loss": 0.3979, "step": 25431 }, { "epoch": 4.151504020244071, "grad_norm": 5.222385406494141, "learning_rate": 1.004359747422193e-05, "loss": 0.4545, "step": 25432 }, { "epoch": 4.151667278886576, "grad_norm": 2.896972179412842, "learning_rate": 1.0042956338868131e-05, "loss": 0.3512, "step": 25433 }, { "epoch": 4.15183053752908, "grad_norm": 3.890836000442505, "learning_rate": 1.0042315203337755e-05, "loss": 0.4222, "step": 25434 }, { "epoch": 4.151993796171585, "grad_norm": 3.314962387084961, "learning_rate": 1.0041674067633437e-05, "loss": 0.3739, "step": 25435 }, { "epoch": 4.152157054814089, "grad_norm": 3.400292158126831, "learning_rate": 1.0041032931757813e-05, "loss": 0.388, "step": 25436 }, { "epoch": 4.152320313456594, "grad_norm": 3.135530471801758, "learning_rate": 1.0040391795713515e-05, "loss": 0.377, "step": 25437 }, { "epoch": 4.152483572099098, "grad_norm": 3.3633155822753906, "learning_rate": 1.0039750659503185e-05, "loss": 0.3581, "step": 25438 }, { "epoch": 4.1526468307416025, "grad_norm": 2.698103189468384, "learning_rate": 1.0039109523129456e-05, "loss": 0.3376, "step": 25439 }, { "epoch": 4.152810089384107, "grad_norm": 2.1908481121063232, "learning_rate": 1.003846838659496e-05, "loss": 0.3101, "step": 25440 }, { "epoch": 4.152973348026611, "grad_norm": 3.665257692337036, "learning_rate": 1.0037827249902334e-05, "loss": 0.3681, "step": 25441 }, { "epoch": 4.153136606669116, "grad_norm": 3.511488914489746, "learning_rate": 1.0037186113054215e-05, "loss": 0.3509, "step": 25442 }, { "epoch": 4.15329986531162, "grad_norm": 3.9086546897888184, "learning_rate": 1.0036544976053238e-05, "loss": 0.3691, "step": 25443 }, { "epoch": 4.153463123954125, "grad_norm": 2.938626766204834, "learning_rate": 1.003590383890204e-05, "loss": 0.3587, "step": 25444 }, { "epoch": 4.153626382596629, "grad_norm": 2.8379392623901367, "learning_rate": 1.0035262701603256e-05, "loss": 0.3353, "step": 25445 }, { "epoch": 4.1537896412391335, "grad_norm": 3.0350563526153564, "learning_rate": 1.003462156415952e-05, "loss": 0.3623, "step": 25446 }, { "epoch": 4.153952899881637, "grad_norm": 1.9582439661026, "learning_rate": 1.0033980426573464e-05, "loss": 0.2749, "step": 25447 }, { "epoch": 4.1541161585241415, "grad_norm": 3.0445644855499268, "learning_rate": 1.003333928884773e-05, "loss": 0.3574, "step": 25448 }, { "epoch": 4.154279417166646, "grad_norm": 3.319110155105591, "learning_rate": 1.003269815098495e-05, "loss": 0.3746, "step": 25449 }, { "epoch": 4.15444267580915, "grad_norm": 4.151406288146973, "learning_rate": 1.0032057012987762e-05, "loss": 0.4284, "step": 25450 }, { "epoch": 4.154605934451655, "grad_norm": 2.735131025314331, "learning_rate": 1.0031415874858796e-05, "loss": 0.3432, "step": 25451 }, { "epoch": 4.154769193094159, "grad_norm": 4.268588066101074, "learning_rate": 1.0030774736600694e-05, "loss": 0.4163, "step": 25452 }, { "epoch": 4.154932451736664, "grad_norm": 3.0019004344940186, "learning_rate": 1.003013359821609e-05, "loss": 0.3491, "step": 25453 }, { "epoch": 4.155095710379168, "grad_norm": 3.13586163520813, "learning_rate": 1.0029492459707616e-05, "loss": 0.3808, "step": 25454 }, { "epoch": 4.155258969021673, "grad_norm": 3.8686294555664062, "learning_rate": 1.002885132107791e-05, "loss": 0.3771, "step": 25455 }, { "epoch": 4.155422227664177, "grad_norm": 3.227412462234497, "learning_rate": 1.0028210182329607e-05, "loss": 0.3789, "step": 25456 }, { "epoch": 4.1555854863066815, "grad_norm": 4.16710901260376, "learning_rate": 1.0027569043465342e-05, "loss": 0.4515, "step": 25457 }, { "epoch": 4.155748744949186, "grad_norm": 3.7269601821899414, "learning_rate": 1.002692790448775e-05, "loss": 0.4158, "step": 25458 }, { "epoch": 4.15591200359169, "grad_norm": 3.0547306537628174, "learning_rate": 1.002628676539947e-05, "loss": 0.3176, "step": 25459 }, { "epoch": 4.156075262234195, "grad_norm": 4.440144062042236, "learning_rate": 1.0025645626203136e-05, "loss": 0.4429, "step": 25460 }, { "epoch": 4.156238520876699, "grad_norm": 3.20939302444458, "learning_rate": 1.002500448690138e-05, "loss": 0.3528, "step": 25461 }, { "epoch": 4.156401779519204, "grad_norm": 3.3452675342559814, "learning_rate": 1.0024363347496841e-05, "loss": 0.3384, "step": 25462 }, { "epoch": 4.156565038161708, "grad_norm": 3.1631147861480713, "learning_rate": 1.0023722207992153e-05, "loss": 0.339, "step": 25463 }, { "epoch": 4.1567282968042125, "grad_norm": 3.817291259765625, "learning_rate": 1.0023081068389952e-05, "loss": 0.428, "step": 25464 }, { "epoch": 4.156891555446716, "grad_norm": 4.9890971183776855, "learning_rate": 1.0022439928692875e-05, "loss": 0.4025, "step": 25465 }, { "epoch": 4.1570548140892205, "grad_norm": 3.6154117584228516, "learning_rate": 1.0021798788903554e-05, "loss": 0.336, "step": 25466 }, { "epoch": 4.157218072731725, "grad_norm": 4.377490520477295, "learning_rate": 1.0021157649024627e-05, "loss": 0.4634, "step": 25467 }, { "epoch": 4.157381331374229, "grad_norm": 4.154030799865723, "learning_rate": 1.0020516509058728e-05, "loss": 0.3923, "step": 25468 }, { "epoch": 4.157544590016734, "grad_norm": 2.8319718837738037, "learning_rate": 1.0019875369008498e-05, "loss": 0.3171, "step": 25469 }, { "epoch": 4.157707848659238, "grad_norm": 3.476475715637207, "learning_rate": 1.0019234228876563e-05, "loss": 0.4001, "step": 25470 }, { "epoch": 4.157871107301743, "grad_norm": 3.6347100734710693, "learning_rate": 1.0018593088665563e-05, "loss": 0.3216, "step": 25471 }, { "epoch": 4.158034365944247, "grad_norm": 3.958228588104248, "learning_rate": 1.0017951948378134e-05, "loss": 0.4109, "step": 25472 }, { "epoch": 4.158197624586752, "grad_norm": 3.3755850791931152, "learning_rate": 1.0017310808016913e-05, "loss": 0.376, "step": 25473 }, { "epoch": 4.158360883229256, "grad_norm": 3.1333365440368652, "learning_rate": 1.0016669667584537e-05, "loss": 0.4127, "step": 25474 }, { "epoch": 4.1585241418717604, "grad_norm": 4.6739821434021, "learning_rate": 1.0016028527083634e-05, "loss": 0.4121, "step": 25475 }, { "epoch": 4.158687400514265, "grad_norm": 2.771339178085327, "learning_rate": 1.0015387386516843e-05, "loss": 0.3213, "step": 25476 }, { "epoch": 4.158850659156769, "grad_norm": 4.084358215332031, "learning_rate": 1.0014746245886802e-05, "loss": 0.4149, "step": 25477 }, { "epoch": 4.159013917799274, "grad_norm": 3.20569109916687, "learning_rate": 1.0014105105196147e-05, "loss": 0.3066, "step": 25478 }, { "epoch": 4.159177176441778, "grad_norm": 3.575129747390747, "learning_rate": 1.001346396444751e-05, "loss": 0.3741, "step": 25479 }, { "epoch": 4.159340435084283, "grad_norm": 3.1350412368774414, "learning_rate": 1.0012822823643524e-05, "loss": 0.3565, "step": 25480 }, { "epoch": 4.159503693726787, "grad_norm": 3.5306107997894287, "learning_rate": 1.001218168278683e-05, "loss": 0.4805, "step": 25481 }, { "epoch": 4.159666952369291, "grad_norm": 3.683629035949707, "learning_rate": 1.0011540541880063e-05, "loss": 0.3969, "step": 25482 }, { "epoch": 4.159830211011795, "grad_norm": 3.507235288619995, "learning_rate": 1.0010899400925858e-05, "loss": 0.4096, "step": 25483 }, { "epoch": 4.1599934696542995, "grad_norm": 4.132802963256836, "learning_rate": 1.0010258259926849e-05, "loss": 0.4059, "step": 25484 }, { "epoch": 4.160156728296804, "grad_norm": 4.173739910125732, "learning_rate": 1.000961711888567e-05, "loss": 0.4556, "step": 25485 }, { "epoch": 4.160319986939308, "grad_norm": 3.4255104064941406, "learning_rate": 1.0008975977804961e-05, "loss": 0.3728, "step": 25486 }, { "epoch": 4.160483245581813, "grad_norm": 2.9210493564605713, "learning_rate": 1.0008334836687353e-05, "loss": 0.3283, "step": 25487 }, { "epoch": 4.160646504224317, "grad_norm": 3.347827911376953, "learning_rate": 1.0007693695535487e-05, "loss": 0.3871, "step": 25488 }, { "epoch": 4.160809762866822, "grad_norm": 3.0565924644470215, "learning_rate": 1.0007052554351993e-05, "loss": 0.3506, "step": 25489 }, { "epoch": 4.160973021509326, "grad_norm": 3.177978277206421, "learning_rate": 1.0006411413139507e-05, "loss": 0.3526, "step": 25490 }, { "epoch": 4.161136280151831, "grad_norm": 3.6707022190093994, "learning_rate": 1.000577027190067e-05, "loss": 0.4761, "step": 25491 }, { "epoch": 4.161299538794335, "grad_norm": 3.1907436847686768, "learning_rate": 1.0005129130638111e-05, "loss": 0.396, "step": 25492 }, { "epoch": 4.1614627974368394, "grad_norm": 3.8842759132385254, "learning_rate": 1.0004487989354467e-05, "loss": 0.4617, "step": 25493 }, { "epoch": 4.161626056079344, "grad_norm": 2.6963865756988525, "learning_rate": 1.0003846848052377e-05, "loss": 0.3042, "step": 25494 }, { "epoch": 4.161789314721848, "grad_norm": 2.9286046028137207, "learning_rate": 1.0003205706734474e-05, "loss": 0.3331, "step": 25495 }, { "epoch": 4.161952573364353, "grad_norm": 4.133351802825928, "learning_rate": 1.0002564565403393e-05, "loss": 0.3777, "step": 25496 }, { "epoch": 4.162115832006857, "grad_norm": 3.278092861175537, "learning_rate": 1.0001923424061768e-05, "loss": 0.4049, "step": 25497 }, { "epoch": 4.162279090649362, "grad_norm": 3.31032133102417, "learning_rate": 1.000128228271224e-05, "loss": 0.3308, "step": 25498 }, { "epoch": 4.162442349291865, "grad_norm": 3.4331929683685303, "learning_rate": 1.0000641141357437e-05, "loss": 0.324, "step": 25499 }, { "epoch": 4.16260560793437, "grad_norm": 3.200962543487549, "learning_rate": 1e-05, "loss": 0.3596, "step": 25500 }, { "epoch": 4.162768866576874, "grad_norm": 3.655078411102295, "learning_rate": 9.999358858642565e-06, "loss": 0.3021, "step": 25501 }, { "epoch": 4.1629321252193785, "grad_norm": 4.03048038482666, "learning_rate": 9.998717717287765e-06, "loss": 0.5072, "step": 25502 }, { "epoch": 4.163095383861883, "grad_norm": 3.5417065620422363, "learning_rate": 9.998076575938234e-06, "loss": 0.3683, "step": 25503 }, { "epoch": 4.163258642504387, "grad_norm": 3.8833541870117188, "learning_rate": 9.997435434596609e-06, "loss": 0.4068, "step": 25504 }, { "epoch": 4.163421901146892, "grad_norm": 4.0469136238098145, "learning_rate": 9.996794293265533e-06, "loss": 0.4114, "step": 25505 }, { "epoch": 4.163585159789396, "grad_norm": 2.7706918716430664, "learning_rate": 9.996153151947628e-06, "loss": 0.3156, "step": 25506 }, { "epoch": 4.163748418431901, "grad_norm": 3.9686827659606934, "learning_rate": 9.995512010645538e-06, "loss": 0.462, "step": 25507 }, { "epoch": 4.163911677074405, "grad_norm": 3.5478458404541016, "learning_rate": 9.994870869361892e-06, "loss": 0.3652, "step": 25508 }, { "epoch": 4.16407493571691, "grad_norm": 2.5316483974456787, "learning_rate": 9.994229728099334e-06, "loss": 0.3344, "step": 25509 }, { "epoch": 4.164238194359414, "grad_norm": 3.0105607509613037, "learning_rate": 9.993588586860495e-06, "loss": 0.3169, "step": 25510 }, { "epoch": 4.164401453001918, "grad_norm": 3.9164860248565674, "learning_rate": 9.99294744564801e-06, "loss": 0.3913, "step": 25511 }, { "epoch": 4.164564711644423, "grad_norm": 2.9095370769500732, "learning_rate": 9.992306304464516e-06, "loss": 0.312, "step": 25512 }, { "epoch": 4.164727970286927, "grad_norm": 3.743004083633423, "learning_rate": 9.991665163312649e-06, "loss": 0.4254, "step": 25513 }, { "epoch": 4.164891228929432, "grad_norm": 3.4806442260742188, "learning_rate": 9.991024022195042e-06, "loss": 0.3304, "step": 25514 }, { "epoch": 4.165054487571936, "grad_norm": 3.58996319770813, "learning_rate": 9.990382881114335e-06, "loss": 0.4175, "step": 25515 }, { "epoch": 4.165217746214441, "grad_norm": 3.843909502029419, "learning_rate": 9.989741740073156e-06, "loss": 0.4315, "step": 25516 }, { "epoch": 4.165381004856945, "grad_norm": 3.228563070297241, "learning_rate": 9.989100599074146e-06, "loss": 0.3456, "step": 25517 }, { "epoch": 4.165544263499449, "grad_norm": 3.174984931945801, "learning_rate": 9.98845945811994e-06, "loss": 0.3709, "step": 25518 }, { "epoch": 4.165707522141953, "grad_norm": 3.625126361846924, "learning_rate": 9.987818317213173e-06, "loss": 0.3856, "step": 25519 }, { "epoch": 4.1658707807844575, "grad_norm": 4.038559913635254, "learning_rate": 9.987177176356478e-06, "loss": 0.4388, "step": 25520 }, { "epoch": 4.166034039426962, "grad_norm": 3.371519088745117, "learning_rate": 9.986536035552494e-06, "loss": 0.3394, "step": 25521 }, { "epoch": 4.166197298069466, "grad_norm": 3.442420482635498, "learning_rate": 9.985894894803856e-06, "loss": 0.3967, "step": 25522 }, { "epoch": 4.166360556711971, "grad_norm": 3.311343193054199, "learning_rate": 9.9852537541132e-06, "loss": 0.323, "step": 25523 }, { "epoch": 4.166523815354475, "grad_norm": 3.329773187637329, "learning_rate": 9.984612613483159e-06, "loss": 0.3379, "step": 25524 }, { "epoch": 4.16668707399698, "grad_norm": 3.8480727672576904, "learning_rate": 9.98397147291637e-06, "loss": 0.3664, "step": 25525 }, { "epoch": 4.166850332639484, "grad_norm": 3.204509973526001, "learning_rate": 9.983330332415468e-06, "loss": 0.3981, "step": 25526 }, { "epoch": 4.167013591281989, "grad_norm": 3.3425114154815674, "learning_rate": 9.982689191983089e-06, "loss": 0.3882, "step": 25527 }, { "epoch": 4.167176849924493, "grad_norm": 3.5810673236846924, "learning_rate": 9.982048051621867e-06, "loss": 0.4095, "step": 25528 }, { "epoch": 4.167340108566997, "grad_norm": 3.5460543632507324, "learning_rate": 9.981406911334439e-06, "loss": 0.33, "step": 25529 }, { "epoch": 4.167503367209502, "grad_norm": 3.202285051345825, "learning_rate": 9.98076577112344e-06, "loss": 0.3439, "step": 25530 }, { "epoch": 4.167666625852006, "grad_norm": 3.2655272483825684, "learning_rate": 9.980124630991504e-06, "loss": 0.3974, "step": 25531 }, { "epoch": 4.167829884494511, "grad_norm": 3.347154140472412, "learning_rate": 9.979483490941272e-06, "loss": 0.3258, "step": 25532 }, { "epoch": 4.167993143137015, "grad_norm": 3.4505159854888916, "learning_rate": 9.978842350975373e-06, "loss": 0.3463, "step": 25533 }, { "epoch": 4.16815640177952, "grad_norm": 4.264780044555664, "learning_rate": 9.978201211096446e-06, "loss": 0.3782, "step": 25534 }, { "epoch": 4.168319660422023, "grad_norm": 3.7040627002716064, "learning_rate": 9.977560071307128e-06, "loss": 0.458, "step": 25535 }, { "epoch": 4.168482919064528, "grad_norm": 3.4990108013153076, "learning_rate": 9.97691893161005e-06, "loss": 0.3269, "step": 25536 }, { "epoch": 4.168646177707032, "grad_norm": 3.2742528915405273, "learning_rate": 9.976277792007849e-06, "loss": 0.3532, "step": 25537 }, { "epoch": 4.1688094363495365, "grad_norm": 3.359938859939575, "learning_rate": 9.97563665250316e-06, "loss": 0.3265, "step": 25538 }, { "epoch": 4.168972694992041, "grad_norm": 3.047877073287964, "learning_rate": 9.974995513098622e-06, "loss": 0.3054, "step": 25539 }, { "epoch": 4.169135953634545, "grad_norm": 3.179109573364258, "learning_rate": 9.974354373796868e-06, "loss": 0.4067, "step": 25540 }, { "epoch": 4.16929921227705, "grad_norm": 4.29573917388916, "learning_rate": 9.973713234600531e-06, "loss": 0.4652, "step": 25541 }, { "epoch": 4.169462470919554, "grad_norm": 3.606159210205078, "learning_rate": 9.973072095512251e-06, "loss": 0.4257, "step": 25542 }, { "epoch": 4.169625729562059, "grad_norm": 2.96256947517395, "learning_rate": 9.97243095653466e-06, "loss": 0.3095, "step": 25543 }, { "epoch": 4.169788988204563, "grad_norm": 3.2454378604888916, "learning_rate": 9.971789817670394e-06, "loss": 0.3238, "step": 25544 }, { "epoch": 4.169952246847068, "grad_norm": 2.9988551139831543, "learning_rate": 9.971148678922096e-06, "loss": 0.296, "step": 25545 }, { "epoch": 4.170115505489572, "grad_norm": 2.848724603652954, "learning_rate": 9.97050754029239e-06, "loss": 0.3658, "step": 25546 }, { "epoch": 4.170278764132076, "grad_norm": 3.743319034576416, "learning_rate": 9.969866401783913e-06, "loss": 0.4144, "step": 25547 }, { "epoch": 4.170442022774581, "grad_norm": 3.153637409210205, "learning_rate": 9.969225263399307e-06, "loss": 0.358, "step": 25548 }, { "epoch": 4.170605281417085, "grad_norm": 3.246955394744873, "learning_rate": 9.968584125141206e-06, "loss": 0.3699, "step": 25549 }, { "epoch": 4.17076854005959, "grad_norm": 3.2876980304718018, "learning_rate": 9.967942987012243e-06, "loss": 0.3392, "step": 25550 }, { "epoch": 4.170931798702094, "grad_norm": 3.1957602500915527, "learning_rate": 9.967301849015051e-06, "loss": 0.3267, "step": 25551 }, { "epoch": 4.171095057344598, "grad_norm": 3.7231509685516357, "learning_rate": 9.966660711152272e-06, "loss": 0.3708, "step": 25552 }, { "epoch": 4.171258315987102, "grad_norm": 3.262105703353882, "learning_rate": 9.96601957342654e-06, "loss": 0.3267, "step": 25553 }, { "epoch": 4.171421574629607, "grad_norm": 4.568636894226074, "learning_rate": 9.965378435840487e-06, "loss": 0.3984, "step": 25554 }, { "epoch": 4.171584833272111, "grad_norm": 3.6088290214538574, "learning_rate": 9.964737298396749e-06, "loss": 0.3836, "step": 25555 }, { "epoch": 4.1717480919146155, "grad_norm": 3.8325133323669434, "learning_rate": 9.964096161097964e-06, "loss": 0.3642, "step": 25556 }, { "epoch": 4.17191135055712, "grad_norm": 3.256680488586426, "learning_rate": 9.963455023946764e-06, "loss": 0.3707, "step": 25557 }, { "epoch": 4.172074609199624, "grad_norm": 3.474699020385742, "learning_rate": 9.962813886945786e-06, "loss": 0.3634, "step": 25558 }, { "epoch": 4.172237867842129, "grad_norm": 3.5419955253601074, "learning_rate": 9.962172750097668e-06, "loss": 0.356, "step": 25559 }, { "epoch": 4.172401126484633, "grad_norm": 3.6945841312408447, "learning_rate": 9.961531613405044e-06, "loss": 0.3185, "step": 25560 }, { "epoch": 4.172564385127138, "grad_norm": 3.264756441116333, "learning_rate": 9.960890476870548e-06, "loss": 0.3522, "step": 25561 }, { "epoch": 4.172727643769642, "grad_norm": 3.6086089611053467, "learning_rate": 9.960249340496817e-06, "loss": 0.3689, "step": 25562 }, { "epoch": 4.1728909024121466, "grad_norm": 3.311192035675049, "learning_rate": 9.959608204286487e-06, "loss": 0.3104, "step": 25563 }, { "epoch": 4.173054161054651, "grad_norm": 3.7800421714782715, "learning_rate": 9.958967068242192e-06, "loss": 0.3727, "step": 25564 }, { "epoch": 4.173217419697155, "grad_norm": 3.5970001220703125, "learning_rate": 9.958325932366568e-06, "loss": 0.3951, "step": 25565 }, { "epoch": 4.17338067833966, "grad_norm": 3.794055700302124, "learning_rate": 9.957684796662248e-06, "loss": 0.4287, "step": 25566 }, { "epoch": 4.173543936982164, "grad_norm": 4.021335124969482, "learning_rate": 9.957043661131872e-06, "loss": 0.359, "step": 25567 }, { "epoch": 4.173707195624669, "grad_norm": 3.85919189453125, "learning_rate": 9.956402525778072e-06, "loss": 0.5382, "step": 25568 }, { "epoch": 4.173870454267173, "grad_norm": 3.974245548248291, "learning_rate": 9.955761390603484e-06, "loss": 0.4119, "step": 25569 }, { "epoch": 4.174033712909677, "grad_norm": 3.997098922729492, "learning_rate": 9.955120255610746e-06, "loss": 0.3675, "step": 25570 }, { "epoch": 4.174196971552181, "grad_norm": 3.4627034664154053, "learning_rate": 9.954479120802492e-06, "loss": 0.3806, "step": 25571 }, { "epoch": 4.174360230194686, "grad_norm": 3.1464574337005615, "learning_rate": 9.953837986181356e-06, "loss": 0.3358, "step": 25572 }, { "epoch": 4.17452348883719, "grad_norm": 3.148074150085449, "learning_rate": 9.953196851749974e-06, "loss": 0.3175, "step": 25573 }, { "epoch": 4.1746867474796945, "grad_norm": 3.193178653717041, "learning_rate": 9.952555717510985e-06, "loss": 0.3826, "step": 25574 }, { "epoch": 4.174850006122199, "grad_norm": 3.159148931503296, "learning_rate": 9.951914583467021e-06, "loss": 0.2924, "step": 25575 }, { "epoch": 4.175013264764703, "grad_norm": 3.4193434715270996, "learning_rate": 9.951273449620716e-06, "loss": 0.422, "step": 25576 }, { "epoch": 4.175176523407208, "grad_norm": 3.0311222076416016, "learning_rate": 9.95063231597471e-06, "loss": 0.3456, "step": 25577 }, { "epoch": 4.175339782049712, "grad_norm": 2.828744649887085, "learning_rate": 9.949991182531633e-06, "loss": 0.3358, "step": 25578 }, { "epoch": 4.175503040692217, "grad_norm": 3.530245780944824, "learning_rate": 9.949350049294124e-06, "loss": 0.3997, "step": 25579 }, { "epoch": 4.175666299334721, "grad_norm": 3.6181843280792236, "learning_rate": 9.948708916264818e-06, "loss": 0.4072, "step": 25580 }, { "epoch": 4.1758295579772255, "grad_norm": 3.3487613201141357, "learning_rate": 9.948067783446348e-06, "loss": 0.3152, "step": 25581 }, { "epoch": 4.17599281661973, "grad_norm": 3.8630151748657227, "learning_rate": 9.947426650841355e-06, "loss": 0.3598, "step": 25582 }, { "epoch": 4.176156075262234, "grad_norm": 3.351262331008911, "learning_rate": 9.94678551845247e-06, "loss": 0.3705, "step": 25583 }, { "epoch": 4.176319333904739, "grad_norm": 3.8260421752929688, "learning_rate": 9.946144386282335e-06, "loss": 0.4707, "step": 25584 }, { "epoch": 4.176482592547243, "grad_norm": 2.5701425075531006, "learning_rate": 9.945503254333572e-06, "loss": 0.2813, "step": 25585 }, { "epoch": 4.176645851189748, "grad_norm": 4.6503801345825195, "learning_rate": 9.944862122608829e-06, "loss": 0.4291, "step": 25586 }, { "epoch": 4.176809109832252, "grad_norm": 3.2522895336151123, "learning_rate": 9.944220991110734e-06, "loss": 0.3844, "step": 25587 }, { "epoch": 4.176972368474756, "grad_norm": 3.0682027339935303, "learning_rate": 9.943579859841927e-06, "loss": 0.3549, "step": 25588 }, { "epoch": 4.17713562711726, "grad_norm": 3.6538679599761963, "learning_rate": 9.942938728805042e-06, "loss": 0.4336, "step": 25589 }, { "epoch": 4.177298885759765, "grad_norm": 2.8940885066986084, "learning_rate": 9.942297598002714e-06, "loss": 0.3147, "step": 25590 }, { "epoch": 4.177462144402269, "grad_norm": 3.9508585929870605, "learning_rate": 9.941656467437581e-06, "loss": 0.375, "step": 25591 }, { "epoch": 4.1776254030447735, "grad_norm": 2.9752297401428223, "learning_rate": 9.94101533711227e-06, "loss": 0.338, "step": 25592 }, { "epoch": 4.177788661687278, "grad_norm": 2.8651554584503174, "learning_rate": 9.940374207029433e-06, "loss": 0.3603, "step": 25593 }, { "epoch": 4.177951920329782, "grad_norm": 3.6290981769561768, "learning_rate": 9.939733077191689e-06, "loss": 0.4032, "step": 25594 }, { "epoch": 4.178115178972287, "grad_norm": 3.426173448562622, "learning_rate": 9.93909194760168e-06, "loss": 0.3566, "step": 25595 }, { "epoch": 4.178278437614791, "grad_norm": 3.9048895835876465, "learning_rate": 9.938450818262038e-06, "loss": 0.3877, "step": 25596 }, { "epoch": 4.178441696257296, "grad_norm": 3.786428689956665, "learning_rate": 9.937809689175404e-06, "loss": 0.3995, "step": 25597 }, { "epoch": 4.1786049548998, "grad_norm": 2.8954946994781494, "learning_rate": 9.937168560344412e-06, "loss": 0.2649, "step": 25598 }, { "epoch": 4.1787682135423045, "grad_norm": 3.351085901260376, "learning_rate": 9.936527431771695e-06, "loss": 0.3825, "step": 25599 }, { "epoch": 4.178931472184809, "grad_norm": 3.2404863834381104, "learning_rate": 9.93588630345989e-06, "loss": 0.3551, "step": 25600 }, { "epoch": 4.179094730827313, "grad_norm": 3.5112714767456055, "learning_rate": 9.935245175411632e-06, "loss": 0.3772, "step": 25601 }, { "epoch": 4.179257989469818, "grad_norm": 3.2049789428710938, "learning_rate": 9.934604047629556e-06, "loss": 0.3026, "step": 25602 }, { "epoch": 4.179421248112322, "grad_norm": 3.4237945079803467, "learning_rate": 9.933962920116303e-06, "loss": 0.3625, "step": 25603 }, { "epoch": 4.179584506754827, "grad_norm": 3.6587002277374268, "learning_rate": 9.933321792874499e-06, "loss": 0.3919, "step": 25604 }, { "epoch": 4.17974776539733, "grad_norm": 3.3057634830474854, "learning_rate": 9.932680665906784e-06, "loss": 0.3672, "step": 25605 }, { "epoch": 4.179911024039835, "grad_norm": 3.325568914413452, "learning_rate": 9.932039539215795e-06, "loss": 0.3119, "step": 25606 }, { "epoch": 4.180074282682339, "grad_norm": 2.8225276470184326, "learning_rate": 9.931398412804164e-06, "loss": 0.3319, "step": 25607 }, { "epoch": 4.180237541324844, "grad_norm": 3.59177565574646, "learning_rate": 9.930757286674525e-06, "loss": 0.4055, "step": 25608 }, { "epoch": 4.180400799967348, "grad_norm": 3.3794190883636475, "learning_rate": 9.93011616082952e-06, "loss": 0.3484, "step": 25609 }, { "epoch": 4.1805640586098525, "grad_norm": 3.213008403778076, "learning_rate": 9.929475035271784e-06, "loss": 0.3812, "step": 25610 }, { "epoch": 4.180727317252357, "grad_norm": 3.3117785453796387, "learning_rate": 9.928833910003947e-06, "loss": 0.3462, "step": 25611 }, { "epoch": 4.180890575894861, "grad_norm": 2.9632086753845215, "learning_rate": 9.928192785028647e-06, "loss": 0.3881, "step": 25612 }, { "epoch": 4.181053834537366, "grad_norm": 3.4881725311279297, "learning_rate": 9.927551660348521e-06, "loss": 0.377, "step": 25613 }, { "epoch": 4.18121709317987, "grad_norm": 4.296226501464844, "learning_rate": 9.926910535966202e-06, "loss": 0.4035, "step": 25614 }, { "epoch": 4.181380351822375, "grad_norm": 4.475615501403809, "learning_rate": 9.926269411884325e-06, "loss": 0.3947, "step": 25615 }, { "epoch": 4.181543610464879, "grad_norm": 4.106762409210205, "learning_rate": 9.925628288105526e-06, "loss": 0.4223, "step": 25616 }, { "epoch": 4.1817068691073835, "grad_norm": 3.5548062324523926, "learning_rate": 9.924987164632443e-06, "loss": 0.3891, "step": 25617 }, { "epoch": 4.181870127749888, "grad_norm": 3.9669041633605957, "learning_rate": 9.92434604146771e-06, "loss": 0.3895, "step": 25618 }, { "epoch": 4.182033386392392, "grad_norm": 3.3820278644561768, "learning_rate": 9.923704918613956e-06, "loss": 0.3502, "step": 25619 }, { "epoch": 4.182196645034897, "grad_norm": 2.949251413345337, "learning_rate": 9.923063796073826e-06, "loss": 0.3441, "step": 25620 }, { "epoch": 4.182359903677401, "grad_norm": 3.3414292335510254, "learning_rate": 9.922422673849953e-06, "loss": 0.3655, "step": 25621 }, { "epoch": 4.182523162319906, "grad_norm": 3.1704561710357666, "learning_rate": 9.92178155194497e-06, "loss": 0.3842, "step": 25622 }, { "epoch": 4.182686420962409, "grad_norm": 3.08665132522583, "learning_rate": 9.921140430361516e-06, "loss": 0.3455, "step": 25623 }, { "epoch": 4.182849679604914, "grad_norm": 3.9420671463012695, "learning_rate": 9.920499309102221e-06, "loss": 0.4213, "step": 25624 }, { "epoch": 4.183012938247418, "grad_norm": 2.881483554840088, "learning_rate": 9.919858188169724e-06, "loss": 0.3511, "step": 25625 }, { "epoch": 4.183176196889923, "grad_norm": 3.6161749362945557, "learning_rate": 9.919217067566659e-06, "loss": 0.3478, "step": 25626 }, { "epoch": 4.183339455532427, "grad_norm": 3.1356825828552246, "learning_rate": 9.918575947295665e-06, "loss": 0.3516, "step": 25627 }, { "epoch": 4.1835027141749315, "grad_norm": 3.194932460784912, "learning_rate": 9.917934827359372e-06, "loss": 0.3835, "step": 25628 }, { "epoch": 4.183665972817436, "grad_norm": 3.5193700790405273, "learning_rate": 9.917293707760419e-06, "loss": 0.3836, "step": 25629 }, { "epoch": 4.18382923145994, "grad_norm": 3.1015753746032715, "learning_rate": 9.916652588501439e-06, "loss": 0.4041, "step": 25630 }, { "epoch": 4.183992490102445, "grad_norm": 3.7004096508026123, "learning_rate": 9.916011469585067e-06, "loss": 0.4198, "step": 25631 }, { "epoch": 4.184155748744949, "grad_norm": 3.5075955390930176, "learning_rate": 9.915370351013943e-06, "loss": 0.3617, "step": 25632 }, { "epoch": 4.184319007387454, "grad_norm": 2.638561725616455, "learning_rate": 9.914729232790703e-06, "loss": 0.3219, "step": 25633 }, { "epoch": 4.184482266029958, "grad_norm": 3.0109925270080566, "learning_rate": 9.914088114917974e-06, "loss": 0.3967, "step": 25634 }, { "epoch": 4.1846455246724625, "grad_norm": 3.5151777267456055, "learning_rate": 9.913446997398397e-06, "loss": 0.3731, "step": 25635 }, { "epoch": 4.184808783314967, "grad_norm": 3.7876899242401123, "learning_rate": 9.912805880234606e-06, "loss": 0.3617, "step": 25636 }, { "epoch": 4.184972041957471, "grad_norm": 2.680424451828003, "learning_rate": 9.912164763429238e-06, "loss": 0.305, "step": 25637 }, { "epoch": 4.185135300599976, "grad_norm": 3.4878926277160645, "learning_rate": 9.911523646984929e-06, "loss": 0.4063, "step": 25638 }, { "epoch": 4.18529855924248, "grad_norm": 3.457369089126587, "learning_rate": 9.910882530904311e-06, "loss": 0.3331, "step": 25639 }, { "epoch": 4.185461817884985, "grad_norm": 3.684476852416992, "learning_rate": 9.910241415190022e-06, "loss": 0.39, "step": 25640 }, { "epoch": 4.185625076527488, "grad_norm": 3.0988378524780273, "learning_rate": 9.909600299844695e-06, "loss": 0.3426, "step": 25641 }, { "epoch": 4.185788335169993, "grad_norm": 3.2614622116088867, "learning_rate": 9.908959184870972e-06, "loss": 0.3813, "step": 25642 }, { "epoch": 4.185951593812497, "grad_norm": 4.87934684753418, "learning_rate": 9.90831807027148e-06, "loss": 0.4321, "step": 25643 }, { "epoch": 4.186114852455002, "grad_norm": 3.881988048553467, "learning_rate": 9.907676956048857e-06, "loss": 0.4452, "step": 25644 }, { "epoch": 4.186278111097506, "grad_norm": 3.755492925643921, "learning_rate": 9.90703584220574e-06, "loss": 0.4699, "step": 25645 }, { "epoch": 4.1864413697400105, "grad_norm": 4.776631832122803, "learning_rate": 9.906394728744761e-06, "loss": 0.392, "step": 25646 }, { "epoch": 4.186604628382515, "grad_norm": 3.2520835399627686, "learning_rate": 9.905753615668561e-06, "loss": 0.3966, "step": 25647 }, { "epoch": 4.186767887025019, "grad_norm": 3.69476318359375, "learning_rate": 9.90511250297977e-06, "loss": 0.3414, "step": 25648 }, { "epoch": 4.186931145667524, "grad_norm": 3.421050548553467, "learning_rate": 9.904471390681028e-06, "loss": 0.3338, "step": 25649 }, { "epoch": 4.187094404310028, "grad_norm": 3.394491672515869, "learning_rate": 9.903830278774967e-06, "loss": 0.393, "step": 25650 }, { "epoch": 4.187257662952533, "grad_norm": 2.6888809204101562, "learning_rate": 9.903189167264225e-06, "loss": 0.3699, "step": 25651 }, { "epoch": 4.187420921595037, "grad_norm": 3.5596346855163574, "learning_rate": 9.902548056151437e-06, "loss": 0.3941, "step": 25652 }, { "epoch": 4.1875841802375415, "grad_norm": 3.795071840286255, "learning_rate": 9.901906945439233e-06, "loss": 0.4357, "step": 25653 }, { "epoch": 4.187747438880046, "grad_norm": 3.986060380935669, "learning_rate": 9.901265835130254e-06, "loss": 0.3969, "step": 25654 }, { "epoch": 4.18791069752255, "grad_norm": 3.4846270084381104, "learning_rate": 9.900624725227135e-06, "loss": 0.3738, "step": 25655 }, { "epoch": 4.188073956165055, "grad_norm": 3.739905595779419, "learning_rate": 9.899983615732509e-06, "loss": 0.3884, "step": 25656 }, { "epoch": 4.188237214807559, "grad_norm": 3.5051701068878174, "learning_rate": 9.899342506649014e-06, "loss": 0.3866, "step": 25657 }, { "epoch": 4.188400473450063, "grad_norm": 3.676621675491333, "learning_rate": 9.89870139797928e-06, "loss": 0.4209, "step": 25658 }, { "epoch": 4.188563732092567, "grad_norm": 3.0760550498962402, "learning_rate": 9.898060289725949e-06, "loss": 0.3413, "step": 25659 }, { "epoch": 4.188726990735072, "grad_norm": 3.152069091796875, "learning_rate": 9.897419181891655e-06, "loss": 0.2707, "step": 25660 }, { "epoch": 4.188890249377576, "grad_norm": 2.635551691055298, "learning_rate": 9.896778074479029e-06, "loss": 0.2929, "step": 25661 }, { "epoch": 4.189053508020081, "grad_norm": 3.6949214935302734, "learning_rate": 9.896136967490714e-06, "loss": 0.3998, "step": 25662 }, { "epoch": 4.189216766662585, "grad_norm": 3.330721616744995, "learning_rate": 9.895495860929338e-06, "loss": 0.3009, "step": 25663 }, { "epoch": 4.1893800253050895, "grad_norm": 4.148597240447998, "learning_rate": 9.894854754797539e-06, "loss": 0.305, "step": 25664 }, { "epoch": 4.189543283947594, "grad_norm": 3.9909920692443848, "learning_rate": 9.894213649097953e-06, "loss": 0.4536, "step": 25665 }, { "epoch": 4.189706542590098, "grad_norm": 3.5310888290405273, "learning_rate": 9.893572543833212e-06, "loss": 0.4232, "step": 25666 }, { "epoch": 4.189869801232603, "grad_norm": 3.2561588287353516, "learning_rate": 9.892931439005958e-06, "loss": 0.3056, "step": 25667 }, { "epoch": 4.190033059875107, "grad_norm": 3.8128244876861572, "learning_rate": 9.89229033461882e-06, "loss": 0.3756, "step": 25668 }, { "epoch": 4.190196318517612, "grad_norm": 3.215339183807373, "learning_rate": 9.891649230674433e-06, "loss": 0.3585, "step": 25669 }, { "epoch": 4.190359577160116, "grad_norm": 3.3104889392852783, "learning_rate": 9.89100812717544e-06, "loss": 0.3494, "step": 25670 }, { "epoch": 4.1905228358026205, "grad_norm": 3.5597777366638184, "learning_rate": 9.89036702412447e-06, "loss": 0.4049, "step": 25671 }, { "epoch": 4.190686094445125, "grad_norm": 6.025850772857666, "learning_rate": 9.889725921524163e-06, "loss": 0.4047, "step": 25672 }, { "epoch": 4.190849353087629, "grad_norm": 3.547316312789917, "learning_rate": 9.889084819377146e-06, "loss": 0.3672, "step": 25673 }, { "epoch": 4.191012611730134, "grad_norm": 3.179658889770508, "learning_rate": 9.888443717686062e-06, "loss": 0.3477, "step": 25674 }, { "epoch": 4.191175870372638, "grad_norm": 3.9502689838409424, "learning_rate": 9.887802616453543e-06, "loss": 0.4373, "step": 25675 }, { "epoch": 4.191339129015142, "grad_norm": 3.021146535873413, "learning_rate": 9.887161515682224e-06, "loss": 0.353, "step": 25676 }, { "epoch": 4.191502387657646, "grad_norm": 4.009178161621094, "learning_rate": 9.886520415374743e-06, "loss": 0.4093, "step": 25677 }, { "epoch": 4.191665646300151, "grad_norm": 3.250825881958008, "learning_rate": 9.885879315533733e-06, "loss": 0.3496, "step": 25678 }, { "epoch": 4.191828904942655, "grad_norm": 3.4293949604034424, "learning_rate": 9.88523821616183e-06, "loss": 0.3681, "step": 25679 }, { "epoch": 4.19199216358516, "grad_norm": 3.2145705223083496, "learning_rate": 9.884597117261668e-06, "loss": 0.3394, "step": 25680 }, { "epoch": 4.192155422227664, "grad_norm": 3.159619092941284, "learning_rate": 9.883956018835884e-06, "loss": 0.3378, "step": 25681 }, { "epoch": 4.1923186808701685, "grad_norm": 3.504978895187378, "learning_rate": 9.88331492088712e-06, "loss": 0.3649, "step": 25682 }, { "epoch": 4.192481939512673, "grad_norm": 2.870326519012451, "learning_rate": 9.882673823417998e-06, "loss": 0.3332, "step": 25683 }, { "epoch": 4.192645198155177, "grad_norm": 3.399350166320801, "learning_rate": 9.88203272643116e-06, "loss": 0.3695, "step": 25684 }, { "epoch": 4.192808456797682, "grad_norm": 3.478543758392334, "learning_rate": 9.881391629929238e-06, "loss": 0.3817, "step": 25685 }, { "epoch": 4.192971715440186, "grad_norm": 3.4151763916015625, "learning_rate": 9.880750533914874e-06, "loss": 0.3617, "step": 25686 }, { "epoch": 4.193134974082691, "grad_norm": 3.9892778396606445, "learning_rate": 9.880109438390698e-06, "loss": 0.4004, "step": 25687 }, { "epoch": 4.193298232725195, "grad_norm": 4.016403675079346, "learning_rate": 9.879468343359349e-06, "loss": 0.4485, "step": 25688 }, { "epoch": 4.1934614913676995, "grad_norm": 4.392478942871094, "learning_rate": 9.878827248823458e-06, "loss": 0.4614, "step": 25689 }, { "epoch": 4.193624750010204, "grad_norm": 4.156605243682861, "learning_rate": 9.878186154785662e-06, "loss": 0.3782, "step": 25690 }, { "epoch": 4.193788008652708, "grad_norm": 3.7983205318450928, "learning_rate": 9.877545061248601e-06, "loss": 0.4188, "step": 25691 }, { "epoch": 4.193951267295213, "grad_norm": 3.167332649230957, "learning_rate": 9.876903968214902e-06, "loss": 0.3715, "step": 25692 }, { "epoch": 4.194114525937717, "grad_norm": 4.04841423034668, "learning_rate": 9.876262875687206e-06, "loss": 0.402, "step": 25693 }, { "epoch": 4.194277784580221, "grad_norm": 3.5706303119659424, "learning_rate": 9.875621783668145e-06, "loss": 0.3928, "step": 25694 }, { "epoch": 4.194441043222725, "grad_norm": 3.8031246662139893, "learning_rate": 9.874980692160357e-06, "loss": 0.3635, "step": 25695 }, { "epoch": 4.19460430186523, "grad_norm": 3.5975160598754883, "learning_rate": 9.874339601166474e-06, "loss": 0.3296, "step": 25696 }, { "epoch": 4.194767560507734, "grad_norm": 3.604989767074585, "learning_rate": 9.873698510689137e-06, "loss": 0.4156, "step": 25697 }, { "epoch": 4.194930819150239, "grad_norm": 3.8171935081481934, "learning_rate": 9.873057420730975e-06, "loss": 0.506, "step": 25698 }, { "epoch": 4.195094077792743, "grad_norm": 4.115383148193359, "learning_rate": 9.872416331294626e-06, "loss": 0.4223, "step": 25699 }, { "epoch": 4.1952573364352475, "grad_norm": 3.6283013820648193, "learning_rate": 9.871775242382727e-06, "loss": 0.3216, "step": 25700 }, { "epoch": 4.195420595077752, "grad_norm": 3.8446848392486572, "learning_rate": 9.871134153997914e-06, "loss": 0.3917, "step": 25701 }, { "epoch": 4.195583853720256, "grad_norm": 3.613886594772339, "learning_rate": 9.870493066142816e-06, "loss": 0.3807, "step": 25702 }, { "epoch": 4.195747112362761, "grad_norm": 3.2390928268432617, "learning_rate": 9.869851978820074e-06, "loss": 0.347, "step": 25703 }, { "epoch": 4.195910371005265, "grad_norm": 3.2668135166168213, "learning_rate": 9.869210892032319e-06, "loss": 0.3466, "step": 25704 }, { "epoch": 4.19607362964777, "grad_norm": 3.20949387550354, "learning_rate": 9.86856980578219e-06, "loss": 0.368, "step": 25705 }, { "epoch": 4.196236888290274, "grad_norm": 3.223376989364624, "learning_rate": 9.867928720072321e-06, "loss": 0.3909, "step": 25706 }, { "epoch": 4.1964001469327785, "grad_norm": 3.1461374759674072, "learning_rate": 9.867287634905344e-06, "loss": 0.3514, "step": 25707 }, { "epoch": 4.196563405575283, "grad_norm": 3.0467772483825684, "learning_rate": 9.866646550283901e-06, "loss": 0.3595, "step": 25708 }, { "epoch": 4.196726664217787, "grad_norm": 4.071831226348877, "learning_rate": 9.866005466210623e-06, "loss": 0.407, "step": 25709 }, { "epoch": 4.196889922860292, "grad_norm": 4.094759941101074, "learning_rate": 9.865364382688146e-06, "loss": 0.4606, "step": 25710 }, { "epoch": 4.197053181502795, "grad_norm": 3.755983829498291, "learning_rate": 9.864723299719109e-06, "loss": 0.4058, "step": 25711 }, { "epoch": 4.1972164401453, "grad_norm": 3.782334327697754, "learning_rate": 9.864082217306137e-06, "loss": 0.376, "step": 25712 }, { "epoch": 4.197379698787804, "grad_norm": 3.9613349437713623, "learning_rate": 9.863441135451874e-06, "loss": 0.4528, "step": 25713 }, { "epoch": 4.197542957430309, "grad_norm": 3.3600544929504395, "learning_rate": 9.862800054158954e-06, "loss": 0.3799, "step": 25714 }, { "epoch": 4.197706216072813, "grad_norm": 4.150112152099609, "learning_rate": 9.862158973430011e-06, "loss": 0.7995, "step": 25715 }, { "epoch": 4.197869474715318, "grad_norm": 4.111082077026367, "learning_rate": 9.861517893267678e-06, "loss": 0.3836, "step": 25716 }, { "epoch": 4.198032733357822, "grad_norm": 3.808077812194824, "learning_rate": 9.860876813674594e-06, "loss": 0.4208, "step": 25717 }, { "epoch": 4.1981959920003264, "grad_norm": 2.9927334785461426, "learning_rate": 9.860235734653395e-06, "loss": 0.3659, "step": 25718 }, { "epoch": 4.198359250642831, "grad_norm": 3.651397943496704, "learning_rate": 9.859594656206709e-06, "loss": 0.4034, "step": 25719 }, { "epoch": 4.198522509285335, "grad_norm": 3.0364599227905273, "learning_rate": 9.858953578337181e-06, "loss": 0.3272, "step": 25720 }, { "epoch": 4.19868576792784, "grad_norm": 3.8823976516723633, "learning_rate": 9.858312501047445e-06, "loss": 0.3878, "step": 25721 }, { "epoch": 4.198849026570344, "grad_norm": 3.755254030227661, "learning_rate": 9.857671424340128e-06, "loss": 0.3882, "step": 25722 }, { "epoch": 4.199012285212849, "grad_norm": 3.3413407802581787, "learning_rate": 9.857030348217868e-06, "loss": 0.3912, "step": 25723 }, { "epoch": 4.199175543855353, "grad_norm": 3.7772927284240723, "learning_rate": 9.856389272683303e-06, "loss": 0.4031, "step": 25724 }, { "epoch": 4.1993388024978575, "grad_norm": 3.702789306640625, "learning_rate": 9.85574819773907e-06, "loss": 0.3869, "step": 25725 }, { "epoch": 4.199502061140362, "grad_norm": 3.2975456714630127, "learning_rate": 9.8551071233878e-06, "loss": 0.4027, "step": 25726 }, { "epoch": 4.199665319782866, "grad_norm": 4.016416549682617, "learning_rate": 9.854466049632132e-06, "loss": 0.4342, "step": 25727 }, { "epoch": 4.19982857842537, "grad_norm": 3.3090710639953613, "learning_rate": 9.853824976474695e-06, "loss": 0.3614, "step": 25728 }, { "epoch": 4.199991837067874, "grad_norm": 3.176439046859741, "learning_rate": 9.853183903918133e-06, "loss": 0.351, "step": 25729 }, { "epoch": 4.200155095710379, "grad_norm": 3.854471206665039, "learning_rate": 9.852542831965076e-06, "loss": 0.4245, "step": 25730 }, { "epoch": 4.200318354352883, "grad_norm": 3.542609214782715, "learning_rate": 9.851901760618157e-06, "loss": 0.3658, "step": 25731 }, { "epoch": 4.200481612995388, "grad_norm": 3.5444772243499756, "learning_rate": 9.851260689880016e-06, "loss": 0.3441, "step": 25732 }, { "epoch": 4.200644871637892, "grad_norm": 3.9034948348999023, "learning_rate": 9.850619619753286e-06, "loss": 0.4038, "step": 25733 }, { "epoch": 4.200808130280397, "grad_norm": 3.8780159950256348, "learning_rate": 9.849978550240599e-06, "loss": 0.4419, "step": 25734 }, { "epoch": 4.200971388922901, "grad_norm": 4.118142604827881, "learning_rate": 9.849337481344596e-06, "loss": 0.5285, "step": 25735 }, { "epoch": 4.2011346475654054, "grad_norm": 2.7524032592773438, "learning_rate": 9.848696413067909e-06, "loss": 0.2834, "step": 25736 }, { "epoch": 4.20129790620791, "grad_norm": 3.4578278064727783, "learning_rate": 9.848055345413175e-06, "loss": 0.3479, "step": 25737 }, { "epoch": 4.201461164850414, "grad_norm": 3.6549909114837646, "learning_rate": 9.847414278383028e-06, "loss": 0.3692, "step": 25738 }, { "epoch": 4.201624423492919, "grad_norm": 3.3340673446655273, "learning_rate": 9.846773211980103e-06, "loss": 0.407, "step": 25739 }, { "epoch": 4.201787682135423, "grad_norm": 3.3767919540405273, "learning_rate": 9.84613214620704e-06, "loss": 0.342, "step": 25740 }, { "epoch": 4.201950940777928, "grad_norm": 3.0037901401519775, "learning_rate": 9.845491081066463e-06, "loss": 0.3479, "step": 25741 }, { "epoch": 4.202114199420432, "grad_norm": 2.919433116912842, "learning_rate": 9.844850016561017e-06, "loss": 0.3484, "step": 25742 }, { "epoch": 4.2022774580629365, "grad_norm": 3.727858781814575, "learning_rate": 9.844208952693335e-06, "loss": 0.4001, "step": 25743 }, { "epoch": 4.202440716705441, "grad_norm": 3.197880983352661, "learning_rate": 9.843567889466048e-06, "loss": 0.4427, "step": 25744 }, { "epoch": 4.202603975347945, "grad_norm": 3.7925894260406494, "learning_rate": 9.842926826881796e-06, "loss": 0.3387, "step": 25745 }, { "epoch": 4.20276723399045, "grad_norm": 2.7607643604278564, "learning_rate": 9.84228576494321e-06, "loss": 0.2877, "step": 25746 }, { "epoch": 4.202930492632953, "grad_norm": 2.337167263031006, "learning_rate": 9.84164470365293e-06, "loss": 0.2793, "step": 25747 }, { "epoch": 4.203093751275458, "grad_norm": 3.6452527046203613, "learning_rate": 9.841003643013587e-06, "loss": 0.4087, "step": 25748 }, { "epoch": 4.203257009917962, "grad_norm": 2.634718894958496, "learning_rate": 9.84036258302782e-06, "loss": 0.3036, "step": 25749 }, { "epoch": 4.203420268560467, "grad_norm": 3.2424139976501465, "learning_rate": 9.839721523698265e-06, "loss": 0.3313, "step": 25750 }, { "epoch": 4.203583527202971, "grad_norm": 3.484750747680664, "learning_rate": 9.83908046502755e-06, "loss": 0.3661, "step": 25751 }, { "epoch": 4.203746785845476, "grad_norm": 3.728580951690674, "learning_rate": 9.838439407018317e-06, "loss": 0.3917, "step": 25752 }, { "epoch": 4.20391004448798, "grad_norm": 3.3566322326660156, "learning_rate": 9.837798349673197e-06, "loss": 0.388, "step": 25753 }, { "epoch": 4.204073303130484, "grad_norm": 2.9770522117614746, "learning_rate": 9.837157292994826e-06, "loss": 0.3579, "step": 25754 }, { "epoch": 4.204236561772989, "grad_norm": 4.028199195861816, "learning_rate": 9.836516236985841e-06, "loss": 0.4059, "step": 25755 }, { "epoch": 4.204399820415493, "grad_norm": 3.2393088340759277, "learning_rate": 9.835875181648875e-06, "loss": 0.3748, "step": 25756 }, { "epoch": 4.204563079057998, "grad_norm": 3.2870261669158936, "learning_rate": 9.835234126986564e-06, "loss": 0.3425, "step": 25757 }, { "epoch": 4.204726337700502, "grad_norm": 3.539407730102539, "learning_rate": 9.834593073001545e-06, "loss": 0.3981, "step": 25758 }, { "epoch": 4.204889596343007, "grad_norm": 2.9469244480133057, "learning_rate": 9.833952019696448e-06, "loss": 0.3546, "step": 25759 }, { "epoch": 4.205052854985511, "grad_norm": 3.2909445762634277, "learning_rate": 9.83331096707392e-06, "loss": 0.3279, "step": 25760 }, { "epoch": 4.2052161136280155, "grad_norm": 3.415555477142334, "learning_rate": 9.832669915136578e-06, "loss": 0.3837, "step": 25761 }, { "epoch": 4.20537937227052, "grad_norm": 3.5446176528930664, "learning_rate": 9.832028863887072e-06, "loss": 0.3789, "step": 25762 }, { "epoch": 4.205542630913024, "grad_norm": 2.9847006797790527, "learning_rate": 9.83138781332803e-06, "loss": 0.3445, "step": 25763 }, { "epoch": 4.205705889555528, "grad_norm": 3.4812686443328857, "learning_rate": 9.83074676346209e-06, "loss": 0.3546, "step": 25764 }, { "epoch": 4.205869148198032, "grad_norm": 4.168532848358154, "learning_rate": 9.830105714291886e-06, "loss": 0.4224, "step": 25765 }, { "epoch": 4.206032406840537, "grad_norm": 3.6269917488098145, "learning_rate": 9.829464665820053e-06, "loss": 0.3903, "step": 25766 }, { "epoch": 4.206195665483041, "grad_norm": 4.60047721862793, "learning_rate": 9.828823618049226e-06, "loss": 0.4433, "step": 25767 }, { "epoch": 4.206358924125546, "grad_norm": 3.1208574771881104, "learning_rate": 9.828182570982041e-06, "loss": 0.2976, "step": 25768 }, { "epoch": 4.20652218276805, "grad_norm": 3.507610321044922, "learning_rate": 9.82754152462113e-06, "loss": 0.3529, "step": 25769 }, { "epoch": 4.206685441410555, "grad_norm": 2.8740344047546387, "learning_rate": 9.826900478969138e-06, "loss": 0.2975, "step": 25770 }, { "epoch": 4.206848700053059, "grad_norm": 2.5789432525634766, "learning_rate": 9.826259434028688e-06, "loss": 0.3266, "step": 25771 }, { "epoch": 4.207011958695563, "grad_norm": 3.7329795360565186, "learning_rate": 9.82561838980242e-06, "loss": 0.4031, "step": 25772 }, { "epoch": 4.207175217338068, "grad_norm": 3.6957006454467773, "learning_rate": 9.824977346292965e-06, "loss": 0.3889, "step": 25773 }, { "epoch": 4.207338475980572, "grad_norm": 3.259279251098633, "learning_rate": 9.824336303502966e-06, "loss": 0.371, "step": 25774 }, { "epoch": 4.207501734623077, "grad_norm": 3.521211624145508, "learning_rate": 9.823695261435054e-06, "loss": 0.4179, "step": 25775 }, { "epoch": 4.207664993265581, "grad_norm": 3.0351333618164062, "learning_rate": 9.823054220091864e-06, "loss": 0.3815, "step": 25776 }, { "epoch": 4.207828251908086, "grad_norm": 3.1460161209106445, "learning_rate": 9.82241317947603e-06, "loss": 0.374, "step": 25777 }, { "epoch": 4.20799151055059, "grad_norm": 3.525207757949829, "learning_rate": 9.821772139590191e-06, "loss": 0.4166, "step": 25778 }, { "epoch": 4.2081547691930945, "grad_norm": 3.9536173343658447, "learning_rate": 9.82113110043698e-06, "loss": 0.3772, "step": 25779 }, { "epoch": 4.208318027835599, "grad_norm": 3.424158811569214, "learning_rate": 9.82049006201903e-06, "loss": 0.3606, "step": 25780 }, { "epoch": 4.2084812864781025, "grad_norm": 3.3405849933624268, "learning_rate": 9.819849024338978e-06, "loss": 0.4168, "step": 25781 }, { "epoch": 4.208644545120607, "grad_norm": 3.4661741256713867, "learning_rate": 9.819207987399458e-06, "loss": 0.3998, "step": 25782 }, { "epoch": 4.208807803763111, "grad_norm": 2.785484552383423, "learning_rate": 9.818566951203105e-06, "loss": 0.284, "step": 25783 }, { "epoch": 4.208971062405616, "grad_norm": 2.894256114959717, "learning_rate": 9.817925915752554e-06, "loss": 0.338, "step": 25784 }, { "epoch": 4.20913432104812, "grad_norm": 2.7609472274780273, "learning_rate": 9.81728488105044e-06, "loss": 0.3092, "step": 25785 }, { "epoch": 4.209297579690625, "grad_norm": 2.8939507007598877, "learning_rate": 9.816643847099402e-06, "loss": 0.3284, "step": 25786 }, { "epoch": 4.209460838333129, "grad_norm": 3.8025975227355957, "learning_rate": 9.816002813902071e-06, "loss": 0.4221, "step": 25787 }, { "epoch": 4.209624096975634, "grad_norm": 3.2868387699127197, "learning_rate": 9.815361781461082e-06, "loss": 0.357, "step": 25788 }, { "epoch": 4.209787355618138, "grad_norm": 4.071714401245117, "learning_rate": 9.814720749779075e-06, "loss": 0.4417, "step": 25789 }, { "epoch": 4.209950614260642, "grad_norm": 3.0620055198669434, "learning_rate": 9.814079718858677e-06, "loss": 0.3531, "step": 25790 }, { "epoch": 4.210113872903147, "grad_norm": 4.203248500823975, "learning_rate": 9.813438688702529e-06, "loss": 0.4505, "step": 25791 }, { "epoch": 4.210277131545651, "grad_norm": 3.2556002140045166, "learning_rate": 9.812797659313262e-06, "loss": 0.3563, "step": 25792 }, { "epoch": 4.210440390188156, "grad_norm": 2.730053186416626, "learning_rate": 9.812156630693514e-06, "loss": 0.3777, "step": 25793 }, { "epoch": 4.21060364883066, "grad_norm": 4.512734413146973, "learning_rate": 9.81151560284592e-06, "loss": 0.8767, "step": 25794 }, { "epoch": 4.210766907473165, "grad_norm": 3.388366460800171, "learning_rate": 9.810874575773114e-06, "loss": 0.4077, "step": 25795 }, { "epoch": 4.210930166115669, "grad_norm": 3.4737861156463623, "learning_rate": 9.810233549477728e-06, "loss": 0.3773, "step": 25796 }, { "epoch": 4.2110934247581735, "grad_norm": 3.316270112991333, "learning_rate": 9.809592523962404e-06, "loss": 0.3947, "step": 25797 }, { "epoch": 4.211256683400678, "grad_norm": 4.082653999328613, "learning_rate": 9.808951499229772e-06, "loss": 0.4212, "step": 25798 }, { "epoch": 4.2114199420431815, "grad_norm": 3.7004332542419434, "learning_rate": 9.808310475282472e-06, "loss": 0.4215, "step": 25799 }, { "epoch": 4.211583200685686, "grad_norm": 3.2398626804351807, "learning_rate": 9.80766945212313e-06, "loss": 0.3172, "step": 25800 }, { "epoch": 4.21174645932819, "grad_norm": 3.93093204498291, "learning_rate": 9.807028429754388e-06, "loss": 0.3591, "step": 25801 }, { "epoch": 4.211909717970695, "grad_norm": 3.4400269985198975, "learning_rate": 9.80638740817888e-06, "loss": 0.3911, "step": 25802 }, { "epoch": 4.212072976613199, "grad_norm": 3.5636682510375977, "learning_rate": 9.805746387399239e-06, "loss": 0.3962, "step": 25803 }, { "epoch": 4.212236235255704, "grad_norm": 2.9312336444854736, "learning_rate": 9.805105367418102e-06, "loss": 0.3517, "step": 25804 }, { "epoch": 4.212399493898208, "grad_norm": 3.9296770095825195, "learning_rate": 9.804464348238102e-06, "loss": 0.4256, "step": 25805 }, { "epoch": 4.2125627525407126, "grad_norm": 4.233404636383057, "learning_rate": 9.803823329861877e-06, "loss": 0.3838, "step": 25806 }, { "epoch": 4.212726011183217, "grad_norm": 3.0839641094207764, "learning_rate": 9.803182312292056e-06, "loss": 0.363, "step": 25807 }, { "epoch": 4.212889269825721, "grad_norm": 3.6317646503448486, "learning_rate": 9.802541295531283e-06, "loss": 0.4412, "step": 25808 }, { "epoch": 4.213052528468226, "grad_norm": 3.506761312484741, "learning_rate": 9.801900279582191e-06, "loss": 0.4048, "step": 25809 }, { "epoch": 4.21321578711073, "grad_norm": 3.914740800857544, "learning_rate": 9.801259264447408e-06, "loss": 0.3974, "step": 25810 }, { "epoch": 4.213379045753235, "grad_norm": 3.198694944381714, "learning_rate": 9.80061825012957e-06, "loss": 0.3446, "step": 25811 }, { "epoch": 4.213542304395739, "grad_norm": 2.7241084575653076, "learning_rate": 9.799977236631318e-06, "loss": 0.3159, "step": 25812 }, { "epoch": 4.213705563038244, "grad_norm": 4.266925811767578, "learning_rate": 9.799336223955284e-06, "loss": 0.4459, "step": 25813 }, { "epoch": 4.213868821680748, "grad_norm": 3.2963905334472656, "learning_rate": 9.798695212104104e-06, "loss": 0.4041, "step": 25814 }, { "epoch": 4.2140320803232525, "grad_norm": 2.965419054031372, "learning_rate": 9.798054201080412e-06, "loss": 0.3962, "step": 25815 }, { "epoch": 4.214195338965757, "grad_norm": 3.9157474040985107, "learning_rate": 9.797413190886841e-06, "loss": 0.4745, "step": 25816 }, { "epoch": 4.2143585976082605, "grad_norm": 3.8627419471740723, "learning_rate": 9.79677218152603e-06, "loss": 0.4651, "step": 25817 }, { "epoch": 4.214521856250765, "grad_norm": 2.9007785320281982, "learning_rate": 9.796131173000612e-06, "loss": 0.3351, "step": 25818 }, { "epoch": 4.214685114893269, "grad_norm": 3.111086845397949, "learning_rate": 9.79549016531322e-06, "loss": 0.3046, "step": 25819 }, { "epoch": 4.214848373535774, "grad_norm": 3.897455930709839, "learning_rate": 9.794849158466492e-06, "loss": 0.4005, "step": 25820 }, { "epoch": 4.215011632178278, "grad_norm": 3.972038984298706, "learning_rate": 9.794208152463061e-06, "loss": 0.4142, "step": 25821 }, { "epoch": 4.215174890820783, "grad_norm": 3.4428884983062744, "learning_rate": 9.793567147305562e-06, "loss": 0.3528, "step": 25822 }, { "epoch": 4.215338149463287, "grad_norm": 3.2302756309509277, "learning_rate": 9.79292614299663e-06, "loss": 0.3383, "step": 25823 }, { "epoch": 4.2155014081057915, "grad_norm": 3.8924973011016846, "learning_rate": 9.7922851395389e-06, "loss": 0.4248, "step": 25824 }, { "epoch": 4.215664666748296, "grad_norm": 2.637373208999634, "learning_rate": 9.79164413693501e-06, "loss": 0.3201, "step": 25825 }, { "epoch": 4.2158279253908, "grad_norm": 3.698322296142578, "learning_rate": 9.791003135187592e-06, "loss": 0.3724, "step": 25826 }, { "epoch": 4.215991184033305, "grad_norm": 3.5580663681030273, "learning_rate": 9.79036213429928e-06, "loss": 0.3933, "step": 25827 }, { "epoch": 4.216154442675809, "grad_norm": 2.875703811645508, "learning_rate": 9.789721134272711e-06, "loss": 0.3507, "step": 25828 }, { "epoch": 4.216317701318314, "grad_norm": 3.4729111194610596, "learning_rate": 9.78908013511052e-06, "loss": 0.3628, "step": 25829 }, { "epoch": 4.216480959960818, "grad_norm": 3.873671054840088, "learning_rate": 9.78843913681534e-06, "loss": 0.4604, "step": 25830 }, { "epoch": 4.216644218603323, "grad_norm": 3.720536708831787, "learning_rate": 9.787798139389804e-06, "loss": 0.4487, "step": 25831 }, { "epoch": 4.216807477245827, "grad_norm": 2.931194305419922, "learning_rate": 9.787157142836552e-06, "loss": 0.3145, "step": 25832 }, { "epoch": 4.2169707358883315, "grad_norm": 3.470407247543335, "learning_rate": 9.786516147158218e-06, "loss": 0.3648, "step": 25833 }, { "epoch": 4.217133994530835, "grad_norm": 3.4956626892089844, "learning_rate": 9.785875152357431e-06, "loss": 0.3882, "step": 25834 }, { "epoch": 4.2172972531733395, "grad_norm": 3.289681911468506, "learning_rate": 9.785234158436834e-06, "loss": 0.389, "step": 25835 }, { "epoch": 4.217460511815844, "grad_norm": 3.3379859924316406, "learning_rate": 9.784593165399057e-06, "loss": 0.3759, "step": 25836 }, { "epoch": 4.217623770458348, "grad_norm": 4.542492866516113, "learning_rate": 9.783952173246737e-06, "loss": 0.4269, "step": 25837 }, { "epoch": 4.217787029100853, "grad_norm": 3.577716588973999, "learning_rate": 9.78331118198251e-06, "loss": 0.3338, "step": 25838 }, { "epoch": 4.217950287743357, "grad_norm": 3.382481575012207, "learning_rate": 9.782670191609007e-06, "loss": 0.3976, "step": 25839 }, { "epoch": 4.218113546385862, "grad_norm": 3.5045559406280518, "learning_rate": 9.782029202128865e-06, "loss": 0.3888, "step": 25840 }, { "epoch": 4.218276805028366, "grad_norm": 3.0677733421325684, "learning_rate": 9.781388213544719e-06, "loss": 0.4209, "step": 25841 }, { "epoch": 4.2184400636708705, "grad_norm": 3.478689670562744, "learning_rate": 9.780747225859202e-06, "loss": 0.3648, "step": 25842 }, { "epoch": 4.218603322313375, "grad_norm": 3.5822787284851074, "learning_rate": 9.78010623907495e-06, "loss": 0.3813, "step": 25843 }, { "epoch": 4.218766580955879, "grad_norm": 3.5072357654571533, "learning_rate": 9.7794652531946e-06, "loss": 0.3725, "step": 25844 }, { "epoch": 4.218929839598384, "grad_norm": 2.863379716873169, "learning_rate": 9.778824268220783e-06, "loss": 0.3174, "step": 25845 }, { "epoch": 4.219093098240888, "grad_norm": 2.9542691707611084, "learning_rate": 9.778183284156136e-06, "loss": 0.3441, "step": 25846 }, { "epoch": 4.219256356883393, "grad_norm": 3.2206404209136963, "learning_rate": 9.777542301003296e-06, "loss": 0.3266, "step": 25847 }, { "epoch": 4.219419615525897, "grad_norm": 3.9244790077209473, "learning_rate": 9.776901318764899e-06, "loss": 0.3915, "step": 25848 }, { "epoch": 4.219582874168402, "grad_norm": 4.136700630187988, "learning_rate": 9.77626033744357e-06, "loss": 0.3843, "step": 25849 }, { "epoch": 4.219746132810906, "grad_norm": 3.0580122470855713, "learning_rate": 9.775619357041952e-06, "loss": 0.3928, "step": 25850 }, { "epoch": 4.2199093914534105, "grad_norm": 3.283630847930908, "learning_rate": 9.774978377562678e-06, "loss": 0.3888, "step": 25851 }, { "epoch": 4.220072650095914, "grad_norm": 3.9054384231567383, "learning_rate": 9.774337399008385e-06, "loss": 0.4183, "step": 25852 }, { "epoch": 4.2202359087384185, "grad_norm": 4.136890888214111, "learning_rate": 9.773696421381703e-06, "loss": 0.3765, "step": 25853 }, { "epoch": 4.220399167380923, "grad_norm": 3.1979517936706543, "learning_rate": 9.77305544468527e-06, "loss": 0.367, "step": 25854 }, { "epoch": 4.220562426023427, "grad_norm": 3.434390068054199, "learning_rate": 9.772414468921722e-06, "loss": 0.3763, "step": 25855 }, { "epoch": 4.220725684665932, "grad_norm": 3.7520785331726074, "learning_rate": 9.771773494093691e-06, "loss": 0.4177, "step": 25856 }, { "epoch": 4.220888943308436, "grad_norm": 3.3047807216644287, "learning_rate": 9.77113252020381e-06, "loss": 0.3564, "step": 25857 }, { "epoch": 4.221052201950941, "grad_norm": 3.680440902709961, "learning_rate": 9.770491547254725e-06, "loss": 0.4626, "step": 25858 }, { "epoch": 4.221215460593445, "grad_norm": 3.1204404830932617, "learning_rate": 9.769850575249058e-06, "loss": 0.3935, "step": 25859 }, { "epoch": 4.2213787192359495, "grad_norm": 3.3798868656158447, "learning_rate": 9.769209604189447e-06, "loss": 0.3675, "step": 25860 }, { "epoch": 4.221541977878454, "grad_norm": 3.5046558380126953, "learning_rate": 9.768568634078528e-06, "loss": 0.444, "step": 25861 }, { "epoch": 4.221705236520958, "grad_norm": 3.8476462364196777, "learning_rate": 9.767927664918938e-06, "loss": 0.4712, "step": 25862 }, { "epoch": 4.221868495163463, "grad_norm": 4.2203755378723145, "learning_rate": 9.767286696713309e-06, "loss": 0.4086, "step": 25863 }, { "epoch": 4.222031753805967, "grad_norm": 3.021040678024292, "learning_rate": 9.766645729464277e-06, "loss": 0.3438, "step": 25864 }, { "epoch": 4.222195012448472, "grad_norm": 3.477388620376587, "learning_rate": 9.766004763174475e-06, "loss": 0.4378, "step": 25865 }, { "epoch": 4.222358271090976, "grad_norm": 4.234224319458008, "learning_rate": 9.76536379784654e-06, "loss": 0.426, "step": 25866 }, { "epoch": 4.222521529733481, "grad_norm": 3.3941428661346436, "learning_rate": 9.764722833483107e-06, "loss": 0.3933, "step": 25867 }, { "epoch": 4.222684788375985, "grad_norm": 3.504312038421631, "learning_rate": 9.764081870086809e-06, "loss": 0.3942, "step": 25868 }, { "epoch": 4.2228480470184895, "grad_norm": 3.022573232650757, "learning_rate": 9.76344090766028e-06, "loss": 0.4005, "step": 25869 }, { "epoch": 4.223011305660993, "grad_norm": 3.5660266876220703, "learning_rate": 9.762799946206156e-06, "loss": 0.3619, "step": 25870 }, { "epoch": 4.2231745643034975, "grad_norm": 3.1745166778564453, "learning_rate": 9.762158985727072e-06, "loss": 0.3662, "step": 25871 }, { "epoch": 4.223337822946002, "grad_norm": 3.836566686630249, "learning_rate": 9.761518026225662e-06, "loss": 0.3874, "step": 25872 }, { "epoch": 4.223501081588506, "grad_norm": 3.2696032524108887, "learning_rate": 9.760877067704562e-06, "loss": 0.3716, "step": 25873 }, { "epoch": 4.223664340231011, "grad_norm": 3.216510534286499, "learning_rate": 9.760236110166406e-06, "loss": 0.3308, "step": 25874 }, { "epoch": 4.223827598873515, "grad_norm": 3.561805248260498, "learning_rate": 9.75959515361383e-06, "loss": 0.4145, "step": 25875 }, { "epoch": 4.22399085751602, "grad_norm": 3.2923643589019775, "learning_rate": 9.758954198049468e-06, "loss": 0.3675, "step": 25876 }, { "epoch": 4.224154116158524, "grad_norm": 3.2712032794952393, "learning_rate": 9.758313243475955e-06, "loss": 0.3857, "step": 25877 }, { "epoch": 4.2243173748010285, "grad_norm": 3.0117177963256836, "learning_rate": 9.757672289895923e-06, "loss": 0.3247, "step": 25878 }, { "epoch": 4.224480633443533, "grad_norm": 2.8905606269836426, "learning_rate": 9.75703133731201e-06, "loss": 0.3263, "step": 25879 }, { "epoch": 4.224643892086037, "grad_norm": 3.764012098312378, "learning_rate": 9.756390385726847e-06, "loss": 0.3873, "step": 25880 }, { "epoch": 4.224807150728542, "grad_norm": 3.141970634460449, "learning_rate": 9.755749435143073e-06, "loss": 0.3119, "step": 25881 }, { "epoch": 4.224970409371046, "grad_norm": 3.197356939315796, "learning_rate": 9.755108485563319e-06, "loss": 0.3647, "step": 25882 }, { "epoch": 4.225133668013551, "grad_norm": 3.783686637878418, "learning_rate": 9.754467536990224e-06, "loss": 0.3854, "step": 25883 }, { "epoch": 4.225296926656055, "grad_norm": 3.424717664718628, "learning_rate": 9.753826589426415e-06, "loss": 0.3426, "step": 25884 }, { "epoch": 4.22546018529856, "grad_norm": 3.2857577800750732, "learning_rate": 9.753185642874538e-06, "loss": 0.3688, "step": 25885 }, { "epoch": 4.225623443941064, "grad_norm": 3.4722325801849365, "learning_rate": 9.75254469733722e-06, "loss": 0.3547, "step": 25886 }, { "epoch": 4.225786702583568, "grad_norm": 3.6603477001190186, "learning_rate": 9.751903752817102e-06, "loss": 0.3868, "step": 25887 }, { "epoch": 4.225949961226072, "grad_norm": 3.4375078678131104, "learning_rate": 9.751262809316807e-06, "loss": 0.3755, "step": 25888 }, { "epoch": 4.2261132198685765, "grad_norm": 3.3677570819854736, "learning_rate": 9.750621866838977e-06, "loss": 0.3604, "step": 25889 }, { "epoch": 4.226276478511081, "grad_norm": 3.7109830379486084, "learning_rate": 9.749980925386248e-06, "loss": 0.3827, "step": 25890 }, { "epoch": 4.226439737153585, "grad_norm": 5.270674228668213, "learning_rate": 9.749339984961253e-06, "loss": 0.4557, "step": 25891 }, { "epoch": 4.22660299579609, "grad_norm": 3.5008890628814697, "learning_rate": 9.748699045566626e-06, "loss": 0.3416, "step": 25892 }, { "epoch": 4.226766254438594, "grad_norm": 2.7440810203552246, "learning_rate": 9.748058107205004e-06, "loss": 0.3306, "step": 25893 }, { "epoch": 4.226929513081099, "grad_norm": 3.0412659645080566, "learning_rate": 9.74741716987902e-06, "loss": 0.3471, "step": 25894 }, { "epoch": 4.227092771723603, "grad_norm": 3.857853412628174, "learning_rate": 9.746776233591304e-06, "loss": 0.3917, "step": 25895 }, { "epoch": 4.2272560303661075, "grad_norm": 3.679577112197876, "learning_rate": 9.7461352983445e-06, "loss": 0.3496, "step": 25896 }, { "epoch": 4.227419289008612, "grad_norm": 3.443268299102783, "learning_rate": 9.74549436414124e-06, "loss": 0.3834, "step": 25897 }, { "epoch": 4.227582547651116, "grad_norm": 3.8159539699554443, "learning_rate": 9.744853430984154e-06, "loss": 0.4495, "step": 25898 }, { "epoch": 4.227745806293621, "grad_norm": 3.4864768981933594, "learning_rate": 9.744212498875875e-06, "loss": 0.4198, "step": 25899 }, { "epoch": 4.227909064936125, "grad_norm": 3.5415091514587402, "learning_rate": 9.743571567819046e-06, "loss": 0.3315, "step": 25900 }, { "epoch": 4.22807232357863, "grad_norm": 2.9646146297454834, "learning_rate": 9.742930637816299e-06, "loss": 0.3532, "step": 25901 }, { "epoch": 4.228235582221134, "grad_norm": 3.754338264465332, "learning_rate": 9.742289708870264e-06, "loss": 0.3853, "step": 25902 }, { "epoch": 4.228398840863639, "grad_norm": 3.8896026611328125, "learning_rate": 9.74164878098358e-06, "loss": 0.4169, "step": 25903 }, { "epoch": 4.228562099506143, "grad_norm": 2.957789659500122, "learning_rate": 9.741007854158879e-06, "loss": 0.2863, "step": 25904 }, { "epoch": 4.228725358148647, "grad_norm": 3.764129877090454, "learning_rate": 9.740366928398798e-06, "loss": 0.4193, "step": 25905 }, { "epoch": 4.228888616791151, "grad_norm": 4.067847728729248, "learning_rate": 9.739726003705971e-06, "loss": 0.4362, "step": 25906 }, { "epoch": 4.2290518754336555, "grad_norm": 3.0926096439361572, "learning_rate": 9.739085080083034e-06, "loss": 0.3127, "step": 25907 }, { "epoch": 4.22921513407616, "grad_norm": 3.9836161136627197, "learning_rate": 9.738444157532616e-06, "loss": 0.3828, "step": 25908 }, { "epoch": 4.229378392718664, "grad_norm": 4.1415791511535645, "learning_rate": 9.737803236057357e-06, "loss": 0.3811, "step": 25909 }, { "epoch": 4.229541651361169, "grad_norm": 3.525865316390991, "learning_rate": 9.737162315659889e-06, "loss": 0.3688, "step": 25910 }, { "epoch": 4.229704910003673, "grad_norm": 3.4514129161834717, "learning_rate": 9.736521396342847e-06, "loss": 0.3924, "step": 25911 }, { "epoch": 4.229868168646178, "grad_norm": 3.1673758029937744, "learning_rate": 9.735880478108867e-06, "loss": 0.3511, "step": 25912 }, { "epoch": 4.230031427288682, "grad_norm": 4.008761882781982, "learning_rate": 9.735239560960583e-06, "loss": 0.4325, "step": 25913 }, { "epoch": 4.2301946859311865, "grad_norm": 3.3028104305267334, "learning_rate": 9.734598644900628e-06, "loss": 0.3619, "step": 25914 }, { "epoch": 4.230357944573691, "grad_norm": 3.385361671447754, "learning_rate": 9.733957729931638e-06, "loss": 0.3759, "step": 25915 }, { "epoch": 4.230521203216195, "grad_norm": 3.6065261363983154, "learning_rate": 9.73331681605625e-06, "loss": 0.3179, "step": 25916 }, { "epoch": 4.2306844618587, "grad_norm": 3.372903823852539, "learning_rate": 9.732675903277094e-06, "loss": 0.3749, "step": 25917 }, { "epoch": 4.230847720501204, "grad_norm": 3.8097918033599854, "learning_rate": 9.732034991596806e-06, "loss": 0.3927, "step": 25918 }, { "epoch": 4.231010979143709, "grad_norm": 3.66251540184021, "learning_rate": 9.731394081018022e-06, "loss": 0.4415, "step": 25919 }, { "epoch": 4.231174237786213, "grad_norm": 3.377006769180298, "learning_rate": 9.730753171543374e-06, "loss": 0.4358, "step": 25920 }, { "epoch": 4.231337496428718, "grad_norm": 3.646481990814209, "learning_rate": 9.730112263175498e-06, "loss": 0.3797, "step": 25921 }, { "epoch": 4.231500755071222, "grad_norm": 3.090186357498169, "learning_rate": 9.729471355917029e-06, "loss": 0.2982, "step": 25922 }, { "epoch": 4.231664013713726, "grad_norm": 4.237617015838623, "learning_rate": 9.7288304497706e-06, "loss": 0.4145, "step": 25923 }, { "epoch": 4.23182727235623, "grad_norm": 3.0320324897766113, "learning_rate": 9.728189544738848e-06, "loss": 0.3378, "step": 25924 }, { "epoch": 4.2319905309987345, "grad_norm": 3.2596654891967773, "learning_rate": 9.727548640824407e-06, "loss": 0.3607, "step": 25925 }, { "epoch": 4.232153789641239, "grad_norm": 2.651823043823242, "learning_rate": 9.726907738029911e-06, "loss": 0.3064, "step": 25926 }, { "epoch": 4.232317048283743, "grad_norm": 3.412339925765991, "learning_rate": 9.726266836357993e-06, "loss": 0.3608, "step": 25927 }, { "epoch": 4.232480306926248, "grad_norm": 3.6186468601226807, "learning_rate": 9.72562593581129e-06, "loss": 0.3964, "step": 25928 }, { "epoch": 4.232643565568752, "grad_norm": 3.6517815589904785, "learning_rate": 9.724985036392433e-06, "loss": 0.4158, "step": 25929 }, { "epoch": 4.232806824211257, "grad_norm": 3.2921574115753174, "learning_rate": 9.72434413810406e-06, "loss": 0.3415, "step": 25930 }, { "epoch": 4.232970082853761, "grad_norm": 3.2582197189331055, "learning_rate": 9.723703240948803e-06, "loss": 0.3508, "step": 25931 }, { "epoch": 4.2331333414962655, "grad_norm": 3.710926055908203, "learning_rate": 9.723062344929298e-06, "loss": 0.3684, "step": 25932 }, { "epoch": 4.23329660013877, "grad_norm": 3.864980459213257, "learning_rate": 9.72242145004818e-06, "loss": 0.4252, "step": 25933 }, { "epoch": 4.233459858781274, "grad_norm": 3.5497584342956543, "learning_rate": 9.72178055630808e-06, "loss": 0.3944, "step": 25934 }, { "epoch": 4.233623117423779, "grad_norm": 4.461552619934082, "learning_rate": 9.721139663711637e-06, "loss": 0.4464, "step": 25935 }, { "epoch": 4.233786376066283, "grad_norm": 3.519702672958374, "learning_rate": 9.720498772261489e-06, "loss": 0.3475, "step": 25936 }, { "epoch": 4.233949634708788, "grad_norm": 3.2948930263519287, "learning_rate": 9.719857881960258e-06, "loss": 0.2956, "step": 25937 }, { "epoch": 4.234112893351292, "grad_norm": 3.788245916366577, "learning_rate": 9.719216992810586e-06, "loss": 0.4082, "step": 25938 }, { "epoch": 4.234276151993797, "grad_norm": 3.0463480949401855, "learning_rate": 9.718576104815108e-06, "loss": 0.3473, "step": 25939 }, { "epoch": 4.2344394106363, "grad_norm": 3.5701816082000732, "learning_rate": 9.717935217976458e-06, "loss": 0.3983, "step": 25940 }, { "epoch": 4.234602669278805, "grad_norm": 4.0888543128967285, "learning_rate": 9.717294332297269e-06, "loss": 0.4493, "step": 25941 }, { "epoch": 4.234765927921309, "grad_norm": 3.89347243309021, "learning_rate": 9.716653447780177e-06, "loss": 0.4347, "step": 25942 }, { "epoch": 4.2349291865638135, "grad_norm": 3.4344756603240967, "learning_rate": 9.716012564427815e-06, "loss": 0.3369, "step": 25943 }, { "epoch": 4.235092445206318, "grad_norm": 3.571075439453125, "learning_rate": 9.715371682242819e-06, "loss": 0.3379, "step": 25944 }, { "epoch": 4.235255703848822, "grad_norm": 3.621258497238159, "learning_rate": 9.714730801227819e-06, "loss": 0.4337, "step": 25945 }, { "epoch": 4.235418962491327, "grad_norm": 3.3587045669555664, "learning_rate": 9.71408992138546e-06, "loss": 0.369, "step": 25946 }, { "epoch": 4.235582221133831, "grad_norm": 3.2853472232818604, "learning_rate": 9.713449042718365e-06, "loss": 0.39, "step": 25947 }, { "epoch": 4.235745479776336, "grad_norm": 2.7920379638671875, "learning_rate": 9.712808165229174e-06, "loss": 0.2921, "step": 25948 }, { "epoch": 4.23590873841884, "grad_norm": 3.271240472793579, "learning_rate": 9.712167288920515e-06, "loss": 0.3777, "step": 25949 }, { "epoch": 4.2360719970613445, "grad_norm": 3.9321634769439697, "learning_rate": 9.711526413795032e-06, "loss": 0.4333, "step": 25950 }, { "epoch": 4.236235255703849, "grad_norm": 3.7893333435058594, "learning_rate": 9.710885539855356e-06, "loss": 0.3955, "step": 25951 }, { "epoch": 4.236398514346353, "grad_norm": 2.761369466781616, "learning_rate": 9.710244667104119e-06, "loss": 0.3201, "step": 25952 }, { "epoch": 4.236561772988858, "grad_norm": 3.164245843887329, "learning_rate": 9.709603795543957e-06, "loss": 0.3587, "step": 25953 }, { "epoch": 4.236725031631362, "grad_norm": 3.5651440620422363, "learning_rate": 9.708962925177502e-06, "loss": 0.3647, "step": 25954 }, { "epoch": 4.236888290273867, "grad_norm": 3.797335624694824, "learning_rate": 9.708322056007396e-06, "loss": 0.4609, "step": 25955 }, { "epoch": 4.237051548916371, "grad_norm": 3.5910372734069824, "learning_rate": 9.707681188036264e-06, "loss": 0.3791, "step": 25956 }, { "epoch": 4.237214807558875, "grad_norm": 3.4803895950317383, "learning_rate": 9.707040321266746e-06, "loss": 0.3714, "step": 25957 }, { "epoch": 4.237378066201379, "grad_norm": 3.2631781101226807, "learning_rate": 9.706399455701472e-06, "loss": 0.357, "step": 25958 }, { "epoch": 4.237541324843884, "grad_norm": 3.6184945106506348, "learning_rate": 9.70575859134308e-06, "loss": 0.3447, "step": 25959 }, { "epoch": 4.237704583486388, "grad_norm": 3.3369946479797363, "learning_rate": 9.705117728194202e-06, "loss": 0.3631, "step": 25960 }, { "epoch": 4.2378678421288924, "grad_norm": 4.0378804206848145, "learning_rate": 9.704476866257476e-06, "loss": 0.4018, "step": 25961 }, { "epoch": 4.238031100771397, "grad_norm": 3.056396245956421, "learning_rate": 9.703836005535533e-06, "loss": 0.3267, "step": 25962 }, { "epoch": 4.238194359413901, "grad_norm": 2.708867073059082, "learning_rate": 9.70319514603101e-06, "loss": 0.3769, "step": 25963 }, { "epoch": 4.238357618056406, "grad_norm": 3.482837677001953, "learning_rate": 9.702554287746537e-06, "loss": 0.3951, "step": 25964 }, { "epoch": 4.23852087669891, "grad_norm": 3.272132158279419, "learning_rate": 9.701913430684756e-06, "loss": 0.3401, "step": 25965 }, { "epoch": 4.238684135341415, "grad_norm": 3.3802075386047363, "learning_rate": 9.701272574848291e-06, "loss": 0.3529, "step": 25966 }, { "epoch": 4.238847393983919, "grad_norm": 3.6018905639648438, "learning_rate": 9.700631720239784e-06, "loss": 0.3941, "step": 25967 }, { "epoch": 4.2390106526264235, "grad_norm": 3.8611879348754883, "learning_rate": 9.699990866861866e-06, "loss": 0.4315, "step": 25968 }, { "epoch": 4.239173911268928, "grad_norm": 3.1993677616119385, "learning_rate": 9.699350014717173e-06, "loss": 0.3649, "step": 25969 }, { "epoch": 4.239337169911432, "grad_norm": 3.4534854888916016, "learning_rate": 9.698709163808337e-06, "loss": 0.3868, "step": 25970 }, { "epoch": 4.239500428553937, "grad_norm": 3.4632086753845215, "learning_rate": 9.698068314137996e-06, "loss": 0.3177, "step": 25971 }, { "epoch": 4.239663687196441, "grad_norm": 4.327767848968506, "learning_rate": 9.697427465708779e-06, "loss": 0.4012, "step": 25972 }, { "epoch": 4.239826945838946, "grad_norm": 3.794809341430664, "learning_rate": 9.696786618523324e-06, "loss": 0.4264, "step": 25973 }, { "epoch": 4.23999020448145, "grad_norm": 3.6267929077148438, "learning_rate": 9.696145772584267e-06, "loss": 0.4052, "step": 25974 }, { "epoch": 4.240153463123955, "grad_norm": 4.4009928703308105, "learning_rate": 9.695504927894244e-06, "loss": 0.4634, "step": 25975 }, { "epoch": 4.240316721766458, "grad_norm": 3.5585551261901855, "learning_rate": 9.694864084455879e-06, "loss": 0.3715, "step": 25976 }, { "epoch": 4.240479980408963, "grad_norm": 4.285363674163818, "learning_rate": 9.694223242271813e-06, "loss": 0.464, "step": 25977 }, { "epoch": 4.240643239051467, "grad_norm": 4.000032901763916, "learning_rate": 9.693582401344682e-06, "loss": 0.3811, "step": 25978 }, { "epoch": 4.2408064976939714, "grad_norm": 3.232619285583496, "learning_rate": 9.692941561677118e-06, "loss": 0.3134, "step": 25979 }, { "epoch": 4.240969756336476, "grad_norm": 3.3892998695373535, "learning_rate": 9.692300723271752e-06, "loss": 0.3664, "step": 25980 }, { "epoch": 4.24113301497898, "grad_norm": 3.424748420715332, "learning_rate": 9.691659886131224e-06, "loss": 0.3332, "step": 25981 }, { "epoch": 4.241296273621485, "grad_norm": 3.380237102508545, "learning_rate": 9.691019050258168e-06, "loss": 0.3276, "step": 25982 }, { "epoch": 4.241459532263989, "grad_norm": 2.894376277923584, "learning_rate": 9.69037821565521e-06, "loss": 0.3015, "step": 25983 }, { "epoch": 4.241622790906494, "grad_norm": 3.5787675380706787, "learning_rate": 9.689737382324995e-06, "loss": 0.3229, "step": 25984 }, { "epoch": 4.241786049548998, "grad_norm": 3.8918237686157227, "learning_rate": 9.689096550270154e-06, "loss": 0.376, "step": 25985 }, { "epoch": 4.2419493081915025, "grad_norm": 3.3854165077209473, "learning_rate": 9.688455719493317e-06, "loss": 0.3591, "step": 25986 }, { "epoch": 4.242112566834007, "grad_norm": 4.226072788238525, "learning_rate": 9.687814889997119e-06, "loss": 0.3782, "step": 25987 }, { "epoch": 4.242275825476511, "grad_norm": 2.949643135070801, "learning_rate": 9.687174061784198e-06, "loss": 0.3675, "step": 25988 }, { "epoch": 4.242439084119016, "grad_norm": 3.4209506511688232, "learning_rate": 9.686533234857186e-06, "loss": 0.3276, "step": 25989 }, { "epoch": 4.24260234276152, "grad_norm": 3.3226945400238037, "learning_rate": 9.685892409218718e-06, "loss": 0.3662, "step": 25990 }, { "epoch": 4.242765601404025, "grad_norm": 3.425288677215576, "learning_rate": 9.685251584871428e-06, "loss": 0.3436, "step": 25991 }, { "epoch": 4.242928860046529, "grad_norm": 6.239046096801758, "learning_rate": 9.68461076181795e-06, "loss": 0.4045, "step": 25992 }, { "epoch": 4.243092118689033, "grad_norm": 4.028337001800537, "learning_rate": 9.683969940060917e-06, "loss": 0.3962, "step": 25993 }, { "epoch": 4.243255377331537, "grad_norm": 3.5002593994140625, "learning_rate": 9.683329119602965e-06, "loss": 0.3615, "step": 25994 }, { "epoch": 4.243418635974042, "grad_norm": 3.252352476119995, "learning_rate": 9.682688300446729e-06, "loss": 0.3446, "step": 25995 }, { "epoch": 4.243581894616546, "grad_norm": 4.327279090881348, "learning_rate": 9.68204748259484e-06, "loss": 0.4374, "step": 25996 }, { "epoch": 4.24374515325905, "grad_norm": 3.0725412368774414, "learning_rate": 9.681406666049933e-06, "loss": 0.3173, "step": 25997 }, { "epoch": 4.243908411901555, "grad_norm": 3.4435689449310303, "learning_rate": 9.680765850814643e-06, "loss": 0.4078, "step": 25998 }, { "epoch": 4.244071670544059, "grad_norm": 3.742130994796753, "learning_rate": 9.680125036891603e-06, "loss": 0.3758, "step": 25999 }, { "epoch": 4.244234929186564, "grad_norm": 3.55656099319458, "learning_rate": 9.67948422428345e-06, "loss": 0.3831, "step": 26000 }, { "epoch": 4.244398187829068, "grad_norm": 3.216322422027588, "learning_rate": 9.678843412992815e-06, "loss": 0.3725, "step": 26001 }, { "epoch": 4.244561446471573, "grad_norm": 2.993175983428955, "learning_rate": 9.678202603022334e-06, "loss": 0.3549, "step": 26002 }, { "epoch": 4.244724705114077, "grad_norm": 3.9004149436950684, "learning_rate": 9.67756179437464e-06, "loss": 0.416, "step": 26003 }, { "epoch": 4.2448879637565815, "grad_norm": 3.359018325805664, "learning_rate": 9.676920987052373e-06, "loss": 0.3581, "step": 26004 }, { "epoch": 4.245051222399086, "grad_norm": 3.860410451889038, "learning_rate": 9.676280181058156e-06, "loss": 0.3557, "step": 26005 }, { "epoch": 4.24521448104159, "grad_norm": 3.241734027862549, "learning_rate": 9.67563937639463e-06, "loss": 0.3636, "step": 26006 }, { "epoch": 4.245377739684095, "grad_norm": 3.3464860916137695, "learning_rate": 9.674998573064429e-06, "loss": 0.4087, "step": 26007 }, { "epoch": 4.245540998326599, "grad_norm": 2.881065845489502, "learning_rate": 9.674357771070186e-06, "loss": 0.3275, "step": 26008 }, { "epoch": 4.245704256969104, "grad_norm": 4.157265663146973, "learning_rate": 9.673716970414535e-06, "loss": 0.3829, "step": 26009 }, { "epoch": 4.245867515611607, "grad_norm": 3.5932817459106445, "learning_rate": 9.673076171100108e-06, "loss": 0.3366, "step": 26010 }, { "epoch": 4.246030774254112, "grad_norm": 3.5315585136413574, "learning_rate": 9.672435373129545e-06, "loss": 0.3954, "step": 26011 }, { "epoch": 4.246194032896616, "grad_norm": 3.095370054244995, "learning_rate": 9.671794576505474e-06, "loss": 0.32, "step": 26012 }, { "epoch": 4.246357291539121, "grad_norm": 3.9653260707855225, "learning_rate": 9.671153781230532e-06, "loss": 0.4146, "step": 26013 }, { "epoch": 4.246520550181625, "grad_norm": 3.589778423309326, "learning_rate": 9.670512987307356e-06, "loss": 0.3844, "step": 26014 }, { "epoch": 4.246683808824129, "grad_norm": 3.159076690673828, "learning_rate": 9.669872194738574e-06, "loss": 0.3087, "step": 26015 }, { "epoch": 4.246847067466634, "grad_norm": 4.009730815887451, "learning_rate": 9.669231403526823e-06, "loss": 0.4563, "step": 26016 }, { "epoch": 4.247010326109138, "grad_norm": 4.177879333496094, "learning_rate": 9.668590613674737e-06, "loss": 0.4867, "step": 26017 }, { "epoch": 4.247173584751643, "grad_norm": 3.693629741668701, "learning_rate": 9.667949825184949e-06, "loss": 0.4287, "step": 26018 }, { "epoch": 4.247336843394147, "grad_norm": 3.4682204723358154, "learning_rate": 9.667309038060095e-06, "loss": 0.3964, "step": 26019 }, { "epoch": 4.247500102036652, "grad_norm": 3.4335312843322754, "learning_rate": 9.666668252302806e-06, "loss": 0.3498, "step": 26020 }, { "epoch": 4.247663360679156, "grad_norm": 3.424823522567749, "learning_rate": 9.666027467915719e-06, "loss": 0.4463, "step": 26021 }, { "epoch": 4.2478266193216605, "grad_norm": 3.5456881523132324, "learning_rate": 9.665386684901465e-06, "loss": 0.3777, "step": 26022 }, { "epoch": 4.247989877964165, "grad_norm": 3.874586582183838, "learning_rate": 9.664745903262683e-06, "loss": 0.4286, "step": 26023 }, { "epoch": 4.248153136606669, "grad_norm": 3.877504587173462, "learning_rate": 9.664105123002008e-06, "loss": 0.3667, "step": 26024 }, { "epoch": 4.248316395249174, "grad_norm": 2.8660004138946533, "learning_rate": 9.663464344122064e-06, "loss": 0.346, "step": 26025 }, { "epoch": 4.248479653891678, "grad_norm": 3.2067954540252686, "learning_rate": 9.66282356662549e-06, "loss": 0.369, "step": 26026 }, { "epoch": 4.248642912534183, "grad_norm": 2.8498551845550537, "learning_rate": 9.662182790514924e-06, "loss": 0.3418, "step": 26027 }, { "epoch": 4.248806171176686, "grad_norm": 2.903270721435547, "learning_rate": 9.661542015792996e-06, "loss": 0.3385, "step": 26028 }, { "epoch": 4.248969429819191, "grad_norm": 3.725029230117798, "learning_rate": 9.660901242462341e-06, "loss": 0.407, "step": 26029 }, { "epoch": 4.249132688461695, "grad_norm": 4.57637882232666, "learning_rate": 9.660260470525593e-06, "loss": 0.471, "step": 26030 }, { "epoch": 4.2492959471041996, "grad_norm": 3.295067548751831, "learning_rate": 9.659619699985387e-06, "loss": 0.3657, "step": 26031 }, { "epoch": 4.249459205746704, "grad_norm": 3.307100772857666, "learning_rate": 9.658978930844354e-06, "loss": 0.3497, "step": 26032 }, { "epoch": 4.249622464389208, "grad_norm": 2.972109317779541, "learning_rate": 9.65833816310513e-06, "loss": 0.3497, "step": 26033 }, { "epoch": 4.249785723031713, "grad_norm": 3.1188271045684814, "learning_rate": 9.657697396770352e-06, "loss": 0.3742, "step": 26034 }, { "epoch": 4.249948981674217, "grad_norm": 3.2866523265838623, "learning_rate": 9.657056631842649e-06, "loss": 0.4046, "step": 26035 }, { "epoch": 4.250112240316722, "grad_norm": 3.394653081893921, "learning_rate": 9.656415868324656e-06, "loss": 0.3518, "step": 26036 }, { "epoch": 4.250275498959226, "grad_norm": 3.2152793407440186, "learning_rate": 9.655775106219006e-06, "loss": 0.3754, "step": 26037 }, { "epoch": 4.250438757601731, "grad_norm": 3.312732696533203, "learning_rate": 9.655134345528336e-06, "loss": 0.3689, "step": 26038 }, { "epoch": 4.250602016244235, "grad_norm": 3.181148052215576, "learning_rate": 9.654493586255279e-06, "loss": 0.346, "step": 26039 }, { "epoch": 4.2507652748867395, "grad_norm": 3.2951083183288574, "learning_rate": 9.653852828402467e-06, "loss": 0.3874, "step": 26040 }, { "epoch": 4.250928533529244, "grad_norm": 4.060558319091797, "learning_rate": 9.653212071972536e-06, "loss": 0.3674, "step": 26041 }, { "epoch": 4.251091792171748, "grad_norm": 3.2148170471191406, "learning_rate": 9.65257131696812e-06, "loss": 0.3399, "step": 26042 }, { "epoch": 4.251255050814253, "grad_norm": 3.004300594329834, "learning_rate": 9.651930563391851e-06, "loss": 0.3172, "step": 26043 }, { "epoch": 4.251418309456757, "grad_norm": 4.347881317138672, "learning_rate": 9.651289811246368e-06, "loss": 0.4824, "step": 26044 }, { "epoch": 4.251581568099262, "grad_norm": 3.0279903411865234, "learning_rate": 9.650649060534297e-06, "loss": 0.3547, "step": 26045 }, { "epoch": 4.251744826741765, "grad_norm": 3.8446319103240967, "learning_rate": 9.650008311258277e-06, "loss": 0.4359, "step": 26046 }, { "epoch": 4.25190808538427, "grad_norm": 4.344659328460693, "learning_rate": 9.64936756342094e-06, "loss": 0.4377, "step": 26047 }, { "epoch": 4.252071344026774, "grad_norm": 3.174699068069458, "learning_rate": 9.64872681702492e-06, "loss": 0.3678, "step": 26048 }, { "epoch": 4.2522346026692786, "grad_norm": 3.762104034423828, "learning_rate": 9.648086072072851e-06, "loss": 0.4463, "step": 26049 }, { "epoch": 4.252397861311783, "grad_norm": 3.7681784629821777, "learning_rate": 9.647445328567368e-06, "loss": 0.4045, "step": 26050 }, { "epoch": 4.252561119954287, "grad_norm": 3.5127108097076416, "learning_rate": 9.646804586511105e-06, "loss": 0.3358, "step": 26051 }, { "epoch": 4.252724378596792, "grad_norm": 3.1805622577667236, "learning_rate": 9.646163845906695e-06, "loss": 0.3708, "step": 26052 }, { "epoch": 4.252887637239296, "grad_norm": 3.6470861434936523, "learning_rate": 9.645523106756774e-06, "loss": 0.3963, "step": 26053 }, { "epoch": 4.253050895881801, "grad_norm": 3.064484119415283, "learning_rate": 9.64488236906397e-06, "loss": 0.3937, "step": 26054 }, { "epoch": 4.253214154524305, "grad_norm": 3.4109959602355957, "learning_rate": 9.644241632830923e-06, "loss": 0.3658, "step": 26055 }, { "epoch": 4.25337741316681, "grad_norm": 3.459697723388672, "learning_rate": 9.643600898060262e-06, "loss": 0.3897, "step": 26056 }, { "epoch": 4.253540671809314, "grad_norm": 3.2368390560150146, "learning_rate": 9.642960164754623e-06, "loss": 0.3802, "step": 26057 }, { "epoch": 4.2537039304518185, "grad_norm": 2.7021894454956055, "learning_rate": 9.642319432916641e-06, "loss": 0.3021, "step": 26058 }, { "epoch": 4.253867189094323, "grad_norm": 4.0614399909973145, "learning_rate": 9.641678702548948e-06, "loss": 0.4136, "step": 26059 }, { "epoch": 4.254030447736827, "grad_norm": 3.248960256576538, "learning_rate": 9.641037973654179e-06, "loss": 0.3538, "step": 26060 }, { "epoch": 4.254193706379332, "grad_norm": 3.5434494018554688, "learning_rate": 9.640397246234967e-06, "loss": 0.3455, "step": 26061 }, { "epoch": 4.254356965021836, "grad_norm": 3.5334715843200684, "learning_rate": 9.639756520293945e-06, "loss": 0.3308, "step": 26062 }, { "epoch": 4.25452022366434, "grad_norm": 3.3374860286712646, "learning_rate": 9.639115795833756e-06, "loss": 0.3735, "step": 26063 }, { "epoch": 4.254683482306844, "grad_norm": 3.209198236465454, "learning_rate": 9.638475072857016e-06, "loss": 0.3822, "step": 26064 }, { "epoch": 4.254846740949349, "grad_norm": 3.8734588623046875, "learning_rate": 9.637834351366372e-06, "loss": 0.3275, "step": 26065 }, { "epoch": 4.255009999591853, "grad_norm": 3.449350118637085, "learning_rate": 9.637193631364455e-06, "loss": 0.3772, "step": 26066 }, { "epoch": 4.2551732582343575, "grad_norm": 3.048435926437378, "learning_rate": 9.636552912853896e-06, "loss": 0.3259, "step": 26067 }, { "epoch": 4.255336516876862, "grad_norm": 3.3135626316070557, "learning_rate": 9.63591219583733e-06, "loss": 0.3828, "step": 26068 }, { "epoch": 4.255499775519366, "grad_norm": 2.7540504932403564, "learning_rate": 9.635271480317395e-06, "loss": 0.2867, "step": 26069 }, { "epoch": 4.255663034161871, "grad_norm": 3.5986690521240234, "learning_rate": 9.634630766296719e-06, "loss": 0.3571, "step": 26070 }, { "epoch": 4.255826292804375, "grad_norm": 2.8125672340393066, "learning_rate": 9.633990053777936e-06, "loss": 0.3269, "step": 26071 }, { "epoch": 4.25598955144688, "grad_norm": 2.9716854095458984, "learning_rate": 9.633349342763685e-06, "loss": 0.3155, "step": 26072 }, { "epoch": 4.256152810089384, "grad_norm": 3.249124765396118, "learning_rate": 9.6327086332566e-06, "loss": 0.3203, "step": 26073 }, { "epoch": 4.256316068731889, "grad_norm": 3.0461177825927734, "learning_rate": 9.632067925259307e-06, "loss": 0.3262, "step": 26074 }, { "epoch": 4.256479327374393, "grad_norm": 3.7398698329925537, "learning_rate": 9.631427218774442e-06, "loss": 0.429, "step": 26075 }, { "epoch": 4.2566425860168975, "grad_norm": 3.122548818588257, "learning_rate": 9.63078651380464e-06, "loss": 0.3671, "step": 26076 }, { "epoch": 4.256805844659402, "grad_norm": 3.4012060165405273, "learning_rate": 9.630145810352539e-06, "loss": 0.3926, "step": 26077 }, { "epoch": 4.256969103301906, "grad_norm": 3.17510724067688, "learning_rate": 9.629505108420766e-06, "loss": 0.3219, "step": 26078 }, { "epoch": 4.257132361944411, "grad_norm": 3.3186240196228027, "learning_rate": 9.628864408011959e-06, "loss": 0.3739, "step": 26079 }, { "epoch": 4.257295620586914, "grad_norm": 4.966032981872559, "learning_rate": 9.62822370912875e-06, "loss": 0.3367, "step": 26080 }, { "epoch": 4.257458879229419, "grad_norm": 3.4982657432556152, "learning_rate": 9.627583011773772e-06, "loss": 0.386, "step": 26081 }, { "epoch": 4.257622137871923, "grad_norm": 3.940173864364624, "learning_rate": 9.626942315949662e-06, "loss": 0.3935, "step": 26082 }, { "epoch": 4.257785396514428, "grad_norm": 3.2271728515625, "learning_rate": 9.626301621659051e-06, "loss": 0.3492, "step": 26083 }, { "epoch": 4.257948655156932, "grad_norm": 3.14336895942688, "learning_rate": 9.625660928904572e-06, "loss": 0.3526, "step": 26084 }, { "epoch": 4.2581119137994365, "grad_norm": 3.644732713699341, "learning_rate": 9.62502023768886e-06, "loss": 0.3589, "step": 26085 }, { "epoch": 4.258275172441941, "grad_norm": 3.9422335624694824, "learning_rate": 9.624379548014548e-06, "loss": 0.4304, "step": 26086 }, { "epoch": 4.258438431084445, "grad_norm": 3.8536102771759033, "learning_rate": 9.623738859884266e-06, "loss": 0.4, "step": 26087 }, { "epoch": 4.25860168972695, "grad_norm": 4.22701358795166, "learning_rate": 9.623098173300655e-06, "loss": 0.4281, "step": 26088 }, { "epoch": 4.258764948369454, "grad_norm": 3.6520168781280518, "learning_rate": 9.622457488266344e-06, "loss": 0.4308, "step": 26089 }, { "epoch": 4.258928207011959, "grad_norm": 4.109472274780273, "learning_rate": 9.621816804783969e-06, "loss": 0.3392, "step": 26090 }, { "epoch": 4.259091465654463, "grad_norm": 4.35325813293457, "learning_rate": 9.621176122856162e-06, "loss": 0.3895, "step": 26091 }, { "epoch": 4.259254724296968, "grad_norm": 4.7038397789001465, "learning_rate": 9.620535442485561e-06, "loss": 0.4259, "step": 26092 }, { "epoch": 4.259417982939472, "grad_norm": 3.6767072677612305, "learning_rate": 9.619894763674791e-06, "loss": 0.3737, "step": 26093 }, { "epoch": 4.2595812415819765, "grad_norm": 3.6496973037719727, "learning_rate": 9.619254086426489e-06, "loss": 0.3631, "step": 26094 }, { "epoch": 4.259744500224481, "grad_norm": 4.738534450531006, "learning_rate": 9.618613410743292e-06, "loss": 0.4532, "step": 26095 }, { "epoch": 4.259907758866985, "grad_norm": 3.7370219230651855, "learning_rate": 9.61797273662783e-06, "loss": 0.3813, "step": 26096 }, { "epoch": 4.26007101750949, "grad_norm": 3.791374683380127, "learning_rate": 9.617332064082738e-06, "loss": 0.3908, "step": 26097 }, { "epoch": 4.260234276151994, "grad_norm": 3.1511287689208984, "learning_rate": 9.616691393110648e-06, "loss": 0.3104, "step": 26098 }, { "epoch": 4.260397534794498, "grad_norm": 3.8252320289611816, "learning_rate": 9.616050723714197e-06, "loss": 0.4037, "step": 26099 }, { "epoch": 4.260560793437002, "grad_norm": 3.016038417816162, "learning_rate": 9.615410055896016e-06, "loss": 0.3329, "step": 26100 }, { "epoch": 4.260724052079507, "grad_norm": 3.34525728225708, "learning_rate": 9.614769389658739e-06, "loss": 0.3582, "step": 26101 }, { "epoch": 4.260887310722011, "grad_norm": 3.9269490242004395, "learning_rate": 9.614128725005003e-06, "loss": 0.3682, "step": 26102 }, { "epoch": 4.2610505693645155, "grad_norm": 3.552403211593628, "learning_rate": 9.613488061937434e-06, "loss": 0.4015, "step": 26103 }, { "epoch": 4.26121382800702, "grad_norm": 3.169848918914795, "learning_rate": 9.61284740045867e-06, "loss": 0.3537, "step": 26104 }, { "epoch": 4.261377086649524, "grad_norm": 3.4252231121063232, "learning_rate": 9.612206740571345e-06, "loss": 0.3604, "step": 26105 }, { "epoch": 4.261540345292029, "grad_norm": 2.7955093383789062, "learning_rate": 9.611566082278092e-06, "loss": 0.335, "step": 26106 }, { "epoch": 4.261703603934533, "grad_norm": 3.2556674480438232, "learning_rate": 9.610925425581543e-06, "loss": 0.362, "step": 26107 }, { "epoch": 4.261866862577038, "grad_norm": 3.903484582901001, "learning_rate": 9.610284770484333e-06, "loss": 0.4164, "step": 26108 }, { "epoch": 4.262030121219542, "grad_norm": 3.462913751602173, "learning_rate": 9.609644116989095e-06, "loss": 0.3894, "step": 26109 }, { "epoch": 4.262193379862047, "grad_norm": 2.853248357772827, "learning_rate": 9.609003465098462e-06, "loss": 0.3385, "step": 26110 }, { "epoch": 4.262356638504551, "grad_norm": 3.2977516651153564, "learning_rate": 9.608362814815069e-06, "loss": 0.3372, "step": 26111 }, { "epoch": 4.2625198971470555, "grad_norm": 3.2881104946136475, "learning_rate": 9.607722166141552e-06, "loss": 0.3078, "step": 26112 }, { "epoch": 4.26268315578956, "grad_norm": 3.4871487617492676, "learning_rate": 9.607081519080538e-06, "loss": 0.3571, "step": 26113 }, { "epoch": 4.262846414432064, "grad_norm": 2.936990976333618, "learning_rate": 9.606440873634662e-06, "loss": 0.3415, "step": 26114 }, { "epoch": 4.263009673074569, "grad_norm": 4.711328983306885, "learning_rate": 9.60580022980656e-06, "loss": 0.4625, "step": 26115 }, { "epoch": 4.263172931717072, "grad_norm": 2.9749457836151123, "learning_rate": 9.605159587598865e-06, "loss": 0.3643, "step": 26116 }, { "epoch": 4.263336190359577, "grad_norm": 4.0381975173950195, "learning_rate": 9.60451894701421e-06, "loss": 0.4689, "step": 26117 }, { "epoch": 4.263499449002081, "grad_norm": 4.479584693908691, "learning_rate": 9.603878308055225e-06, "loss": 0.9315, "step": 26118 }, { "epoch": 4.263662707644586, "grad_norm": 3.440251350402832, "learning_rate": 9.60323767072455e-06, "loss": 0.3473, "step": 26119 }, { "epoch": 4.26382596628709, "grad_norm": 3.823253631591797, "learning_rate": 9.602597035024816e-06, "loss": 0.4366, "step": 26120 }, { "epoch": 4.2639892249295945, "grad_norm": 2.9246203899383545, "learning_rate": 9.601956400958652e-06, "loss": 0.3449, "step": 26121 }, { "epoch": 4.264152483572099, "grad_norm": 3.3683855533599854, "learning_rate": 9.6013157685287e-06, "loss": 0.3656, "step": 26122 }, { "epoch": 4.264315742214603, "grad_norm": 3.2656755447387695, "learning_rate": 9.600675137737585e-06, "loss": 0.3891, "step": 26123 }, { "epoch": 4.264479000857108, "grad_norm": 3.61336350440979, "learning_rate": 9.600034508587944e-06, "loss": 0.3646, "step": 26124 }, { "epoch": 4.264642259499612, "grad_norm": 3.36751651763916, "learning_rate": 9.599393881082409e-06, "loss": 0.4189, "step": 26125 }, { "epoch": 4.264805518142117, "grad_norm": 2.756150484085083, "learning_rate": 9.598753255223616e-06, "loss": 0.3104, "step": 26126 }, { "epoch": 4.264968776784621, "grad_norm": 3.361354112625122, "learning_rate": 9.598112631014196e-06, "loss": 0.3663, "step": 26127 }, { "epoch": 4.265132035427126, "grad_norm": 4.066262722015381, "learning_rate": 9.597472008456785e-06, "loss": 0.4767, "step": 26128 }, { "epoch": 4.26529529406963, "grad_norm": 3.4371018409729004, "learning_rate": 9.596831387554012e-06, "loss": 0.3714, "step": 26129 }, { "epoch": 4.2654585527121345, "grad_norm": 3.5884945392608643, "learning_rate": 9.596190768308515e-06, "loss": 0.3466, "step": 26130 }, { "epoch": 4.265621811354639, "grad_norm": 2.773955821990967, "learning_rate": 9.595550150722925e-06, "loss": 0.2957, "step": 26131 }, { "epoch": 4.265785069997143, "grad_norm": 3.4249701499938965, "learning_rate": 9.594909534799877e-06, "loss": 0.3553, "step": 26132 }, { "epoch": 4.265948328639647, "grad_norm": 3.7013731002807617, "learning_rate": 9.594268920542e-06, "loss": 0.3759, "step": 26133 }, { "epoch": 4.266111587282151, "grad_norm": 3.396378755569458, "learning_rate": 9.593628307951932e-06, "loss": 0.3516, "step": 26134 }, { "epoch": 4.266274845924656, "grad_norm": 3.9266345500946045, "learning_rate": 9.592987697032303e-06, "loss": 0.4329, "step": 26135 }, { "epoch": 4.26643810456716, "grad_norm": 3.842582941055298, "learning_rate": 9.592347087785749e-06, "loss": 0.4108, "step": 26136 }, { "epoch": 4.266601363209665, "grad_norm": 2.8368735313415527, "learning_rate": 9.5917064802149e-06, "loss": 0.3889, "step": 26137 }, { "epoch": 4.266764621852169, "grad_norm": 4.509140491485596, "learning_rate": 9.591065874322393e-06, "loss": 0.4653, "step": 26138 }, { "epoch": 4.2669278804946735, "grad_norm": 3.1590800285339355, "learning_rate": 9.59042527011086e-06, "loss": 0.3556, "step": 26139 }, { "epoch": 4.267091139137178, "grad_norm": 2.7049992084503174, "learning_rate": 9.589784667582936e-06, "loss": 0.3281, "step": 26140 }, { "epoch": 4.267254397779682, "grad_norm": 3.7038140296936035, "learning_rate": 9.589144066741253e-06, "loss": 0.4016, "step": 26141 }, { "epoch": 4.267417656422187, "grad_norm": 4.0112433433532715, "learning_rate": 9.588503467588441e-06, "loss": 0.4812, "step": 26142 }, { "epoch": 4.267580915064691, "grad_norm": 3.1660876274108887, "learning_rate": 9.587862870127136e-06, "loss": 0.3909, "step": 26143 }, { "epoch": 4.267744173707196, "grad_norm": 3.434246301651001, "learning_rate": 9.58722227435997e-06, "loss": 0.3941, "step": 26144 }, { "epoch": 4.2679074323497, "grad_norm": 3.9406557083129883, "learning_rate": 9.586581680289579e-06, "loss": 0.4296, "step": 26145 }, { "epoch": 4.268070690992205, "grad_norm": 2.8406341075897217, "learning_rate": 9.585941087918596e-06, "loss": 0.361, "step": 26146 }, { "epoch": 4.268233949634709, "grad_norm": 3.1664247512817383, "learning_rate": 9.58530049724965e-06, "loss": 0.3652, "step": 26147 }, { "epoch": 4.2683972082772135, "grad_norm": 3.4540810585021973, "learning_rate": 9.584659908285377e-06, "loss": 0.4014, "step": 26148 }, { "epoch": 4.268560466919718, "grad_norm": 4.335822582244873, "learning_rate": 9.58401932102841e-06, "loss": 0.5206, "step": 26149 }, { "epoch": 4.268723725562222, "grad_norm": 3.586568832397461, "learning_rate": 9.583378735481385e-06, "loss": 0.4065, "step": 26150 }, { "epoch": 4.268886984204727, "grad_norm": 3.9241816997528076, "learning_rate": 9.582738151646936e-06, "loss": 0.388, "step": 26151 }, { "epoch": 4.26905024284723, "grad_norm": 3.8960487842559814, "learning_rate": 9.582097569527686e-06, "loss": 0.3573, "step": 26152 }, { "epoch": 4.269213501489735, "grad_norm": 3.7902379035949707, "learning_rate": 9.581456989126278e-06, "loss": 0.4468, "step": 26153 }, { "epoch": 4.269376760132239, "grad_norm": 4.252386569976807, "learning_rate": 9.580816410445341e-06, "loss": 0.4672, "step": 26154 }, { "epoch": 4.269540018774744, "grad_norm": 3.993342638015747, "learning_rate": 9.58017583348751e-06, "loss": 0.3711, "step": 26155 }, { "epoch": 4.269703277417248, "grad_norm": 3.797046184539795, "learning_rate": 9.579535258255419e-06, "loss": 0.3869, "step": 26156 }, { "epoch": 4.2698665360597525, "grad_norm": 2.968423366546631, "learning_rate": 9.578894684751698e-06, "loss": 0.3245, "step": 26157 }, { "epoch": 4.270029794702257, "grad_norm": 3.016303062438965, "learning_rate": 9.578254112978983e-06, "loss": 0.3474, "step": 26158 }, { "epoch": 4.270193053344761, "grad_norm": 3.5847644805908203, "learning_rate": 9.577613542939907e-06, "loss": 0.3824, "step": 26159 }, { "epoch": 4.270356311987266, "grad_norm": 4.438948631286621, "learning_rate": 9.576972974637097e-06, "loss": 0.3951, "step": 26160 }, { "epoch": 4.27051957062977, "grad_norm": 3.565150737762451, "learning_rate": 9.5763324080732e-06, "loss": 0.3655, "step": 26161 }, { "epoch": 4.270682829272275, "grad_norm": 3.150766372680664, "learning_rate": 9.575691843250838e-06, "loss": 0.4078, "step": 26162 }, { "epoch": 4.270846087914779, "grad_norm": 4.072969913482666, "learning_rate": 9.575051280172644e-06, "loss": 0.4238, "step": 26163 }, { "epoch": 4.271009346557284, "grad_norm": 3.2060465812683105, "learning_rate": 9.574410718841251e-06, "loss": 0.3623, "step": 26164 }, { "epoch": 4.271172605199788, "grad_norm": 3.44913911819458, "learning_rate": 9.573770159259298e-06, "loss": 0.3951, "step": 26165 }, { "epoch": 4.2713358638422925, "grad_norm": 3.3708817958831787, "learning_rate": 9.573129601429416e-06, "loss": 0.3684, "step": 26166 }, { "epoch": 4.271499122484797, "grad_norm": 3.341061592102051, "learning_rate": 9.572489045354237e-06, "loss": 0.3643, "step": 26167 }, { "epoch": 4.271662381127301, "grad_norm": 4.491390705108643, "learning_rate": 9.571848491036392e-06, "loss": 0.3621, "step": 26168 }, { "epoch": 4.271825639769805, "grad_norm": 3.0522689819335938, "learning_rate": 9.571207938478517e-06, "loss": 0.3221, "step": 26169 }, { "epoch": 4.271988898412309, "grad_norm": 3.100614070892334, "learning_rate": 9.570567387683245e-06, "loss": 0.3345, "step": 26170 }, { "epoch": 4.272152157054814, "grad_norm": 3.146160125732422, "learning_rate": 9.56992683865321e-06, "loss": 0.3084, "step": 26171 }, { "epoch": 4.272315415697318, "grad_norm": 3.155336618423462, "learning_rate": 9.569286291391042e-06, "loss": 0.3897, "step": 26172 }, { "epoch": 4.272478674339823, "grad_norm": 3.868192672729492, "learning_rate": 9.568645745899375e-06, "loss": 0.4098, "step": 26173 }, { "epoch": 4.272641932982327, "grad_norm": 3.1254665851593018, "learning_rate": 9.568005202180841e-06, "loss": 0.3366, "step": 26174 }, { "epoch": 4.2728051916248315, "grad_norm": 3.300429582595825, "learning_rate": 9.567364660238075e-06, "loss": 0.3395, "step": 26175 }, { "epoch": 4.272968450267336, "grad_norm": 4.344476222991943, "learning_rate": 9.56672412007371e-06, "loss": 0.4268, "step": 26176 }, { "epoch": 4.27313170890984, "grad_norm": 2.9870526790618896, "learning_rate": 9.566083581690378e-06, "loss": 0.3545, "step": 26177 }, { "epoch": 4.273294967552345, "grad_norm": 4.205277919769287, "learning_rate": 9.565443045090714e-06, "loss": 0.4149, "step": 26178 }, { "epoch": 4.273458226194849, "grad_norm": 3.079226493835449, "learning_rate": 9.564802510277348e-06, "loss": 0.3237, "step": 26179 }, { "epoch": 4.273621484837354, "grad_norm": 3.1379127502441406, "learning_rate": 9.564161977252917e-06, "loss": 0.3712, "step": 26180 }, { "epoch": 4.273784743479858, "grad_norm": 4.11292839050293, "learning_rate": 9.56352144602005e-06, "loss": 0.3742, "step": 26181 }, { "epoch": 4.273948002122363, "grad_norm": 4.36796760559082, "learning_rate": 9.562880916581382e-06, "loss": 0.3843, "step": 26182 }, { "epoch": 4.274111260764867, "grad_norm": 3.9638166427612305, "learning_rate": 9.562240388939546e-06, "loss": 0.607, "step": 26183 }, { "epoch": 4.2742745194073715, "grad_norm": 4.070375442504883, "learning_rate": 9.561599863097173e-06, "loss": 0.3988, "step": 26184 }, { "epoch": 4.274437778049876, "grad_norm": 3.851128339767456, "learning_rate": 9.560959339056897e-06, "loss": 0.4851, "step": 26185 }, { "epoch": 4.2746010366923795, "grad_norm": 4.183532238006592, "learning_rate": 9.560318816821354e-06, "loss": 0.4062, "step": 26186 }, { "epoch": 4.274764295334884, "grad_norm": 3.155956983566284, "learning_rate": 9.559678296393168e-06, "loss": 0.3635, "step": 26187 }, { "epoch": 4.274927553977388, "grad_norm": 3.5770981311798096, "learning_rate": 9.559037777774984e-06, "loss": 0.3533, "step": 26188 }, { "epoch": 4.275090812619893, "grad_norm": 2.7176156044006348, "learning_rate": 9.558397260969428e-06, "loss": 0.3351, "step": 26189 }, { "epoch": 4.275254071262397, "grad_norm": 4.218599796295166, "learning_rate": 9.557756745979138e-06, "loss": 0.4751, "step": 26190 }, { "epoch": 4.275417329904902, "grad_norm": 3.745203971862793, "learning_rate": 9.557116232806739e-06, "loss": 0.3903, "step": 26191 }, { "epoch": 4.275580588547406, "grad_norm": 3.7553224563598633, "learning_rate": 9.556475721454867e-06, "loss": 0.4056, "step": 26192 }, { "epoch": 4.2757438471899105, "grad_norm": 3.874753952026367, "learning_rate": 9.555835211926156e-06, "loss": 0.3475, "step": 26193 }, { "epoch": 4.275907105832415, "grad_norm": 3.4750962257385254, "learning_rate": 9.555194704223241e-06, "loss": 0.4057, "step": 26194 }, { "epoch": 4.276070364474919, "grad_norm": 3.5643794536590576, "learning_rate": 9.55455419834875e-06, "loss": 0.3608, "step": 26195 }, { "epoch": 4.276233623117424, "grad_norm": 3.904810905456543, "learning_rate": 9.553913694305322e-06, "loss": 0.384, "step": 26196 }, { "epoch": 4.276396881759928, "grad_norm": 3.5385985374450684, "learning_rate": 9.553273192095582e-06, "loss": 0.3857, "step": 26197 }, { "epoch": 4.276560140402433, "grad_norm": 3.763108015060425, "learning_rate": 9.552632691722168e-06, "loss": 0.4362, "step": 26198 }, { "epoch": 4.276723399044937, "grad_norm": 3.3181350231170654, "learning_rate": 9.551992193187713e-06, "loss": 0.3765, "step": 26199 }, { "epoch": 4.276886657687442, "grad_norm": 3.478193521499634, "learning_rate": 9.551351696494854e-06, "loss": 0.4399, "step": 26200 }, { "epoch": 4.277049916329946, "grad_norm": 3.9694173336029053, "learning_rate": 9.550711201646215e-06, "loss": 0.3871, "step": 26201 }, { "epoch": 4.2772131749724505, "grad_norm": 3.7690162658691406, "learning_rate": 9.550070708644428e-06, "loss": 0.3827, "step": 26202 }, { "epoch": 4.277376433614955, "grad_norm": 3.7356855869293213, "learning_rate": 9.549430217492136e-06, "loss": 0.4199, "step": 26203 }, { "epoch": 4.277539692257459, "grad_norm": 4.186577796936035, "learning_rate": 9.548789728191964e-06, "loss": 0.407, "step": 26204 }, { "epoch": 4.277702950899963, "grad_norm": 3.1350033283233643, "learning_rate": 9.548149240746547e-06, "loss": 0.3515, "step": 26205 }, { "epoch": 4.277866209542467, "grad_norm": 3.6042070388793945, "learning_rate": 9.547508755158519e-06, "loss": 0.3824, "step": 26206 }, { "epoch": 4.278029468184972, "grad_norm": 3.3902883529663086, "learning_rate": 9.546868271430511e-06, "loss": 0.3798, "step": 26207 }, { "epoch": 4.278192726827476, "grad_norm": 3.553558588027954, "learning_rate": 9.546227789565156e-06, "loss": 0.3654, "step": 26208 }, { "epoch": 4.278355985469981, "grad_norm": 3.1847405433654785, "learning_rate": 9.545587309565085e-06, "loss": 0.3159, "step": 26209 }, { "epoch": 4.278519244112485, "grad_norm": 2.833472967147827, "learning_rate": 9.544946831432941e-06, "loss": 0.3409, "step": 26210 }, { "epoch": 4.2786825027549895, "grad_norm": 3.2191073894500732, "learning_rate": 9.544306355171343e-06, "loss": 0.3832, "step": 26211 }, { "epoch": 4.278845761397494, "grad_norm": 3.032939910888672, "learning_rate": 9.54366588078293e-06, "loss": 0.3318, "step": 26212 }, { "epoch": 4.279009020039998, "grad_norm": 3.4935476779937744, "learning_rate": 9.543025408270333e-06, "loss": 0.3598, "step": 26213 }, { "epoch": 4.279172278682503, "grad_norm": 3.130099058151245, "learning_rate": 9.542384937636185e-06, "loss": 0.3365, "step": 26214 }, { "epoch": 4.279335537325007, "grad_norm": 3.089365243911743, "learning_rate": 9.541744468883122e-06, "loss": 0.3424, "step": 26215 }, { "epoch": 4.279498795967512, "grad_norm": 3.0477535724639893, "learning_rate": 9.541104002013775e-06, "loss": 0.3607, "step": 26216 }, { "epoch": 4.279662054610016, "grad_norm": 3.2509732246398926, "learning_rate": 9.540463537030776e-06, "loss": 0.3413, "step": 26217 }, { "epoch": 4.279825313252521, "grad_norm": 3.647423505783081, "learning_rate": 9.539823073936756e-06, "loss": 0.4052, "step": 26218 }, { "epoch": 4.279988571895025, "grad_norm": 2.982353925704956, "learning_rate": 9.539182612734353e-06, "loss": 0.3386, "step": 26219 }, { "epoch": 4.2801518305375295, "grad_norm": 3.458190679550171, "learning_rate": 9.538542153426195e-06, "loss": 0.401, "step": 26220 }, { "epoch": 4.280315089180034, "grad_norm": 2.8456835746765137, "learning_rate": 9.537901696014915e-06, "loss": 0.3186, "step": 26221 }, { "epoch": 4.280478347822537, "grad_norm": 3.3929555416107178, "learning_rate": 9.537261240503148e-06, "loss": 0.3769, "step": 26222 }, { "epoch": 4.280641606465042, "grad_norm": 2.882797956466675, "learning_rate": 9.536620786893524e-06, "loss": 0.3218, "step": 26223 }, { "epoch": 4.280804865107546, "grad_norm": 4.141293525695801, "learning_rate": 9.535980335188678e-06, "loss": 0.4221, "step": 26224 }, { "epoch": 4.280968123750051, "grad_norm": 4.231926918029785, "learning_rate": 9.535339885391237e-06, "loss": 0.3345, "step": 26225 }, { "epoch": 4.281131382392555, "grad_norm": 3.4502830505371094, "learning_rate": 9.534699437503843e-06, "loss": 0.2875, "step": 26226 }, { "epoch": 4.28129464103506, "grad_norm": 2.778076648712158, "learning_rate": 9.534058991529122e-06, "loss": 0.3425, "step": 26227 }, { "epoch": 4.281457899677564, "grad_norm": 4.06363582611084, "learning_rate": 9.53341854746971e-06, "loss": 0.4081, "step": 26228 }, { "epoch": 4.2816211583200685, "grad_norm": 4.0949530601501465, "learning_rate": 9.53277810532824e-06, "loss": 0.3853, "step": 26229 }, { "epoch": 4.281784416962573, "grad_norm": 3.8320746421813965, "learning_rate": 9.53213766510734e-06, "loss": 0.374, "step": 26230 }, { "epoch": 4.281947675605077, "grad_norm": 3.7415120601654053, "learning_rate": 9.531497226809647e-06, "loss": 0.3185, "step": 26231 }, { "epoch": 4.282110934247582, "grad_norm": 3.4109907150268555, "learning_rate": 9.53085679043779e-06, "loss": 0.3569, "step": 26232 }, { "epoch": 4.282274192890086, "grad_norm": 3.012603998184204, "learning_rate": 9.530216355994403e-06, "loss": 0.3765, "step": 26233 }, { "epoch": 4.282437451532591, "grad_norm": 2.732550859451294, "learning_rate": 9.529575923482122e-06, "loss": 0.322, "step": 26234 }, { "epoch": 4.282600710175095, "grad_norm": 3.3530635833740234, "learning_rate": 9.528935492903575e-06, "loss": 0.3382, "step": 26235 }, { "epoch": 4.2827639688176, "grad_norm": 3.317106008529663, "learning_rate": 9.528295064261394e-06, "loss": 0.3713, "step": 26236 }, { "epoch": 4.282927227460104, "grad_norm": 3.20381498336792, "learning_rate": 9.527654637558218e-06, "loss": 0.3416, "step": 26237 }, { "epoch": 4.2830904861026085, "grad_norm": 3.4262614250183105, "learning_rate": 9.527014212796674e-06, "loss": 0.347, "step": 26238 }, { "epoch": 4.283253744745112, "grad_norm": 3.7542316913604736, "learning_rate": 9.5263737899794e-06, "loss": 0.3767, "step": 26239 }, { "epoch": 4.283417003387616, "grad_norm": 4.4487481117248535, "learning_rate": 9.525733369109018e-06, "loss": 0.3933, "step": 26240 }, { "epoch": 4.283580262030121, "grad_norm": 3.094266891479492, "learning_rate": 9.52509295018817e-06, "loss": 0.3294, "step": 26241 }, { "epoch": 4.283743520672625, "grad_norm": 2.9146318435668945, "learning_rate": 9.524452533219485e-06, "loss": 0.3258, "step": 26242 }, { "epoch": 4.28390677931513, "grad_norm": 3.209871292114258, "learning_rate": 9.523812118205596e-06, "loss": 0.3507, "step": 26243 }, { "epoch": 4.284070037957634, "grad_norm": 3.8203024864196777, "learning_rate": 9.523171705149136e-06, "loss": 0.3722, "step": 26244 }, { "epoch": 4.284233296600139, "grad_norm": 2.963095188140869, "learning_rate": 9.522531294052736e-06, "loss": 0.3347, "step": 26245 }, { "epoch": 4.284396555242643, "grad_norm": 2.8663158416748047, "learning_rate": 9.52189088491903e-06, "loss": 0.3289, "step": 26246 }, { "epoch": 4.2845598138851475, "grad_norm": 2.9585318565368652, "learning_rate": 9.521250477750651e-06, "loss": 0.3194, "step": 26247 }, { "epoch": 4.284723072527652, "grad_norm": 3.1004085540771484, "learning_rate": 9.520610072550228e-06, "loss": 0.3613, "step": 26248 }, { "epoch": 4.284886331170156, "grad_norm": 3.497832775115967, "learning_rate": 9.519969669320402e-06, "loss": 0.3683, "step": 26249 }, { "epoch": 4.285049589812661, "grad_norm": 3.608952522277832, "learning_rate": 9.519329268063795e-06, "loss": 0.4064, "step": 26250 }, { "epoch": 4.285212848455165, "grad_norm": 3.3981714248657227, "learning_rate": 9.518688868783045e-06, "loss": 0.348, "step": 26251 }, { "epoch": 4.28537610709767, "grad_norm": 3.4712576866149902, "learning_rate": 9.51804847148078e-06, "loss": 0.3658, "step": 26252 }, { "epoch": 4.285539365740174, "grad_norm": 3.28625750541687, "learning_rate": 9.51740807615964e-06, "loss": 0.3806, "step": 26253 }, { "epoch": 4.285702624382679, "grad_norm": 3.797469139099121, "learning_rate": 9.516767682822249e-06, "loss": 0.3786, "step": 26254 }, { "epoch": 4.285865883025183, "grad_norm": 3.855152130126953, "learning_rate": 9.516127291471246e-06, "loss": 0.415, "step": 26255 }, { "epoch": 4.2860291416676874, "grad_norm": 3.2616708278656006, "learning_rate": 9.515486902109263e-06, "loss": 0.3664, "step": 26256 }, { "epoch": 4.286192400310192, "grad_norm": 3.841986894607544, "learning_rate": 9.514846514738927e-06, "loss": 0.4445, "step": 26257 }, { "epoch": 4.286355658952695, "grad_norm": 3.1071133613586426, "learning_rate": 9.514206129362876e-06, "loss": 0.3253, "step": 26258 }, { "epoch": 4.2865189175952, "grad_norm": 3.438941717147827, "learning_rate": 9.513565745983742e-06, "loss": 0.3435, "step": 26259 }, { "epoch": 4.286682176237704, "grad_norm": 3.7200939655303955, "learning_rate": 9.512925364604151e-06, "loss": 0.3623, "step": 26260 }, { "epoch": 4.286845434880209, "grad_norm": 3.5109503269195557, "learning_rate": 9.512284985226744e-06, "loss": 0.3601, "step": 26261 }, { "epoch": 4.287008693522713, "grad_norm": 4.131597995758057, "learning_rate": 9.511644607854146e-06, "loss": 0.3658, "step": 26262 }, { "epoch": 4.287171952165218, "grad_norm": 3.8140761852264404, "learning_rate": 9.511004232488992e-06, "loss": 0.3623, "step": 26263 }, { "epoch": 4.287335210807722, "grad_norm": 4.250408172607422, "learning_rate": 9.510363859133916e-06, "loss": 0.3655, "step": 26264 }, { "epoch": 4.2874984694502265, "grad_norm": 2.587390184402466, "learning_rate": 9.50972348779155e-06, "loss": 0.2939, "step": 26265 }, { "epoch": 4.287661728092731, "grad_norm": 4.055261135101318, "learning_rate": 9.509083118464526e-06, "loss": 0.4124, "step": 26266 }, { "epoch": 4.287824986735235, "grad_norm": 3.9223151206970215, "learning_rate": 9.508442751155475e-06, "loss": 0.3644, "step": 26267 }, { "epoch": 4.28798824537774, "grad_norm": 3.6946253776550293, "learning_rate": 9.507802385867031e-06, "loss": 0.3754, "step": 26268 }, { "epoch": 4.288151504020244, "grad_norm": 3.2727982997894287, "learning_rate": 9.507162022601828e-06, "loss": 0.3512, "step": 26269 }, { "epoch": 4.288314762662749, "grad_norm": 3.7816624641418457, "learning_rate": 9.506521661362494e-06, "loss": 0.3501, "step": 26270 }, { "epoch": 4.288478021305253, "grad_norm": 4.07548713684082, "learning_rate": 9.505881302151661e-06, "loss": 0.3821, "step": 26271 }, { "epoch": 4.288641279947758, "grad_norm": 3.661674737930298, "learning_rate": 9.505240944971965e-06, "loss": 0.3713, "step": 26272 }, { "epoch": 4.288804538590262, "grad_norm": 3.278975248336792, "learning_rate": 9.504600589826037e-06, "loss": 0.3466, "step": 26273 }, { "epoch": 4.288967797232766, "grad_norm": 4.881348609924316, "learning_rate": 9.503960236716508e-06, "loss": 0.4649, "step": 26274 }, { "epoch": 4.28913105587527, "grad_norm": 4.316627502441406, "learning_rate": 9.50331988564601e-06, "loss": 0.7336, "step": 26275 }, { "epoch": 4.289294314517774, "grad_norm": 3.060868263244629, "learning_rate": 9.502679536617177e-06, "loss": 0.3619, "step": 26276 }, { "epoch": 4.289457573160279, "grad_norm": 3.5809848308563232, "learning_rate": 9.502039189632642e-06, "loss": 0.3955, "step": 26277 }, { "epoch": 4.289620831802783, "grad_norm": 3.7403125762939453, "learning_rate": 9.50139884469504e-06, "loss": 0.4141, "step": 26278 }, { "epoch": 4.289784090445288, "grad_norm": 2.6627402305603027, "learning_rate": 9.500758501806991e-06, "loss": 0.3382, "step": 26279 }, { "epoch": 4.289947349087792, "grad_norm": 3.4184775352478027, "learning_rate": 9.500118160971139e-06, "loss": 0.3579, "step": 26280 }, { "epoch": 4.290110607730297, "grad_norm": 4.333200931549072, "learning_rate": 9.499477822190111e-06, "loss": 0.3835, "step": 26281 }, { "epoch": 4.290273866372801, "grad_norm": 3.043403387069702, "learning_rate": 9.498837485466541e-06, "loss": 0.3578, "step": 26282 }, { "epoch": 4.2904371250153055, "grad_norm": 3.7420825958251953, "learning_rate": 9.498197150803064e-06, "loss": 0.4116, "step": 26283 }, { "epoch": 4.29060038365781, "grad_norm": 3.409419059753418, "learning_rate": 9.497556818202306e-06, "loss": 0.3612, "step": 26284 }, { "epoch": 4.290763642300314, "grad_norm": 3.630760431289673, "learning_rate": 9.496916487666903e-06, "loss": 0.3751, "step": 26285 }, { "epoch": 4.290926900942819, "grad_norm": 3.163893461227417, "learning_rate": 9.496276159199485e-06, "loss": 0.3738, "step": 26286 }, { "epoch": 4.291090159585323, "grad_norm": 3.7457597255706787, "learning_rate": 9.495635832802685e-06, "loss": 0.3856, "step": 26287 }, { "epoch": 4.291253418227828, "grad_norm": 4.075952529907227, "learning_rate": 9.494995508479143e-06, "loss": 0.4175, "step": 26288 }, { "epoch": 4.291416676870332, "grad_norm": 3.1331026554107666, "learning_rate": 9.494355186231478e-06, "loss": 0.3279, "step": 26289 }, { "epoch": 4.291579935512837, "grad_norm": 3.780592918395996, "learning_rate": 9.493714866062325e-06, "loss": 0.3051, "step": 26290 }, { "epoch": 4.291743194155341, "grad_norm": 3.46838641166687, "learning_rate": 9.493074547974324e-06, "loss": 0.3596, "step": 26291 }, { "epoch": 4.2919064527978446, "grad_norm": 3.5318450927734375, "learning_rate": 9.492434231970099e-06, "loss": 0.3648, "step": 26292 }, { "epoch": 4.292069711440349, "grad_norm": 4.206240177154541, "learning_rate": 9.491793918052286e-06, "loss": 0.4351, "step": 26293 }, { "epoch": 4.292232970082853, "grad_norm": 3.2831907272338867, "learning_rate": 9.491153606223518e-06, "loss": 0.3614, "step": 26294 }, { "epoch": 4.292396228725358, "grad_norm": 3.409982919692993, "learning_rate": 9.490513296486423e-06, "loss": 0.4263, "step": 26295 }, { "epoch": 4.292559487367862, "grad_norm": 4.953678607940674, "learning_rate": 9.489872988843637e-06, "loss": 0.4105, "step": 26296 }, { "epoch": 4.292722746010367, "grad_norm": 3.8639588356018066, "learning_rate": 9.489232683297789e-06, "loss": 0.4407, "step": 26297 }, { "epoch": 4.292886004652871, "grad_norm": 3.4118967056274414, "learning_rate": 9.488592379851518e-06, "loss": 0.3666, "step": 26298 }, { "epoch": 4.293049263295376, "grad_norm": 3.9450278282165527, "learning_rate": 9.487952078507447e-06, "loss": 0.4085, "step": 26299 }, { "epoch": 4.29321252193788, "grad_norm": 3.6628758907318115, "learning_rate": 9.48731177926821e-06, "loss": 0.3516, "step": 26300 }, { "epoch": 4.2933757805803845, "grad_norm": 3.6913907527923584, "learning_rate": 9.486671482136442e-06, "loss": 0.4061, "step": 26301 }, { "epoch": 4.293539039222889, "grad_norm": 4.290119647979736, "learning_rate": 9.486031187114772e-06, "loss": 0.4115, "step": 26302 }, { "epoch": 4.293702297865393, "grad_norm": 4.0497260093688965, "learning_rate": 9.485390894205833e-06, "loss": 0.3494, "step": 26303 }, { "epoch": 4.293865556507898, "grad_norm": 3.311838388442993, "learning_rate": 9.484750603412261e-06, "loss": 0.3627, "step": 26304 }, { "epoch": 4.294028815150402, "grad_norm": 3.9924561977386475, "learning_rate": 9.484110314736683e-06, "loss": 0.4315, "step": 26305 }, { "epoch": 4.294192073792907, "grad_norm": 4.1039276123046875, "learning_rate": 9.483470028181734e-06, "loss": 0.4038, "step": 26306 }, { "epoch": 4.294355332435411, "grad_norm": 3.598994255065918, "learning_rate": 9.482829743750044e-06, "loss": 0.4085, "step": 26307 }, { "epoch": 4.294518591077916, "grad_norm": 3.7634196281433105, "learning_rate": 9.482189461444248e-06, "loss": 0.3567, "step": 26308 }, { "epoch": 4.294681849720419, "grad_norm": 3.4713475704193115, "learning_rate": 9.481549181266973e-06, "loss": 0.344, "step": 26309 }, { "epoch": 4.2948451083629235, "grad_norm": 3.534301519393921, "learning_rate": 9.480908903220854e-06, "loss": 0.3673, "step": 26310 }, { "epoch": 4.295008367005428, "grad_norm": 3.558448076248169, "learning_rate": 9.480268627308522e-06, "loss": 0.4008, "step": 26311 }, { "epoch": 4.295171625647932, "grad_norm": 3.3468070030212402, "learning_rate": 9.479628353532609e-06, "loss": 0.3637, "step": 26312 }, { "epoch": 4.295334884290437, "grad_norm": 3.0080788135528564, "learning_rate": 9.478988081895745e-06, "loss": 0.3504, "step": 26313 }, { "epoch": 4.295498142932941, "grad_norm": 2.8417811393737793, "learning_rate": 9.47834781240057e-06, "loss": 0.3686, "step": 26314 }, { "epoch": 4.295661401575446, "grad_norm": 3.411734104156494, "learning_rate": 9.477707545049706e-06, "loss": 0.389, "step": 26315 }, { "epoch": 4.29582466021795, "grad_norm": 3.9541163444519043, "learning_rate": 9.47706727984579e-06, "loss": 0.3545, "step": 26316 }, { "epoch": 4.295987918860455, "grad_norm": 3.2410619258880615, "learning_rate": 9.476427016791456e-06, "loss": 0.3551, "step": 26317 }, { "epoch": 4.296151177502959, "grad_norm": 3.766538381576538, "learning_rate": 9.47578675588933e-06, "loss": 0.3989, "step": 26318 }, { "epoch": 4.2963144361454635, "grad_norm": 4.6977434158325195, "learning_rate": 9.475146497142047e-06, "loss": 0.4817, "step": 26319 }, { "epoch": 4.296477694787968, "grad_norm": 4.317713737487793, "learning_rate": 9.47450624055224e-06, "loss": 0.4577, "step": 26320 }, { "epoch": 4.296640953430472, "grad_norm": 3.27681827545166, "learning_rate": 9.473865986122537e-06, "loss": 0.3379, "step": 26321 }, { "epoch": 4.296804212072977, "grad_norm": 3.6754329204559326, "learning_rate": 9.473225733855574e-06, "loss": 0.4239, "step": 26322 }, { "epoch": 4.296967470715481, "grad_norm": 3.834155797958374, "learning_rate": 9.47258548375398e-06, "loss": 0.3848, "step": 26323 }, { "epoch": 4.297130729357986, "grad_norm": 3.108820915222168, "learning_rate": 9.471945235820386e-06, "loss": 0.3708, "step": 26324 }, { "epoch": 4.29729398800049, "grad_norm": 3.3752505779266357, "learning_rate": 9.471304990057428e-06, "loss": 0.3439, "step": 26325 }, { "epoch": 4.2974572466429946, "grad_norm": 2.917855978012085, "learning_rate": 9.470664746467736e-06, "loss": 0.34, "step": 26326 }, { "epoch": 4.297620505285499, "grad_norm": 3.5613999366760254, "learning_rate": 9.470024505053945e-06, "loss": 0.4172, "step": 26327 }, { "epoch": 4.2977837639280025, "grad_norm": 2.8308334350585938, "learning_rate": 9.469384265818679e-06, "loss": 0.3043, "step": 26328 }, { "epoch": 4.297947022570507, "grad_norm": 3.6991770267486572, "learning_rate": 9.468744028764571e-06, "loss": 0.4881, "step": 26329 }, { "epoch": 4.298110281213011, "grad_norm": 2.929008960723877, "learning_rate": 9.46810379389426e-06, "loss": 0.3326, "step": 26330 }, { "epoch": 4.298273539855516, "grad_norm": 3.4126482009887695, "learning_rate": 9.467463561210372e-06, "loss": 0.381, "step": 26331 }, { "epoch": 4.29843679849802, "grad_norm": 2.999652147293091, "learning_rate": 9.46682333071554e-06, "loss": 0.3664, "step": 26332 }, { "epoch": 4.298600057140525, "grad_norm": 4.0542497634887695, "learning_rate": 9.466183102412397e-06, "loss": 0.4436, "step": 26333 }, { "epoch": 4.298763315783029, "grad_norm": 3.19285249710083, "learning_rate": 9.465542876303573e-06, "loss": 0.3935, "step": 26334 }, { "epoch": 4.298926574425534, "grad_norm": 3.257603406906128, "learning_rate": 9.4649026523917e-06, "loss": 0.3791, "step": 26335 }, { "epoch": 4.299089833068038, "grad_norm": 3.3469748497009277, "learning_rate": 9.464262430679409e-06, "loss": 0.3796, "step": 26336 }, { "epoch": 4.2992530917105425, "grad_norm": 4.093250751495361, "learning_rate": 9.46362221116934e-06, "loss": 0.4242, "step": 26337 }, { "epoch": 4.299416350353047, "grad_norm": 3.132410764694214, "learning_rate": 9.462981993864112e-06, "loss": 0.3763, "step": 26338 }, { "epoch": 4.299579608995551, "grad_norm": 3.402233123779297, "learning_rate": 9.46234177876636e-06, "loss": 0.3333, "step": 26339 }, { "epoch": 4.299742867638056, "grad_norm": 3.2821640968322754, "learning_rate": 9.46170156587872e-06, "loss": 0.3725, "step": 26340 }, { "epoch": 4.29990612628056, "grad_norm": 3.189540386199951, "learning_rate": 9.46106135520382e-06, "loss": 0.359, "step": 26341 }, { "epoch": 4.300069384923065, "grad_norm": 3.7799103260040283, "learning_rate": 9.460421146744297e-06, "loss": 0.405, "step": 26342 }, { "epoch": 4.300232643565569, "grad_norm": 4.180637836456299, "learning_rate": 9.459780940502776e-06, "loss": 0.4008, "step": 26343 }, { "epoch": 4.3003959022080736, "grad_norm": 4.224190711975098, "learning_rate": 9.459140736481892e-06, "loss": 0.4155, "step": 26344 }, { "epoch": 4.300559160850577, "grad_norm": 3.3939208984375, "learning_rate": 9.458500534684277e-06, "loss": 0.3736, "step": 26345 }, { "epoch": 4.3007224194930815, "grad_norm": 3.268354892730713, "learning_rate": 9.457860335112562e-06, "loss": 0.3744, "step": 26346 }, { "epoch": 4.300885678135586, "grad_norm": 3.8488824367523193, "learning_rate": 9.45722013776938e-06, "loss": 0.3882, "step": 26347 }, { "epoch": 4.30104893677809, "grad_norm": 2.591676950454712, "learning_rate": 9.45657994265736e-06, "loss": 0.2974, "step": 26348 }, { "epoch": 4.301212195420595, "grad_norm": 3.2960093021392822, "learning_rate": 9.455939749779134e-06, "loss": 0.3871, "step": 26349 }, { "epoch": 4.301375454063099, "grad_norm": 2.750455141067505, "learning_rate": 9.455299559137333e-06, "loss": 0.3182, "step": 26350 }, { "epoch": 4.301538712705604, "grad_norm": 3.2647488117218018, "learning_rate": 9.454659370734589e-06, "loss": 0.3684, "step": 26351 }, { "epoch": 4.301701971348108, "grad_norm": 3.972559928894043, "learning_rate": 9.454019184573536e-06, "loss": 0.7934, "step": 26352 }, { "epoch": 4.301865229990613, "grad_norm": 4.2089104652404785, "learning_rate": 9.453379000656805e-06, "loss": 0.4018, "step": 26353 }, { "epoch": 4.302028488633117, "grad_norm": 3.0687668323516846, "learning_rate": 9.452738818987026e-06, "loss": 0.3263, "step": 26354 }, { "epoch": 4.3021917472756215, "grad_norm": 3.234917640686035, "learning_rate": 9.45209863956683e-06, "loss": 0.3398, "step": 26355 }, { "epoch": 4.302355005918126, "grad_norm": 3.766815423965454, "learning_rate": 9.451458462398851e-06, "loss": 0.3733, "step": 26356 }, { "epoch": 4.30251826456063, "grad_norm": 2.5095107555389404, "learning_rate": 9.450818287485722e-06, "loss": 0.3065, "step": 26357 }, { "epoch": 4.302681523203135, "grad_norm": 3.340496063232422, "learning_rate": 9.450178114830067e-06, "loss": 0.3895, "step": 26358 }, { "epoch": 4.302844781845639, "grad_norm": 3.4333853721618652, "learning_rate": 9.449537944434524e-06, "loss": 0.3729, "step": 26359 }, { "epoch": 4.303008040488144, "grad_norm": 3.9244186878204346, "learning_rate": 9.448897776301722e-06, "loss": 0.3762, "step": 26360 }, { "epoch": 4.303171299130648, "grad_norm": 3.0536482334136963, "learning_rate": 9.448257610434293e-06, "loss": 0.3424, "step": 26361 }, { "epoch": 4.303334557773152, "grad_norm": 4.352021217346191, "learning_rate": 9.44761744683487e-06, "loss": 0.5402, "step": 26362 }, { "epoch": 4.303497816415656, "grad_norm": 3.195747137069702, "learning_rate": 9.446977285506078e-06, "loss": 0.3669, "step": 26363 }, { "epoch": 4.3036610750581605, "grad_norm": 4.022076606750488, "learning_rate": 9.44633712645056e-06, "loss": 0.446, "step": 26364 }, { "epoch": 4.303824333700665, "grad_norm": 4.0821452140808105, "learning_rate": 9.445696969670937e-06, "loss": 0.4354, "step": 26365 }, { "epoch": 4.303987592343169, "grad_norm": 3.8171534538269043, "learning_rate": 9.445056815169851e-06, "loss": 0.4942, "step": 26366 }, { "epoch": 4.304150850985674, "grad_norm": 3.3553242683410645, "learning_rate": 9.444416662949919e-06, "loss": 0.3917, "step": 26367 }, { "epoch": 4.304314109628178, "grad_norm": 3.533986806869507, "learning_rate": 9.443776513013784e-06, "loss": 0.3573, "step": 26368 }, { "epoch": 4.304477368270683, "grad_norm": 3.1935040950775146, "learning_rate": 9.443136365364073e-06, "loss": 0.3811, "step": 26369 }, { "epoch": 4.304640626913187, "grad_norm": 3.3924922943115234, "learning_rate": 9.442496220003417e-06, "loss": 0.4052, "step": 26370 }, { "epoch": 4.304803885555692, "grad_norm": 3.46299147605896, "learning_rate": 9.44185607693445e-06, "loss": 0.3632, "step": 26371 }, { "epoch": 4.304967144198196, "grad_norm": 3.5925216674804688, "learning_rate": 9.441215936159802e-06, "loss": 0.3923, "step": 26372 }, { "epoch": 4.3051304028407005, "grad_norm": 3.127042531967163, "learning_rate": 9.440575797682103e-06, "loss": 0.3898, "step": 26373 }, { "epoch": 4.305293661483205, "grad_norm": 3.5374858379364014, "learning_rate": 9.439935661503985e-06, "loss": 0.3189, "step": 26374 }, { "epoch": 4.305456920125709, "grad_norm": 3.8003671169281006, "learning_rate": 9.439295527628083e-06, "loss": 0.3986, "step": 26375 }, { "epoch": 4.305620178768214, "grad_norm": 3.5393881797790527, "learning_rate": 9.438655396057028e-06, "loss": 0.3793, "step": 26376 }, { "epoch": 4.305783437410718, "grad_norm": 4.116067409515381, "learning_rate": 9.438015266793443e-06, "loss": 0.3966, "step": 26377 }, { "epoch": 4.305946696053223, "grad_norm": 3.9204795360565186, "learning_rate": 9.437375139839965e-06, "loss": 0.384, "step": 26378 }, { "epoch": 4.306109954695727, "grad_norm": 3.392962694168091, "learning_rate": 9.436735015199227e-06, "loss": 0.4031, "step": 26379 }, { "epoch": 4.3062732133382315, "grad_norm": 3.47306752204895, "learning_rate": 9.436094892873858e-06, "loss": 0.3614, "step": 26380 }, { "epoch": 4.306436471980735, "grad_norm": 3.8255324363708496, "learning_rate": 9.43545477286649e-06, "loss": 0.3702, "step": 26381 }, { "epoch": 4.3065997306232395, "grad_norm": 3.636345148086548, "learning_rate": 9.434814655179756e-06, "loss": 0.3491, "step": 26382 }, { "epoch": 4.306762989265744, "grad_norm": 3.4656026363372803, "learning_rate": 9.434174539816285e-06, "loss": 0.3432, "step": 26383 }, { "epoch": 4.306926247908248, "grad_norm": 3.5778915882110596, "learning_rate": 9.433534426778708e-06, "loss": 0.3948, "step": 26384 }, { "epoch": 4.307089506550753, "grad_norm": 2.7430193424224854, "learning_rate": 9.432894316069656e-06, "loss": 0.3406, "step": 26385 }, { "epoch": 4.307252765193257, "grad_norm": 4.09063720703125, "learning_rate": 9.432254207691766e-06, "loss": 0.4116, "step": 26386 }, { "epoch": 4.307416023835762, "grad_norm": 3.2672102451324463, "learning_rate": 9.431614101647662e-06, "loss": 0.3745, "step": 26387 }, { "epoch": 4.307579282478266, "grad_norm": 3.3762826919555664, "learning_rate": 9.430973997939976e-06, "loss": 0.3501, "step": 26388 }, { "epoch": 4.307742541120771, "grad_norm": 3.7099454402923584, "learning_rate": 9.430333896571343e-06, "loss": 0.3353, "step": 26389 }, { "epoch": 4.307905799763275, "grad_norm": 3.625676393508911, "learning_rate": 9.429693797544388e-06, "loss": 0.3881, "step": 26390 }, { "epoch": 4.3080690584057795, "grad_norm": 3.7893056869506836, "learning_rate": 9.42905370086175e-06, "loss": 0.3667, "step": 26391 }, { "epoch": 4.308232317048284, "grad_norm": 3.032191038131714, "learning_rate": 9.428413606526056e-06, "loss": 0.3573, "step": 26392 }, { "epoch": 4.308395575690788, "grad_norm": 3.2467267513275146, "learning_rate": 9.427773514539938e-06, "loss": 0.3331, "step": 26393 }, { "epoch": 4.308558834333293, "grad_norm": 3.9372780323028564, "learning_rate": 9.427133424906028e-06, "loss": 0.4215, "step": 26394 }, { "epoch": 4.308722092975797, "grad_norm": 3.0638413429260254, "learning_rate": 9.426493337626954e-06, "loss": 0.3665, "step": 26395 }, { "epoch": 4.308885351618302, "grad_norm": 3.097172260284424, "learning_rate": 9.425853252705355e-06, "loss": 0.3613, "step": 26396 }, { "epoch": 4.309048610260806, "grad_norm": 3.225390911102295, "learning_rate": 9.425213170143852e-06, "loss": 0.3343, "step": 26397 }, { "epoch": 4.30921186890331, "grad_norm": 2.722407102584839, "learning_rate": 9.424573089945081e-06, "loss": 0.3046, "step": 26398 }, { "epoch": 4.309375127545814, "grad_norm": 3.4774768352508545, "learning_rate": 9.423933012111672e-06, "loss": 0.3713, "step": 26399 }, { "epoch": 4.3095383861883185, "grad_norm": 2.884622812271118, "learning_rate": 9.423292936646258e-06, "loss": 0.3749, "step": 26400 }, { "epoch": 4.309701644830823, "grad_norm": 4.063304901123047, "learning_rate": 9.422652863551466e-06, "loss": 0.432, "step": 26401 }, { "epoch": 4.309864903473327, "grad_norm": 3.127290964126587, "learning_rate": 9.422012792829932e-06, "loss": 0.3349, "step": 26402 }, { "epoch": 4.310028162115832, "grad_norm": 3.1508500576019287, "learning_rate": 9.421372724484286e-06, "loss": 0.3642, "step": 26403 }, { "epoch": 4.310191420758336, "grad_norm": 3.8839476108551025, "learning_rate": 9.420732658517158e-06, "loss": 0.4039, "step": 26404 }, { "epoch": 4.310354679400841, "grad_norm": 3.276718854904175, "learning_rate": 9.42009259493118e-06, "loss": 0.3634, "step": 26405 }, { "epoch": 4.310517938043345, "grad_norm": 4.132636547088623, "learning_rate": 9.419452533728983e-06, "loss": 0.4605, "step": 26406 }, { "epoch": 4.31068119668585, "grad_norm": 3.4208755493164062, "learning_rate": 9.418812474913196e-06, "loss": 0.3422, "step": 26407 }, { "epoch": 4.310844455328354, "grad_norm": 3.447230577468872, "learning_rate": 9.418172418486452e-06, "loss": 0.3572, "step": 26408 }, { "epoch": 4.3110077139708585, "grad_norm": 3.0203959941864014, "learning_rate": 9.41753236445138e-06, "loss": 0.3516, "step": 26409 }, { "epoch": 4.311170972613363, "grad_norm": 3.6102938652038574, "learning_rate": 9.416892312810615e-06, "loss": 0.3669, "step": 26410 }, { "epoch": 4.311334231255867, "grad_norm": 3.7667627334594727, "learning_rate": 9.416252263566784e-06, "loss": 0.3568, "step": 26411 }, { "epoch": 4.311497489898372, "grad_norm": 2.6395959854125977, "learning_rate": 9.415612216722519e-06, "loss": 0.2902, "step": 26412 }, { "epoch": 4.311660748540876, "grad_norm": 2.7276382446289062, "learning_rate": 9.414972172280449e-06, "loss": 0.2881, "step": 26413 }, { "epoch": 4.311824007183381, "grad_norm": 3.8382670879364014, "learning_rate": 9.414332130243211e-06, "loss": 0.3633, "step": 26414 }, { "epoch": 4.311987265825884, "grad_norm": 3.4544849395751953, "learning_rate": 9.413692090613437e-06, "loss": 0.3907, "step": 26415 }, { "epoch": 4.312150524468389, "grad_norm": 3.8218493461608887, "learning_rate": 9.413052053393749e-06, "loss": 0.3455, "step": 26416 }, { "epoch": 4.312313783110893, "grad_norm": 3.043537139892578, "learning_rate": 9.412412018586779e-06, "loss": 0.3289, "step": 26417 }, { "epoch": 4.3124770417533975, "grad_norm": 4.37878942489624, "learning_rate": 9.411771986195164e-06, "loss": 0.4319, "step": 26418 }, { "epoch": 4.312640300395902, "grad_norm": 3.8867833614349365, "learning_rate": 9.411131956221534e-06, "loss": 0.4217, "step": 26419 }, { "epoch": 4.312803559038406, "grad_norm": 3.2711994647979736, "learning_rate": 9.410491928668515e-06, "loss": 0.3619, "step": 26420 }, { "epoch": 4.312966817680911, "grad_norm": 3.651381015777588, "learning_rate": 9.409851903538744e-06, "loss": 0.3981, "step": 26421 }, { "epoch": 4.313130076323415, "grad_norm": 3.5532469749450684, "learning_rate": 9.409211880834847e-06, "loss": 0.3972, "step": 26422 }, { "epoch": 4.31329333496592, "grad_norm": 4.069911003112793, "learning_rate": 9.408571860559458e-06, "loss": 0.4278, "step": 26423 }, { "epoch": 4.313456593608424, "grad_norm": 3.660759449005127, "learning_rate": 9.407931842715204e-06, "loss": 0.4072, "step": 26424 }, { "epoch": 4.313619852250929, "grad_norm": 3.4589016437530518, "learning_rate": 9.407291827304726e-06, "loss": 0.3568, "step": 26425 }, { "epoch": 4.313783110893433, "grad_norm": 3.515089988708496, "learning_rate": 9.406651814330642e-06, "loss": 0.3695, "step": 26426 }, { "epoch": 4.3139463695359375, "grad_norm": 3.6400632858276367, "learning_rate": 9.406011803795588e-06, "loss": 0.3322, "step": 26427 }, { "epoch": 4.314109628178442, "grad_norm": 3.5578341484069824, "learning_rate": 9.405371795702194e-06, "loss": 0.3763, "step": 26428 }, { "epoch": 4.314272886820946, "grad_norm": 4.961003303527832, "learning_rate": 9.404731790053095e-06, "loss": 0.684, "step": 26429 }, { "epoch": 4.314436145463451, "grad_norm": 3.7585391998291016, "learning_rate": 9.404091786850918e-06, "loss": 0.3865, "step": 26430 }, { "epoch": 4.314599404105955, "grad_norm": 3.3264997005462646, "learning_rate": 9.403451786098295e-06, "loss": 0.36, "step": 26431 }, { "epoch": 4.31476266274846, "grad_norm": 3.4309980869293213, "learning_rate": 9.402811787797856e-06, "loss": 0.3853, "step": 26432 }, { "epoch": 4.314925921390964, "grad_norm": 2.9451115131378174, "learning_rate": 9.402171791952231e-06, "loss": 0.3022, "step": 26433 }, { "epoch": 4.315089180033468, "grad_norm": 3.585261583328247, "learning_rate": 9.401531798564053e-06, "loss": 0.3657, "step": 26434 }, { "epoch": 4.315252438675972, "grad_norm": 3.6315560340881348, "learning_rate": 9.400891807635955e-06, "loss": 0.3757, "step": 26435 }, { "epoch": 4.3154156973184765, "grad_norm": 3.4216554164886475, "learning_rate": 9.400251819170562e-06, "loss": 0.376, "step": 26436 }, { "epoch": 4.315578955960981, "grad_norm": 2.9726672172546387, "learning_rate": 9.399611833170507e-06, "loss": 0.3501, "step": 26437 }, { "epoch": 4.315742214603485, "grad_norm": 4.126502990722656, "learning_rate": 9.398971849638419e-06, "loss": 0.408, "step": 26438 }, { "epoch": 4.31590547324599, "grad_norm": 3.75276255607605, "learning_rate": 9.398331868576935e-06, "loss": 0.412, "step": 26439 }, { "epoch": 4.316068731888494, "grad_norm": 3.5939934253692627, "learning_rate": 9.397691889988677e-06, "loss": 0.4318, "step": 26440 }, { "epoch": 4.316231990530999, "grad_norm": 3.493665933609009, "learning_rate": 9.397051913876282e-06, "loss": 0.3308, "step": 26441 }, { "epoch": 4.316395249173503, "grad_norm": 2.6683530807495117, "learning_rate": 9.39641194024238e-06, "loss": 0.3213, "step": 26442 }, { "epoch": 4.316558507816008, "grad_norm": 2.8380401134490967, "learning_rate": 9.395771969089598e-06, "loss": 0.326, "step": 26443 }, { "epoch": 4.316721766458512, "grad_norm": 3.5012779235839844, "learning_rate": 9.395132000420572e-06, "loss": 0.3844, "step": 26444 }, { "epoch": 4.3168850251010165, "grad_norm": 4.2591447830200195, "learning_rate": 9.394492034237932e-06, "loss": 0.4191, "step": 26445 }, { "epoch": 4.317048283743521, "grad_norm": 3.5245749950408936, "learning_rate": 9.393852070544304e-06, "loss": 0.3559, "step": 26446 }, { "epoch": 4.317211542386025, "grad_norm": 3.258389472961426, "learning_rate": 9.39321210934232e-06, "loss": 0.3481, "step": 26447 }, { "epoch": 4.31737480102853, "grad_norm": 3.7739675045013428, "learning_rate": 9.392572150634614e-06, "loss": 0.3707, "step": 26448 }, { "epoch": 4.317538059671034, "grad_norm": 4.001935005187988, "learning_rate": 9.391932194423812e-06, "loss": 0.4006, "step": 26449 }, { "epoch": 4.317701318313539, "grad_norm": 4.233425140380859, "learning_rate": 9.39129224071255e-06, "loss": 0.5168, "step": 26450 }, { "epoch": 4.317864576956042, "grad_norm": 3.916694164276123, "learning_rate": 9.390652289503452e-06, "loss": 0.8098, "step": 26451 }, { "epoch": 4.318027835598547, "grad_norm": 3.819288730621338, "learning_rate": 9.390012340799155e-06, "loss": 0.3946, "step": 26452 }, { "epoch": 4.318191094241051, "grad_norm": 3.4180610179901123, "learning_rate": 9.389372394602286e-06, "loss": 0.4001, "step": 26453 }, { "epoch": 4.3183543528835555, "grad_norm": 4.312723159790039, "learning_rate": 9.388732450915482e-06, "loss": 0.4162, "step": 26454 }, { "epoch": 4.31851761152606, "grad_norm": 3.38492488861084, "learning_rate": 9.38809250974136e-06, "loss": 0.3906, "step": 26455 }, { "epoch": 4.318680870168564, "grad_norm": 3.445349931716919, "learning_rate": 9.387452571082562e-06, "loss": 0.3448, "step": 26456 }, { "epoch": 4.318844128811069, "grad_norm": 4.140348434448242, "learning_rate": 9.386812634941717e-06, "loss": 0.4349, "step": 26457 }, { "epoch": 4.319007387453573, "grad_norm": 3.5079267024993896, "learning_rate": 9.38617270132145e-06, "loss": 0.4391, "step": 26458 }, { "epoch": 4.319170646096078, "grad_norm": 3.748439073562622, "learning_rate": 9.385532770224399e-06, "loss": 0.376, "step": 26459 }, { "epoch": 4.319333904738582, "grad_norm": 2.6555662155151367, "learning_rate": 9.384892841653189e-06, "loss": 0.3252, "step": 26460 }, { "epoch": 4.319497163381087, "grad_norm": 3.7492940425872803, "learning_rate": 9.38425291561045e-06, "loss": 0.3727, "step": 26461 }, { "epoch": 4.319660422023591, "grad_norm": 3.539950370788574, "learning_rate": 9.383612992098816e-06, "loss": 0.3993, "step": 26462 }, { "epoch": 4.3198236806660955, "grad_norm": 3.035020589828491, "learning_rate": 9.382973071120917e-06, "loss": 0.3793, "step": 26463 }, { "epoch": 4.3199869393086, "grad_norm": 3.0807087421417236, "learning_rate": 9.382333152679386e-06, "loss": 0.4133, "step": 26464 }, { "epoch": 4.320150197951104, "grad_norm": 2.5727832317352295, "learning_rate": 9.381693236776849e-06, "loss": 0.2982, "step": 26465 }, { "epoch": 4.320313456593609, "grad_norm": 3.5128912925720215, "learning_rate": 9.381053323415935e-06, "loss": 0.3896, "step": 26466 }, { "epoch": 4.320476715236113, "grad_norm": 3.894132137298584, "learning_rate": 9.380413412599276e-06, "loss": 0.4153, "step": 26467 }, { "epoch": 4.320639973878617, "grad_norm": 3.2951724529266357, "learning_rate": 9.379773504329504e-06, "loss": 0.3611, "step": 26468 }, { "epoch": 4.320803232521121, "grad_norm": 3.0337729454040527, "learning_rate": 9.379133598609251e-06, "loss": 0.3529, "step": 26469 }, { "epoch": 4.320966491163626, "grad_norm": 3.2333297729492188, "learning_rate": 9.378493695441144e-06, "loss": 0.3841, "step": 26470 }, { "epoch": 4.32112974980613, "grad_norm": 2.897491693496704, "learning_rate": 9.377853794827817e-06, "loss": 0.3307, "step": 26471 }, { "epoch": 4.3212930084486345, "grad_norm": 3.7304461002349854, "learning_rate": 9.377213896771895e-06, "loss": 0.4097, "step": 26472 }, { "epoch": 4.321456267091139, "grad_norm": 4.440945625305176, "learning_rate": 9.376574001276013e-06, "loss": 0.4035, "step": 26473 }, { "epoch": 4.321619525733643, "grad_norm": 3.4273393154144287, "learning_rate": 9.375934108342803e-06, "loss": 0.3579, "step": 26474 }, { "epoch": 4.321782784376148, "grad_norm": 3.2650399208068848, "learning_rate": 9.375294217974888e-06, "loss": 0.3643, "step": 26475 }, { "epoch": 4.321946043018652, "grad_norm": 3.5492846965789795, "learning_rate": 9.374654330174904e-06, "loss": 0.4148, "step": 26476 }, { "epoch": 4.322109301661157, "grad_norm": 3.2158567905426025, "learning_rate": 9.37401444494548e-06, "loss": 0.3644, "step": 26477 }, { "epoch": 4.322272560303661, "grad_norm": 3.5359559059143066, "learning_rate": 9.373374562289244e-06, "loss": 0.4018, "step": 26478 }, { "epoch": 4.322435818946166, "grad_norm": 4.2354655265808105, "learning_rate": 9.372734682208829e-06, "loss": 0.395, "step": 26479 }, { "epoch": 4.32259907758867, "grad_norm": 4.2859787940979, "learning_rate": 9.372094804706867e-06, "loss": 0.4488, "step": 26480 }, { "epoch": 4.3227623362311745, "grad_norm": 3.063427448272705, "learning_rate": 9.371454929785985e-06, "loss": 0.3535, "step": 26481 }, { "epoch": 4.322925594873679, "grad_norm": 4.016476154327393, "learning_rate": 9.370815057448815e-06, "loss": 0.3918, "step": 26482 }, { "epoch": 4.323088853516183, "grad_norm": 3.763885259628296, "learning_rate": 9.370175187697987e-06, "loss": 0.3796, "step": 26483 }, { "epoch": 4.323252112158688, "grad_norm": 3.66098952293396, "learning_rate": 9.369535320536132e-06, "loss": 0.4269, "step": 26484 }, { "epoch": 4.323415370801192, "grad_norm": 4.216066360473633, "learning_rate": 9.368895455965878e-06, "loss": 0.3852, "step": 26485 }, { "epoch": 4.323578629443697, "grad_norm": 3.0820279121398926, "learning_rate": 9.368255593989857e-06, "loss": 0.3364, "step": 26486 }, { "epoch": 4.3237418880862, "grad_norm": 3.1513724327087402, "learning_rate": 9.367615734610698e-06, "loss": 0.3114, "step": 26487 }, { "epoch": 4.323905146728705, "grad_norm": 4.21368408203125, "learning_rate": 9.366975877831032e-06, "loss": 0.4472, "step": 26488 }, { "epoch": 4.324068405371209, "grad_norm": 3.514281749725342, "learning_rate": 9.366336023653485e-06, "loss": 0.3787, "step": 26489 }, { "epoch": 4.3242316640137135, "grad_norm": 3.541506290435791, "learning_rate": 9.365696172080696e-06, "loss": 0.3326, "step": 26490 }, { "epoch": 4.324394922656218, "grad_norm": 3.8955748081207275, "learning_rate": 9.365056323115291e-06, "loss": 0.4283, "step": 26491 }, { "epoch": 4.324558181298722, "grad_norm": 4.022180080413818, "learning_rate": 9.364416476759898e-06, "loss": 0.386, "step": 26492 }, { "epoch": 4.324721439941227, "grad_norm": 3.2632226943969727, "learning_rate": 9.36377663301715e-06, "loss": 0.3565, "step": 26493 }, { "epoch": 4.324884698583731, "grad_norm": 3.375455379486084, "learning_rate": 9.363136791889678e-06, "loss": 0.4028, "step": 26494 }, { "epoch": 4.325047957226236, "grad_norm": 3.0681238174438477, "learning_rate": 9.362496953380107e-06, "loss": 0.3349, "step": 26495 }, { "epoch": 4.32521121586874, "grad_norm": 3.1501967906951904, "learning_rate": 9.361857117491071e-06, "loss": 0.3467, "step": 26496 }, { "epoch": 4.325374474511245, "grad_norm": 3.9870920181274414, "learning_rate": 9.3612172842252e-06, "loss": 0.372, "step": 26497 }, { "epoch": 4.325537733153749, "grad_norm": 3.054607629776001, "learning_rate": 9.360577453585122e-06, "loss": 0.3713, "step": 26498 }, { "epoch": 4.3257009917962534, "grad_norm": 4.272585868835449, "learning_rate": 9.35993762557347e-06, "loss": 0.4819, "step": 26499 }, { "epoch": 4.325864250438758, "grad_norm": 3.2121734619140625, "learning_rate": 9.359297800192873e-06, "loss": 0.3306, "step": 26500 }, { "epoch": 4.326027509081262, "grad_norm": 3.167658567428589, "learning_rate": 9.358657977445958e-06, "loss": 0.3361, "step": 26501 }, { "epoch": 4.326190767723767, "grad_norm": 3.417163372039795, "learning_rate": 9.35801815733536e-06, "loss": 0.3993, "step": 26502 }, { "epoch": 4.326354026366271, "grad_norm": 4.014018535614014, "learning_rate": 9.357378339863711e-06, "loss": 0.4228, "step": 26503 }, { "epoch": 4.326517285008775, "grad_norm": 3.7881405353546143, "learning_rate": 9.356738525033634e-06, "loss": 0.4426, "step": 26504 }, { "epoch": 4.326680543651279, "grad_norm": 4.402939796447754, "learning_rate": 9.356098712847758e-06, "loss": 0.4063, "step": 26505 }, { "epoch": 4.326843802293784, "grad_norm": 2.827597141265869, "learning_rate": 9.355458903308719e-06, "loss": 0.3129, "step": 26506 }, { "epoch": 4.327007060936288, "grad_norm": 3.7802324295043945, "learning_rate": 9.354819096419147e-06, "loss": 0.4135, "step": 26507 }, { "epoch": 4.3271703195787925, "grad_norm": 3.3155367374420166, "learning_rate": 9.354179292181669e-06, "loss": 0.3597, "step": 26508 }, { "epoch": 4.327333578221297, "grad_norm": 2.8950772285461426, "learning_rate": 9.353539490598916e-06, "loss": 0.3651, "step": 26509 }, { "epoch": 4.327496836863801, "grad_norm": 3.928025960922241, "learning_rate": 9.352899691673519e-06, "loss": 0.3623, "step": 26510 }, { "epoch": 4.327660095506306, "grad_norm": 3.603621006011963, "learning_rate": 9.352259895408107e-06, "loss": 0.3702, "step": 26511 }, { "epoch": 4.32782335414881, "grad_norm": 3.2178919315338135, "learning_rate": 9.351620101805307e-06, "loss": 0.3227, "step": 26512 }, { "epoch": 4.327986612791315, "grad_norm": 3.404115915298462, "learning_rate": 9.350980310867759e-06, "loss": 0.4038, "step": 26513 }, { "epoch": 4.328149871433819, "grad_norm": 3.9939217567443848, "learning_rate": 9.35034052259808e-06, "loss": 0.4121, "step": 26514 }, { "epoch": 4.328313130076324, "grad_norm": 3.6134605407714844, "learning_rate": 9.349700736998908e-06, "loss": 0.4029, "step": 26515 }, { "epoch": 4.328476388718828, "grad_norm": 3.420964479446411, "learning_rate": 9.349060954072867e-06, "loss": 0.3846, "step": 26516 }, { "epoch": 4.328639647361332, "grad_norm": 3.2253377437591553, "learning_rate": 9.348421173822593e-06, "loss": 0.3356, "step": 26517 }, { "epoch": 4.328802906003837, "grad_norm": 3.8461716175079346, "learning_rate": 9.347781396250715e-06, "loss": 0.3534, "step": 26518 }, { "epoch": 4.328966164646341, "grad_norm": 3.52032732963562, "learning_rate": 9.34714162135986e-06, "loss": 0.3737, "step": 26519 }, { "epoch": 4.329129423288846, "grad_norm": 3.4106147289276123, "learning_rate": 9.34650184915266e-06, "loss": 0.3799, "step": 26520 }, { "epoch": 4.329292681931349, "grad_norm": 2.9796030521392822, "learning_rate": 9.345862079631743e-06, "loss": 0.3332, "step": 26521 }, { "epoch": 4.329455940573854, "grad_norm": 3.840876817703247, "learning_rate": 9.345222312799742e-06, "loss": 0.4448, "step": 26522 }, { "epoch": 4.329619199216358, "grad_norm": 4.645092487335205, "learning_rate": 9.344582548659284e-06, "loss": 0.4714, "step": 26523 }, { "epoch": 4.329782457858863, "grad_norm": 3.9131951332092285, "learning_rate": 9.343942787212999e-06, "loss": 0.3705, "step": 26524 }, { "epoch": 4.329945716501367, "grad_norm": 3.2434134483337402, "learning_rate": 9.343303028463518e-06, "loss": 0.3137, "step": 26525 }, { "epoch": 4.3301089751438715, "grad_norm": 3.3398478031158447, "learning_rate": 9.34266327241347e-06, "loss": 0.388, "step": 26526 }, { "epoch": 4.330272233786376, "grad_norm": 4.030155658721924, "learning_rate": 9.342023519065486e-06, "loss": 0.3853, "step": 26527 }, { "epoch": 4.33043549242888, "grad_norm": 3.8776402473449707, "learning_rate": 9.341383768422191e-06, "loss": 0.428, "step": 26528 }, { "epoch": 4.330598751071385, "grad_norm": 4.208689212799072, "learning_rate": 9.340744020486223e-06, "loss": 0.413, "step": 26529 }, { "epoch": 4.330762009713889, "grad_norm": 3.2086775302886963, "learning_rate": 9.340104275260205e-06, "loss": 0.3624, "step": 26530 }, { "epoch": 4.330925268356394, "grad_norm": 3.4194459915161133, "learning_rate": 9.339464532746769e-06, "loss": 0.3635, "step": 26531 }, { "epoch": 4.331088526998898, "grad_norm": 4.362053871154785, "learning_rate": 9.338824792948547e-06, "loss": 0.3806, "step": 26532 }, { "epoch": 4.331251785641403, "grad_norm": 3.9666881561279297, "learning_rate": 9.338185055868168e-06, "loss": 0.444, "step": 26533 }, { "epoch": 4.331415044283907, "grad_norm": 3.4845447540283203, "learning_rate": 9.337545321508258e-06, "loss": 0.3826, "step": 26534 }, { "epoch": 4.331578302926411, "grad_norm": 2.9414000511169434, "learning_rate": 9.33690558987145e-06, "loss": 0.3546, "step": 26535 }, { "epoch": 4.331741561568916, "grad_norm": 3.858738899230957, "learning_rate": 9.336265860960371e-06, "loss": 0.3904, "step": 26536 }, { "epoch": 4.33190482021142, "grad_norm": 3.772969961166382, "learning_rate": 9.335626134777652e-06, "loss": 0.383, "step": 26537 }, { "epoch": 4.332068078853924, "grad_norm": 3.479403257369995, "learning_rate": 9.334986411325924e-06, "loss": 0.4029, "step": 26538 }, { "epoch": 4.332231337496428, "grad_norm": 3.3028433322906494, "learning_rate": 9.334346690607814e-06, "loss": 0.3612, "step": 26539 }, { "epoch": 4.332394596138933, "grad_norm": 3.6394686698913574, "learning_rate": 9.333706972625956e-06, "loss": 0.3255, "step": 26540 }, { "epoch": 4.332557854781437, "grad_norm": 3.538323163986206, "learning_rate": 9.333067257382976e-06, "loss": 0.3765, "step": 26541 }, { "epoch": 4.332721113423942, "grad_norm": 3.918224811553955, "learning_rate": 9.332427544881505e-06, "loss": 0.437, "step": 26542 }, { "epoch": 4.332884372066446, "grad_norm": 3.8782591819763184, "learning_rate": 9.331787835124176e-06, "loss": 0.4856, "step": 26543 }, { "epoch": 4.3330476307089505, "grad_norm": 3.032599687576294, "learning_rate": 9.33114812811361e-06, "loss": 0.3939, "step": 26544 }, { "epoch": 4.333210889351455, "grad_norm": 3.2662088871002197, "learning_rate": 9.330508423852444e-06, "loss": 0.3227, "step": 26545 }, { "epoch": 4.333374147993959, "grad_norm": 2.906189203262329, "learning_rate": 9.329868722343303e-06, "loss": 0.345, "step": 26546 }, { "epoch": 4.333537406636464, "grad_norm": 3.0080790519714355, "learning_rate": 9.329229023588819e-06, "loss": 0.3658, "step": 26547 }, { "epoch": 4.333700665278968, "grad_norm": 3.357043981552124, "learning_rate": 9.328589327591623e-06, "loss": 0.3795, "step": 26548 }, { "epoch": 4.333863923921473, "grad_norm": 3.730344295501709, "learning_rate": 9.327949634354341e-06, "loss": 0.4336, "step": 26549 }, { "epoch": 4.334027182563977, "grad_norm": 2.8271262645721436, "learning_rate": 9.327309943879604e-06, "loss": 0.3269, "step": 26550 }, { "epoch": 4.334190441206482, "grad_norm": 3.5579988956451416, "learning_rate": 9.326670256170042e-06, "loss": 0.4206, "step": 26551 }, { "epoch": 4.334353699848986, "grad_norm": 2.9205026626586914, "learning_rate": 9.326030571228288e-06, "loss": 0.3181, "step": 26552 }, { "epoch": 4.33451695849149, "grad_norm": 4.023801803588867, "learning_rate": 9.325390889056965e-06, "loss": 0.6063, "step": 26553 }, { "epoch": 4.334680217133995, "grad_norm": 3.1005492210388184, "learning_rate": 9.324751209658705e-06, "loss": 0.3852, "step": 26554 }, { "epoch": 4.334843475776499, "grad_norm": 3.5602176189422607, "learning_rate": 9.324111533036136e-06, "loss": 0.3751, "step": 26555 }, { "epoch": 4.335006734419004, "grad_norm": 3.8607444763183594, "learning_rate": 9.323471859191892e-06, "loss": 0.3964, "step": 26556 }, { "epoch": 4.335169993061507, "grad_norm": 3.48639178276062, "learning_rate": 9.322832188128598e-06, "loss": 0.3575, "step": 26557 }, { "epoch": 4.335333251704012, "grad_norm": 3.7039196491241455, "learning_rate": 9.322192519848885e-06, "loss": 0.3466, "step": 26558 }, { "epoch": 4.335496510346516, "grad_norm": 3.5105135440826416, "learning_rate": 9.321552854355384e-06, "loss": 0.3379, "step": 26559 }, { "epoch": 4.335659768989021, "grad_norm": 3.644207000732422, "learning_rate": 9.320913191650725e-06, "loss": 0.3549, "step": 26560 }, { "epoch": 4.335823027631525, "grad_norm": 3.536182403564453, "learning_rate": 9.320273531737532e-06, "loss": 0.3894, "step": 26561 }, { "epoch": 4.3359862862740295, "grad_norm": 4.148865222930908, "learning_rate": 9.319633874618442e-06, "loss": 0.3832, "step": 26562 }, { "epoch": 4.336149544916534, "grad_norm": 3.251619577407837, "learning_rate": 9.318994220296077e-06, "loss": 0.3606, "step": 26563 }, { "epoch": 4.336312803559038, "grad_norm": 3.6573996543884277, "learning_rate": 9.318354568773069e-06, "loss": 0.332, "step": 26564 }, { "epoch": 4.336476062201543, "grad_norm": 2.8256828784942627, "learning_rate": 9.31771492005205e-06, "loss": 0.3251, "step": 26565 }, { "epoch": 4.336639320844047, "grad_norm": 3.407078981399536, "learning_rate": 9.317075274135645e-06, "loss": 0.3742, "step": 26566 }, { "epoch": 4.336802579486552, "grad_norm": 3.438960075378418, "learning_rate": 9.316435631026486e-06, "loss": 0.4044, "step": 26567 }, { "epoch": 4.336965838129056, "grad_norm": 4.150745868682861, "learning_rate": 9.315795990727202e-06, "loss": 0.4111, "step": 26568 }, { "epoch": 4.3371290967715606, "grad_norm": 3.403245449066162, "learning_rate": 9.315156353240425e-06, "loss": 0.3451, "step": 26569 }, { "epoch": 4.337292355414065, "grad_norm": 3.6499218940734863, "learning_rate": 9.314516718568779e-06, "loss": 0.348, "step": 26570 }, { "epoch": 4.337455614056569, "grad_norm": 3.3352274894714355, "learning_rate": 9.313877086714896e-06, "loss": 0.3361, "step": 26571 }, { "epoch": 4.337618872699074, "grad_norm": 3.3217906951904297, "learning_rate": 9.313237457681409e-06, "loss": 0.3504, "step": 26572 }, { "epoch": 4.337782131341578, "grad_norm": 3.4907631874084473, "learning_rate": 9.31259783147094e-06, "loss": 0.3655, "step": 26573 }, { "epoch": 4.337945389984082, "grad_norm": 3.116596221923828, "learning_rate": 9.311958208086122e-06, "loss": 0.3433, "step": 26574 }, { "epoch": 4.338108648626586, "grad_norm": 3.302006959915161, "learning_rate": 9.311318587529585e-06, "loss": 0.3381, "step": 26575 }, { "epoch": 4.338271907269091, "grad_norm": 3.3610992431640625, "learning_rate": 9.310678969803956e-06, "loss": 0.4127, "step": 26576 }, { "epoch": 4.338435165911595, "grad_norm": 3.8017208576202393, "learning_rate": 9.310039354911866e-06, "loss": 0.4137, "step": 26577 }, { "epoch": 4.3385984245541, "grad_norm": 3.6987733840942383, "learning_rate": 9.309399742855943e-06, "loss": 0.3844, "step": 26578 }, { "epoch": 4.338761683196604, "grad_norm": 4.649877071380615, "learning_rate": 9.308760133638816e-06, "loss": 0.4008, "step": 26579 }, { "epoch": 4.3389249418391085, "grad_norm": 3.430934429168701, "learning_rate": 9.308120527263117e-06, "loss": 0.4037, "step": 26580 }, { "epoch": 4.339088200481613, "grad_norm": 3.482619524002075, "learning_rate": 9.307480923731473e-06, "loss": 0.3255, "step": 26581 }, { "epoch": 4.339251459124117, "grad_norm": 3.0999677181243896, "learning_rate": 9.306841323046515e-06, "loss": 0.3737, "step": 26582 }, { "epoch": 4.339414717766622, "grad_norm": 3.3060452938079834, "learning_rate": 9.30620172521087e-06, "loss": 0.3652, "step": 26583 }, { "epoch": 4.339577976409126, "grad_norm": 3.646122932434082, "learning_rate": 9.305562130227166e-06, "loss": 0.4039, "step": 26584 }, { "epoch": 4.339741235051631, "grad_norm": 3.18100905418396, "learning_rate": 9.304922538098035e-06, "loss": 0.3853, "step": 26585 }, { "epoch": 4.339904493694135, "grad_norm": 2.8486273288726807, "learning_rate": 9.304282948826105e-06, "loss": 0.313, "step": 26586 }, { "epoch": 4.3400677523366396, "grad_norm": 3.670423746109009, "learning_rate": 9.303643362414005e-06, "loss": 0.375, "step": 26587 }, { "epoch": 4.340231010979144, "grad_norm": 3.0925934314727783, "learning_rate": 9.303003778864363e-06, "loss": 0.3527, "step": 26588 }, { "epoch": 4.340394269621648, "grad_norm": 3.281642436981201, "learning_rate": 9.302364198179808e-06, "loss": 0.3652, "step": 26589 }, { "epoch": 4.340557528264153, "grad_norm": 3.9322333335876465, "learning_rate": 9.301724620362973e-06, "loss": 0.4088, "step": 26590 }, { "epoch": 4.340720786906656, "grad_norm": 3.217122793197632, "learning_rate": 9.301085045416487e-06, "loss": 0.3669, "step": 26591 }, { "epoch": 4.340884045549161, "grad_norm": 4.026803493499756, "learning_rate": 9.300445473342975e-06, "loss": 0.3842, "step": 26592 }, { "epoch": 4.341047304191665, "grad_norm": 4.1429829597473145, "learning_rate": 9.299805904145063e-06, "loss": 0.4281, "step": 26593 }, { "epoch": 4.34121056283417, "grad_norm": 3.784666061401367, "learning_rate": 9.299166337825387e-06, "loss": 0.3687, "step": 26594 }, { "epoch": 4.341373821476674, "grad_norm": 3.0759363174438477, "learning_rate": 9.298526774386576e-06, "loss": 0.3379, "step": 26595 }, { "epoch": 4.341537080119179, "grad_norm": 3.5710394382476807, "learning_rate": 9.297887213831254e-06, "loss": 0.373, "step": 26596 }, { "epoch": 4.341700338761683, "grad_norm": 3.0293729305267334, "learning_rate": 9.297247656162052e-06, "loss": 0.3505, "step": 26597 }, { "epoch": 4.3418635974041875, "grad_norm": 3.5758442878723145, "learning_rate": 9.2966081013816e-06, "loss": 0.3912, "step": 26598 }, { "epoch": 4.342026856046692, "grad_norm": 3.179471731185913, "learning_rate": 9.295968549492527e-06, "loss": 0.3337, "step": 26599 }, { "epoch": 4.342190114689196, "grad_norm": 3.489779472351074, "learning_rate": 9.29532900049746e-06, "loss": 0.4149, "step": 26600 }, { "epoch": 4.342353373331701, "grad_norm": 3.883328914642334, "learning_rate": 9.294689454399037e-06, "loss": 0.3882, "step": 26601 }, { "epoch": 4.342516631974205, "grad_norm": 3.268533229827881, "learning_rate": 9.294049911199872e-06, "loss": 0.3475, "step": 26602 }, { "epoch": 4.34267989061671, "grad_norm": 3.7229723930358887, "learning_rate": 9.293410370902603e-06, "loss": 0.384, "step": 26603 }, { "epoch": 4.342843149259214, "grad_norm": 3.857212543487549, "learning_rate": 9.292770833509854e-06, "loss": 0.4322, "step": 26604 }, { "epoch": 4.3430064079017185, "grad_norm": 3.528730630874634, "learning_rate": 9.29213129902426e-06, "loss": 0.3656, "step": 26605 }, { "epoch": 4.343169666544223, "grad_norm": 3.8768889904022217, "learning_rate": 9.291491767448447e-06, "loss": 0.5425, "step": 26606 }, { "epoch": 4.343332925186727, "grad_norm": 4.001893520355225, "learning_rate": 9.290852238785044e-06, "loss": 0.3671, "step": 26607 }, { "epoch": 4.343496183829232, "grad_norm": 3.2487261295318604, "learning_rate": 9.290212713036678e-06, "loss": 0.3959, "step": 26608 }, { "epoch": 4.343659442471736, "grad_norm": 4.266940593719482, "learning_rate": 9.289573190205981e-06, "loss": 0.9412, "step": 26609 }, { "epoch": 4.34382270111424, "grad_norm": 3.4876019954681396, "learning_rate": 9.28893367029558e-06, "loss": 0.3484, "step": 26610 }, { "epoch": 4.343985959756744, "grad_norm": 3.6593737602233887, "learning_rate": 9.288294153308107e-06, "loss": 0.4146, "step": 26611 }, { "epoch": 4.344149218399249, "grad_norm": 3.9298806190490723, "learning_rate": 9.287654639246185e-06, "loss": 0.3839, "step": 26612 }, { "epoch": 4.344312477041753, "grad_norm": 2.959909439086914, "learning_rate": 9.287015128112446e-06, "loss": 0.3281, "step": 26613 }, { "epoch": 4.344475735684258, "grad_norm": 3.138306140899658, "learning_rate": 9.286375619909519e-06, "loss": 0.3631, "step": 26614 }, { "epoch": 4.344638994326762, "grad_norm": 3.7481629848480225, "learning_rate": 9.285736114640033e-06, "loss": 0.4101, "step": 26615 }, { "epoch": 4.3448022529692665, "grad_norm": 3.227079153060913, "learning_rate": 9.285096612306612e-06, "loss": 0.3548, "step": 26616 }, { "epoch": 4.344965511611771, "grad_norm": 3.8317768573760986, "learning_rate": 9.284457112911892e-06, "loss": 0.3987, "step": 26617 }, { "epoch": 4.345128770254275, "grad_norm": 3.201671838760376, "learning_rate": 9.2838176164585e-06, "loss": 0.3427, "step": 26618 }, { "epoch": 4.34529202889678, "grad_norm": 3.1148977279663086, "learning_rate": 9.283178122949061e-06, "loss": 0.348, "step": 26619 }, { "epoch": 4.345455287539284, "grad_norm": 3.261193037033081, "learning_rate": 9.282538632386208e-06, "loss": 0.3637, "step": 26620 }, { "epoch": 4.345618546181789, "grad_norm": 3.737342119216919, "learning_rate": 9.281899144772568e-06, "loss": 0.3731, "step": 26621 }, { "epoch": 4.345781804824293, "grad_norm": 2.9694926738739014, "learning_rate": 9.281259660110768e-06, "loss": 0.3621, "step": 26622 }, { "epoch": 4.3459450634667975, "grad_norm": 2.7453579902648926, "learning_rate": 9.280620178403438e-06, "loss": 0.3244, "step": 26623 }, { "epoch": 4.346108322109302, "grad_norm": 3.1180341243743896, "learning_rate": 9.279980699653209e-06, "loss": 0.3008, "step": 26624 }, { "epoch": 4.346271580751806, "grad_norm": 3.0158956050872803, "learning_rate": 9.279341223862705e-06, "loss": 0.2934, "step": 26625 }, { "epoch": 4.346434839394311, "grad_norm": 3.6836743354797363, "learning_rate": 9.278701751034557e-06, "loss": 0.4297, "step": 26626 }, { "epoch": 4.346598098036814, "grad_norm": 3.750357151031494, "learning_rate": 9.278062281171394e-06, "loss": 0.4217, "step": 26627 }, { "epoch": 4.346761356679319, "grad_norm": 3.625056743621826, "learning_rate": 9.277422814275844e-06, "loss": 0.3499, "step": 26628 }, { "epoch": 4.346924615321823, "grad_norm": 3.2649011611938477, "learning_rate": 9.276783350350538e-06, "loss": 0.3944, "step": 26629 }, { "epoch": 4.347087873964328, "grad_norm": 3.4476070404052734, "learning_rate": 9.276143889398102e-06, "loss": 0.3758, "step": 26630 }, { "epoch": 4.347251132606832, "grad_norm": 2.492236852645874, "learning_rate": 9.275504431421167e-06, "loss": 0.2895, "step": 26631 }, { "epoch": 4.347414391249337, "grad_norm": 3.143623113632202, "learning_rate": 9.274864976422356e-06, "loss": 0.3378, "step": 26632 }, { "epoch": 4.347577649891841, "grad_norm": 3.6329469680786133, "learning_rate": 9.274225524404303e-06, "loss": 0.4149, "step": 26633 }, { "epoch": 4.3477409085343455, "grad_norm": 3.2362544536590576, "learning_rate": 9.273586075369634e-06, "loss": 0.3285, "step": 26634 }, { "epoch": 4.34790416717685, "grad_norm": 3.8311233520507812, "learning_rate": 9.27294662932098e-06, "loss": 0.4099, "step": 26635 }, { "epoch": 4.348067425819354, "grad_norm": 3.718461275100708, "learning_rate": 9.272307186260966e-06, "loss": 0.3572, "step": 26636 }, { "epoch": 4.348230684461859, "grad_norm": 2.9039711952209473, "learning_rate": 9.271667746192224e-06, "loss": 0.3776, "step": 26637 }, { "epoch": 4.348393943104363, "grad_norm": 3.656055212020874, "learning_rate": 9.27102830911738e-06, "loss": 0.3742, "step": 26638 }, { "epoch": 4.348557201746868, "grad_norm": 2.6581225395202637, "learning_rate": 9.27038887503906e-06, "loss": 0.2938, "step": 26639 }, { "epoch": 4.348720460389372, "grad_norm": 3.520346164703369, "learning_rate": 9.269749443959905e-06, "loss": 0.3823, "step": 26640 }, { "epoch": 4.3488837190318765, "grad_norm": 3.8492069244384766, "learning_rate": 9.269110015882528e-06, "loss": 0.4011, "step": 26641 }, { "epoch": 4.349046977674381, "grad_norm": 4.143706798553467, "learning_rate": 9.268470590809564e-06, "loss": 0.383, "step": 26642 }, { "epoch": 4.349210236316885, "grad_norm": 4.637851715087891, "learning_rate": 9.26783116874364e-06, "loss": 0.4264, "step": 26643 }, { "epoch": 4.349373494959389, "grad_norm": 3.774874448776245, "learning_rate": 9.267191749687386e-06, "loss": 0.426, "step": 26644 }, { "epoch": 4.349536753601893, "grad_norm": 3.8836684226989746, "learning_rate": 9.266552333643431e-06, "loss": 0.314, "step": 26645 }, { "epoch": 4.349700012244398, "grad_norm": 3.294182777404785, "learning_rate": 9.265912920614403e-06, "loss": 0.3277, "step": 26646 }, { "epoch": 4.349863270886902, "grad_norm": 2.9012579917907715, "learning_rate": 9.26527351060293e-06, "loss": 0.3594, "step": 26647 }, { "epoch": 4.350026529529407, "grad_norm": 4.158694744110107, "learning_rate": 9.264634103611639e-06, "loss": 0.459, "step": 26648 }, { "epoch": 4.350189788171911, "grad_norm": 3.171814203262329, "learning_rate": 9.26399469964316e-06, "loss": 0.357, "step": 26649 }, { "epoch": 4.350353046814416, "grad_norm": 3.7004220485687256, "learning_rate": 9.263355298700123e-06, "loss": 0.417, "step": 26650 }, { "epoch": 4.35051630545692, "grad_norm": 3.3190019130706787, "learning_rate": 9.262715900785151e-06, "loss": 0.3967, "step": 26651 }, { "epoch": 4.3506795640994245, "grad_norm": 2.8695967197418213, "learning_rate": 9.262076505900878e-06, "loss": 0.2797, "step": 26652 }, { "epoch": 4.350842822741929, "grad_norm": 3.504058599472046, "learning_rate": 9.261437114049929e-06, "loss": 0.3966, "step": 26653 }, { "epoch": 4.351006081384433, "grad_norm": 3.671254873275757, "learning_rate": 9.260797725234928e-06, "loss": 0.3898, "step": 26654 }, { "epoch": 4.351169340026938, "grad_norm": 4.899651050567627, "learning_rate": 9.260158339458513e-06, "loss": 0.4462, "step": 26655 }, { "epoch": 4.351332598669442, "grad_norm": 3.5177388191223145, "learning_rate": 9.259518956723308e-06, "loss": 0.3457, "step": 26656 }, { "epoch": 4.351495857311947, "grad_norm": 3.460855484008789, "learning_rate": 9.258879577031942e-06, "loss": 0.3889, "step": 26657 }, { "epoch": 4.351659115954451, "grad_norm": 3.6264684200286865, "learning_rate": 9.258240200387041e-06, "loss": 0.366, "step": 26658 }, { "epoch": 4.3518223745969555, "grad_norm": 2.8665568828582764, "learning_rate": 9.257600826791234e-06, "loss": 0.3414, "step": 26659 }, { "epoch": 4.35198563323946, "grad_norm": 2.9315898418426514, "learning_rate": 9.256961456247153e-06, "loss": 0.3747, "step": 26660 }, { "epoch": 4.352148891881964, "grad_norm": 3.1755905151367188, "learning_rate": 9.25632208875742e-06, "loss": 0.3692, "step": 26661 }, { "epoch": 4.352312150524469, "grad_norm": 3.233494758605957, "learning_rate": 9.255682724324666e-06, "loss": 0.3552, "step": 26662 }, { "epoch": 4.352475409166972, "grad_norm": 3.4523158073425293, "learning_rate": 9.255043362951519e-06, "loss": 0.3646, "step": 26663 }, { "epoch": 4.352638667809477, "grad_norm": 3.563054084777832, "learning_rate": 9.254404004640607e-06, "loss": 0.4148, "step": 26664 }, { "epoch": 4.352801926451981, "grad_norm": 3.035433292388916, "learning_rate": 9.25376464939456e-06, "loss": 0.3587, "step": 26665 }, { "epoch": 4.352965185094486, "grad_norm": 3.4148242473602295, "learning_rate": 9.253125297216002e-06, "loss": 0.3283, "step": 26666 }, { "epoch": 4.35312844373699, "grad_norm": 3.3046226501464844, "learning_rate": 9.252485948107565e-06, "loss": 0.4244, "step": 26667 }, { "epoch": 4.353291702379495, "grad_norm": 3.912846565246582, "learning_rate": 9.251846602071877e-06, "loss": 0.4529, "step": 26668 }, { "epoch": 4.353454961021999, "grad_norm": 4.542139053344727, "learning_rate": 9.251207259111566e-06, "loss": 0.4102, "step": 26669 }, { "epoch": 4.3536182196645035, "grad_norm": 3.6533091068267822, "learning_rate": 9.250567919229259e-06, "loss": 0.3908, "step": 26670 }, { "epoch": 4.353781478307008, "grad_norm": 3.3295066356658936, "learning_rate": 9.249928582427584e-06, "loss": 0.3962, "step": 26671 }, { "epoch": 4.353944736949512, "grad_norm": 3.2503015995025635, "learning_rate": 9.24928924870917e-06, "loss": 0.3505, "step": 26672 }, { "epoch": 4.354107995592017, "grad_norm": 3.1438839435577393, "learning_rate": 9.248649918076644e-06, "loss": 0.3629, "step": 26673 }, { "epoch": 4.354271254234521, "grad_norm": 3.9123663902282715, "learning_rate": 9.248010590532634e-06, "loss": 0.3724, "step": 26674 }, { "epoch": 4.354434512877026, "grad_norm": 3.379777193069458, "learning_rate": 9.247371266079767e-06, "loss": 0.3477, "step": 26675 }, { "epoch": 4.35459777151953, "grad_norm": 2.4244585037231445, "learning_rate": 9.246731944720675e-06, "loss": 0.2896, "step": 26676 }, { "epoch": 4.3547610301620345, "grad_norm": 3.100813150405884, "learning_rate": 9.24609262645798e-06, "loss": 0.3497, "step": 26677 }, { "epoch": 4.354924288804539, "grad_norm": 3.7859158515930176, "learning_rate": 9.245453311294316e-06, "loss": 0.3681, "step": 26678 }, { "epoch": 4.355087547447043, "grad_norm": 4.274052619934082, "learning_rate": 9.244813999232309e-06, "loss": 0.4373, "step": 26679 }, { "epoch": 4.355250806089547, "grad_norm": 3.8612194061279297, "learning_rate": 9.24417469027459e-06, "loss": 0.4004, "step": 26680 }, { "epoch": 4.355414064732051, "grad_norm": 3.342135429382324, "learning_rate": 9.243535384423777e-06, "loss": 0.3424, "step": 26681 }, { "epoch": 4.355577323374556, "grad_norm": 2.83903169631958, "learning_rate": 9.242896081682508e-06, "loss": 0.2956, "step": 26682 }, { "epoch": 4.35574058201706, "grad_norm": 3.85073184967041, "learning_rate": 9.242256782053405e-06, "loss": 0.4262, "step": 26683 }, { "epoch": 4.355903840659565, "grad_norm": 4.2626142501831055, "learning_rate": 9.2416174855391e-06, "loss": 0.4444, "step": 26684 }, { "epoch": 4.356067099302069, "grad_norm": 3.9402947425842285, "learning_rate": 9.240978192142217e-06, "loss": 0.4771, "step": 26685 }, { "epoch": 4.356230357944574, "grad_norm": 3.202573299407959, "learning_rate": 9.240338901865389e-06, "loss": 0.355, "step": 26686 }, { "epoch": 4.356393616587078, "grad_norm": 4.633841037750244, "learning_rate": 9.239699614711239e-06, "loss": 0.3897, "step": 26687 }, { "epoch": 4.3565568752295825, "grad_norm": 3.4625372886657715, "learning_rate": 9.239060330682396e-06, "loss": 0.3549, "step": 26688 }, { "epoch": 4.356720133872087, "grad_norm": 3.8796849250793457, "learning_rate": 9.238421049781495e-06, "loss": 0.4277, "step": 26689 }, { "epoch": 4.356883392514591, "grad_norm": 3.454087972640991, "learning_rate": 9.237781772011152e-06, "loss": 0.4366, "step": 26690 }, { "epoch": 4.357046651157096, "grad_norm": 4.073171615600586, "learning_rate": 9.237142497374001e-06, "loss": 0.4406, "step": 26691 }, { "epoch": 4.3572099097996, "grad_norm": 4.100222110748291, "learning_rate": 9.236503225872669e-06, "loss": 0.4724, "step": 26692 }, { "epoch": 4.357373168442105, "grad_norm": 4.417716979980469, "learning_rate": 9.235863957509782e-06, "loss": 0.3431, "step": 26693 }, { "epoch": 4.357536427084609, "grad_norm": 3.113217830657959, "learning_rate": 9.235224692287971e-06, "loss": 0.3303, "step": 26694 }, { "epoch": 4.3576996857271135, "grad_norm": 3.2497057914733887, "learning_rate": 9.234585430209865e-06, "loss": 0.3254, "step": 26695 }, { "epoch": 4.357862944369618, "grad_norm": 3.2678756713867188, "learning_rate": 9.233946171278086e-06, "loss": 0.3508, "step": 26696 }, { "epoch": 4.3580262030121215, "grad_norm": 3.209914207458496, "learning_rate": 9.233306915495268e-06, "loss": 0.3246, "step": 26697 }, { "epoch": 4.358189461654626, "grad_norm": 3.7664260864257812, "learning_rate": 9.232667662864034e-06, "loss": 0.4147, "step": 26698 }, { "epoch": 4.35835272029713, "grad_norm": 3.2174439430236816, "learning_rate": 9.232028413387016e-06, "loss": 0.3657, "step": 26699 }, { "epoch": 4.358515978939635, "grad_norm": 2.6769087314605713, "learning_rate": 9.231389167066836e-06, "loss": 0.3644, "step": 26700 }, { "epoch": 4.358679237582139, "grad_norm": 3.9621076583862305, "learning_rate": 9.230749923906127e-06, "loss": 0.4103, "step": 26701 }, { "epoch": 4.358842496224644, "grad_norm": 3.605844020843506, "learning_rate": 9.230110683907514e-06, "loss": 0.3907, "step": 26702 }, { "epoch": 4.359005754867148, "grad_norm": 2.897465467453003, "learning_rate": 9.229471447073625e-06, "loss": 0.3323, "step": 26703 }, { "epoch": 4.359169013509653, "grad_norm": 3.6422150135040283, "learning_rate": 9.228832213407084e-06, "loss": 0.4338, "step": 26704 }, { "epoch": 4.359332272152157, "grad_norm": 3.3561131954193115, "learning_rate": 9.228192982910528e-06, "loss": 0.3574, "step": 26705 }, { "epoch": 4.3594955307946615, "grad_norm": 2.901492118835449, "learning_rate": 9.227553755586577e-06, "loss": 0.3462, "step": 26706 }, { "epoch": 4.359658789437166, "grad_norm": 3.173938035964966, "learning_rate": 9.226914531437861e-06, "loss": 0.3476, "step": 26707 }, { "epoch": 4.35982204807967, "grad_norm": 3.6919384002685547, "learning_rate": 9.226275310467008e-06, "loss": 0.3792, "step": 26708 }, { "epoch": 4.359985306722175, "grad_norm": 3.3973212242126465, "learning_rate": 9.225636092676648e-06, "loss": 0.3688, "step": 26709 }, { "epoch": 4.360148565364679, "grad_norm": 3.5115621089935303, "learning_rate": 9.224996878069401e-06, "loss": 0.389, "step": 26710 }, { "epoch": 4.360311824007184, "grad_norm": 3.6422810554504395, "learning_rate": 9.224357666647901e-06, "loss": 0.4419, "step": 26711 }, { "epoch": 4.360475082649688, "grad_norm": 4.780962944030762, "learning_rate": 9.223718458414774e-06, "loss": 0.4506, "step": 26712 }, { "epoch": 4.3606383412921925, "grad_norm": 3.272650718688965, "learning_rate": 9.223079253372646e-06, "loss": 0.3529, "step": 26713 }, { "epoch": 4.360801599934696, "grad_norm": 2.9325618743896484, "learning_rate": 9.222440051524148e-06, "loss": 0.35, "step": 26714 }, { "epoch": 4.360964858577201, "grad_norm": 2.996636152267456, "learning_rate": 9.221800852871901e-06, "loss": 0.328, "step": 26715 }, { "epoch": 4.361128117219705, "grad_norm": 3.474170446395874, "learning_rate": 9.221161657418541e-06, "loss": 0.3791, "step": 26716 }, { "epoch": 4.361291375862209, "grad_norm": 3.852524518966675, "learning_rate": 9.220522465166689e-06, "loss": 0.74, "step": 26717 }, { "epoch": 4.361454634504714, "grad_norm": 3.232750177383423, "learning_rate": 9.219883276118976e-06, "loss": 0.365, "step": 26718 }, { "epoch": 4.361617893147218, "grad_norm": 3.326852560043335, "learning_rate": 9.219244090278034e-06, "loss": 0.3421, "step": 26719 }, { "epoch": 4.361781151789723, "grad_norm": 3.491708755493164, "learning_rate": 9.218604907646475e-06, "loss": 0.3538, "step": 26720 }, { "epoch": 4.361944410432227, "grad_norm": 3.2566373348236084, "learning_rate": 9.21796572822694e-06, "loss": 0.3183, "step": 26721 }, { "epoch": 4.362107669074732, "grad_norm": 3.528094530105591, "learning_rate": 9.217326552022055e-06, "loss": 0.3728, "step": 26722 }, { "epoch": 4.362270927717236, "grad_norm": 3.475449323654175, "learning_rate": 9.21668737903444e-06, "loss": 0.3821, "step": 26723 }, { "epoch": 4.3624341863597405, "grad_norm": 3.5706989765167236, "learning_rate": 9.216048209266733e-06, "loss": 0.4171, "step": 26724 }, { "epoch": 4.362597445002245, "grad_norm": 2.942168712615967, "learning_rate": 9.215409042721553e-06, "loss": 0.3196, "step": 26725 }, { "epoch": 4.362760703644749, "grad_norm": 3.05647873878479, "learning_rate": 9.214769879401529e-06, "loss": 0.3297, "step": 26726 }, { "epoch": 4.362923962287254, "grad_norm": 3.6751620769500732, "learning_rate": 9.214130719309289e-06, "loss": 0.3748, "step": 26727 }, { "epoch": 4.363087220929758, "grad_norm": 3.0823137760162354, "learning_rate": 9.213491562447466e-06, "loss": 0.3562, "step": 26728 }, { "epoch": 4.363250479572263, "grad_norm": 3.9401443004608154, "learning_rate": 9.21285240881868e-06, "loss": 0.4141, "step": 26729 }, { "epoch": 4.363413738214767, "grad_norm": 3.7199978828430176, "learning_rate": 9.212213258425558e-06, "loss": 0.3688, "step": 26730 }, { "epoch": 4.3635769968572715, "grad_norm": 3.986788511276245, "learning_rate": 9.21157411127073e-06, "loss": 0.3949, "step": 26731 }, { "epoch": 4.363740255499776, "grad_norm": 3.613532781600952, "learning_rate": 9.210934967356823e-06, "loss": 0.344, "step": 26732 }, { "epoch": 4.3639035141422795, "grad_norm": 4.241222381591797, "learning_rate": 9.210295826686466e-06, "loss": 0.3659, "step": 26733 }, { "epoch": 4.364066772784784, "grad_norm": 3.868546485900879, "learning_rate": 9.209656689262285e-06, "loss": 0.4007, "step": 26734 }, { "epoch": 4.364230031427288, "grad_norm": 3.31911563873291, "learning_rate": 9.209017555086907e-06, "loss": 0.313, "step": 26735 }, { "epoch": 4.364393290069793, "grad_norm": 2.914787530899048, "learning_rate": 9.208378424162958e-06, "loss": 0.4197, "step": 26736 }, { "epoch": 4.364556548712297, "grad_norm": 3.787675380706787, "learning_rate": 9.207739296493067e-06, "loss": 0.4004, "step": 26737 }, { "epoch": 4.364719807354802, "grad_norm": 4.4780073165893555, "learning_rate": 9.207100172079863e-06, "loss": 0.507, "step": 26738 }, { "epoch": 4.364883065997306, "grad_norm": 3.4603254795074463, "learning_rate": 9.206461050925968e-06, "loss": 0.3951, "step": 26739 }, { "epoch": 4.365046324639811, "grad_norm": 3.926701068878174, "learning_rate": 9.205821933034012e-06, "loss": 0.4034, "step": 26740 }, { "epoch": 4.365209583282315, "grad_norm": 4.038524150848389, "learning_rate": 9.205182818406622e-06, "loss": 0.3934, "step": 26741 }, { "epoch": 4.3653728419248194, "grad_norm": 3.782881259918213, "learning_rate": 9.204543707046423e-06, "loss": 0.37, "step": 26742 }, { "epoch": 4.365536100567324, "grad_norm": 3.2931506633758545, "learning_rate": 9.203904598956048e-06, "loss": 0.3343, "step": 26743 }, { "epoch": 4.365699359209828, "grad_norm": 3.3158469200134277, "learning_rate": 9.20326549413812e-06, "loss": 0.3697, "step": 26744 }, { "epoch": 4.365862617852333, "grad_norm": 3.617262840270996, "learning_rate": 9.202626392595265e-06, "loss": 0.3138, "step": 26745 }, { "epoch": 4.366025876494837, "grad_norm": 3.786168336868286, "learning_rate": 9.201987294330114e-06, "loss": 0.4173, "step": 26746 }, { "epoch": 4.366189135137342, "grad_norm": 3.2436702251434326, "learning_rate": 9.201348199345292e-06, "loss": 0.3144, "step": 26747 }, { "epoch": 4.366352393779846, "grad_norm": 3.176191806793213, "learning_rate": 9.200709107643427e-06, "loss": 0.3293, "step": 26748 }, { "epoch": 4.3665156524223505, "grad_norm": 3.4253132343292236, "learning_rate": 9.200070019227143e-06, "loss": 0.399, "step": 26749 }, { "epoch": 4.366678911064854, "grad_norm": 3.7963504791259766, "learning_rate": 9.199430934099068e-06, "loss": 0.3975, "step": 26750 }, { "epoch": 4.3668421697073585, "grad_norm": 3.317129611968994, "learning_rate": 9.198791852261832e-06, "loss": 0.3722, "step": 26751 }, { "epoch": 4.367005428349863, "grad_norm": 2.8088083267211914, "learning_rate": 9.19815277371806e-06, "loss": 0.3251, "step": 26752 }, { "epoch": 4.367168686992367, "grad_norm": 3.2961723804473877, "learning_rate": 9.197513698470379e-06, "loss": 0.3305, "step": 26753 }, { "epoch": 4.367331945634872, "grad_norm": 3.664391040802002, "learning_rate": 9.196874626521413e-06, "loss": 0.4287, "step": 26754 }, { "epoch": 4.367495204277376, "grad_norm": 4.022806167602539, "learning_rate": 9.196235557873794e-06, "loss": 0.4755, "step": 26755 }, { "epoch": 4.367658462919881, "grad_norm": 4.080772399902344, "learning_rate": 9.19559649253015e-06, "loss": 0.4357, "step": 26756 }, { "epoch": 4.367821721562385, "grad_norm": 2.988147735595703, "learning_rate": 9.194957430493103e-06, "loss": 0.3277, "step": 26757 }, { "epoch": 4.36798498020489, "grad_norm": 3.114948034286499, "learning_rate": 9.194318371765285e-06, "loss": 0.3497, "step": 26758 }, { "epoch": 4.368148238847394, "grad_norm": 3.8256821632385254, "learning_rate": 9.193679316349317e-06, "loss": 0.4918, "step": 26759 }, { "epoch": 4.368311497489898, "grad_norm": 3.8133249282836914, "learning_rate": 9.19304026424783e-06, "loss": 0.4299, "step": 26760 }, { "epoch": 4.368474756132403, "grad_norm": 3.1251583099365234, "learning_rate": 9.192401215463448e-06, "loss": 0.3704, "step": 26761 }, { "epoch": 4.368638014774907, "grad_norm": 3.5278310775756836, "learning_rate": 9.191762169998801e-06, "loss": 0.3858, "step": 26762 }, { "epoch": 4.368801273417412, "grad_norm": 3.6758058071136475, "learning_rate": 9.191123127856513e-06, "loss": 0.3436, "step": 26763 }, { "epoch": 4.368964532059916, "grad_norm": 3.050830841064453, "learning_rate": 9.190484089039215e-06, "loss": 0.3381, "step": 26764 }, { "epoch": 4.369127790702421, "grad_norm": 3.19645357131958, "learning_rate": 9.189845053549526e-06, "loss": 0.3649, "step": 26765 }, { "epoch": 4.369291049344925, "grad_norm": 3.6871135234832764, "learning_rate": 9.189206021390082e-06, "loss": 0.4306, "step": 26766 }, { "epoch": 4.369454307987429, "grad_norm": 3.6377949714660645, "learning_rate": 9.188566992563505e-06, "loss": 0.4315, "step": 26767 }, { "epoch": 4.369617566629933, "grad_norm": 3.025196075439453, "learning_rate": 9.187927967072428e-06, "loss": 0.3248, "step": 26768 }, { "epoch": 4.3697808252724375, "grad_norm": 3.800077199935913, "learning_rate": 9.187288944919468e-06, "loss": 0.3969, "step": 26769 }, { "epoch": 4.369944083914942, "grad_norm": 3.2435150146484375, "learning_rate": 9.186649926107256e-06, "loss": 0.3725, "step": 26770 }, { "epoch": 4.370107342557446, "grad_norm": 5.272015571594238, "learning_rate": 9.186010910638419e-06, "loss": 0.4802, "step": 26771 }, { "epoch": 4.370270601199951, "grad_norm": 3.701267719268799, "learning_rate": 9.185371898515582e-06, "loss": 0.3908, "step": 26772 }, { "epoch": 4.370433859842455, "grad_norm": 3.3017683029174805, "learning_rate": 9.184732889741376e-06, "loss": 0.3948, "step": 26773 }, { "epoch": 4.37059711848496, "grad_norm": 4.224270343780518, "learning_rate": 9.184093884318426e-06, "loss": 0.4359, "step": 26774 }, { "epoch": 4.370760377127464, "grad_norm": 2.97466778755188, "learning_rate": 9.183454882249357e-06, "loss": 0.318, "step": 26775 }, { "epoch": 4.370923635769969, "grad_norm": 3.4197230339050293, "learning_rate": 9.182815883536795e-06, "loss": 0.3692, "step": 26776 }, { "epoch": 4.371086894412473, "grad_norm": 3.5026297569274902, "learning_rate": 9.182176888183372e-06, "loss": 0.4145, "step": 26777 }, { "epoch": 4.371250153054977, "grad_norm": 3.395423412322998, "learning_rate": 9.18153789619171e-06, "loss": 0.3705, "step": 26778 }, { "epoch": 4.371413411697482, "grad_norm": 3.6015355587005615, "learning_rate": 9.180898907564436e-06, "loss": 0.3386, "step": 26779 }, { "epoch": 4.371576670339986, "grad_norm": 3.6186256408691406, "learning_rate": 9.180259922304175e-06, "loss": 0.4242, "step": 26780 }, { "epoch": 4.371739928982491, "grad_norm": 3.6558518409729004, "learning_rate": 9.179620940413557e-06, "loss": 0.3919, "step": 26781 }, { "epoch": 4.371903187624995, "grad_norm": 3.8567819595336914, "learning_rate": 9.178981961895207e-06, "loss": 0.3468, "step": 26782 }, { "epoch": 4.3720664462675, "grad_norm": 3.3610708713531494, "learning_rate": 9.178342986751753e-06, "loss": 0.3215, "step": 26783 }, { "epoch": 4.372229704910004, "grad_norm": 3.045879602432251, "learning_rate": 9.177704014985822e-06, "loss": 0.3203, "step": 26784 }, { "epoch": 4.3723929635525085, "grad_norm": 2.9222023487091064, "learning_rate": 9.177065046600038e-06, "loss": 0.3152, "step": 26785 }, { "epoch": 4.372556222195012, "grad_norm": 3.5728821754455566, "learning_rate": 9.17642608159703e-06, "loss": 0.4546, "step": 26786 }, { "epoch": 4.3727194808375165, "grad_norm": 3.2477145195007324, "learning_rate": 9.175787119979424e-06, "loss": 0.3133, "step": 26787 }, { "epoch": 4.372882739480021, "grad_norm": 3.060112953186035, "learning_rate": 9.175148161749845e-06, "loss": 0.3828, "step": 26788 }, { "epoch": 4.373045998122525, "grad_norm": 3.0471980571746826, "learning_rate": 9.17450920691092e-06, "loss": 0.4028, "step": 26789 }, { "epoch": 4.37320925676503, "grad_norm": 4.275571823120117, "learning_rate": 9.173870255465276e-06, "loss": 0.4359, "step": 26790 }, { "epoch": 4.373372515407534, "grad_norm": 3.2514569759368896, "learning_rate": 9.173231307415538e-06, "loss": 0.3446, "step": 26791 }, { "epoch": 4.373535774050039, "grad_norm": 3.783562421798706, "learning_rate": 9.172592362764334e-06, "loss": 0.4155, "step": 26792 }, { "epoch": 4.373699032692543, "grad_norm": 3.383744478225708, "learning_rate": 9.171953421514292e-06, "loss": 0.3454, "step": 26793 }, { "epoch": 4.373862291335048, "grad_norm": 3.6138927936553955, "learning_rate": 9.171314483668037e-06, "loss": 0.3689, "step": 26794 }, { "epoch": 4.374025549977552, "grad_norm": 3.7209644317626953, "learning_rate": 9.170675549228194e-06, "loss": 0.389, "step": 26795 }, { "epoch": 4.374188808620056, "grad_norm": 3.833422899246216, "learning_rate": 9.170036618197392e-06, "loss": 0.375, "step": 26796 }, { "epoch": 4.374352067262561, "grad_norm": 3.6367104053497314, "learning_rate": 9.169397690578258e-06, "loss": 0.3937, "step": 26797 }, { "epoch": 4.374515325905065, "grad_norm": 3.1506154537200928, "learning_rate": 9.168758766373413e-06, "loss": 0.3178, "step": 26798 }, { "epoch": 4.37467858454757, "grad_norm": 3.162914514541626, "learning_rate": 9.168119845585488e-06, "loss": 0.3422, "step": 26799 }, { "epoch": 4.374841843190074, "grad_norm": 3.3414487838745117, "learning_rate": 9.167480928217108e-06, "loss": 0.389, "step": 26800 }, { "epoch": 4.375005101832579, "grad_norm": 3.6348116397857666, "learning_rate": 9.1668420142709e-06, "loss": 0.3873, "step": 26801 }, { "epoch": 4.375168360475083, "grad_norm": 4.025035381317139, "learning_rate": 9.16620310374949e-06, "loss": 0.4164, "step": 26802 }, { "epoch": 4.375331619117587, "grad_norm": 3.689450979232788, "learning_rate": 9.165564196655503e-06, "loss": 0.3827, "step": 26803 }, { "epoch": 4.375494877760091, "grad_norm": 3.1635067462921143, "learning_rate": 9.164925292991564e-06, "loss": 0.3848, "step": 26804 }, { "epoch": 4.3756581364025955, "grad_norm": 3.594191312789917, "learning_rate": 9.164286392760306e-06, "loss": 0.4033, "step": 26805 }, { "epoch": 4.3758213950451, "grad_norm": 3.9945685863494873, "learning_rate": 9.16364749596435e-06, "loss": 0.4322, "step": 26806 }, { "epoch": 4.375984653687604, "grad_norm": 3.2876195907592773, "learning_rate": 9.163008602606329e-06, "loss": 0.3725, "step": 26807 }, { "epoch": 4.376147912330109, "grad_norm": 4.312007904052734, "learning_rate": 9.162369712688854e-06, "loss": 0.4069, "step": 26808 }, { "epoch": 4.376311170972613, "grad_norm": 3.388993501663208, "learning_rate": 9.161730826214566e-06, "loss": 0.3305, "step": 26809 }, { "epoch": 4.376474429615118, "grad_norm": 4.000154972076416, "learning_rate": 9.161091943186084e-06, "loss": 0.3603, "step": 26810 }, { "epoch": 4.376637688257622, "grad_norm": 3.1830015182495117, "learning_rate": 9.160453063606038e-06, "loss": 0.3703, "step": 26811 }, { "epoch": 4.3768009469001266, "grad_norm": 3.2413370609283447, "learning_rate": 9.159814187477051e-06, "loss": 0.3688, "step": 26812 }, { "epoch": 4.376964205542631, "grad_norm": 3.438537359237671, "learning_rate": 9.159175314801752e-06, "loss": 0.3638, "step": 26813 }, { "epoch": 4.377127464185135, "grad_norm": 2.7726590633392334, "learning_rate": 9.158536445582763e-06, "loss": 0.2919, "step": 26814 }, { "epoch": 4.37729072282764, "grad_norm": 3.261807918548584, "learning_rate": 9.157897579822714e-06, "loss": 0.3382, "step": 26815 }, { "epoch": 4.377453981470144, "grad_norm": 3.854079008102417, "learning_rate": 9.157258717524235e-06, "loss": 0.4208, "step": 26816 }, { "epoch": 4.377617240112649, "grad_norm": 3.5467967987060547, "learning_rate": 9.156619858689943e-06, "loss": 0.3834, "step": 26817 }, { "epoch": 4.377780498755153, "grad_norm": 4.048991680145264, "learning_rate": 9.155981003322468e-06, "loss": 0.4111, "step": 26818 }, { "epoch": 4.377943757397658, "grad_norm": 3.359339952468872, "learning_rate": 9.155342151424436e-06, "loss": 0.3314, "step": 26819 }, { "epoch": 4.378107016040161, "grad_norm": 3.736471176147461, "learning_rate": 9.154703302998473e-06, "loss": 0.3919, "step": 26820 }, { "epoch": 4.378270274682666, "grad_norm": 3.61255145072937, "learning_rate": 9.154064458047207e-06, "loss": 0.3684, "step": 26821 }, { "epoch": 4.37843353332517, "grad_norm": 3.1115195751190186, "learning_rate": 9.153425616573264e-06, "loss": 0.347, "step": 26822 }, { "epoch": 4.3785967919676745, "grad_norm": 2.9974632263183594, "learning_rate": 9.152786778579266e-06, "loss": 0.3203, "step": 26823 }, { "epoch": 4.378760050610179, "grad_norm": 4.2167863845825195, "learning_rate": 9.152147944067843e-06, "loss": 0.4383, "step": 26824 }, { "epoch": 4.378923309252683, "grad_norm": 3.186710834503174, "learning_rate": 9.15150911304162e-06, "loss": 0.2946, "step": 26825 }, { "epoch": 4.379086567895188, "grad_norm": 3.470555067062378, "learning_rate": 9.150870285503224e-06, "loss": 0.368, "step": 26826 }, { "epoch": 4.379249826537692, "grad_norm": 3.392228364944458, "learning_rate": 9.150231461455278e-06, "loss": 0.3766, "step": 26827 }, { "epoch": 4.379413085180197, "grad_norm": 4.01659631729126, "learning_rate": 9.14959264090041e-06, "loss": 0.3896, "step": 26828 }, { "epoch": 4.379576343822701, "grad_norm": 3.8018016815185547, "learning_rate": 9.148953823841244e-06, "loss": 0.457, "step": 26829 }, { "epoch": 4.3797396024652056, "grad_norm": 3.707669496536255, "learning_rate": 9.148315010280408e-06, "loss": 0.3816, "step": 26830 }, { "epoch": 4.37990286110771, "grad_norm": 3.1897661685943604, "learning_rate": 9.147676200220525e-06, "loss": 0.3503, "step": 26831 }, { "epoch": 4.380066119750214, "grad_norm": 2.8884546756744385, "learning_rate": 9.147037393664226e-06, "loss": 0.3516, "step": 26832 }, { "epoch": 4.380229378392719, "grad_norm": 3.7283544540405273, "learning_rate": 9.146398590614135e-06, "loss": 0.4, "step": 26833 }, { "epoch": 4.380392637035223, "grad_norm": 3.1591176986694336, "learning_rate": 9.145759791072876e-06, "loss": 0.3415, "step": 26834 }, { "epoch": 4.380555895677728, "grad_norm": 3.7324106693267822, "learning_rate": 9.145120995043076e-06, "loss": 0.3983, "step": 26835 }, { "epoch": 4.380719154320232, "grad_norm": 3.632289171218872, "learning_rate": 9.144482202527364e-06, "loss": 0.4052, "step": 26836 }, { "epoch": 4.380882412962737, "grad_norm": 3.6601762771606445, "learning_rate": 9.143843413528359e-06, "loss": 0.3658, "step": 26837 }, { "epoch": 4.381045671605241, "grad_norm": 3.1563422679901123, "learning_rate": 9.143204628048691e-06, "loss": 0.3153, "step": 26838 }, { "epoch": 4.381208930247745, "grad_norm": 3.019749879837036, "learning_rate": 9.142565846090986e-06, "loss": 0.3597, "step": 26839 }, { "epoch": 4.381372188890249, "grad_norm": 4.08683967590332, "learning_rate": 9.141927067657868e-06, "loss": 0.4052, "step": 26840 }, { "epoch": 4.3815354475327535, "grad_norm": 3.278237819671631, "learning_rate": 9.141288292751965e-06, "loss": 0.3857, "step": 26841 }, { "epoch": 4.381698706175258, "grad_norm": 3.750511884689331, "learning_rate": 9.140649521375899e-06, "loss": 0.4044, "step": 26842 }, { "epoch": 4.381861964817762, "grad_norm": 3.3675379753112793, "learning_rate": 9.1400107535323e-06, "loss": 0.3711, "step": 26843 }, { "epoch": 4.382025223460267, "grad_norm": 3.426898717880249, "learning_rate": 9.139371989223792e-06, "loss": 0.3363, "step": 26844 }, { "epoch": 4.382188482102771, "grad_norm": 3.6030831336975098, "learning_rate": 9.138733228453e-06, "loss": 0.3338, "step": 26845 }, { "epoch": 4.382351740745276, "grad_norm": 3.683790683746338, "learning_rate": 9.138094471222555e-06, "loss": 0.4274, "step": 26846 }, { "epoch": 4.38251499938778, "grad_norm": 3.398197650909424, "learning_rate": 9.137455717535074e-06, "loss": 0.3389, "step": 26847 }, { "epoch": 4.3826782580302845, "grad_norm": 4.176876068115234, "learning_rate": 9.136816967393189e-06, "loss": 0.4201, "step": 26848 }, { "epoch": 4.382841516672789, "grad_norm": 3.211620807647705, "learning_rate": 9.136178220799521e-06, "loss": 0.4071, "step": 26849 }, { "epoch": 4.383004775315293, "grad_norm": 3.8986997604370117, "learning_rate": 9.1355394777567e-06, "loss": 0.443, "step": 26850 }, { "epoch": 4.383168033957798, "grad_norm": 3.8810200691223145, "learning_rate": 9.134900738267348e-06, "loss": 0.378, "step": 26851 }, { "epoch": 4.383331292600302, "grad_norm": 3.8723080158233643, "learning_rate": 9.134262002334095e-06, "loss": 0.3618, "step": 26852 }, { "epoch": 4.383494551242807, "grad_norm": 3.2675225734710693, "learning_rate": 9.13362326995956e-06, "loss": 0.3809, "step": 26853 }, { "epoch": 4.383657809885311, "grad_norm": 3.1688807010650635, "learning_rate": 9.132984541146375e-06, "loss": 0.3346, "step": 26854 }, { "epoch": 4.383821068527816, "grad_norm": 3.617685556411743, "learning_rate": 9.132345815897162e-06, "loss": 0.4299, "step": 26855 }, { "epoch": 4.383984327170319, "grad_norm": 3.2251110076904297, "learning_rate": 9.131707094214553e-06, "loss": 0.3445, "step": 26856 }, { "epoch": 4.384147585812824, "grad_norm": 3.358397960662842, "learning_rate": 9.131068376101166e-06, "loss": 0.3711, "step": 26857 }, { "epoch": 4.384310844455328, "grad_norm": 4.127298355102539, "learning_rate": 9.130429661559623e-06, "loss": 0.4087, "step": 26858 }, { "epoch": 4.3844741030978325, "grad_norm": 4.132051944732666, "learning_rate": 9.12979095059256e-06, "loss": 0.439, "step": 26859 }, { "epoch": 4.384637361740337, "grad_norm": 3.547959327697754, "learning_rate": 9.129152243202596e-06, "loss": 0.3917, "step": 26860 }, { "epoch": 4.384800620382841, "grad_norm": 3.3465516567230225, "learning_rate": 9.128513539392361e-06, "loss": 0.376, "step": 26861 }, { "epoch": 4.384963879025346, "grad_norm": 3.630376100540161, "learning_rate": 9.127874839164477e-06, "loss": 0.3619, "step": 26862 }, { "epoch": 4.38512713766785, "grad_norm": 3.1938717365264893, "learning_rate": 9.127236142521569e-06, "loss": 0.3771, "step": 26863 }, { "epoch": 4.385290396310355, "grad_norm": 4.235743045806885, "learning_rate": 9.126597449466263e-06, "loss": 0.3847, "step": 26864 }, { "epoch": 4.385453654952859, "grad_norm": 3.2455623149871826, "learning_rate": 9.125958760001188e-06, "loss": 0.4007, "step": 26865 }, { "epoch": 4.3856169135953635, "grad_norm": 3.5700738430023193, "learning_rate": 9.125320074128966e-06, "loss": 0.3916, "step": 26866 }, { "epoch": 4.385780172237868, "grad_norm": 4.5358123779296875, "learning_rate": 9.124681391852221e-06, "loss": 0.4614, "step": 26867 }, { "epoch": 4.385943430880372, "grad_norm": 2.633012056350708, "learning_rate": 9.124042713173582e-06, "loss": 0.3391, "step": 26868 }, { "epoch": 4.386106689522877, "grad_norm": 2.7511494159698486, "learning_rate": 9.123404038095668e-06, "loss": 0.3113, "step": 26869 }, { "epoch": 4.386269948165381, "grad_norm": 3.1476149559020996, "learning_rate": 9.122765366621114e-06, "loss": 0.3264, "step": 26870 }, { "epoch": 4.386433206807886, "grad_norm": 3.326045036315918, "learning_rate": 9.122126698752538e-06, "loss": 0.3513, "step": 26871 }, { "epoch": 4.38659646545039, "grad_norm": 3.1709752082824707, "learning_rate": 9.121488034492569e-06, "loss": 0.4017, "step": 26872 }, { "epoch": 4.386759724092894, "grad_norm": 3.001107931137085, "learning_rate": 9.12084937384383e-06, "loss": 0.3247, "step": 26873 }, { "epoch": 4.386922982735398, "grad_norm": 3.1850740909576416, "learning_rate": 9.120210716808949e-06, "loss": 0.3233, "step": 26874 }, { "epoch": 4.387086241377903, "grad_norm": 3.79154372215271, "learning_rate": 9.11957206339055e-06, "loss": 0.4211, "step": 26875 }, { "epoch": 4.387249500020407, "grad_norm": 3.0411038398742676, "learning_rate": 9.118933413591255e-06, "loss": 0.3228, "step": 26876 }, { "epoch": 4.3874127586629115, "grad_norm": 4.218899726867676, "learning_rate": 9.118294767413694e-06, "loss": 0.38, "step": 26877 }, { "epoch": 4.387576017305416, "grad_norm": 3.008148193359375, "learning_rate": 9.11765612486049e-06, "loss": 0.3778, "step": 26878 }, { "epoch": 4.38773927594792, "grad_norm": 4.050276756286621, "learning_rate": 9.117017485934268e-06, "loss": 0.4265, "step": 26879 }, { "epoch": 4.387902534590425, "grad_norm": 4.071517467498779, "learning_rate": 9.116378850637651e-06, "loss": 0.3636, "step": 26880 }, { "epoch": 4.388065793232929, "grad_norm": 3.897528886795044, "learning_rate": 9.11574021897327e-06, "loss": 0.4308, "step": 26881 }, { "epoch": 4.388229051875434, "grad_norm": 3.4392166137695312, "learning_rate": 9.115101590943747e-06, "loss": 0.3136, "step": 26882 }, { "epoch": 4.388392310517938, "grad_norm": 3.8104264736175537, "learning_rate": 9.114462966551707e-06, "loss": 0.3822, "step": 26883 }, { "epoch": 4.3885555691604425, "grad_norm": 3.0920159816741943, "learning_rate": 9.113824345799774e-06, "loss": 0.3404, "step": 26884 }, { "epoch": 4.388718827802947, "grad_norm": 3.3365190029144287, "learning_rate": 9.11318572869058e-06, "loss": 0.3661, "step": 26885 }, { "epoch": 4.388882086445451, "grad_norm": 3.0951359272003174, "learning_rate": 9.112547115226738e-06, "loss": 0.3124, "step": 26886 }, { "epoch": 4.389045345087956, "grad_norm": 3.2794337272644043, "learning_rate": 9.111908505410882e-06, "loss": 0.3709, "step": 26887 }, { "epoch": 4.38920860373046, "grad_norm": 3.175527811050415, "learning_rate": 9.111269899245636e-06, "loss": 0.289, "step": 26888 }, { "epoch": 4.389371862372965, "grad_norm": 2.938854932785034, "learning_rate": 9.110631296733623e-06, "loss": 0.3509, "step": 26889 }, { "epoch": 4.389535121015469, "grad_norm": 4.205322265625, "learning_rate": 9.109992697877468e-06, "loss": 0.4019, "step": 26890 }, { "epoch": 4.389698379657974, "grad_norm": 3.2763254642486572, "learning_rate": 9.109354102679797e-06, "loss": 0.3597, "step": 26891 }, { "epoch": 4.389861638300477, "grad_norm": 3.684739828109741, "learning_rate": 9.108715511143234e-06, "loss": 0.3463, "step": 26892 }, { "epoch": 4.390024896942982, "grad_norm": 4.66666841506958, "learning_rate": 9.108076923270406e-06, "loss": 0.433, "step": 26893 }, { "epoch": 4.390188155585486, "grad_norm": 2.8789069652557373, "learning_rate": 9.107438339063939e-06, "loss": 0.338, "step": 26894 }, { "epoch": 4.3903514142279905, "grad_norm": 3.9558260440826416, "learning_rate": 9.106799758526457e-06, "loss": 0.3607, "step": 26895 }, { "epoch": 4.390514672870495, "grad_norm": 4.031518459320068, "learning_rate": 9.10616118166058e-06, "loss": 0.3797, "step": 26896 }, { "epoch": 4.390677931512999, "grad_norm": 4.223902225494385, "learning_rate": 9.105522608468938e-06, "loss": 0.4154, "step": 26897 }, { "epoch": 4.390841190155504, "grad_norm": 4.025448322296143, "learning_rate": 9.104884038954155e-06, "loss": 0.4147, "step": 26898 }, { "epoch": 4.391004448798008, "grad_norm": 3.5884053707122803, "learning_rate": 9.104245473118857e-06, "loss": 0.3944, "step": 26899 }, { "epoch": 4.391167707440513, "grad_norm": 3.4826462268829346, "learning_rate": 9.103606910965666e-06, "loss": 0.3723, "step": 26900 }, { "epoch": 4.391330966083017, "grad_norm": 3.1544830799102783, "learning_rate": 9.10296835249721e-06, "loss": 0.3463, "step": 26901 }, { "epoch": 4.3914942247255215, "grad_norm": 3.705781936645508, "learning_rate": 9.102329797716111e-06, "loss": 0.3511, "step": 26902 }, { "epoch": 4.391657483368026, "grad_norm": 3.8785922527313232, "learning_rate": 9.101691246624993e-06, "loss": 0.3587, "step": 26903 }, { "epoch": 4.39182074201053, "grad_norm": 3.955307722091675, "learning_rate": 9.101052699226486e-06, "loss": 0.3727, "step": 26904 }, { "epoch": 4.391984000653035, "grad_norm": 3.1931068897247314, "learning_rate": 9.100414155523216e-06, "loss": 0.3389, "step": 26905 }, { "epoch": 4.392147259295539, "grad_norm": 3.7985177040100098, "learning_rate": 9.0997756155178e-06, "loss": 0.3821, "step": 26906 }, { "epoch": 4.392310517938044, "grad_norm": 3.4548869132995605, "learning_rate": 9.099137079212863e-06, "loss": 0.3515, "step": 26907 }, { "epoch": 4.392473776580548, "grad_norm": 4.020540714263916, "learning_rate": 9.098498546611036e-06, "loss": 0.4308, "step": 26908 }, { "epoch": 4.392637035223052, "grad_norm": 3.8026442527770996, "learning_rate": 9.09786001771494e-06, "loss": 0.387, "step": 26909 }, { "epoch": 4.392800293865556, "grad_norm": 3.484271764755249, "learning_rate": 9.097221492527205e-06, "loss": 0.3513, "step": 26910 }, { "epoch": 4.392963552508061, "grad_norm": 3.761122465133667, "learning_rate": 9.096582971050447e-06, "loss": 0.3508, "step": 26911 }, { "epoch": 4.393126811150565, "grad_norm": 3.287598133087158, "learning_rate": 9.095944453287297e-06, "loss": 0.3679, "step": 26912 }, { "epoch": 4.3932900697930695, "grad_norm": 2.9154648780822754, "learning_rate": 9.095305939240379e-06, "loss": 0.3408, "step": 26913 }, { "epoch": 4.393453328435574, "grad_norm": 3.60756516456604, "learning_rate": 9.094667428912317e-06, "loss": 0.386, "step": 26914 }, { "epoch": 4.393616587078078, "grad_norm": 3.6242480278015137, "learning_rate": 9.094028922305734e-06, "loss": 0.3738, "step": 26915 }, { "epoch": 4.393779845720583, "grad_norm": 3.8224055767059326, "learning_rate": 9.093390419423256e-06, "loss": 0.3626, "step": 26916 }, { "epoch": 4.393943104363087, "grad_norm": 3.577449321746826, "learning_rate": 9.092751920267507e-06, "loss": 0.356, "step": 26917 }, { "epoch": 4.394106363005592, "grad_norm": 2.9347996711730957, "learning_rate": 9.092113424841114e-06, "loss": 0.3061, "step": 26918 }, { "epoch": 4.394269621648096, "grad_norm": 3.6232402324676514, "learning_rate": 9.091474933146697e-06, "loss": 0.4017, "step": 26919 }, { "epoch": 4.3944328802906005, "grad_norm": 2.5879101753234863, "learning_rate": 9.090836445186885e-06, "loss": 0.3765, "step": 26920 }, { "epoch": 4.394596138933105, "grad_norm": 3.9541642665863037, "learning_rate": 9.090197960964301e-06, "loss": 0.4178, "step": 26921 }, { "epoch": 4.394759397575609, "grad_norm": 3.8436667919158936, "learning_rate": 9.08955948048157e-06, "loss": 0.4242, "step": 26922 }, { "epoch": 4.394922656218114, "grad_norm": 3.9423415660858154, "learning_rate": 9.088921003741317e-06, "loss": 0.3433, "step": 26923 }, { "epoch": 4.395085914860618, "grad_norm": 3.82475209236145, "learning_rate": 9.088282530746167e-06, "loss": 0.4445, "step": 26924 }, { "epoch": 4.395249173503123, "grad_norm": 3.6655590534210205, "learning_rate": 9.08764406149874e-06, "loss": 0.3707, "step": 26925 }, { "epoch": 4.395412432145626, "grad_norm": 3.3023154735565186, "learning_rate": 9.087005596001665e-06, "loss": 0.3975, "step": 26926 }, { "epoch": 4.395575690788131, "grad_norm": 3.1709816455841064, "learning_rate": 9.086367134257566e-06, "loss": 0.3636, "step": 26927 }, { "epoch": 4.395738949430635, "grad_norm": 3.5353012084960938, "learning_rate": 9.085728676269067e-06, "loss": 0.3627, "step": 26928 }, { "epoch": 4.39590220807314, "grad_norm": 3.5616257190704346, "learning_rate": 9.08509022203879e-06, "loss": 0.3666, "step": 26929 }, { "epoch": 4.396065466715644, "grad_norm": 4.1913743019104, "learning_rate": 9.08445177156936e-06, "loss": 0.4148, "step": 26930 }, { "epoch": 4.3962287253581485, "grad_norm": 2.971956968307495, "learning_rate": 9.083813324863407e-06, "loss": 0.3531, "step": 26931 }, { "epoch": 4.396391984000653, "grad_norm": 3.7920703887939453, "learning_rate": 9.08317488192355e-06, "loss": 0.3935, "step": 26932 }, { "epoch": 4.396555242643157, "grad_norm": 3.686187505722046, "learning_rate": 9.082536442752416e-06, "loss": 0.4093, "step": 26933 }, { "epoch": 4.396718501285662, "grad_norm": 3.5519378185272217, "learning_rate": 9.08189800735263e-06, "loss": 0.3488, "step": 26934 }, { "epoch": 4.396881759928166, "grad_norm": 4.180148601531982, "learning_rate": 9.08125957572681e-06, "loss": 0.4215, "step": 26935 }, { "epoch": 4.397045018570671, "grad_norm": 2.8290114402770996, "learning_rate": 9.080621147877588e-06, "loss": 0.2901, "step": 26936 }, { "epoch": 4.397208277213175, "grad_norm": 3.776122570037842, "learning_rate": 9.079982723807584e-06, "loss": 0.3359, "step": 26937 }, { "epoch": 4.3973715358556795, "grad_norm": 4.191190719604492, "learning_rate": 9.079344303519425e-06, "loss": 0.5071, "step": 26938 }, { "epoch": 4.397534794498184, "grad_norm": 3.4774270057678223, "learning_rate": 9.078705887015733e-06, "loss": 0.3578, "step": 26939 }, { "epoch": 4.397698053140688, "grad_norm": 3.777606725692749, "learning_rate": 9.078067474299133e-06, "loss": 0.3854, "step": 26940 }, { "epoch": 4.397861311783193, "grad_norm": 4.104284763336182, "learning_rate": 9.07742906537225e-06, "loss": 0.4459, "step": 26941 }, { "epoch": 4.398024570425697, "grad_norm": 4.791566848754883, "learning_rate": 9.076790660237705e-06, "loss": 0.4404, "step": 26942 }, { "epoch": 4.398187829068201, "grad_norm": 3.2404944896698, "learning_rate": 9.076152258898128e-06, "loss": 0.3684, "step": 26943 }, { "epoch": 4.398351087710706, "grad_norm": 3.6073014736175537, "learning_rate": 9.075513861356143e-06, "loss": 0.4227, "step": 26944 }, { "epoch": 4.39851434635321, "grad_norm": 3.4617440700531006, "learning_rate": 9.074875467614368e-06, "loss": 0.3379, "step": 26945 }, { "epoch": 4.398677604995714, "grad_norm": 3.5441088676452637, "learning_rate": 9.074237077675428e-06, "loss": 0.3933, "step": 26946 }, { "epoch": 4.398840863638219, "grad_norm": 2.861323356628418, "learning_rate": 9.073598691541954e-06, "loss": 0.3295, "step": 26947 }, { "epoch": 4.399004122280723, "grad_norm": 3.024444580078125, "learning_rate": 9.072960309216564e-06, "loss": 0.3117, "step": 26948 }, { "epoch": 4.3991673809232275, "grad_norm": 3.3799214363098145, "learning_rate": 9.072321930701885e-06, "loss": 0.3735, "step": 26949 }, { "epoch": 4.399330639565732, "grad_norm": 3.3996565341949463, "learning_rate": 9.07168355600054e-06, "loss": 0.4011, "step": 26950 }, { "epoch": 4.399493898208236, "grad_norm": 3.2741594314575195, "learning_rate": 9.071045185115152e-06, "loss": 0.3942, "step": 26951 }, { "epoch": 4.399657156850741, "grad_norm": 3.191181182861328, "learning_rate": 9.070406818048349e-06, "loss": 0.3416, "step": 26952 }, { "epoch": 4.399820415493245, "grad_norm": 3.438272476196289, "learning_rate": 9.069768454802754e-06, "loss": 0.4088, "step": 26953 }, { "epoch": 4.39998367413575, "grad_norm": 3.306882858276367, "learning_rate": 9.069130095380985e-06, "loss": 0.3391, "step": 26954 }, { "epoch": 4.400146932778254, "grad_norm": 3.864522695541382, "learning_rate": 9.068491739785672e-06, "loss": 0.3808, "step": 26955 }, { "epoch": 4.4003101914207585, "grad_norm": 3.6862261295318604, "learning_rate": 9.06785338801944e-06, "loss": 0.3307, "step": 26956 }, { "epoch": 4.400473450063263, "grad_norm": 3.016026496887207, "learning_rate": 9.067215040084907e-06, "loss": 0.3567, "step": 26957 }, { "epoch": 4.400636708705767, "grad_norm": 3.7748777866363525, "learning_rate": 9.066576695984702e-06, "loss": 0.3576, "step": 26958 }, { "epoch": 4.400799967348272, "grad_norm": 3.749481439590454, "learning_rate": 9.065938355721449e-06, "loss": 0.4129, "step": 26959 }, { "epoch": 4.400963225990776, "grad_norm": 4.027465343475342, "learning_rate": 9.06530001929777e-06, "loss": 0.358, "step": 26960 }, { "epoch": 4.401126484633281, "grad_norm": 4.084865093231201, "learning_rate": 9.06466168671629e-06, "loss": 0.3845, "step": 26961 }, { "epoch": 4.401289743275784, "grad_norm": 3.770571231842041, "learning_rate": 9.064023357979633e-06, "loss": 0.3604, "step": 26962 }, { "epoch": 4.401453001918289, "grad_norm": 3.2870090007781982, "learning_rate": 9.063385033090426e-06, "loss": 0.3474, "step": 26963 }, { "epoch": 4.401616260560793, "grad_norm": 3.562765598297119, "learning_rate": 9.062746712051284e-06, "loss": 0.3233, "step": 26964 }, { "epoch": 4.401779519203298, "grad_norm": 3.2777585983276367, "learning_rate": 9.062108394864838e-06, "loss": 0.3814, "step": 26965 }, { "epoch": 4.401942777845802, "grad_norm": 3.6849427223205566, "learning_rate": 9.06147008153371e-06, "loss": 0.3517, "step": 26966 }, { "epoch": 4.4021060364883065, "grad_norm": 3.9816715717315674, "learning_rate": 9.060831772060526e-06, "loss": 0.3764, "step": 26967 }, { "epoch": 4.402269295130811, "grad_norm": 3.7322850227355957, "learning_rate": 9.060193466447902e-06, "loss": 0.3632, "step": 26968 }, { "epoch": 4.402432553773315, "grad_norm": 2.7505712509155273, "learning_rate": 9.059555164698473e-06, "loss": 0.3274, "step": 26969 }, { "epoch": 4.40259581241582, "grad_norm": 3.4114699363708496, "learning_rate": 9.058916866814857e-06, "loss": 0.379, "step": 26970 }, { "epoch": 4.402759071058324, "grad_norm": 3.0790152549743652, "learning_rate": 9.058278572799679e-06, "loss": 0.396, "step": 26971 }, { "epoch": 4.402922329700829, "grad_norm": 3.854353904724121, "learning_rate": 9.057640282655559e-06, "loss": 0.3702, "step": 26972 }, { "epoch": 4.403085588343333, "grad_norm": 3.973472833633423, "learning_rate": 9.05700199638513e-06, "loss": 0.4374, "step": 26973 }, { "epoch": 4.4032488469858375, "grad_norm": 3.101273536682129, "learning_rate": 9.056363713991006e-06, "loss": 0.3726, "step": 26974 }, { "epoch": 4.403412105628342, "grad_norm": 3.6339728832244873, "learning_rate": 9.055725435475815e-06, "loss": 0.3769, "step": 26975 }, { "epoch": 4.403575364270846, "grad_norm": 4.149772644042969, "learning_rate": 9.05508716084218e-06, "loss": 0.4122, "step": 26976 }, { "epoch": 4.403738622913351, "grad_norm": 3.0358529090881348, "learning_rate": 9.054448890092725e-06, "loss": 0.3374, "step": 26977 }, { "epoch": 4.403901881555855, "grad_norm": 3.3474044799804688, "learning_rate": 9.053810623230072e-06, "loss": 0.3726, "step": 26978 }, { "epoch": 4.404065140198359, "grad_norm": 3.213188409805298, "learning_rate": 9.053172360256848e-06, "loss": 0.3698, "step": 26979 }, { "epoch": 4.404228398840863, "grad_norm": 3.388707160949707, "learning_rate": 9.052534101175671e-06, "loss": 0.3532, "step": 26980 }, { "epoch": 4.404391657483368, "grad_norm": 3.7240076065063477, "learning_rate": 9.051895845989172e-06, "loss": 0.3761, "step": 26981 }, { "epoch": 4.404554916125872, "grad_norm": 3.1627252101898193, "learning_rate": 9.051257594699972e-06, "loss": 0.4093, "step": 26982 }, { "epoch": 4.404718174768377, "grad_norm": 3.451092004776001, "learning_rate": 9.050619347310696e-06, "loss": 0.4217, "step": 26983 }, { "epoch": 4.404881433410881, "grad_norm": 2.981248140335083, "learning_rate": 9.04998110382396e-06, "loss": 0.3916, "step": 26984 }, { "epoch": 4.4050446920533854, "grad_norm": 3.0918703079223633, "learning_rate": 9.049342864242397e-06, "loss": 0.3487, "step": 26985 }, { "epoch": 4.40520795069589, "grad_norm": 3.9859888553619385, "learning_rate": 9.048704628568625e-06, "loss": 0.3576, "step": 26986 }, { "epoch": 4.405371209338394, "grad_norm": 3.309556722640991, "learning_rate": 9.048066396805267e-06, "loss": 0.4249, "step": 26987 }, { "epoch": 4.405534467980899, "grad_norm": 3.012079954147339, "learning_rate": 9.04742816895495e-06, "loss": 0.3647, "step": 26988 }, { "epoch": 4.405697726623403, "grad_norm": 3.2810354232788086, "learning_rate": 9.046789945020298e-06, "loss": 0.3607, "step": 26989 }, { "epoch": 4.405860985265908, "grad_norm": 2.8925976753234863, "learning_rate": 9.04615172500393e-06, "loss": 0.3365, "step": 26990 }, { "epoch": 4.406024243908412, "grad_norm": 3.445342779159546, "learning_rate": 9.045513508908471e-06, "loss": 0.3815, "step": 26991 }, { "epoch": 4.4061875025509165, "grad_norm": 3.2737390995025635, "learning_rate": 9.044875296736548e-06, "loss": 0.3748, "step": 26992 }, { "epoch": 4.406350761193421, "grad_norm": 3.908604860305786, "learning_rate": 9.044237088490786e-06, "loss": 0.4358, "step": 26993 }, { "epoch": 4.406514019835925, "grad_norm": 3.2931675910949707, "learning_rate": 9.043598884173801e-06, "loss": 0.4206, "step": 26994 }, { "epoch": 4.40667727847843, "grad_norm": 3.477731227874756, "learning_rate": 9.042960683788216e-06, "loss": 0.3715, "step": 26995 }, { "epoch": 4.406840537120933, "grad_norm": 4.189788818359375, "learning_rate": 9.042322487336662e-06, "loss": 0.4505, "step": 26996 }, { "epoch": 4.407003795763438, "grad_norm": 3.725821018218994, "learning_rate": 9.041684294821757e-06, "loss": 0.3906, "step": 26997 }, { "epoch": 4.407167054405942, "grad_norm": 3.1565818786621094, "learning_rate": 9.041046106246127e-06, "loss": 0.3869, "step": 26998 }, { "epoch": 4.407330313048447, "grad_norm": 3.2771224975585938, "learning_rate": 9.040407921612396e-06, "loss": 0.3937, "step": 26999 }, { "epoch": 4.407493571690951, "grad_norm": 3.6495888233184814, "learning_rate": 9.039769740923183e-06, "loss": 0.3621, "step": 27000 }, { "epoch": 4.407656830333456, "grad_norm": 3.4203262329101562, "learning_rate": 9.039131564181115e-06, "loss": 0.377, "step": 27001 }, { "epoch": 4.40782008897596, "grad_norm": 3.625326156616211, "learning_rate": 9.038493391388816e-06, "loss": 0.3745, "step": 27002 }, { "epoch": 4.407983347618464, "grad_norm": 3.41512131690979, "learning_rate": 9.037855222548907e-06, "loss": 0.337, "step": 27003 }, { "epoch": 4.408146606260969, "grad_norm": 2.817171812057495, "learning_rate": 9.037217057664011e-06, "loss": 0.3182, "step": 27004 }, { "epoch": 4.408309864903473, "grad_norm": 3.048628330230713, "learning_rate": 9.036578896736751e-06, "loss": 0.3364, "step": 27005 }, { "epoch": 4.408473123545978, "grad_norm": 3.4422786235809326, "learning_rate": 9.035940739769754e-06, "loss": 0.3697, "step": 27006 }, { "epoch": 4.408636382188482, "grad_norm": 4.024802207946777, "learning_rate": 9.035302586765635e-06, "loss": 0.3889, "step": 27007 }, { "epoch": 4.408799640830987, "grad_norm": 3.673802137374878, "learning_rate": 9.034664437727028e-06, "loss": 0.4226, "step": 27008 }, { "epoch": 4.408962899473491, "grad_norm": 3.466447114944458, "learning_rate": 9.034026292656549e-06, "loss": 0.4086, "step": 27009 }, { "epoch": 4.4091261581159955, "grad_norm": 3.532381534576416, "learning_rate": 9.033388151556825e-06, "loss": 0.4199, "step": 27010 }, { "epoch": 4.4092894167585, "grad_norm": 3.2023375034332275, "learning_rate": 9.032750014430476e-06, "loss": 0.3438, "step": 27011 }, { "epoch": 4.409452675401004, "grad_norm": 3.163191556930542, "learning_rate": 9.032111881280127e-06, "loss": 0.3717, "step": 27012 }, { "epoch": 4.409615934043509, "grad_norm": 3.53607439994812, "learning_rate": 9.031473752108402e-06, "loss": 0.434, "step": 27013 }, { "epoch": 4.409779192686013, "grad_norm": 3.5769317150115967, "learning_rate": 9.030835626917921e-06, "loss": 0.3434, "step": 27014 }, { "epoch": 4.409942451328517, "grad_norm": 3.904365062713623, "learning_rate": 9.03019750571131e-06, "loss": 0.3804, "step": 27015 }, { "epoch": 4.410105709971021, "grad_norm": 3.788727045059204, "learning_rate": 9.029559388491189e-06, "loss": 0.3559, "step": 27016 }, { "epoch": 4.410268968613526, "grad_norm": 3.55130934715271, "learning_rate": 9.028921275260183e-06, "loss": 0.3897, "step": 27017 }, { "epoch": 4.41043222725603, "grad_norm": 3.587873935699463, "learning_rate": 9.028283166020912e-06, "loss": 0.408, "step": 27018 }, { "epoch": 4.410595485898535, "grad_norm": 3.5913126468658447, "learning_rate": 9.027645060776008e-06, "loss": 0.4162, "step": 27019 }, { "epoch": 4.410758744541039, "grad_norm": 3.6356747150421143, "learning_rate": 9.027006959528084e-06, "loss": 0.3711, "step": 27020 }, { "epoch": 4.410922003183543, "grad_norm": 4.073955059051514, "learning_rate": 9.026368862279769e-06, "loss": 0.5129, "step": 27021 }, { "epoch": 4.411085261826048, "grad_norm": 3.5018417835235596, "learning_rate": 9.025730769033686e-06, "loss": 0.3994, "step": 27022 }, { "epoch": 4.411248520468552, "grad_norm": 3.4580507278442383, "learning_rate": 9.025092679792454e-06, "loss": 0.3779, "step": 27023 }, { "epoch": 4.411411779111057, "grad_norm": 3.6635870933532715, "learning_rate": 9.024454594558696e-06, "loss": 0.406, "step": 27024 }, { "epoch": 4.411575037753561, "grad_norm": 3.4908483028411865, "learning_rate": 9.023816513335038e-06, "loss": 0.3898, "step": 27025 }, { "epoch": 4.411738296396066, "grad_norm": 3.7207677364349365, "learning_rate": 9.023178436124102e-06, "loss": 0.3649, "step": 27026 }, { "epoch": 4.41190155503857, "grad_norm": 3.9794387817382812, "learning_rate": 9.02254036292851e-06, "loss": 0.3922, "step": 27027 }, { "epoch": 4.4120648136810745, "grad_norm": 3.4203178882598877, "learning_rate": 9.021902293750887e-06, "loss": 0.4283, "step": 27028 }, { "epoch": 4.412228072323579, "grad_norm": 3.0903403759002686, "learning_rate": 9.021264228593853e-06, "loss": 0.3541, "step": 27029 }, { "epoch": 4.412391330966083, "grad_norm": 3.8095693588256836, "learning_rate": 9.020626167460029e-06, "loss": 0.3792, "step": 27030 }, { "epoch": 4.412554589608588, "grad_norm": 3.804147481918335, "learning_rate": 9.019988110352044e-06, "loss": 0.3865, "step": 27031 }, { "epoch": 4.412717848251091, "grad_norm": 3.4893531799316406, "learning_rate": 9.019350057272523e-06, "loss": 0.421, "step": 27032 }, { "epoch": 4.412881106893596, "grad_norm": 4.05038595199585, "learning_rate": 9.01871200822408e-06, "loss": 0.3462, "step": 27033 }, { "epoch": 4.4130443655361, "grad_norm": 3.2733840942382812, "learning_rate": 9.018073963209337e-06, "loss": 0.345, "step": 27034 }, { "epoch": 4.413207624178605, "grad_norm": 4.04893159866333, "learning_rate": 9.017435922230925e-06, "loss": 0.4324, "step": 27035 }, { "epoch": 4.413370882821109, "grad_norm": 3.6861560344696045, "learning_rate": 9.016797885291462e-06, "loss": 0.4171, "step": 27036 }, { "epoch": 4.413534141463614, "grad_norm": 3.537414073944092, "learning_rate": 9.016159852393573e-06, "loss": 0.3586, "step": 27037 }, { "epoch": 4.413697400106118, "grad_norm": 3.2640609741210938, "learning_rate": 9.015521823539878e-06, "loss": 0.3765, "step": 27038 }, { "epoch": 4.413860658748622, "grad_norm": 3.544649600982666, "learning_rate": 9.014883798733002e-06, "loss": 0.4104, "step": 27039 }, { "epoch": 4.414023917391127, "grad_norm": 3.045649528503418, "learning_rate": 9.014245777975565e-06, "loss": 0.3834, "step": 27040 }, { "epoch": 4.414187176033631, "grad_norm": 4.0475239753723145, "learning_rate": 9.013607761270191e-06, "loss": 0.436, "step": 27041 }, { "epoch": 4.414350434676136, "grad_norm": 3.386333465576172, "learning_rate": 9.01296974861951e-06, "loss": 0.4205, "step": 27042 }, { "epoch": 4.41451369331864, "grad_norm": 3.9257736206054688, "learning_rate": 9.012331740026131e-06, "loss": 0.3892, "step": 27043 }, { "epoch": 4.414676951961145, "grad_norm": 3.2390174865722656, "learning_rate": 9.011693735492685e-06, "loss": 0.3403, "step": 27044 }, { "epoch": 4.414840210603649, "grad_norm": 2.9207167625427246, "learning_rate": 9.011055735021789e-06, "loss": 0.3595, "step": 27045 }, { "epoch": 4.4150034692461535, "grad_norm": 3.1553146839141846, "learning_rate": 9.010417738616073e-06, "loss": 0.4238, "step": 27046 }, { "epoch": 4.415166727888658, "grad_norm": 3.255747079849243, "learning_rate": 9.009779746278157e-06, "loss": 0.3695, "step": 27047 }, { "epoch": 4.415329986531162, "grad_norm": 2.7871010303497314, "learning_rate": 9.00914175801066e-06, "loss": 0.3525, "step": 27048 }, { "epoch": 4.415493245173666, "grad_norm": 4.902731418609619, "learning_rate": 9.00850377381621e-06, "loss": 0.3833, "step": 27049 }, { "epoch": 4.41565650381617, "grad_norm": 3.4367494583129883, "learning_rate": 9.007865793697425e-06, "loss": 0.3557, "step": 27050 }, { "epoch": 4.415819762458675, "grad_norm": 3.432349443435669, "learning_rate": 9.007227817656931e-06, "loss": 0.4352, "step": 27051 }, { "epoch": 4.415983021101179, "grad_norm": 3.0610477924346924, "learning_rate": 9.006589845697348e-06, "loss": 0.3507, "step": 27052 }, { "epoch": 4.416146279743684, "grad_norm": 3.4109561443328857, "learning_rate": 9.005951877821298e-06, "loss": 0.3947, "step": 27053 }, { "epoch": 4.416309538386188, "grad_norm": 4.038571357727051, "learning_rate": 9.005313914031404e-06, "loss": 0.4031, "step": 27054 }, { "epoch": 4.4164727970286926, "grad_norm": 3.8333323001861572, "learning_rate": 9.004675954330289e-06, "loss": 0.4281, "step": 27055 }, { "epoch": 4.416636055671197, "grad_norm": 3.2276358604431152, "learning_rate": 9.004037998720577e-06, "loss": 0.4028, "step": 27056 }, { "epoch": 4.416799314313701, "grad_norm": 3.7370917797088623, "learning_rate": 9.003400047204884e-06, "loss": 0.4415, "step": 27057 }, { "epoch": 4.416962572956206, "grad_norm": 2.664581060409546, "learning_rate": 9.00276209978584e-06, "loss": 0.3035, "step": 27058 }, { "epoch": 4.41712583159871, "grad_norm": 2.8596158027648926, "learning_rate": 9.002124156466066e-06, "loss": 0.3222, "step": 27059 }, { "epoch": 4.417289090241215, "grad_norm": 4.486430644989014, "learning_rate": 9.001486217248181e-06, "loss": 0.4698, "step": 27060 }, { "epoch": 4.417452348883719, "grad_norm": 3.4041383266448975, "learning_rate": 9.000848282134813e-06, "loss": 0.3824, "step": 27061 }, { "epoch": 4.417615607526224, "grad_norm": 3.847774028778076, "learning_rate": 9.000210351128575e-06, "loss": 0.4319, "step": 27062 }, { "epoch": 4.417778866168728, "grad_norm": 2.913727283477783, "learning_rate": 8.999572424232097e-06, "loss": 0.3517, "step": 27063 }, { "epoch": 4.4179421248112325, "grad_norm": 3.2359893321990967, "learning_rate": 8.998934501448e-06, "loss": 0.3836, "step": 27064 }, { "epoch": 4.418105383453737, "grad_norm": 3.8870933055877686, "learning_rate": 8.998296582778904e-06, "loss": 0.3915, "step": 27065 }, { "epoch": 4.418268642096241, "grad_norm": 3.077467679977417, "learning_rate": 8.997658668227433e-06, "loss": 0.3511, "step": 27066 }, { "epoch": 4.418431900738746, "grad_norm": 4.075514316558838, "learning_rate": 8.997020757796209e-06, "loss": 0.4195, "step": 27067 }, { "epoch": 4.418595159381249, "grad_norm": 3.4194252490997314, "learning_rate": 8.996382851487851e-06, "loss": 0.3885, "step": 27068 }, { "epoch": 4.418758418023754, "grad_norm": 2.7496438026428223, "learning_rate": 8.995744949304986e-06, "loss": 0.3102, "step": 27069 }, { "epoch": 4.418921676666258, "grad_norm": 2.9921629428863525, "learning_rate": 8.995107051250235e-06, "loss": 0.3493, "step": 27070 }, { "epoch": 4.419084935308763, "grad_norm": 3.4354898929595947, "learning_rate": 8.994469157326224e-06, "loss": 0.3125, "step": 27071 }, { "epoch": 4.419248193951267, "grad_norm": 2.624309778213501, "learning_rate": 8.993831267535563e-06, "loss": 0.2837, "step": 27072 }, { "epoch": 4.4194114525937715, "grad_norm": 3.270679473876953, "learning_rate": 8.993193381880886e-06, "loss": 0.3485, "step": 27073 }, { "epoch": 4.419574711236276, "grad_norm": 3.5345263481140137, "learning_rate": 8.99255550036481e-06, "loss": 0.3472, "step": 27074 }, { "epoch": 4.41973796987878, "grad_norm": 3.6543936729431152, "learning_rate": 8.991917622989957e-06, "loss": 0.3779, "step": 27075 }, { "epoch": 4.419901228521285, "grad_norm": 3.563002109527588, "learning_rate": 8.99127974975895e-06, "loss": 0.4068, "step": 27076 }, { "epoch": 4.420064487163789, "grad_norm": 3.8007447719573975, "learning_rate": 8.990641880674411e-06, "loss": 0.4158, "step": 27077 }, { "epoch": 4.420227745806294, "grad_norm": 4.384562015533447, "learning_rate": 8.990004015738963e-06, "loss": 0.4711, "step": 27078 }, { "epoch": 4.420391004448798, "grad_norm": 3.8288636207580566, "learning_rate": 8.989366154955224e-06, "loss": 0.3676, "step": 27079 }, { "epoch": 4.420554263091303, "grad_norm": 4.1602911949157715, "learning_rate": 8.988728298325823e-06, "loss": 0.4975, "step": 27080 }, { "epoch": 4.420717521733807, "grad_norm": 3.346175193786621, "learning_rate": 8.988090445853382e-06, "loss": 0.3419, "step": 27081 }, { "epoch": 4.4208807803763115, "grad_norm": 4.122391223907471, "learning_rate": 8.987452597540514e-06, "loss": 0.4588, "step": 27082 }, { "epoch": 4.421044039018816, "grad_norm": 4.031938076019287, "learning_rate": 8.986814753389845e-06, "loss": 0.3462, "step": 27083 }, { "epoch": 4.42120729766132, "grad_norm": 3.6373469829559326, "learning_rate": 8.986176913403997e-06, "loss": 0.3462, "step": 27084 }, { "epoch": 4.421370556303824, "grad_norm": 3.525075674057007, "learning_rate": 8.985539077585596e-06, "loss": 0.4128, "step": 27085 }, { "epoch": 4.421533814946328, "grad_norm": 3.6638131141662598, "learning_rate": 8.984901245937258e-06, "loss": 0.3878, "step": 27086 }, { "epoch": 4.421697073588833, "grad_norm": 3.327009916305542, "learning_rate": 8.98426341846161e-06, "loss": 0.391, "step": 27087 }, { "epoch": 4.421860332231337, "grad_norm": 3.353715419769287, "learning_rate": 8.983625595161271e-06, "loss": 0.3436, "step": 27088 }, { "epoch": 4.422023590873842, "grad_norm": 3.1505560874938965, "learning_rate": 8.982987776038863e-06, "loss": 0.3284, "step": 27089 }, { "epoch": 4.422186849516346, "grad_norm": 3.6514997482299805, "learning_rate": 8.982349961097012e-06, "loss": 0.3445, "step": 27090 }, { "epoch": 4.4223501081588505, "grad_norm": 4.156802177429199, "learning_rate": 8.981712150338333e-06, "loss": 0.379, "step": 27091 }, { "epoch": 4.422513366801355, "grad_norm": 3.4277918338775635, "learning_rate": 8.98107434376545e-06, "loss": 0.3774, "step": 27092 }, { "epoch": 4.422676625443859, "grad_norm": 3.0744872093200684, "learning_rate": 8.980436541380986e-06, "loss": 0.2863, "step": 27093 }, { "epoch": 4.422839884086364, "grad_norm": 3.4010236263275146, "learning_rate": 8.979798743187563e-06, "loss": 0.3692, "step": 27094 }, { "epoch": 4.423003142728868, "grad_norm": 3.03344464302063, "learning_rate": 8.9791609491878e-06, "loss": 0.3589, "step": 27095 }, { "epoch": 4.423166401371373, "grad_norm": 3.35270094871521, "learning_rate": 8.978523159384323e-06, "loss": 0.3998, "step": 27096 }, { "epoch": 4.423329660013877, "grad_norm": 3.457697868347168, "learning_rate": 8.97788537377975e-06, "loss": 0.3611, "step": 27097 }, { "epoch": 4.423492918656382, "grad_norm": 3.9208455085754395, "learning_rate": 8.977247592376706e-06, "loss": 0.398, "step": 27098 }, { "epoch": 4.423656177298886, "grad_norm": 3.8864972591400146, "learning_rate": 8.97660981517781e-06, "loss": 0.3725, "step": 27099 }, { "epoch": 4.4238194359413905, "grad_norm": 3.8272416591644287, "learning_rate": 8.975972042185688e-06, "loss": 0.444, "step": 27100 }, { "epoch": 4.423982694583895, "grad_norm": 3.890109062194824, "learning_rate": 8.975334273402955e-06, "loss": 0.3996, "step": 27101 }, { "epoch": 4.4241459532263985, "grad_norm": 3.651303768157959, "learning_rate": 8.974696508832237e-06, "loss": 0.3974, "step": 27102 }, { "epoch": 4.424309211868903, "grad_norm": 3.6127662658691406, "learning_rate": 8.974058748476154e-06, "loss": 0.3724, "step": 27103 }, { "epoch": 4.424472470511407, "grad_norm": 3.716110944747925, "learning_rate": 8.973420992337327e-06, "loss": 0.3634, "step": 27104 }, { "epoch": 4.424635729153912, "grad_norm": 4.459988117218018, "learning_rate": 8.97278324041838e-06, "loss": 0.4589, "step": 27105 }, { "epoch": 4.424798987796416, "grad_norm": 3.391983985900879, "learning_rate": 8.972145492721932e-06, "loss": 0.3682, "step": 27106 }, { "epoch": 4.424962246438921, "grad_norm": 3.2497751712799072, "learning_rate": 8.971507749250604e-06, "loss": 0.3384, "step": 27107 }, { "epoch": 4.425125505081425, "grad_norm": 4.101300239562988, "learning_rate": 8.970870010007022e-06, "loss": 0.3829, "step": 27108 }, { "epoch": 4.4252887637239295, "grad_norm": 3.011504650115967, "learning_rate": 8.970232274993805e-06, "loss": 0.3698, "step": 27109 }, { "epoch": 4.425452022366434, "grad_norm": 3.237877368927002, "learning_rate": 8.96959454421358e-06, "loss": 0.3533, "step": 27110 }, { "epoch": 4.425615281008938, "grad_norm": 2.931795835494995, "learning_rate": 8.968956817668954e-06, "loss": 0.2913, "step": 27111 }, { "epoch": 4.425778539651443, "grad_norm": 3.4463930130004883, "learning_rate": 8.96831909536256e-06, "loss": 0.3363, "step": 27112 }, { "epoch": 4.425941798293947, "grad_norm": 3.7349483966827393, "learning_rate": 8.967681377297015e-06, "loss": 0.4455, "step": 27113 }, { "epoch": 4.426105056936452, "grad_norm": 3.8475019931793213, "learning_rate": 8.967043663474943e-06, "loss": 0.3456, "step": 27114 }, { "epoch": 4.426268315578956, "grad_norm": 4.426100254058838, "learning_rate": 8.966405953898967e-06, "loss": 0.411, "step": 27115 }, { "epoch": 4.426431574221461, "grad_norm": 4.171228408813477, "learning_rate": 8.965768248571704e-06, "loss": 0.4354, "step": 27116 }, { "epoch": 4.426594832863965, "grad_norm": 3.5845117568969727, "learning_rate": 8.965130547495777e-06, "loss": 0.3703, "step": 27117 }, { "epoch": 4.4267580915064695, "grad_norm": 3.656604051589966, "learning_rate": 8.964492850673805e-06, "loss": 0.3493, "step": 27118 }, { "epoch": 4.426921350148974, "grad_norm": 4.186909198760986, "learning_rate": 8.963855158108414e-06, "loss": 0.4418, "step": 27119 }, { "epoch": 4.427084608791478, "grad_norm": 4.692595481872559, "learning_rate": 8.963217469802227e-06, "loss": 0.4139, "step": 27120 }, { "epoch": 4.427247867433982, "grad_norm": 3.7838265895843506, "learning_rate": 8.96257978575786e-06, "loss": 0.3538, "step": 27121 }, { "epoch": 4.427411126076486, "grad_norm": 4.121805191040039, "learning_rate": 8.961942105977931e-06, "loss": 0.3784, "step": 27122 }, { "epoch": 4.427574384718991, "grad_norm": 3.3322622776031494, "learning_rate": 8.96130443046507e-06, "loss": 0.3275, "step": 27123 }, { "epoch": 4.427737643361495, "grad_norm": 4.138345241546631, "learning_rate": 8.960666759221893e-06, "loss": 0.4134, "step": 27124 }, { "epoch": 4.427900902004, "grad_norm": 3.9577279090881348, "learning_rate": 8.960029092251022e-06, "loss": 0.4224, "step": 27125 }, { "epoch": 4.428064160646504, "grad_norm": 3.470705986022949, "learning_rate": 8.95939142955508e-06, "loss": 0.4188, "step": 27126 }, { "epoch": 4.4282274192890085, "grad_norm": 3.925875186920166, "learning_rate": 8.958753771136687e-06, "loss": 0.461, "step": 27127 }, { "epoch": 4.428390677931513, "grad_norm": 3.733307361602783, "learning_rate": 8.958116116998462e-06, "loss": 0.4104, "step": 27128 }, { "epoch": 4.428553936574017, "grad_norm": 3.5995352268218994, "learning_rate": 8.95747846714303e-06, "loss": 0.3553, "step": 27129 }, { "epoch": 4.428717195216522, "grad_norm": 4.0737528800964355, "learning_rate": 8.956840821573014e-06, "loss": 0.3695, "step": 27130 }, { "epoch": 4.428880453859026, "grad_norm": 3.1840031147003174, "learning_rate": 8.956203180291028e-06, "loss": 0.315, "step": 27131 }, { "epoch": 4.429043712501531, "grad_norm": 3.406935930252075, "learning_rate": 8.955565543299696e-06, "loss": 0.3841, "step": 27132 }, { "epoch": 4.429206971144035, "grad_norm": 3.4178152084350586, "learning_rate": 8.954927910601637e-06, "loss": 0.3389, "step": 27133 }, { "epoch": 4.42937022978654, "grad_norm": 4.390344142913818, "learning_rate": 8.954290282199478e-06, "loss": 0.4388, "step": 27134 }, { "epoch": 4.429533488429044, "grad_norm": 3.3742785453796387, "learning_rate": 8.953652658095835e-06, "loss": 0.3331, "step": 27135 }, { "epoch": 4.4296967470715485, "grad_norm": 3.8136613368988037, "learning_rate": 8.953015038293333e-06, "loss": 0.4463, "step": 27136 }, { "epoch": 4.429860005714053, "grad_norm": 3.7757973670959473, "learning_rate": 8.952377422794591e-06, "loss": 0.364, "step": 27137 }, { "epoch": 4.4300232643565565, "grad_norm": 3.328066349029541, "learning_rate": 8.95173981160223e-06, "loss": 0.3, "step": 27138 }, { "epoch": 4.430186522999061, "grad_norm": 2.5718581676483154, "learning_rate": 8.951102204718871e-06, "loss": 0.3003, "step": 27139 }, { "epoch": 4.430349781641565, "grad_norm": 3.5457379817962646, "learning_rate": 8.950464602147133e-06, "loss": 0.3967, "step": 27140 }, { "epoch": 4.43051304028407, "grad_norm": 3.340122938156128, "learning_rate": 8.949827003889639e-06, "loss": 0.3597, "step": 27141 }, { "epoch": 4.430676298926574, "grad_norm": 4.427102565765381, "learning_rate": 8.94918940994901e-06, "loss": 0.4355, "step": 27142 }, { "epoch": 4.430839557569079, "grad_norm": 3.1552085876464844, "learning_rate": 8.948551820327867e-06, "loss": 0.3765, "step": 27143 }, { "epoch": 4.431002816211583, "grad_norm": 4.223170757293701, "learning_rate": 8.94791423502883e-06, "loss": 0.4294, "step": 27144 }, { "epoch": 4.4311660748540875, "grad_norm": 4.182583332061768, "learning_rate": 8.947276654054518e-06, "loss": 0.3862, "step": 27145 }, { "epoch": 4.431329333496592, "grad_norm": 3.817253828048706, "learning_rate": 8.946639077407556e-06, "loss": 0.3489, "step": 27146 }, { "epoch": 4.431492592139096, "grad_norm": 3.5173511505126953, "learning_rate": 8.946001505090564e-06, "loss": 0.3703, "step": 27147 }, { "epoch": 4.431655850781601, "grad_norm": 3.4760994911193848, "learning_rate": 8.945363937106161e-06, "loss": 0.3764, "step": 27148 }, { "epoch": 4.431819109424105, "grad_norm": 3.5350887775421143, "learning_rate": 8.94472637345697e-06, "loss": 0.3542, "step": 27149 }, { "epoch": 4.43198236806661, "grad_norm": 4.213665962219238, "learning_rate": 8.94408881414561e-06, "loss": 0.516, "step": 27150 }, { "epoch": 4.432145626709114, "grad_norm": 3.4239799976348877, "learning_rate": 8.943451259174702e-06, "loss": 0.3532, "step": 27151 }, { "epoch": 4.432308885351619, "grad_norm": 3.698550224304199, "learning_rate": 8.942813708546868e-06, "loss": 0.3767, "step": 27152 }, { "epoch": 4.432472143994123, "grad_norm": 2.76253080368042, "learning_rate": 8.942176162264724e-06, "loss": 0.3286, "step": 27153 }, { "epoch": 4.4326354026366275, "grad_norm": 3.5539300441741943, "learning_rate": 8.941538620330897e-06, "loss": 0.4515, "step": 27154 }, { "epoch": 4.432798661279131, "grad_norm": 3.677781581878662, "learning_rate": 8.940901082748004e-06, "loss": 0.3522, "step": 27155 }, { "epoch": 4.4329619199216355, "grad_norm": 3.7988922595977783, "learning_rate": 8.940263549518666e-06, "loss": 0.3531, "step": 27156 }, { "epoch": 4.43312517856414, "grad_norm": 3.557121992111206, "learning_rate": 8.939626020645506e-06, "loss": 0.3463, "step": 27157 }, { "epoch": 4.433288437206644, "grad_norm": 3.526907444000244, "learning_rate": 8.938988496131142e-06, "loss": 0.42, "step": 27158 }, { "epoch": 4.433451695849149, "grad_norm": 3.6933505535125732, "learning_rate": 8.9383509759782e-06, "loss": 0.3838, "step": 27159 }, { "epoch": 4.433614954491653, "grad_norm": 3.654806137084961, "learning_rate": 8.937713460189292e-06, "loss": 0.4261, "step": 27160 }, { "epoch": 4.433778213134158, "grad_norm": 2.873401641845703, "learning_rate": 8.937075948767042e-06, "loss": 0.3101, "step": 27161 }, { "epoch": 4.433941471776662, "grad_norm": 3.8810715675354004, "learning_rate": 8.936438441714074e-06, "loss": 0.3525, "step": 27162 }, { "epoch": 4.4341047304191665, "grad_norm": 3.3603551387786865, "learning_rate": 8.935800939033004e-06, "loss": 0.3432, "step": 27163 }, { "epoch": 4.434267989061671, "grad_norm": 4.482997894287109, "learning_rate": 8.935163440726456e-06, "loss": 0.4455, "step": 27164 }, { "epoch": 4.434431247704175, "grad_norm": 3.8373491764068604, "learning_rate": 8.934525946797047e-06, "loss": 0.4597, "step": 27165 }, { "epoch": 4.43459450634668, "grad_norm": 3.440150737762451, "learning_rate": 8.933888457247402e-06, "loss": 0.3733, "step": 27166 }, { "epoch": 4.434757764989184, "grad_norm": 3.7593441009521484, "learning_rate": 8.933250972080138e-06, "loss": 0.3817, "step": 27167 }, { "epoch": 4.434921023631689, "grad_norm": 4.086215496063232, "learning_rate": 8.932613491297875e-06, "loss": 0.4004, "step": 27168 }, { "epoch": 4.435084282274193, "grad_norm": 3.7118897438049316, "learning_rate": 8.93197601490324e-06, "loss": 0.3633, "step": 27169 }, { "epoch": 4.435247540916698, "grad_norm": 3.0469002723693848, "learning_rate": 8.931338542898845e-06, "loss": 0.3572, "step": 27170 }, { "epoch": 4.435410799559202, "grad_norm": 3.8258869647979736, "learning_rate": 8.930701075287312e-06, "loss": 0.3789, "step": 27171 }, { "epoch": 4.435574058201706, "grad_norm": 3.205564022064209, "learning_rate": 8.930063612071263e-06, "loss": 0.3378, "step": 27172 }, { "epoch": 4.435737316844211, "grad_norm": 3.6551127433776855, "learning_rate": 8.92942615325332e-06, "loss": 0.4047, "step": 27173 }, { "epoch": 4.4359005754867145, "grad_norm": 3.9702670574188232, "learning_rate": 8.928788698836102e-06, "loss": 0.4509, "step": 27174 }, { "epoch": 4.436063834129219, "grad_norm": 3.1696078777313232, "learning_rate": 8.928151248822228e-06, "loss": 0.3309, "step": 27175 }, { "epoch": 4.436227092771723, "grad_norm": 4.462554454803467, "learning_rate": 8.927513803214321e-06, "loss": 0.4386, "step": 27176 }, { "epoch": 4.436390351414228, "grad_norm": 3.988358736038208, "learning_rate": 8.926876362014999e-06, "loss": 0.4588, "step": 27177 }, { "epoch": 4.436553610056732, "grad_norm": 3.560513496398926, "learning_rate": 8.926238925226884e-06, "loss": 0.4243, "step": 27178 }, { "epoch": 4.436716868699237, "grad_norm": 3.6661789417266846, "learning_rate": 8.925601492852596e-06, "loss": 0.4172, "step": 27179 }, { "epoch": 4.436880127341741, "grad_norm": 3.170809030532837, "learning_rate": 8.924964064894753e-06, "loss": 0.3447, "step": 27180 }, { "epoch": 4.4370433859842455, "grad_norm": 4.163512229919434, "learning_rate": 8.924326641355978e-06, "loss": 0.4617, "step": 27181 }, { "epoch": 4.43720664462675, "grad_norm": 3.0714433193206787, "learning_rate": 8.923689222238888e-06, "loss": 0.3323, "step": 27182 }, { "epoch": 4.437369903269254, "grad_norm": 3.7392423152923584, "learning_rate": 8.923051807546105e-06, "loss": 0.366, "step": 27183 }, { "epoch": 4.437533161911759, "grad_norm": 3.091714859008789, "learning_rate": 8.922414397280251e-06, "loss": 0.3599, "step": 27184 }, { "epoch": 4.437696420554263, "grad_norm": 2.9634242057800293, "learning_rate": 8.921776991443943e-06, "loss": 0.3444, "step": 27185 }, { "epoch": 4.437859679196768, "grad_norm": 3.5146236419677734, "learning_rate": 8.921139590039805e-06, "loss": 0.3818, "step": 27186 }, { "epoch": 4.438022937839272, "grad_norm": 3.5564422607421875, "learning_rate": 8.920502193070453e-06, "loss": 0.3576, "step": 27187 }, { "epoch": 4.438186196481777, "grad_norm": 3.341080904006958, "learning_rate": 8.919864800538512e-06, "loss": 0.3273, "step": 27188 }, { "epoch": 4.438349455124281, "grad_norm": 3.03627610206604, "learning_rate": 8.919227412446596e-06, "loss": 0.3199, "step": 27189 }, { "epoch": 4.4385127137667855, "grad_norm": 3.128310203552246, "learning_rate": 8.918590028797328e-06, "loss": 0.3087, "step": 27190 }, { "epoch": 4.438675972409289, "grad_norm": 3.2725491523742676, "learning_rate": 8.917952649593328e-06, "loss": 0.3524, "step": 27191 }, { "epoch": 4.4388392310517935, "grad_norm": 2.754063129425049, "learning_rate": 8.917315274837217e-06, "loss": 0.3141, "step": 27192 }, { "epoch": 4.439002489694298, "grad_norm": 3.719142436981201, "learning_rate": 8.916677904531615e-06, "loss": 0.3742, "step": 27193 }, { "epoch": 4.439165748336802, "grad_norm": 4.408721923828125, "learning_rate": 8.916040538679138e-06, "loss": 0.4488, "step": 27194 }, { "epoch": 4.439329006979307, "grad_norm": 3.5904386043548584, "learning_rate": 8.91540317728241e-06, "loss": 0.3736, "step": 27195 }, { "epoch": 4.439492265621811, "grad_norm": 3.1526684761047363, "learning_rate": 8.91476582034405e-06, "loss": 0.3295, "step": 27196 }, { "epoch": 4.439655524264316, "grad_norm": 2.8726418018341064, "learning_rate": 8.914128467866678e-06, "loss": 0.3394, "step": 27197 }, { "epoch": 4.43981878290682, "grad_norm": 3.7215194702148438, "learning_rate": 8.91349111985292e-06, "loss": 0.4075, "step": 27198 }, { "epoch": 4.4399820415493245, "grad_norm": 3.7689223289489746, "learning_rate": 8.912853776305381e-06, "loss": 0.3219, "step": 27199 }, { "epoch": 4.440145300191829, "grad_norm": 3.6501264572143555, "learning_rate": 8.912216437226692e-06, "loss": 0.3652, "step": 27200 }, { "epoch": 4.440308558834333, "grad_norm": 2.311864137649536, "learning_rate": 8.911579102619473e-06, "loss": 0.3127, "step": 27201 }, { "epoch": 4.440471817476838, "grad_norm": 3.699409008026123, "learning_rate": 8.910941772486338e-06, "loss": 0.3666, "step": 27202 }, { "epoch": 4.440635076119342, "grad_norm": 3.3137195110321045, "learning_rate": 8.910304446829913e-06, "loss": 0.3345, "step": 27203 }, { "epoch": 4.440798334761847, "grad_norm": 3.157541036605835, "learning_rate": 8.909667125652815e-06, "loss": 0.366, "step": 27204 }, { "epoch": 4.440961593404351, "grad_norm": 4.187073707580566, "learning_rate": 8.909029808957663e-06, "loss": 0.4314, "step": 27205 }, { "epoch": 4.441124852046856, "grad_norm": 3.140018939971924, "learning_rate": 8.908392496747074e-06, "loss": 0.3069, "step": 27206 }, { "epoch": 4.44128811068936, "grad_norm": 3.4449520111083984, "learning_rate": 8.907755189023674e-06, "loss": 0.3202, "step": 27207 }, { "epoch": 4.441451369331864, "grad_norm": 3.760274648666382, "learning_rate": 8.907117885790085e-06, "loss": 0.4393, "step": 27208 }, { "epoch": 4.441614627974368, "grad_norm": 3.066328287124634, "learning_rate": 8.906480587048917e-06, "loss": 0.3136, "step": 27209 }, { "epoch": 4.4417778866168725, "grad_norm": 2.6514649391174316, "learning_rate": 8.905843292802791e-06, "loss": 0.2905, "step": 27210 }, { "epoch": 4.441941145259377, "grad_norm": 3.194685459136963, "learning_rate": 8.905206003054334e-06, "loss": 0.3272, "step": 27211 }, { "epoch": 4.442104403901881, "grad_norm": 3.3006279468536377, "learning_rate": 8.904568717806162e-06, "loss": 0.3917, "step": 27212 }, { "epoch": 4.442267662544386, "grad_norm": 3.535588502883911, "learning_rate": 8.903931437060892e-06, "loss": 0.3986, "step": 27213 }, { "epoch": 4.44243092118689, "grad_norm": 3.4677727222442627, "learning_rate": 8.903294160821148e-06, "loss": 0.3232, "step": 27214 }, { "epoch": 4.442594179829395, "grad_norm": 3.8704073429107666, "learning_rate": 8.902656889089548e-06, "loss": 0.4217, "step": 27215 }, { "epoch": 4.442757438471899, "grad_norm": 4.386363506317139, "learning_rate": 8.90201962186871e-06, "loss": 0.3824, "step": 27216 }, { "epoch": 4.4429206971144035, "grad_norm": 3.426255226135254, "learning_rate": 8.901382359161253e-06, "loss": 0.3471, "step": 27217 }, { "epoch": 4.443083955756908, "grad_norm": 3.6846728324890137, "learning_rate": 8.900745100969806e-06, "loss": 0.437, "step": 27218 }, { "epoch": 4.443247214399412, "grad_norm": 3.4031927585601807, "learning_rate": 8.900107847296975e-06, "loss": 0.3679, "step": 27219 }, { "epoch": 4.443410473041917, "grad_norm": 3.6434683799743652, "learning_rate": 8.899470598145385e-06, "loss": 0.3212, "step": 27220 }, { "epoch": 4.443573731684421, "grad_norm": 3.4629509449005127, "learning_rate": 8.898833353517656e-06, "loss": 0.3475, "step": 27221 }, { "epoch": 4.443736990326926, "grad_norm": 3.2256762981414795, "learning_rate": 8.898196113416407e-06, "loss": 0.3578, "step": 27222 }, { "epoch": 4.44390024896943, "grad_norm": 3.2524850368499756, "learning_rate": 8.897558877844258e-06, "loss": 0.3357, "step": 27223 }, { "epoch": 4.444063507611935, "grad_norm": 3.7637569904327393, "learning_rate": 8.896921646803827e-06, "loss": 0.393, "step": 27224 }, { "epoch": 4.444226766254438, "grad_norm": 3.671557664871216, "learning_rate": 8.896284420297738e-06, "loss": 0.3719, "step": 27225 }, { "epoch": 4.444390024896943, "grad_norm": 3.6444196701049805, "learning_rate": 8.895647198328606e-06, "loss": 0.3343, "step": 27226 }, { "epoch": 4.444553283539447, "grad_norm": 4.222781181335449, "learning_rate": 8.895009980899052e-06, "loss": 0.4216, "step": 27227 }, { "epoch": 4.4447165421819514, "grad_norm": 3.2724595069885254, "learning_rate": 8.894372768011693e-06, "loss": 0.3328, "step": 27228 }, { "epoch": 4.444879800824456, "grad_norm": 3.3720788955688477, "learning_rate": 8.89373555966915e-06, "loss": 0.418, "step": 27229 }, { "epoch": 4.44504305946696, "grad_norm": 3.3237321376800537, "learning_rate": 8.893098355874043e-06, "loss": 0.3301, "step": 27230 }, { "epoch": 4.445206318109465, "grad_norm": 3.525312662124634, "learning_rate": 8.89246115662899e-06, "loss": 0.3307, "step": 27231 }, { "epoch": 4.445369576751969, "grad_norm": 4.873569488525391, "learning_rate": 8.891823961936612e-06, "loss": 0.4976, "step": 27232 }, { "epoch": 4.445532835394474, "grad_norm": 3.7232091426849365, "learning_rate": 8.891186771799525e-06, "loss": 0.3641, "step": 27233 }, { "epoch": 4.445696094036978, "grad_norm": 4.229440212249756, "learning_rate": 8.890549586220352e-06, "loss": 0.4039, "step": 27234 }, { "epoch": 4.4458593526794825, "grad_norm": 3.9798641204833984, "learning_rate": 8.889912405201711e-06, "loss": 0.4486, "step": 27235 }, { "epoch": 4.446022611321987, "grad_norm": 3.6802661418914795, "learning_rate": 8.889275228746221e-06, "loss": 0.3748, "step": 27236 }, { "epoch": 4.446185869964491, "grad_norm": 3.944481611251831, "learning_rate": 8.888638056856503e-06, "loss": 0.4552, "step": 27237 }, { "epoch": 4.446349128606996, "grad_norm": 3.355738639831543, "learning_rate": 8.888000889535172e-06, "loss": 0.425, "step": 27238 }, { "epoch": 4.4465123872495, "grad_norm": 3.447932004928589, "learning_rate": 8.88736372678485e-06, "loss": 0.3717, "step": 27239 }, { "epoch": 4.446675645892005, "grad_norm": 3.376521348953247, "learning_rate": 8.886726568608154e-06, "loss": 0.3545, "step": 27240 }, { "epoch": 4.446838904534509, "grad_norm": 3.1112115383148193, "learning_rate": 8.886089415007707e-06, "loss": 0.3269, "step": 27241 }, { "epoch": 4.447002163177014, "grad_norm": 4.028482913970947, "learning_rate": 8.885452265986125e-06, "loss": 0.3788, "step": 27242 }, { "epoch": 4.447165421819518, "grad_norm": 3.3346903324127197, "learning_rate": 8.884815121546027e-06, "loss": 0.3805, "step": 27243 }, { "epoch": 4.447328680462022, "grad_norm": 4.250400543212891, "learning_rate": 8.884177981690031e-06, "loss": 0.3962, "step": 27244 }, { "epoch": 4.447491939104526, "grad_norm": 3.899122714996338, "learning_rate": 8.883540846420761e-06, "loss": 0.4236, "step": 27245 }, { "epoch": 4.44765519774703, "grad_norm": 3.125736951828003, "learning_rate": 8.882903715740834e-06, "loss": 0.3136, "step": 27246 }, { "epoch": 4.447818456389535, "grad_norm": 4.113254547119141, "learning_rate": 8.88226658965287e-06, "loss": 0.3897, "step": 27247 }, { "epoch": 4.447981715032039, "grad_norm": 3.6771974563598633, "learning_rate": 8.881629468159481e-06, "loss": 0.328, "step": 27248 }, { "epoch": 4.448144973674544, "grad_norm": 3.5373055934906006, "learning_rate": 8.880992351263293e-06, "loss": 0.3555, "step": 27249 }, { "epoch": 4.448308232317048, "grad_norm": 3.5753347873687744, "learning_rate": 8.880355238966923e-06, "loss": 0.3576, "step": 27250 }, { "epoch": 4.448471490959553, "grad_norm": 3.954653024673462, "learning_rate": 8.87971813127299e-06, "loss": 0.3971, "step": 27251 }, { "epoch": 4.448634749602057, "grad_norm": 3.070390224456787, "learning_rate": 8.87908102818411e-06, "loss": 0.3207, "step": 27252 }, { "epoch": 4.4487980082445615, "grad_norm": 3.0589041709899902, "learning_rate": 8.878443929702908e-06, "loss": 0.3332, "step": 27253 }, { "epoch": 4.448961266887066, "grad_norm": 4.644470691680908, "learning_rate": 8.877806835831998e-06, "loss": 0.4603, "step": 27254 }, { "epoch": 4.44912452552957, "grad_norm": 3.950423240661621, "learning_rate": 8.877169746574002e-06, "loss": 0.3975, "step": 27255 }, { "epoch": 4.449287784172075, "grad_norm": 4.1328511238098145, "learning_rate": 8.876532661931533e-06, "loss": 0.3514, "step": 27256 }, { "epoch": 4.449451042814579, "grad_norm": 3.302079916000366, "learning_rate": 8.87589558190722e-06, "loss": 0.3902, "step": 27257 }, { "epoch": 4.449614301457084, "grad_norm": 3.639862537384033, "learning_rate": 8.875258506503674e-06, "loss": 0.403, "step": 27258 }, { "epoch": 4.449777560099588, "grad_norm": 3.757964849472046, "learning_rate": 8.874621435723515e-06, "loss": 0.3799, "step": 27259 }, { "epoch": 4.449940818742093, "grad_norm": 4.149846076965332, "learning_rate": 8.87398436956936e-06, "loss": 0.3817, "step": 27260 }, { "epoch": 4.450104077384596, "grad_norm": 3.932687520980835, "learning_rate": 8.873347308043832e-06, "loss": 0.3501, "step": 27261 }, { "epoch": 4.450267336027101, "grad_norm": 4.252663612365723, "learning_rate": 8.872710251149546e-06, "loss": 0.4798, "step": 27262 }, { "epoch": 4.450430594669605, "grad_norm": 4.22774600982666, "learning_rate": 8.872073198889123e-06, "loss": 0.4292, "step": 27263 }, { "epoch": 4.450593853312109, "grad_norm": 4.16387414932251, "learning_rate": 8.871436151265183e-06, "loss": 0.4472, "step": 27264 }, { "epoch": 4.450757111954614, "grad_norm": 3.430509090423584, "learning_rate": 8.870799108280342e-06, "loss": 0.3321, "step": 27265 }, { "epoch": 4.450920370597118, "grad_norm": 3.625762462615967, "learning_rate": 8.87016206993722e-06, "loss": 0.382, "step": 27266 }, { "epoch": 4.451083629239623, "grad_norm": 3.4431772232055664, "learning_rate": 8.869525036238438e-06, "loss": 0.3588, "step": 27267 }, { "epoch": 4.451246887882127, "grad_norm": 4.404247760772705, "learning_rate": 8.868888007186608e-06, "loss": 0.4211, "step": 27268 }, { "epoch": 4.451410146524632, "grad_norm": 3.5653603076934814, "learning_rate": 8.868250982784352e-06, "loss": 0.3567, "step": 27269 }, { "epoch": 4.451573405167136, "grad_norm": 3.3789498805999756, "learning_rate": 8.867613963034289e-06, "loss": 0.3861, "step": 27270 }, { "epoch": 4.4517366638096405, "grad_norm": 3.3736679553985596, "learning_rate": 8.866976947939036e-06, "loss": 0.3845, "step": 27271 }, { "epoch": 4.451899922452145, "grad_norm": 3.4833598136901855, "learning_rate": 8.866339937501216e-06, "loss": 0.3916, "step": 27272 }, { "epoch": 4.452063181094649, "grad_norm": 3.7159805297851562, "learning_rate": 8.865702931723443e-06, "loss": 0.4153, "step": 27273 }, { "epoch": 4.452226439737154, "grad_norm": 3.1986758708953857, "learning_rate": 8.865065930608338e-06, "loss": 0.3446, "step": 27274 }, { "epoch": 4.452389698379658, "grad_norm": 3.1906707286834717, "learning_rate": 8.864428934158518e-06, "loss": 0.3606, "step": 27275 }, { "epoch": 4.452552957022163, "grad_norm": 3.5484516620635986, "learning_rate": 8.863791942376602e-06, "loss": 0.34, "step": 27276 }, { "epoch": 4.452716215664667, "grad_norm": 3.6883013248443604, "learning_rate": 8.863154955265209e-06, "loss": 0.302, "step": 27277 }, { "epoch": 4.452879474307171, "grad_norm": 3.4719831943511963, "learning_rate": 8.862517972826956e-06, "loss": 0.3915, "step": 27278 }, { "epoch": 4.453042732949675, "grad_norm": 3.5262198448181152, "learning_rate": 8.861880995064461e-06, "loss": 0.3406, "step": 27279 }, { "epoch": 4.45320599159218, "grad_norm": 3.266853094100952, "learning_rate": 8.861244021980344e-06, "loss": 0.3378, "step": 27280 }, { "epoch": 4.453369250234684, "grad_norm": 3.4712202548980713, "learning_rate": 8.860607053577225e-06, "loss": 0.3615, "step": 27281 }, { "epoch": 4.453532508877188, "grad_norm": 4.233877658843994, "learning_rate": 8.859970089857717e-06, "loss": 0.3839, "step": 27282 }, { "epoch": 4.453695767519693, "grad_norm": 3.3332343101501465, "learning_rate": 8.85933313082444e-06, "loss": 0.3256, "step": 27283 }, { "epoch": 4.453859026162197, "grad_norm": 3.6283295154571533, "learning_rate": 8.858696176480017e-06, "loss": 0.4085, "step": 27284 }, { "epoch": 4.454022284804702, "grad_norm": 3.5186822414398193, "learning_rate": 8.858059226827063e-06, "loss": 0.3363, "step": 27285 }, { "epoch": 4.454185543447206, "grad_norm": 3.3446128368377686, "learning_rate": 8.8574222818682e-06, "loss": 0.4094, "step": 27286 }, { "epoch": 4.454348802089711, "grad_norm": 3.9618561267852783, "learning_rate": 8.856785341606035e-06, "loss": 0.4059, "step": 27287 }, { "epoch": 4.454512060732215, "grad_norm": 3.106628179550171, "learning_rate": 8.856148406043198e-06, "loss": 0.3307, "step": 27288 }, { "epoch": 4.4546753193747195, "grad_norm": 3.5606689453125, "learning_rate": 8.855511475182302e-06, "loss": 0.3422, "step": 27289 }, { "epoch": 4.454838578017224, "grad_norm": 2.993356227874756, "learning_rate": 8.854874549025966e-06, "loss": 0.3028, "step": 27290 }, { "epoch": 4.455001836659728, "grad_norm": 2.963770866394043, "learning_rate": 8.85423762757681e-06, "loss": 0.3181, "step": 27291 }, { "epoch": 4.455165095302233, "grad_norm": 4.0590972900390625, "learning_rate": 8.853600710837448e-06, "loss": 0.4712, "step": 27292 }, { "epoch": 4.455328353944737, "grad_norm": 3.610877513885498, "learning_rate": 8.852963798810503e-06, "loss": 0.3951, "step": 27293 }, { "epoch": 4.455491612587242, "grad_norm": 2.623760938644409, "learning_rate": 8.852326891498587e-06, "loss": 0.3388, "step": 27294 }, { "epoch": 4.455654871229746, "grad_norm": 3.631621837615967, "learning_rate": 8.851689988904325e-06, "loss": 0.3512, "step": 27295 }, { "epoch": 4.455818129872251, "grad_norm": 3.7986512184143066, "learning_rate": 8.851053091030335e-06, "loss": 0.4258, "step": 27296 }, { "epoch": 4.455981388514754, "grad_norm": 4.007653713226318, "learning_rate": 8.850416197879227e-06, "loss": 0.4384, "step": 27297 }, { "epoch": 4.4561446471572586, "grad_norm": 3.7127368450164795, "learning_rate": 8.849779309453625e-06, "loss": 0.363, "step": 27298 }, { "epoch": 4.456307905799763, "grad_norm": 3.4285175800323486, "learning_rate": 8.849142425756145e-06, "loss": 0.331, "step": 27299 }, { "epoch": 4.456471164442267, "grad_norm": 3.301431894302368, "learning_rate": 8.848505546789407e-06, "loss": 0.3426, "step": 27300 }, { "epoch": 4.456634423084772, "grad_norm": 2.7544689178466797, "learning_rate": 8.847868672556028e-06, "loss": 0.321, "step": 27301 }, { "epoch": 4.456797681727276, "grad_norm": 3.4046661853790283, "learning_rate": 8.847231803058626e-06, "loss": 0.3748, "step": 27302 }, { "epoch": 4.456960940369781, "grad_norm": 3.4905261993408203, "learning_rate": 8.846594938299819e-06, "loss": 0.3915, "step": 27303 }, { "epoch": 4.457124199012285, "grad_norm": 3.6581342220306396, "learning_rate": 8.845958078282224e-06, "loss": 0.3559, "step": 27304 }, { "epoch": 4.45728745765479, "grad_norm": 3.028184175491333, "learning_rate": 8.84532122300846e-06, "loss": 0.3395, "step": 27305 }, { "epoch": 4.457450716297294, "grad_norm": 3.4948089122772217, "learning_rate": 8.844684372481146e-06, "loss": 0.434, "step": 27306 }, { "epoch": 4.4576139749397985, "grad_norm": 3.386488914489746, "learning_rate": 8.844047526702895e-06, "loss": 0.3563, "step": 27307 }, { "epoch": 4.457777233582303, "grad_norm": 3.9354960918426514, "learning_rate": 8.843410685676332e-06, "loss": 0.369, "step": 27308 }, { "epoch": 4.457940492224807, "grad_norm": 3.706141948699951, "learning_rate": 8.842773849404068e-06, "loss": 0.3678, "step": 27309 }, { "epoch": 4.458103750867312, "grad_norm": 3.214252471923828, "learning_rate": 8.842137017888722e-06, "loss": 0.3364, "step": 27310 }, { "epoch": 4.458267009509816, "grad_norm": 4.427556037902832, "learning_rate": 8.841500191132915e-06, "loss": 0.4093, "step": 27311 }, { "epoch": 4.458430268152321, "grad_norm": 3.5812582969665527, "learning_rate": 8.840863369139265e-06, "loss": 0.3907, "step": 27312 }, { "epoch": 4.458593526794825, "grad_norm": 3.6801917552948, "learning_rate": 8.840226551910387e-06, "loss": 0.3945, "step": 27313 }, { "epoch": 4.458756785437329, "grad_norm": 3.3934521675109863, "learning_rate": 8.8395897394489e-06, "loss": 0.3277, "step": 27314 }, { "epoch": 4.458920044079833, "grad_norm": 3.719351053237915, "learning_rate": 8.838952931757423e-06, "loss": 0.3964, "step": 27315 }, { "epoch": 4.4590833027223375, "grad_norm": 3.362649440765381, "learning_rate": 8.83831612883857e-06, "loss": 0.3698, "step": 27316 }, { "epoch": 4.459246561364842, "grad_norm": 3.8931925296783447, "learning_rate": 8.837679330694962e-06, "loss": 0.4336, "step": 27317 }, { "epoch": 4.459409820007346, "grad_norm": 2.907174825668335, "learning_rate": 8.837042537329214e-06, "loss": 0.3207, "step": 27318 }, { "epoch": 4.459573078649851, "grad_norm": 3.8423268795013428, "learning_rate": 8.836405748743946e-06, "loss": 0.4544, "step": 27319 }, { "epoch": 4.459736337292355, "grad_norm": 3.933917999267578, "learning_rate": 8.835768964941773e-06, "loss": 0.3665, "step": 27320 }, { "epoch": 4.45989959593486, "grad_norm": 3.639606475830078, "learning_rate": 8.835132185925313e-06, "loss": 0.4234, "step": 27321 }, { "epoch": 4.460062854577364, "grad_norm": 4.186946868896484, "learning_rate": 8.834495411697189e-06, "loss": 0.4565, "step": 27322 }, { "epoch": 4.460226113219869, "grad_norm": 3.5364139080047607, "learning_rate": 8.833858642260012e-06, "loss": 0.3556, "step": 27323 }, { "epoch": 4.460389371862373, "grad_norm": 3.0712246894836426, "learning_rate": 8.833221877616401e-06, "loss": 0.3587, "step": 27324 }, { "epoch": 4.4605526305048775, "grad_norm": 3.1529810428619385, "learning_rate": 8.832585117768979e-06, "loss": 0.351, "step": 27325 }, { "epoch": 4.460715889147382, "grad_norm": 4.485784530639648, "learning_rate": 8.831948362720354e-06, "loss": 0.416, "step": 27326 }, { "epoch": 4.460879147789886, "grad_norm": 4.080301284790039, "learning_rate": 8.831311612473149e-06, "loss": 0.4248, "step": 27327 }, { "epoch": 4.461042406432391, "grad_norm": 3.6011502742767334, "learning_rate": 8.830674867029981e-06, "loss": 0.3887, "step": 27328 }, { "epoch": 4.461205665074895, "grad_norm": 3.128570079803467, "learning_rate": 8.830038126393468e-06, "loss": 0.3546, "step": 27329 }, { "epoch": 4.4613689237174, "grad_norm": 3.513889789581299, "learning_rate": 8.829401390566226e-06, "loss": 0.388, "step": 27330 }, { "epoch": 4.461532182359903, "grad_norm": 4.0069451332092285, "learning_rate": 8.828764659550872e-06, "loss": 0.4117, "step": 27331 }, { "epoch": 4.461695441002408, "grad_norm": 3.1208438873291016, "learning_rate": 8.828127933350024e-06, "loss": 0.3439, "step": 27332 }, { "epoch": 4.461858699644912, "grad_norm": 3.994478702545166, "learning_rate": 8.827491211966299e-06, "loss": 0.3826, "step": 27333 }, { "epoch": 4.4620219582874165, "grad_norm": 3.663881301879883, "learning_rate": 8.826854495402316e-06, "loss": 0.4517, "step": 27334 }, { "epoch": 4.462185216929921, "grad_norm": 3.933896541595459, "learning_rate": 8.826217783660695e-06, "loss": 0.4222, "step": 27335 }, { "epoch": 4.462348475572425, "grad_norm": 3.8399758338928223, "learning_rate": 8.825581076744047e-06, "loss": 0.409, "step": 27336 }, { "epoch": 4.46251173421493, "grad_norm": 3.4963057041168213, "learning_rate": 8.824944374654988e-06, "loss": 0.3289, "step": 27337 }, { "epoch": 4.462674992857434, "grad_norm": 4.177603244781494, "learning_rate": 8.824307677396141e-06, "loss": 0.4045, "step": 27338 }, { "epoch": 4.462838251499939, "grad_norm": 4.119924545288086, "learning_rate": 8.823670984970123e-06, "loss": 0.3869, "step": 27339 }, { "epoch": 4.463001510142443, "grad_norm": 3.7321646213531494, "learning_rate": 8.823034297379547e-06, "loss": 0.4392, "step": 27340 }, { "epoch": 4.463164768784948, "grad_norm": 3.8983335494995117, "learning_rate": 8.822397614627034e-06, "loss": 0.4038, "step": 27341 }, { "epoch": 4.463328027427452, "grad_norm": 3.8552238941192627, "learning_rate": 8.8217609367152e-06, "loss": 0.3852, "step": 27342 }, { "epoch": 4.4634912860699565, "grad_norm": 3.951049566268921, "learning_rate": 8.82112426364666e-06, "loss": 0.4263, "step": 27343 }, { "epoch": 4.463654544712461, "grad_norm": 3.266101598739624, "learning_rate": 8.820487595424034e-06, "loss": 0.3693, "step": 27344 }, { "epoch": 4.463817803354965, "grad_norm": 4.254861831665039, "learning_rate": 8.819850932049942e-06, "loss": 0.4596, "step": 27345 }, { "epoch": 4.46398106199747, "grad_norm": 3.4067680835723877, "learning_rate": 8.819214273526995e-06, "loss": 0.3662, "step": 27346 }, { "epoch": 4.464144320639974, "grad_norm": 3.8119380474090576, "learning_rate": 8.81857761985781e-06, "loss": 0.3866, "step": 27347 }, { "epoch": 4.464307579282479, "grad_norm": 3.974926471710205, "learning_rate": 8.817940971045007e-06, "loss": 0.3587, "step": 27348 }, { "epoch": 4.464470837924983, "grad_norm": 3.406787872314453, "learning_rate": 8.817304327091202e-06, "loss": 0.4286, "step": 27349 }, { "epoch": 4.464634096567487, "grad_norm": 3.7284677028656006, "learning_rate": 8.816667687999013e-06, "loss": 0.3534, "step": 27350 }, { "epoch": 4.464797355209991, "grad_norm": 3.1299383640289307, "learning_rate": 8.816031053771056e-06, "loss": 0.3285, "step": 27351 }, { "epoch": 4.4649606138524955, "grad_norm": 3.4766201972961426, "learning_rate": 8.815394424409948e-06, "loss": 0.4082, "step": 27352 }, { "epoch": 4.465123872495, "grad_norm": 3.338620662689209, "learning_rate": 8.814757799918308e-06, "loss": 0.3608, "step": 27353 }, { "epoch": 4.465287131137504, "grad_norm": 3.9049415588378906, "learning_rate": 8.814121180298749e-06, "loss": 0.4889, "step": 27354 }, { "epoch": 4.465450389780009, "grad_norm": 3.2323825359344482, "learning_rate": 8.813484565553893e-06, "loss": 0.3076, "step": 27355 }, { "epoch": 4.465613648422513, "grad_norm": 3.5401086807250977, "learning_rate": 8.812847955686353e-06, "loss": 0.372, "step": 27356 }, { "epoch": 4.465776907065018, "grad_norm": 3.6301841735839844, "learning_rate": 8.812211350698745e-06, "loss": 0.4008, "step": 27357 }, { "epoch": 4.465940165707522, "grad_norm": 3.9360270500183105, "learning_rate": 8.811574750593689e-06, "loss": 0.4424, "step": 27358 }, { "epoch": 4.466103424350027, "grad_norm": 3.7940683364868164, "learning_rate": 8.810938155373798e-06, "loss": 0.4266, "step": 27359 }, { "epoch": 4.466266682992531, "grad_norm": 3.401677131652832, "learning_rate": 8.810301565041692e-06, "loss": 0.3946, "step": 27360 }, { "epoch": 4.4664299416350355, "grad_norm": 3.704824209213257, "learning_rate": 8.80966497959999e-06, "loss": 0.4426, "step": 27361 }, { "epoch": 4.46659320027754, "grad_norm": 3.5540049076080322, "learning_rate": 8.809028399051302e-06, "loss": 0.4144, "step": 27362 }, { "epoch": 4.466756458920044, "grad_norm": 3.269620418548584, "learning_rate": 8.808391823398252e-06, "loss": 0.3382, "step": 27363 }, { "epoch": 4.466919717562549, "grad_norm": 3.4831924438476562, "learning_rate": 8.807755252643452e-06, "loss": 0.3872, "step": 27364 }, { "epoch": 4.467082976205053, "grad_norm": 3.7250277996063232, "learning_rate": 8.807118686789521e-06, "loss": 0.415, "step": 27365 }, { "epoch": 4.467246234847558, "grad_norm": 3.812936782836914, "learning_rate": 8.806482125839074e-06, "loss": 0.4204, "step": 27366 }, { "epoch": 4.467409493490061, "grad_norm": 3.5156960487365723, "learning_rate": 8.805845569794728e-06, "loss": 0.4414, "step": 27367 }, { "epoch": 4.467572752132566, "grad_norm": 3.3128855228424072, "learning_rate": 8.805209018659099e-06, "loss": 0.4016, "step": 27368 }, { "epoch": 4.46773601077507, "grad_norm": 3.2669851779937744, "learning_rate": 8.804572472434805e-06, "loss": 0.3232, "step": 27369 }, { "epoch": 4.4678992694175745, "grad_norm": 3.3506038188934326, "learning_rate": 8.803935931124464e-06, "loss": 0.4215, "step": 27370 }, { "epoch": 4.468062528060079, "grad_norm": 3.6608059406280518, "learning_rate": 8.803299394730686e-06, "loss": 0.3959, "step": 27371 }, { "epoch": 4.468225786702583, "grad_norm": 2.948525905609131, "learning_rate": 8.802662863256095e-06, "loss": 0.2968, "step": 27372 }, { "epoch": 4.468389045345088, "grad_norm": 3.553034543991089, "learning_rate": 8.802026336703305e-06, "loss": 0.4055, "step": 27373 }, { "epoch": 4.468552303987592, "grad_norm": 3.9078257083892822, "learning_rate": 8.801389815074938e-06, "loss": 0.4018, "step": 27374 }, { "epoch": 4.468715562630097, "grad_norm": 3.5950212478637695, "learning_rate": 8.800753298373597e-06, "loss": 0.3558, "step": 27375 }, { "epoch": 4.468878821272601, "grad_norm": 3.7019753456115723, "learning_rate": 8.80011678660191e-06, "loss": 0.434, "step": 27376 }, { "epoch": 4.469042079915106, "grad_norm": 3.6112825870513916, "learning_rate": 8.799480279762488e-06, "loss": 0.4082, "step": 27377 }, { "epoch": 4.46920533855761, "grad_norm": 3.980664014816284, "learning_rate": 8.79884377785795e-06, "loss": 0.3903, "step": 27378 }, { "epoch": 4.4693685972001145, "grad_norm": 4.130534648895264, "learning_rate": 8.798207280890912e-06, "loss": 0.4398, "step": 27379 }, { "epoch": 4.469531855842619, "grad_norm": 3.884780168533325, "learning_rate": 8.79757078886399e-06, "loss": 0.4564, "step": 27380 }, { "epoch": 4.469695114485123, "grad_norm": 2.813157081604004, "learning_rate": 8.7969343017798e-06, "loss": 0.3189, "step": 27381 }, { "epoch": 4.469858373127628, "grad_norm": 3.5234251022338867, "learning_rate": 8.796297819640958e-06, "loss": 0.3806, "step": 27382 }, { "epoch": 4.470021631770132, "grad_norm": 2.5134565830230713, "learning_rate": 8.795661342450082e-06, "loss": 0.2964, "step": 27383 }, { "epoch": 4.470184890412636, "grad_norm": 3.337498664855957, "learning_rate": 8.795024870209792e-06, "loss": 0.3854, "step": 27384 }, { "epoch": 4.47034814905514, "grad_norm": 3.75349497795105, "learning_rate": 8.794388402922696e-06, "loss": 0.4552, "step": 27385 }, { "epoch": 4.470511407697645, "grad_norm": 3.8141143321990967, "learning_rate": 8.79375194059141e-06, "loss": 0.5064, "step": 27386 }, { "epoch": 4.470674666340149, "grad_norm": 3.3968775272369385, "learning_rate": 8.793115483218559e-06, "loss": 0.4042, "step": 27387 }, { "epoch": 4.4708379249826535, "grad_norm": 3.44447922706604, "learning_rate": 8.792479030806753e-06, "loss": 0.3801, "step": 27388 }, { "epoch": 4.471001183625158, "grad_norm": 3.766684055328369, "learning_rate": 8.79184258335861e-06, "loss": 0.4124, "step": 27389 }, { "epoch": 4.471164442267662, "grad_norm": 3.731097459793091, "learning_rate": 8.791206140876746e-06, "loss": 0.3946, "step": 27390 }, { "epoch": 4.471327700910167, "grad_norm": 3.3931291103363037, "learning_rate": 8.790569703363778e-06, "loss": 0.3515, "step": 27391 }, { "epoch": 4.471490959552671, "grad_norm": 3.297616720199585, "learning_rate": 8.78993327082232e-06, "loss": 0.3785, "step": 27392 }, { "epoch": 4.471654218195176, "grad_norm": 3.898165225982666, "learning_rate": 8.78929684325499e-06, "loss": 0.3856, "step": 27393 }, { "epoch": 4.47181747683768, "grad_norm": 3.3003592491149902, "learning_rate": 8.788660420664405e-06, "loss": 0.3277, "step": 27394 }, { "epoch": 4.471980735480185, "grad_norm": 3.7596449851989746, "learning_rate": 8.788024003053178e-06, "loss": 0.4429, "step": 27395 }, { "epoch": 4.472143994122689, "grad_norm": 3.5388944149017334, "learning_rate": 8.787387590423927e-06, "loss": 0.3761, "step": 27396 }, { "epoch": 4.4723072527651935, "grad_norm": 2.9423933029174805, "learning_rate": 8.786751182779267e-06, "loss": 0.3561, "step": 27397 }, { "epoch": 4.472470511407698, "grad_norm": 3.3204824924468994, "learning_rate": 8.786114780121814e-06, "loss": 0.3785, "step": 27398 }, { "epoch": 4.472633770050202, "grad_norm": 3.7263686656951904, "learning_rate": 8.785478382454186e-06, "loss": 0.4433, "step": 27399 }, { "epoch": 4.472797028692707, "grad_norm": 3.202359199523926, "learning_rate": 8.784841989778997e-06, "loss": 0.3315, "step": 27400 }, { "epoch": 4.47296028733521, "grad_norm": 3.679654836654663, "learning_rate": 8.784205602098866e-06, "loss": 0.3912, "step": 27401 }, { "epoch": 4.473123545977716, "grad_norm": 2.8617494106292725, "learning_rate": 8.783569219416404e-06, "loss": 0.3535, "step": 27402 }, { "epoch": 4.473286804620219, "grad_norm": 3.645724296569824, "learning_rate": 8.78293284173423e-06, "loss": 0.3927, "step": 27403 }, { "epoch": 4.473450063262724, "grad_norm": 3.0708560943603516, "learning_rate": 8.782296469054964e-06, "loss": 0.3299, "step": 27404 }, { "epoch": 4.473613321905228, "grad_norm": 3.2496891021728516, "learning_rate": 8.781660101381214e-06, "loss": 0.3292, "step": 27405 }, { "epoch": 4.4737765805477325, "grad_norm": 3.7934200763702393, "learning_rate": 8.7810237387156e-06, "loss": 0.4503, "step": 27406 }, { "epoch": 4.473939839190237, "grad_norm": 3.2842965126037598, "learning_rate": 8.780387381060735e-06, "loss": 0.3693, "step": 27407 }, { "epoch": 4.474103097832741, "grad_norm": 3.7225120067596436, "learning_rate": 8.779751028419239e-06, "loss": 0.412, "step": 27408 }, { "epoch": 4.474266356475246, "grad_norm": 2.7874531745910645, "learning_rate": 8.779114680793723e-06, "loss": 0.2974, "step": 27409 }, { "epoch": 4.47442961511775, "grad_norm": 4.518360614776611, "learning_rate": 8.778478338186807e-06, "loss": 0.4589, "step": 27410 }, { "epoch": 4.474592873760255, "grad_norm": 3.165804862976074, "learning_rate": 8.777842000601106e-06, "loss": 0.3324, "step": 27411 }, { "epoch": 4.474756132402759, "grad_norm": 4.039962291717529, "learning_rate": 8.777205668039234e-06, "loss": 0.4041, "step": 27412 }, { "epoch": 4.474919391045264, "grad_norm": 4.1545939445495605, "learning_rate": 8.776569340503811e-06, "loss": 0.3953, "step": 27413 }, { "epoch": 4.475082649687768, "grad_norm": 3.457735776901245, "learning_rate": 8.775933017997448e-06, "loss": 0.4047, "step": 27414 }, { "epoch": 4.4752459083302725, "grad_norm": 3.629404067993164, "learning_rate": 8.77529670052276e-06, "loss": 0.3567, "step": 27415 }, { "epoch": 4.475409166972777, "grad_norm": 4.64582633972168, "learning_rate": 8.774660388082366e-06, "loss": 0.4256, "step": 27416 }, { "epoch": 4.475572425615281, "grad_norm": 4.067535877227783, "learning_rate": 8.774024080678882e-06, "loss": 0.4035, "step": 27417 }, { "epoch": 4.475735684257786, "grad_norm": 3.341027021408081, "learning_rate": 8.77338777831492e-06, "loss": 0.3365, "step": 27418 }, { "epoch": 4.47589894290029, "grad_norm": 3.471877098083496, "learning_rate": 8.772751480993098e-06, "loss": 0.3966, "step": 27419 }, { "epoch": 4.476062201542794, "grad_norm": 3.922990560531616, "learning_rate": 8.772115188716033e-06, "loss": 0.4312, "step": 27420 }, { "epoch": 4.476225460185298, "grad_norm": 4.651517868041992, "learning_rate": 8.771478901486334e-06, "loss": 0.4577, "step": 27421 }, { "epoch": 4.476388718827803, "grad_norm": 2.9061172008514404, "learning_rate": 8.770842619306626e-06, "loss": 0.3009, "step": 27422 }, { "epoch": 4.476551977470307, "grad_norm": 2.4332387447357178, "learning_rate": 8.770206342179523e-06, "loss": 0.2686, "step": 27423 }, { "epoch": 4.4767152361128115, "grad_norm": 3.6406350135803223, "learning_rate": 8.769570070107632e-06, "loss": 0.3671, "step": 27424 }, { "epoch": 4.476878494755316, "grad_norm": 2.9917454719543457, "learning_rate": 8.768933803093573e-06, "loss": 0.3726, "step": 27425 }, { "epoch": 4.47704175339782, "grad_norm": 3.3760275840759277, "learning_rate": 8.768297541139964e-06, "loss": 0.3609, "step": 27426 }, { "epoch": 4.477205012040325, "grad_norm": 3.086308717727661, "learning_rate": 8.76766128424942e-06, "loss": 0.3465, "step": 27427 }, { "epoch": 4.477368270682829, "grad_norm": 4.231956958770752, "learning_rate": 8.767025032424553e-06, "loss": 0.3987, "step": 27428 }, { "epoch": 4.477531529325334, "grad_norm": 3.2077300548553467, "learning_rate": 8.766388785667982e-06, "loss": 0.3618, "step": 27429 }, { "epoch": 4.477694787967838, "grad_norm": 3.711198568344116, "learning_rate": 8.76575254398232e-06, "loss": 0.3834, "step": 27430 }, { "epoch": 4.477858046610343, "grad_norm": 3.6267006397247314, "learning_rate": 8.765116307370183e-06, "loss": 0.3598, "step": 27431 }, { "epoch": 4.478021305252847, "grad_norm": 3.7334883213043213, "learning_rate": 8.764480075834186e-06, "loss": 0.3619, "step": 27432 }, { "epoch": 4.4781845638953515, "grad_norm": 3.210968494415283, "learning_rate": 8.76384384937695e-06, "loss": 0.3767, "step": 27433 }, { "epoch": 4.478347822537856, "grad_norm": 3.170194387435913, "learning_rate": 8.763207628001081e-06, "loss": 0.3292, "step": 27434 }, { "epoch": 4.47851108118036, "grad_norm": 4.1035475730896, "learning_rate": 8.762571411709199e-06, "loss": 0.3865, "step": 27435 }, { "epoch": 4.478674339822865, "grad_norm": 4.266820430755615, "learning_rate": 8.761935200503917e-06, "loss": 0.4691, "step": 27436 }, { "epoch": 4.478837598465368, "grad_norm": 3.8757317066192627, "learning_rate": 8.761298994387854e-06, "loss": 0.4095, "step": 27437 }, { "epoch": 4.479000857107873, "grad_norm": 3.726266860961914, "learning_rate": 8.760662793363622e-06, "loss": 0.3756, "step": 27438 }, { "epoch": 4.479164115750377, "grad_norm": 4.183494567871094, "learning_rate": 8.760026597433837e-06, "loss": 0.4839, "step": 27439 }, { "epoch": 4.479327374392882, "grad_norm": 2.856275796890259, "learning_rate": 8.759390406601116e-06, "loss": 0.3418, "step": 27440 }, { "epoch": 4.479490633035386, "grad_norm": 2.864168405532837, "learning_rate": 8.75875422086807e-06, "loss": 0.3404, "step": 27441 }, { "epoch": 4.4796538916778905, "grad_norm": 3.786191701889038, "learning_rate": 8.75811804023732e-06, "loss": 0.4141, "step": 27442 }, { "epoch": 4.479817150320395, "grad_norm": 3.6814756393432617, "learning_rate": 8.757481864711479e-06, "loss": 0.4146, "step": 27443 }, { "epoch": 4.479980408962899, "grad_norm": 3.5880303382873535, "learning_rate": 8.756845694293158e-06, "loss": 0.3697, "step": 27444 }, { "epoch": 4.480143667605404, "grad_norm": 2.9237561225891113, "learning_rate": 8.756209528984975e-06, "loss": 0.3449, "step": 27445 }, { "epoch": 4.480306926247908, "grad_norm": 4.050052165985107, "learning_rate": 8.755573368789545e-06, "loss": 0.3509, "step": 27446 }, { "epoch": 4.480470184890413, "grad_norm": 3.459468126296997, "learning_rate": 8.754937213709483e-06, "loss": 0.3839, "step": 27447 }, { "epoch": 4.480633443532917, "grad_norm": 4.010242462158203, "learning_rate": 8.754301063747403e-06, "loss": 0.3981, "step": 27448 }, { "epoch": 4.480796702175422, "grad_norm": 3.828937530517578, "learning_rate": 8.753664918905921e-06, "loss": 0.3792, "step": 27449 }, { "epoch": 4.480959960817926, "grad_norm": 4.2158918380737305, "learning_rate": 8.753028779187654e-06, "loss": 0.4783, "step": 27450 }, { "epoch": 4.4811232194604305, "grad_norm": 3.8468048572540283, "learning_rate": 8.752392644595213e-06, "loss": 0.3393, "step": 27451 }, { "epoch": 4.481286478102935, "grad_norm": 3.4837448596954346, "learning_rate": 8.751756515131218e-06, "loss": 0.3814, "step": 27452 }, { "epoch": 4.481449736745439, "grad_norm": 4.294055461883545, "learning_rate": 8.751120390798277e-06, "loss": 0.4769, "step": 27453 }, { "epoch": 4.481612995387943, "grad_norm": 3.595825433731079, "learning_rate": 8.75048427159901e-06, "loss": 0.3682, "step": 27454 }, { "epoch": 4.481776254030447, "grad_norm": 3.8990771770477295, "learning_rate": 8.749848157536031e-06, "loss": 0.4026, "step": 27455 }, { "epoch": 4.481939512672952, "grad_norm": 4.165010929107666, "learning_rate": 8.749212048611953e-06, "loss": 0.4368, "step": 27456 }, { "epoch": 4.482102771315456, "grad_norm": 3.2003321647644043, "learning_rate": 8.748575944829392e-06, "loss": 0.3983, "step": 27457 }, { "epoch": 4.482266029957961, "grad_norm": 3.9077069759368896, "learning_rate": 8.747939846190962e-06, "loss": 0.4302, "step": 27458 }, { "epoch": 4.482429288600465, "grad_norm": 4.036569118499756, "learning_rate": 8.74730375269928e-06, "loss": 0.3451, "step": 27459 }, { "epoch": 4.4825925472429695, "grad_norm": 4.395487308502197, "learning_rate": 8.746667664356957e-06, "loss": 0.3987, "step": 27460 }, { "epoch": 4.482755805885474, "grad_norm": 4.178109645843506, "learning_rate": 8.746031581166612e-06, "loss": 0.4157, "step": 27461 }, { "epoch": 4.482919064527978, "grad_norm": 3.377373218536377, "learning_rate": 8.745395503130863e-06, "loss": 0.3762, "step": 27462 }, { "epoch": 4.483082323170483, "grad_norm": 2.972505807876587, "learning_rate": 8.74475943025231e-06, "loss": 0.3659, "step": 27463 }, { "epoch": 4.483245581812987, "grad_norm": 4.078818321228027, "learning_rate": 8.74412336253358e-06, "loss": 0.4036, "step": 27464 }, { "epoch": 4.483408840455492, "grad_norm": 3.176072597503662, "learning_rate": 8.743487299977287e-06, "loss": 0.3152, "step": 27465 }, { "epoch": 4.483572099097996, "grad_norm": 4.299774646759033, "learning_rate": 8.742851242586041e-06, "loss": 0.4802, "step": 27466 }, { "epoch": 4.483735357740501, "grad_norm": 3.227710723876953, "learning_rate": 8.742215190362459e-06, "loss": 0.3392, "step": 27467 }, { "epoch": 4.483898616383005, "grad_norm": 2.854363441467285, "learning_rate": 8.741579143309157e-06, "loss": 0.3375, "step": 27468 }, { "epoch": 4.4840618750255095, "grad_norm": 3.35909366607666, "learning_rate": 8.740943101428747e-06, "loss": 0.36, "step": 27469 }, { "epoch": 4.484225133668014, "grad_norm": 3.4114086627960205, "learning_rate": 8.740307064723842e-06, "loss": 0.3847, "step": 27470 }, { "epoch": 4.484388392310518, "grad_norm": 3.4603383541107178, "learning_rate": 8.739671033197062e-06, "loss": 0.364, "step": 27471 }, { "epoch": 4.484551650953023, "grad_norm": 3.2764506340026855, "learning_rate": 8.739035006851022e-06, "loss": 0.389, "step": 27472 }, { "epoch": 4.484714909595526, "grad_norm": 3.339275360107422, "learning_rate": 8.73839898568833e-06, "loss": 0.336, "step": 27473 }, { "epoch": 4.484878168238031, "grad_norm": 2.9821863174438477, "learning_rate": 8.737762969711601e-06, "loss": 0.3513, "step": 27474 }, { "epoch": 4.485041426880535, "grad_norm": 4.261667728424072, "learning_rate": 8.737126958923451e-06, "loss": 0.4437, "step": 27475 }, { "epoch": 4.48520468552304, "grad_norm": 2.9129064083099365, "learning_rate": 8.736490953326498e-06, "loss": 0.318, "step": 27476 }, { "epoch": 4.485367944165544, "grad_norm": 3.216963529586792, "learning_rate": 8.735854952923351e-06, "loss": 0.3509, "step": 27477 }, { "epoch": 4.4855312028080485, "grad_norm": 3.335458993911743, "learning_rate": 8.73521895771663e-06, "loss": 0.3115, "step": 27478 }, { "epoch": 4.485694461450553, "grad_norm": 3.10638689994812, "learning_rate": 8.734582967708944e-06, "loss": 0.3218, "step": 27479 }, { "epoch": 4.485857720093057, "grad_norm": 3.8647141456604004, "learning_rate": 8.733946982902912e-06, "loss": 0.3379, "step": 27480 }, { "epoch": 4.486020978735562, "grad_norm": 3.354518413543701, "learning_rate": 8.733311003301145e-06, "loss": 0.3258, "step": 27481 }, { "epoch": 4.486184237378066, "grad_norm": 3.2885231971740723, "learning_rate": 8.732675028906258e-06, "loss": 0.334, "step": 27482 }, { "epoch": 4.486347496020571, "grad_norm": 3.7931747436523438, "learning_rate": 8.732039059720866e-06, "loss": 0.3901, "step": 27483 }, { "epoch": 4.486510754663075, "grad_norm": 4.82932186126709, "learning_rate": 8.731403095747582e-06, "loss": 0.352, "step": 27484 }, { "epoch": 4.48667401330558, "grad_norm": 3.4344358444213867, "learning_rate": 8.730767136989022e-06, "loss": 0.3614, "step": 27485 }, { "epoch": 4.486837271948084, "grad_norm": 3.0539324283599854, "learning_rate": 8.730131183447795e-06, "loss": 0.3163, "step": 27486 }, { "epoch": 4.4870005305905885, "grad_norm": 3.170443534851074, "learning_rate": 8.729495235126522e-06, "loss": 0.3859, "step": 27487 }, { "epoch": 4.487163789233093, "grad_norm": 3.378134250640869, "learning_rate": 8.728859292027815e-06, "loss": 0.354, "step": 27488 }, { "epoch": 4.487327047875597, "grad_norm": 3.4282124042510986, "learning_rate": 8.728223354154286e-06, "loss": 0.3877, "step": 27489 }, { "epoch": 4.487490306518101, "grad_norm": 2.6264290809631348, "learning_rate": 8.727587421508552e-06, "loss": 0.3313, "step": 27490 }, { "epoch": 4.487653565160605, "grad_norm": 4.802624702453613, "learning_rate": 8.726951494093226e-06, "loss": 0.4385, "step": 27491 }, { "epoch": 4.48781682380311, "grad_norm": 4.00194787979126, "learning_rate": 8.726315571910921e-06, "loss": 0.4357, "step": 27492 }, { "epoch": 4.487980082445614, "grad_norm": 3.588209867477417, "learning_rate": 8.725679654964252e-06, "loss": 0.4139, "step": 27493 }, { "epoch": 4.488143341088119, "grad_norm": 2.825040340423584, "learning_rate": 8.725043743255834e-06, "loss": 0.3388, "step": 27494 }, { "epoch": 4.488306599730623, "grad_norm": 3.719548463821411, "learning_rate": 8.724407836788277e-06, "loss": 0.413, "step": 27495 }, { "epoch": 4.4884698583731275, "grad_norm": 3.0816714763641357, "learning_rate": 8.723771935564197e-06, "loss": 0.3893, "step": 27496 }, { "epoch": 4.488633117015632, "grad_norm": 3.536677360534668, "learning_rate": 8.72313603958621e-06, "loss": 0.3623, "step": 27497 }, { "epoch": 4.488796375658136, "grad_norm": 3.6772778034210205, "learning_rate": 8.72250014885693e-06, "loss": 0.3841, "step": 27498 }, { "epoch": 4.488959634300641, "grad_norm": 3.6598594188690186, "learning_rate": 8.721864263378968e-06, "loss": 0.3585, "step": 27499 }, { "epoch": 4.489122892943145, "grad_norm": 3.266528844833374, "learning_rate": 8.721228383154939e-06, "loss": 0.4121, "step": 27500 }, { "epoch": 4.48928615158565, "grad_norm": 3.334590435028076, "learning_rate": 8.720592508187463e-06, "loss": 0.325, "step": 27501 }, { "epoch": 4.489449410228154, "grad_norm": 3.702807664871216, "learning_rate": 8.719956638479142e-06, "loss": 0.3956, "step": 27502 }, { "epoch": 4.489612668870659, "grad_norm": 2.932873010635376, "learning_rate": 8.719320774032596e-06, "loss": 0.3211, "step": 27503 }, { "epoch": 4.489775927513163, "grad_norm": 3.327850580215454, "learning_rate": 8.71868491485044e-06, "loss": 0.3577, "step": 27504 }, { "epoch": 4.4899391861556674, "grad_norm": 3.3954968452453613, "learning_rate": 8.718049060935287e-06, "loss": 0.3577, "step": 27505 }, { "epoch": 4.490102444798172, "grad_norm": 4.0663909912109375, "learning_rate": 8.717413212289751e-06, "loss": 0.3715, "step": 27506 }, { "epoch": 4.490265703440675, "grad_norm": 3.5532093048095703, "learning_rate": 8.716777368916443e-06, "loss": 0.3605, "step": 27507 }, { "epoch": 4.49042896208318, "grad_norm": 3.504167318344116, "learning_rate": 8.716141530817979e-06, "loss": 0.3455, "step": 27508 }, { "epoch": 4.490592220725684, "grad_norm": 3.571514129638672, "learning_rate": 8.715505697996972e-06, "loss": 0.3552, "step": 27509 }, { "epoch": 4.490755479368189, "grad_norm": 4.49793004989624, "learning_rate": 8.714869870456034e-06, "loss": 0.4077, "step": 27510 }, { "epoch": 4.490918738010693, "grad_norm": 3.602139472961426, "learning_rate": 8.71423404819779e-06, "loss": 0.3663, "step": 27511 }, { "epoch": 4.491081996653198, "grad_norm": 3.252715826034546, "learning_rate": 8.713598231224838e-06, "loss": 0.3222, "step": 27512 }, { "epoch": 4.491245255295702, "grad_norm": 3.9909250736236572, "learning_rate": 8.712962419539795e-06, "loss": 0.4024, "step": 27513 }, { "epoch": 4.4914085139382065, "grad_norm": 2.749802589416504, "learning_rate": 8.71232661314528e-06, "loss": 0.3296, "step": 27514 }, { "epoch": 4.491571772580711, "grad_norm": 3.716707944869995, "learning_rate": 8.711690812043904e-06, "loss": 0.3766, "step": 27515 }, { "epoch": 4.491735031223215, "grad_norm": 3.329834222793579, "learning_rate": 8.711055016238283e-06, "loss": 0.3613, "step": 27516 }, { "epoch": 4.49189828986572, "grad_norm": 3.8757200241088867, "learning_rate": 8.710419225731025e-06, "loss": 0.34, "step": 27517 }, { "epoch": 4.492061548508224, "grad_norm": 3.0000460147857666, "learning_rate": 8.709783440524749e-06, "loss": 0.3454, "step": 27518 }, { "epoch": 4.492224807150729, "grad_norm": 3.2743492126464844, "learning_rate": 8.709147660622064e-06, "loss": 0.378, "step": 27519 }, { "epoch": 4.492388065793233, "grad_norm": 3.5414626598358154, "learning_rate": 8.708511886025585e-06, "loss": 0.3898, "step": 27520 }, { "epoch": 4.492551324435738, "grad_norm": 4.201446056365967, "learning_rate": 8.707876116737931e-06, "loss": 0.4294, "step": 27521 }, { "epoch": 4.492714583078242, "grad_norm": 3.89851450920105, "learning_rate": 8.707240352761707e-06, "loss": 0.4227, "step": 27522 }, { "epoch": 4.4928778417207464, "grad_norm": 3.5901710987091064, "learning_rate": 8.70660459409953e-06, "loss": 0.415, "step": 27523 }, { "epoch": 4.493041100363251, "grad_norm": 4.251260280609131, "learning_rate": 8.70596884075401e-06, "loss": 0.3991, "step": 27524 }, { "epoch": 4.493204359005755, "grad_norm": 3.6868300437927246, "learning_rate": 8.705333092727766e-06, "loss": 0.3775, "step": 27525 }, { "epoch": 4.493367617648259, "grad_norm": 3.018441677093506, "learning_rate": 8.70469735002341e-06, "loss": 0.3493, "step": 27526 }, { "epoch": 4.493530876290763, "grad_norm": 3.225743293762207, "learning_rate": 8.704061612643552e-06, "loss": 0.3108, "step": 27527 }, { "epoch": 4.493694134933268, "grad_norm": 3.0390849113464355, "learning_rate": 8.703425880590809e-06, "loss": 0.3117, "step": 27528 }, { "epoch": 4.493857393575772, "grad_norm": 4.192257881164551, "learning_rate": 8.702790153867792e-06, "loss": 0.3689, "step": 27529 }, { "epoch": 4.494020652218277, "grad_norm": 3.5061545372009277, "learning_rate": 8.702154432477115e-06, "loss": 0.3542, "step": 27530 }, { "epoch": 4.494183910860781, "grad_norm": 3.735517740249634, "learning_rate": 8.701518716421394e-06, "loss": 0.3567, "step": 27531 }, { "epoch": 4.4943471695032855, "grad_norm": 3.570936441421509, "learning_rate": 8.700883005703236e-06, "loss": 0.3833, "step": 27532 }, { "epoch": 4.49451042814579, "grad_norm": 3.528886079788208, "learning_rate": 8.700247300325257e-06, "loss": 0.369, "step": 27533 }, { "epoch": 4.494673686788294, "grad_norm": 3.2032997608184814, "learning_rate": 8.699611600290072e-06, "loss": 0.3652, "step": 27534 }, { "epoch": 4.494836945430799, "grad_norm": 4.154246807098389, "learning_rate": 8.698975905600292e-06, "loss": 0.4533, "step": 27535 }, { "epoch": 4.495000204073303, "grad_norm": 4.145125389099121, "learning_rate": 8.698340216258528e-06, "loss": 0.3895, "step": 27536 }, { "epoch": 4.495163462715808, "grad_norm": 4.17068338394165, "learning_rate": 8.697704532267399e-06, "loss": 0.4096, "step": 27537 }, { "epoch": 4.495326721358312, "grad_norm": 3.1707894802093506, "learning_rate": 8.697068853629514e-06, "loss": 0.3919, "step": 27538 }, { "epoch": 4.495489980000817, "grad_norm": 3.2450735569000244, "learning_rate": 8.696433180347488e-06, "loss": 0.3892, "step": 27539 }, { "epoch": 4.495653238643321, "grad_norm": 3.6877880096435547, "learning_rate": 8.695797512423932e-06, "loss": 0.3497, "step": 27540 }, { "epoch": 4.495816497285825, "grad_norm": 3.2503015995025635, "learning_rate": 8.695161849861462e-06, "loss": 0.3368, "step": 27541 }, { "epoch": 4.49597975592833, "grad_norm": 3.905463457107544, "learning_rate": 8.69452619266269e-06, "loss": 0.4215, "step": 27542 }, { "epoch": 4.496143014570833, "grad_norm": 4.08515739440918, "learning_rate": 8.693890540830224e-06, "loss": 0.4164, "step": 27543 }, { "epoch": 4.496306273213338, "grad_norm": 3.647174119949341, "learning_rate": 8.693254894366683e-06, "loss": 0.3794, "step": 27544 }, { "epoch": 4.496469531855842, "grad_norm": 3.7306995391845703, "learning_rate": 8.692619253274677e-06, "loss": 0.3556, "step": 27545 }, { "epoch": 4.496632790498347, "grad_norm": 3.724454641342163, "learning_rate": 8.691983617556818e-06, "loss": 0.3724, "step": 27546 }, { "epoch": 4.496796049140851, "grad_norm": 5.278686046600342, "learning_rate": 8.69134798721572e-06, "loss": 0.4112, "step": 27547 }, { "epoch": 4.496959307783356, "grad_norm": 3.6608195304870605, "learning_rate": 8.690712362253998e-06, "loss": 0.401, "step": 27548 }, { "epoch": 4.49712256642586, "grad_norm": 3.0011537075042725, "learning_rate": 8.690076742674264e-06, "loss": 0.3853, "step": 27549 }, { "epoch": 4.4972858250683645, "grad_norm": 3.246933698654175, "learning_rate": 8.689441128479134e-06, "loss": 0.3571, "step": 27550 }, { "epoch": 4.497449083710869, "grad_norm": 3.8756370544433594, "learning_rate": 8.68880551967121e-06, "loss": 0.3946, "step": 27551 }, { "epoch": 4.497612342353373, "grad_norm": 3.538379669189453, "learning_rate": 8.688169916253113e-06, "loss": 0.3266, "step": 27552 }, { "epoch": 4.497775600995878, "grad_norm": 4.634031772613525, "learning_rate": 8.687534318227453e-06, "loss": 0.4047, "step": 27553 }, { "epoch": 4.497938859638382, "grad_norm": 3.8423404693603516, "learning_rate": 8.686898725596847e-06, "loss": 0.361, "step": 27554 }, { "epoch": 4.498102118280887, "grad_norm": 3.5415616035461426, "learning_rate": 8.686263138363902e-06, "loss": 0.3132, "step": 27555 }, { "epoch": 4.498265376923391, "grad_norm": 3.265347480773926, "learning_rate": 8.685627556531234e-06, "loss": 0.415, "step": 27556 }, { "epoch": 4.498428635565896, "grad_norm": 3.4408161640167236, "learning_rate": 8.684991980101453e-06, "loss": 0.4153, "step": 27557 }, { "epoch": 4.4985918942084, "grad_norm": 4.355221271514893, "learning_rate": 8.684356409077177e-06, "loss": 0.4569, "step": 27558 }, { "epoch": 4.498755152850904, "grad_norm": 3.3701419830322266, "learning_rate": 8.68372084346101e-06, "loss": 0.3748, "step": 27559 }, { "epoch": 4.498918411493408, "grad_norm": 3.856661558151245, "learning_rate": 8.683085283255577e-06, "loss": 0.4079, "step": 27560 }, { "epoch": 4.499081670135912, "grad_norm": 2.7896265983581543, "learning_rate": 8.682449728463479e-06, "loss": 0.348, "step": 27561 }, { "epoch": 4.499244928778417, "grad_norm": 3.379721164703369, "learning_rate": 8.681814179087333e-06, "loss": 0.4236, "step": 27562 }, { "epoch": 4.499408187420921, "grad_norm": 3.72835111618042, "learning_rate": 8.681178635129748e-06, "loss": 0.428, "step": 27563 }, { "epoch": 4.499571446063426, "grad_norm": 3.6711297035217285, "learning_rate": 8.680543096593344e-06, "loss": 0.3791, "step": 27564 }, { "epoch": 4.49973470470593, "grad_norm": 3.3563828468322754, "learning_rate": 8.679907563480727e-06, "loss": 0.3602, "step": 27565 }, { "epoch": 4.499897963348435, "grad_norm": 4.119307518005371, "learning_rate": 8.679272035794511e-06, "loss": 0.4379, "step": 27566 }, { "epoch": 4.500061221990939, "grad_norm": 2.990424156188965, "learning_rate": 8.678636513537311e-06, "loss": 0.3615, "step": 27567 }, { "epoch": 4.5002244806334435, "grad_norm": 3.067180871963501, "learning_rate": 8.678000996711735e-06, "loss": 0.3265, "step": 27568 }, { "epoch": 4.500387739275948, "grad_norm": 3.5521535873413086, "learning_rate": 8.677365485320399e-06, "loss": 0.3943, "step": 27569 }, { "epoch": 4.500550997918452, "grad_norm": 3.31693959236145, "learning_rate": 8.676729979365918e-06, "loss": 0.327, "step": 27570 }, { "epoch": 4.500714256560957, "grad_norm": 3.218859910964966, "learning_rate": 8.676094478850897e-06, "loss": 0.3589, "step": 27571 }, { "epoch": 4.500877515203461, "grad_norm": 3.564826011657715, "learning_rate": 8.67545898377795e-06, "loss": 0.3633, "step": 27572 }, { "epoch": 4.501040773845966, "grad_norm": 3.930205821990967, "learning_rate": 8.674823494149692e-06, "loss": 0.3296, "step": 27573 }, { "epoch": 4.50120403248847, "grad_norm": 3.487382411956787, "learning_rate": 8.674188009968732e-06, "loss": 0.348, "step": 27574 }, { "epoch": 4.501367291130975, "grad_norm": 3.6186716556549072, "learning_rate": 8.673552531237689e-06, "loss": 0.3548, "step": 27575 }, { "epoch": 4.501530549773479, "grad_norm": 4.125778675079346, "learning_rate": 8.672917057959167e-06, "loss": 0.4428, "step": 27576 }, { "epoch": 4.5016938084159825, "grad_norm": 3.5832700729370117, "learning_rate": 8.672281590135784e-06, "loss": 0.3791, "step": 27577 }, { "epoch": 4.501857067058488, "grad_norm": 3.365204095840454, "learning_rate": 8.671646127770149e-06, "loss": 0.3943, "step": 27578 }, { "epoch": 4.502020325700991, "grad_norm": 3.297997236251831, "learning_rate": 8.671010670864876e-06, "loss": 0.3716, "step": 27579 }, { "epoch": 4.502183584343496, "grad_norm": 3.600667715072632, "learning_rate": 8.670375219422578e-06, "loss": 0.4207, "step": 27580 }, { "epoch": 4.502346842986, "grad_norm": 3.5383617877960205, "learning_rate": 8.669739773445865e-06, "loss": 0.379, "step": 27581 }, { "epoch": 4.502510101628505, "grad_norm": 3.660335063934326, "learning_rate": 8.669104332937348e-06, "loss": 0.404, "step": 27582 }, { "epoch": 4.502673360271009, "grad_norm": 3.45416522026062, "learning_rate": 8.668468897899638e-06, "loss": 0.3861, "step": 27583 }, { "epoch": 4.502836618913514, "grad_norm": 3.6367945671081543, "learning_rate": 8.667833468335354e-06, "loss": 0.4069, "step": 27584 }, { "epoch": 4.502999877556018, "grad_norm": 2.8380184173583984, "learning_rate": 8.667198044247102e-06, "loss": 0.3292, "step": 27585 }, { "epoch": 4.5031631361985225, "grad_norm": 4.268363952636719, "learning_rate": 8.666562625637492e-06, "loss": 0.4724, "step": 27586 }, { "epoch": 4.503326394841027, "grad_norm": 3.168704032897949, "learning_rate": 8.665927212509143e-06, "loss": 0.3571, "step": 27587 }, { "epoch": 4.503489653483531, "grad_norm": 3.3711884021759033, "learning_rate": 8.665291804864664e-06, "loss": 0.3269, "step": 27588 }, { "epoch": 4.503652912126036, "grad_norm": 4.257577419281006, "learning_rate": 8.66465640270667e-06, "loss": 0.435, "step": 27589 }, { "epoch": 4.50381617076854, "grad_norm": 3.0575263500213623, "learning_rate": 8.664021006037762e-06, "loss": 0.3355, "step": 27590 }, { "epoch": 4.503979429411045, "grad_norm": 3.41996431350708, "learning_rate": 8.663385614860562e-06, "loss": 0.3384, "step": 27591 }, { "epoch": 4.504142688053549, "grad_norm": 3.7631402015686035, "learning_rate": 8.66275022917768e-06, "loss": 0.4033, "step": 27592 }, { "epoch": 4.5043059466960536, "grad_norm": 3.0375514030456543, "learning_rate": 8.662114848991726e-06, "loss": 0.3337, "step": 27593 }, { "epoch": 4.504469205338558, "grad_norm": 3.313687324523926, "learning_rate": 8.661479474305314e-06, "loss": 0.3748, "step": 27594 }, { "epoch": 4.504632463981062, "grad_norm": 3.1124298572540283, "learning_rate": 8.660844105121052e-06, "loss": 0.3609, "step": 27595 }, { "epoch": 4.504795722623566, "grad_norm": 3.9351329803466797, "learning_rate": 8.660208741441557e-06, "loss": 0.4128, "step": 27596 }, { "epoch": 4.50495898126607, "grad_norm": 4.186864376068115, "learning_rate": 8.659573383269432e-06, "loss": 0.4104, "step": 27597 }, { "epoch": 4.505122239908575, "grad_norm": 2.5440328121185303, "learning_rate": 8.6589380306073e-06, "loss": 0.3024, "step": 27598 }, { "epoch": 4.505285498551079, "grad_norm": 4.005069732666016, "learning_rate": 8.658302683457772e-06, "loss": 0.396, "step": 27599 }, { "epoch": 4.505448757193584, "grad_norm": 3.749230146408081, "learning_rate": 8.657667341823449e-06, "loss": 0.4061, "step": 27600 }, { "epoch": 4.505612015836088, "grad_norm": 3.443143844604492, "learning_rate": 8.657032005706947e-06, "loss": 0.4291, "step": 27601 }, { "epoch": 4.505775274478593, "grad_norm": 3.982053518295288, "learning_rate": 8.656396675110881e-06, "loss": 0.4539, "step": 27602 }, { "epoch": 4.505938533121097, "grad_norm": 3.1544954776763916, "learning_rate": 8.655761350037862e-06, "loss": 0.3578, "step": 27603 }, { "epoch": 4.5061017917636015, "grad_norm": 3.135328769683838, "learning_rate": 8.6551260304905e-06, "loss": 0.3581, "step": 27604 }, { "epoch": 4.506265050406106, "grad_norm": 3.3687326908111572, "learning_rate": 8.654490716471408e-06, "loss": 0.3902, "step": 27605 }, { "epoch": 4.50642830904861, "grad_norm": 3.848912477493286, "learning_rate": 8.653855407983194e-06, "loss": 0.4111, "step": 27606 }, { "epoch": 4.506591567691115, "grad_norm": 3.5784761905670166, "learning_rate": 8.653220105028476e-06, "loss": 0.3588, "step": 27607 }, { "epoch": 4.506754826333619, "grad_norm": 3.4547626972198486, "learning_rate": 8.652584807609856e-06, "loss": 0.3369, "step": 27608 }, { "epoch": 4.506918084976124, "grad_norm": 4.06155252456665, "learning_rate": 8.651949515729959e-06, "loss": 0.4822, "step": 27609 }, { "epoch": 4.507081343618628, "grad_norm": 3.6504275798797607, "learning_rate": 8.651314229391384e-06, "loss": 0.3554, "step": 27610 }, { "epoch": 4.5072446022611325, "grad_norm": 4.232635498046875, "learning_rate": 8.650678948596746e-06, "loss": 0.4213, "step": 27611 }, { "epoch": 4.507407860903637, "grad_norm": 3.4566612243652344, "learning_rate": 8.65004367334866e-06, "loss": 0.3969, "step": 27612 }, { "epoch": 4.5075711195461405, "grad_norm": 3.896512746810913, "learning_rate": 8.64940840364973e-06, "loss": 0.395, "step": 27613 }, { "epoch": 4.507734378188645, "grad_norm": 3.164278507232666, "learning_rate": 8.648773139502576e-06, "loss": 0.3513, "step": 27614 }, { "epoch": 4.507897636831149, "grad_norm": 3.1735360622406006, "learning_rate": 8.648137880909805e-06, "loss": 0.399, "step": 27615 }, { "epoch": 4.508060895473654, "grad_norm": 3.9351391792297363, "learning_rate": 8.647502627874028e-06, "loss": 0.4201, "step": 27616 }, { "epoch": 4.508224154116158, "grad_norm": 3.314642906188965, "learning_rate": 8.646867380397858e-06, "loss": 0.3639, "step": 27617 }, { "epoch": 4.508387412758663, "grad_norm": 3.8257718086242676, "learning_rate": 8.646232138483903e-06, "loss": 0.3887, "step": 27618 }, { "epoch": 4.508550671401167, "grad_norm": 3.879000425338745, "learning_rate": 8.645596902134781e-06, "loss": 0.3853, "step": 27619 }, { "epoch": 4.508713930043672, "grad_norm": 3.4634649753570557, "learning_rate": 8.644961671353096e-06, "loss": 0.3847, "step": 27620 }, { "epoch": 4.508877188686176, "grad_norm": 3.6282105445861816, "learning_rate": 8.644326446141463e-06, "loss": 0.4362, "step": 27621 }, { "epoch": 4.5090404473286805, "grad_norm": 2.892829418182373, "learning_rate": 8.64369122650249e-06, "loss": 0.3365, "step": 27622 }, { "epoch": 4.509203705971185, "grad_norm": 3.4966838359832764, "learning_rate": 8.64305601243879e-06, "loss": 0.4395, "step": 27623 }, { "epoch": 4.509366964613689, "grad_norm": 3.655571937561035, "learning_rate": 8.642420803952974e-06, "loss": 0.3683, "step": 27624 }, { "epoch": 4.509530223256194, "grad_norm": 3.3191401958465576, "learning_rate": 8.641785601047654e-06, "loss": 0.3442, "step": 27625 }, { "epoch": 4.509693481898698, "grad_norm": 3.4361371994018555, "learning_rate": 8.641150403725442e-06, "loss": 0.3522, "step": 27626 }, { "epoch": 4.509856740541203, "grad_norm": 2.7920806407928467, "learning_rate": 8.640515211988947e-06, "loss": 0.3381, "step": 27627 }, { "epoch": 4.510019999183707, "grad_norm": 3.613295078277588, "learning_rate": 8.639880025840782e-06, "loss": 0.3639, "step": 27628 }, { "epoch": 4.5101832578262115, "grad_norm": 3.0262625217437744, "learning_rate": 8.639244845283557e-06, "loss": 0.3641, "step": 27629 }, { "epoch": 4.510346516468715, "grad_norm": 3.488455057144165, "learning_rate": 8.638609670319879e-06, "loss": 0.4937, "step": 27630 }, { "epoch": 4.51050977511122, "grad_norm": 3.306227922439575, "learning_rate": 8.637974500952366e-06, "loss": 0.3651, "step": 27631 }, { "epoch": 4.510673033753724, "grad_norm": 2.8686628341674805, "learning_rate": 8.637339337183624e-06, "loss": 0.3083, "step": 27632 }, { "epoch": 4.510836292396228, "grad_norm": 3.44624662399292, "learning_rate": 8.636704179016265e-06, "loss": 0.3703, "step": 27633 }, { "epoch": 4.510999551038733, "grad_norm": 3.321424722671509, "learning_rate": 8.636069026452902e-06, "loss": 0.366, "step": 27634 }, { "epoch": 4.511162809681237, "grad_norm": 4.608186721801758, "learning_rate": 8.63543387949614e-06, "loss": 0.93, "step": 27635 }, { "epoch": 4.511326068323742, "grad_norm": 5.816647529602051, "learning_rate": 8.634798738148597e-06, "loss": 0.4678, "step": 27636 }, { "epoch": 4.511489326966246, "grad_norm": 3.9079015254974365, "learning_rate": 8.634163602412881e-06, "loss": 0.3733, "step": 27637 }, { "epoch": 4.511652585608751, "grad_norm": 3.7444570064544678, "learning_rate": 8.633528472291607e-06, "loss": 0.4623, "step": 27638 }, { "epoch": 4.511815844251255, "grad_norm": 2.6942732334136963, "learning_rate": 8.632893347787375e-06, "loss": 0.3106, "step": 27639 }, { "epoch": 4.5119791028937595, "grad_norm": 3.4032907485961914, "learning_rate": 8.632258228902804e-06, "loss": 0.342, "step": 27640 }, { "epoch": 4.512142361536264, "grad_norm": 3.422132968902588, "learning_rate": 8.631623115640504e-06, "loss": 0.3396, "step": 27641 }, { "epoch": 4.512305620178768, "grad_norm": 3.430635690689087, "learning_rate": 8.630988008003084e-06, "loss": 0.3614, "step": 27642 }, { "epoch": 4.512468878821273, "grad_norm": 3.683887243270874, "learning_rate": 8.630352905993156e-06, "loss": 0.3451, "step": 27643 }, { "epoch": 4.512632137463777, "grad_norm": 3.466824769973755, "learning_rate": 8.62971780961333e-06, "loss": 0.37, "step": 27644 }, { "epoch": 4.512795396106282, "grad_norm": 3.2103233337402344, "learning_rate": 8.629082718866216e-06, "loss": 0.3522, "step": 27645 }, { "epoch": 4.512958654748786, "grad_norm": 3.5790867805480957, "learning_rate": 8.628447633754424e-06, "loss": 0.3993, "step": 27646 }, { "epoch": 4.51312191339129, "grad_norm": 3.310663938522339, "learning_rate": 8.627812554280566e-06, "loss": 0.3427, "step": 27647 }, { "epoch": 4.513285172033795, "grad_norm": 3.58431339263916, "learning_rate": 8.627177480447259e-06, "loss": 0.384, "step": 27648 }, { "epoch": 4.5134484306762985, "grad_norm": 2.8483901023864746, "learning_rate": 8.626542412257101e-06, "loss": 0.3196, "step": 27649 }, { "epoch": 4.513611689318803, "grad_norm": 2.9606873989105225, "learning_rate": 8.62590734971271e-06, "loss": 0.3711, "step": 27650 }, { "epoch": 4.513774947961307, "grad_norm": 3.632178544998169, "learning_rate": 8.625272292816691e-06, "loss": 0.4682, "step": 27651 }, { "epoch": 4.513938206603812, "grad_norm": 3.3466098308563232, "learning_rate": 8.624637241571664e-06, "loss": 0.3894, "step": 27652 }, { "epoch": 4.514101465246316, "grad_norm": 3.6372694969177246, "learning_rate": 8.62400219598023e-06, "loss": 0.4236, "step": 27653 }, { "epoch": 4.514264723888821, "grad_norm": 3.242143154144287, "learning_rate": 8.623367156045006e-06, "loss": 0.3679, "step": 27654 }, { "epoch": 4.514427982531325, "grad_norm": 3.4761674404144287, "learning_rate": 8.622732121768598e-06, "loss": 0.387, "step": 27655 }, { "epoch": 4.51459124117383, "grad_norm": 3.1664865016937256, "learning_rate": 8.62209709315362e-06, "loss": 0.3637, "step": 27656 }, { "epoch": 4.514754499816334, "grad_norm": 4.674722194671631, "learning_rate": 8.62146207020268e-06, "loss": 0.5002, "step": 27657 }, { "epoch": 4.5149177584588385, "grad_norm": 3.562021017074585, "learning_rate": 8.620827052918391e-06, "loss": 0.3681, "step": 27658 }, { "epoch": 4.515081017101343, "grad_norm": 3.7695424556732178, "learning_rate": 8.620192041303358e-06, "loss": 0.3951, "step": 27659 }, { "epoch": 4.515244275743847, "grad_norm": 3.018429756164551, "learning_rate": 8.619557035360197e-06, "loss": 0.3607, "step": 27660 }, { "epoch": 4.515407534386352, "grad_norm": 3.317967414855957, "learning_rate": 8.618922035091514e-06, "loss": 0.3921, "step": 27661 }, { "epoch": 4.515570793028856, "grad_norm": 3.253460168838501, "learning_rate": 8.61828704049992e-06, "loss": 0.315, "step": 27662 }, { "epoch": 4.515734051671361, "grad_norm": 3.2635226249694824, "learning_rate": 8.617652051588026e-06, "loss": 0.3548, "step": 27663 }, { "epoch": 4.515897310313865, "grad_norm": 4.339657783508301, "learning_rate": 8.617017068358444e-06, "loss": 0.4458, "step": 27664 }, { "epoch": 4.5160605689563695, "grad_norm": 3.2026708126068115, "learning_rate": 8.616382090813784e-06, "loss": 0.3341, "step": 27665 }, { "epoch": 4.516223827598873, "grad_norm": 3.6066150665283203, "learning_rate": 8.615747118956653e-06, "loss": 0.3531, "step": 27666 }, { "epoch": 4.5163870862413775, "grad_norm": 3.2761216163635254, "learning_rate": 8.615112152789662e-06, "loss": 0.3311, "step": 27667 }, { "epoch": 4.516550344883882, "grad_norm": 2.82867431640625, "learning_rate": 8.614477192315426e-06, "loss": 0.3715, "step": 27668 }, { "epoch": 4.516713603526386, "grad_norm": 4.442132472991943, "learning_rate": 8.61384223753655e-06, "loss": 0.3901, "step": 27669 }, { "epoch": 4.516876862168891, "grad_norm": 3.344526767730713, "learning_rate": 8.613207288455642e-06, "loss": 0.3435, "step": 27670 }, { "epoch": 4.517040120811395, "grad_norm": 3.4034552574157715, "learning_rate": 8.612572345075317e-06, "loss": 0.3296, "step": 27671 }, { "epoch": 4.5172033794539, "grad_norm": 2.869468927383423, "learning_rate": 8.611937407398183e-06, "loss": 0.3108, "step": 27672 }, { "epoch": 4.517366638096404, "grad_norm": 3.3771848678588867, "learning_rate": 8.611302475426851e-06, "loss": 0.3285, "step": 27673 }, { "epoch": 4.517529896738909, "grad_norm": 2.6702117919921875, "learning_rate": 8.610667549163927e-06, "loss": 0.3233, "step": 27674 }, { "epoch": 4.517693155381413, "grad_norm": 3.4964301586151123, "learning_rate": 8.610032628612027e-06, "loss": 0.3733, "step": 27675 }, { "epoch": 4.5178564140239175, "grad_norm": 3.7200708389282227, "learning_rate": 8.609397713773756e-06, "loss": 0.41, "step": 27676 }, { "epoch": 4.518019672666422, "grad_norm": 3.0653257369995117, "learning_rate": 8.608762804651727e-06, "loss": 0.3377, "step": 27677 }, { "epoch": 4.518182931308926, "grad_norm": 4.040088176727295, "learning_rate": 8.608127901248552e-06, "loss": 0.4294, "step": 27678 }, { "epoch": 4.518346189951431, "grad_norm": 3.280928134918213, "learning_rate": 8.607493003566835e-06, "loss": 0.331, "step": 27679 }, { "epoch": 4.518509448593935, "grad_norm": 3.8706142902374268, "learning_rate": 8.606858111609189e-06, "loss": 0.3946, "step": 27680 }, { "epoch": 4.51867270723644, "grad_norm": 3.654080867767334, "learning_rate": 8.606223225378222e-06, "loss": 0.4297, "step": 27681 }, { "epoch": 4.518835965878944, "grad_norm": 2.6457624435424805, "learning_rate": 8.605588344876546e-06, "loss": 0.3248, "step": 27682 }, { "epoch": 4.518999224521448, "grad_norm": 3.9454355239868164, "learning_rate": 8.604953470106768e-06, "loss": 0.3546, "step": 27683 }, { "epoch": 4.519162483163953, "grad_norm": 3.5130808353424072, "learning_rate": 8.604318601071502e-06, "loss": 0.3536, "step": 27684 }, { "epoch": 4.5193257418064565, "grad_norm": 2.7019646167755127, "learning_rate": 8.603683737773351e-06, "loss": 0.3038, "step": 27685 }, { "epoch": 4.519489000448961, "grad_norm": 3.4951932430267334, "learning_rate": 8.603048880214933e-06, "loss": 0.4103, "step": 27686 }, { "epoch": 4.519652259091465, "grad_norm": 3.6484806537628174, "learning_rate": 8.602414028398857e-06, "loss": 0.3892, "step": 27687 }, { "epoch": 4.51981551773397, "grad_norm": 2.8891968727111816, "learning_rate": 8.601779182327723e-06, "loss": 0.3358, "step": 27688 }, { "epoch": 4.519978776376474, "grad_norm": 3.5162785053253174, "learning_rate": 8.601144342004147e-06, "loss": 0.3712, "step": 27689 }, { "epoch": 4.520142035018979, "grad_norm": 2.907529830932617, "learning_rate": 8.600509507430742e-06, "loss": 0.3413, "step": 27690 }, { "epoch": 4.520305293661483, "grad_norm": 3.4293313026428223, "learning_rate": 8.599874678610111e-06, "loss": 0.3741, "step": 27691 }, { "epoch": 4.520468552303988, "grad_norm": 3.45019793510437, "learning_rate": 8.599239855544867e-06, "loss": 0.3156, "step": 27692 }, { "epoch": 4.520631810946492, "grad_norm": 3.4484493732452393, "learning_rate": 8.59860503823762e-06, "loss": 0.3601, "step": 27693 }, { "epoch": 4.5207950695889965, "grad_norm": 3.838711738586426, "learning_rate": 8.597970226690978e-06, "loss": 0.4159, "step": 27694 }, { "epoch": 4.520958328231501, "grad_norm": 3.1396453380584717, "learning_rate": 8.597335420907552e-06, "loss": 0.329, "step": 27695 }, { "epoch": 4.521121586874005, "grad_norm": 3.7905125617980957, "learning_rate": 8.596700620889948e-06, "loss": 0.3504, "step": 27696 }, { "epoch": 4.52128484551651, "grad_norm": 4.04721212387085, "learning_rate": 8.596065826640783e-06, "loss": 0.3645, "step": 27697 }, { "epoch": 4.521448104159014, "grad_norm": 4.338252067565918, "learning_rate": 8.595431038162657e-06, "loss": 0.4398, "step": 27698 }, { "epoch": 4.521611362801519, "grad_norm": 3.5409607887268066, "learning_rate": 8.594796255458185e-06, "loss": 0.3725, "step": 27699 }, { "epoch": 4.521774621444022, "grad_norm": 4.76909875869751, "learning_rate": 8.594161478529974e-06, "loss": 0.4436, "step": 27700 }, { "epoch": 4.5219378800865275, "grad_norm": 4.203300952911377, "learning_rate": 8.593526707380632e-06, "loss": 0.4576, "step": 27701 }, { "epoch": 4.522101138729031, "grad_norm": 4.114466667175293, "learning_rate": 8.592891942012773e-06, "loss": 0.4804, "step": 27702 }, { "epoch": 4.5222643973715355, "grad_norm": 3.7893240451812744, "learning_rate": 8.592257182429004e-06, "loss": 0.325, "step": 27703 }, { "epoch": 4.52242765601404, "grad_norm": 2.799647092819214, "learning_rate": 8.591622428631935e-06, "loss": 0.3351, "step": 27704 }, { "epoch": 4.522590914656544, "grad_norm": 4.192892074584961, "learning_rate": 8.590987680624174e-06, "loss": 0.4293, "step": 27705 }, { "epoch": 4.522754173299049, "grad_norm": 4.171919822692871, "learning_rate": 8.590352938408331e-06, "loss": 0.422, "step": 27706 }, { "epoch": 4.522917431941553, "grad_norm": 2.752368688583374, "learning_rate": 8.589718201987017e-06, "loss": 0.2849, "step": 27707 }, { "epoch": 4.523080690584058, "grad_norm": 4.153748512268066, "learning_rate": 8.589083471362835e-06, "loss": 0.4164, "step": 27708 }, { "epoch": 4.523243949226562, "grad_norm": 3.852226972579956, "learning_rate": 8.588448746538399e-06, "loss": 0.4019, "step": 27709 }, { "epoch": 4.523407207869067, "grad_norm": 3.9676129817962646, "learning_rate": 8.587814027516317e-06, "loss": 0.3998, "step": 27710 }, { "epoch": 4.523570466511571, "grad_norm": 4.056789398193359, "learning_rate": 8.587179314299199e-06, "loss": 0.3887, "step": 27711 }, { "epoch": 4.5237337251540755, "grad_norm": 3.612234354019165, "learning_rate": 8.58654460688965e-06, "loss": 0.3843, "step": 27712 }, { "epoch": 4.52389698379658, "grad_norm": 3.1103875637054443, "learning_rate": 8.585909905290286e-06, "loss": 0.3239, "step": 27713 }, { "epoch": 4.524060242439084, "grad_norm": 3.059769868850708, "learning_rate": 8.585275209503714e-06, "loss": 0.3109, "step": 27714 }, { "epoch": 4.524223501081589, "grad_norm": 4.284665107727051, "learning_rate": 8.584640519532537e-06, "loss": 0.4702, "step": 27715 }, { "epoch": 4.524386759724093, "grad_norm": 3.594045400619507, "learning_rate": 8.584005835379371e-06, "loss": 0.3465, "step": 27716 }, { "epoch": 4.524550018366598, "grad_norm": 3.9287898540496826, "learning_rate": 8.583371157046824e-06, "loss": 0.3855, "step": 27717 }, { "epoch": 4.524713277009102, "grad_norm": 3.8771419525146484, "learning_rate": 8.582736484537503e-06, "loss": 0.4094, "step": 27718 }, { "epoch": 4.524876535651606, "grad_norm": 3.90893292427063, "learning_rate": 8.582101817854016e-06, "loss": 0.4513, "step": 27719 }, { "epoch": 4.52503979429411, "grad_norm": 3.5549943447113037, "learning_rate": 8.581467156998972e-06, "loss": 0.3848, "step": 27720 }, { "epoch": 4.5252030529366145, "grad_norm": 3.639090061187744, "learning_rate": 8.58083250197498e-06, "loss": 0.3992, "step": 27721 }, { "epoch": 4.525366311579119, "grad_norm": 3.9127566814422607, "learning_rate": 8.580197852784651e-06, "loss": 0.3231, "step": 27722 }, { "epoch": 4.525529570221623, "grad_norm": 3.683577299118042, "learning_rate": 8.57956320943059e-06, "loss": 0.3894, "step": 27723 }, { "epoch": 4.525692828864128, "grad_norm": 3.901484966278076, "learning_rate": 8.578928571915413e-06, "loss": 0.4523, "step": 27724 }, { "epoch": 4.525856087506632, "grad_norm": 3.7747902870178223, "learning_rate": 8.578293940241721e-06, "loss": 0.3673, "step": 27725 }, { "epoch": 4.526019346149137, "grad_norm": 3.797898292541504, "learning_rate": 8.577659314412132e-06, "loss": 0.3768, "step": 27726 }, { "epoch": 4.526182604791641, "grad_norm": 3.4212687015533447, "learning_rate": 8.577024694429243e-06, "loss": 0.3983, "step": 27727 }, { "epoch": 4.526345863434146, "grad_norm": 4.218554496765137, "learning_rate": 8.576390080295666e-06, "loss": 0.4007, "step": 27728 }, { "epoch": 4.52650912207665, "grad_norm": 3.0349392890930176, "learning_rate": 8.575755472014015e-06, "loss": 0.3235, "step": 27729 }, { "epoch": 4.5266723807191545, "grad_norm": 3.895994186401367, "learning_rate": 8.575120869586895e-06, "loss": 0.3814, "step": 27730 }, { "epoch": 4.526835639361659, "grad_norm": 3.7038626670837402, "learning_rate": 8.574486273016914e-06, "loss": 0.4442, "step": 27731 }, { "epoch": 4.526998898004163, "grad_norm": 4.4307708740234375, "learning_rate": 8.573851682306683e-06, "loss": 0.4814, "step": 27732 }, { "epoch": 4.527162156646668, "grad_norm": 2.9516358375549316, "learning_rate": 8.573217097458809e-06, "loss": 0.3379, "step": 27733 }, { "epoch": 4.527325415289172, "grad_norm": 3.376591920852661, "learning_rate": 8.5725825184759e-06, "loss": 0.3717, "step": 27734 }, { "epoch": 4.527488673931677, "grad_norm": 3.1526801586151123, "learning_rate": 8.571947945360564e-06, "loss": 0.3585, "step": 27735 }, { "epoch": 4.52765193257418, "grad_norm": 2.947392463684082, "learning_rate": 8.571313378115417e-06, "loss": 0.343, "step": 27736 }, { "epoch": 4.5278151912166855, "grad_norm": 3.495331287384033, "learning_rate": 8.570678816743056e-06, "loss": 0.3328, "step": 27737 }, { "epoch": 4.527978449859189, "grad_norm": 3.1616196632385254, "learning_rate": 8.570044261246096e-06, "loss": 0.3568, "step": 27738 }, { "epoch": 4.5281417085016935, "grad_norm": 3.102630853652954, "learning_rate": 8.56940971162714e-06, "loss": 0.351, "step": 27739 }, { "epoch": 4.528304967144198, "grad_norm": 4.076285362243652, "learning_rate": 8.568775167888806e-06, "loss": 0.4171, "step": 27740 }, { "epoch": 4.528468225786702, "grad_norm": 3.603531837463379, "learning_rate": 8.568140630033694e-06, "loss": 0.4092, "step": 27741 }, { "epoch": 4.528631484429207, "grad_norm": 3.606292247772217, "learning_rate": 8.567506098064418e-06, "loss": 0.3977, "step": 27742 }, { "epoch": 4.528794743071711, "grad_norm": 3.472607135772705, "learning_rate": 8.566871571983582e-06, "loss": 0.3486, "step": 27743 }, { "epoch": 4.528958001714216, "grad_norm": 3.693995475769043, "learning_rate": 8.566237051793794e-06, "loss": 0.4118, "step": 27744 }, { "epoch": 4.52912126035672, "grad_norm": 3.4971323013305664, "learning_rate": 8.565602537497666e-06, "loss": 0.3708, "step": 27745 }, { "epoch": 4.529284518999225, "grad_norm": 3.4717373847961426, "learning_rate": 8.564968029097807e-06, "loss": 0.3669, "step": 27746 }, { "epoch": 4.529447777641729, "grad_norm": 4.444606781005859, "learning_rate": 8.564333526596818e-06, "loss": 0.4845, "step": 27747 }, { "epoch": 4.5296110362842334, "grad_norm": 4.320145606994629, "learning_rate": 8.563699029997314e-06, "loss": 0.4219, "step": 27748 }, { "epoch": 4.529774294926738, "grad_norm": 3.2434840202331543, "learning_rate": 8.5630645393019e-06, "loss": 0.3829, "step": 27749 }, { "epoch": 4.529937553569242, "grad_norm": 4.078998565673828, "learning_rate": 8.562430054513184e-06, "loss": 0.4375, "step": 27750 }, { "epoch": 4.530100812211747, "grad_norm": 3.2913455963134766, "learning_rate": 8.561795575633777e-06, "loss": 0.351, "step": 27751 }, { "epoch": 4.530264070854251, "grad_norm": 3.28534197807312, "learning_rate": 8.561161102666284e-06, "loss": 0.3542, "step": 27752 }, { "epoch": 4.530427329496755, "grad_norm": 5.26440954208374, "learning_rate": 8.560526635613315e-06, "loss": 0.3936, "step": 27753 }, { "epoch": 4.53059058813926, "grad_norm": 3.4652748107910156, "learning_rate": 8.559892174477478e-06, "loss": 0.3766, "step": 27754 }, { "epoch": 4.530753846781764, "grad_norm": 3.328603744506836, "learning_rate": 8.559257719261382e-06, "loss": 0.3638, "step": 27755 }, { "epoch": 4.530917105424268, "grad_norm": 3.3033580780029297, "learning_rate": 8.558623269967633e-06, "loss": 0.3935, "step": 27756 }, { "epoch": 4.5310803640667725, "grad_norm": 3.5998878479003906, "learning_rate": 8.557988826598837e-06, "loss": 0.3889, "step": 27757 }, { "epoch": 4.531243622709277, "grad_norm": 3.324274778366089, "learning_rate": 8.557354389157607e-06, "loss": 0.3377, "step": 27758 }, { "epoch": 4.531406881351781, "grad_norm": 3.4061119556427, "learning_rate": 8.556719957646547e-06, "loss": 0.3605, "step": 27759 }, { "epoch": 4.531570139994286, "grad_norm": 4.39011812210083, "learning_rate": 8.556085532068267e-06, "loss": 0.4406, "step": 27760 }, { "epoch": 4.53173339863679, "grad_norm": 3.923396348953247, "learning_rate": 8.555451112425373e-06, "loss": 0.4056, "step": 27761 }, { "epoch": 4.531896657279295, "grad_norm": 3.471649408340454, "learning_rate": 8.554816698720474e-06, "loss": 0.3929, "step": 27762 }, { "epoch": 4.532059915921799, "grad_norm": 3.887117385864258, "learning_rate": 8.554182290956177e-06, "loss": 0.4183, "step": 27763 }, { "epoch": 4.532223174564304, "grad_norm": 3.4587223529815674, "learning_rate": 8.553547889135091e-06, "loss": 0.3782, "step": 27764 }, { "epoch": 4.532386433206808, "grad_norm": 3.3981852531433105, "learning_rate": 8.552913493259826e-06, "loss": 0.3326, "step": 27765 }, { "epoch": 4.5325496918493124, "grad_norm": 4.255335807800293, "learning_rate": 8.552279103332989e-06, "loss": 0.3672, "step": 27766 }, { "epoch": 4.532712950491817, "grad_norm": 3.188631534576416, "learning_rate": 8.551644719357182e-06, "loss": 0.3731, "step": 27767 }, { "epoch": 4.532876209134321, "grad_norm": 3.5769429206848145, "learning_rate": 8.551010341335017e-06, "loss": 0.4231, "step": 27768 }, { "epoch": 4.533039467776826, "grad_norm": 3.3791210651397705, "learning_rate": 8.5503759692691e-06, "loss": 0.3323, "step": 27769 }, { "epoch": 4.53320272641933, "grad_norm": 3.5571706295013428, "learning_rate": 8.549741603162042e-06, "loss": 0.3352, "step": 27770 }, { "epoch": 4.533365985061835, "grad_norm": 2.933075428009033, "learning_rate": 8.54910724301645e-06, "loss": 0.3851, "step": 27771 }, { "epoch": 4.533529243704338, "grad_norm": 3.1802940368652344, "learning_rate": 8.54847288883493e-06, "loss": 0.3038, "step": 27772 }, { "epoch": 4.533692502346843, "grad_norm": 3.099639415740967, "learning_rate": 8.547838540620085e-06, "loss": 0.3224, "step": 27773 }, { "epoch": 4.533855760989347, "grad_norm": 3.716216802597046, "learning_rate": 8.547204198374531e-06, "loss": 0.4173, "step": 27774 }, { "epoch": 4.5340190196318515, "grad_norm": 3.5400075912475586, "learning_rate": 8.546569862100876e-06, "loss": 0.3312, "step": 27775 }, { "epoch": 4.534182278274356, "grad_norm": 3.515981674194336, "learning_rate": 8.54593553180172e-06, "loss": 0.3469, "step": 27776 }, { "epoch": 4.53434553691686, "grad_norm": 4.577414035797119, "learning_rate": 8.545301207479672e-06, "loss": 0.3102, "step": 27777 }, { "epoch": 4.534508795559365, "grad_norm": 3.1797804832458496, "learning_rate": 8.544666889137342e-06, "loss": 0.3716, "step": 27778 }, { "epoch": 4.534672054201869, "grad_norm": 4.073611259460449, "learning_rate": 8.544032576777339e-06, "loss": 0.3513, "step": 27779 }, { "epoch": 4.534835312844374, "grad_norm": 3.3651552200317383, "learning_rate": 8.543398270402266e-06, "loss": 0.4366, "step": 27780 }, { "epoch": 4.534998571486878, "grad_norm": 3.714388132095337, "learning_rate": 8.542763970014734e-06, "loss": 0.4087, "step": 27781 }, { "epoch": 4.535161830129383, "grad_norm": 3.34613299369812, "learning_rate": 8.542129675617348e-06, "loss": 0.3566, "step": 27782 }, { "epoch": 4.535325088771887, "grad_norm": 3.7757575511932373, "learning_rate": 8.541495387212719e-06, "loss": 0.3416, "step": 27783 }, { "epoch": 4.535488347414391, "grad_norm": 3.612058162689209, "learning_rate": 8.540861104803449e-06, "loss": 0.3449, "step": 27784 }, { "epoch": 4.535651606056896, "grad_norm": 3.338473320007324, "learning_rate": 8.540226828392152e-06, "loss": 0.3183, "step": 27785 }, { "epoch": 4.5358148646994, "grad_norm": 3.3903043270111084, "learning_rate": 8.53959255798143e-06, "loss": 0.3974, "step": 27786 }, { "epoch": 4.535978123341905, "grad_norm": 2.993967056274414, "learning_rate": 8.538958293573889e-06, "loss": 0.3443, "step": 27787 }, { "epoch": 4.536141381984409, "grad_norm": 3.6789026260375977, "learning_rate": 8.538324035172138e-06, "loss": 0.3714, "step": 27788 }, { "epoch": 4.536304640626913, "grad_norm": 3.420685291290283, "learning_rate": 8.537689782778786e-06, "loss": 0.3647, "step": 27789 }, { "epoch": 4.536467899269418, "grad_norm": 3.3979859352111816, "learning_rate": 8.53705553639644e-06, "loss": 0.4106, "step": 27790 }, { "epoch": 4.536631157911922, "grad_norm": 3.6966946125030518, "learning_rate": 8.536421296027706e-06, "loss": 0.3724, "step": 27791 }, { "epoch": 4.536794416554426, "grad_norm": 3.4532413482666016, "learning_rate": 8.535787061675191e-06, "loss": 0.3993, "step": 27792 }, { "epoch": 4.5369576751969305, "grad_norm": 3.445585250854492, "learning_rate": 8.535152833341503e-06, "loss": 0.4217, "step": 27793 }, { "epoch": 4.537120933839435, "grad_norm": 3.981377363204956, "learning_rate": 8.534518611029248e-06, "loss": 0.4306, "step": 27794 }, { "epoch": 4.537284192481939, "grad_norm": 3.123436450958252, "learning_rate": 8.533884394741037e-06, "loss": 0.3465, "step": 27795 }, { "epoch": 4.537447451124444, "grad_norm": 3.55957293510437, "learning_rate": 8.533250184479471e-06, "loss": 0.4543, "step": 27796 }, { "epoch": 4.537610709766948, "grad_norm": 3.7959482669830322, "learning_rate": 8.53261598024716e-06, "loss": 0.4069, "step": 27797 }, { "epoch": 4.537773968409453, "grad_norm": 2.8575167655944824, "learning_rate": 8.53198178204671e-06, "loss": 0.3388, "step": 27798 }, { "epoch": 4.537937227051957, "grad_norm": 3.233355760574341, "learning_rate": 8.53134758988073e-06, "loss": 0.3476, "step": 27799 }, { "epoch": 4.538100485694462, "grad_norm": 3.737367630004883, "learning_rate": 8.530713403751822e-06, "loss": 0.408, "step": 27800 }, { "epoch": 4.538263744336966, "grad_norm": 3.3943593502044678, "learning_rate": 8.530079223662598e-06, "loss": 0.3577, "step": 27801 }, { "epoch": 4.53842700297947, "grad_norm": 3.4524943828582764, "learning_rate": 8.529445049615663e-06, "loss": 0.3784, "step": 27802 }, { "epoch": 4.538590261621975, "grad_norm": 4.067430019378662, "learning_rate": 8.528810881613626e-06, "loss": 0.3846, "step": 27803 }, { "epoch": 4.538753520264479, "grad_norm": 2.673258066177368, "learning_rate": 8.52817671965909e-06, "loss": 0.3083, "step": 27804 }, { "epoch": 4.538916778906984, "grad_norm": 3.7542707920074463, "learning_rate": 8.527542563754668e-06, "loss": 0.4007, "step": 27805 }, { "epoch": 4.539080037549487, "grad_norm": 3.1837821006774902, "learning_rate": 8.52690841390296e-06, "loss": 0.3396, "step": 27806 }, { "epoch": 4.539243296191993, "grad_norm": 4.006025314331055, "learning_rate": 8.526274270106573e-06, "loss": 0.4078, "step": 27807 }, { "epoch": 4.539406554834496, "grad_norm": 3.489497423171997, "learning_rate": 8.525640132368118e-06, "loss": 0.3417, "step": 27808 }, { "epoch": 4.539569813477001, "grad_norm": 3.870123863220215, "learning_rate": 8.5250060006902e-06, "loss": 0.3634, "step": 27809 }, { "epoch": 4.539733072119505, "grad_norm": 3.31908917427063, "learning_rate": 8.524371875075426e-06, "loss": 0.3456, "step": 27810 }, { "epoch": 4.5398963307620095, "grad_norm": 5.3594865798950195, "learning_rate": 8.5237377555264e-06, "loss": 0.4437, "step": 27811 }, { "epoch": 4.540059589404514, "grad_norm": 2.961047410964966, "learning_rate": 8.523103642045729e-06, "loss": 0.3048, "step": 27812 }, { "epoch": 4.540222848047018, "grad_norm": 3.296456813812256, "learning_rate": 8.522469534636024e-06, "loss": 0.3507, "step": 27813 }, { "epoch": 4.540386106689523, "grad_norm": 3.2799246311187744, "learning_rate": 8.521835433299893e-06, "loss": 0.3616, "step": 27814 }, { "epoch": 4.540549365332027, "grad_norm": 4.057740211486816, "learning_rate": 8.521201338039934e-06, "loss": 0.3921, "step": 27815 }, { "epoch": 4.540712623974532, "grad_norm": 3.5434153079986572, "learning_rate": 8.520567248858755e-06, "loss": 0.3229, "step": 27816 }, { "epoch": 4.540875882617036, "grad_norm": 4.19362735748291, "learning_rate": 8.519933165758969e-06, "loss": 0.3843, "step": 27817 }, { "epoch": 4.541039141259541, "grad_norm": 3.9442262649536133, "learning_rate": 8.519299088743178e-06, "loss": 0.346, "step": 27818 }, { "epoch": 4.541202399902045, "grad_norm": 3.187326192855835, "learning_rate": 8.51866501781399e-06, "loss": 0.3714, "step": 27819 }, { "epoch": 4.541365658544549, "grad_norm": 3.7753007411956787, "learning_rate": 8.51803095297401e-06, "loss": 0.3439, "step": 27820 }, { "epoch": 4.541528917187054, "grad_norm": 3.4890213012695312, "learning_rate": 8.517396894225845e-06, "loss": 0.3822, "step": 27821 }, { "epoch": 4.541692175829558, "grad_norm": 3.5934829711914062, "learning_rate": 8.516762841572103e-06, "loss": 0.3716, "step": 27822 }, { "epoch": 4.541855434472063, "grad_norm": 4.267745018005371, "learning_rate": 8.516128795015385e-06, "loss": 0.42, "step": 27823 }, { "epoch": 4.542018693114567, "grad_norm": 3.428635597229004, "learning_rate": 8.515494754558308e-06, "loss": 0.3489, "step": 27824 }, { "epoch": 4.542181951757071, "grad_norm": 3.3863344192504883, "learning_rate": 8.514860720203468e-06, "loss": 0.3632, "step": 27825 }, { "epoch": 4.542345210399575, "grad_norm": 3.4862008094787598, "learning_rate": 8.514226691953475e-06, "loss": 0.3311, "step": 27826 }, { "epoch": 4.54250846904208, "grad_norm": 3.9503374099731445, "learning_rate": 8.513592669810931e-06, "loss": 0.3755, "step": 27827 }, { "epoch": 4.542671727684584, "grad_norm": 3.062954902648926, "learning_rate": 8.51295865377845e-06, "loss": 0.3271, "step": 27828 }, { "epoch": 4.5428349863270885, "grad_norm": 3.5832290649414062, "learning_rate": 8.512324643858634e-06, "loss": 0.4239, "step": 27829 }, { "epoch": 4.542998244969593, "grad_norm": 3.760995626449585, "learning_rate": 8.511690640054092e-06, "loss": 0.4191, "step": 27830 }, { "epoch": 4.543161503612097, "grad_norm": 3.1986098289489746, "learning_rate": 8.511056642367424e-06, "loss": 0.3601, "step": 27831 }, { "epoch": 4.543324762254602, "grad_norm": 3.8891260623931885, "learning_rate": 8.510422650801242e-06, "loss": 0.4354, "step": 27832 }, { "epoch": 4.543488020897106, "grad_norm": 3.418574810028076, "learning_rate": 8.50978866535815e-06, "loss": 0.3989, "step": 27833 }, { "epoch": 4.543651279539611, "grad_norm": 3.491903781890869, "learning_rate": 8.509154686040757e-06, "loss": 0.3498, "step": 27834 }, { "epoch": 4.543814538182115, "grad_norm": 3.996016263961792, "learning_rate": 8.508520712851663e-06, "loss": 0.3574, "step": 27835 }, { "epoch": 4.5439777968246196, "grad_norm": 4.1536946296691895, "learning_rate": 8.507886745793476e-06, "loss": 0.4008, "step": 27836 }, { "epoch": 4.544141055467124, "grad_norm": 3.5390100479125977, "learning_rate": 8.507252784868804e-06, "loss": 0.4512, "step": 27837 }, { "epoch": 4.544304314109628, "grad_norm": 3.783951997756958, "learning_rate": 8.506618830080252e-06, "loss": 0.4137, "step": 27838 }, { "epoch": 4.544467572752133, "grad_norm": 3.9912028312683105, "learning_rate": 8.505984881430424e-06, "loss": 0.4046, "step": 27839 }, { "epoch": 4.544630831394637, "grad_norm": 3.6230413913726807, "learning_rate": 8.50535093892193e-06, "loss": 0.3949, "step": 27840 }, { "epoch": 4.544794090037142, "grad_norm": 3.4489006996154785, "learning_rate": 8.504717002557375e-06, "loss": 0.3536, "step": 27841 }, { "epoch": 4.544957348679645, "grad_norm": 3.500018358230591, "learning_rate": 8.504083072339364e-06, "loss": 0.3488, "step": 27842 }, { "epoch": 4.54512060732215, "grad_norm": 4.288519859313965, "learning_rate": 8.503449148270501e-06, "loss": 0.402, "step": 27843 }, { "epoch": 4.545283865964654, "grad_norm": 3.2049319744110107, "learning_rate": 8.502815230353397e-06, "loss": 0.3844, "step": 27844 }, { "epoch": 4.545447124607159, "grad_norm": 3.8161118030548096, "learning_rate": 8.502181318590653e-06, "loss": 0.3725, "step": 27845 }, { "epoch": 4.545610383249663, "grad_norm": 5.014344215393066, "learning_rate": 8.501547412984874e-06, "loss": 0.4651, "step": 27846 }, { "epoch": 4.5457736418921675, "grad_norm": 4.021899223327637, "learning_rate": 8.500913513538668e-06, "loss": 0.5011, "step": 27847 }, { "epoch": 4.545936900534672, "grad_norm": 3.647611141204834, "learning_rate": 8.500279620254641e-06, "loss": 0.3485, "step": 27848 }, { "epoch": 4.546100159177176, "grad_norm": 3.69875168800354, "learning_rate": 8.499645733135397e-06, "loss": 0.3793, "step": 27849 }, { "epoch": 4.546263417819681, "grad_norm": 3.4452762603759766, "learning_rate": 8.499011852183542e-06, "loss": 0.3311, "step": 27850 }, { "epoch": 4.546426676462185, "grad_norm": 3.4286417961120605, "learning_rate": 8.498377977401686e-06, "loss": 0.3601, "step": 27851 }, { "epoch": 4.54658993510469, "grad_norm": 3.5031588077545166, "learning_rate": 8.49774410879243e-06, "loss": 0.3968, "step": 27852 }, { "epoch": 4.546753193747194, "grad_norm": 4.542529582977295, "learning_rate": 8.49711024635838e-06, "loss": 0.4234, "step": 27853 }, { "epoch": 4.5469164523896985, "grad_norm": 3.9943535327911377, "learning_rate": 8.496476390102144e-06, "loss": 0.4962, "step": 27854 }, { "epoch": 4.547079711032203, "grad_norm": 3.7843029499053955, "learning_rate": 8.495842540026324e-06, "loss": 0.3575, "step": 27855 }, { "epoch": 4.547242969674707, "grad_norm": 4.42725133895874, "learning_rate": 8.49520869613353e-06, "loss": 0.4258, "step": 27856 }, { "epoch": 4.547406228317212, "grad_norm": 3.839794397354126, "learning_rate": 8.49457485842636e-06, "loss": 0.4318, "step": 27857 }, { "epoch": 4.547569486959716, "grad_norm": 3.3763644695281982, "learning_rate": 8.493941026907429e-06, "loss": 0.3499, "step": 27858 }, { "epoch": 4.54773274560222, "grad_norm": 3.8715641498565674, "learning_rate": 8.493307201579335e-06, "loss": 0.3937, "step": 27859 }, { "epoch": 4.547896004244725, "grad_norm": 3.443666458129883, "learning_rate": 8.492673382444687e-06, "loss": 0.3571, "step": 27860 }, { "epoch": 4.548059262887229, "grad_norm": 3.193847417831421, "learning_rate": 8.492039569506088e-06, "loss": 0.3851, "step": 27861 }, { "epoch": 4.548222521529733, "grad_norm": 3.4371962547302246, "learning_rate": 8.491405762766146e-06, "loss": 0.4024, "step": 27862 }, { "epoch": 4.548385780172238, "grad_norm": 3.4704439640045166, "learning_rate": 8.49077196222747e-06, "loss": 0.4272, "step": 27863 }, { "epoch": 4.548549038814742, "grad_norm": 2.7904906272888184, "learning_rate": 8.490138167892656e-06, "loss": 0.3244, "step": 27864 }, { "epoch": 4.5487122974572465, "grad_norm": 3.761232614517212, "learning_rate": 8.489504379764315e-06, "loss": 0.4001, "step": 27865 }, { "epoch": 4.548875556099751, "grad_norm": 2.9651572704315186, "learning_rate": 8.488870597845048e-06, "loss": 0.318, "step": 27866 }, { "epoch": 4.549038814742255, "grad_norm": 3.671499252319336, "learning_rate": 8.488236822137466e-06, "loss": 0.3804, "step": 27867 }, { "epoch": 4.54920207338476, "grad_norm": 3.5230212211608887, "learning_rate": 8.487603052644171e-06, "loss": 0.3322, "step": 27868 }, { "epoch": 4.549365332027264, "grad_norm": 3.9944841861724854, "learning_rate": 8.486969289367768e-06, "loss": 0.3761, "step": 27869 }, { "epoch": 4.549528590669769, "grad_norm": 3.7408652305603027, "learning_rate": 8.486335532310865e-06, "loss": 0.4372, "step": 27870 }, { "epoch": 4.549691849312273, "grad_norm": 3.2053780555725098, "learning_rate": 8.485701781476063e-06, "loss": 0.394, "step": 27871 }, { "epoch": 4.5498551079547775, "grad_norm": 3.639613628387451, "learning_rate": 8.485068036865971e-06, "loss": 0.3576, "step": 27872 }, { "epoch": 4.550018366597282, "grad_norm": 3.477627992630005, "learning_rate": 8.484434298483193e-06, "loss": 0.3873, "step": 27873 }, { "epoch": 4.550181625239786, "grad_norm": 4.037040710449219, "learning_rate": 8.483800566330333e-06, "loss": 0.4274, "step": 27874 }, { "epoch": 4.550344883882291, "grad_norm": 3.658341884613037, "learning_rate": 8.483166840409996e-06, "loss": 0.4235, "step": 27875 }, { "epoch": 4.550508142524794, "grad_norm": 3.44579815864563, "learning_rate": 8.482533120724784e-06, "loss": 0.3738, "step": 27876 }, { "epoch": 4.5506714011673, "grad_norm": 3.7071497440338135, "learning_rate": 8.481899407277307e-06, "loss": 0.4184, "step": 27877 }, { "epoch": 4.550834659809803, "grad_norm": 3.254185199737549, "learning_rate": 8.48126570007017e-06, "loss": 0.3414, "step": 27878 }, { "epoch": 4.550997918452308, "grad_norm": 3.2868175506591797, "learning_rate": 8.480631999105974e-06, "loss": 0.3235, "step": 27879 }, { "epoch": 4.551161177094812, "grad_norm": 4.054433345794678, "learning_rate": 8.479998304387329e-06, "loss": 0.4002, "step": 27880 }, { "epoch": 4.551324435737317, "grad_norm": 3.350895404815674, "learning_rate": 8.479364615916837e-06, "loss": 0.3512, "step": 27881 }, { "epoch": 4.551487694379821, "grad_norm": 3.9251344203948975, "learning_rate": 8.4787309336971e-06, "loss": 0.4367, "step": 27882 }, { "epoch": 4.5516509530223255, "grad_norm": 3.0278239250183105, "learning_rate": 8.47809725773073e-06, "loss": 0.2954, "step": 27883 }, { "epoch": 4.55181421166483, "grad_norm": 3.527898073196411, "learning_rate": 8.477463588020325e-06, "loss": 0.3822, "step": 27884 }, { "epoch": 4.551977470307334, "grad_norm": 3.9877824783325195, "learning_rate": 8.476829924568493e-06, "loss": 0.4016, "step": 27885 }, { "epoch": 4.552140728949839, "grad_norm": 3.8451294898986816, "learning_rate": 8.476196267377836e-06, "loss": 0.458, "step": 27886 }, { "epoch": 4.552303987592343, "grad_norm": 3.584787368774414, "learning_rate": 8.475562616450962e-06, "loss": 0.3938, "step": 27887 }, { "epoch": 4.552467246234848, "grad_norm": 3.696840524673462, "learning_rate": 8.474928971790472e-06, "loss": 0.381, "step": 27888 }, { "epoch": 4.552630504877352, "grad_norm": 3.1113522052764893, "learning_rate": 8.474295333398976e-06, "loss": 0.3543, "step": 27889 }, { "epoch": 4.5527937635198565, "grad_norm": 3.666203737258911, "learning_rate": 8.473661701279075e-06, "loss": 0.3583, "step": 27890 }, { "epoch": 4.552957022162361, "grad_norm": 3.240402936935425, "learning_rate": 8.473028075433375e-06, "loss": 0.407, "step": 27891 }, { "epoch": 4.553120280804865, "grad_norm": 2.9211394786834717, "learning_rate": 8.47239445586448e-06, "loss": 0.3171, "step": 27892 }, { "epoch": 4.55328353944737, "grad_norm": 3.3663105964660645, "learning_rate": 8.471760842574996e-06, "loss": 0.3371, "step": 27893 }, { "epoch": 4.553446798089874, "grad_norm": 3.8728866577148438, "learning_rate": 8.471127235567525e-06, "loss": 0.4059, "step": 27894 }, { "epoch": 4.553610056732378, "grad_norm": 3.663700580596924, "learning_rate": 8.470493634844672e-06, "loss": 0.3943, "step": 27895 }, { "epoch": 4.553773315374882, "grad_norm": 3.984647035598755, "learning_rate": 8.469860040409042e-06, "loss": 0.4425, "step": 27896 }, { "epoch": 4.553936574017387, "grad_norm": 3.99945068359375, "learning_rate": 8.46922645226324e-06, "loss": 0.4177, "step": 27897 }, { "epoch": 4.554099832659891, "grad_norm": 3.0100247859954834, "learning_rate": 8.46859287040987e-06, "loss": 0.3609, "step": 27898 }, { "epoch": 4.554263091302396, "grad_norm": 2.8721084594726562, "learning_rate": 8.467959294851536e-06, "loss": 0.3046, "step": 27899 }, { "epoch": 4.5544263499449, "grad_norm": 4.0710015296936035, "learning_rate": 8.46732572559084e-06, "loss": 0.4058, "step": 27900 }, { "epoch": 4.5545896085874045, "grad_norm": 4.072704792022705, "learning_rate": 8.466692162630393e-06, "loss": 0.4949, "step": 27901 }, { "epoch": 4.554752867229909, "grad_norm": 3.075613260269165, "learning_rate": 8.466058605972795e-06, "loss": 0.3749, "step": 27902 }, { "epoch": 4.554916125872413, "grad_norm": 3.202589988708496, "learning_rate": 8.465425055620656e-06, "loss": 0.3717, "step": 27903 }, { "epoch": 4.555079384514918, "grad_norm": 3.9461586475372314, "learning_rate": 8.464791511576566e-06, "loss": 0.3798, "step": 27904 }, { "epoch": 4.555242643157422, "grad_norm": 2.908721923828125, "learning_rate": 8.464157973843143e-06, "loss": 0.3156, "step": 27905 }, { "epoch": 4.555405901799927, "grad_norm": 3.3779377937316895, "learning_rate": 8.463524442422986e-06, "loss": 0.3936, "step": 27906 }, { "epoch": 4.555569160442431, "grad_norm": 3.756340742111206, "learning_rate": 8.4628909173187e-06, "loss": 0.4545, "step": 27907 }, { "epoch": 4.5557324190849355, "grad_norm": 3.5687973499298096, "learning_rate": 8.462257398532889e-06, "loss": 0.3561, "step": 27908 }, { "epoch": 4.55589567772744, "grad_norm": 2.485489845275879, "learning_rate": 8.461623886068157e-06, "loss": 0.2998, "step": 27909 }, { "epoch": 4.556058936369944, "grad_norm": 3.3561458587646484, "learning_rate": 8.460990379927108e-06, "loss": 0.3987, "step": 27910 }, { "epoch": 4.556222195012449, "grad_norm": 4.05641508102417, "learning_rate": 8.460356880112346e-06, "loss": 0.3986, "step": 27911 }, { "epoch": 4.556385453654952, "grad_norm": 3.2598471641540527, "learning_rate": 8.459723386626479e-06, "loss": 0.36, "step": 27912 }, { "epoch": 4.556548712297458, "grad_norm": 3.10927677154541, "learning_rate": 8.459089899472106e-06, "loss": 0.3216, "step": 27913 }, { "epoch": 4.556711970939961, "grad_norm": 3.7560839653015137, "learning_rate": 8.458456418651831e-06, "loss": 0.3749, "step": 27914 }, { "epoch": 4.556875229582466, "grad_norm": 3.3894920349121094, "learning_rate": 8.457822944168259e-06, "loss": 0.4251, "step": 27915 }, { "epoch": 4.55703848822497, "grad_norm": 3.959240198135376, "learning_rate": 8.457189476023994e-06, "loss": 0.3678, "step": 27916 }, { "epoch": 4.557201746867475, "grad_norm": 3.7248520851135254, "learning_rate": 8.456556014221643e-06, "loss": 0.3841, "step": 27917 }, { "epoch": 4.557365005509979, "grad_norm": 3.1949281692504883, "learning_rate": 8.455922558763807e-06, "loss": 0.3723, "step": 27918 }, { "epoch": 4.5575282641524835, "grad_norm": 3.890383720397949, "learning_rate": 8.455289109653089e-06, "loss": 0.3652, "step": 27919 }, { "epoch": 4.557691522794988, "grad_norm": 3.36895751953125, "learning_rate": 8.454655666892095e-06, "loss": 0.4006, "step": 27920 }, { "epoch": 4.557854781437492, "grad_norm": 2.794362783432007, "learning_rate": 8.454022230483429e-06, "loss": 0.3705, "step": 27921 }, { "epoch": 4.558018040079997, "grad_norm": 3.082481622695923, "learning_rate": 8.453388800429694e-06, "loss": 0.3529, "step": 27922 }, { "epoch": 4.558181298722501, "grad_norm": 2.7316102981567383, "learning_rate": 8.452755376733494e-06, "loss": 0.3057, "step": 27923 }, { "epoch": 4.558344557365006, "grad_norm": 3.3536500930786133, "learning_rate": 8.45212195939743e-06, "loss": 0.3414, "step": 27924 }, { "epoch": 4.55850781600751, "grad_norm": 3.720473289489746, "learning_rate": 8.45148854842411e-06, "loss": 0.4287, "step": 27925 }, { "epoch": 4.5586710746500145, "grad_norm": 3.325300931930542, "learning_rate": 8.450855143816134e-06, "loss": 0.3433, "step": 27926 }, { "epoch": 4.558834333292519, "grad_norm": 3.3002307415008545, "learning_rate": 8.450221745576106e-06, "loss": 0.3935, "step": 27927 }, { "epoch": 4.558997591935023, "grad_norm": 3.4079887866973877, "learning_rate": 8.449588353706635e-06, "loss": 0.3909, "step": 27928 }, { "epoch": 4.559160850577527, "grad_norm": 3.5965611934661865, "learning_rate": 8.44895496821032e-06, "loss": 0.3993, "step": 27929 }, { "epoch": 4.559324109220032, "grad_norm": 2.9983022212982178, "learning_rate": 8.448321589089766e-06, "loss": 0.3229, "step": 27930 }, { "epoch": 4.559487367862536, "grad_norm": 3.2510862350463867, "learning_rate": 8.447688216347574e-06, "loss": 0.3842, "step": 27931 }, { "epoch": 4.55965062650504, "grad_norm": 3.51613450050354, "learning_rate": 8.447054849986353e-06, "loss": 0.4075, "step": 27932 }, { "epoch": 4.559813885147545, "grad_norm": 3.482541799545288, "learning_rate": 8.446421490008702e-06, "loss": 0.3764, "step": 27933 }, { "epoch": 4.559977143790049, "grad_norm": 3.495681047439575, "learning_rate": 8.445788136417223e-06, "loss": 0.3538, "step": 27934 }, { "epoch": 4.560140402432554, "grad_norm": 3.878265380859375, "learning_rate": 8.445154789214525e-06, "loss": 0.4283, "step": 27935 }, { "epoch": 4.560303661075058, "grad_norm": 3.5468735694885254, "learning_rate": 8.444521448403207e-06, "loss": 0.4012, "step": 27936 }, { "epoch": 4.5604669197175625, "grad_norm": 3.7185256481170654, "learning_rate": 8.443888113985874e-06, "loss": 0.4312, "step": 27937 }, { "epoch": 4.560630178360067, "grad_norm": 3.001626491546631, "learning_rate": 8.443254785965128e-06, "loss": 0.3276, "step": 27938 }, { "epoch": 4.560793437002571, "grad_norm": 5.275972843170166, "learning_rate": 8.442621464343574e-06, "loss": 0.3784, "step": 27939 }, { "epoch": 4.560956695645076, "grad_norm": 3.037771224975586, "learning_rate": 8.441988149123818e-06, "loss": 0.3379, "step": 27940 }, { "epoch": 4.56111995428758, "grad_norm": 3.3529720306396484, "learning_rate": 8.441354840308457e-06, "loss": 0.3973, "step": 27941 }, { "epoch": 4.561283212930085, "grad_norm": 3.0633974075317383, "learning_rate": 8.440721537900102e-06, "loss": 0.371, "step": 27942 }, { "epoch": 4.561446471572589, "grad_norm": 3.422072410583496, "learning_rate": 8.44008824190135e-06, "loss": 0.3585, "step": 27943 }, { "epoch": 4.5616097302150935, "grad_norm": 4.1417036056518555, "learning_rate": 8.439454952314808e-06, "loss": 0.3929, "step": 27944 }, { "epoch": 4.561772988857598, "grad_norm": 2.884631395339966, "learning_rate": 8.438821669143074e-06, "loss": 0.3218, "step": 27945 }, { "epoch": 4.561936247500102, "grad_norm": 3.638153314590454, "learning_rate": 8.438188392388755e-06, "loss": 0.39, "step": 27946 }, { "epoch": 4.562099506142607, "grad_norm": 3.4741098880767822, "learning_rate": 8.437555122054455e-06, "loss": 0.3418, "step": 27947 }, { "epoch": 4.56226276478511, "grad_norm": 3.0489566326141357, "learning_rate": 8.436921858142777e-06, "loss": 0.3588, "step": 27948 }, { "epoch": 4.562426023427615, "grad_norm": 3.140498161315918, "learning_rate": 8.436288600656321e-06, "loss": 0.403, "step": 27949 }, { "epoch": 4.562589282070119, "grad_norm": 3.6445443630218506, "learning_rate": 8.43565534959769e-06, "loss": 0.3457, "step": 27950 }, { "epoch": 4.562752540712624, "grad_norm": 3.7306389808654785, "learning_rate": 8.435022104969496e-06, "loss": 0.3923, "step": 27951 }, { "epoch": 4.562915799355128, "grad_norm": 3.924973964691162, "learning_rate": 8.434388866774333e-06, "loss": 0.4182, "step": 27952 }, { "epoch": 4.563079057997633, "grad_norm": 3.8472533226013184, "learning_rate": 8.433755635014802e-06, "loss": 0.4837, "step": 27953 }, { "epoch": 4.563242316640137, "grad_norm": 3.6066911220550537, "learning_rate": 8.433122409693512e-06, "loss": 0.4159, "step": 27954 }, { "epoch": 4.5634055752826415, "grad_norm": 3.428938865661621, "learning_rate": 8.432489190813065e-06, "loss": 0.388, "step": 27955 }, { "epoch": 4.563568833925146, "grad_norm": 4.025944709777832, "learning_rate": 8.431855978376063e-06, "loss": 0.3972, "step": 27956 }, { "epoch": 4.56373209256765, "grad_norm": 3.797590732574463, "learning_rate": 8.431222772385108e-06, "loss": 0.4359, "step": 27957 }, { "epoch": 4.563895351210155, "grad_norm": 3.008357524871826, "learning_rate": 8.430589572842804e-06, "loss": 0.3416, "step": 27958 }, { "epoch": 4.564058609852659, "grad_norm": 3.583544969558716, "learning_rate": 8.429956379751754e-06, "loss": 0.4165, "step": 27959 }, { "epoch": 4.564221868495164, "grad_norm": 3.4841349124908447, "learning_rate": 8.429323193114561e-06, "loss": 0.3946, "step": 27960 }, { "epoch": 4.564385127137668, "grad_norm": 4.687199115753174, "learning_rate": 8.42869001293383e-06, "loss": 0.4229, "step": 27961 }, { "epoch": 4.5645483857801725, "grad_norm": 3.196381092071533, "learning_rate": 8.428056839212158e-06, "loss": 0.3843, "step": 27962 }, { "epoch": 4.564711644422677, "grad_norm": 3.4491875171661377, "learning_rate": 8.42742367195215e-06, "loss": 0.3786, "step": 27963 }, { "epoch": 4.564874903065181, "grad_norm": 3.948320150375366, "learning_rate": 8.42679051115641e-06, "loss": 0.3667, "step": 27964 }, { "epoch": 4.565038161707685, "grad_norm": 3.2664361000061035, "learning_rate": 8.426157356827538e-06, "loss": 0.3636, "step": 27965 }, { "epoch": 4.56520142035019, "grad_norm": 3.9348225593566895, "learning_rate": 8.42552420896814e-06, "loss": 0.4391, "step": 27966 }, { "epoch": 4.565364678992694, "grad_norm": 2.9120278358459473, "learning_rate": 8.42489106758082e-06, "loss": 0.3751, "step": 27967 }, { "epoch": 4.565527937635198, "grad_norm": 3.7275567054748535, "learning_rate": 8.424257932668175e-06, "loss": 0.3402, "step": 27968 }, { "epoch": 4.565691196277703, "grad_norm": 3.1722655296325684, "learning_rate": 8.423624804232814e-06, "loss": 0.3619, "step": 27969 }, { "epoch": 4.565854454920207, "grad_norm": 3.286214590072632, "learning_rate": 8.422991682277335e-06, "loss": 0.33, "step": 27970 }, { "epoch": 4.566017713562712, "grad_norm": 3.474886655807495, "learning_rate": 8.422358566804342e-06, "loss": 0.3541, "step": 27971 }, { "epoch": 4.566180972205216, "grad_norm": 2.733804702758789, "learning_rate": 8.421725457816437e-06, "loss": 0.3132, "step": 27972 }, { "epoch": 4.5663442308477205, "grad_norm": 3.249378204345703, "learning_rate": 8.421092355316223e-06, "loss": 0.3453, "step": 27973 }, { "epoch": 4.566507489490225, "grad_norm": 3.0449304580688477, "learning_rate": 8.420459259306302e-06, "loss": 0.3326, "step": 27974 }, { "epoch": 4.566670748132729, "grad_norm": 4.176352500915527, "learning_rate": 8.419826169789275e-06, "loss": 0.4361, "step": 27975 }, { "epoch": 4.566834006775234, "grad_norm": 3.568739414215088, "learning_rate": 8.419193086767748e-06, "loss": 0.3553, "step": 27976 }, { "epoch": 4.566997265417738, "grad_norm": 3.171515941619873, "learning_rate": 8.418560010244318e-06, "loss": 0.3642, "step": 27977 }, { "epoch": 4.567160524060243, "grad_norm": 3.537490129470825, "learning_rate": 8.417926940221592e-06, "loss": 0.3862, "step": 27978 }, { "epoch": 4.567323782702747, "grad_norm": 3.641747236251831, "learning_rate": 8.417293876702173e-06, "loss": 0.4167, "step": 27979 }, { "epoch": 4.5674870413452515, "grad_norm": 3.1488900184631348, "learning_rate": 8.41666081968866e-06, "loss": 0.3716, "step": 27980 }, { "epoch": 4.567650299987756, "grad_norm": 3.679633378982544, "learning_rate": 8.416027769183658e-06, "loss": 0.4234, "step": 27981 }, { "epoch": 4.5678135586302595, "grad_norm": 3.705230236053467, "learning_rate": 8.415394725189768e-06, "loss": 0.3793, "step": 27982 }, { "epoch": 4.567976817272765, "grad_norm": 3.853297233581543, "learning_rate": 8.41476168770959e-06, "loss": 0.3645, "step": 27983 }, { "epoch": 4.568140075915268, "grad_norm": 3.7228705883026123, "learning_rate": 8.414128656745726e-06, "loss": 0.3301, "step": 27984 }, { "epoch": 4.568303334557773, "grad_norm": 5.026853561401367, "learning_rate": 8.413495632300784e-06, "loss": 0.4364, "step": 27985 }, { "epoch": 4.568466593200277, "grad_norm": 3.954364061355591, "learning_rate": 8.412862614377362e-06, "loss": 0.4048, "step": 27986 }, { "epoch": 4.568629851842782, "grad_norm": 4.581364631652832, "learning_rate": 8.412229602978062e-06, "loss": 0.4181, "step": 27987 }, { "epoch": 4.568793110485286, "grad_norm": 3.624420642852783, "learning_rate": 8.411596598105483e-06, "loss": 0.4371, "step": 27988 }, { "epoch": 4.568956369127791, "grad_norm": 3.014328956604004, "learning_rate": 8.410963599762234e-06, "loss": 0.2907, "step": 27989 }, { "epoch": 4.569119627770295, "grad_norm": 4.361202239990234, "learning_rate": 8.410330607950913e-06, "loss": 0.4885, "step": 27990 }, { "epoch": 4.5692828864127994, "grad_norm": 3.532705545425415, "learning_rate": 8.409697622674128e-06, "loss": 0.3962, "step": 27991 }, { "epoch": 4.569446145055304, "grad_norm": 3.0649383068084717, "learning_rate": 8.409064643934467e-06, "loss": 0.3359, "step": 27992 }, { "epoch": 4.569609403697808, "grad_norm": 3.553231954574585, "learning_rate": 8.408431671734544e-06, "loss": 0.3411, "step": 27993 }, { "epoch": 4.569772662340313, "grad_norm": 3.416832447052002, "learning_rate": 8.407798706076958e-06, "loss": 0.3343, "step": 27994 }, { "epoch": 4.569935920982817, "grad_norm": 3.65317702293396, "learning_rate": 8.407165746964311e-06, "loss": 0.3478, "step": 27995 }, { "epoch": 4.570099179625322, "grad_norm": 4.032932758331299, "learning_rate": 8.406532794399203e-06, "loss": 0.3761, "step": 27996 }, { "epoch": 4.570262438267826, "grad_norm": 2.900479316711426, "learning_rate": 8.405899848384237e-06, "loss": 0.2934, "step": 27997 }, { "epoch": 4.5704256969103305, "grad_norm": 4.928996562957764, "learning_rate": 8.405266908922014e-06, "loss": 0.4726, "step": 27998 }, { "epoch": 4.570588955552835, "grad_norm": 3.085010051727295, "learning_rate": 8.404633976015136e-06, "loss": 0.3273, "step": 27999 }, { "epoch": 4.570752214195339, "grad_norm": 3.8431735038757324, "learning_rate": 8.404001049666211e-06, "loss": 0.4403, "step": 28000 }, { "epoch": 4.570915472837843, "grad_norm": 3.502284049987793, "learning_rate": 8.40336812987783e-06, "loss": 0.3619, "step": 28001 }, { "epoch": 4.571078731480347, "grad_norm": 3.8734021186828613, "learning_rate": 8.4027352166526e-06, "loss": 0.3842, "step": 28002 }, { "epoch": 4.571241990122852, "grad_norm": 4.471452713012695, "learning_rate": 8.402102309993123e-06, "loss": 0.4177, "step": 28003 }, { "epoch": 4.571405248765356, "grad_norm": 3.4470889568328857, "learning_rate": 8.401469409901999e-06, "loss": 0.4243, "step": 28004 }, { "epoch": 4.571568507407861, "grad_norm": 3.960606813430786, "learning_rate": 8.400836516381831e-06, "loss": 0.3916, "step": 28005 }, { "epoch": 4.571731766050365, "grad_norm": 3.7313480377197266, "learning_rate": 8.400203629435222e-06, "loss": 0.4235, "step": 28006 }, { "epoch": 4.57189502469287, "grad_norm": 4.468976020812988, "learning_rate": 8.39957074906477e-06, "loss": 0.4915, "step": 28007 }, { "epoch": 4.572058283335374, "grad_norm": 3.9396331310272217, "learning_rate": 8.39893787527308e-06, "loss": 0.4084, "step": 28008 }, { "epoch": 4.5722215419778784, "grad_norm": 2.9244439601898193, "learning_rate": 8.39830500806275e-06, "loss": 0.3171, "step": 28009 }, { "epoch": 4.572384800620383, "grad_norm": 3.626833438873291, "learning_rate": 8.397672147436387e-06, "loss": 0.3959, "step": 28010 }, { "epoch": 4.572548059262887, "grad_norm": 3.892387628555298, "learning_rate": 8.397039293396586e-06, "loss": 0.3481, "step": 28011 }, { "epoch": 4.572711317905392, "grad_norm": 3.30621075630188, "learning_rate": 8.396406445945951e-06, "loss": 0.3538, "step": 28012 }, { "epoch": 4.572874576547896, "grad_norm": 4.106638431549072, "learning_rate": 8.395773605087084e-06, "loss": 0.4532, "step": 28013 }, { "epoch": 4.573037835190401, "grad_norm": 3.728645086288452, "learning_rate": 8.395140770822586e-06, "loss": 0.3662, "step": 28014 }, { "epoch": 4.573201093832905, "grad_norm": 3.2967958450317383, "learning_rate": 8.394507943155056e-06, "loss": 0.4033, "step": 28015 }, { "epoch": 4.5733643524754095, "grad_norm": 3.434887647628784, "learning_rate": 8.3938751220871e-06, "loss": 0.3992, "step": 28016 }, { "epoch": 4.573527611117914, "grad_norm": 3.7554352283477783, "learning_rate": 8.393242307621317e-06, "loss": 0.3903, "step": 28017 }, { "epoch": 4.5736908697604175, "grad_norm": 3.1712357997894287, "learning_rate": 8.392609499760308e-06, "loss": 0.3518, "step": 28018 }, { "epoch": 4.573854128402923, "grad_norm": 3.3409550189971924, "learning_rate": 8.391976698506673e-06, "loss": 0.4043, "step": 28019 }, { "epoch": 4.574017387045426, "grad_norm": 3.700978994369507, "learning_rate": 8.391343903863018e-06, "loss": 0.4031, "step": 28020 }, { "epoch": 4.574180645687931, "grad_norm": 3.116821765899658, "learning_rate": 8.39071111583194e-06, "loss": 0.3828, "step": 28021 }, { "epoch": 4.574343904330435, "grad_norm": 3.765022039413452, "learning_rate": 8.390078334416039e-06, "loss": 0.3786, "step": 28022 }, { "epoch": 4.57450716297294, "grad_norm": 3.6121814250946045, "learning_rate": 8.389445559617917e-06, "loss": 0.4053, "step": 28023 }, { "epoch": 4.574670421615444, "grad_norm": 3.346397876739502, "learning_rate": 8.388812791440179e-06, "loss": 0.3243, "step": 28024 }, { "epoch": 4.574833680257949, "grad_norm": 3.980586528778076, "learning_rate": 8.38818002988542e-06, "loss": 0.4384, "step": 28025 }, { "epoch": 4.574996938900453, "grad_norm": 3.0289788246154785, "learning_rate": 8.387547274956245e-06, "loss": 0.3581, "step": 28026 }, { "epoch": 4.575160197542957, "grad_norm": 3.2768869400024414, "learning_rate": 8.386914526655257e-06, "loss": 0.3645, "step": 28027 }, { "epoch": 4.575323456185462, "grad_norm": 3.512808322906494, "learning_rate": 8.386281784985052e-06, "loss": 0.4229, "step": 28028 }, { "epoch": 4.575486714827966, "grad_norm": 3.926893949508667, "learning_rate": 8.385649049948234e-06, "loss": 0.45, "step": 28029 }, { "epoch": 4.575649973470471, "grad_norm": 3.4982736110687256, "learning_rate": 8.385016321547406e-06, "loss": 0.3971, "step": 28030 }, { "epoch": 4.575813232112975, "grad_norm": 3.6014087200164795, "learning_rate": 8.384383599785162e-06, "loss": 0.3819, "step": 28031 }, { "epoch": 4.57597649075548, "grad_norm": 3.107598066329956, "learning_rate": 8.38375088466411e-06, "loss": 0.3478, "step": 28032 }, { "epoch": 4.576139749397984, "grad_norm": 4.830906867980957, "learning_rate": 8.383118176186846e-06, "loss": 0.4755, "step": 28033 }, { "epoch": 4.5763030080404885, "grad_norm": 4.1541290283203125, "learning_rate": 8.382485474355973e-06, "loss": 0.4334, "step": 28034 }, { "epoch": 4.576466266682992, "grad_norm": 4.282944202423096, "learning_rate": 8.38185277917409e-06, "loss": 0.4088, "step": 28035 }, { "epoch": 4.576629525325497, "grad_norm": 3.728365659713745, "learning_rate": 8.381220090643801e-06, "loss": 0.3557, "step": 28036 }, { "epoch": 4.576792783968001, "grad_norm": 3.4918885231018066, "learning_rate": 8.380587408767706e-06, "loss": 0.3695, "step": 28037 }, { "epoch": 4.576956042610505, "grad_norm": 3.646328926086426, "learning_rate": 8.3799547335484e-06, "loss": 0.3595, "step": 28038 }, { "epoch": 4.57711930125301, "grad_norm": 4.483732223510742, "learning_rate": 8.37932206498849e-06, "loss": 0.4244, "step": 28039 }, { "epoch": 4.577282559895514, "grad_norm": 3.402956485748291, "learning_rate": 8.378689403090582e-06, "loss": 0.3563, "step": 28040 }, { "epoch": 4.577445818538019, "grad_norm": 4.186175346374512, "learning_rate": 8.378056747857264e-06, "loss": 0.4048, "step": 28041 }, { "epoch": 4.577609077180523, "grad_norm": 3.6293721199035645, "learning_rate": 8.377424099291141e-06, "loss": 0.4367, "step": 28042 }, { "epoch": 4.577772335823028, "grad_norm": 3.9931204319000244, "learning_rate": 8.376791457394818e-06, "loss": 0.7005, "step": 28043 }, { "epoch": 4.577935594465532, "grad_norm": 3.3375937938690186, "learning_rate": 8.37615882217089e-06, "loss": 0.3421, "step": 28044 }, { "epoch": 4.578098853108036, "grad_norm": 3.4676995277404785, "learning_rate": 8.37552619362196e-06, "loss": 0.3644, "step": 28045 }, { "epoch": 4.578262111750541, "grad_norm": 3.470733404159546, "learning_rate": 8.37489357175063e-06, "loss": 0.3372, "step": 28046 }, { "epoch": 4.578425370393045, "grad_norm": 3.9936931133270264, "learning_rate": 8.374260956559497e-06, "loss": 0.3975, "step": 28047 }, { "epoch": 4.57858862903555, "grad_norm": 3.640068292617798, "learning_rate": 8.373628348051165e-06, "loss": 0.3442, "step": 28048 }, { "epoch": 4.578751887678054, "grad_norm": 3.7947537899017334, "learning_rate": 8.372995746228234e-06, "loss": 0.386, "step": 28049 }, { "epoch": 4.578915146320559, "grad_norm": 3.1836771965026855, "learning_rate": 8.372363151093302e-06, "loss": 0.3123, "step": 28050 }, { "epoch": 4.579078404963063, "grad_norm": 3.7890517711639404, "learning_rate": 8.371730562648969e-06, "loss": 0.4081, "step": 28051 }, { "epoch": 4.5792416636055675, "grad_norm": 3.817384958267212, "learning_rate": 8.371097980897838e-06, "loss": 0.3886, "step": 28052 }, { "epoch": 4.579404922248072, "grad_norm": 3.9526543617248535, "learning_rate": 8.370465405842504e-06, "loss": 0.4913, "step": 28053 }, { "epoch": 4.5795681808905755, "grad_norm": 4.247734069824219, "learning_rate": 8.369832837485576e-06, "loss": 0.4418, "step": 28054 }, { "epoch": 4.57973143953308, "grad_norm": 3.4844164848327637, "learning_rate": 8.36920027582965e-06, "loss": 0.3594, "step": 28055 }, { "epoch": 4.579894698175584, "grad_norm": 3.7115018367767334, "learning_rate": 8.368567720877325e-06, "loss": 0.3653, "step": 28056 }, { "epoch": 4.580057956818089, "grad_norm": 3.7371771335601807, "learning_rate": 8.3679351726312e-06, "loss": 0.3904, "step": 28057 }, { "epoch": 4.580221215460593, "grad_norm": 3.2558140754699707, "learning_rate": 8.36730263109388e-06, "loss": 0.3403, "step": 28058 }, { "epoch": 4.580384474103098, "grad_norm": 3.103811502456665, "learning_rate": 8.366670096267963e-06, "loss": 0.3397, "step": 28059 }, { "epoch": 4.580547732745602, "grad_norm": 3.02878999710083, "learning_rate": 8.366037568156047e-06, "loss": 0.3218, "step": 28060 }, { "epoch": 4.580710991388107, "grad_norm": 3.4747660160064697, "learning_rate": 8.365405046760733e-06, "loss": 0.364, "step": 28061 }, { "epoch": 4.580874250030611, "grad_norm": 5.4287638664245605, "learning_rate": 8.364772532084623e-06, "loss": 0.417, "step": 28062 }, { "epoch": 4.581037508673115, "grad_norm": 4.4734907150268555, "learning_rate": 8.364140024130315e-06, "loss": 0.481, "step": 28063 }, { "epoch": 4.58120076731562, "grad_norm": 3.421173095703125, "learning_rate": 8.36350752290041e-06, "loss": 0.3618, "step": 28064 }, { "epoch": 4.581364025958124, "grad_norm": 3.36472225189209, "learning_rate": 8.362875028397505e-06, "loss": 0.3668, "step": 28065 }, { "epoch": 4.581527284600629, "grad_norm": 3.35595440864563, "learning_rate": 8.362242540624204e-06, "loss": 0.3857, "step": 28066 }, { "epoch": 4.581690543243133, "grad_norm": 3.454464912414551, "learning_rate": 8.361610059583107e-06, "loss": 0.374, "step": 28067 }, { "epoch": 4.581853801885638, "grad_norm": 3.296297311782837, "learning_rate": 8.36097758527681e-06, "loss": 0.3725, "step": 28068 }, { "epoch": 4.582017060528142, "grad_norm": 3.261286973953247, "learning_rate": 8.36034511770792e-06, "loss": 0.3241, "step": 28069 }, { "epoch": 4.5821803191706465, "grad_norm": 4.124048233032227, "learning_rate": 8.35971265687903e-06, "loss": 0.3608, "step": 28070 }, { "epoch": 4.58234357781315, "grad_norm": 3.9541852474212646, "learning_rate": 8.35908020279274e-06, "loss": 0.4035, "step": 28071 }, { "epoch": 4.5825068364556545, "grad_norm": 3.671154022216797, "learning_rate": 8.358447755451653e-06, "loss": 0.3633, "step": 28072 }, { "epoch": 4.582670095098159, "grad_norm": 4.141983985900879, "learning_rate": 8.357815314858368e-06, "loss": 0.3777, "step": 28073 }, { "epoch": 4.582833353740663, "grad_norm": 3.678900718688965, "learning_rate": 8.357182881015481e-06, "loss": 0.3152, "step": 28074 }, { "epoch": 4.582996612383168, "grad_norm": 3.56239914894104, "learning_rate": 8.356550453925599e-06, "loss": 0.4101, "step": 28075 }, { "epoch": 4.583159871025672, "grad_norm": 3.8025898933410645, "learning_rate": 8.355918033591312e-06, "loss": 0.4328, "step": 28076 }, { "epoch": 4.583323129668177, "grad_norm": 2.9815495014190674, "learning_rate": 8.35528562001523e-06, "loss": 0.3558, "step": 28077 }, { "epoch": 4.583486388310681, "grad_norm": 3.2180144786834717, "learning_rate": 8.354653213199946e-06, "loss": 0.3854, "step": 28078 }, { "epoch": 4.5836496469531856, "grad_norm": 3.5454161167144775, "learning_rate": 8.354020813148066e-06, "loss": 0.3851, "step": 28079 }, { "epoch": 4.58381290559569, "grad_norm": 3.903365135192871, "learning_rate": 8.353388419862178e-06, "loss": 0.4086, "step": 28080 }, { "epoch": 4.583976164238194, "grad_norm": 3.8081226348876953, "learning_rate": 8.352756033344892e-06, "loss": 0.3839, "step": 28081 }, { "epoch": 4.584139422880699, "grad_norm": 2.6950271129608154, "learning_rate": 8.352123653598803e-06, "loss": 0.2909, "step": 28082 }, { "epoch": 4.584302681523203, "grad_norm": 3.836592435836792, "learning_rate": 8.351491280626513e-06, "loss": 0.4177, "step": 28083 }, { "epoch": 4.584465940165708, "grad_norm": 4.109568119049072, "learning_rate": 8.350858914430617e-06, "loss": 0.4396, "step": 28084 }, { "epoch": 4.584629198808212, "grad_norm": 4.266550064086914, "learning_rate": 8.35022655501372e-06, "loss": 0.4111, "step": 28085 }, { "epoch": 4.584792457450717, "grad_norm": 3.210005044937134, "learning_rate": 8.349594202378417e-06, "loss": 0.3384, "step": 28086 }, { "epoch": 4.584955716093221, "grad_norm": 3.2002933025360107, "learning_rate": 8.34896185652731e-06, "loss": 0.2992, "step": 28087 }, { "epoch": 4.585118974735725, "grad_norm": 3.2769877910614014, "learning_rate": 8.348329517462999e-06, "loss": 0.3344, "step": 28088 }, { "epoch": 4.58528223337823, "grad_norm": 3.889561891555786, "learning_rate": 8.34769718518808e-06, "loss": 0.4374, "step": 28089 }, { "epoch": 4.5854454920207335, "grad_norm": 3.6052143573760986, "learning_rate": 8.347064859705153e-06, "loss": 0.3697, "step": 28090 }, { "epoch": 4.585608750663238, "grad_norm": 3.3464298248291016, "learning_rate": 8.346432541016819e-06, "loss": 0.3772, "step": 28091 }, { "epoch": 4.585772009305742, "grad_norm": 3.111812114715576, "learning_rate": 8.345800229125674e-06, "loss": 0.3533, "step": 28092 }, { "epoch": 4.585935267948247, "grad_norm": 3.132528066635132, "learning_rate": 8.345167924034321e-06, "loss": 0.2989, "step": 28093 }, { "epoch": 4.586098526590751, "grad_norm": 3.2213077545166016, "learning_rate": 8.34453562574536e-06, "loss": 0.3798, "step": 28094 }, { "epoch": 4.586261785233256, "grad_norm": 4.092153072357178, "learning_rate": 8.343903334261388e-06, "loss": 0.4263, "step": 28095 }, { "epoch": 4.58642504387576, "grad_norm": 3.09592604637146, "learning_rate": 8.343271049585003e-06, "loss": 0.3337, "step": 28096 }, { "epoch": 4.5865883025182645, "grad_norm": 4.041207313537598, "learning_rate": 8.342638771718804e-06, "loss": 0.4333, "step": 28097 }, { "epoch": 4.586751561160769, "grad_norm": 4.458313465118408, "learning_rate": 8.342006500665394e-06, "loss": 0.4324, "step": 28098 }, { "epoch": 4.586914819803273, "grad_norm": 3.6605684757232666, "learning_rate": 8.341374236427368e-06, "loss": 0.3472, "step": 28099 }, { "epoch": 4.587078078445778, "grad_norm": 3.352672815322876, "learning_rate": 8.340741979007325e-06, "loss": 0.3398, "step": 28100 }, { "epoch": 4.587241337088282, "grad_norm": 3.205733060836792, "learning_rate": 8.340109728407866e-06, "loss": 0.3129, "step": 28101 }, { "epoch": 4.587404595730787, "grad_norm": 3.9202992916107178, "learning_rate": 8.339477484631589e-06, "loss": 0.3353, "step": 28102 }, { "epoch": 4.587567854373291, "grad_norm": 3.7968997955322266, "learning_rate": 8.33884524768109e-06, "loss": 0.3417, "step": 28103 }, { "epoch": 4.587731113015796, "grad_norm": 4.632918834686279, "learning_rate": 8.338213017558974e-06, "loss": 0.4295, "step": 28104 }, { "epoch": 4.587894371658299, "grad_norm": 4.308273792266846, "learning_rate": 8.337580794267836e-06, "loss": 0.4739, "step": 28105 }, { "epoch": 4.5880576303008045, "grad_norm": 3.392979621887207, "learning_rate": 8.336948577810277e-06, "loss": 0.3537, "step": 28106 }, { "epoch": 4.588220888943308, "grad_norm": 4.128861427307129, "learning_rate": 8.336316368188895e-06, "loss": 0.4446, "step": 28107 }, { "epoch": 4.5883841475858125, "grad_norm": 3.6393988132476807, "learning_rate": 8.335684165406288e-06, "loss": 0.3664, "step": 28108 }, { "epoch": 4.588547406228317, "grad_norm": 3.341651678085327, "learning_rate": 8.335051969465055e-06, "loss": 0.3689, "step": 28109 }, { "epoch": 4.588710664870821, "grad_norm": 4.068806171417236, "learning_rate": 8.334419780367793e-06, "loss": 0.4267, "step": 28110 }, { "epoch": 4.588873923513326, "grad_norm": 3.94429087638855, "learning_rate": 8.333787598117104e-06, "loss": 0.3934, "step": 28111 }, { "epoch": 4.58903718215583, "grad_norm": 4.5780839920043945, "learning_rate": 8.333155422715582e-06, "loss": 0.4598, "step": 28112 }, { "epoch": 4.589200440798335, "grad_norm": 2.9465396404266357, "learning_rate": 8.332523254165832e-06, "loss": 0.3024, "step": 28113 }, { "epoch": 4.589363699440839, "grad_norm": 3.6929335594177246, "learning_rate": 8.331891092470446e-06, "loss": 0.3746, "step": 28114 }, { "epoch": 4.5895269580833435, "grad_norm": 3.5857388973236084, "learning_rate": 8.331258937632028e-06, "loss": 0.3664, "step": 28115 }, { "epoch": 4.589690216725848, "grad_norm": 3.4737555980682373, "learning_rate": 8.330626789653174e-06, "loss": 0.3556, "step": 28116 }, { "epoch": 4.589853475368352, "grad_norm": 3.541370153427124, "learning_rate": 8.329994648536483e-06, "loss": 0.3382, "step": 28117 }, { "epoch": 4.590016734010857, "grad_norm": 4.352499961853027, "learning_rate": 8.329362514284558e-06, "loss": 0.4017, "step": 28118 }, { "epoch": 4.590179992653361, "grad_norm": 3.7117912769317627, "learning_rate": 8.328730386899986e-06, "loss": 0.4283, "step": 28119 }, { "epoch": 4.590343251295866, "grad_norm": 3.695849895477295, "learning_rate": 8.328098266385376e-06, "loss": 0.3849, "step": 28120 }, { "epoch": 4.59050650993837, "grad_norm": 4.17039155960083, "learning_rate": 8.327466152743322e-06, "loss": 0.415, "step": 28121 }, { "epoch": 4.590669768580875, "grad_norm": 4.516384601593018, "learning_rate": 8.326834045976423e-06, "loss": 0.4387, "step": 28122 }, { "epoch": 4.590833027223379, "grad_norm": 3.666910409927368, "learning_rate": 8.326201946087275e-06, "loss": 0.3189, "step": 28123 }, { "epoch": 4.590996285865883, "grad_norm": 3.5566306114196777, "learning_rate": 8.325569853078481e-06, "loss": 0.3414, "step": 28124 }, { "epoch": 4.591159544508387, "grad_norm": 3.3222014904022217, "learning_rate": 8.324937766952638e-06, "loss": 0.3304, "step": 28125 }, { "epoch": 4.5913228031508915, "grad_norm": 3.5910260677337646, "learning_rate": 8.32430568771234e-06, "loss": 0.4245, "step": 28126 }, { "epoch": 4.591486061793396, "grad_norm": 3.376131534576416, "learning_rate": 8.32367361536019e-06, "loss": 0.3735, "step": 28127 }, { "epoch": 4.5916493204359, "grad_norm": 3.737169027328491, "learning_rate": 8.32304154989879e-06, "loss": 0.3713, "step": 28128 }, { "epoch": 4.591812579078405, "grad_norm": 3.308109760284424, "learning_rate": 8.322409491330729e-06, "loss": 0.3031, "step": 28129 }, { "epoch": 4.591975837720909, "grad_norm": 3.920821189880371, "learning_rate": 8.321777439658607e-06, "loss": 0.443, "step": 28130 }, { "epoch": 4.592139096363414, "grad_norm": 3.88620924949646, "learning_rate": 8.321145394885026e-06, "loss": 0.3559, "step": 28131 }, { "epoch": 4.592302355005918, "grad_norm": 4.2196245193481445, "learning_rate": 8.320513357012581e-06, "loss": 0.3881, "step": 28132 }, { "epoch": 4.5924656136484225, "grad_norm": 3.334826707839966, "learning_rate": 8.319881326043874e-06, "loss": 0.3586, "step": 28133 }, { "epoch": 4.592628872290927, "grad_norm": 3.322150468826294, "learning_rate": 8.319249301981498e-06, "loss": 0.3554, "step": 28134 }, { "epoch": 4.592792130933431, "grad_norm": 3.6430864334106445, "learning_rate": 8.318617284828056e-06, "loss": 0.3409, "step": 28135 }, { "epoch": 4.592955389575936, "grad_norm": 3.97037410736084, "learning_rate": 8.317985274586143e-06, "loss": 0.4134, "step": 28136 }, { "epoch": 4.59311864821844, "grad_norm": 3.1601622104644775, "learning_rate": 8.317353271258358e-06, "loss": 0.4128, "step": 28137 }, { "epoch": 4.593281906860945, "grad_norm": 4.3239569664001465, "learning_rate": 8.316721274847297e-06, "loss": 0.4746, "step": 28138 }, { "epoch": 4.593445165503449, "grad_norm": 2.8837971687316895, "learning_rate": 8.316089285355559e-06, "loss": 0.3485, "step": 28139 }, { "epoch": 4.593608424145954, "grad_norm": 3.9528067111968994, "learning_rate": 8.315457302785743e-06, "loss": 0.3925, "step": 28140 }, { "epoch": 4.593771682788457, "grad_norm": 3.445556879043579, "learning_rate": 8.314825327140444e-06, "loss": 0.3946, "step": 28141 }, { "epoch": 4.5939349414309625, "grad_norm": 4.227770805358887, "learning_rate": 8.314193358422263e-06, "loss": 0.4343, "step": 28142 }, { "epoch": 4.594098200073466, "grad_norm": 3.06243634223938, "learning_rate": 8.313561396633795e-06, "loss": 0.3119, "step": 28143 }, { "epoch": 4.5942614587159705, "grad_norm": 3.388929843902588, "learning_rate": 8.312929441777642e-06, "loss": 0.352, "step": 28144 }, { "epoch": 4.594424717358475, "grad_norm": 3.7716922760009766, "learning_rate": 8.312297493856399e-06, "loss": 0.3654, "step": 28145 }, { "epoch": 4.594587976000979, "grad_norm": 4.1292724609375, "learning_rate": 8.311665552872662e-06, "loss": 0.4112, "step": 28146 }, { "epoch": 4.594751234643484, "grad_norm": 4.249782562255859, "learning_rate": 8.311033618829033e-06, "loss": 0.4106, "step": 28147 }, { "epoch": 4.594914493285988, "grad_norm": 3.7178499698638916, "learning_rate": 8.310401691728105e-06, "loss": 0.3653, "step": 28148 }, { "epoch": 4.595077751928493, "grad_norm": 3.8813681602478027, "learning_rate": 8.309769771572478e-06, "loss": 0.416, "step": 28149 }, { "epoch": 4.595241010570997, "grad_norm": 4.482821464538574, "learning_rate": 8.309137858364748e-06, "loss": 0.4622, "step": 28150 }, { "epoch": 4.5954042692135015, "grad_norm": 2.9730637073516846, "learning_rate": 8.308505952107517e-06, "loss": 0.3173, "step": 28151 }, { "epoch": 4.595567527856006, "grad_norm": 2.857455253601074, "learning_rate": 8.307874052803376e-06, "loss": 0.2951, "step": 28152 }, { "epoch": 4.59573078649851, "grad_norm": 3.828070640563965, "learning_rate": 8.307242160454924e-06, "loss": 0.3821, "step": 28153 }, { "epoch": 4.595894045141015, "grad_norm": 4.081684589385986, "learning_rate": 8.306610275064764e-06, "loss": 0.3717, "step": 28154 }, { "epoch": 4.596057303783519, "grad_norm": 3.071483612060547, "learning_rate": 8.30597839663549e-06, "loss": 0.3894, "step": 28155 }, { "epoch": 4.596220562426024, "grad_norm": 3.9661262035369873, "learning_rate": 8.305346525169698e-06, "loss": 0.3836, "step": 28156 }, { "epoch": 4.596383821068528, "grad_norm": 3.288891077041626, "learning_rate": 8.304714660669988e-06, "loss": 0.3416, "step": 28157 }, { "epoch": 4.596547079711032, "grad_norm": 3.3025126457214355, "learning_rate": 8.304082803138956e-06, "loss": 0.3642, "step": 28158 }, { "epoch": 4.596710338353537, "grad_norm": 3.4305145740509033, "learning_rate": 8.303450952579198e-06, "loss": 0.3852, "step": 28159 }, { "epoch": 4.596873596996041, "grad_norm": 4.278336524963379, "learning_rate": 8.302819108993311e-06, "loss": 0.4094, "step": 28160 }, { "epoch": 4.597036855638545, "grad_norm": 3.3548693656921387, "learning_rate": 8.302187272383898e-06, "loss": 0.3888, "step": 28161 }, { "epoch": 4.5972001142810495, "grad_norm": 3.4324872493743896, "learning_rate": 8.301555442753548e-06, "loss": 0.3652, "step": 28162 }, { "epoch": 4.597363372923554, "grad_norm": 3.637037515640259, "learning_rate": 8.300923620104864e-06, "loss": 0.3817, "step": 28163 }, { "epoch": 4.597526631566058, "grad_norm": 3.9204795360565186, "learning_rate": 8.300291804440437e-06, "loss": 0.3809, "step": 28164 }, { "epoch": 4.597689890208563, "grad_norm": 3.744340419769287, "learning_rate": 8.299659995762874e-06, "loss": 0.4075, "step": 28165 }, { "epoch": 4.597853148851067, "grad_norm": 3.5931386947631836, "learning_rate": 8.299028194074765e-06, "loss": 0.3874, "step": 28166 }, { "epoch": 4.598016407493572, "grad_norm": 3.5476083755493164, "learning_rate": 8.298396399378714e-06, "loss": 0.4211, "step": 28167 }, { "epoch": 4.598179666136076, "grad_norm": 4.182292461395264, "learning_rate": 8.297764611677306e-06, "loss": 0.3512, "step": 28168 }, { "epoch": 4.5983429247785805, "grad_norm": 2.9454219341278076, "learning_rate": 8.297132830973146e-06, "loss": 0.3282, "step": 28169 }, { "epoch": 4.598506183421085, "grad_norm": 3.384821653366089, "learning_rate": 8.296501057268831e-06, "loss": 0.3886, "step": 28170 }, { "epoch": 4.598669442063589, "grad_norm": 3.9565134048461914, "learning_rate": 8.295869290566958e-06, "loss": 0.3919, "step": 28171 }, { "epoch": 4.598832700706094, "grad_norm": 3.7976815700531006, "learning_rate": 8.295237530870122e-06, "loss": 0.3858, "step": 28172 }, { "epoch": 4.598995959348598, "grad_norm": 3.45754075050354, "learning_rate": 8.29460577818092e-06, "loss": 0.346, "step": 28173 }, { "epoch": 4.599159217991103, "grad_norm": 4.11091423034668, "learning_rate": 8.29397403250195e-06, "loss": 0.443, "step": 28174 }, { "epoch": 4.599322476633607, "grad_norm": 2.943509101867676, "learning_rate": 8.29334229383581e-06, "loss": 0.3366, "step": 28175 }, { "epoch": 4.599485735276112, "grad_norm": 3.7256884574890137, "learning_rate": 8.292710562185091e-06, "loss": 0.3624, "step": 28176 }, { "epoch": 4.599648993918615, "grad_norm": 3.3515536785125732, "learning_rate": 8.292078837552402e-06, "loss": 0.3157, "step": 28177 }, { "epoch": 4.59981225256112, "grad_norm": 4.0131402015686035, "learning_rate": 8.291447119940326e-06, "loss": 0.3978, "step": 28178 }, { "epoch": 4.599975511203624, "grad_norm": 3.4983327388763428, "learning_rate": 8.290815409351468e-06, "loss": 0.3689, "step": 28179 }, { "epoch": 4.6001387698461285, "grad_norm": 3.5378644466400146, "learning_rate": 8.290183705788419e-06, "loss": 0.4623, "step": 28180 }, { "epoch": 4.600302028488633, "grad_norm": 3.5268001556396484, "learning_rate": 8.289552009253782e-06, "loss": 0.3725, "step": 28181 }, { "epoch": 4.600465287131137, "grad_norm": 3.7298319339752197, "learning_rate": 8.288920319750151e-06, "loss": 0.3519, "step": 28182 }, { "epoch": 4.600628545773642, "grad_norm": 3.7156805992126465, "learning_rate": 8.288288637280122e-06, "loss": 0.4058, "step": 28183 }, { "epoch": 4.600791804416146, "grad_norm": 3.7291176319122314, "learning_rate": 8.287656961846291e-06, "loss": 0.4251, "step": 28184 }, { "epoch": 4.600955063058651, "grad_norm": 2.707346200942993, "learning_rate": 8.287025293451258e-06, "loss": 0.3055, "step": 28185 }, { "epoch": 4.601118321701155, "grad_norm": 3.538433313369751, "learning_rate": 8.286393632097619e-06, "loss": 0.364, "step": 28186 }, { "epoch": 4.6012815803436595, "grad_norm": 3.309678554534912, "learning_rate": 8.285761977787964e-06, "loss": 0.3563, "step": 28187 }, { "epoch": 4.601444838986164, "grad_norm": 4.28397798538208, "learning_rate": 8.285130330524897e-06, "loss": 0.3722, "step": 28188 }, { "epoch": 4.601608097628668, "grad_norm": 3.0951900482177734, "learning_rate": 8.284498690311011e-06, "loss": 0.3406, "step": 28189 }, { "epoch": 4.601771356271173, "grad_norm": 3.580298900604248, "learning_rate": 8.283867057148901e-06, "loss": 0.3677, "step": 28190 }, { "epoch": 4.601934614913677, "grad_norm": 3.465895652770996, "learning_rate": 8.283235431041167e-06, "loss": 0.3823, "step": 28191 }, { "epoch": 4.602097873556182, "grad_norm": 3.2164125442504883, "learning_rate": 8.282603811990404e-06, "loss": 0.3329, "step": 28192 }, { "epoch": 4.602261132198686, "grad_norm": 3.7169859409332275, "learning_rate": 8.281972199999208e-06, "loss": 0.3768, "step": 28193 }, { "epoch": 4.60242439084119, "grad_norm": 3.5999667644500732, "learning_rate": 8.281340595070176e-06, "loss": 0.3759, "step": 28194 }, { "epoch": 4.602587649483695, "grad_norm": 3.264704465866089, "learning_rate": 8.280708997205904e-06, "loss": 0.3407, "step": 28195 }, { "epoch": 4.602750908126199, "grad_norm": 3.1397674083709717, "learning_rate": 8.28007740640899e-06, "loss": 0.3693, "step": 28196 }, { "epoch": 4.602914166768703, "grad_norm": 2.979701042175293, "learning_rate": 8.279445822682028e-06, "loss": 0.3483, "step": 28197 }, { "epoch": 4.6030774254112075, "grad_norm": 3.845717668533325, "learning_rate": 8.278814246027612e-06, "loss": 0.3754, "step": 28198 }, { "epoch": 4.603240684053712, "grad_norm": 3.291881561279297, "learning_rate": 8.278182676448342e-06, "loss": 0.3509, "step": 28199 }, { "epoch": 4.603403942696216, "grad_norm": 3.463484525680542, "learning_rate": 8.277551113946812e-06, "loss": 0.3927, "step": 28200 }, { "epoch": 4.603567201338721, "grad_norm": 3.6764519214630127, "learning_rate": 8.27691955852562e-06, "loss": 0.3803, "step": 28201 }, { "epoch": 4.603730459981225, "grad_norm": 3.515408992767334, "learning_rate": 8.276288010187361e-06, "loss": 0.3628, "step": 28202 }, { "epoch": 4.60389371862373, "grad_norm": 3.2165324687957764, "learning_rate": 8.275656468934628e-06, "loss": 0.362, "step": 28203 }, { "epoch": 4.604056977266234, "grad_norm": 3.5910587310791016, "learning_rate": 8.275024934770023e-06, "loss": 0.3517, "step": 28204 }, { "epoch": 4.6042202359087385, "grad_norm": 3.868237257003784, "learning_rate": 8.27439340769614e-06, "loss": 0.4203, "step": 28205 }, { "epoch": 4.604383494551243, "grad_norm": 3.9070985317230225, "learning_rate": 8.273761887715577e-06, "loss": 0.4236, "step": 28206 }, { "epoch": 4.604546753193747, "grad_norm": 3.543044090270996, "learning_rate": 8.273130374830921e-06, "loss": 0.3625, "step": 28207 }, { "epoch": 4.604710011836252, "grad_norm": 3.6175618171691895, "learning_rate": 8.272498869044776e-06, "loss": 0.3516, "step": 28208 }, { "epoch": 4.604873270478756, "grad_norm": 3.2815961837768555, "learning_rate": 8.271867370359736e-06, "loss": 0.3484, "step": 28209 }, { "epoch": 4.605036529121261, "grad_norm": 3.3558061122894287, "learning_rate": 8.271235878778396e-06, "loss": 0.3813, "step": 28210 }, { "epoch": 4.605199787763764, "grad_norm": 3.548480272293091, "learning_rate": 8.270604394303354e-06, "loss": 0.3029, "step": 28211 }, { "epoch": 4.60536304640627, "grad_norm": 3.7668309211730957, "learning_rate": 8.269972916937203e-06, "loss": 0.421, "step": 28212 }, { "epoch": 4.605526305048773, "grad_norm": 4.166652679443359, "learning_rate": 8.269341446682541e-06, "loss": 0.4206, "step": 28213 }, { "epoch": 4.605689563691278, "grad_norm": 3.902616500854492, "learning_rate": 8.26870998354196e-06, "loss": 0.3928, "step": 28214 }, { "epoch": 4.605852822333782, "grad_norm": 3.344428062438965, "learning_rate": 8.268078527518062e-06, "loss": 0.3449, "step": 28215 }, { "epoch": 4.6060160809762865, "grad_norm": 3.340975522994995, "learning_rate": 8.267447078613441e-06, "loss": 0.397, "step": 28216 }, { "epoch": 4.606179339618791, "grad_norm": 4.589015483856201, "learning_rate": 8.26681563683069e-06, "loss": 0.418, "step": 28217 }, { "epoch": 4.606342598261295, "grad_norm": 3.9163825511932373, "learning_rate": 8.2661842021724e-06, "loss": 0.4129, "step": 28218 }, { "epoch": 4.6065058569038, "grad_norm": 4.653299331665039, "learning_rate": 8.265552774641176e-06, "loss": 0.4937, "step": 28219 }, { "epoch": 4.606669115546304, "grad_norm": 3.5485949516296387, "learning_rate": 8.26492135423961e-06, "loss": 0.3498, "step": 28220 }, { "epoch": 4.606832374188809, "grad_norm": 3.9466490745544434, "learning_rate": 8.264289940970297e-06, "loss": 0.4234, "step": 28221 }, { "epoch": 4.606995632831313, "grad_norm": 3.0259897708892822, "learning_rate": 8.263658534835833e-06, "loss": 0.3394, "step": 28222 }, { "epoch": 4.6071588914738175, "grad_norm": 3.4643783569335938, "learning_rate": 8.26302713583881e-06, "loss": 0.4027, "step": 28223 }, { "epoch": 4.607322150116322, "grad_norm": 3.4683594703674316, "learning_rate": 8.26239574398183e-06, "loss": 0.3399, "step": 28224 }, { "epoch": 4.607485408758826, "grad_norm": 4.336164474487305, "learning_rate": 8.261764359267486e-06, "loss": 0.4169, "step": 28225 }, { "epoch": 4.607648667401331, "grad_norm": 4.122434139251709, "learning_rate": 8.26113298169837e-06, "loss": 0.4337, "step": 28226 }, { "epoch": 4.607811926043835, "grad_norm": 3.3096923828125, "learning_rate": 8.26050161127708e-06, "loss": 0.3743, "step": 28227 }, { "epoch": 4.60797518468634, "grad_norm": 3.7563865184783936, "learning_rate": 8.25987024800621e-06, "loss": 0.4467, "step": 28228 }, { "epoch": 4.608138443328844, "grad_norm": 2.860389471054077, "learning_rate": 8.259238891888357e-06, "loss": 0.3395, "step": 28229 }, { "epoch": 4.608301701971348, "grad_norm": 3.353705406188965, "learning_rate": 8.258607542926115e-06, "loss": 0.3475, "step": 28230 }, { "epoch": 4.608464960613852, "grad_norm": 3.159881830215454, "learning_rate": 8.25797620112208e-06, "loss": 0.3722, "step": 28231 }, { "epoch": 4.608628219256357, "grad_norm": 3.9732229709625244, "learning_rate": 8.257344866478849e-06, "loss": 0.4004, "step": 28232 }, { "epoch": 4.608791477898861, "grad_norm": 3.1889379024505615, "learning_rate": 8.256713538999013e-06, "loss": 0.3047, "step": 28233 }, { "epoch": 4.6089547365413654, "grad_norm": 3.603151798248291, "learning_rate": 8.25608221868517e-06, "loss": 0.4002, "step": 28234 }, { "epoch": 4.60911799518387, "grad_norm": 3.2652721405029297, "learning_rate": 8.25545090553992e-06, "loss": 0.33, "step": 28235 }, { "epoch": 4.609281253826374, "grad_norm": 4.020932197570801, "learning_rate": 8.254819599565846e-06, "loss": 0.4085, "step": 28236 }, { "epoch": 4.609444512468879, "grad_norm": 3.9504101276397705, "learning_rate": 8.254188300765552e-06, "loss": 0.337, "step": 28237 }, { "epoch": 4.609607771111383, "grad_norm": 4.19428014755249, "learning_rate": 8.25355700914163e-06, "loss": 0.414, "step": 28238 }, { "epoch": 4.609771029753888, "grad_norm": 2.9736785888671875, "learning_rate": 8.252925724696675e-06, "loss": 0.3434, "step": 28239 }, { "epoch": 4.609934288396392, "grad_norm": 3.896726608276367, "learning_rate": 8.252294447433283e-06, "loss": 0.4238, "step": 28240 }, { "epoch": 4.6100975470388965, "grad_norm": 3.5486888885498047, "learning_rate": 8.251663177354047e-06, "loss": 0.3781, "step": 28241 }, { "epoch": 4.610260805681401, "grad_norm": 3.4121131896972656, "learning_rate": 8.251031914461565e-06, "loss": 0.3628, "step": 28242 }, { "epoch": 4.610424064323905, "grad_norm": 3.9035608768463135, "learning_rate": 8.250400658758431e-06, "loss": 0.3937, "step": 28243 }, { "epoch": 4.61058732296641, "grad_norm": 4.291244029998779, "learning_rate": 8.249769410247239e-06, "loss": 0.3968, "step": 28244 }, { "epoch": 4.610750581608914, "grad_norm": 2.8568108081817627, "learning_rate": 8.249138168930587e-06, "loss": 0.3182, "step": 28245 }, { "epoch": 4.610913840251419, "grad_norm": 3.870018243789673, "learning_rate": 8.248506934811064e-06, "loss": 0.4136, "step": 28246 }, { "epoch": 4.611077098893922, "grad_norm": 4.093601226806641, "learning_rate": 8.247875707891268e-06, "loss": 0.4057, "step": 28247 }, { "epoch": 4.611240357536428, "grad_norm": 5.2134785652160645, "learning_rate": 8.247244488173792e-06, "loss": 0.3906, "step": 28248 }, { "epoch": 4.611403616178931, "grad_norm": 2.9856150150299072, "learning_rate": 8.246613275661234e-06, "loss": 0.3517, "step": 28249 }, { "epoch": 4.611566874821436, "grad_norm": 3.2531960010528564, "learning_rate": 8.245982070356186e-06, "loss": 0.3279, "step": 28250 }, { "epoch": 4.61173013346394, "grad_norm": 3.49625301361084, "learning_rate": 8.245350872261243e-06, "loss": 0.3188, "step": 28251 }, { "epoch": 4.6118933921064444, "grad_norm": 2.9023125171661377, "learning_rate": 8.244719681378998e-06, "loss": 0.2966, "step": 28252 }, { "epoch": 4.612056650748949, "grad_norm": 4.909613609313965, "learning_rate": 8.24408849771205e-06, "loss": 0.4108, "step": 28253 }, { "epoch": 4.612219909391453, "grad_norm": 2.961369752883911, "learning_rate": 8.24345732126299e-06, "loss": 0.3268, "step": 28254 }, { "epoch": 4.612383168033958, "grad_norm": 3.3437278270721436, "learning_rate": 8.24282615203442e-06, "loss": 0.3655, "step": 28255 }, { "epoch": 4.612546426676462, "grad_norm": 3.7164225578308105, "learning_rate": 8.242194990028921e-06, "loss": 0.3373, "step": 28256 }, { "epoch": 4.612709685318967, "grad_norm": 3.70578932762146, "learning_rate": 8.241563835249096e-06, "loss": 0.3911, "step": 28257 }, { "epoch": 4.612872943961471, "grad_norm": 2.9668524265289307, "learning_rate": 8.240932687697537e-06, "loss": 0.3292, "step": 28258 }, { "epoch": 4.6130362026039755, "grad_norm": 3.210314989089966, "learning_rate": 8.240301547376844e-06, "loss": 0.3714, "step": 28259 }, { "epoch": 4.61319946124648, "grad_norm": 4.08705472946167, "learning_rate": 8.239670414289603e-06, "loss": 0.3689, "step": 28260 }, { "epoch": 4.613362719888984, "grad_norm": 3.2787539958953857, "learning_rate": 8.239039288438414e-06, "loss": 0.357, "step": 28261 }, { "epoch": 4.613525978531489, "grad_norm": 4.177393436431885, "learning_rate": 8.238408169825868e-06, "loss": 0.3528, "step": 28262 }, { "epoch": 4.613689237173993, "grad_norm": 3.2875306606292725, "learning_rate": 8.237777058454562e-06, "loss": 0.3888, "step": 28263 }, { "epoch": 4.613852495816497, "grad_norm": 3.5057945251464844, "learning_rate": 8.237145954327088e-06, "loss": 0.3552, "step": 28264 }, { "epoch": 4.614015754459002, "grad_norm": 3.3969309329986572, "learning_rate": 8.236514857446045e-06, "loss": 0.3273, "step": 28265 }, { "epoch": 4.614179013101506, "grad_norm": 3.465461254119873, "learning_rate": 8.235883767814022e-06, "loss": 0.3671, "step": 28266 }, { "epoch": 4.61434227174401, "grad_norm": 3.314401626586914, "learning_rate": 8.235252685433612e-06, "loss": 0.4329, "step": 28267 }, { "epoch": 4.614505530386515, "grad_norm": 4.084967136383057, "learning_rate": 8.234621610307411e-06, "loss": 0.3758, "step": 28268 }, { "epoch": 4.614668789029019, "grad_norm": 4.465610027313232, "learning_rate": 8.233990542438018e-06, "loss": 0.4457, "step": 28269 }, { "epoch": 4.614832047671523, "grad_norm": 4.017642974853516, "learning_rate": 8.23335948182802e-06, "loss": 0.3462, "step": 28270 }, { "epoch": 4.614995306314028, "grad_norm": 3.5929133892059326, "learning_rate": 8.232728428480015e-06, "loss": 0.3972, "step": 28271 }, { "epoch": 4.615158564956532, "grad_norm": 3.222252368927002, "learning_rate": 8.232097382396597e-06, "loss": 0.3659, "step": 28272 }, { "epoch": 4.615321823599037, "grad_norm": 3.7089250087738037, "learning_rate": 8.23146634358036e-06, "loss": 0.3474, "step": 28273 }, { "epoch": 4.615485082241541, "grad_norm": 3.770540714263916, "learning_rate": 8.230835312033897e-06, "loss": 0.3315, "step": 28274 }, { "epoch": 4.615648340884046, "grad_norm": 3.7742772102355957, "learning_rate": 8.2302042877598e-06, "loss": 0.3783, "step": 28275 }, { "epoch": 4.61581159952655, "grad_norm": 3.444251537322998, "learning_rate": 8.229573270760666e-06, "loss": 0.4098, "step": 28276 }, { "epoch": 4.6159748581690545, "grad_norm": 3.472266435623169, "learning_rate": 8.228942261039088e-06, "loss": 0.3565, "step": 28277 }, { "epoch": 4.616138116811559, "grad_norm": 3.877352476119995, "learning_rate": 8.228311258597658e-06, "loss": 0.3689, "step": 28278 }, { "epoch": 4.616301375454063, "grad_norm": 4.288937568664551, "learning_rate": 8.22768026343897e-06, "loss": 0.4257, "step": 28279 }, { "epoch": 4.616464634096568, "grad_norm": 3.874889850616455, "learning_rate": 8.227049275565623e-06, "loss": 0.3962, "step": 28280 }, { "epoch": 4.616627892739072, "grad_norm": 3.8382911682128906, "learning_rate": 8.226418294980205e-06, "loss": 0.437, "step": 28281 }, { "epoch": 4.616791151381577, "grad_norm": 3.5904757976531982, "learning_rate": 8.225787321685312e-06, "loss": 0.314, "step": 28282 }, { "epoch": 4.61695441002408, "grad_norm": 3.394235372543335, "learning_rate": 8.225156355683538e-06, "loss": 0.3568, "step": 28283 }, { "epoch": 4.617117668666585, "grad_norm": 3.882479429244995, "learning_rate": 8.224525396977479e-06, "loss": 0.4127, "step": 28284 }, { "epoch": 4.617280927309089, "grad_norm": 3.873342752456665, "learning_rate": 8.223894445569723e-06, "loss": 0.3323, "step": 28285 }, { "epoch": 4.617444185951594, "grad_norm": 2.6051478385925293, "learning_rate": 8.223263501462865e-06, "loss": 0.2868, "step": 28286 }, { "epoch": 4.617607444594098, "grad_norm": 3.289144515991211, "learning_rate": 8.2226325646595e-06, "loss": 0.3552, "step": 28287 }, { "epoch": 4.617770703236602, "grad_norm": 3.2231147289276123, "learning_rate": 8.222001635162222e-06, "loss": 0.3836, "step": 28288 }, { "epoch": 4.617933961879107, "grad_norm": 2.9348134994506836, "learning_rate": 8.221370712973624e-06, "loss": 0.336, "step": 28289 }, { "epoch": 4.618097220521611, "grad_norm": 3.813976526260376, "learning_rate": 8.2207397980963e-06, "loss": 0.446, "step": 28290 }, { "epoch": 4.618260479164116, "grad_norm": 3.096106767654419, "learning_rate": 8.220108890532839e-06, "loss": 0.3339, "step": 28291 }, { "epoch": 4.61842373780662, "grad_norm": 3.2067766189575195, "learning_rate": 8.219477990285843e-06, "loss": 0.3468, "step": 28292 }, { "epoch": 4.618586996449125, "grad_norm": 3.7662432193756104, "learning_rate": 8.218847097357898e-06, "loss": 0.3804, "step": 28293 }, { "epoch": 4.618750255091629, "grad_norm": 3.531888008117676, "learning_rate": 8.218216211751607e-06, "loss": 0.3736, "step": 28294 }, { "epoch": 4.6189135137341335, "grad_norm": 2.993032217025757, "learning_rate": 8.217585333469546e-06, "loss": 0.308, "step": 28295 }, { "epoch": 4.619076772376638, "grad_norm": 4.793339729309082, "learning_rate": 8.216954462514323e-06, "loss": 0.4179, "step": 28296 }, { "epoch": 4.619240031019142, "grad_norm": 3.5858981609344482, "learning_rate": 8.216323598888529e-06, "loss": 0.3759, "step": 28297 }, { "epoch": 4.619403289661647, "grad_norm": 4.0642499923706055, "learning_rate": 8.215692742594754e-06, "loss": 0.3993, "step": 28298 }, { "epoch": 4.619566548304151, "grad_norm": 3.6360549926757812, "learning_rate": 8.21506189363559e-06, "loss": 0.3636, "step": 28299 }, { "epoch": 4.619729806946655, "grad_norm": 4.062929630279541, "learning_rate": 8.214431052013636e-06, "loss": 0.4153, "step": 28300 }, { "epoch": 4.619893065589159, "grad_norm": 3.336866617202759, "learning_rate": 8.213800217731478e-06, "loss": 0.3377, "step": 28301 }, { "epoch": 4.620056324231664, "grad_norm": 4.277591705322266, "learning_rate": 8.213169390791713e-06, "loss": 0.5728, "step": 28302 }, { "epoch": 4.620219582874168, "grad_norm": 3.7300264835357666, "learning_rate": 8.212538571196936e-06, "loss": 0.3795, "step": 28303 }, { "epoch": 4.620382841516673, "grad_norm": 2.8219480514526367, "learning_rate": 8.211907758949741e-06, "loss": 0.3262, "step": 28304 }, { "epoch": 4.620546100159177, "grad_norm": 3.765533208847046, "learning_rate": 8.211276954052716e-06, "loss": 0.3847, "step": 28305 }, { "epoch": 4.620709358801681, "grad_norm": 3.834747791290283, "learning_rate": 8.210646156508453e-06, "loss": 0.403, "step": 28306 }, { "epoch": 4.620872617444186, "grad_norm": 3.4576456546783447, "learning_rate": 8.210015366319551e-06, "loss": 0.406, "step": 28307 }, { "epoch": 4.62103587608669, "grad_norm": 3.7114555835723877, "learning_rate": 8.209384583488598e-06, "loss": 0.4152, "step": 28308 }, { "epoch": 4.621199134729195, "grad_norm": 3.2691516876220703, "learning_rate": 8.20875380801819e-06, "loss": 0.3518, "step": 28309 }, { "epoch": 4.621362393371699, "grad_norm": 3.7244598865509033, "learning_rate": 8.208123039910918e-06, "loss": 0.3693, "step": 28310 }, { "epoch": 4.621525652014204, "grad_norm": 3.2011451721191406, "learning_rate": 8.207492279169376e-06, "loss": 0.3186, "step": 28311 }, { "epoch": 4.621688910656708, "grad_norm": 3.6241729259490967, "learning_rate": 8.206861525796155e-06, "loss": 0.416, "step": 28312 }, { "epoch": 4.6218521692992125, "grad_norm": 3.2535154819488525, "learning_rate": 8.206230779793854e-06, "loss": 0.3599, "step": 28313 }, { "epoch": 4.622015427941717, "grad_norm": 4.377382755279541, "learning_rate": 8.205600041165057e-06, "loss": 0.4032, "step": 28314 }, { "epoch": 4.622178686584221, "grad_norm": 2.9988012313842773, "learning_rate": 8.204969309912361e-06, "loss": 0.3597, "step": 28315 }, { "epoch": 4.622341945226726, "grad_norm": 2.918680191040039, "learning_rate": 8.20433858603836e-06, "loss": 0.3639, "step": 28316 }, { "epoch": 4.622505203869229, "grad_norm": 3.8931005001068115, "learning_rate": 8.203707869545643e-06, "loss": 0.4274, "step": 28317 }, { "epoch": 4.622668462511735, "grad_norm": 3.5270676612854004, "learning_rate": 8.203077160436803e-06, "loss": 0.3852, "step": 28318 }, { "epoch": 4.622831721154238, "grad_norm": 4.003873825073242, "learning_rate": 8.202446458714437e-06, "loss": 0.462, "step": 28319 }, { "epoch": 4.622994979796743, "grad_norm": 3.314936399459839, "learning_rate": 8.201815764381133e-06, "loss": 0.383, "step": 28320 }, { "epoch": 4.623158238439247, "grad_norm": 2.9402825832366943, "learning_rate": 8.201185077439488e-06, "loss": 0.3656, "step": 28321 }, { "epoch": 4.6233214970817516, "grad_norm": 3.8354508876800537, "learning_rate": 8.20055439789209e-06, "loss": 0.4661, "step": 28322 }, { "epoch": 4.623484755724256, "grad_norm": 3.1811580657958984, "learning_rate": 8.199923725741536e-06, "loss": 0.3577, "step": 28323 }, { "epoch": 4.62364801436676, "grad_norm": 3.6415741443634033, "learning_rate": 8.199293060990415e-06, "loss": 0.3429, "step": 28324 }, { "epoch": 4.623811273009265, "grad_norm": 3.632497549057007, "learning_rate": 8.19866240364132e-06, "loss": 0.3644, "step": 28325 }, { "epoch": 4.623974531651769, "grad_norm": 3.5734870433807373, "learning_rate": 8.198031753696842e-06, "loss": 0.3715, "step": 28326 }, { "epoch": 4.624137790294274, "grad_norm": 3.72072172164917, "learning_rate": 8.197401111159576e-06, "loss": 0.3511, "step": 28327 }, { "epoch": 4.624301048936778, "grad_norm": 4.006687641143799, "learning_rate": 8.196770476032114e-06, "loss": 0.4217, "step": 28328 }, { "epoch": 4.624464307579283, "grad_norm": 3.7570698261260986, "learning_rate": 8.196139848317045e-06, "loss": 0.3862, "step": 28329 }, { "epoch": 4.624627566221787, "grad_norm": 3.7006301879882812, "learning_rate": 8.195509228016967e-06, "loss": 0.3838, "step": 28330 }, { "epoch": 4.6247908248642915, "grad_norm": 3.4209728240966797, "learning_rate": 8.194878615134467e-06, "loss": 0.3546, "step": 28331 }, { "epoch": 4.624954083506796, "grad_norm": 2.890974998474121, "learning_rate": 8.19424800967214e-06, "loss": 0.3623, "step": 28332 }, { "epoch": 4.6251173421493, "grad_norm": 3.2250030040740967, "learning_rate": 8.193617411632582e-06, "loss": 0.3517, "step": 28333 }, { "epoch": 4.625280600791804, "grad_norm": 4.832883358001709, "learning_rate": 8.192986821018376e-06, "loss": 0.523, "step": 28334 }, { "epoch": 4.625443859434309, "grad_norm": 4.110688209533691, "learning_rate": 8.192356237832121e-06, "loss": 0.4779, "step": 28335 }, { "epoch": 4.625607118076813, "grad_norm": 3.2531323432922363, "learning_rate": 8.191725662076405e-06, "loss": 0.3492, "step": 28336 }, { "epoch": 4.625770376719317, "grad_norm": 3.849670886993408, "learning_rate": 8.191095093753823e-06, "loss": 0.3777, "step": 28337 }, { "epoch": 4.625933635361822, "grad_norm": 3.650414228439331, "learning_rate": 8.190464532866966e-06, "loss": 0.4452, "step": 28338 }, { "epoch": 4.626096894004326, "grad_norm": 3.7028801441192627, "learning_rate": 8.189833979418426e-06, "loss": 0.3755, "step": 28339 }, { "epoch": 4.6262601526468305, "grad_norm": 3.8703441619873047, "learning_rate": 8.189203433410795e-06, "loss": 0.3737, "step": 28340 }, { "epoch": 4.626423411289335, "grad_norm": 3.5207347869873047, "learning_rate": 8.188572894846663e-06, "loss": 0.368, "step": 28341 }, { "epoch": 4.626586669931839, "grad_norm": 3.4329781532287598, "learning_rate": 8.187942363728626e-06, "loss": 0.3796, "step": 28342 }, { "epoch": 4.626749928574344, "grad_norm": 3.683026075363159, "learning_rate": 8.187311840059278e-06, "loss": 0.4481, "step": 28343 }, { "epoch": 4.626913187216848, "grad_norm": 3.2635657787323, "learning_rate": 8.186681323841201e-06, "loss": 0.368, "step": 28344 }, { "epoch": 4.627076445859353, "grad_norm": 3.7739155292510986, "learning_rate": 8.18605081507699e-06, "loss": 0.3982, "step": 28345 }, { "epoch": 4.627239704501857, "grad_norm": 4.09320592880249, "learning_rate": 8.185420313769245e-06, "loss": 0.4873, "step": 28346 }, { "epoch": 4.627402963144362, "grad_norm": 3.7990384101867676, "learning_rate": 8.184789819920549e-06, "loss": 0.4528, "step": 28347 }, { "epoch": 4.627566221786866, "grad_norm": 4.007448673248291, "learning_rate": 8.184159333533497e-06, "loss": 0.3388, "step": 28348 }, { "epoch": 4.6277294804293705, "grad_norm": 3.483732223510742, "learning_rate": 8.18352885461068e-06, "loss": 0.39, "step": 28349 }, { "epoch": 4.627892739071875, "grad_norm": 3.6693499088287354, "learning_rate": 8.182898383154692e-06, "loss": 0.3924, "step": 28350 }, { "epoch": 4.628055997714379, "grad_norm": 3.1387739181518555, "learning_rate": 8.18226791916812e-06, "loss": 0.3525, "step": 28351 }, { "epoch": 4.628219256356884, "grad_norm": 3.634110450744629, "learning_rate": 8.181637462653557e-06, "loss": 0.4036, "step": 28352 }, { "epoch": 4.628382514999387, "grad_norm": 3.440500259399414, "learning_rate": 8.181007013613603e-06, "loss": 0.3763, "step": 28353 }, { "epoch": 4.628545773641892, "grad_norm": 3.2090132236480713, "learning_rate": 8.180376572050836e-06, "loss": 0.3372, "step": 28354 }, { "epoch": 4.628709032284396, "grad_norm": 3.917768716812134, "learning_rate": 8.179746137967857e-06, "loss": 0.4033, "step": 28355 }, { "epoch": 4.628872290926901, "grad_norm": 3.5853230953216553, "learning_rate": 8.17911571136725e-06, "loss": 0.378, "step": 28356 }, { "epoch": 4.629035549569405, "grad_norm": 3.488924741744995, "learning_rate": 8.178485292251612e-06, "loss": 0.3666, "step": 28357 }, { "epoch": 4.6291988082119095, "grad_norm": 3.5664141178131104, "learning_rate": 8.177854880623534e-06, "loss": 0.3877, "step": 28358 }, { "epoch": 4.629362066854414, "grad_norm": 3.34005069732666, "learning_rate": 8.177224476485609e-06, "loss": 0.3694, "step": 28359 }, { "epoch": 4.629525325496918, "grad_norm": 3.5574843883514404, "learning_rate": 8.176594079840424e-06, "loss": 0.3984, "step": 28360 }, { "epoch": 4.629688584139423, "grad_norm": 3.6404480934143066, "learning_rate": 8.175963690690572e-06, "loss": 0.4018, "step": 28361 }, { "epoch": 4.629851842781927, "grad_norm": 3.4555776119232178, "learning_rate": 8.175333309038647e-06, "loss": 0.3673, "step": 28362 }, { "epoch": 4.630015101424432, "grad_norm": 4.210771560668945, "learning_rate": 8.174702934887235e-06, "loss": 0.3833, "step": 28363 }, { "epoch": 4.630178360066936, "grad_norm": 2.7171051502227783, "learning_rate": 8.17407256823893e-06, "loss": 0.328, "step": 28364 }, { "epoch": 4.630341618709441, "grad_norm": 2.8252551555633545, "learning_rate": 8.173442209096324e-06, "loss": 0.3349, "step": 28365 }, { "epoch": 4.630504877351945, "grad_norm": 3.6116037368774414, "learning_rate": 8.172811857462007e-06, "loss": 0.692, "step": 28366 }, { "epoch": 4.6306681359944495, "grad_norm": 3.9246878623962402, "learning_rate": 8.17218151333857e-06, "loss": 0.4561, "step": 28367 }, { "epoch": 4.630831394636954, "grad_norm": 3.3941214084625244, "learning_rate": 8.171551176728604e-06, "loss": 0.3443, "step": 28368 }, { "epoch": 4.630994653279458, "grad_norm": 4.123940944671631, "learning_rate": 8.170920847634701e-06, "loss": 0.4515, "step": 28369 }, { "epoch": 4.631157911921962, "grad_norm": 3.0746893882751465, "learning_rate": 8.170290526059454e-06, "loss": 0.3576, "step": 28370 }, { "epoch": 4.631321170564467, "grad_norm": 4.173401355743408, "learning_rate": 8.16966021200545e-06, "loss": 0.3889, "step": 28371 }, { "epoch": 4.631484429206971, "grad_norm": 3.1264536380767822, "learning_rate": 8.169029905475285e-06, "loss": 0.3225, "step": 28372 }, { "epoch": 4.631647687849475, "grad_norm": 3.033097267150879, "learning_rate": 8.168399606471543e-06, "loss": 0.3654, "step": 28373 }, { "epoch": 4.63181094649198, "grad_norm": 3.5333964824676514, "learning_rate": 8.16776931499682e-06, "loss": 0.3687, "step": 28374 }, { "epoch": 4.631974205134484, "grad_norm": 3.5943000316619873, "learning_rate": 8.167139031053705e-06, "loss": 0.3391, "step": 28375 }, { "epoch": 4.6321374637769885, "grad_norm": 4.040675163269043, "learning_rate": 8.16650875464479e-06, "loss": 0.4543, "step": 28376 }, { "epoch": 4.632300722419493, "grad_norm": 4.068548679351807, "learning_rate": 8.165878485772664e-06, "loss": 0.3803, "step": 28377 }, { "epoch": 4.632463981061997, "grad_norm": 3.375155448913574, "learning_rate": 8.165248224439918e-06, "loss": 0.3179, "step": 28378 }, { "epoch": 4.632627239704502, "grad_norm": 3.5255658626556396, "learning_rate": 8.164617970649145e-06, "loss": 0.3696, "step": 28379 }, { "epoch": 4.632790498347006, "grad_norm": 4.160839557647705, "learning_rate": 8.163987724402934e-06, "loss": 0.4482, "step": 28380 }, { "epoch": 4.632953756989511, "grad_norm": 3.2511954307556152, "learning_rate": 8.163357485703877e-06, "loss": 0.4168, "step": 28381 }, { "epoch": 4.633117015632015, "grad_norm": 3.6469478607177734, "learning_rate": 8.162727254554567e-06, "loss": 0.3551, "step": 28382 }, { "epoch": 4.63328027427452, "grad_norm": 3.064387798309326, "learning_rate": 8.162097030957588e-06, "loss": 0.3464, "step": 28383 }, { "epoch": 4.633443532917024, "grad_norm": 3.7978203296661377, "learning_rate": 8.161466814915535e-06, "loss": 0.4058, "step": 28384 }, { "epoch": 4.6336067915595285, "grad_norm": 3.1471633911132812, "learning_rate": 8.160836606430996e-06, "loss": 0.3842, "step": 28385 }, { "epoch": 4.633770050202033, "grad_norm": 3.3586788177490234, "learning_rate": 8.160206405506564e-06, "loss": 0.3661, "step": 28386 }, { "epoch": 4.6339333088445365, "grad_norm": 3.6101956367492676, "learning_rate": 8.15957621214483e-06, "loss": 0.3712, "step": 28387 }, { "epoch": 4.634096567487042, "grad_norm": 3.3334782123565674, "learning_rate": 8.158946026348383e-06, "loss": 0.3292, "step": 28388 }, { "epoch": 4.634259826129545, "grad_norm": 3.065852403640747, "learning_rate": 8.158315848119813e-06, "loss": 0.3523, "step": 28389 }, { "epoch": 4.63442308477205, "grad_norm": 3.0223493576049805, "learning_rate": 8.157685677461709e-06, "loss": 0.3568, "step": 28390 }, { "epoch": 4.634586343414554, "grad_norm": 3.099109411239624, "learning_rate": 8.157055514376667e-06, "loss": 0.3147, "step": 28391 }, { "epoch": 4.634749602057059, "grad_norm": 3.4967753887176514, "learning_rate": 8.156425358867276e-06, "loss": 0.3477, "step": 28392 }, { "epoch": 4.634912860699563, "grad_norm": 3.6445274353027344, "learning_rate": 8.155795210936122e-06, "loss": 0.3984, "step": 28393 }, { "epoch": 4.6350761193420675, "grad_norm": 3.6863858699798584, "learning_rate": 8.155165070585794e-06, "loss": 0.4015, "step": 28394 }, { "epoch": 4.635239377984572, "grad_norm": 3.3522348403930664, "learning_rate": 8.154534937818888e-06, "loss": 0.3469, "step": 28395 }, { "epoch": 4.635402636627076, "grad_norm": 3.3806393146514893, "learning_rate": 8.153904812637991e-06, "loss": 0.3665, "step": 28396 }, { "epoch": 4.635565895269581, "grad_norm": 3.2301621437072754, "learning_rate": 8.153274695045696e-06, "loss": 0.3338, "step": 28397 }, { "epoch": 4.635729153912085, "grad_norm": 3.6891725063323975, "learning_rate": 8.152644585044592e-06, "loss": 0.4007, "step": 28398 }, { "epoch": 4.63589241255459, "grad_norm": 3.5572338104248047, "learning_rate": 8.152014482637267e-06, "loss": 0.4199, "step": 28399 }, { "epoch": 4.636055671197094, "grad_norm": 3.624354124069214, "learning_rate": 8.151384387826313e-06, "loss": 0.3551, "step": 28400 }, { "epoch": 4.636218929839599, "grad_norm": 3.8259761333465576, "learning_rate": 8.150754300614321e-06, "loss": 0.4016, "step": 28401 }, { "epoch": 4.636382188482103, "grad_norm": 3.8610570430755615, "learning_rate": 8.150124221003882e-06, "loss": 0.3307, "step": 28402 }, { "epoch": 4.6365454471246075, "grad_norm": 3.388974905014038, "learning_rate": 8.14949414899758e-06, "loss": 0.387, "step": 28403 }, { "epoch": 4.636708705767112, "grad_norm": 4.211234092712402, "learning_rate": 8.14886408459801e-06, "loss": 0.4048, "step": 28404 }, { "epoch": 4.636871964409616, "grad_norm": 4.197689056396484, "learning_rate": 8.14823402780776e-06, "loss": 0.419, "step": 28405 }, { "epoch": 4.63703522305212, "grad_norm": 3.1639997959136963, "learning_rate": 8.147603978629419e-06, "loss": 0.3419, "step": 28406 }, { "epoch": 4.637198481694624, "grad_norm": 3.603909969329834, "learning_rate": 8.14697393706558e-06, "loss": 0.3801, "step": 28407 }, { "epoch": 4.637361740337129, "grad_norm": 4.496657371520996, "learning_rate": 8.146343903118832e-06, "loss": 0.482, "step": 28408 }, { "epoch": 4.637524998979633, "grad_norm": 3.7296464443206787, "learning_rate": 8.145713876791764e-06, "loss": 0.3882, "step": 28409 }, { "epoch": 4.637688257622138, "grad_norm": 3.8003695011138916, "learning_rate": 8.145083858086966e-06, "loss": 0.3669, "step": 28410 }, { "epoch": 4.637851516264642, "grad_norm": 3.4802794456481934, "learning_rate": 8.14445384700703e-06, "loss": 0.383, "step": 28411 }, { "epoch": 4.6380147749071465, "grad_norm": 3.175121545791626, "learning_rate": 8.143823843554543e-06, "loss": 0.354, "step": 28412 }, { "epoch": 4.638178033549651, "grad_norm": 4.249218463897705, "learning_rate": 8.143193847732094e-06, "loss": 0.4543, "step": 28413 }, { "epoch": 4.638341292192155, "grad_norm": 3.178443193435669, "learning_rate": 8.142563859542275e-06, "loss": 0.3411, "step": 28414 }, { "epoch": 4.63850455083466, "grad_norm": 3.5378921031951904, "learning_rate": 8.141933878987673e-06, "loss": 0.3443, "step": 28415 }, { "epoch": 4.638667809477164, "grad_norm": 3.8521738052368164, "learning_rate": 8.141303906070879e-06, "loss": 0.353, "step": 28416 }, { "epoch": 4.638831068119669, "grad_norm": 3.023465156555176, "learning_rate": 8.140673940794481e-06, "loss": 0.3413, "step": 28417 }, { "epoch": 4.638994326762173, "grad_norm": 3.321608543395996, "learning_rate": 8.140043983161075e-06, "loss": 0.3527, "step": 28418 }, { "epoch": 4.639157585404678, "grad_norm": 3.481193780899048, "learning_rate": 8.139414033173243e-06, "loss": 0.4391, "step": 28419 }, { "epoch": 4.639320844047182, "grad_norm": 3.5199060440063477, "learning_rate": 8.138784090833577e-06, "loss": 0.4187, "step": 28420 }, { "epoch": 4.6394841026896865, "grad_norm": 3.738539695739746, "learning_rate": 8.138154156144673e-06, "loss": 0.4609, "step": 28421 }, { "epoch": 4.639647361332191, "grad_norm": 3.369079113006592, "learning_rate": 8.137524229109109e-06, "loss": 0.3862, "step": 28422 }, { "epoch": 4.6398106199746945, "grad_norm": 3.3633780479431152, "learning_rate": 8.13689430972948e-06, "loss": 0.3479, "step": 28423 }, { "epoch": 4.6399738786172, "grad_norm": 2.840113639831543, "learning_rate": 8.136264398008375e-06, "loss": 0.2869, "step": 28424 }, { "epoch": 4.640137137259703, "grad_norm": 3.1514394283294678, "learning_rate": 8.135634493948381e-06, "loss": 0.3239, "step": 28425 }, { "epoch": 4.640300395902208, "grad_norm": 3.2334229946136475, "learning_rate": 8.135004597552095e-06, "loss": 0.3653, "step": 28426 }, { "epoch": 4.640463654544712, "grad_norm": 4.286754131317139, "learning_rate": 8.134374708822097e-06, "loss": 0.4226, "step": 28427 }, { "epoch": 4.640626913187217, "grad_norm": 3.9577744007110596, "learning_rate": 8.133744827760981e-06, "loss": 0.3894, "step": 28428 }, { "epoch": 4.640790171829721, "grad_norm": 3.3229639530181885, "learning_rate": 8.133114954371332e-06, "loss": 0.3278, "step": 28429 }, { "epoch": 4.6409534304722255, "grad_norm": 3.3669991493225098, "learning_rate": 8.132485088655746e-06, "loss": 0.332, "step": 28430 }, { "epoch": 4.64111668911473, "grad_norm": 4.058709621429443, "learning_rate": 8.131855230616813e-06, "loss": 0.3876, "step": 28431 }, { "epoch": 4.641279947757234, "grad_norm": 3.5533790588378906, "learning_rate": 8.131225380257113e-06, "loss": 0.3734, "step": 28432 }, { "epoch": 4.641443206399739, "grad_norm": 3.5468227863311768, "learning_rate": 8.130595537579237e-06, "loss": 0.38, "step": 28433 }, { "epoch": 4.641606465042243, "grad_norm": 3.5137596130371094, "learning_rate": 8.129965702585782e-06, "loss": 0.4021, "step": 28434 }, { "epoch": 4.641769723684748, "grad_norm": 3.7342145442962646, "learning_rate": 8.129335875279329e-06, "loss": 0.4111, "step": 28435 }, { "epoch": 4.641932982327252, "grad_norm": 3.9357798099517822, "learning_rate": 8.12870605566247e-06, "loss": 0.4147, "step": 28436 }, { "epoch": 4.642096240969757, "grad_norm": 3.977980613708496, "learning_rate": 8.128076243737795e-06, "loss": 0.3873, "step": 28437 }, { "epoch": 4.642259499612261, "grad_norm": 3.398832082748413, "learning_rate": 8.127446439507892e-06, "loss": 0.3462, "step": 28438 }, { "epoch": 4.6424227582547655, "grad_norm": 3.537825345993042, "learning_rate": 8.126816642975348e-06, "loss": 0.4154, "step": 28439 }, { "epoch": 4.642586016897269, "grad_norm": 3.581509828567505, "learning_rate": 8.126186854142752e-06, "loss": 0.3649, "step": 28440 }, { "epoch": 4.642749275539774, "grad_norm": 4.04749059677124, "learning_rate": 8.125557073012701e-06, "loss": 0.4516, "step": 28441 }, { "epoch": 4.642912534182278, "grad_norm": 3.5153748989105225, "learning_rate": 8.124927299587772e-06, "loss": 0.3429, "step": 28442 }, { "epoch": 4.643075792824782, "grad_norm": 3.704829454421997, "learning_rate": 8.124297533870562e-06, "loss": 0.3756, "step": 28443 }, { "epoch": 4.643239051467287, "grad_norm": 4.155412673950195, "learning_rate": 8.123667775863651e-06, "loss": 0.4653, "step": 28444 }, { "epoch": 4.643402310109791, "grad_norm": 3.2650210857391357, "learning_rate": 8.123038025569637e-06, "loss": 0.2995, "step": 28445 }, { "epoch": 4.643565568752296, "grad_norm": 3.2482898235321045, "learning_rate": 8.122408282991103e-06, "loss": 0.364, "step": 28446 }, { "epoch": 4.6437288273948, "grad_norm": 3.4942467212677, "learning_rate": 8.121778548130642e-06, "loss": 0.3555, "step": 28447 }, { "epoch": 4.6438920860373045, "grad_norm": 3.517876148223877, "learning_rate": 8.121148820990839e-06, "loss": 0.3896, "step": 28448 }, { "epoch": 4.644055344679809, "grad_norm": 3.2972686290740967, "learning_rate": 8.120519101574283e-06, "loss": 0.3881, "step": 28449 }, { "epoch": 4.644218603322313, "grad_norm": 2.8355088233947754, "learning_rate": 8.119889389883567e-06, "loss": 0.3274, "step": 28450 }, { "epoch": 4.644381861964818, "grad_norm": 3.811997890472412, "learning_rate": 8.119259685921272e-06, "loss": 0.4225, "step": 28451 }, { "epoch": 4.644545120607322, "grad_norm": 3.467224359512329, "learning_rate": 8.118629989689992e-06, "loss": 0.3603, "step": 28452 }, { "epoch": 4.644708379249827, "grad_norm": 3.1488595008850098, "learning_rate": 8.118000301192313e-06, "loss": 0.3072, "step": 28453 }, { "epoch": 4.644871637892331, "grad_norm": 3.506504774093628, "learning_rate": 8.117370620430825e-06, "loss": 0.3551, "step": 28454 }, { "epoch": 4.645034896534836, "grad_norm": 3.5183346271514893, "learning_rate": 8.116740947408114e-06, "loss": 0.3516, "step": 28455 }, { "epoch": 4.64519815517734, "grad_norm": 3.8504891395568848, "learning_rate": 8.116111282126768e-06, "loss": 0.3803, "step": 28456 }, { "epoch": 4.6453614138198445, "grad_norm": 3.564249038696289, "learning_rate": 8.11548162458938e-06, "loss": 0.3596, "step": 28457 }, { "epoch": 4.645524672462349, "grad_norm": 3.2216598987579346, "learning_rate": 8.114851974798535e-06, "loss": 0.3664, "step": 28458 }, { "epoch": 4.6456879311048525, "grad_norm": 3.4799325466156006, "learning_rate": 8.114222332756822e-06, "loss": 0.3929, "step": 28459 }, { "epoch": 4.645851189747357, "grad_norm": 3.7740354537963867, "learning_rate": 8.11359269846683e-06, "loss": 0.4435, "step": 28460 }, { "epoch": 4.646014448389861, "grad_norm": 3.4497787952423096, "learning_rate": 8.112963071931146e-06, "loss": 0.3762, "step": 28461 }, { "epoch": 4.646177707032366, "grad_norm": 3.8663136959075928, "learning_rate": 8.112333453152356e-06, "loss": 0.4371, "step": 28462 }, { "epoch": 4.64634096567487, "grad_norm": 3.928199052810669, "learning_rate": 8.111703842133052e-06, "loss": 0.409, "step": 28463 }, { "epoch": 4.646504224317375, "grad_norm": 3.6679513454437256, "learning_rate": 8.11107423887582e-06, "loss": 0.3804, "step": 28464 }, { "epoch": 4.646667482959879, "grad_norm": 2.742129325866699, "learning_rate": 8.110444643383248e-06, "loss": 0.3294, "step": 28465 }, { "epoch": 4.6468307416023835, "grad_norm": 4.07293176651001, "learning_rate": 8.109815055657926e-06, "loss": 0.3765, "step": 28466 }, { "epoch": 4.646994000244888, "grad_norm": 3.6085050106048584, "learning_rate": 8.109185475702439e-06, "loss": 0.3567, "step": 28467 }, { "epoch": 4.647157258887392, "grad_norm": 4.194447040557861, "learning_rate": 8.108555903519376e-06, "loss": 0.4068, "step": 28468 }, { "epoch": 4.647320517529897, "grad_norm": 3.258382558822632, "learning_rate": 8.107926339111328e-06, "loss": 0.3406, "step": 28469 }, { "epoch": 4.647483776172401, "grad_norm": 3.0821590423583984, "learning_rate": 8.107296782480882e-06, "loss": 0.4277, "step": 28470 }, { "epoch": 4.647647034814906, "grad_norm": 3.080277442932129, "learning_rate": 8.106667233630621e-06, "loss": 0.3316, "step": 28471 }, { "epoch": 4.64781029345741, "grad_norm": 4.41802453994751, "learning_rate": 8.106037692563136e-06, "loss": 0.4064, "step": 28472 }, { "epoch": 4.647973552099915, "grad_norm": 4.49538516998291, "learning_rate": 8.105408159281016e-06, "loss": 0.4006, "step": 28473 }, { "epoch": 4.648136810742419, "grad_norm": 3.6050493717193604, "learning_rate": 8.104778633786847e-06, "loss": 0.3616, "step": 28474 }, { "epoch": 4.6483000693849235, "grad_norm": 3.4994454383850098, "learning_rate": 8.104149116083216e-06, "loss": 0.345, "step": 28475 }, { "epoch": 4.648463328027427, "grad_norm": 3.059889078140259, "learning_rate": 8.103519606172713e-06, "loss": 0.3875, "step": 28476 }, { "epoch": 4.648626586669932, "grad_norm": 4.0883283615112305, "learning_rate": 8.102890104057927e-06, "loss": 0.4308, "step": 28477 }, { "epoch": 4.648789845312436, "grad_norm": 3.28662109375, "learning_rate": 8.102260609741438e-06, "loss": 0.3565, "step": 28478 }, { "epoch": 4.64895310395494, "grad_norm": 4.5400519371032715, "learning_rate": 8.101631123225845e-06, "loss": 0.3993, "step": 28479 }, { "epoch": 4.649116362597445, "grad_norm": 3.253058910369873, "learning_rate": 8.101001644513731e-06, "loss": 0.3517, "step": 28480 }, { "epoch": 4.649279621239949, "grad_norm": 3.1898996829986572, "learning_rate": 8.10037217360768e-06, "loss": 0.3829, "step": 28481 }, { "epoch": 4.649442879882454, "grad_norm": 3.623058557510376, "learning_rate": 8.09974271051028e-06, "loss": 0.4178, "step": 28482 }, { "epoch": 4.649606138524958, "grad_norm": 3.6202354431152344, "learning_rate": 8.099113255224118e-06, "loss": 0.3604, "step": 28483 }, { "epoch": 4.6497693971674625, "grad_norm": 3.514254331588745, "learning_rate": 8.098483807751786e-06, "loss": 0.3581, "step": 28484 }, { "epoch": 4.649932655809967, "grad_norm": 3.1800789833068848, "learning_rate": 8.09785436809587e-06, "loss": 0.3769, "step": 28485 }, { "epoch": 4.650095914452471, "grad_norm": 3.9574105739593506, "learning_rate": 8.097224936258956e-06, "loss": 0.37, "step": 28486 }, { "epoch": 4.650259173094976, "grad_norm": 4.696293830871582, "learning_rate": 8.096595512243632e-06, "loss": 0.3406, "step": 28487 }, { "epoch": 4.65042243173748, "grad_norm": 3.320103645324707, "learning_rate": 8.095966096052485e-06, "loss": 0.36, "step": 28488 }, { "epoch": 4.650585690379985, "grad_norm": 3.5656442642211914, "learning_rate": 8.095336687688102e-06, "loss": 0.3789, "step": 28489 }, { "epoch": 4.650748949022489, "grad_norm": 3.587083578109741, "learning_rate": 8.094707287153073e-06, "loss": 0.3831, "step": 28490 }, { "epoch": 4.650912207664994, "grad_norm": 4.0402703285217285, "learning_rate": 8.09407789444998e-06, "loss": 0.3682, "step": 28491 }, { "epoch": 4.651075466307498, "grad_norm": 2.899766206741333, "learning_rate": 8.093448509581414e-06, "loss": 0.2988, "step": 28492 }, { "epoch": 4.651238724950002, "grad_norm": 4.066213130950928, "learning_rate": 8.092819132549962e-06, "loss": 0.3894, "step": 28493 }, { "epoch": 4.651401983592507, "grad_norm": 3.858558416366577, "learning_rate": 8.092189763358206e-06, "loss": 0.5254, "step": 28494 }, { "epoch": 4.6515652422350104, "grad_norm": 3.2475385665893555, "learning_rate": 8.091560402008743e-06, "loss": 0.3446, "step": 28495 }, { "epoch": 4.651728500877515, "grad_norm": 3.4664089679718018, "learning_rate": 8.090931048504153e-06, "loss": 0.3406, "step": 28496 }, { "epoch": 4.651891759520019, "grad_norm": 3.68764328956604, "learning_rate": 8.090301702847024e-06, "loss": 0.341, "step": 28497 }, { "epoch": 4.652055018162524, "grad_norm": 3.507659912109375, "learning_rate": 8.089672365039943e-06, "loss": 0.3132, "step": 28498 }, { "epoch": 4.652218276805028, "grad_norm": 3.9945883750915527, "learning_rate": 8.089043035085502e-06, "loss": 0.4064, "step": 28499 }, { "epoch": 4.652381535447533, "grad_norm": 3.458160638809204, "learning_rate": 8.08841371298628e-06, "loss": 0.3345, "step": 28500 }, { "epoch": 4.652544794090037, "grad_norm": 3.6635429859161377, "learning_rate": 8.087784398744867e-06, "loss": 0.3867, "step": 28501 }, { "epoch": 4.6527080527325415, "grad_norm": 3.539696455001831, "learning_rate": 8.08715509236385e-06, "loss": 0.3703, "step": 28502 }, { "epoch": 4.652871311375046, "grad_norm": 3.464515209197998, "learning_rate": 8.086525793845817e-06, "loss": 0.4034, "step": 28503 }, { "epoch": 4.65303457001755, "grad_norm": 3.1922528743743896, "learning_rate": 8.085896503193355e-06, "loss": 0.3751, "step": 28504 }, { "epoch": 4.653197828660055, "grad_norm": 3.162719249725342, "learning_rate": 8.085267220409045e-06, "loss": 0.3691, "step": 28505 }, { "epoch": 4.653361087302559, "grad_norm": 3.7619667053222656, "learning_rate": 8.084637945495482e-06, "loss": 0.4202, "step": 28506 }, { "epoch": 4.653524345945064, "grad_norm": 4.119931697845459, "learning_rate": 8.08400867845525e-06, "loss": 0.4378, "step": 28507 }, { "epoch": 4.653687604587568, "grad_norm": 3.155578136444092, "learning_rate": 8.083379419290933e-06, "loss": 0.384, "step": 28508 }, { "epoch": 4.653850863230073, "grad_norm": 3.436767578125, "learning_rate": 8.082750168005126e-06, "loss": 0.3703, "step": 28509 }, { "epoch": 4.654014121872577, "grad_norm": 3.326456069946289, "learning_rate": 8.082120924600401e-06, "loss": 0.3958, "step": 28510 }, { "epoch": 4.6541773805150815, "grad_norm": 4.028719425201416, "learning_rate": 8.081491689079355e-06, "loss": 0.4083, "step": 28511 }, { "epoch": 4.654340639157585, "grad_norm": 3.084259033203125, "learning_rate": 8.080862461444573e-06, "loss": 0.2973, "step": 28512 }, { "epoch": 4.654503897800089, "grad_norm": 3.804671049118042, "learning_rate": 8.08023324169864e-06, "loss": 0.394, "step": 28513 }, { "epoch": 4.654667156442594, "grad_norm": 3.147202730178833, "learning_rate": 8.079604029844144e-06, "loss": 0.3943, "step": 28514 }, { "epoch": 4.654830415085098, "grad_norm": 3.69699764251709, "learning_rate": 8.07897482588367e-06, "loss": 0.3809, "step": 28515 }, { "epoch": 4.654993673727603, "grad_norm": 3.9904098510742188, "learning_rate": 8.078345629819806e-06, "loss": 0.4194, "step": 28516 }, { "epoch": 4.655156932370107, "grad_norm": 2.883803367614746, "learning_rate": 8.077716441655135e-06, "loss": 0.3263, "step": 28517 }, { "epoch": 4.655320191012612, "grad_norm": 3.4848930835723877, "learning_rate": 8.077087261392249e-06, "loss": 0.3752, "step": 28518 }, { "epoch": 4.655483449655116, "grad_norm": 3.5621259212493896, "learning_rate": 8.076458089033734e-06, "loss": 0.3906, "step": 28519 }, { "epoch": 4.6556467082976205, "grad_norm": 3.7652273178100586, "learning_rate": 8.07582892458217e-06, "loss": 0.3886, "step": 28520 }, { "epoch": 4.655809966940125, "grad_norm": 3.9692399501800537, "learning_rate": 8.075199768040143e-06, "loss": 0.3865, "step": 28521 }, { "epoch": 4.655973225582629, "grad_norm": 3.646730899810791, "learning_rate": 8.074570619410246e-06, "loss": 0.3855, "step": 28522 }, { "epoch": 4.656136484225134, "grad_norm": 3.5715808868408203, "learning_rate": 8.073941478695062e-06, "loss": 0.3676, "step": 28523 }, { "epoch": 4.656299742867638, "grad_norm": 3.1432957649230957, "learning_rate": 8.073312345897179e-06, "loss": 0.348, "step": 28524 }, { "epoch": 4.656463001510143, "grad_norm": 3.7095608711242676, "learning_rate": 8.07268322101918e-06, "loss": 0.3686, "step": 28525 }, { "epoch": 4.656626260152647, "grad_norm": 3.8386919498443604, "learning_rate": 8.072054104063652e-06, "loss": 0.3422, "step": 28526 }, { "epoch": 4.656789518795152, "grad_norm": 3.608565330505371, "learning_rate": 8.071424995033182e-06, "loss": 0.3711, "step": 28527 }, { "epoch": 4.656952777437656, "grad_norm": 3.6578266620635986, "learning_rate": 8.070795893930355e-06, "loss": 0.4006, "step": 28528 }, { "epoch": 4.65711603608016, "grad_norm": 5.550879001617432, "learning_rate": 8.070166800757762e-06, "loss": 0.4857, "step": 28529 }, { "epoch": 4.657279294722664, "grad_norm": 3.5174965858459473, "learning_rate": 8.069537715517981e-06, "loss": 0.364, "step": 28530 }, { "epoch": 4.657442553365168, "grad_norm": 4.286347389221191, "learning_rate": 8.068908638213602e-06, "loss": 0.432, "step": 28531 }, { "epoch": 4.657605812007673, "grad_norm": 3.9774889945983887, "learning_rate": 8.068279568847206e-06, "loss": 0.3838, "step": 28532 }, { "epoch": 4.657769070650177, "grad_norm": 3.2490718364715576, "learning_rate": 8.067650507421388e-06, "loss": 0.3899, "step": 28533 }, { "epoch": 4.657932329292682, "grad_norm": 3.4609363079071045, "learning_rate": 8.067021453938729e-06, "loss": 0.3684, "step": 28534 }, { "epoch": 4.658095587935186, "grad_norm": 3.525055408477783, "learning_rate": 8.066392408401815e-06, "loss": 0.4083, "step": 28535 }, { "epoch": 4.658258846577691, "grad_norm": 3.7354464530944824, "learning_rate": 8.06576337081323e-06, "loss": 0.4223, "step": 28536 }, { "epoch": 4.658422105220195, "grad_norm": 4.211817264556885, "learning_rate": 8.065134341175564e-06, "loss": 0.518, "step": 28537 }, { "epoch": 4.6585853638626995, "grad_norm": 3.854016065597534, "learning_rate": 8.064505319491398e-06, "loss": 0.3927, "step": 28538 }, { "epoch": 4.658748622505204, "grad_norm": 3.5045371055603027, "learning_rate": 8.063876305763322e-06, "loss": 0.3707, "step": 28539 }, { "epoch": 4.658911881147708, "grad_norm": 3.89548921585083, "learning_rate": 8.063247299993918e-06, "loss": 0.3863, "step": 28540 }, { "epoch": 4.659075139790213, "grad_norm": 3.857442617416382, "learning_rate": 8.062618302185772e-06, "loss": 0.3998, "step": 28541 }, { "epoch": 4.659238398432717, "grad_norm": 3.3248884677886963, "learning_rate": 8.061989312341473e-06, "loss": 0.3379, "step": 28542 }, { "epoch": 4.659401657075222, "grad_norm": 3.4644320011138916, "learning_rate": 8.061360330463602e-06, "loss": 0.3697, "step": 28543 }, { "epoch": 4.659564915717726, "grad_norm": 3.2746431827545166, "learning_rate": 8.060731356554745e-06, "loss": 0.3761, "step": 28544 }, { "epoch": 4.659728174360231, "grad_norm": 3.828159809112549, "learning_rate": 8.060102390617492e-06, "loss": 0.3878, "step": 28545 }, { "epoch": 4.659891433002734, "grad_norm": 3.2150919437408447, "learning_rate": 8.059473432654425e-06, "loss": 0.344, "step": 28546 }, { "epoch": 4.6600546916452394, "grad_norm": 3.4897401332855225, "learning_rate": 8.05884448266813e-06, "loss": 0.3829, "step": 28547 }, { "epoch": 4.660217950287743, "grad_norm": 4.360124111175537, "learning_rate": 8.058215540661196e-06, "loss": 0.3974, "step": 28548 }, { "epoch": 4.660381208930247, "grad_norm": 3.800096273422241, "learning_rate": 8.0575866066362e-06, "loss": 0.4178, "step": 28549 }, { "epoch": 4.660544467572752, "grad_norm": 3.8385696411132812, "learning_rate": 8.056957680595733e-06, "loss": 0.4126, "step": 28550 }, { "epoch": 4.660707726215256, "grad_norm": 4.82113790512085, "learning_rate": 8.056328762542379e-06, "loss": 0.4046, "step": 28551 }, { "epoch": 4.660870984857761, "grad_norm": 4.747945308685303, "learning_rate": 8.055699852478724e-06, "loss": 0.4421, "step": 28552 }, { "epoch": 4.661034243500265, "grad_norm": 3.763040542602539, "learning_rate": 8.055070950407353e-06, "loss": 0.4005, "step": 28553 }, { "epoch": 4.66119750214277, "grad_norm": 3.2345995903015137, "learning_rate": 8.05444205633085e-06, "loss": 0.3661, "step": 28554 }, { "epoch": 4.661360760785274, "grad_norm": 4.192921161651611, "learning_rate": 8.053813170251801e-06, "loss": 0.429, "step": 28555 }, { "epoch": 4.6615240194277785, "grad_norm": 3.6064486503601074, "learning_rate": 8.053184292172791e-06, "loss": 0.4063, "step": 28556 }, { "epoch": 4.661687278070283, "grad_norm": 3.7259509563446045, "learning_rate": 8.052555422096406e-06, "loss": 0.3853, "step": 28557 }, { "epoch": 4.661850536712787, "grad_norm": 2.8906874656677246, "learning_rate": 8.051926560025235e-06, "loss": 0.3629, "step": 28558 }, { "epoch": 4.662013795355292, "grad_norm": 3.014237642288208, "learning_rate": 8.051297705961852e-06, "loss": 0.341, "step": 28559 }, { "epoch": 4.662177053997796, "grad_norm": 3.5928895473480225, "learning_rate": 8.05066885990885e-06, "loss": 0.4033, "step": 28560 }, { "epoch": 4.662340312640301, "grad_norm": 3.177049398422241, "learning_rate": 8.050040021868812e-06, "loss": 0.3625, "step": 28561 }, { "epoch": 4.662503571282805, "grad_norm": 4.009975433349609, "learning_rate": 8.049411191844324e-06, "loss": 0.3763, "step": 28562 }, { "epoch": 4.662666829925309, "grad_norm": 3.0324110984802246, "learning_rate": 8.04878236983797e-06, "loss": 0.3412, "step": 28563 }, { "epoch": 4.662830088567814, "grad_norm": 3.497938394546509, "learning_rate": 8.048153555852334e-06, "loss": 0.3946, "step": 28564 }, { "epoch": 4.6629933472103176, "grad_norm": 3.5253260135650635, "learning_rate": 8.047524749890004e-06, "loss": 0.3692, "step": 28565 }, { "epoch": 4.663156605852822, "grad_norm": 4.156108379364014, "learning_rate": 8.04689595195356e-06, "loss": 0.4335, "step": 28566 }, { "epoch": 4.663319864495326, "grad_norm": 3.593276262283325, "learning_rate": 8.046267162045586e-06, "loss": 0.384, "step": 28567 }, { "epoch": 4.663483123137831, "grad_norm": 3.1268951892852783, "learning_rate": 8.04563838016868e-06, "loss": 0.4118, "step": 28568 }, { "epoch": 4.663646381780335, "grad_norm": 3.698331594467163, "learning_rate": 8.045009606325411e-06, "loss": 0.3437, "step": 28569 }, { "epoch": 4.66380964042284, "grad_norm": 3.585538864135742, "learning_rate": 8.044380840518368e-06, "loss": 0.3428, "step": 28570 }, { "epoch": 4.663972899065344, "grad_norm": 3.0618467330932617, "learning_rate": 8.043752082750136e-06, "loss": 0.3128, "step": 28571 }, { "epoch": 4.664136157707849, "grad_norm": 4.220217227935791, "learning_rate": 8.043123333023303e-06, "loss": 0.442, "step": 28572 }, { "epoch": 4.664299416350353, "grad_norm": 3.990943431854248, "learning_rate": 8.042494591340451e-06, "loss": 0.4157, "step": 28573 }, { "epoch": 4.6644626749928575, "grad_norm": 4.128432750701904, "learning_rate": 8.041865857704162e-06, "loss": 0.413, "step": 28574 }, { "epoch": 4.664625933635362, "grad_norm": 3.114813804626465, "learning_rate": 8.041237132117028e-06, "loss": 0.346, "step": 28575 }, { "epoch": 4.664789192277866, "grad_norm": 3.6358466148376465, "learning_rate": 8.040608414581624e-06, "loss": 0.4312, "step": 28576 }, { "epoch": 4.664952450920371, "grad_norm": 4.403555393218994, "learning_rate": 8.03997970510054e-06, "loss": 0.3739, "step": 28577 }, { "epoch": 4.665115709562875, "grad_norm": 3.3937854766845703, "learning_rate": 8.039351003676364e-06, "loss": 0.3776, "step": 28578 }, { "epoch": 4.66527896820538, "grad_norm": 3.294694662094116, "learning_rate": 8.03872231031167e-06, "loss": 0.3709, "step": 28579 }, { "epoch": 4.665442226847884, "grad_norm": 3.0651309490203857, "learning_rate": 8.038093625009052e-06, "loss": 0.3503, "step": 28580 }, { "epoch": 4.665605485490389, "grad_norm": 3.324413299560547, "learning_rate": 8.037464947771088e-06, "loss": 0.3457, "step": 28581 }, { "epoch": 4.665768744132892, "grad_norm": 3.169032335281372, "learning_rate": 8.036836278600362e-06, "loss": 0.2934, "step": 28582 }, { "epoch": 4.6659320027753965, "grad_norm": 3.99511456489563, "learning_rate": 8.036207617499463e-06, "loss": 0.3602, "step": 28583 }, { "epoch": 4.666095261417901, "grad_norm": 3.5621182918548584, "learning_rate": 8.035578964470973e-06, "loss": 0.3675, "step": 28584 }, { "epoch": 4.666258520060405, "grad_norm": 3.17732834815979, "learning_rate": 8.034950319517475e-06, "loss": 0.3901, "step": 28585 }, { "epoch": 4.66642177870291, "grad_norm": 4.021876811981201, "learning_rate": 8.034321682641556e-06, "loss": 0.3908, "step": 28586 }, { "epoch": 4.666585037345414, "grad_norm": 4.14329195022583, "learning_rate": 8.033693053845801e-06, "loss": 0.4601, "step": 28587 }, { "epoch": 4.666748295987919, "grad_norm": 3.804469108581543, "learning_rate": 8.033064433132789e-06, "loss": 0.3376, "step": 28588 }, { "epoch": 4.666911554630423, "grad_norm": 3.7268238067626953, "learning_rate": 8.032435820505107e-06, "loss": 0.3625, "step": 28589 }, { "epoch": 4.667074813272928, "grad_norm": 3.4889461994171143, "learning_rate": 8.031807215965337e-06, "loss": 0.3659, "step": 28590 }, { "epoch": 4.667238071915432, "grad_norm": 3.462611436843872, "learning_rate": 8.031178619516064e-06, "loss": 0.4109, "step": 28591 }, { "epoch": 4.6674013305579365, "grad_norm": 3.611922264099121, "learning_rate": 8.030550031159873e-06, "loss": 0.3462, "step": 28592 }, { "epoch": 4.667564589200441, "grad_norm": 3.6296985149383545, "learning_rate": 8.029921450899348e-06, "loss": 0.3923, "step": 28593 }, { "epoch": 4.667727847842945, "grad_norm": 3.738247871398926, "learning_rate": 8.029292878737068e-06, "loss": 0.3902, "step": 28594 }, { "epoch": 4.66789110648545, "grad_norm": 3.2464921474456787, "learning_rate": 8.028664314675625e-06, "loss": 0.3342, "step": 28595 }, { "epoch": 4.668054365127954, "grad_norm": 3.5683395862579346, "learning_rate": 8.028035758717597e-06, "loss": 0.3884, "step": 28596 }, { "epoch": 4.668217623770459, "grad_norm": 2.938457727432251, "learning_rate": 8.027407210865574e-06, "loss": 0.2971, "step": 28597 }, { "epoch": 4.668380882412963, "grad_norm": 3.753822088241577, "learning_rate": 8.026778671122128e-06, "loss": 0.3487, "step": 28598 }, { "epoch": 4.668544141055467, "grad_norm": 3.631040096282959, "learning_rate": 8.026150139489854e-06, "loss": 0.3387, "step": 28599 }, { "epoch": 4.668707399697972, "grad_norm": 3.7365949153900146, "learning_rate": 8.02552161597133e-06, "loss": 0.3913, "step": 28600 }, { "epoch": 4.6688706583404755, "grad_norm": 3.6618874073028564, "learning_rate": 8.024893100569143e-06, "loss": 0.3961, "step": 28601 }, { "epoch": 4.66903391698298, "grad_norm": 3.414609909057617, "learning_rate": 8.024264593285871e-06, "loss": 0.3701, "step": 28602 }, { "epoch": 4.669197175625484, "grad_norm": 3.8269917964935303, "learning_rate": 8.023636094124104e-06, "loss": 0.2937, "step": 28603 }, { "epoch": 4.669360434267989, "grad_norm": 3.464848518371582, "learning_rate": 8.023007603086422e-06, "loss": 0.3657, "step": 28604 }, { "epoch": 4.669523692910493, "grad_norm": 3.6887526512145996, "learning_rate": 8.022379120175407e-06, "loss": 0.4057, "step": 28605 }, { "epoch": 4.669686951552998, "grad_norm": 3.2563648223876953, "learning_rate": 8.021750645393647e-06, "loss": 0.3761, "step": 28606 }, { "epoch": 4.669850210195502, "grad_norm": 4.109111785888672, "learning_rate": 8.021122178743727e-06, "loss": 0.4031, "step": 28607 }, { "epoch": 4.670013468838007, "grad_norm": 3.6403145790100098, "learning_rate": 8.020493720228223e-06, "loss": 0.3294, "step": 28608 }, { "epoch": 4.670176727480511, "grad_norm": 3.198450803756714, "learning_rate": 8.01986526984972e-06, "loss": 0.3457, "step": 28609 }, { "epoch": 4.6703399861230155, "grad_norm": 3.3472235202789307, "learning_rate": 8.019236827610805e-06, "loss": 0.3787, "step": 28610 }, { "epoch": 4.67050324476552, "grad_norm": 4.395464897155762, "learning_rate": 8.01860839351406e-06, "loss": 0.4517, "step": 28611 }, { "epoch": 4.670666503408024, "grad_norm": 3.7162580490112305, "learning_rate": 8.017979967562066e-06, "loss": 0.3506, "step": 28612 }, { "epoch": 4.670829762050529, "grad_norm": 4.332982063293457, "learning_rate": 8.01735154975741e-06, "loss": 0.4083, "step": 28613 }, { "epoch": 4.670993020693033, "grad_norm": 3.7097318172454834, "learning_rate": 8.016723140102672e-06, "loss": 0.3758, "step": 28614 }, { "epoch": 4.671156279335538, "grad_norm": 4.228394508361816, "learning_rate": 8.016094738600436e-06, "loss": 0.4284, "step": 28615 }, { "epoch": 4.671319537978041, "grad_norm": 3.092238664627075, "learning_rate": 8.015466345253284e-06, "loss": 0.2921, "step": 28616 }, { "epoch": 4.6714827966205466, "grad_norm": 3.73801589012146, "learning_rate": 8.014837960063807e-06, "loss": 0.3661, "step": 28617 }, { "epoch": 4.67164605526305, "grad_norm": 3.7333498001098633, "learning_rate": 8.014209583034575e-06, "loss": 0.439, "step": 28618 }, { "epoch": 4.6718093139055545, "grad_norm": 3.658147096633911, "learning_rate": 8.01358121416818e-06, "loss": 0.3833, "step": 28619 }, { "epoch": 4.671972572548059, "grad_norm": 3.624718427658081, "learning_rate": 8.012952853467202e-06, "loss": 0.3626, "step": 28620 }, { "epoch": 4.672135831190563, "grad_norm": 4.101155757904053, "learning_rate": 8.012324500934223e-06, "loss": 0.3874, "step": 28621 }, { "epoch": 4.672299089833068, "grad_norm": 3.2919373512268066, "learning_rate": 8.011696156571828e-06, "loss": 0.3261, "step": 28622 }, { "epoch": 4.672462348475572, "grad_norm": 3.566652774810791, "learning_rate": 8.0110678203826e-06, "loss": 0.3753, "step": 28623 }, { "epoch": 4.672625607118077, "grad_norm": 3.5208537578582764, "learning_rate": 8.010439492369123e-06, "loss": 0.3613, "step": 28624 }, { "epoch": 4.672788865760581, "grad_norm": 3.7179620265960693, "learning_rate": 8.009811172533977e-06, "loss": 0.4324, "step": 28625 }, { "epoch": 4.672952124403086, "grad_norm": 4.462977886199951, "learning_rate": 8.009182860879746e-06, "loss": 0.8474, "step": 28626 }, { "epoch": 4.67311538304559, "grad_norm": 3.72702693939209, "learning_rate": 8.008554557409014e-06, "loss": 0.3573, "step": 28627 }, { "epoch": 4.6732786416880945, "grad_norm": 3.090583562850952, "learning_rate": 8.007926262124359e-06, "loss": 0.3459, "step": 28628 }, { "epoch": 4.673441900330599, "grad_norm": 3.631260395050049, "learning_rate": 8.007297975028367e-06, "loss": 0.3684, "step": 28629 }, { "epoch": 4.673605158973103, "grad_norm": 3.8764350414276123, "learning_rate": 8.006669696123622e-06, "loss": 0.3628, "step": 28630 }, { "epoch": 4.673768417615608, "grad_norm": 3.9913716316223145, "learning_rate": 8.006041425412706e-06, "loss": 0.4235, "step": 28631 }, { "epoch": 4.673931676258112, "grad_norm": 3.238232135772705, "learning_rate": 8.005413162898197e-06, "loss": 0.313, "step": 28632 }, { "epoch": 4.674094934900617, "grad_norm": 3.8463146686553955, "learning_rate": 8.004784908582684e-06, "loss": 0.4212, "step": 28633 }, { "epoch": 4.674258193543121, "grad_norm": 4.210007667541504, "learning_rate": 8.004156662468746e-06, "loss": 0.4528, "step": 28634 }, { "epoch": 4.674421452185625, "grad_norm": 3.769366979598999, "learning_rate": 8.003528424558968e-06, "loss": 0.3636, "step": 28635 }, { "epoch": 4.674584710828129, "grad_norm": 4.206056118011475, "learning_rate": 8.00290019485593e-06, "loss": 0.3954, "step": 28636 }, { "epoch": 4.6747479694706335, "grad_norm": 3.190558910369873, "learning_rate": 8.002271973362216e-06, "loss": 0.3834, "step": 28637 }, { "epoch": 4.674911228113138, "grad_norm": 3.5607964992523193, "learning_rate": 8.001643760080408e-06, "loss": 0.4051, "step": 28638 }, { "epoch": 4.675074486755642, "grad_norm": 3.666522979736328, "learning_rate": 8.001015555013087e-06, "loss": 0.4176, "step": 28639 }, { "epoch": 4.675237745398147, "grad_norm": 3.4393091201782227, "learning_rate": 8.000387358162835e-06, "loss": 0.3104, "step": 28640 }, { "epoch": 4.675401004040651, "grad_norm": 3.3712105751037598, "learning_rate": 7.999759169532237e-06, "loss": 0.3533, "step": 28641 }, { "epoch": 4.675564262683156, "grad_norm": 4.084626197814941, "learning_rate": 7.999130989123872e-06, "loss": 0.4542, "step": 28642 }, { "epoch": 4.67572752132566, "grad_norm": 5.005306243896484, "learning_rate": 7.998502816940325e-06, "loss": 0.4166, "step": 28643 }, { "epoch": 4.675890779968165, "grad_norm": 3.6446280479431152, "learning_rate": 7.997874652984176e-06, "loss": 0.3788, "step": 28644 }, { "epoch": 4.676054038610669, "grad_norm": 4.639115333557129, "learning_rate": 7.997246497258011e-06, "loss": 0.4372, "step": 28645 }, { "epoch": 4.6762172972531735, "grad_norm": 3.825058698654175, "learning_rate": 7.996618349764412e-06, "loss": 0.3968, "step": 28646 }, { "epoch": 4.676380555895678, "grad_norm": 7.156062126159668, "learning_rate": 7.995990210505955e-06, "loss": 0.3675, "step": 28647 }, { "epoch": 4.676543814538182, "grad_norm": 3.542214870452881, "learning_rate": 7.995362079485222e-06, "loss": 0.3806, "step": 28648 }, { "epoch": 4.676707073180687, "grad_norm": 3.7167696952819824, "learning_rate": 7.994733956704802e-06, "loss": 0.3751, "step": 28649 }, { "epoch": 4.676870331823191, "grad_norm": 2.8800623416900635, "learning_rate": 7.994105842167274e-06, "loss": 0.3069, "step": 28650 }, { "epoch": 4.677033590465696, "grad_norm": 3.3122072219848633, "learning_rate": 7.993477735875219e-06, "loss": 0.3415, "step": 28651 }, { "epoch": 4.677196849108199, "grad_norm": 3.183858871459961, "learning_rate": 7.992849637831218e-06, "loss": 0.3604, "step": 28652 }, { "epoch": 4.6773601077507045, "grad_norm": 3.4352545738220215, "learning_rate": 7.992221548037856e-06, "loss": 0.3762, "step": 28653 }, { "epoch": 4.677523366393208, "grad_norm": 3.958336114883423, "learning_rate": 7.991593466497713e-06, "loss": 0.4082, "step": 28654 }, { "epoch": 4.6776866250357125, "grad_norm": 3.8771615028381348, "learning_rate": 7.990965393213368e-06, "loss": 0.4146, "step": 28655 }, { "epoch": 4.677849883678217, "grad_norm": 3.721451759338379, "learning_rate": 7.990337328187411e-06, "loss": 0.3956, "step": 28656 }, { "epoch": 4.678013142320721, "grad_norm": 3.220879554748535, "learning_rate": 7.989709271422416e-06, "loss": 0.3359, "step": 28657 }, { "epoch": 4.678176400963226, "grad_norm": 3.912642240524292, "learning_rate": 7.989081222920966e-06, "loss": 0.4135, "step": 28658 }, { "epoch": 4.67833965960573, "grad_norm": 3.4809226989746094, "learning_rate": 7.988453182685643e-06, "loss": 0.3476, "step": 28659 }, { "epoch": 4.678502918248235, "grad_norm": 4.048633098602295, "learning_rate": 7.98782515071903e-06, "loss": 0.4388, "step": 28660 }, { "epoch": 4.678666176890739, "grad_norm": 4.261386871337891, "learning_rate": 7.987197127023709e-06, "loss": 0.3441, "step": 28661 }, { "epoch": 4.678829435533244, "grad_norm": 3.9296884536743164, "learning_rate": 7.98656911160226e-06, "loss": 0.4049, "step": 28662 }, { "epoch": 4.678992694175748, "grad_norm": 3.2149972915649414, "learning_rate": 7.985941104457267e-06, "loss": 0.3743, "step": 28663 }, { "epoch": 4.6791559528182525, "grad_norm": 4.0781989097595215, "learning_rate": 7.985313105591309e-06, "loss": 0.4137, "step": 28664 }, { "epoch": 4.679319211460757, "grad_norm": 3.8175718784332275, "learning_rate": 7.984685115006966e-06, "loss": 0.3903, "step": 28665 }, { "epoch": 4.679482470103261, "grad_norm": 3.1779873371124268, "learning_rate": 7.984057132706825e-06, "loss": 0.3658, "step": 28666 }, { "epoch": 4.679645728745766, "grad_norm": 2.5997350215911865, "learning_rate": 7.98342915869346e-06, "loss": 0.3365, "step": 28667 }, { "epoch": 4.67980898738827, "grad_norm": 2.72347354888916, "learning_rate": 7.982801192969459e-06, "loss": 0.2962, "step": 28668 }, { "epoch": 4.679972246030774, "grad_norm": 3.9628896713256836, "learning_rate": 7.9821732355374e-06, "loss": 0.3969, "step": 28669 }, { "epoch": 4.680135504673279, "grad_norm": 3.6360251903533936, "learning_rate": 7.981545286399862e-06, "loss": 0.4187, "step": 28670 }, { "epoch": 4.680298763315783, "grad_norm": 3.189460515975952, "learning_rate": 7.980917345559432e-06, "loss": 0.3124, "step": 28671 }, { "epoch": 4.680462021958287, "grad_norm": 3.591658592224121, "learning_rate": 7.980289413018686e-06, "loss": 0.4028, "step": 28672 }, { "epoch": 4.6806252806007915, "grad_norm": 3.181485414505005, "learning_rate": 7.979661488780212e-06, "loss": 0.3162, "step": 28673 }, { "epoch": 4.680788539243296, "grad_norm": 4.842490196228027, "learning_rate": 7.979033572846584e-06, "loss": 0.4299, "step": 28674 }, { "epoch": 4.6809517978858, "grad_norm": 4.253164291381836, "learning_rate": 7.978405665220386e-06, "loss": 0.4269, "step": 28675 }, { "epoch": 4.681115056528305, "grad_norm": 3.538055658340454, "learning_rate": 7.977777765904202e-06, "loss": 0.3771, "step": 28676 }, { "epoch": 4.681278315170809, "grad_norm": 4.377314567565918, "learning_rate": 7.977149874900607e-06, "loss": 0.4534, "step": 28677 }, { "epoch": 4.681441573813314, "grad_norm": 4.270745277404785, "learning_rate": 7.976521992212185e-06, "loss": 0.3479, "step": 28678 }, { "epoch": 4.681604832455818, "grad_norm": 3.5775744915008545, "learning_rate": 7.975894117841517e-06, "loss": 0.3682, "step": 28679 }, { "epoch": 4.681768091098323, "grad_norm": 4.073921203613281, "learning_rate": 7.975266251791186e-06, "loss": 0.3899, "step": 28680 }, { "epoch": 4.681931349740827, "grad_norm": 3.0899977684020996, "learning_rate": 7.974638394063768e-06, "loss": 0.3644, "step": 28681 }, { "epoch": 4.6820946083833315, "grad_norm": 3.6315383911132812, "learning_rate": 7.974010544661846e-06, "loss": 0.382, "step": 28682 }, { "epoch": 4.682257867025836, "grad_norm": 3.831639051437378, "learning_rate": 7.973382703588004e-06, "loss": 0.4178, "step": 28683 }, { "epoch": 4.68242112566834, "grad_norm": 2.938410520553589, "learning_rate": 7.972754870844819e-06, "loss": 0.3482, "step": 28684 }, { "epoch": 4.682584384310845, "grad_norm": 3.9593749046325684, "learning_rate": 7.972127046434878e-06, "loss": 0.4218, "step": 28685 }, { "epoch": 4.682747642953349, "grad_norm": 3.527026891708374, "learning_rate": 7.971499230360753e-06, "loss": 0.385, "step": 28686 }, { "epoch": 4.682910901595854, "grad_norm": 3.361405611038208, "learning_rate": 7.970871422625028e-06, "loss": 0.3233, "step": 28687 }, { "epoch": 4.683074160238357, "grad_norm": 4.3076252937316895, "learning_rate": 7.970243623230286e-06, "loss": 0.7616, "step": 28688 }, { "epoch": 4.683237418880862, "grad_norm": 3.8702893257141113, "learning_rate": 7.969615832179105e-06, "loss": 0.3803, "step": 28689 }, { "epoch": 4.683400677523366, "grad_norm": 3.2869627475738525, "learning_rate": 7.968988049474068e-06, "loss": 0.3618, "step": 28690 }, { "epoch": 4.6835639361658705, "grad_norm": 3.8743553161621094, "learning_rate": 7.968360275117753e-06, "loss": 0.3897, "step": 28691 }, { "epoch": 4.683727194808375, "grad_norm": 3.4595258235931396, "learning_rate": 7.967732509112742e-06, "loss": 0.3406, "step": 28692 }, { "epoch": 4.683890453450879, "grad_norm": 4.003739833831787, "learning_rate": 7.967104751461615e-06, "loss": 0.4314, "step": 28693 }, { "epoch": 4.684053712093384, "grad_norm": 3.2004079818725586, "learning_rate": 7.966477002166952e-06, "loss": 0.3428, "step": 28694 }, { "epoch": 4.684216970735888, "grad_norm": 3.413339853286743, "learning_rate": 7.965849261231339e-06, "loss": 0.3361, "step": 28695 }, { "epoch": 4.684380229378393, "grad_norm": 3.9256715774536133, "learning_rate": 7.965221528657349e-06, "loss": 0.3959, "step": 28696 }, { "epoch": 4.684543488020897, "grad_norm": 3.2361979484558105, "learning_rate": 7.964593804447562e-06, "loss": 0.3355, "step": 28697 }, { "epoch": 4.684706746663402, "grad_norm": 3.1921348571777344, "learning_rate": 7.963966088604563e-06, "loss": 0.3414, "step": 28698 }, { "epoch": 4.684870005305906, "grad_norm": 3.7099812030792236, "learning_rate": 7.96333838113093e-06, "loss": 0.3786, "step": 28699 }, { "epoch": 4.6850332639484105, "grad_norm": 3.991837739944458, "learning_rate": 7.962710682029245e-06, "loss": 0.3628, "step": 28700 }, { "epoch": 4.685196522590915, "grad_norm": 3.8061580657958984, "learning_rate": 7.962082991302088e-06, "loss": 0.3703, "step": 28701 }, { "epoch": 4.685359781233419, "grad_norm": 3.7668678760528564, "learning_rate": 7.961455308952037e-06, "loss": 0.3959, "step": 28702 }, { "epoch": 4.685523039875924, "grad_norm": 3.133680820465088, "learning_rate": 7.960827634981673e-06, "loss": 0.3274, "step": 28703 }, { "epoch": 4.685686298518428, "grad_norm": 4.1229634284973145, "learning_rate": 7.960199969393578e-06, "loss": 0.4328, "step": 28704 }, { "epoch": 4.685849557160932, "grad_norm": 3.283168315887451, "learning_rate": 7.959572312190333e-06, "loss": 0.3232, "step": 28705 }, { "epoch": 4.686012815803437, "grad_norm": 3.3718791007995605, "learning_rate": 7.958944663374513e-06, "loss": 0.3597, "step": 28706 }, { "epoch": 4.686176074445941, "grad_norm": 3.99168062210083, "learning_rate": 7.9583170229487e-06, "loss": 0.3959, "step": 28707 }, { "epoch": 4.686339333088445, "grad_norm": 3.1992416381835938, "learning_rate": 7.957689390915475e-06, "loss": 0.3442, "step": 28708 }, { "epoch": 4.6865025917309495, "grad_norm": 3.4982500076293945, "learning_rate": 7.957061767277416e-06, "loss": 0.3794, "step": 28709 }, { "epoch": 4.686665850373454, "grad_norm": 4.083131790161133, "learning_rate": 7.956434152037107e-06, "loss": 0.3844, "step": 28710 }, { "epoch": 4.686829109015958, "grad_norm": 4.145312309265137, "learning_rate": 7.955806545197126e-06, "loss": 0.3867, "step": 28711 }, { "epoch": 4.686992367658463, "grad_norm": 3.7171385288238525, "learning_rate": 7.955178946760051e-06, "loss": 0.3579, "step": 28712 }, { "epoch": 4.687155626300967, "grad_norm": 4.0357666015625, "learning_rate": 7.954551356728465e-06, "loss": 0.4404, "step": 28713 }, { "epoch": 4.687318884943472, "grad_norm": 3.0570929050445557, "learning_rate": 7.953923775104945e-06, "loss": 0.3534, "step": 28714 }, { "epoch": 4.687482143585976, "grad_norm": 4.6462602615356445, "learning_rate": 7.953296201892073e-06, "loss": 0.46, "step": 28715 }, { "epoch": 4.687645402228481, "grad_norm": 3.540834665298462, "learning_rate": 7.952668637092428e-06, "loss": 0.4117, "step": 28716 }, { "epoch": 4.687808660870985, "grad_norm": 3.6884348392486572, "learning_rate": 7.952041080708587e-06, "loss": 0.3705, "step": 28717 }, { "epoch": 4.6879719195134895, "grad_norm": 2.5027050971984863, "learning_rate": 7.951413532743133e-06, "loss": 0.3202, "step": 28718 }, { "epoch": 4.688135178155994, "grad_norm": 4.306031703948975, "learning_rate": 7.950785993198642e-06, "loss": 0.4331, "step": 28719 }, { "epoch": 4.688298436798498, "grad_norm": 3.2590057849884033, "learning_rate": 7.950158462077697e-06, "loss": 0.3687, "step": 28720 }, { "epoch": 4.688461695441003, "grad_norm": 4.442022800445557, "learning_rate": 7.949530939382877e-06, "loss": 0.3962, "step": 28721 }, { "epoch": 4.688624954083506, "grad_norm": 4.163157939910889, "learning_rate": 7.948903425116762e-06, "loss": 0.4065, "step": 28722 }, { "epoch": 4.688788212726012, "grad_norm": 4.8513102531433105, "learning_rate": 7.94827591928193e-06, "loss": 0.4546, "step": 28723 }, { "epoch": 4.688951471368515, "grad_norm": 4.554821491241455, "learning_rate": 7.947648421880963e-06, "loss": 0.4453, "step": 28724 }, { "epoch": 4.68911473001102, "grad_norm": 3.9331583976745605, "learning_rate": 7.947020932916437e-06, "loss": 0.3704, "step": 28725 }, { "epoch": 4.689277988653524, "grad_norm": 3.8679027557373047, "learning_rate": 7.946393452390932e-06, "loss": 0.4434, "step": 28726 }, { "epoch": 4.6894412472960285, "grad_norm": 3.9352874755859375, "learning_rate": 7.945765980307027e-06, "loss": 0.4118, "step": 28727 }, { "epoch": 4.689604505938533, "grad_norm": 3.379960775375366, "learning_rate": 7.945138516667304e-06, "loss": 0.3445, "step": 28728 }, { "epoch": 4.689767764581037, "grad_norm": 3.316450595855713, "learning_rate": 7.944511061474341e-06, "loss": 0.3393, "step": 28729 }, { "epoch": 4.689931023223542, "grad_norm": 3.4449427127838135, "learning_rate": 7.943883614730715e-06, "loss": 0.3613, "step": 28730 }, { "epoch": 4.690094281866046, "grad_norm": 3.5224032402038574, "learning_rate": 7.94325617643901e-06, "loss": 0.382, "step": 28731 }, { "epoch": 4.690257540508551, "grad_norm": 2.9214377403259277, "learning_rate": 7.942628746601796e-06, "loss": 0.2971, "step": 28732 }, { "epoch": 4.690420799151055, "grad_norm": 3.2855429649353027, "learning_rate": 7.942001325221665e-06, "loss": 0.3236, "step": 28733 }, { "epoch": 4.69058405779356, "grad_norm": 3.702753782272339, "learning_rate": 7.94137391230119e-06, "loss": 0.3912, "step": 28734 }, { "epoch": 4.690747316436064, "grad_norm": 3.8101882934570312, "learning_rate": 7.940746507842946e-06, "loss": 0.3968, "step": 28735 }, { "epoch": 4.6909105750785685, "grad_norm": 3.1940107345581055, "learning_rate": 7.940119111849516e-06, "loss": 0.3239, "step": 28736 }, { "epoch": 4.691073833721073, "grad_norm": 3.1030797958374023, "learning_rate": 7.939491724323479e-06, "loss": 0.3623, "step": 28737 }, { "epoch": 4.691237092363577, "grad_norm": 3.2201755046844482, "learning_rate": 7.938864345267413e-06, "loss": 0.3908, "step": 28738 }, { "epoch": 4.691400351006082, "grad_norm": 3.990992546081543, "learning_rate": 7.938236974683897e-06, "loss": 0.4151, "step": 28739 }, { "epoch": 4.691563609648586, "grad_norm": 4.045422077178955, "learning_rate": 7.937609612575511e-06, "loss": 0.4304, "step": 28740 }, { "epoch": 4.69172686829109, "grad_norm": 3.231261968612671, "learning_rate": 7.936982258944834e-06, "loss": 0.3586, "step": 28741 }, { "epoch": 4.691890126933594, "grad_norm": 3.380147933959961, "learning_rate": 7.936354913794443e-06, "loss": 0.3395, "step": 28742 }, { "epoch": 4.692053385576099, "grad_norm": 3.409752607345581, "learning_rate": 7.935727577126917e-06, "loss": 0.3921, "step": 28743 }, { "epoch": 4.692216644218603, "grad_norm": 3.4724957942962646, "learning_rate": 7.93510024894484e-06, "loss": 0.3731, "step": 28744 }, { "epoch": 4.6923799028611075, "grad_norm": 3.0667057037353516, "learning_rate": 7.934472929250781e-06, "loss": 0.3979, "step": 28745 }, { "epoch": 4.692543161503612, "grad_norm": 4.393784999847412, "learning_rate": 7.933845618047327e-06, "loss": 0.6396, "step": 28746 }, { "epoch": 4.692706420146116, "grad_norm": 3.9233651161193848, "learning_rate": 7.933218315337049e-06, "loss": 0.4543, "step": 28747 }, { "epoch": 4.692869678788621, "grad_norm": 3.5685391426086426, "learning_rate": 7.932591021122533e-06, "loss": 0.3824, "step": 28748 }, { "epoch": 4.693032937431125, "grad_norm": 3.2389824390411377, "learning_rate": 7.931963735406355e-06, "loss": 0.3159, "step": 28749 }, { "epoch": 4.69319619607363, "grad_norm": 3.4023990631103516, "learning_rate": 7.931336458191092e-06, "loss": 0.3974, "step": 28750 }, { "epoch": 4.693359454716134, "grad_norm": 4.371380805969238, "learning_rate": 7.930709189479325e-06, "loss": 0.3887, "step": 28751 }, { "epoch": 4.693522713358639, "grad_norm": 3.257594108581543, "learning_rate": 7.930081929273631e-06, "loss": 0.3888, "step": 28752 }, { "epoch": 4.693685972001143, "grad_norm": 4.0099873542785645, "learning_rate": 7.92945467757659e-06, "loss": 0.3822, "step": 28753 }, { "epoch": 4.6938492306436475, "grad_norm": 2.9430627822875977, "learning_rate": 7.928827434390777e-06, "loss": 0.3311, "step": 28754 }, { "epoch": 4.694012489286152, "grad_norm": 4.182565689086914, "learning_rate": 7.928200199718775e-06, "loss": 0.4012, "step": 28755 }, { "epoch": 4.694175747928656, "grad_norm": 3.321922779083252, "learning_rate": 7.927572973563156e-06, "loss": 0.368, "step": 28756 }, { "epoch": 4.694339006571161, "grad_norm": 3.4821789264678955, "learning_rate": 7.926945755926504e-06, "loss": 0.3523, "step": 28757 }, { "epoch": 4.694502265213664, "grad_norm": 3.837012529373169, "learning_rate": 7.92631854681139e-06, "loss": 0.3885, "step": 28758 }, { "epoch": 4.694665523856169, "grad_norm": 3.3255650997161865, "learning_rate": 7.925691346220404e-06, "loss": 0.3547, "step": 28759 }, { "epoch": 4.694828782498673, "grad_norm": 3.6887435913085938, "learning_rate": 7.925064154156117e-06, "loss": 0.3713, "step": 28760 }, { "epoch": 4.694992041141178, "grad_norm": 3.641744375228882, "learning_rate": 7.924436970621105e-06, "loss": 0.3845, "step": 28761 }, { "epoch": 4.695155299783682, "grad_norm": 3.3137362003326416, "learning_rate": 7.92380979561795e-06, "loss": 0.4034, "step": 28762 }, { "epoch": 4.6953185584261865, "grad_norm": 3.2454721927642822, "learning_rate": 7.92318262914923e-06, "loss": 0.3564, "step": 28763 }, { "epoch": 4.695481817068691, "grad_norm": 2.715137481689453, "learning_rate": 7.922555471217523e-06, "loss": 0.3492, "step": 28764 }, { "epoch": 4.695645075711195, "grad_norm": 3.5797119140625, "learning_rate": 7.921928321825406e-06, "loss": 0.3806, "step": 28765 }, { "epoch": 4.6958083343537, "grad_norm": 3.723475217819214, "learning_rate": 7.921301180975453e-06, "loss": 0.356, "step": 28766 }, { "epoch": 4.695971592996204, "grad_norm": 3.2340569496154785, "learning_rate": 7.92067404867025e-06, "loss": 0.3443, "step": 28767 }, { "epoch": 4.696134851638709, "grad_norm": 3.297997236251831, "learning_rate": 7.920046924912368e-06, "loss": 0.3144, "step": 28768 }, { "epoch": 4.696298110281213, "grad_norm": 3.4069504737854004, "learning_rate": 7.91941980970439e-06, "loss": 0.3713, "step": 28769 }, { "epoch": 4.696461368923718, "grad_norm": 2.8936173915863037, "learning_rate": 7.918792703048887e-06, "loss": 0.3527, "step": 28770 }, { "epoch": 4.696624627566222, "grad_norm": 3.0612218379974365, "learning_rate": 7.918165604948445e-06, "loss": 0.3417, "step": 28771 }, { "epoch": 4.6967878862087264, "grad_norm": 3.3115100860595703, "learning_rate": 7.917538515405639e-06, "loss": 0.3668, "step": 28772 }, { "epoch": 4.696951144851231, "grad_norm": 3.9226670265197754, "learning_rate": 7.916911434423048e-06, "loss": 0.3506, "step": 28773 }, { "epoch": 4.697114403493735, "grad_norm": 3.841599941253662, "learning_rate": 7.916284362003242e-06, "loss": 0.3864, "step": 28774 }, { "epoch": 4.697277662136239, "grad_norm": 2.5651586055755615, "learning_rate": 7.915657298148806e-06, "loss": 0.3301, "step": 28775 }, { "epoch": 4.697440920778744, "grad_norm": 2.924178123474121, "learning_rate": 7.915030242862317e-06, "loss": 0.3152, "step": 28776 }, { "epoch": 4.697604179421248, "grad_norm": 4.0316572189331055, "learning_rate": 7.91440319614635e-06, "loss": 0.3946, "step": 28777 }, { "epoch": 4.697767438063752, "grad_norm": 4.255829334259033, "learning_rate": 7.913776158003486e-06, "loss": 0.4412, "step": 28778 }, { "epoch": 4.697930696706257, "grad_norm": 3.385603427886963, "learning_rate": 7.913149128436298e-06, "loss": 0.3339, "step": 28779 }, { "epoch": 4.698093955348761, "grad_norm": 3.316884994506836, "learning_rate": 7.912522107447367e-06, "loss": 0.345, "step": 28780 }, { "epoch": 4.6982572139912655, "grad_norm": 3.7552547454833984, "learning_rate": 7.911895095039268e-06, "loss": 0.4402, "step": 28781 }, { "epoch": 4.69842047263377, "grad_norm": 3.6210503578186035, "learning_rate": 7.911268091214582e-06, "loss": 0.3971, "step": 28782 }, { "epoch": 4.698583731276274, "grad_norm": 3.880422592163086, "learning_rate": 7.910641095975886e-06, "loss": 0.3593, "step": 28783 }, { "epoch": 4.698746989918779, "grad_norm": 3.4388864040374756, "learning_rate": 7.910014109325754e-06, "loss": 0.3423, "step": 28784 }, { "epoch": 4.698910248561283, "grad_norm": 2.5357024669647217, "learning_rate": 7.909387131266761e-06, "loss": 0.3153, "step": 28785 }, { "epoch": 4.699073507203788, "grad_norm": 3.4611408710479736, "learning_rate": 7.90876016180149e-06, "loss": 0.395, "step": 28786 }, { "epoch": 4.699236765846292, "grad_norm": 3.503413677215576, "learning_rate": 7.908133200932517e-06, "loss": 0.3311, "step": 28787 }, { "epoch": 4.699400024488797, "grad_norm": 3.280536413192749, "learning_rate": 7.90750624866242e-06, "loss": 0.3629, "step": 28788 }, { "epoch": 4.699563283131301, "grad_norm": 3.9524765014648438, "learning_rate": 7.906879304993774e-06, "loss": 0.3811, "step": 28789 }, { "epoch": 4.699726541773805, "grad_norm": 3.930675745010376, "learning_rate": 7.906252369929156e-06, "loss": 0.4032, "step": 28790 }, { "epoch": 4.69988980041631, "grad_norm": 2.6400208473205566, "learning_rate": 7.905625443471143e-06, "loss": 0.338, "step": 28791 }, { "epoch": 4.700053059058813, "grad_norm": 2.9244980812072754, "learning_rate": 7.904998525622315e-06, "loss": 0.3136, "step": 28792 }, { "epoch": 4.700216317701319, "grad_norm": 2.945852279663086, "learning_rate": 7.904371616385248e-06, "loss": 0.3273, "step": 28793 }, { "epoch": 4.700379576343822, "grad_norm": 3.5098578929901123, "learning_rate": 7.903744715762516e-06, "loss": 0.3832, "step": 28794 }, { "epoch": 4.700542834986327, "grad_norm": 3.6774868965148926, "learning_rate": 7.9031178237567e-06, "loss": 0.4172, "step": 28795 }, { "epoch": 4.700706093628831, "grad_norm": 3.9147183895111084, "learning_rate": 7.902490940370371e-06, "loss": 0.4078, "step": 28796 }, { "epoch": 4.700869352271336, "grad_norm": 4.143857479095459, "learning_rate": 7.90186406560611e-06, "loss": 0.3835, "step": 28797 }, { "epoch": 4.70103261091384, "grad_norm": 3.561241865158081, "learning_rate": 7.901237199466496e-06, "loss": 0.3379, "step": 28798 }, { "epoch": 4.7011958695563445, "grad_norm": 3.944700002670288, "learning_rate": 7.900610341954102e-06, "loss": 0.4002, "step": 28799 }, { "epoch": 4.701359128198849, "grad_norm": 3.602585554122925, "learning_rate": 7.899983493071506e-06, "loss": 0.3817, "step": 28800 }, { "epoch": 4.701522386841353, "grad_norm": 3.5608205795288086, "learning_rate": 7.899356652821287e-06, "loss": 0.4154, "step": 28801 }, { "epoch": 4.701685645483858, "grad_norm": 3.0021862983703613, "learning_rate": 7.898729821206019e-06, "loss": 0.333, "step": 28802 }, { "epoch": 4.701848904126362, "grad_norm": 3.277535915374756, "learning_rate": 7.89810299822828e-06, "loss": 0.3983, "step": 28803 }, { "epoch": 4.702012162768867, "grad_norm": 3.868725061416626, "learning_rate": 7.897476183890644e-06, "loss": 0.4009, "step": 28804 }, { "epoch": 4.702175421411371, "grad_norm": 4.433058738708496, "learning_rate": 7.89684937819569e-06, "loss": 0.3858, "step": 28805 }, { "epoch": 4.702338680053876, "grad_norm": 3.730592966079712, "learning_rate": 7.896222581145994e-06, "loss": 0.3584, "step": 28806 }, { "epoch": 4.70250193869638, "grad_norm": 3.437819480895996, "learning_rate": 7.895595792744133e-06, "loss": 0.3735, "step": 28807 }, { "epoch": 4.702665197338884, "grad_norm": 3.7448906898498535, "learning_rate": 7.89496901299268e-06, "loss": 0.4158, "step": 28808 }, { "epoch": 4.702828455981389, "grad_norm": 2.981318712234497, "learning_rate": 7.894342241894219e-06, "loss": 0.3187, "step": 28809 }, { "epoch": 4.702991714623893, "grad_norm": 3.720388889312744, "learning_rate": 7.893715479451319e-06, "loss": 0.389, "step": 28810 }, { "epoch": 4.703154973266397, "grad_norm": 3.8811135292053223, "learning_rate": 7.893088725666561e-06, "loss": 0.3434, "step": 28811 }, { "epoch": 4.703318231908901, "grad_norm": 3.7952380180358887, "learning_rate": 7.89246198054252e-06, "loss": 0.3975, "step": 28812 }, { "epoch": 4.703481490551406, "grad_norm": 3.3755617141723633, "learning_rate": 7.891835244081773e-06, "loss": 0.3627, "step": 28813 }, { "epoch": 4.70364474919391, "grad_norm": 3.1400113105773926, "learning_rate": 7.891208516286893e-06, "loss": 0.3515, "step": 28814 }, { "epoch": 4.703808007836415, "grad_norm": 3.0685696601867676, "learning_rate": 7.890581797160459e-06, "loss": 0.3735, "step": 28815 }, { "epoch": 4.703971266478919, "grad_norm": 3.623603343963623, "learning_rate": 7.889955086705048e-06, "loss": 0.391, "step": 28816 }, { "epoch": 4.7041345251214235, "grad_norm": 3.9199185371398926, "learning_rate": 7.889328384923234e-06, "loss": 0.4671, "step": 28817 }, { "epoch": 4.704297783763928, "grad_norm": 3.4703805446624756, "learning_rate": 7.888701691817592e-06, "loss": 0.364, "step": 28818 }, { "epoch": 4.704461042406432, "grad_norm": 4.089195728302002, "learning_rate": 7.888075007390703e-06, "loss": 0.3777, "step": 28819 }, { "epoch": 4.704624301048937, "grad_norm": 2.9405806064605713, "learning_rate": 7.887448331645139e-06, "loss": 0.3078, "step": 28820 }, { "epoch": 4.704787559691441, "grad_norm": 3.9377329349517822, "learning_rate": 7.886821664583475e-06, "loss": 0.4002, "step": 28821 }, { "epoch": 4.704950818333946, "grad_norm": 3.276742935180664, "learning_rate": 7.886195006208296e-06, "loss": 0.3535, "step": 28822 }, { "epoch": 4.70511407697645, "grad_norm": 3.1246395111083984, "learning_rate": 7.885568356522168e-06, "loss": 0.3414, "step": 28823 }, { "epoch": 4.705277335618955, "grad_norm": 3.508546829223633, "learning_rate": 7.884941715527665e-06, "loss": 0.3148, "step": 28824 }, { "epoch": 4.705440594261459, "grad_norm": 3.7905807495117188, "learning_rate": 7.884315083227373e-06, "loss": 0.4637, "step": 28825 }, { "epoch": 4.705603852903963, "grad_norm": 3.507097005844116, "learning_rate": 7.883688459623861e-06, "loss": 0.4174, "step": 28826 }, { "epoch": 4.705767111546468, "grad_norm": 4.124783515930176, "learning_rate": 7.883061844719708e-06, "loss": 0.3816, "step": 28827 }, { "epoch": 4.705930370188971, "grad_norm": 4.1632843017578125, "learning_rate": 7.882435238517487e-06, "loss": 0.3593, "step": 28828 }, { "epoch": 4.706093628831477, "grad_norm": 3.3093671798706055, "learning_rate": 7.881808641019776e-06, "loss": 0.3905, "step": 28829 }, { "epoch": 4.70625688747398, "grad_norm": 3.6876068115234375, "learning_rate": 7.88118205222915e-06, "loss": 0.4095, "step": 28830 }, { "epoch": 4.706420146116485, "grad_norm": 2.6933650970458984, "learning_rate": 7.880555472148181e-06, "loss": 0.3165, "step": 28831 }, { "epoch": 4.706583404758989, "grad_norm": 4.294820308685303, "learning_rate": 7.879928900779457e-06, "loss": 0.3503, "step": 28832 }, { "epoch": 4.706746663401494, "grad_norm": 3.8748250007629395, "learning_rate": 7.879302338125539e-06, "loss": 0.3417, "step": 28833 }, { "epoch": 4.706909922043998, "grad_norm": 3.5064425468444824, "learning_rate": 7.878675784189008e-06, "loss": 0.3564, "step": 28834 }, { "epoch": 4.7070731806865025, "grad_norm": 3.906698226928711, "learning_rate": 7.878049238972439e-06, "loss": 0.377, "step": 28835 }, { "epoch": 4.707236439329007, "grad_norm": 3.6147570610046387, "learning_rate": 7.877422702478408e-06, "loss": 0.3927, "step": 28836 }, { "epoch": 4.707399697971511, "grad_norm": 3.211777925491333, "learning_rate": 7.876796174709493e-06, "loss": 0.367, "step": 28837 }, { "epoch": 4.707562956614016, "grad_norm": 3.2126238346099854, "learning_rate": 7.876169655668268e-06, "loss": 0.3352, "step": 28838 }, { "epoch": 4.70772621525652, "grad_norm": 3.7076773643493652, "learning_rate": 7.875543145357307e-06, "loss": 0.4162, "step": 28839 }, { "epoch": 4.707889473899025, "grad_norm": 3.3101558685302734, "learning_rate": 7.874916643779184e-06, "loss": 0.4138, "step": 28840 }, { "epoch": 4.708052732541529, "grad_norm": 4.5132832527160645, "learning_rate": 7.874290150936478e-06, "loss": 0.3497, "step": 28841 }, { "epoch": 4.7082159911840336, "grad_norm": 4.03592586517334, "learning_rate": 7.873663666831764e-06, "loss": 0.4048, "step": 28842 }, { "epoch": 4.708379249826538, "grad_norm": 2.9355106353759766, "learning_rate": 7.873037191467614e-06, "loss": 0.3411, "step": 28843 }, { "epoch": 4.708542508469042, "grad_norm": 2.961447238922119, "learning_rate": 7.872410724846605e-06, "loss": 0.3494, "step": 28844 }, { "epoch": 4.708705767111546, "grad_norm": 3.323532819747925, "learning_rate": 7.871784266971311e-06, "loss": 0.3108, "step": 28845 }, { "epoch": 4.708869025754051, "grad_norm": 3.8003652095794678, "learning_rate": 7.87115781784431e-06, "loss": 0.3739, "step": 28846 }, { "epoch": 4.709032284396555, "grad_norm": 3.3623557090759277, "learning_rate": 7.870531377468173e-06, "loss": 0.3666, "step": 28847 }, { "epoch": 4.709195543039059, "grad_norm": 3.0145483016967773, "learning_rate": 7.869904945845478e-06, "loss": 0.3369, "step": 28848 }, { "epoch": 4.709358801681564, "grad_norm": 2.9921445846557617, "learning_rate": 7.8692785229788e-06, "loss": 0.3662, "step": 28849 }, { "epoch": 4.709522060324068, "grad_norm": 3.4712510108947754, "learning_rate": 7.868652108870715e-06, "loss": 0.3522, "step": 28850 }, { "epoch": 4.709685318966573, "grad_norm": 3.3300395011901855, "learning_rate": 7.868025703523795e-06, "loss": 0.3614, "step": 28851 }, { "epoch": 4.709848577609077, "grad_norm": 3.2826082706451416, "learning_rate": 7.867399306940619e-06, "loss": 0.2929, "step": 28852 }, { "epoch": 4.7100118362515815, "grad_norm": 4.1325602531433105, "learning_rate": 7.866772919123756e-06, "loss": 0.4562, "step": 28853 }, { "epoch": 4.710175094894086, "grad_norm": 3.775886058807373, "learning_rate": 7.866146540075785e-06, "loss": 0.387, "step": 28854 }, { "epoch": 4.71033835353659, "grad_norm": 4.718801498413086, "learning_rate": 7.86552016979928e-06, "loss": 0.4106, "step": 28855 }, { "epoch": 4.710501612179095, "grad_norm": 3.168300151824951, "learning_rate": 7.864893808296815e-06, "loss": 0.3367, "step": 28856 }, { "epoch": 4.710664870821599, "grad_norm": 3.6585657596588135, "learning_rate": 7.864267455570967e-06, "loss": 0.3985, "step": 28857 }, { "epoch": 4.710828129464104, "grad_norm": 4.404420375823975, "learning_rate": 7.863641111624305e-06, "loss": 0.4142, "step": 28858 }, { "epoch": 4.710991388106608, "grad_norm": 2.7201945781707764, "learning_rate": 7.86301477645941e-06, "loss": 0.2943, "step": 28859 }, { "epoch": 4.7111546467491126, "grad_norm": 4.84165096282959, "learning_rate": 7.862388450078854e-06, "loss": 0.4148, "step": 28860 }, { "epoch": 4.711317905391617, "grad_norm": 3.954411506652832, "learning_rate": 7.861762132485217e-06, "loss": 0.3519, "step": 28861 }, { "epoch": 4.711481164034121, "grad_norm": 3.049785852432251, "learning_rate": 7.861135823681063e-06, "loss": 0.3408, "step": 28862 }, { "epoch": 4.711644422676626, "grad_norm": 4.611417293548584, "learning_rate": 7.860509523668973e-06, "loss": 0.388, "step": 28863 }, { "epoch": 4.711807681319129, "grad_norm": 4.964419364929199, "learning_rate": 7.859883232451522e-06, "loss": 0.4111, "step": 28864 }, { "epoch": 4.711970939961634, "grad_norm": 2.874349355697632, "learning_rate": 7.85925695003128e-06, "loss": 0.3111, "step": 28865 }, { "epoch": 4.712134198604138, "grad_norm": 3.555065631866455, "learning_rate": 7.858630676410827e-06, "loss": 0.3005, "step": 28866 }, { "epoch": 4.712297457246643, "grad_norm": 3.259594440460205, "learning_rate": 7.858004411592734e-06, "loss": 0.3286, "step": 28867 }, { "epoch": 4.712460715889147, "grad_norm": 3.143498659133911, "learning_rate": 7.857378155579576e-06, "loss": 0.3236, "step": 28868 }, { "epoch": 4.712623974531652, "grad_norm": 2.8800201416015625, "learning_rate": 7.856751908373925e-06, "loss": 0.3371, "step": 28869 }, { "epoch": 4.712787233174156, "grad_norm": 4.350010871887207, "learning_rate": 7.856125669978358e-06, "loss": 0.3953, "step": 28870 }, { "epoch": 4.7129504918166605, "grad_norm": 4.245281219482422, "learning_rate": 7.855499440395455e-06, "loss": 0.3735, "step": 28871 }, { "epoch": 4.713113750459165, "grad_norm": 3.805182695388794, "learning_rate": 7.85487321962778e-06, "loss": 0.4305, "step": 28872 }, { "epoch": 4.713277009101669, "grad_norm": 3.903630018234253, "learning_rate": 7.85424700767791e-06, "loss": 0.4282, "step": 28873 }, { "epoch": 4.713440267744174, "grad_norm": 3.988668203353882, "learning_rate": 7.853620804548421e-06, "loss": 0.412, "step": 28874 }, { "epoch": 4.713603526386678, "grad_norm": 3.7166244983673096, "learning_rate": 7.852994610241886e-06, "loss": 0.4551, "step": 28875 }, { "epoch": 4.713766785029183, "grad_norm": 3.8024516105651855, "learning_rate": 7.85236842476088e-06, "loss": 0.4151, "step": 28876 }, { "epoch": 4.713930043671687, "grad_norm": 3.7781810760498047, "learning_rate": 7.851742248107976e-06, "loss": 0.3964, "step": 28877 }, { "epoch": 4.7140933023141915, "grad_norm": 3.364793062210083, "learning_rate": 7.85111608028575e-06, "loss": 0.3986, "step": 28878 }, { "epoch": 4.714256560956696, "grad_norm": 3.46614408493042, "learning_rate": 7.850489921296773e-06, "loss": 0.3479, "step": 28879 }, { "epoch": 4.7144198195992, "grad_norm": 3.867568254470825, "learning_rate": 7.849863771143621e-06, "loss": 0.3962, "step": 28880 }, { "epoch": 4.714583078241704, "grad_norm": 4.5086283683776855, "learning_rate": 7.84923762982887e-06, "loss": 0.4187, "step": 28881 }, { "epoch": 4.714746336884209, "grad_norm": 3.0539917945861816, "learning_rate": 7.848611497355088e-06, "loss": 0.3487, "step": 28882 }, { "epoch": 4.714909595526713, "grad_norm": 4.123540878295898, "learning_rate": 7.84798537372485e-06, "loss": 0.3978, "step": 28883 }, { "epoch": 4.715072854169217, "grad_norm": 3.6472551822662354, "learning_rate": 7.847359258940735e-06, "loss": 0.3835, "step": 28884 }, { "epoch": 4.715236112811722, "grad_norm": 3.1298680305480957, "learning_rate": 7.846733153005308e-06, "loss": 0.3751, "step": 28885 }, { "epoch": 4.715399371454226, "grad_norm": 4.543319225311279, "learning_rate": 7.846107055921151e-06, "loss": 0.4437, "step": 28886 }, { "epoch": 4.715562630096731, "grad_norm": 3.8654849529266357, "learning_rate": 7.845480967690836e-06, "loss": 0.4334, "step": 28887 }, { "epoch": 4.715725888739235, "grad_norm": 3.413503885269165, "learning_rate": 7.844854888316933e-06, "loss": 0.3787, "step": 28888 }, { "epoch": 4.7158891473817395, "grad_norm": 3.7689802646636963, "learning_rate": 7.84422881780202e-06, "loss": 0.354, "step": 28889 }, { "epoch": 4.716052406024244, "grad_norm": 4.01983118057251, "learning_rate": 7.843602756148668e-06, "loss": 0.4501, "step": 28890 }, { "epoch": 4.716215664666748, "grad_norm": 3.7190709114074707, "learning_rate": 7.842976703359451e-06, "loss": 0.407, "step": 28891 }, { "epoch": 4.716378923309253, "grad_norm": 3.1848483085632324, "learning_rate": 7.842350659436941e-06, "loss": 0.3722, "step": 28892 }, { "epoch": 4.716542181951757, "grad_norm": 4.013704299926758, "learning_rate": 7.841724624383714e-06, "loss": 0.5338, "step": 28893 }, { "epoch": 4.716705440594262, "grad_norm": 3.638913154602051, "learning_rate": 7.841098598202341e-06, "loss": 0.3366, "step": 28894 }, { "epoch": 4.716868699236766, "grad_norm": 3.478562116622925, "learning_rate": 7.840472580895396e-06, "loss": 0.364, "step": 28895 }, { "epoch": 4.7170319578792705, "grad_norm": 4.050448894500732, "learning_rate": 7.839846572465452e-06, "loss": 0.3825, "step": 28896 }, { "epoch": 4.717195216521775, "grad_norm": 3.4167768955230713, "learning_rate": 7.839220572915085e-06, "loss": 0.3585, "step": 28897 }, { "epoch": 4.7173584751642785, "grad_norm": 3.7939066886901855, "learning_rate": 7.838594582246865e-06, "loss": 0.3932, "step": 28898 }, { "epoch": 4.717521733806784, "grad_norm": 2.9423484802246094, "learning_rate": 7.837968600463368e-06, "loss": 0.3253, "step": 28899 }, { "epoch": 4.717684992449287, "grad_norm": 3.689981698989868, "learning_rate": 7.837342627567167e-06, "loss": 0.3903, "step": 28900 }, { "epoch": 4.717848251091792, "grad_norm": 4.4291605949401855, "learning_rate": 7.836716663560833e-06, "loss": 0.4521, "step": 28901 }, { "epoch": 4.718011509734296, "grad_norm": 3.7433793544769287, "learning_rate": 7.836090708446938e-06, "loss": 0.4528, "step": 28902 }, { "epoch": 4.718174768376801, "grad_norm": 3.1435468196868896, "learning_rate": 7.835464762228059e-06, "loss": 0.3928, "step": 28903 }, { "epoch": 4.718338027019305, "grad_norm": 3.9054911136627197, "learning_rate": 7.834838824906765e-06, "loss": 0.3853, "step": 28904 }, { "epoch": 4.71850128566181, "grad_norm": 4.150392055511475, "learning_rate": 7.834212896485634e-06, "loss": 0.4261, "step": 28905 }, { "epoch": 4.718664544304314, "grad_norm": 3.5676028728485107, "learning_rate": 7.833586976967233e-06, "loss": 0.3986, "step": 28906 }, { "epoch": 4.7188278029468185, "grad_norm": 2.7582311630249023, "learning_rate": 7.83296106635414e-06, "loss": 0.2697, "step": 28907 }, { "epoch": 4.718991061589323, "grad_norm": 4.109766483306885, "learning_rate": 7.832335164648921e-06, "loss": 0.4091, "step": 28908 }, { "epoch": 4.719154320231827, "grad_norm": 3.8825490474700928, "learning_rate": 7.831709271854158e-06, "loss": 0.4278, "step": 28909 }, { "epoch": 4.719317578874332, "grad_norm": 3.536900520324707, "learning_rate": 7.831083387972424e-06, "loss": 0.3738, "step": 28910 }, { "epoch": 4.719480837516836, "grad_norm": 3.4755446910858154, "learning_rate": 7.830457513006282e-06, "loss": 0.4036, "step": 28911 }, { "epoch": 4.719644096159341, "grad_norm": 4.102764129638672, "learning_rate": 7.829831646958307e-06, "loss": 0.3847, "step": 28912 }, { "epoch": 4.719807354801845, "grad_norm": 2.9791691303253174, "learning_rate": 7.829205789831077e-06, "loss": 0.3682, "step": 28913 }, { "epoch": 4.7199706134443495, "grad_norm": 3.580582618713379, "learning_rate": 7.828579941627163e-06, "loss": 0.3771, "step": 28914 }, { "epoch": 4.720133872086854, "grad_norm": 3.72065806388855, "learning_rate": 7.827954102349139e-06, "loss": 0.4082, "step": 28915 }, { "epoch": 4.720297130729358, "grad_norm": 3.6440980434417725, "learning_rate": 7.827328271999573e-06, "loss": 0.3231, "step": 28916 }, { "epoch": 4.720460389371862, "grad_norm": 3.4854772090911865, "learning_rate": 7.826702450581041e-06, "loss": 0.3821, "step": 28917 }, { "epoch": 4.720623648014366, "grad_norm": 2.909019947052002, "learning_rate": 7.826076638096113e-06, "loss": 0.3579, "step": 28918 }, { "epoch": 4.720786906656871, "grad_norm": 3.737549066543579, "learning_rate": 7.825450834547363e-06, "loss": 0.4689, "step": 28919 }, { "epoch": 4.720950165299375, "grad_norm": 3.9045565128326416, "learning_rate": 7.82482503993737e-06, "loss": 0.4419, "step": 28920 }, { "epoch": 4.72111342394188, "grad_norm": 3.2015137672424316, "learning_rate": 7.824199254268694e-06, "loss": 0.3339, "step": 28921 }, { "epoch": 4.721276682584384, "grad_norm": 4.444050312042236, "learning_rate": 7.823573477543913e-06, "loss": 0.42, "step": 28922 }, { "epoch": 4.721439941226889, "grad_norm": 3.383023738861084, "learning_rate": 7.8229477097656e-06, "loss": 0.4148, "step": 28923 }, { "epoch": 4.721603199869393, "grad_norm": 3.516841411590576, "learning_rate": 7.822321950936326e-06, "loss": 0.3792, "step": 28924 }, { "epoch": 4.7217664585118975, "grad_norm": 3.597245693206787, "learning_rate": 7.821696201058666e-06, "loss": 0.4031, "step": 28925 }, { "epoch": 4.721929717154402, "grad_norm": 3.117521047592163, "learning_rate": 7.82107046013519e-06, "loss": 0.3732, "step": 28926 }, { "epoch": 4.722092975796906, "grad_norm": 4.150178909301758, "learning_rate": 7.82044472816847e-06, "loss": 0.3913, "step": 28927 }, { "epoch": 4.722256234439411, "grad_norm": 3.2933223247528076, "learning_rate": 7.819819005161081e-06, "loss": 0.3819, "step": 28928 }, { "epoch": 4.722419493081915, "grad_norm": 3.5778772830963135, "learning_rate": 7.819193291115592e-06, "loss": 0.3651, "step": 28929 }, { "epoch": 4.72258275172442, "grad_norm": 2.562945604324341, "learning_rate": 7.818567586034578e-06, "loss": 0.3356, "step": 28930 }, { "epoch": 4.722746010366924, "grad_norm": 3.1583800315856934, "learning_rate": 7.817941889920604e-06, "loss": 0.3688, "step": 28931 }, { "epoch": 4.7229092690094285, "grad_norm": 3.091585159301758, "learning_rate": 7.817316202776251e-06, "loss": 0.3853, "step": 28932 }, { "epoch": 4.723072527651933, "grad_norm": 3.8497700691223145, "learning_rate": 7.816690524604084e-06, "loss": 0.3902, "step": 28933 }, { "epoch": 4.7232357862944365, "grad_norm": 4.025117874145508, "learning_rate": 7.81606485540668e-06, "loss": 0.4177, "step": 28934 }, { "epoch": 4.723399044936942, "grad_norm": 4.21645975112915, "learning_rate": 7.815439195186604e-06, "loss": 0.4489, "step": 28935 }, { "epoch": 4.723562303579445, "grad_norm": 3.957876443862915, "learning_rate": 7.814813543946438e-06, "loss": 0.4314, "step": 28936 }, { "epoch": 4.72372556222195, "grad_norm": 3.3745760917663574, "learning_rate": 7.814187901688746e-06, "loss": 0.3924, "step": 28937 }, { "epoch": 4.723888820864454, "grad_norm": 3.8760461807250977, "learning_rate": 7.813562268416103e-06, "loss": 0.4048, "step": 28938 }, { "epoch": 4.724052079506959, "grad_norm": 3.8969249725341797, "learning_rate": 7.81293664413108e-06, "loss": 0.4736, "step": 28939 }, { "epoch": 4.724215338149463, "grad_norm": 3.996006727218628, "learning_rate": 7.81231102883625e-06, "loss": 0.4307, "step": 28940 }, { "epoch": 4.724378596791968, "grad_norm": 3.077242612838745, "learning_rate": 7.811685422534184e-06, "loss": 0.3087, "step": 28941 }, { "epoch": 4.724541855434472, "grad_norm": 3.8356401920318604, "learning_rate": 7.81105982522745e-06, "loss": 0.401, "step": 28942 }, { "epoch": 4.7247051140769765, "grad_norm": 3.1271605491638184, "learning_rate": 7.810434236918622e-06, "loss": 0.3041, "step": 28943 }, { "epoch": 4.724868372719481, "grad_norm": 3.7778239250183105, "learning_rate": 7.809808657610274e-06, "loss": 0.4545, "step": 28944 }, { "epoch": 4.725031631361985, "grad_norm": 3.122750759124756, "learning_rate": 7.809183087304975e-06, "loss": 0.3385, "step": 28945 }, { "epoch": 4.72519489000449, "grad_norm": 5.327070236206055, "learning_rate": 7.808557526005294e-06, "loss": 0.4922, "step": 28946 }, { "epoch": 4.725358148646994, "grad_norm": 2.496814012527466, "learning_rate": 7.807931973713809e-06, "loss": 0.3093, "step": 28947 }, { "epoch": 4.725521407289499, "grad_norm": 3.7370595932006836, "learning_rate": 7.807306430433088e-06, "loss": 0.3536, "step": 28948 }, { "epoch": 4.725684665932003, "grad_norm": 3.338374614715576, "learning_rate": 7.806680896165706e-06, "loss": 0.3984, "step": 28949 }, { "epoch": 4.7258479245745075, "grad_norm": 3.685634136199951, "learning_rate": 7.806055370914223e-06, "loss": 0.4046, "step": 28950 }, { "epoch": 4.726011183217011, "grad_norm": 3.6116838455200195, "learning_rate": 7.805429854681222e-06, "loss": 0.4026, "step": 28951 }, { "epoch": 4.726174441859516, "grad_norm": 3.0137529373168945, "learning_rate": 7.80480434746927e-06, "loss": 0.3304, "step": 28952 }, { "epoch": 4.72633770050202, "grad_norm": 3.6448495388031006, "learning_rate": 7.80417884928094e-06, "loss": 0.3753, "step": 28953 }, { "epoch": 4.726500959144524, "grad_norm": 3.639634847640991, "learning_rate": 7.8035533601188e-06, "loss": 0.3634, "step": 28954 }, { "epoch": 4.726664217787029, "grad_norm": 3.3009753227233887, "learning_rate": 7.80292787998542e-06, "loss": 0.3143, "step": 28955 }, { "epoch": 4.726827476429533, "grad_norm": 3.9590420722961426, "learning_rate": 7.802302408883377e-06, "loss": 0.4303, "step": 28956 }, { "epoch": 4.726990735072038, "grad_norm": 3.5048160552978516, "learning_rate": 7.801676946815237e-06, "loss": 0.3442, "step": 28957 }, { "epoch": 4.727153993714542, "grad_norm": 3.871892213821411, "learning_rate": 7.801051493783574e-06, "loss": 0.4115, "step": 28958 }, { "epoch": 4.727317252357047, "grad_norm": 3.24552583694458, "learning_rate": 7.800426049790962e-06, "loss": 0.3621, "step": 28959 }, { "epoch": 4.727480510999551, "grad_norm": 4.346569538116455, "learning_rate": 7.799800614839964e-06, "loss": 0.3675, "step": 28960 }, { "epoch": 4.7276437696420555, "grad_norm": 3.9064340591430664, "learning_rate": 7.799175188933155e-06, "loss": 0.4091, "step": 28961 }, { "epoch": 4.72780702828456, "grad_norm": 3.7780251502990723, "learning_rate": 7.798549772073104e-06, "loss": 0.3748, "step": 28962 }, { "epoch": 4.727970286927064, "grad_norm": 3.6650736331939697, "learning_rate": 7.797924364262386e-06, "loss": 0.3769, "step": 28963 }, { "epoch": 4.728133545569569, "grad_norm": 3.4119558334350586, "learning_rate": 7.79729896550357e-06, "loss": 0.3914, "step": 28964 }, { "epoch": 4.728296804212073, "grad_norm": 3.7457640171051025, "learning_rate": 7.796673575799224e-06, "loss": 0.3711, "step": 28965 }, { "epoch": 4.728460062854578, "grad_norm": 3.1773149967193604, "learning_rate": 7.796048195151921e-06, "loss": 0.3536, "step": 28966 }, { "epoch": 4.728623321497082, "grad_norm": 3.3587920665740967, "learning_rate": 7.795422823564234e-06, "loss": 0.3703, "step": 28967 }, { "epoch": 4.7287865801395865, "grad_norm": 3.3252649307250977, "learning_rate": 7.79479746103873e-06, "loss": 0.3426, "step": 28968 }, { "epoch": 4.728949838782091, "grad_norm": 3.598259687423706, "learning_rate": 7.794172107577984e-06, "loss": 0.3183, "step": 28969 }, { "epoch": 4.7291130974245945, "grad_norm": 3.4034228324890137, "learning_rate": 7.793546763184561e-06, "loss": 0.3802, "step": 28970 }, { "epoch": 4.729276356067099, "grad_norm": 3.0672340393066406, "learning_rate": 7.792921427861033e-06, "loss": 0.3418, "step": 28971 }, { "epoch": 4.729439614709603, "grad_norm": 4.010647773742676, "learning_rate": 7.792296101609973e-06, "loss": 0.3807, "step": 28972 }, { "epoch": 4.729602873352108, "grad_norm": 4.083010673522949, "learning_rate": 7.791670784433948e-06, "loss": 0.4267, "step": 28973 }, { "epoch": 4.729766131994612, "grad_norm": 4.823437690734863, "learning_rate": 7.79104547633553e-06, "loss": 0.4625, "step": 28974 }, { "epoch": 4.729929390637117, "grad_norm": 3.1352593898773193, "learning_rate": 7.790420177317293e-06, "loss": 0.3644, "step": 28975 }, { "epoch": 4.730092649279621, "grad_norm": 3.816018581390381, "learning_rate": 7.789794887381804e-06, "loss": 0.4114, "step": 28976 }, { "epoch": 4.730255907922126, "grad_norm": 3.7653648853302, "learning_rate": 7.789169606531633e-06, "loss": 0.3854, "step": 28977 }, { "epoch": 4.73041916656463, "grad_norm": 3.8892509937286377, "learning_rate": 7.788544334769351e-06, "loss": 0.4305, "step": 28978 }, { "epoch": 4.7305824252071345, "grad_norm": 3.1495938301086426, "learning_rate": 7.787919072097531e-06, "loss": 0.3347, "step": 28979 }, { "epoch": 4.730745683849639, "grad_norm": 3.721132755279541, "learning_rate": 7.787293818518737e-06, "loss": 0.3692, "step": 28980 }, { "epoch": 4.730908942492143, "grad_norm": 3.7630646228790283, "learning_rate": 7.786668574035545e-06, "loss": 0.4062, "step": 28981 }, { "epoch": 4.731072201134648, "grad_norm": 3.4251317977905273, "learning_rate": 7.78604333865052e-06, "loss": 0.4031, "step": 28982 }, { "epoch": 4.731235459777152, "grad_norm": 3.981781482696533, "learning_rate": 7.785418112366236e-06, "loss": 0.368, "step": 28983 }, { "epoch": 4.731398718419657, "grad_norm": 3.0975117683410645, "learning_rate": 7.784792895185263e-06, "loss": 0.3688, "step": 28984 }, { "epoch": 4.731561977062161, "grad_norm": 3.2188422679901123, "learning_rate": 7.784167687110166e-06, "loss": 0.3581, "step": 28985 }, { "epoch": 4.7317252357046655, "grad_norm": 4.0032172203063965, "learning_rate": 7.783542488143521e-06, "loss": 0.4405, "step": 28986 }, { "epoch": 4.731888494347169, "grad_norm": 4.13517951965332, "learning_rate": 7.782917298287898e-06, "loss": 0.4768, "step": 28987 }, { "epoch": 4.7320517529896735, "grad_norm": 3.560387134552002, "learning_rate": 7.782292117545862e-06, "loss": 0.4261, "step": 28988 }, { "epoch": 4.732215011632178, "grad_norm": 3.035665512084961, "learning_rate": 7.78166694591999e-06, "loss": 0.3328, "step": 28989 }, { "epoch": 4.732378270274682, "grad_norm": 3.8296995162963867, "learning_rate": 7.781041783412844e-06, "loss": 0.4444, "step": 28990 }, { "epoch": 4.732541528917187, "grad_norm": 2.385930299758911, "learning_rate": 7.780416630026999e-06, "loss": 0.2716, "step": 28991 }, { "epoch": 4.732704787559691, "grad_norm": 3.6013190746307373, "learning_rate": 7.779791485765023e-06, "loss": 0.3746, "step": 28992 }, { "epoch": 4.732868046202196, "grad_norm": 3.2405154705047607, "learning_rate": 7.779166350629485e-06, "loss": 0.3685, "step": 28993 }, { "epoch": 4.7330313048447, "grad_norm": 2.7707197666168213, "learning_rate": 7.778541224622955e-06, "loss": 0.3101, "step": 28994 }, { "epoch": 4.733194563487205, "grad_norm": 3.1828835010528564, "learning_rate": 7.777916107748004e-06, "loss": 0.3392, "step": 28995 }, { "epoch": 4.733357822129709, "grad_norm": 3.4977641105651855, "learning_rate": 7.7772910000072e-06, "loss": 0.3496, "step": 28996 }, { "epoch": 4.7335210807722135, "grad_norm": 3.5788469314575195, "learning_rate": 7.776665901403113e-06, "loss": 0.4301, "step": 28997 }, { "epoch": 4.733684339414718, "grad_norm": 3.6569864749908447, "learning_rate": 7.776040811938318e-06, "loss": 0.4055, "step": 28998 }, { "epoch": 4.733847598057222, "grad_norm": 3.8753838539123535, "learning_rate": 7.775415731615375e-06, "loss": 0.3968, "step": 28999 }, { "epoch": 4.734010856699727, "grad_norm": 3.8796122074127197, "learning_rate": 7.774790660436857e-06, "loss": 0.405, "step": 29000 }, { "epoch": 4.734174115342231, "grad_norm": 3.9938817024230957, "learning_rate": 7.774165598405337e-06, "loss": 0.4229, "step": 29001 }, { "epoch": 4.734337373984736, "grad_norm": 2.9650425910949707, "learning_rate": 7.77354054552338e-06, "loss": 0.3528, "step": 29002 }, { "epoch": 4.73450063262724, "grad_norm": 3.3145947456359863, "learning_rate": 7.772915501793557e-06, "loss": 0.4039, "step": 29003 }, { "epoch": 4.734663891269744, "grad_norm": 3.3539085388183594, "learning_rate": 7.77229046721844e-06, "loss": 0.3377, "step": 29004 }, { "epoch": 4.734827149912249, "grad_norm": 4.1757402420043945, "learning_rate": 7.771665441800593e-06, "loss": 0.3939, "step": 29005 }, { "epoch": 4.7349904085547525, "grad_norm": 3.018249750137329, "learning_rate": 7.771040425542588e-06, "loss": 0.3255, "step": 29006 }, { "epoch": 4.735153667197257, "grad_norm": 3.8785388469696045, "learning_rate": 7.770415418446994e-06, "loss": 0.3869, "step": 29007 }, { "epoch": 4.735316925839761, "grad_norm": 3.805368661880493, "learning_rate": 7.769790420516385e-06, "loss": 0.4357, "step": 29008 }, { "epoch": 4.735480184482266, "grad_norm": 3.2890539169311523, "learning_rate": 7.769165431753323e-06, "loss": 0.3387, "step": 29009 }, { "epoch": 4.73564344312477, "grad_norm": 3.6273045539855957, "learning_rate": 7.768540452160378e-06, "loss": 0.4224, "step": 29010 }, { "epoch": 4.735806701767275, "grad_norm": 3.368917942047119, "learning_rate": 7.76791548174012e-06, "loss": 0.3348, "step": 29011 }, { "epoch": 4.735969960409779, "grad_norm": 2.94062876701355, "learning_rate": 7.76729052049512e-06, "loss": 0.3818, "step": 29012 }, { "epoch": 4.736133219052284, "grad_norm": 3.7412266731262207, "learning_rate": 7.766665568427943e-06, "loss": 0.3737, "step": 29013 }, { "epoch": 4.736296477694788, "grad_norm": 3.4467976093292236, "learning_rate": 7.766040625541163e-06, "loss": 0.3771, "step": 29014 }, { "epoch": 4.7364597363372924, "grad_norm": 3.6420047283172607, "learning_rate": 7.765415691837348e-06, "loss": 0.3728, "step": 29015 }, { "epoch": 4.736622994979797, "grad_norm": 3.9186031818389893, "learning_rate": 7.764790767319063e-06, "loss": 0.354, "step": 29016 }, { "epoch": 4.736786253622301, "grad_norm": 3.675016164779663, "learning_rate": 7.76416585198888e-06, "loss": 0.3529, "step": 29017 }, { "epoch": 4.736949512264806, "grad_norm": 3.529956579208374, "learning_rate": 7.763540945849368e-06, "loss": 0.3514, "step": 29018 }, { "epoch": 4.73711277090731, "grad_norm": 3.183274507522583, "learning_rate": 7.762916048903093e-06, "loss": 0.3712, "step": 29019 }, { "epoch": 4.737276029549815, "grad_norm": 3.4675097465515137, "learning_rate": 7.762291161152627e-06, "loss": 0.3943, "step": 29020 }, { "epoch": 4.737439288192318, "grad_norm": 4.28241491317749, "learning_rate": 7.761666282600536e-06, "loss": 0.4497, "step": 29021 }, { "epoch": 4.7376025468348235, "grad_norm": 3.3659284114837646, "learning_rate": 7.761041413249389e-06, "loss": 0.3886, "step": 29022 }, { "epoch": 4.737765805477327, "grad_norm": 2.731414318084717, "learning_rate": 7.760416553101755e-06, "loss": 0.2752, "step": 29023 }, { "epoch": 4.7379290641198315, "grad_norm": 4.0620503425598145, "learning_rate": 7.759791702160203e-06, "loss": 0.4331, "step": 29024 }, { "epoch": 4.738092322762336, "grad_norm": 4.716700553894043, "learning_rate": 7.759166860427303e-06, "loss": 0.4667, "step": 29025 }, { "epoch": 4.73825558140484, "grad_norm": 3.007558584213257, "learning_rate": 7.758542027905621e-06, "loss": 0.3712, "step": 29026 }, { "epoch": 4.738418840047345, "grad_norm": 3.5978362560272217, "learning_rate": 7.757917204597727e-06, "loss": 0.3623, "step": 29027 }, { "epoch": 4.738582098689849, "grad_norm": 5.102191925048828, "learning_rate": 7.757292390506191e-06, "loss": 0.8709, "step": 29028 }, { "epoch": 4.738745357332354, "grad_norm": 4.35056734085083, "learning_rate": 7.756667585633577e-06, "loss": 0.3913, "step": 29029 }, { "epoch": 4.738908615974858, "grad_norm": 4.596884250640869, "learning_rate": 7.756042789982454e-06, "loss": 0.412, "step": 29030 }, { "epoch": 4.739071874617363, "grad_norm": 4.1456298828125, "learning_rate": 7.755418003555393e-06, "loss": 0.4017, "step": 29031 }, { "epoch": 4.739235133259867, "grad_norm": 3.250209331512451, "learning_rate": 7.75479322635496e-06, "loss": 0.3239, "step": 29032 }, { "epoch": 4.739398391902371, "grad_norm": 3.489825963973999, "learning_rate": 7.754168458383726e-06, "loss": 0.3665, "step": 29033 }, { "epoch": 4.739561650544876, "grad_norm": 3.0903072357177734, "learning_rate": 7.753543699644255e-06, "loss": 0.3297, "step": 29034 }, { "epoch": 4.73972490918738, "grad_norm": 3.3362908363342285, "learning_rate": 7.75291895013912e-06, "loss": 0.3578, "step": 29035 }, { "epoch": 4.739888167829885, "grad_norm": 3.774972438812256, "learning_rate": 7.752294209870884e-06, "loss": 0.341, "step": 29036 }, { "epoch": 4.740051426472389, "grad_norm": 3.4719274044036865, "learning_rate": 7.75166947884212e-06, "loss": 0.3686, "step": 29037 }, { "epoch": 4.740214685114894, "grad_norm": 3.808138847351074, "learning_rate": 7.751044757055396e-06, "loss": 0.3889, "step": 29038 }, { "epoch": 4.740377943757398, "grad_norm": 3.580270528793335, "learning_rate": 7.750420044513275e-06, "loss": 0.3904, "step": 29039 }, { "epoch": 4.740541202399902, "grad_norm": 2.9745969772338867, "learning_rate": 7.749795341218329e-06, "loss": 0.3308, "step": 29040 }, { "epoch": 4.740704461042406, "grad_norm": 4.38159704208374, "learning_rate": 7.749170647173123e-06, "loss": 0.4265, "step": 29041 }, { "epoch": 4.7408677196849105, "grad_norm": 3.201347589492798, "learning_rate": 7.748545962380227e-06, "loss": 0.4036, "step": 29042 }, { "epoch": 4.741030978327415, "grad_norm": 3.4526400566101074, "learning_rate": 7.747921286842209e-06, "loss": 0.3524, "step": 29043 }, { "epoch": 4.741194236969919, "grad_norm": 3.7844579219818115, "learning_rate": 7.747296620561637e-06, "loss": 0.3489, "step": 29044 }, { "epoch": 4.741357495612424, "grad_norm": 4.15065860748291, "learning_rate": 7.746671963541076e-06, "loss": 0.4251, "step": 29045 }, { "epoch": 4.741520754254928, "grad_norm": 3.068589448928833, "learning_rate": 7.746047315783095e-06, "loss": 0.3329, "step": 29046 }, { "epoch": 4.741684012897433, "grad_norm": 3.974194049835205, "learning_rate": 7.74542267729027e-06, "loss": 0.3925, "step": 29047 }, { "epoch": 4.741847271539937, "grad_norm": 3.5193428993225098, "learning_rate": 7.744798048065153e-06, "loss": 0.3899, "step": 29048 }, { "epoch": 4.742010530182442, "grad_norm": 3.1232190132141113, "learning_rate": 7.744173428110322e-06, "loss": 0.3359, "step": 29049 }, { "epoch": 4.742173788824946, "grad_norm": 3.3476710319519043, "learning_rate": 7.74354881742834e-06, "loss": 0.361, "step": 29050 }, { "epoch": 4.74233704746745, "grad_norm": 3.7809836864471436, "learning_rate": 7.74292421602178e-06, "loss": 0.429, "step": 29051 }, { "epoch": 4.742500306109955, "grad_norm": 3.8470306396484375, "learning_rate": 7.742299623893204e-06, "loss": 0.4048, "step": 29052 }, { "epoch": 4.742663564752459, "grad_norm": 3.6810147762298584, "learning_rate": 7.741675041045183e-06, "loss": 0.4061, "step": 29053 }, { "epoch": 4.742826823394964, "grad_norm": 3.1915032863616943, "learning_rate": 7.741050467480283e-06, "loss": 0.3558, "step": 29054 }, { "epoch": 4.742990082037468, "grad_norm": 4.501543998718262, "learning_rate": 7.740425903201072e-06, "loss": 0.4122, "step": 29055 }, { "epoch": 4.743153340679973, "grad_norm": 3.359121561050415, "learning_rate": 7.739801348210116e-06, "loss": 0.393, "step": 29056 }, { "epoch": 4.743316599322476, "grad_norm": 3.1866884231567383, "learning_rate": 7.739176802509987e-06, "loss": 0.3639, "step": 29057 }, { "epoch": 4.7434798579649815, "grad_norm": 3.350154161453247, "learning_rate": 7.738552266103245e-06, "loss": 0.3381, "step": 29058 }, { "epoch": 4.743643116607485, "grad_norm": 3.146759033203125, "learning_rate": 7.737927738992461e-06, "loss": 0.3949, "step": 29059 }, { "epoch": 4.7438063752499895, "grad_norm": 3.8074724674224854, "learning_rate": 7.7373032211802e-06, "loss": 0.4147, "step": 29060 }, { "epoch": 4.743969633892494, "grad_norm": 2.8207452297210693, "learning_rate": 7.736678712669032e-06, "loss": 0.3577, "step": 29061 }, { "epoch": 4.744132892534998, "grad_norm": 3.045048475265503, "learning_rate": 7.736054213461525e-06, "loss": 0.3433, "step": 29062 }, { "epoch": 4.744296151177503, "grad_norm": 3.8854434490203857, "learning_rate": 7.735429723560245e-06, "loss": 0.4031, "step": 29063 }, { "epoch": 4.744459409820007, "grad_norm": 4.289220809936523, "learning_rate": 7.734805242967755e-06, "loss": 0.4091, "step": 29064 }, { "epoch": 4.744622668462512, "grad_norm": 4.424705505371094, "learning_rate": 7.734180771686627e-06, "loss": 0.4052, "step": 29065 }, { "epoch": 4.744785927105016, "grad_norm": 4.274255275726318, "learning_rate": 7.733556309719426e-06, "loss": 0.4825, "step": 29066 }, { "epoch": 4.744949185747521, "grad_norm": 3.264265537261963, "learning_rate": 7.732931857068722e-06, "loss": 0.3555, "step": 29067 }, { "epoch": 4.745112444390025, "grad_norm": 3.3656082153320312, "learning_rate": 7.732307413737076e-06, "loss": 0.3857, "step": 29068 }, { "epoch": 4.745275703032529, "grad_norm": 3.347290277481079, "learning_rate": 7.73168297972706e-06, "loss": 0.3869, "step": 29069 }, { "epoch": 4.745438961675034, "grad_norm": 3.7476890087127686, "learning_rate": 7.731058555041237e-06, "loss": 0.409, "step": 29070 }, { "epoch": 4.745602220317538, "grad_norm": 3.249009609222412, "learning_rate": 7.730434139682177e-06, "loss": 0.3816, "step": 29071 }, { "epoch": 4.745765478960043, "grad_norm": 3.349642753601074, "learning_rate": 7.729809733652444e-06, "loss": 0.3827, "step": 29072 }, { "epoch": 4.745928737602547, "grad_norm": 2.8652307987213135, "learning_rate": 7.729185336954603e-06, "loss": 0.3342, "step": 29073 }, { "epoch": 4.746091996245051, "grad_norm": 4.037125110626221, "learning_rate": 7.728560949591229e-06, "loss": 0.4006, "step": 29074 }, { "epoch": 4.746255254887556, "grad_norm": 3.582463264465332, "learning_rate": 7.72793657156488e-06, "loss": 0.3889, "step": 29075 }, { "epoch": 4.74641851353006, "grad_norm": 3.5826258659362793, "learning_rate": 7.727312202878127e-06, "loss": 0.3774, "step": 29076 }, { "epoch": 4.746581772172564, "grad_norm": 4.255799293518066, "learning_rate": 7.726687843533539e-06, "loss": 0.4736, "step": 29077 }, { "epoch": 4.7467450308150685, "grad_norm": 2.9017069339752197, "learning_rate": 7.726063493533673e-06, "loss": 0.3574, "step": 29078 }, { "epoch": 4.746908289457573, "grad_norm": 3.2076756954193115, "learning_rate": 7.725439152881104e-06, "loss": 0.3845, "step": 29079 }, { "epoch": 4.747071548100077, "grad_norm": 3.447718381881714, "learning_rate": 7.724814821578395e-06, "loss": 0.367, "step": 29080 }, { "epoch": 4.747234806742582, "grad_norm": 3.6470632553100586, "learning_rate": 7.724190499628114e-06, "loss": 0.4014, "step": 29081 }, { "epoch": 4.747398065385086, "grad_norm": 3.660473108291626, "learning_rate": 7.723566187032825e-06, "loss": 0.3639, "step": 29082 }, { "epoch": 4.747561324027591, "grad_norm": 3.3257107734680176, "learning_rate": 7.7229418837951e-06, "loss": 0.338, "step": 29083 }, { "epoch": 4.747724582670095, "grad_norm": 2.9463534355163574, "learning_rate": 7.722317589917494e-06, "loss": 0.3376, "step": 29084 }, { "epoch": 4.7478878413125996, "grad_norm": 3.469247341156006, "learning_rate": 7.721693305402584e-06, "loss": 0.3767, "step": 29085 }, { "epoch": 4.748051099955104, "grad_norm": 3.6219799518585205, "learning_rate": 7.721069030252939e-06, "loss": 0.3639, "step": 29086 }, { "epoch": 4.748214358597608, "grad_norm": 3.2206380367279053, "learning_rate": 7.720444764471111e-06, "loss": 0.3832, "step": 29087 }, { "epoch": 4.748377617240113, "grad_norm": 3.9927051067352295, "learning_rate": 7.719820508059674e-06, "loss": 0.4603, "step": 29088 }, { "epoch": 4.748540875882617, "grad_norm": 3.828339099884033, "learning_rate": 7.719196261021195e-06, "loss": 0.4007, "step": 29089 }, { "epoch": 4.748704134525122, "grad_norm": 3.7367911338806152, "learning_rate": 7.718572023358239e-06, "loss": 0.3874, "step": 29090 }, { "epoch": 4.748867393167626, "grad_norm": 4.355736255645752, "learning_rate": 7.717947795073373e-06, "loss": 0.4421, "step": 29091 }, { "epoch": 4.749030651810131, "grad_norm": 3.147773265838623, "learning_rate": 7.71732357616916e-06, "loss": 0.3818, "step": 29092 }, { "epoch": 4.749193910452634, "grad_norm": 2.972386598587036, "learning_rate": 7.71669936664817e-06, "loss": 0.3177, "step": 29093 }, { "epoch": 4.749357169095139, "grad_norm": 3.6121788024902344, "learning_rate": 7.716075166512966e-06, "loss": 0.3404, "step": 29094 }, { "epoch": 4.749520427737643, "grad_norm": 3.7345893383026123, "learning_rate": 7.715450975766115e-06, "loss": 0.4109, "step": 29095 }, { "epoch": 4.7496836863801475, "grad_norm": 4.197562217712402, "learning_rate": 7.714826794410184e-06, "loss": 0.4348, "step": 29096 }, { "epoch": 4.749846945022652, "grad_norm": 3.080687999725342, "learning_rate": 7.714202622447735e-06, "loss": 0.3423, "step": 29097 }, { "epoch": 4.750010203665156, "grad_norm": 3.58349609375, "learning_rate": 7.713578459881336e-06, "loss": 0.3762, "step": 29098 }, { "epoch": 4.750173462307661, "grad_norm": 3.1684517860412598, "learning_rate": 7.712954306713552e-06, "loss": 0.3498, "step": 29099 }, { "epoch": 4.750336720950165, "grad_norm": 2.9227423667907715, "learning_rate": 7.712330162946948e-06, "loss": 0.3065, "step": 29100 }, { "epoch": 4.75049997959267, "grad_norm": 2.8842408657073975, "learning_rate": 7.711706028584092e-06, "loss": 0.3463, "step": 29101 }, { "epoch": 4.750663238235174, "grad_norm": 3.8663277626037598, "learning_rate": 7.71108190362755e-06, "loss": 0.407, "step": 29102 }, { "epoch": 4.7508264968776786, "grad_norm": 3.5167038440704346, "learning_rate": 7.710457788079885e-06, "loss": 0.3971, "step": 29103 }, { "epoch": 4.750989755520183, "grad_norm": 2.844728469848633, "learning_rate": 7.709833681943663e-06, "loss": 0.3503, "step": 29104 }, { "epoch": 4.751153014162687, "grad_norm": 5.009818077087402, "learning_rate": 7.709209585221452e-06, "loss": 0.5115, "step": 29105 }, { "epoch": 4.751316272805192, "grad_norm": 3.135715961456299, "learning_rate": 7.708585497915814e-06, "loss": 0.394, "step": 29106 }, { "epoch": 4.751479531447696, "grad_norm": 4.012181282043457, "learning_rate": 7.707961420029317e-06, "loss": 0.3559, "step": 29107 }, { "epoch": 4.751642790090201, "grad_norm": 3.7135009765625, "learning_rate": 7.707337351564524e-06, "loss": 0.4016, "step": 29108 }, { "epoch": 4.751806048732705, "grad_norm": 3.76249098777771, "learning_rate": 7.706713292524001e-06, "loss": 0.4036, "step": 29109 }, { "epoch": 4.751969307375209, "grad_norm": 4.248536109924316, "learning_rate": 7.706089242910315e-06, "loss": 0.4871, "step": 29110 }, { "epoch": 4.752132566017714, "grad_norm": 3.1066975593566895, "learning_rate": 7.705465202726027e-06, "loss": 0.3071, "step": 29111 }, { "epoch": 4.752295824660218, "grad_norm": 4.329174041748047, "learning_rate": 7.704841171973707e-06, "loss": 0.3859, "step": 29112 }, { "epoch": 4.752459083302722, "grad_norm": 4.534468173980713, "learning_rate": 7.704217150655919e-06, "loss": 0.4958, "step": 29113 }, { "epoch": 4.7526223419452265, "grad_norm": 3.5692191123962402, "learning_rate": 7.703593138775228e-06, "loss": 0.3943, "step": 29114 }, { "epoch": 4.752785600587731, "grad_norm": 3.191532850265503, "learning_rate": 7.702969136334196e-06, "loss": 0.349, "step": 29115 }, { "epoch": 4.752948859230235, "grad_norm": 3.3695266246795654, "learning_rate": 7.702345143335395e-06, "loss": 0.4057, "step": 29116 }, { "epoch": 4.75311211787274, "grad_norm": 3.9194893836975098, "learning_rate": 7.701721159781382e-06, "loss": 0.4293, "step": 29117 }, { "epoch": 4.753275376515244, "grad_norm": 3.979177713394165, "learning_rate": 7.701097185674726e-06, "loss": 0.4013, "step": 29118 }, { "epoch": 4.753438635157749, "grad_norm": 3.667523145675659, "learning_rate": 7.700473221017992e-06, "loss": 0.4461, "step": 29119 }, { "epoch": 4.753601893800253, "grad_norm": 3.8473920822143555, "learning_rate": 7.699849265813744e-06, "loss": 0.3563, "step": 29120 }, { "epoch": 4.7537651524427575, "grad_norm": 3.178406000137329, "learning_rate": 7.699225320064548e-06, "loss": 0.3376, "step": 29121 }, { "epoch": 4.753928411085262, "grad_norm": 3.4897007942199707, "learning_rate": 7.698601383772965e-06, "loss": 0.4046, "step": 29122 }, { "epoch": 4.754091669727766, "grad_norm": 3.7272002696990967, "learning_rate": 7.697977456941563e-06, "loss": 0.4559, "step": 29123 }, { "epoch": 4.754254928370271, "grad_norm": 3.733181953430176, "learning_rate": 7.697353539572908e-06, "loss": 0.4045, "step": 29124 }, { "epoch": 4.754418187012775, "grad_norm": 3.4210410118103027, "learning_rate": 7.696729631669563e-06, "loss": 0.3871, "step": 29125 }, { "epoch": 4.75458144565528, "grad_norm": 3.491377115249634, "learning_rate": 7.696105733234099e-06, "loss": 0.4046, "step": 29126 }, { "epoch": 4.754744704297783, "grad_norm": 4.075939655303955, "learning_rate": 7.695481844269064e-06, "loss": 0.4025, "step": 29127 }, { "epoch": 4.754907962940289, "grad_norm": 3.6963160037994385, "learning_rate": 7.694857964777038e-06, "loss": 0.3715, "step": 29128 }, { "epoch": 4.755071221582792, "grad_norm": 3.3009676933288574, "learning_rate": 7.694234094760579e-06, "loss": 0.3386, "step": 29129 }, { "epoch": 4.755234480225297, "grad_norm": 3.18346905708313, "learning_rate": 7.693610234222255e-06, "loss": 0.3471, "step": 29130 }, { "epoch": 4.755397738867801, "grad_norm": 3.908520221710205, "learning_rate": 7.692986383164628e-06, "loss": 0.3557, "step": 29131 }, { "epoch": 4.7555609975103055, "grad_norm": 3.679577112197876, "learning_rate": 7.692362541590261e-06, "loss": 0.3607, "step": 29132 }, { "epoch": 4.75572425615281, "grad_norm": 3.9370949268341064, "learning_rate": 7.691738709501722e-06, "loss": 0.3868, "step": 29133 }, { "epoch": 4.755887514795314, "grad_norm": 3.437570571899414, "learning_rate": 7.69111488690157e-06, "loss": 0.3401, "step": 29134 }, { "epoch": 4.756050773437819, "grad_norm": 3.9052608013153076, "learning_rate": 7.690491073792381e-06, "loss": 0.3922, "step": 29135 }, { "epoch": 4.756214032080323, "grad_norm": 3.961486577987671, "learning_rate": 7.689867270176707e-06, "loss": 0.4627, "step": 29136 }, { "epoch": 4.756377290722828, "grad_norm": 4.052526950836182, "learning_rate": 7.689243476057114e-06, "loss": 0.408, "step": 29137 }, { "epoch": 4.756540549365332, "grad_norm": 3.3187289237976074, "learning_rate": 7.688619691436168e-06, "loss": 0.3349, "step": 29138 }, { "epoch": 4.7567038080078365, "grad_norm": 3.0973663330078125, "learning_rate": 7.687995916316435e-06, "loss": 0.3581, "step": 29139 }, { "epoch": 4.756867066650341, "grad_norm": 3.427907943725586, "learning_rate": 7.687372150700479e-06, "loss": 0.3712, "step": 29140 }, { "epoch": 4.757030325292845, "grad_norm": 3.895057201385498, "learning_rate": 7.686748394590863e-06, "loss": 0.3979, "step": 29141 }, { "epoch": 4.75719358393535, "grad_norm": 3.1787328720092773, "learning_rate": 7.68612464799015e-06, "loss": 0.3559, "step": 29142 }, { "epoch": 4.757356842577854, "grad_norm": 3.435448408126831, "learning_rate": 7.685500910900905e-06, "loss": 0.346, "step": 29143 }, { "epoch": 4.757520101220359, "grad_norm": 5.033909320831299, "learning_rate": 7.684877183325692e-06, "loss": 0.4463, "step": 29144 }, { "epoch": 4.757683359862863, "grad_norm": 3.7103259563446045, "learning_rate": 7.684253465267077e-06, "loss": 0.3717, "step": 29145 }, { "epoch": 4.757846618505367, "grad_norm": 3.4202184677124023, "learning_rate": 7.683629756727619e-06, "loss": 0.3954, "step": 29146 }, { "epoch": 4.758009877147871, "grad_norm": 3.020087480545044, "learning_rate": 7.683006057709885e-06, "loss": 0.361, "step": 29147 }, { "epoch": 4.758173135790376, "grad_norm": 3.9733238220214844, "learning_rate": 7.682382368216437e-06, "loss": 0.3969, "step": 29148 }, { "epoch": 4.75833639443288, "grad_norm": 3.412944793701172, "learning_rate": 7.681758688249838e-06, "loss": 0.3528, "step": 29149 }, { "epoch": 4.7584996530753845, "grad_norm": 3.336932897567749, "learning_rate": 7.681135017812656e-06, "loss": 0.364, "step": 29150 }, { "epoch": 4.758662911717889, "grad_norm": 3.5950682163238525, "learning_rate": 7.680511356907455e-06, "loss": 0.3793, "step": 29151 }, { "epoch": 4.758826170360393, "grad_norm": 3.606402635574341, "learning_rate": 7.679887705536793e-06, "loss": 0.4164, "step": 29152 }, { "epoch": 4.758989429002898, "grad_norm": 3.0700652599334717, "learning_rate": 7.679264063703236e-06, "loss": 0.3403, "step": 29153 }, { "epoch": 4.759152687645402, "grad_norm": 2.6655657291412354, "learning_rate": 7.67864043140935e-06, "loss": 0.3211, "step": 29154 }, { "epoch": 4.759315946287907, "grad_norm": 2.953662872314453, "learning_rate": 7.678016808657698e-06, "loss": 0.331, "step": 29155 }, { "epoch": 4.759479204930411, "grad_norm": 3.6467325687408447, "learning_rate": 7.677393195450841e-06, "loss": 0.3714, "step": 29156 }, { "epoch": 4.7596424635729155, "grad_norm": 2.4538543224334717, "learning_rate": 7.676769591791342e-06, "loss": 0.3026, "step": 29157 }, { "epoch": 4.75980572221542, "grad_norm": 3.738748788833618, "learning_rate": 7.676145997681767e-06, "loss": 0.4331, "step": 29158 }, { "epoch": 4.759968980857924, "grad_norm": 3.530749797821045, "learning_rate": 7.675522413124679e-06, "loss": 0.3562, "step": 29159 }, { "epoch": 4.760132239500429, "grad_norm": 2.877225160598755, "learning_rate": 7.67489883812264e-06, "loss": 0.3576, "step": 29160 }, { "epoch": 4.760295498142933, "grad_norm": 2.730835199356079, "learning_rate": 7.67427527267821e-06, "loss": 0.3271, "step": 29161 }, { "epoch": 4.760458756785438, "grad_norm": 3.373342514038086, "learning_rate": 7.673651716793961e-06, "loss": 0.3817, "step": 29162 }, { "epoch": 4.760622015427941, "grad_norm": 3.643120765686035, "learning_rate": 7.673028170472449e-06, "loss": 0.3988, "step": 29163 }, { "epoch": 4.760785274070446, "grad_norm": 3.7208309173583984, "learning_rate": 7.672404633716242e-06, "loss": 0.3646, "step": 29164 }, { "epoch": 4.76094853271295, "grad_norm": 3.855672836303711, "learning_rate": 7.671781106527901e-06, "loss": 0.4248, "step": 29165 }, { "epoch": 4.761111791355455, "grad_norm": 3.5059738159179688, "learning_rate": 7.671157588909988e-06, "loss": 0.3341, "step": 29166 }, { "epoch": 4.761275049997959, "grad_norm": 3.966055393218994, "learning_rate": 7.670534080865067e-06, "loss": 0.3987, "step": 29167 }, { "epoch": 4.7614383086404635, "grad_norm": 2.572998285293579, "learning_rate": 7.669910582395699e-06, "loss": 0.3479, "step": 29168 }, { "epoch": 4.761601567282968, "grad_norm": 4.182407855987549, "learning_rate": 7.669287093504449e-06, "loss": 0.465, "step": 29169 }, { "epoch": 4.761764825925472, "grad_norm": 3.765125274658203, "learning_rate": 7.66866361419388e-06, "loss": 0.3677, "step": 29170 }, { "epoch": 4.761928084567977, "grad_norm": 3.480515956878662, "learning_rate": 7.668040144466556e-06, "loss": 0.3774, "step": 29171 }, { "epoch": 4.762091343210481, "grad_norm": 5.107637882232666, "learning_rate": 7.667416684325034e-06, "loss": 0.3919, "step": 29172 }, { "epoch": 4.762254601852986, "grad_norm": 3.779529571533203, "learning_rate": 7.666793233771885e-06, "loss": 0.3975, "step": 29173 }, { "epoch": 4.76241786049549, "grad_norm": 2.909954786300659, "learning_rate": 7.666169792809669e-06, "loss": 0.3323, "step": 29174 }, { "epoch": 4.7625811191379945, "grad_norm": 3.570796251296997, "learning_rate": 7.66554636144095e-06, "loss": 0.3716, "step": 29175 }, { "epoch": 4.762744377780499, "grad_norm": 3.4968972206115723, "learning_rate": 7.664922939668282e-06, "loss": 0.4119, "step": 29176 }, { "epoch": 4.762907636423003, "grad_norm": 3.641571521759033, "learning_rate": 7.664299527494236e-06, "loss": 0.3679, "step": 29177 }, { "epoch": 4.763070895065508, "grad_norm": 2.997910499572754, "learning_rate": 7.663676124921375e-06, "loss": 0.3452, "step": 29178 }, { "epoch": 4.763234153708012, "grad_norm": 3.6386842727661133, "learning_rate": 7.663052731952257e-06, "loss": 0.3805, "step": 29179 }, { "epoch": 4.763397412350516, "grad_norm": 3.9164609909057617, "learning_rate": 7.662429348589448e-06, "loss": 0.3595, "step": 29180 }, { "epoch": 4.763560670993021, "grad_norm": 4.105436325073242, "learning_rate": 7.661805974835508e-06, "loss": 0.3884, "step": 29181 }, { "epoch": 4.763723929635525, "grad_norm": 3.5296790599823, "learning_rate": 7.661182610693002e-06, "loss": 0.3526, "step": 29182 }, { "epoch": 4.763887188278029, "grad_norm": 2.6801981925964355, "learning_rate": 7.660559256164492e-06, "loss": 0.3132, "step": 29183 }, { "epoch": 4.764050446920534, "grad_norm": 4.088509559631348, "learning_rate": 7.659935911252539e-06, "loss": 0.4344, "step": 29184 }, { "epoch": 4.764213705563038, "grad_norm": 3.893486738204956, "learning_rate": 7.659312575959706e-06, "loss": 0.368, "step": 29185 }, { "epoch": 4.7643769642055425, "grad_norm": 4.692303657531738, "learning_rate": 7.658689250288554e-06, "loss": 0.429, "step": 29186 }, { "epoch": 4.764540222848047, "grad_norm": 3.282355785369873, "learning_rate": 7.658065934241648e-06, "loss": 0.3359, "step": 29187 }, { "epoch": 4.764703481490551, "grad_norm": 3.2162892818450928, "learning_rate": 7.657442627821544e-06, "loss": 0.3538, "step": 29188 }, { "epoch": 4.764866740133056, "grad_norm": 4.638327598571777, "learning_rate": 7.656819331030813e-06, "loss": 0.4815, "step": 29189 }, { "epoch": 4.76502999877556, "grad_norm": 3.573232889175415, "learning_rate": 7.656196043872013e-06, "loss": 0.3764, "step": 29190 }, { "epoch": 4.765193257418065, "grad_norm": 3.4866514205932617, "learning_rate": 7.655572766347704e-06, "loss": 0.3274, "step": 29191 }, { "epoch": 4.765356516060569, "grad_norm": 3.325845718383789, "learning_rate": 7.65494949846045e-06, "loss": 0.3831, "step": 29192 }, { "epoch": 4.7655197747030735, "grad_norm": 3.316723346710205, "learning_rate": 7.654326240212816e-06, "loss": 0.3603, "step": 29193 }, { "epoch": 4.765683033345578, "grad_norm": 3.7719008922576904, "learning_rate": 7.653702991607363e-06, "loss": 0.3754, "step": 29194 }, { "epoch": 4.765846291988082, "grad_norm": 3.857639789581299, "learning_rate": 7.653079752646647e-06, "loss": 0.4109, "step": 29195 }, { "epoch": 4.766009550630587, "grad_norm": 3.9020328521728516, "learning_rate": 7.652456523333235e-06, "loss": 0.4196, "step": 29196 }, { "epoch": 4.766172809273091, "grad_norm": 3.321976661682129, "learning_rate": 7.651833303669687e-06, "loss": 0.3974, "step": 29197 }, { "epoch": 4.766336067915596, "grad_norm": 3.2375781536102295, "learning_rate": 7.651210093658567e-06, "loss": 0.3552, "step": 29198 }, { "epoch": 4.766499326558099, "grad_norm": 2.7566781044006348, "learning_rate": 7.650586893302431e-06, "loss": 0.3031, "step": 29199 }, { "epoch": 4.766662585200604, "grad_norm": 3.693368673324585, "learning_rate": 7.649963702603848e-06, "loss": 0.3773, "step": 29200 }, { "epoch": 4.766825843843108, "grad_norm": 3.306166648864746, "learning_rate": 7.64934052156538e-06, "loss": 0.3909, "step": 29201 }, { "epoch": 4.766989102485613, "grad_norm": 3.547818422317505, "learning_rate": 7.648717350189582e-06, "loss": 0.3466, "step": 29202 }, { "epoch": 4.767152361128117, "grad_norm": 3.5979387760162354, "learning_rate": 7.648094188479021e-06, "loss": 0.3852, "step": 29203 }, { "epoch": 4.7673156197706215, "grad_norm": 3.103368043899536, "learning_rate": 7.647471036436257e-06, "loss": 0.3318, "step": 29204 }, { "epoch": 4.767478878413126, "grad_norm": 3.970869302749634, "learning_rate": 7.646847894063852e-06, "loss": 0.3671, "step": 29205 }, { "epoch": 4.76764213705563, "grad_norm": 4.5822577476501465, "learning_rate": 7.646224761364364e-06, "loss": 0.4311, "step": 29206 }, { "epoch": 4.767805395698135, "grad_norm": 3.972167491912842, "learning_rate": 7.645601638340358e-06, "loss": 0.4624, "step": 29207 }, { "epoch": 4.767968654340639, "grad_norm": 3.848445177078247, "learning_rate": 7.644978524994396e-06, "loss": 0.4076, "step": 29208 }, { "epoch": 4.768131912983144, "grad_norm": 2.855401039123535, "learning_rate": 7.644355421329036e-06, "loss": 0.3471, "step": 29209 }, { "epoch": 4.768295171625648, "grad_norm": 3.811894178390503, "learning_rate": 7.643732327346842e-06, "loss": 0.3833, "step": 29210 }, { "epoch": 4.7684584302681525, "grad_norm": 3.461825132369995, "learning_rate": 7.643109243050373e-06, "loss": 0.3431, "step": 29211 }, { "epoch": 4.768621688910657, "grad_norm": 4.436182498931885, "learning_rate": 7.642486168442193e-06, "loss": 0.3903, "step": 29212 }, { "epoch": 4.768784947553161, "grad_norm": 3.9628970623016357, "learning_rate": 7.641863103524864e-06, "loss": 0.4319, "step": 29213 }, { "epoch": 4.768948206195666, "grad_norm": 3.5317654609680176, "learning_rate": 7.64124004830095e-06, "loss": 0.3425, "step": 29214 }, { "epoch": 4.76911146483817, "grad_norm": 3.8492422103881836, "learning_rate": 7.640617002772999e-06, "loss": 0.4266, "step": 29215 }, { "epoch": 4.769274723480674, "grad_norm": 4.215450286865234, "learning_rate": 7.639993966943584e-06, "loss": 0.4842, "step": 29216 }, { "epoch": 4.769437982123178, "grad_norm": 2.8673059940338135, "learning_rate": 7.639370940815261e-06, "loss": 0.3227, "step": 29217 }, { "epoch": 4.769601240765683, "grad_norm": 3.9667282104492188, "learning_rate": 7.638747924390595e-06, "loss": 0.3956, "step": 29218 }, { "epoch": 4.769764499408187, "grad_norm": 3.4804534912109375, "learning_rate": 7.638124917672144e-06, "loss": 0.3687, "step": 29219 }, { "epoch": 4.769927758050692, "grad_norm": 3.4043643474578857, "learning_rate": 7.637501920662469e-06, "loss": 0.3657, "step": 29220 }, { "epoch": 4.770091016693196, "grad_norm": 3.5156567096710205, "learning_rate": 7.636878933364132e-06, "loss": 0.3441, "step": 29221 }, { "epoch": 4.7702542753357005, "grad_norm": 3.722219228744507, "learning_rate": 7.636255955779691e-06, "loss": 0.4031, "step": 29222 }, { "epoch": 4.770417533978205, "grad_norm": 4.095069408416748, "learning_rate": 7.635632987911717e-06, "loss": 0.4193, "step": 29223 }, { "epoch": 4.770580792620709, "grad_norm": 4.605716705322266, "learning_rate": 7.635010029762755e-06, "loss": 0.419, "step": 29224 }, { "epoch": 4.770744051263214, "grad_norm": 3.770406484603882, "learning_rate": 7.634387081335377e-06, "loss": 0.3127, "step": 29225 }, { "epoch": 4.770907309905718, "grad_norm": 4.274173259735107, "learning_rate": 7.633764142632138e-06, "loss": 0.4006, "step": 29226 }, { "epoch": 4.771070568548223, "grad_norm": 3.768146514892578, "learning_rate": 7.633141213655604e-06, "loss": 0.3638, "step": 29227 }, { "epoch": 4.771233827190727, "grad_norm": 3.7236251831054688, "learning_rate": 7.632518294408332e-06, "loss": 0.4066, "step": 29228 }, { "epoch": 4.7713970858332315, "grad_norm": 3.3651580810546875, "learning_rate": 7.631895384892883e-06, "loss": 0.3927, "step": 29229 }, { "epoch": 4.771560344475736, "grad_norm": 3.7843682765960693, "learning_rate": 7.631272485111816e-06, "loss": 0.406, "step": 29230 }, { "epoch": 4.77172360311824, "grad_norm": 2.5562644004821777, "learning_rate": 7.630649595067696e-06, "loss": 0.3032, "step": 29231 }, { "epoch": 4.771886861760745, "grad_norm": 3.43514347076416, "learning_rate": 7.63002671476308e-06, "loss": 0.3642, "step": 29232 }, { "epoch": 4.772050120403248, "grad_norm": 3.592576026916504, "learning_rate": 7.629403844200532e-06, "loss": 0.3841, "step": 29233 }, { "epoch": 4.772213379045754, "grad_norm": 2.8875961303710938, "learning_rate": 7.628780983382606e-06, "loss": 0.2761, "step": 29234 }, { "epoch": 4.772376637688257, "grad_norm": 2.9797120094299316, "learning_rate": 7.6281581323118685e-06, "loss": 0.3368, "step": 29235 }, { "epoch": 4.772539896330762, "grad_norm": 3.556227445602417, "learning_rate": 7.627535290990875e-06, "loss": 0.3366, "step": 29236 }, { "epoch": 4.772703154973266, "grad_norm": 3.8988311290740967, "learning_rate": 7.62691245942219e-06, "loss": 0.3571, "step": 29237 }, { "epoch": 4.772866413615771, "grad_norm": 4.016761302947998, "learning_rate": 7.6262896376083684e-06, "loss": 0.4136, "step": 29238 }, { "epoch": 4.773029672258275, "grad_norm": 3.649822473526001, "learning_rate": 7.625666825551976e-06, "loss": 0.3956, "step": 29239 }, { "epoch": 4.7731929309007795, "grad_norm": 4.0283522605896, "learning_rate": 7.625044023255571e-06, "loss": 0.3811, "step": 29240 }, { "epoch": 4.773356189543284, "grad_norm": 3.6523900032043457, "learning_rate": 7.624421230721714e-06, "loss": 0.3827, "step": 29241 }, { "epoch": 4.773519448185788, "grad_norm": 3.700364589691162, "learning_rate": 7.6237984479529635e-06, "loss": 0.3773, "step": 29242 }, { "epoch": 4.773682706828293, "grad_norm": 3.390209913253784, "learning_rate": 7.623175674951883e-06, "loss": 0.3411, "step": 29243 }, { "epoch": 4.773845965470797, "grad_norm": 3.89395809173584, "learning_rate": 7.622552911721026e-06, "loss": 0.3632, "step": 29244 }, { "epoch": 4.774009224113302, "grad_norm": 3.3987762928009033, "learning_rate": 7.621930158262958e-06, "loss": 0.3977, "step": 29245 }, { "epoch": 4.774172482755806, "grad_norm": 2.650254011154175, "learning_rate": 7.621307414580236e-06, "loss": 0.2729, "step": 29246 }, { "epoch": 4.7743357413983105, "grad_norm": 3.4763190746307373, "learning_rate": 7.620684680675423e-06, "loss": 0.3799, "step": 29247 }, { "epoch": 4.774499000040815, "grad_norm": 3.652940273284912, "learning_rate": 7.620061956551075e-06, "loss": 0.3667, "step": 29248 }, { "epoch": 4.774662258683319, "grad_norm": 4.1241583824157715, "learning_rate": 7.619439242209752e-06, "loss": 0.3945, "step": 29249 }, { "epoch": 4.774825517325823, "grad_norm": 5.016653060913086, "learning_rate": 7.618816537654018e-06, "loss": 0.424, "step": 29250 }, { "epoch": 4.774988775968328, "grad_norm": 3.1296322345733643, "learning_rate": 7.61819384288643e-06, "loss": 0.3224, "step": 29251 }, { "epoch": 4.775152034610832, "grad_norm": 3.7603540420532227, "learning_rate": 7.6175711579095465e-06, "loss": 0.3777, "step": 29252 }, { "epoch": 4.775315293253336, "grad_norm": 3.067492723464966, "learning_rate": 7.616948482725932e-06, "loss": 0.3309, "step": 29253 }, { "epoch": 4.775478551895841, "grad_norm": 5.165536880493164, "learning_rate": 7.616325817338141e-06, "loss": 0.5624, "step": 29254 }, { "epoch": 4.775641810538345, "grad_norm": 3.1876327991485596, "learning_rate": 7.615703161748733e-06, "loss": 0.3254, "step": 29255 }, { "epoch": 4.77580506918085, "grad_norm": 3.4936790466308594, "learning_rate": 7.615080515960269e-06, "loss": 0.3525, "step": 29256 }, { "epoch": 4.775968327823354, "grad_norm": 4.050800323486328, "learning_rate": 7.614457879975309e-06, "loss": 0.4044, "step": 29257 }, { "epoch": 4.7761315864658584, "grad_norm": 2.7399041652679443, "learning_rate": 7.6138352537964114e-06, "loss": 0.3332, "step": 29258 }, { "epoch": 4.776294845108363, "grad_norm": 3.7377588748931885, "learning_rate": 7.613212637426137e-06, "loss": 0.4191, "step": 29259 }, { "epoch": 4.776458103750867, "grad_norm": 3.9989993572235107, "learning_rate": 7.612590030867041e-06, "loss": 0.4432, "step": 29260 }, { "epoch": 4.776621362393372, "grad_norm": 2.9072957038879395, "learning_rate": 7.6119674341216885e-06, "loss": 0.3014, "step": 29261 }, { "epoch": 4.776784621035876, "grad_norm": 3.8950400352478027, "learning_rate": 7.6113448471926365e-06, "loss": 0.3841, "step": 29262 }, { "epoch": 4.776947879678381, "grad_norm": 3.4765872955322266, "learning_rate": 7.610722270082448e-06, "loss": 0.4237, "step": 29263 }, { "epoch": 4.777111138320885, "grad_norm": 3.837566375732422, "learning_rate": 7.610099702793673e-06, "loss": 0.467, "step": 29264 }, { "epoch": 4.7772743969633895, "grad_norm": 3.4933724403381348, "learning_rate": 7.609477145328874e-06, "loss": 0.4, "step": 29265 }, { "epoch": 4.777437655605894, "grad_norm": 4.2553391456604, "learning_rate": 7.608854597690614e-06, "loss": 0.4335, "step": 29266 }, { "epoch": 4.777600914248398, "grad_norm": 3.5361685752868652, "learning_rate": 7.608232059881451e-06, "loss": 0.3634, "step": 29267 }, { "epoch": 4.777764172890903, "grad_norm": 3.168250560760498, "learning_rate": 7.6076095319039415e-06, "loss": 0.3734, "step": 29268 }, { "epoch": 4.777927431533406, "grad_norm": 3.7872567176818848, "learning_rate": 7.606987013760646e-06, "loss": 0.4063, "step": 29269 }, { "epoch": 4.778090690175911, "grad_norm": 3.5854499340057373, "learning_rate": 7.606364505454125e-06, "loss": 0.3588, "step": 29270 }, { "epoch": 4.778253948818415, "grad_norm": 5.4156718254089355, "learning_rate": 7.605742006986934e-06, "loss": 0.4472, "step": 29271 }, { "epoch": 4.77841720746092, "grad_norm": 3.575477361679077, "learning_rate": 7.605119518361636e-06, "loss": 0.4257, "step": 29272 }, { "epoch": 4.778580466103424, "grad_norm": 4.099480628967285, "learning_rate": 7.604497039580785e-06, "loss": 0.4259, "step": 29273 }, { "epoch": 4.778743724745929, "grad_norm": 3.0739006996154785, "learning_rate": 7.603874570646942e-06, "loss": 0.3437, "step": 29274 }, { "epoch": 4.778906983388433, "grad_norm": 4.14658260345459, "learning_rate": 7.603252111562666e-06, "loss": 0.3876, "step": 29275 }, { "epoch": 4.779070242030937, "grad_norm": 4.171530246734619, "learning_rate": 7.602629662330514e-06, "loss": 0.3949, "step": 29276 }, { "epoch": 4.779233500673442, "grad_norm": 3.491612672805786, "learning_rate": 7.602007222953047e-06, "loss": 0.3226, "step": 29277 }, { "epoch": 4.779396759315946, "grad_norm": 3.9502573013305664, "learning_rate": 7.6013847934328245e-06, "loss": 0.3872, "step": 29278 }, { "epoch": 4.779560017958451, "grad_norm": 3.180224895477295, "learning_rate": 7.600762373772403e-06, "loss": 0.3597, "step": 29279 }, { "epoch": 4.779723276600955, "grad_norm": 3.8430845737457275, "learning_rate": 7.600139963974341e-06, "loss": 0.3834, "step": 29280 }, { "epoch": 4.77988653524346, "grad_norm": 3.9654808044433594, "learning_rate": 7.599517564041198e-06, "loss": 0.3564, "step": 29281 }, { "epoch": 4.780049793885964, "grad_norm": 3.840752124786377, "learning_rate": 7.598895173975533e-06, "loss": 0.3976, "step": 29282 }, { "epoch": 4.7802130525284685, "grad_norm": 3.3794198036193848, "learning_rate": 7.598272793779903e-06, "loss": 0.3403, "step": 29283 }, { "epoch": 4.780376311170973, "grad_norm": 4.433854579925537, "learning_rate": 7.5976504234568645e-06, "loss": 0.4821, "step": 29284 }, { "epoch": 4.780539569813477, "grad_norm": 3.255068778991699, "learning_rate": 7.5970280630089795e-06, "loss": 0.3266, "step": 29285 }, { "epoch": 4.780702828455981, "grad_norm": 3.839771270751953, "learning_rate": 7.596405712438805e-06, "loss": 0.4, "step": 29286 }, { "epoch": 4.780866087098486, "grad_norm": 3.53190279006958, "learning_rate": 7.595783371748895e-06, "loss": 0.3803, "step": 29287 }, { "epoch": 4.78102934574099, "grad_norm": 3.787501096725464, "learning_rate": 7.595161040941815e-06, "loss": 0.3983, "step": 29288 }, { "epoch": 4.781192604383494, "grad_norm": 4.248331546783447, "learning_rate": 7.59453872002012e-06, "loss": 0.4419, "step": 29289 }, { "epoch": 4.781355863025999, "grad_norm": 3.403012275695801, "learning_rate": 7.593916408986368e-06, "loss": 0.4014, "step": 29290 }, { "epoch": 4.781519121668503, "grad_norm": 3.589806318283081, "learning_rate": 7.593294107843118e-06, "loss": 0.3458, "step": 29291 }, { "epoch": 4.781682380311008, "grad_norm": 3.913170099258423, "learning_rate": 7.592671816592928e-06, "loss": 0.3747, "step": 29292 }, { "epoch": 4.781845638953512, "grad_norm": 4.184685230255127, "learning_rate": 7.5920495352383535e-06, "loss": 0.4179, "step": 29293 }, { "epoch": 4.782008897596016, "grad_norm": 3.936439037322998, "learning_rate": 7.591427263781954e-06, "loss": 0.4024, "step": 29294 }, { "epoch": 4.782172156238521, "grad_norm": 3.8259332180023193, "learning_rate": 7.590805002226288e-06, "loss": 0.4753, "step": 29295 }, { "epoch": 4.782335414881025, "grad_norm": 4.029106140136719, "learning_rate": 7.590182750573912e-06, "loss": 0.4344, "step": 29296 }, { "epoch": 4.78249867352353, "grad_norm": 3.4060847759246826, "learning_rate": 7.589560508827386e-06, "loss": 0.3739, "step": 29297 }, { "epoch": 4.782661932166034, "grad_norm": 3.3677995204925537, "learning_rate": 7.588938276989266e-06, "loss": 0.3249, "step": 29298 }, { "epoch": 4.782825190808539, "grad_norm": 3.250403881072998, "learning_rate": 7.588316055062108e-06, "loss": 0.3661, "step": 29299 }, { "epoch": 4.782988449451043, "grad_norm": 3.4710702896118164, "learning_rate": 7.587693843048475e-06, "loss": 0.3545, "step": 29300 }, { "epoch": 4.7831517080935475, "grad_norm": 3.03519868850708, "learning_rate": 7.58707164095092e-06, "loss": 0.3325, "step": 29301 }, { "epoch": 4.783314966736052, "grad_norm": 3.609647750854492, "learning_rate": 7.586449448772008e-06, "loss": 0.3985, "step": 29302 }, { "epoch": 4.7834782253785555, "grad_norm": 3.29287052154541, "learning_rate": 7.585827266514285e-06, "loss": 0.3569, "step": 29303 }, { "epoch": 4.783641484021061, "grad_norm": 3.7333953380584717, "learning_rate": 7.585205094180315e-06, "loss": 0.3873, "step": 29304 }, { "epoch": 4.783804742663564, "grad_norm": 3.3906595706939697, "learning_rate": 7.584582931772657e-06, "loss": 0.3622, "step": 29305 }, { "epoch": 4.783968001306069, "grad_norm": 3.001849412918091, "learning_rate": 7.583960779293865e-06, "loss": 0.335, "step": 29306 }, { "epoch": 4.784131259948573, "grad_norm": 3.921313762664795, "learning_rate": 7.583338636746499e-06, "loss": 0.4167, "step": 29307 }, { "epoch": 4.784294518591078, "grad_norm": 3.434283494949341, "learning_rate": 7.582716504133114e-06, "loss": 0.3452, "step": 29308 }, { "epoch": 4.784457777233582, "grad_norm": 3.450798273086548, "learning_rate": 7.582094381456271e-06, "loss": 0.4051, "step": 29309 }, { "epoch": 4.784621035876087, "grad_norm": 3.597904682159424, "learning_rate": 7.58147226871852e-06, "loss": 0.4008, "step": 29310 }, { "epoch": 4.784784294518591, "grad_norm": 3.2875466346740723, "learning_rate": 7.5808501659224274e-06, "loss": 0.3832, "step": 29311 }, { "epoch": 4.784947553161095, "grad_norm": 3.8736140727996826, "learning_rate": 7.580228073070551e-06, "loss": 0.4067, "step": 29312 }, { "epoch": 4.7851108118036, "grad_norm": 3.6327593326568604, "learning_rate": 7.5796059901654376e-06, "loss": 0.3901, "step": 29313 }, { "epoch": 4.785274070446104, "grad_norm": 4.4349188804626465, "learning_rate": 7.578983917209648e-06, "loss": 0.4133, "step": 29314 }, { "epoch": 4.785437329088609, "grad_norm": 4.042895793914795, "learning_rate": 7.5783618542057435e-06, "loss": 0.4414, "step": 29315 }, { "epoch": 4.785600587731113, "grad_norm": 3.6211912631988525, "learning_rate": 7.5777398011562794e-06, "loss": 0.4122, "step": 29316 }, { "epoch": 4.785763846373618, "grad_norm": 3.1788458824157715, "learning_rate": 7.577117758063812e-06, "loss": 0.3295, "step": 29317 }, { "epoch": 4.785927105016122, "grad_norm": 3.651428461074829, "learning_rate": 7.5764957249309e-06, "loss": 0.3666, "step": 29318 }, { "epoch": 4.7860903636586265, "grad_norm": 4.230587482452393, "learning_rate": 7.575873701760098e-06, "loss": 0.429, "step": 29319 }, { "epoch": 4.786253622301131, "grad_norm": 3.5835041999816895, "learning_rate": 7.575251688553964e-06, "loss": 0.3955, "step": 29320 }, { "epoch": 4.786416880943635, "grad_norm": 3.8929648399353027, "learning_rate": 7.574629685315056e-06, "loss": 0.4192, "step": 29321 }, { "epoch": 4.786580139586139, "grad_norm": 2.8036396503448486, "learning_rate": 7.574007692045928e-06, "loss": 0.3362, "step": 29322 }, { "epoch": 4.786743398228643, "grad_norm": 2.8945305347442627, "learning_rate": 7.573385708749139e-06, "loss": 0.3344, "step": 29323 }, { "epoch": 4.786906656871148, "grad_norm": 3.2932381629943848, "learning_rate": 7.572763735427244e-06, "loss": 0.3656, "step": 29324 }, { "epoch": 4.787069915513652, "grad_norm": 3.5510008335113525, "learning_rate": 7.572141772082802e-06, "loss": 0.3923, "step": 29325 }, { "epoch": 4.787233174156157, "grad_norm": 3.6110105514526367, "learning_rate": 7.571519818718364e-06, "loss": 0.3218, "step": 29326 }, { "epoch": 4.787396432798661, "grad_norm": 3.366905689239502, "learning_rate": 7.5708978753364955e-06, "loss": 0.3809, "step": 29327 }, { "epoch": 4.7875596914411656, "grad_norm": 3.172755718231201, "learning_rate": 7.570275941939748e-06, "loss": 0.3832, "step": 29328 }, { "epoch": 4.78772295008367, "grad_norm": 3.022491693496704, "learning_rate": 7.569654018530678e-06, "loss": 0.3502, "step": 29329 }, { "epoch": 4.787886208726174, "grad_norm": 4.049234390258789, "learning_rate": 7.5690321051118445e-06, "loss": 0.3892, "step": 29330 }, { "epoch": 4.788049467368679, "grad_norm": 3.7589008808135986, "learning_rate": 7.568410201685803e-06, "loss": 0.4218, "step": 29331 }, { "epoch": 4.788212726011183, "grad_norm": 3.341787576675415, "learning_rate": 7.567788308255106e-06, "loss": 0.3541, "step": 29332 }, { "epoch": 4.788375984653688, "grad_norm": 3.9744114875793457, "learning_rate": 7.567166424822314e-06, "loss": 0.4438, "step": 29333 }, { "epoch": 4.788539243296192, "grad_norm": 3.822187662124634, "learning_rate": 7.566544551389982e-06, "loss": 0.408, "step": 29334 }, { "epoch": 4.788702501938697, "grad_norm": 3.2071709632873535, "learning_rate": 7.565922687960667e-06, "loss": 0.3752, "step": 29335 }, { "epoch": 4.788865760581201, "grad_norm": 4.458253383636475, "learning_rate": 7.565300834536924e-06, "loss": 0.3702, "step": 29336 }, { "epoch": 4.7890290192237055, "grad_norm": 2.918924331665039, "learning_rate": 7.564678991121308e-06, "loss": 0.3406, "step": 29337 }, { "epoch": 4.78919227786621, "grad_norm": 3.3372955322265625, "learning_rate": 7.5640571577163794e-06, "loss": 0.3659, "step": 29338 }, { "epoch": 4.7893555365087135, "grad_norm": 3.418586015701294, "learning_rate": 7.563435334324691e-06, "loss": 0.3466, "step": 29339 }, { "epoch": 4.789518795151219, "grad_norm": 3.5131499767303467, "learning_rate": 7.562813520948802e-06, "loss": 0.3477, "step": 29340 }, { "epoch": 4.789682053793722, "grad_norm": 3.436339855194092, "learning_rate": 7.562191717591266e-06, "loss": 0.3918, "step": 29341 }, { "epoch": 4.789845312436227, "grad_norm": 3.689427375793457, "learning_rate": 7.56156992425464e-06, "loss": 0.3952, "step": 29342 }, { "epoch": 4.790008571078731, "grad_norm": 3.7367758750915527, "learning_rate": 7.560948140941477e-06, "loss": 0.3722, "step": 29343 }, { "epoch": 4.790171829721236, "grad_norm": 2.8388478755950928, "learning_rate": 7.560326367654336e-06, "loss": 0.3554, "step": 29344 }, { "epoch": 4.79033508836374, "grad_norm": 2.948000192642212, "learning_rate": 7.559704604395771e-06, "loss": 0.3751, "step": 29345 }, { "epoch": 4.7904983470062446, "grad_norm": 3.623600959777832, "learning_rate": 7.55908285116834e-06, "loss": 0.4432, "step": 29346 }, { "epoch": 4.790661605648749, "grad_norm": 3.3571128845214844, "learning_rate": 7.558461107974599e-06, "loss": 0.3906, "step": 29347 }, { "epoch": 4.790824864291253, "grad_norm": 3.6285901069641113, "learning_rate": 7.557839374817099e-06, "loss": 0.3923, "step": 29348 }, { "epoch": 4.790988122933758, "grad_norm": 3.4163522720336914, "learning_rate": 7.557217651698401e-06, "loss": 0.3994, "step": 29349 }, { "epoch": 4.791151381576262, "grad_norm": 3.3026936054229736, "learning_rate": 7.556595938621058e-06, "loss": 0.3352, "step": 29350 }, { "epoch": 4.791314640218767, "grad_norm": 4.279520034790039, "learning_rate": 7.555974235587633e-06, "loss": 0.4314, "step": 29351 }, { "epoch": 4.791477898861271, "grad_norm": 3.1067419052124023, "learning_rate": 7.555352542600669e-06, "loss": 0.3593, "step": 29352 }, { "epoch": 4.791641157503776, "grad_norm": 3.907912492752075, "learning_rate": 7.554730859662726e-06, "loss": 0.4118, "step": 29353 }, { "epoch": 4.79180441614628, "grad_norm": 3.2926034927368164, "learning_rate": 7.554109186776364e-06, "loss": 0.328, "step": 29354 }, { "epoch": 4.7919676747887845, "grad_norm": 3.3540163040161133, "learning_rate": 7.5534875239441355e-06, "loss": 0.3141, "step": 29355 }, { "epoch": 4.792130933431288, "grad_norm": 4.17073392868042, "learning_rate": 7.552865871168596e-06, "loss": 0.4599, "step": 29356 }, { "epoch": 4.792294192073793, "grad_norm": 3.396787643432617, "learning_rate": 7.5522442284523e-06, "loss": 0.3629, "step": 29357 }, { "epoch": 4.792457450716297, "grad_norm": 3.1067187786102295, "learning_rate": 7.551622595797804e-06, "loss": 0.3237, "step": 29358 }, { "epoch": 4.792620709358801, "grad_norm": 3.5880138874053955, "learning_rate": 7.551000973207663e-06, "loss": 0.3644, "step": 29359 }, { "epoch": 4.792783968001306, "grad_norm": 3.867605209350586, "learning_rate": 7.550379360684434e-06, "loss": 0.4069, "step": 29360 }, { "epoch": 4.79294722664381, "grad_norm": 4.001603126525879, "learning_rate": 7.54975775823067e-06, "loss": 0.4585, "step": 29361 }, { "epoch": 4.793110485286315, "grad_norm": 3.093862533569336, "learning_rate": 7.549136165848926e-06, "loss": 0.3483, "step": 29362 }, { "epoch": 4.793273743928819, "grad_norm": 4.090529441833496, "learning_rate": 7.548514583541757e-06, "loss": 0.554, "step": 29363 }, { "epoch": 4.7934370025713235, "grad_norm": 3.571910858154297, "learning_rate": 7.547893011311718e-06, "loss": 0.3955, "step": 29364 }, { "epoch": 4.793600261213828, "grad_norm": 3.641648530960083, "learning_rate": 7.5472714491613655e-06, "loss": 0.4081, "step": 29365 }, { "epoch": 4.793763519856332, "grad_norm": 3.379312038421631, "learning_rate": 7.546649897093255e-06, "loss": 0.3255, "step": 29366 }, { "epoch": 4.793926778498837, "grad_norm": 4.065262317657471, "learning_rate": 7.54602835510994e-06, "loss": 0.3772, "step": 29367 }, { "epoch": 4.794090037141341, "grad_norm": 4.087305545806885, "learning_rate": 7.5454068232139754e-06, "loss": 0.3613, "step": 29368 }, { "epoch": 4.794253295783846, "grad_norm": 3.7348036766052246, "learning_rate": 7.544785301407918e-06, "loss": 0.3906, "step": 29369 }, { "epoch": 4.79441655442635, "grad_norm": 3.5276873111724854, "learning_rate": 7.544163789694321e-06, "loss": 0.3679, "step": 29370 }, { "epoch": 4.794579813068855, "grad_norm": 3.1155924797058105, "learning_rate": 7.543542288075739e-06, "loss": 0.3507, "step": 29371 }, { "epoch": 4.794743071711359, "grad_norm": 3.166318416595459, "learning_rate": 7.542920796554727e-06, "loss": 0.3532, "step": 29372 }, { "epoch": 4.7949063303538635, "grad_norm": 4.533982276916504, "learning_rate": 7.542299315133839e-06, "loss": 0.4216, "step": 29373 }, { "epoch": 4.795069588996368, "grad_norm": 3.936000108718872, "learning_rate": 7.541677843815631e-06, "loss": 0.3868, "step": 29374 }, { "epoch": 4.7952328476388715, "grad_norm": 3.712489604949951, "learning_rate": 7.541056382602657e-06, "loss": 0.366, "step": 29375 }, { "epoch": 4.795396106281376, "grad_norm": 3.9343299865722656, "learning_rate": 7.54043493149747e-06, "loss": 0.4197, "step": 29376 }, { "epoch": 4.79555936492388, "grad_norm": 3.4123120307922363, "learning_rate": 7.5398134905026275e-06, "loss": 0.3442, "step": 29377 }, { "epoch": 4.795722623566385, "grad_norm": 3.4948623180389404, "learning_rate": 7.5391920596206836e-06, "loss": 0.3935, "step": 29378 }, { "epoch": 4.795885882208889, "grad_norm": 4.333178997039795, "learning_rate": 7.538570638854193e-06, "loss": 0.3743, "step": 29379 }, { "epoch": 4.796049140851394, "grad_norm": 3.4282405376434326, "learning_rate": 7.5379492282057084e-06, "loss": 0.3303, "step": 29380 }, { "epoch": 4.796212399493898, "grad_norm": 3.1635448932647705, "learning_rate": 7.537327827677785e-06, "loss": 0.3365, "step": 29381 }, { "epoch": 4.7963756581364025, "grad_norm": 3.9235785007476807, "learning_rate": 7.5367064372729756e-06, "loss": 0.4245, "step": 29382 }, { "epoch": 4.796538916778907, "grad_norm": 3.6266796588897705, "learning_rate": 7.536085056993837e-06, "loss": 0.3585, "step": 29383 }, { "epoch": 4.796702175421411, "grad_norm": 3.404615640640259, "learning_rate": 7.535463686842922e-06, "loss": 0.3828, "step": 29384 }, { "epoch": 4.796865434063916, "grad_norm": 3.464611768722534, "learning_rate": 7.534842326822786e-06, "loss": 0.3941, "step": 29385 }, { "epoch": 4.79702869270642, "grad_norm": 4.093986988067627, "learning_rate": 7.534220976935981e-06, "loss": 0.4409, "step": 29386 }, { "epoch": 4.797191951348925, "grad_norm": 4.032998561859131, "learning_rate": 7.53359963718506e-06, "loss": 0.4276, "step": 29387 }, { "epoch": 4.797355209991429, "grad_norm": 3.509216070175171, "learning_rate": 7.532978307572584e-06, "loss": 0.3526, "step": 29388 }, { "epoch": 4.797518468633934, "grad_norm": 3.4674386978149414, "learning_rate": 7.5323569881011005e-06, "loss": 0.358, "step": 29389 }, { "epoch": 4.797681727276438, "grad_norm": 3.9364640712738037, "learning_rate": 7.531735678773171e-06, "loss": 0.3747, "step": 29390 }, { "epoch": 4.7978449859189425, "grad_norm": 3.5844624042510986, "learning_rate": 7.531114379591338e-06, "loss": 0.3894, "step": 29391 }, { "epoch": 4.798008244561446, "grad_norm": 3.2078351974487305, "learning_rate": 7.530493090558163e-06, "loss": 0.3152, "step": 29392 }, { "epoch": 4.7981715032039505, "grad_norm": 3.154170036315918, "learning_rate": 7.529871811676198e-06, "loss": 0.3608, "step": 29393 }, { "epoch": 4.798334761846455, "grad_norm": 3.48233962059021, "learning_rate": 7.529250542947998e-06, "loss": 0.4052, "step": 29394 }, { "epoch": 4.798498020488959, "grad_norm": 3.6757500171661377, "learning_rate": 7.528629284376117e-06, "loss": 0.3767, "step": 29395 }, { "epoch": 4.798661279131464, "grad_norm": 3.395190477371216, "learning_rate": 7.5280080359631055e-06, "loss": 0.3164, "step": 29396 }, { "epoch": 4.798824537773968, "grad_norm": 3.452214002609253, "learning_rate": 7.527386797711521e-06, "loss": 0.3328, "step": 29397 }, { "epoch": 4.798987796416473, "grad_norm": 3.6627228260040283, "learning_rate": 7.526765569623913e-06, "loss": 0.355, "step": 29398 }, { "epoch": 4.799151055058977, "grad_norm": 3.791642665863037, "learning_rate": 7.526144351702841e-06, "loss": 0.3955, "step": 29399 }, { "epoch": 4.7993143137014815, "grad_norm": 3.4216575622558594, "learning_rate": 7.525523143950859e-06, "loss": 0.3107, "step": 29400 }, { "epoch": 4.799477572343986, "grad_norm": 3.826766014099121, "learning_rate": 7.524901946370513e-06, "loss": 0.3546, "step": 29401 }, { "epoch": 4.79964083098649, "grad_norm": 4.012751579284668, "learning_rate": 7.5242807589643605e-06, "loss": 0.385, "step": 29402 }, { "epoch": 4.799804089628995, "grad_norm": 4.122780799865723, "learning_rate": 7.523659581734952e-06, "loss": 0.4048, "step": 29403 }, { "epoch": 4.799967348271499, "grad_norm": 3.268315553665161, "learning_rate": 7.523038414684847e-06, "loss": 0.3547, "step": 29404 }, { "epoch": 4.800130606914004, "grad_norm": 2.9391896724700928, "learning_rate": 7.522417257816597e-06, "loss": 0.3576, "step": 29405 }, { "epoch": 4.800293865556508, "grad_norm": 3.89827299118042, "learning_rate": 7.521796111132752e-06, "loss": 0.3639, "step": 29406 }, { "epoch": 4.800457124199013, "grad_norm": 3.9798712730407715, "learning_rate": 7.521174974635868e-06, "loss": 0.411, "step": 29407 }, { "epoch": 4.800620382841517, "grad_norm": 3.6989240646362305, "learning_rate": 7.520553848328498e-06, "loss": 0.346, "step": 29408 }, { "epoch": 4.800783641484021, "grad_norm": 3.8741393089294434, "learning_rate": 7.519932732213197e-06, "loss": 0.4055, "step": 29409 }, { "epoch": 4.800946900126526, "grad_norm": 3.6744771003723145, "learning_rate": 7.519311626292513e-06, "loss": 0.3981, "step": 29410 }, { "epoch": 4.8011101587690295, "grad_norm": 3.62510085105896, "learning_rate": 7.5186905305690036e-06, "loss": 0.3947, "step": 29411 }, { "epoch": 4.801273417411534, "grad_norm": 3.2737834453582764, "learning_rate": 7.51806944504522e-06, "loss": 0.3611, "step": 29412 }, { "epoch": 4.801436676054038, "grad_norm": 3.5550589561462402, "learning_rate": 7.517448369723716e-06, "loss": 0.358, "step": 29413 }, { "epoch": 4.801599934696543, "grad_norm": 4.324342727661133, "learning_rate": 7.516827304607042e-06, "loss": 0.424, "step": 29414 }, { "epoch": 4.801763193339047, "grad_norm": 3.6767382621765137, "learning_rate": 7.516206249697755e-06, "loss": 0.3737, "step": 29415 }, { "epoch": 4.801926451981552, "grad_norm": 3.8586947917938232, "learning_rate": 7.515585204998406e-06, "loss": 0.5934, "step": 29416 }, { "epoch": 4.802089710624056, "grad_norm": 3.782257080078125, "learning_rate": 7.51496417051155e-06, "loss": 0.3654, "step": 29417 }, { "epoch": 4.8022529692665605, "grad_norm": 3.6018409729003906, "learning_rate": 7.5143431462397355e-06, "loss": 0.3624, "step": 29418 }, { "epoch": 4.802416227909065, "grad_norm": 4.04186487197876, "learning_rate": 7.513722132185521e-06, "loss": 0.4167, "step": 29419 }, { "epoch": 4.802579486551569, "grad_norm": 3.490354299545288, "learning_rate": 7.513101128351454e-06, "loss": 0.4093, "step": 29420 }, { "epoch": 4.802742745194074, "grad_norm": 3.8310678005218506, "learning_rate": 7.51248013474009e-06, "loss": 0.3956, "step": 29421 }, { "epoch": 4.802906003836578, "grad_norm": 3.7911853790283203, "learning_rate": 7.51185915135398e-06, "loss": 0.3649, "step": 29422 }, { "epoch": 4.803069262479083, "grad_norm": 4.576861381530762, "learning_rate": 7.511238178195677e-06, "loss": 0.3898, "step": 29423 }, { "epoch": 4.803232521121587, "grad_norm": 3.6817641258239746, "learning_rate": 7.510617215267734e-06, "loss": 0.319, "step": 29424 }, { "epoch": 4.803395779764092, "grad_norm": 4.031924247741699, "learning_rate": 7.509996262572703e-06, "loss": 0.4428, "step": 29425 }, { "epoch": 4.803559038406595, "grad_norm": 3.356236696243286, "learning_rate": 7.509375320113139e-06, "loss": 0.4119, "step": 29426 }, { "epoch": 4.8037222970491005, "grad_norm": 3.0474164485931396, "learning_rate": 7.508754387891592e-06, "loss": 0.3507, "step": 29427 }, { "epoch": 4.803885555691604, "grad_norm": 3.885232448577881, "learning_rate": 7.508133465910615e-06, "loss": 0.4482, "step": 29428 }, { "epoch": 4.8040488143341085, "grad_norm": 3.3941121101379395, "learning_rate": 7.507512554172763e-06, "loss": 0.3272, "step": 29429 }, { "epoch": 4.804212072976613, "grad_norm": 3.698310375213623, "learning_rate": 7.506891652680583e-06, "loss": 0.3443, "step": 29430 }, { "epoch": 4.804375331619117, "grad_norm": 3.0729711055755615, "learning_rate": 7.50627076143663e-06, "loss": 0.3939, "step": 29431 }, { "epoch": 4.804538590261622, "grad_norm": 3.329521894454956, "learning_rate": 7.505649880443458e-06, "loss": 0.4026, "step": 29432 }, { "epoch": 4.804701848904126, "grad_norm": 3.289740562438965, "learning_rate": 7.505029009703615e-06, "loss": 0.4266, "step": 29433 }, { "epoch": 4.804865107546631, "grad_norm": 3.1602134704589844, "learning_rate": 7.504408149219657e-06, "loss": 0.3705, "step": 29434 }, { "epoch": 4.805028366189135, "grad_norm": 3.3845701217651367, "learning_rate": 7.503787298994134e-06, "loss": 0.3619, "step": 29435 }, { "epoch": 4.8051916248316395, "grad_norm": 3.0920166969299316, "learning_rate": 7.5031664590296e-06, "loss": 0.3479, "step": 29436 }, { "epoch": 4.805354883474144, "grad_norm": 3.223667860031128, "learning_rate": 7.502545629328604e-06, "loss": 0.3616, "step": 29437 }, { "epoch": 4.805518142116648, "grad_norm": 3.781062602996826, "learning_rate": 7.5019248098937e-06, "loss": 0.3847, "step": 29438 }, { "epoch": 4.805681400759153, "grad_norm": 4.121935844421387, "learning_rate": 7.501304000727445e-06, "loss": 0.4547, "step": 29439 }, { "epoch": 4.805844659401657, "grad_norm": 3.6326746940612793, "learning_rate": 7.500683201832383e-06, "loss": 0.4205, "step": 29440 }, { "epoch": 4.806007918044162, "grad_norm": 3.294801950454712, "learning_rate": 7.500062413211065e-06, "loss": 0.4003, "step": 29441 }, { "epoch": 4.806171176686666, "grad_norm": 3.210383892059326, "learning_rate": 7.4994416348660494e-06, "loss": 0.3441, "step": 29442 }, { "epoch": 4.806334435329171, "grad_norm": 2.969282627105713, "learning_rate": 7.498820866799884e-06, "loss": 0.353, "step": 29443 }, { "epoch": 4.806497693971675, "grad_norm": 3.4250197410583496, "learning_rate": 7.498200109015121e-06, "loss": 0.3491, "step": 29444 }, { "epoch": 4.806660952614179, "grad_norm": 3.4658243656158447, "learning_rate": 7.497579361514316e-06, "loss": 0.3457, "step": 29445 }, { "epoch": 4.806824211256683, "grad_norm": 2.4849400520324707, "learning_rate": 7.496958624300015e-06, "loss": 0.2999, "step": 29446 }, { "epoch": 4.8069874698991875, "grad_norm": 3.8743720054626465, "learning_rate": 7.496337897374772e-06, "loss": 0.4127, "step": 29447 }, { "epoch": 4.807150728541692, "grad_norm": 2.918511390686035, "learning_rate": 7.4957171807411396e-06, "loss": 0.3864, "step": 29448 }, { "epoch": 4.807313987184196, "grad_norm": 3.450016975402832, "learning_rate": 7.495096474401668e-06, "loss": 0.3744, "step": 29449 }, { "epoch": 4.807477245826701, "grad_norm": 3.0049941539764404, "learning_rate": 7.494475778358907e-06, "loss": 0.3239, "step": 29450 }, { "epoch": 4.807640504469205, "grad_norm": 4.772635459899902, "learning_rate": 7.493855092615412e-06, "loss": 0.4288, "step": 29451 }, { "epoch": 4.80780376311171, "grad_norm": 3.6654438972473145, "learning_rate": 7.493234417173729e-06, "loss": 0.4655, "step": 29452 }, { "epoch": 4.807967021754214, "grad_norm": 3.650956630706787, "learning_rate": 7.4926137520364165e-06, "loss": 0.4259, "step": 29453 }, { "epoch": 4.8081302803967185, "grad_norm": 3.4887261390686035, "learning_rate": 7.491993097206021e-06, "loss": 0.3848, "step": 29454 }, { "epoch": 4.808293539039223, "grad_norm": 3.780078411102295, "learning_rate": 7.491372452685095e-06, "loss": 0.4165, "step": 29455 }, { "epoch": 4.808456797681727, "grad_norm": 4.048830509185791, "learning_rate": 7.49075181847619e-06, "loss": 0.3895, "step": 29456 }, { "epoch": 4.808620056324232, "grad_norm": 3.4756052494049072, "learning_rate": 7.490131194581856e-06, "loss": 0.3233, "step": 29457 }, { "epoch": 4.808783314966736, "grad_norm": 2.990140676498413, "learning_rate": 7.489510581004648e-06, "loss": 0.312, "step": 29458 }, { "epoch": 4.808946573609241, "grad_norm": 3.0445477962493896, "learning_rate": 7.488889977747111e-06, "loss": 0.2997, "step": 29459 }, { "epoch": 4.809109832251745, "grad_norm": 3.7640726566314697, "learning_rate": 7.4882693848118e-06, "loss": 0.4094, "step": 29460 }, { "epoch": 4.80927309089425, "grad_norm": 3.6790504455566406, "learning_rate": 7.487648802201264e-06, "loss": 0.3635, "step": 29461 }, { "epoch": 4.809436349536753, "grad_norm": 3.6481940746307373, "learning_rate": 7.487028229918056e-06, "loss": 0.3746, "step": 29462 }, { "epoch": 4.8095996081792585, "grad_norm": 3.6981003284454346, "learning_rate": 7.486407667964726e-06, "loss": 0.3884, "step": 29463 }, { "epoch": 4.809762866821762, "grad_norm": 3.320237159729004, "learning_rate": 7.485787116343822e-06, "loss": 0.4185, "step": 29464 }, { "epoch": 4.8099261254642665, "grad_norm": 3.576327323913574, "learning_rate": 7.485166575057901e-06, "loss": 0.4508, "step": 29465 }, { "epoch": 4.810089384106771, "grad_norm": 3.994584083557129, "learning_rate": 7.484546044109509e-06, "loss": 0.382, "step": 29466 }, { "epoch": 4.810252642749275, "grad_norm": 4.321158409118652, "learning_rate": 7.483925523501201e-06, "loss": 0.3954, "step": 29467 }, { "epoch": 4.81041590139178, "grad_norm": 4.640240669250488, "learning_rate": 7.483305013235525e-06, "loss": 0.3545, "step": 29468 }, { "epoch": 4.810579160034284, "grad_norm": 3.505995035171509, "learning_rate": 7.482684513315031e-06, "loss": 0.4067, "step": 29469 }, { "epoch": 4.810742418676789, "grad_norm": 4.027490615844727, "learning_rate": 7.482064023742271e-06, "loss": 0.4662, "step": 29470 }, { "epoch": 4.810905677319293, "grad_norm": 3.2323365211486816, "learning_rate": 7.481443544519793e-06, "loss": 0.334, "step": 29471 }, { "epoch": 4.8110689359617975, "grad_norm": 3.6102850437164307, "learning_rate": 7.48082307565015e-06, "loss": 0.4249, "step": 29472 }, { "epoch": 4.811232194604302, "grad_norm": 3.150510311126709, "learning_rate": 7.480202617135893e-06, "loss": 0.3688, "step": 29473 }, { "epoch": 4.811395453246806, "grad_norm": 3.4281978607177734, "learning_rate": 7.479582168979571e-06, "loss": 0.3328, "step": 29474 }, { "epoch": 4.811558711889311, "grad_norm": 3.084120035171509, "learning_rate": 7.478961731183733e-06, "loss": 0.3347, "step": 29475 }, { "epoch": 4.811721970531815, "grad_norm": 3.873629093170166, "learning_rate": 7.478341303750934e-06, "loss": 0.422, "step": 29476 }, { "epoch": 4.81188522917432, "grad_norm": 3.7124319076538086, "learning_rate": 7.477720886683722e-06, "loss": 0.349, "step": 29477 }, { "epoch": 4.812048487816824, "grad_norm": 2.8756895065307617, "learning_rate": 7.477100479984652e-06, "loss": 0.3826, "step": 29478 }, { "epoch": 4.812211746459328, "grad_norm": 3.6161603927612305, "learning_rate": 7.476480083656261e-06, "loss": 0.4051, "step": 29479 }, { "epoch": 4.812375005101833, "grad_norm": 3.6436281204223633, "learning_rate": 7.47585969770111e-06, "loss": 0.4247, "step": 29480 }, { "epoch": 4.812538263744337, "grad_norm": 3.4023568630218506, "learning_rate": 7.475239322121747e-06, "loss": 0.3926, "step": 29481 }, { "epoch": 4.812701522386841, "grad_norm": 3.2587356567382812, "learning_rate": 7.4746189569207216e-06, "loss": 0.3371, "step": 29482 }, { "epoch": 4.8128647810293455, "grad_norm": 3.7635819911956787, "learning_rate": 7.473998602100585e-06, "loss": 0.4026, "step": 29483 }, { "epoch": 4.81302803967185, "grad_norm": 3.548346996307373, "learning_rate": 7.473378257663885e-06, "loss": 0.3725, "step": 29484 }, { "epoch": 4.813191298314354, "grad_norm": 3.478454351425171, "learning_rate": 7.4727579236131746e-06, "loss": 0.3955, "step": 29485 }, { "epoch": 4.813354556956859, "grad_norm": 3.846522092819214, "learning_rate": 7.472137599950999e-06, "loss": 0.3384, "step": 29486 }, { "epoch": 4.813517815599363, "grad_norm": 3.721208333969116, "learning_rate": 7.471517286679912e-06, "loss": 0.3879, "step": 29487 }, { "epoch": 4.813681074241868, "grad_norm": 3.2716729640960693, "learning_rate": 7.470896983802469e-06, "loss": 0.3603, "step": 29488 }, { "epoch": 4.813844332884372, "grad_norm": 3.2925167083740234, "learning_rate": 7.470276691321207e-06, "loss": 0.3154, "step": 29489 }, { "epoch": 4.8140075915268765, "grad_norm": 3.148369550704956, "learning_rate": 7.469656409238685e-06, "loss": 0.3586, "step": 29490 }, { "epoch": 4.814170850169381, "grad_norm": 3.416748285293579, "learning_rate": 7.469036137557446e-06, "loss": 0.417, "step": 29491 }, { "epoch": 4.814334108811885, "grad_norm": 3.45632004737854, "learning_rate": 7.468415876280048e-06, "loss": 0.3933, "step": 29492 }, { "epoch": 4.81449736745439, "grad_norm": 3.9523744583129883, "learning_rate": 7.467795625409034e-06, "loss": 0.419, "step": 29493 }, { "epoch": 4.814660626096894, "grad_norm": 4.58579158782959, "learning_rate": 7.467175384946957e-06, "loss": 0.3833, "step": 29494 }, { "epoch": 4.814823884739399, "grad_norm": 3.9004979133605957, "learning_rate": 7.466555154896366e-06, "loss": 0.4468, "step": 29495 }, { "epoch": 4.814987143381903, "grad_norm": 3.7070493698120117, "learning_rate": 7.4659349352598106e-06, "loss": 0.3921, "step": 29496 }, { "epoch": 4.815150402024408, "grad_norm": 3.1778576374053955, "learning_rate": 7.465314726039841e-06, "loss": 0.3398, "step": 29497 }, { "epoch": 4.815313660666911, "grad_norm": 3.5565829277038574, "learning_rate": 7.464694527239004e-06, "loss": 0.3482, "step": 29498 }, { "epoch": 4.815476919309416, "grad_norm": 3.9169297218322754, "learning_rate": 7.46407433885985e-06, "loss": 0.4035, "step": 29499 }, { "epoch": 4.81564017795192, "grad_norm": 2.4653713703155518, "learning_rate": 7.463454160904928e-06, "loss": 0.3146, "step": 29500 }, { "epoch": 4.8158034365944244, "grad_norm": 3.485991954803467, "learning_rate": 7.46283399337679e-06, "loss": 0.4334, "step": 29501 }, { "epoch": 4.815966695236929, "grad_norm": 4.340795040130615, "learning_rate": 7.46221383627798e-06, "loss": 0.4006, "step": 29502 }, { "epoch": 4.816129953879433, "grad_norm": 4.030573844909668, "learning_rate": 7.461593689611053e-06, "loss": 0.3747, "step": 29503 }, { "epoch": 4.816293212521938, "grad_norm": 3.7179861068725586, "learning_rate": 7.460973553378556e-06, "loss": 0.4092, "step": 29504 }, { "epoch": 4.816456471164442, "grad_norm": 4.498945713043213, "learning_rate": 7.460353427583038e-06, "loss": 0.3729, "step": 29505 }, { "epoch": 4.816619729806947, "grad_norm": 3.2638866901397705, "learning_rate": 7.4597333122270485e-06, "loss": 0.3328, "step": 29506 }, { "epoch": 4.816782988449451, "grad_norm": 3.7621569633483887, "learning_rate": 7.4591132073131375e-06, "loss": 0.4549, "step": 29507 }, { "epoch": 4.8169462470919555, "grad_norm": 4.43846321105957, "learning_rate": 7.458493112843852e-06, "loss": 0.4218, "step": 29508 }, { "epoch": 4.81710950573446, "grad_norm": 3.6107354164123535, "learning_rate": 7.457873028821741e-06, "loss": 0.4036, "step": 29509 }, { "epoch": 4.817272764376964, "grad_norm": 3.1616528034210205, "learning_rate": 7.457252955249353e-06, "loss": 0.3211, "step": 29510 }, { "epoch": 4.817436023019469, "grad_norm": 3.444127321243286, "learning_rate": 7.456632892129239e-06, "loss": 0.3856, "step": 29511 }, { "epoch": 4.817599281661973, "grad_norm": 4.157310485839844, "learning_rate": 7.456012839463946e-06, "loss": 0.3705, "step": 29512 }, { "epoch": 4.817762540304478, "grad_norm": 3.4501891136169434, "learning_rate": 7.455392797256022e-06, "loss": 0.3575, "step": 29513 }, { "epoch": 4.817925798946982, "grad_norm": 3.2826879024505615, "learning_rate": 7.45477276550802e-06, "loss": 0.3212, "step": 29514 }, { "epoch": 4.818089057589486, "grad_norm": 3.8572793006896973, "learning_rate": 7.454152744222486e-06, "loss": 0.375, "step": 29515 }, { "epoch": 4.818252316231991, "grad_norm": 3.7400848865509033, "learning_rate": 7.453532733401968e-06, "loss": 0.3511, "step": 29516 }, { "epoch": 4.818415574874495, "grad_norm": 4.237776279449463, "learning_rate": 7.45291273304902e-06, "loss": 0.3441, "step": 29517 }, { "epoch": 4.818578833516999, "grad_norm": 4.379356384277344, "learning_rate": 7.4522927431661805e-06, "loss": 0.4305, "step": 29518 }, { "epoch": 4.818742092159503, "grad_norm": 3.460022211074829, "learning_rate": 7.451672763756004e-06, "loss": 0.3929, "step": 29519 }, { "epoch": 4.818905350802008, "grad_norm": 3.7033169269561768, "learning_rate": 7.45105279482104e-06, "loss": 0.3501, "step": 29520 }, { "epoch": 4.819068609444512, "grad_norm": 4.122257232666016, "learning_rate": 7.4504328363638345e-06, "loss": 0.4075, "step": 29521 }, { "epoch": 4.819231868087017, "grad_norm": 3.1463561058044434, "learning_rate": 7.449812888386938e-06, "loss": 0.3236, "step": 29522 }, { "epoch": 4.819395126729521, "grad_norm": 3.531381845474243, "learning_rate": 7.449192950892896e-06, "loss": 0.353, "step": 29523 }, { "epoch": 4.819558385372026, "grad_norm": 3.2992842197418213, "learning_rate": 7.44857302388426e-06, "loss": 0.3869, "step": 29524 }, { "epoch": 4.81972164401453, "grad_norm": 3.582632064819336, "learning_rate": 7.447953107363574e-06, "loss": 0.3829, "step": 29525 }, { "epoch": 4.8198849026570345, "grad_norm": 3.054333209991455, "learning_rate": 7.4473332013333925e-06, "loss": 0.3401, "step": 29526 }, { "epoch": 4.820048161299539, "grad_norm": 4.124393463134766, "learning_rate": 7.446713305796264e-06, "loss": 0.4167, "step": 29527 }, { "epoch": 4.820211419942043, "grad_norm": 3.770848274230957, "learning_rate": 7.4460934207547275e-06, "loss": 0.4021, "step": 29528 }, { "epoch": 4.820374678584548, "grad_norm": 4.203972816467285, "learning_rate": 7.445473546211337e-06, "loss": 0.3532, "step": 29529 }, { "epoch": 4.820537937227052, "grad_norm": 3.5933165550231934, "learning_rate": 7.44485368216864e-06, "loss": 0.3878, "step": 29530 }, { "epoch": 4.820701195869557, "grad_norm": 3.2023580074310303, "learning_rate": 7.444233828629186e-06, "loss": 0.3315, "step": 29531 }, { "epoch": 4.82086445451206, "grad_norm": 3.4728477001190186, "learning_rate": 7.443613985595521e-06, "loss": 0.3067, "step": 29532 }, { "epoch": 4.821027713154566, "grad_norm": 3.7008817195892334, "learning_rate": 7.442994153070194e-06, "loss": 0.3862, "step": 29533 }, { "epoch": 4.821190971797069, "grad_norm": 3.5361108779907227, "learning_rate": 7.442374331055753e-06, "loss": 0.3688, "step": 29534 }, { "epoch": 4.821354230439574, "grad_norm": 3.4410102367401123, "learning_rate": 7.441754519554745e-06, "loss": 0.3554, "step": 29535 }, { "epoch": 4.821517489082078, "grad_norm": 4.038189888000488, "learning_rate": 7.441134718569715e-06, "loss": 0.3703, "step": 29536 }, { "epoch": 4.821680747724582, "grad_norm": 3.3010878562927246, "learning_rate": 7.440514928103222e-06, "loss": 0.3242, "step": 29537 }, { "epoch": 4.821844006367087, "grad_norm": 3.1335277557373047, "learning_rate": 7.4398951481578e-06, "loss": 0.3538, "step": 29538 }, { "epoch": 4.822007265009591, "grad_norm": 4.971800804138184, "learning_rate": 7.4392753787360025e-06, "loss": 0.4193, "step": 29539 }, { "epoch": 4.822170523652096, "grad_norm": 3.47696590423584, "learning_rate": 7.438655619840376e-06, "loss": 0.393, "step": 29540 }, { "epoch": 4.8223337822946, "grad_norm": 3.3882896900177, "learning_rate": 7.43803587147347e-06, "loss": 0.3184, "step": 29541 }, { "epoch": 4.822497040937105, "grad_norm": 3.3451640605926514, "learning_rate": 7.437416133637831e-06, "loss": 0.4384, "step": 29542 }, { "epoch": 4.822660299579609, "grad_norm": 3.977463483810425, "learning_rate": 7.436796406336009e-06, "loss": 0.4411, "step": 29543 }, { "epoch": 4.8228235582221135, "grad_norm": 3.2074406147003174, "learning_rate": 7.436176689570547e-06, "loss": 0.3284, "step": 29544 }, { "epoch": 4.822986816864618, "grad_norm": 2.882936954498291, "learning_rate": 7.435556983343994e-06, "loss": 0.3339, "step": 29545 }, { "epoch": 4.823150075507122, "grad_norm": 4.728643417358398, "learning_rate": 7.434937287658902e-06, "loss": 0.4804, "step": 29546 }, { "epoch": 4.823313334149627, "grad_norm": 3.356818675994873, "learning_rate": 7.434317602517811e-06, "loss": 0.4026, "step": 29547 }, { "epoch": 4.823476592792131, "grad_norm": 3.1471056938171387, "learning_rate": 7.433697927923272e-06, "loss": 0.3894, "step": 29548 }, { "epoch": 4.823639851434636, "grad_norm": 3.876420497894287, "learning_rate": 7.433078263877832e-06, "loss": 0.4165, "step": 29549 }, { "epoch": 4.82380311007714, "grad_norm": 3.6334571838378906, "learning_rate": 7.432458610384037e-06, "loss": 0.3627, "step": 29550 }, { "epoch": 4.823966368719644, "grad_norm": 3.693023681640625, "learning_rate": 7.431838967444436e-06, "loss": 0.3368, "step": 29551 }, { "epoch": 4.824129627362148, "grad_norm": 3.9028167724609375, "learning_rate": 7.431219335061572e-06, "loss": 0.418, "step": 29552 }, { "epoch": 4.824292886004653, "grad_norm": 3.3948371410369873, "learning_rate": 7.430599713238e-06, "loss": 0.3462, "step": 29553 }, { "epoch": 4.824456144647157, "grad_norm": 3.4050021171569824, "learning_rate": 7.429980101976259e-06, "loss": 0.3937, "step": 29554 }, { "epoch": 4.824619403289661, "grad_norm": 3.6867735385894775, "learning_rate": 7.429360501278903e-06, "loss": 0.3532, "step": 29555 }, { "epoch": 4.824782661932166, "grad_norm": 3.909694194793701, "learning_rate": 7.428740911148475e-06, "loss": 0.3606, "step": 29556 }, { "epoch": 4.82494592057467, "grad_norm": 3.269568681716919, "learning_rate": 7.428121331587521e-06, "loss": 0.3504, "step": 29557 }, { "epoch": 4.825109179217175, "grad_norm": 4.290018558502197, "learning_rate": 7.427501762598589e-06, "loss": 0.3979, "step": 29558 }, { "epoch": 4.825272437859679, "grad_norm": 2.717773199081421, "learning_rate": 7.426882204184225e-06, "loss": 0.3116, "step": 29559 }, { "epoch": 4.825435696502184, "grad_norm": 3.195441246032715, "learning_rate": 7.426262656346979e-06, "loss": 0.3509, "step": 29560 }, { "epoch": 4.825598955144688, "grad_norm": 3.399343967437744, "learning_rate": 7.4256431190893915e-06, "loss": 0.396, "step": 29561 }, { "epoch": 4.8257622137871925, "grad_norm": 3.5406970977783203, "learning_rate": 7.425023592414015e-06, "loss": 0.3806, "step": 29562 }, { "epoch": 4.825925472429697, "grad_norm": 3.49714732170105, "learning_rate": 7.424404076323394e-06, "loss": 0.3616, "step": 29563 }, { "epoch": 4.826088731072201, "grad_norm": 3.7023816108703613, "learning_rate": 7.423784570820076e-06, "loss": 0.4187, "step": 29564 }, { "epoch": 4.826251989714706, "grad_norm": 3.7462944984436035, "learning_rate": 7.4231650759066064e-06, "loss": 0.4323, "step": 29565 }, { "epoch": 4.82641524835721, "grad_norm": 3.4804625511169434, "learning_rate": 7.422545591585537e-06, "loss": 0.3526, "step": 29566 }, { "epoch": 4.826578506999715, "grad_norm": 3.3147079944610596, "learning_rate": 7.421926117859403e-06, "loss": 0.3377, "step": 29567 }, { "epoch": 4.826741765642218, "grad_norm": 3.734375238418579, "learning_rate": 7.421306654730759e-06, "loss": 0.4155, "step": 29568 }, { "epoch": 4.826905024284724, "grad_norm": 3.8537232875823975, "learning_rate": 7.42068720220215e-06, "loss": 0.3788, "step": 29569 }, { "epoch": 4.827068282927227, "grad_norm": 2.928788661956787, "learning_rate": 7.420067760276122e-06, "loss": 0.3335, "step": 29570 }, { "epoch": 4.8272315415697316, "grad_norm": 3.3127593994140625, "learning_rate": 7.419448328955221e-06, "loss": 0.3473, "step": 29571 }, { "epoch": 4.827394800212236, "grad_norm": 3.4251980781555176, "learning_rate": 7.4188289082419925e-06, "loss": 0.3326, "step": 29572 }, { "epoch": 4.82755805885474, "grad_norm": 3.744420289993286, "learning_rate": 7.418209498138986e-06, "loss": 0.3802, "step": 29573 }, { "epoch": 4.827721317497245, "grad_norm": 3.3477530479431152, "learning_rate": 7.417590098648744e-06, "loss": 0.3551, "step": 29574 }, { "epoch": 4.827884576139749, "grad_norm": 3.638075828552246, "learning_rate": 7.416970709773812e-06, "loss": 0.4271, "step": 29575 }, { "epoch": 4.828047834782254, "grad_norm": 3.759486675262451, "learning_rate": 7.416351331516745e-06, "loss": 0.4729, "step": 29576 }, { "epoch": 4.828211093424758, "grad_norm": 3.2863073348999023, "learning_rate": 7.415731963880077e-06, "loss": 0.3432, "step": 29577 }, { "epoch": 4.828374352067263, "grad_norm": 3.6931653022766113, "learning_rate": 7.4151126068663594e-06, "loss": 0.3768, "step": 29578 }, { "epoch": 4.828537610709767, "grad_norm": 3.254578113555908, "learning_rate": 7.414493260478136e-06, "loss": 0.3458, "step": 29579 }, { "epoch": 4.8287008693522715, "grad_norm": 2.9768497943878174, "learning_rate": 7.413873924717957e-06, "loss": 0.3406, "step": 29580 }, { "epoch": 4.828864127994776, "grad_norm": 3.251450538635254, "learning_rate": 7.413254599588364e-06, "loss": 0.3414, "step": 29581 }, { "epoch": 4.82902738663728, "grad_norm": 3.702644109725952, "learning_rate": 7.412635285091907e-06, "loss": 0.3528, "step": 29582 }, { "epoch": 4.829190645279785, "grad_norm": 2.7856593132019043, "learning_rate": 7.412015981231129e-06, "loss": 0.3124, "step": 29583 }, { "epoch": 4.829353903922289, "grad_norm": 3.4450128078460693, "learning_rate": 7.411396688008574e-06, "loss": 0.3867, "step": 29584 }, { "epoch": 4.829517162564793, "grad_norm": 3.7142860889434814, "learning_rate": 7.410777405426795e-06, "loss": 0.395, "step": 29585 }, { "epoch": 4.829680421207298, "grad_norm": 3.584629774093628, "learning_rate": 7.4101581334883275e-06, "loss": 0.3743, "step": 29586 }, { "epoch": 4.829843679849802, "grad_norm": 3.2791287899017334, "learning_rate": 7.4095388721957226e-06, "loss": 0.3779, "step": 29587 }, { "epoch": 4.830006938492306, "grad_norm": 3.1461894512176514, "learning_rate": 7.408919621551527e-06, "loss": 0.3372, "step": 29588 }, { "epoch": 4.8301701971348106, "grad_norm": 4.235727310180664, "learning_rate": 7.408300381558282e-06, "loss": 0.4061, "step": 29589 }, { "epoch": 4.830333455777315, "grad_norm": 4.028310298919678, "learning_rate": 7.407681152218536e-06, "loss": 0.4416, "step": 29590 }, { "epoch": 4.830496714419819, "grad_norm": 3.5259273052215576, "learning_rate": 7.407061933534834e-06, "loss": 0.4066, "step": 29591 }, { "epoch": 4.830659973062324, "grad_norm": 4.440655708312988, "learning_rate": 7.406442725509721e-06, "loss": 0.4494, "step": 29592 }, { "epoch": 4.830823231704828, "grad_norm": 3.488379716873169, "learning_rate": 7.405823528145744e-06, "loss": 0.3946, "step": 29593 }, { "epoch": 4.830986490347333, "grad_norm": 3.1168904304504395, "learning_rate": 7.405204341445446e-06, "loss": 0.3599, "step": 29594 }, { "epoch": 4.831149748989837, "grad_norm": 4.154200553894043, "learning_rate": 7.4045851654113765e-06, "loss": 0.5058, "step": 29595 }, { "epoch": 4.831313007632342, "grad_norm": 3.6885697841644287, "learning_rate": 7.4039660000460744e-06, "loss": 0.4051, "step": 29596 }, { "epoch": 4.831476266274846, "grad_norm": 3.808443546295166, "learning_rate": 7.403346845352088e-06, "loss": 0.4165, "step": 29597 }, { "epoch": 4.8316395249173505, "grad_norm": 3.982304811477661, "learning_rate": 7.402727701331962e-06, "loss": 0.3639, "step": 29598 }, { "epoch": 4.831802783559855, "grad_norm": 2.8742005825042725, "learning_rate": 7.402108567988242e-06, "loss": 0.329, "step": 29599 }, { "epoch": 4.831966042202359, "grad_norm": 3.1846935749053955, "learning_rate": 7.401489445323473e-06, "loss": 0.3193, "step": 29600 }, { "epoch": 4.832129300844864, "grad_norm": 3.8247885704040527, "learning_rate": 7.4008703333402e-06, "loss": 0.4273, "step": 29601 }, { "epoch": 4.832292559487368, "grad_norm": 2.9021284580230713, "learning_rate": 7.4002512320409644e-06, "loss": 0.3366, "step": 29602 }, { "epoch": 4.832455818129873, "grad_norm": 3.8419456481933594, "learning_rate": 7.3996321414283165e-06, "loss": 0.3434, "step": 29603 }, { "epoch": 4.832619076772376, "grad_norm": 2.8774099349975586, "learning_rate": 7.399013061504799e-06, "loss": 0.3347, "step": 29604 }, { "epoch": 4.832782335414881, "grad_norm": 3.1355791091918945, "learning_rate": 7.398393992272962e-06, "loss": 0.3837, "step": 29605 }, { "epoch": 4.832945594057385, "grad_norm": 3.854004144668579, "learning_rate": 7.397774933735337e-06, "loss": 0.3536, "step": 29606 }, { "epoch": 4.8331088526998895, "grad_norm": 3.6071624755859375, "learning_rate": 7.39715588589448e-06, "loss": 0.3867, "step": 29607 }, { "epoch": 4.833272111342394, "grad_norm": 3.0223114490509033, "learning_rate": 7.396536848752934e-06, "loss": 0.3367, "step": 29608 }, { "epoch": 4.833435369984898, "grad_norm": 3.9227676391601562, "learning_rate": 7.395917822313239e-06, "loss": 0.3969, "step": 29609 }, { "epoch": 4.833598628627403, "grad_norm": 3.3520166873931885, "learning_rate": 7.395298806577944e-06, "loss": 0.3571, "step": 29610 }, { "epoch": 4.833761887269907, "grad_norm": 3.205556631088257, "learning_rate": 7.394679801549591e-06, "loss": 0.3532, "step": 29611 }, { "epoch": 4.833925145912412, "grad_norm": 3.169522285461426, "learning_rate": 7.3940608072307266e-06, "loss": 0.3248, "step": 29612 }, { "epoch": 4.834088404554916, "grad_norm": 4.402405738830566, "learning_rate": 7.3934418236238914e-06, "loss": 0.4946, "step": 29613 }, { "epoch": 4.834251663197421, "grad_norm": 3.538395643234253, "learning_rate": 7.392822850731636e-06, "loss": 0.3477, "step": 29614 }, { "epoch": 4.834414921839925, "grad_norm": 3.6335995197296143, "learning_rate": 7.392203888556504e-06, "loss": 0.3823, "step": 29615 }, { "epoch": 4.8345781804824295, "grad_norm": 3.3621037006378174, "learning_rate": 7.391584937101034e-06, "loss": 0.38, "step": 29616 }, { "epoch": 4.834741439124934, "grad_norm": 3.327199935913086, "learning_rate": 7.3909659963677715e-06, "loss": 0.3303, "step": 29617 }, { "epoch": 4.834904697767438, "grad_norm": 3.291095018386841, "learning_rate": 7.3903470663592636e-06, "loss": 0.3604, "step": 29618 }, { "epoch": 4.835067956409943, "grad_norm": 3.737287759780884, "learning_rate": 7.389728147078054e-06, "loss": 0.3694, "step": 29619 }, { "epoch": 4.835231215052447, "grad_norm": 3.377504587173462, "learning_rate": 7.389109238526686e-06, "loss": 0.3769, "step": 29620 }, { "epoch": 4.835394473694951, "grad_norm": 3.60007381439209, "learning_rate": 7.388490340707706e-06, "loss": 0.4191, "step": 29621 }, { "epoch": 4.835557732337455, "grad_norm": 3.2630863189697266, "learning_rate": 7.387871453623654e-06, "loss": 0.3456, "step": 29622 }, { "epoch": 4.83572099097996, "grad_norm": 3.363746404647827, "learning_rate": 7.387252577277078e-06, "loss": 0.373, "step": 29623 }, { "epoch": 4.835884249622464, "grad_norm": 3.9958956241607666, "learning_rate": 7.386633711670515e-06, "loss": 0.3756, "step": 29624 }, { "epoch": 4.8360475082649685, "grad_norm": 3.4026553630828857, "learning_rate": 7.3860148568065225e-06, "loss": 0.3619, "step": 29625 }, { "epoch": 4.836210766907473, "grad_norm": 4.159123420715332, "learning_rate": 7.3853960126876315e-06, "loss": 0.4189, "step": 29626 }, { "epoch": 4.836374025549977, "grad_norm": 3.5157718658447266, "learning_rate": 7.38477717931639e-06, "loss": 0.3883, "step": 29627 }, { "epoch": 4.836537284192482, "grad_norm": 3.5935754776000977, "learning_rate": 7.3841583566953426e-06, "loss": 0.3924, "step": 29628 }, { "epoch": 4.836700542834986, "grad_norm": 3.177306890487671, "learning_rate": 7.383539544827029e-06, "loss": 0.3826, "step": 29629 }, { "epoch": 4.836863801477491, "grad_norm": 3.191213607788086, "learning_rate": 7.382920743713999e-06, "loss": 0.3593, "step": 29630 }, { "epoch": 4.837027060119995, "grad_norm": 3.9825685024261475, "learning_rate": 7.382301953358795e-06, "loss": 0.3937, "step": 29631 }, { "epoch": 4.8371903187625, "grad_norm": 3.2029662132263184, "learning_rate": 7.381683173763958e-06, "loss": 0.3374, "step": 29632 }, { "epoch": 4.837353577405004, "grad_norm": 3.7017104625701904, "learning_rate": 7.381064404932032e-06, "loss": 0.3966, "step": 29633 }, { "epoch": 4.8375168360475085, "grad_norm": 3.39802622795105, "learning_rate": 7.380445646865566e-06, "loss": 0.36, "step": 29634 }, { "epoch": 4.837680094690013, "grad_norm": 3.478330135345459, "learning_rate": 7.379826899567095e-06, "loss": 0.4197, "step": 29635 }, { "epoch": 4.837843353332517, "grad_norm": 3.9345455169677734, "learning_rate": 7.3792081630391665e-06, "loss": 0.3852, "step": 29636 }, { "epoch": 4.838006611975022, "grad_norm": 3.4124701023101807, "learning_rate": 7.378589437284322e-06, "loss": 0.3272, "step": 29637 }, { "epoch": 4.838169870617525, "grad_norm": 3.146091938018799, "learning_rate": 7.37797072230511e-06, "loss": 0.3368, "step": 29638 }, { "epoch": 4.838333129260031, "grad_norm": 2.962772846221924, "learning_rate": 7.3773520181040666e-06, "loss": 0.2903, "step": 29639 }, { "epoch": 4.838496387902534, "grad_norm": 4.279799938201904, "learning_rate": 7.3767333246837405e-06, "loss": 0.4617, "step": 29640 }, { "epoch": 4.838659646545039, "grad_norm": 4.142765045166016, "learning_rate": 7.376114642046672e-06, "loss": 0.4063, "step": 29641 }, { "epoch": 4.838822905187543, "grad_norm": 3.6428380012512207, "learning_rate": 7.3754959701954075e-06, "loss": 0.3707, "step": 29642 }, { "epoch": 4.8389861638300475, "grad_norm": 3.705382823944092, "learning_rate": 7.374877309132486e-06, "loss": 0.4268, "step": 29643 }, { "epoch": 4.839149422472552, "grad_norm": 4.918726444244385, "learning_rate": 7.3742586588604555e-06, "loss": 0.4369, "step": 29644 }, { "epoch": 4.839312681115056, "grad_norm": 4.00892972946167, "learning_rate": 7.373640019381856e-06, "loss": 0.3629, "step": 29645 }, { "epoch": 4.839475939757561, "grad_norm": 4.290890216827393, "learning_rate": 7.3730213906992285e-06, "loss": 0.3885, "step": 29646 }, { "epoch": 4.839639198400065, "grad_norm": 3.548816680908203, "learning_rate": 7.3724027728151195e-06, "loss": 0.4004, "step": 29647 }, { "epoch": 4.83980245704257, "grad_norm": 3.4300460815429688, "learning_rate": 7.37178416573207e-06, "loss": 0.3497, "step": 29648 }, { "epoch": 4.839965715685074, "grad_norm": 4.107937812805176, "learning_rate": 7.371165569452623e-06, "loss": 0.3771, "step": 29649 }, { "epoch": 4.840128974327579, "grad_norm": 2.8454880714416504, "learning_rate": 7.370546983979322e-06, "loss": 0.3187, "step": 29650 }, { "epoch": 4.840292232970083, "grad_norm": 3.708611488342285, "learning_rate": 7.369928409314709e-06, "loss": 0.3506, "step": 29651 }, { "epoch": 4.8404554916125875, "grad_norm": 3.743027925491333, "learning_rate": 7.369309845461325e-06, "loss": 0.3804, "step": 29652 }, { "epoch": 4.840618750255092, "grad_norm": 3.769892930984497, "learning_rate": 7.3686912924217195e-06, "loss": 0.3847, "step": 29653 }, { "epoch": 4.840782008897596, "grad_norm": 3.5905420780181885, "learning_rate": 7.368072750198431e-06, "loss": 0.3626, "step": 29654 }, { "epoch": 4.8409452675401, "grad_norm": 3.5318796634674072, "learning_rate": 7.367454218794001e-06, "loss": 0.3695, "step": 29655 }, { "epoch": 4.841108526182605, "grad_norm": 2.8566665649414062, "learning_rate": 7.366835698210968e-06, "loss": 0.3375, "step": 29656 }, { "epoch": 4.841271784825109, "grad_norm": 3.6025609970092773, "learning_rate": 7.366217188451883e-06, "loss": 0.3794, "step": 29657 }, { "epoch": 4.841435043467613, "grad_norm": 3.7334189414978027, "learning_rate": 7.365598689519283e-06, "loss": 0.3458, "step": 29658 }, { "epoch": 4.841598302110118, "grad_norm": 2.9239203929901123, "learning_rate": 7.364980201415712e-06, "loss": 0.3245, "step": 29659 }, { "epoch": 4.841761560752622, "grad_norm": 3.041689872741699, "learning_rate": 7.364361724143713e-06, "loss": 0.3064, "step": 29660 }, { "epoch": 4.8419248193951265, "grad_norm": 3.450378656387329, "learning_rate": 7.363743257705829e-06, "loss": 0.3694, "step": 29661 }, { "epoch": 4.842088078037631, "grad_norm": 3.366428852081299, "learning_rate": 7.3631248021046e-06, "loss": 0.3398, "step": 29662 }, { "epoch": 4.842251336680135, "grad_norm": 4.241623878479004, "learning_rate": 7.362506357342567e-06, "loss": 0.4006, "step": 29663 }, { "epoch": 4.84241459532264, "grad_norm": 3.4730961322784424, "learning_rate": 7.361887923422282e-06, "loss": 0.3491, "step": 29664 }, { "epoch": 4.842577853965144, "grad_norm": 3.4719457626342773, "learning_rate": 7.361269500346274e-06, "loss": 0.3737, "step": 29665 }, { "epoch": 4.842741112607649, "grad_norm": 4.625782489776611, "learning_rate": 7.360651088117091e-06, "loss": 0.3753, "step": 29666 }, { "epoch": 4.842904371250153, "grad_norm": 3.3761520385742188, "learning_rate": 7.360032686737273e-06, "loss": 0.3444, "step": 29667 }, { "epoch": 4.843067629892658, "grad_norm": 3.6498405933380127, "learning_rate": 7.359414296209367e-06, "loss": 0.3631, "step": 29668 }, { "epoch": 4.843230888535162, "grad_norm": 3.0234177112579346, "learning_rate": 7.358795916535909e-06, "loss": 0.367, "step": 29669 }, { "epoch": 4.8433941471776665, "grad_norm": 3.5725605487823486, "learning_rate": 7.358177547719445e-06, "loss": 0.3589, "step": 29670 }, { "epoch": 4.843557405820171, "grad_norm": 3.9730398654937744, "learning_rate": 7.357559189762516e-06, "loss": 0.4084, "step": 29671 }, { "epoch": 4.843720664462675, "grad_norm": 3.2016971111297607, "learning_rate": 7.356940842667664e-06, "loss": 0.3412, "step": 29672 }, { "epoch": 4.84388392310518, "grad_norm": 3.8596465587615967, "learning_rate": 7.356322506437428e-06, "loss": 0.3378, "step": 29673 }, { "epoch": 4.844047181747683, "grad_norm": 4.744657039642334, "learning_rate": 7.3557041810743555e-06, "loss": 0.4572, "step": 29674 }, { "epoch": 4.844210440390188, "grad_norm": 3.8512942790985107, "learning_rate": 7.355085866580982e-06, "loss": 0.4063, "step": 29675 }, { "epoch": 4.844373699032692, "grad_norm": 3.394524335861206, "learning_rate": 7.354467562959853e-06, "loss": 0.3361, "step": 29676 }, { "epoch": 4.844536957675197, "grad_norm": 3.988314390182495, "learning_rate": 7.353849270213508e-06, "loss": 0.3655, "step": 29677 }, { "epoch": 4.844700216317701, "grad_norm": 3.355560779571533, "learning_rate": 7.353230988344486e-06, "loss": 0.3375, "step": 29678 }, { "epoch": 4.8448634749602055, "grad_norm": 3.47572660446167, "learning_rate": 7.352612717355336e-06, "loss": 0.3508, "step": 29679 }, { "epoch": 4.84502673360271, "grad_norm": 3.4413750171661377, "learning_rate": 7.3519944572485955e-06, "loss": 0.3629, "step": 29680 }, { "epoch": 4.845189992245214, "grad_norm": 2.7877461910247803, "learning_rate": 7.351376208026806e-06, "loss": 0.3407, "step": 29681 }, { "epoch": 4.845353250887719, "grad_norm": 3.4297308921813965, "learning_rate": 7.350757969692508e-06, "loss": 0.3529, "step": 29682 }, { "epoch": 4.845516509530223, "grad_norm": 3.9961109161376953, "learning_rate": 7.3501397422482456e-06, "loss": 0.4063, "step": 29683 }, { "epoch": 4.845679768172728, "grad_norm": 3.297915458679199, "learning_rate": 7.349521525696557e-06, "loss": 0.3837, "step": 29684 }, { "epoch": 4.845843026815232, "grad_norm": 3.883225202560425, "learning_rate": 7.348903320039984e-06, "loss": 0.3773, "step": 29685 }, { "epoch": 4.846006285457737, "grad_norm": 3.9910988807678223, "learning_rate": 7.348285125281068e-06, "loss": 0.4164, "step": 29686 }, { "epoch": 4.846169544100241, "grad_norm": 3.4865877628326416, "learning_rate": 7.347666941422351e-06, "loss": 0.3216, "step": 29687 }, { "epoch": 4.8463328027427455, "grad_norm": 3.1413307189941406, "learning_rate": 7.347048768466374e-06, "loss": 0.3236, "step": 29688 }, { "epoch": 4.84649606138525, "grad_norm": 3.5433011054992676, "learning_rate": 7.346430606415678e-06, "loss": 0.4098, "step": 29689 }, { "epoch": 4.846659320027754, "grad_norm": 3.978792428970337, "learning_rate": 7.3458124552728005e-06, "loss": 0.3605, "step": 29690 }, { "epoch": 4.846822578670258, "grad_norm": 2.8752152919769287, "learning_rate": 7.345194315040289e-06, "loss": 0.3515, "step": 29691 }, { "epoch": 4.846985837312763, "grad_norm": 3.8693654537200928, "learning_rate": 7.34457618572068e-06, "loss": 0.3875, "step": 29692 }, { "epoch": 4.847149095955267, "grad_norm": 3.7837514877319336, "learning_rate": 7.343958067316521e-06, "loss": 0.4017, "step": 29693 }, { "epoch": 4.847312354597771, "grad_norm": 3.1808290481567383, "learning_rate": 7.343339959830342e-06, "loss": 0.3439, "step": 29694 }, { "epoch": 4.847475613240276, "grad_norm": 3.567915916442871, "learning_rate": 7.34272186326469e-06, "loss": 0.4208, "step": 29695 }, { "epoch": 4.84763887188278, "grad_norm": 4.041076183319092, "learning_rate": 7.342103777622105e-06, "loss": 0.3901, "step": 29696 }, { "epoch": 4.8478021305252845, "grad_norm": 3.720027208328247, "learning_rate": 7.3414857029051275e-06, "loss": 0.3985, "step": 29697 }, { "epoch": 4.847965389167789, "grad_norm": 3.397458791732788, "learning_rate": 7.340867639116299e-06, "loss": 0.3299, "step": 29698 }, { "epoch": 4.848128647810293, "grad_norm": 4.181921482086182, "learning_rate": 7.34024958625816e-06, "loss": 0.4411, "step": 29699 }, { "epoch": 4.848291906452798, "grad_norm": 4.235876560211182, "learning_rate": 7.33963154433325e-06, "loss": 0.3891, "step": 29700 }, { "epoch": 4.848455165095302, "grad_norm": 3.820561170578003, "learning_rate": 7.3390135133441086e-06, "loss": 0.4245, "step": 29701 }, { "epoch": 4.848618423737807, "grad_norm": 4.192480087280273, "learning_rate": 7.33839549329328e-06, "loss": 0.4381, "step": 29702 }, { "epoch": 4.848781682380311, "grad_norm": 4.054840564727783, "learning_rate": 7.337777484183306e-06, "loss": 0.4032, "step": 29703 }, { "epoch": 4.848944941022816, "grad_norm": 4.108748435974121, "learning_rate": 7.337159486016721e-06, "loss": 0.4191, "step": 29704 }, { "epoch": 4.84910819966532, "grad_norm": 3.8236207962036133, "learning_rate": 7.336541498796064e-06, "loss": 0.4898, "step": 29705 }, { "epoch": 4.8492714583078245, "grad_norm": 3.6954691410064697, "learning_rate": 7.335923522523881e-06, "loss": 0.3382, "step": 29706 }, { "epoch": 4.849434716950329, "grad_norm": 3.366212844848633, "learning_rate": 7.335305557202712e-06, "loss": 0.407, "step": 29707 }, { "epoch": 4.8495979755928325, "grad_norm": 3.773056745529175, "learning_rate": 7.334687602835094e-06, "loss": 0.3828, "step": 29708 }, { "epoch": 4.849761234235338, "grad_norm": 3.7644505500793457, "learning_rate": 7.334069659423571e-06, "loss": 0.3608, "step": 29709 }, { "epoch": 4.849924492877841, "grad_norm": 3.4729602336883545, "learning_rate": 7.333451726970679e-06, "loss": 0.3505, "step": 29710 }, { "epoch": 4.850087751520346, "grad_norm": 4.047185897827148, "learning_rate": 7.332833805478961e-06, "loss": 0.4063, "step": 29711 }, { "epoch": 4.85025101016285, "grad_norm": 3.2249906063079834, "learning_rate": 7.3322158949509556e-06, "loss": 0.3205, "step": 29712 }, { "epoch": 4.850414268805355, "grad_norm": 4.817679405212402, "learning_rate": 7.3315979953892056e-06, "loss": 0.4803, "step": 29713 }, { "epoch": 4.850577527447859, "grad_norm": 3.8862507343292236, "learning_rate": 7.330980106796247e-06, "loss": 0.4153, "step": 29714 }, { "epoch": 4.8507407860903635, "grad_norm": 3.7838497161865234, "learning_rate": 7.330362229174621e-06, "loss": 0.3862, "step": 29715 }, { "epoch": 4.850904044732868, "grad_norm": 3.5334744453430176, "learning_rate": 7.329744362526868e-06, "loss": 0.3832, "step": 29716 }, { "epoch": 4.851067303375372, "grad_norm": 3.1785926818847656, "learning_rate": 7.329126506855526e-06, "loss": 0.3322, "step": 29717 }, { "epoch": 4.851230562017877, "grad_norm": 4.230566501617432, "learning_rate": 7.328508662163138e-06, "loss": 0.4554, "step": 29718 }, { "epoch": 4.851393820660381, "grad_norm": 4.3687262535095215, "learning_rate": 7.327890828452242e-06, "loss": 0.4401, "step": 29719 }, { "epoch": 4.851557079302886, "grad_norm": 4.1654181480407715, "learning_rate": 7.327273005725378e-06, "loss": 0.4033, "step": 29720 }, { "epoch": 4.85172033794539, "grad_norm": 4.5615949630737305, "learning_rate": 7.326655193985086e-06, "loss": 0.4318, "step": 29721 }, { "epoch": 4.851883596587895, "grad_norm": 3.1860523223876953, "learning_rate": 7.326037393233907e-06, "loss": 0.4066, "step": 29722 }, { "epoch": 4.852046855230399, "grad_norm": 3.2985129356384277, "learning_rate": 7.325419603474375e-06, "loss": 0.3701, "step": 29723 }, { "epoch": 4.8522101138729035, "grad_norm": 3.7038183212280273, "learning_rate": 7.324801824709035e-06, "loss": 0.387, "step": 29724 }, { "epoch": 4.852373372515408, "grad_norm": 3.4086124897003174, "learning_rate": 7.324184056940424e-06, "loss": 0.3802, "step": 29725 }, { "epoch": 4.852536631157912, "grad_norm": 3.1886212825775146, "learning_rate": 7.323566300171081e-06, "loss": 0.3367, "step": 29726 }, { "epoch": 4.852699889800416, "grad_norm": 2.6603500843048096, "learning_rate": 7.322948554403548e-06, "loss": 0.2922, "step": 29727 }, { "epoch": 4.85286314844292, "grad_norm": 3.820683479309082, "learning_rate": 7.32233081964036e-06, "loss": 0.3878, "step": 29728 }, { "epoch": 4.853026407085425, "grad_norm": 3.565082550048828, "learning_rate": 7.32171309588406e-06, "loss": 0.3682, "step": 29729 }, { "epoch": 4.853189665727929, "grad_norm": 3.8284125328063965, "learning_rate": 7.321095383137187e-06, "loss": 0.3998, "step": 29730 }, { "epoch": 4.853352924370434, "grad_norm": 4.007420063018799, "learning_rate": 7.320477681402279e-06, "loss": 0.4458, "step": 29731 }, { "epoch": 4.853516183012938, "grad_norm": 3.576066493988037, "learning_rate": 7.3198599906818775e-06, "loss": 0.3841, "step": 29732 }, { "epoch": 4.8536794416554425, "grad_norm": 3.967170476913452, "learning_rate": 7.319242310978518e-06, "loss": 0.4075, "step": 29733 }, { "epoch": 4.853842700297947, "grad_norm": 3.3081986904144287, "learning_rate": 7.318624642294742e-06, "loss": 0.3457, "step": 29734 }, { "epoch": 4.854005958940451, "grad_norm": 3.782921075820923, "learning_rate": 7.318006984633086e-06, "loss": 0.3697, "step": 29735 }, { "epoch": 4.854169217582956, "grad_norm": 4.566099643707275, "learning_rate": 7.317389337996091e-06, "loss": 0.398, "step": 29736 }, { "epoch": 4.85433247622546, "grad_norm": 3.263366460800171, "learning_rate": 7.316771702386296e-06, "loss": 0.3515, "step": 29737 }, { "epoch": 4.854495734867965, "grad_norm": 2.6663894653320312, "learning_rate": 7.316154077806239e-06, "loss": 0.3065, "step": 29738 }, { "epoch": 4.854658993510469, "grad_norm": 3.227479934692383, "learning_rate": 7.315536464258459e-06, "loss": 0.3155, "step": 29739 }, { "epoch": 4.854822252152974, "grad_norm": 3.7283289432525635, "learning_rate": 7.314918861745492e-06, "loss": 0.3943, "step": 29740 }, { "epoch": 4.854985510795478, "grad_norm": 3.6767303943634033, "learning_rate": 7.3143012702698835e-06, "loss": 0.4104, "step": 29741 }, { "epoch": 4.8551487694379825, "grad_norm": 4.164511680603027, "learning_rate": 7.313683689834172e-06, "loss": 0.3864, "step": 29742 }, { "epoch": 4.855312028080487, "grad_norm": 3.8758630752563477, "learning_rate": 7.3130661204408885e-06, "loss": 0.4033, "step": 29743 }, { "epoch": 4.8554752867229904, "grad_norm": 3.338843584060669, "learning_rate": 7.312448562092573e-06, "loss": 0.3569, "step": 29744 }, { "epoch": 4.855638545365496, "grad_norm": 3.9360110759735107, "learning_rate": 7.311831014791769e-06, "loss": 0.3264, "step": 29745 }, { "epoch": 4.855801804007999, "grad_norm": 3.5327560901641846, "learning_rate": 7.311213478541013e-06, "loss": 0.4058, "step": 29746 }, { "epoch": 4.855965062650504, "grad_norm": 3.763646364212036, "learning_rate": 7.3105959533428425e-06, "loss": 0.384, "step": 29747 }, { "epoch": 4.856128321293008, "grad_norm": 4.3358154296875, "learning_rate": 7.309978439199796e-06, "loss": 0.3947, "step": 29748 }, { "epoch": 4.856291579935513, "grad_norm": 3.8821234703063965, "learning_rate": 7.309360936114413e-06, "loss": 0.3927, "step": 29749 }, { "epoch": 4.856454838578017, "grad_norm": 4.3911967277526855, "learning_rate": 7.308743444089232e-06, "loss": 0.4534, "step": 29750 }, { "epoch": 4.8566180972205215, "grad_norm": 3.398116111755371, "learning_rate": 7.308125963126789e-06, "loss": 0.3647, "step": 29751 }, { "epoch": 4.856781355863026, "grad_norm": 3.948606014251709, "learning_rate": 7.307508493229628e-06, "loss": 0.4436, "step": 29752 }, { "epoch": 4.85694461450553, "grad_norm": 3.1144919395446777, "learning_rate": 7.3068910344002796e-06, "loss": 0.3254, "step": 29753 }, { "epoch": 4.857107873148035, "grad_norm": 4.005618095397949, "learning_rate": 7.306273586641286e-06, "loss": 0.3982, "step": 29754 }, { "epoch": 4.857271131790539, "grad_norm": 3.8424746990203857, "learning_rate": 7.305656149955181e-06, "loss": 0.4129, "step": 29755 }, { "epoch": 4.857434390433044, "grad_norm": 4.289190769195557, "learning_rate": 7.3050387243445084e-06, "loss": 0.3817, "step": 29756 }, { "epoch": 4.857597649075548, "grad_norm": 3.8339083194732666, "learning_rate": 7.304421309811804e-06, "loss": 0.3686, "step": 29757 }, { "epoch": 4.857760907718053, "grad_norm": 3.3356473445892334, "learning_rate": 7.303803906359608e-06, "loss": 0.4216, "step": 29758 }, { "epoch": 4.857924166360557, "grad_norm": 4.762524604797363, "learning_rate": 7.303186513990455e-06, "loss": 0.332, "step": 29759 }, { "epoch": 4.8580874250030615, "grad_norm": 3.8092269897460938, "learning_rate": 7.3025691327068825e-06, "loss": 0.3488, "step": 29760 }, { "epoch": 4.858250683645565, "grad_norm": 3.7683236598968506, "learning_rate": 7.30195176251143e-06, "loss": 0.403, "step": 29761 }, { "epoch": 4.85841394228807, "grad_norm": 3.770043134689331, "learning_rate": 7.301334403406638e-06, "loss": 0.3592, "step": 29762 }, { "epoch": 4.858577200930574, "grad_norm": 4.419088363647461, "learning_rate": 7.300717055395039e-06, "loss": 0.437, "step": 29763 }, { "epoch": 4.858740459573078, "grad_norm": 3.4822824001312256, "learning_rate": 7.300099718479173e-06, "loss": 0.389, "step": 29764 }, { "epoch": 4.858903718215583, "grad_norm": 3.854478120803833, "learning_rate": 7.2994823926615774e-06, "loss": 0.3606, "step": 29765 }, { "epoch": 4.859066976858087, "grad_norm": 3.043537139892578, "learning_rate": 7.29886507794479e-06, "loss": 0.3284, "step": 29766 }, { "epoch": 4.859230235500592, "grad_norm": 3.8527872562408447, "learning_rate": 7.298247774331346e-06, "loss": 0.3927, "step": 29767 }, { "epoch": 4.859393494143096, "grad_norm": 3.4020509719848633, "learning_rate": 7.297630481823787e-06, "loss": 0.3383, "step": 29768 }, { "epoch": 4.8595567527856005, "grad_norm": 4.562254428863525, "learning_rate": 7.29701320042465e-06, "loss": 0.5135, "step": 29769 }, { "epoch": 4.859720011428105, "grad_norm": 3.6025242805480957, "learning_rate": 7.296395930136471e-06, "loss": 0.387, "step": 29770 }, { "epoch": 4.859883270070609, "grad_norm": 3.4636316299438477, "learning_rate": 7.2957786709617886e-06, "loss": 0.3553, "step": 29771 }, { "epoch": 4.860046528713114, "grad_norm": 3.8209421634674072, "learning_rate": 7.295161422903138e-06, "loss": 0.3981, "step": 29772 }, { "epoch": 4.860209787355618, "grad_norm": 4.0825605392456055, "learning_rate": 7.294544185963056e-06, "loss": 0.3821, "step": 29773 }, { "epoch": 4.860373045998123, "grad_norm": 3.519115447998047, "learning_rate": 7.293926960144082e-06, "loss": 0.4147, "step": 29774 }, { "epoch": 4.860536304640627, "grad_norm": 3.696671724319458, "learning_rate": 7.293309745448753e-06, "loss": 0.3976, "step": 29775 }, { "epoch": 4.860699563283132, "grad_norm": 3.553455114364624, "learning_rate": 7.292692541879604e-06, "loss": 0.3681, "step": 29776 }, { "epoch": 4.860862821925636, "grad_norm": 3.525719404220581, "learning_rate": 7.292075349439176e-06, "loss": 0.4136, "step": 29777 }, { "epoch": 4.8610260805681405, "grad_norm": 3.7167983055114746, "learning_rate": 7.29145816813e-06, "loss": 0.3254, "step": 29778 }, { "epoch": 4.861189339210645, "grad_norm": 3.8116941452026367, "learning_rate": 7.29084099795462e-06, "loss": 0.402, "step": 29779 }, { "epoch": 4.861352597853148, "grad_norm": 3.90146541595459, "learning_rate": 7.2902238389155685e-06, "loss": 0.375, "step": 29780 }, { "epoch": 4.861515856495653, "grad_norm": 3.698563575744629, "learning_rate": 7.289606691015388e-06, "loss": 0.3506, "step": 29781 }, { "epoch": 4.861679115138157, "grad_norm": 2.595548152923584, "learning_rate": 7.288989554256604e-06, "loss": 0.3168, "step": 29782 }, { "epoch": 4.861842373780662, "grad_norm": 4.300702095031738, "learning_rate": 7.2883724286417646e-06, "loss": 0.3852, "step": 29783 }, { "epoch": 4.862005632423166, "grad_norm": 3.515442132949829, "learning_rate": 7.2877553141734014e-06, "loss": 0.3544, "step": 29784 }, { "epoch": 4.862168891065671, "grad_norm": 3.57181453704834, "learning_rate": 7.287138210854051e-06, "loss": 0.3862, "step": 29785 }, { "epoch": 4.862332149708175, "grad_norm": 3.146996021270752, "learning_rate": 7.286521118686252e-06, "loss": 0.3005, "step": 29786 }, { "epoch": 4.8624954083506795, "grad_norm": 3.4402031898498535, "learning_rate": 7.28590403767254e-06, "loss": 0.3666, "step": 29787 }, { "epoch": 4.862658666993184, "grad_norm": 3.179879665374756, "learning_rate": 7.285286967815451e-06, "loss": 0.2941, "step": 29788 }, { "epoch": 4.862821925635688, "grad_norm": 3.1674604415893555, "learning_rate": 7.284669909117521e-06, "loss": 0.3369, "step": 29789 }, { "epoch": 4.862985184278193, "grad_norm": 2.8591957092285156, "learning_rate": 7.2840528615812885e-06, "loss": 0.2981, "step": 29790 }, { "epoch": 4.863148442920697, "grad_norm": 3.9693491458892822, "learning_rate": 7.283435825209295e-06, "loss": 0.45, "step": 29791 }, { "epoch": 4.863311701563202, "grad_norm": 2.7392659187316895, "learning_rate": 7.282818800004065e-06, "loss": 0.3964, "step": 29792 }, { "epoch": 4.863474960205706, "grad_norm": 3.026963710784912, "learning_rate": 7.28220178596814e-06, "loss": 0.3536, "step": 29793 }, { "epoch": 4.863638218848211, "grad_norm": 2.8726630210876465, "learning_rate": 7.28158478310406e-06, "loss": 0.3359, "step": 29794 }, { "epoch": 4.863801477490715, "grad_norm": 3.6519935131073, "learning_rate": 7.2809677914143575e-06, "loss": 0.3335, "step": 29795 }, { "epoch": 4.8639647361332194, "grad_norm": 3.6238934993743896, "learning_rate": 7.28035081090157e-06, "loss": 0.3812, "step": 29796 }, { "epoch": 4.864127994775723, "grad_norm": 3.554990768432617, "learning_rate": 7.279733841568233e-06, "loss": 0.3361, "step": 29797 }, { "epoch": 4.864291253418228, "grad_norm": 3.4212121963500977, "learning_rate": 7.279116883416883e-06, "loss": 0.3616, "step": 29798 }, { "epoch": 4.864454512060732, "grad_norm": 3.3372135162353516, "learning_rate": 7.278499936450056e-06, "loss": 0.3814, "step": 29799 }, { "epoch": 4.864617770703236, "grad_norm": 4.129712104797363, "learning_rate": 7.27788300067029e-06, "loss": 0.3332, "step": 29800 }, { "epoch": 4.864781029345741, "grad_norm": 3.4013116359710693, "learning_rate": 7.277266076080119e-06, "loss": 0.3769, "step": 29801 }, { "epoch": 4.864944287988245, "grad_norm": 2.9180736541748047, "learning_rate": 7.276649162682078e-06, "loss": 0.2912, "step": 29802 }, { "epoch": 4.86510754663075, "grad_norm": 3.0004661083221436, "learning_rate": 7.276032260478704e-06, "loss": 0.3235, "step": 29803 }, { "epoch": 4.865270805273254, "grad_norm": 3.3707611560821533, "learning_rate": 7.2754153694725326e-06, "loss": 0.3343, "step": 29804 }, { "epoch": 4.8654340639157585, "grad_norm": 4.012511730194092, "learning_rate": 7.2747984896660975e-06, "loss": 0.3356, "step": 29805 }, { "epoch": 4.865597322558263, "grad_norm": 3.316131591796875, "learning_rate": 7.27418162106194e-06, "loss": 0.3524, "step": 29806 }, { "epoch": 4.865760581200767, "grad_norm": 4.613731384277344, "learning_rate": 7.273564763662591e-06, "loss": 0.4013, "step": 29807 }, { "epoch": 4.865923839843272, "grad_norm": 3.3140108585357666, "learning_rate": 7.272947917470589e-06, "loss": 0.3404, "step": 29808 }, { "epoch": 4.866087098485776, "grad_norm": 4.216836929321289, "learning_rate": 7.2723310824884675e-06, "loss": 0.4212, "step": 29809 }, { "epoch": 4.866250357128281, "grad_norm": 3.1739723682403564, "learning_rate": 7.2717142587187625e-06, "loss": 0.3934, "step": 29810 }, { "epoch": 4.866413615770785, "grad_norm": 3.685544013977051, "learning_rate": 7.2710974461640135e-06, "loss": 0.3798, "step": 29811 }, { "epoch": 4.86657687441329, "grad_norm": 5.2929534912109375, "learning_rate": 7.27048064482675e-06, "loss": 0.4342, "step": 29812 }, { "epoch": 4.866740133055794, "grad_norm": 3.798619031906128, "learning_rate": 7.269863854709509e-06, "loss": 0.4125, "step": 29813 }, { "epoch": 4.8669033916982976, "grad_norm": 3.1519086360931396, "learning_rate": 7.269247075814828e-06, "loss": 0.366, "step": 29814 }, { "epoch": 4.867066650340803, "grad_norm": 4.7917280197143555, "learning_rate": 7.268630308145241e-06, "loss": 0.4784, "step": 29815 }, { "epoch": 4.867229908983306, "grad_norm": 3.3689048290252686, "learning_rate": 7.2680135517032816e-06, "loss": 0.3575, "step": 29816 }, { "epoch": 4.867393167625811, "grad_norm": 3.9497437477111816, "learning_rate": 7.267396806491488e-06, "loss": 0.3784, "step": 29817 }, { "epoch": 4.867556426268315, "grad_norm": 3.193009376525879, "learning_rate": 7.266780072512395e-06, "loss": 0.3521, "step": 29818 }, { "epoch": 4.86771968491082, "grad_norm": 3.336555004119873, "learning_rate": 7.266163349768536e-06, "loss": 0.3812, "step": 29819 }, { "epoch": 4.867882943553324, "grad_norm": 3.5271711349487305, "learning_rate": 7.265546638262451e-06, "loss": 0.3671, "step": 29820 }, { "epoch": 4.868046202195829, "grad_norm": 3.6691558361053467, "learning_rate": 7.264929937996667e-06, "loss": 0.3271, "step": 29821 }, { "epoch": 4.868209460838333, "grad_norm": 3.4484035968780518, "learning_rate": 7.264313248973725e-06, "loss": 0.3895, "step": 29822 }, { "epoch": 4.8683727194808375, "grad_norm": 4.323098182678223, "learning_rate": 7.263696571196158e-06, "loss": 0.4272, "step": 29823 }, { "epoch": 4.868535978123342, "grad_norm": 3.4239068031311035, "learning_rate": 7.263079904666501e-06, "loss": 0.4219, "step": 29824 }, { "epoch": 4.868699236765846, "grad_norm": 3.3582265377044678, "learning_rate": 7.26246324938729e-06, "loss": 0.3269, "step": 29825 }, { "epoch": 4.868862495408351, "grad_norm": 3.5304458141326904, "learning_rate": 7.261846605361058e-06, "loss": 0.3859, "step": 29826 }, { "epoch": 4.869025754050855, "grad_norm": 3.5314791202545166, "learning_rate": 7.26122997259034e-06, "loss": 0.3612, "step": 29827 }, { "epoch": 4.86918901269336, "grad_norm": 3.4873578548431396, "learning_rate": 7.26061335107767e-06, "loss": 0.3971, "step": 29828 }, { "epoch": 4.869352271335864, "grad_norm": 5.190939426422119, "learning_rate": 7.2599967408255865e-06, "loss": 0.3555, "step": 29829 }, { "epoch": 4.869515529978369, "grad_norm": 2.941527843475342, "learning_rate": 7.2593801418366265e-06, "loss": 0.339, "step": 29830 }, { "epoch": 4.869678788620873, "grad_norm": 3.5253257751464844, "learning_rate": 7.258763554113315e-06, "loss": 0.3499, "step": 29831 }, { "epoch": 4.869842047263377, "grad_norm": 3.8211820125579834, "learning_rate": 7.258146977658189e-06, "loss": 0.4259, "step": 29832 }, { "epoch": 4.870005305905881, "grad_norm": 3.6937496662139893, "learning_rate": 7.257530412473788e-06, "loss": 0.3295, "step": 29833 }, { "epoch": 4.870168564548385, "grad_norm": 4.303989410400391, "learning_rate": 7.256913858562644e-06, "loss": 0.4921, "step": 29834 }, { "epoch": 4.87033182319089, "grad_norm": 4.6522345542907715, "learning_rate": 7.256297315927291e-06, "loss": 0.426, "step": 29835 }, { "epoch": 4.870495081833394, "grad_norm": 3.146031141281128, "learning_rate": 7.255680784570264e-06, "loss": 0.3748, "step": 29836 }, { "epoch": 4.870658340475899, "grad_norm": 3.1794004440307617, "learning_rate": 7.255064264494096e-06, "loss": 0.3787, "step": 29837 }, { "epoch": 4.870821599118403, "grad_norm": 3.816262722015381, "learning_rate": 7.254447755701324e-06, "loss": 0.4368, "step": 29838 }, { "epoch": 4.870984857760908, "grad_norm": 2.345174789428711, "learning_rate": 7.253831258194477e-06, "loss": 0.274, "step": 29839 }, { "epoch": 4.871148116403412, "grad_norm": 4.057708740234375, "learning_rate": 7.2532147719761e-06, "loss": 0.3895, "step": 29840 }, { "epoch": 4.8713113750459165, "grad_norm": 3.519444465637207, "learning_rate": 7.252598297048716e-06, "loss": 0.403, "step": 29841 }, { "epoch": 4.871474633688421, "grad_norm": 3.386505365371704, "learning_rate": 7.251981833414863e-06, "loss": 0.4145, "step": 29842 }, { "epoch": 4.871637892330925, "grad_norm": 2.8190293312072754, "learning_rate": 7.251365381077073e-06, "loss": 0.379, "step": 29843 }, { "epoch": 4.87180115097343, "grad_norm": 3.7615137100219727, "learning_rate": 7.250748940037883e-06, "loss": 0.4106, "step": 29844 }, { "epoch": 4.871964409615934, "grad_norm": 3.0720341205596924, "learning_rate": 7.250132510299827e-06, "loss": 0.3754, "step": 29845 }, { "epoch": 4.872127668258439, "grad_norm": 3.941199541091919, "learning_rate": 7.249516091865437e-06, "loss": 0.444, "step": 29846 }, { "epoch": 4.872290926900943, "grad_norm": 3.9694950580596924, "learning_rate": 7.2488996847372485e-06, "loss": 0.4291, "step": 29847 }, { "epoch": 4.872454185543448, "grad_norm": 2.8128974437713623, "learning_rate": 7.248283288917794e-06, "loss": 0.3282, "step": 29848 }, { "epoch": 4.872617444185952, "grad_norm": 4.107044696807861, "learning_rate": 7.247666904409609e-06, "loss": 0.459, "step": 29849 }, { "epoch": 4.8727807028284555, "grad_norm": 3.3888697624206543, "learning_rate": 7.2470505312152275e-06, "loss": 0.3944, "step": 29850 }, { "epoch": 4.87294396147096, "grad_norm": 3.5410664081573486, "learning_rate": 7.24643416933718e-06, "loss": 0.3711, "step": 29851 }, { "epoch": 4.873107220113464, "grad_norm": 3.625231981277466, "learning_rate": 7.245817818778001e-06, "loss": 0.3664, "step": 29852 }, { "epoch": 4.873270478755969, "grad_norm": 3.7897448539733887, "learning_rate": 7.245201479540226e-06, "loss": 0.34, "step": 29853 }, { "epoch": 4.873433737398473, "grad_norm": 3.349478006362915, "learning_rate": 7.244585151626387e-06, "loss": 0.3461, "step": 29854 }, { "epoch": 4.873596996040978, "grad_norm": 4.623605728149414, "learning_rate": 7.243968835039016e-06, "loss": 0.4086, "step": 29855 }, { "epoch": 4.873760254683482, "grad_norm": 3.2109298706054688, "learning_rate": 7.243352529780651e-06, "loss": 0.3675, "step": 29856 }, { "epoch": 4.873923513325987, "grad_norm": 4.165931701660156, "learning_rate": 7.242736235853822e-06, "loss": 0.4222, "step": 29857 }, { "epoch": 4.874086771968491, "grad_norm": 3.6170241832733154, "learning_rate": 7.242119953261064e-06, "loss": 0.3597, "step": 29858 }, { "epoch": 4.8742500306109955, "grad_norm": 3.817246437072754, "learning_rate": 7.2415036820049116e-06, "loss": 0.3788, "step": 29859 }, { "epoch": 4.8744132892535, "grad_norm": 3.0545148849487305, "learning_rate": 7.240887422087892e-06, "loss": 0.3462, "step": 29860 }, { "epoch": 4.874576547896004, "grad_norm": 3.3134756088256836, "learning_rate": 7.240271173512545e-06, "loss": 0.3315, "step": 29861 }, { "epoch": 4.874739806538509, "grad_norm": 3.222104549407959, "learning_rate": 7.239654936281399e-06, "loss": 0.3583, "step": 29862 }, { "epoch": 4.874903065181013, "grad_norm": 4.890065670013428, "learning_rate": 7.2390387103969905e-06, "loss": 0.4256, "step": 29863 }, { "epoch": 4.875066323823518, "grad_norm": 3.875962734222412, "learning_rate": 7.238422495861852e-06, "loss": 0.3566, "step": 29864 }, { "epoch": 4.875229582466022, "grad_norm": 2.886730909347534, "learning_rate": 7.237806292678516e-06, "loss": 0.3396, "step": 29865 }, { "epoch": 4.8753928411085266, "grad_norm": 2.9996511936187744, "learning_rate": 7.237190100849511e-06, "loss": 0.273, "step": 29866 }, { "epoch": 4.87555609975103, "grad_norm": 3.3165767192840576, "learning_rate": 7.2365739203773775e-06, "loss": 0.3721, "step": 29867 }, { "epoch": 4.875719358393535, "grad_norm": 3.1963040828704834, "learning_rate": 7.2359577512646465e-06, "loss": 0.3178, "step": 29868 }, { "epoch": 4.875882617036039, "grad_norm": 3.4779300689697266, "learning_rate": 7.235341593513851e-06, "loss": 0.4044, "step": 29869 }, { "epoch": 4.876045875678543, "grad_norm": 4.213613033294678, "learning_rate": 7.2347254471275175e-06, "loss": 0.4505, "step": 29870 }, { "epoch": 4.876209134321048, "grad_norm": 3.6072983741760254, "learning_rate": 7.234109312108186e-06, "loss": 0.3489, "step": 29871 }, { "epoch": 4.876372392963552, "grad_norm": 3.4497265815734863, "learning_rate": 7.233493188458387e-06, "loss": 0.4137, "step": 29872 }, { "epoch": 4.876535651606057, "grad_norm": 3.764216661453247, "learning_rate": 7.232877076180651e-06, "loss": 0.4357, "step": 29873 }, { "epoch": 4.876698910248561, "grad_norm": 3.260462999343872, "learning_rate": 7.232260975277514e-06, "loss": 0.3975, "step": 29874 }, { "epoch": 4.876862168891066, "grad_norm": 3.0913360118865967, "learning_rate": 7.2316448857515076e-06, "loss": 0.3458, "step": 29875 }, { "epoch": 4.87702542753357, "grad_norm": 3.447209358215332, "learning_rate": 7.2310288076051626e-06, "loss": 0.4328, "step": 29876 }, { "epoch": 4.8771886861760745, "grad_norm": 3.3923346996307373, "learning_rate": 7.2304127408410126e-06, "loss": 0.351, "step": 29877 }, { "epoch": 4.877351944818579, "grad_norm": 3.5702455043792725, "learning_rate": 7.2297966854615884e-06, "loss": 0.4065, "step": 29878 }, { "epoch": 4.877515203461083, "grad_norm": 3.9341273307800293, "learning_rate": 7.229180641469431e-06, "loss": 0.3725, "step": 29879 }, { "epoch": 4.877678462103588, "grad_norm": 3.670133590698242, "learning_rate": 7.228564608867061e-06, "loss": 0.3451, "step": 29880 }, { "epoch": 4.877841720746092, "grad_norm": 4.140880107879639, "learning_rate": 7.227948587657015e-06, "loss": 0.4409, "step": 29881 }, { "epoch": 4.878004979388597, "grad_norm": 3.38173770904541, "learning_rate": 7.227332577841824e-06, "loss": 0.3374, "step": 29882 }, { "epoch": 4.878168238031101, "grad_norm": 3.1360719203948975, "learning_rate": 7.226716579424024e-06, "loss": 0.3511, "step": 29883 }, { "epoch": 4.878331496673605, "grad_norm": 4.075927734375, "learning_rate": 7.2261005924061444e-06, "loss": 0.4485, "step": 29884 }, { "epoch": 4.87849475531611, "grad_norm": 3.25575852394104, "learning_rate": 7.225484616790718e-06, "loss": 0.3347, "step": 29885 }, { "epoch": 4.8786580139586135, "grad_norm": 3.5938777923583984, "learning_rate": 7.224868652580277e-06, "loss": 0.4141, "step": 29886 }, { "epoch": 4.878821272601118, "grad_norm": 4.150174617767334, "learning_rate": 7.224252699777352e-06, "loss": 0.4042, "step": 29887 }, { "epoch": 4.878984531243622, "grad_norm": 3.844022512435913, "learning_rate": 7.223636758384478e-06, "loss": 0.3903, "step": 29888 }, { "epoch": 4.879147789886127, "grad_norm": 3.375537395477295, "learning_rate": 7.223020828404186e-06, "loss": 0.3744, "step": 29889 }, { "epoch": 4.879311048528631, "grad_norm": 3.3453333377838135, "learning_rate": 7.222404909839005e-06, "loss": 0.3637, "step": 29890 }, { "epoch": 4.879474307171136, "grad_norm": 3.6984457969665527, "learning_rate": 7.2217890026914675e-06, "loss": 0.4189, "step": 29891 }, { "epoch": 4.87963756581364, "grad_norm": 3.820729970932007, "learning_rate": 7.221173106964107e-06, "loss": 0.4012, "step": 29892 }, { "epoch": 4.879800824456145, "grad_norm": 3.121389627456665, "learning_rate": 7.220557222659452e-06, "loss": 0.3515, "step": 29893 }, { "epoch": 4.879964083098649, "grad_norm": 4.060657978057861, "learning_rate": 7.2199413497800395e-06, "loss": 0.453, "step": 29894 }, { "epoch": 4.8801273417411535, "grad_norm": 3.86185622215271, "learning_rate": 7.2193254883283995e-06, "loss": 0.3958, "step": 29895 }, { "epoch": 4.880290600383658, "grad_norm": 3.321967124938965, "learning_rate": 7.218709638307061e-06, "loss": 0.3501, "step": 29896 }, { "epoch": 4.880453859026162, "grad_norm": 3.422311782836914, "learning_rate": 7.218093799718557e-06, "loss": 0.3451, "step": 29897 }, { "epoch": 4.880617117668667, "grad_norm": 3.6106112003326416, "learning_rate": 7.21747797256542e-06, "loss": 0.3579, "step": 29898 }, { "epoch": 4.880780376311171, "grad_norm": 3.5564000606536865, "learning_rate": 7.216862156850182e-06, "loss": 0.3616, "step": 29899 }, { "epoch": 4.880943634953676, "grad_norm": 3.4608490467071533, "learning_rate": 7.21624635257537e-06, "loss": 0.3919, "step": 29900 }, { "epoch": 4.88110689359618, "grad_norm": 4.007021427154541, "learning_rate": 7.215630559743518e-06, "loss": 0.4016, "step": 29901 }, { "epoch": 4.8812701522386845, "grad_norm": 3.3730363845825195, "learning_rate": 7.215014778357159e-06, "loss": 0.3629, "step": 29902 }, { "epoch": 4.881433410881188, "grad_norm": 3.9745497703552246, "learning_rate": 7.214399008418822e-06, "loss": 0.4113, "step": 29903 }, { "epoch": 4.8815966695236925, "grad_norm": 3.299156665802002, "learning_rate": 7.213783249931036e-06, "loss": 0.3379, "step": 29904 }, { "epoch": 4.881759928166197, "grad_norm": 3.072187662124634, "learning_rate": 7.213167502896337e-06, "loss": 0.3852, "step": 29905 }, { "epoch": 4.881923186808701, "grad_norm": 3.272679090499878, "learning_rate": 7.212551767317255e-06, "loss": 0.3739, "step": 29906 }, { "epoch": 4.882086445451206, "grad_norm": 3.663851737976074, "learning_rate": 7.211936043196319e-06, "loss": 0.3899, "step": 29907 }, { "epoch": 4.88224970409371, "grad_norm": 2.962700366973877, "learning_rate": 7.2113203305360666e-06, "loss": 0.3148, "step": 29908 }, { "epoch": 4.882412962736215, "grad_norm": 3.627309799194336, "learning_rate": 7.210704629339017e-06, "loss": 0.345, "step": 29909 }, { "epoch": 4.882576221378719, "grad_norm": 3.5624024868011475, "learning_rate": 7.210088939607709e-06, "loss": 0.4118, "step": 29910 }, { "epoch": 4.882739480021224, "grad_norm": 3.420217514038086, "learning_rate": 7.209473261344673e-06, "loss": 0.396, "step": 29911 }, { "epoch": 4.882902738663728, "grad_norm": 2.982624053955078, "learning_rate": 7.208857594552436e-06, "loss": 0.345, "step": 29912 }, { "epoch": 4.8830659973062325, "grad_norm": 3.412102222442627, "learning_rate": 7.208241939233534e-06, "loss": 0.3274, "step": 29913 }, { "epoch": 4.883229255948737, "grad_norm": 3.4710326194763184, "learning_rate": 7.207626295390495e-06, "loss": 0.3692, "step": 29914 }, { "epoch": 4.883392514591241, "grad_norm": 2.9612159729003906, "learning_rate": 7.20701066302585e-06, "loss": 0.3052, "step": 29915 }, { "epoch": 4.883555773233746, "grad_norm": 2.9386978149414062, "learning_rate": 7.206395042142127e-06, "loss": 0.3549, "step": 29916 }, { "epoch": 4.88371903187625, "grad_norm": 3.8034534454345703, "learning_rate": 7.205779432741861e-06, "loss": 0.4295, "step": 29917 }, { "epoch": 4.883882290518755, "grad_norm": 3.583158254623413, "learning_rate": 7.205163834827584e-06, "loss": 0.356, "step": 29918 }, { "epoch": 4.884045549161259, "grad_norm": 3.5631330013275146, "learning_rate": 7.2045482484018205e-06, "loss": 0.363, "step": 29919 }, { "epoch": 4.884208807803763, "grad_norm": 3.505326986312866, "learning_rate": 7.203932673467101e-06, "loss": 0.4175, "step": 29920 }, { "epoch": 4.884372066446268, "grad_norm": 2.6431875228881836, "learning_rate": 7.2033171100259604e-06, "loss": 0.2895, "step": 29921 }, { "epoch": 4.8845353250887715, "grad_norm": 3.5950143337249756, "learning_rate": 7.202701558080927e-06, "loss": 0.3988, "step": 29922 }, { "epoch": 4.884698583731276, "grad_norm": 3.179948568344116, "learning_rate": 7.202086017634532e-06, "loss": 0.3106, "step": 29923 }, { "epoch": 4.88486184237378, "grad_norm": 4.002328395843506, "learning_rate": 7.201470488689304e-06, "loss": 0.359, "step": 29924 }, { "epoch": 4.885025101016285, "grad_norm": 3.0771636962890625, "learning_rate": 7.200854971247775e-06, "loss": 0.326, "step": 29925 }, { "epoch": 4.885188359658789, "grad_norm": 3.7022838592529297, "learning_rate": 7.200239465312474e-06, "loss": 0.3622, "step": 29926 }, { "epoch": 4.885351618301294, "grad_norm": 3.607334613800049, "learning_rate": 7.1996239708859295e-06, "loss": 0.4325, "step": 29927 }, { "epoch": 4.885514876943798, "grad_norm": 5.592647075653076, "learning_rate": 7.19900848797068e-06, "loss": 0.4729, "step": 29928 }, { "epoch": 4.885678135586303, "grad_norm": 3.9885916709899902, "learning_rate": 7.198393016569243e-06, "loss": 0.3887, "step": 29929 }, { "epoch": 4.885841394228807, "grad_norm": 3.6361637115478516, "learning_rate": 7.1977775566841555e-06, "loss": 0.3915, "step": 29930 }, { "epoch": 4.8860046528713115, "grad_norm": 3.877641201019287, "learning_rate": 7.197162108317945e-06, "loss": 0.4004, "step": 29931 }, { "epoch": 4.886167911513816, "grad_norm": 4.058894634246826, "learning_rate": 7.196546671473143e-06, "loss": 0.4213, "step": 29932 }, { "epoch": 4.88633117015632, "grad_norm": 3.6393914222717285, "learning_rate": 7.195931246152281e-06, "loss": 0.3621, "step": 29933 }, { "epoch": 4.886494428798825, "grad_norm": 4.368798732757568, "learning_rate": 7.195315832357886e-06, "loss": 0.408, "step": 29934 }, { "epoch": 4.886657687441329, "grad_norm": 3.9081761837005615, "learning_rate": 7.194700430092489e-06, "loss": 0.3481, "step": 29935 }, { "epoch": 4.886820946083834, "grad_norm": 4.483236789703369, "learning_rate": 7.194085039358619e-06, "loss": 0.4355, "step": 29936 }, { "epoch": 4.886984204726337, "grad_norm": 2.5787720680236816, "learning_rate": 7.193469660158806e-06, "loss": 0.3015, "step": 29937 }, { "epoch": 4.8871474633688425, "grad_norm": 4.288017749786377, "learning_rate": 7.192854292495581e-06, "loss": 0.4713, "step": 29938 }, { "epoch": 4.887310722011346, "grad_norm": 3.5757458209991455, "learning_rate": 7.1922389363714715e-06, "loss": 0.3598, "step": 29939 }, { "epoch": 4.8874739806538505, "grad_norm": 2.9638831615448, "learning_rate": 7.1916235917890066e-06, "loss": 0.3358, "step": 29940 }, { "epoch": 4.887637239296355, "grad_norm": 3.9247524738311768, "learning_rate": 7.1910082587507155e-06, "loss": 0.4758, "step": 29941 }, { "epoch": 4.887800497938859, "grad_norm": 3.6753036975860596, "learning_rate": 7.190392937259131e-06, "loss": 0.3744, "step": 29942 }, { "epoch": 4.887963756581364, "grad_norm": 3.9215683937072754, "learning_rate": 7.189777627316776e-06, "loss": 0.3911, "step": 29943 }, { "epoch": 4.888127015223868, "grad_norm": 3.219169855117798, "learning_rate": 7.1891623289261885e-06, "loss": 0.3241, "step": 29944 }, { "epoch": 4.888290273866373, "grad_norm": 3.764248847961426, "learning_rate": 7.1885470420898906e-06, "loss": 0.4027, "step": 29945 }, { "epoch": 4.888453532508877, "grad_norm": 5.335949897766113, "learning_rate": 7.187931766810416e-06, "loss": 0.382, "step": 29946 }, { "epoch": 4.888616791151382, "grad_norm": 4.464827537536621, "learning_rate": 7.187316503090294e-06, "loss": 0.4084, "step": 29947 }, { "epoch": 4.888780049793886, "grad_norm": 4.055545806884766, "learning_rate": 7.18670125093205e-06, "loss": 0.3903, "step": 29948 }, { "epoch": 4.8889433084363905, "grad_norm": 2.908720016479492, "learning_rate": 7.186086010338214e-06, "loss": 0.3354, "step": 29949 }, { "epoch": 4.889106567078895, "grad_norm": 3.3132622241973877, "learning_rate": 7.185470781311317e-06, "loss": 0.3509, "step": 29950 }, { "epoch": 4.889269825721399, "grad_norm": 3.600301504135132, "learning_rate": 7.184855563853885e-06, "loss": 0.3966, "step": 29951 }, { "epoch": 4.889433084363904, "grad_norm": 3.8155431747436523, "learning_rate": 7.18424035796845e-06, "loss": 0.3894, "step": 29952 }, { "epoch": 4.889596343006408, "grad_norm": 3.2106235027313232, "learning_rate": 7.183625163657539e-06, "loss": 0.3621, "step": 29953 }, { "epoch": 4.889759601648913, "grad_norm": 3.7021889686584473, "learning_rate": 7.183009980923681e-06, "loss": 0.3883, "step": 29954 }, { "epoch": 4.889922860291417, "grad_norm": 3.5697543621063232, "learning_rate": 7.182394809769406e-06, "loss": 0.424, "step": 29955 }, { "epoch": 4.890086118933921, "grad_norm": 3.651552438735962, "learning_rate": 7.181779650197243e-06, "loss": 0.3736, "step": 29956 }, { "epoch": 4.890249377576425, "grad_norm": 2.8096795082092285, "learning_rate": 7.181164502209723e-06, "loss": 0.3375, "step": 29957 }, { "epoch": 4.8904126362189295, "grad_norm": 3.1241235733032227, "learning_rate": 7.180549365809366e-06, "loss": 0.3742, "step": 29958 }, { "epoch": 4.890575894861434, "grad_norm": 3.9861695766448975, "learning_rate": 7.179934240998707e-06, "loss": 0.3833, "step": 29959 }, { "epoch": 4.890739153503938, "grad_norm": 3.2380714416503906, "learning_rate": 7.1793191277802745e-06, "loss": 0.351, "step": 29960 }, { "epoch": 4.890902412146443, "grad_norm": 3.4073479175567627, "learning_rate": 7.178704026156596e-06, "loss": 0.3807, "step": 29961 }, { "epoch": 4.891065670788947, "grad_norm": 3.4376304149627686, "learning_rate": 7.178088936130199e-06, "loss": 0.4162, "step": 29962 }, { "epoch": 4.891228929431452, "grad_norm": 5.030054092407227, "learning_rate": 7.177473857703613e-06, "loss": 0.4762, "step": 29963 }, { "epoch": 4.891392188073956, "grad_norm": 4.194916248321533, "learning_rate": 7.176858790879366e-06, "loss": 0.3725, "step": 29964 }, { "epoch": 4.891555446716461, "grad_norm": 3.874088764190674, "learning_rate": 7.176243735659987e-06, "loss": 0.3693, "step": 29965 }, { "epoch": 4.891718705358965, "grad_norm": 3.3382012844085693, "learning_rate": 7.175628692048002e-06, "loss": 0.3294, "step": 29966 }, { "epoch": 4.8918819640014695, "grad_norm": 2.9875974655151367, "learning_rate": 7.175013660045946e-06, "loss": 0.3427, "step": 29967 }, { "epoch": 4.892045222643974, "grad_norm": 3.6158559322357178, "learning_rate": 7.174398639656339e-06, "loss": 0.3988, "step": 29968 }, { "epoch": 4.892208481286478, "grad_norm": 3.0149636268615723, "learning_rate": 7.173783630881712e-06, "loss": 0.3415, "step": 29969 }, { "epoch": 4.892371739928983, "grad_norm": 3.113071918487549, "learning_rate": 7.173168633724592e-06, "loss": 0.3319, "step": 29970 }, { "epoch": 4.892534998571487, "grad_norm": 3.3036091327667236, "learning_rate": 7.17255364818751e-06, "loss": 0.4137, "step": 29971 }, { "epoch": 4.892698257213992, "grad_norm": 3.4015395641326904, "learning_rate": 7.171938674272991e-06, "loss": 0.3585, "step": 29972 }, { "epoch": 4.892861515856495, "grad_norm": 3.817796468734741, "learning_rate": 7.171323711983567e-06, "loss": 0.3454, "step": 29973 }, { "epoch": 4.8930247744990005, "grad_norm": 3.599257707595825, "learning_rate": 7.1707087613217604e-06, "loss": 0.3937, "step": 29974 }, { "epoch": 4.893188033141504, "grad_norm": 2.9977495670318604, "learning_rate": 7.170093822290103e-06, "loss": 0.3023, "step": 29975 }, { "epoch": 4.8933512917840085, "grad_norm": 4.213920593261719, "learning_rate": 7.169478894891122e-06, "loss": 0.4141, "step": 29976 }, { "epoch": 4.893514550426513, "grad_norm": 4.3134613037109375, "learning_rate": 7.1688639791273454e-06, "loss": 0.3559, "step": 29977 }, { "epoch": 4.893677809069017, "grad_norm": 4.128997325897217, "learning_rate": 7.168249075001299e-06, "loss": 0.4275, "step": 29978 }, { "epoch": 4.893841067711522, "grad_norm": 4.155020713806152, "learning_rate": 7.1676341825155105e-06, "loss": 0.3507, "step": 29979 }, { "epoch": 4.894004326354026, "grad_norm": 3.4197123050689697, "learning_rate": 7.167019301672508e-06, "loss": 0.3562, "step": 29980 }, { "epoch": 4.894167584996531, "grad_norm": 3.7780632972717285, "learning_rate": 7.166404432474818e-06, "loss": 0.3482, "step": 29981 }, { "epoch": 4.894330843639035, "grad_norm": 3.4157276153564453, "learning_rate": 7.165789574924971e-06, "loss": 0.3528, "step": 29982 }, { "epoch": 4.89449410228154, "grad_norm": 3.7594797611236572, "learning_rate": 7.165174729025494e-06, "loss": 0.4051, "step": 29983 }, { "epoch": 4.894657360924044, "grad_norm": 3.423572540283203, "learning_rate": 7.1645598947789116e-06, "loss": 0.3449, "step": 29984 }, { "epoch": 4.8948206195665485, "grad_norm": 3.4730122089385986, "learning_rate": 7.163945072187754e-06, "loss": 0.3903, "step": 29985 }, { "epoch": 4.894983878209053, "grad_norm": 3.370899200439453, "learning_rate": 7.163330261254548e-06, "loss": 0.3641, "step": 29986 }, { "epoch": 4.895147136851557, "grad_norm": 3.2783985137939453, "learning_rate": 7.1627154619818206e-06, "loss": 0.3964, "step": 29987 }, { "epoch": 4.895310395494062, "grad_norm": 3.073366403579712, "learning_rate": 7.1621006743720975e-06, "loss": 0.3605, "step": 29988 }, { "epoch": 4.895473654136566, "grad_norm": 3.4020209312438965, "learning_rate": 7.161485898427907e-06, "loss": 0.3522, "step": 29989 }, { "epoch": 4.89563691277907, "grad_norm": 3.7696785926818848, "learning_rate": 7.160871134151776e-06, "loss": 0.4136, "step": 29990 }, { "epoch": 4.895800171421575, "grad_norm": 3.771542549133301, "learning_rate": 7.160256381546231e-06, "loss": 0.3381, "step": 29991 }, { "epoch": 4.895963430064079, "grad_norm": 3.559600353240967, "learning_rate": 7.159641640613801e-06, "loss": 0.4049, "step": 29992 }, { "epoch": 4.896126688706583, "grad_norm": 3.036813259124756, "learning_rate": 7.159026911357009e-06, "loss": 0.3278, "step": 29993 }, { "epoch": 4.8962899473490875, "grad_norm": 3.779021978378296, "learning_rate": 7.1584121937783864e-06, "loss": 0.4186, "step": 29994 }, { "epoch": 4.896453205991592, "grad_norm": 3.2399773597717285, "learning_rate": 7.157797487880459e-06, "loss": 0.3247, "step": 29995 }, { "epoch": 4.896616464634096, "grad_norm": 4.056601524353027, "learning_rate": 7.1571827936657565e-06, "loss": 0.4471, "step": 29996 }, { "epoch": 4.896779723276601, "grad_norm": 3.1818408966064453, "learning_rate": 7.156568111136797e-06, "loss": 0.3503, "step": 29997 }, { "epoch": 4.896942981919105, "grad_norm": 4.323853969573975, "learning_rate": 7.155953440296113e-06, "loss": 0.4211, "step": 29998 }, { "epoch": 4.89710624056161, "grad_norm": 3.7544727325439453, "learning_rate": 7.155338781146231e-06, "loss": 0.37, "step": 29999 }, { "epoch": 4.897269499204114, "grad_norm": 3.826444149017334, "learning_rate": 7.154724133689677e-06, "loss": 0.4446, "step": 30000 } ], "logging_steps": 1, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.6938673876486246e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }