{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8379185572339325, "eval_steps": 500, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011486990982712079, "grad_norm": 1.0950555801391602, "learning_rate": 0.0001, "loss": 2.0384, "step": 1 }, { "epoch": 0.00022973981965424158, "grad_norm": 0.6104637384414673, "learning_rate": 0.0001, "loss": 2.0026, "step": 2 }, { "epoch": 0.00034460972948136237, "grad_norm": 1.6990306377410889, "learning_rate": 0.0001, "loss": 2.2147, "step": 3 }, { "epoch": 0.00045947963930848316, "grad_norm": 0.5962863564491272, "learning_rate": 0.0001, "loss": 1.6379, "step": 4 }, { "epoch": 0.0005743495491356039, "grad_norm": 0.5962501764297485, "learning_rate": 0.0001, "loss": 1.9908, "step": 5 }, { "epoch": 0.0006892194589627247, "grad_norm": 0.46677908301353455, "learning_rate": 0.0001, "loss": 1.8843, "step": 6 }, { "epoch": 0.0008040893687898455, "grad_norm": 0.5024643540382385, "learning_rate": 0.0001, "loss": 2.0395, "step": 7 }, { "epoch": 0.0009189592786169663, "grad_norm": 0.3814510703086853, "learning_rate": 0.0001, "loss": 1.7637, "step": 8 }, { "epoch": 0.0010338291884440872, "grad_norm": 0.4189926087856293, "learning_rate": 0.0001, "loss": 1.9592, "step": 9 }, { "epoch": 0.0011486990982712078, "grad_norm": 0.44599318504333496, "learning_rate": 0.0001, "loss": 1.7664, "step": 10 }, { "epoch": 0.0012635690080983286, "grad_norm": 0.3737109899520874, "learning_rate": 0.0001, "loss": 1.7794, "step": 11 }, { "epoch": 0.0013784389179254495, "grad_norm": 0.39896202087402344, "learning_rate": 0.0001, "loss": 1.8159, "step": 12 }, { "epoch": 0.0014933088277525703, "grad_norm": 0.3495546281337738, "learning_rate": 0.0001, "loss": 1.739, "step": 13 }, { "epoch": 0.001608178737579691, "grad_norm": 0.4069090187549591, "learning_rate": 0.0001, "loss": 1.9259, "step": 14 }, { "epoch": 0.0017230486474068118, "grad_norm": 0.4087446331977844, "learning_rate": 0.0001, "loss": 1.8746, "step": 15 }, { "epoch": 0.0018379185572339326, "grad_norm": 0.33482518792152405, "learning_rate": 0.0001, "loss": 1.6317, "step": 16 }, { "epoch": 0.0019527884670610533, "grad_norm": 0.37447136640548706, "learning_rate": 0.0001, "loss": 2.0974, "step": 17 }, { "epoch": 0.0020676583768881743, "grad_norm": 0.359221875667572, "learning_rate": 0.0001, "loss": 1.7629, "step": 18 }, { "epoch": 0.0021825282867152947, "grad_norm": 0.38664090633392334, "learning_rate": 0.0001, "loss": 1.9719, "step": 19 }, { "epoch": 0.0022973981965424156, "grad_norm": 0.326967716217041, "learning_rate": 0.0001, "loss": 1.6188, "step": 20 }, { "epoch": 0.0024122681063695364, "grad_norm": 0.3577702045440674, "learning_rate": 0.0001, "loss": 1.9068, "step": 21 }, { "epoch": 0.0025271380161966573, "grad_norm": 0.3471507728099823, "learning_rate": 0.0001, "loss": 1.8111, "step": 22 }, { "epoch": 0.002642007926023778, "grad_norm": 0.3563218414783478, "learning_rate": 0.0001, "loss": 1.862, "step": 23 }, { "epoch": 0.002756877835850899, "grad_norm": 0.38289228081703186, "learning_rate": 0.0001, "loss": 1.9097, "step": 24 }, { "epoch": 0.0028717477456780198, "grad_norm": 0.37357401847839355, "learning_rate": 0.0001, "loss": 1.9897, "step": 25 }, { "epoch": 0.0029866176555051406, "grad_norm": 0.3501838445663452, "learning_rate": 0.0001, "loss": 1.9073, "step": 26 }, { "epoch": 0.003101487565332261, "grad_norm": 0.3973883390426636, "learning_rate": 0.0001, "loss": 1.961, "step": 27 }, { "epoch": 0.003216357475159382, "grad_norm": 0.44121477007865906, "learning_rate": 0.0001, "loss": 1.9053, "step": 28 }, { "epoch": 0.0033312273849865027, "grad_norm": 0.35977938771247864, "learning_rate": 0.0001, "loss": 1.8474, "step": 29 }, { "epoch": 0.0034460972948136236, "grad_norm": 0.3839881420135498, "learning_rate": 0.0001, "loss": 1.9948, "step": 30 }, { "epoch": 0.0035609672046407444, "grad_norm": 0.3463224768638611, "learning_rate": 0.0001, "loss": 1.7018, "step": 31 }, { "epoch": 0.0036758371144678652, "grad_norm": 0.3363831639289856, "learning_rate": 0.0001, "loss": 1.7479, "step": 32 }, { "epoch": 0.003790707024294986, "grad_norm": 0.3735474944114685, "learning_rate": 0.0001, "loss": 1.7217, "step": 33 }, { "epoch": 0.0039055769341221065, "grad_norm": 0.3522309958934784, "learning_rate": 0.0001, "loss": 1.9002, "step": 34 }, { "epoch": 0.004020446843949228, "grad_norm": 0.35659080743789673, "learning_rate": 0.0001, "loss": 1.9188, "step": 35 }, { "epoch": 0.004135316753776349, "grad_norm": 0.3676978647708893, "learning_rate": 0.0001, "loss": 2.0147, "step": 36 }, { "epoch": 0.0042501866636034695, "grad_norm": 0.3312574326992035, "learning_rate": 0.0001, "loss": 1.6601, "step": 37 }, { "epoch": 0.0043650565734305894, "grad_norm": 0.31456542015075684, "learning_rate": 0.0001, "loss": 1.7814, "step": 38 }, { "epoch": 0.00447992648325771, "grad_norm": 0.34201157093048096, "learning_rate": 0.0001, "loss": 1.8276, "step": 39 }, { "epoch": 0.004594796393084831, "grad_norm": 0.3848654329776764, "learning_rate": 0.0001, "loss": 1.9418, "step": 40 }, { "epoch": 0.004709666302911952, "grad_norm": 0.333026260137558, "learning_rate": 0.0001, "loss": 1.8563, "step": 41 }, { "epoch": 0.004824536212739073, "grad_norm": 0.3494780659675598, "learning_rate": 0.0001, "loss": 1.8389, "step": 42 }, { "epoch": 0.004939406122566194, "grad_norm": 0.3298174738883972, "learning_rate": 0.0001, "loss": 1.8113, "step": 43 }, { "epoch": 0.0050542760323933145, "grad_norm": 0.33735814690589905, "learning_rate": 0.0001, "loss": 1.7348, "step": 44 }, { "epoch": 0.005169145942220435, "grad_norm": 0.3232858180999756, "learning_rate": 0.0001, "loss": 1.7709, "step": 45 }, { "epoch": 0.005284015852047556, "grad_norm": 0.34412050247192383, "learning_rate": 0.0001, "loss": 1.8392, "step": 46 }, { "epoch": 0.005398885761874677, "grad_norm": 0.36024415493011475, "learning_rate": 0.0001, "loss": 1.8496, "step": 47 }, { "epoch": 0.005513755671701798, "grad_norm": 0.36573317646980286, "learning_rate": 0.0001, "loss": 2.0788, "step": 48 }, { "epoch": 0.005628625581528919, "grad_norm": 0.36239179968833923, "learning_rate": 0.0001, "loss": 1.8717, "step": 49 }, { "epoch": 0.0057434954913560396, "grad_norm": 0.35284945368766785, "learning_rate": 0.0001, "loss": 2.0221, "step": 50 }, { "epoch": 0.00585836540118316, "grad_norm": 0.33570870757102966, "learning_rate": 0.0001, "loss": 1.9219, "step": 51 }, { "epoch": 0.005973235311010281, "grad_norm": 0.36989328265190125, "learning_rate": 0.0001, "loss": 1.848, "step": 52 }, { "epoch": 0.006088105220837401, "grad_norm": 0.3059888482093811, "learning_rate": 0.0001, "loss": 1.7479, "step": 53 }, { "epoch": 0.006202975130664522, "grad_norm": 0.3483046293258667, "learning_rate": 0.0001, "loss": 1.9025, "step": 54 }, { "epoch": 0.006317845040491643, "grad_norm": 0.33112019300460815, "learning_rate": 0.0001, "loss": 1.7152, "step": 55 }, { "epoch": 0.006432714950318764, "grad_norm": 0.3339669406414032, "learning_rate": 0.0001, "loss": 1.6784, "step": 56 }, { "epoch": 0.006547584860145885, "grad_norm": 0.3434010446071625, "learning_rate": 0.0001, "loss": 1.9298, "step": 57 }, { "epoch": 0.0066624547699730054, "grad_norm": 0.3272426724433899, "learning_rate": 0.0001, "loss": 1.7955, "step": 58 }, { "epoch": 0.006777324679800126, "grad_norm": 0.3431703746318817, "learning_rate": 0.0001, "loss": 1.835, "step": 59 }, { "epoch": 0.006892194589627247, "grad_norm": 0.36123043298721313, "learning_rate": 0.0001, "loss": 1.8674, "step": 60 }, { "epoch": 0.007007064499454368, "grad_norm": 0.3534761071205139, "learning_rate": 0.0001, "loss": 1.8935, "step": 61 }, { "epoch": 0.007121934409281489, "grad_norm": 0.3585789203643799, "learning_rate": 0.0001, "loss": 1.8238, "step": 62 }, { "epoch": 0.00723680431910861, "grad_norm": 0.3434857726097107, "learning_rate": 0.0001, "loss": 1.7009, "step": 63 }, { "epoch": 0.0073516742289357305, "grad_norm": 0.3498717248439789, "learning_rate": 0.0001, "loss": 1.854, "step": 64 }, { "epoch": 0.007466544138762851, "grad_norm": 0.37057748436927795, "learning_rate": 0.0001, "loss": 1.9863, "step": 65 }, { "epoch": 0.007581414048589972, "grad_norm": 0.36728590726852417, "learning_rate": 0.0001, "loss": 1.8498, "step": 66 }, { "epoch": 0.007696283958417093, "grad_norm": 0.34580233693122864, "learning_rate": 0.0001, "loss": 2.0121, "step": 67 }, { "epoch": 0.007811153868244213, "grad_norm": 0.3278733491897583, "learning_rate": 0.0001, "loss": 1.8673, "step": 68 }, { "epoch": 0.007926023778071334, "grad_norm": 0.33521461486816406, "learning_rate": 0.0001, "loss": 1.7693, "step": 69 }, { "epoch": 0.008040893687898456, "grad_norm": 0.34356561303138733, "learning_rate": 0.0001, "loss": 1.9444, "step": 70 }, { "epoch": 0.008155763597725576, "grad_norm": 0.38897210359573364, "learning_rate": 0.0001, "loss": 1.9214, "step": 71 }, { "epoch": 0.008270633507552697, "grad_norm": 0.34927767515182495, "learning_rate": 0.0001, "loss": 1.8995, "step": 72 }, { "epoch": 0.008385503417379817, "grad_norm": 0.3214438259601593, "learning_rate": 0.0001, "loss": 1.6437, "step": 73 }, { "epoch": 0.008500373327206939, "grad_norm": 0.32360997796058655, "learning_rate": 0.0001, "loss": 1.7942, "step": 74 }, { "epoch": 0.008615243237034059, "grad_norm": 0.34416717290878296, "learning_rate": 0.0001, "loss": 1.8455, "step": 75 }, { "epoch": 0.008730113146861179, "grad_norm": 0.3457936644554138, "learning_rate": 0.0001, "loss": 1.9531, "step": 76 }, { "epoch": 0.0088449830566883, "grad_norm": 0.3502795398235321, "learning_rate": 0.0001, "loss": 1.7687, "step": 77 }, { "epoch": 0.00895985296651542, "grad_norm": 0.348290354013443, "learning_rate": 0.0001, "loss": 1.7663, "step": 78 }, { "epoch": 0.009074722876342542, "grad_norm": 0.32894769310951233, "learning_rate": 0.0001, "loss": 1.7323, "step": 79 }, { "epoch": 0.009189592786169662, "grad_norm": 0.32729727029800415, "learning_rate": 0.0001, "loss": 1.7761, "step": 80 }, { "epoch": 0.009304462695996784, "grad_norm": 0.3347381055355072, "learning_rate": 0.0001, "loss": 1.9055, "step": 81 }, { "epoch": 0.009419332605823904, "grad_norm": 0.35440585017204285, "learning_rate": 0.0001, "loss": 1.9095, "step": 82 }, { "epoch": 0.009534202515651026, "grad_norm": 0.33549365401268005, "learning_rate": 0.0001, "loss": 1.7806, "step": 83 }, { "epoch": 0.009649072425478146, "grad_norm": 0.37921905517578125, "learning_rate": 0.0001, "loss": 2.0155, "step": 84 }, { "epoch": 0.009763942335305267, "grad_norm": 0.3329186737537384, "learning_rate": 0.0001, "loss": 1.8357, "step": 85 }, { "epoch": 0.009878812245132387, "grad_norm": 0.36343327164649963, "learning_rate": 0.0001, "loss": 1.9286, "step": 86 }, { "epoch": 0.009993682154959509, "grad_norm": 0.3510076403617859, "learning_rate": 0.0001, "loss": 1.8747, "step": 87 }, { "epoch": 0.010108552064786629, "grad_norm": 0.32831454277038574, "learning_rate": 0.0001, "loss": 1.5749, "step": 88 }, { "epoch": 0.01022342197461375, "grad_norm": 0.3502967655658722, "learning_rate": 0.0001, "loss": 1.9506, "step": 89 }, { "epoch": 0.01033829188444087, "grad_norm": 0.34648793935775757, "learning_rate": 0.0001, "loss": 1.8406, "step": 90 }, { "epoch": 0.01045316179426799, "grad_norm": 0.35201138257980347, "learning_rate": 0.0001, "loss": 1.8329, "step": 91 }, { "epoch": 0.010568031704095112, "grad_norm": 0.3722350299358368, "learning_rate": 0.0001, "loss": 1.9775, "step": 92 }, { "epoch": 0.010682901613922232, "grad_norm": 0.3540225327014923, "learning_rate": 0.0001, "loss": 1.8165, "step": 93 }, { "epoch": 0.010797771523749354, "grad_norm": 0.33188265562057495, "learning_rate": 0.0001, "loss": 1.9455, "step": 94 }, { "epoch": 0.010912641433576474, "grad_norm": 0.32253599166870117, "learning_rate": 0.0001, "loss": 1.7694, "step": 95 }, { "epoch": 0.011027511343403596, "grad_norm": 0.34879428148269653, "learning_rate": 0.0001, "loss": 1.9055, "step": 96 }, { "epoch": 0.011142381253230716, "grad_norm": 0.3185734450817108, "learning_rate": 0.0001, "loss": 1.6201, "step": 97 }, { "epoch": 0.011257251163057837, "grad_norm": 0.34270620346069336, "learning_rate": 0.0001, "loss": 1.8956, "step": 98 }, { "epoch": 0.011372121072884957, "grad_norm": 0.3397463858127594, "learning_rate": 0.0001, "loss": 1.8395, "step": 99 }, { "epoch": 0.011486990982712079, "grad_norm": 0.587956428527832, "learning_rate": 0.0001, "loss": 1.67, "step": 100 }, { "epoch": 0.011601860892539199, "grad_norm": 0.34749558568000793, "learning_rate": 0.0001, "loss": 1.9738, "step": 101 }, { "epoch": 0.01171673080236632, "grad_norm": 0.31300491094589233, "learning_rate": 0.0001, "loss": 1.8049, "step": 102 }, { "epoch": 0.01183160071219344, "grad_norm": 0.3428112864494324, "learning_rate": 0.0001, "loss": 1.8066, "step": 103 }, { "epoch": 0.011946470622020562, "grad_norm": 0.3374922573566437, "learning_rate": 0.0001, "loss": 1.8526, "step": 104 }, { "epoch": 0.012061340531847682, "grad_norm": 0.32223325967788696, "learning_rate": 0.0001, "loss": 1.7013, "step": 105 }, { "epoch": 0.012176210441674802, "grad_norm": 0.3365132808685303, "learning_rate": 0.0001, "loss": 1.7933, "step": 106 }, { "epoch": 0.012291080351501924, "grad_norm": 0.31686636805534363, "learning_rate": 0.0001, "loss": 1.5097, "step": 107 }, { "epoch": 0.012405950261329044, "grad_norm": 0.3461359441280365, "learning_rate": 0.0001, "loss": 1.563, "step": 108 }, { "epoch": 0.012520820171156166, "grad_norm": 0.31762605905532837, "learning_rate": 0.0001, "loss": 1.665, "step": 109 }, { "epoch": 0.012635690080983286, "grad_norm": 0.3150866627693176, "learning_rate": 0.0001, "loss": 1.6591, "step": 110 }, { "epoch": 0.012750559990810408, "grad_norm": 0.3498469889163971, "learning_rate": 0.0001, "loss": 1.8973, "step": 111 }, { "epoch": 0.012865429900637528, "grad_norm": 0.3512935936450958, "learning_rate": 0.0001, "loss": 1.9445, "step": 112 }, { "epoch": 0.01298029981046465, "grad_norm": 0.3678135573863983, "learning_rate": 0.0001, "loss": 1.7957, "step": 113 }, { "epoch": 0.01309516972029177, "grad_norm": 0.3330201208591461, "learning_rate": 0.0001, "loss": 1.7507, "step": 114 }, { "epoch": 0.013210039630118891, "grad_norm": 0.33755484223365784, "learning_rate": 0.0001, "loss": 1.6139, "step": 115 }, { "epoch": 0.013324909539946011, "grad_norm": 0.34242576360702515, "learning_rate": 0.0001, "loss": 1.8006, "step": 116 }, { "epoch": 0.013439779449773133, "grad_norm": 0.35527321696281433, "learning_rate": 0.0001, "loss": 1.8855, "step": 117 }, { "epoch": 0.013554649359600253, "grad_norm": 0.3338663578033447, "learning_rate": 0.0001, "loss": 1.7004, "step": 118 }, { "epoch": 0.013669519269427374, "grad_norm": 0.3488336503505707, "learning_rate": 0.0001, "loss": 1.7168, "step": 119 }, { "epoch": 0.013784389179254494, "grad_norm": 0.3300139605998993, "learning_rate": 0.0001, "loss": 1.7849, "step": 120 }, { "epoch": 0.013899259089081614, "grad_norm": 0.29988691210746765, "learning_rate": 0.0001, "loss": 1.476, "step": 121 }, { "epoch": 0.014014128998908736, "grad_norm": 0.3436170220375061, "learning_rate": 0.0001, "loss": 1.7939, "step": 122 }, { "epoch": 0.014128998908735856, "grad_norm": 0.3263620436191559, "learning_rate": 0.0001, "loss": 1.7402, "step": 123 }, { "epoch": 0.014243868818562978, "grad_norm": 0.3402661383152008, "learning_rate": 0.0001, "loss": 1.7146, "step": 124 }, { "epoch": 0.014358738728390098, "grad_norm": 0.3439052700996399, "learning_rate": 0.0001, "loss": 1.7521, "step": 125 }, { "epoch": 0.01447360863821722, "grad_norm": 0.3327753245830536, "learning_rate": 0.0001, "loss": 1.8139, "step": 126 }, { "epoch": 0.01458847854804434, "grad_norm": 0.3490423560142517, "learning_rate": 0.0001, "loss": 1.9259, "step": 127 }, { "epoch": 0.014703348457871461, "grad_norm": 0.33834782242774963, "learning_rate": 0.0001, "loss": 1.8205, "step": 128 }, { "epoch": 0.014818218367698581, "grad_norm": 0.34202057123184204, "learning_rate": 0.0001, "loss": 1.7071, "step": 129 }, { "epoch": 0.014933088277525703, "grad_norm": 0.31165608763694763, "learning_rate": 0.0001, "loss": 1.7411, "step": 130 }, { "epoch": 0.015047958187352823, "grad_norm": 0.3511159420013428, "learning_rate": 0.0001, "loss": 1.8655, "step": 131 }, { "epoch": 0.015162828097179944, "grad_norm": 0.3437829613685608, "learning_rate": 0.0001, "loss": 1.9517, "step": 132 }, { "epoch": 0.015277698007007064, "grad_norm": 0.3470558822154999, "learning_rate": 0.0001, "loss": 1.6335, "step": 133 }, { "epoch": 0.015392567916834186, "grad_norm": 0.3070822060108185, "learning_rate": 0.0001, "loss": 1.6165, "step": 134 }, { "epoch": 0.015507437826661306, "grad_norm": 0.3400954306125641, "learning_rate": 0.0001, "loss": 1.8433, "step": 135 }, { "epoch": 0.015622307736488426, "grad_norm": 0.37735289335250854, "learning_rate": 0.0001, "loss": 1.742, "step": 136 }, { "epoch": 0.015737177646315548, "grad_norm": 0.33449429273605347, "learning_rate": 0.0001, "loss": 1.5548, "step": 137 }, { "epoch": 0.015852047556142668, "grad_norm": 0.3531495928764343, "learning_rate": 0.0001, "loss": 1.9361, "step": 138 }, { "epoch": 0.015966917465969788, "grad_norm": 0.3543091118335724, "learning_rate": 0.0001, "loss": 1.707, "step": 139 }, { "epoch": 0.01608178737579691, "grad_norm": 0.3521219789981842, "learning_rate": 0.0001, "loss": 1.7885, "step": 140 }, { "epoch": 0.01619665728562403, "grad_norm": 0.3496558368206024, "learning_rate": 0.0001, "loss": 1.6586, "step": 141 }, { "epoch": 0.01631152719545115, "grad_norm": 0.35304608941078186, "learning_rate": 0.0001, "loss": 1.774, "step": 142 }, { "epoch": 0.01642639710527827, "grad_norm": 0.3590501546859741, "learning_rate": 0.0001, "loss": 1.8743, "step": 143 }, { "epoch": 0.016541267015105394, "grad_norm": 0.30004453659057617, "learning_rate": 0.0001, "loss": 1.6433, "step": 144 }, { "epoch": 0.016656136924932514, "grad_norm": 0.3431392014026642, "learning_rate": 0.0001, "loss": 1.7894, "step": 145 }, { "epoch": 0.016771006834759634, "grad_norm": 0.38148149847984314, "learning_rate": 0.0001, "loss": 1.9799, "step": 146 }, { "epoch": 0.016885876744586754, "grad_norm": 0.35888367891311646, "learning_rate": 0.0001, "loss": 2.0782, "step": 147 }, { "epoch": 0.017000746654413878, "grad_norm": 0.3419305980205536, "learning_rate": 0.0001, "loss": 1.8875, "step": 148 }, { "epoch": 0.017115616564240998, "grad_norm": 0.33760684728622437, "learning_rate": 0.0001, "loss": 1.7646, "step": 149 }, { "epoch": 0.017230486474068118, "grad_norm": 0.37631165981292725, "learning_rate": 0.0001, "loss": 1.7915, "step": 150 }, { "epoch": 0.017345356383895238, "grad_norm": 0.3630012273788452, "learning_rate": 0.0001, "loss": 1.9777, "step": 151 }, { "epoch": 0.017460226293722358, "grad_norm": 0.3230808973312378, "learning_rate": 0.0001, "loss": 1.73, "step": 152 }, { "epoch": 0.01757509620354948, "grad_norm": 0.38511770963668823, "learning_rate": 0.0001, "loss": 1.9186, "step": 153 }, { "epoch": 0.0176899661133766, "grad_norm": 0.32792919874191284, "learning_rate": 0.0001, "loss": 1.7209, "step": 154 }, { "epoch": 0.01780483602320372, "grad_norm": 0.3158191442489624, "learning_rate": 0.0001, "loss": 1.6447, "step": 155 }, { "epoch": 0.01791970593303084, "grad_norm": 0.36141133308410645, "learning_rate": 0.0001, "loss": 1.8623, "step": 156 }, { "epoch": 0.018034575842857965, "grad_norm": 0.3470099866390228, "learning_rate": 0.0001, "loss": 1.7607, "step": 157 }, { "epoch": 0.018149445752685085, "grad_norm": 0.31708627939224243, "learning_rate": 0.0001, "loss": 1.8103, "step": 158 }, { "epoch": 0.018264315662512205, "grad_norm": 0.3248085677623749, "learning_rate": 0.0001, "loss": 1.8378, "step": 159 }, { "epoch": 0.018379185572339325, "grad_norm": 0.32634925842285156, "learning_rate": 0.0001, "loss": 1.6819, "step": 160 }, { "epoch": 0.018494055482166448, "grad_norm": 0.33397799730300903, "learning_rate": 0.0001, "loss": 1.5416, "step": 161 }, { "epoch": 0.018608925391993568, "grad_norm": 0.3550772964954376, "learning_rate": 0.0001, "loss": 1.9381, "step": 162 }, { "epoch": 0.018723795301820688, "grad_norm": 0.3159865438938141, "learning_rate": 0.0001, "loss": 1.6508, "step": 163 }, { "epoch": 0.018838665211647808, "grad_norm": 0.3258025348186493, "learning_rate": 0.0001, "loss": 1.6776, "step": 164 }, { "epoch": 0.01895353512147493, "grad_norm": 0.3488035798072815, "learning_rate": 0.0001, "loss": 2.0627, "step": 165 }, { "epoch": 0.01906840503130205, "grad_norm": 0.3233996033668518, "learning_rate": 0.0001, "loss": 1.7189, "step": 166 }, { "epoch": 0.01918327494112917, "grad_norm": 0.34753552079200745, "learning_rate": 0.0001, "loss": 1.9096, "step": 167 }, { "epoch": 0.01929814485095629, "grad_norm": 0.3238770067691803, "learning_rate": 0.0001, "loss": 1.6883, "step": 168 }, { "epoch": 0.01941301476078341, "grad_norm": 0.3520447611808777, "learning_rate": 0.0001, "loss": 1.8742, "step": 169 }, { "epoch": 0.019527884670610535, "grad_norm": 0.35114407539367676, "learning_rate": 0.0001, "loss": 1.9098, "step": 170 }, { "epoch": 0.019642754580437655, "grad_norm": 0.3472774922847748, "learning_rate": 0.0001, "loss": 1.6165, "step": 171 }, { "epoch": 0.019757624490264775, "grad_norm": 0.33552825450897217, "learning_rate": 0.0001, "loss": 1.8656, "step": 172 }, { "epoch": 0.019872494400091895, "grad_norm": 0.3363969027996063, "learning_rate": 0.0001, "loss": 1.8223, "step": 173 }, { "epoch": 0.019987364309919018, "grad_norm": 0.3562895655632019, "learning_rate": 0.0001, "loss": 2.0164, "step": 174 }, { "epoch": 0.020102234219746138, "grad_norm": 0.3177125155925751, "learning_rate": 0.0001, "loss": 1.6151, "step": 175 }, { "epoch": 0.020217104129573258, "grad_norm": 0.33565396070480347, "learning_rate": 0.0001, "loss": 1.8483, "step": 176 }, { "epoch": 0.020331974039400378, "grad_norm": 0.3205985128879547, "learning_rate": 0.0001, "loss": 1.9118, "step": 177 }, { "epoch": 0.0204468439492275, "grad_norm": 0.32828885316848755, "learning_rate": 0.0001, "loss": 1.7084, "step": 178 }, { "epoch": 0.02056171385905462, "grad_norm": 0.3119982182979584, "learning_rate": 0.0001, "loss": 1.6665, "step": 179 }, { "epoch": 0.02067658376888174, "grad_norm": 0.34377679228782654, "learning_rate": 0.0001, "loss": 1.8481, "step": 180 }, { "epoch": 0.02079145367870886, "grad_norm": 0.3235953152179718, "learning_rate": 0.0001, "loss": 1.8979, "step": 181 }, { "epoch": 0.02090632358853598, "grad_norm": 0.35601556301116943, "learning_rate": 0.0001, "loss": 1.8469, "step": 182 }, { "epoch": 0.021021193498363105, "grad_norm": 0.37358132004737854, "learning_rate": 0.0001, "loss": 2.0467, "step": 183 }, { "epoch": 0.021136063408190225, "grad_norm": 0.31928551197052, "learning_rate": 0.0001, "loss": 1.6332, "step": 184 }, { "epoch": 0.021250933318017345, "grad_norm": 0.33927685022354126, "learning_rate": 0.0001, "loss": 1.8849, "step": 185 }, { "epoch": 0.021365803227844465, "grad_norm": 0.3599051237106323, "learning_rate": 0.0001, "loss": 1.9486, "step": 186 }, { "epoch": 0.021480673137671588, "grad_norm": 0.32173585891723633, "learning_rate": 0.0001, "loss": 1.7674, "step": 187 }, { "epoch": 0.021595543047498708, "grad_norm": 0.3629266023635864, "learning_rate": 0.0001, "loss": 1.7596, "step": 188 }, { "epoch": 0.021710412957325828, "grad_norm": 0.31827229261398315, "learning_rate": 0.0001, "loss": 1.6838, "step": 189 }, { "epoch": 0.021825282867152948, "grad_norm": 0.3418801426887512, "learning_rate": 0.0001, "loss": 1.6405, "step": 190 }, { "epoch": 0.02194015277698007, "grad_norm": 0.3373776376247406, "learning_rate": 0.0001, "loss": 1.7593, "step": 191 }, { "epoch": 0.02205502268680719, "grad_norm": 0.32532092928886414, "learning_rate": 0.0001, "loss": 1.8815, "step": 192 }, { "epoch": 0.02216989259663431, "grad_norm": 0.3122539520263672, "learning_rate": 0.0001, "loss": 1.7756, "step": 193 }, { "epoch": 0.02228476250646143, "grad_norm": 0.3185436725616455, "learning_rate": 0.0001, "loss": 1.6333, "step": 194 }, { "epoch": 0.022399632416288555, "grad_norm": 0.3428870439529419, "learning_rate": 0.0001, "loss": 1.7126, "step": 195 }, { "epoch": 0.022514502326115675, "grad_norm": 0.345682293176651, "learning_rate": 0.0001, "loss": 1.7324, "step": 196 }, { "epoch": 0.022629372235942795, "grad_norm": 0.33158332109451294, "learning_rate": 0.0001, "loss": 1.78, "step": 197 }, { "epoch": 0.022744242145769915, "grad_norm": 0.3336940109729767, "learning_rate": 0.0001, "loss": 1.7551, "step": 198 }, { "epoch": 0.022859112055597035, "grad_norm": 0.3249858319759369, "learning_rate": 0.0001, "loss": 1.667, "step": 199 }, { "epoch": 0.022973981965424158, "grad_norm": 0.3251815736293793, "learning_rate": 0.0001, "loss": 1.6737, "step": 200 }, { "epoch": 0.023088851875251278, "grad_norm": 0.3444681763648987, "learning_rate": 0.0001, "loss": 1.6374, "step": 201 }, { "epoch": 0.023203721785078398, "grad_norm": 0.33435139060020447, "learning_rate": 0.0001, "loss": 1.7646, "step": 202 }, { "epoch": 0.023318591694905518, "grad_norm": 0.33999738097190857, "learning_rate": 0.0001, "loss": 1.8351, "step": 203 }, { "epoch": 0.02343346160473264, "grad_norm": 0.33522528409957886, "learning_rate": 0.0001, "loss": 1.5754, "step": 204 }, { "epoch": 0.02354833151455976, "grad_norm": 0.343474805355072, "learning_rate": 0.0001, "loss": 1.7712, "step": 205 }, { "epoch": 0.02366320142438688, "grad_norm": 0.35150644183158875, "learning_rate": 0.0001, "loss": 1.9298, "step": 206 }, { "epoch": 0.023778071334214, "grad_norm": 0.3332938253879547, "learning_rate": 0.0001, "loss": 1.6533, "step": 207 }, { "epoch": 0.023892941244041125, "grad_norm": 0.34478330612182617, "learning_rate": 0.0001, "loss": 1.6317, "step": 208 }, { "epoch": 0.024007811153868245, "grad_norm": 0.36427655816078186, "learning_rate": 0.0001, "loss": 1.7768, "step": 209 }, { "epoch": 0.024122681063695365, "grad_norm": 0.33923599123954773, "learning_rate": 0.0001, "loss": 1.8095, "step": 210 }, { "epoch": 0.024237550973522485, "grad_norm": 0.342052161693573, "learning_rate": 0.0001, "loss": 1.6883, "step": 211 }, { "epoch": 0.024352420883349605, "grad_norm": 0.3708147704601288, "learning_rate": 0.0001, "loss": 1.9331, "step": 212 }, { "epoch": 0.02446729079317673, "grad_norm": 0.3495383858680725, "learning_rate": 0.0001, "loss": 1.8306, "step": 213 }, { "epoch": 0.02458216070300385, "grad_norm": 0.34303730726242065, "learning_rate": 0.0001, "loss": 1.848, "step": 214 }, { "epoch": 0.02469703061283097, "grad_norm": 0.29449400305747986, "learning_rate": 0.0001, "loss": 1.6473, "step": 215 }, { "epoch": 0.024811900522658088, "grad_norm": 0.31254515051841736, "learning_rate": 0.0001, "loss": 1.6091, "step": 216 }, { "epoch": 0.02492677043248521, "grad_norm": 0.3257744014263153, "learning_rate": 0.0001, "loss": 1.5715, "step": 217 }, { "epoch": 0.02504164034231233, "grad_norm": 0.37795010209083557, "learning_rate": 0.0001, "loss": 1.8833, "step": 218 }, { "epoch": 0.02515651025213945, "grad_norm": 0.3558560311794281, "learning_rate": 0.0001, "loss": 1.7896, "step": 219 }, { "epoch": 0.02527138016196657, "grad_norm": 0.3125206530094147, "learning_rate": 0.0001, "loss": 1.7799, "step": 220 }, { "epoch": 0.025386250071793695, "grad_norm": 0.37031280994415283, "learning_rate": 0.0001, "loss": 1.7588, "step": 221 }, { "epoch": 0.025501119981620815, "grad_norm": 0.32977885007858276, "learning_rate": 0.0001, "loss": 1.8772, "step": 222 }, { "epoch": 0.025615989891447935, "grad_norm": 0.3385184407234192, "learning_rate": 0.0001, "loss": 1.7859, "step": 223 }, { "epoch": 0.025730859801275055, "grad_norm": 0.3234337866306305, "learning_rate": 0.0001, "loss": 1.8324, "step": 224 }, { "epoch": 0.02584572971110218, "grad_norm": 0.33667194843292236, "learning_rate": 0.0001, "loss": 1.8861, "step": 225 }, { "epoch": 0.0259605996209293, "grad_norm": 0.3473112881183624, "learning_rate": 0.0001, "loss": 1.8953, "step": 226 }, { "epoch": 0.02607546953075642, "grad_norm": 0.36767181754112244, "learning_rate": 0.0001, "loss": 1.8615, "step": 227 }, { "epoch": 0.02619033944058354, "grad_norm": 0.2963101267814636, "learning_rate": 0.0001, "loss": 1.6676, "step": 228 }, { "epoch": 0.02630520935041066, "grad_norm": 0.35011404752731323, "learning_rate": 0.0001, "loss": 1.9064, "step": 229 }, { "epoch": 0.026420079260237782, "grad_norm": 0.3166825771331787, "learning_rate": 0.0001, "loss": 1.531, "step": 230 }, { "epoch": 0.026534949170064902, "grad_norm": 0.33136868476867676, "learning_rate": 0.0001, "loss": 1.9791, "step": 231 }, { "epoch": 0.026649819079892022, "grad_norm": 0.3448893129825592, "learning_rate": 0.0001, "loss": 1.9482, "step": 232 }, { "epoch": 0.02676468898971914, "grad_norm": 0.33245259523391724, "learning_rate": 0.0001, "loss": 1.6812, "step": 233 }, { "epoch": 0.026879558899546265, "grad_norm": 0.29518410563468933, "learning_rate": 0.0001, "loss": 1.5645, "step": 234 }, { "epoch": 0.026994428809373385, "grad_norm": 0.33720827102661133, "learning_rate": 0.0001, "loss": 1.7339, "step": 235 }, { "epoch": 0.027109298719200505, "grad_norm": 0.3211282789707184, "learning_rate": 0.0001, "loss": 1.7116, "step": 236 }, { "epoch": 0.027224168629027625, "grad_norm": 0.3239465355873108, "learning_rate": 0.0001, "loss": 1.7349, "step": 237 }, { "epoch": 0.02733903853885475, "grad_norm": 0.32968419790267944, "learning_rate": 0.0001, "loss": 1.7973, "step": 238 }, { "epoch": 0.02745390844868187, "grad_norm": 0.3076919615268707, "learning_rate": 0.0001, "loss": 1.7016, "step": 239 }, { "epoch": 0.02756877835850899, "grad_norm": 0.3537105619907379, "learning_rate": 0.0001, "loss": 1.7679, "step": 240 }, { "epoch": 0.02768364826833611, "grad_norm": 0.3480488955974579, "learning_rate": 0.0001, "loss": 1.8703, "step": 241 }, { "epoch": 0.02779851817816323, "grad_norm": 0.3412216901779175, "learning_rate": 0.0001, "loss": 1.6465, "step": 242 }, { "epoch": 0.027913388087990352, "grad_norm": 0.3362070918083191, "learning_rate": 0.0001, "loss": 1.7042, "step": 243 }, { "epoch": 0.028028257997817472, "grad_norm": 0.3288329839706421, "learning_rate": 0.0001, "loss": 1.7347, "step": 244 }, { "epoch": 0.028143127907644592, "grad_norm": 0.33331453800201416, "learning_rate": 0.0001, "loss": 1.7895, "step": 245 }, { "epoch": 0.028257997817471712, "grad_norm": 0.34005260467529297, "learning_rate": 0.0001, "loss": 1.8154, "step": 246 }, { "epoch": 0.028372867727298835, "grad_norm": 0.33353686332702637, "learning_rate": 0.0001, "loss": 1.7256, "step": 247 }, { "epoch": 0.028487737637125955, "grad_norm": 0.30739593505859375, "learning_rate": 0.0001, "loss": 1.779, "step": 248 }, { "epoch": 0.028602607546953075, "grad_norm": 0.333033949136734, "learning_rate": 0.0001, "loss": 1.9319, "step": 249 }, { "epoch": 0.028717477456780195, "grad_norm": 0.3508240878582001, "learning_rate": 0.0001, "loss": 1.993, "step": 250 }, { "epoch": 0.02883234736660732, "grad_norm": 0.32476675510406494, "learning_rate": 0.0001, "loss": 1.8096, "step": 251 }, { "epoch": 0.02894721727643444, "grad_norm": 0.31880107522010803, "learning_rate": 0.0001, "loss": 1.6591, "step": 252 }, { "epoch": 0.02906208718626156, "grad_norm": 0.33040839433670044, "learning_rate": 0.0001, "loss": 1.7228, "step": 253 }, { "epoch": 0.02917695709608868, "grad_norm": 0.3504570424556732, "learning_rate": 0.0001, "loss": 1.9012, "step": 254 }, { "epoch": 0.029291827005915802, "grad_norm": 0.3304044008255005, "learning_rate": 0.0001, "loss": 1.8114, "step": 255 }, { "epoch": 0.029406696915742922, "grad_norm": 0.3044085204601288, "learning_rate": 0.0001, "loss": 1.4707, "step": 256 }, { "epoch": 0.029521566825570042, "grad_norm": 0.314557284116745, "learning_rate": 0.0001, "loss": 1.6985, "step": 257 }, { "epoch": 0.029636436735397162, "grad_norm": 0.3400304317474365, "learning_rate": 0.0001, "loss": 1.7826, "step": 258 }, { "epoch": 0.029751306645224282, "grad_norm": 0.3435501158237457, "learning_rate": 0.0001, "loss": 1.8779, "step": 259 }, { "epoch": 0.029866176555051405, "grad_norm": 0.35256657004356384, "learning_rate": 0.0001, "loss": 1.9058, "step": 260 }, { "epoch": 0.029981046464878525, "grad_norm": 0.32517293095588684, "learning_rate": 0.0001, "loss": 1.7505, "step": 261 }, { "epoch": 0.030095916374705645, "grad_norm": 0.32231926918029785, "learning_rate": 0.0001, "loss": 1.5946, "step": 262 }, { "epoch": 0.030210786284532765, "grad_norm": 0.312837153673172, "learning_rate": 0.0001, "loss": 1.7485, "step": 263 }, { "epoch": 0.03032565619435989, "grad_norm": 0.33711639046669006, "learning_rate": 0.0001, "loss": 1.8586, "step": 264 }, { "epoch": 0.03044052610418701, "grad_norm": 0.33530449867248535, "learning_rate": 0.0001, "loss": 1.5639, "step": 265 }, { "epoch": 0.03055539601401413, "grad_norm": 0.3420933783054352, "learning_rate": 0.0001, "loss": 1.8716, "step": 266 }, { "epoch": 0.03067026592384125, "grad_norm": 0.3286440372467041, "learning_rate": 0.0001, "loss": 1.6937, "step": 267 }, { "epoch": 0.030785135833668372, "grad_norm": 0.331714391708374, "learning_rate": 0.0001, "loss": 1.8212, "step": 268 }, { "epoch": 0.030900005743495492, "grad_norm": 0.32993122935295105, "learning_rate": 0.0001, "loss": 1.6213, "step": 269 }, { "epoch": 0.031014875653322612, "grad_norm": 0.32213708758354187, "learning_rate": 0.0001, "loss": 1.8358, "step": 270 }, { "epoch": 0.031129745563149732, "grad_norm": 0.33745911717414856, "learning_rate": 0.0001, "loss": 1.7545, "step": 271 }, { "epoch": 0.031244615472976852, "grad_norm": 0.32847779989242554, "learning_rate": 0.0001, "loss": 1.7301, "step": 272 }, { "epoch": 0.031359485382803975, "grad_norm": 0.3093877136707306, "learning_rate": 0.0001, "loss": 1.684, "step": 273 }, { "epoch": 0.031474355292631095, "grad_norm": 0.32528170943260193, "learning_rate": 0.0001, "loss": 1.7503, "step": 274 }, { "epoch": 0.031589225202458215, "grad_norm": 0.3486316204071045, "learning_rate": 0.0001, "loss": 1.8535, "step": 275 }, { "epoch": 0.031704095112285335, "grad_norm": 0.3452807068824768, "learning_rate": 0.0001, "loss": 1.791, "step": 276 }, { "epoch": 0.031818965022112455, "grad_norm": 0.3220309615135193, "learning_rate": 0.0001, "loss": 1.7711, "step": 277 }, { "epoch": 0.031933834931939575, "grad_norm": 0.3608686327934265, "learning_rate": 0.0001, "loss": 1.853, "step": 278 }, { "epoch": 0.0320487048417667, "grad_norm": 0.3356455862522125, "learning_rate": 0.0001, "loss": 1.8301, "step": 279 }, { "epoch": 0.03216357475159382, "grad_norm": 0.36832770705223083, "learning_rate": 0.0001, "loss": 1.9413, "step": 280 }, { "epoch": 0.03227844466142094, "grad_norm": 0.31815385818481445, "learning_rate": 0.0001, "loss": 1.7599, "step": 281 }, { "epoch": 0.03239331457124806, "grad_norm": 0.3182365894317627, "learning_rate": 0.0001, "loss": 1.8303, "step": 282 }, { "epoch": 0.03250818448107518, "grad_norm": 0.3294694721698761, "learning_rate": 0.0001, "loss": 1.7363, "step": 283 }, { "epoch": 0.0326230543909023, "grad_norm": 0.3316207826137543, "learning_rate": 0.0001, "loss": 1.8191, "step": 284 }, { "epoch": 0.03273792430072942, "grad_norm": 0.33212903141975403, "learning_rate": 0.0001, "loss": 1.8683, "step": 285 }, { "epoch": 0.03285279421055654, "grad_norm": 0.3195394277572632, "learning_rate": 0.0001, "loss": 1.726, "step": 286 }, { "epoch": 0.03296766412038366, "grad_norm": 0.30052995681762695, "learning_rate": 0.0001, "loss": 1.5014, "step": 287 }, { "epoch": 0.03308253403021079, "grad_norm": 0.34062662720680237, "learning_rate": 0.0001, "loss": 1.9087, "step": 288 }, { "epoch": 0.03319740394003791, "grad_norm": 0.34171825647354126, "learning_rate": 0.0001, "loss": 1.8431, "step": 289 }, { "epoch": 0.03331227384986503, "grad_norm": 0.3276672065258026, "learning_rate": 0.0001, "loss": 1.6594, "step": 290 }, { "epoch": 0.03342714375969215, "grad_norm": 0.36738014221191406, "learning_rate": 0.0001, "loss": 2.1369, "step": 291 }, { "epoch": 0.03354201366951927, "grad_norm": 0.33889520168304443, "learning_rate": 0.0001, "loss": 1.6107, "step": 292 }, { "epoch": 0.03365688357934639, "grad_norm": 0.33499544858932495, "learning_rate": 0.0001, "loss": 1.9176, "step": 293 }, { "epoch": 0.03377175348917351, "grad_norm": 0.32459279894828796, "learning_rate": 0.0001, "loss": 1.7669, "step": 294 }, { "epoch": 0.03388662339900063, "grad_norm": 0.3338513672351837, "learning_rate": 0.0001, "loss": 1.8755, "step": 295 }, { "epoch": 0.034001493308827756, "grad_norm": 0.3502792418003082, "learning_rate": 0.0001, "loss": 1.7341, "step": 296 }, { "epoch": 0.034116363218654876, "grad_norm": 0.34327709674835205, "learning_rate": 0.0001, "loss": 1.8455, "step": 297 }, { "epoch": 0.034231233128481996, "grad_norm": 0.34909358620643616, "learning_rate": 0.0001, "loss": 1.6939, "step": 298 }, { "epoch": 0.034346103038309116, "grad_norm": 0.3473874032497406, "learning_rate": 0.0001, "loss": 1.7983, "step": 299 }, { "epoch": 0.034460972948136236, "grad_norm": 0.3394108712673187, "learning_rate": 0.0001, "loss": 1.6407, "step": 300 }, { "epoch": 0.034575842857963356, "grad_norm": 0.34042516350746155, "learning_rate": 0.0001, "loss": 1.9465, "step": 301 }, { "epoch": 0.034690712767790476, "grad_norm": 0.3370528221130371, "learning_rate": 0.0001, "loss": 1.7753, "step": 302 }, { "epoch": 0.034805582677617596, "grad_norm": 0.32830432057380676, "learning_rate": 0.0001, "loss": 1.9652, "step": 303 }, { "epoch": 0.034920452587444716, "grad_norm": 0.33252227306365967, "learning_rate": 0.0001, "loss": 1.8031, "step": 304 }, { "epoch": 0.03503532249727184, "grad_norm": 0.32804980874061584, "learning_rate": 0.0001, "loss": 1.8358, "step": 305 }, { "epoch": 0.03515019240709896, "grad_norm": 0.29810619354248047, "learning_rate": 0.0001, "loss": 1.6119, "step": 306 }, { "epoch": 0.03526506231692608, "grad_norm": 0.3392501473426819, "learning_rate": 0.0001, "loss": 1.7285, "step": 307 }, { "epoch": 0.0353799322267532, "grad_norm": 0.31986865401268005, "learning_rate": 0.0001, "loss": 1.7034, "step": 308 }, { "epoch": 0.03549480213658032, "grad_norm": 0.33063703775405884, "learning_rate": 0.0001, "loss": 1.7634, "step": 309 }, { "epoch": 0.03560967204640744, "grad_norm": 0.3045586049556732, "learning_rate": 0.0001, "loss": 1.6945, "step": 310 }, { "epoch": 0.03572454195623456, "grad_norm": 0.3407602608203888, "learning_rate": 0.0001, "loss": 1.6488, "step": 311 }, { "epoch": 0.03583941186606168, "grad_norm": 0.3090314567089081, "learning_rate": 0.0001, "loss": 1.6678, "step": 312 }, { "epoch": 0.03595428177588881, "grad_norm": 0.3623706102371216, "learning_rate": 0.0001, "loss": 1.8877, "step": 313 }, { "epoch": 0.03606915168571593, "grad_norm": 0.3450353145599365, "learning_rate": 0.0001, "loss": 1.6494, "step": 314 }, { "epoch": 0.03618402159554305, "grad_norm": 0.3811194598674774, "learning_rate": 0.0001, "loss": 2.0295, "step": 315 }, { "epoch": 0.03629889150537017, "grad_norm": 0.3349141776561737, "learning_rate": 0.0001, "loss": 1.8964, "step": 316 }, { "epoch": 0.03641376141519729, "grad_norm": 0.3339914381504059, "learning_rate": 0.0001, "loss": 1.6882, "step": 317 }, { "epoch": 0.03652863132502441, "grad_norm": 0.32706162333488464, "learning_rate": 0.0001, "loss": 1.6468, "step": 318 }, { "epoch": 0.03664350123485153, "grad_norm": 0.35195019841194153, "learning_rate": 0.0001, "loss": 1.8871, "step": 319 }, { "epoch": 0.03675837114467865, "grad_norm": 0.2981555163860321, "learning_rate": 0.0001, "loss": 1.6054, "step": 320 }, { "epoch": 0.03687324105450577, "grad_norm": 0.3194637894630432, "learning_rate": 0.0001, "loss": 1.7356, "step": 321 }, { "epoch": 0.036988110964332896, "grad_norm": 0.3623133599758148, "learning_rate": 0.0001, "loss": 1.8527, "step": 322 }, { "epoch": 0.037102980874160016, "grad_norm": 0.3471209704875946, "learning_rate": 0.0001, "loss": 1.7521, "step": 323 }, { "epoch": 0.037217850783987136, "grad_norm": 0.35352832078933716, "learning_rate": 0.0001, "loss": 1.7211, "step": 324 }, { "epoch": 0.037332720693814256, "grad_norm": 0.32329848408699036, "learning_rate": 0.0001, "loss": 1.7678, "step": 325 }, { "epoch": 0.037447590603641376, "grad_norm": 0.32614487409591675, "learning_rate": 0.0001, "loss": 1.7791, "step": 326 }, { "epoch": 0.037562460513468496, "grad_norm": 0.3770069479942322, "learning_rate": 0.0001, "loss": 1.7185, "step": 327 }, { "epoch": 0.037677330423295616, "grad_norm": 0.32778894901275635, "learning_rate": 0.0001, "loss": 1.6565, "step": 328 }, { "epoch": 0.037792200333122736, "grad_norm": 0.37443724274635315, "learning_rate": 0.0001, "loss": 2.0193, "step": 329 }, { "epoch": 0.03790707024294986, "grad_norm": 0.3752276599407196, "learning_rate": 0.0001, "loss": 1.9978, "step": 330 }, { "epoch": 0.03802194015277698, "grad_norm": 0.33359140157699585, "learning_rate": 0.0001, "loss": 1.7836, "step": 331 }, { "epoch": 0.0381368100626041, "grad_norm": 0.3710048794746399, "learning_rate": 0.0001, "loss": 1.9855, "step": 332 }, { "epoch": 0.03825167997243122, "grad_norm": 0.3335336446762085, "learning_rate": 0.0001, "loss": 1.7433, "step": 333 }, { "epoch": 0.03836654988225834, "grad_norm": 0.33409109711647034, "learning_rate": 0.0001, "loss": 1.7517, "step": 334 }, { "epoch": 0.03848141979208546, "grad_norm": 0.3645201623439789, "learning_rate": 0.0001, "loss": 1.8764, "step": 335 }, { "epoch": 0.03859628970191258, "grad_norm": 0.33294662833213806, "learning_rate": 0.0001, "loss": 1.867, "step": 336 }, { "epoch": 0.0387111596117397, "grad_norm": 0.32880741357803345, "learning_rate": 0.0001, "loss": 1.8049, "step": 337 }, { "epoch": 0.03882602952156682, "grad_norm": 0.353669136762619, "learning_rate": 0.0001, "loss": 1.7027, "step": 338 }, { "epoch": 0.03894089943139395, "grad_norm": 0.3438865840435028, "learning_rate": 0.0001, "loss": 1.6894, "step": 339 }, { "epoch": 0.03905576934122107, "grad_norm": 0.3039886951446533, "learning_rate": 0.0001, "loss": 1.7142, "step": 340 }, { "epoch": 0.03917063925104819, "grad_norm": 0.34314149618148804, "learning_rate": 0.0001, "loss": 1.7391, "step": 341 }, { "epoch": 0.03928550916087531, "grad_norm": 0.3602879047393799, "learning_rate": 0.0001, "loss": 1.8054, "step": 342 }, { "epoch": 0.03940037907070243, "grad_norm": 0.33207011222839355, "learning_rate": 0.0001, "loss": 1.7549, "step": 343 }, { "epoch": 0.03951524898052955, "grad_norm": 0.36213555932044983, "learning_rate": 0.0001, "loss": 1.8795, "step": 344 }, { "epoch": 0.03963011889035667, "grad_norm": 0.37275567650794983, "learning_rate": 0.0001, "loss": 1.7072, "step": 345 }, { "epoch": 0.03974498880018379, "grad_norm": 0.3421449661254883, "learning_rate": 0.0001, "loss": 1.7917, "step": 346 }, { "epoch": 0.03985985871001091, "grad_norm": 0.33920395374298096, "learning_rate": 0.0001, "loss": 1.8476, "step": 347 }, { "epoch": 0.039974728619838036, "grad_norm": 0.3364729881286621, "learning_rate": 0.0001, "loss": 1.8673, "step": 348 }, { "epoch": 0.040089598529665156, "grad_norm": 0.3258533179759979, "learning_rate": 0.0001, "loss": 1.5958, "step": 349 }, { "epoch": 0.040204468439492276, "grad_norm": 0.31070706248283386, "learning_rate": 0.0001, "loss": 1.482, "step": 350 }, { "epoch": 0.040319338349319396, "grad_norm": 0.3752513527870178, "learning_rate": 0.0001, "loss": 1.9127, "step": 351 }, { "epoch": 0.040434208259146516, "grad_norm": 0.34795695543289185, "learning_rate": 0.0001, "loss": 1.7637, "step": 352 }, { "epoch": 0.040549078168973636, "grad_norm": 0.3222607374191284, "learning_rate": 0.0001, "loss": 1.8052, "step": 353 }, { "epoch": 0.040663948078800756, "grad_norm": 0.3587421476840973, "learning_rate": 0.0001, "loss": 1.8354, "step": 354 }, { "epoch": 0.040778817988627876, "grad_norm": 0.3381020724773407, "learning_rate": 0.0001, "loss": 1.9156, "step": 355 }, { "epoch": 0.040893687898455, "grad_norm": 0.3503625690937042, "learning_rate": 0.0001, "loss": 1.8307, "step": 356 }, { "epoch": 0.04100855780828212, "grad_norm": 0.34590432047843933, "learning_rate": 0.0001, "loss": 1.7548, "step": 357 }, { "epoch": 0.04112342771810924, "grad_norm": 0.38469988107681274, "learning_rate": 0.0001, "loss": 1.8686, "step": 358 }, { "epoch": 0.04123829762793636, "grad_norm": 0.34001877903938293, "learning_rate": 0.0001, "loss": 1.5505, "step": 359 }, { "epoch": 0.04135316753776348, "grad_norm": 0.35807985067367554, "learning_rate": 0.0001, "loss": 1.8079, "step": 360 }, { "epoch": 0.0414680374475906, "grad_norm": 0.3272436857223511, "learning_rate": 0.0001, "loss": 1.8085, "step": 361 }, { "epoch": 0.04158290735741772, "grad_norm": 0.31507229804992676, "learning_rate": 0.0001, "loss": 1.6678, "step": 362 }, { "epoch": 0.04169777726724484, "grad_norm": 0.3048648238182068, "learning_rate": 0.0001, "loss": 1.5551, "step": 363 }, { "epoch": 0.04181264717707196, "grad_norm": 0.3657607436180115, "learning_rate": 0.0001, "loss": 1.7309, "step": 364 }, { "epoch": 0.04192751708689909, "grad_norm": 0.3520914316177368, "learning_rate": 0.0001, "loss": 1.746, "step": 365 }, { "epoch": 0.04204238699672621, "grad_norm": 0.3329162299633026, "learning_rate": 0.0001, "loss": 1.7818, "step": 366 }, { "epoch": 0.04215725690655333, "grad_norm": 0.342009574174881, "learning_rate": 0.0001, "loss": 1.6875, "step": 367 }, { "epoch": 0.04227212681638045, "grad_norm": 0.3568592667579651, "learning_rate": 0.0001, "loss": 1.8474, "step": 368 }, { "epoch": 0.04238699672620757, "grad_norm": 0.32136744260787964, "learning_rate": 0.0001, "loss": 1.7908, "step": 369 }, { "epoch": 0.04250186663603469, "grad_norm": 0.34790685772895813, "learning_rate": 0.0001, "loss": 1.8666, "step": 370 }, { "epoch": 0.04261673654586181, "grad_norm": 0.3257257044315338, "learning_rate": 0.0001, "loss": 1.8399, "step": 371 }, { "epoch": 0.04273160645568893, "grad_norm": 0.319909006357193, "learning_rate": 0.0001, "loss": 1.5395, "step": 372 }, { "epoch": 0.042846476365516056, "grad_norm": 0.34518831968307495, "learning_rate": 0.0001, "loss": 1.721, "step": 373 }, { "epoch": 0.042961346275343176, "grad_norm": 0.350404292345047, "learning_rate": 0.0001, "loss": 1.7933, "step": 374 }, { "epoch": 0.043076216185170296, "grad_norm": 0.3150002956390381, "learning_rate": 0.0001, "loss": 1.4682, "step": 375 }, { "epoch": 0.043191086094997416, "grad_norm": 0.3526088297367096, "learning_rate": 0.0001, "loss": 1.7383, "step": 376 }, { "epoch": 0.043305956004824536, "grad_norm": 0.3494514226913452, "learning_rate": 0.0001, "loss": 1.7906, "step": 377 }, { "epoch": 0.043420825914651656, "grad_norm": 0.3323955833911896, "learning_rate": 0.0001, "loss": 1.8454, "step": 378 }, { "epoch": 0.043535695824478776, "grad_norm": 0.31533968448638916, "learning_rate": 0.0001, "loss": 1.538, "step": 379 }, { "epoch": 0.043650565734305896, "grad_norm": 0.3485229015350342, "learning_rate": 0.0001, "loss": 1.704, "step": 380 }, { "epoch": 0.043765435644133016, "grad_norm": 0.3400436043739319, "learning_rate": 0.0001, "loss": 1.861, "step": 381 }, { "epoch": 0.04388030555396014, "grad_norm": 0.32537248730659485, "learning_rate": 0.0001, "loss": 1.926, "step": 382 }, { "epoch": 0.04399517546378726, "grad_norm": 0.35941800475120544, "learning_rate": 0.0001, "loss": 1.8136, "step": 383 }, { "epoch": 0.04411004537361438, "grad_norm": 0.3239869773387909, "learning_rate": 0.0001, "loss": 1.5941, "step": 384 }, { "epoch": 0.0442249152834415, "grad_norm": 0.34292441606521606, "learning_rate": 0.0001, "loss": 2.0001, "step": 385 }, { "epoch": 0.04433978519326862, "grad_norm": 0.35059481859207153, "learning_rate": 0.0001, "loss": 1.8536, "step": 386 }, { "epoch": 0.04445465510309574, "grad_norm": 0.35030195116996765, "learning_rate": 0.0001, "loss": 1.7471, "step": 387 }, { "epoch": 0.04456952501292286, "grad_norm": 0.34561511874198914, "learning_rate": 0.0001, "loss": 1.6975, "step": 388 }, { "epoch": 0.04468439492274998, "grad_norm": 0.3485510051250458, "learning_rate": 0.0001, "loss": 1.7986, "step": 389 }, { "epoch": 0.04479926483257711, "grad_norm": 0.3528231978416443, "learning_rate": 0.0001, "loss": 1.7736, "step": 390 }, { "epoch": 0.04491413474240423, "grad_norm": 0.3497932255268097, "learning_rate": 0.0001, "loss": 1.8213, "step": 391 }, { "epoch": 0.04502900465223135, "grad_norm": 0.34640979766845703, "learning_rate": 0.0001, "loss": 2.0511, "step": 392 }, { "epoch": 0.04514387456205847, "grad_norm": 0.3179776668548584, "learning_rate": 0.0001, "loss": 1.6705, "step": 393 }, { "epoch": 0.04525874447188559, "grad_norm": 0.3492574691772461, "learning_rate": 0.0001, "loss": 1.7829, "step": 394 }, { "epoch": 0.04537361438171271, "grad_norm": 0.32157397270202637, "learning_rate": 0.0001, "loss": 1.8084, "step": 395 }, { "epoch": 0.04548848429153983, "grad_norm": 0.32758328318595886, "learning_rate": 0.0001, "loss": 1.6538, "step": 396 }, { "epoch": 0.04560335420136695, "grad_norm": 0.34034964442253113, "learning_rate": 0.0001, "loss": 1.6549, "step": 397 }, { "epoch": 0.04571822411119407, "grad_norm": 0.3422950506210327, "learning_rate": 0.0001, "loss": 1.9178, "step": 398 }, { "epoch": 0.045833094021021197, "grad_norm": 0.3140711784362793, "learning_rate": 0.0001, "loss": 1.6569, "step": 399 }, { "epoch": 0.045947963930848316, "grad_norm": 0.33444762229919434, "learning_rate": 0.0001, "loss": 1.7443, "step": 400 }, { "epoch": 0.046062833840675436, "grad_norm": 0.3292236924171448, "learning_rate": 0.0001, "loss": 1.786, "step": 401 }, { "epoch": 0.046177703750502556, "grad_norm": 0.3406231999397278, "learning_rate": 0.0001, "loss": 1.8477, "step": 402 }, { "epoch": 0.046292573660329676, "grad_norm": 0.35850241780281067, "learning_rate": 0.0001, "loss": 1.9219, "step": 403 }, { "epoch": 0.046407443570156796, "grad_norm": 0.39436420798301697, "learning_rate": 0.0001, "loss": 1.9564, "step": 404 }, { "epoch": 0.046522313479983916, "grad_norm": 0.3540903627872467, "learning_rate": 0.0001, "loss": 1.6521, "step": 405 }, { "epoch": 0.046637183389811036, "grad_norm": 0.4025273323059082, "learning_rate": 0.0001, "loss": 1.7531, "step": 406 }, { "epoch": 0.04675205329963816, "grad_norm": 0.3019482493400574, "learning_rate": 0.0001, "loss": 1.5366, "step": 407 }, { "epoch": 0.04686692320946528, "grad_norm": 0.3409338593482971, "learning_rate": 0.0001, "loss": 1.8024, "step": 408 }, { "epoch": 0.0469817931192924, "grad_norm": 0.3111829161643982, "learning_rate": 0.0001, "loss": 1.486, "step": 409 }, { "epoch": 0.04709666302911952, "grad_norm": 0.38814571499824524, "learning_rate": 0.0001, "loss": 2.0234, "step": 410 }, { "epoch": 0.04721153293894664, "grad_norm": 0.3604361116886139, "learning_rate": 0.0001, "loss": 1.7389, "step": 411 }, { "epoch": 0.04732640284877376, "grad_norm": 0.36267879605293274, "learning_rate": 0.0001, "loss": 1.9424, "step": 412 }, { "epoch": 0.04744127275860088, "grad_norm": 0.3534083366394043, "learning_rate": 0.0001, "loss": 1.9132, "step": 413 }, { "epoch": 0.047556142668428, "grad_norm": 0.31778329610824585, "learning_rate": 0.0001, "loss": 1.7196, "step": 414 }, { "epoch": 0.04767101257825512, "grad_norm": 0.3307456970214844, "learning_rate": 0.0001, "loss": 1.7706, "step": 415 }, { "epoch": 0.04778588248808225, "grad_norm": 0.33666694164276123, "learning_rate": 0.0001, "loss": 1.8634, "step": 416 }, { "epoch": 0.04790075239790937, "grad_norm": 0.352847158908844, "learning_rate": 0.0001, "loss": 1.8828, "step": 417 }, { "epoch": 0.04801562230773649, "grad_norm": 0.33423542976379395, "learning_rate": 0.0001, "loss": 1.9455, "step": 418 }, { "epoch": 0.04813049221756361, "grad_norm": 0.35909709334373474, "learning_rate": 0.0001, "loss": 1.8293, "step": 419 }, { "epoch": 0.04824536212739073, "grad_norm": 0.35591983795166016, "learning_rate": 0.0001, "loss": 1.8619, "step": 420 }, { "epoch": 0.04836023203721785, "grad_norm": 0.33195993304252625, "learning_rate": 0.0001, "loss": 1.6672, "step": 421 }, { "epoch": 0.04847510194704497, "grad_norm": 0.3257507383823395, "learning_rate": 0.0001, "loss": 1.7567, "step": 422 }, { "epoch": 0.04858997185687209, "grad_norm": 0.31000784039497375, "learning_rate": 0.0001, "loss": 1.7718, "step": 423 }, { "epoch": 0.04870484176669921, "grad_norm": 0.3311057686805725, "learning_rate": 0.0001, "loss": 1.8224, "step": 424 }, { "epoch": 0.04881971167652634, "grad_norm": 0.32574066519737244, "learning_rate": 0.0001, "loss": 1.7199, "step": 425 }, { "epoch": 0.04893458158635346, "grad_norm": 0.30064114928245544, "learning_rate": 0.0001, "loss": 1.6134, "step": 426 }, { "epoch": 0.04904945149618058, "grad_norm": 0.331144779920578, "learning_rate": 0.0001, "loss": 1.8779, "step": 427 }, { "epoch": 0.0491643214060077, "grad_norm": 0.3030913472175598, "learning_rate": 0.0001, "loss": 1.5864, "step": 428 }, { "epoch": 0.04927919131583482, "grad_norm": 0.3529425263404846, "learning_rate": 0.0001, "loss": 1.743, "step": 429 }, { "epoch": 0.04939406122566194, "grad_norm": 0.32856279611587524, "learning_rate": 0.0001, "loss": 1.6406, "step": 430 }, { "epoch": 0.04950893113548906, "grad_norm": 0.32099583745002747, "learning_rate": 0.0001, "loss": 1.6706, "step": 431 }, { "epoch": 0.049623801045316177, "grad_norm": 0.3427245616912842, "learning_rate": 0.0001, "loss": 1.8355, "step": 432 }, { "epoch": 0.0497386709551433, "grad_norm": 0.34210020303726196, "learning_rate": 0.0001, "loss": 1.7778, "step": 433 }, { "epoch": 0.04985354086497042, "grad_norm": 0.3216181695461273, "learning_rate": 0.0001, "loss": 1.8209, "step": 434 }, { "epoch": 0.04996841077479754, "grad_norm": 0.33420485258102417, "learning_rate": 0.0001, "loss": 1.8087, "step": 435 }, { "epoch": 0.05008328068462466, "grad_norm": 0.3496338725090027, "learning_rate": 0.0001, "loss": 1.737, "step": 436 }, { "epoch": 0.05019815059445178, "grad_norm": 0.3511291444301605, "learning_rate": 0.0001, "loss": 2.0355, "step": 437 }, { "epoch": 0.0503130205042789, "grad_norm": 0.34182974696159363, "learning_rate": 0.0001, "loss": 1.7236, "step": 438 }, { "epoch": 0.05042789041410602, "grad_norm": 0.3205072581768036, "learning_rate": 0.0001, "loss": 1.7691, "step": 439 }, { "epoch": 0.05054276032393314, "grad_norm": 0.3407754600048065, "learning_rate": 0.0001, "loss": 1.9003, "step": 440 }, { "epoch": 0.05065763023376026, "grad_norm": 0.31289535760879517, "learning_rate": 0.0001, "loss": 1.6923, "step": 441 }, { "epoch": 0.05077250014358739, "grad_norm": 0.2934773564338684, "learning_rate": 0.0001, "loss": 1.5488, "step": 442 }, { "epoch": 0.05088737005341451, "grad_norm": 0.3349979817867279, "learning_rate": 0.0001, "loss": 1.6209, "step": 443 }, { "epoch": 0.05100223996324163, "grad_norm": 0.3626347482204437, "learning_rate": 0.0001, "loss": 1.8299, "step": 444 }, { "epoch": 0.05111710987306875, "grad_norm": 0.3510279059410095, "learning_rate": 0.0001, "loss": 1.6544, "step": 445 }, { "epoch": 0.05123197978289587, "grad_norm": 0.3967926502227783, "learning_rate": 0.0001, "loss": 2.0913, "step": 446 }, { "epoch": 0.05134684969272299, "grad_norm": 0.31338948011398315, "learning_rate": 0.0001, "loss": 1.5693, "step": 447 }, { "epoch": 0.05146171960255011, "grad_norm": 0.32318705320358276, "learning_rate": 0.0001, "loss": 1.5317, "step": 448 }, { "epoch": 0.05157658951237723, "grad_norm": 0.31790030002593994, "learning_rate": 0.0001, "loss": 1.6432, "step": 449 }, { "epoch": 0.05169145942220436, "grad_norm": 0.3432879149913788, "learning_rate": 0.0001, "loss": 1.6502, "step": 450 }, { "epoch": 0.05180632933203148, "grad_norm": 0.38444429636001587, "learning_rate": 0.0001, "loss": 1.844, "step": 451 }, { "epoch": 0.0519211992418586, "grad_norm": 0.33826589584350586, "learning_rate": 0.0001, "loss": 1.7821, "step": 452 }, { "epoch": 0.05203606915168572, "grad_norm": 0.340986967086792, "learning_rate": 0.0001, "loss": 1.7472, "step": 453 }, { "epoch": 0.05215093906151284, "grad_norm": 0.34421804547309875, "learning_rate": 0.0001, "loss": 1.7193, "step": 454 }, { "epoch": 0.05226580897133996, "grad_norm": 0.33938467502593994, "learning_rate": 0.0001, "loss": 1.8119, "step": 455 }, { "epoch": 0.05238067888116708, "grad_norm": 0.3173467516899109, "learning_rate": 0.0001, "loss": 1.6541, "step": 456 }, { "epoch": 0.0524955487909942, "grad_norm": 0.32488059997558594, "learning_rate": 0.0001, "loss": 1.5981, "step": 457 }, { "epoch": 0.05261041870082132, "grad_norm": 0.35664987564086914, "learning_rate": 0.0001, "loss": 1.539, "step": 458 }, { "epoch": 0.052725288610648444, "grad_norm": 0.3289016783237457, "learning_rate": 0.0001, "loss": 1.6183, "step": 459 }, { "epoch": 0.052840158520475564, "grad_norm": 0.3202899992465973, "learning_rate": 0.0001, "loss": 1.6696, "step": 460 }, { "epoch": 0.052955028430302684, "grad_norm": 0.34299010038375854, "learning_rate": 0.0001, "loss": 1.8016, "step": 461 }, { "epoch": 0.053069898340129804, "grad_norm": 0.34644559025764465, "learning_rate": 0.0001, "loss": 1.7947, "step": 462 }, { "epoch": 0.053184768249956924, "grad_norm": 0.32393816113471985, "learning_rate": 0.0001, "loss": 1.7503, "step": 463 }, { "epoch": 0.053299638159784044, "grad_norm": 0.36531350016593933, "learning_rate": 0.0001, "loss": 1.6858, "step": 464 }, { "epoch": 0.053414508069611163, "grad_norm": 0.29397326707839966, "learning_rate": 0.0001, "loss": 1.6449, "step": 465 }, { "epoch": 0.05352937797943828, "grad_norm": 0.3278639614582062, "learning_rate": 0.0001, "loss": 1.8286, "step": 466 }, { "epoch": 0.05364424788926541, "grad_norm": 0.3598267734050751, "learning_rate": 0.0001, "loss": 1.6739, "step": 467 }, { "epoch": 0.05375911779909253, "grad_norm": 0.36339178681373596, "learning_rate": 0.0001, "loss": 1.9749, "step": 468 }, { "epoch": 0.05387398770891965, "grad_norm": 0.31908461451530457, "learning_rate": 0.0001, "loss": 1.6836, "step": 469 }, { "epoch": 0.05398885761874677, "grad_norm": 0.3400243818759918, "learning_rate": 0.0001, "loss": 1.7625, "step": 470 }, { "epoch": 0.05410372752857389, "grad_norm": 0.3101711869239807, "learning_rate": 0.0001, "loss": 1.5419, "step": 471 }, { "epoch": 0.05421859743840101, "grad_norm": 0.33695968985557556, "learning_rate": 0.0001, "loss": 1.8077, "step": 472 }, { "epoch": 0.05433346734822813, "grad_norm": 0.3501220941543579, "learning_rate": 0.0001, "loss": 1.8684, "step": 473 }, { "epoch": 0.05444833725805525, "grad_norm": 0.3185228705406189, "learning_rate": 0.0001, "loss": 1.7575, "step": 474 }, { "epoch": 0.05456320716788237, "grad_norm": 0.33037081360816956, "learning_rate": 0.0001, "loss": 1.8286, "step": 475 }, { "epoch": 0.0546780770777095, "grad_norm": 0.3522806763648987, "learning_rate": 0.0001, "loss": 1.8658, "step": 476 }, { "epoch": 0.05479294698753662, "grad_norm": 0.35064372420310974, "learning_rate": 0.0001, "loss": 1.7537, "step": 477 }, { "epoch": 0.05490781689736374, "grad_norm": 0.3410029411315918, "learning_rate": 0.0001, "loss": 1.8334, "step": 478 }, { "epoch": 0.05502268680719086, "grad_norm": 0.34402716159820557, "learning_rate": 0.0001, "loss": 1.8506, "step": 479 }, { "epoch": 0.05513755671701798, "grad_norm": 0.34367635846138, "learning_rate": 0.0001, "loss": 1.832, "step": 480 }, { "epoch": 0.0552524266268451, "grad_norm": 0.33249610662460327, "learning_rate": 0.0001, "loss": 1.7356, "step": 481 }, { "epoch": 0.05536729653667222, "grad_norm": 0.33778467774391174, "learning_rate": 0.0001, "loss": 1.6529, "step": 482 }, { "epoch": 0.05548216644649934, "grad_norm": 0.31293541193008423, "learning_rate": 0.0001, "loss": 1.5809, "step": 483 }, { "epoch": 0.05559703635632646, "grad_norm": 0.3448321521282196, "learning_rate": 0.0001, "loss": 1.8017, "step": 484 }, { "epoch": 0.055711906266153584, "grad_norm": 0.36519747972488403, "learning_rate": 0.0001, "loss": 2.0294, "step": 485 }, { "epoch": 0.055826776175980704, "grad_norm": 0.36726245284080505, "learning_rate": 0.0001, "loss": 1.9747, "step": 486 }, { "epoch": 0.055941646085807824, "grad_norm": 0.35360264778137207, "learning_rate": 0.0001, "loss": 1.6291, "step": 487 }, { "epoch": 0.056056515995634944, "grad_norm": 0.31348568201065063, "learning_rate": 0.0001, "loss": 1.5357, "step": 488 }, { "epoch": 0.056171385905462064, "grad_norm": 0.3481610119342804, "learning_rate": 0.0001, "loss": 1.6213, "step": 489 }, { "epoch": 0.056286255815289184, "grad_norm": 0.30713188648223877, "learning_rate": 0.0001, "loss": 1.5417, "step": 490 }, { "epoch": 0.056401125725116304, "grad_norm": 0.33684420585632324, "learning_rate": 0.0001, "loss": 1.7113, "step": 491 }, { "epoch": 0.056515995634943424, "grad_norm": 0.36983436346054077, "learning_rate": 0.0001, "loss": 1.8795, "step": 492 }, { "epoch": 0.05663086554477055, "grad_norm": 0.32992857694625854, "learning_rate": 0.0001, "loss": 1.8728, "step": 493 }, { "epoch": 0.05674573545459767, "grad_norm": 0.3567913770675659, "learning_rate": 0.0001, "loss": 1.7194, "step": 494 }, { "epoch": 0.05686060536442479, "grad_norm": 0.3176042437553406, "learning_rate": 0.0001, "loss": 1.6712, "step": 495 }, { "epoch": 0.05697547527425191, "grad_norm": 0.30702850222587585, "learning_rate": 0.0001, "loss": 1.6685, "step": 496 }, { "epoch": 0.05709034518407903, "grad_norm": 0.33594316244125366, "learning_rate": 0.0001, "loss": 1.7385, "step": 497 }, { "epoch": 0.05720521509390615, "grad_norm": 0.3079899549484253, "learning_rate": 0.0001, "loss": 1.6913, "step": 498 }, { "epoch": 0.05732008500373327, "grad_norm": 0.3289260268211365, "learning_rate": 0.0001, "loss": 1.7654, "step": 499 }, { "epoch": 0.05743495491356039, "grad_norm": 0.33289793133735657, "learning_rate": 0.0001, "loss": 1.7681, "step": 500 }, { "epoch": 0.05754982482338751, "grad_norm": 0.34192466735839844, "learning_rate": 0.0001, "loss": 1.753, "step": 501 }, { "epoch": 0.05766469473321464, "grad_norm": 0.40127864480018616, "learning_rate": 0.0001, "loss": 2.114, "step": 502 }, { "epoch": 0.05777956464304176, "grad_norm": 0.3597794473171234, "learning_rate": 0.0001, "loss": 1.8872, "step": 503 }, { "epoch": 0.05789443455286888, "grad_norm": 0.3503691256046295, "learning_rate": 0.0001, "loss": 1.764, "step": 504 }, { "epoch": 0.058009304462696, "grad_norm": 0.33984240889549255, "learning_rate": 0.0001, "loss": 1.6724, "step": 505 }, { "epoch": 0.05812417437252312, "grad_norm": 0.3279857337474823, "learning_rate": 0.0001, "loss": 1.6906, "step": 506 }, { "epoch": 0.05823904428235024, "grad_norm": 0.3238902986049652, "learning_rate": 0.0001, "loss": 1.5953, "step": 507 }, { "epoch": 0.05835391419217736, "grad_norm": 0.35532209277153015, "learning_rate": 0.0001, "loss": 1.8577, "step": 508 }, { "epoch": 0.05846878410200448, "grad_norm": 0.3488325774669647, "learning_rate": 0.0001, "loss": 1.7485, "step": 509 }, { "epoch": 0.058583654011831604, "grad_norm": 0.32330816984176636, "learning_rate": 0.0001, "loss": 1.5808, "step": 510 }, { "epoch": 0.058698523921658724, "grad_norm": 0.32216721773147583, "learning_rate": 0.0001, "loss": 1.6325, "step": 511 }, { "epoch": 0.058813393831485844, "grad_norm": 0.3667677640914917, "learning_rate": 0.0001, "loss": 1.916, "step": 512 }, { "epoch": 0.058928263741312964, "grad_norm": 0.3243674635887146, "learning_rate": 0.0001, "loss": 1.6907, "step": 513 }, { "epoch": 0.059043133651140084, "grad_norm": 0.3738958537578583, "learning_rate": 0.0001, "loss": 2.0036, "step": 514 }, { "epoch": 0.059158003560967204, "grad_norm": 0.33784958720207214, "learning_rate": 0.0001, "loss": 1.8144, "step": 515 }, { "epoch": 0.059272873470794324, "grad_norm": 0.34589648246765137, "learning_rate": 0.0001, "loss": 1.7907, "step": 516 }, { "epoch": 0.059387743380621444, "grad_norm": 0.3396931290626526, "learning_rate": 0.0001, "loss": 1.8062, "step": 517 }, { "epoch": 0.059502613290448564, "grad_norm": 0.33430787920951843, "learning_rate": 0.0001, "loss": 1.8861, "step": 518 }, { "epoch": 0.05961748320027569, "grad_norm": 0.33483850955963135, "learning_rate": 0.0001, "loss": 1.7733, "step": 519 }, { "epoch": 0.05973235311010281, "grad_norm": 0.3303142488002777, "learning_rate": 0.0001, "loss": 1.6669, "step": 520 }, { "epoch": 0.05984722301992993, "grad_norm": 0.38823017477989197, "learning_rate": 0.0001, "loss": 1.9658, "step": 521 }, { "epoch": 0.05996209292975705, "grad_norm": 0.3367463946342468, "learning_rate": 0.0001, "loss": 1.7768, "step": 522 }, { "epoch": 0.06007696283958417, "grad_norm": 0.3448854684829712, "learning_rate": 0.0001, "loss": 1.8997, "step": 523 }, { "epoch": 0.06019183274941129, "grad_norm": 0.33760958909988403, "learning_rate": 0.0001, "loss": 1.7803, "step": 524 }, { "epoch": 0.06030670265923841, "grad_norm": 0.3285827934741974, "learning_rate": 0.0001, "loss": 1.7758, "step": 525 }, { "epoch": 0.06042157256906553, "grad_norm": 0.32994288206100464, "learning_rate": 0.0001, "loss": 1.7558, "step": 526 }, { "epoch": 0.06053644247889266, "grad_norm": 0.31827473640441895, "learning_rate": 0.0001, "loss": 1.6905, "step": 527 }, { "epoch": 0.06065131238871978, "grad_norm": 0.35536283254623413, "learning_rate": 0.0001, "loss": 1.8389, "step": 528 }, { "epoch": 0.0607661822985469, "grad_norm": 0.34808462858200073, "learning_rate": 0.0001, "loss": 1.7249, "step": 529 }, { "epoch": 0.06088105220837402, "grad_norm": 0.32803285121917725, "learning_rate": 0.0001, "loss": 1.7431, "step": 530 }, { "epoch": 0.06099592211820114, "grad_norm": 0.3689769506454468, "learning_rate": 0.0001, "loss": 1.95, "step": 531 }, { "epoch": 0.06111079202802826, "grad_norm": 0.324849396944046, "learning_rate": 0.0001, "loss": 1.6858, "step": 532 }, { "epoch": 0.06122566193785538, "grad_norm": 0.367699533700943, "learning_rate": 0.0001, "loss": 1.9701, "step": 533 }, { "epoch": 0.0613405318476825, "grad_norm": 0.346123069524765, "learning_rate": 0.0001, "loss": 1.7326, "step": 534 }, { "epoch": 0.06145540175750962, "grad_norm": 0.33581048250198364, "learning_rate": 0.0001, "loss": 1.7353, "step": 535 }, { "epoch": 0.061570271667336744, "grad_norm": 0.31508779525756836, "learning_rate": 0.0001, "loss": 1.6533, "step": 536 }, { "epoch": 0.061685141577163864, "grad_norm": 0.31980377435684204, "learning_rate": 0.0001, "loss": 1.7605, "step": 537 }, { "epoch": 0.061800011486990984, "grad_norm": 0.324613481760025, "learning_rate": 0.0001, "loss": 1.6779, "step": 538 }, { "epoch": 0.061914881396818104, "grad_norm": 0.36573949456214905, "learning_rate": 0.0001, "loss": 1.8471, "step": 539 }, { "epoch": 0.062029751306645224, "grad_norm": 0.34220999479293823, "learning_rate": 0.0001, "loss": 1.8383, "step": 540 }, { "epoch": 0.062144621216472344, "grad_norm": 0.3276033103466034, "learning_rate": 0.0001, "loss": 1.6503, "step": 541 }, { "epoch": 0.062259491126299464, "grad_norm": 0.320403516292572, "learning_rate": 0.0001, "loss": 1.4869, "step": 542 }, { "epoch": 0.062374361036126584, "grad_norm": 0.333486407995224, "learning_rate": 0.0001, "loss": 1.6474, "step": 543 }, { "epoch": 0.062489230945953704, "grad_norm": 0.3390301764011383, "learning_rate": 0.0001, "loss": 1.6722, "step": 544 }, { "epoch": 0.06260410085578083, "grad_norm": 0.3051248788833618, "learning_rate": 0.0001, "loss": 1.632, "step": 545 }, { "epoch": 0.06271897076560795, "grad_norm": 0.4015922248363495, "learning_rate": 0.0001, "loss": 1.9027, "step": 546 }, { "epoch": 0.06283384067543507, "grad_norm": 0.35976481437683105, "learning_rate": 0.0001, "loss": 1.8451, "step": 547 }, { "epoch": 0.06294871058526219, "grad_norm": 0.3470692038536072, "learning_rate": 0.0001, "loss": 1.7992, "step": 548 }, { "epoch": 0.06306358049508931, "grad_norm": 0.324569433927536, "learning_rate": 0.0001, "loss": 1.7567, "step": 549 }, { "epoch": 0.06317845040491643, "grad_norm": 0.36248597502708435, "learning_rate": 0.0001, "loss": 1.8184, "step": 550 }, { "epoch": 0.06329332031474355, "grad_norm": 0.33431920409202576, "learning_rate": 0.0001, "loss": 1.7591, "step": 551 }, { "epoch": 0.06340819022457067, "grad_norm": 0.3006725013256073, "learning_rate": 0.0001, "loss": 1.5713, "step": 552 }, { "epoch": 0.06352306013439779, "grad_norm": 0.3330213725566864, "learning_rate": 0.0001, "loss": 1.7226, "step": 553 }, { "epoch": 0.06363793004422491, "grad_norm": 0.34222641587257385, "learning_rate": 0.0001, "loss": 1.6587, "step": 554 }, { "epoch": 0.06375279995405203, "grad_norm": 0.34243908524513245, "learning_rate": 0.0001, "loss": 1.7752, "step": 555 }, { "epoch": 0.06386766986387915, "grad_norm": 0.3517223596572876, "learning_rate": 0.0001, "loss": 1.91, "step": 556 }, { "epoch": 0.06398253977370628, "grad_norm": 0.38396772742271423, "learning_rate": 0.0001, "loss": 2.1684, "step": 557 }, { "epoch": 0.0640974096835334, "grad_norm": 0.34429916739463806, "learning_rate": 0.0001, "loss": 1.9133, "step": 558 }, { "epoch": 0.06421227959336052, "grad_norm": 0.31630486249923706, "learning_rate": 0.0001, "loss": 1.574, "step": 559 }, { "epoch": 0.06432714950318764, "grad_norm": 0.3668649196624756, "learning_rate": 0.0001, "loss": 1.7739, "step": 560 }, { "epoch": 0.06444201941301476, "grad_norm": 0.33105143904685974, "learning_rate": 0.0001, "loss": 1.6974, "step": 561 }, { "epoch": 0.06455688932284188, "grad_norm": 0.3978722393512726, "learning_rate": 0.0001, "loss": 1.8365, "step": 562 }, { "epoch": 0.064671759232669, "grad_norm": 0.3352854549884796, "learning_rate": 0.0001, "loss": 1.7237, "step": 563 }, { "epoch": 0.06478662914249612, "grad_norm": 0.3484468460083008, "learning_rate": 0.0001, "loss": 1.8297, "step": 564 }, { "epoch": 0.06490149905232324, "grad_norm": 0.3346973955631256, "learning_rate": 0.0001, "loss": 1.7631, "step": 565 }, { "epoch": 0.06501636896215036, "grad_norm": 0.3363039195537567, "learning_rate": 0.0001, "loss": 1.6818, "step": 566 }, { "epoch": 0.06513123887197748, "grad_norm": 0.34610244631767273, "learning_rate": 0.0001, "loss": 1.8759, "step": 567 }, { "epoch": 0.0652461087818046, "grad_norm": 0.36252561211586, "learning_rate": 0.0001, "loss": 1.8793, "step": 568 }, { "epoch": 0.06536097869163172, "grad_norm": 0.3493739068508148, "learning_rate": 0.0001, "loss": 1.7335, "step": 569 }, { "epoch": 0.06547584860145884, "grad_norm": 0.3322302997112274, "learning_rate": 0.0001, "loss": 1.5927, "step": 570 }, { "epoch": 0.06559071851128596, "grad_norm": 0.31431615352630615, "learning_rate": 0.0001, "loss": 1.5679, "step": 571 }, { "epoch": 0.06570558842111308, "grad_norm": 0.3374696671962738, "learning_rate": 0.0001, "loss": 1.7067, "step": 572 }, { "epoch": 0.0658204583309402, "grad_norm": 0.343352735042572, "learning_rate": 0.0001, "loss": 1.8222, "step": 573 }, { "epoch": 0.06593532824076732, "grad_norm": 0.33851170539855957, "learning_rate": 0.0001, "loss": 1.6893, "step": 574 }, { "epoch": 0.06605019815059446, "grad_norm": 0.36369964480400085, "learning_rate": 0.0001, "loss": 1.8065, "step": 575 }, { "epoch": 0.06616506806042158, "grad_norm": 0.31349602341651917, "learning_rate": 0.0001, "loss": 1.5028, "step": 576 }, { "epoch": 0.0662799379702487, "grad_norm": 0.3367163836956024, "learning_rate": 0.0001, "loss": 1.8033, "step": 577 }, { "epoch": 0.06639480788007582, "grad_norm": 0.3456117510795593, "learning_rate": 0.0001, "loss": 1.711, "step": 578 }, { "epoch": 0.06650967778990294, "grad_norm": 0.31135809421539307, "learning_rate": 0.0001, "loss": 1.3939, "step": 579 }, { "epoch": 0.06662454769973006, "grad_norm": 0.327361136674881, "learning_rate": 0.0001, "loss": 1.6371, "step": 580 }, { "epoch": 0.06673941760955718, "grad_norm": 0.345680296421051, "learning_rate": 0.0001, "loss": 1.7006, "step": 581 }, { "epoch": 0.0668542875193843, "grad_norm": 0.33879801630973816, "learning_rate": 0.0001, "loss": 1.8236, "step": 582 }, { "epoch": 0.06696915742921142, "grad_norm": 0.3614217936992645, "learning_rate": 0.0001, "loss": 1.8137, "step": 583 }, { "epoch": 0.06708402733903854, "grad_norm": 0.34495973587036133, "learning_rate": 0.0001, "loss": 1.6859, "step": 584 }, { "epoch": 0.06719889724886566, "grad_norm": 0.31370049715042114, "learning_rate": 0.0001, "loss": 1.7482, "step": 585 }, { "epoch": 0.06731376715869278, "grad_norm": 0.3325652480125427, "learning_rate": 0.0001, "loss": 1.88, "step": 586 }, { "epoch": 0.0674286370685199, "grad_norm": 0.351700097322464, "learning_rate": 0.0001, "loss": 1.8817, "step": 587 }, { "epoch": 0.06754350697834702, "grad_norm": 0.3200022280216217, "learning_rate": 0.0001, "loss": 1.7419, "step": 588 }, { "epoch": 0.06765837688817414, "grad_norm": 0.36215293407440186, "learning_rate": 0.0001, "loss": 1.9603, "step": 589 }, { "epoch": 0.06777324679800126, "grad_norm": 0.3392893970012665, "learning_rate": 0.0001, "loss": 1.7047, "step": 590 }, { "epoch": 0.06788811670782838, "grad_norm": 0.3454267084598541, "learning_rate": 0.0001, "loss": 1.6759, "step": 591 }, { "epoch": 0.06800298661765551, "grad_norm": 0.35802924633026123, "learning_rate": 0.0001, "loss": 1.8607, "step": 592 }, { "epoch": 0.06811785652748263, "grad_norm": 0.2966287434101105, "learning_rate": 0.0001, "loss": 1.594, "step": 593 }, { "epoch": 0.06823272643730975, "grad_norm": 0.36141011118888855, "learning_rate": 0.0001, "loss": 1.7596, "step": 594 }, { "epoch": 0.06834759634713687, "grad_norm": 0.36919906735420227, "learning_rate": 0.0001, "loss": 1.9334, "step": 595 }, { "epoch": 0.06846246625696399, "grad_norm": 0.34811851382255554, "learning_rate": 0.0001, "loss": 1.92, "step": 596 }, { "epoch": 0.06857733616679111, "grad_norm": 0.3520393669605255, "learning_rate": 0.0001, "loss": 1.5446, "step": 597 }, { "epoch": 0.06869220607661823, "grad_norm": 0.3605727553367615, "learning_rate": 0.0001, "loss": 1.8132, "step": 598 }, { "epoch": 0.06880707598644535, "grad_norm": 0.3948690593242645, "learning_rate": 0.0001, "loss": 1.7842, "step": 599 }, { "epoch": 0.06892194589627247, "grad_norm": 0.34386035799980164, "learning_rate": 0.0001, "loss": 1.6827, "step": 600 }, { "epoch": 0.06903681580609959, "grad_norm": 0.37180081009864807, "learning_rate": 0.0001, "loss": 1.7982, "step": 601 }, { "epoch": 0.06915168571592671, "grad_norm": 0.3451867401599884, "learning_rate": 0.0001, "loss": 1.8339, "step": 602 }, { "epoch": 0.06926655562575383, "grad_norm": 0.3325120210647583, "learning_rate": 0.0001, "loss": 1.7506, "step": 603 }, { "epoch": 0.06938142553558095, "grad_norm": 0.3503422737121582, "learning_rate": 0.0001, "loss": 1.8154, "step": 604 }, { "epoch": 0.06949629544540807, "grad_norm": 0.36198514699935913, "learning_rate": 0.0001, "loss": 1.7698, "step": 605 }, { "epoch": 0.06961116535523519, "grad_norm": 0.35194844007492065, "learning_rate": 0.0001, "loss": 1.7223, "step": 606 }, { "epoch": 0.06972603526506231, "grad_norm": 0.3265458643436432, "learning_rate": 0.0001, "loss": 1.6648, "step": 607 }, { "epoch": 0.06984090517488943, "grad_norm": 0.3669833838939667, "learning_rate": 0.0001, "loss": 1.8176, "step": 608 }, { "epoch": 0.06995577508471656, "grad_norm": 0.35220587253570557, "learning_rate": 0.0001, "loss": 1.6173, "step": 609 }, { "epoch": 0.07007064499454368, "grad_norm": 0.3398061692714691, "learning_rate": 0.0001, "loss": 1.4962, "step": 610 }, { "epoch": 0.0701855149043708, "grad_norm": 0.36075279116630554, "learning_rate": 0.0001, "loss": 1.8924, "step": 611 }, { "epoch": 0.07030038481419792, "grad_norm": 0.34688082337379456, "learning_rate": 0.0001, "loss": 1.5272, "step": 612 }, { "epoch": 0.07041525472402504, "grad_norm": 0.33742544054985046, "learning_rate": 0.0001, "loss": 1.75, "step": 613 }, { "epoch": 0.07053012463385216, "grad_norm": 0.3314124345779419, "learning_rate": 0.0001, "loss": 1.7574, "step": 614 }, { "epoch": 0.07064499454367928, "grad_norm": 0.36540499329566956, "learning_rate": 0.0001, "loss": 1.926, "step": 615 }, { "epoch": 0.0707598644535064, "grad_norm": 0.3602568507194519, "learning_rate": 0.0001, "loss": 1.7833, "step": 616 }, { "epoch": 0.07087473436333352, "grad_norm": 0.3252723515033722, "learning_rate": 0.0001, "loss": 1.8458, "step": 617 }, { "epoch": 0.07098960427316064, "grad_norm": 0.3487424850463867, "learning_rate": 0.0001, "loss": 1.8981, "step": 618 }, { "epoch": 0.07110447418298776, "grad_norm": 0.33129236102104187, "learning_rate": 0.0001, "loss": 1.6613, "step": 619 }, { "epoch": 0.07121934409281488, "grad_norm": 0.33063703775405884, "learning_rate": 0.0001, "loss": 1.6859, "step": 620 }, { "epoch": 0.071334214002642, "grad_norm": 0.32881104946136475, "learning_rate": 0.0001, "loss": 1.7321, "step": 621 }, { "epoch": 0.07144908391246912, "grad_norm": 0.3607088029384613, "learning_rate": 0.0001, "loss": 1.698, "step": 622 }, { "epoch": 0.07156395382229624, "grad_norm": 0.3350905179977417, "learning_rate": 0.0001, "loss": 1.7254, "step": 623 }, { "epoch": 0.07167882373212336, "grad_norm": 0.35111361742019653, "learning_rate": 0.0001, "loss": 1.9324, "step": 624 }, { "epoch": 0.07179369364195048, "grad_norm": 0.3444902002811432, "learning_rate": 0.0001, "loss": 1.8281, "step": 625 }, { "epoch": 0.07190856355177762, "grad_norm": 0.34226348996162415, "learning_rate": 0.0001, "loss": 1.7824, "step": 626 }, { "epoch": 0.07202343346160474, "grad_norm": 0.33303242921829224, "learning_rate": 0.0001, "loss": 1.8413, "step": 627 }, { "epoch": 0.07213830337143186, "grad_norm": 0.3243139088153839, "learning_rate": 0.0001, "loss": 1.8731, "step": 628 }, { "epoch": 0.07225317328125898, "grad_norm": 0.3446861505508423, "learning_rate": 0.0001, "loss": 1.7179, "step": 629 }, { "epoch": 0.0723680431910861, "grad_norm": 0.337261438369751, "learning_rate": 0.0001, "loss": 1.6671, "step": 630 }, { "epoch": 0.07248291310091322, "grad_norm": 0.32150766253471375, "learning_rate": 0.0001, "loss": 1.8097, "step": 631 }, { "epoch": 0.07259778301074034, "grad_norm": 0.3300226032733917, "learning_rate": 0.0001, "loss": 1.6207, "step": 632 }, { "epoch": 0.07271265292056746, "grad_norm": 0.3479246199131012, "learning_rate": 0.0001, "loss": 1.7969, "step": 633 }, { "epoch": 0.07282752283039458, "grad_norm": 0.33039695024490356, "learning_rate": 0.0001, "loss": 1.4106, "step": 634 }, { "epoch": 0.0729423927402217, "grad_norm": 0.3294956088066101, "learning_rate": 0.0001, "loss": 1.7817, "step": 635 }, { "epoch": 0.07305726265004882, "grad_norm": 0.3452272415161133, "learning_rate": 0.0001, "loss": 1.7148, "step": 636 }, { "epoch": 0.07317213255987594, "grad_norm": 0.33393406867980957, "learning_rate": 0.0001, "loss": 1.8335, "step": 637 }, { "epoch": 0.07328700246970306, "grad_norm": 0.3169970214366913, "learning_rate": 0.0001, "loss": 1.6022, "step": 638 }, { "epoch": 0.07340187237953018, "grad_norm": 0.3217456340789795, "learning_rate": 0.0001, "loss": 1.579, "step": 639 }, { "epoch": 0.0735167422893573, "grad_norm": 0.34844133257865906, "learning_rate": 0.0001, "loss": 1.8499, "step": 640 }, { "epoch": 0.07363161219918442, "grad_norm": 0.33645370602607727, "learning_rate": 0.0001, "loss": 1.862, "step": 641 }, { "epoch": 0.07374648210901154, "grad_norm": 0.3201218843460083, "learning_rate": 0.0001, "loss": 1.7561, "step": 642 }, { "epoch": 0.07386135201883867, "grad_norm": 0.34113234281539917, "learning_rate": 0.0001, "loss": 1.8113, "step": 643 }, { "epoch": 0.07397622192866579, "grad_norm": 0.33081957697868347, "learning_rate": 0.0001, "loss": 1.7435, "step": 644 }, { "epoch": 0.07409109183849291, "grad_norm": 0.3413662314414978, "learning_rate": 0.0001, "loss": 1.8755, "step": 645 }, { "epoch": 0.07420596174832003, "grad_norm": 0.3311666250228882, "learning_rate": 0.0001, "loss": 1.769, "step": 646 }, { "epoch": 0.07432083165814715, "grad_norm": 0.33269715309143066, "learning_rate": 0.0001, "loss": 1.7321, "step": 647 }, { "epoch": 0.07443570156797427, "grad_norm": 0.34695979952812195, "learning_rate": 0.0001, "loss": 1.7628, "step": 648 }, { "epoch": 0.07455057147780139, "grad_norm": 0.3331931233406067, "learning_rate": 0.0001, "loss": 1.7498, "step": 649 }, { "epoch": 0.07466544138762851, "grad_norm": 0.3264698088169098, "learning_rate": 0.0001, "loss": 1.6114, "step": 650 }, { "epoch": 0.07478031129745563, "grad_norm": 0.3626859486103058, "learning_rate": 0.0001, "loss": 1.8154, "step": 651 }, { "epoch": 0.07489518120728275, "grad_norm": 0.32509714365005493, "learning_rate": 0.0001, "loss": 1.4671, "step": 652 }, { "epoch": 0.07501005111710987, "grad_norm": 0.3228186070919037, "learning_rate": 0.0001, "loss": 1.8735, "step": 653 }, { "epoch": 0.07512492102693699, "grad_norm": 0.3465333878993988, "learning_rate": 0.0001, "loss": 1.872, "step": 654 }, { "epoch": 0.07523979093676411, "grad_norm": 0.3378332555294037, "learning_rate": 0.0001, "loss": 1.6398, "step": 655 }, { "epoch": 0.07535466084659123, "grad_norm": 0.3364262282848358, "learning_rate": 0.0001, "loss": 1.8611, "step": 656 }, { "epoch": 0.07546953075641835, "grad_norm": 0.34226563572883606, "learning_rate": 0.0001, "loss": 1.7843, "step": 657 }, { "epoch": 0.07558440066624547, "grad_norm": 0.3533295691013336, "learning_rate": 0.0001, "loss": 1.7962, "step": 658 }, { "epoch": 0.07569927057607259, "grad_norm": 0.3422401249408722, "learning_rate": 0.0001, "loss": 1.8801, "step": 659 }, { "epoch": 0.07581414048589973, "grad_norm": 0.3540160357952118, "learning_rate": 0.0001, "loss": 1.8129, "step": 660 }, { "epoch": 0.07592901039572685, "grad_norm": 0.334587424993515, "learning_rate": 0.0001, "loss": 1.8578, "step": 661 }, { "epoch": 0.07604388030555397, "grad_norm": 0.32655155658721924, "learning_rate": 0.0001, "loss": 1.6494, "step": 662 }, { "epoch": 0.07615875021538109, "grad_norm": 0.36004751920700073, "learning_rate": 0.0001, "loss": 1.7735, "step": 663 }, { "epoch": 0.0762736201252082, "grad_norm": 0.32442474365234375, "learning_rate": 0.0001, "loss": 1.5389, "step": 664 }, { "epoch": 0.07638849003503533, "grad_norm": 0.344626784324646, "learning_rate": 0.0001, "loss": 1.6952, "step": 665 }, { "epoch": 0.07650335994486245, "grad_norm": 0.31557947397232056, "learning_rate": 0.0001, "loss": 1.5595, "step": 666 }, { "epoch": 0.07661822985468957, "grad_norm": 0.3274221420288086, "learning_rate": 0.0001, "loss": 1.6471, "step": 667 }, { "epoch": 0.07673309976451669, "grad_norm": 0.3906736969947815, "learning_rate": 0.0001, "loss": 1.6702, "step": 668 }, { "epoch": 0.0768479696743438, "grad_norm": 0.35045820474624634, "learning_rate": 0.0001, "loss": 1.8738, "step": 669 }, { "epoch": 0.07696283958417093, "grad_norm": 0.3959348201751709, "learning_rate": 0.0001, "loss": 2.094, "step": 670 }, { "epoch": 0.07707770949399805, "grad_norm": 0.3369539976119995, "learning_rate": 0.0001, "loss": 1.6899, "step": 671 }, { "epoch": 0.07719257940382517, "grad_norm": 0.34965980052948, "learning_rate": 0.0001, "loss": 1.8694, "step": 672 }, { "epoch": 0.07730744931365229, "grad_norm": 0.33249253034591675, "learning_rate": 0.0001, "loss": 1.5991, "step": 673 }, { "epoch": 0.0774223192234794, "grad_norm": 0.32257145643234253, "learning_rate": 0.0001, "loss": 1.7565, "step": 674 }, { "epoch": 0.07753718913330652, "grad_norm": 0.33610349893569946, "learning_rate": 0.0001, "loss": 1.8517, "step": 675 }, { "epoch": 0.07765205904313364, "grad_norm": 0.3666530251502991, "learning_rate": 0.0001, "loss": 1.6426, "step": 676 }, { "epoch": 0.07776692895296078, "grad_norm": 0.3422529697418213, "learning_rate": 0.0001, "loss": 1.6884, "step": 677 }, { "epoch": 0.0778817988627879, "grad_norm": 0.3361228108406067, "learning_rate": 0.0001, "loss": 1.7452, "step": 678 }, { "epoch": 0.07799666877261502, "grad_norm": 0.3532163202762604, "learning_rate": 0.0001, "loss": 1.6878, "step": 679 }, { "epoch": 0.07811153868244214, "grad_norm": 0.3304887115955353, "learning_rate": 0.0001, "loss": 1.6648, "step": 680 }, { "epoch": 0.07822640859226926, "grad_norm": 0.3171667456626892, "learning_rate": 0.0001, "loss": 1.5896, "step": 681 }, { "epoch": 0.07834127850209638, "grad_norm": 0.36117199063301086, "learning_rate": 0.0001, "loss": 1.7566, "step": 682 }, { "epoch": 0.0784561484119235, "grad_norm": 0.37346988916397095, "learning_rate": 0.0001, "loss": 1.8458, "step": 683 }, { "epoch": 0.07857101832175062, "grad_norm": 0.34928634762763977, "learning_rate": 0.0001, "loss": 1.661, "step": 684 }, { "epoch": 0.07868588823157774, "grad_norm": 0.34768396615982056, "learning_rate": 0.0001, "loss": 1.8074, "step": 685 }, { "epoch": 0.07880075814140486, "grad_norm": 0.3412458300590515, "learning_rate": 0.0001, "loss": 1.5803, "step": 686 }, { "epoch": 0.07891562805123198, "grad_norm": 0.3672271966934204, "learning_rate": 0.0001, "loss": 1.8826, "step": 687 }, { "epoch": 0.0790304979610591, "grad_norm": 0.33574315905570984, "learning_rate": 0.0001, "loss": 1.6836, "step": 688 }, { "epoch": 0.07914536787088622, "grad_norm": 0.3387349247932434, "learning_rate": 0.0001, "loss": 1.6531, "step": 689 }, { "epoch": 0.07926023778071334, "grad_norm": 0.3550173342227936, "learning_rate": 0.0001, "loss": 1.7975, "step": 690 }, { "epoch": 0.07937510769054046, "grad_norm": 0.380522221326828, "learning_rate": 0.0001, "loss": 1.9264, "step": 691 }, { "epoch": 0.07948997760036758, "grad_norm": 0.374406099319458, "learning_rate": 0.0001, "loss": 1.8652, "step": 692 }, { "epoch": 0.0796048475101947, "grad_norm": 0.34742456674575806, "learning_rate": 0.0001, "loss": 1.9069, "step": 693 }, { "epoch": 0.07971971742002182, "grad_norm": 0.35284706950187683, "learning_rate": 0.0001, "loss": 1.8065, "step": 694 }, { "epoch": 0.07983458732984895, "grad_norm": 0.3327619135379791, "learning_rate": 0.0001, "loss": 1.7721, "step": 695 }, { "epoch": 0.07994945723967607, "grad_norm": 0.3591189384460449, "learning_rate": 0.0001, "loss": 1.6802, "step": 696 }, { "epoch": 0.08006432714950319, "grad_norm": 0.34091916680336, "learning_rate": 0.0001, "loss": 1.6535, "step": 697 }, { "epoch": 0.08017919705933031, "grad_norm": 0.352022260427475, "learning_rate": 0.0001, "loss": 1.7479, "step": 698 }, { "epoch": 0.08029406696915743, "grad_norm": 0.33692196011543274, "learning_rate": 0.0001, "loss": 1.6643, "step": 699 }, { "epoch": 0.08040893687898455, "grad_norm": 0.370638370513916, "learning_rate": 0.0001, "loss": 2.0285, "step": 700 }, { "epoch": 0.08052380678881167, "grad_norm": 0.3345963656902313, "learning_rate": 0.0001, "loss": 1.7766, "step": 701 }, { "epoch": 0.08063867669863879, "grad_norm": 0.34501156210899353, "learning_rate": 0.0001, "loss": 1.8179, "step": 702 }, { "epoch": 0.08075354660846591, "grad_norm": 0.33231601119041443, "learning_rate": 0.0001, "loss": 1.6653, "step": 703 }, { "epoch": 0.08086841651829303, "grad_norm": 0.34279513359069824, "learning_rate": 0.0001, "loss": 1.695, "step": 704 }, { "epoch": 0.08098328642812015, "grad_norm": 0.3368370234966278, "learning_rate": 0.0001, "loss": 1.7584, "step": 705 }, { "epoch": 0.08109815633794727, "grad_norm": 0.34584423899650574, "learning_rate": 0.0001, "loss": 1.6271, "step": 706 }, { "epoch": 0.08121302624777439, "grad_norm": 0.35114485025405884, "learning_rate": 0.0001, "loss": 1.7287, "step": 707 }, { "epoch": 0.08132789615760151, "grad_norm": 0.31173431873321533, "learning_rate": 0.0001, "loss": 1.6346, "step": 708 }, { "epoch": 0.08144276606742863, "grad_norm": 0.3620467782020569, "learning_rate": 0.0001, "loss": 1.8973, "step": 709 }, { "epoch": 0.08155763597725575, "grad_norm": 0.3193514049053192, "learning_rate": 0.0001, "loss": 1.6496, "step": 710 }, { "epoch": 0.08167250588708287, "grad_norm": 0.34377214312553406, "learning_rate": 0.0001, "loss": 1.8458, "step": 711 }, { "epoch": 0.08178737579691, "grad_norm": 0.3406418263912201, "learning_rate": 0.0001, "loss": 1.6495, "step": 712 }, { "epoch": 0.08190224570673713, "grad_norm": 0.33058011531829834, "learning_rate": 0.0001, "loss": 1.7232, "step": 713 }, { "epoch": 0.08201711561656425, "grad_norm": 0.32958585023880005, "learning_rate": 0.0001, "loss": 1.588, "step": 714 }, { "epoch": 0.08213198552639137, "grad_norm": 0.3345566689968109, "learning_rate": 0.0001, "loss": 1.7372, "step": 715 }, { "epoch": 0.08224685543621849, "grad_norm": 0.3363969027996063, "learning_rate": 0.0001, "loss": 1.5248, "step": 716 }, { "epoch": 0.0823617253460456, "grad_norm": 0.3310002386569977, "learning_rate": 0.0001, "loss": 1.4364, "step": 717 }, { "epoch": 0.08247659525587273, "grad_norm": 0.35177963972091675, "learning_rate": 0.0001, "loss": 1.8578, "step": 718 }, { "epoch": 0.08259146516569985, "grad_norm": 0.37990766763687134, "learning_rate": 0.0001, "loss": 1.8266, "step": 719 }, { "epoch": 0.08270633507552697, "grad_norm": 0.3506127893924713, "learning_rate": 0.0001, "loss": 1.7454, "step": 720 }, { "epoch": 0.08282120498535409, "grad_norm": 0.3488253653049469, "learning_rate": 0.0001, "loss": 1.8147, "step": 721 }, { "epoch": 0.0829360748951812, "grad_norm": 0.34471482038497925, "learning_rate": 0.0001, "loss": 1.7288, "step": 722 }, { "epoch": 0.08305094480500833, "grad_norm": 0.33807575702667236, "learning_rate": 0.0001, "loss": 1.8128, "step": 723 }, { "epoch": 0.08316581471483545, "grad_norm": 0.3196840286254883, "learning_rate": 0.0001, "loss": 1.6008, "step": 724 }, { "epoch": 0.08328068462466257, "grad_norm": 0.36051392555236816, "learning_rate": 0.0001, "loss": 1.7202, "step": 725 }, { "epoch": 0.08339555453448969, "grad_norm": 0.3275487720966339, "learning_rate": 0.0001, "loss": 1.6035, "step": 726 }, { "epoch": 0.0835104244443168, "grad_norm": 0.3423649072647095, "learning_rate": 0.0001, "loss": 1.7988, "step": 727 }, { "epoch": 0.08362529435414393, "grad_norm": 0.3507118225097656, "learning_rate": 0.0001, "loss": 1.5723, "step": 728 }, { "epoch": 0.08374016426397106, "grad_norm": 0.3463688790798187, "learning_rate": 0.0001, "loss": 1.8294, "step": 729 }, { "epoch": 0.08385503417379818, "grad_norm": 0.3360377848148346, "learning_rate": 0.0001, "loss": 1.7275, "step": 730 }, { "epoch": 0.0839699040836253, "grad_norm": 0.3324314057826996, "learning_rate": 0.0001, "loss": 1.7487, "step": 731 }, { "epoch": 0.08408477399345242, "grad_norm": 0.3390614688396454, "learning_rate": 0.0001, "loss": 1.7351, "step": 732 }, { "epoch": 0.08419964390327954, "grad_norm": 0.35863199830055237, "learning_rate": 0.0001, "loss": 1.908, "step": 733 }, { "epoch": 0.08431451381310666, "grad_norm": 0.3417515158653259, "learning_rate": 0.0001, "loss": 1.8832, "step": 734 }, { "epoch": 0.08442938372293378, "grad_norm": 0.31135502457618713, "learning_rate": 0.0001, "loss": 1.432, "step": 735 }, { "epoch": 0.0845442536327609, "grad_norm": 0.33590710163116455, "learning_rate": 0.0001, "loss": 1.7915, "step": 736 }, { "epoch": 0.08465912354258802, "grad_norm": 0.33955830335617065, "learning_rate": 0.0001, "loss": 1.7333, "step": 737 }, { "epoch": 0.08477399345241514, "grad_norm": 0.3825172483921051, "learning_rate": 0.0001, "loss": 1.7999, "step": 738 }, { "epoch": 0.08488886336224226, "grad_norm": 0.33067587018013, "learning_rate": 0.0001, "loss": 1.7567, "step": 739 }, { "epoch": 0.08500373327206938, "grad_norm": 0.344756156206131, "learning_rate": 0.0001, "loss": 1.7844, "step": 740 }, { "epoch": 0.0851186031818965, "grad_norm": 0.3478997051715851, "learning_rate": 0.0001, "loss": 1.8903, "step": 741 }, { "epoch": 0.08523347309172362, "grad_norm": 0.3651295006275177, "learning_rate": 0.0001, "loss": 1.8882, "step": 742 }, { "epoch": 0.08534834300155074, "grad_norm": 0.30588778853416443, "learning_rate": 0.0001, "loss": 1.6636, "step": 743 }, { "epoch": 0.08546321291137786, "grad_norm": 0.3360552489757538, "learning_rate": 0.0001, "loss": 1.865, "step": 744 }, { "epoch": 0.08557808282120498, "grad_norm": 0.3278788626194, "learning_rate": 0.0001, "loss": 1.5782, "step": 745 }, { "epoch": 0.08569295273103211, "grad_norm": 0.3437139391899109, "learning_rate": 0.0001, "loss": 1.8981, "step": 746 }, { "epoch": 0.08580782264085923, "grad_norm": 0.34554487466812134, "learning_rate": 0.0001, "loss": 1.7921, "step": 747 }, { "epoch": 0.08592269255068635, "grad_norm": 0.35071298480033875, "learning_rate": 0.0001, "loss": 1.8354, "step": 748 }, { "epoch": 0.08603756246051347, "grad_norm": 0.36390817165374756, "learning_rate": 0.0001, "loss": 1.6217, "step": 749 }, { "epoch": 0.08615243237034059, "grad_norm": 0.35594916343688965, "learning_rate": 0.0001, "loss": 1.4535, "step": 750 }, { "epoch": 0.08626730228016771, "grad_norm": 0.3606272339820862, "learning_rate": 0.0001, "loss": 1.8661, "step": 751 }, { "epoch": 0.08638217218999483, "grad_norm": 0.3557438552379608, "learning_rate": 0.0001, "loss": 1.7762, "step": 752 }, { "epoch": 0.08649704209982195, "grad_norm": 0.37106749415397644, "learning_rate": 0.0001, "loss": 1.913, "step": 753 }, { "epoch": 0.08661191200964907, "grad_norm": 0.34176573157310486, "learning_rate": 0.0001, "loss": 1.7081, "step": 754 }, { "epoch": 0.08672678191947619, "grad_norm": 0.32890862226486206, "learning_rate": 0.0001, "loss": 1.7647, "step": 755 }, { "epoch": 0.08684165182930331, "grad_norm": 0.3404117822647095, "learning_rate": 0.0001, "loss": 1.7522, "step": 756 }, { "epoch": 0.08695652173913043, "grad_norm": 0.3754931092262268, "learning_rate": 0.0001, "loss": 2.0646, "step": 757 }, { "epoch": 0.08707139164895755, "grad_norm": 0.32435300946235657, "learning_rate": 0.0001, "loss": 1.5691, "step": 758 }, { "epoch": 0.08718626155878467, "grad_norm": 0.3234858810901642, "learning_rate": 0.0001, "loss": 1.6367, "step": 759 }, { "epoch": 0.08730113146861179, "grad_norm": 0.34800320863723755, "learning_rate": 0.0001, "loss": 1.7462, "step": 760 }, { "epoch": 0.08741600137843891, "grad_norm": 0.3534897565841675, "learning_rate": 0.0001, "loss": 1.7654, "step": 761 }, { "epoch": 0.08753087128826603, "grad_norm": 0.3358789384365082, "learning_rate": 0.0001, "loss": 1.8423, "step": 762 }, { "epoch": 0.08764574119809317, "grad_norm": 0.35216909646987915, "learning_rate": 0.0001, "loss": 1.9625, "step": 763 }, { "epoch": 0.08776061110792029, "grad_norm": 0.32955506443977356, "learning_rate": 0.0001, "loss": 1.7347, "step": 764 }, { "epoch": 0.0878754810177474, "grad_norm": 0.3332022428512573, "learning_rate": 0.0001, "loss": 1.7732, "step": 765 }, { "epoch": 0.08799035092757453, "grad_norm": 0.3248218894004822, "learning_rate": 0.0001, "loss": 1.6407, "step": 766 }, { "epoch": 0.08810522083740165, "grad_norm": 0.3316574692726135, "learning_rate": 0.0001, "loss": 1.8211, "step": 767 }, { "epoch": 0.08822009074722877, "grad_norm": 0.33877885341644287, "learning_rate": 0.0001, "loss": 1.7671, "step": 768 }, { "epoch": 0.08833496065705589, "grad_norm": 0.34064409136772156, "learning_rate": 0.0001, "loss": 1.8061, "step": 769 }, { "epoch": 0.088449830566883, "grad_norm": 0.3360152542591095, "learning_rate": 0.0001, "loss": 1.762, "step": 770 }, { "epoch": 0.08856470047671013, "grad_norm": 0.3164994716644287, "learning_rate": 0.0001, "loss": 1.7382, "step": 771 }, { "epoch": 0.08867957038653725, "grad_norm": 0.3785625994205475, "learning_rate": 0.0001, "loss": 1.9568, "step": 772 }, { "epoch": 0.08879444029636437, "grad_norm": 0.3452891409397125, "learning_rate": 0.0001, "loss": 1.7799, "step": 773 }, { "epoch": 0.08890931020619149, "grad_norm": 0.3608962595462799, "learning_rate": 0.0001, "loss": 1.7845, "step": 774 }, { "epoch": 0.0890241801160186, "grad_norm": 0.3125813901424408, "learning_rate": 0.0001, "loss": 1.7237, "step": 775 }, { "epoch": 0.08913905002584573, "grad_norm": 0.34034648537635803, "learning_rate": 0.0001, "loss": 1.7136, "step": 776 }, { "epoch": 0.08925391993567285, "grad_norm": 0.31160974502563477, "learning_rate": 0.0001, "loss": 1.6431, "step": 777 }, { "epoch": 0.08936878984549997, "grad_norm": 0.3525000512599945, "learning_rate": 0.0001, "loss": 1.8185, "step": 778 }, { "epoch": 0.08948365975532709, "grad_norm": 0.3665052056312561, "learning_rate": 0.0001, "loss": 1.5812, "step": 779 }, { "epoch": 0.08959852966515422, "grad_norm": 0.37317225337028503, "learning_rate": 0.0001, "loss": 1.7051, "step": 780 }, { "epoch": 0.08971339957498134, "grad_norm": 0.3269886374473572, "learning_rate": 0.0001, "loss": 1.6282, "step": 781 }, { "epoch": 0.08982826948480846, "grad_norm": 0.34751588106155396, "learning_rate": 0.0001, "loss": 1.7788, "step": 782 }, { "epoch": 0.08994313939463558, "grad_norm": 0.35242506861686707, "learning_rate": 0.0001, "loss": 1.8759, "step": 783 }, { "epoch": 0.0900580093044627, "grad_norm": 0.3438495695590973, "learning_rate": 0.0001, "loss": 1.6846, "step": 784 }, { "epoch": 0.09017287921428982, "grad_norm": 0.32642510533332825, "learning_rate": 0.0001, "loss": 1.6951, "step": 785 }, { "epoch": 0.09028774912411694, "grad_norm": 0.32845741510391235, "learning_rate": 0.0001, "loss": 1.7464, "step": 786 }, { "epoch": 0.09040261903394406, "grad_norm": 0.36917203664779663, "learning_rate": 0.0001, "loss": 1.7882, "step": 787 }, { "epoch": 0.09051748894377118, "grad_norm": 0.3328888416290283, "learning_rate": 0.0001, "loss": 1.6509, "step": 788 }, { "epoch": 0.0906323588535983, "grad_norm": 0.3314300775527954, "learning_rate": 0.0001, "loss": 1.7402, "step": 789 }, { "epoch": 0.09074722876342542, "grad_norm": 0.34112605452537537, "learning_rate": 0.0001, "loss": 1.8362, "step": 790 }, { "epoch": 0.09086209867325254, "grad_norm": 0.3412010669708252, "learning_rate": 0.0001, "loss": 1.9103, "step": 791 }, { "epoch": 0.09097696858307966, "grad_norm": 0.34920912981033325, "learning_rate": 0.0001, "loss": 1.756, "step": 792 }, { "epoch": 0.09109183849290678, "grad_norm": 0.36219510436058044, "learning_rate": 0.0001, "loss": 1.8899, "step": 793 }, { "epoch": 0.0912067084027339, "grad_norm": 0.34478431940078735, "learning_rate": 0.0001, "loss": 1.8184, "step": 794 }, { "epoch": 0.09132157831256102, "grad_norm": 0.32101911306381226, "learning_rate": 0.0001, "loss": 1.8286, "step": 795 }, { "epoch": 0.09143644822238814, "grad_norm": 0.3403749465942383, "learning_rate": 0.0001, "loss": 1.7323, "step": 796 }, { "epoch": 0.09155131813221527, "grad_norm": 0.3531983196735382, "learning_rate": 0.0001, "loss": 1.9426, "step": 797 }, { "epoch": 0.09166618804204239, "grad_norm": 0.33084288239479065, "learning_rate": 0.0001, "loss": 1.662, "step": 798 }, { "epoch": 0.09178105795186951, "grad_norm": 0.3422669768333435, "learning_rate": 0.0001, "loss": 1.8586, "step": 799 }, { "epoch": 0.09189592786169663, "grad_norm": 0.333498477935791, "learning_rate": 0.0001, "loss": 1.6855, "step": 800 }, { "epoch": 0.09201079777152375, "grad_norm": 0.3705277740955353, "learning_rate": 0.0001, "loss": 1.7961, "step": 801 }, { "epoch": 0.09212566768135087, "grad_norm": 0.3193943500518799, "learning_rate": 0.0001, "loss": 1.6723, "step": 802 }, { "epoch": 0.09224053759117799, "grad_norm": 0.3290367126464844, "learning_rate": 0.0001, "loss": 1.6078, "step": 803 }, { "epoch": 0.09235540750100511, "grad_norm": 0.3147204518318176, "learning_rate": 0.0001, "loss": 1.6357, "step": 804 }, { "epoch": 0.09247027741083223, "grad_norm": 0.33811822533607483, "learning_rate": 0.0001, "loss": 1.6841, "step": 805 }, { "epoch": 0.09258514732065935, "grad_norm": 0.34665969014167786, "learning_rate": 0.0001, "loss": 1.8406, "step": 806 }, { "epoch": 0.09270001723048647, "grad_norm": 0.367832213640213, "learning_rate": 0.0001, "loss": 1.8237, "step": 807 }, { "epoch": 0.09281488714031359, "grad_norm": 0.3354632258415222, "learning_rate": 0.0001, "loss": 1.7281, "step": 808 }, { "epoch": 0.09292975705014071, "grad_norm": 0.3267354965209961, "learning_rate": 0.0001, "loss": 1.6339, "step": 809 }, { "epoch": 0.09304462695996783, "grad_norm": 0.3412437438964844, "learning_rate": 0.0001, "loss": 1.8719, "step": 810 }, { "epoch": 0.09315949686979495, "grad_norm": 0.3305417597293854, "learning_rate": 0.0001, "loss": 1.8327, "step": 811 }, { "epoch": 0.09327436677962207, "grad_norm": 0.3420979678630829, "learning_rate": 0.0001, "loss": 1.7677, "step": 812 }, { "epoch": 0.09338923668944919, "grad_norm": 0.3424234092235565, "learning_rate": 0.0001, "loss": 1.8198, "step": 813 }, { "epoch": 0.09350410659927633, "grad_norm": 0.3489876985549927, "learning_rate": 0.0001, "loss": 1.8778, "step": 814 }, { "epoch": 0.09361897650910345, "grad_norm": 0.3356654942035675, "learning_rate": 0.0001, "loss": 1.6532, "step": 815 }, { "epoch": 0.09373384641893057, "grad_norm": 0.33615395426750183, "learning_rate": 0.0001, "loss": 1.6045, "step": 816 }, { "epoch": 0.09384871632875769, "grad_norm": 0.3469023108482361, "learning_rate": 0.0001, "loss": 1.7758, "step": 817 }, { "epoch": 0.0939635862385848, "grad_norm": 0.3822448253631592, "learning_rate": 0.0001, "loss": 1.8071, "step": 818 }, { "epoch": 0.09407845614841193, "grad_norm": 0.32721102237701416, "learning_rate": 0.0001, "loss": 1.7461, "step": 819 }, { "epoch": 0.09419332605823905, "grad_norm": 0.3481324017047882, "learning_rate": 0.0001, "loss": 1.5403, "step": 820 }, { "epoch": 0.09430819596806617, "grad_norm": 0.34965309500694275, "learning_rate": 0.0001, "loss": 1.6718, "step": 821 }, { "epoch": 0.09442306587789329, "grad_norm": 0.3382103443145752, "learning_rate": 0.0001, "loss": 1.6406, "step": 822 }, { "epoch": 0.0945379357877204, "grad_norm": 0.33241525292396545, "learning_rate": 0.0001, "loss": 1.6408, "step": 823 }, { "epoch": 0.09465280569754753, "grad_norm": 0.34981343150138855, "learning_rate": 0.0001, "loss": 1.9116, "step": 824 }, { "epoch": 0.09476767560737465, "grad_norm": 0.3368913531303406, "learning_rate": 0.0001, "loss": 1.8079, "step": 825 }, { "epoch": 0.09488254551720177, "grad_norm": 0.3350062370300293, "learning_rate": 0.0001, "loss": 1.7194, "step": 826 }, { "epoch": 0.09499741542702889, "grad_norm": 0.365464448928833, "learning_rate": 0.0001, "loss": 1.9002, "step": 827 }, { "epoch": 0.095112285336856, "grad_norm": 0.3544370234012604, "learning_rate": 0.0001, "loss": 1.9647, "step": 828 }, { "epoch": 0.09522715524668313, "grad_norm": 0.3434012532234192, "learning_rate": 0.0001, "loss": 1.8148, "step": 829 }, { "epoch": 0.09534202515651025, "grad_norm": 0.33346980810165405, "learning_rate": 0.0001, "loss": 1.5846, "step": 830 }, { "epoch": 0.09545689506633737, "grad_norm": 0.3305695652961731, "learning_rate": 0.0001, "loss": 1.7361, "step": 831 }, { "epoch": 0.0955717649761645, "grad_norm": 0.32634860277175903, "learning_rate": 0.0001, "loss": 1.5721, "step": 832 }, { "epoch": 0.09568663488599162, "grad_norm": 0.3320368528366089, "learning_rate": 0.0001, "loss": 1.6206, "step": 833 }, { "epoch": 0.09580150479581874, "grad_norm": 0.33831140398979187, "learning_rate": 0.0001, "loss": 1.9525, "step": 834 }, { "epoch": 0.09591637470564586, "grad_norm": 0.37599530816078186, "learning_rate": 0.0001, "loss": 1.8062, "step": 835 }, { "epoch": 0.09603124461547298, "grad_norm": 0.3151451647281647, "learning_rate": 0.0001, "loss": 1.5515, "step": 836 }, { "epoch": 0.0961461145253001, "grad_norm": 0.3700610101222992, "learning_rate": 0.0001, "loss": 1.9294, "step": 837 }, { "epoch": 0.09626098443512722, "grad_norm": 0.3530856668949127, "learning_rate": 0.0001, "loss": 1.9053, "step": 838 }, { "epoch": 0.09637585434495434, "grad_norm": 0.3287757933139801, "learning_rate": 0.0001, "loss": 1.7148, "step": 839 }, { "epoch": 0.09649072425478146, "grad_norm": 0.33994293212890625, "learning_rate": 0.0001, "loss": 1.6897, "step": 840 }, { "epoch": 0.09660559416460858, "grad_norm": 0.3461272418498993, "learning_rate": 0.0001, "loss": 1.6242, "step": 841 }, { "epoch": 0.0967204640744357, "grad_norm": 0.3444170653820038, "learning_rate": 0.0001, "loss": 1.6429, "step": 842 }, { "epoch": 0.09683533398426282, "grad_norm": 0.33912792801856995, "learning_rate": 0.0001, "loss": 1.7618, "step": 843 }, { "epoch": 0.09695020389408994, "grad_norm": 0.36995047330856323, "learning_rate": 0.0001, "loss": 1.7405, "step": 844 }, { "epoch": 0.09706507380391706, "grad_norm": 0.318174809217453, "learning_rate": 0.0001, "loss": 1.5174, "step": 845 }, { "epoch": 0.09717994371374418, "grad_norm": 0.340555876493454, "learning_rate": 0.0001, "loss": 1.7104, "step": 846 }, { "epoch": 0.0972948136235713, "grad_norm": 0.3448858857154846, "learning_rate": 0.0001, "loss": 1.6208, "step": 847 }, { "epoch": 0.09740968353339842, "grad_norm": 0.34647923707962036, "learning_rate": 0.0001, "loss": 1.8009, "step": 848 }, { "epoch": 0.09752455344322555, "grad_norm": 0.3762481212615967, "learning_rate": 0.0001, "loss": 1.6759, "step": 849 }, { "epoch": 0.09763942335305267, "grad_norm": 0.3799351751804352, "learning_rate": 0.0001, "loss": 1.9706, "step": 850 }, { "epoch": 0.0977542932628798, "grad_norm": 0.33727753162384033, "learning_rate": 0.0001, "loss": 1.6215, "step": 851 }, { "epoch": 0.09786916317270691, "grad_norm": 0.3406698703765869, "learning_rate": 0.0001, "loss": 1.7837, "step": 852 }, { "epoch": 0.09798403308253403, "grad_norm": 0.3939476013183594, "learning_rate": 0.0001, "loss": 1.6971, "step": 853 }, { "epoch": 0.09809890299236115, "grad_norm": 0.3444227874279022, "learning_rate": 0.0001, "loss": 1.5426, "step": 854 }, { "epoch": 0.09821377290218827, "grad_norm": 0.38864031434059143, "learning_rate": 0.0001, "loss": 1.7997, "step": 855 }, { "epoch": 0.0983286428120154, "grad_norm": 0.36266061663627625, "learning_rate": 0.0001, "loss": 1.6731, "step": 856 }, { "epoch": 0.09844351272184251, "grad_norm": 0.3754750192165375, "learning_rate": 0.0001, "loss": 1.922, "step": 857 }, { "epoch": 0.09855838263166963, "grad_norm": 0.3244558274745941, "learning_rate": 0.0001, "loss": 1.6094, "step": 858 }, { "epoch": 0.09867325254149675, "grad_norm": 0.3381154537200928, "learning_rate": 0.0001, "loss": 1.5764, "step": 859 }, { "epoch": 0.09878812245132387, "grad_norm": 0.37016791105270386, "learning_rate": 0.0001, "loss": 1.7252, "step": 860 }, { "epoch": 0.098902992361151, "grad_norm": 0.3377515971660614, "learning_rate": 0.0001, "loss": 1.7457, "step": 861 }, { "epoch": 0.09901786227097811, "grad_norm": 0.3582906126976013, "learning_rate": 0.0001, "loss": 1.7201, "step": 862 }, { "epoch": 0.09913273218080523, "grad_norm": 0.39218735694885254, "learning_rate": 0.0001, "loss": 2.1357, "step": 863 }, { "epoch": 0.09924760209063235, "grad_norm": 0.3586115837097168, "learning_rate": 0.0001, "loss": 1.7406, "step": 864 }, { "epoch": 0.09936247200045947, "grad_norm": 0.3303786516189575, "learning_rate": 0.0001, "loss": 1.6618, "step": 865 }, { "epoch": 0.0994773419102866, "grad_norm": 0.34086883068084717, "learning_rate": 0.0001, "loss": 1.6499, "step": 866 }, { "epoch": 0.09959221182011373, "grad_norm": 0.3244491219520569, "learning_rate": 0.0001, "loss": 1.621, "step": 867 }, { "epoch": 0.09970708172994085, "grad_norm": 0.3519229292869568, "learning_rate": 0.0001, "loss": 1.7324, "step": 868 }, { "epoch": 0.09982195163976797, "grad_norm": 0.3447989523410797, "learning_rate": 0.0001, "loss": 1.7337, "step": 869 }, { "epoch": 0.09993682154959509, "grad_norm": 0.3334493935108185, "learning_rate": 0.0001, "loss": 1.685, "step": 870 }, { "epoch": 0.1000516914594222, "grad_norm": 0.3429296612739563, "learning_rate": 0.0001, "loss": 1.7162, "step": 871 }, { "epoch": 0.10016656136924933, "grad_norm": 0.3839278519153595, "learning_rate": 0.0001, "loss": 1.7534, "step": 872 }, { "epoch": 0.10028143127907645, "grad_norm": 0.33588939905166626, "learning_rate": 0.0001, "loss": 1.6247, "step": 873 }, { "epoch": 0.10039630118890357, "grad_norm": 0.34705570340156555, "learning_rate": 0.0001, "loss": 1.841, "step": 874 }, { "epoch": 0.10051117109873069, "grad_norm": 0.36733031272888184, "learning_rate": 0.0001, "loss": 1.852, "step": 875 }, { "epoch": 0.1006260410085578, "grad_norm": 0.3770068883895874, "learning_rate": 0.0001, "loss": 1.6318, "step": 876 }, { "epoch": 0.10074091091838493, "grad_norm": 0.3212607800960541, "learning_rate": 0.0001, "loss": 1.7223, "step": 877 }, { "epoch": 0.10085578082821205, "grad_norm": 0.36928656697273254, "learning_rate": 0.0001, "loss": 1.7392, "step": 878 }, { "epoch": 0.10097065073803917, "grad_norm": 0.34793323278427124, "learning_rate": 0.0001, "loss": 1.7446, "step": 879 }, { "epoch": 0.10108552064786629, "grad_norm": 0.3587128221988678, "learning_rate": 0.0001, "loss": 1.8248, "step": 880 }, { "epoch": 0.1012003905576934, "grad_norm": 0.37394312024116516, "learning_rate": 0.0001, "loss": 1.8343, "step": 881 }, { "epoch": 0.10131526046752053, "grad_norm": 0.3411267101764679, "learning_rate": 0.0001, "loss": 1.7313, "step": 882 }, { "epoch": 0.10143013037734766, "grad_norm": 0.3414922058582306, "learning_rate": 0.0001, "loss": 1.6468, "step": 883 }, { "epoch": 0.10154500028717478, "grad_norm": 0.3430386781692505, "learning_rate": 0.0001, "loss": 1.6358, "step": 884 }, { "epoch": 0.1016598701970019, "grad_norm": 0.38398411870002747, "learning_rate": 0.0001, "loss": 1.8655, "step": 885 }, { "epoch": 0.10177474010682902, "grad_norm": 0.35513511300086975, "learning_rate": 0.0001, "loss": 1.7441, "step": 886 }, { "epoch": 0.10188961001665614, "grad_norm": 0.37063589692115784, "learning_rate": 0.0001, "loss": 1.8402, "step": 887 }, { "epoch": 0.10200447992648326, "grad_norm": 0.3473655581474304, "learning_rate": 0.0001, "loss": 1.7719, "step": 888 }, { "epoch": 0.10211934983631038, "grad_norm": 0.3341835141181946, "learning_rate": 0.0001, "loss": 1.9165, "step": 889 }, { "epoch": 0.1022342197461375, "grad_norm": 0.33776983618736267, "learning_rate": 0.0001, "loss": 1.6519, "step": 890 }, { "epoch": 0.10234908965596462, "grad_norm": 0.34027940034866333, "learning_rate": 0.0001, "loss": 1.6075, "step": 891 }, { "epoch": 0.10246395956579174, "grad_norm": 0.3332427442073822, "learning_rate": 0.0001, "loss": 1.6832, "step": 892 }, { "epoch": 0.10257882947561886, "grad_norm": 0.3453330099582672, "learning_rate": 0.0001, "loss": 1.8543, "step": 893 }, { "epoch": 0.10269369938544598, "grad_norm": 0.3439124524593353, "learning_rate": 0.0001, "loss": 1.8294, "step": 894 }, { "epoch": 0.1028085692952731, "grad_norm": 0.3373720645904541, "learning_rate": 0.0001, "loss": 1.816, "step": 895 }, { "epoch": 0.10292343920510022, "grad_norm": 0.3701860308647156, "learning_rate": 0.0001, "loss": 1.6363, "step": 896 }, { "epoch": 0.10303830911492734, "grad_norm": 0.3510701358318329, "learning_rate": 0.0001, "loss": 1.8181, "step": 897 }, { "epoch": 0.10315317902475446, "grad_norm": 0.383064329624176, "learning_rate": 0.0001, "loss": 1.7649, "step": 898 }, { "epoch": 0.10326804893458158, "grad_norm": 0.33695435523986816, "learning_rate": 0.0001, "loss": 1.7667, "step": 899 }, { "epoch": 0.10338291884440871, "grad_norm": 0.32354670763015747, "learning_rate": 0.0001, "loss": 1.6521, "step": 900 }, { "epoch": 0.10349778875423583, "grad_norm": 0.3468325138092041, "learning_rate": 0.0001, "loss": 1.897, "step": 901 }, { "epoch": 0.10361265866406295, "grad_norm": 0.33674290776252747, "learning_rate": 0.0001, "loss": 1.7802, "step": 902 }, { "epoch": 0.10372752857389007, "grad_norm": 0.3250616490840912, "learning_rate": 0.0001, "loss": 1.7262, "step": 903 }, { "epoch": 0.1038423984837172, "grad_norm": 0.3321487009525299, "learning_rate": 0.0001, "loss": 1.7408, "step": 904 }, { "epoch": 0.10395726839354431, "grad_norm": 0.3137947916984558, "learning_rate": 0.0001, "loss": 1.6136, "step": 905 }, { "epoch": 0.10407213830337143, "grad_norm": 0.35805246233940125, "learning_rate": 0.0001, "loss": 1.7689, "step": 906 }, { "epoch": 0.10418700821319855, "grad_norm": 0.35889506340026855, "learning_rate": 0.0001, "loss": 1.8557, "step": 907 }, { "epoch": 0.10430187812302567, "grad_norm": 0.3665921092033386, "learning_rate": 0.0001, "loss": 1.8241, "step": 908 }, { "epoch": 0.1044167480328528, "grad_norm": 0.32144057750701904, "learning_rate": 0.0001, "loss": 1.6686, "step": 909 }, { "epoch": 0.10453161794267991, "grad_norm": 0.3347714841365814, "learning_rate": 0.0001, "loss": 1.5503, "step": 910 }, { "epoch": 0.10464648785250703, "grad_norm": 0.3488980233669281, "learning_rate": 0.0001, "loss": 1.8368, "step": 911 }, { "epoch": 0.10476135776233415, "grad_norm": 0.3683393895626068, "learning_rate": 0.0001, "loss": 1.8511, "step": 912 }, { "epoch": 0.10487622767216127, "grad_norm": 0.36709287762641907, "learning_rate": 0.0001, "loss": 1.8485, "step": 913 }, { "epoch": 0.1049910975819884, "grad_norm": 0.3464992642402649, "learning_rate": 0.0001, "loss": 1.8211, "step": 914 }, { "epoch": 0.10510596749181551, "grad_norm": 0.3259120285511017, "learning_rate": 0.0001, "loss": 1.6602, "step": 915 }, { "epoch": 0.10522083740164263, "grad_norm": 0.3253241777420044, "learning_rate": 0.0001, "loss": 1.721, "step": 916 }, { "epoch": 0.10533570731146977, "grad_norm": 0.3512059152126312, "learning_rate": 0.0001, "loss": 1.7504, "step": 917 }, { "epoch": 0.10545057722129689, "grad_norm": 0.34296149015426636, "learning_rate": 0.0001, "loss": 1.655, "step": 918 }, { "epoch": 0.10556544713112401, "grad_norm": 0.34532561898231506, "learning_rate": 0.0001, "loss": 1.7067, "step": 919 }, { "epoch": 0.10568031704095113, "grad_norm": 0.4030701816082001, "learning_rate": 0.0001, "loss": 1.8292, "step": 920 }, { "epoch": 0.10579518695077825, "grad_norm": 0.3220067322254181, "learning_rate": 0.0001, "loss": 1.7312, "step": 921 }, { "epoch": 0.10591005686060537, "grad_norm": 0.35036081075668335, "learning_rate": 0.0001, "loss": 1.9524, "step": 922 }, { "epoch": 0.10602492677043249, "grad_norm": 0.34477829933166504, "learning_rate": 0.0001, "loss": 1.6833, "step": 923 }, { "epoch": 0.10613979668025961, "grad_norm": 0.3312055468559265, "learning_rate": 0.0001, "loss": 1.7349, "step": 924 }, { "epoch": 0.10625466659008673, "grad_norm": 0.3699291944503784, "learning_rate": 0.0001, "loss": 1.8016, "step": 925 }, { "epoch": 0.10636953649991385, "grad_norm": 0.35141250491142273, "learning_rate": 0.0001, "loss": 1.6422, "step": 926 }, { "epoch": 0.10648440640974097, "grad_norm": 0.3546350300312042, "learning_rate": 0.0001, "loss": 1.792, "step": 927 }, { "epoch": 0.10659927631956809, "grad_norm": 0.32933250069618225, "learning_rate": 0.0001, "loss": 1.6196, "step": 928 }, { "epoch": 0.10671414622939521, "grad_norm": 0.31342101097106934, "learning_rate": 0.0001, "loss": 1.4959, "step": 929 }, { "epoch": 0.10682901613922233, "grad_norm": 0.3823557496070862, "learning_rate": 0.0001, "loss": 1.7735, "step": 930 }, { "epoch": 0.10694388604904945, "grad_norm": 0.37668943405151367, "learning_rate": 0.0001, "loss": 1.7153, "step": 931 }, { "epoch": 0.10705875595887657, "grad_norm": 0.3394898772239685, "learning_rate": 0.0001, "loss": 1.5666, "step": 932 }, { "epoch": 0.10717362586870369, "grad_norm": 0.3595438301563263, "learning_rate": 0.0001, "loss": 1.5773, "step": 933 }, { "epoch": 0.10728849577853082, "grad_norm": 0.3278176486492157, "learning_rate": 0.0001, "loss": 1.4859, "step": 934 }, { "epoch": 0.10740336568835794, "grad_norm": 0.35469111800193787, "learning_rate": 0.0001, "loss": 1.7812, "step": 935 }, { "epoch": 0.10751823559818506, "grad_norm": 0.35269084572792053, "learning_rate": 0.0001, "loss": 1.7469, "step": 936 }, { "epoch": 0.10763310550801218, "grad_norm": 0.3482814133167267, "learning_rate": 0.0001, "loss": 1.4346, "step": 937 }, { "epoch": 0.1077479754178393, "grad_norm": 0.3587512671947479, "learning_rate": 0.0001, "loss": 1.433, "step": 938 }, { "epoch": 0.10786284532766642, "grad_norm": 0.3574024736881256, "learning_rate": 0.0001, "loss": 1.9325, "step": 939 }, { "epoch": 0.10797771523749354, "grad_norm": 0.3392809331417084, "learning_rate": 0.0001, "loss": 1.7022, "step": 940 }, { "epoch": 0.10809258514732066, "grad_norm": 0.3584175109863281, "learning_rate": 0.0001, "loss": 1.6396, "step": 941 }, { "epoch": 0.10820745505714778, "grad_norm": 0.35646557807922363, "learning_rate": 0.0001, "loss": 1.7241, "step": 942 }, { "epoch": 0.1083223249669749, "grad_norm": 0.3471522331237793, "learning_rate": 0.0001, "loss": 1.7296, "step": 943 }, { "epoch": 0.10843719487680202, "grad_norm": 0.36520814895629883, "learning_rate": 0.0001, "loss": 1.7646, "step": 944 }, { "epoch": 0.10855206478662914, "grad_norm": 0.3618639409542084, "learning_rate": 0.0001, "loss": 1.879, "step": 945 }, { "epoch": 0.10866693469645626, "grad_norm": 0.3887125849723816, "learning_rate": 0.0001, "loss": 1.9636, "step": 946 }, { "epoch": 0.10878180460628338, "grad_norm": 0.36584070324897766, "learning_rate": 0.0001, "loss": 1.8166, "step": 947 }, { "epoch": 0.1088966745161105, "grad_norm": 0.4011528491973877, "learning_rate": 0.0001, "loss": 1.8934, "step": 948 }, { "epoch": 0.10901154442593762, "grad_norm": 0.35552287101745605, "learning_rate": 0.0001, "loss": 1.7606, "step": 949 }, { "epoch": 0.10912641433576474, "grad_norm": 0.3487032949924469, "learning_rate": 0.0001, "loss": 1.6729, "step": 950 }, { "epoch": 0.10924128424559186, "grad_norm": 0.34331613779067993, "learning_rate": 0.0001, "loss": 1.5127, "step": 951 }, { "epoch": 0.109356154155419, "grad_norm": 0.35324063897132874, "learning_rate": 0.0001, "loss": 1.7534, "step": 952 }, { "epoch": 0.10947102406524611, "grad_norm": 0.33275148272514343, "learning_rate": 0.0001, "loss": 1.6231, "step": 953 }, { "epoch": 0.10958589397507323, "grad_norm": 0.329111784696579, "learning_rate": 0.0001, "loss": 1.682, "step": 954 }, { "epoch": 0.10970076388490035, "grad_norm": 0.3550437092781067, "learning_rate": 0.0001, "loss": 1.8724, "step": 955 }, { "epoch": 0.10981563379472747, "grad_norm": 0.3454737663269043, "learning_rate": 0.0001, "loss": 1.7989, "step": 956 }, { "epoch": 0.1099305037045546, "grad_norm": 0.3605286777019501, "learning_rate": 0.0001, "loss": 1.7102, "step": 957 }, { "epoch": 0.11004537361438171, "grad_norm": 0.3494301438331604, "learning_rate": 0.0001, "loss": 1.6718, "step": 958 }, { "epoch": 0.11016024352420883, "grad_norm": 0.3383633494377136, "learning_rate": 0.0001, "loss": 1.8941, "step": 959 }, { "epoch": 0.11027511343403595, "grad_norm": 0.3342844247817993, "learning_rate": 0.0001, "loss": 1.7833, "step": 960 }, { "epoch": 0.11038998334386307, "grad_norm": 0.3412388861179352, "learning_rate": 0.0001, "loss": 1.6875, "step": 961 }, { "epoch": 0.1105048532536902, "grad_norm": 0.34491872787475586, "learning_rate": 0.0001, "loss": 1.6936, "step": 962 }, { "epoch": 0.11061972316351731, "grad_norm": 0.3250080943107605, "learning_rate": 0.0001, "loss": 1.5619, "step": 963 }, { "epoch": 0.11073459307334443, "grad_norm": 0.3618282973766327, "learning_rate": 0.0001, "loss": 1.7687, "step": 964 }, { "epoch": 0.11084946298317155, "grad_norm": 0.37689584493637085, "learning_rate": 0.0001, "loss": 1.6914, "step": 965 }, { "epoch": 0.11096433289299867, "grad_norm": 0.35625600814819336, "learning_rate": 0.0001, "loss": 1.7087, "step": 966 }, { "epoch": 0.1110792028028258, "grad_norm": 0.36949923634529114, "learning_rate": 0.0001, "loss": 1.8479, "step": 967 }, { "epoch": 0.11119407271265291, "grad_norm": 0.32159894704818726, "learning_rate": 0.0001, "loss": 1.7816, "step": 968 }, { "epoch": 0.11130894262248005, "grad_norm": 0.35336339473724365, "learning_rate": 0.0001, "loss": 1.7206, "step": 969 }, { "epoch": 0.11142381253230717, "grad_norm": 0.3411477506160736, "learning_rate": 0.0001, "loss": 1.7505, "step": 970 }, { "epoch": 0.11153868244213429, "grad_norm": 0.37740951776504517, "learning_rate": 0.0001, "loss": 1.8982, "step": 971 }, { "epoch": 0.11165355235196141, "grad_norm": 0.32587164640426636, "learning_rate": 0.0001, "loss": 1.7459, "step": 972 }, { "epoch": 0.11176842226178853, "grad_norm": 0.40314409136772156, "learning_rate": 0.0001, "loss": 1.752, "step": 973 }, { "epoch": 0.11188329217161565, "grad_norm": 0.3304173946380615, "learning_rate": 0.0001, "loss": 1.5715, "step": 974 }, { "epoch": 0.11199816208144277, "grad_norm": 0.3680497407913208, "learning_rate": 0.0001, "loss": 1.6852, "step": 975 }, { "epoch": 0.11211303199126989, "grad_norm": 0.3391849100589752, "learning_rate": 0.0001, "loss": 1.7026, "step": 976 }, { "epoch": 0.11222790190109701, "grad_norm": 0.37164103984832764, "learning_rate": 0.0001, "loss": 1.847, "step": 977 }, { "epoch": 0.11234277181092413, "grad_norm": 0.3532995581626892, "learning_rate": 0.0001, "loss": 1.8528, "step": 978 }, { "epoch": 0.11245764172075125, "grad_norm": 0.35095518827438354, "learning_rate": 0.0001, "loss": 1.8725, "step": 979 }, { "epoch": 0.11257251163057837, "grad_norm": 0.3317796289920807, "learning_rate": 0.0001, "loss": 1.6293, "step": 980 }, { "epoch": 0.11268738154040549, "grad_norm": 0.3477189838886261, "learning_rate": 0.0001, "loss": 1.6846, "step": 981 }, { "epoch": 0.11280225145023261, "grad_norm": 0.34023842215538025, "learning_rate": 0.0001, "loss": 1.6921, "step": 982 }, { "epoch": 0.11291712136005973, "grad_norm": 0.3701896369457245, "learning_rate": 0.0001, "loss": 1.545, "step": 983 }, { "epoch": 0.11303199126988685, "grad_norm": 0.3711189329624176, "learning_rate": 0.0001, "loss": 1.4718, "step": 984 }, { "epoch": 0.11314686117971397, "grad_norm": 0.35467204451560974, "learning_rate": 0.0001, "loss": 1.7369, "step": 985 }, { "epoch": 0.1132617310895411, "grad_norm": 0.3388875722885132, "learning_rate": 0.0001, "loss": 1.7018, "step": 986 }, { "epoch": 0.11337660099936822, "grad_norm": 0.34032291173934937, "learning_rate": 0.0001, "loss": 1.6629, "step": 987 }, { "epoch": 0.11349147090919534, "grad_norm": 0.3326657712459564, "learning_rate": 0.0001, "loss": 1.5377, "step": 988 }, { "epoch": 0.11360634081902246, "grad_norm": 0.3543054163455963, "learning_rate": 0.0001, "loss": 1.6426, "step": 989 }, { "epoch": 0.11372121072884958, "grad_norm": 0.3578903377056122, "learning_rate": 0.0001, "loss": 1.7261, "step": 990 }, { "epoch": 0.1138360806386767, "grad_norm": 0.3672734498977661, "learning_rate": 0.0001, "loss": 1.8798, "step": 991 }, { "epoch": 0.11395095054850382, "grad_norm": 0.3719589114189148, "learning_rate": 0.0001, "loss": 1.6408, "step": 992 }, { "epoch": 0.11406582045833094, "grad_norm": 0.3530134856700897, "learning_rate": 0.0001, "loss": 1.7737, "step": 993 }, { "epoch": 0.11418069036815806, "grad_norm": 0.371442586183548, "learning_rate": 0.0001, "loss": 1.5202, "step": 994 }, { "epoch": 0.11429556027798518, "grad_norm": 0.33966636657714844, "learning_rate": 0.0001, "loss": 1.8347, "step": 995 }, { "epoch": 0.1144104301878123, "grad_norm": 0.3425755202770233, "learning_rate": 0.0001, "loss": 1.7236, "step": 996 }, { "epoch": 0.11452530009763942, "grad_norm": 0.3630322515964508, "learning_rate": 0.0001, "loss": 1.6771, "step": 997 }, { "epoch": 0.11464017000746654, "grad_norm": 0.31861042976379395, "learning_rate": 0.0001, "loss": 1.5433, "step": 998 }, { "epoch": 0.11475503991729366, "grad_norm": 0.34319064021110535, "learning_rate": 0.0001, "loss": 1.7079, "step": 999 }, { "epoch": 0.11486990982712078, "grad_norm": 0.33769530057907104, "learning_rate": 0.0001, "loss": 1.6176, "step": 1000 }, { "epoch": 0.1149847797369479, "grad_norm": 0.3288537859916687, "learning_rate": 0.0001, "loss": 1.4786, "step": 1001 }, { "epoch": 0.11509964964677502, "grad_norm": 0.41031739115715027, "learning_rate": 0.0001, "loss": 1.7241, "step": 1002 }, { "epoch": 0.11521451955660215, "grad_norm": 0.33957231044769287, "learning_rate": 0.0001, "loss": 1.608, "step": 1003 }, { "epoch": 0.11532938946642927, "grad_norm": 0.35768911242485046, "learning_rate": 0.0001, "loss": 1.8097, "step": 1004 }, { "epoch": 0.1154442593762564, "grad_norm": 0.3527219593524933, "learning_rate": 0.0001, "loss": 1.6337, "step": 1005 }, { "epoch": 0.11555912928608351, "grad_norm": 0.3403669595718384, "learning_rate": 0.0001, "loss": 1.7182, "step": 1006 }, { "epoch": 0.11567399919591063, "grad_norm": 0.33553534746170044, "learning_rate": 0.0001, "loss": 1.7559, "step": 1007 }, { "epoch": 0.11578886910573775, "grad_norm": 0.39612364768981934, "learning_rate": 0.0001, "loss": 2.1183, "step": 1008 }, { "epoch": 0.11590373901556487, "grad_norm": 0.3312492072582245, "learning_rate": 0.0001, "loss": 1.6856, "step": 1009 }, { "epoch": 0.116018608925392, "grad_norm": 0.35512349009513855, "learning_rate": 0.0001, "loss": 1.8648, "step": 1010 }, { "epoch": 0.11613347883521911, "grad_norm": 0.34850746393203735, "learning_rate": 0.0001, "loss": 1.7185, "step": 1011 }, { "epoch": 0.11624834874504623, "grad_norm": 0.34042853116989136, "learning_rate": 0.0001, "loss": 1.5852, "step": 1012 }, { "epoch": 0.11636321865487335, "grad_norm": 0.3678249716758728, "learning_rate": 0.0001, "loss": 1.9585, "step": 1013 }, { "epoch": 0.11647808856470047, "grad_norm": 0.38127046823501587, "learning_rate": 0.0001, "loss": 1.7736, "step": 1014 }, { "epoch": 0.1165929584745276, "grad_norm": 0.33116331696510315, "learning_rate": 0.0001, "loss": 1.6812, "step": 1015 }, { "epoch": 0.11670782838435471, "grad_norm": 0.3540381193161011, "learning_rate": 0.0001, "loss": 1.666, "step": 1016 }, { "epoch": 0.11682269829418183, "grad_norm": 0.34696200489997864, "learning_rate": 0.0001, "loss": 1.88, "step": 1017 }, { "epoch": 0.11693756820400895, "grad_norm": 0.3833637833595276, "learning_rate": 0.0001, "loss": 1.9577, "step": 1018 }, { "epoch": 0.11705243811383607, "grad_norm": 0.34638655185699463, "learning_rate": 0.0001, "loss": 1.7792, "step": 1019 }, { "epoch": 0.11716730802366321, "grad_norm": 0.3427535593509674, "learning_rate": 0.0001, "loss": 1.5908, "step": 1020 }, { "epoch": 0.11728217793349033, "grad_norm": 0.3441438376903534, "learning_rate": 0.0001, "loss": 1.7116, "step": 1021 }, { "epoch": 0.11739704784331745, "grad_norm": 0.3629339039325714, "learning_rate": 0.0001, "loss": 1.7716, "step": 1022 }, { "epoch": 0.11751191775314457, "grad_norm": 0.35050028562545776, "learning_rate": 0.0001, "loss": 1.7887, "step": 1023 }, { "epoch": 0.11762678766297169, "grad_norm": 0.3770378828048706, "learning_rate": 0.0001, "loss": 1.9182, "step": 1024 }, { "epoch": 0.11774165757279881, "grad_norm": 0.3678201138973236, "learning_rate": 0.0001, "loss": 1.7746, "step": 1025 }, { "epoch": 0.11785652748262593, "grad_norm": 0.3541167080402374, "learning_rate": 0.0001, "loss": 1.7004, "step": 1026 }, { "epoch": 0.11797139739245305, "grad_norm": 0.34430310130119324, "learning_rate": 0.0001, "loss": 1.6821, "step": 1027 }, { "epoch": 0.11808626730228017, "grad_norm": 0.35517948865890503, "learning_rate": 0.0001, "loss": 1.8264, "step": 1028 }, { "epoch": 0.11820113721210729, "grad_norm": 0.35066622495651245, "learning_rate": 0.0001, "loss": 1.6285, "step": 1029 }, { "epoch": 0.11831600712193441, "grad_norm": 0.3344581425189972, "learning_rate": 0.0001, "loss": 1.6052, "step": 1030 }, { "epoch": 0.11843087703176153, "grad_norm": 0.3826616406440735, "learning_rate": 0.0001, "loss": 1.74, "step": 1031 }, { "epoch": 0.11854574694158865, "grad_norm": 0.3551161587238312, "learning_rate": 0.0001, "loss": 1.7829, "step": 1032 }, { "epoch": 0.11866061685141577, "grad_norm": 0.3458511233329773, "learning_rate": 0.0001, "loss": 1.7166, "step": 1033 }, { "epoch": 0.11877548676124289, "grad_norm": 0.38753172755241394, "learning_rate": 0.0001, "loss": 1.5937, "step": 1034 }, { "epoch": 0.11889035667107001, "grad_norm": 0.36152660846710205, "learning_rate": 0.0001, "loss": 1.7422, "step": 1035 }, { "epoch": 0.11900522658089713, "grad_norm": 0.3490775525569916, "learning_rate": 0.0001, "loss": 1.7268, "step": 1036 }, { "epoch": 0.11912009649072426, "grad_norm": 0.3666988015174866, "learning_rate": 0.0001, "loss": 1.7706, "step": 1037 }, { "epoch": 0.11923496640055138, "grad_norm": 0.3366585671901703, "learning_rate": 0.0001, "loss": 1.5956, "step": 1038 }, { "epoch": 0.1193498363103785, "grad_norm": 0.36800581216812134, "learning_rate": 0.0001, "loss": 1.7542, "step": 1039 }, { "epoch": 0.11946470622020562, "grad_norm": 0.36081400513648987, "learning_rate": 0.0001, "loss": 1.882, "step": 1040 }, { "epoch": 0.11957957613003274, "grad_norm": 0.3410419821739197, "learning_rate": 0.0001, "loss": 1.6178, "step": 1041 }, { "epoch": 0.11969444603985986, "grad_norm": 0.3386545777320862, "learning_rate": 0.0001, "loss": 1.6777, "step": 1042 }, { "epoch": 0.11980931594968698, "grad_norm": 0.31798994541168213, "learning_rate": 0.0001, "loss": 1.5473, "step": 1043 }, { "epoch": 0.1199241858595141, "grad_norm": 0.3879840672016144, "learning_rate": 0.0001, "loss": 1.9508, "step": 1044 }, { "epoch": 0.12003905576934122, "grad_norm": 0.3498954176902771, "learning_rate": 0.0001, "loss": 1.6588, "step": 1045 }, { "epoch": 0.12015392567916834, "grad_norm": 0.3412124216556549, "learning_rate": 0.0001, "loss": 1.6106, "step": 1046 }, { "epoch": 0.12026879558899546, "grad_norm": 0.35735565423965454, "learning_rate": 0.0001, "loss": 1.7393, "step": 1047 }, { "epoch": 0.12038366549882258, "grad_norm": 0.339915931224823, "learning_rate": 0.0001, "loss": 1.7161, "step": 1048 }, { "epoch": 0.1204985354086497, "grad_norm": 0.3441682457923889, "learning_rate": 0.0001, "loss": 1.7686, "step": 1049 }, { "epoch": 0.12061340531847682, "grad_norm": 0.36500778794288635, "learning_rate": 0.0001, "loss": 1.7857, "step": 1050 }, { "epoch": 0.12072827522830394, "grad_norm": 0.34607526659965515, "learning_rate": 0.0001, "loss": 1.8434, "step": 1051 }, { "epoch": 0.12084314513813106, "grad_norm": 0.33628061413764954, "learning_rate": 0.0001, "loss": 1.7286, "step": 1052 }, { "epoch": 0.12095801504795818, "grad_norm": 0.3468761146068573, "learning_rate": 0.0001, "loss": 1.8219, "step": 1053 }, { "epoch": 0.12107288495778532, "grad_norm": 0.34483084082603455, "learning_rate": 0.0001, "loss": 1.7694, "step": 1054 }, { "epoch": 0.12118775486761243, "grad_norm": 0.3372325003147125, "learning_rate": 0.0001, "loss": 1.6335, "step": 1055 }, { "epoch": 0.12130262477743955, "grad_norm": 0.33837631344795227, "learning_rate": 0.0001, "loss": 1.6369, "step": 1056 }, { "epoch": 0.12141749468726667, "grad_norm": 0.3314455449581146, "learning_rate": 0.0001, "loss": 1.6946, "step": 1057 }, { "epoch": 0.1215323645970938, "grad_norm": 0.33914583921432495, "learning_rate": 0.0001, "loss": 1.6406, "step": 1058 }, { "epoch": 0.12164723450692091, "grad_norm": 0.33204326033592224, "learning_rate": 0.0001, "loss": 1.6924, "step": 1059 }, { "epoch": 0.12176210441674803, "grad_norm": 0.3244878649711609, "learning_rate": 0.0001, "loss": 1.622, "step": 1060 }, { "epoch": 0.12187697432657515, "grad_norm": 0.37732627987861633, "learning_rate": 0.0001, "loss": 1.7643, "step": 1061 }, { "epoch": 0.12199184423640227, "grad_norm": 0.3585972785949707, "learning_rate": 0.0001, "loss": 1.6568, "step": 1062 }, { "epoch": 0.1221067141462294, "grad_norm": 0.36080244183540344, "learning_rate": 0.0001, "loss": 1.5489, "step": 1063 }, { "epoch": 0.12222158405605651, "grad_norm": 0.3640299141407013, "learning_rate": 0.0001, "loss": 1.6219, "step": 1064 }, { "epoch": 0.12233645396588363, "grad_norm": 0.3357522189617157, "learning_rate": 0.0001, "loss": 1.5328, "step": 1065 }, { "epoch": 0.12245132387571075, "grad_norm": 0.37440431118011475, "learning_rate": 0.0001, "loss": 1.4352, "step": 1066 }, { "epoch": 0.12256619378553787, "grad_norm": 0.3403536379337311, "learning_rate": 0.0001, "loss": 1.7168, "step": 1067 }, { "epoch": 0.122681063695365, "grad_norm": 0.34183967113494873, "learning_rate": 0.0001, "loss": 1.6223, "step": 1068 }, { "epoch": 0.12279593360519211, "grad_norm": 0.34776198863983154, "learning_rate": 0.0001, "loss": 1.7219, "step": 1069 }, { "epoch": 0.12291080351501923, "grad_norm": 0.36906319856643677, "learning_rate": 0.0001, "loss": 1.8587, "step": 1070 }, { "epoch": 0.12302567342484635, "grad_norm": 0.33320197463035583, "learning_rate": 0.0001, "loss": 1.6826, "step": 1071 }, { "epoch": 0.12314054333467349, "grad_norm": 0.35025712847709656, "learning_rate": 0.0001, "loss": 1.7588, "step": 1072 }, { "epoch": 0.12325541324450061, "grad_norm": 0.33611828088760376, "learning_rate": 0.0001, "loss": 1.6779, "step": 1073 }, { "epoch": 0.12337028315432773, "grad_norm": 0.34194374084472656, "learning_rate": 0.0001, "loss": 1.6713, "step": 1074 }, { "epoch": 0.12348515306415485, "grad_norm": 0.3618099093437195, "learning_rate": 0.0001, "loss": 1.928, "step": 1075 }, { "epoch": 0.12360002297398197, "grad_norm": 0.36288702487945557, "learning_rate": 0.0001, "loss": 1.825, "step": 1076 }, { "epoch": 0.12371489288380909, "grad_norm": 0.3502649664878845, "learning_rate": 0.0001, "loss": 1.805, "step": 1077 }, { "epoch": 0.12382976279363621, "grad_norm": 0.3414769470691681, "learning_rate": 0.0001, "loss": 1.769, "step": 1078 }, { "epoch": 0.12394463270346333, "grad_norm": 0.3354913592338562, "learning_rate": 0.0001, "loss": 1.8155, "step": 1079 }, { "epoch": 0.12405950261329045, "grad_norm": 0.34553056955337524, "learning_rate": 0.0001, "loss": 1.5901, "step": 1080 }, { "epoch": 0.12417437252311757, "grad_norm": 0.3307434320449829, "learning_rate": 0.0001, "loss": 1.564, "step": 1081 }, { "epoch": 0.12428924243294469, "grad_norm": 0.3462519347667694, "learning_rate": 0.0001, "loss": 1.7164, "step": 1082 }, { "epoch": 0.12440411234277181, "grad_norm": 0.3434395492076874, "learning_rate": 0.0001, "loss": 1.9303, "step": 1083 }, { "epoch": 0.12451898225259893, "grad_norm": 0.32828354835510254, "learning_rate": 0.0001, "loss": 1.5756, "step": 1084 }, { "epoch": 0.12463385216242605, "grad_norm": 0.3581448793411255, "learning_rate": 0.0001, "loss": 1.7733, "step": 1085 }, { "epoch": 0.12474872207225317, "grad_norm": 0.33728450536727905, "learning_rate": 0.0001, "loss": 1.6101, "step": 1086 }, { "epoch": 0.12486359198208029, "grad_norm": 0.36441946029663086, "learning_rate": 0.0001, "loss": 1.8154, "step": 1087 }, { "epoch": 0.12497846189190741, "grad_norm": 0.3511005640029907, "learning_rate": 0.0001, "loss": 1.8249, "step": 1088 }, { "epoch": 0.12509333180173454, "grad_norm": 0.36078888177871704, "learning_rate": 0.0001, "loss": 1.7089, "step": 1089 }, { "epoch": 0.12520820171156166, "grad_norm": 0.3429834246635437, "learning_rate": 0.0001, "loss": 1.66, "step": 1090 }, { "epoch": 0.12532307162138878, "grad_norm": 0.3412310779094696, "learning_rate": 0.0001, "loss": 1.704, "step": 1091 }, { "epoch": 0.1254379415312159, "grad_norm": 0.377566933631897, "learning_rate": 0.0001, "loss": 2.0321, "step": 1092 }, { "epoch": 0.12555281144104302, "grad_norm": 0.3425246477127075, "learning_rate": 0.0001, "loss": 1.6953, "step": 1093 }, { "epoch": 0.12566768135087014, "grad_norm": 0.36676526069641113, "learning_rate": 0.0001, "loss": 1.5699, "step": 1094 }, { "epoch": 0.12578255126069726, "grad_norm": 0.3748563528060913, "learning_rate": 0.0001, "loss": 1.6054, "step": 1095 }, { "epoch": 0.12589742117052438, "grad_norm": 0.3399527370929718, "learning_rate": 0.0001, "loss": 1.5754, "step": 1096 }, { "epoch": 0.1260122910803515, "grad_norm": 0.32709023356437683, "learning_rate": 0.0001, "loss": 1.606, "step": 1097 }, { "epoch": 0.12612716099017862, "grad_norm": 0.3199503421783447, "learning_rate": 0.0001, "loss": 1.5727, "step": 1098 }, { "epoch": 0.12624203090000574, "grad_norm": 0.36548885703086853, "learning_rate": 0.0001, "loss": 1.5882, "step": 1099 }, { "epoch": 0.12635690080983286, "grad_norm": 0.35938435792922974, "learning_rate": 0.0001, "loss": 1.8585, "step": 1100 }, { "epoch": 0.12647177071965998, "grad_norm": 0.3288070857524872, "learning_rate": 0.0001, "loss": 1.8322, "step": 1101 }, { "epoch": 0.1265866406294871, "grad_norm": 0.3379286527633667, "learning_rate": 0.0001, "loss": 1.7366, "step": 1102 }, { "epoch": 0.12670151053931422, "grad_norm": 0.38364285230636597, "learning_rate": 0.0001, "loss": 1.9132, "step": 1103 }, { "epoch": 0.12681638044914134, "grad_norm": 0.348711222410202, "learning_rate": 0.0001, "loss": 1.8004, "step": 1104 }, { "epoch": 0.12693125035896846, "grad_norm": 0.3605641722679138, "learning_rate": 0.0001, "loss": 1.7316, "step": 1105 }, { "epoch": 0.12704612026879558, "grad_norm": 0.3259005546569824, "learning_rate": 0.0001, "loss": 1.7338, "step": 1106 }, { "epoch": 0.1271609901786227, "grad_norm": 0.374476820230484, "learning_rate": 0.0001, "loss": 1.9204, "step": 1107 }, { "epoch": 0.12727586008844982, "grad_norm": 0.36476418375968933, "learning_rate": 0.0001, "loss": 1.7403, "step": 1108 }, { "epoch": 0.12739072999827694, "grad_norm": 0.3357693552970886, "learning_rate": 0.0001, "loss": 1.6227, "step": 1109 }, { "epoch": 0.12750559990810406, "grad_norm": 0.34735891222953796, "learning_rate": 0.0001, "loss": 1.5068, "step": 1110 }, { "epoch": 0.12762046981793118, "grad_norm": 0.34894609451293945, "learning_rate": 0.0001, "loss": 1.6742, "step": 1111 }, { "epoch": 0.1277353397277583, "grad_norm": 0.34470030665397644, "learning_rate": 0.0001, "loss": 1.6905, "step": 1112 }, { "epoch": 0.12785020963758542, "grad_norm": 0.34968432784080505, "learning_rate": 0.0001, "loss": 1.535, "step": 1113 }, { "epoch": 0.12796507954741257, "grad_norm": 0.35253041982650757, "learning_rate": 0.0001, "loss": 1.5959, "step": 1114 }, { "epoch": 0.1280799494572397, "grad_norm": 0.36112409830093384, "learning_rate": 0.0001, "loss": 1.7485, "step": 1115 }, { "epoch": 0.1281948193670668, "grad_norm": 0.361447274684906, "learning_rate": 0.0001, "loss": 1.8199, "step": 1116 }, { "epoch": 0.12830968927689393, "grad_norm": 0.3329083323478699, "learning_rate": 0.0001, "loss": 1.8049, "step": 1117 }, { "epoch": 0.12842455918672105, "grad_norm": 0.37761425971984863, "learning_rate": 0.0001, "loss": 1.8367, "step": 1118 }, { "epoch": 0.12853942909654817, "grad_norm": 0.36730146408081055, "learning_rate": 0.0001, "loss": 1.7953, "step": 1119 }, { "epoch": 0.1286542990063753, "grad_norm": 0.3333646357059479, "learning_rate": 0.0001, "loss": 1.6046, "step": 1120 }, { "epoch": 0.1287691689162024, "grad_norm": 0.37335070967674255, "learning_rate": 0.0001, "loss": 1.9296, "step": 1121 }, { "epoch": 0.12888403882602953, "grad_norm": 0.36722204089164734, "learning_rate": 0.0001, "loss": 1.7312, "step": 1122 }, { "epoch": 0.12899890873585665, "grad_norm": 0.34996774792671204, "learning_rate": 0.0001, "loss": 1.7276, "step": 1123 }, { "epoch": 0.12911377864568377, "grad_norm": 0.3631379306316376, "learning_rate": 0.0001, "loss": 1.7279, "step": 1124 }, { "epoch": 0.1292286485555109, "grad_norm": 0.3820423483848572, "learning_rate": 0.0001, "loss": 1.7714, "step": 1125 }, { "epoch": 0.129343518465338, "grad_norm": 0.3379541039466858, "learning_rate": 0.0001, "loss": 1.6302, "step": 1126 }, { "epoch": 0.12945838837516513, "grad_norm": 0.35675719380378723, "learning_rate": 0.0001, "loss": 1.5973, "step": 1127 }, { "epoch": 0.12957325828499225, "grad_norm": 0.34876230359077454, "learning_rate": 0.0001, "loss": 1.6188, "step": 1128 }, { "epoch": 0.12968812819481937, "grad_norm": 0.35678061842918396, "learning_rate": 0.0001, "loss": 1.8694, "step": 1129 }, { "epoch": 0.1298029981046465, "grad_norm": 0.3658314347267151, "learning_rate": 0.0001, "loss": 1.9091, "step": 1130 }, { "epoch": 0.1299178680144736, "grad_norm": 0.3842300474643707, "learning_rate": 0.0001, "loss": 1.7441, "step": 1131 }, { "epoch": 0.13003273792430073, "grad_norm": 0.3557139039039612, "learning_rate": 0.0001, "loss": 1.9989, "step": 1132 }, { "epoch": 0.13014760783412785, "grad_norm": 0.3451981246471405, "learning_rate": 0.0001, "loss": 1.5792, "step": 1133 }, { "epoch": 0.13026247774395497, "grad_norm": 0.34899675846099854, "learning_rate": 0.0001, "loss": 1.632, "step": 1134 }, { "epoch": 0.1303773476537821, "grad_norm": 0.3958018124103546, "learning_rate": 0.0001, "loss": 2.2029, "step": 1135 }, { "epoch": 0.1304922175636092, "grad_norm": 0.33789554238319397, "learning_rate": 0.0001, "loss": 1.677, "step": 1136 }, { "epoch": 0.13060708747343633, "grad_norm": 0.3390919864177704, "learning_rate": 0.0001, "loss": 1.7488, "step": 1137 }, { "epoch": 0.13072195738326345, "grad_norm": 0.32724031805992126, "learning_rate": 0.0001, "loss": 1.6631, "step": 1138 }, { "epoch": 0.13083682729309057, "grad_norm": 0.35171130299568176, "learning_rate": 0.0001, "loss": 1.8461, "step": 1139 }, { "epoch": 0.1309516972029177, "grad_norm": 0.37492451071739197, "learning_rate": 0.0001, "loss": 1.6274, "step": 1140 }, { "epoch": 0.1310665671127448, "grad_norm": 0.3509044051170349, "learning_rate": 0.0001, "loss": 1.6438, "step": 1141 }, { "epoch": 0.13118143702257193, "grad_norm": 0.379950612783432, "learning_rate": 0.0001, "loss": 1.8488, "step": 1142 }, { "epoch": 0.13129630693239905, "grad_norm": 0.3770541846752167, "learning_rate": 0.0001, "loss": 1.9027, "step": 1143 }, { "epoch": 0.13141117684222617, "grad_norm": 0.3268395960330963, "learning_rate": 0.0001, "loss": 1.6642, "step": 1144 }, { "epoch": 0.1315260467520533, "grad_norm": 0.3465530276298523, "learning_rate": 0.0001, "loss": 1.5535, "step": 1145 }, { "epoch": 0.1316409166618804, "grad_norm": 0.3526296317577362, "learning_rate": 0.0001, "loss": 1.9214, "step": 1146 }, { "epoch": 0.13175578657170753, "grad_norm": 0.3607937693595886, "learning_rate": 0.0001, "loss": 1.9947, "step": 1147 }, { "epoch": 0.13187065648153465, "grad_norm": 0.35963696241378784, "learning_rate": 0.0001, "loss": 1.7277, "step": 1148 }, { "epoch": 0.1319855263913618, "grad_norm": 0.3521120846271515, "learning_rate": 0.0001, "loss": 1.7295, "step": 1149 }, { "epoch": 0.13210039630118892, "grad_norm": 0.34942013025283813, "learning_rate": 0.0001, "loss": 1.6966, "step": 1150 }, { "epoch": 0.13221526621101604, "grad_norm": 0.3466810882091522, "learning_rate": 0.0001, "loss": 1.5861, "step": 1151 }, { "epoch": 0.13233013612084316, "grad_norm": 0.36123740673065186, "learning_rate": 0.0001, "loss": 1.7322, "step": 1152 }, { "epoch": 0.13244500603067028, "grad_norm": 0.36202317476272583, "learning_rate": 0.0001, "loss": 1.844, "step": 1153 }, { "epoch": 0.1325598759404974, "grad_norm": 0.3521657884120941, "learning_rate": 0.0001, "loss": 1.778, "step": 1154 }, { "epoch": 0.13267474585032452, "grad_norm": 0.35501620173454285, "learning_rate": 0.0001, "loss": 1.7139, "step": 1155 }, { "epoch": 0.13278961576015164, "grad_norm": 0.35365980863571167, "learning_rate": 0.0001, "loss": 1.5055, "step": 1156 }, { "epoch": 0.13290448566997876, "grad_norm": 0.3492221534252167, "learning_rate": 0.0001, "loss": 1.7555, "step": 1157 }, { "epoch": 0.13301935557980588, "grad_norm": 0.4094052016735077, "learning_rate": 0.0001, "loss": 1.7331, "step": 1158 }, { "epoch": 0.133134225489633, "grad_norm": 0.385098934173584, "learning_rate": 0.0001, "loss": 1.6894, "step": 1159 }, { "epoch": 0.13324909539946012, "grad_norm": 0.3582170605659485, "learning_rate": 0.0001, "loss": 1.8903, "step": 1160 }, { "epoch": 0.13336396530928724, "grad_norm": 0.35824549198150635, "learning_rate": 0.0001, "loss": 1.6978, "step": 1161 }, { "epoch": 0.13347883521911436, "grad_norm": 0.36423972249031067, "learning_rate": 0.0001, "loss": 1.7682, "step": 1162 }, { "epoch": 0.13359370512894148, "grad_norm": 0.3333613872528076, "learning_rate": 0.0001, "loss": 1.7676, "step": 1163 }, { "epoch": 0.1337085750387686, "grad_norm": 0.3693676292896271, "learning_rate": 0.0001, "loss": 1.8687, "step": 1164 }, { "epoch": 0.13382344494859572, "grad_norm": 0.36510157585144043, "learning_rate": 0.0001, "loss": 1.8022, "step": 1165 }, { "epoch": 0.13393831485842284, "grad_norm": 0.35315313935279846, "learning_rate": 0.0001, "loss": 1.6586, "step": 1166 }, { "epoch": 0.13405318476824996, "grad_norm": 0.3328644037246704, "learning_rate": 0.0001, "loss": 1.5863, "step": 1167 }, { "epoch": 0.13416805467807708, "grad_norm": 0.34875044226646423, "learning_rate": 0.0001, "loss": 1.6538, "step": 1168 }, { "epoch": 0.1342829245879042, "grad_norm": 0.3806982636451721, "learning_rate": 0.0001, "loss": 1.8317, "step": 1169 }, { "epoch": 0.13439779449773132, "grad_norm": 0.35632237792015076, "learning_rate": 0.0001, "loss": 1.757, "step": 1170 }, { "epoch": 0.13451266440755844, "grad_norm": 0.3340839147567749, "learning_rate": 0.0001, "loss": 1.6582, "step": 1171 }, { "epoch": 0.13462753431738556, "grad_norm": 0.3517102599143982, "learning_rate": 0.0001, "loss": 1.6821, "step": 1172 }, { "epoch": 0.13474240422721268, "grad_norm": 0.33846724033355713, "learning_rate": 0.0001, "loss": 1.7031, "step": 1173 }, { "epoch": 0.1348572741370398, "grad_norm": 0.3715790808200836, "learning_rate": 0.0001, "loss": 1.6864, "step": 1174 }, { "epoch": 0.13497214404686692, "grad_norm": 0.30750709772109985, "learning_rate": 0.0001, "loss": 1.4213, "step": 1175 }, { "epoch": 0.13508701395669404, "grad_norm": 0.35879287123680115, "learning_rate": 0.0001, "loss": 1.6197, "step": 1176 }, { "epoch": 0.13520188386652116, "grad_norm": 0.35407108068466187, "learning_rate": 0.0001, "loss": 1.6578, "step": 1177 }, { "epoch": 0.13531675377634828, "grad_norm": 0.35184237360954285, "learning_rate": 0.0001, "loss": 1.7593, "step": 1178 }, { "epoch": 0.1354316236861754, "grad_norm": 0.3242950737476349, "learning_rate": 0.0001, "loss": 1.6438, "step": 1179 }, { "epoch": 0.13554649359600252, "grad_norm": 0.3433818817138672, "learning_rate": 0.0001, "loss": 1.568, "step": 1180 }, { "epoch": 0.13566136350582964, "grad_norm": 0.34305769205093384, "learning_rate": 0.0001, "loss": 1.5726, "step": 1181 }, { "epoch": 0.13577623341565676, "grad_norm": 0.35595643520355225, "learning_rate": 0.0001, "loss": 1.7659, "step": 1182 }, { "epoch": 0.1358911033254839, "grad_norm": 0.3613067865371704, "learning_rate": 0.0001, "loss": 1.8036, "step": 1183 }, { "epoch": 0.13600597323531102, "grad_norm": 0.38018158078193665, "learning_rate": 0.0001, "loss": 1.9313, "step": 1184 }, { "epoch": 0.13612084314513814, "grad_norm": 0.3490848243236542, "learning_rate": 0.0001, "loss": 1.7979, "step": 1185 }, { "epoch": 0.13623571305496526, "grad_norm": 0.35665586590766907, "learning_rate": 0.0001, "loss": 1.7383, "step": 1186 }, { "epoch": 0.13635058296479238, "grad_norm": 0.3440529704093933, "learning_rate": 0.0001, "loss": 1.5626, "step": 1187 }, { "epoch": 0.1364654528746195, "grad_norm": 0.31870830059051514, "learning_rate": 0.0001, "loss": 1.5472, "step": 1188 }, { "epoch": 0.13658032278444662, "grad_norm": 0.3264746367931366, "learning_rate": 0.0001, "loss": 1.6856, "step": 1189 }, { "epoch": 0.13669519269427374, "grad_norm": 0.364711731672287, "learning_rate": 0.0001, "loss": 1.662, "step": 1190 }, { "epoch": 0.13681006260410086, "grad_norm": 0.34434038400650024, "learning_rate": 0.0001, "loss": 1.6279, "step": 1191 }, { "epoch": 0.13692493251392798, "grad_norm": 0.36517781019210815, "learning_rate": 0.0001, "loss": 1.8602, "step": 1192 }, { "epoch": 0.1370398024237551, "grad_norm": 0.36987563967704773, "learning_rate": 0.0001, "loss": 1.7108, "step": 1193 }, { "epoch": 0.13715467233358222, "grad_norm": 0.34185507893562317, "learning_rate": 0.0001, "loss": 1.7236, "step": 1194 }, { "epoch": 0.13726954224340934, "grad_norm": 0.3375924527645111, "learning_rate": 0.0001, "loss": 1.7451, "step": 1195 }, { "epoch": 0.13738441215323646, "grad_norm": 0.3625550866127014, "learning_rate": 0.0001, "loss": 1.8872, "step": 1196 }, { "epoch": 0.13749928206306358, "grad_norm": 0.36060526967048645, "learning_rate": 0.0001, "loss": 1.9059, "step": 1197 }, { "epoch": 0.1376141519728907, "grad_norm": 0.32978108525276184, "learning_rate": 0.0001, "loss": 1.5382, "step": 1198 }, { "epoch": 0.13772902188271782, "grad_norm": 0.3556724488735199, "learning_rate": 0.0001, "loss": 1.659, "step": 1199 }, { "epoch": 0.13784389179254494, "grad_norm": 0.36621591448783875, "learning_rate": 0.0001, "loss": 1.9125, "step": 1200 }, { "epoch": 0.13795876170237206, "grad_norm": 0.3547092080116272, "learning_rate": 0.0001, "loss": 1.7579, "step": 1201 }, { "epoch": 0.13807363161219918, "grad_norm": 0.3358149826526642, "learning_rate": 0.0001, "loss": 1.7776, "step": 1202 }, { "epoch": 0.1381885015220263, "grad_norm": 0.37259694933891296, "learning_rate": 0.0001, "loss": 1.7184, "step": 1203 }, { "epoch": 0.13830337143185342, "grad_norm": 0.35611268877983093, "learning_rate": 0.0001, "loss": 1.7256, "step": 1204 }, { "epoch": 0.13841824134168054, "grad_norm": 0.3654508590698242, "learning_rate": 0.0001, "loss": 1.8226, "step": 1205 }, { "epoch": 0.13853311125150766, "grad_norm": 0.3912622928619385, "learning_rate": 0.0001, "loss": 1.7948, "step": 1206 }, { "epoch": 0.13864798116133478, "grad_norm": 0.37169212102890015, "learning_rate": 0.0001, "loss": 1.7112, "step": 1207 }, { "epoch": 0.1387628510711619, "grad_norm": 0.34099262952804565, "learning_rate": 0.0001, "loss": 1.5217, "step": 1208 }, { "epoch": 0.13887772098098902, "grad_norm": 0.34905219078063965, "learning_rate": 0.0001, "loss": 1.734, "step": 1209 }, { "epoch": 0.13899259089081614, "grad_norm": 0.35248732566833496, "learning_rate": 0.0001, "loss": 1.6179, "step": 1210 }, { "epoch": 0.13910746080064326, "grad_norm": 0.343364417552948, "learning_rate": 0.0001, "loss": 1.8933, "step": 1211 }, { "epoch": 0.13922233071047038, "grad_norm": 0.3398562967777252, "learning_rate": 0.0001, "loss": 1.7221, "step": 1212 }, { "epoch": 0.1393372006202975, "grad_norm": 0.3378565013408661, "learning_rate": 0.0001, "loss": 1.7439, "step": 1213 }, { "epoch": 0.13945207053012462, "grad_norm": 0.36921605467796326, "learning_rate": 0.0001, "loss": 1.6726, "step": 1214 }, { "epoch": 0.13956694043995174, "grad_norm": 0.3712867200374603, "learning_rate": 0.0001, "loss": 1.8628, "step": 1215 }, { "epoch": 0.13968181034977886, "grad_norm": 0.3491092324256897, "learning_rate": 0.0001, "loss": 1.7379, "step": 1216 }, { "epoch": 0.139796680259606, "grad_norm": 0.3602731227874756, "learning_rate": 0.0001, "loss": 1.7088, "step": 1217 }, { "epoch": 0.13991155016943313, "grad_norm": 0.39251574873924255, "learning_rate": 0.0001, "loss": 1.8268, "step": 1218 }, { "epoch": 0.14002642007926025, "grad_norm": 0.3653351664543152, "learning_rate": 0.0001, "loss": 1.7597, "step": 1219 }, { "epoch": 0.14014128998908737, "grad_norm": 0.3393062353134155, "learning_rate": 0.0001, "loss": 1.6635, "step": 1220 }, { "epoch": 0.1402561598989145, "grad_norm": 0.3772340416908264, "learning_rate": 0.0001, "loss": 1.7796, "step": 1221 }, { "epoch": 0.1403710298087416, "grad_norm": 0.3478431701660156, "learning_rate": 0.0001, "loss": 1.6802, "step": 1222 }, { "epoch": 0.14048589971856873, "grad_norm": 0.33005356788635254, "learning_rate": 0.0001, "loss": 1.555, "step": 1223 }, { "epoch": 0.14060076962839585, "grad_norm": 0.348294734954834, "learning_rate": 0.0001, "loss": 1.6254, "step": 1224 }, { "epoch": 0.14071563953822297, "grad_norm": 0.35383546352386475, "learning_rate": 0.0001, "loss": 1.5475, "step": 1225 }, { "epoch": 0.1408305094480501, "grad_norm": 0.37990522384643555, "learning_rate": 0.0001, "loss": 1.8762, "step": 1226 }, { "epoch": 0.1409453793578772, "grad_norm": 0.3329927623271942, "learning_rate": 0.0001, "loss": 1.752, "step": 1227 }, { "epoch": 0.14106024926770433, "grad_norm": 0.3647679090499878, "learning_rate": 0.0001, "loss": 1.7886, "step": 1228 }, { "epoch": 0.14117511917753145, "grad_norm": 0.3833819627761841, "learning_rate": 0.0001, "loss": 1.8977, "step": 1229 }, { "epoch": 0.14128998908735857, "grad_norm": 0.3702079951763153, "learning_rate": 0.0001, "loss": 1.6843, "step": 1230 }, { "epoch": 0.1414048589971857, "grad_norm": 0.3493407368659973, "learning_rate": 0.0001, "loss": 1.7134, "step": 1231 }, { "epoch": 0.1415197289070128, "grad_norm": 0.3664112687110901, "learning_rate": 0.0001, "loss": 1.7641, "step": 1232 }, { "epoch": 0.14163459881683993, "grad_norm": 0.3498665988445282, "learning_rate": 0.0001, "loss": 1.7903, "step": 1233 }, { "epoch": 0.14174946872666705, "grad_norm": 0.37883898615837097, "learning_rate": 0.0001, "loss": 1.7601, "step": 1234 }, { "epoch": 0.14186433863649417, "grad_norm": 0.37011733651161194, "learning_rate": 0.0001, "loss": 1.8304, "step": 1235 }, { "epoch": 0.1419792085463213, "grad_norm": 0.3556150197982788, "learning_rate": 0.0001, "loss": 1.7222, "step": 1236 }, { "epoch": 0.1420940784561484, "grad_norm": 0.3517720103263855, "learning_rate": 0.0001, "loss": 1.5597, "step": 1237 }, { "epoch": 0.14220894836597553, "grad_norm": 0.3677484691143036, "learning_rate": 0.0001, "loss": 1.9557, "step": 1238 }, { "epoch": 0.14232381827580265, "grad_norm": 0.35142794251441956, "learning_rate": 0.0001, "loss": 1.7057, "step": 1239 }, { "epoch": 0.14243868818562977, "grad_norm": 0.33948856592178345, "learning_rate": 0.0001, "loss": 1.5986, "step": 1240 }, { "epoch": 0.1425535580954569, "grad_norm": 0.340010404586792, "learning_rate": 0.0001, "loss": 1.6787, "step": 1241 }, { "epoch": 0.142668428005284, "grad_norm": 0.34367939829826355, "learning_rate": 0.0001, "loss": 1.628, "step": 1242 }, { "epoch": 0.14278329791511113, "grad_norm": 0.38786405324935913, "learning_rate": 0.0001, "loss": 1.7947, "step": 1243 }, { "epoch": 0.14289816782493825, "grad_norm": 0.35119712352752686, "learning_rate": 0.0001, "loss": 1.6828, "step": 1244 }, { "epoch": 0.14301303773476537, "grad_norm": 0.3436744809150696, "learning_rate": 0.0001, "loss": 1.6213, "step": 1245 }, { "epoch": 0.1431279076445925, "grad_norm": 0.3338393270969391, "learning_rate": 0.0001, "loss": 1.536, "step": 1246 }, { "epoch": 0.1432427775544196, "grad_norm": 0.38156992197036743, "learning_rate": 0.0001, "loss": 1.7975, "step": 1247 }, { "epoch": 0.14335764746424673, "grad_norm": 0.34734535217285156, "learning_rate": 0.0001, "loss": 1.6203, "step": 1248 }, { "epoch": 0.14347251737407385, "grad_norm": 0.34041401743888855, "learning_rate": 0.0001, "loss": 1.7274, "step": 1249 }, { "epoch": 0.14358738728390097, "grad_norm": 0.37156790494918823, "learning_rate": 0.0001, "loss": 1.7642, "step": 1250 }, { "epoch": 0.14370225719372812, "grad_norm": 0.3499716818332672, "learning_rate": 0.0001, "loss": 1.7686, "step": 1251 }, { "epoch": 0.14381712710355524, "grad_norm": 0.32781967520713806, "learning_rate": 0.0001, "loss": 1.5751, "step": 1252 }, { "epoch": 0.14393199701338236, "grad_norm": 0.3511214852333069, "learning_rate": 0.0001, "loss": 1.6698, "step": 1253 }, { "epoch": 0.14404686692320948, "grad_norm": 0.3647572994232178, "learning_rate": 0.0001, "loss": 1.6883, "step": 1254 }, { "epoch": 0.1441617368330366, "grad_norm": 0.3573339879512787, "learning_rate": 0.0001, "loss": 1.6322, "step": 1255 }, { "epoch": 0.14427660674286372, "grad_norm": 0.34564316272735596, "learning_rate": 0.0001, "loss": 1.7221, "step": 1256 }, { "epoch": 0.14439147665269084, "grad_norm": 0.3462870717048645, "learning_rate": 0.0001, "loss": 1.7767, "step": 1257 }, { "epoch": 0.14450634656251796, "grad_norm": 0.3375271260738373, "learning_rate": 0.0001, "loss": 1.6136, "step": 1258 }, { "epoch": 0.14462121647234508, "grad_norm": 0.3306554853916168, "learning_rate": 0.0001, "loss": 1.6813, "step": 1259 }, { "epoch": 0.1447360863821722, "grad_norm": 0.3348354995250702, "learning_rate": 0.0001, "loss": 1.5897, "step": 1260 }, { "epoch": 0.14485095629199932, "grad_norm": 0.3619769811630249, "learning_rate": 0.0001, "loss": 1.6966, "step": 1261 }, { "epoch": 0.14496582620182644, "grad_norm": 0.3743123412132263, "learning_rate": 0.0001, "loss": 1.8824, "step": 1262 }, { "epoch": 0.14508069611165356, "grad_norm": 0.33532246947288513, "learning_rate": 0.0001, "loss": 1.6501, "step": 1263 }, { "epoch": 0.14519556602148068, "grad_norm": 0.37136563658714294, "learning_rate": 0.0001, "loss": 1.6995, "step": 1264 }, { "epoch": 0.1453104359313078, "grad_norm": 0.4397648274898529, "learning_rate": 0.0001, "loss": 2.1078, "step": 1265 }, { "epoch": 0.14542530584113492, "grad_norm": 0.3656454086303711, "learning_rate": 0.0001, "loss": 1.8501, "step": 1266 }, { "epoch": 0.14554017575096204, "grad_norm": 0.3662125766277313, "learning_rate": 0.0001, "loss": 1.8535, "step": 1267 }, { "epoch": 0.14565504566078916, "grad_norm": 0.4044576585292816, "learning_rate": 0.0001, "loss": 1.9077, "step": 1268 }, { "epoch": 0.14576991557061628, "grad_norm": 0.3433517515659332, "learning_rate": 0.0001, "loss": 1.4109, "step": 1269 }, { "epoch": 0.1458847854804434, "grad_norm": 0.3514901101589203, "learning_rate": 0.0001, "loss": 1.6432, "step": 1270 }, { "epoch": 0.14599965539027052, "grad_norm": 0.3364449143409729, "learning_rate": 0.0001, "loss": 1.6527, "step": 1271 }, { "epoch": 0.14611452530009764, "grad_norm": 0.3305410146713257, "learning_rate": 0.0001, "loss": 1.6434, "step": 1272 }, { "epoch": 0.14622939520992476, "grad_norm": 0.35502052307128906, "learning_rate": 0.0001, "loss": 1.6401, "step": 1273 }, { "epoch": 0.14634426511975188, "grad_norm": 0.3556758165359497, "learning_rate": 0.0001, "loss": 1.6573, "step": 1274 }, { "epoch": 0.146459135029579, "grad_norm": 0.3576909899711609, "learning_rate": 0.0001, "loss": 1.6997, "step": 1275 }, { "epoch": 0.14657400493940612, "grad_norm": 0.34596705436706543, "learning_rate": 0.0001, "loss": 1.7428, "step": 1276 }, { "epoch": 0.14668887484923324, "grad_norm": 0.38846471905708313, "learning_rate": 0.0001, "loss": 1.7209, "step": 1277 }, { "epoch": 0.14680374475906036, "grad_norm": 0.37087368965148926, "learning_rate": 0.0001, "loss": 1.443, "step": 1278 }, { "epoch": 0.14691861466888748, "grad_norm": 0.3498583137989044, "learning_rate": 0.0001, "loss": 1.6488, "step": 1279 }, { "epoch": 0.1470334845787146, "grad_norm": 0.34810611605644226, "learning_rate": 0.0001, "loss": 1.6464, "step": 1280 }, { "epoch": 0.14714835448854172, "grad_norm": 0.3536350727081299, "learning_rate": 0.0001, "loss": 1.8913, "step": 1281 }, { "epoch": 0.14726322439836884, "grad_norm": 0.3864216208457947, "learning_rate": 0.0001, "loss": 1.9487, "step": 1282 }, { "epoch": 0.14737809430819596, "grad_norm": 0.36202993988990784, "learning_rate": 0.0001, "loss": 1.7515, "step": 1283 }, { "epoch": 0.14749296421802308, "grad_norm": 0.3679467439651489, "learning_rate": 0.0001, "loss": 1.8294, "step": 1284 }, { "epoch": 0.1476078341278502, "grad_norm": 0.3474784195423126, "learning_rate": 0.0001, "loss": 1.5674, "step": 1285 }, { "epoch": 0.14772270403767734, "grad_norm": 0.3244973123073578, "learning_rate": 0.0001, "loss": 1.604, "step": 1286 }, { "epoch": 0.14783757394750446, "grad_norm": 0.36702069640159607, "learning_rate": 0.0001, "loss": 1.6718, "step": 1287 }, { "epoch": 0.14795244385733158, "grad_norm": 0.3686244487762451, "learning_rate": 0.0001, "loss": 1.9021, "step": 1288 }, { "epoch": 0.1480673137671587, "grad_norm": 0.36901167035102844, "learning_rate": 0.0001, "loss": 1.8618, "step": 1289 }, { "epoch": 0.14818218367698582, "grad_norm": 0.3515526354312897, "learning_rate": 0.0001, "loss": 1.6961, "step": 1290 }, { "epoch": 0.14829705358681294, "grad_norm": 0.36879947781562805, "learning_rate": 0.0001, "loss": 1.7333, "step": 1291 }, { "epoch": 0.14841192349664006, "grad_norm": 0.3325467109680176, "learning_rate": 0.0001, "loss": 1.5058, "step": 1292 }, { "epoch": 0.14852679340646718, "grad_norm": 0.3682822287082672, "learning_rate": 0.0001, "loss": 1.8649, "step": 1293 }, { "epoch": 0.1486416633162943, "grad_norm": 0.3390534222126007, "learning_rate": 0.0001, "loss": 1.7462, "step": 1294 }, { "epoch": 0.14875653322612142, "grad_norm": 0.3411741554737091, "learning_rate": 0.0001, "loss": 1.733, "step": 1295 }, { "epoch": 0.14887140313594854, "grad_norm": 0.33508262038230896, "learning_rate": 0.0001, "loss": 1.7306, "step": 1296 }, { "epoch": 0.14898627304577566, "grad_norm": 0.3365498483181, "learning_rate": 0.0001, "loss": 1.6963, "step": 1297 }, { "epoch": 0.14910114295560278, "grad_norm": 0.3673759400844574, "learning_rate": 0.0001, "loss": 1.3957, "step": 1298 }, { "epoch": 0.1492160128654299, "grad_norm": 0.32368943095207214, "learning_rate": 0.0001, "loss": 1.4204, "step": 1299 }, { "epoch": 0.14933088277525702, "grad_norm": 0.37173643708229065, "learning_rate": 0.0001, "loss": 1.7783, "step": 1300 }, { "epoch": 0.14944575268508414, "grad_norm": 0.3451181650161743, "learning_rate": 0.0001, "loss": 1.6048, "step": 1301 }, { "epoch": 0.14956062259491126, "grad_norm": 0.3521316945552826, "learning_rate": 0.0001, "loss": 1.7907, "step": 1302 }, { "epoch": 0.14967549250473838, "grad_norm": 0.3625703454017639, "learning_rate": 0.0001, "loss": 1.9229, "step": 1303 }, { "epoch": 0.1497903624145655, "grad_norm": 0.32430633902549744, "learning_rate": 0.0001, "loss": 1.5798, "step": 1304 }, { "epoch": 0.14990523232439262, "grad_norm": 0.3538731038570404, "learning_rate": 0.0001, "loss": 1.6985, "step": 1305 }, { "epoch": 0.15002010223421974, "grad_norm": 0.362496018409729, "learning_rate": 0.0001, "loss": 1.6863, "step": 1306 }, { "epoch": 0.15013497214404686, "grad_norm": 0.33480730652809143, "learning_rate": 0.0001, "loss": 1.6784, "step": 1307 }, { "epoch": 0.15024984205387398, "grad_norm": 0.3331620991230011, "learning_rate": 0.0001, "loss": 1.635, "step": 1308 }, { "epoch": 0.1503647119637011, "grad_norm": 0.39281463623046875, "learning_rate": 0.0001, "loss": 1.791, "step": 1309 }, { "epoch": 0.15047958187352822, "grad_norm": 0.3314681351184845, "learning_rate": 0.0001, "loss": 1.6435, "step": 1310 }, { "epoch": 0.15059445178335534, "grad_norm": 0.349573016166687, "learning_rate": 0.0001, "loss": 1.6755, "step": 1311 }, { "epoch": 0.15070932169318246, "grad_norm": 0.35691556334495544, "learning_rate": 0.0001, "loss": 1.7388, "step": 1312 }, { "epoch": 0.15082419160300958, "grad_norm": 0.3481789827346802, "learning_rate": 0.0001, "loss": 1.6805, "step": 1313 }, { "epoch": 0.1509390615128367, "grad_norm": 0.36650916934013367, "learning_rate": 0.0001, "loss": 1.8396, "step": 1314 }, { "epoch": 0.15105393142266382, "grad_norm": 0.3846674859523773, "learning_rate": 0.0001, "loss": 1.9883, "step": 1315 }, { "epoch": 0.15116880133249094, "grad_norm": 0.3474387228488922, "learning_rate": 0.0001, "loss": 1.4629, "step": 1316 }, { "epoch": 0.15128367124231806, "grad_norm": 0.37427589297294617, "learning_rate": 0.0001, "loss": 1.7977, "step": 1317 }, { "epoch": 0.15139854115214518, "grad_norm": 0.38520902395248413, "learning_rate": 0.0001, "loss": 1.8338, "step": 1318 }, { "epoch": 0.1515134110619723, "grad_norm": 0.3556143641471863, "learning_rate": 0.0001, "loss": 1.5932, "step": 1319 }, { "epoch": 0.15162828097179945, "grad_norm": 0.36979398131370544, "learning_rate": 0.0001, "loss": 1.7625, "step": 1320 }, { "epoch": 0.15174315088162657, "grad_norm": 0.35435059666633606, "learning_rate": 0.0001, "loss": 1.6956, "step": 1321 }, { "epoch": 0.1518580207914537, "grad_norm": 0.35871556401252747, "learning_rate": 0.0001, "loss": 1.8573, "step": 1322 }, { "epoch": 0.1519728907012808, "grad_norm": 0.3628619611263275, "learning_rate": 0.0001, "loss": 1.8769, "step": 1323 }, { "epoch": 0.15208776061110793, "grad_norm": 0.3393780291080475, "learning_rate": 0.0001, "loss": 1.5716, "step": 1324 }, { "epoch": 0.15220263052093505, "grad_norm": 0.3389873802661896, "learning_rate": 0.0001, "loss": 1.6257, "step": 1325 }, { "epoch": 0.15231750043076217, "grad_norm": 0.3557165563106537, "learning_rate": 0.0001, "loss": 1.8152, "step": 1326 }, { "epoch": 0.1524323703405893, "grad_norm": 0.36443039774894714, "learning_rate": 0.0001, "loss": 1.7758, "step": 1327 }, { "epoch": 0.1525472402504164, "grad_norm": 0.3473578989505768, "learning_rate": 0.0001, "loss": 1.6577, "step": 1328 }, { "epoch": 0.15266211016024353, "grad_norm": 0.36054491996765137, "learning_rate": 0.0001, "loss": 1.7633, "step": 1329 }, { "epoch": 0.15277698007007065, "grad_norm": 0.3966304659843445, "learning_rate": 0.0001, "loss": 1.8216, "step": 1330 }, { "epoch": 0.15289184997989777, "grad_norm": 0.3621263802051544, "learning_rate": 0.0001, "loss": 1.8102, "step": 1331 }, { "epoch": 0.1530067198897249, "grad_norm": 0.346164733171463, "learning_rate": 0.0001, "loss": 1.8498, "step": 1332 }, { "epoch": 0.153121589799552, "grad_norm": 0.3580979108810425, "learning_rate": 0.0001, "loss": 1.4788, "step": 1333 }, { "epoch": 0.15323645970937913, "grad_norm": 0.4236307740211487, "learning_rate": 0.0001, "loss": 2.0173, "step": 1334 }, { "epoch": 0.15335132961920625, "grad_norm": 0.36115512251853943, "learning_rate": 0.0001, "loss": 1.7246, "step": 1335 }, { "epoch": 0.15346619952903337, "grad_norm": 0.35453563928604126, "learning_rate": 0.0001, "loss": 1.7424, "step": 1336 }, { "epoch": 0.1535810694388605, "grad_norm": 0.37019240856170654, "learning_rate": 0.0001, "loss": 1.7979, "step": 1337 }, { "epoch": 0.1536959393486876, "grad_norm": 0.34816619753837585, "learning_rate": 0.0001, "loss": 1.8167, "step": 1338 }, { "epoch": 0.15381080925851473, "grad_norm": 0.3500208556652069, "learning_rate": 0.0001, "loss": 1.79, "step": 1339 }, { "epoch": 0.15392567916834185, "grad_norm": 0.34298229217529297, "learning_rate": 0.0001, "loss": 1.6299, "step": 1340 }, { "epoch": 0.15404054907816897, "grad_norm": 0.34139198064804077, "learning_rate": 0.0001, "loss": 1.694, "step": 1341 }, { "epoch": 0.1541554189879961, "grad_norm": 0.3685595989227295, "learning_rate": 0.0001, "loss": 1.748, "step": 1342 }, { "epoch": 0.1542702888978232, "grad_norm": 0.33886897563934326, "learning_rate": 0.0001, "loss": 1.7177, "step": 1343 }, { "epoch": 0.15438515880765033, "grad_norm": 0.3549462854862213, "learning_rate": 0.0001, "loss": 1.7208, "step": 1344 }, { "epoch": 0.15450002871747745, "grad_norm": 0.40802568197250366, "learning_rate": 0.0001, "loss": 1.6444, "step": 1345 }, { "epoch": 0.15461489862730457, "grad_norm": 0.37750929594039917, "learning_rate": 0.0001, "loss": 1.5996, "step": 1346 }, { "epoch": 0.1547297685371317, "grad_norm": 0.3724265694618225, "learning_rate": 0.0001, "loss": 1.8619, "step": 1347 }, { "epoch": 0.1548446384469588, "grad_norm": 0.37521660327911377, "learning_rate": 0.0001, "loss": 1.7973, "step": 1348 }, { "epoch": 0.15495950835678593, "grad_norm": 0.35817015171051025, "learning_rate": 0.0001, "loss": 1.8391, "step": 1349 }, { "epoch": 0.15507437826661305, "grad_norm": 0.35718318819999695, "learning_rate": 0.0001, "loss": 1.5746, "step": 1350 }, { "epoch": 0.15518924817644017, "grad_norm": 0.3497890830039978, "learning_rate": 0.0001, "loss": 1.7013, "step": 1351 }, { "epoch": 0.1553041180862673, "grad_norm": 0.3606964349746704, "learning_rate": 0.0001, "loss": 1.6408, "step": 1352 }, { "epoch": 0.1554189879960944, "grad_norm": 0.367531955242157, "learning_rate": 0.0001, "loss": 2.0184, "step": 1353 }, { "epoch": 0.15553385790592156, "grad_norm": 0.364165723323822, "learning_rate": 0.0001, "loss": 1.7831, "step": 1354 }, { "epoch": 0.15564872781574868, "grad_norm": 0.3798482418060303, "learning_rate": 0.0001, "loss": 1.9703, "step": 1355 }, { "epoch": 0.1557635977255758, "grad_norm": 0.38180306553840637, "learning_rate": 0.0001, "loss": 1.9124, "step": 1356 }, { "epoch": 0.15587846763540292, "grad_norm": 0.34865236282348633, "learning_rate": 0.0001, "loss": 1.6616, "step": 1357 }, { "epoch": 0.15599333754523004, "grad_norm": 0.3822656571865082, "learning_rate": 0.0001, "loss": 1.8847, "step": 1358 }, { "epoch": 0.15610820745505716, "grad_norm": 0.37447991967201233, "learning_rate": 0.0001, "loss": 1.7326, "step": 1359 }, { "epoch": 0.15622307736488428, "grad_norm": 0.39164337515830994, "learning_rate": 0.0001, "loss": 1.9483, "step": 1360 }, { "epoch": 0.1563379472747114, "grad_norm": 0.37871086597442627, "learning_rate": 0.0001, "loss": 1.6851, "step": 1361 }, { "epoch": 0.15645281718453852, "grad_norm": 0.4217022657394409, "learning_rate": 0.0001, "loss": 1.7808, "step": 1362 }, { "epoch": 0.15656768709436564, "grad_norm": 0.35998785495758057, "learning_rate": 0.0001, "loss": 1.644, "step": 1363 }, { "epoch": 0.15668255700419276, "grad_norm": 0.36455628275871277, "learning_rate": 0.0001, "loss": 1.5796, "step": 1364 }, { "epoch": 0.15679742691401988, "grad_norm": 0.33390358090400696, "learning_rate": 0.0001, "loss": 1.5069, "step": 1365 }, { "epoch": 0.156912296823847, "grad_norm": 0.35853371024131775, "learning_rate": 0.0001, "loss": 1.8491, "step": 1366 }, { "epoch": 0.15702716673367412, "grad_norm": 0.39624473452568054, "learning_rate": 0.0001, "loss": 1.8059, "step": 1367 }, { "epoch": 0.15714203664350124, "grad_norm": 0.341155081987381, "learning_rate": 0.0001, "loss": 1.6845, "step": 1368 }, { "epoch": 0.15725690655332836, "grad_norm": 0.3553493320941925, "learning_rate": 0.0001, "loss": 1.7542, "step": 1369 }, { "epoch": 0.15737177646315548, "grad_norm": 0.3464072644710541, "learning_rate": 0.0001, "loss": 1.6961, "step": 1370 }, { "epoch": 0.1574866463729826, "grad_norm": 0.32570725679397583, "learning_rate": 0.0001, "loss": 1.4859, "step": 1371 }, { "epoch": 0.15760151628280972, "grad_norm": 0.3374817371368408, "learning_rate": 0.0001, "loss": 1.6443, "step": 1372 }, { "epoch": 0.15771638619263684, "grad_norm": 0.3570788502693176, "learning_rate": 0.0001, "loss": 1.7114, "step": 1373 }, { "epoch": 0.15783125610246396, "grad_norm": 0.3562948703765869, "learning_rate": 0.0001, "loss": 1.6656, "step": 1374 }, { "epoch": 0.15794612601229108, "grad_norm": 0.3416786789894104, "learning_rate": 0.0001, "loss": 1.5716, "step": 1375 }, { "epoch": 0.1580609959221182, "grad_norm": 0.36627301573753357, "learning_rate": 0.0001, "loss": 1.8055, "step": 1376 }, { "epoch": 0.15817586583194532, "grad_norm": 0.38520297408103943, "learning_rate": 0.0001, "loss": 1.7763, "step": 1377 }, { "epoch": 0.15829073574177244, "grad_norm": 0.35688209533691406, "learning_rate": 0.0001, "loss": 1.7664, "step": 1378 }, { "epoch": 0.15840560565159956, "grad_norm": 0.3223513960838318, "learning_rate": 0.0001, "loss": 1.6262, "step": 1379 }, { "epoch": 0.15852047556142668, "grad_norm": 0.3319501578807831, "learning_rate": 0.0001, "loss": 1.6991, "step": 1380 }, { "epoch": 0.1586353454712538, "grad_norm": 0.3719445765018463, "learning_rate": 0.0001, "loss": 1.7745, "step": 1381 }, { "epoch": 0.15875021538108092, "grad_norm": 0.41066795587539673, "learning_rate": 0.0001, "loss": 1.7897, "step": 1382 }, { "epoch": 0.15886508529090804, "grad_norm": 0.38358965516090393, "learning_rate": 0.0001, "loss": 1.9239, "step": 1383 }, { "epoch": 0.15897995520073516, "grad_norm": 0.35356229543685913, "learning_rate": 0.0001, "loss": 1.7281, "step": 1384 }, { "epoch": 0.15909482511056228, "grad_norm": 0.36122894287109375, "learning_rate": 0.0001, "loss": 1.7863, "step": 1385 }, { "epoch": 0.1592096950203894, "grad_norm": 0.35605597496032715, "learning_rate": 0.0001, "loss": 1.7772, "step": 1386 }, { "epoch": 0.15932456493021652, "grad_norm": 0.3338839113712311, "learning_rate": 0.0001, "loss": 1.4497, "step": 1387 }, { "epoch": 0.15943943484004364, "grad_norm": 0.34843042492866516, "learning_rate": 0.0001, "loss": 1.8037, "step": 1388 }, { "epoch": 0.15955430474987078, "grad_norm": 0.3467456102371216, "learning_rate": 0.0001, "loss": 1.6538, "step": 1389 }, { "epoch": 0.1596691746596979, "grad_norm": 0.37024548649787903, "learning_rate": 0.0001, "loss": 1.888, "step": 1390 }, { "epoch": 0.15978404456952502, "grad_norm": 0.3692200481891632, "learning_rate": 0.0001, "loss": 1.7917, "step": 1391 }, { "epoch": 0.15989891447935214, "grad_norm": 0.32846784591674805, "learning_rate": 0.0001, "loss": 1.5709, "step": 1392 }, { "epoch": 0.16001378438917926, "grad_norm": 0.3437194228172302, "learning_rate": 0.0001, "loss": 1.7055, "step": 1393 }, { "epoch": 0.16012865429900638, "grad_norm": 0.346202552318573, "learning_rate": 0.0001, "loss": 1.7074, "step": 1394 }, { "epoch": 0.1602435242088335, "grad_norm": 0.3795225918292999, "learning_rate": 0.0001, "loss": 1.9109, "step": 1395 }, { "epoch": 0.16035839411866062, "grad_norm": 0.33237701654434204, "learning_rate": 0.0001, "loss": 1.4413, "step": 1396 }, { "epoch": 0.16047326402848774, "grad_norm": 0.38135427236557007, "learning_rate": 0.0001, "loss": 1.767, "step": 1397 }, { "epoch": 0.16058813393831486, "grad_norm": 0.37453657388687134, "learning_rate": 0.0001, "loss": 1.7438, "step": 1398 }, { "epoch": 0.16070300384814198, "grad_norm": 0.3387562334537506, "learning_rate": 0.0001, "loss": 1.6319, "step": 1399 }, { "epoch": 0.1608178737579691, "grad_norm": 0.33894360065460205, "learning_rate": 0.0001, "loss": 1.5218, "step": 1400 }, { "epoch": 0.16093274366779622, "grad_norm": 0.3334555923938751, "learning_rate": 0.0001, "loss": 1.5768, "step": 1401 }, { "epoch": 0.16104761357762334, "grad_norm": 0.36844760179519653, "learning_rate": 0.0001, "loss": 1.7755, "step": 1402 }, { "epoch": 0.16116248348745046, "grad_norm": 0.34594935178756714, "learning_rate": 0.0001, "loss": 1.8161, "step": 1403 }, { "epoch": 0.16127735339727758, "grad_norm": 0.37070515751838684, "learning_rate": 0.0001, "loss": 1.6415, "step": 1404 }, { "epoch": 0.1613922233071047, "grad_norm": 0.3500889241695404, "learning_rate": 0.0001, "loss": 1.6533, "step": 1405 }, { "epoch": 0.16150709321693182, "grad_norm": 0.3509732186794281, "learning_rate": 0.0001, "loss": 1.7667, "step": 1406 }, { "epoch": 0.16162196312675894, "grad_norm": 0.33721843361854553, "learning_rate": 0.0001, "loss": 1.6363, "step": 1407 }, { "epoch": 0.16173683303658606, "grad_norm": 0.3420223593711853, "learning_rate": 0.0001, "loss": 1.3828, "step": 1408 }, { "epoch": 0.16185170294641318, "grad_norm": 0.3664703965187073, "learning_rate": 0.0001, "loss": 1.8106, "step": 1409 }, { "epoch": 0.1619665728562403, "grad_norm": 0.4004788398742676, "learning_rate": 0.0001, "loss": 1.8107, "step": 1410 }, { "epoch": 0.16208144276606742, "grad_norm": 0.3599262237548828, "learning_rate": 0.0001, "loss": 1.7453, "step": 1411 }, { "epoch": 0.16219631267589454, "grad_norm": 0.37064090371131897, "learning_rate": 0.0001, "loss": 1.7842, "step": 1412 }, { "epoch": 0.16231118258572166, "grad_norm": 0.3801650404930115, "learning_rate": 0.0001, "loss": 1.6275, "step": 1413 }, { "epoch": 0.16242605249554878, "grad_norm": 0.3450910449028015, "learning_rate": 0.0001, "loss": 1.6267, "step": 1414 }, { "epoch": 0.1625409224053759, "grad_norm": 0.35267990827560425, "learning_rate": 0.0001, "loss": 1.5651, "step": 1415 }, { "epoch": 0.16265579231520302, "grad_norm": 0.3844035267829895, "learning_rate": 0.0001, "loss": 1.9421, "step": 1416 }, { "epoch": 0.16277066222503014, "grad_norm": 0.35612425208091736, "learning_rate": 0.0001, "loss": 1.6319, "step": 1417 }, { "epoch": 0.16288553213485726, "grad_norm": 0.3794462978839874, "learning_rate": 0.0001, "loss": 1.8599, "step": 1418 }, { "epoch": 0.16300040204468438, "grad_norm": 0.33937835693359375, "learning_rate": 0.0001, "loss": 1.6891, "step": 1419 }, { "epoch": 0.1631152719545115, "grad_norm": 0.3379872441291809, "learning_rate": 0.0001, "loss": 1.708, "step": 1420 }, { "epoch": 0.16323014186433862, "grad_norm": 0.35873672366142273, "learning_rate": 0.0001, "loss": 1.7832, "step": 1421 }, { "epoch": 0.16334501177416574, "grad_norm": 0.37622302770614624, "learning_rate": 0.0001, "loss": 1.716, "step": 1422 }, { "epoch": 0.1634598816839929, "grad_norm": 0.34433531761169434, "learning_rate": 0.0001, "loss": 1.6682, "step": 1423 }, { "epoch": 0.16357475159382, "grad_norm": 0.35809025168418884, "learning_rate": 0.0001, "loss": 1.6115, "step": 1424 }, { "epoch": 0.16368962150364713, "grad_norm": 0.35675248503685, "learning_rate": 0.0001, "loss": 1.6158, "step": 1425 }, { "epoch": 0.16380449141347425, "grad_norm": 0.356037437915802, "learning_rate": 0.0001, "loss": 1.5049, "step": 1426 }, { "epoch": 0.16391936132330137, "grad_norm": 0.3485028147697449, "learning_rate": 0.0001, "loss": 1.7554, "step": 1427 }, { "epoch": 0.1640342312331285, "grad_norm": 0.36230984330177307, "learning_rate": 0.0001, "loss": 1.6059, "step": 1428 }, { "epoch": 0.1641491011429556, "grad_norm": 0.35187479853630066, "learning_rate": 0.0001, "loss": 1.7184, "step": 1429 }, { "epoch": 0.16426397105278273, "grad_norm": 0.34478455781936646, "learning_rate": 0.0001, "loss": 1.7086, "step": 1430 }, { "epoch": 0.16437884096260985, "grad_norm": 0.4025746285915375, "learning_rate": 0.0001, "loss": 1.8836, "step": 1431 }, { "epoch": 0.16449371087243697, "grad_norm": 0.358385294675827, "learning_rate": 0.0001, "loss": 1.8405, "step": 1432 }, { "epoch": 0.1646085807822641, "grad_norm": 0.3677537143230438, "learning_rate": 0.0001, "loss": 1.6616, "step": 1433 }, { "epoch": 0.1647234506920912, "grad_norm": 0.3488091826438904, "learning_rate": 0.0001, "loss": 1.6911, "step": 1434 }, { "epoch": 0.16483832060191833, "grad_norm": 0.3559654951095581, "learning_rate": 0.0001, "loss": 1.6136, "step": 1435 }, { "epoch": 0.16495319051174545, "grad_norm": 0.4024467170238495, "learning_rate": 0.0001, "loss": 1.8107, "step": 1436 }, { "epoch": 0.16506806042157257, "grad_norm": 0.3618294298648834, "learning_rate": 0.0001, "loss": 1.8111, "step": 1437 }, { "epoch": 0.1651829303313997, "grad_norm": 0.3504227101802826, "learning_rate": 0.0001, "loss": 1.639, "step": 1438 }, { "epoch": 0.1652978002412268, "grad_norm": 0.36997562646865845, "learning_rate": 0.0001, "loss": 1.8394, "step": 1439 }, { "epoch": 0.16541267015105393, "grad_norm": 0.3473089635372162, "learning_rate": 0.0001, "loss": 1.7694, "step": 1440 }, { "epoch": 0.16552754006088105, "grad_norm": 0.3754732012748718, "learning_rate": 0.0001, "loss": 1.6047, "step": 1441 }, { "epoch": 0.16564240997070817, "grad_norm": 0.318775475025177, "learning_rate": 0.0001, "loss": 1.4992, "step": 1442 }, { "epoch": 0.1657572798805353, "grad_norm": 0.3740909695625305, "learning_rate": 0.0001, "loss": 1.7752, "step": 1443 }, { "epoch": 0.1658721497903624, "grad_norm": 0.34481796622276306, "learning_rate": 0.0001, "loss": 1.7427, "step": 1444 }, { "epoch": 0.16598701970018953, "grad_norm": 0.3509489595890045, "learning_rate": 0.0001, "loss": 1.8072, "step": 1445 }, { "epoch": 0.16610188961001665, "grad_norm": 0.364310085773468, "learning_rate": 0.0001, "loss": 1.7988, "step": 1446 }, { "epoch": 0.16621675951984377, "grad_norm": 0.36774981021881104, "learning_rate": 0.0001, "loss": 1.8602, "step": 1447 }, { "epoch": 0.1663316294296709, "grad_norm": 0.34403902292251587, "learning_rate": 0.0001, "loss": 1.7574, "step": 1448 }, { "epoch": 0.166446499339498, "grad_norm": 0.3631366193294525, "learning_rate": 0.0001, "loss": 1.7609, "step": 1449 }, { "epoch": 0.16656136924932513, "grad_norm": 0.4153347313404083, "learning_rate": 0.0001, "loss": 1.8153, "step": 1450 }, { "epoch": 0.16667623915915225, "grad_norm": 0.3547952175140381, "learning_rate": 0.0001, "loss": 1.5481, "step": 1451 }, { "epoch": 0.16679110906897937, "grad_norm": 0.3516184091567993, "learning_rate": 0.0001, "loss": 1.6451, "step": 1452 }, { "epoch": 0.1669059789788065, "grad_norm": 0.35548651218414307, "learning_rate": 0.0001, "loss": 1.7025, "step": 1453 }, { "epoch": 0.1670208488886336, "grad_norm": 0.3711240291595459, "learning_rate": 0.0001, "loss": 1.6624, "step": 1454 }, { "epoch": 0.16713571879846073, "grad_norm": 0.3621267080307007, "learning_rate": 0.0001, "loss": 1.587, "step": 1455 }, { "epoch": 0.16725058870828785, "grad_norm": 0.36165568232536316, "learning_rate": 0.0001, "loss": 1.7507, "step": 1456 }, { "epoch": 0.167365458618115, "grad_norm": 0.3373228907585144, "learning_rate": 0.0001, "loss": 1.6279, "step": 1457 }, { "epoch": 0.16748032852794212, "grad_norm": 0.3716026842594147, "learning_rate": 0.0001, "loss": 1.7762, "step": 1458 }, { "epoch": 0.16759519843776924, "grad_norm": 0.3812613785266876, "learning_rate": 0.0001, "loss": 1.6665, "step": 1459 }, { "epoch": 0.16771006834759636, "grad_norm": 0.3630736470222473, "learning_rate": 0.0001, "loss": 1.7054, "step": 1460 }, { "epoch": 0.16782493825742348, "grad_norm": 0.3346702754497528, "learning_rate": 0.0001, "loss": 1.6076, "step": 1461 }, { "epoch": 0.1679398081672506, "grad_norm": 0.3383999764919281, "learning_rate": 0.0001, "loss": 1.6432, "step": 1462 }, { "epoch": 0.16805467807707772, "grad_norm": 0.3548593521118164, "learning_rate": 0.0001, "loss": 1.6434, "step": 1463 }, { "epoch": 0.16816954798690484, "grad_norm": 0.3538789451122284, "learning_rate": 0.0001, "loss": 1.657, "step": 1464 }, { "epoch": 0.16828441789673196, "grad_norm": 0.3505842387676239, "learning_rate": 0.0001, "loss": 1.7951, "step": 1465 }, { "epoch": 0.16839928780655908, "grad_norm": 0.3387717604637146, "learning_rate": 0.0001, "loss": 1.6237, "step": 1466 }, { "epoch": 0.1685141577163862, "grad_norm": 0.3535427153110504, "learning_rate": 0.0001, "loss": 1.7087, "step": 1467 }, { "epoch": 0.16862902762621332, "grad_norm": 0.35021501779556274, "learning_rate": 0.0001, "loss": 1.6706, "step": 1468 }, { "epoch": 0.16874389753604044, "grad_norm": 0.36078891158103943, "learning_rate": 0.0001, "loss": 1.7588, "step": 1469 }, { "epoch": 0.16885876744586756, "grad_norm": 0.3418395221233368, "learning_rate": 0.0001, "loss": 1.5688, "step": 1470 }, { "epoch": 0.16897363735569468, "grad_norm": 0.37558332085609436, "learning_rate": 0.0001, "loss": 1.6314, "step": 1471 }, { "epoch": 0.1690885072655218, "grad_norm": 0.3494422137737274, "learning_rate": 0.0001, "loss": 1.7267, "step": 1472 }, { "epoch": 0.16920337717534892, "grad_norm": 0.35918739438056946, "learning_rate": 0.0001, "loss": 1.6485, "step": 1473 }, { "epoch": 0.16931824708517604, "grad_norm": 0.36204949021339417, "learning_rate": 0.0001, "loss": 1.8109, "step": 1474 }, { "epoch": 0.16943311699500316, "grad_norm": 0.35251131653785706, "learning_rate": 0.0001, "loss": 1.6827, "step": 1475 }, { "epoch": 0.16954798690483028, "grad_norm": 0.35120296478271484, "learning_rate": 0.0001, "loss": 1.6935, "step": 1476 }, { "epoch": 0.1696628568146574, "grad_norm": 0.34975898265838623, "learning_rate": 0.0001, "loss": 1.7992, "step": 1477 }, { "epoch": 0.16977772672448452, "grad_norm": 0.33744266629219055, "learning_rate": 0.0001, "loss": 1.6606, "step": 1478 }, { "epoch": 0.16989259663431164, "grad_norm": 0.34000107645988464, "learning_rate": 0.0001, "loss": 1.6484, "step": 1479 }, { "epoch": 0.17000746654413876, "grad_norm": 0.36184847354888916, "learning_rate": 0.0001, "loss": 1.7398, "step": 1480 }, { "epoch": 0.17012233645396588, "grad_norm": 0.3685035705566406, "learning_rate": 0.0001, "loss": 1.8032, "step": 1481 }, { "epoch": 0.170237206363793, "grad_norm": 0.38592687249183655, "learning_rate": 0.0001, "loss": 1.7185, "step": 1482 }, { "epoch": 0.17035207627362012, "grad_norm": 0.3724033832550049, "learning_rate": 0.0001, "loss": 1.7696, "step": 1483 }, { "epoch": 0.17046694618344724, "grad_norm": 0.3662974238395691, "learning_rate": 0.0001, "loss": 1.727, "step": 1484 }, { "epoch": 0.17058181609327436, "grad_norm": 0.38893744349479675, "learning_rate": 0.0001, "loss": 1.746, "step": 1485 }, { "epoch": 0.17069668600310148, "grad_norm": 0.368671178817749, "learning_rate": 0.0001, "loss": 1.7863, "step": 1486 }, { "epoch": 0.1708115559129286, "grad_norm": 0.3590819835662842, "learning_rate": 0.0001, "loss": 1.8859, "step": 1487 }, { "epoch": 0.17092642582275572, "grad_norm": 0.36079901456832886, "learning_rate": 0.0001, "loss": 1.7423, "step": 1488 }, { "epoch": 0.17104129573258284, "grad_norm": 0.355546772480011, "learning_rate": 0.0001, "loss": 1.7369, "step": 1489 }, { "epoch": 0.17115616564240996, "grad_norm": 0.3821921944618225, "learning_rate": 0.0001, "loss": 1.9179, "step": 1490 }, { "epoch": 0.1712710355522371, "grad_norm": 0.3505462110042572, "learning_rate": 0.0001, "loss": 1.871, "step": 1491 }, { "epoch": 0.17138590546206423, "grad_norm": 0.3656969368457794, "learning_rate": 0.0001, "loss": 1.6599, "step": 1492 }, { "epoch": 0.17150077537189135, "grad_norm": 0.3786547780036926, "learning_rate": 0.0001, "loss": 1.6202, "step": 1493 }, { "epoch": 0.17161564528171847, "grad_norm": 0.37065404653549194, "learning_rate": 0.0001, "loss": 1.6333, "step": 1494 }, { "epoch": 0.17173051519154559, "grad_norm": 0.3699958622455597, "learning_rate": 0.0001, "loss": 1.7745, "step": 1495 }, { "epoch": 0.1718453851013727, "grad_norm": 0.3573478162288666, "learning_rate": 0.0001, "loss": 1.6497, "step": 1496 }, { "epoch": 0.17196025501119983, "grad_norm": 0.3474213778972626, "learning_rate": 0.0001, "loss": 1.5043, "step": 1497 }, { "epoch": 0.17207512492102695, "grad_norm": 0.3627040684223175, "learning_rate": 0.0001, "loss": 1.7553, "step": 1498 }, { "epoch": 0.17218999483085407, "grad_norm": 0.34735116362571716, "learning_rate": 0.0001, "loss": 1.6331, "step": 1499 }, { "epoch": 0.17230486474068119, "grad_norm": 0.4130633771419525, "learning_rate": 0.0001, "loss": 1.8155, "step": 1500 }, { "epoch": 0.1724197346505083, "grad_norm": 0.38091927766799927, "learning_rate": 0.0001, "loss": 1.7231, "step": 1501 }, { "epoch": 0.17253460456033543, "grad_norm": 0.39104804396629333, "learning_rate": 0.0001, "loss": 1.756, "step": 1502 }, { "epoch": 0.17264947447016255, "grad_norm": 0.39437583088874817, "learning_rate": 0.0001, "loss": 1.7184, "step": 1503 }, { "epoch": 0.17276434437998966, "grad_norm": 0.37000584602355957, "learning_rate": 0.0001, "loss": 1.6456, "step": 1504 }, { "epoch": 0.17287921428981678, "grad_norm": 0.37976545095443726, "learning_rate": 0.0001, "loss": 1.8923, "step": 1505 }, { "epoch": 0.1729940841996439, "grad_norm": 0.36573851108551025, "learning_rate": 0.0001, "loss": 1.8202, "step": 1506 }, { "epoch": 0.17310895410947102, "grad_norm": 0.37791380286216736, "learning_rate": 0.0001, "loss": 1.7888, "step": 1507 }, { "epoch": 0.17322382401929814, "grad_norm": 0.3388189673423767, "learning_rate": 0.0001, "loss": 1.5188, "step": 1508 }, { "epoch": 0.17333869392912526, "grad_norm": 0.36455753445625305, "learning_rate": 0.0001, "loss": 1.8167, "step": 1509 }, { "epoch": 0.17345356383895238, "grad_norm": 0.3855915665626526, "learning_rate": 0.0001, "loss": 1.8208, "step": 1510 }, { "epoch": 0.1735684337487795, "grad_norm": 0.3601621687412262, "learning_rate": 0.0001, "loss": 1.8135, "step": 1511 }, { "epoch": 0.17368330365860662, "grad_norm": 0.34166282415390015, "learning_rate": 0.0001, "loss": 1.5916, "step": 1512 }, { "epoch": 0.17379817356843374, "grad_norm": 0.36744624376296997, "learning_rate": 0.0001, "loss": 1.7702, "step": 1513 }, { "epoch": 0.17391304347826086, "grad_norm": 0.3522723913192749, "learning_rate": 0.0001, "loss": 1.7836, "step": 1514 }, { "epoch": 0.17402791338808798, "grad_norm": 0.34597423672676086, "learning_rate": 0.0001, "loss": 1.6167, "step": 1515 }, { "epoch": 0.1741427832979151, "grad_norm": 0.33401763439178467, "learning_rate": 0.0001, "loss": 1.5352, "step": 1516 }, { "epoch": 0.17425765320774222, "grad_norm": 0.3676266074180603, "learning_rate": 0.0001, "loss": 1.7157, "step": 1517 }, { "epoch": 0.17437252311756934, "grad_norm": 0.3546941578388214, "learning_rate": 0.0001, "loss": 1.8186, "step": 1518 }, { "epoch": 0.17448739302739646, "grad_norm": 0.37473762035369873, "learning_rate": 0.0001, "loss": 1.7717, "step": 1519 }, { "epoch": 0.17460226293722358, "grad_norm": 0.35405465960502625, "learning_rate": 0.0001, "loss": 1.683, "step": 1520 }, { "epoch": 0.1747171328470507, "grad_norm": 0.3613182306289673, "learning_rate": 0.0001, "loss": 1.7042, "step": 1521 }, { "epoch": 0.17483200275687782, "grad_norm": 0.3764897882938385, "learning_rate": 0.0001, "loss": 1.834, "step": 1522 }, { "epoch": 0.17494687266670494, "grad_norm": 0.3841586410999298, "learning_rate": 0.0001, "loss": 1.8038, "step": 1523 }, { "epoch": 0.17506174257653206, "grad_norm": 0.37141889333724976, "learning_rate": 0.0001, "loss": 1.5864, "step": 1524 }, { "epoch": 0.17517661248635918, "grad_norm": 0.37981778383255005, "learning_rate": 0.0001, "loss": 1.772, "step": 1525 }, { "epoch": 0.17529148239618633, "grad_norm": 0.40624651312828064, "learning_rate": 0.0001, "loss": 1.5573, "step": 1526 }, { "epoch": 0.17540635230601345, "grad_norm": 0.37434014678001404, "learning_rate": 0.0001, "loss": 1.7501, "step": 1527 }, { "epoch": 0.17552122221584057, "grad_norm": 0.3867623805999756, "learning_rate": 0.0001, "loss": 1.6846, "step": 1528 }, { "epoch": 0.1756360921256677, "grad_norm": 0.384644478559494, "learning_rate": 0.0001, "loss": 1.7358, "step": 1529 }, { "epoch": 0.1757509620354948, "grad_norm": 0.36406537890434265, "learning_rate": 0.0001, "loss": 1.6338, "step": 1530 }, { "epoch": 0.17586583194532193, "grad_norm": 0.3523077070713043, "learning_rate": 0.0001, "loss": 1.663, "step": 1531 }, { "epoch": 0.17598070185514905, "grad_norm": 0.3456611633300781, "learning_rate": 0.0001, "loss": 1.6552, "step": 1532 }, { "epoch": 0.17609557176497617, "grad_norm": 0.4034580588340759, "learning_rate": 0.0001, "loss": 1.8922, "step": 1533 }, { "epoch": 0.1762104416748033, "grad_norm": 0.3668345510959625, "learning_rate": 0.0001, "loss": 1.7633, "step": 1534 }, { "epoch": 0.1763253115846304, "grad_norm": 0.3617863059043884, "learning_rate": 0.0001, "loss": 1.766, "step": 1535 }, { "epoch": 0.17644018149445753, "grad_norm": 0.4189690053462982, "learning_rate": 0.0001, "loss": 1.8517, "step": 1536 }, { "epoch": 0.17655505140428465, "grad_norm": 0.36103829741477966, "learning_rate": 0.0001, "loss": 1.7356, "step": 1537 }, { "epoch": 0.17666992131411177, "grad_norm": 0.3502132296562195, "learning_rate": 0.0001, "loss": 1.7039, "step": 1538 }, { "epoch": 0.1767847912239389, "grad_norm": 0.4156895577907562, "learning_rate": 0.0001, "loss": 1.8255, "step": 1539 }, { "epoch": 0.176899661133766, "grad_norm": 0.35794487595558167, "learning_rate": 0.0001, "loss": 1.762, "step": 1540 }, { "epoch": 0.17701453104359313, "grad_norm": 0.3665020763874054, "learning_rate": 0.0001, "loss": 1.7417, "step": 1541 }, { "epoch": 0.17712940095342025, "grad_norm": 0.42144718766212463, "learning_rate": 0.0001, "loss": 1.9003, "step": 1542 }, { "epoch": 0.17724427086324737, "grad_norm": 0.3615649342536926, "learning_rate": 0.0001, "loss": 1.4503, "step": 1543 }, { "epoch": 0.1773591407730745, "grad_norm": 0.36150482296943665, "learning_rate": 0.0001, "loss": 1.7157, "step": 1544 }, { "epoch": 0.1774740106829016, "grad_norm": 0.39197593927383423, "learning_rate": 0.0001, "loss": 1.717, "step": 1545 }, { "epoch": 0.17758888059272873, "grad_norm": 0.40486010909080505, "learning_rate": 0.0001, "loss": 1.7411, "step": 1546 }, { "epoch": 0.17770375050255585, "grad_norm": 0.3970898389816284, "learning_rate": 0.0001, "loss": 2.0669, "step": 1547 }, { "epoch": 0.17781862041238297, "grad_norm": 0.3371671736240387, "learning_rate": 0.0001, "loss": 1.5913, "step": 1548 }, { "epoch": 0.1779334903222101, "grad_norm": 0.33657070994377136, "learning_rate": 0.0001, "loss": 1.5829, "step": 1549 }, { "epoch": 0.1780483602320372, "grad_norm": 0.34936872124671936, "learning_rate": 0.0001, "loss": 1.6385, "step": 1550 }, { "epoch": 0.17816323014186433, "grad_norm": 0.353533536195755, "learning_rate": 0.0001, "loss": 1.6985, "step": 1551 }, { "epoch": 0.17827810005169145, "grad_norm": 0.3584658205509186, "learning_rate": 0.0001, "loss": 1.6322, "step": 1552 }, { "epoch": 0.17839296996151857, "grad_norm": 0.3596382141113281, "learning_rate": 0.0001, "loss": 1.8855, "step": 1553 }, { "epoch": 0.1785078398713457, "grad_norm": 0.3663223683834076, "learning_rate": 0.0001, "loss": 1.5073, "step": 1554 }, { "epoch": 0.1786227097811728, "grad_norm": 0.39811477065086365, "learning_rate": 0.0001, "loss": 1.7019, "step": 1555 }, { "epoch": 0.17873757969099993, "grad_norm": 0.3574909567832947, "learning_rate": 0.0001, "loss": 1.7678, "step": 1556 }, { "epoch": 0.17885244960082705, "grad_norm": 0.3777164816856384, "learning_rate": 0.0001, "loss": 1.8767, "step": 1557 }, { "epoch": 0.17896731951065417, "grad_norm": 0.3364102244377136, "learning_rate": 0.0001, "loss": 1.6808, "step": 1558 }, { "epoch": 0.1790821894204813, "grad_norm": 0.36213961243629456, "learning_rate": 0.0001, "loss": 1.7973, "step": 1559 }, { "epoch": 0.17919705933030844, "grad_norm": 0.4215514361858368, "learning_rate": 0.0001, "loss": 1.8289, "step": 1560 }, { "epoch": 0.17931192924013556, "grad_norm": 0.35058748722076416, "learning_rate": 0.0001, "loss": 1.5582, "step": 1561 }, { "epoch": 0.17942679914996268, "grad_norm": 0.4215516448020935, "learning_rate": 0.0001, "loss": 1.6928, "step": 1562 }, { "epoch": 0.1795416690597898, "grad_norm": 0.3747852146625519, "learning_rate": 0.0001, "loss": 1.6683, "step": 1563 }, { "epoch": 0.17965653896961692, "grad_norm": 0.3510657846927643, "learning_rate": 0.0001, "loss": 1.6948, "step": 1564 }, { "epoch": 0.17977140887944404, "grad_norm": 0.3715681731700897, "learning_rate": 0.0001, "loss": 1.7083, "step": 1565 }, { "epoch": 0.17988627878927116, "grad_norm": 0.3528061509132385, "learning_rate": 0.0001, "loss": 1.6339, "step": 1566 }, { "epoch": 0.18000114869909828, "grad_norm": 0.3377302587032318, "learning_rate": 0.0001, "loss": 1.7079, "step": 1567 }, { "epoch": 0.1801160186089254, "grad_norm": 0.40321823954582214, "learning_rate": 0.0001, "loss": 1.8162, "step": 1568 }, { "epoch": 0.18023088851875252, "grad_norm": 0.3601834774017334, "learning_rate": 0.0001, "loss": 1.6868, "step": 1569 }, { "epoch": 0.18034575842857964, "grad_norm": 0.3452896475791931, "learning_rate": 0.0001, "loss": 1.4845, "step": 1570 }, { "epoch": 0.18046062833840676, "grad_norm": 0.3979194462299347, "learning_rate": 0.0001, "loss": 1.9325, "step": 1571 }, { "epoch": 0.18057549824823388, "grad_norm": 0.3308473229408264, "learning_rate": 0.0001, "loss": 1.455, "step": 1572 }, { "epoch": 0.180690368158061, "grad_norm": 0.3808495104312897, "learning_rate": 0.0001, "loss": 1.8788, "step": 1573 }, { "epoch": 0.18080523806788812, "grad_norm": 0.35744503140449524, "learning_rate": 0.0001, "loss": 1.6442, "step": 1574 }, { "epoch": 0.18092010797771524, "grad_norm": 0.35669422149658203, "learning_rate": 0.0001, "loss": 1.5716, "step": 1575 }, { "epoch": 0.18103497788754236, "grad_norm": 0.3452187776565552, "learning_rate": 0.0001, "loss": 1.6189, "step": 1576 }, { "epoch": 0.18114984779736948, "grad_norm": 0.36027148365974426, "learning_rate": 0.0001, "loss": 1.5917, "step": 1577 }, { "epoch": 0.1812647177071966, "grad_norm": 0.32262781262397766, "learning_rate": 0.0001, "loss": 1.5746, "step": 1578 }, { "epoch": 0.18137958761702372, "grad_norm": 0.3979918658733368, "learning_rate": 0.0001, "loss": 1.8699, "step": 1579 }, { "epoch": 0.18149445752685084, "grad_norm": 0.42020371556282043, "learning_rate": 0.0001, "loss": 1.9041, "step": 1580 }, { "epoch": 0.18160932743667796, "grad_norm": 0.34996458888053894, "learning_rate": 0.0001, "loss": 1.6857, "step": 1581 }, { "epoch": 0.18172419734650508, "grad_norm": 0.3742469549179077, "learning_rate": 0.0001, "loss": 1.934, "step": 1582 }, { "epoch": 0.1818390672563322, "grad_norm": 0.37955376505851746, "learning_rate": 0.0001, "loss": 1.7681, "step": 1583 }, { "epoch": 0.18195393716615932, "grad_norm": 0.31765666604042053, "learning_rate": 0.0001, "loss": 1.5858, "step": 1584 }, { "epoch": 0.18206880707598644, "grad_norm": 0.39358144998550415, "learning_rate": 0.0001, "loss": 1.8361, "step": 1585 }, { "epoch": 0.18218367698581356, "grad_norm": 0.35237935185432434, "learning_rate": 0.0001, "loss": 1.7351, "step": 1586 }, { "epoch": 0.18229854689564068, "grad_norm": 0.38084107637405396, "learning_rate": 0.0001, "loss": 1.7978, "step": 1587 }, { "epoch": 0.1824134168054678, "grad_norm": 0.37168824672698975, "learning_rate": 0.0001, "loss": 1.7858, "step": 1588 }, { "epoch": 0.18252828671529492, "grad_norm": 0.33814021944999695, "learning_rate": 0.0001, "loss": 1.738, "step": 1589 }, { "epoch": 0.18264315662512204, "grad_norm": 0.39129403233528137, "learning_rate": 0.0001, "loss": 1.9242, "step": 1590 }, { "epoch": 0.18275802653494916, "grad_norm": 0.3859502971172333, "learning_rate": 0.0001, "loss": 1.8557, "step": 1591 }, { "epoch": 0.18287289644477628, "grad_norm": 0.3586483895778656, "learning_rate": 0.0001, "loss": 1.7178, "step": 1592 }, { "epoch": 0.1829877663546034, "grad_norm": 0.3621407449245453, "learning_rate": 0.0001, "loss": 1.5869, "step": 1593 }, { "epoch": 0.18310263626443055, "grad_norm": 0.3588270843029022, "learning_rate": 0.0001, "loss": 1.6165, "step": 1594 }, { "epoch": 0.18321750617425767, "grad_norm": 0.36701640486717224, "learning_rate": 0.0001, "loss": 1.6575, "step": 1595 }, { "epoch": 0.18333237608408479, "grad_norm": 0.3731893301010132, "learning_rate": 0.0001, "loss": 1.5814, "step": 1596 }, { "epoch": 0.1834472459939119, "grad_norm": 0.3400730788707733, "learning_rate": 0.0001, "loss": 1.5258, "step": 1597 }, { "epoch": 0.18356211590373903, "grad_norm": 0.3284122943878174, "learning_rate": 0.0001, "loss": 1.3967, "step": 1598 }, { "epoch": 0.18367698581356615, "grad_norm": 0.3313588798046112, "learning_rate": 0.0001, "loss": 1.5584, "step": 1599 }, { "epoch": 0.18379185572339327, "grad_norm": 0.4088406562805176, "learning_rate": 0.0001, "loss": 1.8716, "step": 1600 }, { "epoch": 0.18390672563322039, "grad_norm": 0.3627072870731354, "learning_rate": 0.0001, "loss": 1.7801, "step": 1601 }, { "epoch": 0.1840215955430475, "grad_norm": 0.3515871465206146, "learning_rate": 0.0001, "loss": 1.6491, "step": 1602 }, { "epoch": 0.18413646545287463, "grad_norm": 0.4161235988140106, "learning_rate": 0.0001, "loss": 1.749, "step": 1603 }, { "epoch": 0.18425133536270175, "grad_norm": 0.35696670413017273, "learning_rate": 0.0001, "loss": 1.6153, "step": 1604 }, { "epoch": 0.18436620527252887, "grad_norm": 0.3441614508628845, "learning_rate": 0.0001, "loss": 1.6761, "step": 1605 }, { "epoch": 0.18448107518235599, "grad_norm": 0.35174059867858887, "learning_rate": 0.0001, "loss": 1.5445, "step": 1606 }, { "epoch": 0.1845959450921831, "grad_norm": 0.3795402944087982, "learning_rate": 0.0001, "loss": 1.7702, "step": 1607 }, { "epoch": 0.18471081500201023, "grad_norm": 0.3490031957626343, "learning_rate": 0.0001, "loss": 1.6797, "step": 1608 }, { "epoch": 0.18482568491183735, "grad_norm": 0.39439791440963745, "learning_rate": 0.0001, "loss": 1.8993, "step": 1609 }, { "epoch": 0.18494055482166447, "grad_norm": 0.3577129542827606, "learning_rate": 0.0001, "loss": 1.6329, "step": 1610 }, { "epoch": 0.18505542473149159, "grad_norm": 0.34242141246795654, "learning_rate": 0.0001, "loss": 1.4998, "step": 1611 }, { "epoch": 0.1851702946413187, "grad_norm": 0.3696388304233551, "learning_rate": 0.0001, "loss": 1.7205, "step": 1612 }, { "epoch": 0.18528516455114583, "grad_norm": 0.3409230411052704, "learning_rate": 0.0001, "loss": 1.6485, "step": 1613 }, { "epoch": 0.18540003446097295, "grad_norm": 0.34659913182258606, "learning_rate": 0.0001, "loss": 1.6436, "step": 1614 }, { "epoch": 0.18551490437080007, "grad_norm": 0.3633543848991394, "learning_rate": 0.0001, "loss": 1.8687, "step": 1615 }, { "epoch": 0.18562977428062719, "grad_norm": 0.3456427752971649, "learning_rate": 0.0001, "loss": 1.6913, "step": 1616 }, { "epoch": 0.1857446441904543, "grad_norm": 0.3466663062572479, "learning_rate": 0.0001, "loss": 1.7164, "step": 1617 }, { "epoch": 0.18585951410028143, "grad_norm": 0.373751163482666, "learning_rate": 0.0001, "loss": 1.7618, "step": 1618 }, { "epoch": 0.18597438401010855, "grad_norm": 0.3516460657119751, "learning_rate": 0.0001, "loss": 1.587, "step": 1619 }, { "epoch": 0.18608925391993567, "grad_norm": 0.36074620485305786, "learning_rate": 0.0001, "loss": 1.6287, "step": 1620 }, { "epoch": 0.18620412382976279, "grad_norm": 0.37144434452056885, "learning_rate": 0.0001, "loss": 1.6111, "step": 1621 }, { "epoch": 0.1863189937395899, "grad_norm": 0.34920889139175415, "learning_rate": 0.0001, "loss": 1.5325, "step": 1622 }, { "epoch": 0.18643386364941703, "grad_norm": 0.3639899790287018, "learning_rate": 0.0001, "loss": 1.773, "step": 1623 }, { "epoch": 0.18654873355924415, "grad_norm": 0.38695764541625977, "learning_rate": 0.0001, "loss": 1.9671, "step": 1624 }, { "epoch": 0.18666360346907127, "grad_norm": 0.3656146228313446, "learning_rate": 0.0001, "loss": 1.8799, "step": 1625 }, { "epoch": 0.18677847337889839, "grad_norm": 0.3708580732345581, "learning_rate": 0.0001, "loss": 1.8234, "step": 1626 }, { "epoch": 0.1868933432887255, "grad_norm": 0.3623522222042084, "learning_rate": 0.0001, "loss": 1.623, "step": 1627 }, { "epoch": 0.18700821319855265, "grad_norm": 0.3340558409690857, "learning_rate": 0.0001, "loss": 1.5646, "step": 1628 }, { "epoch": 0.18712308310837977, "grad_norm": 0.3819306790828705, "learning_rate": 0.0001, "loss": 1.9233, "step": 1629 }, { "epoch": 0.1872379530182069, "grad_norm": 0.35179227590560913, "learning_rate": 0.0001, "loss": 1.6158, "step": 1630 }, { "epoch": 0.187352822928034, "grad_norm": 0.3724440634250641, "learning_rate": 0.0001, "loss": 1.6322, "step": 1631 }, { "epoch": 0.18746769283786113, "grad_norm": 0.35643836855888367, "learning_rate": 0.0001, "loss": 1.6294, "step": 1632 }, { "epoch": 0.18758256274768825, "grad_norm": 0.3557715117931366, "learning_rate": 0.0001, "loss": 1.8076, "step": 1633 }, { "epoch": 0.18769743265751537, "grad_norm": 0.3418234586715698, "learning_rate": 0.0001, "loss": 1.7002, "step": 1634 }, { "epoch": 0.1878123025673425, "grad_norm": 0.3681597113609314, "learning_rate": 0.0001, "loss": 1.7233, "step": 1635 }, { "epoch": 0.1879271724771696, "grad_norm": 0.34994348883628845, "learning_rate": 0.0001, "loss": 1.4936, "step": 1636 }, { "epoch": 0.18804204238699673, "grad_norm": 0.35400843620300293, "learning_rate": 0.0001, "loss": 1.5871, "step": 1637 }, { "epoch": 0.18815691229682385, "grad_norm": 0.3841044008731842, "learning_rate": 0.0001, "loss": 1.8312, "step": 1638 }, { "epoch": 0.18827178220665097, "grad_norm": 0.3630238473415375, "learning_rate": 0.0001, "loss": 1.7448, "step": 1639 }, { "epoch": 0.1883866521164781, "grad_norm": 0.3915660083293915, "learning_rate": 0.0001, "loss": 1.8261, "step": 1640 }, { "epoch": 0.1885015220263052, "grad_norm": 0.3483685851097107, "learning_rate": 0.0001, "loss": 1.5901, "step": 1641 }, { "epoch": 0.18861639193613233, "grad_norm": 0.35304656624794006, "learning_rate": 0.0001, "loss": 1.6511, "step": 1642 }, { "epoch": 0.18873126184595945, "grad_norm": 0.3424839973449707, "learning_rate": 0.0001, "loss": 1.6643, "step": 1643 }, { "epoch": 0.18884613175578657, "grad_norm": 0.3762650489807129, "learning_rate": 0.0001, "loss": 1.7361, "step": 1644 }, { "epoch": 0.1889610016656137, "grad_norm": 0.3635323643684387, "learning_rate": 0.0001, "loss": 1.7845, "step": 1645 }, { "epoch": 0.1890758715754408, "grad_norm": 0.36258241534233093, "learning_rate": 0.0001, "loss": 1.8612, "step": 1646 }, { "epoch": 0.18919074148526793, "grad_norm": 0.35478660464286804, "learning_rate": 0.0001, "loss": 1.7232, "step": 1647 }, { "epoch": 0.18930561139509505, "grad_norm": 0.38521048426628113, "learning_rate": 0.0001, "loss": 1.7481, "step": 1648 }, { "epoch": 0.18942048130492217, "grad_norm": 0.34351152181625366, "learning_rate": 0.0001, "loss": 1.5854, "step": 1649 }, { "epoch": 0.1895353512147493, "grad_norm": 0.38085636496543884, "learning_rate": 0.0001, "loss": 1.8211, "step": 1650 }, { "epoch": 0.1896502211245764, "grad_norm": 0.3599552512168884, "learning_rate": 0.0001, "loss": 1.6675, "step": 1651 }, { "epoch": 0.18976509103440353, "grad_norm": 0.3752254247665405, "learning_rate": 0.0001, "loss": 1.6259, "step": 1652 }, { "epoch": 0.18987996094423065, "grad_norm": 0.3557283580303192, "learning_rate": 0.0001, "loss": 1.8026, "step": 1653 }, { "epoch": 0.18999483085405777, "grad_norm": 0.3593176603317261, "learning_rate": 0.0001, "loss": 1.6131, "step": 1654 }, { "epoch": 0.1901097007638849, "grad_norm": 0.36632347106933594, "learning_rate": 0.0001, "loss": 1.751, "step": 1655 }, { "epoch": 0.190224570673712, "grad_norm": 0.36848095059394836, "learning_rate": 0.0001, "loss": 1.7481, "step": 1656 }, { "epoch": 0.19033944058353913, "grad_norm": 0.35497191548347473, "learning_rate": 0.0001, "loss": 1.6606, "step": 1657 }, { "epoch": 0.19045431049336625, "grad_norm": 0.3646465837955475, "learning_rate": 0.0001, "loss": 1.6281, "step": 1658 }, { "epoch": 0.19056918040319337, "grad_norm": 0.3539585471153259, "learning_rate": 0.0001, "loss": 1.6751, "step": 1659 }, { "epoch": 0.1906840503130205, "grad_norm": 0.36736389994621277, "learning_rate": 0.0001, "loss": 1.633, "step": 1660 }, { "epoch": 0.1907989202228476, "grad_norm": 0.38588473200798035, "learning_rate": 0.0001, "loss": 1.7653, "step": 1661 }, { "epoch": 0.19091379013267473, "grad_norm": 0.38840097188949585, "learning_rate": 0.0001, "loss": 1.6896, "step": 1662 }, { "epoch": 0.19102866004250188, "grad_norm": 0.34677135944366455, "learning_rate": 0.0001, "loss": 1.6718, "step": 1663 }, { "epoch": 0.191143529952329, "grad_norm": 0.3521466553211212, "learning_rate": 0.0001, "loss": 1.6003, "step": 1664 }, { "epoch": 0.19125839986215612, "grad_norm": 0.34969663619995117, "learning_rate": 0.0001, "loss": 1.7715, "step": 1665 }, { "epoch": 0.19137326977198324, "grad_norm": 0.3782643973827362, "learning_rate": 0.0001, "loss": 1.777, "step": 1666 }, { "epoch": 0.19148813968181036, "grad_norm": 0.3731124699115753, "learning_rate": 0.0001, "loss": 1.7143, "step": 1667 }, { "epoch": 0.19160300959163748, "grad_norm": 0.37945446372032166, "learning_rate": 0.0001, "loss": 1.7618, "step": 1668 }, { "epoch": 0.1917178795014646, "grad_norm": 0.331589937210083, "learning_rate": 0.0001, "loss": 1.6613, "step": 1669 }, { "epoch": 0.19183274941129172, "grad_norm": 0.3730468451976776, "learning_rate": 0.0001, "loss": 1.611, "step": 1670 }, { "epoch": 0.19194761932111884, "grad_norm": 0.40152809023857117, "learning_rate": 0.0001, "loss": 1.9885, "step": 1671 }, { "epoch": 0.19206248923094596, "grad_norm": 0.3776914179325104, "learning_rate": 0.0001, "loss": 1.8431, "step": 1672 }, { "epoch": 0.19217735914077308, "grad_norm": 0.3614851236343384, "learning_rate": 0.0001, "loss": 1.7252, "step": 1673 }, { "epoch": 0.1922922290506002, "grad_norm": 0.33890727162361145, "learning_rate": 0.0001, "loss": 1.6394, "step": 1674 }, { "epoch": 0.19240709896042732, "grad_norm": 0.3711831867694855, "learning_rate": 0.0001, "loss": 1.8968, "step": 1675 }, { "epoch": 0.19252196887025444, "grad_norm": 0.407746285200119, "learning_rate": 0.0001, "loss": 1.6691, "step": 1676 }, { "epoch": 0.19263683878008156, "grad_norm": 0.3913660943508148, "learning_rate": 0.0001, "loss": 1.6698, "step": 1677 }, { "epoch": 0.19275170868990868, "grad_norm": 0.3649699091911316, "learning_rate": 0.0001, "loss": 1.8171, "step": 1678 }, { "epoch": 0.1928665785997358, "grad_norm": 0.34938865900039673, "learning_rate": 0.0001, "loss": 1.7187, "step": 1679 }, { "epoch": 0.19298144850956292, "grad_norm": 0.3610716462135315, "learning_rate": 0.0001, "loss": 1.7097, "step": 1680 }, { "epoch": 0.19309631841939004, "grad_norm": 0.3534272015094757, "learning_rate": 0.0001, "loss": 1.7907, "step": 1681 }, { "epoch": 0.19321118832921716, "grad_norm": 0.3574727177619934, "learning_rate": 0.0001, "loss": 1.7394, "step": 1682 }, { "epoch": 0.19332605823904428, "grad_norm": 0.32285967469215393, "learning_rate": 0.0001, "loss": 1.5277, "step": 1683 }, { "epoch": 0.1934409281488714, "grad_norm": 0.36247870326042175, "learning_rate": 0.0001, "loss": 1.7193, "step": 1684 }, { "epoch": 0.19355579805869852, "grad_norm": 0.3250444233417511, "learning_rate": 0.0001, "loss": 1.5945, "step": 1685 }, { "epoch": 0.19367066796852564, "grad_norm": 0.3848918676376343, "learning_rate": 0.0001, "loss": 1.8016, "step": 1686 }, { "epoch": 0.19378553787835276, "grad_norm": 0.3321680426597595, "learning_rate": 0.0001, "loss": 1.5136, "step": 1687 }, { "epoch": 0.19390040778817988, "grad_norm": 0.3534335196018219, "learning_rate": 0.0001, "loss": 1.8101, "step": 1688 }, { "epoch": 0.194015277698007, "grad_norm": 0.3084717094898224, "learning_rate": 0.0001, "loss": 1.2239, "step": 1689 }, { "epoch": 0.19413014760783412, "grad_norm": 0.36308553814888, "learning_rate": 0.0001, "loss": 1.699, "step": 1690 }, { "epoch": 0.19424501751766124, "grad_norm": 0.35767173767089844, "learning_rate": 0.0001, "loss": 1.7313, "step": 1691 }, { "epoch": 0.19435988742748836, "grad_norm": 0.3621061444282532, "learning_rate": 0.0001, "loss": 1.5605, "step": 1692 }, { "epoch": 0.19447475733731548, "grad_norm": 0.3489883840084076, "learning_rate": 0.0001, "loss": 1.7473, "step": 1693 }, { "epoch": 0.1945896272471426, "grad_norm": 0.35943081974983215, "learning_rate": 0.0001, "loss": 1.8841, "step": 1694 }, { "epoch": 0.19470449715696972, "grad_norm": 0.3859713673591614, "learning_rate": 0.0001, "loss": 1.7675, "step": 1695 }, { "epoch": 0.19481936706679684, "grad_norm": 0.3559940457344055, "learning_rate": 0.0001, "loss": 1.6382, "step": 1696 }, { "epoch": 0.194934236976624, "grad_norm": 0.37918147444725037, "learning_rate": 0.0001, "loss": 1.7507, "step": 1697 }, { "epoch": 0.1950491068864511, "grad_norm": 0.36371055245399475, "learning_rate": 0.0001, "loss": 1.7621, "step": 1698 }, { "epoch": 0.19516397679627823, "grad_norm": 0.34695690870285034, "learning_rate": 0.0001, "loss": 1.7555, "step": 1699 }, { "epoch": 0.19527884670610535, "grad_norm": 0.35389262437820435, "learning_rate": 0.0001, "loss": 1.6632, "step": 1700 }, { "epoch": 0.19539371661593247, "grad_norm": 0.35778507590293884, "learning_rate": 0.0001, "loss": 1.7939, "step": 1701 }, { "epoch": 0.1955085865257596, "grad_norm": 0.37663915753364563, "learning_rate": 0.0001, "loss": 1.6431, "step": 1702 }, { "epoch": 0.1956234564355867, "grad_norm": 0.3157816529273987, "learning_rate": 0.0001, "loss": 1.3818, "step": 1703 }, { "epoch": 0.19573832634541383, "grad_norm": 0.35252466797828674, "learning_rate": 0.0001, "loss": 1.7476, "step": 1704 }, { "epoch": 0.19585319625524095, "grad_norm": 0.3793637752532959, "learning_rate": 0.0001, "loss": 1.8395, "step": 1705 }, { "epoch": 0.19596806616506807, "grad_norm": 0.3287891149520874, "learning_rate": 0.0001, "loss": 1.4472, "step": 1706 }, { "epoch": 0.1960829360748952, "grad_norm": 0.3720473349094391, "learning_rate": 0.0001, "loss": 1.8553, "step": 1707 }, { "epoch": 0.1961978059847223, "grad_norm": 0.3807579576969147, "learning_rate": 0.0001, "loss": 1.8815, "step": 1708 }, { "epoch": 0.19631267589454943, "grad_norm": 0.3885481655597687, "learning_rate": 0.0001, "loss": 1.8668, "step": 1709 }, { "epoch": 0.19642754580437655, "grad_norm": 0.3450814187526703, "learning_rate": 0.0001, "loss": 1.5449, "step": 1710 }, { "epoch": 0.19654241571420367, "grad_norm": 0.3540419936180115, "learning_rate": 0.0001, "loss": 1.6886, "step": 1711 }, { "epoch": 0.1966572856240308, "grad_norm": 0.38482004404067993, "learning_rate": 0.0001, "loss": 1.6155, "step": 1712 }, { "epoch": 0.1967721555338579, "grad_norm": 0.37782052159309387, "learning_rate": 0.0001, "loss": 1.6786, "step": 1713 }, { "epoch": 0.19688702544368503, "grad_norm": 0.3788130283355713, "learning_rate": 0.0001, "loss": 1.6707, "step": 1714 }, { "epoch": 0.19700189535351215, "grad_norm": 0.3536849617958069, "learning_rate": 0.0001, "loss": 1.7248, "step": 1715 }, { "epoch": 0.19711676526333927, "grad_norm": 0.4016459584236145, "learning_rate": 0.0001, "loss": 1.4351, "step": 1716 }, { "epoch": 0.1972316351731664, "grad_norm": 0.34957659244537354, "learning_rate": 0.0001, "loss": 1.7459, "step": 1717 }, { "epoch": 0.1973465050829935, "grad_norm": 0.36093124747276306, "learning_rate": 0.0001, "loss": 1.7523, "step": 1718 }, { "epoch": 0.19746137499282063, "grad_norm": 0.36018863320350647, "learning_rate": 0.0001, "loss": 1.607, "step": 1719 }, { "epoch": 0.19757624490264775, "grad_norm": 0.37135326862335205, "learning_rate": 0.0001, "loss": 1.4374, "step": 1720 }, { "epoch": 0.19769111481247487, "grad_norm": 0.3906667232513428, "learning_rate": 0.0001, "loss": 1.7177, "step": 1721 }, { "epoch": 0.197805984722302, "grad_norm": 0.3404282331466675, "learning_rate": 0.0001, "loss": 1.4111, "step": 1722 }, { "epoch": 0.1979208546321291, "grad_norm": 0.39864858984947205, "learning_rate": 0.0001, "loss": 1.7668, "step": 1723 }, { "epoch": 0.19803572454195623, "grad_norm": 0.38260820508003235, "learning_rate": 0.0001, "loss": 1.4592, "step": 1724 }, { "epoch": 0.19815059445178335, "grad_norm": 0.35546496510505676, "learning_rate": 0.0001, "loss": 1.7121, "step": 1725 }, { "epoch": 0.19826546436161047, "grad_norm": 0.34984710812568665, "learning_rate": 0.0001, "loss": 1.7286, "step": 1726 }, { "epoch": 0.1983803342714376, "grad_norm": 0.38462913036346436, "learning_rate": 0.0001, "loss": 1.9247, "step": 1727 }, { "epoch": 0.1984952041812647, "grad_norm": 0.3563911020755768, "learning_rate": 0.0001, "loss": 1.6917, "step": 1728 }, { "epoch": 0.19861007409109183, "grad_norm": 0.3691602647304535, "learning_rate": 0.0001, "loss": 1.6081, "step": 1729 }, { "epoch": 0.19872494400091895, "grad_norm": 0.36970895528793335, "learning_rate": 0.0001, "loss": 1.7104, "step": 1730 }, { "epoch": 0.1988398139107461, "grad_norm": 0.3286248445510864, "learning_rate": 0.0001, "loss": 1.4056, "step": 1731 }, { "epoch": 0.1989546838205732, "grad_norm": 0.3443751037120819, "learning_rate": 0.0001, "loss": 1.5495, "step": 1732 }, { "epoch": 0.19906955373040033, "grad_norm": 0.3585871458053589, "learning_rate": 0.0001, "loss": 1.8211, "step": 1733 }, { "epoch": 0.19918442364022745, "grad_norm": 0.37067654728889465, "learning_rate": 0.0001, "loss": 1.6953, "step": 1734 }, { "epoch": 0.19929929355005457, "grad_norm": 0.3535691797733307, "learning_rate": 0.0001, "loss": 1.6193, "step": 1735 }, { "epoch": 0.1994141634598817, "grad_norm": 0.37914103269577026, "learning_rate": 0.0001, "loss": 1.6576, "step": 1736 }, { "epoch": 0.1995290333697088, "grad_norm": 0.3962135910987854, "learning_rate": 0.0001, "loss": 1.625, "step": 1737 }, { "epoch": 0.19964390327953593, "grad_norm": 0.37456363439559937, "learning_rate": 0.0001, "loss": 1.6887, "step": 1738 }, { "epoch": 0.19975877318936305, "grad_norm": 0.36762315034866333, "learning_rate": 0.0001, "loss": 1.7478, "step": 1739 }, { "epoch": 0.19987364309919017, "grad_norm": 0.37871554493904114, "learning_rate": 0.0001, "loss": 1.7603, "step": 1740 }, { "epoch": 0.1999885130090173, "grad_norm": 0.39336419105529785, "learning_rate": 0.0001, "loss": 1.7888, "step": 1741 }, { "epoch": 0.2001033829188444, "grad_norm": 0.366931289434433, "learning_rate": 0.0001, "loss": 1.6331, "step": 1742 }, { "epoch": 0.20021825282867153, "grad_norm": 0.37387847900390625, "learning_rate": 0.0001, "loss": 1.6847, "step": 1743 }, { "epoch": 0.20033312273849865, "grad_norm": 0.3491780459880829, "learning_rate": 0.0001, "loss": 1.8099, "step": 1744 }, { "epoch": 0.20044799264832577, "grad_norm": 0.39339229464530945, "learning_rate": 0.0001, "loss": 1.7889, "step": 1745 }, { "epoch": 0.2005628625581529, "grad_norm": 0.3711383640766144, "learning_rate": 0.0001, "loss": 1.853, "step": 1746 }, { "epoch": 0.20067773246798, "grad_norm": 0.33763977885246277, "learning_rate": 0.0001, "loss": 1.6071, "step": 1747 }, { "epoch": 0.20079260237780713, "grad_norm": 0.34965288639068604, "learning_rate": 0.0001, "loss": 1.5583, "step": 1748 }, { "epoch": 0.20090747228763425, "grad_norm": 0.3817383646965027, "learning_rate": 0.0001, "loss": 1.6416, "step": 1749 }, { "epoch": 0.20102234219746137, "grad_norm": 0.37496039271354675, "learning_rate": 0.0001, "loss": 1.711, "step": 1750 }, { "epoch": 0.2011372121072885, "grad_norm": 0.37966370582580566, "learning_rate": 0.0001, "loss": 1.8306, "step": 1751 }, { "epoch": 0.2012520820171156, "grad_norm": 0.3994872272014618, "learning_rate": 0.0001, "loss": 1.9473, "step": 1752 }, { "epoch": 0.20136695192694273, "grad_norm": 0.3680518865585327, "learning_rate": 0.0001, "loss": 1.7061, "step": 1753 }, { "epoch": 0.20148182183676985, "grad_norm": 0.31908658146858215, "learning_rate": 0.0001, "loss": 1.4684, "step": 1754 }, { "epoch": 0.20159669174659697, "grad_norm": 0.362386554479599, "learning_rate": 0.0001, "loss": 1.5278, "step": 1755 }, { "epoch": 0.2017115616564241, "grad_norm": 0.35823360085487366, "learning_rate": 0.0001, "loss": 1.7714, "step": 1756 }, { "epoch": 0.2018264315662512, "grad_norm": 0.3523258566856384, "learning_rate": 0.0001, "loss": 1.8227, "step": 1757 }, { "epoch": 0.20194130147607833, "grad_norm": 0.348457396030426, "learning_rate": 0.0001, "loss": 1.6338, "step": 1758 }, { "epoch": 0.20205617138590545, "grad_norm": 0.35159286856651306, "learning_rate": 0.0001, "loss": 1.4644, "step": 1759 }, { "epoch": 0.20217104129573257, "grad_norm": 0.38442832231521606, "learning_rate": 0.0001, "loss": 1.7378, "step": 1760 }, { "epoch": 0.2022859112055597, "grad_norm": 0.3663921356201172, "learning_rate": 0.0001, "loss": 1.8518, "step": 1761 }, { "epoch": 0.2024007811153868, "grad_norm": 0.3565858006477356, "learning_rate": 0.0001, "loss": 1.6532, "step": 1762 }, { "epoch": 0.20251565102521393, "grad_norm": 0.37562236189842224, "learning_rate": 0.0001, "loss": 1.7504, "step": 1763 }, { "epoch": 0.20263052093504105, "grad_norm": 0.3319898247718811, "learning_rate": 0.0001, "loss": 1.5616, "step": 1764 }, { "epoch": 0.20274539084486817, "grad_norm": 0.35017985105514526, "learning_rate": 0.0001, "loss": 1.6769, "step": 1765 }, { "epoch": 0.20286026075469532, "grad_norm": 0.3883030116558075, "learning_rate": 0.0001, "loss": 1.6532, "step": 1766 }, { "epoch": 0.20297513066452244, "grad_norm": 0.34447354078292847, "learning_rate": 0.0001, "loss": 1.6055, "step": 1767 }, { "epoch": 0.20309000057434956, "grad_norm": 0.3480866253376007, "learning_rate": 0.0001, "loss": 1.6489, "step": 1768 }, { "epoch": 0.20320487048417668, "grad_norm": 0.3792140781879425, "learning_rate": 0.0001, "loss": 1.7436, "step": 1769 }, { "epoch": 0.2033197403940038, "grad_norm": 0.3636226952075958, "learning_rate": 0.0001, "loss": 1.4727, "step": 1770 }, { "epoch": 0.20343461030383092, "grad_norm": 0.3649579584598541, "learning_rate": 0.0001, "loss": 1.5061, "step": 1771 }, { "epoch": 0.20354948021365804, "grad_norm": 0.34622448682785034, "learning_rate": 0.0001, "loss": 1.5336, "step": 1772 }, { "epoch": 0.20366435012348516, "grad_norm": 0.4073683023452759, "learning_rate": 0.0001, "loss": 1.9036, "step": 1773 }, { "epoch": 0.20377922003331228, "grad_norm": 0.35996997356414795, "learning_rate": 0.0001, "loss": 1.8344, "step": 1774 }, { "epoch": 0.2038940899431394, "grad_norm": 0.35649874806404114, "learning_rate": 0.0001, "loss": 1.4739, "step": 1775 }, { "epoch": 0.20400895985296652, "grad_norm": 0.3954346477985382, "learning_rate": 0.0001, "loss": 1.9226, "step": 1776 }, { "epoch": 0.20412382976279364, "grad_norm": 0.3589356243610382, "learning_rate": 0.0001, "loss": 1.6198, "step": 1777 }, { "epoch": 0.20423869967262076, "grad_norm": 0.36763888597488403, "learning_rate": 0.0001, "loss": 1.7394, "step": 1778 }, { "epoch": 0.20435356958244788, "grad_norm": 0.38217705488204956, "learning_rate": 0.0001, "loss": 1.9139, "step": 1779 }, { "epoch": 0.204468439492275, "grad_norm": 0.39067110419273376, "learning_rate": 0.0001, "loss": 1.8865, "step": 1780 }, { "epoch": 0.20458330940210212, "grad_norm": 0.369056761264801, "learning_rate": 0.0001, "loss": 1.7165, "step": 1781 }, { "epoch": 0.20469817931192924, "grad_norm": 0.3984009623527527, "learning_rate": 0.0001, "loss": 1.6909, "step": 1782 }, { "epoch": 0.20481304922175636, "grad_norm": 0.3637178838253021, "learning_rate": 0.0001, "loss": 1.7317, "step": 1783 }, { "epoch": 0.20492791913158348, "grad_norm": 0.3644060790538788, "learning_rate": 0.0001, "loss": 1.7301, "step": 1784 }, { "epoch": 0.2050427890414106, "grad_norm": 0.36648574471473694, "learning_rate": 0.0001, "loss": 1.793, "step": 1785 }, { "epoch": 0.20515765895123772, "grad_norm": 0.37490981817245483, "learning_rate": 0.0001, "loss": 1.7762, "step": 1786 }, { "epoch": 0.20527252886106484, "grad_norm": 0.35913270711898804, "learning_rate": 0.0001, "loss": 1.6501, "step": 1787 }, { "epoch": 0.20538739877089196, "grad_norm": 0.35344579815864563, "learning_rate": 0.0001, "loss": 1.7904, "step": 1788 }, { "epoch": 0.20550226868071908, "grad_norm": 0.4043060839176178, "learning_rate": 0.0001, "loss": 1.8669, "step": 1789 }, { "epoch": 0.2056171385905462, "grad_norm": 0.35678645968437195, "learning_rate": 0.0001, "loss": 1.6165, "step": 1790 }, { "epoch": 0.20573200850037332, "grad_norm": 0.3554267883300781, "learning_rate": 0.0001, "loss": 1.5462, "step": 1791 }, { "epoch": 0.20584687841020044, "grad_norm": 0.3699265122413635, "learning_rate": 0.0001, "loss": 1.8192, "step": 1792 }, { "epoch": 0.20596174832002756, "grad_norm": 0.3494689464569092, "learning_rate": 0.0001, "loss": 1.4697, "step": 1793 }, { "epoch": 0.20607661822985468, "grad_norm": 0.3618113398551941, "learning_rate": 0.0001, "loss": 1.7152, "step": 1794 }, { "epoch": 0.2061914881396818, "grad_norm": 0.36774349212646484, "learning_rate": 0.0001, "loss": 1.769, "step": 1795 }, { "epoch": 0.20630635804950892, "grad_norm": 0.3754447400569916, "learning_rate": 0.0001, "loss": 1.549, "step": 1796 }, { "epoch": 0.20642122795933604, "grad_norm": 0.4026842713356018, "learning_rate": 0.0001, "loss": 1.832, "step": 1797 }, { "epoch": 0.20653609786916316, "grad_norm": 0.34931662678718567, "learning_rate": 0.0001, "loss": 1.6936, "step": 1798 }, { "epoch": 0.20665096777899028, "grad_norm": 0.35631394386291504, "learning_rate": 0.0001, "loss": 1.7474, "step": 1799 }, { "epoch": 0.20676583768881743, "grad_norm": 0.35016921162605286, "learning_rate": 0.0001, "loss": 1.8252, "step": 1800 }, { "epoch": 0.20688070759864455, "grad_norm": 0.36527907848358154, "learning_rate": 0.0001, "loss": 1.7728, "step": 1801 }, { "epoch": 0.20699557750847167, "grad_norm": 0.375942200422287, "learning_rate": 0.0001, "loss": 1.76, "step": 1802 }, { "epoch": 0.2071104474182988, "grad_norm": 0.3598606586456299, "learning_rate": 0.0001, "loss": 1.7765, "step": 1803 }, { "epoch": 0.2072253173281259, "grad_norm": 0.3487381041049957, "learning_rate": 0.0001, "loss": 1.5144, "step": 1804 }, { "epoch": 0.20734018723795303, "grad_norm": 0.33189913630485535, "learning_rate": 0.0001, "loss": 1.5354, "step": 1805 }, { "epoch": 0.20745505714778015, "grad_norm": 0.3427751660346985, "learning_rate": 0.0001, "loss": 1.4288, "step": 1806 }, { "epoch": 0.20756992705760727, "grad_norm": 0.3979537785053253, "learning_rate": 0.0001, "loss": 1.6666, "step": 1807 }, { "epoch": 0.2076847969674344, "grad_norm": 0.37315791845321655, "learning_rate": 0.0001, "loss": 1.5901, "step": 1808 }, { "epoch": 0.2077996668772615, "grad_norm": 0.37909650802612305, "learning_rate": 0.0001, "loss": 1.7111, "step": 1809 }, { "epoch": 0.20791453678708863, "grad_norm": 0.3696240186691284, "learning_rate": 0.0001, "loss": 1.7612, "step": 1810 }, { "epoch": 0.20802940669691575, "grad_norm": 0.38484904170036316, "learning_rate": 0.0001, "loss": 1.5298, "step": 1811 }, { "epoch": 0.20814427660674287, "grad_norm": 0.3398043215274811, "learning_rate": 0.0001, "loss": 1.6615, "step": 1812 }, { "epoch": 0.20825914651657, "grad_norm": 0.3796899616718292, "learning_rate": 0.0001, "loss": 1.8264, "step": 1813 }, { "epoch": 0.2083740164263971, "grad_norm": 0.38819029927253723, "learning_rate": 0.0001, "loss": 1.8616, "step": 1814 }, { "epoch": 0.20848888633622423, "grad_norm": 0.4355449378490448, "learning_rate": 0.0001, "loss": 1.8042, "step": 1815 }, { "epoch": 0.20860375624605135, "grad_norm": 0.37194108963012695, "learning_rate": 0.0001, "loss": 1.7524, "step": 1816 }, { "epoch": 0.20871862615587847, "grad_norm": 0.3792515993118286, "learning_rate": 0.0001, "loss": 1.6455, "step": 1817 }, { "epoch": 0.2088334960657056, "grad_norm": 0.37876373529434204, "learning_rate": 0.0001, "loss": 1.5946, "step": 1818 }, { "epoch": 0.2089483659755327, "grad_norm": 0.34558865427970886, "learning_rate": 0.0001, "loss": 1.6621, "step": 1819 }, { "epoch": 0.20906323588535983, "grad_norm": 0.43524423241615295, "learning_rate": 0.0001, "loss": 2.0707, "step": 1820 }, { "epoch": 0.20917810579518695, "grad_norm": 0.4018253982067108, "learning_rate": 0.0001, "loss": 1.8651, "step": 1821 }, { "epoch": 0.20929297570501407, "grad_norm": 0.3796786367893219, "learning_rate": 0.0001, "loss": 1.6014, "step": 1822 }, { "epoch": 0.2094078456148412, "grad_norm": 0.35122066736221313, "learning_rate": 0.0001, "loss": 1.7398, "step": 1823 }, { "epoch": 0.2095227155246683, "grad_norm": 0.3692324161529541, "learning_rate": 0.0001, "loss": 1.4884, "step": 1824 }, { "epoch": 0.20963758543449543, "grad_norm": 0.39032313227653503, "learning_rate": 0.0001, "loss": 1.7603, "step": 1825 }, { "epoch": 0.20975245534432255, "grad_norm": 0.34031352400779724, "learning_rate": 0.0001, "loss": 1.4029, "step": 1826 }, { "epoch": 0.20986732525414967, "grad_norm": 0.40523627400398254, "learning_rate": 0.0001, "loss": 1.6919, "step": 1827 }, { "epoch": 0.2099821951639768, "grad_norm": 0.3522171378135681, "learning_rate": 0.0001, "loss": 1.7032, "step": 1828 }, { "epoch": 0.2100970650738039, "grad_norm": 0.3715449273586273, "learning_rate": 0.0001, "loss": 1.7735, "step": 1829 }, { "epoch": 0.21021193498363103, "grad_norm": 0.39641836285591125, "learning_rate": 0.0001, "loss": 1.8061, "step": 1830 }, { "epoch": 0.21032680489345815, "grad_norm": 0.3934457302093506, "learning_rate": 0.0001, "loss": 1.7792, "step": 1831 }, { "epoch": 0.21044167480328527, "grad_norm": 0.35226938128471375, "learning_rate": 0.0001, "loss": 1.7576, "step": 1832 }, { "epoch": 0.2105565447131124, "grad_norm": 0.36764612793922424, "learning_rate": 0.0001, "loss": 1.819, "step": 1833 }, { "epoch": 0.21067141462293953, "grad_norm": 0.36354872584342957, "learning_rate": 0.0001, "loss": 1.5628, "step": 1834 }, { "epoch": 0.21078628453276665, "grad_norm": 0.3655260503292084, "learning_rate": 0.0001, "loss": 1.6431, "step": 1835 }, { "epoch": 0.21090115444259377, "grad_norm": 0.36574462056159973, "learning_rate": 0.0001, "loss": 1.8352, "step": 1836 }, { "epoch": 0.2110160243524209, "grad_norm": 0.3510647118091583, "learning_rate": 0.0001, "loss": 1.671, "step": 1837 }, { "epoch": 0.21113089426224801, "grad_norm": 0.38021084666252136, "learning_rate": 0.0001, "loss": 1.7853, "step": 1838 }, { "epoch": 0.21124576417207513, "grad_norm": 0.38449275493621826, "learning_rate": 0.0001, "loss": 1.68, "step": 1839 }, { "epoch": 0.21136063408190225, "grad_norm": 0.378221720457077, "learning_rate": 0.0001, "loss": 1.7663, "step": 1840 }, { "epoch": 0.21147550399172937, "grad_norm": 0.3675908148288727, "learning_rate": 0.0001, "loss": 1.7717, "step": 1841 }, { "epoch": 0.2115903739015565, "grad_norm": 0.3863251805305481, "learning_rate": 0.0001, "loss": 1.686, "step": 1842 }, { "epoch": 0.21170524381138361, "grad_norm": 0.4383453130722046, "learning_rate": 0.0001, "loss": 1.9812, "step": 1843 }, { "epoch": 0.21182011372121073, "grad_norm": 0.3583828806877136, "learning_rate": 0.0001, "loss": 1.7918, "step": 1844 }, { "epoch": 0.21193498363103785, "grad_norm": 0.39561352133750916, "learning_rate": 0.0001, "loss": 1.673, "step": 1845 }, { "epoch": 0.21204985354086497, "grad_norm": 0.37109145522117615, "learning_rate": 0.0001, "loss": 1.7545, "step": 1846 }, { "epoch": 0.2121647234506921, "grad_norm": 0.3461610972881317, "learning_rate": 0.0001, "loss": 1.4589, "step": 1847 }, { "epoch": 0.21227959336051921, "grad_norm": 0.43045949935913086, "learning_rate": 0.0001, "loss": 1.7546, "step": 1848 }, { "epoch": 0.21239446327034633, "grad_norm": 0.4053126871585846, "learning_rate": 0.0001, "loss": 1.6525, "step": 1849 }, { "epoch": 0.21250933318017345, "grad_norm": 0.35681119561195374, "learning_rate": 0.0001, "loss": 1.5118, "step": 1850 }, { "epoch": 0.21262420309000057, "grad_norm": 0.3934498429298401, "learning_rate": 0.0001, "loss": 1.5628, "step": 1851 }, { "epoch": 0.2127390729998277, "grad_norm": 0.3668583035469055, "learning_rate": 0.0001, "loss": 1.4663, "step": 1852 }, { "epoch": 0.21285394290965481, "grad_norm": 0.36090970039367676, "learning_rate": 0.0001, "loss": 1.8151, "step": 1853 }, { "epoch": 0.21296881281948193, "grad_norm": 0.3945942521095276, "learning_rate": 0.0001, "loss": 1.7807, "step": 1854 }, { "epoch": 0.21308368272930905, "grad_norm": 0.34316137433052063, "learning_rate": 0.0001, "loss": 1.5175, "step": 1855 }, { "epoch": 0.21319855263913617, "grad_norm": 0.3736970126628876, "learning_rate": 0.0001, "loss": 1.66, "step": 1856 }, { "epoch": 0.2133134225489633, "grad_norm": 0.36034896969795227, "learning_rate": 0.0001, "loss": 1.6949, "step": 1857 }, { "epoch": 0.21342829245879041, "grad_norm": 0.38381823897361755, "learning_rate": 0.0001, "loss": 1.7625, "step": 1858 }, { "epoch": 0.21354316236861753, "grad_norm": 0.39188024401664734, "learning_rate": 0.0001, "loss": 1.6711, "step": 1859 }, { "epoch": 0.21365803227844465, "grad_norm": 0.3275800049304962, "learning_rate": 0.0001, "loss": 1.5081, "step": 1860 }, { "epoch": 0.21377290218827177, "grad_norm": 0.38419318199157715, "learning_rate": 0.0001, "loss": 1.6184, "step": 1861 }, { "epoch": 0.2138877720980989, "grad_norm": 0.3843541443347931, "learning_rate": 0.0001, "loss": 1.7979, "step": 1862 }, { "epoch": 0.21400264200792601, "grad_norm": 0.4217662513256073, "learning_rate": 0.0001, "loss": 1.7142, "step": 1863 }, { "epoch": 0.21411751191775313, "grad_norm": 0.37826740741729736, "learning_rate": 0.0001, "loss": 1.7926, "step": 1864 }, { "epoch": 0.21423238182758025, "grad_norm": 0.38108256459236145, "learning_rate": 0.0001, "loss": 1.672, "step": 1865 }, { "epoch": 0.21434725173740737, "grad_norm": 0.3516540229320526, "learning_rate": 0.0001, "loss": 1.571, "step": 1866 }, { "epoch": 0.2144621216472345, "grad_norm": 0.3580091595649719, "learning_rate": 0.0001, "loss": 1.7167, "step": 1867 }, { "epoch": 0.21457699155706164, "grad_norm": 0.3416488468647003, "learning_rate": 0.0001, "loss": 1.4516, "step": 1868 }, { "epoch": 0.21469186146688876, "grad_norm": 0.3605569303035736, "learning_rate": 0.0001, "loss": 1.6703, "step": 1869 }, { "epoch": 0.21480673137671588, "grad_norm": 0.3645571768283844, "learning_rate": 0.0001, "loss": 1.6764, "step": 1870 }, { "epoch": 0.214921601286543, "grad_norm": 0.3980044424533844, "learning_rate": 0.0001, "loss": 1.7082, "step": 1871 }, { "epoch": 0.21503647119637012, "grad_norm": 0.3569796681404114, "learning_rate": 0.0001, "loss": 1.572, "step": 1872 }, { "epoch": 0.21515134110619724, "grad_norm": 0.37738198041915894, "learning_rate": 0.0001, "loss": 1.6913, "step": 1873 }, { "epoch": 0.21526621101602436, "grad_norm": 0.34949570894241333, "learning_rate": 0.0001, "loss": 1.6524, "step": 1874 }, { "epoch": 0.21538108092585148, "grad_norm": 0.3858441114425659, "learning_rate": 0.0001, "loss": 1.7056, "step": 1875 }, { "epoch": 0.2154959508356786, "grad_norm": 0.33417800068855286, "learning_rate": 0.0001, "loss": 1.5352, "step": 1876 }, { "epoch": 0.21561082074550572, "grad_norm": 0.363370418548584, "learning_rate": 0.0001, "loss": 1.6378, "step": 1877 }, { "epoch": 0.21572569065533284, "grad_norm": 0.37960943579673767, "learning_rate": 0.0001, "loss": 1.8329, "step": 1878 }, { "epoch": 0.21584056056515996, "grad_norm": 0.34242677688598633, "learning_rate": 0.0001, "loss": 1.6365, "step": 1879 }, { "epoch": 0.21595543047498708, "grad_norm": 0.34529823064804077, "learning_rate": 0.0001, "loss": 1.7954, "step": 1880 }, { "epoch": 0.2160703003848142, "grad_norm": 0.3592895567417145, "learning_rate": 0.0001, "loss": 1.7834, "step": 1881 }, { "epoch": 0.21618517029464132, "grad_norm": 0.3785233795642853, "learning_rate": 0.0001, "loss": 1.585, "step": 1882 }, { "epoch": 0.21630004020446844, "grad_norm": 0.3563402593135834, "learning_rate": 0.0001, "loss": 1.7312, "step": 1883 }, { "epoch": 0.21641491011429556, "grad_norm": 0.4084146320819855, "learning_rate": 0.0001, "loss": 1.8166, "step": 1884 }, { "epoch": 0.21652978002412268, "grad_norm": 0.3653600811958313, "learning_rate": 0.0001, "loss": 1.5874, "step": 1885 }, { "epoch": 0.2166446499339498, "grad_norm": 0.34954777359962463, "learning_rate": 0.0001, "loss": 1.7595, "step": 1886 }, { "epoch": 0.21675951984377692, "grad_norm": 0.34437569975852966, "learning_rate": 0.0001, "loss": 1.5336, "step": 1887 }, { "epoch": 0.21687438975360404, "grad_norm": 0.37032750248908997, "learning_rate": 0.0001, "loss": 1.6742, "step": 1888 }, { "epoch": 0.21698925966343116, "grad_norm": 0.36316627264022827, "learning_rate": 0.0001, "loss": 1.448, "step": 1889 }, { "epoch": 0.21710412957325828, "grad_norm": 0.3642198145389557, "learning_rate": 0.0001, "loss": 1.7953, "step": 1890 }, { "epoch": 0.2172189994830854, "grad_norm": 0.36583060026168823, "learning_rate": 0.0001, "loss": 1.5196, "step": 1891 }, { "epoch": 0.21733386939291252, "grad_norm": 0.35113057494163513, "learning_rate": 0.0001, "loss": 1.4707, "step": 1892 }, { "epoch": 0.21744873930273964, "grad_norm": 0.4065643846988678, "learning_rate": 0.0001, "loss": 1.8893, "step": 1893 }, { "epoch": 0.21756360921256676, "grad_norm": 0.36826783418655396, "learning_rate": 0.0001, "loss": 1.8333, "step": 1894 }, { "epoch": 0.21767847912239388, "grad_norm": 0.3617238998413086, "learning_rate": 0.0001, "loss": 1.7314, "step": 1895 }, { "epoch": 0.217793349032221, "grad_norm": 0.40174373984336853, "learning_rate": 0.0001, "loss": 1.8332, "step": 1896 }, { "epoch": 0.21790821894204812, "grad_norm": 0.3792894184589386, "learning_rate": 0.0001, "loss": 1.615, "step": 1897 }, { "epoch": 0.21802308885187524, "grad_norm": 0.35536929965019226, "learning_rate": 0.0001, "loss": 1.69, "step": 1898 }, { "epoch": 0.21813795876170236, "grad_norm": 0.36117202043533325, "learning_rate": 0.0001, "loss": 1.6592, "step": 1899 }, { "epoch": 0.21825282867152948, "grad_norm": 0.3893747627735138, "learning_rate": 0.0001, "loss": 1.7219, "step": 1900 }, { "epoch": 0.2183676985813566, "grad_norm": 0.40482643246650696, "learning_rate": 0.0001, "loss": 1.8143, "step": 1901 }, { "epoch": 0.21848256849118372, "grad_norm": 0.35201117396354675, "learning_rate": 0.0001, "loss": 1.5626, "step": 1902 }, { "epoch": 0.21859743840101087, "grad_norm": 0.359512060880661, "learning_rate": 0.0001, "loss": 1.7391, "step": 1903 }, { "epoch": 0.218712308310838, "grad_norm": 0.4181397259235382, "learning_rate": 0.0001, "loss": 1.9794, "step": 1904 }, { "epoch": 0.2188271782206651, "grad_norm": 0.3568892478942871, "learning_rate": 0.0001, "loss": 1.5442, "step": 1905 }, { "epoch": 0.21894204813049223, "grad_norm": 0.3569229245185852, "learning_rate": 0.0001, "loss": 1.8519, "step": 1906 }, { "epoch": 0.21905691804031935, "grad_norm": 0.3385428488254547, "learning_rate": 0.0001, "loss": 1.5492, "step": 1907 }, { "epoch": 0.21917178795014647, "grad_norm": 0.35773056745529175, "learning_rate": 0.0001, "loss": 1.7635, "step": 1908 }, { "epoch": 0.2192866578599736, "grad_norm": 0.37452182173728943, "learning_rate": 0.0001, "loss": 1.8033, "step": 1909 }, { "epoch": 0.2194015277698007, "grad_norm": 0.38838356733322144, "learning_rate": 0.0001, "loss": 1.6672, "step": 1910 }, { "epoch": 0.21951639767962783, "grad_norm": 0.3544471263885498, "learning_rate": 0.0001, "loss": 1.4947, "step": 1911 }, { "epoch": 0.21963126758945495, "grad_norm": 0.3904416561126709, "learning_rate": 0.0001, "loss": 1.8598, "step": 1912 }, { "epoch": 0.21974613749928207, "grad_norm": 0.3736543357372284, "learning_rate": 0.0001, "loss": 1.6786, "step": 1913 }, { "epoch": 0.2198610074091092, "grad_norm": 0.3756701648235321, "learning_rate": 0.0001, "loss": 1.6923, "step": 1914 }, { "epoch": 0.2199758773189363, "grad_norm": 0.334176629781723, "learning_rate": 0.0001, "loss": 1.3314, "step": 1915 }, { "epoch": 0.22009074722876343, "grad_norm": 0.35989829897880554, "learning_rate": 0.0001, "loss": 1.6578, "step": 1916 }, { "epoch": 0.22020561713859055, "grad_norm": 0.3670518100261688, "learning_rate": 0.0001, "loss": 1.6586, "step": 1917 }, { "epoch": 0.22032048704841767, "grad_norm": 0.3405352234840393, "learning_rate": 0.0001, "loss": 1.4737, "step": 1918 }, { "epoch": 0.2204353569582448, "grad_norm": 0.3269241154193878, "learning_rate": 0.0001, "loss": 1.5191, "step": 1919 }, { "epoch": 0.2205502268680719, "grad_norm": 0.34716111421585083, "learning_rate": 0.0001, "loss": 1.6567, "step": 1920 }, { "epoch": 0.22066509677789903, "grad_norm": 0.3655507564544678, "learning_rate": 0.0001, "loss": 1.7015, "step": 1921 }, { "epoch": 0.22077996668772615, "grad_norm": 0.37460073828697205, "learning_rate": 0.0001, "loss": 1.6521, "step": 1922 }, { "epoch": 0.22089483659755327, "grad_norm": 0.3556302785873413, "learning_rate": 0.0001, "loss": 1.672, "step": 1923 }, { "epoch": 0.2210097065073804, "grad_norm": 0.30222126841545105, "learning_rate": 0.0001, "loss": 1.3695, "step": 1924 }, { "epoch": 0.2211245764172075, "grad_norm": 0.3618417978286743, "learning_rate": 0.0001, "loss": 1.6877, "step": 1925 }, { "epoch": 0.22123944632703463, "grad_norm": 0.3906730115413666, "learning_rate": 0.0001, "loss": 1.7139, "step": 1926 }, { "epoch": 0.22135431623686175, "grad_norm": 0.3969448506832123, "learning_rate": 0.0001, "loss": 1.5555, "step": 1927 }, { "epoch": 0.22146918614668887, "grad_norm": 0.4062451124191284, "learning_rate": 0.0001, "loss": 1.8545, "step": 1928 }, { "epoch": 0.221584056056516, "grad_norm": 0.3763776421546936, "learning_rate": 0.0001, "loss": 1.6737, "step": 1929 }, { "epoch": 0.2216989259663431, "grad_norm": 0.3991961181163788, "learning_rate": 0.0001, "loss": 1.776, "step": 1930 }, { "epoch": 0.22181379587617023, "grad_norm": 0.37733355164527893, "learning_rate": 0.0001, "loss": 1.6528, "step": 1931 }, { "epoch": 0.22192866578599735, "grad_norm": 0.383766233921051, "learning_rate": 0.0001, "loss": 1.665, "step": 1932 }, { "epoch": 0.22204353569582447, "grad_norm": 0.35736414790153503, "learning_rate": 0.0001, "loss": 1.7631, "step": 1933 }, { "epoch": 0.2221584056056516, "grad_norm": 0.3915473222732544, "learning_rate": 0.0001, "loss": 1.8461, "step": 1934 }, { "epoch": 0.2222732755154787, "grad_norm": 0.4120464622974396, "learning_rate": 0.0001, "loss": 1.9815, "step": 1935 }, { "epoch": 0.22238814542530583, "grad_norm": 0.36867755651474, "learning_rate": 0.0001, "loss": 1.6904, "step": 1936 }, { "epoch": 0.22250301533513298, "grad_norm": 0.3585013151168823, "learning_rate": 0.0001, "loss": 1.5512, "step": 1937 }, { "epoch": 0.2226178852449601, "grad_norm": 0.36579304933547974, "learning_rate": 0.0001, "loss": 1.6213, "step": 1938 }, { "epoch": 0.22273275515478722, "grad_norm": 0.43999186158180237, "learning_rate": 0.0001, "loss": 1.9172, "step": 1939 }, { "epoch": 0.22284762506461434, "grad_norm": 0.340197890996933, "learning_rate": 0.0001, "loss": 1.6752, "step": 1940 }, { "epoch": 0.22296249497444146, "grad_norm": 0.3636626601219177, "learning_rate": 0.0001, "loss": 1.849, "step": 1941 }, { "epoch": 0.22307736488426858, "grad_norm": 0.33925965428352356, "learning_rate": 0.0001, "loss": 1.6694, "step": 1942 }, { "epoch": 0.2231922347940957, "grad_norm": 0.3498164713382721, "learning_rate": 0.0001, "loss": 1.5879, "step": 1943 }, { "epoch": 0.22330710470392282, "grad_norm": 0.35146066546440125, "learning_rate": 0.0001, "loss": 1.6627, "step": 1944 }, { "epoch": 0.22342197461374994, "grad_norm": 0.40588539838790894, "learning_rate": 0.0001, "loss": 2.0319, "step": 1945 }, { "epoch": 0.22353684452357706, "grad_norm": 0.35790249705314636, "learning_rate": 0.0001, "loss": 1.5836, "step": 1946 }, { "epoch": 0.22365171443340418, "grad_norm": 0.3292383849620819, "learning_rate": 0.0001, "loss": 1.372, "step": 1947 }, { "epoch": 0.2237665843432313, "grad_norm": 0.3615871071815491, "learning_rate": 0.0001, "loss": 1.7117, "step": 1948 }, { "epoch": 0.22388145425305842, "grad_norm": 0.3719077408313751, "learning_rate": 0.0001, "loss": 1.5964, "step": 1949 }, { "epoch": 0.22399632416288554, "grad_norm": 0.4070468544960022, "learning_rate": 0.0001, "loss": 1.9156, "step": 1950 }, { "epoch": 0.22411119407271266, "grad_norm": 0.39306938648223877, "learning_rate": 0.0001, "loss": 1.8247, "step": 1951 }, { "epoch": 0.22422606398253977, "grad_norm": 0.380942165851593, "learning_rate": 0.0001, "loss": 1.8662, "step": 1952 }, { "epoch": 0.2243409338923669, "grad_norm": 0.3670596480369568, "learning_rate": 0.0001, "loss": 1.7237, "step": 1953 }, { "epoch": 0.22445580380219401, "grad_norm": 0.3629164695739746, "learning_rate": 0.0001, "loss": 1.6835, "step": 1954 }, { "epoch": 0.22457067371202113, "grad_norm": 0.3525116443634033, "learning_rate": 0.0001, "loss": 1.5414, "step": 1955 }, { "epoch": 0.22468554362184825, "grad_norm": 0.362248957157135, "learning_rate": 0.0001, "loss": 1.7471, "step": 1956 }, { "epoch": 0.22480041353167537, "grad_norm": 0.3838635981082916, "learning_rate": 0.0001, "loss": 1.845, "step": 1957 }, { "epoch": 0.2249152834415025, "grad_norm": 0.3582911193370819, "learning_rate": 0.0001, "loss": 1.6315, "step": 1958 }, { "epoch": 0.22503015335132961, "grad_norm": 0.3515222668647766, "learning_rate": 0.0001, "loss": 1.7908, "step": 1959 }, { "epoch": 0.22514502326115673, "grad_norm": 0.38744720816612244, "learning_rate": 0.0001, "loss": 1.6675, "step": 1960 }, { "epoch": 0.22525989317098385, "grad_norm": 0.356286883354187, "learning_rate": 0.0001, "loss": 1.635, "step": 1961 }, { "epoch": 0.22537476308081097, "grad_norm": 0.3699129819869995, "learning_rate": 0.0001, "loss": 1.7541, "step": 1962 }, { "epoch": 0.2254896329906381, "grad_norm": 0.3715314269065857, "learning_rate": 0.0001, "loss": 1.7375, "step": 1963 }, { "epoch": 0.22560450290046521, "grad_norm": 0.3639882206916809, "learning_rate": 0.0001, "loss": 1.6326, "step": 1964 }, { "epoch": 0.22571937281029233, "grad_norm": 0.34050387144088745, "learning_rate": 0.0001, "loss": 1.5521, "step": 1965 }, { "epoch": 0.22583424272011945, "grad_norm": 0.36640483140945435, "learning_rate": 0.0001, "loss": 1.7285, "step": 1966 }, { "epoch": 0.22594911262994657, "grad_norm": 0.39753469824790955, "learning_rate": 0.0001, "loss": 1.8873, "step": 1967 }, { "epoch": 0.2260639825397737, "grad_norm": 0.3590884208679199, "learning_rate": 0.0001, "loss": 1.647, "step": 1968 }, { "epoch": 0.22617885244960081, "grad_norm": 0.36375731229782104, "learning_rate": 0.0001, "loss": 1.6994, "step": 1969 }, { "epoch": 0.22629372235942793, "grad_norm": 0.3400304317474365, "learning_rate": 0.0001, "loss": 1.5619, "step": 1970 }, { "epoch": 0.22640859226925508, "grad_norm": 0.36975133419036865, "learning_rate": 0.0001, "loss": 1.58, "step": 1971 }, { "epoch": 0.2265234621790822, "grad_norm": 0.3886123299598694, "learning_rate": 0.0001, "loss": 1.6675, "step": 1972 }, { "epoch": 0.22663833208890932, "grad_norm": 0.37827616930007935, "learning_rate": 0.0001, "loss": 1.8186, "step": 1973 }, { "epoch": 0.22675320199873644, "grad_norm": 0.3893340229988098, "learning_rate": 0.0001, "loss": 1.7555, "step": 1974 }, { "epoch": 0.22686807190856356, "grad_norm": 0.3668055534362793, "learning_rate": 0.0001, "loss": 1.585, "step": 1975 }, { "epoch": 0.22698294181839068, "grad_norm": 0.37988904118537903, "learning_rate": 0.0001, "loss": 1.7357, "step": 1976 }, { "epoch": 0.2270978117282178, "grad_norm": 0.3713071942329407, "learning_rate": 0.0001, "loss": 1.73, "step": 1977 }, { "epoch": 0.22721268163804492, "grad_norm": 0.3730880916118622, "learning_rate": 0.0001, "loss": 1.8461, "step": 1978 }, { "epoch": 0.22732755154787204, "grad_norm": 0.38077569007873535, "learning_rate": 0.0001, "loss": 1.8324, "step": 1979 }, { "epoch": 0.22744242145769916, "grad_norm": 0.34529733657836914, "learning_rate": 0.0001, "loss": 1.5826, "step": 1980 }, { "epoch": 0.22755729136752628, "grad_norm": 0.3712940812110901, "learning_rate": 0.0001, "loss": 1.8049, "step": 1981 }, { "epoch": 0.2276721612773534, "grad_norm": 0.39372700452804565, "learning_rate": 0.0001, "loss": 1.7774, "step": 1982 }, { "epoch": 0.22778703118718052, "grad_norm": 0.3780519664287567, "learning_rate": 0.0001, "loss": 1.8419, "step": 1983 }, { "epoch": 0.22790190109700764, "grad_norm": 0.36118772625923157, "learning_rate": 0.0001, "loss": 1.6452, "step": 1984 }, { "epoch": 0.22801677100683476, "grad_norm": 0.36958158016204834, "learning_rate": 0.0001, "loss": 1.5734, "step": 1985 }, { "epoch": 0.22813164091666188, "grad_norm": 0.365408331155777, "learning_rate": 0.0001, "loss": 1.5612, "step": 1986 }, { "epoch": 0.228246510826489, "grad_norm": 0.3450736999511719, "learning_rate": 0.0001, "loss": 1.6522, "step": 1987 }, { "epoch": 0.22836138073631612, "grad_norm": 0.37709590792655945, "learning_rate": 0.0001, "loss": 1.7375, "step": 1988 }, { "epoch": 0.22847625064614324, "grad_norm": 0.3818245232105255, "learning_rate": 0.0001, "loss": 1.7378, "step": 1989 }, { "epoch": 0.22859112055597036, "grad_norm": 0.375186949968338, "learning_rate": 0.0001, "loss": 1.7919, "step": 1990 }, { "epoch": 0.22870599046579748, "grad_norm": 0.3831838369369507, "learning_rate": 0.0001, "loss": 1.9628, "step": 1991 }, { "epoch": 0.2288208603756246, "grad_norm": 0.3555734157562256, "learning_rate": 0.0001, "loss": 1.7085, "step": 1992 }, { "epoch": 0.22893573028545172, "grad_norm": 0.36431822180747986, "learning_rate": 0.0001, "loss": 1.6481, "step": 1993 }, { "epoch": 0.22905060019527884, "grad_norm": 0.36310869455337524, "learning_rate": 0.0001, "loss": 1.8057, "step": 1994 }, { "epoch": 0.22916547010510596, "grad_norm": 0.3640761375427246, "learning_rate": 0.0001, "loss": 1.7278, "step": 1995 }, { "epoch": 0.22928034001493308, "grad_norm": 0.3574616014957428, "learning_rate": 0.0001, "loss": 1.6583, "step": 1996 }, { "epoch": 0.2293952099247602, "grad_norm": 0.36930689215660095, "learning_rate": 0.0001, "loss": 1.7673, "step": 1997 }, { "epoch": 0.22951007983458732, "grad_norm": 0.3680112659931183, "learning_rate": 0.0001, "loss": 1.6705, "step": 1998 }, { "epoch": 0.22962494974441444, "grad_norm": 0.3612775504589081, "learning_rate": 0.0001, "loss": 1.827, "step": 1999 }, { "epoch": 0.22973981965424156, "grad_norm": 0.3716758191585541, "learning_rate": 0.0001, "loss": 1.7205, "step": 2000 }, { "epoch": 0.22985468956406868, "grad_norm": 0.3520873785018921, "learning_rate": 0.0001, "loss": 1.5418, "step": 2001 }, { "epoch": 0.2299695594738958, "grad_norm": 0.3716805875301361, "learning_rate": 0.0001, "loss": 1.9395, "step": 2002 }, { "epoch": 0.23008442938372292, "grad_norm": 0.35050535202026367, "learning_rate": 0.0001, "loss": 1.7538, "step": 2003 }, { "epoch": 0.23019929929355004, "grad_norm": 0.35754668712615967, "learning_rate": 0.0001, "loss": 1.6116, "step": 2004 }, { "epoch": 0.23031416920337716, "grad_norm": 0.35769370198249817, "learning_rate": 0.0001, "loss": 1.6091, "step": 2005 }, { "epoch": 0.2304290391132043, "grad_norm": 0.3884750008583069, "learning_rate": 0.0001, "loss": 1.73, "step": 2006 }, { "epoch": 0.23054390902303143, "grad_norm": 0.35404741764068604, "learning_rate": 0.0001, "loss": 1.5435, "step": 2007 }, { "epoch": 0.23065877893285855, "grad_norm": 0.37792080640792847, "learning_rate": 0.0001, "loss": 1.694, "step": 2008 }, { "epoch": 0.23077364884268567, "grad_norm": 0.3727210462093353, "learning_rate": 0.0001, "loss": 1.6421, "step": 2009 }, { "epoch": 0.2308885187525128, "grad_norm": 0.3520371615886688, "learning_rate": 0.0001, "loss": 1.6815, "step": 2010 }, { "epoch": 0.2310033886623399, "grad_norm": 0.3894649147987366, "learning_rate": 0.0001, "loss": 1.6817, "step": 2011 }, { "epoch": 0.23111825857216703, "grad_norm": 0.3612670302391052, "learning_rate": 0.0001, "loss": 1.6792, "step": 2012 }, { "epoch": 0.23123312848199415, "grad_norm": 0.35834938287734985, "learning_rate": 0.0001, "loss": 1.6607, "step": 2013 }, { "epoch": 0.23134799839182127, "grad_norm": 0.36019662022590637, "learning_rate": 0.0001, "loss": 1.8426, "step": 2014 }, { "epoch": 0.2314628683016484, "grad_norm": 0.35688281059265137, "learning_rate": 0.0001, "loss": 1.6774, "step": 2015 }, { "epoch": 0.2315777382114755, "grad_norm": 0.3944391906261444, "learning_rate": 0.0001, "loss": 1.6247, "step": 2016 }, { "epoch": 0.23169260812130263, "grad_norm": 0.4213406443595886, "learning_rate": 0.0001, "loss": 1.5176, "step": 2017 }, { "epoch": 0.23180747803112975, "grad_norm": 0.37755805253982544, "learning_rate": 0.0001, "loss": 1.7714, "step": 2018 }, { "epoch": 0.23192234794095687, "grad_norm": 0.36677590012550354, "learning_rate": 0.0001, "loss": 1.5277, "step": 2019 }, { "epoch": 0.232037217850784, "grad_norm": 0.40023931860923767, "learning_rate": 0.0001, "loss": 1.6677, "step": 2020 }, { "epoch": 0.2321520877606111, "grad_norm": 0.36305657029151917, "learning_rate": 0.0001, "loss": 1.726, "step": 2021 }, { "epoch": 0.23226695767043823, "grad_norm": 0.3816761076450348, "learning_rate": 0.0001, "loss": 1.8716, "step": 2022 }, { "epoch": 0.23238182758026535, "grad_norm": 0.3882976770401001, "learning_rate": 0.0001, "loss": 1.8066, "step": 2023 }, { "epoch": 0.23249669749009247, "grad_norm": 0.3663192093372345, "learning_rate": 0.0001, "loss": 1.5011, "step": 2024 }, { "epoch": 0.2326115673999196, "grad_norm": 0.3655019998550415, "learning_rate": 0.0001, "loss": 1.7241, "step": 2025 }, { "epoch": 0.2327264373097467, "grad_norm": 0.3717341423034668, "learning_rate": 0.0001, "loss": 1.766, "step": 2026 }, { "epoch": 0.23284130721957383, "grad_norm": 0.38333335518836975, "learning_rate": 0.0001, "loss": 1.8287, "step": 2027 }, { "epoch": 0.23295617712940095, "grad_norm": 0.3660372197628021, "learning_rate": 0.0001, "loss": 1.6404, "step": 2028 }, { "epoch": 0.23307104703922807, "grad_norm": 0.4071906507015228, "learning_rate": 0.0001, "loss": 1.6666, "step": 2029 }, { "epoch": 0.2331859169490552, "grad_norm": 0.33285489678382874, "learning_rate": 0.0001, "loss": 1.5495, "step": 2030 }, { "epoch": 0.2333007868588823, "grad_norm": 0.36184608936309814, "learning_rate": 0.0001, "loss": 1.7848, "step": 2031 }, { "epoch": 0.23341565676870943, "grad_norm": 0.3524230718612671, "learning_rate": 0.0001, "loss": 1.606, "step": 2032 }, { "epoch": 0.23353052667853655, "grad_norm": 0.37483248114585876, "learning_rate": 0.0001, "loss": 1.6525, "step": 2033 }, { "epoch": 0.23364539658836367, "grad_norm": 0.3705480992794037, "learning_rate": 0.0001, "loss": 1.5733, "step": 2034 }, { "epoch": 0.2337602664981908, "grad_norm": 0.39686012268066406, "learning_rate": 0.0001, "loss": 1.7466, "step": 2035 }, { "epoch": 0.2338751364080179, "grad_norm": 0.3723219633102417, "learning_rate": 0.0001, "loss": 1.6358, "step": 2036 }, { "epoch": 0.23399000631784503, "grad_norm": 0.3748587965965271, "learning_rate": 0.0001, "loss": 1.7883, "step": 2037 }, { "epoch": 0.23410487622767215, "grad_norm": 0.3536939024925232, "learning_rate": 0.0001, "loss": 1.8048, "step": 2038 }, { "epoch": 0.23421974613749927, "grad_norm": 0.36391976475715637, "learning_rate": 0.0001, "loss": 1.7326, "step": 2039 }, { "epoch": 0.23433461604732642, "grad_norm": 0.3579852879047394, "learning_rate": 0.0001, "loss": 1.7311, "step": 2040 }, { "epoch": 0.23444948595715354, "grad_norm": 0.3592579960823059, "learning_rate": 0.0001, "loss": 1.7675, "step": 2041 }, { "epoch": 0.23456435586698066, "grad_norm": 0.34196656942367554, "learning_rate": 0.0001, "loss": 1.4591, "step": 2042 }, { "epoch": 0.23467922577680778, "grad_norm": 0.40393322706222534, "learning_rate": 0.0001, "loss": 1.8106, "step": 2043 }, { "epoch": 0.2347940956866349, "grad_norm": 0.39818137884140015, "learning_rate": 0.0001, "loss": 1.6923, "step": 2044 }, { "epoch": 0.23490896559646202, "grad_norm": 0.3436848521232605, "learning_rate": 0.0001, "loss": 1.5952, "step": 2045 }, { "epoch": 0.23502383550628914, "grad_norm": 0.35754308104515076, "learning_rate": 0.0001, "loss": 1.7914, "step": 2046 }, { "epoch": 0.23513870541611626, "grad_norm": 0.37163278460502625, "learning_rate": 0.0001, "loss": 1.7085, "step": 2047 }, { "epoch": 0.23525357532594338, "grad_norm": 0.3725832402706146, "learning_rate": 0.0001, "loss": 1.7469, "step": 2048 }, { "epoch": 0.2353684452357705, "grad_norm": 0.3440713584423065, "learning_rate": 0.0001, "loss": 1.615, "step": 2049 }, { "epoch": 0.23548331514559762, "grad_norm": 0.3776625096797943, "learning_rate": 0.0001, "loss": 1.6978, "step": 2050 }, { "epoch": 0.23559818505542474, "grad_norm": 0.37252920866012573, "learning_rate": 0.0001, "loss": 1.6163, "step": 2051 }, { "epoch": 0.23571305496525186, "grad_norm": 0.36468106508255005, "learning_rate": 0.0001, "loss": 1.7879, "step": 2052 }, { "epoch": 0.23582792487507898, "grad_norm": 0.3846648037433624, "learning_rate": 0.0001, "loss": 1.7629, "step": 2053 }, { "epoch": 0.2359427947849061, "grad_norm": 0.3676551580429077, "learning_rate": 0.0001, "loss": 1.687, "step": 2054 }, { "epoch": 0.23605766469473322, "grad_norm": 0.38455480337142944, "learning_rate": 0.0001, "loss": 1.4812, "step": 2055 }, { "epoch": 0.23617253460456034, "grad_norm": 0.39479124546051025, "learning_rate": 0.0001, "loss": 1.8082, "step": 2056 }, { "epoch": 0.23628740451438746, "grad_norm": 0.37550801038742065, "learning_rate": 0.0001, "loss": 1.7738, "step": 2057 }, { "epoch": 0.23640227442421458, "grad_norm": 0.3539324104785919, "learning_rate": 0.0001, "loss": 1.4211, "step": 2058 }, { "epoch": 0.2365171443340417, "grad_norm": 0.38804805278778076, "learning_rate": 0.0001, "loss": 1.4982, "step": 2059 }, { "epoch": 0.23663201424386882, "grad_norm": 0.34223702549934387, "learning_rate": 0.0001, "loss": 1.6774, "step": 2060 }, { "epoch": 0.23674688415369594, "grad_norm": 0.3723870515823364, "learning_rate": 0.0001, "loss": 1.7951, "step": 2061 }, { "epoch": 0.23686175406352306, "grad_norm": 0.3813320994377136, "learning_rate": 0.0001, "loss": 1.7424, "step": 2062 }, { "epoch": 0.23697662397335018, "grad_norm": 0.38882893323898315, "learning_rate": 0.0001, "loss": 1.7914, "step": 2063 }, { "epoch": 0.2370914938831773, "grad_norm": 0.3598068058490753, "learning_rate": 0.0001, "loss": 1.605, "step": 2064 }, { "epoch": 0.23720636379300442, "grad_norm": 0.35870295763015747, "learning_rate": 0.0001, "loss": 1.7168, "step": 2065 }, { "epoch": 0.23732123370283154, "grad_norm": 0.38279473781585693, "learning_rate": 0.0001, "loss": 1.7661, "step": 2066 }, { "epoch": 0.23743610361265866, "grad_norm": 0.3647814393043518, "learning_rate": 0.0001, "loss": 1.656, "step": 2067 }, { "epoch": 0.23755097352248578, "grad_norm": 0.38286200165748596, "learning_rate": 0.0001, "loss": 1.7234, "step": 2068 }, { "epoch": 0.2376658434323129, "grad_norm": 0.3622128367424011, "learning_rate": 0.0001, "loss": 1.8133, "step": 2069 }, { "epoch": 0.23778071334214002, "grad_norm": 0.38336479663848877, "learning_rate": 0.0001, "loss": 1.8398, "step": 2070 }, { "epoch": 0.23789558325196714, "grad_norm": 0.34429532289505005, "learning_rate": 0.0001, "loss": 1.5766, "step": 2071 }, { "epoch": 0.23801045316179426, "grad_norm": 0.3539319932460785, "learning_rate": 0.0001, "loss": 1.8332, "step": 2072 }, { "epoch": 0.23812532307162138, "grad_norm": 0.41278359293937683, "learning_rate": 0.0001, "loss": 1.7155, "step": 2073 }, { "epoch": 0.23824019298144852, "grad_norm": 0.3446906507015228, "learning_rate": 0.0001, "loss": 1.6454, "step": 2074 }, { "epoch": 0.23835506289127564, "grad_norm": 0.3742326498031616, "learning_rate": 0.0001, "loss": 1.7497, "step": 2075 }, { "epoch": 0.23846993280110276, "grad_norm": 0.3751114308834076, "learning_rate": 0.0001, "loss": 1.766, "step": 2076 }, { "epoch": 0.23858480271092988, "grad_norm": 0.358395516872406, "learning_rate": 0.0001, "loss": 1.8033, "step": 2077 }, { "epoch": 0.238699672620757, "grad_norm": 0.3309650123119354, "learning_rate": 0.0001, "loss": 1.4627, "step": 2078 }, { "epoch": 0.23881454253058412, "grad_norm": 0.3873356878757477, "learning_rate": 0.0001, "loss": 1.7555, "step": 2079 }, { "epoch": 0.23892941244041124, "grad_norm": 0.3576614558696747, "learning_rate": 0.0001, "loss": 1.7634, "step": 2080 }, { "epoch": 0.23904428235023836, "grad_norm": 0.4060707688331604, "learning_rate": 0.0001, "loss": 1.8486, "step": 2081 }, { "epoch": 0.23915915226006548, "grad_norm": 0.3609447181224823, "learning_rate": 0.0001, "loss": 1.7257, "step": 2082 }, { "epoch": 0.2392740221698926, "grad_norm": 0.39137205481529236, "learning_rate": 0.0001, "loss": 1.788, "step": 2083 }, { "epoch": 0.23938889207971972, "grad_norm": 0.4108488857746124, "learning_rate": 0.0001, "loss": 1.5738, "step": 2084 }, { "epoch": 0.23950376198954684, "grad_norm": 0.3932496905326843, "learning_rate": 0.0001, "loss": 1.9864, "step": 2085 }, { "epoch": 0.23961863189937396, "grad_norm": 0.3582679331302643, "learning_rate": 0.0001, "loss": 1.6553, "step": 2086 }, { "epoch": 0.23973350180920108, "grad_norm": 0.382107138633728, "learning_rate": 0.0001, "loss": 1.8809, "step": 2087 }, { "epoch": 0.2398483717190282, "grad_norm": 0.37730252742767334, "learning_rate": 0.0001, "loss": 1.5719, "step": 2088 }, { "epoch": 0.23996324162885532, "grad_norm": 0.39350810647010803, "learning_rate": 0.0001, "loss": 1.7602, "step": 2089 }, { "epoch": 0.24007811153868244, "grad_norm": 0.3490624725818634, "learning_rate": 0.0001, "loss": 1.7111, "step": 2090 }, { "epoch": 0.24019298144850956, "grad_norm": 0.39576220512390137, "learning_rate": 0.0001, "loss": 1.7265, "step": 2091 }, { "epoch": 0.24030785135833668, "grad_norm": 0.34063565731048584, "learning_rate": 0.0001, "loss": 1.5151, "step": 2092 }, { "epoch": 0.2404227212681638, "grad_norm": 0.36094650626182556, "learning_rate": 0.0001, "loss": 1.7337, "step": 2093 }, { "epoch": 0.24053759117799092, "grad_norm": 0.39108943939208984, "learning_rate": 0.0001, "loss": 1.8195, "step": 2094 }, { "epoch": 0.24065246108781804, "grad_norm": 0.3705921471118927, "learning_rate": 0.0001, "loss": 1.5288, "step": 2095 }, { "epoch": 0.24076733099764516, "grad_norm": 0.38726896047592163, "learning_rate": 0.0001, "loss": 1.8124, "step": 2096 }, { "epoch": 0.24088220090747228, "grad_norm": 0.3693424165248871, "learning_rate": 0.0001, "loss": 1.4665, "step": 2097 }, { "epoch": 0.2409970708172994, "grad_norm": 0.37922948598861694, "learning_rate": 0.0001, "loss": 1.6475, "step": 2098 }, { "epoch": 0.24111194072712652, "grad_norm": 0.3428456485271454, "learning_rate": 0.0001, "loss": 1.6265, "step": 2099 }, { "epoch": 0.24122681063695364, "grad_norm": 0.38854971528053284, "learning_rate": 0.0001, "loss": 1.8648, "step": 2100 }, { "epoch": 0.24134168054678076, "grad_norm": 0.3516068756580353, "learning_rate": 0.0001, "loss": 1.6517, "step": 2101 }, { "epoch": 0.24145655045660788, "grad_norm": 0.3651220500469208, "learning_rate": 0.0001, "loss": 1.7118, "step": 2102 }, { "epoch": 0.241571420366435, "grad_norm": 0.355566143989563, "learning_rate": 0.0001, "loss": 1.473, "step": 2103 }, { "epoch": 0.24168629027626212, "grad_norm": 0.3593463599681854, "learning_rate": 0.0001, "loss": 1.6904, "step": 2104 }, { "epoch": 0.24180116018608924, "grad_norm": 0.35307595133781433, "learning_rate": 0.0001, "loss": 1.6098, "step": 2105 }, { "epoch": 0.24191603009591636, "grad_norm": 0.37137705087661743, "learning_rate": 0.0001, "loss": 1.6786, "step": 2106 }, { "epoch": 0.24203090000574348, "grad_norm": 0.3728501498699188, "learning_rate": 0.0001, "loss": 1.8597, "step": 2107 }, { "epoch": 0.24214576991557063, "grad_norm": 0.3600460886955261, "learning_rate": 0.0001, "loss": 1.769, "step": 2108 }, { "epoch": 0.24226063982539775, "grad_norm": 0.3472753167152405, "learning_rate": 0.0001, "loss": 1.5751, "step": 2109 }, { "epoch": 0.24237550973522487, "grad_norm": 0.36193597316741943, "learning_rate": 0.0001, "loss": 1.6062, "step": 2110 }, { "epoch": 0.242490379645052, "grad_norm": 0.3618480861186981, "learning_rate": 0.0001, "loss": 1.6479, "step": 2111 }, { "epoch": 0.2426052495548791, "grad_norm": 0.358378529548645, "learning_rate": 0.0001, "loss": 1.381, "step": 2112 }, { "epoch": 0.24272011946470623, "grad_norm": 0.386943519115448, "learning_rate": 0.0001, "loss": 1.896, "step": 2113 }, { "epoch": 0.24283498937453335, "grad_norm": 0.38075029850006104, "learning_rate": 0.0001, "loss": 1.6401, "step": 2114 }, { "epoch": 0.24294985928436047, "grad_norm": 0.3709038197994232, "learning_rate": 0.0001, "loss": 1.7108, "step": 2115 }, { "epoch": 0.2430647291941876, "grad_norm": 0.3460537791252136, "learning_rate": 0.0001, "loss": 1.5338, "step": 2116 }, { "epoch": 0.2431795991040147, "grad_norm": 0.3539460599422455, "learning_rate": 0.0001, "loss": 1.6383, "step": 2117 }, { "epoch": 0.24329446901384183, "grad_norm": 0.3654404580593109, "learning_rate": 0.0001, "loss": 1.7205, "step": 2118 }, { "epoch": 0.24340933892366895, "grad_norm": 0.37309154868125916, "learning_rate": 0.0001, "loss": 1.7026, "step": 2119 }, { "epoch": 0.24352420883349607, "grad_norm": 0.3884303569793701, "learning_rate": 0.0001, "loss": 1.8393, "step": 2120 }, { "epoch": 0.2436390787433232, "grad_norm": 0.41834697127342224, "learning_rate": 0.0001, "loss": 1.6144, "step": 2121 }, { "epoch": 0.2437539486531503, "grad_norm": 0.48797813057899475, "learning_rate": 0.0001, "loss": 1.7264, "step": 2122 }, { "epoch": 0.24386881856297743, "grad_norm": 0.3766729533672333, "learning_rate": 0.0001, "loss": 1.7361, "step": 2123 }, { "epoch": 0.24398368847280455, "grad_norm": 0.3885788023471832, "learning_rate": 0.0001, "loss": 1.9709, "step": 2124 }, { "epoch": 0.24409855838263167, "grad_norm": 0.36236581206321716, "learning_rate": 0.0001, "loss": 1.5214, "step": 2125 }, { "epoch": 0.2442134282924588, "grad_norm": 0.368348628282547, "learning_rate": 0.0001, "loss": 1.6698, "step": 2126 }, { "epoch": 0.2443282982022859, "grad_norm": 0.3759993612766266, "learning_rate": 0.0001, "loss": 1.7122, "step": 2127 }, { "epoch": 0.24444316811211303, "grad_norm": 0.3532876670360565, "learning_rate": 0.0001, "loss": 1.5642, "step": 2128 }, { "epoch": 0.24455803802194015, "grad_norm": 0.3924901783466339, "learning_rate": 0.0001, "loss": 1.6596, "step": 2129 }, { "epoch": 0.24467290793176727, "grad_norm": 0.36675912141799927, "learning_rate": 0.0001, "loss": 1.5667, "step": 2130 }, { "epoch": 0.2447877778415944, "grad_norm": 0.3623403310775757, "learning_rate": 0.0001, "loss": 1.7219, "step": 2131 }, { "epoch": 0.2449026477514215, "grad_norm": 0.4029271900653839, "learning_rate": 0.0001, "loss": 1.8573, "step": 2132 }, { "epoch": 0.24501751766124863, "grad_norm": 0.38149070739746094, "learning_rate": 0.0001, "loss": 1.6958, "step": 2133 }, { "epoch": 0.24513238757107575, "grad_norm": 0.36011314392089844, "learning_rate": 0.0001, "loss": 1.6092, "step": 2134 }, { "epoch": 0.24524725748090287, "grad_norm": 0.35948145389556885, "learning_rate": 0.0001, "loss": 1.4656, "step": 2135 }, { "epoch": 0.24536212739073, "grad_norm": 0.39184871315956116, "learning_rate": 0.0001, "loss": 1.6596, "step": 2136 }, { "epoch": 0.2454769973005571, "grad_norm": 0.3509827256202698, "learning_rate": 0.0001, "loss": 1.6038, "step": 2137 }, { "epoch": 0.24559186721038423, "grad_norm": 0.37394797801971436, "learning_rate": 0.0001, "loss": 1.7774, "step": 2138 }, { "epoch": 0.24570673712021135, "grad_norm": 0.37039676308631897, "learning_rate": 0.0001, "loss": 1.6785, "step": 2139 }, { "epoch": 0.24582160703003847, "grad_norm": 0.34239184856414795, "learning_rate": 0.0001, "loss": 1.6419, "step": 2140 }, { "epoch": 0.2459364769398656, "grad_norm": 0.4217103123664856, "learning_rate": 0.0001, "loss": 2.0345, "step": 2141 }, { "epoch": 0.2460513468496927, "grad_norm": 0.352780282497406, "learning_rate": 0.0001, "loss": 1.6952, "step": 2142 }, { "epoch": 0.24616621675951986, "grad_norm": 0.34040459990501404, "learning_rate": 0.0001, "loss": 1.3958, "step": 2143 }, { "epoch": 0.24628108666934698, "grad_norm": 0.3590191900730133, "learning_rate": 0.0001, "loss": 1.5738, "step": 2144 }, { "epoch": 0.2463959565791741, "grad_norm": 0.3775222599506378, "learning_rate": 0.0001, "loss": 1.7631, "step": 2145 }, { "epoch": 0.24651082648900122, "grad_norm": 0.3881835639476776, "learning_rate": 0.0001, "loss": 1.8367, "step": 2146 }, { "epoch": 0.24662569639882834, "grad_norm": 0.35426759719848633, "learning_rate": 0.0001, "loss": 1.5748, "step": 2147 }, { "epoch": 0.24674056630865546, "grad_norm": 0.43328168988227844, "learning_rate": 0.0001, "loss": 1.7385, "step": 2148 }, { "epoch": 0.24685543621848258, "grad_norm": 0.38424152135849, "learning_rate": 0.0001, "loss": 1.8426, "step": 2149 }, { "epoch": 0.2469703061283097, "grad_norm": 0.3645937144756317, "learning_rate": 0.0001, "loss": 1.7276, "step": 2150 }, { "epoch": 0.24708517603813682, "grad_norm": 0.413036584854126, "learning_rate": 0.0001, "loss": 1.8982, "step": 2151 }, { "epoch": 0.24720004594796394, "grad_norm": 0.35927438735961914, "learning_rate": 0.0001, "loss": 1.663, "step": 2152 }, { "epoch": 0.24731491585779106, "grad_norm": 0.3814374804496765, "learning_rate": 0.0001, "loss": 1.76, "step": 2153 }, { "epoch": 0.24742978576761818, "grad_norm": 0.3744584321975708, "learning_rate": 0.0001, "loss": 1.7106, "step": 2154 }, { "epoch": 0.2475446556774453, "grad_norm": 0.3815367817878723, "learning_rate": 0.0001, "loss": 1.8374, "step": 2155 }, { "epoch": 0.24765952558727242, "grad_norm": 0.37428149580955505, "learning_rate": 0.0001, "loss": 1.7515, "step": 2156 }, { "epoch": 0.24777439549709954, "grad_norm": 0.3383006751537323, "learning_rate": 0.0001, "loss": 1.5878, "step": 2157 }, { "epoch": 0.24788926540692666, "grad_norm": 0.3787260055541992, "learning_rate": 0.0001, "loss": 1.7558, "step": 2158 }, { "epoch": 0.24800413531675378, "grad_norm": 0.34863799810409546, "learning_rate": 0.0001, "loss": 1.7608, "step": 2159 }, { "epoch": 0.2481190052265809, "grad_norm": 0.34521782398223877, "learning_rate": 0.0001, "loss": 1.6219, "step": 2160 }, { "epoch": 0.24823387513640802, "grad_norm": 0.4506453275680542, "learning_rate": 0.0001, "loss": 1.5816, "step": 2161 }, { "epoch": 0.24834874504623514, "grad_norm": 0.388887494802475, "learning_rate": 0.0001, "loss": 1.9017, "step": 2162 }, { "epoch": 0.24846361495606226, "grad_norm": 0.3828551173210144, "learning_rate": 0.0001, "loss": 1.755, "step": 2163 }, { "epoch": 0.24857848486588938, "grad_norm": 0.35569700598716736, "learning_rate": 0.0001, "loss": 1.6083, "step": 2164 }, { "epoch": 0.2486933547757165, "grad_norm": 0.3651540279388428, "learning_rate": 0.0001, "loss": 1.7724, "step": 2165 }, { "epoch": 0.24880822468554362, "grad_norm": 0.362402081489563, "learning_rate": 0.0001, "loss": 1.6725, "step": 2166 }, { "epoch": 0.24892309459537074, "grad_norm": 0.36203300952911377, "learning_rate": 0.0001, "loss": 1.6146, "step": 2167 }, { "epoch": 0.24903796450519786, "grad_norm": 0.37122076749801636, "learning_rate": 0.0001, "loss": 1.5805, "step": 2168 }, { "epoch": 0.24915283441502498, "grad_norm": 0.357860803604126, "learning_rate": 0.0001, "loss": 1.6995, "step": 2169 }, { "epoch": 0.2492677043248521, "grad_norm": 0.36631032824516296, "learning_rate": 0.0001, "loss": 1.7249, "step": 2170 }, { "epoch": 0.24938257423467922, "grad_norm": 0.37046635150909424, "learning_rate": 0.0001, "loss": 1.6338, "step": 2171 }, { "epoch": 0.24949744414450634, "grad_norm": 0.39707261323928833, "learning_rate": 0.0001, "loss": 1.9089, "step": 2172 }, { "epoch": 0.24961231405433346, "grad_norm": 0.3699803650379181, "learning_rate": 0.0001, "loss": 1.6095, "step": 2173 }, { "epoch": 0.24972718396416058, "grad_norm": 0.3902735710144043, "learning_rate": 0.0001, "loss": 1.7023, "step": 2174 }, { "epoch": 0.2498420538739877, "grad_norm": 0.3696674406528473, "learning_rate": 0.0001, "loss": 1.7209, "step": 2175 }, { "epoch": 0.24995692378381482, "grad_norm": 0.3827657699584961, "learning_rate": 0.0001, "loss": 1.7072, "step": 2176 }, { "epoch": 0.25007179369364196, "grad_norm": 0.38356491923332214, "learning_rate": 0.0001, "loss": 1.7748, "step": 2177 }, { "epoch": 0.2501866636034691, "grad_norm": 0.3400535583496094, "learning_rate": 0.0001, "loss": 1.5347, "step": 2178 }, { "epoch": 0.2503015335132962, "grad_norm": 0.37260451912879944, "learning_rate": 0.0001, "loss": 1.7625, "step": 2179 }, { "epoch": 0.2504164034231233, "grad_norm": 0.4127359688282013, "learning_rate": 0.0001, "loss": 1.4756, "step": 2180 }, { "epoch": 0.25053127333295044, "grad_norm": 0.3528091609477997, "learning_rate": 0.0001, "loss": 1.6681, "step": 2181 }, { "epoch": 0.25064614324277756, "grad_norm": 0.39472371339797974, "learning_rate": 0.0001, "loss": 1.8162, "step": 2182 }, { "epoch": 0.2507610131526047, "grad_norm": 0.3909297585487366, "learning_rate": 0.0001, "loss": 1.7071, "step": 2183 }, { "epoch": 0.2508758830624318, "grad_norm": 0.42550724744796753, "learning_rate": 0.0001, "loss": 1.5249, "step": 2184 }, { "epoch": 0.2509907529722589, "grad_norm": 0.3795909881591797, "learning_rate": 0.0001, "loss": 1.6718, "step": 2185 }, { "epoch": 0.25110562288208604, "grad_norm": 0.37554746866226196, "learning_rate": 0.0001, "loss": 1.8221, "step": 2186 }, { "epoch": 0.25122049279191316, "grad_norm": 0.362114280462265, "learning_rate": 0.0001, "loss": 1.6699, "step": 2187 }, { "epoch": 0.2513353627017403, "grad_norm": 0.36386096477508545, "learning_rate": 0.0001, "loss": 1.4788, "step": 2188 }, { "epoch": 0.2514502326115674, "grad_norm": 0.3716920018196106, "learning_rate": 0.0001, "loss": 1.5724, "step": 2189 }, { "epoch": 0.2515651025213945, "grad_norm": 0.3535931706428528, "learning_rate": 0.0001, "loss": 1.6721, "step": 2190 }, { "epoch": 0.25167997243122164, "grad_norm": 0.3724220097064972, "learning_rate": 0.0001, "loss": 1.6522, "step": 2191 }, { "epoch": 0.25179484234104876, "grad_norm": 0.40217527747154236, "learning_rate": 0.0001, "loss": 1.7181, "step": 2192 }, { "epoch": 0.2519097122508759, "grad_norm": 0.383705198764801, "learning_rate": 0.0001, "loss": 1.6798, "step": 2193 }, { "epoch": 0.252024582160703, "grad_norm": 0.35728463530540466, "learning_rate": 0.0001, "loss": 1.7787, "step": 2194 }, { "epoch": 0.2521394520705301, "grad_norm": 0.34664642810821533, "learning_rate": 0.0001, "loss": 1.4345, "step": 2195 }, { "epoch": 0.25225432198035724, "grad_norm": 0.3613569438457489, "learning_rate": 0.0001, "loss": 1.4541, "step": 2196 }, { "epoch": 0.25236919189018436, "grad_norm": 0.36570632457733154, "learning_rate": 0.0001, "loss": 1.674, "step": 2197 }, { "epoch": 0.2524840618000115, "grad_norm": 0.36983218789100647, "learning_rate": 0.0001, "loss": 1.7606, "step": 2198 }, { "epoch": 0.2525989317098386, "grad_norm": 0.35930711030960083, "learning_rate": 0.0001, "loss": 1.6157, "step": 2199 }, { "epoch": 0.2527138016196657, "grad_norm": 0.39522892236709595, "learning_rate": 0.0001, "loss": 1.7063, "step": 2200 }, { "epoch": 0.25282867152949284, "grad_norm": 0.3675948679447174, "learning_rate": 0.0001, "loss": 1.6295, "step": 2201 }, { "epoch": 0.25294354143931996, "grad_norm": 0.3466283679008484, "learning_rate": 0.0001, "loss": 1.643, "step": 2202 }, { "epoch": 0.2530584113491471, "grad_norm": 0.3820589482784271, "learning_rate": 0.0001, "loss": 1.8305, "step": 2203 }, { "epoch": 0.2531732812589742, "grad_norm": 0.45334169268608093, "learning_rate": 0.0001, "loss": 1.8272, "step": 2204 }, { "epoch": 0.2532881511688013, "grad_norm": 0.36322474479675293, "learning_rate": 0.0001, "loss": 1.5607, "step": 2205 }, { "epoch": 0.25340302107862844, "grad_norm": 0.3699181377887726, "learning_rate": 0.0001, "loss": 1.7263, "step": 2206 }, { "epoch": 0.25351789098845556, "grad_norm": 0.3594902455806732, "learning_rate": 0.0001, "loss": 1.6536, "step": 2207 }, { "epoch": 0.2536327608982827, "grad_norm": 0.3868749141693115, "learning_rate": 0.0001, "loss": 1.7946, "step": 2208 }, { "epoch": 0.2537476308081098, "grad_norm": 0.39426884055137634, "learning_rate": 0.0001, "loss": 1.898, "step": 2209 }, { "epoch": 0.2538625007179369, "grad_norm": 0.41421404480934143, "learning_rate": 0.0001, "loss": 1.8456, "step": 2210 }, { "epoch": 0.25397737062776404, "grad_norm": 0.35613730549812317, "learning_rate": 0.0001, "loss": 1.706, "step": 2211 }, { "epoch": 0.25409224053759116, "grad_norm": 0.38455823063850403, "learning_rate": 0.0001, "loss": 1.6824, "step": 2212 }, { "epoch": 0.2542071104474183, "grad_norm": 0.3799576759338379, "learning_rate": 0.0001, "loss": 1.714, "step": 2213 }, { "epoch": 0.2543219803572454, "grad_norm": 0.43304580450057983, "learning_rate": 0.0001, "loss": 1.7829, "step": 2214 }, { "epoch": 0.2544368502670725, "grad_norm": 0.39498084783554077, "learning_rate": 0.0001, "loss": 1.8003, "step": 2215 }, { "epoch": 0.25455172017689964, "grad_norm": 0.3838403522968292, "learning_rate": 0.0001, "loss": 1.6831, "step": 2216 }, { "epoch": 0.25466659008672676, "grad_norm": 0.38586917519569397, "learning_rate": 0.0001, "loss": 1.8446, "step": 2217 }, { "epoch": 0.2547814599965539, "grad_norm": 0.37291255593299866, "learning_rate": 0.0001, "loss": 1.6258, "step": 2218 }, { "epoch": 0.254896329906381, "grad_norm": 0.37416213750839233, "learning_rate": 0.0001, "loss": 1.6125, "step": 2219 }, { "epoch": 0.2550111998162081, "grad_norm": 0.37467607855796814, "learning_rate": 0.0001, "loss": 1.6916, "step": 2220 }, { "epoch": 0.25512606972603524, "grad_norm": 0.3643462061882019, "learning_rate": 0.0001, "loss": 1.6534, "step": 2221 }, { "epoch": 0.25524093963586236, "grad_norm": 0.3733225464820862, "learning_rate": 0.0001, "loss": 1.7953, "step": 2222 }, { "epoch": 0.2553558095456895, "grad_norm": 0.3754335045814514, "learning_rate": 0.0001, "loss": 1.8093, "step": 2223 }, { "epoch": 0.2554706794555166, "grad_norm": 0.3865663707256317, "learning_rate": 0.0001, "loss": 1.7088, "step": 2224 }, { "epoch": 0.2555855493653437, "grad_norm": 0.38617023825645447, "learning_rate": 0.0001, "loss": 1.8311, "step": 2225 }, { "epoch": 0.25570041927517084, "grad_norm": 0.37515413761138916, "learning_rate": 0.0001, "loss": 1.5905, "step": 2226 }, { "epoch": 0.25581528918499796, "grad_norm": 0.38885021209716797, "learning_rate": 0.0001, "loss": 1.8163, "step": 2227 }, { "epoch": 0.25593015909482514, "grad_norm": 0.37118270993232727, "learning_rate": 0.0001, "loss": 1.8075, "step": 2228 }, { "epoch": 0.25604502900465226, "grad_norm": 0.389565110206604, "learning_rate": 0.0001, "loss": 1.6765, "step": 2229 }, { "epoch": 0.2561598989144794, "grad_norm": 0.3385695517063141, "learning_rate": 0.0001, "loss": 1.3302, "step": 2230 }, { "epoch": 0.2562747688243065, "grad_norm": 0.37382227182388306, "learning_rate": 0.0001, "loss": 1.7651, "step": 2231 }, { "epoch": 0.2563896387341336, "grad_norm": 0.39900514483451843, "learning_rate": 0.0001, "loss": 1.7818, "step": 2232 }, { "epoch": 0.25650450864396074, "grad_norm": 0.3673021197319031, "learning_rate": 0.0001, "loss": 1.814, "step": 2233 }, { "epoch": 0.25661937855378786, "grad_norm": 0.3759724199771881, "learning_rate": 0.0001, "loss": 1.7816, "step": 2234 }, { "epoch": 0.256734248463615, "grad_norm": 0.37381884455680847, "learning_rate": 0.0001, "loss": 1.7092, "step": 2235 }, { "epoch": 0.2568491183734421, "grad_norm": 0.3776073157787323, "learning_rate": 0.0001, "loss": 1.6729, "step": 2236 }, { "epoch": 0.2569639882832692, "grad_norm": 0.4105524718761444, "learning_rate": 0.0001, "loss": 1.5777, "step": 2237 }, { "epoch": 0.25707885819309634, "grad_norm": 0.41020524501800537, "learning_rate": 0.0001, "loss": 1.9763, "step": 2238 }, { "epoch": 0.25719372810292346, "grad_norm": 0.4077359139919281, "learning_rate": 0.0001, "loss": 2.0014, "step": 2239 }, { "epoch": 0.2573085980127506, "grad_norm": 0.41229790449142456, "learning_rate": 0.0001, "loss": 1.6186, "step": 2240 }, { "epoch": 0.2574234679225777, "grad_norm": 0.380489319562912, "learning_rate": 0.0001, "loss": 1.8079, "step": 2241 }, { "epoch": 0.2575383378324048, "grad_norm": 0.3971484899520874, "learning_rate": 0.0001, "loss": 1.8367, "step": 2242 }, { "epoch": 0.25765320774223194, "grad_norm": 0.37627938389778137, "learning_rate": 0.0001, "loss": 1.6502, "step": 2243 }, { "epoch": 0.25776807765205906, "grad_norm": 0.3823041319847107, "learning_rate": 0.0001, "loss": 1.7642, "step": 2244 }, { "epoch": 0.2578829475618862, "grad_norm": 0.3671165704727173, "learning_rate": 0.0001, "loss": 1.7102, "step": 2245 }, { "epoch": 0.2579978174717133, "grad_norm": 0.39813950657844543, "learning_rate": 0.0001, "loss": 1.8005, "step": 2246 }, { "epoch": 0.2581126873815404, "grad_norm": 0.3677361309528351, "learning_rate": 0.0001, "loss": 1.658, "step": 2247 }, { "epoch": 0.25822755729136754, "grad_norm": 0.3524981439113617, "learning_rate": 0.0001, "loss": 1.5583, "step": 2248 }, { "epoch": 0.25834242720119466, "grad_norm": 0.37544408440589905, "learning_rate": 0.0001, "loss": 1.6862, "step": 2249 }, { "epoch": 0.2584572971110218, "grad_norm": 0.35381603240966797, "learning_rate": 0.0001, "loss": 1.5272, "step": 2250 }, { "epoch": 0.2585721670208489, "grad_norm": 0.39469513297080994, "learning_rate": 0.0001, "loss": 1.7863, "step": 2251 }, { "epoch": 0.258687036930676, "grad_norm": 0.3802177906036377, "learning_rate": 0.0001, "loss": 1.7908, "step": 2252 }, { "epoch": 0.25880190684050314, "grad_norm": 0.3482286036014557, "learning_rate": 0.0001, "loss": 1.6267, "step": 2253 }, { "epoch": 0.25891677675033026, "grad_norm": 0.3742547035217285, "learning_rate": 0.0001, "loss": 1.6636, "step": 2254 }, { "epoch": 0.2590316466601574, "grad_norm": 0.386465847492218, "learning_rate": 0.0001, "loss": 1.6659, "step": 2255 }, { "epoch": 0.2591465165699845, "grad_norm": 0.365347295999527, "learning_rate": 0.0001, "loss": 1.7536, "step": 2256 }, { "epoch": 0.2592613864798116, "grad_norm": 0.3640124797821045, "learning_rate": 0.0001, "loss": 1.7142, "step": 2257 }, { "epoch": 0.25937625638963874, "grad_norm": 0.36901116371154785, "learning_rate": 0.0001, "loss": 1.477, "step": 2258 }, { "epoch": 0.25949112629946586, "grad_norm": 0.36894455552101135, "learning_rate": 0.0001, "loss": 1.6933, "step": 2259 }, { "epoch": 0.259605996209293, "grad_norm": 0.37590548396110535, "learning_rate": 0.0001, "loss": 1.7388, "step": 2260 }, { "epoch": 0.2597208661191201, "grad_norm": 0.4689255654811859, "learning_rate": 0.0001, "loss": 2.1016, "step": 2261 }, { "epoch": 0.2598357360289472, "grad_norm": 0.3811010420322418, "learning_rate": 0.0001, "loss": 1.6575, "step": 2262 }, { "epoch": 0.25995060593877434, "grad_norm": 0.3560413420200348, "learning_rate": 0.0001, "loss": 1.6288, "step": 2263 }, { "epoch": 0.26006547584860146, "grad_norm": 0.4040233790874481, "learning_rate": 0.0001, "loss": 1.8888, "step": 2264 }, { "epoch": 0.2601803457584286, "grad_norm": 0.3741479516029358, "learning_rate": 0.0001, "loss": 1.6749, "step": 2265 }, { "epoch": 0.2602952156682557, "grad_norm": 0.3752221465110779, "learning_rate": 0.0001, "loss": 1.8194, "step": 2266 }, { "epoch": 0.2604100855780828, "grad_norm": 0.3529088497161865, "learning_rate": 0.0001, "loss": 1.616, "step": 2267 }, { "epoch": 0.26052495548790994, "grad_norm": 0.37346789240837097, "learning_rate": 0.0001, "loss": 1.694, "step": 2268 }, { "epoch": 0.26063982539773706, "grad_norm": 0.38784459233283997, "learning_rate": 0.0001, "loss": 1.7968, "step": 2269 }, { "epoch": 0.2607546953075642, "grad_norm": 0.3864971697330475, "learning_rate": 0.0001, "loss": 1.7894, "step": 2270 }, { "epoch": 0.2608695652173913, "grad_norm": 0.3784656226634979, "learning_rate": 0.0001, "loss": 1.6398, "step": 2271 }, { "epoch": 0.2609844351272184, "grad_norm": 0.3623911738395691, "learning_rate": 0.0001, "loss": 1.7436, "step": 2272 }, { "epoch": 0.26109930503704554, "grad_norm": 0.3621329069137573, "learning_rate": 0.0001, "loss": 1.4809, "step": 2273 }, { "epoch": 0.26121417494687266, "grad_norm": 0.40668338537216187, "learning_rate": 0.0001, "loss": 1.7219, "step": 2274 }, { "epoch": 0.2613290448566998, "grad_norm": 0.3952760398387909, "learning_rate": 0.0001, "loss": 1.9019, "step": 2275 }, { "epoch": 0.2614439147665269, "grad_norm": 0.39390838146209717, "learning_rate": 0.0001, "loss": 1.8203, "step": 2276 }, { "epoch": 0.261558784676354, "grad_norm": 0.3634113073348999, "learning_rate": 0.0001, "loss": 1.5633, "step": 2277 }, { "epoch": 0.26167365458618114, "grad_norm": 0.36678680777549744, "learning_rate": 0.0001, "loss": 1.7097, "step": 2278 }, { "epoch": 0.26178852449600826, "grad_norm": 0.4072323143482208, "learning_rate": 0.0001, "loss": 1.7973, "step": 2279 }, { "epoch": 0.2619033944058354, "grad_norm": 0.39810293912887573, "learning_rate": 0.0001, "loss": 1.8374, "step": 2280 }, { "epoch": 0.2620182643156625, "grad_norm": 0.35911354422569275, "learning_rate": 0.0001, "loss": 1.7054, "step": 2281 }, { "epoch": 0.2621331342254896, "grad_norm": 0.3710712492465973, "learning_rate": 0.0001, "loss": 1.7595, "step": 2282 }, { "epoch": 0.26224800413531674, "grad_norm": 0.3721124231815338, "learning_rate": 0.0001, "loss": 1.6115, "step": 2283 }, { "epoch": 0.26236287404514386, "grad_norm": 0.35447004437446594, "learning_rate": 0.0001, "loss": 1.6689, "step": 2284 }, { "epoch": 0.262477743954971, "grad_norm": 0.37349700927734375, "learning_rate": 0.0001, "loss": 1.5631, "step": 2285 }, { "epoch": 0.2625926138647981, "grad_norm": 0.3656753599643707, "learning_rate": 0.0001, "loss": 1.5151, "step": 2286 }, { "epoch": 0.2627074837746252, "grad_norm": 0.35918521881103516, "learning_rate": 0.0001, "loss": 1.5962, "step": 2287 }, { "epoch": 0.26282235368445234, "grad_norm": 0.36438190937042236, "learning_rate": 0.0001, "loss": 1.6488, "step": 2288 }, { "epoch": 0.26293722359427946, "grad_norm": 0.365792453289032, "learning_rate": 0.0001, "loss": 1.6017, "step": 2289 }, { "epoch": 0.2630520935041066, "grad_norm": 0.3362356424331665, "learning_rate": 0.0001, "loss": 1.3965, "step": 2290 }, { "epoch": 0.2631669634139337, "grad_norm": 0.3698473274707794, "learning_rate": 0.0001, "loss": 1.6996, "step": 2291 }, { "epoch": 0.2632818333237608, "grad_norm": 0.447664737701416, "learning_rate": 0.0001, "loss": 1.6443, "step": 2292 }, { "epoch": 0.26339670323358794, "grad_norm": 0.4149281978607178, "learning_rate": 0.0001, "loss": 1.7173, "step": 2293 }, { "epoch": 0.26351157314341506, "grad_norm": 0.37246763706207275, "learning_rate": 0.0001, "loss": 1.7145, "step": 2294 }, { "epoch": 0.2636264430532422, "grad_norm": 0.37586966156959534, "learning_rate": 0.0001, "loss": 1.8224, "step": 2295 }, { "epoch": 0.2637413129630693, "grad_norm": 0.36681029200553894, "learning_rate": 0.0001, "loss": 1.6284, "step": 2296 }, { "epoch": 0.26385618287289647, "grad_norm": 0.3731790781021118, "learning_rate": 0.0001, "loss": 1.6059, "step": 2297 }, { "epoch": 0.2639710527827236, "grad_norm": 0.4063315987586975, "learning_rate": 0.0001, "loss": 1.8513, "step": 2298 }, { "epoch": 0.2640859226925507, "grad_norm": 0.36280304193496704, "learning_rate": 0.0001, "loss": 1.5944, "step": 2299 }, { "epoch": 0.26420079260237783, "grad_norm": 0.41365888714790344, "learning_rate": 0.0001, "loss": 1.7712, "step": 2300 }, { "epoch": 0.26431566251220495, "grad_norm": 0.4301823079586029, "learning_rate": 0.0001, "loss": 2.0047, "step": 2301 }, { "epoch": 0.26443053242203207, "grad_norm": 0.3758213222026825, "learning_rate": 0.0001, "loss": 1.6871, "step": 2302 }, { "epoch": 0.2645454023318592, "grad_norm": 0.34419310092926025, "learning_rate": 0.0001, "loss": 1.5699, "step": 2303 }, { "epoch": 0.2646602722416863, "grad_norm": 0.38945549726486206, "learning_rate": 0.0001, "loss": 1.7238, "step": 2304 }, { "epoch": 0.26477514215151343, "grad_norm": 0.3707123398780823, "learning_rate": 0.0001, "loss": 1.6401, "step": 2305 }, { "epoch": 0.26489001206134055, "grad_norm": 0.35695189237594604, "learning_rate": 0.0001, "loss": 1.6122, "step": 2306 }, { "epoch": 0.26500488197116767, "grad_norm": 0.36474674940109253, "learning_rate": 0.0001, "loss": 1.7198, "step": 2307 }, { "epoch": 0.2651197518809948, "grad_norm": 0.35917285084724426, "learning_rate": 0.0001, "loss": 1.5133, "step": 2308 }, { "epoch": 0.2652346217908219, "grad_norm": 0.38368695974349976, "learning_rate": 0.0001, "loss": 1.7404, "step": 2309 }, { "epoch": 0.26534949170064903, "grad_norm": 0.4073870778083801, "learning_rate": 0.0001, "loss": 1.7424, "step": 2310 }, { "epoch": 0.26546436161047615, "grad_norm": 0.35575923323631287, "learning_rate": 0.0001, "loss": 1.6298, "step": 2311 }, { "epoch": 0.26557923152030327, "grad_norm": 0.3653978109359741, "learning_rate": 0.0001, "loss": 1.7346, "step": 2312 }, { "epoch": 0.2656941014301304, "grad_norm": 0.35288769006729126, "learning_rate": 0.0001, "loss": 1.611, "step": 2313 }, { "epoch": 0.2658089713399575, "grad_norm": 0.36546453833580017, "learning_rate": 0.0001, "loss": 1.638, "step": 2314 }, { "epoch": 0.26592384124978463, "grad_norm": 0.3479709029197693, "learning_rate": 0.0001, "loss": 1.6432, "step": 2315 }, { "epoch": 0.26603871115961175, "grad_norm": 0.3809703290462494, "learning_rate": 0.0001, "loss": 1.7787, "step": 2316 }, { "epoch": 0.26615358106943887, "grad_norm": 0.3938155472278595, "learning_rate": 0.0001, "loss": 1.7848, "step": 2317 }, { "epoch": 0.266268450979266, "grad_norm": 0.3979399502277374, "learning_rate": 0.0001, "loss": 1.7428, "step": 2318 }, { "epoch": 0.2663833208890931, "grad_norm": 0.34814873337745667, "learning_rate": 0.0001, "loss": 1.5687, "step": 2319 }, { "epoch": 0.26649819079892023, "grad_norm": 0.3895953595638275, "learning_rate": 0.0001, "loss": 1.6384, "step": 2320 }, { "epoch": 0.26661306070874735, "grad_norm": 0.3680303692817688, "learning_rate": 0.0001, "loss": 1.6297, "step": 2321 }, { "epoch": 0.26672793061857447, "grad_norm": 0.3696908950805664, "learning_rate": 0.0001, "loss": 1.7016, "step": 2322 }, { "epoch": 0.2668428005284016, "grad_norm": 0.3959087133407593, "learning_rate": 0.0001, "loss": 1.6687, "step": 2323 }, { "epoch": 0.2669576704382287, "grad_norm": 0.41594138741493225, "learning_rate": 0.0001, "loss": 1.7522, "step": 2324 }, { "epoch": 0.26707254034805583, "grad_norm": 0.3591321110725403, "learning_rate": 0.0001, "loss": 1.7126, "step": 2325 }, { "epoch": 0.26718741025788295, "grad_norm": 0.3633732795715332, "learning_rate": 0.0001, "loss": 1.6606, "step": 2326 }, { "epoch": 0.26730228016771007, "grad_norm": 0.3702927529811859, "learning_rate": 0.0001, "loss": 1.7004, "step": 2327 }, { "epoch": 0.2674171500775372, "grad_norm": 0.38838890194892883, "learning_rate": 0.0001, "loss": 1.7151, "step": 2328 }, { "epoch": 0.2675320199873643, "grad_norm": 0.3630795180797577, "learning_rate": 0.0001, "loss": 1.7975, "step": 2329 }, { "epoch": 0.26764688989719143, "grad_norm": 0.3541039228439331, "learning_rate": 0.0001, "loss": 1.7219, "step": 2330 }, { "epoch": 0.26776175980701855, "grad_norm": 0.35986846685409546, "learning_rate": 0.0001, "loss": 1.8925, "step": 2331 }, { "epoch": 0.26787662971684567, "grad_norm": 0.3914209008216858, "learning_rate": 0.0001, "loss": 1.9096, "step": 2332 }, { "epoch": 0.2679914996266728, "grad_norm": 0.3626435399055481, "learning_rate": 0.0001, "loss": 1.8039, "step": 2333 }, { "epoch": 0.2681063695364999, "grad_norm": 0.33067116141319275, "learning_rate": 0.0001, "loss": 1.5963, "step": 2334 }, { "epoch": 0.26822123944632703, "grad_norm": 0.38686808943748474, "learning_rate": 0.0001, "loss": 1.5786, "step": 2335 }, { "epoch": 0.26833610935615415, "grad_norm": 0.3926886022090912, "learning_rate": 0.0001, "loss": 1.8575, "step": 2336 }, { "epoch": 0.26845097926598127, "grad_norm": 0.3721444606781006, "learning_rate": 0.0001, "loss": 1.6829, "step": 2337 }, { "epoch": 0.2685658491758084, "grad_norm": 0.39007246494293213, "learning_rate": 0.0001, "loss": 1.7679, "step": 2338 }, { "epoch": 0.2686807190856355, "grad_norm": 0.3614482283592224, "learning_rate": 0.0001, "loss": 1.6376, "step": 2339 }, { "epoch": 0.26879558899546263, "grad_norm": 0.39546316862106323, "learning_rate": 0.0001, "loss": 1.4712, "step": 2340 }, { "epoch": 0.26891045890528975, "grad_norm": 0.36849477887153625, "learning_rate": 0.0001, "loss": 1.6153, "step": 2341 }, { "epoch": 0.26902532881511687, "grad_norm": 0.3788404166698456, "learning_rate": 0.0001, "loss": 1.6837, "step": 2342 }, { "epoch": 0.269140198724944, "grad_norm": 0.35742640495300293, "learning_rate": 0.0001, "loss": 1.5822, "step": 2343 }, { "epoch": 0.2692550686347711, "grad_norm": 0.37428852915763855, "learning_rate": 0.0001, "loss": 1.7477, "step": 2344 }, { "epoch": 0.26936993854459823, "grad_norm": 0.42374229431152344, "learning_rate": 0.0001, "loss": 1.8088, "step": 2345 }, { "epoch": 0.26948480845442535, "grad_norm": 0.3708469867706299, "learning_rate": 0.0001, "loss": 1.8116, "step": 2346 }, { "epoch": 0.26959967836425247, "grad_norm": 0.37047696113586426, "learning_rate": 0.0001, "loss": 1.6812, "step": 2347 }, { "epoch": 0.2697145482740796, "grad_norm": 0.4440860450267792, "learning_rate": 0.0001, "loss": 1.7974, "step": 2348 }, { "epoch": 0.2698294181839067, "grad_norm": 0.38337430357933044, "learning_rate": 0.0001, "loss": 1.7039, "step": 2349 }, { "epoch": 0.26994428809373383, "grad_norm": 0.3824228048324585, "learning_rate": 0.0001, "loss": 1.7418, "step": 2350 }, { "epoch": 0.27005915800356095, "grad_norm": 0.38111045956611633, "learning_rate": 0.0001, "loss": 1.7184, "step": 2351 }, { "epoch": 0.27017402791338807, "grad_norm": 0.3806154727935791, "learning_rate": 0.0001, "loss": 1.8688, "step": 2352 }, { "epoch": 0.2702888978232152, "grad_norm": 0.3737587332725525, "learning_rate": 0.0001, "loss": 1.7697, "step": 2353 }, { "epoch": 0.2704037677330423, "grad_norm": 0.37875616550445557, "learning_rate": 0.0001, "loss": 1.7901, "step": 2354 }, { "epoch": 0.27051863764286943, "grad_norm": 0.3805428445339203, "learning_rate": 0.0001, "loss": 1.8359, "step": 2355 }, { "epoch": 0.27063350755269655, "grad_norm": 0.3564281761646271, "learning_rate": 0.0001, "loss": 1.6027, "step": 2356 }, { "epoch": 0.27074837746252367, "grad_norm": 0.36830076575279236, "learning_rate": 0.0001, "loss": 1.7583, "step": 2357 }, { "epoch": 0.2708632473723508, "grad_norm": 0.38345348834991455, "learning_rate": 0.0001, "loss": 1.8286, "step": 2358 }, { "epoch": 0.2709781172821779, "grad_norm": 0.3683982789516449, "learning_rate": 0.0001, "loss": 1.6467, "step": 2359 }, { "epoch": 0.27109298719200503, "grad_norm": 0.36231061816215515, "learning_rate": 0.0001, "loss": 1.6102, "step": 2360 }, { "epoch": 0.27120785710183215, "grad_norm": 0.40081697702407837, "learning_rate": 0.0001, "loss": 1.7706, "step": 2361 }, { "epoch": 0.27132272701165927, "grad_norm": 0.3795923590660095, "learning_rate": 0.0001, "loss": 1.8317, "step": 2362 }, { "epoch": 0.2714375969214864, "grad_norm": 0.41488227248191833, "learning_rate": 0.0001, "loss": 1.8353, "step": 2363 }, { "epoch": 0.2715524668313135, "grad_norm": 0.36446574330329895, "learning_rate": 0.0001, "loss": 1.7574, "step": 2364 }, { "epoch": 0.2716673367411407, "grad_norm": 0.4068087935447693, "learning_rate": 0.0001, "loss": 1.6117, "step": 2365 }, { "epoch": 0.2717822066509678, "grad_norm": 0.3724612295627594, "learning_rate": 0.0001, "loss": 1.8087, "step": 2366 }, { "epoch": 0.2718970765607949, "grad_norm": 0.3523292541503906, "learning_rate": 0.0001, "loss": 1.6064, "step": 2367 }, { "epoch": 0.27201194647062205, "grad_norm": 0.3559991717338562, "learning_rate": 0.0001, "loss": 1.6747, "step": 2368 }, { "epoch": 0.27212681638044917, "grad_norm": 0.370811402797699, "learning_rate": 0.0001, "loss": 1.5848, "step": 2369 }, { "epoch": 0.2722416862902763, "grad_norm": 0.350815087556839, "learning_rate": 0.0001, "loss": 1.5376, "step": 2370 }, { "epoch": 0.2723565562001034, "grad_norm": 0.3826122283935547, "learning_rate": 0.0001, "loss": 1.7605, "step": 2371 }, { "epoch": 0.2724714261099305, "grad_norm": 0.3690153956413269, "learning_rate": 0.0001, "loss": 1.5615, "step": 2372 }, { "epoch": 0.27258629601975765, "grad_norm": 0.3891347050666809, "learning_rate": 0.0001, "loss": 1.7345, "step": 2373 }, { "epoch": 0.27270116592958477, "grad_norm": 0.3778075873851776, "learning_rate": 0.0001, "loss": 1.7759, "step": 2374 }, { "epoch": 0.2728160358394119, "grad_norm": 0.35634365677833557, "learning_rate": 0.0001, "loss": 1.7241, "step": 2375 }, { "epoch": 0.272930905749239, "grad_norm": 0.3891456127166748, "learning_rate": 0.0001, "loss": 1.7749, "step": 2376 }, { "epoch": 0.2730457756590661, "grad_norm": 0.3792819380760193, "learning_rate": 0.0001, "loss": 1.6347, "step": 2377 }, { "epoch": 0.27316064556889325, "grad_norm": 0.37712422013282776, "learning_rate": 0.0001, "loss": 1.5787, "step": 2378 }, { "epoch": 0.27327551547872037, "grad_norm": 0.38600102066993713, "learning_rate": 0.0001, "loss": 1.8743, "step": 2379 }, { "epoch": 0.2733903853885475, "grad_norm": 0.3560759723186493, "learning_rate": 0.0001, "loss": 1.6198, "step": 2380 }, { "epoch": 0.2735052552983746, "grad_norm": 0.3798205852508545, "learning_rate": 0.0001, "loss": 1.7504, "step": 2381 }, { "epoch": 0.2736201252082017, "grad_norm": 0.40677618980407715, "learning_rate": 0.0001, "loss": 1.6794, "step": 2382 }, { "epoch": 0.27373499511802885, "grad_norm": 0.3787870705127716, "learning_rate": 0.0001, "loss": 1.7416, "step": 2383 }, { "epoch": 0.27384986502785597, "grad_norm": 0.39786311984062195, "learning_rate": 0.0001, "loss": 1.8658, "step": 2384 }, { "epoch": 0.2739647349376831, "grad_norm": 0.39431723952293396, "learning_rate": 0.0001, "loss": 1.8427, "step": 2385 }, { "epoch": 0.2740796048475102, "grad_norm": 0.40065842866897583, "learning_rate": 0.0001, "loss": 1.8135, "step": 2386 }, { "epoch": 0.2741944747573373, "grad_norm": 0.3779159188270569, "learning_rate": 0.0001, "loss": 1.6793, "step": 2387 }, { "epoch": 0.27430934466716445, "grad_norm": 0.35582754015922546, "learning_rate": 0.0001, "loss": 1.6984, "step": 2388 }, { "epoch": 0.27442421457699157, "grad_norm": 0.3920300304889679, "learning_rate": 0.0001, "loss": 1.7694, "step": 2389 }, { "epoch": 0.2745390844868187, "grad_norm": 0.394414484500885, "learning_rate": 0.0001, "loss": 1.6563, "step": 2390 }, { "epoch": 0.2746539543966458, "grad_norm": 0.3930966258049011, "learning_rate": 0.0001, "loss": 1.5893, "step": 2391 }, { "epoch": 0.2747688243064729, "grad_norm": 0.3484657406806946, "learning_rate": 0.0001, "loss": 1.5964, "step": 2392 }, { "epoch": 0.27488369421630005, "grad_norm": 0.41878804564476013, "learning_rate": 0.0001, "loss": 1.7226, "step": 2393 }, { "epoch": 0.27499856412612717, "grad_norm": 0.36988186836242676, "learning_rate": 0.0001, "loss": 1.7018, "step": 2394 }, { "epoch": 0.2751134340359543, "grad_norm": 0.3710579574108124, "learning_rate": 0.0001, "loss": 1.5329, "step": 2395 }, { "epoch": 0.2752283039457814, "grad_norm": 0.3970875144004822, "learning_rate": 0.0001, "loss": 1.5914, "step": 2396 }, { "epoch": 0.2753431738556085, "grad_norm": 0.3790924847126007, "learning_rate": 0.0001, "loss": 1.7784, "step": 2397 }, { "epoch": 0.27545804376543565, "grad_norm": 0.35328909754753113, "learning_rate": 0.0001, "loss": 1.8243, "step": 2398 }, { "epoch": 0.27557291367526277, "grad_norm": 0.360989511013031, "learning_rate": 0.0001, "loss": 1.5756, "step": 2399 }, { "epoch": 0.2756877835850899, "grad_norm": 0.4168824553489685, "learning_rate": 0.0001, "loss": 1.7192, "step": 2400 }, { "epoch": 0.275802653494917, "grad_norm": 0.3885016143321991, "learning_rate": 0.0001, "loss": 1.6223, "step": 2401 }, { "epoch": 0.2759175234047441, "grad_norm": 0.3799929916858673, "learning_rate": 0.0001, "loss": 1.7446, "step": 2402 }, { "epoch": 0.27603239331457124, "grad_norm": 0.42033493518829346, "learning_rate": 0.0001, "loss": 1.8286, "step": 2403 }, { "epoch": 0.27614726322439836, "grad_norm": 0.39111387729644775, "learning_rate": 0.0001, "loss": 1.7342, "step": 2404 }, { "epoch": 0.2762621331342255, "grad_norm": 0.34956350922584534, "learning_rate": 0.0001, "loss": 1.6533, "step": 2405 }, { "epoch": 0.2763770030440526, "grad_norm": 0.3685983419418335, "learning_rate": 0.0001, "loss": 1.8086, "step": 2406 }, { "epoch": 0.2764918729538797, "grad_norm": 0.35792601108551025, "learning_rate": 0.0001, "loss": 1.7419, "step": 2407 }, { "epoch": 0.27660674286370684, "grad_norm": 0.39823225140571594, "learning_rate": 0.0001, "loss": 1.7881, "step": 2408 }, { "epoch": 0.27672161277353396, "grad_norm": 0.39737337827682495, "learning_rate": 0.0001, "loss": 1.7751, "step": 2409 }, { "epoch": 0.2768364826833611, "grad_norm": 0.35919415950775146, "learning_rate": 0.0001, "loss": 1.6735, "step": 2410 }, { "epoch": 0.2769513525931882, "grad_norm": 0.33741164207458496, "learning_rate": 0.0001, "loss": 1.5492, "step": 2411 }, { "epoch": 0.2770662225030153, "grad_norm": 0.359222948551178, "learning_rate": 0.0001, "loss": 1.6339, "step": 2412 }, { "epoch": 0.27718109241284244, "grad_norm": 0.3950967490673065, "learning_rate": 0.0001, "loss": 1.7063, "step": 2413 }, { "epoch": 0.27729596232266956, "grad_norm": 0.3793290853500366, "learning_rate": 0.0001, "loss": 1.732, "step": 2414 }, { "epoch": 0.2774108322324967, "grad_norm": 0.38844072818756104, "learning_rate": 0.0001, "loss": 1.6996, "step": 2415 }, { "epoch": 0.2775257021423238, "grad_norm": 0.3837685286998749, "learning_rate": 0.0001, "loss": 1.7407, "step": 2416 }, { "epoch": 0.2776405720521509, "grad_norm": 0.4028517007827759, "learning_rate": 0.0001, "loss": 1.8119, "step": 2417 }, { "epoch": 0.27775544196197804, "grad_norm": 0.38334351778030396, "learning_rate": 0.0001, "loss": 1.7055, "step": 2418 }, { "epoch": 0.27787031187180516, "grad_norm": 0.3632005453109741, "learning_rate": 0.0001, "loss": 1.531, "step": 2419 }, { "epoch": 0.2779851817816323, "grad_norm": 0.36728063225746155, "learning_rate": 0.0001, "loss": 1.7756, "step": 2420 }, { "epoch": 0.2781000516914594, "grad_norm": 0.41414251923561096, "learning_rate": 0.0001, "loss": 1.9967, "step": 2421 }, { "epoch": 0.2782149216012865, "grad_norm": 0.3955191969871521, "learning_rate": 0.0001, "loss": 1.7223, "step": 2422 }, { "epoch": 0.27832979151111364, "grad_norm": 0.37816205620765686, "learning_rate": 0.0001, "loss": 1.4855, "step": 2423 }, { "epoch": 0.27844466142094076, "grad_norm": 0.4215168058872223, "learning_rate": 0.0001, "loss": 1.6134, "step": 2424 }, { "epoch": 0.2785595313307679, "grad_norm": 0.3857564926147461, "learning_rate": 0.0001, "loss": 1.6492, "step": 2425 }, { "epoch": 0.278674401240595, "grad_norm": 0.3551070988178253, "learning_rate": 0.0001, "loss": 1.5696, "step": 2426 }, { "epoch": 0.2787892711504221, "grad_norm": 0.3506256937980652, "learning_rate": 0.0001, "loss": 1.6087, "step": 2427 }, { "epoch": 0.27890414106024924, "grad_norm": 0.3387966752052307, "learning_rate": 0.0001, "loss": 1.5616, "step": 2428 }, { "epoch": 0.27901901097007636, "grad_norm": 0.3767859637737274, "learning_rate": 0.0001, "loss": 1.7688, "step": 2429 }, { "epoch": 0.2791338808799035, "grad_norm": 0.38322755694389343, "learning_rate": 0.0001, "loss": 1.867, "step": 2430 }, { "epoch": 0.2792487507897306, "grad_norm": 0.3900243043899536, "learning_rate": 0.0001, "loss": 1.9258, "step": 2431 }, { "epoch": 0.2793636206995577, "grad_norm": 0.3808649182319641, "learning_rate": 0.0001, "loss": 1.53, "step": 2432 }, { "epoch": 0.27947849060938484, "grad_norm": 0.36853742599487305, "learning_rate": 0.0001, "loss": 1.584, "step": 2433 }, { "epoch": 0.279593360519212, "grad_norm": 0.36365145444869995, "learning_rate": 0.0001, "loss": 1.7215, "step": 2434 }, { "epoch": 0.27970823042903914, "grad_norm": 0.361780047416687, "learning_rate": 0.0001, "loss": 1.6156, "step": 2435 }, { "epoch": 0.27982310033886626, "grad_norm": 0.39141660928726196, "learning_rate": 0.0001, "loss": 1.8127, "step": 2436 }, { "epoch": 0.2799379702486934, "grad_norm": 0.3739947974681854, "learning_rate": 0.0001, "loss": 1.5502, "step": 2437 }, { "epoch": 0.2800528401585205, "grad_norm": 0.3897460103034973, "learning_rate": 0.0001, "loss": 1.869, "step": 2438 }, { "epoch": 0.2801677100683476, "grad_norm": 0.3739219903945923, "learning_rate": 0.0001, "loss": 1.5862, "step": 2439 }, { "epoch": 0.28028257997817474, "grad_norm": 0.3786073923110962, "learning_rate": 0.0001, "loss": 1.6744, "step": 2440 }, { "epoch": 0.28039744988800186, "grad_norm": 0.37929031252861023, "learning_rate": 0.0001, "loss": 1.7422, "step": 2441 }, { "epoch": 0.280512319797829, "grad_norm": 0.35074925422668457, "learning_rate": 0.0001, "loss": 1.6005, "step": 2442 }, { "epoch": 0.2806271897076561, "grad_norm": 0.3912547528743744, "learning_rate": 0.0001, "loss": 1.8629, "step": 2443 }, { "epoch": 0.2807420596174832, "grad_norm": 0.36757802963256836, "learning_rate": 0.0001, "loss": 1.557, "step": 2444 }, { "epoch": 0.28085692952731034, "grad_norm": 0.3755451738834381, "learning_rate": 0.0001, "loss": 1.8249, "step": 2445 }, { "epoch": 0.28097179943713746, "grad_norm": 0.3435642719268799, "learning_rate": 0.0001, "loss": 1.4253, "step": 2446 }, { "epoch": 0.2810866693469646, "grad_norm": 0.3567301332950592, "learning_rate": 0.0001, "loss": 1.4934, "step": 2447 }, { "epoch": 0.2812015392567917, "grad_norm": 0.3861426115036011, "learning_rate": 0.0001, "loss": 1.6306, "step": 2448 }, { "epoch": 0.2813164091666188, "grad_norm": 0.38505983352661133, "learning_rate": 0.0001, "loss": 1.7413, "step": 2449 }, { "epoch": 0.28143127907644594, "grad_norm": 0.3739560544490814, "learning_rate": 0.0001, "loss": 1.7037, "step": 2450 }, { "epoch": 0.28154614898627306, "grad_norm": 0.359266996383667, "learning_rate": 0.0001, "loss": 1.6393, "step": 2451 }, { "epoch": 0.2816610188961002, "grad_norm": 0.34818223118782043, "learning_rate": 0.0001, "loss": 1.4532, "step": 2452 }, { "epoch": 0.2817758888059273, "grad_norm": 0.40676432847976685, "learning_rate": 0.0001, "loss": 1.8238, "step": 2453 }, { "epoch": 0.2818907587157544, "grad_norm": 0.37692880630493164, "learning_rate": 0.0001, "loss": 1.7096, "step": 2454 }, { "epoch": 0.28200562862558154, "grad_norm": 0.3589748442173004, "learning_rate": 0.0001, "loss": 1.5383, "step": 2455 }, { "epoch": 0.28212049853540866, "grad_norm": 0.3927457928657532, "learning_rate": 0.0001, "loss": 1.7663, "step": 2456 }, { "epoch": 0.2822353684452358, "grad_norm": 0.37036100029945374, "learning_rate": 0.0001, "loss": 1.6362, "step": 2457 }, { "epoch": 0.2823502383550629, "grad_norm": 0.3649539649486542, "learning_rate": 0.0001, "loss": 1.4174, "step": 2458 }, { "epoch": 0.28246510826489, "grad_norm": 0.3977210521697998, "learning_rate": 0.0001, "loss": 1.7361, "step": 2459 }, { "epoch": 0.28257997817471714, "grad_norm": 0.37352606654167175, "learning_rate": 0.0001, "loss": 1.7877, "step": 2460 }, { "epoch": 0.28269484808454426, "grad_norm": 0.37431058287620544, "learning_rate": 0.0001, "loss": 1.7231, "step": 2461 }, { "epoch": 0.2828097179943714, "grad_norm": 0.3667674660682678, "learning_rate": 0.0001, "loss": 1.6047, "step": 2462 }, { "epoch": 0.2829245879041985, "grad_norm": 0.4098829925060272, "learning_rate": 0.0001, "loss": 1.7942, "step": 2463 }, { "epoch": 0.2830394578140256, "grad_norm": 0.35849472880363464, "learning_rate": 0.0001, "loss": 1.7135, "step": 2464 }, { "epoch": 0.28315432772385274, "grad_norm": 0.40055009722709656, "learning_rate": 0.0001, "loss": 1.7541, "step": 2465 }, { "epoch": 0.28326919763367986, "grad_norm": 0.3810610771179199, "learning_rate": 0.0001, "loss": 1.6542, "step": 2466 }, { "epoch": 0.283384067543507, "grad_norm": 0.3662252724170685, "learning_rate": 0.0001, "loss": 1.5483, "step": 2467 }, { "epoch": 0.2834989374533341, "grad_norm": 0.36314326524734497, "learning_rate": 0.0001, "loss": 1.5905, "step": 2468 }, { "epoch": 0.2836138073631612, "grad_norm": 0.3729235529899597, "learning_rate": 0.0001, "loss": 1.6139, "step": 2469 }, { "epoch": 0.28372867727298834, "grad_norm": 0.40618225932121277, "learning_rate": 0.0001, "loss": 1.7267, "step": 2470 }, { "epoch": 0.28384354718281546, "grad_norm": 0.3927193284034729, "learning_rate": 0.0001, "loss": 1.8012, "step": 2471 }, { "epoch": 0.2839584170926426, "grad_norm": 0.3536290228366852, "learning_rate": 0.0001, "loss": 1.7102, "step": 2472 }, { "epoch": 0.2840732870024697, "grad_norm": 0.36351481080055237, "learning_rate": 0.0001, "loss": 1.5794, "step": 2473 }, { "epoch": 0.2841881569122968, "grad_norm": 0.3685545027256012, "learning_rate": 0.0001, "loss": 1.619, "step": 2474 }, { "epoch": 0.28430302682212394, "grad_norm": 0.37130284309387207, "learning_rate": 0.0001, "loss": 1.7307, "step": 2475 }, { "epoch": 0.28441789673195106, "grad_norm": 0.3823324143886566, "learning_rate": 0.0001, "loss": 1.7253, "step": 2476 }, { "epoch": 0.2845327666417782, "grad_norm": 0.3819986879825592, "learning_rate": 0.0001, "loss": 1.5769, "step": 2477 }, { "epoch": 0.2846476365516053, "grad_norm": 0.4364182651042938, "learning_rate": 0.0001, "loss": 1.7868, "step": 2478 }, { "epoch": 0.2847625064614324, "grad_norm": 0.3844752907752991, "learning_rate": 0.0001, "loss": 1.6089, "step": 2479 }, { "epoch": 0.28487737637125954, "grad_norm": 0.38411590456962585, "learning_rate": 0.0001, "loss": 1.5398, "step": 2480 }, { "epoch": 0.28499224628108666, "grad_norm": 0.40151742100715637, "learning_rate": 0.0001, "loss": 1.8084, "step": 2481 }, { "epoch": 0.2851071161909138, "grad_norm": 0.41466403007507324, "learning_rate": 0.0001, "loss": 1.6723, "step": 2482 }, { "epoch": 0.2852219861007409, "grad_norm": 0.37958747148513794, "learning_rate": 0.0001, "loss": 1.516, "step": 2483 }, { "epoch": 0.285336856010568, "grad_norm": 0.3950232267379761, "learning_rate": 0.0001, "loss": 1.955, "step": 2484 }, { "epoch": 0.28545172592039514, "grad_norm": 0.3635809123516083, "learning_rate": 0.0001, "loss": 1.7445, "step": 2485 }, { "epoch": 0.28556659583022226, "grad_norm": 0.38769015669822693, "learning_rate": 0.0001, "loss": 1.6845, "step": 2486 }, { "epoch": 0.2856814657400494, "grad_norm": 0.37288132309913635, "learning_rate": 0.0001, "loss": 1.5452, "step": 2487 }, { "epoch": 0.2857963356498765, "grad_norm": 0.4224679172039032, "learning_rate": 0.0001, "loss": 1.8745, "step": 2488 }, { "epoch": 0.2859112055597036, "grad_norm": 0.4068784713745117, "learning_rate": 0.0001, "loss": 1.9496, "step": 2489 }, { "epoch": 0.28602607546953074, "grad_norm": 0.39766570925712585, "learning_rate": 0.0001, "loss": 1.7433, "step": 2490 }, { "epoch": 0.28614094537935786, "grad_norm": 0.4218822717666626, "learning_rate": 0.0001, "loss": 1.7239, "step": 2491 }, { "epoch": 0.286255815289185, "grad_norm": 0.3917092978954315, "learning_rate": 0.0001, "loss": 1.5885, "step": 2492 }, { "epoch": 0.2863706851990121, "grad_norm": 0.44831544160842896, "learning_rate": 0.0001, "loss": 1.7862, "step": 2493 }, { "epoch": 0.2864855551088392, "grad_norm": 0.39573273062705994, "learning_rate": 0.0001, "loss": 1.8192, "step": 2494 }, { "epoch": 0.28660042501866634, "grad_norm": 0.37129390239715576, "learning_rate": 0.0001, "loss": 1.5017, "step": 2495 }, { "epoch": 0.28671529492849346, "grad_norm": 0.3685044050216675, "learning_rate": 0.0001, "loss": 1.7772, "step": 2496 }, { "epoch": 0.2868301648383206, "grad_norm": 0.41524022817611694, "learning_rate": 0.0001, "loss": 1.825, "step": 2497 }, { "epoch": 0.2869450347481477, "grad_norm": 0.39948517084121704, "learning_rate": 0.0001, "loss": 1.6756, "step": 2498 }, { "epoch": 0.2870599046579748, "grad_norm": 0.3788050413131714, "learning_rate": 0.0001, "loss": 1.8223, "step": 2499 }, { "epoch": 0.28717477456780194, "grad_norm": 0.3545449674129486, "learning_rate": 0.0001, "loss": 1.4563, "step": 2500 }, { "epoch": 0.28728964447762906, "grad_norm": 0.37108996510505676, "learning_rate": 0.0001, "loss": 1.685, "step": 2501 }, { "epoch": 0.28740451438745623, "grad_norm": 0.37973251938819885, "learning_rate": 0.0001, "loss": 1.9102, "step": 2502 }, { "epoch": 0.28751938429728335, "grad_norm": 0.3596639931201935, "learning_rate": 0.0001, "loss": 1.6966, "step": 2503 }, { "epoch": 0.2876342542071105, "grad_norm": 0.39931946992874146, "learning_rate": 0.0001, "loss": 1.664, "step": 2504 }, { "epoch": 0.2877491241169376, "grad_norm": 0.3801995515823364, "learning_rate": 0.0001, "loss": 1.6762, "step": 2505 }, { "epoch": 0.2878639940267647, "grad_norm": 0.3975699245929718, "learning_rate": 0.0001, "loss": 1.8263, "step": 2506 }, { "epoch": 0.28797886393659183, "grad_norm": 0.41587021946907043, "learning_rate": 0.0001, "loss": 1.8763, "step": 2507 }, { "epoch": 0.28809373384641895, "grad_norm": 0.3884534239768982, "learning_rate": 0.0001, "loss": 1.8323, "step": 2508 }, { "epoch": 0.2882086037562461, "grad_norm": 0.376265287399292, "learning_rate": 0.0001, "loss": 1.735, "step": 2509 }, { "epoch": 0.2883234736660732, "grad_norm": 0.349988728761673, "learning_rate": 0.0001, "loss": 1.5764, "step": 2510 }, { "epoch": 0.2884383435759003, "grad_norm": 0.3495781123638153, "learning_rate": 0.0001, "loss": 1.6788, "step": 2511 }, { "epoch": 0.28855321348572743, "grad_norm": 0.40810123085975647, "learning_rate": 0.0001, "loss": 1.7881, "step": 2512 }, { "epoch": 0.28866808339555455, "grad_norm": 0.37921836972236633, "learning_rate": 0.0001, "loss": 1.6171, "step": 2513 }, { "epoch": 0.2887829533053817, "grad_norm": 0.3909505009651184, "learning_rate": 0.0001, "loss": 1.7327, "step": 2514 }, { "epoch": 0.2888978232152088, "grad_norm": 0.3837600648403168, "learning_rate": 0.0001, "loss": 1.5121, "step": 2515 }, { "epoch": 0.2890126931250359, "grad_norm": 0.4128781855106354, "learning_rate": 0.0001, "loss": 1.8047, "step": 2516 }, { "epoch": 0.28912756303486303, "grad_norm": 0.3738429546356201, "learning_rate": 0.0001, "loss": 1.6059, "step": 2517 }, { "epoch": 0.28924243294469015, "grad_norm": 0.40288591384887695, "learning_rate": 0.0001, "loss": 1.8381, "step": 2518 }, { "epoch": 0.2893573028545173, "grad_norm": 0.37133219838142395, "learning_rate": 0.0001, "loss": 1.6335, "step": 2519 }, { "epoch": 0.2894721727643444, "grad_norm": 0.3895220160484314, "learning_rate": 0.0001, "loss": 1.6405, "step": 2520 }, { "epoch": 0.2895870426741715, "grad_norm": 0.42748066782951355, "learning_rate": 0.0001, "loss": 1.7745, "step": 2521 }, { "epoch": 0.28970191258399863, "grad_norm": 0.3681427836418152, "learning_rate": 0.0001, "loss": 1.5979, "step": 2522 }, { "epoch": 0.28981678249382575, "grad_norm": 0.3755578100681305, "learning_rate": 0.0001, "loss": 1.7104, "step": 2523 }, { "epoch": 0.2899316524036529, "grad_norm": 0.3791626989841461, "learning_rate": 0.0001, "loss": 1.6502, "step": 2524 }, { "epoch": 0.29004652231348, "grad_norm": 0.4189550578594208, "learning_rate": 0.0001, "loss": 1.9463, "step": 2525 }, { "epoch": 0.2901613922233071, "grad_norm": 0.4706687033176422, "learning_rate": 0.0001, "loss": 1.8105, "step": 2526 }, { "epoch": 0.29027626213313423, "grad_norm": 0.38283270597457886, "learning_rate": 0.0001, "loss": 1.7055, "step": 2527 }, { "epoch": 0.29039113204296135, "grad_norm": 0.38680872321128845, "learning_rate": 0.0001, "loss": 1.7457, "step": 2528 }, { "epoch": 0.2905060019527885, "grad_norm": 0.34589383006095886, "learning_rate": 0.0001, "loss": 1.5529, "step": 2529 }, { "epoch": 0.2906208718626156, "grad_norm": 0.33116042613983154, "learning_rate": 0.0001, "loss": 1.3595, "step": 2530 }, { "epoch": 0.2907357417724427, "grad_norm": 0.37206852436065674, "learning_rate": 0.0001, "loss": 1.8634, "step": 2531 }, { "epoch": 0.29085061168226983, "grad_norm": 0.39483073353767395, "learning_rate": 0.0001, "loss": 1.7221, "step": 2532 }, { "epoch": 0.29096548159209695, "grad_norm": 0.3861776888370514, "learning_rate": 0.0001, "loss": 1.7241, "step": 2533 }, { "epoch": 0.2910803515019241, "grad_norm": 0.3621794283390045, "learning_rate": 0.0001, "loss": 1.6201, "step": 2534 }, { "epoch": 0.2911952214117512, "grad_norm": 0.40973809361457825, "learning_rate": 0.0001, "loss": 1.7886, "step": 2535 }, { "epoch": 0.2913100913215783, "grad_norm": 0.36391711235046387, "learning_rate": 0.0001, "loss": 1.6253, "step": 2536 }, { "epoch": 0.29142496123140543, "grad_norm": 0.38137203454971313, "learning_rate": 0.0001, "loss": 1.4499, "step": 2537 }, { "epoch": 0.29153983114123255, "grad_norm": 0.3694712817668915, "learning_rate": 0.0001, "loss": 1.5863, "step": 2538 }, { "epoch": 0.2916547010510597, "grad_norm": 0.38381004333496094, "learning_rate": 0.0001, "loss": 1.8341, "step": 2539 }, { "epoch": 0.2917695709608868, "grad_norm": 0.39511749148368835, "learning_rate": 0.0001, "loss": 1.8543, "step": 2540 }, { "epoch": 0.2918844408707139, "grad_norm": 0.41761019825935364, "learning_rate": 0.0001, "loss": 1.6229, "step": 2541 }, { "epoch": 0.29199931078054103, "grad_norm": 0.4057486355304718, "learning_rate": 0.0001, "loss": 1.7036, "step": 2542 }, { "epoch": 0.29211418069036815, "grad_norm": 0.35340383648872375, "learning_rate": 0.0001, "loss": 1.4367, "step": 2543 }, { "epoch": 0.2922290506001953, "grad_norm": 0.3808727264404297, "learning_rate": 0.0001, "loss": 1.644, "step": 2544 }, { "epoch": 0.2923439205100224, "grad_norm": 0.38112786412239075, "learning_rate": 0.0001, "loss": 1.6942, "step": 2545 }, { "epoch": 0.2924587904198495, "grad_norm": 0.41311514377593994, "learning_rate": 0.0001, "loss": 1.8936, "step": 2546 }, { "epoch": 0.29257366032967663, "grad_norm": 0.37865912914276123, "learning_rate": 0.0001, "loss": 1.7468, "step": 2547 }, { "epoch": 0.29268853023950375, "grad_norm": 0.379802942276001, "learning_rate": 0.0001, "loss": 1.6966, "step": 2548 }, { "epoch": 0.2928034001493309, "grad_norm": 0.4265817105770111, "learning_rate": 0.0001, "loss": 1.6835, "step": 2549 }, { "epoch": 0.292918270059158, "grad_norm": 0.38082998991012573, "learning_rate": 0.0001, "loss": 1.6804, "step": 2550 }, { "epoch": 0.2930331399689851, "grad_norm": 0.38684189319610596, "learning_rate": 0.0001, "loss": 1.9011, "step": 2551 }, { "epoch": 0.29314800987881223, "grad_norm": 0.38656875491142273, "learning_rate": 0.0001, "loss": 1.7264, "step": 2552 }, { "epoch": 0.29326287978863935, "grad_norm": 0.365608274936676, "learning_rate": 0.0001, "loss": 1.4035, "step": 2553 }, { "epoch": 0.2933777496984665, "grad_norm": 0.3454169034957886, "learning_rate": 0.0001, "loss": 1.5401, "step": 2554 }, { "epoch": 0.2934926196082936, "grad_norm": 0.3723081052303314, "learning_rate": 0.0001, "loss": 1.5431, "step": 2555 }, { "epoch": 0.2936074895181207, "grad_norm": 0.35232463479042053, "learning_rate": 0.0001, "loss": 1.5584, "step": 2556 }, { "epoch": 0.29372235942794783, "grad_norm": 0.36890724301338196, "learning_rate": 0.0001, "loss": 1.6345, "step": 2557 }, { "epoch": 0.29383722933777495, "grad_norm": 0.39997896552085876, "learning_rate": 0.0001, "loss": 1.9178, "step": 2558 }, { "epoch": 0.29395209924760207, "grad_norm": 0.3915958106517792, "learning_rate": 0.0001, "loss": 1.6637, "step": 2559 }, { "epoch": 0.2940669691574292, "grad_norm": 0.3597055971622467, "learning_rate": 0.0001, "loss": 1.5527, "step": 2560 }, { "epoch": 0.2941818390672563, "grad_norm": 0.38754889369010925, "learning_rate": 0.0001, "loss": 1.68, "step": 2561 }, { "epoch": 0.29429670897708343, "grad_norm": 0.37158629298210144, "learning_rate": 0.0001, "loss": 1.6678, "step": 2562 }, { "epoch": 0.29441157888691055, "grad_norm": 0.393255352973938, "learning_rate": 0.0001, "loss": 1.5541, "step": 2563 }, { "epoch": 0.29452644879673767, "grad_norm": 0.4069103002548218, "learning_rate": 0.0001, "loss": 1.6978, "step": 2564 }, { "epoch": 0.2946413187065648, "grad_norm": 0.3824077844619751, "learning_rate": 0.0001, "loss": 1.628, "step": 2565 }, { "epoch": 0.2947561886163919, "grad_norm": 0.3937399387359619, "learning_rate": 0.0001, "loss": 1.7788, "step": 2566 }, { "epoch": 0.29487105852621903, "grad_norm": 0.4155014753341675, "learning_rate": 0.0001, "loss": 1.8913, "step": 2567 }, { "epoch": 0.29498592843604615, "grad_norm": 0.3631512224674225, "learning_rate": 0.0001, "loss": 1.6648, "step": 2568 }, { "epoch": 0.29510079834587327, "grad_norm": 0.37146568298339844, "learning_rate": 0.0001, "loss": 1.5236, "step": 2569 }, { "epoch": 0.2952156682557004, "grad_norm": 0.3793732523918152, "learning_rate": 0.0001, "loss": 1.77, "step": 2570 }, { "epoch": 0.29533053816552757, "grad_norm": 0.39151185750961304, "learning_rate": 0.0001, "loss": 1.7726, "step": 2571 }, { "epoch": 0.2954454080753547, "grad_norm": 0.38116058707237244, "learning_rate": 0.0001, "loss": 1.8428, "step": 2572 }, { "epoch": 0.2955602779851818, "grad_norm": 0.3952963650226593, "learning_rate": 0.0001, "loss": 1.6293, "step": 2573 }, { "epoch": 0.2956751478950089, "grad_norm": 0.3993338942527771, "learning_rate": 0.0001, "loss": 1.9019, "step": 2574 }, { "epoch": 0.29579001780483605, "grad_norm": 0.37218764424324036, "learning_rate": 0.0001, "loss": 1.564, "step": 2575 }, { "epoch": 0.29590488771466317, "grad_norm": 0.3602159023284912, "learning_rate": 0.0001, "loss": 1.5937, "step": 2576 }, { "epoch": 0.2960197576244903, "grad_norm": 0.4018074572086334, "learning_rate": 0.0001, "loss": 1.7806, "step": 2577 }, { "epoch": 0.2961346275343174, "grad_norm": 0.38347718119621277, "learning_rate": 0.0001, "loss": 1.6941, "step": 2578 }, { "epoch": 0.2962494974441445, "grad_norm": 0.3954737186431885, "learning_rate": 0.0001, "loss": 1.8642, "step": 2579 }, { "epoch": 0.29636436735397165, "grad_norm": 0.4196760952472687, "learning_rate": 0.0001, "loss": 1.6487, "step": 2580 }, { "epoch": 0.29647923726379877, "grad_norm": 0.3532737195491791, "learning_rate": 0.0001, "loss": 1.6036, "step": 2581 }, { "epoch": 0.2965941071736259, "grad_norm": 0.36641091108322144, "learning_rate": 0.0001, "loss": 1.4252, "step": 2582 }, { "epoch": 0.296708977083453, "grad_norm": 0.37007638812065125, "learning_rate": 0.0001, "loss": 1.6963, "step": 2583 }, { "epoch": 0.2968238469932801, "grad_norm": 0.3760312795639038, "learning_rate": 0.0001, "loss": 1.613, "step": 2584 }, { "epoch": 0.29693871690310725, "grad_norm": 0.392160564661026, "learning_rate": 0.0001, "loss": 1.759, "step": 2585 }, { "epoch": 0.29705358681293437, "grad_norm": 0.3935618996620178, "learning_rate": 0.0001, "loss": 1.787, "step": 2586 }, { "epoch": 0.2971684567227615, "grad_norm": 0.3662956655025482, "learning_rate": 0.0001, "loss": 1.6899, "step": 2587 }, { "epoch": 0.2972833266325886, "grad_norm": 0.4194296598434448, "learning_rate": 0.0001, "loss": 1.7168, "step": 2588 }, { "epoch": 0.2973981965424157, "grad_norm": 0.37482190132141113, "learning_rate": 0.0001, "loss": 1.8223, "step": 2589 }, { "epoch": 0.29751306645224285, "grad_norm": 0.38104763627052307, "learning_rate": 0.0001, "loss": 1.7773, "step": 2590 }, { "epoch": 0.29762793636206997, "grad_norm": 0.37571361660957336, "learning_rate": 0.0001, "loss": 1.6534, "step": 2591 }, { "epoch": 0.2977428062718971, "grad_norm": 0.4095185697078705, "learning_rate": 0.0001, "loss": 1.5866, "step": 2592 }, { "epoch": 0.2978576761817242, "grad_norm": 0.4440751075744629, "learning_rate": 0.0001, "loss": 1.646, "step": 2593 }, { "epoch": 0.2979725460915513, "grad_norm": 0.3924051821231842, "learning_rate": 0.0001, "loss": 1.6179, "step": 2594 }, { "epoch": 0.29808741600137845, "grad_norm": 0.3902948796749115, "learning_rate": 0.0001, "loss": 1.7332, "step": 2595 }, { "epoch": 0.29820228591120557, "grad_norm": 0.3790493607521057, "learning_rate": 0.0001, "loss": 1.7428, "step": 2596 }, { "epoch": 0.2983171558210327, "grad_norm": 0.38171616196632385, "learning_rate": 0.0001, "loss": 1.6307, "step": 2597 }, { "epoch": 0.2984320257308598, "grad_norm": 0.40960413217544556, "learning_rate": 0.0001, "loss": 1.9166, "step": 2598 }, { "epoch": 0.2985468956406869, "grad_norm": 0.3882502019405365, "learning_rate": 0.0001, "loss": 1.7118, "step": 2599 }, { "epoch": 0.29866176555051405, "grad_norm": 0.3626171350479126, "learning_rate": 0.0001, "loss": 1.6102, "step": 2600 }, { "epoch": 0.29877663546034117, "grad_norm": 0.37243038415908813, "learning_rate": 0.0001, "loss": 1.6959, "step": 2601 }, { "epoch": 0.2988915053701683, "grad_norm": 0.40221109986305237, "learning_rate": 0.0001, "loss": 1.6998, "step": 2602 }, { "epoch": 0.2990063752799954, "grad_norm": 0.3771398663520813, "learning_rate": 0.0001, "loss": 1.6593, "step": 2603 }, { "epoch": 0.2991212451898225, "grad_norm": 0.3790495991706848, "learning_rate": 0.0001, "loss": 1.6981, "step": 2604 }, { "epoch": 0.29923611509964965, "grad_norm": 0.3962880074977875, "learning_rate": 0.0001, "loss": 1.7409, "step": 2605 }, { "epoch": 0.29935098500947677, "grad_norm": 0.3856731057167053, "learning_rate": 0.0001, "loss": 1.8138, "step": 2606 }, { "epoch": 0.2994658549193039, "grad_norm": 0.37686559557914734, "learning_rate": 0.0001, "loss": 1.7306, "step": 2607 }, { "epoch": 0.299580724829131, "grad_norm": 0.4329466223716736, "learning_rate": 0.0001, "loss": 2.0436, "step": 2608 }, { "epoch": 0.2996955947389581, "grad_norm": 0.3704317808151245, "learning_rate": 0.0001, "loss": 1.6667, "step": 2609 }, { "epoch": 0.29981046464878525, "grad_norm": 0.42832422256469727, "learning_rate": 0.0001, "loss": 1.7882, "step": 2610 }, { "epoch": 0.29992533455861237, "grad_norm": 0.416471928358078, "learning_rate": 0.0001, "loss": 1.8709, "step": 2611 }, { "epoch": 0.3000402044684395, "grad_norm": 0.35787421464920044, "learning_rate": 0.0001, "loss": 1.6281, "step": 2612 }, { "epoch": 0.3001550743782666, "grad_norm": 0.37828290462493896, "learning_rate": 0.0001, "loss": 1.5254, "step": 2613 }, { "epoch": 0.3002699442880937, "grad_norm": 0.3505316972732544, "learning_rate": 0.0001, "loss": 1.6496, "step": 2614 }, { "epoch": 0.30038481419792085, "grad_norm": 0.3572443127632141, "learning_rate": 0.0001, "loss": 1.6333, "step": 2615 }, { "epoch": 0.30049968410774797, "grad_norm": 0.3744872808456421, "learning_rate": 0.0001, "loss": 1.7276, "step": 2616 }, { "epoch": 0.3006145540175751, "grad_norm": 0.38314029574394226, "learning_rate": 0.0001, "loss": 1.6795, "step": 2617 }, { "epoch": 0.3007294239274022, "grad_norm": 0.38417288661003113, "learning_rate": 0.0001, "loss": 1.7798, "step": 2618 }, { "epoch": 0.3008442938372293, "grad_norm": 0.39451834559440613, "learning_rate": 0.0001, "loss": 1.7956, "step": 2619 }, { "epoch": 0.30095916374705645, "grad_norm": 0.3972351849079132, "learning_rate": 0.0001, "loss": 1.654, "step": 2620 }, { "epoch": 0.30107403365688357, "grad_norm": 0.4091535806655884, "learning_rate": 0.0001, "loss": 1.9206, "step": 2621 }, { "epoch": 0.3011889035667107, "grad_norm": 0.3716078996658325, "learning_rate": 0.0001, "loss": 1.432, "step": 2622 }, { "epoch": 0.3013037734765378, "grad_norm": 0.34581923484802246, "learning_rate": 0.0001, "loss": 1.3475, "step": 2623 }, { "epoch": 0.3014186433863649, "grad_norm": 0.3731369376182556, "learning_rate": 0.0001, "loss": 1.613, "step": 2624 }, { "epoch": 0.30153351329619205, "grad_norm": 0.36073705554008484, "learning_rate": 0.0001, "loss": 1.6255, "step": 2625 }, { "epoch": 0.30164838320601917, "grad_norm": 0.3785097002983093, "learning_rate": 0.0001, "loss": 1.7506, "step": 2626 }, { "epoch": 0.3017632531158463, "grad_norm": 0.346171498298645, "learning_rate": 0.0001, "loss": 1.4672, "step": 2627 }, { "epoch": 0.3018781230256734, "grad_norm": 0.353345662355423, "learning_rate": 0.0001, "loss": 1.652, "step": 2628 }, { "epoch": 0.3019929929355005, "grad_norm": 0.3921557366847992, "learning_rate": 0.0001, "loss": 1.8869, "step": 2629 }, { "epoch": 0.30210786284532765, "grad_norm": 0.377298504114151, "learning_rate": 0.0001, "loss": 1.8358, "step": 2630 }, { "epoch": 0.30222273275515477, "grad_norm": 0.3824778199195862, "learning_rate": 0.0001, "loss": 1.705, "step": 2631 }, { "epoch": 0.3023376026649819, "grad_norm": 0.371186763048172, "learning_rate": 0.0001, "loss": 1.5894, "step": 2632 }, { "epoch": 0.302452472574809, "grad_norm": 0.3520771563053131, "learning_rate": 0.0001, "loss": 1.4233, "step": 2633 }, { "epoch": 0.3025673424846361, "grad_norm": 0.3969862461090088, "learning_rate": 0.0001, "loss": 1.7946, "step": 2634 }, { "epoch": 0.30268221239446325, "grad_norm": 0.39209333062171936, "learning_rate": 0.0001, "loss": 1.5675, "step": 2635 }, { "epoch": 0.30279708230429037, "grad_norm": 0.4001356065273285, "learning_rate": 0.0001, "loss": 1.5882, "step": 2636 }, { "epoch": 0.3029119522141175, "grad_norm": 0.3858399987220764, "learning_rate": 0.0001, "loss": 1.7268, "step": 2637 }, { "epoch": 0.3030268221239446, "grad_norm": 0.3735487163066864, "learning_rate": 0.0001, "loss": 1.5653, "step": 2638 }, { "epoch": 0.3031416920337717, "grad_norm": 0.39118990302085876, "learning_rate": 0.0001, "loss": 1.6705, "step": 2639 }, { "epoch": 0.3032565619435989, "grad_norm": 0.42595022916793823, "learning_rate": 0.0001, "loss": 1.7263, "step": 2640 }, { "epoch": 0.303371431853426, "grad_norm": 0.41437122225761414, "learning_rate": 0.0001, "loss": 1.8724, "step": 2641 }, { "epoch": 0.30348630176325314, "grad_norm": 0.3900952637195587, "learning_rate": 0.0001, "loss": 1.673, "step": 2642 }, { "epoch": 0.30360117167308026, "grad_norm": 0.38442274928092957, "learning_rate": 0.0001, "loss": 1.6173, "step": 2643 }, { "epoch": 0.3037160415829074, "grad_norm": 0.39064112305641174, "learning_rate": 0.0001, "loss": 1.7353, "step": 2644 }, { "epoch": 0.3038309114927345, "grad_norm": 0.3769366443157196, "learning_rate": 0.0001, "loss": 1.5141, "step": 2645 }, { "epoch": 0.3039457814025616, "grad_norm": 0.39268919825553894, "learning_rate": 0.0001, "loss": 1.6863, "step": 2646 }, { "epoch": 0.30406065131238874, "grad_norm": 0.3635852038860321, "learning_rate": 0.0001, "loss": 1.6138, "step": 2647 }, { "epoch": 0.30417552122221586, "grad_norm": 0.36260101199150085, "learning_rate": 0.0001, "loss": 1.5824, "step": 2648 }, { "epoch": 0.304290391132043, "grad_norm": 0.37813135981559753, "learning_rate": 0.0001, "loss": 1.7067, "step": 2649 }, { "epoch": 0.3044052610418701, "grad_norm": 0.39040499925613403, "learning_rate": 0.0001, "loss": 1.6515, "step": 2650 }, { "epoch": 0.3045201309516972, "grad_norm": 0.36264294385910034, "learning_rate": 0.0001, "loss": 1.589, "step": 2651 }, { "epoch": 0.30463500086152434, "grad_norm": 0.3571374714374542, "learning_rate": 0.0001, "loss": 1.5062, "step": 2652 }, { "epoch": 0.30474987077135146, "grad_norm": 0.3698153793811798, "learning_rate": 0.0001, "loss": 1.5825, "step": 2653 }, { "epoch": 0.3048647406811786, "grad_norm": 0.37908482551574707, "learning_rate": 0.0001, "loss": 1.7519, "step": 2654 }, { "epoch": 0.3049796105910057, "grad_norm": 0.4145006835460663, "learning_rate": 0.0001, "loss": 2.0428, "step": 2655 }, { "epoch": 0.3050944805008328, "grad_norm": 0.37752121686935425, "learning_rate": 0.0001, "loss": 1.7289, "step": 2656 }, { "epoch": 0.30520935041065994, "grad_norm": 0.41512149572372437, "learning_rate": 0.0001, "loss": 2.0011, "step": 2657 }, { "epoch": 0.30532422032048706, "grad_norm": 0.3637721836566925, "learning_rate": 0.0001, "loss": 1.6781, "step": 2658 }, { "epoch": 0.3054390902303142, "grad_norm": 0.3608452081680298, "learning_rate": 0.0001, "loss": 1.5406, "step": 2659 }, { "epoch": 0.3055539601401413, "grad_norm": 0.4033581018447876, "learning_rate": 0.0001, "loss": 1.8266, "step": 2660 }, { "epoch": 0.3056688300499684, "grad_norm": 0.371520459651947, "learning_rate": 0.0001, "loss": 1.6083, "step": 2661 }, { "epoch": 0.30578369995979554, "grad_norm": 0.36730000376701355, "learning_rate": 0.0001, "loss": 1.5817, "step": 2662 }, { "epoch": 0.30589856986962266, "grad_norm": 0.376396119594574, "learning_rate": 0.0001, "loss": 1.7396, "step": 2663 }, { "epoch": 0.3060134397794498, "grad_norm": 0.35428953170776367, "learning_rate": 0.0001, "loss": 1.589, "step": 2664 }, { "epoch": 0.3061283096892769, "grad_norm": 0.4117322266101837, "learning_rate": 0.0001, "loss": 1.7213, "step": 2665 }, { "epoch": 0.306243179599104, "grad_norm": 0.39247551560401917, "learning_rate": 0.0001, "loss": 1.6146, "step": 2666 }, { "epoch": 0.30635804950893114, "grad_norm": 0.39449337124824524, "learning_rate": 0.0001, "loss": 1.8517, "step": 2667 }, { "epoch": 0.30647291941875826, "grad_norm": 0.3695959150791168, "learning_rate": 0.0001, "loss": 1.5881, "step": 2668 }, { "epoch": 0.3065877893285854, "grad_norm": 0.3867664635181427, "learning_rate": 0.0001, "loss": 1.7339, "step": 2669 }, { "epoch": 0.3067026592384125, "grad_norm": 0.4185912311077118, "learning_rate": 0.0001, "loss": 1.9492, "step": 2670 }, { "epoch": 0.3068175291482396, "grad_norm": 0.365018755197525, "learning_rate": 0.0001, "loss": 1.6135, "step": 2671 }, { "epoch": 0.30693239905806674, "grad_norm": 0.4161297380924225, "learning_rate": 0.0001, "loss": 1.7588, "step": 2672 }, { "epoch": 0.30704726896789386, "grad_norm": 0.4104420840740204, "learning_rate": 0.0001, "loss": 1.6287, "step": 2673 }, { "epoch": 0.307162138877721, "grad_norm": 0.393228143453598, "learning_rate": 0.0001, "loss": 1.6481, "step": 2674 }, { "epoch": 0.3072770087875481, "grad_norm": 0.37595561146736145, "learning_rate": 0.0001, "loss": 1.7663, "step": 2675 }, { "epoch": 0.3073918786973752, "grad_norm": 0.3587210178375244, "learning_rate": 0.0001, "loss": 1.505, "step": 2676 }, { "epoch": 0.30750674860720234, "grad_norm": 0.37775328755378723, "learning_rate": 0.0001, "loss": 1.6793, "step": 2677 }, { "epoch": 0.30762161851702946, "grad_norm": 0.3870543837547302, "learning_rate": 0.0001, "loss": 1.73, "step": 2678 }, { "epoch": 0.3077364884268566, "grad_norm": 0.3819582760334015, "learning_rate": 0.0001, "loss": 1.6968, "step": 2679 }, { "epoch": 0.3078513583366837, "grad_norm": 0.3661853075027466, "learning_rate": 0.0001, "loss": 1.6436, "step": 2680 }, { "epoch": 0.3079662282465108, "grad_norm": 0.356086790561676, "learning_rate": 0.0001, "loss": 1.7138, "step": 2681 }, { "epoch": 0.30808109815633794, "grad_norm": 0.34564852714538574, "learning_rate": 0.0001, "loss": 1.4758, "step": 2682 }, { "epoch": 0.30819596806616506, "grad_norm": 0.37625306844711304, "learning_rate": 0.0001, "loss": 1.8159, "step": 2683 }, { "epoch": 0.3083108379759922, "grad_norm": 0.4367530345916748, "learning_rate": 0.0001, "loss": 1.9309, "step": 2684 }, { "epoch": 0.3084257078858193, "grad_norm": 0.41043978929519653, "learning_rate": 0.0001, "loss": 1.9249, "step": 2685 }, { "epoch": 0.3085405777956464, "grad_norm": 0.36539000272750854, "learning_rate": 0.0001, "loss": 1.689, "step": 2686 }, { "epoch": 0.30865544770547354, "grad_norm": 0.3621140718460083, "learning_rate": 0.0001, "loss": 1.6027, "step": 2687 }, { "epoch": 0.30877031761530066, "grad_norm": 0.34792810678482056, "learning_rate": 0.0001, "loss": 1.5316, "step": 2688 }, { "epoch": 0.3088851875251278, "grad_norm": 0.37253010272979736, "learning_rate": 0.0001, "loss": 1.6275, "step": 2689 }, { "epoch": 0.3090000574349549, "grad_norm": 0.3895919919013977, "learning_rate": 0.0001, "loss": 1.6633, "step": 2690 }, { "epoch": 0.309114927344782, "grad_norm": 0.3458951711654663, "learning_rate": 0.0001, "loss": 1.5419, "step": 2691 }, { "epoch": 0.30922979725460914, "grad_norm": 0.34888923168182373, "learning_rate": 0.0001, "loss": 1.6153, "step": 2692 }, { "epoch": 0.30934466716443626, "grad_norm": 0.3860279321670532, "learning_rate": 0.0001, "loss": 1.6428, "step": 2693 }, { "epoch": 0.3094595370742634, "grad_norm": 0.3899478018283844, "learning_rate": 0.0001, "loss": 1.8091, "step": 2694 }, { "epoch": 0.3095744069840905, "grad_norm": 0.3502478003501892, "learning_rate": 0.0001, "loss": 1.6229, "step": 2695 }, { "epoch": 0.3096892768939176, "grad_norm": 0.3814723789691925, "learning_rate": 0.0001, "loss": 1.6441, "step": 2696 }, { "epoch": 0.30980414680374474, "grad_norm": 0.4145774245262146, "learning_rate": 0.0001, "loss": 1.6666, "step": 2697 }, { "epoch": 0.30991901671357186, "grad_norm": 0.3777678906917572, "learning_rate": 0.0001, "loss": 1.7871, "step": 2698 }, { "epoch": 0.310033886623399, "grad_norm": 0.39043325185775757, "learning_rate": 0.0001, "loss": 1.8022, "step": 2699 }, { "epoch": 0.3101487565332261, "grad_norm": 0.39468830823898315, "learning_rate": 0.0001, "loss": 1.6472, "step": 2700 }, { "epoch": 0.3102636264430532, "grad_norm": 0.41475868225097656, "learning_rate": 0.0001, "loss": 1.8264, "step": 2701 }, { "epoch": 0.31037849635288034, "grad_norm": 0.387824147939682, "learning_rate": 0.0001, "loss": 1.7159, "step": 2702 }, { "epoch": 0.31049336626270746, "grad_norm": 0.38441115617752075, "learning_rate": 0.0001, "loss": 1.6584, "step": 2703 }, { "epoch": 0.3106082361725346, "grad_norm": 0.374197781085968, "learning_rate": 0.0001, "loss": 1.6529, "step": 2704 }, { "epoch": 0.3107231060823617, "grad_norm": 0.3562909960746765, "learning_rate": 0.0001, "loss": 1.4979, "step": 2705 }, { "epoch": 0.3108379759921888, "grad_norm": 0.38204044103622437, "learning_rate": 0.0001, "loss": 1.8704, "step": 2706 }, { "epoch": 0.31095284590201594, "grad_norm": 0.3814204931259155, "learning_rate": 0.0001, "loss": 1.5465, "step": 2707 }, { "epoch": 0.3110677158118431, "grad_norm": 0.4282824993133545, "learning_rate": 0.0001, "loss": 1.6085, "step": 2708 }, { "epoch": 0.31118258572167024, "grad_norm": 0.3564637005329132, "learning_rate": 0.0001, "loss": 1.6366, "step": 2709 }, { "epoch": 0.31129745563149736, "grad_norm": 0.4151432514190674, "learning_rate": 0.0001, "loss": 1.9175, "step": 2710 }, { "epoch": 0.3114123255413245, "grad_norm": 0.37644943594932556, "learning_rate": 0.0001, "loss": 1.5751, "step": 2711 }, { "epoch": 0.3115271954511516, "grad_norm": 0.39474377036094666, "learning_rate": 0.0001, "loss": 1.6295, "step": 2712 }, { "epoch": 0.3116420653609787, "grad_norm": 0.3828750550746918, "learning_rate": 0.0001, "loss": 1.6704, "step": 2713 }, { "epoch": 0.31175693527080584, "grad_norm": 0.38936948776245117, "learning_rate": 0.0001, "loss": 1.6456, "step": 2714 }, { "epoch": 0.31187180518063295, "grad_norm": 0.40668943524360657, "learning_rate": 0.0001, "loss": 1.708, "step": 2715 }, { "epoch": 0.3119866750904601, "grad_norm": 0.36367443203926086, "learning_rate": 0.0001, "loss": 1.6902, "step": 2716 }, { "epoch": 0.3121015450002872, "grad_norm": 0.4468287229537964, "learning_rate": 0.0001, "loss": 1.5658, "step": 2717 }, { "epoch": 0.3122164149101143, "grad_norm": 0.3429298400878906, "learning_rate": 0.0001, "loss": 1.4857, "step": 2718 }, { "epoch": 0.31233128481994143, "grad_norm": 0.4072478711605072, "learning_rate": 0.0001, "loss": 1.7749, "step": 2719 }, { "epoch": 0.31244615472976855, "grad_norm": 0.37284785509109497, "learning_rate": 0.0001, "loss": 1.6334, "step": 2720 }, { "epoch": 0.3125610246395957, "grad_norm": 0.41400986909866333, "learning_rate": 0.0001, "loss": 1.936, "step": 2721 }, { "epoch": 0.3126758945494228, "grad_norm": 0.3585307002067566, "learning_rate": 0.0001, "loss": 1.3443, "step": 2722 }, { "epoch": 0.3127907644592499, "grad_norm": 0.41940388083457947, "learning_rate": 0.0001, "loss": 1.8787, "step": 2723 }, { "epoch": 0.31290563436907703, "grad_norm": 0.3974437713623047, "learning_rate": 0.0001, "loss": 1.6164, "step": 2724 }, { "epoch": 0.31302050427890415, "grad_norm": 0.39561134576797485, "learning_rate": 0.0001, "loss": 1.7365, "step": 2725 }, { "epoch": 0.3131353741887313, "grad_norm": 0.36751341819763184, "learning_rate": 0.0001, "loss": 1.6052, "step": 2726 }, { "epoch": 0.3132502440985584, "grad_norm": 0.41230806708335876, "learning_rate": 0.0001, "loss": 1.5398, "step": 2727 }, { "epoch": 0.3133651140083855, "grad_norm": 0.3846902847290039, "learning_rate": 0.0001, "loss": 1.6922, "step": 2728 }, { "epoch": 0.31347998391821263, "grad_norm": 0.40803879499435425, "learning_rate": 0.0001, "loss": 1.7361, "step": 2729 }, { "epoch": 0.31359485382803975, "grad_norm": 0.35604923963546753, "learning_rate": 0.0001, "loss": 1.5149, "step": 2730 }, { "epoch": 0.3137097237378669, "grad_norm": 0.38761159777641296, "learning_rate": 0.0001, "loss": 1.7401, "step": 2731 }, { "epoch": 0.313824593647694, "grad_norm": 0.42147189378738403, "learning_rate": 0.0001, "loss": 1.7959, "step": 2732 }, { "epoch": 0.3139394635575211, "grad_norm": 0.3850533366203308, "learning_rate": 0.0001, "loss": 1.8302, "step": 2733 }, { "epoch": 0.31405433346734823, "grad_norm": 0.3670084476470947, "learning_rate": 0.0001, "loss": 1.546, "step": 2734 }, { "epoch": 0.31416920337717535, "grad_norm": 0.3647415041923523, "learning_rate": 0.0001, "loss": 1.5944, "step": 2735 }, { "epoch": 0.3142840732870025, "grad_norm": 0.38492029905319214, "learning_rate": 0.0001, "loss": 1.5857, "step": 2736 }, { "epoch": 0.3143989431968296, "grad_norm": 0.4054207503795624, "learning_rate": 0.0001, "loss": 1.8419, "step": 2737 }, { "epoch": 0.3145138131066567, "grad_norm": 0.36467689275741577, "learning_rate": 0.0001, "loss": 1.5701, "step": 2738 }, { "epoch": 0.31462868301648383, "grad_norm": 0.3815039098262787, "learning_rate": 0.0001, "loss": 1.6893, "step": 2739 }, { "epoch": 0.31474355292631095, "grad_norm": 0.3768649697303772, "learning_rate": 0.0001, "loss": 1.5021, "step": 2740 }, { "epoch": 0.3148584228361381, "grad_norm": 0.36210617423057556, "learning_rate": 0.0001, "loss": 1.7335, "step": 2741 }, { "epoch": 0.3149732927459652, "grad_norm": 0.41380202770233154, "learning_rate": 0.0001, "loss": 1.799, "step": 2742 }, { "epoch": 0.3150881626557923, "grad_norm": 0.38201257586479187, "learning_rate": 0.0001, "loss": 1.669, "step": 2743 }, { "epoch": 0.31520303256561943, "grad_norm": 0.383025586605072, "learning_rate": 0.0001, "loss": 1.5881, "step": 2744 }, { "epoch": 0.31531790247544655, "grad_norm": 0.3883838653564453, "learning_rate": 0.0001, "loss": 1.6941, "step": 2745 }, { "epoch": 0.3154327723852737, "grad_norm": 0.35281357169151306, "learning_rate": 0.0001, "loss": 1.4192, "step": 2746 }, { "epoch": 0.3155476422951008, "grad_norm": 0.40777984261512756, "learning_rate": 0.0001, "loss": 1.6675, "step": 2747 }, { "epoch": 0.3156625122049279, "grad_norm": 0.36390420794487, "learning_rate": 0.0001, "loss": 1.5117, "step": 2748 }, { "epoch": 0.31577738211475503, "grad_norm": 0.38784828782081604, "learning_rate": 0.0001, "loss": 1.7435, "step": 2749 }, { "epoch": 0.31589225202458215, "grad_norm": 0.4247525632381439, "learning_rate": 0.0001, "loss": 1.7565, "step": 2750 }, { "epoch": 0.3160071219344093, "grad_norm": 0.38927143812179565, "learning_rate": 0.0001, "loss": 1.6956, "step": 2751 }, { "epoch": 0.3161219918442364, "grad_norm": 0.34700581431388855, "learning_rate": 0.0001, "loss": 1.6371, "step": 2752 }, { "epoch": 0.3162368617540635, "grad_norm": 0.3716479241847992, "learning_rate": 0.0001, "loss": 1.5576, "step": 2753 }, { "epoch": 0.31635173166389063, "grad_norm": 0.4048490822315216, "learning_rate": 0.0001, "loss": 1.7723, "step": 2754 }, { "epoch": 0.31646660157371775, "grad_norm": 0.36082956194877625, "learning_rate": 0.0001, "loss": 1.6133, "step": 2755 }, { "epoch": 0.3165814714835449, "grad_norm": 0.40957361459732056, "learning_rate": 0.0001, "loss": 1.845, "step": 2756 }, { "epoch": 0.316696341393372, "grad_norm": 0.3455057442188263, "learning_rate": 0.0001, "loss": 1.5038, "step": 2757 }, { "epoch": 0.3168112113031991, "grad_norm": 0.3513345718383789, "learning_rate": 0.0001, "loss": 1.4897, "step": 2758 }, { "epoch": 0.31692608121302623, "grad_norm": 0.3828970491886139, "learning_rate": 0.0001, "loss": 1.6297, "step": 2759 }, { "epoch": 0.31704095112285335, "grad_norm": 0.370225191116333, "learning_rate": 0.0001, "loss": 1.6051, "step": 2760 }, { "epoch": 0.3171558210326805, "grad_norm": 0.4304163157939911, "learning_rate": 0.0001, "loss": 1.9163, "step": 2761 }, { "epoch": 0.3172706909425076, "grad_norm": 0.3837917745113373, "learning_rate": 0.0001, "loss": 1.5688, "step": 2762 }, { "epoch": 0.3173855608523347, "grad_norm": 0.3771938383579254, "learning_rate": 0.0001, "loss": 1.7106, "step": 2763 }, { "epoch": 0.31750043076216183, "grad_norm": 0.3861342668533325, "learning_rate": 0.0001, "loss": 1.6719, "step": 2764 }, { "epoch": 0.31761530067198895, "grad_norm": 0.36387091875076294, "learning_rate": 0.0001, "loss": 1.6573, "step": 2765 }, { "epoch": 0.3177301705818161, "grad_norm": 0.3894106447696686, "learning_rate": 0.0001, "loss": 1.4844, "step": 2766 }, { "epoch": 0.3178450404916432, "grad_norm": 0.37301284074783325, "learning_rate": 0.0001, "loss": 1.6716, "step": 2767 }, { "epoch": 0.3179599104014703, "grad_norm": 0.38504043221473694, "learning_rate": 0.0001, "loss": 1.6545, "step": 2768 }, { "epoch": 0.31807478031129743, "grad_norm": 0.37516283988952637, "learning_rate": 0.0001, "loss": 1.6915, "step": 2769 }, { "epoch": 0.31818965022112455, "grad_norm": 0.3629774749279022, "learning_rate": 0.0001, "loss": 1.5294, "step": 2770 }, { "epoch": 0.3183045201309517, "grad_norm": 0.3853417634963989, "learning_rate": 0.0001, "loss": 1.7261, "step": 2771 }, { "epoch": 0.3184193900407788, "grad_norm": 0.36345425248146057, "learning_rate": 0.0001, "loss": 1.4298, "step": 2772 }, { "epoch": 0.3185342599506059, "grad_norm": 0.3774106204509735, "learning_rate": 0.0001, "loss": 1.6075, "step": 2773 }, { "epoch": 0.31864912986043303, "grad_norm": 0.40592819452285767, "learning_rate": 0.0001, "loss": 1.8993, "step": 2774 }, { "epoch": 0.31876399977026015, "grad_norm": 0.38778918981552124, "learning_rate": 0.0001, "loss": 1.5641, "step": 2775 }, { "epoch": 0.3188788696800873, "grad_norm": 0.39623749256134033, "learning_rate": 0.0001, "loss": 1.7249, "step": 2776 }, { "epoch": 0.31899373958991445, "grad_norm": 0.3900299072265625, "learning_rate": 0.0001, "loss": 1.7291, "step": 2777 }, { "epoch": 0.31910860949974157, "grad_norm": 0.3717004358768463, "learning_rate": 0.0001, "loss": 1.5562, "step": 2778 }, { "epoch": 0.3192234794095687, "grad_norm": 0.38834843039512634, "learning_rate": 0.0001, "loss": 1.7519, "step": 2779 }, { "epoch": 0.3193383493193958, "grad_norm": 0.3893420994281769, "learning_rate": 0.0001, "loss": 1.7821, "step": 2780 }, { "epoch": 0.31945321922922293, "grad_norm": 0.4312572479248047, "learning_rate": 0.0001, "loss": 1.6432, "step": 2781 }, { "epoch": 0.31956808913905005, "grad_norm": 0.3759611248970032, "learning_rate": 0.0001, "loss": 1.5577, "step": 2782 }, { "epoch": 0.31968295904887717, "grad_norm": 0.37230929732322693, "learning_rate": 0.0001, "loss": 1.6343, "step": 2783 }, { "epoch": 0.3197978289587043, "grad_norm": 0.3799343407154083, "learning_rate": 0.0001, "loss": 1.5809, "step": 2784 }, { "epoch": 0.3199126988685314, "grad_norm": 0.38527607917785645, "learning_rate": 0.0001, "loss": 1.7334, "step": 2785 }, { "epoch": 0.32002756877835853, "grad_norm": 0.3624141812324524, "learning_rate": 0.0001, "loss": 1.644, "step": 2786 }, { "epoch": 0.32014243868818565, "grad_norm": 0.36637428402900696, "learning_rate": 0.0001, "loss": 1.5811, "step": 2787 }, { "epoch": 0.32025730859801277, "grad_norm": 0.4166329503059387, "learning_rate": 0.0001, "loss": 1.8372, "step": 2788 }, { "epoch": 0.3203721785078399, "grad_norm": 0.38629505038261414, "learning_rate": 0.0001, "loss": 1.726, "step": 2789 }, { "epoch": 0.320487048417667, "grad_norm": 0.3948490023612976, "learning_rate": 0.0001, "loss": 1.7453, "step": 2790 }, { "epoch": 0.32060191832749413, "grad_norm": 0.3907056450843811, "learning_rate": 0.0001, "loss": 1.6473, "step": 2791 }, { "epoch": 0.32071678823732125, "grad_norm": 0.36693593859672546, "learning_rate": 0.0001, "loss": 1.5415, "step": 2792 }, { "epoch": 0.32083165814714837, "grad_norm": 0.36896297335624695, "learning_rate": 0.0001, "loss": 1.4488, "step": 2793 }, { "epoch": 0.3209465280569755, "grad_norm": 0.38584834337234497, "learning_rate": 0.0001, "loss": 1.8108, "step": 2794 }, { "epoch": 0.3210613979668026, "grad_norm": 0.3919477164745331, "learning_rate": 0.0001, "loss": 1.4851, "step": 2795 }, { "epoch": 0.32117626787662973, "grad_norm": 0.41220781207084656, "learning_rate": 0.0001, "loss": 1.6354, "step": 2796 }, { "epoch": 0.32129113778645685, "grad_norm": 0.3902750015258789, "learning_rate": 0.0001, "loss": 1.6299, "step": 2797 }, { "epoch": 0.32140600769628397, "grad_norm": 0.3956315219402313, "learning_rate": 0.0001, "loss": 1.7504, "step": 2798 }, { "epoch": 0.3215208776061111, "grad_norm": 0.35562974214553833, "learning_rate": 0.0001, "loss": 1.6145, "step": 2799 }, { "epoch": 0.3216357475159382, "grad_norm": 0.39182206988334656, "learning_rate": 0.0001, "loss": 1.5574, "step": 2800 }, { "epoch": 0.32175061742576533, "grad_norm": 0.41521987318992615, "learning_rate": 0.0001, "loss": 1.7274, "step": 2801 }, { "epoch": 0.32186548733559245, "grad_norm": 0.38131776452064514, "learning_rate": 0.0001, "loss": 1.5591, "step": 2802 }, { "epoch": 0.32198035724541957, "grad_norm": 0.4228755533695221, "learning_rate": 0.0001, "loss": 1.8683, "step": 2803 }, { "epoch": 0.3220952271552467, "grad_norm": 0.3989662230014801, "learning_rate": 0.0001, "loss": 1.8067, "step": 2804 }, { "epoch": 0.3222100970650738, "grad_norm": 0.3961024284362793, "learning_rate": 0.0001, "loss": 1.8281, "step": 2805 }, { "epoch": 0.32232496697490093, "grad_norm": 0.4035508930683136, "learning_rate": 0.0001, "loss": 1.7393, "step": 2806 }, { "epoch": 0.32243983688472805, "grad_norm": 0.38078513741493225, "learning_rate": 0.0001, "loss": 1.7108, "step": 2807 }, { "epoch": 0.32255470679455517, "grad_norm": 0.367631196975708, "learning_rate": 0.0001, "loss": 1.7122, "step": 2808 }, { "epoch": 0.3226695767043823, "grad_norm": 0.3607901632785797, "learning_rate": 0.0001, "loss": 1.5505, "step": 2809 }, { "epoch": 0.3227844466142094, "grad_norm": 0.3930343687534332, "learning_rate": 0.0001, "loss": 1.4403, "step": 2810 }, { "epoch": 0.32289931652403653, "grad_norm": 0.37640708684921265, "learning_rate": 0.0001, "loss": 1.798, "step": 2811 }, { "epoch": 0.32301418643386365, "grad_norm": 0.36390334367752075, "learning_rate": 0.0001, "loss": 1.5824, "step": 2812 }, { "epoch": 0.32312905634369077, "grad_norm": 0.3854324519634247, "learning_rate": 0.0001, "loss": 1.6426, "step": 2813 }, { "epoch": 0.3232439262535179, "grad_norm": 0.37264391779899597, "learning_rate": 0.0001, "loss": 1.7246, "step": 2814 }, { "epoch": 0.323358796163345, "grad_norm": 0.40931811928749084, "learning_rate": 0.0001, "loss": 1.6462, "step": 2815 }, { "epoch": 0.32347366607317213, "grad_norm": 0.36656174063682556, "learning_rate": 0.0001, "loss": 1.618, "step": 2816 }, { "epoch": 0.32358853598299925, "grad_norm": 0.3858596682548523, "learning_rate": 0.0001, "loss": 1.6036, "step": 2817 }, { "epoch": 0.32370340589282637, "grad_norm": 0.3784213066101074, "learning_rate": 0.0001, "loss": 1.8733, "step": 2818 }, { "epoch": 0.3238182758026535, "grad_norm": 0.3836335241794586, "learning_rate": 0.0001, "loss": 1.6904, "step": 2819 }, { "epoch": 0.3239331457124806, "grad_norm": 0.3633041977882385, "learning_rate": 0.0001, "loss": 1.7444, "step": 2820 }, { "epoch": 0.32404801562230773, "grad_norm": 0.3903411626815796, "learning_rate": 0.0001, "loss": 1.7132, "step": 2821 }, { "epoch": 0.32416288553213485, "grad_norm": 0.4233011305332184, "learning_rate": 0.0001, "loss": 1.6829, "step": 2822 }, { "epoch": 0.32427775544196197, "grad_norm": 0.4304129481315613, "learning_rate": 0.0001, "loss": 1.859, "step": 2823 }, { "epoch": 0.3243926253517891, "grad_norm": 0.37114959955215454, "learning_rate": 0.0001, "loss": 1.5548, "step": 2824 }, { "epoch": 0.3245074952616162, "grad_norm": 0.3852083683013916, "learning_rate": 0.0001, "loss": 1.6829, "step": 2825 }, { "epoch": 0.32462236517144333, "grad_norm": 0.3652872145175934, "learning_rate": 0.0001, "loss": 1.5912, "step": 2826 }, { "epoch": 0.32473723508127045, "grad_norm": 0.3811475932598114, "learning_rate": 0.0001, "loss": 1.6123, "step": 2827 }, { "epoch": 0.32485210499109757, "grad_norm": 0.3886179029941559, "learning_rate": 0.0001, "loss": 1.7562, "step": 2828 }, { "epoch": 0.3249669749009247, "grad_norm": 0.39535319805145264, "learning_rate": 0.0001, "loss": 1.5934, "step": 2829 }, { "epoch": 0.3250818448107518, "grad_norm": 0.40873974561691284, "learning_rate": 0.0001, "loss": 1.8607, "step": 2830 }, { "epoch": 0.3251967147205789, "grad_norm": 0.38622164726257324, "learning_rate": 0.0001, "loss": 1.6962, "step": 2831 }, { "epoch": 0.32531158463040605, "grad_norm": 0.42052188515663147, "learning_rate": 0.0001, "loss": 1.7905, "step": 2832 }, { "epoch": 0.32542645454023317, "grad_norm": 0.3537195026874542, "learning_rate": 0.0001, "loss": 1.5616, "step": 2833 }, { "epoch": 0.3255413244500603, "grad_norm": 0.3691607415676117, "learning_rate": 0.0001, "loss": 1.5968, "step": 2834 }, { "epoch": 0.3256561943598874, "grad_norm": 0.40789857506752014, "learning_rate": 0.0001, "loss": 1.9685, "step": 2835 }, { "epoch": 0.3257710642697145, "grad_norm": 0.3981241285800934, "learning_rate": 0.0001, "loss": 1.493, "step": 2836 }, { "epoch": 0.32588593417954165, "grad_norm": 0.36344999074935913, "learning_rate": 0.0001, "loss": 1.6628, "step": 2837 }, { "epoch": 0.32600080408936877, "grad_norm": 0.3493889272212982, "learning_rate": 0.0001, "loss": 1.4989, "step": 2838 }, { "epoch": 0.3261156739991959, "grad_norm": 0.39113399386405945, "learning_rate": 0.0001, "loss": 1.7783, "step": 2839 }, { "epoch": 0.326230543909023, "grad_norm": 0.3922522962093353, "learning_rate": 0.0001, "loss": 1.8986, "step": 2840 }, { "epoch": 0.3263454138188501, "grad_norm": 0.35591554641723633, "learning_rate": 0.0001, "loss": 1.6045, "step": 2841 }, { "epoch": 0.32646028372867725, "grad_norm": 0.38669490814208984, "learning_rate": 0.0001, "loss": 1.8257, "step": 2842 }, { "epoch": 0.32657515363850437, "grad_norm": 0.37732958793640137, "learning_rate": 0.0001, "loss": 1.7277, "step": 2843 }, { "epoch": 0.3266900235483315, "grad_norm": 0.3958292603492737, "learning_rate": 0.0001, "loss": 1.8254, "step": 2844 }, { "epoch": 0.32680489345815866, "grad_norm": 0.368966668844223, "learning_rate": 0.0001, "loss": 1.728, "step": 2845 }, { "epoch": 0.3269197633679858, "grad_norm": 0.3690721094608307, "learning_rate": 0.0001, "loss": 1.6272, "step": 2846 }, { "epoch": 0.3270346332778129, "grad_norm": 0.37405163049697876, "learning_rate": 0.0001, "loss": 1.6248, "step": 2847 }, { "epoch": 0.32714950318764, "grad_norm": 0.43797191977500916, "learning_rate": 0.0001, "loss": 1.9247, "step": 2848 }, { "epoch": 0.32726437309746714, "grad_norm": 0.38513773679733276, "learning_rate": 0.0001, "loss": 1.7059, "step": 2849 }, { "epoch": 0.32737924300729426, "grad_norm": 0.36473625898361206, "learning_rate": 0.0001, "loss": 1.5298, "step": 2850 }, { "epoch": 0.3274941129171214, "grad_norm": 0.38011401891708374, "learning_rate": 0.0001, "loss": 1.6667, "step": 2851 }, { "epoch": 0.3276089828269485, "grad_norm": 0.3875674605369568, "learning_rate": 0.0001, "loss": 1.8539, "step": 2852 }, { "epoch": 0.3277238527367756, "grad_norm": 0.4060609042644501, "learning_rate": 0.0001, "loss": 1.6604, "step": 2853 }, { "epoch": 0.32783872264660274, "grad_norm": 0.38478556275367737, "learning_rate": 0.0001, "loss": 1.8449, "step": 2854 }, { "epoch": 0.32795359255642986, "grad_norm": 0.414813756942749, "learning_rate": 0.0001, "loss": 1.7037, "step": 2855 }, { "epoch": 0.328068462466257, "grad_norm": 0.38957443833351135, "learning_rate": 0.0001, "loss": 1.8987, "step": 2856 }, { "epoch": 0.3281833323760841, "grad_norm": 0.3751903772354126, "learning_rate": 0.0001, "loss": 1.7977, "step": 2857 }, { "epoch": 0.3282982022859112, "grad_norm": 0.3669251799583435, "learning_rate": 0.0001, "loss": 1.6621, "step": 2858 }, { "epoch": 0.32841307219573834, "grad_norm": 0.36348956823349, "learning_rate": 0.0001, "loss": 1.7146, "step": 2859 }, { "epoch": 0.32852794210556546, "grad_norm": 0.35992637276649475, "learning_rate": 0.0001, "loss": 1.4548, "step": 2860 }, { "epoch": 0.3286428120153926, "grad_norm": 0.3643839955329895, "learning_rate": 0.0001, "loss": 1.7187, "step": 2861 }, { "epoch": 0.3287576819252197, "grad_norm": 0.40732714533805847, "learning_rate": 0.0001, "loss": 1.8751, "step": 2862 }, { "epoch": 0.3288725518350468, "grad_norm": 0.38161808252334595, "learning_rate": 0.0001, "loss": 1.7558, "step": 2863 }, { "epoch": 0.32898742174487394, "grad_norm": 0.40559515357017517, "learning_rate": 0.0001, "loss": 1.6177, "step": 2864 }, { "epoch": 0.32910229165470106, "grad_norm": 0.3841257393360138, "learning_rate": 0.0001, "loss": 1.5836, "step": 2865 }, { "epoch": 0.3292171615645282, "grad_norm": 0.36284148693084717, "learning_rate": 0.0001, "loss": 1.4342, "step": 2866 }, { "epoch": 0.3293320314743553, "grad_norm": 0.3603561818599701, "learning_rate": 0.0001, "loss": 1.6746, "step": 2867 }, { "epoch": 0.3294469013841824, "grad_norm": 0.39351412653923035, "learning_rate": 0.0001, "loss": 1.7173, "step": 2868 }, { "epoch": 0.32956177129400954, "grad_norm": 0.408401757478714, "learning_rate": 0.0001, "loss": 1.7423, "step": 2869 }, { "epoch": 0.32967664120383666, "grad_norm": 0.37314414978027344, "learning_rate": 0.0001, "loss": 1.5158, "step": 2870 }, { "epoch": 0.3297915111136638, "grad_norm": 0.369933545589447, "learning_rate": 0.0001, "loss": 1.5922, "step": 2871 }, { "epoch": 0.3299063810234909, "grad_norm": 0.4098500907421112, "learning_rate": 0.0001, "loss": 1.8109, "step": 2872 }, { "epoch": 0.330021250933318, "grad_norm": 0.40561625361442566, "learning_rate": 0.0001, "loss": 1.6972, "step": 2873 }, { "epoch": 0.33013612084314514, "grad_norm": 0.3818763792514801, "learning_rate": 0.0001, "loss": 1.6462, "step": 2874 }, { "epoch": 0.33025099075297226, "grad_norm": 0.4518624544143677, "learning_rate": 0.0001, "loss": 1.6921, "step": 2875 }, { "epoch": 0.3303658606627994, "grad_norm": 0.3883214294910431, "learning_rate": 0.0001, "loss": 1.7215, "step": 2876 }, { "epoch": 0.3304807305726265, "grad_norm": 0.4087526500225067, "learning_rate": 0.0001, "loss": 1.919, "step": 2877 }, { "epoch": 0.3305956004824536, "grad_norm": 0.37639445066452026, "learning_rate": 0.0001, "loss": 1.7867, "step": 2878 }, { "epoch": 0.33071047039228074, "grad_norm": 0.42621272802352905, "learning_rate": 0.0001, "loss": 1.5689, "step": 2879 }, { "epoch": 0.33082534030210786, "grad_norm": 0.36311256885528564, "learning_rate": 0.0001, "loss": 1.7286, "step": 2880 }, { "epoch": 0.330940210211935, "grad_norm": 0.35729482769966125, "learning_rate": 0.0001, "loss": 1.6164, "step": 2881 }, { "epoch": 0.3310550801217621, "grad_norm": 0.3519277274608612, "learning_rate": 0.0001, "loss": 1.4828, "step": 2882 }, { "epoch": 0.3311699500315892, "grad_norm": 0.40030479431152344, "learning_rate": 0.0001, "loss": 1.8654, "step": 2883 }, { "epoch": 0.33128481994141634, "grad_norm": 0.3615962862968445, "learning_rate": 0.0001, "loss": 1.5445, "step": 2884 }, { "epoch": 0.33139968985124346, "grad_norm": 0.38331496715545654, "learning_rate": 0.0001, "loss": 1.7308, "step": 2885 }, { "epoch": 0.3315145597610706, "grad_norm": 0.3735135793685913, "learning_rate": 0.0001, "loss": 1.4669, "step": 2886 }, { "epoch": 0.3316294296708977, "grad_norm": 0.39380258321762085, "learning_rate": 0.0001, "loss": 1.5611, "step": 2887 }, { "epoch": 0.3317442995807248, "grad_norm": 0.4285888373851776, "learning_rate": 0.0001, "loss": 1.5816, "step": 2888 }, { "epoch": 0.33185916949055194, "grad_norm": 0.36404624581336975, "learning_rate": 0.0001, "loss": 1.5868, "step": 2889 }, { "epoch": 0.33197403940037906, "grad_norm": 0.42275553941726685, "learning_rate": 0.0001, "loss": 1.6401, "step": 2890 }, { "epoch": 0.3320889093102062, "grad_norm": 0.37147074937820435, "learning_rate": 0.0001, "loss": 1.5029, "step": 2891 }, { "epoch": 0.3322037792200333, "grad_norm": 0.3962612450122833, "learning_rate": 0.0001, "loss": 1.8306, "step": 2892 }, { "epoch": 0.3323186491298604, "grad_norm": 0.36973974108695984, "learning_rate": 0.0001, "loss": 1.6596, "step": 2893 }, { "epoch": 0.33243351903968754, "grad_norm": 0.3744032680988312, "learning_rate": 0.0001, "loss": 1.6306, "step": 2894 }, { "epoch": 0.33254838894951466, "grad_norm": 0.3863425552845001, "learning_rate": 0.0001, "loss": 1.5361, "step": 2895 }, { "epoch": 0.3326632588593418, "grad_norm": 0.3814421594142914, "learning_rate": 0.0001, "loss": 1.6219, "step": 2896 }, { "epoch": 0.3327781287691689, "grad_norm": 0.3841194808483124, "learning_rate": 0.0001, "loss": 1.7178, "step": 2897 }, { "epoch": 0.332892998678996, "grad_norm": 0.3847556412220001, "learning_rate": 0.0001, "loss": 1.6418, "step": 2898 }, { "epoch": 0.33300786858882314, "grad_norm": 0.38841572403907776, "learning_rate": 0.0001, "loss": 1.8005, "step": 2899 }, { "epoch": 0.33312273849865026, "grad_norm": 0.41473743319511414, "learning_rate": 0.0001, "loss": 1.6702, "step": 2900 }, { "epoch": 0.3332376084084774, "grad_norm": 0.37773972749710083, "learning_rate": 0.0001, "loss": 1.5637, "step": 2901 }, { "epoch": 0.3333524783183045, "grad_norm": 0.38093826174736023, "learning_rate": 0.0001, "loss": 1.7681, "step": 2902 }, { "epoch": 0.3334673482281316, "grad_norm": 0.39234668016433716, "learning_rate": 0.0001, "loss": 1.7385, "step": 2903 }, { "epoch": 0.33358221813795874, "grad_norm": 0.4004804790019989, "learning_rate": 0.0001, "loss": 1.7519, "step": 2904 }, { "epoch": 0.33369708804778586, "grad_norm": 0.35611221194267273, "learning_rate": 0.0001, "loss": 1.5161, "step": 2905 }, { "epoch": 0.333811957957613, "grad_norm": 0.383696585893631, "learning_rate": 0.0001, "loss": 1.7001, "step": 2906 }, { "epoch": 0.3339268278674401, "grad_norm": 0.3913770020008087, "learning_rate": 0.0001, "loss": 1.7143, "step": 2907 }, { "epoch": 0.3340416977772672, "grad_norm": 0.38301557302474976, "learning_rate": 0.0001, "loss": 1.649, "step": 2908 }, { "epoch": 0.33415656768709434, "grad_norm": 0.3856869041919708, "learning_rate": 0.0001, "loss": 1.7601, "step": 2909 }, { "epoch": 0.33427143759692146, "grad_norm": 0.3839951753616333, "learning_rate": 0.0001, "loss": 1.8057, "step": 2910 }, { "epoch": 0.3343863075067486, "grad_norm": 0.38269492983818054, "learning_rate": 0.0001, "loss": 1.5389, "step": 2911 }, { "epoch": 0.3345011774165757, "grad_norm": 0.3791959583759308, "learning_rate": 0.0001, "loss": 1.7275, "step": 2912 }, { "epoch": 0.3346160473264028, "grad_norm": 0.36112257838249207, "learning_rate": 0.0001, "loss": 1.6216, "step": 2913 }, { "epoch": 0.33473091723623, "grad_norm": 0.3655812740325928, "learning_rate": 0.0001, "loss": 1.4784, "step": 2914 }, { "epoch": 0.3348457871460571, "grad_norm": 0.3751130998134613, "learning_rate": 0.0001, "loss": 1.7072, "step": 2915 }, { "epoch": 0.33496065705588424, "grad_norm": 0.3733077049255371, "learning_rate": 0.0001, "loss": 1.8166, "step": 2916 }, { "epoch": 0.33507552696571136, "grad_norm": 0.3870159983634949, "learning_rate": 0.0001, "loss": 1.6662, "step": 2917 }, { "epoch": 0.3351903968755385, "grad_norm": 0.3635254204273224, "learning_rate": 0.0001, "loss": 1.4773, "step": 2918 }, { "epoch": 0.3353052667853656, "grad_norm": 0.3966655135154724, "learning_rate": 0.0001, "loss": 1.4219, "step": 2919 }, { "epoch": 0.3354201366951927, "grad_norm": 0.3748622536659241, "learning_rate": 0.0001, "loss": 1.5614, "step": 2920 }, { "epoch": 0.33553500660501984, "grad_norm": 0.38931792974472046, "learning_rate": 0.0001, "loss": 1.6788, "step": 2921 }, { "epoch": 0.33564987651484696, "grad_norm": 0.4402804970741272, "learning_rate": 0.0001, "loss": 1.5002, "step": 2922 }, { "epoch": 0.3357647464246741, "grad_norm": 0.3441646099090576, "learning_rate": 0.0001, "loss": 1.5717, "step": 2923 }, { "epoch": 0.3358796163345012, "grad_norm": 0.39570891857147217, "learning_rate": 0.0001, "loss": 1.714, "step": 2924 }, { "epoch": 0.3359944862443283, "grad_norm": 0.39201679825782776, "learning_rate": 0.0001, "loss": 1.7896, "step": 2925 }, { "epoch": 0.33610935615415544, "grad_norm": 0.3926868140697479, "learning_rate": 0.0001, "loss": 1.6262, "step": 2926 }, { "epoch": 0.33622422606398256, "grad_norm": 0.3830588757991791, "learning_rate": 0.0001, "loss": 1.6365, "step": 2927 }, { "epoch": 0.3363390959738097, "grad_norm": 0.3714669346809387, "learning_rate": 0.0001, "loss": 1.6564, "step": 2928 }, { "epoch": 0.3364539658836368, "grad_norm": 0.4032626748085022, "learning_rate": 0.0001, "loss": 1.7987, "step": 2929 }, { "epoch": 0.3365688357934639, "grad_norm": 0.40316057205200195, "learning_rate": 0.0001, "loss": 1.6721, "step": 2930 }, { "epoch": 0.33668370570329104, "grad_norm": 0.4364853799343109, "learning_rate": 0.0001, "loss": 1.5832, "step": 2931 }, { "epoch": 0.33679857561311816, "grad_norm": 0.3844012916088104, "learning_rate": 0.0001, "loss": 1.687, "step": 2932 }, { "epoch": 0.3369134455229453, "grad_norm": 0.3774738311767578, "learning_rate": 0.0001, "loss": 1.7877, "step": 2933 }, { "epoch": 0.3370283154327724, "grad_norm": 0.4184546172618866, "learning_rate": 0.0001, "loss": 1.8513, "step": 2934 }, { "epoch": 0.3371431853425995, "grad_norm": 0.3983631432056427, "learning_rate": 0.0001, "loss": 1.7087, "step": 2935 }, { "epoch": 0.33725805525242664, "grad_norm": 0.356240451335907, "learning_rate": 0.0001, "loss": 1.5994, "step": 2936 }, { "epoch": 0.33737292516225376, "grad_norm": 0.3877936601638794, "learning_rate": 0.0001, "loss": 1.6252, "step": 2937 }, { "epoch": 0.3374877950720809, "grad_norm": 0.3945756256580353, "learning_rate": 0.0001, "loss": 1.7137, "step": 2938 }, { "epoch": 0.337602664981908, "grad_norm": 0.3544231355190277, "learning_rate": 0.0001, "loss": 1.3307, "step": 2939 }, { "epoch": 0.3377175348917351, "grad_norm": 0.3833335041999817, "learning_rate": 0.0001, "loss": 1.7065, "step": 2940 }, { "epoch": 0.33783240480156224, "grad_norm": 0.3731600046157837, "learning_rate": 0.0001, "loss": 1.7736, "step": 2941 }, { "epoch": 0.33794727471138936, "grad_norm": 0.4063700735569, "learning_rate": 0.0001, "loss": 1.6827, "step": 2942 }, { "epoch": 0.3380621446212165, "grad_norm": 0.4960021674633026, "learning_rate": 0.0001, "loss": 1.9989, "step": 2943 }, { "epoch": 0.3381770145310436, "grad_norm": 0.4238811433315277, "learning_rate": 0.0001, "loss": 1.7855, "step": 2944 }, { "epoch": 0.3382918844408707, "grad_norm": 0.4114185571670532, "learning_rate": 0.0001, "loss": 1.8296, "step": 2945 }, { "epoch": 0.33840675435069784, "grad_norm": 0.40994930267333984, "learning_rate": 0.0001, "loss": 1.8185, "step": 2946 }, { "epoch": 0.33852162426052496, "grad_norm": 0.39755189418792725, "learning_rate": 0.0001, "loss": 1.7911, "step": 2947 }, { "epoch": 0.3386364941703521, "grad_norm": 0.39836958050727844, "learning_rate": 0.0001, "loss": 1.6468, "step": 2948 }, { "epoch": 0.3387513640801792, "grad_norm": 0.3699915409088135, "learning_rate": 0.0001, "loss": 1.603, "step": 2949 }, { "epoch": 0.3388662339900063, "grad_norm": 0.42995521426200867, "learning_rate": 0.0001, "loss": 1.852, "step": 2950 }, { "epoch": 0.33898110389983344, "grad_norm": 0.398151695728302, "learning_rate": 0.0001, "loss": 1.8414, "step": 2951 }, { "epoch": 0.33909597380966056, "grad_norm": 0.36557191610336304, "learning_rate": 0.0001, "loss": 1.5987, "step": 2952 }, { "epoch": 0.3392108437194877, "grad_norm": 0.3784855008125305, "learning_rate": 0.0001, "loss": 1.6334, "step": 2953 }, { "epoch": 0.3393257136293148, "grad_norm": 0.40273427963256836, "learning_rate": 0.0001, "loss": 1.7353, "step": 2954 }, { "epoch": 0.3394405835391419, "grad_norm": 0.37005481123924255, "learning_rate": 0.0001, "loss": 1.6173, "step": 2955 }, { "epoch": 0.33955545344896904, "grad_norm": 0.3850763142108917, "learning_rate": 0.0001, "loss": 1.7957, "step": 2956 }, { "epoch": 0.33967032335879616, "grad_norm": 0.3557315170764923, "learning_rate": 0.0001, "loss": 1.6479, "step": 2957 }, { "epoch": 0.3397851932686233, "grad_norm": 0.44396260380744934, "learning_rate": 0.0001, "loss": 1.6434, "step": 2958 }, { "epoch": 0.3399000631784504, "grad_norm": 0.3347325325012207, "learning_rate": 0.0001, "loss": 1.3493, "step": 2959 }, { "epoch": 0.3400149330882775, "grad_norm": 0.3799315094947815, "learning_rate": 0.0001, "loss": 1.698, "step": 2960 }, { "epoch": 0.34012980299810464, "grad_norm": 0.3979965150356293, "learning_rate": 0.0001, "loss": 1.6106, "step": 2961 }, { "epoch": 0.34024467290793176, "grad_norm": 0.3687105178833008, "learning_rate": 0.0001, "loss": 1.6298, "step": 2962 }, { "epoch": 0.3403595428177589, "grad_norm": 0.3896116018295288, "learning_rate": 0.0001, "loss": 1.6547, "step": 2963 }, { "epoch": 0.340474412727586, "grad_norm": 0.38803455233573914, "learning_rate": 0.0001, "loss": 1.7099, "step": 2964 }, { "epoch": 0.3405892826374131, "grad_norm": 0.37791207432746887, "learning_rate": 0.0001, "loss": 1.4235, "step": 2965 }, { "epoch": 0.34070415254724024, "grad_norm": 0.3701097071170807, "learning_rate": 0.0001, "loss": 1.7074, "step": 2966 }, { "epoch": 0.34081902245706736, "grad_norm": 0.3952276408672333, "learning_rate": 0.0001, "loss": 1.7784, "step": 2967 }, { "epoch": 0.3409338923668945, "grad_norm": 0.6198942065238953, "learning_rate": 0.0001, "loss": 1.4752, "step": 2968 }, { "epoch": 0.3410487622767216, "grad_norm": 0.39042800664901733, "learning_rate": 0.0001, "loss": 1.715, "step": 2969 }, { "epoch": 0.3411636321865487, "grad_norm": 0.39784181118011475, "learning_rate": 0.0001, "loss": 1.7825, "step": 2970 }, { "epoch": 0.34127850209637584, "grad_norm": 0.42527255415916443, "learning_rate": 0.0001, "loss": 1.6846, "step": 2971 }, { "epoch": 0.34139337200620296, "grad_norm": 0.35908040404319763, "learning_rate": 0.0001, "loss": 1.5091, "step": 2972 }, { "epoch": 0.3415082419160301, "grad_norm": 0.3641200661659241, "learning_rate": 0.0001, "loss": 1.6271, "step": 2973 }, { "epoch": 0.3416231118258572, "grad_norm": 0.38720008730888367, "learning_rate": 0.0001, "loss": 1.6965, "step": 2974 }, { "epoch": 0.3417379817356843, "grad_norm": 0.40444648265838623, "learning_rate": 0.0001, "loss": 1.9605, "step": 2975 }, { "epoch": 0.34185285164551144, "grad_norm": 0.3607354462146759, "learning_rate": 0.0001, "loss": 1.537, "step": 2976 }, { "epoch": 0.34196772155533856, "grad_norm": 0.3962652087211609, "learning_rate": 0.0001, "loss": 1.85, "step": 2977 }, { "epoch": 0.3420825914651657, "grad_norm": 0.36956319212913513, "learning_rate": 0.0001, "loss": 1.6285, "step": 2978 }, { "epoch": 0.3421974613749928, "grad_norm": 0.39066120982170105, "learning_rate": 0.0001, "loss": 1.7077, "step": 2979 }, { "epoch": 0.3423123312848199, "grad_norm": 0.3526730537414551, "learning_rate": 0.0001, "loss": 1.5309, "step": 2980 }, { "epoch": 0.34242720119464704, "grad_norm": 0.39420273900032043, "learning_rate": 0.0001, "loss": 1.7418, "step": 2981 }, { "epoch": 0.3425420711044742, "grad_norm": 0.3418049216270447, "learning_rate": 0.0001, "loss": 1.3557, "step": 2982 }, { "epoch": 0.34265694101430133, "grad_norm": 0.4102267920970917, "learning_rate": 0.0001, "loss": 1.8232, "step": 2983 }, { "epoch": 0.34277181092412845, "grad_norm": 0.3934805691242218, "learning_rate": 0.0001, "loss": 1.7044, "step": 2984 }, { "epoch": 0.34288668083395557, "grad_norm": 0.3770129978656769, "learning_rate": 0.0001, "loss": 1.4916, "step": 2985 }, { "epoch": 0.3430015507437827, "grad_norm": 0.39099806547164917, "learning_rate": 0.0001, "loss": 1.6679, "step": 2986 }, { "epoch": 0.3431164206536098, "grad_norm": 0.36751532554626465, "learning_rate": 0.0001, "loss": 1.7394, "step": 2987 }, { "epoch": 0.34323129056343693, "grad_norm": 0.3834240734577179, "learning_rate": 0.0001, "loss": 1.5544, "step": 2988 }, { "epoch": 0.34334616047326405, "grad_norm": 0.3922926187515259, "learning_rate": 0.0001, "loss": 1.6312, "step": 2989 }, { "epoch": 0.34346103038309117, "grad_norm": 0.38694506883621216, "learning_rate": 0.0001, "loss": 1.81, "step": 2990 }, { "epoch": 0.3435759002929183, "grad_norm": 0.3988105356693268, "learning_rate": 0.0001, "loss": 1.6714, "step": 2991 }, { "epoch": 0.3436907702027454, "grad_norm": 0.42653176188468933, "learning_rate": 0.0001, "loss": 1.8591, "step": 2992 }, { "epoch": 0.34380564011257253, "grad_norm": 0.4091017246246338, "learning_rate": 0.0001, "loss": 1.6663, "step": 2993 }, { "epoch": 0.34392051002239965, "grad_norm": 0.4520750641822815, "learning_rate": 0.0001, "loss": 1.8694, "step": 2994 }, { "epoch": 0.34403537993222677, "grad_norm": 0.38830214738845825, "learning_rate": 0.0001, "loss": 1.7041, "step": 2995 }, { "epoch": 0.3441502498420539, "grad_norm": 0.40523776412010193, "learning_rate": 0.0001, "loss": 1.7478, "step": 2996 }, { "epoch": 0.344265119751881, "grad_norm": 0.3667933940887451, "learning_rate": 0.0001, "loss": 1.6113, "step": 2997 }, { "epoch": 0.34437998966170813, "grad_norm": 0.43613407015800476, "learning_rate": 0.0001, "loss": 1.7168, "step": 2998 }, { "epoch": 0.34449485957153525, "grad_norm": 0.38262253999710083, "learning_rate": 0.0001, "loss": 1.5915, "step": 2999 }, { "epoch": 0.34460972948136237, "grad_norm": 0.39579108357429504, "learning_rate": 0.0001, "loss": 1.658, "step": 3000 }, { "epoch": 0.3447245993911895, "grad_norm": 0.38654452562332153, "learning_rate": 0.0001, "loss": 1.3892, "step": 3001 }, { "epoch": 0.3448394693010166, "grad_norm": 0.35805824398994446, "learning_rate": 0.0001, "loss": 1.4611, "step": 3002 }, { "epoch": 0.34495433921084373, "grad_norm": 0.3665701448917389, "learning_rate": 0.0001, "loss": 1.5227, "step": 3003 }, { "epoch": 0.34506920912067085, "grad_norm": 0.40592774748802185, "learning_rate": 0.0001, "loss": 1.5783, "step": 3004 }, { "epoch": 0.34518407903049797, "grad_norm": 0.3932124376296997, "learning_rate": 0.0001, "loss": 1.6414, "step": 3005 }, { "epoch": 0.3452989489403251, "grad_norm": 0.42479029297828674, "learning_rate": 0.0001, "loss": 1.6137, "step": 3006 }, { "epoch": 0.3454138188501522, "grad_norm": 0.43539106845855713, "learning_rate": 0.0001, "loss": 1.7239, "step": 3007 }, { "epoch": 0.34552868875997933, "grad_norm": 0.40625861287117004, "learning_rate": 0.0001, "loss": 1.9526, "step": 3008 }, { "epoch": 0.34564355866980645, "grad_norm": 0.3962743282318115, "learning_rate": 0.0001, "loss": 1.6972, "step": 3009 }, { "epoch": 0.34575842857963357, "grad_norm": 0.37623900175094604, "learning_rate": 0.0001, "loss": 1.5136, "step": 3010 }, { "epoch": 0.3458732984894607, "grad_norm": 0.3827407658100128, "learning_rate": 0.0001, "loss": 1.6511, "step": 3011 }, { "epoch": 0.3459881683992878, "grad_norm": 0.3819064795970917, "learning_rate": 0.0001, "loss": 1.6127, "step": 3012 }, { "epoch": 0.34610303830911493, "grad_norm": 0.38156652450561523, "learning_rate": 0.0001, "loss": 1.5852, "step": 3013 }, { "epoch": 0.34621790821894205, "grad_norm": 0.42006298899650574, "learning_rate": 0.0001, "loss": 1.8345, "step": 3014 }, { "epoch": 0.34633277812876917, "grad_norm": 0.3623389005661011, "learning_rate": 0.0001, "loss": 1.6146, "step": 3015 }, { "epoch": 0.3464476480385963, "grad_norm": 0.39460188150405884, "learning_rate": 0.0001, "loss": 1.8264, "step": 3016 }, { "epoch": 0.3465625179484234, "grad_norm": 0.40314847230911255, "learning_rate": 0.0001, "loss": 1.646, "step": 3017 }, { "epoch": 0.34667738785825053, "grad_norm": 0.37284019589424133, "learning_rate": 0.0001, "loss": 1.6055, "step": 3018 }, { "epoch": 0.34679225776807765, "grad_norm": 0.4431118071079254, "learning_rate": 0.0001, "loss": 1.9377, "step": 3019 }, { "epoch": 0.34690712767790477, "grad_norm": 0.3849484622478485, "learning_rate": 0.0001, "loss": 1.6559, "step": 3020 }, { "epoch": 0.3470219975877319, "grad_norm": 0.3909108340740204, "learning_rate": 0.0001, "loss": 1.8541, "step": 3021 }, { "epoch": 0.347136867497559, "grad_norm": 0.38009119033813477, "learning_rate": 0.0001, "loss": 1.7868, "step": 3022 }, { "epoch": 0.34725173740738613, "grad_norm": 0.3676866292953491, "learning_rate": 0.0001, "loss": 1.5971, "step": 3023 }, { "epoch": 0.34736660731721325, "grad_norm": 0.3783824145793915, "learning_rate": 0.0001, "loss": 1.6714, "step": 3024 }, { "epoch": 0.34748147722704037, "grad_norm": 0.3574279546737671, "learning_rate": 0.0001, "loss": 1.4742, "step": 3025 }, { "epoch": 0.3475963471368675, "grad_norm": 0.3630661070346832, "learning_rate": 0.0001, "loss": 1.3814, "step": 3026 }, { "epoch": 0.3477112170466946, "grad_norm": 0.37314149737358093, "learning_rate": 0.0001, "loss": 1.6602, "step": 3027 }, { "epoch": 0.34782608695652173, "grad_norm": 0.38733503222465515, "learning_rate": 0.0001, "loss": 1.5787, "step": 3028 }, { "epoch": 0.34794095686634885, "grad_norm": 0.42135870456695557, "learning_rate": 0.0001, "loss": 1.9024, "step": 3029 }, { "epoch": 0.34805582677617597, "grad_norm": 0.40700820088386536, "learning_rate": 0.0001, "loss": 1.5771, "step": 3030 }, { "epoch": 0.3481706966860031, "grad_norm": 0.389607310295105, "learning_rate": 0.0001, "loss": 1.751, "step": 3031 }, { "epoch": 0.3482855665958302, "grad_norm": 0.39443865418434143, "learning_rate": 0.0001, "loss": 1.4554, "step": 3032 }, { "epoch": 0.34840043650565733, "grad_norm": 0.4073096513748169, "learning_rate": 0.0001, "loss": 1.7826, "step": 3033 }, { "epoch": 0.34851530641548445, "grad_norm": 0.38107818365097046, "learning_rate": 0.0001, "loss": 1.7504, "step": 3034 }, { "epoch": 0.34863017632531157, "grad_norm": 0.39980536699295044, "learning_rate": 0.0001, "loss": 1.7422, "step": 3035 }, { "epoch": 0.3487450462351387, "grad_norm": 0.3910979628562927, "learning_rate": 0.0001, "loss": 1.7604, "step": 3036 }, { "epoch": 0.3488599161449658, "grad_norm": 0.3955102562904358, "learning_rate": 0.0001, "loss": 1.5553, "step": 3037 }, { "epoch": 0.34897478605479293, "grad_norm": 0.3812708258628845, "learning_rate": 0.0001, "loss": 1.6518, "step": 3038 }, { "epoch": 0.34908965596462005, "grad_norm": 0.402920126914978, "learning_rate": 0.0001, "loss": 1.6048, "step": 3039 }, { "epoch": 0.34920452587444717, "grad_norm": 0.3769501745700836, "learning_rate": 0.0001, "loss": 1.6784, "step": 3040 }, { "epoch": 0.3493193957842743, "grad_norm": 0.3954136371612549, "learning_rate": 0.0001, "loss": 1.7905, "step": 3041 }, { "epoch": 0.3494342656941014, "grad_norm": 0.36639055609703064, "learning_rate": 0.0001, "loss": 1.625, "step": 3042 }, { "epoch": 0.34954913560392853, "grad_norm": 0.42278578877449036, "learning_rate": 0.0001, "loss": 1.9002, "step": 3043 }, { "epoch": 0.34966400551375565, "grad_norm": 0.37817153334617615, "learning_rate": 0.0001, "loss": 1.4365, "step": 3044 }, { "epoch": 0.34977887542358277, "grad_norm": 0.3958953320980072, "learning_rate": 0.0001, "loss": 1.8305, "step": 3045 }, { "epoch": 0.3498937453334099, "grad_norm": 0.3798516094684601, "learning_rate": 0.0001, "loss": 1.6299, "step": 3046 }, { "epoch": 0.350008615243237, "grad_norm": 0.37097060680389404, "learning_rate": 0.0001, "loss": 1.4808, "step": 3047 }, { "epoch": 0.35012348515306413, "grad_norm": 0.3970964848995209, "learning_rate": 0.0001, "loss": 1.5711, "step": 3048 }, { "epoch": 0.35023835506289125, "grad_norm": 0.3985350728034973, "learning_rate": 0.0001, "loss": 1.9372, "step": 3049 }, { "epoch": 0.35035322497271837, "grad_norm": 0.3665081262588501, "learning_rate": 0.0001, "loss": 1.4447, "step": 3050 }, { "epoch": 0.35046809488254554, "grad_norm": 0.3866146504878998, "learning_rate": 0.0001, "loss": 1.6509, "step": 3051 }, { "epoch": 0.35058296479237266, "grad_norm": 0.37819328904151917, "learning_rate": 0.0001, "loss": 1.6386, "step": 3052 }, { "epoch": 0.3506978347021998, "grad_norm": 0.3822932243347168, "learning_rate": 0.0001, "loss": 1.6757, "step": 3053 }, { "epoch": 0.3508127046120269, "grad_norm": 0.3891104757785797, "learning_rate": 0.0001, "loss": 1.6485, "step": 3054 }, { "epoch": 0.350927574521854, "grad_norm": 0.40191301703453064, "learning_rate": 0.0001, "loss": 1.8016, "step": 3055 }, { "epoch": 0.35104244443168114, "grad_norm": 0.40188851952552795, "learning_rate": 0.0001, "loss": 1.7198, "step": 3056 }, { "epoch": 0.35115731434150826, "grad_norm": 0.3913547694683075, "learning_rate": 0.0001, "loss": 1.5879, "step": 3057 }, { "epoch": 0.3512721842513354, "grad_norm": 0.410559743642807, "learning_rate": 0.0001, "loss": 1.6572, "step": 3058 }, { "epoch": 0.3513870541611625, "grad_norm": 0.3729799687862396, "learning_rate": 0.0001, "loss": 1.6277, "step": 3059 }, { "epoch": 0.3515019240709896, "grad_norm": 0.39681947231292725, "learning_rate": 0.0001, "loss": 1.7177, "step": 3060 }, { "epoch": 0.35161679398081674, "grad_norm": 0.38647258281707764, "learning_rate": 0.0001, "loss": 1.8458, "step": 3061 }, { "epoch": 0.35173166389064386, "grad_norm": 0.3754447102546692, "learning_rate": 0.0001, "loss": 1.78, "step": 3062 }, { "epoch": 0.351846533800471, "grad_norm": 0.35854676365852356, "learning_rate": 0.0001, "loss": 1.4786, "step": 3063 }, { "epoch": 0.3519614037102981, "grad_norm": 0.3889663815498352, "learning_rate": 0.0001, "loss": 1.8046, "step": 3064 }, { "epoch": 0.3520762736201252, "grad_norm": 0.36157429218292236, "learning_rate": 0.0001, "loss": 1.3026, "step": 3065 }, { "epoch": 0.35219114352995234, "grad_norm": 0.44542935490608215, "learning_rate": 0.0001, "loss": 1.6889, "step": 3066 }, { "epoch": 0.35230601343977946, "grad_norm": 0.3814290463924408, "learning_rate": 0.0001, "loss": 1.5027, "step": 3067 }, { "epoch": 0.3524208833496066, "grad_norm": 0.36910781264305115, "learning_rate": 0.0001, "loss": 1.5745, "step": 3068 }, { "epoch": 0.3525357532594337, "grad_norm": 0.4063052535057068, "learning_rate": 0.0001, "loss": 1.6937, "step": 3069 }, { "epoch": 0.3526506231692608, "grad_norm": 0.3729820251464844, "learning_rate": 0.0001, "loss": 1.5685, "step": 3070 }, { "epoch": 0.35276549307908794, "grad_norm": 0.3980967700481415, "learning_rate": 0.0001, "loss": 1.6192, "step": 3071 }, { "epoch": 0.35288036298891506, "grad_norm": 0.3677471876144409, "learning_rate": 0.0001, "loss": 1.555, "step": 3072 }, { "epoch": 0.3529952328987422, "grad_norm": 0.38200077414512634, "learning_rate": 0.0001, "loss": 1.6235, "step": 3073 }, { "epoch": 0.3531101028085693, "grad_norm": 0.37423157691955566, "learning_rate": 0.0001, "loss": 1.5642, "step": 3074 }, { "epoch": 0.3532249727183964, "grad_norm": 0.37253043055534363, "learning_rate": 0.0001, "loss": 1.6977, "step": 3075 }, { "epoch": 0.35333984262822354, "grad_norm": 0.3927091360092163, "learning_rate": 0.0001, "loss": 1.7231, "step": 3076 }, { "epoch": 0.35345471253805066, "grad_norm": 0.36988648772239685, "learning_rate": 0.0001, "loss": 1.506, "step": 3077 }, { "epoch": 0.3535695824478778, "grad_norm": 0.38558465242385864, "learning_rate": 0.0001, "loss": 1.7394, "step": 3078 }, { "epoch": 0.3536844523577049, "grad_norm": 0.44665199518203735, "learning_rate": 0.0001, "loss": 1.6499, "step": 3079 }, { "epoch": 0.353799322267532, "grad_norm": 0.3623279929161072, "learning_rate": 0.0001, "loss": 1.4641, "step": 3080 }, { "epoch": 0.35391419217735914, "grad_norm": 0.3726842701435089, "learning_rate": 0.0001, "loss": 1.5952, "step": 3081 }, { "epoch": 0.35402906208718626, "grad_norm": 0.3852822184562683, "learning_rate": 0.0001, "loss": 1.7239, "step": 3082 }, { "epoch": 0.3541439319970134, "grad_norm": 0.3527339994907379, "learning_rate": 0.0001, "loss": 1.5354, "step": 3083 }, { "epoch": 0.3542588019068405, "grad_norm": 0.3964180052280426, "learning_rate": 0.0001, "loss": 1.6671, "step": 3084 }, { "epoch": 0.3543736718166676, "grad_norm": 0.37189149856567383, "learning_rate": 0.0001, "loss": 1.6399, "step": 3085 }, { "epoch": 0.35448854172649474, "grad_norm": 0.3664330840110779, "learning_rate": 0.0001, "loss": 1.4749, "step": 3086 }, { "epoch": 0.35460341163632186, "grad_norm": 0.43963682651519775, "learning_rate": 0.0001, "loss": 1.8784, "step": 3087 }, { "epoch": 0.354718281546149, "grad_norm": 0.41229376196861267, "learning_rate": 0.0001, "loss": 2.0028, "step": 3088 }, { "epoch": 0.3548331514559761, "grad_norm": 0.37985897064208984, "learning_rate": 0.0001, "loss": 1.522, "step": 3089 }, { "epoch": 0.3549480213658032, "grad_norm": 0.4061013162136078, "learning_rate": 0.0001, "loss": 1.767, "step": 3090 }, { "epoch": 0.35506289127563034, "grad_norm": 0.4098246395587921, "learning_rate": 0.0001, "loss": 1.6884, "step": 3091 }, { "epoch": 0.35517776118545746, "grad_norm": 0.4013693630695343, "learning_rate": 0.0001, "loss": 1.6242, "step": 3092 }, { "epoch": 0.3552926310952846, "grad_norm": 0.3519565463066101, "learning_rate": 0.0001, "loss": 1.6036, "step": 3093 }, { "epoch": 0.3554075010051117, "grad_norm": 0.3677949011325836, "learning_rate": 0.0001, "loss": 1.4798, "step": 3094 }, { "epoch": 0.3555223709149388, "grad_norm": 0.38344481587409973, "learning_rate": 0.0001, "loss": 1.7043, "step": 3095 }, { "epoch": 0.35563724082476594, "grad_norm": 0.3987760543823242, "learning_rate": 0.0001, "loss": 1.5873, "step": 3096 }, { "epoch": 0.35575211073459306, "grad_norm": 0.37182289361953735, "learning_rate": 0.0001, "loss": 1.5454, "step": 3097 }, { "epoch": 0.3558669806444202, "grad_norm": 0.36989521980285645, "learning_rate": 0.0001, "loss": 1.6257, "step": 3098 }, { "epoch": 0.3559818505542473, "grad_norm": 0.38265496492385864, "learning_rate": 0.0001, "loss": 1.6665, "step": 3099 }, { "epoch": 0.3560967204640744, "grad_norm": 0.4006288945674896, "learning_rate": 0.0001, "loss": 1.787, "step": 3100 }, { "epoch": 0.35621159037390154, "grad_norm": 0.38467901945114136, "learning_rate": 0.0001, "loss": 1.639, "step": 3101 }, { "epoch": 0.35632646028372866, "grad_norm": 0.3911599814891815, "learning_rate": 0.0001, "loss": 1.6307, "step": 3102 }, { "epoch": 0.3564413301935558, "grad_norm": 0.36751917004585266, "learning_rate": 0.0001, "loss": 1.5144, "step": 3103 }, { "epoch": 0.3565562001033829, "grad_norm": 0.3881228566169739, "learning_rate": 0.0001, "loss": 1.7045, "step": 3104 }, { "epoch": 0.35667107001321, "grad_norm": 0.36318087577819824, "learning_rate": 0.0001, "loss": 1.4927, "step": 3105 }, { "epoch": 0.35678593992303714, "grad_norm": 0.3672310709953308, "learning_rate": 0.0001, "loss": 1.6125, "step": 3106 }, { "epoch": 0.35690080983286426, "grad_norm": 0.35635697841644287, "learning_rate": 0.0001, "loss": 1.5576, "step": 3107 }, { "epoch": 0.3570156797426914, "grad_norm": 0.38991066813468933, "learning_rate": 0.0001, "loss": 1.6838, "step": 3108 }, { "epoch": 0.3571305496525185, "grad_norm": 0.3975917100906372, "learning_rate": 0.0001, "loss": 1.6502, "step": 3109 }, { "epoch": 0.3572454195623456, "grad_norm": 0.39171603322029114, "learning_rate": 0.0001, "loss": 1.7254, "step": 3110 }, { "epoch": 0.35736028947217274, "grad_norm": 0.39555254578590393, "learning_rate": 0.0001, "loss": 1.5358, "step": 3111 }, { "epoch": 0.35747515938199986, "grad_norm": 0.36104774475097656, "learning_rate": 0.0001, "loss": 1.4724, "step": 3112 }, { "epoch": 0.357590029291827, "grad_norm": 0.38759103417396545, "learning_rate": 0.0001, "loss": 1.6787, "step": 3113 }, { "epoch": 0.3577048992016541, "grad_norm": 0.4347061216831207, "learning_rate": 0.0001, "loss": 1.8388, "step": 3114 }, { "epoch": 0.3578197691114812, "grad_norm": 0.36423662304878235, "learning_rate": 0.0001, "loss": 1.7236, "step": 3115 }, { "epoch": 0.35793463902130834, "grad_norm": 0.4022355377674103, "learning_rate": 0.0001, "loss": 1.5869, "step": 3116 }, { "epoch": 0.35804950893113546, "grad_norm": 0.3823007047176361, "learning_rate": 0.0001, "loss": 1.5984, "step": 3117 }, { "epoch": 0.3581643788409626, "grad_norm": 0.3590918481349945, "learning_rate": 0.0001, "loss": 1.6226, "step": 3118 }, { "epoch": 0.35827924875078976, "grad_norm": 0.3881797790527344, "learning_rate": 0.0001, "loss": 1.7983, "step": 3119 }, { "epoch": 0.3583941186606169, "grad_norm": 0.41121959686279297, "learning_rate": 0.0001, "loss": 1.6413, "step": 3120 }, { "epoch": 0.358508988570444, "grad_norm": 0.37650343775749207, "learning_rate": 0.0001, "loss": 1.553, "step": 3121 }, { "epoch": 0.3586238584802711, "grad_norm": 0.4044298827648163, "learning_rate": 0.0001, "loss": 1.715, "step": 3122 }, { "epoch": 0.35873872839009824, "grad_norm": 0.3791009485721588, "learning_rate": 0.0001, "loss": 1.6326, "step": 3123 }, { "epoch": 0.35885359829992536, "grad_norm": 0.425060898065567, "learning_rate": 0.0001, "loss": 1.8154, "step": 3124 }, { "epoch": 0.3589684682097525, "grad_norm": 0.3991539776325226, "learning_rate": 0.0001, "loss": 1.7491, "step": 3125 }, { "epoch": 0.3590833381195796, "grad_norm": 0.40489476919174194, "learning_rate": 0.0001, "loss": 1.8376, "step": 3126 }, { "epoch": 0.3591982080294067, "grad_norm": 0.40158843994140625, "learning_rate": 0.0001, "loss": 1.818, "step": 3127 }, { "epoch": 0.35931307793923384, "grad_norm": 0.4292317032814026, "learning_rate": 0.0001, "loss": 1.8346, "step": 3128 }, { "epoch": 0.35942794784906096, "grad_norm": 0.4303164780139923, "learning_rate": 0.0001, "loss": 1.9145, "step": 3129 }, { "epoch": 0.3595428177588881, "grad_norm": 0.406921923160553, "learning_rate": 0.0001, "loss": 1.7952, "step": 3130 }, { "epoch": 0.3596576876687152, "grad_norm": 0.39321935176849365, "learning_rate": 0.0001, "loss": 1.8008, "step": 3131 }, { "epoch": 0.3597725575785423, "grad_norm": 0.36769285798072815, "learning_rate": 0.0001, "loss": 1.7165, "step": 3132 }, { "epoch": 0.35988742748836944, "grad_norm": 0.37966418266296387, "learning_rate": 0.0001, "loss": 1.652, "step": 3133 }, { "epoch": 0.36000229739819656, "grad_norm": 0.4449335038661957, "learning_rate": 0.0001, "loss": 1.8674, "step": 3134 }, { "epoch": 0.3601171673080237, "grad_norm": 0.37845826148986816, "learning_rate": 0.0001, "loss": 1.6453, "step": 3135 }, { "epoch": 0.3602320372178508, "grad_norm": 0.43711939454078674, "learning_rate": 0.0001, "loss": 1.6399, "step": 3136 }, { "epoch": 0.3603469071276779, "grad_norm": 0.3823258876800537, "learning_rate": 0.0001, "loss": 1.6534, "step": 3137 }, { "epoch": 0.36046177703750504, "grad_norm": 0.38525834679603577, "learning_rate": 0.0001, "loss": 1.611, "step": 3138 }, { "epoch": 0.36057664694733216, "grad_norm": 0.38904234766960144, "learning_rate": 0.0001, "loss": 1.5454, "step": 3139 }, { "epoch": 0.3606915168571593, "grad_norm": 0.39216116070747375, "learning_rate": 0.0001, "loss": 1.7976, "step": 3140 }, { "epoch": 0.3608063867669864, "grad_norm": 0.3699231743812561, "learning_rate": 0.0001, "loss": 1.5785, "step": 3141 }, { "epoch": 0.3609212566768135, "grad_norm": 0.3960736393928528, "learning_rate": 0.0001, "loss": 1.6743, "step": 3142 }, { "epoch": 0.36103612658664064, "grad_norm": 0.37303784489631653, "learning_rate": 0.0001, "loss": 1.415, "step": 3143 }, { "epoch": 0.36115099649646776, "grad_norm": 0.4160480797290802, "learning_rate": 0.0001, "loss": 1.7466, "step": 3144 }, { "epoch": 0.3612658664062949, "grad_norm": 0.4043230712413788, "learning_rate": 0.0001, "loss": 1.7383, "step": 3145 }, { "epoch": 0.361380736316122, "grad_norm": 0.3847825825214386, "learning_rate": 0.0001, "loss": 1.6484, "step": 3146 }, { "epoch": 0.3614956062259491, "grad_norm": 0.40463775396347046, "learning_rate": 0.0001, "loss": 1.6482, "step": 3147 }, { "epoch": 0.36161047613577624, "grad_norm": 0.3750319480895996, "learning_rate": 0.0001, "loss": 1.6921, "step": 3148 }, { "epoch": 0.36172534604560336, "grad_norm": 0.37009164690971375, "learning_rate": 0.0001, "loss": 1.6415, "step": 3149 }, { "epoch": 0.3618402159554305, "grad_norm": 0.34993571043014526, "learning_rate": 0.0001, "loss": 1.5076, "step": 3150 }, { "epoch": 0.3619550858652576, "grad_norm": 0.3800502419471741, "learning_rate": 0.0001, "loss": 1.6974, "step": 3151 }, { "epoch": 0.3620699557750847, "grad_norm": 0.3825697600841522, "learning_rate": 0.0001, "loss": 1.4917, "step": 3152 }, { "epoch": 0.36218482568491184, "grad_norm": 0.384375661611557, "learning_rate": 0.0001, "loss": 1.5734, "step": 3153 }, { "epoch": 0.36229969559473896, "grad_norm": 0.4005104899406433, "learning_rate": 0.0001, "loss": 1.7287, "step": 3154 }, { "epoch": 0.3624145655045661, "grad_norm": 0.3781236708164215, "learning_rate": 0.0001, "loss": 1.4272, "step": 3155 }, { "epoch": 0.3625294354143932, "grad_norm": 0.37591099739074707, "learning_rate": 0.0001, "loss": 1.5723, "step": 3156 }, { "epoch": 0.3626443053242203, "grad_norm": 0.3829016089439392, "learning_rate": 0.0001, "loss": 1.4127, "step": 3157 }, { "epoch": 0.36275917523404744, "grad_norm": 0.4181736707687378, "learning_rate": 0.0001, "loss": 1.8398, "step": 3158 }, { "epoch": 0.36287404514387456, "grad_norm": 0.3961413502693176, "learning_rate": 0.0001, "loss": 1.651, "step": 3159 }, { "epoch": 0.3629889150537017, "grad_norm": 0.3490610420703888, "learning_rate": 0.0001, "loss": 1.6495, "step": 3160 }, { "epoch": 0.3631037849635288, "grad_norm": 0.45399048924446106, "learning_rate": 0.0001, "loss": 1.7701, "step": 3161 }, { "epoch": 0.3632186548733559, "grad_norm": 0.3794902563095093, "learning_rate": 0.0001, "loss": 1.6005, "step": 3162 }, { "epoch": 0.36333352478318304, "grad_norm": 0.41195976734161377, "learning_rate": 0.0001, "loss": 1.7688, "step": 3163 }, { "epoch": 0.36344839469301016, "grad_norm": 0.4826469123363495, "learning_rate": 0.0001, "loss": 1.5523, "step": 3164 }, { "epoch": 0.3635632646028373, "grad_norm": 0.39594000577926636, "learning_rate": 0.0001, "loss": 1.6323, "step": 3165 }, { "epoch": 0.3636781345126644, "grad_norm": 0.41155776381492615, "learning_rate": 0.0001, "loss": 1.7261, "step": 3166 }, { "epoch": 0.3637930044224915, "grad_norm": 0.413484126329422, "learning_rate": 0.0001, "loss": 1.7269, "step": 3167 }, { "epoch": 0.36390787433231864, "grad_norm": 0.41094526648521423, "learning_rate": 0.0001, "loss": 1.5909, "step": 3168 }, { "epoch": 0.36402274424214576, "grad_norm": 0.4104525148868561, "learning_rate": 0.0001, "loss": 1.7229, "step": 3169 }, { "epoch": 0.3641376141519729, "grad_norm": 0.40125396847724915, "learning_rate": 0.0001, "loss": 1.7518, "step": 3170 }, { "epoch": 0.3642524840618, "grad_norm": 0.37167468667030334, "learning_rate": 0.0001, "loss": 1.6433, "step": 3171 }, { "epoch": 0.3643673539716271, "grad_norm": 0.4034722149372101, "learning_rate": 0.0001, "loss": 1.6484, "step": 3172 }, { "epoch": 0.36448222388145424, "grad_norm": 0.3839128613471985, "learning_rate": 0.0001, "loss": 1.6553, "step": 3173 }, { "epoch": 0.36459709379128136, "grad_norm": 0.36040592193603516, "learning_rate": 0.0001, "loss": 1.5386, "step": 3174 }, { "epoch": 0.3647119637011085, "grad_norm": 0.3928232192993164, "learning_rate": 0.0001, "loss": 1.6535, "step": 3175 }, { "epoch": 0.3648268336109356, "grad_norm": 0.3935447931289673, "learning_rate": 0.0001, "loss": 1.6686, "step": 3176 }, { "epoch": 0.3649417035207627, "grad_norm": 0.37371543049812317, "learning_rate": 0.0001, "loss": 1.5849, "step": 3177 }, { "epoch": 0.36505657343058984, "grad_norm": 0.40319526195526123, "learning_rate": 0.0001, "loss": 1.5654, "step": 3178 }, { "epoch": 0.36517144334041696, "grad_norm": 0.42261865735054016, "learning_rate": 0.0001, "loss": 1.5549, "step": 3179 }, { "epoch": 0.3652863132502441, "grad_norm": 0.4181409180164337, "learning_rate": 0.0001, "loss": 1.6968, "step": 3180 }, { "epoch": 0.3654011831600712, "grad_norm": 0.420463889837265, "learning_rate": 0.0001, "loss": 1.8145, "step": 3181 }, { "epoch": 0.3655160530698983, "grad_norm": 0.3919375538825989, "learning_rate": 0.0001, "loss": 1.5424, "step": 3182 }, { "epoch": 0.36563092297972544, "grad_norm": 0.6190922260284424, "learning_rate": 0.0001, "loss": 1.6232, "step": 3183 }, { "epoch": 0.36574579288955256, "grad_norm": 0.3965461254119873, "learning_rate": 0.0001, "loss": 1.6968, "step": 3184 }, { "epoch": 0.3658606627993797, "grad_norm": 0.3818904757499695, "learning_rate": 0.0001, "loss": 1.7041, "step": 3185 }, { "epoch": 0.3659755327092068, "grad_norm": 0.38142475485801697, "learning_rate": 0.0001, "loss": 1.6398, "step": 3186 }, { "epoch": 0.3660904026190339, "grad_norm": 0.4176258444786072, "learning_rate": 0.0001, "loss": 1.678, "step": 3187 }, { "epoch": 0.3662052725288611, "grad_norm": 0.3882141709327698, "learning_rate": 0.0001, "loss": 1.601, "step": 3188 }, { "epoch": 0.3663201424386882, "grad_norm": 0.3725062608718872, "learning_rate": 0.0001, "loss": 1.5094, "step": 3189 }, { "epoch": 0.36643501234851533, "grad_norm": 0.4642198979854584, "learning_rate": 0.0001, "loss": 2.0719, "step": 3190 }, { "epoch": 0.36654988225834245, "grad_norm": 0.4120834171772003, "learning_rate": 0.0001, "loss": 1.475, "step": 3191 }, { "epoch": 0.36666475216816957, "grad_norm": 0.44245001673698425, "learning_rate": 0.0001, "loss": 1.818, "step": 3192 }, { "epoch": 0.3667796220779967, "grad_norm": 0.3757948875427246, "learning_rate": 0.0001, "loss": 1.6047, "step": 3193 }, { "epoch": 0.3668944919878238, "grad_norm": 0.386857271194458, "learning_rate": 0.0001, "loss": 1.7447, "step": 3194 }, { "epoch": 0.36700936189765093, "grad_norm": 0.37118127942085266, "learning_rate": 0.0001, "loss": 1.7593, "step": 3195 }, { "epoch": 0.36712423180747805, "grad_norm": 0.39029768109321594, "learning_rate": 0.0001, "loss": 1.6951, "step": 3196 }, { "epoch": 0.36723910171730517, "grad_norm": 0.4252242147922516, "learning_rate": 0.0001, "loss": 1.817, "step": 3197 }, { "epoch": 0.3673539716271323, "grad_norm": 0.39361050724983215, "learning_rate": 0.0001, "loss": 1.663, "step": 3198 }, { "epoch": 0.3674688415369594, "grad_norm": 0.4176224172115326, "learning_rate": 0.0001, "loss": 1.8173, "step": 3199 }, { "epoch": 0.36758371144678653, "grad_norm": 0.42337673902511597, "learning_rate": 0.0001, "loss": 1.6874, "step": 3200 }, { "epoch": 0.36769858135661365, "grad_norm": 0.3670978546142578, "learning_rate": 0.0001, "loss": 1.6705, "step": 3201 }, { "epoch": 0.36781345126644077, "grad_norm": 0.3871794044971466, "learning_rate": 0.0001, "loss": 1.7955, "step": 3202 }, { "epoch": 0.3679283211762679, "grad_norm": 0.39479079842567444, "learning_rate": 0.0001, "loss": 1.8078, "step": 3203 }, { "epoch": 0.368043191086095, "grad_norm": 0.405738890171051, "learning_rate": 0.0001, "loss": 1.8309, "step": 3204 }, { "epoch": 0.36815806099592213, "grad_norm": 0.37141597270965576, "learning_rate": 0.0001, "loss": 1.5382, "step": 3205 }, { "epoch": 0.36827293090574925, "grad_norm": 0.4640069007873535, "learning_rate": 0.0001, "loss": 1.5492, "step": 3206 }, { "epoch": 0.36838780081557637, "grad_norm": 0.39117664098739624, "learning_rate": 0.0001, "loss": 1.8055, "step": 3207 }, { "epoch": 0.3685026707254035, "grad_norm": 0.3931315839290619, "learning_rate": 0.0001, "loss": 1.6636, "step": 3208 }, { "epoch": 0.3686175406352306, "grad_norm": 0.406044065952301, "learning_rate": 0.0001, "loss": 1.7652, "step": 3209 }, { "epoch": 0.36873241054505773, "grad_norm": 0.37992948293685913, "learning_rate": 0.0001, "loss": 1.5028, "step": 3210 }, { "epoch": 0.36884728045488485, "grad_norm": 0.4231666326522827, "learning_rate": 0.0001, "loss": 1.7238, "step": 3211 }, { "epoch": 0.36896215036471197, "grad_norm": 0.4150424599647522, "learning_rate": 0.0001, "loss": 1.8108, "step": 3212 }, { "epoch": 0.3690770202745391, "grad_norm": 0.3594525158405304, "learning_rate": 0.0001, "loss": 1.4806, "step": 3213 }, { "epoch": 0.3691918901843662, "grad_norm": 0.381798654794693, "learning_rate": 0.0001, "loss": 1.6687, "step": 3214 }, { "epoch": 0.36930676009419333, "grad_norm": 0.38499388098716736, "learning_rate": 0.0001, "loss": 1.7469, "step": 3215 }, { "epoch": 0.36942163000402045, "grad_norm": 0.4006686210632324, "learning_rate": 0.0001, "loss": 1.7326, "step": 3216 }, { "epoch": 0.36953649991384757, "grad_norm": 0.396440327167511, "learning_rate": 0.0001, "loss": 1.6461, "step": 3217 }, { "epoch": 0.3696513698236747, "grad_norm": 0.3961047828197479, "learning_rate": 0.0001, "loss": 1.762, "step": 3218 }, { "epoch": 0.3697662397335018, "grad_norm": 0.4057855010032654, "learning_rate": 0.0001, "loss": 1.5334, "step": 3219 }, { "epoch": 0.36988110964332893, "grad_norm": 0.4407478868961334, "learning_rate": 0.0001, "loss": 1.6928, "step": 3220 }, { "epoch": 0.36999597955315605, "grad_norm": 0.37952542304992676, "learning_rate": 0.0001, "loss": 1.7687, "step": 3221 }, { "epoch": 0.37011084946298317, "grad_norm": 0.3981403708457947, "learning_rate": 0.0001, "loss": 1.6591, "step": 3222 }, { "epoch": 0.3702257193728103, "grad_norm": 0.3874553143978119, "learning_rate": 0.0001, "loss": 1.6144, "step": 3223 }, { "epoch": 0.3703405892826374, "grad_norm": 0.40902000665664673, "learning_rate": 0.0001, "loss": 1.6587, "step": 3224 }, { "epoch": 0.37045545919246453, "grad_norm": 0.42551088333129883, "learning_rate": 0.0001, "loss": 1.7355, "step": 3225 }, { "epoch": 0.37057032910229165, "grad_norm": 0.3739902079105377, "learning_rate": 0.0001, "loss": 1.4641, "step": 3226 }, { "epoch": 0.37068519901211877, "grad_norm": 0.41387036442756653, "learning_rate": 0.0001, "loss": 1.7716, "step": 3227 }, { "epoch": 0.3708000689219459, "grad_norm": 0.393655002117157, "learning_rate": 0.0001, "loss": 1.5306, "step": 3228 }, { "epoch": 0.370914938831773, "grad_norm": 0.39244723320007324, "learning_rate": 0.0001, "loss": 1.7408, "step": 3229 }, { "epoch": 0.37102980874160013, "grad_norm": 0.3959055542945862, "learning_rate": 0.0001, "loss": 1.8051, "step": 3230 }, { "epoch": 0.37114467865142725, "grad_norm": 0.39641687273979187, "learning_rate": 0.0001, "loss": 1.5969, "step": 3231 }, { "epoch": 0.37125954856125437, "grad_norm": 0.37749791145324707, "learning_rate": 0.0001, "loss": 1.685, "step": 3232 }, { "epoch": 0.3713744184710815, "grad_norm": 0.40520602464675903, "learning_rate": 0.0001, "loss": 1.6052, "step": 3233 }, { "epoch": 0.3714892883809086, "grad_norm": 0.37299278378486633, "learning_rate": 0.0001, "loss": 1.6383, "step": 3234 }, { "epoch": 0.37160415829073573, "grad_norm": 0.41653332114219666, "learning_rate": 0.0001, "loss": 1.5829, "step": 3235 }, { "epoch": 0.37171902820056285, "grad_norm": 0.4918358623981476, "learning_rate": 0.0001, "loss": 1.894, "step": 3236 }, { "epoch": 0.37183389811038997, "grad_norm": 0.40961939096450806, "learning_rate": 0.0001, "loss": 1.6471, "step": 3237 }, { "epoch": 0.3719487680202171, "grad_norm": 0.39822736382484436, "learning_rate": 0.0001, "loss": 1.6876, "step": 3238 }, { "epoch": 0.3720636379300442, "grad_norm": 0.42709147930145264, "learning_rate": 0.0001, "loss": 1.6064, "step": 3239 }, { "epoch": 0.37217850783987133, "grad_norm": 0.3828563690185547, "learning_rate": 0.0001, "loss": 1.5153, "step": 3240 }, { "epoch": 0.37229337774969845, "grad_norm": 0.3979793190956116, "learning_rate": 0.0001, "loss": 1.7632, "step": 3241 }, { "epoch": 0.37240824765952557, "grad_norm": 0.4186950623989105, "learning_rate": 0.0001, "loss": 1.6498, "step": 3242 }, { "epoch": 0.3725231175693527, "grad_norm": 0.3847867548465729, "learning_rate": 0.0001, "loss": 1.6185, "step": 3243 }, { "epoch": 0.3726379874791798, "grad_norm": 0.4237745404243469, "learning_rate": 0.0001, "loss": 1.6776, "step": 3244 }, { "epoch": 0.37275285738900693, "grad_norm": 0.41460272669792175, "learning_rate": 0.0001, "loss": 1.8457, "step": 3245 }, { "epoch": 0.37286772729883405, "grad_norm": 0.37694281339645386, "learning_rate": 0.0001, "loss": 1.5774, "step": 3246 }, { "epoch": 0.37298259720866117, "grad_norm": 0.3860875368118286, "learning_rate": 0.0001, "loss": 1.7283, "step": 3247 }, { "epoch": 0.3730974671184883, "grad_norm": 0.4228290319442749, "learning_rate": 0.0001, "loss": 1.9065, "step": 3248 }, { "epoch": 0.3732123370283154, "grad_norm": 0.4396858811378479, "learning_rate": 0.0001, "loss": 1.8495, "step": 3249 }, { "epoch": 0.37332720693814253, "grad_norm": 0.44928887486457825, "learning_rate": 0.0001, "loss": 1.9191, "step": 3250 }, { "epoch": 0.37344207684796965, "grad_norm": 0.4096646010875702, "learning_rate": 0.0001, "loss": 1.7892, "step": 3251 }, { "epoch": 0.37355694675779677, "grad_norm": 0.36828523874282837, "learning_rate": 0.0001, "loss": 1.396, "step": 3252 }, { "epoch": 0.3736718166676239, "grad_norm": 0.3781738877296448, "learning_rate": 0.0001, "loss": 1.6157, "step": 3253 }, { "epoch": 0.373786686577451, "grad_norm": 0.3941371738910675, "learning_rate": 0.0001, "loss": 1.7005, "step": 3254 }, { "epoch": 0.37390155648727813, "grad_norm": 0.3932330310344696, "learning_rate": 0.0001, "loss": 1.605, "step": 3255 }, { "epoch": 0.3740164263971053, "grad_norm": 0.40510842204093933, "learning_rate": 0.0001, "loss": 1.7448, "step": 3256 }, { "epoch": 0.3741312963069324, "grad_norm": 0.40375015139579773, "learning_rate": 0.0001, "loss": 1.7276, "step": 3257 }, { "epoch": 0.37424616621675955, "grad_norm": 0.4124782085418701, "learning_rate": 0.0001, "loss": 1.7071, "step": 3258 }, { "epoch": 0.37436103612658667, "grad_norm": 0.3871046006679535, "learning_rate": 0.0001, "loss": 1.7161, "step": 3259 }, { "epoch": 0.3744759060364138, "grad_norm": 0.40944960713386536, "learning_rate": 0.0001, "loss": 1.5999, "step": 3260 }, { "epoch": 0.3745907759462409, "grad_norm": 0.39792561531066895, "learning_rate": 0.0001, "loss": 1.6789, "step": 3261 }, { "epoch": 0.374705645856068, "grad_norm": 0.38910964131355286, "learning_rate": 0.0001, "loss": 1.6719, "step": 3262 }, { "epoch": 0.37482051576589515, "grad_norm": 0.37065139412879944, "learning_rate": 0.0001, "loss": 1.5161, "step": 3263 }, { "epoch": 0.37493538567572227, "grad_norm": 0.3843998908996582, "learning_rate": 0.0001, "loss": 1.7721, "step": 3264 }, { "epoch": 0.3750502555855494, "grad_norm": 0.4312693178653717, "learning_rate": 0.0001, "loss": 1.9565, "step": 3265 }, { "epoch": 0.3751651254953765, "grad_norm": 0.3939458727836609, "learning_rate": 0.0001, "loss": 1.7894, "step": 3266 }, { "epoch": 0.3752799954052036, "grad_norm": 0.3727447986602783, "learning_rate": 0.0001, "loss": 1.5771, "step": 3267 }, { "epoch": 0.37539486531503075, "grad_norm": 0.3730446994304657, "learning_rate": 0.0001, "loss": 1.5062, "step": 3268 }, { "epoch": 0.37550973522485787, "grad_norm": 0.4053596258163452, "learning_rate": 0.0001, "loss": 1.7005, "step": 3269 }, { "epoch": 0.375624605134685, "grad_norm": 0.4294143617153168, "learning_rate": 0.0001, "loss": 1.5863, "step": 3270 }, { "epoch": 0.3757394750445121, "grad_norm": 0.3998027443885803, "learning_rate": 0.0001, "loss": 1.6598, "step": 3271 }, { "epoch": 0.3758543449543392, "grad_norm": 0.39624300599098206, "learning_rate": 0.0001, "loss": 1.6187, "step": 3272 }, { "epoch": 0.37596921486416635, "grad_norm": 0.38765525817871094, "learning_rate": 0.0001, "loss": 1.7249, "step": 3273 }, { "epoch": 0.37608408477399347, "grad_norm": 0.38132765889167786, "learning_rate": 0.0001, "loss": 1.6718, "step": 3274 }, { "epoch": 0.3761989546838206, "grad_norm": 0.386642187833786, "learning_rate": 0.0001, "loss": 1.6102, "step": 3275 }, { "epoch": 0.3763138245936477, "grad_norm": 0.3864053189754486, "learning_rate": 0.0001, "loss": 1.5038, "step": 3276 }, { "epoch": 0.3764286945034748, "grad_norm": 0.40680447220802307, "learning_rate": 0.0001, "loss": 1.6453, "step": 3277 }, { "epoch": 0.37654356441330195, "grad_norm": 0.38861724734306335, "learning_rate": 0.0001, "loss": 1.6394, "step": 3278 }, { "epoch": 0.37665843432312907, "grad_norm": 0.3894346058368683, "learning_rate": 0.0001, "loss": 1.7633, "step": 3279 }, { "epoch": 0.3767733042329562, "grad_norm": 0.4006745517253876, "learning_rate": 0.0001, "loss": 1.6574, "step": 3280 }, { "epoch": 0.3768881741427833, "grad_norm": 0.4053511917591095, "learning_rate": 0.0001, "loss": 1.5958, "step": 3281 }, { "epoch": 0.3770030440526104, "grad_norm": 0.44916823506355286, "learning_rate": 0.0001, "loss": 1.5274, "step": 3282 }, { "epoch": 0.37711791396243755, "grad_norm": 0.4060499668121338, "learning_rate": 0.0001, "loss": 1.6009, "step": 3283 }, { "epoch": 0.37723278387226467, "grad_norm": 0.43672630190849304, "learning_rate": 0.0001, "loss": 1.7468, "step": 3284 }, { "epoch": 0.3773476537820918, "grad_norm": 0.4144275486469269, "learning_rate": 0.0001, "loss": 1.7347, "step": 3285 }, { "epoch": 0.3774625236919189, "grad_norm": 0.3954853415489197, "learning_rate": 0.0001, "loss": 1.7747, "step": 3286 }, { "epoch": 0.377577393601746, "grad_norm": 0.4159530699253082, "learning_rate": 0.0001, "loss": 1.868, "step": 3287 }, { "epoch": 0.37769226351157315, "grad_norm": 0.4597531855106354, "learning_rate": 0.0001, "loss": 1.6728, "step": 3288 }, { "epoch": 0.37780713342140027, "grad_norm": 0.3839264214038849, "learning_rate": 0.0001, "loss": 1.4713, "step": 3289 }, { "epoch": 0.3779220033312274, "grad_norm": 0.40157079696655273, "learning_rate": 0.0001, "loss": 1.7359, "step": 3290 }, { "epoch": 0.3780368732410545, "grad_norm": 0.38555535674095154, "learning_rate": 0.0001, "loss": 1.5765, "step": 3291 }, { "epoch": 0.3781517431508816, "grad_norm": 0.41350314021110535, "learning_rate": 0.0001, "loss": 1.6308, "step": 3292 }, { "epoch": 0.37826661306070875, "grad_norm": 0.4091131389141083, "learning_rate": 0.0001, "loss": 1.7817, "step": 3293 }, { "epoch": 0.37838148297053587, "grad_norm": 0.39197850227355957, "learning_rate": 0.0001, "loss": 1.7373, "step": 3294 }, { "epoch": 0.378496352880363, "grad_norm": 0.3582981824874878, "learning_rate": 0.0001, "loss": 1.5578, "step": 3295 }, { "epoch": 0.3786112227901901, "grad_norm": 0.4097733497619629, "learning_rate": 0.0001, "loss": 1.815, "step": 3296 }, { "epoch": 0.3787260927000172, "grad_norm": 0.405089408159256, "learning_rate": 0.0001, "loss": 1.7199, "step": 3297 }, { "epoch": 0.37884096260984435, "grad_norm": 0.3839695155620575, "learning_rate": 0.0001, "loss": 1.7156, "step": 3298 }, { "epoch": 0.37895583251967146, "grad_norm": 0.4171733260154724, "learning_rate": 0.0001, "loss": 1.4943, "step": 3299 }, { "epoch": 0.3790707024294986, "grad_norm": 0.39771202206611633, "learning_rate": 0.0001, "loss": 1.4806, "step": 3300 }, { "epoch": 0.3791855723393257, "grad_norm": 0.4176054894924164, "learning_rate": 0.0001, "loss": 1.6608, "step": 3301 }, { "epoch": 0.3793004422491528, "grad_norm": 0.3709241449832916, "learning_rate": 0.0001, "loss": 1.4327, "step": 3302 }, { "epoch": 0.37941531215897994, "grad_norm": 0.4202117919921875, "learning_rate": 0.0001, "loss": 1.7664, "step": 3303 }, { "epoch": 0.37953018206880706, "grad_norm": 0.4221265912055969, "learning_rate": 0.0001, "loss": 1.8107, "step": 3304 }, { "epoch": 0.3796450519786342, "grad_norm": 0.3954116404056549, "learning_rate": 0.0001, "loss": 1.5563, "step": 3305 }, { "epoch": 0.3797599218884613, "grad_norm": 0.3891046643257141, "learning_rate": 0.0001, "loss": 1.4281, "step": 3306 }, { "epoch": 0.3798747917982884, "grad_norm": 0.4024218022823334, "learning_rate": 0.0001, "loss": 1.7119, "step": 3307 }, { "epoch": 0.37998966170811554, "grad_norm": 0.45850181579589844, "learning_rate": 0.0001, "loss": 1.7876, "step": 3308 }, { "epoch": 0.38010453161794266, "grad_norm": 0.4152509868144989, "learning_rate": 0.0001, "loss": 1.847, "step": 3309 }, { "epoch": 0.3802194015277698, "grad_norm": 0.4059930443763733, "learning_rate": 0.0001, "loss": 1.6807, "step": 3310 }, { "epoch": 0.3803342714375969, "grad_norm": 0.4039838910102844, "learning_rate": 0.0001, "loss": 1.7222, "step": 3311 }, { "epoch": 0.380449141347424, "grad_norm": 0.40002793073654175, "learning_rate": 0.0001, "loss": 1.7285, "step": 3312 }, { "epoch": 0.38056401125725114, "grad_norm": 0.3877595067024231, "learning_rate": 0.0001, "loss": 1.6028, "step": 3313 }, { "epoch": 0.38067888116707826, "grad_norm": 0.41167452931404114, "learning_rate": 0.0001, "loss": 1.7559, "step": 3314 }, { "epoch": 0.3807937510769054, "grad_norm": 0.3951496481895447, "learning_rate": 0.0001, "loss": 1.6941, "step": 3315 }, { "epoch": 0.3809086209867325, "grad_norm": 0.4057624936103821, "learning_rate": 0.0001, "loss": 1.7384, "step": 3316 }, { "epoch": 0.3810234908965596, "grad_norm": 0.41710686683654785, "learning_rate": 0.0001, "loss": 1.6361, "step": 3317 }, { "epoch": 0.38113836080638674, "grad_norm": 0.38492968678474426, "learning_rate": 0.0001, "loss": 1.5527, "step": 3318 }, { "epoch": 0.38125323071621386, "grad_norm": 0.3713145852088928, "learning_rate": 0.0001, "loss": 1.4979, "step": 3319 }, { "epoch": 0.381368100626041, "grad_norm": 0.3968343734741211, "learning_rate": 0.0001, "loss": 1.4566, "step": 3320 }, { "epoch": 0.3814829705358681, "grad_norm": 0.3992241621017456, "learning_rate": 0.0001, "loss": 1.6667, "step": 3321 }, { "epoch": 0.3815978404456952, "grad_norm": 0.3984096050262451, "learning_rate": 0.0001, "loss": 1.6411, "step": 3322 }, { "epoch": 0.38171271035552234, "grad_norm": 0.4242939352989197, "learning_rate": 0.0001, "loss": 1.8618, "step": 3323 }, { "epoch": 0.38182758026534946, "grad_norm": 0.4698657691478729, "learning_rate": 0.0001, "loss": 1.7804, "step": 3324 }, { "epoch": 0.38194245017517664, "grad_norm": 0.3952822685241699, "learning_rate": 0.0001, "loss": 1.762, "step": 3325 }, { "epoch": 0.38205732008500376, "grad_norm": 0.3968496322631836, "learning_rate": 0.0001, "loss": 1.7494, "step": 3326 }, { "epoch": 0.3821721899948309, "grad_norm": 0.35102754831314087, "learning_rate": 0.0001, "loss": 1.5076, "step": 3327 }, { "epoch": 0.382287059904658, "grad_norm": 0.3999139070510864, "learning_rate": 0.0001, "loss": 1.422, "step": 3328 }, { "epoch": 0.3824019298144851, "grad_norm": 0.3803260922431946, "learning_rate": 0.0001, "loss": 1.5255, "step": 3329 }, { "epoch": 0.38251679972431224, "grad_norm": 0.3881869614124298, "learning_rate": 0.0001, "loss": 1.6423, "step": 3330 }, { "epoch": 0.38263166963413936, "grad_norm": 0.3832140266895294, "learning_rate": 0.0001, "loss": 1.5412, "step": 3331 }, { "epoch": 0.3827465395439665, "grad_norm": 0.4469684064388275, "learning_rate": 0.0001, "loss": 1.7267, "step": 3332 }, { "epoch": 0.3828614094537936, "grad_norm": 0.3962031602859497, "learning_rate": 0.0001, "loss": 1.6285, "step": 3333 }, { "epoch": 0.3829762793636207, "grad_norm": 0.408916175365448, "learning_rate": 0.0001, "loss": 1.6637, "step": 3334 }, { "epoch": 0.38309114927344784, "grad_norm": 0.4158702790737152, "learning_rate": 0.0001, "loss": 1.6769, "step": 3335 }, { "epoch": 0.38320601918327496, "grad_norm": 0.3798253536224365, "learning_rate": 0.0001, "loss": 1.5571, "step": 3336 }, { "epoch": 0.3833208890931021, "grad_norm": 0.39568817615509033, "learning_rate": 0.0001, "loss": 1.6156, "step": 3337 }, { "epoch": 0.3834357590029292, "grad_norm": 0.38767796754837036, "learning_rate": 0.0001, "loss": 1.6802, "step": 3338 }, { "epoch": 0.3835506289127563, "grad_norm": 0.3866712749004364, "learning_rate": 0.0001, "loss": 1.7018, "step": 3339 }, { "epoch": 0.38366549882258344, "grad_norm": 0.41397809982299805, "learning_rate": 0.0001, "loss": 1.8039, "step": 3340 }, { "epoch": 0.38378036873241056, "grad_norm": 0.39398789405822754, "learning_rate": 0.0001, "loss": 1.6419, "step": 3341 }, { "epoch": 0.3838952386422377, "grad_norm": 0.4054300785064697, "learning_rate": 0.0001, "loss": 1.6549, "step": 3342 }, { "epoch": 0.3840101085520648, "grad_norm": 0.4255220890045166, "learning_rate": 0.0001, "loss": 1.9429, "step": 3343 }, { "epoch": 0.3841249784618919, "grad_norm": 0.4655556082725525, "learning_rate": 0.0001, "loss": 1.7391, "step": 3344 }, { "epoch": 0.38423984837171904, "grad_norm": 0.43720874190330505, "learning_rate": 0.0001, "loss": 1.7068, "step": 3345 }, { "epoch": 0.38435471828154616, "grad_norm": 0.443059504032135, "learning_rate": 0.0001, "loss": 1.6133, "step": 3346 }, { "epoch": 0.3844695881913733, "grad_norm": 0.4180503189563751, "learning_rate": 0.0001, "loss": 1.7699, "step": 3347 }, { "epoch": 0.3845844581012004, "grad_norm": 0.3981085419654846, "learning_rate": 0.0001, "loss": 1.6587, "step": 3348 }, { "epoch": 0.3846993280110275, "grad_norm": 0.38549384474754333, "learning_rate": 0.0001, "loss": 1.5092, "step": 3349 }, { "epoch": 0.38481419792085464, "grad_norm": 0.4354805052280426, "learning_rate": 0.0001, "loss": 1.7426, "step": 3350 }, { "epoch": 0.38492906783068176, "grad_norm": 0.39129069447517395, "learning_rate": 0.0001, "loss": 1.6652, "step": 3351 }, { "epoch": 0.3850439377405089, "grad_norm": 0.40617823600769043, "learning_rate": 0.0001, "loss": 1.7721, "step": 3352 }, { "epoch": 0.385158807650336, "grad_norm": 0.3931029438972473, "learning_rate": 0.0001, "loss": 1.4904, "step": 3353 }, { "epoch": 0.3852736775601631, "grad_norm": 0.37746840715408325, "learning_rate": 0.0001, "loss": 1.6286, "step": 3354 }, { "epoch": 0.38538854746999024, "grad_norm": 0.4153432250022888, "learning_rate": 0.0001, "loss": 1.8866, "step": 3355 }, { "epoch": 0.38550341737981736, "grad_norm": 0.3631424605846405, "learning_rate": 0.0001, "loss": 1.6112, "step": 3356 }, { "epoch": 0.3856182872896445, "grad_norm": 0.42953309416770935, "learning_rate": 0.0001, "loss": 1.9191, "step": 3357 }, { "epoch": 0.3857331571994716, "grad_norm": 0.4635213613510132, "learning_rate": 0.0001, "loss": 1.9103, "step": 3358 }, { "epoch": 0.3858480271092987, "grad_norm": 0.40578657388687134, "learning_rate": 0.0001, "loss": 1.7202, "step": 3359 }, { "epoch": 0.38596289701912584, "grad_norm": 0.3969693183898926, "learning_rate": 0.0001, "loss": 1.7364, "step": 3360 }, { "epoch": 0.38607776692895296, "grad_norm": 0.39727887511253357, "learning_rate": 0.0001, "loss": 1.6442, "step": 3361 }, { "epoch": 0.3861926368387801, "grad_norm": 0.3983653485774994, "learning_rate": 0.0001, "loss": 1.5409, "step": 3362 }, { "epoch": 0.3863075067486072, "grad_norm": 0.3816327452659607, "learning_rate": 0.0001, "loss": 1.7286, "step": 3363 }, { "epoch": 0.3864223766584343, "grad_norm": 0.38533225655555725, "learning_rate": 0.0001, "loss": 1.6898, "step": 3364 }, { "epoch": 0.38653724656826144, "grad_norm": 0.40804487466812134, "learning_rate": 0.0001, "loss": 1.7621, "step": 3365 }, { "epoch": 0.38665211647808856, "grad_norm": 0.3823128044605255, "learning_rate": 0.0001, "loss": 1.6575, "step": 3366 }, { "epoch": 0.3867669863879157, "grad_norm": 0.39397284388542175, "learning_rate": 0.0001, "loss": 1.4919, "step": 3367 }, { "epoch": 0.3868818562977428, "grad_norm": 0.38799893856048584, "learning_rate": 0.0001, "loss": 1.5236, "step": 3368 }, { "epoch": 0.3869967262075699, "grad_norm": 0.3756542205810547, "learning_rate": 0.0001, "loss": 1.5963, "step": 3369 }, { "epoch": 0.38711159611739704, "grad_norm": 0.40256842970848083, "learning_rate": 0.0001, "loss": 1.606, "step": 3370 }, { "epoch": 0.38722646602722416, "grad_norm": 0.43613606691360474, "learning_rate": 0.0001, "loss": 1.8034, "step": 3371 }, { "epoch": 0.3873413359370513, "grad_norm": 0.38475316762924194, "learning_rate": 0.0001, "loss": 1.6643, "step": 3372 }, { "epoch": 0.3874562058468784, "grad_norm": 0.40157008171081543, "learning_rate": 0.0001, "loss": 1.6925, "step": 3373 }, { "epoch": 0.3875710757567055, "grad_norm": 0.39951568841934204, "learning_rate": 0.0001, "loss": 1.5993, "step": 3374 }, { "epoch": 0.38768594566653264, "grad_norm": 0.42573562264442444, "learning_rate": 0.0001, "loss": 1.7148, "step": 3375 }, { "epoch": 0.38780081557635976, "grad_norm": 0.3692642152309418, "learning_rate": 0.0001, "loss": 1.6399, "step": 3376 }, { "epoch": 0.3879156854861869, "grad_norm": 0.3984464108943939, "learning_rate": 0.0001, "loss": 1.8419, "step": 3377 }, { "epoch": 0.388030555396014, "grad_norm": 0.40837594866752625, "learning_rate": 0.0001, "loss": 1.8327, "step": 3378 }, { "epoch": 0.3881454253058411, "grad_norm": 0.4153634011745453, "learning_rate": 0.0001, "loss": 1.6514, "step": 3379 }, { "epoch": 0.38826029521566824, "grad_norm": 0.40997132658958435, "learning_rate": 0.0001, "loss": 1.7067, "step": 3380 }, { "epoch": 0.38837516512549536, "grad_norm": 0.43204736709594727, "learning_rate": 0.0001, "loss": 1.7734, "step": 3381 }, { "epoch": 0.3884900350353225, "grad_norm": 0.4231989085674286, "learning_rate": 0.0001, "loss": 1.7414, "step": 3382 }, { "epoch": 0.3886049049451496, "grad_norm": 0.3816749155521393, "learning_rate": 0.0001, "loss": 1.6473, "step": 3383 }, { "epoch": 0.3887197748549767, "grad_norm": 0.4512353539466858, "learning_rate": 0.0001, "loss": 1.9125, "step": 3384 }, { "epoch": 0.38883464476480384, "grad_norm": 0.3650393486022949, "learning_rate": 0.0001, "loss": 1.6222, "step": 3385 }, { "epoch": 0.38894951467463096, "grad_norm": 0.3751547038555145, "learning_rate": 0.0001, "loss": 1.7122, "step": 3386 }, { "epoch": 0.3890643845844581, "grad_norm": 0.36982572078704834, "learning_rate": 0.0001, "loss": 1.6857, "step": 3387 }, { "epoch": 0.3891792544942852, "grad_norm": 0.43625834584236145, "learning_rate": 0.0001, "loss": 1.591, "step": 3388 }, { "epoch": 0.3892941244041123, "grad_norm": 0.40629592537879944, "learning_rate": 0.0001, "loss": 1.6299, "step": 3389 }, { "epoch": 0.38940899431393944, "grad_norm": 0.41246914863586426, "learning_rate": 0.0001, "loss": 1.8241, "step": 3390 }, { "epoch": 0.38952386422376656, "grad_norm": 0.40135088562965393, "learning_rate": 0.0001, "loss": 1.5663, "step": 3391 }, { "epoch": 0.3896387341335937, "grad_norm": 0.4104592502117157, "learning_rate": 0.0001, "loss": 1.8265, "step": 3392 }, { "epoch": 0.3897536040434208, "grad_norm": 0.35968706011772156, "learning_rate": 0.0001, "loss": 1.5164, "step": 3393 }, { "epoch": 0.389868473953248, "grad_norm": 0.4108087420463562, "learning_rate": 0.0001, "loss": 1.6904, "step": 3394 }, { "epoch": 0.3899833438630751, "grad_norm": 0.39649486541748047, "learning_rate": 0.0001, "loss": 1.6845, "step": 3395 }, { "epoch": 0.3900982137729022, "grad_norm": 0.448559045791626, "learning_rate": 0.0001, "loss": 1.8202, "step": 3396 }, { "epoch": 0.39021308368272933, "grad_norm": 0.40180444717407227, "learning_rate": 0.0001, "loss": 1.7422, "step": 3397 }, { "epoch": 0.39032795359255645, "grad_norm": 0.4011480510234833, "learning_rate": 0.0001, "loss": 1.5017, "step": 3398 }, { "epoch": 0.3904428235023836, "grad_norm": 0.41441354155540466, "learning_rate": 0.0001, "loss": 1.5995, "step": 3399 }, { "epoch": 0.3905576934122107, "grad_norm": 0.41887158155441284, "learning_rate": 0.0001, "loss": 1.7279, "step": 3400 }, { "epoch": 0.3906725633220378, "grad_norm": 0.4123765826225281, "learning_rate": 0.0001, "loss": 1.7621, "step": 3401 }, { "epoch": 0.39078743323186493, "grad_norm": 0.3798231780529022, "learning_rate": 0.0001, "loss": 1.4537, "step": 3402 }, { "epoch": 0.39090230314169205, "grad_norm": 0.40583983063697815, "learning_rate": 0.0001, "loss": 1.609, "step": 3403 }, { "epoch": 0.3910171730515192, "grad_norm": 0.41971534490585327, "learning_rate": 0.0001, "loss": 1.6041, "step": 3404 }, { "epoch": 0.3911320429613463, "grad_norm": 0.4053299129009247, "learning_rate": 0.0001, "loss": 1.7071, "step": 3405 }, { "epoch": 0.3912469128711734, "grad_norm": 0.4222469925880432, "learning_rate": 0.0001, "loss": 1.9102, "step": 3406 }, { "epoch": 0.39136178278100053, "grad_norm": 0.3722541630268097, "learning_rate": 0.0001, "loss": 1.6368, "step": 3407 }, { "epoch": 0.39147665269082765, "grad_norm": 0.3922461271286011, "learning_rate": 0.0001, "loss": 1.5646, "step": 3408 }, { "epoch": 0.3915915226006548, "grad_norm": 0.43620502948760986, "learning_rate": 0.0001, "loss": 1.9101, "step": 3409 }, { "epoch": 0.3917063925104819, "grad_norm": 0.38486000895500183, "learning_rate": 0.0001, "loss": 1.625, "step": 3410 }, { "epoch": 0.391821262420309, "grad_norm": 0.417163223028183, "learning_rate": 0.0001, "loss": 1.4222, "step": 3411 }, { "epoch": 0.39193613233013613, "grad_norm": 0.37510520219802856, "learning_rate": 0.0001, "loss": 1.2839, "step": 3412 }, { "epoch": 0.39205100223996325, "grad_norm": 0.4312410056591034, "learning_rate": 0.0001, "loss": 1.7017, "step": 3413 }, { "epoch": 0.3921658721497904, "grad_norm": 0.4008169174194336, "learning_rate": 0.0001, "loss": 1.6157, "step": 3414 }, { "epoch": 0.3922807420596175, "grad_norm": 0.3868180811405182, "learning_rate": 0.0001, "loss": 1.6447, "step": 3415 }, { "epoch": 0.3923956119694446, "grad_norm": 0.38877058029174805, "learning_rate": 0.0001, "loss": 1.8274, "step": 3416 }, { "epoch": 0.39251048187927173, "grad_norm": 0.39784759283065796, "learning_rate": 0.0001, "loss": 1.7979, "step": 3417 }, { "epoch": 0.39262535178909885, "grad_norm": 0.3924922049045563, "learning_rate": 0.0001, "loss": 1.713, "step": 3418 }, { "epoch": 0.392740221698926, "grad_norm": 0.40661346912384033, "learning_rate": 0.0001, "loss": 1.723, "step": 3419 }, { "epoch": 0.3928550916087531, "grad_norm": 0.4197205901145935, "learning_rate": 0.0001, "loss": 1.7735, "step": 3420 }, { "epoch": 0.3929699615185802, "grad_norm": 0.3987165689468384, "learning_rate": 0.0001, "loss": 1.8069, "step": 3421 }, { "epoch": 0.39308483142840733, "grad_norm": 0.38276979327201843, "learning_rate": 0.0001, "loss": 1.6656, "step": 3422 }, { "epoch": 0.39319970133823445, "grad_norm": 0.3716558814048767, "learning_rate": 0.0001, "loss": 1.5835, "step": 3423 }, { "epoch": 0.3933145712480616, "grad_norm": 0.3954230844974518, "learning_rate": 0.0001, "loss": 1.7698, "step": 3424 }, { "epoch": 0.3934294411578887, "grad_norm": 0.39034202694892883, "learning_rate": 0.0001, "loss": 1.783, "step": 3425 }, { "epoch": 0.3935443110677158, "grad_norm": 0.36752232909202576, "learning_rate": 0.0001, "loss": 1.6384, "step": 3426 }, { "epoch": 0.39365918097754293, "grad_norm": 0.3931824564933777, "learning_rate": 0.0001, "loss": 1.7198, "step": 3427 }, { "epoch": 0.39377405088737005, "grad_norm": 0.3895040452480316, "learning_rate": 0.0001, "loss": 1.717, "step": 3428 }, { "epoch": 0.3938889207971972, "grad_norm": 0.4126931428909302, "learning_rate": 0.0001, "loss": 1.5665, "step": 3429 }, { "epoch": 0.3940037907070243, "grad_norm": 0.39863380789756775, "learning_rate": 0.0001, "loss": 1.8072, "step": 3430 }, { "epoch": 0.3941186606168514, "grad_norm": 0.409013032913208, "learning_rate": 0.0001, "loss": 1.5039, "step": 3431 }, { "epoch": 0.39423353052667853, "grad_norm": 0.39997661113739014, "learning_rate": 0.0001, "loss": 1.4622, "step": 3432 }, { "epoch": 0.39434840043650565, "grad_norm": 0.38587263226509094, "learning_rate": 0.0001, "loss": 1.7621, "step": 3433 }, { "epoch": 0.3944632703463328, "grad_norm": 0.41442370414733887, "learning_rate": 0.0001, "loss": 1.5584, "step": 3434 }, { "epoch": 0.3945781402561599, "grad_norm": 0.387526273727417, "learning_rate": 0.0001, "loss": 1.6732, "step": 3435 }, { "epoch": 0.394693010165987, "grad_norm": 0.4018903970718384, "learning_rate": 0.0001, "loss": 1.7994, "step": 3436 }, { "epoch": 0.39480788007581413, "grad_norm": 0.38275036215782166, "learning_rate": 0.0001, "loss": 1.6699, "step": 3437 }, { "epoch": 0.39492274998564125, "grad_norm": 0.392865926027298, "learning_rate": 0.0001, "loss": 1.699, "step": 3438 }, { "epoch": 0.3950376198954684, "grad_norm": 0.3883517384529114, "learning_rate": 0.0001, "loss": 1.7361, "step": 3439 }, { "epoch": 0.3951524898052955, "grad_norm": 0.43552684783935547, "learning_rate": 0.0001, "loss": 1.835, "step": 3440 }, { "epoch": 0.3952673597151226, "grad_norm": 0.38498955965042114, "learning_rate": 0.0001, "loss": 1.6328, "step": 3441 }, { "epoch": 0.39538222962494973, "grad_norm": 0.4317042827606201, "learning_rate": 0.0001, "loss": 1.9773, "step": 3442 }, { "epoch": 0.39549709953477685, "grad_norm": 0.4095926880836487, "learning_rate": 0.0001, "loss": 1.6328, "step": 3443 }, { "epoch": 0.395611969444604, "grad_norm": 0.4393211007118225, "learning_rate": 0.0001, "loss": 1.8934, "step": 3444 }, { "epoch": 0.3957268393544311, "grad_norm": 0.3967788517475128, "learning_rate": 0.0001, "loss": 1.6129, "step": 3445 }, { "epoch": 0.3958417092642582, "grad_norm": 0.3683384656906128, "learning_rate": 0.0001, "loss": 1.6212, "step": 3446 }, { "epoch": 0.39595657917408533, "grad_norm": 0.37010353803634644, "learning_rate": 0.0001, "loss": 1.6381, "step": 3447 }, { "epoch": 0.39607144908391245, "grad_norm": 0.4637109637260437, "learning_rate": 0.0001, "loss": 1.7736, "step": 3448 }, { "epoch": 0.3961863189937396, "grad_norm": 0.3716694116592407, "learning_rate": 0.0001, "loss": 1.5737, "step": 3449 }, { "epoch": 0.3963011889035667, "grad_norm": 0.3940329849720001, "learning_rate": 0.0001, "loss": 1.8883, "step": 3450 }, { "epoch": 0.3964160588133938, "grad_norm": 0.3837946653366089, "learning_rate": 0.0001, "loss": 1.5734, "step": 3451 }, { "epoch": 0.39653092872322093, "grad_norm": 0.4419655203819275, "learning_rate": 0.0001, "loss": 1.7845, "step": 3452 }, { "epoch": 0.39664579863304805, "grad_norm": 0.39042928814888, "learning_rate": 0.0001, "loss": 1.6394, "step": 3453 }, { "epoch": 0.3967606685428752, "grad_norm": 0.43041858077049255, "learning_rate": 0.0001, "loss": 1.4786, "step": 3454 }, { "epoch": 0.3968755384527023, "grad_norm": 0.36053743958473206, "learning_rate": 0.0001, "loss": 1.4182, "step": 3455 }, { "epoch": 0.3969904083625294, "grad_norm": 0.3963378965854645, "learning_rate": 0.0001, "loss": 1.8062, "step": 3456 }, { "epoch": 0.39710527827235653, "grad_norm": 0.391775906085968, "learning_rate": 0.0001, "loss": 1.6042, "step": 3457 }, { "epoch": 0.39722014818218365, "grad_norm": 0.39680591225624084, "learning_rate": 0.0001, "loss": 1.7523, "step": 3458 }, { "epoch": 0.39733501809201077, "grad_norm": 0.4133915901184082, "learning_rate": 0.0001, "loss": 1.6781, "step": 3459 }, { "epoch": 0.3974498880018379, "grad_norm": 0.41504111886024475, "learning_rate": 0.0001, "loss": 1.5819, "step": 3460 }, { "epoch": 0.397564757911665, "grad_norm": 0.39782634377479553, "learning_rate": 0.0001, "loss": 1.5943, "step": 3461 }, { "epoch": 0.3976796278214922, "grad_norm": 0.4147406220436096, "learning_rate": 0.0001, "loss": 1.6262, "step": 3462 }, { "epoch": 0.3977944977313193, "grad_norm": 0.3900047838687897, "learning_rate": 0.0001, "loss": 1.5578, "step": 3463 }, { "epoch": 0.3979093676411464, "grad_norm": 0.3884030282497406, "learning_rate": 0.0001, "loss": 1.695, "step": 3464 }, { "epoch": 0.39802423755097355, "grad_norm": 0.422282874584198, "learning_rate": 0.0001, "loss": 1.6552, "step": 3465 }, { "epoch": 0.39813910746080067, "grad_norm": 0.39325493574142456, "learning_rate": 0.0001, "loss": 1.6323, "step": 3466 }, { "epoch": 0.3982539773706278, "grad_norm": 0.3940114974975586, "learning_rate": 0.0001, "loss": 1.6235, "step": 3467 }, { "epoch": 0.3983688472804549, "grad_norm": 0.4041246175765991, "learning_rate": 0.0001, "loss": 1.6107, "step": 3468 }, { "epoch": 0.398483717190282, "grad_norm": 0.37534579634666443, "learning_rate": 0.0001, "loss": 1.737, "step": 3469 }, { "epoch": 0.39859858710010915, "grad_norm": 0.40846285223960876, "learning_rate": 0.0001, "loss": 1.7085, "step": 3470 }, { "epoch": 0.39871345700993627, "grad_norm": 0.43047085404396057, "learning_rate": 0.0001, "loss": 1.7536, "step": 3471 }, { "epoch": 0.3988283269197634, "grad_norm": 0.3942980170249939, "learning_rate": 0.0001, "loss": 1.7882, "step": 3472 }, { "epoch": 0.3989431968295905, "grad_norm": 0.408866822719574, "learning_rate": 0.0001, "loss": 1.6989, "step": 3473 }, { "epoch": 0.3990580667394176, "grad_norm": 0.3862482011318207, "learning_rate": 0.0001, "loss": 1.6176, "step": 3474 }, { "epoch": 0.39917293664924475, "grad_norm": 0.3940208852291107, "learning_rate": 0.0001, "loss": 1.6806, "step": 3475 }, { "epoch": 0.39928780655907187, "grad_norm": 0.42412546277046204, "learning_rate": 0.0001, "loss": 1.6059, "step": 3476 }, { "epoch": 0.399402676468899, "grad_norm": 0.4334600567817688, "learning_rate": 0.0001, "loss": 1.5058, "step": 3477 }, { "epoch": 0.3995175463787261, "grad_norm": 0.3741202652454376, "learning_rate": 0.0001, "loss": 1.5907, "step": 3478 }, { "epoch": 0.3996324162885532, "grad_norm": 0.37549999356269836, "learning_rate": 0.0001, "loss": 1.4891, "step": 3479 }, { "epoch": 0.39974728619838035, "grad_norm": 0.41318750381469727, "learning_rate": 0.0001, "loss": 1.7634, "step": 3480 }, { "epoch": 0.39986215610820747, "grad_norm": 0.39875760674476624, "learning_rate": 0.0001, "loss": 1.74, "step": 3481 }, { "epoch": 0.3999770260180346, "grad_norm": 0.4000686705112457, "learning_rate": 0.0001, "loss": 1.5764, "step": 3482 }, { "epoch": 0.4000918959278617, "grad_norm": 0.4229472577571869, "learning_rate": 0.0001, "loss": 1.5092, "step": 3483 }, { "epoch": 0.4002067658376888, "grad_norm": 0.4480687975883484, "learning_rate": 0.0001, "loss": 1.8381, "step": 3484 }, { "epoch": 0.40032163574751595, "grad_norm": 0.3803386688232422, "learning_rate": 0.0001, "loss": 1.6061, "step": 3485 }, { "epoch": 0.40043650565734307, "grad_norm": 0.40477797389030457, "learning_rate": 0.0001, "loss": 1.7031, "step": 3486 }, { "epoch": 0.4005513755671702, "grad_norm": 0.4002171456813812, "learning_rate": 0.0001, "loss": 1.7292, "step": 3487 }, { "epoch": 0.4006662454769973, "grad_norm": 0.4127141833305359, "learning_rate": 0.0001, "loss": 1.8455, "step": 3488 }, { "epoch": 0.4007811153868244, "grad_norm": 0.4116250276565552, "learning_rate": 0.0001, "loss": 1.5637, "step": 3489 }, { "epoch": 0.40089598529665155, "grad_norm": 0.3903186619281769, "learning_rate": 0.0001, "loss": 1.567, "step": 3490 }, { "epoch": 0.40101085520647867, "grad_norm": 0.3886655867099762, "learning_rate": 0.0001, "loss": 1.512, "step": 3491 }, { "epoch": 0.4011257251163058, "grad_norm": 0.40146976709365845, "learning_rate": 0.0001, "loss": 1.6052, "step": 3492 }, { "epoch": 0.4012405950261329, "grad_norm": 0.4304850697517395, "learning_rate": 0.0001, "loss": 1.6255, "step": 3493 }, { "epoch": 0.40135546493596, "grad_norm": 0.41273269057273865, "learning_rate": 0.0001, "loss": 1.566, "step": 3494 }, { "epoch": 0.40147033484578715, "grad_norm": 0.42954984307289124, "learning_rate": 0.0001, "loss": 1.7387, "step": 3495 }, { "epoch": 0.40158520475561427, "grad_norm": 0.3892943859100342, "learning_rate": 0.0001, "loss": 1.6935, "step": 3496 }, { "epoch": 0.4017000746654414, "grad_norm": 0.43084821105003357, "learning_rate": 0.0001, "loss": 1.8832, "step": 3497 }, { "epoch": 0.4018149445752685, "grad_norm": 0.3853071331977844, "learning_rate": 0.0001, "loss": 1.4343, "step": 3498 }, { "epoch": 0.4019298144850956, "grad_norm": 0.3769509196281433, "learning_rate": 0.0001, "loss": 1.5531, "step": 3499 }, { "epoch": 0.40204468439492275, "grad_norm": 0.41047728061676025, "learning_rate": 0.0001, "loss": 1.6463, "step": 3500 }, { "epoch": 0.40215955430474987, "grad_norm": 0.3837498128414154, "learning_rate": 0.0001, "loss": 1.6573, "step": 3501 }, { "epoch": 0.402274424214577, "grad_norm": 0.4407394826412201, "learning_rate": 0.0001, "loss": 1.6908, "step": 3502 }, { "epoch": 0.4023892941244041, "grad_norm": 0.3970791697502136, "learning_rate": 0.0001, "loss": 1.6581, "step": 3503 }, { "epoch": 0.4025041640342312, "grad_norm": 0.402908593416214, "learning_rate": 0.0001, "loss": 1.7421, "step": 3504 }, { "epoch": 0.40261903394405835, "grad_norm": 0.3898850977420807, "learning_rate": 0.0001, "loss": 1.5148, "step": 3505 }, { "epoch": 0.40273390385388547, "grad_norm": 0.4609782099723816, "learning_rate": 0.0001, "loss": 1.9721, "step": 3506 }, { "epoch": 0.4028487737637126, "grad_norm": 0.4256317615509033, "learning_rate": 0.0001, "loss": 1.8592, "step": 3507 }, { "epoch": 0.4029636436735397, "grad_norm": 0.38240641355514526, "learning_rate": 0.0001, "loss": 1.5619, "step": 3508 }, { "epoch": 0.4030785135833668, "grad_norm": 0.40074291825294495, "learning_rate": 0.0001, "loss": 1.6603, "step": 3509 }, { "epoch": 0.40319338349319395, "grad_norm": 0.38915419578552246, "learning_rate": 0.0001, "loss": 1.4588, "step": 3510 }, { "epoch": 0.40330825340302107, "grad_norm": 0.39490392804145813, "learning_rate": 0.0001, "loss": 1.7036, "step": 3511 }, { "epoch": 0.4034231233128482, "grad_norm": 0.37011227011680603, "learning_rate": 0.0001, "loss": 1.6374, "step": 3512 }, { "epoch": 0.4035379932226753, "grad_norm": 0.41320136189460754, "learning_rate": 0.0001, "loss": 1.6905, "step": 3513 }, { "epoch": 0.4036528631325024, "grad_norm": 0.43924593925476074, "learning_rate": 0.0001, "loss": 1.7871, "step": 3514 }, { "epoch": 0.40376773304232955, "grad_norm": 0.3919188380241394, "learning_rate": 0.0001, "loss": 1.5806, "step": 3515 }, { "epoch": 0.40388260295215667, "grad_norm": 0.40619519352912903, "learning_rate": 0.0001, "loss": 1.5834, "step": 3516 }, { "epoch": 0.4039974728619838, "grad_norm": 0.4137546420097351, "learning_rate": 0.0001, "loss": 1.7776, "step": 3517 }, { "epoch": 0.4041123427718109, "grad_norm": 0.4008006453514099, "learning_rate": 0.0001, "loss": 1.5994, "step": 3518 }, { "epoch": 0.404227212681638, "grad_norm": 0.43822675943374634, "learning_rate": 0.0001, "loss": 1.8259, "step": 3519 }, { "epoch": 0.40434208259146515, "grad_norm": 0.41821324825286865, "learning_rate": 0.0001, "loss": 1.7496, "step": 3520 }, { "epoch": 0.40445695250129227, "grad_norm": 0.45367196202278137, "learning_rate": 0.0001, "loss": 1.9165, "step": 3521 }, { "epoch": 0.4045718224111194, "grad_norm": 0.38345637917518616, "learning_rate": 0.0001, "loss": 1.7299, "step": 3522 }, { "epoch": 0.4046866923209465, "grad_norm": 0.3878767490386963, "learning_rate": 0.0001, "loss": 1.683, "step": 3523 }, { "epoch": 0.4048015622307736, "grad_norm": 0.43922069668769836, "learning_rate": 0.0001, "loss": 1.8087, "step": 3524 }, { "epoch": 0.40491643214060075, "grad_norm": 0.38794466853141785, "learning_rate": 0.0001, "loss": 1.6888, "step": 3525 }, { "epoch": 0.40503130205042787, "grad_norm": 0.39647185802459717, "learning_rate": 0.0001, "loss": 1.7157, "step": 3526 }, { "epoch": 0.405146171960255, "grad_norm": 0.38950473070144653, "learning_rate": 0.0001, "loss": 1.7733, "step": 3527 }, { "epoch": 0.4052610418700821, "grad_norm": 0.43412071466445923, "learning_rate": 0.0001, "loss": 1.8643, "step": 3528 }, { "epoch": 0.4053759117799092, "grad_norm": 0.4561465382575989, "learning_rate": 0.0001, "loss": 1.9004, "step": 3529 }, { "epoch": 0.40549078168973635, "grad_norm": 0.3911570906639099, "learning_rate": 0.0001, "loss": 1.7185, "step": 3530 }, { "epoch": 0.4056056515995635, "grad_norm": 0.3808756172657013, "learning_rate": 0.0001, "loss": 1.6488, "step": 3531 }, { "epoch": 0.40572052150939064, "grad_norm": 0.41304758191108704, "learning_rate": 0.0001, "loss": 1.704, "step": 3532 }, { "epoch": 0.40583539141921776, "grad_norm": 0.3868066072463989, "learning_rate": 0.0001, "loss": 1.6564, "step": 3533 }, { "epoch": 0.4059502613290449, "grad_norm": 0.47195518016815186, "learning_rate": 0.0001, "loss": 1.85, "step": 3534 }, { "epoch": 0.406065131238872, "grad_norm": 0.40682026743888855, "learning_rate": 0.0001, "loss": 1.7334, "step": 3535 }, { "epoch": 0.4061800011486991, "grad_norm": 0.4269334375858307, "learning_rate": 0.0001, "loss": 1.743, "step": 3536 }, { "epoch": 0.40629487105852624, "grad_norm": 0.40638071298599243, "learning_rate": 0.0001, "loss": 1.734, "step": 3537 }, { "epoch": 0.40640974096835336, "grad_norm": 0.3894198536872864, "learning_rate": 0.0001, "loss": 1.6416, "step": 3538 }, { "epoch": 0.4065246108781805, "grad_norm": 0.4187344014644623, "learning_rate": 0.0001, "loss": 1.647, "step": 3539 }, { "epoch": 0.4066394807880076, "grad_norm": 0.3857315182685852, "learning_rate": 0.0001, "loss": 1.5714, "step": 3540 }, { "epoch": 0.4067543506978347, "grad_norm": 0.3764125108718872, "learning_rate": 0.0001, "loss": 1.6134, "step": 3541 }, { "epoch": 0.40686922060766184, "grad_norm": 0.38731610774993896, "learning_rate": 0.0001, "loss": 1.551, "step": 3542 }, { "epoch": 0.40698409051748896, "grad_norm": 0.38628220558166504, "learning_rate": 0.0001, "loss": 1.599, "step": 3543 }, { "epoch": 0.4070989604273161, "grad_norm": 0.39660200476646423, "learning_rate": 0.0001, "loss": 1.6402, "step": 3544 }, { "epoch": 0.4072138303371432, "grad_norm": 0.4349413812160492, "learning_rate": 0.0001, "loss": 1.9247, "step": 3545 }, { "epoch": 0.4073287002469703, "grad_norm": 0.41799604892730713, "learning_rate": 0.0001, "loss": 1.7725, "step": 3546 }, { "epoch": 0.40744357015679744, "grad_norm": 0.4117201566696167, "learning_rate": 0.0001, "loss": 1.6207, "step": 3547 }, { "epoch": 0.40755844006662456, "grad_norm": 0.3982154130935669, "learning_rate": 0.0001, "loss": 1.652, "step": 3548 }, { "epoch": 0.4076733099764517, "grad_norm": 0.386783242225647, "learning_rate": 0.0001, "loss": 1.4822, "step": 3549 }, { "epoch": 0.4077881798862788, "grad_norm": 0.3949221670627594, "learning_rate": 0.0001, "loss": 1.5991, "step": 3550 }, { "epoch": 0.4079030497961059, "grad_norm": 0.38319775462150574, "learning_rate": 0.0001, "loss": 1.6634, "step": 3551 }, { "epoch": 0.40801791970593304, "grad_norm": 0.4165116250514984, "learning_rate": 0.0001, "loss": 1.7474, "step": 3552 }, { "epoch": 0.40813278961576016, "grad_norm": 0.4256540536880493, "learning_rate": 0.0001, "loss": 1.8328, "step": 3553 }, { "epoch": 0.4082476595255873, "grad_norm": 0.4176916182041168, "learning_rate": 0.0001, "loss": 1.7, "step": 3554 }, { "epoch": 0.4083625294354144, "grad_norm": 0.40990862250328064, "learning_rate": 0.0001, "loss": 1.3584, "step": 3555 }, { "epoch": 0.4084773993452415, "grad_norm": 0.39748623967170715, "learning_rate": 0.0001, "loss": 1.5139, "step": 3556 }, { "epoch": 0.40859226925506864, "grad_norm": 0.3950015604496002, "learning_rate": 0.0001, "loss": 1.5983, "step": 3557 }, { "epoch": 0.40870713916489576, "grad_norm": 0.3774421215057373, "learning_rate": 0.0001, "loss": 1.4646, "step": 3558 }, { "epoch": 0.4088220090747229, "grad_norm": 0.3629385530948639, "learning_rate": 0.0001, "loss": 1.5852, "step": 3559 }, { "epoch": 0.40893687898455, "grad_norm": 0.3893386125564575, "learning_rate": 0.0001, "loss": 1.6006, "step": 3560 }, { "epoch": 0.4090517488943771, "grad_norm": 0.3888409435749054, "learning_rate": 0.0001, "loss": 1.65, "step": 3561 }, { "epoch": 0.40916661880420424, "grad_norm": 0.415077805519104, "learning_rate": 0.0001, "loss": 1.6517, "step": 3562 }, { "epoch": 0.40928148871403136, "grad_norm": 0.420685350894928, "learning_rate": 0.0001, "loss": 1.7946, "step": 3563 }, { "epoch": 0.4093963586238585, "grad_norm": 0.4362063407897949, "learning_rate": 0.0001, "loss": 1.7735, "step": 3564 }, { "epoch": 0.4095112285336856, "grad_norm": 0.42826390266418457, "learning_rate": 0.0001, "loss": 1.8189, "step": 3565 }, { "epoch": 0.4096260984435127, "grad_norm": 0.3822219669818878, "learning_rate": 0.0001, "loss": 1.6092, "step": 3566 }, { "epoch": 0.40974096835333984, "grad_norm": 0.4273683428764343, "learning_rate": 0.0001, "loss": 1.8192, "step": 3567 }, { "epoch": 0.40985583826316696, "grad_norm": 0.401048481464386, "learning_rate": 0.0001, "loss": 1.5636, "step": 3568 }, { "epoch": 0.4099707081729941, "grad_norm": 0.4532688558101654, "learning_rate": 0.0001, "loss": 1.6273, "step": 3569 }, { "epoch": 0.4100855780828212, "grad_norm": 0.40526339411735535, "learning_rate": 0.0001, "loss": 1.8208, "step": 3570 }, { "epoch": 0.4102004479926483, "grad_norm": 0.3863803446292877, "learning_rate": 0.0001, "loss": 1.6683, "step": 3571 }, { "epoch": 0.41031531790247544, "grad_norm": 0.38212668895721436, "learning_rate": 0.0001, "loss": 1.6424, "step": 3572 }, { "epoch": 0.41043018781230256, "grad_norm": 0.38503432273864746, "learning_rate": 0.0001, "loss": 1.5401, "step": 3573 }, { "epoch": 0.4105450577221297, "grad_norm": 0.4347050189971924, "learning_rate": 0.0001, "loss": 1.7337, "step": 3574 }, { "epoch": 0.4106599276319568, "grad_norm": 0.4009620249271393, "learning_rate": 0.0001, "loss": 1.4972, "step": 3575 }, { "epoch": 0.4107747975417839, "grad_norm": 0.40209561586380005, "learning_rate": 0.0001, "loss": 1.716, "step": 3576 }, { "epoch": 0.41088966745161104, "grad_norm": 0.42424091696739197, "learning_rate": 0.0001, "loss": 1.6551, "step": 3577 }, { "epoch": 0.41100453736143816, "grad_norm": 0.405749648809433, "learning_rate": 0.0001, "loss": 1.3783, "step": 3578 }, { "epoch": 0.4111194072712653, "grad_norm": 0.4051080346107483, "learning_rate": 0.0001, "loss": 1.7914, "step": 3579 }, { "epoch": 0.4112342771810924, "grad_norm": 0.3940957188606262, "learning_rate": 0.0001, "loss": 1.4819, "step": 3580 }, { "epoch": 0.4113491470909195, "grad_norm": 0.4148561358451843, "learning_rate": 0.0001, "loss": 1.815, "step": 3581 }, { "epoch": 0.41146401700074664, "grad_norm": 0.37967970967292786, "learning_rate": 0.0001, "loss": 1.5627, "step": 3582 }, { "epoch": 0.41157888691057376, "grad_norm": 0.4085034728050232, "learning_rate": 0.0001, "loss": 1.7586, "step": 3583 }, { "epoch": 0.4116937568204009, "grad_norm": 0.43780767917633057, "learning_rate": 0.0001, "loss": 1.6554, "step": 3584 }, { "epoch": 0.411808626730228, "grad_norm": 0.4053542912006378, "learning_rate": 0.0001, "loss": 1.627, "step": 3585 }, { "epoch": 0.4119234966400551, "grad_norm": 0.42248353362083435, "learning_rate": 0.0001, "loss": 1.7921, "step": 3586 }, { "epoch": 0.41203836654988224, "grad_norm": 0.38272780179977417, "learning_rate": 0.0001, "loss": 1.6164, "step": 3587 }, { "epoch": 0.41215323645970936, "grad_norm": 0.3969421088695526, "learning_rate": 0.0001, "loss": 1.7657, "step": 3588 }, { "epoch": 0.4122681063695365, "grad_norm": 0.3919724225997925, "learning_rate": 0.0001, "loss": 1.5506, "step": 3589 }, { "epoch": 0.4123829762793636, "grad_norm": 0.37912601232528687, "learning_rate": 0.0001, "loss": 1.6657, "step": 3590 }, { "epoch": 0.4124978461891907, "grad_norm": 0.4223651885986328, "learning_rate": 0.0001, "loss": 1.7099, "step": 3591 }, { "epoch": 0.41261271609901784, "grad_norm": 0.39578935503959656, "learning_rate": 0.0001, "loss": 1.5249, "step": 3592 }, { "epoch": 0.41272758600884496, "grad_norm": 0.40267515182495117, "learning_rate": 0.0001, "loss": 1.5518, "step": 3593 }, { "epoch": 0.4128424559186721, "grad_norm": 0.4238963723182678, "learning_rate": 0.0001, "loss": 1.9176, "step": 3594 }, { "epoch": 0.4129573258284992, "grad_norm": 0.43114468455314636, "learning_rate": 0.0001, "loss": 1.7819, "step": 3595 }, { "epoch": 0.4130721957383263, "grad_norm": 0.36643150448799133, "learning_rate": 0.0001, "loss": 1.5617, "step": 3596 }, { "epoch": 0.41318706564815344, "grad_norm": 0.37914469838142395, "learning_rate": 0.0001, "loss": 1.6791, "step": 3597 }, { "epoch": 0.41330193555798056, "grad_norm": 0.4015408158302307, "learning_rate": 0.0001, "loss": 1.6765, "step": 3598 }, { "epoch": 0.41341680546780774, "grad_norm": 0.40977799892425537, "learning_rate": 0.0001, "loss": 1.445, "step": 3599 }, { "epoch": 0.41353167537763486, "grad_norm": 0.4055366814136505, "learning_rate": 0.0001, "loss": 1.5245, "step": 3600 }, { "epoch": 0.413646545287462, "grad_norm": 0.4310015141963959, "learning_rate": 0.0001, "loss": 1.6015, "step": 3601 }, { "epoch": 0.4137614151972891, "grad_norm": 0.4342171847820282, "learning_rate": 0.0001, "loss": 1.7804, "step": 3602 }, { "epoch": 0.4138762851071162, "grad_norm": 0.3718089759349823, "learning_rate": 0.0001, "loss": 1.5651, "step": 3603 }, { "epoch": 0.41399115501694334, "grad_norm": 0.38151004910469055, "learning_rate": 0.0001, "loss": 1.6581, "step": 3604 }, { "epoch": 0.41410602492677046, "grad_norm": 0.4037642776966095, "learning_rate": 0.0001, "loss": 1.5375, "step": 3605 }, { "epoch": 0.4142208948365976, "grad_norm": 0.40361806750297546, "learning_rate": 0.0001, "loss": 1.5097, "step": 3606 }, { "epoch": 0.4143357647464247, "grad_norm": 0.4126417934894562, "learning_rate": 0.0001, "loss": 1.703, "step": 3607 }, { "epoch": 0.4144506346562518, "grad_norm": 0.409773051738739, "learning_rate": 0.0001, "loss": 1.6464, "step": 3608 }, { "epoch": 0.41456550456607894, "grad_norm": 0.44128933548927307, "learning_rate": 0.0001, "loss": 1.7266, "step": 3609 }, { "epoch": 0.41468037447590606, "grad_norm": 0.3855801820755005, "learning_rate": 0.0001, "loss": 1.5346, "step": 3610 }, { "epoch": 0.4147952443857332, "grad_norm": 0.3980732262134552, "learning_rate": 0.0001, "loss": 1.7958, "step": 3611 }, { "epoch": 0.4149101142955603, "grad_norm": 0.4035704433917999, "learning_rate": 0.0001, "loss": 1.5173, "step": 3612 }, { "epoch": 0.4150249842053874, "grad_norm": 0.3865931034088135, "learning_rate": 0.0001, "loss": 1.778, "step": 3613 }, { "epoch": 0.41513985411521453, "grad_norm": 0.4053201377391815, "learning_rate": 0.0001, "loss": 1.7013, "step": 3614 }, { "epoch": 0.41525472402504165, "grad_norm": 0.3935595452785492, "learning_rate": 0.0001, "loss": 1.6111, "step": 3615 }, { "epoch": 0.4153695939348688, "grad_norm": 0.3803655803203583, "learning_rate": 0.0001, "loss": 1.5889, "step": 3616 }, { "epoch": 0.4154844638446959, "grad_norm": 0.4078107476234436, "learning_rate": 0.0001, "loss": 1.6897, "step": 3617 }, { "epoch": 0.415599333754523, "grad_norm": 0.40983328223228455, "learning_rate": 0.0001, "loss": 1.8803, "step": 3618 }, { "epoch": 0.41571420366435013, "grad_norm": 0.3894909918308258, "learning_rate": 0.0001, "loss": 1.6771, "step": 3619 }, { "epoch": 0.41582907357417725, "grad_norm": 0.41159242391586304, "learning_rate": 0.0001, "loss": 1.7309, "step": 3620 }, { "epoch": 0.4159439434840044, "grad_norm": 0.4065288305282593, "learning_rate": 0.0001, "loss": 1.7908, "step": 3621 }, { "epoch": 0.4160588133938315, "grad_norm": 0.4101758599281311, "learning_rate": 0.0001, "loss": 1.7292, "step": 3622 }, { "epoch": 0.4161736833036586, "grad_norm": 0.41453102231025696, "learning_rate": 0.0001, "loss": 1.8015, "step": 3623 }, { "epoch": 0.41628855321348573, "grad_norm": 0.39582541584968567, "learning_rate": 0.0001, "loss": 1.7353, "step": 3624 }, { "epoch": 0.41640342312331285, "grad_norm": 0.4312124252319336, "learning_rate": 0.0001, "loss": 1.7743, "step": 3625 }, { "epoch": 0.41651829303314, "grad_norm": 0.40501224994659424, "learning_rate": 0.0001, "loss": 1.8335, "step": 3626 }, { "epoch": 0.4166331629429671, "grad_norm": 0.39315342903137207, "learning_rate": 0.0001, "loss": 1.5686, "step": 3627 }, { "epoch": 0.4167480328527942, "grad_norm": 0.4335770606994629, "learning_rate": 0.0001, "loss": 1.6204, "step": 3628 }, { "epoch": 0.41686290276262133, "grad_norm": 0.3941115736961365, "learning_rate": 0.0001, "loss": 1.6904, "step": 3629 }, { "epoch": 0.41697777267244845, "grad_norm": 0.40810754895210266, "learning_rate": 0.0001, "loss": 1.6955, "step": 3630 }, { "epoch": 0.4170926425822756, "grad_norm": 0.3764353394508362, "learning_rate": 0.0001, "loss": 1.496, "step": 3631 }, { "epoch": 0.4172075124921027, "grad_norm": 0.41539716720581055, "learning_rate": 0.0001, "loss": 1.638, "step": 3632 }, { "epoch": 0.4173223824019298, "grad_norm": 0.3807416260242462, "learning_rate": 0.0001, "loss": 1.6627, "step": 3633 }, { "epoch": 0.41743725231175693, "grad_norm": 0.38666248321533203, "learning_rate": 0.0001, "loss": 1.5018, "step": 3634 }, { "epoch": 0.41755212222158405, "grad_norm": 0.40492692589759827, "learning_rate": 0.0001, "loss": 1.5823, "step": 3635 }, { "epoch": 0.4176669921314112, "grad_norm": 0.38428235054016113, "learning_rate": 0.0001, "loss": 1.5198, "step": 3636 }, { "epoch": 0.4177818620412383, "grad_norm": 0.3912535607814789, "learning_rate": 0.0001, "loss": 1.6182, "step": 3637 }, { "epoch": 0.4178967319510654, "grad_norm": 0.3759515881538391, "learning_rate": 0.0001, "loss": 1.5587, "step": 3638 }, { "epoch": 0.41801160186089253, "grad_norm": 0.38995474576950073, "learning_rate": 0.0001, "loss": 1.6871, "step": 3639 }, { "epoch": 0.41812647177071965, "grad_norm": 0.38808655738830566, "learning_rate": 0.0001, "loss": 1.3153, "step": 3640 }, { "epoch": 0.4182413416805468, "grad_norm": 0.3956000804901123, "learning_rate": 0.0001, "loss": 1.575, "step": 3641 }, { "epoch": 0.4183562115903739, "grad_norm": 0.43580135703086853, "learning_rate": 0.0001, "loss": 1.6672, "step": 3642 }, { "epoch": 0.418471081500201, "grad_norm": 0.4119028151035309, "learning_rate": 0.0001, "loss": 1.7918, "step": 3643 }, { "epoch": 0.41858595141002813, "grad_norm": 0.44993263483047485, "learning_rate": 0.0001, "loss": 1.88, "step": 3644 }, { "epoch": 0.41870082131985525, "grad_norm": 0.4024428129196167, "learning_rate": 0.0001, "loss": 1.5243, "step": 3645 }, { "epoch": 0.4188156912296824, "grad_norm": 0.4061081111431122, "learning_rate": 0.0001, "loss": 1.7381, "step": 3646 }, { "epoch": 0.4189305611395095, "grad_norm": 0.3835539221763611, "learning_rate": 0.0001, "loss": 1.4161, "step": 3647 }, { "epoch": 0.4190454310493366, "grad_norm": 0.41590166091918945, "learning_rate": 0.0001, "loss": 1.7132, "step": 3648 }, { "epoch": 0.41916030095916373, "grad_norm": 0.40571367740631104, "learning_rate": 0.0001, "loss": 1.5744, "step": 3649 }, { "epoch": 0.41927517086899085, "grad_norm": 0.43440738320350647, "learning_rate": 0.0001, "loss": 1.7795, "step": 3650 }, { "epoch": 0.419390040778818, "grad_norm": 0.40848132967948914, "learning_rate": 0.0001, "loss": 1.5865, "step": 3651 }, { "epoch": 0.4195049106886451, "grad_norm": 0.3971754014492035, "learning_rate": 0.0001, "loss": 1.6035, "step": 3652 }, { "epoch": 0.4196197805984722, "grad_norm": 0.3942772448062897, "learning_rate": 0.0001, "loss": 1.59, "step": 3653 }, { "epoch": 0.41973465050829933, "grad_norm": 0.4283560812473297, "learning_rate": 0.0001, "loss": 1.7446, "step": 3654 }, { "epoch": 0.41984952041812645, "grad_norm": 0.39290323853492737, "learning_rate": 0.0001, "loss": 1.6255, "step": 3655 }, { "epoch": 0.4199643903279536, "grad_norm": 0.3974771797657013, "learning_rate": 0.0001, "loss": 1.5625, "step": 3656 }, { "epoch": 0.4200792602377807, "grad_norm": 0.40750113129615784, "learning_rate": 0.0001, "loss": 1.8154, "step": 3657 }, { "epoch": 0.4201941301476078, "grad_norm": 0.4657525420188904, "learning_rate": 0.0001, "loss": 1.8862, "step": 3658 }, { "epoch": 0.42030900005743493, "grad_norm": 0.40977463126182556, "learning_rate": 0.0001, "loss": 1.7574, "step": 3659 }, { "epoch": 0.42042386996726205, "grad_norm": 0.4081574082374573, "learning_rate": 0.0001, "loss": 1.7681, "step": 3660 }, { "epoch": 0.4205387398770892, "grad_norm": 0.46353742480278015, "learning_rate": 0.0001, "loss": 1.9854, "step": 3661 }, { "epoch": 0.4206536097869163, "grad_norm": 0.3851860463619232, "learning_rate": 0.0001, "loss": 1.5924, "step": 3662 }, { "epoch": 0.4207684796967434, "grad_norm": 0.38905036449432373, "learning_rate": 0.0001, "loss": 1.5134, "step": 3663 }, { "epoch": 0.42088334960657053, "grad_norm": 0.3887808620929718, "learning_rate": 0.0001, "loss": 1.4947, "step": 3664 }, { "epoch": 0.42099821951639765, "grad_norm": 0.39309823513031006, "learning_rate": 0.0001, "loss": 1.7099, "step": 3665 }, { "epoch": 0.4211130894262248, "grad_norm": 0.389574259519577, "learning_rate": 0.0001, "loss": 1.7133, "step": 3666 }, { "epoch": 0.4212279593360519, "grad_norm": 0.3817690312862396, "learning_rate": 0.0001, "loss": 1.4487, "step": 3667 }, { "epoch": 0.42134282924587907, "grad_norm": 0.41336965560913086, "learning_rate": 0.0001, "loss": 1.7784, "step": 3668 }, { "epoch": 0.4214576991557062, "grad_norm": 0.42329856753349304, "learning_rate": 0.0001, "loss": 1.7214, "step": 3669 }, { "epoch": 0.4215725690655333, "grad_norm": 0.3827388882637024, "learning_rate": 0.0001, "loss": 1.5998, "step": 3670 }, { "epoch": 0.42168743897536043, "grad_norm": 0.37781426310539246, "learning_rate": 0.0001, "loss": 1.5439, "step": 3671 }, { "epoch": 0.42180230888518755, "grad_norm": 0.40145260095596313, "learning_rate": 0.0001, "loss": 1.6156, "step": 3672 }, { "epoch": 0.42191717879501467, "grad_norm": 0.37764087319374084, "learning_rate": 0.0001, "loss": 1.5918, "step": 3673 }, { "epoch": 0.4220320487048418, "grad_norm": 0.40427038073539734, "learning_rate": 0.0001, "loss": 1.8588, "step": 3674 }, { "epoch": 0.4221469186146689, "grad_norm": 0.3810051679611206, "learning_rate": 0.0001, "loss": 1.6272, "step": 3675 }, { "epoch": 0.42226178852449603, "grad_norm": 0.4347747266292572, "learning_rate": 0.0001, "loss": 1.81, "step": 3676 }, { "epoch": 0.42237665843432315, "grad_norm": 0.4019508361816406, "learning_rate": 0.0001, "loss": 1.6382, "step": 3677 }, { "epoch": 0.42249152834415027, "grad_norm": 0.40587884187698364, "learning_rate": 0.0001, "loss": 1.665, "step": 3678 }, { "epoch": 0.4226063982539774, "grad_norm": 0.3974643051624298, "learning_rate": 0.0001, "loss": 1.646, "step": 3679 }, { "epoch": 0.4227212681638045, "grad_norm": 0.443687379360199, "learning_rate": 0.0001, "loss": 1.9096, "step": 3680 }, { "epoch": 0.42283613807363163, "grad_norm": 0.4022139608860016, "learning_rate": 0.0001, "loss": 1.8164, "step": 3681 }, { "epoch": 0.42295100798345875, "grad_norm": 0.46298283338546753, "learning_rate": 0.0001, "loss": 1.9288, "step": 3682 }, { "epoch": 0.42306587789328587, "grad_norm": 0.3819207549095154, "learning_rate": 0.0001, "loss": 1.5375, "step": 3683 }, { "epoch": 0.423180747803113, "grad_norm": 0.39071542024612427, "learning_rate": 0.0001, "loss": 1.4206, "step": 3684 }, { "epoch": 0.4232956177129401, "grad_norm": 0.39119791984558105, "learning_rate": 0.0001, "loss": 1.5128, "step": 3685 }, { "epoch": 0.42341048762276723, "grad_norm": 0.41179823875427246, "learning_rate": 0.0001, "loss": 1.6371, "step": 3686 }, { "epoch": 0.42352535753259435, "grad_norm": 0.40467724204063416, "learning_rate": 0.0001, "loss": 1.6625, "step": 3687 }, { "epoch": 0.42364022744242147, "grad_norm": 0.4044126570224762, "learning_rate": 0.0001, "loss": 1.7322, "step": 3688 }, { "epoch": 0.4237550973522486, "grad_norm": 0.39904049038887024, "learning_rate": 0.0001, "loss": 1.4788, "step": 3689 }, { "epoch": 0.4238699672620757, "grad_norm": 0.4178762137889862, "learning_rate": 0.0001, "loss": 1.6207, "step": 3690 }, { "epoch": 0.42398483717190283, "grad_norm": 0.4076288640499115, "learning_rate": 0.0001, "loss": 1.6828, "step": 3691 }, { "epoch": 0.42409970708172995, "grad_norm": 0.4018821716308594, "learning_rate": 0.0001, "loss": 1.5477, "step": 3692 }, { "epoch": 0.42421457699155707, "grad_norm": 0.40769195556640625, "learning_rate": 0.0001, "loss": 1.6397, "step": 3693 }, { "epoch": 0.4243294469013842, "grad_norm": 0.38684284687042236, "learning_rate": 0.0001, "loss": 1.6296, "step": 3694 }, { "epoch": 0.4244443168112113, "grad_norm": 0.41944825649261475, "learning_rate": 0.0001, "loss": 1.6284, "step": 3695 }, { "epoch": 0.42455918672103843, "grad_norm": 0.38351449370384216, "learning_rate": 0.0001, "loss": 1.6596, "step": 3696 }, { "epoch": 0.42467405663086555, "grad_norm": 0.429304838180542, "learning_rate": 0.0001, "loss": 1.6653, "step": 3697 }, { "epoch": 0.42478892654069267, "grad_norm": 0.42793336510658264, "learning_rate": 0.0001, "loss": 1.6201, "step": 3698 }, { "epoch": 0.4249037964505198, "grad_norm": 0.41888338327407837, "learning_rate": 0.0001, "loss": 1.7443, "step": 3699 }, { "epoch": 0.4250186663603469, "grad_norm": 0.3938450217247009, "learning_rate": 0.0001, "loss": 1.4583, "step": 3700 }, { "epoch": 0.42513353627017403, "grad_norm": 0.3846457004547119, "learning_rate": 0.0001, "loss": 1.4163, "step": 3701 }, { "epoch": 0.42524840618000115, "grad_norm": 0.37341800332069397, "learning_rate": 0.0001, "loss": 1.5151, "step": 3702 }, { "epoch": 0.42536327608982827, "grad_norm": 0.39832285046577454, "learning_rate": 0.0001, "loss": 1.5883, "step": 3703 }, { "epoch": 0.4254781459996554, "grad_norm": 0.374226838350296, "learning_rate": 0.0001, "loss": 1.6674, "step": 3704 }, { "epoch": 0.4255930159094825, "grad_norm": 0.4002193212509155, "learning_rate": 0.0001, "loss": 1.5852, "step": 3705 }, { "epoch": 0.42570788581930963, "grad_norm": 0.3768205940723419, "learning_rate": 0.0001, "loss": 1.5192, "step": 3706 }, { "epoch": 0.42582275572913675, "grad_norm": 0.3951648771762848, "learning_rate": 0.0001, "loss": 1.6479, "step": 3707 }, { "epoch": 0.42593762563896387, "grad_norm": 0.4215008616447449, "learning_rate": 0.0001, "loss": 1.8617, "step": 3708 }, { "epoch": 0.426052495548791, "grad_norm": 0.4458306133747101, "learning_rate": 0.0001, "loss": 1.7462, "step": 3709 }, { "epoch": 0.4261673654586181, "grad_norm": 0.40523743629455566, "learning_rate": 0.0001, "loss": 1.7303, "step": 3710 }, { "epoch": 0.42628223536844523, "grad_norm": 0.412463515996933, "learning_rate": 0.0001, "loss": 1.6925, "step": 3711 }, { "epoch": 0.42639710527827235, "grad_norm": 0.4172394871711731, "learning_rate": 0.0001, "loss": 1.7788, "step": 3712 }, { "epoch": 0.42651197518809947, "grad_norm": 0.38772034645080566, "learning_rate": 0.0001, "loss": 1.6891, "step": 3713 }, { "epoch": 0.4266268450979266, "grad_norm": 0.43306443095207214, "learning_rate": 0.0001, "loss": 1.7567, "step": 3714 }, { "epoch": 0.4267417150077537, "grad_norm": 0.4059670865535736, "learning_rate": 0.0001, "loss": 1.8066, "step": 3715 }, { "epoch": 0.42685658491758083, "grad_norm": 0.392241507768631, "learning_rate": 0.0001, "loss": 1.6895, "step": 3716 }, { "epoch": 0.42697145482740795, "grad_norm": 0.42141708731651306, "learning_rate": 0.0001, "loss": 1.7378, "step": 3717 }, { "epoch": 0.42708632473723507, "grad_norm": 0.42226916551589966, "learning_rate": 0.0001, "loss": 1.7086, "step": 3718 }, { "epoch": 0.4272011946470622, "grad_norm": 0.4137056767940521, "learning_rate": 0.0001, "loss": 1.5845, "step": 3719 }, { "epoch": 0.4273160645568893, "grad_norm": 0.4272863566875458, "learning_rate": 0.0001, "loss": 1.6079, "step": 3720 }, { "epoch": 0.42743093446671643, "grad_norm": 0.4106510579586029, "learning_rate": 0.0001, "loss": 1.5838, "step": 3721 }, { "epoch": 0.42754580437654355, "grad_norm": 0.4005192518234253, "learning_rate": 0.0001, "loss": 1.6362, "step": 3722 }, { "epoch": 0.42766067428637067, "grad_norm": 0.4278923273086548, "learning_rate": 0.0001, "loss": 1.7448, "step": 3723 }, { "epoch": 0.4277755441961978, "grad_norm": 0.4044369161128998, "learning_rate": 0.0001, "loss": 1.6479, "step": 3724 }, { "epoch": 0.4278904141060249, "grad_norm": 0.40069693326950073, "learning_rate": 0.0001, "loss": 1.7594, "step": 3725 }, { "epoch": 0.42800528401585203, "grad_norm": 0.4300965964794159, "learning_rate": 0.0001, "loss": 1.5522, "step": 3726 }, { "epoch": 0.42812015392567915, "grad_norm": 0.39623257517814636, "learning_rate": 0.0001, "loss": 1.4162, "step": 3727 }, { "epoch": 0.42823502383550627, "grad_norm": 0.39578771591186523, "learning_rate": 0.0001, "loss": 1.5182, "step": 3728 }, { "epoch": 0.4283498937453334, "grad_norm": 0.4229432940483093, "learning_rate": 0.0001, "loss": 1.7019, "step": 3729 }, { "epoch": 0.4284647636551605, "grad_norm": 0.40856388211250305, "learning_rate": 0.0001, "loss": 1.8467, "step": 3730 }, { "epoch": 0.4285796335649876, "grad_norm": 0.4008654057979584, "learning_rate": 0.0001, "loss": 1.7282, "step": 3731 }, { "epoch": 0.42869450347481475, "grad_norm": 0.4332294464111328, "learning_rate": 0.0001, "loss": 1.8846, "step": 3732 }, { "epoch": 0.42880937338464187, "grad_norm": 0.39115771651268005, "learning_rate": 0.0001, "loss": 1.5872, "step": 3733 }, { "epoch": 0.428924243294469, "grad_norm": 0.42474624514579773, "learning_rate": 0.0001, "loss": 1.7635, "step": 3734 }, { "epoch": 0.4290391132042961, "grad_norm": 0.3834139406681061, "learning_rate": 0.0001, "loss": 1.6279, "step": 3735 }, { "epoch": 0.4291539831141233, "grad_norm": 0.42314034700393677, "learning_rate": 0.0001, "loss": 1.7384, "step": 3736 }, { "epoch": 0.4292688530239504, "grad_norm": 0.41689667105674744, "learning_rate": 0.0001, "loss": 1.5733, "step": 3737 }, { "epoch": 0.4293837229337775, "grad_norm": 0.38845548033714294, "learning_rate": 0.0001, "loss": 1.6234, "step": 3738 }, { "epoch": 0.42949859284360464, "grad_norm": 0.4228481352329254, "learning_rate": 0.0001, "loss": 1.7028, "step": 3739 }, { "epoch": 0.42961346275343176, "grad_norm": 0.38769879937171936, "learning_rate": 0.0001, "loss": 1.6032, "step": 3740 }, { "epoch": 0.4297283326632589, "grad_norm": 0.43911242485046387, "learning_rate": 0.0001, "loss": 1.7073, "step": 3741 }, { "epoch": 0.429843202573086, "grad_norm": 0.40347546339035034, "learning_rate": 0.0001, "loss": 1.5931, "step": 3742 }, { "epoch": 0.4299580724829131, "grad_norm": 0.43745940923690796, "learning_rate": 0.0001, "loss": 1.6856, "step": 3743 }, { "epoch": 0.43007294239274024, "grad_norm": 0.41324344277381897, "learning_rate": 0.0001, "loss": 1.6837, "step": 3744 }, { "epoch": 0.43018781230256736, "grad_norm": 0.43059617280960083, "learning_rate": 0.0001, "loss": 1.7581, "step": 3745 }, { "epoch": 0.4303026822123945, "grad_norm": 0.40253952145576477, "learning_rate": 0.0001, "loss": 1.5059, "step": 3746 }, { "epoch": 0.4304175521222216, "grad_norm": 0.3712945580482483, "learning_rate": 0.0001, "loss": 1.4402, "step": 3747 }, { "epoch": 0.4305324220320487, "grad_norm": 0.41729727387428284, "learning_rate": 0.0001, "loss": 1.6481, "step": 3748 }, { "epoch": 0.43064729194187584, "grad_norm": 0.40478816628456116, "learning_rate": 0.0001, "loss": 1.7401, "step": 3749 }, { "epoch": 0.43076216185170296, "grad_norm": 0.37169450521469116, "learning_rate": 0.0001, "loss": 1.45, "step": 3750 }, { "epoch": 0.4308770317615301, "grad_norm": 0.4352746903896332, "learning_rate": 0.0001, "loss": 1.711, "step": 3751 }, { "epoch": 0.4309919016713572, "grad_norm": 0.41806161403656006, "learning_rate": 0.0001, "loss": 1.7793, "step": 3752 }, { "epoch": 0.4311067715811843, "grad_norm": 0.41790369153022766, "learning_rate": 0.0001, "loss": 1.5924, "step": 3753 }, { "epoch": 0.43122164149101144, "grad_norm": 0.40323588252067566, "learning_rate": 0.0001, "loss": 1.6645, "step": 3754 }, { "epoch": 0.43133651140083856, "grad_norm": 0.4263021945953369, "learning_rate": 0.0001, "loss": 1.795, "step": 3755 }, { "epoch": 0.4314513813106657, "grad_norm": 0.4722081124782562, "learning_rate": 0.0001, "loss": 1.8166, "step": 3756 }, { "epoch": 0.4315662512204928, "grad_norm": 0.4170565605163574, "learning_rate": 0.0001, "loss": 1.7488, "step": 3757 }, { "epoch": 0.4316811211303199, "grad_norm": 0.3915204703807831, "learning_rate": 0.0001, "loss": 1.673, "step": 3758 }, { "epoch": 0.43179599104014704, "grad_norm": 0.38846898078918457, "learning_rate": 0.0001, "loss": 1.6482, "step": 3759 }, { "epoch": 0.43191086094997416, "grad_norm": 0.3822706639766693, "learning_rate": 0.0001, "loss": 1.7103, "step": 3760 }, { "epoch": 0.4320257308598013, "grad_norm": 0.3950004279613495, "learning_rate": 0.0001, "loss": 1.5763, "step": 3761 }, { "epoch": 0.4321406007696284, "grad_norm": 0.3973482549190521, "learning_rate": 0.0001, "loss": 1.561, "step": 3762 }, { "epoch": 0.4322554706794555, "grad_norm": 0.40038934350013733, "learning_rate": 0.0001, "loss": 1.6894, "step": 3763 }, { "epoch": 0.43237034058928264, "grad_norm": 0.3878767490386963, "learning_rate": 0.0001, "loss": 1.5425, "step": 3764 }, { "epoch": 0.43248521049910976, "grad_norm": 0.40222638845443726, "learning_rate": 0.0001, "loss": 1.7212, "step": 3765 }, { "epoch": 0.4326000804089369, "grad_norm": 0.38121557235717773, "learning_rate": 0.0001, "loss": 1.5263, "step": 3766 }, { "epoch": 0.432714950318764, "grad_norm": 0.4091407060623169, "learning_rate": 0.0001, "loss": 1.8233, "step": 3767 }, { "epoch": 0.4328298202285911, "grad_norm": 0.39854979515075684, "learning_rate": 0.0001, "loss": 1.6323, "step": 3768 }, { "epoch": 0.43294469013841824, "grad_norm": 0.41306614875793457, "learning_rate": 0.0001, "loss": 1.8204, "step": 3769 }, { "epoch": 0.43305956004824536, "grad_norm": 0.4026402235031128, "learning_rate": 0.0001, "loss": 1.6817, "step": 3770 }, { "epoch": 0.4331744299580725, "grad_norm": 0.3648960292339325, "learning_rate": 0.0001, "loss": 1.3872, "step": 3771 }, { "epoch": 0.4332892998678996, "grad_norm": 0.3978753983974457, "learning_rate": 0.0001, "loss": 1.6007, "step": 3772 }, { "epoch": 0.4334041697777267, "grad_norm": 0.3977111876010895, "learning_rate": 0.0001, "loss": 1.7074, "step": 3773 }, { "epoch": 0.43351903968755384, "grad_norm": 0.4535907506942749, "learning_rate": 0.0001, "loss": 1.8803, "step": 3774 }, { "epoch": 0.43363390959738096, "grad_norm": 0.4159374535083771, "learning_rate": 0.0001, "loss": 1.6929, "step": 3775 }, { "epoch": 0.4337487795072081, "grad_norm": 0.40071192383766174, "learning_rate": 0.0001, "loss": 1.6455, "step": 3776 }, { "epoch": 0.4338636494170352, "grad_norm": 0.40635019540786743, "learning_rate": 0.0001, "loss": 1.6612, "step": 3777 }, { "epoch": 0.4339785193268623, "grad_norm": 0.4317454993724823, "learning_rate": 0.0001, "loss": 1.7709, "step": 3778 }, { "epoch": 0.43409338923668944, "grad_norm": 0.3959444761276245, "learning_rate": 0.0001, "loss": 1.5872, "step": 3779 }, { "epoch": 0.43420825914651656, "grad_norm": 0.3976995050907135, "learning_rate": 0.0001, "loss": 1.671, "step": 3780 }, { "epoch": 0.4343231290563437, "grad_norm": 0.4593839943408966, "learning_rate": 0.0001, "loss": 2.0386, "step": 3781 }, { "epoch": 0.4344379989661708, "grad_norm": 0.4245525598526001, "learning_rate": 0.0001, "loss": 1.7047, "step": 3782 }, { "epoch": 0.4345528688759979, "grad_norm": 0.3964807689189911, "learning_rate": 0.0001, "loss": 1.5174, "step": 3783 }, { "epoch": 0.43466773878582504, "grad_norm": 0.3919978439807892, "learning_rate": 0.0001, "loss": 1.598, "step": 3784 }, { "epoch": 0.43478260869565216, "grad_norm": 0.41812998056411743, "learning_rate": 0.0001, "loss": 1.7921, "step": 3785 }, { "epoch": 0.4348974786054793, "grad_norm": 0.3976697325706482, "learning_rate": 0.0001, "loss": 1.636, "step": 3786 }, { "epoch": 0.4350123485153064, "grad_norm": 0.39994823932647705, "learning_rate": 0.0001, "loss": 1.7524, "step": 3787 }, { "epoch": 0.4351272184251335, "grad_norm": 0.41444066166877747, "learning_rate": 0.0001, "loss": 1.591, "step": 3788 }, { "epoch": 0.43524208833496064, "grad_norm": 0.41670259833335876, "learning_rate": 0.0001, "loss": 1.6125, "step": 3789 }, { "epoch": 0.43535695824478776, "grad_norm": 0.4110081195831299, "learning_rate": 0.0001, "loss": 1.4517, "step": 3790 }, { "epoch": 0.4354718281546149, "grad_norm": 0.41741257905960083, "learning_rate": 0.0001, "loss": 1.7861, "step": 3791 }, { "epoch": 0.435586698064442, "grad_norm": 0.40086445212364197, "learning_rate": 0.0001, "loss": 1.6162, "step": 3792 }, { "epoch": 0.4357015679742691, "grad_norm": 0.38740214705467224, "learning_rate": 0.0001, "loss": 1.5713, "step": 3793 }, { "epoch": 0.43581643788409624, "grad_norm": 0.43951183557510376, "learning_rate": 0.0001, "loss": 1.795, "step": 3794 }, { "epoch": 0.43593130779392336, "grad_norm": 0.4288491904735565, "learning_rate": 0.0001, "loss": 1.7329, "step": 3795 }, { "epoch": 0.4360461777037505, "grad_norm": 0.4051986038684845, "learning_rate": 0.0001, "loss": 1.6826, "step": 3796 }, { "epoch": 0.4361610476135776, "grad_norm": 0.36968207359313965, "learning_rate": 0.0001, "loss": 1.4493, "step": 3797 }, { "epoch": 0.4362759175234047, "grad_norm": 0.39007097482681274, "learning_rate": 0.0001, "loss": 1.4471, "step": 3798 }, { "epoch": 0.43639078743323184, "grad_norm": 0.40956395864486694, "learning_rate": 0.0001, "loss": 1.5975, "step": 3799 }, { "epoch": 0.43650565734305896, "grad_norm": 0.4181691110134125, "learning_rate": 0.0001, "loss": 1.7006, "step": 3800 }, { "epoch": 0.4366205272528861, "grad_norm": 0.4084911644458771, "learning_rate": 0.0001, "loss": 1.7662, "step": 3801 }, { "epoch": 0.4367353971627132, "grad_norm": 0.4404086470603943, "learning_rate": 0.0001, "loss": 1.7702, "step": 3802 }, { "epoch": 0.4368502670725403, "grad_norm": 0.43644893169403076, "learning_rate": 0.0001, "loss": 1.5842, "step": 3803 }, { "epoch": 0.43696513698236744, "grad_norm": 0.42488721013069153, "learning_rate": 0.0001, "loss": 1.795, "step": 3804 }, { "epoch": 0.4370800068921946, "grad_norm": 0.4115133583545685, "learning_rate": 0.0001, "loss": 1.7059, "step": 3805 }, { "epoch": 0.43719487680202174, "grad_norm": 0.4241204857826233, "learning_rate": 0.0001, "loss": 1.7201, "step": 3806 }, { "epoch": 0.43730974671184886, "grad_norm": 0.3907410204410553, "learning_rate": 0.0001, "loss": 1.6551, "step": 3807 }, { "epoch": 0.437424616621676, "grad_norm": 0.42004403471946716, "learning_rate": 0.0001, "loss": 1.7367, "step": 3808 }, { "epoch": 0.4375394865315031, "grad_norm": 0.39130643010139465, "learning_rate": 0.0001, "loss": 1.5863, "step": 3809 }, { "epoch": 0.4376543564413302, "grad_norm": 0.420646071434021, "learning_rate": 0.0001, "loss": 1.6687, "step": 3810 }, { "epoch": 0.43776922635115734, "grad_norm": 0.43482112884521484, "learning_rate": 0.0001, "loss": 1.7814, "step": 3811 }, { "epoch": 0.43788409626098446, "grad_norm": 0.4364151656627655, "learning_rate": 0.0001, "loss": 1.7378, "step": 3812 }, { "epoch": 0.4379989661708116, "grad_norm": 0.44241318106651306, "learning_rate": 0.0001, "loss": 1.8158, "step": 3813 }, { "epoch": 0.4381138360806387, "grad_norm": 0.3973522186279297, "learning_rate": 0.0001, "loss": 1.6807, "step": 3814 }, { "epoch": 0.4382287059904658, "grad_norm": 0.42336922883987427, "learning_rate": 0.0001, "loss": 1.8086, "step": 3815 }, { "epoch": 0.43834357590029294, "grad_norm": 0.40706026554107666, "learning_rate": 0.0001, "loss": 1.7805, "step": 3816 }, { "epoch": 0.43845844581012006, "grad_norm": 0.4009002149105072, "learning_rate": 0.0001, "loss": 1.7439, "step": 3817 }, { "epoch": 0.4385733157199472, "grad_norm": 0.38294774293899536, "learning_rate": 0.0001, "loss": 1.6179, "step": 3818 }, { "epoch": 0.4386881856297743, "grad_norm": 0.40694519877433777, "learning_rate": 0.0001, "loss": 1.5189, "step": 3819 }, { "epoch": 0.4388030555396014, "grad_norm": 0.3984982371330261, "learning_rate": 0.0001, "loss": 1.6288, "step": 3820 }, { "epoch": 0.43891792544942854, "grad_norm": 0.44056373834609985, "learning_rate": 0.0001, "loss": 1.7716, "step": 3821 }, { "epoch": 0.43903279535925566, "grad_norm": 0.41726037859916687, "learning_rate": 0.0001, "loss": 1.8127, "step": 3822 }, { "epoch": 0.4391476652690828, "grad_norm": 0.4185958802700043, "learning_rate": 0.0001, "loss": 1.6932, "step": 3823 }, { "epoch": 0.4392625351789099, "grad_norm": 0.3969833254814148, "learning_rate": 0.0001, "loss": 1.661, "step": 3824 }, { "epoch": 0.439377405088737, "grad_norm": 0.42625898122787476, "learning_rate": 0.0001, "loss": 1.7313, "step": 3825 }, { "epoch": 0.43949227499856414, "grad_norm": 0.3792615532875061, "learning_rate": 0.0001, "loss": 1.5973, "step": 3826 }, { "epoch": 0.43960714490839126, "grad_norm": 0.3736385107040405, "learning_rate": 0.0001, "loss": 1.3924, "step": 3827 }, { "epoch": 0.4397220148182184, "grad_norm": 0.4105335772037506, "learning_rate": 0.0001, "loss": 1.6946, "step": 3828 }, { "epoch": 0.4398368847280455, "grad_norm": 0.4096393585205078, "learning_rate": 0.0001, "loss": 1.722, "step": 3829 }, { "epoch": 0.4399517546378726, "grad_norm": 0.3897015154361725, "learning_rate": 0.0001, "loss": 1.597, "step": 3830 }, { "epoch": 0.44006662454769974, "grad_norm": 0.4688592851161957, "learning_rate": 0.0001, "loss": 1.7616, "step": 3831 }, { "epoch": 0.44018149445752686, "grad_norm": 0.4732673168182373, "learning_rate": 0.0001, "loss": 1.5295, "step": 3832 }, { "epoch": 0.440296364367354, "grad_norm": 0.40647318959236145, "learning_rate": 0.0001, "loss": 1.745, "step": 3833 }, { "epoch": 0.4404112342771811, "grad_norm": 0.38484612107276917, "learning_rate": 0.0001, "loss": 1.4694, "step": 3834 }, { "epoch": 0.4405261041870082, "grad_norm": 0.39425164461135864, "learning_rate": 0.0001, "loss": 1.6431, "step": 3835 }, { "epoch": 0.44064097409683534, "grad_norm": 0.38018524646759033, "learning_rate": 0.0001, "loss": 1.5564, "step": 3836 }, { "epoch": 0.44075584400666246, "grad_norm": 0.40597987174987793, "learning_rate": 0.0001, "loss": 1.6501, "step": 3837 }, { "epoch": 0.4408707139164896, "grad_norm": 0.4145064949989319, "learning_rate": 0.0001, "loss": 1.699, "step": 3838 }, { "epoch": 0.4409855838263167, "grad_norm": 0.41554126143455505, "learning_rate": 0.0001, "loss": 1.7004, "step": 3839 }, { "epoch": 0.4411004537361438, "grad_norm": 0.3642127811908722, "learning_rate": 0.0001, "loss": 1.5098, "step": 3840 }, { "epoch": 0.44121532364597094, "grad_norm": 0.40661707520484924, "learning_rate": 0.0001, "loss": 1.7808, "step": 3841 }, { "epoch": 0.44133019355579806, "grad_norm": 0.42845696210861206, "learning_rate": 0.0001, "loss": 1.6614, "step": 3842 }, { "epoch": 0.4414450634656252, "grad_norm": 0.43189069628715515, "learning_rate": 0.0001, "loss": 1.7699, "step": 3843 }, { "epoch": 0.4415599333754523, "grad_norm": 0.42555564641952515, "learning_rate": 0.0001, "loss": 1.7836, "step": 3844 }, { "epoch": 0.4416748032852794, "grad_norm": 0.4227694571018219, "learning_rate": 0.0001, "loss": 1.7315, "step": 3845 }, { "epoch": 0.44178967319510654, "grad_norm": 0.4147578477859497, "learning_rate": 0.0001, "loss": 1.7124, "step": 3846 }, { "epoch": 0.44190454310493366, "grad_norm": 0.39049941301345825, "learning_rate": 0.0001, "loss": 1.5687, "step": 3847 }, { "epoch": 0.4420194130147608, "grad_norm": 0.4264180362224579, "learning_rate": 0.0001, "loss": 1.7393, "step": 3848 }, { "epoch": 0.4421342829245879, "grad_norm": 0.416720449924469, "learning_rate": 0.0001, "loss": 1.6101, "step": 3849 }, { "epoch": 0.442249152834415, "grad_norm": 0.4004881680011749, "learning_rate": 0.0001, "loss": 1.6228, "step": 3850 }, { "epoch": 0.44236402274424214, "grad_norm": 0.3840608298778534, "learning_rate": 0.0001, "loss": 1.4561, "step": 3851 }, { "epoch": 0.44247889265406926, "grad_norm": 0.398914098739624, "learning_rate": 0.0001, "loss": 1.6199, "step": 3852 }, { "epoch": 0.4425937625638964, "grad_norm": 0.4018940329551697, "learning_rate": 0.0001, "loss": 1.6785, "step": 3853 }, { "epoch": 0.4427086324737235, "grad_norm": 0.4263051450252533, "learning_rate": 0.0001, "loss": 1.6775, "step": 3854 }, { "epoch": 0.4428235023835506, "grad_norm": 0.4021928608417511, "learning_rate": 0.0001, "loss": 1.6207, "step": 3855 }, { "epoch": 0.44293837229337774, "grad_norm": 0.3968657851219177, "learning_rate": 0.0001, "loss": 1.4729, "step": 3856 }, { "epoch": 0.44305324220320486, "grad_norm": 0.3775864541530609, "learning_rate": 0.0001, "loss": 1.6014, "step": 3857 }, { "epoch": 0.443168112113032, "grad_norm": 0.3850625157356262, "learning_rate": 0.0001, "loss": 1.6046, "step": 3858 }, { "epoch": 0.4432829820228591, "grad_norm": 0.4146713614463806, "learning_rate": 0.0001, "loss": 1.6827, "step": 3859 }, { "epoch": 0.4433978519326862, "grad_norm": 0.3781043291091919, "learning_rate": 0.0001, "loss": 1.4812, "step": 3860 }, { "epoch": 0.44351272184251334, "grad_norm": 0.38465172052383423, "learning_rate": 0.0001, "loss": 1.6866, "step": 3861 }, { "epoch": 0.44362759175234046, "grad_norm": 0.41216936707496643, "learning_rate": 0.0001, "loss": 1.7803, "step": 3862 }, { "epoch": 0.4437424616621676, "grad_norm": 0.4183763563632965, "learning_rate": 0.0001, "loss": 1.6628, "step": 3863 }, { "epoch": 0.4438573315719947, "grad_norm": 0.39858147501945496, "learning_rate": 0.0001, "loss": 1.641, "step": 3864 }, { "epoch": 0.4439722014818218, "grad_norm": 0.4076845347881317, "learning_rate": 0.0001, "loss": 1.7546, "step": 3865 }, { "epoch": 0.44408707139164894, "grad_norm": 0.4265919625759125, "learning_rate": 0.0001, "loss": 1.6738, "step": 3866 }, { "epoch": 0.44420194130147606, "grad_norm": 0.4252798855304718, "learning_rate": 0.0001, "loss": 1.698, "step": 3867 }, { "epoch": 0.4443168112113032, "grad_norm": 0.3964008688926697, "learning_rate": 0.0001, "loss": 1.5616, "step": 3868 }, { "epoch": 0.4444316811211303, "grad_norm": 0.3971204459667206, "learning_rate": 0.0001, "loss": 1.5393, "step": 3869 }, { "epoch": 0.4445465510309574, "grad_norm": 0.39955177903175354, "learning_rate": 0.0001, "loss": 1.7192, "step": 3870 }, { "epoch": 0.44466142094078454, "grad_norm": 0.40053296089172363, "learning_rate": 0.0001, "loss": 1.5665, "step": 3871 }, { "epoch": 0.44477629085061166, "grad_norm": 0.3930230438709259, "learning_rate": 0.0001, "loss": 1.6522, "step": 3872 }, { "epoch": 0.44489116076043883, "grad_norm": 0.41546955704689026, "learning_rate": 0.0001, "loss": 1.7292, "step": 3873 }, { "epoch": 0.44500603067026595, "grad_norm": 0.40702396631240845, "learning_rate": 0.0001, "loss": 1.6555, "step": 3874 }, { "epoch": 0.44512090058009307, "grad_norm": 0.451388955116272, "learning_rate": 0.0001, "loss": 1.8062, "step": 3875 }, { "epoch": 0.4452357704899202, "grad_norm": 0.40768948197364807, "learning_rate": 0.0001, "loss": 1.6942, "step": 3876 }, { "epoch": 0.4453506403997473, "grad_norm": 0.4409973919391632, "learning_rate": 0.0001, "loss": 1.7809, "step": 3877 }, { "epoch": 0.44546551030957443, "grad_norm": 0.4067426323890686, "learning_rate": 0.0001, "loss": 1.6392, "step": 3878 }, { "epoch": 0.44558038021940155, "grad_norm": 0.37034404277801514, "learning_rate": 0.0001, "loss": 1.6065, "step": 3879 }, { "epoch": 0.44569525012922867, "grad_norm": 0.41960740089416504, "learning_rate": 0.0001, "loss": 1.9168, "step": 3880 }, { "epoch": 0.4458101200390558, "grad_norm": 0.40706804394721985, "learning_rate": 0.0001, "loss": 1.5405, "step": 3881 }, { "epoch": 0.4459249899488829, "grad_norm": 0.38883423805236816, "learning_rate": 0.0001, "loss": 1.5699, "step": 3882 }, { "epoch": 0.44603985985871003, "grad_norm": 0.3777417540550232, "learning_rate": 0.0001, "loss": 1.4643, "step": 3883 }, { "epoch": 0.44615472976853715, "grad_norm": 0.3927326798439026, "learning_rate": 0.0001, "loss": 1.6023, "step": 3884 }, { "epoch": 0.44626959967836427, "grad_norm": 0.39336463809013367, "learning_rate": 0.0001, "loss": 1.6599, "step": 3885 }, { "epoch": 0.4463844695881914, "grad_norm": 0.38999754190444946, "learning_rate": 0.0001, "loss": 1.6558, "step": 3886 }, { "epoch": 0.4464993394980185, "grad_norm": 0.37875038385391235, "learning_rate": 0.0001, "loss": 1.5655, "step": 3887 }, { "epoch": 0.44661420940784563, "grad_norm": 0.4335520267486572, "learning_rate": 0.0001, "loss": 1.9779, "step": 3888 }, { "epoch": 0.44672907931767275, "grad_norm": 0.461556613445282, "learning_rate": 0.0001, "loss": 1.9997, "step": 3889 }, { "epoch": 0.44684394922749987, "grad_norm": 0.4445992410182953, "learning_rate": 0.0001, "loss": 2.0335, "step": 3890 }, { "epoch": 0.446958819137327, "grad_norm": 0.39221543073654175, "learning_rate": 0.0001, "loss": 1.5501, "step": 3891 }, { "epoch": 0.4470736890471541, "grad_norm": 0.3761284351348877, "learning_rate": 0.0001, "loss": 1.6046, "step": 3892 }, { "epoch": 0.44718855895698123, "grad_norm": 0.473550409078598, "learning_rate": 0.0001, "loss": 1.9831, "step": 3893 }, { "epoch": 0.44730342886680835, "grad_norm": 0.39917153120040894, "learning_rate": 0.0001, "loss": 1.6847, "step": 3894 }, { "epoch": 0.44741829877663547, "grad_norm": 0.3896121680736542, "learning_rate": 0.0001, "loss": 1.5956, "step": 3895 }, { "epoch": 0.4475331686864626, "grad_norm": 0.39092063903808594, "learning_rate": 0.0001, "loss": 1.5873, "step": 3896 }, { "epoch": 0.4476480385962897, "grad_norm": 0.427628755569458, "learning_rate": 0.0001, "loss": 1.7593, "step": 3897 }, { "epoch": 0.44776290850611683, "grad_norm": 0.4042544364929199, "learning_rate": 0.0001, "loss": 1.7025, "step": 3898 }, { "epoch": 0.44787777841594395, "grad_norm": 0.3957765996456146, "learning_rate": 0.0001, "loss": 1.6169, "step": 3899 }, { "epoch": 0.44799264832577107, "grad_norm": 0.41617143154144287, "learning_rate": 0.0001, "loss": 1.6083, "step": 3900 }, { "epoch": 0.4481075182355982, "grad_norm": 0.4180884063243866, "learning_rate": 0.0001, "loss": 1.4391, "step": 3901 }, { "epoch": 0.4482223881454253, "grad_norm": 0.38561975955963135, "learning_rate": 0.0001, "loss": 1.4084, "step": 3902 }, { "epoch": 0.44833725805525243, "grad_norm": 0.4009731709957123, "learning_rate": 0.0001, "loss": 1.5731, "step": 3903 }, { "epoch": 0.44845212796507955, "grad_norm": 0.4165489077568054, "learning_rate": 0.0001, "loss": 1.6112, "step": 3904 }, { "epoch": 0.44856699787490667, "grad_norm": 0.41844233870506287, "learning_rate": 0.0001, "loss": 1.7241, "step": 3905 }, { "epoch": 0.4486818677847338, "grad_norm": 0.4142603576183319, "learning_rate": 0.0001, "loss": 1.5215, "step": 3906 }, { "epoch": 0.4487967376945609, "grad_norm": 0.41738462448120117, "learning_rate": 0.0001, "loss": 1.7553, "step": 3907 }, { "epoch": 0.44891160760438803, "grad_norm": 0.40905338525772095, "learning_rate": 0.0001, "loss": 1.5517, "step": 3908 }, { "epoch": 0.44902647751421515, "grad_norm": 0.4092770516872406, "learning_rate": 0.0001, "loss": 1.7373, "step": 3909 }, { "epoch": 0.44914134742404227, "grad_norm": 0.3912902772426605, "learning_rate": 0.0001, "loss": 1.6418, "step": 3910 }, { "epoch": 0.4492562173338694, "grad_norm": 0.42974889278411865, "learning_rate": 0.0001, "loss": 1.5082, "step": 3911 }, { "epoch": 0.4493710872436965, "grad_norm": 0.4151856601238251, "learning_rate": 0.0001, "loss": 1.511, "step": 3912 }, { "epoch": 0.44948595715352363, "grad_norm": 0.4048108458518982, "learning_rate": 0.0001, "loss": 1.6821, "step": 3913 }, { "epoch": 0.44960082706335075, "grad_norm": 0.3995741307735443, "learning_rate": 0.0001, "loss": 1.5816, "step": 3914 }, { "epoch": 0.44971569697317787, "grad_norm": 0.41176357865333557, "learning_rate": 0.0001, "loss": 1.65, "step": 3915 }, { "epoch": 0.449830566883005, "grad_norm": 0.4261864721775055, "learning_rate": 0.0001, "loss": 1.719, "step": 3916 }, { "epoch": 0.4499454367928321, "grad_norm": 0.41965851187705994, "learning_rate": 0.0001, "loss": 1.8569, "step": 3917 }, { "epoch": 0.45006030670265923, "grad_norm": 0.4024384617805481, "learning_rate": 0.0001, "loss": 1.682, "step": 3918 }, { "epoch": 0.45017517661248635, "grad_norm": 0.42263585329055786, "learning_rate": 0.0001, "loss": 1.7975, "step": 3919 }, { "epoch": 0.45029004652231347, "grad_norm": 0.37755391001701355, "learning_rate": 0.0001, "loss": 1.6863, "step": 3920 }, { "epoch": 0.4504049164321406, "grad_norm": 0.4409322440624237, "learning_rate": 0.0001, "loss": 1.9805, "step": 3921 }, { "epoch": 0.4505197863419677, "grad_norm": 0.4285867512226105, "learning_rate": 0.0001, "loss": 1.7172, "step": 3922 }, { "epoch": 0.45063465625179483, "grad_norm": 0.4555914103984833, "learning_rate": 0.0001, "loss": 1.7151, "step": 3923 }, { "epoch": 0.45074952616162195, "grad_norm": 0.399330198764801, "learning_rate": 0.0001, "loss": 1.5435, "step": 3924 }, { "epoch": 0.45086439607144907, "grad_norm": 0.4097477197647095, "learning_rate": 0.0001, "loss": 1.7046, "step": 3925 }, { "epoch": 0.4509792659812762, "grad_norm": 0.4111149311065674, "learning_rate": 0.0001, "loss": 1.6762, "step": 3926 }, { "epoch": 0.4510941358911033, "grad_norm": 0.4010746479034424, "learning_rate": 0.0001, "loss": 1.2525, "step": 3927 }, { "epoch": 0.45120900580093043, "grad_norm": 0.39972639083862305, "learning_rate": 0.0001, "loss": 1.6492, "step": 3928 }, { "epoch": 0.45132387571075755, "grad_norm": 0.4085247218608856, "learning_rate": 0.0001, "loss": 1.7221, "step": 3929 }, { "epoch": 0.45143874562058467, "grad_norm": 0.3858582377433777, "learning_rate": 0.0001, "loss": 1.4941, "step": 3930 }, { "epoch": 0.4515536155304118, "grad_norm": 0.4039546251296997, "learning_rate": 0.0001, "loss": 1.4808, "step": 3931 }, { "epoch": 0.4516684854402389, "grad_norm": 0.42480626702308655, "learning_rate": 0.0001, "loss": 1.6769, "step": 3932 }, { "epoch": 0.45178335535006603, "grad_norm": 0.4410436451435089, "learning_rate": 0.0001, "loss": 1.7151, "step": 3933 }, { "epoch": 0.45189822525989315, "grad_norm": 0.3930107057094574, "learning_rate": 0.0001, "loss": 1.5862, "step": 3934 }, { "epoch": 0.45201309516972027, "grad_norm": 0.3937776982784271, "learning_rate": 0.0001, "loss": 1.6546, "step": 3935 }, { "epoch": 0.4521279650795474, "grad_norm": 0.41633737087249756, "learning_rate": 0.0001, "loss": 1.9046, "step": 3936 }, { "epoch": 0.4522428349893745, "grad_norm": 0.41131532192230225, "learning_rate": 0.0001, "loss": 1.7626, "step": 3937 }, { "epoch": 0.45235770489920163, "grad_norm": 0.4197579026222229, "learning_rate": 0.0001, "loss": 1.686, "step": 3938 }, { "epoch": 0.45247257480902875, "grad_norm": 0.4465826749801636, "learning_rate": 0.0001, "loss": 1.6723, "step": 3939 }, { "epoch": 0.45258744471885587, "grad_norm": 0.3683403730392456, "learning_rate": 0.0001, "loss": 1.6319, "step": 3940 }, { "epoch": 0.452702314628683, "grad_norm": 0.4257335066795349, "learning_rate": 0.0001, "loss": 1.6718, "step": 3941 }, { "epoch": 0.45281718453851016, "grad_norm": 0.42209717631340027, "learning_rate": 0.0001, "loss": 1.5877, "step": 3942 }, { "epoch": 0.4529320544483373, "grad_norm": 0.3728243112564087, "learning_rate": 0.0001, "loss": 1.3088, "step": 3943 }, { "epoch": 0.4530469243581644, "grad_norm": 0.44413092732429504, "learning_rate": 0.0001, "loss": 1.7914, "step": 3944 }, { "epoch": 0.4531617942679915, "grad_norm": 0.40472888946533203, "learning_rate": 0.0001, "loss": 1.4247, "step": 3945 }, { "epoch": 0.45327666417781864, "grad_norm": 0.44987305998802185, "learning_rate": 0.0001, "loss": 1.8835, "step": 3946 }, { "epoch": 0.45339153408764576, "grad_norm": 0.4118070602416992, "learning_rate": 0.0001, "loss": 1.5984, "step": 3947 }, { "epoch": 0.4535064039974729, "grad_norm": 0.3929295241832733, "learning_rate": 0.0001, "loss": 1.6814, "step": 3948 }, { "epoch": 0.4536212739073, "grad_norm": 0.43117576837539673, "learning_rate": 0.0001, "loss": 1.6748, "step": 3949 }, { "epoch": 0.4537361438171271, "grad_norm": 0.434177041053772, "learning_rate": 0.0001, "loss": 1.6521, "step": 3950 }, { "epoch": 0.45385101372695424, "grad_norm": 0.4338892698287964, "learning_rate": 0.0001, "loss": 1.6405, "step": 3951 }, { "epoch": 0.45396588363678136, "grad_norm": 0.4370008409023285, "learning_rate": 0.0001, "loss": 1.6605, "step": 3952 }, { "epoch": 0.4540807535466085, "grad_norm": 0.42377498745918274, "learning_rate": 0.0001, "loss": 1.6392, "step": 3953 }, { "epoch": 0.4541956234564356, "grad_norm": 0.3817870318889618, "learning_rate": 0.0001, "loss": 1.5003, "step": 3954 }, { "epoch": 0.4543104933662627, "grad_norm": 0.37805989384651184, "learning_rate": 0.0001, "loss": 1.6596, "step": 3955 }, { "epoch": 0.45442536327608984, "grad_norm": 0.4458475708961487, "learning_rate": 0.0001, "loss": 1.8117, "step": 3956 }, { "epoch": 0.45454023318591696, "grad_norm": 0.44833672046661377, "learning_rate": 0.0001, "loss": 1.6747, "step": 3957 }, { "epoch": 0.4546551030957441, "grad_norm": 0.40606266260147095, "learning_rate": 0.0001, "loss": 1.7301, "step": 3958 }, { "epoch": 0.4547699730055712, "grad_norm": 0.4011266529560089, "learning_rate": 0.0001, "loss": 1.5636, "step": 3959 }, { "epoch": 0.4548848429153983, "grad_norm": 0.4372859299182892, "learning_rate": 0.0001, "loss": 1.7811, "step": 3960 }, { "epoch": 0.45499971282522544, "grad_norm": 0.41698816418647766, "learning_rate": 0.0001, "loss": 1.6018, "step": 3961 }, { "epoch": 0.45511458273505256, "grad_norm": 0.41979551315307617, "learning_rate": 0.0001, "loss": 1.8385, "step": 3962 }, { "epoch": 0.4552294526448797, "grad_norm": 0.4077071249485016, "learning_rate": 0.0001, "loss": 1.6945, "step": 3963 }, { "epoch": 0.4553443225547068, "grad_norm": 0.3627765476703644, "learning_rate": 0.0001, "loss": 1.4897, "step": 3964 }, { "epoch": 0.4554591924645339, "grad_norm": 0.3952348828315735, "learning_rate": 0.0001, "loss": 1.6327, "step": 3965 }, { "epoch": 0.45557406237436104, "grad_norm": 0.44601067900657654, "learning_rate": 0.0001, "loss": 1.8478, "step": 3966 }, { "epoch": 0.45568893228418816, "grad_norm": 0.3901880085468292, "learning_rate": 0.0001, "loss": 1.5955, "step": 3967 }, { "epoch": 0.4558038021940153, "grad_norm": 0.4016532599925995, "learning_rate": 0.0001, "loss": 1.4559, "step": 3968 }, { "epoch": 0.4559186721038424, "grad_norm": 0.4067118763923645, "learning_rate": 0.0001, "loss": 1.4878, "step": 3969 }, { "epoch": 0.4560335420136695, "grad_norm": 0.49588122963905334, "learning_rate": 0.0001, "loss": 1.8831, "step": 3970 }, { "epoch": 0.45614841192349664, "grad_norm": 0.4378882348537445, "learning_rate": 0.0001, "loss": 1.7759, "step": 3971 }, { "epoch": 0.45626328183332376, "grad_norm": 0.3988548517227173, "learning_rate": 0.0001, "loss": 1.5539, "step": 3972 }, { "epoch": 0.4563781517431509, "grad_norm": 0.4102991819381714, "learning_rate": 0.0001, "loss": 1.6752, "step": 3973 }, { "epoch": 0.456493021652978, "grad_norm": 0.41767358779907227, "learning_rate": 0.0001, "loss": 1.5417, "step": 3974 }, { "epoch": 0.4566078915628051, "grad_norm": 0.40786105394363403, "learning_rate": 0.0001, "loss": 1.6357, "step": 3975 }, { "epoch": 0.45672276147263224, "grad_norm": 0.4227571487426758, "learning_rate": 0.0001, "loss": 1.6638, "step": 3976 }, { "epoch": 0.45683763138245936, "grad_norm": 0.4906644821166992, "learning_rate": 0.0001, "loss": 1.8612, "step": 3977 }, { "epoch": 0.4569525012922865, "grad_norm": 0.4171657860279083, "learning_rate": 0.0001, "loss": 1.5353, "step": 3978 }, { "epoch": 0.4570673712021136, "grad_norm": 0.5092725157737732, "learning_rate": 0.0001, "loss": 1.9038, "step": 3979 }, { "epoch": 0.4571822411119407, "grad_norm": 0.3923867642879486, "learning_rate": 0.0001, "loss": 1.5821, "step": 3980 }, { "epoch": 0.45729711102176784, "grad_norm": 0.41276079416275024, "learning_rate": 0.0001, "loss": 1.7424, "step": 3981 }, { "epoch": 0.45741198093159496, "grad_norm": 0.41370689868927, "learning_rate": 0.0001, "loss": 1.6337, "step": 3982 }, { "epoch": 0.4575268508414221, "grad_norm": 0.40904295444488525, "learning_rate": 0.0001, "loss": 1.5968, "step": 3983 }, { "epoch": 0.4576417207512492, "grad_norm": 0.4055366516113281, "learning_rate": 0.0001, "loss": 1.6431, "step": 3984 }, { "epoch": 0.4577565906610763, "grad_norm": 0.40704345703125, "learning_rate": 0.0001, "loss": 1.6477, "step": 3985 }, { "epoch": 0.45787146057090344, "grad_norm": 0.395580530166626, "learning_rate": 0.0001, "loss": 1.5129, "step": 3986 }, { "epoch": 0.45798633048073056, "grad_norm": 0.5622934103012085, "learning_rate": 0.0001, "loss": 1.6131, "step": 3987 }, { "epoch": 0.4581012003905577, "grad_norm": 0.4147200584411621, "learning_rate": 0.0001, "loss": 1.5065, "step": 3988 }, { "epoch": 0.4582160703003848, "grad_norm": 0.4014433026313782, "learning_rate": 0.0001, "loss": 1.7181, "step": 3989 }, { "epoch": 0.4583309402102119, "grad_norm": 0.438319593667984, "learning_rate": 0.0001, "loss": 1.5005, "step": 3990 }, { "epoch": 0.45844581012003904, "grad_norm": 0.4093727767467499, "learning_rate": 0.0001, "loss": 1.5912, "step": 3991 }, { "epoch": 0.45856068002986616, "grad_norm": 0.40968188643455505, "learning_rate": 0.0001, "loss": 1.6271, "step": 3992 }, { "epoch": 0.4586755499396933, "grad_norm": 0.43956807255744934, "learning_rate": 0.0001, "loss": 1.6531, "step": 3993 }, { "epoch": 0.4587904198495204, "grad_norm": 0.40947967767715454, "learning_rate": 0.0001, "loss": 1.5883, "step": 3994 }, { "epoch": 0.4589052897593475, "grad_norm": 0.40524375438690186, "learning_rate": 0.0001, "loss": 1.6648, "step": 3995 }, { "epoch": 0.45902015966917464, "grad_norm": 0.4079066812992096, "learning_rate": 0.0001, "loss": 1.7688, "step": 3996 }, { "epoch": 0.45913502957900176, "grad_norm": 0.40221965312957764, "learning_rate": 0.0001, "loss": 1.8436, "step": 3997 }, { "epoch": 0.4592498994888289, "grad_norm": 0.4226916432380676, "learning_rate": 0.0001, "loss": 1.8167, "step": 3998 }, { "epoch": 0.459364769398656, "grad_norm": 0.3903824985027313, "learning_rate": 0.0001, "loss": 1.7388, "step": 3999 }, { "epoch": 0.4594796393084831, "grad_norm": 0.4148130416870117, "learning_rate": 0.0001, "loss": 1.8559, "step": 4000 }, { "epoch": 0.45959450921831024, "grad_norm": 0.39708560705184937, "learning_rate": 0.0001, "loss": 1.6761, "step": 4001 }, { "epoch": 0.45970937912813736, "grad_norm": 0.4335632920265198, "learning_rate": 0.0001, "loss": 1.7744, "step": 4002 }, { "epoch": 0.4598242490379645, "grad_norm": 0.4105009436607361, "learning_rate": 0.0001, "loss": 1.7787, "step": 4003 }, { "epoch": 0.4599391189477916, "grad_norm": 0.4119328558444977, "learning_rate": 0.0001, "loss": 1.569, "step": 4004 }, { "epoch": 0.4600539888576187, "grad_norm": 0.4249887466430664, "learning_rate": 0.0001, "loss": 1.6975, "step": 4005 }, { "epoch": 0.46016885876744584, "grad_norm": 0.4140334129333496, "learning_rate": 0.0001, "loss": 1.6846, "step": 4006 }, { "epoch": 0.46028372867727296, "grad_norm": 0.41865649819374084, "learning_rate": 0.0001, "loss": 1.6627, "step": 4007 }, { "epoch": 0.4603985985871001, "grad_norm": 0.3861866295337677, "learning_rate": 0.0001, "loss": 1.3934, "step": 4008 }, { "epoch": 0.4605134684969272, "grad_norm": 0.3830776512622833, "learning_rate": 0.0001, "loss": 1.6086, "step": 4009 }, { "epoch": 0.4606283384067543, "grad_norm": 0.4259714186191559, "learning_rate": 0.0001, "loss": 1.7963, "step": 4010 }, { "epoch": 0.4607432083165815, "grad_norm": 0.3694056570529938, "learning_rate": 0.0001, "loss": 1.5173, "step": 4011 }, { "epoch": 0.4608580782264086, "grad_norm": 0.41248619556427, "learning_rate": 0.0001, "loss": 1.7967, "step": 4012 }, { "epoch": 0.46097294813623574, "grad_norm": 0.40641582012176514, "learning_rate": 0.0001, "loss": 1.7708, "step": 4013 }, { "epoch": 0.46108781804606286, "grad_norm": 0.43671518564224243, "learning_rate": 0.0001, "loss": 1.6405, "step": 4014 }, { "epoch": 0.46120268795589, "grad_norm": 0.4056967496871948, "learning_rate": 0.0001, "loss": 1.7083, "step": 4015 }, { "epoch": 0.4613175578657171, "grad_norm": 0.4197401702404022, "learning_rate": 0.0001, "loss": 1.763, "step": 4016 }, { "epoch": 0.4614324277755442, "grad_norm": 0.38381877541542053, "learning_rate": 0.0001, "loss": 1.6494, "step": 4017 }, { "epoch": 0.46154729768537134, "grad_norm": 0.39994266629219055, "learning_rate": 0.0001, "loss": 1.6528, "step": 4018 }, { "epoch": 0.46166216759519846, "grad_norm": 0.40306952595710754, "learning_rate": 0.0001, "loss": 1.7447, "step": 4019 }, { "epoch": 0.4617770375050256, "grad_norm": 0.4068452715873718, "learning_rate": 0.0001, "loss": 1.6314, "step": 4020 }, { "epoch": 0.4618919074148527, "grad_norm": 0.3850820064544678, "learning_rate": 0.0001, "loss": 1.6017, "step": 4021 }, { "epoch": 0.4620067773246798, "grad_norm": 0.420269250869751, "learning_rate": 0.0001, "loss": 1.814, "step": 4022 }, { "epoch": 0.46212164723450694, "grad_norm": 0.404812216758728, "learning_rate": 0.0001, "loss": 1.6553, "step": 4023 }, { "epoch": 0.46223651714433406, "grad_norm": 0.40154874324798584, "learning_rate": 0.0001, "loss": 1.7349, "step": 4024 }, { "epoch": 0.4623513870541612, "grad_norm": 0.40221109986305237, "learning_rate": 0.0001, "loss": 1.6723, "step": 4025 }, { "epoch": 0.4624662569639883, "grad_norm": 0.4642849564552307, "learning_rate": 0.0001, "loss": 1.6938, "step": 4026 }, { "epoch": 0.4625811268738154, "grad_norm": 0.4198199212551117, "learning_rate": 0.0001, "loss": 1.6603, "step": 4027 }, { "epoch": 0.46269599678364254, "grad_norm": 0.40827077627182007, "learning_rate": 0.0001, "loss": 1.7611, "step": 4028 }, { "epoch": 0.46281086669346966, "grad_norm": 0.41351550817489624, "learning_rate": 0.0001, "loss": 1.6947, "step": 4029 }, { "epoch": 0.4629257366032968, "grad_norm": 0.41555505990982056, "learning_rate": 0.0001, "loss": 1.7832, "step": 4030 }, { "epoch": 0.4630406065131239, "grad_norm": 0.4276737570762634, "learning_rate": 0.0001, "loss": 1.7332, "step": 4031 }, { "epoch": 0.463155476422951, "grad_norm": 0.39254918694496155, "learning_rate": 0.0001, "loss": 1.7283, "step": 4032 }, { "epoch": 0.46327034633277814, "grad_norm": 0.40760576725006104, "learning_rate": 0.0001, "loss": 1.6464, "step": 4033 }, { "epoch": 0.46338521624260526, "grad_norm": 0.3900972902774811, "learning_rate": 0.0001, "loss": 1.6067, "step": 4034 }, { "epoch": 0.4635000861524324, "grad_norm": 0.42098426818847656, "learning_rate": 0.0001, "loss": 1.6296, "step": 4035 }, { "epoch": 0.4636149560622595, "grad_norm": 0.41556769609451294, "learning_rate": 0.0001, "loss": 1.545, "step": 4036 }, { "epoch": 0.4637298259720866, "grad_norm": 0.4194001853466034, "learning_rate": 0.0001, "loss": 1.6771, "step": 4037 }, { "epoch": 0.46384469588191374, "grad_norm": 0.4148116111755371, "learning_rate": 0.0001, "loss": 1.7292, "step": 4038 }, { "epoch": 0.46395956579174086, "grad_norm": 0.39787450432777405, "learning_rate": 0.0001, "loss": 1.4533, "step": 4039 }, { "epoch": 0.464074435701568, "grad_norm": 0.4328993856906891, "learning_rate": 0.0001, "loss": 1.74, "step": 4040 }, { "epoch": 0.4641893056113951, "grad_norm": 0.45193320512771606, "learning_rate": 0.0001, "loss": 1.9133, "step": 4041 }, { "epoch": 0.4643041755212222, "grad_norm": 0.4238536059856415, "learning_rate": 0.0001, "loss": 1.7697, "step": 4042 }, { "epoch": 0.46441904543104934, "grad_norm": 0.4080374836921692, "learning_rate": 0.0001, "loss": 1.5563, "step": 4043 }, { "epoch": 0.46453391534087646, "grad_norm": 0.40985575318336487, "learning_rate": 0.0001, "loss": 1.742, "step": 4044 }, { "epoch": 0.4646487852507036, "grad_norm": 0.40973904728889465, "learning_rate": 0.0001, "loss": 1.6101, "step": 4045 }, { "epoch": 0.4647636551605307, "grad_norm": 0.4287373423576355, "learning_rate": 0.0001, "loss": 1.6476, "step": 4046 }, { "epoch": 0.4648785250703578, "grad_norm": 0.41029754281044006, "learning_rate": 0.0001, "loss": 1.7405, "step": 4047 }, { "epoch": 0.46499339498018494, "grad_norm": 0.4067857563495636, "learning_rate": 0.0001, "loss": 1.5641, "step": 4048 }, { "epoch": 0.46510826489001206, "grad_norm": 0.42624571919441223, "learning_rate": 0.0001, "loss": 1.7049, "step": 4049 }, { "epoch": 0.4652231347998392, "grad_norm": 0.3797012269496918, "learning_rate": 0.0001, "loss": 1.3009, "step": 4050 }, { "epoch": 0.4653380047096663, "grad_norm": 0.37980806827545166, "learning_rate": 0.0001, "loss": 1.7286, "step": 4051 }, { "epoch": 0.4654528746194934, "grad_norm": 0.42795923352241516, "learning_rate": 0.0001, "loss": 1.5539, "step": 4052 }, { "epoch": 0.46556774452932054, "grad_norm": 0.4233475625514984, "learning_rate": 0.0001, "loss": 1.6725, "step": 4053 }, { "epoch": 0.46568261443914766, "grad_norm": 0.3736623525619507, "learning_rate": 0.0001, "loss": 1.5268, "step": 4054 }, { "epoch": 0.4657974843489748, "grad_norm": 0.43289387226104736, "learning_rate": 0.0001, "loss": 1.7833, "step": 4055 }, { "epoch": 0.4659123542588019, "grad_norm": 0.39889997243881226, "learning_rate": 0.0001, "loss": 1.6901, "step": 4056 }, { "epoch": 0.466027224168629, "grad_norm": 0.40544313192367554, "learning_rate": 0.0001, "loss": 1.7284, "step": 4057 }, { "epoch": 0.46614209407845614, "grad_norm": 0.3909522294998169, "learning_rate": 0.0001, "loss": 1.5248, "step": 4058 }, { "epoch": 0.46625696398828326, "grad_norm": 0.4280645549297333, "learning_rate": 0.0001, "loss": 1.6554, "step": 4059 }, { "epoch": 0.4663718338981104, "grad_norm": 0.40522295236587524, "learning_rate": 0.0001, "loss": 1.5629, "step": 4060 }, { "epoch": 0.4664867038079375, "grad_norm": 0.43785059452056885, "learning_rate": 0.0001, "loss": 1.6645, "step": 4061 }, { "epoch": 0.4666015737177646, "grad_norm": 0.39573609828948975, "learning_rate": 0.0001, "loss": 1.6039, "step": 4062 }, { "epoch": 0.46671644362759174, "grad_norm": 0.39389288425445557, "learning_rate": 0.0001, "loss": 1.5866, "step": 4063 }, { "epoch": 0.46683131353741886, "grad_norm": 0.43171268701553345, "learning_rate": 0.0001, "loss": 1.8756, "step": 4064 }, { "epoch": 0.466946183447246, "grad_norm": 0.4238601624965668, "learning_rate": 0.0001, "loss": 1.9024, "step": 4065 }, { "epoch": 0.4670610533570731, "grad_norm": 0.4299545884132385, "learning_rate": 0.0001, "loss": 1.7863, "step": 4066 }, { "epoch": 0.4671759232669002, "grad_norm": 0.38363876938819885, "learning_rate": 0.0001, "loss": 1.5237, "step": 4067 }, { "epoch": 0.46729079317672734, "grad_norm": 0.39695143699645996, "learning_rate": 0.0001, "loss": 1.6357, "step": 4068 }, { "epoch": 0.46740566308655446, "grad_norm": 0.4232119023799896, "learning_rate": 0.0001, "loss": 1.7294, "step": 4069 }, { "epoch": 0.4675205329963816, "grad_norm": 0.39182764291763306, "learning_rate": 0.0001, "loss": 1.4701, "step": 4070 }, { "epoch": 0.4676354029062087, "grad_norm": 0.38294535875320435, "learning_rate": 0.0001, "loss": 1.5047, "step": 4071 }, { "epoch": 0.4677502728160358, "grad_norm": 0.39663416147232056, "learning_rate": 0.0001, "loss": 1.4622, "step": 4072 }, { "epoch": 0.46786514272586294, "grad_norm": 0.4411957263946533, "learning_rate": 0.0001, "loss": 1.7604, "step": 4073 }, { "epoch": 0.46798001263569006, "grad_norm": 0.43797844648361206, "learning_rate": 0.0001, "loss": 1.871, "step": 4074 }, { "epoch": 0.4680948825455172, "grad_norm": 0.4069494605064392, "learning_rate": 0.0001, "loss": 1.6468, "step": 4075 }, { "epoch": 0.4682097524553443, "grad_norm": 0.4068356454372406, "learning_rate": 0.0001, "loss": 1.6041, "step": 4076 }, { "epoch": 0.4683246223651714, "grad_norm": 0.4451930820941925, "learning_rate": 0.0001, "loss": 1.6094, "step": 4077 }, { "epoch": 0.46843949227499854, "grad_norm": 0.4141843616962433, "learning_rate": 0.0001, "loss": 1.6697, "step": 4078 }, { "epoch": 0.4685543621848257, "grad_norm": 0.37937334179878235, "learning_rate": 0.0001, "loss": 1.5631, "step": 4079 }, { "epoch": 0.46866923209465283, "grad_norm": 0.3883213996887207, "learning_rate": 0.0001, "loss": 1.7144, "step": 4080 }, { "epoch": 0.46878410200447995, "grad_norm": 0.4085143506526947, "learning_rate": 0.0001, "loss": 1.6192, "step": 4081 }, { "epoch": 0.46889897191430707, "grad_norm": 0.4008790850639343, "learning_rate": 0.0001, "loss": 1.5507, "step": 4082 }, { "epoch": 0.4690138418241342, "grad_norm": 0.40579459071159363, "learning_rate": 0.0001, "loss": 1.6322, "step": 4083 }, { "epoch": 0.4691287117339613, "grad_norm": 0.41397562623023987, "learning_rate": 0.0001, "loss": 1.7478, "step": 4084 }, { "epoch": 0.46924358164378843, "grad_norm": 0.40979722142219543, "learning_rate": 0.0001, "loss": 1.5539, "step": 4085 }, { "epoch": 0.46935845155361555, "grad_norm": 0.3959348201751709, "learning_rate": 0.0001, "loss": 1.5345, "step": 4086 }, { "epoch": 0.46947332146344267, "grad_norm": 0.4095418453216553, "learning_rate": 0.0001, "loss": 1.6808, "step": 4087 }, { "epoch": 0.4695881913732698, "grad_norm": 0.4053293466567993, "learning_rate": 0.0001, "loss": 1.6511, "step": 4088 }, { "epoch": 0.4697030612830969, "grad_norm": 0.41647395491600037, "learning_rate": 0.0001, "loss": 1.7705, "step": 4089 }, { "epoch": 0.46981793119292403, "grad_norm": 0.420941025018692, "learning_rate": 0.0001, "loss": 1.5527, "step": 4090 }, { "epoch": 0.46993280110275115, "grad_norm": 0.4197264015674591, "learning_rate": 0.0001, "loss": 1.7921, "step": 4091 }, { "epoch": 0.47004767101257827, "grad_norm": 0.40832772850990295, "learning_rate": 0.0001, "loss": 1.6727, "step": 4092 }, { "epoch": 0.4701625409224054, "grad_norm": 0.4231039881706238, "learning_rate": 0.0001, "loss": 1.5909, "step": 4093 }, { "epoch": 0.4702774108322325, "grad_norm": 0.4249723255634308, "learning_rate": 0.0001, "loss": 1.6056, "step": 4094 }, { "epoch": 0.47039228074205963, "grad_norm": 0.41998720169067383, "learning_rate": 0.0001, "loss": 1.7629, "step": 4095 }, { "epoch": 0.47050715065188675, "grad_norm": 0.38457614183425903, "learning_rate": 0.0001, "loss": 1.6671, "step": 4096 }, { "epoch": 0.47062202056171387, "grad_norm": 0.3916681408882141, "learning_rate": 0.0001, "loss": 1.6675, "step": 4097 }, { "epoch": 0.470736890471541, "grad_norm": 0.40213701128959656, "learning_rate": 0.0001, "loss": 1.6876, "step": 4098 }, { "epoch": 0.4708517603813681, "grad_norm": 0.3882690668106079, "learning_rate": 0.0001, "loss": 1.6579, "step": 4099 }, { "epoch": 0.47096663029119523, "grad_norm": 0.4170128107070923, "learning_rate": 0.0001, "loss": 1.5347, "step": 4100 }, { "epoch": 0.47108150020102235, "grad_norm": 0.3979085087776184, "learning_rate": 0.0001, "loss": 1.6854, "step": 4101 }, { "epoch": 0.47119637011084947, "grad_norm": 0.41884052753448486, "learning_rate": 0.0001, "loss": 1.7235, "step": 4102 }, { "epoch": 0.4713112400206766, "grad_norm": 0.4105212688446045, "learning_rate": 0.0001, "loss": 1.6214, "step": 4103 }, { "epoch": 0.4714261099305037, "grad_norm": 0.4247276186943054, "learning_rate": 0.0001, "loss": 1.7434, "step": 4104 }, { "epoch": 0.47154097984033083, "grad_norm": 0.4294239580631256, "learning_rate": 0.0001, "loss": 1.7376, "step": 4105 }, { "epoch": 0.47165584975015795, "grad_norm": 0.4285709857940674, "learning_rate": 0.0001, "loss": 1.7798, "step": 4106 }, { "epoch": 0.47177071965998507, "grad_norm": 0.44262823462486267, "learning_rate": 0.0001, "loss": 1.8391, "step": 4107 }, { "epoch": 0.4718855895698122, "grad_norm": 0.4055534601211548, "learning_rate": 0.0001, "loss": 1.6681, "step": 4108 }, { "epoch": 0.4720004594796393, "grad_norm": 0.39059194922447205, "learning_rate": 0.0001, "loss": 1.7303, "step": 4109 }, { "epoch": 0.47211532938946643, "grad_norm": 0.3832624554634094, "learning_rate": 0.0001, "loss": 1.6018, "step": 4110 }, { "epoch": 0.47223019929929355, "grad_norm": 0.42001864314079285, "learning_rate": 0.0001, "loss": 1.65, "step": 4111 }, { "epoch": 0.47234506920912067, "grad_norm": 0.4122453033924103, "learning_rate": 0.0001, "loss": 1.6837, "step": 4112 }, { "epoch": 0.4724599391189478, "grad_norm": 0.38872405886650085, "learning_rate": 0.0001, "loss": 1.5834, "step": 4113 }, { "epoch": 0.4725748090287749, "grad_norm": 0.3932107388973236, "learning_rate": 0.0001, "loss": 1.5957, "step": 4114 }, { "epoch": 0.47268967893860203, "grad_norm": 0.3980741798877716, "learning_rate": 0.0001, "loss": 1.6973, "step": 4115 }, { "epoch": 0.47280454884842915, "grad_norm": 0.4428998827934265, "learning_rate": 0.0001, "loss": 1.6528, "step": 4116 }, { "epoch": 0.47291941875825627, "grad_norm": 0.37861019372940063, "learning_rate": 0.0001, "loss": 1.6581, "step": 4117 }, { "epoch": 0.4730342886680834, "grad_norm": 0.43132567405700684, "learning_rate": 0.0001, "loss": 1.8072, "step": 4118 }, { "epoch": 0.4731491585779105, "grad_norm": 0.4315222501754761, "learning_rate": 0.0001, "loss": 1.7475, "step": 4119 }, { "epoch": 0.47326402848773763, "grad_norm": 0.3742518424987793, "learning_rate": 0.0001, "loss": 1.4972, "step": 4120 }, { "epoch": 0.47337889839756475, "grad_norm": 0.39365971088409424, "learning_rate": 0.0001, "loss": 1.6029, "step": 4121 }, { "epoch": 0.47349376830739187, "grad_norm": 0.43931326270103455, "learning_rate": 0.0001, "loss": 1.7912, "step": 4122 }, { "epoch": 0.473608638217219, "grad_norm": 0.4146181046962738, "learning_rate": 0.0001, "loss": 1.7775, "step": 4123 }, { "epoch": 0.4737235081270461, "grad_norm": 0.4833734929561615, "learning_rate": 0.0001, "loss": 1.8526, "step": 4124 }, { "epoch": 0.47383837803687323, "grad_norm": 0.38752907514572144, "learning_rate": 0.0001, "loss": 1.6212, "step": 4125 }, { "epoch": 0.47395324794670035, "grad_norm": 0.38719457387924194, "learning_rate": 0.0001, "loss": 1.3979, "step": 4126 }, { "epoch": 0.47406811785652747, "grad_norm": 0.4148174226284027, "learning_rate": 0.0001, "loss": 1.7373, "step": 4127 }, { "epoch": 0.4741829877663546, "grad_norm": 0.41567835211753845, "learning_rate": 0.0001, "loss": 1.5284, "step": 4128 }, { "epoch": 0.4742978576761817, "grad_norm": 0.45437824726104736, "learning_rate": 0.0001, "loss": 1.7829, "step": 4129 }, { "epoch": 0.47441272758600883, "grad_norm": 0.41790398955345154, "learning_rate": 0.0001, "loss": 1.6595, "step": 4130 }, { "epoch": 0.47452759749583595, "grad_norm": 0.40303486585617065, "learning_rate": 0.0001, "loss": 1.6587, "step": 4131 }, { "epoch": 0.47464246740566307, "grad_norm": 0.391946405172348, "learning_rate": 0.0001, "loss": 1.6744, "step": 4132 }, { "epoch": 0.4747573373154902, "grad_norm": 0.3910079896450043, "learning_rate": 0.0001, "loss": 1.6995, "step": 4133 }, { "epoch": 0.4748722072253173, "grad_norm": 0.3853004276752472, "learning_rate": 0.0001, "loss": 1.6034, "step": 4134 }, { "epoch": 0.47498707713514443, "grad_norm": 0.41160422563552856, "learning_rate": 0.0001, "loss": 1.7027, "step": 4135 }, { "epoch": 0.47510194704497155, "grad_norm": 0.40281742811203003, "learning_rate": 0.0001, "loss": 1.7533, "step": 4136 }, { "epoch": 0.47521681695479867, "grad_norm": 0.39282864332199097, "learning_rate": 0.0001, "loss": 1.5728, "step": 4137 }, { "epoch": 0.4753316868646258, "grad_norm": 0.3944832384586334, "learning_rate": 0.0001, "loss": 1.5693, "step": 4138 }, { "epoch": 0.4754465567744529, "grad_norm": 0.4138934910297394, "learning_rate": 0.0001, "loss": 1.6778, "step": 4139 }, { "epoch": 0.47556142668428003, "grad_norm": 0.40532898902893066, "learning_rate": 0.0001, "loss": 1.4911, "step": 4140 }, { "epoch": 0.47567629659410715, "grad_norm": 0.3844515383243561, "learning_rate": 0.0001, "loss": 1.551, "step": 4141 }, { "epoch": 0.47579116650393427, "grad_norm": 0.3857368230819702, "learning_rate": 0.0001, "loss": 1.5968, "step": 4142 }, { "epoch": 0.4759060364137614, "grad_norm": 0.42560672760009766, "learning_rate": 0.0001, "loss": 1.8693, "step": 4143 }, { "epoch": 0.4760209063235885, "grad_norm": 0.43299341201782227, "learning_rate": 0.0001, "loss": 1.6393, "step": 4144 }, { "epoch": 0.47613577623341563, "grad_norm": 0.431902676820755, "learning_rate": 0.0001, "loss": 1.8618, "step": 4145 }, { "epoch": 0.47625064614324275, "grad_norm": 0.43173423409461975, "learning_rate": 0.0001, "loss": 1.5596, "step": 4146 }, { "epoch": 0.47636551605306987, "grad_norm": 0.4200470447540283, "learning_rate": 0.0001, "loss": 1.7881, "step": 4147 }, { "epoch": 0.47648038596289705, "grad_norm": 0.3650665581226349, "learning_rate": 0.0001, "loss": 1.2449, "step": 4148 }, { "epoch": 0.47659525587272417, "grad_norm": 0.40055355429649353, "learning_rate": 0.0001, "loss": 1.4084, "step": 4149 }, { "epoch": 0.4767101257825513, "grad_norm": 0.41404348611831665, "learning_rate": 0.0001, "loss": 1.667, "step": 4150 }, { "epoch": 0.4768249956923784, "grad_norm": 0.42411214113235474, "learning_rate": 0.0001, "loss": 1.4006, "step": 4151 }, { "epoch": 0.4769398656022055, "grad_norm": 0.4193376302719116, "learning_rate": 0.0001, "loss": 1.7433, "step": 4152 }, { "epoch": 0.47705473551203265, "grad_norm": 0.41245442628860474, "learning_rate": 0.0001, "loss": 1.5723, "step": 4153 }, { "epoch": 0.47716960542185977, "grad_norm": 0.4130132496356964, "learning_rate": 0.0001, "loss": 1.6799, "step": 4154 }, { "epoch": 0.4772844753316869, "grad_norm": 0.3704391419887543, "learning_rate": 0.0001, "loss": 1.4916, "step": 4155 }, { "epoch": 0.477399345241514, "grad_norm": 0.4236399531364441, "learning_rate": 0.0001, "loss": 1.7303, "step": 4156 }, { "epoch": 0.4775142151513411, "grad_norm": 0.4399377703666687, "learning_rate": 0.0001, "loss": 1.8676, "step": 4157 }, { "epoch": 0.47762908506116825, "grad_norm": 0.4146808683872223, "learning_rate": 0.0001, "loss": 1.4918, "step": 4158 }, { "epoch": 0.47774395497099537, "grad_norm": 0.40414994955062866, "learning_rate": 0.0001, "loss": 1.6722, "step": 4159 }, { "epoch": 0.4778588248808225, "grad_norm": 0.44139429926872253, "learning_rate": 0.0001, "loss": 1.5388, "step": 4160 }, { "epoch": 0.4779736947906496, "grad_norm": 0.35786864161491394, "learning_rate": 0.0001, "loss": 1.3425, "step": 4161 }, { "epoch": 0.4780885647004767, "grad_norm": 0.4114287793636322, "learning_rate": 0.0001, "loss": 1.6002, "step": 4162 }, { "epoch": 0.47820343461030385, "grad_norm": 0.436565101146698, "learning_rate": 0.0001, "loss": 1.6089, "step": 4163 }, { "epoch": 0.47831830452013097, "grad_norm": 0.42503979802131653, "learning_rate": 0.0001, "loss": 1.6941, "step": 4164 }, { "epoch": 0.4784331744299581, "grad_norm": 0.4688960313796997, "learning_rate": 0.0001, "loss": 1.6121, "step": 4165 }, { "epoch": 0.4785480443397852, "grad_norm": 0.42915526032447815, "learning_rate": 0.0001, "loss": 1.7311, "step": 4166 }, { "epoch": 0.4786629142496123, "grad_norm": 0.42920050024986267, "learning_rate": 0.0001, "loss": 1.8337, "step": 4167 }, { "epoch": 0.47877778415943945, "grad_norm": 0.4068220257759094, "learning_rate": 0.0001, "loss": 1.2956, "step": 4168 }, { "epoch": 0.47889265406926657, "grad_norm": 0.38509076833724976, "learning_rate": 0.0001, "loss": 1.5839, "step": 4169 }, { "epoch": 0.4790075239790937, "grad_norm": 0.43463391065597534, "learning_rate": 0.0001, "loss": 1.8903, "step": 4170 }, { "epoch": 0.4791223938889208, "grad_norm": 0.4463506042957306, "learning_rate": 0.0001, "loss": 1.8181, "step": 4171 }, { "epoch": 0.4792372637987479, "grad_norm": 0.42022788524627686, "learning_rate": 0.0001, "loss": 1.8081, "step": 4172 }, { "epoch": 0.47935213370857505, "grad_norm": 0.4187469780445099, "learning_rate": 0.0001, "loss": 1.6547, "step": 4173 }, { "epoch": 0.47946700361840217, "grad_norm": 0.3888328969478607, "learning_rate": 0.0001, "loss": 1.4741, "step": 4174 }, { "epoch": 0.4795818735282293, "grad_norm": 0.4130741059780121, "learning_rate": 0.0001, "loss": 1.6267, "step": 4175 }, { "epoch": 0.4796967434380564, "grad_norm": 0.3948152959346771, "learning_rate": 0.0001, "loss": 1.6343, "step": 4176 }, { "epoch": 0.4798116133478835, "grad_norm": 0.3814520537853241, "learning_rate": 0.0001, "loss": 1.5364, "step": 4177 }, { "epoch": 0.47992648325771065, "grad_norm": 0.4245845079421997, "learning_rate": 0.0001, "loss": 1.6257, "step": 4178 }, { "epoch": 0.48004135316753777, "grad_norm": 0.39747804403305054, "learning_rate": 0.0001, "loss": 1.4679, "step": 4179 }, { "epoch": 0.4801562230773649, "grad_norm": 0.41911858320236206, "learning_rate": 0.0001, "loss": 1.6383, "step": 4180 }, { "epoch": 0.480271092987192, "grad_norm": 0.4305039048194885, "learning_rate": 0.0001, "loss": 1.7368, "step": 4181 }, { "epoch": 0.4803859628970191, "grad_norm": 0.3938789665699005, "learning_rate": 0.0001, "loss": 1.4452, "step": 4182 }, { "epoch": 0.48050083280684625, "grad_norm": 0.45596420764923096, "learning_rate": 0.0001, "loss": 1.697, "step": 4183 }, { "epoch": 0.48061570271667337, "grad_norm": 0.3951970040798187, "learning_rate": 0.0001, "loss": 1.602, "step": 4184 }, { "epoch": 0.4807305726265005, "grad_norm": 0.40332433581352234, "learning_rate": 0.0001, "loss": 1.7118, "step": 4185 }, { "epoch": 0.4808454425363276, "grad_norm": 0.4350488781929016, "learning_rate": 0.0001, "loss": 1.7018, "step": 4186 }, { "epoch": 0.4809603124461547, "grad_norm": 0.4046489894390106, "learning_rate": 0.0001, "loss": 1.2882, "step": 4187 }, { "epoch": 0.48107518235598185, "grad_norm": 0.4263879358768463, "learning_rate": 0.0001, "loss": 1.6298, "step": 4188 }, { "epoch": 0.48119005226580897, "grad_norm": 0.42358294129371643, "learning_rate": 0.0001, "loss": 1.791, "step": 4189 }, { "epoch": 0.4813049221756361, "grad_norm": 0.3843347430229187, "learning_rate": 0.0001, "loss": 1.6233, "step": 4190 }, { "epoch": 0.4814197920854632, "grad_norm": 0.43530306220054626, "learning_rate": 0.0001, "loss": 1.7777, "step": 4191 }, { "epoch": 0.4815346619952903, "grad_norm": 0.4170604646205902, "learning_rate": 0.0001, "loss": 1.6756, "step": 4192 }, { "epoch": 0.48164953190511745, "grad_norm": 0.39548924565315247, "learning_rate": 0.0001, "loss": 1.5848, "step": 4193 }, { "epoch": 0.48176440181494457, "grad_norm": 0.38482916355133057, "learning_rate": 0.0001, "loss": 1.6586, "step": 4194 }, { "epoch": 0.4818792717247717, "grad_norm": 0.38123294711112976, "learning_rate": 0.0001, "loss": 1.4144, "step": 4195 }, { "epoch": 0.4819941416345988, "grad_norm": 0.4207984209060669, "learning_rate": 0.0001, "loss": 1.6567, "step": 4196 }, { "epoch": 0.4821090115444259, "grad_norm": 0.40305450558662415, "learning_rate": 0.0001, "loss": 1.6715, "step": 4197 }, { "epoch": 0.48222388145425304, "grad_norm": 0.3921899199485779, "learning_rate": 0.0001, "loss": 1.5884, "step": 4198 }, { "epoch": 0.48233875136408016, "grad_norm": 0.3937564790248871, "learning_rate": 0.0001, "loss": 1.4346, "step": 4199 }, { "epoch": 0.4824536212739073, "grad_norm": 0.44775843620300293, "learning_rate": 0.0001, "loss": 1.6271, "step": 4200 }, { "epoch": 0.4825684911837344, "grad_norm": 0.424165278673172, "learning_rate": 0.0001, "loss": 1.632, "step": 4201 }, { "epoch": 0.4826833610935615, "grad_norm": 0.4116969406604767, "learning_rate": 0.0001, "loss": 1.7062, "step": 4202 }, { "epoch": 0.48279823100338864, "grad_norm": 0.4231579303741455, "learning_rate": 0.0001, "loss": 1.8064, "step": 4203 }, { "epoch": 0.48291310091321576, "grad_norm": 0.4327984154224396, "learning_rate": 0.0001, "loss": 1.7143, "step": 4204 }, { "epoch": 0.4830279708230429, "grad_norm": 0.45221564173698425, "learning_rate": 0.0001, "loss": 1.7857, "step": 4205 }, { "epoch": 0.48314284073287, "grad_norm": 0.4953210651874542, "learning_rate": 0.0001, "loss": 1.861, "step": 4206 }, { "epoch": 0.4832577106426971, "grad_norm": 0.38952094316482544, "learning_rate": 0.0001, "loss": 1.4247, "step": 4207 }, { "epoch": 0.48337258055252424, "grad_norm": 0.4227844774723053, "learning_rate": 0.0001, "loss": 1.6854, "step": 4208 }, { "epoch": 0.48348745046235136, "grad_norm": 0.4414394199848175, "learning_rate": 0.0001, "loss": 1.9377, "step": 4209 }, { "epoch": 0.4836023203721785, "grad_norm": 0.4291560649871826, "learning_rate": 0.0001, "loss": 1.4077, "step": 4210 }, { "epoch": 0.4837171902820056, "grad_norm": 0.40819114446640015, "learning_rate": 0.0001, "loss": 1.8186, "step": 4211 }, { "epoch": 0.4838320601918327, "grad_norm": 0.40871623158454895, "learning_rate": 0.0001, "loss": 1.545, "step": 4212 }, { "epoch": 0.48394693010165984, "grad_norm": 0.41948193311691284, "learning_rate": 0.0001, "loss": 1.7082, "step": 4213 }, { "epoch": 0.48406180001148696, "grad_norm": 0.40412595868110657, "learning_rate": 0.0001, "loss": 1.6625, "step": 4214 }, { "epoch": 0.4841766699213141, "grad_norm": 0.4160853624343872, "learning_rate": 0.0001, "loss": 1.5709, "step": 4215 }, { "epoch": 0.48429153983114126, "grad_norm": 0.3837149143218994, "learning_rate": 0.0001, "loss": 1.4121, "step": 4216 }, { "epoch": 0.4844064097409684, "grad_norm": 0.443243145942688, "learning_rate": 0.0001, "loss": 1.8064, "step": 4217 }, { "epoch": 0.4845212796507955, "grad_norm": 0.38426709175109863, "learning_rate": 0.0001, "loss": 1.6433, "step": 4218 }, { "epoch": 0.4846361495606226, "grad_norm": 0.40559226274490356, "learning_rate": 0.0001, "loss": 1.6694, "step": 4219 }, { "epoch": 0.48475101947044974, "grad_norm": 0.4205404222011566, "learning_rate": 0.0001, "loss": 1.8064, "step": 4220 }, { "epoch": 0.48486588938027686, "grad_norm": 0.4019322693347931, "learning_rate": 0.0001, "loss": 1.6447, "step": 4221 }, { "epoch": 0.484980759290104, "grad_norm": 0.43610960245132446, "learning_rate": 0.0001, "loss": 1.6533, "step": 4222 }, { "epoch": 0.4850956291999311, "grad_norm": 0.45350828766822815, "learning_rate": 0.0001, "loss": 1.6381, "step": 4223 }, { "epoch": 0.4852104991097582, "grad_norm": 0.420280396938324, "learning_rate": 0.0001, "loss": 1.8019, "step": 4224 }, { "epoch": 0.48532536901958534, "grad_norm": 0.4381343424320221, "learning_rate": 0.0001, "loss": 1.5139, "step": 4225 }, { "epoch": 0.48544023892941246, "grad_norm": 0.38392412662506104, "learning_rate": 0.0001, "loss": 1.4845, "step": 4226 }, { "epoch": 0.4855551088392396, "grad_norm": 0.4646781086921692, "learning_rate": 0.0001, "loss": 1.8663, "step": 4227 }, { "epoch": 0.4856699787490667, "grad_norm": 0.39009374380111694, "learning_rate": 0.0001, "loss": 1.5679, "step": 4228 }, { "epoch": 0.4857848486588938, "grad_norm": 0.44190630316734314, "learning_rate": 0.0001, "loss": 1.891, "step": 4229 }, { "epoch": 0.48589971856872094, "grad_norm": 0.38428959250450134, "learning_rate": 0.0001, "loss": 1.5294, "step": 4230 }, { "epoch": 0.48601458847854806, "grad_norm": 0.3903064429759979, "learning_rate": 0.0001, "loss": 1.4688, "step": 4231 }, { "epoch": 0.4861294583883752, "grad_norm": 0.39627987146377563, "learning_rate": 0.0001, "loss": 1.5868, "step": 4232 }, { "epoch": 0.4862443282982023, "grad_norm": 0.3723553419113159, "learning_rate": 0.0001, "loss": 1.4925, "step": 4233 }, { "epoch": 0.4863591982080294, "grad_norm": 0.4561592936515808, "learning_rate": 0.0001, "loss": 1.8449, "step": 4234 }, { "epoch": 0.48647406811785654, "grad_norm": 0.42943283915519714, "learning_rate": 0.0001, "loss": 1.6589, "step": 4235 }, { "epoch": 0.48658893802768366, "grad_norm": 0.39778202772140503, "learning_rate": 0.0001, "loss": 1.4218, "step": 4236 }, { "epoch": 0.4867038079375108, "grad_norm": 0.4467572569847107, "learning_rate": 0.0001, "loss": 1.7572, "step": 4237 }, { "epoch": 0.4868186778473379, "grad_norm": 0.393715500831604, "learning_rate": 0.0001, "loss": 1.56, "step": 4238 }, { "epoch": 0.486933547757165, "grad_norm": 0.43201369047164917, "learning_rate": 0.0001, "loss": 1.8393, "step": 4239 }, { "epoch": 0.48704841766699214, "grad_norm": 0.3823792040348053, "learning_rate": 0.0001, "loss": 1.3971, "step": 4240 }, { "epoch": 0.48716328757681926, "grad_norm": 0.38682422041893005, "learning_rate": 0.0001, "loss": 1.3362, "step": 4241 }, { "epoch": 0.4872781574866464, "grad_norm": 0.405185341835022, "learning_rate": 0.0001, "loss": 1.5936, "step": 4242 }, { "epoch": 0.4873930273964735, "grad_norm": 0.4169776141643524, "learning_rate": 0.0001, "loss": 1.6479, "step": 4243 }, { "epoch": 0.4875078973063006, "grad_norm": 0.399635910987854, "learning_rate": 0.0001, "loss": 1.4352, "step": 4244 }, { "epoch": 0.48762276721612774, "grad_norm": 0.39507734775543213, "learning_rate": 0.0001, "loss": 1.5375, "step": 4245 }, { "epoch": 0.48773763712595486, "grad_norm": 0.42992520332336426, "learning_rate": 0.0001, "loss": 1.5337, "step": 4246 }, { "epoch": 0.487852507035782, "grad_norm": 0.4018322825431824, "learning_rate": 0.0001, "loss": 1.8032, "step": 4247 }, { "epoch": 0.4879673769456091, "grad_norm": 0.40367868542671204, "learning_rate": 0.0001, "loss": 1.6862, "step": 4248 }, { "epoch": 0.4880822468554362, "grad_norm": 0.4355872571468353, "learning_rate": 0.0001, "loss": 1.5951, "step": 4249 }, { "epoch": 0.48819711676526334, "grad_norm": 0.4191339313983917, "learning_rate": 0.0001, "loss": 1.8388, "step": 4250 }, { "epoch": 0.48831198667509046, "grad_norm": 0.39485347270965576, "learning_rate": 0.0001, "loss": 1.5524, "step": 4251 }, { "epoch": 0.4884268565849176, "grad_norm": 0.40303096175193787, "learning_rate": 0.0001, "loss": 1.718, "step": 4252 }, { "epoch": 0.4885417264947447, "grad_norm": 0.4248945116996765, "learning_rate": 0.0001, "loss": 1.6762, "step": 4253 }, { "epoch": 0.4886565964045718, "grad_norm": 0.45680445432662964, "learning_rate": 0.0001, "loss": 1.8161, "step": 4254 }, { "epoch": 0.48877146631439894, "grad_norm": 0.40725842118263245, "learning_rate": 0.0001, "loss": 1.6237, "step": 4255 }, { "epoch": 0.48888633622422606, "grad_norm": 0.4762290418148041, "learning_rate": 0.0001, "loss": 1.6353, "step": 4256 }, { "epoch": 0.4890012061340532, "grad_norm": 0.42993873357772827, "learning_rate": 0.0001, "loss": 1.6581, "step": 4257 }, { "epoch": 0.4891160760438803, "grad_norm": 0.38512858748435974, "learning_rate": 0.0001, "loss": 1.6684, "step": 4258 }, { "epoch": 0.4892309459537074, "grad_norm": 0.4009222984313965, "learning_rate": 0.0001, "loss": 1.662, "step": 4259 }, { "epoch": 0.48934581586353454, "grad_norm": 0.3873811662197113, "learning_rate": 0.0001, "loss": 1.6713, "step": 4260 }, { "epoch": 0.48946068577336166, "grad_norm": 0.39984700083732605, "learning_rate": 0.0001, "loss": 1.577, "step": 4261 }, { "epoch": 0.4895755556831888, "grad_norm": 0.4596845209598541, "learning_rate": 0.0001, "loss": 1.7131, "step": 4262 }, { "epoch": 0.4896904255930159, "grad_norm": 0.4038037657737732, "learning_rate": 0.0001, "loss": 1.7656, "step": 4263 }, { "epoch": 0.489805295502843, "grad_norm": 0.39577624201774597, "learning_rate": 0.0001, "loss": 1.652, "step": 4264 }, { "epoch": 0.48992016541267014, "grad_norm": 0.38728025555610657, "learning_rate": 0.0001, "loss": 1.5229, "step": 4265 }, { "epoch": 0.49003503532249726, "grad_norm": 0.4493556618690491, "learning_rate": 0.0001, "loss": 1.6367, "step": 4266 }, { "epoch": 0.4901499052323244, "grad_norm": 0.4298556447029114, "learning_rate": 0.0001, "loss": 1.8001, "step": 4267 }, { "epoch": 0.4902647751421515, "grad_norm": 0.4090805947780609, "learning_rate": 0.0001, "loss": 1.6775, "step": 4268 }, { "epoch": 0.4903796450519786, "grad_norm": 0.4103395640850067, "learning_rate": 0.0001, "loss": 1.6592, "step": 4269 }, { "epoch": 0.49049451496180574, "grad_norm": 0.41509976983070374, "learning_rate": 0.0001, "loss": 1.6015, "step": 4270 }, { "epoch": 0.49060938487163286, "grad_norm": 0.4312727153301239, "learning_rate": 0.0001, "loss": 1.7256, "step": 4271 }, { "epoch": 0.49072425478146, "grad_norm": 0.42118600010871887, "learning_rate": 0.0001, "loss": 1.7583, "step": 4272 }, { "epoch": 0.4908391246912871, "grad_norm": 0.4323917329311371, "learning_rate": 0.0001, "loss": 1.6377, "step": 4273 }, { "epoch": 0.4909539946011142, "grad_norm": 0.4115605354309082, "learning_rate": 0.0001, "loss": 1.7165, "step": 4274 }, { "epoch": 0.49106886451094134, "grad_norm": 0.41110193729400635, "learning_rate": 0.0001, "loss": 1.6258, "step": 4275 }, { "epoch": 0.49118373442076846, "grad_norm": 0.43548524379730225, "learning_rate": 0.0001, "loss": 1.7275, "step": 4276 }, { "epoch": 0.4912986043305956, "grad_norm": 0.4107424020767212, "learning_rate": 0.0001, "loss": 1.514, "step": 4277 }, { "epoch": 0.4914134742404227, "grad_norm": 0.4132782220840454, "learning_rate": 0.0001, "loss": 1.7099, "step": 4278 }, { "epoch": 0.4915283441502498, "grad_norm": 0.4486086070537567, "learning_rate": 0.0001, "loss": 1.9551, "step": 4279 }, { "epoch": 0.49164321406007694, "grad_norm": 0.41517210006713867, "learning_rate": 0.0001, "loss": 1.7067, "step": 4280 }, { "epoch": 0.49175808396990406, "grad_norm": 0.37078627943992615, "learning_rate": 0.0001, "loss": 1.4954, "step": 4281 }, { "epoch": 0.4918729538797312, "grad_norm": 0.41808491945266724, "learning_rate": 0.0001, "loss": 1.5452, "step": 4282 }, { "epoch": 0.4919878237895583, "grad_norm": 0.3952369689941406, "learning_rate": 0.0001, "loss": 1.7146, "step": 4283 }, { "epoch": 0.4921026936993854, "grad_norm": 0.43479466438293457, "learning_rate": 0.0001, "loss": 1.7984, "step": 4284 }, { "epoch": 0.4922175636092126, "grad_norm": 0.44229763746261597, "learning_rate": 0.0001, "loss": 1.877, "step": 4285 }, { "epoch": 0.4923324335190397, "grad_norm": 0.4501185715198517, "learning_rate": 0.0001, "loss": 1.5643, "step": 4286 }, { "epoch": 0.49244730342886683, "grad_norm": 0.43694865703582764, "learning_rate": 0.0001, "loss": 1.512, "step": 4287 }, { "epoch": 0.49256217333869395, "grad_norm": 0.4092878997325897, "learning_rate": 0.0001, "loss": 1.7253, "step": 4288 }, { "epoch": 0.4926770432485211, "grad_norm": 0.4009423851966858, "learning_rate": 0.0001, "loss": 1.6641, "step": 4289 }, { "epoch": 0.4927919131583482, "grad_norm": 0.4281792640686035, "learning_rate": 0.0001, "loss": 1.5892, "step": 4290 }, { "epoch": 0.4929067830681753, "grad_norm": 0.49694761633872986, "learning_rate": 0.0001, "loss": 1.7392, "step": 4291 }, { "epoch": 0.49302165297800243, "grad_norm": 0.44447091221809387, "learning_rate": 0.0001, "loss": 1.6769, "step": 4292 }, { "epoch": 0.49313652288782955, "grad_norm": 0.3885389566421509, "learning_rate": 0.0001, "loss": 1.6375, "step": 4293 }, { "epoch": 0.4932513927976567, "grad_norm": 0.3895886242389679, "learning_rate": 0.0001, "loss": 1.5885, "step": 4294 }, { "epoch": 0.4933662627074838, "grad_norm": 0.4553350806236267, "learning_rate": 0.0001, "loss": 1.793, "step": 4295 }, { "epoch": 0.4934811326173109, "grad_norm": 0.384520560503006, "learning_rate": 0.0001, "loss": 1.4896, "step": 4296 }, { "epoch": 0.49359600252713803, "grad_norm": 0.4104674160480499, "learning_rate": 0.0001, "loss": 1.6698, "step": 4297 }, { "epoch": 0.49371087243696515, "grad_norm": 0.4543203115463257, "learning_rate": 0.0001, "loss": 1.7958, "step": 4298 }, { "epoch": 0.4938257423467923, "grad_norm": 0.40907683968544006, "learning_rate": 0.0001, "loss": 1.6956, "step": 4299 }, { "epoch": 0.4939406122566194, "grad_norm": 0.43240875005722046, "learning_rate": 0.0001, "loss": 1.4569, "step": 4300 }, { "epoch": 0.4940554821664465, "grad_norm": 0.41229525208473206, "learning_rate": 0.0001, "loss": 1.7708, "step": 4301 }, { "epoch": 0.49417035207627363, "grad_norm": 0.4220798909664154, "learning_rate": 0.0001, "loss": 1.6977, "step": 4302 }, { "epoch": 0.49428522198610075, "grad_norm": 0.3954547345638275, "learning_rate": 0.0001, "loss": 1.6479, "step": 4303 }, { "epoch": 0.4944000918959279, "grad_norm": 0.41736140847206116, "learning_rate": 0.0001, "loss": 1.7265, "step": 4304 }, { "epoch": 0.494514961805755, "grad_norm": 0.39642804861068726, "learning_rate": 0.0001, "loss": 1.5365, "step": 4305 }, { "epoch": 0.4946298317155821, "grad_norm": 0.4558791220188141, "learning_rate": 0.0001, "loss": 1.6012, "step": 4306 }, { "epoch": 0.49474470162540923, "grad_norm": 0.39758846163749695, "learning_rate": 0.0001, "loss": 1.6934, "step": 4307 }, { "epoch": 0.49485957153523635, "grad_norm": 0.43392619490623474, "learning_rate": 0.0001, "loss": 1.7523, "step": 4308 }, { "epoch": 0.4949744414450635, "grad_norm": 0.3947283625602722, "learning_rate": 0.0001, "loss": 1.5171, "step": 4309 }, { "epoch": 0.4950893113548906, "grad_norm": 0.3992256224155426, "learning_rate": 0.0001, "loss": 1.7432, "step": 4310 }, { "epoch": 0.4952041812647177, "grad_norm": 0.4191253185272217, "learning_rate": 0.0001, "loss": 1.7122, "step": 4311 }, { "epoch": 0.49531905117454483, "grad_norm": 0.41294851899147034, "learning_rate": 0.0001, "loss": 1.8674, "step": 4312 }, { "epoch": 0.49543392108437195, "grad_norm": 0.4245891869068146, "learning_rate": 0.0001, "loss": 1.5189, "step": 4313 }, { "epoch": 0.4955487909941991, "grad_norm": 0.44946831464767456, "learning_rate": 0.0001, "loss": 1.8442, "step": 4314 }, { "epoch": 0.4956636609040262, "grad_norm": 0.4152979254722595, "learning_rate": 0.0001, "loss": 1.6727, "step": 4315 }, { "epoch": 0.4957785308138533, "grad_norm": 0.3948223888874054, "learning_rate": 0.0001, "loss": 1.6683, "step": 4316 }, { "epoch": 0.49589340072368043, "grad_norm": 0.423066109418869, "learning_rate": 0.0001, "loss": 1.6065, "step": 4317 }, { "epoch": 0.49600827063350755, "grad_norm": 0.3875940144062042, "learning_rate": 0.0001, "loss": 1.4541, "step": 4318 }, { "epoch": 0.4961231405433347, "grad_norm": 0.4109596014022827, "learning_rate": 0.0001, "loss": 1.5759, "step": 4319 }, { "epoch": 0.4962380104531618, "grad_norm": 0.42905092239379883, "learning_rate": 0.0001, "loss": 1.6572, "step": 4320 }, { "epoch": 0.4963528803629889, "grad_norm": 0.38710954785346985, "learning_rate": 0.0001, "loss": 1.3936, "step": 4321 }, { "epoch": 0.49646775027281603, "grad_norm": 0.4148370623588562, "learning_rate": 0.0001, "loss": 1.3909, "step": 4322 }, { "epoch": 0.49658262018264315, "grad_norm": 0.4470541775226593, "learning_rate": 0.0001, "loss": 1.6346, "step": 4323 }, { "epoch": 0.4966974900924703, "grad_norm": 0.3968261480331421, "learning_rate": 0.0001, "loss": 1.5145, "step": 4324 }, { "epoch": 0.4968123600022974, "grad_norm": 0.43433794379234314, "learning_rate": 0.0001, "loss": 1.7366, "step": 4325 }, { "epoch": 0.4969272299121245, "grad_norm": 0.39728692173957825, "learning_rate": 0.0001, "loss": 1.4233, "step": 4326 }, { "epoch": 0.49704209982195163, "grad_norm": 0.3991283178329468, "learning_rate": 0.0001, "loss": 1.6562, "step": 4327 }, { "epoch": 0.49715696973177875, "grad_norm": 0.4111589193344116, "learning_rate": 0.0001, "loss": 1.6703, "step": 4328 }, { "epoch": 0.4972718396416059, "grad_norm": 0.41188472509384155, "learning_rate": 0.0001, "loss": 1.7465, "step": 4329 }, { "epoch": 0.497386709551433, "grad_norm": 0.435154527425766, "learning_rate": 0.0001, "loss": 1.6414, "step": 4330 }, { "epoch": 0.4975015794612601, "grad_norm": 0.40389662981033325, "learning_rate": 0.0001, "loss": 1.7158, "step": 4331 }, { "epoch": 0.49761644937108723, "grad_norm": 0.4168131947517395, "learning_rate": 0.0001, "loss": 1.6856, "step": 4332 }, { "epoch": 0.49773131928091435, "grad_norm": 0.43243077397346497, "learning_rate": 0.0001, "loss": 1.8349, "step": 4333 }, { "epoch": 0.4978461891907415, "grad_norm": 0.42854100465774536, "learning_rate": 0.0001, "loss": 1.732, "step": 4334 }, { "epoch": 0.4979610591005686, "grad_norm": 0.41653481125831604, "learning_rate": 0.0001, "loss": 1.6833, "step": 4335 }, { "epoch": 0.4980759290103957, "grad_norm": 0.45638951659202576, "learning_rate": 0.0001, "loss": 1.844, "step": 4336 }, { "epoch": 0.49819079892022283, "grad_norm": 0.4429507553577423, "learning_rate": 0.0001, "loss": 1.6555, "step": 4337 }, { "epoch": 0.49830566883004995, "grad_norm": 0.45221859216690063, "learning_rate": 0.0001, "loss": 1.8052, "step": 4338 }, { "epoch": 0.4984205387398771, "grad_norm": 0.42000943422317505, "learning_rate": 0.0001, "loss": 1.5532, "step": 4339 }, { "epoch": 0.4985354086497042, "grad_norm": 0.44999954104423523, "learning_rate": 0.0001, "loss": 1.7288, "step": 4340 }, { "epoch": 0.4986502785595313, "grad_norm": 0.44586285948753357, "learning_rate": 0.0001, "loss": 1.8559, "step": 4341 }, { "epoch": 0.49876514846935843, "grad_norm": 0.41515326499938965, "learning_rate": 0.0001, "loss": 1.6989, "step": 4342 }, { "epoch": 0.49888001837918555, "grad_norm": 0.40862441062927246, "learning_rate": 0.0001, "loss": 1.6441, "step": 4343 }, { "epoch": 0.4989948882890127, "grad_norm": 0.4498591423034668, "learning_rate": 0.0001, "loss": 1.7579, "step": 4344 }, { "epoch": 0.4991097581988398, "grad_norm": 0.4223852753639221, "learning_rate": 0.0001, "loss": 1.6668, "step": 4345 }, { "epoch": 0.4992246281086669, "grad_norm": 0.39054638147354126, "learning_rate": 0.0001, "loss": 1.3893, "step": 4346 }, { "epoch": 0.49933949801849403, "grad_norm": 0.4462035298347473, "learning_rate": 0.0001, "loss": 1.7637, "step": 4347 }, { "epoch": 0.49945436792832115, "grad_norm": 0.4029097855091095, "learning_rate": 0.0001, "loss": 1.6173, "step": 4348 }, { "epoch": 0.4995692378381483, "grad_norm": 0.4124845862388611, "learning_rate": 0.0001, "loss": 1.7638, "step": 4349 }, { "epoch": 0.4996841077479754, "grad_norm": 0.37937289476394653, "learning_rate": 0.0001, "loss": 1.5661, "step": 4350 }, { "epoch": 0.4997989776578025, "grad_norm": 0.40648961067199707, "learning_rate": 0.0001, "loss": 1.5733, "step": 4351 }, { "epoch": 0.49991384756762963, "grad_norm": 0.4147469997406006, "learning_rate": 0.0001, "loss": 1.5053, "step": 4352 }, { "epoch": 0.5000287174774568, "grad_norm": 0.42763659358024597, "learning_rate": 0.0001, "loss": 1.6607, "step": 4353 }, { "epoch": 0.5001435873872839, "grad_norm": 0.4175349771976471, "learning_rate": 0.0001, "loss": 1.675, "step": 4354 }, { "epoch": 0.500258457297111, "grad_norm": 0.4236915707588196, "learning_rate": 0.0001, "loss": 1.6923, "step": 4355 }, { "epoch": 0.5003733272069382, "grad_norm": 0.41402342915534973, "learning_rate": 0.0001, "loss": 1.8008, "step": 4356 }, { "epoch": 0.5004881971167653, "grad_norm": 0.4136095345020294, "learning_rate": 0.0001, "loss": 1.5044, "step": 4357 }, { "epoch": 0.5006030670265924, "grad_norm": 0.43916845321655273, "learning_rate": 0.0001, "loss": 1.8298, "step": 4358 }, { "epoch": 0.5007179369364195, "grad_norm": 0.41625019907951355, "learning_rate": 0.0001, "loss": 1.6969, "step": 4359 }, { "epoch": 0.5008328068462466, "grad_norm": 0.422838419675827, "learning_rate": 0.0001, "loss": 1.5765, "step": 4360 }, { "epoch": 0.5009476767560738, "grad_norm": 0.356978178024292, "learning_rate": 0.0001, "loss": 1.3617, "step": 4361 }, { "epoch": 0.5010625466659009, "grad_norm": 0.4372119903564453, "learning_rate": 0.0001, "loss": 1.7264, "step": 4362 }, { "epoch": 0.501177416575728, "grad_norm": 0.43347251415252686, "learning_rate": 0.0001, "loss": 1.6472, "step": 4363 }, { "epoch": 0.5012922864855551, "grad_norm": 0.38658055663108826, "learning_rate": 0.0001, "loss": 1.564, "step": 4364 }, { "epoch": 0.5014071563953822, "grad_norm": 0.3823000192642212, "learning_rate": 0.0001, "loss": 1.6238, "step": 4365 }, { "epoch": 0.5015220263052094, "grad_norm": 0.44494375586509705, "learning_rate": 0.0001, "loss": 1.884, "step": 4366 }, { "epoch": 0.5016368962150365, "grad_norm": 0.37995579838752747, "learning_rate": 0.0001, "loss": 1.5818, "step": 4367 }, { "epoch": 0.5017517661248636, "grad_norm": 0.416050523519516, "learning_rate": 0.0001, "loss": 1.8128, "step": 4368 }, { "epoch": 0.5018666360346907, "grad_norm": 0.41402295231819153, "learning_rate": 0.0001, "loss": 1.7219, "step": 4369 }, { "epoch": 0.5019815059445178, "grad_norm": 0.4046735167503357, "learning_rate": 0.0001, "loss": 1.6491, "step": 4370 }, { "epoch": 0.502096375854345, "grad_norm": 0.4079616367816925, "learning_rate": 0.0001, "loss": 1.5906, "step": 4371 }, { "epoch": 0.5022112457641721, "grad_norm": 0.42762479186058044, "learning_rate": 0.0001, "loss": 1.7243, "step": 4372 }, { "epoch": 0.5023261156739992, "grad_norm": 0.42116057872772217, "learning_rate": 0.0001, "loss": 1.7058, "step": 4373 }, { "epoch": 0.5024409855838263, "grad_norm": 0.43111321330070496, "learning_rate": 0.0001, "loss": 1.6998, "step": 4374 }, { "epoch": 0.5025558554936534, "grad_norm": 0.43248212337493896, "learning_rate": 0.0001, "loss": 1.839, "step": 4375 }, { "epoch": 0.5026707254034806, "grad_norm": 0.40893498063087463, "learning_rate": 0.0001, "loss": 1.6113, "step": 4376 }, { "epoch": 0.5027855953133077, "grad_norm": 0.43067118525505066, "learning_rate": 0.0001, "loss": 1.5609, "step": 4377 }, { "epoch": 0.5029004652231348, "grad_norm": 0.3844601809978485, "learning_rate": 0.0001, "loss": 1.5709, "step": 4378 }, { "epoch": 0.5030153351329619, "grad_norm": 0.41813233494758606, "learning_rate": 0.0001, "loss": 1.7656, "step": 4379 }, { "epoch": 0.503130205042789, "grad_norm": 0.4095008373260498, "learning_rate": 0.0001, "loss": 1.6633, "step": 4380 }, { "epoch": 0.5032450749526162, "grad_norm": 0.4308955669403076, "learning_rate": 0.0001, "loss": 1.5697, "step": 4381 }, { "epoch": 0.5033599448624433, "grad_norm": 0.41021478176116943, "learning_rate": 0.0001, "loss": 1.6172, "step": 4382 }, { "epoch": 0.5034748147722704, "grad_norm": 0.37911084294319153, "learning_rate": 0.0001, "loss": 1.4941, "step": 4383 }, { "epoch": 0.5035896846820975, "grad_norm": 0.3981432020664215, "learning_rate": 0.0001, "loss": 1.7432, "step": 4384 }, { "epoch": 0.5037045545919246, "grad_norm": 0.3934857249259949, "learning_rate": 0.0001, "loss": 1.7143, "step": 4385 }, { "epoch": 0.5038194245017518, "grad_norm": 0.40038371086120605, "learning_rate": 0.0001, "loss": 1.7377, "step": 4386 }, { "epoch": 0.5039342944115789, "grad_norm": 0.422829806804657, "learning_rate": 0.0001, "loss": 1.6704, "step": 4387 }, { "epoch": 0.504049164321406, "grad_norm": 0.3928106725215912, "learning_rate": 0.0001, "loss": 1.6269, "step": 4388 }, { "epoch": 0.5041640342312331, "grad_norm": 0.4150887131690979, "learning_rate": 0.0001, "loss": 1.4932, "step": 4389 }, { "epoch": 0.5042789041410602, "grad_norm": 0.40696507692337036, "learning_rate": 0.0001, "loss": 1.6295, "step": 4390 }, { "epoch": 0.5043937740508874, "grad_norm": 0.43781277537345886, "learning_rate": 0.0001, "loss": 1.6691, "step": 4391 }, { "epoch": 0.5045086439607145, "grad_norm": 0.4133634865283966, "learning_rate": 0.0001, "loss": 1.6552, "step": 4392 }, { "epoch": 0.5046235138705416, "grad_norm": 0.3970431387424469, "learning_rate": 0.0001, "loss": 1.2777, "step": 4393 }, { "epoch": 0.5047383837803687, "grad_norm": 0.44825881719589233, "learning_rate": 0.0001, "loss": 1.5762, "step": 4394 }, { "epoch": 0.5048532536901958, "grad_norm": 0.38739416003227234, "learning_rate": 0.0001, "loss": 1.6222, "step": 4395 }, { "epoch": 0.504968123600023, "grad_norm": 0.41295453906059265, "learning_rate": 0.0001, "loss": 1.7472, "step": 4396 }, { "epoch": 0.5050829935098501, "grad_norm": 0.3822581470012665, "learning_rate": 0.0001, "loss": 1.5052, "step": 4397 }, { "epoch": 0.5051978634196772, "grad_norm": 0.40491965413093567, "learning_rate": 0.0001, "loss": 1.49, "step": 4398 }, { "epoch": 0.5053127333295043, "grad_norm": 0.48093438148498535, "learning_rate": 0.0001, "loss": 1.7826, "step": 4399 }, { "epoch": 0.5054276032393314, "grad_norm": 0.4143761694431305, "learning_rate": 0.0001, "loss": 1.5054, "step": 4400 }, { "epoch": 0.5055424731491586, "grad_norm": 0.4251968264579773, "learning_rate": 0.0001, "loss": 1.61, "step": 4401 }, { "epoch": 0.5056573430589857, "grad_norm": 0.4139546751976013, "learning_rate": 0.0001, "loss": 1.4851, "step": 4402 }, { "epoch": 0.5057722129688128, "grad_norm": 0.4102967381477356, "learning_rate": 0.0001, "loss": 1.6109, "step": 4403 }, { "epoch": 0.5058870828786399, "grad_norm": 0.411199688911438, "learning_rate": 0.0001, "loss": 1.3911, "step": 4404 }, { "epoch": 0.506001952788467, "grad_norm": 0.43025514483451843, "learning_rate": 0.0001, "loss": 1.6434, "step": 4405 }, { "epoch": 0.5061168226982942, "grad_norm": 0.4648951292037964, "learning_rate": 0.0001, "loss": 1.8235, "step": 4406 }, { "epoch": 0.5062316926081213, "grad_norm": 0.4003261923789978, "learning_rate": 0.0001, "loss": 1.6433, "step": 4407 }, { "epoch": 0.5063465625179484, "grad_norm": 0.4348773956298828, "learning_rate": 0.0001, "loss": 1.6097, "step": 4408 }, { "epoch": 0.5064614324277755, "grad_norm": 0.3802754580974579, "learning_rate": 0.0001, "loss": 1.3187, "step": 4409 }, { "epoch": 0.5065763023376026, "grad_norm": 0.4715876579284668, "learning_rate": 0.0001, "loss": 1.9138, "step": 4410 }, { "epoch": 0.5066911722474298, "grad_norm": 0.42883962392807007, "learning_rate": 0.0001, "loss": 1.5673, "step": 4411 }, { "epoch": 0.5068060421572569, "grad_norm": 0.3982695937156677, "learning_rate": 0.0001, "loss": 1.5531, "step": 4412 }, { "epoch": 0.506920912067084, "grad_norm": 0.4575648605823517, "learning_rate": 0.0001, "loss": 1.6778, "step": 4413 }, { "epoch": 0.5070357819769111, "grad_norm": 0.422573983669281, "learning_rate": 0.0001, "loss": 1.7639, "step": 4414 }, { "epoch": 0.5071506518867382, "grad_norm": 0.41428160667419434, "learning_rate": 0.0001, "loss": 1.5767, "step": 4415 }, { "epoch": 0.5072655217965654, "grad_norm": 0.39599475264549255, "learning_rate": 0.0001, "loss": 1.442, "step": 4416 }, { "epoch": 0.5073803917063925, "grad_norm": 0.41915363073349, "learning_rate": 0.0001, "loss": 1.6944, "step": 4417 }, { "epoch": 0.5074952616162196, "grad_norm": 0.4034424126148224, "learning_rate": 0.0001, "loss": 1.5641, "step": 4418 }, { "epoch": 0.5076101315260467, "grad_norm": 0.4440549612045288, "learning_rate": 0.0001, "loss": 1.9008, "step": 4419 }, { "epoch": 0.5077250014358738, "grad_norm": 0.43546631932258606, "learning_rate": 0.0001, "loss": 1.6674, "step": 4420 }, { "epoch": 0.507839871345701, "grad_norm": 0.4137895107269287, "learning_rate": 0.0001, "loss": 1.7555, "step": 4421 }, { "epoch": 0.5079547412555281, "grad_norm": 0.4285581111907959, "learning_rate": 0.0001, "loss": 1.4998, "step": 4422 }, { "epoch": 0.5080696111653552, "grad_norm": 0.39121928811073303, "learning_rate": 0.0001, "loss": 1.5603, "step": 4423 }, { "epoch": 0.5081844810751823, "grad_norm": 0.45524442195892334, "learning_rate": 0.0001, "loss": 1.6992, "step": 4424 }, { "epoch": 0.5082993509850094, "grad_norm": 0.42169350385665894, "learning_rate": 0.0001, "loss": 1.4717, "step": 4425 }, { "epoch": 0.5084142208948366, "grad_norm": 0.44150853157043457, "learning_rate": 0.0001, "loss": 1.735, "step": 4426 }, { "epoch": 0.5085290908046637, "grad_norm": 0.4405604600906372, "learning_rate": 0.0001, "loss": 1.7817, "step": 4427 }, { "epoch": 0.5086439607144908, "grad_norm": 0.4324178099632263, "learning_rate": 0.0001, "loss": 1.8098, "step": 4428 }, { "epoch": 0.5087588306243179, "grad_norm": 0.3942691683769226, "learning_rate": 0.0001, "loss": 1.5467, "step": 4429 }, { "epoch": 0.508873700534145, "grad_norm": 0.3946244716644287, "learning_rate": 0.0001, "loss": 1.5341, "step": 4430 }, { "epoch": 0.5089885704439722, "grad_norm": 0.4541240930557251, "learning_rate": 0.0001, "loss": 1.8342, "step": 4431 }, { "epoch": 0.5091034403537993, "grad_norm": 0.4114912450313568, "learning_rate": 0.0001, "loss": 1.6808, "step": 4432 }, { "epoch": 0.5092183102636264, "grad_norm": 0.40291449427604675, "learning_rate": 0.0001, "loss": 1.4467, "step": 4433 }, { "epoch": 0.5093331801734535, "grad_norm": 0.43536198139190674, "learning_rate": 0.0001, "loss": 1.6229, "step": 4434 }, { "epoch": 0.5094480500832806, "grad_norm": 0.4481135606765747, "learning_rate": 0.0001, "loss": 1.8252, "step": 4435 }, { "epoch": 0.5095629199931078, "grad_norm": 0.41042807698249817, "learning_rate": 0.0001, "loss": 1.6277, "step": 4436 }, { "epoch": 0.5096777899029349, "grad_norm": 0.44647055864334106, "learning_rate": 0.0001, "loss": 1.7893, "step": 4437 }, { "epoch": 0.509792659812762, "grad_norm": 0.4402143061161041, "learning_rate": 0.0001, "loss": 1.7, "step": 4438 }, { "epoch": 0.5099075297225891, "grad_norm": 0.44720834493637085, "learning_rate": 0.0001, "loss": 1.6523, "step": 4439 }, { "epoch": 0.5100223996324162, "grad_norm": 0.43396279215812683, "learning_rate": 0.0001, "loss": 1.6265, "step": 4440 }, { "epoch": 0.5101372695422434, "grad_norm": 0.42125704884529114, "learning_rate": 0.0001, "loss": 1.6251, "step": 4441 }, { "epoch": 0.5102521394520705, "grad_norm": 0.4178660213947296, "learning_rate": 0.0001, "loss": 1.7153, "step": 4442 }, { "epoch": 0.5103670093618976, "grad_norm": 0.490590363740921, "learning_rate": 0.0001, "loss": 1.6246, "step": 4443 }, { "epoch": 0.5104818792717247, "grad_norm": 0.40945470333099365, "learning_rate": 0.0001, "loss": 1.4578, "step": 4444 }, { "epoch": 0.5105967491815518, "grad_norm": 0.44269421696662903, "learning_rate": 0.0001, "loss": 1.6619, "step": 4445 }, { "epoch": 0.510711619091379, "grad_norm": 0.41479024291038513, "learning_rate": 0.0001, "loss": 1.6295, "step": 4446 }, { "epoch": 0.5108264890012061, "grad_norm": 0.3876959979534149, "learning_rate": 0.0001, "loss": 1.505, "step": 4447 }, { "epoch": 0.5109413589110332, "grad_norm": 0.4491494297981262, "learning_rate": 0.0001, "loss": 1.681, "step": 4448 }, { "epoch": 0.5110562288208603, "grad_norm": 0.41451436281204224, "learning_rate": 0.0001, "loss": 1.6657, "step": 4449 }, { "epoch": 0.5111710987306874, "grad_norm": 0.39285629987716675, "learning_rate": 0.0001, "loss": 1.5493, "step": 4450 }, { "epoch": 0.5112859686405146, "grad_norm": 0.39508745074272156, "learning_rate": 0.0001, "loss": 1.4939, "step": 4451 }, { "epoch": 0.5114008385503417, "grad_norm": 0.44860512018203735, "learning_rate": 0.0001, "loss": 1.5626, "step": 4452 }, { "epoch": 0.5115157084601688, "grad_norm": 0.4382397532463074, "learning_rate": 0.0001, "loss": 1.6823, "step": 4453 }, { "epoch": 0.5116305783699959, "grad_norm": 0.40884271264076233, "learning_rate": 0.0001, "loss": 1.5451, "step": 4454 }, { "epoch": 0.511745448279823, "grad_norm": 0.43925192952156067, "learning_rate": 0.0001, "loss": 1.6231, "step": 4455 }, { "epoch": 0.5118603181896503, "grad_norm": 0.42059096693992615, "learning_rate": 0.0001, "loss": 1.6763, "step": 4456 }, { "epoch": 0.5119751880994774, "grad_norm": 0.4306631088256836, "learning_rate": 0.0001, "loss": 1.5764, "step": 4457 }, { "epoch": 0.5120900580093045, "grad_norm": 0.41633883118629456, "learning_rate": 0.0001, "loss": 1.6652, "step": 4458 }, { "epoch": 0.5122049279191316, "grad_norm": 0.4185430705547333, "learning_rate": 0.0001, "loss": 1.6695, "step": 4459 }, { "epoch": 0.5123197978289588, "grad_norm": 0.38397789001464844, "learning_rate": 0.0001, "loss": 1.3248, "step": 4460 }, { "epoch": 0.5124346677387859, "grad_norm": 0.42769157886505127, "learning_rate": 0.0001, "loss": 1.6386, "step": 4461 }, { "epoch": 0.512549537648613, "grad_norm": 0.40281736850738525, "learning_rate": 0.0001, "loss": 1.553, "step": 4462 }, { "epoch": 0.5126644075584401, "grad_norm": 0.48889243602752686, "learning_rate": 0.0001, "loss": 1.8824, "step": 4463 }, { "epoch": 0.5127792774682672, "grad_norm": 0.4288654625415802, "learning_rate": 0.0001, "loss": 1.6987, "step": 4464 }, { "epoch": 0.5128941473780944, "grad_norm": 0.4444088637828827, "learning_rate": 0.0001, "loss": 1.7862, "step": 4465 }, { "epoch": 0.5130090172879215, "grad_norm": 0.3586702346801758, "learning_rate": 0.0001, "loss": 1.4236, "step": 4466 }, { "epoch": 0.5131238871977486, "grad_norm": 0.41832202672958374, "learning_rate": 0.0001, "loss": 1.6599, "step": 4467 }, { "epoch": 0.5132387571075757, "grad_norm": 0.41678473353385925, "learning_rate": 0.0001, "loss": 1.663, "step": 4468 }, { "epoch": 0.5133536270174028, "grad_norm": 0.39657461643218994, "learning_rate": 0.0001, "loss": 1.541, "step": 4469 }, { "epoch": 0.51346849692723, "grad_norm": 0.4119437336921692, "learning_rate": 0.0001, "loss": 1.631, "step": 4470 }, { "epoch": 0.5135833668370571, "grad_norm": 0.4205493927001953, "learning_rate": 0.0001, "loss": 1.5289, "step": 4471 }, { "epoch": 0.5136982367468842, "grad_norm": 0.4292590618133545, "learning_rate": 0.0001, "loss": 1.6888, "step": 4472 }, { "epoch": 0.5138131066567113, "grad_norm": 0.4054730534553528, "learning_rate": 0.0001, "loss": 1.6858, "step": 4473 }, { "epoch": 0.5139279765665384, "grad_norm": 0.5104745626449585, "learning_rate": 0.0001, "loss": 1.5412, "step": 4474 }, { "epoch": 0.5140428464763656, "grad_norm": 0.4039926528930664, "learning_rate": 0.0001, "loss": 1.7071, "step": 4475 }, { "epoch": 0.5141577163861927, "grad_norm": 0.38494694232940674, "learning_rate": 0.0001, "loss": 1.5487, "step": 4476 }, { "epoch": 0.5142725862960198, "grad_norm": 0.4241875112056732, "learning_rate": 0.0001, "loss": 1.6744, "step": 4477 }, { "epoch": 0.5143874562058469, "grad_norm": 0.41834181547164917, "learning_rate": 0.0001, "loss": 1.4409, "step": 4478 }, { "epoch": 0.514502326115674, "grad_norm": 0.38371768593788147, "learning_rate": 0.0001, "loss": 1.4855, "step": 4479 }, { "epoch": 0.5146171960255012, "grad_norm": 0.42289024591445923, "learning_rate": 0.0001, "loss": 1.8331, "step": 4480 }, { "epoch": 0.5147320659353283, "grad_norm": 0.41524538397789, "learning_rate": 0.0001, "loss": 1.6312, "step": 4481 }, { "epoch": 0.5148469358451554, "grad_norm": 0.4520059823989868, "learning_rate": 0.0001, "loss": 1.7944, "step": 4482 }, { "epoch": 0.5149618057549825, "grad_norm": 0.46914947032928467, "learning_rate": 0.0001, "loss": 1.6221, "step": 4483 }, { "epoch": 0.5150766756648096, "grad_norm": 0.4201009273529053, "learning_rate": 0.0001, "loss": 1.7413, "step": 4484 }, { "epoch": 0.5151915455746368, "grad_norm": 0.43544942140579224, "learning_rate": 0.0001, "loss": 1.7241, "step": 4485 }, { "epoch": 0.5153064154844639, "grad_norm": 0.3825484812259674, "learning_rate": 0.0001, "loss": 1.543, "step": 4486 }, { "epoch": 0.515421285394291, "grad_norm": 0.46979475021362305, "learning_rate": 0.0001, "loss": 1.765, "step": 4487 }, { "epoch": 0.5155361553041181, "grad_norm": 0.4438627064228058, "learning_rate": 0.0001, "loss": 1.796, "step": 4488 }, { "epoch": 0.5156510252139452, "grad_norm": 0.4040473699569702, "learning_rate": 0.0001, "loss": 1.6934, "step": 4489 }, { "epoch": 0.5157658951237724, "grad_norm": 0.3772587776184082, "learning_rate": 0.0001, "loss": 1.548, "step": 4490 }, { "epoch": 0.5158807650335995, "grad_norm": 0.4254496693611145, "learning_rate": 0.0001, "loss": 1.7002, "step": 4491 }, { "epoch": 0.5159956349434266, "grad_norm": 0.43310895562171936, "learning_rate": 0.0001, "loss": 1.7606, "step": 4492 }, { "epoch": 0.5161105048532537, "grad_norm": 0.43428242206573486, "learning_rate": 0.0001, "loss": 1.8536, "step": 4493 }, { "epoch": 0.5162253747630808, "grad_norm": 0.4234102964401245, "learning_rate": 0.0001, "loss": 1.6754, "step": 4494 }, { "epoch": 0.516340244672908, "grad_norm": 0.4157521724700928, "learning_rate": 0.0001, "loss": 1.717, "step": 4495 }, { "epoch": 0.5164551145827351, "grad_norm": 0.4052300751209259, "learning_rate": 0.0001, "loss": 1.4448, "step": 4496 }, { "epoch": 0.5165699844925622, "grad_norm": 0.4426092207431793, "learning_rate": 0.0001, "loss": 1.683, "step": 4497 }, { "epoch": 0.5166848544023893, "grad_norm": 0.41626426577568054, "learning_rate": 0.0001, "loss": 1.5067, "step": 4498 }, { "epoch": 0.5167997243122164, "grad_norm": 0.38398024439811707, "learning_rate": 0.0001, "loss": 1.6062, "step": 4499 }, { "epoch": 0.5169145942220436, "grad_norm": 0.4056454598903656, "learning_rate": 0.0001, "loss": 1.5062, "step": 4500 }, { "epoch": 0.5170294641318707, "grad_norm": 0.4996061325073242, "learning_rate": 0.0001, "loss": 1.7317, "step": 4501 }, { "epoch": 0.5171443340416978, "grad_norm": 0.40144461393356323, "learning_rate": 0.0001, "loss": 1.6122, "step": 4502 }, { "epoch": 0.5172592039515249, "grad_norm": 0.4855468273162842, "learning_rate": 0.0001, "loss": 1.6999, "step": 4503 }, { "epoch": 0.517374073861352, "grad_norm": 0.4194789230823517, "learning_rate": 0.0001, "loss": 1.6659, "step": 4504 }, { "epoch": 0.5174889437711792, "grad_norm": 0.3837035596370697, "learning_rate": 0.0001, "loss": 1.5493, "step": 4505 }, { "epoch": 0.5176038136810063, "grad_norm": 0.39867258071899414, "learning_rate": 0.0001, "loss": 1.3699, "step": 4506 }, { "epoch": 0.5177186835908334, "grad_norm": 0.4189174473285675, "learning_rate": 0.0001, "loss": 1.6099, "step": 4507 }, { "epoch": 0.5178335535006605, "grad_norm": 0.43066421151161194, "learning_rate": 0.0001, "loss": 1.7007, "step": 4508 }, { "epoch": 0.5179484234104876, "grad_norm": 0.40682274103164673, "learning_rate": 0.0001, "loss": 1.4341, "step": 4509 }, { "epoch": 0.5180632933203148, "grad_norm": 0.4145391583442688, "learning_rate": 0.0001, "loss": 1.6284, "step": 4510 }, { "epoch": 0.5181781632301419, "grad_norm": 0.42484042048454285, "learning_rate": 0.0001, "loss": 1.6313, "step": 4511 }, { "epoch": 0.518293033139969, "grad_norm": 0.48949721455574036, "learning_rate": 0.0001, "loss": 1.7115, "step": 4512 }, { "epoch": 0.5184079030497961, "grad_norm": 0.46473968029022217, "learning_rate": 0.0001, "loss": 1.778, "step": 4513 }, { "epoch": 0.5185227729596232, "grad_norm": 0.4237271845340729, "learning_rate": 0.0001, "loss": 1.7289, "step": 4514 }, { "epoch": 0.5186376428694504, "grad_norm": 0.4288172721862793, "learning_rate": 0.0001, "loss": 1.8229, "step": 4515 }, { "epoch": 0.5187525127792775, "grad_norm": 0.39855021238327026, "learning_rate": 0.0001, "loss": 1.7383, "step": 4516 }, { "epoch": 0.5188673826891046, "grad_norm": 0.4043690860271454, "learning_rate": 0.0001, "loss": 1.7437, "step": 4517 }, { "epoch": 0.5189822525989317, "grad_norm": 0.4174858331680298, "learning_rate": 0.0001, "loss": 1.4415, "step": 4518 }, { "epoch": 0.5190971225087588, "grad_norm": 0.4092039167881012, "learning_rate": 0.0001, "loss": 1.7563, "step": 4519 }, { "epoch": 0.519211992418586, "grad_norm": 0.44929736852645874, "learning_rate": 0.0001, "loss": 1.8381, "step": 4520 }, { "epoch": 0.5193268623284131, "grad_norm": 0.38499635457992554, "learning_rate": 0.0001, "loss": 1.5668, "step": 4521 }, { "epoch": 0.5194417322382402, "grad_norm": 0.4187855124473572, "learning_rate": 0.0001, "loss": 1.6891, "step": 4522 }, { "epoch": 0.5195566021480673, "grad_norm": 0.43714576959609985, "learning_rate": 0.0001, "loss": 1.7564, "step": 4523 }, { "epoch": 0.5196714720578944, "grad_norm": 0.42951980233192444, "learning_rate": 0.0001, "loss": 1.6217, "step": 4524 }, { "epoch": 0.5197863419677216, "grad_norm": 0.4339199662208557, "learning_rate": 0.0001, "loss": 1.5095, "step": 4525 }, { "epoch": 0.5199012118775487, "grad_norm": 0.434725821018219, "learning_rate": 0.0001, "loss": 1.6993, "step": 4526 }, { "epoch": 0.5200160817873758, "grad_norm": 0.42468908429145813, "learning_rate": 0.0001, "loss": 1.6434, "step": 4527 }, { "epoch": 0.5201309516972029, "grad_norm": 0.42453402280807495, "learning_rate": 0.0001, "loss": 1.6366, "step": 4528 }, { "epoch": 0.52024582160703, "grad_norm": 0.4131234586238861, "learning_rate": 0.0001, "loss": 1.4308, "step": 4529 }, { "epoch": 0.5203606915168572, "grad_norm": 0.40424486994743347, "learning_rate": 0.0001, "loss": 1.5853, "step": 4530 }, { "epoch": 0.5204755614266843, "grad_norm": 0.45649197697639465, "learning_rate": 0.0001, "loss": 1.8034, "step": 4531 }, { "epoch": 0.5205904313365114, "grad_norm": 0.412251740694046, "learning_rate": 0.0001, "loss": 1.6628, "step": 4532 }, { "epoch": 0.5207053012463385, "grad_norm": 0.4387061297893524, "learning_rate": 0.0001, "loss": 1.8548, "step": 4533 }, { "epoch": 0.5208201711561656, "grad_norm": 0.4186844229698181, "learning_rate": 0.0001, "loss": 1.7774, "step": 4534 }, { "epoch": 0.5209350410659928, "grad_norm": 0.41150832176208496, "learning_rate": 0.0001, "loss": 1.6646, "step": 4535 }, { "epoch": 0.5210499109758199, "grad_norm": 0.4150691628456116, "learning_rate": 0.0001, "loss": 1.579, "step": 4536 }, { "epoch": 0.521164780885647, "grad_norm": 0.4016995131969452, "learning_rate": 0.0001, "loss": 1.6122, "step": 4537 }, { "epoch": 0.5212796507954741, "grad_norm": 0.4465637505054474, "learning_rate": 0.0001, "loss": 1.8251, "step": 4538 }, { "epoch": 0.5213945207053012, "grad_norm": 0.38491567969322205, "learning_rate": 0.0001, "loss": 1.5484, "step": 4539 }, { "epoch": 0.5215093906151284, "grad_norm": 0.3878459632396698, "learning_rate": 0.0001, "loss": 1.6515, "step": 4540 }, { "epoch": 0.5216242605249555, "grad_norm": 0.38580122590065, "learning_rate": 0.0001, "loss": 1.55, "step": 4541 }, { "epoch": 0.5217391304347826, "grad_norm": 0.3909989595413208, "learning_rate": 0.0001, "loss": 1.6571, "step": 4542 }, { "epoch": 0.5218540003446097, "grad_norm": 0.49450787901878357, "learning_rate": 0.0001, "loss": 1.744, "step": 4543 }, { "epoch": 0.5219688702544368, "grad_norm": 0.43028536438941956, "learning_rate": 0.0001, "loss": 1.6926, "step": 4544 }, { "epoch": 0.522083740164264, "grad_norm": 0.4146410822868347, "learning_rate": 0.0001, "loss": 1.5019, "step": 4545 }, { "epoch": 0.5221986100740911, "grad_norm": 0.40828803181648254, "learning_rate": 0.0001, "loss": 1.696, "step": 4546 }, { "epoch": 0.5223134799839182, "grad_norm": 0.38090986013412476, "learning_rate": 0.0001, "loss": 1.4923, "step": 4547 }, { "epoch": 0.5224283498937453, "grad_norm": 0.3952951431274414, "learning_rate": 0.0001, "loss": 1.524, "step": 4548 }, { "epoch": 0.5225432198035724, "grad_norm": 0.4446341395378113, "learning_rate": 0.0001, "loss": 1.7717, "step": 4549 }, { "epoch": 0.5226580897133996, "grad_norm": 0.4435203969478607, "learning_rate": 0.0001, "loss": 1.6232, "step": 4550 }, { "epoch": 0.5227729596232267, "grad_norm": 0.4518386423587799, "learning_rate": 0.0001, "loss": 1.6649, "step": 4551 }, { "epoch": 0.5228878295330538, "grad_norm": 0.4210054576396942, "learning_rate": 0.0001, "loss": 1.5636, "step": 4552 }, { "epoch": 0.5230026994428809, "grad_norm": 0.464871346950531, "learning_rate": 0.0001, "loss": 1.7242, "step": 4553 }, { "epoch": 0.523117569352708, "grad_norm": 0.4055049419403076, "learning_rate": 0.0001, "loss": 1.6339, "step": 4554 }, { "epoch": 0.5232324392625352, "grad_norm": 0.4289097189903259, "learning_rate": 0.0001, "loss": 1.5451, "step": 4555 }, { "epoch": 0.5233473091723623, "grad_norm": 0.40787026286125183, "learning_rate": 0.0001, "loss": 1.5455, "step": 4556 }, { "epoch": 0.5234621790821894, "grad_norm": 0.41342809796333313, "learning_rate": 0.0001, "loss": 1.6431, "step": 4557 }, { "epoch": 0.5235770489920165, "grad_norm": 0.3959392011165619, "learning_rate": 0.0001, "loss": 1.5286, "step": 4558 }, { "epoch": 0.5236919189018436, "grad_norm": 0.41905930638313293, "learning_rate": 0.0001, "loss": 1.4938, "step": 4559 }, { "epoch": 0.5238067888116708, "grad_norm": 0.4155671000480652, "learning_rate": 0.0001, "loss": 1.6784, "step": 4560 }, { "epoch": 0.5239216587214979, "grad_norm": 0.3972344398498535, "learning_rate": 0.0001, "loss": 1.5804, "step": 4561 }, { "epoch": 0.524036528631325, "grad_norm": 0.40942510962486267, "learning_rate": 0.0001, "loss": 1.648, "step": 4562 }, { "epoch": 0.5241513985411521, "grad_norm": 0.43989264965057373, "learning_rate": 0.0001, "loss": 1.8291, "step": 4563 }, { "epoch": 0.5242662684509792, "grad_norm": 0.38364219665527344, "learning_rate": 0.0001, "loss": 1.6185, "step": 4564 }, { "epoch": 0.5243811383608064, "grad_norm": 0.3964410424232483, "learning_rate": 0.0001, "loss": 1.6562, "step": 4565 }, { "epoch": 0.5244960082706335, "grad_norm": 0.3990534842014313, "learning_rate": 0.0001, "loss": 1.62, "step": 4566 }, { "epoch": 0.5246108781804606, "grad_norm": 0.4154457747936249, "learning_rate": 0.0001, "loss": 1.6036, "step": 4567 }, { "epoch": 0.5247257480902877, "grad_norm": 0.4000798761844635, "learning_rate": 0.0001, "loss": 1.685, "step": 4568 }, { "epoch": 0.5248406180001148, "grad_norm": 0.4398530423641205, "learning_rate": 0.0001, "loss": 1.7647, "step": 4569 }, { "epoch": 0.524955487909942, "grad_norm": 0.41386568546295166, "learning_rate": 0.0001, "loss": 1.7705, "step": 4570 }, { "epoch": 0.5250703578197691, "grad_norm": 0.48047158122062683, "learning_rate": 0.0001, "loss": 1.8748, "step": 4571 }, { "epoch": 0.5251852277295962, "grad_norm": 0.38870546221733093, "learning_rate": 0.0001, "loss": 1.4763, "step": 4572 }, { "epoch": 0.5253000976394233, "grad_norm": 0.48835334181785583, "learning_rate": 0.0001, "loss": 1.6587, "step": 4573 }, { "epoch": 0.5254149675492504, "grad_norm": 0.39779576659202576, "learning_rate": 0.0001, "loss": 1.5337, "step": 4574 }, { "epoch": 0.5255298374590776, "grad_norm": 0.3713628947734833, "learning_rate": 0.0001, "loss": 1.5932, "step": 4575 }, { "epoch": 0.5256447073689047, "grad_norm": 0.39374494552612305, "learning_rate": 0.0001, "loss": 1.5818, "step": 4576 }, { "epoch": 0.5257595772787318, "grad_norm": 0.40991827845573425, "learning_rate": 0.0001, "loss": 1.6499, "step": 4577 }, { "epoch": 0.5258744471885589, "grad_norm": 0.4329836964607239, "learning_rate": 0.0001, "loss": 1.7166, "step": 4578 }, { "epoch": 0.525989317098386, "grad_norm": 0.44446802139282227, "learning_rate": 0.0001, "loss": 1.7636, "step": 4579 }, { "epoch": 0.5261041870082132, "grad_norm": 0.42868572473526, "learning_rate": 0.0001, "loss": 1.8013, "step": 4580 }, { "epoch": 0.5262190569180403, "grad_norm": 0.38663357496261597, "learning_rate": 0.0001, "loss": 1.5617, "step": 4581 }, { "epoch": 0.5263339268278674, "grad_norm": 0.4398927390575409, "learning_rate": 0.0001, "loss": 1.6136, "step": 4582 }, { "epoch": 0.5264487967376945, "grad_norm": 0.4069356620311737, "learning_rate": 0.0001, "loss": 1.5597, "step": 4583 }, { "epoch": 0.5265636666475216, "grad_norm": 0.40244022011756897, "learning_rate": 0.0001, "loss": 1.5808, "step": 4584 }, { "epoch": 0.5266785365573488, "grad_norm": 0.42762094736099243, "learning_rate": 0.0001, "loss": 1.7083, "step": 4585 }, { "epoch": 0.5267934064671759, "grad_norm": 0.4556163251399994, "learning_rate": 0.0001, "loss": 1.824, "step": 4586 }, { "epoch": 0.526908276377003, "grad_norm": 0.4342927634716034, "learning_rate": 0.0001, "loss": 1.496, "step": 4587 }, { "epoch": 0.5270231462868301, "grad_norm": 0.4018527865409851, "learning_rate": 0.0001, "loss": 1.6768, "step": 4588 }, { "epoch": 0.5271380161966572, "grad_norm": 0.3828233480453491, "learning_rate": 0.0001, "loss": 1.31, "step": 4589 }, { "epoch": 0.5272528861064844, "grad_norm": 0.4347113072872162, "learning_rate": 0.0001, "loss": 1.7758, "step": 4590 }, { "epoch": 0.5273677560163115, "grad_norm": 0.38657423853874207, "learning_rate": 0.0001, "loss": 1.6179, "step": 4591 }, { "epoch": 0.5274826259261386, "grad_norm": 0.42126983404159546, "learning_rate": 0.0001, "loss": 1.4377, "step": 4592 }, { "epoch": 0.5275974958359658, "grad_norm": 0.45303142070770264, "learning_rate": 0.0001, "loss": 1.7843, "step": 4593 }, { "epoch": 0.5277123657457929, "grad_norm": 0.395085871219635, "learning_rate": 0.0001, "loss": 1.5851, "step": 4594 }, { "epoch": 0.5278272356556201, "grad_norm": 0.3973239064216614, "learning_rate": 0.0001, "loss": 1.6489, "step": 4595 }, { "epoch": 0.5279421055654472, "grad_norm": 0.3768349587917328, "learning_rate": 0.0001, "loss": 1.325, "step": 4596 }, { "epoch": 0.5280569754752743, "grad_norm": 0.43345212936401367, "learning_rate": 0.0001, "loss": 1.6315, "step": 4597 }, { "epoch": 0.5281718453851014, "grad_norm": 0.4553665816783905, "learning_rate": 0.0001, "loss": 1.7439, "step": 4598 }, { "epoch": 0.5282867152949285, "grad_norm": 0.4854236841201782, "learning_rate": 0.0001, "loss": 1.9161, "step": 4599 }, { "epoch": 0.5284015852047557, "grad_norm": 0.421282559633255, "learning_rate": 0.0001, "loss": 1.5079, "step": 4600 }, { "epoch": 0.5285164551145828, "grad_norm": 0.4243543744087219, "learning_rate": 0.0001, "loss": 1.6191, "step": 4601 }, { "epoch": 0.5286313250244099, "grad_norm": 0.4060072600841522, "learning_rate": 0.0001, "loss": 1.5374, "step": 4602 }, { "epoch": 0.528746194934237, "grad_norm": 0.4349260628223419, "learning_rate": 0.0001, "loss": 1.8664, "step": 4603 }, { "epoch": 0.5288610648440641, "grad_norm": 0.40768831968307495, "learning_rate": 0.0001, "loss": 1.5477, "step": 4604 }, { "epoch": 0.5289759347538913, "grad_norm": 0.39794713258743286, "learning_rate": 0.0001, "loss": 1.6151, "step": 4605 }, { "epoch": 0.5290908046637184, "grad_norm": 0.4161005914211273, "learning_rate": 0.0001, "loss": 1.6587, "step": 4606 }, { "epoch": 0.5292056745735455, "grad_norm": 0.44163885712623596, "learning_rate": 0.0001, "loss": 1.8116, "step": 4607 }, { "epoch": 0.5293205444833726, "grad_norm": 0.5268819332122803, "learning_rate": 0.0001, "loss": 1.8569, "step": 4608 }, { "epoch": 0.5294354143931997, "grad_norm": 0.46857014298439026, "learning_rate": 0.0001, "loss": 1.6907, "step": 4609 }, { "epoch": 0.5295502843030269, "grad_norm": 0.401594340801239, "learning_rate": 0.0001, "loss": 1.5846, "step": 4610 }, { "epoch": 0.529665154212854, "grad_norm": 0.42373210191726685, "learning_rate": 0.0001, "loss": 1.6703, "step": 4611 }, { "epoch": 0.5297800241226811, "grad_norm": 0.40101566910743713, "learning_rate": 0.0001, "loss": 1.6537, "step": 4612 }, { "epoch": 0.5298948940325082, "grad_norm": 0.4653320014476776, "learning_rate": 0.0001, "loss": 1.6741, "step": 4613 }, { "epoch": 0.5300097639423353, "grad_norm": 0.4147876501083374, "learning_rate": 0.0001, "loss": 1.638, "step": 4614 }, { "epoch": 0.5301246338521625, "grad_norm": 0.4062972068786621, "learning_rate": 0.0001, "loss": 1.5747, "step": 4615 }, { "epoch": 0.5302395037619896, "grad_norm": 0.4018900394439697, "learning_rate": 0.0001, "loss": 1.6461, "step": 4616 }, { "epoch": 0.5303543736718167, "grad_norm": 0.41791051626205444, "learning_rate": 0.0001, "loss": 1.7225, "step": 4617 }, { "epoch": 0.5304692435816438, "grad_norm": 0.41558316349983215, "learning_rate": 0.0001, "loss": 1.8203, "step": 4618 }, { "epoch": 0.5305841134914709, "grad_norm": 0.4309985935688019, "learning_rate": 0.0001, "loss": 1.5813, "step": 4619 }, { "epoch": 0.5306989834012981, "grad_norm": 0.4364014267921448, "learning_rate": 0.0001, "loss": 1.8082, "step": 4620 }, { "epoch": 0.5308138533111252, "grad_norm": 0.41828298568725586, "learning_rate": 0.0001, "loss": 1.6167, "step": 4621 }, { "epoch": 0.5309287232209523, "grad_norm": 0.3910794258117676, "learning_rate": 0.0001, "loss": 1.5617, "step": 4622 }, { "epoch": 0.5310435931307794, "grad_norm": 0.4651833772659302, "learning_rate": 0.0001, "loss": 1.7743, "step": 4623 }, { "epoch": 0.5311584630406065, "grad_norm": 0.44561001658439636, "learning_rate": 0.0001, "loss": 1.7388, "step": 4624 }, { "epoch": 0.5312733329504337, "grad_norm": 0.3969671130180359, "learning_rate": 0.0001, "loss": 1.5671, "step": 4625 }, { "epoch": 0.5313882028602608, "grad_norm": 0.4239133894443512, "learning_rate": 0.0001, "loss": 1.6436, "step": 4626 }, { "epoch": 0.5315030727700879, "grad_norm": 0.3926708698272705, "learning_rate": 0.0001, "loss": 1.6082, "step": 4627 }, { "epoch": 0.531617942679915, "grad_norm": 0.4034792184829712, "learning_rate": 0.0001, "loss": 1.5234, "step": 4628 }, { "epoch": 0.5317328125897421, "grad_norm": 0.4075685143470764, "learning_rate": 0.0001, "loss": 1.597, "step": 4629 }, { "epoch": 0.5318476824995693, "grad_norm": 0.41798871755599976, "learning_rate": 0.0001, "loss": 1.7057, "step": 4630 }, { "epoch": 0.5319625524093964, "grad_norm": 0.44857409596443176, "learning_rate": 0.0001, "loss": 1.7721, "step": 4631 }, { "epoch": 0.5320774223192235, "grad_norm": 0.39396408200263977, "learning_rate": 0.0001, "loss": 1.5443, "step": 4632 }, { "epoch": 0.5321922922290506, "grad_norm": 0.4028686285018921, "learning_rate": 0.0001, "loss": 1.6521, "step": 4633 }, { "epoch": 0.5323071621388777, "grad_norm": 0.4449384808540344, "learning_rate": 0.0001, "loss": 1.6526, "step": 4634 }, { "epoch": 0.5324220320487049, "grad_norm": 0.4593904912471771, "learning_rate": 0.0001, "loss": 1.8295, "step": 4635 }, { "epoch": 0.532536901958532, "grad_norm": 0.40952321887016296, "learning_rate": 0.0001, "loss": 1.4723, "step": 4636 }, { "epoch": 0.5326517718683591, "grad_norm": 0.421491801738739, "learning_rate": 0.0001, "loss": 1.5559, "step": 4637 }, { "epoch": 0.5327666417781862, "grad_norm": 0.5018342733383179, "learning_rate": 0.0001, "loss": 1.7603, "step": 4638 }, { "epoch": 0.5328815116880133, "grad_norm": 0.394999235868454, "learning_rate": 0.0001, "loss": 1.4546, "step": 4639 }, { "epoch": 0.5329963815978405, "grad_norm": 0.3952369689941406, "learning_rate": 0.0001, "loss": 1.5503, "step": 4640 }, { "epoch": 0.5331112515076676, "grad_norm": 0.44612810015678406, "learning_rate": 0.0001, "loss": 1.8486, "step": 4641 }, { "epoch": 0.5332261214174947, "grad_norm": 0.4116518497467041, "learning_rate": 0.0001, "loss": 1.7686, "step": 4642 }, { "epoch": 0.5333409913273218, "grad_norm": 0.4047883152961731, "learning_rate": 0.0001, "loss": 1.6333, "step": 4643 }, { "epoch": 0.5334558612371489, "grad_norm": 0.4536430239677429, "learning_rate": 0.0001, "loss": 1.8459, "step": 4644 }, { "epoch": 0.5335707311469761, "grad_norm": 0.4192769527435303, "learning_rate": 0.0001, "loss": 1.7317, "step": 4645 }, { "epoch": 0.5336856010568032, "grad_norm": 0.426587849855423, "learning_rate": 0.0001, "loss": 1.8026, "step": 4646 }, { "epoch": 0.5338004709666303, "grad_norm": 0.40313446521759033, "learning_rate": 0.0001, "loss": 1.6022, "step": 4647 }, { "epoch": 0.5339153408764574, "grad_norm": 0.37919333577156067, "learning_rate": 0.0001, "loss": 1.3846, "step": 4648 }, { "epoch": 0.5340302107862845, "grad_norm": 0.4192160367965698, "learning_rate": 0.0001, "loss": 1.6382, "step": 4649 }, { "epoch": 0.5341450806961117, "grad_norm": 0.42048901319503784, "learning_rate": 0.0001, "loss": 1.6995, "step": 4650 }, { "epoch": 0.5342599506059388, "grad_norm": 0.4094361960887909, "learning_rate": 0.0001, "loss": 1.6199, "step": 4651 }, { "epoch": 0.5343748205157659, "grad_norm": 0.4340735971927643, "learning_rate": 0.0001, "loss": 1.6967, "step": 4652 }, { "epoch": 0.534489690425593, "grad_norm": 0.41635647416114807, "learning_rate": 0.0001, "loss": 1.7417, "step": 4653 }, { "epoch": 0.5346045603354201, "grad_norm": 0.42730745673179626, "learning_rate": 0.0001, "loss": 1.6606, "step": 4654 }, { "epoch": 0.5347194302452473, "grad_norm": 0.43178364634513855, "learning_rate": 0.0001, "loss": 1.5394, "step": 4655 }, { "epoch": 0.5348343001550744, "grad_norm": 0.4151526391506195, "learning_rate": 0.0001, "loss": 1.7846, "step": 4656 }, { "epoch": 0.5349491700649015, "grad_norm": 0.4182411730289459, "learning_rate": 0.0001, "loss": 1.6844, "step": 4657 }, { "epoch": 0.5350640399747286, "grad_norm": 0.423403263092041, "learning_rate": 0.0001, "loss": 1.7087, "step": 4658 }, { "epoch": 0.5351789098845557, "grad_norm": 0.41044798493385315, "learning_rate": 0.0001, "loss": 1.6801, "step": 4659 }, { "epoch": 0.5352937797943829, "grad_norm": 0.4252939224243164, "learning_rate": 0.0001, "loss": 1.7553, "step": 4660 }, { "epoch": 0.53540864970421, "grad_norm": 0.4315437078475952, "learning_rate": 0.0001, "loss": 1.7822, "step": 4661 }, { "epoch": 0.5355235196140371, "grad_norm": 0.4264442026615143, "learning_rate": 0.0001, "loss": 1.7567, "step": 4662 }, { "epoch": 0.5356383895238642, "grad_norm": 0.3990129232406616, "learning_rate": 0.0001, "loss": 1.5359, "step": 4663 }, { "epoch": 0.5357532594336913, "grad_norm": 0.4486422836780548, "learning_rate": 0.0001, "loss": 1.9106, "step": 4664 }, { "epoch": 0.5358681293435185, "grad_norm": 0.4086044430732727, "learning_rate": 0.0001, "loss": 1.7818, "step": 4665 }, { "epoch": 0.5359829992533456, "grad_norm": 0.39344853162765503, "learning_rate": 0.0001, "loss": 1.5296, "step": 4666 }, { "epoch": 0.5360978691631727, "grad_norm": 0.4144117534160614, "learning_rate": 0.0001, "loss": 1.4561, "step": 4667 }, { "epoch": 0.5362127390729998, "grad_norm": 0.40345409512519836, "learning_rate": 0.0001, "loss": 1.6228, "step": 4668 }, { "epoch": 0.5363276089828269, "grad_norm": 0.4093291461467743, "learning_rate": 0.0001, "loss": 1.5001, "step": 4669 }, { "epoch": 0.5364424788926541, "grad_norm": 0.39114460349082947, "learning_rate": 0.0001, "loss": 1.4858, "step": 4670 }, { "epoch": 0.5365573488024812, "grad_norm": 0.4037148058414459, "learning_rate": 0.0001, "loss": 1.5966, "step": 4671 }, { "epoch": 0.5366722187123083, "grad_norm": 0.3919554054737091, "learning_rate": 0.0001, "loss": 1.5426, "step": 4672 }, { "epoch": 0.5367870886221354, "grad_norm": 0.45879948139190674, "learning_rate": 0.0001, "loss": 1.8282, "step": 4673 }, { "epoch": 0.5369019585319625, "grad_norm": 0.4263037443161011, "learning_rate": 0.0001, "loss": 1.7901, "step": 4674 }, { "epoch": 0.5370168284417897, "grad_norm": 0.4132154583930969, "learning_rate": 0.0001, "loss": 1.5991, "step": 4675 }, { "epoch": 0.5371316983516168, "grad_norm": 0.4135374426841736, "learning_rate": 0.0001, "loss": 1.6075, "step": 4676 }, { "epoch": 0.5372465682614439, "grad_norm": 0.43650367856025696, "learning_rate": 0.0001, "loss": 1.7241, "step": 4677 }, { "epoch": 0.537361438171271, "grad_norm": 0.5079533457756042, "learning_rate": 0.0001, "loss": 1.7532, "step": 4678 }, { "epoch": 0.5374763080810981, "grad_norm": 0.46438872814178467, "learning_rate": 0.0001, "loss": 1.7692, "step": 4679 }, { "epoch": 0.5375911779909253, "grad_norm": 0.43697604537010193, "learning_rate": 0.0001, "loss": 1.7386, "step": 4680 }, { "epoch": 0.5377060479007524, "grad_norm": 0.41365137696266174, "learning_rate": 0.0001, "loss": 1.5255, "step": 4681 }, { "epoch": 0.5378209178105795, "grad_norm": 0.42320525646209717, "learning_rate": 0.0001, "loss": 1.5984, "step": 4682 }, { "epoch": 0.5379357877204066, "grad_norm": 0.40122270584106445, "learning_rate": 0.0001, "loss": 1.4893, "step": 4683 }, { "epoch": 0.5380506576302337, "grad_norm": 0.434253990650177, "learning_rate": 0.0001, "loss": 1.5002, "step": 4684 }, { "epoch": 0.5381655275400609, "grad_norm": 0.4121977686882019, "learning_rate": 0.0001, "loss": 1.639, "step": 4685 }, { "epoch": 0.538280397449888, "grad_norm": 0.4040560722351074, "learning_rate": 0.0001, "loss": 1.5075, "step": 4686 }, { "epoch": 0.5383952673597151, "grad_norm": 0.4145204722881317, "learning_rate": 0.0001, "loss": 1.591, "step": 4687 }, { "epoch": 0.5385101372695422, "grad_norm": 0.39439859986305237, "learning_rate": 0.0001, "loss": 1.4481, "step": 4688 }, { "epoch": 0.5386250071793693, "grad_norm": 0.4014344811439514, "learning_rate": 0.0001, "loss": 1.5778, "step": 4689 }, { "epoch": 0.5387398770891965, "grad_norm": 0.42101871967315674, "learning_rate": 0.0001, "loss": 1.6869, "step": 4690 }, { "epoch": 0.5388547469990236, "grad_norm": 0.45102766156196594, "learning_rate": 0.0001, "loss": 1.627, "step": 4691 }, { "epoch": 0.5389696169088507, "grad_norm": 0.43720656633377075, "learning_rate": 0.0001, "loss": 1.6872, "step": 4692 }, { "epoch": 0.5390844868186778, "grad_norm": 0.4264827072620392, "learning_rate": 0.0001, "loss": 1.4308, "step": 4693 }, { "epoch": 0.5391993567285049, "grad_norm": 0.4725572466850281, "learning_rate": 0.0001, "loss": 1.7914, "step": 4694 }, { "epoch": 0.5393142266383321, "grad_norm": 0.4113958477973938, "learning_rate": 0.0001, "loss": 1.625, "step": 4695 }, { "epoch": 0.5394290965481592, "grad_norm": 0.4475966691970825, "learning_rate": 0.0001, "loss": 1.7054, "step": 4696 }, { "epoch": 0.5395439664579863, "grad_norm": 0.43861621618270874, "learning_rate": 0.0001, "loss": 1.6496, "step": 4697 }, { "epoch": 0.5396588363678134, "grad_norm": 0.4491176903247833, "learning_rate": 0.0001, "loss": 1.5547, "step": 4698 }, { "epoch": 0.5397737062776405, "grad_norm": 0.4262305498123169, "learning_rate": 0.0001, "loss": 1.7657, "step": 4699 }, { "epoch": 0.5398885761874677, "grad_norm": 0.4179171025753021, "learning_rate": 0.0001, "loss": 1.5766, "step": 4700 }, { "epoch": 0.5400034460972948, "grad_norm": 0.39834854006767273, "learning_rate": 0.0001, "loss": 1.5324, "step": 4701 }, { "epoch": 0.5401183160071219, "grad_norm": 0.42186471819877625, "learning_rate": 0.0001, "loss": 1.8005, "step": 4702 }, { "epoch": 0.540233185916949, "grad_norm": 0.42857295274734497, "learning_rate": 0.0001, "loss": 1.7399, "step": 4703 }, { "epoch": 0.5403480558267761, "grad_norm": 0.4083864390850067, "learning_rate": 0.0001, "loss": 1.707, "step": 4704 }, { "epoch": 0.5404629257366033, "grad_norm": 0.4472416341304779, "learning_rate": 0.0001, "loss": 1.5065, "step": 4705 }, { "epoch": 0.5405777956464304, "grad_norm": 0.41945722699165344, "learning_rate": 0.0001, "loss": 1.5004, "step": 4706 }, { "epoch": 0.5406926655562575, "grad_norm": 0.41861411929130554, "learning_rate": 0.0001, "loss": 1.6596, "step": 4707 }, { "epoch": 0.5408075354660846, "grad_norm": 0.45254752039909363, "learning_rate": 0.0001, "loss": 1.6306, "step": 4708 }, { "epoch": 0.5409224053759117, "grad_norm": 0.42132070660591125, "learning_rate": 0.0001, "loss": 1.672, "step": 4709 }, { "epoch": 0.5410372752857389, "grad_norm": 0.4404768943786621, "learning_rate": 0.0001, "loss": 1.6902, "step": 4710 }, { "epoch": 0.541152145195566, "grad_norm": 0.42923909425735474, "learning_rate": 0.0001, "loss": 1.7181, "step": 4711 }, { "epoch": 0.5412670151053931, "grad_norm": 0.4722338020801544, "learning_rate": 0.0001, "loss": 1.8949, "step": 4712 }, { "epoch": 0.5413818850152202, "grad_norm": 0.4394582211971283, "learning_rate": 0.0001, "loss": 1.7403, "step": 4713 }, { "epoch": 0.5414967549250473, "grad_norm": 0.3982559144496918, "learning_rate": 0.0001, "loss": 1.594, "step": 4714 }, { "epoch": 0.5416116248348745, "grad_norm": 0.42785125970840454, "learning_rate": 0.0001, "loss": 1.7779, "step": 4715 }, { "epoch": 0.5417264947447016, "grad_norm": 0.38715437054634094, "learning_rate": 0.0001, "loss": 1.6039, "step": 4716 }, { "epoch": 0.5418413646545287, "grad_norm": 0.4336237609386444, "learning_rate": 0.0001, "loss": 1.6872, "step": 4717 }, { "epoch": 0.5419562345643558, "grad_norm": 0.4250262975692749, "learning_rate": 0.0001, "loss": 1.6197, "step": 4718 }, { "epoch": 0.5420711044741829, "grad_norm": 4.473871231079102, "learning_rate": 0.0001, "loss": 1.6598, "step": 4719 }, { "epoch": 0.5421859743840101, "grad_norm": 0.4131799042224884, "learning_rate": 0.0001, "loss": 1.5988, "step": 4720 }, { "epoch": 0.5423008442938372, "grad_norm": 0.43611302971839905, "learning_rate": 0.0001, "loss": 1.6548, "step": 4721 }, { "epoch": 0.5424157142036643, "grad_norm": 0.4363075792789459, "learning_rate": 0.0001, "loss": 1.5381, "step": 4722 }, { "epoch": 0.5425305841134914, "grad_norm": 0.37694451212882996, "learning_rate": 0.0001, "loss": 1.4295, "step": 4723 }, { "epoch": 0.5426454540233185, "grad_norm": 0.4214974641799927, "learning_rate": 0.0001, "loss": 1.5914, "step": 4724 }, { "epoch": 0.5427603239331457, "grad_norm": 0.46396970748901367, "learning_rate": 0.0001, "loss": 1.6362, "step": 4725 }, { "epoch": 0.5428751938429728, "grad_norm": 0.45121777057647705, "learning_rate": 0.0001, "loss": 1.7544, "step": 4726 }, { "epoch": 0.5429900637527999, "grad_norm": 0.40204212069511414, "learning_rate": 0.0001, "loss": 1.6319, "step": 4727 }, { "epoch": 0.543104933662627, "grad_norm": 0.4438580572605133, "learning_rate": 0.0001, "loss": 1.7001, "step": 4728 }, { "epoch": 0.5432198035724541, "grad_norm": 0.41876137256622314, "learning_rate": 0.0001, "loss": 1.612, "step": 4729 }, { "epoch": 0.5433346734822814, "grad_norm": 0.4323340952396393, "learning_rate": 0.0001, "loss": 1.6284, "step": 4730 }, { "epoch": 0.5434495433921085, "grad_norm": 0.4222297668457031, "learning_rate": 0.0001, "loss": 1.5687, "step": 4731 }, { "epoch": 0.5435644133019356, "grad_norm": 0.4332796633243561, "learning_rate": 0.0001, "loss": 1.6464, "step": 4732 }, { "epoch": 0.5436792832117627, "grad_norm": 0.4457077383995056, "learning_rate": 0.0001, "loss": 1.6226, "step": 4733 }, { "epoch": 0.5437941531215899, "grad_norm": 0.40646892786026, "learning_rate": 0.0001, "loss": 1.3511, "step": 4734 }, { "epoch": 0.543909023031417, "grad_norm": 0.42264634370803833, "learning_rate": 0.0001, "loss": 1.5848, "step": 4735 }, { "epoch": 0.5440238929412441, "grad_norm": 7.803822994232178, "learning_rate": 0.0001, "loss": 1.548, "step": 4736 }, { "epoch": 0.5441387628510712, "grad_norm": 0.4260199964046478, "learning_rate": 0.0001, "loss": 1.7074, "step": 4737 }, { "epoch": 0.5442536327608983, "grad_norm": 0.4420236051082611, "learning_rate": 0.0001, "loss": 1.8486, "step": 4738 }, { "epoch": 0.5443685026707255, "grad_norm": 0.39475566148757935, "learning_rate": 0.0001, "loss": 1.448, "step": 4739 }, { "epoch": 0.5444833725805526, "grad_norm": 0.48185980319976807, "learning_rate": 0.0001, "loss": 1.9924, "step": 4740 }, { "epoch": 0.5445982424903797, "grad_norm": 0.4522726833820343, "learning_rate": 0.0001, "loss": 1.7875, "step": 4741 }, { "epoch": 0.5447131124002068, "grad_norm": 0.45154809951782227, "learning_rate": 0.0001, "loss": 1.5931, "step": 4742 }, { "epoch": 0.5448279823100339, "grad_norm": 0.39332887530326843, "learning_rate": 0.0001, "loss": 1.6388, "step": 4743 }, { "epoch": 0.544942852219861, "grad_norm": 0.4441022574901581, "learning_rate": 0.0001, "loss": 1.5344, "step": 4744 }, { "epoch": 0.5450577221296882, "grad_norm": 0.42498818039894104, "learning_rate": 0.0001, "loss": 1.7215, "step": 4745 }, { "epoch": 0.5451725920395153, "grad_norm": 0.39692699909210205, "learning_rate": 0.0001, "loss": 1.5205, "step": 4746 }, { "epoch": 0.5452874619493424, "grad_norm": 0.4348728060722351, "learning_rate": 0.0001, "loss": 1.6317, "step": 4747 }, { "epoch": 0.5454023318591695, "grad_norm": 0.4652867615222931, "learning_rate": 0.0001, "loss": 1.6545, "step": 4748 }, { "epoch": 0.5455172017689967, "grad_norm": 3.736886739730835, "learning_rate": 0.0001, "loss": 1.74, "step": 4749 }, { "epoch": 0.5456320716788238, "grad_norm": 0.4334099292755127, "learning_rate": 0.0001, "loss": 1.7789, "step": 4750 }, { "epoch": 0.5457469415886509, "grad_norm": 0.4378454387187958, "learning_rate": 0.0001, "loss": 1.6962, "step": 4751 }, { "epoch": 0.545861811498478, "grad_norm": 0.46604326367378235, "learning_rate": 0.0001, "loss": 1.6623, "step": 4752 }, { "epoch": 0.5459766814083051, "grad_norm": 13.21650505065918, "learning_rate": 0.0001, "loss": 2.8761, "step": 4753 }, { "epoch": 0.5460915513181323, "grad_norm": 0.6902241706848145, "learning_rate": 0.0001, "loss": 1.6363, "step": 4754 }, { "epoch": 0.5462064212279594, "grad_norm": 14.453927040100098, "learning_rate": 0.0001, "loss": 1.5613, "step": 4755 }, { "epoch": 0.5463212911377865, "grad_norm": 1.930003046989441, "learning_rate": 0.0001, "loss": 1.6478, "step": 4756 }, { "epoch": 0.5464361610476136, "grad_norm": 0.43484631180763245, "learning_rate": 0.0001, "loss": 1.6057, "step": 4757 }, { "epoch": 0.5465510309574407, "grad_norm": 0.4576379656791687, "learning_rate": 0.0001, "loss": 1.6909, "step": 4758 }, { "epoch": 0.5466659008672679, "grad_norm": 0.4986869990825653, "learning_rate": 0.0001, "loss": 1.5963, "step": 4759 }, { "epoch": 0.546780770777095, "grad_norm": 0.4431321620941162, "learning_rate": 0.0001, "loss": 1.5434, "step": 4760 }, { "epoch": 0.5468956406869221, "grad_norm": 0.9614063501358032, "learning_rate": 0.0001, "loss": 1.7638, "step": 4761 }, { "epoch": 0.5470105105967492, "grad_norm": 0.505508303642273, "learning_rate": 0.0001, "loss": 1.7313, "step": 4762 }, { "epoch": 0.5471253805065763, "grad_norm": 0.44963207840919495, "learning_rate": 0.0001, "loss": 1.465, "step": 4763 }, { "epoch": 0.5472402504164035, "grad_norm": 0.4592747092247009, "learning_rate": 0.0001, "loss": 1.7584, "step": 4764 }, { "epoch": 0.5473551203262306, "grad_norm": 0.5370006561279297, "learning_rate": 0.0001, "loss": 1.744, "step": 4765 }, { "epoch": 0.5474699902360577, "grad_norm": 0.4782159924507141, "learning_rate": 0.0001, "loss": 1.5303, "step": 4766 }, { "epoch": 0.5475848601458848, "grad_norm": 0.48043233156204224, "learning_rate": 0.0001, "loss": 1.886, "step": 4767 }, { "epoch": 0.5476997300557119, "grad_norm": 0.47774550318717957, "learning_rate": 0.0001, "loss": 1.6583, "step": 4768 }, { "epoch": 0.547814599965539, "grad_norm": 0.4518395662307739, "learning_rate": 0.0001, "loss": 1.4686, "step": 4769 }, { "epoch": 0.5479294698753662, "grad_norm": 0.4505947530269623, "learning_rate": 0.0001, "loss": 1.6369, "step": 4770 }, { "epoch": 0.5480443397851933, "grad_norm": 0.480541467666626, "learning_rate": 0.0001, "loss": 1.8852, "step": 4771 }, { "epoch": 0.5481592096950204, "grad_norm": 0.48814818263053894, "learning_rate": 0.0001, "loss": 1.6918, "step": 4772 }, { "epoch": 0.5482740796048475, "grad_norm": 0.5180444121360779, "learning_rate": 0.0001, "loss": 1.684, "step": 4773 }, { "epoch": 0.5483889495146747, "grad_norm": 0.4888029396533966, "learning_rate": 0.0001, "loss": 1.6361, "step": 4774 }, { "epoch": 0.5485038194245018, "grad_norm": 0.5578180551528931, "learning_rate": 0.0001, "loss": 1.5176, "step": 4775 }, { "epoch": 0.5486186893343289, "grad_norm": 0.47964030504226685, "learning_rate": 0.0001, "loss": 1.7186, "step": 4776 }, { "epoch": 0.548733559244156, "grad_norm": 0.45499885082244873, "learning_rate": 0.0001, "loss": 1.6304, "step": 4777 }, { "epoch": 0.5488484291539831, "grad_norm": 0.4086899757385254, "learning_rate": 0.0001, "loss": 1.5499, "step": 4778 }, { "epoch": 0.5489632990638103, "grad_norm": 0.4332214593887329, "learning_rate": 0.0001, "loss": 1.6879, "step": 4779 }, { "epoch": 0.5490781689736374, "grad_norm": 0.4639904499053955, "learning_rate": 0.0001, "loss": 1.741, "step": 4780 }, { "epoch": 0.5491930388834645, "grad_norm": 0.4308798313140869, "learning_rate": 0.0001, "loss": 1.6483, "step": 4781 }, { "epoch": 0.5493079087932916, "grad_norm": 0.4329991042613983, "learning_rate": 0.0001, "loss": 1.6593, "step": 4782 }, { "epoch": 0.5494227787031187, "grad_norm": 0.48464497923851013, "learning_rate": 0.0001, "loss": 1.6769, "step": 4783 }, { "epoch": 0.5495376486129459, "grad_norm": 0.44216474890708923, "learning_rate": 0.0001, "loss": 1.744, "step": 4784 }, { "epoch": 0.549652518522773, "grad_norm": 0.44045913219451904, "learning_rate": 0.0001, "loss": 1.6436, "step": 4785 }, { "epoch": 0.5497673884326001, "grad_norm": 0.4664250910282135, "learning_rate": 0.0001, "loss": 1.634, "step": 4786 }, { "epoch": 0.5498822583424272, "grad_norm": 0.41855260729789734, "learning_rate": 0.0001, "loss": 1.7692, "step": 4787 }, { "epoch": 0.5499971282522543, "grad_norm": 0.41777145862579346, "learning_rate": 0.0001, "loss": 1.5246, "step": 4788 }, { "epoch": 0.5501119981620815, "grad_norm": 0.4096580147743225, "learning_rate": 0.0001, "loss": 1.7369, "step": 4789 }, { "epoch": 0.5502268680719086, "grad_norm": 0.4547279477119446, "learning_rate": 0.0001, "loss": 1.7846, "step": 4790 }, { "epoch": 0.5503417379817357, "grad_norm": 0.4305475354194641, "learning_rate": 0.0001, "loss": 1.5839, "step": 4791 }, { "epoch": 0.5504566078915628, "grad_norm": 0.461092084646225, "learning_rate": 0.0001, "loss": 1.5893, "step": 4792 }, { "epoch": 0.5505714778013899, "grad_norm": 0.43664494156837463, "learning_rate": 0.0001, "loss": 1.5907, "step": 4793 }, { "epoch": 0.550686347711217, "grad_norm": 0.4284107983112335, "learning_rate": 0.0001, "loss": 1.6731, "step": 4794 }, { "epoch": 0.5508012176210442, "grad_norm": 0.47756850719451904, "learning_rate": 0.0001, "loss": 1.7553, "step": 4795 }, { "epoch": 0.5509160875308713, "grad_norm": 0.39941316843032837, "learning_rate": 0.0001, "loss": 1.5273, "step": 4796 }, { "epoch": 0.5510309574406984, "grad_norm": 0.4312814474105835, "learning_rate": 0.0001, "loss": 1.4235, "step": 4797 }, { "epoch": 0.5511458273505255, "grad_norm": 0.4587937891483307, "learning_rate": 0.0001, "loss": 1.7078, "step": 4798 }, { "epoch": 0.5512606972603527, "grad_norm": 0.43980634212493896, "learning_rate": 0.0001, "loss": 1.7708, "step": 4799 }, { "epoch": 0.5513755671701798, "grad_norm": 0.4155730903148651, "learning_rate": 0.0001, "loss": 1.5494, "step": 4800 }, { "epoch": 0.5514904370800069, "grad_norm": 0.4062232971191406, "learning_rate": 0.0001, "loss": 1.6272, "step": 4801 }, { "epoch": 0.551605306989834, "grad_norm": 0.43724343180656433, "learning_rate": 0.0001, "loss": 1.6522, "step": 4802 }, { "epoch": 0.5517201768996611, "grad_norm": 0.4811611473560333, "learning_rate": 0.0001, "loss": 1.4776, "step": 4803 }, { "epoch": 0.5518350468094882, "grad_norm": 0.3989713191986084, "learning_rate": 0.0001, "loss": 1.2717, "step": 4804 }, { "epoch": 0.5519499167193154, "grad_norm": 0.43779945373535156, "learning_rate": 0.0001, "loss": 1.7521, "step": 4805 }, { "epoch": 0.5520647866291425, "grad_norm": 0.46011802554130554, "learning_rate": 0.0001, "loss": 1.7228, "step": 4806 }, { "epoch": 0.5521796565389696, "grad_norm": 0.3978514075279236, "learning_rate": 0.0001, "loss": 1.5681, "step": 4807 }, { "epoch": 0.5522945264487967, "grad_norm": 0.4245090186595917, "learning_rate": 0.0001, "loss": 1.6901, "step": 4808 }, { "epoch": 0.5524093963586238, "grad_norm": 0.46046197414398193, "learning_rate": 0.0001, "loss": 1.6541, "step": 4809 }, { "epoch": 0.552524266268451, "grad_norm": 0.40693244338035583, "learning_rate": 0.0001, "loss": 1.601, "step": 4810 }, { "epoch": 0.5526391361782781, "grad_norm": 0.4510886073112488, "learning_rate": 0.0001, "loss": 1.6634, "step": 4811 }, { "epoch": 0.5527540060881052, "grad_norm": 0.43159717321395874, "learning_rate": 0.0001, "loss": 1.7195, "step": 4812 }, { "epoch": 0.5528688759979323, "grad_norm": 0.6375383734703064, "learning_rate": 0.0001, "loss": 1.4753, "step": 4813 }, { "epoch": 0.5529837459077594, "grad_norm": 0.4993058741092682, "learning_rate": 0.0001, "loss": 1.8643, "step": 4814 }, { "epoch": 0.5530986158175866, "grad_norm": 0.44548937678337097, "learning_rate": 0.0001, "loss": 1.6889, "step": 4815 }, { "epoch": 0.5532134857274137, "grad_norm": 0.416087806224823, "learning_rate": 0.0001, "loss": 1.62, "step": 4816 }, { "epoch": 0.5533283556372408, "grad_norm": 0.4004303216934204, "learning_rate": 0.0001, "loss": 1.6196, "step": 4817 }, { "epoch": 0.5534432255470679, "grad_norm": 0.46140408515930176, "learning_rate": 0.0001, "loss": 1.8474, "step": 4818 }, { "epoch": 0.553558095456895, "grad_norm": 0.41780680418014526, "learning_rate": 0.0001, "loss": 1.5836, "step": 4819 }, { "epoch": 0.5536729653667222, "grad_norm": 0.4246324598789215, "learning_rate": 0.0001, "loss": 1.5161, "step": 4820 }, { "epoch": 0.5537878352765493, "grad_norm": 0.4020591080188751, "learning_rate": 0.0001, "loss": 1.5327, "step": 4821 }, { "epoch": 0.5539027051863764, "grad_norm": 0.4175722301006317, "learning_rate": 0.0001, "loss": 1.6808, "step": 4822 }, { "epoch": 0.5540175750962035, "grad_norm": 0.41285720467567444, "learning_rate": 0.0001, "loss": 1.7206, "step": 4823 }, { "epoch": 0.5541324450060306, "grad_norm": 0.3958023190498352, "learning_rate": 0.0001, "loss": 1.4307, "step": 4824 }, { "epoch": 0.5542473149158578, "grad_norm": 0.43072789907455444, "learning_rate": 0.0001, "loss": 1.6919, "step": 4825 }, { "epoch": 0.5543621848256849, "grad_norm": 0.4155644178390503, "learning_rate": 0.0001, "loss": 1.6788, "step": 4826 }, { "epoch": 0.554477054735512, "grad_norm": 0.4563823640346527, "learning_rate": 0.0001, "loss": 1.503, "step": 4827 }, { "epoch": 0.5545919246453391, "grad_norm": 0.40075716376304626, "learning_rate": 0.0001, "loss": 1.4557, "step": 4828 }, { "epoch": 0.5547067945551662, "grad_norm": 0.3869268000125885, "learning_rate": 0.0001, "loss": 1.375, "step": 4829 }, { "epoch": 0.5548216644649934, "grad_norm": 0.4496128559112549, "learning_rate": 0.0001, "loss": 1.6762, "step": 4830 }, { "epoch": 0.5549365343748205, "grad_norm": 0.42789193987846375, "learning_rate": 0.0001, "loss": 1.4755, "step": 4831 }, { "epoch": 0.5550514042846476, "grad_norm": 0.43896588683128357, "learning_rate": 0.0001, "loss": 1.861, "step": 4832 }, { "epoch": 0.5551662741944747, "grad_norm": 0.42781296372413635, "learning_rate": 0.0001, "loss": 1.4889, "step": 4833 }, { "epoch": 0.5552811441043018, "grad_norm": 0.40033093094825745, "learning_rate": 0.0001, "loss": 1.6163, "step": 4834 }, { "epoch": 0.555396014014129, "grad_norm": 0.43149489164352417, "learning_rate": 0.0001, "loss": 1.5783, "step": 4835 }, { "epoch": 0.5555108839239561, "grad_norm": 0.4161946177482605, "learning_rate": 0.0001, "loss": 1.4264, "step": 4836 }, { "epoch": 0.5556257538337832, "grad_norm": 0.45024940371513367, "learning_rate": 0.0001, "loss": 1.6887, "step": 4837 }, { "epoch": 0.5557406237436103, "grad_norm": 0.48352208733558655, "learning_rate": 0.0001, "loss": 1.7129, "step": 4838 }, { "epoch": 0.5558554936534374, "grad_norm": 0.40999725461006165, "learning_rate": 0.0001, "loss": 1.6054, "step": 4839 }, { "epoch": 0.5559703635632646, "grad_norm": 0.43245741724967957, "learning_rate": 0.0001, "loss": 1.635, "step": 4840 }, { "epoch": 0.5560852334730917, "grad_norm": 0.4161476194858551, "learning_rate": 0.0001, "loss": 1.5808, "step": 4841 }, { "epoch": 0.5562001033829188, "grad_norm": 0.39406320452690125, "learning_rate": 0.0001, "loss": 1.4987, "step": 4842 }, { "epoch": 0.5563149732927459, "grad_norm": 0.43421339988708496, "learning_rate": 0.0001, "loss": 1.6044, "step": 4843 }, { "epoch": 0.556429843202573, "grad_norm": 0.4208906888961792, "learning_rate": 0.0001, "loss": 1.4881, "step": 4844 }, { "epoch": 0.5565447131124002, "grad_norm": 0.43722692131996155, "learning_rate": 0.0001, "loss": 1.7109, "step": 4845 }, { "epoch": 0.5566595830222273, "grad_norm": 0.4307270348072052, "learning_rate": 0.0001, "loss": 1.7208, "step": 4846 }, { "epoch": 0.5567744529320544, "grad_norm": 0.44914358854293823, "learning_rate": 0.0001, "loss": 1.5319, "step": 4847 }, { "epoch": 0.5568893228418815, "grad_norm": 0.4444207549095154, "learning_rate": 0.0001, "loss": 1.6153, "step": 4848 }, { "epoch": 0.5570041927517086, "grad_norm": 0.46169236302375793, "learning_rate": 0.0001, "loss": 1.7459, "step": 4849 }, { "epoch": 0.5571190626615358, "grad_norm": 0.45408234000205994, "learning_rate": 0.0001, "loss": 1.6744, "step": 4850 }, { "epoch": 0.5572339325713629, "grad_norm": 0.4497997760772705, "learning_rate": 0.0001, "loss": 1.7392, "step": 4851 }, { "epoch": 0.55734880248119, "grad_norm": 0.4248064160346985, "learning_rate": 0.0001, "loss": 1.4848, "step": 4852 }, { "epoch": 0.5574636723910171, "grad_norm": 0.436135470867157, "learning_rate": 0.0001, "loss": 1.5865, "step": 4853 }, { "epoch": 0.5575785423008442, "grad_norm": 0.4491892158985138, "learning_rate": 0.0001, "loss": 1.7014, "step": 4854 }, { "epoch": 0.5576934122106714, "grad_norm": 0.395537793636322, "learning_rate": 0.0001, "loss": 1.6343, "step": 4855 }, { "epoch": 0.5578082821204985, "grad_norm": 0.46674367785453796, "learning_rate": 0.0001, "loss": 1.6606, "step": 4856 }, { "epoch": 0.5579231520303256, "grad_norm": 0.5027632713317871, "learning_rate": 0.0001, "loss": 1.4204, "step": 4857 }, { "epoch": 0.5580380219401527, "grad_norm": 0.39720940589904785, "learning_rate": 0.0001, "loss": 1.4723, "step": 4858 }, { "epoch": 0.5581528918499798, "grad_norm": 0.4141447842121124, "learning_rate": 0.0001, "loss": 1.5265, "step": 4859 }, { "epoch": 0.558267761759807, "grad_norm": 0.4240556061267853, "learning_rate": 0.0001, "loss": 1.6346, "step": 4860 }, { "epoch": 0.5583826316696341, "grad_norm": 0.47687891125679016, "learning_rate": 0.0001, "loss": 1.9204, "step": 4861 }, { "epoch": 0.5584975015794612, "grad_norm": 0.4583745002746582, "learning_rate": 0.0001, "loss": 1.5725, "step": 4862 }, { "epoch": 0.5586123714892883, "grad_norm": 0.43266603350639343, "learning_rate": 0.0001, "loss": 1.6633, "step": 4863 }, { "epoch": 0.5587272413991154, "grad_norm": 0.415556401014328, "learning_rate": 0.0001, "loss": 1.5649, "step": 4864 }, { "epoch": 0.5588421113089426, "grad_norm": 0.4445532262325287, "learning_rate": 0.0001, "loss": 1.7728, "step": 4865 }, { "epoch": 0.5589569812187697, "grad_norm": 0.43825092911720276, "learning_rate": 0.0001, "loss": 1.7384, "step": 4866 }, { "epoch": 0.5590718511285969, "grad_norm": 0.4609036147594452, "learning_rate": 0.0001, "loss": 1.4781, "step": 4867 }, { "epoch": 0.559186721038424, "grad_norm": 0.44946613907814026, "learning_rate": 0.0001, "loss": 1.6208, "step": 4868 }, { "epoch": 0.5593015909482512, "grad_norm": 0.44441309571266174, "learning_rate": 0.0001, "loss": 1.7457, "step": 4869 }, { "epoch": 0.5594164608580783, "grad_norm": 0.42556867003440857, "learning_rate": 0.0001, "loss": 1.7197, "step": 4870 }, { "epoch": 0.5595313307679054, "grad_norm": 0.47983217239379883, "learning_rate": 0.0001, "loss": 1.8489, "step": 4871 }, { "epoch": 0.5596462006777325, "grad_norm": 0.46023857593536377, "learning_rate": 0.0001, "loss": 1.7739, "step": 4872 }, { "epoch": 0.5597610705875596, "grad_norm": 0.4268305003643036, "learning_rate": 0.0001, "loss": 1.6737, "step": 4873 }, { "epoch": 0.5598759404973868, "grad_norm": 0.43025729060173035, "learning_rate": 0.0001, "loss": 1.3838, "step": 4874 }, { "epoch": 0.5599908104072139, "grad_norm": 0.4198823869228363, "learning_rate": 0.0001, "loss": 1.5685, "step": 4875 }, { "epoch": 0.560105680317041, "grad_norm": 0.39771756529808044, "learning_rate": 0.0001, "loss": 1.3502, "step": 4876 }, { "epoch": 0.5602205502268681, "grad_norm": 0.42094194889068604, "learning_rate": 0.0001, "loss": 1.7735, "step": 4877 }, { "epoch": 0.5603354201366952, "grad_norm": 0.4145548343658447, "learning_rate": 0.0001, "loss": 1.535, "step": 4878 }, { "epoch": 0.5604502900465224, "grad_norm": 0.4257977306842804, "learning_rate": 0.0001, "loss": 1.7172, "step": 4879 }, { "epoch": 0.5605651599563495, "grad_norm": 0.43406781554222107, "learning_rate": 0.0001, "loss": 1.6353, "step": 4880 }, { "epoch": 0.5606800298661766, "grad_norm": 0.4150993525981903, "learning_rate": 0.0001, "loss": 1.5706, "step": 4881 }, { "epoch": 0.5607948997760037, "grad_norm": 0.4342502951622009, "learning_rate": 0.0001, "loss": 1.7698, "step": 4882 }, { "epoch": 0.5609097696858308, "grad_norm": 0.46102142333984375, "learning_rate": 0.0001, "loss": 1.6396, "step": 4883 }, { "epoch": 0.561024639595658, "grad_norm": 0.42644503712654114, "learning_rate": 0.0001, "loss": 1.7872, "step": 4884 }, { "epoch": 0.5611395095054851, "grad_norm": 0.4223010241985321, "learning_rate": 0.0001, "loss": 1.6453, "step": 4885 }, { "epoch": 0.5612543794153122, "grad_norm": 0.4439496695995331, "learning_rate": 0.0001, "loss": 1.6689, "step": 4886 }, { "epoch": 0.5613692493251393, "grad_norm": 0.4849221408367157, "learning_rate": 0.0001, "loss": 1.7259, "step": 4887 }, { "epoch": 0.5614841192349664, "grad_norm": 0.4383675158023834, "learning_rate": 0.0001, "loss": 1.7152, "step": 4888 }, { "epoch": 0.5615989891447936, "grad_norm": 0.4355122745037079, "learning_rate": 0.0001, "loss": 1.548, "step": 4889 }, { "epoch": 0.5617138590546207, "grad_norm": 0.4068525731563568, "learning_rate": 0.0001, "loss": 1.3924, "step": 4890 }, { "epoch": 0.5618287289644478, "grad_norm": 0.4126929044723511, "learning_rate": 0.0001, "loss": 1.4923, "step": 4891 }, { "epoch": 0.5619435988742749, "grad_norm": 0.43039217591285706, "learning_rate": 0.0001, "loss": 1.5134, "step": 4892 }, { "epoch": 0.562058468784102, "grad_norm": 0.4148006737232208, "learning_rate": 0.0001, "loss": 1.4452, "step": 4893 }, { "epoch": 0.5621733386939292, "grad_norm": 0.467264860868454, "learning_rate": 0.0001, "loss": 1.7233, "step": 4894 }, { "epoch": 0.5622882086037563, "grad_norm": 0.43316879868507385, "learning_rate": 0.0001, "loss": 1.6064, "step": 4895 }, { "epoch": 0.5624030785135834, "grad_norm": 0.4467410147190094, "learning_rate": 0.0001, "loss": 1.7144, "step": 4896 }, { "epoch": 0.5625179484234105, "grad_norm": 0.45276862382888794, "learning_rate": 0.0001, "loss": 1.7343, "step": 4897 }, { "epoch": 0.5626328183332376, "grad_norm": 0.4133438169956207, "learning_rate": 0.0001, "loss": 1.6185, "step": 4898 }, { "epoch": 0.5627476882430648, "grad_norm": 0.4188348352909088, "learning_rate": 0.0001, "loss": 1.5476, "step": 4899 }, { "epoch": 0.5628625581528919, "grad_norm": 0.44320085644721985, "learning_rate": 0.0001, "loss": 1.6812, "step": 4900 }, { "epoch": 0.562977428062719, "grad_norm": 0.43440455198287964, "learning_rate": 0.0001, "loss": 1.6843, "step": 4901 }, { "epoch": 0.5630922979725461, "grad_norm": 0.45006540417671204, "learning_rate": 0.0001, "loss": 1.5807, "step": 4902 }, { "epoch": 0.5632071678823732, "grad_norm": 0.4506426453590393, "learning_rate": 0.0001, "loss": 1.6472, "step": 4903 }, { "epoch": 0.5633220377922004, "grad_norm": 0.43011045455932617, "learning_rate": 0.0001, "loss": 1.6468, "step": 4904 }, { "epoch": 0.5634369077020275, "grad_norm": 0.4250010848045349, "learning_rate": 0.0001, "loss": 1.5558, "step": 4905 }, { "epoch": 0.5635517776118546, "grad_norm": 0.4180524945259094, "learning_rate": 0.0001, "loss": 1.5528, "step": 4906 }, { "epoch": 0.5636666475216817, "grad_norm": 0.4589906334877014, "learning_rate": 0.0001, "loss": 1.69, "step": 4907 }, { "epoch": 0.5637815174315088, "grad_norm": 0.40024542808532715, "learning_rate": 0.0001, "loss": 1.6212, "step": 4908 }, { "epoch": 0.563896387341336, "grad_norm": 0.4209577739238739, "learning_rate": 0.0001, "loss": 1.7709, "step": 4909 }, { "epoch": 0.5640112572511631, "grad_norm": 0.42181211709976196, "learning_rate": 0.0001, "loss": 1.6891, "step": 4910 }, { "epoch": 0.5641261271609902, "grad_norm": 0.44479602575302124, "learning_rate": 0.0001, "loss": 1.8052, "step": 4911 }, { "epoch": 0.5642409970708173, "grad_norm": 0.41980117559432983, "learning_rate": 0.0001, "loss": 1.7223, "step": 4912 }, { "epoch": 0.5643558669806444, "grad_norm": 0.45046502351760864, "learning_rate": 0.0001, "loss": 1.6636, "step": 4913 }, { "epoch": 0.5644707368904716, "grad_norm": 0.4410959482192993, "learning_rate": 0.0001, "loss": 1.751, "step": 4914 }, { "epoch": 0.5645856068002987, "grad_norm": 0.4019821286201477, "learning_rate": 0.0001, "loss": 1.6362, "step": 4915 }, { "epoch": 0.5647004767101258, "grad_norm": 0.4647962152957916, "learning_rate": 0.0001, "loss": 1.7969, "step": 4916 }, { "epoch": 0.5648153466199529, "grad_norm": 0.4058244824409485, "learning_rate": 0.0001, "loss": 1.723, "step": 4917 }, { "epoch": 0.56493021652978, "grad_norm": 0.41798049211502075, "learning_rate": 0.0001, "loss": 1.5339, "step": 4918 }, { "epoch": 0.5650450864396072, "grad_norm": 0.39506152272224426, "learning_rate": 0.0001, "loss": 1.2809, "step": 4919 }, { "epoch": 0.5651599563494343, "grad_norm": 0.41670095920562744, "learning_rate": 0.0001, "loss": 1.6378, "step": 4920 }, { "epoch": 0.5652748262592614, "grad_norm": 0.41250118613243103, "learning_rate": 0.0001, "loss": 1.5185, "step": 4921 }, { "epoch": 0.5653896961690885, "grad_norm": 0.4120524227619171, "learning_rate": 0.0001, "loss": 1.6923, "step": 4922 }, { "epoch": 0.5655045660789156, "grad_norm": 0.3931626081466675, "learning_rate": 0.0001, "loss": 1.4581, "step": 4923 }, { "epoch": 0.5656194359887428, "grad_norm": 0.42506247758865356, "learning_rate": 0.0001, "loss": 1.6842, "step": 4924 }, { "epoch": 0.5657343058985699, "grad_norm": 0.4446874260902405, "learning_rate": 0.0001, "loss": 1.6743, "step": 4925 }, { "epoch": 0.565849175808397, "grad_norm": 0.3943353295326233, "learning_rate": 0.0001, "loss": 1.5795, "step": 4926 }, { "epoch": 0.5659640457182241, "grad_norm": 0.41571202874183655, "learning_rate": 0.0001, "loss": 1.3642, "step": 4927 }, { "epoch": 0.5660789156280512, "grad_norm": 0.4377326965332031, "learning_rate": 0.0001, "loss": 1.6458, "step": 4928 }, { "epoch": 0.5661937855378784, "grad_norm": 0.4330956041812897, "learning_rate": 0.0001, "loss": 1.6974, "step": 4929 }, { "epoch": 0.5663086554477055, "grad_norm": 0.4336974024772644, "learning_rate": 0.0001, "loss": 1.6062, "step": 4930 }, { "epoch": 0.5664235253575326, "grad_norm": 0.43469882011413574, "learning_rate": 0.0001, "loss": 1.6367, "step": 4931 }, { "epoch": 0.5665383952673597, "grad_norm": 0.42299988865852356, "learning_rate": 0.0001, "loss": 1.6504, "step": 4932 }, { "epoch": 0.5666532651771868, "grad_norm": 0.4548715651035309, "learning_rate": 0.0001, "loss": 1.771, "step": 4933 }, { "epoch": 0.566768135087014, "grad_norm": 0.45423921942710876, "learning_rate": 0.0001, "loss": 1.7278, "step": 4934 }, { "epoch": 0.5668830049968411, "grad_norm": 0.4271089434623718, "learning_rate": 0.0001, "loss": 1.6531, "step": 4935 }, { "epoch": 0.5669978749066682, "grad_norm": 0.4774452745914459, "learning_rate": 0.0001, "loss": 1.7197, "step": 4936 }, { "epoch": 0.5671127448164953, "grad_norm": 0.4133590757846832, "learning_rate": 0.0001, "loss": 1.5688, "step": 4937 }, { "epoch": 0.5672276147263224, "grad_norm": 0.4909397065639496, "learning_rate": 0.0001, "loss": 1.7266, "step": 4938 }, { "epoch": 0.5673424846361496, "grad_norm": 0.47322049736976624, "learning_rate": 0.0001, "loss": 1.5714, "step": 4939 }, { "epoch": 0.5674573545459767, "grad_norm": 0.42633056640625, "learning_rate": 0.0001, "loss": 1.5971, "step": 4940 }, { "epoch": 0.5675722244558038, "grad_norm": 0.43367111682891846, "learning_rate": 0.0001, "loss": 1.5723, "step": 4941 }, { "epoch": 0.5676870943656309, "grad_norm": 0.4305420219898224, "learning_rate": 0.0001, "loss": 1.6753, "step": 4942 }, { "epoch": 0.567801964275458, "grad_norm": 0.4496208727359772, "learning_rate": 0.0001, "loss": 1.8287, "step": 4943 }, { "epoch": 0.5679168341852852, "grad_norm": 0.4456053376197815, "learning_rate": 0.0001, "loss": 1.5372, "step": 4944 }, { "epoch": 0.5680317040951123, "grad_norm": 0.41399866342544556, "learning_rate": 0.0001, "loss": 1.6282, "step": 4945 }, { "epoch": 0.5681465740049394, "grad_norm": 0.41621148586273193, "learning_rate": 0.0001, "loss": 1.5602, "step": 4946 }, { "epoch": 0.5682614439147665, "grad_norm": 0.4587562084197998, "learning_rate": 0.0001, "loss": 1.8734, "step": 4947 }, { "epoch": 0.5683763138245936, "grad_norm": 0.4021860361099243, "learning_rate": 0.0001, "loss": 1.3734, "step": 4948 }, { "epoch": 0.5684911837344208, "grad_norm": 0.4463704228401184, "learning_rate": 0.0001, "loss": 1.8134, "step": 4949 }, { "epoch": 0.5686060536442479, "grad_norm": 0.424570769071579, "learning_rate": 0.0001, "loss": 1.7018, "step": 4950 }, { "epoch": 0.568720923554075, "grad_norm": 0.42182692885398865, "learning_rate": 0.0001, "loss": 1.4935, "step": 4951 }, { "epoch": 0.5688357934639021, "grad_norm": 0.4009837806224823, "learning_rate": 0.0001, "loss": 1.5051, "step": 4952 }, { "epoch": 0.5689506633737292, "grad_norm": 0.394553542137146, "learning_rate": 0.0001, "loss": 1.5031, "step": 4953 }, { "epoch": 0.5690655332835564, "grad_norm": 0.4050164520740509, "learning_rate": 0.0001, "loss": 1.548, "step": 4954 }, { "epoch": 0.5691804031933835, "grad_norm": 0.4225456714630127, "learning_rate": 0.0001, "loss": 1.6712, "step": 4955 }, { "epoch": 0.5692952731032106, "grad_norm": 0.4110758304595947, "learning_rate": 0.0001, "loss": 1.4407, "step": 4956 }, { "epoch": 0.5694101430130377, "grad_norm": 0.434222549200058, "learning_rate": 0.0001, "loss": 1.6659, "step": 4957 }, { "epoch": 0.5695250129228648, "grad_norm": 0.4162403345108032, "learning_rate": 0.0001, "loss": 1.6895, "step": 4958 }, { "epoch": 0.569639882832692, "grad_norm": 0.43041545152664185, "learning_rate": 0.0001, "loss": 1.6472, "step": 4959 }, { "epoch": 0.5697547527425191, "grad_norm": 0.4041105806827545, "learning_rate": 0.0001, "loss": 1.4989, "step": 4960 }, { "epoch": 0.5698696226523462, "grad_norm": 0.4230792820453644, "learning_rate": 0.0001, "loss": 1.5815, "step": 4961 }, { "epoch": 0.5699844925621733, "grad_norm": 0.4470837414264679, "learning_rate": 0.0001, "loss": 1.9244, "step": 4962 }, { "epoch": 0.5700993624720004, "grad_norm": 0.40653735399246216, "learning_rate": 0.0001, "loss": 1.5813, "step": 4963 }, { "epoch": 0.5702142323818276, "grad_norm": 0.42262402176856995, "learning_rate": 0.0001, "loss": 1.6871, "step": 4964 }, { "epoch": 0.5703291022916547, "grad_norm": 0.4305260181427002, "learning_rate": 0.0001, "loss": 1.5856, "step": 4965 }, { "epoch": 0.5704439722014818, "grad_norm": 0.4613392651081085, "learning_rate": 0.0001, "loss": 1.7875, "step": 4966 }, { "epoch": 0.5705588421113089, "grad_norm": 0.446806937456131, "learning_rate": 0.0001, "loss": 1.7816, "step": 4967 }, { "epoch": 0.570673712021136, "grad_norm": 0.40673378109931946, "learning_rate": 0.0001, "loss": 1.5248, "step": 4968 }, { "epoch": 0.5707885819309632, "grad_norm": 0.46812543272972107, "learning_rate": 0.0001, "loss": 1.5529, "step": 4969 }, { "epoch": 0.5709034518407903, "grad_norm": 0.406392902135849, "learning_rate": 0.0001, "loss": 1.5711, "step": 4970 }, { "epoch": 0.5710183217506174, "grad_norm": 0.4076795279979706, "learning_rate": 0.0001, "loss": 1.4139, "step": 4971 }, { "epoch": 0.5711331916604445, "grad_norm": 0.4175947308540344, "learning_rate": 0.0001, "loss": 1.6253, "step": 4972 }, { "epoch": 0.5712480615702716, "grad_norm": 0.4206025004386902, "learning_rate": 0.0001, "loss": 1.7126, "step": 4973 }, { "epoch": 0.5713629314800988, "grad_norm": 0.41030603647232056, "learning_rate": 0.0001, "loss": 1.6595, "step": 4974 }, { "epoch": 0.5714778013899259, "grad_norm": 0.42624354362487793, "learning_rate": 0.0001, "loss": 1.5886, "step": 4975 }, { "epoch": 0.571592671299753, "grad_norm": 0.39379024505615234, "learning_rate": 0.0001, "loss": 1.4981, "step": 4976 }, { "epoch": 0.5717075412095801, "grad_norm": 0.42009881138801575, "learning_rate": 0.0001, "loss": 1.5639, "step": 4977 }, { "epoch": 0.5718224111194072, "grad_norm": 0.4457103908061981, "learning_rate": 0.0001, "loss": 1.4205, "step": 4978 }, { "epoch": 0.5719372810292344, "grad_norm": 0.4116220474243164, "learning_rate": 0.0001, "loss": 1.6223, "step": 4979 }, { "epoch": 0.5720521509390615, "grad_norm": 0.44594505429267883, "learning_rate": 0.0001, "loss": 1.5149, "step": 4980 }, { "epoch": 0.5721670208488886, "grad_norm": 0.4323948919773102, "learning_rate": 0.0001, "loss": 1.4976, "step": 4981 }, { "epoch": 0.5722818907587157, "grad_norm": 0.38895127177238464, "learning_rate": 0.0001, "loss": 1.4075, "step": 4982 }, { "epoch": 0.5723967606685428, "grad_norm": 0.4395938515663147, "learning_rate": 0.0001, "loss": 1.5436, "step": 4983 }, { "epoch": 0.57251163057837, "grad_norm": 0.43774986267089844, "learning_rate": 0.0001, "loss": 1.4892, "step": 4984 }, { "epoch": 0.5726265004881971, "grad_norm": 0.4183255136013031, "learning_rate": 0.0001, "loss": 1.6281, "step": 4985 }, { "epoch": 0.5727413703980242, "grad_norm": 0.439797580242157, "learning_rate": 0.0001, "loss": 1.4632, "step": 4986 }, { "epoch": 0.5728562403078513, "grad_norm": 0.413779616355896, "learning_rate": 0.0001, "loss": 1.3779, "step": 4987 }, { "epoch": 0.5729711102176784, "grad_norm": 0.4428330361843109, "learning_rate": 0.0001, "loss": 1.7449, "step": 4988 }, { "epoch": 0.5730859801275056, "grad_norm": 0.41953855752944946, "learning_rate": 0.0001, "loss": 1.6953, "step": 4989 }, { "epoch": 0.5732008500373327, "grad_norm": 0.4438232183456421, "learning_rate": 0.0001, "loss": 1.7179, "step": 4990 }, { "epoch": 0.5733157199471598, "grad_norm": 0.4678000807762146, "learning_rate": 0.0001, "loss": 1.8077, "step": 4991 }, { "epoch": 0.5734305898569869, "grad_norm": 0.48187345266342163, "learning_rate": 0.0001, "loss": 1.7687, "step": 4992 }, { "epoch": 0.573545459766814, "grad_norm": 0.4310360848903656, "learning_rate": 0.0001, "loss": 1.5992, "step": 4993 }, { "epoch": 0.5736603296766412, "grad_norm": 0.4390687346458435, "learning_rate": 0.0001, "loss": 1.6331, "step": 4994 }, { "epoch": 0.5737751995864683, "grad_norm": 0.44566598534584045, "learning_rate": 0.0001, "loss": 1.6057, "step": 4995 }, { "epoch": 0.5738900694962954, "grad_norm": 0.399006724357605, "learning_rate": 0.0001, "loss": 1.6791, "step": 4996 }, { "epoch": 0.5740049394061225, "grad_norm": 0.42733508348464966, "learning_rate": 0.0001, "loss": 1.7704, "step": 4997 }, { "epoch": 0.5741198093159496, "grad_norm": 0.4273543655872345, "learning_rate": 0.0001, "loss": 1.6384, "step": 4998 }, { "epoch": 0.5742346792257768, "grad_norm": 0.43150708079338074, "learning_rate": 0.0001, "loss": 1.7348, "step": 4999 }, { "epoch": 0.5743495491356039, "grad_norm": 0.4143429398536682, "learning_rate": 0.0001, "loss": 1.5195, "step": 5000 }, { "epoch": 0.574464419045431, "grad_norm": 0.41837653517723083, "learning_rate": 0.0001, "loss": 1.6473, "step": 5001 }, { "epoch": 0.5745792889552581, "grad_norm": 0.4500226080417633, "learning_rate": 0.0001, "loss": 1.8321, "step": 5002 }, { "epoch": 0.5746941588650852, "grad_norm": 0.43939411640167236, "learning_rate": 0.0001, "loss": 1.7294, "step": 5003 }, { "epoch": 0.5748090287749125, "grad_norm": 0.4168783128261566, "learning_rate": 0.0001, "loss": 1.7016, "step": 5004 }, { "epoch": 0.5749238986847396, "grad_norm": 0.4361497759819031, "learning_rate": 0.0001, "loss": 1.7038, "step": 5005 }, { "epoch": 0.5750387685945667, "grad_norm": 0.4564398527145386, "learning_rate": 0.0001, "loss": 1.738, "step": 5006 }, { "epoch": 0.5751536385043938, "grad_norm": 0.4513961672782898, "learning_rate": 0.0001, "loss": 1.6747, "step": 5007 }, { "epoch": 0.575268508414221, "grad_norm": 0.4509972631931305, "learning_rate": 0.0001, "loss": 1.6103, "step": 5008 }, { "epoch": 0.5753833783240481, "grad_norm": 0.42755764722824097, "learning_rate": 0.0001, "loss": 1.6723, "step": 5009 }, { "epoch": 0.5754982482338752, "grad_norm": 0.4352383017539978, "learning_rate": 0.0001, "loss": 1.604, "step": 5010 }, { "epoch": 0.5756131181437023, "grad_norm": 0.4069184362888336, "learning_rate": 0.0001, "loss": 1.5665, "step": 5011 }, { "epoch": 0.5757279880535294, "grad_norm": 0.4504094421863556, "learning_rate": 0.0001, "loss": 1.7465, "step": 5012 }, { "epoch": 0.5758428579633565, "grad_norm": 0.4137848913669586, "learning_rate": 0.0001, "loss": 1.419, "step": 5013 }, { "epoch": 0.5759577278731837, "grad_norm": 0.43577203154563904, "learning_rate": 0.0001, "loss": 1.8129, "step": 5014 }, { "epoch": 0.5760725977830108, "grad_norm": 0.45914286375045776, "learning_rate": 0.0001, "loss": 1.8664, "step": 5015 }, { "epoch": 0.5761874676928379, "grad_norm": 0.4018266499042511, "learning_rate": 0.0001, "loss": 1.4698, "step": 5016 }, { "epoch": 0.576302337602665, "grad_norm": 0.4395134449005127, "learning_rate": 0.0001, "loss": 1.7316, "step": 5017 }, { "epoch": 0.5764172075124921, "grad_norm": 0.42709699273109436, "learning_rate": 0.0001, "loss": 1.7313, "step": 5018 }, { "epoch": 0.5765320774223193, "grad_norm": 0.40391069650650024, "learning_rate": 0.0001, "loss": 1.6898, "step": 5019 }, { "epoch": 0.5766469473321464, "grad_norm": 0.4373941421508789, "learning_rate": 0.0001, "loss": 1.6366, "step": 5020 }, { "epoch": 0.5767618172419735, "grad_norm": 0.4111486077308655, "learning_rate": 0.0001, "loss": 1.5335, "step": 5021 }, { "epoch": 0.5768766871518006, "grad_norm": 0.44314584136009216, "learning_rate": 0.0001, "loss": 1.7859, "step": 5022 }, { "epoch": 0.5769915570616277, "grad_norm": 0.38819631934165955, "learning_rate": 0.0001, "loss": 1.3416, "step": 5023 }, { "epoch": 0.5771064269714549, "grad_norm": 0.4158842861652374, "learning_rate": 0.0001, "loss": 1.7234, "step": 5024 }, { "epoch": 0.577221296881282, "grad_norm": 0.41062501072883606, "learning_rate": 0.0001, "loss": 1.51, "step": 5025 }, { "epoch": 0.5773361667911091, "grad_norm": 0.4440518617630005, "learning_rate": 0.0001, "loss": 1.555, "step": 5026 }, { "epoch": 0.5774510367009362, "grad_norm": 0.3918763995170593, "learning_rate": 0.0001, "loss": 1.5668, "step": 5027 }, { "epoch": 0.5775659066107633, "grad_norm": 0.4936619997024536, "learning_rate": 0.0001, "loss": 1.9151, "step": 5028 }, { "epoch": 0.5776807765205905, "grad_norm": 0.4350280463695526, "learning_rate": 0.0001, "loss": 1.534, "step": 5029 }, { "epoch": 0.5777956464304176, "grad_norm": 0.41786620020866394, "learning_rate": 0.0001, "loss": 1.6405, "step": 5030 }, { "epoch": 0.5779105163402447, "grad_norm": 0.44200530648231506, "learning_rate": 0.0001, "loss": 1.8449, "step": 5031 }, { "epoch": 0.5780253862500718, "grad_norm": 0.49857649207115173, "learning_rate": 0.0001, "loss": 1.7837, "step": 5032 }, { "epoch": 0.578140256159899, "grad_norm": 0.40374937653541565, "learning_rate": 0.0001, "loss": 1.3166, "step": 5033 }, { "epoch": 0.5782551260697261, "grad_norm": 0.425289124250412, "learning_rate": 0.0001, "loss": 1.5533, "step": 5034 }, { "epoch": 0.5783699959795532, "grad_norm": 0.42704570293426514, "learning_rate": 0.0001, "loss": 1.6993, "step": 5035 }, { "epoch": 0.5784848658893803, "grad_norm": 0.46401023864746094, "learning_rate": 0.0001, "loss": 1.7889, "step": 5036 }, { "epoch": 0.5785997357992074, "grad_norm": 0.45945799350738525, "learning_rate": 0.0001, "loss": 1.6405, "step": 5037 }, { "epoch": 0.5787146057090345, "grad_norm": 0.4467889070510864, "learning_rate": 0.0001, "loss": 1.5449, "step": 5038 }, { "epoch": 0.5788294756188617, "grad_norm": 0.418954998254776, "learning_rate": 0.0001, "loss": 1.5479, "step": 5039 }, { "epoch": 0.5789443455286888, "grad_norm": 0.4095263183116913, "learning_rate": 0.0001, "loss": 1.5838, "step": 5040 }, { "epoch": 0.5790592154385159, "grad_norm": 0.39436352252960205, "learning_rate": 0.0001, "loss": 1.3867, "step": 5041 }, { "epoch": 0.579174085348343, "grad_norm": 0.43269625306129456, "learning_rate": 0.0001, "loss": 1.686, "step": 5042 }, { "epoch": 0.5792889552581701, "grad_norm": 0.4262961745262146, "learning_rate": 0.0001, "loss": 1.7065, "step": 5043 }, { "epoch": 0.5794038251679973, "grad_norm": 0.42846837639808655, "learning_rate": 0.0001, "loss": 1.5565, "step": 5044 }, { "epoch": 0.5795186950778244, "grad_norm": 0.4515441656112671, "learning_rate": 0.0001, "loss": 1.7085, "step": 5045 }, { "epoch": 0.5796335649876515, "grad_norm": 0.4548323452472687, "learning_rate": 0.0001, "loss": 1.9127, "step": 5046 }, { "epoch": 0.5797484348974786, "grad_norm": 0.4489242136478424, "learning_rate": 0.0001, "loss": 1.7654, "step": 5047 }, { "epoch": 0.5798633048073057, "grad_norm": 0.4277878403663635, "learning_rate": 0.0001, "loss": 1.7829, "step": 5048 }, { "epoch": 0.5799781747171329, "grad_norm": 0.4465863108634949, "learning_rate": 0.0001, "loss": 1.4391, "step": 5049 }, { "epoch": 0.58009304462696, "grad_norm": 0.4445357322692871, "learning_rate": 0.0001, "loss": 1.5621, "step": 5050 }, { "epoch": 0.5802079145367871, "grad_norm": 0.41989558935165405, "learning_rate": 0.0001, "loss": 1.5528, "step": 5051 }, { "epoch": 0.5803227844466142, "grad_norm": 0.4065076410770416, "learning_rate": 0.0001, "loss": 1.5414, "step": 5052 }, { "epoch": 0.5804376543564413, "grad_norm": 0.42565080523490906, "learning_rate": 0.0001, "loss": 1.6824, "step": 5053 }, { "epoch": 0.5805525242662685, "grad_norm": 0.4369344115257263, "learning_rate": 0.0001, "loss": 1.6905, "step": 5054 }, { "epoch": 0.5806673941760956, "grad_norm": 0.4437665343284607, "learning_rate": 0.0001, "loss": 1.702, "step": 5055 }, { "epoch": 0.5807822640859227, "grad_norm": 0.4107830822467804, "learning_rate": 0.0001, "loss": 1.5411, "step": 5056 }, { "epoch": 0.5808971339957498, "grad_norm": 0.4161568582057953, "learning_rate": 0.0001, "loss": 1.6756, "step": 5057 }, { "epoch": 0.581012003905577, "grad_norm": 0.4613957405090332, "learning_rate": 0.0001, "loss": 1.7078, "step": 5058 }, { "epoch": 0.5811268738154041, "grad_norm": 0.44085410237312317, "learning_rate": 0.0001, "loss": 1.7637, "step": 5059 }, { "epoch": 0.5812417437252312, "grad_norm": 0.4186076819896698, "learning_rate": 0.0001, "loss": 1.571, "step": 5060 }, { "epoch": 0.5813566136350583, "grad_norm": 0.4560279846191406, "learning_rate": 0.0001, "loss": 1.7631, "step": 5061 }, { "epoch": 0.5814714835448854, "grad_norm": 0.44726184010505676, "learning_rate": 0.0001, "loss": 1.8003, "step": 5062 }, { "epoch": 0.5815863534547125, "grad_norm": 0.4400385320186615, "learning_rate": 0.0001, "loss": 1.7035, "step": 5063 }, { "epoch": 0.5817012233645397, "grad_norm": 0.42711982131004333, "learning_rate": 0.0001, "loss": 1.5343, "step": 5064 }, { "epoch": 0.5818160932743668, "grad_norm": 0.40731683373451233, "learning_rate": 0.0001, "loss": 1.715, "step": 5065 }, { "epoch": 0.5819309631841939, "grad_norm": 0.4120251536369324, "learning_rate": 0.0001, "loss": 1.4904, "step": 5066 }, { "epoch": 0.582045833094021, "grad_norm": 0.4294714033603668, "learning_rate": 0.0001, "loss": 1.7013, "step": 5067 }, { "epoch": 0.5821607030038481, "grad_norm": 0.4543513059616089, "learning_rate": 0.0001, "loss": 1.7307, "step": 5068 }, { "epoch": 0.5822755729136753, "grad_norm": 0.4606270492076874, "learning_rate": 0.0001, "loss": 1.7362, "step": 5069 }, { "epoch": 0.5823904428235024, "grad_norm": 0.47500404715538025, "learning_rate": 0.0001, "loss": 1.7057, "step": 5070 }, { "epoch": 0.5825053127333295, "grad_norm": 0.39924708008766174, "learning_rate": 0.0001, "loss": 1.4902, "step": 5071 }, { "epoch": 0.5826201826431566, "grad_norm": 0.4525192379951477, "learning_rate": 0.0001, "loss": 1.5355, "step": 5072 }, { "epoch": 0.5827350525529837, "grad_norm": 0.462971568107605, "learning_rate": 0.0001, "loss": 1.5256, "step": 5073 }, { "epoch": 0.5828499224628109, "grad_norm": 0.3806494474411011, "learning_rate": 0.0001, "loss": 1.3727, "step": 5074 }, { "epoch": 0.582964792372638, "grad_norm": 0.4327925741672516, "learning_rate": 0.0001, "loss": 1.5462, "step": 5075 }, { "epoch": 0.5830796622824651, "grad_norm": 0.39075303077697754, "learning_rate": 0.0001, "loss": 1.2952, "step": 5076 }, { "epoch": 0.5831945321922922, "grad_norm": 0.4309884011745453, "learning_rate": 0.0001, "loss": 1.5131, "step": 5077 }, { "epoch": 0.5833094021021193, "grad_norm": 0.4743507206439972, "learning_rate": 0.0001, "loss": 1.7537, "step": 5078 }, { "epoch": 0.5834242720119465, "grad_norm": 0.39666301012039185, "learning_rate": 0.0001, "loss": 1.4356, "step": 5079 }, { "epoch": 0.5835391419217736, "grad_norm": 0.431244432926178, "learning_rate": 0.0001, "loss": 1.6521, "step": 5080 }, { "epoch": 0.5836540118316007, "grad_norm": 0.4098447859287262, "learning_rate": 0.0001, "loss": 1.4811, "step": 5081 }, { "epoch": 0.5837688817414278, "grad_norm": 0.44986143708229065, "learning_rate": 0.0001, "loss": 1.6683, "step": 5082 }, { "epoch": 0.583883751651255, "grad_norm": 0.40975865721702576, "learning_rate": 0.0001, "loss": 1.6027, "step": 5083 }, { "epoch": 0.5839986215610821, "grad_norm": 0.42169931530952454, "learning_rate": 0.0001, "loss": 1.6373, "step": 5084 }, { "epoch": 0.5841134914709092, "grad_norm": 0.4497613310813904, "learning_rate": 0.0001, "loss": 1.7151, "step": 5085 }, { "epoch": 0.5842283613807363, "grad_norm": 0.4520888030529022, "learning_rate": 0.0001, "loss": 1.5859, "step": 5086 }, { "epoch": 0.5843432312905634, "grad_norm": 0.42139726877212524, "learning_rate": 0.0001, "loss": 1.7561, "step": 5087 }, { "epoch": 0.5844581012003905, "grad_norm": 0.4443289041519165, "learning_rate": 0.0001, "loss": 1.6255, "step": 5088 }, { "epoch": 0.5845729711102177, "grad_norm": 0.3971986770629883, "learning_rate": 0.0001, "loss": 1.5313, "step": 5089 }, { "epoch": 0.5846878410200448, "grad_norm": 0.4299372434616089, "learning_rate": 0.0001, "loss": 1.6881, "step": 5090 }, { "epoch": 0.5848027109298719, "grad_norm": 0.40114906430244446, "learning_rate": 0.0001, "loss": 1.5197, "step": 5091 }, { "epoch": 0.584917580839699, "grad_norm": 0.4438854157924652, "learning_rate": 0.0001, "loss": 1.6968, "step": 5092 }, { "epoch": 0.5850324507495261, "grad_norm": 0.44349342584609985, "learning_rate": 0.0001, "loss": 1.6948, "step": 5093 }, { "epoch": 0.5851473206593533, "grad_norm": 0.42675167322158813, "learning_rate": 0.0001, "loss": 1.5868, "step": 5094 }, { "epoch": 0.5852621905691804, "grad_norm": 0.43637698888778687, "learning_rate": 0.0001, "loss": 1.7436, "step": 5095 }, { "epoch": 0.5853770604790075, "grad_norm": 0.44595828652381897, "learning_rate": 0.0001, "loss": 1.7227, "step": 5096 }, { "epoch": 0.5854919303888346, "grad_norm": 0.4193721413612366, "learning_rate": 0.0001, "loss": 1.6027, "step": 5097 }, { "epoch": 0.5856068002986617, "grad_norm": 0.42000266909599304, "learning_rate": 0.0001, "loss": 1.6702, "step": 5098 }, { "epoch": 0.5857216702084889, "grad_norm": 0.4284018874168396, "learning_rate": 0.0001, "loss": 1.6606, "step": 5099 }, { "epoch": 0.585836540118316, "grad_norm": 0.40313681960105896, "learning_rate": 0.0001, "loss": 1.3916, "step": 5100 }, { "epoch": 0.5859514100281431, "grad_norm": 0.4598637521266937, "learning_rate": 0.0001, "loss": 1.7123, "step": 5101 }, { "epoch": 0.5860662799379702, "grad_norm": 0.45406660437583923, "learning_rate": 0.0001, "loss": 1.6212, "step": 5102 }, { "epoch": 0.5861811498477973, "grad_norm": 0.40444087982177734, "learning_rate": 0.0001, "loss": 1.4982, "step": 5103 }, { "epoch": 0.5862960197576245, "grad_norm": 0.4442274272441864, "learning_rate": 0.0001, "loss": 1.5654, "step": 5104 }, { "epoch": 0.5864108896674516, "grad_norm": 0.43179601430892944, "learning_rate": 0.0001, "loss": 1.5765, "step": 5105 }, { "epoch": 0.5865257595772787, "grad_norm": 0.5134614706039429, "learning_rate": 0.0001, "loss": 1.8153, "step": 5106 }, { "epoch": 0.5866406294871058, "grad_norm": 0.4080960750579834, "learning_rate": 0.0001, "loss": 1.4996, "step": 5107 }, { "epoch": 0.586755499396933, "grad_norm": 0.4044763743877411, "learning_rate": 0.0001, "loss": 1.5646, "step": 5108 }, { "epoch": 0.5868703693067601, "grad_norm": 0.40748360753059387, "learning_rate": 0.0001, "loss": 1.3913, "step": 5109 }, { "epoch": 0.5869852392165872, "grad_norm": 0.46773701906204224, "learning_rate": 0.0001, "loss": 1.724, "step": 5110 }, { "epoch": 0.5871001091264143, "grad_norm": 0.46918338537216187, "learning_rate": 0.0001, "loss": 1.8717, "step": 5111 }, { "epoch": 0.5872149790362414, "grad_norm": 0.45832809805870056, "learning_rate": 0.0001, "loss": 1.8843, "step": 5112 }, { "epoch": 0.5873298489460685, "grad_norm": 0.4251342713832855, "learning_rate": 0.0001, "loss": 1.5424, "step": 5113 }, { "epoch": 0.5874447188558957, "grad_norm": 0.4477440118789673, "learning_rate": 0.0001, "loss": 1.729, "step": 5114 }, { "epoch": 0.5875595887657228, "grad_norm": 0.47431480884552, "learning_rate": 0.0001, "loss": 1.8087, "step": 5115 }, { "epoch": 0.5876744586755499, "grad_norm": 0.4649442434310913, "learning_rate": 0.0001, "loss": 1.8614, "step": 5116 }, { "epoch": 0.587789328585377, "grad_norm": 0.40479037165641785, "learning_rate": 0.0001, "loss": 1.5014, "step": 5117 }, { "epoch": 0.5879041984952041, "grad_norm": 0.4071318805217743, "learning_rate": 0.0001, "loss": 1.6169, "step": 5118 }, { "epoch": 0.5880190684050313, "grad_norm": 0.3959406316280365, "learning_rate": 0.0001, "loss": 1.4992, "step": 5119 }, { "epoch": 0.5881339383148584, "grad_norm": 0.4508780241012573, "learning_rate": 0.0001, "loss": 1.6267, "step": 5120 }, { "epoch": 0.5882488082246855, "grad_norm": 0.41295212507247925, "learning_rate": 0.0001, "loss": 1.4935, "step": 5121 }, { "epoch": 0.5883636781345126, "grad_norm": 0.39473357796669006, "learning_rate": 0.0001, "loss": 1.4775, "step": 5122 }, { "epoch": 0.5884785480443397, "grad_norm": 0.4360009431838989, "learning_rate": 0.0001, "loss": 1.8118, "step": 5123 }, { "epoch": 0.5885934179541669, "grad_norm": 0.4192604720592499, "learning_rate": 0.0001, "loss": 1.5552, "step": 5124 }, { "epoch": 0.588708287863994, "grad_norm": 0.44447702169418335, "learning_rate": 0.0001, "loss": 1.5772, "step": 5125 }, { "epoch": 0.5888231577738211, "grad_norm": 0.4330410063266754, "learning_rate": 0.0001, "loss": 1.6319, "step": 5126 }, { "epoch": 0.5889380276836482, "grad_norm": 0.44558635354042053, "learning_rate": 0.0001, "loss": 1.8103, "step": 5127 }, { "epoch": 0.5890528975934753, "grad_norm": 0.4943927526473999, "learning_rate": 0.0001, "loss": 1.77, "step": 5128 }, { "epoch": 0.5891677675033025, "grad_norm": 0.4345741271972656, "learning_rate": 0.0001, "loss": 1.5057, "step": 5129 }, { "epoch": 0.5892826374131296, "grad_norm": 0.4273737967014313, "learning_rate": 0.0001, "loss": 1.6475, "step": 5130 }, { "epoch": 0.5893975073229567, "grad_norm": 0.4054570198059082, "learning_rate": 0.0001, "loss": 1.7624, "step": 5131 }, { "epoch": 0.5895123772327838, "grad_norm": 0.43333131074905396, "learning_rate": 0.0001, "loss": 1.434, "step": 5132 }, { "epoch": 0.5896272471426109, "grad_norm": 0.4401973485946655, "learning_rate": 0.0001, "loss": 1.4911, "step": 5133 }, { "epoch": 0.5897421170524381, "grad_norm": 0.43175458908081055, "learning_rate": 0.0001, "loss": 1.6226, "step": 5134 }, { "epoch": 0.5898569869622652, "grad_norm": 0.4577193558216095, "learning_rate": 0.0001, "loss": 1.825, "step": 5135 }, { "epoch": 0.5899718568720923, "grad_norm": 0.45421233773231506, "learning_rate": 0.0001, "loss": 1.7356, "step": 5136 }, { "epoch": 0.5900867267819194, "grad_norm": 0.4477955102920532, "learning_rate": 0.0001, "loss": 1.6004, "step": 5137 }, { "epoch": 0.5902015966917465, "grad_norm": 0.41615384817123413, "learning_rate": 0.0001, "loss": 1.5265, "step": 5138 }, { "epoch": 0.5903164666015737, "grad_norm": 0.4334186613559723, "learning_rate": 0.0001, "loss": 1.7663, "step": 5139 }, { "epoch": 0.5904313365114008, "grad_norm": 0.4008978605270386, "learning_rate": 0.0001, "loss": 1.6154, "step": 5140 }, { "epoch": 0.5905462064212279, "grad_norm": 0.4319349229335785, "learning_rate": 0.0001, "loss": 1.6769, "step": 5141 }, { "epoch": 0.5906610763310551, "grad_norm": 0.4281979501247406, "learning_rate": 0.0001, "loss": 1.6188, "step": 5142 }, { "epoch": 0.5907759462408823, "grad_norm": 0.4367743134498596, "learning_rate": 0.0001, "loss": 1.7232, "step": 5143 }, { "epoch": 0.5908908161507094, "grad_norm": 0.4409448504447937, "learning_rate": 0.0001, "loss": 1.5421, "step": 5144 }, { "epoch": 0.5910056860605365, "grad_norm": 0.4476580023765564, "learning_rate": 0.0001, "loss": 1.6214, "step": 5145 }, { "epoch": 0.5911205559703636, "grad_norm": 0.43535661697387695, "learning_rate": 0.0001, "loss": 1.6434, "step": 5146 }, { "epoch": 0.5912354258801907, "grad_norm": 0.4497022330760956, "learning_rate": 0.0001, "loss": 1.6346, "step": 5147 }, { "epoch": 0.5913502957900179, "grad_norm": 0.42515555024147034, "learning_rate": 0.0001, "loss": 1.4534, "step": 5148 }, { "epoch": 0.591465165699845, "grad_norm": 0.4538893699645996, "learning_rate": 0.0001, "loss": 1.6042, "step": 5149 }, { "epoch": 0.5915800356096721, "grad_norm": 0.4361916184425354, "learning_rate": 0.0001, "loss": 1.6162, "step": 5150 }, { "epoch": 0.5916949055194992, "grad_norm": 0.43373700976371765, "learning_rate": 0.0001, "loss": 1.7429, "step": 5151 }, { "epoch": 0.5918097754293263, "grad_norm": 0.46759071946144104, "learning_rate": 0.0001, "loss": 1.6652, "step": 5152 }, { "epoch": 0.5919246453391535, "grad_norm": 0.45296040177345276, "learning_rate": 0.0001, "loss": 1.6969, "step": 5153 }, { "epoch": 0.5920395152489806, "grad_norm": 0.3908827602863312, "learning_rate": 0.0001, "loss": 1.5251, "step": 5154 }, { "epoch": 0.5921543851588077, "grad_norm": 0.41083312034606934, "learning_rate": 0.0001, "loss": 1.53, "step": 5155 }, { "epoch": 0.5922692550686348, "grad_norm": 0.4350758492946625, "learning_rate": 0.0001, "loss": 1.6496, "step": 5156 }, { "epoch": 0.5923841249784619, "grad_norm": 0.44962963461875916, "learning_rate": 0.0001, "loss": 1.6378, "step": 5157 }, { "epoch": 0.592498994888289, "grad_norm": 0.47061699628829956, "learning_rate": 0.0001, "loss": 1.9426, "step": 5158 }, { "epoch": 0.5926138647981162, "grad_norm": 0.4288279712200165, "learning_rate": 0.0001, "loss": 1.4317, "step": 5159 }, { "epoch": 0.5927287347079433, "grad_norm": 0.4379502534866333, "learning_rate": 0.0001, "loss": 1.6085, "step": 5160 }, { "epoch": 0.5928436046177704, "grad_norm": 0.41820356249809265, "learning_rate": 0.0001, "loss": 1.5538, "step": 5161 }, { "epoch": 0.5929584745275975, "grad_norm": 0.4359080493450165, "learning_rate": 0.0001, "loss": 1.7123, "step": 5162 }, { "epoch": 0.5930733444374247, "grad_norm": 0.40916740894317627, "learning_rate": 0.0001, "loss": 1.6091, "step": 5163 }, { "epoch": 0.5931882143472518, "grad_norm": 0.4382950961589813, "learning_rate": 0.0001, "loss": 1.5852, "step": 5164 }, { "epoch": 0.5933030842570789, "grad_norm": 0.4391472041606903, "learning_rate": 0.0001, "loss": 1.8258, "step": 5165 }, { "epoch": 0.593417954166906, "grad_norm": 0.4115656316280365, "learning_rate": 0.0001, "loss": 1.5322, "step": 5166 }, { "epoch": 0.5935328240767331, "grad_norm": 0.4537063241004944, "learning_rate": 0.0001, "loss": 1.7013, "step": 5167 }, { "epoch": 0.5936476939865603, "grad_norm": 0.4141421616077423, "learning_rate": 0.0001, "loss": 1.5697, "step": 5168 }, { "epoch": 0.5937625638963874, "grad_norm": 0.42686209082603455, "learning_rate": 0.0001, "loss": 1.5285, "step": 5169 }, { "epoch": 0.5938774338062145, "grad_norm": 0.4201597273349762, "learning_rate": 0.0001, "loss": 1.3904, "step": 5170 }, { "epoch": 0.5939923037160416, "grad_norm": 0.43558233976364136, "learning_rate": 0.0001, "loss": 1.529, "step": 5171 }, { "epoch": 0.5941071736258687, "grad_norm": 0.43399038910865784, "learning_rate": 0.0001, "loss": 1.6307, "step": 5172 }, { "epoch": 0.5942220435356959, "grad_norm": 0.40567871928215027, "learning_rate": 0.0001, "loss": 1.4648, "step": 5173 }, { "epoch": 0.594336913445523, "grad_norm": 0.44226759672164917, "learning_rate": 0.0001, "loss": 1.6939, "step": 5174 }, { "epoch": 0.5944517833553501, "grad_norm": 0.4169192910194397, "learning_rate": 0.0001, "loss": 1.5668, "step": 5175 }, { "epoch": 0.5945666532651772, "grad_norm": 0.4189157485961914, "learning_rate": 0.0001, "loss": 1.6832, "step": 5176 }, { "epoch": 0.5946815231750043, "grad_norm": 0.4361801743507385, "learning_rate": 0.0001, "loss": 1.53, "step": 5177 }, { "epoch": 0.5947963930848315, "grad_norm": 0.41981688141822815, "learning_rate": 0.0001, "loss": 1.6242, "step": 5178 }, { "epoch": 0.5949112629946586, "grad_norm": 0.4301121234893799, "learning_rate": 0.0001, "loss": 1.6233, "step": 5179 }, { "epoch": 0.5950261329044857, "grad_norm": 0.42863231897354126, "learning_rate": 0.0001, "loss": 1.6747, "step": 5180 }, { "epoch": 0.5951410028143128, "grad_norm": 0.4355190694332123, "learning_rate": 0.0001, "loss": 1.6419, "step": 5181 }, { "epoch": 0.5952558727241399, "grad_norm": 0.4153485894203186, "learning_rate": 0.0001, "loss": 1.6399, "step": 5182 }, { "epoch": 0.595370742633967, "grad_norm": 0.44687628746032715, "learning_rate": 0.0001, "loss": 1.7095, "step": 5183 }, { "epoch": 0.5954856125437942, "grad_norm": 0.4202732443809509, "learning_rate": 0.0001, "loss": 1.6044, "step": 5184 }, { "epoch": 0.5956004824536213, "grad_norm": 0.4697241187095642, "learning_rate": 0.0001, "loss": 1.7244, "step": 5185 }, { "epoch": 0.5957153523634484, "grad_norm": 0.428061306476593, "learning_rate": 0.0001, "loss": 1.4106, "step": 5186 }, { "epoch": 0.5958302222732755, "grad_norm": 0.4194773733615875, "learning_rate": 0.0001, "loss": 1.6237, "step": 5187 }, { "epoch": 0.5959450921831027, "grad_norm": 0.45670002698898315, "learning_rate": 0.0001, "loss": 1.6503, "step": 5188 }, { "epoch": 0.5960599620929298, "grad_norm": 0.43582040071487427, "learning_rate": 0.0001, "loss": 1.6735, "step": 5189 }, { "epoch": 0.5961748320027569, "grad_norm": 0.4162776470184326, "learning_rate": 0.0001, "loss": 1.5631, "step": 5190 }, { "epoch": 0.596289701912584, "grad_norm": 0.4055253565311432, "learning_rate": 0.0001, "loss": 1.5907, "step": 5191 }, { "epoch": 0.5964045718224111, "grad_norm": 0.4150947332382202, "learning_rate": 0.0001, "loss": 1.6579, "step": 5192 }, { "epoch": 0.5965194417322383, "grad_norm": 0.4119030237197876, "learning_rate": 0.0001, "loss": 1.5916, "step": 5193 }, { "epoch": 0.5966343116420654, "grad_norm": 0.43656405806541443, "learning_rate": 0.0001, "loss": 1.6424, "step": 5194 }, { "epoch": 0.5967491815518925, "grad_norm": 0.4502866864204407, "learning_rate": 0.0001, "loss": 1.7509, "step": 5195 }, { "epoch": 0.5968640514617196, "grad_norm": 0.4506380558013916, "learning_rate": 0.0001, "loss": 1.6351, "step": 5196 }, { "epoch": 0.5969789213715467, "grad_norm": 0.48698195815086365, "learning_rate": 0.0001, "loss": 1.7874, "step": 5197 }, { "epoch": 0.5970937912813739, "grad_norm": 0.42721307277679443, "learning_rate": 0.0001, "loss": 1.6294, "step": 5198 }, { "epoch": 0.597208661191201, "grad_norm": 0.428030788898468, "learning_rate": 0.0001, "loss": 1.752, "step": 5199 }, { "epoch": 0.5973235311010281, "grad_norm": 0.43251699209213257, "learning_rate": 0.0001, "loss": 1.8308, "step": 5200 }, { "epoch": 0.5974384010108552, "grad_norm": 0.4091329872608185, "learning_rate": 0.0001, "loss": 1.5663, "step": 5201 }, { "epoch": 0.5975532709206823, "grad_norm": 0.4518309235572815, "learning_rate": 0.0001, "loss": 1.6743, "step": 5202 }, { "epoch": 0.5976681408305095, "grad_norm": 0.4639250338077545, "learning_rate": 0.0001, "loss": 1.6573, "step": 5203 }, { "epoch": 0.5977830107403366, "grad_norm": 0.41630759835243225, "learning_rate": 0.0001, "loss": 1.6118, "step": 5204 }, { "epoch": 0.5978978806501637, "grad_norm": 0.47292619943618774, "learning_rate": 0.0001, "loss": 1.8113, "step": 5205 }, { "epoch": 0.5980127505599908, "grad_norm": 0.4484859108924866, "learning_rate": 0.0001, "loss": 1.6676, "step": 5206 }, { "epoch": 0.5981276204698179, "grad_norm": 0.42833200097084045, "learning_rate": 0.0001, "loss": 1.7428, "step": 5207 }, { "epoch": 0.598242490379645, "grad_norm": 0.40881600975990295, "learning_rate": 0.0001, "loss": 1.4835, "step": 5208 }, { "epoch": 0.5983573602894722, "grad_norm": 0.412045955657959, "learning_rate": 0.0001, "loss": 1.591, "step": 5209 }, { "epoch": 0.5984722301992993, "grad_norm": 0.4817086160182953, "learning_rate": 0.0001, "loss": 1.7922, "step": 5210 }, { "epoch": 0.5985871001091264, "grad_norm": 0.46558666229248047, "learning_rate": 0.0001, "loss": 1.7812, "step": 5211 }, { "epoch": 0.5987019700189535, "grad_norm": 0.43379732966423035, "learning_rate": 0.0001, "loss": 1.7301, "step": 5212 }, { "epoch": 0.5988168399287807, "grad_norm": 0.4646340310573578, "learning_rate": 0.0001, "loss": 1.7395, "step": 5213 }, { "epoch": 0.5989317098386078, "grad_norm": 0.4490647315979004, "learning_rate": 0.0001, "loss": 1.559, "step": 5214 }, { "epoch": 0.5990465797484349, "grad_norm": 0.404638409614563, "learning_rate": 0.0001, "loss": 1.6963, "step": 5215 }, { "epoch": 0.599161449658262, "grad_norm": 0.4501255452632904, "learning_rate": 0.0001, "loss": 1.7297, "step": 5216 }, { "epoch": 0.5992763195680891, "grad_norm": 0.39730724692344666, "learning_rate": 0.0001, "loss": 1.4956, "step": 5217 }, { "epoch": 0.5993911894779163, "grad_norm": 0.5064243674278259, "learning_rate": 0.0001, "loss": 1.8652, "step": 5218 }, { "epoch": 0.5995060593877434, "grad_norm": 0.41334420442581177, "learning_rate": 0.0001, "loss": 1.7021, "step": 5219 }, { "epoch": 0.5996209292975705, "grad_norm": 0.44307664036750793, "learning_rate": 0.0001, "loss": 1.7261, "step": 5220 }, { "epoch": 0.5997357992073976, "grad_norm": 0.4100275933742523, "learning_rate": 0.0001, "loss": 1.5175, "step": 5221 }, { "epoch": 0.5998506691172247, "grad_norm": 0.4305122494697571, "learning_rate": 0.0001, "loss": 1.5417, "step": 5222 }, { "epoch": 0.5999655390270519, "grad_norm": 0.8222983479499817, "learning_rate": 0.0001, "loss": 1.5928, "step": 5223 }, { "epoch": 0.600080408936879, "grad_norm": 0.46560120582580566, "learning_rate": 0.0001, "loss": 1.5538, "step": 5224 }, { "epoch": 0.6001952788467061, "grad_norm": 0.413687139749527, "learning_rate": 0.0001, "loss": 1.5065, "step": 5225 }, { "epoch": 0.6003101487565332, "grad_norm": 0.4451950192451477, "learning_rate": 0.0001, "loss": 1.6965, "step": 5226 }, { "epoch": 0.6004250186663603, "grad_norm": 0.42893052101135254, "learning_rate": 0.0001, "loss": 1.4295, "step": 5227 }, { "epoch": 0.6005398885761875, "grad_norm": 0.4320535659790039, "learning_rate": 0.0001, "loss": 1.6712, "step": 5228 }, { "epoch": 0.6006547584860146, "grad_norm": 0.4111475944519043, "learning_rate": 0.0001, "loss": 1.5597, "step": 5229 }, { "epoch": 0.6007696283958417, "grad_norm": 0.4269995093345642, "learning_rate": 0.0001, "loss": 1.5736, "step": 5230 }, { "epoch": 0.6008844983056688, "grad_norm": 0.4943247139453888, "learning_rate": 0.0001, "loss": 1.4476, "step": 5231 }, { "epoch": 0.6009993682154959, "grad_norm": 0.43460339307785034, "learning_rate": 0.0001, "loss": 1.738, "step": 5232 }, { "epoch": 0.601114238125323, "grad_norm": 0.45859336853027344, "learning_rate": 0.0001, "loss": 1.7815, "step": 5233 }, { "epoch": 0.6012291080351502, "grad_norm": 0.4369470775127411, "learning_rate": 0.0001, "loss": 1.745, "step": 5234 }, { "epoch": 0.6013439779449773, "grad_norm": 0.39994218945503235, "learning_rate": 0.0001, "loss": 1.4631, "step": 5235 }, { "epoch": 0.6014588478548044, "grad_norm": 0.4235687851905823, "learning_rate": 0.0001, "loss": 1.6046, "step": 5236 }, { "epoch": 0.6015737177646315, "grad_norm": 0.45614656805992126, "learning_rate": 0.0001, "loss": 1.6661, "step": 5237 }, { "epoch": 0.6016885876744587, "grad_norm": 0.4340771436691284, "learning_rate": 0.0001, "loss": 1.5328, "step": 5238 }, { "epoch": 0.6018034575842858, "grad_norm": 0.458238422870636, "learning_rate": 0.0001, "loss": 1.796, "step": 5239 }, { "epoch": 0.6019183274941129, "grad_norm": 0.42685821652412415, "learning_rate": 0.0001, "loss": 1.6421, "step": 5240 }, { "epoch": 0.60203319740394, "grad_norm": 0.4220592975616455, "learning_rate": 0.0001, "loss": 1.602, "step": 5241 }, { "epoch": 0.6021480673137671, "grad_norm": 0.41730085015296936, "learning_rate": 0.0001, "loss": 1.5507, "step": 5242 }, { "epoch": 0.6022629372235943, "grad_norm": 0.4415682852268219, "learning_rate": 0.0001, "loss": 1.6285, "step": 5243 }, { "epoch": 0.6023778071334214, "grad_norm": 0.41799354553222656, "learning_rate": 0.0001, "loss": 1.5479, "step": 5244 }, { "epoch": 0.6024926770432485, "grad_norm": 0.47606340050697327, "learning_rate": 0.0001, "loss": 1.8567, "step": 5245 }, { "epoch": 0.6026075469530756, "grad_norm": 0.4121111333370209, "learning_rate": 0.0001, "loss": 1.5148, "step": 5246 }, { "epoch": 0.6027224168629027, "grad_norm": 0.43468672037124634, "learning_rate": 0.0001, "loss": 1.5985, "step": 5247 }, { "epoch": 0.6028372867727299, "grad_norm": 0.3789305090904236, "learning_rate": 0.0001, "loss": 1.2786, "step": 5248 }, { "epoch": 0.602952156682557, "grad_norm": 0.3968721330165863, "learning_rate": 0.0001, "loss": 1.5158, "step": 5249 }, { "epoch": 0.6030670265923841, "grad_norm": 0.46839994192123413, "learning_rate": 0.0001, "loss": 1.9271, "step": 5250 }, { "epoch": 0.6031818965022112, "grad_norm": 0.4195137023925781, "learning_rate": 0.0001, "loss": 1.573, "step": 5251 }, { "epoch": 0.6032967664120383, "grad_norm": 0.4006030261516571, "learning_rate": 0.0001, "loss": 1.509, "step": 5252 }, { "epoch": 0.6034116363218655, "grad_norm": 0.4589271545410156, "learning_rate": 0.0001, "loss": 1.791, "step": 5253 }, { "epoch": 0.6035265062316926, "grad_norm": 0.4122845232486725, "learning_rate": 0.0001, "loss": 1.6732, "step": 5254 }, { "epoch": 0.6036413761415197, "grad_norm": 0.4685841500759125, "learning_rate": 0.0001, "loss": 1.7381, "step": 5255 }, { "epoch": 0.6037562460513468, "grad_norm": 0.455402672290802, "learning_rate": 0.0001, "loss": 1.6299, "step": 5256 }, { "epoch": 0.6038711159611739, "grad_norm": 0.4526597261428833, "learning_rate": 0.0001, "loss": 1.5991, "step": 5257 }, { "epoch": 0.603985985871001, "grad_norm": 0.42606300115585327, "learning_rate": 0.0001, "loss": 1.4687, "step": 5258 }, { "epoch": 0.6041008557808282, "grad_norm": 0.45859774947166443, "learning_rate": 0.0001, "loss": 1.7105, "step": 5259 }, { "epoch": 0.6042157256906553, "grad_norm": 0.4001207947731018, "learning_rate": 0.0001, "loss": 1.446, "step": 5260 }, { "epoch": 0.6043305956004824, "grad_norm": 0.4063998758792877, "learning_rate": 0.0001, "loss": 1.6282, "step": 5261 }, { "epoch": 0.6044454655103095, "grad_norm": 0.4181976616382599, "learning_rate": 0.0001, "loss": 1.5813, "step": 5262 }, { "epoch": 0.6045603354201367, "grad_norm": 0.4905684292316437, "learning_rate": 0.0001, "loss": 1.8801, "step": 5263 }, { "epoch": 0.6046752053299638, "grad_norm": 0.4444495737552643, "learning_rate": 0.0001, "loss": 1.4989, "step": 5264 }, { "epoch": 0.6047900752397909, "grad_norm": 0.4244244694709778, "learning_rate": 0.0001, "loss": 1.5065, "step": 5265 }, { "epoch": 0.604904945149618, "grad_norm": 0.48309603333473206, "learning_rate": 0.0001, "loss": 1.7415, "step": 5266 }, { "epoch": 0.6050198150594451, "grad_norm": 0.4249451756477356, "learning_rate": 0.0001, "loss": 1.6244, "step": 5267 }, { "epoch": 0.6051346849692723, "grad_norm": 0.4714745283126831, "learning_rate": 0.0001, "loss": 1.6689, "step": 5268 }, { "epoch": 0.6052495548790994, "grad_norm": 0.4268541634082794, "learning_rate": 0.0001, "loss": 1.6959, "step": 5269 }, { "epoch": 0.6053644247889265, "grad_norm": 0.4523731768131256, "learning_rate": 0.0001, "loss": 1.5711, "step": 5270 }, { "epoch": 0.6054792946987536, "grad_norm": 0.47980424761772156, "learning_rate": 0.0001, "loss": 1.6036, "step": 5271 }, { "epoch": 0.6055941646085807, "grad_norm": 0.43983185291290283, "learning_rate": 0.0001, "loss": 1.4902, "step": 5272 }, { "epoch": 0.6057090345184079, "grad_norm": 0.41765522956848145, "learning_rate": 0.0001, "loss": 1.5531, "step": 5273 }, { "epoch": 0.605823904428235, "grad_norm": 0.4522044360637665, "learning_rate": 0.0001, "loss": 1.7149, "step": 5274 }, { "epoch": 0.6059387743380621, "grad_norm": 0.4616985321044922, "learning_rate": 0.0001, "loss": 1.714, "step": 5275 }, { "epoch": 0.6060536442478892, "grad_norm": 0.4109836518764496, "learning_rate": 0.0001, "loss": 1.6499, "step": 5276 }, { "epoch": 0.6061685141577163, "grad_norm": 0.4296359717845917, "learning_rate": 0.0001, "loss": 1.507, "step": 5277 }, { "epoch": 0.6062833840675435, "grad_norm": 0.469368577003479, "learning_rate": 0.0001, "loss": 1.7734, "step": 5278 }, { "epoch": 0.6063982539773707, "grad_norm": 0.41937753558158875, "learning_rate": 0.0001, "loss": 1.7647, "step": 5279 }, { "epoch": 0.6065131238871978, "grad_norm": 0.4088084399700165, "learning_rate": 0.0001, "loss": 1.3934, "step": 5280 }, { "epoch": 0.6066279937970249, "grad_norm": 0.4353596568107605, "learning_rate": 0.0001, "loss": 1.6364, "step": 5281 }, { "epoch": 0.606742863706852, "grad_norm": 0.4809739589691162, "learning_rate": 0.0001, "loss": 1.8213, "step": 5282 }, { "epoch": 0.6068577336166792, "grad_norm": 0.4168318808078766, "learning_rate": 0.0001, "loss": 1.5803, "step": 5283 }, { "epoch": 0.6069726035265063, "grad_norm": 0.4438597857952118, "learning_rate": 0.0001, "loss": 1.6453, "step": 5284 }, { "epoch": 0.6070874734363334, "grad_norm": 0.4545292556285858, "learning_rate": 0.0001, "loss": 1.4997, "step": 5285 }, { "epoch": 0.6072023433461605, "grad_norm": 0.4365883469581604, "learning_rate": 0.0001, "loss": 1.4618, "step": 5286 }, { "epoch": 0.6073172132559876, "grad_norm": 0.4614627957344055, "learning_rate": 0.0001, "loss": 1.6091, "step": 5287 }, { "epoch": 0.6074320831658148, "grad_norm": 0.4482710659503937, "learning_rate": 0.0001, "loss": 1.7119, "step": 5288 }, { "epoch": 0.6075469530756419, "grad_norm": 0.404892235994339, "learning_rate": 0.0001, "loss": 1.5626, "step": 5289 }, { "epoch": 0.607661822985469, "grad_norm": 0.4186989367008209, "learning_rate": 0.0001, "loss": 1.6791, "step": 5290 }, { "epoch": 0.6077766928952961, "grad_norm": 0.39953678846359253, "learning_rate": 0.0001, "loss": 1.5322, "step": 5291 }, { "epoch": 0.6078915628051232, "grad_norm": 0.4357908070087433, "learning_rate": 0.0001, "loss": 1.7891, "step": 5292 }, { "epoch": 0.6080064327149504, "grad_norm": 0.4001089334487915, "learning_rate": 0.0001, "loss": 1.489, "step": 5293 }, { "epoch": 0.6081213026247775, "grad_norm": 0.44134122133255005, "learning_rate": 0.0001, "loss": 1.5262, "step": 5294 }, { "epoch": 0.6082361725346046, "grad_norm": 0.4329252243041992, "learning_rate": 0.0001, "loss": 1.5406, "step": 5295 }, { "epoch": 0.6083510424444317, "grad_norm": 0.41654008626937866, "learning_rate": 0.0001, "loss": 1.5814, "step": 5296 }, { "epoch": 0.6084659123542588, "grad_norm": 0.43320518732070923, "learning_rate": 0.0001, "loss": 1.5642, "step": 5297 }, { "epoch": 0.608580782264086, "grad_norm": 0.4640411138534546, "learning_rate": 0.0001, "loss": 1.7619, "step": 5298 }, { "epoch": 0.6086956521739131, "grad_norm": 0.4832577705383301, "learning_rate": 0.0001, "loss": 1.886, "step": 5299 }, { "epoch": 0.6088105220837402, "grad_norm": 0.4550410509109497, "learning_rate": 0.0001, "loss": 1.6412, "step": 5300 }, { "epoch": 0.6089253919935673, "grad_norm": 0.4446395933628082, "learning_rate": 0.0001, "loss": 1.5463, "step": 5301 }, { "epoch": 0.6090402619033944, "grad_norm": 0.44712090492248535, "learning_rate": 0.0001, "loss": 1.7342, "step": 5302 }, { "epoch": 0.6091551318132216, "grad_norm": 0.46963006258010864, "learning_rate": 0.0001, "loss": 1.7253, "step": 5303 }, { "epoch": 0.6092700017230487, "grad_norm": 0.4730952978134155, "learning_rate": 0.0001, "loss": 1.537, "step": 5304 }, { "epoch": 0.6093848716328758, "grad_norm": 0.4162120521068573, "learning_rate": 0.0001, "loss": 1.3652, "step": 5305 }, { "epoch": 0.6094997415427029, "grad_norm": 0.4251774251461029, "learning_rate": 0.0001, "loss": 1.5932, "step": 5306 }, { "epoch": 0.60961461145253, "grad_norm": 0.4354092478752136, "learning_rate": 0.0001, "loss": 1.6629, "step": 5307 }, { "epoch": 0.6097294813623572, "grad_norm": 0.41313931345939636, "learning_rate": 0.0001, "loss": 1.6076, "step": 5308 }, { "epoch": 0.6098443512721843, "grad_norm": 0.47585734724998474, "learning_rate": 0.0001, "loss": 1.6206, "step": 5309 }, { "epoch": 0.6099592211820114, "grad_norm": 0.41451969742774963, "learning_rate": 0.0001, "loss": 1.5368, "step": 5310 }, { "epoch": 0.6100740910918385, "grad_norm": 0.42730268836021423, "learning_rate": 0.0001, "loss": 1.4848, "step": 5311 }, { "epoch": 0.6101889610016656, "grad_norm": 0.4963441789150238, "learning_rate": 0.0001, "loss": 1.719, "step": 5312 }, { "epoch": 0.6103038309114928, "grad_norm": 0.46581605076789856, "learning_rate": 0.0001, "loss": 1.8281, "step": 5313 }, { "epoch": 0.6104187008213199, "grad_norm": 0.4398311972618103, "learning_rate": 0.0001, "loss": 1.6242, "step": 5314 }, { "epoch": 0.610533570731147, "grad_norm": 0.44360533356666565, "learning_rate": 0.0001, "loss": 1.6104, "step": 5315 }, { "epoch": 0.6106484406409741, "grad_norm": 0.4808873236179352, "learning_rate": 0.0001, "loss": 1.7675, "step": 5316 }, { "epoch": 0.6107633105508012, "grad_norm": 0.40180516242980957, "learning_rate": 0.0001, "loss": 1.4326, "step": 5317 }, { "epoch": 0.6108781804606284, "grad_norm": 0.4601239860057831, "learning_rate": 0.0001, "loss": 1.5387, "step": 5318 }, { "epoch": 0.6109930503704555, "grad_norm": 0.4126811921596527, "learning_rate": 0.0001, "loss": 1.5125, "step": 5319 }, { "epoch": 0.6111079202802826, "grad_norm": 0.45912793278694153, "learning_rate": 0.0001, "loss": 1.716, "step": 5320 }, { "epoch": 0.6112227901901097, "grad_norm": 0.43586671352386475, "learning_rate": 0.0001, "loss": 1.6456, "step": 5321 }, { "epoch": 0.6113376600999368, "grad_norm": 0.4181722402572632, "learning_rate": 0.0001, "loss": 1.6628, "step": 5322 }, { "epoch": 0.611452530009764, "grad_norm": 0.4227443039417267, "learning_rate": 0.0001, "loss": 1.4071, "step": 5323 }, { "epoch": 0.6115673999195911, "grad_norm": 0.42024925351142883, "learning_rate": 0.0001, "loss": 1.6544, "step": 5324 }, { "epoch": 0.6116822698294182, "grad_norm": 0.42331016063690186, "learning_rate": 0.0001, "loss": 1.6499, "step": 5325 }, { "epoch": 0.6117971397392453, "grad_norm": 0.4389086663722992, "learning_rate": 0.0001, "loss": 1.5742, "step": 5326 }, { "epoch": 0.6119120096490724, "grad_norm": 0.42871832847595215, "learning_rate": 0.0001, "loss": 1.5348, "step": 5327 }, { "epoch": 0.6120268795588996, "grad_norm": 0.4434123635292053, "learning_rate": 0.0001, "loss": 1.358, "step": 5328 }, { "epoch": 0.6121417494687267, "grad_norm": 0.460252046585083, "learning_rate": 0.0001, "loss": 1.5934, "step": 5329 }, { "epoch": 0.6122566193785538, "grad_norm": 0.4134620428085327, "learning_rate": 0.0001, "loss": 1.5169, "step": 5330 }, { "epoch": 0.6123714892883809, "grad_norm": 0.4522944986820221, "learning_rate": 0.0001, "loss": 1.754, "step": 5331 }, { "epoch": 0.612486359198208, "grad_norm": 0.47153130173683167, "learning_rate": 0.0001, "loss": 1.6896, "step": 5332 }, { "epoch": 0.6126012291080352, "grad_norm": 0.4305824041366577, "learning_rate": 0.0001, "loss": 1.5964, "step": 5333 }, { "epoch": 0.6127160990178623, "grad_norm": 0.4275292158126831, "learning_rate": 0.0001, "loss": 1.6582, "step": 5334 }, { "epoch": 0.6128309689276894, "grad_norm": 0.44580233097076416, "learning_rate": 0.0001, "loss": 1.5851, "step": 5335 }, { "epoch": 0.6129458388375165, "grad_norm": 0.42193603515625, "learning_rate": 0.0001, "loss": 1.5258, "step": 5336 }, { "epoch": 0.6130607087473436, "grad_norm": 0.4233403503894806, "learning_rate": 0.0001, "loss": 1.6786, "step": 5337 }, { "epoch": 0.6131755786571708, "grad_norm": 0.45567867159843445, "learning_rate": 0.0001, "loss": 1.76, "step": 5338 }, { "epoch": 0.6132904485669979, "grad_norm": 0.4384849965572357, "learning_rate": 0.0001, "loss": 1.4506, "step": 5339 }, { "epoch": 0.613405318476825, "grad_norm": 0.442428320646286, "learning_rate": 0.0001, "loss": 1.7631, "step": 5340 }, { "epoch": 0.6135201883866521, "grad_norm": 0.42773962020874023, "learning_rate": 0.0001, "loss": 1.6109, "step": 5341 }, { "epoch": 0.6136350582964792, "grad_norm": 0.4419418275356293, "learning_rate": 0.0001, "loss": 1.6981, "step": 5342 }, { "epoch": 0.6137499282063064, "grad_norm": 0.4556467533111572, "learning_rate": 0.0001, "loss": 1.726, "step": 5343 }, { "epoch": 0.6138647981161335, "grad_norm": 0.41763773560523987, "learning_rate": 0.0001, "loss": 1.7347, "step": 5344 }, { "epoch": 0.6139796680259606, "grad_norm": 0.39312151074409485, "learning_rate": 0.0001, "loss": 1.4951, "step": 5345 }, { "epoch": 0.6140945379357877, "grad_norm": 0.4212815761566162, "learning_rate": 0.0001, "loss": 1.711, "step": 5346 }, { "epoch": 0.6142094078456148, "grad_norm": 0.425071656703949, "learning_rate": 0.0001, "loss": 1.5043, "step": 5347 }, { "epoch": 0.614324277755442, "grad_norm": 0.4074556827545166, "learning_rate": 0.0001, "loss": 1.5206, "step": 5348 }, { "epoch": 0.6144391476652691, "grad_norm": 0.45755401253700256, "learning_rate": 0.0001, "loss": 1.6133, "step": 5349 }, { "epoch": 0.6145540175750962, "grad_norm": 0.43361806869506836, "learning_rate": 0.0001, "loss": 1.6676, "step": 5350 }, { "epoch": 0.6146688874849233, "grad_norm": 0.45359665155410767, "learning_rate": 0.0001, "loss": 1.7329, "step": 5351 }, { "epoch": 0.6147837573947504, "grad_norm": 0.4411814510822296, "learning_rate": 0.0001, "loss": 1.5939, "step": 5352 }, { "epoch": 0.6148986273045776, "grad_norm": 0.4266383647918701, "learning_rate": 0.0001, "loss": 1.6575, "step": 5353 }, { "epoch": 0.6150134972144047, "grad_norm": 0.4333922564983368, "learning_rate": 0.0001, "loss": 1.7112, "step": 5354 }, { "epoch": 0.6151283671242318, "grad_norm": 0.48001882433891296, "learning_rate": 0.0001, "loss": 1.8559, "step": 5355 }, { "epoch": 0.6152432370340589, "grad_norm": 0.43073657155036926, "learning_rate": 0.0001, "loss": 1.6248, "step": 5356 }, { "epoch": 0.615358106943886, "grad_norm": 0.42971518635749817, "learning_rate": 0.0001, "loss": 1.6327, "step": 5357 }, { "epoch": 0.6154729768537132, "grad_norm": 0.41858428716659546, "learning_rate": 0.0001, "loss": 1.7038, "step": 5358 }, { "epoch": 0.6155878467635403, "grad_norm": 0.4066700041294098, "learning_rate": 0.0001, "loss": 1.5025, "step": 5359 }, { "epoch": 0.6157027166733674, "grad_norm": 0.42960700392723083, "learning_rate": 0.0001, "loss": 1.7926, "step": 5360 }, { "epoch": 0.6158175865831945, "grad_norm": 0.425981342792511, "learning_rate": 0.0001, "loss": 1.5527, "step": 5361 }, { "epoch": 0.6159324564930216, "grad_norm": 0.44844427704811096, "learning_rate": 0.0001, "loss": 1.7161, "step": 5362 }, { "epoch": 0.6160473264028488, "grad_norm": 0.44269078969955444, "learning_rate": 0.0001, "loss": 1.6469, "step": 5363 }, { "epoch": 0.6161621963126759, "grad_norm": 0.4440799951553345, "learning_rate": 0.0001, "loss": 1.5691, "step": 5364 }, { "epoch": 0.616277066222503, "grad_norm": 0.46443289518356323, "learning_rate": 0.0001, "loss": 1.7149, "step": 5365 }, { "epoch": 0.6163919361323301, "grad_norm": 0.43236956000328064, "learning_rate": 0.0001, "loss": 1.4569, "step": 5366 }, { "epoch": 0.6165068060421572, "grad_norm": 0.4475458860397339, "learning_rate": 0.0001, "loss": 1.6878, "step": 5367 }, { "epoch": 0.6166216759519844, "grad_norm": 0.4264468252658844, "learning_rate": 0.0001, "loss": 1.6352, "step": 5368 }, { "epoch": 0.6167365458618115, "grad_norm": 0.4327240586280823, "learning_rate": 0.0001, "loss": 1.6209, "step": 5369 }, { "epoch": 0.6168514157716386, "grad_norm": 0.43093448877334595, "learning_rate": 0.0001, "loss": 1.637, "step": 5370 }, { "epoch": 0.6169662856814657, "grad_norm": 0.4652967154979706, "learning_rate": 0.0001, "loss": 1.8944, "step": 5371 }, { "epoch": 0.6170811555912928, "grad_norm": 0.40782034397125244, "learning_rate": 0.0001, "loss": 1.4373, "step": 5372 }, { "epoch": 0.61719602550112, "grad_norm": 0.3958606421947479, "learning_rate": 0.0001, "loss": 1.4501, "step": 5373 }, { "epoch": 0.6173108954109471, "grad_norm": 0.4088459312915802, "learning_rate": 0.0001, "loss": 1.5566, "step": 5374 }, { "epoch": 0.6174257653207742, "grad_norm": 0.42610761523246765, "learning_rate": 0.0001, "loss": 1.7281, "step": 5375 }, { "epoch": 0.6175406352306013, "grad_norm": 0.4308398365974426, "learning_rate": 0.0001, "loss": 1.5579, "step": 5376 }, { "epoch": 0.6176555051404284, "grad_norm": 0.4212062358856201, "learning_rate": 0.0001, "loss": 1.5424, "step": 5377 }, { "epoch": 0.6177703750502556, "grad_norm": 0.4277915358543396, "learning_rate": 0.0001, "loss": 1.4436, "step": 5378 }, { "epoch": 0.6178852449600827, "grad_norm": 0.45164668560028076, "learning_rate": 0.0001, "loss": 1.3541, "step": 5379 }, { "epoch": 0.6180001148699098, "grad_norm": 0.46653538942337036, "learning_rate": 0.0001, "loss": 1.7403, "step": 5380 }, { "epoch": 0.6181149847797369, "grad_norm": 0.4493342936038971, "learning_rate": 0.0001, "loss": 1.6205, "step": 5381 }, { "epoch": 0.618229854689564, "grad_norm": 0.46748727560043335, "learning_rate": 0.0001, "loss": 1.7097, "step": 5382 }, { "epoch": 0.6183447245993912, "grad_norm": 0.46008816361427307, "learning_rate": 0.0001, "loss": 1.7876, "step": 5383 }, { "epoch": 0.6184595945092183, "grad_norm": 0.4166385531425476, "learning_rate": 0.0001, "loss": 1.666, "step": 5384 }, { "epoch": 0.6185744644190454, "grad_norm": 0.46595972776412964, "learning_rate": 0.0001, "loss": 1.7317, "step": 5385 }, { "epoch": 0.6186893343288725, "grad_norm": 0.41361644864082336, "learning_rate": 0.0001, "loss": 1.6432, "step": 5386 }, { "epoch": 0.6188042042386996, "grad_norm": 0.4521470069885254, "learning_rate": 0.0001, "loss": 1.7181, "step": 5387 }, { "epoch": 0.6189190741485268, "grad_norm": 0.4314734637737274, "learning_rate": 0.0001, "loss": 1.7027, "step": 5388 }, { "epoch": 0.6190339440583539, "grad_norm": 0.49784350395202637, "learning_rate": 0.0001, "loss": 1.473, "step": 5389 }, { "epoch": 0.619148813968181, "grad_norm": 0.4353158473968506, "learning_rate": 0.0001, "loss": 1.5425, "step": 5390 }, { "epoch": 0.6192636838780081, "grad_norm": 0.405387818813324, "learning_rate": 0.0001, "loss": 1.5862, "step": 5391 }, { "epoch": 0.6193785537878352, "grad_norm": 0.42280814051628113, "learning_rate": 0.0001, "loss": 1.6584, "step": 5392 }, { "epoch": 0.6194934236976624, "grad_norm": 0.42651262879371643, "learning_rate": 0.0001, "loss": 1.6186, "step": 5393 }, { "epoch": 0.6196082936074895, "grad_norm": 0.4402959644794464, "learning_rate": 0.0001, "loss": 1.5753, "step": 5394 }, { "epoch": 0.6197231635173166, "grad_norm": 0.41979432106018066, "learning_rate": 0.0001, "loss": 1.5584, "step": 5395 }, { "epoch": 0.6198380334271437, "grad_norm": 0.40594765543937683, "learning_rate": 0.0001, "loss": 1.5995, "step": 5396 }, { "epoch": 0.6199529033369708, "grad_norm": 0.4297271966934204, "learning_rate": 0.0001, "loss": 1.6042, "step": 5397 }, { "epoch": 0.620067773246798, "grad_norm": 0.47684258222579956, "learning_rate": 0.0001, "loss": 1.7715, "step": 5398 }, { "epoch": 0.6201826431566251, "grad_norm": 0.44073134660720825, "learning_rate": 0.0001, "loss": 1.6191, "step": 5399 }, { "epoch": 0.6202975130664522, "grad_norm": 0.4722387194633484, "learning_rate": 0.0001, "loss": 1.6578, "step": 5400 }, { "epoch": 0.6204123829762793, "grad_norm": 0.4637337327003479, "learning_rate": 0.0001, "loss": 1.6055, "step": 5401 }, { "epoch": 0.6205272528861064, "grad_norm": 0.43157336115837097, "learning_rate": 0.0001, "loss": 1.7081, "step": 5402 }, { "epoch": 0.6206421227959336, "grad_norm": 0.4264506995677948, "learning_rate": 0.0001, "loss": 1.5326, "step": 5403 }, { "epoch": 0.6207569927057607, "grad_norm": 0.42598217725753784, "learning_rate": 0.0001, "loss": 1.6313, "step": 5404 }, { "epoch": 0.6208718626155878, "grad_norm": 0.4123907685279846, "learning_rate": 0.0001, "loss": 1.4996, "step": 5405 }, { "epoch": 0.6209867325254149, "grad_norm": 0.4402943551540375, "learning_rate": 0.0001, "loss": 1.7162, "step": 5406 }, { "epoch": 0.621101602435242, "grad_norm": 0.4304845929145813, "learning_rate": 0.0001, "loss": 1.7301, "step": 5407 }, { "epoch": 0.6212164723450692, "grad_norm": 0.43135395646095276, "learning_rate": 0.0001, "loss": 1.6792, "step": 5408 }, { "epoch": 0.6213313422548963, "grad_norm": 0.42243319749832153, "learning_rate": 0.0001, "loss": 1.548, "step": 5409 }, { "epoch": 0.6214462121647234, "grad_norm": 0.4920620024204254, "learning_rate": 0.0001, "loss": 1.7013, "step": 5410 }, { "epoch": 0.6215610820745505, "grad_norm": 0.41864511370658875, "learning_rate": 0.0001, "loss": 1.6426, "step": 5411 }, { "epoch": 0.6216759519843776, "grad_norm": 0.4509679675102234, "learning_rate": 0.0001, "loss": 1.7161, "step": 5412 }, { "epoch": 0.6217908218942048, "grad_norm": 0.4350128769874573, "learning_rate": 0.0001, "loss": 1.7858, "step": 5413 }, { "epoch": 0.6219056918040319, "grad_norm": 0.4748633801937103, "learning_rate": 0.0001, "loss": 1.7502, "step": 5414 }, { "epoch": 0.622020561713859, "grad_norm": 0.47240012884140015, "learning_rate": 0.0001, "loss": 1.88, "step": 5415 }, { "epoch": 0.6221354316236862, "grad_norm": 0.42443564534187317, "learning_rate": 0.0001, "loss": 1.6455, "step": 5416 }, { "epoch": 0.6222503015335134, "grad_norm": 0.4257674217224121, "learning_rate": 0.0001, "loss": 1.726, "step": 5417 }, { "epoch": 0.6223651714433405, "grad_norm": 0.4025082588195801, "learning_rate": 0.0001, "loss": 1.5188, "step": 5418 }, { "epoch": 0.6224800413531676, "grad_norm": 0.4154866337776184, "learning_rate": 0.0001, "loss": 1.6686, "step": 5419 }, { "epoch": 0.6225949112629947, "grad_norm": 0.41281425952911377, "learning_rate": 0.0001, "loss": 1.5721, "step": 5420 }, { "epoch": 0.6227097811728218, "grad_norm": 0.44021958112716675, "learning_rate": 0.0001, "loss": 1.6801, "step": 5421 }, { "epoch": 0.622824651082649, "grad_norm": 0.4224892556667328, "learning_rate": 0.0001, "loss": 1.7219, "step": 5422 }, { "epoch": 0.6229395209924761, "grad_norm": 0.44819357991218567, "learning_rate": 0.0001, "loss": 1.6665, "step": 5423 }, { "epoch": 0.6230543909023032, "grad_norm": 0.4217240512371063, "learning_rate": 0.0001, "loss": 1.5709, "step": 5424 }, { "epoch": 0.6231692608121303, "grad_norm": 0.45825833082199097, "learning_rate": 0.0001, "loss": 1.6292, "step": 5425 }, { "epoch": 0.6232841307219574, "grad_norm": 0.427919864654541, "learning_rate": 0.0001, "loss": 1.6708, "step": 5426 }, { "epoch": 0.6233990006317846, "grad_norm": 0.42998164892196655, "learning_rate": 0.0001, "loss": 1.4521, "step": 5427 }, { "epoch": 0.6235138705416117, "grad_norm": 0.4373745024204254, "learning_rate": 0.0001, "loss": 1.6303, "step": 5428 }, { "epoch": 0.6236287404514388, "grad_norm": 0.4681239426136017, "learning_rate": 0.0001, "loss": 1.7811, "step": 5429 }, { "epoch": 0.6237436103612659, "grad_norm": 0.4634624719619751, "learning_rate": 0.0001, "loss": 1.8239, "step": 5430 }, { "epoch": 0.623858480271093, "grad_norm": 0.4383297562599182, "learning_rate": 0.0001, "loss": 1.5753, "step": 5431 }, { "epoch": 0.6239733501809201, "grad_norm": 0.46512508392333984, "learning_rate": 0.0001, "loss": 1.8332, "step": 5432 }, { "epoch": 0.6240882200907473, "grad_norm": 0.4566185474395752, "learning_rate": 0.0001, "loss": 1.6087, "step": 5433 }, { "epoch": 0.6242030900005744, "grad_norm": 0.3922889828681946, "learning_rate": 0.0001, "loss": 1.525, "step": 5434 }, { "epoch": 0.6243179599104015, "grad_norm": 0.4435923993587494, "learning_rate": 0.0001, "loss": 1.7022, "step": 5435 }, { "epoch": 0.6244328298202286, "grad_norm": 0.39931339025497437, "learning_rate": 0.0001, "loss": 1.4173, "step": 5436 }, { "epoch": 0.6245476997300557, "grad_norm": 0.44321221113204956, "learning_rate": 0.0001, "loss": 1.6534, "step": 5437 }, { "epoch": 0.6246625696398829, "grad_norm": 0.40193137526512146, "learning_rate": 0.0001, "loss": 1.5338, "step": 5438 }, { "epoch": 0.62477743954971, "grad_norm": 0.45055443048477173, "learning_rate": 0.0001, "loss": 1.6323, "step": 5439 }, { "epoch": 0.6248923094595371, "grad_norm": 0.38443759083747864, "learning_rate": 0.0001, "loss": 1.4288, "step": 5440 }, { "epoch": 0.6250071793693642, "grad_norm": 0.47293904423713684, "learning_rate": 0.0001, "loss": 1.7437, "step": 5441 }, { "epoch": 0.6251220492791913, "grad_norm": 0.4477282464504242, "learning_rate": 0.0001, "loss": 1.709, "step": 5442 }, { "epoch": 0.6252369191890185, "grad_norm": 0.4328649938106537, "learning_rate": 0.0001, "loss": 1.4047, "step": 5443 }, { "epoch": 0.6253517890988456, "grad_norm": 0.4671032428741455, "learning_rate": 0.0001, "loss": 1.6841, "step": 5444 }, { "epoch": 0.6254666590086727, "grad_norm": 0.4175901412963867, "learning_rate": 0.0001, "loss": 1.5299, "step": 5445 }, { "epoch": 0.6255815289184998, "grad_norm": 0.45132726430892944, "learning_rate": 0.0001, "loss": 1.5784, "step": 5446 }, { "epoch": 0.625696398828327, "grad_norm": 0.45002493262290955, "learning_rate": 0.0001, "loss": 1.6631, "step": 5447 }, { "epoch": 0.6258112687381541, "grad_norm": 0.49596941471099854, "learning_rate": 0.0001, "loss": 1.7334, "step": 5448 }, { "epoch": 0.6259261386479812, "grad_norm": 0.5087515711784363, "learning_rate": 0.0001, "loss": 1.7773, "step": 5449 }, { "epoch": 0.6260410085578083, "grad_norm": 0.43160462379455566, "learning_rate": 0.0001, "loss": 1.5872, "step": 5450 }, { "epoch": 0.6261558784676354, "grad_norm": 0.41861292719841003, "learning_rate": 0.0001, "loss": 1.4826, "step": 5451 }, { "epoch": 0.6262707483774625, "grad_norm": 0.3896733820438385, "learning_rate": 0.0001, "loss": 1.4015, "step": 5452 }, { "epoch": 0.6263856182872897, "grad_norm": 0.4674106538295746, "learning_rate": 0.0001, "loss": 1.6152, "step": 5453 }, { "epoch": 0.6265004881971168, "grad_norm": 0.4450713098049164, "learning_rate": 0.0001, "loss": 1.5596, "step": 5454 }, { "epoch": 0.6266153581069439, "grad_norm": 0.5066030621528625, "learning_rate": 0.0001, "loss": 1.7224, "step": 5455 }, { "epoch": 0.626730228016771, "grad_norm": 0.41346675157546997, "learning_rate": 0.0001, "loss": 1.6268, "step": 5456 }, { "epoch": 0.6268450979265981, "grad_norm": 0.45128896832466125, "learning_rate": 0.0001, "loss": 1.5715, "step": 5457 }, { "epoch": 0.6269599678364253, "grad_norm": 0.42013174295425415, "learning_rate": 0.0001, "loss": 1.7011, "step": 5458 }, { "epoch": 0.6270748377462524, "grad_norm": 0.4740227460861206, "learning_rate": 0.0001, "loss": 1.6902, "step": 5459 }, { "epoch": 0.6271897076560795, "grad_norm": 0.4719151258468628, "learning_rate": 0.0001, "loss": 1.905, "step": 5460 }, { "epoch": 0.6273045775659066, "grad_norm": 0.46546176075935364, "learning_rate": 0.0001, "loss": 1.6859, "step": 5461 }, { "epoch": 0.6274194474757337, "grad_norm": 0.44801339507102966, "learning_rate": 0.0001, "loss": 1.606, "step": 5462 }, { "epoch": 0.6275343173855609, "grad_norm": 0.49131953716278076, "learning_rate": 0.0001, "loss": 1.6653, "step": 5463 }, { "epoch": 0.627649187295388, "grad_norm": 0.4395316243171692, "learning_rate": 0.0001, "loss": 1.5935, "step": 5464 }, { "epoch": 0.6277640572052151, "grad_norm": 0.43068498373031616, "learning_rate": 0.0001, "loss": 1.6478, "step": 5465 }, { "epoch": 0.6278789271150422, "grad_norm": 0.41301921010017395, "learning_rate": 0.0001, "loss": 1.41, "step": 5466 }, { "epoch": 0.6279937970248693, "grad_norm": 0.44340062141418457, "learning_rate": 0.0001, "loss": 1.7067, "step": 5467 }, { "epoch": 0.6281086669346965, "grad_norm": 0.43528372049331665, "learning_rate": 0.0001, "loss": 1.5209, "step": 5468 }, { "epoch": 0.6282235368445236, "grad_norm": 0.4367297291755676, "learning_rate": 0.0001, "loss": 1.574, "step": 5469 }, { "epoch": 0.6283384067543507, "grad_norm": 0.4735720455646515, "learning_rate": 0.0001, "loss": 1.6138, "step": 5470 }, { "epoch": 0.6284532766641778, "grad_norm": 0.43283382058143616, "learning_rate": 0.0001, "loss": 1.6992, "step": 5471 }, { "epoch": 0.628568146574005, "grad_norm": 0.446654349565506, "learning_rate": 0.0001, "loss": 1.4944, "step": 5472 }, { "epoch": 0.6286830164838321, "grad_norm": 0.4372828006744385, "learning_rate": 0.0001, "loss": 1.4353, "step": 5473 }, { "epoch": 0.6287978863936592, "grad_norm": 0.4139867424964905, "learning_rate": 0.0001, "loss": 1.5969, "step": 5474 }, { "epoch": 0.6289127563034863, "grad_norm": 0.4681108891963959, "learning_rate": 0.0001, "loss": 1.7943, "step": 5475 }, { "epoch": 0.6290276262133134, "grad_norm": 0.42920827865600586, "learning_rate": 0.0001, "loss": 1.5649, "step": 5476 }, { "epoch": 0.6291424961231405, "grad_norm": 0.42748481035232544, "learning_rate": 0.0001, "loss": 1.5491, "step": 5477 }, { "epoch": 0.6292573660329677, "grad_norm": 0.4460189938545227, "learning_rate": 0.0001, "loss": 1.5892, "step": 5478 }, { "epoch": 0.6293722359427948, "grad_norm": 0.43394696712493896, "learning_rate": 0.0001, "loss": 1.5284, "step": 5479 }, { "epoch": 0.6294871058526219, "grad_norm": 0.429083913564682, "learning_rate": 0.0001, "loss": 1.6287, "step": 5480 }, { "epoch": 0.629601975762449, "grad_norm": 0.4215359687805176, "learning_rate": 0.0001, "loss": 1.5587, "step": 5481 }, { "epoch": 0.6297168456722761, "grad_norm": 0.45450299978256226, "learning_rate": 0.0001, "loss": 1.3634, "step": 5482 }, { "epoch": 0.6298317155821033, "grad_norm": 0.4605744481086731, "learning_rate": 0.0001, "loss": 1.7417, "step": 5483 }, { "epoch": 0.6299465854919304, "grad_norm": 0.4325323700904846, "learning_rate": 0.0001, "loss": 1.4853, "step": 5484 }, { "epoch": 0.6300614554017575, "grad_norm": 0.441438764333725, "learning_rate": 0.0001, "loss": 1.5838, "step": 5485 }, { "epoch": 0.6301763253115846, "grad_norm": 0.5233652591705322, "learning_rate": 0.0001, "loss": 1.9394, "step": 5486 }, { "epoch": 0.6302911952214117, "grad_norm": 0.44866594672203064, "learning_rate": 0.0001, "loss": 1.8056, "step": 5487 }, { "epoch": 0.6304060651312389, "grad_norm": 0.4310179352760315, "learning_rate": 0.0001, "loss": 1.6206, "step": 5488 }, { "epoch": 0.630520935041066, "grad_norm": 0.46676987409591675, "learning_rate": 0.0001, "loss": 1.7363, "step": 5489 }, { "epoch": 0.6306358049508931, "grad_norm": 0.45726069808006287, "learning_rate": 0.0001, "loss": 1.5432, "step": 5490 }, { "epoch": 0.6307506748607202, "grad_norm": 0.46568796038627625, "learning_rate": 0.0001, "loss": 1.772, "step": 5491 }, { "epoch": 0.6308655447705473, "grad_norm": 0.43564847111701965, "learning_rate": 0.0001, "loss": 1.7335, "step": 5492 }, { "epoch": 0.6309804146803745, "grad_norm": 0.43100833892822266, "learning_rate": 0.0001, "loss": 1.5743, "step": 5493 }, { "epoch": 0.6310952845902016, "grad_norm": 0.48715853691101074, "learning_rate": 0.0001, "loss": 1.6999, "step": 5494 }, { "epoch": 0.6312101545000287, "grad_norm": 0.4202541708946228, "learning_rate": 0.0001, "loss": 1.4744, "step": 5495 }, { "epoch": 0.6313250244098558, "grad_norm": 0.49194201827049255, "learning_rate": 0.0001, "loss": 1.6739, "step": 5496 }, { "epoch": 0.631439894319683, "grad_norm": 0.4225814938545227, "learning_rate": 0.0001, "loss": 1.6509, "step": 5497 }, { "epoch": 0.6315547642295101, "grad_norm": 0.41638097167015076, "learning_rate": 0.0001, "loss": 1.6172, "step": 5498 }, { "epoch": 0.6316696341393372, "grad_norm": 0.44772642850875854, "learning_rate": 0.0001, "loss": 1.8187, "step": 5499 }, { "epoch": 0.6317845040491643, "grad_norm": 0.4238649904727936, "learning_rate": 0.0001, "loss": 1.6478, "step": 5500 }, { "epoch": 0.6318993739589914, "grad_norm": 0.4199509918689728, "learning_rate": 0.0001, "loss": 1.7285, "step": 5501 }, { "epoch": 0.6320142438688185, "grad_norm": 0.4321916699409485, "learning_rate": 0.0001, "loss": 1.6335, "step": 5502 }, { "epoch": 0.6321291137786457, "grad_norm": 0.4737303853034973, "learning_rate": 0.0001, "loss": 1.4796, "step": 5503 }, { "epoch": 0.6322439836884728, "grad_norm": 0.450839102268219, "learning_rate": 0.0001, "loss": 1.5807, "step": 5504 }, { "epoch": 0.6323588535982999, "grad_norm": 0.4439679682254791, "learning_rate": 0.0001, "loss": 1.6076, "step": 5505 }, { "epoch": 0.632473723508127, "grad_norm": 0.438607782125473, "learning_rate": 0.0001, "loss": 1.6273, "step": 5506 }, { "epoch": 0.6325885934179541, "grad_norm": 0.44356659054756165, "learning_rate": 0.0001, "loss": 1.6346, "step": 5507 }, { "epoch": 0.6327034633277813, "grad_norm": 0.47536197304725647, "learning_rate": 0.0001, "loss": 1.673, "step": 5508 }, { "epoch": 0.6328183332376084, "grad_norm": 0.4841341972351074, "learning_rate": 0.0001, "loss": 1.627, "step": 5509 }, { "epoch": 0.6329332031474355, "grad_norm": 0.4180241823196411, "learning_rate": 0.0001, "loss": 1.4014, "step": 5510 }, { "epoch": 0.6330480730572626, "grad_norm": 0.46159014105796814, "learning_rate": 0.0001, "loss": 1.7947, "step": 5511 }, { "epoch": 0.6331629429670897, "grad_norm": 0.43560919165611267, "learning_rate": 0.0001, "loss": 1.6293, "step": 5512 }, { "epoch": 0.6332778128769169, "grad_norm": 0.41563135385513306, "learning_rate": 0.0001, "loss": 1.548, "step": 5513 }, { "epoch": 0.633392682786744, "grad_norm": 0.4384070634841919, "learning_rate": 0.0001, "loss": 1.6485, "step": 5514 }, { "epoch": 0.6335075526965711, "grad_norm": 0.45419734716415405, "learning_rate": 0.0001, "loss": 1.5951, "step": 5515 }, { "epoch": 0.6336224226063982, "grad_norm": 0.438769668340683, "learning_rate": 0.0001, "loss": 1.5075, "step": 5516 }, { "epoch": 0.6337372925162253, "grad_norm": 0.4500299394130707, "learning_rate": 0.0001, "loss": 1.7686, "step": 5517 }, { "epoch": 0.6338521624260525, "grad_norm": 0.543213963508606, "learning_rate": 0.0001, "loss": 1.7704, "step": 5518 }, { "epoch": 0.6339670323358796, "grad_norm": 0.4491806924343109, "learning_rate": 0.0001, "loss": 1.7672, "step": 5519 }, { "epoch": 0.6340819022457067, "grad_norm": 0.4237130284309387, "learning_rate": 0.0001, "loss": 1.5714, "step": 5520 }, { "epoch": 0.6341967721555338, "grad_norm": 0.4394038915634155, "learning_rate": 0.0001, "loss": 1.5889, "step": 5521 }, { "epoch": 0.634311642065361, "grad_norm": 0.4348239302635193, "learning_rate": 0.0001, "loss": 1.6972, "step": 5522 }, { "epoch": 0.6344265119751881, "grad_norm": 0.45309606194496155, "learning_rate": 0.0001, "loss": 1.6889, "step": 5523 }, { "epoch": 0.6345413818850152, "grad_norm": 0.40822720527648926, "learning_rate": 0.0001, "loss": 1.5377, "step": 5524 }, { "epoch": 0.6346562517948423, "grad_norm": 0.48253944516181946, "learning_rate": 0.0001, "loss": 1.727, "step": 5525 }, { "epoch": 0.6347711217046694, "grad_norm": 0.4541143476963043, "learning_rate": 0.0001, "loss": 1.8615, "step": 5526 }, { "epoch": 0.6348859916144965, "grad_norm": 0.4158259630203247, "learning_rate": 0.0001, "loss": 1.4537, "step": 5527 }, { "epoch": 0.6350008615243237, "grad_norm": 0.41893231868743896, "learning_rate": 0.0001, "loss": 1.6787, "step": 5528 }, { "epoch": 0.6351157314341508, "grad_norm": 0.4827512502670288, "learning_rate": 0.0001, "loss": 1.7608, "step": 5529 }, { "epoch": 0.6352306013439779, "grad_norm": 0.44435709714889526, "learning_rate": 0.0001, "loss": 1.6079, "step": 5530 }, { "epoch": 0.635345471253805, "grad_norm": 0.4805011451244354, "learning_rate": 0.0001, "loss": 1.7446, "step": 5531 }, { "epoch": 0.6354603411636321, "grad_norm": 0.43575429916381836, "learning_rate": 0.0001, "loss": 1.6265, "step": 5532 }, { "epoch": 0.6355752110734593, "grad_norm": 0.4281097650527954, "learning_rate": 0.0001, "loss": 1.7776, "step": 5533 }, { "epoch": 0.6356900809832864, "grad_norm": 0.41298815608024597, "learning_rate": 0.0001, "loss": 1.5776, "step": 5534 }, { "epoch": 0.6358049508931135, "grad_norm": 0.43981999158859253, "learning_rate": 0.0001, "loss": 1.6219, "step": 5535 }, { "epoch": 0.6359198208029406, "grad_norm": 0.46365123987197876, "learning_rate": 0.0001, "loss": 1.7387, "step": 5536 }, { "epoch": 0.6360346907127677, "grad_norm": 0.40434473752975464, "learning_rate": 0.0001, "loss": 1.5259, "step": 5537 }, { "epoch": 0.6361495606225949, "grad_norm": 0.42286887764930725, "learning_rate": 0.0001, "loss": 1.4093, "step": 5538 }, { "epoch": 0.636264430532422, "grad_norm": 0.4497700035572052, "learning_rate": 0.0001, "loss": 1.8652, "step": 5539 }, { "epoch": 0.6363793004422491, "grad_norm": 0.4267425537109375, "learning_rate": 0.0001, "loss": 1.704, "step": 5540 }, { "epoch": 0.6364941703520762, "grad_norm": 0.448066383600235, "learning_rate": 0.0001, "loss": 1.5721, "step": 5541 }, { "epoch": 0.6366090402619033, "grad_norm": 0.46805888414382935, "learning_rate": 0.0001, "loss": 1.7208, "step": 5542 }, { "epoch": 0.6367239101717305, "grad_norm": 0.42206695675849915, "learning_rate": 0.0001, "loss": 1.3684, "step": 5543 }, { "epoch": 0.6368387800815576, "grad_norm": 0.39510831236839294, "learning_rate": 0.0001, "loss": 1.6721, "step": 5544 }, { "epoch": 0.6369536499913847, "grad_norm": 0.4754282236099243, "learning_rate": 0.0001, "loss": 1.8412, "step": 5545 }, { "epoch": 0.6370685199012118, "grad_norm": 0.42278730869293213, "learning_rate": 0.0001, "loss": 1.4947, "step": 5546 }, { "epoch": 0.637183389811039, "grad_norm": 0.4810383915901184, "learning_rate": 0.0001, "loss": 1.6435, "step": 5547 }, { "epoch": 0.6372982597208661, "grad_norm": 0.4392484128475189, "learning_rate": 0.0001, "loss": 1.4485, "step": 5548 }, { "epoch": 0.6374131296306932, "grad_norm": 0.46849682927131653, "learning_rate": 0.0001, "loss": 1.7611, "step": 5549 }, { "epoch": 0.6375279995405203, "grad_norm": 0.428786039352417, "learning_rate": 0.0001, "loss": 1.7185, "step": 5550 }, { "epoch": 0.6376428694503474, "grad_norm": 0.41656240820884705, "learning_rate": 0.0001, "loss": 1.5787, "step": 5551 }, { "epoch": 0.6377577393601745, "grad_norm": 0.447986364364624, "learning_rate": 0.0001, "loss": 1.6424, "step": 5552 }, { "epoch": 0.6378726092700018, "grad_norm": 0.4375683665275574, "learning_rate": 0.0001, "loss": 1.7039, "step": 5553 }, { "epoch": 0.6379874791798289, "grad_norm": 0.4190763831138611, "learning_rate": 0.0001, "loss": 1.5491, "step": 5554 }, { "epoch": 0.638102349089656, "grad_norm": 0.42789924144744873, "learning_rate": 0.0001, "loss": 1.6198, "step": 5555 }, { "epoch": 0.6382172189994831, "grad_norm": 0.42377930879592896, "learning_rate": 0.0001, "loss": 1.6668, "step": 5556 }, { "epoch": 0.6383320889093103, "grad_norm": 0.43022698163986206, "learning_rate": 0.0001, "loss": 1.5902, "step": 5557 }, { "epoch": 0.6384469588191374, "grad_norm": 0.40309348702430725, "learning_rate": 0.0001, "loss": 1.4805, "step": 5558 }, { "epoch": 0.6385618287289645, "grad_norm": 0.4346220791339874, "learning_rate": 0.0001, "loss": 1.598, "step": 5559 }, { "epoch": 0.6386766986387916, "grad_norm": 0.45233309268951416, "learning_rate": 0.0001, "loss": 1.685, "step": 5560 }, { "epoch": 0.6387915685486187, "grad_norm": 0.44141674041748047, "learning_rate": 0.0001, "loss": 1.6379, "step": 5561 }, { "epoch": 0.6389064384584459, "grad_norm": 0.45340585708618164, "learning_rate": 0.0001, "loss": 1.7477, "step": 5562 }, { "epoch": 0.639021308368273, "grad_norm": 0.46206724643707275, "learning_rate": 0.0001, "loss": 1.8006, "step": 5563 }, { "epoch": 0.6391361782781001, "grad_norm": 0.42310798168182373, "learning_rate": 0.0001, "loss": 1.7606, "step": 5564 }, { "epoch": 0.6392510481879272, "grad_norm": 0.40606430172920227, "learning_rate": 0.0001, "loss": 1.4893, "step": 5565 }, { "epoch": 0.6393659180977543, "grad_norm": 0.4227276146411896, "learning_rate": 0.0001, "loss": 1.5051, "step": 5566 }, { "epoch": 0.6394807880075815, "grad_norm": 0.42999976873397827, "learning_rate": 0.0001, "loss": 1.6919, "step": 5567 }, { "epoch": 0.6395956579174086, "grad_norm": 0.4548937678337097, "learning_rate": 0.0001, "loss": 1.606, "step": 5568 }, { "epoch": 0.6397105278272357, "grad_norm": 0.4163641333580017, "learning_rate": 0.0001, "loss": 1.4716, "step": 5569 }, { "epoch": 0.6398253977370628, "grad_norm": 0.41462451219558716, "learning_rate": 0.0001, "loss": 1.674, "step": 5570 }, { "epoch": 0.6399402676468899, "grad_norm": 0.4123389422893524, "learning_rate": 0.0001, "loss": 1.5769, "step": 5571 }, { "epoch": 0.6400551375567171, "grad_norm": 0.447255939245224, "learning_rate": 0.0001, "loss": 1.6402, "step": 5572 }, { "epoch": 0.6401700074665442, "grad_norm": 0.46185576915740967, "learning_rate": 0.0001, "loss": 1.5632, "step": 5573 }, { "epoch": 0.6402848773763713, "grad_norm": 0.46707770228385925, "learning_rate": 0.0001, "loss": 1.7298, "step": 5574 }, { "epoch": 0.6403997472861984, "grad_norm": 0.4091823399066925, "learning_rate": 0.0001, "loss": 1.4606, "step": 5575 }, { "epoch": 0.6405146171960255, "grad_norm": 0.45736071467399597, "learning_rate": 0.0001, "loss": 1.764, "step": 5576 }, { "epoch": 0.6406294871058527, "grad_norm": 0.4426022469997406, "learning_rate": 0.0001, "loss": 1.6942, "step": 5577 }, { "epoch": 0.6407443570156798, "grad_norm": 0.42908066511154175, "learning_rate": 0.0001, "loss": 1.6928, "step": 5578 }, { "epoch": 0.6408592269255069, "grad_norm": 0.4217868149280548, "learning_rate": 0.0001, "loss": 1.5034, "step": 5579 }, { "epoch": 0.640974096835334, "grad_norm": 0.4545440673828125, "learning_rate": 0.0001, "loss": 1.7952, "step": 5580 }, { "epoch": 0.6410889667451611, "grad_norm": 0.42396777868270874, "learning_rate": 0.0001, "loss": 1.54, "step": 5581 }, { "epoch": 0.6412038366549883, "grad_norm": 0.43753355741500854, "learning_rate": 0.0001, "loss": 1.5584, "step": 5582 }, { "epoch": 0.6413187065648154, "grad_norm": 0.46905073523521423, "learning_rate": 0.0001, "loss": 1.5581, "step": 5583 }, { "epoch": 0.6414335764746425, "grad_norm": 0.4515920877456665, "learning_rate": 0.0001, "loss": 1.4264, "step": 5584 }, { "epoch": 0.6415484463844696, "grad_norm": 0.434110552072525, "learning_rate": 0.0001, "loss": 1.6005, "step": 5585 }, { "epoch": 0.6416633162942967, "grad_norm": 0.4196917414665222, "learning_rate": 0.0001, "loss": 1.6059, "step": 5586 }, { "epoch": 0.6417781862041239, "grad_norm": 0.4676187336444855, "learning_rate": 0.0001, "loss": 1.7113, "step": 5587 }, { "epoch": 0.641893056113951, "grad_norm": 0.4227234423160553, "learning_rate": 0.0001, "loss": 1.573, "step": 5588 }, { "epoch": 0.6420079260237781, "grad_norm": 0.45535120368003845, "learning_rate": 0.0001, "loss": 1.6142, "step": 5589 }, { "epoch": 0.6421227959336052, "grad_norm": 0.4416712820529938, "learning_rate": 0.0001, "loss": 1.6422, "step": 5590 }, { "epoch": 0.6422376658434323, "grad_norm": 0.454698383808136, "learning_rate": 0.0001, "loss": 1.835, "step": 5591 }, { "epoch": 0.6423525357532595, "grad_norm": 0.429575115442276, "learning_rate": 0.0001, "loss": 1.6046, "step": 5592 }, { "epoch": 0.6424674056630866, "grad_norm": 0.47407978773117065, "learning_rate": 0.0001, "loss": 1.7451, "step": 5593 }, { "epoch": 0.6425822755729137, "grad_norm": 0.4476969838142395, "learning_rate": 0.0001, "loss": 1.6611, "step": 5594 }, { "epoch": 0.6426971454827408, "grad_norm": 0.48670604825019836, "learning_rate": 0.0001, "loss": 1.8238, "step": 5595 }, { "epoch": 0.6428120153925679, "grad_norm": 0.4715462923049927, "learning_rate": 0.0001, "loss": 1.5888, "step": 5596 }, { "epoch": 0.6429268853023951, "grad_norm": 0.4310706555843353, "learning_rate": 0.0001, "loss": 1.6972, "step": 5597 }, { "epoch": 0.6430417552122222, "grad_norm": 0.4375722408294678, "learning_rate": 0.0001, "loss": 1.7016, "step": 5598 }, { "epoch": 0.6431566251220493, "grad_norm": 0.44785434007644653, "learning_rate": 0.0001, "loss": 1.5328, "step": 5599 }, { "epoch": 0.6432714950318764, "grad_norm": 0.43473127484321594, "learning_rate": 0.0001, "loss": 1.5382, "step": 5600 }, { "epoch": 0.6433863649417035, "grad_norm": 0.5235137939453125, "learning_rate": 0.0001, "loss": 1.7376, "step": 5601 }, { "epoch": 0.6435012348515307, "grad_norm": 0.47962141036987305, "learning_rate": 0.0001, "loss": 1.4158, "step": 5602 }, { "epoch": 0.6436161047613578, "grad_norm": 0.42864152789115906, "learning_rate": 0.0001, "loss": 1.5161, "step": 5603 }, { "epoch": 0.6437309746711849, "grad_norm": 0.4481607675552368, "learning_rate": 0.0001, "loss": 1.6201, "step": 5604 }, { "epoch": 0.643845844581012, "grad_norm": 0.503777027130127, "learning_rate": 0.0001, "loss": 1.9532, "step": 5605 }, { "epoch": 0.6439607144908391, "grad_norm": 0.44352078437805176, "learning_rate": 0.0001, "loss": 1.7002, "step": 5606 }, { "epoch": 0.6440755844006663, "grad_norm": 0.43461933732032776, "learning_rate": 0.0001, "loss": 1.685, "step": 5607 }, { "epoch": 0.6441904543104934, "grad_norm": 0.41317033767700195, "learning_rate": 0.0001, "loss": 1.6394, "step": 5608 }, { "epoch": 0.6443053242203205, "grad_norm": 0.47428956627845764, "learning_rate": 0.0001, "loss": 1.768, "step": 5609 }, { "epoch": 0.6444201941301476, "grad_norm": 0.4190300703048706, "learning_rate": 0.0001, "loss": 1.5919, "step": 5610 }, { "epoch": 0.6445350640399747, "grad_norm": 0.4992254376411438, "learning_rate": 0.0001, "loss": 1.9133, "step": 5611 }, { "epoch": 0.6446499339498019, "grad_norm": 0.4316772222518921, "learning_rate": 0.0001, "loss": 1.3204, "step": 5612 }, { "epoch": 0.644764803859629, "grad_norm": 0.4693394899368286, "learning_rate": 0.0001, "loss": 1.8395, "step": 5613 }, { "epoch": 0.6448796737694561, "grad_norm": 0.4208371043205261, "learning_rate": 0.0001, "loss": 1.4802, "step": 5614 }, { "epoch": 0.6449945436792832, "grad_norm": 0.44202813506126404, "learning_rate": 0.0001, "loss": 1.5953, "step": 5615 }, { "epoch": 0.6451094135891103, "grad_norm": 0.4268895387649536, "learning_rate": 0.0001, "loss": 1.6182, "step": 5616 }, { "epoch": 0.6452242834989375, "grad_norm": 0.4346774220466614, "learning_rate": 0.0001, "loss": 1.5141, "step": 5617 }, { "epoch": 0.6453391534087646, "grad_norm": 0.46032214164733887, "learning_rate": 0.0001, "loss": 1.6249, "step": 5618 }, { "epoch": 0.6454540233185917, "grad_norm": 0.4409688413143158, "learning_rate": 0.0001, "loss": 1.5832, "step": 5619 }, { "epoch": 0.6455688932284188, "grad_norm": 0.4379858374595642, "learning_rate": 0.0001, "loss": 1.6493, "step": 5620 }, { "epoch": 0.6456837631382459, "grad_norm": 0.43202537298202515, "learning_rate": 0.0001, "loss": 1.5685, "step": 5621 }, { "epoch": 0.6457986330480731, "grad_norm": 0.44553086161613464, "learning_rate": 0.0001, "loss": 1.5579, "step": 5622 }, { "epoch": 0.6459135029579002, "grad_norm": 0.42607608437538147, "learning_rate": 0.0001, "loss": 1.6773, "step": 5623 }, { "epoch": 0.6460283728677273, "grad_norm": 0.45914703607559204, "learning_rate": 0.0001, "loss": 1.8061, "step": 5624 }, { "epoch": 0.6461432427775544, "grad_norm": 0.46458396315574646, "learning_rate": 0.0001, "loss": 1.701, "step": 5625 }, { "epoch": 0.6462581126873815, "grad_norm": 0.4472190737724304, "learning_rate": 0.0001, "loss": 1.6258, "step": 5626 }, { "epoch": 0.6463729825972087, "grad_norm": 0.4329933524131775, "learning_rate": 0.0001, "loss": 1.5421, "step": 5627 }, { "epoch": 0.6464878525070358, "grad_norm": 0.4526257812976837, "learning_rate": 0.0001, "loss": 1.5711, "step": 5628 }, { "epoch": 0.6466027224168629, "grad_norm": 0.4655599892139435, "learning_rate": 0.0001, "loss": 1.5488, "step": 5629 }, { "epoch": 0.64671759232669, "grad_norm": 0.40564316511154175, "learning_rate": 0.0001, "loss": 1.602, "step": 5630 }, { "epoch": 0.6468324622365171, "grad_norm": 0.41868773102760315, "learning_rate": 0.0001, "loss": 1.6287, "step": 5631 }, { "epoch": 0.6469473321463443, "grad_norm": 0.4080910384654999, "learning_rate": 0.0001, "loss": 1.6231, "step": 5632 }, { "epoch": 0.6470622020561714, "grad_norm": 0.45903560519218445, "learning_rate": 0.0001, "loss": 1.8245, "step": 5633 }, { "epoch": 0.6471770719659985, "grad_norm": 0.4414862394332886, "learning_rate": 0.0001, "loss": 1.6152, "step": 5634 }, { "epoch": 0.6472919418758256, "grad_norm": 0.43176713585853577, "learning_rate": 0.0001, "loss": 1.6816, "step": 5635 }, { "epoch": 0.6474068117856527, "grad_norm": 0.41198432445526123, "learning_rate": 0.0001, "loss": 1.566, "step": 5636 }, { "epoch": 0.6475216816954799, "grad_norm": 0.47420576214790344, "learning_rate": 0.0001, "loss": 1.7435, "step": 5637 }, { "epoch": 0.647636551605307, "grad_norm": 0.44818076491355896, "learning_rate": 0.0001, "loss": 1.6726, "step": 5638 }, { "epoch": 0.6477514215151341, "grad_norm": 0.43701666593551636, "learning_rate": 0.0001, "loss": 1.5689, "step": 5639 }, { "epoch": 0.6478662914249612, "grad_norm": 0.4181070923805237, "learning_rate": 0.0001, "loss": 1.4794, "step": 5640 }, { "epoch": 0.6479811613347883, "grad_norm": 0.4229847192764282, "learning_rate": 0.0001, "loss": 1.594, "step": 5641 }, { "epoch": 0.6480960312446155, "grad_norm": 0.4145675003528595, "learning_rate": 0.0001, "loss": 1.5745, "step": 5642 }, { "epoch": 0.6482109011544426, "grad_norm": 0.43566563725471497, "learning_rate": 0.0001, "loss": 1.7148, "step": 5643 }, { "epoch": 0.6483257710642697, "grad_norm": 0.4276692867279053, "learning_rate": 0.0001, "loss": 1.4687, "step": 5644 }, { "epoch": 0.6484406409740968, "grad_norm": 0.3920123875141144, "learning_rate": 0.0001, "loss": 1.4192, "step": 5645 }, { "epoch": 0.6485555108839239, "grad_norm": 0.4451933801174164, "learning_rate": 0.0001, "loss": 1.6342, "step": 5646 }, { "epoch": 0.6486703807937511, "grad_norm": 0.404407799243927, "learning_rate": 0.0001, "loss": 1.5801, "step": 5647 }, { "epoch": 0.6487852507035782, "grad_norm": 0.4093591570854187, "learning_rate": 0.0001, "loss": 1.4817, "step": 5648 }, { "epoch": 0.6489001206134053, "grad_norm": 0.4147598147392273, "learning_rate": 0.0001, "loss": 1.7049, "step": 5649 }, { "epoch": 0.6490149905232324, "grad_norm": 0.45576632022857666, "learning_rate": 0.0001, "loss": 1.6524, "step": 5650 }, { "epoch": 0.6491298604330595, "grad_norm": 0.4459483027458191, "learning_rate": 0.0001, "loss": 1.7097, "step": 5651 }, { "epoch": 0.6492447303428867, "grad_norm": 0.47733941674232483, "learning_rate": 0.0001, "loss": 1.8741, "step": 5652 }, { "epoch": 0.6493596002527138, "grad_norm": 0.45850062370300293, "learning_rate": 0.0001, "loss": 1.7533, "step": 5653 }, { "epoch": 0.6494744701625409, "grad_norm": 0.42292144894599915, "learning_rate": 0.0001, "loss": 1.5892, "step": 5654 }, { "epoch": 0.649589340072368, "grad_norm": 0.4376376271247864, "learning_rate": 0.0001, "loss": 1.7886, "step": 5655 }, { "epoch": 0.6497042099821951, "grad_norm": 0.4351494610309601, "learning_rate": 0.0001, "loss": 1.6099, "step": 5656 }, { "epoch": 0.6498190798920223, "grad_norm": 0.4215450882911682, "learning_rate": 0.0001, "loss": 1.6642, "step": 5657 }, { "epoch": 0.6499339498018494, "grad_norm": 0.4552718997001648, "learning_rate": 0.0001, "loss": 1.7277, "step": 5658 }, { "epoch": 0.6500488197116765, "grad_norm": 0.4284704625606537, "learning_rate": 0.0001, "loss": 1.6748, "step": 5659 }, { "epoch": 0.6501636896215036, "grad_norm": 0.42294004559516907, "learning_rate": 0.0001, "loss": 1.575, "step": 5660 }, { "epoch": 0.6502785595313307, "grad_norm": 0.43804609775543213, "learning_rate": 0.0001, "loss": 1.6529, "step": 5661 }, { "epoch": 0.6503934294411579, "grad_norm": 0.4440459907054901, "learning_rate": 0.0001, "loss": 1.7683, "step": 5662 }, { "epoch": 0.650508299350985, "grad_norm": 0.4592325687408447, "learning_rate": 0.0001, "loss": 1.5892, "step": 5663 }, { "epoch": 0.6506231692608121, "grad_norm": 0.44203251600265503, "learning_rate": 0.0001, "loss": 1.6765, "step": 5664 }, { "epoch": 0.6507380391706392, "grad_norm": 0.4115028977394104, "learning_rate": 0.0001, "loss": 1.3906, "step": 5665 }, { "epoch": 0.6508529090804663, "grad_norm": 0.456676721572876, "learning_rate": 0.0001, "loss": 1.6296, "step": 5666 }, { "epoch": 0.6509677789902935, "grad_norm": 0.4849773645401001, "learning_rate": 0.0001, "loss": 1.5728, "step": 5667 }, { "epoch": 0.6510826489001206, "grad_norm": 0.4652778208255768, "learning_rate": 0.0001, "loss": 1.8482, "step": 5668 }, { "epoch": 0.6511975188099477, "grad_norm": 0.42466142773628235, "learning_rate": 0.0001, "loss": 1.4337, "step": 5669 }, { "epoch": 0.6513123887197748, "grad_norm": 0.4781624674797058, "learning_rate": 0.0001, "loss": 1.884, "step": 5670 }, { "epoch": 0.6514272586296019, "grad_norm": 0.4678051769733429, "learning_rate": 0.0001, "loss": 1.5438, "step": 5671 }, { "epoch": 0.651542128539429, "grad_norm": 0.4526500701904297, "learning_rate": 0.0001, "loss": 1.5922, "step": 5672 }, { "epoch": 0.6516569984492562, "grad_norm": 0.44952574372291565, "learning_rate": 0.0001, "loss": 1.5678, "step": 5673 }, { "epoch": 0.6517718683590833, "grad_norm": 0.4886074960231781, "learning_rate": 0.0001, "loss": 1.8023, "step": 5674 }, { "epoch": 0.6518867382689104, "grad_norm": 0.43213948607444763, "learning_rate": 0.0001, "loss": 1.6028, "step": 5675 }, { "epoch": 0.6520016081787375, "grad_norm": 0.41444772481918335, "learning_rate": 0.0001, "loss": 1.5833, "step": 5676 }, { "epoch": 0.6521164780885647, "grad_norm": 0.46063005924224854, "learning_rate": 0.0001, "loss": 1.686, "step": 5677 }, { "epoch": 0.6522313479983918, "grad_norm": 0.4700499176979065, "learning_rate": 0.0001, "loss": 1.6407, "step": 5678 }, { "epoch": 0.6523462179082189, "grad_norm": 0.48296359181404114, "learning_rate": 0.0001, "loss": 1.5319, "step": 5679 }, { "epoch": 0.652461087818046, "grad_norm": 0.4892067015171051, "learning_rate": 0.0001, "loss": 1.8181, "step": 5680 }, { "epoch": 0.6525759577278731, "grad_norm": 0.426547646522522, "learning_rate": 0.0001, "loss": 1.7042, "step": 5681 }, { "epoch": 0.6526908276377003, "grad_norm": 0.48687633872032166, "learning_rate": 0.0001, "loss": 1.6153, "step": 5682 }, { "epoch": 0.6528056975475274, "grad_norm": 0.449789434671402, "learning_rate": 0.0001, "loss": 1.7252, "step": 5683 }, { "epoch": 0.6529205674573545, "grad_norm": 0.43669000267982483, "learning_rate": 0.0001, "loss": 1.5944, "step": 5684 }, { "epoch": 0.6530354373671816, "grad_norm": 0.4181348383426666, "learning_rate": 0.0001, "loss": 1.6324, "step": 5685 }, { "epoch": 0.6531503072770087, "grad_norm": 0.4275439381599426, "learning_rate": 0.0001, "loss": 1.7064, "step": 5686 }, { "epoch": 0.6532651771868359, "grad_norm": 0.44249075651168823, "learning_rate": 0.0001, "loss": 1.6323, "step": 5687 }, { "epoch": 0.653380047096663, "grad_norm": 0.4645352363586426, "learning_rate": 0.0001, "loss": 1.733, "step": 5688 }, { "epoch": 0.6534949170064901, "grad_norm": 0.47578033804893494, "learning_rate": 0.0001, "loss": 1.8043, "step": 5689 }, { "epoch": 0.6536097869163173, "grad_norm": 0.4634343981742859, "learning_rate": 0.0001, "loss": 1.6265, "step": 5690 }, { "epoch": 0.6537246568261444, "grad_norm": 0.4607868194580078, "learning_rate": 0.0001, "loss": 1.6108, "step": 5691 }, { "epoch": 0.6538395267359716, "grad_norm": 0.43927955627441406, "learning_rate": 0.0001, "loss": 1.7641, "step": 5692 }, { "epoch": 0.6539543966457987, "grad_norm": 0.42264971137046814, "learning_rate": 0.0001, "loss": 1.6051, "step": 5693 }, { "epoch": 0.6540692665556258, "grad_norm": 0.4350726902484894, "learning_rate": 0.0001, "loss": 1.5614, "step": 5694 }, { "epoch": 0.6541841364654529, "grad_norm": 0.45460623502731323, "learning_rate": 0.0001, "loss": 1.5079, "step": 5695 }, { "epoch": 0.65429900637528, "grad_norm": 0.45324012637138367, "learning_rate": 0.0001, "loss": 1.458, "step": 5696 }, { "epoch": 0.6544138762851072, "grad_norm": 0.4302757680416107, "learning_rate": 0.0001, "loss": 1.4266, "step": 5697 }, { "epoch": 0.6545287461949343, "grad_norm": 0.4926580488681793, "learning_rate": 0.0001, "loss": 1.9029, "step": 5698 }, { "epoch": 0.6546436161047614, "grad_norm": 0.4108467698097229, "learning_rate": 0.0001, "loss": 1.4296, "step": 5699 }, { "epoch": 0.6547584860145885, "grad_norm": 0.4439946413040161, "learning_rate": 0.0001, "loss": 1.4945, "step": 5700 }, { "epoch": 0.6548733559244156, "grad_norm": 0.42766717076301575, "learning_rate": 0.0001, "loss": 1.6439, "step": 5701 }, { "epoch": 0.6549882258342428, "grad_norm": 0.4058894217014313, "learning_rate": 0.0001, "loss": 1.5366, "step": 5702 }, { "epoch": 0.6551030957440699, "grad_norm": 0.42546913027763367, "learning_rate": 0.0001, "loss": 1.5646, "step": 5703 }, { "epoch": 0.655217965653897, "grad_norm": 0.452540785074234, "learning_rate": 0.0001, "loss": 1.5635, "step": 5704 }, { "epoch": 0.6553328355637241, "grad_norm": 0.4617590308189392, "learning_rate": 0.0001, "loss": 1.6154, "step": 5705 }, { "epoch": 0.6554477054735512, "grad_norm": 0.41302189230918884, "learning_rate": 0.0001, "loss": 1.5644, "step": 5706 }, { "epoch": 0.6555625753833784, "grad_norm": 0.43837377429008484, "learning_rate": 0.0001, "loss": 1.6912, "step": 5707 }, { "epoch": 0.6556774452932055, "grad_norm": 0.4599246382713318, "learning_rate": 0.0001, "loss": 1.7636, "step": 5708 }, { "epoch": 0.6557923152030326, "grad_norm": 0.44273531436920166, "learning_rate": 0.0001, "loss": 1.5861, "step": 5709 }, { "epoch": 0.6559071851128597, "grad_norm": 0.40543317794799805, "learning_rate": 0.0001, "loss": 1.4967, "step": 5710 }, { "epoch": 0.6560220550226868, "grad_norm": 0.44224417209625244, "learning_rate": 0.0001, "loss": 1.4827, "step": 5711 }, { "epoch": 0.656136924932514, "grad_norm": 0.4091229736804962, "learning_rate": 0.0001, "loss": 1.5083, "step": 5712 }, { "epoch": 0.6562517948423411, "grad_norm": 0.44809257984161377, "learning_rate": 0.0001, "loss": 1.7865, "step": 5713 }, { "epoch": 0.6563666647521682, "grad_norm": 0.4188038110733032, "learning_rate": 0.0001, "loss": 1.6259, "step": 5714 }, { "epoch": 0.6564815346619953, "grad_norm": 0.4264969229698181, "learning_rate": 0.0001, "loss": 1.3747, "step": 5715 }, { "epoch": 0.6565964045718224, "grad_norm": 0.46266868710517883, "learning_rate": 0.0001, "loss": 1.8456, "step": 5716 }, { "epoch": 0.6567112744816496, "grad_norm": 0.45116865634918213, "learning_rate": 0.0001, "loss": 1.4373, "step": 5717 }, { "epoch": 0.6568261443914767, "grad_norm": 0.4384673833847046, "learning_rate": 0.0001, "loss": 1.5562, "step": 5718 }, { "epoch": 0.6569410143013038, "grad_norm": 0.4380660355091095, "learning_rate": 0.0001, "loss": 1.5475, "step": 5719 }, { "epoch": 0.6570558842111309, "grad_norm": 0.4457499086856842, "learning_rate": 0.0001, "loss": 1.5831, "step": 5720 }, { "epoch": 0.657170754120958, "grad_norm": 0.45870745182037354, "learning_rate": 0.0001, "loss": 1.6296, "step": 5721 }, { "epoch": 0.6572856240307852, "grad_norm": 0.44491496682167053, "learning_rate": 0.0001, "loss": 1.6472, "step": 5722 }, { "epoch": 0.6574004939406123, "grad_norm": 0.4536452293395996, "learning_rate": 0.0001, "loss": 1.5365, "step": 5723 }, { "epoch": 0.6575153638504394, "grad_norm": 0.4279188811779022, "learning_rate": 0.0001, "loss": 1.6861, "step": 5724 }, { "epoch": 0.6576302337602665, "grad_norm": 0.49207669496536255, "learning_rate": 0.0001, "loss": 1.6214, "step": 5725 }, { "epoch": 0.6577451036700936, "grad_norm": 0.47983112931251526, "learning_rate": 0.0001, "loss": 1.77, "step": 5726 }, { "epoch": 0.6578599735799208, "grad_norm": 0.42303088307380676, "learning_rate": 0.0001, "loss": 1.439, "step": 5727 }, { "epoch": 0.6579748434897479, "grad_norm": 0.40364500880241394, "learning_rate": 0.0001, "loss": 1.4152, "step": 5728 }, { "epoch": 0.658089713399575, "grad_norm": 0.44114431738853455, "learning_rate": 0.0001, "loss": 1.5885, "step": 5729 }, { "epoch": 0.6582045833094021, "grad_norm": 0.4176272451877594, "learning_rate": 0.0001, "loss": 1.5793, "step": 5730 }, { "epoch": 0.6583194532192292, "grad_norm": 0.4215009808540344, "learning_rate": 0.0001, "loss": 1.5576, "step": 5731 }, { "epoch": 0.6584343231290564, "grad_norm": 0.4414535164833069, "learning_rate": 0.0001, "loss": 1.6113, "step": 5732 }, { "epoch": 0.6585491930388835, "grad_norm": 0.46415939927101135, "learning_rate": 0.0001, "loss": 1.6739, "step": 5733 }, { "epoch": 0.6586640629487106, "grad_norm": 0.42953890562057495, "learning_rate": 0.0001, "loss": 1.5175, "step": 5734 }, { "epoch": 0.6587789328585377, "grad_norm": 0.4586336016654968, "learning_rate": 0.0001, "loss": 1.6979, "step": 5735 }, { "epoch": 0.6588938027683648, "grad_norm": 0.41848480701446533, "learning_rate": 0.0001, "loss": 1.724, "step": 5736 }, { "epoch": 0.659008672678192, "grad_norm": 0.43761909008026123, "learning_rate": 0.0001, "loss": 1.7051, "step": 5737 }, { "epoch": 0.6591235425880191, "grad_norm": 0.4685233235359192, "learning_rate": 0.0001, "loss": 1.7588, "step": 5738 }, { "epoch": 0.6592384124978462, "grad_norm": 0.4934740364551544, "learning_rate": 0.0001, "loss": 1.7505, "step": 5739 }, { "epoch": 0.6593532824076733, "grad_norm": 0.44371286034584045, "learning_rate": 0.0001, "loss": 1.6174, "step": 5740 }, { "epoch": 0.6594681523175004, "grad_norm": 0.429153710603714, "learning_rate": 0.0001, "loss": 1.6668, "step": 5741 }, { "epoch": 0.6595830222273276, "grad_norm": 0.46635177731513977, "learning_rate": 0.0001, "loss": 1.6942, "step": 5742 }, { "epoch": 0.6596978921371547, "grad_norm": 0.39945048093795776, "learning_rate": 0.0001, "loss": 1.6229, "step": 5743 }, { "epoch": 0.6598127620469818, "grad_norm": 0.46601754426956177, "learning_rate": 0.0001, "loss": 1.6118, "step": 5744 }, { "epoch": 0.6599276319568089, "grad_norm": 0.48034870624542236, "learning_rate": 0.0001, "loss": 1.5462, "step": 5745 }, { "epoch": 0.660042501866636, "grad_norm": 0.42302387952804565, "learning_rate": 0.0001, "loss": 1.4733, "step": 5746 }, { "epoch": 0.6601573717764632, "grad_norm": 0.4531380236148834, "learning_rate": 0.0001, "loss": 1.72, "step": 5747 }, { "epoch": 0.6602722416862903, "grad_norm": 0.43431270122528076, "learning_rate": 0.0001, "loss": 1.5529, "step": 5748 }, { "epoch": 0.6603871115961174, "grad_norm": 0.44529253244400024, "learning_rate": 0.0001, "loss": 1.5221, "step": 5749 }, { "epoch": 0.6605019815059445, "grad_norm": 0.43237540125846863, "learning_rate": 0.0001, "loss": 1.5567, "step": 5750 }, { "epoch": 0.6606168514157716, "grad_norm": 0.4513912796974182, "learning_rate": 0.0001, "loss": 1.5776, "step": 5751 }, { "epoch": 0.6607317213255988, "grad_norm": 0.42277923226356506, "learning_rate": 0.0001, "loss": 1.7517, "step": 5752 }, { "epoch": 0.6608465912354259, "grad_norm": 0.40695422887802124, "learning_rate": 0.0001, "loss": 1.5718, "step": 5753 }, { "epoch": 0.660961461145253, "grad_norm": 0.4478527009487152, "learning_rate": 0.0001, "loss": 1.7765, "step": 5754 }, { "epoch": 0.6610763310550801, "grad_norm": 0.4173600971698761, "learning_rate": 0.0001, "loss": 1.5796, "step": 5755 }, { "epoch": 0.6611912009649072, "grad_norm": 0.4463992416858673, "learning_rate": 0.0001, "loss": 1.6569, "step": 5756 }, { "epoch": 0.6613060708747344, "grad_norm": 0.4375052750110626, "learning_rate": 0.0001, "loss": 1.4407, "step": 5757 }, { "epoch": 0.6614209407845615, "grad_norm": 0.4219117760658264, "learning_rate": 0.0001, "loss": 1.5296, "step": 5758 }, { "epoch": 0.6615358106943886, "grad_norm": 0.45714181661605835, "learning_rate": 0.0001, "loss": 1.674, "step": 5759 }, { "epoch": 0.6616506806042157, "grad_norm": 0.4279073178768158, "learning_rate": 0.0001, "loss": 1.5276, "step": 5760 }, { "epoch": 0.6617655505140428, "grad_norm": 0.4680931568145752, "learning_rate": 0.0001, "loss": 1.7313, "step": 5761 }, { "epoch": 0.66188042042387, "grad_norm": 0.4030158817768097, "learning_rate": 0.0001, "loss": 1.5361, "step": 5762 }, { "epoch": 0.6619952903336971, "grad_norm": 0.4316936433315277, "learning_rate": 0.0001, "loss": 1.6257, "step": 5763 }, { "epoch": 0.6621101602435242, "grad_norm": 0.45963194966316223, "learning_rate": 0.0001, "loss": 1.7182, "step": 5764 }, { "epoch": 0.6622250301533513, "grad_norm": 0.4106147289276123, "learning_rate": 0.0001, "loss": 1.5614, "step": 5765 }, { "epoch": 0.6623399000631784, "grad_norm": 0.4534708261489868, "learning_rate": 0.0001, "loss": 1.7293, "step": 5766 }, { "epoch": 0.6624547699730056, "grad_norm": 0.4368360936641693, "learning_rate": 0.0001, "loss": 1.5357, "step": 5767 }, { "epoch": 0.6625696398828327, "grad_norm": 0.40505045652389526, "learning_rate": 0.0001, "loss": 1.5054, "step": 5768 }, { "epoch": 0.6626845097926598, "grad_norm": 0.42817744612693787, "learning_rate": 0.0001, "loss": 1.6716, "step": 5769 }, { "epoch": 0.6627993797024869, "grad_norm": 0.46093493700027466, "learning_rate": 0.0001, "loss": 1.6421, "step": 5770 }, { "epoch": 0.662914249612314, "grad_norm": 0.44305554032325745, "learning_rate": 0.0001, "loss": 1.6918, "step": 5771 }, { "epoch": 0.6630291195221412, "grad_norm": 0.39377158880233765, "learning_rate": 0.0001, "loss": 1.5552, "step": 5772 }, { "epoch": 0.6631439894319683, "grad_norm": 0.4344120919704437, "learning_rate": 0.0001, "loss": 1.5301, "step": 5773 }, { "epoch": 0.6632588593417954, "grad_norm": 0.44036102294921875, "learning_rate": 0.0001, "loss": 1.6784, "step": 5774 }, { "epoch": 0.6633737292516225, "grad_norm": 0.4466957449913025, "learning_rate": 0.0001, "loss": 1.6549, "step": 5775 }, { "epoch": 0.6634885991614496, "grad_norm": 0.43694761395454407, "learning_rate": 0.0001, "loss": 1.6732, "step": 5776 }, { "epoch": 0.6636034690712768, "grad_norm": 0.4368375539779663, "learning_rate": 0.0001, "loss": 1.5941, "step": 5777 }, { "epoch": 0.6637183389811039, "grad_norm": 0.4765366017818451, "learning_rate": 0.0001, "loss": 1.7931, "step": 5778 }, { "epoch": 0.663833208890931, "grad_norm": 0.43931543827056885, "learning_rate": 0.0001, "loss": 1.6254, "step": 5779 }, { "epoch": 0.6639480788007581, "grad_norm": 0.43625199794769287, "learning_rate": 0.0001, "loss": 1.6375, "step": 5780 }, { "epoch": 0.6640629487105852, "grad_norm": 0.4167712926864624, "learning_rate": 0.0001, "loss": 1.4647, "step": 5781 }, { "epoch": 0.6641778186204124, "grad_norm": 0.40136420726776123, "learning_rate": 0.0001, "loss": 1.5526, "step": 5782 }, { "epoch": 0.6642926885302395, "grad_norm": 0.4109748303890228, "learning_rate": 0.0001, "loss": 1.4092, "step": 5783 }, { "epoch": 0.6644075584400666, "grad_norm": 0.4257631301879883, "learning_rate": 0.0001, "loss": 1.6211, "step": 5784 }, { "epoch": 0.6645224283498937, "grad_norm": 0.4686290919780731, "learning_rate": 0.0001, "loss": 1.7784, "step": 5785 }, { "epoch": 0.6646372982597208, "grad_norm": 0.4436352252960205, "learning_rate": 0.0001, "loss": 1.6364, "step": 5786 }, { "epoch": 0.664752168169548, "grad_norm": 0.4808412790298462, "learning_rate": 0.0001, "loss": 1.7645, "step": 5787 }, { "epoch": 0.6648670380793751, "grad_norm": 0.5230939984321594, "learning_rate": 0.0001, "loss": 1.8253, "step": 5788 }, { "epoch": 0.6649819079892022, "grad_norm": 0.4324409067630768, "learning_rate": 0.0001, "loss": 1.4929, "step": 5789 }, { "epoch": 0.6650967778990293, "grad_norm": 0.48814013600349426, "learning_rate": 0.0001, "loss": 1.8974, "step": 5790 }, { "epoch": 0.6652116478088564, "grad_norm": 0.4277442991733551, "learning_rate": 0.0001, "loss": 1.4797, "step": 5791 }, { "epoch": 0.6653265177186836, "grad_norm": 0.43875277042388916, "learning_rate": 0.0001, "loss": 1.624, "step": 5792 }, { "epoch": 0.6654413876285107, "grad_norm": 0.47888147830963135, "learning_rate": 0.0001, "loss": 1.6131, "step": 5793 }, { "epoch": 0.6655562575383378, "grad_norm": 0.43682193756103516, "learning_rate": 0.0001, "loss": 1.6384, "step": 5794 }, { "epoch": 0.6656711274481649, "grad_norm": 0.4496222734451294, "learning_rate": 0.0001, "loss": 1.7213, "step": 5795 }, { "epoch": 0.665785997357992, "grad_norm": 0.44679972529411316, "learning_rate": 0.0001, "loss": 1.644, "step": 5796 }, { "epoch": 0.6659008672678192, "grad_norm": 0.44004032015800476, "learning_rate": 0.0001, "loss": 1.6932, "step": 5797 }, { "epoch": 0.6660157371776463, "grad_norm": 0.46193936467170715, "learning_rate": 0.0001, "loss": 1.5795, "step": 5798 }, { "epoch": 0.6661306070874734, "grad_norm": 0.4364147186279297, "learning_rate": 0.0001, "loss": 1.6024, "step": 5799 }, { "epoch": 0.6662454769973005, "grad_norm": 0.46602505445480347, "learning_rate": 0.0001, "loss": 1.6581, "step": 5800 }, { "epoch": 0.6663603469071276, "grad_norm": 0.4367719888687134, "learning_rate": 0.0001, "loss": 1.615, "step": 5801 }, { "epoch": 0.6664752168169548, "grad_norm": 0.4234819710254669, "learning_rate": 0.0001, "loss": 1.654, "step": 5802 }, { "epoch": 0.6665900867267819, "grad_norm": 0.46970421075820923, "learning_rate": 0.0001, "loss": 1.8436, "step": 5803 }, { "epoch": 0.666704956636609, "grad_norm": 0.45983707904815674, "learning_rate": 0.0001, "loss": 1.5, "step": 5804 }, { "epoch": 0.6668198265464361, "grad_norm": 0.4581074118614197, "learning_rate": 0.0001, "loss": 1.6173, "step": 5805 }, { "epoch": 0.6669346964562632, "grad_norm": 0.4409380853176117, "learning_rate": 0.0001, "loss": 1.6336, "step": 5806 }, { "epoch": 0.6670495663660904, "grad_norm": 0.46238353848457336, "learning_rate": 0.0001, "loss": 1.8162, "step": 5807 }, { "epoch": 0.6671644362759175, "grad_norm": 0.4390278160572052, "learning_rate": 0.0001, "loss": 1.7262, "step": 5808 }, { "epoch": 0.6672793061857446, "grad_norm": 0.44605204463005066, "learning_rate": 0.0001, "loss": 1.6926, "step": 5809 }, { "epoch": 0.6673941760955717, "grad_norm": 0.41407981514930725, "learning_rate": 0.0001, "loss": 1.6147, "step": 5810 }, { "epoch": 0.6675090460053988, "grad_norm": 0.44644859433174133, "learning_rate": 0.0001, "loss": 1.5268, "step": 5811 }, { "epoch": 0.667623915915226, "grad_norm": 0.4450972080230713, "learning_rate": 0.0001, "loss": 1.7008, "step": 5812 }, { "epoch": 0.6677387858250531, "grad_norm": 0.45438942313194275, "learning_rate": 0.0001, "loss": 1.572, "step": 5813 }, { "epoch": 0.6678536557348802, "grad_norm": 0.4170825481414795, "learning_rate": 0.0001, "loss": 1.5343, "step": 5814 }, { "epoch": 0.6679685256447073, "grad_norm": 0.4803465008735657, "learning_rate": 0.0001, "loss": 1.8373, "step": 5815 }, { "epoch": 0.6680833955545344, "grad_norm": 0.4364113211631775, "learning_rate": 0.0001, "loss": 1.5873, "step": 5816 }, { "epoch": 0.6681982654643616, "grad_norm": 0.4389718174934387, "learning_rate": 0.0001, "loss": 1.3752, "step": 5817 }, { "epoch": 0.6683131353741887, "grad_norm": 0.4332119822502136, "learning_rate": 0.0001, "loss": 1.5222, "step": 5818 }, { "epoch": 0.6684280052840158, "grad_norm": 0.42295655608177185, "learning_rate": 0.0001, "loss": 1.4948, "step": 5819 }, { "epoch": 0.6685428751938429, "grad_norm": 0.39874276518821716, "learning_rate": 0.0001, "loss": 1.5184, "step": 5820 }, { "epoch": 0.66865774510367, "grad_norm": 0.4008484482765198, "learning_rate": 0.0001, "loss": 1.6389, "step": 5821 }, { "epoch": 0.6687726150134972, "grad_norm": 0.43164539337158203, "learning_rate": 0.0001, "loss": 1.6292, "step": 5822 }, { "epoch": 0.6688874849233243, "grad_norm": 0.39950746297836304, "learning_rate": 0.0001, "loss": 1.5272, "step": 5823 }, { "epoch": 0.6690023548331514, "grad_norm": 0.442396879196167, "learning_rate": 0.0001, "loss": 1.6061, "step": 5824 }, { "epoch": 0.6691172247429785, "grad_norm": 0.4410782754421234, "learning_rate": 0.0001, "loss": 1.4986, "step": 5825 }, { "epoch": 0.6692320946528056, "grad_norm": 0.4995570480823517, "learning_rate": 0.0001, "loss": 1.7851, "step": 5826 }, { "epoch": 0.6693469645626329, "grad_norm": 0.43698373436927795, "learning_rate": 0.0001, "loss": 1.5612, "step": 5827 }, { "epoch": 0.66946183447246, "grad_norm": 0.5518622994422913, "learning_rate": 0.0001, "loss": 1.6748, "step": 5828 }, { "epoch": 0.6695767043822871, "grad_norm": 0.4583264887332916, "learning_rate": 0.0001, "loss": 1.7106, "step": 5829 }, { "epoch": 0.6696915742921142, "grad_norm": 0.40689265727996826, "learning_rate": 0.0001, "loss": 1.5925, "step": 5830 }, { "epoch": 0.6698064442019414, "grad_norm": 0.45366060733795166, "learning_rate": 0.0001, "loss": 1.6063, "step": 5831 }, { "epoch": 0.6699213141117685, "grad_norm": 0.4742351472377777, "learning_rate": 0.0001, "loss": 1.5794, "step": 5832 }, { "epoch": 0.6700361840215956, "grad_norm": 0.43246540427207947, "learning_rate": 0.0001, "loss": 1.524, "step": 5833 }, { "epoch": 0.6701510539314227, "grad_norm": 0.4532145857810974, "learning_rate": 0.0001, "loss": 1.6588, "step": 5834 }, { "epoch": 0.6702659238412498, "grad_norm": 0.4628235399723053, "learning_rate": 0.0001, "loss": 1.7553, "step": 5835 }, { "epoch": 0.670380793751077, "grad_norm": 0.5169723629951477, "learning_rate": 0.0001, "loss": 1.6494, "step": 5836 }, { "epoch": 0.6704956636609041, "grad_norm": 0.4652624726295471, "learning_rate": 0.0001, "loss": 1.6496, "step": 5837 }, { "epoch": 0.6706105335707312, "grad_norm": 0.45965853333473206, "learning_rate": 0.0001, "loss": 1.6612, "step": 5838 }, { "epoch": 0.6707254034805583, "grad_norm": 0.4296656548976898, "learning_rate": 0.0001, "loss": 1.5995, "step": 5839 }, { "epoch": 0.6708402733903854, "grad_norm": 0.4188656806945801, "learning_rate": 0.0001, "loss": 1.594, "step": 5840 }, { "epoch": 0.6709551433002126, "grad_norm": 0.442221075296402, "learning_rate": 0.0001, "loss": 1.6074, "step": 5841 }, { "epoch": 0.6710700132100397, "grad_norm": 0.4227312207221985, "learning_rate": 0.0001, "loss": 1.5262, "step": 5842 }, { "epoch": 0.6711848831198668, "grad_norm": 0.4592028856277466, "learning_rate": 0.0001, "loss": 1.6376, "step": 5843 }, { "epoch": 0.6712997530296939, "grad_norm": 0.4832910895347595, "learning_rate": 0.0001, "loss": 1.6153, "step": 5844 }, { "epoch": 0.671414622939521, "grad_norm": 0.44571515917778015, "learning_rate": 0.0001, "loss": 1.7643, "step": 5845 }, { "epoch": 0.6715294928493482, "grad_norm": 0.45350438356399536, "learning_rate": 0.0001, "loss": 1.7148, "step": 5846 }, { "epoch": 0.6716443627591753, "grad_norm": 0.4490528106689453, "learning_rate": 0.0001, "loss": 1.6748, "step": 5847 }, { "epoch": 0.6717592326690024, "grad_norm": 0.4567161500453949, "learning_rate": 0.0001, "loss": 1.6875, "step": 5848 }, { "epoch": 0.6718741025788295, "grad_norm": 0.44406652450561523, "learning_rate": 0.0001, "loss": 1.6164, "step": 5849 }, { "epoch": 0.6719889724886566, "grad_norm": 0.447835773229599, "learning_rate": 0.0001, "loss": 1.6824, "step": 5850 }, { "epoch": 0.6721038423984838, "grad_norm": 0.597815752029419, "learning_rate": 0.0001, "loss": 1.5491, "step": 5851 }, { "epoch": 0.6722187123083109, "grad_norm": 0.4304426610469818, "learning_rate": 0.0001, "loss": 1.5914, "step": 5852 }, { "epoch": 0.672333582218138, "grad_norm": 0.4473625719547272, "learning_rate": 0.0001, "loss": 1.511, "step": 5853 }, { "epoch": 0.6724484521279651, "grad_norm": 0.4547788202762604, "learning_rate": 0.0001, "loss": 1.8366, "step": 5854 }, { "epoch": 0.6725633220377922, "grad_norm": 0.43215829133987427, "learning_rate": 0.0001, "loss": 1.611, "step": 5855 }, { "epoch": 0.6726781919476194, "grad_norm": 0.515165388584137, "learning_rate": 0.0001, "loss": 1.6671, "step": 5856 }, { "epoch": 0.6727930618574465, "grad_norm": 0.4932282865047455, "learning_rate": 0.0001, "loss": 1.5951, "step": 5857 }, { "epoch": 0.6729079317672736, "grad_norm": 0.46201443672180176, "learning_rate": 0.0001, "loss": 1.5531, "step": 5858 }, { "epoch": 0.6730228016771007, "grad_norm": 0.470007061958313, "learning_rate": 0.0001, "loss": 1.6675, "step": 5859 }, { "epoch": 0.6731376715869278, "grad_norm": 0.4670080542564392, "learning_rate": 0.0001, "loss": 1.7879, "step": 5860 }, { "epoch": 0.673252541496755, "grad_norm": 0.4259360730648041, "learning_rate": 0.0001, "loss": 1.581, "step": 5861 }, { "epoch": 0.6733674114065821, "grad_norm": 0.47610583901405334, "learning_rate": 0.0001, "loss": 1.8552, "step": 5862 }, { "epoch": 0.6734822813164092, "grad_norm": 0.4580018222332001, "learning_rate": 0.0001, "loss": 1.6853, "step": 5863 }, { "epoch": 0.6735971512262363, "grad_norm": 0.4032216966152191, "learning_rate": 0.0001, "loss": 1.5547, "step": 5864 }, { "epoch": 0.6737120211360634, "grad_norm": 0.45826172828674316, "learning_rate": 0.0001, "loss": 1.7622, "step": 5865 }, { "epoch": 0.6738268910458906, "grad_norm": 0.43257811665534973, "learning_rate": 0.0001, "loss": 1.6669, "step": 5866 }, { "epoch": 0.6739417609557177, "grad_norm": 0.434496134519577, "learning_rate": 0.0001, "loss": 1.6081, "step": 5867 }, { "epoch": 0.6740566308655448, "grad_norm": 0.4298497438430786, "learning_rate": 0.0001, "loss": 1.5388, "step": 5868 }, { "epoch": 0.6741715007753719, "grad_norm": 0.46281471848487854, "learning_rate": 0.0001, "loss": 1.7304, "step": 5869 }, { "epoch": 0.674286370685199, "grad_norm": 0.42578932642936707, "learning_rate": 0.0001, "loss": 1.5891, "step": 5870 }, { "epoch": 0.6744012405950262, "grad_norm": 0.431417316198349, "learning_rate": 0.0001, "loss": 1.411, "step": 5871 }, { "epoch": 0.6745161105048533, "grad_norm": 0.4810255169868469, "learning_rate": 0.0001, "loss": 1.5891, "step": 5872 }, { "epoch": 0.6746309804146804, "grad_norm": 0.4635904133319855, "learning_rate": 0.0001, "loss": 1.8039, "step": 5873 }, { "epoch": 0.6747458503245075, "grad_norm": 0.46906232833862305, "learning_rate": 0.0001, "loss": 1.86, "step": 5874 }, { "epoch": 0.6748607202343346, "grad_norm": 0.44870471954345703, "learning_rate": 0.0001, "loss": 1.5199, "step": 5875 }, { "epoch": 0.6749755901441618, "grad_norm": 0.4645586609840393, "learning_rate": 0.0001, "loss": 1.7069, "step": 5876 }, { "epoch": 0.6750904600539889, "grad_norm": 0.45580583810806274, "learning_rate": 0.0001, "loss": 1.6092, "step": 5877 }, { "epoch": 0.675205329963816, "grad_norm": 0.4602547585964203, "learning_rate": 0.0001, "loss": 1.5772, "step": 5878 }, { "epoch": 0.6753201998736431, "grad_norm": 0.45218491554260254, "learning_rate": 0.0001, "loss": 1.5413, "step": 5879 }, { "epoch": 0.6754350697834702, "grad_norm": 0.4633892774581909, "learning_rate": 0.0001, "loss": 1.5366, "step": 5880 }, { "epoch": 0.6755499396932974, "grad_norm": 0.47411301732063293, "learning_rate": 0.0001, "loss": 1.6984, "step": 5881 }, { "epoch": 0.6756648096031245, "grad_norm": 0.4305265545845032, "learning_rate": 0.0001, "loss": 1.6009, "step": 5882 }, { "epoch": 0.6757796795129516, "grad_norm": 0.444837749004364, "learning_rate": 0.0001, "loss": 1.5941, "step": 5883 }, { "epoch": 0.6758945494227787, "grad_norm": 0.4728662967681885, "learning_rate": 0.0001, "loss": 1.7626, "step": 5884 }, { "epoch": 0.6760094193326058, "grad_norm": 0.4483180046081543, "learning_rate": 0.0001, "loss": 1.7023, "step": 5885 }, { "epoch": 0.676124289242433, "grad_norm": 0.4406643211841583, "learning_rate": 0.0001, "loss": 1.7195, "step": 5886 }, { "epoch": 0.6762391591522601, "grad_norm": 0.44651803374290466, "learning_rate": 0.0001, "loss": 1.5651, "step": 5887 }, { "epoch": 0.6763540290620872, "grad_norm": 0.4229283630847931, "learning_rate": 0.0001, "loss": 1.7038, "step": 5888 }, { "epoch": 0.6764688989719143, "grad_norm": 0.4413118064403534, "learning_rate": 0.0001, "loss": 1.5026, "step": 5889 }, { "epoch": 0.6765837688817414, "grad_norm": 0.44531190395355225, "learning_rate": 0.0001, "loss": 1.6668, "step": 5890 }, { "epoch": 0.6766986387915686, "grad_norm": 0.4322546720504761, "learning_rate": 0.0001, "loss": 1.7821, "step": 5891 }, { "epoch": 0.6768135087013957, "grad_norm": 0.43194666504859924, "learning_rate": 0.0001, "loss": 1.5749, "step": 5892 }, { "epoch": 0.6769283786112228, "grad_norm": 0.45931199193000793, "learning_rate": 0.0001, "loss": 1.5343, "step": 5893 }, { "epoch": 0.6770432485210499, "grad_norm": 0.4326918423175812, "learning_rate": 0.0001, "loss": 1.394, "step": 5894 }, { "epoch": 0.677158118430877, "grad_norm": 0.43630099296569824, "learning_rate": 0.0001, "loss": 1.4559, "step": 5895 }, { "epoch": 0.6772729883407042, "grad_norm": 0.4202820360660553, "learning_rate": 0.0001, "loss": 1.6519, "step": 5896 }, { "epoch": 0.6773878582505313, "grad_norm": 0.508823812007904, "learning_rate": 0.0001, "loss": 1.6324, "step": 5897 }, { "epoch": 0.6775027281603584, "grad_norm": 0.43873631954193115, "learning_rate": 0.0001, "loss": 1.522, "step": 5898 }, { "epoch": 0.6776175980701855, "grad_norm": 0.4063846170902252, "learning_rate": 0.0001, "loss": 1.6185, "step": 5899 }, { "epoch": 0.6777324679800126, "grad_norm": 0.4242575466632843, "learning_rate": 0.0001, "loss": 1.6539, "step": 5900 }, { "epoch": 0.6778473378898398, "grad_norm": 0.44178491830825806, "learning_rate": 0.0001, "loss": 1.4893, "step": 5901 }, { "epoch": 0.6779622077996669, "grad_norm": 0.4485710561275482, "learning_rate": 0.0001, "loss": 1.6329, "step": 5902 }, { "epoch": 0.678077077709494, "grad_norm": 0.4207887351512909, "learning_rate": 0.0001, "loss": 1.5055, "step": 5903 }, { "epoch": 0.6781919476193211, "grad_norm": 1.451164722442627, "learning_rate": 0.0001, "loss": 1.6455, "step": 5904 }, { "epoch": 0.6783068175291482, "grad_norm": 0.46162787079811096, "learning_rate": 0.0001, "loss": 1.5872, "step": 5905 }, { "epoch": 0.6784216874389754, "grad_norm": 0.4590327739715576, "learning_rate": 0.0001, "loss": 1.703, "step": 5906 }, { "epoch": 0.6785365573488025, "grad_norm": 0.44896572828292847, "learning_rate": 0.0001, "loss": 1.7352, "step": 5907 }, { "epoch": 0.6786514272586296, "grad_norm": 0.43614816665649414, "learning_rate": 0.0001, "loss": 1.5451, "step": 5908 }, { "epoch": 0.6787662971684567, "grad_norm": 0.4527941942214966, "learning_rate": 0.0001, "loss": 1.5409, "step": 5909 }, { "epoch": 0.6788811670782838, "grad_norm": 0.4711673855781555, "learning_rate": 0.0001, "loss": 1.5503, "step": 5910 }, { "epoch": 0.678996036988111, "grad_norm": 0.5014576315879822, "learning_rate": 0.0001, "loss": 1.6173, "step": 5911 }, { "epoch": 0.6791109068979381, "grad_norm": 0.4469035565853119, "learning_rate": 0.0001, "loss": 1.5892, "step": 5912 }, { "epoch": 0.6792257768077652, "grad_norm": 0.47193190455436707, "learning_rate": 0.0001, "loss": 1.58, "step": 5913 }, { "epoch": 0.6793406467175923, "grad_norm": 0.4429560899734497, "learning_rate": 0.0001, "loss": 1.6693, "step": 5914 }, { "epoch": 0.6794555166274194, "grad_norm": 0.46765467524528503, "learning_rate": 0.0001, "loss": 1.6985, "step": 5915 }, { "epoch": 0.6795703865372466, "grad_norm": 0.4488067328929901, "learning_rate": 0.0001, "loss": 1.6629, "step": 5916 }, { "epoch": 0.6796852564470737, "grad_norm": 0.4276580214500427, "learning_rate": 0.0001, "loss": 1.579, "step": 5917 }, { "epoch": 0.6798001263569008, "grad_norm": 0.47405701875686646, "learning_rate": 0.0001, "loss": 1.7718, "step": 5918 }, { "epoch": 0.6799149962667279, "grad_norm": 0.4314579367637634, "learning_rate": 0.0001, "loss": 1.5739, "step": 5919 }, { "epoch": 0.680029866176555, "grad_norm": 0.4281775951385498, "learning_rate": 0.0001, "loss": 1.4945, "step": 5920 }, { "epoch": 0.6801447360863822, "grad_norm": 0.4201895594596863, "learning_rate": 0.0001, "loss": 1.519, "step": 5921 }, { "epoch": 0.6802596059962093, "grad_norm": 0.42386960983276367, "learning_rate": 0.0001, "loss": 1.5078, "step": 5922 }, { "epoch": 0.6803744759060364, "grad_norm": 0.49620696902275085, "learning_rate": 0.0001, "loss": 1.8191, "step": 5923 }, { "epoch": 0.6804893458158635, "grad_norm": 0.4446530044078827, "learning_rate": 0.0001, "loss": 1.7002, "step": 5924 }, { "epoch": 0.6806042157256906, "grad_norm": 0.436937540769577, "learning_rate": 0.0001, "loss": 1.5185, "step": 5925 }, { "epoch": 0.6807190856355178, "grad_norm": 0.41910529136657715, "learning_rate": 0.0001, "loss": 1.3554, "step": 5926 }, { "epoch": 0.6808339555453449, "grad_norm": 0.4524851143360138, "learning_rate": 0.0001, "loss": 1.5111, "step": 5927 }, { "epoch": 0.680948825455172, "grad_norm": 0.5210041999816895, "learning_rate": 0.0001, "loss": 1.8005, "step": 5928 }, { "epoch": 0.6810636953649991, "grad_norm": 0.4216352105140686, "learning_rate": 0.0001, "loss": 1.5726, "step": 5929 }, { "epoch": 0.6811785652748262, "grad_norm": 0.44295185804367065, "learning_rate": 0.0001, "loss": 1.5998, "step": 5930 }, { "epoch": 0.6812934351846534, "grad_norm": 0.4396561086177826, "learning_rate": 0.0001, "loss": 1.6761, "step": 5931 }, { "epoch": 0.6814083050944805, "grad_norm": 0.440660297870636, "learning_rate": 0.0001, "loss": 1.7889, "step": 5932 }, { "epoch": 0.6815231750043076, "grad_norm": 0.4529242217540741, "learning_rate": 0.0001, "loss": 1.6308, "step": 5933 }, { "epoch": 0.6816380449141347, "grad_norm": 0.43237507343292236, "learning_rate": 0.0001, "loss": 1.569, "step": 5934 }, { "epoch": 0.6817529148239618, "grad_norm": 0.41882574558258057, "learning_rate": 0.0001, "loss": 1.5298, "step": 5935 }, { "epoch": 0.681867784733789, "grad_norm": 0.501582682132721, "learning_rate": 0.0001, "loss": 1.7749, "step": 5936 }, { "epoch": 0.6819826546436161, "grad_norm": 0.4597390294075012, "learning_rate": 0.0001, "loss": 1.7377, "step": 5937 }, { "epoch": 0.6820975245534432, "grad_norm": 0.42805132269859314, "learning_rate": 0.0001, "loss": 1.5204, "step": 5938 }, { "epoch": 0.6822123944632703, "grad_norm": 0.4181848168373108, "learning_rate": 0.0001, "loss": 1.3851, "step": 5939 }, { "epoch": 0.6823272643730974, "grad_norm": 0.5363374352455139, "learning_rate": 0.0001, "loss": 1.9319, "step": 5940 }, { "epoch": 0.6824421342829246, "grad_norm": 0.47564831376075745, "learning_rate": 0.0001, "loss": 1.7815, "step": 5941 }, { "epoch": 0.6825570041927517, "grad_norm": 0.4543587267398834, "learning_rate": 0.0001, "loss": 1.626, "step": 5942 }, { "epoch": 0.6826718741025788, "grad_norm": 0.43262454867362976, "learning_rate": 0.0001, "loss": 1.6205, "step": 5943 }, { "epoch": 0.6827867440124059, "grad_norm": 0.44168633222579956, "learning_rate": 0.0001, "loss": 1.3962, "step": 5944 }, { "epoch": 0.682901613922233, "grad_norm": 0.4552295207977295, "learning_rate": 0.0001, "loss": 1.6962, "step": 5945 }, { "epoch": 0.6830164838320602, "grad_norm": 0.4199536442756653, "learning_rate": 0.0001, "loss": 1.6031, "step": 5946 }, { "epoch": 0.6831313537418873, "grad_norm": 0.44757145643234253, "learning_rate": 0.0001, "loss": 1.7174, "step": 5947 }, { "epoch": 0.6832462236517144, "grad_norm": 0.4116254448890686, "learning_rate": 0.0001, "loss": 1.5348, "step": 5948 }, { "epoch": 0.6833610935615415, "grad_norm": 0.4747268259525299, "learning_rate": 0.0001, "loss": 1.6715, "step": 5949 }, { "epoch": 0.6834759634713686, "grad_norm": 0.41993069648742676, "learning_rate": 0.0001, "loss": 1.459, "step": 5950 }, { "epoch": 0.6835908333811958, "grad_norm": 0.4138827621936798, "learning_rate": 0.0001, "loss": 1.4987, "step": 5951 }, { "epoch": 0.6837057032910229, "grad_norm": 0.45296478271484375, "learning_rate": 0.0001, "loss": 1.7347, "step": 5952 }, { "epoch": 0.68382057320085, "grad_norm": 0.510452389717102, "learning_rate": 0.0001, "loss": 1.6208, "step": 5953 }, { "epoch": 0.6839354431106771, "grad_norm": 0.474874883890152, "learning_rate": 0.0001, "loss": 1.6779, "step": 5954 }, { "epoch": 0.6840503130205042, "grad_norm": 0.45102521777153015, "learning_rate": 0.0001, "loss": 1.5271, "step": 5955 }, { "epoch": 0.6841651829303314, "grad_norm": 0.4434516429901123, "learning_rate": 0.0001, "loss": 1.4887, "step": 5956 }, { "epoch": 0.6842800528401585, "grad_norm": 0.44997087121009827, "learning_rate": 0.0001, "loss": 1.6192, "step": 5957 }, { "epoch": 0.6843949227499856, "grad_norm": 0.42875492572784424, "learning_rate": 0.0001, "loss": 1.5499, "step": 5958 }, { "epoch": 0.6845097926598127, "grad_norm": 0.4152733087539673, "learning_rate": 0.0001, "loss": 1.5594, "step": 5959 }, { "epoch": 0.6846246625696398, "grad_norm": 0.43790438771247864, "learning_rate": 0.0001, "loss": 1.5427, "step": 5960 }, { "epoch": 0.684739532479467, "grad_norm": 0.45538073778152466, "learning_rate": 0.0001, "loss": 1.8264, "step": 5961 }, { "epoch": 0.6848544023892941, "grad_norm": 0.671297013759613, "learning_rate": 0.0001, "loss": 1.6119, "step": 5962 }, { "epoch": 0.6849692722991212, "grad_norm": 0.454745888710022, "learning_rate": 0.0001, "loss": 1.6478, "step": 5963 }, { "epoch": 0.6850841422089484, "grad_norm": 0.43612053990364075, "learning_rate": 0.0001, "loss": 1.6183, "step": 5964 }, { "epoch": 0.6851990121187755, "grad_norm": 0.42518892884254456, "learning_rate": 0.0001, "loss": 1.5105, "step": 5965 }, { "epoch": 0.6853138820286027, "grad_norm": 0.4237819015979767, "learning_rate": 0.0001, "loss": 1.5573, "step": 5966 }, { "epoch": 0.6854287519384298, "grad_norm": 0.4369647204875946, "learning_rate": 0.0001, "loss": 1.5347, "step": 5967 }, { "epoch": 0.6855436218482569, "grad_norm": 0.4214072525501251, "learning_rate": 0.0001, "loss": 1.5991, "step": 5968 }, { "epoch": 0.685658491758084, "grad_norm": 0.45082470774650574, "learning_rate": 0.0001, "loss": 1.7264, "step": 5969 }, { "epoch": 0.6857733616679111, "grad_norm": 0.44140127301216125, "learning_rate": 0.0001, "loss": 1.447, "step": 5970 }, { "epoch": 0.6858882315777383, "grad_norm": 0.4286753833293915, "learning_rate": 0.0001, "loss": 1.6595, "step": 5971 }, { "epoch": 0.6860031014875654, "grad_norm": 0.43218451738357544, "learning_rate": 0.0001, "loss": 1.6167, "step": 5972 }, { "epoch": 0.6861179713973925, "grad_norm": 0.40552157163619995, "learning_rate": 0.0001, "loss": 1.6152, "step": 5973 }, { "epoch": 0.6862328413072196, "grad_norm": 0.449190229177475, "learning_rate": 0.0001, "loss": 1.5253, "step": 5974 }, { "epoch": 0.6863477112170467, "grad_norm": 0.42103078961372375, "learning_rate": 0.0001, "loss": 1.485, "step": 5975 }, { "epoch": 0.6864625811268739, "grad_norm": 0.4737277328968048, "learning_rate": 0.0001, "loss": 1.5635, "step": 5976 }, { "epoch": 0.686577451036701, "grad_norm": 0.44992202520370483, "learning_rate": 0.0001, "loss": 1.6039, "step": 5977 }, { "epoch": 0.6866923209465281, "grad_norm": 0.4830211400985718, "learning_rate": 0.0001, "loss": 1.3773, "step": 5978 }, { "epoch": 0.6868071908563552, "grad_norm": 0.48300692439079285, "learning_rate": 0.0001, "loss": 1.6668, "step": 5979 }, { "epoch": 0.6869220607661823, "grad_norm": 0.4886820614337921, "learning_rate": 0.0001, "loss": 1.7533, "step": 5980 }, { "epoch": 0.6870369306760095, "grad_norm": 0.4783158600330353, "learning_rate": 0.0001, "loss": 1.5382, "step": 5981 }, { "epoch": 0.6871518005858366, "grad_norm": 0.44039803743362427, "learning_rate": 0.0001, "loss": 1.6981, "step": 5982 }, { "epoch": 0.6872666704956637, "grad_norm": 0.44316425919532776, "learning_rate": 0.0001, "loss": 1.6761, "step": 5983 }, { "epoch": 0.6873815404054908, "grad_norm": 0.442717581987381, "learning_rate": 0.0001, "loss": 1.6243, "step": 5984 }, { "epoch": 0.6874964103153179, "grad_norm": 0.40507930517196655, "learning_rate": 0.0001, "loss": 1.5069, "step": 5985 }, { "epoch": 0.6876112802251451, "grad_norm": 0.4449567496776581, "learning_rate": 0.0001, "loss": 1.6059, "step": 5986 }, { "epoch": 0.6877261501349722, "grad_norm": 0.4152354300022125, "learning_rate": 0.0001, "loss": 1.5178, "step": 5987 }, { "epoch": 0.6878410200447993, "grad_norm": 0.4327658414840698, "learning_rate": 0.0001, "loss": 1.6211, "step": 5988 }, { "epoch": 0.6879558899546264, "grad_norm": 0.39523184299468994, "learning_rate": 0.0001, "loss": 1.4862, "step": 5989 }, { "epoch": 0.6880707598644535, "grad_norm": 0.4782007336616516, "learning_rate": 0.0001, "loss": 1.7516, "step": 5990 }, { "epoch": 0.6881856297742807, "grad_norm": 0.4494955539703369, "learning_rate": 0.0001, "loss": 1.5783, "step": 5991 }, { "epoch": 0.6883004996841078, "grad_norm": 0.44850456714630127, "learning_rate": 0.0001, "loss": 1.6185, "step": 5992 }, { "epoch": 0.6884153695939349, "grad_norm": 0.43033161759376526, "learning_rate": 0.0001, "loss": 1.4819, "step": 5993 }, { "epoch": 0.688530239503762, "grad_norm": 0.4402123987674713, "learning_rate": 0.0001, "loss": 1.6092, "step": 5994 }, { "epoch": 0.6886451094135891, "grad_norm": 0.41884496808052063, "learning_rate": 0.0001, "loss": 1.5384, "step": 5995 }, { "epoch": 0.6887599793234163, "grad_norm": 0.4290192127227783, "learning_rate": 0.0001, "loss": 1.6155, "step": 5996 }, { "epoch": 0.6888748492332434, "grad_norm": 0.5004449486732483, "learning_rate": 0.0001, "loss": 1.6538, "step": 5997 }, { "epoch": 0.6889897191430705, "grad_norm": 0.4282512068748474, "learning_rate": 0.0001, "loss": 1.554, "step": 5998 }, { "epoch": 0.6891045890528976, "grad_norm": 0.4622134566307068, "learning_rate": 0.0001, "loss": 1.5423, "step": 5999 }, { "epoch": 0.6892194589627247, "grad_norm": 0.43330010771751404, "learning_rate": 0.0001, "loss": 1.2861, "step": 6000 }, { "epoch": 0.6893343288725519, "grad_norm": 0.47380784153938293, "learning_rate": 0.0001, "loss": 1.5154, "step": 6001 }, { "epoch": 0.689449198782379, "grad_norm": 0.4909721314907074, "learning_rate": 0.0001, "loss": 1.6611, "step": 6002 }, { "epoch": 0.6895640686922061, "grad_norm": 0.444553405046463, "learning_rate": 0.0001, "loss": 1.5056, "step": 6003 }, { "epoch": 0.6896789386020332, "grad_norm": 0.4200032651424408, "learning_rate": 0.0001, "loss": 1.4871, "step": 6004 }, { "epoch": 0.6897938085118603, "grad_norm": 0.45052191615104675, "learning_rate": 0.0001, "loss": 1.6345, "step": 6005 }, { "epoch": 0.6899086784216875, "grad_norm": 0.44020742177963257, "learning_rate": 0.0001, "loss": 1.5327, "step": 6006 }, { "epoch": 0.6900235483315146, "grad_norm": 0.4479757249355316, "learning_rate": 0.0001, "loss": 1.652, "step": 6007 }, { "epoch": 0.6901384182413417, "grad_norm": 0.40676623582839966, "learning_rate": 0.0001, "loss": 1.4707, "step": 6008 }, { "epoch": 0.6902532881511688, "grad_norm": 0.44237807393074036, "learning_rate": 0.0001, "loss": 1.6429, "step": 6009 }, { "epoch": 0.6903681580609959, "grad_norm": 0.5069726705551147, "learning_rate": 0.0001, "loss": 1.819, "step": 6010 }, { "epoch": 0.6904830279708231, "grad_norm": 0.4417176842689514, "learning_rate": 0.0001, "loss": 1.5259, "step": 6011 }, { "epoch": 0.6905978978806502, "grad_norm": 0.4071502685546875, "learning_rate": 0.0001, "loss": 1.4589, "step": 6012 }, { "epoch": 0.6907127677904773, "grad_norm": 0.4580749273300171, "learning_rate": 0.0001, "loss": 1.4879, "step": 6013 }, { "epoch": 0.6908276377003044, "grad_norm": 0.4313982427120209, "learning_rate": 0.0001, "loss": 1.584, "step": 6014 }, { "epoch": 0.6909425076101315, "grad_norm": 0.4240618944168091, "learning_rate": 0.0001, "loss": 1.5827, "step": 6015 }, { "epoch": 0.6910573775199587, "grad_norm": 0.6877708435058594, "learning_rate": 0.0001, "loss": 1.4963, "step": 6016 }, { "epoch": 0.6911722474297858, "grad_norm": 0.4607912003993988, "learning_rate": 0.0001, "loss": 1.6725, "step": 6017 }, { "epoch": 0.6912871173396129, "grad_norm": 0.44218817353248596, "learning_rate": 0.0001, "loss": 1.7323, "step": 6018 }, { "epoch": 0.69140198724944, "grad_norm": 0.4411420226097107, "learning_rate": 0.0001, "loss": 1.6747, "step": 6019 }, { "epoch": 0.6915168571592671, "grad_norm": 0.4698505103588104, "learning_rate": 0.0001, "loss": 1.8326, "step": 6020 }, { "epoch": 0.6916317270690943, "grad_norm": 0.4248715937137604, "learning_rate": 0.0001, "loss": 1.7072, "step": 6021 }, { "epoch": 0.6917465969789214, "grad_norm": 0.4034425616264343, "learning_rate": 0.0001, "loss": 1.4084, "step": 6022 }, { "epoch": 0.6918614668887485, "grad_norm": 0.4377814829349518, "learning_rate": 0.0001, "loss": 1.5643, "step": 6023 }, { "epoch": 0.6919763367985756, "grad_norm": 0.4794885218143463, "learning_rate": 0.0001, "loss": 1.6453, "step": 6024 }, { "epoch": 0.6920912067084027, "grad_norm": 0.4388996958732605, "learning_rate": 0.0001, "loss": 1.6848, "step": 6025 }, { "epoch": 0.6922060766182299, "grad_norm": 0.3899931311607361, "learning_rate": 0.0001, "loss": 1.3496, "step": 6026 }, { "epoch": 0.692320946528057, "grad_norm": 0.49666503071784973, "learning_rate": 0.0001, "loss": 1.7286, "step": 6027 }, { "epoch": 0.6924358164378841, "grad_norm": 0.4482400119304657, "learning_rate": 0.0001, "loss": 1.6767, "step": 6028 }, { "epoch": 0.6925506863477112, "grad_norm": 0.41266587376594543, "learning_rate": 0.0001, "loss": 1.4796, "step": 6029 }, { "epoch": 0.6926655562575383, "grad_norm": 0.3807138502597809, "learning_rate": 0.0001, "loss": 1.34, "step": 6030 }, { "epoch": 0.6927804261673655, "grad_norm": 0.46382012963294983, "learning_rate": 0.0001, "loss": 1.5642, "step": 6031 }, { "epoch": 0.6928952960771926, "grad_norm": 0.45717358589172363, "learning_rate": 0.0001, "loss": 1.6184, "step": 6032 }, { "epoch": 0.6930101659870197, "grad_norm": 0.44662654399871826, "learning_rate": 0.0001, "loss": 1.7359, "step": 6033 }, { "epoch": 0.6931250358968468, "grad_norm": 0.4659220278263092, "learning_rate": 0.0001, "loss": 1.7403, "step": 6034 }, { "epoch": 0.6932399058066739, "grad_norm": 0.4265783727169037, "learning_rate": 0.0001, "loss": 1.6654, "step": 6035 }, { "epoch": 0.6933547757165011, "grad_norm": 0.45917513966560364, "learning_rate": 0.0001, "loss": 1.5656, "step": 6036 }, { "epoch": 0.6934696456263282, "grad_norm": 0.4457080066204071, "learning_rate": 0.0001, "loss": 1.6824, "step": 6037 }, { "epoch": 0.6935845155361553, "grad_norm": 0.428594172000885, "learning_rate": 0.0001, "loss": 1.4754, "step": 6038 }, { "epoch": 0.6936993854459824, "grad_norm": 0.4832080602645874, "learning_rate": 0.0001, "loss": 1.7493, "step": 6039 }, { "epoch": 0.6938142553558095, "grad_norm": 0.4549182653427124, "learning_rate": 0.0001, "loss": 1.5576, "step": 6040 }, { "epoch": 0.6939291252656367, "grad_norm": 0.46311667561531067, "learning_rate": 0.0001, "loss": 1.6486, "step": 6041 }, { "epoch": 0.6940439951754638, "grad_norm": 0.43765565752983093, "learning_rate": 0.0001, "loss": 1.5472, "step": 6042 }, { "epoch": 0.6941588650852909, "grad_norm": 0.4676961600780487, "learning_rate": 0.0001, "loss": 1.5775, "step": 6043 }, { "epoch": 0.694273734995118, "grad_norm": 0.48037439584732056, "learning_rate": 0.0001, "loss": 1.7783, "step": 6044 }, { "epoch": 0.6943886049049451, "grad_norm": 0.4451299011707306, "learning_rate": 0.0001, "loss": 1.5532, "step": 6045 }, { "epoch": 0.6945034748147723, "grad_norm": 0.4473123848438263, "learning_rate": 0.0001, "loss": 1.7079, "step": 6046 }, { "epoch": 0.6946183447245994, "grad_norm": 0.4191644489765167, "learning_rate": 0.0001, "loss": 1.4635, "step": 6047 }, { "epoch": 0.6947332146344265, "grad_norm": 0.5033878087997437, "learning_rate": 0.0001, "loss": 1.6531, "step": 6048 }, { "epoch": 0.6948480845442536, "grad_norm": 0.4745533764362335, "learning_rate": 0.0001, "loss": 1.6522, "step": 6049 }, { "epoch": 0.6949629544540807, "grad_norm": 0.44269341230392456, "learning_rate": 0.0001, "loss": 1.5863, "step": 6050 }, { "epoch": 0.6950778243639079, "grad_norm": 0.47186774015426636, "learning_rate": 0.0001, "loss": 1.6539, "step": 6051 }, { "epoch": 0.695192694273735, "grad_norm": 0.5083333849906921, "learning_rate": 0.0001, "loss": 1.7727, "step": 6052 }, { "epoch": 0.6953075641835621, "grad_norm": 0.4614897072315216, "learning_rate": 0.0001, "loss": 1.669, "step": 6053 }, { "epoch": 0.6954224340933892, "grad_norm": 0.47114160656929016, "learning_rate": 0.0001, "loss": 1.6241, "step": 6054 }, { "epoch": 0.6955373040032163, "grad_norm": 0.4435749650001526, "learning_rate": 0.0001, "loss": 1.5528, "step": 6055 }, { "epoch": 0.6956521739130435, "grad_norm": 0.4462308883666992, "learning_rate": 0.0001, "loss": 1.5747, "step": 6056 }, { "epoch": 0.6957670438228706, "grad_norm": 0.4508498013019562, "learning_rate": 0.0001, "loss": 1.772, "step": 6057 }, { "epoch": 0.6958819137326977, "grad_norm": 0.45102837681770325, "learning_rate": 0.0001, "loss": 1.5337, "step": 6058 }, { "epoch": 0.6959967836425248, "grad_norm": 0.427142471075058, "learning_rate": 0.0001, "loss": 1.5359, "step": 6059 }, { "epoch": 0.6961116535523519, "grad_norm": 0.44649988412857056, "learning_rate": 0.0001, "loss": 1.7092, "step": 6060 }, { "epoch": 0.6962265234621791, "grad_norm": 0.44798871874809265, "learning_rate": 0.0001, "loss": 1.6849, "step": 6061 }, { "epoch": 0.6963413933720062, "grad_norm": 0.5189127326011658, "learning_rate": 0.0001, "loss": 1.1203, "step": 6062 }, { "epoch": 0.6964562632818333, "grad_norm": 0.4362175762653351, "learning_rate": 0.0001, "loss": 1.6535, "step": 6063 }, { "epoch": 0.6965711331916604, "grad_norm": 0.4506964683532715, "learning_rate": 0.0001, "loss": 1.5871, "step": 6064 }, { "epoch": 0.6966860031014875, "grad_norm": 0.4647710621356964, "learning_rate": 0.0001, "loss": 1.5128, "step": 6065 }, { "epoch": 0.6968008730113147, "grad_norm": 0.43700385093688965, "learning_rate": 0.0001, "loss": 1.5093, "step": 6066 }, { "epoch": 0.6969157429211418, "grad_norm": 0.4622689187526703, "learning_rate": 0.0001, "loss": 1.6878, "step": 6067 }, { "epoch": 0.6970306128309689, "grad_norm": 0.4806528687477112, "learning_rate": 0.0001, "loss": 1.7705, "step": 6068 }, { "epoch": 0.697145482740796, "grad_norm": 0.4175126254558563, "learning_rate": 0.0001, "loss": 1.4511, "step": 6069 }, { "epoch": 0.6972603526506231, "grad_norm": 0.4509715735912323, "learning_rate": 0.0001, "loss": 1.5212, "step": 6070 }, { "epoch": 0.6973752225604503, "grad_norm": 0.4509661793708801, "learning_rate": 0.0001, "loss": 1.5706, "step": 6071 }, { "epoch": 0.6974900924702774, "grad_norm": 0.4647543728351593, "learning_rate": 0.0001, "loss": 1.7786, "step": 6072 }, { "epoch": 0.6976049623801045, "grad_norm": 0.4497321546077728, "learning_rate": 0.0001, "loss": 1.4881, "step": 6073 }, { "epoch": 0.6977198322899316, "grad_norm": 0.431309312582016, "learning_rate": 0.0001, "loss": 1.5486, "step": 6074 }, { "epoch": 0.6978347021997587, "grad_norm": 0.423828661441803, "learning_rate": 0.0001, "loss": 1.717, "step": 6075 }, { "epoch": 0.6979495721095859, "grad_norm": 0.4408908784389496, "learning_rate": 0.0001, "loss": 1.5664, "step": 6076 }, { "epoch": 0.698064442019413, "grad_norm": 0.4263383448123932, "learning_rate": 0.0001, "loss": 1.5551, "step": 6077 }, { "epoch": 0.6981793119292401, "grad_norm": 0.4382713735103607, "learning_rate": 0.0001, "loss": 1.6121, "step": 6078 }, { "epoch": 0.6982941818390672, "grad_norm": 0.4006011486053467, "learning_rate": 0.0001, "loss": 1.4744, "step": 6079 }, { "epoch": 0.6984090517488943, "grad_norm": 0.4670262038707733, "learning_rate": 0.0001, "loss": 1.7082, "step": 6080 }, { "epoch": 0.6985239216587215, "grad_norm": 0.46280163526535034, "learning_rate": 0.0001, "loss": 1.7092, "step": 6081 }, { "epoch": 0.6986387915685486, "grad_norm": 0.4739822745323181, "learning_rate": 0.0001, "loss": 1.6459, "step": 6082 }, { "epoch": 0.6987536614783757, "grad_norm": 0.44407156109809875, "learning_rate": 0.0001, "loss": 1.6641, "step": 6083 }, { "epoch": 0.6988685313882028, "grad_norm": 0.44577091932296753, "learning_rate": 0.0001, "loss": 1.5051, "step": 6084 }, { "epoch": 0.6989834012980299, "grad_norm": 0.465145468711853, "learning_rate": 0.0001, "loss": 1.5938, "step": 6085 }, { "epoch": 0.6990982712078571, "grad_norm": 0.47078871726989746, "learning_rate": 0.0001, "loss": 1.7108, "step": 6086 }, { "epoch": 0.6992131411176842, "grad_norm": 0.4897402226924896, "learning_rate": 0.0001, "loss": 1.5122, "step": 6087 }, { "epoch": 0.6993280110275113, "grad_norm": 0.46980687975883484, "learning_rate": 0.0001, "loss": 1.7254, "step": 6088 }, { "epoch": 0.6994428809373384, "grad_norm": 0.41509878635406494, "learning_rate": 0.0001, "loss": 1.4403, "step": 6089 }, { "epoch": 0.6995577508471655, "grad_norm": 0.3764444887638092, "learning_rate": 0.0001, "loss": 1.2123, "step": 6090 }, { "epoch": 0.6996726207569927, "grad_norm": 0.4393060803413391, "learning_rate": 0.0001, "loss": 1.4743, "step": 6091 }, { "epoch": 0.6997874906668198, "grad_norm": 0.45182573795318604, "learning_rate": 0.0001, "loss": 1.7701, "step": 6092 }, { "epoch": 0.6999023605766469, "grad_norm": 0.4436628818511963, "learning_rate": 0.0001, "loss": 1.6242, "step": 6093 }, { "epoch": 0.700017230486474, "grad_norm": 0.44743016362190247, "learning_rate": 0.0001, "loss": 1.7463, "step": 6094 }, { "epoch": 0.7001321003963011, "grad_norm": 0.4477095901966095, "learning_rate": 0.0001, "loss": 1.6296, "step": 6095 }, { "epoch": 0.7002469703061283, "grad_norm": 0.43921157717704773, "learning_rate": 0.0001, "loss": 1.6887, "step": 6096 }, { "epoch": 0.7003618402159554, "grad_norm": 0.4518073797225952, "learning_rate": 0.0001, "loss": 1.715, "step": 6097 }, { "epoch": 0.7004767101257825, "grad_norm": 0.4737764298915863, "learning_rate": 0.0001, "loss": 1.6202, "step": 6098 }, { "epoch": 0.7005915800356096, "grad_norm": 0.42427653074264526, "learning_rate": 0.0001, "loss": 1.4087, "step": 6099 }, { "epoch": 0.7007064499454367, "grad_norm": 0.42667144536972046, "learning_rate": 0.0001, "loss": 1.5881, "step": 6100 }, { "epoch": 0.700821319855264, "grad_norm": 0.49589815735816956, "learning_rate": 0.0001, "loss": 1.712, "step": 6101 }, { "epoch": 0.7009361897650911, "grad_norm": 0.43956196308135986, "learning_rate": 0.0001, "loss": 1.5506, "step": 6102 }, { "epoch": 0.7010510596749182, "grad_norm": 0.45342060923576355, "learning_rate": 0.0001, "loss": 1.6568, "step": 6103 }, { "epoch": 0.7011659295847453, "grad_norm": 0.4133893847465515, "learning_rate": 0.0001, "loss": 1.4956, "step": 6104 }, { "epoch": 0.7012807994945724, "grad_norm": 0.41374027729034424, "learning_rate": 0.0001, "loss": 1.6463, "step": 6105 }, { "epoch": 0.7013956694043996, "grad_norm": 0.44979679584503174, "learning_rate": 0.0001, "loss": 1.7201, "step": 6106 }, { "epoch": 0.7015105393142267, "grad_norm": 0.42173275351524353, "learning_rate": 0.0001, "loss": 1.6273, "step": 6107 }, { "epoch": 0.7016254092240538, "grad_norm": 0.4458094835281372, "learning_rate": 0.0001, "loss": 1.6467, "step": 6108 }, { "epoch": 0.7017402791338809, "grad_norm": 0.44119831919670105, "learning_rate": 0.0001, "loss": 1.7528, "step": 6109 }, { "epoch": 0.701855149043708, "grad_norm": 0.4866656959056854, "learning_rate": 0.0001, "loss": 1.6004, "step": 6110 }, { "epoch": 0.7019700189535352, "grad_norm": 0.43976983428001404, "learning_rate": 0.0001, "loss": 1.5994, "step": 6111 }, { "epoch": 0.7020848888633623, "grad_norm": 0.4584594666957855, "learning_rate": 0.0001, "loss": 1.5199, "step": 6112 }, { "epoch": 0.7021997587731894, "grad_norm": 0.4671233594417572, "learning_rate": 0.0001, "loss": 1.5643, "step": 6113 }, { "epoch": 0.7023146286830165, "grad_norm": 0.4047556519508362, "learning_rate": 0.0001, "loss": 1.3305, "step": 6114 }, { "epoch": 0.7024294985928436, "grad_norm": 0.44763243198394775, "learning_rate": 0.0001, "loss": 1.5656, "step": 6115 }, { "epoch": 0.7025443685026708, "grad_norm": 0.42392697930336, "learning_rate": 0.0001, "loss": 1.5303, "step": 6116 }, { "epoch": 0.7026592384124979, "grad_norm": 0.4174063205718994, "learning_rate": 0.0001, "loss": 1.4395, "step": 6117 }, { "epoch": 0.702774108322325, "grad_norm": 0.44011473655700684, "learning_rate": 0.0001, "loss": 1.568, "step": 6118 }, { "epoch": 0.7028889782321521, "grad_norm": 0.467358261346817, "learning_rate": 0.0001, "loss": 1.7407, "step": 6119 }, { "epoch": 0.7030038481419792, "grad_norm": 0.43859270215034485, "learning_rate": 0.0001, "loss": 1.6201, "step": 6120 }, { "epoch": 0.7031187180518064, "grad_norm": 0.45992812514305115, "learning_rate": 0.0001, "loss": 1.6784, "step": 6121 }, { "epoch": 0.7032335879616335, "grad_norm": 0.5027499198913574, "learning_rate": 0.0001, "loss": 1.8963, "step": 6122 }, { "epoch": 0.7033484578714606, "grad_norm": 0.4613763689994812, "learning_rate": 0.0001, "loss": 1.7305, "step": 6123 }, { "epoch": 0.7034633277812877, "grad_norm": 0.44609150290489197, "learning_rate": 0.0001, "loss": 1.593, "step": 6124 }, { "epoch": 0.7035781976911148, "grad_norm": 0.47288355231285095, "learning_rate": 0.0001, "loss": 1.5033, "step": 6125 }, { "epoch": 0.703693067600942, "grad_norm": 0.4981932044029236, "learning_rate": 0.0001, "loss": 1.6387, "step": 6126 }, { "epoch": 0.7038079375107691, "grad_norm": 0.43476441502571106, "learning_rate": 0.0001, "loss": 1.6783, "step": 6127 }, { "epoch": 0.7039228074205962, "grad_norm": 0.4434894323348999, "learning_rate": 0.0001, "loss": 1.5384, "step": 6128 }, { "epoch": 0.7040376773304233, "grad_norm": 0.4626059830188751, "learning_rate": 0.0001, "loss": 1.6981, "step": 6129 }, { "epoch": 0.7041525472402504, "grad_norm": 0.43429237604141235, "learning_rate": 0.0001, "loss": 1.6668, "step": 6130 }, { "epoch": 0.7042674171500776, "grad_norm": 0.4962638318538666, "learning_rate": 0.0001, "loss": 1.6528, "step": 6131 }, { "epoch": 0.7043822870599047, "grad_norm": 0.47368475794792175, "learning_rate": 0.0001, "loss": 1.7509, "step": 6132 }, { "epoch": 0.7044971569697318, "grad_norm": 0.44226646423339844, "learning_rate": 0.0001, "loss": 1.5321, "step": 6133 }, { "epoch": 0.7046120268795589, "grad_norm": 0.4369940757751465, "learning_rate": 0.0001, "loss": 1.6187, "step": 6134 }, { "epoch": 0.704726896789386, "grad_norm": 0.4563344120979309, "learning_rate": 0.0001, "loss": 1.7639, "step": 6135 }, { "epoch": 0.7048417666992132, "grad_norm": 0.4372788965702057, "learning_rate": 0.0001, "loss": 1.6908, "step": 6136 }, { "epoch": 0.7049566366090403, "grad_norm": 0.43250662088394165, "learning_rate": 0.0001, "loss": 1.6608, "step": 6137 }, { "epoch": 0.7050715065188674, "grad_norm": 0.43698424100875854, "learning_rate": 0.0001, "loss": 1.656, "step": 6138 }, { "epoch": 0.7051863764286945, "grad_norm": 0.4592958390712738, "learning_rate": 0.0001, "loss": 1.6603, "step": 6139 }, { "epoch": 0.7053012463385216, "grad_norm": 0.4927108883857727, "learning_rate": 0.0001, "loss": 1.6586, "step": 6140 }, { "epoch": 0.7054161162483488, "grad_norm": 0.41103577613830566, "learning_rate": 0.0001, "loss": 1.4693, "step": 6141 }, { "epoch": 0.7055309861581759, "grad_norm": 0.429688423871994, "learning_rate": 0.0001, "loss": 1.6462, "step": 6142 }, { "epoch": 0.705645856068003, "grad_norm": 0.4417214095592499, "learning_rate": 0.0001, "loss": 1.769, "step": 6143 }, { "epoch": 0.7057607259778301, "grad_norm": 0.4288574457168579, "learning_rate": 0.0001, "loss": 1.6017, "step": 6144 }, { "epoch": 0.7058755958876572, "grad_norm": 0.4483475089073181, "learning_rate": 0.0001, "loss": 1.6976, "step": 6145 }, { "epoch": 0.7059904657974844, "grad_norm": 0.44906046986579895, "learning_rate": 0.0001, "loss": 1.771, "step": 6146 }, { "epoch": 0.7061053357073115, "grad_norm": 0.43787476420402527, "learning_rate": 0.0001, "loss": 1.5868, "step": 6147 }, { "epoch": 0.7062202056171386, "grad_norm": 0.47539424896240234, "learning_rate": 0.0001, "loss": 1.5655, "step": 6148 }, { "epoch": 0.7063350755269657, "grad_norm": 0.4609004855155945, "learning_rate": 0.0001, "loss": 1.5423, "step": 6149 }, { "epoch": 0.7064499454367928, "grad_norm": 0.443278431892395, "learning_rate": 0.0001, "loss": 1.5154, "step": 6150 }, { "epoch": 0.70656481534662, "grad_norm": 0.43776434659957886, "learning_rate": 0.0001, "loss": 1.518, "step": 6151 }, { "epoch": 0.7066796852564471, "grad_norm": 0.4367440342903137, "learning_rate": 0.0001, "loss": 1.6544, "step": 6152 }, { "epoch": 0.7067945551662742, "grad_norm": 0.4293498694896698, "learning_rate": 0.0001, "loss": 1.6271, "step": 6153 }, { "epoch": 0.7069094250761013, "grad_norm": 0.46721354126930237, "learning_rate": 0.0001, "loss": 1.8351, "step": 6154 }, { "epoch": 0.7070242949859284, "grad_norm": 0.4610436260700226, "learning_rate": 0.0001, "loss": 1.6588, "step": 6155 }, { "epoch": 0.7071391648957556, "grad_norm": 0.44414547085762024, "learning_rate": 0.0001, "loss": 1.6412, "step": 6156 }, { "epoch": 0.7072540348055827, "grad_norm": 0.4219599962234497, "learning_rate": 0.0001, "loss": 1.5344, "step": 6157 }, { "epoch": 0.7073689047154098, "grad_norm": 0.45348840951919556, "learning_rate": 0.0001, "loss": 1.6561, "step": 6158 }, { "epoch": 0.7074837746252369, "grad_norm": 0.44765985012054443, "learning_rate": 0.0001, "loss": 1.7357, "step": 6159 }, { "epoch": 0.707598644535064, "grad_norm": 0.43484190106391907, "learning_rate": 0.0001, "loss": 1.6815, "step": 6160 }, { "epoch": 0.7077135144448912, "grad_norm": 0.44024690985679626, "learning_rate": 0.0001, "loss": 1.5954, "step": 6161 }, { "epoch": 0.7078283843547183, "grad_norm": 0.4765664339065552, "learning_rate": 0.0001, "loss": 1.6059, "step": 6162 }, { "epoch": 0.7079432542645454, "grad_norm": 0.4749075174331665, "learning_rate": 0.0001, "loss": 1.6469, "step": 6163 }, { "epoch": 0.7080581241743725, "grad_norm": 0.47169381380081177, "learning_rate": 0.0001, "loss": 1.7209, "step": 6164 }, { "epoch": 0.7081729940841996, "grad_norm": 0.45661336183547974, "learning_rate": 0.0001, "loss": 1.573, "step": 6165 }, { "epoch": 0.7082878639940268, "grad_norm": 0.45333951711654663, "learning_rate": 0.0001, "loss": 1.6396, "step": 6166 }, { "epoch": 0.7084027339038539, "grad_norm": 0.4652560353279114, "learning_rate": 0.0001, "loss": 1.7598, "step": 6167 }, { "epoch": 0.708517603813681, "grad_norm": 0.46927064657211304, "learning_rate": 0.0001, "loss": 1.782, "step": 6168 }, { "epoch": 0.7086324737235081, "grad_norm": 0.44694873690605164, "learning_rate": 0.0001, "loss": 1.7146, "step": 6169 }, { "epoch": 0.7087473436333352, "grad_norm": 0.4289446473121643, "learning_rate": 0.0001, "loss": 1.6457, "step": 6170 }, { "epoch": 0.7088622135431624, "grad_norm": 0.41569510102272034, "learning_rate": 0.0001, "loss": 1.4681, "step": 6171 }, { "epoch": 0.7089770834529895, "grad_norm": 0.42229345440864563, "learning_rate": 0.0001, "loss": 1.6183, "step": 6172 }, { "epoch": 0.7090919533628166, "grad_norm": 0.4784461557865143, "learning_rate": 0.0001, "loss": 1.727, "step": 6173 }, { "epoch": 0.7092068232726437, "grad_norm": 0.40896356105804443, "learning_rate": 0.0001, "loss": 1.3902, "step": 6174 }, { "epoch": 0.7093216931824708, "grad_norm": 0.4578010141849518, "learning_rate": 0.0001, "loss": 1.5634, "step": 6175 }, { "epoch": 0.709436563092298, "grad_norm": 0.4804964065551758, "learning_rate": 0.0001, "loss": 1.5759, "step": 6176 }, { "epoch": 0.7095514330021251, "grad_norm": 0.4631587266921997, "learning_rate": 0.0001, "loss": 1.6562, "step": 6177 }, { "epoch": 0.7096663029119522, "grad_norm": 0.4732271134853363, "learning_rate": 0.0001, "loss": 1.7075, "step": 6178 }, { "epoch": 0.7097811728217793, "grad_norm": 0.4341810643672943, "learning_rate": 0.0001, "loss": 1.6526, "step": 6179 }, { "epoch": 0.7098960427316064, "grad_norm": 0.4346090853214264, "learning_rate": 0.0001, "loss": 1.6048, "step": 6180 }, { "epoch": 0.7100109126414336, "grad_norm": 0.4299982190132141, "learning_rate": 0.0001, "loss": 1.4397, "step": 6181 }, { "epoch": 0.7101257825512607, "grad_norm": 0.4570772647857666, "learning_rate": 0.0001, "loss": 1.7246, "step": 6182 }, { "epoch": 0.7102406524610878, "grad_norm": 0.44084087014198303, "learning_rate": 0.0001, "loss": 1.664, "step": 6183 }, { "epoch": 0.7103555223709149, "grad_norm": 0.4548969566822052, "learning_rate": 0.0001, "loss": 1.707, "step": 6184 }, { "epoch": 0.710470392280742, "grad_norm": 0.40112459659576416, "learning_rate": 0.0001, "loss": 1.4677, "step": 6185 }, { "epoch": 0.7105852621905692, "grad_norm": 0.4894128143787384, "learning_rate": 0.0001, "loss": 1.7654, "step": 6186 }, { "epoch": 0.7107001321003963, "grad_norm": 0.46307554841041565, "learning_rate": 0.0001, "loss": 1.731, "step": 6187 }, { "epoch": 0.7108150020102234, "grad_norm": 0.44500499963760376, "learning_rate": 0.0001, "loss": 1.4718, "step": 6188 }, { "epoch": 0.7109298719200505, "grad_norm": 0.46841320395469666, "learning_rate": 0.0001, "loss": 1.7267, "step": 6189 }, { "epoch": 0.7110447418298776, "grad_norm": 0.452532023191452, "learning_rate": 0.0001, "loss": 1.567, "step": 6190 }, { "epoch": 0.7111596117397048, "grad_norm": 0.4364396929740906, "learning_rate": 0.0001, "loss": 1.4916, "step": 6191 }, { "epoch": 0.7112744816495319, "grad_norm": 0.46478208899497986, "learning_rate": 0.0001, "loss": 1.7538, "step": 6192 }, { "epoch": 0.711389351559359, "grad_norm": 0.5013893246650696, "learning_rate": 0.0001, "loss": 1.9132, "step": 6193 }, { "epoch": 0.7115042214691861, "grad_norm": 0.4351854622364044, "learning_rate": 0.0001, "loss": 1.5114, "step": 6194 }, { "epoch": 0.7116190913790132, "grad_norm": 0.4540832042694092, "learning_rate": 0.0001, "loss": 1.782, "step": 6195 }, { "epoch": 0.7117339612888404, "grad_norm": 0.444970041513443, "learning_rate": 0.0001, "loss": 1.5446, "step": 6196 }, { "epoch": 0.7118488311986675, "grad_norm": 0.4413135349750519, "learning_rate": 0.0001, "loss": 1.4519, "step": 6197 }, { "epoch": 0.7119637011084946, "grad_norm": 0.4258001148700714, "learning_rate": 0.0001, "loss": 1.6815, "step": 6198 }, { "epoch": 0.7120785710183217, "grad_norm": 0.4716213047504425, "learning_rate": 0.0001, "loss": 1.7128, "step": 6199 }, { "epoch": 0.7121934409281488, "grad_norm": 0.4352003335952759, "learning_rate": 0.0001, "loss": 1.6439, "step": 6200 }, { "epoch": 0.712308310837976, "grad_norm": 0.43441304564476013, "learning_rate": 0.0001, "loss": 1.5772, "step": 6201 }, { "epoch": 0.7124231807478031, "grad_norm": 0.4781912565231323, "learning_rate": 0.0001, "loss": 1.685, "step": 6202 }, { "epoch": 0.7125380506576302, "grad_norm": 0.4544585347175598, "learning_rate": 0.0001, "loss": 1.7234, "step": 6203 }, { "epoch": 0.7126529205674573, "grad_norm": 0.398957222700119, "learning_rate": 0.0001, "loss": 1.431, "step": 6204 }, { "epoch": 0.7127677904772844, "grad_norm": 0.44276830554008484, "learning_rate": 0.0001, "loss": 1.4976, "step": 6205 }, { "epoch": 0.7128826603871116, "grad_norm": 0.44275638461112976, "learning_rate": 0.0001, "loss": 1.5749, "step": 6206 }, { "epoch": 0.7129975302969387, "grad_norm": 0.5160487294197083, "learning_rate": 0.0001, "loss": 1.6689, "step": 6207 }, { "epoch": 0.7131124002067658, "grad_norm": 0.44017454981803894, "learning_rate": 0.0001, "loss": 1.6119, "step": 6208 }, { "epoch": 0.7132272701165929, "grad_norm": 0.4619143307209015, "learning_rate": 0.0001, "loss": 1.8167, "step": 6209 }, { "epoch": 0.71334214002642, "grad_norm": 0.4661194086074829, "learning_rate": 0.0001, "loss": 1.5478, "step": 6210 }, { "epoch": 0.7134570099362472, "grad_norm": 0.4605958163738251, "learning_rate": 0.0001, "loss": 1.7684, "step": 6211 }, { "epoch": 0.7135718798460743, "grad_norm": 0.463900089263916, "learning_rate": 0.0001, "loss": 1.7286, "step": 6212 }, { "epoch": 0.7136867497559014, "grad_norm": 0.48637497425079346, "learning_rate": 0.0001, "loss": 1.8437, "step": 6213 }, { "epoch": 0.7138016196657285, "grad_norm": 0.40677204728126526, "learning_rate": 0.0001, "loss": 1.4627, "step": 6214 }, { "epoch": 0.7139164895755556, "grad_norm": 0.5679096579551697, "learning_rate": 0.0001, "loss": 1.4847, "step": 6215 }, { "epoch": 0.7140313594853828, "grad_norm": 0.4575541019439697, "learning_rate": 0.0001, "loss": 1.7186, "step": 6216 }, { "epoch": 0.7141462293952099, "grad_norm": 0.4405764043331146, "learning_rate": 0.0001, "loss": 1.7805, "step": 6217 }, { "epoch": 0.714261099305037, "grad_norm": 0.4583107531070709, "learning_rate": 0.0001, "loss": 1.625, "step": 6218 }, { "epoch": 0.7143759692148641, "grad_norm": 0.44853004813194275, "learning_rate": 0.0001, "loss": 1.4672, "step": 6219 }, { "epoch": 0.7144908391246912, "grad_norm": 0.4117748439311981, "learning_rate": 0.0001, "loss": 1.4321, "step": 6220 }, { "epoch": 0.7146057090345184, "grad_norm": 0.43110406398773193, "learning_rate": 0.0001, "loss": 1.483, "step": 6221 }, { "epoch": 0.7147205789443455, "grad_norm": 0.46626466512680054, "learning_rate": 0.0001, "loss": 1.4815, "step": 6222 }, { "epoch": 0.7148354488541726, "grad_norm": 0.4862065017223358, "learning_rate": 0.0001, "loss": 1.6176, "step": 6223 }, { "epoch": 0.7149503187639997, "grad_norm": 0.4461599588394165, "learning_rate": 0.0001, "loss": 1.4846, "step": 6224 }, { "epoch": 0.7150651886738268, "grad_norm": 0.4415801763534546, "learning_rate": 0.0001, "loss": 1.7068, "step": 6225 }, { "epoch": 0.715180058583654, "grad_norm": 0.4292342960834503, "learning_rate": 0.0001, "loss": 1.5751, "step": 6226 }, { "epoch": 0.7152949284934811, "grad_norm": 0.4549332857131958, "learning_rate": 0.0001, "loss": 1.699, "step": 6227 }, { "epoch": 0.7154097984033082, "grad_norm": 0.4223959147930145, "learning_rate": 0.0001, "loss": 1.4203, "step": 6228 }, { "epoch": 0.7155246683131353, "grad_norm": 0.42485642433166504, "learning_rate": 0.0001, "loss": 1.5133, "step": 6229 }, { "epoch": 0.7156395382229624, "grad_norm": 0.45224228501319885, "learning_rate": 0.0001, "loss": 1.4644, "step": 6230 }, { "epoch": 0.7157544081327896, "grad_norm": 0.4319050908088684, "learning_rate": 0.0001, "loss": 1.4666, "step": 6231 }, { "epoch": 0.7158692780426167, "grad_norm": 0.4529288113117218, "learning_rate": 0.0001, "loss": 1.7083, "step": 6232 }, { "epoch": 0.7159841479524438, "grad_norm": 0.4431191682815552, "learning_rate": 0.0001, "loss": 1.5841, "step": 6233 }, { "epoch": 0.7160990178622709, "grad_norm": 0.44510093331336975, "learning_rate": 0.0001, "loss": 1.54, "step": 6234 }, { "epoch": 0.716213887772098, "grad_norm": 0.43626534938812256, "learning_rate": 0.0001, "loss": 1.5614, "step": 6235 }, { "epoch": 0.7163287576819252, "grad_norm": 0.4344061017036438, "learning_rate": 0.0001, "loss": 1.3115, "step": 6236 }, { "epoch": 0.7164436275917523, "grad_norm": 0.5267593860626221, "learning_rate": 0.0001, "loss": 1.9086, "step": 6237 }, { "epoch": 0.7165584975015795, "grad_norm": 0.4149724245071411, "learning_rate": 0.0001, "loss": 1.6038, "step": 6238 }, { "epoch": 0.7166733674114066, "grad_norm": 0.4510195255279541, "learning_rate": 0.0001, "loss": 1.8163, "step": 6239 }, { "epoch": 0.7167882373212338, "grad_norm": 0.4458303451538086, "learning_rate": 0.0001, "loss": 1.5324, "step": 6240 }, { "epoch": 0.7169031072310609, "grad_norm": 0.4366897642612457, "learning_rate": 0.0001, "loss": 1.6088, "step": 6241 }, { "epoch": 0.717017977140888, "grad_norm": 0.43107160925865173, "learning_rate": 0.0001, "loss": 1.5355, "step": 6242 }, { "epoch": 0.7171328470507151, "grad_norm": 0.43756523728370667, "learning_rate": 0.0001, "loss": 1.5967, "step": 6243 }, { "epoch": 0.7172477169605422, "grad_norm": 0.4852414131164551, "learning_rate": 0.0001, "loss": 1.8807, "step": 6244 }, { "epoch": 0.7173625868703694, "grad_norm": 0.43804264068603516, "learning_rate": 0.0001, "loss": 1.551, "step": 6245 }, { "epoch": 0.7174774567801965, "grad_norm": 0.4281727075576782, "learning_rate": 0.0001, "loss": 1.551, "step": 6246 }, { "epoch": 0.7175923266900236, "grad_norm": 0.45637455582618713, "learning_rate": 0.0001, "loss": 1.6597, "step": 6247 }, { "epoch": 0.7177071965998507, "grad_norm": 0.48942601680755615, "learning_rate": 0.0001, "loss": 1.7886, "step": 6248 }, { "epoch": 0.7178220665096778, "grad_norm": 0.44699156284332275, "learning_rate": 0.0001, "loss": 1.6049, "step": 6249 }, { "epoch": 0.717936936419505, "grad_norm": 0.45388197898864746, "learning_rate": 0.0001, "loss": 1.5923, "step": 6250 }, { "epoch": 0.7180518063293321, "grad_norm": 0.4573292136192322, "learning_rate": 0.0001, "loss": 1.3832, "step": 6251 }, { "epoch": 0.7181666762391592, "grad_norm": 0.4720889925956726, "learning_rate": 0.0001, "loss": 1.698, "step": 6252 }, { "epoch": 0.7182815461489863, "grad_norm": 0.44124001264572144, "learning_rate": 0.0001, "loss": 1.468, "step": 6253 }, { "epoch": 0.7183964160588134, "grad_norm": 0.48664864897727966, "learning_rate": 0.0001, "loss": 1.6452, "step": 6254 }, { "epoch": 0.7185112859686406, "grad_norm": 0.44402679800987244, "learning_rate": 0.0001, "loss": 1.5786, "step": 6255 }, { "epoch": 0.7186261558784677, "grad_norm": 0.46472397446632385, "learning_rate": 0.0001, "loss": 1.4484, "step": 6256 }, { "epoch": 0.7187410257882948, "grad_norm": 0.49754804372787476, "learning_rate": 0.0001, "loss": 1.9748, "step": 6257 }, { "epoch": 0.7188558956981219, "grad_norm": 0.439335435628891, "learning_rate": 0.0001, "loss": 1.695, "step": 6258 }, { "epoch": 0.718970765607949, "grad_norm": 0.4374498426914215, "learning_rate": 0.0001, "loss": 1.5566, "step": 6259 }, { "epoch": 0.7190856355177762, "grad_norm": 0.4089290201663971, "learning_rate": 0.0001, "loss": 1.4817, "step": 6260 }, { "epoch": 0.7192005054276033, "grad_norm": 0.4366007447242737, "learning_rate": 0.0001, "loss": 1.5968, "step": 6261 }, { "epoch": 0.7193153753374304, "grad_norm": 0.44272562861442566, "learning_rate": 0.0001, "loss": 1.4592, "step": 6262 }, { "epoch": 0.7194302452472575, "grad_norm": 0.4497889280319214, "learning_rate": 0.0001, "loss": 1.6329, "step": 6263 }, { "epoch": 0.7195451151570846, "grad_norm": 0.43780991435050964, "learning_rate": 0.0001, "loss": 1.5423, "step": 6264 }, { "epoch": 0.7196599850669118, "grad_norm": 0.4534786343574524, "learning_rate": 0.0001, "loss": 1.7093, "step": 6265 }, { "epoch": 0.7197748549767389, "grad_norm": 0.42275941371917725, "learning_rate": 0.0001, "loss": 1.4069, "step": 6266 }, { "epoch": 0.719889724886566, "grad_norm": 0.47264930605888367, "learning_rate": 0.0001, "loss": 1.6001, "step": 6267 }, { "epoch": 0.7200045947963931, "grad_norm": 0.4754311740398407, "learning_rate": 0.0001, "loss": 1.8423, "step": 6268 }, { "epoch": 0.7201194647062202, "grad_norm": 0.4460528492927551, "learning_rate": 0.0001, "loss": 1.4606, "step": 6269 }, { "epoch": 0.7202343346160474, "grad_norm": 0.43937763571739197, "learning_rate": 0.0001, "loss": 1.5779, "step": 6270 }, { "epoch": 0.7203492045258745, "grad_norm": 0.4385170638561249, "learning_rate": 0.0001, "loss": 1.6334, "step": 6271 }, { "epoch": 0.7204640744357016, "grad_norm": 0.4786663353443146, "learning_rate": 0.0001, "loss": 1.3311, "step": 6272 }, { "epoch": 0.7205789443455287, "grad_norm": 0.45746463537216187, "learning_rate": 0.0001, "loss": 1.6661, "step": 6273 }, { "epoch": 0.7206938142553558, "grad_norm": 0.4555452764034271, "learning_rate": 0.0001, "loss": 1.6727, "step": 6274 }, { "epoch": 0.720808684165183, "grad_norm": 0.4532817006111145, "learning_rate": 0.0001, "loss": 1.5359, "step": 6275 }, { "epoch": 0.7209235540750101, "grad_norm": 0.42538461089134216, "learning_rate": 0.0001, "loss": 1.5689, "step": 6276 }, { "epoch": 0.7210384239848372, "grad_norm": 0.4184345006942749, "learning_rate": 0.0001, "loss": 1.442, "step": 6277 }, { "epoch": 0.7211532938946643, "grad_norm": 0.4728604853153229, "learning_rate": 0.0001, "loss": 1.7058, "step": 6278 }, { "epoch": 0.7212681638044914, "grad_norm": 0.45347052812576294, "learning_rate": 0.0001, "loss": 1.4433, "step": 6279 }, { "epoch": 0.7213830337143186, "grad_norm": 0.4866488575935364, "learning_rate": 0.0001, "loss": 1.7027, "step": 6280 }, { "epoch": 0.7214979036241457, "grad_norm": 0.48550790548324585, "learning_rate": 0.0001, "loss": 1.6111, "step": 6281 }, { "epoch": 0.7216127735339728, "grad_norm": 0.45319709181785583, "learning_rate": 0.0001, "loss": 1.5712, "step": 6282 }, { "epoch": 0.7217276434437999, "grad_norm": 0.49187415838241577, "learning_rate": 0.0001, "loss": 1.7311, "step": 6283 }, { "epoch": 0.721842513353627, "grad_norm": 0.4277302324771881, "learning_rate": 0.0001, "loss": 1.5716, "step": 6284 }, { "epoch": 0.7219573832634542, "grad_norm": 0.4738651216030121, "learning_rate": 0.0001, "loss": 1.6308, "step": 6285 }, { "epoch": 0.7220722531732813, "grad_norm": 0.4568105638027191, "learning_rate": 0.0001, "loss": 1.5111, "step": 6286 }, { "epoch": 0.7221871230831084, "grad_norm": 0.41216906905174255, "learning_rate": 0.0001, "loss": 1.4644, "step": 6287 }, { "epoch": 0.7223019929929355, "grad_norm": 0.4425976574420929, "learning_rate": 0.0001, "loss": 1.6404, "step": 6288 }, { "epoch": 0.7224168629027626, "grad_norm": 0.42881807684898376, "learning_rate": 0.0001, "loss": 1.6171, "step": 6289 }, { "epoch": 0.7225317328125898, "grad_norm": 0.41956013441085815, "learning_rate": 0.0001, "loss": 1.4663, "step": 6290 }, { "epoch": 0.7226466027224169, "grad_norm": 0.46805068850517273, "learning_rate": 0.0001, "loss": 1.727, "step": 6291 }, { "epoch": 0.722761472632244, "grad_norm": 0.432712197303772, "learning_rate": 0.0001, "loss": 1.4838, "step": 6292 }, { "epoch": 0.7228763425420711, "grad_norm": 0.4402911067008972, "learning_rate": 0.0001, "loss": 1.7633, "step": 6293 }, { "epoch": 0.7229912124518982, "grad_norm": 0.44642138481140137, "learning_rate": 0.0001, "loss": 1.5206, "step": 6294 }, { "epoch": 0.7231060823617254, "grad_norm": 0.43020543456077576, "learning_rate": 0.0001, "loss": 1.7095, "step": 6295 }, { "epoch": 0.7232209522715525, "grad_norm": 0.4489821195602417, "learning_rate": 0.0001, "loss": 1.5012, "step": 6296 }, { "epoch": 0.7233358221813796, "grad_norm": 0.427055299282074, "learning_rate": 0.0001, "loss": 1.5981, "step": 6297 }, { "epoch": 0.7234506920912067, "grad_norm": 0.41614022850990295, "learning_rate": 0.0001, "loss": 1.3573, "step": 6298 }, { "epoch": 0.7235655620010338, "grad_norm": 0.4344491958618164, "learning_rate": 0.0001, "loss": 1.6139, "step": 6299 }, { "epoch": 0.723680431910861, "grad_norm": 0.43843087553977966, "learning_rate": 0.0001, "loss": 1.1252, "step": 6300 }, { "epoch": 0.7237953018206881, "grad_norm": 0.4700905680656433, "learning_rate": 0.0001, "loss": 1.48, "step": 6301 }, { "epoch": 0.7239101717305152, "grad_norm": 0.48474234342575073, "learning_rate": 0.0001, "loss": 1.652, "step": 6302 }, { "epoch": 0.7240250416403423, "grad_norm": 0.44834601879119873, "learning_rate": 0.0001, "loss": 1.3831, "step": 6303 }, { "epoch": 0.7241399115501694, "grad_norm": 0.4446815252304077, "learning_rate": 0.0001, "loss": 1.6446, "step": 6304 }, { "epoch": 0.7242547814599966, "grad_norm": 0.4817836880683899, "learning_rate": 0.0001, "loss": 1.5911, "step": 6305 }, { "epoch": 0.7243696513698237, "grad_norm": 0.5483472347259521, "learning_rate": 0.0001, "loss": 1.6487, "step": 6306 }, { "epoch": 0.7244845212796508, "grad_norm": 0.42818188667297363, "learning_rate": 0.0001, "loss": 1.5687, "step": 6307 }, { "epoch": 0.7245993911894779, "grad_norm": 0.49449649453163147, "learning_rate": 0.0001, "loss": 1.5372, "step": 6308 }, { "epoch": 0.724714261099305, "grad_norm": 0.4369608163833618, "learning_rate": 0.0001, "loss": 1.5778, "step": 6309 }, { "epoch": 0.7248291310091322, "grad_norm": 0.5256768465042114, "learning_rate": 0.0001, "loss": 1.583, "step": 6310 }, { "epoch": 0.7249440009189593, "grad_norm": 0.44161832332611084, "learning_rate": 0.0001, "loss": 1.5239, "step": 6311 }, { "epoch": 0.7250588708287864, "grad_norm": 0.4450240731239319, "learning_rate": 0.0001, "loss": 1.3923, "step": 6312 }, { "epoch": 0.7251737407386135, "grad_norm": 0.45049697160720825, "learning_rate": 0.0001, "loss": 1.6166, "step": 6313 }, { "epoch": 0.7252886106484406, "grad_norm": 0.43609675765037537, "learning_rate": 0.0001, "loss": 1.7061, "step": 6314 }, { "epoch": 0.7254034805582678, "grad_norm": 0.516049325466156, "learning_rate": 0.0001, "loss": 1.7453, "step": 6315 }, { "epoch": 0.7255183504680949, "grad_norm": 0.45927536487579346, "learning_rate": 0.0001, "loss": 1.5909, "step": 6316 }, { "epoch": 0.725633220377922, "grad_norm": 0.4337046146392822, "learning_rate": 0.0001, "loss": 1.6932, "step": 6317 }, { "epoch": 0.7257480902877491, "grad_norm": 0.44162049889564514, "learning_rate": 0.0001, "loss": 1.541, "step": 6318 }, { "epoch": 0.7258629601975762, "grad_norm": 0.4465755224227905, "learning_rate": 0.0001, "loss": 1.5295, "step": 6319 }, { "epoch": 0.7259778301074034, "grad_norm": 0.4506109654903412, "learning_rate": 0.0001, "loss": 1.6735, "step": 6320 }, { "epoch": 0.7260927000172305, "grad_norm": 0.45097821950912476, "learning_rate": 0.0001, "loss": 1.595, "step": 6321 }, { "epoch": 0.7262075699270576, "grad_norm": 0.4603080749511719, "learning_rate": 0.0001, "loss": 1.7865, "step": 6322 }, { "epoch": 0.7263224398368847, "grad_norm": 0.4264637231826782, "learning_rate": 0.0001, "loss": 1.7495, "step": 6323 }, { "epoch": 0.7264373097467118, "grad_norm": 0.4358821213245392, "learning_rate": 0.0001, "loss": 1.7046, "step": 6324 }, { "epoch": 0.726552179656539, "grad_norm": 0.4452092945575714, "learning_rate": 0.0001, "loss": 1.6058, "step": 6325 }, { "epoch": 0.7266670495663661, "grad_norm": 0.5116655230522156, "learning_rate": 0.0001, "loss": 1.7254, "step": 6326 }, { "epoch": 0.7267819194761932, "grad_norm": 0.4238274097442627, "learning_rate": 0.0001, "loss": 1.4587, "step": 6327 }, { "epoch": 0.7268967893860203, "grad_norm": 0.4614119231700897, "learning_rate": 0.0001, "loss": 1.7239, "step": 6328 }, { "epoch": 0.7270116592958474, "grad_norm": 0.47386687994003296, "learning_rate": 0.0001, "loss": 1.6542, "step": 6329 }, { "epoch": 0.7271265292056746, "grad_norm": 0.4244793653488159, "learning_rate": 0.0001, "loss": 1.5649, "step": 6330 }, { "epoch": 0.7272413991155017, "grad_norm": 0.44631174206733704, "learning_rate": 0.0001, "loss": 1.4767, "step": 6331 }, { "epoch": 0.7273562690253288, "grad_norm": 0.4230101406574249, "learning_rate": 0.0001, "loss": 1.4192, "step": 6332 }, { "epoch": 0.7274711389351559, "grad_norm": 0.42536047101020813, "learning_rate": 0.0001, "loss": 1.5629, "step": 6333 }, { "epoch": 0.727586008844983, "grad_norm": 0.45973941683769226, "learning_rate": 0.0001, "loss": 1.7336, "step": 6334 }, { "epoch": 0.7277008787548102, "grad_norm": 0.4620080888271332, "learning_rate": 0.0001, "loss": 1.6115, "step": 6335 }, { "epoch": 0.7278157486646373, "grad_norm": 0.5099964737892151, "learning_rate": 0.0001, "loss": 1.5348, "step": 6336 }, { "epoch": 0.7279306185744644, "grad_norm": 0.4300444722175598, "learning_rate": 0.0001, "loss": 1.3811, "step": 6337 }, { "epoch": 0.7280454884842915, "grad_norm": 0.5135782957077026, "learning_rate": 0.0001, "loss": 1.8914, "step": 6338 }, { "epoch": 0.7281603583941186, "grad_norm": 0.4119094908237457, "learning_rate": 0.0001, "loss": 1.5381, "step": 6339 }, { "epoch": 0.7282752283039458, "grad_norm": 0.4423353374004364, "learning_rate": 0.0001, "loss": 1.6617, "step": 6340 }, { "epoch": 0.7283900982137729, "grad_norm": 0.4462561011314392, "learning_rate": 0.0001, "loss": 1.6236, "step": 6341 }, { "epoch": 0.7285049681236, "grad_norm": 0.47333210706710815, "learning_rate": 0.0001, "loss": 1.6761, "step": 6342 }, { "epoch": 0.7286198380334271, "grad_norm": 0.46242082118988037, "learning_rate": 0.0001, "loss": 1.5732, "step": 6343 }, { "epoch": 0.7287347079432542, "grad_norm": 0.4549300968647003, "learning_rate": 0.0001, "loss": 1.6149, "step": 6344 }, { "epoch": 0.7288495778530814, "grad_norm": 0.46361663937568665, "learning_rate": 0.0001, "loss": 1.7257, "step": 6345 }, { "epoch": 0.7289644477629085, "grad_norm": 0.4788486361503601, "learning_rate": 0.0001, "loss": 1.6031, "step": 6346 }, { "epoch": 0.7290793176727356, "grad_norm": 0.4863881766796112, "learning_rate": 0.0001, "loss": 1.8181, "step": 6347 }, { "epoch": 0.7291941875825627, "grad_norm": 0.4203285276889801, "learning_rate": 0.0001, "loss": 1.5597, "step": 6348 }, { "epoch": 0.7293090574923898, "grad_norm": 0.4476950168609619, "learning_rate": 0.0001, "loss": 1.5288, "step": 6349 }, { "epoch": 0.729423927402217, "grad_norm": 0.4235599935054779, "learning_rate": 0.0001, "loss": 1.3824, "step": 6350 }, { "epoch": 0.7295387973120441, "grad_norm": 0.4462408423423767, "learning_rate": 0.0001, "loss": 1.5426, "step": 6351 }, { "epoch": 0.7296536672218712, "grad_norm": 0.40480706095695496, "learning_rate": 0.0001, "loss": 1.4957, "step": 6352 }, { "epoch": 0.7297685371316983, "grad_norm": 0.4449676275253296, "learning_rate": 0.0001, "loss": 1.5833, "step": 6353 }, { "epoch": 0.7298834070415254, "grad_norm": 0.43793755769729614, "learning_rate": 0.0001, "loss": 1.5997, "step": 6354 }, { "epoch": 0.7299982769513526, "grad_norm": 0.45420295000076294, "learning_rate": 0.0001, "loss": 1.7223, "step": 6355 }, { "epoch": 0.7301131468611797, "grad_norm": 0.5009284615516663, "learning_rate": 0.0001, "loss": 1.5439, "step": 6356 }, { "epoch": 0.7302280167710068, "grad_norm": 0.5149348974227905, "learning_rate": 0.0001, "loss": 1.5916, "step": 6357 }, { "epoch": 0.7303428866808339, "grad_norm": 0.4348343014717102, "learning_rate": 0.0001, "loss": 1.6413, "step": 6358 }, { "epoch": 0.730457756590661, "grad_norm": 0.44643691182136536, "learning_rate": 0.0001, "loss": 1.4579, "step": 6359 }, { "epoch": 0.7305726265004882, "grad_norm": 0.42023977637290955, "learning_rate": 0.0001, "loss": 1.5319, "step": 6360 }, { "epoch": 0.7306874964103153, "grad_norm": 0.49339669942855835, "learning_rate": 0.0001, "loss": 1.7135, "step": 6361 }, { "epoch": 0.7308023663201424, "grad_norm": 0.44311895966529846, "learning_rate": 0.0001, "loss": 1.5733, "step": 6362 }, { "epoch": 0.7309172362299695, "grad_norm": 0.4121879041194916, "learning_rate": 0.0001, "loss": 1.3989, "step": 6363 }, { "epoch": 0.7310321061397966, "grad_norm": 0.43048325181007385, "learning_rate": 0.0001, "loss": 1.6959, "step": 6364 }, { "epoch": 0.7311469760496238, "grad_norm": 0.4717782735824585, "learning_rate": 0.0001, "loss": 1.6826, "step": 6365 }, { "epoch": 0.7312618459594509, "grad_norm": 0.46323728561401367, "learning_rate": 0.0001, "loss": 1.6805, "step": 6366 }, { "epoch": 0.731376715869278, "grad_norm": 0.4346836805343628, "learning_rate": 0.0001, "loss": 1.5547, "step": 6367 }, { "epoch": 0.7314915857791051, "grad_norm": 0.47042396664619446, "learning_rate": 0.0001, "loss": 1.5663, "step": 6368 }, { "epoch": 0.7316064556889322, "grad_norm": 0.44026100635528564, "learning_rate": 0.0001, "loss": 1.618, "step": 6369 }, { "epoch": 0.7317213255987594, "grad_norm": 0.428864449262619, "learning_rate": 0.0001, "loss": 1.4316, "step": 6370 }, { "epoch": 0.7318361955085865, "grad_norm": 0.4537869989871979, "learning_rate": 0.0001, "loss": 1.5533, "step": 6371 }, { "epoch": 0.7319510654184136, "grad_norm": 0.45302248001098633, "learning_rate": 0.0001, "loss": 1.5901, "step": 6372 }, { "epoch": 0.7320659353282407, "grad_norm": 0.4309695363044739, "learning_rate": 0.0001, "loss": 1.6486, "step": 6373 }, { "epoch": 0.7321808052380678, "grad_norm": 0.4645009934902191, "learning_rate": 0.0001, "loss": 1.5925, "step": 6374 }, { "epoch": 0.7322956751478951, "grad_norm": 0.42796799540519714, "learning_rate": 0.0001, "loss": 1.6142, "step": 6375 }, { "epoch": 0.7324105450577222, "grad_norm": 0.4301031231880188, "learning_rate": 0.0001, "loss": 1.4068, "step": 6376 }, { "epoch": 0.7325254149675493, "grad_norm": 0.4349769055843353, "learning_rate": 0.0001, "loss": 1.5291, "step": 6377 }, { "epoch": 0.7326402848773764, "grad_norm": 0.4733826518058777, "learning_rate": 0.0001, "loss": 1.6648, "step": 6378 }, { "epoch": 0.7327551547872035, "grad_norm": 0.4366433322429657, "learning_rate": 0.0001, "loss": 1.5781, "step": 6379 }, { "epoch": 0.7328700246970307, "grad_norm": 0.47278228402137756, "learning_rate": 0.0001, "loss": 1.6445, "step": 6380 }, { "epoch": 0.7329848946068578, "grad_norm": 0.4604305922985077, "learning_rate": 0.0001, "loss": 1.7437, "step": 6381 }, { "epoch": 0.7330997645166849, "grad_norm": 0.45044025778770447, "learning_rate": 0.0001, "loss": 1.649, "step": 6382 }, { "epoch": 0.733214634426512, "grad_norm": 0.4326942563056946, "learning_rate": 0.0001, "loss": 1.567, "step": 6383 }, { "epoch": 0.7333295043363391, "grad_norm": 0.4268265664577484, "learning_rate": 0.0001, "loss": 1.627, "step": 6384 }, { "epoch": 0.7334443742461663, "grad_norm": 0.49201422929763794, "learning_rate": 0.0001, "loss": 1.8518, "step": 6385 }, { "epoch": 0.7335592441559934, "grad_norm": 0.4553898274898529, "learning_rate": 0.0001, "loss": 1.8142, "step": 6386 }, { "epoch": 0.7336741140658205, "grad_norm": 0.4906349778175354, "learning_rate": 0.0001, "loss": 1.8773, "step": 6387 }, { "epoch": 0.7337889839756476, "grad_norm": 0.4062640070915222, "learning_rate": 0.0001, "loss": 1.4207, "step": 6388 }, { "epoch": 0.7339038538854747, "grad_norm": 0.4532264173030853, "learning_rate": 0.0001, "loss": 1.7094, "step": 6389 }, { "epoch": 0.7340187237953019, "grad_norm": 0.4875713586807251, "learning_rate": 0.0001, "loss": 1.835, "step": 6390 }, { "epoch": 0.734133593705129, "grad_norm": 0.4259282052516937, "learning_rate": 0.0001, "loss": 1.4534, "step": 6391 }, { "epoch": 0.7342484636149561, "grad_norm": 0.4660018980503082, "learning_rate": 0.0001, "loss": 1.7193, "step": 6392 }, { "epoch": 0.7343633335247832, "grad_norm": 0.4709480106830597, "learning_rate": 0.0001, "loss": 1.7843, "step": 6393 }, { "epoch": 0.7344782034346103, "grad_norm": 0.4932401180267334, "learning_rate": 0.0001, "loss": 1.629, "step": 6394 }, { "epoch": 0.7345930733444375, "grad_norm": 0.4418211281299591, "learning_rate": 0.0001, "loss": 1.51, "step": 6395 }, { "epoch": 0.7347079432542646, "grad_norm": 0.4617857038974762, "learning_rate": 0.0001, "loss": 1.6093, "step": 6396 }, { "epoch": 0.7348228131640917, "grad_norm": 0.44936829805374146, "learning_rate": 0.0001, "loss": 1.5856, "step": 6397 }, { "epoch": 0.7349376830739188, "grad_norm": 0.49701419472694397, "learning_rate": 0.0001, "loss": 1.7478, "step": 6398 }, { "epoch": 0.7350525529837459, "grad_norm": 0.42285898327827454, "learning_rate": 0.0001, "loss": 1.4871, "step": 6399 }, { "epoch": 0.7351674228935731, "grad_norm": 0.46721500158309937, "learning_rate": 0.0001, "loss": 1.535, "step": 6400 }, { "epoch": 0.7352822928034002, "grad_norm": 0.4647897779941559, "learning_rate": 0.0001, "loss": 1.635, "step": 6401 }, { "epoch": 0.7353971627132273, "grad_norm": 0.48368552327156067, "learning_rate": 0.0001, "loss": 1.734, "step": 6402 }, { "epoch": 0.7355120326230544, "grad_norm": 0.4766364097595215, "learning_rate": 0.0001, "loss": 1.6196, "step": 6403 }, { "epoch": 0.7356269025328815, "grad_norm": 0.4311745762825012, "learning_rate": 0.0001, "loss": 1.5385, "step": 6404 }, { "epoch": 0.7357417724427087, "grad_norm": 0.424921452999115, "learning_rate": 0.0001, "loss": 1.4973, "step": 6405 }, { "epoch": 0.7358566423525358, "grad_norm": 0.44613373279571533, "learning_rate": 0.0001, "loss": 1.6768, "step": 6406 }, { "epoch": 0.7359715122623629, "grad_norm": 0.52595055103302, "learning_rate": 0.0001, "loss": 1.8678, "step": 6407 }, { "epoch": 0.73608638217219, "grad_norm": 0.4704437851905823, "learning_rate": 0.0001, "loss": 1.7375, "step": 6408 }, { "epoch": 0.7362012520820171, "grad_norm": 0.4089406132698059, "learning_rate": 0.0001, "loss": 1.5, "step": 6409 }, { "epoch": 0.7363161219918443, "grad_norm": 0.4442186951637268, "learning_rate": 0.0001, "loss": 1.4664, "step": 6410 }, { "epoch": 0.7364309919016714, "grad_norm": 0.4903067350387573, "learning_rate": 0.0001, "loss": 1.7815, "step": 6411 }, { "epoch": 0.7365458618114985, "grad_norm": 0.47199535369873047, "learning_rate": 0.0001, "loss": 1.7579, "step": 6412 }, { "epoch": 0.7366607317213256, "grad_norm": 0.4860610365867615, "learning_rate": 0.0001, "loss": 1.7036, "step": 6413 }, { "epoch": 0.7367756016311527, "grad_norm": 0.44982248544692993, "learning_rate": 0.0001, "loss": 1.5699, "step": 6414 }, { "epoch": 0.7368904715409799, "grad_norm": 0.41628536581993103, "learning_rate": 0.0001, "loss": 1.3993, "step": 6415 }, { "epoch": 0.737005341450807, "grad_norm": 0.4167647063732147, "learning_rate": 0.0001, "loss": 1.4185, "step": 6416 }, { "epoch": 0.7371202113606341, "grad_norm": 0.44325876235961914, "learning_rate": 0.0001, "loss": 1.6582, "step": 6417 }, { "epoch": 0.7372350812704612, "grad_norm": 0.4645753800868988, "learning_rate": 0.0001, "loss": 1.44, "step": 6418 }, { "epoch": 0.7373499511802883, "grad_norm": 0.41918548941612244, "learning_rate": 0.0001, "loss": 1.584, "step": 6419 }, { "epoch": 0.7374648210901155, "grad_norm": 0.4537114202976227, "learning_rate": 0.0001, "loss": 1.664, "step": 6420 }, { "epoch": 0.7375796909999426, "grad_norm": 0.4690401554107666, "learning_rate": 0.0001, "loss": 1.7344, "step": 6421 }, { "epoch": 0.7376945609097697, "grad_norm": 0.5302848815917969, "learning_rate": 0.0001, "loss": 1.8387, "step": 6422 }, { "epoch": 0.7378094308195968, "grad_norm": 0.49464482069015503, "learning_rate": 0.0001, "loss": 1.7305, "step": 6423 }, { "epoch": 0.7379243007294239, "grad_norm": 0.48099836707115173, "learning_rate": 0.0001, "loss": 1.5702, "step": 6424 }, { "epoch": 0.7380391706392511, "grad_norm": 0.4626838266849518, "learning_rate": 0.0001, "loss": 1.6737, "step": 6425 }, { "epoch": 0.7381540405490782, "grad_norm": 0.5171931385993958, "learning_rate": 0.0001, "loss": 1.562, "step": 6426 }, { "epoch": 0.7382689104589053, "grad_norm": 0.46404412388801575, "learning_rate": 0.0001, "loss": 1.767, "step": 6427 }, { "epoch": 0.7383837803687324, "grad_norm": 0.44521862268447876, "learning_rate": 0.0001, "loss": 1.5083, "step": 6428 }, { "epoch": 0.7384986502785595, "grad_norm": 0.4961501359939575, "learning_rate": 0.0001, "loss": 1.6542, "step": 6429 }, { "epoch": 0.7386135201883867, "grad_norm": 0.43013089895248413, "learning_rate": 0.0001, "loss": 1.5063, "step": 6430 }, { "epoch": 0.7387283900982138, "grad_norm": 0.43474483489990234, "learning_rate": 0.0001, "loss": 1.459, "step": 6431 }, { "epoch": 0.7388432600080409, "grad_norm": 0.4235991835594177, "learning_rate": 0.0001, "loss": 1.6437, "step": 6432 }, { "epoch": 0.738958129917868, "grad_norm": 0.45976969599723816, "learning_rate": 0.0001, "loss": 1.6887, "step": 6433 }, { "epoch": 0.7390729998276951, "grad_norm": 0.5007357597351074, "learning_rate": 0.0001, "loss": 1.5328, "step": 6434 }, { "epoch": 0.7391878697375223, "grad_norm": 0.4222777783870697, "learning_rate": 0.0001, "loss": 1.4098, "step": 6435 }, { "epoch": 0.7393027396473494, "grad_norm": 0.4068715274333954, "learning_rate": 0.0001, "loss": 1.2175, "step": 6436 }, { "epoch": 0.7394176095571765, "grad_norm": 0.4571983516216278, "learning_rate": 0.0001, "loss": 1.5671, "step": 6437 }, { "epoch": 0.7395324794670036, "grad_norm": 0.4495130181312561, "learning_rate": 0.0001, "loss": 1.5278, "step": 6438 }, { "epoch": 0.7396473493768307, "grad_norm": 0.48614737391471863, "learning_rate": 0.0001, "loss": 1.6044, "step": 6439 }, { "epoch": 0.7397622192866579, "grad_norm": 0.48480525612831116, "learning_rate": 0.0001, "loss": 1.6996, "step": 6440 }, { "epoch": 0.739877089196485, "grad_norm": 0.46960780024528503, "learning_rate": 0.0001, "loss": 1.3502, "step": 6441 }, { "epoch": 0.7399919591063121, "grad_norm": 0.45793652534484863, "learning_rate": 0.0001, "loss": 1.6815, "step": 6442 }, { "epoch": 0.7401068290161392, "grad_norm": 0.46224090456962585, "learning_rate": 0.0001, "loss": 1.6644, "step": 6443 }, { "epoch": 0.7402216989259663, "grad_norm": 0.41686657071113586, "learning_rate": 0.0001, "loss": 1.5682, "step": 6444 }, { "epoch": 0.7403365688357935, "grad_norm": 0.42895323038101196, "learning_rate": 0.0001, "loss": 1.5301, "step": 6445 }, { "epoch": 0.7404514387456206, "grad_norm": 0.42914414405822754, "learning_rate": 0.0001, "loss": 1.6159, "step": 6446 }, { "epoch": 0.7405663086554477, "grad_norm": 0.4458409249782562, "learning_rate": 0.0001, "loss": 1.5835, "step": 6447 }, { "epoch": 0.7406811785652748, "grad_norm": 0.4255877435207367, "learning_rate": 0.0001, "loss": 1.4395, "step": 6448 }, { "epoch": 0.7407960484751019, "grad_norm": 0.4231339991092682, "learning_rate": 0.0001, "loss": 1.495, "step": 6449 }, { "epoch": 0.7409109183849291, "grad_norm": 0.4323672652244568, "learning_rate": 0.0001, "loss": 1.4857, "step": 6450 }, { "epoch": 0.7410257882947562, "grad_norm": 0.4640507996082306, "learning_rate": 0.0001, "loss": 1.4276, "step": 6451 }, { "epoch": 0.7411406582045833, "grad_norm": 0.442341685295105, "learning_rate": 0.0001, "loss": 1.6777, "step": 6452 }, { "epoch": 0.7412555281144104, "grad_norm": 0.44191497564315796, "learning_rate": 0.0001, "loss": 1.561, "step": 6453 }, { "epoch": 0.7413703980242375, "grad_norm": 0.4533472955226898, "learning_rate": 0.0001, "loss": 1.5872, "step": 6454 }, { "epoch": 0.7414852679340647, "grad_norm": 0.45057639479637146, "learning_rate": 0.0001, "loss": 1.7246, "step": 6455 }, { "epoch": 0.7416001378438918, "grad_norm": 0.5106820464134216, "learning_rate": 0.0001, "loss": 1.7923, "step": 6456 }, { "epoch": 0.7417150077537189, "grad_norm": 0.4512089788913727, "learning_rate": 0.0001, "loss": 1.6305, "step": 6457 }, { "epoch": 0.741829877663546, "grad_norm": 0.44131678342819214, "learning_rate": 0.0001, "loss": 1.547, "step": 6458 }, { "epoch": 0.7419447475733731, "grad_norm": 0.4136804938316345, "learning_rate": 0.0001, "loss": 1.5348, "step": 6459 }, { "epoch": 0.7420596174832003, "grad_norm": 0.48369288444519043, "learning_rate": 0.0001, "loss": 1.7312, "step": 6460 }, { "epoch": 0.7421744873930274, "grad_norm": 0.4654393792152405, "learning_rate": 0.0001, "loss": 1.6704, "step": 6461 }, { "epoch": 0.7422893573028545, "grad_norm": 0.457107275724411, "learning_rate": 0.0001, "loss": 1.6405, "step": 6462 }, { "epoch": 0.7424042272126816, "grad_norm": 0.46595972776412964, "learning_rate": 0.0001, "loss": 1.6433, "step": 6463 }, { "epoch": 0.7425190971225087, "grad_norm": 0.43557512760162354, "learning_rate": 0.0001, "loss": 1.3404, "step": 6464 }, { "epoch": 0.7426339670323359, "grad_norm": 0.4468023180961609, "learning_rate": 0.0001, "loss": 1.531, "step": 6465 }, { "epoch": 0.742748836942163, "grad_norm": 0.4448022246360779, "learning_rate": 0.0001, "loss": 1.6072, "step": 6466 }, { "epoch": 0.7428637068519901, "grad_norm": 0.461955726146698, "learning_rate": 0.0001, "loss": 1.6381, "step": 6467 }, { "epoch": 0.7429785767618172, "grad_norm": 0.47350478172302246, "learning_rate": 0.0001, "loss": 1.5773, "step": 6468 }, { "epoch": 0.7430934466716443, "grad_norm": 0.506061851978302, "learning_rate": 0.0001, "loss": 1.9245, "step": 6469 }, { "epoch": 0.7432083165814715, "grad_norm": 0.48322594165802, "learning_rate": 0.0001, "loss": 1.8228, "step": 6470 }, { "epoch": 0.7433231864912986, "grad_norm": 0.4514816701412201, "learning_rate": 0.0001, "loss": 1.7976, "step": 6471 }, { "epoch": 0.7434380564011257, "grad_norm": 0.45972806215286255, "learning_rate": 0.0001, "loss": 1.676, "step": 6472 }, { "epoch": 0.7435529263109528, "grad_norm": 0.4653480350971222, "learning_rate": 0.0001, "loss": 1.7181, "step": 6473 }, { "epoch": 0.7436677962207799, "grad_norm": 0.4564400315284729, "learning_rate": 0.0001, "loss": 1.4732, "step": 6474 }, { "epoch": 0.7437826661306071, "grad_norm": 0.44110190868377686, "learning_rate": 0.0001, "loss": 1.4867, "step": 6475 }, { "epoch": 0.7438975360404342, "grad_norm": 0.4426039159297943, "learning_rate": 0.0001, "loss": 1.5752, "step": 6476 }, { "epoch": 0.7440124059502613, "grad_norm": 0.4515429735183716, "learning_rate": 0.0001, "loss": 1.6102, "step": 6477 }, { "epoch": 0.7441272758600884, "grad_norm": 0.4343465268611908, "learning_rate": 0.0001, "loss": 1.685, "step": 6478 }, { "epoch": 0.7442421457699155, "grad_norm": 0.448831170797348, "learning_rate": 0.0001, "loss": 1.6047, "step": 6479 }, { "epoch": 0.7443570156797427, "grad_norm": 0.45811012387275696, "learning_rate": 0.0001, "loss": 1.6288, "step": 6480 }, { "epoch": 0.7444718855895698, "grad_norm": 0.48360949754714966, "learning_rate": 0.0001, "loss": 1.6076, "step": 6481 }, { "epoch": 0.7445867554993969, "grad_norm": 0.4996730089187622, "learning_rate": 0.0001, "loss": 1.8236, "step": 6482 }, { "epoch": 0.744701625409224, "grad_norm": 0.4542221426963806, "learning_rate": 0.0001, "loss": 1.6124, "step": 6483 }, { "epoch": 0.7448164953190511, "grad_norm": 0.4285677671432495, "learning_rate": 0.0001, "loss": 1.374, "step": 6484 }, { "epoch": 0.7449313652288783, "grad_norm": 0.48036250472068787, "learning_rate": 0.0001, "loss": 1.6905, "step": 6485 }, { "epoch": 0.7450462351387054, "grad_norm": 0.4445507824420929, "learning_rate": 0.0001, "loss": 1.5425, "step": 6486 }, { "epoch": 0.7451611050485325, "grad_norm": 0.4675546884536743, "learning_rate": 0.0001, "loss": 1.7367, "step": 6487 }, { "epoch": 0.7452759749583596, "grad_norm": 0.4238932132720947, "learning_rate": 0.0001, "loss": 1.6086, "step": 6488 }, { "epoch": 0.7453908448681867, "grad_norm": 0.4289308786392212, "learning_rate": 0.0001, "loss": 1.4649, "step": 6489 }, { "epoch": 0.7455057147780139, "grad_norm": 0.449756383895874, "learning_rate": 0.0001, "loss": 1.6866, "step": 6490 }, { "epoch": 0.745620584687841, "grad_norm": 0.4513997435569763, "learning_rate": 0.0001, "loss": 1.6018, "step": 6491 }, { "epoch": 0.7457354545976681, "grad_norm": 0.49391379952430725, "learning_rate": 0.0001, "loss": 1.7308, "step": 6492 }, { "epoch": 0.7458503245074952, "grad_norm": 0.45236408710479736, "learning_rate": 0.0001, "loss": 1.6059, "step": 6493 }, { "epoch": 0.7459651944173223, "grad_norm": 0.466229110956192, "learning_rate": 0.0001, "loss": 1.7665, "step": 6494 }, { "epoch": 0.7460800643271495, "grad_norm": 0.44140058755874634, "learning_rate": 0.0001, "loss": 1.572, "step": 6495 }, { "epoch": 0.7461949342369766, "grad_norm": 0.4668871760368347, "learning_rate": 0.0001, "loss": 1.7177, "step": 6496 }, { "epoch": 0.7463098041468037, "grad_norm": 0.43868231773376465, "learning_rate": 0.0001, "loss": 1.5234, "step": 6497 }, { "epoch": 0.7464246740566308, "grad_norm": 0.517144501209259, "learning_rate": 0.0001, "loss": 1.7245, "step": 6498 }, { "epoch": 0.7465395439664579, "grad_norm": 0.4234006404876709, "learning_rate": 0.0001, "loss": 1.4977, "step": 6499 }, { "epoch": 0.7466544138762851, "grad_norm": 0.44216203689575195, "learning_rate": 0.0001, "loss": 1.549, "step": 6500 }, { "epoch": 0.7467692837861122, "grad_norm": 0.43986350297927856, "learning_rate": 0.0001, "loss": 1.6273, "step": 6501 }, { "epoch": 0.7468841536959393, "grad_norm": 0.4326637387275696, "learning_rate": 0.0001, "loss": 1.4611, "step": 6502 }, { "epoch": 0.7469990236057664, "grad_norm": 0.43953758478164673, "learning_rate": 0.0001, "loss": 1.6316, "step": 6503 }, { "epoch": 0.7471138935155935, "grad_norm": 0.44682297110557556, "learning_rate": 0.0001, "loss": 1.5408, "step": 6504 }, { "epoch": 0.7472287634254207, "grad_norm": 0.4634048342704773, "learning_rate": 0.0001, "loss": 1.68, "step": 6505 }, { "epoch": 0.7473436333352478, "grad_norm": 0.43683484196662903, "learning_rate": 0.0001, "loss": 1.6397, "step": 6506 }, { "epoch": 0.7474585032450749, "grad_norm": 0.42723357677459717, "learning_rate": 0.0001, "loss": 1.4604, "step": 6507 }, { "epoch": 0.747573373154902, "grad_norm": 0.49617254734039307, "learning_rate": 0.0001, "loss": 1.8581, "step": 6508 }, { "epoch": 0.7476882430647291, "grad_norm": 0.46171995997428894, "learning_rate": 0.0001, "loss": 1.4629, "step": 6509 }, { "epoch": 0.7478031129745563, "grad_norm": 0.4606563150882721, "learning_rate": 0.0001, "loss": 1.4719, "step": 6510 }, { "epoch": 0.7479179828843834, "grad_norm": 0.47616055607795715, "learning_rate": 0.0001, "loss": 1.7907, "step": 6511 }, { "epoch": 0.7480328527942106, "grad_norm": 0.42046403884887695, "learning_rate": 0.0001, "loss": 1.5671, "step": 6512 }, { "epoch": 0.7481477227040377, "grad_norm": 0.41847604513168335, "learning_rate": 0.0001, "loss": 1.6201, "step": 6513 }, { "epoch": 0.7482625926138649, "grad_norm": 0.45703554153442383, "learning_rate": 0.0001, "loss": 1.7209, "step": 6514 }, { "epoch": 0.748377462523692, "grad_norm": 0.40654999017715454, "learning_rate": 0.0001, "loss": 1.2961, "step": 6515 }, { "epoch": 0.7484923324335191, "grad_norm": 0.4782845377922058, "learning_rate": 0.0001, "loss": 1.6385, "step": 6516 }, { "epoch": 0.7486072023433462, "grad_norm": 0.4496891498565674, "learning_rate": 0.0001, "loss": 1.4721, "step": 6517 }, { "epoch": 0.7487220722531733, "grad_norm": 0.4883591830730438, "learning_rate": 0.0001, "loss": 1.6914, "step": 6518 }, { "epoch": 0.7488369421630005, "grad_norm": 0.5101348161697388, "learning_rate": 0.0001, "loss": 1.8276, "step": 6519 }, { "epoch": 0.7489518120728276, "grad_norm": 0.47420623898506165, "learning_rate": 0.0001, "loss": 1.7383, "step": 6520 }, { "epoch": 0.7490666819826547, "grad_norm": 0.544798731803894, "learning_rate": 0.0001, "loss": 1.2877, "step": 6521 }, { "epoch": 0.7491815518924818, "grad_norm": 0.496698796749115, "learning_rate": 0.0001, "loss": 1.7508, "step": 6522 }, { "epoch": 0.7492964218023089, "grad_norm": 0.42780181765556335, "learning_rate": 0.0001, "loss": 1.448, "step": 6523 }, { "epoch": 0.749411291712136, "grad_norm": 0.4542038142681122, "learning_rate": 0.0001, "loss": 1.6221, "step": 6524 }, { "epoch": 0.7495261616219632, "grad_norm": 0.4443798363208771, "learning_rate": 0.0001, "loss": 1.5453, "step": 6525 }, { "epoch": 0.7496410315317903, "grad_norm": 0.4632874131202698, "learning_rate": 0.0001, "loss": 1.6545, "step": 6526 }, { "epoch": 0.7497559014416174, "grad_norm": 0.4470142424106598, "learning_rate": 0.0001, "loss": 1.6075, "step": 6527 }, { "epoch": 0.7498707713514445, "grad_norm": 0.4853494465351105, "learning_rate": 0.0001, "loss": 1.6171, "step": 6528 }, { "epoch": 0.7499856412612717, "grad_norm": 0.43715977668762207, "learning_rate": 0.0001, "loss": 1.3894, "step": 6529 }, { "epoch": 0.7501005111710988, "grad_norm": 0.43983104825019836, "learning_rate": 0.0001, "loss": 1.6225, "step": 6530 }, { "epoch": 0.7502153810809259, "grad_norm": 0.4784538745880127, "learning_rate": 0.0001, "loss": 1.7794, "step": 6531 }, { "epoch": 0.750330250990753, "grad_norm": 0.47763192653656006, "learning_rate": 0.0001, "loss": 1.85, "step": 6532 }, { "epoch": 0.7504451209005801, "grad_norm": 0.4692215025424957, "learning_rate": 0.0001, "loss": 1.7338, "step": 6533 }, { "epoch": 0.7505599908104073, "grad_norm": 0.4766184091567993, "learning_rate": 0.0001, "loss": 1.8508, "step": 6534 }, { "epoch": 0.7506748607202344, "grad_norm": 0.44639328122138977, "learning_rate": 0.0001, "loss": 1.7465, "step": 6535 }, { "epoch": 0.7507897306300615, "grad_norm": 0.4912661910057068, "learning_rate": 0.0001, "loss": 1.7199, "step": 6536 }, { "epoch": 0.7509046005398886, "grad_norm": 0.4561523199081421, "learning_rate": 0.0001, "loss": 1.4812, "step": 6537 }, { "epoch": 0.7510194704497157, "grad_norm": 0.44610196352005005, "learning_rate": 0.0001, "loss": 1.5681, "step": 6538 }, { "epoch": 0.7511343403595429, "grad_norm": 0.46128877997398376, "learning_rate": 0.0001, "loss": 1.6083, "step": 6539 }, { "epoch": 0.75124921026937, "grad_norm": 0.4640040397644043, "learning_rate": 0.0001, "loss": 1.8346, "step": 6540 }, { "epoch": 0.7513640801791971, "grad_norm": 0.4831351339817047, "learning_rate": 0.0001, "loss": 1.5296, "step": 6541 }, { "epoch": 0.7514789500890242, "grad_norm": 0.4846135377883911, "learning_rate": 0.0001, "loss": 1.727, "step": 6542 }, { "epoch": 0.7515938199988513, "grad_norm": 0.40949901938438416, "learning_rate": 0.0001, "loss": 1.5282, "step": 6543 }, { "epoch": 0.7517086899086785, "grad_norm": 0.4737204909324646, "learning_rate": 0.0001, "loss": 1.766, "step": 6544 }, { "epoch": 0.7518235598185056, "grad_norm": 0.4291594922542572, "learning_rate": 0.0001, "loss": 1.6355, "step": 6545 }, { "epoch": 0.7519384297283327, "grad_norm": 0.4513239562511444, "learning_rate": 0.0001, "loss": 1.5654, "step": 6546 }, { "epoch": 0.7520532996381598, "grad_norm": 0.42415329813957214, "learning_rate": 0.0001, "loss": 1.5541, "step": 6547 }, { "epoch": 0.7521681695479869, "grad_norm": 0.4564039707183838, "learning_rate": 0.0001, "loss": 1.2685, "step": 6548 }, { "epoch": 0.752283039457814, "grad_norm": 0.4450725018978119, "learning_rate": 0.0001, "loss": 1.5905, "step": 6549 }, { "epoch": 0.7523979093676412, "grad_norm": 0.4227917492389679, "learning_rate": 0.0001, "loss": 1.448, "step": 6550 }, { "epoch": 0.7525127792774683, "grad_norm": 0.4478033483028412, "learning_rate": 0.0001, "loss": 1.6344, "step": 6551 }, { "epoch": 0.7526276491872954, "grad_norm": 0.45714229345321655, "learning_rate": 0.0001, "loss": 1.7176, "step": 6552 }, { "epoch": 0.7527425190971225, "grad_norm": 0.4998722970485687, "learning_rate": 0.0001, "loss": 1.4318, "step": 6553 }, { "epoch": 0.7528573890069497, "grad_norm": 0.49186933040618896, "learning_rate": 0.0001, "loss": 1.6897, "step": 6554 }, { "epoch": 0.7529722589167768, "grad_norm": 0.5109425187110901, "learning_rate": 0.0001, "loss": 1.7841, "step": 6555 }, { "epoch": 0.7530871288266039, "grad_norm": 0.436103492975235, "learning_rate": 0.0001, "loss": 1.5522, "step": 6556 }, { "epoch": 0.753201998736431, "grad_norm": 0.46560850739479065, "learning_rate": 0.0001, "loss": 1.4743, "step": 6557 }, { "epoch": 0.7533168686462581, "grad_norm": 0.47097107768058777, "learning_rate": 0.0001, "loss": 1.5025, "step": 6558 }, { "epoch": 0.7534317385560853, "grad_norm": 0.47928592562675476, "learning_rate": 0.0001, "loss": 1.6335, "step": 6559 }, { "epoch": 0.7535466084659124, "grad_norm": 0.52452552318573, "learning_rate": 0.0001, "loss": 1.6683, "step": 6560 }, { "epoch": 0.7536614783757395, "grad_norm": 0.47707682847976685, "learning_rate": 0.0001, "loss": 1.7117, "step": 6561 }, { "epoch": 0.7537763482855666, "grad_norm": 0.4742587208747864, "learning_rate": 0.0001, "loss": 1.6456, "step": 6562 }, { "epoch": 0.7538912181953937, "grad_norm": 0.4345073103904724, "learning_rate": 0.0001, "loss": 1.4673, "step": 6563 }, { "epoch": 0.7540060881052209, "grad_norm": 0.4827091693878174, "learning_rate": 0.0001, "loss": 1.731, "step": 6564 }, { "epoch": 0.754120958015048, "grad_norm": 0.4398217797279358, "learning_rate": 0.0001, "loss": 1.5551, "step": 6565 }, { "epoch": 0.7542358279248751, "grad_norm": 0.4701884090900421, "learning_rate": 0.0001, "loss": 1.811, "step": 6566 }, { "epoch": 0.7543506978347022, "grad_norm": 0.41458040475845337, "learning_rate": 0.0001, "loss": 1.1869, "step": 6567 }, { "epoch": 0.7544655677445293, "grad_norm": 0.4266148507595062, "learning_rate": 0.0001, "loss": 1.5984, "step": 6568 }, { "epoch": 0.7545804376543565, "grad_norm": 0.441137433052063, "learning_rate": 0.0001, "loss": 1.6422, "step": 6569 }, { "epoch": 0.7546953075641836, "grad_norm": 0.511440098285675, "learning_rate": 0.0001, "loss": 1.6331, "step": 6570 }, { "epoch": 0.7548101774740107, "grad_norm": 0.4327561855316162, "learning_rate": 0.0001, "loss": 1.3331, "step": 6571 }, { "epoch": 0.7549250473838378, "grad_norm": 0.4539497494697571, "learning_rate": 0.0001, "loss": 1.3839, "step": 6572 }, { "epoch": 0.7550399172936649, "grad_norm": 0.47245991230010986, "learning_rate": 0.0001, "loss": 1.5165, "step": 6573 }, { "epoch": 0.755154787203492, "grad_norm": 0.46419230103492737, "learning_rate": 0.0001, "loss": 1.6633, "step": 6574 }, { "epoch": 0.7552696571133192, "grad_norm": 0.47439756989479065, "learning_rate": 0.0001, "loss": 1.5447, "step": 6575 }, { "epoch": 0.7553845270231463, "grad_norm": 0.4236104488372803, "learning_rate": 0.0001, "loss": 1.3438, "step": 6576 }, { "epoch": 0.7554993969329734, "grad_norm": 0.4503575563430786, "learning_rate": 0.0001, "loss": 1.6844, "step": 6577 }, { "epoch": 0.7556142668428005, "grad_norm": 0.4399665594100952, "learning_rate": 0.0001, "loss": 1.5789, "step": 6578 }, { "epoch": 0.7557291367526277, "grad_norm": 0.45208460092544556, "learning_rate": 0.0001, "loss": 1.6652, "step": 6579 }, { "epoch": 0.7558440066624548, "grad_norm": 0.48191729187965393, "learning_rate": 0.0001, "loss": 1.7206, "step": 6580 }, { "epoch": 0.7559588765722819, "grad_norm": 0.496855765581131, "learning_rate": 0.0001, "loss": 1.5174, "step": 6581 }, { "epoch": 0.756073746482109, "grad_norm": 0.4180924594402313, "learning_rate": 0.0001, "loss": 1.4212, "step": 6582 }, { "epoch": 0.7561886163919361, "grad_norm": 0.4331256151199341, "learning_rate": 0.0001, "loss": 1.3518, "step": 6583 }, { "epoch": 0.7563034863017633, "grad_norm": 0.47106215357780457, "learning_rate": 0.0001, "loss": 1.6164, "step": 6584 }, { "epoch": 0.7564183562115904, "grad_norm": 0.4699764549732208, "learning_rate": 0.0001, "loss": 1.3998, "step": 6585 }, { "epoch": 0.7565332261214175, "grad_norm": 0.44458329677581787, "learning_rate": 0.0001, "loss": 1.5519, "step": 6586 }, { "epoch": 0.7566480960312446, "grad_norm": 0.46415066719055176, "learning_rate": 0.0001, "loss": 1.6553, "step": 6587 }, { "epoch": 0.7567629659410717, "grad_norm": 0.44048815965652466, "learning_rate": 0.0001, "loss": 1.4584, "step": 6588 }, { "epoch": 0.7568778358508989, "grad_norm": 0.43758389353752136, "learning_rate": 0.0001, "loss": 1.5962, "step": 6589 }, { "epoch": 0.756992705760726, "grad_norm": 0.44562289118766785, "learning_rate": 0.0001, "loss": 1.5985, "step": 6590 }, { "epoch": 0.7571075756705531, "grad_norm": 0.4680253267288208, "learning_rate": 0.0001, "loss": 1.6759, "step": 6591 }, { "epoch": 0.7572224455803802, "grad_norm": 0.5020065307617188, "learning_rate": 0.0001, "loss": 1.7335, "step": 6592 }, { "epoch": 0.7573373154902073, "grad_norm": 0.42748236656188965, "learning_rate": 0.0001, "loss": 1.4555, "step": 6593 }, { "epoch": 0.7574521854000345, "grad_norm": 0.4277678430080414, "learning_rate": 0.0001, "loss": 1.5494, "step": 6594 }, { "epoch": 0.7575670553098616, "grad_norm": 0.4632684886455536, "learning_rate": 0.0001, "loss": 1.5616, "step": 6595 }, { "epoch": 0.7576819252196887, "grad_norm": 0.5025344491004944, "learning_rate": 0.0001, "loss": 1.7728, "step": 6596 }, { "epoch": 0.7577967951295158, "grad_norm": 0.45161494612693787, "learning_rate": 0.0001, "loss": 1.6224, "step": 6597 }, { "epoch": 0.7579116650393429, "grad_norm": 0.45215389132499695, "learning_rate": 0.0001, "loss": 1.659, "step": 6598 }, { "epoch": 0.75802653494917, "grad_norm": 0.4214898347854614, "learning_rate": 0.0001, "loss": 1.4783, "step": 6599 }, { "epoch": 0.7581414048589972, "grad_norm": 0.5195730924606323, "learning_rate": 0.0001, "loss": 1.8058, "step": 6600 }, { "epoch": 0.7582562747688243, "grad_norm": 0.48730143904685974, "learning_rate": 0.0001, "loss": 1.8334, "step": 6601 }, { "epoch": 0.7583711446786514, "grad_norm": 0.5312708020210266, "learning_rate": 0.0001, "loss": 1.8008, "step": 6602 }, { "epoch": 0.7584860145884785, "grad_norm": 0.44341936707496643, "learning_rate": 0.0001, "loss": 1.6025, "step": 6603 }, { "epoch": 0.7586008844983056, "grad_norm": 0.4990461468696594, "learning_rate": 0.0001, "loss": 1.7039, "step": 6604 }, { "epoch": 0.7587157544081328, "grad_norm": 0.43165311217308044, "learning_rate": 0.0001, "loss": 1.6646, "step": 6605 }, { "epoch": 0.7588306243179599, "grad_norm": 0.4335373640060425, "learning_rate": 0.0001, "loss": 1.4298, "step": 6606 }, { "epoch": 0.758945494227787, "grad_norm": 0.4470045566558838, "learning_rate": 0.0001, "loss": 1.3655, "step": 6607 }, { "epoch": 0.7590603641376141, "grad_norm": 0.46403536200523376, "learning_rate": 0.0001, "loss": 1.4718, "step": 6608 }, { "epoch": 0.7591752340474412, "grad_norm": 0.49644750356674194, "learning_rate": 0.0001, "loss": 1.6212, "step": 6609 }, { "epoch": 0.7592901039572684, "grad_norm": 0.5031653046607971, "learning_rate": 0.0001, "loss": 1.5862, "step": 6610 }, { "epoch": 0.7594049738670955, "grad_norm": 0.46444204449653625, "learning_rate": 0.0001, "loss": 1.5091, "step": 6611 }, { "epoch": 0.7595198437769226, "grad_norm": 0.4190670847892761, "learning_rate": 0.0001, "loss": 1.3953, "step": 6612 }, { "epoch": 0.7596347136867497, "grad_norm": 0.4325488209724426, "learning_rate": 0.0001, "loss": 1.5455, "step": 6613 }, { "epoch": 0.7597495835965768, "grad_norm": 0.4146260917186737, "learning_rate": 0.0001, "loss": 1.4447, "step": 6614 }, { "epoch": 0.759864453506404, "grad_norm": 0.4315129816532135, "learning_rate": 0.0001, "loss": 1.5806, "step": 6615 }, { "epoch": 0.7599793234162311, "grad_norm": 0.4837382733821869, "learning_rate": 0.0001, "loss": 1.5836, "step": 6616 }, { "epoch": 0.7600941933260582, "grad_norm": 0.5264213681221008, "learning_rate": 0.0001, "loss": 1.7784, "step": 6617 }, { "epoch": 0.7602090632358853, "grad_norm": 0.4192154109477997, "learning_rate": 0.0001, "loss": 1.49, "step": 6618 }, { "epoch": 0.7603239331457124, "grad_norm": 0.4764638841152191, "learning_rate": 0.0001, "loss": 1.4579, "step": 6619 }, { "epoch": 0.7604388030555396, "grad_norm": 0.4535200893878937, "learning_rate": 0.0001, "loss": 1.4815, "step": 6620 }, { "epoch": 0.7605536729653667, "grad_norm": 0.40182626247406006, "learning_rate": 0.0001, "loss": 1.2703, "step": 6621 }, { "epoch": 0.7606685428751938, "grad_norm": 0.4054408669471741, "learning_rate": 0.0001, "loss": 1.4678, "step": 6622 }, { "epoch": 0.7607834127850209, "grad_norm": 0.4554063081741333, "learning_rate": 0.0001, "loss": 1.7263, "step": 6623 }, { "epoch": 0.760898282694848, "grad_norm": 0.447591632604599, "learning_rate": 0.0001, "loss": 1.75, "step": 6624 }, { "epoch": 0.7610131526046752, "grad_norm": 0.4537143409252167, "learning_rate": 0.0001, "loss": 1.7036, "step": 6625 }, { "epoch": 0.7611280225145023, "grad_norm": 0.47372350096702576, "learning_rate": 0.0001, "loss": 1.5821, "step": 6626 }, { "epoch": 0.7612428924243294, "grad_norm": 0.47313424944877625, "learning_rate": 0.0001, "loss": 1.7578, "step": 6627 }, { "epoch": 0.7613577623341565, "grad_norm": 0.4805237054824829, "learning_rate": 0.0001, "loss": 1.6613, "step": 6628 }, { "epoch": 0.7614726322439836, "grad_norm": 0.4734886884689331, "learning_rate": 0.0001, "loss": 1.7557, "step": 6629 }, { "epoch": 0.7615875021538108, "grad_norm": 0.46872133016586304, "learning_rate": 0.0001, "loss": 1.7054, "step": 6630 }, { "epoch": 0.7617023720636379, "grad_norm": 0.44141799211502075, "learning_rate": 0.0001, "loss": 1.5425, "step": 6631 }, { "epoch": 0.761817241973465, "grad_norm": 0.4713442623615265, "learning_rate": 0.0001, "loss": 1.7963, "step": 6632 }, { "epoch": 0.7619321118832921, "grad_norm": 0.46353405714035034, "learning_rate": 0.0001, "loss": 1.6291, "step": 6633 }, { "epoch": 0.7620469817931192, "grad_norm": 0.9744009971618652, "learning_rate": 0.0001, "loss": 1.63, "step": 6634 }, { "epoch": 0.7621618517029464, "grad_norm": 0.41723188757896423, "learning_rate": 0.0001, "loss": 1.5305, "step": 6635 }, { "epoch": 0.7622767216127735, "grad_norm": 0.47938770055770874, "learning_rate": 0.0001, "loss": 1.7722, "step": 6636 }, { "epoch": 0.7623915915226006, "grad_norm": 0.46202394366264343, "learning_rate": 0.0001, "loss": 1.6189, "step": 6637 }, { "epoch": 0.7625064614324277, "grad_norm": 0.4409821033477783, "learning_rate": 0.0001, "loss": 1.3921, "step": 6638 }, { "epoch": 0.7626213313422548, "grad_norm": 0.4694823920726776, "learning_rate": 0.0001, "loss": 1.6386, "step": 6639 }, { "epoch": 0.762736201252082, "grad_norm": 0.45864009857177734, "learning_rate": 0.0001, "loss": 1.6221, "step": 6640 }, { "epoch": 0.7628510711619091, "grad_norm": 0.46855515241622925, "learning_rate": 0.0001, "loss": 1.4683, "step": 6641 }, { "epoch": 0.7629659410717362, "grad_norm": 0.498154878616333, "learning_rate": 0.0001, "loss": 1.5162, "step": 6642 }, { "epoch": 0.7630808109815633, "grad_norm": 0.4623854160308838, "learning_rate": 0.0001, "loss": 1.5181, "step": 6643 }, { "epoch": 0.7631956808913904, "grad_norm": 0.42440950870513916, "learning_rate": 0.0001, "loss": 1.6539, "step": 6644 }, { "epoch": 0.7633105508012176, "grad_norm": 0.4813312888145447, "learning_rate": 0.0001, "loss": 1.5837, "step": 6645 }, { "epoch": 0.7634254207110447, "grad_norm": 0.430618554353714, "learning_rate": 0.0001, "loss": 1.5017, "step": 6646 }, { "epoch": 0.7635402906208718, "grad_norm": 0.4468959867954254, "learning_rate": 0.0001, "loss": 1.5506, "step": 6647 }, { "epoch": 0.7636551605306989, "grad_norm": 0.45413529872894287, "learning_rate": 0.0001, "loss": 1.5858, "step": 6648 }, { "epoch": 0.763770030440526, "grad_norm": 0.4446607530117035, "learning_rate": 0.0001, "loss": 1.5391, "step": 6649 }, { "epoch": 0.7638849003503533, "grad_norm": 0.4520895779132843, "learning_rate": 0.0001, "loss": 1.5946, "step": 6650 }, { "epoch": 0.7639997702601804, "grad_norm": 0.46558019518852234, "learning_rate": 0.0001, "loss": 1.6506, "step": 6651 }, { "epoch": 0.7641146401700075, "grad_norm": 0.501394510269165, "learning_rate": 0.0001, "loss": 1.8724, "step": 6652 }, { "epoch": 0.7642295100798346, "grad_norm": 0.47188153862953186, "learning_rate": 0.0001, "loss": 1.7557, "step": 6653 }, { "epoch": 0.7643443799896618, "grad_norm": 0.42527419328689575, "learning_rate": 0.0001, "loss": 1.4982, "step": 6654 }, { "epoch": 0.7644592498994889, "grad_norm": 0.4962664246559143, "learning_rate": 0.0001, "loss": 1.7203, "step": 6655 }, { "epoch": 0.764574119809316, "grad_norm": 0.4442030191421509, "learning_rate": 0.0001, "loss": 1.5981, "step": 6656 }, { "epoch": 0.7646889897191431, "grad_norm": 0.47021228075027466, "learning_rate": 0.0001, "loss": 1.7917, "step": 6657 }, { "epoch": 0.7648038596289702, "grad_norm": 0.4421970546245575, "learning_rate": 0.0001, "loss": 1.5717, "step": 6658 }, { "epoch": 0.7649187295387974, "grad_norm": 0.45684003829956055, "learning_rate": 0.0001, "loss": 1.6662, "step": 6659 }, { "epoch": 0.7650335994486245, "grad_norm": 0.47243228554725647, "learning_rate": 0.0001, "loss": 1.6116, "step": 6660 }, { "epoch": 0.7651484693584516, "grad_norm": 0.44212606549263, "learning_rate": 0.0001, "loss": 1.5302, "step": 6661 }, { "epoch": 0.7652633392682787, "grad_norm": 0.48998787999153137, "learning_rate": 0.0001, "loss": 1.7869, "step": 6662 }, { "epoch": 0.7653782091781058, "grad_norm": 0.4589942693710327, "learning_rate": 0.0001, "loss": 1.5954, "step": 6663 }, { "epoch": 0.765493079087933, "grad_norm": 0.45436352491378784, "learning_rate": 0.0001, "loss": 1.7321, "step": 6664 }, { "epoch": 0.7656079489977601, "grad_norm": 0.45719271898269653, "learning_rate": 0.0001, "loss": 1.3269, "step": 6665 }, { "epoch": 0.7657228189075872, "grad_norm": 0.4515266716480255, "learning_rate": 0.0001, "loss": 1.5696, "step": 6666 }, { "epoch": 0.7658376888174143, "grad_norm": 0.4383704662322998, "learning_rate": 0.0001, "loss": 1.6029, "step": 6667 }, { "epoch": 0.7659525587272414, "grad_norm": 0.5026112198829651, "learning_rate": 0.0001, "loss": 1.6132, "step": 6668 }, { "epoch": 0.7660674286370686, "grad_norm": 0.4402121305465698, "learning_rate": 0.0001, "loss": 1.4645, "step": 6669 }, { "epoch": 0.7661822985468957, "grad_norm": 0.4881230294704437, "learning_rate": 0.0001, "loss": 1.7567, "step": 6670 }, { "epoch": 0.7662971684567228, "grad_norm": 0.454129159450531, "learning_rate": 0.0001, "loss": 1.5974, "step": 6671 }, { "epoch": 0.7664120383665499, "grad_norm": 0.45725277066230774, "learning_rate": 0.0001, "loss": 1.3238, "step": 6672 }, { "epoch": 0.766526908276377, "grad_norm": 0.4708629250526428, "learning_rate": 0.0001, "loss": 1.5594, "step": 6673 }, { "epoch": 0.7666417781862042, "grad_norm": 0.4380055367946625, "learning_rate": 0.0001, "loss": 1.6742, "step": 6674 }, { "epoch": 0.7667566480960313, "grad_norm": 0.45258665084838867, "learning_rate": 0.0001, "loss": 1.4825, "step": 6675 }, { "epoch": 0.7668715180058584, "grad_norm": 0.4417783319950104, "learning_rate": 0.0001, "loss": 1.469, "step": 6676 }, { "epoch": 0.7669863879156855, "grad_norm": 0.5079810619354248, "learning_rate": 0.0001, "loss": 1.6424, "step": 6677 }, { "epoch": 0.7671012578255126, "grad_norm": 0.4944427013397217, "learning_rate": 0.0001, "loss": 1.5304, "step": 6678 }, { "epoch": 0.7672161277353398, "grad_norm": 0.4718817472457886, "learning_rate": 0.0001, "loss": 1.6147, "step": 6679 }, { "epoch": 0.7673309976451669, "grad_norm": 0.4739345610141754, "learning_rate": 0.0001, "loss": 1.765, "step": 6680 }, { "epoch": 0.767445867554994, "grad_norm": 0.46249300241470337, "learning_rate": 0.0001, "loss": 1.6155, "step": 6681 }, { "epoch": 0.7675607374648211, "grad_norm": 0.5330554246902466, "learning_rate": 0.0001, "loss": 1.6253, "step": 6682 }, { "epoch": 0.7676756073746482, "grad_norm": 0.4298166334629059, "learning_rate": 0.0001, "loss": 1.4984, "step": 6683 }, { "epoch": 0.7677904772844754, "grad_norm": 0.47553664445877075, "learning_rate": 0.0001, "loss": 1.6892, "step": 6684 }, { "epoch": 0.7679053471943025, "grad_norm": 0.4601770341396332, "learning_rate": 0.0001, "loss": 1.7258, "step": 6685 }, { "epoch": 0.7680202171041296, "grad_norm": 0.4717545509338379, "learning_rate": 0.0001, "loss": 1.7111, "step": 6686 }, { "epoch": 0.7681350870139567, "grad_norm": 0.47477278113365173, "learning_rate": 0.0001, "loss": 1.5278, "step": 6687 }, { "epoch": 0.7682499569237838, "grad_norm": 0.45599865913391113, "learning_rate": 0.0001, "loss": 1.6069, "step": 6688 }, { "epoch": 0.768364826833611, "grad_norm": 0.43916571140289307, "learning_rate": 0.0001, "loss": 1.5651, "step": 6689 }, { "epoch": 0.7684796967434381, "grad_norm": 0.4506683945655823, "learning_rate": 0.0001, "loss": 1.6638, "step": 6690 }, { "epoch": 0.7685945666532652, "grad_norm": 0.42485401034355164, "learning_rate": 0.0001, "loss": 1.4915, "step": 6691 }, { "epoch": 0.7687094365630923, "grad_norm": 0.47712936997413635, "learning_rate": 0.0001, "loss": 1.6449, "step": 6692 }, { "epoch": 0.7688243064729194, "grad_norm": 0.4386729896068573, "learning_rate": 0.0001, "loss": 1.5198, "step": 6693 }, { "epoch": 0.7689391763827466, "grad_norm": 0.5089852213859558, "learning_rate": 0.0001, "loss": 1.6115, "step": 6694 }, { "epoch": 0.7690540462925737, "grad_norm": 0.4199332594871521, "learning_rate": 0.0001, "loss": 1.4757, "step": 6695 }, { "epoch": 0.7691689162024008, "grad_norm": 0.5055813789367676, "learning_rate": 0.0001, "loss": 1.7663, "step": 6696 }, { "epoch": 0.7692837861122279, "grad_norm": 0.4748409390449524, "learning_rate": 0.0001, "loss": 1.5431, "step": 6697 }, { "epoch": 0.769398656022055, "grad_norm": 0.452891081571579, "learning_rate": 0.0001, "loss": 1.4932, "step": 6698 }, { "epoch": 0.7695135259318822, "grad_norm": 0.4543799161911011, "learning_rate": 0.0001, "loss": 1.5107, "step": 6699 }, { "epoch": 0.7696283958417093, "grad_norm": 0.47071510553359985, "learning_rate": 0.0001, "loss": 1.7285, "step": 6700 }, { "epoch": 0.7697432657515364, "grad_norm": 0.46207287907600403, "learning_rate": 0.0001, "loss": 1.5552, "step": 6701 }, { "epoch": 0.7698581356613635, "grad_norm": 0.4610616862773895, "learning_rate": 0.0001, "loss": 1.4635, "step": 6702 }, { "epoch": 0.7699730055711906, "grad_norm": 0.46330535411834717, "learning_rate": 0.0001, "loss": 1.5052, "step": 6703 }, { "epoch": 0.7700878754810178, "grad_norm": 0.4601458013057709, "learning_rate": 0.0001, "loss": 1.4925, "step": 6704 }, { "epoch": 0.7702027453908449, "grad_norm": 0.4191734790802002, "learning_rate": 0.0001, "loss": 1.3989, "step": 6705 }, { "epoch": 0.770317615300672, "grad_norm": 0.4650634229183197, "learning_rate": 0.0001, "loss": 1.6411, "step": 6706 }, { "epoch": 0.7704324852104991, "grad_norm": 0.4624280035495758, "learning_rate": 0.0001, "loss": 1.6782, "step": 6707 }, { "epoch": 0.7705473551203262, "grad_norm": 0.45528537034988403, "learning_rate": 0.0001, "loss": 1.7724, "step": 6708 }, { "epoch": 0.7706622250301534, "grad_norm": 0.49074849486351013, "learning_rate": 0.0001, "loss": 1.5958, "step": 6709 }, { "epoch": 0.7707770949399805, "grad_norm": 0.4072690010070801, "learning_rate": 0.0001, "loss": 1.5553, "step": 6710 }, { "epoch": 0.7708919648498076, "grad_norm": 0.5691601634025574, "learning_rate": 0.0001, "loss": 1.4397, "step": 6711 }, { "epoch": 0.7710068347596347, "grad_norm": 0.4546988308429718, "learning_rate": 0.0001, "loss": 1.5887, "step": 6712 }, { "epoch": 0.7711217046694618, "grad_norm": 0.49612048268318176, "learning_rate": 0.0001, "loss": 1.6534, "step": 6713 }, { "epoch": 0.771236574579289, "grad_norm": 0.5100909471511841, "learning_rate": 0.0001, "loss": 1.6387, "step": 6714 }, { "epoch": 0.7713514444891161, "grad_norm": 0.4859296679496765, "learning_rate": 0.0001, "loss": 1.6302, "step": 6715 }, { "epoch": 0.7714663143989432, "grad_norm": 0.45790478587150574, "learning_rate": 0.0001, "loss": 1.5652, "step": 6716 }, { "epoch": 0.7715811843087703, "grad_norm": 0.4487425982952118, "learning_rate": 0.0001, "loss": 1.6406, "step": 6717 }, { "epoch": 0.7716960542185974, "grad_norm": 0.49326303601264954, "learning_rate": 0.0001, "loss": 1.7071, "step": 6718 }, { "epoch": 0.7718109241284246, "grad_norm": 0.43098387122154236, "learning_rate": 0.0001, "loss": 1.6099, "step": 6719 }, { "epoch": 0.7719257940382517, "grad_norm": 0.4224907159805298, "learning_rate": 0.0001, "loss": 1.4381, "step": 6720 }, { "epoch": 0.7720406639480788, "grad_norm": 0.4463758170604706, "learning_rate": 0.0001, "loss": 1.6655, "step": 6721 }, { "epoch": 0.7721555338579059, "grad_norm": 0.45395219326019287, "learning_rate": 0.0001, "loss": 1.7321, "step": 6722 }, { "epoch": 0.772270403767733, "grad_norm": 0.443327933549881, "learning_rate": 0.0001, "loss": 1.6938, "step": 6723 }, { "epoch": 0.7723852736775602, "grad_norm": 0.44689181447029114, "learning_rate": 0.0001, "loss": 1.6242, "step": 6724 }, { "epoch": 0.7725001435873873, "grad_norm": 0.4609023928642273, "learning_rate": 0.0001, "loss": 1.7214, "step": 6725 }, { "epoch": 0.7726150134972144, "grad_norm": 0.42757725715637207, "learning_rate": 0.0001, "loss": 1.298, "step": 6726 }, { "epoch": 0.7727298834070415, "grad_norm": 0.43794259428977966, "learning_rate": 0.0001, "loss": 1.6358, "step": 6727 }, { "epoch": 0.7728447533168686, "grad_norm": 0.4543568193912506, "learning_rate": 0.0001, "loss": 1.5736, "step": 6728 }, { "epoch": 0.7729596232266958, "grad_norm": 0.4287487864494324, "learning_rate": 0.0001, "loss": 1.4149, "step": 6729 }, { "epoch": 0.7730744931365229, "grad_norm": 0.44154518842697144, "learning_rate": 0.0001, "loss": 1.4262, "step": 6730 }, { "epoch": 0.77318936304635, "grad_norm": 0.44655200839042664, "learning_rate": 0.0001, "loss": 1.5583, "step": 6731 }, { "epoch": 0.7733042329561771, "grad_norm": 0.5005887150764465, "learning_rate": 0.0001, "loss": 1.4248, "step": 6732 }, { "epoch": 0.7734191028660042, "grad_norm": 0.5025423169136047, "learning_rate": 0.0001, "loss": 1.7576, "step": 6733 }, { "epoch": 0.7735339727758314, "grad_norm": 0.5430256128311157, "learning_rate": 0.0001, "loss": 1.7347, "step": 6734 }, { "epoch": 0.7736488426856585, "grad_norm": 0.4566929340362549, "learning_rate": 0.0001, "loss": 1.4515, "step": 6735 }, { "epoch": 0.7737637125954856, "grad_norm": 0.45113489031791687, "learning_rate": 0.0001, "loss": 1.6234, "step": 6736 }, { "epoch": 0.7738785825053127, "grad_norm": 0.44897618889808655, "learning_rate": 0.0001, "loss": 1.6176, "step": 6737 }, { "epoch": 0.7739934524151398, "grad_norm": 0.4536812901496887, "learning_rate": 0.0001, "loss": 1.6545, "step": 6738 }, { "epoch": 0.774108322324967, "grad_norm": 0.4436149299144745, "learning_rate": 0.0001, "loss": 1.6586, "step": 6739 }, { "epoch": 0.7742231922347941, "grad_norm": 0.4435299336910248, "learning_rate": 0.0001, "loss": 1.6059, "step": 6740 }, { "epoch": 0.7743380621446212, "grad_norm": 0.44554266333580017, "learning_rate": 0.0001, "loss": 1.6621, "step": 6741 }, { "epoch": 0.7744529320544483, "grad_norm": 0.45656824111938477, "learning_rate": 0.0001, "loss": 1.539, "step": 6742 }, { "epoch": 0.7745678019642754, "grad_norm": 0.4641306698322296, "learning_rate": 0.0001, "loss": 1.6619, "step": 6743 }, { "epoch": 0.7746826718741026, "grad_norm": 0.48745623230934143, "learning_rate": 0.0001, "loss": 1.603, "step": 6744 }, { "epoch": 0.7747975417839297, "grad_norm": 0.4215722978115082, "learning_rate": 0.0001, "loss": 1.5519, "step": 6745 }, { "epoch": 0.7749124116937568, "grad_norm": 0.49982690811157227, "learning_rate": 0.0001, "loss": 1.6318, "step": 6746 }, { "epoch": 0.7750272816035839, "grad_norm": 0.5177478194236755, "learning_rate": 0.0001, "loss": 1.6534, "step": 6747 }, { "epoch": 0.775142151513411, "grad_norm": 0.4642798900604248, "learning_rate": 0.0001, "loss": 1.7246, "step": 6748 }, { "epoch": 0.7752570214232382, "grad_norm": 0.5008178949356079, "learning_rate": 0.0001, "loss": 1.5999, "step": 6749 }, { "epoch": 0.7753718913330653, "grad_norm": 0.4520767033100128, "learning_rate": 0.0001, "loss": 1.4705, "step": 6750 }, { "epoch": 0.7754867612428924, "grad_norm": 0.4959637224674225, "learning_rate": 0.0001, "loss": 1.6124, "step": 6751 }, { "epoch": 0.7756016311527195, "grad_norm": 0.4342813491821289, "learning_rate": 0.0001, "loss": 1.6029, "step": 6752 }, { "epoch": 0.7757165010625466, "grad_norm": 0.45691919326782227, "learning_rate": 0.0001, "loss": 1.7955, "step": 6753 }, { "epoch": 0.7758313709723738, "grad_norm": 0.44725337624549866, "learning_rate": 0.0001, "loss": 1.4242, "step": 6754 }, { "epoch": 0.7759462408822009, "grad_norm": 0.4812532365322113, "learning_rate": 0.0001, "loss": 1.6946, "step": 6755 }, { "epoch": 0.776061110792028, "grad_norm": 0.40750646591186523, "learning_rate": 0.0001, "loss": 1.5516, "step": 6756 }, { "epoch": 0.7761759807018551, "grad_norm": 0.482294499874115, "learning_rate": 0.0001, "loss": 1.6497, "step": 6757 }, { "epoch": 0.7762908506116822, "grad_norm": 0.4344806969165802, "learning_rate": 0.0001, "loss": 1.4736, "step": 6758 }, { "epoch": 0.7764057205215094, "grad_norm": 0.46780574321746826, "learning_rate": 0.0001, "loss": 1.5484, "step": 6759 }, { "epoch": 0.7765205904313365, "grad_norm": 0.4722866714000702, "learning_rate": 0.0001, "loss": 1.8075, "step": 6760 }, { "epoch": 0.7766354603411636, "grad_norm": 0.45282799005508423, "learning_rate": 0.0001, "loss": 1.5938, "step": 6761 }, { "epoch": 0.7767503302509907, "grad_norm": 0.4654501974582672, "learning_rate": 0.0001, "loss": 1.7371, "step": 6762 }, { "epoch": 0.7768652001608178, "grad_norm": 0.46721896529197693, "learning_rate": 0.0001, "loss": 1.6322, "step": 6763 }, { "epoch": 0.776980070070645, "grad_norm": 0.47334960103034973, "learning_rate": 0.0001, "loss": 1.6024, "step": 6764 }, { "epoch": 0.7770949399804721, "grad_norm": 0.46868133544921875, "learning_rate": 0.0001, "loss": 1.5711, "step": 6765 }, { "epoch": 0.7772098098902992, "grad_norm": 0.45955634117126465, "learning_rate": 0.0001, "loss": 1.633, "step": 6766 }, { "epoch": 0.7773246798001263, "grad_norm": 0.43872690200805664, "learning_rate": 0.0001, "loss": 1.3424, "step": 6767 }, { "epoch": 0.7774395497099534, "grad_norm": 0.5430575609207153, "learning_rate": 0.0001, "loss": 1.8176, "step": 6768 }, { "epoch": 0.7775544196197806, "grad_norm": 0.4751816689968109, "learning_rate": 0.0001, "loss": 1.6546, "step": 6769 }, { "epoch": 0.7776692895296077, "grad_norm": 0.42308947443962097, "learning_rate": 0.0001, "loss": 1.4672, "step": 6770 }, { "epoch": 0.7777841594394348, "grad_norm": 0.4343721866607666, "learning_rate": 0.0001, "loss": 1.5106, "step": 6771 }, { "epoch": 0.7778990293492619, "grad_norm": 0.4403552711009979, "learning_rate": 0.0001, "loss": 1.5326, "step": 6772 }, { "epoch": 0.778013899259089, "grad_norm": 0.46488016843795776, "learning_rate": 0.0001, "loss": 1.6177, "step": 6773 }, { "epoch": 0.7781287691689162, "grad_norm": 0.43037131428718567, "learning_rate": 0.0001, "loss": 1.5796, "step": 6774 }, { "epoch": 0.7782436390787433, "grad_norm": 0.42041462659835815, "learning_rate": 0.0001, "loss": 1.4634, "step": 6775 }, { "epoch": 0.7783585089885704, "grad_norm": 0.46670305728912354, "learning_rate": 0.0001, "loss": 1.7338, "step": 6776 }, { "epoch": 0.7784733788983975, "grad_norm": 0.44566023349761963, "learning_rate": 0.0001, "loss": 1.4879, "step": 6777 }, { "epoch": 0.7785882488082246, "grad_norm": 0.46468743681907654, "learning_rate": 0.0001, "loss": 1.4851, "step": 6778 }, { "epoch": 0.7787031187180518, "grad_norm": 0.4546216130256653, "learning_rate": 0.0001, "loss": 1.4767, "step": 6779 }, { "epoch": 0.7788179886278789, "grad_norm": 0.41460397839546204, "learning_rate": 0.0001, "loss": 1.5568, "step": 6780 }, { "epoch": 0.778932858537706, "grad_norm": 0.43776392936706543, "learning_rate": 0.0001, "loss": 1.5138, "step": 6781 }, { "epoch": 0.7790477284475331, "grad_norm": 0.42901432514190674, "learning_rate": 0.0001, "loss": 1.4523, "step": 6782 }, { "epoch": 0.7791625983573602, "grad_norm": 0.4392737150192261, "learning_rate": 0.0001, "loss": 1.5261, "step": 6783 }, { "epoch": 0.7792774682671874, "grad_norm": 0.5367296934127808, "learning_rate": 0.0001, "loss": 1.7535, "step": 6784 }, { "epoch": 0.7793923381770145, "grad_norm": 0.48000702261924744, "learning_rate": 0.0001, "loss": 1.6611, "step": 6785 }, { "epoch": 0.7795072080868416, "grad_norm": 0.42784345149993896, "learning_rate": 0.0001, "loss": 1.5054, "step": 6786 }, { "epoch": 0.7796220779966688, "grad_norm": 0.4323700964450836, "learning_rate": 0.0001, "loss": 1.5182, "step": 6787 }, { "epoch": 0.779736947906496, "grad_norm": 0.46200308203697205, "learning_rate": 0.0001, "loss": 1.5219, "step": 6788 }, { "epoch": 0.7798518178163231, "grad_norm": 0.4896979033946991, "learning_rate": 0.0001, "loss": 1.698, "step": 6789 }, { "epoch": 0.7799666877261502, "grad_norm": 0.4582618474960327, "learning_rate": 0.0001, "loss": 1.4937, "step": 6790 }, { "epoch": 0.7800815576359773, "grad_norm": 0.45183730125427246, "learning_rate": 0.0001, "loss": 1.6155, "step": 6791 }, { "epoch": 0.7801964275458044, "grad_norm": 0.45246532559394836, "learning_rate": 0.0001, "loss": 1.6663, "step": 6792 }, { "epoch": 0.7803112974556315, "grad_norm": 0.453540176153183, "learning_rate": 0.0001, "loss": 1.4815, "step": 6793 }, { "epoch": 0.7804261673654587, "grad_norm": 0.460021436214447, "learning_rate": 0.0001, "loss": 1.5578, "step": 6794 }, { "epoch": 0.7805410372752858, "grad_norm": 0.5325375199317932, "learning_rate": 0.0001, "loss": 1.7254, "step": 6795 }, { "epoch": 0.7806559071851129, "grad_norm": 0.41828301548957825, "learning_rate": 0.0001, "loss": 1.5596, "step": 6796 }, { "epoch": 0.78077077709494, "grad_norm": 0.4506258964538574, "learning_rate": 0.0001, "loss": 1.587, "step": 6797 }, { "epoch": 0.7808856470047671, "grad_norm": 0.4368896484375, "learning_rate": 0.0001, "loss": 1.5261, "step": 6798 }, { "epoch": 0.7810005169145943, "grad_norm": 0.4336557388305664, "learning_rate": 0.0001, "loss": 1.4341, "step": 6799 }, { "epoch": 0.7811153868244214, "grad_norm": 0.47496965527534485, "learning_rate": 0.0001, "loss": 1.7202, "step": 6800 }, { "epoch": 0.7812302567342485, "grad_norm": 0.44038236141204834, "learning_rate": 0.0001, "loss": 1.7002, "step": 6801 }, { "epoch": 0.7813451266440756, "grad_norm": 0.44530773162841797, "learning_rate": 0.0001, "loss": 1.668, "step": 6802 }, { "epoch": 0.7814599965539027, "grad_norm": 0.4356937110424042, "learning_rate": 0.0001, "loss": 1.6553, "step": 6803 }, { "epoch": 0.7815748664637299, "grad_norm": 0.4739866256713867, "learning_rate": 0.0001, "loss": 1.6232, "step": 6804 }, { "epoch": 0.781689736373557, "grad_norm": 0.5034109950065613, "learning_rate": 0.0001, "loss": 1.7754, "step": 6805 }, { "epoch": 0.7818046062833841, "grad_norm": 0.46401265263557434, "learning_rate": 0.0001, "loss": 1.6829, "step": 6806 }, { "epoch": 0.7819194761932112, "grad_norm": 0.44969433546066284, "learning_rate": 0.0001, "loss": 1.6893, "step": 6807 }, { "epoch": 0.7820343461030383, "grad_norm": 0.48960667848587036, "learning_rate": 0.0001, "loss": 1.8962, "step": 6808 }, { "epoch": 0.7821492160128655, "grad_norm": 0.41496121883392334, "learning_rate": 0.0001, "loss": 1.3764, "step": 6809 }, { "epoch": 0.7822640859226926, "grad_norm": 0.4606899917125702, "learning_rate": 0.0001, "loss": 1.6955, "step": 6810 }, { "epoch": 0.7823789558325197, "grad_norm": 0.5153933763504028, "learning_rate": 0.0001, "loss": 1.8612, "step": 6811 }, { "epoch": 0.7824938257423468, "grad_norm": 0.4672335684299469, "learning_rate": 0.0001, "loss": 1.6347, "step": 6812 }, { "epoch": 0.782608695652174, "grad_norm": 0.4316304326057434, "learning_rate": 0.0001, "loss": 1.5107, "step": 6813 }, { "epoch": 0.7827235655620011, "grad_norm": 0.4348050355911255, "learning_rate": 0.0001, "loss": 1.5962, "step": 6814 }, { "epoch": 0.7828384354718282, "grad_norm": 0.44443196058273315, "learning_rate": 0.0001, "loss": 1.6767, "step": 6815 }, { "epoch": 0.7829533053816553, "grad_norm": 0.45539575815200806, "learning_rate": 0.0001, "loss": 1.6111, "step": 6816 }, { "epoch": 0.7830681752914824, "grad_norm": 0.42358124256134033, "learning_rate": 0.0001, "loss": 1.5121, "step": 6817 }, { "epoch": 0.7831830452013095, "grad_norm": 0.444414347410202, "learning_rate": 0.0001, "loss": 1.6731, "step": 6818 }, { "epoch": 0.7832979151111367, "grad_norm": 0.45530837774276733, "learning_rate": 0.0001, "loss": 1.6444, "step": 6819 }, { "epoch": 0.7834127850209638, "grad_norm": 0.4563850462436676, "learning_rate": 0.0001, "loss": 1.6079, "step": 6820 }, { "epoch": 0.7835276549307909, "grad_norm": 0.43416640162467957, "learning_rate": 0.0001, "loss": 1.3742, "step": 6821 }, { "epoch": 0.783642524840618, "grad_norm": 0.44997450709342957, "learning_rate": 0.0001, "loss": 1.6559, "step": 6822 }, { "epoch": 0.7837573947504451, "grad_norm": 0.4280833303928375, "learning_rate": 0.0001, "loss": 1.4376, "step": 6823 }, { "epoch": 0.7838722646602723, "grad_norm": 0.4517759680747986, "learning_rate": 0.0001, "loss": 1.6059, "step": 6824 }, { "epoch": 0.7839871345700994, "grad_norm": 0.45241299271583557, "learning_rate": 0.0001, "loss": 1.3445, "step": 6825 }, { "epoch": 0.7841020044799265, "grad_norm": 0.4801664352416992, "learning_rate": 0.0001, "loss": 1.7256, "step": 6826 }, { "epoch": 0.7842168743897536, "grad_norm": 0.44268131256103516, "learning_rate": 0.0001, "loss": 1.7158, "step": 6827 }, { "epoch": 0.7843317442995807, "grad_norm": 0.46175646781921387, "learning_rate": 0.0001, "loss": 1.7005, "step": 6828 }, { "epoch": 0.7844466142094079, "grad_norm": 0.4507410228252411, "learning_rate": 0.0001, "loss": 1.415, "step": 6829 }, { "epoch": 0.784561484119235, "grad_norm": 0.42673787474632263, "learning_rate": 0.0001, "loss": 1.4747, "step": 6830 }, { "epoch": 0.7846763540290621, "grad_norm": 0.42640364170074463, "learning_rate": 0.0001, "loss": 1.3069, "step": 6831 }, { "epoch": 0.7847912239388892, "grad_norm": 0.46200641989707947, "learning_rate": 0.0001, "loss": 1.633, "step": 6832 }, { "epoch": 0.7849060938487163, "grad_norm": 0.43748313188552856, "learning_rate": 0.0001, "loss": 1.5343, "step": 6833 }, { "epoch": 0.7850209637585435, "grad_norm": 0.49261534214019775, "learning_rate": 0.0001, "loss": 1.9485, "step": 6834 }, { "epoch": 0.7851358336683706, "grad_norm": 0.4890846908092499, "learning_rate": 0.0001, "loss": 1.6535, "step": 6835 }, { "epoch": 0.7852507035781977, "grad_norm": 0.4594542384147644, "learning_rate": 0.0001, "loss": 1.5565, "step": 6836 }, { "epoch": 0.7853655734880248, "grad_norm": 0.434871107339859, "learning_rate": 0.0001, "loss": 1.5575, "step": 6837 }, { "epoch": 0.785480443397852, "grad_norm": 0.45254603028297424, "learning_rate": 0.0001, "loss": 1.6604, "step": 6838 }, { "epoch": 0.7855953133076791, "grad_norm": 0.46842509508132935, "learning_rate": 0.0001, "loss": 1.5599, "step": 6839 }, { "epoch": 0.7857101832175062, "grad_norm": 0.45516836643218994, "learning_rate": 0.0001, "loss": 1.533, "step": 6840 }, { "epoch": 0.7858250531273333, "grad_norm": 0.4427262246608734, "learning_rate": 0.0001, "loss": 1.671, "step": 6841 }, { "epoch": 0.7859399230371604, "grad_norm": 0.4520769417285919, "learning_rate": 0.0001, "loss": 1.6729, "step": 6842 }, { "epoch": 0.7860547929469875, "grad_norm": 0.5243545770645142, "learning_rate": 0.0001, "loss": 1.6431, "step": 6843 }, { "epoch": 0.7861696628568147, "grad_norm": 0.4461134374141693, "learning_rate": 0.0001, "loss": 1.4884, "step": 6844 }, { "epoch": 0.7862845327666418, "grad_norm": 0.48180684447288513, "learning_rate": 0.0001, "loss": 1.6279, "step": 6845 }, { "epoch": 0.7863994026764689, "grad_norm": 0.4266572594642639, "learning_rate": 0.0001, "loss": 1.4144, "step": 6846 }, { "epoch": 0.786514272586296, "grad_norm": 0.43107786774635315, "learning_rate": 0.0001, "loss": 1.5917, "step": 6847 }, { "epoch": 0.7866291424961231, "grad_norm": 0.49343347549438477, "learning_rate": 0.0001, "loss": 1.6641, "step": 6848 }, { "epoch": 0.7867440124059503, "grad_norm": 0.4486045837402344, "learning_rate": 0.0001, "loss": 1.2793, "step": 6849 }, { "epoch": 0.7868588823157774, "grad_norm": 0.4293416440486908, "learning_rate": 0.0001, "loss": 1.5555, "step": 6850 }, { "epoch": 0.7869737522256045, "grad_norm": 0.48061510920524597, "learning_rate": 0.0001, "loss": 1.6201, "step": 6851 }, { "epoch": 0.7870886221354316, "grad_norm": 0.4739622175693512, "learning_rate": 0.0001, "loss": 1.7332, "step": 6852 }, { "epoch": 0.7872034920452587, "grad_norm": 0.4450550675392151, "learning_rate": 0.0001, "loss": 1.5765, "step": 6853 }, { "epoch": 0.7873183619550859, "grad_norm": 0.4692426025867462, "learning_rate": 0.0001, "loss": 1.5579, "step": 6854 }, { "epoch": 0.787433231864913, "grad_norm": 0.4459279775619507, "learning_rate": 0.0001, "loss": 1.4608, "step": 6855 }, { "epoch": 0.7875481017747401, "grad_norm": 0.44502994418144226, "learning_rate": 0.0001, "loss": 1.5768, "step": 6856 }, { "epoch": 0.7876629716845672, "grad_norm": 0.4621850550174713, "learning_rate": 0.0001, "loss": 1.5366, "step": 6857 }, { "epoch": 0.7877778415943943, "grad_norm": 0.40475985407829285, "learning_rate": 0.0001, "loss": 1.3404, "step": 6858 }, { "epoch": 0.7878927115042215, "grad_norm": 0.47660937905311584, "learning_rate": 0.0001, "loss": 1.6131, "step": 6859 }, { "epoch": 0.7880075814140486, "grad_norm": 0.44972842931747437, "learning_rate": 0.0001, "loss": 1.6736, "step": 6860 }, { "epoch": 0.7881224513238757, "grad_norm": 0.454998642206192, "learning_rate": 0.0001, "loss": 1.5207, "step": 6861 }, { "epoch": 0.7882373212337028, "grad_norm": 0.4434056580066681, "learning_rate": 0.0001, "loss": 1.5593, "step": 6862 }, { "epoch": 0.78835219114353, "grad_norm": 0.4378054141998291, "learning_rate": 0.0001, "loss": 1.6163, "step": 6863 }, { "epoch": 0.7884670610533571, "grad_norm": 0.43698400259017944, "learning_rate": 0.0001, "loss": 1.7174, "step": 6864 }, { "epoch": 0.7885819309631842, "grad_norm": 0.4555806815624237, "learning_rate": 0.0001, "loss": 1.606, "step": 6865 }, { "epoch": 0.7886968008730113, "grad_norm": 0.44483011960983276, "learning_rate": 0.0001, "loss": 1.6285, "step": 6866 }, { "epoch": 0.7888116707828384, "grad_norm": 0.42852985858917236, "learning_rate": 0.0001, "loss": 1.5767, "step": 6867 }, { "epoch": 0.7889265406926655, "grad_norm": 0.4609838128089905, "learning_rate": 0.0001, "loss": 1.6478, "step": 6868 }, { "epoch": 0.7890414106024927, "grad_norm": 0.45633402466773987, "learning_rate": 0.0001, "loss": 1.6004, "step": 6869 }, { "epoch": 0.7891562805123198, "grad_norm": 0.44641777873039246, "learning_rate": 0.0001, "loss": 1.4307, "step": 6870 }, { "epoch": 0.7892711504221469, "grad_norm": 0.45838481187820435, "learning_rate": 0.0001, "loss": 1.7874, "step": 6871 }, { "epoch": 0.789386020331974, "grad_norm": 0.44452112913131714, "learning_rate": 0.0001, "loss": 1.5838, "step": 6872 }, { "epoch": 0.7895008902418011, "grad_norm": 0.44366639852523804, "learning_rate": 0.0001, "loss": 1.6561, "step": 6873 }, { "epoch": 0.7896157601516283, "grad_norm": 0.44662636518478394, "learning_rate": 0.0001, "loss": 1.5177, "step": 6874 }, { "epoch": 0.7897306300614554, "grad_norm": 0.46313855051994324, "learning_rate": 0.0001, "loss": 1.6924, "step": 6875 }, { "epoch": 0.7898454999712825, "grad_norm": 0.4705411195755005, "learning_rate": 0.0001, "loss": 1.6291, "step": 6876 }, { "epoch": 0.7899603698811096, "grad_norm": 0.4569123387336731, "learning_rate": 0.0001, "loss": 1.617, "step": 6877 }, { "epoch": 0.7900752397909367, "grad_norm": 0.5129233002662659, "learning_rate": 0.0001, "loss": 1.9261, "step": 6878 }, { "epoch": 0.7901901097007639, "grad_norm": 0.4295032322406769, "learning_rate": 0.0001, "loss": 1.4843, "step": 6879 }, { "epoch": 0.790304979610591, "grad_norm": 0.44028350710868835, "learning_rate": 0.0001, "loss": 1.7061, "step": 6880 }, { "epoch": 0.7904198495204181, "grad_norm": 0.48637375235557556, "learning_rate": 0.0001, "loss": 1.6503, "step": 6881 }, { "epoch": 0.7905347194302452, "grad_norm": 0.4660567045211792, "learning_rate": 0.0001, "loss": 1.6903, "step": 6882 }, { "epoch": 0.7906495893400723, "grad_norm": 0.4239864945411682, "learning_rate": 0.0001, "loss": 1.5828, "step": 6883 }, { "epoch": 0.7907644592498995, "grad_norm": 0.5081626772880554, "learning_rate": 0.0001, "loss": 1.8917, "step": 6884 }, { "epoch": 0.7908793291597266, "grad_norm": 0.47426649928092957, "learning_rate": 0.0001, "loss": 1.7486, "step": 6885 }, { "epoch": 0.7909941990695537, "grad_norm": 0.47770363092422485, "learning_rate": 0.0001, "loss": 1.5376, "step": 6886 }, { "epoch": 0.7911090689793808, "grad_norm": 0.44490906596183777, "learning_rate": 0.0001, "loss": 1.6119, "step": 6887 }, { "epoch": 0.791223938889208, "grad_norm": 0.4341287612915039, "learning_rate": 0.0001, "loss": 1.6521, "step": 6888 }, { "epoch": 0.7913388087990351, "grad_norm": 0.46785807609558105, "learning_rate": 0.0001, "loss": 1.6912, "step": 6889 }, { "epoch": 0.7914536787088622, "grad_norm": 0.4538176655769348, "learning_rate": 0.0001, "loss": 1.6581, "step": 6890 }, { "epoch": 0.7915685486186893, "grad_norm": 0.4823910593986511, "learning_rate": 0.0001, "loss": 1.6738, "step": 6891 }, { "epoch": 0.7916834185285164, "grad_norm": 0.49208977818489075, "learning_rate": 0.0001, "loss": 1.7127, "step": 6892 }, { "epoch": 0.7917982884383435, "grad_norm": 0.6321043968200684, "learning_rate": 0.0001, "loss": 1.5294, "step": 6893 }, { "epoch": 0.7919131583481707, "grad_norm": 0.43280404806137085, "learning_rate": 0.0001, "loss": 1.5839, "step": 6894 }, { "epoch": 0.7920280282579978, "grad_norm": 0.5009095072746277, "learning_rate": 0.0001, "loss": 1.6516, "step": 6895 }, { "epoch": 0.7921428981678249, "grad_norm": 0.4481852650642395, "learning_rate": 0.0001, "loss": 1.6166, "step": 6896 }, { "epoch": 0.792257768077652, "grad_norm": 0.4593367278575897, "learning_rate": 0.0001, "loss": 1.652, "step": 6897 }, { "epoch": 0.7923726379874791, "grad_norm": 0.5545166730880737, "learning_rate": 0.0001, "loss": 1.3038, "step": 6898 }, { "epoch": 0.7924875078973063, "grad_norm": 0.4565151631832123, "learning_rate": 0.0001, "loss": 1.5986, "step": 6899 }, { "epoch": 0.7926023778071334, "grad_norm": 0.45123347640037537, "learning_rate": 0.0001, "loss": 1.7472, "step": 6900 }, { "epoch": 0.7927172477169605, "grad_norm": 0.46333909034729004, "learning_rate": 0.0001, "loss": 1.5799, "step": 6901 }, { "epoch": 0.7928321176267876, "grad_norm": 0.4783911108970642, "learning_rate": 0.0001, "loss": 1.5615, "step": 6902 }, { "epoch": 0.7929469875366147, "grad_norm": 0.48153024911880493, "learning_rate": 0.0001, "loss": 1.6846, "step": 6903 }, { "epoch": 0.7930618574464419, "grad_norm": 0.45357227325439453, "learning_rate": 0.0001, "loss": 1.6658, "step": 6904 }, { "epoch": 0.793176727356269, "grad_norm": 0.44956380128860474, "learning_rate": 0.0001, "loss": 1.6623, "step": 6905 }, { "epoch": 0.7932915972660961, "grad_norm": 0.4557205140590668, "learning_rate": 0.0001, "loss": 1.4921, "step": 6906 }, { "epoch": 0.7934064671759232, "grad_norm": 0.45987358689308167, "learning_rate": 0.0001, "loss": 1.6181, "step": 6907 }, { "epoch": 0.7935213370857503, "grad_norm": 0.4671670198440552, "learning_rate": 0.0001, "loss": 1.559, "step": 6908 }, { "epoch": 0.7936362069955775, "grad_norm": 0.4789045453071594, "learning_rate": 0.0001, "loss": 1.545, "step": 6909 }, { "epoch": 0.7937510769054046, "grad_norm": 0.4139240086078644, "learning_rate": 0.0001, "loss": 1.4065, "step": 6910 }, { "epoch": 0.7938659468152317, "grad_norm": 0.43465399742126465, "learning_rate": 0.0001, "loss": 1.4924, "step": 6911 }, { "epoch": 0.7939808167250588, "grad_norm": 0.45922383666038513, "learning_rate": 0.0001, "loss": 1.5714, "step": 6912 }, { "epoch": 0.7940956866348859, "grad_norm": 0.5036603808403015, "learning_rate": 0.0001, "loss": 1.7516, "step": 6913 }, { "epoch": 0.7942105565447131, "grad_norm": 0.4896206855773926, "learning_rate": 0.0001, "loss": 1.6954, "step": 6914 }, { "epoch": 0.7943254264545402, "grad_norm": 0.4837943911552429, "learning_rate": 0.0001, "loss": 1.7452, "step": 6915 }, { "epoch": 0.7944402963643673, "grad_norm": 0.4519433081150055, "learning_rate": 0.0001, "loss": 1.5967, "step": 6916 }, { "epoch": 0.7945551662741944, "grad_norm": 0.45447391271591187, "learning_rate": 0.0001, "loss": 1.5263, "step": 6917 }, { "epoch": 0.7946700361840215, "grad_norm": 0.4731905460357666, "learning_rate": 0.0001, "loss": 1.8278, "step": 6918 }, { "epoch": 0.7947849060938487, "grad_norm": 0.44193151593208313, "learning_rate": 0.0001, "loss": 1.4828, "step": 6919 }, { "epoch": 0.7948997760036758, "grad_norm": 0.472493439912796, "learning_rate": 0.0001, "loss": 1.6412, "step": 6920 }, { "epoch": 0.7950146459135029, "grad_norm": 0.46209755539894104, "learning_rate": 0.0001, "loss": 1.5916, "step": 6921 }, { "epoch": 0.79512951582333, "grad_norm": 0.41559138894081116, "learning_rate": 0.0001, "loss": 1.6852, "step": 6922 }, { "epoch": 0.7952443857331571, "grad_norm": 0.44188040494918823, "learning_rate": 0.0001, "loss": 1.5008, "step": 6923 }, { "epoch": 0.7953592556429844, "grad_norm": 0.4449104070663452, "learning_rate": 0.0001, "loss": 1.5437, "step": 6924 }, { "epoch": 0.7954741255528115, "grad_norm": 0.45927226543426514, "learning_rate": 0.0001, "loss": 1.6136, "step": 6925 }, { "epoch": 0.7955889954626386, "grad_norm": 0.532455563545227, "learning_rate": 0.0001, "loss": 1.7761, "step": 6926 }, { "epoch": 0.7957038653724657, "grad_norm": 0.5386255979537964, "learning_rate": 0.0001, "loss": 1.7247, "step": 6927 }, { "epoch": 0.7958187352822929, "grad_norm": 0.449303537607193, "learning_rate": 0.0001, "loss": 1.4939, "step": 6928 }, { "epoch": 0.79593360519212, "grad_norm": 0.43787631392478943, "learning_rate": 0.0001, "loss": 1.5544, "step": 6929 }, { "epoch": 0.7960484751019471, "grad_norm": 0.4425432085990906, "learning_rate": 0.0001, "loss": 1.5128, "step": 6930 }, { "epoch": 0.7961633450117742, "grad_norm": 0.47082197666168213, "learning_rate": 0.0001, "loss": 1.6663, "step": 6931 }, { "epoch": 0.7962782149216013, "grad_norm": 0.42969945073127747, "learning_rate": 0.0001, "loss": 1.5908, "step": 6932 }, { "epoch": 0.7963930848314285, "grad_norm": 0.4878128468990326, "learning_rate": 0.0001, "loss": 1.4099, "step": 6933 }, { "epoch": 0.7965079547412556, "grad_norm": 0.4496504068374634, "learning_rate": 0.0001, "loss": 1.4885, "step": 6934 }, { "epoch": 0.7966228246510827, "grad_norm": 0.48936036229133606, "learning_rate": 0.0001, "loss": 1.6446, "step": 6935 }, { "epoch": 0.7967376945609098, "grad_norm": 0.45953816175460815, "learning_rate": 0.0001, "loss": 1.6319, "step": 6936 }, { "epoch": 0.7968525644707369, "grad_norm": 0.4707304537296295, "learning_rate": 0.0001, "loss": 1.5218, "step": 6937 }, { "epoch": 0.796967434380564, "grad_norm": 0.4913303256034851, "learning_rate": 0.0001, "loss": 1.6678, "step": 6938 }, { "epoch": 0.7970823042903912, "grad_norm": 0.4540485739707947, "learning_rate": 0.0001, "loss": 1.5723, "step": 6939 }, { "epoch": 0.7971971742002183, "grad_norm": 0.4464291036128998, "learning_rate": 0.0001, "loss": 1.5943, "step": 6940 }, { "epoch": 0.7973120441100454, "grad_norm": 0.4318457245826721, "learning_rate": 0.0001, "loss": 1.5276, "step": 6941 }, { "epoch": 0.7974269140198725, "grad_norm": 0.47109347581863403, "learning_rate": 0.0001, "loss": 1.5672, "step": 6942 }, { "epoch": 0.7975417839296997, "grad_norm": 0.47364145517349243, "learning_rate": 0.0001, "loss": 1.7189, "step": 6943 }, { "epoch": 0.7976566538395268, "grad_norm": 0.5263649821281433, "learning_rate": 0.0001, "loss": 1.3896, "step": 6944 }, { "epoch": 0.7977715237493539, "grad_norm": 0.45865774154663086, "learning_rate": 0.0001, "loss": 1.4919, "step": 6945 }, { "epoch": 0.797886393659181, "grad_norm": 0.4584158658981323, "learning_rate": 0.0001, "loss": 1.7354, "step": 6946 }, { "epoch": 0.7980012635690081, "grad_norm": 0.4830566346645355, "learning_rate": 0.0001, "loss": 1.525, "step": 6947 }, { "epoch": 0.7981161334788353, "grad_norm": 0.4785566031932831, "learning_rate": 0.0001, "loss": 1.6353, "step": 6948 }, { "epoch": 0.7982310033886624, "grad_norm": 0.4663696587085724, "learning_rate": 0.0001, "loss": 1.636, "step": 6949 }, { "epoch": 0.7983458732984895, "grad_norm": 0.48032259941101074, "learning_rate": 0.0001, "loss": 1.3853, "step": 6950 }, { "epoch": 0.7984607432083166, "grad_norm": 0.4568888545036316, "learning_rate": 0.0001, "loss": 1.6627, "step": 6951 }, { "epoch": 0.7985756131181437, "grad_norm": 0.458404541015625, "learning_rate": 0.0001, "loss": 1.5422, "step": 6952 }, { "epoch": 0.7986904830279709, "grad_norm": 0.45047512650489807, "learning_rate": 0.0001, "loss": 1.5767, "step": 6953 }, { "epoch": 0.798805352937798, "grad_norm": 0.4487355053424835, "learning_rate": 0.0001, "loss": 1.5318, "step": 6954 }, { "epoch": 0.7989202228476251, "grad_norm": 0.46647393703460693, "learning_rate": 0.0001, "loss": 1.5918, "step": 6955 }, { "epoch": 0.7990350927574522, "grad_norm": 0.4552063047885895, "learning_rate": 0.0001, "loss": 1.6044, "step": 6956 }, { "epoch": 0.7991499626672793, "grad_norm": 0.5072106719017029, "learning_rate": 0.0001, "loss": 1.8092, "step": 6957 }, { "epoch": 0.7992648325771065, "grad_norm": 0.4884167015552521, "learning_rate": 0.0001, "loss": 1.6688, "step": 6958 }, { "epoch": 0.7993797024869336, "grad_norm": 0.4894075095653534, "learning_rate": 0.0001, "loss": 1.665, "step": 6959 }, { "epoch": 0.7994945723967607, "grad_norm": 0.5430724024772644, "learning_rate": 0.0001, "loss": 1.4763, "step": 6960 }, { "epoch": 0.7996094423065878, "grad_norm": 0.49636295437812805, "learning_rate": 0.0001, "loss": 1.7136, "step": 6961 }, { "epoch": 0.7997243122164149, "grad_norm": 0.46621257066726685, "learning_rate": 0.0001, "loss": 1.7219, "step": 6962 }, { "epoch": 0.799839182126242, "grad_norm": 0.4584997296333313, "learning_rate": 0.0001, "loss": 1.6187, "step": 6963 }, { "epoch": 0.7999540520360692, "grad_norm": 0.4953577518463135, "learning_rate": 0.0001, "loss": 1.744, "step": 6964 }, { "epoch": 0.8000689219458963, "grad_norm": 0.5007856488227844, "learning_rate": 0.0001, "loss": 1.6076, "step": 6965 }, { "epoch": 0.8001837918557234, "grad_norm": 0.46211037039756775, "learning_rate": 0.0001, "loss": 1.6078, "step": 6966 }, { "epoch": 0.8002986617655505, "grad_norm": 0.47474467754364014, "learning_rate": 0.0001, "loss": 1.7794, "step": 6967 }, { "epoch": 0.8004135316753777, "grad_norm": 0.4621082544326782, "learning_rate": 0.0001, "loss": 1.6307, "step": 6968 }, { "epoch": 0.8005284015852048, "grad_norm": 0.4746955335140228, "learning_rate": 0.0001, "loss": 1.6833, "step": 6969 }, { "epoch": 0.8006432714950319, "grad_norm": 0.44209524989128113, "learning_rate": 0.0001, "loss": 1.7345, "step": 6970 }, { "epoch": 0.800758141404859, "grad_norm": 0.48901766538619995, "learning_rate": 0.0001, "loss": 1.672, "step": 6971 }, { "epoch": 0.8008730113146861, "grad_norm": 0.5113202333450317, "learning_rate": 0.0001, "loss": 1.5684, "step": 6972 }, { "epoch": 0.8009878812245133, "grad_norm": 0.4625761806964874, "learning_rate": 0.0001, "loss": 1.6895, "step": 6973 }, { "epoch": 0.8011027511343404, "grad_norm": 0.4590431749820709, "learning_rate": 0.0001, "loss": 1.6322, "step": 6974 }, { "epoch": 0.8012176210441675, "grad_norm": 0.4370403289794922, "learning_rate": 0.0001, "loss": 1.5681, "step": 6975 }, { "epoch": 0.8013324909539946, "grad_norm": 0.4700826406478882, "learning_rate": 0.0001, "loss": 1.6091, "step": 6976 }, { "epoch": 0.8014473608638217, "grad_norm": 0.5367459058761597, "learning_rate": 0.0001, "loss": 1.7318, "step": 6977 }, { "epoch": 0.8015622307736489, "grad_norm": 0.4377821087837219, "learning_rate": 0.0001, "loss": 1.5507, "step": 6978 }, { "epoch": 0.801677100683476, "grad_norm": 0.45518580079078674, "learning_rate": 0.0001, "loss": 1.5111, "step": 6979 }, { "epoch": 0.8017919705933031, "grad_norm": 0.4376772344112396, "learning_rate": 0.0001, "loss": 1.5323, "step": 6980 }, { "epoch": 0.8019068405031302, "grad_norm": 0.45350295305252075, "learning_rate": 0.0001, "loss": 1.6036, "step": 6981 }, { "epoch": 0.8020217104129573, "grad_norm": 0.4758756458759308, "learning_rate": 0.0001, "loss": 1.7442, "step": 6982 }, { "epoch": 0.8021365803227845, "grad_norm": 0.43972933292388916, "learning_rate": 0.0001, "loss": 1.5419, "step": 6983 }, { "epoch": 0.8022514502326116, "grad_norm": 0.45926135778427124, "learning_rate": 0.0001, "loss": 1.7564, "step": 6984 }, { "epoch": 0.8023663201424387, "grad_norm": 0.46066513657569885, "learning_rate": 0.0001, "loss": 1.5823, "step": 6985 }, { "epoch": 0.8024811900522658, "grad_norm": 0.4452419579029083, "learning_rate": 0.0001, "loss": 1.5413, "step": 6986 }, { "epoch": 0.8025960599620929, "grad_norm": 0.44738146662712097, "learning_rate": 0.0001, "loss": 1.5042, "step": 6987 }, { "epoch": 0.80271092987192, "grad_norm": 0.4505269229412079, "learning_rate": 0.0001, "loss": 1.5, "step": 6988 }, { "epoch": 0.8028257997817472, "grad_norm": 0.4531947672367096, "learning_rate": 0.0001, "loss": 1.6508, "step": 6989 }, { "epoch": 0.8029406696915743, "grad_norm": 0.5062926411628723, "learning_rate": 0.0001, "loss": 1.8187, "step": 6990 }, { "epoch": 0.8030555396014014, "grad_norm": 0.472827285528183, "learning_rate": 0.0001, "loss": 1.6253, "step": 6991 }, { "epoch": 0.8031704095112285, "grad_norm": 0.5587745308876038, "learning_rate": 0.0001, "loss": 1.7311, "step": 6992 }, { "epoch": 0.8032852794210557, "grad_norm": 0.43462514877319336, "learning_rate": 0.0001, "loss": 1.3869, "step": 6993 }, { "epoch": 0.8034001493308828, "grad_norm": 0.45932313799858093, "learning_rate": 0.0001, "loss": 1.7735, "step": 6994 }, { "epoch": 0.8035150192407099, "grad_norm": 0.47527042031288147, "learning_rate": 0.0001, "loss": 1.7342, "step": 6995 }, { "epoch": 0.803629889150537, "grad_norm": 0.4415908455848694, "learning_rate": 0.0001, "loss": 1.4238, "step": 6996 }, { "epoch": 0.8037447590603641, "grad_norm": 0.4424525499343872, "learning_rate": 0.0001, "loss": 1.8068, "step": 6997 }, { "epoch": 0.8038596289701913, "grad_norm": 0.45409083366394043, "learning_rate": 0.0001, "loss": 1.4999, "step": 6998 }, { "epoch": 0.8039744988800184, "grad_norm": 0.5033005475997925, "learning_rate": 0.0001, "loss": 1.6588, "step": 6999 }, { "epoch": 0.8040893687898455, "grad_norm": 0.4428901970386505, "learning_rate": 0.0001, "loss": 1.5321, "step": 7000 }, { "epoch": 0.8042042386996726, "grad_norm": 0.4608984589576721, "learning_rate": 0.0001, "loss": 1.5062, "step": 7001 }, { "epoch": 0.8043191086094997, "grad_norm": 0.47772228717803955, "learning_rate": 0.0001, "loss": 1.602, "step": 7002 }, { "epoch": 0.8044339785193269, "grad_norm": 0.4590427577495575, "learning_rate": 0.0001, "loss": 1.7222, "step": 7003 }, { "epoch": 0.804548848429154, "grad_norm": 0.5008092522621155, "learning_rate": 0.0001, "loss": 1.8124, "step": 7004 }, { "epoch": 0.8046637183389811, "grad_norm": 0.428852915763855, "learning_rate": 0.0001, "loss": 1.6382, "step": 7005 }, { "epoch": 0.8047785882488082, "grad_norm": 0.46282047033309937, "learning_rate": 0.0001, "loss": 1.6544, "step": 7006 }, { "epoch": 0.8048934581586353, "grad_norm": 0.4332880675792694, "learning_rate": 0.0001, "loss": 1.796, "step": 7007 }, { "epoch": 0.8050083280684625, "grad_norm": 0.4656613767147064, "learning_rate": 0.0001, "loss": 1.723, "step": 7008 }, { "epoch": 0.8051231979782896, "grad_norm": 0.4826306104660034, "learning_rate": 0.0001, "loss": 1.8894, "step": 7009 }, { "epoch": 0.8052380678881167, "grad_norm": 0.4590745270252228, "learning_rate": 0.0001, "loss": 1.7591, "step": 7010 }, { "epoch": 0.8053529377979438, "grad_norm": 0.48546114563941956, "learning_rate": 0.0001, "loss": 1.7585, "step": 7011 }, { "epoch": 0.8054678077077709, "grad_norm": 0.4384916424751282, "learning_rate": 0.0001, "loss": 1.6979, "step": 7012 }, { "epoch": 0.805582677617598, "grad_norm": 0.47794288396835327, "learning_rate": 0.0001, "loss": 1.7491, "step": 7013 }, { "epoch": 0.8056975475274252, "grad_norm": 0.4551166892051697, "learning_rate": 0.0001, "loss": 1.6755, "step": 7014 }, { "epoch": 0.8058124174372523, "grad_norm": 0.4861622452735901, "learning_rate": 0.0001, "loss": 1.5717, "step": 7015 }, { "epoch": 0.8059272873470794, "grad_norm": 0.4588170051574707, "learning_rate": 0.0001, "loss": 1.4308, "step": 7016 }, { "epoch": 0.8060421572569065, "grad_norm": 0.46102410554885864, "learning_rate": 0.0001, "loss": 1.5345, "step": 7017 }, { "epoch": 0.8061570271667337, "grad_norm": 0.44939014315605164, "learning_rate": 0.0001, "loss": 1.6842, "step": 7018 }, { "epoch": 0.8062718970765608, "grad_norm": 0.4488585293292999, "learning_rate": 0.0001, "loss": 1.485, "step": 7019 }, { "epoch": 0.8063867669863879, "grad_norm": 0.4329974055290222, "learning_rate": 0.0001, "loss": 1.5916, "step": 7020 }, { "epoch": 0.806501636896215, "grad_norm": 0.4898880124092102, "learning_rate": 0.0001, "loss": 1.7087, "step": 7021 }, { "epoch": 0.8066165068060421, "grad_norm": 0.4246252477169037, "learning_rate": 0.0001, "loss": 1.4405, "step": 7022 }, { "epoch": 0.8067313767158693, "grad_norm": 0.49754396080970764, "learning_rate": 0.0001, "loss": 1.4626, "step": 7023 }, { "epoch": 0.8068462466256964, "grad_norm": 0.4949999749660492, "learning_rate": 0.0001, "loss": 1.6155, "step": 7024 }, { "epoch": 0.8069611165355235, "grad_norm": 0.4603281617164612, "learning_rate": 0.0001, "loss": 1.5919, "step": 7025 }, { "epoch": 0.8070759864453506, "grad_norm": 0.4442635476589203, "learning_rate": 0.0001, "loss": 1.6615, "step": 7026 }, { "epoch": 0.8071908563551777, "grad_norm": 0.46228450536727905, "learning_rate": 0.0001, "loss": 1.5407, "step": 7027 }, { "epoch": 0.8073057262650049, "grad_norm": 0.46300187706947327, "learning_rate": 0.0001, "loss": 1.5931, "step": 7028 }, { "epoch": 0.807420596174832, "grad_norm": 0.4652291536331177, "learning_rate": 0.0001, "loss": 1.7684, "step": 7029 }, { "epoch": 0.8075354660846591, "grad_norm": 0.4570204019546509, "learning_rate": 0.0001, "loss": 1.4686, "step": 7030 }, { "epoch": 0.8076503359944862, "grad_norm": 0.453274667263031, "learning_rate": 0.0001, "loss": 1.5297, "step": 7031 }, { "epoch": 0.8077652059043133, "grad_norm": 0.4465121030807495, "learning_rate": 0.0001, "loss": 1.4167, "step": 7032 }, { "epoch": 0.8078800758141405, "grad_norm": 0.4628574550151825, "learning_rate": 0.0001, "loss": 1.5498, "step": 7033 }, { "epoch": 0.8079949457239676, "grad_norm": 0.4569374918937683, "learning_rate": 0.0001, "loss": 1.4265, "step": 7034 }, { "epoch": 0.8081098156337947, "grad_norm": 0.43578875064849854, "learning_rate": 0.0001, "loss": 1.4849, "step": 7035 }, { "epoch": 0.8082246855436218, "grad_norm": 0.45690855383872986, "learning_rate": 0.0001, "loss": 1.6328, "step": 7036 }, { "epoch": 0.8083395554534489, "grad_norm": 0.4643135666847229, "learning_rate": 0.0001, "loss": 1.6545, "step": 7037 }, { "epoch": 0.808454425363276, "grad_norm": 0.4422778785228729, "learning_rate": 0.0001, "loss": 1.4945, "step": 7038 }, { "epoch": 0.8085692952731032, "grad_norm": 0.5053475499153137, "learning_rate": 0.0001, "loss": 1.5699, "step": 7039 }, { "epoch": 0.8086841651829303, "grad_norm": 0.4230154752731323, "learning_rate": 0.0001, "loss": 1.4409, "step": 7040 }, { "epoch": 0.8087990350927574, "grad_norm": 0.45723220705986023, "learning_rate": 0.0001, "loss": 1.6677, "step": 7041 }, { "epoch": 0.8089139050025845, "grad_norm": 0.43414244055747986, "learning_rate": 0.0001, "loss": 1.4774, "step": 7042 }, { "epoch": 0.8090287749124117, "grad_norm": 0.45168161392211914, "learning_rate": 0.0001, "loss": 1.468, "step": 7043 }, { "epoch": 0.8091436448222388, "grad_norm": 0.4413895308971405, "learning_rate": 0.0001, "loss": 1.5116, "step": 7044 }, { "epoch": 0.8092585147320659, "grad_norm": 0.45228877663612366, "learning_rate": 0.0001, "loss": 1.627, "step": 7045 }, { "epoch": 0.809373384641893, "grad_norm": 0.4586316645145416, "learning_rate": 0.0001, "loss": 1.6476, "step": 7046 }, { "epoch": 0.8094882545517201, "grad_norm": 0.4477645456790924, "learning_rate": 0.0001, "loss": 1.6319, "step": 7047 }, { "epoch": 0.8096031244615473, "grad_norm": 0.5138921737670898, "learning_rate": 0.0001, "loss": 1.6401, "step": 7048 }, { "epoch": 0.8097179943713744, "grad_norm": 0.4743337333202362, "learning_rate": 0.0001, "loss": 1.8095, "step": 7049 }, { "epoch": 0.8098328642812015, "grad_norm": 0.47952887415885925, "learning_rate": 0.0001, "loss": 1.8992, "step": 7050 }, { "epoch": 0.8099477341910286, "grad_norm": 0.4406343102455139, "learning_rate": 0.0001, "loss": 1.5797, "step": 7051 }, { "epoch": 0.8100626041008557, "grad_norm": 0.5044456720352173, "learning_rate": 0.0001, "loss": 1.6264, "step": 7052 }, { "epoch": 0.8101774740106829, "grad_norm": 0.45316067337989807, "learning_rate": 0.0001, "loss": 1.6792, "step": 7053 }, { "epoch": 0.81029234392051, "grad_norm": 0.4341808259487152, "learning_rate": 0.0001, "loss": 1.3805, "step": 7054 }, { "epoch": 0.8104072138303371, "grad_norm": 0.44615742564201355, "learning_rate": 0.0001, "loss": 1.577, "step": 7055 }, { "epoch": 0.8105220837401642, "grad_norm": 0.5084645748138428, "learning_rate": 0.0001, "loss": 1.4708, "step": 7056 }, { "epoch": 0.8106369536499913, "grad_norm": 0.4395124912261963, "learning_rate": 0.0001, "loss": 1.4493, "step": 7057 }, { "epoch": 0.8107518235598185, "grad_norm": 0.4524141550064087, "learning_rate": 0.0001, "loss": 1.5445, "step": 7058 }, { "epoch": 0.8108666934696456, "grad_norm": 0.4532511532306671, "learning_rate": 0.0001, "loss": 1.6722, "step": 7059 }, { "epoch": 0.8109815633794727, "grad_norm": 0.45477429032325745, "learning_rate": 0.0001, "loss": 1.5978, "step": 7060 }, { "epoch": 0.8110964332892999, "grad_norm": 0.4670192003250122, "learning_rate": 0.0001, "loss": 1.7155, "step": 7061 }, { "epoch": 0.811211303199127, "grad_norm": 0.47830730676651, "learning_rate": 0.0001, "loss": 1.6898, "step": 7062 }, { "epoch": 0.8113261731089542, "grad_norm": 0.4505847692489624, "learning_rate": 0.0001, "loss": 1.5469, "step": 7063 }, { "epoch": 0.8114410430187813, "grad_norm": 0.44358721375465393, "learning_rate": 0.0001, "loss": 1.6125, "step": 7064 }, { "epoch": 0.8115559129286084, "grad_norm": 0.45430463552474976, "learning_rate": 0.0001, "loss": 1.6516, "step": 7065 }, { "epoch": 0.8116707828384355, "grad_norm": 0.40954452753067017, "learning_rate": 0.0001, "loss": 1.4936, "step": 7066 }, { "epoch": 0.8117856527482626, "grad_norm": 0.4465819001197815, "learning_rate": 0.0001, "loss": 1.641, "step": 7067 }, { "epoch": 0.8119005226580898, "grad_norm": 0.45103660225868225, "learning_rate": 0.0001, "loss": 1.5577, "step": 7068 }, { "epoch": 0.8120153925679169, "grad_norm": 0.44100210070610046, "learning_rate": 0.0001, "loss": 1.5393, "step": 7069 }, { "epoch": 0.812130262477744, "grad_norm": 0.45234718918800354, "learning_rate": 0.0001, "loss": 1.5397, "step": 7070 }, { "epoch": 0.8122451323875711, "grad_norm": 0.4711310565471649, "learning_rate": 0.0001, "loss": 1.6381, "step": 7071 }, { "epoch": 0.8123600022973982, "grad_norm": 0.4617573618888855, "learning_rate": 0.0001, "loss": 1.6191, "step": 7072 }, { "epoch": 0.8124748722072254, "grad_norm": 0.4681065082550049, "learning_rate": 0.0001, "loss": 1.3769, "step": 7073 }, { "epoch": 0.8125897421170525, "grad_norm": 0.500937819480896, "learning_rate": 0.0001, "loss": 1.8512, "step": 7074 }, { "epoch": 0.8127046120268796, "grad_norm": 0.5059450268745422, "learning_rate": 0.0001, "loss": 1.6661, "step": 7075 }, { "epoch": 0.8128194819367067, "grad_norm": 0.49339231848716736, "learning_rate": 0.0001, "loss": 1.619, "step": 7076 }, { "epoch": 0.8129343518465338, "grad_norm": 0.4886321723461151, "learning_rate": 0.0001, "loss": 1.6388, "step": 7077 }, { "epoch": 0.813049221756361, "grad_norm": 0.4459485113620758, "learning_rate": 0.0001, "loss": 1.5529, "step": 7078 }, { "epoch": 0.8131640916661881, "grad_norm": 0.45102250576019287, "learning_rate": 0.0001, "loss": 1.6559, "step": 7079 }, { "epoch": 0.8132789615760152, "grad_norm": 0.46558091044425964, "learning_rate": 0.0001, "loss": 1.4895, "step": 7080 }, { "epoch": 0.8133938314858423, "grad_norm": 0.45526906847953796, "learning_rate": 0.0001, "loss": 1.6212, "step": 7081 }, { "epoch": 0.8135087013956694, "grad_norm": 0.44742417335510254, "learning_rate": 0.0001, "loss": 1.6513, "step": 7082 }, { "epoch": 0.8136235713054966, "grad_norm": 0.44720160961151123, "learning_rate": 0.0001, "loss": 1.5228, "step": 7083 }, { "epoch": 0.8137384412153237, "grad_norm": 0.4373728632926941, "learning_rate": 0.0001, "loss": 1.5815, "step": 7084 }, { "epoch": 0.8138533111251508, "grad_norm": 0.4450926184654236, "learning_rate": 0.0001, "loss": 1.4912, "step": 7085 }, { "epoch": 0.8139681810349779, "grad_norm": 0.44571229815483093, "learning_rate": 0.0001, "loss": 1.5636, "step": 7086 }, { "epoch": 0.814083050944805, "grad_norm": 0.49900081753730774, "learning_rate": 0.0001, "loss": 1.5591, "step": 7087 }, { "epoch": 0.8141979208546322, "grad_norm": 0.440686970949173, "learning_rate": 0.0001, "loss": 1.6891, "step": 7088 }, { "epoch": 0.8143127907644593, "grad_norm": 0.5091794729232788, "learning_rate": 0.0001, "loss": 1.7744, "step": 7089 }, { "epoch": 0.8144276606742864, "grad_norm": 0.4285080134868622, "learning_rate": 0.0001, "loss": 1.5412, "step": 7090 }, { "epoch": 0.8145425305841135, "grad_norm": 0.43077725172042847, "learning_rate": 0.0001, "loss": 1.4802, "step": 7091 }, { "epoch": 0.8146574004939406, "grad_norm": 0.46352505683898926, "learning_rate": 0.0001, "loss": 1.6418, "step": 7092 }, { "epoch": 0.8147722704037678, "grad_norm": 0.44118359684944153, "learning_rate": 0.0001, "loss": 1.6337, "step": 7093 }, { "epoch": 0.8148871403135949, "grad_norm": 0.45531347393989563, "learning_rate": 0.0001, "loss": 1.6527, "step": 7094 }, { "epoch": 0.815002010223422, "grad_norm": 0.5839067697525024, "learning_rate": 0.0001, "loss": 1.6389, "step": 7095 }, { "epoch": 0.8151168801332491, "grad_norm": 0.4553830623626709, "learning_rate": 0.0001, "loss": 1.6279, "step": 7096 }, { "epoch": 0.8152317500430762, "grad_norm": 0.4333423972129822, "learning_rate": 0.0001, "loss": 1.5816, "step": 7097 }, { "epoch": 0.8153466199529034, "grad_norm": 0.4156707525253296, "learning_rate": 0.0001, "loss": 1.2689, "step": 7098 }, { "epoch": 0.8154614898627305, "grad_norm": 0.5249226093292236, "learning_rate": 0.0001, "loss": 1.8285, "step": 7099 }, { "epoch": 0.8155763597725576, "grad_norm": 0.45831164717674255, "learning_rate": 0.0001, "loss": 1.5238, "step": 7100 }, { "epoch": 0.8156912296823847, "grad_norm": 0.5419580340385437, "learning_rate": 0.0001, "loss": 1.9262, "step": 7101 }, { "epoch": 0.8158060995922118, "grad_norm": 0.46825796365737915, "learning_rate": 0.0001, "loss": 1.5954, "step": 7102 }, { "epoch": 0.815920969502039, "grad_norm": 0.47406765818595886, "learning_rate": 0.0001, "loss": 1.5404, "step": 7103 }, { "epoch": 0.8160358394118661, "grad_norm": 0.49613794684410095, "learning_rate": 0.0001, "loss": 1.7463, "step": 7104 }, { "epoch": 0.8161507093216932, "grad_norm": 0.4632255733013153, "learning_rate": 0.0001, "loss": 1.4529, "step": 7105 }, { "epoch": 0.8162655792315203, "grad_norm": 0.45750927925109863, "learning_rate": 0.0001, "loss": 1.6081, "step": 7106 }, { "epoch": 0.8163804491413474, "grad_norm": 0.46657127141952515, "learning_rate": 0.0001, "loss": 1.7844, "step": 7107 }, { "epoch": 0.8164953190511746, "grad_norm": 0.47650468349456787, "learning_rate": 0.0001, "loss": 1.5957, "step": 7108 }, { "epoch": 0.8166101889610017, "grad_norm": 0.4252217411994934, "learning_rate": 0.0001, "loss": 1.5233, "step": 7109 }, { "epoch": 0.8167250588708288, "grad_norm": 0.45260146260261536, "learning_rate": 0.0001, "loss": 1.6294, "step": 7110 }, { "epoch": 0.8168399287806559, "grad_norm": 0.43198126554489136, "learning_rate": 0.0001, "loss": 1.5063, "step": 7111 }, { "epoch": 0.816954798690483, "grad_norm": 0.48374143242836, "learning_rate": 0.0001, "loss": 1.72, "step": 7112 }, { "epoch": 0.8170696686003102, "grad_norm": 0.5113547444343567, "learning_rate": 0.0001, "loss": 1.7583, "step": 7113 }, { "epoch": 0.8171845385101373, "grad_norm": 0.46327292919158936, "learning_rate": 0.0001, "loss": 1.6309, "step": 7114 }, { "epoch": 0.8172994084199644, "grad_norm": 0.4762055575847626, "learning_rate": 0.0001, "loss": 1.6213, "step": 7115 }, { "epoch": 0.8174142783297915, "grad_norm": 0.5080848932266235, "learning_rate": 0.0001, "loss": 1.7955, "step": 7116 }, { "epoch": 0.8175291482396186, "grad_norm": 0.47820234298706055, "learning_rate": 0.0001, "loss": 1.736, "step": 7117 }, { "epoch": 0.8176440181494458, "grad_norm": 0.4423477351665497, "learning_rate": 0.0001, "loss": 1.5467, "step": 7118 }, { "epoch": 0.8177588880592729, "grad_norm": 0.4471362829208374, "learning_rate": 0.0001, "loss": 1.5868, "step": 7119 }, { "epoch": 0.8178737579691, "grad_norm": 0.4536069631576538, "learning_rate": 0.0001, "loss": 1.6112, "step": 7120 }, { "epoch": 0.8179886278789271, "grad_norm": 0.4437789022922516, "learning_rate": 0.0001, "loss": 1.4918, "step": 7121 }, { "epoch": 0.8181034977887542, "grad_norm": 0.4352627992630005, "learning_rate": 0.0001, "loss": 1.5588, "step": 7122 }, { "epoch": 0.8182183676985814, "grad_norm": 0.4951383173465729, "learning_rate": 0.0001, "loss": 1.5797, "step": 7123 }, { "epoch": 0.8183332376084085, "grad_norm": 0.47660359740257263, "learning_rate": 0.0001, "loss": 1.649, "step": 7124 }, { "epoch": 0.8184481075182356, "grad_norm": 0.4454366862773895, "learning_rate": 0.0001, "loss": 1.647, "step": 7125 }, { "epoch": 0.8185629774280627, "grad_norm": 0.47320306301116943, "learning_rate": 0.0001, "loss": 1.8041, "step": 7126 }, { "epoch": 0.8186778473378898, "grad_norm": 0.4817422330379486, "learning_rate": 0.0001, "loss": 1.6996, "step": 7127 }, { "epoch": 0.818792717247717, "grad_norm": 0.4612285792827606, "learning_rate": 0.0001, "loss": 1.6331, "step": 7128 }, { "epoch": 0.8189075871575441, "grad_norm": 0.4710875153541565, "learning_rate": 0.0001, "loss": 1.6399, "step": 7129 }, { "epoch": 0.8190224570673712, "grad_norm": 0.4853847324848175, "learning_rate": 0.0001, "loss": 1.7546, "step": 7130 }, { "epoch": 0.8191373269771983, "grad_norm": 0.46727731823921204, "learning_rate": 0.0001, "loss": 1.7467, "step": 7131 }, { "epoch": 0.8192521968870254, "grad_norm": 0.44645458459854126, "learning_rate": 0.0001, "loss": 1.5363, "step": 7132 }, { "epoch": 0.8193670667968526, "grad_norm": 0.4743711054325104, "learning_rate": 0.0001, "loss": 1.7297, "step": 7133 }, { "epoch": 0.8194819367066797, "grad_norm": 0.45293739438056946, "learning_rate": 0.0001, "loss": 1.6004, "step": 7134 }, { "epoch": 0.8195968066165068, "grad_norm": 0.46416231989860535, "learning_rate": 0.0001, "loss": 1.6761, "step": 7135 }, { "epoch": 0.8197116765263339, "grad_norm": 0.45859238505363464, "learning_rate": 0.0001, "loss": 1.6363, "step": 7136 }, { "epoch": 0.819826546436161, "grad_norm": 0.45095375180244446, "learning_rate": 0.0001, "loss": 1.5282, "step": 7137 }, { "epoch": 0.8199414163459882, "grad_norm": 0.5300037860870361, "learning_rate": 0.0001, "loss": 1.827, "step": 7138 }, { "epoch": 0.8200562862558153, "grad_norm": 0.4848960340023041, "learning_rate": 0.0001, "loss": 1.4755, "step": 7139 }, { "epoch": 0.8201711561656424, "grad_norm": 0.5703202486038208, "learning_rate": 0.0001, "loss": 1.2951, "step": 7140 }, { "epoch": 0.8202860260754695, "grad_norm": 0.44500285387039185, "learning_rate": 0.0001, "loss": 1.5112, "step": 7141 }, { "epoch": 0.8204008959852966, "grad_norm": 0.4689328968524933, "learning_rate": 0.0001, "loss": 1.4309, "step": 7142 }, { "epoch": 0.8205157658951238, "grad_norm": 0.486074298620224, "learning_rate": 0.0001, "loss": 1.5857, "step": 7143 }, { "epoch": 0.8206306358049509, "grad_norm": 0.4206257462501526, "learning_rate": 0.0001, "loss": 1.334, "step": 7144 }, { "epoch": 0.820745505714778, "grad_norm": 0.4606837332248688, "learning_rate": 0.0001, "loss": 1.5509, "step": 7145 }, { "epoch": 0.8208603756246051, "grad_norm": 0.5066717267036438, "learning_rate": 0.0001, "loss": 1.8185, "step": 7146 }, { "epoch": 0.8209752455344322, "grad_norm": 0.46547627449035645, "learning_rate": 0.0001, "loss": 1.5673, "step": 7147 }, { "epoch": 0.8210901154442594, "grad_norm": 0.4761962294578552, "learning_rate": 0.0001, "loss": 1.6391, "step": 7148 }, { "epoch": 0.8212049853540865, "grad_norm": 0.459431916475296, "learning_rate": 0.0001, "loss": 1.66, "step": 7149 }, { "epoch": 0.8213198552639136, "grad_norm": 0.4470955729484558, "learning_rate": 0.0001, "loss": 1.6065, "step": 7150 }, { "epoch": 0.8214347251737407, "grad_norm": 0.48236003518104553, "learning_rate": 0.0001, "loss": 1.8159, "step": 7151 }, { "epoch": 0.8215495950835678, "grad_norm": 0.46525686979293823, "learning_rate": 0.0001, "loss": 1.6244, "step": 7152 }, { "epoch": 0.821664464993395, "grad_norm": 0.41665059328079224, "learning_rate": 0.0001, "loss": 1.4355, "step": 7153 }, { "epoch": 0.8217793349032221, "grad_norm": 0.518457293510437, "learning_rate": 0.0001, "loss": 1.83, "step": 7154 }, { "epoch": 0.8218942048130492, "grad_norm": 0.4431942105293274, "learning_rate": 0.0001, "loss": 1.5345, "step": 7155 }, { "epoch": 0.8220090747228763, "grad_norm": 0.5172891616821289, "learning_rate": 0.0001, "loss": 1.6801, "step": 7156 }, { "epoch": 0.8221239446327034, "grad_norm": 0.4258945882320404, "learning_rate": 0.0001, "loss": 1.6212, "step": 7157 }, { "epoch": 0.8222388145425306, "grad_norm": 0.4604533016681671, "learning_rate": 0.0001, "loss": 1.5509, "step": 7158 }, { "epoch": 0.8223536844523577, "grad_norm": 0.4496094584465027, "learning_rate": 0.0001, "loss": 1.4679, "step": 7159 }, { "epoch": 0.8224685543621848, "grad_norm": 0.4688582420349121, "learning_rate": 0.0001, "loss": 1.668, "step": 7160 }, { "epoch": 0.8225834242720119, "grad_norm": 0.47711849212646484, "learning_rate": 0.0001, "loss": 1.574, "step": 7161 }, { "epoch": 0.822698294181839, "grad_norm": 0.45683956146240234, "learning_rate": 0.0001, "loss": 1.5046, "step": 7162 }, { "epoch": 0.8228131640916662, "grad_norm": 0.437517374753952, "learning_rate": 0.0001, "loss": 1.4831, "step": 7163 }, { "epoch": 0.8229280340014933, "grad_norm": 0.4502145051956177, "learning_rate": 0.0001, "loss": 1.5689, "step": 7164 }, { "epoch": 0.8230429039113204, "grad_norm": 0.4960477650165558, "learning_rate": 0.0001, "loss": 1.6825, "step": 7165 }, { "epoch": 0.8231577738211475, "grad_norm": 0.4727974534034729, "learning_rate": 0.0001, "loss": 1.5416, "step": 7166 }, { "epoch": 0.8232726437309746, "grad_norm": 0.4566989541053772, "learning_rate": 0.0001, "loss": 1.5257, "step": 7167 }, { "epoch": 0.8233875136408018, "grad_norm": 0.49063435196876526, "learning_rate": 0.0001, "loss": 1.576, "step": 7168 }, { "epoch": 0.8235023835506289, "grad_norm": 0.4589118957519531, "learning_rate": 0.0001, "loss": 1.6443, "step": 7169 }, { "epoch": 0.823617253460456, "grad_norm": 0.46506285667419434, "learning_rate": 0.0001, "loss": 1.5499, "step": 7170 }, { "epoch": 0.8237321233702831, "grad_norm": 0.4581180810928345, "learning_rate": 0.0001, "loss": 1.6783, "step": 7171 }, { "epoch": 0.8238469932801102, "grad_norm": 0.47631680965423584, "learning_rate": 0.0001, "loss": 1.6033, "step": 7172 }, { "epoch": 0.8239618631899374, "grad_norm": 0.4644998013973236, "learning_rate": 0.0001, "loss": 1.6591, "step": 7173 }, { "epoch": 0.8240767330997645, "grad_norm": 0.43831029534339905, "learning_rate": 0.0001, "loss": 1.4786, "step": 7174 }, { "epoch": 0.8241916030095916, "grad_norm": 0.45947766304016113, "learning_rate": 0.0001, "loss": 1.6604, "step": 7175 }, { "epoch": 0.8243064729194187, "grad_norm": 0.44559556245803833, "learning_rate": 0.0001, "loss": 1.5418, "step": 7176 }, { "epoch": 0.8244213428292458, "grad_norm": 0.43358561396598816, "learning_rate": 0.0001, "loss": 1.5983, "step": 7177 }, { "epoch": 0.824536212739073, "grad_norm": 0.4433882534503937, "learning_rate": 0.0001, "loss": 1.5811, "step": 7178 }, { "epoch": 0.8246510826489001, "grad_norm": 0.48557913303375244, "learning_rate": 0.0001, "loss": 1.6611, "step": 7179 }, { "epoch": 0.8247659525587272, "grad_norm": 0.4322436451911926, "learning_rate": 0.0001, "loss": 1.5646, "step": 7180 }, { "epoch": 0.8248808224685543, "grad_norm": 0.4723256230354309, "learning_rate": 0.0001, "loss": 1.6417, "step": 7181 }, { "epoch": 0.8249956923783814, "grad_norm": 0.4498218894004822, "learning_rate": 0.0001, "loss": 1.69, "step": 7182 }, { "epoch": 0.8251105622882086, "grad_norm": 0.44787004590034485, "learning_rate": 0.0001, "loss": 1.8015, "step": 7183 }, { "epoch": 0.8252254321980357, "grad_norm": 0.4494984745979309, "learning_rate": 0.0001, "loss": 1.4992, "step": 7184 }, { "epoch": 0.8253403021078628, "grad_norm": 0.4542940557003021, "learning_rate": 0.0001, "loss": 1.5179, "step": 7185 }, { "epoch": 0.8254551720176899, "grad_norm": 0.4573206901550293, "learning_rate": 0.0001, "loss": 1.731, "step": 7186 }, { "epoch": 0.825570041927517, "grad_norm": 0.455084890127182, "learning_rate": 0.0001, "loss": 1.7468, "step": 7187 }, { "epoch": 0.8256849118373442, "grad_norm": 0.4983527958393097, "learning_rate": 0.0001, "loss": 1.5253, "step": 7188 }, { "epoch": 0.8257997817471713, "grad_norm": 0.457497239112854, "learning_rate": 0.0001, "loss": 1.6493, "step": 7189 }, { "epoch": 0.8259146516569984, "grad_norm": 0.485844224691391, "learning_rate": 0.0001, "loss": 1.7502, "step": 7190 }, { "epoch": 0.8260295215668255, "grad_norm": 0.47317448258399963, "learning_rate": 0.0001, "loss": 1.5996, "step": 7191 }, { "epoch": 0.8261443914766526, "grad_norm": 0.4314495921134949, "learning_rate": 0.0001, "loss": 1.5514, "step": 7192 }, { "epoch": 0.8262592613864798, "grad_norm": 0.45822349190711975, "learning_rate": 0.0001, "loss": 1.667, "step": 7193 }, { "epoch": 0.8263741312963069, "grad_norm": 0.4706357419490814, "learning_rate": 0.0001, "loss": 1.6343, "step": 7194 }, { "epoch": 0.826489001206134, "grad_norm": 0.45781227946281433, "learning_rate": 0.0001, "loss": 1.4011, "step": 7195 }, { "epoch": 0.8266038711159611, "grad_norm": 0.43155431747436523, "learning_rate": 0.0001, "loss": 1.5375, "step": 7196 }, { "epoch": 0.8267187410257882, "grad_norm": 0.4829701781272888, "learning_rate": 0.0001, "loss": 1.6697, "step": 7197 }, { "epoch": 0.8268336109356155, "grad_norm": 0.4185773432254791, "learning_rate": 0.0001, "loss": 1.4531, "step": 7198 }, { "epoch": 0.8269484808454426, "grad_norm": 0.4704040288925171, "learning_rate": 0.0001, "loss": 1.6184, "step": 7199 }, { "epoch": 0.8270633507552697, "grad_norm": 0.41344597935676575, "learning_rate": 0.0001, "loss": 1.4317, "step": 7200 }, { "epoch": 0.8271782206650968, "grad_norm": 0.4467063546180725, "learning_rate": 0.0001, "loss": 1.3732, "step": 7201 }, { "epoch": 0.827293090574924, "grad_norm": 0.4785305857658386, "learning_rate": 0.0001, "loss": 1.5443, "step": 7202 }, { "epoch": 0.8274079604847511, "grad_norm": 0.4496002793312073, "learning_rate": 0.0001, "loss": 1.5494, "step": 7203 }, { "epoch": 0.8275228303945782, "grad_norm": 0.45911693572998047, "learning_rate": 0.0001, "loss": 1.3529, "step": 7204 }, { "epoch": 0.8276377003044053, "grad_norm": 0.4438045918941498, "learning_rate": 0.0001, "loss": 1.2912, "step": 7205 }, { "epoch": 0.8277525702142324, "grad_norm": 0.4709499776363373, "learning_rate": 0.0001, "loss": 1.5936, "step": 7206 }, { "epoch": 0.8278674401240596, "grad_norm": 0.4676428437232971, "learning_rate": 0.0001, "loss": 1.5819, "step": 7207 }, { "epoch": 0.8279823100338867, "grad_norm": 0.5031962394714355, "learning_rate": 0.0001, "loss": 1.5998, "step": 7208 }, { "epoch": 0.8280971799437138, "grad_norm": 0.49766260385513306, "learning_rate": 0.0001, "loss": 1.4382, "step": 7209 }, { "epoch": 0.8282120498535409, "grad_norm": 0.5159705877304077, "learning_rate": 0.0001, "loss": 1.8591, "step": 7210 }, { "epoch": 0.828326919763368, "grad_norm": 0.47950395941734314, "learning_rate": 0.0001, "loss": 1.568, "step": 7211 }, { "epoch": 0.8284417896731952, "grad_norm": 0.4581819772720337, "learning_rate": 0.0001, "loss": 1.6142, "step": 7212 }, { "epoch": 0.8285566595830223, "grad_norm": 0.4366266429424286, "learning_rate": 0.0001, "loss": 1.6161, "step": 7213 }, { "epoch": 0.8286715294928494, "grad_norm": 0.48760971426963806, "learning_rate": 0.0001, "loss": 1.5188, "step": 7214 }, { "epoch": 0.8287863994026765, "grad_norm": 0.4329768419265747, "learning_rate": 0.0001, "loss": 1.5507, "step": 7215 }, { "epoch": 0.8289012693125036, "grad_norm": 0.44413167238235474, "learning_rate": 0.0001, "loss": 1.5943, "step": 7216 }, { "epoch": 0.8290161392223308, "grad_norm": 0.4911179840564728, "learning_rate": 0.0001, "loss": 1.4973, "step": 7217 }, { "epoch": 0.8291310091321579, "grad_norm": 0.4415479302406311, "learning_rate": 0.0001, "loss": 1.5761, "step": 7218 }, { "epoch": 0.829245879041985, "grad_norm": 0.5084326863288879, "learning_rate": 0.0001, "loss": 1.7074, "step": 7219 }, { "epoch": 0.8293607489518121, "grad_norm": 0.48137006163597107, "learning_rate": 0.0001, "loss": 1.7523, "step": 7220 }, { "epoch": 0.8294756188616392, "grad_norm": 0.4507937431335449, "learning_rate": 0.0001, "loss": 1.6697, "step": 7221 }, { "epoch": 0.8295904887714664, "grad_norm": 0.4717406630516052, "learning_rate": 0.0001, "loss": 1.754, "step": 7222 }, { "epoch": 0.8297053586812935, "grad_norm": 0.4703536927700043, "learning_rate": 0.0001, "loss": 1.6438, "step": 7223 }, { "epoch": 0.8298202285911206, "grad_norm": 0.41335755586624146, "learning_rate": 0.0001, "loss": 1.4745, "step": 7224 }, { "epoch": 0.8299350985009477, "grad_norm": 0.47170180082321167, "learning_rate": 0.0001, "loss": 1.5615, "step": 7225 }, { "epoch": 0.8300499684107748, "grad_norm": 0.44753673672676086, "learning_rate": 0.0001, "loss": 1.3859, "step": 7226 }, { "epoch": 0.830164838320602, "grad_norm": 0.44213739037513733, "learning_rate": 0.0001, "loss": 1.3913, "step": 7227 }, { "epoch": 0.8302797082304291, "grad_norm": 0.46415868401527405, "learning_rate": 0.0001, "loss": 1.7794, "step": 7228 }, { "epoch": 0.8303945781402562, "grad_norm": 0.47127893567085266, "learning_rate": 0.0001, "loss": 1.6631, "step": 7229 }, { "epoch": 0.8305094480500833, "grad_norm": 0.48462972044944763, "learning_rate": 0.0001, "loss": 1.643, "step": 7230 }, { "epoch": 0.8306243179599104, "grad_norm": 0.43438246846199036, "learning_rate": 0.0001, "loss": 1.5174, "step": 7231 }, { "epoch": 0.8307391878697375, "grad_norm": 0.44734856486320496, "learning_rate": 0.0001, "loss": 1.5771, "step": 7232 }, { "epoch": 0.8308540577795647, "grad_norm": 0.4758615493774414, "learning_rate": 0.0001, "loss": 1.5675, "step": 7233 }, { "epoch": 0.8309689276893918, "grad_norm": 0.5029293298721313, "learning_rate": 0.0001, "loss": 1.7695, "step": 7234 }, { "epoch": 0.8310837975992189, "grad_norm": 0.4502490162849426, "learning_rate": 0.0001, "loss": 1.4654, "step": 7235 }, { "epoch": 0.831198667509046, "grad_norm": 0.4694804251194, "learning_rate": 0.0001, "loss": 1.5624, "step": 7236 }, { "epoch": 0.8313135374188731, "grad_norm": 0.4628579616546631, "learning_rate": 0.0001, "loss": 1.4772, "step": 7237 }, { "epoch": 0.8314284073287003, "grad_norm": 0.40973448753356934, "learning_rate": 0.0001, "loss": 1.3017, "step": 7238 }, { "epoch": 0.8315432772385274, "grad_norm": 0.48314985632896423, "learning_rate": 0.0001, "loss": 1.6824, "step": 7239 }, { "epoch": 0.8316581471483545, "grad_norm": 0.43235471844673157, "learning_rate": 0.0001, "loss": 1.5396, "step": 7240 }, { "epoch": 0.8317730170581816, "grad_norm": 0.45430463552474976, "learning_rate": 0.0001, "loss": 1.5663, "step": 7241 }, { "epoch": 0.8318878869680087, "grad_norm": 0.4984131455421448, "learning_rate": 0.0001, "loss": 1.5334, "step": 7242 }, { "epoch": 0.8320027568778359, "grad_norm": 0.46940264105796814, "learning_rate": 0.0001, "loss": 1.6658, "step": 7243 }, { "epoch": 0.832117626787663, "grad_norm": 0.47424784302711487, "learning_rate": 0.0001, "loss": 1.4645, "step": 7244 }, { "epoch": 0.8322324966974901, "grad_norm": 0.5137829780578613, "learning_rate": 0.0001, "loss": 1.6511, "step": 7245 }, { "epoch": 0.8323473666073172, "grad_norm": 0.5260666012763977, "learning_rate": 0.0001, "loss": 1.7015, "step": 7246 }, { "epoch": 0.8324622365171443, "grad_norm": 0.47583386301994324, "learning_rate": 0.0001, "loss": 1.504, "step": 7247 }, { "epoch": 0.8325771064269715, "grad_norm": 0.4737547039985657, "learning_rate": 0.0001, "loss": 1.7114, "step": 7248 }, { "epoch": 0.8326919763367986, "grad_norm": 0.49073606729507446, "learning_rate": 0.0001, "loss": 1.6678, "step": 7249 }, { "epoch": 0.8328068462466257, "grad_norm": 0.4550478756427765, "learning_rate": 0.0001, "loss": 1.366, "step": 7250 }, { "epoch": 0.8329217161564528, "grad_norm": 0.45904481410980225, "learning_rate": 0.0001, "loss": 1.4628, "step": 7251 }, { "epoch": 0.83303658606628, "grad_norm": 0.4898592233657837, "learning_rate": 0.0001, "loss": 1.7453, "step": 7252 }, { "epoch": 0.8331514559761071, "grad_norm": 0.46044033765792847, "learning_rate": 0.0001, "loss": 1.5762, "step": 7253 }, { "epoch": 0.8332663258859342, "grad_norm": 0.4777674376964569, "learning_rate": 0.0001, "loss": 1.5323, "step": 7254 }, { "epoch": 0.8333811957957613, "grad_norm": 0.44753503799438477, "learning_rate": 0.0001, "loss": 1.729, "step": 7255 }, { "epoch": 0.8334960657055884, "grad_norm": 0.45856595039367676, "learning_rate": 0.0001, "loss": 1.5738, "step": 7256 }, { "epoch": 0.8336109356154155, "grad_norm": 0.4056832790374756, "learning_rate": 0.0001, "loss": 1.2812, "step": 7257 }, { "epoch": 0.8337258055252427, "grad_norm": 0.48094066977500916, "learning_rate": 0.0001, "loss": 1.5232, "step": 7258 }, { "epoch": 0.8338406754350698, "grad_norm": 0.4848513603210449, "learning_rate": 0.0001, "loss": 1.6777, "step": 7259 }, { "epoch": 0.8339555453448969, "grad_norm": 0.4800076484680176, "learning_rate": 0.0001, "loss": 1.4396, "step": 7260 }, { "epoch": 0.834070415254724, "grad_norm": 0.47761738300323486, "learning_rate": 0.0001, "loss": 1.5938, "step": 7261 }, { "epoch": 0.8341852851645511, "grad_norm": 0.4880167245864868, "learning_rate": 0.0001, "loss": 1.5884, "step": 7262 }, { "epoch": 0.8343001550743783, "grad_norm": 0.464358925819397, "learning_rate": 0.0001, "loss": 1.7559, "step": 7263 }, { "epoch": 0.8344150249842054, "grad_norm": 0.4656590521335602, "learning_rate": 0.0001, "loss": 1.576, "step": 7264 }, { "epoch": 0.8345298948940325, "grad_norm": 0.4818393290042877, "learning_rate": 0.0001, "loss": 1.6201, "step": 7265 }, { "epoch": 0.8346447648038596, "grad_norm": 0.4994739294052124, "learning_rate": 0.0001, "loss": 1.4481, "step": 7266 }, { "epoch": 0.8347596347136867, "grad_norm": 0.49908795952796936, "learning_rate": 0.0001, "loss": 1.6915, "step": 7267 }, { "epoch": 0.8348745046235139, "grad_norm": 0.5030030608177185, "learning_rate": 0.0001, "loss": 1.7213, "step": 7268 }, { "epoch": 0.834989374533341, "grad_norm": 0.4775954484939575, "learning_rate": 0.0001, "loss": 1.7861, "step": 7269 }, { "epoch": 0.8351042444431681, "grad_norm": 0.41004353761672974, "learning_rate": 0.0001, "loss": 1.4602, "step": 7270 }, { "epoch": 0.8352191143529952, "grad_norm": 0.49906715750694275, "learning_rate": 0.0001, "loss": 1.5855, "step": 7271 }, { "epoch": 0.8353339842628223, "grad_norm": 0.4458923637866974, "learning_rate": 0.0001, "loss": 1.7002, "step": 7272 }, { "epoch": 0.8354488541726495, "grad_norm": 0.4532411992549896, "learning_rate": 0.0001, "loss": 1.6273, "step": 7273 }, { "epoch": 0.8355637240824766, "grad_norm": 0.4611133933067322, "learning_rate": 0.0001, "loss": 1.5768, "step": 7274 }, { "epoch": 0.8356785939923037, "grad_norm": 0.4394255578517914, "learning_rate": 0.0001, "loss": 1.4816, "step": 7275 }, { "epoch": 0.8357934639021308, "grad_norm": 0.4390329420566559, "learning_rate": 0.0001, "loss": 1.6247, "step": 7276 }, { "epoch": 0.835908333811958, "grad_norm": 0.4387739300727844, "learning_rate": 0.0001, "loss": 1.5974, "step": 7277 }, { "epoch": 0.8360232037217851, "grad_norm": 0.44605275988578796, "learning_rate": 0.0001, "loss": 1.6068, "step": 7278 }, { "epoch": 0.8361380736316122, "grad_norm": 0.4986502528190613, "learning_rate": 0.0001, "loss": 1.4895, "step": 7279 }, { "epoch": 0.8362529435414393, "grad_norm": 0.4799439013004303, "learning_rate": 0.0001, "loss": 1.7177, "step": 7280 }, { "epoch": 0.8363678134512664, "grad_norm": 0.4725569784641266, "learning_rate": 0.0001, "loss": 1.6727, "step": 7281 }, { "epoch": 0.8364826833610935, "grad_norm": 0.46974942088127136, "learning_rate": 0.0001, "loss": 1.5882, "step": 7282 }, { "epoch": 0.8365975532709207, "grad_norm": 0.4600738286972046, "learning_rate": 0.0001, "loss": 1.662, "step": 7283 }, { "epoch": 0.8367124231807478, "grad_norm": 0.516070544719696, "learning_rate": 0.0001, "loss": 1.5734, "step": 7284 }, { "epoch": 0.8368272930905749, "grad_norm": 0.44913583993911743, "learning_rate": 0.0001, "loss": 1.4985, "step": 7285 }, { "epoch": 0.836942163000402, "grad_norm": 0.4450875222682953, "learning_rate": 0.0001, "loss": 1.4021, "step": 7286 }, { "epoch": 0.8370570329102291, "grad_norm": 0.45671719312667847, "learning_rate": 0.0001, "loss": 1.4193, "step": 7287 }, { "epoch": 0.8371719028200563, "grad_norm": 0.4811131954193115, "learning_rate": 0.0001, "loss": 1.588, "step": 7288 }, { "epoch": 0.8372867727298834, "grad_norm": 0.473019540309906, "learning_rate": 0.0001, "loss": 1.6143, "step": 7289 }, { "epoch": 0.8374016426397105, "grad_norm": 0.4964619576931, "learning_rate": 0.0001, "loss": 1.4574, "step": 7290 }, { "epoch": 0.8375165125495376, "grad_norm": 0.43981707096099854, "learning_rate": 0.0001, "loss": 1.5759, "step": 7291 }, { "epoch": 0.8376313824593647, "grad_norm": 0.47483888268470764, "learning_rate": 0.0001, "loss": 1.7885, "step": 7292 }, { "epoch": 0.8377462523691919, "grad_norm": 0.46889838576316833, "learning_rate": 0.0001, "loss": 1.4536, "step": 7293 }, { "epoch": 0.837861122279019, "grad_norm": 0.46966785192489624, "learning_rate": 0.0001, "loss": 1.4182, "step": 7294 }, { "epoch": 0.8379759921888461, "grad_norm": 0.4754125475883484, "learning_rate": 0.0001, "loss": 1.8329, "step": 7295 }, { "epoch": 0.8380908620986732, "grad_norm": 0.44175228476524353, "learning_rate": 0.0001, "loss": 1.5962, "step": 7296 }, { "epoch": 0.8382057320085003, "grad_norm": 0.45148342847824097, "learning_rate": 0.0001, "loss": 1.514, "step": 7297 }, { "epoch": 0.8383206019183275, "grad_norm": 0.46137553453445435, "learning_rate": 0.0001, "loss": 1.531, "step": 7298 }, { "epoch": 0.8384354718281546, "grad_norm": 0.49679750204086304, "learning_rate": 0.0001, "loss": 1.646, "step": 7299 }, { "epoch": 0.8385503417379817, "grad_norm": 0.44846269488334656, "learning_rate": 0.0001, "loss": 1.5111, "step": 7300 }, { "epoch": 0.8386652116478088, "grad_norm": 0.43875476717948914, "learning_rate": 0.0001, "loss": 1.5015, "step": 7301 }, { "epoch": 0.838780081557636, "grad_norm": 0.44996675848960876, "learning_rate": 0.0001, "loss": 1.5054, "step": 7302 }, { "epoch": 0.8388949514674631, "grad_norm": 0.47677183151245117, "learning_rate": 0.0001, "loss": 1.7262, "step": 7303 }, { "epoch": 0.8390098213772902, "grad_norm": 0.4702812433242798, "learning_rate": 0.0001, "loss": 1.6581, "step": 7304 }, { "epoch": 0.8391246912871173, "grad_norm": 0.4662913978099823, "learning_rate": 0.0001, "loss": 1.541, "step": 7305 }, { "epoch": 0.8392395611969444, "grad_norm": 0.5055838227272034, "learning_rate": 0.0001, "loss": 1.747, "step": 7306 }, { "epoch": 0.8393544311067715, "grad_norm": 0.47500503063201904, "learning_rate": 0.0001, "loss": 1.6744, "step": 7307 }, { "epoch": 0.8394693010165987, "grad_norm": 0.48270219564437866, "learning_rate": 0.0001, "loss": 1.7157, "step": 7308 }, { "epoch": 0.8395841709264258, "grad_norm": 0.48767292499542236, "learning_rate": 0.0001, "loss": 1.5027, "step": 7309 }, { "epoch": 0.8396990408362529, "grad_norm": 0.46618691086769104, "learning_rate": 0.0001, "loss": 1.4696, "step": 7310 }, { "epoch": 0.83981391074608, "grad_norm": 0.4929366111755371, "learning_rate": 0.0001, "loss": 1.683, "step": 7311 }, { "epoch": 0.8399287806559071, "grad_norm": 0.4555191397666931, "learning_rate": 0.0001, "loss": 1.549, "step": 7312 }, { "epoch": 0.8400436505657343, "grad_norm": 0.4752098321914673, "learning_rate": 0.0001, "loss": 1.8156, "step": 7313 }, { "epoch": 0.8401585204755614, "grad_norm": 0.4447777271270752, "learning_rate": 0.0001, "loss": 1.5551, "step": 7314 }, { "epoch": 0.8402733903853885, "grad_norm": 0.46375584602355957, "learning_rate": 0.0001, "loss": 1.5083, "step": 7315 }, { "epoch": 0.8403882602952156, "grad_norm": 0.5160993933677673, "learning_rate": 0.0001, "loss": 1.7724, "step": 7316 }, { "epoch": 0.8405031302050427, "grad_norm": 0.49875614047050476, "learning_rate": 0.0001, "loss": 1.7911, "step": 7317 }, { "epoch": 0.8406180001148699, "grad_norm": 0.47104793787002563, "learning_rate": 0.0001, "loss": 1.5512, "step": 7318 }, { "epoch": 0.840732870024697, "grad_norm": 0.47585365176200867, "learning_rate": 0.0001, "loss": 1.4807, "step": 7319 }, { "epoch": 0.8408477399345241, "grad_norm": 0.48287901282310486, "learning_rate": 0.0001, "loss": 1.4909, "step": 7320 }, { "epoch": 0.8409626098443512, "grad_norm": 0.4639113247394562, "learning_rate": 0.0001, "loss": 1.6005, "step": 7321 }, { "epoch": 0.8410774797541783, "grad_norm": 0.4889727830886841, "learning_rate": 0.0001, "loss": 1.8168, "step": 7322 }, { "epoch": 0.8411923496640055, "grad_norm": 0.44564002752304077, "learning_rate": 0.0001, "loss": 1.4868, "step": 7323 }, { "epoch": 0.8413072195738326, "grad_norm": 0.4678811728954315, "learning_rate": 0.0001, "loss": 1.5586, "step": 7324 }, { "epoch": 0.8414220894836597, "grad_norm": 0.483021080493927, "learning_rate": 0.0001, "loss": 1.7088, "step": 7325 }, { "epoch": 0.8415369593934868, "grad_norm": 0.47490793466567993, "learning_rate": 0.0001, "loss": 1.6228, "step": 7326 }, { "epoch": 0.841651829303314, "grad_norm": 0.4489779770374298, "learning_rate": 0.0001, "loss": 1.5528, "step": 7327 }, { "epoch": 0.8417666992131411, "grad_norm": 0.4593144953250885, "learning_rate": 0.0001, "loss": 1.6267, "step": 7328 }, { "epoch": 0.8418815691229682, "grad_norm": 0.4318476617336273, "learning_rate": 0.0001, "loss": 1.458, "step": 7329 }, { "epoch": 0.8419964390327953, "grad_norm": 0.44436419010162354, "learning_rate": 0.0001, "loss": 1.4058, "step": 7330 }, { "epoch": 0.8421113089426224, "grad_norm": 0.4526481032371521, "learning_rate": 0.0001, "loss": 1.5017, "step": 7331 }, { "epoch": 0.8422261788524495, "grad_norm": 0.4639897644519806, "learning_rate": 0.0001, "loss": 1.6044, "step": 7332 }, { "epoch": 0.8423410487622767, "grad_norm": 0.4710429012775421, "learning_rate": 0.0001, "loss": 1.7472, "step": 7333 }, { "epoch": 0.8424559186721038, "grad_norm": 0.4736747443675995, "learning_rate": 0.0001, "loss": 1.5991, "step": 7334 }, { "epoch": 0.842570788581931, "grad_norm": 0.46228286623954773, "learning_rate": 0.0001, "loss": 1.4623, "step": 7335 }, { "epoch": 0.8426856584917581, "grad_norm": 0.5162138938903809, "learning_rate": 0.0001, "loss": 1.8545, "step": 7336 }, { "epoch": 0.8428005284015853, "grad_norm": 0.4684411585330963, "learning_rate": 0.0001, "loss": 1.6929, "step": 7337 }, { "epoch": 0.8429153983114124, "grad_norm": 0.45652058720588684, "learning_rate": 0.0001, "loss": 1.4637, "step": 7338 }, { "epoch": 0.8430302682212395, "grad_norm": 0.49784135818481445, "learning_rate": 0.0001, "loss": 1.491, "step": 7339 }, { "epoch": 0.8431451381310666, "grad_norm": 0.5035806894302368, "learning_rate": 0.0001, "loss": 1.6469, "step": 7340 }, { "epoch": 0.8432600080408937, "grad_norm": 0.4898698925971985, "learning_rate": 0.0001, "loss": 1.7416, "step": 7341 }, { "epoch": 0.8433748779507209, "grad_norm": 0.4696720242500305, "learning_rate": 0.0001, "loss": 1.5175, "step": 7342 }, { "epoch": 0.843489747860548, "grad_norm": 0.4923301339149475, "learning_rate": 0.0001, "loss": 1.7249, "step": 7343 }, { "epoch": 0.8436046177703751, "grad_norm": 0.49450981616973877, "learning_rate": 0.0001, "loss": 1.7339, "step": 7344 }, { "epoch": 0.8437194876802022, "grad_norm": 0.43973812460899353, "learning_rate": 0.0001, "loss": 1.4611, "step": 7345 }, { "epoch": 0.8438343575900293, "grad_norm": 0.48189517855644226, "learning_rate": 0.0001, "loss": 1.6952, "step": 7346 }, { "epoch": 0.8439492274998565, "grad_norm": 0.4575080871582031, "learning_rate": 0.0001, "loss": 1.5302, "step": 7347 }, { "epoch": 0.8440640974096836, "grad_norm": 0.4627358019351959, "learning_rate": 0.0001, "loss": 1.4716, "step": 7348 }, { "epoch": 0.8441789673195107, "grad_norm": 0.4325619041919708, "learning_rate": 0.0001, "loss": 1.5157, "step": 7349 }, { "epoch": 0.8442938372293378, "grad_norm": 0.4869388937950134, "learning_rate": 0.0001, "loss": 1.7152, "step": 7350 }, { "epoch": 0.8444087071391649, "grad_norm": 0.4528372585773468, "learning_rate": 0.0001, "loss": 1.6697, "step": 7351 }, { "epoch": 0.8445235770489921, "grad_norm": 0.45322051644325256, "learning_rate": 0.0001, "loss": 1.7075, "step": 7352 }, { "epoch": 0.8446384469588192, "grad_norm": 0.43234097957611084, "learning_rate": 0.0001, "loss": 1.4102, "step": 7353 }, { "epoch": 0.8447533168686463, "grad_norm": 0.4526819586753845, "learning_rate": 0.0001, "loss": 1.5617, "step": 7354 }, { "epoch": 0.8448681867784734, "grad_norm": 0.46750757098197937, "learning_rate": 0.0001, "loss": 1.8057, "step": 7355 }, { "epoch": 0.8449830566883005, "grad_norm": 0.47405919432640076, "learning_rate": 0.0001, "loss": 1.6563, "step": 7356 }, { "epoch": 0.8450979265981277, "grad_norm": 0.4665440320968628, "learning_rate": 0.0001, "loss": 1.6313, "step": 7357 }, { "epoch": 0.8452127965079548, "grad_norm": 0.4736506938934326, "learning_rate": 0.0001, "loss": 1.6407, "step": 7358 }, { "epoch": 0.8453276664177819, "grad_norm": 0.4538653492927551, "learning_rate": 0.0001, "loss": 1.7792, "step": 7359 }, { "epoch": 0.845442536327609, "grad_norm": 0.4535897672176361, "learning_rate": 0.0001, "loss": 1.5042, "step": 7360 }, { "epoch": 0.8455574062374361, "grad_norm": 0.5242683291435242, "learning_rate": 0.0001, "loss": 1.6937, "step": 7361 }, { "epoch": 0.8456722761472633, "grad_norm": 0.4639322757720947, "learning_rate": 0.0001, "loss": 1.6695, "step": 7362 }, { "epoch": 0.8457871460570904, "grad_norm": 0.44305261969566345, "learning_rate": 0.0001, "loss": 1.3519, "step": 7363 }, { "epoch": 0.8459020159669175, "grad_norm": 0.4527410864830017, "learning_rate": 0.0001, "loss": 1.584, "step": 7364 }, { "epoch": 0.8460168858767446, "grad_norm": 0.48342829942703247, "learning_rate": 0.0001, "loss": 1.5043, "step": 7365 }, { "epoch": 0.8461317557865717, "grad_norm": 0.46347376704216003, "learning_rate": 0.0001, "loss": 1.6209, "step": 7366 }, { "epoch": 0.8462466256963989, "grad_norm": 0.45847606658935547, "learning_rate": 0.0001, "loss": 1.5473, "step": 7367 }, { "epoch": 0.846361495606226, "grad_norm": 0.4818578362464905, "learning_rate": 0.0001, "loss": 1.5684, "step": 7368 }, { "epoch": 0.8464763655160531, "grad_norm": 0.48799648880958557, "learning_rate": 0.0001, "loss": 1.6581, "step": 7369 }, { "epoch": 0.8465912354258802, "grad_norm": 0.4803750813007355, "learning_rate": 0.0001, "loss": 1.4806, "step": 7370 }, { "epoch": 0.8467061053357073, "grad_norm": 0.48348790407180786, "learning_rate": 0.0001, "loss": 1.6172, "step": 7371 }, { "epoch": 0.8468209752455345, "grad_norm": 0.4604859948158264, "learning_rate": 0.0001, "loss": 1.6259, "step": 7372 }, { "epoch": 0.8469358451553616, "grad_norm": 0.459789901971817, "learning_rate": 0.0001, "loss": 1.4266, "step": 7373 }, { "epoch": 0.8470507150651887, "grad_norm": 0.5160319805145264, "learning_rate": 0.0001, "loss": 1.7858, "step": 7374 }, { "epoch": 0.8471655849750158, "grad_norm": 0.4354211688041687, "learning_rate": 0.0001, "loss": 1.6375, "step": 7375 }, { "epoch": 0.8472804548848429, "grad_norm": 0.4287918508052826, "learning_rate": 0.0001, "loss": 1.5123, "step": 7376 }, { "epoch": 0.8473953247946701, "grad_norm": 0.4390510320663452, "learning_rate": 0.0001, "loss": 1.5634, "step": 7377 }, { "epoch": 0.8475101947044972, "grad_norm": 0.4523385465145111, "learning_rate": 0.0001, "loss": 1.631, "step": 7378 }, { "epoch": 0.8476250646143243, "grad_norm": 0.4572320878505707, "learning_rate": 0.0001, "loss": 1.1648, "step": 7379 }, { "epoch": 0.8477399345241514, "grad_norm": 0.5559184551239014, "learning_rate": 0.0001, "loss": 1.3973, "step": 7380 }, { "epoch": 0.8478548044339785, "grad_norm": 0.49197232723236084, "learning_rate": 0.0001, "loss": 1.7199, "step": 7381 }, { "epoch": 0.8479696743438057, "grad_norm": 0.49532270431518555, "learning_rate": 0.0001, "loss": 1.6443, "step": 7382 }, { "epoch": 0.8480845442536328, "grad_norm": 0.49762824177742004, "learning_rate": 0.0001, "loss": 1.6974, "step": 7383 }, { "epoch": 0.8481994141634599, "grad_norm": 0.473664790391922, "learning_rate": 0.0001, "loss": 1.6332, "step": 7384 }, { "epoch": 0.848314284073287, "grad_norm": 0.48429468274116516, "learning_rate": 0.0001, "loss": 1.4807, "step": 7385 }, { "epoch": 0.8484291539831141, "grad_norm": 0.4701521396636963, "learning_rate": 0.0001, "loss": 1.6465, "step": 7386 }, { "epoch": 0.8485440238929413, "grad_norm": 0.43333899974823, "learning_rate": 0.0001, "loss": 1.5189, "step": 7387 }, { "epoch": 0.8486588938027684, "grad_norm": 0.47169727087020874, "learning_rate": 0.0001, "loss": 1.4835, "step": 7388 }, { "epoch": 0.8487737637125955, "grad_norm": 0.4842875003814697, "learning_rate": 0.0001, "loss": 1.7324, "step": 7389 }, { "epoch": 0.8488886336224226, "grad_norm": 0.49503180384635925, "learning_rate": 0.0001, "loss": 1.707, "step": 7390 }, { "epoch": 0.8490035035322497, "grad_norm": 0.44051480293273926, "learning_rate": 0.0001, "loss": 1.5385, "step": 7391 }, { "epoch": 0.8491183734420769, "grad_norm": 0.4651011824607849, "learning_rate": 0.0001, "loss": 1.4514, "step": 7392 }, { "epoch": 0.849233243351904, "grad_norm": 0.4789915978908539, "learning_rate": 0.0001, "loss": 1.6536, "step": 7393 }, { "epoch": 0.8493481132617311, "grad_norm": 0.44147029519081116, "learning_rate": 0.0001, "loss": 1.6246, "step": 7394 }, { "epoch": 0.8494629831715582, "grad_norm": 0.42696404457092285, "learning_rate": 0.0001, "loss": 1.3676, "step": 7395 }, { "epoch": 0.8495778530813853, "grad_norm": 0.4485602080821991, "learning_rate": 0.0001, "loss": 1.602, "step": 7396 }, { "epoch": 0.8496927229912125, "grad_norm": 0.46463295817375183, "learning_rate": 0.0001, "loss": 1.6137, "step": 7397 }, { "epoch": 0.8498075929010396, "grad_norm": 0.43345364928245544, "learning_rate": 0.0001, "loss": 1.5218, "step": 7398 }, { "epoch": 0.8499224628108667, "grad_norm": 0.4817748963832855, "learning_rate": 0.0001, "loss": 1.6097, "step": 7399 }, { "epoch": 0.8500373327206938, "grad_norm": 0.48344627022743225, "learning_rate": 0.0001, "loss": 1.434, "step": 7400 }, { "epoch": 0.8501522026305209, "grad_norm": 0.4697633683681488, "learning_rate": 0.0001, "loss": 1.5238, "step": 7401 }, { "epoch": 0.8502670725403481, "grad_norm": 0.4468742609024048, "learning_rate": 0.0001, "loss": 1.4694, "step": 7402 }, { "epoch": 0.8503819424501752, "grad_norm": 0.4675341546535492, "learning_rate": 0.0001, "loss": 1.5938, "step": 7403 }, { "epoch": 0.8504968123600023, "grad_norm": 0.43257632851600647, "learning_rate": 0.0001, "loss": 1.3002, "step": 7404 }, { "epoch": 0.8506116822698294, "grad_norm": 0.4668475389480591, "learning_rate": 0.0001, "loss": 1.6644, "step": 7405 }, { "epoch": 0.8507265521796565, "grad_norm": 0.4785885512828827, "learning_rate": 0.0001, "loss": 1.5659, "step": 7406 }, { "epoch": 0.8508414220894837, "grad_norm": 0.43829622864723206, "learning_rate": 0.0001, "loss": 1.5819, "step": 7407 }, { "epoch": 0.8509562919993108, "grad_norm": 0.5236424207687378, "learning_rate": 0.0001, "loss": 1.5819, "step": 7408 }, { "epoch": 0.8510711619091379, "grad_norm": 0.45531997084617615, "learning_rate": 0.0001, "loss": 1.6721, "step": 7409 }, { "epoch": 0.851186031818965, "grad_norm": 0.5072128176689148, "learning_rate": 0.0001, "loss": 1.5845, "step": 7410 }, { "epoch": 0.8513009017287921, "grad_norm": 0.4630776643753052, "learning_rate": 0.0001, "loss": 1.6296, "step": 7411 }, { "epoch": 0.8514157716386193, "grad_norm": 0.4900180697441101, "learning_rate": 0.0001, "loss": 1.7094, "step": 7412 }, { "epoch": 0.8515306415484464, "grad_norm": 0.46314537525177, "learning_rate": 0.0001, "loss": 1.4107, "step": 7413 }, { "epoch": 0.8516455114582735, "grad_norm": 0.5156530141830444, "learning_rate": 0.0001, "loss": 1.5988, "step": 7414 }, { "epoch": 0.8517603813681006, "grad_norm": 0.5086135268211365, "learning_rate": 0.0001, "loss": 1.5731, "step": 7415 }, { "epoch": 0.8518752512779277, "grad_norm": 0.48410969972610474, "learning_rate": 0.0001, "loss": 1.7056, "step": 7416 }, { "epoch": 0.8519901211877549, "grad_norm": 0.5622872114181519, "learning_rate": 0.0001, "loss": 1.5636, "step": 7417 }, { "epoch": 0.852104991097582, "grad_norm": 0.4555276036262512, "learning_rate": 0.0001, "loss": 1.5397, "step": 7418 }, { "epoch": 0.8522198610074091, "grad_norm": 0.46399348974227905, "learning_rate": 0.0001, "loss": 1.6051, "step": 7419 }, { "epoch": 0.8523347309172362, "grad_norm": 0.43579867482185364, "learning_rate": 0.0001, "loss": 1.3899, "step": 7420 }, { "epoch": 0.8524496008270633, "grad_norm": 0.47155770659446716, "learning_rate": 0.0001, "loss": 1.6079, "step": 7421 }, { "epoch": 0.8525644707368905, "grad_norm": 0.46017301082611084, "learning_rate": 0.0001, "loss": 1.623, "step": 7422 }, { "epoch": 0.8526793406467176, "grad_norm": 0.4408453404903412, "learning_rate": 0.0001, "loss": 1.5108, "step": 7423 }, { "epoch": 0.8527942105565447, "grad_norm": 0.475716769695282, "learning_rate": 0.0001, "loss": 1.6382, "step": 7424 }, { "epoch": 0.8529090804663718, "grad_norm": 0.4536953270435333, "learning_rate": 0.0001, "loss": 1.5114, "step": 7425 }, { "epoch": 0.8530239503761989, "grad_norm": 0.476300448179245, "learning_rate": 0.0001, "loss": 1.6174, "step": 7426 }, { "epoch": 0.8531388202860261, "grad_norm": 0.42572006583213806, "learning_rate": 0.0001, "loss": 1.5994, "step": 7427 }, { "epoch": 0.8532536901958532, "grad_norm": 0.5320248007774353, "learning_rate": 0.0001, "loss": 1.6911, "step": 7428 }, { "epoch": 0.8533685601056803, "grad_norm": 0.46820348501205444, "learning_rate": 0.0001, "loss": 1.5416, "step": 7429 }, { "epoch": 0.8534834300155074, "grad_norm": 0.4351903796195984, "learning_rate": 0.0001, "loss": 1.5144, "step": 7430 }, { "epoch": 0.8535982999253345, "grad_norm": 0.47440531849861145, "learning_rate": 0.0001, "loss": 1.4344, "step": 7431 }, { "epoch": 0.8537131698351617, "grad_norm": 0.43947550654411316, "learning_rate": 0.0001, "loss": 1.5196, "step": 7432 }, { "epoch": 0.8538280397449888, "grad_norm": 0.4965689182281494, "learning_rate": 0.0001, "loss": 1.6887, "step": 7433 }, { "epoch": 0.8539429096548159, "grad_norm": 0.4858873188495636, "learning_rate": 0.0001, "loss": 1.8044, "step": 7434 }, { "epoch": 0.854057779564643, "grad_norm": 0.4513735771179199, "learning_rate": 0.0001, "loss": 1.4199, "step": 7435 }, { "epoch": 0.8541726494744701, "grad_norm": 0.4994846284389496, "learning_rate": 0.0001, "loss": 1.943, "step": 7436 }, { "epoch": 0.8542875193842973, "grad_norm": 0.45225298404693604, "learning_rate": 0.0001, "loss": 1.6063, "step": 7437 }, { "epoch": 0.8544023892941244, "grad_norm": 0.445266991853714, "learning_rate": 0.0001, "loss": 1.4866, "step": 7438 }, { "epoch": 0.8545172592039515, "grad_norm": 0.4278365969657898, "learning_rate": 0.0001, "loss": 1.617, "step": 7439 }, { "epoch": 0.8546321291137786, "grad_norm": 0.4820537865161896, "learning_rate": 0.0001, "loss": 1.7703, "step": 7440 }, { "epoch": 0.8547469990236057, "grad_norm": 0.4873339533805847, "learning_rate": 0.0001, "loss": 1.7428, "step": 7441 }, { "epoch": 0.8548618689334329, "grad_norm": 0.44816693663597107, "learning_rate": 0.0001, "loss": 1.6484, "step": 7442 }, { "epoch": 0.85497673884326, "grad_norm": 0.47395050525665283, "learning_rate": 0.0001, "loss": 1.5569, "step": 7443 }, { "epoch": 0.8550916087530871, "grad_norm": 0.42696264386177063, "learning_rate": 0.0001, "loss": 1.5543, "step": 7444 }, { "epoch": 0.8552064786629142, "grad_norm": 0.5623756647109985, "learning_rate": 0.0001, "loss": 2.051, "step": 7445 }, { "epoch": 0.8553213485727413, "grad_norm": 0.5125777721405029, "learning_rate": 0.0001, "loss": 1.7589, "step": 7446 }, { "epoch": 0.8554362184825685, "grad_norm": 0.4917384684085846, "learning_rate": 0.0001, "loss": 1.8042, "step": 7447 }, { "epoch": 0.8555510883923956, "grad_norm": 0.4971913695335388, "learning_rate": 0.0001, "loss": 1.6491, "step": 7448 }, { "epoch": 0.8556659583022227, "grad_norm": 0.4413685202598572, "learning_rate": 0.0001, "loss": 1.4241, "step": 7449 }, { "epoch": 0.8557808282120498, "grad_norm": 0.5352091789245605, "learning_rate": 0.0001, "loss": 1.9268, "step": 7450 }, { "epoch": 0.8558956981218769, "grad_norm": 0.5074852705001831, "learning_rate": 0.0001, "loss": 1.7119, "step": 7451 }, { "epoch": 0.8560105680317041, "grad_norm": 0.45686012506484985, "learning_rate": 0.0001, "loss": 1.5553, "step": 7452 }, { "epoch": 0.8561254379415312, "grad_norm": 0.48405084013938904, "learning_rate": 0.0001, "loss": 1.792, "step": 7453 }, { "epoch": 0.8562403078513583, "grad_norm": 0.5095300674438477, "learning_rate": 0.0001, "loss": 1.7128, "step": 7454 }, { "epoch": 0.8563551777611854, "grad_norm": 0.49315640330314636, "learning_rate": 0.0001, "loss": 1.4333, "step": 7455 }, { "epoch": 0.8564700476710125, "grad_norm": 0.44458267092704773, "learning_rate": 0.0001, "loss": 1.606, "step": 7456 }, { "epoch": 0.8565849175808397, "grad_norm": 0.47778499126434326, "learning_rate": 0.0001, "loss": 1.7333, "step": 7457 }, { "epoch": 0.8566997874906668, "grad_norm": 0.47323766350746155, "learning_rate": 0.0001, "loss": 1.4791, "step": 7458 }, { "epoch": 0.8568146574004939, "grad_norm": 0.4731180965900421, "learning_rate": 0.0001, "loss": 1.6042, "step": 7459 }, { "epoch": 0.856929527310321, "grad_norm": 0.4616163671016693, "learning_rate": 0.0001, "loss": 1.7287, "step": 7460 }, { "epoch": 0.8570443972201481, "grad_norm": 0.4829119145870209, "learning_rate": 0.0001, "loss": 1.6345, "step": 7461 }, { "epoch": 0.8571592671299753, "grad_norm": 0.5113245844841003, "learning_rate": 0.0001, "loss": 1.5072, "step": 7462 }, { "epoch": 0.8572741370398024, "grad_norm": 0.45658257603645325, "learning_rate": 0.0001, "loss": 1.5559, "step": 7463 }, { "epoch": 0.8573890069496295, "grad_norm": 0.44155845046043396, "learning_rate": 0.0001, "loss": 1.5001, "step": 7464 }, { "epoch": 0.8575038768594566, "grad_norm": 0.46995145082473755, "learning_rate": 0.0001, "loss": 1.6653, "step": 7465 }, { "epoch": 0.8576187467692837, "grad_norm": 0.43981465697288513, "learning_rate": 0.0001, "loss": 1.5494, "step": 7466 }, { "epoch": 0.8577336166791109, "grad_norm": 0.4353700280189514, "learning_rate": 0.0001, "loss": 1.5173, "step": 7467 }, { "epoch": 0.857848486588938, "grad_norm": 0.4613707959651947, "learning_rate": 0.0001, "loss": 1.6324, "step": 7468 }, { "epoch": 0.8579633564987651, "grad_norm": 0.43150594830513, "learning_rate": 0.0001, "loss": 1.3483, "step": 7469 }, { "epoch": 0.8580782264085922, "grad_norm": 0.49536994099617004, "learning_rate": 0.0001, "loss": 1.6696, "step": 7470 }, { "epoch": 0.8581930963184193, "grad_norm": 0.4285619258880615, "learning_rate": 0.0001, "loss": 1.6479, "step": 7471 }, { "epoch": 0.8583079662282466, "grad_norm": 0.5003694891929626, "learning_rate": 0.0001, "loss": 1.5725, "step": 7472 }, { "epoch": 0.8584228361380737, "grad_norm": 0.4515581429004669, "learning_rate": 0.0001, "loss": 1.5494, "step": 7473 }, { "epoch": 0.8585377060479008, "grad_norm": 0.4796769320964813, "learning_rate": 0.0001, "loss": 1.4985, "step": 7474 }, { "epoch": 0.8586525759577279, "grad_norm": 0.5017609596252441, "learning_rate": 0.0001, "loss": 1.7879, "step": 7475 }, { "epoch": 0.858767445867555, "grad_norm": 0.4531811475753784, "learning_rate": 0.0001, "loss": 1.5423, "step": 7476 }, { "epoch": 0.8588823157773822, "grad_norm": 0.4460836946964264, "learning_rate": 0.0001, "loss": 1.456, "step": 7477 }, { "epoch": 0.8589971856872093, "grad_norm": 0.4407133162021637, "learning_rate": 0.0001, "loss": 1.5907, "step": 7478 }, { "epoch": 0.8591120555970364, "grad_norm": 0.4690662622451782, "learning_rate": 0.0001, "loss": 1.6899, "step": 7479 }, { "epoch": 0.8592269255068635, "grad_norm": 0.43073776364326477, "learning_rate": 0.0001, "loss": 1.3295, "step": 7480 }, { "epoch": 0.8593417954166906, "grad_norm": 0.45200130343437195, "learning_rate": 0.0001, "loss": 1.435, "step": 7481 }, { "epoch": 0.8594566653265178, "grad_norm": 0.45768219232559204, "learning_rate": 0.0001, "loss": 1.5326, "step": 7482 }, { "epoch": 0.8595715352363449, "grad_norm": 0.4337681829929352, "learning_rate": 0.0001, "loss": 1.5591, "step": 7483 }, { "epoch": 0.859686405146172, "grad_norm": 0.44868743419647217, "learning_rate": 0.0001, "loss": 1.5495, "step": 7484 }, { "epoch": 0.8598012750559991, "grad_norm": 0.4528716206550598, "learning_rate": 0.0001, "loss": 1.5656, "step": 7485 }, { "epoch": 0.8599161449658262, "grad_norm": 0.4922735095024109, "learning_rate": 0.0001, "loss": 1.6894, "step": 7486 }, { "epoch": 0.8600310148756534, "grad_norm": 0.46525344252586365, "learning_rate": 0.0001, "loss": 1.5649, "step": 7487 }, { "epoch": 0.8601458847854805, "grad_norm": 0.46007898449897766, "learning_rate": 0.0001, "loss": 1.6705, "step": 7488 }, { "epoch": 0.8602607546953076, "grad_norm": 0.4645099341869354, "learning_rate": 0.0001, "loss": 1.5782, "step": 7489 }, { "epoch": 0.8603756246051347, "grad_norm": 0.46048229932785034, "learning_rate": 0.0001, "loss": 1.5643, "step": 7490 }, { "epoch": 0.8604904945149618, "grad_norm": 0.4628632366657257, "learning_rate": 0.0001, "loss": 1.6688, "step": 7491 }, { "epoch": 0.860605364424789, "grad_norm": 0.5050228238105774, "learning_rate": 0.0001, "loss": 1.712, "step": 7492 }, { "epoch": 0.8607202343346161, "grad_norm": 0.4507612884044647, "learning_rate": 0.0001, "loss": 1.5151, "step": 7493 }, { "epoch": 0.8608351042444432, "grad_norm": 0.4406948983669281, "learning_rate": 0.0001, "loss": 1.5542, "step": 7494 }, { "epoch": 0.8609499741542703, "grad_norm": 0.4973854124546051, "learning_rate": 0.0001, "loss": 1.5352, "step": 7495 }, { "epoch": 0.8610648440640974, "grad_norm": 0.4488707482814789, "learning_rate": 0.0001, "loss": 1.4153, "step": 7496 }, { "epoch": 0.8611797139739246, "grad_norm": 0.4815550744533539, "learning_rate": 0.0001, "loss": 1.6676, "step": 7497 }, { "epoch": 0.8612945838837517, "grad_norm": 0.4687705338001251, "learning_rate": 0.0001, "loss": 1.3591, "step": 7498 }, { "epoch": 0.8614094537935788, "grad_norm": 0.46637317538261414, "learning_rate": 0.0001, "loss": 1.6578, "step": 7499 }, { "epoch": 0.8615243237034059, "grad_norm": 0.4513280987739563, "learning_rate": 0.0001, "loss": 1.5599, "step": 7500 }, { "epoch": 0.861639193613233, "grad_norm": 0.4474887549877167, "learning_rate": 0.0001, "loss": 1.3628, "step": 7501 }, { "epoch": 0.8617540635230602, "grad_norm": 0.4975503087043762, "learning_rate": 0.0001, "loss": 1.5146, "step": 7502 }, { "epoch": 0.8618689334328873, "grad_norm": 0.5043979287147522, "learning_rate": 0.0001, "loss": 1.5227, "step": 7503 }, { "epoch": 0.8619838033427144, "grad_norm": 0.4594765305519104, "learning_rate": 0.0001, "loss": 1.702, "step": 7504 }, { "epoch": 0.8620986732525415, "grad_norm": 0.46996229887008667, "learning_rate": 0.0001, "loss": 1.714, "step": 7505 }, { "epoch": 0.8622135431623686, "grad_norm": 0.45425885915756226, "learning_rate": 0.0001, "loss": 1.6545, "step": 7506 }, { "epoch": 0.8623284130721958, "grad_norm": 0.48939937353134155, "learning_rate": 0.0001, "loss": 1.611, "step": 7507 }, { "epoch": 0.8624432829820229, "grad_norm": 0.44552651047706604, "learning_rate": 0.0001, "loss": 1.5571, "step": 7508 }, { "epoch": 0.86255815289185, "grad_norm": 0.4959718883037567, "learning_rate": 0.0001, "loss": 1.6482, "step": 7509 }, { "epoch": 0.8626730228016771, "grad_norm": 0.4864218533039093, "learning_rate": 0.0001, "loss": 1.7558, "step": 7510 }, { "epoch": 0.8627878927115042, "grad_norm": 0.4416675865650177, "learning_rate": 0.0001, "loss": 1.686, "step": 7511 }, { "epoch": 0.8629027626213314, "grad_norm": 0.46253499388694763, "learning_rate": 0.0001, "loss": 1.5942, "step": 7512 }, { "epoch": 0.8630176325311585, "grad_norm": 0.47551655769348145, "learning_rate": 0.0001, "loss": 1.7636, "step": 7513 }, { "epoch": 0.8631325024409856, "grad_norm": 0.48232796788215637, "learning_rate": 0.0001, "loss": 1.6967, "step": 7514 }, { "epoch": 0.8632473723508127, "grad_norm": 0.4513819217681885, "learning_rate": 0.0001, "loss": 1.5229, "step": 7515 }, { "epoch": 0.8633622422606398, "grad_norm": 0.4784489572048187, "learning_rate": 0.0001, "loss": 1.7298, "step": 7516 }, { "epoch": 0.863477112170467, "grad_norm": 0.4839509427547455, "learning_rate": 0.0001, "loss": 1.8396, "step": 7517 }, { "epoch": 0.8635919820802941, "grad_norm": 0.5013167858123779, "learning_rate": 0.0001, "loss": 1.5833, "step": 7518 }, { "epoch": 0.8637068519901212, "grad_norm": 0.43714308738708496, "learning_rate": 0.0001, "loss": 1.4933, "step": 7519 }, { "epoch": 0.8638217218999483, "grad_norm": 0.45084357261657715, "learning_rate": 0.0001, "loss": 1.6041, "step": 7520 }, { "epoch": 0.8639365918097754, "grad_norm": 0.4939959943294525, "learning_rate": 0.0001, "loss": 1.769, "step": 7521 }, { "epoch": 0.8640514617196026, "grad_norm": 0.46642765402793884, "learning_rate": 0.0001, "loss": 1.6056, "step": 7522 }, { "epoch": 0.8641663316294297, "grad_norm": 0.4534401595592499, "learning_rate": 0.0001, "loss": 1.582, "step": 7523 }, { "epoch": 0.8642812015392568, "grad_norm": 0.43686237931251526, "learning_rate": 0.0001, "loss": 1.5726, "step": 7524 }, { "epoch": 0.8643960714490839, "grad_norm": 0.4657284915447235, "learning_rate": 0.0001, "loss": 1.6247, "step": 7525 }, { "epoch": 0.864510941358911, "grad_norm": 0.5079863667488098, "learning_rate": 0.0001, "loss": 1.551, "step": 7526 }, { "epoch": 0.8646258112687382, "grad_norm": 0.4670884311199188, "learning_rate": 0.0001, "loss": 1.6194, "step": 7527 }, { "epoch": 0.8647406811785653, "grad_norm": 0.4581589102745056, "learning_rate": 0.0001, "loss": 1.3903, "step": 7528 }, { "epoch": 0.8648555510883924, "grad_norm": 0.4376249313354492, "learning_rate": 0.0001, "loss": 1.5475, "step": 7529 }, { "epoch": 0.8649704209982195, "grad_norm": 0.46135491132736206, "learning_rate": 0.0001, "loss": 1.5266, "step": 7530 }, { "epoch": 0.8650852909080466, "grad_norm": 0.492291659116745, "learning_rate": 0.0001, "loss": 1.713, "step": 7531 }, { "epoch": 0.8652001608178738, "grad_norm": 0.4598917067050934, "learning_rate": 0.0001, "loss": 1.3503, "step": 7532 }, { "epoch": 0.8653150307277009, "grad_norm": 0.4626048505306244, "learning_rate": 0.0001, "loss": 1.4571, "step": 7533 }, { "epoch": 0.865429900637528, "grad_norm": 0.49662452936172485, "learning_rate": 0.0001, "loss": 1.5838, "step": 7534 }, { "epoch": 0.8655447705473551, "grad_norm": 0.4794904291629791, "learning_rate": 0.0001, "loss": 1.5125, "step": 7535 }, { "epoch": 0.8656596404571822, "grad_norm": 0.4706816077232361, "learning_rate": 0.0001, "loss": 1.4572, "step": 7536 }, { "epoch": 0.8657745103670094, "grad_norm": 0.4907233417034149, "learning_rate": 0.0001, "loss": 1.5851, "step": 7537 }, { "epoch": 0.8658893802768365, "grad_norm": 0.46914729475975037, "learning_rate": 0.0001, "loss": 1.5293, "step": 7538 }, { "epoch": 0.8660042501866636, "grad_norm": 0.47738802433013916, "learning_rate": 0.0001, "loss": 1.6708, "step": 7539 }, { "epoch": 0.8661191200964907, "grad_norm": 0.47731783986091614, "learning_rate": 0.0001, "loss": 1.5712, "step": 7540 }, { "epoch": 0.8662339900063178, "grad_norm": 0.4860245883464813, "learning_rate": 0.0001, "loss": 1.4923, "step": 7541 }, { "epoch": 0.866348859916145, "grad_norm": 0.4608958065509796, "learning_rate": 0.0001, "loss": 1.4598, "step": 7542 }, { "epoch": 0.8664637298259721, "grad_norm": 0.4907558262348175, "learning_rate": 0.0001, "loss": 1.6327, "step": 7543 }, { "epoch": 0.8665785997357992, "grad_norm": 0.460859477519989, "learning_rate": 0.0001, "loss": 1.7288, "step": 7544 }, { "epoch": 0.8666934696456263, "grad_norm": 0.5079466104507446, "learning_rate": 0.0001, "loss": 1.7155, "step": 7545 }, { "epoch": 0.8668083395554534, "grad_norm": 0.4723812937736511, "learning_rate": 0.0001, "loss": 1.7198, "step": 7546 }, { "epoch": 0.8669232094652806, "grad_norm": 0.457302063703537, "learning_rate": 0.0001, "loss": 1.5175, "step": 7547 }, { "epoch": 0.8670380793751077, "grad_norm": 0.44479089975357056, "learning_rate": 0.0001, "loss": 1.5455, "step": 7548 }, { "epoch": 0.8671529492849348, "grad_norm": 0.4218999147415161, "learning_rate": 0.0001, "loss": 1.5437, "step": 7549 }, { "epoch": 0.8672678191947619, "grad_norm": 0.44676443934440613, "learning_rate": 0.0001, "loss": 1.6445, "step": 7550 }, { "epoch": 0.867382689104589, "grad_norm": 0.4901319444179535, "learning_rate": 0.0001, "loss": 1.7671, "step": 7551 }, { "epoch": 0.8674975590144162, "grad_norm": 0.4453258216381073, "learning_rate": 0.0001, "loss": 1.4686, "step": 7552 }, { "epoch": 0.8676124289242433, "grad_norm": 0.4534637928009033, "learning_rate": 0.0001, "loss": 1.5514, "step": 7553 }, { "epoch": 0.8677272988340704, "grad_norm": 0.46283942461013794, "learning_rate": 0.0001, "loss": 1.6294, "step": 7554 }, { "epoch": 0.8678421687438975, "grad_norm": 0.5271000266075134, "learning_rate": 0.0001, "loss": 1.6204, "step": 7555 }, { "epoch": 0.8679570386537246, "grad_norm": 0.4478399157524109, "learning_rate": 0.0001, "loss": 1.5407, "step": 7556 }, { "epoch": 0.8680719085635518, "grad_norm": 0.4519982933998108, "learning_rate": 0.0001, "loss": 1.5047, "step": 7557 }, { "epoch": 0.8681867784733789, "grad_norm": 0.49382635951042175, "learning_rate": 0.0001, "loss": 1.662, "step": 7558 }, { "epoch": 0.868301648383206, "grad_norm": 0.4385705888271332, "learning_rate": 0.0001, "loss": 1.4204, "step": 7559 }, { "epoch": 0.8684165182930331, "grad_norm": 0.42191818356513977, "learning_rate": 0.0001, "loss": 1.3775, "step": 7560 }, { "epoch": 0.8685313882028602, "grad_norm": 0.48729297518730164, "learning_rate": 0.0001, "loss": 1.5923, "step": 7561 }, { "epoch": 0.8686462581126874, "grad_norm": 0.45936107635498047, "learning_rate": 0.0001, "loss": 1.5908, "step": 7562 }, { "epoch": 0.8687611280225145, "grad_norm": 0.4694010317325592, "learning_rate": 0.0001, "loss": 1.6691, "step": 7563 }, { "epoch": 0.8688759979323416, "grad_norm": 0.4324677288532257, "learning_rate": 0.0001, "loss": 1.5969, "step": 7564 }, { "epoch": 0.8689908678421687, "grad_norm": 0.47414863109588623, "learning_rate": 0.0001, "loss": 1.6054, "step": 7565 }, { "epoch": 0.8691057377519958, "grad_norm": 0.46548882126808167, "learning_rate": 0.0001, "loss": 1.3367, "step": 7566 }, { "epoch": 0.869220607661823, "grad_norm": 0.44881516695022583, "learning_rate": 0.0001, "loss": 1.4306, "step": 7567 }, { "epoch": 0.8693354775716501, "grad_norm": 0.475082129240036, "learning_rate": 0.0001, "loss": 1.5533, "step": 7568 }, { "epoch": 0.8694503474814772, "grad_norm": 0.4312398135662079, "learning_rate": 0.0001, "loss": 1.6606, "step": 7569 }, { "epoch": 0.8695652173913043, "grad_norm": 0.4381447434425354, "learning_rate": 0.0001, "loss": 1.4735, "step": 7570 }, { "epoch": 0.8696800873011314, "grad_norm": 0.42871126532554626, "learning_rate": 0.0001, "loss": 1.4752, "step": 7571 }, { "epoch": 0.8697949572109586, "grad_norm": 0.44468954205513, "learning_rate": 0.0001, "loss": 1.5352, "step": 7572 }, { "epoch": 0.8699098271207857, "grad_norm": 0.49432846903800964, "learning_rate": 0.0001, "loss": 1.7286, "step": 7573 }, { "epoch": 0.8700246970306128, "grad_norm": 0.4997519552707672, "learning_rate": 0.0001, "loss": 1.4884, "step": 7574 }, { "epoch": 0.8701395669404399, "grad_norm": 0.47391921281814575, "learning_rate": 0.0001, "loss": 1.7209, "step": 7575 }, { "epoch": 0.870254436850267, "grad_norm": 0.49615180492401123, "learning_rate": 0.0001, "loss": 1.5852, "step": 7576 }, { "epoch": 0.8703693067600942, "grad_norm": 0.5302203297615051, "learning_rate": 0.0001, "loss": 1.6529, "step": 7577 }, { "epoch": 0.8704841766699213, "grad_norm": 0.512974739074707, "learning_rate": 0.0001, "loss": 1.8474, "step": 7578 }, { "epoch": 0.8705990465797484, "grad_norm": 0.5273780226707458, "learning_rate": 0.0001, "loss": 1.7045, "step": 7579 }, { "epoch": 0.8707139164895755, "grad_norm": 0.45488354563713074, "learning_rate": 0.0001, "loss": 1.7054, "step": 7580 }, { "epoch": 0.8708287863994026, "grad_norm": 0.4844851791858673, "learning_rate": 0.0001, "loss": 1.7121, "step": 7581 }, { "epoch": 0.8709436563092298, "grad_norm": 0.47248736023902893, "learning_rate": 0.0001, "loss": 1.7882, "step": 7582 }, { "epoch": 0.8710585262190569, "grad_norm": 0.4401882588863373, "learning_rate": 0.0001, "loss": 1.5392, "step": 7583 }, { "epoch": 0.871173396128884, "grad_norm": 0.46107155084609985, "learning_rate": 0.0001, "loss": 1.5845, "step": 7584 }, { "epoch": 0.8712882660387111, "grad_norm": 0.44512903690338135, "learning_rate": 0.0001, "loss": 1.4246, "step": 7585 }, { "epoch": 0.8714031359485382, "grad_norm": 0.5119422078132629, "learning_rate": 0.0001, "loss": 1.6568, "step": 7586 }, { "epoch": 0.8715180058583654, "grad_norm": 0.5448932647705078, "learning_rate": 0.0001, "loss": 1.6187, "step": 7587 }, { "epoch": 0.8716328757681925, "grad_norm": 0.4820195138454437, "learning_rate": 0.0001, "loss": 1.5244, "step": 7588 }, { "epoch": 0.8717477456780196, "grad_norm": 0.46764081716537476, "learning_rate": 0.0001, "loss": 1.6459, "step": 7589 }, { "epoch": 0.8718626155878467, "grad_norm": 0.5043097734451294, "learning_rate": 0.0001, "loss": 1.7252, "step": 7590 }, { "epoch": 0.8719774854976738, "grad_norm": 0.48596030473709106, "learning_rate": 0.0001, "loss": 1.6176, "step": 7591 }, { "epoch": 0.872092355407501, "grad_norm": 0.4883681535720825, "learning_rate": 0.0001, "loss": 1.7058, "step": 7592 }, { "epoch": 0.8722072253173281, "grad_norm": 0.4625040888786316, "learning_rate": 0.0001, "loss": 1.633, "step": 7593 }, { "epoch": 0.8723220952271552, "grad_norm": 0.4826609194278717, "learning_rate": 0.0001, "loss": 1.4658, "step": 7594 }, { "epoch": 0.8724369651369823, "grad_norm": 0.4496923089027405, "learning_rate": 0.0001, "loss": 1.6275, "step": 7595 }, { "epoch": 0.8725518350468094, "grad_norm": 0.5118072032928467, "learning_rate": 0.0001, "loss": 1.8564, "step": 7596 }, { "epoch": 0.8726667049566366, "grad_norm": 0.4654580056667328, "learning_rate": 0.0001, "loss": 1.4611, "step": 7597 }, { "epoch": 0.8727815748664637, "grad_norm": 0.42999371886253357, "learning_rate": 0.0001, "loss": 1.4828, "step": 7598 }, { "epoch": 0.8728964447762908, "grad_norm": 0.5076425671577454, "learning_rate": 0.0001, "loss": 1.7994, "step": 7599 }, { "epoch": 0.8730113146861179, "grad_norm": 0.4634290933609009, "learning_rate": 0.0001, "loss": 1.7437, "step": 7600 }, { "epoch": 0.873126184595945, "grad_norm": 0.455967515707016, "learning_rate": 0.0001, "loss": 1.6326, "step": 7601 }, { "epoch": 0.8732410545057722, "grad_norm": 0.5265469551086426, "learning_rate": 0.0001, "loss": 1.828, "step": 7602 }, { "epoch": 0.8733559244155993, "grad_norm": 0.44739046692848206, "learning_rate": 0.0001, "loss": 1.5161, "step": 7603 }, { "epoch": 0.8734707943254264, "grad_norm": 0.47437113523483276, "learning_rate": 0.0001, "loss": 1.7686, "step": 7604 }, { "epoch": 0.8735856642352535, "grad_norm": 0.47178056836128235, "learning_rate": 0.0001, "loss": 1.5443, "step": 7605 }, { "epoch": 0.8737005341450806, "grad_norm": 0.47476136684417725, "learning_rate": 0.0001, "loss": 1.6839, "step": 7606 }, { "epoch": 0.8738154040549078, "grad_norm": 0.47085344791412354, "learning_rate": 0.0001, "loss": 1.6793, "step": 7607 }, { "epoch": 0.8739302739647349, "grad_norm": 0.48475953936576843, "learning_rate": 0.0001, "loss": 1.7614, "step": 7608 }, { "epoch": 0.8740451438745621, "grad_norm": 0.46257898211479187, "learning_rate": 0.0001, "loss": 1.5595, "step": 7609 }, { "epoch": 0.8741600137843892, "grad_norm": 0.4603593051433563, "learning_rate": 0.0001, "loss": 1.6879, "step": 7610 }, { "epoch": 0.8742748836942164, "grad_norm": 0.46927449107170105, "learning_rate": 0.0001, "loss": 1.7635, "step": 7611 }, { "epoch": 0.8743897536040435, "grad_norm": 0.48620933294296265, "learning_rate": 0.0001, "loss": 1.6829, "step": 7612 }, { "epoch": 0.8745046235138706, "grad_norm": 0.4888293445110321, "learning_rate": 0.0001, "loss": 1.6731, "step": 7613 }, { "epoch": 0.8746194934236977, "grad_norm": 0.45221349596977234, "learning_rate": 0.0001, "loss": 1.4829, "step": 7614 }, { "epoch": 0.8747343633335248, "grad_norm": 0.4384780824184418, "learning_rate": 0.0001, "loss": 1.5118, "step": 7615 }, { "epoch": 0.874849233243352, "grad_norm": 0.463351309299469, "learning_rate": 0.0001, "loss": 1.2614, "step": 7616 }, { "epoch": 0.8749641031531791, "grad_norm": 0.5050379037857056, "learning_rate": 0.0001, "loss": 1.7645, "step": 7617 }, { "epoch": 0.8750789730630062, "grad_norm": 0.4900285005569458, "learning_rate": 0.0001, "loss": 1.668, "step": 7618 }, { "epoch": 0.8751938429728333, "grad_norm": 0.4964447319507599, "learning_rate": 0.0001, "loss": 1.7461, "step": 7619 }, { "epoch": 0.8753087128826604, "grad_norm": 0.4623139798641205, "learning_rate": 0.0001, "loss": 1.5311, "step": 7620 }, { "epoch": 0.8754235827924876, "grad_norm": 0.47225624322891235, "learning_rate": 0.0001, "loss": 1.6197, "step": 7621 }, { "epoch": 0.8755384527023147, "grad_norm": 0.49223390221595764, "learning_rate": 0.0001, "loss": 1.6516, "step": 7622 }, { "epoch": 0.8756533226121418, "grad_norm": 0.5095483064651489, "learning_rate": 0.0001, "loss": 1.5173, "step": 7623 }, { "epoch": 0.8757681925219689, "grad_norm": 0.4903635084629059, "learning_rate": 0.0001, "loss": 1.7643, "step": 7624 }, { "epoch": 0.875883062431796, "grad_norm": 0.4946781396865845, "learning_rate": 0.0001, "loss": 1.5768, "step": 7625 }, { "epoch": 0.8759979323416232, "grad_norm": 0.4905765652656555, "learning_rate": 0.0001, "loss": 1.5622, "step": 7626 }, { "epoch": 0.8761128022514503, "grad_norm": 0.505204975605011, "learning_rate": 0.0001, "loss": 1.7406, "step": 7627 }, { "epoch": 0.8762276721612774, "grad_norm": 0.533305823802948, "learning_rate": 0.0001, "loss": 1.4833, "step": 7628 }, { "epoch": 0.8763425420711045, "grad_norm": 0.47964945435523987, "learning_rate": 0.0001, "loss": 1.6496, "step": 7629 }, { "epoch": 0.8764574119809316, "grad_norm": 0.4678000211715698, "learning_rate": 0.0001, "loss": 1.6501, "step": 7630 }, { "epoch": 0.8765722818907588, "grad_norm": 0.47018638253211975, "learning_rate": 0.0001, "loss": 1.489, "step": 7631 }, { "epoch": 0.8766871518005859, "grad_norm": 0.4635443091392517, "learning_rate": 0.0001, "loss": 1.4924, "step": 7632 }, { "epoch": 0.876802021710413, "grad_norm": 0.46582552790641785, "learning_rate": 0.0001, "loss": 1.6194, "step": 7633 }, { "epoch": 0.8769168916202401, "grad_norm": 0.47066211700439453, "learning_rate": 0.0001, "loss": 1.6757, "step": 7634 }, { "epoch": 0.8770317615300672, "grad_norm": 0.569146990776062, "learning_rate": 0.0001, "loss": 1.7014, "step": 7635 }, { "epoch": 0.8771466314398944, "grad_norm": 0.48655346035957336, "learning_rate": 0.0001, "loss": 1.5346, "step": 7636 }, { "epoch": 0.8772615013497215, "grad_norm": 0.4453171491622925, "learning_rate": 0.0001, "loss": 1.6097, "step": 7637 }, { "epoch": 0.8773763712595486, "grad_norm": 0.4952543079853058, "learning_rate": 0.0001, "loss": 1.6746, "step": 7638 }, { "epoch": 0.8774912411693757, "grad_norm": 0.45405057072639465, "learning_rate": 0.0001, "loss": 1.5223, "step": 7639 }, { "epoch": 0.8776061110792028, "grad_norm": 0.46041983366012573, "learning_rate": 0.0001, "loss": 1.5733, "step": 7640 }, { "epoch": 0.87772098098903, "grad_norm": 0.4734715223312378, "learning_rate": 0.0001, "loss": 1.6602, "step": 7641 }, { "epoch": 0.8778358508988571, "grad_norm": 0.4629979431629181, "learning_rate": 0.0001, "loss": 1.6299, "step": 7642 }, { "epoch": 0.8779507208086842, "grad_norm": 0.47290873527526855, "learning_rate": 0.0001, "loss": 1.524, "step": 7643 }, { "epoch": 0.8780655907185113, "grad_norm": 0.4647415578365326, "learning_rate": 0.0001, "loss": 1.5595, "step": 7644 }, { "epoch": 0.8781804606283384, "grad_norm": 0.5042547583580017, "learning_rate": 0.0001, "loss": 1.7437, "step": 7645 }, { "epoch": 0.8782953305381656, "grad_norm": 0.4914029836654663, "learning_rate": 0.0001, "loss": 1.8578, "step": 7646 }, { "epoch": 0.8784102004479927, "grad_norm": 0.5158650875091553, "learning_rate": 0.0001, "loss": 1.6133, "step": 7647 }, { "epoch": 0.8785250703578198, "grad_norm": 0.4820328950881958, "learning_rate": 0.0001, "loss": 1.5127, "step": 7648 }, { "epoch": 0.8786399402676469, "grad_norm": 0.4517586827278137, "learning_rate": 0.0001, "loss": 1.4968, "step": 7649 }, { "epoch": 0.878754810177474, "grad_norm": 0.45039236545562744, "learning_rate": 0.0001, "loss": 1.539, "step": 7650 }, { "epoch": 0.8788696800873012, "grad_norm": 0.4908886253833771, "learning_rate": 0.0001, "loss": 1.5477, "step": 7651 }, { "epoch": 0.8789845499971283, "grad_norm": 0.4798978865146637, "learning_rate": 0.0001, "loss": 1.5524, "step": 7652 }, { "epoch": 0.8790994199069554, "grad_norm": 0.4604727327823639, "learning_rate": 0.0001, "loss": 1.6227, "step": 7653 }, { "epoch": 0.8792142898167825, "grad_norm": 0.5215774774551392, "learning_rate": 0.0001, "loss": 1.6924, "step": 7654 }, { "epoch": 0.8793291597266096, "grad_norm": 0.5219174027442932, "learning_rate": 0.0001, "loss": 1.8248, "step": 7655 }, { "epoch": 0.8794440296364368, "grad_norm": 0.46888914704322815, "learning_rate": 0.0001, "loss": 1.6328, "step": 7656 }, { "epoch": 0.8795588995462639, "grad_norm": 0.49896708130836487, "learning_rate": 0.0001, "loss": 1.679, "step": 7657 }, { "epoch": 0.879673769456091, "grad_norm": 0.4882703423500061, "learning_rate": 0.0001, "loss": 1.7798, "step": 7658 }, { "epoch": 0.8797886393659181, "grad_norm": 0.4401630461215973, "learning_rate": 0.0001, "loss": 1.5311, "step": 7659 }, { "epoch": 0.8799035092757452, "grad_norm": 0.46130862832069397, "learning_rate": 0.0001, "loss": 1.5991, "step": 7660 }, { "epoch": 0.8800183791855724, "grad_norm": 0.4488151967525482, "learning_rate": 0.0001, "loss": 1.5732, "step": 7661 }, { "epoch": 0.8801332490953995, "grad_norm": 0.47853875160217285, "learning_rate": 0.0001, "loss": 1.5023, "step": 7662 }, { "epoch": 0.8802481190052266, "grad_norm": 0.4806554615497589, "learning_rate": 0.0001, "loss": 1.5578, "step": 7663 }, { "epoch": 0.8803629889150537, "grad_norm": 0.44891056418418884, "learning_rate": 0.0001, "loss": 1.4271, "step": 7664 }, { "epoch": 0.8804778588248808, "grad_norm": 0.4926340579986572, "learning_rate": 0.0001, "loss": 1.6632, "step": 7665 }, { "epoch": 0.880592728734708, "grad_norm": 0.47855570912361145, "learning_rate": 0.0001, "loss": 1.6053, "step": 7666 }, { "epoch": 0.8807075986445351, "grad_norm": 0.47924453020095825, "learning_rate": 0.0001, "loss": 1.6421, "step": 7667 }, { "epoch": 0.8808224685543622, "grad_norm": 0.45178931951522827, "learning_rate": 0.0001, "loss": 1.3517, "step": 7668 }, { "epoch": 0.8809373384641893, "grad_norm": 0.4857328534126282, "learning_rate": 0.0001, "loss": 1.5011, "step": 7669 }, { "epoch": 0.8810522083740164, "grad_norm": 0.5388020277023315, "learning_rate": 0.0001, "loss": 1.7677, "step": 7670 }, { "epoch": 0.8811670782838436, "grad_norm": 0.4225468039512634, "learning_rate": 0.0001, "loss": 1.3599, "step": 7671 }, { "epoch": 0.8812819481936707, "grad_norm": 0.4660169780254364, "learning_rate": 0.0001, "loss": 1.641, "step": 7672 }, { "epoch": 0.8813968181034978, "grad_norm": 0.47213736176490784, "learning_rate": 0.0001, "loss": 1.5267, "step": 7673 }, { "epoch": 0.8815116880133249, "grad_norm": 0.4642762541770935, "learning_rate": 0.0001, "loss": 1.3589, "step": 7674 }, { "epoch": 0.881626557923152, "grad_norm": 0.47581663727760315, "learning_rate": 0.0001, "loss": 1.475, "step": 7675 }, { "epoch": 0.8817414278329792, "grad_norm": 0.4813084900379181, "learning_rate": 0.0001, "loss": 1.426, "step": 7676 }, { "epoch": 0.8818562977428063, "grad_norm": 0.46025902032852173, "learning_rate": 0.0001, "loss": 1.5892, "step": 7677 }, { "epoch": 0.8819711676526334, "grad_norm": 0.5673764944076538, "learning_rate": 0.0001, "loss": 1.6262, "step": 7678 }, { "epoch": 0.8820860375624605, "grad_norm": 0.4987919330596924, "learning_rate": 0.0001, "loss": 1.6401, "step": 7679 }, { "epoch": 0.8822009074722876, "grad_norm": 0.4518562853336334, "learning_rate": 0.0001, "loss": 1.3551, "step": 7680 }, { "epoch": 0.8823157773821148, "grad_norm": 0.486644446849823, "learning_rate": 0.0001, "loss": 1.5322, "step": 7681 }, { "epoch": 0.8824306472919419, "grad_norm": 0.5359245538711548, "learning_rate": 0.0001, "loss": 1.8432, "step": 7682 }, { "epoch": 0.882545517201769, "grad_norm": 0.4623430371284485, "learning_rate": 0.0001, "loss": 1.5393, "step": 7683 }, { "epoch": 0.8826603871115961, "grad_norm": 0.47618022561073303, "learning_rate": 0.0001, "loss": 1.5502, "step": 7684 }, { "epoch": 0.8827752570214232, "grad_norm": 0.5313566327095032, "learning_rate": 0.0001, "loss": 1.8855, "step": 7685 }, { "epoch": 0.8828901269312504, "grad_norm": 0.4783889055252075, "learning_rate": 0.0001, "loss": 1.6492, "step": 7686 }, { "epoch": 0.8830049968410775, "grad_norm": 0.45966142416000366, "learning_rate": 0.0001, "loss": 1.5857, "step": 7687 }, { "epoch": 0.8831198667509046, "grad_norm": 0.47145649790763855, "learning_rate": 0.0001, "loss": 1.7645, "step": 7688 }, { "epoch": 0.8832347366607317, "grad_norm": 0.4668165445327759, "learning_rate": 0.0001, "loss": 1.6133, "step": 7689 }, { "epoch": 0.8833496065705588, "grad_norm": 0.44585132598876953, "learning_rate": 0.0001, "loss": 1.3982, "step": 7690 }, { "epoch": 0.883464476480386, "grad_norm": 0.49249133467674255, "learning_rate": 0.0001, "loss": 1.6614, "step": 7691 }, { "epoch": 0.8835793463902131, "grad_norm": 0.4700791835784912, "learning_rate": 0.0001, "loss": 1.3881, "step": 7692 }, { "epoch": 0.8836942163000402, "grad_norm": 0.44664838910102844, "learning_rate": 0.0001, "loss": 1.5379, "step": 7693 }, { "epoch": 0.8838090862098673, "grad_norm": 0.4859354496002197, "learning_rate": 0.0001, "loss": 1.6673, "step": 7694 }, { "epoch": 0.8839239561196944, "grad_norm": 0.4571913480758667, "learning_rate": 0.0001, "loss": 1.5796, "step": 7695 }, { "epoch": 0.8840388260295216, "grad_norm": 0.4279733896255493, "learning_rate": 0.0001, "loss": 1.5309, "step": 7696 }, { "epoch": 0.8841536959393487, "grad_norm": 0.4860191345214844, "learning_rate": 0.0001, "loss": 1.827, "step": 7697 }, { "epoch": 0.8842685658491758, "grad_norm": 0.4540635049343109, "learning_rate": 0.0001, "loss": 1.5865, "step": 7698 }, { "epoch": 0.8843834357590029, "grad_norm": 0.5169913172721863, "learning_rate": 0.0001, "loss": 1.6006, "step": 7699 }, { "epoch": 0.88449830566883, "grad_norm": 0.4786296784877777, "learning_rate": 0.0001, "loss": 1.5426, "step": 7700 }, { "epoch": 0.8846131755786572, "grad_norm": 0.4788622558116913, "learning_rate": 0.0001, "loss": 1.4655, "step": 7701 }, { "epoch": 0.8847280454884843, "grad_norm": 0.4708092212677002, "learning_rate": 0.0001, "loss": 1.6054, "step": 7702 }, { "epoch": 0.8848429153983114, "grad_norm": 0.49218055605888367, "learning_rate": 0.0001, "loss": 1.5345, "step": 7703 }, { "epoch": 0.8849577853081385, "grad_norm": 0.4458230137825012, "learning_rate": 0.0001, "loss": 1.5672, "step": 7704 }, { "epoch": 0.8850726552179656, "grad_norm": 0.46966198086738586, "learning_rate": 0.0001, "loss": 1.6702, "step": 7705 }, { "epoch": 0.8851875251277928, "grad_norm": 0.5042508244514465, "learning_rate": 0.0001, "loss": 1.6373, "step": 7706 }, { "epoch": 0.8853023950376199, "grad_norm": 0.5178636312484741, "learning_rate": 0.0001, "loss": 1.8095, "step": 7707 }, { "epoch": 0.885417264947447, "grad_norm": 0.5226283073425293, "learning_rate": 0.0001, "loss": 1.8296, "step": 7708 }, { "epoch": 0.8855321348572741, "grad_norm": 0.4413732886314392, "learning_rate": 0.0001, "loss": 1.4009, "step": 7709 }, { "epoch": 0.8856470047671012, "grad_norm": 0.44534966349601746, "learning_rate": 0.0001, "loss": 1.5705, "step": 7710 }, { "epoch": 0.8857618746769284, "grad_norm": 0.49753180146217346, "learning_rate": 0.0001, "loss": 1.4834, "step": 7711 }, { "epoch": 0.8858767445867555, "grad_norm": 0.4720037281513214, "learning_rate": 0.0001, "loss": 1.5619, "step": 7712 }, { "epoch": 0.8859916144965826, "grad_norm": 0.5182157754898071, "learning_rate": 0.0001, "loss": 1.6232, "step": 7713 }, { "epoch": 0.8861064844064097, "grad_norm": 0.47871580719947815, "learning_rate": 0.0001, "loss": 1.5819, "step": 7714 }, { "epoch": 0.8862213543162368, "grad_norm": 0.508307158946991, "learning_rate": 0.0001, "loss": 1.6747, "step": 7715 }, { "epoch": 0.886336224226064, "grad_norm": 0.47485652565956116, "learning_rate": 0.0001, "loss": 1.5988, "step": 7716 }, { "epoch": 0.8864510941358911, "grad_norm": 0.5017088651657104, "learning_rate": 0.0001, "loss": 1.6983, "step": 7717 }, { "epoch": 0.8865659640457182, "grad_norm": 0.46508511900901794, "learning_rate": 0.0001, "loss": 1.5499, "step": 7718 }, { "epoch": 0.8866808339555453, "grad_norm": 0.4940198063850403, "learning_rate": 0.0001, "loss": 1.6522, "step": 7719 }, { "epoch": 0.8867957038653724, "grad_norm": 0.508063554763794, "learning_rate": 0.0001, "loss": 1.7217, "step": 7720 }, { "epoch": 0.8869105737751996, "grad_norm": 0.5674510598182678, "learning_rate": 0.0001, "loss": 1.7209, "step": 7721 }, { "epoch": 0.8870254436850267, "grad_norm": 0.47527050971984863, "learning_rate": 0.0001, "loss": 1.4227, "step": 7722 }, { "epoch": 0.8871403135948538, "grad_norm": 0.45663002133369446, "learning_rate": 0.0001, "loss": 1.4982, "step": 7723 }, { "epoch": 0.8872551835046809, "grad_norm": 0.4941721558570862, "learning_rate": 0.0001, "loss": 1.7747, "step": 7724 }, { "epoch": 0.887370053414508, "grad_norm": 0.43632128834724426, "learning_rate": 0.0001, "loss": 1.5104, "step": 7725 }, { "epoch": 0.8874849233243352, "grad_norm": 0.4669651389122009, "learning_rate": 0.0001, "loss": 1.5774, "step": 7726 }, { "epoch": 0.8875997932341623, "grad_norm": 0.5105260014533997, "learning_rate": 0.0001, "loss": 1.7566, "step": 7727 }, { "epoch": 0.8877146631439894, "grad_norm": 0.44809389114379883, "learning_rate": 0.0001, "loss": 1.4762, "step": 7728 }, { "epoch": 0.8878295330538165, "grad_norm": 0.4762137234210968, "learning_rate": 0.0001, "loss": 1.7521, "step": 7729 }, { "epoch": 0.8879444029636436, "grad_norm": 0.45027822256088257, "learning_rate": 0.0001, "loss": 1.4717, "step": 7730 }, { "epoch": 0.8880592728734708, "grad_norm": 0.4824502170085907, "learning_rate": 0.0001, "loss": 1.5083, "step": 7731 }, { "epoch": 0.8881741427832979, "grad_norm": 0.4795003831386566, "learning_rate": 0.0001, "loss": 1.5373, "step": 7732 }, { "epoch": 0.888289012693125, "grad_norm": 0.5017417073249817, "learning_rate": 0.0001, "loss": 1.6815, "step": 7733 }, { "epoch": 0.8884038826029521, "grad_norm": 0.44462695717811584, "learning_rate": 0.0001, "loss": 1.4145, "step": 7734 }, { "epoch": 0.8885187525127792, "grad_norm": 0.4432191550731659, "learning_rate": 0.0001, "loss": 1.4622, "step": 7735 }, { "epoch": 0.8886336224226064, "grad_norm": 0.4664211571216583, "learning_rate": 0.0001, "loss": 1.6906, "step": 7736 }, { "epoch": 0.8887484923324335, "grad_norm": 0.4574565589427948, "learning_rate": 0.0001, "loss": 1.515, "step": 7737 }, { "epoch": 0.8888633622422606, "grad_norm": 0.44210490584373474, "learning_rate": 0.0001, "loss": 1.4454, "step": 7738 }, { "epoch": 0.8889782321520877, "grad_norm": 0.4957959055900574, "learning_rate": 0.0001, "loss": 1.7905, "step": 7739 }, { "epoch": 0.8890931020619148, "grad_norm": 0.43715575337409973, "learning_rate": 0.0001, "loss": 1.4055, "step": 7740 }, { "epoch": 0.889207971971742, "grad_norm": 0.4835394024848938, "learning_rate": 0.0001, "loss": 1.7379, "step": 7741 }, { "epoch": 0.8893228418815691, "grad_norm": 0.4787488281726837, "learning_rate": 0.0001, "loss": 1.6368, "step": 7742 }, { "epoch": 0.8894377117913962, "grad_norm": 0.4879782199859619, "learning_rate": 0.0001, "loss": 1.5455, "step": 7743 }, { "epoch": 0.8895525817012233, "grad_norm": 0.4805709719657898, "learning_rate": 0.0001, "loss": 1.5724, "step": 7744 }, { "epoch": 0.8896674516110504, "grad_norm": 0.4606061577796936, "learning_rate": 0.0001, "loss": 1.6125, "step": 7745 }, { "epoch": 0.8897823215208777, "grad_norm": 0.5064094662666321, "learning_rate": 0.0001, "loss": 1.322, "step": 7746 }, { "epoch": 0.8898971914307048, "grad_norm": 0.45881563425064087, "learning_rate": 0.0001, "loss": 1.5164, "step": 7747 }, { "epoch": 0.8900120613405319, "grad_norm": 0.48323944211006165, "learning_rate": 0.0001, "loss": 1.5188, "step": 7748 }, { "epoch": 0.890126931250359, "grad_norm": 0.49248838424682617, "learning_rate": 0.0001, "loss": 1.5483, "step": 7749 }, { "epoch": 0.8902418011601861, "grad_norm": 0.46828705072402954, "learning_rate": 0.0001, "loss": 1.422, "step": 7750 }, { "epoch": 0.8903566710700133, "grad_norm": 0.45508071780204773, "learning_rate": 0.0001, "loss": 1.5604, "step": 7751 }, { "epoch": 0.8904715409798404, "grad_norm": 0.4920658469200134, "learning_rate": 0.0001, "loss": 1.4792, "step": 7752 }, { "epoch": 0.8905864108896675, "grad_norm": 0.46648716926574707, "learning_rate": 0.0001, "loss": 1.674, "step": 7753 }, { "epoch": 0.8907012807994946, "grad_norm": 0.4534609019756317, "learning_rate": 0.0001, "loss": 1.3492, "step": 7754 }, { "epoch": 0.8908161507093217, "grad_norm": 0.5462976098060608, "learning_rate": 0.0001, "loss": 1.2098, "step": 7755 }, { "epoch": 0.8909310206191489, "grad_norm": 0.4742743670940399, "learning_rate": 0.0001, "loss": 1.6341, "step": 7756 }, { "epoch": 0.891045890528976, "grad_norm": 0.46202293038368225, "learning_rate": 0.0001, "loss": 1.5176, "step": 7757 }, { "epoch": 0.8911607604388031, "grad_norm": 0.45886507630348206, "learning_rate": 0.0001, "loss": 1.4584, "step": 7758 }, { "epoch": 0.8912756303486302, "grad_norm": 0.4991988241672516, "learning_rate": 0.0001, "loss": 1.6096, "step": 7759 }, { "epoch": 0.8913905002584573, "grad_norm": 0.47904571890830994, "learning_rate": 0.0001, "loss": 1.7234, "step": 7760 }, { "epoch": 0.8915053701682845, "grad_norm": 0.48273780941963196, "learning_rate": 0.0001, "loss": 1.6437, "step": 7761 }, { "epoch": 0.8916202400781116, "grad_norm": 0.45888492465019226, "learning_rate": 0.0001, "loss": 1.5468, "step": 7762 }, { "epoch": 0.8917351099879387, "grad_norm": 0.485304057598114, "learning_rate": 0.0001, "loss": 1.5815, "step": 7763 }, { "epoch": 0.8918499798977658, "grad_norm": 0.49057838320732117, "learning_rate": 0.0001, "loss": 1.62, "step": 7764 }, { "epoch": 0.8919648498075929, "grad_norm": 0.503322958946228, "learning_rate": 0.0001, "loss": 1.4175, "step": 7765 }, { "epoch": 0.8920797197174201, "grad_norm": 0.4755953550338745, "learning_rate": 0.0001, "loss": 1.61, "step": 7766 }, { "epoch": 0.8921945896272472, "grad_norm": 0.5407209396362305, "learning_rate": 0.0001, "loss": 1.8089, "step": 7767 }, { "epoch": 0.8923094595370743, "grad_norm": 0.48187482357025146, "learning_rate": 0.0001, "loss": 1.5752, "step": 7768 }, { "epoch": 0.8924243294469014, "grad_norm": 0.4826925992965698, "learning_rate": 0.0001, "loss": 1.6726, "step": 7769 }, { "epoch": 0.8925391993567285, "grad_norm": 0.4857804775238037, "learning_rate": 0.0001, "loss": 1.5634, "step": 7770 }, { "epoch": 0.8926540692665557, "grad_norm": 0.4642079472541809, "learning_rate": 0.0001, "loss": 1.4843, "step": 7771 }, { "epoch": 0.8927689391763828, "grad_norm": 0.44618815183639526, "learning_rate": 0.0001, "loss": 1.3971, "step": 7772 }, { "epoch": 0.8928838090862099, "grad_norm": 0.44455963373184204, "learning_rate": 0.0001, "loss": 1.3867, "step": 7773 }, { "epoch": 0.892998678996037, "grad_norm": 0.46926748752593994, "learning_rate": 0.0001, "loss": 1.5882, "step": 7774 }, { "epoch": 0.8931135489058641, "grad_norm": 0.5021077394485474, "learning_rate": 0.0001, "loss": 1.7976, "step": 7775 }, { "epoch": 0.8932284188156913, "grad_norm": 0.4602797031402588, "learning_rate": 0.0001, "loss": 1.599, "step": 7776 }, { "epoch": 0.8933432887255184, "grad_norm": 0.4697769582271576, "learning_rate": 0.0001, "loss": 1.4241, "step": 7777 }, { "epoch": 0.8934581586353455, "grad_norm": 0.48710909485816956, "learning_rate": 0.0001, "loss": 1.4455, "step": 7778 }, { "epoch": 0.8935730285451726, "grad_norm": 0.47241660952568054, "learning_rate": 0.0001, "loss": 1.6332, "step": 7779 }, { "epoch": 0.8936878984549997, "grad_norm": 0.488765686750412, "learning_rate": 0.0001, "loss": 1.5666, "step": 7780 }, { "epoch": 0.8938027683648269, "grad_norm": 0.45212939381599426, "learning_rate": 0.0001, "loss": 1.5377, "step": 7781 }, { "epoch": 0.893917638274654, "grad_norm": 0.49566277861595154, "learning_rate": 0.0001, "loss": 1.6206, "step": 7782 }, { "epoch": 0.8940325081844811, "grad_norm": 0.4758758544921875, "learning_rate": 0.0001, "loss": 1.5356, "step": 7783 }, { "epoch": 0.8941473780943082, "grad_norm": 0.47846412658691406, "learning_rate": 0.0001, "loss": 1.409, "step": 7784 }, { "epoch": 0.8942622480041353, "grad_norm": 0.465116947889328, "learning_rate": 0.0001, "loss": 1.6158, "step": 7785 }, { "epoch": 0.8943771179139625, "grad_norm": 0.4536508321762085, "learning_rate": 0.0001, "loss": 1.7166, "step": 7786 }, { "epoch": 0.8944919878237896, "grad_norm": 0.44384217262268066, "learning_rate": 0.0001, "loss": 1.5588, "step": 7787 }, { "epoch": 0.8946068577336167, "grad_norm": 0.46297183632850647, "learning_rate": 0.0001, "loss": 1.4644, "step": 7788 }, { "epoch": 0.8947217276434438, "grad_norm": 0.5043572783470154, "learning_rate": 0.0001, "loss": 1.6958, "step": 7789 }, { "epoch": 0.8948365975532709, "grad_norm": 0.49215108156204224, "learning_rate": 0.0001, "loss": 1.7364, "step": 7790 }, { "epoch": 0.8949514674630981, "grad_norm": 0.45499980449676514, "learning_rate": 0.0001, "loss": 1.5584, "step": 7791 }, { "epoch": 0.8950663373729252, "grad_norm": 0.46621590852737427, "learning_rate": 0.0001, "loss": 1.6982, "step": 7792 }, { "epoch": 0.8951812072827523, "grad_norm": 0.4747898578643799, "learning_rate": 0.0001, "loss": 1.5868, "step": 7793 }, { "epoch": 0.8952960771925794, "grad_norm": 0.4650057256221771, "learning_rate": 0.0001, "loss": 1.4734, "step": 7794 }, { "epoch": 0.8954109471024065, "grad_norm": 0.45124003291130066, "learning_rate": 0.0001, "loss": 1.6151, "step": 7795 }, { "epoch": 0.8955258170122337, "grad_norm": 0.469590425491333, "learning_rate": 0.0001, "loss": 1.6978, "step": 7796 }, { "epoch": 0.8956406869220608, "grad_norm": 0.4690435826778412, "learning_rate": 0.0001, "loss": 1.5853, "step": 7797 }, { "epoch": 0.8957555568318879, "grad_norm": 0.48957929015159607, "learning_rate": 0.0001, "loss": 1.767, "step": 7798 }, { "epoch": 0.895870426741715, "grad_norm": 0.459736168384552, "learning_rate": 0.0001, "loss": 1.593, "step": 7799 }, { "epoch": 0.8959852966515421, "grad_norm": 0.4722045063972473, "learning_rate": 0.0001, "loss": 1.8041, "step": 7800 }, { "epoch": 0.8961001665613693, "grad_norm": 0.5619189143180847, "learning_rate": 0.0001, "loss": 1.6918, "step": 7801 }, { "epoch": 0.8962150364711964, "grad_norm": 0.4744798243045807, "learning_rate": 0.0001, "loss": 1.7405, "step": 7802 }, { "epoch": 0.8963299063810235, "grad_norm": 0.4761870801448822, "learning_rate": 0.0001, "loss": 1.5544, "step": 7803 }, { "epoch": 0.8964447762908506, "grad_norm": 0.45697519183158875, "learning_rate": 0.0001, "loss": 1.5555, "step": 7804 }, { "epoch": 0.8965596462006777, "grad_norm": 0.5395013689994812, "learning_rate": 0.0001, "loss": 1.8088, "step": 7805 }, { "epoch": 0.8966745161105049, "grad_norm": 0.4655267000198364, "learning_rate": 0.0001, "loss": 1.5784, "step": 7806 }, { "epoch": 0.896789386020332, "grad_norm": 0.4401511549949646, "learning_rate": 0.0001, "loss": 1.4877, "step": 7807 }, { "epoch": 0.8969042559301591, "grad_norm": 0.5011132955551147, "learning_rate": 0.0001, "loss": 1.6072, "step": 7808 }, { "epoch": 0.8970191258399862, "grad_norm": 0.4679315984249115, "learning_rate": 0.0001, "loss": 1.3788, "step": 7809 }, { "epoch": 0.8971339957498133, "grad_norm": 0.487979918718338, "learning_rate": 0.0001, "loss": 1.5793, "step": 7810 }, { "epoch": 0.8972488656596405, "grad_norm": 0.4689542353153229, "learning_rate": 0.0001, "loss": 1.5487, "step": 7811 }, { "epoch": 0.8973637355694676, "grad_norm": 0.49013927578926086, "learning_rate": 0.0001, "loss": 1.8521, "step": 7812 }, { "epoch": 0.8974786054792947, "grad_norm": 0.4543326497077942, "learning_rate": 0.0001, "loss": 1.5612, "step": 7813 }, { "epoch": 0.8975934753891218, "grad_norm": 0.4831598103046417, "learning_rate": 0.0001, "loss": 1.619, "step": 7814 }, { "epoch": 0.8977083452989489, "grad_norm": 0.49325430393218994, "learning_rate": 0.0001, "loss": 1.6324, "step": 7815 }, { "epoch": 0.8978232152087761, "grad_norm": 0.5205132365226746, "learning_rate": 0.0001, "loss": 1.5647, "step": 7816 }, { "epoch": 0.8979380851186032, "grad_norm": 0.485248327255249, "learning_rate": 0.0001, "loss": 1.5955, "step": 7817 }, { "epoch": 0.8980529550284303, "grad_norm": 0.467001736164093, "learning_rate": 0.0001, "loss": 1.5915, "step": 7818 }, { "epoch": 0.8981678249382574, "grad_norm": 0.519828200340271, "learning_rate": 0.0001, "loss": 1.6739, "step": 7819 }, { "epoch": 0.8982826948480845, "grad_norm": 0.4926901161670685, "learning_rate": 0.0001, "loss": 1.7966, "step": 7820 }, { "epoch": 0.8983975647579117, "grad_norm": 0.4958095848560333, "learning_rate": 0.0001, "loss": 1.6828, "step": 7821 }, { "epoch": 0.8985124346677388, "grad_norm": 0.4742107093334198, "learning_rate": 0.0001, "loss": 1.6398, "step": 7822 }, { "epoch": 0.8986273045775659, "grad_norm": 0.48758435249328613, "learning_rate": 0.0001, "loss": 1.7207, "step": 7823 }, { "epoch": 0.898742174487393, "grad_norm": 0.48550042510032654, "learning_rate": 0.0001, "loss": 1.6212, "step": 7824 }, { "epoch": 0.8988570443972201, "grad_norm": 0.5684822201728821, "learning_rate": 0.0001, "loss": 2.0053, "step": 7825 }, { "epoch": 0.8989719143070473, "grad_norm": 0.4416176378726959, "learning_rate": 0.0001, "loss": 1.4682, "step": 7826 }, { "epoch": 0.8990867842168744, "grad_norm": 0.44723203778266907, "learning_rate": 0.0001, "loss": 1.493, "step": 7827 }, { "epoch": 0.8992016541267015, "grad_norm": 0.4821613132953644, "learning_rate": 0.0001, "loss": 1.6573, "step": 7828 }, { "epoch": 0.8993165240365286, "grad_norm": 0.4443539083003998, "learning_rate": 0.0001, "loss": 1.4682, "step": 7829 }, { "epoch": 0.8994313939463557, "grad_norm": 0.5018233060836792, "learning_rate": 0.0001, "loss": 1.7302, "step": 7830 }, { "epoch": 0.8995462638561829, "grad_norm": 0.4475822448730469, "learning_rate": 0.0001, "loss": 1.5244, "step": 7831 }, { "epoch": 0.89966113376601, "grad_norm": 0.4100770652294159, "learning_rate": 0.0001, "loss": 1.3041, "step": 7832 }, { "epoch": 0.8997760036758371, "grad_norm": 0.5303254723548889, "learning_rate": 0.0001, "loss": 1.9058, "step": 7833 }, { "epoch": 0.8998908735856642, "grad_norm": 0.4729917645454407, "learning_rate": 0.0001, "loss": 1.4515, "step": 7834 }, { "epoch": 0.9000057434954913, "grad_norm": 0.45676717162132263, "learning_rate": 0.0001, "loss": 1.4337, "step": 7835 }, { "epoch": 0.9001206134053185, "grad_norm": 0.4548957943916321, "learning_rate": 0.0001, "loss": 1.5653, "step": 7836 }, { "epoch": 0.9002354833151456, "grad_norm": 0.46776890754699707, "learning_rate": 0.0001, "loss": 1.5995, "step": 7837 }, { "epoch": 0.9003503532249727, "grad_norm": 0.8313576579093933, "learning_rate": 0.0001, "loss": 1.6672, "step": 7838 }, { "epoch": 0.9004652231347998, "grad_norm": 0.45772501826286316, "learning_rate": 0.0001, "loss": 1.5468, "step": 7839 }, { "epoch": 0.9005800930446269, "grad_norm": 0.4540563225746155, "learning_rate": 0.0001, "loss": 1.6791, "step": 7840 }, { "epoch": 0.9006949629544541, "grad_norm": 0.43928417563438416, "learning_rate": 0.0001, "loss": 1.4581, "step": 7841 }, { "epoch": 0.9008098328642812, "grad_norm": 0.46283161640167236, "learning_rate": 0.0001, "loss": 1.4749, "step": 7842 }, { "epoch": 0.9009247027741083, "grad_norm": 0.49827006459236145, "learning_rate": 0.0001, "loss": 1.6633, "step": 7843 }, { "epoch": 0.9010395726839354, "grad_norm": 0.48653677105903625, "learning_rate": 0.0001, "loss": 1.5447, "step": 7844 }, { "epoch": 0.9011544425937625, "grad_norm": 0.5136744976043701, "learning_rate": 0.0001, "loss": 1.5334, "step": 7845 }, { "epoch": 0.9012693125035897, "grad_norm": 0.4694797694683075, "learning_rate": 0.0001, "loss": 1.5367, "step": 7846 }, { "epoch": 0.9013841824134168, "grad_norm": 0.47298017144203186, "learning_rate": 0.0001, "loss": 1.4799, "step": 7847 }, { "epoch": 0.9014990523232439, "grad_norm": 0.46906399726867676, "learning_rate": 0.0001, "loss": 1.6518, "step": 7848 }, { "epoch": 0.901613922233071, "grad_norm": 0.473000168800354, "learning_rate": 0.0001, "loss": 1.6285, "step": 7849 }, { "epoch": 0.9017287921428981, "grad_norm": 0.462663471698761, "learning_rate": 0.0001, "loss": 1.7218, "step": 7850 }, { "epoch": 0.9018436620527253, "grad_norm": 0.48268601298332214, "learning_rate": 0.0001, "loss": 1.4941, "step": 7851 }, { "epoch": 0.9019585319625524, "grad_norm": 0.4828319251537323, "learning_rate": 0.0001, "loss": 1.6787, "step": 7852 }, { "epoch": 0.9020734018723795, "grad_norm": 0.5251330733299255, "learning_rate": 0.0001, "loss": 1.8425, "step": 7853 }, { "epoch": 0.9021882717822066, "grad_norm": 0.478097140789032, "learning_rate": 0.0001, "loss": 1.4669, "step": 7854 }, { "epoch": 0.9023031416920337, "grad_norm": 0.4791843891143799, "learning_rate": 0.0001, "loss": 1.4991, "step": 7855 }, { "epoch": 0.9024180116018609, "grad_norm": 0.4416709244251251, "learning_rate": 0.0001, "loss": 1.2615, "step": 7856 }, { "epoch": 0.902532881511688, "grad_norm": 0.4547010362148285, "learning_rate": 0.0001, "loss": 1.6578, "step": 7857 }, { "epoch": 0.9026477514215151, "grad_norm": 0.47191575169563293, "learning_rate": 0.0001, "loss": 1.6771, "step": 7858 }, { "epoch": 0.9027626213313422, "grad_norm": 0.4907677173614502, "learning_rate": 0.0001, "loss": 1.6336, "step": 7859 }, { "epoch": 0.9028774912411693, "grad_norm": 0.4836212396621704, "learning_rate": 0.0001, "loss": 1.5876, "step": 7860 }, { "epoch": 0.9029923611509965, "grad_norm": 0.46854549646377563, "learning_rate": 0.0001, "loss": 1.7531, "step": 7861 }, { "epoch": 0.9031072310608236, "grad_norm": 0.45791977643966675, "learning_rate": 0.0001, "loss": 1.6377, "step": 7862 }, { "epoch": 0.9032221009706507, "grad_norm": 0.4522063732147217, "learning_rate": 0.0001, "loss": 1.6546, "step": 7863 }, { "epoch": 0.9033369708804778, "grad_norm": 0.4975827932357788, "learning_rate": 0.0001, "loss": 1.464, "step": 7864 }, { "epoch": 0.9034518407903049, "grad_norm": 0.450082927942276, "learning_rate": 0.0001, "loss": 1.69, "step": 7865 }, { "epoch": 0.9035667107001321, "grad_norm": 0.4922288656234741, "learning_rate": 0.0001, "loss": 1.6362, "step": 7866 }, { "epoch": 0.9036815806099592, "grad_norm": 0.4563447833061218, "learning_rate": 0.0001, "loss": 1.6167, "step": 7867 }, { "epoch": 0.9037964505197863, "grad_norm": 0.466033399105072, "learning_rate": 0.0001, "loss": 1.5822, "step": 7868 }, { "epoch": 0.9039113204296134, "grad_norm": 0.48967087268829346, "learning_rate": 0.0001, "loss": 1.6251, "step": 7869 }, { "epoch": 0.9040261903394405, "grad_norm": 0.4684128165245056, "learning_rate": 0.0001, "loss": 1.61, "step": 7870 }, { "epoch": 0.9041410602492677, "grad_norm": 0.5109823942184448, "learning_rate": 0.0001, "loss": 1.6527, "step": 7871 }, { "epoch": 0.9042559301590948, "grad_norm": 0.46000662446022034, "learning_rate": 0.0001, "loss": 1.6549, "step": 7872 }, { "epoch": 0.9043708000689219, "grad_norm": 0.5103839039802551, "learning_rate": 0.0001, "loss": 1.5844, "step": 7873 }, { "epoch": 0.904485669978749, "grad_norm": 0.47217777371406555, "learning_rate": 0.0001, "loss": 1.6014, "step": 7874 }, { "epoch": 0.9046005398885761, "grad_norm": 0.5040363073348999, "learning_rate": 0.0001, "loss": 1.6295, "step": 7875 }, { "epoch": 0.9047154097984033, "grad_norm": 0.4671007990837097, "learning_rate": 0.0001, "loss": 1.4647, "step": 7876 }, { "epoch": 0.9048302797082304, "grad_norm": 0.467616468667984, "learning_rate": 0.0001, "loss": 1.5478, "step": 7877 }, { "epoch": 0.9049451496180575, "grad_norm": 0.4528937339782715, "learning_rate": 0.0001, "loss": 1.4716, "step": 7878 }, { "epoch": 0.9050600195278846, "grad_norm": 0.4835106432437897, "learning_rate": 0.0001, "loss": 1.6353, "step": 7879 }, { "epoch": 0.9051748894377117, "grad_norm": 0.4774007201194763, "learning_rate": 0.0001, "loss": 1.619, "step": 7880 }, { "epoch": 0.9052897593475389, "grad_norm": 0.4768272340297699, "learning_rate": 0.0001, "loss": 1.4522, "step": 7881 }, { "epoch": 0.905404629257366, "grad_norm": 0.4725984036922455, "learning_rate": 0.0001, "loss": 1.5499, "step": 7882 }, { "epoch": 0.9055194991671932, "grad_norm": 0.44116008281707764, "learning_rate": 0.0001, "loss": 1.3919, "step": 7883 }, { "epoch": 0.9056343690770203, "grad_norm": 0.4748861789703369, "learning_rate": 0.0001, "loss": 1.6583, "step": 7884 }, { "epoch": 0.9057492389868474, "grad_norm": 0.4855034053325653, "learning_rate": 0.0001, "loss": 1.6794, "step": 7885 }, { "epoch": 0.9058641088966746, "grad_norm": 0.4543818533420563, "learning_rate": 0.0001, "loss": 1.5825, "step": 7886 }, { "epoch": 0.9059789788065017, "grad_norm": 0.529973566532135, "learning_rate": 0.0001, "loss": 1.9031, "step": 7887 }, { "epoch": 0.9060938487163288, "grad_norm": 0.445414662361145, "learning_rate": 0.0001, "loss": 1.4344, "step": 7888 }, { "epoch": 0.9062087186261559, "grad_norm": 0.5017836689949036, "learning_rate": 0.0001, "loss": 1.4926, "step": 7889 }, { "epoch": 0.906323588535983, "grad_norm": 0.4460631012916565, "learning_rate": 0.0001, "loss": 1.4937, "step": 7890 }, { "epoch": 0.9064384584458102, "grad_norm": 0.4223814904689789, "learning_rate": 0.0001, "loss": 1.2901, "step": 7891 }, { "epoch": 0.9065533283556373, "grad_norm": 0.4735008478164673, "learning_rate": 0.0001, "loss": 1.6393, "step": 7892 }, { "epoch": 0.9066681982654644, "grad_norm": 0.4891226589679718, "learning_rate": 0.0001, "loss": 1.6429, "step": 7893 }, { "epoch": 0.9067830681752915, "grad_norm": 0.49529507756233215, "learning_rate": 0.0001, "loss": 1.4392, "step": 7894 }, { "epoch": 0.9068979380851186, "grad_norm": 0.4905736446380615, "learning_rate": 0.0001, "loss": 1.5082, "step": 7895 }, { "epoch": 0.9070128079949458, "grad_norm": 0.53058922290802, "learning_rate": 0.0001, "loss": 1.4193, "step": 7896 }, { "epoch": 0.9071276779047729, "grad_norm": 0.42824816703796387, "learning_rate": 0.0001, "loss": 1.4727, "step": 7897 }, { "epoch": 0.9072425478146, "grad_norm": 0.4656597673892975, "learning_rate": 0.0001, "loss": 1.5861, "step": 7898 }, { "epoch": 0.9073574177244271, "grad_norm": 0.5175133347511292, "learning_rate": 0.0001, "loss": 1.7236, "step": 7899 }, { "epoch": 0.9074722876342542, "grad_norm": 0.4708557426929474, "learning_rate": 0.0001, "loss": 1.6186, "step": 7900 }, { "epoch": 0.9075871575440814, "grad_norm": 0.49528563022613525, "learning_rate": 0.0001, "loss": 1.5486, "step": 7901 }, { "epoch": 0.9077020274539085, "grad_norm": 0.45954427123069763, "learning_rate": 0.0001, "loss": 1.4233, "step": 7902 }, { "epoch": 0.9078168973637356, "grad_norm": 0.48827245831489563, "learning_rate": 0.0001, "loss": 1.5515, "step": 7903 }, { "epoch": 0.9079317672735627, "grad_norm": 0.4947773218154907, "learning_rate": 0.0001, "loss": 1.6213, "step": 7904 }, { "epoch": 0.9080466371833898, "grad_norm": 0.4761560261249542, "learning_rate": 0.0001, "loss": 1.5405, "step": 7905 }, { "epoch": 0.908161507093217, "grad_norm": 0.48969805240631104, "learning_rate": 0.0001, "loss": 1.5753, "step": 7906 }, { "epoch": 0.9082763770030441, "grad_norm": 0.49315011501312256, "learning_rate": 0.0001, "loss": 1.6573, "step": 7907 }, { "epoch": 0.9083912469128712, "grad_norm": 0.4530597925186157, "learning_rate": 0.0001, "loss": 1.5737, "step": 7908 }, { "epoch": 0.9085061168226983, "grad_norm": 0.4942728877067566, "learning_rate": 0.0001, "loss": 1.7064, "step": 7909 }, { "epoch": 0.9086209867325254, "grad_norm": 0.44348907470703125, "learning_rate": 0.0001, "loss": 1.506, "step": 7910 }, { "epoch": 0.9087358566423526, "grad_norm": 0.4613859951496124, "learning_rate": 0.0001, "loss": 1.5753, "step": 7911 }, { "epoch": 0.9088507265521797, "grad_norm": 0.46956050395965576, "learning_rate": 0.0001, "loss": 1.2878, "step": 7912 }, { "epoch": 0.9089655964620068, "grad_norm": 0.5553674697875977, "learning_rate": 0.0001, "loss": 1.8513, "step": 7913 }, { "epoch": 0.9090804663718339, "grad_norm": 0.48194214701652527, "learning_rate": 0.0001, "loss": 1.3442, "step": 7914 }, { "epoch": 0.909195336281661, "grad_norm": 0.4378836750984192, "learning_rate": 0.0001, "loss": 1.4797, "step": 7915 }, { "epoch": 0.9093102061914882, "grad_norm": 0.4518606662750244, "learning_rate": 0.0001, "loss": 1.5698, "step": 7916 }, { "epoch": 0.9094250761013153, "grad_norm": 0.46091845631599426, "learning_rate": 0.0001, "loss": 1.4956, "step": 7917 }, { "epoch": 0.9095399460111424, "grad_norm": 0.447496235370636, "learning_rate": 0.0001, "loss": 1.5605, "step": 7918 }, { "epoch": 0.9096548159209695, "grad_norm": 0.5359869599342346, "learning_rate": 0.0001, "loss": 1.6687, "step": 7919 }, { "epoch": 0.9097696858307966, "grad_norm": 0.5245752930641174, "learning_rate": 0.0001, "loss": 1.5827, "step": 7920 }, { "epoch": 0.9098845557406238, "grad_norm": 0.5289234519004822, "learning_rate": 0.0001, "loss": 1.6148, "step": 7921 }, { "epoch": 0.9099994256504509, "grad_norm": 0.49240848422050476, "learning_rate": 0.0001, "loss": 1.6205, "step": 7922 }, { "epoch": 0.910114295560278, "grad_norm": 0.46257832646369934, "learning_rate": 0.0001, "loss": 1.623, "step": 7923 }, { "epoch": 0.9102291654701051, "grad_norm": 0.5155847072601318, "learning_rate": 0.0001, "loss": 1.6578, "step": 7924 }, { "epoch": 0.9103440353799322, "grad_norm": 0.5221872925758362, "learning_rate": 0.0001, "loss": 1.65, "step": 7925 }, { "epoch": 0.9104589052897594, "grad_norm": 0.4366224408149719, "learning_rate": 0.0001, "loss": 1.5788, "step": 7926 }, { "epoch": 0.9105737751995865, "grad_norm": 0.48819804191589355, "learning_rate": 0.0001, "loss": 1.6978, "step": 7927 }, { "epoch": 0.9106886451094136, "grad_norm": 0.4560621678829193, "learning_rate": 0.0001, "loss": 1.4342, "step": 7928 }, { "epoch": 0.9108035150192407, "grad_norm": 0.48248445987701416, "learning_rate": 0.0001, "loss": 1.7123, "step": 7929 }, { "epoch": 0.9109183849290678, "grad_norm": 0.4500480890274048, "learning_rate": 0.0001, "loss": 1.5309, "step": 7930 }, { "epoch": 0.911033254838895, "grad_norm": 0.52225661277771, "learning_rate": 0.0001, "loss": 1.6317, "step": 7931 }, { "epoch": 0.9111481247487221, "grad_norm": 0.5065485239028931, "learning_rate": 0.0001, "loss": 1.3653, "step": 7932 }, { "epoch": 0.9112629946585492, "grad_norm": 0.5204269886016846, "learning_rate": 0.0001, "loss": 1.7194, "step": 7933 }, { "epoch": 0.9113778645683763, "grad_norm": 0.5047922134399414, "learning_rate": 0.0001, "loss": 1.6934, "step": 7934 }, { "epoch": 0.9114927344782034, "grad_norm": 0.4803232252597809, "learning_rate": 0.0001, "loss": 1.7358, "step": 7935 }, { "epoch": 0.9116076043880306, "grad_norm": 0.5010977983474731, "learning_rate": 0.0001, "loss": 1.6251, "step": 7936 }, { "epoch": 0.9117224742978577, "grad_norm": 0.4615200459957123, "learning_rate": 0.0001, "loss": 1.5255, "step": 7937 }, { "epoch": 0.9118373442076848, "grad_norm": 0.4917146861553192, "learning_rate": 0.0001, "loss": 1.5949, "step": 7938 }, { "epoch": 0.9119522141175119, "grad_norm": 0.45587408542633057, "learning_rate": 0.0001, "loss": 1.5541, "step": 7939 }, { "epoch": 0.912067084027339, "grad_norm": 0.4592360556125641, "learning_rate": 0.0001, "loss": 1.5118, "step": 7940 }, { "epoch": 0.9121819539371662, "grad_norm": 0.49223262071609497, "learning_rate": 0.0001, "loss": 1.6489, "step": 7941 }, { "epoch": 0.9122968238469933, "grad_norm": 0.46011883020401, "learning_rate": 0.0001, "loss": 1.3733, "step": 7942 }, { "epoch": 0.9124116937568204, "grad_norm": 0.49059078097343445, "learning_rate": 0.0001, "loss": 1.7346, "step": 7943 }, { "epoch": 0.9125265636666475, "grad_norm": 0.472307026386261, "learning_rate": 0.0001, "loss": 1.5103, "step": 7944 }, { "epoch": 0.9126414335764746, "grad_norm": 0.48213303089141846, "learning_rate": 0.0001, "loss": 1.5634, "step": 7945 }, { "epoch": 0.9127563034863018, "grad_norm": 0.5104955434799194, "learning_rate": 0.0001, "loss": 1.8161, "step": 7946 }, { "epoch": 0.9128711733961289, "grad_norm": 0.4811389446258545, "learning_rate": 0.0001, "loss": 1.5841, "step": 7947 }, { "epoch": 0.912986043305956, "grad_norm": 0.4906313419342041, "learning_rate": 0.0001, "loss": 1.7777, "step": 7948 }, { "epoch": 0.9131009132157831, "grad_norm": 0.45326316356658936, "learning_rate": 0.0001, "loss": 1.6841, "step": 7949 }, { "epoch": 0.9132157831256102, "grad_norm": 0.41644978523254395, "learning_rate": 0.0001, "loss": 1.306, "step": 7950 }, { "epoch": 0.9133306530354374, "grad_norm": 0.432559996843338, "learning_rate": 0.0001, "loss": 1.3399, "step": 7951 }, { "epoch": 0.9134455229452645, "grad_norm": 0.4607470631599426, "learning_rate": 0.0001, "loss": 1.6326, "step": 7952 }, { "epoch": 0.9135603928550916, "grad_norm": 0.5131176710128784, "learning_rate": 0.0001, "loss": 1.7356, "step": 7953 }, { "epoch": 0.9136752627649187, "grad_norm": 0.4790054261684418, "learning_rate": 0.0001, "loss": 1.6153, "step": 7954 }, { "epoch": 0.9137901326747458, "grad_norm": 0.4506865441799164, "learning_rate": 0.0001, "loss": 1.5772, "step": 7955 }, { "epoch": 0.913905002584573, "grad_norm": 0.44508710503578186, "learning_rate": 0.0001, "loss": 1.5731, "step": 7956 }, { "epoch": 0.9140198724944001, "grad_norm": 0.43661433458328247, "learning_rate": 0.0001, "loss": 1.531, "step": 7957 }, { "epoch": 0.9141347424042272, "grad_norm": 0.456387460231781, "learning_rate": 0.0001, "loss": 1.6312, "step": 7958 }, { "epoch": 0.9142496123140543, "grad_norm": 0.5062097311019897, "learning_rate": 0.0001, "loss": 1.7662, "step": 7959 }, { "epoch": 0.9143644822238814, "grad_norm": 0.4519118070602417, "learning_rate": 0.0001, "loss": 1.5057, "step": 7960 }, { "epoch": 0.9144793521337086, "grad_norm": 0.4753338098526001, "learning_rate": 0.0001, "loss": 1.5305, "step": 7961 }, { "epoch": 0.9145942220435357, "grad_norm": 0.44463813304901123, "learning_rate": 0.0001, "loss": 1.4873, "step": 7962 }, { "epoch": 0.9147090919533628, "grad_norm": 0.4657713770866394, "learning_rate": 0.0001, "loss": 1.5608, "step": 7963 }, { "epoch": 0.9148239618631899, "grad_norm": 0.47514888644218445, "learning_rate": 0.0001, "loss": 1.4934, "step": 7964 }, { "epoch": 0.914938831773017, "grad_norm": 0.5030425786972046, "learning_rate": 0.0001, "loss": 1.6392, "step": 7965 }, { "epoch": 0.9150537016828442, "grad_norm": 0.49183422327041626, "learning_rate": 0.0001, "loss": 1.6318, "step": 7966 }, { "epoch": 0.9151685715926713, "grad_norm": 0.4909827709197998, "learning_rate": 0.0001, "loss": 1.607, "step": 7967 }, { "epoch": 0.9152834415024984, "grad_norm": 0.5114017724990845, "learning_rate": 0.0001, "loss": 1.7051, "step": 7968 }, { "epoch": 0.9153983114123255, "grad_norm": 0.5097264647483826, "learning_rate": 0.0001, "loss": 1.7322, "step": 7969 }, { "epoch": 0.9155131813221526, "grad_norm": 0.4767652750015259, "learning_rate": 0.0001, "loss": 1.4866, "step": 7970 }, { "epoch": 0.9156280512319798, "grad_norm": 0.4586257338523865, "learning_rate": 0.0001, "loss": 1.5762, "step": 7971 }, { "epoch": 0.9157429211418069, "grad_norm": 0.4332074224948883, "learning_rate": 0.0001, "loss": 1.3948, "step": 7972 }, { "epoch": 0.915857791051634, "grad_norm": 0.48241952061653137, "learning_rate": 0.0001, "loss": 1.5413, "step": 7973 }, { "epoch": 0.9159726609614611, "grad_norm": 0.5874638557434082, "learning_rate": 0.0001, "loss": 1.895, "step": 7974 }, { "epoch": 0.9160875308712882, "grad_norm": 0.4532858431339264, "learning_rate": 0.0001, "loss": 1.6298, "step": 7975 }, { "epoch": 0.9162024007811154, "grad_norm": 0.47365015745162964, "learning_rate": 0.0001, "loss": 1.6582, "step": 7976 }, { "epoch": 0.9163172706909425, "grad_norm": 0.5175490975379944, "learning_rate": 0.0001, "loss": 1.5293, "step": 7977 }, { "epoch": 0.9164321406007696, "grad_norm": 0.47895553708076477, "learning_rate": 0.0001, "loss": 1.608, "step": 7978 }, { "epoch": 0.9165470105105967, "grad_norm": 0.4826950132846832, "learning_rate": 0.0001, "loss": 1.5567, "step": 7979 }, { "epoch": 0.9166618804204238, "grad_norm": 0.4933220148086548, "learning_rate": 0.0001, "loss": 1.5636, "step": 7980 }, { "epoch": 0.916776750330251, "grad_norm": 0.47146233916282654, "learning_rate": 0.0001, "loss": 1.7157, "step": 7981 }, { "epoch": 0.9168916202400781, "grad_norm": 0.4537650942802429, "learning_rate": 0.0001, "loss": 1.5842, "step": 7982 }, { "epoch": 0.9170064901499052, "grad_norm": 0.5133910179138184, "learning_rate": 0.0001, "loss": 1.75, "step": 7983 }, { "epoch": 0.9171213600597323, "grad_norm": 0.4584057927131653, "learning_rate": 0.0001, "loss": 1.5925, "step": 7984 }, { "epoch": 0.9172362299695594, "grad_norm": 0.4383944571018219, "learning_rate": 0.0001, "loss": 1.4504, "step": 7985 }, { "epoch": 0.9173510998793866, "grad_norm": 0.482051819562912, "learning_rate": 0.0001, "loss": 1.4079, "step": 7986 }, { "epoch": 0.9174659697892137, "grad_norm": 0.4862101078033447, "learning_rate": 0.0001, "loss": 1.734, "step": 7987 }, { "epoch": 0.9175808396990408, "grad_norm": 0.46888962388038635, "learning_rate": 0.0001, "loss": 1.6339, "step": 7988 }, { "epoch": 0.9176957096088679, "grad_norm": 0.4773329198360443, "learning_rate": 0.0001, "loss": 1.5293, "step": 7989 }, { "epoch": 0.917810579518695, "grad_norm": 0.5123506784439087, "learning_rate": 0.0001, "loss": 1.4992, "step": 7990 }, { "epoch": 0.9179254494285222, "grad_norm": 0.4806014895439148, "learning_rate": 0.0001, "loss": 1.5878, "step": 7991 }, { "epoch": 0.9180403193383493, "grad_norm": 0.45858410000801086, "learning_rate": 0.0001, "loss": 1.5983, "step": 7992 }, { "epoch": 0.9181551892481764, "grad_norm": 0.5545932650566101, "learning_rate": 0.0001, "loss": 1.6907, "step": 7993 }, { "epoch": 0.9182700591580035, "grad_norm": 0.4767701327800751, "learning_rate": 0.0001, "loss": 1.6551, "step": 7994 }, { "epoch": 0.9183849290678306, "grad_norm": 0.5300646424293518, "learning_rate": 0.0001, "loss": 1.4472, "step": 7995 }, { "epoch": 0.9184997989776578, "grad_norm": 0.4707062244415283, "learning_rate": 0.0001, "loss": 1.6684, "step": 7996 }, { "epoch": 0.9186146688874849, "grad_norm": 0.47407713532447815, "learning_rate": 0.0001, "loss": 1.5561, "step": 7997 }, { "epoch": 0.918729538797312, "grad_norm": 0.4721267521381378, "learning_rate": 0.0001, "loss": 1.6156, "step": 7998 }, { "epoch": 0.9188444087071391, "grad_norm": 0.48417842388153076, "learning_rate": 0.0001, "loss": 1.5445, "step": 7999 }, { "epoch": 0.9189592786169662, "grad_norm": 0.4994370639324188, "learning_rate": 0.0001, "loss": 1.786, "step": 8000 }, { "epoch": 0.9190741485267934, "grad_norm": 0.5283456444740295, "learning_rate": 0.0001, "loss": 1.8096, "step": 8001 }, { "epoch": 0.9191890184366205, "grad_norm": 0.4809178411960602, "learning_rate": 0.0001, "loss": 1.5805, "step": 8002 }, { "epoch": 0.9193038883464476, "grad_norm": 0.5179066061973572, "learning_rate": 0.0001, "loss": 1.6071, "step": 8003 }, { "epoch": 0.9194187582562747, "grad_norm": 0.5334687829017639, "learning_rate": 0.0001, "loss": 1.759, "step": 8004 }, { "epoch": 0.9195336281661018, "grad_norm": 0.4858127236366272, "learning_rate": 0.0001, "loss": 1.6873, "step": 8005 }, { "epoch": 0.919648498075929, "grad_norm": 0.47769853472709656, "learning_rate": 0.0001, "loss": 1.4748, "step": 8006 }, { "epoch": 0.9197633679857561, "grad_norm": 0.5166919827461243, "learning_rate": 0.0001, "loss": 1.7652, "step": 8007 }, { "epoch": 0.9198782378955832, "grad_norm": 0.45674726366996765, "learning_rate": 0.0001, "loss": 1.6726, "step": 8008 }, { "epoch": 0.9199931078054103, "grad_norm": 0.45661020278930664, "learning_rate": 0.0001, "loss": 1.55, "step": 8009 }, { "epoch": 0.9201079777152374, "grad_norm": 0.5638238787651062, "learning_rate": 0.0001, "loss": 1.9338, "step": 8010 }, { "epoch": 0.9202228476250646, "grad_norm": 0.4804632067680359, "learning_rate": 0.0001, "loss": 1.695, "step": 8011 }, { "epoch": 0.9203377175348917, "grad_norm": 0.45596975088119507, "learning_rate": 0.0001, "loss": 1.4805, "step": 8012 }, { "epoch": 0.9204525874447188, "grad_norm": 0.47516369819641113, "learning_rate": 0.0001, "loss": 1.6081, "step": 8013 }, { "epoch": 0.9205674573545459, "grad_norm": 0.47999808192253113, "learning_rate": 0.0001, "loss": 1.6079, "step": 8014 }, { "epoch": 0.920682327264373, "grad_norm": 0.4690406918525696, "learning_rate": 0.0001, "loss": 1.6159, "step": 8015 }, { "epoch": 0.9207971971742002, "grad_norm": 0.4539242088794708, "learning_rate": 0.0001, "loss": 1.598, "step": 8016 }, { "epoch": 0.9209120670840273, "grad_norm": 0.4828774333000183, "learning_rate": 0.0001, "loss": 1.5996, "step": 8017 }, { "epoch": 0.9210269369938544, "grad_norm": 0.46138039231300354, "learning_rate": 0.0001, "loss": 1.6745, "step": 8018 }, { "epoch": 0.9211418069036815, "grad_norm": 0.5137259364128113, "learning_rate": 0.0001, "loss": 1.7913, "step": 8019 }, { "epoch": 0.9212566768135086, "grad_norm": 0.4621741771697998, "learning_rate": 0.0001, "loss": 1.5345, "step": 8020 }, { "epoch": 0.9213715467233359, "grad_norm": 0.4695769250392914, "learning_rate": 0.0001, "loss": 1.6416, "step": 8021 }, { "epoch": 0.921486416633163, "grad_norm": 0.4522899389266968, "learning_rate": 0.0001, "loss": 1.6889, "step": 8022 }, { "epoch": 0.9216012865429901, "grad_norm": 0.4740762710571289, "learning_rate": 0.0001, "loss": 1.5799, "step": 8023 }, { "epoch": 0.9217161564528172, "grad_norm": 0.4992024302482605, "learning_rate": 0.0001, "loss": 1.5142, "step": 8024 }, { "epoch": 0.9218310263626444, "grad_norm": 0.4674678444862366, "learning_rate": 0.0001, "loss": 1.6359, "step": 8025 }, { "epoch": 0.9219458962724715, "grad_norm": 0.5612513422966003, "learning_rate": 0.0001, "loss": 1.715, "step": 8026 }, { "epoch": 0.9220607661822986, "grad_norm": 0.4955098330974579, "learning_rate": 0.0001, "loss": 1.6685, "step": 8027 }, { "epoch": 0.9221756360921257, "grad_norm": 0.47574183344841003, "learning_rate": 0.0001, "loss": 1.5452, "step": 8028 }, { "epoch": 0.9222905060019528, "grad_norm": 0.4642239212989807, "learning_rate": 0.0001, "loss": 1.6651, "step": 8029 }, { "epoch": 0.92240537591178, "grad_norm": 0.473645955324173, "learning_rate": 0.0001, "loss": 1.6189, "step": 8030 }, { "epoch": 0.9225202458216071, "grad_norm": 0.4713420569896698, "learning_rate": 0.0001, "loss": 1.6377, "step": 8031 }, { "epoch": 0.9226351157314342, "grad_norm": 0.44111305475234985, "learning_rate": 0.0001, "loss": 1.4193, "step": 8032 }, { "epoch": 0.9227499856412613, "grad_norm": 0.4585154354572296, "learning_rate": 0.0001, "loss": 1.5419, "step": 8033 }, { "epoch": 0.9228648555510884, "grad_norm": 0.5225629210472107, "learning_rate": 0.0001, "loss": 1.8615, "step": 8034 }, { "epoch": 0.9229797254609156, "grad_norm": 0.43736404180526733, "learning_rate": 0.0001, "loss": 1.6029, "step": 8035 }, { "epoch": 0.9230945953707427, "grad_norm": 0.4762333333492279, "learning_rate": 0.0001, "loss": 1.6124, "step": 8036 }, { "epoch": 0.9232094652805698, "grad_norm": 0.47877663373947144, "learning_rate": 0.0001, "loss": 1.7119, "step": 8037 }, { "epoch": 0.9233243351903969, "grad_norm": 0.47204989194869995, "learning_rate": 0.0001, "loss": 1.5251, "step": 8038 }, { "epoch": 0.923439205100224, "grad_norm": 0.4415188133716583, "learning_rate": 0.0001, "loss": 1.6022, "step": 8039 }, { "epoch": 0.9235540750100512, "grad_norm": 0.4474656879901886, "learning_rate": 0.0001, "loss": 1.4882, "step": 8040 }, { "epoch": 0.9236689449198783, "grad_norm": 0.4732295274734497, "learning_rate": 0.0001, "loss": 1.3283, "step": 8041 }, { "epoch": 0.9237838148297054, "grad_norm": 0.5105409026145935, "learning_rate": 0.0001, "loss": 1.7868, "step": 8042 }, { "epoch": 0.9238986847395325, "grad_norm": 0.48805803060531616, "learning_rate": 0.0001, "loss": 1.3698, "step": 8043 }, { "epoch": 0.9240135546493596, "grad_norm": 0.5286892652511597, "learning_rate": 0.0001, "loss": 1.5557, "step": 8044 }, { "epoch": 0.9241284245591868, "grad_norm": 0.5003154873847961, "learning_rate": 0.0001, "loss": 1.5898, "step": 8045 }, { "epoch": 0.9242432944690139, "grad_norm": 0.503452718257904, "learning_rate": 0.0001, "loss": 1.2122, "step": 8046 }, { "epoch": 0.924358164378841, "grad_norm": 0.46282196044921875, "learning_rate": 0.0001, "loss": 1.4744, "step": 8047 }, { "epoch": 0.9244730342886681, "grad_norm": 0.48527947068214417, "learning_rate": 0.0001, "loss": 1.7213, "step": 8048 }, { "epoch": 0.9245879041984952, "grad_norm": 0.5114087462425232, "learning_rate": 0.0001, "loss": 1.8104, "step": 8049 }, { "epoch": 0.9247027741083224, "grad_norm": 0.5129145979881287, "learning_rate": 0.0001, "loss": 1.7043, "step": 8050 }, { "epoch": 0.9248176440181495, "grad_norm": 0.4763910174369812, "learning_rate": 0.0001, "loss": 1.7121, "step": 8051 }, { "epoch": 0.9249325139279766, "grad_norm": 0.47218838334083557, "learning_rate": 0.0001, "loss": 1.5667, "step": 8052 }, { "epoch": 0.9250473838378037, "grad_norm": 0.48110297322273254, "learning_rate": 0.0001, "loss": 1.4001, "step": 8053 }, { "epoch": 0.9251622537476308, "grad_norm": 0.4787732660770416, "learning_rate": 0.0001, "loss": 1.6857, "step": 8054 }, { "epoch": 0.925277123657458, "grad_norm": 0.48069849610328674, "learning_rate": 0.0001, "loss": 1.7946, "step": 8055 }, { "epoch": 0.9253919935672851, "grad_norm": 0.47537073493003845, "learning_rate": 0.0001, "loss": 1.6928, "step": 8056 }, { "epoch": 0.9255068634771122, "grad_norm": 0.5298829078674316, "learning_rate": 0.0001, "loss": 1.76, "step": 8057 }, { "epoch": 0.9256217333869393, "grad_norm": 0.5032019019126892, "learning_rate": 0.0001, "loss": 1.8279, "step": 8058 }, { "epoch": 0.9257366032967664, "grad_norm": 0.45885175466537476, "learning_rate": 0.0001, "loss": 1.5709, "step": 8059 }, { "epoch": 0.9258514732065936, "grad_norm": 0.5285278558731079, "learning_rate": 0.0001, "loss": 1.8198, "step": 8060 }, { "epoch": 0.9259663431164207, "grad_norm": 0.4382495880126953, "learning_rate": 0.0001, "loss": 1.3852, "step": 8061 }, { "epoch": 0.9260812130262478, "grad_norm": 0.47816163301467896, "learning_rate": 0.0001, "loss": 1.7725, "step": 8062 }, { "epoch": 0.9261960829360749, "grad_norm": 0.4544413685798645, "learning_rate": 0.0001, "loss": 1.4782, "step": 8063 }, { "epoch": 0.926310952845902, "grad_norm": 0.46362125873565674, "learning_rate": 0.0001, "loss": 1.583, "step": 8064 }, { "epoch": 0.9264258227557292, "grad_norm": 0.4957447052001953, "learning_rate": 0.0001, "loss": 1.5227, "step": 8065 }, { "epoch": 0.9265406926655563, "grad_norm": 0.4854178726673126, "learning_rate": 0.0001, "loss": 1.7782, "step": 8066 }, { "epoch": 0.9266555625753834, "grad_norm": 0.4702478051185608, "learning_rate": 0.0001, "loss": 1.5576, "step": 8067 }, { "epoch": 0.9267704324852105, "grad_norm": 0.5195414423942566, "learning_rate": 0.0001, "loss": 1.8032, "step": 8068 }, { "epoch": 0.9268853023950376, "grad_norm": 0.45996007323265076, "learning_rate": 0.0001, "loss": 1.3316, "step": 8069 }, { "epoch": 0.9270001723048648, "grad_norm": 0.482707679271698, "learning_rate": 0.0001, "loss": 1.7089, "step": 8070 }, { "epoch": 0.9271150422146919, "grad_norm": 0.4624692499637604, "learning_rate": 0.0001, "loss": 1.6492, "step": 8071 }, { "epoch": 0.927229912124519, "grad_norm": 0.46416664123535156, "learning_rate": 0.0001, "loss": 1.5141, "step": 8072 }, { "epoch": 0.9273447820343461, "grad_norm": 0.4962540566921234, "learning_rate": 0.0001, "loss": 1.6515, "step": 8073 }, { "epoch": 0.9274596519441732, "grad_norm": 0.4407590627670288, "learning_rate": 0.0001, "loss": 1.4663, "step": 8074 }, { "epoch": 0.9275745218540004, "grad_norm": 0.4883404076099396, "learning_rate": 0.0001, "loss": 1.5755, "step": 8075 }, { "epoch": 0.9276893917638275, "grad_norm": 0.46453699469566345, "learning_rate": 0.0001, "loss": 1.4894, "step": 8076 }, { "epoch": 0.9278042616736546, "grad_norm": 0.45241397619247437, "learning_rate": 0.0001, "loss": 1.5494, "step": 8077 }, { "epoch": 0.9279191315834817, "grad_norm": 0.48242220282554626, "learning_rate": 0.0001, "loss": 1.5236, "step": 8078 }, { "epoch": 0.9280340014933088, "grad_norm": 0.546755313873291, "learning_rate": 0.0001, "loss": 1.9116, "step": 8079 }, { "epoch": 0.928148871403136, "grad_norm": 0.4613960087299347, "learning_rate": 0.0001, "loss": 1.5672, "step": 8080 }, { "epoch": 0.9282637413129631, "grad_norm": 0.47544580698013306, "learning_rate": 0.0001, "loss": 1.5371, "step": 8081 }, { "epoch": 0.9283786112227902, "grad_norm": 0.5122746825218201, "learning_rate": 0.0001, "loss": 1.686, "step": 8082 }, { "epoch": 0.9284934811326173, "grad_norm": 0.47522497177124023, "learning_rate": 0.0001, "loss": 1.6247, "step": 8083 }, { "epoch": 0.9286083510424444, "grad_norm": 0.46367111802101135, "learning_rate": 0.0001, "loss": 1.4283, "step": 8084 }, { "epoch": 0.9287232209522716, "grad_norm": 0.5942444205284119, "learning_rate": 0.0001, "loss": 1.5347, "step": 8085 }, { "epoch": 0.9288380908620987, "grad_norm": 0.46138957142829895, "learning_rate": 0.0001, "loss": 1.7417, "step": 8086 }, { "epoch": 0.9289529607719258, "grad_norm": 0.5159793496131897, "learning_rate": 0.0001, "loss": 1.757, "step": 8087 }, { "epoch": 0.9290678306817529, "grad_norm": 0.48674991726875305, "learning_rate": 0.0001, "loss": 1.617, "step": 8088 }, { "epoch": 0.92918270059158, "grad_norm": 0.5087023973464966, "learning_rate": 0.0001, "loss": 1.8192, "step": 8089 }, { "epoch": 0.9292975705014072, "grad_norm": 0.5132076740264893, "learning_rate": 0.0001, "loss": 1.6679, "step": 8090 }, { "epoch": 0.9294124404112343, "grad_norm": 0.4683862030506134, "learning_rate": 0.0001, "loss": 1.6059, "step": 8091 }, { "epoch": 0.9295273103210614, "grad_norm": 0.5067598819732666, "learning_rate": 0.0001, "loss": 1.6037, "step": 8092 }, { "epoch": 0.9296421802308885, "grad_norm": 0.4644726514816284, "learning_rate": 0.0001, "loss": 1.6217, "step": 8093 }, { "epoch": 0.9297570501407156, "grad_norm": 0.44829845428466797, "learning_rate": 0.0001, "loss": 1.43, "step": 8094 }, { "epoch": 0.9298719200505428, "grad_norm": 0.5093761682510376, "learning_rate": 0.0001, "loss": 1.6939, "step": 8095 }, { "epoch": 0.9299867899603699, "grad_norm": 0.4932906925678253, "learning_rate": 0.0001, "loss": 1.7515, "step": 8096 }, { "epoch": 0.930101659870197, "grad_norm": 0.4766685366630554, "learning_rate": 0.0001, "loss": 1.6851, "step": 8097 }, { "epoch": 0.9302165297800241, "grad_norm": 0.46401628851890564, "learning_rate": 0.0001, "loss": 1.5306, "step": 8098 }, { "epoch": 0.9303313996898512, "grad_norm": 0.45654991269111633, "learning_rate": 0.0001, "loss": 1.4563, "step": 8099 }, { "epoch": 0.9304462695996784, "grad_norm": 0.46206626296043396, "learning_rate": 0.0001, "loss": 1.3484, "step": 8100 }, { "epoch": 0.9305611395095055, "grad_norm": 0.4578942060470581, "learning_rate": 0.0001, "loss": 1.3981, "step": 8101 }, { "epoch": 0.9306760094193326, "grad_norm": 0.4928850531578064, "learning_rate": 0.0001, "loss": 1.5062, "step": 8102 }, { "epoch": 0.9307908793291597, "grad_norm": 0.46304240822792053, "learning_rate": 0.0001, "loss": 1.5991, "step": 8103 }, { "epoch": 0.9309057492389868, "grad_norm": 0.43924397230148315, "learning_rate": 0.0001, "loss": 1.5359, "step": 8104 }, { "epoch": 0.931020619148814, "grad_norm": 0.5079434514045715, "learning_rate": 0.0001, "loss": 1.6138, "step": 8105 }, { "epoch": 0.9311354890586411, "grad_norm": 0.4720153510570526, "learning_rate": 0.0001, "loss": 1.7547, "step": 8106 }, { "epoch": 0.9312503589684682, "grad_norm": 0.4631122648715973, "learning_rate": 0.0001, "loss": 1.591, "step": 8107 }, { "epoch": 0.9313652288782953, "grad_norm": 0.4567249119281769, "learning_rate": 0.0001, "loss": 1.5734, "step": 8108 }, { "epoch": 0.9314800987881224, "grad_norm": 0.49746426939964294, "learning_rate": 0.0001, "loss": 1.6227, "step": 8109 }, { "epoch": 0.9315949686979496, "grad_norm": 0.47081831097602844, "learning_rate": 0.0001, "loss": 1.6083, "step": 8110 }, { "epoch": 0.9317098386077767, "grad_norm": 0.5293380618095398, "learning_rate": 0.0001, "loss": 1.6673, "step": 8111 }, { "epoch": 0.9318247085176038, "grad_norm": 0.47562193870544434, "learning_rate": 0.0001, "loss": 1.5876, "step": 8112 }, { "epoch": 0.9319395784274309, "grad_norm": 0.5127376317977905, "learning_rate": 0.0001, "loss": 1.6752, "step": 8113 }, { "epoch": 0.932054448337258, "grad_norm": 0.47655048966407776, "learning_rate": 0.0001, "loss": 1.5865, "step": 8114 }, { "epoch": 0.9321693182470852, "grad_norm": 0.4715765416622162, "learning_rate": 0.0001, "loss": 1.6537, "step": 8115 }, { "epoch": 0.9322841881569123, "grad_norm": 0.4677276313304901, "learning_rate": 0.0001, "loss": 1.5214, "step": 8116 }, { "epoch": 0.9323990580667394, "grad_norm": 0.4286912977695465, "learning_rate": 0.0001, "loss": 1.4507, "step": 8117 }, { "epoch": 0.9325139279765665, "grad_norm": 0.4614866375923157, "learning_rate": 0.0001, "loss": 1.6722, "step": 8118 }, { "epoch": 0.9326287978863936, "grad_norm": 0.44791609048843384, "learning_rate": 0.0001, "loss": 1.5154, "step": 8119 }, { "epoch": 0.9327436677962208, "grad_norm": 0.49002981185913086, "learning_rate": 0.0001, "loss": 1.7432, "step": 8120 }, { "epoch": 0.9328585377060479, "grad_norm": 0.4595596194267273, "learning_rate": 0.0001, "loss": 1.6082, "step": 8121 }, { "epoch": 0.932973407615875, "grad_norm": 0.47388172149658203, "learning_rate": 0.0001, "loss": 1.4072, "step": 8122 }, { "epoch": 0.9330882775257021, "grad_norm": 0.5274661183357239, "learning_rate": 0.0001, "loss": 1.7443, "step": 8123 }, { "epoch": 0.9332031474355292, "grad_norm": 0.5174868702888489, "learning_rate": 0.0001, "loss": 1.7705, "step": 8124 }, { "epoch": 0.9333180173453564, "grad_norm": 0.5129168629646301, "learning_rate": 0.0001, "loss": 1.6475, "step": 8125 }, { "epoch": 0.9334328872551835, "grad_norm": 0.4941151440143585, "learning_rate": 0.0001, "loss": 1.6202, "step": 8126 }, { "epoch": 0.9335477571650106, "grad_norm": 0.4885686933994293, "learning_rate": 0.0001, "loss": 1.7374, "step": 8127 }, { "epoch": 0.9336626270748377, "grad_norm": 0.47207513451576233, "learning_rate": 0.0001, "loss": 1.6066, "step": 8128 }, { "epoch": 0.9337774969846648, "grad_norm": 0.5630714893341064, "learning_rate": 0.0001, "loss": 2.0647, "step": 8129 }, { "epoch": 0.933892366894492, "grad_norm": 0.47757160663604736, "learning_rate": 0.0001, "loss": 1.6114, "step": 8130 }, { "epoch": 0.9340072368043191, "grad_norm": 0.4331253170967102, "learning_rate": 0.0001, "loss": 1.4655, "step": 8131 }, { "epoch": 0.9341221067141462, "grad_norm": 0.48484155535697937, "learning_rate": 0.0001, "loss": 1.6523, "step": 8132 }, { "epoch": 0.9342369766239733, "grad_norm": 0.47229698300361633, "learning_rate": 0.0001, "loss": 1.7429, "step": 8133 }, { "epoch": 0.9343518465338004, "grad_norm": 0.45240116119384766, "learning_rate": 0.0001, "loss": 1.3286, "step": 8134 }, { "epoch": 0.9344667164436276, "grad_norm": 0.4636879563331604, "learning_rate": 0.0001, "loss": 1.5574, "step": 8135 }, { "epoch": 0.9345815863534547, "grad_norm": 0.4439462721347809, "learning_rate": 0.0001, "loss": 1.6443, "step": 8136 }, { "epoch": 0.9346964562632818, "grad_norm": 0.46923813223838806, "learning_rate": 0.0001, "loss": 1.3393, "step": 8137 }, { "epoch": 0.9348113261731089, "grad_norm": 0.47300711274147034, "learning_rate": 0.0001, "loss": 1.6769, "step": 8138 }, { "epoch": 0.934926196082936, "grad_norm": 0.5104739665985107, "learning_rate": 0.0001, "loss": 1.5852, "step": 8139 }, { "epoch": 0.9350410659927632, "grad_norm": 0.4453759491443634, "learning_rate": 0.0001, "loss": 1.5457, "step": 8140 }, { "epoch": 0.9351559359025903, "grad_norm": 0.46000081300735474, "learning_rate": 0.0001, "loss": 1.568, "step": 8141 }, { "epoch": 0.9352708058124174, "grad_norm": 0.4699609577655792, "learning_rate": 0.0001, "loss": 1.6492, "step": 8142 }, { "epoch": 0.9353856757222445, "grad_norm": 0.4497356414794922, "learning_rate": 0.0001, "loss": 1.6447, "step": 8143 }, { "epoch": 0.9355005456320716, "grad_norm": 0.4631606638431549, "learning_rate": 0.0001, "loss": 1.7243, "step": 8144 }, { "epoch": 0.9356154155418988, "grad_norm": 0.4655974805355072, "learning_rate": 0.0001, "loss": 1.6267, "step": 8145 }, { "epoch": 0.9357302854517259, "grad_norm": 0.464070588350296, "learning_rate": 0.0001, "loss": 1.5003, "step": 8146 }, { "epoch": 0.935845155361553, "grad_norm": 0.5652233362197876, "learning_rate": 0.0001, "loss": 1.5475, "step": 8147 }, { "epoch": 0.9359600252713801, "grad_norm": 0.47151249647140503, "learning_rate": 0.0001, "loss": 1.5032, "step": 8148 }, { "epoch": 0.9360748951812072, "grad_norm": 0.4593266248703003, "learning_rate": 0.0001, "loss": 1.5327, "step": 8149 }, { "epoch": 0.9361897650910344, "grad_norm": 0.491393119096756, "learning_rate": 0.0001, "loss": 1.6612, "step": 8150 }, { "epoch": 0.9363046350008615, "grad_norm": 0.5595842003822327, "learning_rate": 0.0001, "loss": 1.4467, "step": 8151 }, { "epoch": 0.9364195049106886, "grad_norm": 0.46376144886016846, "learning_rate": 0.0001, "loss": 1.5592, "step": 8152 }, { "epoch": 0.9365343748205157, "grad_norm": 0.4688231348991394, "learning_rate": 0.0001, "loss": 1.5557, "step": 8153 }, { "epoch": 0.9366492447303428, "grad_norm": 0.45528239011764526, "learning_rate": 0.0001, "loss": 1.5417, "step": 8154 }, { "epoch": 0.93676411464017, "grad_norm": 0.48385414481163025, "learning_rate": 0.0001, "loss": 1.4563, "step": 8155 }, { "epoch": 0.9368789845499971, "grad_norm": 0.4678395688533783, "learning_rate": 0.0001, "loss": 1.5949, "step": 8156 }, { "epoch": 0.9369938544598242, "grad_norm": 0.5076170563697815, "learning_rate": 0.0001, "loss": 1.61, "step": 8157 }, { "epoch": 0.9371087243696514, "grad_norm": 0.4750152826309204, "learning_rate": 0.0001, "loss": 1.4804, "step": 8158 }, { "epoch": 0.9372235942794785, "grad_norm": 0.4693584740161896, "learning_rate": 0.0001, "loss": 1.5668, "step": 8159 }, { "epoch": 0.9373384641893057, "grad_norm": 0.45886656641960144, "learning_rate": 0.0001, "loss": 1.7251, "step": 8160 }, { "epoch": 0.9374533340991328, "grad_norm": 0.554453432559967, "learning_rate": 0.0001, "loss": 1.9527, "step": 8161 }, { "epoch": 0.9375682040089599, "grad_norm": 0.46930959820747375, "learning_rate": 0.0001, "loss": 1.4046, "step": 8162 }, { "epoch": 0.937683073918787, "grad_norm": 0.44557827711105347, "learning_rate": 0.0001, "loss": 1.4906, "step": 8163 }, { "epoch": 0.9377979438286141, "grad_norm": 0.4786651134490967, "learning_rate": 0.0001, "loss": 1.7044, "step": 8164 }, { "epoch": 0.9379128137384413, "grad_norm": 0.47346630692481995, "learning_rate": 0.0001, "loss": 1.384, "step": 8165 }, { "epoch": 0.9380276836482684, "grad_norm": 0.4756239056587219, "learning_rate": 0.0001, "loss": 1.6438, "step": 8166 }, { "epoch": 0.9381425535580955, "grad_norm": 0.4796939194202423, "learning_rate": 0.0001, "loss": 1.6085, "step": 8167 }, { "epoch": 0.9382574234679226, "grad_norm": 0.4924411475658417, "learning_rate": 0.0001, "loss": 1.5232, "step": 8168 }, { "epoch": 0.9383722933777497, "grad_norm": 0.4914194941520691, "learning_rate": 0.0001, "loss": 1.6322, "step": 8169 }, { "epoch": 0.9384871632875769, "grad_norm": 0.46854540705680847, "learning_rate": 0.0001, "loss": 1.4879, "step": 8170 }, { "epoch": 0.938602033197404, "grad_norm": 0.46316492557525635, "learning_rate": 0.0001, "loss": 1.523, "step": 8171 }, { "epoch": 0.9387169031072311, "grad_norm": 0.4873157739639282, "learning_rate": 0.0001, "loss": 1.5733, "step": 8172 }, { "epoch": 0.9388317730170582, "grad_norm": 0.4700479805469513, "learning_rate": 0.0001, "loss": 1.4223, "step": 8173 }, { "epoch": 0.9389466429268853, "grad_norm": 0.5385305285453796, "learning_rate": 0.0001, "loss": 1.821, "step": 8174 }, { "epoch": 0.9390615128367125, "grad_norm": 0.48141562938690186, "learning_rate": 0.0001, "loss": 1.4552, "step": 8175 }, { "epoch": 0.9391763827465396, "grad_norm": 0.4608069360256195, "learning_rate": 0.0001, "loss": 1.521, "step": 8176 }, { "epoch": 0.9392912526563667, "grad_norm": 0.4807290732860565, "learning_rate": 0.0001, "loss": 1.4567, "step": 8177 }, { "epoch": 0.9394061225661938, "grad_norm": 0.5841659307479858, "learning_rate": 0.0001, "loss": 1.8741, "step": 8178 }, { "epoch": 0.9395209924760209, "grad_norm": 0.4911169111728668, "learning_rate": 0.0001, "loss": 1.6685, "step": 8179 }, { "epoch": 0.9396358623858481, "grad_norm": 0.4507903754711151, "learning_rate": 0.0001, "loss": 1.5333, "step": 8180 }, { "epoch": 0.9397507322956752, "grad_norm": 0.4615688621997833, "learning_rate": 0.0001, "loss": 1.6094, "step": 8181 }, { "epoch": 0.9398656022055023, "grad_norm": 0.5235313773155212, "learning_rate": 0.0001, "loss": 1.6907, "step": 8182 }, { "epoch": 0.9399804721153294, "grad_norm": 0.4749388098716736, "learning_rate": 0.0001, "loss": 1.4103, "step": 8183 }, { "epoch": 0.9400953420251565, "grad_norm": 0.49427589774131775, "learning_rate": 0.0001, "loss": 1.7691, "step": 8184 }, { "epoch": 0.9402102119349837, "grad_norm": 0.4776000678539276, "learning_rate": 0.0001, "loss": 1.566, "step": 8185 }, { "epoch": 0.9403250818448108, "grad_norm": 0.47408178448677063, "learning_rate": 0.0001, "loss": 1.4462, "step": 8186 }, { "epoch": 0.9404399517546379, "grad_norm": 0.5133025646209717, "learning_rate": 0.0001, "loss": 1.6323, "step": 8187 }, { "epoch": 0.940554821664465, "grad_norm": 0.45842912793159485, "learning_rate": 0.0001, "loss": 1.6081, "step": 8188 }, { "epoch": 0.9406696915742921, "grad_norm": 0.4748247563838959, "learning_rate": 0.0001, "loss": 1.4791, "step": 8189 }, { "epoch": 0.9407845614841193, "grad_norm": 0.46958670020103455, "learning_rate": 0.0001, "loss": 1.597, "step": 8190 }, { "epoch": 0.9408994313939464, "grad_norm": 0.48225289583206177, "learning_rate": 0.0001, "loss": 1.6671, "step": 8191 }, { "epoch": 0.9410143013037735, "grad_norm": 0.5528295636177063, "learning_rate": 0.0001, "loss": 1.6373, "step": 8192 }, { "epoch": 0.9411291712136006, "grad_norm": 0.5050978064537048, "learning_rate": 0.0001, "loss": 1.702, "step": 8193 }, { "epoch": 0.9412440411234277, "grad_norm": 0.4624803066253662, "learning_rate": 0.0001, "loss": 1.6395, "step": 8194 }, { "epoch": 0.9413589110332549, "grad_norm": 0.5099507570266724, "learning_rate": 0.0001, "loss": 1.6849, "step": 8195 }, { "epoch": 0.941473780943082, "grad_norm": 0.4514469504356384, "learning_rate": 0.0001, "loss": 1.5848, "step": 8196 }, { "epoch": 0.9415886508529091, "grad_norm": 0.5006936192512512, "learning_rate": 0.0001, "loss": 1.5678, "step": 8197 }, { "epoch": 0.9417035207627362, "grad_norm": 0.5389548540115356, "learning_rate": 0.0001, "loss": 1.6739, "step": 8198 }, { "epoch": 0.9418183906725633, "grad_norm": 0.46252140402793884, "learning_rate": 0.0001, "loss": 1.5236, "step": 8199 }, { "epoch": 0.9419332605823905, "grad_norm": 0.46771207451820374, "learning_rate": 0.0001, "loss": 1.3591, "step": 8200 }, { "epoch": 0.9420481304922176, "grad_norm": 0.492191880941391, "learning_rate": 0.0001, "loss": 1.4178, "step": 8201 }, { "epoch": 0.9421630004020447, "grad_norm": 0.4770967960357666, "learning_rate": 0.0001, "loss": 1.7547, "step": 8202 }, { "epoch": 0.9422778703118718, "grad_norm": 0.5047717094421387, "learning_rate": 0.0001, "loss": 1.6103, "step": 8203 }, { "epoch": 0.9423927402216989, "grad_norm": 0.47742435336112976, "learning_rate": 0.0001, "loss": 1.5417, "step": 8204 }, { "epoch": 0.9425076101315261, "grad_norm": 0.5914652943611145, "learning_rate": 0.0001, "loss": 1.6294, "step": 8205 }, { "epoch": 0.9426224800413532, "grad_norm": 0.4261223077774048, "learning_rate": 0.0001, "loss": 1.3505, "step": 8206 }, { "epoch": 0.9427373499511803, "grad_norm": 0.47556623816490173, "learning_rate": 0.0001, "loss": 1.4745, "step": 8207 }, { "epoch": 0.9428522198610074, "grad_norm": 0.4878052771091461, "learning_rate": 0.0001, "loss": 1.6136, "step": 8208 }, { "epoch": 0.9429670897708345, "grad_norm": 0.44221365451812744, "learning_rate": 0.0001, "loss": 1.3707, "step": 8209 }, { "epoch": 0.9430819596806617, "grad_norm": 0.47154805064201355, "learning_rate": 0.0001, "loss": 1.5895, "step": 8210 }, { "epoch": 0.9431968295904888, "grad_norm": 0.5039512515068054, "learning_rate": 0.0001, "loss": 1.7088, "step": 8211 }, { "epoch": 0.9433116995003159, "grad_norm": 0.45005252957344055, "learning_rate": 0.0001, "loss": 1.4901, "step": 8212 }, { "epoch": 0.943426569410143, "grad_norm": 0.47104600071907043, "learning_rate": 0.0001, "loss": 1.5345, "step": 8213 }, { "epoch": 0.9435414393199701, "grad_norm": 0.4666808843612671, "learning_rate": 0.0001, "loss": 1.5343, "step": 8214 }, { "epoch": 0.9436563092297973, "grad_norm": 0.510503351688385, "learning_rate": 0.0001, "loss": 1.6839, "step": 8215 }, { "epoch": 0.9437711791396244, "grad_norm": 0.4818345606327057, "learning_rate": 0.0001, "loss": 1.4745, "step": 8216 }, { "epoch": 0.9438860490494515, "grad_norm": 0.4899197518825531, "learning_rate": 0.0001, "loss": 1.5966, "step": 8217 }, { "epoch": 0.9440009189592786, "grad_norm": 0.5139290690422058, "learning_rate": 0.0001, "loss": 1.406, "step": 8218 }, { "epoch": 0.9441157888691057, "grad_norm": 0.4770418107509613, "learning_rate": 0.0001, "loss": 1.4606, "step": 8219 }, { "epoch": 0.9442306587789329, "grad_norm": 0.617544949054718, "learning_rate": 0.0001, "loss": 1.5908, "step": 8220 }, { "epoch": 0.94434552868876, "grad_norm": 0.49356672167778015, "learning_rate": 0.0001, "loss": 1.7148, "step": 8221 }, { "epoch": 0.9444603985985871, "grad_norm": 0.4679422974586487, "learning_rate": 0.0001, "loss": 1.5843, "step": 8222 }, { "epoch": 0.9445752685084142, "grad_norm": 0.5478677749633789, "learning_rate": 0.0001, "loss": 1.8256, "step": 8223 }, { "epoch": 0.9446901384182413, "grad_norm": 0.4296529293060303, "learning_rate": 0.0001, "loss": 1.416, "step": 8224 }, { "epoch": 0.9448050083280685, "grad_norm": 0.4777142405509949, "learning_rate": 0.0001, "loss": 1.4079, "step": 8225 }, { "epoch": 0.9449198782378956, "grad_norm": 0.5253008604049683, "learning_rate": 0.0001, "loss": 1.6611, "step": 8226 }, { "epoch": 0.9450347481477227, "grad_norm": 0.4972079396247864, "learning_rate": 0.0001, "loss": 1.6474, "step": 8227 }, { "epoch": 0.9451496180575498, "grad_norm": 0.4560241997241974, "learning_rate": 0.0001, "loss": 1.4919, "step": 8228 }, { "epoch": 0.9452644879673769, "grad_norm": 0.4682483673095703, "learning_rate": 0.0001, "loss": 1.5651, "step": 8229 }, { "epoch": 0.9453793578772041, "grad_norm": 0.4736382067203522, "learning_rate": 0.0001, "loss": 1.3865, "step": 8230 }, { "epoch": 0.9454942277870312, "grad_norm": 0.4748673737049103, "learning_rate": 0.0001, "loss": 1.427, "step": 8231 }, { "epoch": 0.9456090976968583, "grad_norm": 0.45922762155532837, "learning_rate": 0.0001, "loss": 1.3151, "step": 8232 }, { "epoch": 0.9457239676066854, "grad_norm": 0.47256186604499817, "learning_rate": 0.0001, "loss": 1.4456, "step": 8233 }, { "epoch": 0.9458388375165125, "grad_norm": 0.46213608980178833, "learning_rate": 0.0001, "loss": 1.4308, "step": 8234 }, { "epoch": 0.9459537074263397, "grad_norm": 0.4915864169597626, "learning_rate": 0.0001, "loss": 1.6674, "step": 8235 }, { "epoch": 0.9460685773361668, "grad_norm": 0.4443720579147339, "learning_rate": 0.0001, "loss": 1.5858, "step": 8236 }, { "epoch": 0.9461834472459939, "grad_norm": 0.4873688519001007, "learning_rate": 0.0001, "loss": 1.7889, "step": 8237 }, { "epoch": 0.946298317155821, "grad_norm": 0.49472931027412415, "learning_rate": 0.0001, "loss": 1.6489, "step": 8238 }, { "epoch": 0.9464131870656481, "grad_norm": 0.4404892921447754, "learning_rate": 0.0001, "loss": 1.3307, "step": 8239 }, { "epoch": 0.9465280569754753, "grad_norm": 0.48074471950531006, "learning_rate": 0.0001, "loss": 1.6203, "step": 8240 }, { "epoch": 0.9466429268853024, "grad_norm": 0.5083596110343933, "learning_rate": 0.0001, "loss": 1.8559, "step": 8241 }, { "epoch": 0.9467577967951295, "grad_norm": 0.4485703110694885, "learning_rate": 0.0001, "loss": 1.5608, "step": 8242 }, { "epoch": 0.9468726667049566, "grad_norm": 0.48620954155921936, "learning_rate": 0.0001, "loss": 1.5907, "step": 8243 }, { "epoch": 0.9469875366147837, "grad_norm": 0.4664001166820526, "learning_rate": 0.0001, "loss": 1.5831, "step": 8244 }, { "epoch": 0.9471024065246109, "grad_norm": 0.4595847427845001, "learning_rate": 0.0001, "loss": 1.4782, "step": 8245 }, { "epoch": 0.947217276434438, "grad_norm": 0.5062698721885681, "learning_rate": 0.0001, "loss": 1.5421, "step": 8246 }, { "epoch": 0.9473321463442651, "grad_norm": 0.44905591011047363, "learning_rate": 0.0001, "loss": 1.5338, "step": 8247 }, { "epoch": 0.9474470162540922, "grad_norm": 0.47497665882110596, "learning_rate": 0.0001, "loss": 1.4712, "step": 8248 }, { "epoch": 0.9475618861639193, "grad_norm": 0.48770657181739807, "learning_rate": 0.0001, "loss": 1.43, "step": 8249 }, { "epoch": 0.9476767560737465, "grad_norm": 0.4931548833847046, "learning_rate": 0.0001, "loss": 1.5114, "step": 8250 }, { "epoch": 0.9477916259835736, "grad_norm": 0.47381991147994995, "learning_rate": 0.0001, "loss": 1.7137, "step": 8251 }, { "epoch": 0.9479064958934007, "grad_norm": 0.5158901810646057, "learning_rate": 0.0001, "loss": 1.6038, "step": 8252 }, { "epoch": 0.9480213658032278, "grad_norm": 0.48369431495666504, "learning_rate": 0.0001, "loss": 1.6776, "step": 8253 }, { "epoch": 0.9481362357130549, "grad_norm": 0.5041664838790894, "learning_rate": 0.0001, "loss": 1.6957, "step": 8254 }, { "epoch": 0.9482511056228821, "grad_norm": 0.46258240938186646, "learning_rate": 0.0001, "loss": 1.4937, "step": 8255 }, { "epoch": 0.9483659755327092, "grad_norm": 0.4723878800868988, "learning_rate": 0.0001, "loss": 1.656, "step": 8256 }, { "epoch": 0.9484808454425363, "grad_norm": 0.45941516757011414, "learning_rate": 0.0001, "loss": 1.6875, "step": 8257 }, { "epoch": 0.9485957153523634, "grad_norm": 0.4574301540851593, "learning_rate": 0.0001, "loss": 1.5827, "step": 8258 }, { "epoch": 0.9487105852621905, "grad_norm": 0.4727140963077545, "learning_rate": 0.0001, "loss": 1.6921, "step": 8259 }, { "epoch": 0.9488254551720177, "grad_norm": 0.46204841136932373, "learning_rate": 0.0001, "loss": 1.4919, "step": 8260 }, { "epoch": 0.9489403250818448, "grad_norm": 0.48473307490348816, "learning_rate": 0.0001, "loss": 1.3974, "step": 8261 }, { "epoch": 0.9490551949916719, "grad_norm": 0.4569971561431885, "learning_rate": 0.0001, "loss": 1.7005, "step": 8262 }, { "epoch": 0.949170064901499, "grad_norm": 0.4910833239555359, "learning_rate": 0.0001, "loss": 1.6477, "step": 8263 }, { "epoch": 0.9492849348113261, "grad_norm": 0.469138503074646, "learning_rate": 0.0001, "loss": 1.5438, "step": 8264 }, { "epoch": 0.9493998047211533, "grad_norm": 0.4757539927959442, "learning_rate": 0.0001, "loss": 1.5641, "step": 8265 }, { "epoch": 0.9495146746309804, "grad_norm": 0.4612511992454529, "learning_rate": 0.0001, "loss": 1.6176, "step": 8266 }, { "epoch": 0.9496295445408075, "grad_norm": 0.5166419744491577, "learning_rate": 0.0001, "loss": 1.6812, "step": 8267 }, { "epoch": 0.9497444144506346, "grad_norm": 0.451444149017334, "learning_rate": 0.0001, "loss": 1.5371, "step": 8268 }, { "epoch": 0.9498592843604617, "grad_norm": 0.4882412850856781, "learning_rate": 0.0001, "loss": 1.5438, "step": 8269 }, { "epoch": 0.9499741542702889, "grad_norm": 0.5102916359901428, "learning_rate": 0.0001, "loss": 1.6798, "step": 8270 }, { "epoch": 0.950089024180116, "grad_norm": 0.46115705370903015, "learning_rate": 0.0001, "loss": 1.2963, "step": 8271 }, { "epoch": 0.9502038940899431, "grad_norm": 0.5228103399276733, "learning_rate": 0.0001, "loss": 1.4118, "step": 8272 }, { "epoch": 0.9503187639997702, "grad_norm": 0.5342441201210022, "learning_rate": 0.0001, "loss": 1.7615, "step": 8273 }, { "epoch": 0.9504336339095973, "grad_norm": 0.4900018274784088, "learning_rate": 0.0001, "loss": 1.3293, "step": 8274 }, { "epoch": 0.9505485038194245, "grad_norm": 0.5222525000572205, "learning_rate": 0.0001, "loss": 1.6752, "step": 8275 }, { "epoch": 0.9506633737292516, "grad_norm": 0.5265666842460632, "learning_rate": 0.0001, "loss": 1.7776, "step": 8276 }, { "epoch": 0.9507782436390787, "grad_norm": 0.4796416759490967, "learning_rate": 0.0001, "loss": 1.5964, "step": 8277 }, { "epoch": 0.9508931135489058, "grad_norm": 0.4717714488506317, "learning_rate": 0.0001, "loss": 1.6328, "step": 8278 }, { "epoch": 0.9510079834587329, "grad_norm": 0.5052562952041626, "learning_rate": 0.0001, "loss": 1.5438, "step": 8279 }, { "epoch": 0.9511228533685601, "grad_norm": 0.45067209005355835, "learning_rate": 0.0001, "loss": 1.5466, "step": 8280 }, { "epoch": 0.9512377232783872, "grad_norm": 0.527546226978302, "learning_rate": 0.0001, "loss": 1.4335, "step": 8281 }, { "epoch": 0.9513525931882143, "grad_norm": 0.46170440316200256, "learning_rate": 0.0001, "loss": 1.3969, "step": 8282 }, { "epoch": 0.9514674630980414, "grad_norm": 0.5153254270553589, "learning_rate": 0.0001, "loss": 1.6541, "step": 8283 }, { "epoch": 0.9515823330078685, "grad_norm": 0.48110654950141907, "learning_rate": 0.0001, "loss": 1.5888, "step": 8284 }, { "epoch": 0.9516972029176957, "grad_norm": 0.5225505828857422, "learning_rate": 0.0001, "loss": 1.7728, "step": 8285 }, { "epoch": 0.9518120728275228, "grad_norm": 0.4549621343612671, "learning_rate": 0.0001, "loss": 1.4923, "step": 8286 }, { "epoch": 0.9519269427373499, "grad_norm": 0.5324844121932983, "learning_rate": 0.0001, "loss": 1.6255, "step": 8287 }, { "epoch": 0.952041812647177, "grad_norm": 0.4963549077510834, "learning_rate": 0.0001, "loss": 1.5002, "step": 8288 }, { "epoch": 0.9521566825570041, "grad_norm": 0.460531085729599, "learning_rate": 0.0001, "loss": 1.564, "step": 8289 }, { "epoch": 0.9522715524668313, "grad_norm": 0.4704626798629761, "learning_rate": 0.0001, "loss": 1.7415, "step": 8290 }, { "epoch": 0.9523864223766584, "grad_norm": 0.4891350269317627, "learning_rate": 0.0001, "loss": 1.5294, "step": 8291 }, { "epoch": 0.9525012922864855, "grad_norm": 0.4812767207622528, "learning_rate": 0.0001, "loss": 1.6853, "step": 8292 }, { "epoch": 0.9526161621963126, "grad_norm": 0.47794899344444275, "learning_rate": 0.0001, "loss": 1.586, "step": 8293 }, { "epoch": 0.9527310321061397, "grad_norm": 0.49231424927711487, "learning_rate": 0.0001, "loss": 1.4125, "step": 8294 }, { "epoch": 0.952845902015967, "grad_norm": 0.47585782408714294, "learning_rate": 0.0001, "loss": 1.5914, "step": 8295 }, { "epoch": 0.9529607719257941, "grad_norm": 0.5035213828086853, "learning_rate": 0.0001, "loss": 1.6766, "step": 8296 }, { "epoch": 0.9530756418356212, "grad_norm": 0.5115920305252075, "learning_rate": 0.0001, "loss": 1.4955, "step": 8297 }, { "epoch": 0.9531905117454483, "grad_norm": 0.5224997401237488, "learning_rate": 0.0001, "loss": 1.6931, "step": 8298 }, { "epoch": 0.9533053816552755, "grad_norm": 0.5272790789604187, "learning_rate": 0.0001, "loss": 1.584, "step": 8299 }, { "epoch": 0.9534202515651026, "grad_norm": 0.516810953617096, "learning_rate": 0.0001, "loss": 1.7744, "step": 8300 }, { "epoch": 0.9535351214749297, "grad_norm": 0.46272823214530945, "learning_rate": 0.0001, "loss": 1.3943, "step": 8301 }, { "epoch": 0.9536499913847568, "grad_norm": 0.5371290445327759, "learning_rate": 0.0001, "loss": 1.5101, "step": 8302 }, { "epoch": 0.9537648612945839, "grad_norm": 0.5088568329811096, "learning_rate": 0.0001, "loss": 1.7429, "step": 8303 }, { "epoch": 0.953879731204411, "grad_norm": 0.4637158215045929, "learning_rate": 0.0001, "loss": 1.5429, "step": 8304 }, { "epoch": 0.9539946011142382, "grad_norm": 0.44325053691864014, "learning_rate": 0.0001, "loss": 1.521, "step": 8305 }, { "epoch": 0.9541094710240653, "grad_norm": 0.4635404944419861, "learning_rate": 0.0001, "loss": 1.4581, "step": 8306 }, { "epoch": 0.9542243409338924, "grad_norm": 0.5078271627426147, "learning_rate": 0.0001, "loss": 1.7131, "step": 8307 }, { "epoch": 0.9543392108437195, "grad_norm": 0.5453723669052124, "learning_rate": 0.0001, "loss": 1.6502, "step": 8308 }, { "epoch": 0.9544540807535467, "grad_norm": 0.4599123001098633, "learning_rate": 0.0001, "loss": 1.5017, "step": 8309 }, { "epoch": 0.9545689506633738, "grad_norm": 0.5290499329566956, "learning_rate": 0.0001, "loss": 1.661, "step": 8310 }, { "epoch": 0.9546838205732009, "grad_norm": 0.44906917214393616, "learning_rate": 0.0001, "loss": 1.5134, "step": 8311 }, { "epoch": 0.954798690483028, "grad_norm": 0.44247668981552124, "learning_rate": 0.0001, "loss": 1.4854, "step": 8312 }, { "epoch": 0.9549135603928551, "grad_norm": 0.49097177386283875, "learning_rate": 0.0001, "loss": 1.3212, "step": 8313 }, { "epoch": 0.9550284303026823, "grad_norm": 0.47003665566444397, "learning_rate": 0.0001, "loss": 1.5748, "step": 8314 }, { "epoch": 0.9551433002125094, "grad_norm": 0.4841088056564331, "learning_rate": 0.0001, "loss": 1.6514, "step": 8315 }, { "epoch": 0.9552581701223365, "grad_norm": 0.47243985533714294, "learning_rate": 0.0001, "loss": 1.4881, "step": 8316 }, { "epoch": 0.9553730400321636, "grad_norm": 0.47702085971832275, "learning_rate": 0.0001, "loss": 1.4967, "step": 8317 }, { "epoch": 0.9554879099419907, "grad_norm": 0.45515045523643494, "learning_rate": 0.0001, "loss": 1.5439, "step": 8318 }, { "epoch": 0.9556027798518179, "grad_norm": 0.5219390392303467, "learning_rate": 0.0001, "loss": 1.7652, "step": 8319 }, { "epoch": 0.955717649761645, "grad_norm": 0.5401485562324524, "learning_rate": 0.0001, "loss": 1.8357, "step": 8320 }, { "epoch": 0.9558325196714721, "grad_norm": 0.4558376669883728, "learning_rate": 0.0001, "loss": 1.4786, "step": 8321 }, { "epoch": 0.9559473895812992, "grad_norm": 0.4861636459827423, "learning_rate": 0.0001, "loss": 1.6804, "step": 8322 }, { "epoch": 0.9560622594911263, "grad_norm": 0.4567401707172394, "learning_rate": 0.0001, "loss": 1.5903, "step": 8323 }, { "epoch": 0.9561771294009535, "grad_norm": 0.5179786086082458, "learning_rate": 0.0001, "loss": 1.8311, "step": 8324 }, { "epoch": 0.9562919993107806, "grad_norm": 0.45889440178871155, "learning_rate": 0.0001, "loss": 1.5582, "step": 8325 }, { "epoch": 0.9564068692206077, "grad_norm": 0.4716584086418152, "learning_rate": 0.0001, "loss": 1.6171, "step": 8326 }, { "epoch": 0.9565217391304348, "grad_norm": 0.43330761790275574, "learning_rate": 0.0001, "loss": 1.2869, "step": 8327 }, { "epoch": 0.9566366090402619, "grad_norm": 0.4397355914115906, "learning_rate": 0.0001, "loss": 1.5012, "step": 8328 }, { "epoch": 0.956751478950089, "grad_norm": 0.4772343933582306, "learning_rate": 0.0001, "loss": 1.3369, "step": 8329 }, { "epoch": 0.9568663488599162, "grad_norm": 0.5061171054840088, "learning_rate": 0.0001, "loss": 1.7989, "step": 8330 }, { "epoch": 0.9569812187697433, "grad_norm": 0.46907833218574524, "learning_rate": 0.0001, "loss": 1.5958, "step": 8331 }, { "epoch": 0.9570960886795704, "grad_norm": 0.5048026442527771, "learning_rate": 0.0001, "loss": 1.5469, "step": 8332 }, { "epoch": 0.9572109585893975, "grad_norm": 0.4772169888019562, "learning_rate": 0.0001, "loss": 1.4803, "step": 8333 }, { "epoch": 0.9573258284992247, "grad_norm": 0.47201335430145264, "learning_rate": 0.0001, "loss": 1.6003, "step": 8334 }, { "epoch": 0.9574406984090518, "grad_norm": 0.49312329292297363, "learning_rate": 0.0001, "loss": 1.5597, "step": 8335 }, { "epoch": 0.9575555683188789, "grad_norm": 0.4100249111652374, "learning_rate": 0.0001, "loss": 1.3143, "step": 8336 }, { "epoch": 0.957670438228706, "grad_norm": 0.5461440086364746, "learning_rate": 0.0001, "loss": 1.7893, "step": 8337 }, { "epoch": 0.9577853081385331, "grad_norm": 0.47124072909355164, "learning_rate": 0.0001, "loss": 1.3658, "step": 8338 }, { "epoch": 0.9579001780483603, "grad_norm": 0.4608875811100006, "learning_rate": 0.0001, "loss": 1.6032, "step": 8339 }, { "epoch": 0.9580150479581874, "grad_norm": 0.49988433718681335, "learning_rate": 0.0001, "loss": 1.6149, "step": 8340 }, { "epoch": 0.9581299178680145, "grad_norm": 0.47735628485679626, "learning_rate": 0.0001, "loss": 1.5887, "step": 8341 }, { "epoch": 0.9582447877778416, "grad_norm": 0.5070236921310425, "learning_rate": 0.0001, "loss": 1.4978, "step": 8342 }, { "epoch": 0.9583596576876687, "grad_norm": 0.5401067137718201, "learning_rate": 0.0001, "loss": 1.7773, "step": 8343 }, { "epoch": 0.9584745275974959, "grad_norm": 0.45689040422439575, "learning_rate": 0.0001, "loss": 1.6352, "step": 8344 }, { "epoch": 0.958589397507323, "grad_norm": 0.4751141667366028, "learning_rate": 0.0001, "loss": 1.5874, "step": 8345 }, { "epoch": 0.9587042674171501, "grad_norm": 0.5322357416152954, "learning_rate": 0.0001, "loss": 1.7816, "step": 8346 }, { "epoch": 0.9588191373269772, "grad_norm": 0.472920298576355, "learning_rate": 0.0001, "loss": 1.4187, "step": 8347 }, { "epoch": 0.9589340072368043, "grad_norm": 0.5017462372779846, "learning_rate": 0.0001, "loss": 1.7655, "step": 8348 }, { "epoch": 0.9590488771466315, "grad_norm": 0.4727727770805359, "learning_rate": 0.0001, "loss": 1.5264, "step": 8349 }, { "epoch": 0.9591637470564586, "grad_norm": 0.45175325870513916, "learning_rate": 0.0001, "loss": 1.5411, "step": 8350 }, { "epoch": 0.9592786169662857, "grad_norm": 0.500636100769043, "learning_rate": 0.0001, "loss": 1.7486, "step": 8351 }, { "epoch": 0.9593934868761128, "grad_norm": 0.44014236330986023, "learning_rate": 0.0001, "loss": 1.3911, "step": 8352 }, { "epoch": 0.9595083567859399, "grad_norm": 0.4677266478538513, "learning_rate": 0.0001, "loss": 1.8267, "step": 8353 }, { "epoch": 0.959623226695767, "grad_norm": 0.4543503522872925, "learning_rate": 0.0001, "loss": 1.65, "step": 8354 }, { "epoch": 0.9597380966055942, "grad_norm": 0.48893970251083374, "learning_rate": 0.0001, "loss": 1.6525, "step": 8355 }, { "epoch": 0.9598529665154213, "grad_norm": 0.4677874445915222, "learning_rate": 0.0001, "loss": 1.6743, "step": 8356 }, { "epoch": 0.9599678364252484, "grad_norm": 0.46414315700531006, "learning_rate": 0.0001, "loss": 1.6057, "step": 8357 }, { "epoch": 0.9600827063350755, "grad_norm": 0.46315401792526245, "learning_rate": 0.0001, "loss": 1.5053, "step": 8358 }, { "epoch": 0.9601975762449027, "grad_norm": 0.4876864552497864, "learning_rate": 0.0001, "loss": 1.5766, "step": 8359 }, { "epoch": 0.9603124461547298, "grad_norm": 0.4578522741794586, "learning_rate": 0.0001, "loss": 1.5981, "step": 8360 }, { "epoch": 0.9604273160645569, "grad_norm": 0.48835068941116333, "learning_rate": 0.0001, "loss": 1.7534, "step": 8361 }, { "epoch": 0.960542185974384, "grad_norm": 0.4757782518863678, "learning_rate": 0.0001, "loss": 1.4454, "step": 8362 }, { "epoch": 0.9606570558842111, "grad_norm": 0.45514336228370667, "learning_rate": 0.0001, "loss": 1.476, "step": 8363 }, { "epoch": 0.9607719257940383, "grad_norm": 0.4917604625225067, "learning_rate": 0.0001, "loss": 1.6986, "step": 8364 }, { "epoch": 0.9608867957038654, "grad_norm": 0.4693104326725006, "learning_rate": 0.0001, "loss": 1.4697, "step": 8365 }, { "epoch": 0.9610016656136925, "grad_norm": 0.4452751874923706, "learning_rate": 0.0001, "loss": 1.5288, "step": 8366 }, { "epoch": 0.9611165355235196, "grad_norm": 0.4996024966239929, "learning_rate": 0.0001, "loss": 1.5848, "step": 8367 }, { "epoch": 0.9612314054333467, "grad_norm": 0.4728527069091797, "learning_rate": 0.0001, "loss": 1.6645, "step": 8368 }, { "epoch": 0.9613462753431739, "grad_norm": 0.5003929734230042, "learning_rate": 0.0001, "loss": 1.709, "step": 8369 }, { "epoch": 0.961461145253001, "grad_norm": 0.49857136607170105, "learning_rate": 0.0001, "loss": 1.6209, "step": 8370 }, { "epoch": 0.9615760151628281, "grad_norm": 0.49390268325805664, "learning_rate": 0.0001, "loss": 1.5569, "step": 8371 }, { "epoch": 0.9616908850726552, "grad_norm": 0.5034048557281494, "learning_rate": 0.0001, "loss": 1.5221, "step": 8372 }, { "epoch": 0.9618057549824823, "grad_norm": 0.45734164118766785, "learning_rate": 0.0001, "loss": 1.4694, "step": 8373 }, { "epoch": 0.9619206248923095, "grad_norm": 0.47848743200302124, "learning_rate": 0.0001, "loss": 1.6597, "step": 8374 }, { "epoch": 0.9620354948021366, "grad_norm": 0.4872235357761383, "learning_rate": 0.0001, "loss": 1.6839, "step": 8375 }, { "epoch": 0.9621503647119637, "grad_norm": 0.491102933883667, "learning_rate": 0.0001, "loss": 1.5953, "step": 8376 }, { "epoch": 0.9622652346217908, "grad_norm": 0.4527552127838135, "learning_rate": 0.0001, "loss": 1.3692, "step": 8377 }, { "epoch": 0.9623801045316179, "grad_norm": 0.4430198073387146, "learning_rate": 0.0001, "loss": 1.5954, "step": 8378 }, { "epoch": 0.962494974441445, "grad_norm": 0.4682437479496002, "learning_rate": 0.0001, "loss": 1.5791, "step": 8379 }, { "epoch": 0.9626098443512722, "grad_norm": 0.4858921766281128, "learning_rate": 0.0001, "loss": 1.4802, "step": 8380 }, { "epoch": 0.9627247142610993, "grad_norm": 0.4844062626361847, "learning_rate": 0.0001, "loss": 1.6267, "step": 8381 }, { "epoch": 0.9628395841709264, "grad_norm": 0.5199929475784302, "learning_rate": 0.0001, "loss": 1.7057, "step": 8382 }, { "epoch": 0.9629544540807535, "grad_norm": 0.49841341376304626, "learning_rate": 0.0001, "loss": 1.5898, "step": 8383 }, { "epoch": 0.9630693239905807, "grad_norm": 0.5616093873977661, "learning_rate": 0.0001, "loss": 1.9157, "step": 8384 }, { "epoch": 0.9631841939004078, "grad_norm": 0.45158112049102783, "learning_rate": 0.0001, "loss": 1.5281, "step": 8385 }, { "epoch": 0.9632990638102349, "grad_norm": 0.5046762228012085, "learning_rate": 0.0001, "loss": 1.6964, "step": 8386 }, { "epoch": 0.963413933720062, "grad_norm": 0.5029253959655762, "learning_rate": 0.0001, "loss": 1.6383, "step": 8387 }, { "epoch": 0.9635288036298891, "grad_norm": 0.48361513018608093, "learning_rate": 0.0001, "loss": 1.7588, "step": 8388 }, { "epoch": 0.9636436735397163, "grad_norm": 0.4966965317726135, "learning_rate": 0.0001, "loss": 1.5243, "step": 8389 }, { "epoch": 0.9637585434495434, "grad_norm": 0.4773005545139313, "learning_rate": 0.0001, "loss": 1.6763, "step": 8390 }, { "epoch": 0.9638734133593705, "grad_norm": 0.4541994035243988, "learning_rate": 0.0001, "loss": 1.5686, "step": 8391 }, { "epoch": 0.9639882832691976, "grad_norm": 0.43786925077438354, "learning_rate": 0.0001, "loss": 1.3373, "step": 8392 }, { "epoch": 0.9641031531790247, "grad_norm": 0.5206244587898254, "learning_rate": 0.0001, "loss": 1.61, "step": 8393 }, { "epoch": 0.9642180230888518, "grad_norm": 0.5168501138687134, "learning_rate": 0.0001, "loss": 1.7587, "step": 8394 }, { "epoch": 0.964332892998679, "grad_norm": 0.48464956879615784, "learning_rate": 0.0001, "loss": 1.7185, "step": 8395 }, { "epoch": 0.9644477629085061, "grad_norm": 0.4584466814994812, "learning_rate": 0.0001, "loss": 1.5487, "step": 8396 }, { "epoch": 0.9645626328183332, "grad_norm": 0.49749574065208435, "learning_rate": 0.0001, "loss": 1.785, "step": 8397 }, { "epoch": 0.9646775027281603, "grad_norm": 0.4844261407852173, "learning_rate": 0.0001, "loss": 1.4182, "step": 8398 }, { "epoch": 0.9647923726379874, "grad_norm": 0.4621402323246002, "learning_rate": 0.0001, "loss": 1.5379, "step": 8399 }, { "epoch": 0.9649072425478146, "grad_norm": 0.4868880808353424, "learning_rate": 0.0001, "loss": 1.6465, "step": 8400 }, { "epoch": 0.9650221124576417, "grad_norm": 0.4497327506542206, "learning_rate": 0.0001, "loss": 1.5056, "step": 8401 }, { "epoch": 0.9651369823674688, "grad_norm": 0.5474899411201477, "learning_rate": 0.0001, "loss": 1.8234, "step": 8402 }, { "epoch": 0.9652518522772959, "grad_norm": 0.46715471148490906, "learning_rate": 0.0001, "loss": 1.3831, "step": 8403 }, { "epoch": 0.965366722187123, "grad_norm": 0.5017038583755493, "learning_rate": 0.0001, "loss": 1.5066, "step": 8404 }, { "epoch": 0.9654815920969502, "grad_norm": 0.5196341872215271, "learning_rate": 0.0001, "loss": 1.8523, "step": 8405 }, { "epoch": 0.9655964620067773, "grad_norm": 0.4290624260902405, "learning_rate": 0.0001, "loss": 1.3917, "step": 8406 }, { "epoch": 0.9657113319166044, "grad_norm": 0.5225144028663635, "learning_rate": 0.0001, "loss": 1.7594, "step": 8407 }, { "epoch": 0.9658262018264315, "grad_norm": 0.44787734746932983, "learning_rate": 0.0001, "loss": 1.5222, "step": 8408 }, { "epoch": 0.9659410717362586, "grad_norm": 0.4654848575592041, "learning_rate": 0.0001, "loss": 1.5487, "step": 8409 }, { "epoch": 0.9660559416460858, "grad_norm": 0.49902090430259705, "learning_rate": 0.0001, "loss": 1.7194, "step": 8410 }, { "epoch": 0.9661708115559129, "grad_norm": 0.5180195569992065, "learning_rate": 0.0001, "loss": 1.5937, "step": 8411 }, { "epoch": 0.96628568146574, "grad_norm": 0.4926517903804779, "learning_rate": 0.0001, "loss": 1.6377, "step": 8412 }, { "epoch": 0.9664005513755671, "grad_norm": 0.4600144028663635, "learning_rate": 0.0001, "loss": 1.5446, "step": 8413 }, { "epoch": 0.9665154212853942, "grad_norm": 0.4665214717388153, "learning_rate": 0.0001, "loss": 1.6127, "step": 8414 }, { "epoch": 0.9666302911952214, "grad_norm": 0.47527435421943665, "learning_rate": 0.0001, "loss": 1.5603, "step": 8415 }, { "epoch": 0.9667451611050485, "grad_norm": 0.4811752140522003, "learning_rate": 0.0001, "loss": 1.4612, "step": 8416 }, { "epoch": 0.9668600310148756, "grad_norm": 0.4516500234603882, "learning_rate": 0.0001, "loss": 1.4649, "step": 8417 }, { "epoch": 0.9669749009247027, "grad_norm": 0.5186557769775391, "learning_rate": 0.0001, "loss": 1.7303, "step": 8418 }, { "epoch": 0.9670897708345298, "grad_norm": 0.4529307782649994, "learning_rate": 0.0001, "loss": 1.4181, "step": 8419 }, { "epoch": 0.967204640744357, "grad_norm": 0.4979994297027588, "learning_rate": 0.0001, "loss": 1.6673, "step": 8420 }, { "epoch": 0.9673195106541841, "grad_norm": 0.4783253073692322, "learning_rate": 0.0001, "loss": 1.5552, "step": 8421 }, { "epoch": 0.9674343805640112, "grad_norm": 0.4694981276988983, "learning_rate": 0.0001, "loss": 1.6136, "step": 8422 }, { "epoch": 0.9675492504738383, "grad_norm": 0.4916779398918152, "learning_rate": 0.0001, "loss": 1.7472, "step": 8423 }, { "epoch": 0.9676641203836654, "grad_norm": 0.42479321360588074, "learning_rate": 0.0001, "loss": 1.3967, "step": 8424 }, { "epoch": 0.9677789902934926, "grad_norm": 0.5579563975334167, "learning_rate": 0.0001, "loss": 1.8852, "step": 8425 }, { "epoch": 0.9678938602033197, "grad_norm": 0.5487393736839294, "learning_rate": 0.0001, "loss": 1.7019, "step": 8426 }, { "epoch": 0.9680087301131468, "grad_norm": 0.46243569254875183, "learning_rate": 0.0001, "loss": 1.5185, "step": 8427 }, { "epoch": 0.9681236000229739, "grad_norm": 0.468492716550827, "learning_rate": 0.0001, "loss": 1.5824, "step": 8428 }, { "epoch": 0.968238469932801, "grad_norm": 0.4968058466911316, "learning_rate": 0.0001, "loss": 1.586, "step": 8429 }, { "epoch": 0.9683533398426282, "grad_norm": 0.44949015974998474, "learning_rate": 0.0001, "loss": 1.5882, "step": 8430 }, { "epoch": 0.9684682097524553, "grad_norm": 0.46042609214782715, "learning_rate": 0.0001, "loss": 1.6254, "step": 8431 }, { "epoch": 0.9685830796622825, "grad_norm": 0.48226192593574524, "learning_rate": 0.0001, "loss": 1.6218, "step": 8432 }, { "epoch": 0.9686979495721096, "grad_norm": 0.4611875116825104, "learning_rate": 0.0001, "loss": 1.4546, "step": 8433 }, { "epoch": 0.9688128194819368, "grad_norm": 0.49678662419319153, "learning_rate": 0.0001, "loss": 1.6045, "step": 8434 }, { "epoch": 0.9689276893917639, "grad_norm": 0.4989873170852661, "learning_rate": 0.0001, "loss": 1.6201, "step": 8435 }, { "epoch": 0.969042559301591, "grad_norm": 0.49776941537857056, "learning_rate": 0.0001, "loss": 1.6435, "step": 8436 }, { "epoch": 0.9691574292114181, "grad_norm": 0.4603618085384369, "learning_rate": 0.0001, "loss": 1.6716, "step": 8437 }, { "epoch": 0.9692722991212452, "grad_norm": 0.503463625907898, "learning_rate": 0.0001, "loss": 1.6739, "step": 8438 }, { "epoch": 0.9693871690310724, "grad_norm": 0.520193874835968, "learning_rate": 0.0001, "loss": 1.7394, "step": 8439 }, { "epoch": 0.9695020389408995, "grad_norm": 0.4809243679046631, "learning_rate": 0.0001, "loss": 1.5859, "step": 8440 }, { "epoch": 0.9696169088507266, "grad_norm": 0.49442121386528015, "learning_rate": 0.0001, "loss": 1.6572, "step": 8441 }, { "epoch": 0.9697317787605537, "grad_norm": 0.43582677841186523, "learning_rate": 0.0001, "loss": 1.5189, "step": 8442 }, { "epoch": 0.9698466486703808, "grad_norm": 0.4740058481693268, "learning_rate": 0.0001, "loss": 1.6358, "step": 8443 }, { "epoch": 0.969961518580208, "grad_norm": 0.4624651372432709, "learning_rate": 0.0001, "loss": 1.6075, "step": 8444 }, { "epoch": 0.9700763884900351, "grad_norm": 0.5144423842430115, "learning_rate": 0.0001, "loss": 1.7943, "step": 8445 }, { "epoch": 0.9701912583998622, "grad_norm": 0.4762701392173767, "learning_rate": 0.0001, "loss": 1.4843, "step": 8446 }, { "epoch": 0.9703061283096893, "grad_norm": 0.46587637066841125, "learning_rate": 0.0001, "loss": 1.6171, "step": 8447 }, { "epoch": 0.9704209982195164, "grad_norm": 0.516172468662262, "learning_rate": 0.0001, "loss": 1.4831, "step": 8448 }, { "epoch": 0.9705358681293436, "grad_norm": 0.4806175231933594, "learning_rate": 0.0001, "loss": 1.5625, "step": 8449 }, { "epoch": 0.9706507380391707, "grad_norm": 0.4877374768257141, "learning_rate": 0.0001, "loss": 1.6528, "step": 8450 }, { "epoch": 0.9707656079489978, "grad_norm": 0.5047240257263184, "learning_rate": 0.0001, "loss": 1.7531, "step": 8451 }, { "epoch": 0.9708804778588249, "grad_norm": 0.4985336363315582, "learning_rate": 0.0001, "loss": 1.8455, "step": 8452 }, { "epoch": 0.970995347768652, "grad_norm": 0.5148325562477112, "learning_rate": 0.0001, "loss": 1.6977, "step": 8453 }, { "epoch": 0.9711102176784792, "grad_norm": 0.4783836901187897, "learning_rate": 0.0001, "loss": 1.7402, "step": 8454 }, { "epoch": 0.9712250875883063, "grad_norm": 0.5134006142616272, "learning_rate": 0.0001, "loss": 1.5913, "step": 8455 }, { "epoch": 0.9713399574981334, "grad_norm": 0.4870409071445465, "learning_rate": 0.0001, "loss": 1.5859, "step": 8456 }, { "epoch": 0.9714548274079605, "grad_norm": 0.47401508688926697, "learning_rate": 0.0001, "loss": 1.7521, "step": 8457 }, { "epoch": 0.9715696973177876, "grad_norm": 0.46967458724975586, "learning_rate": 0.0001, "loss": 1.5803, "step": 8458 }, { "epoch": 0.9716845672276148, "grad_norm": 0.5170222520828247, "learning_rate": 0.0001, "loss": 1.662, "step": 8459 }, { "epoch": 0.9717994371374419, "grad_norm": 0.503593921661377, "learning_rate": 0.0001, "loss": 1.5592, "step": 8460 }, { "epoch": 0.971914307047269, "grad_norm": 0.47926294803619385, "learning_rate": 0.0001, "loss": 1.5149, "step": 8461 }, { "epoch": 0.9720291769570961, "grad_norm": 0.48252803087234497, "learning_rate": 0.0001, "loss": 1.7432, "step": 8462 }, { "epoch": 0.9721440468669232, "grad_norm": 0.5037051439285278, "learning_rate": 0.0001, "loss": 1.7139, "step": 8463 }, { "epoch": 0.9722589167767504, "grad_norm": 0.46094417572021484, "learning_rate": 0.0001, "loss": 1.406, "step": 8464 }, { "epoch": 0.9723737866865775, "grad_norm": 0.5147666335105896, "learning_rate": 0.0001, "loss": 1.6715, "step": 8465 }, { "epoch": 0.9724886565964046, "grad_norm": 0.4747392237186432, "learning_rate": 0.0001, "loss": 1.6376, "step": 8466 }, { "epoch": 0.9726035265062317, "grad_norm": 0.5164050459861755, "learning_rate": 0.0001, "loss": 1.5872, "step": 8467 }, { "epoch": 0.9727183964160588, "grad_norm": 0.46309229731559753, "learning_rate": 0.0001, "loss": 1.5583, "step": 8468 }, { "epoch": 0.972833266325886, "grad_norm": 0.4927702248096466, "learning_rate": 0.0001, "loss": 1.6477, "step": 8469 }, { "epoch": 0.9729481362357131, "grad_norm": 0.5026853084564209, "learning_rate": 0.0001, "loss": 1.5314, "step": 8470 }, { "epoch": 0.9730630061455402, "grad_norm": 0.5030810832977295, "learning_rate": 0.0001, "loss": 1.7262, "step": 8471 }, { "epoch": 0.9731778760553673, "grad_norm": 0.4909353256225586, "learning_rate": 0.0001, "loss": 1.6468, "step": 8472 }, { "epoch": 0.9732927459651944, "grad_norm": 0.4842395484447479, "learning_rate": 0.0001, "loss": 1.6103, "step": 8473 }, { "epoch": 0.9734076158750216, "grad_norm": 0.44747745990753174, "learning_rate": 0.0001, "loss": 1.4004, "step": 8474 }, { "epoch": 0.9735224857848487, "grad_norm": 0.4720572233200073, "learning_rate": 0.0001, "loss": 1.4371, "step": 8475 }, { "epoch": 0.9736373556946758, "grad_norm": 0.5061703324317932, "learning_rate": 0.0001, "loss": 1.5303, "step": 8476 }, { "epoch": 0.9737522256045029, "grad_norm": 0.46352124214172363, "learning_rate": 0.0001, "loss": 1.5687, "step": 8477 }, { "epoch": 0.97386709551433, "grad_norm": 0.4936666488647461, "learning_rate": 0.0001, "loss": 1.6035, "step": 8478 }, { "epoch": 0.9739819654241572, "grad_norm": 0.4917965829372406, "learning_rate": 0.0001, "loss": 1.545, "step": 8479 }, { "epoch": 0.9740968353339843, "grad_norm": 0.5103937387466431, "learning_rate": 0.0001, "loss": 1.7674, "step": 8480 }, { "epoch": 0.9742117052438114, "grad_norm": 0.44612032175064087, "learning_rate": 0.0001, "loss": 1.3708, "step": 8481 }, { "epoch": 0.9743265751536385, "grad_norm": 0.48888033628463745, "learning_rate": 0.0001, "loss": 1.5865, "step": 8482 }, { "epoch": 0.9744414450634656, "grad_norm": 0.49829840660095215, "learning_rate": 0.0001, "loss": 1.7156, "step": 8483 }, { "epoch": 0.9745563149732928, "grad_norm": 0.4825059473514557, "learning_rate": 0.0001, "loss": 1.4587, "step": 8484 }, { "epoch": 0.9746711848831199, "grad_norm": 0.44610595703125, "learning_rate": 0.0001, "loss": 1.3814, "step": 8485 }, { "epoch": 0.974786054792947, "grad_norm": 0.5563763976097107, "learning_rate": 0.0001, "loss": 1.7003, "step": 8486 }, { "epoch": 0.9749009247027741, "grad_norm": 0.4703843295574188, "learning_rate": 0.0001, "loss": 1.4677, "step": 8487 }, { "epoch": 0.9750157946126012, "grad_norm": 0.524733304977417, "learning_rate": 0.0001, "loss": 1.6185, "step": 8488 }, { "epoch": 0.9751306645224284, "grad_norm": 0.5146064758300781, "learning_rate": 0.0001, "loss": 1.8297, "step": 8489 }, { "epoch": 0.9752455344322555, "grad_norm": 0.432230681180954, "learning_rate": 0.0001, "loss": 1.5052, "step": 8490 }, { "epoch": 0.9753604043420826, "grad_norm": 0.4458877742290497, "learning_rate": 0.0001, "loss": 1.5611, "step": 8491 }, { "epoch": 0.9754752742519097, "grad_norm": 0.474324107170105, "learning_rate": 0.0001, "loss": 1.7137, "step": 8492 }, { "epoch": 0.9755901441617368, "grad_norm": 0.4817248582839966, "learning_rate": 0.0001, "loss": 1.4067, "step": 8493 }, { "epoch": 0.975705014071564, "grad_norm": 0.46746066212654114, "learning_rate": 0.0001, "loss": 1.5686, "step": 8494 }, { "epoch": 0.9758198839813911, "grad_norm": 0.4995744824409485, "learning_rate": 0.0001, "loss": 1.6563, "step": 8495 }, { "epoch": 0.9759347538912182, "grad_norm": 0.4750157594680786, "learning_rate": 0.0001, "loss": 1.4601, "step": 8496 }, { "epoch": 0.9760496238010453, "grad_norm": 0.474306583404541, "learning_rate": 0.0001, "loss": 1.5302, "step": 8497 }, { "epoch": 0.9761644937108724, "grad_norm": 0.494228720664978, "learning_rate": 0.0001, "loss": 1.4225, "step": 8498 }, { "epoch": 0.9762793636206996, "grad_norm": 0.4527890384197235, "learning_rate": 0.0001, "loss": 1.5073, "step": 8499 }, { "epoch": 0.9763942335305267, "grad_norm": 0.4580267071723938, "learning_rate": 0.0001, "loss": 1.5774, "step": 8500 }, { "epoch": 0.9765091034403538, "grad_norm": 0.5154402852058411, "learning_rate": 0.0001, "loss": 1.6121, "step": 8501 }, { "epoch": 0.9766239733501809, "grad_norm": 0.4666356146335602, "learning_rate": 0.0001, "loss": 1.6026, "step": 8502 }, { "epoch": 0.976738843260008, "grad_norm": 0.4806033670902252, "learning_rate": 0.0001, "loss": 1.6383, "step": 8503 }, { "epoch": 0.9768537131698352, "grad_norm": 0.49173009395599365, "learning_rate": 0.0001, "loss": 1.6378, "step": 8504 }, { "epoch": 0.9769685830796623, "grad_norm": 0.528270959854126, "learning_rate": 0.0001, "loss": 1.6054, "step": 8505 }, { "epoch": 0.9770834529894894, "grad_norm": 0.4734746217727661, "learning_rate": 0.0001, "loss": 1.7124, "step": 8506 }, { "epoch": 0.9771983228993165, "grad_norm": 0.44804245233535767, "learning_rate": 0.0001, "loss": 1.4199, "step": 8507 }, { "epoch": 0.9773131928091436, "grad_norm": 0.4997158944606781, "learning_rate": 0.0001, "loss": 1.5853, "step": 8508 }, { "epoch": 0.9774280627189708, "grad_norm": 0.5285566449165344, "learning_rate": 0.0001, "loss": 1.5682, "step": 8509 }, { "epoch": 0.9775429326287979, "grad_norm": 0.5019384026527405, "learning_rate": 0.0001, "loss": 1.7047, "step": 8510 }, { "epoch": 0.977657802538625, "grad_norm": 0.49325329065322876, "learning_rate": 0.0001, "loss": 1.5596, "step": 8511 }, { "epoch": 0.9777726724484521, "grad_norm": 0.439513236284256, "learning_rate": 0.0001, "loss": 1.3171, "step": 8512 }, { "epoch": 0.9778875423582792, "grad_norm": 0.4921404719352722, "learning_rate": 0.0001, "loss": 1.6964, "step": 8513 }, { "epoch": 0.9780024122681064, "grad_norm": 0.48236212134361267, "learning_rate": 0.0001, "loss": 1.3529, "step": 8514 }, { "epoch": 0.9781172821779335, "grad_norm": 0.437320739030838, "learning_rate": 0.0001, "loss": 1.345, "step": 8515 }, { "epoch": 0.9782321520877606, "grad_norm": 0.48497122526168823, "learning_rate": 0.0001, "loss": 1.2823, "step": 8516 }, { "epoch": 0.9783470219975877, "grad_norm": 0.48262646794319153, "learning_rate": 0.0001, "loss": 1.6981, "step": 8517 }, { "epoch": 0.9784618919074148, "grad_norm": 0.48424267768859863, "learning_rate": 0.0001, "loss": 1.5371, "step": 8518 }, { "epoch": 0.978576761817242, "grad_norm": 0.48815423250198364, "learning_rate": 0.0001, "loss": 1.5908, "step": 8519 }, { "epoch": 0.9786916317270691, "grad_norm": 0.48007214069366455, "learning_rate": 0.0001, "loss": 1.696, "step": 8520 }, { "epoch": 0.9788065016368962, "grad_norm": 0.501133918762207, "learning_rate": 0.0001, "loss": 1.6893, "step": 8521 }, { "epoch": 0.9789213715467233, "grad_norm": 0.48664984107017517, "learning_rate": 0.0001, "loss": 1.5407, "step": 8522 }, { "epoch": 0.9790362414565504, "grad_norm": 0.47685080766677856, "learning_rate": 0.0001, "loss": 1.5173, "step": 8523 }, { "epoch": 0.9791511113663776, "grad_norm": 0.458021342754364, "learning_rate": 0.0001, "loss": 1.3507, "step": 8524 }, { "epoch": 0.9792659812762047, "grad_norm": 0.48686715960502625, "learning_rate": 0.0001, "loss": 1.6155, "step": 8525 }, { "epoch": 0.9793808511860318, "grad_norm": 0.4461478590965271, "learning_rate": 0.0001, "loss": 1.4896, "step": 8526 }, { "epoch": 0.9794957210958589, "grad_norm": 0.47489675879478455, "learning_rate": 0.0001, "loss": 1.691, "step": 8527 }, { "epoch": 0.979610591005686, "grad_norm": 0.4845513105392456, "learning_rate": 0.0001, "loss": 1.5867, "step": 8528 }, { "epoch": 0.9797254609155132, "grad_norm": 0.4592495858669281, "learning_rate": 0.0001, "loss": 1.5356, "step": 8529 }, { "epoch": 0.9798403308253403, "grad_norm": 0.4863679111003876, "learning_rate": 0.0001, "loss": 1.5116, "step": 8530 }, { "epoch": 0.9799552007351674, "grad_norm": 0.47153112292289734, "learning_rate": 0.0001, "loss": 1.5476, "step": 8531 }, { "epoch": 0.9800700706449945, "grad_norm": 0.4806155562400818, "learning_rate": 0.0001, "loss": 1.3655, "step": 8532 }, { "epoch": 0.9801849405548216, "grad_norm": 0.5179005265235901, "learning_rate": 0.0001, "loss": 1.6003, "step": 8533 }, { "epoch": 0.9802998104646488, "grad_norm": 0.5735377073287964, "learning_rate": 0.0001, "loss": 1.5655, "step": 8534 }, { "epoch": 0.9804146803744759, "grad_norm": 0.47526344656944275, "learning_rate": 0.0001, "loss": 1.5431, "step": 8535 }, { "epoch": 0.980529550284303, "grad_norm": 0.4913672208786011, "learning_rate": 0.0001, "loss": 1.6334, "step": 8536 }, { "epoch": 0.9806444201941301, "grad_norm": 0.5402122735977173, "learning_rate": 0.0001, "loss": 1.6258, "step": 8537 }, { "epoch": 0.9807592901039572, "grad_norm": 0.4758612811565399, "learning_rate": 0.0001, "loss": 1.6862, "step": 8538 }, { "epoch": 0.9808741600137844, "grad_norm": 0.46299320459365845, "learning_rate": 0.0001, "loss": 1.5098, "step": 8539 }, { "epoch": 0.9809890299236115, "grad_norm": 0.48221555352211, "learning_rate": 0.0001, "loss": 1.5331, "step": 8540 }, { "epoch": 0.9811038998334386, "grad_norm": 0.47534555196762085, "learning_rate": 0.0001, "loss": 1.6351, "step": 8541 }, { "epoch": 0.9812187697432657, "grad_norm": 0.44301724433898926, "learning_rate": 0.0001, "loss": 1.4897, "step": 8542 }, { "epoch": 0.9813336396530928, "grad_norm": 0.5158907771110535, "learning_rate": 0.0001, "loss": 1.6469, "step": 8543 }, { "epoch": 0.98144850956292, "grad_norm": 0.5194290280342102, "learning_rate": 0.0001, "loss": 1.4948, "step": 8544 }, { "epoch": 0.9815633794727471, "grad_norm": 0.5136659741401672, "learning_rate": 0.0001, "loss": 1.6842, "step": 8545 }, { "epoch": 0.9816782493825742, "grad_norm": 0.5167720913887024, "learning_rate": 0.0001, "loss": 1.844, "step": 8546 }, { "epoch": 0.9817931192924013, "grad_norm": 0.45328572392463684, "learning_rate": 0.0001, "loss": 1.513, "step": 8547 }, { "epoch": 0.9819079892022284, "grad_norm": 0.4788568317890167, "learning_rate": 0.0001, "loss": 1.6795, "step": 8548 }, { "epoch": 0.9820228591120556, "grad_norm": 0.4906943440437317, "learning_rate": 0.0001, "loss": 1.5957, "step": 8549 }, { "epoch": 0.9821377290218827, "grad_norm": 0.4383232295513153, "learning_rate": 0.0001, "loss": 1.3974, "step": 8550 }, { "epoch": 0.9822525989317098, "grad_norm": 0.480825275182724, "learning_rate": 0.0001, "loss": 1.5138, "step": 8551 }, { "epoch": 0.9823674688415369, "grad_norm": 0.47409677505493164, "learning_rate": 0.0001, "loss": 1.5389, "step": 8552 }, { "epoch": 0.982482338751364, "grad_norm": 0.4691976010799408, "learning_rate": 0.0001, "loss": 1.4925, "step": 8553 }, { "epoch": 0.9825972086611912, "grad_norm": 0.48811331391334534, "learning_rate": 0.0001, "loss": 1.3919, "step": 8554 }, { "epoch": 0.9827120785710183, "grad_norm": 0.5102879405021667, "learning_rate": 0.0001, "loss": 1.7138, "step": 8555 }, { "epoch": 0.9828269484808454, "grad_norm": 0.4797075092792511, "learning_rate": 0.0001, "loss": 1.4157, "step": 8556 }, { "epoch": 0.9829418183906725, "grad_norm": 0.4744819104671478, "learning_rate": 0.0001, "loss": 1.54, "step": 8557 }, { "epoch": 0.9830566883004996, "grad_norm": 0.5046994090080261, "learning_rate": 0.0001, "loss": 1.5037, "step": 8558 }, { "epoch": 0.9831715582103268, "grad_norm": 0.49901989102363586, "learning_rate": 0.0001, "loss": 1.6441, "step": 8559 }, { "epoch": 0.9832864281201539, "grad_norm": 0.49144455790519714, "learning_rate": 0.0001, "loss": 1.6678, "step": 8560 }, { "epoch": 0.983401298029981, "grad_norm": 0.47116243839263916, "learning_rate": 0.0001, "loss": 1.5554, "step": 8561 }, { "epoch": 0.9835161679398081, "grad_norm": 0.5342057347297668, "learning_rate": 0.0001, "loss": 1.7248, "step": 8562 }, { "epoch": 0.9836310378496352, "grad_norm": 0.4303724467754364, "learning_rate": 0.0001, "loss": 1.3374, "step": 8563 }, { "epoch": 0.9837459077594624, "grad_norm": 0.5251704454421997, "learning_rate": 0.0001, "loss": 1.572, "step": 8564 }, { "epoch": 0.9838607776692895, "grad_norm": 0.4820622503757477, "learning_rate": 0.0001, "loss": 1.531, "step": 8565 }, { "epoch": 0.9839756475791166, "grad_norm": 0.4786358177661896, "learning_rate": 0.0001, "loss": 1.6379, "step": 8566 }, { "epoch": 0.9840905174889437, "grad_norm": 0.508850634098053, "learning_rate": 0.0001, "loss": 1.5797, "step": 8567 }, { "epoch": 0.9842053873987708, "grad_norm": 0.5014883875846863, "learning_rate": 0.0001, "loss": 1.7386, "step": 8568 }, { "epoch": 0.9843202573085981, "grad_norm": 0.48899659514427185, "learning_rate": 0.0001, "loss": 1.5323, "step": 8569 }, { "epoch": 0.9844351272184252, "grad_norm": 0.5107044577598572, "learning_rate": 0.0001, "loss": 1.8143, "step": 8570 }, { "epoch": 0.9845499971282523, "grad_norm": 0.5331456065177917, "learning_rate": 0.0001, "loss": 1.5942, "step": 8571 }, { "epoch": 0.9846648670380794, "grad_norm": 0.46554502844810486, "learning_rate": 0.0001, "loss": 1.4281, "step": 8572 }, { "epoch": 0.9847797369479065, "grad_norm": 0.45582613348960876, "learning_rate": 0.0001, "loss": 1.512, "step": 8573 }, { "epoch": 0.9848946068577337, "grad_norm": 0.5047944188117981, "learning_rate": 0.0001, "loss": 1.6731, "step": 8574 }, { "epoch": 0.9850094767675608, "grad_norm": 0.4860698878765106, "learning_rate": 0.0001, "loss": 1.6575, "step": 8575 }, { "epoch": 0.9851243466773879, "grad_norm": 0.5398543477058411, "learning_rate": 0.0001, "loss": 1.6691, "step": 8576 }, { "epoch": 0.985239216587215, "grad_norm": 0.4853983521461487, "learning_rate": 0.0001, "loss": 1.4137, "step": 8577 }, { "epoch": 0.9853540864970421, "grad_norm": 0.47747766971588135, "learning_rate": 0.0001, "loss": 1.6155, "step": 8578 }, { "epoch": 0.9854689564068693, "grad_norm": 0.5553230047225952, "learning_rate": 0.0001, "loss": 1.9805, "step": 8579 }, { "epoch": 0.9855838263166964, "grad_norm": 0.5310031771659851, "learning_rate": 0.0001, "loss": 1.5902, "step": 8580 }, { "epoch": 0.9856986962265235, "grad_norm": 0.46402546763420105, "learning_rate": 0.0001, "loss": 1.3974, "step": 8581 }, { "epoch": 0.9858135661363506, "grad_norm": 0.4859161674976349, "learning_rate": 0.0001, "loss": 1.7247, "step": 8582 }, { "epoch": 0.9859284360461777, "grad_norm": 0.47045204043388367, "learning_rate": 0.0001, "loss": 1.3744, "step": 8583 }, { "epoch": 0.9860433059560049, "grad_norm": 0.525871992111206, "learning_rate": 0.0001, "loss": 1.5243, "step": 8584 }, { "epoch": 0.986158175865832, "grad_norm": 0.47140932083129883, "learning_rate": 0.0001, "loss": 1.5403, "step": 8585 }, { "epoch": 0.9862730457756591, "grad_norm": 0.4621630311012268, "learning_rate": 0.0001, "loss": 1.4708, "step": 8586 }, { "epoch": 0.9863879156854862, "grad_norm": 0.4546113610267639, "learning_rate": 0.0001, "loss": 1.5826, "step": 8587 }, { "epoch": 0.9865027855953133, "grad_norm": 0.47744020819664, "learning_rate": 0.0001, "loss": 1.5277, "step": 8588 }, { "epoch": 0.9866176555051405, "grad_norm": 0.45344898104667664, "learning_rate": 0.0001, "loss": 1.3902, "step": 8589 }, { "epoch": 0.9867325254149676, "grad_norm": 0.5098491907119751, "learning_rate": 0.0001, "loss": 1.5117, "step": 8590 }, { "epoch": 0.9868473953247947, "grad_norm": 0.5069968700408936, "learning_rate": 0.0001, "loss": 1.4687, "step": 8591 }, { "epoch": 0.9869622652346218, "grad_norm": 0.45141881704330444, "learning_rate": 0.0001, "loss": 1.5025, "step": 8592 }, { "epoch": 0.987077135144449, "grad_norm": 0.49101027846336365, "learning_rate": 0.0001, "loss": 1.5944, "step": 8593 }, { "epoch": 0.9871920050542761, "grad_norm": 0.4652627408504486, "learning_rate": 0.0001, "loss": 1.6014, "step": 8594 }, { "epoch": 0.9873068749641032, "grad_norm": 0.4782879650592804, "learning_rate": 0.0001, "loss": 1.6211, "step": 8595 }, { "epoch": 0.9874217448739303, "grad_norm": 0.4874049425125122, "learning_rate": 0.0001, "loss": 1.5952, "step": 8596 }, { "epoch": 0.9875366147837574, "grad_norm": 0.5173846483230591, "learning_rate": 0.0001, "loss": 1.6132, "step": 8597 }, { "epoch": 0.9876514846935845, "grad_norm": 0.520719051361084, "learning_rate": 0.0001, "loss": 1.6545, "step": 8598 }, { "epoch": 0.9877663546034117, "grad_norm": 0.5008309483528137, "learning_rate": 0.0001, "loss": 1.5412, "step": 8599 }, { "epoch": 0.9878812245132388, "grad_norm": 0.491559237241745, "learning_rate": 0.0001, "loss": 1.5968, "step": 8600 }, { "epoch": 0.9879960944230659, "grad_norm": 0.5189545750617981, "learning_rate": 0.0001, "loss": 1.5997, "step": 8601 }, { "epoch": 0.988110964332893, "grad_norm": 0.4896295964717865, "learning_rate": 0.0001, "loss": 1.4129, "step": 8602 }, { "epoch": 0.9882258342427201, "grad_norm": 0.47650331258773804, "learning_rate": 0.0001, "loss": 1.6492, "step": 8603 }, { "epoch": 0.9883407041525473, "grad_norm": 0.46054142713546753, "learning_rate": 0.0001, "loss": 1.4276, "step": 8604 }, { "epoch": 0.9884555740623744, "grad_norm": 0.5123742818832397, "learning_rate": 0.0001, "loss": 1.6383, "step": 8605 }, { "epoch": 0.9885704439722015, "grad_norm": 0.48375701904296875, "learning_rate": 0.0001, "loss": 1.3923, "step": 8606 }, { "epoch": 0.9886853138820286, "grad_norm": 0.5176060795783997, "learning_rate": 0.0001, "loss": 1.5655, "step": 8607 }, { "epoch": 0.9888001837918557, "grad_norm": 0.5452961921691895, "learning_rate": 0.0001, "loss": 1.7097, "step": 8608 }, { "epoch": 0.9889150537016829, "grad_norm": 0.5161251425743103, "learning_rate": 0.0001, "loss": 1.6645, "step": 8609 }, { "epoch": 0.98902992361151, "grad_norm": 0.47460034489631653, "learning_rate": 0.0001, "loss": 1.6462, "step": 8610 }, { "epoch": 0.9891447935213371, "grad_norm": 0.46245843172073364, "learning_rate": 0.0001, "loss": 1.2765, "step": 8611 }, { "epoch": 0.9892596634311642, "grad_norm": 0.46704110503196716, "learning_rate": 0.0001, "loss": 1.5735, "step": 8612 }, { "epoch": 0.9893745333409913, "grad_norm": 0.585635781288147, "learning_rate": 0.0001, "loss": 1.4845, "step": 8613 }, { "epoch": 0.9894894032508185, "grad_norm": 0.4674919843673706, "learning_rate": 0.0001, "loss": 1.6238, "step": 8614 }, { "epoch": 0.9896042731606456, "grad_norm": 0.4823983609676361, "learning_rate": 0.0001, "loss": 1.5698, "step": 8615 }, { "epoch": 0.9897191430704727, "grad_norm": 0.5025752186775208, "learning_rate": 0.0001, "loss": 1.5845, "step": 8616 }, { "epoch": 0.9898340129802998, "grad_norm": 0.5351320505142212, "learning_rate": 0.0001, "loss": 1.7404, "step": 8617 }, { "epoch": 0.989948882890127, "grad_norm": 0.4923611283302307, "learning_rate": 0.0001, "loss": 1.4485, "step": 8618 }, { "epoch": 0.9900637527999541, "grad_norm": 0.4848472476005554, "learning_rate": 0.0001, "loss": 1.5072, "step": 8619 }, { "epoch": 0.9901786227097812, "grad_norm": 0.5281382203102112, "learning_rate": 0.0001, "loss": 1.6906, "step": 8620 }, { "epoch": 0.9902934926196083, "grad_norm": 0.4649568498134613, "learning_rate": 0.0001, "loss": 1.5553, "step": 8621 }, { "epoch": 0.9904083625294354, "grad_norm": 0.4958634674549103, "learning_rate": 0.0001, "loss": 1.6043, "step": 8622 }, { "epoch": 0.9905232324392625, "grad_norm": 0.501117467880249, "learning_rate": 0.0001, "loss": 1.6405, "step": 8623 }, { "epoch": 0.9906381023490897, "grad_norm": 0.4949086308479309, "learning_rate": 0.0001, "loss": 1.7568, "step": 8624 }, { "epoch": 0.9907529722589168, "grad_norm": 0.46853989362716675, "learning_rate": 0.0001, "loss": 1.4587, "step": 8625 }, { "epoch": 0.9908678421687439, "grad_norm": 0.47322335839271545, "learning_rate": 0.0001, "loss": 1.5358, "step": 8626 }, { "epoch": 0.990982712078571, "grad_norm": 0.5503798127174377, "learning_rate": 0.0001, "loss": 1.7705, "step": 8627 }, { "epoch": 0.9910975819883981, "grad_norm": 0.48998063802719116, "learning_rate": 0.0001, "loss": 1.7953, "step": 8628 }, { "epoch": 0.9912124518982253, "grad_norm": 0.4801502525806427, "learning_rate": 0.0001, "loss": 1.6477, "step": 8629 }, { "epoch": 0.9913273218080524, "grad_norm": 0.5271478295326233, "learning_rate": 0.0001, "loss": 1.618, "step": 8630 }, { "epoch": 0.9914421917178795, "grad_norm": 0.501660943031311, "learning_rate": 0.0001, "loss": 1.6391, "step": 8631 }, { "epoch": 0.9915570616277066, "grad_norm": 0.47359898686408997, "learning_rate": 0.0001, "loss": 1.5764, "step": 8632 }, { "epoch": 0.9916719315375337, "grad_norm": 0.49344560503959656, "learning_rate": 0.0001, "loss": 1.3197, "step": 8633 }, { "epoch": 0.9917868014473609, "grad_norm": 0.4800715446472168, "learning_rate": 0.0001, "loss": 1.5114, "step": 8634 }, { "epoch": 0.991901671357188, "grad_norm": 0.49181854724884033, "learning_rate": 0.0001, "loss": 1.6261, "step": 8635 }, { "epoch": 0.9920165412670151, "grad_norm": 0.4867425560951233, "learning_rate": 0.0001, "loss": 1.5423, "step": 8636 }, { "epoch": 0.9921314111768422, "grad_norm": 0.5553606152534485, "learning_rate": 0.0001, "loss": 1.8589, "step": 8637 }, { "epoch": 0.9922462810866693, "grad_norm": 0.4396592080593109, "learning_rate": 0.0001, "loss": 1.3746, "step": 8638 }, { "epoch": 0.9923611509964965, "grad_norm": 0.5199365019798279, "learning_rate": 0.0001, "loss": 1.7913, "step": 8639 }, { "epoch": 0.9924760209063236, "grad_norm": 0.5626462697982788, "learning_rate": 0.0001, "loss": 1.583, "step": 8640 }, { "epoch": 0.9925908908161507, "grad_norm": 0.4940207600593567, "learning_rate": 0.0001, "loss": 1.3618, "step": 8641 }, { "epoch": 0.9927057607259778, "grad_norm": 0.5407283902168274, "learning_rate": 0.0001, "loss": 1.5611, "step": 8642 }, { "epoch": 0.992820630635805, "grad_norm": 0.5184419751167297, "learning_rate": 0.0001, "loss": 1.7046, "step": 8643 }, { "epoch": 0.9929355005456321, "grad_norm": 0.4674243628978729, "learning_rate": 0.0001, "loss": 1.4962, "step": 8644 }, { "epoch": 0.9930503704554592, "grad_norm": 0.519338846206665, "learning_rate": 0.0001, "loss": 1.731, "step": 8645 }, { "epoch": 0.9931652403652863, "grad_norm": 0.43663740158081055, "learning_rate": 0.0001, "loss": 1.34, "step": 8646 }, { "epoch": 0.9932801102751134, "grad_norm": 0.4806329309940338, "learning_rate": 0.0001, "loss": 1.322, "step": 8647 }, { "epoch": 0.9933949801849405, "grad_norm": 0.509090006351471, "learning_rate": 0.0001, "loss": 1.6648, "step": 8648 }, { "epoch": 0.9935098500947677, "grad_norm": 0.4767511487007141, "learning_rate": 0.0001, "loss": 1.5916, "step": 8649 }, { "epoch": 0.9936247200045948, "grad_norm": 0.4973703920841217, "learning_rate": 0.0001, "loss": 1.6247, "step": 8650 }, { "epoch": 0.9937395899144219, "grad_norm": 0.4734041392803192, "learning_rate": 0.0001, "loss": 1.7253, "step": 8651 }, { "epoch": 0.993854459824249, "grad_norm": 0.44982102513313293, "learning_rate": 0.0001, "loss": 1.5608, "step": 8652 }, { "epoch": 0.9939693297340761, "grad_norm": 0.4307469129562378, "learning_rate": 0.0001, "loss": 1.3253, "step": 8653 }, { "epoch": 0.9940841996439033, "grad_norm": 0.5234149694442749, "learning_rate": 0.0001, "loss": 1.6276, "step": 8654 }, { "epoch": 0.9941990695537304, "grad_norm": 0.4377621114253998, "learning_rate": 0.0001, "loss": 1.1823, "step": 8655 }, { "epoch": 0.9943139394635575, "grad_norm": 0.4438837766647339, "learning_rate": 0.0001, "loss": 1.415, "step": 8656 }, { "epoch": 0.9944288093733846, "grad_norm": 0.4784393012523651, "learning_rate": 0.0001, "loss": 1.499, "step": 8657 }, { "epoch": 0.9945436792832117, "grad_norm": 0.468770831823349, "learning_rate": 0.0001, "loss": 1.5585, "step": 8658 }, { "epoch": 0.9946585491930389, "grad_norm": 0.4884924292564392, "learning_rate": 0.0001, "loss": 1.5302, "step": 8659 }, { "epoch": 0.994773419102866, "grad_norm": 0.5758072733879089, "learning_rate": 0.0001, "loss": 1.6505, "step": 8660 }, { "epoch": 0.9948882890126931, "grad_norm": 0.5178779363632202, "learning_rate": 0.0001, "loss": 1.7349, "step": 8661 }, { "epoch": 0.9950031589225202, "grad_norm": 0.49969327449798584, "learning_rate": 0.0001, "loss": 1.5336, "step": 8662 }, { "epoch": 0.9951180288323473, "grad_norm": 0.4617246985435486, "learning_rate": 0.0001, "loss": 1.467, "step": 8663 }, { "epoch": 0.9952328987421745, "grad_norm": 0.4882105886936188, "learning_rate": 0.0001, "loss": 1.5951, "step": 8664 }, { "epoch": 0.9953477686520016, "grad_norm": 0.47335708141326904, "learning_rate": 0.0001, "loss": 1.6539, "step": 8665 }, { "epoch": 0.9954626385618287, "grad_norm": 0.5231687426567078, "learning_rate": 0.0001, "loss": 1.3034, "step": 8666 }, { "epoch": 0.9955775084716558, "grad_norm": 0.48819267749786377, "learning_rate": 0.0001, "loss": 1.6421, "step": 8667 }, { "epoch": 0.995692378381483, "grad_norm": 0.4395897686481476, "learning_rate": 0.0001, "loss": 1.349, "step": 8668 }, { "epoch": 0.9958072482913101, "grad_norm": 0.44600117206573486, "learning_rate": 0.0001, "loss": 1.2248, "step": 8669 }, { "epoch": 0.9959221182011372, "grad_norm": 0.5204833745956421, "learning_rate": 0.0001, "loss": 1.8804, "step": 8670 }, { "epoch": 0.9960369881109643, "grad_norm": 0.5219905972480774, "learning_rate": 0.0001, "loss": 1.7275, "step": 8671 }, { "epoch": 0.9961518580207914, "grad_norm": 0.5194002389907837, "learning_rate": 0.0001, "loss": 1.7193, "step": 8672 }, { "epoch": 0.9962667279306185, "grad_norm": 0.4883606433868408, "learning_rate": 0.0001, "loss": 1.5022, "step": 8673 }, { "epoch": 0.9963815978404457, "grad_norm": 0.5156763792037964, "learning_rate": 0.0001, "loss": 1.7083, "step": 8674 }, { "epoch": 0.9964964677502728, "grad_norm": 0.4738801121711731, "learning_rate": 0.0001, "loss": 1.4915, "step": 8675 }, { "epoch": 0.9966113376600999, "grad_norm": 0.4632324278354645, "learning_rate": 0.0001, "loss": 1.702, "step": 8676 }, { "epoch": 0.996726207569927, "grad_norm": 0.4966660439968109, "learning_rate": 0.0001, "loss": 1.6701, "step": 8677 }, { "epoch": 0.9968410774797541, "grad_norm": 0.48212072253227234, "learning_rate": 0.0001, "loss": 1.5969, "step": 8678 }, { "epoch": 0.9969559473895813, "grad_norm": 0.498323529958725, "learning_rate": 0.0001, "loss": 1.6029, "step": 8679 }, { "epoch": 0.9970708172994084, "grad_norm": 0.5070053935050964, "learning_rate": 0.0001, "loss": 1.8766, "step": 8680 }, { "epoch": 0.9971856872092355, "grad_norm": 0.4994966387748718, "learning_rate": 0.0001, "loss": 1.6799, "step": 8681 }, { "epoch": 0.9973005571190626, "grad_norm": 0.5102283954620361, "learning_rate": 0.0001, "loss": 1.4916, "step": 8682 }, { "epoch": 0.9974154270288897, "grad_norm": 0.5465260148048401, "learning_rate": 0.0001, "loss": 1.7164, "step": 8683 }, { "epoch": 0.9975302969387169, "grad_norm": 0.46429330110549927, "learning_rate": 0.0001, "loss": 1.6374, "step": 8684 }, { "epoch": 0.997645166848544, "grad_norm": 0.4978303611278534, "learning_rate": 0.0001, "loss": 1.6153, "step": 8685 }, { "epoch": 0.9977600367583711, "grad_norm": 0.4677812457084656, "learning_rate": 0.0001, "loss": 1.5271, "step": 8686 }, { "epoch": 0.9978749066681982, "grad_norm": 0.49235597252845764, "learning_rate": 0.0001, "loss": 1.6004, "step": 8687 }, { "epoch": 0.9979897765780253, "grad_norm": 0.48482832312583923, "learning_rate": 0.0001, "loss": 1.5123, "step": 8688 }, { "epoch": 0.9981046464878525, "grad_norm": 0.5265589356422424, "learning_rate": 0.0001, "loss": 1.7991, "step": 8689 }, { "epoch": 0.9982195163976796, "grad_norm": 0.47693178057670593, "learning_rate": 0.0001, "loss": 1.5514, "step": 8690 }, { "epoch": 0.9983343863075067, "grad_norm": 0.4894190728664398, "learning_rate": 0.0001, "loss": 1.6614, "step": 8691 }, { "epoch": 0.9984492562173338, "grad_norm": 0.46358221769332886, "learning_rate": 0.0001, "loss": 1.5936, "step": 8692 }, { "epoch": 0.998564126127161, "grad_norm": 0.45858120918273926, "learning_rate": 0.0001, "loss": 1.5139, "step": 8693 }, { "epoch": 0.9986789960369881, "grad_norm": 0.45518410205841064, "learning_rate": 0.0001, "loss": 1.5492, "step": 8694 }, { "epoch": 0.9987938659468152, "grad_norm": 0.47189682722091675, "learning_rate": 0.0001, "loss": 1.5014, "step": 8695 }, { "epoch": 0.9989087358566423, "grad_norm": 0.5133928060531616, "learning_rate": 0.0001, "loss": 1.6432, "step": 8696 }, { "epoch": 0.9990236057664694, "grad_norm": 0.5051494240760803, "learning_rate": 0.0001, "loss": 1.6099, "step": 8697 }, { "epoch": 0.9991384756762965, "grad_norm": 0.5052590370178223, "learning_rate": 0.0001, "loss": 1.5629, "step": 8698 }, { "epoch": 0.9992533455861237, "grad_norm": 0.4864301085472107, "learning_rate": 0.0001, "loss": 1.4405, "step": 8699 }, { "epoch": 0.9993682154959508, "grad_norm": 0.49039098620414734, "learning_rate": 0.0001, "loss": 1.6348, "step": 8700 }, { "epoch": 0.9994830854057779, "grad_norm": 0.5003101229667664, "learning_rate": 0.0001, "loss": 1.729, "step": 8701 }, { "epoch": 0.999597955315605, "grad_norm": 0.4698851704597473, "learning_rate": 0.0001, "loss": 1.5738, "step": 8702 }, { "epoch": 0.9997128252254321, "grad_norm": 0.5079613327980042, "learning_rate": 0.0001, "loss": 1.6768, "step": 8703 }, { "epoch": 0.9998276951352593, "grad_norm": 0.5211082100868225, "learning_rate": 0.0001, "loss": 1.536, "step": 8704 }, { "epoch": 0.9999425650450864, "grad_norm": 0.45943862199783325, "learning_rate": 0.0001, "loss": 1.5753, "step": 8705 }, { "epoch": 1.0000574349549136, "grad_norm": 0.45664501190185547, "learning_rate": 0.0001, "loss": 1.5082, "step": 8706 }, { "epoch": 1.0001723048647406, "grad_norm": 0.4673193097114563, "learning_rate": 0.0001, "loss": 1.6225, "step": 8707 }, { "epoch": 1.0002871747745679, "grad_norm": 0.46662116050720215, "learning_rate": 0.0001, "loss": 1.3963, "step": 8708 }, { "epoch": 1.0004020446843949, "grad_norm": 0.543308675289154, "learning_rate": 0.0001, "loss": 1.2983, "step": 8709 }, { "epoch": 1.000516914594222, "grad_norm": 0.49053019285202026, "learning_rate": 0.0001, "loss": 1.417, "step": 8710 }, { "epoch": 1.000631784504049, "grad_norm": 0.504199743270874, "learning_rate": 0.0001, "loss": 1.4342, "step": 8711 }, { "epoch": 1.0007466544138763, "grad_norm": 0.5157631635665894, "learning_rate": 0.0001, "loss": 1.5539, "step": 8712 }, { "epoch": 1.0008615243237033, "grad_norm": 0.5185043215751648, "learning_rate": 0.0001, "loss": 1.401, "step": 8713 }, { "epoch": 1.0009763942335306, "grad_norm": 0.5186070203781128, "learning_rate": 0.0001, "loss": 1.5388, "step": 8714 }, { "epoch": 1.0010912641433576, "grad_norm": 0.5666443705558777, "learning_rate": 0.0001, "loss": 1.579, "step": 8715 }, { "epoch": 1.0012061340531848, "grad_norm": 0.6023586988449097, "learning_rate": 0.0001, "loss": 1.4872, "step": 8716 }, { "epoch": 1.0013210039630118, "grad_norm": 0.504416823387146, "learning_rate": 0.0001, "loss": 1.4496, "step": 8717 }, { "epoch": 1.001435873872839, "grad_norm": 0.563830554485321, "learning_rate": 0.0001, "loss": 1.2972, "step": 8718 }, { "epoch": 1.001550743782666, "grad_norm": 0.5681375861167908, "learning_rate": 0.0001, "loss": 1.5167, "step": 8719 }, { "epoch": 1.0016656136924933, "grad_norm": 0.5679755210876465, "learning_rate": 0.0001, "loss": 1.4425, "step": 8720 }, { "epoch": 1.0017804836023203, "grad_norm": 0.5443891286849976, "learning_rate": 0.0001, "loss": 1.6377, "step": 8721 }, { "epoch": 1.0018953535121475, "grad_norm": 0.5426947474479675, "learning_rate": 0.0001, "loss": 1.6093, "step": 8722 }, { "epoch": 1.0020102234219745, "grad_norm": 0.5179216861724854, "learning_rate": 0.0001, "loss": 1.463, "step": 8723 }, { "epoch": 1.0021250933318018, "grad_norm": 0.5044360160827637, "learning_rate": 0.0001, "loss": 1.4617, "step": 8724 }, { "epoch": 1.0022399632416288, "grad_norm": 0.5553032159805298, "learning_rate": 0.0001, "loss": 1.671, "step": 8725 }, { "epoch": 1.002354833151456, "grad_norm": 0.5114759206771851, "learning_rate": 0.0001, "loss": 1.6313, "step": 8726 }, { "epoch": 1.002469703061283, "grad_norm": 0.5439996719360352, "learning_rate": 0.0001, "loss": 1.54, "step": 8727 }, { "epoch": 1.0025845729711103, "grad_norm": 0.5239347815513611, "learning_rate": 0.0001, "loss": 1.6265, "step": 8728 }, { "epoch": 1.0026994428809373, "grad_norm": 0.49250611662864685, "learning_rate": 0.0001, "loss": 1.5114, "step": 8729 }, { "epoch": 1.0028143127907645, "grad_norm": 0.5004076361656189, "learning_rate": 0.0001, "loss": 1.435, "step": 8730 }, { "epoch": 1.0029291827005915, "grad_norm": 0.519535481929779, "learning_rate": 0.0001, "loss": 1.5809, "step": 8731 }, { "epoch": 1.0030440526104187, "grad_norm": 0.536308228969574, "learning_rate": 0.0001, "loss": 1.5055, "step": 8732 }, { "epoch": 1.0031589225202457, "grad_norm": 0.5679396986961365, "learning_rate": 0.0001, "loss": 1.8589, "step": 8733 }, { "epoch": 1.003273792430073, "grad_norm": 0.5036899447441101, "learning_rate": 0.0001, "loss": 1.4946, "step": 8734 }, { "epoch": 1.0033886623399, "grad_norm": 0.4876912236213684, "learning_rate": 0.0001, "loss": 1.4041, "step": 8735 }, { "epoch": 1.0035035322497272, "grad_norm": 0.4986098110675812, "learning_rate": 0.0001, "loss": 1.5502, "step": 8736 }, { "epoch": 1.0036184021595542, "grad_norm": 0.4937874674797058, "learning_rate": 0.0001, "loss": 1.4384, "step": 8737 }, { "epoch": 1.0037332720693815, "grad_norm": 0.5576295852661133, "learning_rate": 0.0001, "loss": 1.4278, "step": 8738 }, { "epoch": 1.0038481419792085, "grad_norm": 0.5405202507972717, "learning_rate": 0.0001, "loss": 1.6171, "step": 8739 }, { "epoch": 1.0039630118890357, "grad_norm": 0.5165756344795227, "learning_rate": 0.0001, "loss": 1.4411, "step": 8740 }, { "epoch": 1.0040778817988627, "grad_norm": 0.4816116988658905, "learning_rate": 0.0001, "loss": 1.2506, "step": 8741 }, { "epoch": 1.00419275170869, "grad_norm": 0.5027560591697693, "learning_rate": 0.0001, "loss": 1.4022, "step": 8742 }, { "epoch": 1.004307621618517, "grad_norm": 0.5379549264907837, "learning_rate": 0.0001, "loss": 1.6524, "step": 8743 }, { "epoch": 1.0044224915283442, "grad_norm": 0.6135854721069336, "learning_rate": 0.0001, "loss": 1.5376, "step": 8744 }, { "epoch": 1.0045373614381712, "grad_norm": 0.4991096556186676, "learning_rate": 0.0001, "loss": 1.454, "step": 8745 }, { "epoch": 1.0046522313479984, "grad_norm": 0.5324615836143494, "learning_rate": 0.0001, "loss": 1.3607, "step": 8746 }, { "epoch": 1.0047671012578254, "grad_norm": 0.5299088358879089, "learning_rate": 0.0001, "loss": 1.4187, "step": 8747 }, { "epoch": 1.0048819711676527, "grad_norm": 0.4894624650478363, "learning_rate": 0.0001, "loss": 1.5665, "step": 8748 }, { "epoch": 1.0049968410774797, "grad_norm": 0.5205270648002625, "learning_rate": 0.0001, "loss": 1.4387, "step": 8749 }, { "epoch": 1.005111710987307, "grad_norm": 0.4865090250968933, "learning_rate": 0.0001, "loss": 1.478, "step": 8750 }, { "epoch": 1.005226580897134, "grad_norm": 0.5512129664421082, "learning_rate": 0.0001, "loss": 1.5537, "step": 8751 }, { "epoch": 1.0053414508069611, "grad_norm": 0.484320729970932, "learning_rate": 0.0001, "loss": 1.5362, "step": 8752 }, { "epoch": 1.0054563207167881, "grad_norm": 0.5077235698699951, "learning_rate": 0.0001, "loss": 1.5192, "step": 8753 }, { "epoch": 1.0055711906266154, "grad_norm": 0.5155119895935059, "learning_rate": 0.0001, "loss": 1.4048, "step": 8754 }, { "epoch": 1.0056860605364424, "grad_norm": 0.54921954870224, "learning_rate": 0.0001, "loss": 1.459, "step": 8755 }, { "epoch": 1.0058009304462696, "grad_norm": 0.4844646751880646, "learning_rate": 0.0001, "loss": 1.7109, "step": 8756 }, { "epoch": 1.0059158003560966, "grad_norm": 0.47289541363716125, "learning_rate": 0.0001, "loss": 1.3917, "step": 8757 }, { "epoch": 1.0060306702659239, "grad_norm": 0.5090060234069824, "learning_rate": 0.0001, "loss": 1.4632, "step": 8758 }, { "epoch": 1.0061455401757509, "grad_norm": 0.5446418523788452, "learning_rate": 0.0001, "loss": 1.4641, "step": 8759 }, { "epoch": 1.006260410085578, "grad_norm": 0.4822421669960022, "learning_rate": 0.0001, "loss": 1.3481, "step": 8760 }, { "epoch": 1.006375279995405, "grad_norm": 0.5040774345397949, "learning_rate": 0.0001, "loss": 1.5635, "step": 8761 }, { "epoch": 1.0064901499052323, "grad_norm": 0.5044432282447815, "learning_rate": 0.0001, "loss": 1.3254, "step": 8762 }, { "epoch": 1.0066050198150593, "grad_norm": 0.5607876181602478, "learning_rate": 0.0001, "loss": 1.5483, "step": 8763 }, { "epoch": 1.0067198897248866, "grad_norm": 0.5420013666152954, "learning_rate": 0.0001, "loss": 1.5704, "step": 8764 }, { "epoch": 1.0068347596347136, "grad_norm": 0.5551548004150391, "learning_rate": 0.0001, "loss": 1.5416, "step": 8765 }, { "epoch": 1.0069496295445408, "grad_norm": 0.48607149720191956, "learning_rate": 0.0001, "loss": 1.4455, "step": 8766 }, { "epoch": 1.0070644994543678, "grad_norm": 0.5381131172180176, "learning_rate": 0.0001, "loss": 1.6194, "step": 8767 }, { "epoch": 1.007179369364195, "grad_norm": 0.5239553451538086, "learning_rate": 0.0001, "loss": 1.4548, "step": 8768 }, { "epoch": 1.007294239274022, "grad_norm": 0.5344405174255371, "learning_rate": 0.0001, "loss": 1.6047, "step": 8769 }, { "epoch": 1.0074091091838493, "grad_norm": 0.4784035384654999, "learning_rate": 0.0001, "loss": 1.3902, "step": 8770 }, { "epoch": 1.0075239790936763, "grad_norm": 0.49824607372283936, "learning_rate": 0.0001, "loss": 1.4876, "step": 8771 }, { "epoch": 1.0076388490035035, "grad_norm": 0.4996454119682312, "learning_rate": 0.0001, "loss": 1.4361, "step": 8772 }, { "epoch": 1.0077537189133305, "grad_norm": 0.5669626593589783, "learning_rate": 0.0001, "loss": 1.5217, "step": 8773 }, { "epoch": 1.0078685888231578, "grad_norm": 0.5511415600776672, "learning_rate": 0.0001, "loss": 1.4873, "step": 8774 }, { "epoch": 1.0079834587329848, "grad_norm": 0.5880183577537537, "learning_rate": 0.0001, "loss": 1.6994, "step": 8775 }, { "epoch": 1.008098328642812, "grad_norm": 0.47547656297683716, "learning_rate": 0.0001, "loss": 1.3952, "step": 8776 }, { "epoch": 1.0082131985526392, "grad_norm": 0.49340614676475525, "learning_rate": 0.0001, "loss": 1.4364, "step": 8777 }, { "epoch": 1.0083280684624663, "grad_norm": 0.5298573970794678, "learning_rate": 0.0001, "loss": 1.6306, "step": 8778 }, { "epoch": 1.0084429383722935, "grad_norm": 0.5538324117660522, "learning_rate": 0.0001, "loss": 1.6803, "step": 8779 }, { "epoch": 1.0085578082821205, "grad_norm": 0.5182627439498901, "learning_rate": 0.0001, "loss": 1.5341, "step": 8780 }, { "epoch": 1.0086726781919477, "grad_norm": 0.516372561454773, "learning_rate": 0.0001, "loss": 1.6618, "step": 8781 }, { "epoch": 1.0087875481017747, "grad_norm": 0.47818052768707275, "learning_rate": 0.0001, "loss": 1.4983, "step": 8782 }, { "epoch": 1.008902418011602, "grad_norm": 0.5226326584815979, "learning_rate": 0.0001, "loss": 1.3928, "step": 8783 }, { "epoch": 1.009017287921429, "grad_norm": 0.4940192401409149, "learning_rate": 0.0001, "loss": 1.4149, "step": 8784 }, { "epoch": 1.0091321578312562, "grad_norm": 0.5674400329589844, "learning_rate": 0.0001, "loss": 1.5652, "step": 8785 }, { "epoch": 1.0092470277410832, "grad_norm": 0.4928450286388397, "learning_rate": 0.0001, "loss": 1.4894, "step": 8786 }, { "epoch": 1.0093618976509104, "grad_norm": 0.48630955815315247, "learning_rate": 0.0001, "loss": 1.3444, "step": 8787 }, { "epoch": 1.0094767675607375, "grad_norm": 0.4933887720108032, "learning_rate": 0.0001, "loss": 1.414, "step": 8788 }, { "epoch": 1.0095916374705647, "grad_norm": 0.587792158126831, "learning_rate": 0.0001, "loss": 1.3733, "step": 8789 }, { "epoch": 1.0097065073803917, "grad_norm": 0.5260916352272034, "learning_rate": 0.0001, "loss": 1.3751, "step": 8790 }, { "epoch": 1.009821377290219, "grad_norm": 0.55689936876297, "learning_rate": 0.0001, "loss": 1.501, "step": 8791 }, { "epoch": 1.009936247200046, "grad_norm": 0.5287038087844849, "learning_rate": 0.0001, "loss": 1.3097, "step": 8792 }, { "epoch": 1.0100511171098732, "grad_norm": 0.521734356880188, "learning_rate": 0.0001, "loss": 1.5742, "step": 8793 }, { "epoch": 1.0101659870197002, "grad_norm": 0.5244892239570618, "learning_rate": 0.0001, "loss": 1.3727, "step": 8794 }, { "epoch": 1.0102808569295274, "grad_norm": 0.519815981388092, "learning_rate": 0.0001, "loss": 1.3018, "step": 8795 }, { "epoch": 1.0103957268393544, "grad_norm": 0.5823325514793396, "learning_rate": 0.0001, "loss": 1.5463, "step": 8796 }, { "epoch": 1.0105105967491816, "grad_norm": 0.51022869348526, "learning_rate": 0.0001, "loss": 1.2856, "step": 8797 }, { "epoch": 1.0106254666590087, "grad_norm": 0.530922532081604, "learning_rate": 0.0001, "loss": 1.7368, "step": 8798 }, { "epoch": 1.0107403365688359, "grad_norm": 0.48666539788246155, "learning_rate": 0.0001, "loss": 1.3965, "step": 8799 }, { "epoch": 1.010855206478663, "grad_norm": 0.5583677291870117, "learning_rate": 0.0001, "loss": 1.499, "step": 8800 }, { "epoch": 1.0109700763884901, "grad_norm": 0.5303579568862915, "learning_rate": 0.0001, "loss": 1.5556, "step": 8801 }, { "epoch": 1.0110849462983171, "grad_norm": 0.5240969657897949, "learning_rate": 0.0001, "loss": 1.6731, "step": 8802 }, { "epoch": 1.0111998162081444, "grad_norm": 0.5126948356628418, "learning_rate": 0.0001, "loss": 1.4357, "step": 8803 }, { "epoch": 1.0113146861179714, "grad_norm": 0.5634292960166931, "learning_rate": 0.0001, "loss": 1.6382, "step": 8804 }, { "epoch": 1.0114295560277986, "grad_norm": 0.5226359963417053, "learning_rate": 0.0001, "loss": 1.6549, "step": 8805 }, { "epoch": 1.0115444259376256, "grad_norm": 0.4996303617954254, "learning_rate": 0.0001, "loss": 1.3072, "step": 8806 }, { "epoch": 1.0116592958474528, "grad_norm": 0.49595144391059875, "learning_rate": 0.0001, "loss": 1.5657, "step": 8807 }, { "epoch": 1.0117741657572799, "grad_norm": 0.5020740628242493, "learning_rate": 0.0001, "loss": 1.3781, "step": 8808 }, { "epoch": 1.011889035667107, "grad_norm": 0.49597975611686707, "learning_rate": 0.0001, "loss": 1.4879, "step": 8809 }, { "epoch": 1.012003905576934, "grad_norm": 0.5139532685279846, "learning_rate": 0.0001, "loss": 1.5396, "step": 8810 }, { "epoch": 1.0121187754867613, "grad_norm": 0.4895060658454895, "learning_rate": 0.0001, "loss": 1.3705, "step": 8811 }, { "epoch": 1.0122336453965883, "grad_norm": 0.47956687211990356, "learning_rate": 0.0001, "loss": 1.3134, "step": 8812 }, { "epoch": 1.0123485153064156, "grad_norm": 0.4845159649848938, "learning_rate": 0.0001, "loss": 1.3541, "step": 8813 }, { "epoch": 1.0124633852162426, "grad_norm": 0.5296355485916138, "learning_rate": 0.0001, "loss": 1.5696, "step": 8814 }, { "epoch": 1.0125782551260698, "grad_norm": 0.5246522426605225, "learning_rate": 0.0001, "loss": 1.6872, "step": 8815 }, { "epoch": 1.0126931250358968, "grad_norm": 0.5685099959373474, "learning_rate": 0.0001, "loss": 1.4409, "step": 8816 }, { "epoch": 1.012807994945724, "grad_norm": 0.47896215319633484, "learning_rate": 0.0001, "loss": 1.4345, "step": 8817 }, { "epoch": 1.012922864855551, "grad_norm": 0.487551748752594, "learning_rate": 0.0001, "loss": 1.1566, "step": 8818 }, { "epoch": 1.0130377347653783, "grad_norm": 0.5409836173057556, "learning_rate": 0.0001, "loss": 1.6797, "step": 8819 }, { "epoch": 1.0131526046752053, "grad_norm": 0.5281198620796204, "learning_rate": 0.0001, "loss": 1.5419, "step": 8820 }, { "epoch": 1.0132674745850325, "grad_norm": 0.5546361804008484, "learning_rate": 0.0001, "loss": 1.5202, "step": 8821 }, { "epoch": 1.0133823444948595, "grad_norm": 0.5375561118125916, "learning_rate": 0.0001, "loss": 1.4132, "step": 8822 }, { "epoch": 1.0134972144046868, "grad_norm": 0.5230275988578796, "learning_rate": 0.0001, "loss": 1.242, "step": 8823 }, { "epoch": 1.0136120843145138, "grad_norm": 0.5615382194519043, "learning_rate": 0.0001, "loss": 1.585, "step": 8824 }, { "epoch": 1.013726954224341, "grad_norm": 0.49817347526550293, "learning_rate": 0.0001, "loss": 1.2932, "step": 8825 }, { "epoch": 1.013841824134168, "grad_norm": 0.5396572947502136, "learning_rate": 0.0001, "loss": 1.5191, "step": 8826 }, { "epoch": 1.0139566940439952, "grad_norm": 0.5678941607475281, "learning_rate": 0.0001, "loss": 1.548, "step": 8827 }, { "epoch": 1.0140715639538223, "grad_norm": 0.5782578587532043, "learning_rate": 0.0001, "loss": 1.6673, "step": 8828 }, { "epoch": 1.0141864338636495, "grad_norm": 0.502582311630249, "learning_rate": 0.0001, "loss": 1.614, "step": 8829 }, { "epoch": 1.0143013037734765, "grad_norm": 0.6554863452911377, "learning_rate": 0.0001, "loss": 1.4321, "step": 8830 }, { "epoch": 1.0144161736833037, "grad_norm": 0.5245055556297302, "learning_rate": 0.0001, "loss": 1.5352, "step": 8831 }, { "epoch": 1.0145310435931307, "grad_norm": 0.4950437843799591, "learning_rate": 0.0001, "loss": 1.404, "step": 8832 }, { "epoch": 1.014645913502958, "grad_norm": 0.48556363582611084, "learning_rate": 0.0001, "loss": 1.3682, "step": 8833 }, { "epoch": 1.014760783412785, "grad_norm": 0.5288364291191101, "learning_rate": 0.0001, "loss": 1.3036, "step": 8834 }, { "epoch": 1.0148756533226122, "grad_norm": 0.5161809325218201, "learning_rate": 0.0001, "loss": 1.4239, "step": 8835 }, { "epoch": 1.0149905232324392, "grad_norm": 0.6864306330680847, "learning_rate": 0.0001, "loss": 1.4739, "step": 8836 }, { "epoch": 1.0151053931422664, "grad_norm": 0.5075218081474304, "learning_rate": 0.0001, "loss": 1.4884, "step": 8837 }, { "epoch": 1.0152202630520935, "grad_norm": 0.5431599617004395, "learning_rate": 0.0001, "loss": 1.522, "step": 8838 }, { "epoch": 1.0153351329619207, "grad_norm": 0.49773699045181274, "learning_rate": 0.0001, "loss": 1.4085, "step": 8839 }, { "epoch": 1.0154500028717477, "grad_norm": 0.5445903539657593, "learning_rate": 0.0001, "loss": 1.6883, "step": 8840 }, { "epoch": 1.015564872781575, "grad_norm": 0.5013106465339661, "learning_rate": 0.0001, "loss": 1.489, "step": 8841 }, { "epoch": 1.015679742691402, "grad_norm": 0.48422935605049133, "learning_rate": 0.0001, "loss": 1.4696, "step": 8842 }, { "epoch": 1.0157946126012292, "grad_norm": 0.507689893245697, "learning_rate": 0.0001, "loss": 1.5015, "step": 8843 }, { "epoch": 1.0159094825110562, "grad_norm": 0.48591098189353943, "learning_rate": 0.0001, "loss": 1.5294, "step": 8844 }, { "epoch": 1.0160243524208834, "grad_norm": 0.5136837363243103, "learning_rate": 0.0001, "loss": 1.3958, "step": 8845 }, { "epoch": 1.0161392223307104, "grad_norm": 0.5529723167419434, "learning_rate": 0.0001, "loss": 1.6141, "step": 8846 }, { "epoch": 1.0162540922405376, "grad_norm": 0.5329626202583313, "learning_rate": 0.0001, "loss": 1.5841, "step": 8847 }, { "epoch": 1.0163689621503647, "grad_norm": 0.5492246747016907, "learning_rate": 0.0001, "loss": 1.322, "step": 8848 }, { "epoch": 1.0164838320601919, "grad_norm": 0.5101956725120544, "learning_rate": 0.0001, "loss": 1.4037, "step": 8849 }, { "epoch": 1.016598701970019, "grad_norm": 0.5277436971664429, "learning_rate": 0.0001, "loss": 1.5163, "step": 8850 }, { "epoch": 1.0167135718798461, "grad_norm": 0.5436544418334961, "learning_rate": 0.0001, "loss": 1.4159, "step": 8851 }, { "epoch": 1.0168284417896731, "grad_norm": 0.5090106725692749, "learning_rate": 0.0001, "loss": 1.4402, "step": 8852 }, { "epoch": 1.0169433116995004, "grad_norm": 0.5208089351654053, "learning_rate": 0.0001, "loss": 1.3969, "step": 8853 }, { "epoch": 1.0170581816093274, "grad_norm": 0.506205141544342, "learning_rate": 0.0001, "loss": 1.2677, "step": 8854 }, { "epoch": 1.0171730515191546, "grad_norm": 0.4834211468696594, "learning_rate": 0.0001, "loss": 1.4207, "step": 8855 }, { "epoch": 1.0172879214289816, "grad_norm": 0.4916054606437683, "learning_rate": 0.0001, "loss": 1.6002, "step": 8856 }, { "epoch": 1.0174027913388088, "grad_norm": 0.5485023856163025, "learning_rate": 0.0001, "loss": 1.4735, "step": 8857 }, { "epoch": 1.0175176612486359, "grad_norm": 0.5115214586257935, "learning_rate": 0.0001, "loss": 1.4413, "step": 8858 }, { "epoch": 1.017632531158463, "grad_norm": 0.526879608631134, "learning_rate": 0.0001, "loss": 1.5169, "step": 8859 }, { "epoch": 1.01774740106829, "grad_norm": 0.547588050365448, "learning_rate": 0.0001, "loss": 1.5799, "step": 8860 }, { "epoch": 1.0178622709781173, "grad_norm": 0.4805614948272705, "learning_rate": 0.0001, "loss": 1.2985, "step": 8861 }, { "epoch": 1.0179771408879443, "grad_norm": 0.5069999098777771, "learning_rate": 0.0001, "loss": 1.4824, "step": 8862 }, { "epoch": 1.0180920107977716, "grad_norm": 0.4949933588504791, "learning_rate": 0.0001, "loss": 1.483, "step": 8863 }, { "epoch": 1.0182068807075986, "grad_norm": 0.4933357238769531, "learning_rate": 0.0001, "loss": 1.4847, "step": 8864 }, { "epoch": 1.0183217506174258, "grad_norm": 0.5064468383789062, "learning_rate": 0.0001, "loss": 1.5914, "step": 8865 }, { "epoch": 1.0184366205272528, "grad_norm": 0.5541262030601501, "learning_rate": 0.0001, "loss": 1.5795, "step": 8866 }, { "epoch": 1.01855149043708, "grad_norm": 0.5688692331314087, "learning_rate": 0.0001, "loss": 1.565, "step": 8867 }, { "epoch": 1.018666360346907, "grad_norm": 0.5120310187339783, "learning_rate": 0.0001, "loss": 1.4911, "step": 8868 }, { "epoch": 1.0187812302567343, "grad_norm": 0.5274046063423157, "learning_rate": 0.0001, "loss": 1.6001, "step": 8869 }, { "epoch": 1.0188961001665613, "grad_norm": 0.5022189021110535, "learning_rate": 0.0001, "loss": 1.4302, "step": 8870 }, { "epoch": 1.0190109700763885, "grad_norm": 0.5581625699996948, "learning_rate": 0.0001, "loss": 1.6717, "step": 8871 }, { "epoch": 1.0191258399862155, "grad_norm": 0.535330593585968, "learning_rate": 0.0001, "loss": 1.4295, "step": 8872 }, { "epoch": 1.0192407098960428, "grad_norm": 0.4544619023799896, "learning_rate": 0.0001, "loss": 1.3384, "step": 8873 }, { "epoch": 1.0193555798058698, "grad_norm": 0.6101300716400146, "learning_rate": 0.0001, "loss": 1.3963, "step": 8874 }, { "epoch": 1.019470449715697, "grad_norm": 0.5120131969451904, "learning_rate": 0.0001, "loss": 1.5754, "step": 8875 }, { "epoch": 1.019585319625524, "grad_norm": 0.49319207668304443, "learning_rate": 0.0001, "loss": 1.5138, "step": 8876 }, { "epoch": 1.0197001895353512, "grad_norm": 0.5753055810928345, "learning_rate": 0.0001, "loss": 1.5648, "step": 8877 }, { "epoch": 1.0198150594451783, "grad_norm": 0.5088034868240356, "learning_rate": 0.0001, "loss": 1.3886, "step": 8878 }, { "epoch": 1.0199299293550055, "grad_norm": 0.5577898621559143, "learning_rate": 0.0001, "loss": 1.4367, "step": 8879 }, { "epoch": 1.0200447992648325, "grad_norm": 0.5172377228736877, "learning_rate": 0.0001, "loss": 1.5446, "step": 8880 }, { "epoch": 1.0201596691746597, "grad_norm": 0.49840047955513, "learning_rate": 0.0001, "loss": 1.3737, "step": 8881 }, { "epoch": 1.0202745390844867, "grad_norm": 0.5156157612800598, "learning_rate": 0.0001, "loss": 1.4591, "step": 8882 }, { "epoch": 1.020389408994314, "grad_norm": 0.4850468337535858, "learning_rate": 0.0001, "loss": 1.4474, "step": 8883 }, { "epoch": 1.020504278904141, "grad_norm": 0.5439092516899109, "learning_rate": 0.0001, "loss": 1.3706, "step": 8884 }, { "epoch": 1.0206191488139682, "grad_norm": 0.5357840657234192, "learning_rate": 0.0001, "loss": 1.5991, "step": 8885 }, { "epoch": 1.0207340187237952, "grad_norm": 0.5319818258285522, "learning_rate": 0.0001, "loss": 1.5083, "step": 8886 }, { "epoch": 1.0208488886336224, "grad_norm": 0.52201247215271, "learning_rate": 0.0001, "loss": 1.6158, "step": 8887 }, { "epoch": 1.0209637585434495, "grad_norm": 0.4789310097694397, "learning_rate": 0.0001, "loss": 1.4419, "step": 8888 }, { "epoch": 1.0210786284532767, "grad_norm": 0.5354008078575134, "learning_rate": 0.0001, "loss": 1.6706, "step": 8889 }, { "epoch": 1.0211934983631037, "grad_norm": 0.5723029971122742, "learning_rate": 0.0001, "loss": 1.5574, "step": 8890 }, { "epoch": 1.021308368272931, "grad_norm": 0.4998638927936554, "learning_rate": 0.0001, "loss": 1.5325, "step": 8891 }, { "epoch": 1.021423238182758, "grad_norm": 0.5805894732475281, "learning_rate": 0.0001, "loss": 1.7066, "step": 8892 }, { "epoch": 1.0215381080925852, "grad_norm": 0.6122562885284424, "learning_rate": 0.0001, "loss": 1.5378, "step": 8893 }, { "epoch": 1.0216529780024122, "grad_norm": 0.5063309073448181, "learning_rate": 0.0001, "loss": 1.4165, "step": 8894 }, { "epoch": 1.0217678479122394, "grad_norm": 0.5316261053085327, "learning_rate": 0.0001, "loss": 1.4106, "step": 8895 }, { "epoch": 1.0218827178220664, "grad_norm": 0.5456799268722534, "learning_rate": 0.0001, "loss": 1.3952, "step": 8896 }, { "epoch": 1.0219975877318936, "grad_norm": 0.5277631878852844, "learning_rate": 0.0001, "loss": 1.502, "step": 8897 }, { "epoch": 1.0221124576417207, "grad_norm": 0.5488672852516174, "learning_rate": 0.0001, "loss": 1.4201, "step": 8898 }, { "epoch": 1.0222273275515479, "grad_norm": 0.5567140579223633, "learning_rate": 0.0001, "loss": 1.5074, "step": 8899 }, { "epoch": 1.022342197461375, "grad_norm": 0.5117017030715942, "learning_rate": 0.0001, "loss": 1.5041, "step": 8900 }, { "epoch": 1.0224570673712021, "grad_norm": 0.569999098777771, "learning_rate": 0.0001, "loss": 1.4689, "step": 8901 }, { "epoch": 1.0225719372810291, "grad_norm": 0.5409071445465088, "learning_rate": 0.0001, "loss": 1.3848, "step": 8902 }, { "epoch": 1.0226868071908564, "grad_norm": 0.5431987047195435, "learning_rate": 0.0001, "loss": 1.6376, "step": 8903 }, { "epoch": 1.0228016771006834, "grad_norm": 0.4773072302341461, "learning_rate": 0.0001, "loss": 1.3969, "step": 8904 }, { "epoch": 1.0229165470105106, "grad_norm": 0.48939380049705505, "learning_rate": 0.0001, "loss": 1.2671, "step": 8905 }, { "epoch": 1.0230314169203376, "grad_norm": 0.5301325917243958, "learning_rate": 0.0001, "loss": 1.6652, "step": 8906 }, { "epoch": 1.0231462868301648, "grad_norm": 0.5332849621772766, "learning_rate": 0.0001, "loss": 1.4009, "step": 8907 }, { "epoch": 1.0232611567399919, "grad_norm": 0.5592193603515625, "learning_rate": 0.0001, "loss": 1.4986, "step": 8908 }, { "epoch": 1.023376026649819, "grad_norm": 0.5041161179542542, "learning_rate": 0.0001, "loss": 1.3943, "step": 8909 }, { "epoch": 1.0234908965596463, "grad_norm": 0.5014781951904297, "learning_rate": 0.0001, "loss": 1.424, "step": 8910 }, { "epoch": 1.0236057664694733, "grad_norm": 0.49788087606430054, "learning_rate": 0.0001, "loss": 1.442, "step": 8911 }, { "epoch": 1.0237206363793003, "grad_norm": 0.5221337080001831, "learning_rate": 0.0001, "loss": 1.3539, "step": 8912 }, { "epoch": 1.0238355062891276, "grad_norm": 0.5878422856330872, "learning_rate": 0.0001, "loss": 1.5483, "step": 8913 }, { "epoch": 1.0239503761989548, "grad_norm": 0.5616274476051331, "learning_rate": 0.0001, "loss": 1.4821, "step": 8914 }, { "epoch": 1.0240652461087818, "grad_norm": 0.4898712635040283, "learning_rate": 0.0001, "loss": 1.4507, "step": 8915 }, { "epoch": 1.024180116018609, "grad_norm": 0.5116493105888367, "learning_rate": 0.0001, "loss": 1.4732, "step": 8916 }, { "epoch": 1.024294985928436, "grad_norm": 0.5652375221252441, "learning_rate": 0.0001, "loss": 1.7441, "step": 8917 }, { "epoch": 1.0244098558382633, "grad_norm": 0.525242805480957, "learning_rate": 0.0001, "loss": 1.3921, "step": 8918 }, { "epoch": 1.0245247257480903, "grad_norm": 0.5136474370956421, "learning_rate": 0.0001, "loss": 1.4856, "step": 8919 }, { "epoch": 1.0246395956579175, "grad_norm": 0.5519577860832214, "learning_rate": 0.0001, "loss": 1.4674, "step": 8920 }, { "epoch": 1.0247544655677445, "grad_norm": 0.5397735834121704, "learning_rate": 0.0001, "loss": 1.4107, "step": 8921 }, { "epoch": 1.0248693354775718, "grad_norm": 0.530933141708374, "learning_rate": 0.0001, "loss": 1.399, "step": 8922 }, { "epoch": 1.0249842053873988, "grad_norm": 0.5000669956207275, "learning_rate": 0.0001, "loss": 1.5068, "step": 8923 }, { "epoch": 1.025099075297226, "grad_norm": 0.5061250329017639, "learning_rate": 0.0001, "loss": 1.2578, "step": 8924 }, { "epoch": 1.025213945207053, "grad_norm": 0.5225130915641785, "learning_rate": 0.0001, "loss": 1.5042, "step": 8925 }, { "epoch": 1.0253288151168802, "grad_norm": 0.567387044429779, "learning_rate": 0.0001, "loss": 1.5323, "step": 8926 }, { "epoch": 1.0254436850267072, "grad_norm": 0.6644077897071838, "learning_rate": 0.0001, "loss": 1.3388, "step": 8927 }, { "epoch": 1.0255585549365345, "grad_norm": 0.5568031668663025, "learning_rate": 0.0001, "loss": 1.5316, "step": 8928 }, { "epoch": 1.0256734248463615, "grad_norm": 0.45952072739601135, "learning_rate": 0.0001, "loss": 1.2552, "step": 8929 }, { "epoch": 1.0257882947561887, "grad_norm": 0.4860810935497284, "learning_rate": 0.0001, "loss": 1.2508, "step": 8930 }, { "epoch": 1.0259031646660157, "grad_norm": 0.5749561190605164, "learning_rate": 0.0001, "loss": 1.4491, "step": 8931 }, { "epoch": 1.026018034575843, "grad_norm": 0.5022095441818237, "learning_rate": 0.0001, "loss": 1.2589, "step": 8932 }, { "epoch": 1.02613290448567, "grad_norm": 0.4758133888244629, "learning_rate": 0.0001, "loss": 1.3553, "step": 8933 }, { "epoch": 1.0262477743954972, "grad_norm": 0.5137754678726196, "learning_rate": 0.0001, "loss": 1.4829, "step": 8934 }, { "epoch": 1.0263626443053242, "grad_norm": 0.517978847026825, "learning_rate": 0.0001, "loss": 1.2764, "step": 8935 }, { "epoch": 1.0264775142151514, "grad_norm": 0.527491569519043, "learning_rate": 0.0001, "loss": 1.4839, "step": 8936 }, { "epoch": 1.0265923841249784, "grad_norm": 0.5537298321723938, "learning_rate": 0.0001, "loss": 1.4757, "step": 8937 }, { "epoch": 1.0267072540348057, "grad_norm": 0.49527880549430847, "learning_rate": 0.0001, "loss": 1.4042, "step": 8938 }, { "epoch": 1.0268221239446327, "grad_norm": 0.5914473533630371, "learning_rate": 0.0001, "loss": 1.5999, "step": 8939 }, { "epoch": 1.02693699385446, "grad_norm": 0.5270557403564453, "learning_rate": 0.0001, "loss": 1.5776, "step": 8940 }, { "epoch": 1.027051863764287, "grad_norm": 0.4984856843948364, "learning_rate": 0.0001, "loss": 1.4058, "step": 8941 }, { "epoch": 1.0271667336741142, "grad_norm": 0.5142775177955627, "learning_rate": 0.0001, "loss": 1.4986, "step": 8942 }, { "epoch": 1.0272816035839412, "grad_norm": 0.5666712522506714, "learning_rate": 0.0001, "loss": 1.5856, "step": 8943 }, { "epoch": 1.0273964734937684, "grad_norm": 0.515430212020874, "learning_rate": 0.0001, "loss": 1.5156, "step": 8944 }, { "epoch": 1.0275113434035954, "grad_norm": 0.4515966475009918, "learning_rate": 0.0001, "loss": 1.0998, "step": 8945 }, { "epoch": 1.0276262133134226, "grad_norm": 0.5609683394432068, "learning_rate": 0.0001, "loss": 1.5127, "step": 8946 }, { "epoch": 1.0277410832232496, "grad_norm": 0.616452157497406, "learning_rate": 0.0001, "loss": 1.4311, "step": 8947 }, { "epoch": 1.0278559531330769, "grad_norm": 0.5427160859107971, "learning_rate": 0.0001, "loss": 1.6402, "step": 8948 }, { "epoch": 1.0279708230429039, "grad_norm": 0.5293609499931335, "learning_rate": 0.0001, "loss": 1.3271, "step": 8949 }, { "epoch": 1.0280856929527311, "grad_norm": 0.5510618686676025, "learning_rate": 0.0001, "loss": 1.4305, "step": 8950 }, { "epoch": 1.0282005628625581, "grad_norm": 0.5403707027435303, "learning_rate": 0.0001, "loss": 1.5898, "step": 8951 }, { "epoch": 1.0283154327723854, "grad_norm": 0.5419697761535645, "learning_rate": 0.0001, "loss": 1.6068, "step": 8952 }, { "epoch": 1.0284303026822124, "grad_norm": 0.5279735326766968, "learning_rate": 0.0001, "loss": 1.3857, "step": 8953 }, { "epoch": 1.0285451725920396, "grad_norm": 0.48474159836769104, "learning_rate": 0.0001, "loss": 1.4308, "step": 8954 }, { "epoch": 1.0286600425018666, "grad_norm": 0.5220118761062622, "learning_rate": 0.0001, "loss": 1.6229, "step": 8955 }, { "epoch": 1.0287749124116938, "grad_norm": 0.5131214261054993, "learning_rate": 0.0001, "loss": 1.449, "step": 8956 }, { "epoch": 1.0288897823215208, "grad_norm": 0.5555469989776611, "learning_rate": 0.0001, "loss": 1.4271, "step": 8957 }, { "epoch": 1.029004652231348, "grad_norm": 0.5716768503189087, "learning_rate": 0.0001, "loss": 1.7778, "step": 8958 }, { "epoch": 1.029119522141175, "grad_norm": 0.5097254514694214, "learning_rate": 0.0001, "loss": 1.5314, "step": 8959 }, { "epoch": 1.0292343920510023, "grad_norm": 0.5094704031944275, "learning_rate": 0.0001, "loss": 1.5173, "step": 8960 }, { "epoch": 1.0293492619608293, "grad_norm": 0.49823570251464844, "learning_rate": 0.0001, "loss": 1.4139, "step": 8961 }, { "epoch": 1.0294641318706566, "grad_norm": 0.5155112147331238, "learning_rate": 0.0001, "loss": 1.4046, "step": 8962 }, { "epoch": 1.0295790017804836, "grad_norm": 0.4962422549724579, "learning_rate": 0.0001, "loss": 1.5553, "step": 8963 }, { "epoch": 1.0296938716903108, "grad_norm": 0.530081570148468, "learning_rate": 0.0001, "loss": 1.5269, "step": 8964 }, { "epoch": 1.0298087416001378, "grad_norm": 0.5400624871253967, "learning_rate": 0.0001, "loss": 1.5078, "step": 8965 }, { "epoch": 1.029923611509965, "grad_norm": 0.57288658618927, "learning_rate": 0.0001, "loss": 1.2249, "step": 8966 }, { "epoch": 1.030038481419792, "grad_norm": 0.5239868760108948, "learning_rate": 0.0001, "loss": 1.3921, "step": 8967 }, { "epoch": 1.0301533513296193, "grad_norm": 0.5611276030540466, "learning_rate": 0.0001, "loss": 1.5666, "step": 8968 }, { "epoch": 1.0302682212394463, "grad_norm": 0.5283910036087036, "learning_rate": 0.0001, "loss": 1.5352, "step": 8969 }, { "epoch": 1.0303830911492735, "grad_norm": 0.5439105033874512, "learning_rate": 0.0001, "loss": 1.4536, "step": 8970 }, { "epoch": 1.0304979610591005, "grad_norm": 0.5273348093032837, "learning_rate": 0.0001, "loss": 1.683, "step": 8971 }, { "epoch": 1.0306128309689278, "grad_norm": 0.5008038282394409, "learning_rate": 0.0001, "loss": 1.4605, "step": 8972 }, { "epoch": 1.0307277008787548, "grad_norm": 0.5743789672851562, "learning_rate": 0.0001, "loss": 1.4544, "step": 8973 }, { "epoch": 1.030842570788582, "grad_norm": 0.5148991346359253, "learning_rate": 0.0001, "loss": 1.3179, "step": 8974 }, { "epoch": 1.030957440698409, "grad_norm": 0.5285124182701111, "learning_rate": 0.0001, "loss": 1.5723, "step": 8975 }, { "epoch": 1.0310723106082362, "grad_norm": 0.5112460851669312, "learning_rate": 0.0001, "loss": 1.486, "step": 8976 }, { "epoch": 1.0311871805180632, "grad_norm": 0.5338481068611145, "learning_rate": 0.0001, "loss": 1.3635, "step": 8977 }, { "epoch": 1.0313020504278905, "grad_norm": 0.5114549994468689, "learning_rate": 0.0001, "loss": 1.1468, "step": 8978 }, { "epoch": 1.0314169203377175, "grad_norm": 0.5220407247543335, "learning_rate": 0.0001, "loss": 1.5003, "step": 8979 }, { "epoch": 1.0315317902475447, "grad_norm": 0.47584792971611023, "learning_rate": 0.0001, "loss": 1.4724, "step": 8980 }, { "epoch": 1.0316466601573717, "grad_norm": 0.5218107104301453, "learning_rate": 0.0001, "loss": 1.4703, "step": 8981 }, { "epoch": 1.031761530067199, "grad_norm": 0.5085425972938538, "learning_rate": 0.0001, "loss": 1.4469, "step": 8982 }, { "epoch": 1.031876399977026, "grad_norm": 0.5143735408782959, "learning_rate": 0.0001, "loss": 1.2407, "step": 8983 }, { "epoch": 1.0319912698868532, "grad_norm": 0.47457659244537354, "learning_rate": 0.0001, "loss": 1.3498, "step": 8984 }, { "epoch": 1.0321061397966802, "grad_norm": 0.5300225615501404, "learning_rate": 0.0001, "loss": 1.4482, "step": 8985 }, { "epoch": 1.0322210097065074, "grad_norm": 0.5350293517112732, "learning_rate": 0.0001, "loss": 1.4815, "step": 8986 }, { "epoch": 1.0323358796163344, "grad_norm": 0.5425633788108826, "learning_rate": 0.0001, "loss": 1.674, "step": 8987 }, { "epoch": 1.0324507495261617, "grad_norm": 0.5491638779640198, "learning_rate": 0.0001, "loss": 1.5377, "step": 8988 }, { "epoch": 1.0325656194359887, "grad_norm": 0.47783395648002625, "learning_rate": 0.0001, "loss": 1.3218, "step": 8989 }, { "epoch": 1.032680489345816, "grad_norm": 0.57433021068573, "learning_rate": 0.0001, "loss": 1.5094, "step": 8990 }, { "epoch": 1.032795359255643, "grad_norm": 0.5770861506462097, "learning_rate": 0.0001, "loss": 1.568, "step": 8991 }, { "epoch": 1.0329102291654702, "grad_norm": 0.5487734079360962, "learning_rate": 0.0001, "loss": 1.6067, "step": 8992 }, { "epoch": 1.0330250990752972, "grad_norm": 0.5157055854797363, "learning_rate": 0.0001, "loss": 1.4258, "step": 8993 }, { "epoch": 1.0331399689851244, "grad_norm": 0.5037803649902344, "learning_rate": 0.0001, "loss": 1.3896, "step": 8994 }, { "epoch": 1.0332548388949514, "grad_norm": 0.5406895875930786, "learning_rate": 0.0001, "loss": 1.5333, "step": 8995 }, { "epoch": 1.0333697088047786, "grad_norm": 0.5066888332366943, "learning_rate": 0.0001, "loss": 1.5356, "step": 8996 }, { "epoch": 1.0334845787146056, "grad_norm": 0.48853886127471924, "learning_rate": 0.0001, "loss": 1.5338, "step": 8997 }, { "epoch": 1.0335994486244329, "grad_norm": 0.48481160402297974, "learning_rate": 0.0001, "loss": 1.406, "step": 8998 }, { "epoch": 1.0337143185342599, "grad_norm": 0.4979383945465088, "learning_rate": 0.0001, "loss": 1.3085, "step": 8999 }, { "epoch": 1.033829188444087, "grad_norm": 0.5427919030189514, "learning_rate": 0.0001, "loss": 1.5745, "step": 9000 }, { "epoch": 1.0339440583539141, "grad_norm": 0.5497532486915588, "learning_rate": 0.0001, "loss": 1.4961, "step": 9001 }, { "epoch": 1.0340589282637414, "grad_norm": 0.5383864045143127, "learning_rate": 0.0001, "loss": 1.263, "step": 9002 }, { "epoch": 1.0341737981735684, "grad_norm": 0.5036965012550354, "learning_rate": 0.0001, "loss": 1.4651, "step": 9003 }, { "epoch": 1.0342886680833956, "grad_norm": 0.555041491985321, "learning_rate": 0.0001, "loss": 1.5879, "step": 9004 }, { "epoch": 1.0344035379932226, "grad_norm": 0.5549968481063843, "learning_rate": 0.0001, "loss": 1.5353, "step": 9005 }, { "epoch": 1.0345184079030498, "grad_norm": 0.579571008682251, "learning_rate": 0.0001, "loss": 1.458, "step": 9006 }, { "epoch": 1.0346332778128768, "grad_norm": 0.5244538187980652, "learning_rate": 0.0001, "loss": 1.4796, "step": 9007 }, { "epoch": 1.034748147722704, "grad_norm": 0.544011116027832, "learning_rate": 0.0001, "loss": 1.5416, "step": 9008 }, { "epoch": 1.034863017632531, "grad_norm": 0.5533955097198486, "learning_rate": 0.0001, "loss": 1.6356, "step": 9009 }, { "epoch": 1.0349778875423583, "grad_norm": 0.5618937611579895, "learning_rate": 0.0001, "loss": 1.6885, "step": 9010 }, { "epoch": 1.0350927574521853, "grad_norm": 0.5388464331626892, "learning_rate": 0.0001, "loss": 1.4549, "step": 9011 }, { "epoch": 1.0352076273620126, "grad_norm": 0.5120463967323303, "learning_rate": 0.0001, "loss": 1.509, "step": 9012 }, { "epoch": 1.0353224972718396, "grad_norm": 0.5093674659729004, "learning_rate": 0.0001, "loss": 1.4645, "step": 9013 }, { "epoch": 1.0354373671816668, "grad_norm": 0.4842431843280792, "learning_rate": 0.0001, "loss": 1.4475, "step": 9014 }, { "epoch": 1.0355522370914938, "grad_norm": 0.5035402774810791, "learning_rate": 0.0001, "loss": 1.443, "step": 9015 }, { "epoch": 1.035667107001321, "grad_norm": 0.5231695771217346, "learning_rate": 0.0001, "loss": 1.4145, "step": 9016 }, { "epoch": 1.035781976911148, "grad_norm": 0.5481805801391602, "learning_rate": 0.0001, "loss": 1.5047, "step": 9017 }, { "epoch": 1.0358968468209753, "grad_norm": 0.510319173336029, "learning_rate": 0.0001, "loss": 1.4618, "step": 9018 }, { "epoch": 1.0360117167308023, "grad_norm": 0.5354859828948975, "learning_rate": 0.0001, "loss": 1.3844, "step": 9019 }, { "epoch": 1.0361265866406295, "grad_norm": 0.4900701642036438, "learning_rate": 0.0001, "loss": 1.3788, "step": 9020 }, { "epoch": 1.0362414565504565, "grad_norm": 0.5398866534233093, "learning_rate": 0.0001, "loss": 1.5015, "step": 9021 }, { "epoch": 1.0363563264602837, "grad_norm": 0.5214775204658508, "learning_rate": 0.0001, "loss": 1.4741, "step": 9022 }, { "epoch": 1.0364711963701108, "grad_norm": 0.4958128035068512, "learning_rate": 0.0001, "loss": 1.6112, "step": 9023 }, { "epoch": 1.036586066279938, "grad_norm": 0.5190637111663818, "learning_rate": 0.0001, "loss": 1.4925, "step": 9024 }, { "epoch": 1.036700936189765, "grad_norm": 0.568577766418457, "learning_rate": 0.0001, "loss": 1.5747, "step": 9025 }, { "epoch": 1.0368158060995922, "grad_norm": 0.5356246829032898, "learning_rate": 0.0001, "loss": 1.5718, "step": 9026 }, { "epoch": 1.0369306760094192, "grad_norm": 0.5341411232948303, "learning_rate": 0.0001, "loss": 1.5258, "step": 9027 }, { "epoch": 1.0370455459192465, "grad_norm": 0.5894232392311096, "learning_rate": 0.0001, "loss": 1.7847, "step": 9028 }, { "epoch": 1.0371604158290735, "grad_norm": 0.4876749515533447, "learning_rate": 0.0001, "loss": 1.4103, "step": 9029 }, { "epoch": 1.0372752857389007, "grad_norm": 0.5098420977592468, "learning_rate": 0.0001, "loss": 1.59, "step": 9030 }, { "epoch": 1.0373901556487277, "grad_norm": 0.5374711751937866, "learning_rate": 0.0001, "loss": 1.406, "step": 9031 }, { "epoch": 1.037505025558555, "grad_norm": 0.5379756689071655, "learning_rate": 0.0001, "loss": 1.4881, "step": 9032 }, { "epoch": 1.037619895468382, "grad_norm": 0.5455212593078613, "learning_rate": 0.0001, "loss": 1.4375, "step": 9033 }, { "epoch": 1.0377347653782092, "grad_norm": 0.5194243788719177, "learning_rate": 0.0001, "loss": 1.4984, "step": 9034 }, { "epoch": 1.0378496352880362, "grad_norm": 0.5264743566513062, "learning_rate": 0.0001, "loss": 1.3457, "step": 9035 }, { "epoch": 1.0379645051978634, "grad_norm": 0.5833313465118408, "learning_rate": 0.0001, "loss": 1.5327, "step": 9036 }, { "epoch": 1.0380793751076904, "grad_norm": 0.5064496994018555, "learning_rate": 0.0001, "loss": 1.3317, "step": 9037 }, { "epoch": 1.0381942450175177, "grad_norm": 0.5563880205154419, "learning_rate": 0.0001, "loss": 1.5072, "step": 9038 }, { "epoch": 1.0383091149273447, "grad_norm": 0.5034350156784058, "learning_rate": 0.0001, "loss": 1.5021, "step": 9039 }, { "epoch": 1.038423984837172, "grad_norm": 0.5645545125007629, "learning_rate": 0.0001, "loss": 1.65, "step": 9040 }, { "epoch": 1.038538854746999, "grad_norm": 0.5010533928871155, "learning_rate": 0.0001, "loss": 1.6339, "step": 9041 }, { "epoch": 1.0386537246568261, "grad_norm": 0.5693106651306152, "learning_rate": 0.0001, "loss": 1.624, "step": 9042 }, { "epoch": 1.0387685945666532, "grad_norm": 0.5178468227386475, "learning_rate": 0.0001, "loss": 1.5859, "step": 9043 }, { "epoch": 1.0388834644764804, "grad_norm": 0.49142345786094666, "learning_rate": 0.0001, "loss": 1.3921, "step": 9044 }, { "epoch": 1.0389983343863074, "grad_norm": 0.512225329875946, "learning_rate": 0.0001, "loss": 1.6559, "step": 9045 }, { "epoch": 1.0391132042961346, "grad_norm": 0.500827968120575, "learning_rate": 0.0001, "loss": 1.444, "step": 9046 }, { "epoch": 1.0392280742059619, "grad_norm": 0.6273430585861206, "learning_rate": 0.0001, "loss": 1.6012, "step": 9047 }, { "epoch": 1.0393429441157889, "grad_norm": 0.610727071762085, "learning_rate": 0.0001, "loss": 1.6243, "step": 9048 }, { "epoch": 1.0394578140256159, "grad_norm": 0.4880732297897339, "learning_rate": 0.0001, "loss": 1.3747, "step": 9049 }, { "epoch": 1.039572683935443, "grad_norm": 0.5448058843612671, "learning_rate": 0.0001, "loss": 1.422, "step": 9050 }, { "epoch": 1.0396875538452703, "grad_norm": 0.5537201762199402, "learning_rate": 0.0001, "loss": 1.4965, "step": 9051 }, { "epoch": 1.0398024237550973, "grad_norm": 0.518325686454773, "learning_rate": 0.0001, "loss": 1.6048, "step": 9052 }, { "epoch": 1.0399172936649246, "grad_norm": 0.49593982100486755, "learning_rate": 0.0001, "loss": 1.3273, "step": 9053 }, { "epoch": 1.0400321635747516, "grad_norm": 0.5287415385246277, "learning_rate": 0.0001, "loss": 1.5204, "step": 9054 }, { "epoch": 1.0401470334845788, "grad_norm": 0.48811841011047363, "learning_rate": 0.0001, "loss": 1.3151, "step": 9055 }, { "epoch": 1.0402619033944058, "grad_norm": 0.557843804359436, "learning_rate": 0.0001, "loss": 1.5249, "step": 9056 }, { "epoch": 1.040376773304233, "grad_norm": 0.5163830518722534, "learning_rate": 0.0001, "loss": 1.5059, "step": 9057 }, { "epoch": 1.04049164321406, "grad_norm": 0.5076297521591187, "learning_rate": 0.0001, "loss": 1.3535, "step": 9058 }, { "epoch": 1.0406065131238873, "grad_norm": 0.5205563902854919, "learning_rate": 0.0001, "loss": 1.5076, "step": 9059 }, { "epoch": 1.0407213830337143, "grad_norm": 0.5657637119293213, "learning_rate": 0.0001, "loss": 1.6143, "step": 9060 }, { "epoch": 1.0408362529435415, "grad_norm": 0.5690082907676697, "learning_rate": 0.0001, "loss": 1.4616, "step": 9061 }, { "epoch": 1.0409511228533685, "grad_norm": 0.5683187246322632, "learning_rate": 0.0001, "loss": 1.3052, "step": 9062 }, { "epoch": 1.0410659927631958, "grad_norm": 0.5876998901367188, "learning_rate": 0.0001, "loss": 1.5554, "step": 9063 }, { "epoch": 1.0411808626730228, "grad_norm": 0.5582718849182129, "learning_rate": 0.0001, "loss": 1.3038, "step": 9064 }, { "epoch": 1.04129573258285, "grad_norm": 0.4977602958679199, "learning_rate": 0.0001, "loss": 1.4045, "step": 9065 }, { "epoch": 1.041410602492677, "grad_norm": 0.5169378519058228, "learning_rate": 0.0001, "loss": 1.5039, "step": 9066 }, { "epoch": 1.0415254724025043, "grad_norm": 0.5189085006713867, "learning_rate": 0.0001, "loss": 1.4188, "step": 9067 }, { "epoch": 1.0416403423123313, "grad_norm": 0.49691158533096313, "learning_rate": 0.0001, "loss": 1.2885, "step": 9068 }, { "epoch": 1.0417552122221585, "grad_norm": 0.5555067658424377, "learning_rate": 0.0001, "loss": 1.6002, "step": 9069 }, { "epoch": 1.0418700821319855, "grad_norm": 0.5489440560340881, "learning_rate": 0.0001, "loss": 1.5219, "step": 9070 }, { "epoch": 1.0419849520418127, "grad_norm": 0.5622000098228455, "learning_rate": 0.0001, "loss": 1.5963, "step": 9071 }, { "epoch": 1.0420998219516397, "grad_norm": 0.546467661857605, "learning_rate": 0.0001, "loss": 1.2848, "step": 9072 }, { "epoch": 1.042214691861467, "grad_norm": 0.496011883020401, "learning_rate": 0.0001, "loss": 1.2943, "step": 9073 }, { "epoch": 1.042329561771294, "grad_norm": 0.500908613204956, "learning_rate": 0.0001, "loss": 1.3917, "step": 9074 }, { "epoch": 1.0424444316811212, "grad_norm": 0.554749071598053, "learning_rate": 0.0001, "loss": 1.547, "step": 9075 }, { "epoch": 1.0425593015909482, "grad_norm": 0.5059463977813721, "learning_rate": 0.0001, "loss": 1.4333, "step": 9076 }, { "epoch": 1.0426741715007755, "grad_norm": 0.5207396745681763, "learning_rate": 0.0001, "loss": 1.387, "step": 9077 }, { "epoch": 1.0427890414106025, "grad_norm": 0.48321661353111267, "learning_rate": 0.0001, "loss": 1.4378, "step": 9078 }, { "epoch": 1.0429039113204297, "grad_norm": 0.5234763622283936, "learning_rate": 0.0001, "loss": 1.5333, "step": 9079 }, { "epoch": 1.0430187812302567, "grad_norm": 0.5880540609359741, "learning_rate": 0.0001, "loss": 1.5906, "step": 9080 }, { "epoch": 1.043133651140084, "grad_norm": 0.548507809638977, "learning_rate": 0.0001, "loss": 1.5296, "step": 9081 }, { "epoch": 1.043248521049911, "grad_norm": 0.545957088470459, "learning_rate": 0.0001, "loss": 1.5419, "step": 9082 }, { "epoch": 1.0433633909597382, "grad_norm": 0.5886163115501404, "learning_rate": 0.0001, "loss": 1.5929, "step": 9083 }, { "epoch": 1.0434782608695652, "grad_norm": 0.5363225340843201, "learning_rate": 0.0001, "loss": 1.5265, "step": 9084 }, { "epoch": 1.0435931307793924, "grad_norm": 0.5168259739875793, "learning_rate": 0.0001, "loss": 1.3905, "step": 9085 }, { "epoch": 1.0437080006892194, "grad_norm": 0.5514031052589417, "learning_rate": 0.0001, "loss": 1.6164, "step": 9086 }, { "epoch": 1.0438228705990467, "grad_norm": 0.5472466945648193, "learning_rate": 0.0001, "loss": 1.382, "step": 9087 }, { "epoch": 1.0439377405088737, "grad_norm": 0.5088561773300171, "learning_rate": 0.0001, "loss": 1.4711, "step": 9088 }, { "epoch": 1.044052610418701, "grad_norm": 0.535428524017334, "learning_rate": 0.0001, "loss": 1.4639, "step": 9089 }, { "epoch": 1.044167480328528, "grad_norm": 0.4890363812446594, "learning_rate": 0.0001, "loss": 1.3678, "step": 9090 }, { "epoch": 1.0442823502383551, "grad_norm": 0.5183672308921814, "learning_rate": 0.0001, "loss": 1.4534, "step": 9091 }, { "epoch": 1.0443972201481821, "grad_norm": 0.5594877004623413, "learning_rate": 0.0001, "loss": 1.6638, "step": 9092 }, { "epoch": 1.0445120900580094, "grad_norm": 0.5082018375396729, "learning_rate": 0.0001, "loss": 1.5265, "step": 9093 }, { "epoch": 1.0446269599678364, "grad_norm": 0.5126281380653381, "learning_rate": 0.0001, "loss": 1.3822, "step": 9094 }, { "epoch": 1.0447418298776636, "grad_norm": 0.5137059092521667, "learning_rate": 0.0001, "loss": 1.4601, "step": 9095 }, { "epoch": 1.0448566997874906, "grad_norm": 0.491517573595047, "learning_rate": 0.0001, "loss": 1.4795, "step": 9096 }, { "epoch": 1.0449715696973179, "grad_norm": 0.5336720943450928, "learning_rate": 0.0001, "loss": 1.4341, "step": 9097 }, { "epoch": 1.0450864396071449, "grad_norm": 0.5771209001541138, "learning_rate": 0.0001, "loss": 1.6016, "step": 9098 }, { "epoch": 1.045201309516972, "grad_norm": 0.5206882357597351, "learning_rate": 0.0001, "loss": 1.5159, "step": 9099 }, { "epoch": 1.045316179426799, "grad_norm": 0.5490942597389221, "learning_rate": 0.0001, "loss": 1.6517, "step": 9100 }, { "epoch": 1.0454310493366263, "grad_norm": 0.5432296991348267, "learning_rate": 0.0001, "loss": 1.6488, "step": 9101 }, { "epoch": 1.0455459192464533, "grad_norm": 0.5428939461708069, "learning_rate": 0.0001, "loss": 1.3368, "step": 9102 }, { "epoch": 1.0456607891562806, "grad_norm": 0.4910656213760376, "learning_rate": 0.0001, "loss": 1.2355, "step": 9103 }, { "epoch": 1.0457756590661076, "grad_norm": 0.5123563408851624, "learning_rate": 0.0001, "loss": 1.5744, "step": 9104 }, { "epoch": 1.0458905289759348, "grad_norm": 0.5455781817436218, "learning_rate": 0.0001, "loss": 1.4858, "step": 9105 }, { "epoch": 1.0460053988857618, "grad_norm": 0.5809556245803833, "learning_rate": 0.0001, "loss": 1.515, "step": 9106 }, { "epoch": 1.046120268795589, "grad_norm": 0.5579778552055359, "learning_rate": 0.0001, "loss": 1.6172, "step": 9107 }, { "epoch": 1.046235138705416, "grad_norm": 0.5217399597167969, "learning_rate": 0.0001, "loss": 1.4473, "step": 9108 }, { "epoch": 1.0463500086152433, "grad_norm": 0.5513988137245178, "learning_rate": 0.0001, "loss": 1.663, "step": 9109 }, { "epoch": 1.0464648785250703, "grad_norm": 0.5201188921928406, "learning_rate": 0.0001, "loss": 1.3266, "step": 9110 }, { "epoch": 1.0465797484348975, "grad_norm": 0.5076174736022949, "learning_rate": 0.0001, "loss": 1.4519, "step": 9111 }, { "epoch": 1.0466946183447245, "grad_norm": 0.543914258480072, "learning_rate": 0.0001, "loss": 1.5246, "step": 9112 }, { "epoch": 1.0468094882545518, "grad_norm": 0.5245856642723083, "learning_rate": 0.0001, "loss": 1.587, "step": 9113 }, { "epoch": 1.0469243581643788, "grad_norm": 0.5603649616241455, "learning_rate": 0.0001, "loss": 1.462, "step": 9114 }, { "epoch": 1.047039228074206, "grad_norm": 0.5366967916488647, "learning_rate": 0.0001, "loss": 1.7216, "step": 9115 }, { "epoch": 1.047154097984033, "grad_norm": 0.6067121624946594, "learning_rate": 0.0001, "loss": 1.6592, "step": 9116 }, { "epoch": 1.0472689678938603, "grad_norm": 0.5686327219009399, "learning_rate": 0.0001, "loss": 1.6469, "step": 9117 }, { "epoch": 1.0473838378036873, "grad_norm": 0.5154968500137329, "learning_rate": 0.0001, "loss": 1.2529, "step": 9118 }, { "epoch": 1.0474987077135145, "grad_norm": 0.49103155732154846, "learning_rate": 0.0001, "loss": 1.3669, "step": 9119 }, { "epoch": 1.0476135776233415, "grad_norm": 0.5241073966026306, "learning_rate": 0.0001, "loss": 1.4359, "step": 9120 }, { "epoch": 1.0477284475331687, "grad_norm": 0.5242918133735657, "learning_rate": 0.0001, "loss": 1.4032, "step": 9121 }, { "epoch": 1.0478433174429957, "grad_norm": 0.500344455242157, "learning_rate": 0.0001, "loss": 1.3488, "step": 9122 }, { "epoch": 1.047958187352823, "grad_norm": 0.5362123250961304, "learning_rate": 0.0001, "loss": 1.3767, "step": 9123 }, { "epoch": 1.04807305726265, "grad_norm": 0.5414429903030396, "learning_rate": 0.0001, "loss": 1.5629, "step": 9124 }, { "epoch": 1.0481879271724772, "grad_norm": 0.5353612899780273, "learning_rate": 0.0001, "loss": 1.5028, "step": 9125 }, { "epoch": 1.0483027970823042, "grad_norm": 0.5600303411483765, "learning_rate": 0.0001, "loss": 1.6894, "step": 9126 }, { "epoch": 1.0484176669921315, "grad_norm": 0.5072823166847229, "learning_rate": 0.0001, "loss": 1.4421, "step": 9127 }, { "epoch": 1.0485325369019585, "grad_norm": 0.5189083218574524, "learning_rate": 0.0001, "loss": 1.4933, "step": 9128 }, { "epoch": 1.0486474068117857, "grad_norm": 0.5282391309738159, "learning_rate": 0.0001, "loss": 1.4874, "step": 9129 }, { "epoch": 1.0487622767216127, "grad_norm": 0.5408895015716553, "learning_rate": 0.0001, "loss": 1.4552, "step": 9130 }, { "epoch": 1.04887714663144, "grad_norm": 0.5891426205635071, "learning_rate": 0.0001, "loss": 1.5407, "step": 9131 }, { "epoch": 1.048992016541267, "grad_norm": 0.5693600177764893, "learning_rate": 0.0001, "loss": 1.4055, "step": 9132 }, { "epoch": 1.0491068864510942, "grad_norm": 0.534770667552948, "learning_rate": 0.0001, "loss": 1.4429, "step": 9133 }, { "epoch": 1.0492217563609212, "grad_norm": 0.5606382489204407, "learning_rate": 0.0001, "loss": 1.5205, "step": 9134 }, { "epoch": 1.0493366262707484, "grad_norm": 0.5300989747047424, "learning_rate": 0.0001, "loss": 1.4919, "step": 9135 }, { "epoch": 1.0494514961805754, "grad_norm": 0.56827712059021, "learning_rate": 0.0001, "loss": 1.6758, "step": 9136 }, { "epoch": 1.0495663660904027, "grad_norm": 0.5045410394668579, "learning_rate": 0.0001, "loss": 1.277, "step": 9137 }, { "epoch": 1.0496812360002297, "grad_norm": 0.5462251901626587, "learning_rate": 0.0001, "loss": 1.3595, "step": 9138 }, { "epoch": 1.049796105910057, "grad_norm": 0.5165095925331116, "learning_rate": 0.0001, "loss": 1.5132, "step": 9139 }, { "epoch": 1.049910975819884, "grad_norm": 0.4886758327484131, "learning_rate": 0.0001, "loss": 1.4078, "step": 9140 }, { "epoch": 1.0500258457297111, "grad_norm": 0.5420746207237244, "learning_rate": 0.0001, "loss": 1.2517, "step": 9141 }, { "epoch": 1.0501407156395381, "grad_norm": 0.5480278730392456, "learning_rate": 0.0001, "loss": 1.554, "step": 9142 }, { "epoch": 1.0502555855493654, "grad_norm": 0.5709562301635742, "learning_rate": 0.0001, "loss": 1.6546, "step": 9143 }, { "epoch": 1.0503704554591924, "grad_norm": 0.5308666229248047, "learning_rate": 0.0001, "loss": 1.5047, "step": 9144 }, { "epoch": 1.0504853253690196, "grad_norm": 0.5184751749038696, "learning_rate": 0.0001, "loss": 1.4136, "step": 9145 }, { "epoch": 1.0506001952788466, "grad_norm": 0.5020022392272949, "learning_rate": 0.0001, "loss": 1.4594, "step": 9146 }, { "epoch": 1.0507150651886739, "grad_norm": 0.531156063079834, "learning_rate": 0.0001, "loss": 1.4978, "step": 9147 }, { "epoch": 1.0508299350985009, "grad_norm": 0.5110874772071838, "learning_rate": 0.0001, "loss": 1.3658, "step": 9148 }, { "epoch": 1.050944805008328, "grad_norm": 0.6354340314865112, "learning_rate": 0.0001, "loss": 1.511, "step": 9149 }, { "epoch": 1.051059674918155, "grad_norm": 0.5681877136230469, "learning_rate": 0.0001, "loss": 1.382, "step": 9150 }, { "epoch": 1.0511745448279823, "grad_norm": 0.4902039170265198, "learning_rate": 0.0001, "loss": 1.3208, "step": 9151 }, { "epoch": 1.0512894147378093, "grad_norm": 0.5818510055541992, "learning_rate": 0.0001, "loss": 1.4707, "step": 9152 }, { "epoch": 1.0514042846476366, "grad_norm": 0.5937033891677856, "learning_rate": 0.0001, "loss": 1.5029, "step": 9153 }, { "epoch": 1.0515191545574636, "grad_norm": 0.5286517143249512, "learning_rate": 0.0001, "loss": 1.4477, "step": 9154 }, { "epoch": 1.0516340244672908, "grad_norm": 0.5208611488342285, "learning_rate": 0.0001, "loss": 1.3308, "step": 9155 }, { "epoch": 1.0517488943771178, "grad_norm": 0.5702365636825562, "learning_rate": 0.0001, "loss": 1.7063, "step": 9156 }, { "epoch": 1.051863764286945, "grad_norm": 0.551328182220459, "learning_rate": 0.0001, "loss": 1.548, "step": 9157 }, { "epoch": 1.051978634196772, "grad_norm": 0.5106463432312012, "learning_rate": 0.0001, "loss": 1.5278, "step": 9158 }, { "epoch": 1.0520935041065993, "grad_norm": 0.5503958463668823, "learning_rate": 0.0001, "loss": 1.5551, "step": 9159 }, { "epoch": 1.0522083740164263, "grad_norm": 0.5540024042129517, "learning_rate": 0.0001, "loss": 1.3085, "step": 9160 }, { "epoch": 1.0523232439262535, "grad_norm": 0.5419454574584961, "learning_rate": 0.0001, "loss": 1.5515, "step": 9161 }, { "epoch": 1.0524381138360805, "grad_norm": 0.5251337885856628, "learning_rate": 0.0001, "loss": 1.3589, "step": 9162 }, { "epoch": 1.0525529837459078, "grad_norm": 0.553993284702301, "learning_rate": 0.0001, "loss": 1.5545, "step": 9163 }, { "epoch": 1.0526678536557348, "grad_norm": 0.5526601672172546, "learning_rate": 0.0001, "loss": 1.3881, "step": 9164 }, { "epoch": 1.052782723565562, "grad_norm": 0.5293091535568237, "learning_rate": 0.0001, "loss": 1.5249, "step": 9165 }, { "epoch": 1.052897593475389, "grad_norm": 0.5309327840805054, "learning_rate": 0.0001, "loss": 1.2998, "step": 9166 }, { "epoch": 1.0530124633852163, "grad_norm": 0.5376285314559937, "learning_rate": 0.0001, "loss": 1.5854, "step": 9167 }, { "epoch": 1.0531273332950433, "grad_norm": 0.5321171283721924, "learning_rate": 0.0001, "loss": 1.4715, "step": 9168 }, { "epoch": 1.0532422032048705, "grad_norm": 0.5454419851303101, "learning_rate": 0.0001, "loss": 1.565, "step": 9169 }, { "epoch": 1.0533570731146975, "grad_norm": 0.5634780526161194, "learning_rate": 0.0001, "loss": 1.2175, "step": 9170 }, { "epoch": 1.0534719430245247, "grad_norm": 0.6618204712867737, "learning_rate": 0.0001, "loss": 1.6722, "step": 9171 }, { "epoch": 1.0535868129343517, "grad_norm": 0.5510662198066711, "learning_rate": 0.0001, "loss": 1.5464, "step": 9172 }, { "epoch": 1.053701682844179, "grad_norm": 0.5555408596992493, "learning_rate": 0.0001, "loss": 1.5615, "step": 9173 }, { "epoch": 1.053816552754006, "grad_norm": 0.5211841464042664, "learning_rate": 0.0001, "loss": 1.3639, "step": 9174 }, { "epoch": 1.0539314226638332, "grad_norm": 0.5432996153831482, "learning_rate": 0.0001, "loss": 1.6417, "step": 9175 }, { "epoch": 1.0540462925736602, "grad_norm": 0.5261744260787964, "learning_rate": 0.0001, "loss": 1.4003, "step": 9176 }, { "epoch": 1.0541611624834875, "grad_norm": 0.5317165851593018, "learning_rate": 0.0001, "loss": 1.3629, "step": 9177 }, { "epoch": 1.0542760323933145, "grad_norm": 0.5396149754524231, "learning_rate": 0.0001, "loss": 1.4707, "step": 9178 }, { "epoch": 1.0543909023031417, "grad_norm": 0.5117886066436768, "learning_rate": 0.0001, "loss": 1.2257, "step": 9179 }, { "epoch": 1.0545057722129687, "grad_norm": 0.5107698440551758, "learning_rate": 0.0001, "loss": 1.5142, "step": 9180 }, { "epoch": 1.054620642122796, "grad_norm": 0.5465172529220581, "learning_rate": 0.0001, "loss": 1.4611, "step": 9181 }, { "epoch": 1.054735512032623, "grad_norm": 0.5007506012916565, "learning_rate": 0.0001, "loss": 1.3207, "step": 9182 }, { "epoch": 1.0548503819424502, "grad_norm": 0.5821637511253357, "learning_rate": 0.0001, "loss": 1.5268, "step": 9183 }, { "epoch": 1.0549652518522774, "grad_norm": 0.5477774739265442, "learning_rate": 0.0001, "loss": 1.1992, "step": 9184 }, { "epoch": 1.0550801217621044, "grad_norm": 0.5345506072044373, "learning_rate": 0.0001, "loss": 1.4741, "step": 9185 }, { "epoch": 1.0551949916719314, "grad_norm": 0.4949226379394531, "learning_rate": 0.0001, "loss": 1.4013, "step": 9186 }, { "epoch": 1.0553098615817587, "grad_norm": 0.543696403503418, "learning_rate": 0.0001, "loss": 1.6937, "step": 9187 }, { "epoch": 1.0554247314915859, "grad_norm": 0.5482311248779297, "learning_rate": 0.0001, "loss": 1.6222, "step": 9188 }, { "epoch": 1.055539601401413, "grad_norm": 0.500032901763916, "learning_rate": 0.0001, "loss": 1.507, "step": 9189 }, { "epoch": 1.0556544713112401, "grad_norm": 0.5224020481109619, "learning_rate": 0.0001, "loss": 1.4317, "step": 9190 }, { "epoch": 1.0557693412210671, "grad_norm": 0.5447744131088257, "learning_rate": 0.0001, "loss": 1.3627, "step": 9191 }, { "epoch": 1.0558842111308944, "grad_norm": 0.551868200302124, "learning_rate": 0.0001, "loss": 1.5095, "step": 9192 }, { "epoch": 1.0559990810407214, "grad_norm": 0.5407156944274902, "learning_rate": 0.0001, "loss": 1.6561, "step": 9193 }, { "epoch": 1.0561139509505486, "grad_norm": 0.5364171862602234, "learning_rate": 0.0001, "loss": 1.5194, "step": 9194 }, { "epoch": 1.0562288208603756, "grad_norm": 0.5270219445228577, "learning_rate": 0.0001, "loss": 1.3945, "step": 9195 }, { "epoch": 1.0563436907702028, "grad_norm": 0.5508518218994141, "learning_rate": 0.0001, "loss": 1.5531, "step": 9196 }, { "epoch": 1.0564585606800299, "grad_norm": 0.5767947435379028, "learning_rate": 0.0001, "loss": 1.3796, "step": 9197 }, { "epoch": 1.056573430589857, "grad_norm": 0.516804575920105, "learning_rate": 0.0001, "loss": 1.3958, "step": 9198 }, { "epoch": 1.056688300499684, "grad_norm": 0.5120447278022766, "learning_rate": 0.0001, "loss": 1.5065, "step": 9199 }, { "epoch": 1.0568031704095113, "grad_norm": 0.5243872404098511, "learning_rate": 0.0001, "loss": 1.5117, "step": 9200 }, { "epoch": 1.0569180403193383, "grad_norm": 0.5689402222633362, "learning_rate": 0.0001, "loss": 1.5406, "step": 9201 }, { "epoch": 1.0570329102291656, "grad_norm": 0.5817130208015442, "learning_rate": 0.0001, "loss": 1.4481, "step": 9202 }, { "epoch": 1.0571477801389926, "grad_norm": 0.5574333667755127, "learning_rate": 0.0001, "loss": 1.4967, "step": 9203 }, { "epoch": 1.0572626500488198, "grad_norm": 0.5260115265846252, "learning_rate": 0.0001, "loss": 1.51, "step": 9204 }, { "epoch": 1.0573775199586468, "grad_norm": 0.5010888576507568, "learning_rate": 0.0001, "loss": 1.46, "step": 9205 }, { "epoch": 1.057492389868474, "grad_norm": 0.5643918514251709, "learning_rate": 0.0001, "loss": 1.335, "step": 9206 }, { "epoch": 1.057607259778301, "grad_norm": 0.5270254015922546, "learning_rate": 0.0001, "loss": 1.2919, "step": 9207 }, { "epoch": 1.0577221296881283, "grad_norm": 0.5090634226799011, "learning_rate": 0.0001, "loss": 1.4459, "step": 9208 }, { "epoch": 1.0578369995979553, "grad_norm": 0.5546037554740906, "learning_rate": 0.0001, "loss": 1.4886, "step": 9209 }, { "epoch": 1.0579518695077825, "grad_norm": 0.5387788414955139, "learning_rate": 0.0001, "loss": 1.5268, "step": 9210 }, { "epoch": 1.0580667394176095, "grad_norm": 0.5352441668510437, "learning_rate": 0.0001, "loss": 1.5164, "step": 9211 }, { "epoch": 1.0581816093274368, "grad_norm": 0.5658225417137146, "learning_rate": 0.0001, "loss": 1.472, "step": 9212 }, { "epoch": 1.0582964792372638, "grad_norm": 0.541875958442688, "learning_rate": 0.0001, "loss": 1.0823, "step": 9213 }, { "epoch": 1.058411349147091, "grad_norm": 0.5292019248008728, "learning_rate": 0.0001, "loss": 1.4035, "step": 9214 }, { "epoch": 1.058526219056918, "grad_norm": 0.5192826986312866, "learning_rate": 0.0001, "loss": 1.2651, "step": 9215 }, { "epoch": 1.0586410889667452, "grad_norm": 0.5812119245529175, "learning_rate": 0.0001, "loss": 1.3717, "step": 9216 }, { "epoch": 1.0587559588765723, "grad_norm": 0.6301900744438171, "learning_rate": 0.0001, "loss": 1.616, "step": 9217 }, { "epoch": 1.0588708287863995, "grad_norm": 0.5290337800979614, "learning_rate": 0.0001, "loss": 1.3963, "step": 9218 }, { "epoch": 1.0589856986962265, "grad_norm": 0.6721698641777039, "learning_rate": 0.0001, "loss": 1.6594, "step": 9219 }, { "epoch": 1.0591005686060537, "grad_norm": 0.6142293810844421, "learning_rate": 0.0001, "loss": 1.6459, "step": 9220 }, { "epoch": 1.0592154385158807, "grad_norm": 0.5450447797775269, "learning_rate": 0.0001, "loss": 1.5673, "step": 9221 }, { "epoch": 1.059330308425708, "grad_norm": 0.5755720138549805, "learning_rate": 0.0001, "loss": 1.5907, "step": 9222 }, { "epoch": 1.059445178335535, "grad_norm": 0.6007996201515198, "learning_rate": 0.0001, "loss": 1.7402, "step": 9223 }, { "epoch": 1.0595600482453622, "grad_norm": 0.5676028728485107, "learning_rate": 0.0001, "loss": 1.4648, "step": 9224 }, { "epoch": 1.0596749181551892, "grad_norm": 0.5260946750640869, "learning_rate": 0.0001, "loss": 1.5316, "step": 9225 }, { "epoch": 1.0597897880650164, "grad_norm": 0.5359583497047424, "learning_rate": 0.0001, "loss": 1.3536, "step": 9226 }, { "epoch": 1.0599046579748435, "grad_norm": 0.5493340492248535, "learning_rate": 0.0001, "loss": 1.6997, "step": 9227 }, { "epoch": 1.0600195278846707, "grad_norm": 0.5583151578903198, "learning_rate": 0.0001, "loss": 1.5545, "step": 9228 }, { "epoch": 1.0601343977944977, "grad_norm": 0.5902903079986572, "learning_rate": 0.0001, "loss": 1.4995, "step": 9229 }, { "epoch": 1.060249267704325, "grad_norm": 0.5786603689193726, "learning_rate": 0.0001, "loss": 1.3942, "step": 9230 }, { "epoch": 1.060364137614152, "grad_norm": 0.5523187518119812, "learning_rate": 0.0001, "loss": 1.5173, "step": 9231 }, { "epoch": 1.0604790075239792, "grad_norm": 0.5302920341491699, "learning_rate": 0.0001, "loss": 1.4374, "step": 9232 }, { "epoch": 1.0605938774338062, "grad_norm": 0.5174841284751892, "learning_rate": 0.0001, "loss": 1.4171, "step": 9233 }, { "epoch": 1.0607087473436334, "grad_norm": 0.5988504886627197, "learning_rate": 0.0001, "loss": 1.6997, "step": 9234 }, { "epoch": 1.0608236172534604, "grad_norm": 0.5470107197761536, "learning_rate": 0.0001, "loss": 1.5583, "step": 9235 }, { "epoch": 1.0609384871632876, "grad_norm": 0.5042878985404968, "learning_rate": 0.0001, "loss": 1.4093, "step": 9236 }, { "epoch": 1.0610533570731147, "grad_norm": 0.5748067498207092, "learning_rate": 0.0001, "loss": 1.5261, "step": 9237 }, { "epoch": 1.0611682269829419, "grad_norm": 0.5632085204124451, "learning_rate": 0.0001, "loss": 1.6162, "step": 9238 }, { "epoch": 1.061283096892769, "grad_norm": 0.5615068674087524, "learning_rate": 0.0001, "loss": 1.5225, "step": 9239 }, { "epoch": 1.0613979668025961, "grad_norm": 0.5865307450294495, "learning_rate": 0.0001, "loss": 1.3333, "step": 9240 }, { "epoch": 1.0615128367124231, "grad_norm": 0.5403387546539307, "learning_rate": 0.0001, "loss": 1.3786, "step": 9241 }, { "epoch": 1.0616277066222504, "grad_norm": 0.5474202036857605, "learning_rate": 0.0001, "loss": 1.2461, "step": 9242 }, { "epoch": 1.0617425765320774, "grad_norm": 0.5543806552886963, "learning_rate": 0.0001, "loss": 1.4252, "step": 9243 }, { "epoch": 1.0618574464419046, "grad_norm": 0.5339075326919556, "learning_rate": 0.0001, "loss": 1.3884, "step": 9244 }, { "epoch": 1.0619723163517316, "grad_norm": 0.49664506316185, "learning_rate": 0.0001, "loss": 1.376, "step": 9245 }, { "epoch": 1.0620871862615588, "grad_norm": 0.5427022576332092, "learning_rate": 0.0001, "loss": 1.5364, "step": 9246 }, { "epoch": 1.0622020561713859, "grad_norm": 0.5456292629241943, "learning_rate": 0.0001, "loss": 1.6116, "step": 9247 }, { "epoch": 1.062316926081213, "grad_norm": 0.533386766910553, "learning_rate": 0.0001, "loss": 1.4291, "step": 9248 }, { "epoch": 1.06243179599104, "grad_norm": 0.5511561632156372, "learning_rate": 0.0001, "loss": 1.4988, "step": 9249 }, { "epoch": 1.0625466659008673, "grad_norm": 0.5116701126098633, "learning_rate": 0.0001, "loss": 1.367, "step": 9250 }, { "epoch": 1.0626615358106943, "grad_norm": 0.5287088751792908, "learning_rate": 0.0001, "loss": 1.5248, "step": 9251 }, { "epoch": 1.0627764057205216, "grad_norm": 0.5381811261177063, "learning_rate": 0.0001, "loss": 1.6131, "step": 9252 }, { "epoch": 1.0628912756303486, "grad_norm": 0.5121027231216431, "learning_rate": 0.0001, "loss": 1.3487, "step": 9253 }, { "epoch": 1.0630061455401758, "grad_norm": 0.49183768033981323, "learning_rate": 0.0001, "loss": 1.4759, "step": 9254 }, { "epoch": 1.0631210154500028, "grad_norm": 0.5818896293640137, "learning_rate": 0.0001, "loss": 1.5526, "step": 9255 }, { "epoch": 1.06323588535983, "grad_norm": 0.5403487682342529, "learning_rate": 0.0001, "loss": 1.0887, "step": 9256 }, { "epoch": 1.063350755269657, "grad_norm": 0.5411499738693237, "learning_rate": 0.0001, "loss": 1.4933, "step": 9257 }, { "epoch": 1.0634656251794843, "grad_norm": 0.5758698582649231, "learning_rate": 0.0001, "loss": 1.5877, "step": 9258 }, { "epoch": 1.0635804950893113, "grad_norm": 0.5343381762504578, "learning_rate": 0.0001, "loss": 1.5186, "step": 9259 }, { "epoch": 1.0636953649991385, "grad_norm": 0.5107303261756897, "learning_rate": 0.0001, "loss": 1.5133, "step": 9260 }, { "epoch": 1.0638102349089655, "grad_norm": 0.5439027547836304, "learning_rate": 0.0001, "loss": 1.6627, "step": 9261 }, { "epoch": 1.0639251048187928, "grad_norm": 0.5605586767196655, "learning_rate": 0.0001, "loss": 1.6073, "step": 9262 }, { "epoch": 1.0640399747286198, "grad_norm": 0.5504233837127686, "learning_rate": 0.0001, "loss": 1.5642, "step": 9263 }, { "epoch": 1.064154844638447, "grad_norm": 0.5588496923446655, "learning_rate": 0.0001, "loss": 1.6055, "step": 9264 }, { "epoch": 1.064269714548274, "grad_norm": 0.5234324932098389, "learning_rate": 0.0001, "loss": 1.4739, "step": 9265 }, { "epoch": 1.0643845844581012, "grad_norm": 0.5559567809104919, "learning_rate": 0.0001, "loss": 1.4358, "step": 9266 }, { "epoch": 1.0644994543679283, "grad_norm": 0.5315650105476379, "learning_rate": 0.0001, "loss": 1.5795, "step": 9267 }, { "epoch": 1.0646143242777555, "grad_norm": 0.586476743221283, "learning_rate": 0.0001, "loss": 1.693, "step": 9268 }, { "epoch": 1.0647291941875825, "grad_norm": 0.545502245426178, "learning_rate": 0.0001, "loss": 1.5206, "step": 9269 }, { "epoch": 1.0648440640974097, "grad_norm": 0.4984760582447052, "learning_rate": 0.0001, "loss": 1.3982, "step": 9270 }, { "epoch": 1.0649589340072367, "grad_norm": 0.5218265056610107, "learning_rate": 0.0001, "loss": 1.4516, "step": 9271 }, { "epoch": 1.065073803917064, "grad_norm": 0.5094042420387268, "learning_rate": 0.0001, "loss": 1.5032, "step": 9272 }, { "epoch": 1.065188673826891, "grad_norm": 0.5259179472923279, "learning_rate": 0.0001, "loss": 1.5218, "step": 9273 }, { "epoch": 1.0653035437367182, "grad_norm": 0.4852963387966156, "learning_rate": 0.0001, "loss": 1.2565, "step": 9274 }, { "epoch": 1.0654184136465452, "grad_norm": 0.5180572867393494, "learning_rate": 0.0001, "loss": 1.4978, "step": 9275 }, { "epoch": 1.0655332835563724, "grad_norm": 0.5113215446472168, "learning_rate": 0.0001, "loss": 1.5048, "step": 9276 }, { "epoch": 1.0656481534661995, "grad_norm": 0.5396996140480042, "learning_rate": 0.0001, "loss": 1.5992, "step": 9277 }, { "epoch": 1.0657630233760267, "grad_norm": 0.5498672723770142, "learning_rate": 0.0001, "loss": 1.5927, "step": 9278 }, { "epoch": 1.0658778932858537, "grad_norm": 0.5394907593727112, "learning_rate": 0.0001, "loss": 1.4526, "step": 9279 }, { "epoch": 1.065992763195681, "grad_norm": 0.5096206665039062, "learning_rate": 0.0001, "loss": 1.5192, "step": 9280 }, { "epoch": 1.066107633105508, "grad_norm": 0.49931639432907104, "learning_rate": 0.0001, "loss": 1.3194, "step": 9281 }, { "epoch": 1.0662225030153352, "grad_norm": 0.5749096274375916, "learning_rate": 0.0001, "loss": 1.634, "step": 9282 }, { "epoch": 1.0663373729251622, "grad_norm": 0.5646559000015259, "learning_rate": 0.0001, "loss": 1.5262, "step": 9283 }, { "epoch": 1.0664522428349894, "grad_norm": 0.5303424000740051, "learning_rate": 0.0001, "loss": 1.2186, "step": 9284 }, { "epoch": 1.0665671127448164, "grad_norm": 0.5843081474304199, "learning_rate": 0.0001, "loss": 1.5284, "step": 9285 }, { "epoch": 1.0666819826546436, "grad_norm": 0.506814181804657, "learning_rate": 0.0001, "loss": 1.2877, "step": 9286 }, { "epoch": 1.0667968525644707, "grad_norm": 0.5348950028419495, "learning_rate": 0.0001, "loss": 1.5083, "step": 9287 }, { "epoch": 1.0669117224742979, "grad_norm": 0.5017212629318237, "learning_rate": 0.0001, "loss": 1.4226, "step": 9288 }, { "epoch": 1.067026592384125, "grad_norm": 0.5333235859870911, "learning_rate": 0.0001, "loss": 1.6674, "step": 9289 }, { "epoch": 1.0671414622939521, "grad_norm": 0.4682038426399231, "learning_rate": 0.0001, "loss": 1.3008, "step": 9290 }, { "epoch": 1.0672563322037791, "grad_norm": 0.4889301061630249, "learning_rate": 0.0001, "loss": 1.4078, "step": 9291 }, { "epoch": 1.0673712021136064, "grad_norm": 0.52678382396698, "learning_rate": 0.0001, "loss": 1.4328, "step": 9292 }, { "epoch": 1.0674860720234334, "grad_norm": 0.5729466676712036, "learning_rate": 0.0001, "loss": 1.6461, "step": 9293 }, { "epoch": 1.0676009419332606, "grad_norm": 0.5632121562957764, "learning_rate": 0.0001, "loss": 1.5527, "step": 9294 }, { "epoch": 1.0677158118430876, "grad_norm": 0.5657988786697388, "learning_rate": 0.0001, "loss": 1.4767, "step": 9295 }, { "epoch": 1.0678306817529148, "grad_norm": 0.5650860071182251, "learning_rate": 0.0001, "loss": 1.4778, "step": 9296 }, { "epoch": 1.0679455516627419, "grad_norm": 0.5312690138816833, "learning_rate": 0.0001, "loss": 1.5812, "step": 9297 }, { "epoch": 1.068060421572569, "grad_norm": 0.5419080853462219, "learning_rate": 0.0001, "loss": 1.544, "step": 9298 }, { "epoch": 1.068175291482396, "grad_norm": 0.5316713452339172, "learning_rate": 0.0001, "loss": 1.4472, "step": 9299 }, { "epoch": 1.0682901613922233, "grad_norm": 0.5229997634887695, "learning_rate": 0.0001, "loss": 1.333, "step": 9300 }, { "epoch": 1.0684050313020503, "grad_norm": 0.5338220596313477, "learning_rate": 0.0001, "loss": 1.4723, "step": 9301 }, { "epoch": 1.0685199012118776, "grad_norm": 0.5003429651260376, "learning_rate": 0.0001, "loss": 1.3834, "step": 9302 }, { "epoch": 1.0686347711217046, "grad_norm": 0.5164094567298889, "learning_rate": 0.0001, "loss": 1.5219, "step": 9303 }, { "epoch": 1.0687496410315318, "grad_norm": 0.5205984115600586, "learning_rate": 0.0001, "loss": 1.2881, "step": 9304 }, { "epoch": 1.0688645109413588, "grad_norm": 0.5387089252471924, "learning_rate": 0.0001, "loss": 1.4241, "step": 9305 }, { "epoch": 1.068979380851186, "grad_norm": 0.5411062836647034, "learning_rate": 0.0001, "loss": 1.4488, "step": 9306 }, { "epoch": 1.069094250761013, "grad_norm": 0.522082507610321, "learning_rate": 0.0001, "loss": 1.48, "step": 9307 }, { "epoch": 1.0692091206708403, "grad_norm": 0.5525926351547241, "learning_rate": 0.0001, "loss": 1.359, "step": 9308 }, { "epoch": 1.0693239905806673, "grad_norm": 0.6042261719703674, "learning_rate": 0.0001, "loss": 1.6407, "step": 9309 }, { "epoch": 1.0694388604904945, "grad_norm": 0.5462369918823242, "learning_rate": 0.0001, "loss": 1.713, "step": 9310 }, { "epoch": 1.0695537304003215, "grad_norm": 0.5386967658996582, "learning_rate": 0.0001, "loss": 1.4737, "step": 9311 }, { "epoch": 1.0696686003101488, "grad_norm": 0.5753105282783508, "learning_rate": 0.0001, "loss": 1.7007, "step": 9312 }, { "epoch": 1.0697834702199758, "grad_norm": 0.509254515171051, "learning_rate": 0.0001, "loss": 1.3826, "step": 9313 }, { "epoch": 1.069898340129803, "grad_norm": 0.5281634330749512, "learning_rate": 0.0001, "loss": 1.5919, "step": 9314 }, { "epoch": 1.07001321003963, "grad_norm": 0.5292205214500427, "learning_rate": 0.0001, "loss": 1.5443, "step": 9315 }, { "epoch": 1.0701280799494572, "grad_norm": 0.522649347782135, "learning_rate": 0.0001, "loss": 1.4171, "step": 9316 }, { "epoch": 1.0702429498592845, "grad_norm": 0.5478872656822205, "learning_rate": 0.0001, "loss": 1.5693, "step": 9317 }, { "epoch": 1.0703578197691115, "grad_norm": 0.5441164374351501, "learning_rate": 0.0001, "loss": 1.4823, "step": 9318 }, { "epoch": 1.0704726896789385, "grad_norm": 0.5701571702957153, "learning_rate": 0.0001, "loss": 1.4287, "step": 9319 }, { "epoch": 1.0705875595887657, "grad_norm": 0.5586724281311035, "learning_rate": 0.0001, "loss": 1.4541, "step": 9320 }, { "epoch": 1.070702429498593, "grad_norm": 0.5121877789497375, "learning_rate": 0.0001, "loss": 1.58, "step": 9321 }, { "epoch": 1.07081729940842, "grad_norm": 0.5557401776313782, "learning_rate": 0.0001, "loss": 1.4617, "step": 9322 }, { "epoch": 1.070932169318247, "grad_norm": 0.5157942175865173, "learning_rate": 0.0001, "loss": 1.5935, "step": 9323 }, { "epoch": 1.0710470392280742, "grad_norm": 0.5756588578224182, "learning_rate": 0.0001, "loss": 1.5785, "step": 9324 }, { "epoch": 1.0711619091379014, "grad_norm": 0.5151377320289612, "learning_rate": 0.0001, "loss": 1.4551, "step": 9325 }, { "epoch": 1.0712767790477284, "grad_norm": 0.5527158975601196, "learning_rate": 0.0001, "loss": 1.5425, "step": 9326 }, { "epoch": 1.0713916489575555, "grad_norm": 0.517097532749176, "learning_rate": 0.0001, "loss": 1.2917, "step": 9327 }, { "epoch": 1.0715065188673827, "grad_norm": 0.5368314385414124, "learning_rate": 0.0001, "loss": 1.4936, "step": 9328 }, { "epoch": 1.07162138877721, "grad_norm": 0.5495009422302246, "learning_rate": 0.0001, "loss": 1.589, "step": 9329 }, { "epoch": 1.071736258687037, "grad_norm": 0.5558755993843079, "learning_rate": 0.0001, "loss": 1.4963, "step": 9330 }, { "epoch": 1.0718511285968642, "grad_norm": 0.5218509435653687, "learning_rate": 0.0001, "loss": 1.4938, "step": 9331 }, { "epoch": 1.0719659985066912, "grad_norm": 0.5873258709907532, "learning_rate": 0.0001, "loss": 1.5986, "step": 9332 }, { "epoch": 1.0720808684165184, "grad_norm": 0.542597234249115, "learning_rate": 0.0001, "loss": 1.4964, "step": 9333 }, { "epoch": 1.0721957383263454, "grad_norm": 0.5635952949523926, "learning_rate": 0.0001, "loss": 1.5998, "step": 9334 }, { "epoch": 1.0723106082361726, "grad_norm": 0.5015137791633606, "learning_rate": 0.0001, "loss": 1.409, "step": 9335 }, { "epoch": 1.0724254781459996, "grad_norm": 0.5085453987121582, "learning_rate": 0.0001, "loss": 1.4133, "step": 9336 }, { "epoch": 1.0725403480558269, "grad_norm": 0.5499141216278076, "learning_rate": 0.0001, "loss": 1.5787, "step": 9337 }, { "epoch": 1.0726552179656539, "grad_norm": 0.5491046905517578, "learning_rate": 0.0001, "loss": 1.389, "step": 9338 }, { "epoch": 1.0727700878754811, "grad_norm": 0.58219975233078, "learning_rate": 0.0001, "loss": 1.5334, "step": 9339 }, { "epoch": 1.0728849577853081, "grad_norm": 0.5443148612976074, "learning_rate": 0.0001, "loss": 1.4332, "step": 9340 }, { "epoch": 1.0729998276951354, "grad_norm": 0.5684245824813843, "learning_rate": 0.0001, "loss": 1.5124, "step": 9341 }, { "epoch": 1.0731146976049624, "grad_norm": 0.6064300537109375, "learning_rate": 0.0001, "loss": 1.5588, "step": 9342 }, { "epoch": 1.0732295675147896, "grad_norm": 0.5258216857910156, "learning_rate": 0.0001, "loss": 1.3385, "step": 9343 }, { "epoch": 1.0733444374246166, "grad_norm": 0.5391144752502441, "learning_rate": 0.0001, "loss": 1.4488, "step": 9344 }, { "epoch": 1.0734593073344438, "grad_norm": 0.5189342498779297, "learning_rate": 0.0001, "loss": 1.4995, "step": 9345 }, { "epoch": 1.0735741772442708, "grad_norm": 0.527388870716095, "learning_rate": 0.0001, "loss": 1.4789, "step": 9346 }, { "epoch": 1.073689047154098, "grad_norm": 0.569824755191803, "learning_rate": 0.0001, "loss": 1.2466, "step": 9347 }, { "epoch": 1.073803917063925, "grad_norm": 0.5429185628890991, "learning_rate": 0.0001, "loss": 1.4456, "step": 9348 }, { "epoch": 1.0739187869737523, "grad_norm": 0.5612589120864868, "learning_rate": 0.0001, "loss": 1.2836, "step": 9349 }, { "epoch": 1.0740336568835793, "grad_norm": 0.562879204750061, "learning_rate": 0.0001, "loss": 1.2671, "step": 9350 }, { "epoch": 1.0741485267934066, "grad_norm": 0.6129858493804932, "learning_rate": 0.0001, "loss": 1.6126, "step": 9351 }, { "epoch": 1.0742633967032336, "grad_norm": 0.5683223009109497, "learning_rate": 0.0001, "loss": 1.5117, "step": 9352 }, { "epoch": 1.0743782666130608, "grad_norm": 0.549032986164093, "learning_rate": 0.0001, "loss": 1.3925, "step": 9353 }, { "epoch": 1.0744931365228878, "grad_norm": 0.5353028774261475, "learning_rate": 0.0001, "loss": 1.4364, "step": 9354 }, { "epoch": 1.074608006432715, "grad_norm": 0.5511956810951233, "learning_rate": 0.0001, "loss": 1.6354, "step": 9355 }, { "epoch": 1.074722876342542, "grad_norm": 0.5705670118331909, "learning_rate": 0.0001, "loss": 1.3258, "step": 9356 }, { "epoch": 1.0748377462523693, "grad_norm": 0.5503584742546082, "learning_rate": 0.0001, "loss": 1.3599, "step": 9357 }, { "epoch": 1.0749526161621963, "grad_norm": 0.48784855008125305, "learning_rate": 0.0001, "loss": 1.3298, "step": 9358 }, { "epoch": 1.0750674860720235, "grad_norm": 0.5487546324729919, "learning_rate": 0.0001, "loss": 1.3159, "step": 9359 }, { "epoch": 1.0751823559818505, "grad_norm": 0.6427622437477112, "learning_rate": 0.0001, "loss": 1.4016, "step": 9360 }, { "epoch": 1.0752972258916778, "grad_norm": 0.5325155258178711, "learning_rate": 0.0001, "loss": 1.4784, "step": 9361 }, { "epoch": 1.0754120958015048, "grad_norm": 0.5536829829216003, "learning_rate": 0.0001, "loss": 1.5445, "step": 9362 }, { "epoch": 1.075526965711332, "grad_norm": 0.49843937158584595, "learning_rate": 0.0001, "loss": 1.6056, "step": 9363 }, { "epoch": 1.075641835621159, "grad_norm": 0.5196369290351868, "learning_rate": 0.0001, "loss": 1.5446, "step": 9364 }, { "epoch": 1.0757567055309862, "grad_norm": 0.5939671993255615, "learning_rate": 0.0001, "loss": 1.532, "step": 9365 }, { "epoch": 1.0758715754408132, "grad_norm": 0.5654057860374451, "learning_rate": 0.0001, "loss": 1.7005, "step": 9366 }, { "epoch": 1.0759864453506405, "grad_norm": 0.5311039686203003, "learning_rate": 0.0001, "loss": 1.4949, "step": 9367 }, { "epoch": 1.0761013152604675, "grad_norm": 0.5418568849563599, "learning_rate": 0.0001, "loss": 1.5952, "step": 9368 }, { "epoch": 1.0762161851702947, "grad_norm": 0.5042740106582642, "learning_rate": 0.0001, "loss": 1.4819, "step": 9369 }, { "epoch": 1.0763310550801217, "grad_norm": 0.5391519665718079, "learning_rate": 0.0001, "loss": 1.5778, "step": 9370 }, { "epoch": 1.076445924989949, "grad_norm": 0.48869240283966064, "learning_rate": 0.0001, "loss": 1.3108, "step": 9371 }, { "epoch": 1.076560794899776, "grad_norm": 0.5286645889282227, "learning_rate": 0.0001, "loss": 1.6169, "step": 9372 }, { "epoch": 1.0766756648096032, "grad_norm": 0.5066417455673218, "learning_rate": 0.0001, "loss": 1.2543, "step": 9373 }, { "epoch": 1.0767905347194302, "grad_norm": 0.5383975505828857, "learning_rate": 0.0001, "loss": 1.2786, "step": 9374 }, { "epoch": 1.0769054046292574, "grad_norm": 0.5614655017852783, "learning_rate": 0.0001, "loss": 1.5742, "step": 9375 }, { "epoch": 1.0770202745390844, "grad_norm": 0.5506321787834167, "learning_rate": 0.0001, "loss": 1.4668, "step": 9376 }, { "epoch": 1.0771351444489117, "grad_norm": 0.6028522253036499, "learning_rate": 0.0001, "loss": 1.6613, "step": 9377 }, { "epoch": 1.0772500143587387, "grad_norm": 0.535199761390686, "learning_rate": 0.0001, "loss": 1.5098, "step": 9378 }, { "epoch": 1.077364884268566, "grad_norm": 0.5362054705619812, "learning_rate": 0.0001, "loss": 1.6251, "step": 9379 }, { "epoch": 1.077479754178393, "grad_norm": 0.5681738257408142, "learning_rate": 0.0001, "loss": 1.6243, "step": 9380 }, { "epoch": 1.0775946240882202, "grad_norm": 0.5353713631629944, "learning_rate": 0.0001, "loss": 1.5445, "step": 9381 }, { "epoch": 1.0777094939980472, "grad_norm": 0.5305677652359009, "learning_rate": 0.0001, "loss": 1.5472, "step": 9382 }, { "epoch": 1.0778243639078744, "grad_norm": 0.5264517068862915, "learning_rate": 0.0001, "loss": 1.3933, "step": 9383 }, { "epoch": 1.0779392338177014, "grad_norm": 0.5325207114219666, "learning_rate": 0.0001, "loss": 1.5097, "step": 9384 }, { "epoch": 1.0780541037275286, "grad_norm": 0.5129626989364624, "learning_rate": 0.0001, "loss": 1.392, "step": 9385 }, { "epoch": 1.0781689736373556, "grad_norm": 0.5592479109764099, "learning_rate": 0.0001, "loss": 1.4948, "step": 9386 }, { "epoch": 1.0782838435471829, "grad_norm": 0.5640060901641846, "learning_rate": 0.0001, "loss": 1.4874, "step": 9387 }, { "epoch": 1.0783987134570099, "grad_norm": 0.5315003991127014, "learning_rate": 0.0001, "loss": 1.4705, "step": 9388 }, { "epoch": 1.0785135833668371, "grad_norm": 0.5562155842781067, "learning_rate": 0.0001, "loss": 1.5838, "step": 9389 }, { "epoch": 1.0786284532766641, "grad_norm": 0.5401049256324768, "learning_rate": 0.0001, "loss": 1.5833, "step": 9390 }, { "epoch": 1.0787433231864914, "grad_norm": 0.5349462032318115, "learning_rate": 0.0001, "loss": 1.471, "step": 9391 }, { "epoch": 1.0788581930963184, "grad_norm": 0.543636679649353, "learning_rate": 0.0001, "loss": 1.4982, "step": 9392 }, { "epoch": 1.0789730630061456, "grad_norm": 0.5344265103340149, "learning_rate": 0.0001, "loss": 1.5697, "step": 9393 }, { "epoch": 1.0790879329159726, "grad_norm": 0.5137317180633545, "learning_rate": 0.0001, "loss": 1.509, "step": 9394 }, { "epoch": 1.0792028028257998, "grad_norm": 0.5402754545211792, "learning_rate": 0.0001, "loss": 1.422, "step": 9395 }, { "epoch": 1.0793176727356268, "grad_norm": 0.6155654191970825, "learning_rate": 0.0001, "loss": 1.7829, "step": 9396 }, { "epoch": 1.079432542645454, "grad_norm": 0.5445329546928406, "learning_rate": 0.0001, "loss": 1.3084, "step": 9397 }, { "epoch": 1.079547412555281, "grad_norm": 0.5331481099128723, "learning_rate": 0.0001, "loss": 1.2886, "step": 9398 }, { "epoch": 1.0796622824651083, "grad_norm": 0.4990472197532654, "learning_rate": 0.0001, "loss": 1.4427, "step": 9399 }, { "epoch": 1.0797771523749353, "grad_norm": 0.5740399956703186, "learning_rate": 0.0001, "loss": 1.5451, "step": 9400 }, { "epoch": 1.0798920222847626, "grad_norm": 0.5508021712303162, "learning_rate": 0.0001, "loss": 1.725, "step": 9401 }, { "epoch": 1.0800068921945896, "grad_norm": 0.5360609889030457, "learning_rate": 0.0001, "loss": 1.5607, "step": 9402 }, { "epoch": 1.0801217621044168, "grad_norm": 0.4839753806591034, "learning_rate": 0.0001, "loss": 1.3208, "step": 9403 }, { "epoch": 1.0802366320142438, "grad_norm": 0.5315743684768677, "learning_rate": 0.0001, "loss": 1.4254, "step": 9404 }, { "epoch": 1.080351501924071, "grad_norm": 0.62214195728302, "learning_rate": 0.0001, "loss": 1.5521, "step": 9405 }, { "epoch": 1.080466371833898, "grad_norm": 0.5492658615112305, "learning_rate": 0.0001, "loss": 1.5138, "step": 9406 }, { "epoch": 1.0805812417437253, "grad_norm": 0.5915753841400146, "learning_rate": 0.0001, "loss": 1.6729, "step": 9407 }, { "epoch": 1.0806961116535523, "grad_norm": 0.49104854464530945, "learning_rate": 0.0001, "loss": 1.284, "step": 9408 }, { "epoch": 1.0808109815633795, "grad_norm": 0.5419100522994995, "learning_rate": 0.0001, "loss": 1.3962, "step": 9409 }, { "epoch": 1.0809258514732065, "grad_norm": 0.5406461358070374, "learning_rate": 0.0001, "loss": 1.4855, "step": 9410 }, { "epoch": 1.0810407213830338, "grad_norm": 0.5958341360092163, "learning_rate": 0.0001, "loss": 1.5927, "step": 9411 }, { "epoch": 1.0811555912928608, "grad_norm": 0.5315396189689636, "learning_rate": 0.0001, "loss": 1.5714, "step": 9412 }, { "epoch": 1.081270461202688, "grad_norm": 0.5059750080108643, "learning_rate": 0.0001, "loss": 1.3242, "step": 9413 }, { "epoch": 1.081385331112515, "grad_norm": 0.5587995052337646, "learning_rate": 0.0001, "loss": 1.446, "step": 9414 }, { "epoch": 1.0815002010223422, "grad_norm": 0.5836725831031799, "learning_rate": 0.0001, "loss": 1.6805, "step": 9415 }, { "epoch": 1.0816150709321692, "grad_norm": 0.5699766278266907, "learning_rate": 0.0001, "loss": 1.3352, "step": 9416 }, { "epoch": 1.0817299408419965, "grad_norm": 0.5126357078552246, "learning_rate": 0.0001, "loss": 1.3888, "step": 9417 }, { "epoch": 1.0818448107518235, "grad_norm": 0.5475831627845764, "learning_rate": 0.0001, "loss": 1.6174, "step": 9418 }, { "epoch": 1.0819596806616507, "grad_norm": 0.5603591203689575, "learning_rate": 0.0001, "loss": 1.5595, "step": 9419 }, { "epoch": 1.0820745505714777, "grad_norm": 0.5825121998786926, "learning_rate": 0.0001, "loss": 1.5418, "step": 9420 }, { "epoch": 1.082189420481305, "grad_norm": 0.5266685485839844, "learning_rate": 0.0001, "loss": 1.5018, "step": 9421 }, { "epoch": 1.082304290391132, "grad_norm": 0.5489354133605957, "learning_rate": 0.0001, "loss": 1.3732, "step": 9422 }, { "epoch": 1.0824191603009592, "grad_norm": 0.5415682792663574, "learning_rate": 0.0001, "loss": 1.4901, "step": 9423 }, { "epoch": 1.0825340302107862, "grad_norm": 0.5336417555809021, "learning_rate": 0.0001, "loss": 1.4311, "step": 9424 }, { "epoch": 1.0826489001206134, "grad_norm": 0.5154435038566589, "learning_rate": 0.0001, "loss": 1.4462, "step": 9425 }, { "epoch": 1.0827637700304404, "grad_norm": 0.5345861911773682, "learning_rate": 0.0001, "loss": 1.3614, "step": 9426 }, { "epoch": 1.0828786399402677, "grad_norm": 0.5687355399131775, "learning_rate": 0.0001, "loss": 1.4912, "step": 9427 }, { "epoch": 1.0829935098500947, "grad_norm": 0.508594810962677, "learning_rate": 0.0001, "loss": 1.471, "step": 9428 }, { "epoch": 1.083108379759922, "grad_norm": 0.5026642084121704, "learning_rate": 0.0001, "loss": 1.3456, "step": 9429 }, { "epoch": 1.083223249669749, "grad_norm": 0.5498985648155212, "learning_rate": 0.0001, "loss": 1.3439, "step": 9430 }, { "epoch": 1.0833381195795762, "grad_norm": 0.5488834977149963, "learning_rate": 0.0001, "loss": 1.5431, "step": 9431 }, { "epoch": 1.0834529894894032, "grad_norm": 0.5577558279037476, "learning_rate": 0.0001, "loss": 1.4781, "step": 9432 }, { "epoch": 1.0835678593992304, "grad_norm": 0.5541431307792664, "learning_rate": 0.0001, "loss": 1.5649, "step": 9433 }, { "epoch": 1.0836827293090574, "grad_norm": 0.6540974378585815, "learning_rate": 0.0001, "loss": 1.7835, "step": 9434 }, { "epoch": 1.0837975992188846, "grad_norm": 0.6181141138076782, "learning_rate": 0.0001, "loss": 1.5425, "step": 9435 }, { "epoch": 1.0839124691287116, "grad_norm": 0.5547922849655151, "learning_rate": 0.0001, "loss": 1.4657, "step": 9436 }, { "epoch": 1.0840273390385389, "grad_norm": 0.5842359066009521, "learning_rate": 0.0001, "loss": 1.5911, "step": 9437 }, { "epoch": 1.0841422089483659, "grad_norm": 0.5383824110031128, "learning_rate": 0.0001, "loss": 1.5272, "step": 9438 }, { "epoch": 1.0842570788581931, "grad_norm": 0.5450701713562012, "learning_rate": 0.0001, "loss": 1.5627, "step": 9439 }, { "epoch": 1.0843719487680201, "grad_norm": 0.5500561594963074, "learning_rate": 0.0001, "loss": 1.5371, "step": 9440 }, { "epoch": 1.0844868186778474, "grad_norm": 0.6040080785751343, "learning_rate": 0.0001, "loss": 1.5762, "step": 9441 }, { "epoch": 1.0846016885876744, "grad_norm": 0.5557257533073425, "learning_rate": 0.0001, "loss": 1.6202, "step": 9442 }, { "epoch": 1.0847165584975016, "grad_norm": 0.5492468476295471, "learning_rate": 0.0001, "loss": 1.5363, "step": 9443 }, { "epoch": 1.0848314284073286, "grad_norm": 0.5362855195999146, "learning_rate": 0.0001, "loss": 1.3116, "step": 9444 }, { "epoch": 1.0849462983171558, "grad_norm": 0.491189569234848, "learning_rate": 0.0001, "loss": 1.1433, "step": 9445 }, { "epoch": 1.0850611682269828, "grad_norm": 0.561486005783081, "learning_rate": 0.0001, "loss": 1.5065, "step": 9446 }, { "epoch": 1.08517603813681, "grad_norm": 0.5247159600257874, "learning_rate": 0.0001, "loss": 1.5988, "step": 9447 }, { "epoch": 1.085290908046637, "grad_norm": 0.5002375841140747, "learning_rate": 0.0001, "loss": 1.3614, "step": 9448 }, { "epoch": 1.0854057779564643, "grad_norm": 0.5415583252906799, "learning_rate": 0.0001, "loss": 1.3459, "step": 9449 }, { "epoch": 1.0855206478662913, "grad_norm": 0.5931771993637085, "learning_rate": 0.0001, "loss": 1.5553, "step": 9450 }, { "epoch": 1.0856355177761186, "grad_norm": 0.6163171529769897, "learning_rate": 0.0001, "loss": 1.5842, "step": 9451 }, { "epoch": 1.0857503876859456, "grad_norm": 0.5527577996253967, "learning_rate": 0.0001, "loss": 1.3209, "step": 9452 }, { "epoch": 1.0858652575957728, "grad_norm": 0.5498605966567993, "learning_rate": 0.0001, "loss": 1.3325, "step": 9453 }, { "epoch": 1.0859801275056, "grad_norm": 0.5718228220939636, "learning_rate": 0.0001, "loss": 1.5869, "step": 9454 }, { "epoch": 1.086094997415427, "grad_norm": 0.537427544593811, "learning_rate": 0.0001, "loss": 1.3966, "step": 9455 }, { "epoch": 1.086209867325254, "grad_norm": 0.5483804941177368, "learning_rate": 0.0001, "loss": 1.2163, "step": 9456 }, { "epoch": 1.0863247372350813, "grad_norm": 0.5748531222343445, "learning_rate": 0.0001, "loss": 1.4913, "step": 9457 }, { "epoch": 1.0864396071449085, "grad_norm": 0.5303325653076172, "learning_rate": 0.0001, "loss": 1.5366, "step": 9458 }, { "epoch": 1.0865544770547355, "grad_norm": 0.5701431632041931, "learning_rate": 0.0001, "loss": 1.5886, "step": 9459 }, { "epoch": 1.0866693469645625, "grad_norm": 0.5297312140464783, "learning_rate": 0.0001, "loss": 1.49, "step": 9460 }, { "epoch": 1.0867842168743898, "grad_norm": 0.5745691061019897, "learning_rate": 0.0001, "loss": 1.6774, "step": 9461 }, { "epoch": 1.086899086784217, "grad_norm": 0.5294743180274963, "learning_rate": 0.0001, "loss": 1.3204, "step": 9462 }, { "epoch": 1.087013956694044, "grad_norm": 0.5992701649665833, "learning_rate": 0.0001, "loss": 1.3512, "step": 9463 }, { "epoch": 1.087128826603871, "grad_norm": 0.5554730892181396, "learning_rate": 0.0001, "loss": 1.5883, "step": 9464 }, { "epoch": 1.0872436965136982, "grad_norm": 0.555023193359375, "learning_rate": 0.0001, "loss": 1.5655, "step": 9465 }, { "epoch": 1.0873585664235255, "grad_norm": 0.5271770358085632, "learning_rate": 0.0001, "loss": 1.2415, "step": 9466 }, { "epoch": 1.0874734363333525, "grad_norm": 0.520576536655426, "learning_rate": 0.0001, "loss": 1.6088, "step": 9467 }, { "epoch": 1.0875883062431795, "grad_norm": 0.5427173972129822, "learning_rate": 0.0001, "loss": 1.6376, "step": 9468 }, { "epoch": 1.0877031761530067, "grad_norm": 0.5307012796401978, "learning_rate": 0.0001, "loss": 1.3187, "step": 9469 }, { "epoch": 1.087818046062834, "grad_norm": 0.49438974261283875, "learning_rate": 0.0001, "loss": 1.3237, "step": 9470 }, { "epoch": 1.087932915972661, "grad_norm": 0.5442484617233276, "learning_rate": 0.0001, "loss": 1.5418, "step": 9471 }, { "epoch": 1.0880477858824882, "grad_norm": 0.5300331711769104, "learning_rate": 0.0001, "loss": 1.6, "step": 9472 }, { "epoch": 1.0881626557923152, "grad_norm": 0.5837119817733765, "learning_rate": 0.0001, "loss": 1.656, "step": 9473 }, { "epoch": 1.0882775257021424, "grad_norm": 0.5622014999389648, "learning_rate": 0.0001, "loss": 1.5176, "step": 9474 }, { "epoch": 1.0883923956119694, "grad_norm": 0.5535881519317627, "learning_rate": 0.0001, "loss": 1.4017, "step": 9475 }, { "epoch": 1.0885072655217967, "grad_norm": 0.5968254804611206, "learning_rate": 0.0001, "loss": 1.6419, "step": 9476 }, { "epoch": 1.0886221354316237, "grad_norm": 0.5204367637634277, "learning_rate": 0.0001, "loss": 1.4491, "step": 9477 }, { "epoch": 1.088737005341451, "grad_norm": 0.5856305956840515, "learning_rate": 0.0001, "loss": 1.6327, "step": 9478 }, { "epoch": 1.088851875251278, "grad_norm": 0.5582146048545837, "learning_rate": 0.0001, "loss": 1.5893, "step": 9479 }, { "epoch": 1.0889667451611051, "grad_norm": 0.5371931195259094, "learning_rate": 0.0001, "loss": 1.4195, "step": 9480 }, { "epoch": 1.0890816150709322, "grad_norm": 0.5727693438529968, "learning_rate": 0.0001, "loss": 1.5191, "step": 9481 }, { "epoch": 1.0891964849807594, "grad_norm": 0.5168283581733704, "learning_rate": 0.0001, "loss": 1.4037, "step": 9482 }, { "epoch": 1.0893113548905864, "grad_norm": 0.5147133469581604, "learning_rate": 0.0001, "loss": 1.3784, "step": 9483 }, { "epoch": 1.0894262248004136, "grad_norm": 0.5450608730316162, "learning_rate": 0.0001, "loss": 1.5136, "step": 9484 }, { "epoch": 1.0895410947102406, "grad_norm": 0.5697365403175354, "learning_rate": 0.0001, "loss": 1.5325, "step": 9485 }, { "epoch": 1.0896559646200679, "grad_norm": 0.5053684711456299, "learning_rate": 0.0001, "loss": 1.3655, "step": 9486 }, { "epoch": 1.0897708345298949, "grad_norm": 0.5777620673179626, "learning_rate": 0.0001, "loss": 1.5045, "step": 9487 }, { "epoch": 1.089885704439722, "grad_norm": 0.5835351943969727, "learning_rate": 0.0001, "loss": 1.3695, "step": 9488 }, { "epoch": 1.0900005743495491, "grad_norm": 0.5509904623031616, "learning_rate": 0.0001, "loss": 1.4987, "step": 9489 }, { "epoch": 1.0901154442593763, "grad_norm": 0.5363668203353882, "learning_rate": 0.0001, "loss": 1.321, "step": 9490 }, { "epoch": 1.0902303141692034, "grad_norm": 0.5192736983299255, "learning_rate": 0.0001, "loss": 1.4694, "step": 9491 }, { "epoch": 1.0903451840790306, "grad_norm": 0.5779258608818054, "learning_rate": 0.0001, "loss": 1.4376, "step": 9492 }, { "epoch": 1.0904600539888576, "grad_norm": 0.5055257678031921, "learning_rate": 0.0001, "loss": 1.3177, "step": 9493 }, { "epoch": 1.0905749238986848, "grad_norm": 0.5672529935836792, "learning_rate": 0.0001, "loss": 1.5611, "step": 9494 }, { "epoch": 1.0906897938085118, "grad_norm": 0.5167456865310669, "learning_rate": 0.0001, "loss": 1.4562, "step": 9495 }, { "epoch": 1.090804663718339, "grad_norm": 0.522533118724823, "learning_rate": 0.0001, "loss": 1.4801, "step": 9496 }, { "epoch": 1.090919533628166, "grad_norm": 0.5050001740455627, "learning_rate": 0.0001, "loss": 1.5072, "step": 9497 }, { "epoch": 1.0910344035379933, "grad_norm": 0.5358273983001709, "learning_rate": 0.0001, "loss": 1.496, "step": 9498 }, { "epoch": 1.0911492734478203, "grad_norm": 0.5162444114685059, "learning_rate": 0.0001, "loss": 1.4414, "step": 9499 }, { "epoch": 1.0912641433576475, "grad_norm": 0.533893346786499, "learning_rate": 0.0001, "loss": 1.4475, "step": 9500 }, { "epoch": 1.0913790132674746, "grad_norm": 0.5578415989875793, "learning_rate": 0.0001, "loss": 1.5303, "step": 9501 }, { "epoch": 1.0914938831773018, "grad_norm": 0.5179681777954102, "learning_rate": 0.0001, "loss": 1.4064, "step": 9502 }, { "epoch": 1.0916087530871288, "grad_norm": 0.5305806994438171, "learning_rate": 0.0001, "loss": 1.5176, "step": 9503 }, { "epoch": 1.091723622996956, "grad_norm": 0.5723759531974792, "learning_rate": 0.0001, "loss": 1.1344, "step": 9504 }, { "epoch": 1.091838492906783, "grad_norm": 0.520270049571991, "learning_rate": 0.0001, "loss": 1.3782, "step": 9505 }, { "epoch": 1.0919533628166103, "grad_norm": 0.5493273138999939, "learning_rate": 0.0001, "loss": 1.4887, "step": 9506 }, { "epoch": 1.0920682327264373, "grad_norm": 0.5690314769744873, "learning_rate": 0.0001, "loss": 1.49, "step": 9507 }, { "epoch": 1.0921831026362645, "grad_norm": 0.5376046895980835, "learning_rate": 0.0001, "loss": 1.4984, "step": 9508 }, { "epoch": 1.0922979725460915, "grad_norm": 0.5700758695602417, "learning_rate": 0.0001, "loss": 1.5265, "step": 9509 }, { "epoch": 1.0924128424559187, "grad_norm": 0.5583578944206238, "learning_rate": 0.0001, "loss": 1.4936, "step": 9510 }, { "epoch": 1.0925277123657458, "grad_norm": 0.5892508625984192, "learning_rate": 0.0001, "loss": 1.5108, "step": 9511 }, { "epoch": 1.092642582275573, "grad_norm": 0.5539071559906006, "learning_rate": 0.0001, "loss": 1.4654, "step": 9512 }, { "epoch": 1.0927574521854, "grad_norm": 0.5687013268470764, "learning_rate": 0.0001, "loss": 1.4186, "step": 9513 }, { "epoch": 1.0928723220952272, "grad_norm": 0.612593412399292, "learning_rate": 0.0001, "loss": 1.6754, "step": 9514 }, { "epoch": 1.0929871920050542, "grad_norm": 0.5612267851829529, "learning_rate": 0.0001, "loss": 1.5463, "step": 9515 }, { "epoch": 1.0931020619148815, "grad_norm": 0.5222997665405273, "learning_rate": 0.0001, "loss": 1.3799, "step": 9516 }, { "epoch": 1.0932169318247085, "grad_norm": 0.5479604005813599, "learning_rate": 0.0001, "loss": 1.5003, "step": 9517 }, { "epoch": 1.0933318017345357, "grad_norm": 0.5224593281745911, "learning_rate": 0.0001, "loss": 1.4149, "step": 9518 }, { "epoch": 1.0934466716443627, "grad_norm": 0.524739682674408, "learning_rate": 0.0001, "loss": 1.3937, "step": 9519 }, { "epoch": 1.09356154155419, "grad_norm": 0.5201284885406494, "learning_rate": 0.0001, "loss": 1.5372, "step": 9520 }, { "epoch": 1.093676411464017, "grad_norm": 0.5348097681999207, "learning_rate": 0.0001, "loss": 1.3621, "step": 9521 }, { "epoch": 1.0937912813738442, "grad_norm": 0.5572234988212585, "learning_rate": 0.0001, "loss": 1.3629, "step": 9522 }, { "epoch": 1.0939061512836712, "grad_norm": 0.5672652721405029, "learning_rate": 0.0001, "loss": 1.6203, "step": 9523 }, { "epoch": 1.0940210211934984, "grad_norm": 0.5444334149360657, "learning_rate": 0.0001, "loss": 1.7165, "step": 9524 }, { "epoch": 1.0941358911033254, "grad_norm": 0.54752516746521, "learning_rate": 0.0001, "loss": 1.4252, "step": 9525 }, { "epoch": 1.0942507610131527, "grad_norm": 0.49809858202934265, "learning_rate": 0.0001, "loss": 1.387, "step": 9526 }, { "epoch": 1.0943656309229797, "grad_norm": 0.512008786201477, "learning_rate": 0.0001, "loss": 1.295, "step": 9527 }, { "epoch": 1.094480500832807, "grad_norm": 0.5522312521934509, "learning_rate": 0.0001, "loss": 1.4827, "step": 9528 }, { "epoch": 1.094595370742634, "grad_norm": 0.5582612752914429, "learning_rate": 0.0001, "loss": 1.4605, "step": 9529 }, { "epoch": 1.0947102406524611, "grad_norm": 0.48772454261779785, "learning_rate": 0.0001, "loss": 1.3134, "step": 9530 }, { "epoch": 1.0948251105622882, "grad_norm": 0.49887439608573914, "learning_rate": 0.0001, "loss": 1.2972, "step": 9531 }, { "epoch": 1.0949399804721154, "grad_norm": 0.5389269590377808, "learning_rate": 0.0001, "loss": 1.5936, "step": 9532 }, { "epoch": 1.0950548503819424, "grad_norm": 0.5234885215759277, "learning_rate": 0.0001, "loss": 1.3984, "step": 9533 }, { "epoch": 1.0951697202917696, "grad_norm": 0.5532096028327942, "learning_rate": 0.0001, "loss": 1.7103, "step": 9534 }, { "epoch": 1.0952845902015966, "grad_norm": 0.5728646516799927, "learning_rate": 0.0001, "loss": 1.5835, "step": 9535 }, { "epoch": 1.0953994601114239, "grad_norm": 0.5275560021400452, "learning_rate": 0.0001, "loss": 1.5167, "step": 9536 }, { "epoch": 1.0955143300212509, "grad_norm": 0.5168836116790771, "learning_rate": 0.0001, "loss": 1.4736, "step": 9537 }, { "epoch": 1.095629199931078, "grad_norm": 0.5980050563812256, "learning_rate": 0.0001, "loss": 1.5943, "step": 9538 }, { "epoch": 1.095744069840905, "grad_norm": 0.553145706653595, "learning_rate": 0.0001, "loss": 1.5197, "step": 9539 }, { "epoch": 1.0958589397507323, "grad_norm": 0.5515928864479065, "learning_rate": 0.0001, "loss": 1.3338, "step": 9540 }, { "epoch": 1.0959738096605594, "grad_norm": 0.5219181776046753, "learning_rate": 0.0001, "loss": 1.3579, "step": 9541 }, { "epoch": 1.0960886795703866, "grad_norm": 0.4816209077835083, "learning_rate": 0.0001, "loss": 1.3447, "step": 9542 }, { "epoch": 1.0962035494802136, "grad_norm": 0.5502380132675171, "learning_rate": 0.0001, "loss": 1.5632, "step": 9543 }, { "epoch": 1.0963184193900408, "grad_norm": 0.5180455446243286, "learning_rate": 0.0001, "loss": 1.4077, "step": 9544 }, { "epoch": 1.0964332892998678, "grad_norm": 0.524360716342926, "learning_rate": 0.0001, "loss": 1.3433, "step": 9545 }, { "epoch": 1.096548159209695, "grad_norm": 0.5326607823371887, "learning_rate": 0.0001, "loss": 1.4864, "step": 9546 }, { "epoch": 1.096663029119522, "grad_norm": 0.578113853931427, "learning_rate": 0.0001, "loss": 1.3848, "step": 9547 }, { "epoch": 1.0967778990293493, "grad_norm": 0.5228033065795898, "learning_rate": 0.0001, "loss": 1.4384, "step": 9548 }, { "epoch": 1.0968927689391763, "grad_norm": 0.5391709804534912, "learning_rate": 0.0001, "loss": 1.5039, "step": 9549 }, { "epoch": 1.0970076388490035, "grad_norm": 0.5661507248878479, "learning_rate": 0.0001, "loss": 1.5187, "step": 9550 }, { "epoch": 1.0971225087588306, "grad_norm": 0.5772484540939331, "learning_rate": 0.0001, "loss": 1.526, "step": 9551 }, { "epoch": 1.0972373786686578, "grad_norm": 0.5205997824668884, "learning_rate": 0.0001, "loss": 1.2596, "step": 9552 }, { "epoch": 1.0973522485784848, "grad_norm": 0.4969817101955414, "learning_rate": 0.0001, "loss": 1.3083, "step": 9553 }, { "epoch": 1.097467118488312, "grad_norm": 0.5085188746452332, "learning_rate": 0.0001, "loss": 1.4074, "step": 9554 }, { "epoch": 1.097581988398139, "grad_norm": 0.5870746970176697, "learning_rate": 0.0001, "loss": 1.5538, "step": 9555 }, { "epoch": 1.0976968583079663, "grad_norm": 0.5884862542152405, "learning_rate": 0.0001, "loss": 1.5905, "step": 9556 }, { "epoch": 1.0978117282177933, "grad_norm": 0.6046068668365479, "learning_rate": 0.0001, "loss": 1.5751, "step": 9557 }, { "epoch": 1.0979265981276205, "grad_norm": 0.5737109780311584, "learning_rate": 0.0001, "loss": 1.5331, "step": 9558 }, { "epoch": 1.0980414680374475, "grad_norm": 0.5459645390510559, "learning_rate": 0.0001, "loss": 1.4078, "step": 9559 }, { "epoch": 1.0981563379472747, "grad_norm": 0.5659840106964111, "learning_rate": 0.0001, "loss": 1.5431, "step": 9560 }, { "epoch": 1.0982712078571017, "grad_norm": 0.6050505042076111, "learning_rate": 0.0001, "loss": 1.4345, "step": 9561 }, { "epoch": 1.098386077766929, "grad_norm": 0.5667203068733215, "learning_rate": 0.0001, "loss": 1.4814, "step": 9562 }, { "epoch": 1.098500947676756, "grad_norm": 0.5636523962020874, "learning_rate": 0.0001, "loss": 1.6369, "step": 9563 }, { "epoch": 1.0986158175865832, "grad_norm": 0.6931208372116089, "learning_rate": 0.0001, "loss": 1.5261, "step": 9564 }, { "epoch": 1.0987306874964102, "grad_norm": 0.6440662145614624, "learning_rate": 0.0001, "loss": 1.7034, "step": 9565 }, { "epoch": 1.0988455574062375, "grad_norm": 0.5347530245780945, "learning_rate": 0.0001, "loss": 1.4818, "step": 9566 }, { "epoch": 1.0989604273160645, "grad_norm": 0.504833996295929, "learning_rate": 0.0001, "loss": 1.3958, "step": 9567 }, { "epoch": 1.0990752972258917, "grad_norm": 0.5506554245948792, "learning_rate": 0.0001, "loss": 1.3691, "step": 9568 }, { "epoch": 1.0991901671357187, "grad_norm": 0.573026180267334, "learning_rate": 0.0001, "loss": 1.6405, "step": 9569 }, { "epoch": 1.099305037045546, "grad_norm": 0.5038986206054688, "learning_rate": 0.0001, "loss": 1.4027, "step": 9570 }, { "epoch": 1.099419906955373, "grad_norm": 0.5019423961639404, "learning_rate": 0.0001, "loss": 1.239, "step": 9571 }, { "epoch": 1.0995347768652002, "grad_norm": 0.5748392343521118, "learning_rate": 0.0001, "loss": 1.346, "step": 9572 }, { "epoch": 1.0996496467750272, "grad_norm": 0.5261626243591309, "learning_rate": 0.0001, "loss": 1.3476, "step": 9573 }, { "epoch": 1.0997645166848544, "grad_norm": 0.5816366672515869, "learning_rate": 0.0001, "loss": 1.6714, "step": 9574 }, { "epoch": 1.0998793865946814, "grad_norm": 0.5417282581329346, "learning_rate": 0.0001, "loss": 1.3579, "step": 9575 }, { "epoch": 1.0999942565045087, "grad_norm": 0.5732975602149963, "learning_rate": 0.0001, "loss": 1.6473, "step": 9576 }, { "epoch": 1.1001091264143357, "grad_norm": 0.532828688621521, "learning_rate": 0.0001, "loss": 1.438, "step": 9577 }, { "epoch": 1.100223996324163, "grad_norm": 0.5136977434158325, "learning_rate": 0.0001, "loss": 1.4017, "step": 9578 }, { "epoch": 1.10033886623399, "grad_norm": 0.5484635233879089, "learning_rate": 0.0001, "loss": 1.5923, "step": 9579 }, { "epoch": 1.1004537361438171, "grad_norm": 0.5523190498352051, "learning_rate": 0.0001, "loss": 1.4729, "step": 9580 }, { "epoch": 1.1005686060536441, "grad_norm": 0.5286394357681274, "learning_rate": 0.0001, "loss": 1.5283, "step": 9581 }, { "epoch": 1.1006834759634714, "grad_norm": 0.5363751649856567, "learning_rate": 0.0001, "loss": 1.3553, "step": 9582 }, { "epoch": 1.1007983458732984, "grad_norm": 0.6055129766464233, "learning_rate": 0.0001, "loss": 1.7219, "step": 9583 }, { "epoch": 1.1009132157831256, "grad_norm": 0.5629612803459167, "learning_rate": 0.0001, "loss": 1.1768, "step": 9584 }, { "epoch": 1.1010280856929526, "grad_norm": 0.5564055442810059, "learning_rate": 0.0001, "loss": 1.4302, "step": 9585 }, { "epoch": 1.1011429556027799, "grad_norm": 0.6236995458602905, "learning_rate": 0.0001, "loss": 1.6443, "step": 9586 }, { "epoch": 1.1012578255126069, "grad_norm": 0.5221498012542725, "learning_rate": 0.0001, "loss": 1.4249, "step": 9587 }, { "epoch": 1.101372695422434, "grad_norm": 0.5449710488319397, "learning_rate": 0.0001, "loss": 1.5856, "step": 9588 }, { "epoch": 1.101487565332261, "grad_norm": 0.5365622043609619, "learning_rate": 0.0001, "loss": 1.4603, "step": 9589 }, { "epoch": 1.1016024352420883, "grad_norm": 0.5306606888771057, "learning_rate": 0.0001, "loss": 1.232, "step": 9590 }, { "epoch": 1.1017173051519156, "grad_norm": 0.5195592045783997, "learning_rate": 0.0001, "loss": 1.4095, "step": 9591 }, { "epoch": 1.1018321750617426, "grad_norm": 0.5111302733421326, "learning_rate": 0.0001, "loss": 1.4183, "step": 9592 }, { "epoch": 1.1019470449715696, "grad_norm": 0.6306554079055786, "learning_rate": 0.0001, "loss": 1.5632, "step": 9593 }, { "epoch": 1.1020619148813968, "grad_norm": 0.5819112062454224, "learning_rate": 0.0001, "loss": 1.5985, "step": 9594 }, { "epoch": 1.102176784791224, "grad_norm": 0.5284945368766785, "learning_rate": 0.0001, "loss": 1.457, "step": 9595 }, { "epoch": 1.102291654701051, "grad_norm": 0.5951351523399353, "learning_rate": 0.0001, "loss": 1.6157, "step": 9596 }, { "epoch": 1.102406524610878, "grad_norm": 0.5186183452606201, "learning_rate": 0.0001, "loss": 1.452, "step": 9597 }, { "epoch": 1.1025213945207053, "grad_norm": 0.5780056118965149, "learning_rate": 0.0001, "loss": 1.4106, "step": 9598 }, { "epoch": 1.1026362644305325, "grad_norm": 0.5407369136810303, "learning_rate": 0.0001, "loss": 1.4405, "step": 9599 }, { "epoch": 1.1027511343403595, "grad_norm": 0.5503196120262146, "learning_rate": 0.0001, "loss": 1.6157, "step": 9600 }, { "epoch": 1.1028660042501865, "grad_norm": 0.5217320919036865, "learning_rate": 0.0001, "loss": 1.5486, "step": 9601 }, { "epoch": 1.1029808741600138, "grad_norm": 0.5520971417427063, "learning_rate": 0.0001, "loss": 1.5373, "step": 9602 }, { "epoch": 1.103095744069841, "grad_norm": 0.531853973865509, "learning_rate": 0.0001, "loss": 1.2797, "step": 9603 }, { "epoch": 1.103210613979668, "grad_norm": 0.5419740676879883, "learning_rate": 0.0001, "loss": 1.4649, "step": 9604 }, { "epoch": 1.103325483889495, "grad_norm": 0.5819375514984131, "learning_rate": 0.0001, "loss": 1.3623, "step": 9605 }, { "epoch": 1.1034403537993223, "grad_norm": 0.5647183656692505, "learning_rate": 0.0001, "loss": 1.6388, "step": 9606 }, { "epoch": 1.1035552237091495, "grad_norm": 0.5452742576599121, "learning_rate": 0.0001, "loss": 1.5939, "step": 9607 }, { "epoch": 1.1036700936189765, "grad_norm": 0.5725043416023254, "learning_rate": 0.0001, "loss": 1.5899, "step": 9608 }, { "epoch": 1.1037849635288037, "grad_norm": 0.5144510269165039, "learning_rate": 0.0001, "loss": 1.5325, "step": 9609 }, { "epoch": 1.1038998334386307, "grad_norm": 0.5603897571563721, "learning_rate": 0.0001, "loss": 1.4753, "step": 9610 }, { "epoch": 1.104014703348458, "grad_norm": 0.5356127023696899, "learning_rate": 0.0001, "loss": 1.2814, "step": 9611 }, { "epoch": 1.104129573258285, "grad_norm": 0.5167484879493713, "learning_rate": 0.0001, "loss": 1.4795, "step": 9612 }, { "epoch": 1.1042444431681122, "grad_norm": 0.5493837594985962, "learning_rate": 0.0001, "loss": 1.226, "step": 9613 }, { "epoch": 1.1043593130779392, "grad_norm": 0.5558966398239136, "learning_rate": 0.0001, "loss": 1.6028, "step": 9614 }, { "epoch": 1.1044741829877665, "grad_norm": 0.5911669135093689, "learning_rate": 0.0001, "loss": 1.4025, "step": 9615 }, { "epoch": 1.1045890528975935, "grad_norm": 0.5736728310585022, "learning_rate": 0.0001, "loss": 1.2921, "step": 9616 }, { "epoch": 1.1047039228074207, "grad_norm": 0.55420982837677, "learning_rate": 0.0001, "loss": 1.4201, "step": 9617 }, { "epoch": 1.1048187927172477, "grad_norm": 0.6312325596809387, "learning_rate": 0.0001, "loss": 1.5217, "step": 9618 }, { "epoch": 1.104933662627075, "grad_norm": 0.5303093194961548, "learning_rate": 0.0001, "loss": 1.3404, "step": 9619 }, { "epoch": 1.105048532536902, "grad_norm": 0.5138710737228394, "learning_rate": 0.0001, "loss": 1.4007, "step": 9620 }, { "epoch": 1.1051634024467292, "grad_norm": 0.6012336015701294, "learning_rate": 0.0001, "loss": 1.632, "step": 9621 }, { "epoch": 1.1052782723565562, "grad_norm": 0.5534988641738892, "learning_rate": 0.0001, "loss": 1.6167, "step": 9622 }, { "epoch": 1.1053931422663834, "grad_norm": 0.5863144993782043, "learning_rate": 0.0001, "loss": 1.5825, "step": 9623 }, { "epoch": 1.1055080121762104, "grad_norm": 0.5527574419975281, "learning_rate": 0.0001, "loss": 1.6297, "step": 9624 }, { "epoch": 1.1056228820860377, "grad_norm": 0.5487685203552246, "learning_rate": 0.0001, "loss": 1.3806, "step": 9625 }, { "epoch": 1.1057377519958647, "grad_norm": 0.541068971157074, "learning_rate": 0.0001, "loss": 1.5204, "step": 9626 }, { "epoch": 1.105852621905692, "grad_norm": 0.5597257018089294, "learning_rate": 0.0001, "loss": 1.6394, "step": 9627 }, { "epoch": 1.105967491815519, "grad_norm": 0.5184458494186401, "learning_rate": 0.0001, "loss": 1.5862, "step": 9628 }, { "epoch": 1.1060823617253461, "grad_norm": 0.6078442931175232, "learning_rate": 0.0001, "loss": 1.6372, "step": 9629 }, { "epoch": 1.1061972316351731, "grad_norm": 0.5217808485031128, "learning_rate": 0.0001, "loss": 1.4827, "step": 9630 }, { "epoch": 1.1063121015450004, "grad_norm": 0.5544619560241699, "learning_rate": 0.0001, "loss": 1.5177, "step": 9631 }, { "epoch": 1.1064269714548274, "grad_norm": 0.5434312224388123, "learning_rate": 0.0001, "loss": 1.497, "step": 9632 }, { "epoch": 1.1065418413646546, "grad_norm": 0.5772722959518433, "learning_rate": 0.0001, "loss": 1.2362, "step": 9633 }, { "epoch": 1.1066567112744816, "grad_norm": 0.571907103061676, "learning_rate": 0.0001, "loss": 1.4295, "step": 9634 }, { "epoch": 1.1067715811843089, "grad_norm": 0.5757927894592285, "learning_rate": 0.0001, "loss": 1.437, "step": 9635 }, { "epoch": 1.1068864510941359, "grad_norm": 0.5998075008392334, "learning_rate": 0.0001, "loss": 1.6233, "step": 9636 }, { "epoch": 1.107001321003963, "grad_norm": 0.5600723624229431, "learning_rate": 0.0001, "loss": 1.624, "step": 9637 }, { "epoch": 1.10711619091379, "grad_norm": 0.5362415909767151, "learning_rate": 0.0001, "loss": 1.4906, "step": 9638 }, { "epoch": 1.1072310608236173, "grad_norm": 0.5417195558547974, "learning_rate": 0.0001, "loss": 1.4588, "step": 9639 }, { "epoch": 1.1073459307334443, "grad_norm": 0.5452782511711121, "learning_rate": 0.0001, "loss": 1.5054, "step": 9640 }, { "epoch": 1.1074608006432716, "grad_norm": 0.565497100353241, "learning_rate": 0.0001, "loss": 1.626, "step": 9641 }, { "epoch": 1.1075756705530986, "grad_norm": 0.5098233222961426, "learning_rate": 0.0001, "loss": 1.2684, "step": 9642 }, { "epoch": 1.1076905404629258, "grad_norm": 0.5035114288330078, "learning_rate": 0.0001, "loss": 1.4774, "step": 9643 }, { "epoch": 1.1078054103727528, "grad_norm": 0.5948421359062195, "learning_rate": 0.0001, "loss": 1.198, "step": 9644 }, { "epoch": 1.10792028028258, "grad_norm": 0.5490888357162476, "learning_rate": 0.0001, "loss": 1.576, "step": 9645 }, { "epoch": 1.108035150192407, "grad_norm": 0.5749881267547607, "learning_rate": 0.0001, "loss": 1.6602, "step": 9646 }, { "epoch": 1.1081500201022343, "grad_norm": 0.5411537885665894, "learning_rate": 0.0001, "loss": 1.4558, "step": 9647 }, { "epoch": 1.1082648900120613, "grad_norm": 0.5704604387283325, "learning_rate": 0.0001, "loss": 1.5484, "step": 9648 }, { "epoch": 1.1083797599218885, "grad_norm": 0.5247997641563416, "learning_rate": 0.0001, "loss": 1.2063, "step": 9649 }, { "epoch": 1.1084946298317155, "grad_norm": 0.5762208700180054, "learning_rate": 0.0001, "loss": 1.6697, "step": 9650 }, { "epoch": 1.1086094997415428, "grad_norm": 0.5289835333824158, "learning_rate": 0.0001, "loss": 1.3045, "step": 9651 }, { "epoch": 1.1087243696513698, "grad_norm": 0.5333086848258972, "learning_rate": 0.0001, "loss": 1.5886, "step": 9652 }, { "epoch": 1.108839239561197, "grad_norm": 0.5131956338882446, "learning_rate": 0.0001, "loss": 1.3351, "step": 9653 }, { "epoch": 1.108954109471024, "grad_norm": 0.5169805884361267, "learning_rate": 0.0001, "loss": 1.5557, "step": 9654 }, { "epoch": 1.1090689793808512, "grad_norm": 0.5443220734596252, "learning_rate": 0.0001, "loss": 1.5177, "step": 9655 }, { "epoch": 1.1091838492906783, "grad_norm": 0.5020636916160583, "learning_rate": 0.0001, "loss": 1.2518, "step": 9656 }, { "epoch": 1.1092987192005055, "grad_norm": 0.49906498193740845, "learning_rate": 0.0001, "loss": 1.3971, "step": 9657 }, { "epoch": 1.1094135891103325, "grad_norm": 0.5414373874664307, "learning_rate": 0.0001, "loss": 1.6474, "step": 9658 }, { "epoch": 1.1095284590201597, "grad_norm": 0.5560583472251892, "learning_rate": 0.0001, "loss": 1.5238, "step": 9659 }, { "epoch": 1.1096433289299867, "grad_norm": 0.5157898664474487, "learning_rate": 0.0001, "loss": 1.3337, "step": 9660 }, { "epoch": 1.109758198839814, "grad_norm": 0.5900468826293945, "learning_rate": 0.0001, "loss": 1.6205, "step": 9661 }, { "epoch": 1.109873068749641, "grad_norm": 0.5296939611434937, "learning_rate": 0.0001, "loss": 1.468, "step": 9662 }, { "epoch": 1.1099879386594682, "grad_norm": 0.5018689632415771, "learning_rate": 0.0001, "loss": 1.4018, "step": 9663 }, { "epoch": 1.1101028085692952, "grad_norm": 0.4990765452384949, "learning_rate": 0.0001, "loss": 1.3802, "step": 9664 }, { "epoch": 1.1102176784791224, "grad_norm": 0.4909820556640625, "learning_rate": 0.0001, "loss": 1.3892, "step": 9665 }, { "epoch": 1.1103325483889495, "grad_norm": 0.5232742428779602, "learning_rate": 0.0001, "loss": 1.5673, "step": 9666 }, { "epoch": 1.1104474182987767, "grad_norm": 0.5467817783355713, "learning_rate": 0.0001, "loss": 1.3905, "step": 9667 }, { "epoch": 1.1105622882086037, "grad_norm": 0.49867168068885803, "learning_rate": 0.0001, "loss": 1.5118, "step": 9668 }, { "epoch": 1.110677158118431, "grad_norm": 0.5683446526527405, "learning_rate": 0.0001, "loss": 1.636, "step": 9669 }, { "epoch": 1.110792028028258, "grad_norm": 0.5260730385780334, "learning_rate": 0.0001, "loss": 1.4308, "step": 9670 }, { "epoch": 1.1109068979380852, "grad_norm": 0.5530440807342529, "learning_rate": 0.0001, "loss": 1.4861, "step": 9671 }, { "epoch": 1.1110217678479122, "grad_norm": 0.597644567489624, "learning_rate": 0.0001, "loss": 1.6696, "step": 9672 }, { "epoch": 1.1111366377577394, "grad_norm": 0.5474144220352173, "learning_rate": 0.0001, "loss": 1.4307, "step": 9673 }, { "epoch": 1.1112515076675664, "grad_norm": 0.499236136674881, "learning_rate": 0.0001, "loss": 1.4741, "step": 9674 }, { "epoch": 1.1113663775773936, "grad_norm": 0.5453770756721497, "learning_rate": 0.0001, "loss": 1.3632, "step": 9675 }, { "epoch": 1.1114812474872207, "grad_norm": 0.5170800685882568, "learning_rate": 0.0001, "loss": 1.4903, "step": 9676 }, { "epoch": 1.111596117397048, "grad_norm": 0.5183429718017578, "learning_rate": 0.0001, "loss": 1.3797, "step": 9677 }, { "epoch": 1.111710987306875, "grad_norm": 0.5546143054962158, "learning_rate": 0.0001, "loss": 1.5733, "step": 9678 }, { "epoch": 1.1118258572167021, "grad_norm": 0.5528146028518677, "learning_rate": 0.0001, "loss": 1.3667, "step": 9679 }, { "epoch": 1.1119407271265291, "grad_norm": 0.5532962679862976, "learning_rate": 0.0001, "loss": 1.5328, "step": 9680 }, { "epoch": 1.1120555970363564, "grad_norm": 0.5805869698524475, "learning_rate": 0.0001, "loss": 1.5082, "step": 9681 }, { "epoch": 1.1121704669461834, "grad_norm": 0.5238086581230164, "learning_rate": 0.0001, "loss": 1.5363, "step": 9682 }, { "epoch": 1.1122853368560106, "grad_norm": 0.5135483741760254, "learning_rate": 0.0001, "loss": 1.3031, "step": 9683 }, { "epoch": 1.1124002067658376, "grad_norm": 0.535915732383728, "learning_rate": 0.0001, "loss": 1.4422, "step": 9684 }, { "epoch": 1.1125150766756648, "grad_norm": 0.5621902346611023, "learning_rate": 0.0001, "loss": 1.5421, "step": 9685 }, { "epoch": 1.1126299465854919, "grad_norm": 0.5478909611701965, "learning_rate": 0.0001, "loss": 1.3292, "step": 9686 }, { "epoch": 1.112744816495319, "grad_norm": 0.5914627313613892, "learning_rate": 0.0001, "loss": 1.5824, "step": 9687 }, { "epoch": 1.112859686405146, "grad_norm": 0.555410623550415, "learning_rate": 0.0001, "loss": 1.4208, "step": 9688 }, { "epoch": 1.1129745563149733, "grad_norm": 0.6287572979927063, "learning_rate": 0.0001, "loss": 1.5024, "step": 9689 }, { "epoch": 1.1130894262248003, "grad_norm": 0.5535327792167664, "learning_rate": 0.0001, "loss": 1.283, "step": 9690 }, { "epoch": 1.1132042961346276, "grad_norm": 0.6567327976226807, "learning_rate": 0.0001, "loss": 1.6511, "step": 9691 }, { "epoch": 1.1133191660444546, "grad_norm": 0.5440387725830078, "learning_rate": 0.0001, "loss": 1.4729, "step": 9692 }, { "epoch": 1.1134340359542818, "grad_norm": 0.576004683971405, "learning_rate": 0.0001, "loss": 1.5678, "step": 9693 }, { "epoch": 1.1135489058641088, "grad_norm": 0.5710541605949402, "learning_rate": 0.0001, "loss": 1.5312, "step": 9694 }, { "epoch": 1.113663775773936, "grad_norm": 0.5407060384750366, "learning_rate": 0.0001, "loss": 1.4169, "step": 9695 }, { "epoch": 1.113778645683763, "grad_norm": 0.618705689907074, "learning_rate": 0.0001, "loss": 1.7292, "step": 9696 }, { "epoch": 1.1138935155935903, "grad_norm": 0.5561045408248901, "learning_rate": 0.0001, "loss": 1.4788, "step": 9697 }, { "epoch": 1.1140083855034173, "grad_norm": 0.590338945388794, "learning_rate": 0.0001, "loss": 1.4074, "step": 9698 }, { "epoch": 1.1141232554132445, "grad_norm": 0.5989879369735718, "learning_rate": 0.0001, "loss": 1.574, "step": 9699 }, { "epoch": 1.1142381253230715, "grad_norm": 0.5720906853675842, "learning_rate": 0.0001, "loss": 1.7072, "step": 9700 }, { "epoch": 1.1143529952328988, "grad_norm": 0.53765869140625, "learning_rate": 0.0001, "loss": 1.2634, "step": 9701 }, { "epoch": 1.1144678651427258, "grad_norm": 0.5853990316390991, "learning_rate": 0.0001, "loss": 1.58, "step": 9702 }, { "epoch": 1.114582735052553, "grad_norm": 0.5783793926239014, "learning_rate": 0.0001, "loss": 1.5545, "step": 9703 }, { "epoch": 1.11469760496238, "grad_norm": 0.5769542455673218, "learning_rate": 0.0001, "loss": 1.4782, "step": 9704 }, { "epoch": 1.1148124748722072, "grad_norm": 0.56031334400177, "learning_rate": 0.0001, "loss": 1.4986, "step": 9705 }, { "epoch": 1.1149273447820343, "grad_norm": 0.5601339936256409, "learning_rate": 0.0001, "loss": 1.2713, "step": 9706 }, { "epoch": 1.1150422146918615, "grad_norm": 0.552882730960846, "learning_rate": 0.0001, "loss": 1.5844, "step": 9707 }, { "epoch": 1.1151570846016885, "grad_norm": 0.513989269733429, "learning_rate": 0.0001, "loss": 1.4661, "step": 9708 }, { "epoch": 1.1152719545115157, "grad_norm": 0.5489285588264465, "learning_rate": 0.0001, "loss": 1.456, "step": 9709 }, { "epoch": 1.1153868244213427, "grad_norm": 0.5569881200790405, "learning_rate": 0.0001, "loss": 1.5918, "step": 9710 }, { "epoch": 1.11550169433117, "grad_norm": 0.5199733972549438, "learning_rate": 0.0001, "loss": 1.3066, "step": 9711 }, { "epoch": 1.115616564240997, "grad_norm": 0.5051583051681519, "learning_rate": 0.0001, "loss": 1.4748, "step": 9712 }, { "epoch": 1.1157314341508242, "grad_norm": 0.5694435238838196, "learning_rate": 0.0001, "loss": 1.5148, "step": 9713 }, { "epoch": 1.1158463040606512, "grad_norm": 0.5439704656600952, "learning_rate": 0.0001, "loss": 1.4556, "step": 9714 }, { "epoch": 1.1159611739704784, "grad_norm": 0.5158405900001526, "learning_rate": 0.0001, "loss": 1.3826, "step": 9715 }, { "epoch": 1.1160760438803055, "grad_norm": 0.5488100647926331, "learning_rate": 0.0001, "loss": 1.5873, "step": 9716 }, { "epoch": 1.1161909137901327, "grad_norm": 0.5777077078819275, "learning_rate": 0.0001, "loss": 1.3642, "step": 9717 }, { "epoch": 1.1163057836999597, "grad_norm": 0.5701835751533508, "learning_rate": 0.0001, "loss": 1.2141, "step": 9718 }, { "epoch": 1.116420653609787, "grad_norm": 0.5419429540634155, "learning_rate": 0.0001, "loss": 1.4514, "step": 9719 }, { "epoch": 1.116535523519614, "grad_norm": 0.5637021660804749, "learning_rate": 0.0001, "loss": 1.4831, "step": 9720 }, { "epoch": 1.1166503934294412, "grad_norm": 0.6277347803115845, "learning_rate": 0.0001, "loss": 1.6107, "step": 9721 }, { "epoch": 1.1167652633392682, "grad_norm": 0.6077524423599243, "learning_rate": 0.0001, "loss": 1.5655, "step": 9722 }, { "epoch": 1.1168801332490954, "grad_norm": 0.579602837562561, "learning_rate": 0.0001, "loss": 1.5203, "step": 9723 }, { "epoch": 1.1169950031589224, "grad_norm": 0.6332294344902039, "learning_rate": 0.0001, "loss": 1.5833, "step": 9724 }, { "epoch": 1.1171098730687496, "grad_norm": 0.5484510064125061, "learning_rate": 0.0001, "loss": 1.4893, "step": 9725 }, { "epoch": 1.1172247429785767, "grad_norm": 0.5461903214454651, "learning_rate": 0.0001, "loss": 1.424, "step": 9726 }, { "epoch": 1.1173396128884039, "grad_norm": 0.5597689151763916, "learning_rate": 0.0001, "loss": 1.5302, "step": 9727 }, { "epoch": 1.1174544827982311, "grad_norm": 0.5503045916557312, "learning_rate": 0.0001, "loss": 1.5391, "step": 9728 }, { "epoch": 1.1175693527080581, "grad_norm": 0.5259599685668945, "learning_rate": 0.0001, "loss": 1.4662, "step": 9729 }, { "epoch": 1.1176842226178851, "grad_norm": 0.5283674001693726, "learning_rate": 0.0001, "loss": 1.4768, "step": 9730 }, { "epoch": 1.1177990925277124, "grad_norm": 0.5296016335487366, "learning_rate": 0.0001, "loss": 1.2765, "step": 9731 }, { "epoch": 1.1179139624375396, "grad_norm": 0.528243899345398, "learning_rate": 0.0001, "loss": 1.3637, "step": 9732 }, { "epoch": 1.1180288323473666, "grad_norm": 0.5380933880805969, "learning_rate": 0.0001, "loss": 1.5179, "step": 9733 }, { "epoch": 1.1181437022571936, "grad_norm": 0.5619294047355652, "learning_rate": 0.0001, "loss": 1.3127, "step": 9734 }, { "epoch": 1.1182585721670208, "grad_norm": 0.5538292527198792, "learning_rate": 0.0001, "loss": 1.4159, "step": 9735 }, { "epoch": 1.118373442076848, "grad_norm": 0.5651335120201111, "learning_rate": 0.0001, "loss": 1.3661, "step": 9736 }, { "epoch": 1.118488311986675, "grad_norm": 0.5316612720489502, "learning_rate": 0.0001, "loss": 1.5018, "step": 9737 }, { "epoch": 1.118603181896502, "grad_norm": 0.5468516945838928, "learning_rate": 0.0001, "loss": 1.3585, "step": 9738 }, { "epoch": 1.1187180518063293, "grad_norm": 0.60384202003479, "learning_rate": 0.0001, "loss": 1.5244, "step": 9739 }, { "epoch": 1.1188329217161566, "grad_norm": 0.5294783711433411, "learning_rate": 0.0001, "loss": 1.6064, "step": 9740 }, { "epoch": 1.1189477916259836, "grad_norm": 0.5466625690460205, "learning_rate": 0.0001, "loss": 1.5823, "step": 9741 }, { "epoch": 1.1190626615358106, "grad_norm": 0.5397646427154541, "learning_rate": 0.0001, "loss": 1.5862, "step": 9742 }, { "epoch": 1.1191775314456378, "grad_norm": 0.525026798248291, "learning_rate": 0.0001, "loss": 1.4643, "step": 9743 }, { "epoch": 1.119292401355465, "grad_norm": 0.5274828672409058, "learning_rate": 0.0001, "loss": 1.2221, "step": 9744 }, { "epoch": 1.119407271265292, "grad_norm": 0.5290265679359436, "learning_rate": 0.0001, "loss": 1.5952, "step": 9745 }, { "epoch": 1.1195221411751193, "grad_norm": 0.5176373720169067, "learning_rate": 0.0001, "loss": 1.3871, "step": 9746 }, { "epoch": 1.1196370110849463, "grad_norm": 0.5549317598342896, "learning_rate": 0.0001, "loss": 1.6335, "step": 9747 }, { "epoch": 1.1197518809947735, "grad_norm": 0.5477745532989502, "learning_rate": 0.0001, "loss": 1.4009, "step": 9748 }, { "epoch": 1.1198667509046005, "grad_norm": 0.5613062381744385, "learning_rate": 0.0001, "loss": 1.3842, "step": 9749 }, { "epoch": 1.1199816208144278, "grad_norm": 0.5442327260971069, "learning_rate": 0.0001, "loss": 1.4955, "step": 9750 }, { "epoch": 1.1200964907242548, "grad_norm": 0.5651378631591797, "learning_rate": 0.0001, "loss": 1.507, "step": 9751 }, { "epoch": 1.120211360634082, "grad_norm": 0.5338380336761475, "learning_rate": 0.0001, "loss": 1.2772, "step": 9752 }, { "epoch": 1.120326230543909, "grad_norm": 0.5371922850608826, "learning_rate": 0.0001, "loss": 1.4111, "step": 9753 }, { "epoch": 1.1204411004537362, "grad_norm": 0.49218225479125977, "learning_rate": 0.0001, "loss": 1.3432, "step": 9754 }, { "epoch": 1.1205559703635632, "grad_norm": 0.5054636001586914, "learning_rate": 0.0001, "loss": 1.2946, "step": 9755 }, { "epoch": 1.1206708402733905, "grad_norm": 0.5905545949935913, "learning_rate": 0.0001, "loss": 1.5725, "step": 9756 }, { "epoch": 1.1207857101832175, "grad_norm": 0.5989205241203308, "learning_rate": 0.0001, "loss": 1.5756, "step": 9757 }, { "epoch": 1.1209005800930447, "grad_norm": 0.5933013558387756, "learning_rate": 0.0001, "loss": 1.5632, "step": 9758 }, { "epoch": 1.1210154500028717, "grad_norm": 0.5364729166030884, "learning_rate": 0.0001, "loss": 1.5455, "step": 9759 }, { "epoch": 1.121130319912699, "grad_norm": 0.638589084148407, "learning_rate": 0.0001, "loss": 1.5624, "step": 9760 }, { "epoch": 1.121245189822526, "grad_norm": 0.5210257172584534, "learning_rate": 0.0001, "loss": 1.3005, "step": 9761 }, { "epoch": 1.1213600597323532, "grad_norm": 0.581584095954895, "learning_rate": 0.0001, "loss": 1.4794, "step": 9762 }, { "epoch": 1.1214749296421802, "grad_norm": 0.5900195240974426, "learning_rate": 0.0001, "loss": 1.6183, "step": 9763 }, { "epoch": 1.1215897995520074, "grad_norm": 0.5471107959747314, "learning_rate": 0.0001, "loss": 1.457, "step": 9764 }, { "epoch": 1.1217046694618344, "grad_norm": 0.5870269536972046, "learning_rate": 0.0001, "loss": 1.3416, "step": 9765 }, { "epoch": 1.1218195393716617, "grad_norm": 0.6071326732635498, "learning_rate": 0.0001, "loss": 1.362, "step": 9766 }, { "epoch": 1.1219344092814887, "grad_norm": 0.5291370749473572, "learning_rate": 0.0001, "loss": 1.6083, "step": 9767 }, { "epoch": 1.122049279191316, "grad_norm": 0.5899519920349121, "learning_rate": 0.0001, "loss": 1.555, "step": 9768 }, { "epoch": 1.122164149101143, "grad_norm": 0.5426907539367676, "learning_rate": 0.0001, "loss": 1.3719, "step": 9769 }, { "epoch": 1.1222790190109702, "grad_norm": 0.634436309337616, "learning_rate": 0.0001, "loss": 1.6449, "step": 9770 }, { "epoch": 1.1223938889207972, "grad_norm": 0.5499168038368225, "learning_rate": 0.0001, "loss": 1.6378, "step": 9771 }, { "epoch": 1.1225087588306244, "grad_norm": 0.5340597629547119, "learning_rate": 0.0001, "loss": 1.5949, "step": 9772 }, { "epoch": 1.1226236287404514, "grad_norm": 0.5629338622093201, "learning_rate": 0.0001, "loss": 1.4928, "step": 9773 }, { "epoch": 1.1227384986502786, "grad_norm": 0.5041453242301941, "learning_rate": 0.0001, "loss": 1.2472, "step": 9774 }, { "epoch": 1.1228533685601056, "grad_norm": 0.5762504935264587, "learning_rate": 0.0001, "loss": 1.6202, "step": 9775 }, { "epoch": 1.1229682384699329, "grad_norm": 0.5604518055915833, "learning_rate": 0.0001, "loss": 1.452, "step": 9776 }, { "epoch": 1.1230831083797599, "grad_norm": 0.5645705461502075, "learning_rate": 0.0001, "loss": 1.2915, "step": 9777 }, { "epoch": 1.1231979782895871, "grad_norm": 0.47870761156082153, "learning_rate": 0.0001, "loss": 1.2856, "step": 9778 }, { "epoch": 1.1233128481994141, "grad_norm": 0.5456746220588684, "learning_rate": 0.0001, "loss": 1.5596, "step": 9779 }, { "epoch": 1.1234277181092414, "grad_norm": 0.5596235394477844, "learning_rate": 0.0001, "loss": 1.502, "step": 9780 }, { "epoch": 1.1235425880190684, "grad_norm": 0.5665818452835083, "learning_rate": 0.0001, "loss": 1.6226, "step": 9781 }, { "epoch": 1.1236574579288956, "grad_norm": 0.605762779712677, "learning_rate": 0.0001, "loss": 1.3177, "step": 9782 }, { "epoch": 1.1237723278387226, "grad_norm": 0.5420957207679749, "learning_rate": 0.0001, "loss": 1.4605, "step": 9783 }, { "epoch": 1.1238871977485498, "grad_norm": 0.6387947201728821, "learning_rate": 0.0001, "loss": 1.3037, "step": 9784 }, { "epoch": 1.1240020676583768, "grad_norm": 0.5580546259880066, "learning_rate": 0.0001, "loss": 1.4796, "step": 9785 }, { "epoch": 1.124116937568204, "grad_norm": 0.5426499843597412, "learning_rate": 0.0001, "loss": 1.5066, "step": 9786 }, { "epoch": 1.124231807478031, "grad_norm": 0.5494298934936523, "learning_rate": 0.0001, "loss": 1.565, "step": 9787 }, { "epoch": 1.1243466773878583, "grad_norm": 0.5614138245582581, "learning_rate": 0.0001, "loss": 1.4699, "step": 9788 }, { "epoch": 1.1244615472976853, "grad_norm": 0.5716959238052368, "learning_rate": 0.0001, "loss": 1.6331, "step": 9789 }, { "epoch": 1.1245764172075126, "grad_norm": 0.5259473919868469, "learning_rate": 0.0001, "loss": 1.4069, "step": 9790 }, { "epoch": 1.1246912871173396, "grad_norm": 0.5336620211601257, "learning_rate": 0.0001, "loss": 1.5229, "step": 9791 }, { "epoch": 1.1248061570271668, "grad_norm": 0.5299500226974487, "learning_rate": 0.0001, "loss": 1.5239, "step": 9792 }, { "epoch": 1.1249210269369938, "grad_norm": 0.5537680387496948, "learning_rate": 0.0001, "loss": 1.3462, "step": 9793 }, { "epoch": 1.125035896846821, "grad_norm": 0.5444726943969727, "learning_rate": 0.0001, "loss": 1.5284, "step": 9794 }, { "epoch": 1.125150766756648, "grad_norm": 0.5653538107872009, "learning_rate": 0.0001, "loss": 1.7283, "step": 9795 }, { "epoch": 1.1252656366664753, "grad_norm": 0.5434619784355164, "learning_rate": 0.0001, "loss": 1.2987, "step": 9796 }, { "epoch": 1.1253805065763023, "grad_norm": 0.5725109577178955, "learning_rate": 0.0001, "loss": 1.6485, "step": 9797 }, { "epoch": 1.1254953764861295, "grad_norm": 0.6203802227973938, "learning_rate": 0.0001, "loss": 1.5608, "step": 9798 }, { "epoch": 1.1256102463959565, "grad_norm": 0.5914912223815918, "learning_rate": 0.0001, "loss": 1.6739, "step": 9799 }, { "epoch": 1.1257251163057838, "grad_norm": 0.7401518225669861, "learning_rate": 0.0001, "loss": 1.5393, "step": 9800 }, { "epoch": 1.1258399862156108, "grad_norm": 0.54278165102005, "learning_rate": 0.0001, "loss": 1.3736, "step": 9801 }, { "epoch": 1.125954856125438, "grad_norm": 0.5903399586677551, "learning_rate": 0.0001, "loss": 1.6015, "step": 9802 }, { "epoch": 1.126069726035265, "grad_norm": 0.5835331678390503, "learning_rate": 0.0001, "loss": 1.5263, "step": 9803 }, { "epoch": 1.1261845959450922, "grad_norm": 0.5370138883590698, "learning_rate": 0.0001, "loss": 1.541, "step": 9804 }, { "epoch": 1.1262994658549192, "grad_norm": 0.6110140681266785, "learning_rate": 0.0001, "loss": 1.7323, "step": 9805 }, { "epoch": 1.1264143357647465, "grad_norm": 0.5909807085990906, "learning_rate": 0.0001, "loss": 1.493, "step": 9806 }, { "epoch": 1.1265292056745735, "grad_norm": 0.5648689270019531, "learning_rate": 0.0001, "loss": 1.4891, "step": 9807 }, { "epoch": 1.1266440755844007, "grad_norm": 0.6406177282333374, "learning_rate": 0.0001, "loss": 1.6556, "step": 9808 }, { "epoch": 1.1267589454942277, "grad_norm": 0.5629665851593018, "learning_rate": 0.0001, "loss": 1.4789, "step": 9809 }, { "epoch": 1.126873815404055, "grad_norm": 0.5698572993278503, "learning_rate": 0.0001, "loss": 1.456, "step": 9810 }, { "epoch": 1.126988685313882, "grad_norm": 0.5605424046516418, "learning_rate": 0.0001, "loss": 1.3712, "step": 9811 }, { "epoch": 1.1271035552237092, "grad_norm": 0.5104329586029053, "learning_rate": 0.0001, "loss": 1.372, "step": 9812 }, { "epoch": 1.1272184251335362, "grad_norm": 0.5657603144645691, "learning_rate": 0.0001, "loss": 1.3791, "step": 9813 }, { "epoch": 1.1273332950433634, "grad_norm": 0.5272459983825684, "learning_rate": 0.0001, "loss": 1.3004, "step": 9814 }, { "epoch": 1.1274481649531904, "grad_norm": 0.5541790127754211, "learning_rate": 0.0001, "loss": 1.4475, "step": 9815 }, { "epoch": 1.1275630348630177, "grad_norm": 0.5871829986572266, "learning_rate": 0.0001, "loss": 1.5131, "step": 9816 }, { "epoch": 1.1276779047728447, "grad_norm": 0.5780866742134094, "learning_rate": 0.0001, "loss": 1.561, "step": 9817 }, { "epoch": 1.127792774682672, "grad_norm": 0.606268048286438, "learning_rate": 0.0001, "loss": 1.733, "step": 9818 }, { "epoch": 1.127907644592499, "grad_norm": 0.559883713722229, "learning_rate": 0.0001, "loss": 1.4383, "step": 9819 }, { "epoch": 1.1280225145023262, "grad_norm": 0.5222516655921936, "learning_rate": 0.0001, "loss": 1.3852, "step": 9820 }, { "epoch": 1.1281373844121532, "grad_norm": 0.5751352310180664, "learning_rate": 0.0001, "loss": 1.5563, "step": 9821 }, { "epoch": 1.1282522543219804, "grad_norm": 0.5874543786048889, "learning_rate": 0.0001, "loss": 1.6203, "step": 9822 }, { "epoch": 1.1283671242318074, "grad_norm": 0.5984853506088257, "learning_rate": 0.0001, "loss": 1.5788, "step": 9823 }, { "epoch": 1.1284819941416346, "grad_norm": 0.5243591070175171, "learning_rate": 0.0001, "loss": 1.2917, "step": 9824 }, { "epoch": 1.1285968640514616, "grad_norm": 0.5501663088798523, "learning_rate": 0.0001, "loss": 1.5474, "step": 9825 }, { "epoch": 1.1287117339612889, "grad_norm": 0.5242180824279785, "learning_rate": 0.0001, "loss": 1.3522, "step": 9826 }, { "epoch": 1.1288266038711159, "grad_norm": 0.5722183585166931, "learning_rate": 0.0001, "loss": 1.4354, "step": 9827 }, { "epoch": 1.1289414737809431, "grad_norm": 0.4917822778224945, "learning_rate": 0.0001, "loss": 1.3709, "step": 9828 }, { "epoch": 1.1290563436907701, "grad_norm": 0.5814813375473022, "learning_rate": 0.0001, "loss": 1.7127, "step": 9829 }, { "epoch": 1.1291712136005974, "grad_norm": 0.5022294521331787, "learning_rate": 0.0001, "loss": 1.4454, "step": 9830 }, { "epoch": 1.1292860835104244, "grad_norm": 0.5753988027572632, "learning_rate": 0.0001, "loss": 1.5266, "step": 9831 }, { "epoch": 1.1294009534202516, "grad_norm": 0.5708458423614502, "learning_rate": 0.0001, "loss": 1.6136, "step": 9832 }, { "epoch": 1.1295158233300786, "grad_norm": 0.5471024513244629, "learning_rate": 0.0001, "loss": 1.2932, "step": 9833 }, { "epoch": 1.1296306932399058, "grad_norm": 0.5528780817985535, "learning_rate": 0.0001, "loss": 1.3153, "step": 9834 }, { "epoch": 1.1297455631497328, "grad_norm": 0.5226410627365112, "learning_rate": 0.0001, "loss": 1.4946, "step": 9835 }, { "epoch": 1.12986043305956, "grad_norm": 0.5486363768577576, "learning_rate": 0.0001, "loss": 1.4376, "step": 9836 }, { "epoch": 1.129975302969387, "grad_norm": 0.526906430721283, "learning_rate": 0.0001, "loss": 1.4453, "step": 9837 }, { "epoch": 1.1300901728792143, "grad_norm": 0.5345343351364136, "learning_rate": 0.0001, "loss": 1.4311, "step": 9838 }, { "epoch": 1.1302050427890413, "grad_norm": 0.6694420576095581, "learning_rate": 0.0001, "loss": 1.6289, "step": 9839 }, { "epoch": 1.1303199126988686, "grad_norm": 0.5467296242713928, "learning_rate": 0.0001, "loss": 1.4171, "step": 9840 }, { "epoch": 1.1304347826086956, "grad_norm": 0.6066096425056458, "learning_rate": 0.0001, "loss": 1.5134, "step": 9841 }, { "epoch": 1.1305496525185228, "grad_norm": 0.5474829077720642, "learning_rate": 0.0001, "loss": 1.4532, "step": 9842 }, { "epoch": 1.1306645224283498, "grad_norm": 0.5534715056419373, "learning_rate": 0.0001, "loss": 1.5475, "step": 9843 }, { "epoch": 1.130779392338177, "grad_norm": 0.5239380598068237, "learning_rate": 0.0001, "loss": 1.4398, "step": 9844 }, { "epoch": 1.130894262248004, "grad_norm": 0.6033841371536255, "learning_rate": 0.0001, "loss": 1.7539, "step": 9845 }, { "epoch": 1.1310091321578313, "grad_norm": 0.5651752352714539, "learning_rate": 0.0001, "loss": 1.3981, "step": 9846 }, { "epoch": 1.1311240020676583, "grad_norm": 0.5665850043296814, "learning_rate": 0.0001, "loss": 1.3451, "step": 9847 }, { "epoch": 1.1312388719774855, "grad_norm": 0.5850709676742554, "learning_rate": 0.0001, "loss": 1.4922, "step": 9848 }, { "epoch": 1.1313537418873125, "grad_norm": 0.5818759799003601, "learning_rate": 0.0001, "loss": 1.4969, "step": 9849 }, { "epoch": 1.1314686117971398, "grad_norm": 0.59281986951828, "learning_rate": 0.0001, "loss": 1.4706, "step": 9850 }, { "epoch": 1.1315834817069668, "grad_norm": 0.5399245023727417, "learning_rate": 0.0001, "loss": 1.3788, "step": 9851 }, { "epoch": 1.131698351616794, "grad_norm": 0.5174872875213623, "learning_rate": 0.0001, "loss": 1.3944, "step": 9852 }, { "epoch": 1.131813221526621, "grad_norm": 0.5072240829467773, "learning_rate": 0.0001, "loss": 1.3622, "step": 9853 }, { "epoch": 1.1319280914364482, "grad_norm": 0.5090083479881287, "learning_rate": 0.0001, "loss": 1.4824, "step": 9854 }, { "epoch": 1.1320429613462752, "grad_norm": 0.5711216926574707, "learning_rate": 0.0001, "loss": 1.4839, "step": 9855 }, { "epoch": 1.1321578312561025, "grad_norm": 0.5663666129112244, "learning_rate": 0.0001, "loss": 1.4702, "step": 9856 }, { "epoch": 1.1322727011659297, "grad_norm": 0.5527740120887756, "learning_rate": 0.0001, "loss": 1.5937, "step": 9857 }, { "epoch": 1.1323875710757567, "grad_norm": 0.5693678855895996, "learning_rate": 0.0001, "loss": 1.534, "step": 9858 }, { "epoch": 1.1325024409855837, "grad_norm": 0.5752173066139221, "learning_rate": 0.0001, "loss": 1.4533, "step": 9859 }, { "epoch": 1.132617310895411, "grad_norm": 0.5374181866645813, "learning_rate": 0.0001, "loss": 1.2613, "step": 9860 }, { "epoch": 1.1327321808052382, "grad_norm": 0.5573873519897461, "learning_rate": 0.0001, "loss": 1.4246, "step": 9861 }, { "epoch": 1.1328470507150652, "grad_norm": 0.5458858609199524, "learning_rate": 0.0001, "loss": 1.355, "step": 9862 }, { "epoch": 1.1329619206248922, "grad_norm": 0.5189163684844971, "learning_rate": 0.0001, "loss": 1.4186, "step": 9863 }, { "epoch": 1.1330767905347194, "grad_norm": 0.5939661860466003, "learning_rate": 0.0001, "loss": 1.6622, "step": 9864 }, { "epoch": 1.1331916604445467, "grad_norm": 0.5526455044746399, "learning_rate": 0.0001, "loss": 1.6, "step": 9865 }, { "epoch": 1.1333065303543737, "grad_norm": 0.5535423755645752, "learning_rate": 0.0001, "loss": 1.5184, "step": 9866 }, { "epoch": 1.1334214002642007, "grad_norm": 0.5559777617454529, "learning_rate": 0.0001, "loss": 1.5139, "step": 9867 }, { "epoch": 1.133536270174028, "grad_norm": 0.5639746785163879, "learning_rate": 0.0001, "loss": 1.3472, "step": 9868 }, { "epoch": 1.1336511400838551, "grad_norm": 0.5288705229759216, "learning_rate": 0.0001, "loss": 1.3665, "step": 9869 }, { "epoch": 1.1337660099936822, "grad_norm": 0.6040565371513367, "learning_rate": 0.0001, "loss": 1.7257, "step": 9870 }, { "epoch": 1.1338808799035092, "grad_norm": 0.5919288992881775, "learning_rate": 0.0001, "loss": 1.6479, "step": 9871 }, { "epoch": 1.1339957498133364, "grad_norm": 0.5653237104415894, "learning_rate": 0.0001, "loss": 1.4633, "step": 9872 }, { "epoch": 1.1341106197231636, "grad_norm": 0.5587108731269836, "learning_rate": 0.0001, "loss": 1.5307, "step": 9873 }, { "epoch": 1.1342254896329906, "grad_norm": 0.5890237092971802, "learning_rate": 0.0001, "loss": 1.5432, "step": 9874 }, { "epoch": 1.1343403595428176, "grad_norm": 0.5263911485671997, "learning_rate": 0.0001, "loss": 1.4927, "step": 9875 }, { "epoch": 1.1344552294526449, "grad_norm": 0.494157075881958, "learning_rate": 0.0001, "loss": 1.3288, "step": 9876 }, { "epoch": 1.134570099362472, "grad_norm": 0.5565462112426758, "learning_rate": 0.0001, "loss": 1.1901, "step": 9877 }, { "epoch": 1.1346849692722991, "grad_norm": 0.5866313576698303, "learning_rate": 0.0001, "loss": 1.3815, "step": 9878 }, { "epoch": 1.1347998391821261, "grad_norm": 0.5734443068504333, "learning_rate": 0.0001, "loss": 1.5916, "step": 9879 }, { "epoch": 1.1349147090919534, "grad_norm": 0.533345103263855, "learning_rate": 0.0001, "loss": 1.4551, "step": 9880 }, { "epoch": 1.1350295790017806, "grad_norm": 0.5391797423362732, "learning_rate": 0.0001, "loss": 1.4961, "step": 9881 }, { "epoch": 1.1351444489116076, "grad_norm": 0.5512882471084595, "learning_rate": 0.0001, "loss": 1.5141, "step": 9882 }, { "epoch": 1.1352593188214346, "grad_norm": 0.565726637840271, "learning_rate": 0.0001, "loss": 1.5068, "step": 9883 }, { "epoch": 1.1353741887312618, "grad_norm": 0.5509957671165466, "learning_rate": 0.0001, "loss": 1.4008, "step": 9884 }, { "epoch": 1.135489058641089, "grad_norm": 0.6037254333496094, "learning_rate": 0.0001, "loss": 1.5944, "step": 9885 }, { "epoch": 1.135603928550916, "grad_norm": 0.5137581825256348, "learning_rate": 0.0001, "loss": 1.431, "step": 9886 }, { "epoch": 1.135718798460743, "grad_norm": 0.5548390746116638, "learning_rate": 0.0001, "loss": 1.5412, "step": 9887 }, { "epoch": 1.1358336683705703, "grad_norm": 0.538205087184906, "learning_rate": 0.0001, "loss": 1.5042, "step": 9888 }, { "epoch": 1.1359485382803975, "grad_norm": 0.5580369830131531, "learning_rate": 0.0001, "loss": 1.4531, "step": 9889 }, { "epoch": 1.1360634081902246, "grad_norm": 0.5717020034790039, "learning_rate": 0.0001, "loss": 1.3808, "step": 9890 }, { "epoch": 1.1361782781000518, "grad_norm": 0.5775260329246521, "learning_rate": 0.0001, "loss": 1.5856, "step": 9891 }, { "epoch": 1.1362931480098788, "grad_norm": 0.6119628548622131, "learning_rate": 0.0001, "loss": 1.5353, "step": 9892 }, { "epoch": 1.136408017919706, "grad_norm": 0.533612072467804, "learning_rate": 0.0001, "loss": 1.2585, "step": 9893 }, { "epoch": 1.136522887829533, "grad_norm": 0.5262476205825806, "learning_rate": 0.0001, "loss": 1.3074, "step": 9894 }, { "epoch": 1.1366377577393603, "grad_norm": 0.5193493366241455, "learning_rate": 0.0001, "loss": 1.43, "step": 9895 }, { "epoch": 1.1367526276491873, "grad_norm": 0.5410826206207275, "learning_rate": 0.0001, "loss": 1.5718, "step": 9896 }, { "epoch": 1.1368674975590145, "grad_norm": 0.5434339642524719, "learning_rate": 0.0001, "loss": 1.4059, "step": 9897 }, { "epoch": 1.1369823674688415, "grad_norm": 0.6266562342643738, "learning_rate": 0.0001, "loss": 1.6898, "step": 9898 }, { "epoch": 1.1370972373786687, "grad_norm": 0.5530896782875061, "learning_rate": 0.0001, "loss": 1.4496, "step": 9899 }, { "epoch": 1.1372121072884958, "grad_norm": 0.5319846868515015, "learning_rate": 0.0001, "loss": 1.4595, "step": 9900 }, { "epoch": 1.137326977198323, "grad_norm": 0.5379259586334229, "learning_rate": 0.0001, "loss": 1.5903, "step": 9901 }, { "epoch": 1.13744184710815, "grad_norm": 0.5139333009719849, "learning_rate": 0.0001, "loss": 1.3436, "step": 9902 }, { "epoch": 1.1375567170179772, "grad_norm": 0.5401899218559265, "learning_rate": 0.0001, "loss": 1.7349, "step": 9903 }, { "epoch": 1.1376715869278042, "grad_norm": 0.551706075668335, "learning_rate": 0.0001, "loss": 1.4543, "step": 9904 }, { "epoch": 1.1377864568376315, "grad_norm": 0.5094159245491028, "learning_rate": 0.0001, "loss": 1.4717, "step": 9905 }, { "epoch": 1.1379013267474585, "grad_norm": 0.514320433139801, "learning_rate": 0.0001, "loss": 1.275, "step": 9906 }, { "epoch": 1.1380161966572857, "grad_norm": 0.5413798689842224, "learning_rate": 0.0001, "loss": 1.4261, "step": 9907 }, { "epoch": 1.1381310665671127, "grad_norm": 0.6346654295921326, "learning_rate": 0.0001, "loss": 1.4869, "step": 9908 }, { "epoch": 1.13824593647694, "grad_norm": 0.5805730223655701, "learning_rate": 0.0001, "loss": 1.4811, "step": 9909 }, { "epoch": 1.138360806386767, "grad_norm": 0.5956717133522034, "learning_rate": 0.0001, "loss": 1.6149, "step": 9910 }, { "epoch": 1.1384756762965942, "grad_norm": 0.5871737599372864, "learning_rate": 0.0001, "loss": 1.5696, "step": 9911 }, { "epoch": 1.1385905462064212, "grad_norm": 0.6298748254776001, "learning_rate": 0.0001, "loss": 1.5124, "step": 9912 }, { "epoch": 1.1387054161162484, "grad_norm": 0.5252795219421387, "learning_rate": 0.0001, "loss": 1.4858, "step": 9913 }, { "epoch": 1.1388202860260754, "grad_norm": 0.5625512003898621, "learning_rate": 0.0001, "loss": 1.5734, "step": 9914 }, { "epoch": 1.1389351559359027, "grad_norm": 0.47873494029045105, "learning_rate": 0.0001, "loss": 1.2768, "step": 9915 }, { "epoch": 1.1390500258457297, "grad_norm": 0.5676445960998535, "learning_rate": 0.0001, "loss": 1.6487, "step": 9916 }, { "epoch": 1.139164895755557, "grad_norm": 0.5738489031791687, "learning_rate": 0.0001, "loss": 1.5787, "step": 9917 }, { "epoch": 1.139279765665384, "grad_norm": 0.5460545420646667, "learning_rate": 0.0001, "loss": 1.375, "step": 9918 }, { "epoch": 1.1393946355752111, "grad_norm": 0.5179741382598877, "learning_rate": 0.0001, "loss": 1.3943, "step": 9919 }, { "epoch": 1.1395095054850382, "grad_norm": 0.6625398993492126, "learning_rate": 0.0001, "loss": 1.7408, "step": 9920 }, { "epoch": 1.1396243753948654, "grad_norm": 0.55277019739151, "learning_rate": 0.0001, "loss": 1.5409, "step": 9921 }, { "epoch": 1.1397392453046924, "grad_norm": 0.5727016925811768, "learning_rate": 0.0001, "loss": 1.4051, "step": 9922 }, { "epoch": 1.1398541152145196, "grad_norm": 0.5403141975402832, "learning_rate": 0.0001, "loss": 1.5129, "step": 9923 }, { "epoch": 1.1399689851243466, "grad_norm": 0.5404056906700134, "learning_rate": 0.0001, "loss": 1.4899, "step": 9924 }, { "epoch": 1.1400838550341739, "grad_norm": 0.5808719992637634, "learning_rate": 0.0001, "loss": 1.4641, "step": 9925 }, { "epoch": 1.1401987249440009, "grad_norm": 0.5395628809928894, "learning_rate": 0.0001, "loss": 1.4619, "step": 9926 }, { "epoch": 1.140313594853828, "grad_norm": 0.5930881500244141, "learning_rate": 0.0001, "loss": 1.52, "step": 9927 }, { "epoch": 1.1404284647636551, "grad_norm": 0.5815712809562683, "learning_rate": 0.0001, "loss": 1.2868, "step": 9928 }, { "epoch": 1.1405433346734823, "grad_norm": 0.5318073630332947, "learning_rate": 0.0001, "loss": 1.4842, "step": 9929 }, { "epoch": 1.1406582045833094, "grad_norm": 0.5083526968955994, "learning_rate": 0.0001, "loss": 1.411, "step": 9930 }, { "epoch": 1.1407730744931366, "grad_norm": 0.6382246017456055, "learning_rate": 0.0001, "loss": 1.6084, "step": 9931 }, { "epoch": 1.1408879444029636, "grad_norm": 0.5410823225975037, "learning_rate": 0.0001, "loss": 1.3782, "step": 9932 }, { "epoch": 1.1410028143127908, "grad_norm": 0.5354570746421814, "learning_rate": 0.0001, "loss": 1.5489, "step": 9933 }, { "epoch": 1.1411176842226178, "grad_norm": 0.5121961832046509, "learning_rate": 0.0001, "loss": 1.4649, "step": 9934 }, { "epoch": 1.141232554132445, "grad_norm": 0.5164211392402649, "learning_rate": 0.0001, "loss": 1.3373, "step": 9935 }, { "epoch": 1.141347424042272, "grad_norm": 0.5433701276779175, "learning_rate": 0.0001, "loss": 1.4922, "step": 9936 }, { "epoch": 1.1414622939520993, "grad_norm": 0.5362388491630554, "learning_rate": 0.0001, "loss": 1.2922, "step": 9937 }, { "epoch": 1.1415771638619263, "grad_norm": 0.5343228578567505, "learning_rate": 0.0001, "loss": 1.3815, "step": 9938 }, { "epoch": 1.1416920337717535, "grad_norm": 0.5795764327049255, "learning_rate": 0.0001, "loss": 1.5437, "step": 9939 }, { "epoch": 1.1418069036815806, "grad_norm": 0.5852019190788269, "learning_rate": 0.0001, "loss": 1.463, "step": 9940 }, { "epoch": 1.1419217735914078, "grad_norm": 0.5105196237564087, "learning_rate": 0.0001, "loss": 1.3497, "step": 9941 }, { "epoch": 1.1420366435012348, "grad_norm": 0.5408787131309509, "learning_rate": 0.0001, "loss": 1.3446, "step": 9942 }, { "epoch": 1.142151513411062, "grad_norm": 0.552517294883728, "learning_rate": 0.0001, "loss": 1.431, "step": 9943 }, { "epoch": 1.142266383320889, "grad_norm": 0.5696591138839722, "learning_rate": 0.0001, "loss": 1.5123, "step": 9944 }, { "epoch": 1.1423812532307163, "grad_norm": 0.560500979423523, "learning_rate": 0.0001, "loss": 1.5635, "step": 9945 }, { "epoch": 1.1424961231405433, "grad_norm": 0.6407963037490845, "learning_rate": 0.0001, "loss": 1.5365, "step": 9946 }, { "epoch": 1.1426109930503705, "grad_norm": 0.5425936579704285, "learning_rate": 0.0001, "loss": 1.3761, "step": 9947 }, { "epoch": 1.1427258629601975, "grad_norm": 0.5866653323173523, "learning_rate": 0.0001, "loss": 1.6785, "step": 9948 }, { "epoch": 1.1428407328700247, "grad_norm": 0.5960055589675903, "learning_rate": 0.0001, "loss": 1.3274, "step": 9949 }, { "epoch": 1.1429556027798518, "grad_norm": 0.5556841492652893, "learning_rate": 0.0001, "loss": 1.5652, "step": 9950 }, { "epoch": 1.143070472689679, "grad_norm": 0.5279529094696045, "learning_rate": 0.0001, "loss": 1.2829, "step": 9951 }, { "epoch": 1.143185342599506, "grad_norm": 0.5272794961929321, "learning_rate": 0.0001, "loss": 1.5075, "step": 9952 }, { "epoch": 1.1433002125093332, "grad_norm": 0.5824765563011169, "learning_rate": 0.0001, "loss": 1.5392, "step": 9953 }, { "epoch": 1.1434150824191602, "grad_norm": 0.5903116464614868, "learning_rate": 0.0001, "loss": 1.6437, "step": 9954 }, { "epoch": 1.1435299523289875, "grad_norm": 0.525478184223175, "learning_rate": 0.0001, "loss": 1.4775, "step": 9955 }, { "epoch": 1.1436448222388145, "grad_norm": 0.4939769208431244, "learning_rate": 0.0001, "loss": 1.2772, "step": 9956 }, { "epoch": 1.1437596921486417, "grad_norm": 0.5516688823699951, "learning_rate": 0.0001, "loss": 1.3937, "step": 9957 }, { "epoch": 1.1438745620584687, "grad_norm": 0.5349141955375671, "learning_rate": 0.0001, "loss": 1.4102, "step": 9958 }, { "epoch": 1.143989431968296, "grad_norm": 0.5700792670249939, "learning_rate": 0.0001, "loss": 1.5783, "step": 9959 }, { "epoch": 1.144104301878123, "grad_norm": 0.5753479599952698, "learning_rate": 0.0001, "loss": 1.5559, "step": 9960 }, { "epoch": 1.1442191717879502, "grad_norm": 0.49369415640830994, "learning_rate": 0.0001, "loss": 1.3815, "step": 9961 }, { "epoch": 1.1443340416977772, "grad_norm": 0.5094558000564575, "learning_rate": 0.0001, "loss": 1.3144, "step": 9962 }, { "epoch": 1.1444489116076044, "grad_norm": 0.5119041800498962, "learning_rate": 0.0001, "loss": 1.3715, "step": 9963 }, { "epoch": 1.1445637815174314, "grad_norm": 0.7021076083183289, "learning_rate": 0.0001, "loss": 1.818, "step": 9964 }, { "epoch": 1.1446786514272587, "grad_norm": 0.5357758402824402, "learning_rate": 0.0001, "loss": 1.5008, "step": 9965 }, { "epoch": 1.1447935213370857, "grad_norm": 0.5504557490348816, "learning_rate": 0.0001, "loss": 1.5859, "step": 9966 }, { "epoch": 1.144908391246913, "grad_norm": 0.5839464068412781, "learning_rate": 0.0001, "loss": 1.5019, "step": 9967 }, { "epoch": 1.14502326115674, "grad_norm": 0.5492793917655945, "learning_rate": 0.0001, "loss": 1.6016, "step": 9968 }, { "epoch": 1.1451381310665671, "grad_norm": 0.5798627138137817, "learning_rate": 0.0001, "loss": 1.5336, "step": 9969 }, { "epoch": 1.1452530009763942, "grad_norm": 0.5143793225288391, "learning_rate": 0.0001, "loss": 1.429, "step": 9970 }, { "epoch": 1.1453678708862214, "grad_norm": 0.5312758684158325, "learning_rate": 0.0001, "loss": 1.4962, "step": 9971 }, { "epoch": 1.1454827407960484, "grad_norm": 0.5357524156570435, "learning_rate": 0.0001, "loss": 1.4713, "step": 9972 }, { "epoch": 1.1455976107058756, "grad_norm": 0.5727779269218445, "learning_rate": 0.0001, "loss": 1.3171, "step": 9973 }, { "epoch": 1.1457124806157026, "grad_norm": 0.5089569091796875, "learning_rate": 0.0001, "loss": 1.5816, "step": 9974 }, { "epoch": 1.1458273505255299, "grad_norm": 0.587568998336792, "learning_rate": 0.0001, "loss": 1.4017, "step": 9975 }, { "epoch": 1.1459422204353569, "grad_norm": 0.5800287127494812, "learning_rate": 0.0001, "loss": 1.5692, "step": 9976 }, { "epoch": 1.146057090345184, "grad_norm": 0.5658348798751831, "learning_rate": 0.0001, "loss": 1.5379, "step": 9977 }, { "epoch": 1.1461719602550111, "grad_norm": 0.5689564347267151, "learning_rate": 0.0001, "loss": 1.4476, "step": 9978 }, { "epoch": 1.1462868301648383, "grad_norm": 0.5329977869987488, "learning_rate": 0.0001, "loss": 1.5136, "step": 9979 }, { "epoch": 1.1464017000746654, "grad_norm": 0.5485842227935791, "learning_rate": 0.0001, "loss": 1.494, "step": 9980 }, { "epoch": 1.1465165699844926, "grad_norm": 0.5831310153007507, "learning_rate": 0.0001, "loss": 1.4663, "step": 9981 }, { "epoch": 1.1466314398943196, "grad_norm": 0.5669857263565063, "learning_rate": 0.0001, "loss": 1.436, "step": 9982 }, { "epoch": 1.1467463098041468, "grad_norm": 0.569514274597168, "learning_rate": 0.0001, "loss": 1.6238, "step": 9983 }, { "epoch": 1.1468611797139738, "grad_norm": 0.5632662773132324, "learning_rate": 0.0001, "loss": 1.345, "step": 9984 }, { "epoch": 1.146976049623801, "grad_norm": 0.5908129215240479, "learning_rate": 0.0001, "loss": 1.4914, "step": 9985 }, { "epoch": 1.147090919533628, "grad_norm": 0.6112525463104248, "learning_rate": 0.0001, "loss": 1.5285, "step": 9986 }, { "epoch": 1.1472057894434553, "grad_norm": 0.5455618500709534, "learning_rate": 0.0001, "loss": 1.4891, "step": 9987 }, { "epoch": 1.1473206593532823, "grad_norm": 0.5896759033203125, "learning_rate": 0.0001, "loss": 1.4798, "step": 9988 }, { "epoch": 1.1474355292631095, "grad_norm": 0.571235179901123, "learning_rate": 0.0001, "loss": 1.5773, "step": 9989 }, { "epoch": 1.1475503991729366, "grad_norm": 0.554031252861023, "learning_rate": 0.0001, "loss": 1.4231, "step": 9990 }, { "epoch": 1.1476652690827638, "grad_norm": 0.5455310344696045, "learning_rate": 0.0001, "loss": 1.4302, "step": 9991 }, { "epoch": 1.1477801389925908, "grad_norm": 0.5141569972038269, "learning_rate": 0.0001, "loss": 1.377, "step": 9992 }, { "epoch": 1.147895008902418, "grad_norm": 0.5764080286026001, "learning_rate": 0.0001, "loss": 1.4583, "step": 9993 }, { "epoch": 1.1480098788122453, "grad_norm": 0.5481144189834595, "learning_rate": 0.0001, "loss": 1.3084, "step": 9994 }, { "epoch": 1.1481247487220723, "grad_norm": 0.5942389965057373, "learning_rate": 0.0001, "loss": 1.3149, "step": 9995 }, { "epoch": 1.1482396186318993, "grad_norm": 0.5685340166091919, "learning_rate": 0.0001, "loss": 1.6043, "step": 9996 }, { "epoch": 1.1483544885417265, "grad_norm": 0.5472145080566406, "learning_rate": 0.0001, "loss": 1.3976, "step": 9997 }, { "epoch": 1.1484693584515537, "grad_norm": 0.6134904623031616, "learning_rate": 0.0001, "loss": 1.3928, "step": 9998 }, { "epoch": 1.1485842283613807, "grad_norm": 0.5569812655448914, "learning_rate": 0.0001, "loss": 1.3422, "step": 9999 }, { "epoch": 1.1486990982712078, "grad_norm": 0.5773634314537048, "learning_rate": 0.0001, "loss": 1.2924, "step": 10000 }, { "epoch": 1.148813968181035, "grad_norm": 0.5295365452766418, "learning_rate": 0.0001, "loss": 1.4651, "step": 10001 }, { "epoch": 1.1489288380908622, "grad_norm": 0.5416709780693054, "learning_rate": 0.0001, "loss": 1.3973, "step": 10002 }, { "epoch": 1.1490437080006892, "grad_norm": 0.5793564319610596, "learning_rate": 0.0001, "loss": 1.4149, "step": 10003 }, { "epoch": 1.1491585779105162, "grad_norm": 0.6285029649734497, "learning_rate": 0.0001, "loss": 1.4183, "step": 10004 }, { "epoch": 1.1492734478203435, "grad_norm": 0.5785372853279114, "learning_rate": 0.0001, "loss": 1.2509, "step": 10005 }, { "epoch": 1.1493883177301707, "grad_norm": 0.6486859321594238, "learning_rate": 0.0001, "loss": 1.6246, "step": 10006 }, { "epoch": 1.1495031876399977, "grad_norm": 0.5618028044700623, "learning_rate": 0.0001, "loss": 1.4458, "step": 10007 }, { "epoch": 1.1496180575498247, "grad_norm": 0.5516600012779236, "learning_rate": 0.0001, "loss": 1.3189, "step": 10008 }, { "epoch": 1.149732927459652, "grad_norm": 0.5638642907142639, "learning_rate": 0.0001, "loss": 1.4698, "step": 10009 }, { "epoch": 1.1498477973694792, "grad_norm": 0.5339157581329346, "learning_rate": 0.0001, "loss": 1.4751, "step": 10010 }, { "epoch": 1.1499626672793062, "grad_norm": 0.5690274834632874, "learning_rate": 0.0001, "loss": 1.5865, "step": 10011 }, { "epoch": 1.1500775371891332, "grad_norm": 0.5620351433753967, "learning_rate": 0.0001, "loss": 1.5383, "step": 10012 }, { "epoch": 1.1501924070989604, "grad_norm": 0.5315772294998169, "learning_rate": 0.0001, "loss": 1.5289, "step": 10013 }, { "epoch": 1.1503072770087877, "grad_norm": 0.5182132124900818, "learning_rate": 0.0001, "loss": 1.3737, "step": 10014 }, { "epoch": 1.1504221469186147, "grad_norm": 0.5324050188064575, "learning_rate": 0.0001, "loss": 1.3595, "step": 10015 }, { "epoch": 1.1505370168284417, "grad_norm": 0.5690122842788696, "learning_rate": 0.0001, "loss": 1.6227, "step": 10016 }, { "epoch": 1.150651886738269, "grad_norm": 0.5757220983505249, "learning_rate": 0.0001, "loss": 1.4787, "step": 10017 }, { "epoch": 1.1507667566480961, "grad_norm": 0.5967593789100647, "learning_rate": 0.0001, "loss": 1.6556, "step": 10018 }, { "epoch": 1.1508816265579231, "grad_norm": 0.5282356142997742, "learning_rate": 0.0001, "loss": 1.4053, "step": 10019 }, { "epoch": 1.1509964964677502, "grad_norm": 0.5435159206390381, "learning_rate": 0.0001, "loss": 1.5139, "step": 10020 }, { "epoch": 1.1511113663775774, "grad_norm": 0.5466963052749634, "learning_rate": 0.0001, "loss": 1.3257, "step": 10021 }, { "epoch": 1.1512262362874046, "grad_norm": 0.5418528914451599, "learning_rate": 0.0001, "loss": 1.4969, "step": 10022 }, { "epoch": 1.1513411061972316, "grad_norm": 0.6105648875236511, "learning_rate": 0.0001, "loss": 1.6334, "step": 10023 }, { "epoch": 1.1514559761070586, "grad_norm": 0.5551570057868958, "learning_rate": 0.0001, "loss": 1.5054, "step": 10024 }, { "epoch": 1.1515708460168859, "grad_norm": 0.5221659541130066, "learning_rate": 0.0001, "loss": 1.3441, "step": 10025 }, { "epoch": 1.151685715926713, "grad_norm": 0.6351895332336426, "learning_rate": 0.0001, "loss": 1.5949, "step": 10026 }, { "epoch": 1.15180058583654, "grad_norm": 0.5757087469100952, "learning_rate": 0.0001, "loss": 1.4843, "step": 10027 }, { "epoch": 1.1519154557463673, "grad_norm": 0.5461332201957703, "learning_rate": 0.0001, "loss": 1.4231, "step": 10028 }, { "epoch": 1.1520303256561943, "grad_norm": 0.5618088841438293, "learning_rate": 0.0001, "loss": 1.2567, "step": 10029 }, { "epoch": 1.1521451955660216, "grad_norm": 0.668555736541748, "learning_rate": 0.0001, "loss": 1.4302, "step": 10030 }, { "epoch": 1.1522600654758486, "grad_norm": 0.5682727694511414, "learning_rate": 0.0001, "loss": 1.3702, "step": 10031 }, { "epoch": 1.1523749353856758, "grad_norm": 0.531745195388794, "learning_rate": 0.0001, "loss": 1.5091, "step": 10032 }, { "epoch": 1.1524898052955028, "grad_norm": 0.5723196864128113, "learning_rate": 0.0001, "loss": 1.4564, "step": 10033 }, { "epoch": 1.15260467520533, "grad_norm": 0.5728488564491272, "learning_rate": 0.0001, "loss": 1.5481, "step": 10034 }, { "epoch": 1.152719545115157, "grad_norm": 0.528705894947052, "learning_rate": 0.0001, "loss": 1.4234, "step": 10035 }, { "epoch": 1.1528344150249843, "grad_norm": 0.5178189873695374, "learning_rate": 0.0001, "loss": 1.3147, "step": 10036 }, { "epoch": 1.1529492849348113, "grad_norm": 0.6135823130607605, "learning_rate": 0.0001, "loss": 1.4328, "step": 10037 }, { "epoch": 1.1530641548446385, "grad_norm": 0.5911353230476379, "learning_rate": 0.0001, "loss": 1.1557, "step": 10038 }, { "epoch": 1.1531790247544655, "grad_norm": 0.5582436919212341, "learning_rate": 0.0001, "loss": 1.2749, "step": 10039 }, { "epoch": 1.1532938946642928, "grad_norm": 0.5512915849685669, "learning_rate": 0.0001, "loss": 1.4394, "step": 10040 }, { "epoch": 1.1534087645741198, "grad_norm": 0.5502411127090454, "learning_rate": 0.0001, "loss": 1.2865, "step": 10041 }, { "epoch": 1.153523634483947, "grad_norm": 0.5328064560890198, "learning_rate": 0.0001, "loss": 1.6308, "step": 10042 }, { "epoch": 1.153638504393774, "grad_norm": 0.4983726441860199, "learning_rate": 0.0001, "loss": 1.2559, "step": 10043 }, { "epoch": 1.1537533743036013, "grad_norm": 0.6484810709953308, "learning_rate": 0.0001, "loss": 1.77, "step": 10044 }, { "epoch": 1.1538682442134283, "grad_norm": 0.5316923260688782, "learning_rate": 0.0001, "loss": 1.3421, "step": 10045 }, { "epoch": 1.1539831141232555, "grad_norm": 0.5837815403938293, "learning_rate": 0.0001, "loss": 1.5662, "step": 10046 }, { "epoch": 1.1540979840330825, "grad_norm": 0.557544469833374, "learning_rate": 0.0001, "loss": 1.6774, "step": 10047 }, { "epoch": 1.1542128539429097, "grad_norm": 0.5785412192344666, "learning_rate": 0.0001, "loss": 1.4416, "step": 10048 }, { "epoch": 1.1543277238527367, "grad_norm": 0.5959882140159607, "learning_rate": 0.0001, "loss": 1.5142, "step": 10049 }, { "epoch": 1.154442593762564, "grad_norm": 0.5870964527130127, "learning_rate": 0.0001, "loss": 1.4598, "step": 10050 }, { "epoch": 1.154557463672391, "grad_norm": 0.6367591619491577, "learning_rate": 0.0001, "loss": 1.3788, "step": 10051 }, { "epoch": 1.1546723335822182, "grad_norm": 0.5441646575927734, "learning_rate": 0.0001, "loss": 1.4919, "step": 10052 }, { "epoch": 1.1547872034920452, "grad_norm": 0.5900537967681885, "learning_rate": 0.0001, "loss": 1.2413, "step": 10053 }, { "epoch": 1.1549020734018725, "grad_norm": 0.6054137945175171, "learning_rate": 0.0001, "loss": 1.4587, "step": 10054 }, { "epoch": 1.1550169433116995, "grad_norm": 0.5724448561668396, "learning_rate": 0.0001, "loss": 1.3456, "step": 10055 }, { "epoch": 1.1551318132215267, "grad_norm": 0.6357987523078918, "learning_rate": 0.0001, "loss": 1.5517, "step": 10056 }, { "epoch": 1.1552466831313537, "grad_norm": 0.5552554130554199, "learning_rate": 0.0001, "loss": 1.5688, "step": 10057 }, { "epoch": 1.155361553041181, "grad_norm": 0.5502124428749084, "learning_rate": 0.0001, "loss": 1.3949, "step": 10058 }, { "epoch": 1.155476422951008, "grad_norm": 0.5750370025634766, "learning_rate": 0.0001, "loss": 1.4175, "step": 10059 }, { "epoch": 1.1555912928608352, "grad_norm": 0.5867468118667603, "learning_rate": 0.0001, "loss": 1.5483, "step": 10060 }, { "epoch": 1.1557061627706622, "grad_norm": 0.5584085583686829, "learning_rate": 0.0001, "loss": 1.3498, "step": 10061 }, { "epoch": 1.1558210326804894, "grad_norm": 0.5806280970573425, "learning_rate": 0.0001, "loss": 1.4329, "step": 10062 }, { "epoch": 1.1559359025903164, "grad_norm": 0.6064799427986145, "learning_rate": 0.0001, "loss": 1.5273, "step": 10063 }, { "epoch": 1.1560507725001437, "grad_norm": 0.5346385836601257, "learning_rate": 0.0001, "loss": 1.4026, "step": 10064 }, { "epoch": 1.1561656424099707, "grad_norm": 0.5625292062759399, "learning_rate": 0.0001, "loss": 1.4929, "step": 10065 }, { "epoch": 1.156280512319798, "grad_norm": 0.5931135416030884, "learning_rate": 0.0001, "loss": 1.4473, "step": 10066 }, { "epoch": 1.156395382229625, "grad_norm": 0.5838515162467957, "learning_rate": 0.0001, "loss": 1.6558, "step": 10067 }, { "epoch": 1.1565102521394521, "grad_norm": 0.5748473405838013, "learning_rate": 0.0001, "loss": 1.5563, "step": 10068 }, { "epoch": 1.1566251220492791, "grad_norm": 0.5717789530754089, "learning_rate": 0.0001, "loss": 1.359, "step": 10069 }, { "epoch": 1.1567399919591064, "grad_norm": 0.5722107887268066, "learning_rate": 0.0001, "loss": 1.6154, "step": 10070 }, { "epoch": 1.1568548618689334, "grad_norm": 0.5895611643791199, "learning_rate": 0.0001, "loss": 1.4712, "step": 10071 }, { "epoch": 1.1569697317787606, "grad_norm": 0.5189610719680786, "learning_rate": 0.0001, "loss": 1.4458, "step": 10072 }, { "epoch": 1.1570846016885876, "grad_norm": 0.5800937414169312, "learning_rate": 0.0001, "loss": 1.5325, "step": 10073 }, { "epoch": 1.1571994715984149, "grad_norm": 0.5774216055870056, "learning_rate": 0.0001, "loss": 1.3827, "step": 10074 }, { "epoch": 1.1573143415082419, "grad_norm": 0.6525434255599976, "learning_rate": 0.0001, "loss": 1.655, "step": 10075 }, { "epoch": 1.157429211418069, "grad_norm": 0.5682554244995117, "learning_rate": 0.0001, "loss": 1.5512, "step": 10076 }, { "epoch": 1.157544081327896, "grad_norm": 0.606096088886261, "learning_rate": 0.0001, "loss": 1.4762, "step": 10077 }, { "epoch": 1.1576589512377233, "grad_norm": 0.5647655129432678, "learning_rate": 0.0001, "loss": 1.6138, "step": 10078 }, { "epoch": 1.1577738211475503, "grad_norm": 0.580194354057312, "learning_rate": 0.0001, "loss": 1.5909, "step": 10079 }, { "epoch": 1.1578886910573776, "grad_norm": 0.5711573958396912, "learning_rate": 0.0001, "loss": 1.5894, "step": 10080 }, { "epoch": 1.1580035609672046, "grad_norm": 0.5933459401130676, "learning_rate": 0.0001, "loss": 1.5768, "step": 10081 }, { "epoch": 1.1581184308770318, "grad_norm": 0.5333853960037231, "learning_rate": 0.0001, "loss": 1.3838, "step": 10082 }, { "epoch": 1.1582333007868588, "grad_norm": 0.5227363705635071, "learning_rate": 0.0001, "loss": 1.5504, "step": 10083 }, { "epoch": 1.158348170696686, "grad_norm": 0.5608334541320801, "learning_rate": 0.0001, "loss": 1.5402, "step": 10084 }, { "epoch": 1.158463040606513, "grad_norm": 0.5283211469650269, "learning_rate": 0.0001, "loss": 1.2809, "step": 10085 }, { "epoch": 1.1585779105163403, "grad_norm": 0.5522334575653076, "learning_rate": 0.0001, "loss": 1.4767, "step": 10086 }, { "epoch": 1.1586927804261673, "grad_norm": 0.5605513453483582, "learning_rate": 0.0001, "loss": 1.3811, "step": 10087 }, { "epoch": 1.1588076503359945, "grad_norm": 0.584088146686554, "learning_rate": 0.0001, "loss": 1.3513, "step": 10088 }, { "epoch": 1.1589225202458215, "grad_norm": 0.5514279007911682, "learning_rate": 0.0001, "loss": 1.4998, "step": 10089 }, { "epoch": 1.1590373901556488, "grad_norm": 0.5283199548721313, "learning_rate": 0.0001, "loss": 1.4285, "step": 10090 }, { "epoch": 1.1591522600654758, "grad_norm": 0.5820270776748657, "learning_rate": 0.0001, "loss": 1.6182, "step": 10091 }, { "epoch": 1.159267129975303, "grad_norm": 0.5973749160766602, "learning_rate": 0.0001, "loss": 1.3123, "step": 10092 }, { "epoch": 1.15938199988513, "grad_norm": 0.5737087726593018, "learning_rate": 0.0001, "loss": 1.5244, "step": 10093 }, { "epoch": 1.1594968697949573, "grad_norm": 0.5605661273002625, "learning_rate": 0.0001, "loss": 1.5358, "step": 10094 }, { "epoch": 1.1596117397047843, "grad_norm": 0.5315190553665161, "learning_rate": 0.0001, "loss": 1.4233, "step": 10095 }, { "epoch": 1.1597266096146115, "grad_norm": 0.5676159262657166, "learning_rate": 0.0001, "loss": 1.48, "step": 10096 }, { "epoch": 1.1598414795244385, "grad_norm": 0.5562459826469421, "learning_rate": 0.0001, "loss": 1.6104, "step": 10097 }, { "epoch": 1.1599563494342657, "grad_norm": 0.5750492215156555, "learning_rate": 0.0001, "loss": 1.5335, "step": 10098 }, { "epoch": 1.1600712193440927, "grad_norm": 0.565864622592926, "learning_rate": 0.0001, "loss": 1.3542, "step": 10099 }, { "epoch": 1.16018608925392, "grad_norm": 0.5741795897483826, "learning_rate": 0.0001, "loss": 1.5441, "step": 10100 }, { "epoch": 1.160300959163747, "grad_norm": 0.5474345088005066, "learning_rate": 0.0001, "loss": 1.6313, "step": 10101 }, { "epoch": 1.1604158290735742, "grad_norm": 0.5959522724151611, "learning_rate": 0.0001, "loss": 1.5481, "step": 10102 }, { "epoch": 1.1605306989834012, "grad_norm": 0.5602507591247559, "learning_rate": 0.0001, "loss": 1.5372, "step": 10103 }, { "epoch": 1.1606455688932285, "grad_norm": 0.5457338690757751, "learning_rate": 0.0001, "loss": 1.4527, "step": 10104 }, { "epoch": 1.1607604388030555, "grad_norm": 0.5657529830932617, "learning_rate": 0.0001, "loss": 1.4834, "step": 10105 }, { "epoch": 1.1608753087128827, "grad_norm": 0.5475678443908691, "learning_rate": 0.0001, "loss": 1.51, "step": 10106 }, { "epoch": 1.1609901786227097, "grad_norm": 0.5341169238090515, "learning_rate": 0.0001, "loss": 1.5948, "step": 10107 }, { "epoch": 1.161105048532537, "grad_norm": 0.5745497345924377, "learning_rate": 0.0001, "loss": 1.5087, "step": 10108 }, { "epoch": 1.161219918442364, "grad_norm": 0.56389319896698, "learning_rate": 0.0001, "loss": 1.4402, "step": 10109 }, { "epoch": 1.1613347883521912, "grad_norm": 0.5795552134513855, "learning_rate": 0.0001, "loss": 1.4112, "step": 10110 }, { "epoch": 1.1614496582620182, "grad_norm": 0.5938699841499329, "learning_rate": 0.0001, "loss": 1.453, "step": 10111 }, { "epoch": 1.1615645281718454, "grad_norm": 0.5595149993896484, "learning_rate": 0.0001, "loss": 1.4956, "step": 10112 }, { "epoch": 1.1616793980816724, "grad_norm": 0.6180020570755005, "learning_rate": 0.0001, "loss": 1.6828, "step": 10113 }, { "epoch": 1.1617942679914997, "grad_norm": 0.5853464603424072, "learning_rate": 0.0001, "loss": 1.3735, "step": 10114 }, { "epoch": 1.1619091379013267, "grad_norm": 0.5351958870887756, "learning_rate": 0.0001, "loss": 1.3615, "step": 10115 }, { "epoch": 1.162024007811154, "grad_norm": 0.5981417894363403, "learning_rate": 0.0001, "loss": 1.518, "step": 10116 }, { "epoch": 1.162138877720981, "grad_norm": 0.5747444033622742, "learning_rate": 0.0001, "loss": 1.5427, "step": 10117 }, { "epoch": 1.1622537476308081, "grad_norm": 0.5646340250968933, "learning_rate": 0.0001, "loss": 1.5378, "step": 10118 }, { "epoch": 1.1623686175406351, "grad_norm": 0.5687159299850464, "learning_rate": 0.0001, "loss": 1.5357, "step": 10119 }, { "epoch": 1.1624834874504624, "grad_norm": 0.5587418079376221, "learning_rate": 0.0001, "loss": 1.4571, "step": 10120 }, { "epoch": 1.1625983573602894, "grad_norm": 0.575074315071106, "learning_rate": 0.0001, "loss": 1.3044, "step": 10121 }, { "epoch": 1.1627132272701166, "grad_norm": 0.6066795587539673, "learning_rate": 0.0001, "loss": 1.7487, "step": 10122 }, { "epoch": 1.1628280971799436, "grad_norm": 0.6262649893760681, "learning_rate": 0.0001, "loss": 1.5702, "step": 10123 }, { "epoch": 1.1629429670897709, "grad_norm": 0.5629507303237915, "learning_rate": 0.0001, "loss": 1.4479, "step": 10124 }, { "epoch": 1.1630578369995979, "grad_norm": 0.6446200013160706, "learning_rate": 0.0001, "loss": 1.6935, "step": 10125 }, { "epoch": 1.163172706909425, "grad_norm": 0.567599356174469, "learning_rate": 0.0001, "loss": 1.4786, "step": 10126 }, { "epoch": 1.163287576819252, "grad_norm": 0.5245000720024109, "learning_rate": 0.0001, "loss": 1.3432, "step": 10127 }, { "epoch": 1.1634024467290793, "grad_norm": 0.6057835817337036, "learning_rate": 0.0001, "loss": 1.5532, "step": 10128 }, { "epoch": 1.1635173166389063, "grad_norm": 0.5868533253669739, "learning_rate": 0.0001, "loss": 1.6142, "step": 10129 }, { "epoch": 1.1636321865487336, "grad_norm": 0.533150315284729, "learning_rate": 0.0001, "loss": 1.4869, "step": 10130 }, { "epoch": 1.1637470564585608, "grad_norm": 0.6112939715385437, "learning_rate": 0.0001, "loss": 1.3912, "step": 10131 }, { "epoch": 1.1638619263683878, "grad_norm": 0.5236315131187439, "learning_rate": 0.0001, "loss": 1.4242, "step": 10132 }, { "epoch": 1.1639767962782148, "grad_norm": 0.5508266091346741, "learning_rate": 0.0001, "loss": 1.582, "step": 10133 }, { "epoch": 1.164091666188042, "grad_norm": 0.5487030744552612, "learning_rate": 0.0001, "loss": 1.4663, "step": 10134 }, { "epoch": 1.1642065360978693, "grad_norm": 0.6089741587638855, "learning_rate": 0.0001, "loss": 1.6514, "step": 10135 }, { "epoch": 1.1643214060076963, "grad_norm": 0.6037748456001282, "learning_rate": 0.0001, "loss": 1.3875, "step": 10136 }, { "epoch": 1.1644362759175233, "grad_norm": 0.5815845131874084, "learning_rate": 0.0001, "loss": 1.5777, "step": 10137 }, { "epoch": 1.1645511458273505, "grad_norm": 0.5638243556022644, "learning_rate": 0.0001, "loss": 1.564, "step": 10138 }, { "epoch": 1.1646660157371778, "grad_norm": 0.546511173248291, "learning_rate": 0.0001, "loss": 1.4114, "step": 10139 }, { "epoch": 1.1647808856470048, "grad_norm": 0.5502268671989441, "learning_rate": 0.0001, "loss": 1.4463, "step": 10140 }, { "epoch": 1.1648957555568318, "grad_norm": 0.5502709150314331, "learning_rate": 0.0001, "loss": 1.4657, "step": 10141 }, { "epoch": 1.165010625466659, "grad_norm": 0.5224112868309021, "learning_rate": 0.0001, "loss": 1.2935, "step": 10142 }, { "epoch": 1.1651254953764862, "grad_norm": 0.5977753400802612, "learning_rate": 0.0001, "loss": 1.4873, "step": 10143 }, { "epoch": 1.1652403652863133, "grad_norm": 0.5752722024917603, "learning_rate": 0.0001, "loss": 1.4752, "step": 10144 }, { "epoch": 1.1653552351961403, "grad_norm": 0.5518738031387329, "learning_rate": 0.0001, "loss": 1.3145, "step": 10145 }, { "epoch": 1.1654701051059675, "grad_norm": 0.5660110712051392, "learning_rate": 0.0001, "loss": 1.4595, "step": 10146 }, { "epoch": 1.1655849750157947, "grad_norm": 0.6433327198028564, "learning_rate": 0.0001, "loss": 1.7201, "step": 10147 }, { "epoch": 1.1656998449256217, "grad_norm": 0.5778366327285767, "learning_rate": 0.0001, "loss": 1.5793, "step": 10148 }, { "epoch": 1.1658147148354487, "grad_norm": 0.5929848551750183, "learning_rate": 0.0001, "loss": 1.3827, "step": 10149 }, { "epoch": 1.165929584745276, "grad_norm": 0.5661731958389282, "learning_rate": 0.0001, "loss": 1.3457, "step": 10150 }, { "epoch": 1.1660444546551032, "grad_norm": 0.5540391802787781, "learning_rate": 0.0001, "loss": 1.513, "step": 10151 }, { "epoch": 1.1661593245649302, "grad_norm": 0.5562129616737366, "learning_rate": 0.0001, "loss": 1.5181, "step": 10152 }, { "epoch": 1.1662741944747572, "grad_norm": 0.5246435403823853, "learning_rate": 0.0001, "loss": 1.4335, "step": 10153 }, { "epoch": 1.1663890643845845, "grad_norm": 0.650452733039856, "learning_rate": 0.0001, "loss": 1.7906, "step": 10154 }, { "epoch": 1.1665039342944117, "grad_norm": 0.5996583104133606, "learning_rate": 0.0001, "loss": 1.5997, "step": 10155 }, { "epoch": 1.1666188042042387, "grad_norm": 0.52683424949646, "learning_rate": 0.0001, "loss": 1.2967, "step": 10156 }, { "epoch": 1.1667336741140657, "grad_norm": 0.5419752597808838, "learning_rate": 0.0001, "loss": 1.5335, "step": 10157 }, { "epoch": 1.166848544023893, "grad_norm": 0.5371859073638916, "learning_rate": 0.0001, "loss": 1.2567, "step": 10158 }, { "epoch": 1.1669634139337202, "grad_norm": 0.6510452032089233, "learning_rate": 0.0001, "loss": 1.7457, "step": 10159 }, { "epoch": 1.1670782838435472, "grad_norm": 0.5702193379402161, "learning_rate": 0.0001, "loss": 1.5441, "step": 10160 }, { "epoch": 1.1671931537533742, "grad_norm": 0.5725999474525452, "learning_rate": 0.0001, "loss": 1.5023, "step": 10161 }, { "epoch": 1.1673080236632014, "grad_norm": 0.5554792284965515, "learning_rate": 0.0001, "loss": 1.4167, "step": 10162 }, { "epoch": 1.1674228935730286, "grad_norm": 0.5548241138458252, "learning_rate": 0.0001, "loss": 1.3767, "step": 10163 }, { "epoch": 1.1675377634828557, "grad_norm": 0.6183516979217529, "learning_rate": 0.0001, "loss": 1.626, "step": 10164 }, { "epoch": 1.1676526333926829, "grad_norm": 0.5450295209884644, "learning_rate": 0.0001, "loss": 1.5589, "step": 10165 }, { "epoch": 1.16776750330251, "grad_norm": 0.5990351438522339, "learning_rate": 0.0001, "loss": 1.544, "step": 10166 }, { "epoch": 1.1678823732123371, "grad_norm": 0.5059851408004761, "learning_rate": 0.0001, "loss": 1.4095, "step": 10167 }, { "epoch": 1.1679972431221641, "grad_norm": 0.534441351890564, "learning_rate": 0.0001, "loss": 1.5801, "step": 10168 }, { "epoch": 1.1681121130319914, "grad_norm": 0.5403814911842346, "learning_rate": 0.0001, "loss": 1.4503, "step": 10169 }, { "epoch": 1.1682269829418184, "grad_norm": 0.5525118112564087, "learning_rate": 0.0001, "loss": 1.2902, "step": 10170 }, { "epoch": 1.1683418528516456, "grad_norm": 0.5069050788879395, "learning_rate": 0.0001, "loss": 1.3002, "step": 10171 }, { "epoch": 1.1684567227614726, "grad_norm": 0.5735329389572144, "learning_rate": 0.0001, "loss": 1.6275, "step": 10172 }, { "epoch": 1.1685715926712998, "grad_norm": 0.5312854647636414, "learning_rate": 0.0001, "loss": 1.6037, "step": 10173 }, { "epoch": 1.1686864625811269, "grad_norm": 0.5686579346656799, "learning_rate": 0.0001, "loss": 1.5586, "step": 10174 }, { "epoch": 1.168801332490954, "grad_norm": 0.5913828015327454, "learning_rate": 0.0001, "loss": 1.4098, "step": 10175 }, { "epoch": 1.168916202400781, "grad_norm": 0.5945882797241211, "learning_rate": 0.0001, "loss": 1.1721, "step": 10176 }, { "epoch": 1.1690310723106083, "grad_norm": 0.565756618976593, "learning_rate": 0.0001, "loss": 1.3638, "step": 10177 }, { "epoch": 1.1691459422204353, "grad_norm": 0.557446300983429, "learning_rate": 0.0001, "loss": 1.3487, "step": 10178 }, { "epoch": 1.1692608121302626, "grad_norm": 0.5504045486450195, "learning_rate": 0.0001, "loss": 1.4271, "step": 10179 }, { "epoch": 1.1693756820400896, "grad_norm": 0.576124906539917, "learning_rate": 0.0001, "loss": 1.5313, "step": 10180 }, { "epoch": 1.1694905519499168, "grad_norm": 0.5811876058578491, "learning_rate": 0.0001, "loss": 1.3186, "step": 10181 }, { "epoch": 1.1696054218597438, "grad_norm": 0.5304403305053711, "learning_rate": 0.0001, "loss": 1.4356, "step": 10182 }, { "epoch": 1.169720291769571, "grad_norm": 0.5441917777061462, "learning_rate": 0.0001, "loss": 1.4705, "step": 10183 }, { "epoch": 1.169835161679398, "grad_norm": 0.5729200839996338, "learning_rate": 0.0001, "loss": 1.5841, "step": 10184 }, { "epoch": 1.1699500315892253, "grad_norm": 0.5269560813903809, "learning_rate": 0.0001, "loss": 1.4486, "step": 10185 }, { "epoch": 1.1700649014990523, "grad_norm": 0.6391506791114807, "learning_rate": 0.0001, "loss": 1.6032, "step": 10186 }, { "epoch": 1.1701797714088795, "grad_norm": 0.5685237646102905, "learning_rate": 0.0001, "loss": 1.6004, "step": 10187 }, { "epoch": 1.1702946413187065, "grad_norm": 0.6011033654212952, "learning_rate": 0.0001, "loss": 1.5688, "step": 10188 }, { "epoch": 1.1704095112285338, "grad_norm": 0.5902635455131531, "learning_rate": 0.0001, "loss": 1.3985, "step": 10189 }, { "epoch": 1.1705243811383608, "grad_norm": 0.5687843561172485, "learning_rate": 0.0001, "loss": 1.5375, "step": 10190 }, { "epoch": 1.170639251048188, "grad_norm": 0.5689520239830017, "learning_rate": 0.0001, "loss": 1.6174, "step": 10191 }, { "epoch": 1.170754120958015, "grad_norm": 0.5470170378684998, "learning_rate": 0.0001, "loss": 1.2694, "step": 10192 }, { "epoch": 1.1708689908678422, "grad_norm": 0.5402204394340515, "learning_rate": 0.0001, "loss": 1.3891, "step": 10193 }, { "epoch": 1.1709838607776692, "grad_norm": 0.5700439214706421, "learning_rate": 0.0001, "loss": 1.2554, "step": 10194 }, { "epoch": 1.1710987306874965, "grad_norm": 0.5934173464775085, "learning_rate": 0.0001, "loss": 1.6594, "step": 10195 }, { "epoch": 1.1712136005973235, "grad_norm": 0.5891930460929871, "learning_rate": 0.0001, "loss": 1.5339, "step": 10196 }, { "epoch": 1.1713284705071507, "grad_norm": 0.5680025219917297, "learning_rate": 0.0001, "loss": 1.5624, "step": 10197 }, { "epoch": 1.1714433404169777, "grad_norm": 0.557145893573761, "learning_rate": 0.0001, "loss": 1.363, "step": 10198 }, { "epoch": 1.171558210326805, "grad_norm": 0.5671913623809814, "learning_rate": 0.0001, "loss": 1.1882, "step": 10199 }, { "epoch": 1.171673080236632, "grad_norm": 0.5525150895118713, "learning_rate": 0.0001, "loss": 1.4802, "step": 10200 }, { "epoch": 1.1717879501464592, "grad_norm": 0.5642703771591187, "learning_rate": 0.0001, "loss": 1.4937, "step": 10201 }, { "epoch": 1.1719028200562862, "grad_norm": 0.5610083341598511, "learning_rate": 0.0001, "loss": 1.5124, "step": 10202 }, { "epoch": 1.1720176899661134, "grad_norm": 0.5838199257850647, "learning_rate": 0.0001, "loss": 1.4971, "step": 10203 }, { "epoch": 1.1721325598759404, "grad_norm": 0.6114639639854431, "learning_rate": 0.0001, "loss": 1.45, "step": 10204 }, { "epoch": 1.1722474297857677, "grad_norm": 0.6125414371490479, "learning_rate": 0.0001, "loss": 1.4981, "step": 10205 }, { "epoch": 1.1723622996955947, "grad_norm": 0.6367550492286682, "learning_rate": 0.0001, "loss": 1.7869, "step": 10206 }, { "epoch": 1.172477169605422, "grad_norm": 0.5995863080024719, "learning_rate": 0.0001, "loss": 1.5015, "step": 10207 }, { "epoch": 1.172592039515249, "grad_norm": 0.5278496742248535, "learning_rate": 0.0001, "loss": 1.4433, "step": 10208 }, { "epoch": 1.1727069094250762, "grad_norm": 0.5485237240791321, "learning_rate": 0.0001, "loss": 1.5175, "step": 10209 }, { "epoch": 1.1728217793349032, "grad_norm": 0.6074742674827576, "learning_rate": 0.0001, "loss": 1.3414, "step": 10210 }, { "epoch": 1.1729366492447304, "grad_norm": 0.5964166522026062, "learning_rate": 0.0001, "loss": 1.3031, "step": 10211 }, { "epoch": 1.1730515191545574, "grad_norm": 0.5800673365592957, "learning_rate": 0.0001, "loss": 1.4228, "step": 10212 }, { "epoch": 1.1731663890643846, "grad_norm": 0.5409739017486572, "learning_rate": 0.0001, "loss": 1.5449, "step": 10213 }, { "epoch": 1.1732812589742116, "grad_norm": 0.5506752729415894, "learning_rate": 0.0001, "loss": 1.5131, "step": 10214 }, { "epoch": 1.1733961288840389, "grad_norm": 0.551644504070282, "learning_rate": 0.0001, "loss": 1.4612, "step": 10215 }, { "epoch": 1.173510998793866, "grad_norm": 0.5593010783195496, "learning_rate": 0.0001, "loss": 1.4069, "step": 10216 }, { "epoch": 1.1736258687036931, "grad_norm": 0.5570374131202698, "learning_rate": 0.0001, "loss": 1.1389, "step": 10217 }, { "epoch": 1.1737407386135201, "grad_norm": 0.6743999719619751, "learning_rate": 0.0001, "loss": 1.5835, "step": 10218 }, { "epoch": 1.1738556085233474, "grad_norm": 0.5389916300773621, "learning_rate": 0.0001, "loss": 1.3792, "step": 10219 }, { "epoch": 1.1739704784331744, "grad_norm": 0.5596214532852173, "learning_rate": 0.0001, "loss": 1.3943, "step": 10220 }, { "epoch": 1.1740853483430016, "grad_norm": 0.6074917912483215, "learning_rate": 0.0001, "loss": 1.2292, "step": 10221 }, { "epoch": 1.1742002182528286, "grad_norm": 0.5609689950942993, "learning_rate": 0.0001, "loss": 1.514, "step": 10222 }, { "epoch": 1.1743150881626558, "grad_norm": 0.554728627204895, "learning_rate": 0.0001, "loss": 1.4622, "step": 10223 }, { "epoch": 1.1744299580724828, "grad_norm": 0.548478901386261, "learning_rate": 0.0001, "loss": 1.4344, "step": 10224 }, { "epoch": 1.17454482798231, "grad_norm": 0.5518718957901001, "learning_rate": 0.0001, "loss": 1.4001, "step": 10225 }, { "epoch": 1.174659697892137, "grad_norm": 0.6477909088134766, "learning_rate": 0.0001, "loss": 1.5887, "step": 10226 }, { "epoch": 1.1747745678019643, "grad_norm": 0.5640280842781067, "learning_rate": 0.0001, "loss": 1.4278, "step": 10227 }, { "epoch": 1.1748894377117913, "grad_norm": 0.5902442932128906, "learning_rate": 0.0001, "loss": 1.3603, "step": 10228 }, { "epoch": 1.1750043076216186, "grad_norm": 0.575197160243988, "learning_rate": 0.0001, "loss": 1.4094, "step": 10229 }, { "epoch": 1.1751191775314456, "grad_norm": 0.5721563696861267, "learning_rate": 0.0001, "loss": 1.5245, "step": 10230 }, { "epoch": 1.1752340474412728, "grad_norm": 0.7208542823791504, "learning_rate": 0.0001, "loss": 1.5508, "step": 10231 }, { "epoch": 1.1753489173510998, "grad_norm": 0.5777501463890076, "learning_rate": 0.0001, "loss": 1.3529, "step": 10232 }, { "epoch": 1.175463787260927, "grad_norm": 0.5787646174430847, "learning_rate": 0.0001, "loss": 1.4706, "step": 10233 }, { "epoch": 1.175578657170754, "grad_norm": 0.5515586733818054, "learning_rate": 0.0001, "loss": 1.5613, "step": 10234 }, { "epoch": 1.1756935270805813, "grad_norm": 0.5776956677436829, "learning_rate": 0.0001, "loss": 1.4891, "step": 10235 }, { "epoch": 1.1758083969904083, "grad_norm": 0.6034742593765259, "learning_rate": 0.0001, "loss": 1.5673, "step": 10236 }, { "epoch": 1.1759232669002355, "grad_norm": 0.5228462219238281, "learning_rate": 0.0001, "loss": 1.5063, "step": 10237 }, { "epoch": 1.1760381368100625, "grad_norm": 0.5508900880813599, "learning_rate": 0.0001, "loss": 1.477, "step": 10238 }, { "epoch": 1.1761530067198898, "grad_norm": 0.5309219360351562, "learning_rate": 0.0001, "loss": 1.446, "step": 10239 }, { "epoch": 1.1762678766297168, "grad_norm": 0.5277041792869568, "learning_rate": 0.0001, "loss": 1.405, "step": 10240 }, { "epoch": 1.176382746539544, "grad_norm": 0.5799806118011475, "learning_rate": 0.0001, "loss": 1.342, "step": 10241 }, { "epoch": 1.176497616449371, "grad_norm": 0.5781192183494568, "learning_rate": 0.0001, "loss": 1.356, "step": 10242 }, { "epoch": 1.1766124863591982, "grad_norm": 0.5948365330696106, "learning_rate": 0.0001, "loss": 1.5877, "step": 10243 }, { "epoch": 1.1767273562690252, "grad_norm": 0.5791916251182556, "learning_rate": 0.0001, "loss": 1.644, "step": 10244 }, { "epoch": 1.1768422261788525, "grad_norm": 0.60085129737854, "learning_rate": 0.0001, "loss": 1.4501, "step": 10245 }, { "epoch": 1.1769570960886795, "grad_norm": 0.5680752396583557, "learning_rate": 0.0001, "loss": 1.5569, "step": 10246 }, { "epoch": 1.1770719659985067, "grad_norm": 0.5210364460945129, "learning_rate": 0.0001, "loss": 1.4383, "step": 10247 }, { "epoch": 1.1771868359083337, "grad_norm": 0.5816622972488403, "learning_rate": 0.0001, "loss": 1.543, "step": 10248 }, { "epoch": 1.177301705818161, "grad_norm": 0.5394155383110046, "learning_rate": 0.0001, "loss": 1.4946, "step": 10249 }, { "epoch": 1.177416575727988, "grad_norm": 0.5853523015975952, "learning_rate": 0.0001, "loss": 1.6292, "step": 10250 }, { "epoch": 1.1775314456378152, "grad_norm": 0.5694888234138489, "learning_rate": 0.0001, "loss": 1.5744, "step": 10251 }, { "epoch": 1.1776463155476422, "grad_norm": 0.5495486855506897, "learning_rate": 0.0001, "loss": 1.5335, "step": 10252 }, { "epoch": 1.1777611854574694, "grad_norm": 0.5514094233512878, "learning_rate": 0.0001, "loss": 1.1921, "step": 10253 }, { "epoch": 1.1778760553672964, "grad_norm": 0.5321869850158691, "learning_rate": 0.0001, "loss": 1.4492, "step": 10254 }, { "epoch": 1.1779909252771237, "grad_norm": 0.5383161902427673, "learning_rate": 0.0001, "loss": 1.4645, "step": 10255 }, { "epoch": 1.1781057951869507, "grad_norm": 0.5531543493270874, "learning_rate": 0.0001, "loss": 1.494, "step": 10256 }, { "epoch": 1.178220665096778, "grad_norm": 0.5813853144645691, "learning_rate": 0.0001, "loss": 1.6546, "step": 10257 }, { "epoch": 1.178335535006605, "grad_norm": 0.6172130703926086, "learning_rate": 0.0001, "loss": 1.5318, "step": 10258 }, { "epoch": 1.1784504049164322, "grad_norm": 0.6372018456459045, "learning_rate": 0.0001, "loss": 1.4326, "step": 10259 }, { "epoch": 1.1785652748262592, "grad_norm": 0.5197803974151611, "learning_rate": 0.0001, "loss": 1.2676, "step": 10260 }, { "epoch": 1.1786801447360864, "grad_norm": 0.549474835395813, "learning_rate": 0.0001, "loss": 1.6087, "step": 10261 }, { "epoch": 1.1787950146459134, "grad_norm": 0.5710943937301636, "learning_rate": 0.0001, "loss": 1.6127, "step": 10262 }, { "epoch": 1.1789098845557406, "grad_norm": 0.5572322010993958, "learning_rate": 0.0001, "loss": 1.4759, "step": 10263 }, { "epoch": 1.1790247544655676, "grad_norm": 0.5510053038597107, "learning_rate": 0.0001, "loss": 1.2321, "step": 10264 }, { "epoch": 1.1791396243753949, "grad_norm": 0.5195276141166687, "learning_rate": 0.0001, "loss": 1.2855, "step": 10265 }, { "epoch": 1.1792544942852219, "grad_norm": 0.5562936663627625, "learning_rate": 0.0001, "loss": 1.3052, "step": 10266 }, { "epoch": 1.1793693641950491, "grad_norm": 0.5841695666313171, "learning_rate": 0.0001, "loss": 1.6325, "step": 10267 }, { "epoch": 1.1794842341048764, "grad_norm": 0.5963363647460938, "learning_rate": 0.0001, "loss": 1.4882, "step": 10268 }, { "epoch": 1.1795991040147034, "grad_norm": 0.5893837809562683, "learning_rate": 0.0001, "loss": 1.5622, "step": 10269 }, { "epoch": 1.1797139739245304, "grad_norm": 0.5855959057807922, "learning_rate": 0.0001, "loss": 1.4191, "step": 10270 }, { "epoch": 1.1798288438343576, "grad_norm": 0.5364851355552673, "learning_rate": 0.0001, "loss": 1.3232, "step": 10271 }, { "epoch": 1.1799437137441848, "grad_norm": 0.574670135974884, "learning_rate": 0.0001, "loss": 1.5866, "step": 10272 }, { "epoch": 1.1800585836540118, "grad_norm": 0.5624828934669495, "learning_rate": 0.0001, "loss": 1.4357, "step": 10273 }, { "epoch": 1.1801734535638388, "grad_norm": 0.585806131362915, "learning_rate": 0.0001, "loss": 1.4264, "step": 10274 }, { "epoch": 1.180288323473666, "grad_norm": 0.6353849172592163, "learning_rate": 0.0001, "loss": 1.76, "step": 10275 }, { "epoch": 1.1804031933834933, "grad_norm": 0.5217344760894775, "learning_rate": 0.0001, "loss": 1.4722, "step": 10276 }, { "epoch": 1.1805180632933203, "grad_norm": 0.5620312690734863, "learning_rate": 0.0001, "loss": 1.3626, "step": 10277 }, { "epoch": 1.1806329332031473, "grad_norm": 0.6162904500961304, "learning_rate": 0.0001, "loss": 1.2, "step": 10278 }, { "epoch": 1.1807478031129746, "grad_norm": 0.5573971271514893, "learning_rate": 0.0001, "loss": 1.2377, "step": 10279 }, { "epoch": 1.1808626730228018, "grad_norm": 0.5593479871749878, "learning_rate": 0.0001, "loss": 1.2591, "step": 10280 }, { "epoch": 1.1809775429326288, "grad_norm": 0.5311997532844543, "learning_rate": 0.0001, "loss": 1.4939, "step": 10281 }, { "epoch": 1.1810924128424558, "grad_norm": 0.5329908728599548, "learning_rate": 0.0001, "loss": 1.4148, "step": 10282 }, { "epoch": 1.181207282752283, "grad_norm": 0.6186027526855469, "learning_rate": 0.0001, "loss": 1.7251, "step": 10283 }, { "epoch": 1.1813221526621103, "grad_norm": 0.5550754070281982, "learning_rate": 0.0001, "loss": 1.3355, "step": 10284 }, { "epoch": 1.1814370225719373, "grad_norm": 0.5469382405281067, "learning_rate": 0.0001, "loss": 1.5502, "step": 10285 }, { "epoch": 1.1815518924817643, "grad_norm": 0.5690637826919556, "learning_rate": 0.0001, "loss": 1.4224, "step": 10286 }, { "epoch": 1.1816667623915915, "grad_norm": 0.5610548853874207, "learning_rate": 0.0001, "loss": 1.4282, "step": 10287 }, { "epoch": 1.1817816323014187, "grad_norm": 0.5904365181922913, "learning_rate": 0.0001, "loss": 1.5588, "step": 10288 }, { "epoch": 1.1818965022112458, "grad_norm": 0.538159966468811, "learning_rate": 0.0001, "loss": 1.3842, "step": 10289 }, { "epoch": 1.1820113721210728, "grad_norm": 0.5710332989692688, "learning_rate": 0.0001, "loss": 1.441, "step": 10290 }, { "epoch": 1.1821262420309, "grad_norm": 0.5427202582359314, "learning_rate": 0.0001, "loss": 1.4344, "step": 10291 }, { "epoch": 1.1822411119407272, "grad_norm": 0.5821972489356995, "learning_rate": 0.0001, "loss": 1.3847, "step": 10292 }, { "epoch": 1.1823559818505542, "grad_norm": 0.5653535723686218, "learning_rate": 0.0001, "loss": 1.5676, "step": 10293 }, { "epoch": 1.1824708517603812, "grad_norm": 0.5518774390220642, "learning_rate": 0.0001, "loss": 1.4604, "step": 10294 }, { "epoch": 1.1825857216702085, "grad_norm": 0.506719172000885, "learning_rate": 0.0001, "loss": 1.4198, "step": 10295 }, { "epoch": 1.1827005915800357, "grad_norm": 0.5615983009338379, "learning_rate": 0.0001, "loss": 1.5238, "step": 10296 }, { "epoch": 1.1828154614898627, "grad_norm": 0.5364108681678772, "learning_rate": 0.0001, "loss": 1.3754, "step": 10297 }, { "epoch": 1.1829303313996897, "grad_norm": 0.552790105342865, "learning_rate": 0.0001, "loss": 1.3189, "step": 10298 }, { "epoch": 1.183045201309517, "grad_norm": 0.5753331780433655, "learning_rate": 0.0001, "loss": 1.6094, "step": 10299 }, { "epoch": 1.1831600712193442, "grad_norm": 0.5673770308494568, "learning_rate": 0.0001, "loss": 1.5215, "step": 10300 }, { "epoch": 1.1832749411291712, "grad_norm": 0.5506584644317627, "learning_rate": 0.0001, "loss": 1.4499, "step": 10301 }, { "epoch": 1.1833898110389984, "grad_norm": 0.5558772087097168, "learning_rate": 0.0001, "loss": 1.5703, "step": 10302 }, { "epoch": 1.1835046809488254, "grad_norm": 0.5837011337280273, "learning_rate": 0.0001, "loss": 1.6082, "step": 10303 }, { "epoch": 1.1836195508586527, "grad_norm": 0.5474368333816528, "learning_rate": 0.0001, "loss": 1.4428, "step": 10304 }, { "epoch": 1.1837344207684797, "grad_norm": 0.5366905331611633, "learning_rate": 0.0001, "loss": 1.4069, "step": 10305 }, { "epoch": 1.183849290678307, "grad_norm": 0.5617532134056091, "learning_rate": 0.0001, "loss": 1.4165, "step": 10306 }, { "epoch": 1.183964160588134, "grad_norm": 0.5564953684806824, "learning_rate": 0.0001, "loss": 1.5541, "step": 10307 }, { "epoch": 1.1840790304979611, "grad_norm": 0.5196337699890137, "learning_rate": 0.0001, "loss": 1.3772, "step": 10308 }, { "epoch": 1.1841939004077882, "grad_norm": 0.5515973567962646, "learning_rate": 0.0001, "loss": 1.5849, "step": 10309 }, { "epoch": 1.1843087703176154, "grad_norm": 0.5052083730697632, "learning_rate": 0.0001, "loss": 1.4214, "step": 10310 }, { "epoch": 1.1844236402274424, "grad_norm": 0.6033722162246704, "learning_rate": 0.0001, "loss": 1.7061, "step": 10311 }, { "epoch": 1.1845385101372696, "grad_norm": 0.535667359828949, "learning_rate": 0.0001, "loss": 1.5664, "step": 10312 }, { "epoch": 1.1846533800470966, "grad_norm": 0.5910223722457886, "learning_rate": 0.0001, "loss": 1.5376, "step": 10313 }, { "epoch": 1.1847682499569239, "grad_norm": 0.542930543422699, "learning_rate": 0.0001, "loss": 1.4848, "step": 10314 }, { "epoch": 1.1848831198667509, "grad_norm": 0.5212183594703674, "learning_rate": 0.0001, "loss": 1.5306, "step": 10315 }, { "epoch": 1.184997989776578, "grad_norm": 0.5621635317802429, "learning_rate": 0.0001, "loss": 1.5478, "step": 10316 }, { "epoch": 1.1851128596864051, "grad_norm": 0.6084299087524414, "learning_rate": 0.0001, "loss": 1.5851, "step": 10317 }, { "epoch": 1.1852277295962323, "grad_norm": 0.5304902195930481, "learning_rate": 0.0001, "loss": 1.3968, "step": 10318 }, { "epoch": 1.1853425995060594, "grad_norm": 0.5503674745559692, "learning_rate": 0.0001, "loss": 1.5019, "step": 10319 }, { "epoch": 1.1854574694158866, "grad_norm": 0.5336641073226929, "learning_rate": 0.0001, "loss": 1.3079, "step": 10320 }, { "epoch": 1.1855723393257136, "grad_norm": 0.6056388020515442, "learning_rate": 0.0001, "loss": 1.6839, "step": 10321 }, { "epoch": 1.1856872092355408, "grad_norm": 0.5721645355224609, "learning_rate": 0.0001, "loss": 1.4073, "step": 10322 }, { "epoch": 1.1858020791453678, "grad_norm": 0.6094290018081665, "learning_rate": 0.0001, "loss": 1.5212, "step": 10323 }, { "epoch": 1.185916949055195, "grad_norm": 0.558312177658081, "learning_rate": 0.0001, "loss": 1.4004, "step": 10324 }, { "epoch": 1.186031818965022, "grad_norm": 0.5515640377998352, "learning_rate": 0.0001, "loss": 1.4245, "step": 10325 }, { "epoch": 1.1861466888748493, "grad_norm": 0.6152453422546387, "learning_rate": 0.0001, "loss": 1.5651, "step": 10326 }, { "epoch": 1.1862615587846763, "grad_norm": 0.5758329629898071, "learning_rate": 0.0001, "loss": 1.3415, "step": 10327 }, { "epoch": 1.1863764286945035, "grad_norm": 0.5209949612617493, "learning_rate": 0.0001, "loss": 1.3905, "step": 10328 }, { "epoch": 1.1864912986043306, "grad_norm": 0.5001648664474487, "learning_rate": 0.0001, "loss": 1.31, "step": 10329 }, { "epoch": 1.1866061685141578, "grad_norm": 0.5593850612640381, "learning_rate": 0.0001, "loss": 1.518, "step": 10330 }, { "epoch": 1.1867210384239848, "grad_norm": 0.5500064492225647, "learning_rate": 0.0001, "loss": 1.4475, "step": 10331 }, { "epoch": 1.186835908333812, "grad_norm": 0.5763779282569885, "learning_rate": 0.0001, "loss": 1.3441, "step": 10332 }, { "epoch": 1.186950778243639, "grad_norm": 0.5549381375312805, "learning_rate": 0.0001, "loss": 1.5054, "step": 10333 }, { "epoch": 1.1870656481534663, "grad_norm": 0.5587765574455261, "learning_rate": 0.0001, "loss": 1.3678, "step": 10334 }, { "epoch": 1.1871805180632933, "grad_norm": 0.5837385654449463, "learning_rate": 0.0001, "loss": 1.5546, "step": 10335 }, { "epoch": 1.1872953879731205, "grad_norm": 0.5909486413002014, "learning_rate": 0.0001, "loss": 1.5224, "step": 10336 }, { "epoch": 1.1874102578829475, "grad_norm": 0.5553216338157654, "learning_rate": 0.0001, "loss": 1.4709, "step": 10337 }, { "epoch": 1.1875251277927747, "grad_norm": 0.5260200500488281, "learning_rate": 0.0001, "loss": 1.3144, "step": 10338 }, { "epoch": 1.1876399977026018, "grad_norm": 0.5829170346260071, "learning_rate": 0.0001, "loss": 1.6455, "step": 10339 }, { "epoch": 1.187754867612429, "grad_norm": 0.5121257305145264, "learning_rate": 0.0001, "loss": 1.3636, "step": 10340 }, { "epoch": 1.187869737522256, "grad_norm": 0.5356987714767456, "learning_rate": 0.0001, "loss": 1.2993, "step": 10341 }, { "epoch": 1.1879846074320832, "grad_norm": 0.5900508761405945, "learning_rate": 0.0001, "loss": 1.5197, "step": 10342 }, { "epoch": 1.1880994773419102, "grad_norm": 0.5535077452659607, "learning_rate": 0.0001, "loss": 1.5212, "step": 10343 }, { "epoch": 1.1882143472517375, "grad_norm": 0.5894918441772461, "learning_rate": 0.0001, "loss": 1.6822, "step": 10344 }, { "epoch": 1.1883292171615645, "grad_norm": 0.5290895104408264, "learning_rate": 0.0001, "loss": 1.4078, "step": 10345 }, { "epoch": 1.1884440870713917, "grad_norm": 0.5344952344894409, "learning_rate": 0.0001, "loss": 1.3112, "step": 10346 }, { "epoch": 1.1885589569812187, "grad_norm": 0.5518721342086792, "learning_rate": 0.0001, "loss": 1.386, "step": 10347 }, { "epoch": 1.188673826891046, "grad_norm": 0.5459201335906982, "learning_rate": 0.0001, "loss": 1.3449, "step": 10348 }, { "epoch": 1.188788696800873, "grad_norm": 0.5421302318572998, "learning_rate": 0.0001, "loss": 1.4268, "step": 10349 }, { "epoch": 1.1889035667107002, "grad_norm": 0.5335027575492859, "learning_rate": 0.0001, "loss": 1.4623, "step": 10350 }, { "epoch": 1.1890184366205272, "grad_norm": 0.5534794926643372, "learning_rate": 0.0001, "loss": 1.4581, "step": 10351 }, { "epoch": 1.1891333065303544, "grad_norm": 0.5756953954696655, "learning_rate": 0.0001, "loss": 1.4053, "step": 10352 }, { "epoch": 1.1892481764401814, "grad_norm": 0.6161737442016602, "learning_rate": 0.0001, "loss": 1.6299, "step": 10353 }, { "epoch": 1.1893630463500087, "grad_norm": 0.6521564722061157, "learning_rate": 0.0001, "loss": 1.5336, "step": 10354 }, { "epoch": 1.1894779162598357, "grad_norm": 0.5857300162315369, "learning_rate": 0.0001, "loss": 1.6375, "step": 10355 }, { "epoch": 1.189592786169663, "grad_norm": 0.6228790283203125, "learning_rate": 0.0001, "loss": 1.5937, "step": 10356 }, { "epoch": 1.18970765607949, "grad_norm": 0.5708868503570557, "learning_rate": 0.0001, "loss": 1.13, "step": 10357 }, { "epoch": 1.1898225259893171, "grad_norm": 0.6147711873054504, "learning_rate": 0.0001, "loss": 1.5533, "step": 10358 }, { "epoch": 1.1899373958991442, "grad_norm": 0.5267451405525208, "learning_rate": 0.0001, "loss": 1.4593, "step": 10359 }, { "epoch": 1.1900522658089714, "grad_norm": 0.5641406178474426, "learning_rate": 0.0001, "loss": 1.6509, "step": 10360 }, { "epoch": 1.1901671357187984, "grad_norm": 0.6026042699813843, "learning_rate": 0.0001, "loss": 1.5547, "step": 10361 }, { "epoch": 1.1902820056286256, "grad_norm": 0.5601838231086731, "learning_rate": 0.0001, "loss": 1.5328, "step": 10362 }, { "epoch": 1.1903968755384526, "grad_norm": 0.505669355392456, "learning_rate": 0.0001, "loss": 1.3774, "step": 10363 }, { "epoch": 1.1905117454482799, "grad_norm": 0.5833644270896912, "learning_rate": 0.0001, "loss": 1.5833, "step": 10364 }, { "epoch": 1.1906266153581069, "grad_norm": 0.5416715145111084, "learning_rate": 0.0001, "loss": 1.3589, "step": 10365 }, { "epoch": 1.190741485267934, "grad_norm": 0.5769158601760864, "learning_rate": 0.0001, "loss": 1.5507, "step": 10366 }, { "epoch": 1.1908563551777611, "grad_norm": 0.5252044200897217, "learning_rate": 0.0001, "loss": 1.2886, "step": 10367 }, { "epoch": 1.1909712250875883, "grad_norm": 0.5603752136230469, "learning_rate": 0.0001, "loss": 1.4126, "step": 10368 }, { "epoch": 1.1910860949974154, "grad_norm": 0.5847504138946533, "learning_rate": 0.0001, "loss": 1.4077, "step": 10369 }, { "epoch": 1.1912009649072426, "grad_norm": 0.5426949262619019, "learning_rate": 0.0001, "loss": 1.3664, "step": 10370 }, { "epoch": 1.1913158348170696, "grad_norm": 0.5475144386291504, "learning_rate": 0.0001, "loss": 1.5815, "step": 10371 }, { "epoch": 1.1914307047268968, "grad_norm": 0.6072627305984497, "learning_rate": 0.0001, "loss": 1.6914, "step": 10372 }, { "epoch": 1.1915455746367238, "grad_norm": 0.5702595710754395, "learning_rate": 0.0001, "loss": 1.4987, "step": 10373 }, { "epoch": 1.191660444546551, "grad_norm": 0.5092419385910034, "learning_rate": 0.0001, "loss": 1.454, "step": 10374 }, { "epoch": 1.191775314456378, "grad_norm": 0.5371357202529907, "learning_rate": 0.0001, "loss": 1.5396, "step": 10375 }, { "epoch": 1.1918901843662053, "grad_norm": 0.6508344411849976, "learning_rate": 0.0001, "loss": 1.4152, "step": 10376 }, { "epoch": 1.1920050542760323, "grad_norm": 0.5292761921882629, "learning_rate": 0.0001, "loss": 1.2684, "step": 10377 }, { "epoch": 1.1921199241858595, "grad_norm": 0.5798367857933044, "learning_rate": 0.0001, "loss": 1.6399, "step": 10378 }, { "epoch": 1.1922347940956866, "grad_norm": 0.5566953420639038, "learning_rate": 0.0001, "loss": 1.5344, "step": 10379 }, { "epoch": 1.1923496640055138, "grad_norm": 0.6179848909378052, "learning_rate": 0.0001, "loss": 1.6928, "step": 10380 }, { "epoch": 1.1924645339153408, "grad_norm": 0.5988585948944092, "learning_rate": 0.0001, "loss": 1.6972, "step": 10381 }, { "epoch": 1.192579403825168, "grad_norm": 0.6032318472862244, "learning_rate": 0.0001, "loss": 1.6165, "step": 10382 }, { "epoch": 1.192694273734995, "grad_norm": 0.5561224818229675, "learning_rate": 0.0001, "loss": 1.6675, "step": 10383 }, { "epoch": 1.1928091436448223, "grad_norm": 0.5613009333610535, "learning_rate": 0.0001, "loss": 1.5307, "step": 10384 }, { "epoch": 1.1929240135546493, "grad_norm": 0.5561355352401733, "learning_rate": 0.0001, "loss": 1.5057, "step": 10385 }, { "epoch": 1.1930388834644765, "grad_norm": 0.5625793933868408, "learning_rate": 0.0001, "loss": 1.397, "step": 10386 }, { "epoch": 1.1931537533743035, "grad_norm": 0.5584040284156799, "learning_rate": 0.0001, "loss": 1.529, "step": 10387 }, { "epoch": 1.1932686232841307, "grad_norm": 0.5510308146476746, "learning_rate": 0.0001, "loss": 1.3537, "step": 10388 }, { "epoch": 1.1933834931939578, "grad_norm": 0.5637242794036865, "learning_rate": 0.0001, "loss": 1.4676, "step": 10389 }, { "epoch": 1.193498363103785, "grad_norm": 0.604081392288208, "learning_rate": 0.0001, "loss": 1.4873, "step": 10390 }, { "epoch": 1.193613233013612, "grad_norm": 0.5362832546234131, "learning_rate": 0.0001, "loss": 1.2121, "step": 10391 }, { "epoch": 1.1937281029234392, "grad_norm": 0.6020187735557556, "learning_rate": 0.0001, "loss": 1.5522, "step": 10392 }, { "epoch": 1.1938429728332662, "grad_norm": 0.5550323724746704, "learning_rate": 0.0001, "loss": 1.3623, "step": 10393 }, { "epoch": 1.1939578427430935, "grad_norm": 0.6165339350700378, "learning_rate": 0.0001, "loss": 1.6928, "step": 10394 }, { "epoch": 1.1940727126529205, "grad_norm": 0.5294722318649292, "learning_rate": 0.0001, "loss": 1.3107, "step": 10395 }, { "epoch": 1.1941875825627477, "grad_norm": 0.5582475662231445, "learning_rate": 0.0001, "loss": 1.5955, "step": 10396 }, { "epoch": 1.1943024524725747, "grad_norm": 0.6636613607406616, "learning_rate": 0.0001, "loss": 1.2665, "step": 10397 }, { "epoch": 1.194417322382402, "grad_norm": 0.5724130868911743, "learning_rate": 0.0001, "loss": 1.4812, "step": 10398 }, { "epoch": 1.194532192292229, "grad_norm": 0.5657904148101807, "learning_rate": 0.0001, "loss": 1.5283, "step": 10399 }, { "epoch": 1.1946470622020562, "grad_norm": 0.5498049259185791, "learning_rate": 0.0001, "loss": 1.4346, "step": 10400 }, { "epoch": 1.1947619321118832, "grad_norm": 0.5673407912254333, "learning_rate": 0.0001, "loss": 1.3824, "step": 10401 }, { "epoch": 1.1948768020217104, "grad_norm": 0.609406054019928, "learning_rate": 0.0001, "loss": 1.6127, "step": 10402 }, { "epoch": 1.1949916719315374, "grad_norm": 0.5271400809288025, "learning_rate": 0.0001, "loss": 1.3876, "step": 10403 }, { "epoch": 1.1951065418413647, "grad_norm": 0.5551247596740723, "learning_rate": 0.0001, "loss": 1.4809, "step": 10404 }, { "epoch": 1.195221411751192, "grad_norm": 0.6490895748138428, "learning_rate": 0.0001, "loss": 1.5965, "step": 10405 }, { "epoch": 1.195336281661019, "grad_norm": 0.5571427941322327, "learning_rate": 0.0001, "loss": 1.4025, "step": 10406 }, { "epoch": 1.195451151570846, "grad_norm": 0.5582363605499268, "learning_rate": 0.0001, "loss": 1.4128, "step": 10407 }, { "epoch": 1.1955660214806731, "grad_norm": 0.5777066946029663, "learning_rate": 0.0001, "loss": 1.4456, "step": 10408 }, { "epoch": 1.1956808913905004, "grad_norm": 0.5664290189743042, "learning_rate": 0.0001, "loss": 1.4631, "step": 10409 }, { "epoch": 1.1957957613003274, "grad_norm": 0.6081710457801819, "learning_rate": 0.0001, "loss": 1.5054, "step": 10410 }, { "epoch": 1.1959106312101544, "grad_norm": 0.5311821103096008, "learning_rate": 0.0001, "loss": 1.1295, "step": 10411 }, { "epoch": 1.1960255011199816, "grad_norm": 0.5118587017059326, "learning_rate": 0.0001, "loss": 1.2627, "step": 10412 }, { "epoch": 1.1961403710298089, "grad_norm": 0.5950908660888672, "learning_rate": 0.0001, "loss": 1.5089, "step": 10413 }, { "epoch": 1.1962552409396359, "grad_norm": 0.5565327405929565, "learning_rate": 0.0001, "loss": 1.4616, "step": 10414 }, { "epoch": 1.1963701108494629, "grad_norm": 0.5890604853630066, "learning_rate": 0.0001, "loss": 1.4465, "step": 10415 }, { "epoch": 1.19648498075929, "grad_norm": 0.5750487446784973, "learning_rate": 0.0001, "loss": 1.5495, "step": 10416 }, { "epoch": 1.1965998506691173, "grad_norm": 0.5400042533874512, "learning_rate": 0.0001, "loss": 1.2722, "step": 10417 }, { "epoch": 1.1967147205789443, "grad_norm": 0.5932859778404236, "learning_rate": 0.0001, "loss": 1.6366, "step": 10418 }, { "epoch": 1.1968295904887714, "grad_norm": 0.5861518383026123, "learning_rate": 0.0001, "loss": 1.4621, "step": 10419 }, { "epoch": 1.1969444603985986, "grad_norm": 0.5601950883865356, "learning_rate": 0.0001, "loss": 1.4763, "step": 10420 }, { "epoch": 1.1970593303084258, "grad_norm": 0.5883669257164001, "learning_rate": 0.0001, "loss": 1.6461, "step": 10421 }, { "epoch": 1.1971742002182528, "grad_norm": 0.5568323135375977, "learning_rate": 0.0001, "loss": 1.6238, "step": 10422 }, { "epoch": 1.1972890701280798, "grad_norm": 0.5244744420051575, "learning_rate": 0.0001, "loss": 1.3016, "step": 10423 }, { "epoch": 1.197403940037907, "grad_norm": 0.5605520606040955, "learning_rate": 0.0001, "loss": 1.4803, "step": 10424 }, { "epoch": 1.1975188099477343, "grad_norm": 0.5980091691017151, "learning_rate": 0.0001, "loss": 1.5665, "step": 10425 }, { "epoch": 1.1976336798575613, "grad_norm": 0.607279896736145, "learning_rate": 0.0001, "loss": 1.5404, "step": 10426 }, { "epoch": 1.1977485497673883, "grad_norm": 0.5929856896400452, "learning_rate": 0.0001, "loss": 1.5402, "step": 10427 }, { "epoch": 1.1978634196772155, "grad_norm": 0.5797104835510254, "learning_rate": 0.0001, "loss": 1.4828, "step": 10428 }, { "epoch": 1.1979782895870428, "grad_norm": 0.5740180611610413, "learning_rate": 0.0001, "loss": 1.5289, "step": 10429 }, { "epoch": 1.1980931594968698, "grad_norm": 0.5844727158546448, "learning_rate": 0.0001, "loss": 1.4686, "step": 10430 }, { "epoch": 1.1982080294066968, "grad_norm": 0.542769193649292, "learning_rate": 0.0001, "loss": 1.5674, "step": 10431 }, { "epoch": 1.198322899316524, "grad_norm": 0.5846665501594543, "learning_rate": 0.0001, "loss": 1.4399, "step": 10432 }, { "epoch": 1.1984377692263513, "grad_norm": 0.5460920333862305, "learning_rate": 0.0001, "loss": 1.5411, "step": 10433 }, { "epoch": 1.1985526391361783, "grad_norm": 0.582313597202301, "learning_rate": 0.0001, "loss": 1.6056, "step": 10434 }, { "epoch": 1.1986675090460053, "grad_norm": 0.6199240684509277, "learning_rate": 0.0001, "loss": 1.4142, "step": 10435 }, { "epoch": 1.1987823789558325, "grad_norm": 0.6095178127288818, "learning_rate": 0.0001, "loss": 1.5953, "step": 10436 }, { "epoch": 1.1988972488656597, "grad_norm": 0.5635156631469727, "learning_rate": 0.0001, "loss": 1.2746, "step": 10437 }, { "epoch": 1.1990121187754867, "grad_norm": 0.5342568755149841, "learning_rate": 0.0001, "loss": 1.3511, "step": 10438 }, { "epoch": 1.199126988685314, "grad_norm": 0.5932930111885071, "learning_rate": 0.0001, "loss": 1.3836, "step": 10439 }, { "epoch": 1.199241858595141, "grad_norm": 0.5218791365623474, "learning_rate": 0.0001, "loss": 1.169, "step": 10440 }, { "epoch": 1.1993567285049682, "grad_norm": 0.5568528771400452, "learning_rate": 0.0001, "loss": 1.5939, "step": 10441 }, { "epoch": 1.1994715984147952, "grad_norm": 0.512910008430481, "learning_rate": 0.0001, "loss": 1.34, "step": 10442 }, { "epoch": 1.1995864683246225, "grad_norm": 0.5277653336524963, "learning_rate": 0.0001, "loss": 1.4526, "step": 10443 }, { "epoch": 1.1997013382344495, "grad_norm": 0.5597155094146729, "learning_rate": 0.0001, "loss": 1.3142, "step": 10444 }, { "epoch": 1.1998162081442767, "grad_norm": 0.5273895859718323, "learning_rate": 0.0001, "loss": 1.5123, "step": 10445 }, { "epoch": 1.1999310780541037, "grad_norm": 0.5639094114303589, "learning_rate": 0.0001, "loss": 1.2597, "step": 10446 }, { "epoch": 1.200045947963931, "grad_norm": 0.6367859244346619, "learning_rate": 0.0001, "loss": 1.6992, "step": 10447 }, { "epoch": 1.200160817873758, "grad_norm": 0.6159301400184631, "learning_rate": 0.0001, "loss": 1.475, "step": 10448 }, { "epoch": 1.2002756877835852, "grad_norm": 0.5858900547027588, "learning_rate": 0.0001, "loss": 1.4077, "step": 10449 }, { "epoch": 1.2003905576934122, "grad_norm": 0.5637074112892151, "learning_rate": 0.0001, "loss": 1.4567, "step": 10450 }, { "epoch": 1.2005054276032394, "grad_norm": 0.5393563508987427, "learning_rate": 0.0001, "loss": 1.3936, "step": 10451 }, { "epoch": 1.2006202975130664, "grad_norm": 0.530194103717804, "learning_rate": 0.0001, "loss": 1.4639, "step": 10452 }, { "epoch": 1.2007351674228937, "grad_norm": 0.5299816727638245, "learning_rate": 0.0001, "loss": 1.4348, "step": 10453 }, { "epoch": 1.2008500373327207, "grad_norm": 0.5166096687316895, "learning_rate": 0.0001, "loss": 1.331, "step": 10454 }, { "epoch": 1.200964907242548, "grad_norm": 0.5923041105270386, "learning_rate": 0.0001, "loss": 1.4583, "step": 10455 }, { "epoch": 1.201079777152375, "grad_norm": 0.5478973388671875, "learning_rate": 0.0001, "loss": 1.3418, "step": 10456 }, { "epoch": 1.2011946470622021, "grad_norm": 0.5634967684745789, "learning_rate": 0.0001, "loss": 1.3768, "step": 10457 }, { "epoch": 1.2013095169720291, "grad_norm": 0.5654754042625427, "learning_rate": 0.0001, "loss": 1.6154, "step": 10458 }, { "epoch": 1.2014243868818564, "grad_norm": 0.6268435716629028, "learning_rate": 0.0001, "loss": 1.6234, "step": 10459 }, { "epoch": 1.2015392567916834, "grad_norm": 0.5597686171531677, "learning_rate": 0.0001, "loss": 1.2409, "step": 10460 }, { "epoch": 1.2016541267015106, "grad_norm": 0.55586177110672, "learning_rate": 0.0001, "loss": 1.4184, "step": 10461 }, { "epoch": 1.2017689966113376, "grad_norm": 0.6155359745025635, "learning_rate": 0.0001, "loss": 1.5127, "step": 10462 }, { "epoch": 1.2018838665211649, "grad_norm": 0.6004906296730042, "learning_rate": 0.0001, "loss": 1.4996, "step": 10463 }, { "epoch": 1.2019987364309919, "grad_norm": 0.5957759022712708, "learning_rate": 0.0001, "loss": 1.6768, "step": 10464 }, { "epoch": 1.202113606340819, "grad_norm": 0.5964832901954651, "learning_rate": 0.0001, "loss": 1.5759, "step": 10465 }, { "epoch": 1.202228476250646, "grad_norm": 0.6004605889320374, "learning_rate": 0.0001, "loss": 1.6343, "step": 10466 }, { "epoch": 1.2023433461604733, "grad_norm": 0.5793516635894775, "learning_rate": 0.0001, "loss": 1.5425, "step": 10467 }, { "epoch": 1.2024582160703003, "grad_norm": 0.565511167049408, "learning_rate": 0.0001, "loss": 1.3453, "step": 10468 }, { "epoch": 1.2025730859801276, "grad_norm": 0.6401596069335938, "learning_rate": 0.0001, "loss": 1.5273, "step": 10469 }, { "epoch": 1.2026879558899546, "grad_norm": 0.6024700999259949, "learning_rate": 0.0001, "loss": 1.6374, "step": 10470 }, { "epoch": 1.2028028257997818, "grad_norm": 0.5722721219062805, "learning_rate": 0.0001, "loss": 1.6399, "step": 10471 }, { "epoch": 1.2029176957096088, "grad_norm": 0.5830640196800232, "learning_rate": 0.0001, "loss": 1.4463, "step": 10472 }, { "epoch": 1.203032565619436, "grad_norm": 0.5298201441764832, "learning_rate": 0.0001, "loss": 1.4246, "step": 10473 }, { "epoch": 1.203147435529263, "grad_norm": 0.5793371796607971, "learning_rate": 0.0001, "loss": 1.4691, "step": 10474 }, { "epoch": 1.2032623054390903, "grad_norm": 0.5723811984062195, "learning_rate": 0.0001, "loss": 1.507, "step": 10475 }, { "epoch": 1.2033771753489173, "grad_norm": 0.5795249342918396, "learning_rate": 0.0001, "loss": 1.398, "step": 10476 }, { "epoch": 1.2034920452587445, "grad_norm": 0.5610243678092957, "learning_rate": 0.0001, "loss": 1.6885, "step": 10477 }, { "epoch": 1.2036069151685715, "grad_norm": 0.6054256558418274, "learning_rate": 0.0001, "loss": 1.6161, "step": 10478 }, { "epoch": 1.2037217850783988, "grad_norm": 0.5674859881401062, "learning_rate": 0.0001, "loss": 1.4247, "step": 10479 }, { "epoch": 1.2038366549882258, "grad_norm": 0.5590078830718994, "learning_rate": 0.0001, "loss": 1.4417, "step": 10480 }, { "epoch": 1.203951524898053, "grad_norm": 0.5892341732978821, "learning_rate": 0.0001, "loss": 1.5407, "step": 10481 }, { "epoch": 1.20406639480788, "grad_norm": 0.5569952130317688, "learning_rate": 0.0001, "loss": 1.324, "step": 10482 }, { "epoch": 1.2041812647177073, "grad_norm": 0.5327668190002441, "learning_rate": 0.0001, "loss": 1.369, "step": 10483 }, { "epoch": 1.2042961346275343, "grad_norm": 0.5070328116416931, "learning_rate": 0.0001, "loss": 1.2728, "step": 10484 }, { "epoch": 1.2044110045373615, "grad_norm": 0.5609564781188965, "learning_rate": 0.0001, "loss": 1.4026, "step": 10485 }, { "epoch": 1.2045258744471885, "grad_norm": 0.5414237380027771, "learning_rate": 0.0001, "loss": 1.3149, "step": 10486 }, { "epoch": 1.2046407443570157, "grad_norm": 0.5648483633995056, "learning_rate": 0.0001, "loss": 1.5108, "step": 10487 }, { "epoch": 1.2047556142668427, "grad_norm": 0.5687791705131531, "learning_rate": 0.0001, "loss": 1.5176, "step": 10488 }, { "epoch": 1.20487048417667, "grad_norm": 0.5945368409156799, "learning_rate": 0.0001, "loss": 1.4052, "step": 10489 }, { "epoch": 1.204985354086497, "grad_norm": 0.5893955826759338, "learning_rate": 0.0001, "loss": 1.6138, "step": 10490 }, { "epoch": 1.2051002239963242, "grad_norm": 0.5878959894180298, "learning_rate": 0.0001, "loss": 1.5193, "step": 10491 }, { "epoch": 1.2052150939061512, "grad_norm": 0.5943409204483032, "learning_rate": 0.0001, "loss": 1.3367, "step": 10492 }, { "epoch": 1.2053299638159785, "grad_norm": 0.5430295467376709, "learning_rate": 0.0001, "loss": 1.46, "step": 10493 }, { "epoch": 1.2054448337258055, "grad_norm": 0.5535471439361572, "learning_rate": 0.0001, "loss": 1.3129, "step": 10494 }, { "epoch": 1.2055597036356327, "grad_norm": 0.540138840675354, "learning_rate": 0.0001, "loss": 1.3614, "step": 10495 }, { "epoch": 1.2056745735454597, "grad_norm": 0.6055250763893127, "learning_rate": 0.0001, "loss": 1.5851, "step": 10496 }, { "epoch": 1.205789443455287, "grad_norm": 0.6143799424171448, "learning_rate": 0.0001, "loss": 1.5218, "step": 10497 }, { "epoch": 1.205904313365114, "grad_norm": 0.5408201813697815, "learning_rate": 0.0001, "loss": 1.4474, "step": 10498 }, { "epoch": 1.2060191832749412, "grad_norm": 0.5724786520004272, "learning_rate": 0.0001, "loss": 1.5761, "step": 10499 }, { "epoch": 1.2061340531847682, "grad_norm": 0.523482620716095, "learning_rate": 0.0001, "loss": 1.155, "step": 10500 }, { "epoch": 1.2062489230945954, "grad_norm": 0.5779107809066772, "learning_rate": 0.0001, "loss": 1.5714, "step": 10501 }, { "epoch": 1.2063637930044224, "grad_norm": 0.5509933233261108, "learning_rate": 0.0001, "loss": 1.4959, "step": 10502 }, { "epoch": 1.2064786629142497, "grad_norm": 0.55925053358078, "learning_rate": 0.0001, "loss": 1.2454, "step": 10503 }, { "epoch": 1.2065935328240767, "grad_norm": 0.5909457802772522, "learning_rate": 0.0001, "loss": 1.4099, "step": 10504 }, { "epoch": 1.206708402733904, "grad_norm": 0.6092385649681091, "learning_rate": 0.0001, "loss": 1.5369, "step": 10505 }, { "epoch": 1.206823272643731, "grad_norm": 0.5715480446815491, "learning_rate": 0.0001, "loss": 1.4217, "step": 10506 }, { "epoch": 1.2069381425535581, "grad_norm": 0.5696176290512085, "learning_rate": 0.0001, "loss": 1.4611, "step": 10507 }, { "epoch": 1.2070530124633851, "grad_norm": 0.5600457787513733, "learning_rate": 0.0001, "loss": 1.3629, "step": 10508 }, { "epoch": 1.2071678823732124, "grad_norm": 0.5562861561775208, "learning_rate": 0.0001, "loss": 1.6141, "step": 10509 }, { "epoch": 1.2072827522830394, "grad_norm": 0.5927658081054688, "learning_rate": 0.0001, "loss": 1.6618, "step": 10510 }, { "epoch": 1.2073976221928666, "grad_norm": 0.5737072825431824, "learning_rate": 0.0001, "loss": 1.5495, "step": 10511 }, { "epoch": 1.2075124921026936, "grad_norm": 0.5820686221122742, "learning_rate": 0.0001, "loss": 1.4886, "step": 10512 }, { "epoch": 1.2076273620125209, "grad_norm": 0.5760904550552368, "learning_rate": 0.0001, "loss": 1.3553, "step": 10513 }, { "epoch": 1.2077422319223479, "grad_norm": 0.5971365571022034, "learning_rate": 0.0001, "loss": 1.6553, "step": 10514 }, { "epoch": 1.207857101832175, "grad_norm": 0.5970271825790405, "learning_rate": 0.0001, "loss": 1.581, "step": 10515 }, { "epoch": 1.207971971742002, "grad_norm": 0.5633013248443604, "learning_rate": 0.0001, "loss": 1.3834, "step": 10516 }, { "epoch": 1.2080868416518293, "grad_norm": 0.5619766116142273, "learning_rate": 0.0001, "loss": 1.5396, "step": 10517 }, { "epoch": 1.2082017115616563, "grad_norm": 0.5610624551773071, "learning_rate": 0.0001, "loss": 1.456, "step": 10518 }, { "epoch": 1.2083165814714836, "grad_norm": 0.5785056948661804, "learning_rate": 0.0001, "loss": 1.5337, "step": 10519 }, { "epoch": 1.2084314513813106, "grad_norm": 0.6023985147476196, "learning_rate": 0.0001, "loss": 1.4268, "step": 10520 }, { "epoch": 1.2085463212911378, "grad_norm": 0.6198663115501404, "learning_rate": 0.0001, "loss": 1.4412, "step": 10521 }, { "epoch": 1.2086611912009648, "grad_norm": 0.5379393100738525, "learning_rate": 0.0001, "loss": 1.5346, "step": 10522 }, { "epoch": 1.208776061110792, "grad_norm": 0.5652551651000977, "learning_rate": 0.0001, "loss": 1.3385, "step": 10523 }, { "epoch": 1.208890931020619, "grad_norm": 0.5869433879852295, "learning_rate": 0.0001, "loss": 1.4932, "step": 10524 }, { "epoch": 1.2090058009304463, "grad_norm": 0.5975571870803833, "learning_rate": 0.0001, "loss": 1.4304, "step": 10525 }, { "epoch": 1.2091206708402733, "grad_norm": 0.5863459706306458, "learning_rate": 0.0001, "loss": 1.4973, "step": 10526 }, { "epoch": 1.2092355407501005, "grad_norm": 0.5686241984367371, "learning_rate": 0.0001, "loss": 1.4045, "step": 10527 }, { "epoch": 1.2093504106599275, "grad_norm": 0.6071587204933167, "learning_rate": 0.0001, "loss": 1.438, "step": 10528 }, { "epoch": 1.2094652805697548, "grad_norm": 0.5743688344955444, "learning_rate": 0.0001, "loss": 1.388, "step": 10529 }, { "epoch": 1.2095801504795818, "grad_norm": 0.5639120936393738, "learning_rate": 0.0001, "loss": 1.538, "step": 10530 }, { "epoch": 1.209695020389409, "grad_norm": 0.5790785551071167, "learning_rate": 0.0001, "loss": 1.5912, "step": 10531 }, { "epoch": 1.209809890299236, "grad_norm": 0.5463698506355286, "learning_rate": 0.0001, "loss": 1.3599, "step": 10532 }, { "epoch": 1.2099247602090633, "grad_norm": 0.5384880304336548, "learning_rate": 0.0001, "loss": 1.5287, "step": 10533 }, { "epoch": 1.2100396301188903, "grad_norm": 0.5078497529029846, "learning_rate": 0.0001, "loss": 1.4054, "step": 10534 }, { "epoch": 1.2101545000287175, "grad_norm": 0.5871212482452393, "learning_rate": 0.0001, "loss": 1.5067, "step": 10535 }, { "epoch": 1.2102693699385445, "grad_norm": 0.5731205344200134, "learning_rate": 0.0001, "loss": 1.6316, "step": 10536 }, { "epoch": 1.2103842398483717, "grad_norm": 0.5511032938957214, "learning_rate": 0.0001, "loss": 1.5654, "step": 10537 }, { "epoch": 1.2104991097581987, "grad_norm": 0.6102611422538757, "learning_rate": 0.0001, "loss": 1.5906, "step": 10538 }, { "epoch": 1.210613979668026, "grad_norm": 0.5358269214630127, "learning_rate": 0.0001, "loss": 1.2765, "step": 10539 }, { "epoch": 1.210728849577853, "grad_norm": 0.5827724933624268, "learning_rate": 0.0001, "loss": 1.4465, "step": 10540 }, { "epoch": 1.2108437194876802, "grad_norm": 0.6176273822784424, "learning_rate": 0.0001, "loss": 1.4566, "step": 10541 }, { "epoch": 1.2109585893975074, "grad_norm": 0.6200592517852783, "learning_rate": 0.0001, "loss": 1.7385, "step": 10542 }, { "epoch": 1.2110734593073345, "grad_norm": 0.5881513357162476, "learning_rate": 0.0001, "loss": 1.4508, "step": 10543 }, { "epoch": 1.2111883292171615, "grad_norm": 0.5368597507476807, "learning_rate": 0.0001, "loss": 1.2807, "step": 10544 }, { "epoch": 1.2113031991269887, "grad_norm": 0.5737367868423462, "learning_rate": 0.0001, "loss": 1.3851, "step": 10545 }, { "epoch": 1.211418069036816, "grad_norm": 0.5376740097999573, "learning_rate": 0.0001, "loss": 1.5095, "step": 10546 }, { "epoch": 1.211532938946643, "grad_norm": 0.5448538064956665, "learning_rate": 0.0001, "loss": 1.4943, "step": 10547 }, { "epoch": 1.21164780885647, "grad_norm": 0.5198777914047241, "learning_rate": 0.0001, "loss": 1.4856, "step": 10548 }, { "epoch": 1.2117626787662972, "grad_norm": 0.5620352625846863, "learning_rate": 0.0001, "loss": 1.4172, "step": 10549 }, { "epoch": 1.2118775486761244, "grad_norm": 0.6086106896400452, "learning_rate": 0.0001, "loss": 1.3698, "step": 10550 }, { "epoch": 1.2119924185859514, "grad_norm": 0.5318285822868347, "learning_rate": 0.0001, "loss": 1.1826, "step": 10551 }, { "epoch": 1.2121072884957784, "grad_norm": 0.6011495590209961, "learning_rate": 0.0001, "loss": 1.6163, "step": 10552 }, { "epoch": 1.2122221584056057, "grad_norm": 0.536434531211853, "learning_rate": 0.0001, "loss": 1.3322, "step": 10553 }, { "epoch": 1.2123370283154329, "grad_norm": 0.57552170753479, "learning_rate": 0.0001, "loss": 1.7569, "step": 10554 }, { "epoch": 1.21245189822526, "grad_norm": 0.5894425511360168, "learning_rate": 0.0001, "loss": 1.4857, "step": 10555 }, { "epoch": 1.212566768135087, "grad_norm": 0.6130958795547485, "learning_rate": 0.0001, "loss": 1.7492, "step": 10556 }, { "epoch": 1.2126816380449141, "grad_norm": 0.6135485172271729, "learning_rate": 0.0001, "loss": 1.5469, "step": 10557 }, { "epoch": 1.2127965079547414, "grad_norm": 0.5616319179534912, "learning_rate": 0.0001, "loss": 1.4481, "step": 10558 }, { "epoch": 1.2129113778645684, "grad_norm": 0.567959725856781, "learning_rate": 0.0001, "loss": 1.5242, "step": 10559 }, { "epoch": 1.2130262477743954, "grad_norm": 0.5984885692596436, "learning_rate": 0.0001, "loss": 1.6178, "step": 10560 }, { "epoch": 1.2131411176842226, "grad_norm": 0.5240800380706787, "learning_rate": 0.0001, "loss": 1.4834, "step": 10561 }, { "epoch": 1.2132559875940498, "grad_norm": 0.5749354362487793, "learning_rate": 0.0001, "loss": 1.4552, "step": 10562 }, { "epoch": 1.2133708575038769, "grad_norm": 0.5432642698287964, "learning_rate": 0.0001, "loss": 1.5985, "step": 10563 }, { "epoch": 1.2134857274137039, "grad_norm": 0.5396465063095093, "learning_rate": 0.0001, "loss": 1.4983, "step": 10564 }, { "epoch": 1.213600597323531, "grad_norm": 0.5398960113525391, "learning_rate": 0.0001, "loss": 1.3812, "step": 10565 }, { "epoch": 1.2137154672333583, "grad_norm": 0.5659125447273254, "learning_rate": 0.0001, "loss": 1.5102, "step": 10566 }, { "epoch": 1.2138303371431853, "grad_norm": 0.5628279447555542, "learning_rate": 0.0001, "loss": 1.2602, "step": 10567 }, { "epoch": 1.2139452070530123, "grad_norm": 0.5381669998168945, "learning_rate": 0.0001, "loss": 1.4341, "step": 10568 }, { "epoch": 1.2140600769628396, "grad_norm": 0.6219281554222107, "learning_rate": 0.0001, "loss": 1.5175, "step": 10569 }, { "epoch": 1.2141749468726668, "grad_norm": 0.5203022956848145, "learning_rate": 0.0001, "loss": 1.3109, "step": 10570 }, { "epoch": 1.2142898167824938, "grad_norm": 0.6202723383903503, "learning_rate": 0.0001, "loss": 1.5858, "step": 10571 }, { "epoch": 1.2144046866923208, "grad_norm": 0.6326547861099243, "learning_rate": 0.0001, "loss": 1.4975, "step": 10572 }, { "epoch": 1.214519556602148, "grad_norm": 0.5709686875343323, "learning_rate": 0.0001, "loss": 1.3928, "step": 10573 }, { "epoch": 1.2146344265119753, "grad_norm": 0.5582396984100342, "learning_rate": 0.0001, "loss": 1.4887, "step": 10574 }, { "epoch": 1.2147492964218023, "grad_norm": 0.678507387638092, "learning_rate": 0.0001, "loss": 1.7535, "step": 10575 }, { "epoch": 1.2148641663316295, "grad_norm": 0.5705639123916626, "learning_rate": 0.0001, "loss": 1.6121, "step": 10576 }, { "epoch": 1.2149790362414565, "grad_norm": 0.5790746212005615, "learning_rate": 0.0001, "loss": 1.6152, "step": 10577 }, { "epoch": 1.2150939061512838, "grad_norm": 0.5600179433822632, "learning_rate": 0.0001, "loss": 1.4223, "step": 10578 }, { "epoch": 1.2152087760611108, "grad_norm": 0.5656793713569641, "learning_rate": 0.0001, "loss": 1.4859, "step": 10579 }, { "epoch": 1.215323645970938, "grad_norm": 0.537078857421875, "learning_rate": 0.0001, "loss": 1.4168, "step": 10580 }, { "epoch": 1.215438515880765, "grad_norm": 0.5688410401344299, "learning_rate": 0.0001, "loss": 1.5327, "step": 10581 }, { "epoch": 1.2155533857905922, "grad_norm": 0.6211932897567749, "learning_rate": 0.0001, "loss": 1.5774, "step": 10582 }, { "epoch": 1.2156682557004193, "grad_norm": 0.5459011793136597, "learning_rate": 0.0001, "loss": 1.3783, "step": 10583 }, { "epoch": 1.2157831256102465, "grad_norm": 0.540130615234375, "learning_rate": 0.0001, "loss": 1.35, "step": 10584 }, { "epoch": 1.2158979955200735, "grad_norm": 0.5064961910247803, "learning_rate": 0.0001, "loss": 1.3146, "step": 10585 }, { "epoch": 1.2160128654299007, "grad_norm": 0.5856577157974243, "learning_rate": 0.0001, "loss": 1.5325, "step": 10586 }, { "epoch": 1.2161277353397277, "grad_norm": 0.5436236262321472, "learning_rate": 0.0001, "loss": 1.2324, "step": 10587 }, { "epoch": 1.216242605249555, "grad_norm": 0.5454403162002563, "learning_rate": 0.0001, "loss": 1.5701, "step": 10588 }, { "epoch": 1.216357475159382, "grad_norm": 0.5347267985343933, "learning_rate": 0.0001, "loss": 1.574, "step": 10589 }, { "epoch": 1.2164723450692092, "grad_norm": 0.5841572284698486, "learning_rate": 0.0001, "loss": 1.4605, "step": 10590 }, { "epoch": 1.2165872149790362, "grad_norm": 0.5742143988609314, "learning_rate": 0.0001, "loss": 1.6305, "step": 10591 }, { "epoch": 1.2167020848888634, "grad_norm": 0.5593157410621643, "learning_rate": 0.0001, "loss": 1.5328, "step": 10592 }, { "epoch": 1.2168169547986905, "grad_norm": 0.58852219581604, "learning_rate": 0.0001, "loss": 1.4253, "step": 10593 }, { "epoch": 1.2169318247085177, "grad_norm": 0.5854587554931641, "learning_rate": 0.0001, "loss": 1.4844, "step": 10594 }, { "epoch": 1.2170466946183447, "grad_norm": 0.6186012625694275, "learning_rate": 0.0001, "loss": 1.5365, "step": 10595 }, { "epoch": 1.217161564528172, "grad_norm": 0.5782433748245239, "learning_rate": 0.0001, "loss": 1.5929, "step": 10596 }, { "epoch": 1.217276434437999, "grad_norm": 0.5456607341766357, "learning_rate": 0.0001, "loss": 1.383, "step": 10597 }, { "epoch": 1.2173913043478262, "grad_norm": 0.5986420512199402, "learning_rate": 0.0001, "loss": 1.746, "step": 10598 }, { "epoch": 1.2175061742576532, "grad_norm": 0.5691726207733154, "learning_rate": 0.0001, "loss": 1.4222, "step": 10599 }, { "epoch": 1.2176210441674804, "grad_norm": 0.5510737299919128, "learning_rate": 0.0001, "loss": 1.6671, "step": 10600 }, { "epoch": 1.2177359140773074, "grad_norm": 0.528681755065918, "learning_rate": 0.0001, "loss": 1.2332, "step": 10601 }, { "epoch": 1.2178507839871346, "grad_norm": 0.5856773853302002, "learning_rate": 0.0001, "loss": 1.3116, "step": 10602 }, { "epoch": 1.2179656538969617, "grad_norm": 0.6299785375595093, "learning_rate": 0.0001, "loss": 1.3567, "step": 10603 }, { "epoch": 1.2180805238067889, "grad_norm": 0.5841706395149231, "learning_rate": 0.0001, "loss": 1.2655, "step": 10604 }, { "epoch": 1.218195393716616, "grad_norm": 0.5885977745056152, "learning_rate": 0.0001, "loss": 1.4947, "step": 10605 }, { "epoch": 1.2183102636264431, "grad_norm": 0.5559094548225403, "learning_rate": 0.0001, "loss": 1.2172, "step": 10606 }, { "epoch": 1.2184251335362701, "grad_norm": 0.5585634112358093, "learning_rate": 0.0001, "loss": 1.504, "step": 10607 }, { "epoch": 1.2185400034460974, "grad_norm": 0.5708191394805908, "learning_rate": 0.0001, "loss": 1.549, "step": 10608 }, { "epoch": 1.2186548733559244, "grad_norm": 0.5848360657691956, "learning_rate": 0.0001, "loss": 1.6258, "step": 10609 }, { "epoch": 1.2187697432657516, "grad_norm": 0.702562153339386, "learning_rate": 0.0001, "loss": 1.6419, "step": 10610 }, { "epoch": 1.2188846131755786, "grad_norm": 0.5700719952583313, "learning_rate": 0.0001, "loss": 1.5087, "step": 10611 }, { "epoch": 1.2189994830854058, "grad_norm": 0.5429587960243225, "learning_rate": 0.0001, "loss": 1.4472, "step": 10612 }, { "epoch": 1.2191143529952329, "grad_norm": 0.5469475388526917, "learning_rate": 0.0001, "loss": 1.5313, "step": 10613 }, { "epoch": 1.21922922290506, "grad_norm": 0.5817015171051025, "learning_rate": 0.0001, "loss": 1.4517, "step": 10614 }, { "epoch": 1.219344092814887, "grad_norm": 0.6177971363067627, "learning_rate": 0.0001, "loss": 1.3462, "step": 10615 }, { "epoch": 1.2194589627247143, "grad_norm": 0.5310166478157043, "learning_rate": 0.0001, "loss": 1.3836, "step": 10616 }, { "epoch": 1.2195738326345413, "grad_norm": 0.5617627501487732, "learning_rate": 0.0001, "loss": 1.4244, "step": 10617 }, { "epoch": 1.2196887025443686, "grad_norm": 0.6105504035949707, "learning_rate": 0.0001, "loss": 1.3383, "step": 10618 }, { "epoch": 1.2198035724541956, "grad_norm": 0.5250139832496643, "learning_rate": 0.0001, "loss": 1.4938, "step": 10619 }, { "epoch": 1.2199184423640228, "grad_norm": 0.5927942991256714, "learning_rate": 0.0001, "loss": 1.5573, "step": 10620 }, { "epoch": 1.2200333122738498, "grad_norm": 0.6836938858032227, "learning_rate": 0.0001, "loss": 1.6504, "step": 10621 }, { "epoch": 1.220148182183677, "grad_norm": 0.6172232627868652, "learning_rate": 0.0001, "loss": 1.5157, "step": 10622 }, { "epoch": 1.220263052093504, "grad_norm": 0.5897935032844543, "learning_rate": 0.0001, "loss": 1.3833, "step": 10623 }, { "epoch": 1.2203779220033313, "grad_norm": 0.5973081588745117, "learning_rate": 0.0001, "loss": 1.3658, "step": 10624 }, { "epoch": 1.2204927919131583, "grad_norm": 0.6020708680152893, "learning_rate": 0.0001, "loss": 1.4735, "step": 10625 }, { "epoch": 1.2206076618229855, "grad_norm": 0.5734338164329529, "learning_rate": 0.0001, "loss": 1.5679, "step": 10626 }, { "epoch": 1.2207225317328125, "grad_norm": 0.6005342602729797, "learning_rate": 0.0001, "loss": 1.6655, "step": 10627 }, { "epoch": 1.2208374016426398, "grad_norm": 0.6020346283912659, "learning_rate": 0.0001, "loss": 1.4883, "step": 10628 }, { "epoch": 1.2209522715524668, "grad_norm": 0.5923011302947998, "learning_rate": 0.0001, "loss": 1.471, "step": 10629 }, { "epoch": 1.221067141462294, "grad_norm": 0.5160320997238159, "learning_rate": 0.0001, "loss": 1.4516, "step": 10630 }, { "epoch": 1.221182011372121, "grad_norm": 0.6067989468574524, "learning_rate": 0.0001, "loss": 1.3357, "step": 10631 }, { "epoch": 1.2212968812819482, "grad_norm": 0.5493841767311096, "learning_rate": 0.0001, "loss": 1.0806, "step": 10632 }, { "epoch": 1.2214117511917753, "grad_norm": 0.6001129150390625, "learning_rate": 0.0001, "loss": 1.6477, "step": 10633 }, { "epoch": 1.2215266211016025, "grad_norm": 0.5696105360984802, "learning_rate": 0.0001, "loss": 1.4366, "step": 10634 }, { "epoch": 1.2216414910114295, "grad_norm": 0.5546556711196899, "learning_rate": 0.0001, "loss": 1.224, "step": 10635 }, { "epoch": 1.2217563609212567, "grad_norm": 0.5779848098754883, "learning_rate": 0.0001, "loss": 1.4388, "step": 10636 }, { "epoch": 1.2218712308310837, "grad_norm": 0.6062044501304626, "learning_rate": 0.0001, "loss": 1.4777, "step": 10637 }, { "epoch": 1.221986100740911, "grad_norm": 0.530096173286438, "learning_rate": 0.0001, "loss": 1.5618, "step": 10638 }, { "epoch": 1.222100970650738, "grad_norm": 0.5306719541549683, "learning_rate": 0.0001, "loss": 1.4092, "step": 10639 }, { "epoch": 1.2222158405605652, "grad_norm": 0.5447390675544739, "learning_rate": 0.0001, "loss": 1.3903, "step": 10640 }, { "epoch": 1.2223307104703922, "grad_norm": 0.5845821499824524, "learning_rate": 0.0001, "loss": 1.5732, "step": 10641 }, { "epoch": 1.2224455803802194, "grad_norm": 0.5478098392486572, "learning_rate": 0.0001, "loss": 1.3178, "step": 10642 }, { "epoch": 1.2225604502900465, "grad_norm": 0.5850222706794739, "learning_rate": 0.0001, "loss": 1.5655, "step": 10643 }, { "epoch": 1.2226753201998737, "grad_norm": 0.533097505569458, "learning_rate": 0.0001, "loss": 1.3787, "step": 10644 }, { "epoch": 1.2227901901097007, "grad_norm": 0.6224300861358643, "learning_rate": 0.0001, "loss": 1.6143, "step": 10645 }, { "epoch": 1.222905060019528, "grad_norm": 0.5365055203437805, "learning_rate": 0.0001, "loss": 1.3455, "step": 10646 }, { "epoch": 1.223019929929355, "grad_norm": 0.5840891599655151, "learning_rate": 0.0001, "loss": 1.5427, "step": 10647 }, { "epoch": 1.2231347998391822, "grad_norm": 0.5483365654945374, "learning_rate": 0.0001, "loss": 1.4997, "step": 10648 }, { "epoch": 1.2232496697490092, "grad_norm": 0.5992576479911804, "learning_rate": 0.0001, "loss": 1.4718, "step": 10649 }, { "epoch": 1.2233645396588364, "grad_norm": 0.5553631782531738, "learning_rate": 0.0001, "loss": 1.4211, "step": 10650 }, { "epoch": 1.2234794095686634, "grad_norm": 0.5532668232917786, "learning_rate": 0.0001, "loss": 1.526, "step": 10651 }, { "epoch": 1.2235942794784906, "grad_norm": 0.6031765341758728, "learning_rate": 0.0001, "loss": 1.4622, "step": 10652 }, { "epoch": 1.2237091493883177, "grad_norm": 0.5665226578712463, "learning_rate": 0.0001, "loss": 1.4103, "step": 10653 }, { "epoch": 1.2238240192981449, "grad_norm": 0.5733132362365723, "learning_rate": 0.0001, "loss": 1.451, "step": 10654 }, { "epoch": 1.223938889207972, "grad_norm": 0.5637727379798889, "learning_rate": 0.0001, "loss": 1.4781, "step": 10655 }, { "epoch": 1.2240537591177991, "grad_norm": 0.5689738392829895, "learning_rate": 0.0001, "loss": 1.4647, "step": 10656 }, { "epoch": 1.2241686290276261, "grad_norm": 0.5613055229187012, "learning_rate": 0.0001, "loss": 1.5456, "step": 10657 }, { "epoch": 1.2242834989374534, "grad_norm": 0.6242927312850952, "learning_rate": 0.0001, "loss": 1.6494, "step": 10658 }, { "epoch": 1.2243983688472804, "grad_norm": 0.558158278465271, "learning_rate": 0.0001, "loss": 1.5788, "step": 10659 }, { "epoch": 1.2245132387571076, "grad_norm": 0.5290167331695557, "learning_rate": 0.0001, "loss": 1.3642, "step": 10660 }, { "epoch": 1.2246281086669346, "grad_norm": 0.530127763748169, "learning_rate": 0.0001, "loss": 1.448, "step": 10661 }, { "epoch": 1.2247429785767618, "grad_norm": 0.6060307621955872, "learning_rate": 0.0001, "loss": 1.5438, "step": 10662 }, { "epoch": 1.2248578484865889, "grad_norm": 0.5839899182319641, "learning_rate": 0.0001, "loss": 1.5879, "step": 10663 }, { "epoch": 1.224972718396416, "grad_norm": 0.5494903326034546, "learning_rate": 0.0001, "loss": 1.5572, "step": 10664 }, { "epoch": 1.225087588306243, "grad_norm": 0.5795137882232666, "learning_rate": 0.0001, "loss": 1.4283, "step": 10665 }, { "epoch": 1.2252024582160703, "grad_norm": 0.5595057606697083, "learning_rate": 0.0001, "loss": 1.3211, "step": 10666 }, { "epoch": 1.2253173281258973, "grad_norm": 0.535707414150238, "learning_rate": 0.0001, "loss": 1.3843, "step": 10667 }, { "epoch": 1.2254321980357246, "grad_norm": 0.555937647819519, "learning_rate": 0.0001, "loss": 1.5849, "step": 10668 }, { "epoch": 1.2255470679455516, "grad_norm": 0.568341076374054, "learning_rate": 0.0001, "loss": 1.5307, "step": 10669 }, { "epoch": 1.2256619378553788, "grad_norm": 0.5604590177536011, "learning_rate": 0.0001, "loss": 1.3813, "step": 10670 }, { "epoch": 1.2257768077652058, "grad_norm": 0.5631393790245056, "learning_rate": 0.0001, "loss": 1.3685, "step": 10671 }, { "epoch": 1.225891677675033, "grad_norm": 0.579447329044342, "learning_rate": 0.0001, "loss": 1.4172, "step": 10672 }, { "epoch": 1.22600654758486, "grad_norm": 0.635651171207428, "learning_rate": 0.0001, "loss": 1.4747, "step": 10673 }, { "epoch": 1.2261214174946873, "grad_norm": 0.5449775457382202, "learning_rate": 0.0001, "loss": 1.3641, "step": 10674 }, { "epoch": 1.2262362874045143, "grad_norm": 0.5698413252830505, "learning_rate": 0.0001, "loss": 1.5557, "step": 10675 }, { "epoch": 1.2263511573143415, "grad_norm": 0.5639238953590393, "learning_rate": 0.0001, "loss": 1.5912, "step": 10676 }, { "epoch": 1.2264660272241685, "grad_norm": 0.6942810416221619, "learning_rate": 0.0001, "loss": 1.5553, "step": 10677 }, { "epoch": 1.2265808971339958, "grad_norm": 0.5638768076896667, "learning_rate": 0.0001, "loss": 1.5026, "step": 10678 }, { "epoch": 1.226695767043823, "grad_norm": 0.5303918123245239, "learning_rate": 0.0001, "loss": 1.1953, "step": 10679 }, { "epoch": 1.22681063695365, "grad_norm": 0.5451003313064575, "learning_rate": 0.0001, "loss": 1.3523, "step": 10680 }, { "epoch": 1.226925506863477, "grad_norm": 0.5491355657577515, "learning_rate": 0.0001, "loss": 1.4684, "step": 10681 }, { "epoch": 1.2270403767733042, "grad_norm": 0.5453058481216431, "learning_rate": 0.0001, "loss": 1.4057, "step": 10682 }, { "epoch": 1.2271552466831315, "grad_norm": 0.5452755093574524, "learning_rate": 0.0001, "loss": 1.2485, "step": 10683 }, { "epoch": 1.2272701165929585, "grad_norm": 0.5503637194633484, "learning_rate": 0.0001, "loss": 1.4669, "step": 10684 }, { "epoch": 1.2273849865027855, "grad_norm": 0.5912861227989197, "learning_rate": 0.0001, "loss": 1.5349, "step": 10685 }, { "epoch": 1.2274998564126127, "grad_norm": 0.5763379335403442, "learning_rate": 0.0001, "loss": 1.492, "step": 10686 }, { "epoch": 1.22761472632244, "grad_norm": 0.590811550617218, "learning_rate": 0.0001, "loss": 1.4122, "step": 10687 }, { "epoch": 1.227729596232267, "grad_norm": 0.6356752514839172, "learning_rate": 0.0001, "loss": 1.5515, "step": 10688 }, { "epoch": 1.227844466142094, "grad_norm": 0.5507737398147583, "learning_rate": 0.0001, "loss": 1.4867, "step": 10689 }, { "epoch": 1.2279593360519212, "grad_norm": 0.5941469073295593, "learning_rate": 0.0001, "loss": 1.5367, "step": 10690 }, { "epoch": 1.2280742059617484, "grad_norm": 0.5983887314796448, "learning_rate": 0.0001, "loss": 1.6548, "step": 10691 }, { "epoch": 1.2281890758715754, "grad_norm": 0.5874168276786804, "learning_rate": 0.0001, "loss": 1.3943, "step": 10692 }, { "epoch": 1.2283039457814025, "grad_norm": 0.6247429847717285, "learning_rate": 0.0001, "loss": 1.712, "step": 10693 }, { "epoch": 1.2284188156912297, "grad_norm": 0.5645884275436401, "learning_rate": 0.0001, "loss": 1.5444, "step": 10694 }, { "epoch": 1.228533685601057, "grad_norm": 0.6020731925964355, "learning_rate": 0.0001, "loss": 1.4052, "step": 10695 }, { "epoch": 1.228648555510884, "grad_norm": 0.6159355044364929, "learning_rate": 0.0001, "loss": 1.5297, "step": 10696 }, { "epoch": 1.228763425420711, "grad_norm": 0.5802619457244873, "learning_rate": 0.0001, "loss": 1.4455, "step": 10697 }, { "epoch": 1.2288782953305382, "grad_norm": 0.5384595990180969, "learning_rate": 0.0001, "loss": 1.3547, "step": 10698 }, { "epoch": 1.2289931652403654, "grad_norm": 0.5957247018814087, "learning_rate": 0.0001, "loss": 1.4597, "step": 10699 }, { "epoch": 1.2291080351501924, "grad_norm": 0.5814695954322815, "learning_rate": 0.0001, "loss": 1.4469, "step": 10700 }, { "epoch": 1.2292229050600194, "grad_norm": 0.5483696460723877, "learning_rate": 0.0001, "loss": 1.3991, "step": 10701 }, { "epoch": 1.2293377749698466, "grad_norm": 0.5747151374816895, "learning_rate": 0.0001, "loss": 1.4609, "step": 10702 }, { "epoch": 1.2294526448796739, "grad_norm": 0.5760491490364075, "learning_rate": 0.0001, "loss": 1.5331, "step": 10703 }, { "epoch": 1.2295675147895009, "grad_norm": 0.5944879055023193, "learning_rate": 0.0001, "loss": 1.2962, "step": 10704 }, { "epoch": 1.229682384699328, "grad_norm": 0.5719342231750488, "learning_rate": 0.0001, "loss": 1.3065, "step": 10705 }, { "epoch": 1.2297972546091551, "grad_norm": 0.6149895787239075, "learning_rate": 0.0001, "loss": 1.4154, "step": 10706 }, { "epoch": 1.2299121245189824, "grad_norm": 0.6030192971229553, "learning_rate": 0.0001, "loss": 1.5497, "step": 10707 }, { "epoch": 1.2300269944288094, "grad_norm": 0.6043549180030823, "learning_rate": 0.0001, "loss": 1.5306, "step": 10708 }, { "epoch": 1.2301418643386364, "grad_norm": 0.5450412034988403, "learning_rate": 0.0001, "loss": 1.5068, "step": 10709 }, { "epoch": 1.2302567342484636, "grad_norm": 0.5756334066390991, "learning_rate": 0.0001, "loss": 1.619, "step": 10710 }, { "epoch": 1.2303716041582908, "grad_norm": 0.5773504376411438, "learning_rate": 0.0001, "loss": 1.496, "step": 10711 }, { "epoch": 1.2304864740681178, "grad_norm": 0.5983827710151672, "learning_rate": 0.0001, "loss": 1.4885, "step": 10712 }, { "epoch": 1.230601343977945, "grad_norm": 0.5947871804237366, "learning_rate": 0.0001, "loss": 1.5242, "step": 10713 }, { "epoch": 1.230716213887772, "grad_norm": 0.5693453550338745, "learning_rate": 0.0001, "loss": 1.4284, "step": 10714 }, { "epoch": 1.2308310837975993, "grad_norm": 0.616321325302124, "learning_rate": 0.0001, "loss": 1.59, "step": 10715 }, { "epoch": 1.2309459537074263, "grad_norm": 0.581452488899231, "learning_rate": 0.0001, "loss": 1.4594, "step": 10716 }, { "epoch": 1.2310608236172536, "grad_norm": 0.5225065350532532, "learning_rate": 0.0001, "loss": 1.4939, "step": 10717 }, { "epoch": 1.2311756935270806, "grad_norm": 0.5418626666069031, "learning_rate": 0.0001, "loss": 1.3131, "step": 10718 }, { "epoch": 1.2312905634369078, "grad_norm": 0.5273642539978027, "learning_rate": 0.0001, "loss": 1.4067, "step": 10719 }, { "epoch": 1.2314054333467348, "grad_norm": 0.5336480140686035, "learning_rate": 0.0001, "loss": 1.3815, "step": 10720 }, { "epoch": 1.231520303256562, "grad_norm": 0.6629604697227478, "learning_rate": 0.0001, "loss": 1.5378, "step": 10721 }, { "epoch": 1.231635173166389, "grad_norm": 0.5382062792778015, "learning_rate": 0.0001, "loss": 1.1965, "step": 10722 }, { "epoch": 1.2317500430762163, "grad_norm": 0.5966381430625916, "learning_rate": 0.0001, "loss": 1.3991, "step": 10723 }, { "epoch": 1.2318649129860433, "grad_norm": 0.5476511716842651, "learning_rate": 0.0001, "loss": 1.4263, "step": 10724 }, { "epoch": 1.2319797828958705, "grad_norm": 0.5930783152580261, "learning_rate": 0.0001, "loss": 1.3903, "step": 10725 }, { "epoch": 1.2320946528056975, "grad_norm": 0.5383990406990051, "learning_rate": 0.0001, "loss": 1.3619, "step": 10726 }, { "epoch": 1.2322095227155248, "grad_norm": 0.5706599950790405, "learning_rate": 0.0001, "loss": 1.5259, "step": 10727 }, { "epoch": 1.2323243926253518, "grad_norm": 0.5487735867500305, "learning_rate": 0.0001, "loss": 1.4059, "step": 10728 }, { "epoch": 1.232439262535179, "grad_norm": 0.5330200791358948, "learning_rate": 0.0001, "loss": 1.4389, "step": 10729 }, { "epoch": 1.232554132445006, "grad_norm": 0.5702281594276428, "learning_rate": 0.0001, "loss": 1.632, "step": 10730 }, { "epoch": 1.2326690023548332, "grad_norm": 0.5802901983261108, "learning_rate": 0.0001, "loss": 1.4692, "step": 10731 }, { "epoch": 1.2327838722646602, "grad_norm": 0.6011072993278503, "learning_rate": 0.0001, "loss": 1.3248, "step": 10732 }, { "epoch": 1.2328987421744875, "grad_norm": 0.6271477937698364, "learning_rate": 0.0001, "loss": 1.5501, "step": 10733 }, { "epoch": 1.2330136120843145, "grad_norm": 0.5693656802177429, "learning_rate": 0.0001, "loss": 1.4817, "step": 10734 }, { "epoch": 1.2331284819941417, "grad_norm": 0.625735342502594, "learning_rate": 0.0001, "loss": 1.5831, "step": 10735 }, { "epoch": 1.2332433519039687, "grad_norm": 0.5748286247253418, "learning_rate": 0.0001, "loss": 1.5317, "step": 10736 }, { "epoch": 1.233358221813796, "grad_norm": 0.636698842048645, "learning_rate": 0.0001, "loss": 1.5133, "step": 10737 }, { "epoch": 1.233473091723623, "grad_norm": 0.5615163445472717, "learning_rate": 0.0001, "loss": 1.2444, "step": 10738 }, { "epoch": 1.2335879616334502, "grad_norm": 0.572391927242279, "learning_rate": 0.0001, "loss": 1.3733, "step": 10739 }, { "epoch": 1.2337028315432772, "grad_norm": 0.5233102440834045, "learning_rate": 0.0001, "loss": 1.2587, "step": 10740 }, { "epoch": 1.2338177014531044, "grad_norm": 0.5385754108428955, "learning_rate": 0.0001, "loss": 1.3457, "step": 10741 }, { "epoch": 1.2339325713629314, "grad_norm": 0.5545738339424133, "learning_rate": 0.0001, "loss": 1.4354, "step": 10742 }, { "epoch": 1.2340474412727587, "grad_norm": 0.5675808787345886, "learning_rate": 0.0001, "loss": 1.4972, "step": 10743 }, { "epoch": 1.2341623111825857, "grad_norm": 0.5583345293998718, "learning_rate": 0.0001, "loss": 1.4697, "step": 10744 }, { "epoch": 1.234277181092413, "grad_norm": 0.5754501819610596, "learning_rate": 0.0001, "loss": 1.4148, "step": 10745 }, { "epoch": 1.23439205100224, "grad_norm": 0.5526403784751892, "learning_rate": 0.0001, "loss": 1.4903, "step": 10746 }, { "epoch": 1.2345069209120672, "grad_norm": 0.6143956184387207, "learning_rate": 0.0001, "loss": 1.4575, "step": 10747 }, { "epoch": 1.2346217908218942, "grad_norm": 0.556839644908905, "learning_rate": 0.0001, "loss": 1.4682, "step": 10748 }, { "epoch": 1.2347366607317214, "grad_norm": 0.5772037506103516, "learning_rate": 0.0001, "loss": 1.2345, "step": 10749 }, { "epoch": 1.2348515306415484, "grad_norm": 0.5617235898971558, "learning_rate": 0.0001, "loss": 1.6019, "step": 10750 }, { "epoch": 1.2349664005513756, "grad_norm": 0.5470720529556274, "learning_rate": 0.0001, "loss": 1.3394, "step": 10751 }, { "epoch": 1.2350812704612026, "grad_norm": 0.6235065460205078, "learning_rate": 0.0001, "loss": 1.3382, "step": 10752 }, { "epoch": 1.2351961403710299, "grad_norm": 0.5779804587364197, "learning_rate": 0.0001, "loss": 1.1888, "step": 10753 }, { "epoch": 1.2353110102808569, "grad_norm": 0.6196669340133667, "learning_rate": 0.0001, "loss": 1.6653, "step": 10754 }, { "epoch": 1.2354258801906841, "grad_norm": 0.6144828200340271, "learning_rate": 0.0001, "loss": 1.5581, "step": 10755 }, { "epoch": 1.2355407501005111, "grad_norm": 0.5465558171272278, "learning_rate": 0.0001, "loss": 1.3457, "step": 10756 }, { "epoch": 1.2356556200103384, "grad_norm": 0.6325592398643494, "learning_rate": 0.0001, "loss": 1.4344, "step": 10757 }, { "epoch": 1.2357704899201654, "grad_norm": 0.5570918917655945, "learning_rate": 0.0001, "loss": 1.453, "step": 10758 }, { "epoch": 1.2358853598299926, "grad_norm": 0.5771710872650146, "learning_rate": 0.0001, "loss": 1.5336, "step": 10759 }, { "epoch": 1.2360002297398196, "grad_norm": 0.5577232837677002, "learning_rate": 0.0001, "loss": 1.4324, "step": 10760 }, { "epoch": 1.2361150996496468, "grad_norm": 0.5303571224212646, "learning_rate": 0.0001, "loss": 1.2595, "step": 10761 }, { "epoch": 1.2362299695594738, "grad_norm": 0.6245179176330566, "learning_rate": 0.0001, "loss": 1.4688, "step": 10762 }, { "epoch": 1.236344839469301, "grad_norm": 0.5816663503646851, "learning_rate": 0.0001, "loss": 1.5545, "step": 10763 }, { "epoch": 1.236459709379128, "grad_norm": 0.5651383996009827, "learning_rate": 0.0001, "loss": 1.293, "step": 10764 }, { "epoch": 1.2365745792889553, "grad_norm": 0.563153862953186, "learning_rate": 0.0001, "loss": 1.3759, "step": 10765 }, { "epoch": 1.2366894491987823, "grad_norm": 0.5659855604171753, "learning_rate": 0.0001, "loss": 1.5424, "step": 10766 }, { "epoch": 1.2368043191086096, "grad_norm": 0.564038872718811, "learning_rate": 0.0001, "loss": 1.4861, "step": 10767 }, { "epoch": 1.2369191890184366, "grad_norm": 0.6177818775177002, "learning_rate": 0.0001, "loss": 1.586, "step": 10768 }, { "epoch": 1.2370340589282638, "grad_norm": 0.5920203328132629, "learning_rate": 0.0001, "loss": 1.5267, "step": 10769 }, { "epoch": 1.2371489288380908, "grad_norm": 0.6030647158622742, "learning_rate": 0.0001, "loss": 1.6662, "step": 10770 }, { "epoch": 1.237263798747918, "grad_norm": 0.5532615780830383, "learning_rate": 0.0001, "loss": 1.3566, "step": 10771 }, { "epoch": 1.237378668657745, "grad_norm": 0.6269826292991638, "learning_rate": 0.0001, "loss": 1.3191, "step": 10772 }, { "epoch": 1.2374935385675723, "grad_norm": 0.6108141541481018, "learning_rate": 0.0001, "loss": 1.3749, "step": 10773 }, { "epoch": 1.2376084084773993, "grad_norm": 0.557407796382904, "learning_rate": 0.0001, "loss": 1.5626, "step": 10774 }, { "epoch": 1.2377232783872265, "grad_norm": 0.6010729074478149, "learning_rate": 0.0001, "loss": 1.4623, "step": 10775 }, { "epoch": 1.2378381482970535, "grad_norm": 0.6081424355506897, "learning_rate": 0.0001, "loss": 1.5455, "step": 10776 }, { "epoch": 1.2379530182068808, "grad_norm": 0.6339059472084045, "learning_rate": 0.0001, "loss": 1.5383, "step": 10777 }, { "epoch": 1.2380678881167078, "grad_norm": 0.5645229816436768, "learning_rate": 0.0001, "loss": 1.479, "step": 10778 }, { "epoch": 1.238182758026535, "grad_norm": 0.5893407464027405, "learning_rate": 0.0001, "loss": 1.5504, "step": 10779 }, { "epoch": 1.238297627936362, "grad_norm": 0.6675935387611389, "learning_rate": 0.0001, "loss": 1.7331, "step": 10780 }, { "epoch": 1.2384124978461892, "grad_norm": 0.5694357752799988, "learning_rate": 0.0001, "loss": 1.3615, "step": 10781 }, { "epoch": 1.2385273677560162, "grad_norm": 0.5906005501747131, "learning_rate": 0.0001, "loss": 1.569, "step": 10782 }, { "epoch": 1.2386422376658435, "grad_norm": 0.6008540391921997, "learning_rate": 0.0001, "loss": 1.6614, "step": 10783 }, { "epoch": 1.2387571075756705, "grad_norm": 0.5501822233200073, "learning_rate": 0.0001, "loss": 1.5858, "step": 10784 }, { "epoch": 1.2388719774854977, "grad_norm": 0.5909137725830078, "learning_rate": 0.0001, "loss": 1.5946, "step": 10785 }, { "epoch": 1.2389868473953247, "grad_norm": 0.5546433329582214, "learning_rate": 0.0001, "loss": 1.4521, "step": 10786 }, { "epoch": 1.239101717305152, "grad_norm": 0.5489922165870667, "learning_rate": 0.0001, "loss": 1.2556, "step": 10787 }, { "epoch": 1.239216587214979, "grad_norm": 0.5586884617805481, "learning_rate": 0.0001, "loss": 1.3457, "step": 10788 }, { "epoch": 1.2393314571248062, "grad_norm": 0.5664530992507935, "learning_rate": 0.0001, "loss": 1.4248, "step": 10789 }, { "epoch": 1.2394463270346332, "grad_norm": 0.5709450244903564, "learning_rate": 0.0001, "loss": 1.4569, "step": 10790 }, { "epoch": 1.2395611969444604, "grad_norm": 0.514665424823761, "learning_rate": 0.0001, "loss": 1.394, "step": 10791 }, { "epoch": 1.2396760668542874, "grad_norm": 0.5852550268173218, "learning_rate": 0.0001, "loss": 1.6148, "step": 10792 }, { "epoch": 1.2397909367641147, "grad_norm": 0.5568869113922119, "learning_rate": 0.0001, "loss": 1.3949, "step": 10793 }, { "epoch": 1.2399058066739417, "grad_norm": 0.5907896161079407, "learning_rate": 0.0001, "loss": 1.383, "step": 10794 }, { "epoch": 1.240020676583769, "grad_norm": 0.6120040416717529, "learning_rate": 0.0001, "loss": 1.4843, "step": 10795 }, { "epoch": 1.240135546493596, "grad_norm": 0.5711389183998108, "learning_rate": 0.0001, "loss": 1.4388, "step": 10796 }, { "epoch": 1.2402504164034232, "grad_norm": 0.5505643486976624, "learning_rate": 0.0001, "loss": 1.502, "step": 10797 }, { "epoch": 1.2403652863132502, "grad_norm": 0.5786750316619873, "learning_rate": 0.0001, "loss": 1.4686, "step": 10798 }, { "epoch": 1.2404801562230774, "grad_norm": 0.5783752799034119, "learning_rate": 0.0001, "loss": 1.399, "step": 10799 }, { "epoch": 1.2405950261329044, "grad_norm": 0.6035143733024597, "learning_rate": 0.0001, "loss": 1.4854, "step": 10800 }, { "epoch": 1.2407098960427316, "grad_norm": 0.5874049663543701, "learning_rate": 0.0001, "loss": 1.5683, "step": 10801 }, { "epoch": 1.2408247659525586, "grad_norm": 0.5582104921340942, "learning_rate": 0.0001, "loss": 1.3253, "step": 10802 }, { "epoch": 1.2409396358623859, "grad_norm": 0.5821365118026733, "learning_rate": 0.0001, "loss": 1.396, "step": 10803 }, { "epoch": 1.2410545057722129, "grad_norm": 0.5929169654846191, "learning_rate": 0.0001, "loss": 1.5014, "step": 10804 }, { "epoch": 1.24116937568204, "grad_norm": 0.5773475766181946, "learning_rate": 0.0001, "loss": 1.6268, "step": 10805 }, { "epoch": 1.2412842455918671, "grad_norm": 0.5636146068572998, "learning_rate": 0.0001, "loss": 1.5712, "step": 10806 }, { "epoch": 1.2413991155016944, "grad_norm": 0.5757436156272888, "learning_rate": 0.0001, "loss": 1.6073, "step": 10807 }, { "epoch": 1.2415139854115214, "grad_norm": 0.5821229815483093, "learning_rate": 0.0001, "loss": 1.4745, "step": 10808 }, { "epoch": 1.2416288553213486, "grad_norm": 0.5732831954956055, "learning_rate": 0.0001, "loss": 1.4471, "step": 10809 }, { "epoch": 1.2417437252311756, "grad_norm": 0.5851945877075195, "learning_rate": 0.0001, "loss": 1.4989, "step": 10810 }, { "epoch": 1.2418585951410028, "grad_norm": 0.5864282846450806, "learning_rate": 0.0001, "loss": 1.6848, "step": 10811 }, { "epoch": 1.2419734650508298, "grad_norm": 0.5669234395027161, "learning_rate": 0.0001, "loss": 1.2759, "step": 10812 }, { "epoch": 1.242088334960657, "grad_norm": 0.5712340474128723, "learning_rate": 0.0001, "loss": 1.4233, "step": 10813 }, { "epoch": 1.242203204870484, "grad_norm": 0.5951799750328064, "learning_rate": 0.0001, "loss": 1.4003, "step": 10814 }, { "epoch": 1.2423180747803113, "grad_norm": 0.5672787427902222, "learning_rate": 0.0001, "loss": 1.5504, "step": 10815 }, { "epoch": 1.2424329446901385, "grad_norm": 0.6768470406532288, "learning_rate": 0.0001, "loss": 1.7115, "step": 10816 }, { "epoch": 1.2425478145999655, "grad_norm": 0.5624891519546509, "learning_rate": 0.0001, "loss": 1.5599, "step": 10817 }, { "epoch": 1.2426626845097926, "grad_norm": 0.6003620624542236, "learning_rate": 0.0001, "loss": 1.6077, "step": 10818 }, { "epoch": 1.2427775544196198, "grad_norm": 0.61441570520401, "learning_rate": 0.0001, "loss": 1.668, "step": 10819 }, { "epoch": 1.242892424329447, "grad_norm": 0.5454705953598022, "learning_rate": 0.0001, "loss": 1.695, "step": 10820 }, { "epoch": 1.243007294239274, "grad_norm": 0.6024851202964783, "learning_rate": 0.0001, "loss": 1.5696, "step": 10821 }, { "epoch": 1.243122164149101, "grad_norm": 0.560492992401123, "learning_rate": 0.0001, "loss": 1.4578, "step": 10822 }, { "epoch": 1.2432370340589283, "grad_norm": 0.5419345498085022, "learning_rate": 0.0001, "loss": 1.4609, "step": 10823 }, { "epoch": 1.2433519039687555, "grad_norm": 0.6079069972038269, "learning_rate": 0.0001, "loss": 1.4584, "step": 10824 }, { "epoch": 1.2434667738785825, "grad_norm": 0.519904613494873, "learning_rate": 0.0001, "loss": 1.4182, "step": 10825 }, { "epoch": 1.2435816437884095, "grad_norm": 0.5554570555686951, "learning_rate": 0.0001, "loss": 1.5727, "step": 10826 }, { "epoch": 1.2436965136982367, "grad_norm": 0.5826308131217957, "learning_rate": 0.0001, "loss": 1.5531, "step": 10827 }, { "epoch": 1.243811383608064, "grad_norm": 0.5417545437812805, "learning_rate": 0.0001, "loss": 1.3831, "step": 10828 }, { "epoch": 1.243926253517891, "grad_norm": 0.5289231538772583, "learning_rate": 0.0001, "loss": 1.5987, "step": 10829 }, { "epoch": 1.244041123427718, "grad_norm": 0.5962126851081848, "learning_rate": 0.0001, "loss": 1.448, "step": 10830 }, { "epoch": 1.2441559933375452, "grad_norm": 0.5800266861915588, "learning_rate": 0.0001, "loss": 1.5021, "step": 10831 }, { "epoch": 1.2442708632473725, "grad_norm": 0.567566990852356, "learning_rate": 0.0001, "loss": 1.4349, "step": 10832 }, { "epoch": 1.2443857331571995, "grad_norm": 0.6255682110786438, "learning_rate": 0.0001, "loss": 1.6459, "step": 10833 }, { "epoch": 1.2445006030670265, "grad_norm": 0.5554635524749756, "learning_rate": 0.0001, "loss": 1.307, "step": 10834 }, { "epoch": 1.2446154729768537, "grad_norm": 0.5715488195419312, "learning_rate": 0.0001, "loss": 1.5768, "step": 10835 }, { "epoch": 1.244730342886681, "grad_norm": 0.5674731731414795, "learning_rate": 0.0001, "loss": 1.5265, "step": 10836 }, { "epoch": 1.244845212796508, "grad_norm": 0.5632114410400391, "learning_rate": 0.0001, "loss": 1.4986, "step": 10837 }, { "epoch": 1.244960082706335, "grad_norm": 0.5295623540878296, "learning_rate": 0.0001, "loss": 1.4137, "step": 10838 }, { "epoch": 1.2450749526161622, "grad_norm": 0.5888751149177551, "learning_rate": 0.0001, "loss": 1.562, "step": 10839 }, { "epoch": 1.2451898225259894, "grad_norm": 0.6099748015403748, "learning_rate": 0.0001, "loss": 1.4734, "step": 10840 }, { "epoch": 1.2453046924358164, "grad_norm": 0.5546545386314392, "learning_rate": 0.0001, "loss": 1.4835, "step": 10841 }, { "epoch": 1.2454195623456434, "grad_norm": 0.5606866478919983, "learning_rate": 0.0001, "loss": 1.5588, "step": 10842 }, { "epoch": 1.2455344322554707, "grad_norm": 0.6139029860496521, "learning_rate": 0.0001, "loss": 1.5756, "step": 10843 }, { "epoch": 1.245649302165298, "grad_norm": 0.6030558347702026, "learning_rate": 0.0001, "loss": 1.362, "step": 10844 }, { "epoch": 1.245764172075125, "grad_norm": 0.5658564567565918, "learning_rate": 0.0001, "loss": 1.4998, "step": 10845 }, { "epoch": 1.245879041984952, "grad_norm": 0.5419941544532776, "learning_rate": 0.0001, "loss": 1.3872, "step": 10846 }, { "epoch": 1.2459939118947791, "grad_norm": 0.6134659647941589, "learning_rate": 0.0001, "loss": 1.6605, "step": 10847 }, { "epoch": 1.2461087818046064, "grad_norm": 0.5531185269355774, "learning_rate": 0.0001, "loss": 1.4083, "step": 10848 }, { "epoch": 1.2462236517144334, "grad_norm": 0.5249099731445312, "learning_rate": 0.0001, "loss": 1.4228, "step": 10849 }, { "epoch": 1.2463385216242606, "grad_norm": 0.5550907254219055, "learning_rate": 0.0001, "loss": 1.314, "step": 10850 }, { "epoch": 1.2464533915340876, "grad_norm": 0.6986422538757324, "learning_rate": 0.0001, "loss": 1.7078, "step": 10851 }, { "epoch": 1.2465682614439149, "grad_norm": 0.5527095794677734, "learning_rate": 0.0001, "loss": 1.4435, "step": 10852 }, { "epoch": 1.2466831313537419, "grad_norm": 0.6040549278259277, "learning_rate": 0.0001, "loss": 1.5648, "step": 10853 }, { "epoch": 1.246798001263569, "grad_norm": 0.5868404507637024, "learning_rate": 0.0001, "loss": 1.3417, "step": 10854 }, { "epoch": 1.246912871173396, "grad_norm": 0.5885109305381775, "learning_rate": 0.0001, "loss": 1.5384, "step": 10855 }, { "epoch": 1.2470277410832233, "grad_norm": 0.6273307800292969, "learning_rate": 0.0001, "loss": 1.6859, "step": 10856 }, { "epoch": 1.2471426109930503, "grad_norm": 0.6067867279052734, "learning_rate": 0.0001, "loss": 1.4738, "step": 10857 }, { "epoch": 1.2472574809028776, "grad_norm": 0.6141306757926941, "learning_rate": 0.0001, "loss": 1.3835, "step": 10858 }, { "epoch": 1.2473723508127046, "grad_norm": 0.5698774456977844, "learning_rate": 0.0001, "loss": 1.3689, "step": 10859 }, { "epoch": 1.2474872207225318, "grad_norm": 0.6184090375900269, "learning_rate": 0.0001, "loss": 1.549, "step": 10860 }, { "epoch": 1.2476020906323588, "grad_norm": 0.5597769021987915, "learning_rate": 0.0001, "loss": 1.3235, "step": 10861 }, { "epoch": 1.247716960542186, "grad_norm": 0.5379227995872498, "learning_rate": 0.0001, "loss": 1.4883, "step": 10862 }, { "epoch": 1.247831830452013, "grad_norm": 0.5401409864425659, "learning_rate": 0.0001, "loss": 1.3701, "step": 10863 }, { "epoch": 1.2479467003618403, "grad_norm": 0.5571491718292236, "learning_rate": 0.0001, "loss": 1.4373, "step": 10864 }, { "epoch": 1.2480615702716673, "grad_norm": 0.550502359867096, "learning_rate": 0.0001, "loss": 1.4155, "step": 10865 }, { "epoch": 1.2481764401814945, "grad_norm": 0.5631131529808044, "learning_rate": 0.0001, "loss": 1.5354, "step": 10866 }, { "epoch": 1.2482913100913215, "grad_norm": 0.5850018858909607, "learning_rate": 0.0001, "loss": 1.5193, "step": 10867 }, { "epoch": 1.2484061800011488, "grad_norm": 0.6137135028839111, "learning_rate": 0.0001, "loss": 1.5496, "step": 10868 }, { "epoch": 1.2485210499109758, "grad_norm": 0.5973943471908569, "learning_rate": 0.0001, "loss": 1.4159, "step": 10869 }, { "epoch": 1.248635919820803, "grad_norm": 0.5690673589706421, "learning_rate": 0.0001, "loss": 1.5765, "step": 10870 }, { "epoch": 1.24875078973063, "grad_norm": 0.5665297508239746, "learning_rate": 0.0001, "loss": 1.5149, "step": 10871 }, { "epoch": 1.2488656596404573, "grad_norm": 0.5932878851890564, "learning_rate": 0.0001, "loss": 1.5818, "step": 10872 }, { "epoch": 1.2489805295502843, "grad_norm": 0.634212076663971, "learning_rate": 0.0001, "loss": 1.4781, "step": 10873 }, { "epoch": 1.2490953994601115, "grad_norm": 0.6295340061187744, "learning_rate": 0.0001, "loss": 1.567, "step": 10874 }, { "epoch": 1.2492102693699385, "grad_norm": 0.5564239025115967, "learning_rate": 0.0001, "loss": 1.577, "step": 10875 }, { "epoch": 1.2493251392797657, "grad_norm": 0.591235339641571, "learning_rate": 0.0001, "loss": 1.5672, "step": 10876 }, { "epoch": 1.2494400091895927, "grad_norm": 0.540838360786438, "learning_rate": 0.0001, "loss": 1.4963, "step": 10877 }, { "epoch": 1.24955487909942, "grad_norm": 0.5274412631988525, "learning_rate": 0.0001, "loss": 1.3721, "step": 10878 }, { "epoch": 1.249669749009247, "grad_norm": 0.532894492149353, "learning_rate": 0.0001, "loss": 1.5508, "step": 10879 }, { "epoch": 1.2497846189190742, "grad_norm": 0.5777339339256287, "learning_rate": 0.0001, "loss": 1.4037, "step": 10880 }, { "epoch": 1.2498994888289012, "grad_norm": 0.6504830718040466, "learning_rate": 0.0001, "loss": 1.4341, "step": 10881 }, { "epoch": 1.2500143587387285, "grad_norm": 0.5571553111076355, "learning_rate": 0.0001, "loss": 1.637, "step": 10882 }, { "epoch": 1.2501292286485555, "grad_norm": 0.6024576425552368, "learning_rate": 0.0001, "loss": 1.5162, "step": 10883 }, { "epoch": 1.2502440985583827, "grad_norm": 0.5811855792999268, "learning_rate": 0.0001, "loss": 1.3607, "step": 10884 }, { "epoch": 1.2503589684682097, "grad_norm": 0.571258544921875, "learning_rate": 0.0001, "loss": 1.6321, "step": 10885 }, { "epoch": 1.250473838378037, "grad_norm": 0.5692873001098633, "learning_rate": 0.0001, "loss": 1.4705, "step": 10886 }, { "epoch": 1.250588708287864, "grad_norm": 0.6267750263214111, "learning_rate": 0.0001, "loss": 1.5673, "step": 10887 }, { "epoch": 1.2507035781976912, "grad_norm": 0.6186516284942627, "learning_rate": 0.0001, "loss": 1.4483, "step": 10888 }, { "epoch": 1.2508184481075182, "grad_norm": 0.5934524536132812, "learning_rate": 0.0001, "loss": 1.5474, "step": 10889 }, { "epoch": 1.2509333180173454, "grad_norm": 0.6174366474151611, "learning_rate": 0.0001, "loss": 1.6715, "step": 10890 }, { "epoch": 1.2510481879271724, "grad_norm": 0.5978173613548279, "learning_rate": 0.0001, "loss": 1.5807, "step": 10891 }, { "epoch": 1.2511630578369997, "grad_norm": 0.563639760017395, "learning_rate": 0.0001, "loss": 1.5597, "step": 10892 }, { "epoch": 1.2512779277468267, "grad_norm": 0.6402945518493652, "learning_rate": 0.0001, "loss": 1.6375, "step": 10893 }, { "epoch": 1.251392797656654, "grad_norm": 0.589114248752594, "learning_rate": 0.0001, "loss": 1.5766, "step": 10894 }, { "epoch": 1.251507667566481, "grad_norm": 0.5770435929298401, "learning_rate": 0.0001, "loss": 1.4116, "step": 10895 }, { "epoch": 1.2516225374763081, "grad_norm": 0.6756162643432617, "learning_rate": 0.0001, "loss": 1.7188, "step": 10896 }, { "epoch": 1.2517374073861351, "grad_norm": 0.5813696384429932, "learning_rate": 0.0001, "loss": 1.5876, "step": 10897 }, { "epoch": 1.2518522772959624, "grad_norm": 0.5571926832199097, "learning_rate": 0.0001, "loss": 1.392, "step": 10898 }, { "epoch": 1.2519671472057894, "grad_norm": 0.5992504954338074, "learning_rate": 0.0001, "loss": 1.6786, "step": 10899 }, { "epoch": 1.2520820171156166, "grad_norm": 0.5757802724838257, "learning_rate": 0.0001, "loss": 1.4874, "step": 10900 }, { "epoch": 1.2521968870254436, "grad_norm": 0.5644787549972534, "learning_rate": 0.0001, "loss": 1.3862, "step": 10901 }, { "epoch": 1.2523117569352709, "grad_norm": 0.5546348690986633, "learning_rate": 0.0001, "loss": 1.4577, "step": 10902 }, { "epoch": 1.2524266268450979, "grad_norm": 0.5690358281135559, "learning_rate": 0.0001, "loss": 1.3687, "step": 10903 }, { "epoch": 1.252541496754925, "grad_norm": 0.5834867358207703, "learning_rate": 0.0001, "loss": 1.4238, "step": 10904 }, { "epoch": 1.252656366664752, "grad_norm": 0.558188796043396, "learning_rate": 0.0001, "loss": 1.5329, "step": 10905 }, { "epoch": 1.2527712365745793, "grad_norm": 0.5730266571044922, "learning_rate": 0.0001, "loss": 1.1854, "step": 10906 }, { "epoch": 1.2528861064844063, "grad_norm": 0.6047393083572388, "learning_rate": 0.0001, "loss": 1.5067, "step": 10907 }, { "epoch": 1.2530009763942336, "grad_norm": 0.5652978420257568, "learning_rate": 0.0001, "loss": 1.3756, "step": 10908 }, { "epoch": 1.2531158463040606, "grad_norm": 0.5942120552062988, "learning_rate": 0.0001, "loss": 1.3795, "step": 10909 }, { "epoch": 1.2532307162138878, "grad_norm": 0.5671000480651855, "learning_rate": 0.0001, "loss": 1.4261, "step": 10910 }, { "epoch": 1.2533455861237148, "grad_norm": 0.5476558208465576, "learning_rate": 0.0001, "loss": 1.3019, "step": 10911 }, { "epoch": 1.253460456033542, "grad_norm": 0.6260891556739807, "learning_rate": 0.0001, "loss": 1.7735, "step": 10912 }, { "epoch": 1.253575325943369, "grad_norm": 0.6707521677017212, "learning_rate": 0.0001, "loss": 1.3852, "step": 10913 }, { "epoch": 1.2536901958531963, "grad_norm": 0.5497795939445496, "learning_rate": 0.0001, "loss": 1.5263, "step": 10914 }, { "epoch": 1.2538050657630233, "grad_norm": 0.6137018203735352, "learning_rate": 0.0001, "loss": 1.449, "step": 10915 }, { "epoch": 1.2539199356728505, "grad_norm": 0.575927734375, "learning_rate": 0.0001, "loss": 1.5082, "step": 10916 }, { "epoch": 1.2540348055826775, "grad_norm": 0.5461201667785645, "learning_rate": 0.0001, "loss": 1.571, "step": 10917 }, { "epoch": 1.2541496754925048, "grad_norm": 0.5775430202484131, "learning_rate": 0.0001, "loss": 1.5978, "step": 10918 }, { "epoch": 1.2542645454023318, "grad_norm": 0.596335768699646, "learning_rate": 0.0001, "loss": 1.2492, "step": 10919 }, { "epoch": 1.254379415312159, "grad_norm": 0.552144467830658, "learning_rate": 0.0001, "loss": 1.5481, "step": 10920 }, { "epoch": 1.254494285221986, "grad_norm": 0.6232661008834839, "learning_rate": 0.0001, "loss": 1.6734, "step": 10921 }, { "epoch": 1.2546091551318133, "grad_norm": 0.579709529876709, "learning_rate": 0.0001, "loss": 1.3082, "step": 10922 }, { "epoch": 1.2547240250416403, "grad_norm": 0.5315948724746704, "learning_rate": 0.0001, "loss": 1.3547, "step": 10923 }, { "epoch": 1.2548388949514675, "grad_norm": 0.5723145008087158, "learning_rate": 0.0001, "loss": 1.4858, "step": 10924 }, { "epoch": 1.2549537648612945, "grad_norm": 0.5732467770576477, "learning_rate": 0.0001, "loss": 1.5813, "step": 10925 }, { "epoch": 1.2550686347711217, "grad_norm": 0.5439171195030212, "learning_rate": 0.0001, "loss": 1.6146, "step": 10926 }, { "epoch": 1.2551835046809487, "grad_norm": 0.5616052150726318, "learning_rate": 0.0001, "loss": 1.5706, "step": 10927 }, { "epoch": 1.255298374590776, "grad_norm": 0.5952640175819397, "learning_rate": 0.0001, "loss": 1.3995, "step": 10928 }, { "epoch": 1.255413244500603, "grad_norm": 0.5831541419029236, "learning_rate": 0.0001, "loss": 1.6304, "step": 10929 }, { "epoch": 1.2555281144104302, "grad_norm": 0.5609625577926636, "learning_rate": 0.0001, "loss": 1.4818, "step": 10930 }, { "epoch": 1.2556429843202572, "grad_norm": 0.5512393116950989, "learning_rate": 0.0001, "loss": 1.2979, "step": 10931 }, { "epoch": 1.2557578542300845, "grad_norm": 0.5595287084579468, "learning_rate": 0.0001, "loss": 1.4539, "step": 10932 }, { "epoch": 1.2558727241399115, "grad_norm": 0.5650918483734131, "learning_rate": 0.0001, "loss": 1.4097, "step": 10933 }, { "epoch": 1.2559875940497387, "grad_norm": 0.5434353351593018, "learning_rate": 0.0001, "loss": 1.5182, "step": 10934 }, { "epoch": 1.2561024639595657, "grad_norm": 0.565133810043335, "learning_rate": 0.0001, "loss": 1.5029, "step": 10935 }, { "epoch": 1.256217333869393, "grad_norm": 0.6342281699180603, "learning_rate": 0.0001, "loss": 1.4796, "step": 10936 }, { "epoch": 1.2563322037792202, "grad_norm": 0.5778996348381042, "learning_rate": 0.0001, "loss": 1.4856, "step": 10937 }, { "epoch": 1.2564470736890472, "grad_norm": 0.5874305367469788, "learning_rate": 0.0001, "loss": 1.6482, "step": 10938 }, { "epoch": 1.2565619435988742, "grad_norm": 0.5764653086662292, "learning_rate": 0.0001, "loss": 1.4978, "step": 10939 }, { "epoch": 1.2566768135087014, "grad_norm": 0.5709093809127808, "learning_rate": 0.0001, "loss": 1.3895, "step": 10940 }, { "epoch": 1.2567916834185286, "grad_norm": 0.610541045665741, "learning_rate": 0.0001, "loss": 1.5036, "step": 10941 }, { "epoch": 1.2569065533283557, "grad_norm": 0.6028667688369751, "learning_rate": 0.0001, "loss": 1.6399, "step": 10942 }, { "epoch": 1.2570214232381827, "grad_norm": 0.5337639451026917, "learning_rate": 0.0001, "loss": 1.4331, "step": 10943 }, { "epoch": 1.25713629314801, "grad_norm": 0.6048827767372131, "learning_rate": 0.0001, "loss": 1.4235, "step": 10944 }, { "epoch": 1.2572511630578371, "grad_norm": 0.5719192028045654, "learning_rate": 0.0001, "loss": 1.4289, "step": 10945 }, { "epoch": 1.2573660329676641, "grad_norm": 0.5911000967025757, "learning_rate": 0.0001, "loss": 1.5606, "step": 10946 }, { "epoch": 1.2574809028774911, "grad_norm": 0.5696734189987183, "learning_rate": 0.0001, "loss": 1.2368, "step": 10947 }, { "epoch": 1.2575957727873184, "grad_norm": 0.5841726064682007, "learning_rate": 0.0001, "loss": 1.5245, "step": 10948 }, { "epoch": 1.2577106426971456, "grad_norm": 0.6017117500305176, "learning_rate": 0.0001, "loss": 1.3813, "step": 10949 }, { "epoch": 1.2578255126069726, "grad_norm": 0.5943053364753723, "learning_rate": 0.0001, "loss": 1.3885, "step": 10950 }, { "epoch": 1.2579403825167996, "grad_norm": 0.5891920924186707, "learning_rate": 0.0001, "loss": 1.4698, "step": 10951 }, { "epoch": 1.2580552524266269, "grad_norm": 0.5655304193496704, "learning_rate": 0.0001, "loss": 1.4617, "step": 10952 }, { "epoch": 1.258170122336454, "grad_norm": 0.6333246231079102, "learning_rate": 0.0001, "loss": 1.5318, "step": 10953 }, { "epoch": 1.258284992246281, "grad_norm": 0.6080989837646484, "learning_rate": 0.0001, "loss": 1.5946, "step": 10954 }, { "epoch": 1.258399862156108, "grad_norm": 0.5369704961776733, "learning_rate": 0.0001, "loss": 1.418, "step": 10955 }, { "epoch": 1.2585147320659353, "grad_norm": 0.5709648132324219, "learning_rate": 0.0001, "loss": 1.4451, "step": 10956 }, { "epoch": 1.2586296019757626, "grad_norm": 0.5845744609832764, "learning_rate": 0.0001, "loss": 1.5762, "step": 10957 }, { "epoch": 1.2587444718855896, "grad_norm": 0.588251531124115, "learning_rate": 0.0001, "loss": 1.5864, "step": 10958 }, { "epoch": 1.2588593417954166, "grad_norm": 0.5609616041183472, "learning_rate": 0.0001, "loss": 1.5538, "step": 10959 }, { "epoch": 1.2589742117052438, "grad_norm": 0.5940161347389221, "learning_rate": 0.0001, "loss": 1.631, "step": 10960 }, { "epoch": 1.259089081615071, "grad_norm": 0.6487417817115784, "learning_rate": 0.0001, "loss": 1.6715, "step": 10961 }, { "epoch": 1.259203951524898, "grad_norm": 0.5806244015693665, "learning_rate": 0.0001, "loss": 1.3649, "step": 10962 }, { "epoch": 1.259318821434725, "grad_norm": 0.5590049624443054, "learning_rate": 0.0001, "loss": 1.3849, "step": 10963 }, { "epoch": 1.2594336913445523, "grad_norm": 0.57107013463974, "learning_rate": 0.0001, "loss": 1.3517, "step": 10964 }, { "epoch": 1.2595485612543795, "grad_norm": 0.5718011260032654, "learning_rate": 0.0001, "loss": 1.3671, "step": 10965 }, { "epoch": 1.2596634311642065, "grad_norm": 0.530774712562561, "learning_rate": 0.0001, "loss": 1.3329, "step": 10966 }, { "epoch": 1.2597783010740335, "grad_norm": 0.5350990891456604, "learning_rate": 0.0001, "loss": 1.3828, "step": 10967 }, { "epoch": 1.2598931709838608, "grad_norm": 0.5452684760093689, "learning_rate": 0.0001, "loss": 1.46, "step": 10968 }, { "epoch": 1.260008040893688, "grad_norm": 0.5853266716003418, "learning_rate": 0.0001, "loss": 1.6132, "step": 10969 }, { "epoch": 1.260122910803515, "grad_norm": 0.6207892298698425, "learning_rate": 0.0001, "loss": 1.5122, "step": 10970 }, { "epoch": 1.260237780713342, "grad_norm": 0.6362786293029785, "learning_rate": 0.0001, "loss": 1.5732, "step": 10971 }, { "epoch": 1.2603526506231693, "grad_norm": 0.5912286043167114, "learning_rate": 0.0001, "loss": 1.5615, "step": 10972 }, { "epoch": 1.2604675205329965, "grad_norm": 0.5660232305526733, "learning_rate": 0.0001, "loss": 1.4575, "step": 10973 }, { "epoch": 1.2605823904428235, "grad_norm": 0.5991741418838501, "learning_rate": 0.0001, "loss": 1.4294, "step": 10974 }, { "epoch": 1.2606972603526505, "grad_norm": 0.6811667680740356, "learning_rate": 0.0001, "loss": 1.2061, "step": 10975 }, { "epoch": 1.2608121302624777, "grad_norm": 0.5778743028640747, "learning_rate": 0.0001, "loss": 1.4363, "step": 10976 }, { "epoch": 1.260927000172305, "grad_norm": 0.593360960483551, "learning_rate": 0.0001, "loss": 1.5062, "step": 10977 }, { "epoch": 1.261041870082132, "grad_norm": 0.5722994804382324, "learning_rate": 0.0001, "loss": 1.5407, "step": 10978 }, { "epoch": 1.261156739991959, "grad_norm": 0.5691931843757629, "learning_rate": 0.0001, "loss": 1.539, "step": 10979 }, { "epoch": 1.2612716099017862, "grad_norm": 0.5399587750434875, "learning_rate": 0.0001, "loss": 1.3153, "step": 10980 }, { "epoch": 1.2613864798116134, "grad_norm": 0.5468644499778748, "learning_rate": 0.0001, "loss": 1.4229, "step": 10981 }, { "epoch": 1.2615013497214405, "grad_norm": 0.5714578032493591, "learning_rate": 0.0001, "loss": 1.5986, "step": 10982 }, { "epoch": 1.2616162196312675, "grad_norm": 0.5381194353103638, "learning_rate": 0.0001, "loss": 1.4761, "step": 10983 }, { "epoch": 1.2617310895410947, "grad_norm": 0.5348224639892578, "learning_rate": 0.0001, "loss": 1.328, "step": 10984 }, { "epoch": 1.261845959450922, "grad_norm": 0.570966899394989, "learning_rate": 0.0001, "loss": 1.5432, "step": 10985 }, { "epoch": 1.261960829360749, "grad_norm": 0.5382642149925232, "learning_rate": 0.0001, "loss": 1.4547, "step": 10986 }, { "epoch": 1.262075699270576, "grad_norm": 0.5181246995925903, "learning_rate": 0.0001, "loss": 1.3591, "step": 10987 }, { "epoch": 1.2621905691804032, "grad_norm": 0.5006334781646729, "learning_rate": 0.0001, "loss": 1.2591, "step": 10988 }, { "epoch": 1.2623054390902304, "grad_norm": 0.5715638995170593, "learning_rate": 0.0001, "loss": 1.5684, "step": 10989 }, { "epoch": 1.2624203090000574, "grad_norm": 0.5791033506393433, "learning_rate": 0.0001, "loss": 1.4059, "step": 10990 }, { "epoch": 1.2625351789098844, "grad_norm": 0.5769622325897217, "learning_rate": 0.0001, "loss": 1.474, "step": 10991 }, { "epoch": 1.2626500488197117, "grad_norm": 0.5328226685523987, "learning_rate": 0.0001, "loss": 1.4834, "step": 10992 }, { "epoch": 1.2627649187295389, "grad_norm": 0.5586057901382446, "learning_rate": 0.0001, "loss": 1.3952, "step": 10993 }, { "epoch": 1.262879788639366, "grad_norm": 0.5298300385475159, "learning_rate": 0.0001, "loss": 1.2045, "step": 10994 }, { "epoch": 1.262994658549193, "grad_norm": 0.5825002193450928, "learning_rate": 0.0001, "loss": 1.585, "step": 10995 }, { "epoch": 1.2631095284590201, "grad_norm": 0.5873317122459412, "learning_rate": 0.0001, "loss": 1.5954, "step": 10996 }, { "epoch": 1.2632243983688474, "grad_norm": 0.5621634125709534, "learning_rate": 0.0001, "loss": 1.4467, "step": 10997 }, { "epoch": 1.2633392682786744, "grad_norm": 0.6157379150390625, "learning_rate": 0.0001, "loss": 1.4748, "step": 10998 }, { "epoch": 1.2634541381885014, "grad_norm": 0.5493670105934143, "learning_rate": 0.0001, "loss": 1.3428, "step": 10999 }, { "epoch": 1.2635690080983286, "grad_norm": 0.5555403828620911, "learning_rate": 0.0001, "loss": 1.2846, "step": 11000 }, { "epoch": 1.2636838780081558, "grad_norm": 0.5692359209060669, "learning_rate": 0.0001, "loss": 1.5709, "step": 11001 }, { "epoch": 1.2637987479179829, "grad_norm": 0.5852733850479126, "learning_rate": 0.0001, "loss": 1.5246, "step": 11002 }, { "epoch": 1.26391361782781, "grad_norm": 0.5792815685272217, "learning_rate": 0.0001, "loss": 1.41, "step": 11003 }, { "epoch": 1.264028487737637, "grad_norm": 0.6380968689918518, "learning_rate": 0.0001, "loss": 1.3436, "step": 11004 }, { "epoch": 1.2641433576474643, "grad_norm": 0.6811634302139282, "learning_rate": 0.0001, "loss": 1.5123, "step": 11005 }, { "epoch": 1.2642582275572913, "grad_norm": 0.6941441297531128, "learning_rate": 0.0001, "loss": 1.6212, "step": 11006 }, { "epoch": 1.2643730974671186, "grad_norm": 0.5787838697433472, "learning_rate": 0.0001, "loss": 1.4647, "step": 11007 }, { "epoch": 1.2644879673769456, "grad_norm": 0.5790321230888367, "learning_rate": 0.0001, "loss": 1.477, "step": 11008 }, { "epoch": 1.2646028372867728, "grad_norm": 0.55388343334198, "learning_rate": 0.0001, "loss": 1.4449, "step": 11009 }, { "epoch": 1.2647177071965998, "grad_norm": 0.5964010953903198, "learning_rate": 0.0001, "loss": 1.3159, "step": 11010 }, { "epoch": 1.264832577106427, "grad_norm": 0.5677118897438049, "learning_rate": 0.0001, "loss": 1.3851, "step": 11011 }, { "epoch": 1.264947447016254, "grad_norm": 0.5453557372093201, "learning_rate": 0.0001, "loss": 1.3942, "step": 11012 }, { "epoch": 1.2650623169260813, "grad_norm": 0.594939649105072, "learning_rate": 0.0001, "loss": 1.6116, "step": 11013 }, { "epoch": 1.2651771868359083, "grad_norm": 0.5862032175064087, "learning_rate": 0.0001, "loss": 1.438, "step": 11014 }, { "epoch": 1.2652920567457355, "grad_norm": 0.6072475910186768, "learning_rate": 0.0001, "loss": 1.4529, "step": 11015 }, { "epoch": 1.2654069266555625, "grad_norm": 0.5791646242141724, "learning_rate": 0.0001, "loss": 1.4486, "step": 11016 }, { "epoch": 1.2655217965653898, "grad_norm": 0.5972772240638733, "learning_rate": 0.0001, "loss": 1.6154, "step": 11017 }, { "epoch": 1.2656366664752168, "grad_norm": 0.5665194988250732, "learning_rate": 0.0001, "loss": 1.2982, "step": 11018 }, { "epoch": 1.265751536385044, "grad_norm": 0.5202224254608154, "learning_rate": 0.0001, "loss": 1.081, "step": 11019 }, { "epoch": 1.265866406294871, "grad_norm": 0.5774416327476501, "learning_rate": 0.0001, "loss": 1.5648, "step": 11020 }, { "epoch": 1.2659812762046982, "grad_norm": 0.6238007545471191, "learning_rate": 0.0001, "loss": 1.5547, "step": 11021 }, { "epoch": 1.2660961461145253, "grad_norm": 0.6121378540992737, "learning_rate": 0.0001, "loss": 1.3435, "step": 11022 }, { "epoch": 1.2662110160243525, "grad_norm": 0.6211857795715332, "learning_rate": 0.0001, "loss": 1.5317, "step": 11023 }, { "epoch": 1.2663258859341795, "grad_norm": 0.6037236452102661, "learning_rate": 0.0001, "loss": 1.5751, "step": 11024 }, { "epoch": 1.2664407558440067, "grad_norm": 0.5778083801269531, "learning_rate": 0.0001, "loss": 1.4495, "step": 11025 }, { "epoch": 1.2665556257538337, "grad_norm": 0.5363320112228394, "learning_rate": 0.0001, "loss": 1.3852, "step": 11026 }, { "epoch": 1.266670495663661, "grad_norm": 0.5823166966438293, "learning_rate": 0.0001, "loss": 1.5257, "step": 11027 }, { "epoch": 1.266785365573488, "grad_norm": 0.5804757475852966, "learning_rate": 0.0001, "loss": 1.5805, "step": 11028 }, { "epoch": 1.2669002354833152, "grad_norm": 0.5479066967964172, "learning_rate": 0.0001, "loss": 1.3971, "step": 11029 }, { "epoch": 1.2670151053931422, "grad_norm": 0.5440239310264587, "learning_rate": 0.0001, "loss": 1.6347, "step": 11030 }, { "epoch": 1.2671299753029694, "grad_norm": 0.5420060157775879, "learning_rate": 0.0001, "loss": 1.4851, "step": 11031 }, { "epoch": 1.2672448452127965, "grad_norm": 0.551177442073822, "learning_rate": 0.0001, "loss": 1.3622, "step": 11032 }, { "epoch": 1.2673597151226237, "grad_norm": 0.6188027262687683, "learning_rate": 0.0001, "loss": 1.4396, "step": 11033 }, { "epoch": 1.2674745850324507, "grad_norm": 0.5277479887008667, "learning_rate": 0.0001, "loss": 1.4829, "step": 11034 }, { "epoch": 1.267589454942278, "grad_norm": 0.5837298631668091, "learning_rate": 0.0001, "loss": 1.5163, "step": 11035 }, { "epoch": 1.267704324852105, "grad_norm": 0.5999343991279602, "learning_rate": 0.0001, "loss": 1.6206, "step": 11036 }, { "epoch": 1.2678191947619322, "grad_norm": 0.5406692624092102, "learning_rate": 0.0001, "loss": 1.3183, "step": 11037 }, { "epoch": 1.2679340646717592, "grad_norm": 0.5225005745887756, "learning_rate": 0.0001, "loss": 1.3585, "step": 11038 }, { "epoch": 1.2680489345815864, "grad_norm": 0.5517762899398804, "learning_rate": 0.0001, "loss": 1.4815, "step": 11039 }, { "epoch": 1.2681638044914134, "grad_norm": 0.5250986218452454, "learning_rate": 0.0001, "loss": 1.4083, "step": 11040 }, { "epoch": 1.2682786744012406, "grad_norm": 0.5599402189254761, "learning_rate": 0.0001, "loss": 1.5532, "step": 11041 }, { "epoch": 1.2683935443110677, "grad_norm": 0.6003100872039795, "learning_rate": 0.0001, "loss": 1.3209, "step": 11042 }, { "epoch": 1.2685084142208949, "grad_norm": 0.6202194690704346, "learning_rate": 0.0001, "loss": 1.3633, "step": 11043 }, { "epoch": 1.268623284130722, "grad_norm": 0.5758548974990845, "learning_rate": 0.0001, "loss": 1.4419, "step": 11044 }, { "epoch": 1.2687381540405491, "grad_norm": 0.6529328227043152, "learning_rate": 0.0001, "loss": 1.5932, "step": 11045 }, { "epoch": 1.2688530239503761, "grad_norm": 0.617214560508728, "learning_rate": 0.0001, "loss": 1.6211, "step": 11046 }, { "epoch": 1.2689678938602034, "grad_norm": 0.5643380880355835, "learning_rate": 0.0001, "loss": 1.5033, "step": 11047 }, { "epoch": 1.2690827637700304, "grad_norm": 0.5501247048377991, "learning_rate": 0.0001, "loss": 1.5196, "step": 11048 }, { "epoch": 1.2691976336798576, "grad_norm": 0.5517723560333252, "learning_rate": 0.0001, "loss": 1.2386, "step": 11049 }, { "epoch": 1.2693125035896846, "grad_norm": 0.6187138557434082, "learning_rate": 0.0001, "loss": 1.5318, "step": 11050 }, { "epoch": 1.2694273734995118, "grad_norm": 0.5243611931800842, "learning_rate": 0.0001, "loss": 1.2427, "step": 11051 }, { "epoch": 1.2695422434093389, "grad_norm": 0.5593788623809814, "learning_rate": 0.0001, "loss": 1.3313, "step": 11052 }, { "epoch": 1.269657113319166, "grad_norm": 0.6037552356719971, "learning_rate": 0.0001, "loss": 1.2074, "step": 11053 }, { "epoch": 1.269771983228993, "grad_norm": 0.5670240521430969, "learning_rate": 0.0001, "loss": 1.327, "step": 11054 }, { "epoch": 1.2698868531388203, "grad_norm": 0.6029371023178101, "learning_rate": 0.0001, "loss": 1.6885, "step": 11055 }, { "epoch": 1.2700017230486473, "grad_norm": 0.6077609062194824, "learning_rate": 0.0001, "loss": 1.5042, "step": 11056 }, { "epoch": 1.2701165929584746, "grad_norm": 0.5685033202171326, "learning_rate": 0.0001, "loss": 1.5165, "step": 11057 }, { "epoch": 1.2702314628683016, "grad_norm": 0.6237457990646362, "learning_rate": 0.0001, "loss": 1.5755, "step": 11058 }, { "epoch": 1.2703463327781288, "grad_norm": 0.5337412357330322, "learning_rate": 0.0001, "loss": 1.2787, "step": 11059 }, { "epoch": 1.2704612026879558, "grad_norm": 0.5789129137992859, "learning_rate": 0.0001, "loss": 1.5369, "step": 11060 }, { "epoch": 1.270576072597783, "grad_norm": 0.6278301477432251, "learning_rate": 0.0001, "loss": 1.6136, "step": 11061 }, { "epoch": 1.27069094250761, "grad_norm": 0.5445199608802795, "learning_rate": 0.0001, "loss": 1.4081, "step": 11062 }, { "epoch": 1.2708058124174373, "grad_norm": 0.5890607833862305, "learning_rate": 0.0001, "loss": 1.6572, "step": 11063 }, { "epoch": 1.2709206823272643, "grad_norm": 0.5365835428237915, "learning_rate": 0.0001, "loss": 1.4548, "step": 11064 }, { "epoch": 1.2710355522370915, "grad_norm": 0.5734820365905762, "learning_rate": 0.0001, "loss": 1.6119, "step": 11065 }, { "epoch": 1.2711504221469185, "grad_norm": 0.5551184415817261, "learning_rate": 0.0001, "loss": 1.4606, "step": 11066 }, { "epoch": 1.2712652920567458, "grad_norm": 0.5175613760948181, "learning_rate": 0.0001, "loss": 1.4638, "step": 11067 }, { "epoch": 1.2713801619665728, "grad_norm": 0.5665346384048462, "learning_rate": 0.0001, "loss": 1.3524, "step": 11068 }, { "epoch": 1.2714950318764, "grad_norm": 0.6107744574546814, "learning_rate": 0.0001, "loss": 1.528, "step": 11069 }, { "epoch": 1.271609901786227, "grad_norm": 0.5816113948822021, "learning_rate": 0.0001, "loss": 1.4268, "step": 11070 }, { "epoch": 1.2717247716960542, "grad_norm": 0.5861494541168213, "learning_rate": 0.0001, "loss": 1.5562, "step": 11071 }, { "epoch": 1.2718396416058813, "grad_norm": 0.5912983417510986, "learning_rate": 0.0001, "loss": 1.4563, "step": 11072 }, { "epoch": 1.2719545115157085, "grad_norm": 0.5545825362205505, "learning_rate": 0.0001, "loss": 1.2985, "step": 11073 }, { "epoch": 1.2720693814255357, "grad_norm": 0.5643607974052429, "learning_rate": 0.0001, "loss": 1.4998, "step": 11074 }, { "epoch": 1.2721842513353627, "grad_norm": 0.5997006297111511, "learning_rate": 0.0001, "loss": 1.5208, "step": 11075 }, { "epoch": 1.2722991212451897, "grad_norm": 0.5586094260215759, "learning_rate": 0.0001, "loss": 1.453, "step": 11076 }, { "epoch": 1.272413991155017, "grad_norm": 0.5602713227272034, "learning_rate": 0.0001, "loss": 1.4295, "step": 11077 }, { "epoch": 1.2725288610648442, "grad_norm": 0.543787956237793, "learning_rate": 0.0001, "loss": 1.4595, "step": 11078 }, { "epoch": 1.2726437309746712, "grad_norm": 0.5545603036880493, "learning_rate": 0.0001, "loss": 1.5192, "step": 11079 }, { "epoch": 1.2727586008844982, "grad_norm": 0.6172767281532288, "learning_rate": 0.0001, "loss": 1.6377, "step": 11080 }, { "epoch": 1.2728734707943254, "grad_norm": 0.5919816493988037, "learning_rate": 0.0001, "loss": 1.5502, "step": 11081 }, { "epoch": 1.2729883407041527, "grad_norm": 0.5594301819801331, "learning_rate": 0.0001, "loss": 1.5123, "step": 11082 }, { "epoch": 1.2731032106139797, "grad_norm": 0.6125577092170715, "learning_rate": 0.0001, "loss": 1.468, "step": 11083 }, { "epoch": 1.2732180805238067, "grad_norm": 0.5834252834320068, "learning_rate": 0.0001, "loss": 1.5652, "step": 11084 }, { "epoch": 1.273332950433634, "grad_norm": 0.5693926215171814, "learning_rate": 0.0001, "loss": 1.6421, "step": 11085 }, { "epoch": 1.2734478203434612, "grad_norm": 0.5403253436088562, "learning_rate": 0.0001, "loss": 1.4777, "step": 11086 }, { "epoch": 1.2735626902532882, "grad_norm": 0.5751697421073914, "learning_rate": 0.0001, "loss": 1.5373, "step": 11087 }, { "epoch": 1.2736775601631152, "grad_norm": 0.5498262643814087, "learning_rate": 0.0001, "loss": 1.5295, "step": 11088 }, { "epoch": 1.2737924300729424, "grad_norm": 0.5573551654815674, "learning_rate": 0.0001, "loss": 1.5071, "step": 11089 }, { "epoch": 1.2739072999827696, "grad_norm": 0.5851514339447021, "learning_rate": 0.0001, "loss": 1.5744, "step": 11090 }, { "epoch": 1.2740221698925966, "grad_norm": 0.5665884017944336, "learning_rate": 0.0001, "loss": 1.5061, "step": 11091 }, { "epoch": 1.2741370398024237, "grad_norm": 0.5661911368370056, "learning_rate": 0.0001, "loss": 1.4927, "step": 11092 }, { "epoch": 1.2742519097122509, "grad_norm": 0.5676097869873047, "learning_rate": 0.0001, "loss": 1.5236, "step": 11093 }, { "epoch": 1.2743667796220781, "grad_norm": 0.62990403175354, "learning_rate": 0.0001, "loss": 1.6448, "step": 11094 }, { "epoch": 1.2744816495319051, "grad_norm": 0.5617504715919495, "learning_rate": 0.0001, "loss": 1.3303, "step": 11095 }, { "epoch": 1.2745965194417321, "grad_norm": 0.603344738483429, "learning_rate": 0.0001, "loss": 1.3515, "step": 11096 }, { "epoch": 1.2747113893515594, "grad_norm": 0.5757971405982971, "learning_rate": 0.0001, "loss": 1.53, "step": 11097 }, { "epoch": 1.2748262592613866, "grad_norm": 0.5930181741714478, "learning_rate": 0.0001, "loss": 1.4135, "step": 11098 }, { "epoch": 1.2749411291712136, "grad_norm": 0.5848099589347839, "learning_rate": 0.0001, "loss": 1.5375, "step": 11099 }, { "epoch": 1.2750559990810406, "grad_norm": 0.585987389087677, "learning_rate": 0.0001, "loss": 1.5055, "step": 11100 }, { "epoch": 1.2751708689908678, "grad_norm": 0.6561366319656372, "learning_rate": 0.0001, "loss": 1.7621, "step": 11101 }, { "epoch": 1.275285738900695, "grad_norm": 0.6223732233047485, "learning_rate": 0.0001, "loss": 1.5776, "step": 11102 }, { "epoch": 1.275400608810522, "grad_norm": 0.5525573492050171, "learning_rate": 0.0001, "loss": 1.4327, "step": 11103 }, { "epoch": 1.275515478720349, "grad_norm": 0.5294277667999268, "learning_rate": 0.0001, "loss": 1.3882, "step": 11104 }, { "epoch": 1.2756303486301763, "grad_norm": 0.5459890961647034, "learning_rate": 0.0001, "loss": 1.3725, "step": 11105 }, { "epoch": 1.2757452185400036, "grad_norm": 0.5299192667007446, "learning_rate": 0.0001, "loss": 1.4483, "step": 11106 }, { "epoch": 1.2758600884498306, "grad_norm": 0.5696981549263, "learning_rate": 0.0001, "loss": 1.5508, "step": 11107 }, { "epoch": 1.2759749583596576, "grad_norm": 0.5575034022331238, "learning_rate": 0.0001, "loss": 1.2812, "step": 11108 }, { "epoch": 1.2760898282694848, "grad_norm": 0.570679783821106, "learning_rate": 0.0001, "loss": 1.4828, "step": 11109 }, { "epoch": 1.276204698179312, "grad_norm": 0.5604519248008728, "learning_rate": 0.0001, "loss": 1.5221, "step": 11110 }, { "epoch": 1.276319568089139, "grad_norm": 0.5516342520713806, "learning_rate": 0.0001, "loss": 1.4681, "step": 11111 }, { "epoch": 1.276434437998966, "grad_norm": 0.5541993379592896, "learning_rate": 0.0001, "loss": 1.6564, "step": 11112 }, { "epoch": 1.2765493079087933, "grad_norm": 0.6293795108795166, "learning_rate": 0.0001, "loss": 1.6179, "step": 11113 }, { "epoch": 1.2766641778186205, "grad_norm": 0.5466127395629883, "learning_rate": 0.0001, "loss": 1.4961, "step": 11114 }, { "epoch": 1.2767790477284475, "grad_norm": 0.5696339011192322, "learning_rate": 0.0001, "loss": 1.6104, "step": 11115 }, { "epoch": 1.2768939176382745, "grad_norm": 0.5600918531417847, "learning_rate": 0.0001, "loss": 1.3857, "step": 11116 }, { "epoch": 1.2770087875481018, "grad_norm": 0.5669732093811035, "learning_rate": 0.0001, "loss": 1.3401, "step": 11117 }, { "epoch": 1.277123657457929, "grad_norm": 0.5944907665252686, "learning_rate": 0.0001, "loss": 1.4226, "step": 11118 }, { "epoch": 1.277238527367756, "grad_norm": 0.5800361633300781, "learning_rate": 0.0001, "loss": 1.4727, "step": 11119 }, { "epoch": 1.277353397277583, "grad_norm": 0.5681383609771729, "learning_rate": 0.0001, "loss": 1.4153, "step": 11120 }, { "epoch": 1.2774682671874102, "grad_norm": 0.5693331956863403, "learning_rate": 0.0001, "loss": 1.7115, "step": 11121 }, { "epoch": 1.2775831370972375, "grad_norm": 0.5463958978652954, "learning_rate": 0.0001, "loss": 1.5031, "step": 11122 }, { "epoch": 1.2776980070070645, "grad_norm": 0.5928342342376709, "learning_rate": 0.0001, "loss": 1.4823, "step": 11123 }, { "epoch": 1.2778128769168915, "grad_norm": 0.5508242845535278, "learning_rate": 0.0001, "loss": 1.4824, "step": 11124 }, { "epoch": 1.2779277468267187, "grad_norm": 0.5686891078948975, "learning_rate": 0.0001, "loss": 1.6338, "step": 11125 }, { "epoch": 1.278042616736546, "grad_norm": 0.5789403915405273, "learning_rate": 0.0001, "loss": 1.4, "step": 11126 }, { "epoch": 1.278157486646373, "grad_norm": 0.6012910604476929, "learning_rate": 0.0001, "loss": 1.5804, "step": 11127 }, { "epoch": 1.2782723565562, "grad_norm": 0.5584916472434998, "learning_rate": 0.0001, "loss": 1.5045, "step": 11128 }, { "epoch": 1.2783872264660272, "grad_norm": 0.6591640710830688, "learning_rate": 0.0001, "loss": 1.5097, "step": 11129 }, { "epoch": 1.2785020963758544, "grad_norm": 0.611392080783844, "learning_rate": 0.0001, "loss": 1.3733, "step": 11130 }, { "epoch": 1.2786169662856814, "grad_norm": 0.5938810706138611, "learning_rate": 0.0001, "loss": 1.5768, "step": 11131 }, { "epoch": 1.2787318361955085, "grad_norm": 0.5487352609634399, "learning_rate": 0.0001, "loss": 1.2881, "step": 11132 }, { "epoch": 1.2788467061053357, "grad_norm": 0.5437043905258179, "learning_rate": 0.0001, "loss": 1.394, "step": 11133 }, { "epoch": 1.278961576015163, "grad_norm": 0.6171312928199768, "learning_rate": 0.0001, "loss": 1.5819, "step": 11134 }, { "epoch": 1.27907644592499, "grad_norm": 0.5747122168540955, "learning_rate": 0.0001, "loss": 1.4663, "step": 11135 }, { "epoch": 1.279191315834817, "grad_norm": 0.5911594033241272, "learning_rate": 0.0001, "loss": 1.4412, "step": 11136 }, { "epoch": 1.2793061857446442, "grad_norm": 0.5997012257575989, "learning_rate": 0.0001, "loss": 1.3998, "step": 11137 }, { "epoch": 1.2794210556544714, "grad_norm": 0.5709753632545471, "learning_rate": 0.0001, "loss": 1.37, "step": 11138 }, { "epoch": 1.2795359255642984, "grad_norm": 0.5752970576286316, "learning_rate": 0.0001, "loss": 1.2787, "step": 11139 }, { "epoch": 1.2796507954741256, "grad_norm": 0.7025997042655945, "learning_rate": 0.0001, "loss": 1.6239, "step": 11140 }, { "epoch": 1.2797656653839526, "grad_norm": 0.5793277025222778, "learning_rate": 0.0001, "loss": 1.4309, "step": 11141 }, { "epoch": 1.2798805352937799, "grad_norm": 0.550739049911499, "learning_rate": 0.0001, "loss": 1.471, "step": 11142 }, { "epoch": 1.2799954052036069, "grad_norm": 0.5863038897514343, "learning_rate": 0.0001, "loss": 1.5955, "step": 11143 }, { "epoch": 1.2801102751134341, "grad_norm": 0.5753504633903503, "learning_rate": 0.0001, "loss": 1.3984, "step": 11144 }, { "epoch": 1.2802251450232611, "grad_norm": 0.566723644733429, "learning_rate": 0.0001, "loss": 1.3528, "step": 11145 }, { "epoch": 1.2803400149330884, "grad_norm": 0.600090503692627, "learning_rate": 0.0001, "loss": 1.5385, "step": 11146 }, { "epoch": 1.2804548848429154, "grad_norm": 0.6001926064491272, "learning_rate": 0.0001, "loss": 1.3591, "step": 11147 }, { "epoch": 1.2805697547527426, "grad_norm": 0.5610960721969604, "learning_rate": 0.0001, "loss": 1.4732, "step": 11148 }, { "epoch": 1.2806846246625696, "grad_norm": 0.5722346305847168, "learning_rate": 0.0001, "loss": 1.5572, "step": 11149 }, { "epoch": 1.2807994945723968, "grad_norm": 0.6043455600738525, "learning_rate": 0.0001, "loss": 1.635, "step": 11150 }, { "epoch": 1.2809143644822238, "grad_norm": 0.6156902313232422, "learning_rate": 0.0001, "loss": 1.6608, "step": 11151 }, { "epoch": 1.281029234392051, "grad_norm": 0.531518280506134, "learning_rate": 0.0001, "loss": 1.4802, "step": 11152 }, { "epoch": 1.281144104301878, "grad_norm": 0.5361231565475464, "learning_rate": 0.0001, "loss": 1.4514, "step": 11153 }, { "epoch": 1.2812589742117053, "grad_norm": 0.6165813207626343, "learning_rate": 0.0001, "loss": 1.73, "step": 11154 }, { "epoch": 1.2813738441215323, "grad_norm": 0.5662755966186523, "learning_rate": 0.0001, "loss": 1.5448, "step": 11155 }, { "epoch": 1.2814887140313596, "grad_norm": 0.5308471322059631, "learning_rate": 0.0001, "loss": 1.3735, "step": 11156 }, { "epoch": 1.2816035839411866, "grad_norm": 0.7272038459777832, "learning_rate": 0.0001, "loss": 1.5976, "step": 11157 }, { "epoch": 1.2817184538510138, "grad_norm": 0.606468141078949, "learning_rate": 0.0001, "loss": 1.4419, "step": 11158 }, { "epoch": 1.2818333237608408, "grad_norm": 0.5892067551612854, "learning_rate": 0.0001, "loss": 1.4458, "step": 11159 }, { "epoch": 1.281948193670668, "grad_norm": 0.5960896015167236, "learning_rate": 0.0001, "loss": 1.4519, "step": 11160 }, { "epoch": 1.282063063580495, "grad_norm": 0.5746586322784424, "learning_rate": 0.0001, "loss": 1.5134, "step": 11161 }, { "epoch": 1.2821779334903223, "grad_norm": 0.5626453757286072, "learning_rate": 0.0001, "loss": 1.3905, "step": 11162 }, { "epoch": 1.2822928034001493, "grad_norm": 0.615481972694397, "learning_rate": 0.0001, "loss": 1.5979, "step": 11163 }, { "epoch": 1.2824076733099765, "grad_norm": 0.56350177526474, "learning_rate": 0.0001, "loss": 1.5846, "step": 11164 }, { "epoch": 1.2825225432198035, "grad_norm": 0.5434756278991699, "learning_rate": 0.0001, "loss": 1.2041, "step": 11165 }, { "epoch": 1.2826374131296308, "grad_norm": 0.5499529838562012, "learning_rate": 0.0001, "loss": 1.3228, "step": 11166 }, { "epoch": 1.2827522830394578, "grad_norm": 0.6841591000556946, "learning_rate": 0.0001, "loss": 1.5102, "step": 11167 }, { "epoch": 1.282867152949285, "grad_norm": 0.5592834949493408, "learning_rate": 0.0001, "loss": 1.5731, "step": 11168 }, { "epoch": 1.282982022859112, "grad_norm": 0.5657499432563782, "learning_rate": 0.0001, "loss": 1.3523, "step": 11169 }, { "epoch": 1.2830968927689392, "grad_norm": 0.7231560945510864, "learning_rate": 0.0001, "loss": 1.4121, "step": 11170 }, { "epoch": 1.2832117626787662, "grad_norm": 0.6643901467323303, "learning_rate": 0.0001, "loss": 1.723, "step": 11171 }, { "epoch": 1.2833266325885935, "grad_norm": 0.5481140613555908, "learning_rate": 0.0001, "loss": 1.4084, "step": 11172 }, { "epoch": 1.2834415024984205, "grad_norm": 0.5619171857833862, "learning_rate": 0.0001, "loss": 1.5306, "step": 11173 }, { "epoch": 1.2835563724082477, "grad_norm": 0.6013187170028687, "learning_rate": 0.0001, "loss": 1.3116, "step": 11174 }, { "epoch": 1.2836712423180747, "grad_norm": 0.5309972167015076, "learning_rate": 0.0001, "loss": 1.3814, "step": 11175 }, { "epoch": 1.283786112227902, "grad_norm": 0.5112704038619995, "learning_rate": 0.0001, "loss": 1.4198, "step": 11176 }, { "epoch": 1.283900982137729, "grad_norm": 0.5378767251968384, "learning_rate": 0.0001, "loss": 1.4499, "step": 11177 }, { "epoch": 1.2840158520475562, "grad_norm": 0.5731784105300903, "learning_rate": 0.0001, "loss": 1.4214, "step": 11178 }, { "epoch": 1.2841307219573832, "grad_norm": 0.520551860332489, "learning_rate": 0.0001, "loss": 1.3355, "step": 11179 }, { "epoch": 1.2842455918672104, "grad_norm": 0.6834689378738403, "learning_rate": 0.0001, "loss": 1.5859, "step": 11180 }, { "epoch": 1.2843604617770374, "grad_norm": 0.6506545543670654, "learning_rate": 0.0001, "loss": 1.5872, "step": 11181 }, { "epoch": 1.2844753316868647, "grad_norm": 0.5278921723365784, "learning_rate": 0.0001, "loss": 1.2855, "step": 11182 }, { "epoch": 1.2845902015966917, "grad_norm": 0.6012974977493286, "learning_rate": 0.0001, "loss": 1.3572, "step": 11183 }, { "epoch": 1.284705071506519, "grad_norm": 0.59954434633255, "learning_rate": 0.0001, "loss": 1.5348, "step": 11184 }, { "epoch": 1.284819941416346, "grad_norm": 0.5746206045150757, "learning_rate": 0.0001, "loss": 1.5353, "step": 11185 }, { "epoch": 1.2849348113261732, "grad_norm": 0.5760904550552368, "learning_rate": 0.0001, "loss": 1.4865, "step": 11186 }, { "epoch": 1.2850496812360002, "grad_norm": 0.6236897110939026, "learning_rate": 0.0001, "loss": 1.446, "step": 11187 }, { "epoch": 1.2851645511458274, "grad_norm": 0.5215889811515808, "learning_rate": 0.0001, "loss": 1.4459, "step": 11188 }, { "epoch": 1.2852794210556544, "grad_norm": 0.5659036040306091, "learning_rate": 0.0001, "loss": 1.5912, "step": 11189 }, { "epoch": 1.2853942909654816, "grad_norm": 0.510826587677002, "learning_rate": 0.0001, "loss": 1.3339, "step": 11190 }, { "epoch": 1.2855091608753086, "grad_norm": 0.5990018844604492, "learning_rate": 0.0001, "loss": 1.673, "step": 11191 }, { "epoch": 1.2856240307851359, "grad_norm": 0.6071823835372925, "learning_rate": 0.0001, "loss": 1.6327, "step": 11192 }, { "epoch": 1.2857389006949629, "grad_norm": 0.6266348958015442, "learning_rate": 0.0001, "loss": 1.7044, "step": 11193 }, { "epoch": 1.2858537706047901, "grad_norm": 0.6025568246841431, "learning_rate": 0.0001, "loss": 1.2896, "step": 11194 }, { "epoch": 1.2859686405146171, "grad_norm": 0.6044636368751526, "learning_rate": 0.0001, "loss": 1.4106, "step": 11195 }, { "epoch": 1.2860835104244444, "grad_norm": 0.7049621939659119, "learning_rate": 0.0001, "loss": 1.8122, "step": 11196 }, { "epoch": 1.2861983803342714, "grad_norm": 0.5462698936462402, "learning_rate": 0.0001, "loss": 1.484, "step": 11197 }, { "epoch": 1.2863132502440986, "grad_norm": 0.5640134811401367, "learning_rate": 0.0001, "loss": 1.5136, "step": 11198 }, { "epoch": 1.2864281201539256, "grad_norm": 0.5560702681541443, "learning_rate": 0.0001, "loss": 1.4912, "step": 11199 }, { "epoch": 1.2865429900637528, "grad_norm": 0.5284111499786377, "learning_rate": 0.0001, "loss": 1.2599, "step": 11200 }, { "epoch": 1.2866578599735798, "grad_norm": 0.5436888933181763, "learning_rate": 0.0001, "loss": 1.3779, "step": 11201 }, { "epoch": 1.286772729883407, "grad_norm": 0.5959984660148621, "learning_rate": 0.0001, "loss": 1.7252, "step": 11202 }, { "epoch": 1.286887599793234, "grad_norm": 0.5684484839439392, "learning_rate": 0.0001, "loss": 1.4591, "step": 11203 }, { "epoch": 1.2870024697030613, "grad_norm": 0.5544134974479675, "learning_rate": 0.0001, "loss": 1.5097, "step": 11204 }, { "epoch": 1.2871173396128883, "grad_norm": 0.5449683666229248, "learning_rate": 0.0001, "loss": 1.4292, "step": 11205 }, { "epoch": 1.2872322095227156, "grad_norm": 0.5800986289978027, "learning_rate": 0.0001, "loss": 1.5465, "step": 11206 }, { "epoch": 1.2873470794325426, "grad_norm": 0.5813232064247131, "learning_rate": 0.0001, "loss": 1.5989, "step": 11207 }, { "epoch": 1.2874619493423698, "grad_norm": 0.5666254758834839, "learning_rate": 0.0001, "loss": 1.5075, "step": 11208 }, { "epoch": 1.2875768192521968, "grad_norm": 0.6252794861793518, "learning_rate": 0.0001, "loss": 1.6814, "step": 11209 }, { "epoch": 1.287691689162024, "grad_norm": 0.5321157574653625, "learning_rate": 0.0001, "loss": 1.3024, "step": 11210 }, { "epoch": 1.2878065590718513, "grad_norm": 0.5458829998970032, "learning_rate": 0.0001, "loss": 1.3577, "step": 11211 }, { "epoch": 1.2879214289816783, "grad_norm": 0.5970067977905273, "learning_rate": 0.0001, "loss": 1.4105, "step": 11212 }, { "epoch": 1.2880362988915053, "grad_norm": 0.5711007118225098, "learning_rate": 0.0001, "loss": 1.4712, "step": 11213 }, { "epoch": 1.2881511688013325, "grad_norm": 0.5796260833740234, "learning_rate": 0.0001, "loss": 1.5902, "step": 11214 }, { "epoch": 1.2882660387111597, "grad_norm": 0.6137135028839111, "learning_rate": 0.0001, "loss": 1.6444, "step": 11215 }, { "epoch": 1.2883809086209868, "grad_norm": 0.6448894739151001, "learning_rate": 0.0001, "loss": 1.4776, "step": 11216 }, { "epoch": 1.2884957785308138, "grad_norm": 0.5258020758628845, "learning_rate": 0.0001, "loss": 1.396, "step": 11217 }, { "epoch": 1.288610648440641, "grad_norm": 0.5507315397262573, "learning_rate": 0.0001, "loss": 1.3656, "step": 11218 }, { "epoch": 1.2887255183504682, "grad_norm": 0.5842088460922241, "learning_rate": 0.0001, "loss": 1.5746, "step": 11219 }, { "epoch": 1.2888403882602952, "grad_norm": 0.6149991154670715, "learning_rate": 0.0001, "loss": 1.5401, "step": 11220 }, { "epoch": 1.2889552581701222, "grad_norm": 0.5865837335586548, "learning_rate": 0.0001, "loss": 1.3607, "step": 11221 }, { "epoch": 1.2890701280799495, "grad_norm": 0.5908994078636169, "learning_rate": 0.0001, "loss": 1.403, "step": 11222 }, { "epoch": 1.2891849979897767, "grad_norm": 0.5459616184234619, "learning_rate": 0.0001, "loss": 1.3524, "step": 11223 }, { "epoch": 1.2892998678996037, "grad_norm": 0.6645961403846741, "learning_rate": 0.0001, "loss": 1.2631, "step": 11224 }, { "epoch": 1.2894147378094307, "grad_norm": 0.6288965344429016, "learning_rate": 0.0001, "loss": 1.5521, "step": 11225 }, { "epoch": 1.289529607719258, "grad_norm": 0.6147708892822266, "learning_rate": 0.0001, "loss": 1.4303, "step": 11226 }, { "epoch": 1.2896444776290852, "grad_norm": 0.6296586394309998, "learning_rate": 0.0001, "loss": 1.5909, "step": 11227 }, { "epoch": 1.2897593475389122, "grad_norm": 0.5921497344970703, "learning_rate": 0.0001, "loss": 1.4274, "step": 11228 }, { "epoch": 1.2898742174487392, "grad_norm": 0.6173571348190308, "learning_rate": 0.0001, "loss": 1.6724, "step": 11229 }, { "epoch": 1.2899890873585664, "grad_norm": 0.6189887523651123, "learning_rate": 0.0001, "loss": 1.3725, "step": 11230 }, { "epoch": 1.2901039572683937, "grad_norm": 0.569510817527771, "learning_rate": 0.0001, "loss": 1.4343, "step": 11231 }, { "epoch": 1.2902188271782207, "grad_norm": 0.622203528881073, "learning_rate": 0.0001, "loss": 1.4382, "step": 11232 }, { "epoch": 1.2903336970880477, "grad_norm": 0.6754775643348694, "learning_rate": 0.0001, "loss": 1.6161, "step": 11233 }, { "epoch": 1.290448566997875, "grad_norm": 0.5515174865722656, "learning_rate": 0.0001, "loss": 1.5448, "step": 11234 }, { "epoch": 1.2905634369077021, "grad_norm": 0.6426066160202026, "learning_rate": 0.0001, "loss": 1.5509, "step": 11235 }, { "epoch": 1.2906783068175292, "grad_norm": 0.6196022033691406, "learning_rate": 0.0001, "loss": 1.5919, "step": 11236 }, { "epoch": 1.2907931767273562, "grad_norm": 0.5864408612251282, "learning_rate": 0.0001, "loss": 1.4135, "step": 11237 }, { "epoch": 1.2909080466371834, "grad_norm": 0.607479989528656, "learning_rate": 0.0001, "loss": 1.5987, "step": 11238 }, { "epoch": 1.2910229165470106, "grad_norm": 0.5427273511886597, "learning_rate": 0.0001, "loss": 1.374, "step": 11239 }, { "epoch": 1.2911377864568376, "grad_norm": 0.5896614789962769, "learning_rate": 0.0001, "loss": 1.5488, "step": 11240 }, { "epoch": 1.2912526563666646, "grad_norm": 0.5615909695625305, "learning_rate": 0.0001, "loss": 1.5587, "step": 11241 }, { "epoch": 1.2913675262764919, "grad_norm": 0.6087133884429932, "learning_rate": 0.0001, "loss": 1.5799, "step": 11242 }, { "epoch": 1.291482396186319, "grad_norm": 0.5094661712646484, "learning_rate": 0.0001, "loss": 1.2658, "step": 11243 }, { "epoch": 1.2915972660961461, "grad_norm": 0.6043559908866882, "learning_rate": 0.0001, "loss": 1.4343, "step": 11244 }, { "epoch": 1.2917121360059731, "grad_norm": 0.586310625076294, "learning_rate": 0.0001, "loss": 1.607, "step": 11245 }, { "epoch": 1.2918270059158004, "grad_norm": 0.5929206013679504, "learning_rate": 0.0001, "loss": 1.2632, "step": 11246 }, { "epoch": 1.2919418758256276, "grad_norm": 0.5981222987174988, "learning_rate": 0.0001, "loss": 1.5347, "step": 11247 }, { "epoch": 1.2920567457354546, "grad_norm": 0.5489922761917114, "learning_rate": 0.0001, "loss": 1.3284, "step": 11248 }, { "epoch": 1.2921716156452816, "grad_norm": 0.5560479164123535, "learning_rate": 0.0001, "loss": 1.2838, "step": 11249 }, { "epoch": 1.2922864855551088, "grad_norm": 0.5626586675643921, "learning_rate": 0.0001, "loss": 1.5256, "step": 11250 }, { "epoch": 1.292401355464936, "grad_norm": 0.5712599158287048, "learning_rate": 0.0001, "loss": 1.598, "step": 11251 }, { "epoch": 1.292516225374763, "grad_norm": 0.5926998257637024, "learning_rate": 0.0001, "loss": 1.4374, "step": 11252 }, { "epoch": 1.29263109528459, "grad_norm": 0.5249360799789429, "learning_rate": 0.0001, "loss": 1.2447, "step": 11253 }, { "epoch": 1.2927459651944173, "grad_norm": 0.5579542517662048, "learning_rate": 0.0001, "loss": 1.3727, "step": 11254 }, { "epoch": 1.2928608351042445, "grad_norm": 0.5543383955955505, "learning_rate": 0.0001, "loss": 1.3233, "step": 11255 }, { "epoch": 1.2929757050140716, "grad_norm": 0.600277304649353, "learning_rate": 0.0001, "loss": 1.5099, "step": 11256 }, { "epoch": 1.2930905749238986, "grad_norm": 0.5769601464271545, "learning_rate": 0.0001, "loss": 1.1063, "step": 11257 }, { "epoch": 1.2932054448337258, "grad_norm": 0.5800927877426147, "learning_rate": 0.0001, "loss": 1.5813, "step": 11258 }, { "epoch": 1.293320314743553, "grad_norm": 0.5866220593452454, "learning_rate": 0.0001, "loss": 1.4052, "step": 11259 }, { "epoch": 1.29343518465338, "grad_norm": 0.5848959684371948, "learning_rate": 0.0001, "loss": 1.3863, "step": 11260 }, { "epoch": 1.293550054563207, "grad_norm": 0.5541538000106812, "learning_rate": 0.0001, "loss": 1.325, "step": 11261 }, { "epoch": 1.2936649244730343, "grad_norm": 0.6487392783164978, "learning_rate": 0.0001, "loss": 1.4631, "step": 11262 }, { "epoch": 1.2937797943828615, "grad_norm": 0.6783632040023804, "learning_rate": 0.0001, "loss": 1.7223, "step": 11263 }, { "epoch": 1.2938946642926885, "grad_norm": 0.5799053907394409, "learning_rate": 0.0001, "loss": 1.6532, "step": 11264 }, { "epoch": 1.2940095342025155, "grad_norm": 0.5588035583496094, "learning_rate": 0.0001, "loss": 1.3215, "step": 11265 }, { "epoch": 1.2941244041123428, "grad_norm": 0.5953567028045654, "learning_rate": 0.0001, "loss": 1.6348, "step": 11266 }, { "epoch": 1.29423927402217, "grad_norm": 0.5587005615234375, "learning_rate": 0.0001, "loss": 1.5139, "step": 11267 }, { "epoch": 1.294354143931997, "grad_norm": 0.6000686287879944, "learning_rate": 0.0001, "loss": 1.3503, "step": 11268 }, { "epoch": 1.294469013841824, "grad_norm": 0.5800769329071045, "learning_rate": 0.0001, "loss": 1.5284, "step": 11269 }, { "epoch": 1.2945838837516512, "grad_norm": 0.5538628101348877, "learning_rate": 0.0001, "loss": 1.2169, "step": 11270 }, { "epoch": 1.2946987536614785, "grad_norm": 0.5448545217514038, "learning_rate": 0.0001, "loss": 1.5361, "step": 11271 }, { "epoch": 1.2948136235713055, "grad_norm": 0.5209142565727234, "learning_rate": 0.0001, "loss": 1.3355, "step": 11272 }, { "epoch": 1.2949284934811325, "grad_norm": 0.5593119859695435, "learning_rate": 0.0001, "loss": 1.4323, "step": 11273 }, { "epoch": 1.2950433633909597, "grad_norm": 0.5502519607543945, "learning_rate": 0.0001, "loss": 1.4902, "step": 11274 }, { "epoch": 1.295158233300787, "grad_norm": 0.5910590887069702, "learning_rate": 0.0001, "loss": 1.4953, "step": 11275 }, { "epoch": 1.295273103210614, "grad_norm": 0.5811395645141602, "learning_rate": 0.0001, "loss": 1.6886, "step": 11276 }, { "epoch": 1.2953879731204412, "grad_norm": 0.582584798336029, "learning_rate": 0.0001, "loss": 1.4012, "step": 11277 }, { "epoch": 1.2955028430302682, "grad_norm": 0.5751772522926331, "learning_rate": 0.0001, "loss": 1.3962, "step": 11278 }, { "epoch": 1.2956177129400954, "grad_norm": 0.5952954888343811, "learning_rate": 0.0001, "loss": 1.5932, "step": 11279 }, { "epoch": 1.2957325828499224, "grad_norm": 0.5689733028411865, "learning_rate": 0.0001, "loss": 1.43, "step": 11280 }, { "epoch": 1.2958474527597497, "grad_norm": 0.5460832715034485, "learning_rate": 0.0001, "loss": 1.4803, "step": 11281 }, { "epoch": 1.2959623226695767, "grad_norm": 0.604256808757782, "learning_rate": 0.0001, "loss": 1.5998, "step": 11282 }, { "epoch": 1.296077192579404, "grad_norm": 0.5800360441207886, "learning_rate": 0.0001, "loss": 1.4641, "step": 11283 }, { "epoch": 1.296192062489231, "grad_norm": 0.529690146446228, "learning_rate": 0.0001, "loss": 1.3187, "step": 11284 }, { "epoch": 1.2963069323990581, "grad_norm": 0.5543114542961121, "learning_rate": 0.0001, "loss": 1.5448, "step": 11285 }, { "epoch": 1.2964218023088852, "grad_norm": 0.5886980295181274, "learning_rate": 0.0001, "loss": 1.5753, "step": 11286 }, { "epoch": 1.2965366722187124, "grad_norm": 0.5955923795700073, "learning_rate": 0.0001, "loss": 1.6415, "step": 11287 }, { "epoch": 1.2966515421285394, "grad_norm": 0.6192274689674377, "learning_rate": 0.0001, "loss": 1.5773, "step": 11288 }, { "epoch": 1.2967664120383666, "grad_norm": 0.5242689251899719, "learning_rate": 0.0001, "loss": 1.3975, "step": 11289 }, { "epoch": 1.2968812819481936, "grad_norm": 0.6103929877281189, "learning_rate": 0.0001, "loss": 1.5521, "step": 11290 }, { "epoch": 1.2969961518580209, "grad_norm": 0.5915534496307373, "learning_rate": 0.0001, "loss": 1.4537, "step": 11291 }, { "epoch": 1.2971110217678479, "grad_norm": 0.5651288032531738, "learning_rate": 0.0001, "loss": 1.2593, "step": 11292 }, { "epoch": 1.297225891677675, "grad_norm": 0.5925031304359436, "learning_rate": 0.0001, "loss": 1.4754, "step": 11293 }, { "epoch": 1.2973407615875021, "grad_norm": 0.5318403840065002, "learning_rate": 0.0001, "loss": 1.3499, "step": 11294 }, { "epoch": 1.2974556314973293, "grad_norm": 0.5483918786048889, "learning_rate": 0.0001, "loss": 1.45, "step": 11295 }, { "epoch": 1.2975705014071564, "grad_norm": 0.561732828617096, "learning_rate": 0.0001, "loss": 1.4525, "step": 11296 }, { "epoch": 1.2976853713169836, "grad_norm": 0.5497708916664124, "learning_rate": 0.0001, "loss": 1.2997, "step": 11297 }, { "epoch": 1.2978002412268106, "grad_norm": 0.5718294382095337, "learning_rate": 0.0001, "loss": 1.5627, "step": 11298 }, { "epoch": 1.2979151111366378, "grad_norm": 0.5569791793823242, "learning_rate": 0.0001, "loss": 1.5249, "step": 11299 }, { "epoch": 1.2980299810464648, "grad_norm": 0.6240097284317017, "learning_rate": 0.0001, "loss": 1.7817, "step": 11300 }, { "epoch": 1.298144850956292, "grad_norm": 0.5455806851387024, "learning_rate": 0.0001, "loss": 1.517, "step": 11301 }, { "epoch": 1.298259720866119, "grad_norm": 0.6025298833847046, "learning_rate": 0.0001, "loss": 1.4755, "step": 11302 }, { "epoch": 1.2983745907759463, "grad_norm": 0.5894249677658081, "learning_rate": 0.0001, "loss": 1.5376, "step": 11303 }, { "epoch": 1.2984894606857733, "grad_norm": 0.5962689518928528, "learning_rate": 0.0001, "loss": 1.6333, "step": 11304 }, { "epoch": 1.2986043305956005, "grad_norm": 0.6713701486587524, "learning_rate": 0.0001, "loss": 1.5085, "step": 11305 }, { "epoch": 1.2987192005054276, "grad_norm": 0.5443199276924133, "learning_rate": 0.0001, "loss": 1.4608, "step": 11306 }, { "epoch": 1.2988340704152548, "grad_norm": 0.5849419236183167, "learning_rate": 0.0001, "loss": 1.573, "step": 11307 }, { "epoch": 1.2989489403250818, "grad_norm": 0.5818788409233093, "learning_rate": 0.0001, "loss": 1.5971, "step": 11308 }, { "epoch": 1.299063810234909, "grad_norm": 0.5864542126655579, "learning_rate": 0.0001, "loss": 1.545, "step": 11309 }, { "epoch": 1.299178680144736, "grad_norm": 0.5585055947303772, "learning_rate": 0.0001, "loss": 1.3616, "step": 11310 }, { "epoch": 1.2992935500545633, "grad_norm": 0.725004255771637, "learning_rate": 0.0001, "loss": 1.7103, "step": 11311 }, { "epoch": 1.2994084199643903, "grad_norm": 0.6212374567985535, "learning_rate": 0.0001, "loss": 1.6987, "step": 11312 }, { "epoch": 1.2995232898742175, "grad_norm": 0.5447000861167908, "learning_rate": 0.0001, "loss": 1.474, "step": 11313 }, { "epoch": 1.2996381597840445, "grad_norm": 0.6027454137802124, "learning_rate": 0.0001, "loss": 1.5891, "step": 11314 }, { "epoch": 1.2997530296938717, "grad_norm": 0.568374514579773, "learning_rate": 0.0001, "loss": 1.5025, "step": 11315 }, { "epoch": 1.2998678996036988, "grad_norm": 0.6000039577484131, "learning_rate": 0.0001, "loss": 1.5641, "step": 11316 }, { "epoch": 1.299982769513526, "grad_norm": 0.5799726247787476, "learning_rate": 0.0001, "loss": 1.416, "step": 11317 }, { "epoch": 1.300097639423353, "grad_norm": 0.5564817190170288, "learning_rate": 0.0001, "loss": 1.3677, "step": 11318 }, { "epoch": 1.3002125093331802, "grad_norm": 0.537472665309906, "learning_rate": 0.0001, "loss": 1.3931, "step": 11319 }, { "epoch": 1.3003273792430072, "grad_norm": 0.577150821685791, "learning_rate": 0.0001, "loss": 1.42, "step": 11320 }, { "epoch": 1.3004422491528345, "grad_norm": 0.572995126247406, "learning_rate": 0.0001, "loss": 1.3415, "step": 11321 }, { "epoch": 1.3005571190626615, "grad_norm": 0.5590890645980835, "learning_rate": 0.0001, "loss": 1.321, "step": 11322 }, { "epoch": 1.3006719889724887, "grad_norm": 0.5591031908988953, "learning_rate": 0.0001, "loss": 1.4025, "step": 11323 }, { "epoch": 1.3007868588823157, "grad_norm": 0.5289561748504639, "learning_rate": 0.0001, "loss": 1.4627, "step": 11324 }, { "epoch": 1.300901728792143, "grad_norm": 0.6011853814125061, "learning_rate": 0.0001, "loss": 1.7713, "step": 11325 }, { "epoch": 1.30101659870197, "grad_norm": 0.5395965576171875, "learning_rate": 0.0001, "loss": 1.4244, "step": 11326 }, { "epoch": 1.3011314686117972, "grad_norm": 0.5598213076591492, "learning_rate": 0.0001, "loss": 1.5376, "step": 11327 }, { "epoch": 1.3012463385216242, "grad_norm": 0.5253582000732422, "learning_rate": 0.0001, "loss": 1.3091, "step": 11328 }, { "epoch": 1.3013612084314514, "grad_norm": 0.57694011926651, "learning_rate": 0.0001, "loss": 1.4816, "step": 11329 }, { "epoch": 1.3014760783412784, "grad_norm": 0.6319670677185059, "learning_rate": 0.0001, "loss": 1.6043, "step": 11330 }, { "epoch": 1.3015909482511057, "grad_norm": 0.6250157356262207, "learning_rate": 0.0001, "loss": 1.5442, "step": 11331 }, { "epoch": 1.3017058181609327, "grad_norm": 0.5917470455169678, "learning_rate": 0.0001, "loss": 1.3269, "step": 11332 }, { "epoch": 1.30182068807076, "grad_norm": 0.546095609664917, "learning_rate": 0.0001, "loss": 1.186, "step": 11333 }, { "epoch": 1.301935557980587, "grad_norm": 0.6022743582725525, "learning_rate": 0.0001, "loss": 1.4801, "step": 11334 }, { "epoch": 1.3020504278904141, "grad_norm": 0.6155171394348145, "learning_rate": 0.0001, "loss": 1.7134, "step": 11335 }, { "epoch": 1.3021652978002412, "grad_norm": 0.6018640995025635, "learning_rate": 0.0001, "loss": 1.6124, "step": 11336 }, { "epoch": 1.3022801677100684, "grad_norm": 0.5561271905899048, "learning_rate": 0.0001, "loss": 1.6348, "step": 11337 }, { "epoch": 1.3023950376198954, "grad_norm": 0.5770154595375061, "learning_rate": 0.0001, "loss": 1.4814, "step": 11338 }, { "epoch": 1.3025099075297226, "grad_norm": 0.6154848337173462, "learning_rate": 0.0001, "loss": 1.3091, "step": 11339 }, { "epoch": 1.3026247774395496, "grad_norm": 0.5868239402770996, "learning_rate": 0.0001, "loss": 1.5571, "step": 11340 }, { "epoch": 1.3027396473493769, "grad_norm": 0.6514554619789124, "learning_rate": 0.0001, "loss": 1.5351, "step": 11341 }, { "epoch": 1.3028545172592039, "grad_norm": 0.60183185338974, "learning_rate": 0.0001, "loss": 1.6098, "step": 11342 }, { "epoch": 1.302969387169031, "grad_norm": 0.5899741649627686, "learning_rate": 0.0001, "loss": 1.4028, "step": 11343 }, { "epoch": 1.303084257078858, "grad_norm": 0.5583611726760864, "learning_rate": 0.0001, "loss": 1.3219, "step": 11344 }, { "epoch": 1.3031991269886853, "grad_norm": 0.5439897179603577, "learning_rate": 0.0001, "loss": 1.4106, "step": 11345 }, { "epoch": 1.3033139968985124, "grad_norm": 0.5689780712127686, "learning_rate": 0.0001, "loss": 1.5358, "step": 11346 }, { "epoch": 1.3034288668083396, "grad_norm": 0.5775777697563171, "learning_rate": 0.0001, "loss": 1.562, "step": 11347 }, { "epoch": 1.3035437367181668, "grad_norm": 0.6249666810035706, "learning_rate": 0.0001, "loss": 1.2675, "step": 11348 }, { "epoch": 1.3036586066279938, "grad_norm": 0.6017664074897766, "learning_rate": 0.0001, "loss": 1.5117, "step": 11349 }, { "epoch": 1.3037734765378208, "grad_norm": 0.5548145174980164, "learning_rate": 0.0001, "loss": 1.4848, "step": 11350 }, { "epoch": 1.303888346447648, "grad_norm": 0.6580796241760254, "learning_rate": 0.0001, "loss": 1.7596, "step": 11351 }, { "epoch": 1.3040032163574753, "grad_norm": 0.5205984711647034, "learning_rate": 0.0001, "loss": 1.3289, "step": 11352 }, { "epoch": 1.3041180862673023, "grad_norm": 0.5620521903038025, "learning_rate": 0.0001, "loss": 1.4247, "step": 11353 }, { "epoch": 1.3042329561771293, "grad_norm": 0.6419723033905029, "learning_rate": 0.0001, "loss": 1.6835, "step": 11354 }, { "epoch": 1.3043478260869565, "grad_norm": 0.5205932855606079, "learning_rate": 0.0001, "loss": 1.3938, "step": 11355 }, { "epoch": 1.3044626959967838, "grad_norm": 0.5548444390296936, "learning_rate": 0.0001, "loss": 1.4959, "step": 11356 }, { "epoch": 1.3045775659066108, "grad_norm": 0.6600873470306396, "learning_rate": 0.0001, "loss": 1.6834, "step": 11357 }, { "epoch": 1.3046924358164378, "grad_norm": 0.5854970812797546, "learning_rate": 0.0001, "loss": 1.4424, "step": 11358 }, { "epoch": 1.304807305726265, "grad_norm": 0.5743281841278076, "learning_rate": 0.0001, "loss": 1.5258, "step": 11359 }, { "epoch": 1.3049221756360923, "grad_norm": 0.5808170437812805, "learning_rate": 0.0001, "loss": 1.4149, "step": 11360 }, { "epoch": 1.3050370455459193, "grad_norm": 0.5460790395736694, "learning_rate": 0.0001, "loss": 1.2765, "step": 11361 }, { "epoch": 1.3051519154557463, "grad_norm": 0.5941900610923767, "learning_rate": 0.0001, "loss": 1.6998, "step": 11362 }, { "epoch": 1.3052667853655735, "grad_norm": 0.5910804271697998, "learning_rate": 0.0001, "loss": 1.6016, "step": 11363 }, { "epoch": 1.3053816552754007, "grad_norm": 0.5705919861793518, "learning_rate": 0.0001, "loss": 1.4823, "step": 11364 }, { "epoch": 1.3054965251852277, "grad_norm": 0.5501276850700378, "learning_rate": 0.0001, "loss": 1.4298, "step": 11365 }, { "epoch": 1.3056113950950547, "grad_norm": 0.5633885860443115, "learning_rate": 0.0001, "loss": 1.5666, "step": 11366 }, { "epoch": 1.305726265004882, "grad_norm": 0.6157667636871338, "learning_rate": 0.0001, "loss": 1.4968, "step": 11367 }, { "epoch": 1.3058411349147092, "grad_norm": 0.5917145609855652, "learning_rate": 0.0001, "loss": 1.3983, "step": 11368 }, { "epoch": 1.3059560048245362, "grad_norm": 0.6143923997879028, "learning_rate": 0.0001, "loss": 1.172, "step": 11369 }, { "epoch": 1.3060708747343632, "grad_norm": 0.5878526568412781, "learning_rate": 0.0001, "loss": 1.3639, "step": 11370 }, { "epoch": 1.3061857446441905, "grad_norm": 0.5434131026268005, "learning_rate": 0.0001, "loss": 1.4643, "step": 11371 }, { "epoch": 1.3063006145540177, "grad_norm": 0.5768989324569702, "learning_rate": 0.0001, "loss": 1.4185, "step": 11372 }, { "epoch": 1.3064154844638447, "grad_norm": 0.5958837270736694, "learning_rate": 0.0001, "loss": 1.4573, "step": 11373 }, { "epoch": 1.3065303543736717, "grad_norm": 0.5841070413589478, "learning_rate": 0.0001, "loss": 1.5135, "step": 11374 }, { "epoch": 1.306645224283499, "grad_norm": 0.5326519012451172, "learning_rate": 0.0001, "loss": 1.1777, "step": 11375 }, { "epoch": 1.3067600941933262, "grad_norm": 0.640945553779602, "learning_rate": 0.0001, "loss": 1.5267, "step": 11376 }, { "epoch": 1.3068749641031532, "grad_norm": 0.546720564365387, "learning_rate": 0.0001, "loss": 1.2742, "step": 11377 }, { "epoch": 1.3069898340129802, "grad_norm": 0.546595573425293, "learning_rate": 0.0001, "loss": 1.1787, "step": 11378 }, { "epoch": 1.3071047039228074, "grad_norm": 0.6050646901130676, "learning_rate": 0.0001, "loss": 1.5851, "step": 11379 }, { "epoch": 1.3072195738326347, "grad_norm": 0.5861079096794128, "learning_rate": 0.0001, "loss": 1.6063, "step": 11380 }, { "epoch": 1.3073344437424617, "grad_norm": 0.6234222650527954, "learning_rate": 0.0001, "loss": 1.5707, "step": 11381 }, { "epoch": 1.3074493136522887, "grad_norm": 0.5523126721382141, "learning_rate": 0.0001, "loss": 1.3926, "step": 11382 }, { "epoch": 1.307564183562116, "grad_norm": 0.6148357391357422, "learning_rate": 0.0001, "loss": 1.7076, "step": 11383 }, { "epoch": 1.3076790534719431, "grad_norm": 0.5839380025863647, "learning_rate": 0.0001, "loss": 1.4059, "step": 11384 }, { "epoch": 1.3077939233817701, "grad_norm": 0.559718132019043, "learning_rate": 0.0001, "loss": 1.4998, "step": 11385 }, { "epoch": 1.3079087932915971, "grad_norm": 0.5904785990715027, "learning_rate": 0.0001, "loss": 1.4762, "step": 11386 }, { "epoch": 1.3080236632014244, "grad_norm": 0.5947718620300293, "learning_rate": 0.0001, "loss": 1.5009, "step": 11387 }, { "epoch": 1.3081385331112516, "grad_norm": 0.5415857434272766, "learning_rate": 0.0001, "loss": 1.1498, "step": 11388 }, { "epoch": 1.3082534030210786, "grad_norm": 0.5549929738044739, "learning_rate": 0.0001, "loss": 1.256, "step": 11389 }, { "epoch": 1.3083682729309056, "grad_norm": 0.5811049938201904, "learning_rate": 0.0001, "loss": 1.4635, "step": 11390 }, { "epoch": 1.3084831428407329, "grad_norm": 0.597584068775177, "learning_rate": 0.0001, "loss": 1.3548, "step": 11391 }, { "epoch": 1.30859801275056, "grad_norm": 0.6055376529693604, "learning_rate": 0.0001, "loss": 1.6609, "step": 11392 }, { "epoch": 1.308712882660387, "grad_norm": 0.5904808044433594, "learning_rate": 0.0001, "loss": 1.6138, "step": 11393 }, { "epoch": 1.308827752570214, "grad_norm": 0.6280949711799622, "learning_rate": 0.0001, "loss": 1.5001, "step": 11394 }, { "epoch": 1.3089426224800413, "grad_norm": 0.6043780446052551, "learning_rate": 0.0001, "loss": 1.5045, "step": 11395 }, { "epoch": 1.3090574923898686, "grad_norm": 0.5859163403511047, "learning_rate": 0.0001, "loss": 1.4466, "step": 11396 }, { "epoch": 1.3091723622996956, "grad_norm": 0.5889775156974792, "learning_rate": 0.0001, "loss": 1.4596, "step": 11397 }, { "epoch": 1.3092872322095226, "grad_norm": 0.5822187066078186, "learning_rate": 0.0001, "loss": 1.4784, "step": 11398 }, { "epoch": 1.3094021021193498, "grad_norm": 0.5596886873245239, "learning_rate": 0.0001, "loss": 1.4668, "step": 11399 }, { "epoch": 1.309516972029177, "grad_norm": 0.6127359867095947, "learning_rate": 0.0001, "loss": 1.4651, "step": 11400 }, { "epoch": 1.309631841939004, "grad_norm": 0.5832725167274475, "learning_rate": 0.0001, "loss": 1.4728, "step": 11401 }, { "epoch": 1.309746711848831, "grad_norm": 0.5246071219444275, "learning_rate": 0.0001, "loss": 1.3994, "step": 11402 }, { "epoch": 1.3098615817586583, "grad_norm": 0.5589280724525452, "learning_rate": 0.0001, "loss": 1.3416, "step": 11403 }, { "epoch": 1.3099764516684855, "grad_norm": 0.5817440748214722, "learning_rate": 0.0001, "loss": 1.357, "step": 11404 }, { "epoch": 1.3100913215783125, "grad_norm": 0.5823844075202942, "learning_rate": 0.0001, "loss": 1.5579, "step": 11405 }, { "epoch": 1.3102061914881395, "grad_norm": 0.590492308139801, "learning_rate": 0.0001, "loss": 1.3188, "step": 11406 }, { "epoch": 1.3103210613979668, "grad_norm": 0.5652885437011719, "learning_rate": 0.0001, "loss": 1.3556, "step": 11407 }, { "epoch": 1.310435931307794, "grad_norm": 0.5821486711502075, "learning_rate": 0.0001, "loss": 1.5111, "step": 11408 }, { "epoch": 1.310550801217621, "grad_norm": 0.585404098033905, "learning_rate": 0.0001, "loss": 1.506, "step": 11409 }, { "epoch": 1.310665671127448, "grad_norm": 0.5636382699012756, "learning_rate": 0.0001, "loss": 1.3113, "step": 11410 }, { "epoch": 1.3107805410372753, "grad_norm": 0.5605599880218506, "learning_rate": 0.0001, "loss": 1.3142, "step": 11411 }, { "epoch": 1.3108954109471025, "grad_norm": 0.6051444411277771, "learning_rate": 0.0001, "loss": 1.5978, "step": 11412 }, { "epoch": 1.3110102808569295, "grad_norm": 0.6009870171546936, "learning_rate": 0.0001, "loss": 1.4469, "step": 11413 }, { "epoch": 1.3111251507667567, "grad_norm": 0.5619489550590515, "learning_rate": 0.0001, "loss": 1.5581, "step": 11414 }, { "epoch": 1.3112400206765837, "grad_norm": 0.5233682990074158, "learning_rate": 0.0001, "loss": 1.3234, "step": 11415 }, { "epoch": 1.311354890586411, "grad_norm": 0.643240749835968, "learning_rate": 0.0001, "loss": 1.6655, "step": 11416 }, { "epoch": 1.311469760496238, "grad_norm": 0.633753776550293, "learning_rate": 0.0001, "loss": 1.569, "step": 11417 }, { "epoch": 1.3115846304060652, "grad_norm": 0.6189075708389282, "learning_rate": 0.0001, "loss": 1.6485, "step": 11418 }, { "epoch": 1.3116995003158922, "grad_norm": 0.5767892003059387, "learning_rate": 0.0001, "loss": 1.4837, "step": 11419 }, { "epoch": 1.3118143702257195, "grad_norm": 0.5835198163986206, "learning_rate": 0.0001, "loss": 1.5653, "step": 11420 }, { "epoch": 1.3119292401355465, "grad_norm": 0.5322223901748657, "learning_rate": 0.0001, "loss": 1.4651, "step": 11421 }, { "epoch": 1.3120441100453737, "grad_norm": 0.5825353264808655, "learning_rate": 0.0001, "loss": 1.6025, "step": 11422 }, { "epoch": 1.3121589799552007, "grad_norm": 0.5791360139846802, "learning_rate": 0.0001, "loss": 1.3845, "step": 11423 }, { "epoch": 1.312273849865028, "grad_norm": 0.637932538986206, "learning_rate": 0.0001, "loss": 1.6226, "step": 11424 }, { "epoch": 1.312388719774855, "grad_norm": 0.6340711116790771, "learning_rate": 0.0001, "loss": 1.6321, "step": 11425 }, { "epoch": 1.3125035896846822, "grad_norm": 0.5915501117706299, "learning_rate": 0.0001, "loss": 1.5359, "step": 11426 }, { "epoch": 1.3126184595945092, "grad_norm": 0.6027012467384338, "learning_rate": 0.0001, "loss": 1.502, "step": 11427 }, { "epoch": 1.3127333295043364, "grad_norm": 0.5210409760475159, "learning_rate": 0.0001, "loss": 1.2863, "step": 11428 }, { "epoch": 1.3128481994141634, "grad_norm": 0.5848100185394287, "learning_rate": 0.0001, "loss": 1.4554, "step": 11429 }, { "epoch": 1.3129630693239907, "grad_norm": 0.552432119846344, "learning_rate": 0.0001, "loss": 1.4561, "step": 11430 }, { "epoch": 1.3130779392338177, "grad_norm": 0.5485931038856506, "learning_rate": 0.0001, "loss": 1.2885, "step": 11431 }, { "epoch": 1.313192809143645, "grad_norm": 0.5956612825393677, "learning_rate": 0.0001, "loss": 1.5797, "step": 11432 }, { "epoch": 1.313307679053472, "grad_norm": 0.5833698511123657, "learning_rate": 0.0001, "loss": 1.338, "step": 11433 }, { "epoch": 1.3134225489632991, "grad_norm": 0.6002306342124939, "learning_rate": 0.0001, "loss": 1.4549, "step": 11434 }, { "epoch": 1.3135374188731261, "grad_norm": 0.6598058938980103, "learning_rate": 0.0001, "loss": 1.5414, "step": 11435 }, { "epoch": 1.3136522887829534, "grad_norm": 0.6079245805740356, "learning_rate": 0.0001, "loss": 1.4909, "step": 11436 }, { "epoch": 1.3137671586927804, "grad_norm": 0.5811184048652649, "learning_rate": 0.0001, "loss": 1.4411, "step": 11437 }, { "epoch": 1.3138820286026076, "grad_norm": 0.5552648305892944, "learning_rate": 0.0001, "loss": 1.4865, "step": 11438 }, { "epoch": 1.3139968985124346, "grad_norm": 0.5896119475364685, "learning_rate": 0.0001, "loss": 1.4723, "step": 11439 }, { "epoch": 1.3141117684222619, "grad_norm": 0.5828573703765869, "learning_rate": 0.0001, "loss": 1.5706, "step": 11440 }, { "epoch": 1.3142266383320889, "grad_norm": 0.5936744809150696, "learning_rate": 0.0001, "loss": 1.5319, "step": 11441 }, { "epoch": 1.314341508241916, "grad_norm": 0.672229528427124, "learning_rate": 0.0001, "loss": 1.7404, "step": 11442 }, { "epoch": 1.314456378151743, "grad_norm": 0.5854552388191223, "learning_rate": 0.0001, "loss": 1.4428, "step": 11443 }, { "epoch": 1.3145712480615703, "grad_norm": 0.5884828567504883, "learning_rate": 0.0001, "loss": 1.34, "step": 11444 }, { "epoch": 1.3146861179713973, "grad_norm": 0.6672284007072449, "learning_rate": 0.0001, "loss": 1.3559, "step": 11445 }, { "epoch": 1.3148009878812246, "grad_norm": 0.5890071988105774, "learning_rate": 0.0001, "loss": 1.4041, "step": 11446 }, { "epoch": 1.3149158577910516, "grad_norm": 0.573523998260498, "learning_rate": 0.0001, "loss": 1.2297, "step": 11447 }, { "epoch": 1.3150307277008788, "grad_norm": 0.6235065460205078, "learning_rate": 0.0001, "loss": 1.4743, "step": 11448 }, { "epoch": 1.3151455976107058, "grad_norm": 0.5744754672050476, "learning_rate": 0.0001, "loss": 1.3518, "step": 11449 }, { "epoch": 1.315260467520533, "grad_norm": 0.5936892032623291, "learning_rate": 0.0001, "loss": 1.4815, "step": 11450 }, { "epoch": 1.31537533743036, "grad_norm": 0.5716831684112549, "learning_rate": 0.0001, "loss": 1.4176, "step": 11451 }, { "epoch": 1.3154902073401873, "grad_norm": 0.5791821479797363, "learning_rate": 0.0001, "loss": 1.5311, "step": 11452 }, { "epoch": 1.3156050772500143, "grad_norm": 0.591475248336792, "learning_rate": 0.0001, "loss": 1.6355, "step": 11453 }, { "epoch": 1.3157199471598415, "grad_norm": 0.5741580128669739, "learning_rate": 0.0001, "loss": 1.4579, "step": 11454 }, { "epoch": 1.3158348170696685, "grad_norm": 0.5884466767311096, "learning_rate": 0.0001, "loss": 1.4848, "step": 11455 }, { "epoch": 1.3159496869794958, "grad_norm": 0.5626681447029114, "learning_rate": 0.0001, "loss": 1.3815, "step": 11456 }, { "epoch": 1.3160645568893228, "grad_norm": 0.6762535572052002, "learning_rate": 0.0001, "loss": 1.5877, "step": 11457 }, { "epoch": 1.31617942679915, "grad_norm": 0.5966523885726929, "learning_rate": 0.0001, "loss": 1.566, "step": 11458 }, { "epoch": 1.316294296708977, "grad_norm": 0.5997388958930969, "learning_rate": 0.0001, "loss": 1.5986, "step": 11459 }, { "epoch": 1.3164091666188042, "grad_norm": 0.6016896367073059, "learning_rate": 0.0001, "loss": 1.1723, "step": 11460 }, { "epoch": 1.3165240365286313, "grad_norm": 0.5532729625701904, "learning_rate": 0.0001, "loss": 1.2546, "step": 11461 }, { "epoch": 1.3166389064384585, "grad_norm": 0.5814374685287476, "learning_rate": 0.0001, "loss": 1.4586, "step": 11462 }, { "epoch": 1.3167537763482855, "grad_norm": 0.5885840058326721, "learning_rate": 0.0001, "loss": 1.4208, "step": 11463 }, { "epoch": 1.3168686462581127, "grad_norm": 0.5459346175193787, "learning_rate": 0.0001, "loss": 1.2433, "step": 11464 }, { "epoch": 1.3169835161679397, "grad_norm": 0.633172333240509, "learning_rate": 0.0001, "loss": 1.5716, "step": 11465 }, { "epoch": 1.317098386077767, "grad_norm": 0.5619357228279114, "learning_rate": 0.0001, "loss": 1.3453, "step": 11466 }, { "epoch": 1.317213255987594, "grad_norm": 0.6191542744636536, "learning_rate": 0.0001, "loss": 1.6639, "step": 11467 }, { "epoch": 1.3173281258974212, "grad_norm": 0.6198133230209351, "learning_rate": 0.0001, "loss": 1.3868, "step": 11468 }, { "epoch": 1.3174429958072482, "grad_norm": 0.6131882667541504, "learning_rate": 0.0001, "loss": 1.5273, "step": 11469 }, { "epoch": 1.3175578657170754, "grad_norm": 0.6336104869842529, "learning_rate": 0.0001, "loss": 1.5089, "step": 11470 }, { "epoch": 1.3176727356269025, "grad_norm": 0.5683992505073547, "learning_rate": 0.0001, "loss": 1.6024, "step": 11471 }, { "epoch": 1.3177876055367297, "grad_norm": 0.5928875207901001, "learning_rate": 0.0001, "loss": 1.6299, "step": 11472 }, { "epoch": 1.3179024754465567, "grad_norm": 0.5696129202842712, "learning_rate": 0.0001, "loss": 1.427, "step": 11473 }, { "epoch": 1.318017345356384, "grad_norm": 0.5560370683670044, "learning_rate": 0.0001, "loss": 1.3691, "step": 11474 }, { "epoch": 1.318132215266211, "grad_norm": 0.6248264312744141, "learning_rate": 0.0001, "loss": 1.4913, "step": 11475 }, { "epoch": 1.3182470851760382, "grad_norm": 0.6087796688079834, "learning_rate": 0.0001, "loss": 1.4265, "step": 11476 }, { "epoch": 1.3183619550858652, "grad_norm": 0.5595715641975403, "learning_rate": 0.0001, "loss": 1.4056, "step": 11477 }, { "epoch": 1.3184768249956924, "grad_norm": 0.5776646733283997, "learning_rate": 0.0001, "loss": 1.5342, "step": 11478 }, { "epoch": 1.3185916949055194, "grad_norm": 0.6008871793746948, "learning_rate": 0.0001, "loss": 1.5584, "step": 11479 }, { "epoch": 1.3187065648153466, "grad_norm": 0.6645510792732239, "learning_rate": 0.0001, "loss": 1.7724, "step": 11480 }, { "epoch": 1.3188214347251737, "grad_norm": 0.5654270052909851, "learning_rate": 0.0001, "loss": 1.3849, "step": 11481 }, { "epoch": 1.318936304635001, "grad_norm": 0.6282428503036499, "learning_rate": 0.0001, "loss": 1.6624, "step": 11482 }, { "epoch": 1.319051174544828, "grad_norm": 0.5395941138267517, "learning_rate": 0.0001, "loss": 1.4415, "step": 11483 }, { "epoch": 1.3191660444546551, "grad_norm": 0.6053428649902344, "learning_rate": 0.0001, "loss": 1.4817, "step": 11484 }, { "epoch": 1.3192809143644824, "grad_norm": 0.6166926622390747, "learning_rate": 0.0001, "loss": 1.6258, "step": 11485 }, { "epoch": 1.3193957842743094, "grad_norm": 0.5764713883399963, "learning_rate": 0.0001, "loss": 1.2562, "step": 11486 }, { "epoch": 1.3195106541841364, "grad_norm": 0.5882161855697632, "learning_rate": 0.0001, "loss": 1.5924, "step": 11487 }, { "epoch": 1.3196255240939636, "grad_norm": 0.5338685512542725, "learning_rate": 0.0001, "loss": 1.3349, "step": 11488 }, { "epoch": 1.3197403940037908, "grad_norm": 0.6274675130844116, "learning_rate": 0.0001, "loss": 1.5144, "step": 11489 }, { "epoch": 1.3198552639136178, "grad_norm": 0.6399273872375488, "learning_rate": 0.0001, "loss": 1.5503, "step": 11490 }, { "epoch": 1.3199701338234449, "grad_norm": 0.5669889450073242, "learning_rate": 0.0001, "loss": 1.4638, "step": 11491 }, { "epoch": 1.320085003733272, "grad_norm": 0.6101594567298889, "learning_rate": 0.0001, "loss": 1.6224, "step": 11492 }, { "epoch": 1.3201998736430993, "grad_norm": 0.5899582505226135, "learning_rate": 0.0001, "loss": 1.5129, "step": 11493 }, { "epoch": 1.3203147435529263, "grad_norm": 0.6240655779838562, "learning_rate": 0.0001, "loss": 1.5927, "step": 11494 }, { "epoch": 1.3204296134627533, "grad_norm": 0.5994274616241455, "learning_rate": 0.0001, "loss": 1.6436, "step": 11495 }, { "epoch": 1.3205444833725806, "grad_norm": 0.5786739587783813, "learning_rate": 0.0001, "loss": 1.5957, "step": 11496 }, { "epoch": 1.3206593532824078, "grad_norm": 0.5860279202461243, "learning_rate": 0.0001, "loss": 1.3348, "step": 11497 }, { "epoch": 1.3207742231922348, "grad_norm": 0.5858715772628784, "learning_rate": 0.0001, "loss": 1.3238, "step": 11498 }, { "epoch": 1.3208890931020618, "grad_norm": 0.6237417459487915, "learning_rate": 0.0001, "loss": 1.7704, "step": 11499 }, { "epoch": 1.321003963011889, "grad_norm": 0.5686200857162476, "learning_rate": 0.0001, "loss": 1.493, "step": 11500 }, { "epoch": 1.3211188329217163, "grad_norm": 0.6311827301979065, "learning_rate": 0.0001, "loss": 1.6549, "step": 11501 }, { "epoch": 1.3212337028315433, "grad_norm": 0.5898725986480713, "learning_rate": 0.0001, "loss": 1.5524, "step": 11502 }, { "epoch": 1.3213485727413703, "grad_norm": 0.543509304523468, "learning_rate": 0.0001, "loss": 1.4023, "step": 11503 }, { "epoch": 1.3214634426511975, "grad_norm": 0.6843796372413635, "learning_rate": 0.0001, "loss": 1.6353, "step": 11504 }, { "epoch": 1.3215783125610248, "grad_norm": 0.5604381561279297, "learning_rate": 0.0001, "loss": 1.4211, "step": 11505 }, { "epoch": 1.3216931824708518, "grad_norm": 0.6264784336090088, "learning_rate": 0.0001, "loss": 1.6592, "step": 11506 }, { "epoch": 1.3218080523806788, "grad_norm": 0.5538937449455261, "learning_rate": 0.0001, "loss": 1.6346, "step": 11507 }, { "epoch": 1.321922922290506, "grad_norm": 0.5639132857322693, "learning_rate": 0.0001, "loss": 1.3514, "step": 11508 }, { "epoch": 1.3220377922003332, "grad_norm": 0.5447933077812195, "learning_rate": 0.0001, "loss": 1.3798, "step": 11509 }, { "epoch": 1.3221526621101602, "grad_norm": 0.5847629308700562, "learning_rate": 0.0001, "loss": 1.5987, "step": 11510 }, { "epoch": 1.3222675320199873, "grad_norm": 0.5697048306465149, "learning_rate": 0.0001, "loss": 1.5463, "step": 11511 }, { "epoch": 1.3223824019298145, "grad_norm": 0.6301547884941101, "learning_rate": 0.0001, "loss": 1.4724, "step": 11512 }, { "epoch": 1.3224972718396417, "grad_norm": 0.5512596368789673, "learning_rate": 0.0001, "loss": 1.5218, "step": 11513 }, { "epoch": 1.3226121417494687, "grad_norm": 0.5693661570549011, "learning_rate": 0.0001, "loss": 1.3942, "step": 11514 }, { "epoch": 1.3227270116592957, "grad_norm": 0.5459357500076294, "learning_rate": 0.0001, "loss": 1.4969, "step": 11515 }, { "epoch": 1.322841881569123, "grad_norm": 0.5842916965484619, "learning_rate": 0.0001, "loss": 1.4668, "step": 11516 }, { "epoch": 1.3229567514789502, "grad_norm": 0.5588902235031128, "learning_rate": 0.0001, "loss": 1.397, "step": 11517 }, { "epoch": 1.3230716213887772, "grad_norm": 0.5641937255859375, "learning_rate": 0.0001, "loss": 1.2773, "step": 11518 }, { "epoch": 1.3231864912986042, "grad_norm": 0.5515016317367554, "learning_rate": 0.0001, "loss": 1.4645, "step": 11519 }, { "epoch": 1.3233013612084314, "grad_norm": 0.6019710898399353, "learning_rate": 0.0001, "loss": 1.4588, "step": 11520 }, { "epoch": 1.3234162311182587, "grad_norm": 0.6003029942512512, "learning_rate": 0.0001, "loss": 1.5908, "step": 11521 }, { "epoch": 1.3235311010280857, "grad_norm": 0.5781972408294678, "learning_rate": 0.0001, "loss": 1.4488, "step": 11522 }, { "epoch": 1.3236459709379127, "grad_norm": 0.597213089466095, "learning_rate": 0.0001, "loss": 1.4833, "step": 11523 }, { "epoch": 1.32376084084774, "grad_norm": 0.6153642535209656, "learning_rate": 0.0001, "loss": 1.5441, "step": 11524 }, { "epoch": 1.3238757107575672, "grad_norm": 0.5358846783638, "learning_rate": 0.0001, "loss": 1.2119, "step": 11525 }, { "epoch": 1.3239905806673942, "grad_norm": 0.6239076852798462, "learning_rate": 0.0001, "loss": 1.5487, "step": 11526 }, { "epoch": 1.3241054505772212, "grad_norm": 0.5684323906898499, "learning_rate": 0.0001, "loss": 1.5562, "step": 11527 }, { "epoch": 1.3242203204870484, "grad_norm": 0.5422741174697876, "learning_rate": 0.0001, "loss": 1.3541, "step": 11528 }, { "epoch": 1.3243351903968756, "grad_norm": 0.5979777574539185, "learning_rate": 0.0001, "loss": 1.5381, "step": 11529 }, { "epoch": 1.3244500603067026, "grad_norm": 0.5492232441902161, "learning_rate": 0.0001, "loss": 1.2141, "step": 11530 }, { "epoch": 1.3245649302165297, "grad_norm": 0.5755845904350281, "learning_rate": 0.0001, "loss": 1.4904, "step": 11531 }, { "epoch": 1.3246798001263569, "grad_norm": 0.5299710035324097, "learning_rate": 0.0001, "loss": 1.3358, "step": 11532 }, { "epoch": 1.3247946700361841, "grad_norm": 0.5345584154129028, "learning_rate": 0.0001, "loss": 1.4269, "step": 11533 }, { "epoch": 1.3249095399460111, "grad_norm": 0.5578746199607849, "learning_rate": 0.0001, "loss": 1.4823, "step": 11534 }, { "epoch": 1.3250244098558381, "grad_norm": 0.6171008944511414, "learning_rate": 0.0001, "loss": 1.5046, "step": 11535 }, { "epoch": 1.3251392797656654, "grad_norm": 0.5888642072677612, "learning_rate": 0.0001, "loss": 1.4443, "step": 11536 }, { "epoch": 1.3252541496754926, "grad_norm": 0.5797832012176514, "learning_rate": 0.0001, "loss": 1.5776, "step": 11537 }, { "epoch": 1.3253690195853196, "grad_norm": 0.5411096215248108, "learning_rate": 0.0001, "loss": 1.2369, "step": 11538 }, { "epoch": 1.3254838894951466, "grad_norm": 0.5486440062522888, "learning_rate": 0.0001, "loss": 1.5438, "step": 11539 }, { "epoch": 1.3255987594049738, "grad_norm": 0.6069177985191345, "learning_rate": 0.0001, "loss": 1.3719, "step": 11540 }, { "epoch": 1.325713629314801, "grad_norm": 0.5846525430679321, "learning_rate": 0.0001, "loss": 1.4982, "step": 11541 }, { "epoch": 1.325828499224628, "grad_norm": 0.5994956493377686, "learning_rate": 0.0001, "loss": 1.6159, "step": 11542 }, { "epoch": 1.325943369134455, "grad_norm": 0.6577488780021667, "learning_rate": 0.0001, "loss": 1.633, "step": 11543 }, { "epoch": 1.3260582390442823, "grad_norm": 0.5551082491874695, "learning_rate": 0.0001, "loss": 1.4653, "step": 11544 }, { "epoch": 1.3261731089541096, "grad_norm": 0.7000524401664734, "learning_rate": 0.0001, "loss": 1.5411, "step": 11545 }, { "epoch": 1.3262879788639366, "grad_norm": 0.5763112902641296, "learning_rate": 0.0001, "loss": 1.5938, "step": 11546 }, { "epoch": 1.3264028487737636, "grad_norm": 0.6176822185516357, "learning_rate": 0.0001, "loss": 1.586, "step": 11547 }, { "epoch": 1.3265177186835908, "grad_norm": 0.5590544939041138, "learning_rate": 0.0001, "loss": 1.3263, "step": 11548 }, { "epoch": 1.326632588593418, "grad_norm": 0.5652593374252319, "learning_rate": 0.0001, "loss": 1.3331, "step": 11549 }, { "epoch": 1.326747458503245, "grad_norm": 0.657227635383606, "learning_rate": 0.0001, "loss": 1.5396, "step": 11550 }, { "epoch": 1.3268623284130723, "grad_norm": 0.62965327501297, "learning_rate": 0.0001, "loss": 1.4074, "step": 11551 }, { "epoch": 1.3269771983228993, "grad_norm": 0.5422511100769043, "learning_rate": 0.0001, "loss": 1.5188, "step": 11552 }, { "epoch": 1.3270920682327265, "grad_norm": 0.5476226806640625, "learning_rate": 0.0001, "loss": 1.4102, "step": 11553 }, { "epoch": 1.3272069381425535, "grad_norm": 0.6080157160758972, "learning_rate": 0.0001, "loss": 1.4651, "step": 11554 }, { "epoch": 1.3273218080523808, "grad_norm": 0.6048029661178589, "learning_rate": 0.0001, "loss": 1.4795, "step": 11555 }, { "epoch": 1.3274366779622078, "grad_norm": 0.5942696332931519, "learning_rate": 0.0001, "loss": 1.4198, "step": 11556 }, { "epoch": 1.327551547872035, "grad_norm": 0.6302083134651184, "learning_rate": 0.0001, "loss": 1.6259, "step": 11557 }, { "epoch": 1.327666417781862, "grad_norm": 0.6311014294624329, "learning_rate": 0.0001, "loss": 1.3774, "step": 11558 }, { "epoch": 1.3277812876916892, "grad_norm": 0.6695850491523743, "learning_rate": 0.0001, "loss": 1.7421, "step": 11559 }, { "epoch": 1.3278961576015162, "grad_norm": 0.5893605947494507, "learning_rate": 0.0001, "loss": 1.5078, "step": 11560 }, { "epoch": 1.3280110275113435, "grad_norm": 0.7168806195259094, "learning_rate": 0.0001, "loss": 1.4677, "step": 11561 }, { "epoch": 1.3281258974211705, "grad_norm": 0.5717088580131531, "learning_rate": 0.0001, "loss": 1.4689, "step": 11562 }, { "epoch": 1.3282407673309977, "grad_norm": 0.5938601493835449, "learning_rate": 0.0001, "loss": 1.489, "step": 11563 }, { "epoch": 1.3283556372408247, "grad_norm": 0.6174132227897644, "learning_rate": 0.0001, "loss": 1.4657, "step": 11564 }, { "epoch": 1.328470507150652, "grad_norm": 0.6349033713340759, "learning_rate": 0.0001, "loss": 1.4751, "step": 11565 }, { "epoch": 1.328585377060479, "grad_norm": 0.5308579206466675, "learning_rate": 0.0001, "loss": 1.4448, "step": 11566 }, { "epoch": 1.3287002469703062, "grad_norm": 0.5638580322265625, "learning_rate": 0.0001, "loss": 1.5103, "step": 11567 }, { "epoch": 1.3288151168801332, "grad_norm": 0.5843721628189087, "learning_rate": 0.0001, "loss": 1.4057, "step": 11568 }, { "epoch": 1.3289299867899604, "grad_norm": 0.5811317563056946, "learning_rate": 0.0001, "loss": 1.6315, "step": 11569 }, { "epoch": 1.3290448566997874, "grad_norm": 0.5734536051750183, "learning_rate": 0.0001, "loss": 1.5023, "step": 11570 }, { "epoch": 1.3291597266096147, "grad_norm": 0.5713332295417786, "learning_rate": 0.0001, "loss": 1.3056, "step": 11571 }, { "epoch": 1.3292745965194417, "grad_norm": 0.570878803730011, "learning_rate": 0.0001, "loss": 1.303, "step": 11572 }, { "epoch": 1.329389466429269, "grad_norm": 0.5729346871376038, "learning_rate": 0.0001, "loss": 1.2979, "step": 11573 }, { "epoch": 1.329504336339096, "grad_norm": 0.5870584845542908, "learning_rate": 0.0001, "loss": 1.4395, "step": 11574 }, { "epoch": 1.3296192062489232, "grad_norm": 0.5481575131416321, "learning_rate": 0.0001, "loss": 1.3214, "step": 11575 }, { "epoch": 1.3297340761587502, "grad_norm": 0.5822715759277344, "learning_rate": 0.0001, "loss": 1.3497, "step": 11576 }, { "epoch": 1.3298489460685774, "grad_norm": 0.5590030550956726, "learning_rate": 0.0001, "loss": 1.3659, "step": 11577 }, { "epoch": 1.3299638159784044, "grad_norm": 0.5739883184432983, "learning_rate": 0.0001, "loss": 1.553, "step": 11578 }, { "epoch": 1.3300786858882316, "grad_norm": 0.5733195543289185, "learning_rate": 0.0001, "loss": 1.6149, "step": 11579 }, { "epoch": 1.3301935557980586, "grad_norm": 0.5605318546295166, "learning_rate": 0.0001, "loss": 1.3656, "step": 11580 }, { "epoch": 1.3303084257078859, "grad_norm": 0.574635922908783, "learning_rate": 0.0001, "loss": 1.3818, "step": 11581 }, { "epoch": 1.3304232956177129, "grad_norm": 0.5863053202629089, "learning_rate": 0.0001, "loss": 1.5685, "step": 11582 }, { "epoch": 1.3305381655275401, "grad_norm": 0.6203168630599976, "learning_rate": 0.0001, "loss": 1.5662, "step": 11583 }, { "epoch": 1.3306530354373671, "grad_norm": 0.5553234815597534, "learning_rate": 0.0001, "loss": 1.2708, "step": 11584 }, { "epoch": 1.3307679053471944, "grad_norm": 0.5634999871253967, "learning_rate": 0.0001, "loss": 1.2638, "step": 11585 }, { "epoch": 1.3308827752570214, "grad_norm": 0.5584697723388672, "learning_rate": 0.0001, "loss": 1.3846, "step": 11586 }, { "epoch": 1.3309976451668486, "grad_norm": 0.5859621167182922, "learning_rate": 0.0001, "loss": 1.4662, "step": 11587 }, { "epoch": 1.3311125150766756, "grad_norm": 0.5909789800643921, "learning_rate": 0.0001, "loss": 1.5097, "step": 11588 }, { "epoch": 1.3312273849865028, "grad_norm": 0.674757182598114, "learning_rate": 0.0001, "loss": 1.6189, "step": 11589 }, { "epoch": 1.3313422548963298, "grad_norm": 0.5663519501686096, "learning_rate": 0.0001, "loss": 1.3768, "step": 11590 }, { "epoch": 1.331457124806157, "grad_norm": 0.5656293630599976, "learning_rate": 0.0001, "loss": 1.3759, "step": 11591 }, { "epoch": 1.331571994715984, "grad_norm": 0.652201771736145, "learning_rate": 0.0001, "loss": 1.6337, "step": 11592 }, { "epoch": 1.3316868646258113, "grad_norm": 0.6089327931404114, "learning_rate": 0.0001, "loss": 1.5429, "step": 11593 }, { "epoch": 1.3318017345356383, "grad_norm": 0.5640665888786316, "learning_rate": 0.0001, "loss": 1.4744, "step": 11594 }, { "epoch": 1.3319166044454656, "grad_norm": 0.5926685333251953, "learning_rate": 0.0001, "loss": 1.6095, "step": 11595 }, { "epoch": 1.3320314743552926, "grad_norm": 0.5491368770599365, "learning_rate": 0.0001, "loss": 1.4922, "step": 11596 }, { "epoch": 1.3321463442651198, "grad_norm": 0.6609952449798584, "learning_rate": 0.0001, "loss": 1.6714, "step": 11597 }, { "epoch": 1.3322612141749468, "grad_norm": 0.6160357594490051, "learning_rate": 0.0001, "loss": 1.5141, "step": 11598 }, { "epoch": 1.332376084084774, "grad_norm": 0.5720334649085999, "learning_rate": 0.0001, "loss": 1.4616, "step": 11599 }, { "epoch": 1.332490953994601, "grad_norm": 0.6039928197860718, "learning_rate": 0.0001, "loss": 1.5472, "step": 11600 }, { "epoch": 1.3326058239044283, "grad_norm": 0.6017964482307434, "learning_rate": 0.0001, "loss": 1.3536, "step": 11601 }, { "epoch": 1.3327206938142553, "grad_norm": 0.5808287858963013, "learning_rate": 0.0001, "loss": 1.5072, "step": 11602 }, { "epoch": 1.3328355637240825, "grad_norm": 0.6469992995262146, "learning_rate": 0.0001, "loss": 1.6845, "step": 11603 }, { "epoch": 1.3329504336339095, "grad_norm": 0.6133192777633667, "learning_rate": 0.0001, "loss": 1.385, "step": 11604 }, { "epoch": 1.3330653035437368, "grad_norm": 0.575235903263092, "learning_rate": 0.0001, "loss": 1.465, "step": 11605 }, { "epoch": 1.3331801734535638, "grad_norm": 0.5712215900421143, "learning_rate": 0.0001, "loss": 1.3618, "step": 11606 }, { "epoch": 1.333295043363391, "grad_norm": 0.5588753819465637, "learning_rate": 0.0001, "loss": 1.4052, "step": 11607 }, { "epoch": 1.333409913273218, "grad_norm": 0.5291929244995117, "learning_rate": 0.0001, "loss": 1.3567, "step": 11608 }, { "epoch": 1.3335247831830452, "grad_norm": 0.5625868439674377, "learning_rate": 0.0001, "loss": 1.3899, "step": 11609 }, { "epoch": 1.3336396530928722, "grad_norm": 0.5754292607307434, "learning_rate": 0.0001, "loss": 1.3608, "step": 11610 }, { "epoch": 1.3337545230026995, "grad_norm": 0.6050917506217957, "learning_rate": 0.0001, "loss": 1.4607, "step": 11611 }, { "epoch": 1.3338693929125265, "grad_norm": 0.5786774754524231, "learning_rate": 0.0001, "loss": 1.6412, "step": 11612 }, { "epoch": 1.3339842628223537, "grad_norm": 0.6983840465545654, "learning_rate": 0.0001, "loss": 1.7368, "step": 11613 }, { "epoch": 1.3340991327321807, "grad_norm": 0.5841389298439026, "learning_rate": 0.0001, "loss": 1.4559, "step": 11614 }, { "epoch": 1.334214002642008, "grad_norm": 0.6447943449020386, "learning_rate": 0.0001, "loss": 1.6709, "step": 11615 }, { "epoch": 1.334328872551835, "grad_norm": 0.5992538332939148, "learning_rate": 0.0001, "loss": 1.4572, "step": 11616 }, { "epoch": 1.3344437424616622, "grad_norm": 0.6163121461868286, "learning_rate": 0.0001, "loss": 1.6213, "step": 11617 }, { "epoch": 1.3345586123714892, "grad_norm": 0.5886964797973633, "learning_rate": 0.0001, "loss": 1.3598, "step": 11618 }, { "epoch": 1.3346734822813164, "grad_norm": 0.5401976704597473, "learning_rate": 0.0001, "loss": 1.3276, "step": 11619 }, { "epoch": 1.3347883521911434, "grad_norm": 0.543461799621582, "learning_rate": 0.0001, "loss": 1.4984, "step": 11620 }, { "epoch": 1.3349032221009707, "grad_norm": 0.6902713179588318, "learning_rate": 0.0001, "loss": 1.5798, "step": 11621 }, { "epoch": 1.335018092010798, "grad_norm": 0.6112170219421387, "learning_rate": 0.0001, "loss": 1.4523, "step": 11622 }, { "epoch": 1.335132961920625, "grad_norm": 0.6658702492713928, "learning_rate": 0.0001, "loss": 1.6396, "step": 11623 }, { "epoch": 1.335247831830452, "grad_norm": 0.5938554406166077, "learning_rate": 0.0001, "loss": 1.5521, "step": 11624 }, { "epoch": 1.3353627017402792, "grad_norm": 0.5592337250709534, "learning_rate": 0.0001, "loss": 1.5078, "step": 11625 }, { "epoch": 1.3354775716501064, "grad_norm": 0.5694896578788757, "learning_rate": 0.0001, "loss": 1.2984, "step": 11626 }, { "epoch": 1.3355924415599334, "grad_norm": 0.6331267356872559, "learning_rate": 0.0001, "loss": 1.4092, "step": 11627 }, { "epoch": 1.3357073114697604, "grad_norm": 0.5441679358482361, "learning_rate": 0.0001, "loss": 1.4533, "step": 11628 }, { "epoch": 1.3358221813795876, "grad_norm": 0.5393032431602478, "learning_rate": 0.0001, "loss": 1.4713, "step": 11629 }, { "epoch": 1.3359370512894149, "grad_norm": 0.5676888227462769, "learning_rate": 0.0001, "loss": 1.428, "step": 11630 }, { "epoch": 1.3360519211992419, "grad_norm": 0.6380576491355896, "learning_rate": 0.0001, "loss": 1.4995, "step": 11631 }, { "epoch": 1.3361667911090689, "grad_norm": 0.6264674663543701, "learning_rate": 0.0001, "loss": 1.6645, "step": 11632 }, { "epoch": 1.3362816610188961, "grad_norm": 0.660614550113678, "learning_rate": 0.0001, "loss": 1.3874, "step": 11633 }, { "epoch": 1.3363965309287233, "grad_norm": 0.5967418551445007, "learning_rate": 0.0001, "loss": 1.4457, "step": 11634 }, { "epoch": 1.3365114008385504, "grad_norm": 0.5896640419960022, "learning_rate": 0.0001, "loss": 1.3916, "step": 11635 }, { "epoch": 1.3366262707483774, "grad_norm": 0.5509446859359741, "learning_rate": 0.0001, "loss": 1.5138, "step": 11636 }, { "epoch": 1.3367411406582046, "grad_norm": 0.5520437955856323, "learning_rate": 0.0001, "loss": 1.4022, "step": 11637 }, { "epoch": 1.3368560105680318, "grad_norm": 0.6015188694000244, "learning_rate": 0.0001, "loss": 1.4852, "step": 11638 }, { "epoch": 1.3369708804778588, "grad_norm": 0.6142217516899109, "learning_rate": 0.0001, "loss": 1.5066, "step": 11639 }, { "epoch": 1.3370857503876858, "grad_norm": 0.5661596059799194, "learning_rate": 0.0001, "loss": 1.3181, "step": 11640 }, { "epoch": 1.337200620297513, "grad_norm": 0.5863280296325684, "learning_rate": 0.0001, "loss": 1.5477, "step": 11641 }, { "epoch": 1.3373154902073403, "grad_norm": 0.6303154826164246, "learning_rate": 0.0001, "loss": 1.437, "step": 11642 }, { "epoch": 1.3374303601171673, "grad_norm": 0.6611382961273193, "learning_rate": 0.0001, "loss": 1.5357, "step": 11643 }, { "epoch": 1.3375452300269943, "grad_norm": 0.5862720012664795, "learning_rate": 0.0001, "loss": 1.4288, "step": 11644 }, { "epoch": 1.3376600999368216, "grad_norm": 0.6255212426185608, "learning_rate": 0.0001, "loss": 1.513, "step": 11645 }, { "epoch": 1.3377749698466488, "grad_norm": 0.6159241199493408, "learning_rate": 0.0001, "loss": 1.5834, "step": 11646 }, { "epoch": 1.3378898397564758, "grad_norm": 0.6486058235168457, "learning_rate": 0.0001, "loss": 1.6822, "step": 11647 }, { "epoch": 1.3380047096663028, "grad_norm": 0.577648401260376, "learning_rate": 0.0001, "loss": 1.5894, "step": 11648 }, { "epoch": 1.33811957957613, "grad_norm": 0.554535448551178, "learning_rate": 0.0001, "loss": 1.328, "step": 11649 }, { "epoch": 1.3382344494859573, "grad_norm": 0.6508966088294983, "learning_rate": 0.0001, "loss": 1.475, "step": 11650 }, { "epoch": 1.3383493193957843, "grad_norm": 0.558700442314148, "learning_rate": 0.0001, "loss": 1.502, "step": 11651 }, { "epoch": 1.3384641893056113, "grad_norm": 0.5516871809959412, "learning_rate": 0.0001, "loss": 1.3675, "step": 11652 }, { "epoch": 1.3385790592154385, "grad_norm": 0.5952174663543701, "learning_rate": 0.0001, "loss": 1.5799, "step": 11653 }, { "epoch": 1.3386939291252657, "grad_norm": 0.5594809651374817, "learning_rate": 0.0001, "loss": 1.2648, "step": 11654 }, { "epoch": 1.3388087990350928, "grad_norm": 0.6401504278182983, "learning_rate": 0.0001, "loss": 1.6669, "step": 11655 }, { "epoch": 1.3389236689449198, "grad_norm": 0.5477997660636902, "learning_rate": 0.0001, "loss": 1.3875, "step": 11656 }, { "epoch": 1.339038538854747, "grad_norm": 0.6509205102920532, "learning_rate": 0.0001, "loss": 1.6173, "step": 11657 }, { "epoch": 1.3391534087645742, "grad_norm": 0.6122426986694336, "learning_rate": 0.0001, "loss": 1.4431, "step": 11658 }, { "epoch": 1.3392682786744012, "grad_norm": 0.6294790506362915, "learning_rate": 0.0001, "loss": 1.3445, "step": 11659 }, { "epoch": 1.3393831485842282, "grad_norm": 0.6401886940002441, "learning_rate": 0.0001, "loss": 1.5471, "step": 11660 }, { "epoch": 1.3394980184940555, "grad_norm": 0.6541320085525513, "learning_rate": 0.0001, "loss": 1.7049, "step": 11661 }, { "epoch": 1.3396128884038827, "grad_norm": 0.6253013014793396, "learning_rate": 0.0001, "loss": 1.4823, "step": 11662 }, { "epoch": 1.3397277583137097, "grad_norm": 0.5853136777877808, "learning_rate": 0.0001, "loss": 1.5519, "step": 11663 }, { "epoch": 1.3398426282235367, "grad_norm": 0.638195276260376, "learning_rate": 0.0001, "loss": 1.4023, "step": 11664 }, { "epoch": 1.339957498133364, "grad_norm": 0.5563250184059143, "learning_rate": 0.0001, "loss": 1.4894, "step": 11665 }, { "epoch": 1.3400723680431912, "grad_norm": 0.6137783527374268, "learning_rate": 0.0001, "loss": 1.6346, "step": 11666 }, { "epoch": 1.3401872379530182, "grad_norm": 0.5672292709350586, "learning_rate": 0.0001, "loss": 1.4835, "step": 11667 }, { "epoch": 1.3403021078628452, "grad_norm": 0.6190178394317627, "learning_rate": 0.0001, "loss": 1.3538, "step": 11668 }, { "epoch": 1.3404169777726724, "grad_norm": 0.5791795253753662, "learning_rate": 0.0001, "loss": 1.5544, "step": 11669 }, { "epoch": 1.3405318476824997, "grad_norm": 0.5504187345504761, "learning_rate": 0.0001, "loss": 1.5983, "step": 11670 }, { "epoch": 1.3406467175923267, "grad_norm": 0.5945659875869751, "learning_rate": 0.0001, "loss": 1.5295, "step": 11671 }, { "epoch": 1.3407615875021537, "grad_norm": 0.6134802103042603, "learning_rate": 0.0001, "loss": 1.5271, "step": 11672 }, { "epoch": 1.340876457411981, "grad_norm": 0.5988081097602844, "learning_rate": 0.0001, "loss": 1.6372, "step": 11673 }, { "epoch": 1.3409913273218081, "grad_norm": 0.5827198028564453, "learning_rate": 0.0001, "loss": 1.5544, "step": 11674 }, { "epoch": 1.3411061972316352, "grad_norm": 0.5701010227203369, "learning_rate": 0.0001, "loss": 1.3642, "step": 11675 }, { "epoch": 1.3412210671414622, "grad_norm": 0.6469464898109436, "learning_rate": 0.0001, "loss": 1.5485, "step": 11676 }, { "epoch": 1.3413359370512894, "grad_norm": 0.5903701782226562, "learning_rate": 0.0001, "loss": 1.5042, "step": 11677 }, { "epoch": 1.3414508069611166, "grad_norm": 0.5560135245323181, "learning_rate": 0.0001, "loss": 1.4042, "step": 11678 }, { "epoch": 1.3415656768709436, "grad_norm": 0.5839285850524902, "learning_rate": 0.0001, "loss": 1.5252, "step": 11679 }, { "epoch": 1.3416805467807706, "grad_norm": 0.6008885502815247, "learning_rate": 0.0001, "loss": 1.7412, "step": 11680 }, { "epoch": 1.3417954166905979, "grad_norm": 0.542387843132019, "learning_rate": 0.0001, "loss": 1.4938, "step": 11681 }, { "epoch": 1.341910286600425, "grad_norm": 0.5818324685096741, "learning_rate": 0.0001, "loss": 1.542, "step": 11682 }, { "epoch": 1.3420251565102521, "grad_norm": 0.5558339953422546, "learning_rate": 0.0001, "loss": 1.3248, "step": 11683 }, { "epoch": 1.3421400264200791, "grad_norm": 0.5538620948791504, "learning_rate": 0.0001, "loss": 1.4956, "step": 11684 }, { "epoch": 1.3422548963299064, "grad_norm": 0.5701099038124084, "learning_rate": 0.0001, "loss": 1.5183, "step": 11685 }, { "epoch": 1.3423697662397336, "grad_norm": 0.5815318822860718, "learning_rate": 0.0001, "loss": 1.4229, "step": 11686 }, { "epoch": 1.3424846361495606, "grad_norm": 0.5730632543563843, "learning_rate": 0.0001, "loss": 1.252, "step": 11687 }, { "epoch": 1.3425995060593878, "grad_norm": 0.587175726890564, "learning_rate": 0.0001, "loss": 1.5821, "step": 11688 }, { "epoch": 1.3427143759692148, "grad_norm": 0.6182733774185181, "learning_rate": 0.0001, "loss": 1.493, "step": 11689 }, { "epoch": 1.342829245879042, "grad_norm": 0.636389434337616, "learning_rate": 0.0001, "loss": 1.4714, "step": 11690 }, { "epoch": 1.342944115788869, "grad_norm": 0.5567188858985901, "learning_rate": 0.0001, "loss": 1.2964, "step": 11691 }, { "epoch": 1.3430589856986963, "grad_norm": 0.7240939140319824, "learning_rate": 0.0001, "loss": 1.4965, "step": 11692 }, { "epoch": 1.3431738556085233, "grad_norm": 0.5807350277900696, "learning_rate": 0.0001, "loss": 1.4122, "step": 11693 }, { "epoch": 1.3432887255183505, "grad_norm": 0.5738909840583801, "learning_rate": 0.0001, "loss": 1.5089, "step": 11694 }, { "epoch": 1.3434035954281776, "grad_norm": 0.6102243661880493, "learning_rate": 0.0001, "loss": 1.501, "step": 11695 }, { "epoch": 1.3435184653380048, "grad_norm": 0.5855363607406616, "learning_rate": 0.0001, "loss": 1.4738, "step": 11696 }, { "epoch": 1.3436333352478318, "grad_norm": 0.6184929609298706, "learning_rate": 0.0001, "loss": 1.3653, "step": 11697 }, { "epoch": 1.343748205157659, "grad_norm": 0.6024360060691833, "learning_rate": 0.0001, "loss": 1.601, "step": 11698 }, { "epoch": 1.343863075067486, "grad_norm": 0.6664621233940125, "learning_rate": 0.0001, "loss": 1.4674, "step": 11699 }, { "epoch": 1.3439779449773133, "grad_norm": 0.59235018491745, "learning_rate": 0.0001, "loss": 1.4277, "step": 11700 }, { "epoch": 1.3440928148871403, "grad_norm": 0.5766885876655579, "learning_rate": 0.0001, "loss": 1.3778, "step": 11701 }, { "epoch": 1.3442076847969675, "grad_norm": 0.6111820936203003, "learning_rate": 0.0001, "loss": 1.5498, "step": 11702 }, { "epoch": 1.3443225547067945, "grad_norm": 0.5713093280792236, "learning_rate": 0.0001, "loss": 1.5413, "step": 11703 }, { "epoch": 1.3444374246166217, "grad_norm": 0.5723089575767517, "learning_rate": 0.0001, "loss": 1.4658, "step": 11704 }, { "epoch": 1.3445522945264488, "grad_norm": 0.5831340551376343, "learning_rate": 0.0001, "loss": 1.5091, "step": 11705 }, { "epoch": 1.344667164436276, "grad_norm": 0.5750026702880859, "learning_rate": 0.0001, "loss": 1.4922, "step": 11706 }, { "epoch": 1.344782034346103, "grad_norm": 0.6101966500282288, "learning_rate": 0.0001, "loss": 1.3965, "step": 11707 }, { "epoch": 1.3448969042559302, "grad_norm": 0.5940831303596497, "learning_rate": 0.0001, "loss": 1.3699, "step": 11708 }, { "epoch": 1.3450117741657572, "grad_norm": 0.5816580057144165, "learning_rate": 0.0001, "loss": 1.2569, "step": 11709 }, { "epoch": 1.3451266440755845, "grad_norm": 0.651119589805603, "learning_rate": 0.0001, "loss": 1.454, "step": 11710 }, { "epoch": 1.3452415139854115, "grad_norm": 0.5532911419868469, "learning_rate": 0.0001, "loss": 1.5302, "step": 11711 }, { "epoch": 1.3453563838952387, "grad_norm": 0.5616252422332764, "learning_rate": 0.0001, "loss": 1.5337, "step": 11712 }, { "epoch": 1.3454712538050657, "grad_norm": 0.6075287461280823, "learning_rate": 0.0001, "loss": 1.5848, "step": 11713 }, { "epoch": 1.345586123714893, "grad_norm": 0.5586172938346863, "learning_rate": 0.0001, "loss": 1.4328, "step": 11714 }, { "epoch": 1.34570099362472, "grad_norm": 0.6126081943511963, "learning_rate": 0.0001, "loss": 1.3231, "step": 11715 }, { "epoch": 1.3458158635345472, "grad_norm": 0.619485080242157, "learning_rate": 0.0001, "loss": 1.5135, "step": 11716 }, { "epoch": 1.3459307334443742, "grad_norm": 0.5817553997039795, "learning_rate": 0.0001, "loss": 1.5455, "step": 11717 }, { "epoch": 1.3460456033542014, "grad_norm": 0.6091306209564209, "learning_rate": 0.0001, "loss": 1.1376, "step": 11718 }, { "epoch": 1.3461604732640284, "grad_norm": 0.6346269845962524, "learning_rate": 0.0001, "loss": 1.5875, "step": 11719 }, { "epoch": 1.3462753431738557, "grad_norm": 0.6019834876060486, "learning_rate": 0.0001, "loss": 1.1867, "step": 11720 }, { "epoch": 1.3463902130836827, "grad_norm": 0.5959239602088928, "learning_rate": 0.0001, "loss": 1.5195, "step": 11721 }, { "epoch": 1.34650508299351, "grad_norm": 0.5814604759216309, "learning_rate": 0.0001, "loss": 1.4918, "step": 11722 }, { "epoch": 1.346619952903337, "grad_norm": 0.5841962695121765, "learning_rate": 0.0001, "loss": 1.553, "step": 11723 }, { "epoch": 1.3467348228131641, "grad_norm": 0.626183032989502, "learning_rate": 0.0001, "loss": 1.4611, "step": 11724 }, { "epoch": 1.3468496927229912, "grad_norm": 0.5390433073043823, "learning_rate": 0.0001, "loss": 1.5152, "step": 11725 }, { "epoch": 1.3469645626328184, "grad_norm": 0.6256446242332458, "learning_rate": 0.0001, "loss": 1.5443, "step": 11726 }, { "epoch": 1.3470794325426454, "grad_norm": 0.5755500793457031, "learning_rate": 0.0001, "loss": 1.4643, "step": 11727 }, { "epoch": 1.3471943024524726, "grad_norm": 0.627149224281311, "learning_rate": 0.0001, "loss": 1.5566, "step": 11728 }, { "epoch": 1.3473091723622996, "grad_norm": 0.5608257055282593, "learning_rate": 0.0001, "loss": 1.4637, "step": 11729 }, { "epoch": 1.3474240422721269, "grad_norm": 0.5481683611869812, "learning_rate": 0.0001, "loss": 1.3497, "step": 11730 }, { "epoch": 1.3475389121819539, "grad_norm": 0.5451379418373108, "learning_rate": 0.0001, "loss": 1.4324, "step": 11731 }, { "epoch": 1.347653782091781, "grad_norm": 0.594100832939148, "learning_rate": 0.0001, "loss": 1.5026, "step": 11732 }, { "epoch": 1.3477686520016081, "grad_norm": 0.5474432706832886, "learning_rate": 0.0001, "loss": 1.4132, "step": 11733 }, { "epoch": 1.3478835219114353, "grad_norm": 0.5873172283172607, "learning_rate": 0.0001, "loss": 1.5379, "step": 11734 }, { "epoch": 1.3479983918212624, "grad_norm": 0.5776464939117432, "learning_rate": 0.0001, "loss": 1.4039, "step": 11735 }, { "epoch": 1.3481132617310896, "grad_norm": 0.5772274732589722, "learning_rate": 0.0001, "loss": 1.2844, "step": 11736 }, { "epoch": 1.3482281316409166, "grad_norm": 0.6043945550918579, "learning_rate": 0.0001, "loss": 1.5532, "step": 11737 }, { "epoch": 1.3483430015507438, "grad_norm": 0.5803393125534058, "learning_rate": 0.0001, "loss": 1.3332, "step": 11738 }, { "epoch": 1.3484578714605708, "grad_norm": 0.5934081673622131, "learning_rate": 0.0001, "loss": 1.4883, "step": 11739 }, { "epoch": 1.348572741370398, "grad_norm": 0.746346652507782, "learning_rate": 0.0001, "loss": 1.4929, "step": 11740 }, { "epoch": 1.348687611280225, "grad_norm": 0.5849841833114624, "learning_rate": 0.0001, "loss": 1.4559, "step": 11741 }, { "epoch": 1.3488024811900523, "grad_norm": 0.54823237657547, "learning_rate": 0.0001, "loss": 1.3391, "step": 11742 }, { "epoch": 1.3489173510998793, "grad_norm": 0.5797605514526367, "learning_rate": 0.0001, "loss": 1.567, "step": 11743 }, { "epoch": 1.3490322210097065, "grad_norm": 0.6208351850509644, "learning_rate": 0.0001, "loss": 1.7252, "step": 11744 }, { "epoch": 1.3491470909195336, "grad_norm": 0.599469780921936, "learning_rate": 0.0001, "loss": 1.4858, "step": 11745 }, { "epoch": 1.3492619608293608, "grad_norm": 0.5936621427536011, "learning_rate": 0.0001, "loss": 1.4009, "step": 11746 }, { "epoch": 1.3493768307391878, "grad_norm": 0.5813693404197693, "learning_rate": 0.0001, "loss": 1.4898, "step": 11747 }, { "epoch": 1.349491700649015, "grad_norm": 0.686309278011322, "learning_rate": 0.0001, "loss": 1.4567, "step": 11748 }, { "epoch": 1.349606570558842, "grad_norm": 0.5535640120506287, "learning_rate": 0.0001, "loss": 1.3404, "step": 11749 }, { "epoch": 1.3497214404686693, "grad_norm": 0.6416987776756287, "learning_rate": 0.0001, "loss": 1.6725, "step": 11750 }, { "epoch": 1.3498363103784963, "grad_norm": 0.5824869275093079, "learning_rate": 0.0001, "loss": 1.3471, "step": 11751 }, { "epoch": 1.3499511802883235, "grad_norm": 0.5436984896659851, "learning_rate": 0.0001, "loss": 1.4902, "step": 11752 }, { "epoch": 1.3500660501981505, "grad_norm": 0.5425761342048645, "learning_rate": 0.0001, "loss": 1.424, "step": 11753 }, { "epoch": 1.3501809201079777, "grad_norm": 0.5270239114761353, "learning_rate": 0.0001, "loss": 1.1909, "step": 11754 }, { "epoch": 1.3502957900178048, "grad_norm": 0.5511675477027893, "learning_rate": 0.0001, "loss": 1.5399, "step": 11755 }, { "epoch": 1.350410659927632, "grad_norm": 0.5438040494918823, "learning_rate": 0.0001, "loss": 1.4739, "step": 11756 }, { "epoch": 1.350525529837459, "grad_norm": 0.5267738699913025, "learning_rate": 0.0001, "loss": 1.4464, "step": 11757 }, { "epoch": 1.3506403997472862, "grad_norm": 0.5484282374382019, "learning_rate": 0.0001, "loss": 1.4149, "step": 11758 }, { "epoch": 1.3507552696571135, "grad_norm": 0.5937207341194153, "learning_rate": 0.0001, "loss": 1.3918, "step": 11759 }, { "epoch": 1.3508701395669405, "grad_norm": 0.5717691779136658, "learning_rate": 0.0001, "loss": 1.392, "step": 11760 }, { "epoch": 1.3509850094767675, "grad_norm": 0.5394260287284851, "learning_rate": 0.0001, "loss": 1.426, "step": 11761 }, { "epoch": 1.3510998793865947, "grad_norm": 0.6104713082313538, "learning_rate": 0.0001, "loss": 1.4614, "step": 11762 }, { "epoch": 1.351214749296422, "grad_norm": 0.5929358601570129, "learning_rate": 0.0001, "loss": 1.4999, "step": 11763 }, { "epoch": 1.351329619206249, "grad_norm": 0.581203818321228, "learning_rate": 0.0001, "loss": 1.4168, "step": 11764 }, { "epoch": 1.351444489116076, "grad_norm": 0.5509240031242371, "learning_rate": 0.0001, "loss": 1.3605, "step": 11765 }, { "epoch": 1.3515593590259032, "grad_norm": 0.5640619993209839, "learning_rate": 0.0001, "loss": 1.4869, "step": 11766 }, { "epoch": 1.3516742289357304, "grad_norm": 0.5916575193405151, "learning_rate": 0.0001, "loss": 1.4334, "step": 11767 }, { "epoch": 1.3517890988455574, "grad_norm": 0.573228120803833, "learning_rate": 0.0001, "loss": 1.4595, "step": 11768 }, { "epoch": 1.3519039687553844, "grad_norm": 0.662548840045929, "learning_rate": 0.0001, "loss": 1.4985, "step": 11769 }, { "epoch": 1.3520188386652117, "grad_norm": 0.5832405090332031, "learning_rate": 0.0001, "loss": 1.3511, "step": 11770 }, { "epoch": 1.352133708575039, "grad_norm": 0.6026256084442139, "learning_rate": 0.0001, "loss": 1.4318, "step": 11771 }, { "epoch": 1.352248578484866, "grad_norm": 0.5654007792472839, "learning_rate": 0.0001, "loss": 1.3918, "step": 11772 }, { "epoch": 1.352363448394693, "grad_norm": 0.6701568961143494, "learning_rate": 0.0001, "loss": 1.6647, "step": 11773 }, { "epoch": 1.3524783183045201, "grad_norm": 0.5737709999084473, "learning_rate": 0.0001, "loss": 1.291, "step": 11774 }, { "epoch": 1.3525931882143474, "grad_norm": 0.6998119950294495, "learning_rate": 0.0001, "loss": 1.7295, "step": 11775 }, { "epoch": 1.3527080581241744, "grad_norm": 0.645850658416748, "learning_rate": 0.0001, "loss": 1.4265, "step": 11776 }, { "epoch": 1.3528229280340014, "grad_norm": 0.5715954899787903, "learning_rate": 0.0001, "loss": 1.282, "step": 11777 }, { "epoch": 1.3529377979438286, "grad_norm": 0.5338268280029297, "learning_rate": 0.0001, "loss": 1.3373, "step": 11778 }, { "epoch": 1.3530526678536559, "grad_norm": 0.5903444290161133, "learning_rate": 0.0001, "loss": 1.4409, "step": 11779 }, { "epoch": 1.3531675377634829, "grad_norm": 0.5709140300750732, "learning_rate": 0.0001, "loss": 1.4397, "step": 11780 }, { "epoch": 1.3532824076733099, "grad_norm": 0.6345384120941162, "learning_rate": 0.0001, "loss": 1.4992, "step": 11781 }, { "epoch": 1.353397277583137, "grad_norm": 0.6136090755462646, "learning_rate": 0.0001, "loss": 1.416, "step": 11782 }, { "epoch": 1.3535121474929643, "grad_norm": 0.6243228316307068, "learning_rate": 0.0001, "loss": 1.4065, "step": 11783 }, { "epoch": 1.3536270174027913, "grad_norm": 0.6512452960014343, "learning_rate": 0.0001, "loss": 1.5381, "step": 11784 }, { "epoch": 1.3537418873126184, "grad_norm": 0.5914151668548584, "learning_rate": 0.0001, "loss": 1.3474, "step": 11785 }, { "epoch": 1.3538567572224456, "grad_norm": 0.5646949410438538, "learning_rate": 0.0001, "loss": 1.4334, "step": 11786 }, { "epoch": 1.3539716271322728, "grad_norm": 0.5643793940544128, "learning_rate": 0.0001, "loss": 1.3393, "step": 11787 }, { "epoch": 1.3540864970420998, "grad_norm": 0.5716279745101929, "learning_rate": 0.0001, "loss": 1.638, "step": 11788 }, { "epoch": 1.3542013669519268, "grad_norm": 0.5285089612007141, "learning_rate": 0.0001, "loss": 1.2825, "step": 11789 }, { "epoch": 1.354316236861754, "grad_norm": 0.5500566363334656, "learning_rate": 0.0001, "loss": 1.5749, "step": 11790 }, { "epoch": 1.3544311067715813, "grad_norm": 0.556171715259552, "learning_rate": 0.0001, "loss": 1.4946, "step": 11791 }, { "epoch": 1.3545459766814083, "grad_norm": 0.62319415807724, "learning_rate": 0.0001, "loss": 1.5568, "step": 11792 }, { "epoch": 1.3546608465912353, "grad_norm": 0.5419564843177795, "learning_rate": 0.0001, "loss": 1.2894, "step": 11793 }, { "epoch": 1.3547757165010625, "grad_norm": 0.581043541431427, "learning_rate": 0.0001, "loss": 1.4787, "step": 11794 }, { "epoch": 1.3548905864108898, "grad_norm": 0.5013282895088196, "learning_rate": 0.0001, "loss": 1.1919, "step": 11795 }, { "epoch": 1.3550054563207168, "grad_norm": 0.6230014562606812, "learning_rate": 0.0001, "loss": 1.6323, "step": 11796 }, { "epoch": 1.3551203262305438, "grad_norm": 0.5700322389602661, "learning_rate": 0.0001, "loss": 1.5027, "step": 11797 }, { "epoch": 1.355235196140371, "grad_norm": 0.5606763362884521, "learning_rate": 0.0001, "loss": 1.4711, "step": 11798 }, { "epoch": 1.3553500660501983, "grad_norm": 0.5901811122894287, "learning_rate": 0.0001, "loss": 1.7085, "step": 11799 }, { "epoch": 1.3554649359600253, "grad_norm": 0.5982941389083862, "learning_rate": 0.0001, "loss": 1.4593, "step": 11800 }, { "epoch": 1.3555798058698523, "grad_norm": 0.5848938822746277, "learning_rate": 0.0001, "loss": 1.466, "step": 11801 }, { "epoch": 1.3556946757796795, "grad_norm": 0.6245958209037781, "learning_rate": 0.0001, "loss": 1.4828, "step": 11802 }, { "epoch": 1.3558095456895067, "grad_norm": 0.6128213405609131, "learning_rate": 0.0001, "loss": 1.5102, "step": 11803 }, { "epoch": 1.3559244155993337, "grad_norm": 0.5603303909301758, "learning_rate": 0.0001, "loss": 1.3408, "step": 11804 }, { "epoch": 1.3560392855091608, "grad_norm": 0.6169303059577942, "learning_rate": 0.0001, "loss": 1.5766, "step": 11805 }, { "epoch": 1.356154155418988, "grad_norm": 0.5828307867050171, "learning_rate": 0.0001, "loss": 1.35, "step": 11806 }, { "epoch": 1.3562690253288152, "grad_norm": 0.6106387972831726, "learning_rate": 0.0001, "loss": 1.4773, "step": 11807 }, { "epoch": 1.3563838952386422, "grad_norm": 0.6269760131835938, "learning_rate": 0.0001, "loss": 1.4883, "step": 11808 }, { "epoch": 1.3564987651484692, "grad_norm": 0.6034385561943054, "learning_rate": 0.0001, "loss": 1.5904, "step": 11809 }, { "epoch": 1.3566136350582965, "grad_norm": 0.5654463768005371, "learning_rate": 0.0001, "loss": 1.548, "step": 11810 }, { "epoch": 1.3567285049681237, "grad_norm": 0.5962846875190735, "learning_rate": 0.0001, "loss": 1.623, "step": 11811 }, { "epoch": 1.3568433748779507, "grad_norm": 0.6377612352371216, "learning_rate": 0.0001, "loss": 1.7367, "step": 11812 }, { "epoch": 1.3569582447877777, "grad_norm": 0.5528261065483093, "learning_rate": 0.0001, "loss": 1.3851, "step": 11813 }, { "epoch": 1.357073114697605, "grad_norm": 0.5887748599052429, "learning_rate": 0.0001, "loss": 1.4795, "step": 11814 }, { "epoch": 1.3571879846074322, "grad_norm": 0.6080367565155029, "learning_rate": 0.0001, "loss": 1.6172, "step": 11815 }, { "epoch": 1.3573028545172592, "grad_norm": 0.5874114632606506, "learning_rate": 0.0001, "loss": 1.3564, "step": 11816 }, { "epoch": 1.3574177244270862, "grad_norm": 0.5622687339782715, "learning_rate": 0.0001, "loss": 1.4871, "step": 11817 }, { "epoch": 1.3575325943369134, "grad_norm": 0.5545808672904968, "learning_rate": 0.0001, "loss": 1.5439, "step": 11818 }, { "epoch": 1.3576474642467407, "grad_norm": 0.5163676738739014, "learning_rate": 0.0001, "loss": 1.3761, "step": 11819 }, { "epoch": 1.3577623341565677, "grad_norm": 0.5534794330596924, "learning_rate": 0.0001, "loss": 1.4561, "step": 11820 }, { "epoch": 1.3578772040663947, "grad_norm": 0.582586407661438, "learning_rate": 0.0001, "loss": 1.7145, "step": 11821 }, { "epoch": 1.357992073976222, "grad_norm": 0.5760107040405273, "learning_rate": 0.0001, "loss": 1.329, "step": 11822 }, { "epoch": 1.3581069438860491, "grad_norm": 0.5939583778381348, "learning_rate": 0.0001, "loss": 1.3137, "step": 11823 }, { "epoch": 1.3582218137958761, "grad_norm": 0.6262675523757935, "learning_rate": 0.0001, "loss": 1.5452, "step": 11824 }, { "epoch": 1.3583366837057034, "grad_norm": 0.5676589012145996, "learning_rate": 0.0001, "loss": 1.525, "step": 11825 }, { "epoch": 1.3584515536155304, "grad_norm": 0.5600330233573914, "learning_rate": 0.0001, "loss": 1.3734, "step": 11826 }, { "epoch": 1.3585664235253576, "grad_norm": 0.6240646243095398, "learning_rate": 0.0001, "loss": 1.5052, "step": 11827 }, { "epoch": 1.3586812934351846, "grad_norm": 0.6038419008255005, "learning_rate": 0.0001, "loss": 1.4135, "step": 11828 }, { "epoch": 1.3587961633450119, "grad_norm": 0.5694374442100525, "learning_rate": 0.0001, "loss": 1.5767, "step": 11829 }, { "epoch": 1.3589110332548389, "grad_norm": 0.5696335434913635, "learning_rate": 0.0001, "loss": 1.3895, "step": 11830 }, { "epoch": 1.359025903164666, "grad_norm": 0.6053335666656494, "learning_rate": 0.0001, "loss": 1.5546, "step": 11831 }, { "epoch": 1.359140773074493, "grad_norm": 0.5585938096046448, "learning_rate": 0.0001, "loss": 1.4728, "step": 11832 }, { "epoch": 1.3592556429843203, "grad_norm": 0.5684964060783386, "learning_rate": 0.0001, "loss": 1.4404, "step": 11833 }, { "epoch": 1.3593705128941473, "grad_norm": 0.5710092782974243, "learning_rate": 0.0001, "loss": 1.5633, "step": 11834 }, { "epoch": 1.3594853828039746, "grad_norm": 0.545071542263031, "learning_rate": 0.0001, "loss": 1.3686, "step": 11835 }, { "epoch": 1.3596002527138016, "grad_norm": 0.5682846307754517, "learning_rate": 0.0001, "loss": 1.4138, "step": 11836 }, { "epoch": 1.3597151226236288, "grad_norm": 0.6198849081993103, "learning_rate": 0.0001, "loss": 1.4155, "step": 11837 }, { "epoch": 1.3598299925334558, "grad_norm": 0.6224766969680786, "learning_rate": 0.0001, "loss": 1.4831, "step": 11838 }, { "epoch": 1.359944862443283, "grad_norm": 0.5848830938339233, "learning_rate": 0.0001, "loss": 1.3544, "step": 11839 }, { "epoch": 1.36005973235311, "grad_norm": 0.6136349439620972, "learning_rate": 0.0001, "loss": 1.5558, "step": 11840 }, { "epoch": 1.3601746022629373, "grad_norm": 0.6168116927146912, "learning_rate": 0.0001, "loss": 1.4871, "step": 11841 }, { "epoch": 1.3602894721727643, "grad_norm": 0.5960310697555542, "learning_rate": 0.0001, "loss": 1.4425, "step": 11842 }, { "epoch": 1.3604043420825915, "grad_norm": 0.731585681438446, "learning_rate": 0.0001, "loss": 1.6861, "step": 11843 }, { "epoch": 1.3605192119924185, "grad_norm": 0.6656792759895325, "learning_rate": 0.0001, "loss": 1.5898, "step": 11844 }, { "epoch": 1.3606340819022458, "grad_norm": 0.5910264849662781, "learning_rate": 0.0001, "loss": 1.3898, "step": 11845 }, { "epoch": 1.3607489518120728, "grad_norm": 0.6095188856124878, "learning_rate": 0.0001, "loss": 1.5024, "step": 11846 }, { "epoch": 1.3608638217219, "grad_norm": 0.5570575594902039, "learning_rate": 0.0001, "loss": 1.3486, "step": 11847 }, { "epoch": 1.360978691631727, "grad_norm": 0.5648567080497742, "learning_rate": 0.0001, "loss": 1.4618, "step": 11848 }, { "epoch": 1.3610935615415543, "grad_norm": 0.5962167978286743, "learning_rate": 0.0001, "loss": 1.5203, "step": 11849 }, { "epoch": 1.3612084314513813, "grad_norm": 0.6409988403320312, "learning_rate": 0.0001, "loss": 1.367, "step": 11850 }, { "epoch": 1.3613233013612085, "grad_norm": 0.6274490356445312, "learning_rate": 0.0001, "loss": 1.4525, "step": 11851 }, { "epoch": 1.3614381712710355, "grad_norm": 0.541801393032074, "learning_rate": 0.0001, "loss": 1.4423, "step": 11852 }, { "epoch": 1.3615530411808627, "grad_norm": 0.6285209655761719, "learning_rate": 0.0001, "loss": 1.5118, "step": 11853 }, { "epoch": 1.3616679110906897, "grad_norm": 0.5430086255073547, "learning_rate": 0.0001, "loss": 1.4768, "step": 11854 }, { "epoch": 1.361782781000517, "grad_norm": 0.5744656324386597, "learning_rate": 0.0001, "loss": 1.3223, "step": 11855 }, { "epoch": 1.361897650910344, "grad_norm": 0.6095870733261108, "learning_rate": 0.0001, "loss": 1.5144, "step": 11856 }, { "epoch": 1.3620125208201712, "grad_norm": 0.6219245791435242, "learning_rate": 0.0001, "loss": 1.5718, "step": 11857 }, { "epoch": 1.3621273907299982, "grad_norm": 0.5849238038063049, "learning_rate": 0.0001, "loss": 1.5043, "step": 11858 }, { "epoch": 1.3622422606398255, "grad_norm": 0.6130156517028809, "learning_rate": 0.0001, "loss": 1.3665, "step": 11859 }, { "epoch": 1.3623571305496525, "grad_norm": 0.6351000070571899, "learning_rate": 0.0001, "loss": 1.5092, "step": 11860 }, { "epoch": 1.3624720004594797, "grad_norm": 0.5915348529815674, "learning_rate": 0.0001, "loss": 1.4129, "step": 11861 }, { "epoch": 1.3625868703693067, "grad_norm": 0.6348772644996643, "learning_rate": 0.0001, "loss": 1.5171, "step": 11862 }, { "epoch": 1.362701740279134, "grad_norm": 0.6160379648208618, "learning_rate": 0.0001, "loss": 1.4824, "step": 11863 }, { "epoch": 1.362816610188961, "grad_norm": 0.6016219258308411, "learning_rate": 0.0001, "loss": 1.4674, "step": 11864 }, { "epoch": 1.3629314800987882, "grad_norm": 0.6062735915184021, "learning_rate": 0.0001, "loss": 1.3464, "step": 11865 }, { "epoch": 1.3630463500086152, "grad_norm": 0.6335691809654236, "learning_rate": 0.0001, "loss": 1.2892, "step": 11866 }, { "epoch": 1.3631612199184424, "grad_norm": 0.597547173500061, "learning_rate": 0.0001, "loss": 1.398, "step": 11867 }, { "epoch": 1.3632760898282694, "grad_norm": 0.6174020767211914, "learning_rate": 0.0001, "loss": 1.561, "step": 11868 }, { "epoch": 1.3633909597380967, "grad_norm": 0.5814570784568787, "learning_rate": 0.0001, "loss": 1.2317, "step": 11869 }, { "epoch": 1.3635058296479237, "grad_norm": 0.5660645365715027, "learning_rate": 0.0001, "loss": 1.3388, "step": 11870 }, { "epoch": 1.363620699557751, "grad_norm": 0.5966171026229858, "learning_rate": 0.0001, "loss": 1.4789, "step": 11871 }, { "epoch": 1.363735569467578, "grad_norm": 0.6484749913215637, "learning_rate": 0.0001, "loss": 1.5419, "step": 11872 }, { "epoch": 1.3638504393774051, "grad_norm": 0.6023862361907959, "learning_rate": 0.0001, "loss": 1.4739, "step": 11873 }, { "epoch": 1.3639653092872321, "grad_norm": 0.6137213706970215, "learning_rate": 0.0001, "loss": 1.5059, "step": 11874 }, { "epoch": 1.3640801791970594, "grad_norm": 0.5899903774261475, "learning_rate": 0.0001, "loss": 1.6593, "step": 11875 }, { "epoch": 1.3641950491068864, "grad_norm": 0.6372878551483154, "learning_rate": 0.0001, "loss": 1.6322, "step": 11876 }, { "epoch": 1.3643099190167136, "grad_norm": 0.6417999863624573, "learning_rate": 0.0001, "loss": 1.5396, "step": 11877 }, { "epoch": 1.3644247889265406, "grad_norm": 0.6401478052139282, "learning_rate": 0.0001, "loss": 1.5189, "step": 11878 }, { "epoch": 1.3645396588363679, "grad_norm": 0.5522693395614624, "learning_rate": 0.0001, "loss": 1.4322, "step": 11879 }, { "epoch": 1.3646545287461949, "grad_norm": 0.5718218088150024, "learning_rate": 0.0001, "loss": 1.445, "step": 11880 }, { "epoch": 1.364769398656022, "grad_norm": 0.5563505291938782, "learning_rate": 0.0001, "loss": 1.5453, "step": 11881 }, { "epoch": 1.364884268565849, "grad_norm": 0.5796937346458435, "learning_rate": 0.0001, "loss": 1.6481, "step": 11882 }, { "epoch": 1.3649991384756763, "grad_norm": 0.5500465035438538, "learning_rate": 0.0001, "loss": 1.3196, "step": 11883 }, { "epoch": 1.3651140083855033, "grad_norm": 0.5704468488693237, "learning_rate": 0.0001, "loss": 1.4476, "step": 11884 }, { "epoch": 1.3652288782953306, "grad_norm": 0.6012165546417236, "learning_rate": 0.0001, "loss": 1.3108, "step": 11885 }, { "epoch": 1.3653437482051576, "grad_norm": 0.5745958089828491, "learning_rate": 0.0001, "loss": 1.5241, "step": 11886 }, { "epoch": 1.3654586181149848, "grad_norm": 0.5989466905593872, "learning_rate": 0.0001, "loss": 1.4294, "step": 11887 }, { "epoch": 1.3655734880248118, "grad_norm": 0.5610129237174988, "learning_rate": 0.0001, "loss": 1.4047, "step": 11888 }, { "epoch": 1.365688357934639, "grad_norm": 0.5722390413284302, "learning_rate": 0.0001, "loss": 1.5388, "step": 11889 }, { "epoch": 1.365803227844466, "grad_norm": 0.5391660332679749, "learning_rate": 0.0001, "loss": 1.4419, "step": 11890 }, { "epoch": 1.3659180977542933, "grad_norm": 0.6333369016647339, "learning_rate": 0.0001, "loss": 1.5286, "step": 11891 }, { "epoch": 1.3660329676641203, "grad_norm": 0.5749605298042297, "learning_rate": 0.0001, "loss": 1.5987, "step": 11892 }, { "epoch": 1.3661478375739475, "grad_norm": 0.5995118021965027, "learning_rate": 0.0001, "loss": 1.5819, "step": 11893 }, { "epoch": 1.3662627074837745, "grad_norm": 0.6667972207069397, "learning_rate": 0.0001, "loss": 1.7452, "step": 11894 }, { "epoch": 1.3663775773936018, "grad_norm": 0.5708993077278137, "learning_rate": 0.0001, "loss": 1.5707, "step": 11895 }, { "epoch": 1.366492447303429, "grad_norm": 0.5501135587692261, "learning_rate": 0.0001, "loss": 1.4129, "step": 11896 }, { "epoch": 1.366607317213256, "grad_norm": 0.5960387587547302, "learning_rate": 0.0001, "loss": 1.4311, "step": 11897 }, { "epoch": 1.366722187123083, "grad_norm": 0.6244403123855591, "learning_rate": 0.0001, "loss": 1.5404, "step": 11898 }, { "epoch": 1.3668370570329103, "grad_norm": 0.5725081562995911, "learning_rate": 0.0001, "loss": 1.4793, "step": 11899 }, { "epoch": 1.3669519269427375, "grad_norm": 0.5716732144355774, "learning_rate": 0.0001, "loss": 1.2992, "step": 11900 }, { "epoch": 1.3670667968525645, "grad_norm": 0.6741322875022888, "learning_rate": 0.0001, "loss": 1.6103, "step": 11901 }, { "epoch": 1.3671816667623915, "grad_norm": 0.5683753490447998, "learning_rate": 0.0001, "loss": 1.4798, "step": 11902 }, { "epoch": 1.3672965366722187, "grad_norm": 0.5858237743377686, "learning_rate": 0.0001, "loss": 1.493, "step": 11903 }, { "epoch": 1.367411406582046, "grad_norm": 0.6018313765525818, "learning_rate": 0.0001, "loss": 1.3942, "step": 11904 }, { "epoch": 1.367526276491873, "grad_norm": 0.5790035724639893, "learning_rate": 0.0001, "loss": 1.3989, "step": 11905 }, { "epoch": 1.3676411464017, "grad_norm": 0.5737671256065369, "learning_rate": 0.0001, "loss": 1.3992, "step": 11906 }, { "epoch": 1.3677560163115272, "grad_norm": 0.5786615610122681, "learning_rate": 0.0001, "loss": 1.4582, "step": 11907 }, { "epoch": 1.3678708862213544, "grad_norm": 0.6264104247093201, "learning_rate": 0.0001, "loss": 1.429, "step": 11908 }, { "epoch": 1.3679857561311815, "grad_norm": 0.5502702593803406, "learning_rate": 0.0001, "loss": 1.4506, "step": 11909 }, { "epoch": 1.3681006260410085, "grad_norm": 0.5532498359680176, "learning_rate": 0.0001, "loss": 1.3379, "step": 11910 }, { "epoch": 1.3682154959508357, "grad_norm": 0.5454950332641602, "learning_rate": 0.0001, "loss": 1.3526, "step": 11911 }, { "epoch": 1.368330365860663, "grad_norm": 0.6295995712280273, "learning_rate": 0.0001, "loss": 1.4195, "step": 11912 }, { "epoch": 1.36844523577049, "grad_norm": 0.6341611742973328, "learning_rate": 0.0001, "loss": 1.5017, "step": 11913 }, { "epoch": 1.368560105680317, "grad_norm": 0.620488703250885, "learning_rate": 0.0001, "loss": 1.5425, "step": 11914 }, { "epoch": 1.3686749755901442, "grad_norm": 0.5347122550010681, "learning_rate": 0.0001, "loss": 1.46, "step": 11915 }, { "epoch": 1.3687898454999714, "grad_norm": 0.5783818960189819, "learning_rate": 0.0001, "loss": 1.2346, "step": 11916 }, { "epoch": 1.3689047154097984, "grad_norm": 0.5772425532341003, "learning_rate": 0.0001, "loss": 1.4085, "step": 11917 }, { "epoch": 1.3690195853196254, "grad_norm": 0.5653284788131714, "learning_rate": 0.0001, "loss": 1.5086, "step": 11918 }, { "epoch": 1.3691344552294527, "grad_norm": 0.6505599021911621, "learning_rate": 0.0001, "loss": 1.4088, "step": 11919 }, { "epoch": 1.3692493251392799, "grad_norm": 0.6298074126243591, "learning_rate": 0.0001, "loss": 1.591, "step": 11920 }, { "epoch": 1.369364195049107, "grad_norm": 0.5957285761833191, "learning_rate": 0.0001, "loss": 1.4399, "step": 11921 }, { "epoch": 1.369479064958934, "grad_norm": 0.5449360609054565, "learning_rate": 0.0001, "loss": 1.2528, "step": 11922 }, { "epoch": 1.3695939348687611, "grad_norm": 0.6365380883216858, "learning_rate": 0.0001, "loss": 1.1535, "step": 11923 }, { "epoch": 1.3697088047785884, "grad_norm": 0.668751060962677, "learning_rate": 0.0001, "loss": 1.4752, "step": 11924 }, { "epoch": 1.3698236746884154, "grad_norm": 0.631753146648407, "learning_rate": 0.0001, "loss": 1.1487, "step": 11925 }, { "epoch": 1.3699385445982424, "grad_norm": 0.5405638813972473, "learning_rate": 0.0001, "loss": 1.1319, "step": 11926 }, { "epoch": 1.3700534145080696, "grad_norm": 0.6844695806503296, "learning_rate": 0.0001, "loss": 1.4908, "step": 11927 }, { "epoch": 1.3701682844178968, "grad_norm": 0.6165083050727844, "learning_rate": 0.0001, "loss": 1.4513, "step": 11928 }, { "epoch": 1.3702831543277239, "grad_norm": 0.5696147084236145, "learning_rate": 0.0001, "loss": 1.4323, "step": 11929 }, { "epoch": 1.3703980242375509, "grad_norm": 0.6744541525840759, "learning_rate": 0.0001, "loss": 1.6197, "step": 11930 }, { "epoch": 1.370512894147378, "grad_norm": 0.5944900512695312, "learning_rate": 0.0001, "loss": 1.3649, "step": 11931 }, { "epoch": 1.3706277640572053, "grad_norm": 0.60816490650177, "learning_rate": 0.0001, "loss": 1.5909, "step": 11932 }, { "epoch": 1.3707426339670323, "grad_norm": 0.5886834859848022, "learning_rate": 0.0001, "loss": 1.542, "step": 11933 }, { "epoch": 1.3708575038768593, "grad_norm": 0.5649728178977966, "learning_rate": 0.0001, "loss": 1.4151, "step": 11934 }, { "epoch": 1.3709723737866866, "grad_norm": 0.5838742852210999, "learning_rate": 0.0001, "loss": 1.4892, "step": 11935 }, { "epoch": 1.3710872436965138, "grad_norm": 0.556206464767456, "learning_rate": 0.0001, "loss": 1.4125, "step": 11936 }, { "epoch": 1.3712021136063408, "grad_norm": 0.5563154816627502, "learning_rate": 0.0001, "loss": 1.5298, "step": 11937 }, { "epoch": 1.3713169835161678, "grad_norm": 0.5660498738288879, "learning_rate": 0.0001, "loss": 1.4787, "step": 11938 }, { "epoch": 1.371431853425995, "grad_norm": 0.604303777217865, "learning_rate": 0.0001, "loss": 1.5178, "step": 11939 }, { "epoch": 1.3715467233358223, "grad_norm": 0.624233067035675, "learning_rate": 0.0001, "loss": 1.4388, "step": 11940 }, { "epoch": 1.3716615932456493, "grad_norm": 0.5636304020881653, "learning_rate": 0.0001, "loss": 1.2747, "step": 11941 }, { "epoch": 1.3717764631554763, "grad_norm": 0.543554425239563, "learning_rate": 0.0001, "loss": 1.4925, "step": 11942 }, { "epoch": 1.3718913330653035, "grad_norm": 0.5257536768913269, "learning_rate": 0.0001, "loss": 1.1757, "step": 11943 }, { "epoch": 1.3720062029751308, "grad_norm": 0.6228072047233582, "learning_rate": 0.0001, "loss": 1.3553, "step": 11944 }, { "epoch": 1.3721210728849578, "grad_norm": 0.7739028334617615, "learning_rate": 0.0001, "loss": 1.355, "step": 11945 }, { "epoch": 1.3722359427947848, "grad_norm": 0.6044634580612183, "learning_rate": 0.0001, "loss": 1.5408, "step": 11946 }, { "epoch": 1.372350812704612, "grad_norm": 0.6621956825256348, "learning_rate": 0.0001, "loss": 1.6615, "step": 11947 }, { "epoch": 1.3724656826144392, "grad_norm": 0.5978637933731079, "learning_rate": 0.0001, "loss": 1.504, "step": 11948 }, { "epoch": 1.3725805525242663, "grad_norm": 0.5536312460899353, "learning_rate": 0.0001, "loss": 1.4278, "step": 11949 }, { "epoch": 1.3726954224340933, "grad_norm": 0.5836893320083618, "learning_rate": 0.0001, "loss": 1.5027, "step": 11950 }, { "epoch": 1.3728102923439205, "grad_norm": 0.5607746243476868, "learning_rate": 0.0001, "loss": 1.5454, "step": 11951 }, { "epoch": 1.3729251622537477, "grad_norm": 0.7326905727386475, "learning_rate": 0.0001, "loss": 1.7962, "step": 11952 }, { "epoch": 1.3730400321635747, "grad_norm": 0.5599920749664307, "learning_rate": 0.0001, "loss": 1.4866, "step": 11953 }, { "epoch": 1.3731549020734017, "grad_norm": 0.5913311839103699, "learning_rate": 0.0001, "loss": 1.5448, "step": 11954 }, { "epoch": 1.373269771983229, "grad_norm": 0.596919059753418, "learning_rate": 0.0001, "loss": 1.3861, "step": 11955 }, { "epoch": 1.3733846418930562, "grad_norm": 0.6204702854156494, "learning_rate": 0.0001, "loss": 1.572, "step": 11956 }, { "epoch": 1.3734995118028832, "grad_norm": 0.6467247605323792, "learning_rate": 0.0001, "loss": 1.5979, "step": 11957 }, { "epoch": 1.3736143817127102, "grad_norm": 0.6159607768058777, "learning_rate": 0.0001, "loss": 1.5928, "step": 11958 }, { "epoch": 1.3737292516225375, "grad_norm": 0.5385196208953857, "learning_rate": 0.0001, "loss": 1.3476, "step": 11959 }, { "epoch": 1.3738441215323647, "grad_norm": 0.5562816262245178, "learning_rate": 0.0001, "loss": 1.354, "step": 11960 }, { "epoch": 1.3739589914421917, "grad_norm": 0.5553545355796814, "learning_rate": 0.0001, "loss": 1.5637, "step": 11961 }, { "epoch": 1.374073861352019, "grad_norm": 0.5768440365791321, "learning_rate": 0.0001, "loss": 1.5775, "step": 11962 }, { "epoch": 1.374188731261846, "grad_norm": 0.6480252146720886, "learning_rate": 0.0001, "loss": 1.5765, "step": 11963 }, { "epoch": 1.3743036011716732, "grad_norm": 0.5714027881622314, "learning_rate": 0.0001, "loss": 1.651, "step": 11964 }, { "epoch": 1.3744184710815002, "grad_norm": 0.5680522918701172, "learning_rate": 0.0001, "loss": 1.5401, "step": 11965 }, { "epoch": 1.3745333409913274, "grad_norm": 0.6007116436958313, "learning_rate": 0.0001, "loss": 1.4423, "step": 11966 }, { "epoch": 1.3746482109011544, "grad_norm": 0.5150221586227417, "learning_rate": 0.0001, "loss": 1.3199, "step": 11967 }, { "epoch": 1.3747630808109816, "grad_norm": 0.5811969041824341, "learning_rate": 0.0001, "loss": 1.3401, "step": 11968 }, { "epoch": 1.3748779507208087, "grad_norm": 0.5529438853263855, "learning_rate": 0.0001, "loss": 1.445, "step": 11969 }, { "epoch": 1.3749928206306359, "grad_norm": 0.5678512454032898, "learning_rate": 0.0001, "loss": 1.3625, "step": 11970 }, { "epoch": 1.375107690540463, "grad_norm": 0.5907145738601685, "learning_rate": 0.0001, "loss": 1.5975, "step": 11971 }, { "epoch": 1.3752225604502901, "grad_norm": 0.5750979781150818, "learning_rate": 0.0001, "loss": 1.3721, "step": 11972 }, { "epoch": 1.3753374303601171, "grad_norm": 0.5326782464981079, "learning_rate": 0.0001, "loss": 1.2268, "step": 11973 }, { "epoch": 1.3754523002699444, "grad_norm": 0.6372673511505127, "learning_rate": 0.0001, "loss": 1.6161, "step": 11974 }, { "epoch": 1.3755671701797714, "grad_norm": 0.702731192111969, "learning_rate": 0.0001, "loss": 1.5823, "step": 11975 }, { "epoch": 1.3756820400895986, "grad_norm": 0.5595875978469849, "learning_rate": 0.0001, "loss": 1.3401, "step": 11976 }, { "epoch": 1.3757969099994256, "grad_norm": 0.5655556917190552, "learning_rate": 0.0001, "loss": 1.4331, "step": 11977 }, { "epoch": 1.3759117799092528, "grad_norm": 0.5677425265312195, "learning_rate": 0.0001, "loss": 1.435, "step": 11978 }, { "epoch": 1.3760266498190799, "grad_norm": 0.5768133401870728, "learning_rate": 0.0001, "loss": 1.4443, "step": 11979 }, { "epoch": 1.376141519728907, "grad_norm": 0.6946627497673035, "learning_rate": 0.0001, "loss": 1.437, "step": 11980 }, { "epoch": 1.376256389638734, "grad_norm": 0.5521852374076843, "learning_rate": 0.0001, "loss": 1.3945, "step": 11981 }, { "epoch": 1.3763712595485613, "grad_norm": 0.5932571291923523, "learning_rate": 0.0001, "loss": 1.3905, "step": 11982 }, { "epoch": 1.3764861294583883, "grad_norm": 0.5667235851287842, "learning_rate": 0.0001, "loss": 1.4262, "step": 11983 }, { "epoch": 1.3766009993682156, "grad_norm": 0.6410410404205322, "learning_rate": 0.0001, "loss": 1.5501, "step": 11984 }, { "epoch": 1.3767158692780426, "grad_norm": 0.6235799193382263, "learning_rate": 0.0001, "loss": 1.5208, "step": 11985 }, { "epoch": 1.3768307391878698, "grad_norm": 0.5973227024078369, "learning_rate": 0.0001, "loss": 1.4338, "step": 11986 }, { "epoch": 1.3769456090976968, "grad_norm": 0.5862446427345276, "learning_rate": 0.0001, "loss": 1.5087, "step": 11987 }, { "epoch": 1.377060479007524, "grad_norm": 0.6002928018569946, "learning_rate": 0.0001, "loss": 1.4927, "step": 11988 }, { "epoch": 1.377175348917351, "grad_norm": 0.6475585699081421, "learning_rate": 0.0001, "loss": 1.6346, "step": 11989 }, { "epoch": 1.3772902188271783, "grad_norm": 0.6113678216934204, "learning_rate": 0.0001, "loss": 1.5051, "step": 11990 }, { "epoch": 1.3774050887370053, "grad_norm": 0.5376540422439575, "learning_rate": 0.0001, "loss": 1.2808, "step": 11991 }, { "epoch": 1.3775199586468325, "grad_norm": 0.6065835356712341, "learning_rate": 0.0001, "loss": 1.6838, "step": 11992 }, { "epoch": 1.3776348285566595, "grad_norm": 0.5882073640823364, "learning_rate": 0.0001, "loss": 1.3892, "step": 11993 }, { "epoch": 1.3777496984664868, "grad_norm": 0.5682032704353333, "learning_rate": 0.0001, "loss": 1.3817, "step": 11994 }, { "epoch": 1.3778645683763138, "grad_norm": 0.6414339542388916, "learning_rate": 0.0001, "loss": 1.6193, "step": 11995 }, { "epoch": 1.377979438286141, "grad_norm": 0.6156001091003418, "learning_rate": 0.0001, "loss": 1.5661, "step": 11996 }, { "epoch": 1.378094308195968, "grad_norm": 0.5534403324127197, "learning_rate": 0.0001, "loss": 1.2606, "step": 11997 }, { "epoch": 1.3782091781057952, "grad_norm": 0.5855156779289246, "learning_rate": 0.0001, "loss": 1.4816, "step": 11998 }, { "epoch": 1.3783240480156222, "grad_norm": 0.5981359481811523, "learning_rate": 0.0001, "loss": 1.672, "step": 11999 }, { "epoch": 1.3784389179254495, "grad_norm": 0.6743270754814148, "learning_rate": 0.0001, "loss": 1.558, "step": 12000 }, { "epoch": 1.3785537878352765, "grad_norm": 0.6285853981971741, "learning_rate": 0.0001, "loss": 1.7823, "step": 12001 }, { "epoch": 1.3786686577451037, "grad_norm": 0.6270994544029236, "learning_rate": 0.0001, "loss": 1.3792, "step": 12002 }, { "epoch": 1.3787835276549307, "grad_norm": 0.5615768432617188, "learning_rate": 0.0001, "loss": 1.5357, "step": 12003 }, { "epoch": 1.378898397564758, "grad_norm": 0.5567690134048462, "learning_rate": 0.0001, "loss": 1.5045, "step": 12004 }, { "epoch": 1.379013267474585, "grad_norm": 0.6072371602058411, "learning_rate": 0.0001, "loss": 1.4631, "step": 12005 }, { "epoch": 1.3791281373844122, "grad_norm": 0.6124165058135986, "learning_rate": 0.0001, "loss": 1.396, "step": 12006 }, { "epoch": 1.3792430072942392, "grad_norm": 0.6033795475959778, "learning_rate": 0.0001, "loss": 1.5735, "step": 12007 }, { "epoch": 1.3793578772040664, "grad_norm": 0.5871466994285583, "learning_rate": 0.0001, "loss": 1.5119, "step": 12008 }, { "epoch": 1.3794727471138934, "grad_norm": 0.6192461252212524, "learning_rate": 0.0001, "loss": 1.6427, "step": 12009 }, { "epoch": 1.3795876170237207, "grad_norm": 0.5672609806060791, "learning_rate": 0.0001, "loss": 1.5876, "step": 12010 }, { "epoch": 1.3797024869335477, "grad_norm": 0.5758570432662964, "learning_rate": 0.0001, "loss": 1.6809, "step": 12011 }, { "epoch": 1.379817356843375, "grad_norm": 0.5541750192642212, "learning_rate": 0.0001, "loss": 1.4669, "step": 12012 }, { "epoch": 1.379932226753202, "grad_norm": 0.652824878692627, "learning_rate": 0.0001, "loss": 1.2463, "step": 12013 }, { "epoch": 1.3800470966630292, "grad_norm": 0.6101529598236084, "learning_rate": 0.0001, "loss": 1.6506, "step": 12014 }, { "epoch": 1.3801619665728562, "grad_norm": 0.5852307677268982, "learning_rate": 0.0001, "loss": 1.6664, "step": 12015 }, { "epoch": 1.3802768364826834, "grad_norm": 0.5421605706214905, "learning_rate": 0.0001, "loss": 1.3547, "step": 12016 }, { "epoch": 1.3803917063925104, "grad_norm": 0.5122795701026917, "learning_rate": 0.0001, "loss": 1.3986, "step": 12017 }, { "epoch": 1.3805065763023376, "grad_norm": 0.5709700584411621, "learning_rate": 0.0001, "loss": 1.4771, "step": 12018 }, { "epoch": 1.3806214462121646, "grad_norm": 0.568841814994812, "learning_rate": 0.0001, "loss": 1.2557, "step": 12019 }, { "epoch": 1.3807363161219919, "grad_norm": 0.5416236519813538, "learning_rate": 0.0001, "loss": 1.5226, "step": 12020 }, { "epoch": 1.380851186031819, "grad_norm": 0.5720876455307007, "learning_rate": 0.0001, "loss": 1.4735, "step": 12021 }, { "epoch": 1.3809660559416461, "grad_norm": 0.5533315539360046, "learning_rate": 0.0001, "loss": 1.2757, "step": 12022 }, { "epoch": 1.3810809258514731, "grad_norm": 0.6400322914123535, "learning_rate": 0.0001, "loss": 1.3811, "step": 12023 }, { "epoch": 1.3811957957613004, "grad_norm": 0.6613686680793762, "learning_rate": 0.0001, "loss": 1.6539, "step": 12024 }, { "epoch": 1.3813106656711274, "grad_norm": 0.6082622408866882, "learning_rate": 0.0001, "loss": 1.6322, "step": 12025 }, { "epoch": 1.3814255355809546, "grad_norm": 0.6383165121078491, "learning_rate": 0.0001, "loss": 1.579, "step": 12026 }, { "epoch": 1.3815404054907816, "grad_norm": 0.5541550517082214, "learning_rate": 0.0001, "loss": 1.4047, "step": 12027 }, { "epoch": 1.3816552754006088, "grad_norm": 0.5779566168785095, "learning_rate": 0.0001, "loss": 1.3272, "step": 12028 }, { "epoch": 1.3817701453104358, "grad_norm": 0.599031925201416, "learning_rate": 0.0001, "loss": 1.5207, "step": 12029 }, { "epoch": 1.381885015220263, "grad_norm": 0.5647594928741455, "learning_rate": 0.0001, "loss": 1.4146, "step": 12030 }, { "epoch": 1.38199988513009, "grad_norm": 0.6167514324188232, "learning_rate": 0.0001, "loss": 1.5239, "step": 12031 }, { "epoch": 1.3821147550399173, "grad_norm": 0.5759591460227966, "learning_rate": 0.0001, "loss": 1.3562, "step": 12032 }, { "epoch": 1.3822296249497446, "grad_norm": 0.6348143815994263, "learning_rate": 0.0001, "loss": 1.5969, "step": 12033 }, { "epoch": 1.3823444948595716, "grad_norm": 0.6396414041519165, "learning_rate": 0.0001, "loss": 1.595, "step": 12034 }, { "epoch": 1.3824593647693986, "grad_norm": 0.5942586064338684, "learning_rate": 0.0001, "loss": 1.4623, "step": 12035 }, { "epoch": 1.3825742346792258, "grad_norm": 0.625024676322937, "learning_rate": 0.0001, "loss": 1.5357, "step": 12036 }, { "epoch": 1.382689104589053, "grad_norm": 0.6331309676170349, "learning_rate": 0.0001, "loss": 1.4587, "step": 12037 }, { "epoch": 1.38280397449888, "grad_norm": 0.6286697387695312, "learning_rate": 0.0001, "loss": 1.5249, "step": 12038 }, { "epoch": 1.382918844408707, "grad_norm": 0.6172159910202026, "learning_rate": 0.0001, "loss": 1.3461, "step": 12039 }, { "epoch": 1.3830337143185343, "grad_norm": 0.5521469712257385, "learning_rate": 0.0001, "loss": 1.5414, "step": 12040 }, { "epoch": 1.3831485842283615, "grad_norm": 0.5680788159370422, "learning_rate": 0.0001, "loss": 1.392, "step": 12041 }, { "epoch": 1.3832634541381885, "grad_norm": 0.5680851936340332, "learning_rate": 0.0001, "loss": 1.4833, "step": 12042 }, { "epoch": 1.3833783240480155, "grad_norm": 0.6148812174797058, "learning_rate": 0.0001, "loss": 1.4922, "step": 12043 }, { "epoch": 1.3834931939578428, "grad_norm": 0.5677898526191711, "learning_rate": 0.0001, "loss": 1.3504, "step": 12044 }, { "epoch": 1.38360806386767, "grad_norm": 0.5935441255569458, "learning_rate": 0.0001, "loss": 1.3455, "step": 12045 }, { "epoch": 1.383722933777497, "grad_norm": 0.5716426968574524, "learning_rate": 0.0001, "loss": 1.4883, "step": 12046 }, { "epoch": 1.383837803687324, "grad_norm": 0.5644375085830688, "learning_rate": 0.0001, "loss": 1.4816, "step": 12047 }, { "epoch": 1.3839526735971512, "grad_norm": 0.5767795443534851, "learning_rate": 0.0001, "loss": 1.5569, "step": 12048 }, { "epoch": 1.3840675435069785, "grad_norm": 0.5734127759933472, "learning_rate": 0.0001, "loss": 1.3649, "step": 12049 }, { "epoch": 1.3841824134168055, "grad_norm": 0.628250777721405, "learning_rate": 0.0001, "loss": 1.5152, "step": 12050 }, { "epoch": 1.3842972833266325, "grad_norm": 0.622211217880249, "learning_rate": 0.0001, "loss": 1.4912, "step": 12051 }, { "epoch": 1.3844121532364597, "grad_norm": 0.5941185355186462, "learning_rate": 0.0001, "loss": 1.4555, "step": 12052 }, { "epoch": 1.384527023146287, "grad_norm": 0.6594216227531433, "learning_rate": 0.0001, "loss": 1.4542, "step": 12053 }, { "epoch": 1.384641893056114, "grad_norm": 0.5944557785987854, "learning_rate": 0.0001, "loss": 1.4187, "step": 12054 }, { "epoch": 1.384756762965941, "grad_norm": 0.612331748008728, "learning_rate": 0.0001, "loss": 1.4077, "step": 12055 }, { "epoch": 1.3848716328757682, "grad_norm": 0.5708249807357788, "learning_rate": 0.0001, "loss": 1.3858, "step": 12056 }, { "epoch": 1.3849865027855954, "grad_norm": 0.7231257557868958, "learning_rate": 0.0001, "loss": 1.5112, "step": 12057 }, { "epoch": 1.3851013726954224, "grad_norm": 0.5759585499763489, "learning_rate": 0.0001, "loss": 1.4769, "step": 12058 }, { "epoch": 1.3852162426052494, "grad_norm": 0.5974624752998352, "learning_rate": 0.0001, "loss": 1.5837, "step": 12059 }, { "epoch": 1.3853311125150767, "grad_norm": 0.5672141313552856, "learning_rate": 0.0001, "loss": 1.3981, "step": 12060 }, { "epoch": 1.385445982424904, "grad_norm": 0.5883855819702148, "learning_rate": 0.0001, "loss": 1.2928, "step": 12061 }, { "epoch": 1.385560852334731, "grad_norm": 0.6936026215553284, "learning_rate": 0.0001, "loss": 1.6683, "step": 12062 }, { "epoch": 1.385675722244558, "grad_norm": 0.5789870619773865, "learning_rate": 0.0001, "loss": 1.6135, "step": 12063 }, { "epoch": 1.3857905921543852, "grad_norm": 0.6692520380020142, "learning_rate": 0.0001, "loss": 1.6279, "step": 12064 }, { "epoch": 1.3859054620642124, "grad_norm": 0.6601521372795105, "learning_rate": 0.0001, "loss": 1.6242, "step": 12065 }, { "epoch": 1.3860203319740394, "grad_norm": 0.6020188927650452, "learning_rate": 0.0001, "loss": 1.5118, "step": 12066 }, { "epoch": 1.3861352018838664, "grad_norm": 0.5850477814674377, "learning_rate": 0.0001, "loss": 1.5276, "step": 12067 }, { "epoch": 1.3862500717936936, "grad_norm": 0.6538852453231812, "learning_rate": 0.0001, "loss": 1.6425, "step": 12068 }, { "epoch": 1.3863649417035209, "grad_norm": 0.5990716218948364, "learning_rate": 0.0001, "loss": 1.4191, "step": 12069 }, { "epoch": 1.3864798116133479, "grad_norm": 0.6036950945854187, "learning_rate": 0.0001, "loss": 1.5392, "step": 12070 }, { "epoch": 1.3865946815231749, "grad_norm": 0.5441368818283081, "learning_rate": 0.0001, "loss": 1.4551, "step": 12071 }, { "epoch": 1.3867095514330021, "grad_norm": 0.6289240121841431, "learning_rate": 0.0001, "loss": 1.274, "step": 12072 }, { "epoch": 1.3868244213428293, "grad_norm": 0.5667722225189209, "learning_rate": 0.0001, "loss": 1.3627, "step": 12073 }, { "epoch": 1.3869392912526564, "grad_norm": 0.5763718485832214, "learning_rate": 0.0001, "loss": 1.6001, "step": 12074 }, { "epoch": 1.3870541611624834, "grad_norm": 0.5512832999229431, "learning_rate": 0.0001, "loss": 1.4434, "step": 12075 }, { "epoch": 1.3871690310723106, "grad_norm": 0.5643221735954285, "learning_rate": 0.0001, "loss": 1.5439, "step": 12076 }, { "epoch": 1.3872839009821378, "grad_norm": 0.5980443954467773, "learning_rate": 0.0001, "loss": 1.3511, "step": 12077 }, { "epoch": 1.3873987708919648, "grad_norm": 0.6038445234298706, "learning_rate": 0.0001, "loss": 1.6927, "step": 12078 }, { "epoch": 1.3875136408017918, "grad_norm": 0.6184426546096802, "learning_rate": 0.0001, "loss": 1.5709, "step": 12079 }, { "epoch": 1.387628510711619, "grad_norm": 0.6515767574310303, "learning_rate": 0.0001, "loss": 1.684, "step": 12080 }, { "epoch": 1.3877433806214463, "grad_norm": 0.6065769195556641, "learning_rate": 0.0001, "loss": 1.5179, "step": 12081 }, { "epoch": 1.3878582505312733, "grad_norm": 0.6202980875968933, "learning_rate": 0.0001, "loss": 1.3916, "step": 12082 }, { "epoch": 1.3879731204411003, "grad_norm": 0.6353384852409363, "learning_rate": 0.0001, "loss": 1.3461, "step": 12083 }, { "epoch": 1.3880879903509276, "grad_norm": 0.6570160984992981, "learning_rate": 0.0001, "loss": 1.3667, "step": 12084 }, { "epoch": 1.3882028602607548, "grad_norm": 0.5832175612449646, "learning_rate": 0.0001, "loss": 1.3499, "step": 12085 }, { "epoch": 1.3883177301705818, "grad_norm": 0.5629262924194336, "learning_rate": 0.0001, "loss": 1.5559, "step": 12086 }, { "epoch": 1.3884326000804088, "grad_norm": 0.586079478263855, "learning_rate": 0.0001, "loss": 1.432, "step": 12087 }, { "epoch": 1.388547469990236, "grad_norm": 0.5707273483276367, "learning_rate": 0.0001, "loss": 1.4262, "step": 12088 }, { "epoch": 1.3886623399000633, "grad_norm": 0.6245285868644714, "learning_rate": 0.0001, "loss": 1.5261, "step": 12089 }, { "epoch": 1.3887772098098903, "grad_norm": 0.6213446855545044, "learning_rate": 0.0001, "loss": 1.6202, "step": 12090 }, { "epoch": 1.3888920797197173, "grad_norm": 0.6027031540870667, "learning_rate": 0.0001, "loss": 1.5837, "step": 12091 }, { "epoch": 1.3890069496295445, "grad_norm": 0.5713696479797363, "learning_rate": 0.0001, "loss": 1.4307, "step": 12092 }, { "epoch": 1.3891218195393717, "grad_norm": 0.6017079949378967, "learning_rate": 0.0001, "loss": 1.3979, "step": 12093 }, { "epoch": 1.3892366894491988, "grad_norm": 0.5776708722114563, "learning_rate": 0.0001, "loss": 1.4307, "step": 12094 }, { "epoch": 1.3893515593590258, "grad_norm": 0.5756542086601257, "learning_rate": 0.0001, "loss": 1.431, "step": 12095 }, { "epoch": 1.389466429268853, "grad_norm": 0.6287577152252197, "learning_rate": 0.0001, "loss": 1.3661, "step": 12096 }, { "epoch": 1.3895812991786802, "grad_norm": 0.5801504850387573, "learning_rate": 0.0001, "loss": 1.4198, "step": 12097 }, { "epoch": 1.3896961690885072, "grad_norm": 0.6163144111633301, "learning_rate": 0.0001, "loss": 1.3697, "step": 12098 }, { "epoch": 1.3898110389983345, "grad_norm": 0.623882532119751, "learning_rate": 0.0001, "loss": 1.5625, "step": 12099 }, { "epoch": 1.3899259089081615, "grad_norm": 0.6868638396263123, "learning_rate": 0.0001, "loss": 1.7235, "step": 12100 }, { "epoch": 1.3900407788179887, "grad_norm": 0.5804413557052612, "learning_rate": 0.0001, "loss": 1.4517, "step": 12101 }, { "epoch": 1.3901556487278157, "grad_norm": 0.6150039434432983, "learning_rate": 0.0001, "loss": 1.4018, "step": 12102 }, { "epoch": 1.390270518637643, "grad_norm": 0.6017144322395325, "learning_rate": 0.0001, "loss": 1.326, "step": 12103 }, { "epoch": 1.39038538854747, "grad_norm": 0.5922073721885681, "learning_rate": 0.0001, "loss": 1.4635, "step": 12104 }, { "epoch": 1.3905002584572972, "grad_norm": 0.6403117775917053, "learning_rate": 0.0001, "loss": 1.5479, "step": 12105 }, { "epoch": 1.3906151283671242, "grad_norm": 0.5838686227798462, "learning_rate": 0.0001, "loss": 1.5511, "step": 12106 }, { "epoch": 1.3907299982769514, "grad_norm": 0.575851559638977, "learning_rate": 0.0001, "loss": 1.4667, "step": 12107 }, { "epoch": 1.3908448681867784, "grad_norm": 0.5583367347717285, "learning_rate": 0.0001, "loss": 1.3506, "step": 12108 }, { "epoch": 1.3909597380966057, "grad_norm": 0.6150028109550476, "learning_rate": 0.0001, "loss": 1.5256, "step": 12109 }, { "epoch": 1.3910746080064327, "grad_norm": 0.5967298746109009, "learning_rate": 0.0001, "loss": 1.4282, "step": 12110 }, { "epoch": 1.39118947791626, "grad_norm": 0.6403521299362183, "learning_rate": 0.0001, "loss": 1.6455, "step": 12111 }, { "epoch": 1.391304347826087, "grad_norm": 0.579937219619751, "learning_rate": 0.0001, "loss": 1.4695, "step": 12112 }, { "epoch": 1.3914192177359141, "grad_norm": 0.5739872455596924, "learning_rate": 0.0001, "loss": 1.6012, "step": 12113 }, { "epoch": 1.3915340876457412, "grad_norm": 0.5874963402748108, "learning_rate": 0.0001, "loss": 1.5576, "step": 12114 }, { "epoch": 1.3916489575555684, "grad_norm": 0.5656614899635315, "learning_rate": 0.0001, "loss": 1.4357, "step": 12115 }, { "epoch": 1.3917638274653954, "grad_norm": 0.6302921175956726, "learning_rate": 0.0001, "loss": 1.5072, "step": 12116 }, { "epoch": 1.3918786973752226, "grad_norm": 0.6151950359344482, "learning_rate": 0.0001, "loss": 1.4652, "step": 12117 }, { "epoch": 1.3919935672850496, "grad_norm": 0.6326630711555481, "learning_rate": 0.0001, "loss": 1.4749, "step": 12118 }, { "epoch": 1.3921084371948769, "grad_norm": 0.6131033897399902, "learning_rate": 0.0001, "loss": 1.382, "step": 12119 }, { "epoch": 1.3922233071047039, "grad_norm": 0.6083925366401672, "learning_rate": 0.0001, "loss": 1.47, "step": 12120 }, { "epoch": 1.392338177014531, "grad_norm": 0.5470343232154846, "learning_rate": 0.0001, "loss": 1.3606, "step": 12121 }, { "epoch": 1.3924530469243581, "grad_norm": 0.6349021196365356, "learning_rate": 0.0001, "loss": 1.6476, "step": 12122 }, { "epoch": 1.3925679168341853, "grad_norm": 0.5835825204849243, "learning_rate": 0.0001, "loss": 1.3953, "step": 12123 }, { "epoch": 1.3926827867440124, "grad_norm": 0.6109292507171631, "learning_rate": 0.0001, "loss": 1.5743, "step": 12124 }, { "epoch": 1.3927976566538396, "grad_norm": 0.5654585957527161, "learning_rate": 0.0001, "loss": 1.4229, "step": 12125 }, { "epoch": 1.3929125265636666, "grad_norm": 0.5883094072341919, "learning_rate": 0.0001, "loss": 1.5037, "step": 12126 }, { "epoch": 1.3930273964734938, "grad_norm": 0.6077958345413208, "learning_rate": 0.0001, "loss": 1.4381, "step": 12127 }, { "epoch": 1.3931422663833208, "grad_norm": 0.6568667888641357, "learning_rate": 0.0001, "loss": 1.6935, "step": 12128 }, { "epoch": 1.393257136293148, "grad_norm": 0.5563843250274658, "learning_rate": 0.0001, "loss": 1.2562, "step": 12129 }, { "epoch": 1.393372006202975, "grad_norm": 0.5851126313209534, "learning_rate": 0.0001, "loss": 1.3322, "step": 12130 }, { "epoch": 1.3934868761128023, "grad_norm": 0.579210102558136, "learning_rate": 0.0001, "loss": 1.3495, "step": 12131 }, { "epoch": 1.3936017460226293, "grad_norm": 0.6247128844261169, "learning_rate": 0.0001, "loss": 1.455, "step": 12132 }, { "epoch": 1.3937166159324565, "grad_norm": 0.6491114497184753, "learning_rate": 0.0001, "loss": 1.5916, "step": 12133 }, { "epoch": 1.3938314858422836, "grad_norm": 0.5388944149017334, "learning_rate": 0.0001, "loss": 1.3398, "step": 12134 }, { "epoch": 1.3939463557521108, "grad_norm": 0.5601046681404114, "learning_rate": 0.0001, "loss": 1.3384, "step": 12135 }, { "epoch": 1.3940612256619378, "grad_norm": 0.5893126726150513, "learning_rate": 0.0001, "loss": 1.4805, "step": 12136 }, { "epoch": 1.394176095571765, "grad_norm": 0.6289069056510925, "learning_rate": 0.0001, "loss": 1.5355, "step": 12137 }, { "epoch": 1.394290965481592, "grad_norm": 0.6383712887763977, "learning_rate": 0.0001, "loss": 1.568, "step": 12138 }, { "epoch": 1.3944058353914193, "grad_norm": 0.6105307340621948, "learning_rate": 0.0001, "loss": 1.5548, "step": 12139 }, { "epoch": 1.3945207053012463, "grad_norm": 0.5872035622596741, "learning_rate": 0.0001, "loss": 1.4613, "step": 12140 }, { "epoch": 1.3946355752110735, "grad_norm": 0.5394282937049866, "learning_rate": 0.0001, "loss": 1.4145, "step": 12141 }, { "epoch": 1.3947504451209005, "grad_norm": 0.6004125475883484, "learning_rate": 0.0001, "loss": 1.3922, "step": 12142 }, { "epoch": 1.3948653150307277, "grad_norm": 0.6273020505905151, "learning_rate": 0.0001, "loss": 1.5747, "step": 12143 }, { "epoch": 1.3949801849405548, "grad_norm": 0.5593479871749878, "learning_rate": 0.0001, "loss": 1.4331, "step": 12144 }, { "epoch": 1.395095054850382, "grad_norm": 0.5342775583267212, "learning_rate": 0.0001, "loss": 1.3331, "step": 12145 }, { "epoch": 1.395209924760209, "grad_norm": 0.5714641213417053, "learning_rate": 0.0001, "loss": 1.1656, "step": 12146 }, { "epoch": 1.3953247946700362, "grad_norm": 0.6640602350234985, "learning_rate": 0.0001, "loss": 1.4388, "step": 12147 }, { "epoch": 1.3954396645798632, "grad_norm": 0.5924851298332214, "learning_rate": 0.0001, "loss": 1.2908, "step": 12148 }, { "epoch": 1.3955545344896905, "grad_norm": 0.6022685170173645, "learning_rate": 0.0001, "loss": 1.4481, "step": 12149 }, { "epoch": 1.3956694043995175, "grad_norm": 0.6584579348564148, "learning_rate": 0.0001, "loss": 1.5328, "step": 12150 }, { "epoch": 1.3957842743093447, "grad_norm": 0.6461522579193115, "learning_rate": 0.0001, "loss": 1.4996, "step": 12151 }, { "epoch": 1.3958991442191717, "grad_norm": 0.6562781929969788, "learning_rate": 0.0001, "loss": 1.6059, "step": 12152 }, { "epoch": 1.396014014128999, "grad_norm": 0.593710720539093, "learning_rate": 0.0001, "loss": 1.5505, "step": 12153 }, { "epoch": 1.396128884038826, "grad_norm": 0.6154862642288208, "learning_rate": 0.0001, "loss": 1.5141, "step": 12154 }, { "epoch": 1.3962437539486532, "grad_norm": 0.5625523328781128, "learning_rate": 0.0001, "loss": 1.4415, "step": 12155 }, { "epoch": 1.3963586238584802, "grad_norm": 0.5562409162521362, "learning_rate": 0.0001, "loss": 1.3692, "step": 12156 }, { "epoch": 1.3964734937683074, "grad_norm": 0.6092026829719543, "learning_rate": 0.0001, "loss": 1.5876, "step": 12157 }, { "epoch": 1.3965883636781344, "grad_norm": 0.5918128490447998, "learning_rate": 0.0001, "loss": 1.4551, "step": 12158 }, { "epoch": 1.3967032335879617, "grad_norm": 0.6120827794075012, "learning_rate": 0.0001, "loss": 1.4987, "step": 12159 }, { "epoch": 1.3968181034977887, "grad_norm": 0.6138426065444946, "learning_rate": 0.0001, "loss": 1.4106, "step": 12160 }, { "epoch": 1.396932973407616, "grad_norm": 0.641486406326294, "learning_rate": 0.0001, "loss": 1.4812, "step": 12161 }, { "epoch": 1.397047843317443, "grad_norm": 0.6255931258201599, "learning_rate": 0.0001, "loss": 1.4398, "step": 12162 }, { "epoch": 1.3971627132272701, "grad_norm": 0.6870328187942505, "learning_rate": 0.0001, "loss": 1.661, "step": 12163 }, { "epoch": 1.3972775831370972, "grad_norm": 0.5824881196022034, "learning_rate": 0.0001, "loss": 1.4912, "step": 12164 }, { "epoch": 1.3973924530469244, "grad_norm": 0.557790994644165, "learning_rate": 0.0001, "loss": 1.4021, "step": 12165 }, { "epoch": 1.3975073229567514, "grad_norm": 0.6548678874969482, "learning_rate": 0.0001, "loss": 1.3511, "step": 12166 }, { "epoch": 1.3976221928665786, "grad_norm": 0.5523027777671814, "learning_rate": 0.0001, "loss": 1.1925, "step": 12167 }, { "epoch": 1.3977370627764056, "grad_norm": 0.5813341736793518, "learning_rate": 0.0001, "loss": 1.477, "step": 12168 }, { "epoch": 1.3978519326862329, "grad_norm": 0.5378603935241699, "learning_rate": 0.0001, "loss": 1.2486, "step": 12169 }, { "epoch": 1.39796680259606, "grad_norm": 0.5774914622306824, "learning_rate": 0.0001, "loss": 1.361, "step": 12170 }, { "epoch": 1.398081672505887, "grad_norm": 0.602354884147644, "learning_rate": 0.0001, "loss": 1.6387, "step": 12171 }, { "epoch": 1.3981965424157141, "grad_norm": 0.559851884841919, "learning_rate": 0.0001, "loss": 1.488, "step": 12172 }, { "epoch": 1.3983114123255413, "grad_norm": 0.6218519806861877, "learning_rate": 0.0001, "loss": 1.355, "step": 12173 }, { "epoch": 1.3984262822353686, "grad_norm": 0.6444374918937683, "learning_rate": 0.0001, "loss": 1.5331, "step": 12174 }, { "epoch": 1.3985411521451956, "grad_norm": 0.5984706282615662, "learning_rate": 0.0001, "loss": 1.4786, "step": 12175 }, { "epoch": 1.3986560220550226, "grad_norm": 0.6040021777153015, "learning_rate": 0.0001, "loss": 1.4337, "step": 12176 }, { "epoch": 1.3987708919648498, "grad_norm": 0.5464459657669067, "learning_rate": 0.0001, "loss": 1.4417, "step": 12177 }, { "epoch": 1.398885761874677, "grad_norm": 0.5808142423629761, "learning_rate": 0.0001, "loss": 1.5646, "step": 12178 }, { "epoch": 1.399000631784504, "grad_norm": 0.5869821906089783, "learning_rate": 0.0001, "loss": 1.593, "step": 12179 }, { "epoch": 1.399115501694331, "grad_norm": 0.5694946050643921, "learning_rate": 0.0001, "loss": 1.5514, "step": 12180 }, { "epoch": 1.3992303716041583, "grad_norm": 0.5913307666778564, "learning_rate": 0.0001, "loss": 1.6787, "step": 12181 }, { "epoch": 1.3993452415139855, "grad_norm": 0.5427361130714417, "learning_rate": 0.0001, "loss": 1.4013, "step": 12182 }, { "epoch": 1.3994601114238125, "grad_norm": 0.5571943521499634, "learning_rate": 0.0001, "loss": 1.4897, "step": 12183 }, { "epoch": 1.3995749813336396, "grad_norm": 0.5840848684310913, "learning_rate": 0.0001, "loss": 1.4723, "step": 12184 }, { "epoch": 1.3996898512434668, "grad_norm": 0.5792492628097534, "learning_rate": 0.0001, "loss": 1.5345, "step": 12185 }, { "epoch": 1.399804721153294, "grad_norm": 0.5461111068725586, "learning_rate": 0.0001, "loss": 1.4224, "step": 12186 }, { "epoch": 1.399919591063121, "grad_norm": 0.6092454195022583, "learning_rate": 0.0001, "loss": 1.5321, "step": 12187 }, { "epoch": 1.400034460972948, "grad_norm": 0.5773031711578369, "learning_rate": 0.0001, "loss": 1.4072, "step": 12188 }, { "epoch": 1.4001493308827753, "grad_norm": 0.6374520063400269, "learning_rate": 0.0001, "loss": 1.5028, "step": 12189 }, { "epoch": 1.4002642007926025, "grad_norm": 0.5584102272987366, "learning_rate": 0.0001, "loss": 1.2906, "step": 12190 }, { "epoch": 1.4003790707024295, "grad_norm": 0.6517335772514343, "learning_rate": 0.0001, "loss": 1.7582, "step": 12191 }, { "epoch": 1.4004939406122565, "grad_norm": 0.5977898240089417, "learning_rate": 0.0001, "loss": 1.517, "step": 12192 }, { "epoch": 1.4006088105220837, "grad_norm": 0.5880120992660522, "learning_rate": 0.0001, "loss": 1.6576, "step": 12193 }, { "epoch": 1.400723680431911, "grad_norm": 0.5685825943946838, "learning_rate": 0.0001, "loss": 1.4966, "step": 12194 }, { "epoch": 1.400838550341738, "grad_norm": 0.5807334780693054, "learning_rate": 0.0001, "loss": 1.3646, "step": 12195 }, { "epoch": 1.400953420251565, "grad_norm": 0.602315366268158, "learning_rate": 0.0001, "loss": 1.2503, "step": 12196 }, { "epoch": 1.4010682901613922, "grad_norm": 0.6140511631965637, "learning_rate": 0.0001, "loss": 1.4578, "step": 12197 }, { "epoch": 1.4011831600712195, "grad_norm": 0.6030164361000061, "learning_rate": 0.0001, "loss": 1.4811, "step": 12198 }, { "epoch": 1.4012980299810465, "grad_norm": 0.6865645051002502, "learning_rate": 0.0001, "loss": 1.5387, "step": 12199 }, { "epoch": 1.4014128998908735, "grad_norm": 0.5891702175140381, "learning_rate": 0.0001, "loss": 1.3482, "step": 12200 }, { "epoch": 1.4015277698007007, "grad_norm": 0.6280384063720703, "learning_rate": 0.0001, "loss": 1.4052, "step": 12201 }, { "epoch": 1.401642639710528, "grad_norm": 0.5294430255889893, "learning_rate": 0.0001, "loss": 1.4011, "step": 12202 }, { "epoch": 1.401757509620355, "grad_norm": 0.5137497782707214, "learning_rate": 0.0001, "loss": 1.3008, "step": 12203 }, { "epoch": 1.401872379530182, "grad_norm": 0.6277523636817932, "learning_rate": 0.0001, "loss": 1.3415, "step": 12204 }, { "epoch": 1.4019872494400092, "grad_norm": 0.5599936842918396, "learning_rate": 0.0001, "loss": 1.524, "step": 12205 }, { "epoch": 1.4021021193498364, "grad_norm": 0.5398606657981873, "learning_rate": 0.0001, "loss": 1.3835, "step": 12206 }, { "epoch": 1.4022169892596634, "grad_norm": 0.5670443773269653, "learning_rate": 0.0001, "loss": 1.4954, "step": 12207 }, { "epoch": 1.4023318591694904, "grad_norm": 0.5825430154800415, "learning_rate": 0.0001, "loss": 1.4757, "step": 12208 }, { "epoch": 1.4024467290793177, "grad_norm": 0.6514697670936584, "learning_rate": 0.0001, "loss": 1.4789, "step": 12209 }, { "epoch": 1.402561598989145, "grad_norm": 0.5851296782493591, "learning_rate": 0.0001, "loss": 1.4696, "step": 12210 }, { "epoch": 1.402676468898972, "grad_norm": 0.6211963295936584, "learning_rate": 0.0001, "loss": 1.192, "step": 12211 }, { "epoch": 1.402791338808799, "grad_norm": 0.5917556881904602, "learning_rate": 0.0001, "loss": 1.3838, "step": 12212 }, { "epoch": 1.4029062087186261, "grad_norm": 0.637103259563446, "learning_rate": 0.0001, "loss": 1.5705, "step": 12213 }, { "epoch": 1.4030210786284534, "grad_norm": 0.6678568720817566, "learning_rate": 0.0001, "loss": 1.7563, "step": 12214 }, { "epoch": 1.4031359485382804, "grad_norm": 0.6561994552612305, "learning_rate": 0.0001, "loss": 1.485, "step": 12215 }, { "epoch": 1.4032508184481074, "grad_norm": 0.590522825717926, "learning_rate": 0.0001, "loss": 1.6996, "step": 12216 }, { "epoch": 1.4033656883579346, "grad_norm": 0.5511714816093445, "learning_rate": 0.0001, "loss": 1.405, "step": 12217 }, { "epoch": 1.4034805582677619, "grad_norm": 0.567674458026886, "learning_rate": 0.0001, "loss": 1.2435, "step": 12218 }, { "epoch": 1.4035954281775889, "grad_norm": 0.5764709711074829, "learning_rate": 0.0001, "loss": 1.6194, "step": 12219 }, { "epoch": 1.4037102980874159, "grad_norm": 0.5814299583435059, "learning_rate": 0.0001, "loss": 1.561, "step": 12220 }, { "epoch": 1.403825167997243, "grad_norm": 0.6138479709625244, "learning_rate": 0.0001, "loss": 1.3847, "step": 12221 }, { "epoch": 1.4039400379070703, "grad_norm": 0.577967643737793, "learning_rate": 0.0001, "loss": 1.4667, "step": 12222 }, { "epoch": 1.4040549078168973, "grad_norm": 0.5953364372253418, "learning_rate": 0.0001, "loss": 1.433, "step": 12223 }, { "epoch": 1.4041697777267244, "grad_norm": 0.6030754446983337, "learning_rate": 0.0001, "loss": 1.3456, "step": 12224 }, { "epoch": 1.4042846476365516, "grad_norm": 0.6069220304489136, "learning_rate": 0.0001, "loss": 1.4485, "step": 12225 }, { "epoch": 1.4043995175463788, "grad_norm": 0.5867301821708679, "learning_rate": 0.0001, "loss": 1.4798, "step": 12226 }, { "epoch": 1.4045143874562058, "grad_norm": 0.6797637343406677, "learning_rate": 0.0001, "loss": 1.6056, "step": 12227 }, { "epoch": 1.4046292573660328, "grad_norm": 0.6498664617538452, "learning_rate": 0.0001, "loss": 1.2396, "step": 12228 }, { "epoch": 1.40474412727586, "grad_norm": 0.645907461643219, "learning_rate": 0.0001, "loss": 1.6652, "step": 12229 }, { "epoch": 1.4048589971856873, "grad_norm": 0.5839877128601074, "learning_rate": 0.0001, "loss": 1.366, "step": 12230 }, { "epoch": 1.4049738670955143, "grad_norm": 0.5825426578521729, "learning_rate": 0.0001, "loss": 1.3383, "step": 12231 }, { "epoch": 1.4050887370053413, "grad_norm": 0.6786919236183167, "learning_rate": 0.0001, "loss": 1.6, "step": 12232 }, { "epoch": 1.4052036069151685, "grad_norm": 0.572919487953186, "learning_rate": 0.0001, "loss": 1.5307, "step": 12233 }, { "epoch": 1.4053184768249958, "grad_norm": 0.5826205611228943, "learning_rate": 0.0001, "loss": 1.227, "step": 12234 }, { "epoch": 1.4054333467348228, "grad_norm": 0.5886760354042053, "learning_rate": 0.0001, "loss": 1.3468, "step": 12235 }, { "epoch": 1.40554821664465, "grad_norm": 0.579044759273529, "learning_rate": 0.0001, "loss": 1.3405, "step": 12236 }, { "epoch": 1.405663086554477, "grad_norm": 0.5720400810241699, "learning_rate": 0.0001, "loss": 1.5135, "step": 12237 }, { "epoch": 1.4057779564643043, "grad_norm": 0.5574225783348083, "learning_rate": 0.0001, "loss": 1.3995, "step": 12238 }, { "epoch": 1.4058928263741313, "grad_norm": 0.5698866844177246, "learning_rate": 0.0001, "loss": 1.5275, "step": 12239 }, { "epoch": 1.4060076962839585, "grad_norm": 0.5897749066352844, "learning_rate": 0.0001, "loss": 1.4587, "step": 12240 }, { "epoch": 1.4061225661937855, "grad_norm": 0.5804178714752197, "learning_rate": 0.0001, "loss": 1.5626, "step": 12241 }, { "epoch": 1.4062374361036127, "grad_norm": 0.5859702229499817, "learning_rate": 0.0001, "loss": 1.431, "step": 12242 }, { "epoch": 1.4063523060134397, "grad_norm": 0.5918383598327637, "learning_rate": 0.0001, "loss": 1.611, "step": 12243 }, { "epoch": 1.406467175923267, "grad_norm": 0.5748845934867859, "learning_rate": 0.0001, "loss": 1.6516, "step": 12244 }, { "epoch": 1.406582045833094, "grad_norm": 0.6131268739700317, "learning_rate": 0.0001, "loss": 1.5947, "step": 12245 }, { "epoch": 1.4066969157429212, "grad_norm": 0.6054122447967529, "learning_rate": 0.0001, "loss": 1.535, "step": 12246 }, { "epoch": 1.4068117856527482, "grad_norm": 0.6443667411804199, "learning_rate": 0.0001, "loss": 1.5172, "step": 12247 }, { "epoch": 1.4069266555625755, "grad_norm": 0.6411632895469666, "learning_rate": 0.0001, "loss": 1.4362, "step": 12248 }, { "epoch": 1.4070415254724025, "grad_norm": 0.5433965921401978, "learning_rate": 0.0001, "loss": 1.4244, "step": 12249 }, { "epoch": 1.4071563953822297, "grad_norm": 0.6453516483306885, "learning_rate": 0.0001, "loss": 1.6362, "step": 12250 }, { "epoch": 1.4072712652920567, "grad_norm": 0.5545192360877991, "learning_rate": 0.0001, "loss": 1.1791, "step": 12251 }, { "epoch": 1.407386135201884, "grad_norm": 0.5485440492630005, "learning_rate": 0.0001, "loss": 1.2924, "step": 12252 }, { "epoch": 1.407501005111711, "grad_norm": 0.614592432975769, "learning_rate": 0.0001, "loss": 1.2284, "step": 12253 }, { "epoch": 1.4076158750215382, "grad_norm": 0.5941545963287354, "learning_rate": 0.0001, "loss": 1.4719, "step": 12254 }, { "epoch": 1.4077307449313652, "grad_norm": 0.5332536697387695, "learning_rate": 0.0001, "loss": 1.4855, "step": 12255 }, { "epoch": 1.4078456148411924, "grad_norm": 0.6020111441612244, "learning_rate": 0.0001, "loss": 1.603, "step": 12256 }, { "epoch": 1.4079604847510194, "grad_norm": 0.6182122826576233, "learning_rate": 0.0001, "loss": 1.4545, "step": 12257 }, { "epoch": 1.4080753546608467, "grad_norm": 0.5976362824440002, "learning_rate": 0.0001, "loss": 1.623, "step": 12258 }, { "epoch": 1.4081902245706737, "grad_norm": 0.5798993706703186, "learning_rate": 0.0001, "loss": 1.3952, "step": 12259 }, { "epoch": 1.408305094480501, "grad_norm": 0.5920160412788391, "learning_rate": 0.0001, "loss": 1.4257, "step": 12260 }, { "epoch": 1.408419964390328, "grad_norm": 0.6999684572219849, "learning_rate": 0.0001, "loss": 1.4828, "step": 12261 }, { "epoch": 1.4085348343001551, "grad_norm": 0.6646612286567688, "learning_rate": 0.0001, "loss": 1.6587, "step": 12262 }, { "epoch": 1.4086497042099821, "grad_norm": 0.6494892239570618, "learning_rate": 0.0001, "loss": 1.5008, "step": 12263 }, { "epoch": 1.4087645741198094, "grad_norm": 0.6330276727676392, "learning_rate": 0.0001, "loss": 1.5038, "step": 12264 }, { "epoch": 1.4088794440296364, "grad_norm": 0.6663432717323303, "learning_rate": 0.0001, "loss": 1.3514, "step": 12265 }, { "epoch": 1.4089943139394636, "grad_norm": 0.6159442067146301, "learning_rate": 0.0001, "loss": 1.5268, "step": 12266 }, { "epoch": 1.4091091838492906, "grad_norm": 0.5885686278343201, "learning_rate": 0.0001, "loss": 1.4262, "step": 12267 }, { "epoch": 1.4092240537591179, "grad_norm": 0.6478608250617981, "learning_rate": 0.0001, "loss": 1.3103, "step": 12268 }, { "epoch": 1.4093389236689449, "grad_norm": 0.598627507686615, "learning_rate": 0.0001, "loss": 1.5664, "step": 12269 }, { "epoch": 1.409453793578772, "grad_norm": 0.5440186262130737, "learning_rate": 0.0001, "loss": 1.5294, "step": 12270 }, { "epoch": 1.409568663488599, "grad_norm": 0.5913492441177368, "learning_rate": 0.0001, "loss": 1.5455, "step": 12271 }, { "epoch": 1.4096835333984263, "grad_norm": 0.6720054149627686, "learning_rate": 0.0001, "loss": 1.6878, "step": 12272 }, { "epoch": 1.4097984033082533, "grad_norm": 0.550347089767456, "learning_rate": 0.0001, "loss": 1.4825, "step": 12273 }, { "epoch": 1.4099132732180806, "grad_norm": 0.5974934697151184, "learning_rate": 0.0001, "loss": 1.5707, "step": 12274 }, { "epoch": 1.4100281431279076, "grad_norm": 0.568218469619751, "learning_rate": 0.0001, "loss": 1.3921, "step": 12275 }, { "epoch": 1.4101430130377348, "grad_norm": 0.6132494211196899, "learning_rate": 0.0001, "loss": 1.3798, "step": 12276 }, { "epoch": 1.4102578829475618, "grad_norm": 0.5562697649002075, "learning_rate": 0.0001, "loss": 1.5631, "step": 12277 }, { "epoch": 1.410372752857389, "grad_norm": 0.5881964564323425, "learning_rate": 0.0001, "loss": 1.4807, "step": 12278 }, { "epoch": 1.410487622767216, "grad_norm": 0.6052611470222473, "learning_rate": 0.0001, "loss": 1.3232, "step": 12279 }, { "epoch": 1.4106024926770433, "grad_norm": 0.568783164024353, "learning_rate": 0.0001, "loss": 1.4518, "step": 12280 }, { "epoch": 1.4107173625868703, "grad_norm": 0.5390822291374207, "learning_rate": 0.0001, "loss": 1.5032, "step": 12281 }, { "epoch": 1.4108322324966975, "grad_norm": 0.6221309900283813, "learning_rate": 0.0001, "loss": 1.4751, "step": 12282 }, { "epoch": 1.4109471024065245, "grad_norm": 0.6314241886138916, "learning_rate": 0.0001, "loss": 1.5097, "step": 12283 }, { "epoch": 1.4110619723163518, "grad_norm": 0.5789543390274048, "learning_rate": 0.0001, "loss": 1.4302, "step": 12284 }, { "epoch": 1.4111768422261788, "grad_norm": 0.6005547642707825, "learning_rate": 0.0001, "loss": 1.2483, "step": 12285 }, { "epoch": 1.411291712136006, "grad_norm": 0.6064243912696838, "learning_rate": 0.0001, "loss": 1.5456, "step": 12286 }, { "epoch": 1.411406582045833, "grad_norm": 0.6371053457260132, "learning_rate": 0.0001, "loss": 1.5302, "step": 12287 }, { "epoch": 1.4115214519556603, "grad_norm": 0.6137773394584656, "learning_rate": 0.0001, "loss": 1.4473, "step": 12288 }, { "epoch": 1.4116363218654873, "grad_norm": 0.6478999853134155, "learning_rate": 0.0001, "loss": 1.6225, "step": 12289 }, { "epoch": 1.4117511917753145, "grad_norm": 0.5467931032180786, "learning_rate": 0.0001, "loss": 1.3421, "step": 12290 }, { "epoch": 1.4118660616851415, "grad_norm": 0.5850721001625061, "learning_rate": 0.0001, "loss": 1.3787, "step": 12291 }, { "epoch": 1.4119809315949687, "grad_norm": 0.5739957690238953, "learning_rate": 0.0001, "loss": 1.4527, "step": 12292 }, { "epoch": 1.4120958015047957, "grad_norm": 0.6541691422462463, "learning_rate": 0.0001, "loss": 1.5428, "step": 12293 }, { "epoch": 1.412210671414623, "grad_norm": 0.6030059456825256, "learning_rate": 0.0001, "loss": 1.4744, "step": 12294 }, { "epoch": 1.41232554132445, "grad_norm": 0.5912495255470276, "learning_rate": 0.0001, "loss": 1.3846, "step": 12295 }, { "epoch": 1.4124404112342772, "grad_norm": 0.6480311751365662, "learning_rate": 0.0001, "loss": 1.5656, "step": 12296 }, { "epoch": 1.4125552811441042, "grad_norm": 0.6374021768569946, "learning_rate": 0.0001, "loss": 1.498, "step": 12297 }, { "epoch": 1.4126701510539315, "grad_norm": 0.7061396241188049, "learning_rate": 0.0001, "loss": 1.5376, "step": 12298 }, { "epoch": 1.4127850209637585, "grad_norm": 0.5619686841964722, "learning_rate": 0.0001, "loss": 1.4337, "step": 12299 }, { "epoch": 1.4128998908735857, "grad_norm": 0.5579246282577515, "learning_rate": 0.0001, "loss": 1.4374, "step": 12300 }, { "epoch": 1.4130147607834127, "grad_norm": 0.5994849801063538, "learning_rate": 0.0001, "loss": 1.4444, "step": 12301 }, { "epoch": 1.41312963069324, "grad_norm": 0.5560767650604248, "learning_rate": 0.0001, "loss": 1.5121, "step": 12302 }, { "epoch": 1.413244500603067, "grad_norm": 0.5769314765930176, "learning_rate": 0.0001, "loss": 1.3982, "step": 12303 }, { "epoch": 1.4133593705128942, "grad_norm": 0.5469468235969543, "learning_rate": 0.0001, "loss": 1.3255, "step": 12304 }, { "epoch": 1.4134742404227212, "grad_norm": 0.5225635170936584, "learning_rate": 0.0001, "loss": 1.4925, "step": 12305 }, { "epoch": 1.4135891103325484, "grad_norm": 0.5575945377349854, "learning_rate": 0.0001, "loss": 1.4781, "step": 12306 }, { "epoch": 1.4137039802423756, "grad_norm": 0.6048114895820618, "learning_rate": 0.0001, "loss": 1.5532, "step": 12307 }, { "epoch": 1.4138188501522027, "grad_norm": 0.560836911201477, "learning_rate": 0.0001, "loss": 1.3568, "step": 12308 }, { "epoch": 1.4139337200620297, "grad_norm": 0.5700927376747131, "learning_rate": 0.0001, "loss": 1.4198, "step": 12309 }, { "epoch": 1.414048589971857, "grad_norm": 0.549271285533905, "learning_rate": 0.0001, "loss": 1.4586, "step": 12310 }, { "epoch": 1.4141634598816841, "grad_norm": 0.6149371266365051, "learning_rate": 0.0001, "loss": 1.4705, "step": 12311 }, { "epoch": 1.4142783297915111, "grad_norm": 0.6246234178543091, "learning_rate": 0.0001, "loss": 1.427, "step": 12312 }, { "epoch": 1.4143931997013381, "grad_norm": 0.6859720349311829, "learning_rate": 0.0001, "loss": 1.4536, "step": 12313 }, { "epoch": 1.4145080696111654, "grad_norm": 0.5962328910827637, "learning_rate": 0.0001, "loss": 1.4477, "step": 12314 }, { "epoch": 1.4146229395209926, "grad_norm": 0.6179618239402771, "learning_rate": 0.0001, "loss": 1.3629, "step": 12315 }, { "epoch": 1.4147378094308196, "grad_norm": 0.5705560445785522, "learning_rate": 0.0001, "loss": 1.4853, "step": 12316 }, { "epoch": 1.4148526793406466, "grad_norm": 0.6366815567016602, "learning_rate": 0.0001, "loss": 1.5446, "step": 12317 }, { "epoch": 1.4149675492504739, "grad_norm": 0.664026141166687, "learning_rate": 0.0001, "loss": 1.6581, "step": 12318 }, { "epoch": 1.415082419160301, "grad_norm": 0.6808172464370728, "learning_rate": 0.0001, "loss": 1.6226, "step": 12319 }, { "epoch": 1.415197289070128, "grad_norm": 0.5884132385253906, "learning_rate": 0.0001, "loss": 1.4446, "step": 12320 }, { "epoch": 1.415312158979955, "grad_norm": 0.6115908026695251, "learning_rate": 0.0001, "loss": 1.6656, "step": 12321 }, { "epoch": 1.4154270288897823, "grad_norm": 0.610191285610199, "learning_rate": 0.0001, "loss": 1.5849, "step": 12322 }, { "epoch": 1.4155418987996096, "grad_norm": 0.5975642800331116, "learning_rate": 0.0001, "loss": 1.6194, "step": 12323 }, { "epoch": 1.4156567687094366, "grad_norm": 0.5843347907066345, "learning_rate": 0.0001, "loss": 1.4948, "step": 12324 }, { "epoch": 1.4157716386192636, "grad_norm": 0.5770195126533508, "learning_rate": 0.0001, "loss": 1.537, "step": 12325 }, { "epoch": 1.4158865085290908, "grad_norm": 0.5760985016822815, "learning_rate": 0.0001, "loss": 1.4043, "step": 12326 }, { "epoch": 1.416001378438918, "grad_norm": 0.5809157490730286, "learning_rate": 0.0001, "loss": 1.5153, "step": 12327 }, { "epoch": 1.416116248348745, "grad_norm": 0.59894198179245, "learning_rate": 0.0001, "loss": 1.5943, "step": 12328 }, { "epoch": 1.416231118258572, "grad_norm": 0.5635504722595215, "learning_rate": 0.0001, "loss": 1.4517, "step": 12329 }, { "epoch": 1.4163459881683993, "grad_norm": 0.5939011573791504, "learning_rate": 0.0001, "loss": 1.6191, "step": 12330 }, { "epoch": 1.4164608580782265, "grad_norm": 0.5841618776321411, "learning_rate": 0.0001, "loss": 1.4749, "step": 12331 }, { "epoch": 1.4165757279880535, "grad_norm": 0.5628432631492615, "learning_rate": 0.0001, "loss": 1.3689, "step": 12332 }, { "epoch": 1.4166905978978805, "grad_norm": 0.5952514410018921, "learning_rate": 0.0001, "loss": 1.3696, "step": 12333 }, { "epoch": 1.4168054678077078, "grad_norm": 0.5918675065040588, "learning_rate": 0.0001, "loss": 1.4191, "step": 12334 }, { "epoch": 1.416920337717535, "grad_norm": 0.629119873046875, "learning_rate": 0.0001, "loss": 1.593, "step": 12335 }, { "epoch": 1.417035207627362, "grad_norm": 0.5868558287620544, "learning_rate": 0.0001, "loss": 1.4134, "step": 12336 }, { "epoch": 1.417150077537189, "grad_norm": 0.6055565476417542, "learning_rate": 0.0001, "loss": 1.3543, "step": 12337 }, { "epoch": 1.4172649474470163, "grad_norm": 0.5665161609649658, "learning_rate": 0.0001, "loss": 1.4376, "step": 12338 }, { "epoch": 1.4173798173568435, "grad_norm": 0.6178751587867737, "learning_rate": 0.0001, "loss": 1.4882, "step": 12339 }, { "epoch": 1.4174946872666705, "grad_norm": 0.6303858160972595, "learning_rate": 0.0001, "loss": 1.5314, "step": 12340 }, { "epoch": 1.4176095571764975, "grad_norm": 0.6005488634109497, "learning_rate": 0.0001, "loss": 1.4275, "step": 12341 }, { "epoch": 1.4177244270863247, "grad_norm": 0.6242131590843201, "learning_rate": 0.0001, "loss": 1.2941, "step": 12342 }, { "epoch": 1.417839296996152, "grad_norm": 0.6233993768692017, "learning_rate": 0.0001, "loss": 1.5075, "step": 12343 }, { "epoch": 1.417954166905979, "grad_norm": 0.6324300169944763, "learning_rate": 0.0001, "loss": 1.5645, "step": 12344 }, { "epoch": 1.418069036815806, "grad_norm": 0.6000421047210693, "learning_rate": 0.0001, "loss": 1.4319, "step": 12345 }, { "epoch": 1.4181839067256332, "grad_norm": 0.6152875423431396, "learning_rate": 0.0001, "loss": 1.3411, "step": 12346 }, { "epoch": 1.4182987766354604, "grad_norm": 0.6526452898979187, "learning_rate": 0.0001, "loss": 1.6113, "step": 12347 }, { "epoch": 1.4184136465452875, "grad_norm": 0.6107121109962463, "learning_rate": 0.0001, "loss": 1.5193, "step": 12348 }, { "epoch": 1.4185285164551145, "grad_norm": 0.5946810841560364, "learning_rate": 0.0001, "loss": 1.5623, "step": 12349 }, { "epoch": 1.4186433863649417, "grad_norm": 0.5849339365959167, "learning_rate": 0.0001, "loss": 1.4287, "step": 12350 }, { "epoch": 1.418758256274769, "grad_norm": 0.6404920816421509, "learning_rate": 0.0001, "loss": 1.596, "step": 12351 }, { "epoch": 1.418873126184596, "grad_norm": 0.6268463134765625, "learning_rate": 0.0001, "loss": 1.566, "step": 12352 }, { "epoch": 1.418987996094423, "grad_norm": 0.5831562876701355, "learning_rate": 0.0001, "loss": 1.3506, "step": 12353 }, { "epoch": 1.4191028660042502, "grad_norm": 0.6019381284713745, "learning_rate": 0.0001, "loss": 1.428, "step": 12354 }, { "epoch": 1.4192177359140774, "grad_norm": 0.6480855941772461, "learning_rate": 0.0001, "loss": 1.4975, "step": 12355 }, { "epoch": 1.4193326058239044, "grad_norm": 0.6324148774147034, "learning_rate": 0.0001, "loss": 1.3886, "step": 12356 }, { "epoch": 1.4194474757337314, "grad_norm": 0.5891407132148743, "learning_rate": 0.0001, "loss": 1.4664, "step": 12357 }, { "epoch": 1.4195623456435587, "grad_norm": 0.5745696425437927, "learning_rate": 0.0001, "loss": 1.4676, "step": 12358 }, { "epoch": 1.4196772155533859, "grad_norm": 0.5865878462791443, "learning_rate": 0.0001, "loss": 1.4144, "step": 12359 }, { "epoch": 1.419792085463213, "grad_norm": 0.5463162064552307, "learning_rate": 0.0001, "loss": 1.2957, "step": 12360 }, { "epoch": 1.41990695537304, "grad_norm": 0.5702036619186401, "learning_rate": 0.0001, "loss": 1.3271, "step": 12361 }, { "epoch": 1.4200218252828671, "grad_norm": 0.5767183899879456, "learning_rate": 0.0001, "loss": 1.5055, "step": 12362 }, { "epoch": 1.4201366951926944, "grad_norm": 0.6075996160507202, "learning_rate": 0.0001, "loss": 1.5536, "step": 12363 }, { "epoch": 1.4202515651025214, "grad_norm": 0.6220189332962036, "learning_rate": 0.0001, "loss": 1.4745, "step": 12364 }, { "epoch": 1.4203664350123484, "grad_norm": 0.5823253393173218, "learning_rate": 0.0001, "loss": 1.4564, "step": 12365 }, { "epoch": 1.4204813049221756, "grad_norm": 0.592589259147644, "learning_rate": 0.0001, "loss": 1.3409, "step": 12366 }, { "epoch": 1.4205961748320028, "grad_norm": 0.624343991279602, "learning_rate": 0.0001, "loss": 1.4716, "step": 12367 }, { "epoch": 1.4207110447418299, "grad_norm": 0.568588137626648, "learning_rate": 0.0001, "loss": 1.4421, "step": 12368 }, { "epoch": 1.4208259146516569, "grad_norm": 0.6244280338287354, "learning_rate": 0.0001, "loss": 1.3673, "step": 12369 }, { "epoch": 1.420940784561484, "grad_norm": 0.5823641419410706, "learning_rate": 0.0001, "loss": 1.4835, "step": 12370 }, { "epoch": 1.4210556544713113, "grad_norm": 0.627984344959259, "learning_rate": 0.0001, "loss": 1.396, "step": 12371 }, { "epoch": 1.4211705243811383, "grad_norm": 0.6518086194992065, "learning_rate": 0.0001, "loss": 1.1716, "step": 12372 }, { "epoch": 1.4212853942909656, "grad_norm": 0.6243144273757935, "learning_rate": 0.0001, "loss": 1.5423, "step": 12373 }, { "epoch": 1.4214002642007926, "grad_norm": 0.6055872440338135, "learning_rate": 0.0001, "loss": 1.567, "step": 12374 }, { "epoch": 1.4215151341106198, "grad_norm": 0.5985282063484192, "learning_rate": 0.0001, "loss": 1.4185, "step": 12375 }, { "epoch": 1.4216300040204468, "grad_norm": 0.5503548383712769, "learning_rate": 0.0001, "loss": 1.524, "step": 12376 }, { "epoch": 1.421744873930274, "grad_norm": 0.5615320205688477, "learning_rate": 0.0001, "loss": 1.5034, "step": 12377 }, { "epoch": 1.421859743840101, "grad_norm": 0.5671427249908447, "learning_rate": 0.0001, "loss": 1.4848, "step": 12378 }, { "epoch": 1.4219746137499283, "grad_norm": 0.6086657643318176, "learning_rate": 0.0001, "loss": 1.5969, "step": 12379 }, { "epoch": 1.4220894836597553, "grad_norm": 0.567254364490509, "learning_rate": 0.0001, "loss": 1.4461, "step": 12380 }, { "epoch": 1.4222043535695825, "grad_norm": 0.655489444732666, "learning_rate": 0.0001, "loss": 1.4207, "step": 12381 }, { "epoch": 1.4223192234794095, "grad_norm": 0.5274955630302429, "learning_rate": 0.0001, "loss": 1.2654, "step": 12382 }, { "epoch": 1.4224340933892368, "grad_norm": 0.574601948261261, "learning_rate": 0.0001, "loss": 1.4474, "step": 12383 }, { "epoch": 1.4225489632990638, "grad_norm": 0.5798234343528748, "learning_rate": 0.0001, "loss": 1.4091, "step": 12384 }, { "epoch": 1.422663833208891, "grad_norm": 0.6089479923248291, "learning_rate": 0.0001, "loss": 1.2936, "step": 12385 }, { "epoch": 1.422778703118718, "grad_norm": 0.5854637026786804, "learning_rate": 0.0001, "loss": 1.4876, "step": 12386 }, { "epoch": 1.4228935730285452, "grad_norm": 0.681310772895813, "learning_rate": 0.0001, "loss": 1.7594, "step": 12387 }, { "epoch": 1.4230084429383723, "grad_norm": 0.5869307518005371, "learning_rate": 0.0001, "loss": 1.3828, "step": 12388 }, { "epoch": 1.4231233128481995, "grad_norm": 0.5680341124534607, "learning_rate": 0.0001, "loss": 1.3795, "step": 12389 }, { "epoch": 1.4232381827580265, "grad_norm": 0.5726298093795776, "learning_rate": 0.0001, "loss": 1.5208, "step": 12390 }, { "epoch": 1.4233530526678537, "grad_norm": 0.6122094988822937, "learning_rate": 0.0001, "loss": 1.541, "step": 12391 }, { "epoch": 1.4234679225776807, "grad_norm": 0.6463402509689331, "learning_rate": 0.0001, "loss": 1.5133, "step": 12392 }, { "epoch": 1.423582792487508, "grad_norm": 0.6010882258415222, "learning_rate": 0.0001, "loss": 1.3532, "step": 12393 }, { "epoch": 1.423697662397335, "grad_norm": 0.5905262231826782, "learning_rate": 0.0001, "loss": 1.2767, "step": 12394 }, { "epoch": 1.4238125323071622, "grad_norm": 0.5448614358901978, "learning_rate": 0.0001, "loss": 1.4319, "step": 12395 }, { "epoch": 1.4239274022169892, "grad_norm": 0.5370798707008362, "learning_rate": 0.0001, "loss": 1.4836, "step": 12396 }, { "epoch": 1.4240422721268164, "grad_norm": 0.5748822689056396, "learning_rate": 0.0001, "loss": 1.4327, "step": 12397 }, { "epoch": 1.4241571420366435, "grad_norm": 0.6279718279838562, "learning_rate": 0.0001, "loss": 1.5636, "step": 12398 }, { "epoch": 1.4242720119464707, "grad_norm": 0.612967848777771, "learning_rate": 0.0001, "loss": 1.4631, "step": 12399 }, { "epoch": 1.4243868818562977, "grad_norm": 0.5825915336608887, "learning_rate": 0.0001, "loss": 1.4405, "step": 12400 }, { "epoch": 1.424501751766125, "grad_norm": 0.5820374488830566, "learning_rate": 0.0001, "loss": 1.607, "step": 12401 }, { "epoch": 1.424616621675952, "grad_norm": 0.5795537829399109, "learning_rate": 0.0001, "loss": 1.3931, "step": 12402 }, { "epoch": 1.4247314915857792, "grad_norm": 0.5845256447792053, "learning_rate": 0.0001, "loss": 1.5411, "step": 12403 }, { "epoch": 1.4248463614956062, "grad_norm": 0.6226243376731873, "learning_rate": 0.0001, "loss": 1.5372, "step": 12404 }, { "epoch": 1.4249612314054334, "grad_norm": 0.5559086203575134, "learning_rate": 0.0001, "loss": 1.488, "step": 12405 }, { "epoch": 1.4250761013152604, "grad_norm": 0.5699470043182373, "learning_rate": 0.0001, "loss": 1.4175, "step": 12406 }, { "epoch": 1.4251909712250876, "grad_norm": 0.6197097301483154, "learning_rate": 0.0001, "loss": 1.5927, "step": 12407 }, { "epoch": 1.4253058411349147, "grad_norm": 0.5672743320465088, "learning_rate": 0.0001, "loss": 1.4876, "step": 12408 }, { "epoch": 1.4254207110447419, "grad_norm": 0.5639980435371399, "learning_rate": 0.0001, "loss": 1.5616, "step": 12409 }, { "epoch": 1.425535580954569, "grad_norm": 0.6176483631134033, "learning_rate": 0.0001, "loss": 1.6189, "step": 12410 }, { "epoch": 1.4256504508643961, "grad_norm": 0.599812388420105, "learning_rate": 0.0001, "loss": 1.3885, "step": 12411 }, { "epoch": 1.4257653207742231, "grad_norm": 0.5804879069328308, "learning_rate": 0.0001, "loss": 1.4213, "step": 12412 }, { "epoch": 1.4258801906840504, "grad_norm": 0.553686261177063, "learning_rate": 0.0001, "loss": 1.4293, "step": 12413 }, { "epoch": 1.4259950605938774, "grad_norm": 0.5969316959381104, "learning_rate": 0.0001, "loss": 1.4132, "step": 12414 }, { "epoch": 1.4261099305037046, "grad_norm": 0.6206128597259521, "learning_rate": 0.0001, "loss": 1.4204, "step": 12415 }, { "epoch": 1.4262248004135316, "grad_norm": 0.6273772120475769, "learning_rate": 0.0001, "loss": 1.5474, "step": 12416 }, { "epoch": 1.4263396703233588, "grad_norm": 0.5739526748657227, "learning_rate": 0.0001, "loss": 1.5694, "step": 12417 }, { "epoch": 1.4264545402331859, "grad_norm": 0.5758925080299377, "learning_rate": 0.0001, "loss": 1.3505, "step": 12418 }, { "epoch": 1.426569410143013, "grad_norm": 0.5880077481269836, "learning_rate": 0.0001, "loss": 1.4535, "step": 12419 }, { "epoch": 1.42668428005284, "grad_norm": 0.6464236378669739, "learning_rate": 0.0001, "loss": 1.6595, "step": 12420 }, { "epoch": 1.4267991499626673, "grad_norm": 0.6200028657913208, "learning_rate": 0.0001, "loss": 1.4498, "step": 12421 }, { "epoch": 1.4269140198724943, "grad_norm": 0.5777352452278137, "learning_rate": 0.0001, "loss": 1.3672, "step": 12422 }, { "epoch": 1.4270288897823216, "grad_norm": 0.6160234808921814, "learning_rate": 0.0001, "loss": 1.6537, "step": 12423 }, { "epoch": 1.4271437596921486, "grad_norm": 0.5613859295845032, "learning_rate": 0.0001, "loss": 1.1053, "step": 12424 }, { "epoch": 1.4272586296019758, "grad_norm": 0.5593403577804565, "learning_rate": 0.0001, "loss": 1.531, "step": 12425 }, { "epoch": 1.4273734995118028, "grad_norm": 0.5781793594360352, "learning_rate": 0.0001, "loss": 1.5957, "step": 12426 }, { "epoch": 1.42748836942163, "grad_norm": 0.5942292809486389, "learning_rate": 0.0001, "loss": 1.4096, "step": 12427 }, { "epoch": 1.427603239331457, "grad_norm": 0.5752783417701721, "learning_rate": 0.0001, "loss": 1.3625, "step": 12428 }, { "epoch": 1.4277181092412843, "grad_norm": 0.7537838816642761, "learning_rate": 0.0001, "loss": 1.6131, "step": 12429 }, { "epoch": 1.4278329791511113, "grad_norm": 0.5820350050926208, "learning_rate": 0.0001, "loss": 1.6196, "step": 12430 }, { "epoch": 1.4279478490609385, "grad_norm": 0.5638639330863953, "learning_rate": 0.0001, "loss": 1.4653, "step": 12431 }, { "epoch": 1.4280627189707655, "grad_norm": 0.6400308012962341, "learning_rate": 0.0001, "loss": 1.6973, "step": 12432 }, { "epoch": 1.4281775888805928, "grad_norm": 0.5983318090438843, "learning_rate": 0.0001, "loss": 1.4876, "step": 12433 }, { "epoch": 1.4282924587904198, "grad_norm": 0.6239731311798096, "learning_rate": 0.0001, "loss": 1.7114, "step": 12434 }, { "epoch": 1.428407328700247, "grad_norm": 0.6159213781356812, "learning_rate": 0.0001, "loss": 1.4131, "step": 12435 }, { "epoch": 1.428522198610074, "grad_norm": 0.5617234110832214, "learning_rate": 0.0001, "loss": 1.4996, "step": 12436 }, { "epoch": 1.4286370685199012, "grad_norm": 0.5928561687469482, "learning_rate": 0.0001, "loss": 1.5274, "step": 12437 }, { "epoch": 1.4287519384297283, "grad_norm": 0.6398497819900513, "learning_rate": 0.0001, "loss": 1.5243, "step": 12438 }, { "epoch": 1.4288668083395555, "grad_norm": 0.6484475135803223, "learning_rate": 0.0001, "loss": 1.6719, "step": 12439 }, { "epoch": 1.4289816782493825, "grad_norm": 0.6493231058120728, "learning_rate": 0.0001, "loss": 1.6668, "step": 12440 }, { "epoch": 1.4290965481592097, "grad_norm": 0.5870835781097412, "learning_rate": 0.0001, "loss": 1.2109, "step": 12441 }, { "epoch": 1.4292114180690367, "grad_norm": 0.6100109815597534, "learning_rate": 0.0001, "loss": 1.4725, "step": 12442 }, { "epoch": 1.429326287978864, "grad_norm": 0.6027024388313293, "learning_rate": 0.0001, "loss": 1.262, "step": 12443 }, { "epoch": 1.4294411578886912, "grad_norm": 0.635283887386322, "learning_rate": 0.0001, "loss": 1.4945, "step": 12444 }, { "epoch": 1.4295560277985182, "grad_norm": 0.5338662266731262, "learning_rate": 0.0001, "loss": 1.2908, "step": 12445 }, { "epoch": 1.4296708977083452, "grad_norm": 0.579879879951477, "learning_rate": 0.0001, "loss": 1.3954, "step": 12446 }, { "epoch": 1.4297857676181724, "grad_norm": 0.5709049105644226, "learning_rate": 0.0001, "loss": 1.4578, "step": 12447 }, { "epoch": 1.4299006375279997, "grad_norm": 0.5717498660087585, "learning_rate": 0.0001, "loss": 1.4321, "step": 12448 }, { "epoch": 1.4300155074378267, "grad_norm": 0.6005428433418274, "learning_rate": 0.0001, "loss": 1.6311, "step": 12449 }, { "epoch": 1.4301303773476537, "grad_norm": 0.5593380928039551, "learning_rate": 0.0001, "loss": 1.4149, "step": 12450 }, { "epoch": 1.430245247257481, "grad_norm": 0.5910133719444275, "learning_rate": 0.0001, "loss": 1.3897, "step": 12451 }, { "epoch": 1.4303601171673082, "grad_norm": 0.6213868856430054, "learning_rate": 0.0001, "loss": 1.4776, "step": 12452 }, { "epoch": 1.4304749870771352, "grad_norm": 0.552069365978241, "learning_rate": 0.0001, "loss": 1.0546, "step": 12453 }, { "epoch": 1.4305898569869622, "grad_norm": 0.5821637511253357, "learning_rate": 0.0001, "loss": 1.4505, "step": 12454 }, { "epoch": 1.4307047268967894, "grad_norm": 0.6346561312675476, "learning_rate": 0.0001, "loss": 1.3883, "step": 12455 }, { "epoch": 1.4308195968066166, "grad_norm": 0.5623376369476318, "learning_rate": 0.0001, "loss": 1.2515, "step": 12456 }, { "epoch": 1.4309344667164436, "grad_norm": 0.5908299088478088, "learning_rate": 0.0001, "loss": 1.5804, "step": 12457 }, { "epoch": 1.4310493366262707, "grad_norm": 0.5913065671920776, "learning_rate": 0.0001, "loss": 1.3443, "step": 12458 }, { "epoch": 1.4311642065360979, "grad_norm": 0.6305943727493286, "learning_rate": 0.0001, "loss": 1.4428, "step": 12459 }, { "epoch": 1.4312790764459251, "grad_norm": 0.611919105052948, "learning_rate": 0.0001, "loss": 1.6211, "step": 12460 }, { "epoch": 1.4313939463557521, "grad_norm": 0.5658090710639954, "learning_rate": 0.0001, "loss": 1.4647, "step": 12461 }, { "epoch": 1.4315088162655791, "grad_norm": 0.5929034948348999, "learning_rate": 0.0001, "loss": 1.4665, "step": 12462 }, { "epoch": 1.4316236861754064, "grad_norm": 0.6326093673706055, "learning_rate": 0.0001, "loss": 1.4622, "step": 12463 }, { "epoch": 1.4317385560852336, "grad_norm": 0.6289833784103394, "learning_rate": 0.0001, "loss": 1.5712, "step": 12464 }, { "epoch": 1.4318534259950606, "grad_norm": 0.6180969476699829, "learning_rate": 0.0001, "loss": 1.5338, "step": 12465 }, { "epoch": 1.4319682959048876, "grad_norm": 0.5622907876968384, "learning_rate": 0.0001, "loss": 1.4497, "step": 12466 }, { "epoch": 1.4320831658147148, "grad_norm": 0.5641545653343201, "learning_rate": 0.0001, "loss": 1.5119, "step": 12467 }, { "epoch": 1.432198035724542, "grad_norm": 0.6269649863243103, "learning_rate": 0.0001, "loss": 1.3424, "step": 12468 }, { "epoch": 1.432312905634369, "grad_norm": 0.5639840960502625, "learning_rate": 0.0001, "loss": 1.4185, "step": 12469 }, { "epoch": 1.432427775544196, "grad_norm": 0.630839467048645, "learning_rate": 0.0001, "loss": 1.4605, "step": 12470 }, { "epoch": 1.4325426454540233, "grad_norm": 0.5446004867553711, "learning_rate": 0.0001, "loss": 1.2578, "step": 12471 }, { "epoch": 1.4326575153638506, "grad_norm": 0.5677520036697388, "learning_rate": 0.0001, "loss": 1.5337, "step": 12472 }, { "epoch": 1.4327723852736776, "grad_norm": 0.6485244035720825, "learning_rate": 0.0001, "loss": 1.336, "step": 12473 }, { "epoch": 1.4328872551835046, "grad_norm": 0.6378061771392822, "learning_rate": 0.0001, "loss": 1.6011, "step": 12474 }, { "epoch": 1.4330021250933318, "grad_norm": 0.5592242479324341, "learning_rate": 0.0001, "loss": 1.5086, "step": 12475 }, { "epoch": 1.433116995003159, "grad_norm": 0.5973872542381287, "learning_rate": 0.0001, "loss": 1.655, "step": 12476 }, { "epoch": 1.433231864912986, "grad_norm": 0.6042687892913818, "learning_rate": 0.0001, "loss": 1.4897, "step": 12477 }, { "epoch": 1.433346734822813, "grad_norm": 0.6249580979347229, "learning_rate": 0.0001, "loss": 1.504, "step": 12478 }, { "epoch": 1.4334616047326403, "grad_norm": 0.5797646045684814, "learning_rate": 0.0001, "loss": 1.5292, "step": 12479 }, { "epoch": 1.4335764746424675, "grad_norm": 0.616908609867096, "learning_rate": 0.0001, "loss": 1.6353, "step": 12480 }, { "epoch": 1.4336913445522945, "grad_norm": 0.5572378039360046, "learning_rate": 0.0001, "loss": 1.5465, "step": 12481 }, { "epoch": 1.4338062144621215, "grad_norm": 0.6019822955131531, "learning_rate": 0.0001, "loss": 1.2772, "step": 12482 }, { "epoch": 1.4339210843719488, "grad_norm": 0.5784163475036621, "learning_rate": 0.0001, "loss": 1.4937, "step": 12483 }, { "epoch": 1.434035954281776, "grad_norm": 0.6143441796302795, "learning_rate": 0.0001, "loss": 1.5212, "step": 12484 }, { "epoch": 1.434150824191603, "grad_norm": 0.6093927621841431, "learning_rate": 0.0001, "loss": 1.3583, "step": 12485 }, { "epoch": 1.43426569410143, "grad_norm": 0.6261629462242126, "learning_rate": 0.0001, "loss": 1.4718, "step": 12486 }, { "epoch": 1.4343805640112572, "grad_norm": 0.5928403735160828, "learning_rate": 0.0001, "loss": 1.4865, "step": 12487 }, { "epoch": 1.4344954339210845, "grad_norm": 0.5965852737426758, "learning_rate": 0.0001, "loss": 1.5387, "step": 12488 }, { "epoch": 1.4346103038309115, "grad_norm": 0.6321581602096558, "learning_rate": 0.0001, "loss": 1.4827, "step": 12489 }, { "epoch": 1.4347251737407385, "grad_norm": 0.5445297956466675, "learning_rate": 0.0001, "loss": 1.2522, "step": 12490 }, { "epoch": 1.4348400436505657, "grad_norm": 0.5668053030967712, "learning_rate": 0.0001, "loss": 1.2608, "step": 12491 }, { "epoch": 1.434954913560393, "grad_norm": 0.606621265411377, "learning_rate": 0.0001, "loss": 1.6458, "step": 12492 }, { "epoch": 1.43506978347022, "grad_norm": 0.5955449938774109, "learning_rate": 0.0001, "loss": 1.2963, "step": 12493 }, { "epoch": 1.435184653380047, "grad_norm": 0.6068549156188965, "learning_rate": 0.0001, "loss": 1.6448, "step": 12494 }, { "epoch": 1.4352995232898742, "grad_norm": 0.5606309175491333, "learning_rate": 0.0001, "loss": 1.3839, "step": 12495 }, { "epoch": 1.4354143931997014, "grad_norm": 0.58128821849823, "learning_rate": 0.0001, "loss": 1.5119, "step": 12496 }, { "epoch": 1.4355292631095284, "grad_norm": 0.5731803774833679, "learning_rate": 0.0001, "loss": 1.3732, "step": 12497 }, { "epoch": 1.4356441330193555, "grad_norm": 0.5677974224090576, "learning_rate": 0.0001, "loss": 1.4033, "step": 12498 }, { "epoch": 1.4357590029291827, "grad_norm": 0.6500546336174011, "learning_rate": 0.0001, "loss": 1.6548, "step": 12499 }, { "epoch": 1.43587387283901, "grad_norm": 0.5387493968009949, "learning_rate": 0.0001, "loss": 1.3859, "step": 12500 }, { "epoch": 1.435988742748837, "grad_norm": 0.630449652671814, "learning_rate": 0.0001, "loss": 1.5172, "step": 12501 }, { "epoch": 1.436103612658664, "grad_norm": 0.5695238709449768, "learning_rate": 0.0001, "loss": 1.2597, "step": 12502 }, { "epoch": 1.4362184825684912, "grad_norm": 0.611565113067627, "learning_rate": 0.0001, "loss": 1.4379, "step": 12503 }, { "epoch": 1.4363333524783184, "grad_norm": 0.5593751668930054, "learning_rate": 0.0001, "loss": 1.5487, "step": 12504 }, { "epoch": 1.4364482223881454, "grad_norm": 0.5889508724212646, "learning_rate": 0.0001, "loss": 1.5008, "step": 12505 }, { "epoch": 1.4365630922979724, "grad_norm": 0.6173695921897888, "learning_rate": 0.0001, "loss": 1.5322, "step": 12506 }, { "epoch": 1.4366779622077996, "grad_norm": 0.591052770614624, "learning_rate": 0.0001, "loss": 1.4149, "step": 12507 }, { "epoch": 1.4367928321176269, "grad_norm": 0.5677117109298706, "learning_rate": 0.0001, "loss": 1.4339, "step": 12508 }, { "epoch": 1.4369077020274539, "grad_norm": 0.5549595355987549, "learning_rate": 0.0001, "loss": 1.2642, "step": 12509 }, { "epoch": 1.4370225719372811, "grad_norm": 0.5533115267753601, "learning_rate": 0.0001, "loss": 1.4143, "step": 12510 }, { "epoch": 1.4371374418471081, "grad_norm": 0.5983604788780212, "learning_rate": 0.0001, "loss": 1.3387, "step": 12511 }, { "epoch": 1.4372523117569354, "grad_norm": 0.6087487936019897, "learning_rate": 0.0001, "loss": 1.5874, "step": 12512 }, { "epoch": 1.4373671816667624, "grad_norm": 0.629578709602356, "learning_rate": 0.0001, "loss": 1.4994, "step": 12513 }, { "epoch": 1.4374820515765896, "grad_norm": 0.6290776133537292, "learning_rate": 0.0001, "loss": 1.5329, "step": 12514 }, { "epoch": 1.4375969214864166, "grad_norm": 0.6354580521583557, "learning_rate": 0.0001, "loss": 1.5526, "step": 12515 }, { "epoch": 1.4377117913962438, "grad_norm": 0.5979134440422058, "learning_rate": 0.0001, "loss": 1.5663, "step": 12516 }, { "epoch": 1.4378266613060708, "grad_norm": 0.5574628114700317, "learning_rate": 0.0001, "loss": 1.2554, "step": 12517 }, { "epoch": 1.437941531215898, "grad_norm": 0.5842434763908386, "learning_rate": 0.0001, "loss": 1.3712, "step": 12518 }, { "epoch": 1.438056401125725, "grad_norm": 0.6743392944335938, "learning_rate": 0.0001, "loss": 1.7602, "step": 12519 }, { "epoch": 1.4381712710355523, "grad_norm": 0.576398491859436, "learning_rate": 0.0001, "loss": 1.3749, "step": 12520 }, { "epoch": 1.4382861409453793, "grad_norm": 0.5756230354309082, "learning_rate": 0.0001, "loss": 1.4367, "step": 12521 }, { "epoch": 1.4384010108552066, "grad_norm": 0.6155824065208435, "learning_rate": 0.0001, "loss": 1.5771, "step": 12522 }, { "epoch": 1.4385158807650336, "grad_norm": 0.6590107679367065, "learning_rate": 0.0001, "loss": 1.6097, "step": 12523 }, { "epoch": 1.4386307506748608, "grad_norm": 0.6111704707145691, "learning_rate": 0.0001, "loss": 1.6088, "step": 12524 }, { "epoch": 1.4387456205846878, "grad_norm": 0.6221973299980164, "learning_rate": 0.0001, "loss": 1.4813, "step": 12525 }, { "epoch": 1.438860490494515, "grad_norm": 0.5589838624000549, "learning_rate": 0.0001, "loss": 1.2726, "step": 12526 }, { "epoch": 1.438975360404342, "grad_norm": 0.6924046874046326, "learning_rate": 0.0001, "loss": 1.1983, "step": 12527 }, { "epoch": 1.4390902303141693, "grad_norm": 0.5677695870399475, "learning_rate": 0.0001, "loss": 1.5127, "step": 12528 }, { "epoch": 1.4392051002239963, "grad_norm": 0.6168467402458191, "learning_rate": 0.0001, "loss": 1.5937, "step": 12529 }, { "epoch": 1.4393199701338235, "grad_norm": 0.5841947197914124, "learning_rate": 0.0001, "loss": 1.5834, "step": 12530 }, { "epoch": 1.4394348400436505, "grad_norm": 0.6030665636062622, "learning_rate": 0.0001, "loss": 1.5023, "step": 12531 }, { "epoch": 1.4395497099534778, "grad_norm": 0.6445423364639282, "learning_rate": 0.0001, "loss": 1.6561, "step": 12532 }, { "epoch": 1.4396645798633048, "grad_norm": 0.5743504166603088, "learning_rate": 0.0001, "loss": 1.3297, "step": 12533 }, { "epoch": 1.439779449773132, "grad_norm": 0.6461590528488159, "learning_rate": 0.0001, "loss": 1.5937, "step": 12534 }, { "epoch": 1.439894319682959, "grad_norm": 0.535851001739502, "learning_rate": 0.0001, "loss": 1.343, "step": 12535 }, { "epoch": 1.4400091895927862, "grad_norm": 0.6115723252296448, "learning_rate": 0.0001, "loss": 1.4831, "step": 12536 }, { "epoch": 1.4401240595026132, "grad_norm": 0.5796070694923401, "learning_rate": 0.0001, "loss": 1.4744, "step": 12537 }, { "epoch": 1.4402389294124405, "grad_norm": 0.6212785243988037, "learning_rate": 0.0001, "loss": 1.4577, "step": 12538 }, { "epoch": 1.4403537993222675, "grad_norm": 0.6039834022521973, "learning_rate": 0.0001, "loss": 1.6319, "step": 12539 }, { "epoch": 1.4404686692320947, "grad_norm": 0.6413995027542114, "learning_rate": 0.0001, "loss": 1.4666, "step": 12540 }, { "epoch": 1.4405835391419217, "grad_norm": 0.6055036187171936, "learning_rate": 0.0001, "loss": 1.6406, "step": 12541 }, { "epoch": 1.440698409051749, "grad_norm": 0.5628538727760315, "learning_rate": 0.0001, "loss": 1.442, "step": 12542 }, { "epoch": 1.440813278961576, "grad_norm": 0.6067841649055481, "learning_rate": 0.0001, "loss": 1.5706, "step": 12543 }, { "epoch": 1.4409281488714032, "grad_norm": 0.5712527632713318, "learning_rate": 0.0001, "loss": 1.421, "step": 12544 }, { "epoch": 1.4410430187812302, "grad_norm": 0.5913591384887695, "learning_rate": 0.0001, "loss": 1.469, "step": 12545 }, { "epoch": 1.4411578886910574, "grad_norm": 0.5872635841369629, "learning_rate": 0.0001, "loss": 1.4475, "step": 12546 }, { "epoch": 1.4412727586008844, "grad_norm": 0.6234807372093201, "learning_rate": 0.0001, "loss": 1.4239, "step": 12547 }, { "epoch": 1.4413876285107117, "grad_norm": 0.6241154074668884, "learning_rate": 0.0001, "loss": 1.4686, "step": 12548 }, { "epoch": 1.4415024984205387, "grad_norm": 0.5826482772827148, "learning_rate": 0.0001, "loss": 1.3162, "step": 12549 }, { "epoch": 1.441617368330366, "grad_norm": 0.5742958784103394, "learning_rate": 0.0001, "loss": 1.5757, "step": 12550 }, { "epoch": 1.441732238240193, "grad_norm": 0.5949505567550659, "learning_rate": 0.0001, "loss": 1.3672, "step": 12551 }, { "epoch": 1.4418471081500202, "grad_norm": 0.6136342287063599, "learning_rate": 0.0001, "loss": 1.4474, "step": 12552 }, { "epoch": 1.4419619780598472, "grad_norm": 0.5760694146156311, "learning_rate": 0.0001, "loss": 1.3999, "step": 12553 }, { "epoch": 1.4420768479696744, "grad_norm": 0.6037359833717346, "learning_rate": 0.0001, "loss": 1.7367, "step": 12554 }, { "epoch": 1.4421917178795014, "grad_norm": 0.5782321095466614, "learning_rate": 0.0001, "loss": 1.3474, "step": 12555 }, { "epoch": 1.4423065877893286, "grad_norm": 0.5973963737487793, "learning_rate": 0.0001, "loss": 1.5053, "step": 12556 }, { "epoch": 1.4424214576991556, "grad_norm": 0.588340699672699, "learning_rate": 0.0001, "loss": 1.5116, "step": 12557 }, { "epoch": 1.4425363276089829, "grad_norm": 0.5831438302993774, "learning_rate": 0.0001, "loss": 1.4753, "step": 12558 }, { "epoch": 1.4426511975188099, "grad_norm": 0.6051815748214722, "learning_rate": 0.0001, "loss": 1.466, "step": 12559 }, { "epoch": 1.4427660674286371, "grad_norm": 0.5373536348342896, "learning_rate": 0.0001, "loss": 1.049, "step": 12560 }, { "epoch": 1.4428809373384641, "grad_norm": 0.6360757350921631, "learning_rate": 0.0001, "loss": 1.497, "step": 12561 }, { "epoch": 1.4429958072482914, "grad_norm": 0.6303163170814514, "learning_rate": 0.0001, "loss": 1.5535, "step": 12562 }, { "epoch": 1.4431106771581184, "grad_norm": 0.6021014451980591, "learning_rate": 0.0001, "loss": 1.3477, "step": 12563 }, { "epoch": 1.4432255470679456, "grad_norm": 0.6191940307617188, "learning_rate": 0.0001, "loss": 1.5356, "step": 12564 }, { "epoch": 1.4433404169777726, "grad_norm": 0.6756370067596436, "learning_rate": 0.0001, "loss": 1.5671, "step": 12565 }, { "epoch": 1.4434552868875998, "grad_norm": 0.7018874883651733, "learning_rate": 0.0001, "loss": 1.863, "step": 12566 }, { "epoch": 1.4435701567974268, "grad_norm": 0.5474680066108704, "learning_rate": 0.0001, "loss": 1.3566, "step": 12567 }, { "epoch": 1.443685026707254, "grad_norm": 0.6140058040618896, "learning_rate": 0.0001, "loss": 1.2638, "step": 12568 }, { "epoch": 1.443799896617081, "grad_norm": 0.5624337196350098, "learning_rate": 0.0001, "loss": 1.4342, "step": 12569 }, { "epoch": 1.4439147665269083, "grad_norm": 0.6037470698356628, "learning_rate": 0.0001, "loss": 1.6618, "step": 12570 }, { "epoch": 1.4440296364367353, "grad_norm": 0.594399094581604, "learning_rate": 0.0001, "loss": 1.619, "step": 12571 }, { "epoch": 1.4441445063465626, "grad_norm": 0.5606778264045715, "learning_rate": 0.0001, "loss": 1.3396, "step": 12572 }, { "epoch": 1.4442593762563896, "grad_norm": 0.6168720126152039, "learning_rate": 0.0001, "loss": 1.4917, "step": 12573 }, { "epoch": 1.4443742461662168, "grad_norm": 0.5519090294837952, "learning_rate": 0.0001, "loss": 1.5099, "step": 12574 }, { "epoch": 1.4444891160760438, "grad_norm": 0.5982587933540344, "learning_rate": 0.0001, "loss": 1.5885, "step": 12575 }, { "epoch": 1.444603985985871, "grad_norm": 0.6236387491226196, "learning_rate": 0.0001, "loss": 1.6722, "step": 12576 }, { "epoch": 1.444718855895698, "grad_norm": 0.6040252447128296, "learning_rate": 0.0001, "loss": 1.5336, "step": 12577 }, { "epoch": 1.4448337258055253, "grad_norm": 0.5864963531494141, "learning_rate": 0.0001, "loss": 1.4526, "step": 12578 }, { "epoch": 1.4449485957153523, "grad_norm": 0.5800042152404785, "learning_rate": 0.0001, "loss": 1.3706, "step": 12579 }, { "epoch": 1.4450634656251795, "grad_norm": 0.6318053007125854, "learning_rate": 0.0001, "loss": 1.5137, "step": 12580 }, { "epoch": 1.4451783355350067, "grad_norm": 0.6059226989746094, "learning_rate": 0.0001, "loss": 1.3472, "step": 12581 }, { "epoch": 1.4452932054448338, "grad_norm": 0.5987119078636169, "learning_rate": 0.0001, "loss": 1.4228, "step": 12582 }, { "epoch": 1.4454080753546608, "grad_norm": 0.6017065048217773, "learning_rate": 0.0001, "loss": 1.3683, "step": 12583 }, { "epoch": 1.445522945264488, "grad_norm": 0.5962862968444824, "learning_rate": 0.0001, "loss": 1.5466, "step": 12584 }, { "epoch": 1.4456378151743152, "grad_norm": 0.6121211647987366, "learning_rate": 0.0001, "loss": 1.6054, "step": 12585 }, { "epoch": 1.4457526850841422, "grad_norm": 0.5887848138809204, "learning_rate": 0.0001, "loss": 1.3906, "step": 12586 }, { "epoch": 1.4458675549939692, "grad_norm": 0.5756409168243408, "learning_rate": 0.0001, "loss": 1.345, "step": 12587 }, { "epoch": 1.4459824249037965, "grad_norm": 0.6187815070152283, "learning_rate": 0.0001, "loss": 1.2619, "step": 12588 }, { "epoch": 1.4460972948136237, "grad_norm": 0.6031472086906433, "learning_rate": 0.0001, "loss": 1.4761, "step": 12589 }, { "epoch": 1.4462121647234507, "grad_norm": 0.5487967729568481, "learning_rate": 0.0001, "loss": 1.258, "step": 12590 }, { "epoch": 1.4463270346332777, "grad_norm": 0.5621272921562195, "learning_rate": 0.0001, "loss": 1.0927, "step": 12591 }, { "epoch": 1.446441904543105, "grad_norm": 0.598263144493103, "learning_rate": 0.0001, "loss": 1.462, "step": 12592 }, { "epoch": 1.4465567744529322, "grad_norm": 0.5940863490104675, "learning_rate": 0.0001, "loss": 1.426, "step": 12593 }, { "epoch": 1.4466716443627592, "grad_norm": 0.6629210114479065, "learning_rate": 0.0001, "loss": 1.638, "step": 12594 }, { "epoch": 1.4467865142725862, "grad_norm": 0.5858151912689209, "learning_rate": 0.0001, "loss": 1.3906, "step": 12595 }, { "epoch": 1.4469013841824134, "grad_norm": 0.6476069092750549, "learning_rate": 0.0001, "loss": 1.3094, "step": 12596 }, { "epoch": 1.4470162540922407, "grad_norm": 0.6032461524009705, "learning_rate": 0.0001, "loss": 1.3134, "step": 12597 }, { "epoch": 1.4471311240020677, "grad_norm": 0.591644823551178, "learning_rate": 0.0001, "loss": 1.3682, "step": 12598 }, { "epoch": 1.4472459939118947, "grad_norm": 0.656358540058136, "learning_rate": 0.0001, "loss": 1.1411, "step": 12599 }, { "epoch": 1.447360863821722, "grad_norm": 0.6357599496841431, "learning_rate": 0.0001, "loss": 1.5471, "step": 12600 }, { "epoch": 1.4474757337315491, "grad_norm": 0.5842412710189819, "learning_rate": 0.0001, "loss": 1.4944, "step": 12601 }, { "epoch": 1.4475906036413762, "grad_norm": 0.580599308013916, "learning_rate": 0.0001, "loss": 1.5245, "step": 12602 }, { "epoch": 1.4477054735512032, "grad_norm": 0.6119430661201477, "learning_rate": 0.0001, "loss": 1.4649, "step": 12603 }, { "epoch": 1.4478203434610304, "grad_norm": 0.5994784832000732, "learning_rate": 0.0001, "loss": 1.4999, "step": 12604 }, { "epoch": 1.4479352133708576, "grad_norm": 0.5609249472618103, "learning_rate": 0.0001, "loss": 1.498, "step": 12605 }, { "epoch": 1.4480500832806846, "grad_norm": 0.5915836095809937, "learning_rate": 0.0001, "loss": 1.3597, "step": 12606 }, { "epoch": 1.4481649531905116, "grad_norm": 0.6126052141189575, "learning_rate": 0.0001, "loss": 1.6027, "step": 12607 }, { "epoch": 1.4482798231003389, "grad_norm": 0.6269632577896118, "learning_rate": 0.0001, "loss": 1.5438, "step": 12608 }, { "epoch": 1.448394693010166, "grad_norm": 0.5238544344902039, "learning_rate": 0.0001, "loss": 1.4742, "step": 12609 }, { "epoch": 1.448509562919993, "grad_norm": 0.607177197933197, "learning_rate": 0.0001, "loss": 1.5231, "step": 12610 }, { "epoch": 1.4486244328298201, "grad_norm": 0.5543795228004456, "learning_rate": 0.0001, "loss": 1.4268, "step": 12611 }, { "epoch": 1.4487393027396473, "grad_norm": 0.6204040050506592, "learning_rate": 0.0001, "loss": 1.5602, "step": 12612 }, { "epoch": 1.4488541726494746, "grad_norm": 0.5601338148117065, "learning_rate": 0.0001, "loss": 1.5038, "step": 12613 }, { "epoch": 1.4489690425593016, "grad_norm": 0.5878952145576477, "learning_rate": 0.0001, "loss": 1.5171, "step": 12614 }, { "epoch": 1.4490839124691286, "grad_norm": 0.5514255166053772, "learning_rate": 0.0001, "loss": 1.3956, "step": 12615 }, { "epoch": 1.4491987823789558, "grad_norm": 0.6429269313812256, "learning_rate": 0.0001, "loss": 1.3746, "step": 12616 }, { "epoch": 1.449313652288783, "grad_norm": 0.5910865068435669, "learning_rate": 0.0001, "loss": 1.4062, "step": 12617 }, { "epoch": 1.44942852219861, "grad_norm": 0.6003382205963135, "learning_rate": 0.0001, "loss": 1.5859, "step": 12618 }, { "epoch": 1.449543392108437, "grad_norm": 0.5780314803123474, "learning_rate": 0.0001, "loss": 1.4629, "step": 12619 }, { "epoch": 1.4496582620182643, "grad_norm": 0.5906615257263184, "learning_rate": 0.0001, "loss": 1.414, "step": 12620 }, { "epoch": 1.4497731319280915, "grad_norm": 0.6168799996376038, "learning_rate": 0.0001, "loss": 1.6244, "step": 12621 }, { "epoch": 1.4498880018379185, "grad_norm": 0.5736768245697021, "learning_rate": 0.0001, "loss": 1.5504, "step": 12622 }, { "epoch": 1.4500028717477456, "grad_norm": 0.5871389508247375, "learning_rate": 0.0001, "loss": 1.488, "step": 12623 }, { "epoch": 1.4501177416575728, "grad_norm": 0.6376186013221741, "learning_rate": 0.0001, "loss": 1.4979, "step": 12624 }, { "epoch": 1.4502326115674, "grad_norm": 0.5403526425361633, "learning_rate": 0.0001, "loss": 1.2595, "step": 12625 }, { "epoch": 1.450347481477227, "grad_norm": 0.5680791139602661, "learning_rate": 0.0001, "loss": 1.4837, "step": 12626 }, { "epoch": 1.450462351387054, "grad_norm": 0.5831477642059326, "learning_rate": 0.0001, "loss": 1.5409, "step": 12627 }, { "epoch": 1.4505772212968813, "grad_norm": 0.5702657103538513, "learning_rate": 0.0001, "loss": 1.5092, "step": 12628 }, { "epoch": 1.4506920912067085, "grad_norm": 0.5878103971481323, "learning_rate": 0.0001, "loss": 1.5792, "step": 12629 }, { "epoch": 1.4508069611165355, "grad_norm": 0.6267426609992981, "learning_rate": 0.0001, "loss": 1.3794, "step": 12630 }, { "epoch": 1.4509218310263625, "grad_norm": 0.578369140625, "learning_rate": 0.0001, "loss": 1.5242, "step": 12631 }, { "epoch": 1.4510367009361897, "grad_norm": 0.601699709892273, "learning_rate": 0.0001, "loss": 1.5346, "step": 12632 }, { "epoch": 1.451151570846017, "grad_norm": 0.5736028552055359, "learning_rate": 0.0001, "loss": 1.3779, "step": 12633 }, { "epoch": 1.451266440755844, "grad_norm": 0.5805512070655823, "learning_rate": 0.0001, "loss": 1.4821, "step": 12634 }, { "epoch": 1.451381310665671, "grad_norm": 0.6317859292030334, "learning_rate": 0.0001, "loss": 1.4541, "step": 12635 }, { "epoch": 1.4514961805754982, "grad_norm": 0.5671908259391785, "learning_rate": 0.0001, "loss": 1.3133, "step": 12636 }, { "epoch": 1.4516110504853255, "grad_norm": 0.5456831455230713, "learning_rate": 0.0001, "loss": 1.463, "step": 12637 }, { "epoch": 1.4517259203951525, "grad_norm": 0.6048300266265869, "learning_rate": 0.0001, "loss": 1.4596, "step": 12638 }, { "epoch": 1.4518407903049795, "grad_norm": 0.5492627024650574, "learning_rate": 0.0001, "loss": 1.372, "step": 12639 }, { "epoch": 1.4519556602148067, "grad_norm": 0.6605719923973083, "learning_rate": 0.0001, "loss": 1.4796, "step": 12640 }, { "epoch": 1.452070530124634, "grad_norm": 0.6105278730392456, "learning_rate": 0.0001, "loss": 1.6718, "step": 12641 }, { "epoch": 1.452185400034461, "grad_norm": 0.6340276598930359, "learning_rate": 0.0001, "loss": 1.5242, "step": 12642 }, { "epoch": 1.452300269944288, "grad_norm": 0.5779246687889099, "learning_rate": 0.0001, "loss": 1.5908, "step": 12643 }, { "epoch": 1.4524151398541152, "grad_norm": 0.5638899803161621, "learning_rate": 0.0001, "loss": 1.3403, "step": 12644 }, { "epoch": 1.4525300097639424, "grad_norm": 0.5835622549057007, "learning_rate": 0.0001, "loss": 1.5487, "step": 12645 }, { "epoch": 1.4526448796737694, "grad_norm": 0.5531151294708252, "learning_rate": 0.0001, "loss": 1.3624, "step": 12646 }, { "epoch": 1.4527597495835967, "grad_norm": 0.6040196418762207, "learning_rate": 0.0001, "loss": 1.5721, "step": 12647 }, { "epoch": 1.4528746194934237, "grad_norm": 0.6672126054763794, "learning_rate": 0.0001, "loss": 1.3766, "step": 12648 }, { "epoch": 1.452989489403251, "grad_norm": 0.5610442757606506, "learning_rate": 0.0001, "loss": 1.3557, "step": 12649 }, { "epoch": 1.453104359313078, "grad_norm": 0.5616607666015625, "learning_rate": 0.0001, "loss": 1.3134, "step": 12650 }, { "epoch": 1.4532192292229051, "grad_norm": 0.5792796015739441, "learning_rate": 0.0001, "loss": 1.7026, "step": 12651 }, { "epoch": 1.4533340991327321, "grad_norm": 0.5875088572502136, "learning_rate": 0.0001, "loss": 1.6033, "step": 12652 }, { "epoch": 1.4534489690425594, "grad_norm": 0.5907067060470581, "learning_rate": 0.0001, "loss": 1.5317, "step": 12653 }, { "epoch": 1.4535638389523864, "grad_norm": 0.5939152836799622, "learning_rate": 0.0001, "loss": 1.4968, "step": 12654 }, { "epoch": 1.4536787088622136, "grad_norm": 0.585138201713562, "learning_rate": 0.0001, "loss": 1.4367, "step": 12655 }, { "epoch": 1.4537935787720406, "grad_norm": 0.634059727191925, "learning_rate": 0.0001, "loss": 1.3877, "step": 12656 }, { "epoch": 1.4539084486818679, "grad_norm": 0.5916883945465088, "learning_rate": 0.0001, "loss": 1.455, "step": 12657 }, { "epoch": 1.4540233185916949, "grad_norm": 0.6057246923446655, "learning_rate": 0.0001, "loss": 1.539, "step": 12658 }, { "epoch": 1.454138188501522, "grad_norm": 0.5644753575325012, "learning_rate": 0.0001, "loss": 1.4298, "step": 12659 }, { "epoch": 1.454253058411349, "grad_norm": 0.6254203915596008, "learning_rate": 0.0001, "loss": 1.501, "step": 12660 }, { "epoch": 1.4543679283211763, "grad_norm": 0.6106442213058472, "learning_rate": 0.0001, "loss": 1.5069, "step": 12661 }, { "epoch": 1.4544827982310033, "grad_norm": 0.5752881765365601, "learning_rate": 0.0001, "loss": 1.4354, "step": 12662 }, { "epoch": 1.4545976681408306, "grad_norm": 0.6398709416389465, "learning_rate": 0.0001, "loss": 1.3857, "step": 12663 }, { "epoch": 1.4547125380506576, "grad_norm": 0.6222221255302429, "learning_rate": 0.0001, "loss": 1.3953, "step": 12664 }, { "epoch": 1.4548274079604848, "grad_norm": 0.5810672044754028, "learning_rate": 0.0001, "loss": 1.4166, "step": 12665 }, { "epoch": 1.4549422778703118, "grad_norm": 0.5670533180236816, "learning_rate": 0.0001, "loss": 1.3454, "step": 12666 }, { "epoch": 1.455057147780139, "grad_norm": 0.6999189853668213, "learning_rate": 0.0001, "loss": 1.6628, "step": 12667 }, { "epoch": 1.455172017689966, "grad_norm": 0.6700528860092163, "learning_rate": 0.0001, "loss": 1.3558, "step": 12668 }, { "epoch": 1.4552868875997933, "grad_norm": 0.6250571012496948, "learning_rate": 0.0001, "loss": 1.6178, "step": 12669 }, { "epoch": 1.4554017575096203, "grad_norm": 0.573645293712616, "learning_rate": 0.0001, "loss": 1.4697, "step": 12670 }, { "epoch": 1.4555166274194475, "grad_norm": 0.5824663043022156, "learning_rate": 0.0001, "loss": 1.3154, "step": 12671 }, { "epoch": 1.4556314973292745, "grad_norm": 0.5397822260856628, "learning_rate": 0.0001, "loss": 1.3799, "step": 12672 }, { "epoch": 1.4557463672391018, "grad_norm": 0.6227380633354187, "learning_rate": 0.0001, "loss": 1.5445, "step": 12673 }, { "epoch": 1.4558612371489288, "grad_norm": 0.5699251294136047, "learning_rate": 0.0001, "loss": 1.272, "step": 12674 }, { "epoch": 1.455976107058756, "grad_norm": 0.58524489402771, "learning_rate": 0.0001, "loss": 1.3419, "step": 12675 }, { "epoch": 1.456090976968583, "grad_norm": 0.6593015789985657, "learning_rate": 0.0001, "loss": 1.5772, "step": 12676 }, { "epoch": 1.4562058468784103, "grad_norm": 0.6179425120353699, "learning_rate": 0.0001, "loss": 1.3337, "step": 12677 }, { "epoch": 1.4563207167882373, "grad_norm": 0.591990053653717, "learning_rate": 0.0001, "loss": 1.5066, "step": 12678 }, { "epoch": 1.4564355866980645, "grad_norm": 0.5718491673469543, "learning_rate": 0.0001, "loss": 1.5063, "step": 12679 }, { "epoch": 1.4565504566078915, "grad_norm": 0.609443187713623, "learning_rate": 0.0001, "loss": 1.4913, "step": 12680 }, { "epoch": 1.4566653265177187, "grad_norm": 0.5620328187942505, "learning_rate": 0.0001, "loss": 1.4505, "step": 12681 }, { "epoch": 1.4567801964275457, "grad_norm": 0.6245093941688538, "learning_rate": 0.0001, "loss": 1.5944, "step": 12682 }, { "epoch": 1.456895066337373, "grad_norm": 0.5638138651847839, "learning_rate": 0.0001, "loss": 1.3709, "step": 12683 }, { "epoch": 1.4570099362472, "grad_norm": 0.6003065705299377, "learning_rate": 0.0001, "loss": 1.4879, "step": 12684 }, { "epoch": 1.4571248061570272, "grad_norm": 0.6570206880569458, "learning_rate": 0.0001, "loss": 1.3938, "step": 12685 }, { "epoch": 1.4572396760668542, "grad_norm": 0.6485840678215027, "learning_rate": 0.0001, "loss": 1.3326, "step": 12686 }, { "epoch": 1.4573545459766815, "grad_norm": 0.603420615196228, "learning_rate": 0.0001, "loss": 1.4235, "step": 12687 }, { "epoch": 1.4574694158865085, "grad_norm": 0.6239102482795715, "learning_rate": 0.0001, "loss": 1.3894, "step": 12688 }, { "epoch": 1.4575842857963357, "grad_norm": 0.6117807626724243, "learning_rate": 0.0001, "loss": 1.333, "step": 12689 }, { "epoch": 1.4576991557061627, "grad_norm": 0.6549608707427979, "learning_rate": 0.0001, "loss": 1.4666, "step": 12690 }, { "epoch": 1.45781402561599, "grad_norm": 0.6886609196662903, "learning_rate": 0.0001, "loss": 1.6555, "step": 12691 }, { "epoch": 1.457928895525817, "grad_norm": 0.6125257611274719, "learning_rate": 0.0001, "loss": 1.3924, "step": 12692 }, { "epoch": 1.4580437654356442, "grad_norm": 0.5616730451583862, "learning_rate": 0.0001, "loss": 1.4452, "step": 12693 }, { "epoch": 1.4581586353454712, "grad_norm": 0.6304099559783936, "learning_rate": 0.0001, "loss": 1.556, "step": 12694 }, { "epoch": 1.4582735052552984, "grad_norm": 0.6536027789115906, "learning_rate": 0.0001, "loss": 1.5309, "step": 12695 }, { "epoch": 1.4583883751651254, "grad_norm": 0.6076638698577881, "learning_rate": 0.0001, "loss": 1.4965, "step": 12696 }, { "epoch": 1.4585032450749527, "grad_norm": 0.6095083355903625, "learning_rate": 0.0001, "loss": 1.593, "step": 12697 }, { "epoch": 1.4586181149847797, "grad_norm": 0.5856631994247437, "learning_rate": 0.0001, "loss": 1.4475, "step": 12698 }, { "epoch": 1.458732984894607, "grad_norm": 0.582143247127533, "learning_rate": 0.0001, "loss": 1.4553, "step": 12699 }, { "epoch": 1.458847854804434, "grad_norm": 0.6474102735519409, "learning_rate": 0.0001, "loss": 1.6157, "step": 12700 }, { "epoch": 1.4589627247142611, "grad_norm": 0.6195456981658936, "learning_rate": 0.0001, "loss": 1.4974, "step": 12701 }, { "epoch": 1.4590775946240881, "grad_norm": 0.5719879269599915, "learning_rate": 0.0001, "loss": 1.3453, "step": 12702 }, { "epoch": 1.4591924645339154, "grad_norm": 0.5978543162345886, "learning_rate": 0.0001, "loss": 1.5412, "step": 12703 }, { "epoch": 1.4593073344437424, "grad_norm": 0.5765978097915649, "learning_rate": 0.0001, "loss": 1.2654, "step": 12704 }, { "epoch": 1.4594222043535696, "grad_norm": 0.6159289479255676, "learning_rate": 0.0001, "loss": 1.3155, "step": 12705 }, { "epoch": 1.4595370742633966, "grad_norm": 0.6160826683044434, "learning_rate": 0.0001, "loss": 1.4765, "step": 12706 }, { "epoch": 1.4596519441732239, "grad_norm": 0.5803123712539673, "learning_rate": 0.0001, "loss": 1.3856, "step": 12707 }, { "epoch": 1.4597668140830509, "grad_norm": 0.6179252862930298, "learning_rate": 0.0001, "loss": 1.3149, "step": 12708 }, { "epoch": 1.459881683992878, "grad_norm": 0.6384448409080505, "learning_rate": 0.0001, "loss": 1.2182, "step": 12709 }, { "epoch": 1.459996553902705, "grad_norm": 0.5776406526565552, "learning_rate": 0.0001, "loss": 1.3238, "step": 12710 }, { "epoch": 1.4601114238125323, "grad_norm": 0.6758487820625305, "learning_rate": 0.0001, "loss": 1.4016, "step": 12711 }, { "epoch": 1.4602262937223593, "grad_norm": 0.7246870398521423, "learning_rate": 0.0001, "loss": 1.6412, "step": 12712 }, { "epoch": 1.4603411636321866, "grad_norm": 0.6004265546798706, "learning_rate": 0.0001, "loss": 1.2532, "step": 12713 }, { "epoch": 1.4604560335420136, "grad_norm": 0.6100044846534729, "learning_rate": 0.0001, "loss": 1.4896, "step": 12714 }, { "epoch": 1.4605709034518408, "grad_norm": 0.6924980282783508, "learning_rate": 0.0001, "loss": 1.7668, "step": 12715 }, { "epoch": 1.4606857733616678, "grad_norm": 0.5812683701515198, "learning_rate": 0.0001, "loss": 1.4722, "step": 12716 }, { "epoch": 1.460800643271495, "grad_norm": 0.6266067624092102, "learning_rate": 0.0001, "loss": 1.4486, "step": 12717 }, { "epoch": 1.4609155131813223, "grad_norm": 0.5701542496681213, "learning_rate": 0.0001, "loss": 1.4599, "step": 12718 }, { "epoch": 1.4610303830911493, "grad_norm": 0.6322028636932373, "learning_rate": 0.0001, "loss": 1.4134, "step": 12719 }, { "epoch": 1.4611452530009763, "grad_norm": 0.6089679598808289, "learning_rate": 0.0001, "loss": 1.6838, "step": 12720 }, { "epoch": 1.4612601229108035, "grad_norm": 0.5674778819084167, "learning_rate": 0.0001, "loss": 1.4329, "step": 12721 }, { "epoch": 1.4613749928206308, "grad_norm": 0.5603479146957397, "learning_rate": 0.0001, "loss": 1.4397, "step": 12722 }, { "epoch": 1.4614898627304578, "grad_norm": 0.5788118839263916, "learning_rate": 0.0001, "loss": 1.5433, "step": 12723 }, { "epoch": 1.4616047326402848, "grad_norm": 0.5956910848617554, "learning_rate": 0.0001, "loss": 1.6042, "step": 12724 }, { "epoch": 1.461719602550112, "grad_norm": 0.5729474425315857, "learning_rate": 0.0001, "loss": 1.4532, "step": 12725 }, { "epoch": 1.4618344724599392, "grad_norm": 0.6295613646507263, "learning_rate": 0.0001, "loss": 1.6566, "step": 12726 }, { "epoch": 1.4619493423697663, "grad_norm": 0.5930306911468506, "learning_rate": 0.0001, "loss": 1.5756, "step": 12727 }, { "epoch": 1.4620642122795933, "grad_norm": 0.6040239930152893, "learning_rate": 0.0001, "loss": 1.3305, "step": 12728 }, { "epoch": 1.4621790821894205, "grad_norm": 0.5989452004432678, "learning_rate": 0.0001, "loss": 1.4624, "step": 12729 }, { "epoch": 1.4622939520992477, "grad_norm": 0.5778357982635498, "learning_rate": 0.0001, "loss": 1.5392, "step": 12730 }, { "epoch": 1.4624088220090747, "grad_norm": 0.6063084602355957, "learning_rate": 0.0001, "loss": 1.3575, "step": 12731 }, { "epoch": 1.4625236919189017, "grad_norm": 0.6218013763427734, "learning_rate": 0.0001, "loss": 1.2905, "step": 12732 }, { "epoch": 1.462638561828729, "grad_norm": 0.6372556090354919, "learning_rate": 0.0001, "loss": 1.3767, "step": 12733 }, { "epoch": 1.4627534317385562, "grad_norm": 0.624243438243866, "learning_rate": 0.0001, "loss": 1.3002, "step": 12734 }, { "epoch": 1.4628683016483832, "grad_norm": 0.6607589721679688, "learning_rate": 0.0001, "loss": 1.3411, "step": 12735 }, { "epoch": 1.4629831715582102, "grad_norm": 0.5998088121414185, "learning_rate": 0.0001, "loss": 1.3656, "step": 12736 }, { "epoch": 1.4630980414680375, "grad_norm": 0.5681934356689453, "learning_rate": 0.0001, "loss": 1.3278, "step": 12737 }, { "epoch": 1.4632129113778647, "grad_norm": 0.6242961883544922, "learning_rate": 0.0001, "loss": 1.7247, "step": 12738 }, { "epoch": 1.4633277812876917, "grad_norm": 0.5514942407608032, "learning_rate": 0.0001, "loss": 1.6772, "step": 12739 }, { "epoch": 1.4634426511975187, "grad_norm": 0.5704249739646912, "learning_rate": 0.0001, "loss": 1.2248, "step": 12740 }, { "epoch": 1.463557521107346, "grad_norm": 0.6051421165466309, "learning_rate": 0.0001, "loss": 1.4941, "step": 12741 }, { "epoch": 1.4636723910171732, "grad_norm": 0.59198397397995, "learning_rate": 0.0001, "loss": 1.4633, "step": 12742 }, { "epoch": 1.4637872609270002, "grad_norm": 0.5700596570968628, "learning_rate": 0.0001, "loss": 1.2688, "step": 12743 }, { "epoch": 1.4639021308368272, "grad_norm": 0.613609254360199, "learning_rate": 0.0001, "loss": 1.6539, "step": 12744 }, { "epoch": 1.4640170007466544, "grad_norm": 0.6272452473640442, "learning_rate": 0.0001, "loss": 1.6001, "step": 12745 }, { "epoch": 1.4641318706564816, "grad_norm": 0.5880221128463745, "learning_rate": 0.0001, "loss": 1.4157, "step": 12746 }, { "epoch": 1.4642467405663087, "grad_norm": 0.6015984416007996, "learning_rate": 0.0001, "loss": 1.3987, "step": 12747 }, { "epoch": 1.4643616104761357, "grad_norm": 0.6657059192657471, "learning_rate": 0.0001, "loss": 1.4861, "step": 12748 }, { "epoch": 1.464476480385963, "grad_norm": 0.6863231062889099, "learning_rate": 0.0001, "loss": 1.3948, "step": 12749 }, { "epoch": 1.4645913502957901, "grad_norm": 0.5913318395614624, "learning_rate": 0.0001, "loss": 1.412, "step": 12750 }, { "epoch": 1.4647062202056171, "grad_norm": 0.5482379794120789, "learning_rate": 0.0001, "loss": 1.5154, "step": 12751 }, { "epoch": 1.4648210901154441, "grad_norm": 0.5808722972869873, "learning_rate": 0.0001, "loss": 1.5977, "step": 12752 }, { "epoch": 1.4649359600252714, "grad_norm": 0.553925633430481, "learning_rate": 0.0001, "loss": 1.2278, "step": 12753 }, { "epoch": 1.4650508299350986, "grad_norm": 0.5533438920974731, "learning_rate": 0.0001, "loss": 1.401, "step": 12754 }, { "epoch": 1.4651656998449256, "grad_norm": 0.6688501834869385, "learning_rate": 0.0001, "loss": 1.5844, "step": 12755 }, { "epoch": 1.4652805697547526, "grad_norm": 0.6241793036460876, "learning_rate": 0.0001, "loss": 1.5635, "step": 12756 }, { "epoch": 1.4653954396645799, "grad_norm": 0.6105805039405823, "learning_rate": 0.0001, "loss": 1.3178, "step": 12757 }, { "epoch": 1.465510309574407, "grad_norm": 0.621698260307312, "learning_rate": 0.0001, "loss": 1.369, "step": 12758 }, { "epoch": 1.465625179484234, "grad_norm": 0.6072282791137695, "learning_rate": 0.0001, "loss": 1.3693, "step": 12759 }, { "epoch": 1.465740049394061, "grad_norm": 0.5882850885391235, "learning_rate": 0.0001, "loss": 1.4573, "step": 12760 }, { "epoch": 1.4658549193038883, "grad_norm": 0.5509545207023621, "learning_rate": 0.0001, "loss": 1.4878, "step": 12761 }, { "epoch": 1.4659697892137156, "grad_norm": 0.6017540693283081, "learning_rate": 0.0001, "loss": 1.2881, "step": 12762 }, { "epoch": 1.4660846591235426, "grad_norm": 0.6178678870201111, "learning_rate": 0.0001, "loss": 1.4212, "step": 12763 }, { "epoch": 1.4661995290333696, "grad_norm": 0.5732308626174927, "learning_rate": 0.0001, "loss": 1.4519, "step": 12764 }, { "epoch": 1.4663143989431968, "grad_norm": 0.5819944739341736, "learning_rate": 0.0001, "loss": 1.4526, "step": 12765 }, { "epoch": 1.466429268853024, "grad_norm": 0.6570119857788086, "learning_rate": 0.0001, "loss": 1.68, "step": 12766 }, { "epoch": 1.466544138762851, "grad_norm": 0.6044562458992004, "learning_rate": 0.0001, "loss": 1.4818, "step": 12767 }, { "epoch": 1.466659008672678, "grad_norm": 0.6516227126121521, "learning_rate": 0.0001, "loss": 1.4827, "step": 12768 }, { "epoch": 1.4667738785825053, "grad_norm": 0.5833653211593628, "learning_rate": 0.0001, "loss": 1.5451, "step": 12769 }, { "epoch": 1.4668887484923325, "grad_norm": 0.6204677820205688, "learning_rate": 0.0001, "loss": 1.6553, "step": 12770 }, { "epoch": 1.4670036184021595, "grad_norm": 0.5666163563728333, "learning_rate": 0.0001, "loss": 1.4902, "step": 12771 }, { "epoch": 1.4671184883119865, "grad_norm": 0.5615315437316895, "learning_rate": 0.0001, "loss": 1.3985, "step": 12772 }, { "epoch": 1.4672333582218138, "grad_norm": 0.5774404406547546, "learning_rate": 0.0001, "loss": 1.2866, "step": 12773 }, { "epoch": 1.467348228131641, "grad_norm": 0.6420813798904419, "learning_rate": 0.0001, "loss": 1.2587, "step": 12774 }, { "epoch": 1.467463098041468, "grad_norm": 0.5721668601036072, "learning_rate": 0.0001, "loss": 1.4001, "step": 12775 }, { "epoch": 1.467577967951295, "grad_norm": 0.5667369961738586, "learning_rate": 0.0001, "loss": 1.4447, "step": 12776 }, { "epoch": 1.4676928378611223, "grad_norm": 0.5806662440299988, "learning_rate": 0.0001, "loss": 1.4093, "step": 12777 }, { "epoch": 1.4678077077709495, "grad_norm": 0.5815864205360413, "learning_rate": 0.0001, "loss": 1.4261, "step": 12778 }, { "epoch": 1.4679225776807765, "grad_norm": 0.5797282457351685, "learning_rate": 0.0001, "loss": 1.3684, "step": 12779 }, { "epoch": 1.4680374475906035, "grad_norm": 0.686774492263794, "learning_rate": 0.0001, "loss": 1.6016, "step": 12780 }, { "epoch": 1.4681523175004307, "grad_norm": 0.570772647857666, "learning_rate": 0.0001, "loss": 1.3664, "step": 12781 }, { "epoch": 1.468267187410258, "grad_norm": 0.6441384553909302, "learning_rate": 0.0001, "loss": 1.5228, "step": 12782 }, { "epoch": 1.468382057320085, "grad_norm": 0.6247745156288147, "learning_rate": 0.0001, "loss": 1.3402, "step": 12783 }, { "epoch": 1.468496927229912, "grad_norm": 0.5949456095695496, "learning_rate": 0.0001, "loss": 1.6516, "step": 12784 }, { "epoch": 1.4686117971397392, "grad_norm": 0.5714166760444641, "learning_rate": 0.0001, "loss": 1.4388, "step": 12785 }, { "epoch": 1.4687266670495664, "grad_norm": 0.5946084856987, "learning_rate": 0.0001, "loss": 1.4548, "step": 12786 }, { "epoch": 1.4688415369593935, "grad_norm": 0.607331395149231, "learning_rate": 0.0001, "loss": 1.3089, "step": 12787 }, { "epoch": 1.4689564068692207, "grad_norm": 0.5990446209907532, "learning_rate": 0.0001, "loss": 1.4873, "step": 12788 }, { "epoch": 1.4690712767790477, "grad_norm": 0.6370789408683777, "learning_rate": 0.0001, "loss": 1.5659, "step": 12789 }, { "epoch": 1.469186146688875, "grad_norm": 0.6360231041908264, "learning_rate": 0.0001, "loss": 1.5793, "step": 12790 }, { "epoch": 1.469301016598702, "grad_norm": 0.6127180457115173, "learning_rate": 0.0001, "loss": 1.5618, "step": 12791 }, { "epoch": 1.4694158865085292, "grad_norm": 0.5970214009284973, "learning_rate": 0.0001, "loss": 1.2743, "step": 12792 }, { "epoch": 1.4695307564183562, "grad_norm": 0.6505621075630188, "learning_rate": 0.0001, "loss": 1.7703, "step": 12793 }, { "epoch": 1.4696456263281834, "grad_norm": 0.5832335352897644, "learning_rate": 0.0001, "loss": 1.3529, "step": 12794 }, { "epoch": 1.4697604962380104, "grad_norm": 0.61517733335495, "learning_rate": 0.0001, "loss": 1.6536, "step": 12795 }, { "epoch": 1.4698753661478376, "grad_norm": 0.5566610097885132, "learning_rate": 0.0001, "loss": 1.269, "step": 12796 }, { "epoch": 1.4699902360576647, "grad_norm": 0.5564651489257812, "learning_rate": 0.0001, "loss": 1.2414, "step": 12797 }, { "epoch": 1.4701051059674919, "grad_norm": 0.5723354816436768, "learning_rate": 0.0001, "loss": 1.4185, "step": 12798 }, { "epoch": 1.470219975877319, "grad_norm": 0.6019946932792664, "learning_rate": 0.0001, "loss": 1.4871, "step": 12799 }, { "epoch": 1.4703348457871461, "grad_norm": 0.6336812973022461, "learning_rate": 0.0001, "loss": 1.3159, "step": 12800 }, { "epoch": 1.4704497156969731, "grad_norm": 0.602342426776886, "learning_rate": 0.0001, "loss": 1.4548, "step": 12801 }, { "epoch": 1.4705645856068004, "grad_norm": 0.6818926334381104, "learning_rate": 0.0001, "loss": 1.4655, "step": 12802 }, { "epoch": 1.4706794555166274, "grad_norm": 0.6180601716041565, "learning_rate": 0.0001, "loss": 1.457, "step": 12803 }, { "epoch": 1.4707943254264546, "grad_norm": 0.6229212284088135, "learning_rate": 0.0001, "loss": 1.5556, "step": 12804 }, { "epoch": 1.4709091953362816, "grad_norm": 0.6223541498184204, "learning_rate": 0.0001, "loss": 1.4241, "step": 12805 }, { "epoch": 1.4710240652461088, "grad_norm": 0.5592056512832642, "learning_rate": 0.0001, "loss": 1.3271, "step": 12806 }, { "epoch": 1.4711389351559359, "grad_norm": 0.569546639919281, "learning_rate": 0.0001, "loss": 1.4232, "step": 12807 }, { "epoch": 1.471253805065763, "grad_norm": 0.5602309107780457, "learning_rate": 0.0001, "loss": 1.543, "step": 12808 }, { "epoch": 1.47136867497559, "grad_norm": 0.7094202637672424, "learning_rate": 0.0001, "loss": 1.6989, "step": 12809 }, { "epoch": 1.4714835448854173, "grad_norm": 0.6181465983390808, "learning_rate": 0.0001, "loss": 1.6225, "step": 12810 }, { "epoch": 1.4715984147952443, "grad_norm": 0.5809864401817322, "learning_rate": 0.0001, "loss": 1.3377, "step": 12811 }, { "epoch": 1.4717132847050716, "grad_norm": 0.5605827569961548, "learning_rate": 0.0001, "loss": 1.4811, "step": 12812 }, { "epoch": 1.4718281546148986, "grad_norm": 0.5921470522880554, "learning_rate": 0.0001, "loss": 1.5827, "step": 12813 }, { "epoch": 1.4719430245247258, "grad_norm": 0.5679218173027039, "learning_rate": 0.0001, "loss": 1.2956, "step": 12814 }, { "epoch": 1.4720578944345528, "grad_norm": 0.6096241474151611, "learning_rate": 0.0001, "loss": 1.509, "step": 12815 }, { "epoch": 1.47217276434438, "grad_norm": 0.5679489970207214, "learning_rate": 0.0001, "loss": 1.4101, "step": 12816 }, { "epoch": 1.472287634254207, "grad_norm": 0.5891135931015015, "learning_rate": 0.0001, "loss": 1.4284, "step": 12817 }, { "epoch": 1.4724025041640343, "grad_norm": 0.6344767212867737, "learning_rate": 0.0001, "loss": 1.5762, "step": 12818 }, { "epoch": 1.4725173740738613, "grad_norm": 0.5692608952522278, "learning_rate": 0.0001, "loss": 1.3893, "step": 12819 }, { "epoch": 1.4726322439836885, "grad_norm": 0.6075809001922607, "learning_rate": 0.0001, "loss": 1.391, "step": 12820 }, { "epoch": 1.4727471138935155, "grad_norm": 0.5786142349243164, "learning_rate": 0.0001, "loss": 1.6002, "step": 12821 }, { "epoch": 1.4728619838033428, "grad_norm": 0.5931715965270996, "learning_rate": 0.0001, "loss": 1.3525, "step": 12822 }, { "epoch": 1.4729768537131698, "grad_norm": 0.5575855374336243, "learning_rate": 0.0001, "loss": 1.4526, "step": 12823 }, { "epoch": 1.473091723622997, "grad_norm": 0.5985252857208252, "learning_rate": 0.0001, "loss": 1.4732, "step": 12824 }, { "epoch": 1.473206593532824, "grad_norm": 0.5632007718086243, "learning_rate": 0.0001, "loss": 1.5949, "step": 12825 }, { "epoch": 1.4733214634426512, "grad_norm": 0.5793201923370361, "learning_rate": 0.0001, "loss": 1.5883, "step": 12826 }, { "epoch": 1.4734363333524783, "grad_norm": 0.5845635533332825, "learning_rate": 0.0001, "loss": 1.4624, "step": 12827 }, { "epoch": 1.4735512032623055, "grad_norm": 0.6314181089401245, "learning_rate": 0.0001, "loss": 1.4328, "step": 12828 }, { "epoch": 1.4736660731721325, "grad_norm": 0.6269012093544006, "learning_rate": 0.0001, "loss": 1.5634, "step": 12829 }, { "epoch": 1.4737809430819597, "grad_norm": 0.6091486215591431, "learning_rate": 0.0001, "loss": 1.3224, "step": 12830 }, { "epoch": 1.4738958129917867, "grad_norm": 0.6564414501190186, "learning_rate": 0.0001, "loss": 1.482, "step": 12831 }, { "epoch": 1.474010682901614, "grad_norm": 0.5696612000465393, "learning_rate": 0.0001, "loss": 1.403, "step": 12832 }, { "epoch": 1.474125552811441, "grad_norm": 0.612686276435852, "learning_rate": 0.0001, "loss": 1.5524, "step": 12833 }, { "epoch": 1.4742404227212682, "grad_norm": 0.5725769996643066, "learning_rate": 0.0001, "loss": 1.474, "step": 12834 }, { "epoch": 1.4743552926310952, "grad_norm": 0.6267582774162292, "learning_rate": 0.0001, "loss": 1.6184, "step": 12835 }, { "epoch": 1.4744701625409224, "grad_norm": 0.6302061676979065, "learning_rate": 0.0001, "loss": 1.4283, "step": 12836 }, { "epoch": 1.4745850324507495, "grad_norm": 0.5919556617736816, "learning_rate": 0.0001, "loss": 1.4403, "step": 12837 }, { "epoch": 1.4746999023605767, "grad_norm": 0.6446247100830078, "learning_rate": 0.0001, "loss": 1.5849, "step": 12838 }, { "epoch": 1.4748147722704037, "grad_norm": 0.6327805519104004, "learning_rate": 0.0001, "loss": 1.4543, "step": 12839 }, { "epoch": 1.474929642180231, "grad_norm": 0.6169029474258423, "learning_rate": 0.0001, "loss": 1.5194, "step": 12840 }, { "epoch": 1.475044512090058, "grad_norm": 0.576792299747467, "learning_rate": 0.0001, "loss": 1.2857, "step": 12841 }, { "epoch": 1.4751593819998852, "grad_norm": 0.6225866675376892, "learning_rate": 0.0001, "loss": 1.5189, "step": 12842 }, { "epoch": 1.4752742519097122, "grad_norm": 0.5920522212982178, "learning_rate": 0.0001, "loss": 1.4826, "step": 12843 }, { "epoch": 1.4753891218195394, "grad_norm": 0.5702757835388184, "learning_rate": 0.0001, "loss": 1.4564, "step": 12844 }, { "epoch": 1.4755039917293664, "grad_norm": 0.5631170272827148, "learning_rate": 0.0001, "loss": 1.3581, "step": 12845 }, { "epoch": 1.4756188616391936, "grad_norm": 0.5859529376029968, "learning_rate": 0.0001, "loss": 1.5269, "step": 12846 }, { "epoch": 1.4757337315490207, "grad_norm": 0.5922108888626099, "learning_rate": 0.0001, "loss": 1.4155, "step": 12847 }, { "epoch": 1.4758486014588479, "grad_norm": 0.5865465998649597, "learning_rate": 0.0001, "loss": 1.4789, "step": 12848 }, { "epoch": 1.475963471368675, "grad_norm": 0.5963805317878723, "learning_rate": 0.0001, "loss": 1.3117, "step": 12849 }, { "epoch": 1.4760783412785021, "grad_norm": 0.6101550459861755, "learning_rate": 0.0001, "loss": 1.5932, "step": 12850 }, { "epoch": 1.4761932111883291, "grad_norm": 0.5703292489051819, "learning_rate": 0.0001, "loss": 1.4169, "step": 12851 }, { "epoch": 1.4763080810981564, "grad_norm": 0.726719856262207, "learning_rate": 0.0001, "loss": 1.3441, "step": 12852 }, { "epoch": 1.4764229510079834, "grad_norm": 0.5899834036827087, "learning_rate": 0.0001, "loss": 1.3448, "step": 12853 }, { "epoch": 1.4765378209178106, "grad_norm": 0.6098437905311584, "learning_rate": 0.0001, "loss": 1.5906, "step": 12854 }, { "epoch": 1.4766526908276378, "grad_norm": 0.5887052416801453, "learning_rate": 0.0001, "loss": 1.5883, "step": 12855 }, { "epoch": 1.4767675607374648, "grad_norm": 0.6153585910797119, "learning_rate": 0.0001, "loss": 1.4361, "step": 12856 }, { "epoch": 1.4768824306472919, "grad_norm": 0.6315271854400635, "learning_rate": 0.0001, "loss": 1.3454, "step": 12857 }, { "epoch": 1.476997300557119, "grad_norm": 0.6163341403007507, "learning_rate": 0.0001, "loss": 1.4341, "step": 12858 }, { "epoch": 1.4771121704669463, "grad_norm": 0.6163727045059204, "learning_rate": 0.0001, "loss": 1.4261, "step": 12859 }, { "epoch": 1.4772270403767733, "grad_norm": 0.5802782773971558, "learning_rate": 0.0001, "loss": 1.464, "step": 12860 }, { "epoch": 1.4773419102866003, "grad_norm": 0.6903051733970642, "learning_rate": 0.0001, "loss": 1.642, "step": 12861 }, { "epoch": 1.4774567801964276, "grad_norm": 0.5828868746757507, "learning_rate": 0.0001, "loss": 1.381, "step": 12862 }, { "epoch": 1.4775716501062548, "grad_norm": 0.5923581719398499, "learning_rate": 0.0001, "loss": 1.4827, "step": 12863 }, { "epoch": 1.4776865200160818, "grad_norm": 0.606850802898407, "learning_rate": 0.0001, "loss": 1.5009, "step": 12864 }, { "epoch": 1.4778013899259088, "grad_norm": 0.5898141264915466, "learning_rate": 0.0001, "loss": 1.2978, "step": 12865 }, { "epoch": 1.477916259835736, "grad_norm": 0.5947832465171814, "learning_rate": 0.0001, "loss": 1.5102, "step": 12866 }, { "epoch": 1.4780311297455633, "grad_norm": 0.5990186929702759, "learning_rate": 0.0001, "loss": 1.4618, "step": 12867 }, { "epoch": 1.4781459996553903, "grad_norm": 0.6089809536933899, "learning_rate": 0.0001, "loss": 1.5126, "step": 12868 }, { "epoch": 1.4782608695652173, "grad_norm": 0.5526347756385803, "learning_rate": 0.0001, "loss": 1.3861, "step": 12869 }, { "epoch": 1.4783757394750445, "grad_norm": 0.631928026676178, "learning_rate": 0.0001, "loss": 1.4951, "step": 12870 }, { "epoch": 1.4784906093848718, "grad_norm": 0.6184859275817871, "learning_rate": 0.0001, "loss": 1.3549, "step": 12871 }, { "epoch": 1.4786054792946988, "grad_norm": 0.5659068822860718, "learning_rate": 0.0001, "loss": 1.4886, "step": 12872 }, { "epoch": 1.4787203492045258, "grad_norm": 0.5731703639030457, "learning_rate": 0.0001, "loss": 1.1864, "step": 12873 }, { "epoch": 1.478835219114353, "grad_norm": 0.5723096132278442, "learning_rate": 0.0001, "loss": 1.5536, "step": 12874 }, { "epoch": 1.4789500890241802, "grad_norm": 0.5880361199378967, "learning_rate": 0.0001, "loss": 1.4777, "step": 12875 }, { "epoch": 1.4790649589340072, "grad_norm": 0.6019386053085327, "learning_rate": 0.0001, "loss": 1.4355, "step": 12876 }, { "epoch": 1.4791798288438343, "grad_norm": 0.6749945282936096, "learning_rate": 0.0001, "loss": 1.1855, "step": 12877 }, { "epoch": 1.4792946987536615, "grad_norm": 0.5709156394004822, "learning_rate": 0.0001, "loss": 1.4813, "step": 12878 }, { "epoch": 1.4794095686634887, "grad_norm": 0.604844331741333, "learning_rate": 0.0001, "loss": 1.487, "step": 12879 }, { "epoch": 1.4795244385733157, "grad_norm": 0.608182966709137, "learning_rate": 0.0001, "loss": 1.3741, "step": 12880 }, { "epoch": 1.4796393084831427, "grad_norm": 0.5870488882064819, "learning_rate": 0.0001, "loss": 1.3744, "step": 12881 }, { "epoch": 1.47975417839297, "grad_norm": 0.6059988737106323, "learning_rate": 0.0001, "loss": 1.3412, "step": 12882 }, { "epoch": 1.4798690483027972, "grad_norm": 0.6506321430206299, "learning_rate": 0.0001, "loss": 1.6666, "step": 12883 }, { "epoch": 1.4799839182126242, "grad_norm": 0.5870694518089294, "learning_rate": 0.0001, "loss": 1.3905, "step": 12884 }, { "epoch": 1.4800987881224512, "grad_norm": 0.5936076641082764, "learning_rate": 0.0001, "loss": 1.5622, "step": 12885 }, { "epoch": 1.4802136580322784, "grad_norm": 0.6202870607376099, "learning_rate": 0.0001, "loss": 1.4999, "step": 12886 }, { "epoch": 1.4803285279421057, "grad_norm": 0.5941158533096313, "learning_rate": 0.0001, "loss": 1.5883, "step": 12887 }, { "epoch": 1.4804433978519327, "grad_norm": 0.5812159180641174, "learning_rate": 0.0001, "loss": 1.4815, "step": 12888 }, { "epoch": 1.4805582677617597, "grad_norm": 0.6922297477722168, "learning_rate": 0.0001, "loss": 1.6634, "step": 12889 }, { "epoch": 1.480673137671587, "grad_norm": 0.6124345064163208, "learning_rate": 0.0001, "loss": 1.3621, "step": 12890 }, { "epoch": 1.4807880075814142, "grad_norm": 0.5976356267929077, "learning_rate": 0.0001, "loss": 1.4017, "step": 12891 }, { "epoch": 1.4809028774912412, "grad_norm": 0.6526122689247131, "learning_rate": 0.0001, "loss": 1.2287, "step": 12892 }, { "epoch": 1.4810177474010682, "grad_norm": 0.5740790367126465, "learning_rate": 0.0001, "loss": 1.4163, "step": 12893 }, { "epoch": 1.4811326173108954, "grad_norm": 0.6110181212425232, "learning_rate": 0.0001, "loss": 1.3558, "step": 12894 }, { "epoch": 1.4812474872207226, "grad_norm": 0.62778240442276, "learning_rate": 0.0001, "loss": 1.4462, "step": 12895 }, { "epoch": 1.4813623571305496, "grad_norm": 0.6331201195716858, "learning_rate": 0.0001, "loss": 1.4694, "step": 12896 }, { "epoch": 1.4814772270403767, "grad_norm": 0.647627055644989, "learning_rate": 0.0001, "loss": 1.3772, "step": 12897 }, { "epoch": 1.4815920969502039, "grad_norm": 0.5829721093177795, "learning_rate": 0.0001, "loss": 1.4, "step": 12898 }, { "epoch": 1.4817069668600311, "grad_norm": 0.6361407041549683, "learning_rate": 0.0001, "loss": 1.5327, "step": 12899 }, { "epoch": 1.4818218367698581, "grad_norm": 0.5852049589157104, "learning_rate": 0.0001, "loss": 1.3346, "step": 12900 }, { "epoch": 1.4819367066796851, "grad_norm": 0.5944021344184875, "learning_rate": 0.0001, "loss": 1.5299, "step": 12901 }, { "epoch": 1.4820515765895124, "grad_norm": 0.5802842378616333, "learning_rate": 0.0001, "loss": 1.4209, "step": 12902 }, { "epoch": 1.4821664464993396, "grad_norm": 0.582327663898468, "learning_rate": 0.0001, "loss": 1.2857, "step": 12903 }, { "epoch": 1.4822813164091666, "grad_norm": 0.6188213229179382, "learning_rate": 0.0001, "loss": 1.5666, "step": 12904 }, { "epoch": 1.4823961863189936, "grad_norm": 0.6501442790031433, "learning_rate": 0.0001, "loss": 1.3503, "step": 12905 }, { "epoch": 1.4825110562288208, "grad_norm": 0.6042030453681946, "learning_rate": 0.0001, "loss": 1.3655, "step": 12906 }, { "epoch": 1.482625926138648, "grad_norm": 0.6173478960990906, "learning_rate": 0.0001, "loss": 1.519, "step": 12907 }, { "epoch": 1.482740796048475, "grad_norm": 0.6603444814682007, "learning_rate": 0.0001, "loss": 1.6004, "step": 12908 }, { "epoch": 1.482855665958302, "grad_norm": 0.5978038311004639, "learning_rate": 0.0001, "loss": 1.2935, "step": 12909 }, { "epoch": 1.4829705358681293, "grad_norm": 0.6178515553474426, "learning_rate": 0.0001, "loss": 1.4831, "step": 12910 }, { "epoch": 1.4830854057779566, "grad_norm": 0.6077051758766174, "learning_rate": 0.0001, "loss": 1.4124, "step": 12911 }, { "epoch": 1.4832002756877836, "grad_norm": 0.6282074451446533, "learning_rate": 0.0001, "loss": 1.5461, "step": 12912 }, { "epoch": 1.4833151455976106, "grad_norm": 0.5881144404411316, "learning_rate": 0.0001, "loss": 1.4419, "step": 12913 }, { "epoch": 1.4834300155074378, "grad_norm": 0.5621463656425476, "learning_rate": 0.0001, "loss": 1.4631, "step": 12914 }, { "epoch": 1.483544885417265, "grad_norm": 0.6100106239318848, "learning_rate": 0.0001, "loss": 1.5192, "step": 12915 }, { "epoch": 1.483659755327092, "grad_norm": 0.5708780288696289, "learning_rate": 0.0001, "loss": 1.3741, "step": 12916 }, { "epoch": 1.483774625236919, "grad_norm": 0.6632856726646423, "learning_rate": 0.0001, "loss": 1.6289, "step": 12917 }, { "epoch": 1.4838894951467463, "grad_norm": 0.598359227180481, "learning_rate": 0.0001, "loss": 1.3337, "step": 12918 }, { "epoch": 1.4840043650565735, "grad_norm": 0.6106839179992676, "learning_rate": 0.0001, "loss": 1.4881, "step": 12919 }, { "epoch": 1.4841192349664005, "grad_norm": 0.6464507579803467, "learning_rate": 0.0001, "loss": 1.4999, "step": 12920 }, { "epoch": 1.4842341048762275, "grad_norm": 0.6744695901870728, "learning_rate": 0.0001, "loss": 1.6106, "step": 12921 }, { "epoch": 1.4843489747860548, "grad_norm": 0.6888026595115662, "learning_rate": 0.0001, "loss": 1.6446, "step": 12922 }, { "epoch": 1.484463844695882, "grad_norm": 0.5786687731742859, "learning_rate": 0.0001, "loss": 1.4022, "step": 12923 }, { "epoch": 1.484578714605709, "grad_norm": 0.5795042514801025, "learning_rate": 0.0001, "loss": 1.4461, "step": 12924 }, { "epoch": 1.4846935845155362, "grad_norm": 0.5988849997520447, "learning_rate": 0.0001, "loss": 1.5296, "step": 12925 }, { "epoch": 1.4848084544253632, "grad_norm": 0.5869229435920715, "learning_rate": 0.0001, "loss": 1.3634, "step": 12926 }, { "epoch": 1.4849233243351905, "grad_norm": 0.6069849133491516, "learning_rate": 0.0001, "loss": 1.4881, "step": 12927 }, { "epoch": 1.4850381942450175, "grad_norm": 0.6400530338287354, "learning_rate": 0.0001, "loss": 1.6889, "step": 12928 }, { "epoch": 1.4851530641548447, "grad_norm": 0.605957567691803, "learning_rate": 0.0001, "loss": 1.3198, "step": 12929 }, { "epoch": 1.4852679340646717, "grad_norm": 0.6827652454376221, "learning_rate": 0.0001, "loss": 1.6951, "step": 12930 }, { "epoch": 1.485382803974499, "grad_norm": 0.6778180003166199, "learning_rate": 0.0001, "loss": 1.4847, "step": 12931 }, { "epoch": 1.485497673884326, "grad_norm": 0.5894115567207336, "learning_rate": 0.0001, "loss": 1.5448, "step": 12932 }, { "epoch": 1.4856125437941532, "grad_norm": 0.5633677244186401, "learning_rate": 0.0001, "loss": 1.4671, "step": 12933 }, { "epoch": 1.4857274137039802, "grad_norm": 0.5746277570724487, "learning_rate": 0.0001, "loss": 1.3921, "step": 12934 }, { "epoch": 1.4858422836138074, "grad_norm": 0.5946472883224487, "learning_rate": 0.0001, "loss": 1.4943, "step": 12935 }, { "epoch": 1.4859571535236344, "grad_norm": 0.6017822027206421, "learning_rate": 0.0001, "loss": 1.3947, "step": 12936 }, { "epoch": 1.4860720234334617, "grad_norm": 0.5574742555618286, "learning_rate": 0.0001, "loss": 1.3781, "step": 12937 }, { "epoch": 1.4861868933432887, "grad_norm": 0.6060687899589539, "learning_rate": 0.0001, "loss": 1.6548, "step": 12938 }, { "epoch": 1.486301763253116, "grad_norm": 0.5431711673736572, "learning_rate": 0.0001, "loss": 1.4442, "step": 12939 }, { "epoch": 1.486416633162943, "grad_norm": 0.5980778336524963, "learning_rate": 0.0001, "loss": 1.4474, "step": 12940 }, { "epoch": 1.4865315030727702, "grad_norm": 0.5813719034194946, "learning_rate": 0.0001, "loss": 1.5765, "step": 12941 }, { "epoch": 1.4866463729825972, "grad_norm": 0.5789249539375305, "learning_rate": 0.0001, "loss": 1.4672, "step": 12942 }, { "epoch": 1.4867612428924244, "grad_norm": 0.592308759689331, "learning_rate": 0.0001, "loss": 1.5762, "step": 12943 }, { "epoch": 1.4868761128022514, "grad_norm": 0.5930988192558289, "learning_rate": 0.0001, "loss": 1.3819, "step": 12944 }, { "epoch": 1.4869909827120786, "grad_norm": 0.5978574752807617, "learning_rate": 0.0001, "loss": 1.3716, "step": 12945 }, { "epoch": 1.4871058526219056, "grad_norm": 0.554966151714325, "learning_rate": 0.0001, "loss": 1.2418, "step": 12946 }, { "epoch": 1.4872207225317329, "grad_norm": 0.6341855525970459, "learning_rate": 0.0001, "loss": 1.6254, "step": 12947 }, { "epoch": 1.4873355924415599, "grad_norm": 0.5823864340782166, "learning_rate": 0.0001, "loss": 1.461, "step": 12948 }, { "epoch": 1.4874504623513871, "grad_norm": 0.5773583650588989, "learning_rate": 0.0001, "loss": 1.4707, "step": 12949 }, { "epoch": 1.4875653322612141, "grad_norm": 0.6104335188865662, "learning_rate": 0.0001, "loss": 1.2777, "step": 12950 }, { "epoch": 1.4876802021710414, "grad_norm": 0.5461379885673523, "learning_rate": 0.0001, "loss": 1.3399, "step": 12951 }, { "epoch": 1.4877950720808684, "grad_norm": 0.5723500847816467, "learning_rate": 0.0001, "loss": 1.3066, "step": 12952 }, { "epoch": 1.4879099419906956, "grad_norm": 0.6174570918083191, "learning_rate": 0.0001, "loss": 1.6148, "step": 12953 }, { "epoch": 1.4880248119005226, "grad_norm": 0.5926865935325623, "learning_rate": 0.0001, "loss": 1.5758, "step": 12954 }, { "epoch": 1.4881396818103498, "grad_norm": 0.593763530254364, "learning_rate": 0.0001, "loss": 1.4821, "step": 12955 }, { "epoch": 1.4882545517201768, "grad_norm": 0.6111153364181519, "learning_rate": 0.0001, "loss": 1.5675, "step": 12956 }, { "epoch": 1.488369421630004, "grad_norm": 0.6312853693962097, "learning_rate": 0.0001, "loss": 1.4476, "step": 12957 }, { "epoch": 1.488484291539831, "grad_norm": 0.6247866153717041, "learning_rate": 0.0001, "loss": 1.3772, "step": 12958 }, { "epoch": 1.4885991614496583, "grad_norm": 0.5472710728645325, "learning_rate": 0.0001, "loss": 1.1579, "step": 12959 }, { "epoch": 1.4887140313594853, "grad_norm": 0.5936057567596436, "learning_rate": 0.0001, "loss": 1.4158, "step": 12960 }, { "epoch": 1.4888289012693126, "grad_norm": 0.5668746829032898, "learning_rate": 0.0001, "loss": 1.5599, "step": 12961 }, { "epoch": 1.4889437711791396, "grad_norm": 0.605015218257904, "learning_rate": 0.0001, "loss": 1.7382, "step": 12962 }, { "epoch": 1.4890586410889668, "grad_norm": 0.6417834162712097, "learning_rate": 0.0001, "loss": 1.5262, "step": 12963 }, { "epoch": 1.4891735109987938, "grad_norm": 0.6083729267120361, "learning_rate": 0.0001, "loss": 1.4273, "step": 12964 }, { "epoch": 1.489288380908621, "grad_norm": 0.6126262545585632, "learning_rate": 0.0001, "loss": 1.6093, "step": 12965 }, { "epoch": 1.489403250818448, "grad_norm": 0.6185763478279114, "learning_rate": 0.0001, "loss": 1.3363, "step": 12966 }, { "epoch": 1.4895181207282753, "grad_norm": 0.6177452802658081, "learning_rate": 0.0001, "loss": 1.2466, "step": 12967 }, { "epoch": 1.4896329906381023, "grad_norm": 0.6014622449874878, "learning_rate": 0.0001, "loss": 1.4981, "step": 12968 }, { "epoch": 1.4897478605479295, "grad_norm": 0.6131553053855896, "learning_rate": 0.0001, "loss": 1.4818, "step": 12969 }, { "epoch": 1.4898627304577565, "grad_norm": 0.6212412118911743, "learning_rate": 0.0001, "loss": 1.5509, "step": 12970 }, { "epoch": 1.4899776003675838, "grad_norm": 0.595173180103302, "learning_rate": 0.0001, "loss": 1.4124, "step": 12971 }, { "epoch": 1.4900924702774108, "grad_norm": 0.623760998249054, "learning_rate": 0.0001, "loss": 1.4035, "step": 12972 }, { "epoch": 1.490207340187238, "grad_norm": 0.5903745889663696, "learning_rate": 0.0001, "loss": 1.5047, "step": 12973 }, { "epoch": 1.490322210097065, "grad_norm": 0.5988485813140869, "learning_rate": 0.0001, "loss": 1.505, "step": 12974 }, { "epoch": 1.4904370800068922, "grad_norm": 0.592363178730011, "learning_rate": 0.0001, "loss": 1.3643, "step": 12975 }, { "epoch": 1.4905519499167192, "grad_norm": 0.6512883901596069, "learning_rate": 0.0001, "loss": 1.3463, "step": 12976 }, { "epoch": 1.4906668198265465, "grad_norm": 0.5865827798843384, "learning_rate": 0.0001, "loss": 1.3116, "step": 12977 }, { "epoch": 1.4907816897363735, "grad_norm": 0.5729326605796814, "learning_rate": 0.0001, "loss": 1.3622, "step": 12978 }, { "epoch": 1.4908965596462007, "grad_norm": 0.599138617515564, "learning_rate": 0.0001, "loss": 1.5096, "step": 12979 }, { "epoch": 1.4910114295560277, "grad_norm": 0.6441994905471802, "learning_rate": 0.0001, "loss": 1.5755, "step": 12980 }, { "epoch": 1.491126299465855, "grad_norm": 0.5867522358894348, "learning_rate": 0.0001, "loss": 1.4342, "step": 12981 }, { "epoch": 1.491241169375682, "grad_norm": 0.6497024297714233, "learning_rate": 0.0001, "loss": 1.5281, "step": 12982 }, { "epoch": 1.4913560392855092, "grad_norm": 0.6006242632865906, "learning_rate": 0.0001, "loss": 1.5463, "step": 12983 }, { "epoch": 1.4914709091953362, "grad_norm": 0.5676483511924744, "learning_rate": 0.0001, "loss": 1.2727, "step": 12984 }, { "epoch": 1.4915857791051634, "grad_norm": 0.6318432092666626, "learning_rate": 0.0001, "loss": 1.56, "step": 12985 }, { "epoch": 1.4917006490149904, "grad_norm": 0.5977509021759033, "learning_rate": 0.0001, "loss": 1.289, "step": 12986 }, { "epoch": 1.4918155189248177, "grad_norm": 0.6188353896141052, "learning_rate": 0.0001, "loss": 1.3374, "step": 12987 }, { "epoch": 1.4919303888346447, "grad_norm": 0.5568304657936096, "learning_rate": 0.0001, "loss": 1.4605, "step": 12988 }, { "epoch": 1.492045258744472, "grad_norm": 0.5960107445716858, "learning_rate": 0.0001, "loss": 1.6009, "step": 12989 }, { "epoch": 1.492160128654299, "grad_norm": 0.5843378305435181, "learning_rate": 0.0001, "loss": 1.4434, "step": 12990 }, { "epoch": 1.4922749985641262, "grad_norm": 0.5970795154571533, "learning_rate": 0.0001, "loss": 1.6927, "step": 12991 }, { "epoch": 1.4923898684739534, "grad_norm": 0.5971647500991821, "learning_rate": 0.0001, "loss": 1.5712, "step": 12992 }, { "epoch": 1.4925047383837804, "grad_norm": 0.5765950679779053, "learning_rate": 0.0001, "loss": 1.4302, "step": 12993 }, { "epoch": 1.4926196082936074, "grad_norm": 0.6342061758041382, "learning_rate": 0.0001, "loss": 1.4708, "step": 12994 }, { "epoch": 1.4927344782034346, "grad_norm": 0.5808407664299011, "learning_rate": 0.0001, "loss": 1.4504, "step": 12995 }, { "epoch": 1.4928493481132619, "grad_norm": 0.7014811635017395, "learning_rate": 0.0001, "loss": 1.6261, "step": 12996 }, { "epoch": 1.4929642180230889, "grad_norm": 0.6212725639343262, "learning_rate": 0.0001, "loss": 1.6025, "step": 12997 }, { "epoch": 1.4930790879329159, "grad_norm": 0.5805021524429321, "learning_rate": 0.0001, "loss": 1.4603, "step": 12998 }, { "epoch": 1.4931939578427431, "grad_norm": 0.5734280943870544, "learning_rate": 0.0001, "loss": 1.5504, "step": 12999 }, { "epoch": 1.4933088277525703, "grad_norm": 0.5856978297233582, "learning_rate": 0.0001, "loss": 1.3655, "step": 13000 }, { "epoch": 1.4934236976623974, "grad_norm": 0.5353971123695374, "learning_rate": 0.0001, "loss": 1.2464, "step": 13001 }, { "epoch": 1.4935385675722244, "grad_norm": 0.6052659749984741, "learning_rate": 0.0001, "loss": 1.6794, "step": 13002 }, { "epoch": 1.4936534374820516, "grad_norm": 0.5781797170639038, "learning_rate": 0.0001, "loss": 1.3597, "step": 13003 }, { "epoch": 1.4937683073918788, "grad_norm": 0.576927125453949, "learning_rate": 0.0001, "loss": 1.4228, "step": 13004 }, { "epoch": 1.4938831773017058, "grad_norm": 0.6039324998855591, "learning_rate": 0.0001, "loss": 1.4246, "step": 13005 }, { "epoch": 1.4939980472115328, "grad_norm": 0.5810524821281433, "learning_rate": 0.0001, "loss": 1.4884, "step": 13006 }, { "epoch": 1.49411291712136, "grad_norm": 0.5944836735725403, "learning_rate": 0.0001, "loss": 1.4696, "step": 13007 }, { "epoch": 1.4942277870311873, "grad_norm": 0.6197353601455688, "learning_rate": 0.0001, "loss": 1.4723, "step": 13008 }, { "epoch": 1.4943426569410143, "grad_norm": 0.5848913788795471, "learning_rate": 0.0001, "loss": 1.4212, "step": 13009 }, { "epoch": 1.4944575268508413, "grad_norm": 0.6071828603744507, "learning_rate": 0.0001, "loss": 1.5183, "step": 13010 }, { "epoch": 1.4945723967606686, "grad_norm": 0.6387497186660767, "learning_rate": 0.0001, "loss": 1.5511, "step": 13011 }, { "epoch": 1.4946872666704958, "grad_norm": 0.5986767411231995, "learning_rate": 0.0001, "loss": 1.461, "step": 13012 }, { "epoch": 1.4948021365803228, "grad_norm": 0.6346409916877747, "learning_rate": 0.0001, "loss": 1.5136, "step": 13013 }, { "epoch": 1.4949170064901498, "grad_norm": 0.6802158951759338, "learning_rate": 0.0001, "loss": 1.5238, "step": 13014 }, { "epoch": 1.495031876399977, "grad_norm": 0.6537509560585022, "learning_rate": 0.0001, "loss": 1.1075, "step": 13015 }, { "epoch": 1.4951467463098043, "grad_norm": 0.5992099642753601, "learning_rate": 0.0001, "loss": 1.3288, "step": 13016 }, { "epoch": 1.4952616162196313, "grad_norm": 0.6179826855659485, "learning_rate": 0.0001, "loss": 1.5179, "step": 13017 }, { "epoch": 1.4953764861294583, "grad_norm": 0.6495833992958069, "learning_rate": 0.0001, "loss": 1.7106, "step": 13018 }, { "epoch": 1.4954913560392855, "grad_norm": 0.6056380271911621, "learning_rate": 0.0001, "loss": 1.4111, "step": 13019 }, { "epoch": 1.4956062259491127, "grad_norm": 0.7197732925415039, "learning_rate": 0.0001, "loss": 1.8647, "step": 13020 }, { "epoch": 1.4957210958589398, "grad_norm": 0.63360196352005, "learning_rate": 0.0001, "loss": 1.5772, "step": 13021 }, { "epoch": 1.4958359657687668, "grad_norm": 0.6025167107582092, "learning_rate": 0.0001, "loss": 1.425, "step": 13022 }, { "epoch": 1.495950835678594, "grad_norm": 0.596898078918457, "learning_rate": 0.0001, "loss": 1.6411, "step": 13023 }, { "epoch": 1.4960657055884212, "grad_norm": 0.5755943059921265, "learning_rate": 0.0001, "loss": 1.5664, "step": 13024 }, { "epoch": 1.4961805754982482, "grad_norm": 0.5723254084587097, "learning_rate": 0.0001, "loss": 1.5093, "step": 13025 }, { "epoch": 1.4962954454080752, "grad_norm": 0.5714227557182312, "learning_rate": 0.0001, "loss": 1.3679, "step": 13026 }, { "epoch": 1.4964103153179025, "grad_norm": 0.6034862399101257, "learning_rate": 0.0001, "loss": 1.5995, "step": 13027 }, { "epoch": 1.4965251852277297, "grad_norm": 0.6068670749664307, "learning_rate": 0.0001, "loss": 1.6586, "step": 13028 }, { "epoch": 1.4966400551375567, "grad_norm": 0.5679345726966858, "learning_rate": 0.0001, "loss": 1.236, "step": 13029 }, { "epoch": 1.4967549250473837, "grad_norm": 0.56649249792099, "learning_rate": 0.0001, "loss": 1.3815, "step": 13030 }, { "epoch": 1.496869794957211, "grad_norm": 0.617186963558197, "learning_rate": 0.0001, "loss": 1.4181, "step": 13031 }, { "epoch": 1.4969846648670382, "grad_norm": 0.5779293179512024, "learning_rate": 0.0001, "loss": 1.4227, "step": 13032 }, { "epoch": 1.4970995347768652, "grad_norm": 0.6814537644386292, "learning_rate": 0.0001, "loss": 1.4959, "step": 13033 }, { "epoch": 1.4972144046866922, "grad_norm": 0.6161949038505554, "learning_rate": 0.0001, "loss": 1.4994, "step": 13034 }, { "epoch": 1.4973292745965194, "grad_norm": 0.6186149716377258, "learning_rate": 0.0001, "loss": 1.5224, "step": 13035 }, { "epoch": 1.4974441445063467, "grad_norm": 0.6470484137535095, "learning_rate": 0.0001, "loss": 1.6088, "step": 13036 }, { "epoch": 1.4975590144161737, "grad_norm": 0.6378332376480103, "learning_rate": 0.0001, "loss": 1.5992, "step": 13037 }, { "epoch": 1.4976738843260007, "grad_norm": 0.6506776809692383, "learning_rate": 0.0001, "loss": 1.6681, "step": 13038 }, { "epoch": 1.497788754235828, "grad_norm": 0.6596044301986694, "learning_rate": 0.0001, "loss": 1.634, "step": 13039 }, { "epoch": 1.4979036241456551, "grad_norm": 0.5984453558921814, "learning_rate": 0.0001, "loss": 1.3366, "step": 13040 }, { "epoch": 1.4980184940554822, "grad_norm": 0.6259294748306274, "learning_rate": 0.0001, "loss": 1.568, "step": 13041 }, { "epoch": 1.4981333639653092, "grad_norm": 0.6096507906913757, "learning_rate": 0.0001, "loss": 1.4598, "step": 13042 }, { "epoch": 1.4982482338751364, "grad_norm": 0.6044954657554626, "learning_rate": 0.0001, "loss": 1.4842, "step": 13043 }, { "epoch": 1.4983631037849636, "grad_norm": 0.5996847748756409, "learning_rate": 0.0001, "loss": 1.4205, "step": 13044 }, { "epoch": 1.4984779736947906, "grad_norm": 0.5740419030189514, "learning_rate": 0.0001, "loss": 1.3223, "step": 13045 }, { "epoch": 1.4985928436046176, "grad_norm": 0.6097593903541565, "learning_rate": 0.0001, "loss": 1.4634, "step": 13046 }, { "epoch": 1.4987077135144449, "grad_norm": 0.6650487780570984, "learning_rate": 0.0001, "loss": 1.3505, "step": 13047 }, { "epoch": 1.498822583424272, "grad_norm": 0.5681633949279785, "learning_rate": 0.0001, "loss": 1.3723, "step": 13048 }, { "epoch": 1.4989374533340991, "grad_norm": 0.6020470857620239, "learning_rate": 0.0001, "loss": 1.4584, "step": 13049 }, { "epoch": 1.4990523232439261, "grad_norm": 0.6068710088729858, "learning_rate": 0.0001, "loss": 1.2623, "step": 13050 }, { "epoch": 1.4991671931537534, "grad_norm": 0.6180760860443115, "learning_rate": 0.0001, "loss": 1.4976, "step": 13051 }, { "epoch": 1.4992820630635806, "grad_norm": 0.6852160096168518, "learning_rate": 0.0001, "loss": 1.3216, "step": 13052 }, { "epoch": 1.4993969329734076, "grad_norm": 0.6118769645690918, "learning_rate": 0.0001, "loss": 1.4306, "step": 13053 }, { "epoch": 1.4995118028832346, "grad_norm": 0.5923478007316589, "learning_rate": 0.0001, "loss": 1.3799, "step": 13054 }, { "epoch": 1.4996266727930618, "grad_norm": 0.5937912464141846, "learning_rate": 0.0001, "loss": 1.2611, "step": 13055 }, { "epoch": 1.499741542702889, "grad_norm": 0.5981078147888184, "learning_rate": 0.0001, "loss": 1.363, "step": 13056 }, { "epoch": 1.499856412612716, "grad_norm": 0.5924525260925293, "learning_rate": 0.0001, "loss": 1.2294, "step": 13057 }, { "epoch": 1.499971282522543, "grad_norm": 0.6042497754096985, "learning_rate": 0.0001, "loss": 1.3568, "step": 13058 }, { "epoch": 1.5000861524323703, "grad_norm": 0.6669368147850037, "learning_rate": 0.0001, "loss": 1.3832, "step": 13059 }, { "epoch": 1.5002010223421975, "grad_norm": 0.6114850044250488, "learning_rate": 0.0001, "loss": 1.5816, "step": 13060 }, { "epoch": 1.5003158922520246, "grad_norm": 0.6326112151145935, "learning_rate": 0.0001, "loss": 1.6052, "step": 13061 }, { "epoch": 1.5004307621618516, "grad_norm": 0.648796796798706, "learning_rate": 0.0001, "loss": 1.5702, "step": 13062 }, { "epoch": 1.5005456320716788, "grad_norm": 0.618161141872406, "learning_rate": 0.0001, "loss": 1.5032, "step": 13063 }, { "epoch": 1.500660501981506, "grad_norm": 0.5921330451965332, "learning_rate": 0.0001, "loss": 1.3333, "step": 13064 }, { "epoch": 1.500775371891333, "grad_norm": 0.6372246146202087, "learning_rate": 0.0001, "loss": 1.7721, "step": 13065 }, { "epoch": 1.50089024180116, "grad_norm": 0.5890547037124634, "learning_rate": 0.0001, "loss": 1.5761, "step": 13066 }, { "epoch": 1.5010051117109873, "grad_norm": 0.5621899366378784, "learning_rate": 0.0001, "loss": 1.5637, "step": 13067 }, { "epoch": 1.5011199816208145, "grad_norm": 0.6191715598106384, "learning_rate": 0.0001, "loss": 1.5025, "step": 13068 }, { "epoch": 1.5012348515306415, "grad_norm": 0.601288914680481, "learning_rate": 0.0001, "loss": 1.4675, "step": 13069 }, { "epoch": 1.5013497214404685, "grad_norm": 0.5595393180847168, "learning_rate": 0.0001, "loss": 1.4565, "step": 13070 }, { "epoch": 1.5014645913502958, "grad_norm": 0.6976054906845093, "learning_rate": 0.0001, "loss": 1.4749, "step": 13071 }, { "epoch": 1.501579461260123, "grad_norm": 0.5838610529899597, "learning_rate": 0.0001, "loss": 1.5687, "step": 13072 }, { "epoch": 1.50169433116995, "grad_norm": 0.6203344464302063, "learning_rate": 0.0001, "loss": 1.566, "step": 13073 }, { "epoch": 1.501809201079777, "grad_norm": 0.5664482712745667, "learning_rate": 0.0001, "loss": 1.4541, "step": 13074 }, { "epoch": 1.5019240709896042, "grad_norm": 0.5609536170959473, "learning_rate": 0.0001, "loss": 1.5321, "step": 13075 }, { "epoch": 1.5020389408994315, "grad_norm": 0.6260766386985779, "learning_rate": 0.0001, "loss": 1.5236, "step": 13076 }, { "epoch": 1.5021538108092585, "grad_norm": 0.6110790967941284, "learning_rate": 0.0001, "loss": 1.5518, "step": 13077 }, { "epoch": 1.5022686807190855, "grad_norm": 0.590969443321228, "learning_rate": 0.0001, "loss": 1.5212, "step": 13078 }, { "epoch": 1.5023835506289127, "grad_norm": 0.5963428616523743, "learning_rate": 0.0001, "loss": 1.4506, "step": 13079 }, { "epoch": 1.50249842053874, "grad_norm": 0.649579644203186, "learning_rate": 0.0001, "loss": 1.4977, "step": 13080 }, { "epoch": 1.502613290448567, "grad_norm": 0.5847672820091248, "learning_rate": 0.0001, "loss": 1.4921, "step": 13081 }, { "epoch": 1.502728160358394, "grad_norm": 0.6328948736190796, "learning_rate": 0.0001, "loss": 1.188, "step": 13082 }, { "epoch": 1.5028430302682212, "grad_norm": 0.5965337753295898, "learning_rate": 0.0001, "loss": 1.413, "step": 13083 }, { "epoch": 1.5029579001780484, "grad_norm": 0.6650028824806213, "learning_rate": 0.0001, "loss": 1.4197, "step": 13084 }, { "epoch": 1.5030727700878754, "grad_norm": 0.6406525373458862, "learning_rate": 0.0001, "loss": 1.6736, "step": 13085 }, { "epoch": 1.5031876399977024, "grad_norm": 0.5682651400566101, "learning_rate": 0.0001, "loss": 1.5361, "step": 13086 }, { "epoch": 1.5033025099075297, "grad_norm": 0.6791881322860718, "learning_rate": 0.0001, "loss": 1.4621, "step": 13087 }, { "epoch": 1.503417379817357, "grad_norm": 0.6566523909568787, "learning_rate": 0.0001, "loss": 1.469, "step": 13088 }, { "epoch": 1.503532249727184, "grad_norm": 0.6123066544532776, "learning_rate": 0.0001, "loss": 1.3571, "step": 13089 }, { "epoch": 1.503647119637011, "grad_norm": 0.6262611746788025, "learning_rate": 0.0001, "loss": 1.5851, "step": 13090 }, { "epoch": 1.5037619895468382, "grad_norm": 0.5882257223129272, "learning_rate": 0.0001, "loss": 1.4777, "step": 13091 }, { "epoch": 1.5038768594566654, "grad_norm": 0.6783078908920288, "learning_rate": 0.0001, "loss": 1.6693, "step": 13092 }, { "epoch": 1.5039917293664926, "grad_norm": 0.6360931992530823, "learning_rate": 0.0001, "loss": 1.3838, "step": 13093 }, { "epoch": 1.5041065992763196, "grad_norm": 0.5902203917503357, "learning_rate": 0.0001, "loss": 1.3392, "step": 13094 }, { "epoch": 1.5042214691861466, "grad_norm": 0.5917932987213135, "learning_rate": 0.0001, "loss": 1.5356, "step": 13095 }, { "epoch": 1.5043363390959739, "grad_norm": 0.5984362959861755, "learning_rate": 0.0001, "loss": 1.4222, "step": 13096 }, { "epoch": 1.504451209005801, "grad_norm": 0.5655670166015625, "learning_rate": 0.0001, "loss": 1.3595, "step": 13097 }, { "epoch": 1.504566078915628, "grad_norm": 0.5472081899642944, "learning_rate": 0.0001, "loss": 1.3729, "step": 13098 }, { "epoch": 1.5046809488254551, "grad_norm": 0.6080209016799927, "learning_rate": 0.0001, "loss": 1.3025, "step": 13099 }, { "epoch": 1.5047958187352823, "grad_norm": 0.6341947913169861, "learning_rate": 0.0001, "loss": 1.5839, "step": 13100 }, { "epoch": 1.5049106886451096, "grad_norm": 0.6027140617370605, "learning_rate": 0.0001, "loss": 1.4337, "step": 13101 }, { "epoch": 1.5050255585549366, "grad_norm": 0.6108810901641846, "learning_rate": 0.0001, "loss": 1.1485, "step": 13102 }, { "epoch": 1.5051404284647636, "grad_norm": 0.6450017094612122, "learning_rate": 0.0001, "loss": 1.5739, "step": 13103 }, { "epoch": 1.5052552983745908, "grad_norm": 0.612722635269165, "learning_rate": 0.0001, "loss": 1.4161, "step": 13104 }, { "epoch": 1.505370168284418, "grad_norm": 0.6334837675094604, "learning_rate": 0.0001, "loss": 1.3841, "step": 13105 }, { "epoch": 1.505485038194245, "grad_norm": 0.5804214477539062, "learning_rate": 0.0001, "loss": 1.4075, "step": 13106 }, { "epoch": 1.505599908104072, "grad_norm": 0.590505063533783, "learning_rate": 0.0001, "loss": 1.3492, "step": 13107 }, { "epoch": 1.5057147780138993, "grad_norm": 0.5730953216552734, "learning_rate": 0.0001, "loss": 1.6026, "step": 13108 }, { "epoch": 1.5058296479237265, "grad_norm": 0.6718409061431885, "learning_rate": 0.0001, "loss": 1.4637, "step": 13109 }, { "epoch": 1.5059445178335535, "grad_norm": 0.5778510570526123, "learning_rate": 0.0001, "loss": 1.419, "step": 13110 }, { "epoch": 1.5060593877433806, "grad_norm": 0.5760291218757629, "learning_rate": 0.0001, "loss": 1.4774, "step": 13111 }, { "epoch": 1.5061742576532078, "grad_norm": 0.6402103900909424, "learning_rate": 0.0001, "loss": 1.4435, "step": 13112 }, { "epoch": 1.506289127563035, "grad_norm": 0.6180935502052307, "learning_rate": 0.0001, "loss": 1.5586, "step": 13113 }, { "epoch": 1.506403997472862, "grad_norm": 0.5968201756477356, "learning_rate": 0.0001, "loss": 1.2183, "step": 13114 }, { "epoch": 1.506518867382689, "grad_norm": 0.5823055505752563, "learning_rate": 0.0001, "loss": 1.4475, "step": 13115 }, { "epoch": 1.5066337372925163, "grad_norm": 0.6111396551132202, "learning_rate": 0.0001, "loss": 1.4014, "step": 13116 }, { "epoch": 1.5067486072023435, "grad_norm": 0.6117756366729736, "learning_rate": 0.0001, "loss": 1.3061, "step": 13117 }, { "epoch": 1.5068634771121705, "grad_norm": 0.6762123703956604, "learning_rate": 0.0001, "loss": 1.6192, "step": 13118 }, { "epoch": 1.5069783470219975, "grad_norm": 0.6174688339233398, "learning_rate": 0.0001, "loss": 1.5198, "step": 13119 }, { "epoch": 1.5070932169318247, "grad_norm": 0.6349688172340393, "learning_rate": 0.0001, "loss": 1.4694, "step": 13120 }, { "epoch": 1.507208086841652, "grad_norm": 0.6256809830665588, "learning_rate": 0.0001, "loss": 1.3266, "step": 13121 }, { "epoch": 1.507322956751479, "grad_norm": 0.6662157773971558, "learning_rate": 0.0001, "loss": 1.5674, "step": 13122 }, { "epoch": 1.507437826661306, "grad_norm": 0.6114436984062195, "learning_rate": 0.0001, "loss": 1.4598, "step": 13123 }, { "epoch": 1.5075526965711332, "grad_norm": 0.6376405358314514, "learning_rate": 0.0001, "loss": 1.4969, "step": 13124 }, { "epoch": 1.5076675664809605, "grad_norm": 0.6283373832702637, "learning_rate": 0.0001, "loss": 1.4329, "step": 13125 }, { "epoch": 1.5077824363907875, "grad_norm": 0.6334078311920166, "learning_rate": 0.0001, "loss": 1.5302, "step": 13126 }, { "epoch": 1.5078973063006145, "grad_norm": 0.606662929058075, "learning_rate": 0.0001, "loss": 1.3271, "step": 13127 }, { "epoch": 1.5080121762104417, "grad_norm": 0.6204425096511841, "learning_rate": 0.0001, "loss": 1.6041, "step": 13128 }, { "epoch": 1.508127046120269, "grad_norm": 0.6233339905738831, "learning_rate": 0.0001, "loss": 1.5494, "step": 13129 }, { "epoch": 1.508241916030096, "grad_norm": 0.5537604093551636, "learning_rate": 0.0001, "loss": 1.4306, "step": 13130 }, { "epoch": 1.508356785939923, "grad_norm": 0.5744999051094055, "learning_rate": 0.0001, "loss": 1.5209, "step": 13131 }, { "epoch": 1.5084716558497502, "grad_norm": 0.5970391035079956, "learning_rate": 0.0001, "loss": 1.4262, "step": 13132 }, { "epoch": 1.5085865257595774, "grad_norm": 0.5616945028305054, "learning_rate": 0.0001, "loss": 1.2999, "step": 13133 }, { "epoch": 1.5087013956694044, "grad_norm": 0.566780686378479, "learning_rate": 0.0001, "loss": 1.3291, "step": 13134 }, { "epoch": 1.5088162655792314, "grad_norm": 0.6132379174232483, "learning_rate": 0.0001, "loss": 1.3745, "step": 13135 }, { "epoch": 1.5089311354890587, "grad_norm": 0.6889569163322449, "learning_rate": 0.0001, "loss": 1.4654, "step": 13136 }, { "epoch": 1.509046005398886, "grad_norm": 0.5599787831306458, "learning_rate": 0.0001, "loss": 1.3133, "step": 13137 }, { "epoch": 1.509160875308713, "grad_norm": 0.5910610556602478, "learning_rate": 0.0001, "loss": 1.4698, "step": 13138 }, { "epoch": 1.50927574521854, "grad_norm": 0.6421205401420593, "learning_rate": 0.0001, "loss": 1.5509, "step": 13139 }, { "epoch": 1.5093906151283671, "grad_norm": 0.6151227355003357, "learning_rate": 0.0001, "loss": 1.3632, "step": 13140 }, { "epoch": 1.5095054850381944, "grad_norm": 0.569758951663971, "learning_rate": 0.0001, "loss": 1.3274, "step": 13141 }, { "epoch": 1.5096203549480214, "grad_norm": 0.6561845541000366, "learning_rate": 0.0001, "loss": 1.541, "step": 13142 }, { "epoch": 1.5097352248578484, "grad_norm": 0.6225590705871582, "learning_rate": 0.0001, "loss": 1.6304, "step": 13143 }, { "epoch": 1.5098500947676756, "grad_norm": 0.6006361842155457, "learning_rate": 0.0001, "loss": 1.4622, "step": 13144 }, { "epoch": 1.5099649646775029, "grad_norm": 0.7036729454994202, "learning_rate": 0.0001, "loss": 1.635, "step": 13145 }, { "epoch": 1.5100798345873299, "grad_norm": 0.6505088806152344, "learning_rate": 0.0001, "loss": 1.6313, "step": 13146 }, { "epoch": 1.5101947044971569, "grad_norm": 0.6297341585159302, "learning_rate": 0.0001, "loss": 1.5059, "step": 13147 }, { "epoch": 1.510309574406984, "grad_norm": 0.6522087454795837, "learning_rate": 0.0001, "loss": 1.4154, "step": 13148 }, { "epoch": 1.5104244443168113, "grad_norm": 0.6192685961723328, "learning_rate": 0.0001, "loss": 1.5964, "step": 13149 }, { "epoch": 1.5105393142266383, "grad_norm": 0.6250871419906616, "learning_rate": 0.0001, "loss": 1.5964, "step": 13150 }, { "epoch": 1.5106541841364654, "grad_norm": 0.5791242122650146, "learning_rate": 0.0001, "loss": 1.4324, "step": 13151 }, { "epoch": 1.5107690540462926, "grad_norm": 0.6033053994178772, "learning_rate": 0.0001, "loss": 1.441, "step": 13152 }, { "epoch": 1.5108839239561198, "grad_norm": 0.5738871693611145, "learning_rate": 0.0001, "loss": 1.5468, "step": 13153 }, { "epoch": 1.5109987938659468, "grad_norm": 0.6556652188301086, "learning_rate": 0.0001, "loss": 1.6179, "step": 13154 }, { "epoch": 1.5111136637757738, "grad_norm": 0.6059442758560181, "learning_rate": 0.0001, "loss": 1.5285, "step": 13155 }, { "epoch": 1.511228533685601, "grad_norm": 0.6075924038887024, "learning_rate": 0.0001, "loss": 1.3265, "step": 13156 }, { "epoch": 1.5113434035954283, "grad_norm": 0.5933572053909302, "learning_rate": 0.0001, "loss": 1.5268, "step": 13157 }, { "epoch": 1.5114582735052553, "grad_norm": 0.6054621934890747, "learning_rate": 0.0001, "loss": 1.66, "step": 13158 }, { "epoch": 1.5115731434150823, "grad_norm": 0.5910248756408691, "learning_rate": 0.0001, "loss": 1.5443, "step": 13159 }, { "epoch": 1.5116880133249095, "grad_norm": 0.6314256191253662, "learning_rate": 0.0001, "loss": 1.7058, "step": 13160 }, { "epoch": 1.5118028832347368, "grad_norm": 0.5735428929328918, "learning_rate": 0.0001, "loss": 1.3867, "step": 13161 }, { "epoch": 1.5119177531445638, "grad_norm": 0.5875412821769714, "learning_rate": 0.0001, "loss": 1.2807, "step": 13162 }, { "epoch": 1.5120326230543908, "grad_norm": 0.5720763206481934, "learning_rate": 0.0001, "loss": 1.3435, "step": 13163 }, { "epoch": 1.512147492964218, "grad_norm": 0.5954453349113464, "learning_rate": 0.0001, "loss": 1.3863, "step": 13164 }, { "epoch": 1.5122623628740453, "grad_norm": 0.577720582485199, "learning_rate": 0.0001, "loss": 1.2367, "step": 13165 }, { "epoch": 1.5123772327838723, "grad_norm": 0.5905912518501282, "learning_rate": 0.0001, "loss": 1.3191, "step": 13166 }, { "epoch": 1.5124921026936993, "grad_norm": 0.5706838369369507, "learning_rate": 0.0001, "loss": 1.2935, "step": 13167 }, { "epoch": 1.5126069726035265, "grad_norm": 0.6354959011077881, "learning_rate": 0.0001, "loss": 1.4469, "step": 13168 }, { "epoch": 1.5127218425133537, "grad_norm": 0.6335857510566711, "learning_rate": 0.0001, "loss": 1.4592, "step": 13169 }, { "epoch": 1.5128367124231807, "grad_norm": 0.6097238063812256, "learning_rate": 0.0001, "loss": 1.464, "step": 13170 }, { "epoch": 1.5129515823330077, "grad_norm": 0.5822547674179077, "learning_rate": 0.0001, "loss": 1.41, "step": 13171 }, { "epoch": 1.513066452242835, "grad_norm": 0.6244901418685913, "learning_rate": 0.0001, "loss": 1.4444, "step": 13172 }, { "epoch": 1.5131813221526622, "grad_norm": 0.5513818860054016, "learning_rate": 0.0001, "loss": 1.3421, "step": 13173 }, { "epoch": 1.5132961920624892, "grad_norm": 0.582343578338623, "learning_rate": 0.0001, "loss": 1.317, "step": 13174 }, { "epoch": 1.5134110619723162, "grad_norm": 0.6151304841041565, "learning_rate": 0.0001, "loss": 1.4798, "step": 13175 }, { "epoch": 1.5135259318821435, "grad_norm": 0.5942151546478271, "learning_rate": 0.0001, "loss": 1.546, "step": 13176 }, { "epoch": 1.5136408017919707, "grad_norm": 0.589524507522583, "learning_rate": 0.0001, "loss": 1.4663, "step": 13177 }, { "epoch": 1.5137556717017977, "grad_norm": 0.5877421498298645, "learning_rate": 0.0001, "loss": 1.4425, "step": 13178 }, { "epoch": 1.5138705416116247, "grad_norm": 0.5954683423042297, "learning_rate": 0.0001, "loss": 1.5715, "step": 13179 }, { "epoch": 1.513985411521452, "grad_norm": 0.5939478874206543, "learning_rate": 0.0001, "loss": 1.4957, "step": 13180 }, { "epoch": 1.5141002814312792, "grad_norm": 0.6020224690437317, "learning_rate": 0.0001, "loss": 1.605, "step": 13181 }, { "epoch": 1.5142151513411062, "grad_norm": 0.6681519746780396, "learning_rate": 0.0001, "loss": 1.2384, "step": 13182 }, { "epoch": 1.5143300212509332, "grad_norm": 0.622969925403595, "learning_rate": 0.0001, "loss": 1.423, "step": 13183 }, { "epoch": 1.5144448911607604, "grad_norm": 0.5562835931777954, "learning_rate": 0.0001, "loss": 1.3621, "step": 13184 }, { "epoch": 1.5145597610705877, "grad_norm": 0.6770687103271484, "learning_rate": 0.0001, "loss": 1.5962, "step": 13185 }, { "epoch": 1.5146746309804147, "grad_norm": 0.6299651861190796, "learning_rate": 0.0001, "loss": 1.5734, "step": 13186 }, { "epoch": 1.5147895008902417, "grad_norm": 0.566347599029541, "learning_rate": 0.0001, "loss": 1.4579, "step": 13187 }, { "epoch": 1.514904370800069, "grad_norm": 0.5551129579544067, "learning_rate": 0.0001, "loss": 1.4055, "step": 13188 }, { "epoch": 1.5150192407098961, "grad_norm": 0.5839925408363342, "learning_rate": 0.0001, "loss": 1.5315, "step": 13189 }, { "epoch": 1.5151341106197231, "grad_norm": 0.5784244537353516, "learning_rate": 0.0001, "loss": 1.515, "step": 13190 }, { "epoch": 1.5152489805295501, "grad_norm": 0.5645365118980408, "learning_rate": 0.0001, "loss": 1.342, "step": 13191 }, { "epoch": 1.5153638504393774, "grad_norm": 0.6406526565551758, "learning_rate": 0.0001, "loss": 1.3734, "step": 13192 }, { "epoch": 1.5154787203492046, "grad_norm": 0.6612091064453125, "learning_rate": 0.0001, "loss": 1.6727, "step": 13193 }, { "epoch": 1.5155935902590316, "grad_norm": 0.582217812538147, "learning_rate": 0.0001, "loss": 1.4829, "step": 13194 }, { "epoch": 1.5157084601688586, "grad_norm": 0.6395711302757263, "learning_rate": 0.0001, "loss": 1.4942, "step": 13195 }, { "epoch": 1.5158233300786859, "grad_norm": 0.6635745763778687, "learning_rate": 0.0001, "loss": 1.4659, "step": 13196 }, { "epoch": 1.515938199988513, "grad_norm": 0.615152895450592, "learning_rate": 0.0001, "loss": 1.3869, "step": 13197 }, { "epoch": 1.51605306989834, "grad_norm": 0.6119014620780945, "learning_rate": 0.0001, "loss": 1.5678, "step": 13198 }, { "epoch": 1.516167939808167, "grad_norm": 0.6164930462837219, "learning_rate": 0.0001, "loss": 1.5306, "step": 13199 }, { "epoch": 1.5162828097179943, "grad_norm": 0.5925434231758118, "learning_rate": 0.0001, "loss": 1.6357, "step": 13200 }, { "epoch": 1.5163976796278216, "grad_norm": 0.6095196008682251, "learning_rate": 0.0001, "loss": 1.5921, "step": 13201 }, { "epoch": 1.5165125495376486, "grad_norm": 0.6188748478889465, "learning_rate": 0.0001, "loss": 1.6986, "step": 13202 }, { "epoch": 1.5166274194474756, "grad_norm": 0.5641734004020691, "learning_rate": 0.0001, "loss": 1.516, "step": 13203 }, { "epoch": 1.5167422893573028, "grad_norm": 0.7535280585289001, "learning_rate": 0.0001, "loss": 1.5137, "step": 13204 }, { "epoch": 1.51685715926713, "grad_norm": 0.6287049055099487, "learning_rate": 0.0001, "loss": 1.53, "step": 13205 }, { "epoch": 1.516972029176957, "grad_norm": 0.6373167037963867, "learning_rate": 0.0001, "loss": 1.5328, "step": 13206 }, { "epoch": 1.517086899086784, "grad_norm": 0.5664235949516296, "learning_rate": 0.0001, "loss": 1.4884, "step": 13207 }, { "epoch": 1.5172017689966113, "grad_norm": 0.5645571351051331, "learning_rate": 0.0001, "loss": 1.2935, "step": 13208 }, { "epoch": 1.5173166389064385, "grad_norm": 0.6005182862281799, "learning_rate": 0.0001, "loss": 1.4981, "step": 13209 }, { "epoch": 1.5174315088162655, "grad_norm": 0.595731794834137, "learning_rate": 0.0001, "loss": 1.342, "step": 13210 }, { "epoch": 1.5175463787260925, "grad_norm": 0.6699162721633911, "learning_rate": 0.0001, "loss": 1.6479, "step": 13211 }, { "epoch": 1.5176612486359198, "grad_norm": 0.5972051620483398, "learning_rate": 0.0001, "loss": 1.438, "step": 13212 }, { "epoch": 1.517776118545747, "grad_norm": 0.6613103747367859, "learning_rate": 0.0001, "loss": 1.6691, "step": 13213 }, { "epoch": 1.517890988455574, "grad_norm": 0.5806509852409363, "learning_rate": 0.0001, "loss": 1.5286, "step": 13214 }, { "epoch": 1.518005858365401, "grad_norm": 0.6001474857330322, "learning_rate": 0.0001, "loss": 1.5538, "step": 13215 }, { "epoch": 1.5181207282752283, "grad_norm": 0.6385689377784729, "learning_rate": 0.0001, "loss": 1.6138, "step": 13216 }, { "epoch": 1.5182355981850555, "grad_norm": 0.6373685002326965, "learning_rate": 0.0001, "loss": 1.478, "step": 13217 }, { "epoch": 1.5183504680948825, "grad_norm": 0.5941466689109802, "learning_rate": 0.0001, "loss": 1.384, "step": 13218 }, { "epoch": 1.5184653380047095, "grad_norm": 0.5440075993537903, "learning_rate": 0.0001, "loss": 1.388, "step": 13219 }, { "epoch": 1.5185802079145367, "grad_norm": 0.5798068046569824, "learning_rate": 0.0001, "loss": 1.4018, "step": 13220 }, { "epoch": 1.518695077824364, "grad_norm": 0.6054274439811707, "learning_rate": 0.0001, "loss": 1.5795, "step": 13221 }, { "epoch": 1.518809947734191, "grad_norm": 0.623768150806427, "learning_rate": 0.0001, "loss": 1.6136, "step": 13222 }, { "epoch": 1.518924817644018, "grad_norm": 0.5744849443435669, "learning_rate": 0.0001, "loss": 1.3231, "step": 13223 }, { "epoch": 1.5190396875538452, "grad_norm": 0.6288904547691345, "learning_rate": 0.0001, "loss": 1.411, "step": 13224 }, { "epoch": 1.5191545574636725, "grad_norm": 0.58237624168396, "learning_rate": 0.0001, "loss": 1.2721, "step": 13225 }, { "epoch": 1.5192694273734995, "grad_norm": 0.5864928364753723, "learning_rate": 0.0001, "loss": 1.4331, "step": 13226 }, { "epoch": 1.5193842972833265, "grad_norm": 0.5528786778450012, "learning_rate": 0.0001, "loss": 1.4004, "step": 13227 }, { "epoch": 1.5194991671931537, "grad_norm": 0.639892578125, "learning_rate": 0.0001, "loss": 1.3726, "step": 13228 }, { "epoch": 1.519614037102981, "grad_norm": 0.5863984227180481, "learning_rate": 0.0001, "loss": 1.4963, "step": 13229 }, { "epoch": 1.5197289070128082, "grad_norm": 0.6669512391090393, "learning_rate": 0.0001, "loss": 1.5823, "step": 13230 }, { "epoch": 1.5198437769226352, "grad_norm": 0.628280520439148, "learning_rate": 0.0001, "loss": 1.5261, "step": 13231 }, { "epoch": 1.5199586468324622, "grad_norm": 0.6582902669906616, "learning_rate": 0.0001, "loss": 1.4258, "step": 13232 }, { "epoch": 1.5200735167422894, "grad_norm": 0.5654552578926086, "learning_rate": 0.0001, "loss": 1.3741, "step": 13233 }, { "epoch": 1.5201883866521166, "grad_norm": 0.6104324460029602, "learning_rate": 0.0001, "loss": 1.6584, "step": 13234 }, { "epoch": 1.5203032565619437, "grad_norm": 0.6294476389884949, "learning_rate": 0.0001, "loss": 1.5948, "step": 13235 }, { "epoch": 1.5204181264717707, "grad_norm": 0.6104316115379333, "learning_rate": 0.0001, "loss": 1.3727, "step": 13236 }, { "epoch": 1.520532996381598, "grad_norm": 0.6369021534919739, "learning_rate": 0.0001, "loss": 1.593, "step": 13237 }, { "epoch": 1.5206478662914251, "grad_norm": 0.5884815454483032, "learning_rate": 0.0001, "loss": 1.5229, "step": 13238 }, { "epoch": 1.5207627362012521, "grad_norm": 0.6035114526748657, "learning_rate": 0.0001, "loss": 1.5668, "step": 13239 }, { "epoch": 1.5208776061110791, "grad_norm": 0.5970353484153748, "learning_rate": 0.0001, "loss": 1.4528, "step": 13240 }, { "epoch": 1.5209924760209064, "grad_norm": 0.6134264469146729, "learning_rate": 0.0001, "loss": 1.5959, "step": 13241 }, { "epoch": 1.5211073459307336, "grad_norm": 0.6086723208427429, "learning_rate": 0.0001, "loss": 1.4763, "step": 13242 }, { "epoch": 1.5212222158405606, "grad_norm": 0.6190268397331238, "learning_rate": 0.0001, "loss": 1.4347, "step": 13243 }, { "epoch": 1.5213370857503876, "grad_norm": 0.5801183581352234, "learning_rate": 0.0001, "loss": 1.4204, "step": 13244 }, { "epoch": 1.5214519556602148, "grad_norm": 0.5259165167808533, "learning_rate": 0.0001, "loss": 1.2951, "step": 13245 }, { "epoch": 1.521566825570042, "grad_norm": 0.6810782551765442, "learning_rate": 0.0001, "loss": 1.5469, "step": 13246 }, { "epoch": 1.521681695479869, "grad_norm": 0.6331126093864441, "learning_rate": 0.0001, "loss": 1.4176, "step": 13247 }, { "epoch": 1.521796565389696, "grad_norm": 0.561797022819519, "learning_rate": 0.0001, "loss": 1.5015, "step": 13248 }, { "epoch": 1.5219114352995233, "grad_norm": 0.6428273320198059, "learning_rate": 0.0001, "loss": 1.2756, "step": 13249 }, { "epoch": 1.5220263052093506, "grad_norm": 0.5670008659362793, "learning_rate": 0.0001, "loss": 1.4275, "step": 13250 }, { "epoch": 1.5221411751191776, "grad_norm": 0.6418376564979553, "learning_rate": 0.0001, "loss": 1.5854, "step": 13251 }, { "epoch": 1.5222560450290046, "grad_norm": 0.6684396266937256, "learning_rate": 0.0001, "loss": 1.4903, "step": 13252 }, { "epoch": 1.5223709149388318, "grad_norm": 0.5767911672592163, "learning_rate": 0.0001, "loss": 1.2792, "step": 13253 }, { "epoch": 1.522485784848659, "grad_norm": 0.620569109916687, "learning_rate": 0.0001, "loss": 1.4702, "step": 13254 }, { "epoch": 1.522600654758486, "grad_norm": 0.6131866574287415, "learning_rate": 0.0001, "loss": 1.4637, "step": 13255 }, { "epoch": 1.522715524668313, "grad_norm": 0.6050616502761841, "learning_rate": 0.0001, "loss": 1.5023, "step": 13256 }, { "epoch": 1.5228303945781403, "grad_norm": 0.6381790637969971, "learning_rate": 0.0001, "loss": 1.5207, "step": 13257 }, { "epoch": 1.5229452644879675, "grad_norm": 0.6472856998443604, "learning_rate": 0.0001, "loss": 1.3212, "step": 13258 }, { "epoch": 1.5230601343977945, "grad_norm": 0.5918861031532288, "learning_rate": 0.0001, "loss": 1.4754, "step": 13259 }, { "epoch": 1.5231750043076215, "grad_norm": 0.6339123249053955, "learning_rate": 0.0001, "loss": 1.5166, "step": 13260 }, { "epoch": 1.5232898742174488, "grad_norm": 0.5977100133895874, "learning_rate": 0.0001, "loss": 1.3237, "step": 13261 }, { "epoch": 1.523404744127276, "grad_norm": 0.6018535494804382, "learning_rate": 0.0001, "loss": 1.354, "step": 13262 }, { "epoch": 1.523519614037103, "grad_norm": 0.5871580243110657, "learning_rate": 0.0001, "loss": 1.5308, "step": 13263 }, { "epoch": 1.52363448394693, "grad_norm": 0.6061954498291016, "learning_rate": 0.0001, "loss": 1.3217, "step": 13264 }, { "epoch": 1.5237493538567572, "grad_norm": 0.637395977973938, "learning_rate": 0.0001, "loss": 1.5238, "step": 13265 }, { "epoch": 1.5238642237665845, "grad_norm": 0.7003656029701233, "learning_rate": 0.0001, "loss": 1.7488, "step": 13266 }, { "epoch": 1.5239790936764115, "grad_norm": 0.6120108366012573, "learning_rate": 0.0001, "loss": 1.4945, "step": 13267 }, { "epoch": 1.5240939635862385, "grad_norm": 0.5899313688278198, "learning_rate": 0.0001, "loss": 1.5474, "step": 13268 }, { "epoch": 1.5242088334960657, "grad_norm": 0.6798549890518188, "learning_rate": 0.0001, "loss": 1.5794, "step": 13269 }, { "epoch": 1.524323703405893, "grad_norm": 0.6134473085403442, "learning_rate": 0.0001, "loss": 1.4846, "step": 13270 }, { "epoch": 1.52443857331572, "grad_norm": 0.5412236452102661, "learning_rate": 0.0001, "loss": 1.5211, "step": 13271 }, { "epoch": 1.524553443225547, "grad_norm": 0.5635609030723572, "learning_rate": 0.0001, "loss": 1.3889, "step": 13272 }, { "epoch": 1.5246683131353742, "grad_norm": 0.5932890772819519, "learning_rate": 0.0001, "loss": 1.5587, "step": 13273 }, { "epoch": 1.5247831830452014, "grad_norm": 0.5923974514007568, "learning_rate": 0.0001, "loss": 1.6642, "step": 13274 }, { "epoch": 1.5248980529550284, "grad_norm": 0.6257017254829407, "learning_rate": 0.0001, "loss": 1.599, "step": 13275 }, { "epoch": 1.5250129228648555, "grad_norm": 0.5884189605712891, "learning_rate": 0.0001, "loss": 1.5512, "step": 13276 }, { "epoch": 1.5251277927746827, "grad_norm": 0.5920361876487732, "learning_rate": 0.0001, "loss": 1.5368, "step": 13277 }, { "epoch": 1.52524266268451, "grad_norm": 0.6029490232467651, "learning_rate": 0.0001, "loss": 1.5597, "step": 13278 }, { "epoch": 1.525357532594337, "grad_norm": 0.5848401784896851, "learning_rate": 0.0001, "loss": 1.4009, "step": 13279 }, { "epoch": 1.525472402504164, "grad_norm": 0.6184453964233398, "learning_rate": 0.0001, "loss": 1.588, "step": 13280 }, { "epoch": 1.5255872724139912, "grad_norm": 0.7568985223770142, "learning_rate": 0.0001, "loss": 1.642, "step": 13281 }, { "epoch": 1.5257021423238184, "grad_norm": 0.5871482491493225, "learning_rate": 0.0001, "loss": 1.3935, "step": 13282 }, { "epoch": 1.5258170122336454, "grad_norm": 0.6043814420700073, "learning_rate": 0.0001, "loss": 1.2961, "step": 13283 }, { "epoch": 1.5259318821434724, "grad_norm": 0.5470103621482849, "learning_rate": 0.0001, "loss": 1.3294, "step": 13284 }, { "epoch": 1.5260467520532996, "grad_norm": 0.5336215496063232, "learning_rate": 0.0001, "loss": 1.1915, "step": 13285 }, { "epoch": 1.5261616219631269, "grad_norm": 0.6141431331634521, "learning_rate": 0.0001, "loss": 1.469, "step": 13286 }, { "epoch": 1.5262764918729539, "grad_norm": 0.5851207375526428, "learning_rate": 0.0001, "loss": 1.424, "step": 13287 }, { "epoch": 1.526391361782781, "grad_norm": 0.5804892182350159, "learning_rate": 0.0001, "loss": 1.3296, "step": 13288 }, { "epoch": 1.5265062316926081, "grad_norm": 0.6511141061782837, "learning_rate": 0.0001, "loss": 1.4835, "step": 13289 }, { "epoch": 1.5266211016024354, "grad_norm": 0.6962987184524536, "learning_rate": 0.0001, "loss": 1.5535, "step": 13290 }, { "epoch": 1.5267359715122624, "grad_norm": 0.5867136120796204, "learning_rate": 0.0001, "loss": 1.2938, "step": 13291 }, { "epoch": 1.5268508414220894, "grad_norm": 0.6161892414093018, "learning_rate": 0.0001, "loss": 1.5418, "step": 13292 }, { "epoch": 1.5269657113319166, "grad_norm": 0.5810278058052063, "learning_rate": 0.0001, "loss": 1.197, "step": 13293 }, { "epoch": 1.5270805812417438, "grad_norm": 0.6159192323684692, "learning_rate": 0.0001, "loss": 1.3235, "step": 13294 }, { "epoch": 1.5271954511515708, "grad_norm": 0.6228445768356323, "learning_rate": 0.0001, "loss": 1.3081, "step": 13295 }, { "epoch": 1.5273103210613979, "grad_norm": 0.6097662448883057, "learning_rate": 0.0001, "loss": 1.468, "step": 13296 }, { "epoch": 1.527425190971225, "grad_norm": 0.6655218005180359, "learning_rate": 0.0001, "loss": 1.4785, "step": 13297 }, { "epoch": 1.5275400608810523, "grad_norm": 0.5990704298019409, "learning_rate": 0.0001, "loss": 1.6224, "step": 13298 }, { "epoch": 1.5276549307908793, "grad_norm": 0.6513421535491943, "learning_rate": 0.0001, "loss": 1.6009, "step": 13299 }, { "epoch": 1.5277698007007063, "grad_norm": 0.6456665992736816, "learning_rate": 0.0001, "loss": 1.4663, "step": 13300 }, { "epoch": 1.5278846706105336, "grad_norm": 0.6521137952804565, "learning_rate": 0.0001, "loss": 1.5566, "step": 13301 }, { "epoch": 1.5279995405203608, "grad_norm": 0.5967424511909485, "learning_rate": 0.0001, "loss": 1.4281, "step": 13302 }, { "epoch": 1.5281144104301878, "grad_norm": 0.6816303730010986, "learning_rate": 0.0001, "loss": 1.5269, "step": 13303 }, { "epoch": 1.5282292803400148, "grad_norm": 0.5861914753913879, "learning_rate": 0.0001, "loss": 1.4389, "step": 13304 }, { "epoch": 1.528344150249842, "grad_norm": 0.6135704517364502, "learning_rate": 0.0001, "loss": 1.3657, "step": 13305 }, { "epoch": 1.5284590201596693, "grad_norm": 0.6036303043365479, "learning_rate": 0.0001, "loss": 1.4097, "step": 13306 }, { "epoch": 1.5285738900694963, "grad_norm": 0.5533670783042908, "learning_rate": 0.0001, "loss": 1.3476, "step": 13307 }, { "epoch": 1.5286887599793233, "grad_norm": 0.5623646974563599, "learning_rate": 0.0001, "loss": 1.5436, "step": 13308 }, { "epoch": 1.5288036298891505, "grad_norm": 0.5962461829185486, "learning_rate": 0.0001, "loss": 1.5308, "step": 13309 }, { "epoch": 1.5289184997989778, "grad_norm": 0.626592218875885, "learning_rate": 0.0001, "loss": 1.4646, "step": 13310 }, { "epoch": 1.5290333697088048, "grad_norm": 0.6010507941246033, "learning_rate": 0.0001, "loss": 1.5502, "step": 13311 }, { "epoch": 1.5291482396186318, "grad_norm": 0.5912574529647827, "learning_rate": 0.0001, "loss": 1.4557, "step": 13312 }, { "epoch": 1.529263109528459, "grad_norm": 0.6870297789573669, "learning_rate": 0.0001, "loss": 1.7032, "step": 13313 }, { "epoch": 1.5293779794382862, "grad_norm": 0.5987645983695984, "learning_rate": 0.0001, "loss": 1.5357, "step": 13314 }, { "epoch": 1.5294928493481132, "grad_norm": 0.6424558162689209, "learning_rate": 0.0001, "loss": 1.2024, "step": 13315 }, { "epoch": 1.5296077192579403, "grad_norm": 0.6005625128746033, "learning_rate": 0.0001, "loss": 1.5736, "step": 13316 }, { "epoch": 1.5297225891677675, "grad_norm": 0.6098487973213196, "learning_rate": 0.0001, "loss": 1.3273, "step": 13317 }, { "epoch": 1.5298374590775947, "grad_norm": 0.625301718711853, "learning_rate": 0.0001, "loss": 1.3964, "step": 13318 }, { "epoch": 1.5299523289874217, "grad_norm": 0.6038113236427307, "learning_rate": 0.0001, "loss": 1.4294, "step": 13319 }, { "epoch": 1.5300671988972487, "grad_norm": 0.5868397951126099, "learning_rate": 0.0001, "loss": 1.4582, "step": 13320 }, { "epoch": 1.530182068807076, "grad_norm": 0.6464180946350098, "learning_rate": 0.0001, "loss": 1.5435, "step": 13321 }, { "epoch": 1.5302969387169032, "grad_norm": 0.604279637336731, "learning_rate": 0.0001, "loss": 1.5115, "step": 13322 }, { "epoch": 1.5304118086267302, "grad_norm": 0.5744287371635437, "learning_rate": 0.0001, "loss": 1.3912, "step": 13323 }, { "epoch": 1.5305266785365572, "grad_norm": 0.6124972105026245, "learning_rate": 0.0001, "loss": 1.432, "step": 13324 }, { "epoch": 1.5306415484463844, "grad_norm": 0.5574883818626404, "learning_rate": 0.0001, "loss": 1.3208, "step": 13325 }, { "epoch": 1.5307564183562117, "grad_norm": 0.6026033759117126, "learning_rate": 0.0001, "loss": 1.3873, "step": 13326 }, { "epoch": 1.5308712882660387, "grad_norm": 0.6555309295654297, "learning_rate": 0.0001, "loss": 1.4002, "step": 13327 }, { "epoch": 1.5309861581758657, "grad_norm": 0.5827085375785828, "learning_rate": 0.0001, "loss": 1.3754, "step": 13328 }, { "epoch": 1.531101028085693, "grad_norm": 0.6004881262779236, "learning_rate": 0.0001, "loss": 1.2212, "step": 13329 }, { "epoch": 1.5312158979955202, "grad_norm": 0.7817880511283875, "learning_rate": 0.0001, "loss": 1.5378, "step": 13330 }, { "epoch": 1.5313307679053472, "grad_norm": 0.6626024842262268, "learning_rate": 0.0001, "loss": 1.4952, "step": 13331 }, { "epoch": 1.5314456378151742, "grad_norm": 0.6087819337844849, "learning_rate": 0.0001, "loss": 1.4887, "step": 13332 }, { "epoch": 1.5315605077250014, "grad_norm": 0.7612007260322571, "learning_rate": 0.0001, "loss": 1.4343, "step": 13333 }, { "epoch": 1.5316753776348286, "grad_norm": 0.5998708009719849, "learning_rate": 0.0001, "loss": 1.3433, "step": 13334 }, { "epoch": 1.5317902475446556, "grad_norm": 0.5892654061317444, "learning_rate": 0.0001, "loss": 1.5178, "step": 13335 }, { "epoch": 1.5319051174544827, "grad_norm": 0.6381953954696655, "learning_rate": 0.0001, "loss": 1.5053, "step": 13336 }, { "epoch": 1.5320199873643099, "grad_norm": 0.6319192051887512, "learning_rate": 0.0001, "loss": 1.2256, "step": 13337 }, { "epoch": 1.5321348572741371, "grad_norm": 0.6061521172523499, "learning_rate": 0.0001, "loss": 1.3842, "step": 13338 }, { "epoch": 1.5322497271839641, "grad_norm": 0.6164060235023499, "learning_rate": 0.0001, "loss": 1.5006, "step": 13339 }, { "epoch": 1.5323645970937911, "grad_norm": 0.5808977484703064, "learning_rate": 0.0001, "loss": 1.3016, "step": 13340 }, { "epoch": 1.5324794670036184, "grad_norm": 0.6272817254066467, "learning_rate": 0.0001, "loss": 1.6399, "step": 13341 }, { "epoch": 1.5325943369134456, "grad_norm": 0.5705950260162354, "learning_rate": 0.0001, "loss": 1.4289, "step": 13342 }, { "epoch": 1.5327092068232726, "grad_norm": 0.5681813359260559, "learning_rate": 0.0001, "loss": 1.3662, "step": 13343 }, { "epoch": 1.5328240767330996, "grad_norm": 0.591742753982544, "learning_rate": 0.0001, "loss": 1.411, "step": 13344 }, { "epoch": 1.5329389466429268, "grad_norm": 0.6885697841644287, "learning_rate": 0.0001, "loss": 1.4915, "step": 13345 }, { "epoch": 1.533053816552754, "grad_norm": 0.6182805895805359, "learning_rate": 0.0001, "loss": 1.3183, "step": 13346 }, { "epoch": 1.533168686462581, "grad_norm": 0.630598247051239, "learning_rate": 0.0001, "loss": 1.399, "step": 13347 }, { "epoch": 1.533283556372408, "grad_norm": 0.5968937277793884, "learning_rate": 0.0001, "loss": 1.3648, "step": 13348 }, { "epoch": 1.5333984262822353, "grad_norm": 0.6997851133346558, "learning_rate": 0.0001, "loss": 1.5873, "step": 13349 }, { "epoch": 1.5335132961920626, "grad_norm": 0.615630030632019, "learning_rate": 0.0001, "loss": 1.3916, "step": 13350 }, { "epoch": 1.5336281661018896, "grad_norm": 0.6529400944709778, "learning_rate": 0.0001, "loss": 1.4654, "step": 13351 }, { "epoch": 1.5337430360117166, "grad_norm": 0.6269962191581726, "learning_rate": 0.0001, "loss": 1.3357, "step": 13352 }, { "epoch": 1.5338579059215438, "grad_norm": 0.613075852394104, "learning_rate": 0.0001, "loss": 1.5735, "step": 13353 }, { "epoch": 1.533972775831371, "grad_norm": 0.6532135605812073, "learning_rate": 0.0001, "loss": 1.5337, "step": 13354 }, { "epoch": 1.534087645741198, "grad_norm": 0.6102808117866516, "learning_rate": 0.0001, "loss": 1.4645, "step": 13355 }, { "epoch": 1.534202515651025, "grad_norm": 0.6019791960716248, "learning_rate": 0.0001, "loss": 1.3113, "step": 13356 }, { "epoch": 1.5343173855608523, "grad_norm": 0.6130610108375549, "learning_rate": 0.0001, "loss": 1.5375, "step": 13357 }, { "epoch": 1.5344322554706795, "grad_norm": 0.5954639911651611, "learning_rate": 0.0001, "loss": 1.5233, "step": 13358 }, { "epoch": 1.5345471253805065, "grad_norm": 0.6210713982582092, "learning_rate": 0.0001, "loss": 1.5327, "step": 13359 }, { "epoch": 1.5346619952903335, "grad_norm": 0.5767752528190613, "learning_rate": 0.0001, "loss": 1.2935, "step": 13360 }, { "epoch": 1.5347768652001608, "grad_norm": 0.5763528943061829, "learning_rate": 0.0001, "loss": 1.3317, "step": 13361 }, { "epoch": 1.534891735109988, "grad_norm": 0.6128670573234558, "learning_rate": 0.0001, "loss": 1.4817, "step": 13362 }, { "epoch": 1.535006605019815, "grad_norm": 0.6389924883842468, "learning_rate": 0.0001, "loss": 1.4359, "step": 13363 }, { "epoch": 1.535121474929642, "grad_norm": 0.6564880013465881, "learning_rate": 0.0001, "loss": 1.4894, "step": 13364 }, { "epoch": 1.5352363448394692, "grad_norm": 0.6105384826660156, "learning_rate": 0.0001, "loss": 1.3985, "step": 13365 }, { "epoch": 1.5353512147492965, "grad_norm": 0.7015275359153748, "learning_rate": 0.0001, "loss": 1.5958, "step": 13366 }, { "epoch": 1.5354660846591237, "grad_norm": 0.6538365483283997, "learning_rate": 0.0001, "loss": 1.5722, "step": 13367 }, { "epoch": 1.5355809545689507, "grad_norm": 0.5591805577278137, "learning_rate": 0.0001, "loss": 1.306, "step": 13368 }, { "epoch": 1.5356958244787777, "grad_norm": 0.5733972787857056, "learning_rate": 0.0001, "loss": 1.2758, "step": 13369 }, { "epoch": 1.535810694388605, "grad_norm": 0.6370871067047119, "learning_rate": 0.0001, "loss": 1.3703, "step": 13370 }, { "epoch": 1.5359255642984322, "grad_norm": 0.5944746136665344, "learning_rate": 0.0001, "loss": 1.461, "step": 13371 }, { "epoch": 1.5360404342082592, "grad_norm": 0.5778318047523499, "learning_rate": 0.0001, "loss": 1.5607, "step": 13372 }, { "epoch": 1.5361553041180862, "grad_norm": 0.6232753992080688, "learning_rate": 0.0001, "loss": 1.5324, "step": 13373 }, { "epoch": 1.5362701740279134, "grad_norm": 0.6197500228881836, "learning_rate": 0.0001, "loss": 1.437, "step": 13374 }, { "epoch": 1.5363850439377407, "grad_norm": 0.6072701811790466, "learning_rate": 0.0001, "loss": 1.4755, "step": 13375 }, { "epoch": 1.5364999138475677, "grad_norm": 0.602379560470581, "learning_rate": 0.0001, "loss": 1.4888, "step": 13376 }, { "epoch": 1.5366147837573947, "grad_norm": 0.5745888352394104, "learning_rate": 0.0001, "loss": 1.4408, "step": 13377 }, { "epoch": 1.536729653667222, "grad_norm": 0.615543782711029, "learning_rate": 0.0001, "loss": 1.2821, "step": 13378 }, { "epoch": 1.5368445235770491, "grad_norm": 0.5586651563644409, "learning_rate": 0.0001, "loss": 1.29, "step": 13379 }, { "epoch": 1.5369593934868762, "grad_norm": 0.5711967945098877, "learning_rate": 0.0001, "loss": 1.4938, "step": 13380 }, { "epoch": 1.5370742633967032, "grad_norm": 0.5827999711036682, "learning_rate": 0.0001, "loss": 1.2618, "step": 13381 }, { "epoch": 1.5371891333065304, "grad_norm": 0.5403852462768555, "learning_rate": 0.0001, "loss": 1.2958, "step": 13382 }, { "epoch": 1.5373040032163576, "grad_norm": 0.6437862515449524, "learning_rate": 0.0001, "loss": 1.5734, "step": 13383 }, { "epoch": 1.5374188731261846, "grad_norm": 0.5628765225410461, "learning_rate": 0.0001, "loss": 1.4437, "step": 13384 }, { "epoch": 1.5375337430360116, "grad_norm": 0.6011320948600769, "learning_rate": 0.0001, "loss": 1.4629, "step": 13385 }, { "epoch": 1.5376486129458389, "grad_norm": 0.5917825102806091, "learning_rate": 0.0001, "loss": 1.4444, "step": 13386 }, { "epoch": 1.537763482855666, "grad_norm": 0.5793378353118896, "learning_rate": 0.0001, "loss": 1.3797, "step": 13387 }, { "epoch": 1.5378783527654931, "grad_norm": 0.6069167852401733, "learning_rate": 0.0001, "loss": 1.4303, "step": 13388 }, { "epoch": 1.5379932226753201, "grad_norm": 0.6057384014129639, "learning_rate": 0.0001, "loss": 1.5104, "step": 13389 }, { "epoch": 1.5381080925851474, "grad_norm": 0.5952368974685669, "learning_rate": 0.0001, "loss": 1.4508, "step": 13390 }, { "epoch": 1.5382229624949746, "grad_norm": 0.58933025598526, "learning_rate": 0.0001, "loss": 1.4134, "step": 13391 }, { "epoch": 1.5383378324048016, "grad_norm": 0.5574020743370056, "learning_rate": 0.0001, "loss": 1.2945, "step": 13392 }, { "epoch": 1.5384527023146286, "grad_norm": 0.5393736958503723, "learning_rate": 0.0001, "loss": 1.3008, "step": 13393 }, { "epoch": 1.5385675722244558, "grad_norm": 0.5925777554512024, "learning_rate": 0.0001, "loss": 1.4943, "step": 13394 }, { "epoch": 1.538682442134283, "grad_norm": 0.6799826622009277, "learning_rate": 0.0001, "loss": 1.3157, "step": 13395 }, { "epoch": 1.53879731204411, "grad_norm": 0.6065730452537537, "learning_rate": 0.0001, "loss": 1.3707, "step": 13396 }, { "epoch": 1.538912181953937, "grad_norm": 0.6125333905220032, "learning_rate": 0.0001, "loss": 1.5379, "step": 13397 }, { "epoch": 1.5390270518637643, "grad_norm": 0.5929028391838074, "learning_rate": 0.0001, "loss": 1.5625, "step": 13398 }, { "epoch": 1.5391419217735915, "grad_norm": 0.6272832751274109, "learning_rate": 0.0001, "loss": 1.3904, "step": 13399 }, { "epoch": 1.5392567916834186, "grad_norm": 0.5839588642120361, "learning_rate": 0.0001, "loss": 1.452, "step": 13400 }, { "epoch": 1.5393716615932456, "grad_norm": 0.6149510145187378, "learning_rate": 0.0001, "loss": 1.4483, "step": 13401 }, { "epoch": 1.5394865315030728, "grad_norm": 0.5844335556030273, "learning_rate": 0.0001, "loss": 1.4239, "step": 13402 }, { "epoch": 1.5396014014129, "grad_norm": 0.5901253819465637, "learning_rate": 0.0001, "loss": 1.3002, "step": 13403 }, { "epoch": 1.539716271322727, "grad_norm": 0.6704103946685791, "learning_rate": 0.0001, "loss": 1.5101, "step": 13404 }, { "epoch": 1.539831141232554, "grad_norm": 0.5861269235610962, "learning_rate": 0.0001, "loss": 1.5826, "step": 13405 }, { "epoch": 1.5399460111423813, "grad_norm": 0.6078442335128784, "learning_rate": 0.0001, "loss": 1.5645, "step": 13406 }, { "epoch": 1.5400608810522085, "grad_norm": 0.5604826211929321, "learning_rate": 0.0001, "loss": 1.4701, "step": 13407 }, { "epoch": 1.5401757509620355, "grad_norm": 0.5622230172157288, "learning_rate": 0.0001, "loss": 1.5088, "step": 13408 }, { "epoch": 1.5402906208718625, "grad_norm": 0.6920380592346191, "learning_rate": 0.0001, "loss": 1.3965, "step": 13409 }, { "epoch": 1.5404054907816898, "grad_norm": 0.5716781616210938, "learning_rate": 0.0001, "loss": 1.6094, "step": 13410 }, { "epoch": 1.540520360691517, "grad_norm": 0.5778867602348328, "learning_rate": 0.0001, "loss": 1.4645, "step": 13411 }, { "epoch": 1.540635230601344, "grad_norm": 0.564794659614563, "learning_rate": 0.0001, "loss": 1.4669, "step": 13412 }, { "epoch": 1.540750100511171, "grad_norm": 0.5998444557189941, "learning_rate": 0.0001, "loss": 1.3983, "step": 13413 }, { "epoch": 1.5408649704209982, "grad_norm": 0.6303174495697021, "learning_rate": 0.0001, "loss": 1.6117, "step": 13414 }, { "epoch": 1.5409798403308255, "grad_norm": 0.6035495400428772, "learning_rate": 0.0001, "loss": 1.4046, "step": 13415 }, { "epoch": 1.5410947102406525, "grad_norm": 0.6213974952697754, "learning_rate": 0.0001, "loss": 1.6235, "step": 13416 }, { "epoch": 1.5412095801504795, "grad_norm": 0.6108240485191345, "learning_rate": 0.0001, "loss": 1.5057, "step": 13417 }, { "epoch": 1.5413244500603067, "grad_norm": 0.5867082476615906, "learning_rate": 0.0001, "loss": 1.4281, "step": 13418 }, { "epoch": 1.541439319970134, "grad_norm": 0.6038832664489746, "learning_rate": 0.0001, "loss": 1.586, "step": 13419 }, { "epoch": 1.541554189879961, "grad_norm": 0.6144563555717468, "learning_rate": 0.0001, "loss": 1.68, "step": 13420 }, { "epoch": 1.541669059789788, "grad_norm": 0.568112850189209, "learning_rate": 0.0001, "loss": 1.4584, "step": 13421 }, { "epoch": 1.5417839296996152, "grad_norm": 0.6124117374420166, "learning_rate": 0.0001, "loss": 1.6241, "step": 13422 }, { "epoch": 1.5418987996094424, "grad_norm": 0.5999955534934998, "learning_rate": 0.0001, "loss": 1.4416, "step": 13423 }, { "epoch": 1.5420136695192694, "grad_norm": 0.6205251812934875, "learning_rate": 0.0001, "loss": 1.5107, "step": 13424 }, { "epoch": 1.5421285394290964, "grad_norm": 0.5759456753730774, "learning_rate": 0.0001, "loss": 1.542, "step": 13425 }, { "epoch": 1.5422434093389237, "grad_norm": 0.6211003661155701, "learning_rate": 0.0001, "loss": 1.5235, "step": 13426 }, { "epoch": 1.542358279248751, "grad_norm": 0.5795714259147644, "learning_rate": 0.0001, "loss": 1.44, "step": 13427 }, { "epoch": 1.542473149158578, "grad_norm": 0.5453798770904541, "learning_rate": 0.0001, "loss": 1.3127, "step": 13428 }, { "epoch": 1.542588019068405, "grad_norm": 0.6491063833236694, "learning_rate": 0.0001, "loss": 1.4514, "step": 13429 }, { "epoch": 1.5427028889782322, "grad_norm": 0.5597581267356873, "learning_rate": 0.0001, "loss": 1.4427, "step": 13430 }, { "epoch": 1.5428177588880594, "grad_norm": 0.6014596819877625, "learning_rate": 0.0001, "loss": 1.6442, "step": 13431 }, { "epoch": 1.5429326287978864, "grad_norm": 0.5784545540809631, "learning_rate": 0.0001, "loss": 1.4709, "step": 13432 }, { "epoch": 1.5430474987077134, "grad_norm": 0.5702968835830688, "learning_rate": 0.0001, "loss": 1.4526, "step": 13433 }, { "epoch": 1.5431623686175406, "grad_norm": 0.5658290386199951, "learning_rate": 0.0001, "loss": 1.5356, "step": 13434 }, { "epoch": 1.5432772385273679, "grad_norm": 0.598718523979187, "learning_rate": 0.0001, "loss": 1.3576, "step": 13435 }, { "epoch": 1.5433921084371949, "grad_norm": 0.6943349242210388, "learning_rate": 0.0001, "loss": 1.7476, "step": 13436 }, { "epoch": 1.5435069783470219, "grad_norm": 0.5755090117454529, "learning_rate": 0.0001, "loss": 1.3345, "step": 13437 }, { "epoch": 1.5436218482568491, "grad_norm": 0.6275411248207092, "learning_rate": 0.0001, "loss": 1.497, "step": 13438 }, { "epoch": 1.5437367181666763, "grad_norm": 0.5974034667015076, "learning_rate": 0.0001, "loss": 1.5015, "step": 13439 }, { "epoch": 1.5438515880765034, "grad_norm": 0.5933687090873718, "learning_rate": 0.0001, "loss": 1.564, "step": 13440 }, { "epoch": 1.5439664579863304, "grad_norm": 0.7212704420089722, "learning_rate": 0.0001, "loss": 1.562, "step": 13441 }, { "epoch": 1.5440813278961576, "grad_norm": 0.5720301866531372, "learning_rate": 0.0001, "loss": 1.3696, "step": 13442 }, { "epoch": 1.5441961978059848, "grad_norm": 0.6417856812477112, "learning_rate": 0.0001, "loss": 1.4741, "step": 13443 }, { "epoch": 1.5443110677158118, "grad_norm": 0.6052496433258057, "learning_rate": 0.0001, "loss": 1.2494, "step": 13444 }, { "epoch": 1.5444259376256388, "grad_norm": 0.6466429829597473, "learning_rate": 0.0001, "loss": 1.483, "step": 13445 }, { "epoch": 1.544540807535466, "grad_norm": 0.6234352588653564, "learning_rate": 0.0001, "loss": 1.5925, "step": 13446 }, { "epoch": 1.5446556774452933, "grad_norm": 0.6527000069618225, "learning_rate": 0.0001, "loss": 1.5712, "step": 13447 }, { "epoch": 1.5447705473551203, "grad_norm": 0.5902075171470642, "learning_rate": 0.0001, "loss": 1.5089, "step": 13448 }, { "epoch": 1.5448854172649473, "grad_norm": 0.5839083790779114, "learning_rate": 0.0001, "loss": 1.3611, "step": 13449 }, { "epoch": 1.5450002871747746, "grad_norm": 0.6253820061683655, "learning_rate": 0.0001, "loss": 1.6101, "step": 13450 }, { "epoch": 1.5451151570846018, "grad_norm": 0.5373639464378357, "learning_rate": 0.0001, "loss": 1.3338, "step": 13451 }, { "epoch": 1.5452300269944288, "grad_norm": 0.5663428902626038, "learning_rate": 0.0001, "loss": 1.3177, "step": 13452 }, { "epoch": 1.5453448969042558, "grad_norm": 0.6304009556770325, "learning_rate": 0.0001, "loss": 1.5829, "step": 13453 }, { "epoch": 1.545459766814083, "grad_norm": 0.6668345332145691, "learning_rate": 0.0001, "loss": 1.4433, "step": 13454 }, { "epoch": 1.5455746367239103, "grad_norm": 0.5478126406669617, "learning_rate": 0.0001, "loss": 1.4109, "step": 13455 }, { "epoch": 1.5456895066337373, "grad_norm": 0.6082738637924194, "learning_rate": 0.0001, "loss": 1.4676, "step": 13456 }, { "epoch": 1.5458043765435643, "grad_norm": 0.6476391553878784, "learning_rate": 0.0001, "loss": 1.554, "step": 13457 }, { "epoch": 1.5459192464533915, "grad_norm": 0.6350123286247253, "learning_rate": 0.0001, "loss": 1.4257, "step": 13458 }, { "epoch": 1.5460341163632187, "grad_norm": 0.621774435043335, "learning_rate": 0.0001, "loss": 1.5731, "step": 13459 }, { "epoch": 1.5461489862730458, "grad_norm": 0.6082205176353455, "learning_rate": 0.0001, "loss": 1.5612, "step": 13460 }, { "epoch": 1.5462638561828728, "grad_norm": 0.5338382124900818, "learning_rate": 0.0001, "loss": 1.2507, "step": 13461 }, { "epoch": 1.5463787260927, "grad_norm": 0.6611436605453491, "learning_rate": 0.0001, "loss": 1.5725, "step": 13462 }, { "epoch": 1.5464935960025272, "grad_norm": 0.6387352347373962, "learning_rate": 0.0001, "loss": 1.4834, "step": 13463 }, { "epoch": 1.5466084659123542, "grad_norm": 0.6265326142311096, "learning_rate": 0.0001, "loss": 1.5716, "step": 13464 }, { "epoch": 1.5467233358221812, "grad_norm": 0.5728742480278015, "learning_rate": 0.0001, "loss": 1.3898, "step": 13465 }, { "epoch": 1.5468382057320085, "grad_norm": 0.5909122228622437, "learning_rate": 0.0001, "loss": 1.5714, "step": 13466 }, { "epoch": 1.5469530756418357, "grad_norm": 0.5926406979560852, "learning_rate": 0.0001, "loss": 1.3875, "step": 13467 }, { "epoch": 1.5470679455516627, "grad_norm": 0.6227415204048157, "learning_rate": 0.0001, "loss": 1.3836, "step": 13468 }, { "epoch": 1.5471828154614897, "grad_norm": 0.6383469104766846, "learning_rate": 0.0001, "loss": 1.4241, "step": 13469 }, { "epoch": 1.547297685371317, "grad_norm": 0.606417715549469, "learning_rate": 0.0001, "loss": 1.4469, "step": 13470 }, { "epoch": 1.5474125552811442, "grad_norm": 0.5884244441986084, "learning_rate": 0.0001, "loss": 1.2275, "step": 13471 }, { "epoch": 1.5475274251909712, "grad_norm": 0.6884910464286804, "learning_rate": 0.0001, "loss": 1.6115, "step": 13472 }, { "epoch": 1.5476422951007982, "grad_norm": 0.6169816851615906, "learning_rate": 0.0001, "loss": 1.5857, "step": 13473 }, { "epoch": 1.5477571650106254, "grad_norm": 0.5996578335762024, "learning_rate": 0.0001, "loss": 1.3095, "step": 13474 }, { "epoch": 1.5478720349204527, "grad_norm": 0.6751422882080078, "learning_rate": 0.0001, "loss": 1.6056, "step": 13475 }, { "epoch": 1.5479869048302797, "grad_norm": 0.6498405337333679, "learning_rate": 0.0001, "loss": 1.4262, "step": 13476 }, { "epoch": 1.5481017747401067, "grad_norm": 0.6042332053184509, "learning_rate": 0.0001, "loss": 1.5048, "step": 13477 }, { "epoch": 1.548216644649934, "grad_norm": 0.6130797863006592, "learning_rate": 0.0001, "loss": 1.5824, "step": 13478 }, { "epoch": 1.5483315145597611, "grad_norm": 0.6066043972969055, "learning_rate": 0.0001, "loss": 1.4461, "step": 13479 }, { "epoch": 1.5484463844695882, "grad_norm": 0.5683161616325378, "learning_rate": 0.0001, "loss": 1.3388, "step": 13480 }, { "epoch": 1.5485612543794152, "grad_norm": 0.5794828534126282, "learning_rate": 0.0001, "loss": 1.5624, "step": 13481 }, { "epoch": 1.5486761242892424, "grad_norm": 0.5909842848777771, "learning_rate": 0.0001, "loss": 1.3772, "step": 13482 }, { "epoch": 1.5487909941990696, "grad_norm": 0.6028822660446167, "learning_rate": 0.0001, "loss": 1.461, "step": 13483 }, { "epoch": 1.5489058641088966, "grad_norm": 0.6567312479019165, "learning_rate": 0.0001, "loss": 1.4616, "step": 13484 }, { "epoch": 1.5490207340187236, "grad_norm": 0.6116561889648438, "learning_rate": 0.0001, "loss": 1.5806, "step": 13485 }, { "epoch": 1.5491356039285509, "grad_norm": 0.638192892074585, "learning_rate": 0.0001, "loss": 1.4588, "step": 13486 }, { "epoch": 1.549250473838378, "grad_norm": 0.6210533380508423, "learning_rate": 0.0001, "loss": 1.4859, "step": 13487 }, { "epoch": 1.5493653437482051, "grad_norm": 0.6891507506370544, "learning_rate": 0.0001, "loss": 1.332, "step": 13488 }, { "epoch": 1.5494802136580321, "grad_norm": 0.5958152413368225, "learning_rate": 0.0001, "loss": 1.3574, "step": 13489 }, { "epoch": 1.5495950835678594, "grad_norm": 0.5857873558998108, "learning_rate": 0.0001, "loss": 1.3016, "step": 13490 }, { "epoch": 1.5497099534776866, "grad_norm": 0.5769304037094116, "learning_rate": 0.0001, "loss": 1.3742, "step": 13491 }, { "epoch": 1.5498248233875136, "grad_norm": 0.5681525468826294, "learning_rate": 0.0001, "loss": 1.2591, "step": 13492 }, { "epoch": 1.5499396932973406, "grad_norm": 0.6011735796928406, "learning_rate": 0.0001, "loss": 1.3664, "step": 13493 }, { "epoch": 1.5500545632071678, "grad_norm": 0.5927474498748779, "learning_rate": 0.0001, "loss": 1.4265, "step": 13494 }, { "epoch": 1.550169433116995, "grad_norm": 0.6287341713905334, "learning_rate": 0.0001, "loss": 1.4407, "step": 13495 }, { "epoch": 1.550284303026822, "grad_norm": 0.6226458549499512, "learning_rate": 0.0001, "loss": 1.3742, "step": 13496 }, { "epoch": 1.550399172936649, "grad_norm": 0.6283382773399353, "learning_rate": 0.0001, "loss": 1.576, "step": 13497 }, { "epoch": 1.5505140428464763, "grad_norm": 0.6250648498535156, "learning_rate": 0.0001, "loss": 1.4687, "step": 13498 }, { "epoch": 1.5506289127563035, "grad_norm": 0.6573746204376221, "learning_rate": 0.0001, "loss": 1.4691, "step": 13499 }, { "epoch": 1.5507437826661306, "grad_norm": 0.6034247279167175, "learning_rate": 0.0001, "loss": 1.4162, "step": 13500 }, { "epoch": 1.5508586525759576, "grad_norm": 0.5965235829353333, "learning_rate": 0.0001, "loss": 1.3892, "step": 13501 }, { "epoch": 1.5509735224857848, "grad_norm": 0.6325914263725281, "learning_rate": 0.0001, "loss": 1.3612, "step": 13502 }, { "epoch": 1.551088392395612, "grad_norm": 0.6564382910728455, "learning_rate": 0.0001, "loss": 1.5547, "step": 13503 }, { "epoch": 1.5512032623054393, "grad_norm": 0.6648733019828796, "learning_rate": 0.0001, "loss": 1.5704, "step": 13504 }, { "epoch": 1.5513181322152663, "grad_norm": 0.6087038516998291, "learning_rate": 0.0001, "loss": 1.2969, "step": 13505 }, { "epoch": 1.5514330021250933, "grad_norm": 0.5483207106590271, "learning_rate": 0.0001, "loss": 1.2729, "step": 13506 }, { "epoch": 1.5515478720349205, "grad_norm": 0.5833482146263123, "learning_rate": 0.0001, "loss": 1.2962, "step": 13507 }, { "epoch": 1.5516627419447477, "grad_norm": 0.6498992443084717, "learning_rate": 0.0001, "loss": 1.4952, "step": 13508 }, { "epoch": 1.5517776118545747, "grad_norm": 0.5728626847267151, "learning_rate": 0.0001, "loss": 1.3803, "step": 13509 }, { "epoch": 1.5518924817644018, "grad_norm": 0.5859959125518799, "learning_rate": 0.0001, "loss": 1.4502, "step": 13510 }, { "epoch": 1.552007351674229, "grad_norm": 0.6737546324729919, "learning_rate": 0.0001, "loss": 1.6645, "step": 13511 }, { "epoch": 1.5521222215840562, "grad_norm": 0.6166685819625854, "learning_rate": 0.0001, "loss": 1.4754, "step": 13512 }, { "epoch": 1.5522370914938832, "grad_norm": 0.7040491104125977, "learning_rate": 0.0001, "loss": 1.6972, "step": 13513 }, { "epoch": 1.5523519614037102, "grad_norm": 0.6027595400810242, "learning_rate": 0.0001, "loss": 1.4652, "step": 13514 }, { "epoch": 1.5524668313135375, "grad_norm": 0.5595097541809082, "learning_rate": 0.0001, "loss": 1.3197, "step": 13515 }, { "epoch": 1.5525817012233647, "grad_norm": 0.5787383317947388, "learning_rate": 0.0001, "loss": 1.3246, "step": 13516 }, { "epoch": 1.5526965711331917, "grad_norm": 0.64249187707901, "learning_rate": 0.0001, "loss": 1.6401, "step": 13517 }, { "epoch": 1.5528114410430187, "grad_norm": 0.5845315456390381, "learning_rate": 0.0001, "loss": 1.2307, "step": 13518 }, { "epoch": 1.552926310952846, "grad_norm": 0.6052901744842529, "learning_rate": 0.0001, "loss": 1.5411, "step": 13519 }, { "epoch": 1.5530411808626732, "grad_norm": 0.5542384386062622, "learning_rate": 0.0001, "loss": 1.3201, "step": 13520 }, { "epoch": 1.5531560507725002, "grad_norm": 0.5924617648124695, "learning_rate": 0.0001, "loss": 1.4083, "step": 13521 }, { "epoch": 1.5532709206823272, "grad_norm": 0.5823348760604858, "learning_rate": 0.0001, "loss": 1.433, "step": 13522 }, { "epoch": 1.5533857905921544, "grad_norm": 0.5733996033668518, "learning_rate": 0.0001, "loss": 1.3618, "step": 13523 }, { "epoch": 1.5535006605019817, "grad_norm": 0.5738714933395386, "learning_rate": 0.0001, "loss": 1.3387, "step": 13524 }, { "epoch": 1.5536155304118087, "grad_norm": 0.5945634245872498, "learning_rate": 0.0001, "loss": 1.5014, "step": 13525 }, { "epoch": 1.5537304003216357, "grad_norm": 0.6478724479675293, "learning_rate": 0.0001, "loss": 1.5344, "step": 13526 }, { "epoch": 1.553845270231463, "grad_norm": 0.6466807126998901, "learning_rate": 0.0001, "loss": 1.3919, "step": 13527 }, { "epoch": 1.5539601401412901, "grad_norm": 0.5651605725288391, "learning_rate": 0.0001, "loss": 1.3325, "step": 13528 }, { "epoch": 1.5540750100511171, "grad_norm": 0.6229522228240967, "learning_rate": 0.0001, "loss": 1.4839, "step": 13529 }, { "epoch": 1.5541898799609442, "grad_norm": 0.6317934393882751, "learning_rate": 0.0001, "loss": 1.524, "step": 13530 }, { "epoch": 1.5543047498707714, "grad_norm": 0.5871884822845459, "learning_rate": 0.0001, "loss": 1.4019, "step": 13531 }, { "epoch": 1.5544196197805986, "grad_norm": 0.5875590443611145, "learning_rate": 0.0001, "loss": 1.3716, "step": 13532 }, { "epoch": 1.5545344896904256, "grad_norm": 0.5747091174125671, "learning_rate": 0.0001, "loss": 1.2858, "step": 13533 }, { "epoch": 1.5546493596002526, "grad_norm": 0.5531101226806641, "learning_rate": 0.0001, "loss": 1.394, "step": 13534 }, { "epoch": 1.5547642295100799, "grad_norm": 0.7090408205986023, "learning_rate": 0.0001, "loss": 1.4974, "step": 13535 }, { "epoch": 1.554879099419907, "grad_norm": 0.6552852392196655, "learning_rate": 0.0001, "loss": 1.6069, "step": 13536 }, { "epoch": 1.554993969329734, "grad_norm": 0.6876298189163208, "learning_rate": 0.0001, "loss": 1.6896, "step": 13537 }, { "epoch": 1.5551088392395611, "grad_norm": 0.5838922262191772, "learning_rate": 0.0001, "loss": 1.4006, "step": 13538 }, { "epoch": 1.5552237091493883, "grad_norm": 0.6967250108718872, "learning_rate": 0.0001, "loss": 1.3594, "step": 13539 }, { "epoch": 1.5553385790592156, "grad_norm": 0.6267087459564209, "learning_rate": 0.0001, "loss": 1.1573, "step": 13540 }, { "epoch": 1.5554534489690426, "grad_norm": 0.5778437852859497, "learning_rate": 0.0001, "loss": 1.4125, "step": 13541 }, { "epoch": 1.5555683188788696, "grad_norm": 0.5467836260795593, "learning_rate": 0.0001, "loss": 1.1714, "step": 13542 }, { "epoch": 1.5556831887886968, "grad_norm": 0.5664315819740295, "learning_rate": 0.0001, "loss": 1.3083, "step": 13543 }, { "epoch": 1.555798058698524, "grad_norm": 0.6014490127563477, "learning_rate": 0.0001, "loss": 1.5584, "step": 13544 }, { "epoch": 1.555912928608351, "grad_norm": 0.6335508823394775, "learning_rate": 0.0001, "loss": 1.236, "step": 13545 }, { "epoch": 1.556027798518178, "grad_norm": 0.6208090782165527, "learning_rate": 0.0001, "loss": 1.3677, "step": 13546 }, { "epoch": 1.5561426684280053, "grad_norm": 0.61933833360672, "learning_rate": 0.0001, "loss": 1.5519, "step": 13547 }, { "epoch": 1.5562575383378325, "grad_norm": 0.6805666089057922, "learning_rate": 0.0001, "loss": 1.5495, "step": 13548 }, { "epoch": 1.5563724082476595, "grad_norm": 0.7156261205673218, "learning_rate": 0.0001, "loss": 1.6308, "step": 13549 }, { "epoch": 1.5564872781574866, "grad_norm": 0.6893269419670105, "learning_rate": 0.0001, "loss": 1.4658, "step": 13550 }, { "epoch": 1.5566021480673138, "grad_norm": 0.6271970868110657, "learning_rate": 0.0001, "loss": 1.5036, "step": 13551 }, { "epoch": 1.556717017977141, "grad_norm": 0.6317713856697083, "learning_rate": 0.0001, "loss": 1.5933, "step": 13552 }, { "epoch": 1.556831887886968, "grad_norm": 0.6471002697944641, "learning_rate": 0.0001, "loss": 1.4808, "step": 13553 }, { "epoch": 1.556946757796795, "grad_norm": 0.6045787930488586, "learning_rate": 0.0001, "loss": 1.411, "step": 13554 }, { "epoch": 1.5570616277066223, "grad_norm": 0.5843409299850464, "learning_rate": 0.0001, "loss": 1.3993, "step": 13555 }, { "epoch": 1.5571764976164495, "grad_norm": 0.5926359295845032, "learning_rate": 0.0001, "loss": 1.2548, "step": 13556 }, { "epoch": 1.5572913675262765, "grad_norm": 0.5961912274360657, "learning_rate": 0.0001, "loss": 1.677, "step": 13557 }, { "epoch": 1.5574062374361035, "grad_norm": 0.7315113544464111, "learning_rate": 0.0001, "loss": 1.5993, "step": 13558 }, { "epoch": 1.5575211073459307, "grad_norm": 0.576897919178009, "learning_rate": 0.0001, "loss": 1.4251, "step": 13559 }, { "epoch": 1.557635977255758, "grad_norm": 0.5911585092544556, "learning_rate": 0.0001, "loss": 1.2031, "step": 13560 }, { "epoch": 1.557750847165585, "grad_norm": 0.6434839963912964, "learning_rate": 0.0001, "loss": 1.4475, "step": 13561 }, { "epoch": 1.557865717075412, "grad_norm": 0.5712766647338867, "learning_rate": 0.0001, "loss": 1.1872, "step": 13562 }, { "epoch": 1.5579805869852392, "grad_norm": 0.6965934634208679, "learning_rate": 0.0001, "loss": 1.3617, "step": 13563 }, { "epoch": 1.5580954568950665, "grad_norm": 0.5756476521492004, "learning_rate": 0.0001, "loss": 1.424, "step": 13564 }, { "epoch": 1.5582103268048935, "grad_norm": 0.6307820081710815, "learning_rate": 0.0001, "loss": 1.5449, "step": 13565 }, { "epoch": 1.5583251967147205, "grad_norm": 0.5934085845947266, "learning_rate": 0.0001, "loss": 1.5171, "step": 13566 }, { "epoch": 1.5584400666245477, "grad_norm": 0.6400713324546814, "learning_rate": 0.0001, "loss": 1.6674, "step": 13567 }, { "epoch": 1.558554936534375, "grad_norm": 0.6499615907669067, "learning_rate": 0.0001, "loss": 1.5194, "step": 13568 }, { "epoch": 1.558669806444202, "grad_norm": 0.6657858490943909, "learning_rate": 0.0001, "loss": 1.5722, "step": 13569 }, { "epoch": 1.558784676354029, "grad_norm": 0.5988503098487854, "learning_rate": 0.0001, "loss": 1.4663, "step": 13570 }, { "epoch": 1.5588995462638562, "grad_norm": 0.6103773713111877, "learning_rate": 0.0001, "loss": 1.5287, "step": 13571 }, { "epoch": 1.5590144161736834, "grad_norm": 0.5706753134727478, "learning_rate": 0.0001, "loss": 1.4824, "step": 13572 }, { "epoch": 1.5591292860835104, "grad_norm": 0.5638065338134766, "learning_rate": 0.0001, "loss": 1.3968, "step": 13573 }, { "epoch": 1.5592441559933374, "grad_norm": 0.5787845849990845, "learning_rate": 0.0001, "loss": 1.455, "step": 13574 }, { "epoch": 1.5593590259031647, "grad_norm": 0.5700173377990723, "learning_rate": 0.0001, "loss": 1.3468, "step": 13575 }, { "epoch": 1.559473895812992, "grad_norm": 0.5860016345977783, "learning_rate": 0.0001, "loss": 1.4658, "step": 13576 }, { "epoch": 1.559588765722819, "grad_norm": 0.604753851890564, "learning_rate": 0.0001, "loss": 1.4624, "step": 13577 }, { "epoch": 1.559703635632646, "grad_norm": 0.600627601146698, "learning_rate": 0.0001, "loss": 1.5398, "step": 13578 }, { "epoch": 1.5598185055424731, "grad_norm": 0.6065698266029358, "learning_rate": 0.0001, "loss": 1.5139, "step": 13579 }, { "epoch": 1.5599333754523004, "grad_norm": 0.5824136137962341, "learning_rate": 0.0001, "loss": 1.4065, "step": 13580 }, { "epoch": 1.5600482453621274, "grad_norm": 0.6180105209350586, "learning_rate": 0.0001, "loss": 1.4328, "step": 13581 }, { "epoch": 1.5601631152719544, "grad_norm": 0.6440869569778442, "learning_rate": 0.0001, "loss": 1.592, "step": 13582 }, { "epoch": 1.5602779851817816, "grad_norm": 0.6405513286590576, "learning_rate": 0.0001, "loss": 1.3444, "step": 13583 }, { "epoch": 1.5603928550916089, "grad_norm": 0.6168532371520996, "learning_rate": 0.0001, "loss": 1.2609, "step": 13584 }, { "epoch": 1.5605077250014359, "grad_norm": 0.5789191722869873, "learning_rate": 0.0001, "loss": 1.2043, "step": 13585 }, { "epoch": 1.5606225949112629, "grad_norm": 0.6007379293441772, "learning_rate": 0.0001, "loss": 1.5273, "step": 13586 }, { "epoch": 1.56073746482109, "grad_norm": 0.5817810297012329, "learning_rate": 0.0001, "loss": 1.3975, "step": 13587 }, { "epoch": 1.5608523347309173, "grad_norm": 0.5913118720054626, "learning_rate": 0.0001, "loss": 1.3675, "step": 13588 }, { "epoch": 1.5609672046407443, "grad_norm": 0.5883614420890808, "learning_rate": 0.0001, "loss": 1.5754, "step": 13589 }, { "epoch": 1.5610820745505714, "grad_norm": 0.6002376079559326, "learning_rate": 0.0001, "loss": 1.5627, "step": 13590 }, { "epoch": 1.5611969444603986, "grad_norm": 0.6250750422477722, "learning_rate": 0.0001, "loss": 1.5516, "step": 13591 }, { "epoch": 1.5613118143702258, "grad_norm": 0.611410915851593, "learning_rate": 0.0001, "loss": 1.5293, "step": 13592 }, { "epoch": 1.5614266842800528, "grad_norm": 0.5881221890449524, "learning_rate": 0.0001, "loss": 1.5605, "step": 13593 }, { "epoch": 1.5615415541898798, "grad_norm": 0.5879666805267334, "learning_rate": 0.0001, "loss": 1.4588, "step": 13594 }, { "epoch": 1.561656424099707, "grad_norm": 0.5731499791145325, "learning_rate": 0.0001, "loss": 1.6253, "step": 13595 }, { "epoch": 1.5617712940095343, "grad_norm": 0.6417600512504578, "learning_rate": 0.0001, "loss": 1.5114, "step": 13596 }, { "epoch": 1.5618861639193613, "grad_norm": 0.5718529224395752, "learning_rate": 0.0001, "loss": 1.3185, "step": 13597 }, { "epoch": 1.5620010338291883, "grad_norm": 0.6178721189498901, "learning_rate": 0.0001, "loss": 1.311, "step": 13598 }, { "epoch": 1.5621159037390155, "grad_norm": 0.5907917618751526, "learning_rate": 0.0001, "loss": 1.4209, "step": 13599 }, { "epoch": 1.5622307736488428, "grad_norm": 0.6152604818344116, "learning_rate": 0.0001, "loss": 1.4549, "step": 13600 }, { "epoch": 1.5623456435586698, "grad_norm": 0.6977373361587524, "learning_rate": 0.0001, "loss": 1.7487, "step": 13601 }, { "epoch": 1.5624605134684968, "grad_norm": 0.6169999241828918, "learning_rate": 0.0001, "loss": 1.5021, "step": 13602 }, { "epoch": 1.562575383378324, "grad_norm": 0.5735710859298706, "learning_rate": 0.0001, "loss": 1.3729, "step": 13603 }, { "epoch": 1.5626902532881513, "grad_norm": 0.5788447856903076, "learning_rate": 0.0001, "loss": 1.2963, "step": 13604 }, { "epoch": 1.5628051231979783, "grad_norm": 0.6432527303695679, "learning_rate": 0.0001, "loss": 1.4393, "step": 13605 }, { "epoch": 1.5629199931078053, "grad_norm": 0.6032426357269287, "learning_rate": 0.0001, "loss": 1.5303, "step": 13606 }, { "epoch": 1.5630348630176325, "grad_norm": 0.6332036852836609, "learning_rate": 0.0001, "loss": 1.5395, "step": 13607 }, { "epoch": 1.5631497329274597, "grad_norm": 0.5610132217407227, "learning_rate": 0.0001, "loss": 1.4589, "step": 13608 }, { "epoch": 1.5632646028372867, "grad_norm": 0.5653334259986877, "learning_rate": 0.0001, "loss": 1.2573, "step": 13609 }, { "epoch": 1.5633794727471138, "grad_norm": 0.5841345191001892, "learning_rate": 0.0001, "loss": 1.4447, "step": 13610 }, { "epoch": 1.563494342656941, "grad_norm": 0.6390504240989685, "learning_rate": 0.0001, "loss": 1.5338, "step": 13611 }, { "epoch": 1.5636092125667682, "grad_norm": 0.6101760268211365, "learning_rate": 0.0001, "loss": 1.5201, "step": 13612 }, { "epoch": 1.5637240824765952, "grad_norm": 0.6613549590110779, "learning_rate": 0.0001, "loss": 1.4788, "step": 13613 }, { "epoch": 1.5638389523864222, "grad_norm": 0.6057902574539185, "learning_rate": 0.0001, "loss": 1.3687, "step": 13614 }, { "epoch": 1.5639538222962495, "grad_norm": 0.6379287838935852, "learning_rate": 0.0001, "loss": 1.6718, "step": 13615 }, { "epoch": 1.5640686922060767, "grad_norm": 0.597686231136322, "learning_rate": 0.0001, "loss": 1.3398, "step": 13616 }, { "epoch": 1.5641835621159037, "grad_norm": 0.5676443576812744, "learning_rate": 0.0001, "loss": 1.5195, "step": 13617 }, { "epoch": 1.5642984320257307, "grad_norm": 0.6300573348999023, "learning_rate": 0.0001, "loss": 1.6266, "step": 13618 }, { "epoch": 1.564413301935558, "grad_norm": 0.5722482204437256, "learning_rate": 0.0001, "loss": 1.4044, "step": 13619 }, { "epoch": 1.5645281718453852, "grad_norm": 0.6020122170448303, "learning_rate": 0.0001, "loss": 1.4094, "step": 13620 }, { "epoch": 1.5646430417552122, "grad_norm": 0.6561688780784607, "learning_rate": 0.0001, "loss": 1.5187, "step": 13621 }, { "epoch": 1.5647579116650392, "grad_norm": 0.589190661907196, "learning_rate": 0.0001, "loss": 1.4078, "step": 13622 }, { "epoch": 1.5648727815748664, "grad_norm": 0.593706488609314, "learning_rate": 0.0001, "loss": 1.5398, "step": 13623 }, { "epoch": 1.5649876514846937, "grad_norm": 0.6446245312690735, "learning_rate": 0.0001, "loss": 1.5302, "step": 13624 }, { "epoch": 1.5651025213945207, "grad_norm": 0.6118418574333191, "learning_rate": 0.0001, "loss": 1.699, "step": 13625 }, { "epoch": 1.5652173913043477, "grad_norm": 0.6244831681251526, "learning_rate": 0.0001, "loss": 1.5944, "step": 13626 }, { "epoch": 1.565332261214175, "grad_norm": 0.6229195594787598, "learning_rate": 0.0001, "loss": 1.4809, "step": 13627 }, { "epoch": 1.5654471311240021, "grad_norm": 0.5555204153060913, "learning_rate": 0.0001, "loss": 1.2535, "step": 13628 }, { "epoch": 1.5655620010338291, "grad_norm": 0.6049181222915649, "learning_rate": 0.0001, "loss": 1.4407, "step": 13629 }, { "epoch": 1.5656768709436562, "grad_norm": 0.5908517837524414, "learning_rate": 0.0001, "loss": 1.3301, "step": 13630 }, { "epoch": 1.5657917408534834, "grad_norm": 0.6150537133216858, "learning_rate": 0.0001, "loss": 1.5415, "step": 13631 }, { "epoch": 1.5659066107633106, "grad_norm": 0.5594236850738525, "learning_rate": 0.0001, "loss": 1.464, "step": 13632 }, { "epoch": 1.5660214806731376, "grad_norm": 0.6012204885482788, "learning_rate": 0.0001, "loss": 1.4467, "step": 13633 }, { "epoch": 1.5661363505829646, "grad_norm": 0.6006706357002258, "learning_rate": 0.0001, "loss": 1.4424, "step": 13634 }, { "epoch": 1.5662512204927919, "grad_norm": 0.5952029824256897, "learning_rate": 0.0001, "loss": 1.3238, "step": 13635 }, { "epoch": 1.566366090402619, "grad_norm": 0.5738430619239807, "learning_rate": 0.0001, "loss": 1.5614, "step": 13636 }, { "epoch": 1.566480960312446, "grad_norm": 0.6680176258087158, "learning_rate": 0.0001, "loss": 1.453, "step": 13637 }, { "epoch": 1.5665958302222731, "grad_norm": 0.6369667649269104, "learning_rate": 0.0001, "loss": 1.476, "step": 13638 }, { "epoch": 1.5667107001321003, "grad_norm": 0.6867667436599731, "learning_rate": 0.0001, "loss": 1.491, "step": 13639 }, { "epoch": 1.5668255700419276, "grad_norm": 0.5914698243141174, "learning_rate": 0.0001, "loss": 1.6164, "step": 13640 }, { "epoch": 1.5669404399517548, "grad_norm": 0.5792491436004639, "learning_rate": 0.0001, "loss": 1.4495, "step": 13641 }, { "epoch": 1.5670553098615818, "grad_norm": 0.6214803457260132, "learning_rate": 0.0001, "loss": 1.4099, "step": 13642 }, { "epoch": 1.5671701797714088, "grad_norm": 0.5663346648216248, "learning_rate": 0.0001, "loss": 1.3742, "step": 13643 }, { "epoch": 1.567285049681236, "grad_norm": 0.5920063257217407, "learning_rate": 0.0001, "loss": 1.4131, "step": 13644 }, { "epoch": 1.5673999195910633, "grad_norm": 0.6462366580963135, "learning_rate": 0.0001, "loss": 1.422, "step": 13645 }, { "epoch": 1.5675147895008903, "grad_norm": 0.6337886452674866, "learning_rate": 0.0001, "loss": 1.475, "step": 13646 }, { "epoch": 1.5676296594107173, "grad_norm": 0.6497291326522827, "learning_rate": 0.0001, "loss": 1.3826, "step": 13647 }, { "epoch": 1.5677445293205445, "grad_norm": 0.6257938146591187, "learning_rate": 0.0001, "loss": 1.4504, "step": 13648 }, { "epoch": 1.5678593992303718, "grad_norm": 0.6551761031150818, "learning_rate": 0.0001, "loss": 1.5617, "step": 13649 }, { "epoch": 1.5679742691401988, "grad_norm": 0.6262896656990051, "learning_rate": 0.0001, "loss": 1.6237, "step": 13650 }, { "epoch": 1.5680891390500258, "grad_norm": 0.5628766417503357, "learning_rate": 0.0001, "loss": 1.3154, "step": 13651 }, { "epoch": 1.568204008959853, "grad_norm": 0.6413166522979736, "learning_rate": 0.0001, "loss": 1.5788, "step": 13652 }, { "epoch": 1.5683188788696802, "grad_norm": 0.568213164806366, "learning_rate": 0.0001, "loss": 1.3689, "step": 13653 }, { "epoch": 1.5684337487795073, "grad_norm": 0.6713234186172485, "learning_rate": 0.0001, "loss": 1.4422, "step": 13654 }, { "epoch": 1.5685486186893343, "grad_norm": 0.5839575529098511, "learning_rate": 0.0001, "loss": 1.3933, "step": 13655 }, { "epoch": 1.5686634885991615, "grad_norm": 0.596601128578186, "learning_rate": 0.0001, "loss": 1.61, "step": 13656 }, { "epoch": 1.5687783585089887, "grad_norm": 0.5839077830314636, "learning_rate": 0.0001, "loss": 1.3594, "step": 13657 }, { "epoch": 1.5688932284188157, "grad_norm": 0.6276641488075256, "learning_rate": 0.0001, "loss": 1.4735, "step": 13658 }, { "epoch": 1.5690080983286427, "grad_norm": 0.6786941289901733, "learning_rate": 0.0001, "loss": 1.4608, "step": 13659 }, { "epoch": 1.56912296823847, "grad_norm": 0.7449260354042053, "learning_rate": 0.0001, "loss": 1.6882, "step": 13660 }, { "epoch": 1.5692378381482972, "grad_norm": 0.6506140828132629, "learning_rate": 0.0001, "loss": 1.6329, "step": 13661 }, { "epoch": 1.5693527080581242, "grad_norm": 0.5764363408088684, "learning_rate": 0.0001, "loss": 1.396, "step": 13662 }, { "epoch": 1.5694675779679512, "grad_norm": 0.6293970346450806, "learning_rate": 0.0001, "loss": 1.3189, "step": 13663 }, { "epoch": 1.5695824478777785, "grad_norm": 0.5982877016067505, "learning_rate": 0.0001, "loss": 1.3611, "step": 13664 }, { "epoch": 1.5696973177876057, "grad_norm": 0.7153692841529846, "learning_rate": 0.0001, "loss": 1.5204, "step": 13665 }, { "epoch": 1.5698121876974327, "grad_norm": 0.556322455406189, "learning_rate": 0.0001, "loss": 1.1547, "step": 13666 }, { "epoch": 1.5699270576072597, "grad_norm": 0.5707471370697021, "learning_rate": 0.0001, "loss": 1.4342, "step": 13667 }, { "epoch": 1.570041927517087, "grad_norm": 0.6068229079246521, "learning_rate": 0.0001, "loss": 1.5907, "step": 13668 }, { "epoch": 1.5701567974269142, "grad_norm": 0.5805314183235168, "learning_rate": 0.0001, "loss": 1.3441, "step": 13669 }, { "epoch": 1.5702716673367412, "grad_norm": 0.5970333814620972, "learning_rate": 0.0001, "loss": 1.4048, "step": 13670 }, { "epoch": 1.5703865372465682, "grad_norm": 0.6311222314834595, "learning_rate": 0.0001, "loss": 1.5804, "step": 13671 }, { "epoch": 1.5705014071563954, "grad_norm": 0.6435375213623047, "learning_rate": 0.0001, "loss": 1.514, "step": 13672 }, { "epoch": 1.5706162770662226, "grad_norm": 0.62371826171875, "learning_rate": 0.0001, "loss": 1.4818, "step": 13673 }, { "epoch": 1.5707311469760497, "grad_norm": 0.6377353072166443, "learning_rate": 0.0001, "loss": 1.5304, "step": 13674 }, { "epoch": 1.5708460168858767, "grad_norm": 0.630799412727356, "learning_rate": 0.0001, "loss": 1.4142, "step": 13675 }, { "epoch": 1.570960886795704, "grad_norm": 0.6089138388633728, "learning_rate": 0.0001, "loss": 1.344, "step": 13676 }, { "epoch": 1.5710757567055311, "grad_norm": 0.6124714612960815, "learning_rate": 0.0001, "loss": 1.3856, "step": 13677 }, { "epoch": 1.5711906266153581, "grad_norm": 0.592974066734314, "learning_rate": 0.0001, "loss": 1.3503, "step": 13678 }, { "epoch": 1.5713054965251851, "grad_norm": 0.662214994430542, "learning_rate": 0.0001, "loss": 1.4621, "step": 13679 }, { "epoch": 1.5714203664350124, "grad_norm": 0.5967538952827454, "learning_rate": 0.0001, "loss": 1.4994, "step": 13680 }, { "epoch": 1.5715352363448396, "grad_norm": 0.6241264939308167, "learning_rate": 0.0001, "loss": 1.4568, "step": 13681 }, { "epoch": 1.5716501062546666, "grad_norm": 0.6036410927772522, "learning_rate": 0.0001, "loss": 1.5817, "step": 13682 }, { "epoch": 1.5717649761644936, "grad_norm": 0.5998589992523193, "learning_rate": 0.0001, "loss": 1.2894, "step": 13683 }, { "epoch": 1.5718798460743209, "grad_norm": 0.6088207960128784, "learning_rate": 0.0001, "loss": 1.5821, "step": 13684 }, { "epoch": 1.571994715984148, "grad_norm": 0.6031522750854492, "learning_rate": 0.0001, "loss": 1.4404, "step": 13685 }, { "epoch": 1.572109585893975, "grad_norm": 0.5819301605224609, "learning_rate": 0.0001, "loss": 1.6401, "step": 13686 }, { "epoch": 1.572224455803802, "grad_norm": 0.5897693634033203, "learning_rate": 0.0001, "loss": 1.306, "step": 13687 }, { "epoch": 1.5723393257136293, "grad_norm": 0.6375783681869507, "learning_rate": 0.0001, "loss": 1.5457, "step": 13688 }, { "epoch": 1.5724541956234566, "grad_norm": 0.6380894184112549, "learning_rate": 0.0001, "loss": 1.5302, "step": 13689 }, { "epoch": 1.5725690655332836, "grad_norm": 0.6246376633644104, "learning_rate": 0.0001, "loss": 1.3728, "step": 13690 }, { "epoch": 1.5726839354431106, "grad_norm": 0.6492366790771484, "learning_rate": 0.0001, "loss": 1.3662, "step": 13691 }, { "epoch": 1.5727988053529378, "grad_norm": 0.6497386693954468, "learning_rate": 0.0001, "loss": 1.4163, "step": 13692 }, { "epoch": 1.572913675262765, "grad_norm": 0.5843161940574646, "learning_rate": 0.0001, "loss": 1.3522, "step": 13693 }, { "epoch": 1.573028545172592, "grad_norm": 0.6169954538345337, "learning_rate": 0.0001, "loss": 1.4708, "step": 13694 }, { "epoch": 1.573143415082419, "grad_norm": 0.5712270140647888, "learning_rate": 0.0001, "loss": 1.2669, "step": 13695 }, { "epoch": 1.5732582849922463, "grad_norm": 0.6111646294593811, "learning_rate": 0.0001, "loss": 1.5445, "step": 13696 }, { "epoch": 1.5733731549020735, "grad_norm": 0.5616782307624817, "learning_rate": 0.0001, "loss": 1.3518, "step": 13697 }, { "epoch": 1.5734880248119005, "grad_norm": 0.6015630960464478, "learning_rate": 0.0001, "loss": 1.2921, "step": 13698 }, { "epoch": 1.5736028947217275, "grad_norm": 0.5990334749221802, "learning_rate": 0.0001, "loss": 1.5865, "step": 13699 }, { "epoch": 1.5737177646315548, "grad_norm": 0.5758739709854126, "learning_rate": 0.0001, "loss": 1.58, "step": 13700 }, { "epoch": 1.573832634541382, "grad_norm": 0.5954878330230713, "learning_rate": 0.0001, "loss": 1.3584, "step": 13701 }, { "epoch": 1.573947504451209, "grad_norm": 0.6391401290893555, "learning_rate": 0.0001, "loss": 1.4215, "step": 13702 }, { "epoch": 1.574062374361036, "grad_norm": 0.7178584337234497, "learning_rate": 0.0001, "loss": 1.3619, "step": 13703 }, { "epoch": 1.5741772442708633, "grad_norm": 0.6165657639503479, "learning_rate": 0.0001, "loss": 1.2028, "step": 13704 }, { "epoch": 1.5742921141806905, "grad_norm": 0.5853223204612732, "learning_rate": 0.0001, "loss": 1.4566, "step": 13705 }, { "epoch": 1.5744069840905175, "grad_norm": 0.6249833703041077, "learning_rate": 0.0001, "loss": 1.1105, "step": 13706 }, { "epoch": 1.5745218540003445, "grad_norm": 0.5916066765785217, "learning_rate": 0.0001, "loss": 1.4113, "step": 13707 }, { "epoch": 1.5746367239101717, "grad_norm": 0.5843729972839355, "learning_rate": 0.0001, "loss": 1.3678, "step": 13708 }, { "epoch": 1.574751593819999, "grad_norm": 0.5756968259811401, "learning_rate": 0.0001, "loss": 1.3407, "step": 13709 }, { "epoch": 1.574866463729826, "grad_norm": 0.6385331749916077, "learning_rate": 0.0001, "loss": 1.4298, "step": 13710 }, { "epoch": 1.574981333639653, "grad_norm": 0.6959099769592285, "learning_rate": 0.0001, "loss": 1.6224, "step": 13711 }, { "epoch": 1.5750962035494802, "grad_norm": 0.5969239473342896, "learning_rate": 0.0001, "loss": 1.4625, "step": 13712 }, { "epoch": 1.5752110734593074, "grad_norm": 0.5907700061798096, "learning_rate": 0.0001, "loss": 1.4731, "step": 13713 }, { "epoch": 1.5753259433691345, "grad_norm": 0.5716517567634583, "learning_rate": 0.0001, "loss": 1.0672, "step": 13714 }, { "epoch": 1.5754408132789615, "grad_norm": 0.6416873931884766, "learning_rate": 0.0001, "loss": 1.5179, "step": 13715 }, { "epoch": 1.5755556831887887, "grad_norm": 0.6345185041427612, "learning_rate": 0.0001, "loss": 1.5173, "step": 13716 }, { "epoch": 1.575670553098616, "grad_norm": 0.63191157579422, "learning_rate": 0.0001, "loss": 1.6803, "step": 13717 }, { "epoch": 1.575785423008443, "grad_norm": 0.5994105339050293, "learning_rate": 0.0001, "loss": 1.4558, "step": 13718 }, { "epoch": 1.57590029291827, "grad_norm": 0.657799243927002, "learning_rate": 0.0001, "loss": 1.4011, "step": 13719 }, { "epoch": 1.5760151628280972, "grad_norm": 0.6098163723945618, "learning_rate": 0.0001, "loss": 1.4295, "step": 13720 }, { "epoch": 1.5761300327379244, "grad_norm": 0.5927136540412903, "learning_rate": 0.0001, "loss": 1.4307, "step": 13721 }, { "epoch": 1.5762449026477514, "grad_norm": 0.5886332392692566, "learning_rate": 0.0001, "loss": 1.574, "step": 13722 }, { "epoch": 1.5763597725575784, "grad_norm": 0.589408814907074, "learning_rate": 0.0001, "loss": 1.3345, "step": 13723 }, { "epoch": 1.5764746424674057, "grad_norm": 0.6005495190620422, "learning_rate": 0.0001, "loss": 1.5995, "step": 13724 }, { "epoch": 1.5765895123772329, "grad_norm": 0.5907677412033081, "learning_rate": 0.0001, "loss": 1.591, "step": 13725 }, { "epoch": 1.57670438228706, "grad_norm": 0.6294686794281006, "learning_rate": 0.0001, "loss": 1.4602, "step": 13726 }, { "epoch": 1.576819252196887, "grad_norm": 0.5746269822120667, "learning_rate": 0.0001, "loss": 1.4667, "step": 13727 }, { "epoch": 1.5769341221067141, "grad_norm": 0.6354400515556335, "learning_rate": 0.0001, "loss": 1.3841, "step": 13728 }, { "epoch": 1.5770489920165414, "grad_norm": 0.5498471856117249, "learning_rate": 0.0001, "loss": 1.4757, "step": 13729 }, { "epoch": 1.5771638619263684, "grad_norm": 0.6611149907112122, "learning_rate": 0.0001, "loss": 1.4133, "step": 13730 }, { "epoch": 1.5772787318361954, "grad_norm": 0.5527372360229492, "learning_rate": 0.0001, "loss": 1.4975, "step": 13731 }, { "epoch": 1.5773936017460226, "grad_norm": 0.6152689456939697, "learning_rate": 0.0001, "loss": 1.5015, "step": 13732 }, { "epoch": 1.5775084716558498, "grad_norm": 0.6419612169265747, "learning_rate": 0.0001, "loss": 1.6189, "step": 13733 }, { "epoch": 1.5776233415656769, "grad_norm": 0.6237484216690063, "learning_rate": 0.0001, "loss": 1.3205, "step": 13734 }, { "epoch": 1.5777382114755039, "grad_norm": 0.6146785616874695, "learning_rate": 0.0001, "loss": 1.3854, "step": 13735 }, { "epoch": 1.577853081385331, "grad_norm": 0.5459988117218018, "learning_rate": 0.0001, "loss": 1.2364, "step": 13736 }, { "epoch": 1.5779679512951583, "grad_norm": 0.5892395377159119, "learning_rate": 0.0001, "loss": 1.4168, "step": 13737 }, { "epoch": 1.5780828212049853, "grad_norm": 0.6266677975654602, "learning_rate": 0.0001, "loss": 1.4853, "step": 13738 }, { "epoch": 1.5781976911148123, "grad_norm": 0.6316624283790588, "learning_rate": 0.0001, "loss": 1.3561, "step": 13739 }, { "epoch": 1.5783125610246396, "grad_norm": 0.5511997938156128, "learning_rate": 0.0001, "loss": 1.4032, "step": 13740 }, { "epoch": 1.5784274309344668, "grad_norm": 0.6501776576042175, "learning_rate": 0.0001, "loss": 1.4935, "step": 13741 }, { "epoch": 1.5785423008442938, "grad_norm": 0.6260613203048706, "learning_rate": 0.0001, "loss": 1.4332, "step": 13742 }, { "epoch": 1.5786571707541208, "grad_norm": 0.6105442047119141, "learning_rate": 0.0001, "loss": 1.566, "step": 13743 }, { "epoch": 1.578772040663948, "grad_norm": 0.5777299404144287, "learning_rate": 0.0001, "loss": 1.4765, "step": 13744 }, { "epoch": 1.5788869105737753, "grad_norm": 0.6278029680252075, "learning_rate": 0.0001, "loss": 1.4338, "step": 13745 }, { "epoch": 1.5790017804836023, "grad_norm": 0.6350043416023254, "learning_rate": 0.0001, "loss": 1.2099, "step": 13746 }, { "epoch": 1.5791166503934293, "grad_norm": 0.7006800770759583, "learning_rate": 0.0001, "loss": 1.6148, "step": 13747 }, { "epoch": 1.5792315203032565, "grad_norm": 0.7125732898712158, "learning_rate": 0.0001, "loss": 1.5515, "step": 13748 }, { "epoch": 1.5793463902130838, "grad_norm": 0.595615565776825, "learning_rate": 0.0001, "loss": 1.5118, "step": 13749 }, { "epoch": 1.5794612601229108, "grad_norm": 0.5776601433753967, "learning_rate": 0.0001, "loss": 1.2368, "step": 13750 }, { "epoch": 1.5795761300327378, "grad_norm": 0.6074332594871521, "learning_rate": 0.0001, "loss": 1.5576, "step": 13751 }, { "epoch": 1.579690999942565, "grad_norm": 0.5884713530540466, "learning_rate": 0.0001, "loss": 1.2479, "step": 13752 }, { "epoch": 1.5798058698523922, "grad_norm": 0.5512914657592773, "learning_rate": 0.0001, "loss": 1.3712, "step": 13753 }, { "epoch": 1.5799207397622193, "grad_norm": 0.5844442844390869, "learning_rate": 0.0001, "loss": 1.5999, "step": 13754 }, { "epoch": 1.5800356096720463, "grad_norm": 0.6031823754310608, "learning_rate": 0.0001, "loss": 1.295, "step": 13755 }, { "epoch": 1.5801504795818735, "grad_norm": 0.6166372895240784, "learning_rate": 0.0001, "loss": 1.3007, "step": 13756 }, { "epoch": 1.5802653494917007, "grad_norm": 0.5954715013504028, "learning_rate": 0.0001, "loss": 1.4218, "step": 13757 }, { "epoch": 1.5803802194015277, "grad_norm": 0.6521285176277161, "learning_rate": 0.0001, "loss": 1.4628, "step": 13758 }, { "epoch": 1.5804950893113547, "grad_norm": 0.6663768887519836, "learning_rate": 0.0001, "loss": 1.6063, "step": 13759 }, { "epoch": 1.580609959221182, "grad_norm": 0.577915370464325, "learning_rate": 0.0001, "loss": 1.4309, "step": 13760 }, { "epoch": 1.5807248291310092, "grad_norm": 0.5663382411003113, "learning_rate": 0.0001, "loss": 1.4019, "step": 13761 }, { "epoch": 1.5808396990408362, "grad_norm": 0.5602874159812927, "learning_rate": 0.0001, "loss": 1.4436, "step": 13762 }, { "epoch": 1.5809545689506632, "grad_norm": 0.6626793146133423, "learning_rate": 0.0001, "loss": 1.4416, "step": 13763 }, { "epoch": 1.5810694388604905, "grad_norm": 0.6153534650802612, "learning_rate": 0.0001, "loss": 1.3675, "step": 13764 }, { "epoch": 1.5811843087703177, "grad_norm": 0.5600067377090454, "learning_rate": 0.0001, "loss": 1.2818, "step": 13765 }, { "epoch": 1.5812991786801447, "grad_norm": 0.6040723919868469, "learning_rate": 0.0001, "loss": 1.4141, "step": 13766 }, { "epoch": 1.5814140485899717, "grad_norm": 0.5831596255302429, "learning_rate": 0.0001, "loss": 1.412, "step": 13767 }, { "epoch": 1.581528918499799, "grad_norm": 0.5813616514205933, "learning_rate": 0.0001, "loss": 1.3028, "step": 13768 }, { "epoch": 1.5816437884096262, "grad_norm": 0.5949644446372986, "learning_rate": 0.0001, "loss": 1.4923, "step": 13769 }, { "epoch": 1.5817586583194532, "grad_norm": 0.6183162927627563, "learning_rate": 0.0001, "loss": 1.4916, "step": 13770 }, { "epoch": 1.5818735282292802, "grad_norm": 0.5976161360740662, "learning_rate": 0.0001, "loss": 1.5491, "step": 13771 }, { "epoch": 1.5819883981391074, "grad_norm": 0.6509799957275391, "learning_rate": 0.0001, "loss": 1.5773, "step": 13772 }, { "epoch": 1.5821032680489346, "grad_norm": 0.6055750250816345, "learning_rate": 0.0001, "loss": 1.4911, "step": 13773 }, { "epoch": 1.5822181379587617, "grad_norm": 0.6353389024734497, "learning_rate": 0.0001, "loss": 1.6027, "step": 13774 }, { "epoch": 1.5823330078685887, "grad_norm": 0.645206093788147, "learning_rate": 0.0001, "loss": 1.5026, "step": 13775 }, { "epoch": 1.582447877778416, "grad_norm": 0.6120302081108093, "learning_rate": 0.0001, "loss": 1.5445, "step": 13776 }, { "epoch": 1.5825627476882431, "grad_norm": 0.5803287625312805, "learning_rate": 0.0001, "loss": 1.2655, "step": 13777 }, { "epoch": 1.5826776175980704, "grad_norm": 0.5672709345817566, "learning_rate": 0.0001, "loss": 1.5853, "step": 13778 }, { "epoch": 1.5827924875078974, "grad_norm": 0.593664288520813, "learning_rate": 0.0001, "loss": 1.4554, "step": 13779 }, { "epoch": 1.5829073574177244, "grad_norm": 0.6163424849510193, "learning_rate": 0.0001, "loss": 1.4778, "step": 13780 }, { "epoch": 1.5830222273275516, "grad_norm": 0.6263569593429565, "learning_rate": 0.0001, "loss": 1.6402, "step": 13781 }, { "epoch": 1.5831370972373788, "grad_norm": 0.6205965876579285, "learning_rate": 0.0001, "loss": 1.6084, "step": 13782 }, { "epoch": 1.5832519671472058, "grad_norm": 0.6791396141052246, "learning_rate": 0.0001, "loss": 1.7392, "step": 13783 }, { "epoch": 1.5833668370570328, "grad_norm": 0.6714447736740112, "learning_rate": 0.0001, "loss": 1.4619, "step": 13784 }, { "epoch": 1.58348170696686, "grad_norm": 0.5691717267036438, "learning_rate": 0.0001, "loss": 1.4059, "step": 13785 }, { "epoch": 1.5835965768766873, "grad_norm": 0.636568009853363, "learning_rate": 0.0001, "loss": 1.4756, "step": 13786 }, { "epoch": 1.5837114467865143, "grad_norm": 0.6351962685585022, "learning_rate": 0.0001, "loss": 1.4316, "step": 13787 }, { "epoch": 1.5838263166963413, "grad_norm": 0.6552459001541138, "learning_rate": 0.0001, "loss": 1.7072, "step": 13788 }, { "epoch": 1.5839411866061686, "grad_norm": 0.6077340841293335, "learning_rate": 0.0001, "loss": 1.585, "step": 13789 }, { "epoch": 1.5840560565159958, "grad_norm": 0.5727121829986572, "learning_rate": 0.0001, "loss": 1.4895, "step": 13790 }, { "epoch": 1.5841709264258228, "grad_norm": 0.6179769039154053, "learning_rate": 0.0001, "loss": 1.3921, "step": 13791 }, { "epoch": 1.5842857963356498, "grad_norm": 0.5873903036117554, "learning_rate": 0.0001, "loss": 1.5292, "step": 13792 }, { "epoch": 1.584400666245477, "grad_norm": 0.6171455979347229, "learning_rate": 0.0001, "loss": 1.3184, "step": 13793 }, { "epoch": 1.5845155361553043, "grad_norm": 0.7028810977935791, "learning_rate": 0.0001, "loss": 1.6935, "step": 13794 }, { "epoch": 1.5846304060651313, "grad_norm": 0.5513997077941895, "learning_rate": 0.0001, "loss": 1.464, "step": 13795 }, { "epoch": 1.5847452759749583, "grad_norm": 0.5477231740951538, "learning_rate": 0.0001, "loss": 1.3617, "step": 13796 }, { "epoch": 1.5848601458847855, "grad_norm": 0.6108036041259766, "learning_rate": 0.0001, "loss": 1.3043, "step": 13797 }, { "epoch": 1.5849750157946128, "grad_norm": 0.559474527835846, "learning_rate": 0.0001, "loss": 1.2964, "step": 13798 }, { "epoch": 1.5850898857044398, "grad_norm": 0.6026455760002136, "learning_rate": 0.0001, "loss": 1.4424, "step": 13799 }, { "epoch": 1.5852047556142668, "grad_norm": 0.7762643098831177, "learning_rate": 0.0001, "loss": 1.7879, "step": 13800 }, { "epoch": 1.585319625524094, "grad_norm": 0.6184485554695129, "learning_rate": 0.0001, "loss": 1.4559, "step": 13801 }, { "epoch": 1.5854344954339212, "grad_norm": 0.6571382880210876, "learning_rate": 0.0001, "loss": 1.6504, "step": 13802 }, { "epoch": 1.5855493653437482, "grad_norm": 0.6091337203979492, "learning_rate": 0.0001, "loss": 1.555, "step": 13803 }, { "epoch": 1.5856642352535752, "grad_norm": 0.6771681308746338, "learning_rate": 0.0001, "loss": 1.3213, "step": 13804 }, { "epoch": 1.5857791051634025, "grad_norm": 0.6501324772834778, "learning_rate": 0.0001, "loss": 1.5583, "step": 13805 }, { "epoch": 1.5858939750732297, "grad_norm": 0.5912746787071228, "learning_rate": 0.0001, "loss": 1.3764, "step": 13806 }, { "epoch": 1.5860088449830567, "grad_norm": 0.6037330627441406, "learning_rate": 0.0001, "loss": 1.4944, "step": 13807 }, { "epoch": 1.5861237148928837, "grad_norm": 0.5597751140594482, "learning_rate": 0.0001, "loss": 1.3488, "step": 13808 }, { "epoch": 1.586238584802711, "grad_norm": 0.5525902509689331, "learning_rate": 0.0001, "loss": 1.4154, "step": 13809 }, { "epoch": 1.5863534547125382, "grad_norm": 0.6479353308677673, "learning_rate": 0.0001, "loss": 1.4451, "step": 13810 }, { "epoch": 1.5864683246223652, "grad_norm": 0.6290416121482849, "learning_rate": 0.0001, "loss": 1.4498, "step": 13811 }, { "epoch": 1.5865831945321922, "grad_norm": 0.6369213461875916, "learning_rate": 0.0001, "loss": 1.3789, "step": 13812 }, { "epoch": 1.5866980644420194, "grad_norm": 0.6600077152252197, "learning_rate": 0.0001, "loss": 1.3561, "step": 13813 }, { "epoch": 1.5868129343518467, "grad_norm": 0.6449211239814758, "learning_rate": 0.0001, "loss": 1.5132, "step": 13814 }, { "epoch": 1.5869278042616737, "grad_norm": 0.596219003200531, "learning_rate": 0.0001, "loss": 1.4306, "step": 13815 }, { "epoch": 1.5870426741715007, "grad_norm": 0.6226633191108704, "learning_rate": 0.0001, "loss": 1.4792, "step": 13816 }, { "epoch": 1.587157544081328, "grad_norm": 0.614152193069458, "learning_rate": 0.0001, "loss": 1.5682, "step": 13817 }, { "epoch": 1.5872724139911552, "grad_norm": 0.6192676424980164, "learning_rate": 0.0001, "loss": 1.3306, "step": 13818 }, { "epoch": 1.5873872839009822, "grad_norm": 0.6133370995521545, "learning_rate": 0.0001, "loss": 1.2503, "step": 13819 }, { "epoch": 1.5875021538108092, "grad_norm": 0.6042430996894836, "learning_rate": 0.0001, "loss": 1.4175, "step": 13820 }, { "epoch": 1.5876170237206364, "grad_norm": 0.7672133445739746, "learning_rate": 0.0001, "loss": 1.6331, "step": 13821 }, { "epoch": 1.5877318936304636, "grad_norm": 0.662254273891449, "learning_rate": 0.0001, "loss": 1.4117, "step": 13822 }, { "epoch": 1.5878467635402906, "grad_norm": 0.6693131327629089, "learning_rate": 0.0001, "loss": 1.5383, "step": 13823 }, { "epoch": 1.5879616334501176, "grad_norm": 0.636415421962738, "learning_rate": 0.0001, "loss": 1.5238, "step": 13824 }, { "epoch": 1.5880765033599449, "grad_norm": 0.579950213432312, "learning_rate": 0.0001, "loss": 1.4417, "step": 13825 }, { "epoch": 1.588191373269772, "grad_norm": 0.6041045188903809, "learning_rate": 0.0001, "loss": 1.3729, "step": 13826 }, { "epoch": 1.5883062431795991, "grad_norm": 0.638687252998352, "learning_rate": 0.0001, "loss": 1.4003, "step": 13827 }, { "epoch": 1.5884211130894261, "grad_norm": 0.5657978057861328, "learning_rate": 0.0001, "loss": 1.3076, "step": 13828 }, { "epoch": 1.5885359829992534, "grad_norm": 0.5743250250816345, "learning_rate": 0.0001, "loss": 1.5329, "step": 13829 }, { "epoch": 1.5886508529090806, "grad_norm": 0.6413049101829529, "learning_rate": 0.0001, "loss": 1.5212, "step": 13830 }, { "epoch": 1.5887657228189076, "grad_norm": 0.5768105983734131, "learning_rate": 0.0001, "loss": 1.4486, "step": 13831 }, { "epoch": 1.5888805927287346, "grad_norm": 0.6100025773048401, "learning_rate": 0.0001, "loss": 1.5592, "step": 13832 }, { "epoch": 1.5889954626385618, "grad_norm": 0.6086409091949463, "learning_rate": 0.0001, "loss": 1.3241, "step": 13833 }, { "epoch": 1.589110332548389, "grad_norm": 0.6724202632904053, "learning_rate": 0.0001, "loss": 1.4075, "step": 13834 }, { "epoch": 1.589225202458216, "grad_norm": 0.6264391541481018, "learning_rate": 0.0001, "loss": 1.6691, "step": 13835 }, { "epoch": 1.589340072368043, "grad_norm": 0.6160328388214111, "learning_rate": 0.0001, "loss": 1.4792, "step": 13836 }, { "epoch": 1.5894549422778703, "grad_norm": 0.6358749270439148, "learning_rate": 0.0001, "loss": 1.465, "step": 13837 }, { "epoch": 1.5895698121876976, "grad_norm": 0.6735308766365051, "learning_rate": 0.0001, "loss": 1.4201, "step": 13838 }, { "epoch": 1.5896846820975246, "grad_norm": 0.6253379583358765, "learning_rate": 0.0001, "loss": 1.5515, "step": 13839 }, { "epoch": 1.5897995520073516, "grad_norm": 0.6210620999336243, "learning_rate": 0.0001, "loss": 1.62, "step": 13840 }, { "epoch": 1.5899144219171788, "grad_norm": 0.6258774995803833, "learning_rate": 0.0001, "loss": 1.4827, "step": 13841 }, { "epoch": 1.590029291827006, "grad_norm": 0.6079967021942139, "learning_rate": 0.0001, "loss": 1.4229, "step": 13842 }, { "epoch": 1.590144161736833, "grad_norm": 0.5905248522758484, "learning_rate": 0.0001, "loss": 1.436, "step": 13843 }, { "epoch": 1.59025903164666, "grad_norm": 0.623208224773407, "learning_rate": 0.0001, "loss": 1.5429, "step": 13844 }, { "epoch": 1.5903739015564873, "grad_norm": 0.6524278521537781, "learning_rate": 0.0001, "loss": 1.526, "step": 13845 }, { "epoch": 1.5904887714663145, "grad_norm": 0.6335569024085999, "learning_rate": 0.0001, "loss": 1.4586, "step": 13846 }, { "epoch": 1.5906036413761415, "grad_norm": 0.5839788317680359, "learning_rate": 0.0001, "loss": 1.4472, "step": 13847 }, { "epoch": 1.5907185112859685, "grad_norm": 0.6016788482666016, "learning_rate": 0.0001, "loss": 1.6573, "step": 13848 }, { "epoch": 1.5908333811957958, "grad_norm": 0.5814146399497986, "learning_rate": 0.0001, "loss": 1.5764, "step": 13849 }, { "epoch": 1.590948251105623, "grad_norm": 0.6370062828063965, "learning_rate": 0.0001, "loss": 1.6166, "step": 13850 }, { "epoch": 1.59106312101545, "grad_norm": 0.5697435736656189, "learning_rate": 0.0001, "loss": 1.5031, "step": 13851 }, { "epoch": 1.591177990925277, "grad_norm": 0.5778668522834778, "learning_rate": 0.0001, "loss": 1.2866, "step": 13852 }, { "epoch": 1.5912928608351042, "grad_norm": 0.5944350957870483, "learning_rate": 0.0001, "loss": 1.4024, "step": 13853 }, { "epoch": 1.5914077307449315, "grad_norm": 0.713706374168396, "learning_rate": 0.0001, "loss": 1.4837, "step": 13854 }, { "epoch": 1.5915226006547585, "grad_norm": 0.5639752149581909, "learning_rate": 0.0001, "loss": 1.4652, "step": 13855 }, { "epoch": 1.5916374705645855, "grad_norm": 0.6923316717147827, "learning_rate": 0.0001, "loss": 1.529, "step": 13856 }, { "epoch": 1.5917523404744127, "grad_norm": 0.5926253795623779, "learning_rate": 0.0001, "loss": 1.4854, "step": 13857 }, { "epoch": 1.59186721038424, "grad_norm": 0.6101776361465454, "learning_rate": 0.0001, "loss": 1.4077, "step": 13858 }, { "epoch": 1.591982080294067, "grad_norm": 0.6200652122497559, "learning_rate": 0.0001, "loss": 1.5716, "step": 13859 }, { "epoch": 1.592096950203894, "grad_norm": 0.7140838503837585, "learning_rate": 0.0001, "loss": 1.5328, "step": 13860 }, { "epoch": 1.5922118201137212, "grad_norm": 0.6783391237258911, "learning_rate": 0.0001, "loss": 1.6397, "step": 13861 }, { "epoch": 1.5923266900235484, "grad_norm": 0.6628894805908203, "learning_rate": 0.0001, "loss": 1.4749, "step": 13862 }, { "epoch": 1.5924415599333754, "grad_norm": 0.59294593334198, "learning_rate": 0.0001, "loss": 1.3666, "step": 13863 }, { "epoch": 1.5925564298432024, "grad_norm": 0.5863275527954102, "learning_rate": 0.0001, "loss": 1.5978, "step": 13864 }, { "epoch": 1.5926712997530297, "grad_norm": 0.5980870723724365, "learning_rate": 0.0001, "loss": 1.4679, "step": 13865 }, { "epoch": 1.592786169662857, "grad_norm": 0.6048779487609863, "learning_rate": 0.0001, "loss": 1.4745, "step": 13866 }, { "epoch": 1.592901039572684, "grad_norm": 0.5743735432624817, "learning_rate": 0.0001, "loss": 1.4831, "step": 13867 }, { "epoch": 1.593015909482511, "grad_norm": 0.5809198021888733, "learning_rate": 0.0001, "loss": 1.4706, "step": 13868 }, { "epoch": 1.5931307793923382, "grad_norm": 0.6342722773551941, "learning_rate": 0.0001, "loss": 1.5462, "step": 13869 }, { "epoch": 1.5932456493021654, "grad_norm": 0.6230271458625793, "learning_rate": 0.0001, "loss": 1.3428, "step": 13870 }, { "epoch": 1.5933605192119924, "grad_norm": 0.6740293502807617, "learning_rate": 0.0001, "loss": 1.6158, "step": 13871 }, { "epoch": 1.5934753891218194, "grad_norm": 0.7214620113372803, "learning_rate": 0.0001, "loss": 1.4837, "step": 13872 }, { "epoch": 1.5935902590316466, "grad_norm": 0.6256888508796692, "learning_rate": 0.0001, "loss": 1.7852, "step": 13873 }, { "epoch": 1.5937051289414739, "grad_norm": 0.5903255939483643, "learning_rate": 0.0001, "loss": 1.4396, "step": 13874 }, { "epoch": 1.5938199988513009, "grad_norm": 0.572929859161377, "learning_rate": 0.0001, "loss": 1.4534, "step": 13875 }, { "epoch": 1.5939348687611279, "grad_norm": 0.6389855146408081, "learning_rate": 0.0001, "loss": 1.4959, "step": 13876 }, { "epoch": 1.5940497386709551, "grad_norm": 0.614324688911438, "learning_rate": 0.0001, "loss": 1.5737, "step": 13877 }, { "epoch": 1.5941646085807823, "grad_norm": 0.6262052655220032, "learning_rate": 0.0001, "loss": 1.48, "step": 13878 }, { "epoch": 1.5942794784906094, "grad_norm": 0.6147677898406982, "learning_rate": 0.0001, "loss": 1.4761, "step": 13879 }, { "epoch": 1.5943943484004364, "grad_norm": 0.635215699672699, "learning_rate": 0.0001, "loss": 1.498, "step": 13880 }, { "epoch": 1.5945092183102636, "grad_norm": 0.5967612266540527, "learning_rate": 0.0001, "loss": 1.4983, "step": 13881 }, { "epoch": 1.5946240882200908, "grad_norm": 0.5582137703895569, "learning_rate": 0.0001, "loss": 1.374, "step": 13882 }, { "epoch": 1.5947389581299178, "grad_norm": 0.6301720142364502, "learning_rate": 0.0001, "loss": 1.6609, "step": 13883 }, { "epoch": 1.5948538280397448, "grad_norm": 0.5917264819145203, "learning_rate": 0.0001, "loss": 1.5236, "step": 13884 }, { "epoch": 1.594968697949572, "grad_norm": 0.6191073656082153, "learning_rate": 0.0001, "loss": 1.4597, "step": 13885 }, { "epoch": 1.5950835678593993, "grad_norm": 0.5992122292518616, "learning_rate": 0.0001, "loss": 1.3994, "step": 13886 }, { "epoch": 1.5951984377692263, "grad_norm": 0.6241499781608582, "learning_rate": 0.0001, "loss": 1.5801, "step": 13887 }, { "epoch": 1.5953133076790533, "grad_norm": 0.5631673336029053, "learning_rate": 0.0001, "loss": 1.4812, "step": 13888 }, { "epoch": 1.5954281775888806, "grad_norm": 0.6660466194152832, "learning_rate": 0.0001, "loss": 1.7669, "step": 13889 }, { "epoch": 1.5955430474987078, "grad_norm": 0.676660418510437, "learning_rate": 0.0001, "loss": 1.1202, "step": 13890 }, { "epoch": 1.5956579174085348, "grad_norm": 0.6641074419021606, "learning_rate": 0.0001, "loss": 1.5507, "step": 13891 }, { "epoch": 1.5957727873183618, "grad_norm": 0.6057578325271606, "learning_rate": 0.0001, "loss": 1.4497, "step": 13892 }, { "epoch": 1.595887657228189, "grad_norm": 0.5668378472328186, "learning_rate": 0.0001, "loss": 1.3555, "step": 13893 }, { "epoch": 1.5960025271380163, "grad_norm": 0.6328227519989014, "learning_rate": 0.0001, "loss": 1.5493, "step": 13894 }, { "epoch": 1.5961173970478433, "grad_norm": 0.605475664138794, "learning_rate": 0.0001, "loss": 1.4586, "step": 13895 }, { "epoch": 1.5962322669576703, "grad_norm": 0.5906625986099243, "learning_rate": 0.0001, "loss": 1.4653, "step": 13896 }, { "epoch": 1.5963471368674975, "grad_norm": 0.613008439540863, "learning_rate": 0.0001, "loss": 1.5165, "step": 13897 }, { "epoch": 1.5964620067773247, "grad_norm": 0.6402007937431335, "learning_rate": 0.0001, "loss": 1.3678, "step": 13898 }, { "epoch": 1.5965768766871518, "grad_norm": 0.6814921498298645, "learning_rate": 0.0001, "loss": 1.6187, "step": 13899 }, { "epoch": 1.5966917465969788, "grad_norm": 0.5518124103546143, "learning_rate": 0.0001, "loss": 1.3886, "step": 13900 }, { "epoch": 1.596806616506806, "grad_norm": 0.5962539315223694, "learning_rate": 0.0001, "loss": 1.412, "step": 13901 }, { "epoch": 1.5969214864166332, "grad_norm": 0.5778492093086243, "learning_rate": 0.0001, "loss": 1.3721, "step": 13902 }, { "epoch": 1.5970363563264602, "grad_norm": 0.6201198101043701, "learning_rate": 0.0001, "loss": 1.3933, "step": 13903 }, { "epoch": 1.5971512262362872, "grad_norm": 0.6029123663902283, "learning_rate": 0.0001, "loss": 1.3812, "step": 13904 }, { "epoch": 1.5972660961461145, "grad_norm": 0.6379988789558411, "learning_rate": 0.0001, "loss": 1.2687, "step": 13905 }, { "epoch": 1.5973809660559417, "grad_norm": 0.5439411997795105, "learning_rate": 0.0001, "loss": 1.3471, "step": 13906 }, { "epoch": 1.5974958359657687, "grad_norm": 0.6145894527435303, "learning_rate": 0.0001, "loss": 1.3119, "step": 13907 }, { "epoch": 1.5976107058755957, "grad_norm": 0.6268411874771118, "learning_rate": 0.0001, "loss": 1.4461, "step": 13908 }, { "epoch": 1.597725575785423, "grad_norm": 0.5672012567520142, "learning_rate": 0.0001, "loss": 1.4152, "step": 13909 }, { "epoch": 1.5978404456952502, "grad_norm": 0.5719462633132935, "learning_rate": 0.0001, "loss": 1.4458, "step": 13910 }, { "epoch": 1.5979553156050772, "grad_norm": 0.564241349697113, "learning_rate": 0.0001, "loss": 1.2821, "step": 13911 }, { "epoch": 1.5980701855149042, "grad_norm": 0.6079102158546448, "learning_rate": 0.0001, "loss": 1.3764, "step": 13912 }, { "epoch": 1.5981850554247314, "grad_norm": 0.605634868144989, "learning_rate": 0.0001, "loss": 1.4621, "step": 13913 }, { "epoch": 1.5982999253345587, "grad_norm": 0.599432110786438, "learning_rate": 0.0001, "loss": 1.4588, "step": 13914 }, { "epoch": 1.5984147952443857, "grad_norm": 0.6091422438621521, "learning_rate": 0.0001, "loss": 1.533, "step": 13915 }, { "epoch": 1.598529665154213, "grad_norm": 0.6520305275917053, "learning_rate": 0.0001, "loss": 1.6384, "step": 13916 }, { "epoch": 1.59864453506404, "grad_norm": 0.6225545406341553, "learning_rate": 0.0001, "loss": 1.4667, "step": 13917 }, { "epoch": 1.5987594049738671, "grad_norm": 0.5742337107658386, "learning_rate": 0.0001, "loss": 1.3543, "step": 13918 }, { "epoch": 1.5988742748836944, "grad_norm": 0.6456047892570496, "learning_rate": 0.0001, "loss": 1.452, "step": 13919 }, { "epoch": 1.5989891447935214, "grad_norm": 0.637758195400238, "learning_rate": 0.0001, "loss": 1.4995, "step": 13920 }, { "epoch": 1.5991040147033484, "grad_norm": 0.6224943399429321, "learning_rate": 0.0001, "loss": 1.2575, "step": 13921 }, { "epoch": 1.5992188846131756, "grad_norm": 0.6721301078796387, "learning_rate": 0.0001, "loss": 1.5166, "step": 13922 }, { "epoch": 1.5993337545230029, "grad_norm": 0.5861270427703857, "learning_rate": 0.0001, "loss": 1.2367, "step": 13923 }, { "epoch": 1.5994486244328299, "grad_norm": 0.6392655968666077, "learning_rate": 0.0001, "loss": 1.4703, "step": 13924 }, { "epoch": 1.5995634943426569, "grad_norm": 0.6457921862602234, "learning_rate": 0.0001, "loss": 1.5829, "step": 13925 }, { "epoch": 1.599678364252484, "grad_norm": 0.6379731297492981, "learning_rate": 0.0001, "loss": 1.3977, "step": 13926 }, { "epoch": 1.5997932341623113, "grad_norm": 0.5802931785583496, "learning_rate": 0.0001, "loss": 1.3204, "step": 13927 }, { "epoch": 1.5999081040721383, "grad_norm": 0.6664165258407593, "learning_rate": 0.0001, "loss": 1.5346, "step": 13928 }, { "epoch": 1.6000229739819654, "grad_norm": 0.6241664290428162, "learning_rate": 0.0001, "loss": 1.6584, "step": 13929 }, { "epoch": 1.6001378438917926, "grad_norm": 0.6621495485305786, "learning_rate": 0.0001, "loss": 1.3809, "step": 13930 }, { "epoch": 1.6002527138016198, "grad_norm": 0.6004970073699951, "learning_rate": 0.0001, "loss": 1.4092, "step": 13931 }, { "epoch": 1.6003675837114468, "grad_norm": 0.6193183064460754, "learning_rate": 0.0001, "loss": 1.4335, "step": 13932 }, { "epoch": 1.6004824536212738, "grad_norm": 0.6352545619010925, "learning_rate": 0.0001, "loss": 1.5641, "step": 13933 }, { "epoch": 1.600597323531101, "grad_norm": 0.6299646496772766, "learning_rate": 0.0001, "loss": 1.4958, "step": 13934 }, { "epoch": 1.6007121934409283, "grad_norm": 0.5891308784484863, "learning_rate": 0.0001, "loss": 1.3664, "step": 13935 }, { "epoch": 1.6008270633507553, "grad_norm": 0.655714750289917, "learning_rate": 0.0001, "loss": 1.3667, "step": 13936 }, { "epoch": 1.6009419332605823, "grad_norm": 0.6111890077590942, "learning_rate": 0.0001, "loss": 1.4353, "step": 13937 }, { "epoch": 1.6010568031704095, "grad_norm": 0.5762326717376709, "learning_rate": 0.0001, "loss": 1.5739, "step": 13938 }, { "epoch": 1.6011716730802368, "grad_norm": 0.5921722650527954, "learning_rate": 0.0001, "loss": 1.5816, "step": 13939 }, { "epoch": 1.6012865429900638, "grad_norm": 0.6111255884170532, "learning_rate": 0.0001, "loss": 1.722, "step": 13940 }, { "epoch": 1.6014014128998908, "grad_norm": 0.5806509256362915, "learning_rate": 0.0001, "loss": 1.4408, "step": 13941 }, { "epoch": 1.601516282809718, "grad_norm": 0.5703766942024231, "learning_rate": 0.0001, "loss": 1.303, "step": 13942 }, { "epoch": 1.6016311527195453, "grad_norm": 0.5971461534500122, "learning_rate": 0.0001, "loss": 1.541, "step": 13943 }, { "epoch": 1.6017460226293723, "grad_norm": 0.6908469796180725, "learning_rate": 0.0001, "loss": 1.4302, "step": 13944 }, { "epoch": 1.6018608925391993, "grad_norm": 0.5854578614234924, "learning_rate": 0.0001, "loss": 1.5302, "step": 13945 }, { "epoch": 1.6019757624490265, "grad_norm": 0.6084689497947693, "learning_rate": 0.0001, "loss": 1.5165, "step": 13946 }, { "epoch": 1.6020906323588537, "grad_norm": 0.6091856956481934, "learning_rate": 0.0001, "loss": 1.3299, "step": 13947 }, { "epoch": 1.6022055022686807, "grad_norm": 0.6106900572776794, "learning_rate": 0.0001, "loss": 1.4301, "step": 13948 }, { "epoch": 1.6023203721785078, "grad_norm": 0.6288970708847046, "learning_rate": 0.0001, "loss": 1.5532, "step": 13949 }, { "epoch": 1.602435242088335, "grad_norm": 0.6874493956565857, "learning_rate": 0.0001, "loss": 1.4993, "step": 13950 }, { "epoch": 1.6025501119981622, "grad_norm": 0.5642193555831909, "learning_rate": 0.0001, "loss": 1.2891, "step": 13951 }, { "epoch": 1.6026649819079892, "grad_norm": 0.6149432063102722, "learning_rate": 0.0001, "loss": 1.346, "step": 13952 }, { "epoch": 1.6027798518178162, "grad_norm": 0.6034269332885742, "learning_rate": 0.0001, "loss": 1.5462, "step": 13953 }, { "epoch": 1.6028947217276435, "grad_norm": 0.5566948056221008, "learning_rate": 0.0001, "loss": 1.3367, "step": 13954 }, { "epoch": 1.6030095916374707, "grad_norm": 0.6086257100105286, "learning_rate": 0.0001, "loss": 1.5219, "step": 13955 }, { "epoch": 1.6031244615472977, "grad_norm": 0.6284582614898682, "learning_rate": 0.0001, "loss": 1.4821, "step": 13956 }, { "epoch": 1.6032393314571247, "grad_norm": 0.6378557085990906, "learning_rate": 0.0001, "loss": 1.4118, "step": 13957 }, { "epoch": 1.603354201366952, "grad_norm": 0.6031190156936646, "learning_rate": 0.0001, "loss": 1.5337, "step": 13958 }, { "epoch": 1.6034690712767792, "grad_norm": 0.6202352046966553, "learning_rate": 0.0001, "loss": 1.3672, "step": 13959 }, { "epoch": 1.6035839411866062, "grad_norm": 0.6928132772445679, "learning_rate": 0.0001, "loss": 1.2899, "step": 13960 }, { "epoch": 1.6036988110964332, "grad_norm": 0.6329846978187561, "learning_rate": 0.0001, "loss": 1.533, "step": 13961 }, { "epoch": 1.6038136810062604, "grad_norm": 0.5797714591026306, "learning_rate": 0.0001, "loss": 1.3294, "step": 13962 }, { "epoch": 1.6039285509160877, "grad_norm": 0.6104891896247864, "learning_rate": 0.0001, "loss": 1.3985, "step": 13963 }, { "epoch": 1.6040434208259147, "grad_norm": 0.6021250486373901, "learning_rate": 0.0001, "loss": 1.4517, "step": 13964 }, { "epoch": 1.6041582907357417, "grad_norm": 0.6344681978225708, "learning_rate": 0.0001, "loss": 1.5118, "step": 13965 }, { "epoch": 1.604273160645569, "grad_norm": 0.6106261014938354, "learning_rate": 0.0001, "loss": 1.4263, "step": 13966 }, { "epoch": 1.6043880305553961, "grad_norm": 0.598883330821991, "learning_rate": 0.0001, "loss": 1.4839, "step": 13967 }, { "epoch": 1.6045029004652231, "grad_norm": 0.6441910862922668, "learning_rate": 0.0001, "loss": 1.3875, "step": 13968 }, { "epoch": 1.6046177703750502, "grad_norm": 0.6011143326759338, "learning_rate": 0.0001, "loss": 1.4894, "step": 13969 }, { "epoch": 1.6047326402848774, "grad_norm": 0.6068170070648193, "learning_rate": 0.0001, "loss": 1.2947, "step": 13970 }, { "epoch": 1.6048475101947046, "grad_norm": 0.6693038940429688, "learning_rate": 0.0001, "loss": 1.4515, "step": 13971 }, { "epoch": 1.6049623801045316, "grad_norm": 0.6221956610679626, "learning_rate": 0.0001, "loss": 1.462, "step": 13972 }, { "epoch": 1.6050772500143586, "grad_norm": 0.6345918774604797, "learning_rate": 0.0001, "loss": 1.5339, "step": 13973 }, { "epoch": 1.6051921199241859, "grad_norm": 0.5779280066490173, "learning_rate": 0.0001, "loss": 1.3821, "step": 13974 }, { "epoch": 1.605306989834013, "grad_norm": 0.6681035757064819, "learning_rate": 0.0001, "loss": 1.5948, "step": 13975 }, { "epoch": 1.60542185974384, "grad_norm": 0.6619514226913452, "learning_rate": 0.0001, "loss": 1.4559, "step": 13976 }, { "epoch": 1.6055367296536671, "grad_norm": 0.5762709379196167, "learning_rate": 0.0001, "loss": 1.4743, "step": 13977 }, { "epoch": 1.6056515995634943, "grad_norm": 0.597047746181488, "learning_rate": 0.0001, "loss": 1.3797, "step": 13978 }, { "epoch": 1.6057664694733216, "grad_norm": 0.6428928375244141, "learning_rate": 0.0001, "loss": 1.5844, "step": 13979 }, { "epoch": 1.6058813393831486, "grad_norm": 0.6371673941612244, "learning_rate": 0.0001, "loss": 1.5729, "step": 13980 }, { "epoch": 1.6059962092929756, "grad_norm": 0.69063401222229, "learning_rate": 0.0001, "loss": 1.3198, "step": 13981 }, { "epoch": 1.6061110792028028, "grad_norm": 0.6148319244384766, "learning_rate": 0.0001, "loss": 1.514, "step": 13982 }, { "epoch": 1.60622594911263, "grad_norm": 0.6522954106330872, "learning_rate": 0.0001, "loss": 1.5567, "step": 13983 }, { "epoch": 1.606340819022457, "grad_norm": 0.6170140504837036, "learning_rate": 0.0001, "loss": 1.561, "step": 13984 }, { "epoch": 1.606455688932284, "grad_norm": 0.5978295803070068, "learning_rate": 0.0001, "loss": 1.4698, "step": 13985 }, { "epoch": 1.6065705588421113, "grad_norm": 0.6230267882347107, "learning_rate": 0.0001, "loss": 1.6488, "step": 13986 }, { "epoch": 1.6066854287519385, "grad_norm": 0.6035535335540771, "learning_rate": 0.0001, "loss": 1.5813, "step": 13987 }, { "epoch": 1.6068002986617655, "grad_norm": 0.6185717582702637, "learning_rate": 0.0001, "loss": 1.5899, "step": 13988 }, { "epoch": 1.6069151685715926, "grad_norm": 0.5879165530204773, "learning_rate": 0.0001, "loss": 1.1743, "step": 13989 }, { "epoch": 1.6070300384814198, "grad_norm": 0.6925861239433289, "learning_rate": 0.0001, "loss": 1.6345, "step": 13990 }, { "epoch": 1.607144908391247, "grad_norm": 0.603412926197052, "learning_rate": 0.0001, "loss": 1.3555, "step": 13991 }, { "epoch": 1.607259778301074, "grad_norm": 0.6258808374404907, "learning_rate": 0.0001, "loss": 1.692, "step": 13992 }, { "epoch": 1.607374648210901, "grad_norm": 0.6243830919265747, "learning_rate": 0.0001, "loss": 1.4886, "step": 13993 }, { "epoch": 1.6074895181207283, "grad_norm": 0.5801231861114502, "learning_rate": 0.0001, "loss": 1.3985, "step": 13994 }, { "epoch": 1.6076043880305555, "grad_norm": 0.5816532373428345, "learning_rate": 0.0001, "loss": 1.274, "step": 13995 }, { "epoch": 1.6077192579403825, "grad_norm": 0.6143868565559387, "learning_rate": 0.0001, "loss": 1.4923, "step": 13996 }, { "epoch": 1.6078341278502095, "grad_norm": 0.5322367548942566, "learning_rate": 0.0001, "loss": 1.4064, "step": 13997 }, { "epoch": 1.6079489977600367, "grad_norm": 0.6119695901870728, "learning_rate": 0.0001, "loss": 1.3524, "step": 13998 }, { "epoch": 1.608063867669864, "grad_norm": 0.6040320992469788, "learning_rate": 0.0001, "loss": 1.5813, "step": 13999 }, { "epoch": 1.608178737579691, "grad_norm": 0.5936914682388306, "learning_rate": 0.0001, "loss": 1.3736, "step": 14000 }, { "epoch": 1.608293607489518, "grad_norm": 0.6493724584579468, "learning_rate": 0.0001, "loss": 1.6253, "step": 14001 }, { "epoch": 1.6084084773993452, "grad_norm": 0.5766366720199585, "learning_rate": 0.0001, "loss": 1.4142, "step": 14002 }, { "epoch": 1.6085233473091725, "grad_norm": 0.6150953769683838, "learning_rate": 0.0001, "loss": 1.3288, "step": 14003 }, { "epoch": 1.6086382172189995, "grad_norm": 0.5571459531784058, "learning_rate": 0.0001, "loss": 1.5631, "step": 14004 }, { "epoch": 1.6087530871288265, "grad_norm": 0.6243135333061218, "learning_rate": 0.0001, "loss": 1.3522, "step": 14005 }, { "epoch": 1.6088679570386537, "grad_norm": 0.5796478390693665, "learning_rate": 0.0001, "loss": 1.3919, "step": 14006 }, { "epoch": 1.608982826948481, "grad_norm": 0.6436089873313904, "learning_rate": 0.0001, "loss": 1.4988, "step": 14007 }, { "epoch": 1.609097696858308, "grad_norm": 0.6261184811592102, "learning_rate": 0.0001, "loss": 1.5976, "step": 14008 }, { "epoch": 1.609212566768135, "grad_norm": 0.5776735544204712, "learning_rate": 0.0001, "loss": 1.3081, "step": 14009 }, { "epoch": 1.6093274366779622, "grad_norm": 0.5873029828071594, "learning_rate": 0.0001, "loss": 1.452, "step": 14010 }, { "epoch": 1.6094423065877894, "grad_norm": 0.642424464225769, "learning_rate": 0.0001, "loss": 1.5855, "step": 14011 }, { "epoch": 1.6095571764976164, "grad_norm": 0.6408437490463257, "learning_rate": 0.0001, "loss": 1.4495, "step": 14012 }, { "epoch": 1.6096720464074434, "grad_norm": 0.6098792552947998, "learning_rate": 0.0001, "loss": 1.515, "step": 14013 }, { "epoch": 1.6097869163172707, "grad_norm": 0.5650305151939392, "learning_rate": 0.0001, "loss": 1.5223, "step": 14014 }, { "epoch": 1.609901786227098, "grad_norm": 0.6191934943199158, "learning_rate": 0.0001, "loss": 1.3989, "step": 14015 }, { "epoch": 1.610016656136925, "grad_norm": 0.5842486619949341, "learning_rate": 0.0001, "loss": 1.5828, "step": 14016 }, { "epoch": 1.610131526046752, "grad_norm": 0.6919946670532227, "learning_rate": 0.0001, "loss": 1.4848, "step": 14017 }, { "epoch": 1.6102463959565791, "grad_norm": 0.5900247693061829, "learning_rate": 0.0001, "loss": 1.5379, "step": 14018 }, { "epoch": 1.6103612658664064, "grad_norm": 0.5633094906806946, "learning_rate": 0.0001, "loss": 1.2923, "step": 14019 }, { "epoch": 1.6104761357762334, "grad_norm": 0.6042163968086243, "learning_rate": 0.0001, "loss": 1.5307, "step": 14020 }, { "epoch": 1.6105910056860604, "grad_norm": 0.6642841100692749, "learning_rate": 0.0001, "loss": 1.4918, "step": 14021 }, { "epoch": 1.6107058755958876, "grad_norm": 0.7833337783813477, "learning_rate": 0.0001, "loss": 1.4682, "step": 14022 }, { "epoch": 1.6108207455057149, "grad_norm": 0.5767188668251038, "learning_rate": 0.0001, "loss": 1.4422, "step": 14023 }, { "epoch": 1.6109356154155419, "grad_norm": 0.5754833817481995, "learning_rate": 0.0001, "loss": 1.4488, "step": 14024 }, { "epoch": 1.6110504853253689, "grad_norm": 0.6136239171028137, "learning_rate": 0.0001, "loss": 1.4834, "step": 14025 }, { "epoch": 1.611165355235196, "grad_norm": 0.6007016897201538, "learning_rate": 0.0001, "loss": 1.5565, "step": 14026 }, { "epoch": 1.6112802251450233, "grad_norm": 0.5736799836158752, "learning_rate": 0.0001, "loss": 1.4608, "step": 14027 }, { "epoch": 1.6113950950548503, "grad_norm": 0.5962418913841248, "learning_rate": 0.0001, "loss": 1.4597, "step": 14028 }, { "epoch": 1.6115099649646774, "grad_norm": 0.6547172665596008, "learning_rate": 0.0001, "loss": 1.4047, "step": 14029 }, { "epoch": 1.6116248348745046, "grad_norm": 0.6729933023452759, "learning_rate": 0.0001, "loss": 1.57, "step": 14030 }, { "epoch": 1.6117397047843318, "grad_norm": 0.5763393640518188, "learning_rate": 0.0001, "loss": 1.6013, "step": 14031 }, { "epoch": 1.6118545746941588, "grad_norm": 0.6308915019035339, "learning_rate": 0.0001, "loss": 1.5667, "step": 14032 }, { "epoch": 1.6119694446039858, "grad_norm": 0.5580734610557556, "learning_rate": 0.0001, "loss": 1.4442, "step": 14033 }, { "epoch": 1.612084314513813, "grad_norm": 0.5644499063491821, "learning_rate": 0.0001, "loss": 1.3308, "step": 14034 }, { "epoch": 1.6121991844236403, "grad_norm": 0.6455258131027222, "learning_rate": 0.0001, "loss": 1.547, "step": 14035 }, { "epoch": 1.6123140543334673, "grad_norm": 0.596584677696228, "learning_rate": 0.0001, "loss": 1.3524, "step": 14036 }, { "epoch": 1.6124289242432943, "grad_norm": 0.7680908441543579, "learning_rate": 0.0001, "loss": 1.5657, "step": 14037 }, { "epoch": 1.6125437941531215, "grad_norm": 0.661892294883728, "learning_rate": 0.0001, "loss": 1.5992, "step": 14038 }, { "epoch": 1.6126586640629488, "grad_norm": 0.5909493565559387, "learning_rate": 0.0001, "loss": 1.3543, "step": 14039 }, { "epoch": 1.6127735339727758, "grad_norm": 0.5718796849250793, "learning_rate": 0.0001, "loss": 1.191, "step": 14040 }, { "epoch": 1.6128884038826028, "grad_norm": 0.6069730520248413, "learning_rate": 0.0001, "loss": 1.5217, "step": 14041 }, { "epoch": 1.61300327379243, "grad_norm": 0.6135676503181458, "learning_rate": 0.0001, "loss": 1.5736, "step": 14042 }, { "epoch": 1.6131181437022573, "grad_norm": 0.5874907970428467, "learning_rate": 0.0001, "loss": 1.5292, "step": 14043 }, { "epoch": 1.6132330136120843, "grad_norm": 0.6142223477363586, "learning_rate": 0.0001, "loss": 1.3995, "step": 14044 }, { "epoch": 1.6133478835219113, "grad_norm": 0.6572316884994507, "learning_rate": 0.0001, "loss": 1.6148, "step": 14045 }, { "epoch": 1.6134627534317385, "grad_norm": 0.6865919828414917, "learning_rate": 0.0001, "loss": 1.4198, "step": 14046 }, { "epoch": 1.6135776233415657, "grad_norm": 0.57161945104599, "learning_rate": 0.0001, "loss": 1.3409, "step": 14047 }, { "epoch": 1.6136924932513927, "grad_norm": 0.672649085521698, "learning_rate": 0.0001, "loss": 1.6139, "step": 14048 }, { "epoch": 1.6138073631612198, "grad_norm": 0.5908873677253723, "learning_rate": 0.0001, "loss": 1.3198, "step": 14049 }, { "epoch": 1.613922233071047, "grad_norm": 0.7088580131530762, "learning_rate": 0.0001, "loss": 1.5666, "step": 14050 }, { "epoch": 1.6140371029808742, "grad_norm": 0.6201817393302917, "learning_rate": 0.0001, "loss": 1.3589, "step": 14051 }, { "epoch": 1.6141519728907012, "grad_norm": 0.584344208240509, "learning_rate": 0.0001, "loss": 1.4533, "step": 14052 }, { "epoch": 1.6142668428005285, "grad_norm": 0.5732455849647522, "learning_rate": 0.0001, "loss": 1.4879, "step": 14053 }, { "epoch": 1.6143817127103555, "grad_norm": 0.5996015071868896, "learning_rate": 0.0001, "loss": 1.3556, "step": 14054 }, { "epoch": 1.6144965826201827, "grad_norm": 0.633338987827301, "learning_rate": 0.0001, "loss": 1.3706, "step": 14055 }, { "epoch": 1.61461145253001, "grad_norm": 0.6130332350730896, "learning_rate": 0.0001, "loss": 1.5321, "step": 14056 }, { "epoch": 1.614726322439837, "grad_norm": 0.5730156898498535, "learning_rate": 0.0001, "loss": 1.2704, "step": 14057 }, { "epoch": 1.614841192349664, "grad_norm": 0.5948575139045715, "learning_rate": 0.0001, "loss": 1.5801, "step": 14058 }, { "epoch": 1.6149560622594912, "grad_norm": 0.6097572445869446, "learning_rate": 0.0001, "loss": 1.3788, "step": 14059 }, { "epoch": 1.6150709321693184, "grad_norm": 0.5689347386360168, "learning_rate": 0.0001, "loss": 1.3254, "step": 14060 }, { "epoch": 1.6151858020791454, "grad_norm": 0.6363088488578796, "learning_rate": 0.0001, "loss": 1.5221, "step": 14061 }, { "epoch": 1.6153006719889724, "grad_norm": 0.6014930605888367, "learning_rate": 0.0001, "loss": 1.5373, "step": 14062 }, { "epoch": 1.6154155418987997, "grad_norm": 0.5899112224578857, "learning_rate": 0.0001, "loss": 1.4386, "step": 14063 }, { "epoch": 1.6155304118086269, "grad_norm": 0.7439025044441223, "learning_rate": 0.0001, "loss": 1.7865, "step": 14064 }, { "epoch": 1.615645281718454, "grad_norm": 0.6421499252319336, "learning_rate": 0.0001, "loss": 1.5911, "step": 14065 }, { "epoch": 1.615760151628281, "grad_norm": 0.6733357310295105, "learning_rate": 0.0001, "loss": 1.506, "step": 14066 }, { "epoch": 1.6158750215381081, "grad_norm": 0.5812866687774658, "learning_rate": 0.0001, "loss": 1.3306, "step": 14067 }, { "epoch": 1.6159898914479354, "grad_norm": 0.6346286535263062, "learning_rate": 0.0001, "loss": 1.5606, "step": 14068 }, { "epoch": 1.6161047613577624, "grad_norm": 0.6660323739051819, "learning_rate": 0.0001, "loss": 1.5526, "step": 14069 }, { "epoch": 1.6162196312675894, "grad_norm": 0.6062818169593811, "learning_rate": 0.0001, "loss": 1.4311, "step": 14070 }, { "epoch": 1.6163345011774166, "grad_norm": 0.6203289031982422, "learning_rate": 0.0001, "loss": 1.5723, "step": 14071 }, { "epoch": 1.6164493710872438, "grad_norm": 0.6779844760894775, "learning_rate": 0.0001, "loss": 1.3864, "step": 14072 }, { "epoch": 1.6165642409970709, "grad_norm": 0.6042460203170776, "learning_rate": 0.0001, "loss": 1.5513, "step": 14073 }, { "epoch": 1.6166791109068979, "grad_norm": 0.6318661570549011, "learning_rate": 0.0001, "loss": 1.3761, "step": 14074 }, { "epoch": 1.616793980816725, "grad_norm": 0.579138457775116, "learning_rate": 0.0001, "loss": 1.3566, "step": 14075 }, { "epoch": 1.6169088507265523, "grad_norm": 0.6678400635719299, "learning_rate": 0.0001, "loss": 1.5174, "step": 14076 }, { "epoch": 1.6170237206363793, "grad_norm": 0.6432377099990845, "learning_rate": 0.0001, "loss": 1.3636, "step": 14077 }, { "epoch": 1.6171385905462063, "grad_norm": 0.6431517004966736, "learning_rate": 0.0001, "loss": 1.4582, "step": 14078 }, { "epoch": 1.6172534604560336, "grad_norm": 0.6440772414207458, "learning_rate": 0.0001, "loss": 1.3715, "step": 14079 }, { "epoch": 1.6173683303658608, "grad_norm": 0.5892122387886047, "learning_rate": 0.0001, "loss": 1.4429, "step": 14080 }, { "epoch": 1.6174832002756878, "grad_norm": 0.5709667801856995, "learning_rate": 0.0001, "loss": 1.4392, "step": 14081 }, { "epoch": 1.6175980701855148, "grad_norm": 0.5939996242523193, "learning_rate": 0.0001, "loss": 1.4266, "step": 14082 }, { "epoch": 1.617712940095342, "grad_norm": 0.6955810785293579, "learning_rate": 0.0001, "loss": 1.453, "step": 14083 }, { "epoch": 1.6178278100051693, "grad_norm": 0.6132066249847412, "learning_rate": 0.0001, "loss": 1.5033, "step": 14084 }, { "epoch": 1.6179426799149963, "grad_norm": 0.5954875946044922, "learning_rate": 0.0001, "loss": 1.4841, "step": 14085 }, { "epoch": 1.6180575498248233, "grad_norm": 0.6023247838020325, "learning_rate": 0.0001, "loss": 1.3852, "step": 14086 }, { "epoch": 1.6181724197346505, "grad_norm": 0.7028385400772095, "learning_rate": 0.0001, "loss": 1.5218, "step": 14087 }, { "epoch": 1.6182872896444778, "grad_norm": 0.6407709121704102, "learning_rate": 0.0001, "loss": 1.6185, "step": 14088 }, { "epoch": 1.6184021595543048, "grad_norm": 0.5889942049980164, "learning_rate": 0.0001, "loss": 1.5281, "step": 14089 }, { "epoch": 1.6185170294641318, "grad_norm": 0.6771349906921387, "learning_rate": 0.0001, "loss": 1.4593, "step": 14090 }, { "epoch": 1.618631899373959, "grad_norm": 0.6546837687492371, "learning_rate": 0.0001, "loss": 1.4992, "step": 14091 }, { "epoch": 1.6187467692837862, "grad_norm": 0.5943772196769714, "learning_rate": 0.0001, "loss": 1.5061, "step": 14092 }, { "epoch": 1.6188616391936133, "grad_norm": 0.5858648419380188, "learning_rate": 0.0001, "loss": 1.3868, "step": 14093 }, { "epoch": 1.6189765091034403, "grad_norm": 0.6995881795883179, "learning_rate": 0.0001, "loss": 1.7393, "step": 14094 }, { "epoch": 1.6190913790132675, "grad_norm": 0.6692748069763184, "learning_rate": 0.0001, "loss": 1.4476, "step": 14095 }, { "epoch": 1.6192062489230947, "grad_norm": 0.5856147408485413, "learning_rate": 0.0001, "loss": 1.3743, "step": 14096 }, { "epoch": 1.6193211188329217, "grad_norm": 0.5996282696723938, "learning_rate": 0.0001, "loss": 1.5481, "step": 14097 }, { "epoch": 1.6194359887427487, "grad_norm": 0.6970905065536499, "learning_rate": 0.0001, "loss": 1.5738, "step": 14098 }, { "epoch": 1.619550858652576, "grad_norm": 0.6515586972236633, "learning_rate": 0.0001, "loss": 1.4475, "step": 14099 }, { "epoch": 1.6196657285624032, "grad_norm": 0.5993524789810181, "learning_rate": 0.0001, "loss": 1.4535, "step": 14100 }, { "epoch": 1.6197805984722302, "grad_norm": 0.5940983295440674, "learning_rate": 0.0001, "loss": 1.4715, "step": 14101 }, { "epoch": 1.6198954683820572, "grad_norm": 0.585989236831665, "learning_rate": 0.0001, "loss": 1.4361, "step": 14102 }, { "epoch": 1.6200103382918845, "grad_norm": 0.5850023031234741, "learning_rate": 0.0001, "loss": 1.5792, "step": 14103 }, { "epoch": 1.6201252082017117, "grad_norm": 0.5683290362358093, "learning_rate": 0.0001, "loss": 1.1528, "step": 14104 }, { "epoch": 1.6202400781115387, "grad_norm": 0.5724591016769409, "learning_rate": 0.0001, "loss": 1.4089, "step": 14105 }, { "epoch": 1.6203549480213657, "grad_norm": 0.7932894825935364, "learning_rate": 0.0001, "loss": 1.6699, "step": 14106 }, { "epoch": 1.620469817931193, "grad_norm": 0.6048719882965088, "learning_rate": 0.0001, "loss": 1.4477, "step": 14107 }, { "epoch": 1.6205846878410202, "grad_norm": 0.6853838562965393, "learning_rate": 0.0001, "loss": 1.5403, "step": 14108 }, { "epoch": 1.6206995577508472, "grad_norm": 0.7041744589805603, "learning_rate": 0.0001, "loss": 1.5995, "step": 14109 }, { "epoch": 1.6208144276606742, "grad_norm": 0.6290716528892517, "learning_rate": 0.0001, "loss": 1.5163, "step": 14110 }, { "epoch": 1.6209292975705014, "grad_norm": 0.5670915842056274, "learning_rate": 0.0001, "loss": 1.3655, "step": 14111 }, { "epoch": 1.6210441674803286, "grad_norm": 0.5751041769981384, "learning_rate": 0.0001, "loss": 1.2977, "step": 14112 }, { "epoch": 1.6211590373901557, "grad_norm": 0.5855926275253296, "learning_rate": 0.0001, "loss": 1.6452, "step": 14113 }, { "epoch": 1.6212739072999827, "grad_norm": 0.6567171812057495, "learning_rate": 0.0001, "loss": 1.3936, "step": 14114 }, { "epoch": 1.62138877720981, "grad_norm": 0.5859187841415405, "learning_rate": 0.0001, "loss": 1.448, "step": 14115 }, { "epoch": 1.6215036471196371, "grad_norm": 0.5759543180465698, "learning_rate": 0.0001, "loss": 1.4566, "step": 14116 }, { "epoch": 1.6216185170294641, "grad_norm": 0.5698956847190857, "learning_rate": 0.0001, "loss": 1.3733, "step": 14117 }, { "epoch": 1.6217333869392911, "grad_norm": 0.6352313160896301, "learning_rate": 0.0001, "loss": 1.3525, "step": 14118 }, { "epoch": 1.6218482568491184, "grad_norm": 0.6209557056427002, "learning_rate": 0.0001, "loss": 1.2902, "step": 14119 }, { "epoch": 1.6219631267589456, "grad_norm": 0.6741890907287598, "learning_rate": 0.0001, "loss": 1.5208, "step": 14120 }, { "epoch": 1.6220779966687726, "grad_norm": 0.5992792248725891, "learning_rate": 0.0001, "loss": 1.3194, "step": 14121 }, { "epoch": 1.6221928665785996, "grad_norm": 0.6535847187042236, "learning_rate": 0.0001, "loss": 1.285, "step": 14122 }, { "epoch": 1.6223077364884269, "grad_norm": 0.5931274890899658, "learning_rate": 0.0001, "loss": 1.5578, "step": 14123 }, { "epoch": 1.622422606398254, "grad_norm": 0.6604529023170471, "learning_rate": 0.0001, "loss": 1.4776, "step": 14124 }, { "epoch": 1.622537476308081, "grad_norm": 0.6257154941558838, "learning_rate": 0.0001, "loss": 1.6617, "step": 14125 }, { "epoch": 1.622652346217908, "grad_norm": 0.6580725908279419, "learning_rate": 0.0001, "loss": 1.1384, "step": 14126 }, { "epoch": 1.6227672161277353, "grad_norm": 0.575198233127594, "learning_rate": 0.0001, "loss": 1.3248, "step": 14127 }, { "epoch": 1.6228820860375626, "grad_norm": 0.591127872467041, "learning_rate": 0.0001, "loss": 1.3635, "step": 14128 }, { "epoch": 1.6229969559473896, "grad_norm": 0.6647173166275024, "learning_rate": 0.0001, "loss": 1.4805, "step": 14129 }, { "epoch": 1.6231118258572166, "grad_norm": 0.5722182393074036, "learning_rate": 0.0001, "loss": 1.4466, "step": 14130 }, { "epoch": 1.6232266957670438, "grad_norm": 0.5584431886672974, "learning_rate": 0.0001, "loss": 1.5135, "step": 14131 }, { "epoch": 1.623341565676871, "grad_norm": 0.6139621138572693, "learning_rate": 0.0001, "loss": 1.6196, "step": 14132 }, { "epoch": 1.623456435586698, "grad_norm": 0.6344627737998962, "learning_rate": 0.0001, "loss": 1.6444, "step": 14133 }, { "epoch": 1.623571305496525, "grad_norm": 0.6031376123428345, "learning_rate": 0.0001, "loss": 1.2777, "step": 14134 }, { "epoch": 1.6236861754063523, "grad_norm": 0.6405968070030212, "learning_rate": 0.0001, "loss": 1.5615, "step": 14135 }, { "epoch": 1.6238010453161795, "grad_norm": 0.7221595644950867, "learning_rate": 0.0001, "loss": 1.2492, "step": 14136 }, { "epoch": 1.6239159152260065, "grad_norm": 0.5714577436447144, "learning_rate": 0.0001, "loss": 1.5219, "step": 14137 }, { "epoch": 1.6240307851358335, "grad_norm": 0.607839047908783, "learning_rate": 0.0001, "loss": 1.5385, "step": 14138 }, { "epoch": 1.6241456550456608, "grad_norm": 0.6695128679275513, "learning_rate": 0.0001, "loss": 1.5084, "step": 14139 }, { "epoch": 1.624260524955488, "grad_norm": 0.6498796343803406, "learning_rate": 0.0001, "loss": 1.5016, "step": 14140 }, { "epoch": 1.624375394865315, "grad_norm": 0.582234799861908, "learning_rate": 0.0001, "loss": 1.2691, "step": 14141 }, { "epoch": 1.624490264775142, "grad_norm": 0.6602511405944824, "learning_rate": 0.0001, "loss": 1.445, "step": 14142 }, { "epoch": 1.6246051346849693, "grad_norm": 0.6686058640480042, "learning_rate": 0.0001, "loss": 1.4448, "step": 14143 }, { "epoch": 1.6247200045947965, "grad_norm": 0.65159010887146, "learning_rate": 0.0001, "loss": 1.428, "step": 14144 }, { "epoch": 1.6248348745046235, "grad_norm": 0.6343734860420227, "learning_rate": 0.0001, "loss": 1.565, "step": 14145 }, { "epoch": 1.6249497444144505, "grad_norm": 0.5925803184509277, "learning_rate": 0.0001, "loss": 1.4588, "step": 14146 }, { "epoch": 1.6250646143242777, "grad_norm": 0.593427300453186, "learning_rate": 0.0001, "loss": 1.3529, "step": 14147 }, { "epoch": 1.625179484234105, "grad_norm": 0.6615251302719116, "learning_rate": 0.0001, "loss": 1.466, "step": 14148 }, { "epoch": 1.625294354143932, "grad_norm": 0.6435301899909973, "learning_rate": 0.0001, "loss": 1.4284, "step": 14149 }, { "epoch": 1.625409224053759, "grad_norm": 0.5664091110229492, "learning_rate": 0.0001, "loss": 1.3037, "step": 14150 }, { "epoch": 1.6255240939635862, "grad_norm": 0.5707288980484009, "learning_rate": 0.0001, "loss": 1.4152, "step": 14151 }, { "epoch": 1.6256389638734134, "grad_norm": 0.6005744934082031, "learning_rate": 0.0001, "loss": 1.4547, "step": 14152 }, { "epoch": 1.6257538337832405, "grad_norm": 0.6113733649253845, "learning_rate": 0.0001, "loss": 1.4973, "step": 14153 }, { "epoch": 1.6258687036930675, "grad_norm": 0.6435218453407288, "learning_rate": 0.0001, "loss": 1.448, "step": 14154 }, { "epoch": 1.6259835736028947, "grad_norm": 0.6765085458755493, "learning_rate": 0.0001, "loss": 1.3975, "step": 14155 }, { "epoch": 1.626098443512722, "grad_norm": 0.6617031693458557, "learning_rate": 0.0001, "loss": 1.5578, "step": 14156 }, { "epoch": 1.626213313422549, "grad_norm": 0.6027497053146362, "learning_rate": 0.0001, "loss": 1.3812, "step": 14157 }, { "epoch": 1.626328183332376, "grad_norm": 0.6761353611946106, "learning_rate": 0.0001, "loss": 1.5668, "step": 14158 }, { "epoch": 1.6264430532422032, "grad_norm": 0.6267996430397034, "learning_rate": 0.0001, "loss": 1.5065, "step": 14159 }, { "epoch": 1.6265579231520304, "grad_norm": 0.6282928586006165, "learning_rate": 0.0001, "loss": 1.5412, "step": 14160 }, { "epoch": 1.6266727930618574, "grad_norm": 0.5661084651947021, "learning_rate": 0.0001, "loss": 1.4419, "step": 14161 }, { "epoch": 1.6267876629716844, "grad_norm": 0.6607252955436707, "learning_rate": 0.0001, "loss": 1.5683, "step": 14162 }, { "epoch": 1.6269025328815117, "grad_norm": 0.6275922060012817, "learning_rate": 0.0001, "loss": 1.2246, "step": 14163 }, { "epoch": 1.6270174027913389, "grad_norm": 0.5838649272918701, "learning_rate": 0.0001, "loss": 1.4886, "step": 14164 }, { "epoch": 1.627132272701166, "grad_norm": 0.627335786819458, "learning_rate": 0.0001, "loss": 1.3975, "step": 14165 }, { "epoch": 1.627247142610993, "grad_norm": 0.6400899291038513, "learning_rate": 0.0001, "loss": 1.3117, "step": 14166 }, { "epoch": 1.6273620125208201, "grad_norm": 0.6840837597846985, "learning_rate": 0.0001, "loss": 1.6182, "step": 14167 }, { "epoch": 1.6274768824306474, "grad_norm": 0.6731758713722229, "learning_rate": 0.0001, "loss": 1.3957, "step": 14168 }, { "epoch": 1.6275917523404744, "grad_norm": 0.6186261177062988, "learning_rate": 0.0001, "loss": 1.3614, "step": 14169 }, { "epoch": 1.6277066222503014, "grad_norm": 0.6093758344650269, "learning_rate": 0.0001, "loss": 1.4298, "step": 14170 }, { "epoch": 1.6278214921601286, "grad_norm": 0.8518862128257751, "learning_rate": 0.0001, "loss": 1.1226, "step": 14171 }, { "epoch": 1.6279363620699558, "grad_norm": 0.6340732574462891, "learning_rate": 0.0001, "loss": 1.55, "step": 14172 }, { "epoch": 1.6280512319797829, "grad_norm": 0.5993664264678955, "learning_rate": 0.0001, "loss": 1.4323, "step": 14173 }, { "epoch": 1.6281661018896099, "grad_norm": 0.6589725017547607, "learning_rate": 0.0001, "loss": 1.321, "step": 14174 }, { "epoch": 1.628280971799437, "grad_norm": 0.668163001537323, "learning_rate": 0.0001, "loss": 1.3883, "step": 14175 }, { "epoch": 1.6283958417092643, "grad_norm": 0.6377967000007629, "learning_rate": 0.0001, "loss": 1.3221, "step": 14176 }, { "epoch": 1.6285107116190913, "grad_norm": 0.7198302149772644, "learning_rate": 0.0001, "loss": 1.6935, "step": 14177 }, { "epoch": 1.6286255815289183, "grad_norm": 0.6226831078529358, "learning_rate": 0.0001, "loss": 1.533, "step": 14178 }, { "epoch": 1.6287404514387456, "grad_norm": 0.6308751106262207, "learning_rate": 0.0001, "loss": 1.3146, "step": 14179 }, { "epoch": 1.6288553213485728, "grad_norm": 0.6031596064567566, "learning_rate": 0.0001, "loss": 1.4562, "step": 14180 }, { "epoch": 1.6289701912583998, "grad_norm": 0.6148450374603271, "learning_rate": 0.0001, "loss": 1.5277, "step": 14181 }, { "epoch": 1.6290850611682268, "grad_norm": 0.6116688847541809, "learning_rate": 0.0001, "loss": 1.5069, "step": 14182 }, { "epoch": 1.629199931078054, "grad_norm": 0.594800591468811, "learning_rate": 0.0001, "loss": 1.5319, "step": 14183 }, { "epoch": 1.6293148009878813, "grad_norm": 0.5846831202507019, "learning_rate": 0.0001, "loss": 1.371, "step": 14184 }, { "epoch": 1.6294296708977083, "grad_norm": 0.5682091116905212, "learning_rate": 0.0001, "loss": 1.2561, "step": 14185 }, { "epoch": 1.6295445408075353, "grad_norm": 0.6536598801612854, "learning_rate": 0.0001, "loss": 1.5194, "step": 14186 }, { "epoch": 1.6296594107173625, "grad_norm": 0.6191934943199158, "learning_rate": 0.0001, "loss": 1.5155, "step": 14187 }, { "epoch": 1.6297742806271898, "grad_norm": 0.5823689699172974, "learning_rate": 0.0001, "loss": 1.4693, "step": 14188 }, { "epoch": 1.6298891505370168, "grad_norm": 0.6082186698913574, "learning_rate": 0.0001, "loss": 1.4571, "step": 14189 }, { "epoch": 1.630004020446844, "grad_norm": 0.6357173919677734, "learning_rate": 0.0001, "loss": 1.2842, "step": 14190 }, { "epoch": 1.630118890356671, "grad_norm": 0.5339798331260681, "learning_rate": 0.0001, "loss": 1.271, "step": 14191 }, { "epoch": 1.6302337602664982, "grad_norm": 0.6120511889457703, "learning_rate": 0.0001, "loss": 1.3902, "step": 14192 }, { "epoch": 1.6303486301763255, "grad_norm": 0.6843846440315247, "learning_rate": 0.0001, "loss": 1.4445, "step": 14193 }, { "epoch": 1.6304635000861525, "grad_norm": 0.5969381928443909, "learning_rate": 0.0001, "loss": 1.4672, "step": 14194 }, { "epoch": 1.6305783699959795, "grad_norm": 0.621590793132782, "learning_rate": 0.0001, "loss": 1.4124, "step": 14195 }, { "epoch": 1.6306932399058067, "grad_norm": 0.6120715737342834, "learning_rate": 0.0001, "loss": 1.5246, "step": 14196 }, { "epoch": 1.630808109815634, "grad_norm": 0.6380988955497742, "learning_rate": 0.0001, "loss": 1.5292, "step": 14197 }, { "epoch": 1.630922979725461, "grad_norm": 0.6133922338485718, "learning_rate": 0.0001, "loss": 1.4572, "step": 14198 }, { "epoch": 1.631037849635288, "grad_norm": 0.6428544521331787, "learning_rate": 0.0001, "loss": 1.3983, "step": 14199 }, { "epoch": 1.6311527195451152, "grad_norm": 0.5923104286193848, "learning_rate": 0.0001, "loss": 1.3451, "step": 14200 }, { "epoch": 1.6312675894549424, "grad_norm": 0.6585755348205566, "learning_rate": 0.0001, "loss": 1.4829, "step": 14201 }, { "epoch": 1.6313824593647694, "grad_norm": 0.5719717144966125, "learning_rate": 0.0001, "loss": 1.4134, "step": 14202 }, { "epoch": 1.6314973292745965, "grad_norm": 0.6237934231758118, "learning_rate": 0.0001, "loss": 1.6209, "step": 14203 }, { "epoch": 1.6316121991844237, "grad_norm": 0.6467723250389099, "learning_rate": 0.0001, "loss": 1.3684, "step": 14204 }, { "epoch": 1.631727069094251, "grad_norm": 0.7436313033103943, "learning_rate": 0.0001, "loss": 1.6965, "step": 14205 }, { "epoch": 1.631841939004078, "grad_norm": 0.6541860699653625, "learning_rate": 0.0001, "loss": 1.5145, "step": 14206 }, { "epoch": 1.631956808913905, "grad_norm": 0.6902755498886108, "learning_rate": 0.0001, "loss": 1.7396, "step": 14207 }, { "epoch": 1.6320716788237322, "grad_norm": 0.5739198923110962, "learning_rate": 0.0001, "loss": 1.4084, "step": 14208 }, { "epoch": 1.6321865487335594, "grad_norm": 0.6371223330497742, "learning_rate": 0.0001, "loss": 1.5507, "step": 14209 }, { "epoch": 1.6323014186433864, "grad_norm": 0.5753113031387329, "learning_rate": 0.0001, "loss": 1.3629, "step": 14210 }, { "epoch": 1.6324162885532134, "grad_norm": 0.5805362462997437, "learning_rate": 0.0001, "loss": 1.5456, "step": 14211 }, { "epoch": 1.6325311584630406, "grad_norm": 0.5889348983764648, "learning_rate": 0.0001, "loss": 1.331, "step": 14212 }, { "epoch": 1.6326460283728679, "grad_norm": 0.6165045499801636, "learning_rate": 0.0001, "loss": 1.5128, "step": 14213 }, { "epoch": 1.6327608982826949, "grad_norm": 0.6740553975105286, "learning_rate": 0.0001, "loss": 1.4898, "step": 14214 }, { "epoch": 1.632875768192522, "grad_norm": 0.6100121140480042, "learning_rate": 0.0001, "loss": 1.3378, "step": 14215 }, { "epoch": 1.6329906381023491, "grad_norm": 0.6665295958518982, "learning_rate": 0.0001, "loss": 1.5055, "step": 14216 }, { "epoch": 1.6331055080121764, "grad_norm": 0.574766218662262, "learning_rate": 0.0001, "loss": 1.4792, "step": 14217 }, { "epoch": 1.6332203779220034, "grad_norm": 0.6160637140274048, "learning_rate": 0.0001, "loss": 1.529, "step": 14218 }, { "epoch": 1.6333352478318304, "grad_norm": 0.6245967745780945, "learning_rate": 0.0001, "loss": 1.5056, "step": 14219 }, { "epoch": 1.6334501177416576, "grad_norm": 0.6003860235214233, "learning_rate": 0.0001, "loss": 1.4246, "step": 14220 }, { "epoch": 1.6335649876514848, "grad_norm": 0.5847166776657104, "learning_rate": 0.0001, "loss": 1.3012, "step": 14221 }, { "epoch": 1.6336798575613118, "grad_norm": 0.567745566368103, "learning_rate": 0.0001, "loss": 1.3486, "step": 14222 }, { "epoch": 1.6337947274711389, "grad_norm": 0.7059317231178284, "learning_rate": 0.0001, "loss": 1.4865, "step": 14223 }, { "epoch": 1.633909597380966, "grad_norm": 0.6010088920593262, "learning_rate": 0.0001, "loss": 1.4275, "step": 14224 }, { "epoch": 1.6340244672907933, "grad_norm": 0.7067935466766357, "learning_rate": 0.0001, "loss": 1.5533, "step": 14225 }, { "epoch": 1.6341393372006203, "grad_norm": 0.6423289775848389, "learning_rate": 0.0001, "loss": 1.4735, "step": 14226 }, { "epoch": 1.6342542071104473, "grad_norm": 0.6251084804534912, "learning_rate": 0.0001, "loss": 1.3356, "step": 14227 }, { "epoch": 1.6343690770202746, "grad_norm": 0.6532206535339355, "learning_rate": 0.0001, "loss": 1.3189, "step": 14228 }, { "epoch": 1.6344839469301018, "grad_norm": 0.5804827213287354, "learning_rate": 0.0001, "loss": 1.527, "step": 14229 }, { "epoch": 1.6345988168399288, "grad_norm": 0.6210793256759644, "learning_rate": 0.0001, "loss": 1.4906, "step": 14230 }, { "epoch": 1.6347136867497558, "grad_norm": 0.5873556137084961, "learning_rate": 0.0001, "loss": 1.2527, "step": 14231 }, { "epoch": 1.634828556659583, "grad_norm": 0.5766886472702026, "learning_rate": 0.0001, "loss": 1.4624, "step": 14232 }, { "epoch": 1.6349434265694103, "grad_norm": 0.6255853772163391, "learning_rate": 0.0001, "loss": 1.6646, "step": 14233 }, { "epoch": 1.6350582964792373, "grad_norm": 0.6151155829429626, "learning_rate": 0.0001, "loss": 1.4379, "step": 14234 }, { "epoch": 1.6351731663890643, "grad_norm": 0.602134644985199, "learning_rate": 0.0001, "loss": 1.4381, "step": 14235 }, { "epoch": 1.6352880362988915, "grad_norm": 0.5812894105911255, "learning_rate": 0.0001, "loss": 1.5664, "step": 14236 }, { "epoch": 1.6354029062087188, "grad_norm": 0.5972526669502258, "learning_rate": 0.0001, "loss": 1.5256, "step": 14237 }, { "epoch": 1.6355177761185458, "grad_norm": 0.6522207260131836, "learning_rate": 0.0001, "loss": 1.4439, "step": 14238 }, { "epoch": 1.6356326460283728, "grad_norm": 0.5848917961120605, "learning_rate": 0.0001, "loss": 1.114, "step": 14239 }, { "epoch": 1.6357475159382, "grad_norm": 0.6326489448547363, "learning_rate": 0.0001, "loss": 1.3317, "step": 14240 }, { "epoch": 1.6358623858480272, "grad_norm": 0.6178004741668701, "learning_rate": 0.0001, "loss": 1.3889, "step": 14241 }, { "epoch": 1.6359772557578542, "grad_norm": 0.6275560259819031, "learning_rate": 0.0001, "loss": 1.3722, "step": 14242 }, { "epoch": 1.6360921256676813, "grad_norm": 0.5886978507041931, "learning_rate": 0.0001, "loss": 1.505, "step": 14243 }, { "epoch": 1.6362069955775085, "grad_norm": 0.6700698137283325, "learning_rate": 0.0001, "loss": 1.5026, "step": 14244 }, { "epoch": 1.6363218654873357, "grad_norm": 0.6027423143386841, "learning_rate": 0.0001, "loss": 1.5032, "step": 14245 }, { "epoch": 1.6364367353971627, "grad_norm": 0.6072428226470947, "learning_rate": 0.0001, "loss": 1.4246, "step": 14246 }, { "epoch": 1.6365516053069897, "grad_norm": 0.6349037289619446, "learning_rate": 0.0001, "loss": 1.0018, "step": 14247 }, { "epoch": 1.636666475216817, "grad_norm": 0.6661435961723328, "learning_rate": 0.0001, "loss": 1.6446, "step": 14248 }, { "epoch": 1.6367813451266442, "grad_norm": 0.6801328063011169, "learning_rate": 0.0001, "loss": 1.5803, "step": 14249 }, { "epoch": 1.6368962150364712, "grad_norm": 0.6124873161315918, "learning_rate": 0.0001, "loss": 1.3097, "step": 14250 }, { "epoch": 1.6370110849462982, "grad_norm": 0.6170512437820435, "learning_rate": 0.0001, "loss": 1.3019, "step": 14251 }, { "epoch": 1.6371259548561254, "grad_norm": 0.6388773918151855, "learning_rate": 0.0001, "loss": 1.6134, "step": 14252 }, { "epoch": 1.6372408247659527, "grad_norm": 0.6021920442581177, "learning_rate": 0.0001, "loss": 1.483, "step": 14253 }, { "epoch": 1.6373556946757797, "grad_norm": 0.6165171265602112, "learning_rate": 0.0001, "loss": 1.5962, "step": 14254 }, { "epoch": 1.6374705645856067, "grad_norm": 0.5764616131782532, "learning_rate": 0.0001, "loss": 1.2656, "step": 14255 }, { "epoch": 1.637585434495434, "grad_norm": 0.6353535652160645, "learning_rate": 0.0001, "loss": 1.4185, "step": 14256 }, { "epoch": 1.6377003044052612, "grad_norm": 0.5799005627632141, "learning_rate": 0.0001, "loss": 1.6052, "step": 14257 }, { "epoch": 1.6378151743150882, "grad_norm": 0.6138670444488525, "learning_rate": 0.0001, "loss": 1.4453, "step": 14258 }, { "epoch": 1.6379300442249152, "grad_norm": 0.5995337963104248, "learning_rate": 0.0001, "loss": 1.622, "step": 14259 }, { "epoch": 1.6380449141347424, "grad_norm": 0.5708662271499634, "learning_rate": 0.0001, "loss": 1.3962, "step": 14260 }, { "epoch": 1.6381597840445696, "grad_norm": 0.6403148174285889, "learning_rate": 0.0001, "loss": 1.5728, "step": 14261 }, { "epoch": 1.6382746539543966, "grad_norm": 0.6167385578155518, "learning_rate": 0.0001, "loss": 1.4572, "step": 14262 }, { "epoch": 1.6383895238642237, "grad_norm": 0.5929616689682007, "learning_rate": 0.0001, "loss": 1.4688, "step": 14263 }, { "epoch": 1.6385043937740509, "grad_norm": 0.5963886380195618, "learning_rate": 0.0001, "loss": 1.304, "step": 14264 }, { "epoch": 1.6386192636838781, "grad_norm": 0.596179723739624, "learning_rate": 0.0001, "loss": 1.3922, "step": 14265 }, { "epoch": 1.6387341335937051, "grad_norm": 0.6750287413597107, "learning_rate": 0.0001, "loss": 1.4107, "step": 14266 }, { "epoch": 1.6388490035035321, "grad_norm": 0.6416113376617432, "learning_rate": 0.0001, "loss": 1.4031, "step": 14267 }, { "epoch": 1.6389638734133594, "grad_norm": 0.547660231590271, "learning_rate": 0.0001, "loss": 1.2633, "step": 14268 }, { "epoch": 1.6390787433231866, "grad_norm": 0.6302758455276489, "learning_rate": 0.0001, "loss": 1.526, "step": 14269 }, { "epoch": 1.6391936132330136, "grad_norm": 0.6041958928108215, "learning_rate": 0.0001, "loss": 1.5458, "step": 14270 }, { "epoch": 1.6393084831428406, "grad_norm": 0.5641599893569946, "learning_rate": 0.0001, "loss": 1.2687, "step": 14271 }, { "epoch": 1.6394233530526678, "grad_norm": 0.6017992496490479, "learning_rate": 0.0001, "loss": 1.4718, "step": 14272 }, { "epoch": 1.639538222962495, "grad_norm": 0.5696545243263245, "learning_rate": 0.0001, "loss": 1.5084, "step": 14273 }, { "epoch": 1.639653092872322, "grad_norm": 0.6044654250144958, "learning_rate": 0.0001, "loss": 1.4717, "step": 14274 }, { "epoch": 1.639767962782149, "grad_norm": 0.5639116168022156, "learning_rate": 0.0001, "loss": 1.4058, "step": 14275 }, { "epoch": 1.6398828326919763, "grad_norm": 0.6422322988510132, "learning_rate": 0.0001, "loss": 1.5171, "step": 14276 }, { "epoch": 1.6399977026018036, "grad_norm": 0.651578426361084, "learning_rate": 0.0001, "loss": 1.3275, "step": 14277 }, { "epoch": 1.6401125725116306, "grad_norm": 0.6262843012809753, "learning_rate": 0.0001, "loss": 1.4298, "step": 14278 }, { "epoch": 1.6402274424214576, "grad_norm": 0.6277267336845398, "learning_rate": 0.0001, "loss": 1.5122, "step": 14279 }, { "epoch": 1.6403423123312848, "grad_norm": 0.6524622440338135, "learning_rate": 0.0001, "loss": 1.5141, "step": 14280 }, { "epoch": 1.640457182241112, "grad_norm": 0.6294464468955994, "learning_rate": 0.0001, "loss": 1.5869, "step": 14281 }, { "epoch": 1.640572052150939, "grad_norm": 0.609076201915741, "learning_rate": 0.0001, "loss": 1.4457, "step": 14282 }, { "epoch": 1.640686922060766, "grad_norm": 0.5797666907310486, "learning_rate": 0.0001, "loss": 1.5167, "step": 14283 }, { "epoch": 1.6408017919705933, "grad_norm": 0.593920111656189, "learning_rate": 0.0001, "loss": 1.3679, "step": 14284 }, { "epoch": 1.6409166618804205, "grad_norm": 0.6230970025062561, "learning_rate": 0.0001, "loss": 1.4868, "step": 14285 }, { "epoch": 1.6410315317902475, "grad_norm": 0.6537438631057739, "learning_rate": 0.0001, "loss": 1.5031, "step": 14286 }, { "epoch": 1.6411464017000745, "grad_norm": 0.5811769962310791, "learning_rate": 0.0001, "loss": 1.3242, "step": 14287 }, { "epoch": 1.6412612716099018, "grad_norm": 0.6686524748802185, "learning_rate": 0.0001, "loss": 1.6498, "step": 14288 }, { "epoch": 1.641376141519729, "grad_norm": 0.6294060349464417, "learning_rate": 0.0001, "loss": 1.4349, "step": 14289 }, { "epoch": 1.641491011429556, "grad_norm": 0.6076821684837341, "learning_rate": 0.0001, "loss": 1.4389, "step": 14290 }, { "epoch": 1.641605881339383, "grad_norm": 0.6137123107910156, "learning_rate": 0.0001, "loss": 1.4509, "step": 14291 }, { "epoch": 1.6417207512492102, "grad_norm": 0.5927547812461853, "learning_rate": 0.0001, "loss": 1.3281, "step": 14292 }, { "epoch": 1.6418356211590375, "grad_norm": 0.6271234750747681, "learning_rate": 0.0001, "loss": 1.4946, "step": 14293 }, { "epoch": 1.6419504910688645, "grad_norm": 0.6225645542144775, "learning_rate": 0.0001, "loss": 1.6972, "step": 14294 }, { "epoch": 1.6420653609786915, "grad_norm": 0.6010716557502747, "learning_rate": 0.0001, "loss": 1.4282, "step": 14295 }, { "epoch": 1.6421802308885187, "grad_norm": 0.5875716805458069, "learning_rate": 0.0001, "loss": 1.4042, "step": 14296 }, { "epoch": 1.642295100798346, "grad_norm": 0.5978166460990906, "learning_rate": 0.0001, "loss": 1.3037, "step": 14297 }, { "epoch": 1.642409970708173, "grad_norm": 0.6238410472869873, "learning_rate": 0.0001, "loss": 1.5442, "step": 14298 }, { "epoch": 1.642524840618, "grad_norm": 0.6206856966018677, "learning_rate": 0.0001, "loss": 1.5749, "step": 14299 }, { "epoch": 1.6426397105278272, "grad_norm": 0.6737737655639648, "learning_rate": 0.0001, "loss": 1.7241, "step": 14300 }, { "epoch": 1.6427545804376544, "grad_norm": 0.6297151446342468, "learning_rate": 0.0001, "loss": 1.5897, "step": 14301 }, { "epoch": 1.6428694503474814, "grad_norm": 0.6496241092681885, "learning_rate": 0.0001, "loss": 1.5686, "step": 14302 }, { "epoch": 1.6429843202573085, "grad_norm": 0.6007269620895386, "learning_rate": 0.0001, "loss": 1.4899, "step": 14303 }, { "epoch": 1.6430991901671357, "grad_norm": 0.7573440670967102, "learning_rate": 0.0001, "loss": 1.4469, "step": 14304 }, { "epoch": 1.643214060076963, "grad_norm": 0.6531268954277039, "learning_rate": 0.0001, "loss": 1.44, "step": 14305 }, { "epoch": 1.64332892998679, "grad_norm": 0.6082087159156799, "learning_rate": 0.0001, "loss": 1.5434, "step": 14306 }, { "epoch": 1.643443799896617, "grad_norm": 0.641038179397583, "learning_rate": 0.0001, "loss": 1.4916, "step": 14307 }, { "epoch": 1.6435586698064442, "grad_norm": 0.6176828145980835, "learning_rate": 0.0001, "loss": 1.3881, "step": 14308 }, { "epoch": 1.6436735397162714, "grad_norm": 0.6139614582061768, "learning_rate": 0.0001, "loss": 1.4972, "step": 14309 }, { "epoch": 1.6437884096260984, "grad_norm": 0.6146692037582397, "learning_rate": 0.0001, "loss": 1.3981, "step": 14310 }, { "epoch": 1.6439032795359254, "grad_norm": 0.6403799057006836, "learning_rate": 0.0001, "loss": 1.5217, "step": 14311 }, { "epoch": 1.6440181494457526, "grad_norm": 0.6941229104995728, "learning_rate": 0.0001, "loss": 1.3688, "step": 14312 }, { "epoch": 1.6441330193555799, "grad_norm": 0.6253212094306946, "learning_rate": 0.0001, "loss": 1.4806, "step": 14313 }, { "epoch": 1.6442478892654069, "grad_norm": 0.6304717063903809, "learning_rate": 0.0001, "loss": 1.3255, "step": 14314 }, { "epoch": 1.644362759175234, "grad_norm": 0.6108677387237549, "learning_rate": 0.0001, "loss": 1.549, "step": 14315 }, { "epoch": 1.6444776290850611, "grad_norm": 0.7766926288604736, "learning_rate": 0.0001, "loss": 1.6879, "step": 14316 }, { "epoch": 1.6445924989948884, "grad_norm": 0.6032610535621643, "learning_rate": 0.0001, "loss": 1.3837, "step": 14317 }, { "epoch": 1.6447073689047154, "grad_norm": 0.6643081307411194, "learning_rate": 0.0001, "loss": 1.5128, "step": 14318 }, { "epoch": 1.6448222388145424, "grad_norm": 0.6397324800491333, "learning_rate": 0.0001, "loss": 1.3905, "step": 14319 }, { "epoch": 1.6449371087243696, "grad_norm": 0.5864359736442566, "learning_rate": 0.0001, "loss": 1.2971, "step": 14320 }, { "epoch": 1.6450519786341968, "grad_norm": 0.6553377509117126, "learning_rate": 0.0001, "loss": 1.3454, "step": 14321 }, { "epoch": 1.6451668485440238, "grad_norm": 0.7063130736351013, "learning_rate": 0.0001, "loss": 1.2324, "step": 14322 }, { "epoch": 1.6452817184538508, "grad_norm": 0.6475198864936829, "learning_rate": 0.0001, "loss": 1.4901, "step": 14323 }, { "epoch": 1.645396588363678, "grad_norm": 0.6414164304733276, "learning_rate": 0.0001, "loss": 1.4304, "step": 14324 }, { "epoch": 1.6455114582735053, "grad_norm": 0.6583581566810608, "learning_rate": 0.0001, "loss": 1.6196, "step": 14325 }, { "epoch": 1.6456263281833323, "grad_norm": 0.6059905290603638, "learning_rate": 0.0001, "loss": 1.5552, "step": 14326 }, { "epoch": 1.6457411980931596, "grad_norm": 0.6345920562744141, "learning_rate": 0.0001, "loss": 1.639, "step": 14327 }, { "epoch": 1.6458560680029866, "grad_norm": 0.620621919631958, "learning_rate": 0.0001, "loss": 1.5369, "step": 14328 }, { "epoch": 1.6459709379128138, "grad_norm": 0.6474280953407288, "learning_rate": 0.0001, "loss": 1.4068, "step": 14329 }, { "epoch": 1.646085807822641, "grad_norm": 0.5968835949897766, "learning_rate": 0.0001, "loss": 1.4271, "step": 14330 }, { "epoch": 1.646200677732468, "grad_norm": 0.6671695113182068, "learning_rate": 0.0001, "loss": 1.1918, "step": 14331 }, { "epoch": 1.646315547642295, "grad_norm": 0.622138500213623, "learning_rate": 0.0001, "loss": 1.2681, "step": 14332 }, { "epoch": 1.6464304175521223, "grad_norm": 0.587945818901062, "learning_rate": 0.0001, "loss": 1.391, "step": 14333 }, { "epoch": 1.6465452874619495, "grad_norm": 0.6441559791564941, "learning_rate": 0.0001, "loss": 1.5257, "step": 14334 }, { "epoch": 1.6466601573717765, "grad_norm": 0.6618092656135559, "learning_rate": 0.0001, "loss": 1.6387, "step": 14335 }, { "epoch": 1.6467750272816035, "grad_norm": 0.5723019242286682, "learning_rate": 0.0001, "loss": 1.5012, "step": 14336 }, { "epoch": 1.6468898971914308, "grad_norm": 0.6029953956604004, "learning_rate": 0.0001, "loss": 1.3561, "step": 14337 }, { "epoch": 1.647004767101258, "grad_norm": 0.646026074886322, "learning_rate": 0.0001, "loss": 1.5387, "step": 14338 }, { "epoch": 1.647119637011085, "grad_norm": 0.6190069913864136, "learning_rate": 0.0001, "loss": 1.5599, "step": 14339 }, { "epoch": 1.647234506920912, "grad_norm": 0.6896335482597351, "learning_rate": 0.0001, "loss": 1.6861, "step": 14340 }, { "epoch": 1.6473493768307392, "grad_norm": 0.611846387386322, "learning_rate": 0.0001, "loss": 1.4243, "step": 14341 }, { "epoch": 1.6474642467405665, "grad_norm": 0.5718992948532104, "learning_rate": 0.0001, "loss": 1.2889, "step": 14342 }, { "epoch": 1.6475791166503935, "grad_norm": 0.5699307918548584, "learning_rate": 0.0001, "loss": 1.4659, "step": 14343 }, { "epoch": 1.6476939865602205, "grad_norm": 0.5619608163833618, "learning_rate": 0.0001, "loss": 1.2261, "step": 14344 }, { "epoch": 1.6478088564700477, "grad_norm": 0.6159090399742126, "learning_rate": 0.0001, "loss": 1.5151, "step": 14345 }, { "epoch": 1.647923726379875, "grad_norm": 0.5962672233581543, "learning_rate": 0.0001, "loss": 1.1608, "step": 14346 }, { "epoch": 1.648038596289702, "grad_norm": 0.6491231918334961, "learning_rate": 0.0001, "loss": 1.4453, "step": 14347 }, { "epoch": 1.648153466199529, "grad_norm": 0.6284507513046265, "learning_rate": 0.0001, "loss": 1.5089, "step": 14348 }, { "epoch": 1.6482683361093562, "grad_norm": 0.5969189405441284, "learning_rate": 0.0001, "loss": 1.5054, "step": 14349 }, { "epoch": 1.6483832060191834, "grad_norm": 0.5988194346427917, "learning_rate": 0.0001, "loss": 1.352, "step": 14350 }, { "epoch": 1.6484980759290104, "grad_norm": 0.6822172999382019, "learning_rate": 0.0001, "loss": 1.5764, "step": 14351 }, { "epoch": 1.6486129458388374, "grad_norm": 0.6363539695739746, "learning_rate": 0.0001, "loss": 1.6048, "step": 14352 }, { "epoch": 1.6487278157486647, "grad_norm": 0.65143221616745, "learning_rate": 0.0001, "loss": 1.5366, "step": 14353 }, { "epoch": 1.648842685658492, "grad_norm": 0.6166749000549316, "learning_rate": 0.0001, "loss": 1.4744, "step": 14354 }, { "epoch": 1.648957555568319, "grad_norm": 0.6183647513389587, "learning_rate": 0.0001, "loss": 1.4902, "step": 14355 }, { "epoch": 1.649072425478146, "grad_norm": 0.5944738984107971, "learning_rate": 0.0001, "loss": 1.4701, "step": 14356 }, { "epoch": 1.6491872953879732, "grad_norm": 0.6394641995429993, "learning_rate": 0.0001, "loss": 1.3743, "step": 14357 }, { "epoch": 1.6493021652978004, "grad_norm": 0.6612902283668518, "learning_rate": 0.0001, "loss": 1.5309, "step": 14358 }, { "epoch": 1.6494170352076274, "grad_norm": 0.6034130454063416, "learning_rate": 0.0001, "loss": 1.564, "step": 14359 }, { "epoch": 1.6495319051174544, "grad_norm": 0.5944687128067017, "learning_rate": 0.0001, "loss": 1.4008, "step": 14360 }, { "epoch": 1.6496467750272816, "grad_norm": 0.6049661040306091, "learning_rate": 0.0001, "loss": 1.5983, "step": 14361 }, { "epoch": 1.6497616449371089, "grad_norm": 0.5754156112670898, "learning_rate": 0.0001, "loss": 1.4454, "step": 14362 }, { "epoch": 1.6498765148469359, "grad_norm": 0.6179141998291016, "learning_rate": 0.0001, "loss": 1.4861, "step": 14363 }, { "epoch": 1.6499913847567629, "grad_norm": 0.5943996906280518, "learning_rate": 0.0001, "loss": 1.4576, "step": 14364 }, { "epoch": 1.65010625466659, "grad_norm": 0.6303023099899292, "learning_rate": 0.0001, "loss": 1.4891, "step": 14365 }, { "epoch": 1.6502211245764173, "grad_norm": 0.5632081627845764, "learning_rate": 0.0001, "loss": 1.3568, "step": 14366 }, { "epoch": 1.6503359944862444, "grad_norm": 0.574306070804596, "learning_rate": 0.0001, "loss": 1.2801, "step": 14367 }, { "epoch": 1.6504508643960714, "grad_norm": 0.5834609270095825, "learning_rate": 0.0001, "loss": 1.3699, "step": 14368 }, { "epoch": 1.6505657343058986, "grad_norm": 0.5941557288169861, "learning_rate": 0.0001, "loss": 1.398, "step": 14369 }, { "epoch": 1.6506806042157258, "grad_norm": 0.6643593907356262, "learning_rate": 0.0001, "loss": 1.6187, "step": 14370 }, { "epoch": 1.6507954741255528, "grad_norm": 0.5751389861106873, "learning_rate": 0.0001, "loss": 1.4632, "step": 14371 }, { "epoch": 1.6509103440353798, "grad_norm": 0.6634930968284607, "learning_rate": 0.0001, "loss": 1.6016, "step": 14372 }, { "epoch": 1.651025213945207, "grad_norm": 0.6502936482429504, "learning_rate": 0.0001, "loss": 1.4685, "step": 14373 }, { "epoch": 1.6511400838550343, "grad_norm": 0.6059218049049377, "learning_rate": 0.0001, "loss": 1.4038, "step": 14374 }, { "epoch": 1.6512549537648613, "grad_norm": 0.6395809650421143, "learning_rate": 0.0001, "loss": 1.5501, "step": 14375 }, { "epoch": 1.6513698236746883, "grad_norm": 0.614824116230011, "learning_rate": 0.0001, "loss": 1.3675, "step": 14376 }, { "epoch": 1.6514846935845156, "grad_norm": 0.6158643364906311, "learning_rate": 0.0001, "loss": 1.6117, "step": 14377 }, { "epoch": 1.6515995634943428, "grad_norm": 0.5627233982086182, "learning_rate": 0.0001, "loss": 1.4827, "step": 14378 }, { "epoch": 1.6517144334041698, "grad_norm": 0.7553406953811646, "learning_rate": 0.0001, "loss": 1.4885, "step": 14379 }, { "epoch": 1.6518293033139968, "grad_norm": 0.5953923463821411, "learning_rate": 0.0001, "loss": 1.5845, "step": 14380 }, { "epoch": 1.651944173223824, "grad_norm": 0.6409009695053101, "learning_rate": 0.0001, "loss": 1.4745, "step": 14381 }, { "epoch": 1.6520590431336513, "grad_norm": 0.5870394110679626, "learning_rate": 0.0001, "loss": 1.4941, "step": 14382 }, { "epoch": 1.6521739130434783, "grad_norm": 0.6005561947822571, "learning_rate": 0.0001, "loss": 1.5428, "step": 14383 }, { "epoch": 1.6522887829533053, "grad_norm": 0.651766836643219, "learning_rate": 0.0001, "loss": 1.4715, "step": 14384 }, { "epoch": 1.6524036528631325, "grad_norm": 0.5785371661186218, "learning_rate": 0.0001, "loss": 1.5496, "step": 14385 }, { "epoch": 1.6525185227729597, "grad_norm": 0.6617969274520874, "learning_rate": 0.0001, "loss": 1.4035, "step": 14386 }, { "epoch": 1.6526333926827868, "grad_norm": 0.6037518382072449, "learning_rate": 0.0001, "loss": 1.5064, "step": 14387 }, { "epoch": 1.6527482625926138, "grad_norm": 0.6930570006370544, "learning_rate": 0.0001, "loss": 1.74, "step": 14388 }, { "epoch": 1.652863132502441, "grad_norm": 0.6605214476585388, "learning_rate": 0.0001, "loss": 1.4112, "step": 14389 }, { "epoch": 1.6529780024122682, "grad_norm": 0.6189714074134827, "learning_rate": 0.0001, "loss": 1.3354, "step": 14390 }, { "epoch": 1.6530928723220952, "grad_norm": 0.6060426235198975, "learning_rate": 0.0001, "loss": 1.5953, "step": 14391 }, { "epoch": 1.6532077422319222, "grad_norm": 0.5811141729354858, "learning_rate": 0.0001, "loss": 1.2768, "step": 14392 }, { "epoch": 1.6533226121417495, "grad_norm": 0.608869731426239, "learning_rate": 0.0001, "loss": 1.4701, "step": 14393 }, { "epoch": 1.6534374820515767, "grad_norm": 0.6065149307250977, "learning_rate": 0.0001, "loss": 1.4988, "step": 14394 }, { "epoch": 1.6535523519614037, "grad_norm": 0.7323920726776123, "learning_rate": 0.0001, "loss": 1.628, "step": 14395 }, { "epoch": 1.6536672218712307, "grad_norm": 0.604194700717926, "learning_rate": 0.0001, "loss": 1.4543, "step": 14396 }, { "epoch": 1.653782091781058, "grad_norm": 0.6321179866790771, "learning_rate": 0.0001, "loss": 1.4389, "step": 14397 }, { "epoch": 1.6538969616908852, "grad_norm": 0.6846322417259216, "learning_rate": 0.0001, "loss": 1.6427, "step": 14398 }, { "epoch": 1.6540118316007122, "grad_norm": 0.6531241536140442, "learning_rate": 0.0001, "loss": 1.5168, "step": 14399 }, { "epoch": 1.6541267015105392, "grad_norm": 0.5736318826675415, "learning_rate": 0.0001, "loss": 1.359, "step": 14400 }, { "epoch": 1.6542415714203664, "grad_norm": 0.5621281266212463, "learning_rate": 0.0001, "loss": 1.2091, "step": 14401 }, { "epoch": 1.6543564413301937, "grad_norm": 0.6186012029647827, "learning_rate": 0.0001, "loss": 1.3328, "step": 14402 }, { "epoch": 1.6544713112400207, "grad_norm": 0.6069703698158264, "learning_rate": 0.0001, "loss": 1.4517, "step": 14403 }, { "epoch": 1.6545861811498477, "grad_norm": 0.6218054890632629, "learning_rate": 0.0001, "loss": 1.5537, "step": 14404 }, { "epoch": 1.654701051059675, "grad_norm": 0.6225132346153259, "learning_rate": 0.0001, "loss": 1.4158, "step": 14405 }, { "epoch": 1.6548159209695021, "grad_norm": 0.6091734170913696, "learning_rate": 0.0001, "loss": 1.3794, "step": 14406 }, { "epoch": 1.6549307908793291, "grad_norm": 0.569509744644165, "learning_rate": 0.0001, "loss": 1.2694, "step": 14407 }, { "epoch": 1.6550456607891562, "grad_norm": 0.690952718257904, "learning_rate": 0.0001, "loss": 1.5541, "step": 14408 }, { "epoch": 1.6551605306989834, "grad_norm": 0.6232033371925354, "learning_rate": 0.0001, "loss": 1.3905, "step": 14409 }, { "epoch": 1.6552754006088106, "grad_norm": 0.6081985235214233, "learning_rate": 0.0001, "loss": 1.2826, "step": 14410 }, { "epoch": 1.6553902705186376, "grad_norm": 0.6122736930847168, "learning_rate": 0.0001, "loss": 1.5884, "step": 14411 }, { "epoch": 1.6555051404284646, "grad_norm": 0.5784592628479004, "learning_rate": 0.0001, "loss": 1.6078, "step": 14412 }, { "epoch": 1.6556200103382919, "grad_norm": 0.5675357580184937, "learning_rate": 0.0001, "loss": 1.4174, "step": 14413 }, { "epoch": 1.655734880248119, "grad_norm": 0.5335760116577148, "learning_rate": 0.0001, "loss": 1.1964, "step": 14414 }, { "epoch": 1.655849750157946, "grad_norm": 0.604422390460968, "learning_rate": 0.0001, "loss": 1.4652, "step": 14415 }, { "epoch": 1.6559646200677731, "grad_norm": 0.6323688626289368, "learning_rate": 0.0001, "loss": 1.4988, "step": 14416 }, { "epoch": 1.6560794899776003, "grad_norm": 0.5734161734580994, "learning_rate": 0.0001, "loss": 1.3093, "step": 14417 }, { "epoch": 1.6561943598874276, "grad_norm": 0.6848605275154114, "learning_rate": 0.0001, "loss": 1.546, "step": 14418 }, { "epoch": 1.6563092297972546, "grad_norm": 0.5923926830291748, "learning_rate": 0.0001, "loss": 1.3412, "step": 14419 }, { "epoch": 1.6564240997070816, "grad_norm": 0.7171801328659058, "learning_rate": 0.0001, "loss": 1.4567, "step": 14420 }, { "epoch": 1.6565389696169088, "grad_norm": 0.6611701846122742, "learning_rate": 0.0001, "loss": 1.4262, "step": 14421 }, { "epoch": 1.656653839526736, "grad_norm": 0.6520163416862488, "learning_rate": 0.0001, "loss": 1.495, "step": 14422 }, { "epoch": 1.656768709436563, "grad_norm": 0.6139830350875854, "learning_rate": 0.0001, "loss": 1.6852, "step": 14423 }, { "epoch": 1.65688357934639, "grad_norm": 0.5797949433326721, "learning_rate": 0.0001, "loss": 1.3619, "step": 14424 }, { "epoch": 1.6569984492562173, "grad_norm": 0.5873029828071594, "learning_rate": 0.0001, "loss": 1.3233, "step": 14425 }, { "epoch": 1.6571133191660445, "grad_norm": 0.6242226362228394, "learning_rate": 0.0001, "loss": 1.2352, "step": 14426 }, { "epoch": 1.6572281890758715, "grad_norm": 0.589346170425415, "learning_rate": 0.0001, "loss": 1.327, "step": 14427 }, { "epoch": 1.6573430589856986, "grad_norm": 0.6410361528396606, "learning_rate": 0.0001, "loss": 1.6419, "step": 14428 }, { "epoch": 1.6574579288955258, "grad_norm": 0.5922463536262512, "learning_rate": 0.0001, "loss": 1.5046, "step": 14429 }, { "epoch": 1.657572798805353, "grad_norm": 0.5775658488273621, "learning_rate": 0.0001, "loss": 1.2298, "step": 14430 }, { "epoch": 1.65768766871518, "grad_norm": 0.7526030540466309, "learning_rate": 0.0001, "loss": 1.5982, "step": 14431 }, { "epoch": 1.657802538625007, "grad_norm": 0.5999011993408203, "learning_rate": 0.0001, "loss": 1.3619, "step": 14432 }, { "epoch": 1.6579174085348343, "grad_norm": 0.6628460884094238, "learning_rate": 0.0001, "loss": 1.6252, "step": 14433 }, { "epoch": 1.6580322784446615, "grad_norm": 0.6481192708015442, "learning_rate": 0.0001, "loss": 1.4441, "step": 14434 }, { "epoch": 1.6581471483544885, "grad_norm": 0.6300088167190552, "learning_rate": 0.0001, "loss": 1.4529, "step": 14435 }, { "epoch": 1.6582620182643155, "grad_norm": 0.5989202260971069, "learning_rate": 0.0001, "loss": 1.2056, "step": 14436 }, { "epoch": 1.6583768881741427, "grad_norm": 0.7720966339111328, "learning_rate": 0.0001, "loss": 1.581, "step": 14437 }, { "epoch": 1.65849175808397, "grad_norm": 0.6756711602210999, "learning_rate": 0.0001, "loss": 1.4533, "step": 14438 }, { "epoch": 1.658606627993797, "grad_norm": 0.6091902852058411, "learning_rate": 0.0001, "loss": 1.4482, "step": 14439 }, { "epoch": 1.658721497903624, "grad_norm": 0.6059051156044006, "learning_rate": 0.0001, "loss": 1.4326, "step": 14440 }, { "epoch": 1.6588363678134512, "grad_norm": 0.6013321876525879, "learning_rate": 0.0001, "loss": 1.352, "step": 14441 }, { "epoch": 1.6589512377232785, "grad_norm": 0.5931786298751831, "learning_rate": 0.0001, "loss": 1.4597, "step": 14442 }, { "epoch": 1.6590661076331055, "grad_norm": 0.5912080407142639, "learning_rate": 0.0001, "loss": 1.4353, "step": 14443 }, { "epoch": 1.6591809775429325, "grad_norm": 0.614547073841095, "learning_rate": 0.0001, "loss": 1.4778, "step": 14444 }, { "epoch": 1.6592958474527597, "grad_norm": 0.6366251707077026, "learning_rate": 0.0001, "loss": 1.4466, "step": 14445 }, { "epoch": 1.659410717362587, "grad_norm": 0.5798001289367676, "learning_rate": 0.0001, "loss": 1.3829, "step": 14446 }, { "epoch": 1.659525587272414, "grad_norm": 0.5841000080108643, "learning_rate": 0.0001, "loss": 1.5427, "step": 14447 }, { "epoch": 1.659640457182241, "grad_norm": 0.6051506400108337, "learning_rate": 0.0001, "loss": 1.4419, "step": 14448 }, { "epoch": 1.6597553270920682, "grad_norm": 0.5879446864128113, "learning_rate": 0.0001, "loss": 1.4215, "step": 14449 }, { "epoch": 1.6598701970018954, "grad_norm": 0.661629855632782, "learning_rate": 0.0001, "loss": 1.4418, "step": 14450 }, { "epoch": 1.6599850669117224, "grad_norm": 0.6741747856140137, "learning_rate": 0.0001, "loss": 1.5166, "step": 14451 }, { "epoch": 1.6600999368215494, "grad_norm": 0.5925803184509277, "learning_rate": 0.0001, "loss": 1.3766, "step": 14452 }, { "epoch": 1.6602148067313767, "grad_norm": 0.6182143688201904, "learning_rate": 0.0001, "loss": 1.4005, "step": 14453 }, { "epoch": 1.660329676641204, "grad_norm": 0.6143870949745178, "learning_rate": 0.0001, "loss": 1.4139, "step": 14454 }, { "epoch": 1.660444546551031, "grad_norm": 0.6046828031539917, "learning_rate": 0.0001, "loss": 1.3055, "step": 14455 }, { "epoch": 1.660559416460858, "grad_norm": 0.767876148223877, "learning_rate": 0.0001, "loss": 1.5525, "step": 14456 }, { "epoch": 1.6606742863706851, "grad_norm": 0.6341820359230042, "learning_rate": 0.0001, "loss": 1.6184, "step": 14457 }, { "epoch": 1.6607891562805124, "grad_norm": 0.6907703876495361, "learning_rate": 0.0001, "loss": 1.3277, "step": 14458 }, { "epoch": 1.6609040261903394, "grad_norm": 0.6609397530555725, "learning_rate": 0.0001, "loss": 1.6431, "step": 14459 }, { "epoch": 1.6610188961001664, "grad_norm": 0.6220353245735168, "learning_rate": 0.0001, "loss": 1.4967, "step": 14460 }, { "epoch": 1.6611337660099936, "grad_norm": 0.5976139903068542, "learning_rate": 0.0001, "loss": 1.4258, "step": 14461 }, { "epoch": 1.6612486359198209, "grad_norm": 0.654104471206665, "learning_rate": 0.0001, "loss": 1.53, "step": 14462 }, { "epoch": 1.6613635058296479, "grad_norm": 0.6497766971588135, "learning_rate": 0.0001, "loss": 1.6418, "step": 14463 }, { "epoch": 1.661478375739475, "grad_norm": 0.5906299948692322, "learning_rate": 0.0001, "loss": 1.5512, "step": 14464 }, { "epoch": 1.661593245649302, "grad_norm": 0.6207028031349182, "learning_rate": 0.0001, "loss": 1.498, "step": 14465 }, { "epoch": 1.6617081155591293, "grad_norm": 0.6227574348449707, "learning_rate": 0.0001, "loss": 1.4097, "step": 14466 }, { "epoch": 1.6618229854689566, "grad_norm": 0.7923548817634583, "learning_rate": 0.0001, "loss": 1.0432, "step": 14467 }, { "epoch": 1.6619378553787836, "grad_norm": 0.6193511486053467, "learning_rate": 0.0001, "loss": 1.5811, "step": 14468 }, { "epoch": 1.6620527252886106, "grad_norm": 0.6481901407241821, "learning_rate": 0.0001, "loss": 1.2574, "step": 14469 }, { "epoch": 1.6621675951984378, "grad_norm": 0.7560790181159973, "learning_rate": 0.0001, "loss": 1.7806, "step": 14470 }, { "epoch": 1.662282465108265, "grad_norm": 0.6491854786872864, "learning_rate": 0.0001, "loss": 1.6269, "step": 14471 }, { "epoch": 1.662397335018092, "grad_norm": 0.6301493048667908, "learning_rate": 0.0001, "loss": 1.4367, "step": 14472 }, { "epoch": 1.662512204927919, "grad_norm": 0.6142545342445374, "learning_rate": 0.0001, "loss": 1.5011, "step": 14473 }, { "epoch": 1.6626270748377463, "grad_norm": 0.6091681718826294, "learning_rate": 0.0001, "loss": 1.4234, "step": 14474 }, { "epoch": 1.6627419447475735, "grad_norm": 0.5943846106529236, "learning_rate": 0.0001, "loss": 1.5198, "step": 14475 }, { "epoch": 1.6628568146574005, "grad_norm": 0.7121033668518066, "learning_rate": 0.0001, "loss": 1.4786, "step": 14476 }, { "epoch": 1.6629716845672275, "grad_norm": 0.575641930103302, "learning_rate": 0.0001, "loss": 1.283, "step": 14477 }, { "epoch": 1.6630865544770548, "grad_norm": 0.62906414270401, "learning_rate": 0.0001, "loss": 1.6762, "step": 14478 }, { "epoch": 1.663201424386882, "grad_norm": 0.5758494734764099, "learning_rate": 0.0001, "loss": 1.3509, "step": 14479 }, { "epoch": 1.663316294296709, "grad_norm": 0.581095278263092, "learning_rate": 0.0001, "loss": 1.376, "step": 14480 }, { "epoch": 1.663431164206536, "grad_norm": 0.6003084182739258, "learning_rate": 0.0001, "loss": 1.4864, "step": 14481 }, { "epoch": 1.6635460341163633, "grad_norm": 0.6477168202400208, "learning_rate": 0.0001, "loss": 1.3539, "step": 14482 }, { "epoch": 1.6636609040261905, "grad_norm": 0.6288648843765259, "learning_rate": 0.0001, "loss": 1.4663, "step": 14483 }, { "epoch": 1.6637757739360175, "grad_norm": 0.6509358882904053, "learning_rate": 0.0001, "loss": 1.3245, "step": 14484 }, { "epoch": 1.6638906438458445, "grad_norm": 0.6689729690551758, "learning_rate": 0.0001, "loss": 1.4742, "step": 14485 }, { "epoch": 1.6640055137556717, "grad_norm": 0.6662051677703857, "learning_rate": 0.0001, "loss": 1.4835, "step": 14486 }, { "epoch": 1.664120383665499, "grad_norm": 0.6444410681724548, "learning_rate": 0.0001, "loss": 1.5203, "step": 14487 }, { "epoch": 1.664235253575326, "grad_norm": 0.5951128602027893, "learning_rate": 0.0001, "loss": 1.4918, "step": 14488 }, { "epoch": 1.664350123485153, "grad_norm": 0.6148644089698792, "learning_rate": 0.0001, "loss": 1.2399, "step": 14489 }, { "epoch": 1.6644649933949802, "grad_norm": 0.6238715052604675, "learning_rate": 0.0001, "loss": 1.6374, "step": 14490 }, { "epoch": 1.6645798633048074, "grad_norm": 0.6118984818458557, "learning_rate": 0.0001, "loss": 1.5081, "step": 14491 }, { "epoch": 1.6646947332146345, "grad_norm": 0.6418251991271973, "learning_rate": 0.0001, "loss": 1.4167, "step": 14492 }, { "epoch": 1.6648096031244615, "grad_norm": 0.6296436786651611, "learning_rate": 0.0001, "loss": 1.6315, "step": 14493 }, { "epoch": 1.6649244730342887, "grad_norm": 0.6078484654426575, "learning_rate": 0.0001, "loss": 1.4571, "step": 14494 }, { "epoch": 1.665039342944116, "grad_norm": 0.5940951108932495, "learning_rate": 0.0001, "loss": 1.5002, "step": 14495 }, { "epoch": 1.665154212853943, "grad_norm": 0.6265494227409363, "learning_rate": 0.0001, "loss": 1.4058, "step": 14496 }, { "epoch": 1.66526908276377, "grad_norm": 0.551169753074646, "learning_rate": 0.0001, "loss": 1.4444, "step": 14497 }, { "epoch": 1.6653839526735972, "grad_norm": 0.6252605319023132, "learning_rate": 0.0001, "loss": 1.5168, "step": 14498 }, { "epoch": 1.6654988225834244, "grad_norm": 0.6075454950332642, "learning_rate": 0.0001, "loss": 1.4124, "step": 14499 }, { "epoch": 1.6656136924932514, "grad_norm": 0.6732686161994934, "learning_rate": 0.0001, "loss": 1.6495, "step": 14500 }, { "epoch": 1.6657285624030784, "grad_norm": 0.6578999161720276, "learning_rate": 0.0001, "loss": 1.535, "step": 14501 }, { "epoch": 1.6658434323129057, "grad_norm": 0.6686480045318604, "learning_rate": 0.0001, "loss": 1.5928, "step": 14502 }, { "epoch": 1.665958302222733, "grad_norm": 0.5849835276603699, "learning_rate": 0.0001, "loss": 1.3953, "step": 14503 }, { "epoch": 1.66607317213256, "grad_norm": 0.6391957402229309, "learning_rate": 0.0001, "loss": 1.5159, "step": 14504 }, { "epoch": 1.666188042042387, "grad_norm": 0.6156760454177856, "learning_rate": 0.0001, "loss": 1.2013, "step": 14505 }, { "epoch": 1.6663029119522141, "grad_norm": 0.627495527267456, "learning_rate": 0.0001, "loss": 1.7254, "step": 14506 }, { "epoch": 1.6664177818620414, "grad_norm": 0.6318630576133728, "learning_rate": 0.0001, "loss": 1.495, "step": 14507 }, { "epoch": 1.6665326517718684, "grad_norm": 0.6174932718276978, "learning_rate": 0.0001, "loss": 1.508, "step": 14508 }, { "epoch": 1.6666475216816954, "grad_norm": 0.6004678606987, "learning_rate": 0.0001, "loss": 1.3112, "step": 14509 }, { "epoch": 1.6667623915915226, "grad_norm": 0.5961777567863464, "learning_rate": 0.0001, "loss": 1.3316, "step": 14510 }, { "epoch": 1.6668772615013498, "grad_norm": 0.6679588556289673, "learning_rate": 0.0001, "loss": 1.4856, "step": 14511 }, { "epoch": 1.6669921314111769, "grad_norm": 0.6514487862586975, "learning_rate": 0.0001, "loss": 1.5705, "step": 14512 }, { "epoch": 1.6671070013210039, "grad_norm": 0.6601291298866272, "learning_rate": 0.0001, "loss": 1.4138, "step": 14513 }, { "epoch": 1.667221871230831, "grad_norm": 0.6563011407852173, "learning_rate": 0.0001, "loss": 1.5599, "step": 14514 }, { "epoch": 1.6673367411406583, "grad_norm": 0.6405079960823059, "learning_rate": 0.0001, "loss": 1.484, "step": 14515 }, { "epoch": 1.6674516110504853, "grad_norm": 0.6541486978530884, "learning_rate": 0.0001, "loss": 1.5507, "step": 14516 }, { "epoch": 1.6675664809603123, "grad_norm": 0.5867092609405518, "learning_rate": 0.0001, "loss": 1.4034, "step": 14517 }, { "epoch": 1.6676813508701396, "grad_norm": 0.5766440033912659, "learning_rate": 0.0001, "loss": 1.4268, "step": 14518 }, { "epoch": 1.6677962207799668, "grad_norm": 0.5653627514839172, "learning_rate": 0.0001, "loss": 1.3912, "step": 14519 }, { "epoch": 1.6679110906897938, "grad_norm": 0.6476209759712219, "learning_rate": 0.0001, "loss": 1.5981, "step": 14520 }, { "epoch": 1.6680259605996208, "grad_norm": 0.5878192186355591, "learning_rate": 0.0001, "loss": 1.3328, "step": 14521 }, { "epoch": 1.668140830509448, "grad_norm": 0.5736682415008545, "learning_rate": 0.0001, "loss": 1.3202, "step": 14522 }, { "epoch": 1.6682557004192753, "grad_norm": 0.6797529458999634, "learning_rate": 0.0001, "loss": 1.7396, "step": 14523 }, { "epoch": 1.6683705703291023, "grad_norm": 0.6123679876327515, "learning_rate": 0.0001, "loss": 1.4517, "step": 14524 }, { "epoch": 1.6684854402389293, "grad_norm": 0.567111074924469, "learning_rate": 0.0001, "loss": 1.4072, "step": 14525 }, { "epoch": 1.6686003101487565, "grad_norm": 0.5874302983283997, "learning_rate": 0.0001, "loss": 1.3295, "step": 14526 }, { "epoch": 1.6687151800585838, "grad_norm": 0.6208910346031189, "learning_rate": 0.0001, "loss": 1.5107, "step": 14527 }, { "epoch": 1.6688300499684108, "grad_norm": 0.5815864205360413, "learning_rate": 0.0001, "loss": 1.2403, "step": 14528 }, { "epoch": 1.6689449198782378, "grad_norm": 0.6493234634399414, "learning_rate": 0.0001, "loss": 1.4954, "step": 14529 }, { "epoch": 1.669059789788065, "grad_norm": 0.556931734085083, "learning_rate": 0.0001, "loss": 1.3694, "step": 14530 }, { "epoch": 1.6691746596978922, "grad_norm": 0.6887297034263611, "learning_rate": 0.0001, "loss": 1.6801, "step": 14531 }, { "epoch": 1.6692895296077193, "grad_norm": 0.688178300857544, "learning_rate": 0.0001, "loss": 1.5668, "step": 14532 }, { "epoch": 1.6694043995175463, "grad_norm": 0.6757493019104004, "learning_rate": 0.0001, "loss": 1.5948, "step": 14533 }, { "epoch": 1.6695192694273735, "grad_norm": 0.5804457068443298, "learning_rate": 0.0001, "loss": 1.3788, "step": 14534 }, { "epoch": 1.6696341393372007, "grad_norm": 0.5876379013061523, "learning_rate": 0.0001, "loss": 1.1737, "step": 14535 }, { "epoch": 1.6697490092470277, "grad_norm": 0.6323837637901306, "learning_rate": 0.0001, "loss": 1.3878, "step": 14536 }, { "epoch": 1.6698638791568547, "grad_norm": 0.6448050141334534, "learning_rate": 0.0001, "loss": 1.5295, "step": 14537 }, { "epoch": 1.669978749066682, "grad_norm": 0.6202848553657532, "learning_rate": 0.0001, "loss": 1.5633, "step": 14538 }, { "epoch": 1.6700936189765092, "grad_norm": 0.6006600856781006, "learning_rate": 0.0001, "loss": 1.5798, "step": 14539 }, { "epoch": 1.6702084888863362, "grad_norm": 0.6253841519355774, "learning_rate": 0.0001, "loss": 1.361, "step": 14540 }, { "epoch": 1.6703233587961632, "grad_norm": 0.6643701791763306, "learning_rate": 0.0001, "loss": 1.3992, "step": 14541 }, { "epoch": 1.6704382287059905, "grad_norm": 0.6138646602630615, "learning_rate": 0.0001, "loss": 1.5829, "step": 14542 }, { "epoch": 1.6705530986158177, "grad_norm": 0.614280641078949, "learning_rate": 0.0001, "loss": 1.4676, "step": 14543 }, { "epoch": 1.6706679685256447, "grad_norm": 0.5600206851959229, "learning_rate": 0.0001, "loss": 1.3489, "step": 14544 }, { "epoch": 1.6707828384354717, "grad_norm": 0.6660794615745544, "learning_rate": 0.0001, "loss": 1.6009, "step": 14545 }, { "epoch": 1.670897708345299, "grad_norm": 0.6051227450370789, "learning_rate": 0.0001, "loss": 1.4396, "step": 14546 }, { "epoch": 1.6710125782551262, "grad_norm": 0.5952056646347046, "learning_rate": 0.0001, "loss": 1.4834, "step": 14547 }, { "epoch": 1.6711274481649532, "grad_norm": 0.8179489374160767, "learning_rate": 0.0001, "loss": 1.7063, "step": 14548 }, { "epoch": 1.6712423180747802, "grad_norm": 0.5879932045936584, "learning_rate": 0.0001, "loss": 1.2387, "step": 14549 }, { "epoch": 1.6713571879846074, "grad_norm": 0.6535231471061707, "learning_rate": 0.0001, "loss": 1.6113, "step": 14550 }, { "epoch": 1.6714720578944346, "grad_norm": 0.6634051203727722, "learning_rate": 0.0001, "loss": 1.5402, "step": 14551 }, { "epoch": 1.6715869278042617, "grad_norm": 0.6484226584434509, "learning_rate": 0.0001, "loss": 1.4585, "step": 14552 }, { "epoch": 1.6717017977140887, "grad_norm": 0.6908429861068726, "learning_rate": 0.0001, "loss": 1.4969, "step": 14553 }, { "epoch": 1.671816667623916, "grad_norm": 0.6413170695304871, "learning_rate": 0.0001, "loss": 1.3641, "step": 14554 }, { "epoch": 1.6719315375337431, "grad_norm": 0.6127830147743225, "learning_rate": 0.0001, "loss": 1.4752, "step": 14555 }, { "epoch": 1.6720464074435701, "grad_norm": 0.6318410038948059, "learning_rate": 0.0001, "loss": 1.5818, "step": 14556 }, { "epoch": 1.6721612773533971, "grad_norm": 0.6313050389289856, "learning_rate": 0.0001, "loss": 1.4046, "step": 14557 }, { "epoch": 1.6722761472632244, "grad_norm": 0.6314618587493896, "learning_rate": 0.0001, "loss": 1.4672, "step": 14558 }, { "epoch": 1.6723910171730516, "grad_norm": 0.6210103034973145, "learning_rate": 0.0001, "loss": 1.2459, "step": 14559 }, { "epoch": 1.6725058870828786, "grad_norm": 0.6324970126152039, "learning_rate": 0.0001, "loss": 1.3796, "step": 14560 }, { "epoch": 1.6726207569927056, "grad_norm": 0.679050087928772, "learning_rate": 0.0001, "loss": 1.4594, "step": 14561 }, { "epoch": 1.6727356269025329, "grad_norm": 0.5676151514053345, "learning_rate": 0.0001, "loss": 1.4308, "step": 14562 }, { "epoch": 1.67285049681236, "grad_norm": 0.5721042156219482, "learning_rate": 0.0001, "loss": 1.3224, "step": 14563 }, { "epoch": 1.672965366722187, "grad_norm": 0.5606781840324402, "learning_rate": 0.0001, "loss": 1.5974, "step": 14564 }, { "epoch": 1.673080236632014, "grad_norm": 0.602632462978363, "learning_rate": 0.0001, "loss": 1.316, "step": 14565 }, { "epoch": 1.6731951065418413, "grad_norm": 0.6165114641189575, "learning_rate": 0.0001, "loss": 1.485, "step": 14566 }, { "epoch": 1.6733099764516686, "grad_norm": 0.6127558350563049, "learning_rate": 0.0001, "loss": 1.5381, "step": 14567 }, { "epoch": 1.6734248463614956, "grad_norm": 0.6075685024261475, "learning_rate": 0.0001, "loss": 1.3019, "step": 14568 }, { "epoch": 1.6735397162713226, "grad_norm": 0.6863022446632385, "learning_rate": 0.0001, "loss": 1.5015, "step": 14569 }, { "epoch": 1.6736545861811498, "grad_norm": 0.6382628679275513, "learning_rate": 0.0001, "loss": 1.5063, "step": 14570 }, { "epoch": 1.673769456090977, "grad_norm": 0.6291472911834717, "learning_rate": 0.0001, "loss": 1.5664, "step": 14571 }, { "epoch": 1.673884326000804, "grad_norm": 0.6682292222976685, "learning_rate": 0.0001, "loss": 1.5275, "step": 14572 }, { "epoch": 1.673999195910631, "grad_norm": 0.6253922581672668, "learning_rate": 0.0001, "loss": 1.4154, "step": 14573 }, { "epoch": 1.6741140658204583, "grad_norm": 0.6436946988105774, "learning_rate": 0.0001, "loss": 1.246, "step": 14574 }, { "epoch": 1.6742289357302855, "grad_norm": 0.6284177303314209, "learning_rate": 0.0001, "loss": 1.5651, "step": 14575 }, { "epoch": 1.6743438056401125, "grad_norm": 0.6123202443122864, "learning_rate": 0.0001, "loss": 1.4549, "step": 14576 }, { "epoch": 1.6744586755499395, "grad_norm": 0.635627269744873, "learning_rate": 0.0001, "loss": 1.4781, "step": 14577 }, { "epoch": 1.6745735454597668, "grad_norm": 0.637518048286438, "learning_rate": 0.0001, "loss": 1.5056, "step": 14578 }, { "epoch": 1.674688415369594, "grad_norm": 0.6325652003288269, "learning_rate": 0.0001, "loss": 1.3018, "step": 14579 }, { "epoch": 1.674803285279421, "grad_norm": 0.6435227394104004, "learning_rate": 0.0001, "loss": 1.4763, "step": 14580 }, { "epoch": 1.674918155189248, "grad_norm": 0.6192457675933838, "learning_rate": 0.0001, "loss": 1.3374, "step": 14581 }, { "epoch": 1.6750330250990753, "grad_norm": 0.5760471820831299, "learning_rate": 0.0001, "loss": 1.2543, "step": 14582 }, { "epoch": 1.6751478950089025, "grad_norm": 0.6090351343154907, "learning_rate": 0.0001, "loss": 1.245, "step": 14583 }, { "epoch": 1.6752627649187295, "grad_norm": 0.5881224274635315, "learning_rate": 0.0001, "loss": 1.4009, "step": 14584 }, { "epoch": 1.6753776348285565, "grad_norm": 0.7061769366264343, "learning_rate": 0.0001, "loss": 1.6304, "step": 14585 }, { "epoch": 1.6754925047383837, "grad_norm": 0.6568925380706787, "learning_rate": 0.0001, "loss": 1.4963, "step": 14586 }, { "epoch": 1.675607374648211, "grad_norm": 0.6131194233894348, "learning_rate": 0.0001, "loss": 1.3247, "step": 14587 }, { "epoch": 1.675722244558038, "grad_norm": 0.6096778512001038, "learning_rate": 0.0001, "loss": 1.1763, "step": 14588 }, { "epoch": 1.675837114467865, "grad_norm": 0.7438702583312988, "learning_rate": 0.0001, "loss": 1.4943, "step": 14589 }, { "epoch": 1.6759519843776922, "grad_norm": 0.6072378754615784, "learning_rate": 0.0001, "loss": 1.4269, "step": 14590 }, { "epoch": 1.6760668542875194, "grad_norm": 0.6651720404624939, "learning_rate": 0.0001, "loss": 1.3208, "step": 14591 }, { "epoch": 1.6761817241973465, "grad_norm": 0.7027106881141663, "learning_rate": 0.0001, "loss": 1.5718, "step": 14592 }, { "epoch": 1.6762965941071735, "grad_norm": 0.5823193788528442, "learning_rate": 0.0001, "loss": 1.5061, "step": 14593 }, { "epoch": 1.6764114640170007, "grad_norm": 0.5353499054908752, "learning_rate": 0.0001, "loss": 1.0777, "step": 14594 }, { "epoch": 1.676526333926828, "grad_norm": 0.7175213098526001, "learning_rate": 0.0001, "loss": 1.42, "step": 14595 }, { "epoch": 1.676641203836655, "grad_norm": 0.5985221862792969, "learning_rate": 0.0001, "loss": 1.4292, "step": 14596 }, { "epoch": 1.676756073746482, "grad_norm": 0.6454430818557739, "learning_rate": 0.0001, "loss": 1.4714, "step": 14597 }, { "epoch": 1.6768709436563092, "grad_norm": 0.6255630850791931, "learning_rate": 0.0001, "loss": 1.521, "step": 14598 }, { "epoch": 1.6769858135661364, "grad_norm": 0.6272461414337158, "learning_rate": 0.0001, "loss": 1.528, "step": 14599 }, { "epoch": 1.6771006834759634, "grad_norm": 0.6686280369758606, "learning_rate": 0.0001, "loss": 1.711, "step": 14600 }, { "epoch": 1.6772155533857906, "grad_norm": 0.5825721025466919, "learning_rate": 0.0001, "loss": 1.3257, "step": 14601 }, { "epoch": 1.6773304232956177, "grad_norm": 0.6497721672058105, "learning_rate": 0.0001, "loss": 1.6273, "step": 14602 }, { "epoch": 1.6774452932054449, "grad_norm": 0.6572535634040833, "learning_rate": 0.0001, "loss": 1.6098, "step": 14603 }, { "epoch": 1.6775601631152721, "grad_norm": 0.590975284576416, "learning_rate": 0.0001, "loss": 1.476, "step": 14604 }, { "epoch": 1.6776750330250991, "grad_norm": 0.6207257509231567, "learning_rate": 0.0001, "loss": 1.6519, "step": 14605 }, { "epoch": 1.6777899029349261, "grad_norm": 0.6565500497817993, "learning_rate": 0.0001, "loss": 1.6584, "step": 14606 }, { "epoch": 1.6779047728447534, "grad_norm": 0.6749123334884644, "learning_rate": 0.0001, "loss": 1.6032, "step": 14607 }, { "epoch": 1.6780196427545806, "grad_norm": 0.6280671954154968, "learning_rate": 0.0001, "loss": 1.5966, "step": 14608 }, { "epoch": 1.6781345126644076, "grad_norm": 0.6080400347709656, "learning_rate": 0.0001, "loss": 1.4813, "step": 14609 }, { "epoch": 1.6782493825742346, "grad_norm": 0.6583701968193054, "learning_rate": 0.0001, "loss": 1.5606, "step": 14610 }, { "epoch": 1.6783642524840618, "grad_norm": 0.7133932113647461, "learning_rate": 0.0001, "loss": 1.4113, "step": 14611 }, { "epoch": 1.678479122393889, "grad_norm": 0.647867739200592, "learning_rate": 0.0001, "loss": 1.6575, "step": 14612 }, { "epoch": 1.678593992303716, "grad_norm": 0.5785742402076721, "learning_rate": 0.0001, "loss": 1.2819, "step": 14613 }, { "epoch": 1.678708862213543, "grad_norm": 0.6787799596786499, "learning_rate": 0.0001, "loss": 1.6367, "step": 14614 }, { "epoch": 1.6788237321233703, "grad_norm": 0.581734299659729, "learning_rate": 0.0001, "loss": 1.2793, "step": 14615 }, { "epoch": 1.6789386020331976, "grad_norm": 0.5504398345947266, "learning_rate": 0.0001, "loss": 1.3114, "step": 14616 }, { "epoch": 1.6790534719430246, "grad_norm": 0.5877566337585449, "learning_rate": 0.0001, "loss": 1.3758, "step": 14617 }, { "epoch": 1.6791683418528516, "grad_norm": 0.7053650617599487, "learning_rate": 0.0001, "loss": 1.5527, "step": 14618 }, { "epoch": 1.6792832117626788, "grad_norm": 0.644305408000946, "learning_rate": 0.0001, "loss": 1.233, "step": 14619 }, { "epoch": 1.679398081672506, "grad_norm": 0.6391613483428955, "learning_rate": 0.0001, "loss": 1.3591, "step": 14620 }, { "epoch": 1.679512951582333, "grad_norm": 0.598298192024231, "learning_rate": 0.0001, "loss": 1.519, "step": 14621 }, { "epoch": 1.67962782149216, "grad_norm": 0.60166996717453, "learning_rate": 0.0001, "loss": 1.5308, "step": 14622 }, { "epoch": 1.6797426914019873, "grad_norm": 0.6578497290611267, "learning_rate": 0.0001, "loss": 1.3284, "step": 14623 }, { "epoch": 1.6798575613118145, "grad_norm": 0.6441363096237183, "learning_rate": 0.0001, "loss": 1.3718, "step": 14624 }, { "epoch": 1.6799724312216415, "grad_norm": 0.6314264535903931, "learning_rate": 0.0001, "loss": 1.5161, "step": 14625 }, { "epoch": 1.6800873011314685, "grad_norm": 0.6196305155754089, "learning_rate": 0.0001, "loss": 1.503, "step": 14626 }, { "epoch": 1.6802021710412958, "grad_norm": 0.6449001431465149, "learning_rate": 0.0001, "loss": 1.4048, "step": 14627 }, { "epoch": 1.680317040951123, "grad_norm": 0.6065531969070435, "learning_rate": 0.0001, "loss": 1.396, "step": 14628 }, { "epoch": 1.68043191086095, "grad_norm": 0.6479570269584656, "learning_rate": 0.0001, "loss": 1.6157, "step": 14629 }, { "epoch": 1.680546780770777, "grad_norm": 0.6452540159225464, "learning_rate": 0.0001, "loss": 1.4435, "step": 14630 }, { "epoch": 1.6806616506806042, "grad_norm": 0.6001893877983093, "learning_rate": 0.0001, "loss": 1.3286, "step": 14631 }, { "epoch": 1.6807765205904315, "grad_norm": 0.6244105696678162, "learning_rate": 0.0001, "loss": 1.6007, "step": 14632 }, { "epoch": 1.6808913905002585, "grad_norm": 0.6581660509109497, "learning_rate": 0.0001, "loss": 1.4778, "step": 14633 }, { "epoch": 1.6810062604100855, "grad_norm": 0.7391045093536377, "learning_rate": 0.0001, "loss": 1.5467, "step": 14634 }, { "epoch": 1.6811211303199127, "grad_norm": 0.6155508160591125, "learning_rate": 0.0001, "loss": 1.5132, "step": 14635 }, { "epoch": 1.68123600022974, "grad_norm": 0.673112154006958, "learning_rate": 0.0001, "loss": 1.6676, "step": 14636 }, { "epoch": 1.681350870139567, "grad_norm": 0.6411565542221069, "learning_rate": 0.0001, "loss": 1.4505, "step": 14637 }, { "epoch": 1.681465740049394, "grad_norm": 0.7229892015457153, "learning_rate": 0.0001, "loss": 1.4545, "step": 14638 }, { "epoch": 1.6815806099592212, "grad_norm": 0.5898306369781494, "learning_rate": 0.0001, "loss": 1.4702, "step": 14639 }, { "epoch": 1.6816954798690484, "grad_norm": 0.6058882474899292, "learning_rate": 0.0001, "loss": 1.5154, "step": 14640 }, { "epoch": 1.6818103497788754, "grad_norm": 0.7117508053779602, "learning_rate": 0.0001, "loss": 1.5957, "step": 14641 }, { "epoch": 1.6819252196887025, "grad_norm": 0.7228211164474487, "learning_rate": 0.0001, "loss": 1.3576, "step": 14642 }, { "epoch": 1.6820400895985297, "grad_norm": 0.5465759634971619, "learning_rate": 0.0001, "loss": 1.3254, "step": 14643 }, { "epoch": 1.682154959508357, "grad_norm": 0.7400708198547363, "learning_rate": 0.0001, "loss": 1.7752, "step": 14644 }, { "epoch": 1.682269829418184, "grad_norm": 0.5981911420822144, "learning_rate": 0.0001, "loss": 1.4287, "step": 14645 }, { "epoch": 1.682384699328011, "grad_norm": 0.6448339223861694, "learning_rate": 0.0001, "loss": 1.6, "step": 14646 }, { "epoch": 1.6824995692378382, "grad_norm": 0.560468316078186, "learning_rate": 0.0001, "loss": 1.453, "step": 14647 }, { "epoch": 1.6826144391476654, "grad_norm": 0.6334208250045776, "learning_rate": 0.0001, "loss": 1.3525, "step": 14648 }, { "epoch": 1.6827293090574924, "grad_norm": 0.6362658739089966, "learning_rate": 0.0001, "loss": 1.5981, "step": 14649 }, { "epoch": 1.6828441789673194, "grad_norm": 0.644481360912323, "learning_rate": 0.0001, "loss": 1.6839, "step": 14650 }, { "epoch": 1.6829590488771466, "grad_norm": 0.6140202879905701, "learning_rate": 0.0001, "loss": 1.5951, "step": 14651 }, { "epoch": 1.6830739187869739, "grad_norm": 0.6383975744247437, "learning_rate": 0.0001, "loss": 1.4495, "step": 14652 }, { "epoch": 1.6831887886968009, "grad_norm": 0.6611577868461609, "learning_rate": 0.0001, "loss": 1.5805, "step": 14653 }, { "epoch": 1.683303658606628, "grad_norm": 0.6007152199745178, "learning_rate": 0.0001, "loss": 1.3869, "step": 14654 }, { "epoch": 1.6834185285164551, "grad_norm": 0.6275546550750732, "learning_rate": 0.0001, "loss": 1.368, "step": 14655 }, { "epoch": 1.6835333984262824, "grad_norm": 0.649321973323822, "learning_rate": 0.0001, "loss": 1.5191, "step": 14656 }, { "epoch": 1.6836482683361094, "grad_norm": 0.5704925060272217, "learning_rate": 0.0001, "loss": 1.403, "step": 14657 }, { "epoch": 1.6837631382459364, "grad_norm": 0.6267727017402649, "learning_rate": 0.0001, "loss": 1.4628, "step": 14658 }, { "epoch": 1.6838780081557636, "grad_norm": 0.6294718980789185, "learning_rate": 0.0001, "loss": 1.3524, "step": 14659 }, { "epoch": 1.6839928780655908, "grad_norm": 0.583004891872406, "learning_rate": 0.0001, "loss": 1.4805, "step": 14660 }, { "epoch": 1.6841077479754178, "grad_norm": 0.616648256778717, "learning_rate": 0.0001, "loss": 1.4662, "step": 14661 }, { "epoch": 1.6842226178852449, "grad_norm": 0.6571115255355835, "learning_rate": 0.0001, "loss": 1.4202, "step": 14662 }, { "epoch": 1.684337487795072, "grad_norm": 0.7342918515205383, "learning_rate": 0.0001, "loss": 1.6703, "step": 14663 }, { "epoch": 1.6844523577048993, "grad_norm": 0.6430667042732239, "learning_rate": 0.0001, "loss": 1.6223, "step": 14664 }, { "epoch": 1.6845672276147263, "grad_norm": 0.6896945238113403, "learning_rate": 0.0001, "loss": 1.6645, "step": 14665 }, { "epoch": 1.6846820975245533, "grad_norm": 0.6307993531227112, "learning_rate": 0.0001, "loss": 1.2423, "step": 14666 }, { "epoch": 1.6847969674343806, "grad_norm": 0.6446820497512817, "learning_rate": 0.0001, "loss": 1.259, "step": 14667 }, { "epoch": 1.6849118373442078, "grad_norm": 0.6242921948432922, "learning_rate": 0.0001, "loss": 1.4609, "step": 14668 }, { "epoch": 1.6850267072540348, "grad_norm": 0.6997743248939514, "learning_rate": 0.0001, "loss": 1.6098, "step": 14669 }, { "epoch": 1.6851415771638618, "grad_norm": 0.6224357485771179, "learning_rate": 0.0001, "loss": 1.5104, "step": 14670 }, { "epoch": 1.685256447073689, "grad_norm": 0.5937080383300781, "learning_rate": 0.0001, "loss": 1.4058, "step": 14671 }, { "epoch": 1.6853713169835163, "grad_norm": 0.6098648309707642, "learning_rate": 0.0001, "loss": 1.4876, "step": 14672 }, { "epoch": 1.6854861868933433, "grad_norm": 0.6156720519065857, "learning_rate": 0.0001, "loss": 1.4901, "step": 14673 }, { "epoch": 1.6856010568031703, "grad_norm": 0.6202700734138489, "learning_rate": 0.0001, "loss": 1.4744, "step": 14674 }, { "epoch": 1.6857159267129975, "grad_norm": 0.6744952201843262, "learning_rate": 0.0001, "loss": 1.5085, "step": 14675 }, { "epoch": 1.6858307966228248, "grad_norm": 0.6229870319366455, "learning_rate": 0.0001, "loss": 1.4296, "step": 14676 }, { "epoch": 1.6859456665326518, "grad_norm": 0.6743441224098206, "learning_rate": 0.0001, "loss": 1.4914, "step": 14677 }, { "epoch": 1.6860605364424788, "grad_norm": 0.6453379988670349, "learning_rate": 0.0001, "loss": 1.589, "step": 14678 }, { "epoch": 1.686175406352306, "grad_norm": 0.6405113935470581, "learning_rate": 0.0001, "loss": 1.4714, "step": 14679 }, { "epoch": 1.6862902762621332, "grad_norm": 0.610621988773346, "learning_rate": 0.0001, "loss": 1.5101, "step": 14680 }, { "epoch": 1.6864051461719602, "grad_norm": 0.5977540016174316, "learning_rate": 0.0001, "loss": 1.4575, "step": 14681 }, { "epoch": 1.6865200160817873, "grad_norm": 0.6138947606086731, "learning_rate": 0.0001, "loss": 1.5132, "step": 14682 }, { "epoch": 1.6866348859916145, "grad_norm": 0.6411577463150024, "learning_rate": 0.0001, "loss": 1.5665, "step": 14683 }, { "epoch": 1.6867497559014417, "grad_norm": 0.6003040671348572, "learning_rate": 0.0001, "loss": 1.3768, "step": 14684 }, { "epoch": 1.6868646258112687, "grad_norm": 0.5844460129737854, "learning_rate": 0.0001, "loss": 1.351, "step": 14685 }, { "epoch": 1.6869794957210957, "grad_norm": 0.7202532887458801, "learning_rate": 0.0001, "loss": 1.6241, "step": 14686 }, { "epoch": 1.687094365630923, "grad_norm": 0.6601413488388062, "learning_rate": 0.0001, "loss": 1.793, "step": 14687 }, { "epoch": 1.6872092355407502, "grad_norm": 0.6533530354499817, "learning_rate": 0.0001, "loss": 1.4675, "step": 14688 }, { "epoch": 1.6873241054505772, "grad_norm": 0.5973615646362305, "learning_rate": 0.0001, "loss": 1.4479, "step": 14689 }, { "epoch": 1.6874389753604042, "grad_norm": 0.6223400831222534, "learning_rate": 0.0001, "loss": 1.5005, "step": 14690 }, { "epoch": 1.6875538452702314, "grad_norm": 0.7212362885475159, "learning_rate": 0.0001, "loss": 1.7079, "step": 14691 }, { "epoch": 1.6876687151800587, "grad_norm": 0.6673148274421692, "learning_rate": 0.0001, "loss": 1.428, "step": 14692 }, { "epoch": 1.6877835850898857, "grad_norm": 0.6568313241004944, "learning_rate": 0.0001, "loss": 1.3289, "step": 14693 }, { "epoch": 1.6878984549997127, "grad_norm": 0.6275414228439331, "learning_rate": 0.0001, "loss": 1.4829, "step": 14694 }, { "epoch": 1.68801332490954, "grad_norm": 0.6030680537223816, "learning_rate": 0.0001, "loss": 1.5531, "step": 14695 }, { "epoch": 1.6881281948193672, "grad_norm": 0.6563968658447266, "learning_rate": 0.0001, "loss": 1.3393, "step": 14696 }, { "epoch": 1.6882430647291942, "grad_norm": 0.683423638343811, "learning_rate": 0.0001, "loss": 1.5572, "step": 14697 }, { "epoch": 1.6883579346390212, "grad_norm": 0.6408438086509705, "learning_rate": 0.0001, "loss": 1.5372, "step": 14698 }, { "epoch": 1.6884728045488484, "grad_norm": 0.6201786398887634, "learning_rate": 0.0001, "loss": 1.3984, "step": 14699 }, { "epoch": 1.6885876744586756, "grad_norm": 0.6087800860404968, "learning_rate": 0.0001, "loss": 1.616, "step": 14700 }, { "epoch": 1.6887025443685026, "grad_norm": 0.6137244701385498, "learning_rate": 0.0001, "loss": 1.3528, "step": 14701 }, { "epoch": 1.6888174142783297, "grad_norm": 0.5812850594520569, "learning_rate": 0.0001, "loss": 1.3308, "step": 14702 }, { "epoch": 1.6889322841881569, "grad_norm": 0.6570180058479309, "learning_rate": 0.0001, "loss": 1.5878, "step": 14703 }, { "epoch": 1.6890471540979841, "grad_norm": 0.6571214199066162, "learning_rate": 0.0001, "loss": 1.3125, "step": 14704 }, { "epoch": 1.6891620240078111, "grad_norm": 0.6408611536026001, "learning_rate": 0.0001, "loss": 1.3928, "step": 14705 }, { "epoch": 1.6892768939176381, "grad_norm": 0.6647818088531494, "learning_rate": 0.0001, "loss": 1.4973, "step": 14706 }, { "epoch": 1.6893917638274654, "grad_norm": 0.6333121061325073, "learning_rate": 0.0001, "loss": 1.4169, "step": 14707 }, { "epoch": 1.6895066337372926, "grad_norm": 0.6847052574157715, "learning_rate": 0.0001, "loss": 1.383, "step": 14708 }, { "epoch": 1.6896215036471196, "grad_norm": 0.6509214639663696, "learning_rate": 0.0001, "loss": 1.5616, "step": 14709 }, { "epoch": 1.6897363735569466, "grad_norm": 0.6630228757858276, "learning_rate": 0.0001, "loss": 1.4381, "step": 14710 }, { "epoch": 1.6898512434667738, "grad_norm": 0.5848700404167175, "learning_rate": 0.0001, "loss": 1.3055, "step": 14711 }, { "epoch": 1.689966113376601, "grad_norm": 0.6231377720832825, "learning_rate": 0.0001, "loss": 1.5535, "step": 14712 }, { "epoch": 1.690080983286428, "grad_norm": 0.5822433233261108, "learning_rate": 0.0001, "loss": 1.5534, "step": 14713 }, { "epoch": 1.690195853196255, "grad_norm": 0.5917012095451355, "learning_rate": 0.0001, "loss": 1.4819, "step": 14714 }, { "epoch": 1.6903107231060823, "grad_norm": 0.5557729601860046, "learning_rate": 0.0001, "loss": 1.4386, "step": 14715 }, { "epoch": 1.6904255930159096, "grad_norm": 0.6072548627853394, "learning_rate": 0.0001, "loss": 1.6101, "step": 14716 }, { "epoch": 1.6905404629257366, "grad_norm": 0.6061117053031921, "learning_rate": 0.0001, "loss": 1.2462, "step": 14717 }, { "epoch": 1.6906553328355636, "grad_norm": 0.6177441477775574, "learning_rate": 0.0001, "loss": 1.5584, "step": 14718 }, { "epoch": 1.6907702027453908, "grad_norm": 0.6135041117668152, "learning_rate": 0.0001, "loss": 1.4055, "step": 14719 }, { "epoch": 1.690885072655218, "grad_norm": 0.5727534890174866, "learning_rate": 0.0001, "loss": 1.2838, "step": 14720 }, { "epoch": 1.690999942565045, "grad_norm": 0.6418645977973938, "learning_rate": 0.0001, "loss": 1.3512, "step": 14721 }, { "epoch": 1.691114812474872, "grad_norm": 0.6252636909484863, "learning_rate": 0.0001, "loss": 1.5915, "step": 14722 }, { "epoch": 1.6912296823846993, "grad_norm": 0.6545863151550293, "learning_rate": 0.0001, "loss": 1.5227, "step": 14723 }, { "epoch": 1.6913445522945265, "grad_norm": 0.6704011559486389, "learning_rate": 0.0001, "loss": 1.4523, "step": 14724 }, { "epoch": 1.6914594222043535, "grad_norm": 0.6669784188270569, "learning_rate": 0.0001, "loss": 1.6168, "step": 14725 }, { "epoch": 1.6915742921141805, "grad_norm": 0.5946208238601685, "learning_rate": 0.0001, "loss": 1.5255, "step": 14726 }, { "epoch": 1.6916891620240078, "grad_norm": 0.6300133466720581, "learning_rate": 0.0001, "loss": 1.6328, "step": 14727 }, { "epoch": 1.691804031933835, "grad_norm": 0.6053428053855896, "learning_rate": 0.0001, "loss": 1.6368, "step": 14728 }, { "epoch": 1.691918901843662, "grad_norm": 0.5880074501037598, "learning_rate": 0.0001, "loss": 1.3086, "step": 14729 }, { "epoch": 1.692033771753489, "grad_norm": 0.6411834955215454, "learning_rate": 0.0001, "loss": 1.3448, "step": 14730 }, { "epoch": 1.6921486416633162, "grad_norm": 0.6445175409317017, "learning_rate": 0.0001, "loss": 1.5241, "step": 14731 }, { "epoch": 1.6922635115731435, "grad_norm": 0.5757904052734375, "learning_rate": 0.0001, "loss": 1.3863, "step": 14732 }, { "epoch": 1.6923783814829705, "grad_norm": 0.6143217086791992, "learning_rate": 0.0001, "loss": 1.6193, "step": 14733 }, { "epoch": 1.6924932513927975, "grad_norm": 0.6285145282745361, "learning_rate": 0.0001, "loss": 1.4525, "step": 14734 }, { "epoch": 1.6926081213026247, "grad_norm": 0.6272004246711731, "learning_rate": 0.0001, "loss": 1.3839, "step": 14735 }, { "epoch": 1.692722991212452, "grad_norm": 0.6545289754867554, "learning_rate": 0.0001, "loss": 1.4322, "step": 14736 }, { "epoch": 1.692837861122279, "grad_norm": 0.6652606725692749, "learning_rate": 0.0001, "loss": 1.6324, "step": 14737 }, { "epoch": 1.6929527310321062, "grad_norm": 0.6375353932380676, "learning_rate": 0.0001, "loss": 1.4693, "step": 14738 }, { "epoch": 1.6930676009419332, "grad_norm": 0.6303648948669434, "learning_rate": 0.0001, "loss": 1.4073, "step": 14739 }, { "epoch": 1.6931824708517604, "grad_norm": 0.6370104551315308, "learning_rate": 0.0001, "loss": 1.2702, "step": 14740 }, { "epoch": 1.6932973407615877, "grad_norm": 0.723127007484436, "learning_rate": 0.0001, "loss": 1.4016, "step": 14741 }, { "epoch": 1.6934122106714147, "grad_norm": 0.628434419631958, "learning_rate": 0.0001, "loss": 1.3892, "step": 14742 }, { "epoch": 1.6935270805812417, "grad_norm": 0.5962461233139038, "learning_rate": 0.0001, "loss": 1.4348, "step": 14743 }, { "epoch": 1.693641950491069, "grad_norm": 0.6001591682434082, "learning_rate": 0.0001, "loss": 1.4057, "step": 14744 }, { "epoch": 1.6937568204008961, "grad_norm": 0.6299112439155579, "learning_rate": 0.0001, "loss": 1.5806, "step": 14745 }, { "epoch": 1.6938716903107232, "grad_norm": 0.6881070137023926, "learning_rate": 0.0001, "loss": 1.666, "step": 14746 }, { "epoch": 1.6939865602205502, "grad_norm": 0.5913798809051514, "learning_rate": 0.0001, "loss": 1.3889, "step": 14747 }, { "epoch": 1.6941014301303774, "grad_norm": 0.627502977848053, "learning_rate": 0.0001, "loss": 1.5266, "step": 14748 }, { "epoch": 1.6942163000402046, "grad_norm": 0.6564575433731079, "learning_rate": 0.0001, "loss": 1.4907, "step": 14749 }, { "epoch": 1.6943311699500316, "grad_norm": 0.5897772908210754, "learning_rate": 0.0001, "loss": 1.3894, "step": 14750 }, { "epoch": 1.6944460398598586, "grad_norm": 0.5913833975791931, "learning_rate": 0.0001, "loss": 1.2416, "step": 14751 }, { "epoch": 1.6945609097696859, "grad_norm": 0.658932626247406, "learning_rate": 0.0001, "loss": 1.4408, "step": 14752 }, { "epoch": 1.694675779679513, "grad_norm": 0.6177611351013184, "learning_rate": 0.0001, "loss": 1.4164, "step": 14753 }, { "epoch": 1.6947906495893401, "grad_norm": 0.6406357884407043, "learning_rate": 0.0001, "loss": 1.4451, "step": 14754 }, { "epoch": 1.6949055194991671, "grad_norm": 0.6174083948135376, "learning_rate": 0.0001, "loss": 1.4722, "step": 14755 }, { "epoch": 1.6950203894089944, "grad_norm": 0.653312087059021, "learning_rate": 0.0001, "loss": 1.4612, "step": 14756 }, { "epoch": 1.6951352593188216, "grad_norm": 0.6030119061470032, "learning_rate": 0.0001, "loss": 1.3403, "step": 14757 }, { "epoch": 1.6952501292286486, "grad_norm": 0.6403154730796814, "learning_rate": 0.0001, "loss": 1.5922, "step": 14758 }, { "epoch": 1.6953649991384756, "grad_norm": 0.6424520015716553, "learning_rate": 0.0001, "loss": 1.5404, "step": 14759 }, { "epoch": 1.6954798690483028, "grad_norm": 0.6321309804916382, "learning_rate": 0.0001, "loss": 1.3525, "step": 14760 }, { "epoch": 1.69559473895813, "grad_norm": 0.5908375382423401, "learning_rate": 0.0001, "loss": 1.4181, "step": 14761 }, { "epoch": 1.695709608867957, "grad_norm": 0.6151296496391296, "learning_rate": 0.0001, "loss": 1.4344, "step": 14762 }, { "epoch": 1.695824478777784, "grad_norm": 0.5596523880958557, "learning_rate": 0.0001, "loss": 1.2521, "step": 14763 }, { "epoch": 1.6959393486876113, "grad_norm": 0.6613996028900146, "learning_rate": 0.0001, "loss": 1.5143, "step": 14764 }, { "epoch": 1.6960542185974385, "grad_norm": 0.6622706055641174, "learning_rate": 0.0001, "loss": 1.6614, "step": 14765 }, { "epoch": 1.6961690885072656, "grad_norm": 0.5737489461898804, "learning_rate": 0.0001, "loss": 1.4419, "step": 14766 }, { "epoch": 1.6962839584170926, "grad_norm": 0.6686919927597046, "learning_rate": 0.0001, "loss": 1.5794, "step": 14767 }, { "epoch": 1.6963988283269198, "grad_norm": 0.6299922466278076, "learning_rate": 0.0001, "loss": 1.5279, "step": 14768 }, { "epoch": 1.696513698236747, "grad_norm": 0.7300532460212708, "learning_rate": 0.0001, "loss": 1.6348, "step": 14769 }, { "epoch": 1.696628568146574, "grad_norm": 0.5972233414649963, "learning_rate": 0.0001, "loss": 1.4763, "step": 14770 }, { "epoch": 1.696743438056401, "grad_norm": 0.6257323026657104, "learning_rate": 0.0001, "loss": 1.3211, "step": 14771 }, { "epoch": 1.6968583079662283, "grad_norm": 0.6275603175163269, "learning_rate": 0.0001, "loss": 1.5752, "step": 14772 }, { "epoch": 1.6969731778760555, "grad_norm": 0.5872368216514587, "learning_rate": 0.0001, "loss": 1.3845, "step": 14773 }, { "epoch": 1.6970880477858825, "grad_norm": 0.5436793565750122, "learning_rate": 0.0001, "loss": 1.1879, "step": 14774 }, { "epoch": 1.6972029176957095, "grad_norm": 0.5925673246383667, "learning_rate": 0.0001, "loss": 1.4837, "step": 14775 }, { "epoch": 1.6973177876055368, "grad_norm": 0.622356653213501, "learning_rate": 0.0001, "loss": 1.2646, "step": 14776 }, { "epoch": 1.697432657515364, "grad_norm": 0.644891083240509, "learning_rate": 0.0001, "loss": 1.4162, "step": 14777 }, { "epoch": 1.697547527425191, "grad_norm": 0.6052226424217224, "learning_rate": 0.0001, "loss": 1.3639, "step": 14778 }, { "epoch": 1.697662397335018, "grad_norm": 0.6266080737113953, "learning_rate": 0.0001, "loss": 1.3751, "step": 14779 }, { "epoch": 1.6977772672448452, "grad_norm": 0.6198167204856873, "learning_rate": 0.0001, "loss": 1.5003, "step": 14780 }, { "epoch": 1.6978921371546725, "grad_norm": 0.6346505284309387, "learning_rate": 0.0001, "loss": 1.4329, "step": 14781 }, { "epoch": 1.6980070070644995, "grad_norm": 0.6443406939506531, "learning_rate": 0.0001, "loss": 1.3899, "step": 14782 }, { "epoch": 1.6981218769743265, "grad_norm": 0.6747581958770752, "learning_rate": 0.0001, "loss": 1.4359, "step": 14783 }, { "epoch": 1.6982367468841537, "grad_norm": 0.6033055186271667, "learning_rate": 0.0001, "loss": 1.5487, "step": 14784 }, { "epoch": 1.698351616793981, "grad_norm": 0.6114761829376221, "learning_rate": 0.0001, "loss": 1.4088, "step": 14785 }, { "epoch": 1.698466486703808, "grad_norm": 0.5994529724121094, "learning_rate": 0.0001, "loss": 1.4139, "step": 14786 }, { "epoch": 1.698581356613635, "grad_norm": 0.6032854318618774, "learning_rate": 0.0001, "loss": 1.3684, "step": 14787 }, { "epoch": 1.6986962265234622, "grad_norm": 0.579774796962738, "learning_rate": 0.0001, "loss": 1.6101, "step": 14788 }, { "epoch": 1.6988110964332894, "grad_norm": 0.600327730178833, "learning_rate": 0.0001, "loss": 1.3107, "step": 14789 }, { "epoch": 1.6989259663431164, "grad_norm": 0.6498262882232666, "learning_rate": 0.0001, "loss": 1.5339, "step": 14790 }, { "epoch": 1.6990408362529434, "grad_norm": 0.6453105211257935, "learning_rate": 0.0001, "loss": 1.534, "step": 14791 }, { "epoch": 1.6991557061627707, "grad_norm": 0.6776260137557983, "learning_rate": 0.0001, "loss": 1.4309, "step": 14792 }, { "epoch": 1.699270576072598, "grad_norm": 0.5969988107681274, "learning_rate": 0.0001, "loss": 1.4604, "step": 14793 }, { "epoch": 1.699385445982425, "grad_norm": 0.585665762424469, "learning_rate": 0.0001, "loss": 1.3594, "step": 14794 }, { "epoch": 1.699500315892252, "grad_norm": 0.6432570815086365, "learning_rate": 0.0001, "loss": 1.5177, "step": 14795 }, { "epoch": 1.6996151858020792, "grad_norm": 0.6451871395111084, "learning_rate": 0.0001, "loss": 1.3841, "step": 14796 }, { "epoch": 1.6997300557119064, "grad_norm": 0.6202559471130371, "learning_rate": 0.0001, "loss": 1.5245, "step": 14797 }, { "epoch": 1.6998449256217334, "grad_norm": 0.6412597894668579, "learning_rate": 0.0001, "loss": 1.4781, "step": 14798 }, { "epoch": 1.6999597955315604, "grad_norm": 0.6816568374633789, "learning_rate": 0.0001, "loss": 1.4192, "step": 14799 }, { "epoch": 1.7000746654413876, "grad_norm": 0.6473395824432373, "learning_rate": 0.0001, "loss": 1.5152, "step": 14800 }, { "epoch": 1.7001895353512149, "grad_norm": 0.6455778479576111, "learning_rate": 0.0001, "loss": 1.2928, "step": 14801 }, { "epoch": 1.7003044052610419, "grad_norm": 0.5928786396980286, "learning_rate": 0.0001, "loss": 1.2802, "step": 14802 }, { "epoch": 1.7004192751708689, "grad_norm": 0.6279940605163574, "learning_rate": 0.0001, "loss": 1.2901, "step": 14803 }, { "epoch": 1.7005341450806961, "grad_norm": 0.6435730457305908, "learning_rate": 0.0001, "loss": 1.4583, "step": 14804 }, { "epoch": 1.7006490149905233, "grad_norm": 0.6767624616622925, "learning_rate": 0.0001, "loss": 1.5811, "step": 14805 }, { "epoch": 1.7007638849003504, "grad_norm": 0.6514124870300293, "learning_rate": 0.0001, "loss": 1.589, "step": 14806 }, { "epoch": 1.7008787548101774, "grad_norm": 0.6798242926597595, "learning_rate": 0.0001, "loss": 1.5002, "step": 14807 }, { "epoch": 1.7009936247200046, "grad_norm": 0.6122627258300781, "learning_rate": 0.0001, "loss": 1.6312, "step": 14808 }, { "epoch": 1.7011084946298318, "grad_norm": 0.6929191946983337, "learning_rate": 0.0001, "loss": 1.4633, "step": 14809 }, { "epoch": 1.7012233645396588, "grad_norm": 0.6241300702095032, "learning_rate": 0.0001, "loss": 1.6087, "step": 14810 }, { "epoch": 1.7013382344494858, "grad_norm": 0.7182061076164246, "learning_rate": 0.0001, "loss": 1.5046, "step": 14811 }, { "epoch": 1.701453104359313, "grad_norm": 0.6124372482299805, "learning_rate": 0.0001, "loss": 1.4548, "step": 14812 }, { "epoch": 1.7015679742691403, "grad_norm": 0.766761064529419, "learning_rate": 0.0001, "loss": 1.4963, "step": 14813 }, { "epoch": 1.7016828441789673, "grad_norm": 0.664625346660614, "learning_rate": 0.0001, "loss": 1.2332, "step": 14814 }, { "epoch": 1.7017977140887943, "grad_norm": 0.641326367855072, "learning_rate": 0.0001, "loss": 1.4563, "step": 14815 }, { "epoch": 1.7019125839986216, "grad_norm": 0.6896613240242004, "learning_rate": 0.0001, "loss": 1.4501, "step": 14816 }, { "epoch": 1.7020274539084488, "grad_norm": 0.6627568602561951, "learning_rate": 0.0001, "loss": 1.7495, "step": 14817 }, { "epoch": 1.7021423238182758, "grad_norm": 0.6433359384536743, "learning_rate": 0.0001, "loss": 1.5985, "step": 14818 }, { "epoch": 1.7022571937281028, "grad_norm": 0.5926421880722046, "learning_rate": 0.0001, "loss": 1.419, "step": 14819 }, { "epoch": 1.70237206363793, "grad_norm": 0.6341785192489624, "learning_rate": 0.0001, "loss": 1.3109, "step": 14820 }, { "epoch": 1.7024869335477573, "grad_norm": 0.5665348768234253, "learning_rate": 0.0001, "loss": 1.2676, "step": 14821 }, { "epoch": 1.7026018034575843, "grad_norm": 0.6543471217155457, "learning_rate": 0.0001, "loss": 1.6028, "step": 14822 }, { "epoch": 1.7027166733674113, "grad_norm": 0.6324287056922913, "learning_rate": 0.0001, "loss": 1.5246, "step": 14823 }, { "epoch": 1.7028315432772385, "grad_norm": 0.6068698167800903, "learning_rate": 0.0001, "loss": 1.3328, "step": 14824 }, { "epoch": 1.7029464131870657, "grad_norm": 0.6536720991134644, "learning_rate": 0.0001, "loss": 1.422, "step": 14825 }, { "epoch": 1.7030612830968928, "grad_norm": 0.681016206741333, "learning_rate": 0.0001, "loss": 1.5969, "step": 14826 }, { "epoch": 1.7031761530067198, "grad_norm": 0.6520375609397888, "learning_rate": 0.0001, "loss": 1.565, "step": 14827 }, { "epoch": 1.703291022916547, "grad_norm": 0.6231850385665894, "learning_rate": 0.0001, "loss": 1.2909, "step": 14828 }, { "epoch": 1.7034058928263742, "grad_norm": 0.6289814114570618, "learning_rate": 0.0001, "loss": 1.5762, "step": 14829 }, { "epoch": 1.7035207627362012, "grad_norm": 0.687447726726532, "learning_rate": 0.0001, "loss": 1.2311, "step": 14830 }, { "epoch": 1.7036356326460282, "grad_norm": 0.6084256172180176, "learning_rate": 0.0001, "loss": 1.512, "step": 14831 }, { "epoch": 1.7037505025558555, "grad_norm": 0.6083263754844666, "learning_rate": 0.0001, "loss": 1.4937, "step": 14832 }, { "epoch": 1.7038653724656827, "grad_norm": 0.612534761428833, "learning_rate": 0.0001, "loss": 1.5857, "step": 14833 }, { "epoch": 1.7039802423755097, "grad_norm": 0.6613317728042603, "learning_rate": 0.0001, "loss": 1.6517, "step": 14834 }, { "epoch": 1.7040951122853367, "grad_norm": 0.6363469958305359, "learning_rate": 0.0001, "loss": 1.5, "step": 14835 }, { "epoch": 1.704209982195164, "grad_norm": 0.6956435441970825, "learning_rate": 0.0001, "loss": 1.6757, "step": 14836 }, { "epoch": 1.7043248521049912, "grad_norm": 0.649994969367981, "learning_rate": 0.0001, "loss": 1.6309, "step": 14837 }, { "epoch": 1.7044397220148182, "grad_norm": 0.6042583584785461, "learning_rate": 0.0001, "loss": 1.3096, "step": 14838 }, { "epoch": 1.7045545919246452, "grad_norm": 0.6033551692962646, "learning_rate": 0.0001, "loss": 1.4756, "step": 14839 }, { "epoch": 1.7046694618344724, "grad_norm": 0.6003922820091248, "learning_rate": 0.0001, "loss": 1.5267, "step": 14840 }, { "epoch": 1.7047843317442997, "grad_norm": 0.6671146154403687, "learning_rate": 0.0001, "loss": 1.6061, "step": 14841 }, { "epoch": 1.7048992016541267, "grad_norm": 0.5957947373390198, "learning_rate": 0.0001, "loss": 1.3582, "step": 14842 }, { "epoch": 1.7050140715639537, "grad_norm": 0.6179354786872864, "learning_rate": 0.0001, "loss": 1.5215, "step": 14843 }, { "epoch": 1.705128941473781, "grad_norm": 0.6118759512901306, "learning_rate": 0.0001, "loss": 1.3776, "step": 14844 }, { "epoch": 1.7052438113836081, "grad_norm": 0.5971879959106445, "learning_rate": 0.0001, "loss": 1.4897, "step": 14845 }, { "epoch": 1.7053586812934352, "grad_norm": 0.5953097343444824, "learning_rate": 0.0001, "loss": 1.4166, "step": 14846 }, { "epoch": 1.7054735512032622, "grad_norm": 0.6157338619232178, "learning_rate": 0.0001, "loss": 1.4949, "step": 14847 }, { "epoch": 1.7055884211130894, "grad_norm": 0.6230495572090149, "learning_rate": 0.0001, "loss": 1.4628, "step": 14848 }, { "epoch": 1.7057032910229166, "grad_norm": 0.6791070699691772, "learning_rate": 0.0001, "loss": 1.5388, "step": 14849 }, { "epoch": 1.7058181609327436, "grad_norm": 0.5627061128616333, "learning_rate": 0.0001, "loss": 1.2639, "step": 14850 }, { "epoch": 1.7059330308425706, "grad_norm": 0.6061202883720398, "learning_rate": 0.0001, "loss": 1.34, "step": 14851 }, { "epoch": 1.7060479007523979, "grad_norm": 0.6992841362953186, "learning_rate": 0.0001, "loss": 1.6581, "step": 14852 }, { "epoch": 1.706162770662225, "grad_norm": 0.6365556120872498, "learning_rate": 0.0001, "loss": 1.632, "step": 14853 }, { "epoch": 1.7062776405720521, "grad_norm": 0.6153625845909119, "learning_rate": 0.0001, "loss": 1.2521, "step": 14854 }, { "epoch": 1.7063925104818791, "grad_norm": 0.6573269963264465, "learning_rate": 0.0001, "loss": 1.5091, "step": 14855 }, { "epoch": 1.7065073803917064, "grad_norm": 0.6061968207359314, "learning_rate": 0.0001, "loss": 1.4127, "step": 14856 }, { "epoch": 1.7066222503015336, "grad_norm": 0.6206647753715515, "learning_rate": 0.0001, "loss": 1.3738, "step": 14857 }, { "epoch": 1.7067371202113606, "grad_norm": 0.5990536212921143, "learning_rate": 0.0001, "loss": 1.5828, "step": 14858 }, { "epoch": 1.7068519901211876, "grad_norm": 0.6412786245346069, "learning_rate": 0.0001, "loss": 1.6806, "step": 14859 }, { "epoch": 1.7069668600310148, "grad_norm": 0.6077423691749573, "learning_rate": 0.0001, "loss": 1.4176, "step": 14860 }, { "epoch": 1.707081729940842, "grad_norm": 0.6050223708152771, "learning_rate": 0.0001, "loss": 1.3676, "step": 14861 }, { "epoch": 1.707196599850669, "grad_norm": 0.6094151735305786, "learning_rate": 0.0001, "loss": 1.4076, "step": 14862 }, { "epoch": 1.707311469760496, "grad_norm": 0.6265443563461304, "learning_rate": 0.0001, "loss": 1.6305, "step": 14863 }, { "epoch": 1.7074263396703233, "grad_norm": 0.6976631283760071, "learning_rate": 0.0001, "loss": 1.3129, "step": 14864 }, { "epoch": 1.7075412095801505, "grad_norm": 0.6556214094161987, "learning_rate": 0.0001, "loss": 1.5914, "step": 14865 }, { "epoch": 1.7076560794899776, "grad_norm": 0.5818542838096619, "learning_rate": 0.0001, "loss": 1.3679, "step": 14866 }, { "epoch": 1.7077709493998046, "grad_norm": 0.5856722593307495, "learning_rate": 0.0001, "loss": 1.3271, "step": 14867 }, { "epoch": 1.7078858193096318, "grad_norm": 0.5733777284622192, "learning_rate": 0.0001, "loss": 1.3968, "step": 14868 }, { "epoch": 1.708000689219459, "grad_norm": 0.6631439924240112, "learning_rate": 0.0001, "loss": 1.5482, "step": 14869 }, { "epoch": 1.708115559129286, "grad_norm": 0.6170898079872131, "learning_rate": 0.0001, "loss": 1.4636, "step": 14870 }, { "epoch": 1.708230429039113, "grad_norm": 0.700430154800415, "learning_rate": 0.0001, "loss": 1.4296, "step": 14871 }, { "epoch": 1.7083452989489403, "grad_norm": 0.6631823182106018, "learning_rate": 0.0001, "loss": 1.4876, "step": 14872 }, { "epoch": 1.7084601688587675, "grad_norm": 0.6962304711341858, "learning_rate": 0.0001, "loss": 1.4368, "step": 14873 }, { "epoch": 1.7085750387685945, "grad_norm": 0.5741162896156311, "learning_rate": 0.0001, "loss": 1.3373, "step": 14874 }, { "epoch": 1.7086899086784217, "grad_norm": 0.6080014109611511, "learning_rate": 0.0001, "loss": 1.3203, "step": 14875 }, { "epoch": 1.7088047785882488, "grad_norm": 0.6642729043960571, "learning_rate": 0.0001, "loss": 1.5342, "step": 14876 }, { "epoch": 1.708919648498076, "grad_norm": 0.659485936164856, "learning_rate": 0.0001, "loss": 1.4116, "step": 14877 }, { "epoch": 1.7090345184079032, "grad_norm": 0.6787165999412537, "learning_rate": 0.0001, "loss": 1.4067, "step": 14878 }, { "epoch": 1.7091493883177302, "grad_norm": 0.6123861074447632, "learning_rate": 0.0001, "loss": 1.4821, "step": 14879 }, { "epoch": 1.7092642582275572, "grad_norm": 0.608695387840271, "learning_rate": 0.0001, "loss": 1.4714, "step": 14880 }, { "epoch": 1.7093791281373845, "grad_norm": 0.5870606899261475, "learning_rate": 0.0001, "loss": 1.4441, "step": 14881 }, { "epoch": 1.7094939980472117, "grad_norm": 0.6159781813621521, "learning_rate": 0.0001, "loss": 1.2932, "step": 14882 }, { "epoch": 1.7096088679570387, "grad_norm": 0.6811216473579407, "learning_rate": 0.0001, "loss": 1.4543, "step": 14883 }, { "epoch": 1.7097237378668657, "grad_norm": 0.601915180683136, "learning_rate": 0.0001, "loss": 1.2939, "step": 14884 }, { "epoch": 1.709838607776693, "grad_norm": 0.5753575563430786, "learning_rate": 0.0001, "loss": 1.4496, "step": 14885 }, { "epoch": 1.7099534776865202, "grad_norm": 0.6104464530944824, "learning_rate": 0.0001, "loss": 1.4767, "step": 14886 }, { "epoch": 1.7100683475963472, "grad_norm": 0.5839319229125977, "learning_rate": 0.0001, "loss": 1.2978, "step": 14887 }, { "epoch": 1.7101832175061742, "grad_norm": 0.6496050953865051, "learning_rate": 0.0001, "loss": 1.6834, "step": 14888 }, { "epoch": 1.7102980874160014, "grad_norm": 0.6293419599533081, "learning_rate": 0.0001, "loss": 1.4529, "step": 14889 }, { "epoch": 1.7104129573258287, "grad_norm": 0.6812129616737366, "learning_rate": 0.0001, "loss": 1.534, "step": 14890 }, { "epoch": 1.7105278272356557, "grad_norm": 0.614952802658081, "learning_rate": 0.0001, "loss": 1.5508, "step": 14891 }, { "epoch": 1.7106426971454827, "grad_norm": 0.6444807648658752, "learning_rate": 0.0001, "loss": 1.3534, "step": 14892 }, { "epoch": 1.71075756705531, "grad_norm": 0.7069858312606812, "learning_rate": 0.0001, "loss": 1.6134, "step": 14893 }, { "epoch": 1.7108724369651371, "grad_norm": 0.6311841607093811, "learning_rate": 0.0001, "loss": 1.4556, "step": 14894 }, { "epoch": 1.7109873068749641, "grad_norm": 0.645004153251648, "learning_rate": 0.0001, "loss": 1.4576, "step": 14895 }, { "epoch": 1.7111021767847912, "grad_norm": 0.613746166229248, "learning_rate": 0.0001, "loss": 1.351, "step": 14896 }, { "epoch": 1.7112170466946184, "grad_norm": 0.653880774974823, "learning_rate": 0.0001, "loss": 1.4966, "step": 14897 }, { "epoch": 1.7113319166044456, "grad_norm": 0.6353497505187988, "learning_rate": 0.0001, "loss": 1.5277, "step": 14898 }, { "epoch": 1.7114467865142726, "grad_norm": 0.6185076236724854, "learning_rate": 0.0001, "loss": 1.3926, "step": 14899 }, { "epoch": 1.7115616564240996, "grad_norm": 0.6185590028762817, "learning_rate": 0.0001, "loss": 1.4641, "step": 14900 }, { "epoch": 1.7116765263339269, "grad_norm": 0.6211035847663879, "learning_rate": 0.0001, "loss": 1.5412, "step": 14901 }, { "epoch": 1.711791396243754, "grad_norm": 0.5859823226928711, "learning_rate": 0.0001, "loss": 1.3688, "step": 14902 }, { "epoch": 1.711906266153581, "grad_norm": 0.5817895531654358, "learning_rate": 0.0001, "loss": 1.3958, "step": 14903 }, { "epoch": 1.7120211360634081, "grad_norm": 0.6351556181907654, "learning_rate": 0.0001, "loss": 1.5479, "step": 14904 }, { "epoch": 1.7121360059732353, "grad_norm": 0.6283468008041382, "learning_rate": 0.0001, "loss": 1.496, "step": 14905 }, { "epoch": 1.7122508758830626, "grad_norm": 0.6462883353233337, "learning_rate": 0.0001, "loss": 1.5196, "step": 14906 }, { "epoch": 1.7123657457928896, "grad_norm": 0.6173494458198547, "learning_rate": 0.0001, "loss": 1.4087, "step": 14907 }, { "epoch": 1.7124806157027166, "grad_norm": 0.5699242949485779, "learning_rate": 0.0001, "loss": 1.5066, "step": 14908 }, { "epoch": 1.7125954856125438, "grad_norm": 0.6764625310897827, "learning_rate": 0.0001, "loss": 1.5277, "step": 14909 }, { "epoch": 1.712710355522371, "grad_norm": 0.6505712866783142, "learning_rate": 0.0001, "loss": 1.4663, "step": 14910 }, { "epoch": 1.712825225432198, "grad_norm": 0.6406113505363464, "learning_rate": 0.0001, "loss": 1.4806, "step": 14911 }, { "epoch": 1.712940095342025, "grad_norm": 0.621518075466156, "learning_rate": 0.0001, "loss": 1.5465, "step": 14912 }, { "epoch": 1.7130549652518523, "grad_norm": 0.6753479838371277, "learning_rate": 0.0001, "loss": 1.7465, "step": 14913 }, { "epoch": 1.7131698351616795, "grad_norm": 0.7874284982681274, "learning_rate": 0.0001, "loss": 1.5676, "step": 14914 }, { "epoch": 1.7132847050715065, "grad_norm": 0.6016520857810974, "learning_rate": 0.0001, "loss": 1.4469, "step": 14915 }, { "epoch": 1.7133995749813336, "grad_norm": 0.6585147976875305, "learning_rate": 0.0001, "loss": 1.6068, "step": 14916 }, { "epoch": 1.7135144448911608, "grad_norm": 0.5877991318702698, "learning_rate": 0.0001, "loss": 1.4722, "step": 14917 }, { "epoch": 1.713629314800988, "grad_norm": 0.5988264083862305, "learning_rate": 0.0001, "loss": 1.6484, "step": 14918 }, { "epoch": 1.713744184710815, "grad_norm": 0.7021918296813965, "learning_rate": 0.0001, "loss": 1.5297, "step": 14919 }, { "epoch": 1.713859054620642, "grad_norm": 0.6114031076431274, "learning_rate": 0.0001, "loss": 1.4106, "step": 14920 }, { "epoch": 1.7139739245304693, "grad_norm": 0.646767258644104, "learning_rate": 0.0001, "loss": 1.4992, "step": 14921 }, { "epoch": 1.7140887944402965, "grad_norm": 0.598789393901825, "learning_rate": 0.0001, "loss": 1.3379, "step": 14922 }, { "epoch": 1.7142036643501235, "grad_norm": 0.6237581372261047, "learning_rate": 0.0001, "loss": 1.4198, "step": 14923 }, { "epoch": 1.7143185342599505, "grad_norm": 0.613361120223999, "learning_rate": 0.0001, "loss": 1.5141, "step": 14924 }, { "epoch": 1.7144334041697777, "grad_norm": 0.654420018196106, "learning_rate": 0.0001, "loss": 1.5044, "step": 14925 }, { "epoch": 1.714548274079605, "grad_norm": 0.6176064014434814, "learning_rate": 0.0001, "loss": 1.555, "step": 14926 }, { "epoch": 1.714663143989432, "grad_norm": 0.6290966272354126, "learning_rate": 0.0001, "loss": 1.6387, "step": 14927 }, { "epoch": 1.714778013899259, "grad_norm": 0.6438091397285461, "learning_rate": 0.0001, "loss": 1.3984, "step": 14928 }, { "epoch": 1.7148928838090862, "grad_norm": 0.6490173935890198, "learning_rate": 0.0001, "loss": 1.5162, "step": 14929 }, { "epoch": 1.7150077537189135, "grad_norm": 0.5619537830352783, "learning_rate": 0.0001, "loss": 1.4052, "step": 14930 }, { "epoch": 1.7151226236287405, "grad_norm": 0.5740304589271545, "learning_rate": 0.0001, "loss": 1.3887, "step": 14931 }, { "epoch": 1.7152374935385675, "grad_norm": 0.5972532629966736, "learning_rate": 0.0001, "loss": 1.488, "step": 14932 }, { "epoch": 1.7153523634483947, "grad_norm": 0.6191779375076294, "learning_rate": 0.0001, "loss": 1.3594, "step": 14933 }, { "epoch": 1.715467233358222, "grad_norm": 0.6333027482032776, "learning_rate": 0.0001, "loss": 1.3897, "step": 14934 }, { "epoch": 1.715582103268049, "grad_norm": 0.5880884528160095, "learning_rate": 0.0001, "loss": 1.243, "step": 14935 }, { "epoch": 1.715696973177876, "grad_norm": 0.7410986423492432, "learning_rate": 0.0001, "loss": 1.6198, "step": 14936 }, { "epoch": 1.7158118430877032, "grad_norm": 0.7331770062446594, "learning_rate": 0.0001, "loss": 1.5097, "step": 14937 }, { "epoch": 1.7159267129975304, "grad_norm": 0.6188743114471436, "learning_rate": 0.0001, "loss": 1.5405, "step": 14938 }, { "epoch": 1.7160415829073574, "grad_norm": 0.6449677348136902, "learning_rate": 0.0001, "loss": 1.8204, "step": 14939 }, { "epoch": 1.7161564528171844, "grad_norm": 0.6289303302764893, "learning_rate": 0.0001, "loss": 1.5189, "step": 14940 }, { "epoch": 1.7162713227270117, "grad_norm": 0.5749282836914062, "learning_rate": 0.0001, "loss": 1.3974, "step": 14941 }, { "epoch": 1.716386192636839, "grad_norm": 0.6385175585746765, "learning_rate": 0.0001, "loss": 1.4408, "step": 14942 }, { "epoch": 1.716501062546666, "grad_norm": 0.6189382672309875, "learning_rate": 0.0001, "loss": 1.5049, "step": 14943 }, { "epoch": 1.716615932456493, "grad_norm": 0.7417494058609009, "learning_rate": 0.0001, "loss": 1.6693, "step": 14944 }, { "epoch": 1.7167308023663201, "grad_norm": 0.5887095332145691, "learning_rate": 0.0001, "loss": 1.5724, "step": 14945 }, { "epoch": 1.7168456722761474, "grad_norm": 0.591689944267273, "learning_rate": 0.0001, "loss": 1.3988, "step": 14946 }, { "epoch": 1.7169605421859744, "grad_norm": 0.7112393975257874, "learning_rate": 0.0001, "loss": 1.6562, "step": 14947 }, { "epoch": 1.7170754120958014, "grad_norm": 0.6112138032913208, "learning_rate": 0.0001, "loss": 1.3103, "step": 14948 }, { "epoch": 1.7171902820056286, "grad_norm": 0.6022821664810181, "learning_rate": 0.0001, "loss": 1.2665, "step": 14949 }, { "epoch": 1.7173051519154559, "grad_norm": 0.6003796458244324, "learning_rate": 0.0001, "loss": 1.3495, "step": 14950 }, { "epoch": 1.7174200218252829, "grad_norm": 0.7552711963653564, "learning_rate": 0.0001, "loss": 1.4782, "step": 14951 }, { "epoch": 1.7175348917351099, "grad_norm": 0.6487937569618225, "learning_rate": 0.0001, "loss": 1.4913, "step": 14952 }, { "epoch": 1.717649761644937, "grad_norm": 0.5925029516220093, "learning_rate": 0.0001, "loss": 1.3924, "step": 14953 }, { "epoch": 1.7177646315547643, "grad_norm": 0.6977973580360413, "learning_rate": 0.0001, "loss": 1.534, "step": 14954 }, { "epoch": 1.7178795014645913, "grad_norm": 0.6343498826026917, "learning_rate": 0.0001, "loss": 1.5123, "step": 14955 }, { "epoch": 1.7179943713744183, "grad_norm": 0.608635663986206, "learning_rate": 0.0001, "loss": 1.3089, "step": 14956 }, { "epoch": 1.7181092412842456, "grad_norm": 0.6102705001831055, "learning_rate": 0.0001, "loss": 1.4239, "step": 14957 }, { "epoch": 1.7182241111940728, "grad_norm": 0.6915038824081421, "learning_rate": 0.0001, "loss": 1.5418, "step": 14958 }, { "epoch": 1.7183389811038998, "grad_norm": 0.6009780764579773, "learning_rate": 0.0001, "loss": 1.5457, "step": 14959 }, { "epoch": 1.7184538510137268, "grad_norm": 0.5998890399932861, "learning_rate": 0.0001, "loss": 1.4216, "step": 14960 }, { "epoch": 1.718568720923554, "grad_norm": 0.6564615964889526, "learning_rate": 0.0001, "loss": 1.5023, "step": 14961 }, { "epoch": 1.7186835908333813, "grad_norm": 0.5995262861251831, "learning_rate": 0.0001, "loss": 1.555, "step": 14962 }, { "epoch": 1.7187984607432083, "grad_norm": 0.5520354509353638, "learning_rate": 0.0001, "loss": 1.3103, "step": 14963 }, { "epoch": 1.7189133306530353, "grad_norm": 0.5722663402557373, "learning_rate": 0.0001, "loss": 1.2938, "step": 14964 }, { "epoch": 1.7190282005628625, "grad_norm": 0.6511684060096741, "learning_rate": 0.0001, "loss": 1.5674, "step": 14965 }, { "epoch": 1.7191430704726898, "grad_norm": 0.6239314675331116, "learning_rate": 0.0001, "loss": 1.4849, "step": 14966 }, { "epoch": 1.7192579403825168, "grad_norm": 0.6765071153640747, "learning_rate": 0.0001, "loss": 1.7592, "step": 14967 }, { "epoch": 1.7193728102923438, "grad_norm": 0.6002155542373657, "learning_rate": 0.0001, "loss": 1.4651, "step": 14968 }, { "epoch": 1.719487680202171, "grad_norm": 0.6126995086669922, "learning_rate": 0.0001, "loss": 1.3875, "step": 14969 }, { "epoch": 1.7196025501119983, "grad_norm": 0.6200347542762756, "learning_rate": 0.0001, "loss": 1.4829, "step": 14970 }, { "epoch": 1.7197174200218253, "grad_norm": 0.6229084730148315, "learning_rate": 0.0001, "loss": 1.5968, "step": 14971 }, { "epoch": 1.7198322899316523, "grad_norm": 0.5846944451332092, "learning_rate": 0.0001, "loss": 1.2907, "step": 14972 }, { "epoch": 1.7199471598414795, "grad_norm": 0.6202731132507324, "learning_rate": 0.0001, "loss": 1.4005, "step": 14973 }, { "epoch": 1.7200620297513067, "grad_norm": 0.5932221412658691, "learning_rate": 0.0001, "loss": 1.3362, "step": 14974 }, { "epoch": 1.7201768996611337, "grad_norm": 0.5857172012329102, "learning_rate": 0.0001, "loss": 1.3536, "step": 14975 }, { "epoch": 1.7202917695709607, "grad_norm": 0.6796727776527405, "learning_rate": 0.0001, "loss": 1.5061, "step": 14976 }, { "epoch": 1.720406639480788, "grad_norm": 0.6333981156349182, "learning_rate": 0.0001, "loss": 1.5462, "step": 14977 }, { "epoch": 1.7205215093906152, "grad_norm": 0.6612197160720825, "learning_rate": 0.0001, "loss": 1.3956, "step": 14978 }, { "epoch": 1.7206363793004422, "grad_norm": 0.6356850862503052, "learning_rate": 0.0001, "loss": 1.4098, "step": 14979 }, { "epoch": 1.7207512492102692, "grad_norm": 0.6078984141349792, "learning_rate": 0.0001, "loss": 1.4597, "step": 14980 }, { "epoch": 1.7208661191200965, "grad_norm": 0.6189718842506409, "learning_rate": 0.0001, "loss": 1.4904, "step": 14981 }, { "epoch": 1.7209809890299237, "grad_norm": 0.5823473334312439, "learning_rate": 0.0001, "loss": 1.3974, "step": 14982 }, { "epoch": 1.7210958589397507, "grad_norm": 0.6768457889556885, "learning_rate": 0.0001, "loss": 1.5662, "step": 14983 }, { "epoch": 1.7212107288495777, "grad_norm": 0.6085696220397949, "learning_rate": 0.0001, "loss": 1.4566, "step": 14984 }, { "epoch": 1.721325598759405, "grad_norm": 0.6197189092636108, "learning_rate": 0.0001, "loss": 1.4594, "step": 14985 }, { "epoch": 1.7214404686692322, "grad_norm": 0.6606781482696533, "learning_rate": 0.0001, "loss": 1.5952, "step": 14986 }, { "epoch": 1.7215553385790592, "grad_norm": 0.5829959511756897, "learning_rate": 0.0001, "loss": 1.2158, "step": 14987 }, { "epoch": 1.7216702084888862, "grad_norm": 0.6431682705879211, "learning_rate": 0.0001, "loss": 1.3648, "step": 14988 }, { "epoch": 1.7217850783987134, "grad_norm": 0.6825764775276184, "learning_rate": 0.0001, "loss": 1.5493, "step": 14989 }, { "epoch": 1.7218999483085407, "grad_norm": 0.5983292460441589, "learning_rate": 0.0001, "loss": 1.3756, "step": 14990 }, { "epoch": 1.7220148182183677, "grad_norm": 0.6294005513191223, "learning_rate": 0.0001, "loss": 1.2658, "step": 14991 }, { "epoch": 1.7221296881281947, "grad_norm": 0.6360632181167603, "learning_rate": 0.0001, "loss": 1.5104, "step": 14992 }, { "epoch": 1.722244558038022, "grad_norm": 0.5845391154289246, "learning_rate": 0.0001, "loss": 1.2789, "step": 14993 }, { "epoch": 1.7223594279478491, "grad_norm": 0.7193369269371033, "learning_rate": 0.0001, "loss": 1.5247, "step": 14994 }, { "epoch": 1.7224742978576761, "grad_norm": 0.583917498588562, "learning_rate": 0.0001, "loss": 1.4828, "step": 14995 }, { "epoch": 1.7225891677675031, "grad_norm": 0.6654466986656189, "learning_rate": 0.0001, "loss": 1.5591, "step": 14996 }, { "epoch": 1.7227040376773304, "grad_norm": 0.5956998467445374, "learning_rate": 0.0001, "loss": 1.2864, "step": 14997 }, { "epoch": 1.7228189075871576, "grad_norm": 0.6005557179450989, "learning_rate": 0.0001, "loss": 1.2126, "step": 14998 }, { "epoch": 1.7229337774969846, "grad_norm": 0.6229847073554993, "learning_rate": 0.0001, "loss": 1.5815, "step": 14999 }, { "epoch": 1.7230486474068116, "grad_norm": 0.5950932502746582, "learning_rate": 0.0001, "loss": 1.4798, "step": 15000 }, { "epoch": 1.7231635173166389, "grad_norm": 0.627495288848877, "learning_rate": 0.0001, "loss": 1.4347, "step": 15001 }, { "epoch": 1.723278387226466, "grad_norm": 0.5470762252807617, "learning_rate": 0.0001, "loss": 1.1846, "step": 15002 }, { "epoch": 1.723393257136293, "grad_norm": 0.5681390166282654, "learning_rate": 0.0001, "loss": 1.3358, "step": 15003 }, { "epoch": 1.72350812704612, "grad_norm": 0.5831446647644043, "learning_rate": 0.0001, "loss": 1.4402, "step": 15004 }, { "epoch": 1.7236229969559473, "grad_norm": 0.6146191358566284, "learning_rate": 0.0001, "loss": 1.4376, "step": 15005 }, { "epoch": 1.7237378668657746, "grad_norm": 0.6659471988677979, "learning_rate": 0.0001, "loss": 1.3841, "step": 15006 }, { "epoch": 1.7238527367756016, "grad_norm": 0.6301655173301697, "learning_rate": 0.0001, "loss": 1.4554, "step": 15007 }, { "epoch": 1.7239676066854286, "grad_norm": 0.6276340484619141, "learning_rate": 0.0001, "loss": 1.5274, "step": 15008 }, { "epoch": 1.7240824765952558, "grad_norm": 0.5837733745574951, "learning_rate": 0.0001, "loss": 1.4584, "step": 15009 }, { "epoch": 1.724197346505083, "grad_norm": 0.5841119885444641, "learning_rate": 0.0001, "loss": 1.5087, "step": 15010 }, { "epoch": 1.72431221641491, "grad_norm": 0.6044458746910095, "learning_rate": 0.0001, "loss": 1.5023, "step": 15011 }, { "epoch": 1.7244270863247373, "grad_norm": 0.6079664826393127, "learning_rate": 0.0001, "loss": 1.4079, "step": 15012 }, { "epoch": 1.7245419562345643, "grad_norm": 0.8847506642341614, "learning_rate": 0.0001, "loss": 1.6107, "step": 15013 }, { "epoch": 1.7246568261443915, "grad_norm": 0.6034465432167053, "learning_rate": 0.0001, "loss": 1.2711, "step": 15014 }, { "epoch": 1.7247716960542188, "grad_norm": 0.8095591068267822, "learning_rate": 0.0001, "loss": 1.4583, "step": 15015 }, { "epoch": 1.7248865659640458, "grad_norm": 0.5719621777534485, "learning_rate": 0.0001, "loss": 1.427, "step": 15016 }, { "epoch": 1.7250014358738728, "grad_norm": 0.6097038388252258, "learning_rate": 0.0001, "loss": 1.5195, "step": 15017 }, { "epoch": 1.7251163057837, "grad_norm": 0.64508455991745, "learning_rate": 0.0001, "loss": 1.5549, "step": 15018 }, { "epoch": 1.7252311756935272, "grad_norm": 0.5853515863418579, "learning_rate": 0.0001, "loss": 1.3652, "step": 15019 }, { "epoch": 1.7253460456033543, "grad_norm": 0.6391353011131287, "learning_rate": 0.0001, "loss": 1.4132, "step": 15020 }, { "epoch": 1.7254609155131813, "grad_norm": 0.6568958163261414, "learning_rate": 0.0001, "loss": 1.5182, "step": 15021 }, { "epoch": 1.7255757854230085, "grad_norm": 0.6337956190109253, "learning_rate": 0.0001, "loss": 1.5081, "step": 15022 }, { "epoch": 1.7256906553328357, "grad_norm": 0.682131290435791, "learning_rate": 0.0001, "loss": 1.4523, "step": 15023 }, { "epoch": 1.7258055252426627, "grad_norm": 0.6506146192550659, "learning_rate": 0.0001, "loss": 1.5401, "step": 15024 }, { "epoch": 1.7259203951524897, "grad_norm": 0.6392216086387634, "learning_rate": 0.0001, "loss": 1.5851, "step": 15025 }, { "epoch": 1.726035265062317, "grad_norm": 0.6349377632141113, "learning_rate": 0.0001, "loss": 1.3924, "step": 15026 }, { "epoch": 1.7261501349721442, "grad_norm": 0.588093638420105, "learning_rate": 0.0001, "loss": 1.5297, "step": 15027 }, { "epoch": 1.7262650048819712, "grad_norm": 0.6122344732284546, "learning_rate": 0.0001, "loss": 1.537, "step": 15028 }, { "epoch": 1.7263798747917982, "grad_norm": 0.6537800431251526, "learning_rate": 0.0001, "loss": 1.5847, "step": 15029 }, { "epoch": 1.7264947447016255, "grad_norm": 0.6109773516654968, "learning_rate": 0.0001, "loss": 1.4354, "step": 15030 }, { "epoch": 1.7266096146114527, "grad_norm": 0.576797604560852, "learning_rate": 0.0001, "loss": 1.3143, "step": 15031 }, { "epoch": 1.7267244845212797, "grad_norm": 0.6462949514389038, "learning_rate": 0.0001, "loss": 1.4408, "step": 15032 }, { "epoch": 1.7268393544311067, "grad_norm": 0.6329190731048584, "learning_rate": 0.0001, "loss": 1.4861, "step": 15033 }, { "epoch": 1.726954224340934, "grad_norm": 0.5958088040351868, "learning_rate": 0.0001, "loss": 1.5292, "step": 15034 }, { "epoch": 1.7270690942507612, "grad_norm": 0.630052387714386, "learning_rate": 0.0001, "loss": 1.3768, "step": 15035 }, { "epoch": 1.7271839641605882, "grad_norm": 0.667178750038147, "learning_rate": 0.0001, "loss": 1.3865, "step": 15036 }, { "epoch": 1.7272988340704152, "grad_norm": 0.7113081216812134, "learning_rate": 0.0001, "loss": 1.7048, "step": 15037 }, { "epoch": 1.7274137039802424, "grad_norm": 0.6631393432617188, "learning_rate": 0.0001, "loss": 1.4899, "step": 15038 }, { "epoch": 1.7275285738900696, "grad_norm": 0.6248582005500793, "learning_rate": 0.0001, "loss": 1.5667, "step": 15039 }, { "epoch": 1.7276434437998966, "grad_norm": 0.6079177260398865, "learning_rate": 0.0001, "loss": 1.3492, "step": 15040 }, { "epoch": 1.7277583137097237, "grad_norm": 0.5928921699523926, "learning_rate": 0.0001, "loss": 1.4487, "step": 15041 }, { "epoch": 1.727873183619551, "grad_norm": 0.6063787341117859, "learning_rate": 0.0001, "loss": 1.225, "step": 15042 }, { "epoch": 1.7279880535293781, "grad_norm": 0.6443949937820435, "learning_rate": 0.0001, "loss": 1.5223, "step": 15043 }, { "epoch": 1.7281029234392051, "grad_norm": 0.6002311110496521, "learning_rate": 0.0001, "loss": 1.474, "step": 15044 }, { "epoch": 1.7282177933490321, "grad_norm": 0.6543359756469727, "learning_rate": 0.0001, "loss": 1.4626, "step": 15045 }, { "epoch": 1.7283326632588594, "grad_norm": 0.6799443364143372, "learning_rate": 0.0001, "loss": 1.5299, "step": 15046 }, { "epoch": 1.7284475331686866, "grad_norm": 0.6205649971961975, "learning_rate": 0.0001, "loss": 1.2928, "step": 15047 }, { "epoch": 1.7285624030785136, "grad_norm": 0.589119553565979, "learning_rate": 0.0001, "loss": 1.5165, "step": 15048 }, { "epoch": 1.7286772729883406, "grad_norm": 0.6019067168235779, "learning_rate": 0.0001, "loss": 1.3567, "step": 15049 }, { "epoch": 1.7287921428981678, "grad_norm": 0.5993046760559082, "learning_rate": 0.0001, "loss": 1.3804, "step": 15050 }, { "epoch": 1.728907012807995, "grad_norm": 0.6053866147994995, "learning_rate": 0.0001, "loss": 1.4004, "step": 15051 }, { "epoch": 1.729021882717822, "grad_norm": 0.5996253490447998, "learning_rate": 0.0001, "loss": 1.1198, "step": 15052 }, { "epoch": 1.729136752627649, "grad_norm": 0.6605576872825623, "learning_rate": 0.0001, "loss": 1.4068, "step": 15053 }, { "epoch": 1.7292516225374763, "grad_norm": 0.5937269926071167, "learning_rate": 0.0001, "loss": 1.4028, "step": 15054 }, { "epoch": 1.7293664924473036, "grad_norm": 0.6884070038795471, "learning_rate": 0.0001, "loss": 1.5806, "step": 15055 }, { "epoch": 1.7294813623571306, "grad_norm": 0.6444214582443237, "learning_rate": 0.0001, "loss": 1.4598, "step": 15056 }, { "epoch": 1.7295962322669576, "grad_norm": 0.6065829992294312, "learning_rate": 0.0001, "loss": 1.5141, "step": 15057 }, { "epoch": 1.7297111021767848, "grad_norm": 0.6256981492042542, "learning_rate": 0.0001, "loss": 1.2783, "step": 15058 }, { "epoch": 1.729825972086612, "grad_norm": 0.641852617263794, "learning_rate": 0.0001, "loss": 1.6474, "step": 15059 }, { "epoch": 1.729940841996439, "grad_norm": 0.5563915967941284, "learning_rate": 0.0001, "loss": 1.4621, "step": 15060 }, { "epoch": 1.730055711906266, "grad_norm": 0.6508216261863708, "learning_rate": 0.0001, "loss": 1.3978, "step": 15061 }, { "epoch": 1.7301705818160933, "grad_norm": 0.61888188123703, "learning_rate": 0.0001, "loss": 1.6299, "step": 15062 }, { "epoch": 1.7302854517259205, "grad_norm": 0.6529124975204468, "learning_rate": 0.0001, "loss": 1.3718, "step": 15063 }, { "epoch": 1.7304003216357475, "grad_norm": 0.5647428631782532, "learning_rate": 0.0001, "loss": 1.4557, "step": 15064 }, { "epoch": 1.7305151915455745, "grad_norm": 0.710871696472168, "learning_rate": 0.0001, "loss": 1.6701, "step": 15065 }, { "epoch": 1.7306300614554018, "grad_norm": 0.6646068692207336, "learning_rate": 0.0001, "loss": 1.6186, "step": 15066 }, { "epoch": 1.730744931365229, "grad_norm": 0.6500128507614136, "learning_rate": 0.0001, "loss": 1.5042, "step": 15067 }, { "epoch": 1.730859801275056, "grad_norm": 0.6322208642959595, "learning_rate": 0.0001, "loss": 1.4786, "step": 15068 }, { "epoch": 1.730974671184883, "grad_norm": 0.5904911756515503, "learning_rate": 0.0001, "loss": 1.4433, "step": 15069 }, { "epoch": 1.7310895410947102, "grad_norm": 0.639840841293335, "learning_rate": 0.0001, "loss": 1.5974, "step": 15070 }, { "epoch": 1.7312044110045375, "grad_norm": 0.6180384159088135, "learning_rate": 0.0001, "loss": 1.4879, "step": 15071 }, { "epoch": 1.7313192809143645, "grad_norm": 0.5702844858169556, "learning_rate": 0.0001, "loss": 1.3738, "step": 15072 }, { "epoch": 1.7314341508241915, "grad_norm": 0.6036345362663269, "learning_rate": 0.0001, "loss": 1.5192, "step": 15073 }, { "epoch": 1.7315490207340187, "grad_norm": 0.6159979104995728, "learning_rate": 0.0001, "loss": 1.5198, "step": 15074 }, { "epoch": 1.731663890643846, "grad_norm": 0.5626546740531921, "learning_rate": 0.0001, "loss": 1.422, "step": 15075 }, { "epoch": 1.731778760553673, "grad_norm": 0.6199283599853516, "learning_rate": 0.0001, "loss": 1.5527, "step": 15076 }, { "epoch": 1.7318936304635, "grad_norm": 0.6209295392036438, "learning_rate": 0.0001, "loss": 1.5764, "step": 15077 }, { "epoch": 1.7320085003733272, "grad_norm": 0.574235200881958, "learning_rate": 0.0001, "loss": 1.4249, "step": 15078 }, { "epoch": 1.7321233702831544, "grad_norm": 0.6106351017951965, "learning_rate": 0.0001, "loss": 1.5343, "step": 15079 }, { "epoch": 1.7322382401929814, "grad_norm": 0.6253929138183594, "learning_rate": 0.0001, "loss": 1.3034, "step": 15080 }, { "epoch": 1.7323531101028085, "grad_norm": 0.6872732639312744, "learning_rate": 0.0001, "loss": 1.5674, "step": 15081 }, { "epoch": 1.7324679800126357, "grad_norm": 0.6001285314559937, "learning_rate": 0.0001, "loss": 1.3798, "step": 15082 }, { "epoch": 1.732582849922463, "grad_norm": 0.5744822025299072, "learning_rate": 0.0001, "loss": 1.5976, "step": 15083 }, { "epoch": 1.73269771983229, "grad_norm": 0.5871395468711853, "learning_rate": 0.0001, "loss": 1.4265, "step": 15084 }, { "epoch": 1.732812589742117, "grad_norm": 0.586462140083313, "learning_rate": 0.0001, "loss": 1.4947, "step": 15085 }, { "epoch": 1.7329274596519442, "grad_norm": 0.6341403722763062, "learning_rate": 0.0001, "loss": 1.4599, "step": 15086 }, { "epoch": 1.7330423295617714, "grad_norm": 0.5986180901527405, "learning_rate": 0.0001, "loss": 1.3682, "step": 15087 }, { "epoch": 1.7331571994715984, "grad_norm": 0.6208639144897461, "learning_rate": 0.0001, "loss": 1.5485, "step": 15088 }, { "epoch": 1.7332720693814254, "grad_norm": 0.684834361076355, "learning_rate": 0.0001, "loss": 1.5264, "step": 15089 }, { "epoch": 1.7333869392912526, "grad_norm": 0.692686915397644, "learning_rate": 0.0001, "loss": 1.441, "step": 15090 }, { "epoch": 1.7335018092010799, "grad_norm": 0.621494710445404, "learning_rate": 0.0001, "loss": 1.5951, "step": 15091 }, { "epoch": 1.7336166791109069, "grad_norm": 0.6340914368629456, "learning_rate": 0.0001, "loss": 1.5213, "step": 15092 }, { "epoch": 1.733731549020734, "grad_norm": 0.6137570142745972, "learning_rate": 0.0001, "loss": 1.3492, "step": 15093 }, { "epoch": 1.7338464189305611, "grad_norm": 0.6438025832176208, "learning_rate": 0.0001, "loss": 1.4728, "step": 15094 }, { "epoch": 1.7339612888403884, "grad_norm": 0.5989691615104675, "learning_rate": 0.0001, "loss": 1.4918, "step": 15095 }, { "epoch": 1.7340761587502154, "grad_norm": 0.6587399840354919, "learning_rate": 0.0001, "loss": 1.2788, "step": 15096 }, { "epoch": 1.7341910286600424, "grad_norm": 0.6119810342788696, "learning_rate": 0.0001, "loss": 1.3324, "step": 15097 }, { "epoch": 1.7343058985698696, "grad_norm": 0.6475637555122375, "learning_rate": 0.0001, "loss": 1.6146, "step": 15098 }, { "epoch": 1.7344207684796968, "grad_norm": 0.5902281403541565, "learning_rate": 0.0001, "loss": 1.4495, "step": 15099 }, { "epoch": 1.7345356383895238, "grad_norm": 0.5807440876960754, "learning_rate": 0.0001, "loss": 1.3451, "step": 15100 }, { "epoch": 1.7346505082993509, "grad_norm": 0.6495022773742676, "learning_rate": 0.0001, "loss": 1.5186, "step": 15101 }, { "epoch": 1.734765378209178, "grad_norm": 0.65216064453125, "learning_rate": 0.0001, "loss": 1.4218, "step": 15102 }, { "epoch": 1.7348802481190053, "grad_norm": 0.6560716032981873, "learning_rate": 0.0001, "loss": 1.4779, "step": 15103 }, { "epoch": 1.7349951180288323, "grad_norm": 0.5937643051147461, "learning_rate": 0.0001, "loss": 1.3291, "step": 15104 }, { "epoch": 1.7351099879386593, "grad_norm": 0.6294172406196594, "learning_rate": 0.0001, "loss": 1.2777, "step": 15105 }, { "epoch": 1.7352248578484866, "grad_norm": 0.6666632890701294, "learning_rate": 0.0001, "loss": 1.3235, "step": 15106 }, { "epoch": 1.7353397277583138, "grad_norm": 0.6566066741943359, "learning_rate": 0.0001, "loss": 1.3513, "step": 15107 }, { "epoch": 1.7354545976681408, "grad_norm": 0.6714562773704529, "learning_rate": 0.0001, "loss": 1.6247, "step": 15108 }, { "epoch": 1.7355694675779678, "grad_norm": 0.5992361307144165, "learning_rate": 0.0001, "loss": 1.3927, "step": 15109 }, { "epoch": 1.735684337487795, "grad_norm": 0.5907565951347351, "learning_rate": 0.0001, "loss": 1.5398, "step": 15110 }, { "epoch": 1.7357992073976223, "grad_norm": 0.6381534934043884, "learning_rate": 0.0001, "loss": 1.4723, "step": 15111 }, { "epoch": 1.7359140773074493, "grad_norm": 0.5985099077224731, "learning_rate": 0.0001, "loss": 1.4673, "step": 15112 }, { "epoch": 1.7360289472172763, "grad_norm": 0.566951334476471, "learning_rate": 0.0001, "loss": 1.4824, "step": 15113 }, { "epoch": 1.7361438171271035, "grad_norm": 0.6164551973342896, "learning_rate": 0.0001, "loss": 1.5531, "step": 15114 }, { "epoch": 1.7362586870369308, "grad_norm": 0.5941165089607239, "learning_rate": 0.0001, "loss": 1.3014, "step": 15115 }, { "epoch": 1.7363735569467578, "grad_norm": 0.5940799117088318, "learning_rate": 0.0001, "loss": 1.6121, "step": 15116 }, { "epoch": 1.7364884268565848, "grad_norm": 0.629859209060669, "learning_rate": 0.0001, "loss": 1.6983, "step": 15117 }, { "epoch": 1.736603296766412, "grad_norm": 0.6390464305877686, "learning_rate": 0.0001, "loss": 1.5392, "step": 15118 }, { "epoch": 1.7367181666762392, "grad_norm": 0.6173864006996155, "learning_rate": 0.0001, "loss": 1.3426, "step": 15119 }, { "epoch": 1.7368330365860662, "grad_norm": 0.5565081238746643, "learning_rate": 0.0001, "loss": 1.4207, "step": 15120 }, { "epoch": 1.7369479064958933, "grad_norm": 0.6064382791519165, "learning_rate": 0.0001, "loss": 1.5435, "step": 15121 }, { "epoch": 1.7370627764057205, "grad_norm": 0.6307348608970642, "learning_rate": 0.0001, "loss": 1.3269, "step": 15122 }, { "epoch": 1.7371776463155477, "grad_norm": 0.63545823097229, "learning_rate": 0.0001, "loss": 1.5386, "step": 15123 }, { "epoch": 1.7372925162253747, "grad_norm": 0.6427724957466125, "learning_rate": 0.0001, "loss": 1.4303, "step": 15124 }, { "epoch": 1.7374073861352017, "grad_norm": 0.5886901021003723, "learning_rate": 0.0001, "loss": 1.2971, "step": 15125 }, { "epoch": 1.737522256045029, "grad_norm": 0.6295827031135559, "learning_rate": 0.0001, "loss": 1.2244, "step": 15126 }, { "epoch": 1.7376371259548562, "grad_norm": 0.5705087780952454, "learning_rate": 0.0001, "loss": 1.2789, "step": 15127 }, { "epoch": 1.7377519958646832, "grad_norm": 0.6834404468536377, "learning_rate": 0.0001, "loss": 1.3771, "step": 15128 }, { "epoch": 1.7378668657745102, "grad_norm": 0.6291454434394836, "learning_rate": 0.0001, "loss": 1.3769, "step": 15129 }, { "epoch": 1.7379817356843374, "grad_norm": 0.7055854201316833, "learning_rate": 0.0001, "loss": 1.609, "step": 15130 }, { "epoch": 1.7380966055941647, "grad_norm": 0.6519612669944763, "learning_rate": 0.0001, "loss": 1.3599, "step": 15131 }, { "epoch": 1.7382114755039917, "grad_norm": 0.7939478754997253, "learning_rate": 0.0001, "loss": 1.5643, "step": 15132 }, { "epoch": 1.7383263454138187, "grad_norm": 0.5996159315109253, "learning_rate": 0.0001, "loss": 1.4736, "step": 15133 }, { "epoch": 1.738441215323646, "grad_norm": 0.6544761061668396, "learning_rate": 0.0001, "loss": 1.5878, "step": 15134 }, { "epoch": 1.7385560852334732, "grad_norm": 0.5604496002197266, "learning_rate": 0.0001, "loss": 1.2495, "step": 15135 }, { "epoch": 1.7386709551433002, "grad_norm": 0.6021955609321594, "learning_rate": 0.0001, "loss": 1.4273, "step": 15136 }, { "epoch": 1.7387858250531272, "grad_norm": 0.6894492506980896, "learning_rate": 0.0001, "loss": 1.4778, "step": 15137 }, { "epoch": 1.7389006949629544, "grad_norm": 0.5934672355651855, "learning_rate": 0.0001, "loss": 1.4288, "step": 15138 }, { "epoch": 1.7390155648727816, "grad_norm": 0.6965713500976562, "learning_rate": 0.0001, "loss": 1.5802, "step": 15139 }, { "epoch": 1.7391304347826086, "grad_norm": 0.6328989863395691, "learning_rate": 0.0001, "loss": 1.3815, "step": 15140 }, { "epoch": 1.7392453046924357, "grad_norm": 0.6331512928009033, "learning_rate": 0.0001, "loss": 1.5845, "step": 15141 }, { "epoch": 1.7393601746022629, "grad_norm": 0.7313364148139954, "learning_rate": 0.0001, "loss": 1.6402, "step": 15142 }, { "epoch": 1.7394750445120901, "grad_norm": 0.6274045705795288, "learning_rate": 0.0001, "loss": 1.6405, "step": 15143 }, { "epoch": 1.7395899144219171, "grad_norm": 0.6259190440177917, "learning_rate": 0.0001, "loss": 1.4399, "step": 15144 }, { "epoch": 1.7397047843317441, "grad_norm": 0.6094817519187927, "learning_rate": 0.0001, "loss": 1.5018, "step": 15145 }, { "epoch": 1.7398196542415714, "grad_norm": 0.6577267646789551, "learning_rate": 0.0001, "loss": 1.3699, "step": 15146 }, { "epoch": 1.7399345241513986, "grad_norm": 0.6378180980682373, "learning_rate": 0.0001, "loss": 1.469, "step": 15147 }, { "epoch": 1.7400493940612256, "grad_norm": 0.6114896535873413, "learning_rate": 0.0001, "loss": 1.6303, "step": 15148 }, { "epoch": 1.7401642639710528, "grad_norm": 0.6004807353019714, "learning_rate": 0.0001, "loss": 1.4736, "step": 15149 }, { "epoch": 1.7402791338808798, "grad_norm": 0.5777592658996582, "learning_rate": 0.0001, "loss": 1.463, "step": 15150 }, { "epoch": 1.740394003790707, "grad_norm": 0.667898416519165, "learning_rate": 0.0001, "loss": 1.4367, "step": 15151 }, { "epoch": 1.7405088737005343, "grad_norm": 0.6375964879989624, "learning_rate": 0.0001, "loss": 1.4712, "step": 15152 }, { "epoch": 1.7406237436103613, "grad_norm": 0.6091217398643494, "learning_rate": 0.0001, "loss": 1.4241, "step": 15153 }, { "epoch": 1.7407386135201883, "grad_norm": 0.6524693965911865, "learning_rate": 0.0001, "loss": 1.3071, "step": 15154 }, { "epoch": 1.7408534834300156, "grad_norm": 0.6394875645637512, "learning_rate": 0.0001, "loss": 1.5411, "step": 15155 }, { "epoch": 1.7409683533398428, "grad_norm": 0.6616272926330566, "learning_rate": 0.0001, "loss": 1.4982, "step": 15156 }, { "epoch": 1.7410832232496698, "grad_norm": 0.6061643958091736, "learning_rate": 0.0001, "loss": 1.4134, "step": 15157 }, { "epoch": 1.7411980931594968, "grad_norm": 0.5694348216056824, "learning_rate": 0.0001, "loss": 1.44, "step": 15158 }, { "epoch": 1.741312963069324, "grad_norm": 0.651995062828064, "learning_rate": 0.0001, "loss": 1.5107, "step": 15159 }, { "epoch": 1.7414278329791513, "grad_norm": 0.6059070229530334, "learning_rate": 0.0001, "loss": 1.548, "step": 15160 }, { "epoch": 1.7415427028889783, "grad_norm": 0.58353590965271, "learning_rate": 0.0001, "loss": 1.4756, "step": 15161 }, { "epoch": 1.7416575727988053, "grad_norm": 0.5728119611740112, "learning_rate": 0.0001, "loss": 1.3305, "step": 15162 }, { "epoch": 1.7417724427086325, "grad_norm": 0.5594245791435242, "learning_rate": 0.0001, "loss": 1.295, "step": 15163 }, { "epoch": 1.7418873126184597, "grad_norm": 0.6906068325042725, "learning_rate": 0.0001, "loss": 1.5825, "step": 15164 }, { "epoch": 1.7420021825282868, "grad_norm": 0.6946594715118408, "learning_rate": 0.0001, "loss": 1.5164, "step": 15165 }, { "epoch": 1.7421170524381138, "grad_norm": 0.6139326095581055, "learning_rate": 0.0001, "loss": 1.519, "step": 15166 }, { "epoch": 1.742231922347941, "grad_norm": 0.6352965235710144, "learning_rate": 0.0001, "loss": 1.4031, "step": 15167 }, { "epoch": 1.7423467922577682, "grad_norm": 0.6800305843353271, "learning_rate": 0.0001, "loss": 1.5563, "step": 15168 }, { "epoch": 1.7424616621675952, "grad_norm": 0.5998824834823608, "learning_rate": 0.0001, "loss": 1.3361, "step": 15169 }, { "epoch": 1.7425765320774222, "grad_norm": 0.6264121532440186, "learning_rate": 0.0001, "loss": 1.6003, "step": 15170 }, { "epoch": 1.7426914019872495, "grad_norm": 0.6085196733474731, "learning_rate": 0.0001, "loss": 1.3164, "step": 15171 }, { "epoch": 1.7428062718970767, "grad_norm": 0.6375176310539246, "learning_rate": 0.0001, "loss": 1.4239, "step": 15172 }, { "epoch": 1.7429211418069037, "grad_norm": 0.6847028732299805, "learning_rate": 0.0001, "loss": 1.5247, "step": 15173 }, { "epoch": 1.7430360117167307, "grad_norm": 0.8053979873657227, "learning_rate": 0.0001, "loss": 1.5727, "step": 15174 }, { "epoch": 1.743150881626558, "grad_norm": 0.6131575107574463, "learning_rate": 0.0001, "loss": 1.3573, "step": 15175 }, { "epoch": 1.7432657515363852, "grad_norm": 0.570138692855835, "learning_rate": 0.0001, "loss": 1.2953, "step": 15176 }, { "epoch": 1.7433806214462122, "grad_norm": 0.6050834059715271, "learning_rate": 0.0001, "loss": 1.2771, "step": 15177 }, { "epoch": 1.7434954913560392, "grad_norm": 0.6091510653495789, "learning_rate": 0.0001, "loss": 1.4857, "step": 15178 }, { "epoch": 1.7436103612658664, "grad_norm": 0.7901420593261719, "learning_rate": 0.0001, "loss": 1.5952, "step": 15179 }, { "epoch": 1.7437252311756937, "grad_norm": 0.675471842288971, "learning_rate": 0.0001, "loss": 1.3616, "step": 15180 }, { "epoch": 1.7438401010855207, "grad_norm": 0.6008798480033875, "learning_rate": 0.0001, "loss": 1.4139, "step": 15181 }, { "epoch": 1.7439549709953477, "grad_norm": 0.6464613080024719, "learning_rate": 0.0001, "loss": 1.3417, "step": 15182 }, { "epoch": 1.744069840905175, "grad_norm": 0.6285551190376282, "learning_rate": 0.0001, "loss": 1.5489, "step": 15183 }, { "epoch": 1.7441847108150021, "grad_norm": 0.6213048100471497, "learning_rate": 0.0001, "loss": 1.3166, "step": 15184 }, { "epoch": 1.7442995807248292, "grad_norm": 0.6194262504577637, "learning_rate": 0.0001, "loss": 1.5114, "step": 15185 }, { "epoch": 1.7444144506346562, "grad_norm": 0.6530826687812805, "learning_rate": 0.0001, "loss": 1.563, "step": 15186 }, { "epoch": 1.7445293205444834, "grad_norm": 0.6780787110328674, "learning_rate": 0.0001, "loss": 1.2973, "step": 15187 }, { "epoch": 1.7446441904543106, "grad_norm": 0.7618767023086548, "learning_rate": 0.0001, "loss": 1.5906, "step": 15188 }, { "epoch": 1.7447590603641376, "grad_norm": 0.6271486282348633, "learning_rate": 0.0001, "loss": 1.5238, "step": 15189 }, { "epoch": 1.7448739302739646, "grad_norm": 0.5610907077789307, "learning_rate": 0.0001, "loss": 1.2431, "step": 15190 }, { "epoch": 1.7449888001837919, "grad_norm": 0.6444764733314514, "learning_rate": 0.0001, "loss": 1.3007, "step": 15191 }, { "epoch": 1.745103670093619, "grad_norm": 0.653785765171051, "learning_rate": 0.0001, "loss": 1.7039, "step": 15192 }, { "epoch": 1.7452185400034461, "grad_norm": 0.6614614129066467, "learning_rate": 0.0001, "loss": 1.5011, "step": 15193 }, { "epoch": 1.7453334099132731, "grad_norm": 0.6090264320373535, "learning_rate": 0.0001, "loss": 1.4303, "step": 15194 }, { "epoch": 1.7454482798231004, "grad_norm": 0.5970935821533203, "learning_rate": 0.0001, "loss": 1.4341, "step": 15195 }, { "epoch": 1.7455631497329276, "grad_norm": 0.6474558711051941, "learning_rate": 0.0001, "loss": 1.5577, "step": 15196 }, { "epoch": 1.7456780196427546, "grad_norm": 0.6696446537971497, "learning_rate": 0.0001, "loss": 1.3981, "step": 15197 }, { "epoch": 1.7457928895525816, "grad_norm": 0.6368044018745422, "learning_rate": 0.0001, "loss": 1.431, "step": 15198 }, { "epoch": 1.7459077594624088, "grad_norm": 0.5889632105827332, "learning_rate": 0.0001, "loss": 1.3663, "step": 15199 }, { "epoch": 1.746022629372236, "grad_norm": 0.6324240565299988, "learning_rate": 0.0001, "loss": 1.4025, "step": 15200 }, { "epoch": 1.746137499282063, "grad_norm": 0.664838969707489, "learning_rate": 0.0001, "loss": 1.5643, "step": 15201 }, { "epoch": 1.74625236919189, "grad_norm": 0.609592616558075, "learning_rate": 0.0001, "loss": 1.4405, "step": 15202 }, { "epoch": 1.7463672391017173, "grad_norm": 0.64817214012146, "learning_rate": 0.0001, "loss": 1.5301, "step": 15203 }, { "epoch": 1.7464821090115445, "grad_norm": 0.5915722846984863, "learning_rate": 0.0001, "loss": 1.5131, "step": 15204 }, { "epoch": 1.7465969789213716, "grad_norm": 0.6245658993721008, "learning_rate": 0.0001, "loss": 1.4266, "step": 15205 }, { "epoch": 1.7467118488311986, "grad_norm": 0.5552368760108948, "learning_rate": 0.0001, "loss": 1.3063, "step": 15206 }, { "epoch": 1.7468267187410258, "grad_norm": 0.6671764254570007, "learning_rate": 0.0001, "loss": 1.455, "step": 15207 }, { "epoch": 1.746941588650853, "grad_norm": 0.6359453201293945, "learning_rate": 0.0001, "loss": 1.5014, "step": 15208 }, { "epoch": 1.74705645856068, "grad_norm": 0.6473332643508911, "learning_rate": 0.0001, "loss": 1.4903, "step": 15209 }, { "epoch": 1.747171328470507, "grad_norm": 0.609463095664978, "learning_rate": 0.0001, "loss": 1.4419, "step": 15210 }, { "epoch": 1.7472861983803343, "grad_norm": 0.7541261911392212, "learning_rate": 0.0001, "loss": 1.4196, "step": 15211 }, { "epoch": 1.7474010682901615, "grad_norm": 0.6469170451164246, "learning_rate": 0.0001, "loss": 1.4974, "step": 15212 }, { "epoch": 1.7475159381999885, "grad_norm": 0.6460253000259399, "learning_rate": 0.0001, "loss": 1.4009, "step": 15213 }, { "epoch": 1.7476308081098155, "grad_norm": 0.6805127859115601, "learning_rate": 0.0001, "loss": 1.4109, "step": 15214 }, { "epoch": 1.7477456780196428, "grad_norm": 0.6314157843589783, "learning_rate": 0.0001, "loss": 1.3916, "step": 15215 }, { "epoch": 1.74786054792947, "grad_norm": 0.5894502997398376, "learning_rate": 0.0001, "loss": 1.2586, "step": 15216 }, { "epoch": 1.747975417839297, "grad_norm": 0.6559074521064758, "learning_rate": 0.0001, "loss": 1.6868, "step": 15217 }, { "epoch": 1.748090287749124, "grad_norm": 0.624843955039978, "learning_rate": 0.0001, "loss": 1.3526, "step": 15218 }, { "epoch": 1.7482051576589512, "grad_norm": 0.6877935528755188, "learning_rate": 0.0001, "loss": 1.4833, "step": 15219 }, { "epoch": 1.7483200275687785, "grad_norm": 0.6488064527511597, "learning_rate": 0.0001, "loss": 1.5458, "step": 15220 }, { "epoch": 1.7484348974786055, "grad_norm": 0.7320221066474915, "learning_rate": 0.0001, "loss": 1.6404, "step": 15221 }, { "epoch": 1.7485497673884325, "grad_norm": 0.6499512195587158, "learning_rate": 0.0001, "loss": 1.4216, "step": 15222 }, { "epoch": 1.7486646372982597, "grad_norm": 0.6216921806335449, "learning_rate": 0.0001, "loss": 1.4012, "step": 15223 }, { "epoch": 1.748779507208087, "grad_norm": 0.6483284831047058, "learning_rate": 0.0001, "loss": 1.5375, "step": 15224 }, { "epoch": 1.748894377117914, "grad_norm": 0.6150051951408386, "learning_rate": 0.0001, "loss": 1.5839, "step": 15225 }, { "epoch": 1.749009247027741, "grad_norm": 0.5937765836715698, "learning_rate": 0.0001, "loss": 1.4394, "step": 15226 }, { "epoch": 1.7491241169375682, "grad_norm": 0.5825061202049255, "learning_rate": 0.0001, "loss": 1.3277, "step": 15227 }, { "epoch": 1.7492389868473954, "grad_norm": 0.7533363103866577, "learning_rate": 0.0001, "loss": 1.7251, "step": 15228 }, { "epoch": 1.7493538567572224, "grad_norm": 0.6395822167396545, "learning_rate": 0.0001, "loss": 1.444, "step": 15229 }, { "epoch": 1.7494687266670494, "grad_norm": 0.5838786959648132, "learning_rate": 0.0001, "loss": 1.5254, "step": 15230 }, { "epoch": 1.7495835965768767, "grad_norm": 0.6399153470993042, "learning_rate": 0.0001, "loss": 1.2255, "step": 15231 }, { "epoch": 1.749698466486704, "grad_norm": 0.667523205280304, "learning_rate": 0.0001, "loss": 1.4569, "step": 15232 }, { "epoch": 1.749813336396531, "grad_norm": 0.6431367993354797, "learning_rate": 0.0001, "loss": 1.5047, "step": 15233 }, { "epoch": 1.749928206306358, "grad_norm": 0.6183831095695496, "learning_rate": 0.0001, "loss": 1.3978, "step": 15234 }, { "epoch": 1.7500430762161852, "grad_norm": 0.655053436756134, "learning_rate": 0.0001, "loss": 1.4848, "step": 15235 }, { "epoch": 1.7501579461260124, "grad_norm": 0.7972126603126526, "learning_rate": 0.0001, "loss": 1.5805, "step": 15236 }, { "epoch": 1.7502728160358394, "grad_norm": 0.6024256944656372, "learning_rate": 0.0001, "loss": 1.4008, "step": 15237 }, { "epoch": 1.7503876859456664, "grad_norm": 0.5974478125572205, "learning_rate": 0.0001, "loss": 1.453, "step": 15238 }, { "epoch": 1.7505025558554936, "grad_norm": 0.7210254669189453, "learning_rate": 0.0001, "loss": 1.6979, "step": 15239 }, { "epoch": 1.7506174257653209, "grad_norm": 0.6723774075508118, "learning_rate": 0.0001, "loss": 1.4211, "step": 15240 }, { "epoch": 1.7507322956751479, "grad_norm": 0.6472156047821045, "learning_rate": 0.0001, "loss": 1.5018, "step": 15241 }, { "epoch": 1.7508471655849749, "grad_norm": 0.5895910859107971, "learning_rate": 0.0001, "loss": 1.2851, "step": 15242 }, { "epoch": 1.7509620354948021, "grad_norm": 0.6794853210449219, "learning_rate": 0.0001, "loss": 1.3592, "step": 15243 }, { "epoch": 1.7510769054046293, "grad_norm": 0.6932422518730164, "learning_rate": 0.0001, "loss": 1.6878, "step": 15244 }, { "epoch": 1.7511917753144564, "grad_norm": 0.6949918866157532, "learning_rate": 0.0001, "loss": 1.581, "step": 15245 }, { "epoch": 1.7513066452242834, "grad_norm": 0.6456189751625061, "learning_rate": 0.0001, "loss": 1.4974, "step": 15246 }, { "epoch": 1.7514215151341106, "grad_norm": 0.6524791121482849, "learning_rate": 0.0001, "loss": 1.5143, "step": 15247 }, { "epoch": 1.7515363850439378, "grad_norm": 0.5790715217590332, "learning_rate": 0.0001, "loss": 1.5119, "step": 15248 }, { "epoch": 1.7516512549537648, "grad_norm": 0.5909721255302429, "learning_rate": 0.0001, "loss": 1.3346, "step": 15249 }, { "epoch": 1.7517661248635918, "grad_norm": 0.5983031988143921, "learning_rate": 0.0001, "loss": 1.3218, "step": 15250 }, { "epoch": 1.751880994773419, "grad_norm": 0.5888960361480713, "learning_rate": 0.0001, "loss": 1.4113, "step": 15251 }, { "epoch": 1.7519958646832463, "grad_norm": 0.6211053729057312, "learning_rate": 0.0001, "loss": 1.4408, "step": 15252 }, { "epoch": 1.7521107345930733, "grad_norm": 0.6472274661064148, "learning_rate": 0.0001, "loss": 1.6397, "step": 15253 }, { "epoch": 1.7522256045029003, "grad_norm": 0.5877479314804077, "learning_rate": 0.0001, "loss": 1.3772, "step": 15254 }, { "epoch": 1.7523404744127276, "grad_norm": 0.6007200479507446, "learning_rate": 0.0001, "loss": 1.3258, "step": 15255 }, { "epoch": 1.7524553443225548, "grad_norm": 0.6134718656539917, "learning_rate": 0.0001, "loss": 1.3475, "step": 15256 }, { "epoch": 1.7525702142323818, "grad_norm": 0.6493116021156311, "learning_rate": 0.0001, "loss": 1.5139, "step": 15257 }, { "epoch": 1.7526850841422088, "grad_norm": 0.6316721439361572, "learning_rate": 0.0001, "loss": 1.5339, "step": 15258 }, { "epoch": 1.752799954052036, "grad_norm": 0.63750159740448, "learning_rate": 0.0001, "loss": 1.4438, "step": 15259 }, { "epoch": 1.7529148239618633, "grad_norm": 0.6934375762939453, "learning_rate": 0.0001, "loss": 1.5165, "step": 15260 }, { "epoch": 1.7530296938716903, "grad_norm": 0.6297123432159424, "learning_rate": 0.0001, "loss": 1.4865, "step": 15261 }, { "epoch": 1.7531445637815173, "grad_norm": 0.582813560962677, "learning_rate": 0.0001, "loss": 1.1494, "step": 15262 }, { "epoch": 1.7532594336913445, "grad_norm": 0.6479847431182861, "learning_rate": 0.0001, "loss": 1.3812, "step": 15263 }, { "epoch": 1.7533743036011717, "grad_norm": 0.6570015549659729, "learning_rate": 0.0001, "loss": 1.5119, "step": 15264 }, { "epoch": 1.7534891735109988, "grad_norm": 0.6873630285263062, "learning_rate": 0.0001, "loss": 1.2798, "step": 15265 }, { "epoch": 1.7536040434208258, "grad_norm": 0.6358373165130615, "learning_rate": 0.0001, "loss": 1.4256, "step": 15266 }, { "epoch": 1.753718913330653, "grad_norm": 0.5748741626739502, "learning_rate": 0.0001, "loss": 1.3127, "step": 15267 }, { "epoch": 1.7538337832404802, "grad_norm": 0.6858731508255005, "learning_rate": 0.0001, "loss": 1.4542, "step": 15268 }, { "epoch": 1.7539486531503072, "grad_norm": 0.5939129590988159, "learning_rate": 0.0001, "loss": 1.266, "step": 15269 }, { "epoch": 1.7540635230601342, "grad_norm": 0.65522301197052, "learning_rate": 0.0001, "loss": 1.3433, "step": 15270 }, { "epoch": 1.7541783929699615, "grad_norm": 0.6629993915557861, "learning_rate": 0.0001, "loss": 1.4215, "step": 15271 }, { "epoch": 1.7542932628797887, "grad_norm": 0.6635041236877441, "learning_rate": 0.0001, "loss": 1.5512, "step": 15272 }, { "epoch": 1.7544081327896157, "grad_norm": 0.6313446164131165, "learning_rate": 0.0001, "loss": 1.6941, "step": 15273 }, { "epoch": 1.7545230026994427, "grad_norm": 0.6612875461578369, "learning_rate": 0.0001, "loss": 1.3453, "step": 15274 }, { "epoch": 1.75463787260927, "grad_norm": 0.6233786940574646, "learning_rate": 0.0001, "loss": 1.4155, "step": 15275 }, { "epoch": 1.7547527425190972, "grad_norm": 0.688138484954834, "learning_rate": 0.0001, "loss": 1.4463, "step": 15276 }, { "epoch": 1.7548676124289242, "grad_norm": 0.5722640156745911, "learning_rate": 0.0001, "loss": 1.173, "step": 15277 }, { "epoch": 1.7549824823387512, "grad_norm": 0.6150445342063904, "learning_rate": 0.0001, "loss": 1.4503, "step": 15278 }, { "epoch": 1.7550973522485784, "grad_norm": 0.6374548673629761, "learning_rate": 0.0001, "loss": 1.4994, "step": 15279 }, { "epoch": 1.7552122221584057, "grad_norm": 0.5925512313842773, "learning_rate": 0.0001, "loss": 1.3682, "step": 15280 }, { "epoch": 1.7553270920682327, "grad_norm": 0.6822425723075867, "learning_rate": 0.0001, "loss": 1.5626, "step": 15281 }, { "epoch": 1.7554419619780597, "grad_norm": 0.6651631593704224, "learning_rate": 0.0001, "loss": 1.7616, "step": 15282 }, { "epoch": 1.755556831887887, "grad_norm": 0.6492800712585449, "learning_rate": 0.0001, "loss": 1.4807, "step": 15283 }, { "epoch": 1.7556717017977141, "grad_norm": 0.5847377777099609, "learning_rate": 0.0001, "loss": 1.4534, "step": 15284 }, { "epoch": 1.7557865717075412, "grad_norm": 0.6187418699264526, "learning_rate": 0.0001, "loss": 1.3685, "step": 15285 }, { "epoch": 1.7559014416173682, "grad_norm": 0.5958352088928223, "learning_rate": 0.0001, "loss": 1.3842, "step": 15286 }, { "epoch": 1.7560163115271954, "grad_norm": 0.6693896055221558, "learning_rate": 0.0001, "loss": 1.4296, "step": 15287 }, { "epoch": 1.7561311814370226, "grad_norm": 0.6221320033073425, "learning_rate": 0.0001, "loss": 1.3786, "step": 15288 }, { "epoch": 1.7562460513468499, "grad_norm": 0.6341426968574524, "learning_rate": 0.0001, "loss": 1.5418, "step": 15289 }, { "epoch": 1.7563609212566769, "grad_norm": 0.5881104469299316, "learning_rate": 0.0001, "loss": 1.4077, "step": 15290 }, { "epoch": 1.7564757911665039, "grad_norm": 0.6449071764945984, "learning_rate": 0.0001, "loss": 1.5997, "step": 15291 }, { "epoch": 1.756590661076331, "grad_norm": 0.6435044407844543, "learning_rate": 0.0001, "loss": 1.486, "step": 15292 }, { "epoch": 1.7567055309861583, "grad_norm": 0.5955443978309631, "learning_rate": 0.0001, "loss": 1.4916, "step": 15293 }, { "epoch": 1.7568204008959853, "grad_norm": 0.6793741583824158, "learning_rate": 0.0001, "loss": 1.5893, "step": 15294 }, { "epoch": 1.7569352708058124, "grad_norm": 0.6085398197174072, "learning_rate": 0.0001, "loss": 1.4329, "step": 15295 }, { "epoch": 1.7570501407156396, "grad_norm": 0.6038925051689148, "learning_rate": 0.0001, "loss": 1.3851, "step": 15296 }, { "epoch": 1.7571650106254668, "grad_norm": 0.6389901041984558, "learning_rate": 0.0001, "loss": 1.4429, "step": 15297 }, { "epoch": 1.7572798805352938, "grad_norm": 0.5799313187599182, "learning_rate": 0.0001, "loss": 1.5151, "step": 15298 }, { "epoch": 1.7573947504451208, "grad_norm": 0.6270938515663147, "learning_rate": 0.0001, "loss": 1.4155, "step": 15299 }, { "epoch": 1.757509620354948, "grad_norm": 0.5713139176368713, "learning_rate": 0.0001, "loss": 1.4485, "step": 15300 }, { "epoch": 1.7576244902647753, "grad_norm": 0.5795127153396606, "learning_rate": 0.0001, "loss": 1.2822, "step": 15301 }, { "epoch": 1.7577393601746023, "grad_norm": 0.5726675391197205, "learning_rate": 0.0001, "loss": 1.0326, "step": 15302 }, { "epoch": 1.7578542300844293, "grad_norm": 0.6082470417022705, "learning_rate": 0.0001, "loss": 1.4728, "step": 15303 }, { "epoch": 1.7579690999942565, "grad_norm": 0.6625812649726868, "learning_rate": 0.0001, "loss": 1.4903, "step": 15304 }, { "epoch": 1.7580839699040838, "grad_norm": 0.6669724583625793, "learning_rate": 0.0001, "loss": 1.5908, "step": 15305 }, { "epoch": 1.7581988398139108, "grad_norm": 0.6124973893165588, "learning_rate": 0.0001, "loss": 1.5021, "step": 15306 }, { "epoch": 1.7583137097237378, "grad_norm": 0.6731737852096558, "learning_rate": 0.0001, "loss": 1.5494, "step": 15307 }, { "epoch": 1.758428579633565, "grad_norm": 0.597639799118042, "learning_rate": 0.0001, "loss": 1.2609, "step": 15308 }, { "epoch": 1.7585434495433923, "grad_norm": 0.6299818158149719, "learning_rate": 0.0001, "loss": 1.7241, "step": 15309 }, { "epoch": 1.7586583194532193, "grad_norm": 0.578934371471405, "learning_rate": 0.0001, "loss": 1.5805, "step": 15310 }, { "epoch": 1.7587731893630463, "grad_norm": 0.6151703596115112, "learning_rate": 0.0001, "loss": 1.3717, "step": 15311 }, { "epoch": 1.7588880592728735, "grad_norm": 0.6442793011665344, "learning_rate": 0.0001, "loss": 1.4778, "step": 15312 }, { "epoch": 1.7590029291827007, "grad_norm": 0.6389909982681274, "learning_rate": 0.0001, "loss": 1.4665, "step": 15313 }, { "epoch": 1.7591177990925277, "grad_norm": 0.5759516954421997, "learning_rate": 0.0001, "loss": 1.354, "step": 15314 }, { "epoch": 1.7592326690023548, "grad_norm": 0.6577784419059753, "learning_rate": 0.0001, "loss": 1.5927, "step": 15315 }, { "epoch": 1.759347538912182, "grad_norm": 0.6161843538284302, "learning_rate": 0.0001, "loss": 1.3574, "step": 15316 }, { "epoch": 1.7594624088220092, "grad_norm": 0.6002083420753479, "learning_rate": 0.0001, "loss": 1.51, "step": 15317 }, { "epoch": 1.7595772787318362, "grad_norm": 0.6231681704521179, "learning_rate": 0.0001, "loss": 1.2959, "step": 15318 }, { "epoch": 1.7596921486416632, "grad_norm": 0.6722519397735596, "learning_rate": 0.0001, "loss": 1.3182, "step": 15319 }, { "epoch": 1.7598070185514905, "grad_norm": 0.6488606929779053, "learning_rate": 0.0001, "loss": 1.249, "step": 15320 }, { "epoch": 1.7599218884613177, "grad_norm": 0.6526792049407959, "learning_rate": 0.0001, "loss": 1.389, "step": 15321 }, { "epoch": 1.7600367583711447, "grad_norm": 0.8293418884277344, "learning_rate": 0.0001, "loss": 1.5196, "step": 15322 }, { "epoch": 1.7601516282809717, "grad_norm": 0.6284173130989075, "learning_rate": 0.0001, "loss": 1.4126, "step": 15323 }, { "epoch": 1.760266498190799, "grad_norm": 0.6085017919540405, "learning_rate": 0.0001, "loss": 1.5337, "step": 15324 }, { "epoch": 1.7603813681006262, "grad_norm": 0.6490882039070129, "learning_rate": 0.0001, "loss": 1.5043, "step": 15325 }, { "epoch": 1.7604962380104532, "grad_norm": 0.6944969296455383, "learning_rate": 0.0001, "loss": 1.4436, "step": 15326 }, { "epoch": 1.7606111079202802, "grad_norm": 0.5754250288009644, "learning_rate": 0.0001, "loss": 1.4301, "step": 15327 }, { "epoch": 1.7607259778301074, "grad_norm": 0.5691632628440857, "learning_rate": 0.0001, "loss": 1.2594, "step": 15328 }, { "epoch": 1.7608408477399347, "grad_norm": 0.6530894041061401, "learning_rate": 0.0001, "loss": 1.7345, "step": 15329 }, { "epoch": 1.7609557176497617, "grad_norm": 0.5945919156074524, "learning_rate": 0.0001, "loss": 1.5005, "step": 15330 }, { "epoch": 1.7610705875595887, "grad_norm": 0.6224496364593506, "learning_rate": 0.0001, "loss": 1.371, "step": 15331 }, { "epoch": 1.761185457469416, "grad_norm": 0.5837387442588806, "learning_rate": 0.0001, "loss": 1.2299, "step": 15332 }, { "epoch": 1.7613003273792431, "grad_norm": 0.6480647325515747, "learning_rate": 0.0001, "loss": 1.4484, "step": 15333 }, { "epoch": 1.7614151972890701, "grad_norm": 0.655102014541626, "learning_rate": 0.0001, "loss": 1.6464, "step": 15334 }, { "epoch": 1.7615300671988972, "grad_norm": 0.624885618686676, "learning_rate": 0.0001, "loss": 1.504, "step": 15335 }, { "epoch": 1.7616449371087244, "grad_norm": 0.7076594233512878, "learning_rate": 0.0001, "loss": 1.2598, "step": 15336 }, { "epoch": 1.7617598070185516, "grad_norm": 0.6876631379127502, "learning_rate": 0.0001, "loss": 1.6424, "step": 15337 }, { "epoch": 1.7618746769283786, "grad_norm": 0.642560601234436, "learning_rate": 0.0001, "loss": 1.474, "step": 15338 }, { "epoch": 1.7619895468382056, "grad_norm": 0.6945380568504333, "learning_rate": 0.0001, "loss": 1.4465, "step": 15339 }, { "epoch": 1.7621044167480329, "grad_norm": 0.6342966556549072, "learning_rate": 0.0001, "loss": 1.5273, "step": 15340 }, { "epoch": 1.76221928665786, "grad_norm": 0.6334778666496277, "learning_rate": 0.0001, "loss": 1.4535, "step": 15341 }, { "epoch": 1.762334156567687, "grad_norm": 0.5924774408340454, "learning_rate": 0.0001, "loss": 1.4886, "step": 15342 }, { "epoch": 1.7624490264775141, "grad_norm": 0.6480969190597534, "learning_rate": 0.0001, "loss": 1.3745, "step": 15343 }, { "epoch": 1.7625638963873413, "grad_norm": 0.635136067867279, "learning_rate": 0.0001, "loss": 1.4212, "step": 15344 }, { "epoch": 1.7626787662971686, "grad_norm": 0.6227051615715027, "learning_rate": 0.0001, "loss": 1.6379, "step": 15345 }, { "epoch": 1.7627936362069956, "grad_norm": 0.5813191533088684, "learning_rate": 0.0001, "loss": 1.514, "step": 15346 }, { "epoch": 1.7629085061168226, "grad_norm": 0.7114011645317078, "learning_rate": 0.0001, "loss": 1.4692, "step": 15347 }, { "epoch": 1.7630233760266498, "grad_norm": 0.6176375150680542, "learning_rate": 0.0001, "loss": 1.4317, "step": 15348 }, { "epoch": 1.763138245936477, "grad_norm": 0.7010447382926941, "learning_rate": 0.0001, "loss": 1.6766, "step": 15349 }, { "epoch": 1.763253115846304, "grad_norm": 0.62742680311203, "learning_rate": 0.0001, "loss": 1.3897, "step": 15350 }, { "epoch": 1.763367985756131, "grad_norm": 0.6509758830070496, "learning_rate": 0.0001, "loss": 1.5885, "step": 15351 }, { "epoch": 1.7634828556659583, "grad_norm": 0.6291651725769043, "learning_rate": 0.0001, "loss": 1.1953, "step": 15352 }, { "epoch": 1.7635977255757855, "grad_norm": 0.6242035031318665, "learning_rate": 0.0001, "loss": 1.4124, "step": 15353 }, { "epoch": 1.7637125954856125, "grad_norm": 0.5866750478744507, "learning_rate": 0.0001, "loss": 1.5409, "step": 15354 }, { "epoch": 1.7638274653954396, "grad_norm": 0.6242676973342896, "learning_rate": 0.0001, "loss": 1.4799, "step": 15355 }, { "epoch": 1.7639423353052668, "grad_norm": 0.6475937366485596, "learning_rate": 0.0001, "loss": 1.3933, "step": 15356 }, { "epoch": 1.764057205215094, "grad_norm": 0.6821995377540588, "learning_rate": 0.0001, "loss": 1.6972, "step": 15357 }, { "epoch": 1.764172075124921, "grad_norm": 0.6403782963752747, "learning_rate": 0.0001, "loss": 1.4328, "step": 15358 }, { "epoch": 1.764286945034748, "grad_norm": 0.5568740963935852, "learning_rate": 0.0001, "loss": 1.3501, "step": 15359 }, { "epoch": 1.7644018149445753, "grad_norm": 0.7482900619506836, "learning_rate": 0.0001, "loss": 1.7237, "step": 15360 }, { "epoch": 1.7645166848544025, "grad_norm": 0.7028294801712036, "learning_rate": 0.0001, "loss": 1.4546, "step": 15361 }, { "epoch": 1.7646315547642295, "grad_norm": 0.6432153582572937, "learning_rate": 0.0001, "loss": 1.4991, "step": 15362 }, { "epoch": 1.7647464246740565, "grad_norm": 0.5955196619033813, "learning_rate": 0.0001, "loss": 1.4428, "step": 15363 }, { "epoch": 1.7648612945838837, "grad_norm": 0.6378939747810364, "learning_rate": 0.0001, "loss": 1.5622, "step": 15364 }, { "epoch": 1.764976164493711, "grad_norm": 0.6353983283042908, "learning_rate": 0.0001, "loss": 1.4364, "step": 15365 }, { "epoch": 1.765091034403538, "grad_norm": 0.6289456486701965, "learning_rate": 0.0001, "loss": 1.5238, "step": 15366 }, { "epoch": 1.765205904313365, "grad_norm": 0.6229878664016724, "learning_rate": 0.0001, "loss": 1.2323, "step": 15367 }, { "epoch": 1.7653207742231922, "grad_norm": 0.6140499114990234, "learning_rate": 0.0001, "loss": 1.593, "step": 15368 }, { "epoch": 1.7654356441330195, "grad_norm": 0.6184989213943481, "learning_rate": 0.0001, "loss": 1.3525, "step": 15369 }, { "epoch": 1.7655505140428465, "grad_norm": 0.6055330634117126, "learning_rate": 0.0001, "loss": 1.2379, "step": 15370 }, { "epoch": 1.7656653839526735, "grad_norm": 0.6226028203964233, "learning_rate": 0.0001, "loss": 1.511, "step": 15371 }, { "epoch": 1.7657802538625007, "grad_norm": 0.6356282830238342, "learning_rate": 0.0001, "loss": 1.281, "step": 15372 }, { "epoch": 1.765895123772328, "grad_norm": 0.5963003635406494, "learning_rate": 0.0001, "loss": 1.3596, "step": 15373 }, { "epoch": 1.766009993682155, "grad_norm": 0.598003625869751, "learning_rate": 0.0001, "loss": 1.4494, "step": 15374 }, { "epoch": 1.766124863591982, "grad_norm": 0.6972695589065552, "learning_rate": 0.0001, "loss": 1.2956, "step": 15375 }, { "epoch": 1.7662397335018092, "grad_norm": 0.6252386569976807, "learning_rate": 0.0001, "loss": 1.4696, "step": 15376 }, { "epoch": 1.7663546034116364, "grad_norm": 0.7368474006652832, "learning_rate": 0.0001, "loss": 1.6705, "step": 15377 }, { "epoch": 1.7664694733214634, "grad_norm": 0.7342138886451721, "learning_rate": 0.0001, "loss": 1.4238, "step": 15378 }, { "epoch": 1.7665843432312904, "grad_norm": 0.5836483836174011, "learning_rate": 0.0001, "loss": 1.3567, "step": 15379 }, { "epoch": 1.7666992131411177, "grad_norm": 0.6157330274581909, "learning_rate": 0.0001, "loss": 1.4845, "step": 15380 }, { "epoch": 1.766814083050945, "grad_norm": 0.630933940410614, "learning_rate": 0.0001, "loss": 1.5091, "step": 15381 }, { "epoch": 1.766928952960772, "grad_norm": 0.6310692429542542, "learning_rate": 0.0001, "loss": 1.4669, "step": 15382 }, { "epoch": 1.767043822870599, "grad_norm": 0.6385482549667358, "learning_rate": 0.0001, "loss": 1.2872, "step": 15383 }, { "epoch": 1.7671586927804261, "grad_norm": 0.6223406195640564, "learning_rate": 0.0001, "loss": 1.2195, "step": 15384 }, { "epoch": 1.7672735626902534, "grad_norm": 0.5795210003852844, "learning_rate": 0.0001, "loss": 1.3071, "step": 15385 }, { "epoch": 1.7673884326000804, "grad_norm": 0.6244056224822998, "learning_rate": 0.0001, "loss": 1.3275, "step": 15386 }, { "epoch": 1.7675033025099074, "grad_norm": 0.7372899055480957, "learning_rate": 0.0001, "loss": 1.6583, "step": 15387 }, { "epoch": 1.7676181724197346, "grad_norm": 0.6304470300674438, "learning_rate": 0.0001, "loss": 1.3243, "step": 15388 }, { "epoch": 1.7677330423295619, "grad_norm": 0.6503053307533264, "learning_rate": 0.0001, "loss": 1.3891, "step": 15389 }, { "epoch": 1.7678479122393889, "grad_norm": 0.6643422245979309, "learning_rate": 0.0001, "loss": 1.5425, "step": 15390 }, { "epoch": 1.7679627821492159, "grad_norm": 0.6259305477142334, "learning_rate": 0.0001, "loss": 1.3976, "step": 15391 }, { "epoch": 1.768077652059043, "grad_norm": 0.6734678745269775, "learning_rate": 0.0001, "loss": 1.4114, "step": 15392 }, { "epoch": 1.7681925219688703, "grad_norm": 0.6242077946662903, "learning_rate": 0.0001, "loss": 1.4955, "step": 15393 }, { "epoch": 1.7683073918786973, "grad_norm": 0.6714116930961609, "learning_rate": 0.0001, "loss": 1.3507, "step": 15394 }, { "epoch": 1.7684222617885244, "grad_norm": 0.5949355363845825, "learning_rate": 0.0001, "loss": 1.4515, "step": 15395 }, { "epoch": 1.7685371316983516, "grad_norm": 0.5709308981895447, "learning_rate": 0.0001, "loss": 1.3621, "step": 15396 }, { "epoch": 1.7686520016081788, "grad_norm": 0.6260430812835693, "learning_rate": 0.0001, "loss": 1.4699, "step": 15397 }, { "epoch": 1.7687668715180058, "grad_norm": 0.6341519355773926, "learning_rate": 0.0001, "loss": 1.6458, "step": 15398 }, { "epoch": 1.7688817414278328, "grad_norm": 0.6372659802436829, "learning_rate": 0.0001, "loss": 1.4443, "step": 15399 }, { "epoch": 1.76899661133766, "grad_norm": 0.5935454964637756, "learning_rate": 0.0001, "loss": 1.2865, "step": 15400 }, { "epoch": 1.7691114812474873, "grad_norm": 0.6392391324043274, "learning_rate": 0.0001, "loss": 1.5012, "step": 15401 }, { "epoch": 1.7692263511573143, "grad_norm": 0.6767097115516663, "learning_rate": 0.0001, "loss": 1.5747, "step": 15402 }, { "epoch": 1.7693412210671413, "grad_norm": 0.575113832950592, "learning_rate": 0.0001, "loss": 1.4338, "step": 15403 }, { "epoch": 1.7694560909769685, "grad_norm": 0.6148155927658081, "learning_rate": 0.0001, "loss": 1.4446, "step": 15404 }, { "epoch": 1.7695709608867958, "grad_norm": 0.6530346274375916, "learning_rate": 0.0001, "loss": 1.5008, "step": 15405 }, { "epoch": 1.7696858307966228, "grad_norm": 0.624439001083374, "learning_rate": 0.0001, "loss": 1.4565, "step": 15406 }, { "epoch": 1.7698007007064498, "grad_norm": 0.633822500705719, "learning_rate": 0.0001, "loss": 1.5872, "step": 15407 }, { "epoch": 1.769915570616277, "grad_norm": 0.6398473381996155, "learning_rate": 0.0001, "loss": 1.3496, "step": 15408 }, { "epoch": 1.7700304405261043, "grad_norm": 0.6082612872123718, "learning_rate": 0.0001, "loss": 1.2266, "step": 15409 }, { "epoch": 1.7701453104359313, "grad_norm": 0.6723073124885559, "learning_rate": 0.0001, "loss": 1.3755, "step": 15410 }, { "epoch": 1.7702601803457583, "grad_norm": 0.6552845239639282, "learning_rate": 0.0001, "loss": 1.5456, "step": 15411 }, { "epoch": 1.7703750502555855, "grad_norm": 0.682786226272583, "learning_rate": 0.0001, "loss": 1.4209, "step": 15412 }, { "epoch": 1.7704899201654127, "grad_norm": 0.6715489625930786, "learning_rate": 0.0001, "loss": 1.5819, "step": 15413 }, { "epoch": 1.7706047900752397, "grad_norm": 0.6266937851905823, "learning_rate": 0.0001, "loss": 1.4389, "step": 15414 }, { "epoch": 1.7707196599850668, "grad_norm": 0.6482228636741638, "learning_rate": 0.0001, "loss": 1.4746, "step": 15415 }, { "epoch": 1.770834529894894, "grad_norm": 0.6691463589668274, "learning_rate": 0.0001, "loss": 1.6347, "step": 15416 }, { "epoch": 1.7709493998047212, "grad_norm": 0.6793537735939026, "learning_rate": 0.0001, "loss": 1.6443, "step": 15417 }, { "epoch": 1.7710642697145482, "grad_norm": 0.604053258895874, "learning_rate": 0.0001, "loss": 1.4415, "step": 15418 }, { "epoch": 1.7711791396243752, "grad_norm": 0.7533746361732483, "learning_rate": 0.0001, "loss": 1.586, "step": 15419 }, { "epoch": 1.7712940095342025, "grad_norm": 0.6014366745948792, "learning_rate": 0.0001, "loss": 1.37, "step": 15420 }, { "epoch": 1.7714088794440297, "grad_norm": 0.6073466539382935, "learning_rate": 0.0001, "loss": 1.4946, "step": 15421 }, { "epoch": 1.7715237493538567, "grad_norm": 0.6218917965888977, "learning_rate": 0.0001, "loss": 1.5332, "step": 15422 }, { "epoch": 1.7716386192636837, "grad_norm": 0.6328446865081787, "learning_rate": 0.0001, "loss": 1.5439, "step": 15423 }, { "epoch": 1.771753489173511, "grad_norm": 0.5783063173294067, "learning_rate": 0.0001, "loss": 1.2782, "step": 15424 }, { "epoch": 1.7718683590833382, "grad_norm": 0.5905358791351318, "learning_rate": 0.0001, "loss": 1.528, "step": 15425 }, { "epoch": 1.7719832289931654, "grad_norm": 0.6171038150787354, "learning_rate": 0.0001, "loss": 1.4173, "step": 15426 }, { "epoch": 1.7720980989029924, "grad_norm": 0.5971978902816772, "learning_rate": 0.0001, "loss": 1.401, "step": 15427 }, { "epoch": 1.7722129688128194, "grad_norm": 0.6217857003211975, "learning_rate": 0.0001, "loss": 1.4225, "step": 15428 }, { "epoch": 1.7723278387226467, "grad_norm": 0.6341108679771423, "learning_rate": 0.0001, "loss": 1.3223, "step": 15429 }, { "epoch": 1.7724427086324739, "grad_norm": 0.5544351935386658, "learning_rate": 0.0001, "loss": 1.4596, "step": 15430 }, { "epoch": 1.772557578542301, "grad_norm": 0.6070437431335449, "learning_rate": 0.0001, "loss": 1.4921, "step": 15431 }, { "epoch": 1.772672448452128, "grad_norm": 0.679899275302887, "learning_rate": 0.0001, "loss": 1.5184, "step": 15432 }, { "epoch": 1.7727873183619551, "grad_norm": 0.6571292877197266, "learning_rate": 0.0001, "loss": 1.3402, "step": 15433 }, { "epoch": 1.7729021882717824, "grad_norm": 0.6752861738204956, "learning_rate": 0.0001, "loss": 1.4057, "step": 15434 }, { "epoch": 1.7730170581816094, "grad_norm": 0.6188561916351318, "learning_rate": 0.0001, "loss": 1.4021, "step": 15435 }, { "epoch": 1.7731319280914364, "grad_norm": 0.645605206489563, "learning_rate": 0.0001, "loss": 1.6572, "step": 15436 }, { "epoch": 1.7732467980012636, "grad_norm": 0.6331257820129395, "learning_rate": 0.0001, "loss": 1.4723, "step": 15437 }, { "epoch": 1.7733616679110908, "grad_norm": 0.6232604384422302, "learning_rate": 0.0001, "loss": 1.6332, "step": 15438 }, { "epoch": 1.7734765378209179, "grad_norm": 0.6299999952316284, "learning_rate": 0.0001, "loss": 1.4565, "step": 15439 }, { "epoch": 1.7735914077307449, "grad_norm": 0.6201632618904114, "learning_rate": 0.0001, "loss": 1.4274, "step": 15440 }, { "epoch": 1.773706277640572, "grad_norm": 0.6291703581809998, "learning_rate": 0.0001, "loss": 1.5339, "step": 15441 }, { "epoch": 1.7738211475503993, "grad_norm": 0.6148052215576172, "learning_rate": 0.0001, "loss": 1.4622, "step": 15442 }, { "epoch": 1.7739360174602263, "grad_norm": 0.6915506720542908, "learning_rate": 0.0001, "loss": 1.4758, "step": 15443 }, { "epoch": 1.7740508873700533, "grad_norm": 0.6690899133682251, "learning_rate": 0.0001, "loss": 1.3532, "step": 15444 }, { "epoch": 1.7741657572798806, "grad_norm": 0.6253287196159363, "learning_rate": 0.0001, "loss": 1.5029, "step": 15445 }, { "epoch": 1.7742806271897078, "grad_norm": 0.66769939661026, "learning_rate": 0.0001, "loss": 1.5278, "step": 15446 }, { "epoch": 1.7743954970995348, "grad_norm": 0.6366974115371704, "learning_rate": 0.0001, "loss": 1.4671, "step": 15447 }, { "epoch": 1.7745103670093618, "grad_norm": 0.6763840913772583, "learning_rate": 0.0001, "loss": 1.575, "step": 15448 }, { "epoch": 1.774625236919189, "grad_norm": 0.6336223483085632, "learning_rate": 0.0001, "loss": 1.5122, "step": 15449 }, { "epoch": 1.7747401068290163, "grad_norm": 0.6298586130142212, "learning_rate": 0.0001, "loss": 1.5169, "step": 15450 }, { "epoch": 1.7748549767388433, "grad_norm": 0.6808599829673767, "learning_rate": 0.0001, "loss": 1.4164, "step": 15451 }, { "epoch": 1.7749698466486703, "grad_norm": 0.638803243637085, "learning_rate": 0.0001, "loss": 1.5666, "step": 15452 }, { "epoch": 1.7750847165584975, "grad_norm": 0.6190632581710815, "learning_rate": 0.0001, "loss": 1.3554, "step": 15453 }, { "epoch": 1.7751995864683248, "grad_norm": 0.6603758335113525, "learning_rate": 0.0001, "loss": 1.5177, "step": 15454 }, { "epoch": 1.7753144563781518, "grad_norm": 0.6279389262199402, "learning_rate": 0.0001, "loss": 1.5894, "step": 15455 }, { "epoch": 1.7754293262879788, "grad_norm": 0.6289408206939697, "learning_rate": 0.0001, "loss": 1.5424, "step": 15456 }, { "epoch": 1.775544196197806, "grad_norm": 0.5870251059532166, "learning_rate": 0.0001, "loss": 1.3102, "step": 15457 }, { "epoch": 1.7756590661076332, "grad_norm": 0.6192272305488586, "learning_rate": 0.0001, "loss": 1.3527, "step": 15458 }, { "epoch": 1.7757739360174603, "grad_norm": 0.626162588596344, "learning_rate": 0.0001, "loss": 1.4285, "step": 15459 }, { "epoch": 1.7758888059272873, "grad_norm": 0.6371445059776306, "learning_rate": 0.0001, "loss": 1.4435, "step": 15460 }, { "epoch": 1.7760036758371145, "grad_norm": 0.7196854948997498, "learning_rate": 0.0001, "loss": 1.5672, "step": 15461 }, { "epoch": 1.7761185457469417, "grad_norm": 0.6295327544212341, "learning_rate": 0.0001, "loss": 1.4168, "step": 15462 }, { "epoch": 1.7762334156567687, "grad_norm": 0.6012369394302368, "learning_rate": 0.0001, "loss": 1.1423, "step": 15463 }, { "epoch": 1.7763482855665957, "grad_norm": 0.6109943985939026, "learning_rate": 0.0001, "loss": 1.7749, "step": 15464 }, { "epoch": 1.776463155476423, "grad_norm": 0.6386968493461609, "learning_rate": 0.0001, "loss": 1.4812, "step": 15465 }, { "epoch": 1.7765780253862502, "grad_norm": 0.6312034130096436, "learning_rate": 0.0001, "loss": 1.3813, "step": 15466 }, { "epoch": 1.7766928952960772, "grad_norm": 0.6818085312843323, "learning_rate": 0.0001, "loss": 1.5537, "step": 15467 }, { "epoch": 1.7768077652059042, "grad_norm": 0.616159975528717, "learning_rate": 0.0001, "loss": 1.3302, "step": 15468 }, { "epoch": 1.7769226351157315, "grad_norm": 0.6063972115516663, "learning_rate": 0.0001, "loss": 1.6571, "step": 15469 }, { "epoch": 1.7770375050255587, "grad_norm": 0.6804771423339844, "learning_rate": 0.0001, "loss": 1.6351, "step": 15470 }, { "epoch": 1.7771523749353857, "grad_norm": 0.6419492959976196, "learning_rate": 0.0001, "loss": 1.6362, "step": 15471 }, { "epoch": 1.7772672448452127, "grad_norm": 0.6010777354240417, "learning_rate": 0.0001, "loss": 1.419, "step": 15472 }, { "epoch": 1.77738211475504, "grad_norm": 0.6770652532577515, "learning_rate": 0.0001, "loss": 1.523, "step": 15473 }, { "epoch": 1.7774969846648672, "grad_norm": 0.7224645614624023, "learning_rate": 0.0001, "loss": 1.4757, "step": 15474 }, { "epoch": 1.7776118545746942, "grad_norm": 0.6119104623794556, "learning_rate": 0.0001, "loss": 1.3286, "step": 15475 }, { "epoch": 1.7777267244845212, "grad_norm": 0.6447218060493469, "learning_rate": 0.0001, "loss": 1.5151, "step": 15476 }, { "epoch": 1.7778415943943484, "grad_norm": 0.6411008238792419, "learning_rate": 0.0001, "loss": 1.5644, "step": 15477 }, { "epoch": 1.7779564643041756, "grad_norm": 0.6330268383026123, "learning_rate": 0.0001, "loss": 1.5924, "step": 15478 }, { "epoch": 1.7780713342140027, "grad_norm": 0.5914243459701538, "learning_rate": 0.0001, "loss": 1.5313, "step": 15479 }, { "epoch": 1.7781862041238297, "grad_norm": 0.6504284739494324, "learning_rate": 0.0001, "loss": 1.662, "step": 15480 }, { "epoch": 1.778301074033657, "grad_norm": 0.6057482361793518, "learning_rate": 0.0001, "loss": 1.3601, "step": 15481 }, { "epoch": 1.7784159439434841, "grad_norm": 0.6217951774597168, "learning_rate": 0.0001, "loss": 1.5181, "step": 15482 }, { "epoch": 1.7785308138533111, "grad_norm": 0.6592935919761658, "learning_rate": 0.0001, "loss": 1.5075, "step": 15483 }, { "epoch": 1.7786456837631381, "grad_norm": 0.6222397089004517, "learning_rate": 0.0001, "loss": 1.5401, "step": 15484 }, { "epoch": 1.7787605536729654, "grad_norm": 0.6175488233566284, "learning_rate": 0.0001, "loss": 1.4577, "step": 15485 }, { "epoch": 1.7788754235827926, "grad_norm": 0.6524683833122253, "learning_rate": 0.0001, "loss": 1.45, "step": 15486 }, { "epoch": 1.7789902934926196, "grad_norm": 0.6026191711425781, "learning_rate": 0.0001, "loss": 1.3723, "step": 15487 }, { "epoch": 1.7791051634024466, "grad_norm": 0.6724210977554321, "learning_rate": 0.0001, "loss": 1.3315, "step": 15488 }, { "epoch": 1.7792200333122739, "grad_norm": 0.738828718662262, "learning_rate": 0.0001, "loss": 1.6497, "step": 15489 }, { "epoch": 1.779334903222101, "grad_norm": 0.7495474219322205, "learning_rate": 0.0001, "loss": 1.5393, "step": 15490 }, { "epoch": 1.779449773131928, "grad_norm": 0.5927098989486694, "learning_rate": 0.0001, "loss": 1.4034, "step": 15491 }, { "epoch": 1.779564643041755, "grad_norm": 0.6002185940742493, "learning_rate": 0.0001, "loss": 1.4098, "step": 15492 }, { "epoch": 1.7796795129515823, "grad_norm": 0.6237373948097229, "learning_rate": 0.0001, "loss": 1.5183, "step": 15493 }, { "epoch": 1.7797943828614096, "grad_norm": 0.606011152267456, "learning_rate": 0.0001, "loss": 1.4736, "step": 15494 }, { "epoch": 1.7799092527712366, "grad_norm": 0.6501420736312866, "learning_rate": 0.0001, "loss": 1.4973, "step": 15495 }, { "epoch": 1.7800241226810636, "grad_norm": 0.6354816555976868, "learning_rate": 0.0001, "loss": 1.4597, "step": 15496 }, { "epoch": 1.7801389925908908, "grad_norm": 0.5931168794631958, "learning_rate": 0.0001, "loss": 1.4077, "step": 15497 }, { "epoch": 1.780253862500718, "grad_norm": 0.626844048500061, "learning_rate": 0.0001, "loss": 1.5079, "step": 15498 }, { "epoch": 1.780368732410545, "grad_norm": 0.6105011105537415, "learning_rate": 0.0001, "loss": 1.2362, "step": 15499 }, { "epoch": 1.780483602320372, "grad_norm": 0.6420881152153015, "learning_rate": 0.0001, "loss": 1.5375, "step": 15500 }, { "epoch": 1.7805984722301993, "grad_norm": 0.6629682779312134, "learning_rate": 0.0001, "loss": 1.6415, "step": 15501 }, { "epoch": 1.7807133421400265, "grad_norm": 0.5871655941009521, "learning_rate": 0.0001, "loss": 1.3112, "step": 15502 }, { "epoch": 1.7808282120498535, "grad_norm": 0.6966809034347534, "learning_rate": 0.0001, "loss": 1.5694, "step": 15503 }, { "epoch": 1.7809430819596805, "grad_norm": 0.6312387585639954, "learning_rate": 0.0001, "loss": 1.5059, "step": 15504 }, { "epoch": 1.7810579518695078, "grad_norm": 0.6314889192581177, "learning_rate": 0.0001, "loss": 1.452, "step": 15505 }, { "epoch": 1.781172821779335, "grad_norm": 0.657773494720459, "learning_rate": 0.0001, "loss": 1.2146, "step": 15506 }, { "epoch": 1.781287691689162, "grad_norm": 0.6220430731773376, "learning_rate": 0.0001, "loss": 1.3241, "step": 15507 }, { "epoch": 1.781402561598989, "grad_norm": 0.7031171321868896, "learning_rate": 0.0001, "loss": 1.4987, "step": 15508 }, { "epoch": 1.7815174315088163, "grad_norm": 0.6239733099937439, "learning_rate": 0.0001, "loss": 1.481, "step": 15509 }, { "epoch": 1.7816323014186435, "grad_norm": 0.5512365698814392, "learning_rate": 0.0001, "loss": 1.1257, "step": 15510 }, { "epoch": 1.7817471713284705, "grad_norm": 0.6748344302177429, "learning_rate": 0.0001, "loss": 1.5039, "step": 15511 }, { "epoch": 1.7818620412382975, "grad_norm": 0.6460611820220947, "learning_rate": 0.0001, "loss": 1.3985, "step": 15512 }, { "epoch": 1.7819769111481247, "grad_norm": 0.6497007608413696, "learning_rate": 0.0001, "loss": 1.5167, "step": 15513 }, { "epoch": 1.782091781057952, "grad_norm": 0.6263741850852966, "learning_rate": 0.0001, "loss": 1.4769, "step": 15514 }, { "epoch": 1.782206650967779, "grad_norm": 0.6193414330482483, "learning_rate": 0.0001, "loss": 1.4357, "step": 15515 }, { "epoch": 1.782321520877606, "grad_norm": 0.6849316358566284, "learning_rate": 0.0001, "loss": 1.3156, "step": 15516 }, { "epoch": 1.7824363907874332, "grad_norm": 0.5991883873939514, "learning_rate": 0.0001, "loss": 1.5014, "step": 15517 }, { "epoch": 1.7825512606972604, "grad_norm": 0.608816921710968, "learning_rate": 0.0001, "loss": 1.4711, "step": 15518 }, { "epoch": 1.7826661306070875, "grad_norm": 0.6820626258850098, "learning_rate": 0.0001, "loss": 1.4933, "step": 15519 }, { "epoch": 1.7827810005169145, "grad_norm": 0.6117185354232788, "learning_rate": 0.0001, "loss": 1.4344, "step": 15520 }, { "epoch": 1.7828958704267417, "grad_norm": 0.5957820415496826, "learning_rate": 0.0001, "loss": 1.5231, "step": 15521 }, { "epoch": 1.783010740336569, "grad_norm": 0.6155253648757935, "learning_rate": 0.0001, "loss": 1.5587, "step": 15522 }, { "epoch": 1.783125610246396, "grad_norm": 0.5900723934173584, "learning_rate": 0.0001, "loss": 1.4767, "step": 15523 }, { "epoch": 1.783240480156223, "grad_norm": 0.6127877831459045, "learning_rate": 0.0001, "loss": 1.485, "step": 15524 }, { "epoch": 1.7833553500660502, "grad_norm": 0.6130275130271912, "learning_rate": 0.0001, "loss": 1.2872, "step": 15525 }, { "epoch": 1.7834702199758774, "grad_norm": 0.6365898251533508, "learning_rate": 0.0001, "loss": 1.5839, "step": 15526 }, { "epoch": 1.7835850898857044, "grad_norm": 0.6927840113639832, "learning_rate": 0.0001, "loss": 1.4844, "step": 15527 }, { "epoch": 1.7836999597955314, "grad_norm": 0.6556539535522461, "learning_rate": 0.0001, "loss": 1.489, "step": 15528 }, { "epoch": 1.7838148297053587, "grad_norm": 0.6017742156982422, "learning_rate": 0.0001, "loss": 1.3716, "step": 15529 }, { "epoch": 1.7839296996151859, "grad_norm": 0.6629507541656494, "learning_rate": 0.0001, "loss": 1.5471, "step": 15530 }, { "epoch": 1.784044569525013, "grad_norm": 0.6213143467903137, "learning_rate": 0.0001, "loss": 1.6124, "step": 15531 }, { "epoch": 1.78415943943484, "grad_norm": 0.6131148338317871, "learning_rate": 0.0001, "loss": 1.5151, "step": 15532 }, { "epoch": 1.7842743093446671, "grad_norm": 0.6498699188232422, "learning_rate": 0.0001, "loss": 1.5769, "step": 15533 }, { "epoch": 1.7843891792544944, "grad_norm": 0.6236017346382141, "learning_rate": 0.0001, "loss": 1.5537, "step": 15534 }, { "epoch": 1.7845040491643214, "grad_norm": 0.6245088577270508, "learning_rate": 0.0001, "loss": 1.4866, "step": 15535 }, { "epoch": 1.7846189190741484, "grad_norm": 0.6300262212753296, "learning_rate": 0.0001, "loss": 1.5246, "step": 15536 }, { "epoch": 1.7847337889839756, "grad_norm": 0.6133743524551392, "learning_rate": 0.0001, "loss": 1.339, "step": 15537 }, { "epoch": 1.7848486588938028, "grad_norm": 0.5978740453720093, "learning_rate": 0.0001, "loss": 1.38, "step": 15538 }, { "epoch": 1.7849635288036299, "grad_norm": 0.583224892616272, "learning_rate": 0.0001, "loss": 1.4004, "step": 15539 }, { "epoch": 1.7850783987134569, "grad_norm": 0.6395849585533142, "learning_rate": 0.0001, "loss": 1.2943, "step": 15540 }, { "epoch": 1.785193268623284, "grad_norm": 0.5944806933403015, "learning_rate": 0.0001, "loss": 1.5083, "step": 15541 }, { "epoch": 1.7853081385331113, "grad_norm": 0.5802217125892639, "learning_rate": 0.0001, "loss": 1.5533, "step": 15542 }, { "epoch": 1.7854230084429383, "grad_norm": 0.6466507911682129, "learning_rate": 0.0001, "loss": 1.5448, "step": 15543 }, { "epoch": 1.7855378783527653, "grad_norm": 0.6032712459564209, "learning_rate": 0.0001, "loss": 1.3153, "step": 15544 }, { "epoch": 1.7856527482625926, "grad_norm": 0.6779415011405945, "learning_rate": 0.0001, "loss": 1.3325, "step": 15545 }, { "epoch": 1.7857676181724198, "grad_norm": 0.6788262128829956, "learning_rate": 0.0001, "loss": 1.4666, "step": 15546 }, { "epoch": 1.7858824880822468, "grad_norm": 0.6456349492073059, "learning_rate": 0.0001, "loss": 1.5217, "step": 15547 }, { "epoch": 1.7859973579920738, "grad_norm": 0.6493527889251709, "learning_rate": 0.0001, "loss": 1.5493, "step": 15548 }, { "epoch": 1.786112227901901, "grad_norm": 0.5926679968833923, "learning_rate": 0.0001, "loss": 1.2383, "step": 15549 }, { "epoch": 1.7862270978117283, "grad_norm": 0.6278696656227112, "learning_rate": 0.0001, "loss": 1.4126, "step": 15550 }, { "epoch": 1.7863419677215553, "grad_norm": 0.6778453588485718, "learning_rate": 0.0001, "loss": 1.4835, "step": 15551 }, { "epoch": 1.7864568376313823, "grad_norm": 0.6517847180366516, "learning_rate": 0.0001, "loss": 1.576, "step": 15552 }, { "epoch": 1.7865717075412095, "grad_norm": 0.6085006594657898, "learning_rate": 0.0001, "loss": 1.3612, "step": 15553 }, { "epoch": 1.7866865774510368, "grad_norm": 0.6282052397727966, "learning_rate": 0.0001, "loss": 1.5781, "step": 15554 }, { "epoch": 1.7868014473608638, "grad_norm": 0.6484823226928711, "learning_rate": 0.0001, "loss": 1.6014, "step": 15555 }, { "epoch": 1.7869163172706908, "grad_norm": 0.6144720315933228, "learning_rate": 0.0001, "loss": 1.4621, "step": 15556 }, { "epoch": 1.787031187180518, "grad_norm": 0.6251539587974548, "learning_rate": 0.0001, "loss": 1.5167, "step": 15557 }, { "epoch": 1.7871460570903452, "grad_norm": 0.7098304033279419, "learning_rate": 0.0001, "loss": 1.6219, "step": 15558 }, { "epoch": 1.7872609270001723, "grad_norm": 0.6878634095191956, "learning_rate": 0.0001, "loss": 1.5715, "step": 15559 }, { "epoch": 1.7873757969099993, "grad_norm": 0.6232366561889648, "learning_rate": 0.0001, "loss": 1.4746, "step": 15560 }, { "epoch": 1.7874906668198265, "grad_norm": 0.6680793166160583, "learning_rate": 0.0001, "loss": 1.4533, "step": 15561 }, { "epoch": 1.7876055367296537, "grad_norm": 0.5885732173919678, "learning_rate": 0.0001, "loss": 1.3305, "step": 15562 }, { "epoch": 1.787720406639481, "grad_norm": 0.7211229801177979, "learning_rate": 0.0001, "loss": 1.6991, "step": 15563 }, { "epoch": 1.787835276549308, "grad_norm": 0.6472933888435364, "learning_rate": 0.0001, "loss": 1.5143, "step": 15564 }, { "epoch": 1.787950146459135, "grad_norm": 0.6679675579071045, "learning_rate": 0.0001, "loss": 1.6347, "step": 15565 }, { "epoch": 1.7880650163689622, "grad_norm": 0.6244038939476013, "learning_rate": 0.0001, "loss": 1.4162, "step": 15566 }, { "epoch": 1.7881798862787894, "grad_norm": 0.6286173462867737, "learning_rate": 0.0001, "loss": 1.2787, "step": 15567 }, { "epoch": 1.7882947561886164, "grad_norm": 0.6652150750160217, "learning_rate": 0.0001, "loss": 1.3333, "step": 15568 }, { "epoch": 1.7884096260984435, "grad_norm": 0.5869207382202148, "learning_rate": 0.0001, "loss": 1.3693, "step": 15569 }, { "epoch": 1.7885244960082707, "grad_norm": 0.6084496974945068, "learning_rate": 0.0001, "loss": 1.419, "step": 15570 }, { "epoch": 1.788639365918098, "grad_norm": 0.6759893894195557, "learning_rate": 0.0001, "loss": 1.2883, "step": 15571 }, { "epoch": 1.788754235827925, "grad_norm": 0.5952495336532593, "learning_rate": 0.0001, "loss": 1.4366, "step": 15572 }, { "epoch": 1.788869105737752, "grad_norm": 0.5997838377952576, "learning_rate": 0.0001, "loss": 1.4331, "step": 15573 }, { "epoch": 1.7889839756475792, "grad_norm": 0.6798581480979919, "learning_rate": 0.0001, "loss": 1.6547, "step": 15574 }, { "epoch": 1.7890988455574064, "grad_norm": 0.6956671476364136, "learning_rate": 0.0001, "loss": 1.7073, "step": 15575 }, { "epoch": 1.7892137154672334, "grad_norm": 0.642201840877533, "learning_rate": 0.0001, "loss": 1.3273, "step": 15576 }, { "epoch": 1.7893285853770604, "grad_norm": 0.6423424482345581, "learning_rate": 0.0001, "loss": 1.4838, "step": 15577 }, { "epoch": 1.7894434552868876, "grad_norm": 0.6451569199562073, "learning_rate": 0.0001, "loss": 1.4567, "step": 15578 }, { "epoch": 1.7895583251967149, "grad_norm": 0.623618483543396, "learning_rate": 0.0001, "loss": 1.3361, "step": 15579 }, { "epoch": 1.7896731951065419, "grad_norm": 0.6370881795883179, "learning_rate": 0.0001, "loss": 1.4506, "step": 15580 }, { "epoch": 1.789788065016369, "grad_norm": 0.6714301705360413, "learning_rate": 0.0001, "loss": 1.48, "step": 15581 }, { "epoch": 1.7899029349261961, "grad_norm": 0.7274426817893982, "learning_rate": 0.0001, "loss": 1.5294, "step": 15582 }, { "epoch": 1.7900178048360234, "grad_norm": 0.7094860672950745, "learning_rate": 0.0001, "loss": 1.5049, "step": 15583 }, { "epoch": 1.7901326747458504, "grad_norm": 0.6510686278343201, "learning_rate": 0.0001, "loss": 1.1147, "step": 15584 }, { "epoch": 1.7902475446556774, "grad_norm": 0.6724952459335327, "learning_rate": 0.0001, "loss": 1.6518, "step": 15585 }, { "epoch": 1.7903624145655046, "grad_norm": 0.6730976700782776, "learning_rate": 0.0001, "loss": 1.6428, "step": 15586 }, { "epoch": 1.7904772844753318, "grad_norm": 0.5927944779396057, "learning_rate": 0.0001, "loss": 1.269, "step": 15587 }, { "epoch": 1.7905921543851588, "grad_norm": 0.6580035090446472, "learning_rate": 0.0001, "loss": 1.4699, "step": 15588 }, { "epoch": 1.7907070242949858, "grad_norm": 0.5773756504058838, "learning_rate": 0.0001, "loss": 1.4074, "step": 15589 }, { "epoch": 1.790821894204813, "grad_norm": 0.660759449005127, "learning_rate": 0.0001, "loss": 1.6364, "step": 15590 }, { "epoch": 1.7909367641146403, "grad_norm": 0.7201364040374756, "learning_rate": 0.0001, "loss": 1.7218, "step": 15591 }, { "epoch": 1.7910516340244673, "grad_norm": 0.6264855861663818, "learning_rate": 0.0001, "loss": 1.5182, "step": 15592 }, { "epoch": 1.7911665039342943, "grad_norm": 0.6170802712440491, "learning_rate": 0.0001, "loss": 1.3783, "step": 15593 }, { "epoch": 1.7912813738441216, "grad_norm": 0.6500568389892578, "learning_rate": 0.0001, "loss": 1.449, "step": 15594 }, { "epoch": 1.7913962437539488, "grad_norm": 0.6140516996383667, "learning_rate": 0.0001, "loss": 1.4694, "step": 15595 }, { "epoch": 1.7915111136637758, "grad_norm": 0.5915245413780212, "learning_rate": 0.0001, "loss": 1.3078, "step": 15596 }, { "epoch": 1.7916259835736028, "grad_norm": 0.5879004001617432, "learning_rate": 0.0001, "loss": 1.3216, "step": 15597 }, { "epoch": 1.79174085348343, "grad_norm": 0.6277405619621277, "learning_rate": 0.0001, "loss": 1.4248, "step": 15598 }, { "epoch": 1.7918557233932573, "grad_norm": 0.5921669006347656, "learning_rate": 0.0001, "loss": 1.3723, "step": 15599 }, { "epoch": 1.7919705933030843, "grad_norm": 0.5975223183631897, "learning_rate": 0.0001, "loss": 1.4798, "step": 15600 }, { "epoch": 1.7920854632129113, "grad_norm": 0.6387137770652771, "learning_rate": 0.0001, "loss": 1.5026, "step": 15601 }, { "epoch": 1.7922003331227385, "grad_norm": 0.5730849504470825, "learning_rate": 0.0001, "loss": 1.3881, "step": 15602 }, { "epoch": 1.7923152030325658, "grad_norm": 0.5885117650032043, "learning_rate": 0.0001, "loss": 1.5356, "step": 15603 }, { "epoch": 1.7924300729423928, "grad_norm": 0.6096714735031128, "learning_rate": 0.0001, "loss": 1.4063, "step": 15604 }, { "epoch": 1.7925449428522198, "grad_norm": 0.6324706077575684, "learning_rate": 0.0001, "loss": 1.5439, "step": 15605 }, { "epoch": 1.792659812762047, "grad_norm": 0.6507951617240906, "learning_rate": 0.0001, "loss": 1.4797, "step": 15606 }, { "epoch": 1.7927746826718742, "grad_norm": 0.6201066970825195, "learning_rate": 0.0001, "loss": 1.5646, "step": 15607 }, { "epoch": 1.7928895525817012, "grad_norm": 0.6061670780181885, "learning_rate": 0.0001, "loss": 1.455, "step": 15608 }, { "epoch": 1.7930044224915282, "grad_norm": 0.6483253240585327, "learning_rate": 0.0001, "loss": 1.3321, "step": 15609 }, { "epoch": 1.7931192924013555, "grad_norm": 0.608324408531189, "learning_rate": 0.0001, "loss": 1.5719, "step": 15610 }, { "epoch": 1.7932341623111827, "grad_norm": 0.5885353684425354, "learning_rate": 0.0001, "loss": 1.4886, "step": 15611 }, { "epoch": 1.7933490322210097, "grad_norm": 0.612770676612854, "learning_rate": 0.0001, "loss": 1.4332, "step": 15612 }, { "epoch": 1.7934639021308367, "grad_norm": 0.656052827835083, "learning_rate": 0.0001, "loss": 1.3348, "step": 15613 }, { "epoch": 1.793578772040664, "grad_norm": 0.6733161211013794, "learning_rate": 0.0001, "loss": 1.5747, "step": 15614 }, { "epoch": 1.7936936419504912, "grad_norm": 0.6594151258468628, "learning_rate": 0.0001, "loss": 1.5171, "step": 15615 }, { "epoch": 1.7938085118603182, "grad_norm": 0.6126337051391602, "learning_rate": 0.0001, "loss": 1.3746, "step": 15616 }, { "epoch": 1.7939233817701452, "grad_norm": 0.613390326499939, "learning_rate": 0.0001, "loss": 1.4174, "step": 15617 }, { "epoch": 1.7940382516799724, "grad_norm": 0.6970869898796082, "learning_rate": 0.0001, "loss": 1.4908, "step": 15618 }, { "epoch": 1.7941531215897997, "grad_norm": 0.5497614145278931, "learning_rate": 0.0001, "loss": 1.3542, "step": 15619 }, { "epoch": 1.7942679914996267, "grad_norm": 0.5766857862472534, "learning_rate": 0.0001, "loss": 1.5123, "step": 15620 }, { "epoch": 1.7943828614094537, "grad_norm": 0.6197019219398499, "learning_rate": 0.0001, "loss": 1.2267, "step": 15621 }, { "epoch": 1.794497731319281, "grad_norm": 0.6978227496147156, "learning_rate": 0.0001, "loss": 1.3925, "step": 15622 }, { "epoch": 1.7946126012291082, "grad_norm": 0.6291922330856323, "learning_rate": 0.0001, "loss": 1.3681, "step": 15623 }, { "epoch": 1.7947274711389352, "grad_norm": 0.6435182690620422, "learning_rate": 0.0001, "loss": 1.5943, "step": 15624 }, { "epoch": 1.7948423410487622, "grad_norm": 0.6218845248222351, "learning_rate": 0.0001, "loss": 1.4538, "step": 15625 }, { "epoch": 1.7949572109585894, "grad_norm": 0.5924429297447205, "learning_rate": 0.0001, "loss": 1.4747, "step": 15626 }, { "epoch": 1.7950720808684166, "grad_norm": 0.632346510887146, "learning_rate": 0.0001, "loss": 1.5101, "step": 15627 }, { "epoch": 1.7951869507782436, "grad_norm": 0.5771130323410034, "learning_rate": 0.0001, "loss": 1.4203, "step": 15628 }, { "epoch": 1.7953018206880706, "grad_norm": 0.5830177068710327, "learning_rate": 0.0001, "loss": 1.5285, "step": 15629 }, { "epoch": 1.7954166905978979, "grad_norm": 0.685839831829071, "learning_rate": 0.0001, "loss": 1.3029, "step": 15630 }, { "epoch": 1.795531560507725, "grad_norm": 0.5754895806312561, "learning_rate": 0.0001, "loss": 1.4049, "step": 15631 }, { "epoch": 1.7956464304175521, "grad_norm": 0.6010079979896545, "learning_rate": 0.0001, "loss": 1.3411, "step": 15632 }, { "epoch": 1.7957613003273791, "grad_norm": 0.6301531791687012, "learning_rate": 0.0001, "loss": 1.3216, "step": 15633 }, { "epoch": 1.7958761702372064, "grad_norm": 0.5766407251358032, "learning_rate": 0.0001, "loss": 1.3345, "step": 15634 }, { "epoch": 1.7959910401470336, "grad_norm": 0.6998764872550964, "learning_rate": 0.0001, "loss": 1.7205, "step": 15635 }, { "epoch": 1.7961059100568606, "grad_norm": 0.6410335898399353, "learning_rate": 0.0001, "loss": 1.6245, "step": 15636 }, { "epoch": 1.7962207799666876, "grad_norm": 0.5890425443649292, "learning_rate": 0.0001, "loss": 1.4822, "step": 15637 }, { "epoch": 1.7963356498765148, "grad_norm": 0.717514157295227, "learning_rate": 0.0001, "loss": 1.4209, "step": 15638 }, { "epoch": 1.796450519786342, "grad_norm": 0.6638051271438599, "learning_rate": 0.0001, "loss": 1.6087, "step": 15639 }, { "epoch": 1.796565389696169, "grad_norm": 0.6315420269966125, "learning_rate": 0.0001, "loss": 1.3122, "step": 15640 }, { "epoch": 1.796680259605996, "grad_norm": 0.6300930380821228, "learning_rate": 0.0001, "loss": 1.4313, "step": 15641 }, { "epoch": 1.7967951295158233, "grad_norm": 0.6064184904098511, "learning_rate": 0.0001, "loss": 1.4815, "step": 15642 }, { "epoch": 1.7969099994256506, "grad_norm": 0.5937897562980652, "learning_rate": 0.0001, "loss": 1.44, "step": 15643 }, { "epoch": 1.7970248693354776, "grad_norm": 0.5919926166534424, "learning_rate": 0.0001, "loss": 1.4487, "step": 15644 }, { "epoch": 1.7971397392453046, "grad_norm": 0.6598802208900452, "learning_rate": 0.0001, "loss": 1.3712, "step": 15645 }, { "epoch": 1.7972546091551318, "grad_norm": 0.6512289047241211, "learning_rate": 0.0001, "loss": 1.2151, "step": 15646 }, { "epoch": 1.797369479064959, "grad_norm": 0.6600479483604431, "learning_rate": 0.0001, "loss": 1.5193, "step": 15647 }, { "epoch": 1.797484348974786, "grad_norm": 0.6048285365104675, "learning_rate": 0.0001, "loss": 1.4262, "step": 15648 }, { "epoch": 1.797599218884613, "grad_norm": 0.6369686126708984, "learning_rate": 0.0001, "loss": 1.5183, "step": 15649 }, { "epoch": 1.7977140887944403, "grad_norm": 0.6395129561424255, "learning_rate": 0.0001, "loss": 1.4658, "step": 15650 }, { "epoch": 1.7978289587042675, "grad_norm": 0.6294239163398743, "learning_rate": 0.0001, "loss": 1.3939, "step": 15651 }, { "epoch": 1.7979438286140945, "grad_norm": 0.6349182724952698, "learning_rate": 0.0001, "loss": 1.4647, "step": 15652 }, { "epoch": 1.7980586985239215, "grad_norm": 0.7371272444725037, "learning_rate": 0.0001, "loss": 1.5782, "step": 15653 }, { "epoch": 1.7981735684337488, "grad_norm": 0.724577784538269, "learning_rate": 0.0001, "loss": 1.3706, "step": 15654 }, { "epoch": 1.798288438343576, "grad_norm": 0.6599369645118713, "learning_rate": 0.0001, "loss": 1.4384, "step": 15655 }, { "epoch": 1.798403308253403, "grad_norm": 0.6410521864891052, "learning_rate": 0.0001, "loss": 1.4447, "step": 15656 }, { "epoch": 1.79851817816323, "grad_norm": 0.6453815698623657, "learning_rate": 0.0001, "loss": 1.2947, "step": 15657 }, { "epoch": 1.7986330480730572, "grad_norm": 0.5987499356269836, "learning_rate": 0.0001, "loss": 1.5017, "step": 15658 }, { "epoch": 1.7987479179828845, "grad_norm": 0.6878007054328918, "learning_rate": 0.0001, "loss": 1.616, "step": 15659 }, { "epoch": 1.7988627878927115, "grad_norm": 0.6186679005622864, "learning_rate": 0.0001, "loss": 1.4337, "step": 15660 }, { "epoch": 1.7989776578025385, "grad_norm": 0.6373911499977112, "learning_rate": 0.0001, "loss": 1.3869, "step": 15661 }, { "epoch": 1.7990925277123657, "grad_norm": 0.626069962978363, "learning_rate": 0.0001, "loss": 1.4759, "step": 15662 }, { "epoch": 1.799207397622193, "grad_norm": 0.6917441487312317, "learning_rate": 0.0001, "loss": 1.6685, "step": 15663 }, { "epoch": 1.79932226753202, "grad_norm": 0.6298771500587463, "learning_rate": 0.0001, "loss": 1.4287, "step": 15664 }, { "epoch": 1.799437137441847, "grad_norm": 0.6612205505371094, "learning_rate": 0.0001, "loss": 1.4862, "step": 15665 }, { "epoch": 1.7995520073516742, "grad_norm": 0.6308835744857788, "learning_rate": 0.0001, "loss": 1.3969, "step": 15666 }, { "epoch": 1.7996668772615014, "grad_norm": 0.6707907319068909, "learning_rate": 0.0001, "loss": 1.5247, "step": 15667 }, { "epoch": 1.7997817471713284, "grad_norm": 0.5699948072433472, "learning_rate": 0.0001, "loss": 1.2569, "step": 15668 }, { "epoch": 1.7998966170811554, "grad_norm": 0.6121656894683838, "learning_rate": 0.0001, "loss": 1.5105, "step": 15669 }, { "epoch": 1.8000114869909827, "grad_norm": 0.640649676322937, "learning_rate": 0.0001, "loss": 1.63, "step": 15670 }, { "epoch": 1.80012635690081, "grad_norm": 0.595537543296814, "learning_rate": 0.0001, "loss": 1.5195, "step": 15671 }, { "epoch": 1.800241226810637, "grad_norm": 0.6392516493797302, "learning_rate": 0.0001, "loss": 1.5501, "step": 15672 }, { "epoch": 1.800356096720464, "grad_norm": 0.6374161243438721, "learning_rate": 0.0001, "loss": 1.3964, "step": 15673 }, { "epoch": 1.8004709666302912, "grad_norm": 0.6486839652061462, "learning_rate": 0.0001, "loss": 1.6107, "step": 15674 }, { "epoch": 1.8005858365401184, "grad_norm": 0.7054831981658936, "learning_rate": 0.0001, "loss": 1.5579, "step": 15675 }, { "epoch": 1.8007007064499454, "grad_norm": 0.6817604899406433, "learning_rate": 0.0001, "loss": 1.5474, "step": 15676 }, { "epoch": 1.8008155763597724, "grad_norm": 0.6403077840805054, "learning_rate": 0.0001, "loss": 1.4614, "step": 15677 }, { "epoch": 1.8009304462695996, "grad_norm": 0.6834919452667236, "learning_rate": 0.0001, "loss": 1.3062, "step": 15678 }, { "epoch": 1.8010453161794269, "grad_norm": 0.6138955354690552, "learning_rate": 0.0001, "loss": 1.235, "step": 15679 }, { "epoch": 1.8011601860892539, "grad_norm": 0.6135299205780029, "learning_rate": 0.0001, "loss": 1.3112, "step": 15680 }, { "epoch": 1.8012750559990809, "grad_norm": 0.6524873971939087, "learning_rate": 0.0001, "loss": 1.3524, "step": 15681 }, { "epoch": 1.8013899259089081, "grad_norm": 0.635270357131958, "learning_rate": 0.0001, "loss": 1.1356, "step": 15682 }, { "epoch": 1.8015047958187353, "grad_norm": 0.6525683403015137, "learning_rate": 0.0001, "loss": 1.3779, "step": 15683 }, { "epoch": 1.8016196657285624, "grad_norm": 0.6439868807792664, "learning_rate": 0.0001, "loss": 1.4131, "step": 15684 }, { "epoch": 1.8017345356383894, "grad_norm": 0.6394991278648376, "learning_rate": 0.0001, "loss": 1.5015, "step": 15685 }, { "epoch": 1.8018494055482166, "grad_norm": 0.6692410707473755, "learning_rate": 0.0001, "loss": 1.477, "step": 15686 }, { "epoch": 1.8019642754580438, "grad_norm": 0.5906667709350586, "learning_rate": 0.0001, "loss": 1.2574, "step": 15687 }, { "epoch": 1.8020791453678708, "grad_norm": 0.6098186373710632, "learning_rate": 0.0001, "loss": 1.3739, "step": 15688 }, { "epoch": 1.8021940152776978, "grad_norm": 0.636909544467926, "learning_rate": 0.0001, "loss": 1.4122, "step": 15689 }, { "epoch": 1.802308885187525, "grad_norm": 0.606322169303894, "learning_rate": 0.0001, "loss": 1.4376, "step": 15690 }, { "epoch": 1.8024237550973523, "grad_norm": 0.6577963829040527, "learning_rate": 0.0001, "loss": 1.3516, "step": 15691 }, { "epoch": 1.8025386250071793, "grad_norm": 0.6309029459953308, "learning_rate": 0.0001, "loss": 1.516, "step": 15692 }, { "epoch": 1.8026534949170063, "grad_norm": 0.585844099521637, "learning_rate": 0.0001, "loss": 1.4676, "step": 15693 }, { "epoch": 1.8027683648268336, "grad_norm": 0.6345848441123962, "learning_rate": 0.0001, "loss": 1.3576, "step": 15694 }, { "epoch": 1.8028832347366608, "grad_norm": 0.6085816621780396, "learning_rate": 0.0001, "loss": 1.5278, "step": 15695 }, { "epoch": 1.8029981046464878, "grad_norm": 0.5922706127166748, "learning_rate": 0.0001, "loss": 1.4294, "step": 15696 }, { "epoch": 1.8031129745563148, "grad_norm": 0.7549859285354614, "learning_rate": 0.0001, "loss": 1.6641, "step": 15697 }, { "epoch": 1.803227844466142, "grad_norm": 0.5932414531707764, "learning_rate": 0.0001, "loss": 1.3871, "step": 15698 }, { "epoch": 1.8033427143759693, "grad_norm": 0.6508196592330933, "learning_rate": 0.0001, "loss": 1.4249, "step": 15699 }, { "epoch": 1.8034575842857965, "grad_norm": 0.6208270788192749, "learning_rate": 0.0001, "loss": 1.5313, "step": 15700 }, { "epoch": 1.8035724541956235, "grad_norm": 0.5751149654388428, "learning_rate": 0.0001, "loss": 1.333, "step": 15701 }, { "epoch": 1.8036873241054505, "grad_norm": 0.6106820702552795, "learning_rate": 0.0001, "loss": 1.2523, "step": 15702 }, { "epoch": 1.8038021940152777, "grad_norm": 0.6373929977416992, "learning_rate": 0.0001, "loss": 1.4284, "step": 15703 }, { "epoch": 1.803917063925105, "grad_norm": 0.6269072890281677, "learning_rate": 0.0001, "loss": 1.4818, "step": 15704 }, { "epoch": 1.804031933834932, "grad_norm": 0.6745073199272156, "learning_rate": 0.0001, "loss": 1.3644, "step": 15705 }, { "epoch": 1.804146803744759, "grad_norm": 0.7023970484733582, "learning_rate": 0.0001, "loss": 1.5443, "step": 15706 }, { "epoch": 1.8042616736545862, "grad_norm": 0.6291683912277222, "learning_rate": 0.0001, "loss": 1.3183, "step": 15707 }, { "epoch": 1.8043765435644135, "grad_norm": 0.6405826807022095, "learning_rate": 0.0001, "loss": 1.5059, "step": 15708 }, { "epoch": 1.8044914134742405, "grad_norm": 0.6145171523094177, "learning_rate": 0.0001, "loss": 1.4438, "step": 15709 }, { "epoch": 1.8046062833840675, "grad_norm": 0.6371951103210449, "learning_rate": 0.0001, "loss": 1.562, "step": 15710 }, { "epoch": 1.8047211532938947, "grad_norm": 0.6146504878997803, "learning_rate": 0.0001, "loss": 1.4305, "step": 15711 }, { "epoch": 1.804836023203722, "grad_norm": 0.6177845597267151, "learning_rate": 0.0001, "loss": 1.2665, "step": 15712 }, { "epoch": 1.804950893113549, "grad_norm": 0.6438102722167969, "learning_rate": 0.0001, "loss": 1.5661, "step": 15713 }, { "epoch": 1.805065763023376, "grad_norm": 0.6426578164100647, "learning_rate": 0.0001, "loss": 1.4181, "step": 15714 }, { "epoch": 1.8051806329332032, "grad_norm": 0.7150061130523682, "learning_rate": 0.0001, "loss": 1.4365, "step": 15715 }, { "epoch": 1.8052955028430304, "grad_norm": 0.6319580674171448, "learning_rate": 0.0001, "loss": 1.4184, "step": 15716 }, { "epoch": 1.8054103727528574, "grad_norm": 0.6289169788360596, "learning_rate": 0.0001, "loss": 1.4949, "step": 15717 }, { "epoch": 1.8055252426626844, "grad_norm": 0.6072092056274414, "learning_rate": 0.0001, "loss": 1.5016, "step": 15718 }, { "epoch": 1.8056401125725117, "grad_norm": 0.71548992395401, "learning_rate": 0.0001, "loss": 1.7456, "step": 15719 }, { "epoch": 1.805754982482339, "grad_norm": 0.622413694858551, "learning_rate": 0.0001, "loss": 1.4269, "step": 15720 }, { "epoch": 1.805869852392166, "grad_norm": 0.6449538469314575, "learning_rate": 0.0001, "loss": 1.2798, "step": 15721 }, { "epoch": 1.805984722301993, "grad_norm": 0.6331883668899536, "learning_rate": 0.0001, "loss": 1.4222, "step": 15722 }, { "epoch": 1.8060995922118201, "grad_norm": 0.57768315076828, "learning_rate": 0.0001, "loss": 1.2396, "step": 15723 }, { "epoch": 1.8062144621216474, "grad_norm": 0.6328497529029846, "learning_rate": 0.0001, "loss": 1.3806, "step": 15724 }, { "epoch": 1.8063293320314744, "grad_norm": 0.69939124584198, "learning_rate": 0.0001, "loss": 1.3731, "step": 15725 }, { "epoch": 1.8064442019413014, "grad_norm": 0.6895657181739807, "learning_rate": 0.0001, "loss": 1.507, "step": 15726 }, { "epoch": 1.8065590718511286, "grad_norm": 0.6120685935020447, "learning_rate": 0.0001, "loss": 1.1978, "step": 15727 }, { "epoch": 1.8066739417609559, "grad_norm": 0.6782362461090088, "learning_rate": 0.0001, "loss": 1.2976, "step": 15728 }, { "epoch": 1.8067888116707829, "grad_norm": 0.6087343096733093, "learning_rate": 0.0001, "loss": 1.3276, "step": 15729 }, { "epoch": 1.8069036815806099, "grad_norm": 0.6072776913642883, "learning_rate": 0.0001, "loss": 1.4559, "step": 15730 }, { "epoch": 1.807018551490437, "grad_norm": 0.6058279275894165, "learning_rate": 0.0001, "loss": 1.3737, "step": 15731 }, { "epoch": 1.8071334214002643, "grad_norm": 0.63099604845047, "learning_rate": 0.0001, "loss": 1.413, "step": 15732 }, { "epoch": 1.8072482913100913, "grad_norm": 0.6059746146202087, "learning_rate": 0.0001, "loss": 1.4017, "step": 15733 }, { "epoch": 1.8073631612199184, "grad_norm": 0.6642100214958191, "learning_rate": 0.0001, "loss": 1.5973, "step": 15734 }, { "epoch": 1.8074780311297456, "grad_norm": 0.6303525567054749, "learning_rate": 0.0001, "loss": 1.5459, "step": 15735 }, { "epoch": 1.8075929010395728, "grad_norm": 0.605383038520813, "learning_rate": 0.0001, "loss": 1.3552, "step": 15736 }, { "epoch": 1.8077077709493998, "grad_norm": 0.6636791229248047, "learning_rate": 0.0001, "loss": 1.3234, "step": 15737 }, { "epoch": 1.8078226408592268, "grad_norm": 0.6717275381088257, "learning_rate": 0.0001, "loss": 1.5471, "step": 15738 }, { "epoch": 1.807937510769054, "grad_norm": 0.6487286686897278, "learning_rate": 0.0001, "loss": 1.5063, "step": 15739 }, { "epoch": 1.8080523806788813, "grad_norm": 0.6421818137168884, "learning_rate": 0.0001, "loss": 1.4977, "step": 15740 }, { "epoch": 1.8081672505887083, "grad_norm": 0.6361395716667175, "learning_rate": 0.0001, "loss": 1.416, "step": 15741 }, { "epoch": 1.8082821204985353, "grad_norm": 0.6091173887252808, "learning_rate": 0.0001, "loss": 1.5454, "step": 15742 }, { "epoch": 1.8083969904083625, "grad_norm": 0.6464824080467224, "learning_rate": 0.0001, "loss": 1.4512, "step": 15743 }, { "epoch": 1.8085118603181898, "grad_norm": 0.6446046829223633, "learning_rate": 0.0001, "loss": 1.451, "step": 15744 }, { "epoch": 1.8086267302280168, "grad_norm": 0.7475030422210693, "learning_rate": 0.0001, "loss": 1.7174, "step": 15745 }, { "epoch": 1.8087416001378438, "grad_norm": 0.6352036595344543, "learning_rate": 0.0001, "loss": 1.3802, "step": 15746 }, { "epoch": 1.808856470047671, "grad_norm": 0.65043044090271, "learning_rate": 0.0001, "loss": 1.4297, "step": 15747 }, { "epoch": 1.8089713399574983, "grad_norm": 0.6558632254600525, "learning_rate": 0.0001, "loss": 1.4044, "step": 15748 }, { "epoch": 1.8090862098673253, "grad_norm": 0.6217056512832642, "learning_rate": 0.0001, "loss": 1.2797, "step": 15749 }, { "epoch": 1.8092010797771523, "grad_norm": 0.6086947917938232, "learning_rate": 0.0001, "loss": 1.3942, "step": 15750 }, { "epoch": 1.8093159496869795, "grad_norm": 0.5924387574195862, "learning_rate": 0.0001, "loss": 1.5433, "step": 15751 }, { "epoch": 1.8094308195968067, "grad_norm": 0.6517341732978821, "learning_rate": 0.0001, "loss": 1.4954, "step": 15752 }, { "epoch": 1.8095456895066337, "grad_norm": 0.6108118891716003, "learning_rate": 0.0001, "loss": 1.3201, "step": 15753 }, { "epoch": 1.8096605594164608, "grad_norm": 0.6316987872123718, "learning_rate": 0.0001, "loss": 1.5334, "step": 15754 }, { "epoch": 1.809775429326288, "grad_norm": 0.6387416124343872, "learning_rate": 0.0001, "loss": 1.5644, "step": 15755 }, { "epoch": 1.8098902992361152, "grad_norm": 0.604150116443634, "learning_rate": 0.0001, "loss": 1.185, "step": 15756 }, { "epoch": 1.8100051691459422, "grad_norm": 0.6756969690322876, "learning_rate": 0.0001, "loss": 1.5415, "step": 15757 }, { "epoch": 1.8101200390557692, "grad_norm": 0.6240161061286926, "learning_rate": 0.0001, "loss": 1.3972, "step": 15758 }, { "epoch": 1.8102349089655965, "grad_norm": 0.6883102655410767, "learning_rate": 0.0001, "loss": 1.5476, "step": 15759 }, { "epoch": 1.8103497788754237, "grad_norm": 0.5605351328849792, "learning_rate": 0.0001, "loss": 1.3037, "step": 15760 }, { "epoch": 1.8104646487852507, "grad_norm": 0.6002980470657349, "learning_rate": 0.0001, "loss": 1.2442, "step": 15761 }, { "epoch": 1.8105795186950777, "grad_norm": 0.6384274959564209, "learning_rate": 0.0001, "loss": 1.4306, "step": 15762 }, { "epoch": 1.810694388604905, "grad_norm": 0.6188052296638489, "learning_rate": 0.0001, "loss": 1.4172, "step": 15763 }, { "epoch": 1.8108092585147322, "grad_norm": 0.6061704158782959, "learning_rate": 0.0001, "loss": 1.2811, "step": 15764 }, { "epoch": 1.8109241284245592, "grad_norm": 0.6506741642951965, "learning_rate": 0.0001, "loss": 1.4402, "step": 15765 }, { "epoch": 1.8110389983343862, "grad_norm": 0.6865687966346741, "learning_rate": 0.0001, "loss": 1.4626, "step": 15766 }, { "epoch": 1.8111538682442134, "grad_norm": 0.6647955179214478, "learning_rate": 0.0001, "loss": 1.3912, "step": 15767 }, { "epoch": 1.8112687381540407, "grad_norm": 0.6481913924217224, "learning_rate": 0.0001, "loss": 1.4325, "step": 15768 }, { "epoch": 1.8113836080638677, "grad_norm": 0.7807742953300476, "learning_rate": 0.0001, "loss": 1.6238, "step": 15769 }, { "epoch": 1.8114984779736947, "grad_norm": 0.663368284702301, "learning_rate": 0.0001, "loss": 1.2071, "step": 15770 }, { "epoch": 1.811613347883522, "grad_norm": 0.6617883443832397, "learning_rate": 0.0001, "loss": 1.462, "step": 15771 }, { "epoch": 1.8117282177933491, "grad_norm": 0.6506766676902771, "learning_rate": 0.0001, "loss": 1.4079, "step": 15772 }, { "epoch": 1.8118430877031761, "grad_norm": 0.655314564704895, "learning_rate": 0.0001, "loss": 1.4805, "step": 15773 }, { "epoch": 1.8119579576130032, "grad_norm": 0.5995591878890991, "learning_rate": 0.0001, "loss": 1.4349, "step": 15774 }, { "epoch": 1.8120728275228304, "grad_norm": 0.7130681276321411, "learning_rate": 0.0001, "loss": 1.28, "step": 15775 }, { "epoch": 1.8121876974326576, "grad_norm": 0.6497383713722229, "learning_rate": 0.0001, "loss": 1.3618, "step": 15776 }, { "epoch": 1.8123025673424846, "grad_norm": 0.6399600505828857, "learning_rate": 0.0001, "loss": 1.5071, "step": 15777 }, { "epoch": 1.8124174372523116, "grad_norm": 0.6517605185508728, "learning_rate": 0.0001, "loss": 1.6146, "step": 15778 }, { "epoch": 1.8125323071621389, "grad_norm": 0.7210836410522461, "learning_rate": 0.0001, "loss": 1.3193, "step": 15779 }, { "epoch": 1.812647177071966, "grad_norm": 0.6529654264450073, "learning_rate": 0.0001, "loss": 1.3761, "step": 15780 }, { "epoch": 1.812762046981793, "grad_norm": 0.5776856541633606, "learning_rate": 0.0001, "loss": 1.2645, "step": 15781 }, { "epoch": 1.8128769168916201, "grad_norm": 0.6165531277656555, "learning_rate": 0.0001, "loss": 1.5794, "step": 15782 }, { "epoch": 1.8129917868014473, "grad_norm": 0.6355653405189514, "learning_rate": 0.0001, "loss": 1.4083, "step": 15783 }, { "epoch": 1.8131066567112746, "grad_norm": 0.6272133588790894, "learning_rate": 0.0001, "loss": 1.5475, "step": 15784 }, { "epoch": 1.8132215266211016, "grad_norm": 0.6231955885887146, "learning_rate": 0.0001, "loss": 1.1145, "step": 15785 }, { "epoch": 1.8133363965309286, "grad_norm": 0.5937713980674744, "learning_rate": 0.0001, "loss": 1.3303, "step": 15786 }, { "epoch": 1.8134512664407558, "grad_norm": 0.6835618615150452, "learning_rate": 0.0001, "loss": 1.5671, "step": 15787 }, { "epoch": 1.813566136350583, "grad_norm": 0.700341522693634, "learning_rate": 0.0001, "loss": 1.6887, "step": 15788 }, { "epoch": 1.81368100626041, "grad_norm": 0.6277741193771362, "learning_rate": 0.0001, "loss": 1.5733, "step": 15789 }, { "epoch": 1.813795876170237, "grad_norm": 0.6309356689453125, "learning_rate": 0.0001, "loss": 1.5235, "step": 15790 }, { "epoch": 1.8139107460800643, "grad_norm": 0.5971214175224304, "learning_rate": 0.0001, "loss": 1.3632, "step": 15791 }, { "epoch": 1.8140256159898915, "grad_norm": 0.7022644281387329, "learning_rate": 0.0001, "loss": 1.6092, "step": 15792 }, { "epoch": 1.8141404858997185, "grad_norm": 0.5725907683372498, "learning_rate": 0.0001, "loss": 1.4579, "step": 15793 }, { "epoch": 1.8142553558095456, "grad_norm": 0.6333192586898804, "learning_rate": 0.0001, "loss": 1.3691, "step": 15794 }, { "epoch": 1.8143702257193728, "grad_norm": 0.6314082145690918, "learning_rate": 0.0001, "loss": 1.5763, "step": 15795 }, { "epoch": 1.8144850956292, "grad_norm": 0.6020960211753845, "learning_rate": 0.0001, "loss": 1.3315, "step": 15796 }, { "epoch": 1.814599965539027, "grad_norm": 0.636551022529602, "learning_rate": 0.0001, "loss": 1.2842, "step": 15797 }, { "epoch": 1.814714835448854, "grad_norm": 0.6058814525604248, "learning_rate": 0.0001, "loss": 1.4964, "step": 15798 }, { "epoch": 1.8148297053586813, "grad_norm": 0.581272304058075, "learning_rate": 0.0001, "loss": 1.4039, "step": 15799 }, { "epoch": 1.8149445752685085, "grad_norm": 0.6171589493751526, "learning_rate": 0.0001, "loss": 1.4822, "step": 15800 }, { "epoch": 1.8150594451783355, "grad_norm": 0.6367219090461731, "learning_rate": 0.0001, "loss": 1.3735, "step": 15801 }, { "epoch": 1.8151743150881625, "grad_norm": 0.6684643030166626, "learning_rate": 0.0001, "loss": 1.7442, "step": 15802 }, { "epoch": 1.8152891849979897, "grad_norm": 0.5805231332778931, "learning_rate": 0.0001, "loss": 1.4201, "step": 15803 }, { "epoch": 1.815404054907817, "grad_norm": 0.6159259676933289, "learning_rate": 0.0001, "loss": 1.2066, "step": 15804 }, { "epoch": 1.815518924817644, "grad_norm": 0.5856978297233582, "learning_rate": 0.0001, "loss": 1.3786, "step": 15805 }, { "epoch": 1.815633794727471, "grad_norm": 0.6367132663726807, "learning_rate": 0.0001, "loss": 1.3513, "step": 15806 }, { "epoch": 1.8157486646372982, "grad_norm": 0.6610946655273438, "learning_rate": 0.0001, "loss": 1.4352, "step": 15807 }, { "epoch": 1.8158635345471255, "grad_norm": 0.6742769479751587, "learning_rate": 0.0001, "loss": 1.4922, "step": 15808 }, { "epoch": 1.8159784044569525, "grad_norm": 0.6435980200767517, "learning_rate": 0.0001, "loss": 1.5316, "step": 15809 }, { "epoch": 1.8160932743667795, "grad_norm": 0.6399965882301331, "learning_rate": 0.0001, "loss": 1.4247, "step": 15810 }, { "epoch": 1.8162081442766067, "grad_norm": 0.6221297979354858, "learning_rate": 0.0001, "loss": 1.4666, "step": 15811 }, { "epoch": 1.816323014186434, "grad_norm": 0.6029366254806519, "learning_rate": 0.0001, "loss": 1.448, "step": 15812 }, { "epoch": 1.816437884096261, "grad_norm": 0.628516435623169, "learning_rate": 0.0001, "loss": 1.1754, "step": 15813 }, { "epoch": 1.816552754006088, "grad_norm": 0.7524257898330688, "learning_rate": 0.0001, "loss": 1.6957, "step": 15814 }, { "epoch": 1.8166676239159152, "grad_norm": 0.6547483205795288, "learning_rate": 0.0001, "loss": 1.3974, "step": 15815 }, { "epoch": 1.8167824938257424, "grad_norm": 0.7047604918479919, "learning_rate": 0.0001, "loss": 1.3708, "step": 15816 }, { "epoch": 1.8168973637355694, "grad_norm": 0.6523260474205017, "learning_rate": 0.0001, "loss": 1.4342, "step": 15817 }, { "epoch": 1.8170122336453964, "grad_norm": 0.6819579005241394, "learning_rate": 0.0001, "loss": 1.4529, "step": 15818 }, { "epoch": 1.8171271035552237, "grad_norm": 0.6569975018501282, "learning_rate": 0.0001, "loss": 1.3422, "step": 15819 }, { "epoch": 1.817241973465051, "grad_norm": 0.6896679401397705, "learning_rate": 0.0001, "loss": 1.4424, "step": 15820 }, { "epoch": 1.817356843374878, "grad_norm": 0.6512749195098877, "learning_rate": 0.0001, "loss": 1.3904, "step": 15821 }, { "epoch": 1.817471713284705, "grad_norm": 0.6123222708702087, "learning_rate": 0.0001, "loss": 1.3856, "step": 15822 }, { "epoch": 1.8175865831945321, "grad_norm": 0.5971189141273499, "learning_rate": 0.0001, "loss": 1.4344, "step": 15823 }, { "epoch": 1.8177014531043594, "grad_norm": 0.5992557406425476, "learning_rate": 0.0001, "loss": 1.4017, "step": 15824 }, { "epoch": 1.8178163230141864, "grad_norm": 0.6497430205345154, "learning_rate": 0.0001, "loss": 1.4175, "step": 15825 }, { "epoch": 1.8179311929240134, "grad_norm": 0.6796419620513916, "learning_rate": 0.0001, "loss": 1.4605, "step": 15826 }, { "epoch": 1.8180460628338406, "grad_norm": 0.6003844738006592, "learning_rate": 0.0001, "loss": 1.3818, "step": 15827 }, { "epoch": 1.8181609327436679, "grad_norm": 0.7161847949028015, "learning_rate": 0.0001, "loss": 1.4173, "step": 15828 }, { "epoch": 1.8182758026534949, "grad_norm": 0.6299294829368591, "learning_rate": 0.0001, "loss": 1.4459, "step": 15829 }, { "epoch": 1.8183906725633219, "grad_norm": 0.7790576815605164, "learning_rate": 0.0001, "loss": 1.7595, "step": 15830 }, { "epoch": 1.818505542473149, "grad_norm": 0.5734109282493591, "learning_rate": 0.0001, "loss": 1.3666, "step": 15831 }, { "epoch": 1.8186204123829763, "grad_norm": 0.6383972764015198, "learning_rate": 0.0001, "loss": 1.6129, "step": 15832 }, { "epoch": 1.8187352822928033, "grad_norm": 0.6514372825622559, "learning_rate": 0.0001, "loss": 1.4179, "step": 15833 }, { "epoch": 1.8188501522026304, "grad_norm": 0.6539475321769714, "learning_rate": 0.0001, "loss": 1.3834, "step": 15834 }, { "epoch": 1.8189650221124576, "grad_norm": 0.6508055925369263, "learning_rate": 0.0001, "loss": 1.1476, "step": 15835 }, { "epoch": 1.8190798920222848, "grad_norm": 0.6467891335487366, "learning_rate": 0.0001, "loss": 1.4836, "step": 15836 }, { "epoch": 1.819194761932112, "grad_norm": 0.6416442394256592, "learning_rate": 0.0001, "loss": 1.4443, "step": 15837 }, { "epoch": 1.819309631841939, "grad_norm": 0.6353035569190979, "learning_rate": 0.0001, "loss": 1.3113, "step": 15838 }, { "epoch": 1.819424501751766, "grad_norm": 0.6308345198631287, "learning_rate": 0.0001, "loss": 1.5127, "step": 15839 }, { "epoch": 1.8195393716615933, "grad_norm": 0.66878741979599, "learning_rate": 0.0001, "loss": 1.5101, "step": 15840 }, { "epoch": 1.8196542415714205, "grad_norm": 0.6357544660568237, "learning_rate": 0.0001, "loss": 1.4058, "step": 15841 }, { "epoch": 1.8197691114812475, "grad_norm": 0.6938048601150513, "learning_rate": 0.0001, "loss": 1.3966, "step": 15842 }, { "epoch": 1.8198839813910745, "grad_norm": 0.6165516376495361, "learning_rate": 0.0001, "loss": 1.392, "step": 15843 }, { "epoch": 1.8199988513009018, "grad_norm": 0.6373364329338074, "learning_rate": 0.0001, "loss": 1.2863, "step": 15844 }, { "epoch": 1.820113721210729, "grad_norm": 0.6185095310211182, "learning_rate": 0.0001, "loss": 1.3833, "step": 15845 }, { "epoch": 1.820228591120556, "grad_norm": 0.6635664105415344, "learning_rate": 0.0001, "loss": 1.3862, "step": 15846 }, { "epoch": 1.820343461030383, "grad_norm": 0.6329711675643921, "learning_rate": 0.0001, "loss": 1.3956, "step": 15847 }, { "epoch": 1.8204583309402103, "grad_norm": 0.6014331579208374, "learning_rate": 0.0001, "loss": 1.41, "step": 15848 }, { "epoch": 1.8205732008500375, "grad_norm": 0.6105222702026367, "learning_rate": 0.0001, "loss": 1.234, "step": 15849 }, { "epoch": 1.8206880707598645, "grad_norm": 0.648215651512146, "learning_rate": 0.0001, "loss": 1.5546, "step": 15850 }, { "epoch": 1.8208029406696915, "grad_norm": 0.5845022201538086, "learning_rate": 0.0001, "loss": 1.141, "step": 15851 }, { "epoch": 1.8209178105795187, "grad_norm": 0.6215102076530457, "learning_rate": 0.0001, "loss": 1.472, "step": 15852 }, { "epoch": 1.821032680489346, "grad_norm": 0.6202508807182312, "learning_rate": 0.0001, "loss": 1.3187, "step": 15853 }, { "epoch": 1.821147550399173, "grad_norm": 0.638676106929779, "learning_rate": 0.0001, "loss": 1.3897, "step": 15854 }, { "epoch": 1.821262420309, "grad_norm": 0.7090446352958679, "learning_rate": 0.0001, "loss": 1.4151, "step": 15855 }, { "epoch": 1.8213772902188272, "grad_norm": 0.650833249092102, "learning_rate": 0.0001, "loss": 1.4007, "step": 15856 }, { "epoch": 1.8214921601286544, "grad_norm": 0.7622561454772949, "learning_rate": 0.0001, "loss": 1.5458, "step": 15857 }, { "epoch": 1.8216070300384815, "grad_norm": 0.6946950554847717, "learning_rate": 0.0001, "loss": 1.416, "step": 15858 }, { "epoch": 1.8217218999483085, "grad_norm": 0.6929587721824646, "learning_rate": 0.0001, "loss": 1.3965, "step": 15859 }, { "epoch": 1.8218367698581357, "grad_norm": 0.6364197731018066, "learning_rate": 0.0001, "loss": 1.4878, "step": 15860 }, { "epoch": 1.821951639767963, "grad_norm": 0.7382113933563232, "learning_rate": 0.0001, "loss": 1.5489, "step": 15861 }, { "epoch": 1.82206650967779, "grad_norm": 0.6655716896057129, "learning_rate": 0.0001, "loss": 1.4976, "step": 15862 }, { "epoch": 1.822181379587617, "grad_norm": 0.597642719745636, "learning_rate": 0.0001, "loss": 1.3425, "step": 15863 }, { "epoch": 1.8222962494974442, "grad_norm": 0.6217376589775085, "learning_rate": 0.0001, "loss": 1.5044, "step": 15864 }, { "epoch": 1.8224111194072714, "grad_norm": 0.6502073407173157, "learning_rate": 0.0001, "loss": 1.5697, "step": 15865 }, { "epoch": 1.8225259893170984, "grad_norm": 0.6081299185752869, "learning_rate": 0.0001, "loss": 1.3242, "step": 15866 }, { "epoch": 1.8226408592269254, "grad_norm": 0.6899523735046387, "learning_rate": 0.0001, "loss": 1.093, "step": 15867 }, { "epoch": 1.8227557291367527, "grad_norm": 0.6164363622665405, "learning_rate": 0.0001, "loss": 1.5016, "step": 15868 }, { "epoch": 1.8228705990465799, "grad_norm": 0.6042718887329102, "learning_rate": 0.0001, "loss": 1.4358, "step": 15869 }, { "epoch": 1.822985468956407, "grad_norm": 0.6475842595100403, "learning_rate": 0.0001, "loss": 1.595, "step": 15870 }, { "epoch": 1.823100338866234, "grad_norm": 0.6268951892852783, "learning_rate": 0.0001, "loss": 1.4257, "step": 15871 }, { "epoch": 1.8232152087760611, "grad_norm": 0.6121448874473572, "learning_rate": 0.0001, "loss": 1.4889, "step": 15872 }, { "epoch": 1.8233300786858884, "grad_norm": 0.6150333881378174, "learning_rate": 0.0001, "loss": 1.3803, "step": 15873 }, { "epoch": 1.8234449485957154, "grad_norm": 0.6406527757644653, "learning_rate": 0.0001, "loss": 1.3256, "step": 15874 }, { "epoch": 1.8235598185055424, "grad_norm": 0.6126410961151123, "learning_rate": 0.0001, "loss": 1.3716, "step": 15875 }, { "epoch": 1.8236746884153696, "grad_norm": 0.6979601383209229, "learning_rate": 0.0001, "loss": 1.5394, "step": 15876 }, { "epoch": 1.8237895583251968, "grad_norm": 0.6526153087615967, "learning_rate": 0.0001, "loss": 1.6131, "step": 15877 }, { "epoch": 1.8239044282350239, "grad_norm": 0.6208426356315613, "learning_rate": 0.0001, "loss": 1.3515, "step": 15878 }, { "epoch": 1.8240192981448509, "grad_norm": 0.5797458291053772, "learning_rate": 0.0001, "loss": 1.393, "step": 15879 }, { "epoch": 1.824134168054678, "grad_norm": 0.5991585850715637, "learning_rate": 0.0001, "loss": 1.4375, "step": 15880 }, { "epoch": 1.8242490379645053, "grad_norm": 0.6601519584655762, "learning_rate": 0.0001, "loss": 1.5897, "step": 15881 }, { "epoch": 1.8243639078743323, "grad_norm": 0.6615204811096191, "learning_rate": 0.0001, "loss": 1.5966, "step": 15882 }, { "epoch": 1.8244787777841593, "grad_norm": 0.6218351125717163, "learning_rate": 0.0001, "loss": 1.478, "step": 15883 }, { "epoch": 1.8245936476939866, "grad_norm": 0.6256325244903564, "learning_rate": 0.0001, "loss": 1.6471, "step": 15884 }, { "epoch": 1.8247085176038138, "grad_norm": 0.6107257008552551, "learning_rate": 0.0001, "loss": 1.4236, "step": 15885 }, { "epoch": 1.8248233875136408, "grad_norm": 0.7533160448074341, "learning_rate": 0.0001, "loss": 1.425, "step": 15886 }, { "epoch": 1.8249382574234678, "grad_norm": 0.6379408836364746, "learning_rate": 0.0001, "loss": 1.363, "step": 15887 }, { "epoch": 1.825053127333295, "grad_norm": 0.598540723323822, "learning_rate": 0.0001, "loss": 1.4424, "step": 15888 }, { "epoch": 1.8251679972431223, "grad_norm": 0.6070877313613892, "learning_rate": 0.0001, "loss": 1.3913, "step": 15889 }, { "epoch": 1.8252828671529493, "grad_norm": 0.6327671408653259, "learning_rate": 0.0001, "loss": 1.4302, "step": 15890 }, { "epoch": 1.8253977370627763, "grad_norm": 0.6539307236671448, "learning_rate": 0.0001, "loss": 1.4466, "step": 15891 }, { "epoch": 1.8255126069726035, "grad_norm": 0.6575409173965454, "learning_rate": 0.0001, "loss": 1.2643, "step": 15892 }, { "epoch": 1.8256274768824308, "grad_norm": 0.6441643834114075, "learning_rate": 0.0001, "loss": 1.4879, "step": 15893 }, { "epoch": 1.8257423467922578, "grad_norm": 0.6457522511482239, "learning_rate": 0.0001, "loss": 1.3737, "step": 15894 }, { "epoch": 1.8258572167020848, "grad_norm": 0.6049162149429321, "learning_rate": 0.0001, "loss": 1.4517, "step": 15895 }, { "epoch": 1.825972086611912, "grad_norm": 0.6564713716506958, "learning_rate": 0.0001, "loss": 1.3928, "step": 15896 }, { "epoch": 1.8260869565217392, "grad_norm": 0.6829112768173218, "learning_rate": 0.0001, "loss": 1.5156, "step": 15897 }, { "epoch": 1.8262018264315663, "grad_norm": 0.6410605311393738, "learning_rate": 0.0001, "loss": 1.3843, "step": 15898 }, { "epoch": 1.8263166963413933, "grad_norm": 0.6501928567886353, "learning_rate": 0.0001, "loss": 1.4678, "step": 15899 }, { "epoch": 1.8264315662512205, "grad_norm": 0.6255066990852356, "learning_rate": 0.0001, "loss": 1.4146, "step": 15900 }, { "epoch": 1.8265464361610477, "grad_norm": 0.6358596086502075, "learning_rate": 0.0001, "loss": 1.462, "step": 15901 }, { "epoch": 1.8266613060708747, "grad_norm": 0.6268904805183411, "learning_rate": 0.0001, "loss": 1.4233, "step": 15902 }, { "epoch": 1.8267761759807017, "grad_norm": 0.5953584909439087, "learning_rate": 0.0001, "loss": 1.5563, "step": 15903 }, { "epoch": 1.826891045890529, "grad_norm": 0.5962085127830505, "learning_rate": 0.0001, "loss": 1.5107, "step": 15904 }, { "epoch": 1.8270059158003562, "grad_norm": 0.6590402722358704, "learning_rate": 0.0001, "loss": 1.4404, "step": 15905 }, { "epoch": 1.8271207857101832, "grad_norm": 0.5863921642303467, "learning_rate": 0.0001, "loss": 1.4666, "step": 15906 }, { "epoch": 1.8272356556200102, "grad_norm": 0.5984483361244202, "learning_rate": 0.0001, "loss": 1.4999, "step": 15907 }, { "epoch": 1.8273505255298375, "grad_norm": 0.6065948009490967, "learning_rate": 0.0001, "loss": 1.2615, "step": 15908 }, { "epoch": 1.8274653954396647, "grad_norm": 0.5679669976234436, "learning_rate": 0.0001, "loss": 1.4875, "step": 15909 }, { "epoch": 1.8275802653494917, "grad_norm": 0.5973514318466187, "learning_rate": 0.0001, "loss": 1.3, "step": 15910 }, { "epoch": 1.8276951352593187, "grad_norm": 0.644055962562561, "learning_rate": 0.0001, "loss": 1.4505, "step": 15911 }, { "epoch": 1.827810005169146, "grad_norm": 0.6656803488731384, "learning_rate": 0.0001, "loss": 1.5097, "step": 15912 }, { "epoch": 1.8279248750789732, "grad_norm": 0.6298542022705078, "learning_rate": 0.0001, "loss": 1.561, "step": 15913 }, { "epoch": 1.8280397449888002, "grad_norm": 0.649831235408783, "learning_rate": 0.0001, "loss": 1.4071, "step": 15914 }, { "epoch": 1.8281546148986272, "grad_norm": 0.675029456615448, "learning_rate": 0.0001, "loss": 1.5452, "step": 15915 }, { "epoch": 1.8282694848084544, "grad_norm": 0.5778508186340332, "learning_rate": 0.0001, "loss": 1.365, "step": 15916 }, { "epoch": 1.8283843547182816, "grad_norm": 0.6754083633422852, "learning_rate": 0.0001, "loss": 1.464, "step": 15917 }, { "epoch": 1.8284992246281087, "grad_norm": 0.6041424870491028, "learning_rate": 0.0001, "loss": 1.4145, "step": 15918 }, { "epoch": 1.8286140945379357, "grad_norm": 0.6053421497344971, "learning_rate": 0.0001, "loss": 1.3666, "step": 15919 }, { "epoch": 1.828728964447763, "grad_norm": 0.5872570276260376, "learning_rate": 0.0001, "loss": 1.4642, "step": 15920 }, { "epoch": 1.8288438343575901, "grad_norm": 0.663306474685669, "learning_rate": 0.0001, "loss": 1.5113, "step": 15921 }, { "epoch": 1.8289587042674171, "grad_norm": 0.6472460627555847, "learning_rate": 0.0001, "loss": 1.4313, "step": 15922 }, { "epoch": 1.8290735741772441, "grad_norm": 0.6650756001472473, "learning_rate": 0.0001, "loss": 1.4514, "step": 15923 }, { "epoch": 1.8291884440870714, "grad_norm": 0.5926016569137573, "learning_rate": 0.0001, "loss": 1.4217, "step": 15924 }, { "epoch": 1.8293033139968986, "grad_norm": 0.7057727575302124, "learning_rate": 0.0001, "loss": 1.4764, "step": 15925 }, { "epoch": 1.8294181839067256, "grad_norm": 0.6436575055122375, "learning_rate": 0.0001, "loss": 1.4334, "step": 15926 }, { "epoch": 1.8295330538165526, "grad_norm": 0.6020593643188477, "learning_rate": 0.0001, "loss": 1.4711, "step": 15927 }, { "epoch": 1.8296479237263799, "grad_norm": 0.5867114663124084, "learning_rate": 0.0001, "loss": 1.3176, "step": 15928 }, { "epoch": 1.829762793636207, "grad_norm": 0.6493946313858032, "learning_rate": 0.0001, "loss": 1.5546, "step": 15929 }, { "epoch": 1.829877663546034, "grad_norm": 0.6781548261642456, "learning_rate": 0.0001, "loss": 1.4961, "step": 15930 }, { "epoch": 1.829992533455861, "grad_norm": 0.5895503759384155, "learning_rate": 0.0001, "loss": 1.2183, "step": 15931 }, { "epoch": 1.8301074033656883, "grad_norm": 0.7099341750144958, "learning_rate": 0.0001, "loss": 1.6231, "step": 15932 }, { "epoch": 1.8302222732755156, "grad_norm": 0.7078137397766113, "learning_rate": 0.0001, "loss": 1.5794, "step": 15933 }, { "epoch": 1.8303371431853426, "grad_norm": 0.6459853649139404, "learning_rate": 0.0001, "loss": 1.5702, "step": 15934 }, { "epoch": 1.8304520130951696, "grad_norm": 0.6130335330963135, "learning_rate": 0.0001, "loss": 1.3144, "step": 15935 }, { "epoch": 1.8305668830049968, "grad_norm": 0.6197957396507263, "learning_rate": 0.0001, "loss": 1.4387, "step": 15936 }, { "epoch": 1.830681752914824, "grad_norm": 0.667946457862854, "learning_rate": 0.0001, "loss": 1.4988, "step": 15937 }, { "epoch": 1.830796622824651, "grad_norm": 0.5950310230255127, "learning_rate": 0.0001, "loss": 1.5616, "step": 15938 }, { "epoch": 1.830911492734478, "grad_norm": 0.632938802242279, "learning_rate": 0.0001, "loss": 1.4152, "step": 15939 }, { "epoch": 1.8310263626443053, "grad_norm": 0.5838363766670227, "learning_rate": 0.0001, "loss": 1.4048, "step": 15940 }, { "epoch": 1.8311412325541325, "grad_norm": 0.7285007834434509, "learning_rate": 0.0001, "loss": 1.5162, "step": 15941 }, { "epoch": 1.8312561024639595, "grad_norm": 0.6085467338562012, "learning_rate": 0.0001, "loss": 1.5558, "step": 15942 }, { "epoch": 1.8313709723737865, "grad_norm": 0.6019995808601379, "learning_rate": 0.0001, "loss": 1.438, "step": 15943 }, { "epoch": 1.8314858422836138, "grad_norm": 0.6792658567428589, "learning_rate": 0.0001, "loss": 1.6684, "step": 15944 }, { "epoch": 1.831600712193441, "grad_norm": 0.602642834186554, "learning_rate": 0.0001, "loss": 1.4328, "step": 15945 }, { "epoch": 1.831715582103268, "grad_norm": 0.6296089887619019, "learning_rate": 0.0001, "loss": 1.4179, "step": 15946 }, { "epoch": 1.831830452013095, "grad_norm": 0.5773423314094543, "learning_rate": 0.0001, "loss": 1.5549, "step": 15947 }, { "epoch": 1.8319453219229223, "grad_norm": 0.6150087714195251, "learning_rate": 0.0001, "loss": 1.4553, "step": 15948 }, { "epoch": 1.8320601918327495, "grad_norm": 0.6208438277244568, "learning_rate": 0.0001, "loss": 1.4832, "step": 15949 }, { "epoch": 1.8321750617425765, "grad_norm": 0.6598103046417236, "learning_rate": 0.0001, "loss": 1.3849, "step": 15950 }, { "epoch": 1.8322899316524035, "grad_norm": 0.5817321538925171, "learning_rate": 0.0001, "loss": 1.2799, "step": 15951 }, { "epoch": 1.8324048015622307, "grad_norm": 0.6446582078933716, "learning_rate": 0.0001, "loss": 1.4087, "step": 15952 }, { "epoch": 1.832519671472058, "grad_norm": 0.5805476903915405, "learning_rate": 0.0001, "loss": 1.2953, "step": 15953 }, { "epoch": 1.832634541381885, "grad_norm": 0.6229280233383179, "learning_rate": 0.0001, "loss": 1.4529, "step": 15954 }, { "epoch": 1.832749411291712, "grad_norm": 0.6367110013961792, "learning_rate": 0.0001, "loss": 1.4145, "step": 15955 }, { "epoch": 1.8328642812015392, "grad_norm": 0.6071845889091492, "learning_rate": 0.0001, "loss": 1.2801, "step": 15956 }, { "epoch": 1.8329791511113664, "grad_norm": 0.6400860548019409, "learning_rate": 0.0001, "loss": 1.4126, "step": 15957 }, { "epoch": 1.8330940210211935, "grad_norm": 0.6502026915550232, "learning_rate": 0.0001, "loss": 1.6317, "step": 15958 }, { "epoch": 1.8332088909310205, "grad_norm": 0.5755997896194458, "learning_rate": 0.0001, "loss": 1.3486, "step": 15959 }, { "epoch": 1.8333237608408477, "grad_norm": 0.6484699845314026, "learning_rate": 0.0001, "loss": 1.5004, "step": 15960 }, { "epoch": 1.833438630750675, "grad_norm": 0.6342812180519104, "learning_rate": 0.0001, "loss": 1.3965, "step": 15961 }, { "epoch": 1.833553500660502, "grad_norm": 0.676463782787323, "learning_rate": 0.0001, "loss": 1.5334, "step": 15962 }, { "epoch": 1.833668370570329, "grad_norm": 0.6405801773071289, "learning_rate": 0.0001, "loss": 1.5181, "step": 15963 }, { "epoch": 1.8337832404801562, "grad_norm": 0.614596426486969, "learning_rate": 0.0001, "loss": 1.3663, "step": 15964 }, { "epoch": 1.8338981103899834, "grad_norm": 0.6203700304031372, "learning_rate": 0.0001, "loss": 1.4567, "step": 15965 }, { "epoch": 1.8340129802998104, "grad_norm": 0.6081563234329224, "learning_rate": 0.0001, "loss": 1.5254, "step": 15966 }, { "epoch": 1.8341278502096374, "grad_norm": 0.6504876017570496, "learning_rate": 0.0001, "loss": 1.4266, "step": 15967 }, { "epoch": 1.8342427201194647, "grad_norm": 0.6089107990264893, "learning_rate": 0.0001, "loss": 1.4368, "step": 15968 }, { "epoch": 1.8343575900292919, "grad_norm": 0.696449875831604, "learning_rate": 0.0001, "loss": 1.6924, "step": 15969 }, { "epoch": 1.834472459939119, "grad_norm": 0.6738762259483337, "learning_rate": 0.0001, "loss": 1.58, "step": 15970 }, { "epoch": 1.834587329848946, "grad_norm": 0.6419105529785156, "learning_rate": 0.0001, "loss": 1.465, "step": 15971 }, { "epoch": 1.8347021997587731, "grad_norm": 0.6349779963493347, "learning_rate": 0.0001, "loss": 1.5807, "step": 15972 }, { "epoch": 1.8348170696686004, "grad_norm": 0.6129407286643982, "learning_rate": 0.0001, "loss": 1.3928, "step": 15973 }, { "epoch": 1.8349319395784276, "grad_norm": 0.6540585160255432, "learning_rate": 0.0001, "loss": 1.701, "step": 15974 }, { "epoch": 1.8350468094882546, "grad_norm": 0.6090661287307739, "learning_rate": 0.0001, "loss": 1.4129, "step": 15975 }, { "epoch": 1.8351616793980816, "grad_norm": 0.6051062345504761, "learning_rate": 0.0001, "loss": 1.4449, "step": 15976 }, { "epoch": 1.8352765493079088, "grad_norm": 0.6473617553710938, "learning_rate": 0.0001, "loss": 1.5488, "step": 15977 }, { "epoch": 1.835391419217736, "grad_norm": 0.6278075575828552, "learning_rate": 0.0001, "loss": 1.3918, "step": 15978 }, { "epoch": 1.835506289127563, "grad_norm": 0.6662875413894653, "learning_rate": 0.0001, "loss": 1.5309, "step": 15979 }, { "epoch": 1.83562115903739, "grad_norm": 0.6201448440551758, "learning_rate": 0.0001, "loss": 1.4804, "step": 15980 }, { "epoch": 1.8357360289472173, "grad_norm": 0.6352674961090088, "learning_rate": 0.0001, "loss": 1.4066, "step": 15981 }, { "epoch": 1.8358508988570446, "grad_norm": 0.5949738621711731, "learning_rate": 0.0001, "loss": 1.5198, "step": 15982 }, { "epoch": 1.8359657687668716, "grad_norm": 0.6130930781364441, "learning_rate": 0.0001, "loss": 1.4186, "step": 15983 }, { "epoch": 1.8360806386766986, "grad_norm": 0.5820370316505432, "learning_rate": 0.0001, "loss": 1.223, "step": 15984 }, { "epoch": 1.8361955085865258, "grad_norm": 0.6435645222663879, "learning_rate": 0.0001, "loss": 1.4747, "step": 15985 }, { "epoch": 1.836310378496353, "grad_norm": 0.5986513495445251, "learning_rate": 0.0001, "loss": 1.4789, "step": 15986 }, { "epoch": 1.83642524840618, "grad_norm": 0.6947880983352661, "learning_rate": 0.0001, "loss": 1.5822, "step": 15987 }, { "epoch": 1.836540118316007, "grad_norm": 0.6280114054679871, "learning_rate": 0.0001, "loss": 1.3834, "step": 15988 }, { "epoch": 1.8366549882258343, "grad_norm": 0.669037401676178, "learning_rate": 0.0001, "loss": 1.5151, "step": 15989 }, { "epoch": 1.8367698581356615, "grad_norm": 0.7081412076950073, "learning_rate": 0.0001, "loss": 1.4418, "step": 15990 }, { "epoch": 1.8368847280454885, "grad_norm": 0.6335996985435486, "learning_rate": 0.0001, "loss": 1.4933, "step": 15991 }, { "epoch": 1.8369995979553155, "grad_norm": 0.68028724193573, "learning_rate": 0.0001, "loss": 1.4157, "step": 15992 }, { "epoch": 1.8371144678651428, "grad_norm": 0.6293099522590637, "learning_rate": 0.0001, "loss": 1.5406, "step": 15993 }, { "epoch": 1.83722933777497, "grad_norm": 0.6450070738792419, "learning_rate": 0.0001, "loss": 1.3165, "step": 15994 }, { "epoch": 1.837344207684797, "grad_norm": 0.6043501496315002, "learning_rate": 0.0001, "loss": 1.3544, "step": 15995 }, { "epoch": 1.837459077594624, "grad_norm": 0.5740723013877869, "learning_rate": 0.0001, "loss": 1.3516, "step": 15996 }, { "epoch": 1.8375739475044512, "grad_norm": 0.6358054876327515, "learning_rate": 0.0001, "loss": 1.3855, "step": 15997 }, { "epoch": 1.8376888174142785, "grad_norm": 0.5969364047050476, "learning_rate": 0.0001, "loss": 1.3998, "step": 15998 }, { "epoch": 1.8378036873241055, "grad_norm": 0.6494911909103394, "learning_rate": 0.0001, "loss": 1.4437, "step": 15999 }, { "epoch": 1.8379185572339325, "grad_norm": 0.6509250402450562, "learning_rate": 0.0001, "loss": 1.6279, "step": 16000 } ], "logging_steps": 1.0, "max_steps": 17410, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.4136217259933696e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }