{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999756275895686, "eval_steps": 500, "global_step": 2051, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004874482086278333, "grad_norm": 0.3594866991043091, "learning_rate": 2e-05, "loss": 1.5386, "step": 1 }, { "epoch": 0.0009748964172556666, "grad_norm": 0.3198799788951874, "learning_rate": 4e-05, "loss": 1.547, "step": 2 }, { "epoch": 0.0014623446258835, "grad_norm": 0.3148403763771057, "learning_rate": 6e-05, "loss": 1.3611, "step": 3 }, { "epoch": 0.0019497928345113332, "grad_norm": 0.3041052222251892, "learning_rate": 8e-05, "loss": 1.3769, "step": 4 }, { "epoch": 0.0024372410431391664, "grad_norm": 0.3455198109149933, "learning_rate": 0.0001, "loss": 1.5368, "step": 5 }, { "epoch": 0.002924689251767, "grad_norm": 0.33201417326927185, "learning_rate": 0.00012, "loss": 1.6664, "step": 6 }, { "epoch": 0.003412137460394833, "grad_norm": 0.30785706639289856, "learning_rate": 0.00014, "loss": 1.5451, "step": 7 }, { "epoch": 0.0038995856690226664, "grad_norm": 0.36058735847473145, "learning_rate": 0.00016, "loss": 1.5256, "step": 8 }, { "epoch": 0.004387033877650499, "grad_norm": 0.38930457830429077, "learning_rate": 0.00018, "loss": 1.4659, "step": 9 }, { "epoch": 0.004874482086278333, "grad_norm": 0.3993895649909973, "learning_rate": 0.0002, "loss": 1.5789, "step": 10 }, { "epoch": 0.005361930294906166, "grad_norm": 0.44698166847229004, "learning_rate": 0.00019999988153674691, "loss": 1.4087, "step": 11 }, { "epoch": 0.005849378503534, "grad_norm": 0.5011526942253113, "learning_rate": 0.0001999995261472683, "loss": 1.5465, "step": 12 }, { "epoch": 0.006336826712161832, "grad_norm": 0.3983900249004364, "learning_rate": 0.00019999893383240616, "loss": 1.5136, "step": 13 }, { "epoch": 0.006824274920789666, "grad_norm": 0.5348988771438599, "learning_rate": 0.00019999810459356388, "loss": 1.317, "step": 14 }, { "epoch": 0.007311723129417499, "grad_norm": 0.36835604906082153, "learning_rate": 0.00019999703843270612, "loss": 1.2642, "step": 15 }, { "epoch": 0.007799171338045333, "grad_norm": 0.5402485728263855, "learning_rate": 0.0001999957353523589, "loss": 1.4507, "step": 16 }, { "epoch": 0.008286619546673165, "grad_norm": 0.37945985794067383, "learning_rate": 0.00019999419535560956, "loss": 1.3151, "step": 17 }, { "epoch": 0.008774067755300999, "grad_norm": 0.401275634765625, "learning_rate": 0.00019999241844610678, "loss": 1.4396, "step": 18 }, { "epoch": 0.009261515963928832, "grad_norm": 0.30488014221191406, "learning_rate": 0.0001999904046280605, "loss": 1.3752, "step": 19 }, { "epoch": 0.009748964172556666, "grad_norm": 0.4626210629940033, "learning_rate": 0.00019998815390624204, "loss": 1.4507, "step": 20 }, { "epoch": 0.0102364123811845, "grad_norm": 0.45440447330474854, "learning_rate": 0.00019998566628598392, "loss": 1.3766, "step": 21 }, { "epoch": 0.010723860589812333, "grad_norm": 0.4724906384944916, "learning_rate": 0.00019998294177317995, "loss": 1.4923, "step": 22 }, { "epoch": 0.011211308798440166, "grad_norm": 0.38014769554138184, "learning_rate": 0.00019997998037428526, "loss": 1.3906, "step": 23 }, { "epoch": 0.011698757007068, "grad_norm": 0.41498008370399475, "learning_rate": 0.0001999767820963162, "loss": 1.3639, "step": 24 }, { "epoch": 0.012186205215695833, "grad_norm": 0.41264939308166504, "learning_rate": 0.00019997334694685028, "loss": 1.3366, "step": 25 }, { "epoch": 0.012673653424323665, "grad_norm": 0.2975503206253052, "learning_rate": 0.00019996967493402632, "loss": 1.2275, "step": 26 }, { "epoch": 0.013161101632951498, "grad_norm": 0.26862838864326477, "learning_rate": 0.0001999657660665443, "loss": 1.3027, "step": 27 }, { "epoch": 0.013648549841579332, "grad_norm": 0.3438441753387451, "learning_rate": 0.00019996162035366535, "loss": 1.2117, "step": 28 }, { "epoch": 0.014135998050207165, "grad_norm": 0.26122623682022095, "learning_rate": 0.00019995723780521173, "loss": 1.3813, "step": 29 }, { "epoch": 0.014623446258834999, "grad_norm": 0.37747007608413696, "learning_rate": 0.00019995261843156693, "loss": 1.3035, "step": 30 }, { "epoch": 0.015110894467462832, "grad_norm": 0.36326363682746887, "learning_rate": 0.0001999477622436754, "loss": 1.4155, "step": 31 }, { "epoch": 0.015598342676090666, "grad_norm": 0.25670093297958374, "learning_rate": 0.00019994266925304279, "loss": 1.3552, "step": 32 }, { "epoch": 0.0160857908847185, "grad_norm": 0.31610992550849915, "learning_rate": 0.0001999373394717357, "loss": 1.5197, "step": 33 }, { "epoch": 0.01657323909334633, "grad_norm": 0.3175552189350128, "learning_rate": 0.00019993177291238186, "loss": 1.3403, "step": 34 }, { "epoch": 0.017060687301974166, "grad_norm": 0.30159491300582886, "learning_rate": 0.00019992596958816984, "loss": 1.3754, "step": 35 }, { "epoch": 0.017548135510601998, "grad_norm": 0.2562412917613983, "learning_rate": 0.00019991992951284932, "loss": 1.2276, "step": 36 }, { "epoch": 0.018035583719229833, "grad_norm": 0.4372316598892212, "learning_rate": 0.0001999136527007308, "loss": 1.3332, "step": 37 }, { "epoch": 0.018523031927857665, "grad_norm": 0.2722485363483429, "learning_rate": 0.00019990713916668575, "loss": 1.3357, "step": 38 }, { "epoch": 0.0190104801364855, "grad_norm": 0.3781321346759796, "learning_rate": 0.00019990038892614642, "loss": 1.2892, "step": 39 }, { "epoch": 0.01949792834511333, "grad_norm": 0.3160063326358795, "learning_rate": 0.00019989340199510594, "loss": 1.2825, "step": 40 }, { "epoch": 0.019985376553741167, "grad_norm": 0.3378397226333618, "learning_rate": 0.00019988617839011816, "loss": 1.3731, "step": 41 }, { "epoch": 0.020472824762369, "grad_norm": 0.39730527997016907, "learning_rate": 0.00019987871812829778, "loss": 1.4537, "step": 42 }, { "epoch": 0.02096027297099683, "grad_norm": 0.3675004541873932, "learning_rate": 0.0001998710212273201, "loss": 1.3518, "step": 43 }, { "epoch": 0.021447721179624665, "grad_norm": 0.3094078302383423, "learning_rate": 0.00019986308770542115, "loss": 1.2811, "step": 44 }, { "epoch": 0.021935169388252497, "grad_norm": 0.3008870780467987, "learning_rate": 0.0001998549175813975, "loss": 1.2485, "step": 45 }, { "epoch": 0.022422617596880332, "grad_norm": 0.3380783200263977, "learning_rate": 0.00019984651087460637, "loss": 1.4253, "step": 46 }, { "epoch": 0.022910065805508164, "grad_norm": 0.29196858406066895, "learning_rate": 0.00019983786760496548, "loss": 1.3116, "step": 47 }, { "epoch": 0.023397514014136, "grad_norm": 0.2729412615299225, "learning_rate": 0.000199828987792953, "loss": 1.2858, "step": 48 }, { "epoch": 0.02388496222276383, "grad_norm": 0.28062939643859863, "learning_rate": 0.00019981987145960755, "loss": 1.2848, "step": 49 }, { "epoch": 0.024372410431391666, "grad_norm": 0.3024255335330963, "learning_rate": 0.00019981051862652822, "loss": 1.3813, "step": 50 }, { "epoch": 0.024859858640019498, "grad_norm": 0.2562197744846344, "learning_rate": 0.00019980092931587423, "loss": 1.3828, "step": 51 }, { "epoch": 0.02534730684864733, "grad_norm": 0.2957954406738281, "learning_rate": 0.00019979110355036533, "loss": 1.3334, "step": 52 }, { "epoch": 0.025834755057275165, "grad_norm": 0.33199235796928406, "learning_rate": 0.00019978104135328126, "loss": 1.2957, "step": 53 }, { "epoch": 0.026322203265902996, "grad_norm": 0.28630098700523376, "learning_rate": 0.0001997707427484621, "loss": 1.2411, "step": 54 }, { "epoch": 0.02680965147453083, "grad_norm": 0.2983357310295105, "learning_rate": 0.00019976020776030787, "loss": 1.3413, "step": 55 }, { "epoch": 0.027297099683158663, "grad_norm": 0.2890589237213135, "learning_rate": 0.0001997494364137789, "loss": 1.3473, "step": 56 }, { "epoch": 0.0277845478917865, "grad_norm": 0.34288671612739563, "learning_rate": 0.00019973842873439527, "loss": 1.431, "step": 57 }, { "epoch": 0.02827199610041433, "grad_norm": 0.356336385011673, "learning_rate": 0.00019972718474823707, "loss": 1.3814, "step": 58 }, { "epoch": 0.028759444309042165, "grad_norm": 0.29968544840812683, "learning_rate": 0.00019971570448194437, "loss": 1.2104, "step": 59 }, { "epoch": 0.029246892517669997, "grad_norm": 0.31277570128440857, "learning_rate": 0.0001997039879627169, "loss": 1.3111, "step": 60 }, { "epoch": 0.029734340726297832, "grad_norm": 0.29665568470954895, "learning_rate": 0.0001996920352183142, "loss": 1.2978, "step": 61 }, { "epoch": 0.030221788934925664, "grad_norm": 0.2886115312576294, "learning_rate": 0.00019967984627705548, "loss": 1.2012, "step": 62 }, { "epoch": 0.030709237143553496, "grad_norm": 0.3373067378997803, "learning_rate": 0.00019966742116781964, "loss": 1.2666, "step": 63 }, { "epoch": 0.03119668535218133, "grad_norm": 0.3190588355064392, "learning_rate": 0.00019965475992004503, "loss": 1.3503, "step": 64 }, { "epoch": 0.031684133560809166, "grad_norm": 0.290356308221817, "learning_rate": 0.00019964186256372945, "loss": 1.3847, "step": 65 }, { "epoch": 0.032171581769437, "grad_norm": 0.3749725818634033, "learning_rate": 0.0001996287291294302, "loss": 1.3672, "step": 66 }, { "epoch": 0.03265902997806483, "grad_norm": 0.2738218605518341, "learning_rate": 0.00019961535964826385, "loss": 1.347, "step": 67 }, { "epoch": 0.03314647818669266, "grad_norm": 0.23812803626060486, "learning_rate": 0.00019960175415190628, "loss": 1.2569, "step": 68 }, { "epoch": 0.0336339263953205, "grad_norm": 0.31040453910827637, "learning_rate": 0.0001995879126725925, "loss": 1.2219, "step": 69 }, { "epoch": 0.03412137460394833, "grad_norm": 0.35229820013046265, "learning_rate": 0.0001995738352431166, "loss": 1.293, "step": 70 }, { "epoch": 0.034608822812576163, "grad_norm": 0.22976358234882355, "learning_rate": 0.00019955952189683177, "loss": 1.1595, "step": 71 }, { "epoch": 0.035096271021203995, "grad_norm": 0.35664403438568115, "learning_rate": 0.00019954497266765016, "loss": 1.3777, "step": 72 }, { "epoch": 0.03558371922983183, "grad_norm": 0.28703221678733826, "learning_rate": 0.00019953018759004268, "loss": 1.4143, "step": 73 }, { "epoch": 0.036071167438459666, "grad_norm": 0.32665184140205383, "learning_rate": 0.0001995151666990392, "loss": 1.3514, "step": 74 }, { "epoch": 0.0365586156470875, "grad_norm": 0.2924026846885681, "learning_rate": 0.00019949991003022808, "loss": 1.2566, "step": 75 }, { "epoch": 0.03704606385571533, "grad_norm": 0.3736989498138428, "learning_rate": 0.00019948441761975645, "loss": 1.4126, "step": 76 }, { "epoch": 0.03753351206434316, "grad_norm": 0.268311470746994, "learning_rate": 0.00019946868950432997, "loss": 1.3878, "step": 77 }, { "epoch": 0.038020960272971, "grad_norm": 0.3075256049633026, "learning_rate": 0.00019945272572121267, "loss": 1.1759, "step": 78 }, { "epoch": 0.03850840848159883, "grad_norm": 0.28338623046875, "learning_rate": 0.00019943652630822703, "loss": 1.3394, "step": 79 }, { "epoch": 0.03899585669022666, "grad_norm": 0.2940373718738556, "learning_rate": 0.00019942009130375369, "loss": 1.3232, "step": 80 }, { "epoch": 0.039483304898854495, "grad_norm": 0.29318469762802124, "learning_rate": 0.0001994034207467316, "loss": 1.3109, "step": 81 }, { "epoch": 0.03997075310748233, "grad_norm": 0.3388112187385559, "learning_rate": 0.00019938651467665773, "loss": 1.1776, "step": 82 }, { "epoch": 0.040458201316110165, "grad_norm": 0.29703381657600403, "learning_rate": 0.00019936937313358696, "loss": 1.28, "step": 83 }, { "epoch": 0.040945649524738, "grad_norm": 0.3102991580963135, "learning_rate": 0.00019935199615813223, "loss": 1.2495, "step": 84 }, { "epoch": 0.04143309773336583, "grad_norm": 0.3132036328315735, "learning_rate": 0.00019933438379146414, "loss": 1.1491, "step": 85 }, { "epoch": 0.04192054594199366, "grad_norm": 0.37797728180885315, "learning_rate": 0.0001993165360753111, "loss": 1.3577, "step": 86 }, { "epoch": 0.0424079941506215, "grad_norm": 0.3102344274520874, "learning_rate": 0.00019929845305195906, "loss": 1.2842, "step": 87 }, { "epoch": 0.04289544235924933, "grad_norm": 0.2861989736557007, "learning_rate": 0.0001992801347642515, "loss": 1.2836, "step": 88 }, { "epoch": 0.04338289056787716, "grad_norm": 0.3444364666938782, "learning_rate": 0.00019926158125558932, "loss": 1.3017, "step": 89 }, { "epoch": 0.043870338776504994, "grad_norm": 0.3026193678379059, "learning_rate": 0.00019924279256993064, "loss": 1.1877, "step": 90 }, { "epoch": 0.04435778698513283, "grad_norm": 0.3032093942165375, "learning_rate": 0.00019922376875179093, "loss": 1.2327, "step": 91 }, { "epoch": 0.044845235193760664, "grad_norm": 0.2894906997680664, "learning_rate": 0.00019920450984624256, "loss": 1.1543, "step": 92 }, { "epoch": 0.045332683402388496, "grad_norm": 0.328391432762146, "learning_rate": 0.00019918501589891502, "loss": 1.1122, "step": 93 }, { "epoch": 0.04582013161101633, "grad_norm": 0.27859413623809814, "learning_rate": 0.00019916528695599465, "loss": 1.2589, "step": 94 }, { "epoch": 0.04630757981964416, "grad_norm": 0.32805296778678894, "learning_rate": 0.0001991453230642246, "loss": 1.3096, "step": 95 }, { "epoch": 0.046795028028272, "grad_norm": 0.3156702220439911, "learning_rate": 0.00019912512427090447, "loss": 1.2511, "step": 96 }, { "epoch": 0.04728247623689983, "grad_norm": 0.3225579261779785, "learning_rate": 0.0001991046906238907, "loss": 1.2565, "step": 97 }, { "epoch": 0.04776992444552766, "grad_norm": 0.3395773768424988, "learning_rate": 0.00019908402217159595, "loss": 1.2728, "step": 98 }, { "epoch": 0.04825737265415549, "grad_norm": 0.3359152674674988, "learning_rate": 0.00019906311896298923, "loss": 1.3422, "step": 99 }, { "epoch": 0.04874482086278333, "grad_norm": 0.3509308397769928, "learning_rate": 0.00019904198104759587, "loss": 1.3714, "step": 100 }, { "epoch": 0.049232269071411164, "grad_norm": 0.32388588786125183, "learning_rate": 0.00019902060847549718, "loss": 1.2591, "step": 101 }, { "epoch": 0.049719717280038996, "grad_norm": 0.29979950189590454, "learning_rate": 0.00019899900129733035, "loss": 1.3937, "step": 102 }, { "epoch": 0.05020716548866683, "grad_norm": 0.3444582223892212, "learning_rate": 0.00019897715956428862, "loss": 1.3148, "step": 103 }, { "epoch": 0.05069461369729466, "grad_norm": 0.31882134079933167, "learning_rate": 0.0001989550833281208, "loss": 1.2834, "step": 104 }, { "epoch": 0.0511820619059225, "grad_norm": 0.2893996834754944, "learning_rate": 0.00019893277264113136, "loss": 1.395, "step": 105 }, { "epoch": 0.05166951011455033, "grad_norm": 0.30966147780418396, "learning_rate": 0.00019891022755618018, "loss": 1.2128, "step": 106 }, { "epoch": 0.05215695832317816, "grad_norm": 0.2864265441894531, "learning_rate": 0.00019888744812668262, "loss": 1.4032, "step": 107 }, { "epoch": 0.05264440653180599, "grad_norm": 0.3411870300769806, "learning_rate": 0.00019886443440660917, "loss": 1.3424, "step": 108 }, { "epoch": 0.05313185474043383, "grad_norm": 0.29663193225860596, "learning_rate": 0.00019884118645048537, "loss": 1.2325, "step": 109 }, { "epoch": 0.05361930294906166, "grad_norm": 0.28539153933525085, "learning_rate": 0.00019881770431339187, "loss": 1.2694, "step": 110 }, { "epoch": 0.054106751157689495, "grad_norm": 0.32653146982192993, "learning_rate": 0.000198793988050964, "loss": 1.3857, "step": 111 }, { "epoch": 0.05459419936631733, "grad_norm": 0.32705414295196533, "learning_rate": 0.00019877003771939192, "loss": 1.305, "step": 112 }, { "epoch": 0.055081647574945165, "grad_norm": 0.334217369556427, "learning_rate": 0.00019874585337542033, "loss": 1.2222, "step": 113 }, { "epoch": 0.055569095783573, "grad_norm": 0.4032392203807831, "learning_rate": 0.0001987214350763483, "loss": 1.2944, "step": 114 }, { "epoch": 0.05605654399220083, "grad_norm": 0.36320260167121887, "learning_rate": 0.00019869678288002927, "loss": 1.3496, "step": 115 }, { "epoch": 0.05654399220082866, "grad_norm": 0.2804063856601715, "learning_rate": 0.00019867189684487092, "loss": 1.2681, "step": 116 }, { "epoch": 0.05703144040945649, "grad_norm": 0.2654062807559967, "learning_rate": 0.0001986467770298347, "loss": 1.2292, "step": 117 }, { "epoch": 0.05751888861808433, "grad_norm": 0.3031540811061859, "learning_rate": 0.00019862142349443622, "loss": 1.2431, "step": 118 }, { "epoch": 0.05800633682671216, "grad_norm": 0.3151783049106598, "learning_rate": 0.00019859583629874467, "loss": 1.2, "step": 119 }, { "epoch": 0.058493785035339994, "grad_norm": 0.3036620020866394, "learning_rate": 0.00019857001550338294, "loss": 1.2311, "step": 120 }, { "epoch": 0.058981233243967826, "grad_norm": 0.32396501302719116, "learning_rate": 0.00019854396116952735, "loss": 1.3063, "step": 121 }, { "epoch": 0.059468681452595665, "grad_norm": 0.33199331164360046, "learning_rate": 0.00019851767335890747, "loss": 1.1809, "step": 122 }, { "epoch": 0.059956129661223496, "grad_norm": 0.26451751589775085, "learning_rate": 0.00019849115213380612, "loss": 1.3042, "step": 123 }, { "epoch": 0.06044357786985133, "grad_norm": 0.35607317090034485, "learning_rate": 0.00019846439755705913, "loss": 1.1293, "step": 124 }, { "epoch": 0.06093102607847916, "grad_norm": 0.29516515135765076, "learning_rate": 0.00019843740969205517, "loss": 1.2308, "step": 125 }, { "epoch": 0.06141847428710699, "grad_norm": 0.2964312434196472, "learning_rate": 0.00019841018860273558, "loss": 1.284, "step": 126 }, { "epoch": 0.06190592249573483, "grad_norm": 0.2765253186225891, "learning_rate": 0.00019838273435359444, "loss": 1.315, "step": 127 }, { "epoch": 0.06239337070436266, "grad_norm": 0.33663836121559143, "learning_rate": 0.00019835504700967807, "loss": 1.3234, "step": 128 }, { "epoch": 0.0628808189129905, "grad_norm": 0.29934364557266235, "learning_rate": 0.00019832712663658518, "loss": 1.4297, "step": 129 }, { "epoch": 0.06336826712161833, "grad_norm": 0.3140650987625122, "learning_rate": 0.00019829897330046653, "loss": 1.2515, "step": 130 }, { "epoch": 0.06385571533024616, "grad_norm": 0.29144319891929626, "learning_rate": 0.0001982705870680248, "loss": 1.1624, "step": 131 }, { "epoch": 0.064343163538874, "grad_norm": 0.30572909116744995, "learning_rate": 0.0001982419680065145, "loss": 1.3521, "step": 132 }, { "epoch": 0.06483061174750183, "grad_norm": 0.30701354146003723, "learning_rate": 0.0001982131161837418, "loss": 1.3164, "step": 133 }, { "epoch": 0.06531805995612966, "grad_norm": 0.32151156663894653, "learning_rate": 0.0001981840316680643, "loss": 1.3548, "step": 134 }, { "epoch": 0.0658055081647575, "grad_norm": 0.3038557767868042, "learning_rate": 0.00019815471452839094, "loss": 1.2915, "step": 135 }, { "epoch": 0.06629295637338532, "grad_norm": 0.3463563024997711, "learning_rate": 0.00019812516483418176, "loss": 1.2835, "step": 136 }, { "epoch": 0.06678040458201316, "grad_norm": 0.30769020318984985, "learning_rate": 0.00019809538265544785, "loss": 1.1407, "step": 137 }, { "epoch": 0.067267852790641, "grad_norm": 0.31621092557907104, "learning_rate": 0.00019806536806275108, "loss": 1.2259, "step": 138 }, { "epoch": 0.06775530099926882, "grad_norm": 0.31812846660614014, "learning_rate": 0.00019803512112720397, "loss": 1.1366, "step": 139 }, { "epoch": 0.06824274920789666, "grad_norm": 0.2971956133842468, "learning_rate": 0.00019800464192046955, "loss": 1.3306, "step": 140 }, { "epoch": 0.06873019741652449, "grad_norm": 0.3192349374294281, "learning_rate": 0.0001979739305147611, "loss": 1.3795, "step": 141 }, { "epoch": 0.06921764562515233, "grad_norm": 0.2938215732574463, "learning_rate": 0.0001979429869828421, "loss": 1.2272, "step": 142 }, { "epoch": 0.06970509383378017, "grad_norm": 0.3284856677055359, "learning_rate": 0.00019791181139802602, "loss": 1.2728, "step": 143 }, { "epoch": 0.07019254204240799, "grad_norm": 0.30583491921424866, "learning_rate": 0.00019788040383417597, "loss": 1.1829, "step": 144 }, { "epoch": 0.07067999025103583, "grad_norm": 0.31813549995422363, "learning_rate": 0.00019784876436570493, "loss": 1.1448, "step": 145 }, { "epoch": 0.07116743845966365, "grad_norm": 0.2916363477706909, "learning_rate": 0.00019781689306757512, "loss": 1.2429, "step": 146 }, { "epoch": 0.07165488666829149, "grad_norm": 0.34224483370780945, "learning_rate": 0.00019778479001529808, "loss": 1.2228, "step": 147 }, { "epoch": 0.07214233487691933, "grad_norm": 0.26968345046043396, "learning_rate": 0.00019775245528493447, "loss": 1.2041, "step": 148 }, { "epoch": 0.07262978308554716, "grad_norm": 0.32275018095970154, "learning_rate": 0.00019771988895309384, "loss": 1.4233, "step": 149 }, { "epoch": 0.073117231294175, "grad_norm": 0.30032193660736084, "learning_rate": 0.00019768709109693443, "loss": 1.1385, "step": 150 }, { "epoch": 0.07360467950280283, "grad_norm": 0.3605499267578125, "learning_rate": 0.00019765406179416312, "loss": 1.1803, "step": 151 }, { "epoch": 0.07409212771143066, "grad_norm": 0.29714006185531616, "learning_rate": 0.00019762080112303504, "loss": 1.2595, "step": 152 }, { "epoch": 0.0745795759200585, "grad_norm": 0.28255385160446167, "learning_rate": 0.00019758730916235356, "loss": 1.1942, "step": 153 }, { "epoch": 0.07506702412868632, "grad_norm": 0.3305562734603882, "learning_rate": 0.00019755358599146994, "loss": 1.364, "step": 154 }, { "epoch": 0.07555447233731416, "grad_norm": 0.30705878138542175, "learning_rate": 0.00019751963169028342, "loss": 1.226, "step": 155 }, { "epoch": 0.076041920545942, "grad_norm": 0.29158154129981995, "learning_rate": 0.00019748544633924064, "loss": 1.2687, "step": 156 }, { "epoch": 0.07652936875456982, "grad_norm": 0.2785685658454895, "learning_rate": 0.00019745103001933583, "loss": 1.2601, "step": 157 }, { "epoch": 0.07701681696319766, "grad_norm": 0.3190236985683441, "learning_rate": 0.00019741638281211033, "loss": 1.2195, "step": 158 }, { "epoch": 0.07750426517182549, "grad_norm": 0.3836527168750763, "learning_rate": 0.00019738150479965257, "loss": 1.197, "step": 159 }, { "epoch": 0.07799171338045333, "grad_norm": 0.33303093910217285, "learning_rate": 0.00019734639606459783, "loss": 1.3514, "step": 160 }, { "epoch": 0.07847916158908116, "grad_norm": 0.28645503520965576, "learning_rate": 0.000197311056690128, "loss": 1.2765, "step": 161 }, { "epoch": 0.07896660979770899, "grad_norm": 0.3097892701625824, "learning_rate": 0.00019727548675997137, "loss": 1.3315, "step": 162 }, { "epoch": 0.07945405800633683, "grad_norm": 0.27848270535469055, "learning_rate": 0.0001972396863584026, "loss": 1.2799, "step": 163 }, { "epoch": 0.07994150621496467, "grad_norm": 0.25580862164497375, "learning_rate": 0.0001972036555702423, "loss": 1.2318, "step": 164 }, { "epoch": 0.08042895442359249, "grad_norm": 0.3450034260749817, "learning_rate": 0.000197167394480857, "loss": 1.2851, "step": 165 }, { "epoch": 0.08091640263222033, "grad_norm": 0.32682400941848755, "learning_rate": 0.00019713090317615876, "loss": 1.1519, "step": 166 }, { "epoch": 0.08140385084084815, "grad_norm": 0.30758345127105713, "learning_rate": 0.0001970941817426052, "loss": 1.1295, "step": 167 }, { "epoch": 0.081891299049476, "grad_norm": 0.3209025263786316, "learning_rate": 0.00019705723026719913, "loss": 1.2555, "step": 168 }, { "epoch": 0.08237874725810383, "grad_norm": 0.2652692198753357, "learning_rate": 0.0001970200488374884, "loss": 1.2326, "step": 169 }, { "epoch": 0.08286619546673166, "grad_norm": 0.3520825207233429, "learning_rate": 0.0001969826375415656, "loss": 1.3077, "step": 170 }, { "epoch": 0.0833536436753595, "grad_norm": 0.28723257780075073, "learning_rate": 0.00019694499646806808, "loss": 1.2389, "step": 171 }, { "epoch": 0.08384109188398732, "grad_norm": 0.31456753611564636, "learning_rate": 0.0001969071257061775, "loss": 1.4082, "step": 172 }, { "epoch": 0.08432854009261516, "grad_norm": 0.3273375332355499, "learning_rate": 0.00019686902534561974, "loss": 1.2289, "step": 173 }, { "epoch": 0.084815988301243, "grad_norm": 0.3643217384815216, "learning_rate": 0.00019683069547666467, "loss": 1.3557, "step": 174 }, { "epoch": 0.08530343650987082, "grad_norm": 0.33192405104637146, "learning_rate": 0.00019679213619012585, "loss": 1.251, "step": 175 }, { "epoch": 0.08579088471849866, "grad_norm": 0.301046222448349, "learning_rate": 0.0001967533475773605, "loss": 1.3237, "step": 176 }, { "epoch": 0.08627833292712649, "grad_norm": 0.29805856943130493, "learning_rate": 0.0001967143297302691, "loss": 1.1757, "step": 177 }, { "epoch": 0.08676578113575432, "grad_norm": 0.3067397475242615, "learning_rate": 0.00019667508274129526, "loss": 1.3353, "step": 178 }, { "epoch": 0.08725322934438216, "grad_norm": 0.3156089782714844, "learning_rate": 0.00019663560670342558, "loss": 1.3084, "step": 179 }, { "epoch": 0.08774067755300999, "grad_norm": 0.3230333924293518, "learning_rate": 0.00019659590171018914, "loss": 1.3054, "step": 180 }, { "epoch": 0.08822812576163783, "grad_norm": 0.33901235461235046, "learning_rate": 0.00019655596785565768, "loss": 1.2629, "step": 181 }, { "epoch": 0.08871557397026567, "grad_norm": 0.3485647141933441, "learning_rate": 0.00019651580523444507, "loss": 1.2564, "step": 182 }, { "epoch": 0.08920302217889349, "grad_norm": 0.3248312175273895, "learning_rate": 0.00019647541394170718, "loss": 1.373, "step": 183 }, { "epoch": 0.08969047038752133, "grad_norm": 0.3881993293762207, "learning_rate": 0.00019643479407314168, "loss": 1.2413, "step": 184 }, { "epoch": 0.09017791859614915, "grad_norm": 0.36643728613853455, "learning_rate": 0.00019639394572498788, "loss": 1.1796, "step": 185 }, { "epoch": 0.09066536680477699, "grad_norm": 0.3189202845096588, "learning_rate": 0.00019635286899402624, "loss": 0.9767, "step": 186 }, { "epoch": 0.09115281501340483, "grad_norm": 0.3280291259288788, "learning_rate": 0.00019631156397757852, "loss": 1.3026, "step": 187 }, { "epoch": 0.09164026322203266, "grad_norm": 0.2965233027935028, "learning_rate": 0.00019627003077350716, "loss": 1.2552, "step": 188 }, { "epoch": 0.0921277114306605, "grad_norm": 0.35237714648246765, "learning_rate": 0.0001962282694802154, "loss": 1.3261, "step": 189 }, { "epoch": 0.09261515963928832, "grad_norm": 0.34230533242225647, "learning_rate": 0.00019618628019664683, "loss": 1.186, "step": 190 }, { "epoch": 0.09310260784791616, "grad_norm": 0.3429117202758789, "learning_rate": 0.0001961440630222851, "loss": 1.3247, "step": 191 }, { "epoch": 0.093590056056544, "grad_norm": 0.3420495390892029, "learning_rate": 0.00019610161805715397, "loss": 1.2166, "step": 192 }, { "epoch": 0.09407750426517182, "grad_norm": 0.4569028913974762, "learning_rate": 0.00019605894540181677, "loss": 1.1844, "step": 193 }, { "epoch": 0.09456495247379966, "grad_norm": 0.37705421447753906, "learning_rate": 0.00019601604515737635, "loss": 1.2311, "step": 194 }, { "epoch": 0.0950524006824275, "grad_norm": 0.3612574636936188, "learning_rate": 0.00019597291742547474, "loss": 1.2255, "step": 195 }, { "epoch": 0.09553984889105532, "grad_norm": 0.3655405044555664, "learning_rate": 0.000195929562308293, "loss": 1.1927, "step": 196 }, { "epoch": 0.09602729709968316, "grad_norm": 0.3330654203891754, "learning_rate": 0.00019588597990855084, "loss": 1.2456, "step": 197 }, { "epoch": 0.09651474530831099, "grad_norm": 0.3447643518447876, "learning_rate": 0.00019584217032950658, "loss": 1.1579, "step": 198 }, { "epoch": 0.09700219351693883, "grad_norm": 0.36293572187423706, "learning_rate": 0.00019579813367495672, "loss": 1.2373, "step": 199 }, { "epoch": 0.09748964172556666, "grad_norm": 0.35249459743499756, "learning_rate": 0.0001957538700492357, "loss": 1.2832, "step": 200 }, { "epoch": 0.09797708993419449, "grad_norm": 0.4142071008682251, "learning_rate": 0.00019570937955721586, "loss": 1.1746, "step": 201 }, { "epoch": 0.09846453814282233, "grad_norm": 0.29519200325012207, "learning_rate": 0.00019566466230430693, "loss": 1.2661, "step": 202 }, { "epoch": 0.09895198635145015, "grad_norm": 0.39531150460243225, "learning_rate": 0.00019561971839645594, "loss": 1.2058, "step": 203 }, { "epoch": 0.09943943456007799, "grad_norm": 0.3783504366874695, "learning_rate": 0.0001955745479401469, "loss": 1.2174, "step": 204 }, { "epoch": 0.09992688276870583, "grad_norm": 0.3396473824977875, "learning_rate": 0.00019552915104240065, "loss": 1.248, "step": 205 }, { "epoch": 0.10041433097733365, "grad_norm": 0.39432066679000854, "learning_rate": 0.00019548352781077443, "loss": 1.1647, "step": 206 }, { "epoch": 0.1009017791859615, "grad_norm": 0.37768709659576416, "learning_rate": 0.0001954376783533618, "loss": 1.31, "step": 207 }, { "epoch": 0.10138922739458932, "grad_norm": 0.3488147556781769, "learning_rate": 0.00019539160277879224, "loss": 1.195, "step": 208 }, { "epoch": 0.10187667560321716, "grad_norm": 0.3478896915912628, "learning_rate": 0.00019534530119623097, "loss": 1.2701, "step": 209 }, { "epoch": 0.102364123811845, "grad_norm": 0.34134477376937866, "learning_rate": 0.00019529877371537882, "loss": 1.3193, "step": 210 }, { "epoch": 0.10285157202047282, "grad_norm": 0.3868890404701233, "learning_rate": 0.00019525202044647162, "loss": 1.2999, "step": 211 }, { "epoch": 0.10333902022910066, "grad_norm": 0.35992351174354553, "learning_rate": 0.00019520504150028032, "loss": 1.5135, "step": 212 }, { "epoch": 0.1038264684377285, "grad_norm": 0.337139755487442, "learning_rate": 0.00019515783698811044, "loss": 1.2961, "step": 213 }, { "epoch": 0.10431391664635632, "grad_norm": 0.2877628207206726, "learning_rate": 0.00019511040702180203, "loss": 1.284, "step": 214 }, { "epoch": 0.10480136485498416, "grad_norm": 0.3476368188858032, "learning_rate": 0.00019506275171372926, "loss": 1.3162, "step": 215 }, { "epoch": 0.10528881306361199, "grad_norm": 0.2870039641857147, "learning_rate": 0.00019501487117680016, "loss": 1.0493, "step": 216 }, { "epoch": 0.10577626127223982, "grad_norm": 0.3463204801082611, "learning_rate": 0.0001949667655244564, "loss": 1.2352, "step": 217 }, { "epoch": 0.10626370948086766, "grad_norm": 0.31859636306762695, "learning_rate": 0.00019491843487067306, "loss": 1.2171, "step": 218 }, { "epoch": 0.10675115768949549, "grad_norm": 0.29325994849205017, "learning_rate": 0.00019486987932995823, "loss": 1.3066, "step": 219 }, { "epoch": 0.10723860589812333, "grad_norm": 0.3366852402687073, "learning_rate": 0.00019482109901735285, "loss": 1.2206, "step": 220 }, { "epoch": 0.10772605410675115, "grad_norm": 0.3563458025455475, "learning_rate": 0.00019477209404843049, "loss": 1.1842, "step": 221 }, { "epoch": 0.10821350231537899, "grad_norm": 0.2932288646697998, "learning_rate": 0.00019472286453929682, "loss": 1.1563, "step": 222 }, { "epoch": 0.10870095052400683, "grad_norm": 0.3266105353832245, "learning_rate": 0.00019467341060658963, "loss": 1.1246, "step": 223 }, { "epoch": 0.10918839873263465, "grad_norm": 0.33189934492111206, "learning_rate": 0.0001946237323674784, "loss": 1.3277, "step": 224 }, { "epoch": 0.10967584694126249, "grad_norm": 0.3772162199020386, "learning_rate": 0.00019457382993966405, "loss": 1.1527, "step": 225 }, { "epoch": 0.11016329514989033, "grad_norm": 0.34699949622154236, "learning_rate": 0.00019452370344137868, "loss": 1.2081, "step": 226 }, { "epoch": 0.11065074335851816, "grad_norm": 0.3212660253047943, "learning_rate": 0.00019447335299138517, "loss": 1.237, "step": 227 }, { "epoch": 0.111138191567146, "grad_norm": 0.33385413885116577, "learning_rate": 0.00019442277870897713, "loss": 1.3355, "step": 228 }, { "epoch": 0.11162563977577382, "grad_norm": 0.3604995608329773, "learning_rate": 0.0001943719807139785, "loss": 1.4172, "step": 229 }, { "epoch": 0.11211308798440166, "grad_norm": 0.2965205907821655, "learning_rate": 0.0001943209591267431, "loss": 1.2438, "step": 230 }, { "epoch": 0.1126005361930295, "grad_norm": 0.27478981018066406, "learning_rate": 0.00019426971406815463, "loss": 1.1359, "step": 231 }, { "epoch": 0.11308798440165732, "grad_norm": 0.3109903037548065, "learning_rate": 0.00019421824565962623, "loss": 1.2647, "step": 232 }, { "epoch": 0.11357543261028516, "grad_norm": 0.2932765781879425, "learning_rate": 0.0001941665540231002, "loss": 1.3322, "step": 233 }, { "epoch": 0.11406288081891298, "grad_norm": 0.28574731945991516, "learning_rate": 0.0001941146392810477, "loss": 1.2771, "step": 234 }, { "epoch": 0.11455032902754082, "grad_norm": 0.3058600425720215, "learning_rate": 0.00019406250155646856, "loss": 1.1512, "step": 235 }, { "epoch": 0.11503777723616866, "grad_norm": 0.345528781414032, "learning_rate": 0.00019401014097289083, "loss": 1.3235, "step": 236 }, { "epoch": 0.11552522544479649, "grad_norm": 0.28189167380332947, "learning_rate": 0.00019395755765437062, "loss": 1.159, "step": 237 }, { "epoch": 0.11601267365342433, "grad_norm": 0.3478774428367615, "learning_rate": 0.00019390475172549176, "loss": 1.3016, "step": 238 }, { "epoch": 0.11650012186205215, "grad_norm": 0.28645917773246765, "learning_rate": 0.0001938517233113655, "loss": 1.3106, "step": 239 }, { "epoch": 0.11698757007067999, "grad_norm": 0.3461976647377014, "learning_rate": 0.0001937984725376302, "loss": 1.3887, "step": 240 }, { "epoch": 0.11747501827930783, "grad_norm": 0.34896981716156006, "learning_rate": 0.00019374499953045104, "loss": 1.2542, "step": 241 }, { "epoch": 0.11796246648793565, "grad_norm": 0.3654549717903137, "learning_rate": 0.00019369130441651978, "loss": 1.2987, "step": 242 }, { "epoch": 0.11844991469656349, "grad_norm": 0.283341646194458, "learning_rate": 0.00019363738732305433, "loss": 1.2665, "step": 243 }, { "epoch": 0.11893736290519133, "grad_norm": 0.38239938020706177, "learning_rate": 0.00019358324837779863, "loss": 1.1503, "step": 244 }, { "epoch": 0.11942481111381915, "grad_norm": 0.35076814889907837, "learning_rate": 0.00019352888770902214, "loss": 1.2163, "step": 245 }, { "epoch": 0.11991225932244699, "grad_norm": 0.34189239144325256, "learning_rate": 0.00019347430544551975, "loss": 1.2757, "step": 246 }, { "epoch": 0.12039970753107482, "grad_norm": 0.3641584813594818, "learning_rate": 0.00019341950171661125, "loss": 1.2257, "step": 247 }, { "epoch": 0.12088715573970266, "grad_norm": 0.29058733582496643, "learning_rate": 0.00019336447665214125, "loss": 1.1369, "step": 248 }, { "epoch": 0.1213746039483305, "grad_norm": 0.3116576373577118, "learning_rate": 0.0001933092303824787, "loss": 1.1738, "step": 249 }, { "epoch": 0.12186205215695832, "grad_norm": 0.3579969108104706, "learning_rate": 0.00019325376303851662, "loss": 1.2442, "step": 250 }, { "epoch": 0.12234950036558616, "grad_norm": 0.3996794819831848, "learning_rate": 0.0001931980747516719, "loss": 1.3211, "step": 251 }, { "epoch": 0.12283694857421398, "grad_norm": 0.3101326823234558, "learning_rate": 0.0001931421656538848, "loss": 1.2243, "step": 252 }, { "epoch": 0.12332439678284182, "grad_norm": 0.42262008786201477, "learning_rate": 0.00019308603587761888, "loss": 1.3067, "step": 253 }, { "epoch": 0.12381184499146966, "grad_norm": 0.33090803027153015, "learning_rate": 0.0001930296855558604, "loss": 1.2928, "step": 254 }, { "epoch": 0.12429929320009749, "grad_norm": 0.2976021468639374, "learning_rate": 0.00019297311482211816, "loss": 1.2932, "step": 255 }, { "epoch": 0.12478674140872532, "grad_norm": 0.27347099781036377, "learning_rate": 0.00019291632381042328, "loss": 1.2138, "step": 256 }, { "epoch": 0.12527418961735315, "grad_norm": 0.39553186297416687, "learning_rate": 0.00019285931265532871, "loss": 1.3444, "step": 257 }, { "epoch": 0.125761637825981, "grad_norm": 0.3036799132823944, "learning_rate": 0.00019280208149190903, "loss": 1.1993, "step": 258 }, { "epoch": 0.12624908603460883, "grad_norm": 0.31273937225341797, "learning_rate": 0.00019274463045575998, "loss": 1.1879, "step": 259 }, { "epoch": 0.12673653424323666, "grad_norm": 0.3476383686065674, "learning_rate": 0.00019268695968299832, "loss": 1.3481, "step": 260 }, { "epoch": 0.12722398245186448, "grad_norm": 0.30680546164512634, "learning_rate": 0.00019262906931026138, "loss": 1.3024, "step": 261 }, { "epoch": 0.12771143066049231, "grad_norm": 0.3958556056022644, "learning_rate": 0.00019257095947470678, "loss": 1.3361, "step": 262 }, { "epoch": 0.12819887886912015, "grad_norm": 0.2787322700023651, "learning_rate": 0.00019251263031401216, "loss": 1.1156, "step": 263 }, { "epoch": 0.128686327077748, "grad_norm": 0.33875012397766113, "learning_rate": 0.00019245408196637477, "loss": 1.4094, "step": 264 }, { "epoch": 0.12917377528637583, "grad_norm": 0.32169806957244873, "learning_rate": 0.00019239531457051112, "loss": 1.2505, "step": 265 }, { "epoch": 0.12966122349500367, "grad_norm": 0.36968526244163513, "learning_rate": 0.00019233632826565677, "loss": 1.264, "step": 266 }, { "epoch": 0.13014867170363148, "grad_norm": 0.3718290328979492, "learning_rate": 0.00019227712319156592, "loss": 1.2319, "step": 267 }, { "epoch": 0.13063611991225932, "grad_norm": 0.32931840419769287, "learning_rate": 0.0001922176994885111, "loss": 1.3845, "step": 268 }, { "epoch": 0.13112356812088716, "grad_norm": 0.3489207625389099, "learning_rate": 0.00019215805729728275, "loss": 1.3742, "step": 269 }, { "epoch": 0.131611016329515, "grad_norm": 0.30691617727279663, "learning_rate": 0.0001920981967591891, "loss": 1.2871, "step": 270 }, { "epoch": 0.13209846453814283, "grad_norm": 0.4441424012184143, "learning_rate": 0.00019203811801605557, "loss": 1.194, "step": 271 }, { "epoch": 0.13258591274677065, "grad_norm": 0.33883553743362427, "learning_rate": 0.0001919778212102247, "loss": 1.2959, "step": 272 }, { "epoch": 0.13307336095539848, "grad_norm": 0.35645484924316406, "learning_rate": 0.00019191730648455556, "loss": 1.2679, "step": 273 }, { "epoch": 0.13356080916402632, "grad_norm": 0.3160024881362915, "learning_rate": 0.00019185657398242356, "loss": 1.2795, "step": 274 }, { "epoch": 0.13404825737265416, "grad_norm": 0.30766648054122925, "learning_rate": 0.00019179562384772012, "loss": 1.1325, "step": 275 }, { "epoch": 0.134535705581282, "grad_norm": 0.33924242854118347, "learning_rate": 0.00019173445622485224, "loss": 1.3172, "step": 276 }, { "epoch": 0.1350231537899098, "grad_norm": 0.30728015303611755, "learning_rate": 0.00019167307125874227, "loss": 1.3195, "step": 277 }, { "epoch": 0.13551060199853765, "grad_norm": 0.34279197454452515, "learning_rate": 0.00019161146909482747, "loss": 1.3398, "step": 278 }, { "epoch": 0.1359980502071655, "grad_norm": 0.27993321418762207, "learning_rate": 0.00019154964987905964, "loss": 1.1722, "step": 279 }, { "epoch": 0.13648549841579333, "grad_norm": 0.297305166721344, "learning_rate": 0.0001914876137579049, "loss": 1.2413, "step": 280 }, { "epoch": 0.13697294662442117, "grad_norm": 0.3172423243522644, "learning_rate": 0.00019142536087834335, "loss": 1.2392, "step": 281 }, { "epoch": 0.13746039483304898, "grad_norm": 0.39218711853027344, "learning_rate": 0.00019136289138786845, "loss": 1.1854, "step": 282 }, { "epoch": 0.13794784304167682, "grad_norm": 0.3725161850452423, "learning_rate": 0.00019130020543448704, "loss": 1.3279, "step": 283 }, { "epoch": 0.13843529125030465, "grad_norm": 0.3030851483345032, "learning_rate": 0.00019123730316671872, "loss": 1.2448, "step": 284 }, { "epoch": 0.1389227394589325, "grad_norm": 0.29124540090560913, "learning_rate": 0.00019117418473359573, "loss": 1.1935, "step": 285 }, { "epoch": 0.13941018766756033, "grad_norm": 0.3250710964202881, "learning_rate": 0.00019111085028466224, "loss": 1.2645, "step": 286 }, { "epoch": 0.13989763587618814, "grad_norm": 0.3330450654029846, "learning_rate": 0.0001910472999699744, "loss": 1.1181, "step": 287 }, { "epoch": 0.14038508408481598, "grad_norm": 0.2867342233657837, "learning_rate": 0.0001909835339400998, "loss": 1.1891, "step": 288 }, { "epoch": 0.14087253229344382, "grad_norm": 0.3709441125392914, "learning_rate": 0.000190919552346117, "loss": 1.3854, "step": 289 }, { "epoch": 0.14135998050207166, "grad_norm": 0.3034871816635132, "learning_rate": 0.00019085535533961537, "loss": 1.2462, "step": 290 }, { "epoch": 0.1418474287106995, "grad_norm": 0.29863473773002625, "learning_rate": 0.00019079094307269468, "loss": 1.2237, "step": 291 }, { "epoch": 0.1423348769193273, "grad_norm": 0.5280107259750366, "learning_rate": 0.00019072631569796463, "loss": 1.2578, "step": 292 }, { "epoch": 0.14282232512795515, "grad_norm": 0.3158257007598877, "learning_rate": 0.00019066147336854457, "loss": 1.1757, "step": 293 }, { "epoch": 0.14330977333658299, "grad_norm": 0.34107086062431335, "learning_rate": 0.0001905964162380632, "loss": 1.1711, "step": 294 }, { "epoch": 0.14379722154521082, "grad_norm": 0.3253112733364105, "learning_rate": 0.0001905311444606581, "loss": 1.2229, "step": 295 }, { "epoch": 0.14428466975383866, "grad_norm": 0.28953564167022705, "learning_rate": 0.00019046565819097545, "loss": 1.2001, "step": 296 }, { "epoch": 0.1447721179624665, "grad_norm": 0.2937741279602051, "learning_rate": 0.0001903999575841695, "loss": 1.1535, "step": 297 }, { "epoch": 0.1452595661710943, "grad_norm": 0.3405333459377289, "learning_rate": 0.00019033404279590244, "loss": 1.2373, "step": 298 }, { "epoch": 0.14574701437972215, "grad_norm": 0.33529722690582275, "learning_rate": 0.00019026791398234392, "loss": 1.1769, "step": 299 }, { "epoch": 0.14623446258835, "grad_norm": 0.3445146381855011, "learning_rate": 0.00019020157130017053, "loss": 1.2635, "step": 300 }, { "epoch": 0.14672191079697783, "grad_norm": 0.30162152647972107, "learning_rate": 0.00019013501490656578, "loss": 1.2069, "step": 301 }, { "epoch": 0.14720935900560567, "grad_norm": 0.3126993477344513, "learning_rate": 0.00019006824495921936, "loss": 1.1025, "step": 302 }, { "epoch": 0.14769680721423348, "grad_norm": 0.35758090019226074, "learning_rate": 0.0001900012616163269, "loss": 1.1173, "step": 303 }, { "epoch": 0.14818425542286132, "grad_norm": 0.3262757658958435, "learning_rate": 0.00018993406503658983, "loss": 1.1772, "step": 304 }, { "epoch": 0.14867170363148915, "grad_norm": 0.3326362371444702, "learning_rate": 0.00018986665537921456, "loss": 1.187, "step": 305 }, { "epoch": 0.149159151840117, "grad_norm": 0.375229150056839, "learning_rate": 0.0001897990328039125, "loss": 1.2361, "step": 306 }, { "epoch": 0.14964660004874483, "grad_norm": 0.35128822922706604, "learning_rate": 0.0001897311974708994, "loss": 1.1719, "step": 307 }, { "epoch": 0.15013404825737264, "grad_norm": 0.2904835641384125, "learning_rate": 0.00018966314954089517, "loss": 1.2435, "step": 308 }, { "epoch": 0.15062149646600048, "grad_norm": 0.42316755652427673, "learning_rate": 0.0001895948891751234, "loss": 1.2499, "step": 309 }, { "epoch": 0.15110894467462832, "grad_norm": 0.30534741282463074, "learning_rate": 0.00018952641653531096, "loss": 1.1374, "step": 310 }, { "epoch": 0.15159639288325616, "grad_norm": 0.42398756742477417, "learning_rate": 0.0001894577317836877, "loss": 1.2051, "step": 311 }, { "epoch": 0.152083841091884, "grad_norm": 0.3085149824619293, "learning_rate": 0.00018938883508298605, "loss": 1.289, "step": 312 }, { "epoch": 0.1525712893005118, "grad_norm": 0.3194994032382965, "learning_rate": 0.00018931972659644045, "loss": 1.355, "step": 313 }, { "epoch": 0.15305873750913965, "grad_norm": 0.3154197335243225, "learning_rate": 0.00018925040648778732, "loss": 1.1218, "step": 314 }, { "epoch": 0.15354618571776749, "grad_norm": 0.3922419548034668, "learning_rate": 0.00018918087492126432, "loss": 1.206, "step": 315 }, { "epoch": 0.15403363392639532, "grad_norm": 0.37550610303878784, "learning_rate": 0.0001891111320616102, "loss": 1.2382, "step": 316 }, { "epoch": 0.15452108213502316, "grad_norm": 0.300519198179245, "learning_rate": 0.00018904117807406424, "loss": 1.0981, "step": 317 }, { "epoch": 0.15500853034365097, "grad_norm": 0.32601064443588257, "learning_rate": 0.000188971013124366, "loss": 1.1217, "step": 318 }, { "epoch": 0.1554959785522788, "grad_norm": 0.37468162178993225, "learning_rate": 0.00018890063737875482, "loss": 1.2628, "step": 319 }, { "epoch": 0.15598342676090665, "grad_norm": 0.4266589879989624, "learning_rate": 0.00018883005100396957, "loss": 1.2281, "step": 320 }, { "epoch": 0.1564708749695345, "grad_norm": 0.33140459656715393, "learning_rate": 0.00018875925416724794, "loss": 1.301, "step": 321 }, { "epoch": 0.15695832317816233, "grad_norm": 0.28100690245628357, "learning_rate": 0.00018868824703632657, "loss": 1.3174, "step": 322 }, { "epoch": 0.15744577138679014, "grad_norm": 0.2905332148075104, "learning_rate": 0.00018861702977944006, "loss": 1.2159, "step": 323 }, { "epoch": 0.15793321959541798, "grad_norm": 0.3181256353855133, "learning_rate": 0.000188545602565321, "loss": 1.3239, "step": 324 }, { "epoch": 0.15842066780404582, "grad_norm": 0.31099027395248413, "learning_rate": 0.0001884739655631994, "loss": 1.2747, "step": 325 }, { "epoch": 0.15890811601267366, "grad_norm": 0.3274545967578888, "learning_rate": 0.00018840211894280228, "loss": 1.1979, "step": 326 }, { "epoch": 0.1593955642213015, "grad_norm": 0.39264824986457825, "learning_rate": 0.0001883300628743534, "loss": 1.1653, "step": 327 }, { "epoch": 0.15988301242992933, "grad_norm": 0.32801032066345215, "learning_rate": 0.00018825779752857258, "loss": 1.1604, "step": 328 }, { "epoch": 0.16037046063855714, "grad_norm": 0.30960729718208313, "learning_rate": 0.00018818532307667566, "loss": 1.2533, "step": 329 }, { "epoch": 0.16085790884718498, "grad_norm": 0.3749324083328247, "learning_rate": 0.00018811263969037377, "loss": 1.2268, "step": 330 }, { "epoch": 0.16134535705581282, "grad_norm": 0.30750614404678345, "learning_rate": 0.0001880397475418732, "loss": 1.2403, "step": 331 }, { "epoch": 0.16183280526444066, "grad_norm": 0.3399461507797241, "learning_rate": 0.00018796664680387467, "loss": 1.2749, "step": 332 }, { "epoch": 0.1623202534730685, "grad_norm": 0.35204702615737915, "learning_rate": 0.0001878933376495733, "loss": 1.2544, "step": 333 }, { "epoch": 0.1628077016816963, "grad_norm": 0.3700251579284668, "learning_rate": 0.0001878198202526578, "loss": 1.315, "step": 334 }, { "epoch": 0.16329514989032415, "grad_norm": 0.34206223487854004, "learning_rate": 0.00018774609478731046, "loss": 1.1894, "step": 335 }, { "epoch": 0.163782598098952, "grad_norm": 0.3301335871219635, "learning_rate": 0.00018767216142820642, "loss": 1.1981, "step": 336 }, { "epoch": 0.16427004630757983, "grad_norm": 0.35820314288139343, "learning_rate": 0.0001875980203505134, "loss": 1.2314, "step": 337 }, { "epoch": 0.16475749451620766, "grad_norm": 0.4263235926628113, "learning_rate": 0.0001875236717298913, "loss": 1.3417, "step": 338 }, { "epoch": 0.16524494272483548, "grad_norm": 0.3576142191886902, "learning_rate": 0.00018744911574249161, "loss": 1.1498, "step": 339 }, { "epoch": 0.1657323909334633, "grad_norm": 0.3001526892185211, "learning_rate": 0.00018737435256495734, "loss": 1.177, "step": 340 }, { "epoch": 0.16621983914209115, "grad_norm": 0.30892422795295715, "learning_rate": 0.00018729938237442223, "loss": 1.0733, "step": 341 }, { "epoch": 0.166707287350719, "grad_norm": 0.3621678948402405, "learning_rate": 0.00018722420534851052, "loss": 1.2447, "step": 342 }, { "epoch": 0.16719473555934683, "grad_norm": 0.2934684157371521, "learning_rate": 0.00018714882166533656, "loss": 1.1664, "step": 343 }, { "epoch": 0.16768218376797464, "grad_norm": 0.3934597671031952, "learning_rate": 0.0001870732315035042, "loss": 1.1904, "step": 344 }, { "epoch": 0.16816963197660248, "grad_norm": 0.35083746910095215, "learning_rate": 0.00018699743504210664, "loss": 1.2573, "step": 345 }, { "epoch": 0.16865708018523032, "grad_norm": 0.3115769922733307, "learning_rate": 0.0001869214324607257, "loss": 1.0691, "step": 346 }, { "epoch": 0.16914452839385816, "grad_norm": 0.3621445298194885, "learning_rate": 0.00018684522393943177, "loss": 1.2203, "step": 347 }, { "epoch": 0.169631976602486, "grad_norm": 0.34863755106925964, "learning_rate": 0.00018676880965878291, "loss": 1.2223, "step": 348 }, { "epoch": 0.1701194248111138, "grad_norm": 0.3783847391605377, "learning_rate": 0.00018669218979982489, "loss": 1.2681, "step": 349 }, { "epoch": 0.17060687301974164, "grad_norm": 0.311643123626709, "learning_rate": 0.00018661536454409042, "loss": 1.2847, "step": 350 }, { "epoch": 0.17109432122836948, "grad_norm": 0.38355201482772827, "learning_rate": 0.00018653833407359893, "loss": 1.191, "step": 351 }, { "epoch": 0.17158176943699732, "grad_norm": 0.2972296476364136, "learning_rate": 0.00018646109857085597, "loss": 1.2257, "step": 352 }, { "epoch": 0.17206921764562516, "grad_norm": 0.3391678035259247, "learning_rate": 0.00018638365821885297, "loss": 1.2264, "step": 353 }, { "epoch": 0.17255666585425297, "grad_norm": 0.3344445824623108, "learning_rate": 0.00018630601320106664, "loss": 1.0415, "step": 354 }, { "epoch": 0.1730441140628808, "grad_norm": 0.3566620945930481, "learning_rate": 0.0001862281637014586, "loss": 1.1043, "step": 355 }, { "epoch": 0.17353156227150865, "grad_norm": 0.4023699462413788, "learning_rate": 0.0001861501099044749, "loss": 1.361, "step": 356 }, { "epoch": 0.1740190104801365, "grad_norm": 0.3327338695526123, "learning_rate": 0.00018607185199504578, "loss": 1.1473, "step": 357 }, { "epoch": 0.17450645868876433, "grad_norm": 0.3657204806804657, "learning_rate": 0.00018599339015858485, "loss": 1.1703, "step": 358 }, { "epoch": 0.17499390689739217, "grad_norm": 0.3360118865966797, "learning_rate": 0.00018591472458098912, "loss": 1.1784, "step": 359 }, { "epoch": 0.17548135510601998, "grad_norm": 0.38200175762176514, "learning_rate": 0.0001858358554486381, "loss": 1.3757, "step": 360 }, { "epoch": 0.17596880331464781, "grad_norm": 0.3309694528579712, "learning_rate": 0.00018575678294839373, "loss": 1.1339, "step": 361 }, { "epoch": 0.17645625152327565, "grad_norm": 0.34614065289497375, "learning_rate": 0.00018567750726759967, "loss": 1.2028, "step": 362 }, { "epoch": 0.1769436997319035, "grad_norm": 0.30191949009895325, "learning_rate": 0.00018559802859408108, "loss": 1.1361, "step": 363 }, { "epoch": 0.17743114794053133, "grad_norm": 0.3177981674671173, "learning_rate": 0.00018551834711614396, "loss": 1.3001, "step": 364 }, { "epoch": 0.17791859614915914, "grad_norm": 0.31745445728302, "learning_rate": 0.00018543846302257485, "loss": 1.1994, "step": 365 }, { "epoch": 0.17840604435778698, "grad_norm": 0.3130457401275635, "learning_rate": 0.00018535837650264037, "loss": 1.0229, "step": 366 }, { "epoch": 0.17889349256641482, "grad_norm": 0.3922497034072876, "learning_rate": 0.0001852780877460867, "loss": 1.2234, "step": 367 }, { "epoch": 0.17938094077504266, "grad_norm": 0.34110864996910095, "learning_rate": 0.00018519759694313916, "loss": 1.3665, "step": 368 }, { "epoch": 0.1798683889836705, "grad_norm": 0.3519771695137024, "learning_rate": 0.00018511690428450183, "loss": 1.2674, "step": 369 }, { "epoch": 0.1803558371922983, "grad_norm": 0.3103514611721039, "learning_rate": 0.000185036009961357, "loss": 1.1153, "step": 370 }, { "epoch": 0.18084328540092615, "grad_norm": 0.2760174572467804, "learning_rate": 0.00018495491416536478, "loss": 1.1707, "step": 371 }, { "epoch": 0.18133073360955398, "grad_norm": 0.33489930629730225, "learning_rate": 0.0001848736170886626, "loss": 1.1774, "step": 372 }, { "epoch": 0.18181818181818182, "grad_norm": 0.34386131167411804, "learning_rate": 0.00018479211892386474, "loss": 1.2538, "step": 373 }, { "epoch": 0.18230563002680966, "grad_norm": 0.37152737379074097, "learning_rate": 0.000184710419864062, "loss": 1.2242, "step": 374 }, { "epoch": 0.18279307823543747, "grad_norm": 0.29477307200431824, "learning_rate": 0.00018462852010282108, "loss": 1.3295, "step": 375 }, { "epoch": 0.1832805264440653, "grad_norm": 0.35536712408065796, "learning_rate": 0.00018454641983418427, "loss": 1.1886, "step": 376 }, { "epoch": 0.18376797465269315, "grad_norm": 0.30594703555107117, "learning_rate": 0.0001844641192526688, "loss": 1.0854, "step": 377 }, { "epoch": 0.184255422861321, "grad_norm": 0.27677103877067566, "learning_rate": 0.0001843816185532666, "loss": 1.1368, "step": 378 }, { "epoch": 0.18474287106994883, "grad_norm": 0.32766619324684143, "learning_rate": 0.00018429891793144375, "loss": 1.1257, "step": 379 }, { "epoch": 0.18523031927857664, "grad_norm": 0.35552579164505005, "learning_rate": 0.00018421601758313982, "loss": 1.2394, "step": 380 }, { "epoch": 0.18571776748720448, "grad_norm": 0.34337764978408813, "learning_rate": 0.0001841329177047678, "loss": 1.2604, "step": 381 }, { "epoch": 0.18620521569583232, "grad_norm": 0.2597070336341858, "learning_rate": 0.0001840496184932133, "loss": 1.1631, "step": 382 }, { "epoch": 0.18669266390446015, "grad_norm": 0.3200598359107971, "learning_rate": 0.00018396612014583423, "loss": 1.1131, "step": 383 }, { "epoch": 0.187180112113088, "grad_norm": 0.3345329761505127, "learning_rate": 0.0001838824228604603, "loss": 1.1062, "step": 384 }, { "epoch": 0.1876675603217158, "grad_norm": 0.3126945495605469, "learning_rate": 0.00018379852683539263, "loss": 1.1057, "step": 385 }, { "epoch": 0.18815500853034364, "grad_norm": 0.38702392578125, "learning_rate": 0.00018371443226940305, "loss": 1.2012, "step": 386 }, { "epoch": 0.18864245673897148, "grad_norm": 0.3303517997264862, "learning_rate": 0.00018363013936173393, "loss": 1.1981, "step": 387 }, { "epoch": 0.18912990494759932, "grad_norm": 0.3743020296096802, "learning_rate": 0.00018354564831209747, "loss": 1.251, "step": 388 }, { "epoch": 0.18961735315622716, "grad_norm": 0.31129929423332214, "learning_rate": 0.0001834609593206754, "loss": 1.1852, "step": 389 }, { "epoch": 0.190104801364855, "grad_norm": 0.31025078892707825, "learning_rate": 0.0001833760725881184, "loss": 1.1386, "step": 390 }, { "epoch": 0.1905922495734828, "grad_norm": 0.3130515515804291, "learning_rate": 0.00018329098831554557, "loss": 1.2102, "step": 391 }, { "epoch": 0.19107969778211065, "grad_norm": 0.28376305103302, "learning_rate": 0.0001832057067045442, "loss": 1.1814, "step": 392 }, { "epoch": 0.19156714599073849, "grad_norm": 0.32261911034584045, "learning_rate": 0.00018312022795716895, "loss": 1.3395, "step": 393 }, { "epoch": 0.19205459419936632, "grad_norm": 0.3088441491127014, "learning_rate": 0.00018303455227594166, "loss": 1.2452, "step": 394 }, { "epoch": 0.19254204240799416, "grad_norm": 0.35038337111473083, "learning_rate": 0.00018294867986385076, "loss": 1.182, "step": 395 }, { "epoch": 0.19302949061662197, "grad_norm": 0.3092940151691437, "learning_rate": 0.00018286261092435068, "loss": 1.3214, "step": 396 }, { "epoch": 0.1935169388252498, "grad_norm": 0.30770185589790344, "learning_rate": 0.00018277634566136165, "loss": 1.2192, "step": 397 }, { "epoch": 0.19400438703387765, "grad_norm": 0.3027122914791107, "learning_rate": 0.00018268988427926885, "loss": 1.1351, "step": 398 }, { "epoch": 0.1944918352425055, "grad_norm": 0.27010977268218994, "learning_rate": 0.00018260322698292224, "loss": 1.1809, "step": 399 }, { "epoch": 0.19497928345113333, "grad_norm": 0.33920818567276, "learning_rate": 0.00018251637397763597, "loss": 1.2581, "step": 400 }, { "epoch": 0.19546673165976114, "grad_norm": 0.25426357984542847, "learning_rate": 0.00018242932546918777, "loss": 1.1661, "step": 401 }, { "epoch": 0.19595417986838898, "grad_norm": 0.27038267254829407, "learning_rate": 0.0001823420816638187, "loss": 1.2815, "step": 402 }, { "epoch": 0.19644162807701682, "grad_norm": 0.37560659646987915, "learning_rate": 0.00018225464276823238, "loss": 1.2335, "step": 403 }, { "epoch": 0.19692907628564466, "grad_norm": 0.35653063654899597, "learning_rate": 0.00018216700898959477, "loss": 1.1732, "step": 404 }, { "epoch": 0.1974165244942725, "grad_norm": 0.3689841330051422, "learning_rate": 0.00018207918053553355, "loss": 1.2544, "step": 405 }, { "epoch": 0.1979039727029003, "grad_norm": 0.3493061065673828, "learning_rate": 0.00018199115761413753, "loss": 1.2338, "step": 406 }, { "epoch": 0.19839142091152814, "grad_norm": 0.42514804005622864, "learning_rate": 0.0001819029404339564, "loss": 1.2576, "step": 407 }, { "epoch": 0.19887886912015598, "grad_norm": 0.33240950107574463, "learning_rate": 0.00018181452920400007, "loss": 1.1487, "step": 408 }, { "epoch": 0.19936631732878382, "grad_norm": 0.37072715163230896, "learning_rate": 0.0001817259241337381, "loss": 1.3214, "step": 409 }, { "epoch": 0.19985376553741166, "grad_norm": 0.3414487838745117, "learning_rate": 0.00018163712543309944, "loss": 1.2432, "step": 410 }, { "epoch": 0.20034121374603947, "grad_norm": 0.34911903738975525, "learning_rate": 0.00018154813331247173, "loss": 1.2067, "step": 411 }, { "epoch": 0.2008286619546673, "grad_norm": 0.3342655301094055, "learning_rate": 0.00018145894798270092, "loss": 1.1907, "step": 412 }, { "epoch": 0.20131611016329515, "grad_norm": 0.32328155636787415, "learning_rate": 0.00018136956965509064, "loss": 1.2565, "step": 413 }, { "epoch": 0.201803558371923, "grad_norm": 0.33764177560806274, "learning_rate": 0.00018127999854140186, "loss": 1.186, "step": 414 }, { "epoch": 0.20229100658055083, "grad_norm": 0.3673051595687866, "learning_rate": 0.00018119023485385234, "loss": 1.1403, "step": 415 }, { "epoch": 0.20277845478917864, "grad_norm": 0.31863540410995483, "learning_rate": 0.000181100278805116, "loss": 1.0823, "step": 416 }, { "epoch": 0.20326590299780647, "grad_norm": 0.31400150060653687, "learning_rate": 0.00018101013060832255, "loss": 1.1745, "step": 417 }, { "epoch": 0.2037533512064343, "grad_norm": 0.3357588052749634, "learning_rate": 0.00018091979047705703, "loss": 1.3198, "step": 418 }, { "epoch": 0.20424079941506215, "grad_norm": 0.33937594294548035, "learning_rate": 0.00018082925862535908, "loss": 1.092, "step": 419 }, { "epoch": 0.20472824762369, "grad_norm": 0.34498557448387146, "learning_rate": 0.0001807385352677227, "loss": 1.2231, "step": 420 }, { "epoch": 0.20521569583231783, "grad_norm": 0.3494696021080017, "learning_rate": 0.00018064762061909554, "loss": 1.199, "step": 421 }, { "epoch": 0.20570314404094564, "grad_norm": 0.3133601248264313, "learning_rate": 0.00018055651489487853, "loss": 1.1441, "step": 422 }, { "epoch": 0.20619059224957348, "grad_norm": 0.3035077452659607, "learning_rate": 0.0001804652183109253, "loss": 1.1614, "step": 423 }, { "epoch": 0.20667804045820132, "grad_norm": 0.34320294857025146, "learning_rate": 0.0001803737310835416, "loss": 1.2356, "step": 424 }, { "epoch": 0.20716548866682916, "grad_norm": 0.38479337096214294, "learning_rate": 0.00018028205342948494, "loss": 1.1434, "step": 425 }, { "epoch": 0.207652936875457, "grad_norm": 0.30601903796195984, "learning_rate": 0.000180190185565964, "loss": 1.149, "step": 426 }, { "epoch": 0.2081403850840848, "grad_norm": 0.3483642637729645, "learning_rate": 0.00018009812771063808, "loss": 1.2152, "step": 427 }, { "epoch": 0.20862783329271264, "grad_norm": 0.3802853226661682, "learning_rate": 0.00018000588008161667, "loss": 1.2896, "step": 428 }, { "epoch": 0.20911528150134048, "grad_norm": 0.29257655143737793, "learning_rate": 0.0001799134428974588, "loss": 1.2532, "step": 429 }, { "epoch": 0.20960272970996832, "grad_norm": 0.3451422452926636, "learning_rate": 0.00017982081637717273, "loss": 1.3011, "step": 430 }, { "epoch": 0.21009017791859616, "grad_norm": 0.3802796006202698, "learning_rate": 0.00017972800074021516, "loss": 1.2363, "step": 431 }, { "epoch": 0.21057762612722397, "grad_norm": 0.31327179074287415, "learning_rate": 0.00017963499620649102, "loss": 1.0632, "step": 432 }, { "epoch": 0.2110650743358518, "grad_norm": 0.3250715732574463, "learning_rate": 0.00017954180299635265, "loss": 1.369, "step": 433 }, { "epoch": 0.21155252254447965, "grad_norm": 0.334495484828949, "learning_rate": 0.00017944842133059947, "loss": 1.2512, "step": 434 }, { "epoch": 0.2120399707531075, "grad_norm": 0.35229137539863586, "learning_rate": 0.0001793548514304774, "loss": 1.2477, "step": 435 }, { "epoch": 0.21252741896173533, "grad_norm": 0.3113347291946411, "learning_rate": 0.00017926109351767836, "loss": 1.1872, "step": 436 }, { "epoch": 0.21301486717036314, "grad_norm": 0.38488292694091797, "learning_rate": 0.00017916714781433964, "loss": 1.1569, "step": 437 }, { "epoch": 0.21350231537899098, "grad_norm": 0.3689945936203003, "learning_rate": 0.0001790730145430436, "loss": 1.2452, "step": 438 }, { "epoch": 0.21398976358761881, "grad_norm": 0.3782726526260376, "learning_rate": 0.00017897869392681685, "loss": 1.2962, "step": 439 }, { "epoch": 0.21447721179624665, "grad_norm": 0.31284990906715393, "learning_rate": 0.00017888418618912993, "loss": 1.3208, "step": 440 }, { "epoch": 0.2149646600048745, "grad_norm": 0.33481279015541077, "learning_rate": 0.00017878949155389676, "loss": 1.1801, "step": 441 }, { "epoch": 0.2154521082135023, "grad_norm": 0.3809351623058319, "learning_rate": 0.00017869461024547394, "loss": 1.2612, "step": 442 }, { "epoch": 0.21593955642213014, "grad_norm": 0.3469904661178589, "learning_rate": 0.00017859954248866056, "loss": 1.2604, "step": 443 }, { "epoch": 0.21642700463075798, "grad_norm": 0.3663221299648285, "learning_rate": 0.00017850428850869725, "loss": 1.1382, "step": 444 }, { "epoch": 0.21691445283938582, "grad_norm": 0.34318575263023376, "learning_rate": 0.000178408848531266, "loss": 1.1353, "step": 445 }, { "epoch": 0.21740190104801366, "grad_norm": 0.3427807688713074, "learning_rate": 0.00017831322278248935, "loss": 1.3791, "step": 446 }, { "epoch": 0.21788934925664147, "grad_norm": 0.27582523226737976, "learning_rate": 0.00017821741148893008, "loss": 1.273, "step": 447 }, { "epoch": 0.2183767974652693, "grad_norm": 0.3410693109035492, "learning_rate": 0.00017812141487759053, "loss": 1.1782, "step": 448 }, { "epoch": 0.21886424567389715, "grad_norm": 0.41272151470184326, "learning_rate": 0.00017802523317591212, "loss": 1.3831, "step": 449 }, { "epoch": 0.21935169388252498, "grad_norm": 0.35077860951423645, "learning_rate": 0.0001779288666117748, "loss": 1.1319, "step": 450 }, { "epoch": 0.21983914209115282, "grad_norm": 0.3441184461116791, "learning_rate": 0.0001778323154134965, "loss": 1.2284, "step": 451 }, { "epoch": 0.22032659029978066, "grad_norm": 0.2734520137310028, "learning_rate": 0.00017773557980983262, "loss": 1.1776, "step": 452 }, { "epoch": 0.22081403850840847, "grad_norm": 0.2958824038505554, "learning_rate": 0.0001776386600299754, "loss": 1.1105, "step": 453 }, { "epoch": 0.2213014867170363, "grad_norm": 0.3324015438556671, "learning_rate": 0.00017754155630355354, "loss": 1.1959, "step": 454 }, { "epoch": 0.22178893492566415, "grad_norm": 0.38719967007637024, "learning_rate": 0.00017744426886063145, "loss": 1.1342, "step": 455 }, { "epoch": 0.222276383134292, "grad_norm": 0.3017652928829193, "learning_rate": 0.00017734679793170895, "loss": 1.0807, "step": 456 }, { "epoch": 0.22276383134291983, "grad_norm": 0.32308661937713623, "learning_rate": 0.00017724914374772042, "loss": 1.2144, "step": 457 }, { "epoch": 0.22325127955154764, "grad_norm": 0.49217668175697327, "learning_rate": 0.00017715130654003452, "loss": 1.22, "step": 458 }, { "epoch": 0.22373872776017548, "grad_norm": 0.4496031701564789, "learning_rate": 0.00017705328654045362, "loss": 1.1971, "step": 459 }, { "epoch": 0.22422617596880332, "grad_norm": 0.35932832956314087, "learning_rate": 0.00017695508398121298, "loss": 1.2142, "step": 460 }, { "epoch": 0.22471362417743115, "grad_norm": 0.32042956352233887, "learning_rate": 0.00017685669909498053, "loss": 1.1094, "step": 461 }, { "epoch": 0.225201072386059, "grad_norm": 0.3304695785045624, "learning_rate": 0.00017675813211485614, "loss": 1.1935, "step": 462 }, { "epoch": 0.2256885205946868, "grad_norm": 0.33314943313598633, "learning_rate": 0.0001766593832743711, "loss": 1.2314, "step": 463 }, { "epoch": 0.22617596880331464, "grad_norm": 0.32802191376686096, "learning_rate": 0.0001765604528074876, "loss": 1.3842, "step": 464 }, { "epoch": 0.22666341701194248, "grad_norm": 0.39321258664131165, "learning_rate": 0.00017646134094859815, "loss": 1.2211, "step": 465 }, { "epoch": 0.22715086522057032, "grad_norm": 0.3253491222858429, "learning_rate": 0.00017636204793252498, "loss": 1.1863, "step": 466 }, { "epoch": 0.22763831342919816, "grad_norm": 0.3173609972000122, "learning_rate": 0.0001762625739945196, "loss": 1.1983, "step": 467 }, { "epoch": 0.22812576163782597, "grad_norm": 0.3017527163028717, "learning_rate": 0.00017616291937026213, "loss": 1.1544, "step": 468 }, { "epoch": 0.2286132098464538, "grad_norm": 0.2875792682170868, "learning_rate": 0.00017606308429586078, "loss": 1.1805, "step": 469 }, { "epoch": 0.22910065805508165, "grad_norm": 0.37087053060531616, "learning_rate": 0.0001759630690078513, "loss": 1.1787, "step": 470 }, { "epoch": 0.22958810626370948, "grad_norm": 0.3738652169704437, "learning_rate": 0.00017586287374319644, "loss": 1.2943, "step": 471 }, { "epoch": 0.23007555447233732, "grad_norm": 0.34178316593170166, "learning_rate": 0.0001757624987392853, "loss": 1.1116, "step": 472 }, { "epoch": 0.23056300268096513, "grad_norm": 0.3692106306552887, "learning_rate": 0.0001756619442339329, "loss": 1.2682, "step": 473 }, { "epoch": 0.23105045088959297, "grad_norm": 0.3724597990512848, "learning_rate": 0.00017556121046537947, "loss": 1.2064, "step": 474 }, { "epoch": 0.2315378990982208, "grad_norm": 0.35810819268226624, "learning_rate": 0.00017546029767229011, "loss": 1.1982, "step": 475 }, { "epoch": 0.23202534730684865, "grad_norm": 0.3127930760383606, "learning_rate": 0.0001753592060937539, "loss": 1.2766, "step": 476 }, { "epoch": 0.2325127955154765, "grad_norm": 0.29817426204681396, "learning_rate": 0.00017525793596928356, "loss": 1.0259, "step": 477 }, { "epoch": 0.2330002437241043, "grad_norm": 0.4083329439163208, "learning_rate": 0.00017515648753881492, "loss": 1.2894, "step": 478 }, { "epoch": 0.23348769193273214, "grad_norm": 0.3159523904323578, "learning_rate": 0.00017505486104270616, "loss": 1.0282, "step": 479 }, { "epoch": 0.23397514014135998, "grad_norm": 0.3796326518058777, "learning_rate": 0.00017495305672173746, "loss": 1.278, "step": 480 }, { "epoch": 0.23446258834998782, "grad_norm": 0.3640994727611542, "learning_rate": 0.00017485107481711012, "loss": 1.224, "step": 481 }, { "epoch": 0.23495003655861565, "grad_norm": 0.36146458983421326, "learning_rate": 0.0001747489155704464, "loss": 1.2988, "step": 482 }, { "epoch": 0.2354374847672435, "grad_norm": 0.361672967672348, "learning_rate": 0.00017464657922378857, "loss": 1.2746, "step": 483 }, { "epoch": 0.2359249329758713, "grad_norm": 0.3305426239967346, "learning_rate": 0.00017454406601959862, "loss": 1.1935, "step": 484 }, { "epoch": 0.23641238118449914, "grad_norm": 0.3048730194568634, "learning_rate": 0.00017444137620075748, "loss": 1.2769, "step": 485 }, { "epoch": 0.23689982939312698, "grad_norm": 0.3524467647075653, "learning_rate": 0.00017433851001056453, "loss": 1.2119, "step": 486 }, { "epoch": 0.23738727760175482, "grad_norm": 0.36313095688819885, "learning_rate": 0.00017423546769273707, "loss": 1.1705, "step": 487 }, { "epoch": 0.23787472581038266, "grad_norm": 0.3175797164440155, "learning_rate": 0.00017413224949140962, "loss": 1.2086, "step": 488 }, { "epoch": 0.23836217401901047, "grad_norm": 0.382396936416626, "learning_rate": 0.00017402885565113353, "loss": 1.1239, "step": 489 }, { "epoch": 0.2388496222276383, "grad_norm": 0.4053979814052582, "learning_rate": 0.00017392528641687615, "loss": 1.242, "step": 490 }, { "epoch": 0.23933707043626615, "grad_norm": 0.36382076144218445, "learning_rate": 0.00017382154203402048, "loss": 1.2455, "step": 491 }, { "epoch": 0.23982451864489399, "grad_norm": 0.29931992292404175, "learning_rate": 0.00017371762274836442, "loss": 1.1521, "step": 492 }, { "epoch": 0.24031196685352182, "grad_norm": 0.30255070328712463, "learning_rate": 0.00017361352880612037, "loss": 1.0897, "step": 493 }, { "epoch": 0.24079941506214964, "grad_norm": 0.3097134828567505, "learning_rate": 0.0001735092604539144, "loss": 1.2051, "step": 494 }, { "epoch": 0.24128686327077747, "grad_norm": 0.27947649359703064, "learning_rate": 0.00017340481793878592, "loss": 1.1624, "step": 495 }, { "epoch": 0.2417743114794053, "grad_norm": 0.3660297691822052, "learning_rate": 0.00017330020150818697, "loss": 1.2286, "step": 496 }, { "epoch": 0.24226175968803315, "grad_norm": 0.2641275227069855, "learning_rate": 0.0001731954114099815, "loss": 1.1223, "step": 497 }, { "epoch": 0.242749207896661, "grad_norm": 0.2959223687648773, "learning_rate": 0.0001730904478924451, "loss": 1.3362, "step": 498 }, { "epoch": 0.2432366561052888, "grad_norm": 0.2990069091320038, "learning_rate": 0.00017298531120426414, "loss": 1.259, "step": 499 }, { "epoch": 0.24372410431391664, "grad_norm": 0.40709275007247925, "learning_rate": 0.0001728800015945353, "loss": 1.2636, "step": 500 }, { "epoch": 0.24421155252254448, "grad_norm": 0.3209191560745239, "learning_rate": 0.00017277451931276496, "loss": 1.195, "step": 501 }, { "epoch": 0.24469900073117232, "grad_norm": 0.42993372678756714, "learning_rate": 0.00017266886460886864, "loss": 1.2563, "step": 502 }, { "epoch": 0.24518644893980016, "grad_norm": 0.3334101140499115, "learning_rate": 0.0001725630377331703, "loss": 1.102, "step": 503 }, { "epoch": 0.24567389714842797, "grad_norm": 0.3589898943901062, "learning_rate": 0.0001724570389364019, "loss": 1.225, "step": 504 }, { "epoch": 0.2461613453570558, "grad_norm": 0.37592238187789917, "learning_rate": 0.00017235086846970264, "loss": 1.1681, "step": 505 }, { "epoch": 0.24664879356568364, "grad_norm": 0.36380186676979065, "learning_rate": 0.0001722445265846185, "loss": 1.0927, "step": 506 }, { "epoch": 0.24713624177431148, "grad_norm": 0.3600970506668091, "learning_rate": 0.00017213801353310163, "loss": 1.2181, "step": 507 }, { "epoch": 0.24762368998293932, "grad_norm": 0.29900628328323364, "learning_rate": 0.00017203132956750963, "loss": 1.1451, "step": 508 }, { "epoch": 0.24811113819156713, "grad_norm": 0.3672201931476593, "learning_rate": 0.00017192447494060513, "loss": 1.2164, "step": 509 }, { "epoch": 0.24859858640019497, "grad_norm": 0.3158378601074219, "learning_rate": 0.00017181744990555506, "loss": 1.1199, "step": 510 }, { "epoch": 0.2490860346088228, "grad_norm": 0.3321176767349243, "learning_rate": 0.0001717102547159301, "loss": 1.2702, "step": 511 }, { "epoch": 0.24957348281745065, "grad_norm": 0.3186010420322418, "learning_rate": 0.00017160288962570402, "loss": 1.1354, "step": 512 }, { "epoch": 0.25006093102607846, "grad_norm": 0.3101379871368408, "learning_rate": 0.0001714953548892533, "loss": 1.1087, "step": 513 }, { "epoch": 0.2505483792347063, "grad_norm": 0.3541036546230316, "learning_rate": 0.00017138765076135608, "loss": 1.2284, "step": 514 }, { "epoch": 0.25103582744333414, "grad_norm": 0.30092599987983704, "learning_rate": 0.00017127977749719207, "loss": 1.16, "step": 515 }, { "epoch": 0.251523275651962, "grad_norm": 0.3944350481033325, "learning_rate": 0.00017117173535234162, "loss": 1.2925, "step": 516 }, { "epoch": 0.2520107238605898, "grad_norm": 0.33906981348991394, "learning_rate": 0.00017106352458278522, "loss": 1.1652, "step": 517 }, { "epoch": 0.25249817206921765, "grad_norm": 0.3461517095565796, "learning_rate": 0.00017095514544490282, "loss": 1.0418, "step": 518 }, { "epoch": 0.2529856202778455, "grad_norm": 0.3587675988674164, "learning_rate": 0.00017084659819547338, "loss": 1.2534, "step": 519 }, { "epoch": 0.25347306848647333, "grad_norm": 0.32428374886512756, "learning_rate": 0.00017073788309167408, "loss": 1.16, "step": 520 }, { "epoch": 0.25396051669510117, "grad_norm": 0.3599848449230194, "learning_rate": 0.00017062900039107977, "loss": 1.1422, "step": 521 }, { "epoch": 0.25444796490372895, "grad_norm": 0.3633671700954437, "learning_rate": 0.00017051995035166252, "loss": 1.1323, "step": 522 }, { "epoch": 0.2549354131123568, "grad_norm": 0.38934600353240967, "learning_rate": 0.0001704107332317907, "loss": 1.2117, "step": 523 }, { "epoch": 0.25542286132098463, "grad_norm": 0.3670594096183777, "learning_rate": 0.0001703013492902287, "loss": 1.1839, "step": 524 }, { "epoch": 0.25591030952961247, "grad_norm": 0.40726083517074585, "learning_rate": 0.00017019179878613596, "loss": 1.2079, "step": 525 }, { "epoch": 0.2563977577382403, "grad_norm": 0.32286155223846436, "learning_rate": 0.00017008208197906674, "loss": 1.3053, "step": 526 }, { "epoch": 0.25688520594686814, "grad_norm": 0.33716845512390137, "learning_rate": 0.00016997219912896923, "loss": 1.3214, "step": 527 }, { "epoch": 0.257372654155496, "grad_norm": 0.36912742257118225, "learning_rate": 0.00016986215049618505, "loss": 1.2605, "step": 528 }, { "epoch": 0.2578601023641238, "grad_norm": 0.4095950424671173, "learning_rate": 0.0001697519363414485, "loss": 1.276, "step": 529 }, { "epoch": 0.25834755057275166, "grad_norm": 0.4105255603790283, "learning_rate": 0.00016964155692588616, "loss": 1.231, "step": 530 }, { "epoch": 0.2588349987813795, "grad_norm": 0.3191453516483307, "learning_rate": 0.00016953101251101618, "loss": 1.1161, "step": 531 }, { "epoch": 0.25932244699000734, "grad_norm": 0.3601817488670349, "learning_rate": 0.00016942030335874753, "loss": 1.2283, "step": 532 }, { "epoch": 0.2598098951986351, "grad_norm": 0.3143230378627777, "learning_rate": 0.00016930942973137952, "loss": 1.1582, "step": 533 }, { "epoch": 0.26029734340726296, "grad_norm": 0.3640122711658478, "learning_rate": 0.00016919839189160122, "loss": 1.1361, "step": 534 }, { "epoch": 0.2607847916158908, "grad_norm": 0.3497964143753052, "learning_rate": 0.00016908719010249064, "loss": 1.1434, "step": 535 }, { "epoch": 0.26127223982451864, "grad_norm": 0.3731510639190674, "learning_rate": 0.00016897582462751433, "loss": 1.2552, "step": 536 }, { "epoch": 0.2617596880331465, "grad_norm": 0.3403242826461792, "learning_rate": 0.00016886429573052664, "loss": 1.2285, "step": 537 }, { "epoch": 0.2622471362417743, "grad_norm": 0.33967721462249756, "learning_rate": 0.00016875260367576903, "loss": 1.1403, "step": 538 }, { "epoch": 0.26273458445040215, "grad_norm": 0.33196625113487244, "learning_rate": 0.00016864074872786962, "loss": 1.0713, "step": 539 }, { "epoch": 0.26322203265903, "grad_norm": 0.3464784622192383, "learning_rate": 0.00016852873115184242, "loss": 0.9796, "step": 540 }, { "epoch": 0.26370948086765783, "grad_norm": 0.4036043882369995, "learning_rate": 0.00016841655121308677, "loss": 1.3277, "step": 541 }, { "epoch": 0.26419692907628567, "grad_norm": 0.3961755335330963, "learning_rate": 0.00016830420917738668, "loss": 1.3286, "step": 542 }, { "epoch": 0.26468437728491345, "grad_norm": 0.29868268966674805, "learning_rate": 0.00016819170531091017, "loss": 1.1358, "step": 543 }, { "epoch": 0.2651718254935413, "grad_norm": 0.3506234884262085, "learning_rate": 0.0001680790398802088, "loss": 1.098, "step": 544 }, { "epoch": 0.26565927370216913, "grad_norm": 0.3245921730995178, "learning_rate": 0.00016796621315221677, "loss": 1.3453, "step": 545 }, { "epoch": 0.26614672191079697, "grad_norm": 0.37823474407196045, "learning_rate": 0.00016785322539425053, "loss": 1.1506, "step": 546 }, { "epoch": 0.2666341701194248, "grad_norm": 0.3234574794769287, "learning_rate": 0.00016774007687400802, "loss": 1.1525, "step": 547 }, { "epoch": 0.26712161832805265, "grad_norm": 0.33606481552124023, "learning_rate": 0.00016762676785956807, "loss": 1.2782, "step": 548 }, { "epoch": 0.2676090665366805, "grad_norm": 0.3780474364757538, "learning_rate": 0.0001675132986193898, "loss": 1.1686, "step": 549 }, { "epoch": 0.2680965147453083, "grad_norm": 0.34344884753227234, "learning_rate": 0.0001673996694223119, "loss": 1.2374, "step": 550 }, { "epoch": 0.26858396295393616, "grad_norm": 0.3959074914455414, "learning_rate": 0.00016728588053755203, "loss": 1.2054, "step": 551 }, { "epoch": 0.269071411162564, "grad_norm": 0.4201538562774658, "learning_rate": 0.00016717193223470623, "loss": 1.1499, "step": 552 }, { "epoch": 0.2695588593711918, "grad_norm": 0.37865114212036133, "learning_rate": 0.00016705782478374827, "loss": 1.2507, "step": 553 }, { "epoch": 0.2700463075798196, "grad_norm": 0.3289918601512909, "learning_rate": 0.0001669435584550289, "loss": 1.2065, "step": 554 }, { "epoch": 0.27053375578844746, "grad_norm": 0.3060459494590759, "learning_rate": 0.0001668291335192754, "loss": 1.1876, "step": 555 }, { "epoch": 0.2710212039970753, "grad_norm": 0.3648618161678314, "learning_rate": 0.00016671455024759067, "loss": 1.1132, "step": 556 }, { "epoch": 0.27150865220570314, "grad_norm": 0.35501593351364136, "learning_rate": 0.00016659980891145298, "loss": 1.1747, "step": 557 }, { "epoch": 0.271996100414331, "grad_norm": 0.31883859634399414, "learning_rate": 0.0001664849097827149, "loss": 1.217, "step": 558 }, { "epoch": 0.2724835486229588, "grad_norm": 0.27165141701698303, "learning_rate": 0.00016636985313360289, "loss": 1.1628, "step": 559 }, { "epoch": 0.27297099683158665, "grad_norm": 0.3059232831001282, "learning_rate": 0.00016625463923671668, "loss": 1.043, "step": 560 }, { "epoch": 0.2734584450402145, "grad_norm": 0.35888320207595825, "learning_rate": 0.0001661392683650286, "loss": 1.1712, "step": 561 }, { "epoch": 0.27394589324884233, "grad_norm": 0.3877924978733063, "learning_rate": 0.0001660237407918827, "loss": 1.1422, "step": 562 }, { "epoch": 0.27443334145747017, "grad_norm": 0.3447536528110504, "learning_rate": 0.0001659080567909945, "loss": 1.0769, "step": 563 }, { "epoch": 0.27492078966609795, "grad_norm": 0.3665088415145874, "learning_rate": 0.00016579221663645007, "loss": 1.1923, "step": 564 }, { "epoch": 0.2754082378747258, "grad_norm": 0.323263943195343, "learning_rate": 0.0001656762206027054, "loss": 1.2303, "step": 565 }, { "epoch": 0.27589568608335363, "grad_norm": 0.3286653757095337, "learning_rate": 0.0001655600689645858, "loss": 1.3282, "step": 566 }, { "epoch": 0.27638313429198147, "grad_norm": 0.30817481875419617, "learning_rate": 0.00016544376199728543, "loss": 1.2157, "step": 567 }, { "epoch": 0.2768705825006093, "grad_norm": 0.31664392352104187, "learning_rate": 0.0001653272999763662, "loss": 1.1781, "step": 568 }, { "epoch": 0.27735803070923715, "grad_norm": 0.3369152247905731, "learning_rate": 0.00016521068317775753, "loss": 1.2071, "step": 569 }, { "epoch": 0.277845478917865, "grad_norm": 0.4035291075706482, "learning_rate": 0.00016509391187775557, "loss": 1.1537, "step": 570 }, { "epoch": 0.2783329271264928, "grad_norm": 0.2857488691806793, "learning_rate": 0.00016497698635302243, "loss": 1.1335, "step": 571 }, { "epoch": 0.27882037533512066, "grad_norm": 0.3045223653316498, "learning_rate": 0.0001648599068805857, "loss": 1.2276, "step": 572 }, { "epoch": 0.2793078235437485, "grad_norm": 0.3673149347305298, "learning_rate": 0.00016474267373783768, "loss": 1.3442, "step": 573 }, { "epoch": 0.2797952717523763, "grad_norm": 0.464558482170105, "learning_rate": 0.00016462528720253482, "loss": 1.0838, "step": 574 }, { "epoch": 0.2802827199610041, "grad_norm": 0.40606024861335754, "learning_rate": 0.00016450774755279678, "loss": 1.266, "step": 575 }, { "epoch": 0.28077016816963196, "grad_norm": 0.3047773241996765, "learning_rate": 0.0001643900550671063, "loss": 1.125, "step": 576 }, { "epoch": 0.2812576163782598, "grad_norm": 0.30575650930404663, "learning_rate": 0.00016427221002430798, "loss": 1.0903, "step": 577 }, { "epoch": 0.28174506458688764, "grad_norm": 0.3327069580554962, "learning_rate": 0.000164154212703608, "loss": 1.0927, "step": 578 }, { "epoch": 0.2822325127955155, "grad_norm": 0.3694591522216797, "learning_rate": 0.0001640360633845733, "loss": 1.2099, "step": 579 }, { "epoch": 0.2827199610041433, "grad_norm": 0.4236904978752136, "learning_rate": 0.0001639177623471309, "loss": 1.2688, "step": 580 }, { "epoch": 0.28320740921277116, "grad_norm": 0.4387606978416443, "learning_rate": 0.00016379930987156735, "loss": 1.2561, "step": 581 }, { "epoch": 0.283694857421399, "grad_norm": 0.32503339648246765, "learning_rate": 0.00016368070623852792, "loss": 1.2589, "step": 582 }, { "epoch": 0.28418230563002683, "grad_norm": 0.3146470785140991, "learning_rate": 0.00016356195172901613, "loss": 1.151, "step": 583 }, { "epoch": 0.2846697538386546, "grad_norm": 0.28814375400543213, "learning_rate": 0.00016344304662439284, "loss": 1.1437, "step": 584 }, { "epoch": 0.28515720204728245, "grad_norm": 0.342899888753891, "learning_rate": 0.0001633239912063757, "loss": 1.1183, "step": 585 }, { "epoch": 0.2856446502559103, "grad_norm": 0.40479201078414917, "learning_rate": 0.00016320478575703864, "loss": 1.15, "step": 586 }, { "epoch": 0.28613209846453813, "grad_norm": 0.4515096843242645, "learning_rate": 0.00016308543055881098, "loss": 1.1598, "step": 587 }, { "epoch": 0.28661954667316597, "grad_norm": 0.35229426622390747, "learning_rate": 0.00016296592589447673, "loss": 1.3097, "step": 588 }, { "epoch": 0.2871069948817938, "grad_norm": 0.3666463792324066, "learning_rate": 0.00016284627204717417, "loss": 1.2794, "step": 589 }, { "epoch": 0.28759444309042165, "grad_norm": 0.30023884773254395, "learning_rate": 0.000162726469300395, "loss": 1.1387, "step": 590 }, { "epoch": 0.2880818912990495, "grad_norm": 0.3334953784942627, "learning_rate": 0.00016260651793798366, "loss": 1.1033, "step": 591 }, { "epoch": 0.2885693395076773, "grad_norm": 0.3084891736507416, "learning_rate": 0.00016248641824413671, "loss": 1.2619, "step": 592 }, { "epoch": 0.28905678771630516, "grad_norm": 0.35241594910621643, "learning_rate": 0.00016236617050340214, "loss": 1.1398, "step": 593 }, { "epoch": 0.289544235924933, "grad_norm": 0.33038970828056335, "learning_rate": 0.00016224577500067877, "loss": 1.128, "step": 594 }, { "epoch": 0.2900316841335608, "grad_norm": 0.3409155607223511, "learning_rate": 0.00016212523202121544, "loss": 1.0931, "step": 595 }, { "epoch": 0.2905191323421886, "grad_norm": 0.3841816782951355, "learning_rate": 0.00016200454185061043, "loss": 1.0484, "step": 596 }, { "epoch": 0.29100658055081646, "grad_norm": 0.3630054295063019, "learning_rate": 0.00016188370477481073, "loss": 1.2399, "step": 597 }, { "epoch": 0.2914940287594443, "grad_norm": 0.35175657272338867, "learning_rate": 0.00016176272108011142, "loss": 1.1994, "step": 598 }, { "epoch": 0.29198147696807214, "grad_norm": 0.2972811460494995, "learning_rate": 0.00016164159105315487, "loss": 1.0144, "step": 599 }, { "epoch": 0.2924689251767, "grad_norm": 0.32659175992012024, "learning_rate": 0.0001615203149809303, "loss": 1.1463, "step": 600 }, { "epoch": 0.2929563733853278, "grad_norm": 0.3672257363796234, "learning_rate": 0.00016139889315077287, "loss": 1.0791, "step": 601 }, { "epoch": 0.29344382159395566, "grad_norm": 0.3619696795940399, "learning_rate": 0.00016127732585036303, "loss": 1.2297, "step": 602 }, { "epoch": 0.2939312698025835, "grad_norm": 0.34306833148002625, "learning_rate": 0.00016115561336772598, "loss": 1.1338, "step": 603 }, { "epoch": 0.29441871801121133, "grad_norm": 0.3267507553100586, "learning_rate": 0.00016103375599123084, "loss": 1.0824, "step": 604 }, { "epoch": 0.2949061662198391, "grad_norm": 0.3431141972541809, "learning_rate": 0.00016091175400959005, "loss": 1.181, "step": 605 }, { "epoch": 0.29539361442846696, "grad_norm": 0.4020690619945526, "learning_rate": 0.00016078960771185856, "loss": 1.1829, "step": 606 }, { "epoch": 0.2958810626370948, "grad_norm": 0.32137131690979004, "learning_rate": 0.00016066731738743344, "loss": 1.1541, "step": 607 }, { "epoch": 0.29636851084572263, "grad_norm": 0.39603304862976074, "learning_rate": 0.00016054488332605283, "loss": 1.0096, "step": 608 }, { "epoch": 0.29685595905435047, "grad_norm": 0.3227417767047882, "learning_rate": 0.00016042230581779547, "loss": 1.2699, "step": 609 }, { "epoch": 0.2973434072629783, "grad_norm": 0.391846626996994, "learning_rate": 0.00016029958515307997, "loss": 1.219, "step": 610 }, { "epoch": 0.29783085547160615, "grad_norm": 0.33323460817337036, "learning_rate": 0.00016017672162266412, "loss": 1.1751, "step": 611 }, { "epoch": 0.298318303680234, "grad_norm": 0.3665689527988434, "learning_rate": 0.0001600537155176442, "loss": 1.134, "step": 612 }, { "epoch": 0.2988057518888618, "grad_norm": 0.4079165458679199, "learning_rate": 0.00015993056712945423, "loss": 1.051, "step": 613 }, { "epoch": 0.29929320009748966, "grad_norm": 0.4210244417190552, "learning_rate": 0.00015980727674986547, "loss": 1.3286, "step": 614 }, { "epoch": 0.29978064830611745, "grad_norm": 0.35658687353134155, "learning_rate": 0.0001596838446709854, "loss": 1.1324, "step": 615 }, { "epoch": 0.3002680965147453, "grad_norm": 0.30646565556526184, "learning_rate": 0.00015956027118525743, "loss": 1.1269, "step": 616 }, { "epoch": 0.3007555447233731, "grad_norm": 0.4578656852245331, "learning_rate": 0.00015943655658545987, "loss": 1.505, "step": 617 }, { "epoch": 0.30124299293200096, "grad_norm": 0.3560977876186371, "learning_rate": 0.00015931270116470537, "loss": 1.0863, "step": 618 }, { "epoch": 0.3017304411406288, "grad_norm": 0.3431141972541809, "learning_rate": 0.00015918870521644025, "loss": 1.1088, "step": 619 }, { "epoch": 0.30221788934925664, "grad_norm": 0.364183634519577, "learning_rate": 0.00015906456903444385, "loss": 1.168, "step": 620 }, { "epoch": 0.3027053375578845, "grad_norm": 0.33767372369766235, "learning_rate": 0.00015894029291282758, "loss": 1.1933, "step": 621 }, { "epoch": 0.3031927857665123, "grad_norm": 0.3631367087364197, "learning_rate": 0.00015881587714603463, "loss": 1.0731, "step": 622 }, { "epoch": 0.30368023397514016, "grad_norm": 0.3278936445713043, "learning_rate": 0.00015869132202883885, "loss": 1.169, "step": 623 }, { "epoch": 0.304167682183768, "grad_norm": 0.38841626048088074, "learning_rate": 0.00015856662785634432, "loss": 1.2075, "step": 624 }, { "epoch": 0.30465513039239583, "grad_norm": 0.3402353823184967, "learning_rate": 0.00015844179492398465, "loss": 1.1615, "step": 625 }, { "epoch": 0.3051425786010236, "grad_norm": 0.2887095510959625, "learning_rate": 0.0001583168235275221, "loss": 1.1755, "step": 626 }, { "epoch": 0.30563002680965146, "grad_norm": 0.327481746673584, "learning_rate": 0.00015819171396304704, "loss": 1.0832, "step": 627 }, { "epoch": 0.3061174750182793, "grad_norm": 0.33721551299095154, "learning_rate": 0.00015806646652697718, "loss": 1.1645, "step": 628 }, { "epoch": 0.30660492322690713, "grad_norm": 0.36406847834587097, "learning_rate": 0.00015794108151605696, "loss": 1.2106, "step": 629 }, { "epoch": 0.30709237143553497, "grad_norm": 0.3128565847873688, "learning_rate": 0.0001578155592273566, "loss": 1.2149, "step": 630 }, { "epoch": 0.3075798196441628, "grad_norm": 0.31122851371765137, "learning_rate": 0.00015768989995827175, "loss": 1.2292, "step": 631 }, { "epoch": 0.30806726785279065, "grad_norm": 0.37615668773651123, "learning_rate": 0.00015756410400652248, "loss": 1.3363, "step": 632 }, { "epoch": 0.3085547160614185, "grad_norm": 0.3348335027694702, "learning_rate": 0.0001574381716701528, "loss": 1.1649, "step": 633 }, { "epoch": 0.3090421642700463, "grad_norm": 0.3323783278465271, "learning_rate": 0.00015731210324752972, "loss": 1.1069, "step": 634 }, { "epoch": 0.30952961247867417, "grad_norm": 0.37065181136131287, "learning_rate": 0.00015718589903734282, "loss": 1.2328, "step": 635 }, { "epoch": 0.31001706068730195, "grad_norm": 0.3175657391548157, "learning_rate": 0.0001570595593386033, "loss": 1.0801, "step": 636 }, { "epoch": 0.3105045088959298, "grad_norm": 0.30346453189849854, "learning_rate": 0.00015693308445064336, "loss": 1.1101, "step": 637 }, { "epoch": 0.3109919571045576, "grad_norm": 0.31637394428253174, "learning_rate": 0.00015680647467311557, "loss": 1.1573, "step": 638 }, { "epoch": 0.31147940531318546, "grad_norm": 0.2695266306400299, "learning_rate": 0.00015667973030599207, "loss": 1.0157, "step": 639 }, { "epoch": 0.3119668535218133, "grad_norm": 0.34872207045555115, "learning_rate": 0.00015655285164956385, "loss": 1.2239, "step": 640 }, { "epoch": 0.31245430173044114, "grad_norm": 0.3599780797958374, "learning_rate": 0.00015642583900444, "loss": 1.145, "step": 641 }, { "epoch": 0.312941749939069, "grad_norm": 0.3584708869457245, "learning_rate": 0.00015629869267154726, "loss": 1.3491, "step": 642 }, { "epoch": 0.3134291981476968, "grad_norm": 0.3707854449748993, "learning_rate": 0.0001561714129521289, "loss": 1.1459, "step": 643 }, { "epoch": 0.31391664635632466, "grad_norm": 0.3733751177787781, "learning_rate": 0.00015604400014774443, "loss": 1.2256, "step": 644 }, { "epoch": 0.3144040945649525, "grad_norm": 0.39851659536361694, "learning_rate": 0.0001559164545602684, "loss": 1.1227, "step": 645 }, { "epoch": 0.3148915427735803, "grad_norm": 0.3187284469604492, "learning_rate": 0.0001557887764918902, "loss": 1.1991, "step": 646 }, { "epoch": 0.3153789909822081, "grad_norm": 0.4330853521823883, "learning_rate": 0.00015566096624511307, "loss": 1.1385, "step": 647 }, { "epoch": 0.31586643919083596, "grad_norm": 0.3386607766151428, "learning_rate": 0.00015553302412275326, "loss": 1.1159, "step": 648 }, { "epoch": 0.3163538873994638, "grad_norm": 0.36546099185943604, "learning_rate": 0.0001554049504279396, "loss": 1.1105, "step": 649 }, { "epoch": 0.31684133560809163, "grad_norm": 0.34602367877960205, "learning_rate": 0.00015527674546411265, "loss": 1.0896, "step": 650 }, { "epoch": 0.3173287838167195, "grad_norm": 0.4234674274921417, "learning_rate": 0.00015514840953502392, "loss": 1.084, "step": 651 }, { "epoch": 0.3178162320253473, "grad_norm": 0.4006870687007904, "learning_rate": 0.0001550199429447353, "loss": 1.2597, "step": 652 }, { "epoch": 0.31830368023397515, "grad_norm": 0.2935373783111572, "learning_rate": 0.0001548913459976181, "loss": 1.1391, "step": 653 }, { "epoch": 0.318791128442603, "grad_norm": 0.34012746810913086, "learning_rate": 0.00015476261899835265, "loss": 1.2911, "step": 654 }, { "epoch": 0.3192785766512308, "grad_norm": 0.4001348614692688, "learning_rate": 0.00015463376225192734, "loss": 1.2319, "step": 655 }, { "epoch": 0.31976602485985867, "grad_norm": 0.35862889885902405, "learning_rate": 0.00015450477606363786, "loss": 1.2286, "step": 656 }, { "epoch": 0.32025347306848645, "grad_norm": 0.3550409972667694, "learning_rate": 0.00015437566073908681, "loss": 1.1207, "step": 657 }, { "epoch": 0.3207409212771143, "grad_norm": 0.3076868951320648, "learning_rate": 0.0001542464165841825, "loss": 1.2359, "step": 658 }, { "epoch": 0.3212283694857421, "grad_norm": 0.3051203787326813, "learning_rate": 0.00015411704390513867, "loss": 1.2276, "step": 659 }, { "epoch": 0.32171581769436997, "grad_norm": 0.3316993713378906, "learning_rate": 0.00015398754300847343, "loss": 1.188, "step": 660 }, { "epoch": 0.3222032659029978, "grad_norm": 0.35070377588272095, "learning_rate": 0.00015385791420100876, "loss": 0.951, "step": 661 }, { "epoch": 0.32269071411162564, "grad_norm": 0.31650400161743164, "learning_rate": 0.00015372815778986971, "loss": 1.2612, "step": 662 }, { "epoch": 0.3231781623202535, "grad_norm": 0.2900366485118866, "learning_rate": 0.00015359827408248356, "loss": 1.055, "step": 663 }, { "epoch": 0.3236656105288813, "grad_norm": 0.3152225613594055, "learning_rate": 0.0001534682633865792, "loss": 1.0437, "step": 664 }, { "epoch": 0.32415305873750916, "grad_norm": 0.3429870009422302, "learning_rate": 0.0001533381260101865, "loss": 1.1633, "step": 665 }, { "epoch": 0.324640506946137, "grad_norm": 0.36703914403915405, "learning_rate": 0.00015320786226163537, "loss": 1.254, "step": 666 }, { "epoch": 0.3251279551547648, "grad_norm": 0.4383450150489807, "learning_rate": 0.00015307747244955517, "loss": 1.225, "step": 667 }, { "epoch": 0.3256154033633926, "grad_norm": 0.30220985412597656, "learning_rate": 0.00015294695688287396, "loss": 1.0824, "step": 668 }, { "epoch": 0.32610285157202046, "grad_norm": 0.3530774414539337, "learning_rate": 0.00015281631587081763, "loss": 1.1806, "step": 669 }, { "epoch": 0.3265902997806483, "grad_norm": 0.32462990283966064, "learning_rate": 0.00015268554972290937, "loss": 1.1544, "step": 670 }, { "epoch": 0.32707774798927614, "grad_norm": 0.3622325360774994, "learning_rate": 0.00015255465874896898, "loss": 1.2438, "step": 671 }, { "epoch": 0.327565196197904, "grad_norm": 0.3258713185787201, "learning_rate": 0.0001524236432591117, "loss": 1.1685, "step": 672 }, { "epoch": 0.3280526444065318, "grad_norm": 0.3288484811782837, "learning_rate": 0.00015229250356374804, "loss": 1.163, "step": 673 }, { "epoch": 0.32854009261515965, "grad_norm": 0.28387171030044556, "learning_rate": 0.0001521612399735827, "loss": 1.1612, "step": 674 }, { "epoch": 0.3290275408237875, "grad_norm": 0.3023267090320587, "learning_rate": 0.0001520298527996139, "loss": 1.107, "step": 675 }, { "epoch": 0.32951498903241533, "grad_norm": 0.33846303820610046, "learning_rate": 0.00015189834235313266, "loss": 1.1018, "step": 676 }, { "epoch": 0.3300024372410431, "grad_norm": 0.4579784870147705, "learning_rate": 0.00015176670894572212, "loss": 1.2989, "step": 677 }, { "epoch": 0.33048988544967095, "grad_norm": 0.4055747985839844, "learning_rate": 0.00015163495288925672, "loss": 1.164, "step": 678 }, { "epoch": 0.3309773336582988, "grad_norm": 0.310495525598526, "learning_rate": 0.00015150307449590143, "loss": 1.216, "step": 679 }, { "epoch": 0.3314647818669266, "grad_norm": 0.3050582706928253, "learning_rate": 0.0001513710740781112, "loss": 1.0968, "step": 680 }, { "epoch": 0.33195223007555447, "grad_norm": 0.3576156497001648, "learning_rate": 0.00015123895194862997, "loss": 1.0803, "step": 681 }, { "epoch": 0.3324396782841823, "grad_norm": 0.4268052577972412, "learning_rate": 0.00015110670842049005, "loss": 1.1273, "step": 682 }, { "epoch": 0.33292712649281014, "grad_norm": 0.35450395941734314, "learning_rate": 0.00015097434380701143, "loss": 1.2042, "step": 683 }, { "epoch": 0.333414574701438, "grad_norm": 0.3277284502983093, "learning_rate": 0.000150841858421801, "loss": 1.099, "step": 684 }, { "epoch": 0.3339020229100658, "grad_norm": 0.3645627796649933, "learning_rate": 0.00015070925257875173, "loss": 1.1794, "step": 685 }, { "epoch": 0.33438947111869366, "grad_norm": 0.3086869716644287, "learning_rate": 0.00015057652659204197, "loss": 1.0318, "step": 686 }, { "epoch": 0.3348769193273215, "grad_norm": 0.2932779788970947, "learning_rate": 0.00015044368077613482, "loss": 1.086, "step": 687 }, { "epoch": 0.3353643675359493, "grad_norm": 0.3116811513900757, "learning_rate": 0.00015031071544577724, "loss": 1.0702, "step": 688 }, { "epoch": 0.3358518157445771, "grad_norm": 0.36161184310913086, "learning_rate": 0.00015017763091599928, "loss": 1.1014, "step": 689 }, { "epoch": 0.33633926395320496, "grad_norm": 0.3198288381099701, "learning_rate": 0.00015004442750211352, "loss": 1.2034, "step": 690 }, { "epoch": 0.3368267121618328, "grad_norm": 0.350289523601532, "learning_rate": 0.00014991110551971414, "loss": 1.2719, "step": 691 }, { "epoch": 0.33731416037046064, "grad_norm": 0.3655170798301697, "learning_rate": 0.0001497776652846762, "loss": 1.1432, "step": 692 }, { "epoch": 0.3378016085790885, "grad_norm": 0.41630303859710693, "learning_rate": 0.0001496441071131551, "loss": 1.2402, "step": 693 }, { "epoch": 0.3382890567877163, "grad_norm": 0.3537638485431671, "learning_rate": 0.00014951043132158546, "loss": 1.3121, "step": 694 }, { "epoch": 0.33877650499634415, "grad_norm": 0.29230767488479614, "learning_rate": 0.00014937663822668065, "loss": 0.9822, "step": 695 }, { "epoch": 0.339263953204972, "grad_norm": 0.3335493505001068, "learning_rate": 0.00014924272814543208, "loss": 1.2364, "step": 696 }, { "epoch": 0.33975140141359983, "grad_norm": 0.3376023471355438, "learning_rate": 0.00014910870139510815, "loss": 1.2683, "step": 697 }, { "epoch": 0.3402388496222276, "grad_norm": 0.3680487275123596, "learning_rate": 0.00014897455829325374, "loss": 1.0877, "step": 698 }, { "epoch": 0.34072629783085545, "grad_norm": 0.33870601654052734, "learning_rate": 0.00014884029915768944, "loss": 1.1303, "step": 699 }, { "epoch": 0.3412137460394833, "grad_norm": 0.31731757521629333, "learning_rate": 0.00014870592430651073, "loss": 1.2437, "step": 700 }, { "epoch": 0.34170119424811113, "grad_norm": 0.31661751866340637, "learning_rate": 0.00014857143405808728, "loss": 1.0931, "step": 701 }, { "epoch": 0.34218864245673897, "grad_norm": 0.38429930806159973, "learning_rate": 0.0001484368287310621, "loss": 1.115, "step": 702 }, { "epoch": 0.3426760906653668, "grad_norm": 0.4297671914100647, "learning_rate": 0.00014830210864435087, "loss": 1.2848, "step": 703 }, { "epoch": 0.34316353887399464, "grad_norm": 0.2973197400569916, "learning_rate": 0.00014816727411714125, "loss": 1.0212, "step": 704 }, { "epoch": 0.3436509870826225, "grad_norm": 0.3790116310119629, "learning_rate": 0.00014803232546889192, "loss": 1.1624, "step": 705 }, { "epoch": 0.3441384352912503, "grad_norm": 0.3607287108898163, "learning_rate": 0.000147897263019332, "loss": 1.1087, "step": 706 }, { "epoch": 0.34462588349987816, "grad_norm": 0.3548680245876312, "learning_rate": 0.00014776208708846026, "loss": 1.1597, "step": 707 }, { "epoch": 0.34511333170850594, "grad_norm": 0.351642370223999, "learning_rate": 0.0001476267979965443, "loss": 1.1223, "step": 708 }, { "epoch": 0.3456007799171338, "grad_norm": 0.3794901669025421, "learning_rate": 0.00014749139606411982, "loss": 1.2532, "step": 709 }, { "epoch": 0.3460882281257616, "grad_norm": 0.36454519629478455, "learning_rate": 0.0001473558816119899, "loss": 1.2419, "step": 710 }, { "epoch": 0.34657567633438946, "grad_norm": 0.37354356050491333, "learning_rate": 0.00014722025496122421, "loss": 1.0632, "step": 711 }, { "epoch": 0.3470631245430173, "grad_norm": 0.4036533534526825, "learning_rate": 0.00014708451643315827, "loss": 1.2025, "step": 712 }, { "epoch": 0.34755057275164514, "grad_norm": 0.3371066451072693, "learning_rate": 0.0001469486663493925, "loss": 1.1734, "step": 713 }, { "epoch": 0.348038020960273, "grad_norm": 0.34339281916618347, "learning_rate": 0.00014681270503179192, "loss": 1.0125, "step": 714 }, { "epoch": 0.3485254691689008, "grad_norm": 0.3904884457588196, "learning_rate": 0.0001466766328024848, "loss": 1.2684, "step": 715 }, { "epoch": 0.34901291737752865, "grad_norm": 0.3271896541118622, "learning_rate": 0.00014654044998386242, "loss": 1.2133, "step": 716 }, { "epoch": 0.3495003655861565, "grad_norm": 0.4050779938697815, "learning_rate": 0.0001464041568985779, "loss": 1.0819, "step": 717 }, { "epoch": 0.34998781379478433, "grad_norm": 0.35053789615631104, "learning_rate": 0.0001462677538695457, "loss": 1.1508, "step": 718 }, { "epoch": 0.3504752620034121, "grad_norm": 0.3360033929347992, "learning_rate": 0.00014613124121994078, "loss": 1.2554, "step": 719 }, { "epoch": 0.35096271021203995, "grad_norm": 0.2929222583770752, "learning_rate": 0.00014599461927319778, "loss": 1.1245, "step": 720 }, { "epoch": 0.3514501584206678, "grad_norm": 0.3568030595779419, "learning_rate": 0.00014585788835301026, "loss": 1.1633, "step": 721 }, { "epoch": 0.35193760662929563, "grad_norm": 0.3090217709541321, "learning_rate": 0.00014572104878333007, "loss": 1.1178, "step": 722 }, { "epoch": 0.35242505483792347, "grad_norm": 0.3651696741580963, "learning_rate": 0.00014558410088836643, "loss": 1.2256, "step": 723 }, { "epoch": 0.3529125030465513, "grad_norm": 0.4779524803161621, "learning_rate": 0.00014544704499258514, "loss": 1.2883, "step": 724 }, { "epoch": 0.35339995125517915, "grad_norm": 0.31629809737205505, "learning_rate": 0.00014530988142070803, "loss": 1.1007, "step": 725 }, { "epoch": 0.353887399463807, "grad_norm": 0.33082258701324463, "learning_rate": 0.00014517261049771187, "loss": 1.1855, "step": 726 }, { "epoch": 0.3543748476724348, "grad_norm": 0.34488508105278015, "learning_rate": 0.0001450352325488279, "loss": 1.0958, "step": 727 }, { "epoch": 0.35486229588106266, "grad_norm": 0.4402891993522644, "learning_rate": 0.00014489774789954094, "loss": 1.1953, "step": 728 }, { "epoch": 0.35534974408969044, "grad_norm": 0.3120886981487274, "learning_rate": 0.00014476015687558846, "loss": 1.0842, "step": 729 }, { "epoch": 0.3558371922983183, "grad_norm": 0.42847946286201477, "learning_rate": 0.00014462245980296018, "loss": 1.1887, "step": 730 }, { "epoch": 0.3563246405069461, "grad_norm": 0.2847718298435211, "learning_rate": 0.00014448465700789685, "loss": 1.1547, "step": 731 }, { "epoch": 0.35681208871557396, "grad_norm": 0.4204113781452179, "learning_rate": 0.00014434674881688995, "loss": 1.1289, "step": 732 }, { "epoch": 0.3572995369242018, "grad_norm": 0.28889018297195435, "learning_rate": 0.00014420873555668045, "loss": 1.204, "step": 733 }, { "epoch": 0.35778698513282964, "grad_norm": 0.41692429780960083, "learning_rate": 0.00014407061755425832, "loss": 1.0804, "step": 734 }, { "epoch": 0.3582744333414575, "grad_norm": 0.32253298163414, "learning_rate": 0.00014393239513686178, "loss": 1.2295, "step": 735 }, { "epoch": 0.3587618815500853, "grad_norm": 0.4006897211074829, "learning_rate": 0.00014379406863197636, "loss": 1.1688, "step": 736 }, { "epoch": 0.35924932975871315, "grad_norm": 0.4267953932285309, "learning_rate": 0.00014365563836733425, "loss": 1.2379, "step": 737 }, { "epoch": 0.359736777967341, "grad_norm": 0.3543131947517395, "learning_rate": 0.00014351710467091336, "loss": 1.1089, "step": 738 }, { "epoch": 0.3602242261759688, "grad_norm": 0.33696895837783813, "learning_rate": 0.00014337846787093679, "loss": 1.1694, "step": 739 }, { "epoch": 0.3607116743845966, "grad_norm": 0.2915426790714264, "learning_rate": 0.00014323972829587183, "loss": 1.1127, "step": 740 }, { "epoch": 0.36119912259322445, "grad_norm": 0.3513476252555847, "learning_rate": 0.00014310088627442937, "loss": 1.1797, "step": 741 }, { "epoch": 0.3616865708018523, "grad_norm": 0.34113776683807373, "learning_rate": 0.00014296194213556289, "loss": 1.1351, "step": 742 }, { "epoch": 0.36217401901048013, "grad_norm": 0.48686835169792175, "learning_rate": 0.0001428228962084679, "loss": 1.4038, "step": 743 }, { "epoch": 0.36266146721910797, "grad_norm": 0.3965618908405304, "learning_rate": 0.00014268374882258112, "loss": 1.1309, "step": 744 }, { "epoch": 0.3631489154277358, "grad_norm": 0.36199474334716797, "learning_rate": 0.0001425445003075795, "loss": 1.1132, "step": 745 }, { "epoch": 0.36363636363636365, "grad_norm": 0.315054714679718, "learning_rate": 0.00014240515099337976, "loss": 1.2075, "step": 746 }, { "epoch": 0.3641238118449915, "grad_norm": 0.37648653984069824, "learning_rate": 0.00014226570121013733, "loss": 1.1172, "step": 747 }, { "epoch": 0.3646112600536193, "grad_norm": 0.38015294075012207, "learning_rate": 0.00014212615128824564, "loss": 1.1411, "step": 748 }, { "epoch": 0.36509870826224716, "grad_norm": 0.3627578616142273, "learning_rate": 0.00014198650155833556, "loss": 1.0399, "step": 749 }, { "epoch": 0.36558615647087495, "grad_norm": 0.3377631604671478, "learning_rate": 0.00014184675235127427, "loss": 1.1939, "step": 750 }, { "epoch": 0.3660736046795028, "grad_norm": 0.3698626458644867, "learning_rate": 0.0001417069039981647, "loss": 1.1762, "step": 751 }, { "epoch": 0.3665610528881306, "grad_norm": 0.3546832203865051, "learning_rate": 0.0001415669568303446, "loss": 1.0773, "step": 752 }, { "epoch": 0.36704850109675846, "grad_norm": 0.34372153878211975, "learning_rate": 0.00014142691117938593, "loss": 1.091, "step": 753 }, { "epoch": 0.3675359493053863, "grad_norm": 0.36699178814888, "learning_rate": 0.00014128676737709404, "loss": 1.2068, "step": 754 }, { "epoch": 0.36802339751401414, "grad_norm": 0.39629635214805603, "learning_rate": 0.00014114652575550663, "loss": 1.2551, "step": 755 }, { "epoch": 0.368510845722642, "grad_norm": 0.4091489613056183, "learning_rate": 0.0001410061866468934, "loss": 1.3286, "step": 756 }, { "epoch": 0.3689982939312698, "grad_norm": 0.3547840714454651, "learning_rate": 0.00014086575038375475, "loss": 1.1421, "step": 757 }, { "epoch": 0.36948574213989765, "grad_norm": 0.3419768214225769, "learning_rate": 0.00014072521729882153, "loss": 1.1423, "step": 758 }, { "epoch": 0.3699731903485255, "grad_norm": 0.4046650826931, "learning_rate": 0.00014058458772505384, "loss": 1.1828, "step": 759 }, { "epoch": 0.3704606385571533, "grad_norm": 0.4120461642742157, "learning_rate": 0.00014044386199564034, "loss": 1.1604, "step": 760 }, { "epoch": 0.3709480867657811, "grad_norm": 0.3190382122993469, "learning_rate": 0.00014030304044399764, "loss": 1.2429, "step": 761 }, { "epoch": 0.37143553497440895, "grad_norm": 0.4019441604614258, "learning_rate": 0.00014016212340376937, "loss": 1.1157, "step": 762 }, { "epoch": 0.3719229831830368, "grad_norm": 0.3926464915275574, "learning_rate": 0.00014002111120882532, "loss": 1.0597, "step": 763 }, { "epoch": 0.37241043139166463, "grad_norm": 0.33766475319862366, "learning_rate": 0.00013988000419326072, "loss": 1.2093, "step": 764 }, { "epoch": 0.37289787960029247, "grad_norm": 0.3478360176086426, "learning_rate": 0.0001397388026913955, "loss": 1.1229, "step": 765 }, { "epoch": 0.3733853278089203, "grad_norm": 0.4054871201515198, "learning_rate": 0.0001395975070377735, "loss": 1.2193, "step": 766 }, { "epoch": 0.37387277601754815, "grad_norm": 0.3610527515411377, "learning_rate": 0.0001394561175671615, "loss": 1.1604, "step": 767 }, { "epoch": 0.374360224226176, "grad_norm": 0.3942972421646118, "learning_rate": 0.00013931463461454868, "loss": 1.0723, "step": 768 }, { "epoch": 0.3748476724348038, "grad_norm": 0.38327381014823914, "learning_rate": 0.00013917305851514564, "loss": 1.1892, "step": 769 }, { "epoch": 0.3753351206434316, "grad_norm": 0.3456951677799225, "learning_rate": 0.00013903138960438368, "loss": 1.2442, "step": 770 }, { "epoch": 0.37582256885205945, "grad_norm": 0.3242649435997009, "learning_rate": 0.00013888962821791405, "loss": 1.1207, "step": 771 }, { "epoch": 0.3763100170606873, "grad_norm": 0.2798099219799042, "learning_rate": 0.000138747774691607, "loss": 0.9834, "step": 772 }, { "epoch": 0.3767974652693151, "grad_norm": 0.37123337388038635, "learning_rate": 0.00013860582936155112, "loss": 1.125, "step": 773 }, { "epoch": 0.37728491347794296, "grad_norm": 0.35441383719444275, "learning_rate": 0.00013846379256405257, "loss": 1.1059, "step": 774 }, { "epoch": 0.3777723616865708, "grad_norm": 0.3213593065738678, "learning_rate": 0.00013832166463563413, "loss": 1.1108, "step": 775 }, { "epoch": 0.37825980989519864, "grad_norm": 0.32971900701522827, "learning_rate": 0.00013817944591303457, "loss": 1.1158, "step": 776 }, { "epoch": 0.3787472581038265, "grad_norm": 0.36593112349510193, "learning_rate": 0.00013803713673320772, "loss": 1.2137, "step": 777 }, { "epoch": 0.3792347063124543, "grad_norm": 0.40837281942367554, "learning_rate": 0.00013789473743332174, "loss": 1.2048, "step": 778 }, { "epoch": 0.37972215452108216, "grad_norm": 0.3851155936717987, "learning_rate": 0.00013775224835075835, "loss": 1.1175, "step": 779 }, { "epoch": 0.38020960272971, "grad_norm": 0.352002888917923, "learning_rate": 0.00013760966982311192, "loss": 1.1423, "step": 780 }, { "epoch": 0.3806970509383378, "grad_norm": 0.3558059632778168, "learning_rate": 0.0001374670021881888, "loss": 1.1444, "step": 781 }, { "epoch": 0.3811844991469656, "grad_norm": 0.35093048214912415, "learning_rate": 0.0001373242457840064, "loss": 1.1088, "step": 782 }, { "epoch": 0.38167194735559345, "grad_norm": 0.41846126317977905, "learning_rate": 0.00013718140094879253, "loss": 1.1784, "step": 783 }, { "epoch": 0.3821593955642213, "grad_norm": 0.4293888807296753, "learning_rate": 0.00013703846802098443, "loss": 1.1374, "step": 784 }, { "epoch": 0.38264684377284913, "grad_norm": 0.38579028844833374, "learning_rate": 0.0001368954473392281, "loss": 1.2998, "step": 785 }, { "epoch": 0.38313429198147697, "grad_norm": 0.3546122610569, "learning_rate": 0.00013675233924237743, "loss": 0.9899, "step": 786 }, { "epoch": 0.3836217401901048, "grad_norm": 0.35483241081237793, "learning_rate": 0.00013660914406949344, "loss": 1.1544, "step": 787 }, { "epoch": 0.38410918839873265, "grad_norm": 0.326667845249176, "learning_rate": 0.00013646586215984347, "loss": 1.0996, "step": 788 }, { "epoch": 0.3845966366073605, "grad_norm": 0.29748162627220154, "learning_rate": 0.00013632249385290033, "loss": 1.0593, "step": 789 }, { "epoch": 0.3850840848159883, "grad_norm": 0.4415862560272217, "learning_rate": 0.00013617903948834155, "loss": 1.261, "step": 790 }, { "epoch": 0.3855715330246161, "grad_norm": 0.33770832419395447, "learning_rate": 0.00013603549940604853, "loss": 1.2808, "step": 791 }, { "epoch": 0.38605898123324395, "grad_norm": 0.35436463356018066, "learning_rate": 0.0001358918739461058, "loss": 1.1541, "step": 792 }, { "epoch": 0.3865464294418718, "grad_norm": 0.3231455683708191, "learning_rate": 0.0001357481634488001, "loss": 1.1652, "step": 793 }, { "epoch": 0.3870338776504996, "grad_norm": 0.39417925477027893, "learning_rate": 0.0001356043682546197, "loss": 1.0421, "step": 794 }, { "epoch": 0.38752132585912746, "grad_norm": 0.3667939603328705, "learning_rate": 0.00013546048870425356, "loss": 1.133, "step": 795 }, { "epoch": 0.3880087740677553, "grad_norm": 0.332832932472229, "learning_rate": 0.00013531652513859048, "loss": 1.0692, "step": 796 }, { "epoch": 0.38849622227638314, "grad_norm": 0.42437613010406494, "learning_rate": 0.00013517247789871824, "loss": 1.1769, "step": 797 }, { "epoch": 0.388983670485011, "grad_norm": 0.3812074363231659, "learning_rate": 0.000135028347325923, "loss": 1.2091, "step": 798 }, { "epoch": 0.3894711186936388, "grad_norm": 0.3778877258300781, "learning_rate": 0.0001348841337616882, "loss": 1.2446, "step": 799 }, { "epoch": 0.38995856690226666, "grad_norm": 0.4080727696418762, "learning_rate": 0.00013473983754769413, "loss": 1.2096, "step": 800 }, { "epoch": 0.39044601511089444, "grad_norm": 0.3175933361053467, "learning_rate": 0.0001345954590258167, "loss": 1.0889, "step": 801 }, { "epoch": 0.3909334633195223, "grad_norm": 0.35111868381500244, "learning_rate": 0.00013445099853812687, "loss": 1.1557, "step": 802 }, { "epoch": 0.3914209115281501, "grad_norm": 0.36578190326690674, "learning_rate": 0.00013430645642688988, "loss": 1.2139, "step": 803 }, { "epoch": 0.39190835973677796, "grad_norm": 0.33647310733795166, "learning_rate": 0.00013416183303456425, "loss": 1.1446, "step": 804 }, { "epoch": 0.3923958079454058, "grad_norm": 0.36450833082199097, "learning_rate": 0.0001340171287038012, "loss": 1.118, "step": 805 }, { "epoch": 0.39288325615403363, "grad_norm": 0.3469671308994293, "learning_rate": 0.00013387234377744357, "loss": 1.2262, "step": 806 }, { "epoch": 0.39337070436266147, "grad_norm": 0.3348066210746765, "learning_rate": 0.00013372747859852527, "loss": 1.1409, "step": 807 }, { "epoch": 0.3938581525712893, "grad_norm": 0.35472628474235535, "learning_rate": 0.00013358253351027031, "loss": 1.1755, "step": 808 }, { "epoch": 0.39434560077991715, "grad_norm": 0.32164493203163147, "learning_rate": 0.000133437508856092, "loss": 1.187, "step": 809 }, { "epoch": 0.394833048988545, "grad_norm": 0.32905271649360657, "learning_rate": 0.00013329240497959218, "loss": 1.1553, "step": 810 }, { "epoch": 0.3953204971971728, "grad_norm": 0.39591020345687866, "learning_rate": 0.0001331472222245605, "loss": 1.2909, "step": 811 }, { "epoch": 0.3958079454058006, "grad_norm": 0.3336966633796692, "learning_rate": 0.00013300196093497322, "loss": 1.2074, "step": 812 }, { "epoch": 0.39629539361442845, "grad_norm": 0.3042989671230316, "learning_rate": 0.00013285662145499292, "loss": 1.1509, "step": 813 }, { "epoch": 0.3967828418230563, "grad_norm": 0.3268469572067261, "learning_rate": 0.0001327112041289674, "loss": 1.1062, "step": 814 }, { "epoch": 0.3972702900316841, "grad_norm": 0.33514949679374695, "learning_rate": 0.0001325657093014288, "loss": 0.9606, "step": 815 }, { "epoch": 0.39775773824031196, "grad_norm": 0.4372316598892212, "learning_rate": 0.00013242013731709287, "loss": 1.1181, "step": 816 }, { "epoch": 0.3982451864489398, "grad_norm": 0.2983655631542206, "learning_rate": 0.00013227448852085836, "loss": 1.2019, "step": 817 }, { "epoch": 0.39873263465756764, "grad_norm": 0.3565848171710968, "learning_rate": 0.0001321287632578058, "loss": 1.129, "step": 818 }, { "epoch": 0.3992200828661955, "grad_norm": 0.33070090413093567, "learning_rate": 0.00013198296187319695, "loss": 1.0851, "step": 819 }, { "epoch": 0.3997075310748233, "grad_norm": 0.3758980333805084, "learning_rate": 0.00013183708471247395, "loss": 1.2546, "step": 820 }, { "epoch": 0.40019497928345116, "grad_norm": 0.3357064425945282, "learning_rate": 0.00013169113212125848, "loss": 1.1528, "step": 821 }, { "epoch": 0.40068242749207894, "grad_norm": 0.37070250511169434, "learning_rate": 0.00013154510444535092, "loss": 1.108, "step": 822 }, { "epoch": 0.4011698757007068, "grad_norm": 0.38929644227027893, "learning_rate": 0.0001313990020307295, "loss": 1.1151, "step": 823 }, { "epoch": 0.4016573239093346, "grad_norm": 0.36671310663223267, "learning_rate": 0.00013125282522354957, "loss": 1.0753, "step": 824 }, { "epoch": 0.40214477211796246, "grad_norm": 0.3539426326751709, "learning_rate": 0.00013110657437014278, "loss": 1.2476, "step": 825 }, { "epoch": 0.4026322203265903, "grad_norm": 0.3895106315612793, "learning_rate": 0.00013096024981701612, "loss": 1.2082, "step": 826 }, { "epoch": 0.40311966853521813, "grad_norm": 0.45794007182121277, "learning_rate": 0.00013081385191085127, "loss": 1.22, "step": 827 }, { "epoch": 0.403607116743846, "grad_norm": 0.37870723009109497, "learning_rate": 0.0001306673809985037, "loss": 1.1618, "step": 828 }, { "epoch": 0.4040945649524738, "grad_norm": 0.3808088004589081, "learning_rate": 0.00013052083742700173, "loss": 1.2146, "step": 829 }, { "epoch": 0.40458201316110165, "grad_norm": 0.3768877685070038, "learning_rate": 0.000130374221543546, "loss": 1.2292, "step": 830 }, { "epoch": 0.4050694613697295, "grad_norm": 0.38629233837127686, "learning_rate": 0.0001302275336955084, "loss": 1.187, "step": 831 }, { "epoch": 0.40555690957835727, "grad_norm": 0.3110441267490387, "learning_rate": 0.00013008077423043131, "loss": 1.3096, "step": 832 }, { "epoch": 0.4060443577869851, "grad_norm": 0.3257690668106079, "learning_rate": 0.0001299339434960268, "loss": 1.1468, "step": 833 }, { "epoch": 0.40653180599561295, "grad_norm": 0.3355731666088104, "learning_rate": 0.00012978704184017577, "loss": 1.2313, "step": 834 }, { "epoch": 0.4070192542042408, "grad_norm": 0.2959268093109131, "learning_rate": 0.00012964006961092722, "loss": 1.0838, "step": 835 }, { "epoch": 0.4075067024128686, "grad_norm": 0.3371834456920624, "learning_rate": 0.00012949302715649732, "loss": 1.1436, "step": 836 }, { "epoch": 0.40799415062149647, "grad_norm": 0.29665321111679077, "learning_rate": 0.0001293459148252686, "loss": 1.1141, "step": 837 }, { "epoch": 0.4084815988301243, "grad_norm": 0.29049184918403625, "learning_rate": 0.00012919873296578918, "loss": 1.1032, "step": 838 }, { "epoch": 0.40896904703875214, "grad_norm": 0.301238477230072, "learning_rate": 0.00012905148192677188, "loss": 1.0863, "step": 839 }, { "epoch": 0.40945649524738, "grad_norm": 0.3428902328014374, "learning_rate": 0.0001289041620570935, "loss": 1.1414, "step": 840 }, { "epoch": 0.4099439434560078, "grad_norm": 0.3976534605026245, "learning_rate": 0.00012875677370579377, "loss": 1.1849, "step": 841 }, { "epoch": 0.41043139166463566, "grad_norm": 0.3917904198169708, "learning_rate": 0.0001286093172220748, "loss": 1.1065, "step": 842 }, { "epoch": 0.41091883987326344, "grad_norm": 0.3630690574645996, "learning_rate": 0.0001284617929553001, "loss": 1.1917, "step": 843 }, { "epoch": 0.4114062880818913, "grad_norm": 0.383533775806427, "learning_rate": 0.00012831420125499374, "loss": 1.184, "step": 844 }, { "epoch": 0.4118937362905191, "grad_norm": 0.36599770188331604, "learning_rate": 0.0001281665424708396, "loss": 1.1461, "step": 845 }, { "epoch": 0.41238118449914696, "grad_norm": 0.36383891105651855, "learning_rate": 0.0001280188169526805, "loss": 1.1423, "step": 846 }, { "epoch": 0.4128686327077748, "grad_norm": 0.3431175947189331, "learning_rate": 0.00012787102505051727, "loss": 1.0849, "step": 847 }, { "epoch": 0.41335608091640264, "grad_norm": 0.43728041648864746, "learning_rate": 0.00012772316711450815, "loss": 1.1391, "step": 848 }, { "epoch": 0.4138435291250305, "grad_norm": 0.3861692249774933, "learning_rate": 0.00012757524349496778, "loss": 1.1677, "step": 849 }, { "epoch": 0.4143309773336583, "grad_norm": 0.3486277759075165, "learning_rate": 0.00012742725454236646, "loss": 1.1588, "step": 850 }, { "epoch": 0.41481842554228615, "grad_norm": 0.34165406227111816, "learning_rate": 0.0001272792006073292, "loss": 1.1943, "step": 851 }, { "epoch": 0.415305873750914, "grad_norm": 0.4407804608345032, "learning_rate": 0.000127131082040635, "loss": 1.1215, "step": 852 }, { "epoch": 0.4157933219595418, "grad_norm": 0.322651743888855, "learning_rate": 0.00012698289919321605, "loss": 1.0882, "step": 853 }, { "epoch": 0.4162807701681696, "grad_norm": 0.3131794035434723, "learning_rate": 0.00012683465241615678, "loss": 1.1753, "step": 854 }, { "epoch": 0.41676821837679745, "grad_norm": 0.33760449290275574, "learning_rate": 0.00012668634206069304, "loss": 1.0094, "step": 855 }, { "epoch": 0.4172556665854253, "grad_norm": 0.3074570596218109, "learning_rate": 0.00012653796847821147, "loss": 1.019, "step": 856 }, { "epoch": 0.4177431147940531, "grad_norm": 0.3627846837043762, "learning_rate": 0.00012638953202024836, "loss": 1.0777, "step": 857 }, { "epoch": 0.41823056300268097, "grad_norm": 0.37333551049232483, "learning_rate": 0.00012624103303848902, "loss": 1.1969, "step": 858 }, { "epoch": 0.4187180112113088, "grad_norm": 0.4356592893600464, "learning_rate": 0.00012609247188476695, "loss": 1.2257, "step": 859 }, { "epoch": 0.41920545941993664, "grad_norm": 0.38547471165657043, "learning_rate": 0.0001259438489110628, "loss": 1.1097, "step": 860 }, { "epoch": 0.4196929076285645, "grad_norm": 0.3097715675830841, "learning_rate": 0.0001257951644695039, "loss": 1.2319, "step": 861 }, { "epoch": 0.4201803558371923, "grad_norm": 0.38433364033699036, "learning_rate": 0.00012564641891236303, "loss": 1.1097, "step": 862 }, { "epoch": 0.4206678040458201, "grad_norm": 0.3095664978027344, "learning_rate": 0.0001254976125920579, "loss": 1.1319, "step": 863 }, { "epoch": 0.42115525225444794, "grad_norm": 0.39881959557533264, "learning_rate": 0.00012534874586115008, "loss": 1.1986, "step": 864 }, { "epoch": 0.4216427004630758, "grad_norm": 0.3613760769367218, "learning_rate": 0.00012519981907234434, "loss": 1.213, "step": 865 }, { "epoch": 0.4221301486717036, "grad_norm": 0.3688783347606659, "learning_rate": 0.00012505083257848768, "loss": 1.1223, "step": 866 }, { "epoch": 0.42261759688033146, "grad_norm": 0.39314547181129456, "learning_rate": 0.0001249017867325686, "loss": 1.2198, "step": 867 }, { "epoch": 0.4231050450889593, "grad_norm": 0.33712512254714966, "learning_rate": 0.00012475268188771627, "loss": 1.2416, "step": 868 }, { "epoch": 0.42359249329758714, "grad_norm": 0.39671799540519714, "learning_rate": 0.00012460351839719958, "loss": 1.0538, "step": 869 }, { "epoch": 0.424079941506215, "grad_norm": 0.30700284242630005, "learning_rate": 0.0001244542966144263, "loss": 1.1185, "step": 870 }, { "epoch": 0.4245673897148428, "grad_norm": 0.3273939788341522, "learning_rate": 0.00012430501689294246, "loss": 1.0733, "step": 871 }, { "epoch": 0.42505483792347065, "grad_norm": 0.36876797676086426, "learning_rate": 0.00012415567958643127, "loss": 1.2011, "step": 872 }, { "epoch": 0.4255422861320985, "grad_norm": 0.35608360171318054, "learning_rate": 0.00012400628504871235, "loss": 1.173, "step": 873 }, { "epoch": 0.4260297343407263, "grad_norm": 0.43939632177352905, "learning_rate": 0.00012385683363374105, "loss": 1.243, "step": 874 }, { "epoch": 0.4265171825493541, "grad_norm": 0.3559912443161011, "learning_rate": 0.0001237073256956073, "loss": 1.216, "step": 875 }, { "epoch": 0.42700463075798195, "grad_norm": 0.3987311124801636, "learning_rate": 0.0001235577615885351, "loss": 1.124, "step": 876 }, { "epoch": 0.4274920789666098, "grad_norm": 0.3169601261615753, "learning_rate": 0.0001234081416668814, "loss": 1.0132, "step": 877 }, { "epoch": 0.42797952717523763, "grad_norm": 0.416533499956131, "learning_rate": 0.00012325846628513548, "loss": 1.1214, "step": 878 }, { "epoch": 0.42846697538386547, "grad_norm": 0.5308129787445068, "learning_rate": 0.00012310873579791804, "loss": 1.2743, "step": 879 }, { "epoch": 0.4289544235924933, "grad_norm": 0.3403697609901428, "learning_rate": 0.0001229589505599802, "loss": 1.1407, "step": 880 }, { "epoch": 0.42944187180112114, "grad_norm": 0.35060033202171326, "learning_rate": 0.00012280911092620297, "loss": 1.1729, "step": 881 }, { "epoch": 0.429929320009749, "grad_norm": 0.3814738988876343, "learning_rate": 0.0001226592172515961, "loss": 1.1063, "step": 882 }, { "epoch": 0.4304167682183768, "grad_norm": 0.31343165040016174, "learning_rate": 0.0001225092698912975, "loss": 1.2022, "step": 883 }, { "epoch": 0.4309042164270046, "grad_norm": 0.3462253212928772, "learning_rate": 0.00012235926920057218, "loss": 1.1213, "step": 884 }, { "epoch": 0.43139166463563244, "grad_norm": 0.35463854670524597, "learning_rate": 0.0001222092155348115, "loss": 1.1719, "step": 885 }, { "epoch": 0.4318791128442603, "grad_norm": 0.38044965267181396, "learning_rate": 0.00012205910924953241, "loss": 1.201, "step": 886 }, { "epoch": 0.4323665610528881, "grad_norm": 0.3340301513671875, "learning_rate": 0.00012190895070037647, "loss": 1.1672, "step": 887 }, { "epoch": 0.43285400926151596, "grad_norm": 0.4610227942466736, "learning_rate": 0.00012175874024310909, "loss": 1.2076, "step": 888 }, { "epoch": 0.4333414574701438, "grad_norm": 0.3781220018863678, "learning_rate": 0.0001216084782336187, "loss": 1.1193, "step": 889 }, { "epoch": 0.43382890567877164, "grad_norm": 0.36019018292427063, "learning_rate": 0.00012145816502791576, "loss": 1.1613, "step": 890 }, { "epoch": 0.4343163538873995, "grad_norm": 0.3611498177051544, "learning_rate": 0.00012130780098213212, "loss": 1.1421, "step": 891 }, { "epoch": 0.4348038020960273, "grad_norm": 0.3429311513900757, "learning_rate": 0.00012115738645252008, "loss": 1.1785, "step": 892 }, { "epoch": 0.43529125030465515, "grad_norm": 0.3426477313041687, "learning_rate": 0.0001210069217954515, "loss": 1.1375, "step": 893 }, { "epoch": 0.43577869851328294, "grad_norm": 0.35320284962654114, "learning_rate": 0.00012085640736741708, "loss": 1.1069, "step": 894 }, { "epoch": 0.4362661467219108, "grad_norm": 0.37135952711105347, "learning_rate": 0.00012070584352502535, "loss": 1.1532, "step": 895 }, { "epoch": 0.4367535949305386, "grad_norm": 0.39013463258743286, "learning_rate": 0.00012055523062500195, "loss": 1.0486, "step": 896 }, { "epoch": 0.43724104313916645, "grad_norm": 0.3446867763996124, "learning_rate": 0.00012040456902418882, "loss": 1.1962, "step": 897 }, { "epoch": 0.4377284913477943, "grad_norm": 0.3435569703578949, "learning_rate": 0.00012025385907954324, "loss": 1.2106, "step": 898 }, { "epoch": 0.43821593955642213, "grad_norm": 0.3562975823879242, "learning_rate": 0.0001201031011481369, "loss": 1.1922, "step": 899 }, { "epoch": 0.43870338776504997, "grad_norm": 0.38896191120147705, "learning_rate": 0.00011995229558715541, "loss": 1.0976, "step": 900 }, { "epoch": 0.4391908359736778, "grad_norm": 0.3850138783454895, "learning_rate": 0.00011980144275389706, "loss": 1.0942, "step": 901 }, { "epoch": 0.43967828418230565, "grad_norm": 0.41809114813804626, "learning_rate": 0.00011965054300577226, "loss": 1.203, "step": 902 }, { "epoch": 0.4401657323909335, "grad_norm": 0.3288028836250305, "learning_rate": 0.00011949959670030244, "loss": 1.1751, "step": 903 }, { "epoch": 0.4406531805995613, "grad_norm": 0.3803359568119049, "learning_rate": 0.00011934860419511942, "loss": 1.1751, "step": 904 }, { "epoch": 0.4411406288081891, "grad_norm": 0.30799826979637146, "learning_rate": 0.00011919756584796449, "loss": 1.1237, "step": 905 }, { "epoch": 0.44162807701681694, "grad_norm": 0.4452258348464966, "learning_rate": 0.00011904648201668754, "loss": 1.1655, "step": 906 }, { "epoch": 0.4421155252254448, "grad_norm": 0.3488793671131134, "learning_rate": 0.00011889535305924618, "loss": 1.1729, "step": 907 }, { "epoch": 0.4426029734340726, "grad_norm": 0.42165330052375793, "learning_rate": 0.000118744179333705, "loss": 1.1175, "step": 908 }, { "epoch": 0.44309042164270046, "grad_norm": 0.37287434935569763, "learning_rate": 0.00011859296119823459, "loss": 1.1214, "step": 909 }, { "epoch": 0.4435778698513283, "grad_norm": 0.39584994316101074, "learning_rate": 0.00011844169901111082, "loss": 1.1547, "step": 910 }, { "epoch": 0.44406531805995614, "grad_norm": 0.3444364070892334, "learning_rate": 0.00011829039313071393, "loss": 1.0781, "step": 911 }, { "epoch": 0.444552766268584, "grad_norm": 0.39970532059669495, "learning_rate": 0.00011813904391552759, "loss": 1.1958, "step": 912 }, { "epoch": 0.4450402144772118, "grad_norm": 0.3787233531475067, "learning_rate": 0.00011798765172413826, "loss": 1.1973, "step": 913 }, { "epoch": 0.44552766268583965, "grad_norm": 0.3653548061847687, "learning_rate": 0.00011783621691523415, "loss": 1.1862, "step": 914 }, { "epoch": 0.44601511089446744, "grad_norm": 0.41322481632232666, "learning_rate": 0.00011768473984760447, "loss": 1.0597, "step": 915 }, { "epoch": 0.4465025591030953, "grad_norm": 0.3496185541152954, "learning_rate": 0.00011753322088013853, "loss": 1.1075, "step": 916 }, { "epoch": 0.4469900073117231, "grad_norm": 0.3806665539741516, "learning_rate": 0.00011738166037182492, "loss": 1.1132, "step": 917 }, { "epoch": 0.44747745552035095, "grad_norm": 0.37213313579559326, "learning_rate": 0.0001172300586817507, "loss": 1.0747, "step": 918 }, { "epoch": 0.4479649037289788, "grad_norm": 0.3017314672470093, "learning_rate": 0.00011707841616910042, "loss": 1.0555, "step": 919 }, { "epoch": 0.44845235193760663, "grad_norm": 0.35828927159309387, "learning_rate": 0.00011692673319315541, "loss": 1.1143, "step": 920 }, { "epoch": 0.44893980014623447, "grad_norm": 0.4186861217021942, "learning_rate": 0.00011677501011329283, "loss": 0.9841, "step": 921 }, { "epoch": 0.4494272483548623, "grad_norm": 0.4104650914669037, "learning_rate": 0.00011662324728898486, "loss": 1.0589, "step": 922 }, { "epoch": 0.44991469656349015, "grad_norm": 0.31428661942481995, "learning_rate": 0.00011647144507979788, "loss": 1.0162, "step": 923 }, { "epoch": 0.450402144772118, "grad_norm": 0.35930880904197693, "learning_rate": 0.00011631960384539157, "loss": 1.1889, "step": 924 }, { "epoch": 0.45088959298074577, "grad_norm": 0.38938355445861816, "learning_rate": 0.00011616772394551802, "loss": 1.1148, "step": 925 }, { "epoch": 0.4513770411893736, "grad_norm": 0.3785998821258545, "learning_rate": 0.00011601580574002102, "loss": 1.14, "step": 926 }, { "epoch": 0.45186448939800145, "grad_norm": 0.41007423400878906, "learning_rate": 0.000115863849588835, "loss": 1.0682, "step": 927 }, { "epoch": 0.4523519376066293, "grad_norm": 0.3938082456588745, "learning_rate": 0.00011571185585198445, "loss": 1.3111, "step": 928 }, { "epoch": 0.4528393858152571, "grad_norm": 0.35383960604667664, "learning_rate": 0.00011555982488958274, "loss": 1.1986, "step": 929 }, { "epoch": 0.45332683402388496, "grad_norm": 0.3635252118110657, "learning_rate": 0.00011540775706183156, "loss": 1.07, "step": 930 }, { "epoch": 0.4538142822325128, "grad_norm": 0.3247283399105072, "learning_rate": 0.00011525565272901988, "loss": 1.1154, "step": 931 }, { "epoch": 0.45430173044114064, "grad_norm": 0.36164093017578125, "learning_rate": 0.00011510351225152321, "loss": 1.0798, "step": 932 }, { "epoch": 0.4547891786497685, "grad_norm": 0.3158092498779297, "learning_rate": 0.00011495133598980263, "loss": 1.065, "step": 933 }, { "epoch": 0.4552766268583963, "grad_norm": 0.39821863174438477, "learning_rate": 0.00011479912430440409, "loss": 1.0929, "step": 934 }, { "epoch": 0.45576407506702415, "grad_norm": 0.4439769685268402, "learning_rate": 0.00011464687755595736, "loss": 1.1168, "step": 935 }, { "epoch": 0.45625152327565194, "grad_norm": 0.31098422408103943, "learning_rate": 0.00011449459610517537, "loss": 1.0636, "step": 936 }, { "epoch": 0.4567389714842798, "grad_norm": 0.3414660692214966, "learning_rate": 0.00011434228031285328, "loss": 1.1077, "step": 937 }, { "epoch": 0.4572264196929076, "grad_norm": 0.3762691915035248, "learning_rate": 0.00011418993053986748, "loss": 1.1334, "step": 938 }, { "epoch": 0.45771386790153545, "grad_norm": 0.35831066966056824, "learning_rate": 0.00011403754714717505, "loss": 1.15, "step": 939 }, { "epoch": 0.4582013161101633, "grad_norm": 0.36059486865997314, "learning_rate": 0.00011388513049581261, "loss": 1.1737, "step": 940 }, { "epoch": 0.45868876431879113, "grad_norm": 0.4233929514884949, "learning_rate": 0.00011373268094689562, "loss": 1.06, "step": 941 }, { "epoch": 0.45917621252741897, "grad_norm": 0.34159737825393677, "learning_rate": 0.00011358019886161743, "loss": 1.1018, "step": 942 }, { "epoch": 0.4596636607360468, "grad_norm": 0.3699301779270172, "learning_rate": 0.00011342768460124856, "loss": 1.0605, "step": 943 }, { "epoch": 0.46015110894467465, "grad_norm": 0.33748912811279297, "learning_rate": 0.00011327513852713567, "loss": 1.1299, "step": 944 }, { "epoch": 0.4606385571533025, "grad_norm": 0.3668537735939026, "learning_rate": 0.00011312256100070091, "loss": 1.048, "step": 945 }, { "epoch": 0.46112600536193027, "grad_norm": 0.3471434414386749, "learning_rate": 0.00011296995238344084, "loss": 1.1807, "step": 946 }, { "epoch": 0.4616134535705581, "grad_norm": 0.31438618898391724, "learning_rate": 0.00011281731303692575, "loss": 1.0701, "step": 947 }, { "epoch": 0.46210090177918595, "grad_norm": 0.35458657145500183, "learning_rate": 0.00011266464332279864, "loss": 1.0558, "step": 948 }, { "epoch": 0.4625883499878138, "grad_norm": 0.387273371219635, "learning_rate": 0.00011251194360277462, "loss": 1.21, "step": 949 }, { "epoch": 0.4630757981964416, "grad_norm": 0.3416357636451721, "learning_rate": 0.00011235921423863978, "loss": 1.0773, "step": 950 }, { "epoch": 0.46356324640506946, "grad_norm": 0.3603675663471222, "learning_rate": 0.00011220645559225042, "loss": 1.1588, "step": 951 }, { "epoch": 0.4640506946136973, "grad_norm": 0.39804136753082275, "learning_rate": 0.0001120536680255323, "loss": 1.0687, "step": 952 }, { "epoch": 0.46453814282232514, "grad_norm": 0.3613085448741913, "learning_rate": 0.00011190085190047968, "loss": 1.1643, "step": 953 }, { "epoch": 0.465025591030953, "grad_norm": 0.4029476046562195, "learning_rate": 0.00011174800757915444, "loss": 1.1701, "step": 954 }, { "epoch": 0.4655130392395808, "grad_norm": 0.41172125935554504, "learning_rate": 0.00011159513542368529, "loss": 1.1043, "step": 955 }, { "epoch": 0.4660004874482086, "grad_norm": 0.36017680168151855, "learning_rate": 0.00011144223579626689, "loss": 1.0258, "step": 956 }, { "epoch": 0.46648793565683644, "grad_norm": 0.39928194880485535, "learning_rate": 0.00011128930905915897, "loss": 1.1159, "step": 957 }, { "epoch": 0.4669753838654643, "grad_norm": 0.41062191128730774, "learning_rate": 0.00011113635557468555, "loss": 1.1225, "step": 958 }, { "epoch": 0.4674628320740921, "grad_norm": 0.36912107467651367, "learning_rate": 0.00011098337570523396, "loss": 1.0881, "step": 959 }, { "epoch": 0.46795028028271995, "grad_norm": 0.3492516279220581, "learning_rate": 0.00011083036981325403, "loss": 1.0598, "step": 960 }, { "epoch": 0.4684377284913478, "grad_norm": 0.4227747917175293, "learning_rate": 0.00011067733826125729, "loss": 1.2674, "step": 961 }, { "epoch": 0.46892517669997563, "grad_norm": 0.3656153678894043, "learning_rate": 0.00011052428141181604, "loss": 1.1262, "step": 962 }, { "epoch": 0.46941262490860347, "grad_norm": 0.3131183683872223, "learning_rate": 0.00011037119962756257, "loss": 1.0621, "step": 963 }, { "epoch": 0.4699000731172313, "grad_norm": 0.3493298590183258, "learning_rate": 0.00011021809327118817, "loss": 1.18, "step": 964 }, { "epoch": 0.47038752132585915, "grad_norm": 0.3524402379989624, "learning_rate": 0.00011006496270544235, "loss": 1.1393, "step": 965 }, { "epoch": 0.470874969534487, "grad_norm": 0.3299414813518524, "learning_rate": 0.00010991180829313208, "loss": 1.0965, "step": 966 }, { "epoch": 0.47136241774311477, "grad_norm": 0.3031936585903168, "learning_rate": 0.00010975863039712068, "loss": 1.0382, "step": 967 }, { "epoch": 0.4718498659517426, "grad_norm": 0.31835681200027466, "learning_rate": 0.00010960542938032729, "loss": 1.115, "step": 968 }, { "epoch": 0.47233731416037045, "grad_norm": 0.39866137504577637, "learning_rate": 0.00010945220560572562, "loss": 1.1216, "step": 969 }, { "epoch": 0.4728247623689983, "grad_norm": 0.3873158395290375, "learning_rate": 0.00010929895943634343, "loss": 1.0729, "step": 970 }, { "epoch": 0.4733122105776261, "grad_norm": 0.31303170323371887, "learning_rate": 0.00010914569123526157, "loss": 1.1368, "step": 971 }, { "epoch": 0.47379965878625396, "grad_norm": 0.38255077600479126, "learning_rate": 0.00010899240136561299, "loss": 1.1969, "step": 972 }, { "epoch": 0.4742871069948818, "grad_norm": 0.4179987609386444, "learning_rate": 0.00010883909019058203, "loss": 1.2155, "step": 973 }, { "epoch": 0.47477455520350964, "grad_norm": 0.3629905581474304, "learning_rate": 0.00010868575807340351, "loss": 1.1925, "step": 974 }, { "epoch": 0.4752620034121375, "grad_norm": 0.35936662554740906, "learning_rate": 0.00010853240537736184, "loss": 1.2603, "step": 975 }, { "epoch": 0.4757494516207653, "grad_norm": 0.38720080256462097, "learning_rate": 0.00010837903246579022, "loss": 1.2434, "step": 976 }, { "epoch": 0.4762368998293931, "grad_norm": 0.4208745062351227, "learning_rate": 0.00010822563970206973, "loss": 1.2084, "step": 977 }, { "epoch": 0.47672434803802094, "grad_norm": 0.38924330472946167, "learning_rate": 0.00010807222744962849, "loss": 0.9611, "step": 978 }, { "epoch": 0.4772117962466488, "grad_norm": 0.3220176100730896, "learning_rate": 0.00010791879607194078, "loss": 1.1464, "step": 979 }, { "epoch": 0.4776992444552766, "grad_norm": 0.36164960265159607, "learning_rate": 0.00010776534593252616, "loss": 1.0847, "step": 980 }, { "epoch": 0.47818669266390446, "grad_norm": 0.4573691785335541, "learning_rate": 0.0001076118773949488, "loss": 1.1267, "step": 981 }, { "epoch": 0.4786741408725323, "grad_norm": 0.39894211292266846, "learning_rate": 0.00010745839082281621, "loss": 1.2592, "step": 982 }, { "epoch": 0.47916158908116013, "grad_norm": 0.3292525112628937, "learning_rate": 0.00010730488657977884, "loss": 1.0486, "step": 983 }, { "epoch": 0.47964903728978797, "grad_norm": 0.4128897786140442, "learning_rate": 0.00010715136502952893, "loss": 1.0303, "step": 984 }, { "epoch": 0.4801364854984158, "grad_norm": 0.35075098276138306, "learning_rate": 0.00010699782653579973, "loss": 1.1236, "step": 985 }, { "epoch": 0.48062393370704365, "grad_norm": 0.33849889039993286, "learning_rate": 0.00010684427146236457, "loss": 1.2145, "step": 986 }, { "epoch": 0.48111138191567143, "grad_norm": 0.4169270992279053, "learning_rate": 0.00010669070017303618, "loss": 1.1762, "step": 987 }, { "epoch": 0.48159883012429927, "grad_norm": 0.29890525341033936, "learning_rate": 0.0001065371130316656, "loss": 1.0611, "step": 988 }, { "epoch": 0.4820862783329271, "grad_norm": 0.33520928025245667, "learning_rate": 0.00010638351040214156, "loss": 1.1776, "step": 989 }, { "epoch": 0.48257372654155495, "grad_norm": 0.38371822237968445, "learning_rate": 0.00010622989264838934, "loss": 1.1733, "step": 990 }, { "epoch": 0.4830611747501828, "grad_norm": 0.36155542731285095, "learning_rate": 0.00010607626013437009, "loss": 1.2099, "step": 991 }, { "epoch": 0.4835486229588106, "grad_norm": 0.390323668718338, "learning_rate": 0.00010592261322408004, "loss": 1.0072, "step": 992 }, { "epoch": 0.48403607116743846, "grad_norm": 0.34715718030929565, "learning_rate": 0.00010576895228154935, "loss": 1.1499, "step": 993 }, { "epoch": 0.4845235193760663, "grad_norm": 0.40919023752212524, "learning_rate": 0.00010561527767084165, "loss": 1.0154, "step": 994 }, { "epoch": 0.48501096758469414, "grad_norm": 0.3707391619682312, "learning_rate": 0.0001054615897560527, "loss": 1.1641, "step": 995 }, { "epoch": 0.485498415793322, "grad_norm": 0.394755095243454, "learning_rate": 0.00010530788890130995, "loss": 1.151, "step": 996 }, { "epoch": 0.4859858640019498, "grad_norm": 0.3748216927051544, "learning_rate": 0.00010515417547077149, "loss": 1.1462, "step": 997 }, { "epoch": 0.4864733122105776, "grad_norm": 0.3286537230014801, "learning_rate": 0.00010500044982862519, "loss": 1.1958, "step": 998 }, { "epoch": 0.48696076041920544, "grad_norm": 0.39391735196113586, "learning_rate": 0.00010484671233908779, "loss": 1.2084, "step": 999 }, { "epoch": 0.4874482086278333, "grad_norm": 0.31442493200302124, "learning_rate": 0.00010469296336640417, "loss": 1.0804, "step": 1000 }, { "epoch": 0.4879356568364611, "grad_norm": 0.39494597911834717, "learning_rate": 0.00010453920327484641, "loss": 1.0454, "step": 1001 }, { "epoch": 0.48842310504508896, "grad_norm": 0.3640933930873871, "learning_rate": 0.00010438543242871295, "loss": 1.1034, "step": 1002 }, { "epoch": 0.4889105532537168, "grad_norm": 0.4298470914363861, "learning_rate": 0.00010423165119232765, "loss": 1.1456, "step": 1003 }, { "epoch": 0.48939800146234463, "grad_norm": 0.36693236231803894, "learning_rate": 0.000104077859930039, "loss": 1.0827, "step": 1004 }, { "epoch": 0.4898854496709725, "grad_norm": 0.3547927141189575, "learning_rate": 0.0001039240590062193, "loss": 1.1993, "step": 1005 }, { "epoch": 0.4903728978796003, "grad_norm": 0.339211642742157, "learning_rate": 0.00010377024878526369, "loss": 1.1297, "step": 1006 }, { "epoch": 0.49086034608822815, "grad_norm": 0.35470589995384216, "learning_rate": 0.0001036164296315894, "loss": 1.1887, "step": 1007 }, { "epoch": 0.49134779429685593, "grad_norm": 0.3784750699996948, "learning_rate": 0.00010346260190963468, "loss": 1.129, "step": 1008 }, { "epoch": 0.49183524250548377, "grad_norm": 0.3729405999183655, "learning_rate": 0.00010330876598385826, "loss": 1.1308, "step": 1009 }, { "epoch": 0.4923226907141116, "grad_norm": 0.3662540316581726, "learning_rate": 0.00010315492221873819, "loss": 1.0715, "step": 1010 }, { "epoch": 0.49281013892273945, "grad_norm": 0.37958213686943054, "learning_rate": 0.00010300107097877114, "loss": 1.0676, "step": 1011 }, { "epoch": 0.4932975871313673, "grad_norm": 0.3876712918281555, "learning_rate": 0.00010284721262847146, "loss": 1.1149, "step": 1012 }, { "epoch": 0.4937850353399951, "grad_norm": 0.3441215455532074, "learning_rate": 0.00010269334753237038, "loss": 1.011, "step": 1013 }, { "epoch": 0.49427248354862297, "grad_norm": 0.30004268884658813, "learning_rate": 0.0001025394760550151, "loss": 1.1088, "step": 1014 }, { "epoch": 0.4947599317572508, "grad_norm": 0.3573639690876007, "learning_rate": 0.00010238559856096792, "loss": 1.2165, "step": 1015 }, { "epoch": 0.49524737996587864, "grad_norm": 0.3701191246509552, "learning_rate": 0.00010223171541480543, "loss": 1.2121, "step": 1016 }, { "epoch": 0.4957348281745065, "grad_norm": 0.3917965590953827, "learning_rate": 0.00010207782698111757, "loss": 1.1857, "step": 1017 }, { "epoch": 0.49622227638313426, "grad_norm": 0.3756738305091858, "learning_rate": 0.00010192393362450685, "loss": 1.1381, "step": 1018 }, { "epoch": 0.4967097245917621, "grad_norm": 0.35566744208335876, "learning_rate": 0.00010177003570958738, "loss": 1.1536, "step": 1019 }, { "epoch": 0.49719717280038994, "grad_norm": 0.3449627459049225, "learning_rate": 0.00010161613360098417, "loss": 1.2262, "step": 1020 }, { "epoch": 0.4976846210090178, "grad_norm": 0.3484485149383545, "learning_rate": 0.00010146222766333209, "loss": 1.1646, "step": 1021 }, { "epoch": 0.4981720692176456, "grad_norm": 0.36827757954597473, "learning_rate": 0.00010130831826127507, "loss": 1.0605, "step": 1022 }, { "epoch": 0.49865951742627346, "grad_norm": 0.3689895272254944, "learning_rate": 0.00010115440575946533, "loss": 1.0536, "step": 1023 }, { "epoch": 0.4991469656349013, "grad_norm": 0.3833921551704407, "learning_rate": 0.00010100049052256235, "loss": 1.0727, "step": 1024 }, { "epoch": 0.49963441384352913, "grad_norm": 0.37425750494003296, "learning_rate": 0.00010084657291523212, "loss": 1.1639, "step": 1025 }, { "epoch": 0.5001218620521569, "grad_norm": 0.38358545303344727, "learning_rate": 0.00010069265330214626, "loss": 1.2006, "step": 1026 }, { "epoch": 0.5006093102607848, "grad_norm": 0.37975242733955383, "learning_rate": 0.00010053873204798112, "loss": 1.2182, "step": 1027 }, { "epoch": 0.5010967584694126, "grad_norm": 0.33467257022857666, "learning_rate": 0.000100384809517417, "loss": 1.0435, "step": 1028 }, { "epoch": 0.5015842066780405, "grad_norm": 0.3885016441345215, "learning_rate": 0.00010023088607513711, "loss": 1.0711, "step": 1029 }, { "epoch": 0.5020716548866683, "grad_norm": 0.35991692543029785, "learning_rate": 0.00010007696208582688, "loss": 1.2578, "step": 1030 }, { "epoch": 0.5025591030952962, "grad_norm": 0.42300593852996826, "learning_rate": 9.992303791417313e-05, "loss": 1.094, "step": 1031 }, { "epoch": 0.503046551303924, "grad_norm": 0.3597909212112427, "learning_rate": 9.976911392486294e-05, "loss": 1.1153, "step": 1032 }, { "epoch": 0.5035339995125518, "grad_norm": 0.41715627908706665, "learning_rate": 9.961519048258304e-05, "loss": 1.2809, "step": 1033 }, { "epoch": 0.5040214477211796, "grad_norm": 0.4352867603302002, "learning_rate": 9.94612679520189e-05, "loss": 1.2658, "step": 1034 }, { "epoch": 0.5045088959298074, "grad_norm": 0.3663417398929596, "learning_rate": 9.930734669785378e-05, "loss": 1.0499, "step": 1035 }, { "epoch": 0.5049963441384353, "grad_norm": 0.3860229253768921, "learning_rate": 9.915342708476789e-05, "loss": 1.1598, "step": 1036 }, { "epoch": 0.5054837923470631, "grad_norm": 0.33618807792663574, "learning_rate": 9.899950947743767e-05, "loss": 1.07, "step": 1037 }, { "epoch": 0.505971240555691, "grad_norm": 0.37883251905441284, "learning_rate": 9.884559424053472e-05, "loss": 1.1359, "step": 1038 }, { "epoch": 0.5064586887643188, "grad_norm": 0.37886759638786316, "learning_rate": 9.869168173872493e-05, "loss": 1.0496, "step": 1039 }, { "epoch": 0.5069461369729467, "grad_norm": 0.37057310342788696, "learning_rate": 9.853777233666794e-05, "loss": 1.1659, "step": 1040 }, { "epoch": 0.5074335851815744, "grad_norm": 0.3882743716239929, "learning_rate": 9.838386639901584e-05, "loss": 0.9907, "step": 1041 }, { "epoch": 0.5079210333902023, "grad_norm": 0.3489919900894165, "learning_rate": 9.822996429041263e-05, "loss": 1.1121, "step": 1042 }, { "epoch": 0.5084084815988301, "grad_norm": 0.3696196675300598, "learning_rate": 9.807606637549318e-05, "loss": 1.0515, "step": 1043 }, { "epoch": 0.5088959298074579, "grad_norm": 0.4114032983779907, "learning_rate": 9.792217301888245e-05, "loss": 1.1051, "step": 1044 }, { "epoch": 0.5093833780160858, "grad_norm": 0.3358118236064911, "learning_rate": 9.77682845851946e-05, "loss": 1.0681, "step": 1045 }, { "epoch": 0.5098708262247136, "grad_norm": 0.412111759185791, "learning_rate": 9.761440143903209e-05, "loss": 1.2802, "step": 1046 }, { "epoch": 0.5103582744333415, "grad_norm": 0.3456852436065674, "learning_rate": 9.746052394498492e-05, "loss": 1.1589, "step": 1047 }, { "epoch": 0.5108457226419693, "grad_norm": 0.5172877907752991, "learning_rate": 9.730665246762964e-05, "loss": 1.151, "step": 1048 }, { "epoch": 0.5113331708505972, "grad_norm": 0.403706818819046, "learning_rate": 9.715278737152853e-05, "loss": 1.0278, "step": 1049 }, { "epoch": 0.5118206190592249, "grad_norm": 0.35117772221565247, "learning_rate": 9.699892902122886e-05, "loss": 1.1306, "step": 1050 }, { "epoch": 0.5123080672678528, "grad_norm": 0.31138819456100464, "learning_rate": 9.684507778126184e-05, "loss": 1.195, "step": 1051 }, { "epoch": 0.5127955154764806, "grad_norm": 0.3926421105861664, "learning_rate": 9.669123401614174e-05, "loss": 1.1606, "step": 1052 }, { "epoch": 0.5132829636851085, "grad_norm": 0.36821529269218445, "learning_rate": 9.653739809036533e-05, "loss": 1.0833, "step": 1053 }, { "epoch": 0.5137704118937363, "grad_norm": 0.43311622738838196, "learning_rate": 9.638357036841064e-05, "loss": 1.121, "step": 1054 }, { "epoch": 0.5142578601023641, "grad_norm": 0.35603591799736023, "learning_rate": 9.622975121473631e-05, "loss": 1.1267, "step": 1055 }, { "epoch": 0.514745308310992, "grad_norm": 0.3597733974456787, "learning_rate": 9.607594099378072e-05, "loss": 1.0459, "step": 1056 }, { "epoch": 0.5152327565196198, "grad_norm": 0.32538890838623047, "learning_rate": 9.592214006996104e-05, "loss": 1.1216, "step": 1057 }, { "epoch": 0.5157202047282476, "grad_norm": 0.38531047105789185, "learning_rate": 9.57683488076724e-05, "loss": 1.0783, "step": 1058 }, { "epoch": 0.5162076529368754, "grad_norm": 0.44185683131217957, "learning_rate": 9.561456757128707e-05, "loss": 1.2387, "step": 1059 }, { "epoch": 0.5166951011455033, "grad_norm": 0.32214614748954773, "learning_rate": 9.546079672515361e-05, "loss": 1.2315, "step": 1060 }, { "epoch": 0.5171825493541311, "grad_norm": 0.3528206944465637, "learning_rate": 9.530703663359587e-05, "loss": 1.0234, "step": 1061 }, { "epoch": 0.517669997562759, "grad_norm": 0.4029186964035034, "learning_rate": 9.515328766091222e-05, "loss": 1.011, "step": 1062 }, { "epoch": 0.5181574457713868, "grad_norm": 0.41676968336105347, "learning_rate": 9.499955017137484e-05, "loss": 1.1195, "step": 1063 }, { "epoch": 0.5186448939800147, "grad_norm": 0.4211922287940979, "learning_rate": 9.484582452922855e-05, "loss": 1.1499, "step": 1064 }, { "epoch": 0.5191323421886425, "grad_norm": 0.3917767107486725, "learning_rate": 9.469211109869006e-05, "loss": 1.0296, "step": 1065 }, { "epoch": 0.5196197903972702, "grad_norm": 0.4115936756134033, "learning_rate": 9.453841024394733e-05, "loss": 1.2161, "step": 1066 }, { "epoch": 0.5201072386058981, "grad_norm": 0.3712995946407318, "learning_rate": 9.43847223291584e-05, "loss": 1.1072, "step": 1067 }, { "epoch": 0.5205946868145259, "grad_norm": 0.40360167622566223, "learning_rate": 9.423104771845064e-05, "loss": 1.1405, "step": 1068 }, { "epoch": 0.5210821350231538, "grad_norm": 0.38009801506996155, "learning_rate": 9.407738677592e-05, "loss": 1.1727, "step": 1069 }, { "epoch": 0.5215695832317816, "grad_norm": 0.40101245045661926, "learning_rate": 9.392373986562993e-05, "loss": 0.9816, "step": 1070 }, { "epoch": 0.5220570314404095, "grad_norm": 0.40247631072998047, "learning_rate": 9.377010735161069e-05, "loss": 1.018, "step": 1071 }, { "epoch": 0.5225444796490373, "grad_norm": 0.38892799615859985, "learning_rate": 9.361648959785846e-05, "loss": 1.1797, "step": 1072 }, { "epoch": 0.5230319278576652, "grad_norm": 0.38088804483413696, "learning_rate": 9.346288696833441e-05, "loss": 1.2571, "step": 1073 }, { "epoch": 0.523519376066293, "grad_norm": 0.4315434396266937, "learning_rate": 9.330929982696386e-05, "loss": 1.0405, "step": 1074 }, { "epoch": 0.5240068242749207, "grad_norm": 0.3794160783290863, "learning_rate": 9.315572853763544e-05, "loss": 1.1522, "step": 1075 }, { "epoch": 0.5244942724835486, "grad_norm": 0.3297971189022064, "learning_rate": 9.30021734642003e-05, "loss": 1.2665, "step": 1076 }, { "epoch": 0.5249817206921764, "grad_norm": 0.3364350199699402, "learning_rate": 9.28486349704711e-05, "loss": 1.067, "step": 1077 }, { "epoch": 0.5254691689008043, "grad_norm": 0.3259110748767853, "learning_rate": 9.269511342022115e-05, "loss": 1.1788, "step": 1078 }, { "epoch": 0.5259566171094321, "grad_norm": 0.4151977300643921, "learning_rate": 9.25416091771838e-05, "loss": 1.0303, "step": 1079 }, { "epoch": 0.52644406531806, "grad_norm": 0.29196158051490784, "learning_rate": 9.238812260505124e-05, "loss": 1.0754, "step": 1080 }, { "epoch": 0.5269315135266878, "grad_norm": 0.4657594859600067, "learning_rate": 9.223465406747383e-05, "loss": 1.1716, "step": 1081 }, { "epoch": 0.5274189617353157, "grad_norm": 0.35524073243141174, "learning_rate": 9.208120392805926e-05, "loss": 0.9974, "step": 1082 }, { "epoch": 0.5279064099439434, "grad_norm": 0.40014946460723877, "learning_rate": 9.192777255037155e-05, "loss": 1.1262, "step": 1083 }, { "epoch": 0.5283938581525713, "grad_norm": 0.4188205301761627, "learning_rate": 9.177436029793025e-05, "loss": 1.1687, "step": 1084 }, { "epoch": 0.5288813063611991, "grad_norm": 0.3595307767391205, "learning_rate": 9.16209675342098e-05, "loss": 1.0407, "step": 1085 }, { "epoch": 0.5293687545698269, "grad_norm": 0.37308305501937866, "learning_rate": 9.146759462263818e-05, "loss": 1.0952, "step": 1086 }, { "epoch": 0.5298562027784548, "grad_norm": 0.3582593500614166, "learning_rate": 9.131424192659653e-05, "loss": 1.1283, "step": 1087 }, { "epoch": 0.5303436509870826, "grad_norm": 0.37929674983024597, "learning_rate": 9.116090980941796e-05, "loss": 1.1726, "step": 1088 }, { "epoch": 0.5308310991957105, "grad_norm": 0.35771459341049194, "learning_rate": 9.100759863438702e-05, "loss": 1.1852, "step": 1089 }, { "epoch": 0.5313185474043383, "grad_norm": 0.39543354511260986, "learning_rate": 9.085430876473845e-05, "loss": 1.1532, "step": 1090 }, { "epoch": 0.5318059956129662, "grad_norm": 0.34235867857933044, "learning_rate": 9.070104056365657e-05, "loss": 1.133, "step": 1091 }, { "epoch": 0.5322934438215939, "grad_norm": 0.3834592401981354, "learning_rate": 9.054779439427441e-05, "loss": 1.2228, "step": 1092 }, { "epoch": 0.5327808920302218, "grad_norm": 0.34508809447288513, "learning_rate": 9.039457061967276e-05, "loss": 1.2762, "step": 1093 }, { "epoch": 0.5332683402388496, "grad_norm": 0.39939218759536743, "learning_rate": 9.024136960287931e-05, "loss": 1.0644, "step": 1094 }, { "epoch": 0.5337557884474775, "grad_norm": 0.4075930118560791, "learning_rate": 9.008819170686796e-05, "loss": 1.132, "step": 1095 }, { "epoch": 0.5342432366561053, "grad_norm": 0.3297237455844879, "learning_rate": 8.993503729455767e-05, "loss": 1.1041, "step": 1096 }, { "epoch": 0.5347306848647331, "grad_norm": 0.3641759753227234, "learning_rate": 8.978190672881184e-05, "loss": 1.0265, "step": 1097 }, { "epoch": 0.535218133073361, "grad_norm": 0.40472713112831116, "learning_rate": 8.962880037243746e-05, "loss": 1.1839, "step": 1098 }, { "epoch": 0.5357055812819888, "grad_norm": 0.36499154567718506, "learning_rate": 8.947571858818397e-05, "loss": 1.2158, "step": 1099 }, { "epoch": 0.5361930294906166, "grad_norm": 0.35857489705085754, "learning_rate": 8.932266173874276e-05, "loss": 1.0693, "step": 1100 }, { "epoch": 0.5366804776992444, "grad_norm": 0.3424220681190491, "learning_rate": 8.9169630186746e-05, "loss": 1.0986, "step": 1101 }, { "epoch": 0.5371679259078723, "grad_norm": 0.35947033762931824, "learning_rate": 8.901662429476607e-05, "loss": 1.143, "step": 1102 }, { "epoch": 0.5376553741165001, "grad_norm": 0.3797532021999359, "learning_rate": 8.886364442531447e-05, "loss": 0.9482, "step": 1103 }, { "epoch": 0.538142822325128, "grad_norm": 0.3743857741355896, "learning_rate": 8.871069094084102e-05, "loss": 1.0776, "step": 1104 }, { "epoch": 0.5386302705337558, "grad_norm": 0.34824487566947937, "learning_rate": 8.855776420373313e-05, "loss": 1.0339, "step": 1105 }, { "epoch": 0.5391177187423836, "grad_norm": 0.36332109570503235, "learning_rate": 8.840486457631475e-05, "loss": 1.1866, "step": 1106 }, { "epoch": 0.5396051669510115, "grad_norm": 0.30786654353141785, "learning_rate": 8.825199242084558e-05, "loss": 1.0333, "step": 1107 }, { "epoch": 0.5400926151596392, "grad_norm": 0.33310338854789734, "learning_rate": 8.809914809952033e-05, "loss": 1.051, "step": 1108 }, { "epoch": 0.5405800633682671, "grad_norm": 0.3896653652191162, "learning_rate": 8.79463319744677e-05, "loss": 1.1292, "step": 1109 }, { "epoch": 0.5410675115768949, "grad_norm": 0.35318389534950256, "learning_rate": 8.779354440774957e-05, "loss": 1.1307, "step": 1110 }, { "epoch": 0.5415549597855228, "grad_norm": 0.37618759274482727, "learning_rate": 8.764078576136026e-05, "loss": 1.0323, "step": 1111 }, { "epoch": 0.5420424079941506, "grad_norm": 0.36355626583099365, "learning_rate": 8.74880563972254e-05, "loss": 1.0024, "step": 1112 }, { "epoch": 0.5425298562027785, "grad_norm": 0.37075328826904297, "learning_rate": 8.733535667720138e-05, "loss": 1.1014, "step": 1113 }, { "epoch": 0.5430173044114063, "grad_norm": 0.38809290528297424, "learning_rate": 8.718268696307428e-05, "loss": 0.9998, "step": 1114 }, { "epoch": 0.5435047526200342, "grad_norm": 0.3356851041316986, "learning_rate": 8.703004761655917e-05, "loss": 1.1497, "step": 1115 }, { "epoch": 0.543992200828662, "grad_norm": 0.3875788450241089, "learning_rate": 8.687743899929913e-05, "loss": 1.3084, "step": 1116 }, { "epoch": 0.5444796490372897, "grad_norm": 0.30841848254203796, "learning_rate": 8.672486147286432e-05, "loss": 1.0747, "step": 1117 }, { "epoch": 0.5449670972459176, "grad_norm": 0.3297688066959381, "learning_rate": 8.657231539875148e-05, "loss": 1.212, "step": 1118 }, { "epoch": 0.5454545454545454, "grad_norm": 0.3741207420825958, "learning_rate": 8.64198011383826e-05, "loss": 1.154, "step": 1119 }, { "epoch": 0.5459419936631733, "grad_norm": 0.35746538639068604, "learning_rate": 8.626731905310442e-05, "loss": 1.2548, "step": 1120 }, { "epoch": 0.5464294418718011, "grad_norm": 0.3836494982242584, "learning_rate": 8.611486950418741e-05, "loss": 1.0074, "step": 1121 }, { "epoch": 0.546916890080429, "grad_norm": 0.3392050564289093, "learning_rate": 8.596245285282498e-05, "loss": 1.0208, "step": 1122 }, { "epoch": 0.5474043382890568, "grad_norm": 0.3157902657985687, "learning_rate": 8.581006946013252e-05, "loss": 1.0768, "step": 1123 }, { "epoch": 0.5478917864976847, "grad_norm": 0.3608684539794922, "learning_rate": 8.565771968714675e-05, "loss": 1.0902, "step": 1124 }, { "epoch": 0.5483792347063124, "grad_norm": 0.3302469253540039, "learning_rate": 8.550540389482466e-05, "loss": 1.1155, "step": 1125 }, { "epoch": 0.5488666829149403, "grad_norm": 0.35549214482307434, "learning_rate": 8.535312244404269e-05, "loss": 1.1235, "step": 1126 }, { "epoch": 0.5493541311235681, "grad_norm": 0.37752172350883484, "learning_rate": 8.520087569559592e-05, "loss": 1.0419, "step": 1127 }, { "epoch": 0.5498415793321959, "grad_norm": 0.46155112981796265, "learning_rate": 8.504866401019737e-05, "loss": 1.2448, "step": 1128 }, { "epoch": 0.5503290275408238, "grad_norm": 0.3703531324863434, "learning_rate": 8.489648774847683e-05, "loss": 1.1997, "step": 1129 }, { "epoch": 0.5508164757494516, "grad_norm": 0.38380300998687744, "learning_rate": 8.474434727098013e-05, "loss": 1.1803, "step": 1130 }, { "epoch": 0.5513039239580795, "grad_norm": 0.42736393213272095, "learning_rate": 8.459224293816846e-05, "loss": 1.2428, "step": 1131 }, { "epoch": 0.5517913721667073, "grad_norm": 0.40469399094581604, "learning_rate": 8.44401751104173e-05, "loss": 1.1447, "step": 1132 }, { "epoch": 0.5522788203753352, "grad_norm": 0.4133606255054474, "learning_rate": 8.428814414801558e-05, "loss": 1.0887, "step": 1133 }, { "epoch": 0.5527662685839629, "grad_norm": 0.35687559843063354, "learning_rate": 8.413615041116502e-05, "loss": 1.0438, "step": 1134 }, { "epoch": 0.5532537167925908, "grad_norm": 0.3688930571079254, "learning_rate": 8.398419425997903e-05, "loss": 1.1282, "step": 1135 }, { "epoch": 0.5537411650012186, "grad_norm": 0.29839542508125305, "learning_rate": 8.383227605448199e-05, "loss": 1.0962, "step": 1136 }, { "epoch": 0.5542286132098464, "grad_norm": 0.36062946915626526, "learning_rate": 8.368039615460844e-05, "loss": 1.1615, "step": 1137 }, { "epoch": 0.5547160614184743, "grad_norm": 0.37687721848487854, "learning_rate": 8.352855492020215e-05, "loss": 1.1831, "step": 1138 }, { "epoch": 0.5552035096271021, "grad_norm": 0.3692804276943207, "learning_rate": 8.337675271101518e-05, "loss": 1.1095, "step": 1139 }, { "epoch": 0.55569095783573, "grad_norm": 0.35587865114212036, "learning_rate": 8.322498988670718e-05, "loss": 1.009, "step": 1140 }, { "epoch": 0.5561784060443578, "grad_norm": 0.373296856880188, "learning_rate": 8.307326680684461e-05, "loss": 1.091, "step": 1141 }, { "epoch": 0.5566658542529856, "grad_norm": 0.42510151863098145, "learning_rate": 8.29215838308996e-05, "loss": 1.1292, "step": 1142 }, { "epoch": 0.5571533024616134, "grad_norm": 0.3493712544441223, "learning_rate": 8.27699413182493e-05, "loss": 1.0554, "step": 1143 }, { "epoch": 0.5576407506702413, "grad_norm": 0.39959388971328735, "learning_rate": 8.261833962817509e-05, "loss": 1.0718, "step": 1144 }, { "epoch": 0.5581281988788691, "grad_norm": 0.3636349141597748, "learning_rate": 8.246677911986152e-05, "loss": 1.1488, "step": 1145 }, { "epoch": 0.558615647087497, "grad_norm": 0.3723439574241638, "learning_rate": 8.231526015239557e-05, "loss": 1.0863, "step": 1146 }, { "epoch": 0.5591030952961248, "grad_norm": 0.3718213737010956, "learning_rate": 8.216378308476589e-05, "loss": 1.0069, "step": 1147 }, { "epoch": 0.5595905435047526, "grad_norm": 0.39794471859931946, "learning_rate": 8.201234827586178e-05, "loss": 1.2078, "step": 1148 }, { "epoch": 0.5600779917133805, "grad_norm": 0.34235015511512756, "learning_rate": 8.186095608447242e-05, "loss": 0.9326, "step": 1149 }, { "epoch": 0.5605654399220082, "grad_norm": 0.47578248381614685, "learning_rate": 8.170960686928609e-05, "loss": 1.259, "step": 1150 }, { "epoch": 0.5610528881306361, "grad_norm": 0.40527671575546265, "learning_rate": 8.155830098888922e-05, "loss": 0.9756, "step": 1151 }, { "epoch": 0.5615403363392639, "grad_norm": 0.3799116313457489, "learning_rate": 8.140703880176542e-05, "loss": 1.0295, "step": 1152 }, { "epoch": 0.5620277845478918, "grad_norm": 0.40373730659484863, "learning_rate": 8.125582066629502e-05, "loss": 1.0046, "step": 1153 }, { "epoch": 0.5625152327565196, "grad_norm": 0.32776764035224915, "learning_rate": 8.110464694075383e-05, "loss": 1.1579, "step": 1154 }, { "epoch": 0.5630026809651475, "grad_norm": 0.35236409306526184, "learning_rate": 8.09535179833125e-05, "loss": 1.0318, "step": 1155 }, { "epoch": 0.5634901291737753, "grad_norm": 0.3265283703804016, "learning_rate": 8.080243415203552e-05, "loss": 1.1214, "step": 1156 }, { "epoch": 0.5639775773824032, "grad_norm": 0.3723972737789154, "learning_rate": 8.065139580488061e-05, "loss": 1.0587, "step": 1157 }, { "epoch": 0.564465025591031, "grad_norm": 0.3946072459220886, "learning_rate": 8.050040329969761e-05, "loss": 0.9452, "step": 1158 }, { "epoch": 0.5649524737996587, "grad_norm": 0.3760214149951935, "learning_rate": 8.034945699422778e-05, "loss": 0.9506, "step": 1159 }, { "epoch": 0.5654399220082866, "grad_norm": 0.34320196509361267, "learning_rate": 8.019855724610296e-05, "loss": 1.1211, "step": 1160 }, { "epoch": 0.5659273702169144, "grad_norm": 0.3584868311882019, "learning_rate": 8.004770441284462e-05, "loss": 1.1019, "step": 1161 }, { "epoch": 0.5664148184255423, "grad_norm": 0.3501480519771576, "learning_rate": 7.98968988518631e-05, "loss": 1.0358, "step": 1162 }, { "epoch": 0.5669022666341701, "grad_norm": 0.3758191466331482, "learning_rate": 7.974614092045679e-05, "loss": 1.0262, "step": 1163 }, { "epoch": 0.567389714842798, "grad_norm": 0.34351179003715515, "learning_rate": 7.95954309758112e-05, "loss": 1.0973, "step": 1164 }, { "epoch": 0.5678771630514258, "grad_norm": 0.3510177433490753, "learning_rate": 7.944476937499803e-05, "loss": 1.2233, "step": 1165 }, { "epoch": 0.5683646112600537, "grad_norm": 0.4014304578304291, "learning_rate": 7.929415647497466e-05, "loss": 1.0638, "step": 1166 }, { "epoch": 0.5688520594686814, "grad_norm": 0.3942334055900574, "learning_rate": 7.914359263258295e-05, "loss": 1.0488, "step": 1167 }, { "epoch": 0.5693395076773092, "grad_norm": 0.3765864968299866, "learning_rate": 7.899307820454852e-05, "loss": 1.1342, "step": 1168 }, { "epoch": 0.5698269558859371, "grad_norm": 0.4033709764480591, "learning_rate": 7.884261354747994e-05, "loss": 1.1466, "step": 1169 }, { "epoch": 0.5703144040945649, "grad_norm": 0.41917553544044495, "learning_rate": 7.869219901786791e-05, "loss": 1.3071, "step": 1170 }, { "epoch": 0.5708018523031928, "grad_norm": 0.36985212564468384, "learning_rate": 7.854183497208428e-05, "loss": 1.011, "step": 1171 }, { "epoch": 0.5712893005118206, "grad_norm": 0.39377880096435547, "learning_rate": 7.839152176638134e-05, "loss": 1.1878, "step": 1172 }, { "epoch": 0.5717767487204485, "grad_norm": 0.3966328799724579, "learning_rate": 7.824125975689092e-05, "loss": 1.1757, "step": 1173 }, { "epoch": 0.5722641969290763, "grad_norm": 0.41750824451446533, "learning_rate": 7.809104929962357e-05, "loss": 1.0878, "step": 1174 }, { "epoch": 0.5727516451377042, "grad_norm": 0.44299137592315674, "learning_rate": 7.79408907504676e-05, "loss": 1.0769, "step": 1175 }, { "epoch": 0.5732390933463319, "grad_norm": 0.37647318840026855, "learning_rate": 7.779078446518853e-05, "loss": 1.074, "step": 1176 }, { "epoch": 0.5737265415549598, "grad_norm": 0.34447044134140015, "learning_rate": 7.764073079942786e-05, "loss": 1.067, "step": 1177 }, { "epoch": 0.5742139897635876, "grad_norm": 0.3282855749130249, "learning_rate": 7.749073010870252e-05, "loss": 1.1691, "step": 1178 }, { "epoch": 0.5747014379722154, "grad_norm": 0.38534215092658997, "learning_rate": 7.734078274840391e-05, "loss": 1.0661, "step": 1179 }, { "epoch": 0.5751888861808433, "grad_norm": 0.35340821743011475, "learning_rate": 7.719088907379706e-05, "loss": 1.2258, "step": 1180 }, { "epoch": 0.5756763343894711, "grad_norm": 0.353468120098114, "learning_rate": 7.704104944001982e-05, "loss": 1.0874, "step": 1181 }, { "epoch": 0.576163782598099, "grad_norm": 0.3499086797237396, "learning_rate": 7.6891264202082e-05, "loss": 1.066, "step": 1182 }, { "epoch": 0.5766512308067268, "grad_norm": 0.3244962990283966, "learning_rate": 7.674153371486453e-05, "loss": 1.1346, "step": 1183 }, { "epoch": 0.5771386790153546, "grad_norm": 0.4745854139328003, "learning_rate": 7.659185833311864e-05, "loss": 1.1834, "step": 1184 }, { "epoch": 0.5776261272239824, "grad_norm": 0.3582706153392792, "learning_rate": 7.644223841146492e-05, "loss": 1.1274, "step": 1185 }, { "epoch": 0.5781135754326103, "grad_norm": 0.3575795590877533, "learning_rate": 7.629267430439273e-05, "loss": 0.9748, "step": 1186 }, { "epoch": 0.5786010236412381, "grad_norm": 0.34705471992492676, "learning_rate": 7.614316636625899e-05, "loss": 0.9847, "step": 1187 }, { "epoch": 0.579088471849866, "grad_norm": 0.35855981707572937, "learning_rate": 7.599371495128763e-05, "loss": 1.2269, "step": 1188 }, { "epoch": 0.5795759200584938, "grad_norm": 0.3544778525829315, "learning_rate": 7.584432041356875e-05, "loss": 1.1568, "step": 1189 }, { "epoch": 0.5800633682671216, "grad_norm": 0.37013569474220276, "learning_rate": 7.569498310705756e-05, "loss": 1.0729, "step": 1190 }, { "epoch": 0.5805508164757495, "grad_norm": 0.34105226397514343, "learning_rate": 7.554570338557371e-05, "loss": 1.0768, "step": 1191 }, { "epoch": 0.5810382646843772, "grad_norm": 0.329398512840271, "learning_rate": 7.539648160280045e-05, "loss": 0.9802, "step": 1192 }, { "epoch": 0.5815257128930051, "grad_norm": 0.3747677803039551, "learning_rate": 7.524731811228374e-05, "loss": 1.1414, "step": 1193 }, { "epoch": 0.5820131611016329, "grad_norm": 0.4513600170612335, "learning_rate": 7.50982132674314e-05, "loss": 1.318, "step": 1194 }, { "epoch": 0.5825006093102608, "grad_norm": 0.3539467453956604, "learning_rate": 7.494916742151234e-05, "loss": 1.1125, "step": 1195 }, { "epoch": 0.5829880575188886, "grad_norm": 0.4409025311470032, "learning_rate": 7.48001809276557e-05, "loss": 1.0658, "step": 1196 }, { "epoch": 0.5834755057275165, "grad_norm": 0.46018657088279724, "learning_rate": 7.465125413884995e-05, "loss": 1.1515, "step": 1197 }, { "epoch": 0.5839629539361443, "grad_norm": 0.39541178941726685, "learning_rate": 7.450238740794212e-05, "loss": 1.0762, "step": 1198 }, { "epoch": 0.5844504021447721, "grad_norm": 0.3461151123046875, "learning_rate": 7.435358108763698e-05, "loss": 1.158, "step": 1199 }, { "epoch": 0.5849378503534, "grad_norm": 0.36114776134490967, "learning_rate": 7.420483553049613e-05, "loss": 1.0179, "step": 1200 }, { "epoch": 0.5854252985620277, "grad_norm": 0.4009512960910797, "learning_rate": 7.40561510889372e-05, "loss": 1.1028, "step": 1201 }, { "epoch": 0.5859127467706556, "grad_norm": 0.36980295181274414, "learning_rate": 7.39075281152331e-05, "loss": 1.1139, "step": 1202 }, { "epoch": 0.5864001949792834, "grad_norm": 0.38044968247413635, "learning_rate": 7.3758966961511e-05, "loss": 1.1793, "step": 1203 }, { "epoch": 0.5868876431879113, "grad_norm": 0.3833613395690918, "learning_rate": 7.361046797975167e-05, "loss": 1.1187, "step": 1204 }, { "epoch": 0.5873750913965391, "grad_norm": 0.33397236466407776, "learning_rate": 7.346203152178855e-05, "loss": 0.9398, "step": 1205 }, { "epoch": 0.587862539605167, "grad_norm": 0.4107745587825775, "learning_rate": 7.331365793930698e-05, "loss": 1.0091, "step": 1206 }, { "epoch": 0.5883499878137948, "grad_norm": 0.3655603229999542, "learning_rate": 7.316534758384328e-05, "loss": 1.1232, "step": 1207 }, { "epoch": 0.5888374360224227, "grad_norm": 0.33455514907836914, "learning_rate": 7.301710080678398e-05, "loss": 1.0878, "step": 1208 }, { "epoch": 0.5893248842310504, "grad_norm": 0.4005405306816101, "learning_rate": 7.286891795936502e-05, "loss": 1.2077, "step": 1209 }, { "epoch": 0.5898123324396782, "grad_norm": 0.37796056270599365, "learning_rate": 7.272079939267084e-05, "loss": 1.2196, "step": 1210 }, { "epoch": 0.5902997806483061, "grad_norm": 0.4098646342754364, "learning_rate": 7.257274545763355e-05, "loss": 1.0526, "step": 1211 }, { "epoch": 0.5907872288569339, "grad_norm": 0.4132624864578247, "learning_rate": 7.242475650503223e-05, "loss": 1.176, "step": 1212 }, { "epoch": 0.5912746770655618, "grad_norm": 0.35944047570228577, "learning_rate": 7.227683288549187e-05, "loss": 1.2631, "step": 1213 }, { "epoch": 0.5917621252741896, "grad_norm": 0.3995838463306427, "learning_rate": 7.212897494948274e-05, "loss": 1.0708, "step": 1214 }, { "epoch": 0.5922495734828175, "grad_norm": 0.3299524784088135, "learning_rate": 7.198118304731953e-05, "loss": 1.0689, "step": 1215 }, { "epoch": 0.5927370216914453, "grad_norm": 0.32375314831733704, "learning_rate": 7.183345752916042e-05, "loss": 1.0989, "step": 1216 }, { "epoch": 0.5932244699000732, "grad_norm": 0.3651908338069916, "learning_rate": 7.168579874500627e-05, "loss": 1.0571, "step": 1217 }, { "epoch": 0.5937119181087009, "grad_norm": 0.4099787473678589, "learning_rate": 7.153820704469993e-05, "loss": 1.1053, "step": 1218 }, { "epoch": 0.5941993663173288, "grad_norm": 0.34599772095680237, "learning_rate": 7.139068277792523e-05, "loss": 1.1592, "step": 1219 }, { "epoch": 0.5946868145259566, "grad_norm": 0.4628382921218872, "learning_rate": 7.124322629420628e-05, "loss": 1.1632, "step": 1220 }, { "epoch": 0.5951742627345844, "grad_norm": 0.3830983638763428, "learning_rate": 7.109583794290655e-05, "loss": 1.1266, "step": 1221 }, { "epoch": 0.5956617109432123, "grad_norm": 0.3308325707912445, "learning_rate": 7.094851807322813e-05, "loss": 1.0726, "step": 1222 }, { "epoch": 0.5961491591518401, "grad_norm": 0.3864315450191498, "learning_rate": 7.080126703421087e-05, "loss": 1.1274, "step": 1223 }, { "epoch": 0.596636607360468, "grad_norm": 0.3869364559650421, "learning_rate": 7.06540851747314e-05, "loss": 1.1278, "step": 1224 }, { "epoch": 0.5971240555690958, "grad_norm": 0.3456849157810211, "learning_rate": 7.050697284350271e-05, "loss": 1.0372, "step": 1225 }, { "epoch": 0.5976115037777237, "grad_norm": 0.34329748153686523, "learning_rate": 7.035993038907281e-05, "loss": 0.9344, "step": 1226 }, { "epoch": 0.5980989519863514, "grad_norm": 0.35609936714172363, "learning_rate": 7.021295815982424e-05, "loss": 1.1151, "step": 1227 }, { "epoch": 0.5985864001949793, "grad_norm": 0.38585638999938965, "learning_rate": 7.006605650397323e-05, "loss": 0.9919, "step": 1228 }, { "epoch": 0.5990738484036071, "grad_norm": 0.38502153754234314, "learning_rate": 6.991922576956872e-05, "loss": 1.15, "step": 1229 }, { "epoch": 0.5995612966122349, "grad_norm": 0.44235166907310486, "learning_rate": 6.977246630449161e-05, "loss": 1.1095, "step": 1230 }, { "epoch": 0.6000487448208628, "grad_norm": 0.36498481035232544, "learning_rate": 6.9625778456454e-05, "loss": 1.0007, "step": 1231 }, { "epoch": 0.6005361930294906, "grad_norm": 0.37926092743873596, "learning_rate": 6.94791625729983e-05, "loss": 1.1216, "step": 1232 }, { "epoch": 0.6010236412381185, "grad_norm": 0.3852653503417969, "learning_rate": 6.933261900149633e-05, "loss": 0.904, "step": 1233 }, { "epoch": 0.6015110894467462, "grad_norm": 0.43301111459732056, "learning_rate": 6.918614808914874e-05, "loss": 1.1529, "step": 1234 }, { "epoch": 0.6019985376553741, "grad_norm": 0.385044127702713, "learning_rate": 6.90397501829839e-05, "loss": 1.2066, "step": 1235 }, { "epoch": 0.6024859858640019, "grad_norm": 0.33385398983955383, "learning_rate": 6.889342562985725e-05, "loss": 1.1449, "step": 1236 }, { "epoch": 0.6029734340726298, "grad_norm": 0.377290278673172, "learning_rate": 6.874717477645043e-05, "loss": 1.2258, "step": 1237 }, { "epoch": 0.6034608822812576, "grad_norm": 0.36140093207359314, "learning_rate": 6.860099796927055e-05, "loss": 1.1105, "step": 1238 }, { "epoch": 0.6039483304898855, "grad_norm": 0.4266805946826935, "learning_rate": 6.845489555464915e-05, "loss": 1.1524, "step": 1239 }, { "epoch": 0.6044357786985133, "grad_norm": 0.4107736647129059, "learning_rate": 6.830886787874154e-05, "loss": 1.3052, "step": 1240 }, { "epoch": 0.6049232269071411, "grad_norm": 0.3631647527217865, "learning_rate": 6.816291528752606e-05, "loss": 0.9635, "step": 1241 }, { "epoch": 0.605410675115769, "grad_norm": 0.34765157103538513, "learning_rate": 6.801703812680309e-05, "loss": 1.0847, "step": 1242 }, { "epoch": 0.6058981233243967, "grad_norm": 0.35418158769607544, "learning_rate": 6.787123674219422e-05, "loss": 1.1145, "step": 1243 }, { "epoch": 0.6063855715330246, "grad_norm": 0.42017999291419983, "learning_rate": 6.772551147914165e-05, "loss": 0.9823, "step": 1244 }, { "epoch": 0.6068730197416524, "grad_norm": 0.38183996081352234, "learning_rate": 6.757986268290712e-05, "loss": 1.1948, "step": 1245 }, { "epoch": 0.6073604679502803, "grad_norm": 0.3763807713985443, "learning_rate": 6.743429069857123e-05, "loss": 1.1282, "step": 1246 }, { "epoch": 0.6078479161589081, "grad_norm": 0.4228421151638031, "learning_rate": 6.728879587103263e-05, "loss": 1.0836, "step": 1247 }, { "epoch": 0.608335364367536, "grad_norm": 0.3279537558555603, "learning_rate": 6.71433785450071e-05, "loss": 1.1569, "step": 1248 }, { "epoch": 0.6088228125761638, "grad_norm": 0.34171488881111145, "learning_rate": 6.699803906502682e-05, "loss": 1.1143, "step": 1249 }, { "epoch": 0.6093102607847917, "grad_norm": 0.40301311016082764, "learning_rate": 6.685277777543953e-05, "loss": 1.1053, "step": 1250 }, { "epoch": 0.6097977089934195, "grad_norm": 0.3519479036331177, "learning_rate": 6.670759502040782e-05, "loss": 1.057, "step": 1251 }, { "epoch": 0.6102851572020472, "grad_norm": 0.4781578779220581, "learning_rate": 6.656249114390803e-05, "loss": 1.2084, "step": 1252 }, { "epoch": 0.6107726054106751, "grad_norm": 0.43604883551597595, "learning_rate": 6.64174664897297e-05, "loss": 1.1374, "step": 1253 }, { "epoch": 0.6112600536193029, "grad_norm": 0.34350964426994324, "learning_rate": 6.627252140147474e-05, "loss": 1.1858, "step": 1254 }, { "epoch": 0.6117475018279308, "grad_norm": 0.37544548511505127, "learning_rate": 6.612765622255645e-05, "loss": 1.224, "step": 1255 }, { "epoch": 0.6122349500365586, "grad_norm": 0.40213069319725037, "learning_rate": 6.598287129619882e-05, "loss": 0.9491, "step": 1256 }, { "epoch": 0.6127223982451865, "grad_norm": 0.4172384738922119, "learning_rate": 6.583816696543576e-05, "loss": 1.1478, "step": 1257 }, { "epoch": 0.6132098464538143, "grad_norm": 0.3520037531852722, "learning_rate": 6.569354357311014e-05, "loss": 1.0307, "step": 1258 }, { "epoch": 0.6136972946624422, "grad_norm": 0.4001389741897583, "learning_rate": 6.554900146187312e-05, "loss": 1.1889, "step": 1259 }, { "epoch": 0.6141847428710699, "grad_norm": 0.39461663365364075, "learning_rate": 6.540454097418331e-05, "loss": 1.0742, "step": 1260 }, { "epoch": 0.6146721910796977, "grad_norm": 0.40418586134910583, "learning_rate": 6.526016245230589e-05, "loss": 1.1277, "step": 1261 }, { "epoch": 0.6151596392883256, "grad_norm": 0.35330745577812195, "learning_rate": 6.511586623831181e-05, "loss": 0.9615, "step": 1262 }, { "epoch": 0.6156470874969534, "grad_norm": 0.3787367641925812, "learning_rate": 6.497165267407703e-05, "loss": 1.1368, "step": 1263 }, { "epoch": 0.6161345357055813, "grad_norm": 0.3234662413597107, "learning_rate": 6.48275221012818e-05, "loss": 1.1478, "step": 1264 }, { "epoch": 0.6166219839142091, "grad_norm": 0.3629220426082611, "learning_rate": 6.468347486140957e-05, "loss": 1.1581, "step": 1265 }, { "epoch": 0.617109432122837, "grad_norm": 0.35167911648750305, "learning_rate": 6.453951129574644e-05, "loss": 1.1511, "step": 1266 }, { "epoch": 0.6175968803314648, "grad_norm": 0.39326807856559753, "learning_rate": 6.43956317453803e-05, "loss": 1.083, "step": 1267 }, { "epoch": 0.6180843285400927, "grad_norm": 0.4391644597053528, "learning_rate": 6.425183655119993e-05, "loss": 1.0469, "step": 1268 }, { "epoch": 0.6185717767487204, "grad_norm": 0.3080829381942749, "learning_rate": 6.410812605389423e-05, "loss": 0.9698, "step": 1269 }, { "epoch": 0.6190592249573483, "grad_norm": 0.3172454237937927, "learning_rate": 6.396450059395148e-05, "loss": 1.1241, "step": 1270 }, { "epoch": 0.6195466731659761, "grad_norm": 0.4044014513492584, "learning_rate": 6.382096051165847e-05, "loss": 0.9526, "step": 1271 }, { "epoch": 0.6200341213746039, "grad_norm": 0.34943583607673645, "learning_rate": 6.367750614709968e-05, "loss": 1.1092, "step": 1272 }, { "epoch": 0.6205215695832318, "grad_norm": 0.42065444588661194, "learning_rate": 6.353413784015654e-05, "loss": 1.1317, "step": 1273 }, { "epoch": 0.6210090177918596, "grad_norm": 0.3605908751487732, "learning_rate": 6.33908559305066e-05, "loss": 0.925, "step": 1274 }, { "epoch": 0.6214964660004875, "grad_norm": 0.3660207986831665, "learning_rate": 6.324766075762263e-05, "loss": 1.0133, "step": 1275 }, { "epoch": 0.6219839142091153, "grad_norm": 0.3236888647079468, "learning_rate": 6.310455266077193e-05, "loss": 1.0744, "step": 1276 }, { "epoch": 0.6224713624177431, "grad_norm": 0.3828015923500061, "learning_rate": 6.29615319790156e-05, "loss": 1.0188, "step": 1277 }, { "epoch": 0.6229588106263709, "grad_norm": 0.39991846680641174, "learning_rate": 6.28185990512075e-05, "loss": 1.0982, "step": 1278 }, { "epoch": 0.6234462588349988, "grad_norm": 0.4092521667480469, "learning_rate": 6.267575421599359e-05, "loss": 1.0448, "step": 1279 }, { "epoch": 0.6239337070436266, "grad_norm": 0.40827932953834534, "learning_rate": 6.253299781181121e-05, "loss": 1.2288, "step": 1280 }, { "epoch": 0.6244211552522545, "grad_norm": 0.3454175293445587, "learning_rate": 6.239033017688809e-05, "loss": 1.0822, "step": 1281 }, { "epoch": 0.6249086034608823, "grad_norm": 0.3596000671386719, "learning_rate": 6.224775164924164e-05, "loss": 1.1887, "step": 1282 }, { "epoch": 0.6253960516695101, "grad_norm": 0.31120550632476807, "learning_rate": 6.210526256667825e-05, "loss": 1.1056, "step": 1283 }, { "epoch": 0.625883499878138, "grad_norm": 0.34262052178382874, "learning_rate": 6.19628632667923e-05, "loss": 1.087, "step": 1284 }, { "epoch": 0.6263709480867657, "grad_norm": 0.42662423849105835, "learning_rate": 6.182055408696544e-05, "loss": 1.0435, "step": 1285 }, { "epoch": 0.6268583962953936, "grad_norm": 0.43115729093551636, "learning_rate": 6.167833536436588e-05, "loss": 1.1769, "step": 1286 }, { "epoch": 0.6273458445040214, "grad_norm": 0.34297922253608704, "learning_rate": 6.153620743594746e-05, "loss": 1.0313, "step": 1287 }, { "epoch": 0.6278332927126493, "grad_norm": 0.4172343313694, "learning_rate": 6.139417063844892e-05, "loss": 1.1047, "step": 1288 }, { "epoch": 0.6283207409212771, "grad_norm": 0.3697414994239807, "learning_rate": 6.125222530839301e-05, "loss": 1.2737, "step": 1289 }, { "epoch": 0.628808189129905, "grad_norm": 0.39292341470718384, "learning_rate": 6.111037178208597e-05, "loss": 1.0398, "step": 1290 }, { "epoch": 0.6292956373385328, "grad_norm": 0.34564071893692017, "learning_rate": 6.0968610395616345e-05, "loss": 1.0393, "step": 1291 }, { "epoch": 0.6297830855471606, "grad_norm": 0.39844003319740295, "learning_rate": 6.082694148485437e-05, "loss": 1.0699, "step": 1292 }, { "epoch": 0.6302705337557885, "grad_norm": 0.32988882064819336, "learning_rate": 6.068536538545133e-05, "loss": 0.9944, "step": 1293 }, { "epoch": 0.6307579819644162, "grad_norm": 0.4675695598125458, "learning_rate": 6.054388243283853e-05, "loss": 1.1975, "step": 1294 }, { "epoch": 0.6312454301730441, "grad_norm": 0.43154412508010864, "learning_rate": 6.040249296222653e-05, "loss": 1.0772, "step": 1295 }, { "epoch": 0.6317328783816719, "grad_norm": 0.44753265380859375, "learning_rate": 6.026119730860451e-05, "loss": 1.0963, "step": 1296 }, { "epoch": 0.6322203265902998, "grad_norm": 0.40827831625938416, "learning_rate": 6.011999580673931e-05, "loss": 1.1346, "step": 1297 }, { "epoch": 0.6327077747989276, "grad_norm": 0.34523019194602966, "learning_rate": 5.9978888791174705e-05, "loss": 1.0493, "step": 1298 }, { "epoch": 0.6331952230075555, "grad_norm": 0.3623436987400055, "learning_rate": 5.983787659623064e-05, "loss": 1.1105, "step": 1299 }, { "epoch": 0.6336826712161833, "grad_norm": 0.3779457211494446, "learning_rate": 5.969695955600236e-05, "loss": 1.1782, "step": 1300 }, { "epoch": 0.6341701194248112, "grad_norm": 0.4165879487991333, "learning_rate": 5.955613800435971e-05, "loss": 1.1784, "step": 1301 }, { "epoch": 0.634657567633439, "grad_norm": 0.3469805121421814, "learning_rate": 5.94154122749462e-05, "loss": 1.2509, "step": 1302 }, { "epoch": 0.6351450158420667, "grad_norm": 0.3427400588989258, "learning_rate": 5.9274782701178496e-05, "loss": 1.1735, "step": 1303 }, { "epoch": 0.6356324640506946, "grad_norm": 0.40498340129852295, "learning_rate": 5.913424961624528e-05, "loss": 1.1969, "step": 1304 }, { "epoch": 0.6361199122593224, "grad_norm": 0.4550219476222992, "learning_rate": 5.899381335310663e-05, "loss": 1.1946, "step": 1305 }, { "epoch": 0.6366073604679503, "grad_norm": 0.36041921377182007, "learning_rate": 5.885347424449337e-05, "loss": 1.0194, "step": 1306 }, { "epoch": 0.6370948086765781, "grad_norm": 0.38736119866371155, "learning_rate": 5.871323262290599e-05, "loss": 1.0996, "step": 1307 }, { "epoch": 0.637582256885206, "grad_norm": 0.37170708179473877, "learning_rate": 5.857308882061406e-05, "loss": 1.1053, "step": 1308 }, { "epoch": 0.6380697050938338, "grad_norm": 0.35190945863723755, "learning_rate": 5.843304316965543e-05, "loss": 1.2578, "step": 1309 }, { "epoch": 0.6385571533024617, "grad_norm": 0.36703935265541077, "learning_rate": 5.829309600183536e-05, "loss": 1.0586, "step": 1310 }, { "epoch": 0.6390446015110894, "grad_norm": 0.38557273149490356, "learning_rate": 5.8153247648725715e-05, "loss": 1.1757, "step": 1311 }, { "epoch": 0.6395320497197173, "grad_norm": 0.47165647149086, "learning_rate": 5.801349844166443e-05, "loss": 1.1128, "step": 1312 }, { "epoch": 0.6400194979283451, "grad_norm": 0.380670428276062, "learning_rate": 5.7873848711754345e-05, "loss": 1.1902, "step": 1313 }, { "epoch": 0.6405069461369729, "grad_norm": 0.3996541500091553, "learning_rate": 5.773429878986272e-05, "loss": 1.1026, "step": 1314 }, { "epoch": 0.6409943943456008, "grad_norm": 0.3766230642795563, "learning_rate": 5.759484900662027e-05, "loss": 1.2191, "step": 1315 }, { "epoch": 0.6414818425542286, "grad_norm": 0.31586742401123047, "learning_rate": 5.745549969242052e-05, "loss": 1.1092, "step": 1316 }, { "epoch": 0.6419692907628565, "grad_norm": 0.40071529150009155, "learning_rate": 5.731625117741892e-05, "loss": 0.9477, "step": 1317 }, { "epoch": 0.6424567389714843, "grad_norm": 0.4465852975845337, "learning_rate": 5.7177103791532096e-05, "loss": 1.1996, "step": 1318 }, { "epoch": 0.6429441871801121, "grad_norm": 0.38537243008613586, "learning_rate": 5.7038057864437144e-05, "loss": 1.0195, "step": 1319 }, { "epoch": 0.6434316353887399, "grad_norm": 0.3876956105232239, "learning_rate": 5.689911372557067e-05, "loss": 1.176, "step": 1320 }, { "epoch": 0.6439190835973678, "grad_norm": 0.3757091164588928, "learning_rate": 5.676027170412816e-05, "loss": 1.0577, "step": 1321 }, { "epoch": 0.6444065318059956, "grad_norm": 0.4040318727493286, "learning_rate": 5.6621532129063224e-05, "loss": 1.0778, "step": 1322 }, { "epoch": 0.6448939800146234, "grad_norm": 0.41619521379470825, "learning_rate": 5.648289532908666e-05, "loss": 1.2163, "step": 1323 }, { "epoch": 0.6453814282232513, "grad_norm": 0.4285852611064911, "learning_rate": 5.634436163266579e-05, "loss": 1.1286, "step": 1324 }, { "epoch": 0.6458688764318791, "grad_norm": 0.38491562008857727, "learning_rate": 5.620593136802365e-05, "loss": 1.2055, "step": 1325 }, { "epoch": 0.646356324640507, "grad_norm": 0.4486836791038513, "learning_rate": 5.6067604863138245e-05, "loss": 1.1919, "step": 1326 }, { "epoch": 0.6468437728491347, "grad_norm": 0.36280357837677, "learning_rate": 5.592938244574169e-05, "loss": 1.3017, "step": 1327 }, { "epoch": 0.6473312210577626, "grad_norm": 0.35584884881973267, "learning_rate": 5.579126444331959e-05, "loss": 1.0426, "step": 1328 }, { "epoch": 0.6478186692663904, "grad_norm": 0.3492376208305359, "learning_rate": 5.5653251183110075e-05, "loss": 1.1458, "step": 1329 }, { "epoch": 0.6483061174750183, "grad_norm": 0.46781599521636963, "learning_rate": 5.551534299210315e-05, "loss": 1.0435, "step": 1330 }, { "epoch": 0.6487935656836461, "grad_norm": 0.47177448868751526, "learning_rate": 5.5377540197039866e-05, "loss": 1.0767, "step": 1331 }, { "epoch": 0.649281013892274, "grad_norm": 0.3602699935436249, "learning_rate": 5.523984312441157e-05, "loss": 1.1655, "step": 1332 }, { "epoch": 0.6497684621009018, "grad_norm": 0.39494967460632324, "learning_rate": 5.510225210045914e-05, "loss": 1.1493, "step": 1333 }, { "epoch": 0.6502559103095296, "grad_norm": 0.3143066167831421, "learning_rate": 5.496476745117211e-05, "loss": 1.1146, "step": 1334 }, { "epoch": 0.6507433585181575, "grad_norm": 0.3416048288345337, "learning_rate": 5.4827389502288166e-05, "loss": 1.0537, "step": 1335 }, { "epoch": 0.6512308067267852, "grad_norm": 0.4054270088672638, "learning_rate": 5.4690118579292015e-05, "loss": 1.1853, "step": 1336 }, { "epoch": 0.6517182549354131, "grad_norm": 0.43888169527053833, "learning_rate": 5.455295500741484e-05, "loss": 1.0151, "step": 1337 }, { "epoch": 0.6522057031440409, "grad_norm": 0.4000617563724518, "learning_rate": 5.441589911163358e-05, "loss": 1.0849, "step": 1338 }, { "epoch": 0.6526931513526688, "grad_norm": 0.3413247764110565, "learning_rate": 5.427895121666993e-05, "loss": 1.1687, "step": 1339 }, { "epoch": 0.6531805995612966, "grad_norm": 0.3506993055343628, "learning_rate": 5.414211164698976e-05, "loss": 1.153, "step": 1340 }, { "epoch": 0.6536680477699245, "grad_norm": 0.37291571497917175, "learning_rate": 5.400538072680228e-05, "loss": 1.1409, "step": 1341 }, { "epoch": 0.6541554959785523, "grad_norm": 0.4084410071372986, "learning_rate": 5.386875878005927e-05, "loss": 1.12, "step": 1342 }, { "epoch": 0.6546429441871802, "grad_norm": 0.38073453307151794, "learning_rate": 5.3732246130454356e-05, "loss": 1.0577, "step": 1343 }, { "epoch": 0.655130392395808, "grad_norm": 0.3509850800037384, "learning_rate": 5.3595843101422136e-05, "loss": 1.0801, "step": 1344 }, { "epoch": 0.6556178406044357, "grad_norm": 0.40296420454978943, "learning_rate": 5.3459550016137626e-05, "loss": 1.1404, "step": 1345 }, { "epoch": 0.6561052888130636, "grad_norm": 0.3997831344604492, "learning_rate": 5.332336719751523e-05, "loss": 1.0855, "step": 1346 }, { "epoch": 0.6565927370216914, "grad_norm": 0.4008702337741852, "learning_rate": 5.318729496820809e-05, "loss": 1.1426, "step": 1347 }, { "epoch": 0.6570801852303193, "grad_norm": 0.45951130986213684, "learning_rate": 5.305133365060748e-05, "loss": 1.1355, "step": 1348 }, { "epoch": 0.6575676334389471, "grad_norm": 0.34970512986183167, "learning_rate": 5.291548356684177e-05, "loss": 1.0636, "step": 1349 }, { "epoch": 0.658055081647575, "grad_norm": 0.3302462697029114, "learning_rate": 5.277974503877579e-05, "loss": 1.1759, "step": 1350 }, { "epoch": 0.6585425298562028, "grad_norm": 0.38779914379119873, "learning_rate": 5.264411838801011e-05, "loss": 1.2124, "step": 1351 }, { "epoch": 0.6590299780648307, "grad_norm": 0.3708178699016571, "learning_rate": 5.250860393588022e-05, "loss": 1.1036, "step": 1352 }, { "epoch": 0.6595174262734584, "grad_norm": 0.3890998363494873, "learning_rate": 5.23732020034557e-05, "loss": 1.153, "step": 1353 }, { "epoch": 0.6600048744820862, "grad_norm": 0.37718912959098816, "learning_rate": 5.223791291153974e-05, "loss": 1.0722, "step": 1354 }, { "epoch": 0.6604923226907141, "grad_norm": 0.43145954608917236, "learning_rate": 5.210273698066801e-05, "loss": 1.1016, "step": 1355 }, { "epoch": 0.6609797708993419, "grad_norm": 0.4002271592617035, "learning_rate": 5.196767453110811e-05, "loss": 1.2884, "step": 1356 }, { "epoch": 0.6614672191079698, "grad_norm": 0.3822720944881439, "learning_rate": 5.1832725882858745e-05, "loss": 1.0398, "step": 1357 }, { "epoch": 0.6619546673165976, "grad_norm": 0.3739412724971771, "learning_rate": 5.169789135564915e-05, "loss": 1.1957, "step": 1358 }, { "epoch": 0.6624421155252255, "grad_norm": 0.3349688649177551, "learning_rate": 5.156317126893795e-05, "loss": 1.0651, "step": 1359 }, { "epoch": 0.6629295637338533, "grad_norm": 0.44151395559310913, "learning_rate": 5.142856594191274e-05, "loss": 1.2503, "step": 1360 }, { "epoch": 0.6634170119424811, "grad_norm": 0.41473668813705444, "learning_rate": 5.129407569348927e-05, "loss": 1.1377, "step": 1361 }, { "epoch": 0.6639044601511089, "grad_norm": 0.3414384722709656, "learning_rate": 5.115970084231059e-05, "loss": 1.004, "step": 1362 }, { "epoch": 0.6643919083597368, "grad_norm": 0.4229922592639923, "learning_rate": 5.102544170674628e-05, "loss": 1.1093, "step": 1363 }, { "epoch": 0.6648793565683646, "grad_norm": 0.31604325771331787, "learning_rate": 5.089129860489188e-05, "loss": 1.094, "step": 1364 }, { "epoch": 0.6653668047769924, "grad_norm": 0.40494808554649353, "learning_rate": 5.075727185456793e-05, "loss": 1.1109, "step": 1365 }, { "epoch": 0.6658542529856203, "grad_norm": 0.45369645953178406, "learning_rate": 5.062336177331934e-05, "loss": 1.1929, "step": 1366 }, { "epoch": 0.6663417011942481, "grad_norm": 0.35073715448379517, "learning_rate": 5.048956867841459e-05, "loss": 1.1078, "step": 1367 }, { "epoch": 0.666829149402876, "grad_norm": 0.3622107207775116, "learning_rate": 5.035589288684495e-05, "loss": 1.1053, "step": 1368 }, { "epoch": 0.6673165976115037, "grad_norm": 0.3263455033302307, "learning_rate": 5.0222334715323825e-05, "loss": 1.1512, "step": 1369 }, { "epoch": 0.6678040458201316, "grad_norm": 0.35704344511032104, "learning_rate": 5.0088894480285887e-05, "loss": 1.1933, "step": 1370 }, { "epoch": 0.6682914940287594, "grad_norm": 0.36799290776252747, "learning_rate": 4.9955572497886505e-05, "loss": 1.1541, "step": 1371 }, { "epoch": 0.6687789422373873, "grad_norm": 0.3307376205921173, "learning_rate": 4.982236908400074e-05, "loss": 1.046, "step": 1372 }, { "epoch": 0.6692663904460151, "grad_norm": 0.3832852244377136, "learning_rate": 4.968928455422277e-05, "loss": 1.1152, "step": 1373 }, { "epoch": 0.669753838654643, "grad_norm": 0.3245486319065094, "learning_rate": 4.955631922386517e-05, "loss": 1.0141, "step": 1374 }, { "epoch": 0.6702412868632708, "grad_norm": 0.3602396249771118, "learning_rate": 4.9423473407958035e-05, "loss": 0.968, "step": 1375 }, { "epoch": 0.6707287350718986, "grad_norm": 0.40289953351020813, "learning_rate": 4.929074742124831e-05, "loss": 1.0, "step": 1376 }, { "epoch": 0.6712161832805265, "grad_norm": 0.363689661026001, "learning_rate": 4.915814157819903e-05, "loss": 1.1995, "step": 1377 }, { "epoch": 0.6717036314891542, "grad_norm": 0.431208074092865, "learning_rate": 4.902565619298859e-05, "loss": 1.0191, "step": 1378 }, { "epoch": 0.6721910796977821, "grad_norm": 0.47906234860420227, "learning_rate": 4.889329157950996e-05, "loss": 1.0698, "step": 1379 }, { "epoch": 0.6726785279064099, "grad_norm": 0.4144451916217804, "learning_rate": 4.876104805137005e-05, "loss": 1.0121, "step": 1380 }, { "epoch": 0.6731659761150378, "grad_norm": 0.3713430166244507, "learning_rate": 4.86289259218888e-05, "loss": 1.345, "step": 1381 }, { "epoch": 0.6736534243236656, "grad_norm": 0.40037861466407776, "learning_rate": 4.849692550409857e-05, "loss": 1.1318, "step": 1382 }, { "epoch": 0.6741408725322935, "grad_norm": 0.41233551502227783, "learning_rate": 4.836504711074328e-05, "loss": 1.1669, "step": 1383 }, { "epoch": 0.6746283207409213, "grad_norm": 0.36600425839424133, "learning_rate": 4.8233291054277905e-05, "loss": 1.1323, "step": 1384 }, { "epoch": 0.675115768949549, "grad_norm": 0.4426422417163849, "learning_rate": 4.8101657646867396e-05, "loss": 1.1922, "step": 1385 }, { "epoch": 0.675603217158177, "grad_norm": 0.39272043108940125, "learning_rate": 4.797014720038614e-05, "loss": 1.1978, "step": 1386 }, { "epoch": 0.6760906653668047, "grad_norm": 0.3592641353607178, "learning_rate": 4.783876002641734e-05, "loss": 1.1467, "step": 1387 }, { "epoch": 0.6765781135754326, "grad_norm": 0.37689095735549927, "learning_rate": 4.7707496436252e-05, "loss": 1.0715, "step": 1388 }, { "epoch": 0.6770655617840604, "grad_norm": 0.4962100386619568, "learning_rate": 4.7576356740888315e-05, "loss": 1.1173, "step": 1389 }, { "epoch": 0.6775530099926883, "grad_norm": 0.3450334966182709, "learning_rate": 4.744534125103106e-05, "loss": 1.1435, "step": 1390 }, { "epoch": 0.6780404582013161, "grad_norm": 0.3096364736557007, "learning_rate": 4.7314450277090626e-05, "loss": 0.957, "step": 1391 }, { "epoch": 0.678527906409944, "grad_norm": 0.32999417185783386, "learning_rate": 4.7183684129182414e-05, "loss": 1.12, "step": 1392 }, { "epoch": 0.6790153546185718, "grad_norm": 0.45688876509666443, "learning_rate": 4.705304311712609e-05, "loss": 1.0668, "step": 1393 }, { "epoch": 0.6795028028271997, "grad_norm": 0.4175863564014435, "learning_rate": 4.692252755044485e-05, "loss": 1.0344, "step": 1394 }, { "epoch": 0.6799902510358274, "grad_norm": 0.36583220958709717, "learning_rate": 4.679213773836463e-05, "loss": 1.0781, "step": 1395 }, { "epoch": 0.6804776992444552, "grad_norm": 0.3702552616596222, "learning_rate": 4.666187398981351e-05, "loss": 1.0035, "step": 1396 }, { "epoch": 0.6809651474530831, "grad_norm": 0.45179715752601624, "learning_rate": 4.6531736613420826e-05, "loss": 1.0166, "step": 1397 }, { "epoch": 0.6814525956617109, "grad_norm": 0.2747941017150879, "learning_rate": 4.6401725917516505e-05, "loss": 1.0981, "step": 1398 }, { "epoch": 0.6819400438703388, "grad_norm": 0.34443342685699463, "learning_rate": 4.62718422101303e-05, "loss": 0.9779, "step": 1399 }, { "epoch": 0.6824274920789666, "grad_norm": 0.4068431258201599, "learning_rate": 4.614208579899123e-05, "loss": 1.0938, "step": 1400 }, { "epoch": 0.6829149402875945, "grad_norm": 0.4701009690761566, "learning_rate": 4.601245699152659e-05, "loss": 1.1205, "step": 1401 }, { "epoch": 0.6834023884962223, "grad_norm": 0.3799245357513428, "learning_rate": 4.5882956094861375e-05, "loss": 1.1747, "step": 1402 }, { "epoch": 0.6838898367048502, "grad_norm": 0.345043420791626, "learning_rate": 4.5753583415817536e-05, "loss": 1.0841, "step": 1403 }, { "epoch": 0.6843772849134779, "grad_norm": 0.3221161365509033, "learning_rate": 4.562433926091325e-05, "loss": 1.0883, "step": 1404 }, { "epoch": 0.6848647331221058, "grad_norm": 0.35330602526664734, "learning_rate": 4.549522393636214e-05, "loss": 1.1009, "step": 1405 }, { "epoch": 0.6853521813307336, "grad_norm": 0.3911016881465912, "learning_rate": 4.536623774807269e-05, "loss": 1.0455, "step": 1406 }, { "epoch": 0.6858396295393614, "grad_norm": 0.3859974145889282, "learning_rate": 4.523738100164736e-05, "loss": 1.0063, "step": 1407 }, { "epoch": 0.6863270777479893, "grad_norm": 0.39257508516311646, "learning_rate": 4.5108654002381875e-05, "loss": 1.0503, "step": 1408 }, { "epoch": 0.6868145259566171, "grad_norm": 0.36935654282569885, "learning_rate": 4.4980057055264714e-05, "loss": 0.9985, "step": 1409 }, { "epoch": 0.687301974165245, "grad_norm": 0.3954034745693207, "learning_rate": 4.485159046497607e-05, "loss": 1.1401, "step": 1410 }, { "epoch": 0.6877894223738727, "grad_norm": 0.3816031813621521, "learning_rate": 4.4723254535887395e-05, "loss": 1.1157, "step": 1411 }, { "epoch": 0.6882768705825006, "grad_norm": 0.42583298683166504, "learning_rate": 4.459504957206041e-05, "loss": 1.1594, "step": 1412 }, { "epoch": 0.6887643187911284, "grad_norm": 0.45978638529777527, "learning_rate": 4.446697587724677e-05, "loss": 1.048, "step": 1413 }, { "epoch": 0.6892517669997563, "grad_norm": 0.3576642572879791, "learning_rate": 4.433903375488697e-05, "loss": 0.9329, "step": 1414 }, { "epoch": 0.6897392152083841, "grad_norm": 0.3792358934879303, "learning_rate": 4.421122350810978e-05, "loss": 1.1325, "step": 1415 }, { "epoch": 0.6902266634170119, "grad_norm": 0.3718627989292145, "learning_rate": 4.4083545439731614e-05, "loss": 1.1786, "step": 1416 }, { "epoch": 0.6907141116256398, "grad_norm": 0.3628930449485779, "learning_rate": 4.395599985225561e-05, "loss": 1.0596, "step": 1417 }, { "epoch": 0.6912015598342676, "grad_norm": 0.38017037510871887, "learning_rate": 4.38285870478711e-05, "loss": 1.2319, "step": 1418 }, { "epoch": 0.6916890080428955, "grad_norm": 0.49403607845306396, "learning_rate": 4.370130732845277e-05, "loss": 1.111, "step": 1419 }, { "epoch": 0.6921764562515232, "grad_norm": 0.4223406910896301, "learning_rate": 4.357416099556002e-05, "loss": 1.2386, "step": 1420 }, { "epoch": 0.6926639044601511, "grad_norm": 0.39490723609924316, "learning_rate": 4.344714835043618e-05, "loss": 1.1064, "step": 1421 }, { "epoch": 0.6931513526687789, "grad_norm": 0.42644432187080383, "learning_rate": 4.332026969400794e-05, "loss": 1.2032, "step": 1422 }, { "epoch": 0.6936388008774068, "grad_norm": 0.40960004925727844, "learning_rate": 4.3193525326884435e-05, "loss": 1.1461, "step": 1423 }, { "epoch": 0.6941262490860346, "grad_norm": 0.4543960690498352, "learning_rate": 4.306691554935667e-05, "loss": 1.0211, "step": 1424 }, { "epoch": 0.6946136972946625, "grad_norm": 0.42792388796806335, "learning_rate": 4.294044066139671e-05, "loss": 1.1311, "step": 1425 }, { "epoch": 0.6951011455032903, "grad_norm": 0.37301984429359436, "learning_rate": 4.281410096265719e-05, "loss": 1.0813, "step": 1426 }, { "epoch": 0.6955885937119181, "grad_norm": 0.370306134223938, "learning_rate": 4.268789675247029e-05, "loss": 1.1299, "step": 1427 }, { "epoch": 0.696076041920546, "grad_norm": 0.42097169160842896, "learning_rate": 4.256182832984724e-05, "loss": 1.1097, "step": 1428 }, { "epoch": 0.6965634901291737, "grad_norm": 0.4374851882457733, "learning_rate": 4.243589599347755e-05, "loss": 1.1263, "step": 1429 }, { "epoch": 0.6970509383378016, "grad_norm": 0.3310086727142334, "learning_rate": 4.23101000417283e-05, "loss": 1.103, "step": 1430 }, { "epoch": 0.6975383865464294, "grad_norm": 0.3817380368709564, "learning_rate": 4.218444077264342e-05, "loss": 0.95, "step": 1431 }, { "epoch": 0.6980258347550573, "grad_norm": 0.35832837224006653, "learning_rate": 4.205891848394308e-05, "loss": 1.1271, "step": 1432 }, { "epoch": 0.6985132829636851, "grad_norm": 0.3595336973667145, "learning_rate": 4.193353347302282e-05, "loss": 1.0738, "step": 1433 }, { "epoch": 0.699000731172313, "grad_norm": 0.33713003993034363, "learning_rate": 4.180828603695296e-05, "loss": 1.0913, "step": 1434 }, { "epoch": 0.6994881793809408, "grad_norm": 0.31479769945144653, "learning_rate": 4.16831764724779e-05, "loss": 1.096, "step": 1435 }, { "epoch": 0.6999756275895687, "grad_norm": 0.33350107073783875, "learning_rate": 4.155820507601536e-05, "loss": 1.0534, "step": 1436 }, { "epoch": 0.7004630757981964, "grad_norm": 0.3710598647594452, "learning_rate": 4.143337214365572e-05, "loss": 1.0831, "step": 1437 }, { "epoch": 0.7009505240068242, "grad_norm": 0.41655072569847107, "learning_rate": 4.130867797116118e-05, "loss": 1.0895, "step": 1438 }, { "epoch": 0.7014379722154521, "grad_norm": 0.3327270746231079, "learning_rate": 4.1184122853965415e-05, "loss": 1.1405, "step": 1439 }, { "epoch": 0.7019254204240799, "grad_norm": 0.44724804162979126, "learning_rate": 4.105970708717244e-05, "loss": 1.173, "step": 1440 }, { "epoch": 0.7024128686327078, "grad_norm": 0.47310054302215576, "learning_rate": 4.093543096555616e-05, "loss": 1.2473, "step": 1441 }, { "epoch": 0.7029003168413356, "grad_norm": 0.3690038323402405, "learning_rate": 4.081129478355975e-05, "loss": 1.2197, "step": 1442 }, { "epoch": 0.7033877650499635, "grad_norm": 0.3522759974002838, "learning_rate": 4.0687298835294663e-05, "loss": 1.0677, "step": 1443 }, { "epoch": 0.7038752132585913, "grad_norm": 0.3102143406867981, "learning_rate": 4.0563443414540136e-05, "loss": 1.1593, "step": 1444 }, { "epoch": 0.7043626614672192, "grad_norm": 0.3423633873462677, "learning_rate": 4.0439728814742596e-05, "loss": 1.247, "step": 1445 }, { "epoch": 0.7048501096758469, "grad_norm": 0.4625047445297241, "learning_rate": 4.031615532901463e-05, "loss": 1.1411, "step": 1446 }, { "epoch": 0.7053375578844747, "grad_norm": 0.35930484533309937, "learning_rate": 4.019272325013456e-05, "loss": 1.0514, "step": 1447 }, { "epoch": 0.7058250060931026, "grad_norm": 0.42370128631591797, "learning_rate": 4.0069432870545776e-05, "loss": 0.9846, "step": 1448 }, { "epoch": 0.7063124543017304, "grad_norm": 0.513253927230835, "learning_rate": 3.994628448235583e-05, "loss": 1.0176, "step": 1449 }, { "epoch": 0.7067999025103583, "grad_norm": 0.32254430651664734, "learning_rate": 3.9823278377335914e-05, "loss": 0.9944, "step": 1450 }, { "epoch": 0.7072873507189861, "grad_norm": 0.36206114292144775, "learning_rate": 3.970041484692003e-05, "loss": 1.0255, "step": 1451 }, { "epoch": 0.707774798927614, "grad_norm": 0.40832626819610596, "learning_rate": 3.957769418220455e-05, "loss": 1.0763, "step": 1452 }, { "epoch": 0.7082622471362418, "grad_norm": 0.3668363094329834, "learning_rate": 3.945511667394719e-05, "loss": 1.05, "step": 1453 }, { "epoch": 0.7087496953448696, "grad_norm": 0.3837505578994751, "learning_rate": 3.9332682612566585e-05, "loss": 1.3172, "step": 1454 }, { "epoch": 0.7092371435534974, "grad_norm": 0.3345847427845001, "learning_rate": 3.921039228814145e-05, "loss": 1.0437, "step": 1455 }, { "epoch": 0.7097245917621253, "grad_norm": 0.38669687509536743, "learning_rate": 3.908824599041001e-05, "loss": 1.1198, "step": 1456 }, { "epoch": 0.7102120399707531, "grad_norm": 0.3960307240486145, "learning_rate": 3.896624400876917e-05, "loss": 1.1377, "step": 1457 }, { "epoch": 0.7106994881793809, "grad_norm": 0.43880727887153625, "learning_rate": 3.884438663227403e-05, "loss": 1.1162, "step": 1458 }, { "epoch": 0.7111869363880088, "grad_norm": 0.4575899541378021, "learning_rate": 3.8722674149636986e-05, "loss": 1.1324, "step": 1459 }, { "epoch": 0.7116743845966366, "grad_norm": 0.359183669090271, "learning_rate": 3.860110684922713e-05, "loss": 1.0138, "step": 1460 }, { "epoch": 0.7121618328052645, "grad_norm": 0.4640527367591858, "learning_rate": 3.847968501906969e-05, "loss": 1.0692, "step": 1461 }, { "epoch": 0.7126492810138922, "grad_norm": 0.3558286130428314, "learning_rate": 3.835840894684514e-05, "loss": 1.2269, "step": 1462 }, { "epoch": 0.7131367292225201, "grad_norm": 0.4080779254436493, "learning_rate": 3.8237278919888616e-05, "loss": 1.0738, "step": 1463 }, { "epoch": 0.7136241774311479, "grad_norm": 0.3963766098022461, "learning_rate": 3.8116295225189305e-05, "loss": 1.0178, "step": 1464 }, { "epoch": 0.7141116256397758, "grad_norm": 0.3747573792934418, "learning_rate": 3.799545814938959e-05, "loss": 0.9247, "step": 1465 }, { "epoch": 0.7145990738484036, "grad_norm": 0.3791253864765167, "learning_rate": 3.787476797878459e-05, "loss": 1.0676, "step": 1466 }, { "epoch": 0.7150865220570315, "grad_norm": 0.35384225845336914, "learning_rate": 3.775422499932123e-05, "loss": 1.1025, "step": 1467 }, { "epoch": 0.7155739702656593, "grad_norm": 0.35406047105789185, "learning_rate": 3.763382949659787e-05, "loss": 1.0788, "step": 1468 }, { "epoch": 0.7160614184742871, "grad_norm": 0.40216243267059326, "learning_rate": 3.7513581755863336e-05, "loss": 1.1128, "step": 1469 }, { "epoch": 0.716548866682915, "grad_norm": 0.37562674283981323, "learning_rate": 3.739348206201635e-05, "loss": 1.1785, "step": 1470 }, { "epoch": 0.7170363148915427, "grad_norm": 0.4050292372703552, "learning_rate": 3.7273530699605044e-05, "loss": 1.0603, "step": 1471 }, { "epoch": 0.7175237631001706, "grad_norm": 0.34335386753082275, "learning_rate": 3.715372795282587e-05, "loss": 1.119, "step": 1472 }, { "epoch": 0.7180112113087984, "grad_norm": 0.4139067828655243, "learning_rate": 3.7034074105523284e-05, "loss": 1.0752, "step": 1473 }, { "epoch": 0.7184986595174263, "grad_norm": 0.42542508244514465, "learning_rate": 3.691456944118906e-05, "loss": 1.0176, "step": 1474 }, { "epoch": 0.7189861077260541, "grad_norm": 0.3980538249015808, "learning_rate": 3.679521424296137e-05, "loss": 1.0894, "step": 1475 }, { "epoch": 0.719473555934682, "grad_norm": 0.4451710283756256, "learning_rate": 3.6676008793624296e-05, "loss": 1.0847, "step": 1476 }, { "epoch": 0.7199610041433098, "grad_norm": 0.4051099717617035, "learning_rate": 3.6556953375607186e-05, "loss": 1.2075, "step": 1477 }, { "epoch": 0.7204484523519376, "grad_norm": 0.45044445991516113, "learning_rate": 3.643804827098388e-05, "loss": 1.184, "step": 1478 }, { "epoch": 0.7209359005605654, "grad_norm": 0.3991248607635498, "learning_rate": 3.631929376147207e-05, "loss": 1.1447, "step": 1479 }, { "epoch": 0.7214233487691932, "grad_norm": 0.38065215945243835, "learning_rate": 3.620069012843267e-05, "loss": 1.2044, "step": 1480 }, { "epoch": 0.7219107969778211, "grad_norm": 0.42019620537757874, "learning_rate": 3.608223765286912e-05, "loss": 1.1288, "step": 1481 }, { "epoch": 0.7223982451864489, "grad_norm": 0.4416203200817108, "learning_rate": 3.596393661542674e-05, "loss": 1.1556, "step": 1482 }, { "epoch": 0.7228856933950768, "grad_norm": 0.48135778307914734, "learning_rate": 3.5845787296392006e-05, "loss": 1.165, "step": 1483 }, { "epoch": 0.7233731416037046, "grad_norm": 0.3307107985019684, "learning_rate": 3.572778997569204e-05, "loss": 1.0351, "step": 1484 }, { "epoch": 0.7238605898123325, "grad_norm": 0.36838921904563904, "learning_rate": 3.5609944932893736e-05, "loss": 1.0249, "step": 1485 }, { "epoch": 0.7243480380209603, "grad_norm": 0.3509773910045624, "learning_rate": 3.54922524472032e-05, "loss": 1.2437, "step": 1486 }, { "epoch": 0.7248354862295882, "grad_norm": 0.39572563767433167, "learning_rate": 3.5374712797465214e-05, "loss": 1.0417, "step": 1487 }, { "epoch": 0.7253229344382159, "grad_norm": 0.4869961440563202, "learning_rate": 3.5257326262162304e-05, "loss": 1.1738, "step": 1488 }, { "epoch": 0.7258103826468437, "grad_norm": 0.34554779529571533, "learning_rate": 3.51400931194143e-05, "loss": 1.1651, "step": 1489 }, { "epoch": 0.7262978308554716, "grad_norm": 0.3999471366405487, "learning_rate": 3.502301364697758e-05, "loss": 1.1151, "step": 1490 }, { "epoch": 0.7267852790640994, "grad_norm": 0.3819086253643036, "learning_rate": 3.490608812224446e-05, "loss": 1.0452, "step": 1491 }, { "epoch": 0.7272727272727273, "grad_norm": 0.30517885088920593, "learning_rate": 3.47893168222425e-05, "loss": 1.0678, "step": 1492 }, { "epoch": 0.7277601754813551, "grad_norm": 0.3606143891811371, "learning_rate": 3.467270002363382e-05, "loss": 1.0806, "step": 1493 }, { "epoch": 0.728247623689983, "grad_norm": 0.41307225823402405, "learning_rate": 3.45562380027146e-05, "loss": 1.2821, "step": 1494 }, { "epoch": 0.7287350718986108, "grad_norm": 0.4158805310726166, "learning_rate": 3.44399310354142e-05, "loss": 1.2177, "step": 1495 }, { "epoch": 0.7292225201072386, "grad_norm": 0.32801222801208496, "learning_rate": 3.432377939729462e-05, "loss": 1.0945, "step": 1496 }, { "epoch": 0.7297099683158664, "grad_norm": 0.4302747845649719, "learning_rate": 3.420778336354995e-05, "loss": 1.1308, "step": 1497 }, { "epoch": 0.7301974165244943, "grad_norm": 0.3798311948776245, "learning_rate": 3.409194320900554e-05, "loss": 1.1171, "step": 1498 }, { "epoch": 0.7306848647331221, "grad_norm": 0.41932743787765503, "learning_rate": 3.397625920811731e-05, "loss": 1.1425, "step": 1499 }, { "epoch": 0.7311723129417499, "grad_norm": 0.33966439962387085, "learning_rate": 3.386073163497144e-05, "loss": 1.0847, "step": 1500 }, { "epoch": 0.7316597611503778, "grad_norm": 0.3647656738758087, "learning_rate": 3.374536076328333e-05, "loss": 0.9508, "step": 1501 }, { "epoch": 0.7321472093590056, "grad_norm": 0.3434484302997589, "learning_rate": 3.3630146866397104e-05, "loss": 0.9775, "step": 1502 }, { "epoch": 0.7326346575676335, "grad_norm": 0.42506644129753113, "learning_rate": 3.351509021728512e-05, "loss": 1.1988, "step": 1503 }, { "epoch": 0.7331221057762612, "grad_norm": 0.4124164879322052, "learning_rate": 3.340019108854703e-05, "loss": 1.0521, "step": 1504 }, { "epoch": 0.7336095539848891, "grad_norm": 0.38989806175231934, "learning_rate": 3.328544975240932e-05, "loss": 1.1389, "step": 1505 }, { "epoch": 0.7340970021935169, "grad_norm": 0.40123671293258667, "learning_rate": 3.3170866480724637e-05, "loss": 1.0026, "step": 1506 }, { "epoch": 0.7345844504021448, "grad_norm": 0.403225839138031, "learning_rate": 3.3056441544971115e-05, "loss": 1.1522, "step": 1507 }, { "epoch": 0.7350718986107726, "grad_norm": 0.42019128799438477, "learning_rate": 3.294217521625177e-05, "loss": 1.1012, "step": 1508 }, { "epoch": 0.7355593468194004, "grad_norm": 0.35564446449279785, "learning_rate": 3.282806776529378e-05, "loss": 1.1675, "step": 1509 }, { "epoch": 0.7360467950280283, "grad_norm": 0.479063481092453, "learning_rate": 3.2714119462448e-05, "loss": 1.1614, "step": 1510 }, { "epoch": 0.7365342432366561, "grad_norm": 0.46616676449775696, "learning_rate": 3.260033057768814e-05, "loss": 1.1798, "step": 1511 }, { "epoch": 0.737021691445284, "grad_norm": 0.3975960314273834, "learning_rate": 3.248670138061021e-05, "loss": 1.0032, "step": 1512 }, { "epoch": 0.7375091396539117, "grad_norm": 0.3194623291492462, "learning_rate": 3.237323214043193e-05, "loss": 1.0289, "step": 1513 }, { "epoch": 0.7379965878625396, "grad_norm": 0.3215973377227783, "learning_rate": 3.2259923125992e-05, "loss": 1.0274, "step": 1514 }, { "epoch": 0.7384840360711674, "grad_norm": 0.4231413006782532, "learning_rate": 3.21467746057495e-05, "loss": 1.0905, "step": 1515 }, { "epoch": 0.7389714842797953, "grad_norm": 0.45163217186927795, "learning_rate": 3.203378684778326e-05, "loss": 1.1281, "step": 1516 }, { "epoch": 0.7394589324884231, "grad_norm": 0.35993802547454834, "learning_rate": 3.192096011979124e-05, "loss": 1.1051, "step": 1517 }, { "epoch": 0.739946380697051, "grad_norm": 0.33846375346183777, "learning_rate": 3.180829468908986e-05, "loss": 1.0615, "step": 1518 }, { "epoch": 0.7404338289056788, "grad_norm": 0.38558444380760193, "learning_rate": 3.169579082261335e-05, "loss": 1.2364, "step": 1519 }, { "epoch": 0.7409212771143066, "grad_norm": 0.41832321882247925, "learning_rate": 3.158344878691325e-05, "loss": 1.1329, "step": 1520 }, { "epoch": 0.7414087253229344, "grad_norm": 0.4375525712966919, "learning_rate": 3.147126884815761e-05, "loss": 1.052, "step": 1521 }, { "epoch": 0.7418961735315622, "grad_norm": 0.3645784258842468, "learning_rate": 3.1359251272130384e-05, "loss": 1.0807, "step": 1522 }, { "epoch": 0.7423836217401901, "grad_norm": 0.3972663879394531, "learning_rate": 3.124739632423098e-05, "loss": 1.0337, "step": 1523 }, { "epoch": 0.7428710699488179, "grad_norm": 0.33198773860931396, "learning_rate": 3.113570426947342e-05, "loss": 1.0686, "step": 1524 }, { "epoch": 0.7433585181574458, "grad_norm": 0.417258620262146, "learning_rate": 3.1024175372485685e-05, "loss": 1.0509, "step": 1525 }, { "epoch": 0.7438459663660736, "grad_norm": 0.4219992458820343, "learning_rate": 3.091280989750937e-05, "loss": 1.1581, "step": 1526 }, { "epoch": 0.7443334145747015, "grad_norm": 0.4250520169734955, "learning_rate": 3.080160810839881e-05, "loss": 1.1641, "step": 1527 }, { "epoch": 0.7448208627833293, "grad_norm": 0.38769063353538513, "learning_rate": 3.069057026862048e-05, "loss": 1.0963, "step": 1528 }, { "epoch": 0.7453083109919572, "grad_norm": 0.3646620512008667, "learning_rate": 3.057969664125248e-05, "loss": 1.1754, "step": 1529 }, { "epoch": 0.7457957592005849, "grad_norm": 0.4347061514854431, "learning_rate": 3.0468987488983837e-05, "loss": 1.1011, "step": 1530 }, { "epoch": 0.7462832074092127, "grad_norm": 0.3439730107784271, "learning_rate": 3.035844307411384e-05, "loss": 1.0675, "step": 1531 }, { "epoch": 0.7467706556178406, "grad_norm": 0.335822194814682, "learning_rate": 3.024806365855154e-05, "loss": 1.0758, "step": 1532 }, { "epoch": 0.7472581038264684, "grad_norm": 0.3784874975681305, "learning_rate": 3.013784950381501e-05, "loss": 1.049, "step": 1533 }, { "epoch": 0.7477455520350963, "grad_norm": 0.35510337352752686, "learning_rate": 3.0027800871030797e-05, "loss": 1.0305, "step": 1534 }, { "epoch": 0.7482330002437241, "grad_norm": 0.3230135440826416, "learning_rate": 2.9917918020933267e-05, "loss": 1.0568, "step": 1535 }, { "epoch": 0.748720448452352, "grad_norm": 0.3796325922012329, "learning_rate": 2.9808201213864062e-05, "loss": 1.0139, "step": 1536 }, { "epoch": 0.7492078966609798, "grad_norm": 0.3692516088485718, "learning_rate": 2.9698650709771347e-05, "loss": 1.0474, "step": 1537 }, { "epoch": 0.7496953448696076, "grad_norm": 0.39915838837623596, "learning_rate": 2.958926676820929e-05, "loss": 1.0897, "step": 1538 }, { "epoch": 0.7501827930782354, "grad_norm": 0.41971927881240845, "learning_rate": 2.9480049648337493e-05, "loss": 1.0735, "step": 1539 }, { "epoch": 0.7506702412868632, "grad_norm": 0.3538713753223419, "learning_rate": 2.9370999608920237e-05, "loss": 1.189, "step": 1540 }, { "epoch": 0.7511576894954911, "grad_norm": 0.4109525978565216, "learning_rate": 2.9262116908325965e-05, "loss": 1.1621, "step": 1541 }, { "epoch": 0.7516451377041189, "grad_norm": 0.4208962023258209, "learning_rate": 2.915340180452666e-05, "loss": 1.0659, "step": 1542 }, { "epoch": 0.7521325859127468, "grad_norm": 0.39982888102531433, "learning_rate": 2.9044854555097212e-05, "loss": 1.156, "step": 1543 }, { "epoch": 0.7526200341213746, "grad_norm": 0.43298467993736267, "learning_rate": 2.8936475417214794e-05, "loss": 1.0843, "step": 1544 }, { "epoch": 0.7531074823300025, "grad_norm": 0.38478174805641174, "learning_rate": 2.8828264647658388e-05, "loss": 1.21, "step": 1545 }, { "epoch": 0.7535949305386302, "grad_norm": 0.42979133129119873, "learning_rate": 2.8720222502807946e-05, "loss": 0.9725, "step": 1546 }, { "epoch": 0.7540823787472581, "grad_norm": 0.3768276274204254, "learning_rate": 2.861234923864394e-05, "loss": 1.0635, "step": 1547 }, { "epoch": 0.7545698269558859, "grad_norm": 0.39537665247917175, "learning_rate": 2.850464511074672e-05, "loss": 1.1399, "step": 1548 }, { "epoch": 0.7550572751645138, "grad_norm": 0.3813226521015167, "learning_rate": 2.8397110374295955e-05, "loss": 1.0786, "step": 1549 }, { "epoch": 0.7555447233731416, "grad_norm": 0.3653690218925476, "learning_rate": 2.828974528406991e-05, "loss": 1.0643, "step": 1550 }, { "epoch": 0.7560321715817694, "grad_norm": 0.3394027054309845, "learning_rate": 2.818255009444496e-05, "loss": 0.9943, "step": 1551 }, { "epoch": 0.7565196197903973, "grad_norm": 0.40031924843788147, "learning_rate": 2.8075525059394893e-05, "loss": 1.1436, "step": 1552 }, { "epoch": 0.7570070679990251, "grad_norm": 0.33379706740379333, "learning_rate": 2.7968670432490408e-05, "loss": 1.044, "step": 1553 }, { "epoch": 0.757494516207653, "grad_norm": 0.3941287398338318, "learning_rate": 2.7861986466898395e-05, "loss": 1.1144, "step": 1554 }, { "epoch": 0.7579819644162807, "grad_norm": 0.3709263801574707, "learning_rate": 2.7755473415381517e-05, "loss": 1.0881, "step": 1555 }, { "epoch": 0.7584694126249086, "grad_norm": 0.4325391352176666, "learning_rate": 2.7649131530297388e-05, "loss": 1.1278, "step": 1556 }, { "epoch": 0.7589568608335364, "grad_norm": 0.36034736037254333, "learning_rate": 2.7542961063598104e-05, "loss": 1.1199, "step": 1557 }, { "epoch": 0.7594443090421643, "grad_norm": 0.39042332768440247, "learning_rate": 2.7436962266829715e-05, "loss": 1.1726, "step": 1558 }, { "epoch": 0.7599317572507921, "grad_norm": 0.3944944739341736, "learning_rate": 2.733113539113139e-05, "loss": 1.1469, "step": 1559 }, { "epoch": 0.76041920545942, "grad_norm": 0.3531627058982849, "learning_rate": 2.722548068723506e-05, "loss": 0.9875, "step": 1560 }, { "epoch": 0.7609066536680478, "grad_norm": 0.3481125831604004, "learning_rate": 2.711999840546472e-05, "loss": 1.2121, "step": 1561 }, { "epoch": 0.7613941018766756, "grad_norm": 0.4217544198036194, "learning_rate": 2.7014688795735898e-05, "loss": 1.1031, "step": 1562 }, { "epoch": 0.7618815500853034, "grad_norm": 0.3160876929759979, "learning_rate": 2.690955210755495e-05, "loss": 1.0948, "step": 1563 }, { "epoch": 0.7623689982939312, "grad_norm": 0.35992559790611267, "learning_rate": 2.680458859001852e-05, "loss": 1.1592, "step": 1564 }, { "epoch": 0.7628564465025591, "grad_norm": 0.34475815296173096, "learning_rate": 2.6699798491813065e-05, "loss": 1.1727, "step": 1565 }, { "epoch": 0.7633438947111869, "grad_norm": 0.4077177345752716, "learning_rate": 2.6595182061214075e-05, "loss": 1.0463, "step": 1566 }, { "epoch": 0.7638313429198148, "grad_norm": 0.35198870301246643, "learning_rate": 2.6490739546085607e-05, "loss": 0.9737, "step": 1567 }, { "epoch": 0.7643187911284426, "grad_norm": 0.39966869354248047, "learning_rate": 2.6386471193879658e-05, "loss": 1.0667, "step": 1568 }, { "epoch": 0.7648062393370705, "grad_norm": 0.4429773986339569, "learning_rate": 2.6282377251635604e-05, "loss": 1.1457, "step": 1569 }, { "epoch": 0.7652936875456983, "grad_norm": 0.39318573474884033, "learning_rate": 2.617845796597954e-05, "loss": 1.0209, "step": 1570 }, { "epoch": 0.765781135754326, "grad_norm": 0.5061572194099426, "learning_rate": 2.6074713583123866e-05, "loss": 1.1212, "step": 1571 }, { "epoch": 0.7662685839629539, "grad_norm": 0.43377435207366943, "learning_rate": 2.5971144348866494e-05, "loss": 1.2506, "step": 1572 }, { "epoch": 0.7667560321715817, "grad_norm": 0.39093247056007385, "learning_rate": 2.5867750508590382e-05, "loss": 1.1338, "step": 1573 }, { "epoch": 0.7672434803802096, "grad_norm": 0.37731918692588806, "learning_rate": 2.5764532307262934e-05, "loss": 1.1943, "step": 1574 }, { "epoch": 0.7677309285888374, "grad_norm": 0.4303186237812042, "learning_rate": 2.566148998943547e-05, "loss": 1.0712, "step": 1575 }, { "epoch": 0.7682183767974653, "grad_norm": 0.4580343961715698, "learning_rate": 2.555862379924253e-05, "loss": 0.9849, "step": 1576 }, { "epoch": 0.7687058250060931, "grad_norm": 0.4869823157787323, "learning_rate": 2.5455933980401393e-05, "loss": 1.1867, "step": 1577 }, { "epoch": 0.769193273214721, "grad_norm": 0.42554041743278503, "learning_rate": 2.5353420776211447e-05, "loss": 1.0575, "step": 1578 }, { "epoch": 0.7696807214233488, "grad_norm": 0.41002970933914185, "learning_rate": 2.525108442955364e-05, "loss": 1.0154, "step": 1579 }, { "epoch": 0.7701681696319767, "grad_norm": 0.4180346429347992, "learning_rate": 2.514892518288988e-05, "loss": 1.0725, "step": 1580 }, { "epoch": 0.7706556178406044, "grad_norm": 0.4667299687862396, "learning_rate": 2.504694327826258e-05, "loss": 0.9928, "step": 1581 }, { "epoch": 0.7711430660492322, "grad_norm": 0.34292903542518616, "learning_rate": 2.4945138957293835e-05, "loss": 0.9856, "step": 1582 }, { "epoch": 0.7716305142578601, "grad_norm": 0.39459550380706787, "learning_rate": 2.484351246118507e-05, "loss": 1.2468, "step": 1583 }, { "epoch": 0.7721179624664879, "grad_norm": 0.3772342801094055, "learning_rate": 2.474206403071644e-05, "loss": 1.1554, "step": 1584 }, { "epoch": 0.7726054106751158, "grad_norm": 0.43720147013664246, "learning_rate": 2.464079390624615e-05, "loss": 1.1343, "step": 1585 }, { "epoch": 0.7730928588837436, "grad_norm": 0.510027289390564, "learning_rate": 2.4539702327709936e-05, "loss": 1.2158, "step": 1586 }, { "epoch": 0.7735803070923715, "grad_norm": 0.41168540716171265, "learning_rate": 2.4438789534620522e-05, "loss": 1.1311, "step": 1587 }, { "epoch": 0.7740677553009992, "grad_norm": 0.3641628921031952, "learning_rate": 2.4338055766067135e-05, "loss": 1.0413, "step": 1588 }, { "epoch": 0.7745552035096271, "grad_norm": 0.33775594830513, "learning_rate": 2.4237501260714734e-05, "loss": 1.039, "step": 1589 }, { "epoch": 0.7750426517182549, "grad_norm": 0.3569764196872711, "learning_rate": 2.413712625680358e-05, "loss": 1.159, "step": 1590 }, { "epoch": 0.7755300999268828, "grad_norm": 0.46986496448516846, "learning_rate": 2.403693099214871e-05, "loss": 1.2067, "step": 1591 }, { "epoch": 0.7760175481355106, "grad_norm": 0.39282193779945374, "learning_rate": 2.393691570413924e-05, "loss": 1.1314, "step": 1592 }, { "epoch": 0.7765049963441384, "grad_norm": 0.36249709129333496, "learning_rate": 2.3837080629737884e-05, "loss": 1.0121, "step": 1593 }, { "epoch": 0.7769924445527663, "grad_norm": 0.4231277406215668, "learning_rate": 2.3737426005480414e-05, "loss": 1.0772, "step": 1594 }, { "epoch": 0.7774798927613941, "grad_norm": 0.38033294677734375, "learning_rate": 2.3637952067475043e-05, "loss": 1.1995, "step": 1595 }, { "epoch": 0.777967340970022, "grad_norm": 0.33983561396598816, "learning_rate": 2.353865905140187e-05, "loss": 1.0918, "step": 1596 }, { "epoch": 0.7784547891786497, "grad_norm": 0.448473185300827, "learning_rate": 2.343954719251241e-05, "loss": 1.0779, "step": 1597 }, { "epoch": 0.7789422373872776, "grad_norm": 0.4694695472717285, "learning_rate": 2.3340616725628926e-05, "loss": 1.0535, "step": 1598 }, { "epoch": 0.7794296855959054, "grad_norm": 0.39060917496681213, "learning_rate": 2.32418678851439e-05, "loss": 1.1766, "step": 1599 }, { "epoch": 0.7799171338045333, "grad_norm": 0.3499775528907776, "learning_rate": 2.3143300905019484e-05, "loss": 1.0203, "step": 1600 }, { "epoch": 0.7804045820131611, "grad_norm": 0.3471561074256897, "learning_rate": 2.3044916018787032e-05, "loss": 1.0232, "step": 1601 }, { "epoch": 0.7808920302217889, "grad_norm": 0.4390745759010315, "learning_rate": 2.29467134595464e-05, "loss": 1.1864, "step": 1602 }, { "epoch": 0.7813794784304168, "grad_norm": 0.41209185123443604, "learning_rate": 2.2848693459965475e-05, "loss": 1.0051, "step": 1603 }, { "epoch": 0.7818669266390446, "grad_norm": 0.38516929745674133, "learning_rate": 2.2750856252279608e-05, "loss": 1.0338, "step": 1604 }, { "epoch": 0.7823543748476725, "grad_norm": 0.42860835790634155, "learning_rate": 2.26532020682911e-05, "loss": 1.1316, "step": 1605 }, { "epoch": 0.7828418230563002, "grad_norm": 0.330546110868454, "learning_rate": 2.255573113936855e-05, "loss": 1.013, "step": 1606 }, { "epoch": 0.7833292712649281, "grad_norm": 0.39379894733428955, "learning_rate": 2.2458443696446484e-05, "loss": 1.1574, "step": 1607 }, { "epoch": 0.7838167194735559, "grad_norm": 0.4301210939884186, "learning_rate": 2.236133997002462e-05, "loss": 1.1746, "step": 1608 }, { "epoch": 0.7843041676821838, "grad_norm": 0.36347338557243347, "learning_rate": 2.226442019016739e-05, "loss": 0.9925, "step": 1609 }, { "epoch": 0.7847916158908116, "grad_norm": 0.33933982253074646, "learning_rate": 2.21676845865035e-05, "loss": 0.9947, "step": 1610 }, { "epoch": 0.7852790640994395, "grad_norm": 0.40830859541893005, "learning_rate": 2.207113338822524e-05, "loss": 1.1907, "step": 1611 }, { "epoch": 0.7857665123080673, "grad_norm": 0.4412747621536255, "learning_rate": 2.197476682408792e-05, "loss": 1.0358, "step": 1612 }, { "epoch": 0.786253960516695, "grad_norm": 0.36393675208091736, "learning_rate": 2.18785851224095e-05, "loss": 1.13, "step": 1613 }, { "epoch": 0.7867414087253229, "grad_norm": 0.409282922744751, "learning_rate": 2.1782588511069957e-05, "loss": 1.061, "step": 1614 }, { "epoch": 0.7872288569339507, "grad_norm": 0.3050946891307831, "learning_rate": 2.168677721751069e-05, "loss": 1.1321, "step": 1615 }, { "epoch": 0.7877163051425786, "grad_norm": 0.40699470043182373, "learning_rate": 2.1591151468734027e-05, "loss": 1.0106, "step": 1616 }, { "epoch": 0.7882037533512064, "grad_norm": 0.3460084795951843, "learning_rate": 2.149571149130276e-05, "loss": 1.1958, "step": 1617 }, { "epoch": 0.7886912015598343, "grad_norm": 0.4064023494720459, "learning_rate": 2.1400457511339467e-05, "loss": 0.925, "step": 1618 }, { "epoch": 0.7891786497684621, "grad_norm": 0.327619731426239, "learning_rate": 2.1305389754526074e-05, "loss": 1.0711, "step": 1619 }, { "epoch": 0.78966609797709, "grad_norm": 0.4511876106262207, "learning_rate": 2.1210508446103293e-05, "loss": 0.9372, "step": 1620 }, { "epoch": 0.7901535461857178, "grad_norm": 0.44113391637802124, "learning_rate": 2.111581381087011e-05, "loss": 1.1551, "step": 1621 }, { "epoch": 0.7906409943943457, "grad_norm": 0.4107889235019684, "learning_rate": 2.1021306073183167e-05, "loss": 1.0985, "step": 1622 }, { "epoch": 0.7911284426029734, "grad_norm": 0.3807615339756012, "learning_rate": 2.0926985456956417e-05, "loss": 1.0919, "step": 1623 }, { "epoch": 0.7916158908116012, "grad_norm": 0.3942524194717407, "learning_rate": 2.0832852185660356e-05, "loss": 1.299, "step": 1624 }, { "epoch": 0.7921033390202291, "grad_norm": 0.34323641657829285, "learning_rate": 2.073890648232164e-05, "loss": 0.9839, "step": 1625 }, { "epoch": 0.7925907872288569, "grad_norm": 0.32075735926628113, "learning_rate": 2.06451485695226e-05, "loss": 1.0878, "step": 1626 }, { "epoch": 0.7930782354374848, "grad_norm": 0.38971444964408875, "learning_rate": 2.055157866940054e-05, "loss": 0.9919, "step": 1627 }, { "epoch": 0.7935656836461126, "grad_norm": 0.3504349887371063, "learning_rate": 2.0458197003647373e-05, "loss": 1.1747, "step": 1628 }, { "epoch": 0.7940531318547405, "grad_norm": 0.35090172290802, "learning_rate": 2.0365003793509007e-05, "loss": 1.1011, "step": 1629 }, { "epoch": 0.7945405800633683, "grad_norm": 0.34960517287254333, "learning_rate": 2.0271999259784858e-05, "loss": 0.9365, "step": 1630 }, { "epoch": 0.7950280282719961, "grad_norm": 0.31795409321784973, "learning_rate": 2.0179183622827312e-05, "loss": 1.0213, "step": 1631 }, { "epoch": 0.7955154764806239, "grad_norm": 0.3856673836708069, "learning_rate": 2.0086557102541203e-05, "loss": 1.0721, "step": 1632 }, { "epoch": 0.7960029246892517, "grad_norm": 0.40313300490379333, "learning_rate": 1.9994119918383358e-05, "loss": 1.1315, "step": 1633 }, { "epoch": 0.7964903728978796, "grad_norm": 0.37088483572006226, "learning_rate": 1.9901872289361935e-05, "loss": 1.1286, "step": 1634 }, { "epoch": 0.7969778211065074, "grad_norm": 0.3714759945869446, "learning_rate": 1.9809814434036e-05, "loss": 1.1514, "step": 1635 }, { "epoch": 0.7974652693151353, "grad_norm": 0.40931692719459534, "learning_rate": 1.9717946570515066e-05, "loss": 1.1017, "step": 1636 }, { "epoch": 0.7979527175237631, "grad_norm": 0.3661656677722931, "learning_rate": 1.962626891645841e-05, "loss": 1.0751, "step": 1637 }, { "epoch": 0.798440165732391, "grad_norm": 0.38110336661338806, "learning_rate": 1.9534781689074722e-05, "loss": 1.1106, "step": 1638 }, { "epoch": 0.7989276139410187, "grad_norm": 0.3157220780849457, "learning_rate": 1.944348510512147e-05, "loss": 1.0537, "step": 1639 }, { "epoch": 0.7994150621496466, "grad_norm": 0.40534940361976624, "learning_rate": 1.9352379380904473e-05, "loss": 1.0117, "step": 1640 }, { "epoch": 0.7999025103582744, "grad_norm": 0.3753153681755066, "learning_rate": 1.9261464732277334e-05, "loss": 1.1414, "step": 1641 }, { "epoch": 0.8003899585669023, "grad_norm": 0.4216148257255554, "learning_rate": 1.9170741374640933e-05, "loss": 1.1454, "step": 1642 }, { "epoch": 0.8008774067755301, "grad_norm": 0.4276210367679596, "learning_rate": 1.9080209522942992e-05, "loss": 1.065, "step": 1643 }, { "epoch": 0.8013648549841579, "grad_norm": 0.4728935956954956, "learning_rate": 1.8989869391677462e-05, "loss": 1.1932, "step": 1644 }, { "epoch": 0.8018523031927858, "grad_norm": 0.3567184507846832, "learning_rate": 1.8899721194884035e-05, "loss": 1.0522, "step": 1645 }, { "epoch": 0.8023397514014136, "grad_norm": 0.3507539629936218, "learning_rate": 1.8809765146147697e-05, "loss": 1.0161, "step": 1646 }, { "epoch": 0.8028271996100415, "grad_norm": 0.3791554868221283, "learning_rate": 1.872000145859816e-05, "loss": 1.0541, "step": 1647 }, { "epoch": 0.8033146478186692, "grad_norm": 0.38745689392089844, "learning_rate": 1.863043034490938e-05, "loss": 1.1743, "step": 1648 }, { "epoch": 0.8038020960272971, "grad_norm": 0.3388438820838928, "learning_rate": 1.854105201729912e-05, "loss": 1.0733, "step": 1649 }, { "epoch": 0.8042895442359249, "grad_norm": 0.38149645924568176, "learning_rate": 1.8451866687528284e-05, "loss": 1.185, "step": 1650 }, { "epoch": 0.8047769924445528, "grad_norm": 0.39097803831100464, "learning_rate": 1.8362874566900556e-05, "loss": 1.034, "step": 1651 }, { "epoch": 0.8052644406531806, "grad_norm": 0.4000914990901947, "learning_rate": 1.8274075866261896e-05, "loss": 1.143, "step": 1652 }, { "epoch": 0.8057518888618085, "grad_norm": 0.40363654494285583, "learning_rate": 1.818547079599995e-05, "loss": 1.0637, "step": 1653 }, { "epoch": 0.8062393370704363, "grad_norm": 0.352938175201416, "learning_rate": 1.8097059566043595e-05, "loss": 1.0713, "step": 1654 }, { "epoch": 0.806726785279064, "grad_norm": 0.37018442153930664, "learning_rate": 1.800884238586248e-05, "loss": 1.0661, "step": 1655 }, { "epoch": 0.807214233487692, "grad_norm": 0.4333856701850891, "learning_rate": 1.7920819464466486e-05, "loss": 1.1136, "step": 1656 }, { "epoch": 0.8077016816963197, "grad_norm": 0.4050823748111725, "learning_rate": 1.7832991010405243e-05, "loss": 1.0066, "step": 1657 }, { "epoch": 0.8081891299049476, "grad_norm": 0.3594289720058441, "learning_rate": 1.7745357231767622e-05, "loss": 1.0689, "step": 1658 }, { "epoch": 0.8086765781135754, "grad_norm": 0.2837337255477905, "learning_rate": 1.7657918336181324e-05, "loss": 1.0588, "step": 1659 }, { "epoch": 0.8091640263222033, "grad_norm": 0.41181838512420654, "learning_rate": 1.7570674530812224e-05, "loss": 1.1877, "step": 1660 }, { "epoch": 0.8096514745308311, "grad_norm": 0.3461904525756836, "learning_rate": 1.748362602236403e-05, "loss": 1.1245, "step": 1661 }, { "epoch": 0.810138922739459, "grad_norm": 0.486634761095047, "learning_rate": 1.7396773017077748e-05, "loss": 1.1609, "step": 1662 }, { "epoch": 0.8106263709480868, "grad_norm": 0.33511972427368164, "learning_rate": 1.731011572073117e-05, "loss": 1.0115, "step": 1663 }, { "epoch": 0.8111138191567145, "grad_norm": 0.5276228785514832, "learning_rate": 1.7223654338638385e-05, "loss": 1.0923, "step": 1664 }, { "epoch": 0.8116012673653424, "grad_norm": 0.4225917160511017, "learning_rate": 1.7137389075649335e-05, "loss": 1.1583, "step": 1665 }, { "epoch": 0.8120887155739702, "grad_norm": 0.3635517954826355, "learning_rate": 1.7051320136149286e-05, "loss": 1.0727, "step": 1666 }, { "epoch": 0.8125761637825981, "grad_norm": 0.3625679314136505, "learning_rate": 1.696544772405836e-05, "loss": 0.9549, "step": 1667 }, { "epoch": 0.8130636119912259, "grad_norm": 0.3724707365036011, "learning_rate": 1.6879772042831065e-05, "loss": 1.0298, "step": 1668 }, { "epoch": 0.8135510601998538, "grad_norm": 0.4418652057647705, "learning_rate": 1.679429329545582e-05, "loss": 1.1513, "step": 1669 }, { "epoch": 0.8140385084084816, "grad_norm": 0.4359528422355652, "learning_rate": 1.6709011684454435e-05, "loss": 1.0941, "step": 1670 }, { "epoch": 0.8145259566171095, "grad_norm": 0.3722599148750305, "learning_rate": 1.662392741188161e-05, "loss": 1.1031, "step": 1671 }, { "epoch": 0.8150134048257373, "grad_norm": 0.37397316098213196, "learning_rate": 1.6539040679324623e-05, "loss": 1.1302, "step": 1672 }, { "epoch": 0.8155008530343651, "grad_norm": 0.3645596206188202, "learning_rate": 1.6454351687902557e-05, "loss": 1.0137, "step": 1673 }, { "epoch": 0.8159883012429929, "grad_norm": 0.38736292719841003, "learning_rate": 1.63698606382661e-05, "loss": 1.0888, "step": 1674 }, { "epoch": 0.8164757494516207, "grad_norm": 0.3472338318824768, "learning_rate": 1.6285567730596974e-05, "loss": 0.9184, "step": 1675 }, { "epoch": 0.8169631976602486, "grad_norm": 0.36622050404548645, "learning_rate": 1.6201473164607396e-05, "loss": 0.8993, "step": 1676 }, { "epoch": 0.8174506458688764, "grad_norm": 0.3877585232257843, "learning_rate": 1.6117577139539676e-05, "loss": 1.1695, "step": 1677 }, { "epoch": 0.8179380940775043, "grad_norm": 0.38888975977897644, "learning_rate": 1.6033879854165766e-05, "loss": 1.1307, "step": 1678 }, { "epoch": 0.8184255422861321, "grad_norm": 0.415061354637146, "learning_rate": 1.5950381506786714e-05, "loss": 1.0329, "step": 1679 }, { "epoch": 0.81891299049476, "grad_norm": 0.34671249985694885, "learning_rate": 1.5867082295232216e-05, "loss": 1.0966, "step": 1680 }, { "epoch": 0.8194004387033877, "grad_norm": 0.45664656162261963, "learning_rate": 1.5783982416860198e-05, "loss": 1.0137, "step": 1681 }, { "epoch": 0.8198878869120156, "grad_norm": 0.4034433960914612, "learning_rate": 1.5701082068556304e-05, "loss": 1.1861, "step": 1682 }, { "epoch": 0.8203753351206434, "grad_norm": 0.3839215636253357, "learning_rate": 1.561838144673341e-05, "loss": 1.2001, "step": 1683 }, { "epoch": 0.8208627833292713, "grad_norm": 0.330584317445755, "learning_rate": 1.553588074733121e-05, "loss": 1.1973, "step": 1684 }, { "epoch": 0.8213502315378991, "grad_norm": 0.3817874491214752, "learning_rate": 1.5453580165815762e-05, "loss": 0.9829, "step": 1685 }, { "epoch": 0.8218376797465269, "grad_norm": 0.3511894941329956, "learning_rate": 1.537147989717893e-05, "loss": 1.1028, "step": 1686 }, { "epoch": 0.8223251279551548, "grad_norm": 0.38406670093536377, "learning_rate": 1.528958013593801e-05, "loss": 1.0144, "step": 1687 }, { "epoch": 0.8228125761637826, "grad_norm": 0.4310559630393982, "learning_rate": 1.5207881076135267e-05, "loss": 1.0646, "step": 1688 }, { "epoch": 0.8233000243724105, "grad_norm": 0.3396536111831665, "learning_rate": 1.5126382911337422e-05, "loss": 1.1363, "step": 1689 }, { "epoch": 0.8237874725810382, "grad_norm": 0.3851539194583893, "learning_rate": 1.5045085834635231e-05, "loss": 1.0915, "step": 1690 }, { "epoch": 0.8242749207896661, "grad_norm": 0.4351629614830017, "learning_rate": 1.4963990038643005e-05, "loss": 1.0605, "step": 1691 }, { "epoch": 0.8247623689982939, "grad_norm": 0.3781146705150604, "learning_rate": 1.4883095715498185e-05, "loss": 1.1243, "step": 1692 }, { "epoch": 0.8252498172069218, "grad_norm": 0.41910192370414734, "learning_rate": 1.480240305686087e-05, "loss": 1.1733, "step": 1693 }, { "epoch": 0.8257372654155496, "grad_norm": 0.4027787148952484, "learning_rate": 1.4721912253913328e-05, "loss": 1.0486, "step": 1694 }, { "epoch": 0.8262247136241774, "grad_norm": 0.4166243374347687, "learning_rate": 1.4641623497359658e-05, "loss": 1.1038, "step": 1695 }, { "epoch": 0.8267121618328053, "grad_norm": 0.3793354630470276, "learning_rate": 1.4561536977425172e-05, "loss": 1.1123, "step": 1696 }, { "epoch": 0.827199610041433, "grad_norm": 0.36616671085357666, "learning_rate": 1.4481652883856056e-05, "loss": 1.2744, "step": 1697 }, { "epoch": 0.827687058250061, "grad_norm": 0.4479994475841522, "learning_rate": 1.4401971405918957e-05, "loss": 1.2132, "step": 1698 }, { "epoch": 0.8281745064586887, "grad_norm": 0.35453662276268005, "learning_rate": 1.432249273240035e-05, "loss": 1.109, "step": 1699 }, { "epoch": 0.8286619546673166, "grad_norm": 0.34129640460014343, "learning_rate": 1.4243217051606283e-05, "loss": 1.0581, "step": 1700 }, { "epoch": 0.8291494028759444, "grad_norm": 0.4323829710483551, "learning_rate": 1.4164144551361902e-05, "loss": 1.0317, "step": 1701 }, { "epoch": 0.8296368510845723, "grad_norm": 0.43410375714302063, "learning_rate": 1.40852754190109e-05, "loss": 1.0699, "step": 1702 }, { "epoch": 0.8301242992932001, "grad_norm": 0.42333585023880005, "learning_rate": 1.400660984141513e-05, "loss": 1.1351, "step": 1703 }, { "epoch": 0.830611747501828, "grad_norm": 0.4202088415622711, "learning_rate": 1.3928148004954244e-05, "loss": 1.1727, "step": 1704 }, { "epoch": 0.8310991957104558, "grad_norm": 0.3802170157432556, "learning_rate": 1.3849890095525108e-05, "loss": 1.0649, "step": 1705 }, { "epoch": 0.8315866439190835, "grad_norm": 0.40519291162490845, "learning_rate": 1.377183629854144e-05, "loss": 1.1673, "step": 1706 }, { "epoch": 0.8320740921277114, "grad_norm": 0.3862631618976593, "learning_rate": 1.369398679893339e-05, "loss": 1.1161, "step": 1707 }, { "epoch": 0.8325615403363392, "grad_norm": 0.3797990083694458, "learning_rate": 1.3616341781147046e-05, "loss": 1.1403, "step": 1708 }, { "epoch": 0.8330489885449671, "grad_norm": 0.3590017259120941, "learning_rate": 1.3538901429144057e-05, "loss": 1.2033, "step": 1709 }, { "epoch": 0.8335364367535949, "grad_norm": 0.39021775126457214, "learning_rate": 1.3461665926401091e-05, "loss": 1.0198, "step": 1710 }, { "epoch": 0.8340238849622228, "grad_norm": 0.4277026951313019, "learning_rate": 1.3384635455909588e-05, "loss": 1.1402, "step": 1711 }, { "epoch": 0.8345113331708506, "grad_norm": 0.3869093358516693, "learning_rate": 1.330781020017513e-05, "loss": 1.0954, "step": 1712 }, { "epoch": 0.8349987813794785, "grad_norm": 0.5378305315971375, "learning_rate": 1.3231190341217081e-05, "loss": 1.1328, "step": 1713 }, { "epoch": 0.8354862295881063, "grad_norm": 0.4125831127166748, "learning_rate": 1.3154776060568252e-05, "loss": 1.0873, "step": 1714 }, { "epoch": 0.8359736777967341, "grad_norm": 0.36874914169311523, "learning_rate": 1.3078567539274288e-05, "loss": 1.0784, "step": 1715 }, { "epoch": 0.8364611260053619, "grad_norm": 0.37857338786125183, "learning_rate": 1.3002564957893393e-05, "loss": 1.1726, "step": 1716 }, { "epoch": 0.8369485742139897, "grad_norm": 0.347371369600296, "learning_rate": 1.2926768496495811e-05, "loss": 1.1752, "step": 1717 }, { "epoch": 0.8374360224226176, "grad_norm": 0.3918173909187317, "learning_rate": 1.2851178334663471e-05, "loss": 1.1114, "step": 1718 }, { "epoch": 0.8379234706312454, "grad_norm": 0.3389439880847931, "learning_rate": 1.277579465148946e-05, "loss": 1.0704, "step": 1719 }, { "epoch": 0.8384109188398733, "grad_norm": 0.42370593547821045, "learning_rate": 1.270061762557776e-05, "loss": 1.1535, "step": 1720 }, { "epoch": 0.8388983670485011, "grad_norm": 0.3641960620880127, "learning_rate": 1.2625647435042654e-05, "loss": 1.1482, "step": 1721 }, { "epoch": 0.839385815257129, "grad_norm": 0.3834351599216461, "learning_rate": 1.2550884257508389e-05, "loss": 1.031, "step": 1722 }, { "epoch": 0.8398732634657567, "grad_norm": 0.3766709566116333, "learning_rate": 1.2476328270108716e-05, "loss": 1.0101, "step": 1723 }, { "epoch": 0.8403607116743846, "grad_norm": 0.3967483341693878, "learning_rate": 1.2401979649486595e-05, "loss": 1.1647, "step": 1724 }, { "epoch": 0.8408481598830124, "grad_norm": 0.3371667265892029, "learning_rate": 1.2327838571793604e-05, "loss": 1.0442, "step": 1725 }, { "epoch": 0.8413356080916402, "grad_norm": 0.5255165100097656, "learning_rate": 1.2253905212689553e-05, "loss": 1.161, "step": 1726 }, { "epoch": 0.8418230563002681, "grad_norm": 0.3469902575016022, "learning_rate": 1.2180179747342213e-05, "loss": 1.0652, "step": 1727 }, { "epoch": 0.8423105045088959, "grad_norm": 0.40540820360183716, "learning_rate": 1.2106662350426746e-05, "loss": 1.1305, "step": 1728 }, { "epoch": 0.8427979527175238, "grad_norm": 0.3705641031265259, "learning_rate": 1.203335319612533e-05, "loss": 1.0239, "step": 1729 }, { "epoch": 0.8432854009261516, "grad_norm": 0.3371503949165344, "learning_rate": 1.196025245812682e-05, "loss": 1.0642, "step": 1730 }, { "epoch": 0.8437728491347795, "grad_norm": 0.42029133439064026, "learning_rate": 1.1887360309626227e-05, "loss": 1.0277, "step": 1731 }, { "epoch": 0.8442602973434072, "grad_norm": 0.42342185974121094, "learning_rate": 1.1814676923324364e-05, "loss": 1.1448, "step": 1732 }, { "epoch": 0.8447477455520351, "grad_norm": 0.39126384258270264, "learning_rate": 1.1742202471427443e-05, "loss": 1.0664, "step": 1733 }, { "epoch": 0.8452351937606629, "grad_norm": 0.40211716294288635, "learning_rate": 1.1669937125646646e-05, "loss": 1.0426, "step": 1734 }, { "epoch": 0.8457226419692908, "grad_norm": 0.3491050899028778, "learning_rate": 1.1597881057197735e-05, "loss": 1.1108, "step": 1735 }, { "epoch": 0.8462100901779186, "grad_norm": 0.4353748559951782, "learning_rate": 1.1526034436800614e-05, "loss": 1.0399, "step": 1736 }, { "epoch": 0.8466975383865464, "grad_norm": 0.4242027699947357, "learning_rate": 1.1454397434679021e-05, "loss": 0.9967, "step": 1737 }, { "epoch": 0.8471849865951743, "grad_norm": 0.390888512134552, "learning_rate": 1.1382970220559963e-05, "loss": 1.1165, "step": 1738 }, { "epoch": 0.847672434803802, "grad_norm": 0.36227092146873474, "learning_rate": 1.1311752963673439e-05, "loss": 1.0547, "step": 1739 }, { "epoch": 0.84815988301243, "grad_norm": 0.37730830907821655, "learning_rate": 1.1240745832752042e-05, "loss": 1.1506, "step": 1740 }, { "epoch": 0.8486473312210577, "grad_norm": 0.38817811012268066, "learning_rate": 1.1169948996030467e-05, "loss": 1.282, "step": 1741 }, { "epoch": 0.8491347794296856, "grad_norm": 0.45126134157180786, "learning_rate": 1.109936262124518e-05, "loss": 1.2197, "step": 1742 }, { "epoch": 0.8496222276383134, "grad_norm": 0.3940946161746979, "learning_rate": 1.1028986875634028e-05, "loss": 0.9121, "step": 1743 }, { "epoch": 0.8501096758469413, "grad_norm": 0.4234504699707031, "learning_rate": 1.095882192593579e-05, "loss": 1.1229, "step": 1744 }, { "epoch": 0.8505971240555691, "grad_norm": 0.3620837330818176, "learning_rate": 1.0888867938389813e-05, "loss": 1.0812, "step": 1745 }, { "epoch": 0.851084572264197, "grad_norm": 0.3606151342391968, "learning_rate": 1.0819125078735681e-05, "loss": 1.1089, "step": 1746 }, { "epoch": 0.8515720204728248, "grad_norm": 0.4045717716217041, "learning_rate": 1.0749593512212697e-05, "loss": 1.0729, "step": 1747 }, { "epoch": 0.8520594686814525, "grad_norm": 0.524031937122345, "learning_rate": 1.068027340355956e-05, "loss": 1.1044, "step": 1748 }, { "epoch": 0.8525469168900804, "grad_norm": 0.3646783232688904, "learning_rate": 1.0611164917013972e-05, "loss": 1.0587, "step": 1749 }, { "epoch": 0.8530343650987082, "grad_norm": 0.3999814987182617, "learning_rate": 1.054226821631229e-05, "loss": 1.1673, "step": 1750 }, { "epoch": 0.8535218133073361, "grad_norm": 0.37726038694381714, "learning_rate": 1.0473583464689074e-05, "loss": 1.158, "step": 1751 }, { "epoch": 0.8540092615159639, "grad_norm": 0.3894549310207367, "learning_rate": 1.0405110824876619e-05, "loss": 1.1205, "step": 1752 }, { "epoch": 0.8544967097245918, "grad_norm": 0.4052984416484833, "learning_rate": 1.0336850459104852e-05, "loss": 1.0923, "step": 1753 }, { "epoch": 0.8549841579332196, "grad_norm": 0.36014947295188904, "learning_rate": 1.0268802529100619e-05, "loss": 1.0199, "step": 1754 }, { "epoch": 0.8554716061418475, "grad_norm": 0.38177689909935, "learning_rate": 1.0200967196087508e-05, "loss": 1.1592, "step": 1755 }, { "epoch": 0.8559590543504753, "grad_norm": 0.39407598972320557, "learning_rate": 1.0133344620785435e-05, "loss": 1.0895, "step": 1756 }, { "epoch": 0.856446502559103, "grad_norm": 0.37637194991111755, "learning_rate": 1.0065934963410173e-05, "loss": 1.1463, "step": 1757 }, { "epoch": 0.8569339507677309, "grad_norm": 0.4099627733230591, "learning_rate": 9.99873838367309e-06, "loss": 1.2048, "step": 1758 }, { "epoch": 0.8574213989763587, "grad_norm": 0.3457520306110382, "learning_rate": 9.931755040780676e-06, "loss": 1.0743, "step": 1759 }, { "epoch": 0.8579088471849866, "grad_norm": 0.5198414921760559, "learning_rate": 9.864985093434243e-06, "loss": 1.202, "step": 1760 }, { "epoch": 0.8583962953936144, "grad_norm": 0.35789576172828674, "learning_rate": 9.798428699829476e-06, "loss": 0.9899, "step": 1761 }, { "epoch": 0.8588837436022423, "grad_norm": 0.36002370715141296, "learning_rate": 9.732086017656117e-06, "loss": 1.0878, "step": 1762 }, { "epoch": 0.8593711918108701, "grad_norm": 0.3624090254306793, "learning_rate": 9.665957204097575e-06, "loss": 1.0194, "step": 1763 }, { "epoch": 0.859858640019498, "grad_norm": 0.3344210982322693, "learning_rate": 9.600042415830535e-06, "loss": 1.0616, "step": 1764 }, { "epoch": 0.8603460882281257, "grad_norm": 0.3071996867656708, "learning_rate": 9.534341809024583e-06, "loss": 1.0228, "step": 1765 }, { "epoch": 0.8608335364367536, "grad_norm": 0.34209540486335754, "learning_rate": 9.468855539341904e-06, "loss": 1.0505, "step": 1766 }, { "epoch": 0.8613209846453814, "grad_norm": 0.4070102572441101, "learning_rate": 9.403583761936806e-06, "loss": 1.0844, "step": 1767 }, { "epoch": 0.8618084328540092, "grad_norm": 0.3737240731716156, "learning_rate": 9.338526631455447e-06, "loss": 1.006, "step": 1768 }, { "epoch": 0.8622958810626371, "grad_norm": 0.3421536684036255, "learning_rate": 9.273684302035402e-06, "loss": 1.0558, "step": 1769 }, { "epoch": 0.8627833292712649, "grad_norm": 0.3916158676147461, "learning_rate": 9.209056927305337e-06, "loss": 1.0472, "step": 1770 }, { "epoch": 0.8632707774798928, "grad_norm": 0.3504614233970642, "learning_rate": 9.144644660384615e-06, "loss": 1.2017, "step": 1771 }, { "epoch": 0.8637582256885206, "grad_norm": 0.3385731279850006, "learning_rate": 9.080447653883007e-06, "loss": 1.1729, "step": 1772 }, { "epoch": 0.8642456738971485, "grad_norm": 0.40618082880973816, "learning_rate": 9.01646605990022e-06, "loss": 0.998, "step": 1773 }, { "epoch": 0.8647331221057762, "grad_norm": 0.3386068046092987, "learning_rate": 8.952700030025597e-06, "loss": 0.9828, "step": 1774 }, { "epoch": 0.8652205703144041, "grad_norm": 0.34959864616394043, "learning_rate": 8.889149715337774e-06, "loss": 0.9908, "step": 1775 }, { "epoch": 0.8657080185230319, "grad_norm": 0.3689625859260559, "learning_rate": 8.825815266404302e-06, "loss": 1.1452, "step": 1776 }, { "epoch": 0.8661954667316598, "grad_norm": 0.35879069566726685, "learning_rate": 8.76269683328127e-06, "loss": 1.0694, "step": 1777 }, { "epoch": 0.8666829149402876, "grad_norm": 0.41808176040649414, "learning_rate": 8.699794565512975e-06, "loss": 1.0624, "step": 1778 }, { "epoch": 0.8671703631489154, "grad_norm": 0.4924822449684143, "learning_rate": 8.637108612131572e-06, "loss": 1.1, "step": 1779 }, { "epoch": 0.8676578113575433, "grad_norm": 0.3990834057331085, "learning_rate": 8.57463912165668e-06, "loss": 1.0657, "step": 1780 }, { "epoch": 0.8681452595661711, "grad_norm": 0.4121594727039337, "learning_rate": 8.512386242095083e-06, "loss": 1.0388, "step": 1781 }, { "epoch": 0.868632707774799, "grad_norm": 0.33079513907432556, "learning_rate": 8.450350120940365e-06, "loss": 1.0413, "step": 1782 }, { "epoch": 0.8691201559834267, "grad_norm": 0.38366904854774475, "learning_rate": 8.388530905172553e-06, "loss": 1.1262, "step": 1783 }, { "epoch": 0.8696076041920546, "grad_norm": 0.40860646963119507, "learning_rate": 8.32692874125771e-06, "loss": 1.0713, "step": 1784 }, { "epoch": 0.8700950524006824, "grad_norm": 0.36312568187713623, "learning_rate": 8.265543775147767e-06, "loss": 1.1937, "step": 1785 }, { "epoch": 0.8705825006093103, "grad_norm": 0.36420392990112305, "learning_rate": 8.204376152279914e-06, "loss": 1.0061, "step": 1786 }, { "epoch": 0.8710699488179381, "grad_norm": 0.35609522461891174, "learning_rate": 8.14342601757645e-06, "loss": 1.1563, "step": 1787 }, { "epoch": 0.8715573970265659, "grad_norm": 0.3678031265735626, "learning_rate": 8.082693515444462e-06, "loss": 0.9554, "step": 1788 }, { "epoch": 0.8720448452351938, "grad_norm": 0.42413562536239624, "learning_rate": 8.022178789775315e-06, "loss": 1.0543, "step": 1789 }, { "epoch": 0.8725322934438215, "grad_norm": 0.3686645030975342, "learning_rate": 7.961881983944431e-06, "loss": 1.0988, "step": 1790 }, { "epoch": 0.8730197416524494, "grad_norm": 0.4381706714630127, "learning_rate": 7.9018032408109e-06, "loss": 1.0733, "step": 1791 }, { "epoch": 0.8735071898610772, "grad_norm": 0.4402759075164795, "learning_rate": 7.841942702717253e-06, "loss": 1.1313, "step": 1792 }, { "epoch": 0.8739946380697051, "grad_norm": 0.4035143256187439, "learning_rate": 7.782300511488928e-06, "loss": 0.9872, "step": 1793 }, { "epoch": 0.8744820862783329, "grad_norm": 0.3443876802921295, "learning_rate": 7.722876808434087e-06, "loss": 1.0903, "step": 1794 }, { "epoch": 0.8749695344869608, "grad_norm": 0.4076747000217438, "learning_rate": 7.663671734343247e-06, "loss": 1.1202, "step": 1795 }, { "epoch": 0.8754569826955886, "grad_norm": 0.39271125197410583, "learning_rate": 7.6046854294889e-06, "loss": 1.1263, "step": 1796 }, { "epoch": 0.8759444309042165, "grad_norm": 0.4301184117794037, "learning_rate": 7.545918033625254e-06, "loss": 1.2152, "step": 1797 }, { "epoch": 0.8764318791128443, "grad_norm": 0.36459746956825256, "learning_rate": 7.487369685987844e-06, "loss": 1.1336, "step": 1798 }, { "epoch": 0.876919327321472, "grad_norm": 0.4065280556678772, "learning_rate": 7.42904052529324e-06, "loss": 1.1319, "step": 1799 }, { "epoch": 0.8774067755300999, "grad_norm": 0.37590742111206055, "learning_rate": 7.370930689738642e-06, "loss": 1.0246, "step": 1800 }, { "epoch": 0.8778942237387277, "grad_norm": 0.40792787075042725, "learning_rate": 7.3130403170016955e-06, "loss": 1.0296, "step": 1801 }, { "epoch": 0.8783816719473556, "grad_norm": 0.35659080743789673, "learning_rate": 7.255369544240021e-06, "loss": 1.008, "step": 1802 }, { "epoch": 0.8788691201559834, "grad_norm": 0.36563870310783386, "learning_rate": 7.197918508090973e-06, "loss": 1.0822, "step": 1803 }, { "epoch": 0.8793565683646113, "grad_norm": 0.3770640194416046, "learning_rate": 7.140687344671282e-06, "loss": 1.1595, "step": 1804 }, { "epoch": 0.8798440165732391, "grad_norm": 0.45971986651420593, "learning_rate": 7.0836761895767265e-06, "loss": 1.1517, "step": 1805 }, { "epoch": 0.880331464781867, "grad_norm": 0.32596591114997864, "learning_rate": 7.02688517788187e-06, "loss": 1.0074, "step": 1806 }, { "epoch": 0.8808189129904948, "grad_norm": 0.3988722562789917, "learning_rate": 6.970314444139636e-06, "loss": 1.1779, "step": 1807 }, { "epoch": 0.8813063611991226, "grad_norm": 0.3868488073348999, "learning_rate": 6.913964122381134e-06, "loss": 1.0499, "step": 1808 }, { "epoch": 0.8817938094077504, "grad_norm": 0.3938734531402588, "learning_rate": 6.8578343461151885e-06, "loss": 0.9922, "step": 1809 }, { "epoch": 0.8822812576163782, "grad_norm": 0.412203311920166, "learning_rate": 6.8019252483281e-06, "loss": 1.0873, "step": 1810 }, { "epoch": 0.8827687058250061, "grad_norm": 0.4144209027290344, "learning_rate": 6.746236961483399e-06, "loss": 1.0318, "step": 1811 }, { "epoch": 0.8832561540336339, "grad_norm": 0.4311385452747345, "learning_rate": 6.690769617521342e-06, "loss": 1.2014, "step": 1812 }, { "epoch": 0.8837436022422618, "grad_norm": 0.4309900104999542, "learning_rate": 6.635523347858763e-06, "loss": 1.0218, "step": 1813 }, { "epoch": 0.8842310504508896, "grad_norm": 0.43849343061447144, "learning_rate": 6.580498283388758e-06, "loss": 1.1611, "step": 1814 }, { "epoch": 0.8847184986595175, "grad_norm": 0.37283089756965637, "learning_rate": 6.525694554480277e-06, "loss": 1.0965, "step": 1815 }, { "epoch": 0.8852059468681452, "grad_norm": 0.3518564999103546, "learning_rate": 6.471112290977877e-06, "loss": 1.1687, "step": 1816 }, { "epoch": 0.8856933950767731, "grad_norm": 0.3284810483455658, "learning_rate": 6.416751622201389e-06, "loss": 1.0962, "step": 1817 }, { "epoch": 0.8861808432854009, "grad_norm": 0.39844444394111633, "learning_rate": 6.362612676945678e-06, "loss": 0.9922, "step": 1818 }, { "epoch": 0.8866682914940287, "grad_norm": 0.3707285523414612, "learning_rate": 6.308695583480251e-06, "loss": 1.0288, "step": 1819 }, { "epoch": 0.8871557397026566, "grad_norm": 0.3834349513053894, "learning_rate": 6.2550004695489775e-06, "loss": 1.169, "step": 1820 }, { "epoch": 0.8876431879112844, "grad_norm": 0.37593066692352295, "learning_rate": 6.201527462369827e-06, "loss": 0.9882, "step": 1821 }, { "epoch": 0.8881306361199123, "grad_norm": 0.3793316185474396, "learning_rate": 6.1482766886345134e-06, "loss": 1.0577, "step": 1822 }, { "epoch": 0.8886180843285401, "grad_norm": 0.2997931241989136, "learning_rate": 6.095248274508236e-06, "loss": 0.9785, "step": 1823 }, { "epoch": 0.889105532537168, "grad_norm": 0.3628780245780945, "learning_rate": 6.042442345629384e-06, "loss": 1.0471, "step": 1824 }, { "epoch": 0.8895929807457957, "grad_norm": 0.37528806924819946, "learning_rate": 5.989859027109179e-06, "loss": 1.1316, "step": 1825 }, { "epoch": 0.8900804289544236, "grad_norm": 0.3268503248691559, "learning_rate": 5.937498443531442e-06, "loss": 0.9988, "step": 1826 }, { "epoch": 0.8905678771630514, "grad_norm": 0.4406483471393585, "learning_rate": 5.885360718952293e-06, "loss": 1.0693, "step": 1827 }, { "epoch": 0.8910553253716793, "grad_norm": 0.37791526317596436, "learning_rate": 5.83344597689981e-06, "loss": 1.044, "step": 1828 }, { "epoch": 0.8915427735803071, "grad_norm": 0.35093453526496887, "learning_rate": 5.78175434037378e-06, "loss": 0.9856, "step": 1829 }, { "epoch": 0.8920302217889349, "grad_norm": 0.39958176016807556, "learning_rate": 5.73028593184538e-06, "loss": 1.0952, "step": 1830 }, { "epoch": 0.8925176699975628, "grad_norm": 0.34690409898757935, "learning_rate": 5.679040873256924e-06, "loss": 1.0718, "step": 1831 }, { "epoch": 0.8930051182061906, "grad_norm": 0.3513603210449219, "learning_rate": 5.628019286021535e-06, "loss": 1.0157, "step": 1832 }, { "epoch": 0.8934925664148184, "grad_norm": 0.40844032168388367, "learning_rate": 5.5772212910228606e-06, "loss": 1.0604, "step": 1833 }, { "epoch": 0.8939800146234462, "grad_norm": 0.3466549813747406, "learning_rate": 5.526647008614849e-06, "loss": 1.1159, "step": 1834 }, { "epoch": 0.8944674628320741, "grad_norm": 0.37515002489089966, "learning_rate": 5.476296558621363e-06, "loss": 1.0628, "step": 1835 }, { "epoch": 0.8949549110407019, "grad_norm": 0.34914758801460266, "learning_rate": 5.426170060335944e-06, "loss": 0.9619, "step": 1836 }, { "epoch": 0.8954423592493298, "grad_norm": 0.38144829869270325, "learning_rate": 5.376267632521592e-06, "loss": 1.157, "step": 1837 }, { "epoch": 0.8959298074579576, "grad_norm": 0.3572610318660736, "learning_rate": 5.326589393410386e-06, "loss": 1.0672, "step": 1838 }, { "epoch": 0.8964172556665855, "grad_norm": 0.46000754833221436, "learning_rate": 5.277135460703198e-06, "loss": 1.1727, "step": 1839 }, { "epoch": 0.8969047038752133, "grad_norm": 0.3217918276786804, "learning_rate": 5.2279059515695336e-06, "loss": 0.9638, "step": 1840 }, { "epoch": 0.897392152083841, "grad_norm": 0.38089826703071594, "learning_rate": 5.17890098264715e-06, "loss": 0.99, "step": 1841 }, { "epoch": 0.8978796002924689, "grad_norm": 0.3441983759403229, "learning_rate": 5.1301206700417935e-06, "loss": 1.0239, "step": 1842 }, { "epoch": 0.8983670485010967, "grad_norm": 0.4059714078903198, "learning_rate": 5.08156512932696e-06, "loss": 1.112, "step": 1843 }, { "epoch": 0.8988544967097246, "grad_norm": 0.42819744348526, "learning_rate": 5.033234475543613e-06, "loss": 1.2109, "step": 1844 }, { "epoch": 0.8993419449183524, "grad_norm": 0.34663084149360657, "learning_rate": 4.985128823199858e-06, "loss": 0.9653, "step": 1845 }, { "epoch": 0.8998293931269803, "grad_norm": 0.41473668813705444, "learning_rate": 4.937248286270757e-06, "loss": 0.9576, "step": 1846 }, { "epoch": 0.9003168413356081, "grad_norm": 0.3552911877632141, "learning_rate": 4.8895929781979765e-06, "loss": 1.0767, "step": 1847 }, { "epoch": 0.900804289544236, "grad_norm": 0.3645229637622833, "learning_rate": 4.8421630118895775e-06, "loss": 1.1575, "step": 1848 }, { "epoch": 0.9012917377528638, "grad_norm": 0.38062170147895813, "learning_rate": 4.794958499719704e-06, "loss": 1.0531, "step": 1849 }, { "epoch": 0.9017791859614915, "grad_norm": 0.3905470073223114, "learning_rate": 4.747979553528404e-06, "loss": 0.9985, "step": 1850 }, { "epoch": 0.9022666341701194, "grad_norm": 0.39329779148101807, "learning_rate": 4.701226284621218e-06, "loss": 1.0583, "step": 1851 }, { "epoch": 0.9027540823787472, "grad_norm": 0.34848731756210327, "learning_rate": 4.654698803769031e-06, "loss": 1.0841, "step": 1852 }, { "epoch": 0.9032415305873751, "grad_norm": 0.3833371102809906, "learning_rate": 4.608397221207794e-06, "loss": 1.0772, "step": 1853 }, { "epoch": 0.9037289787960029, "grad_norm": 0.3949624001979828, "learning_rate": 4.5623216466382235e-06, "loss": 1.0457, "step": 1854 }, { "epoch": 0.9042164270046308, "grad_norm": 0.3338606059551239, "learning_rate": 4.516472189225574e-06, "loss": 0.9934, "step": 1855 }, { "epoch": 0.9047038752132586, "grad_norm": 0.41134586930274963, "learning_rate": 4.4708489575993496e-06, "loss": 0.9311, "step": 1856 }, { "epoch": 0.9051913234218865, "grad_norm": 0.31631216406822205, "learning_rate": 4.425452059853086e-06, "loss": 1.0665, "step": 1857 }, { "epoch": 0.9056787716305142, "grad_norm": 0.3777748942375183, "learning_rate": 4.3802816035440786e-06, "loss": 1.1026, "step": 1858 }, { "epoch": 0.9061662198391421, "grad_norm": 0.37006500363349915, "learning_rate": 4.335337695693076e-06, "loss": 1.0635, "step": 1859 }, { "epoch": 0.9066536680477699, "grad_norm": 0.34530532360076904, "learning_rate": 4.290620442784144e-06, "loss": 1.0578, "step": 1860 }, { "epoch": 0.9071411162563977, "grad_norm": 0.34728294610977173, "learning_rate": 4.246129950764299e-06, "loss": 0.9875, "step": 1861 }, { "epoch": 0.9076285644650256, "grad_norm": 0.37285178899765015, "learning_rate": 4.201866325043291e-06, "loss": 1.0715, "step": 1862 }, { "epoch": 0.9081160126736534, "grad_norm": 0.47885051369667053, "learning_rate": 4.157829670493418e-06, "loss": 1.0613, "step": 1863 }, { "epoch": 0.9086034608822813, "grad_norm": 0.3461344838142395, "learning_rate": 4.114020091449166e-06, "loss": 1.165, "step": 1864 }, { "epoch": 0.9090909090909091, "grad_norm": 0.38396957516670227, "learning_rate": 4.07043769170703e-06, "loss": 1.1602, "step": 1865 }, { "epoch": 0.909578357299537, "grad_norm": 0.3966604769229889, "learning_rate": 4.027082574525276e-06, "loss": 1.0622, "step": 1866 }, { "epoch": 0.9100658055081647, "grad_norm": 0.3196715712547302, "learning_rate": 3.983954842623683e-06, "loss": 1.0808, "step": 1867 }, { "epoch": 0.9105532537167926, "grad_norm": 0.3249403238296509, "learning_rate": 3.941054598183247e-06, "loss": 1.0614, "step": 1868 }, { "epoch": 0.9110407019254204, "grad_norm": 0.3917873501777649, "learning_rate": 3.898381942846041e-06, "loss": 1.0006, "step": 1869 }, { "epoch": 0.9115281501340483, "grad_norm": 0.46330371499061584, "learning_rate": 3.855936977714902e-06, "loss": 1.0365, "step": 1870 }, { "epoch": 0.9120155983426761, "grad_norm": 0.3643684983253479, "learning_rate": 3.8137198033531996e-06, "loss": 1.0078, "step": 1871 }, { "epoch": 0.9125030465513039, "grad_norm": 0.42036598920822144, "learning_rate": 3.7717305197845885e-06, "loss": 1.0816, "step": 1872 }, { "epoch": 0.9129904947599318, "grad_norm": 0.31484517455101013, "learning_rate": 3.7299692264928354e-06, "loss": 0.9581, "step": 1873 }, { "epoch": 0.9134779429685596, "grad_norm": 0.4296029806137085, "learning_rate": 3.688436022421504e-06, "loss": 1.0468, "step": 1874 }, { "epoch": 0.9139653911771874, "grad_norm": 0.32022643089294434, "learning_rate": 3.6471310059737583e-06, "loss": 0.9181, "step": 1875 }, { "epoch": 0.9144528393858152, "grad_norm": 0.4117008447647095, "learning_rate": 3.6060542750121493e-06, "loss": 1.1994, "step": 1876 }, { "epoch": 0.9149402875944431, "grad_norm": 0.4310991168022156, "learning_rate": 3.565205926858317e-06, "loss": 1.0439, "step": 1877 }, { "epoch": 0.9154277358030709, "grad_norm": 0.4342654347419739, "learning_rate": 3.5245860582928334e-06, "loss": 1.011, "step": 1878 }, { "epoch": 0.9159151840116988, "grad_norm": 0.37502872943878174, "learning_rate": 3.484194765554949e-06, "loss": 1.1992, "step": 1879 }, { "epoch": 0.9164026322203266, "grad_norm": 0.4077601730823517, "learning_rate": 3.444032144342324e-06, "loss": 1.2782, "step": 1880 }, { "epoch": 0.9168900804289544, "grad_norm": 0.4238269329071045, "learning_rate": 3.4040982898108644e-06, "loss": 1.1753, "step": 1881 }, { "epoch": 0.9173775286375823, "grad_norm": 0.382989764213562, "learning_rate": 3.364393296574453e-06, "loss": 1.0529, "step": 1882 }, { "epoch": 0.91786497684621, "grad_norm": 0.38156425952911377, "learning_rate": 3.3249172587047406e-06, "loss": 1.0902, "step": 1883 }, { "epoch": 0.9183524250548379, "grad_norm": 0.39040836691856384, "learning_rate": 3.2856702697309337e-06, "loss": 1.0067, "step": 1884 }, { "epoch": 0.9188398732634657, "grad_norm": 0.4316060543060303, "learning_rate": 3.2466524226395177e-06, "loss": 1.2456, "step": 1885 }, { "epoch": 0.9193273214720936, "grad_norm": 0.4829002618789673, "learning_rate": 3.2078638098741674e-06, "loss": 0.9577, "step": 1886 }, { "epoch": 0.9198147696807214, "grad_norm": 0.4624776840209961, "learning_rate": 3.1693045233353593e-06, "loss": 1.1169, "step": 1887 }, { "epoch": 0.9203022178893493, "grad_norm": 0.37384673953056335, "learning_rate": 3.1309746543802474e-06, "loss": 1.051, "step": 1888 }, { "epoch": 0.9207896660979771, "grad_norm": 0.3338543176651001, "learning_rate": 3.0928742938224896e-06, "loss": 1.1515, "step": 1889 }, { "epoch": 0.921277114306605, "grad_norm": 0.4934646189212799, "learning_rate": 3.0550035319319215e-06, "loss": 0.9284, "step": 1890 }, { "epoch": 0.9217645625152328, "grad_norm": 0.39777880907058716, "learning_rate": 3.017362458434403e-06, "loss": 1.0181, "step": 1891 }, { "epoch": 0.9222520107238605, "grad_norm": 0.4428083300590515, "learning_rate": 2.9799511625116294e-06, "loss": 1.1124, "step": 1892 }, { "epoch": 0.9227394589324884, "grad_norm": 0.4063623249530792, "learning_rate": 2.9427697328008763e-06, "loss": 1.0634, "step": 1893 }, { "epoch": 0.9232269071411162, "grad_norm": 0.36762094497680664, "learning_rate": 2.905818257394799e-06, "loss": 1.0807, "step": 1894 }, { "epoch": 0.9237143553497441, "grad_norm": 0.4164426624774933, "learning_rate": 2.869096823841244e-06, "loss": 1.1103, "step": 1895 }, { "epoch": 0.9242018035583719, "grad_norm": 0.43294432759284973, "learning_rate": 2.832605519143017e-06, "loss": 1.0317, "step": 1896 }, { "epoch": 0.9246892517669998, "grad_norm": 0.3280065059661865, "learning_rate": 2.7963444297576912e-06, "loss": 1.1099, "step": 1897 }, { "epoch": 0.9251766999756276, "grad_norm": 0.5042917728424072, "learning_rate": 2.760313641597401e-06, "loss": 1.173, "step": 1898 }, { "epoch": 0.9256641481842555, "grad_norm": 0.5354404449462891, "learning_rate": 2.7245132400286366e-06, "loss": 1.098, "step": 1899 }, { "epoch": 0.9261515963928832, "grad_norm": 0.4489479959011078, "learning_rate": 2.6889433098720273e-06, "loss": 1.0235, "step": 1900 }, { "epoch": 0.9266390446015111, "grad_norm": 0.3835557997226715, "learning_rate": 2.6536039354021715e-06, "loss": 1.05, "step": 1901 }, { "epoch": 0.9271264928101389, "grad_norm": 0.37679949402809143, "learning_rate": 2.618495200347426e-06, "loss": 0.8709, "step": 1902 }, { "epoch": 0.9276139410187667, "grad_norm": 0.38232672214508057, "learning_rate": 2.5836171878896755e-06, "loss": 1.2003, "step": 1903 }, { "epoch": 0.9281013892273946, "grad_norm": 0.31981360912323, "learning_rate": 2.548969980664173e-06, "loss": 0.9475, "step": 1904 }, { "epoch": 0.9285888374360224, "grad_norm": 0.40195339918136597, "learning_rate": 2.514553660759356e-06, "loss": 1.0274, "step": 1905 }, { "epoch": 0.9290762856446503, "grad_norm": 0.332801878452301, "learning_rate": 2.4803683097165964e-06, "loss": 0.9474, "step": 1906 }, { "epoch": 0.9295637338532781, "grad_norm": 0.3179659843444824, "learning_rate": 2.446414008530051e-06, "loss": 1.0433, "step": 1907 }, { "epoch": 0.930051182061906, "grad_norm": 0.39270058274269104, "learning_rate": 2.41269083764647e-06, "loss": 1.0861, "step": 1908 }, { "epoch": 0.9305386302705337, "grad_norm": 0.35846665501594543, "learning_rate": 2.379198876964961e-06, "loss": 1.0013, "step": 1909 }, { "epoch": 0.9310260784791616, "grad_norm": 0.3707181513309479, "learning_rate": 2.345938205836884e-06, "loss": 0.9876, "step": 1910 }, { "epoch": 0.9315135266877894, "grad_norm": 0.41161656379699707, "learning_rate": 2.3129089030655584e-06, "loss": 1.1426, "step": 1911 }, { "epoch": 0.9320009748964172, "grad_norm": 0.42365649342536926, "learning_rate": 2.280111046906175e-06, "loss": 1.1656, "step": 1912 }, { "epoch": 0.9324884231050451, "grad_norm": 0.3958723545074463, "learning_rate": 2.2475447150655415e-06, "loss": 1.1817, "step": 1913 }, { "epoch": 0.9329758713136729, "grad_norm": 0.4035016894340515, "learning_rate": 2.215209984701927e-06, "loss": 1.066, "step": 1914 }, { "epoch": 0.9334633195223008, "grad_norm": 0.37086769938468933, "learning_rate": 2.1831069324248942e-06, "loss": 1.0681, "step": 1915 }, { "epoch": 0.9339507677309286, "grad_norm": 0.4109595715999603, "learning_rate": 2.151235634295079e-06, "loss": 1.1106, "step": 1916 }, { "epoch": 0.9344382159395564, "grad_norm": 0.37377893924713135, "learning_rate": 2.119596165824023e-06, "loss": 0.965, "step": 1917 }, { "epoch": 0.9349256641481842, "grad_norm": 0.34386566281318665, "learning_rate": 2.088188601974017e-06, "loss": 1.0639, "step": 1918 }, { "epoch": 0.9354131123568121, "grad_norm": 0.3515344262123108, "learning_rate": 2.057013017157905e-06, "loss": 1.0336, "step": 1919 }, { "epoch": 0.9359005605654399, "grad_norm": 0.3488283157348633, "learning_rate": 2.0260694852389015e-06, "loss": 1.0284, "step": 1920 }, { "epoch": 0.9363880087740678, "grad_norm": 0.32755133509635925, "learning_rate": 1.995358079530463e-06, "loss": 1.0561, "step": 1921 }, { "epoch": 0.9368754569826956, "grad_norm": 0.3984243869781494, "learning_rate": 1.9648788727960276e-06, "loss": 1.0979, "step": 1922 }, { "epoch": 0.9373629051913234, "grad_norm": 0.3786798119544983, "learning_rate": 1.9346319372489318e-06, "loss": 1.0841, "step": 1923 }, { "epoch": 0.9378503533999513, "grad_norm": 0.3391832113265991, "learning_rate": 1.9046173445521509e-06, "loss": 1.0141, "step": 1924 }, { "epoch": 0.938337801608579, "grad_norm": 0.4297967255115509, "learning_rate": 1.874835165818256e-06, "loss": 1.1618, "step": 1925 }, { "epoch": 0.9388252498172069, "grad_norm": 0.4208715260028839, "learning_rate": 1.8452854716090928e-06, "loss": 1.1664, "step": 1926 }, { "epoch": 0.9393126980258347, "grad_norm": 0.39006009697914124, "learning_rate": 1.8159683319357135e-06, "loss": 1.1445, "step": 1927 }, { "epoch": 0.9398001462344626, "grad_norm": 0.33365562558174133, "learning_rate": 1.7868838162582114e-06, "loss": 1.1154, "step": 1928 }, { "epoch": 0.9402875944430904, "grad_norm": 0.4178657531738281, "learning_rate": 1.7580319934855094e-06, "loss": 1.1662, "step": 1929 }, { "epoch": 0.9407750426517183, "grad_norm": 0.47014760971069336, "learning_rate": 1.729412931975205e-06, "loss": 1.0061, "step": 1930 }, { "epoch": 0.9412624908603461, "grad_norm": 0.3823905885219574, "learning_rate": 1.7010266995334811e-06, "loss": 1.0879, "step": 1931 }, { "epoch": 0.941749939068974, "grad_norm": 0.41770321130752563, "learning_rate": 1.6728733634148064e-06, "loss": 1.0673, "step": 1932 }, { "epoch": 0.9422373872776018, "grad_norm": 0.36765381693840027, "learning_rate": 1.644952990321913e-06, "loss": 1.0879, "step": 1933 }, { "epoch": 0.9427248354862295, "grad_norm": 0.4739663600921631, "learning_rate": 1.6172656464055747e-06, "loss": 1.1542, "step": 1934 }, { "epoch": 0.9432122836948574, "grad_norm": 0.4071044325828552, "learning_rate": 1.589811397264429e-06, "loss": 1.1813, "step": 1935 }, { "epoch": 0.9436997319034852, "grad_norm": 0.35680529475212097, "learning_rate": 1.5625903079448667e-06, "loss": 1.1284, "step": 1936 }, { "epoch": 0.9441871801121131, "grad_norm": 0.41342172026634216, "learning_rate": 1.5356024429408867e-06, "loss": 1.1501, "step": 1937 }, { "epoch": 0.9446746283207409, "grad_norm": 0.3705962598323822, "learning_rate": 1.5088478661938855e-06, "loss": 1.0652, "step": 1938 }, { "epoch": 0.9451620765293688, "grad_norm": 0.37899836897850037, "learning_rate": 1.4823266410925463e-06, "loss": 1.018, "step": 1939 }, { "epoch": 0.9456495247379966, "grad_norm": 0.38751640915870667, "learning_rate": 1.4560388304726613e-06, "loss": 1.1265, "step": 1940 }, { "epoch": 0.9461369729466245, "grad_norm": 0.32274675369262695, "learning_rate": 1.429984496617054e-06, "loss": 1.0278, "step": 1941 }, { "epoch": 0.9466244211552522, "grad_norm": 0.37253156304359436, "learning_rate": 1.4041637012553345e-06, "loss": 1.062, "step": 1942 }, { "epoch": 0.94711186936388, "grad_norm": 0.38027337193489075, "learning_rate": 1.3785765055638e-06, "loss": 1.1756, "step": 1943 }, { "epoch": 0.9475993175725079, "grad_norm": 0.38690176606178284, "learning_rate": 1.3532229701653242e-06, "loss": 1.0456, "step": 1944 }, { "epoch": 0.9480867657811357, "grad_norm": 0.3302153944969177, "learning_rate": 1.3281031551291233e-06, "loss": 0.9826, "step": 1945 }, { "epoch": 0.9485742139897636, "grad_norm": 0.34769314527511597, "learning_rate": 1.303217119970712e-06, "loss": 1.1577, "step": 1946 }, { "epoch": 0.9490616621983914, "grad_norm": 0.33927199244499207, "learning_rate": 1.2785649236517038e-06, "loss": 0.9491, "step": 1947 }, { "epoch": 0.9495491104070193, "grad_norm": 0.37198346853256226, "learning_rate": 1.2541466245796885e-06, "loss": 1.0319, "step": 1948 }, { "epoch": 0.9500365586156471, "grad_norm": 0.39865556359291077, "learning_rate": 1.2299622806080767e-06, "loss": 0.9899, "step": 1949 }, { "epoch": 0.950524006824275, "grad_norm": 0.4067472517490387, "learning_rate": 1.2060119490360123e-06, "loss": 1.1005, "step": 1950 }, { "epoch": 0.9510114550329027, "grad_norm": 0.388704389333725, "learning_rate": 1.1822956866081702e-06, "loss": 0.9327, "step": 1951 }, { "epoch": 0.9514989032415306, "grad_norm": 0.4057963788509369, "learning_rate": 1.1588135495146478e-06, "loss": 1.1373, "step": 1952 }, { "epoch": 0.9519863514501584, "grad_norm": 0.42301106452941895, "learning_rate": 1.1355655933908638e-06, "loss": 1.0342, "step": 1953 }, { "epoch": 0.9524737996587862, "grad_norm": 0.39528149366378784, "learning_rate": 1.11255187331738e-06, "loss": 1.1442, "step": 1954 }, { "epoch": 0.9529612478674141, "grad_norm": 0.39889761805534363, "learning_rate": 1.0897724438198142e-06, "loss": 1.0789, "step": 1955 }, { "epoch": 0.9534486960760419, "grad_norm": 0.42788776755332947, "learning_rate": 1.067227358868661e-06, "loss": 1.0739, "step": 1956 }, { "epoch": 0.9539361442846698, "grad_norm": 0.42129066586494446, "learning_rate": 1.0449166718792147e-06, "loss": 1.0375, "step": 1957 }, { "epoch": 0.9544235924932976, "grad_norm": 0.4017135798931122, "learning_rate": 1.0228404357113919e-06, "loss": 1.1505, "step": 1958 }, { "epoch": 0.9549110407019255, "grad_norm": 0.38789933919906616, "learning_rate": 1.0009987026696532e-06, "loss": 1.0732, "step": 1959 }, { "epoch": 0.9553984889105532, "grad_norm": 0.4593614935874939, "learning_rate": 9.793915245028596e-07, "loss": 1.242, "step": 1960 }, { "epoch": 0.9558859371191811, "grad_norm": 0.3683159649372101, "learning_rate": 9.580189524041272e-07, "loss": 1.1475, "step": 1961 }, { "epoch": 0.9563733853278089, "grad_norm": 0.3410833179950714, "learning_rate": 9.368810370107617e-07, "loss": 1.0107, "step": 1962 }, { "epoch": 0.9568608335364368, "grad_norm": 0.37654101848602295, "learning_rate": 9.159778284040799e-07, "loss": 1.0231, "step": 1963 }, { "epoch": 0.9573482817450646, "grad_norm": 0.41946423053741455, "learning_rate": 8.953093761093323e-07, "loss": 1.0215, "step": 1964 }, { "epoch": 0.9578357299536924, "grad_norm": 0.4294183552265167, "learning_rate": 8.748757290955478e-07, "loss": 1.0651, "step": 1965 }, { "epoch": 0.9583231781623203, "grad_norm": 0.4403562843799591, "learning_rate": 8.546769357754447e-07, "loss": 1.0721, "step": 1966 }, { "epoch": 0.958810626370948, "grad_norm": 0.504021167755127, "learning_rate": 8.347130440053419e-07, "loss": 1.0633, "step": 1967 }, { "epoch": 0.9592980745795759, "grad_norm": 0.37429291009902954, "learning_rate": 8.149841010849923e-07, "loss": 0.993, "step": 1968 }, { "epoch": 0.9597855227882037, "grad_norm": 0.40227779746055603, "learning_rate": 7.95490153757461e-07, "loss": 1.0361, "step": 1969 }, { "epoch": 0.9602729709968316, "grad_norm": 0.38147246837615967, "learning_rate": 7.762312482091027e-07, "loss": 1.04, "step": 1970 }, { "epoch": 0.9607604192054594, "grad_norm": 0.3755912184715271, "learning_rate": 7.572074300693621e-07, "loss": 1.0474, "step": 1971 }, { "epoch": 0.9612478674140873, "grad_norm": 0.38925471901893616, "learning_rate": 7.384187444107071e-07, "loss": 1.1567, "step": 1972 }, { "epoch": 0.9617353156227151, "grad_norm": 0.39746856689453125, "learning_rate": 7.19865235748507e-07, "loss": 1.1433, "step": 1973 }, { "epoch": 0.9622227638313429, "grad_norm": 0.34805506467819214, "learning_rate": 7.015469480409542e-07, "loss": 0.9856, "step": 1974 }, { "epoch": 0.9627102120399708, "grad_norm": 0.4245430529117584, "learning_rate": 6.834639246889096e-07, "loss": 1.0389, "step": 1975 }, { "epoch": 0.9631976602485985, "grad_norm": 0.3707633316516876, "learning_rate": 6.656162085358686e-07, "loss": 1.2374, "step": 1976 }, { "epoch": 0.9636851084572264, "grad_norm": 0.433009535074234, "learning_rate": 6.480038418677947e-07, "loss": 1.0547, "step": 1977 }, { "epoch": 0.9641725566658542, "grad_norm": 0.3832406997680664, "learning_rate": 6.306268664130533e-07, "loss": 1.0589, "step": 1978 }, { "epoch": 0.9646600048744821, "grad_norm": 0.3488512635231018, "learning_rate": 6.134853233422999e-07, "loss": 1.0335, "step": 1979 }, { "epoch": 0.9651474530831099, "grad_norm": 0.40218132734298706, "learning_rate": 5.965792532683923e-07, "loss": 1.0572, "step": 1980 }, { "epoch": 0.9656349012917378, "grad_norm": 0.43588852882385254, "learning_rate": 5.799086962463007e-07, "loss": 1.1437, "step": 1981 }, { "epoch": 0.9661223495003656, "grad_norm": 0.3971550166606903, "learning_rate": 5.634736917729865e-07, "loss": 1.0291, "step": 1982 }, { "epoch": 0.9666097977089935, "grad_norm": 0.4068727195262909, "learning_rate": 5.472742787873352e-07, "loss": 1.1587, "step": 1983 }, { "epoch": 0.9670972459176213, "grad_norm": 0.42422133684158325, "learning_rate": 5.313104956700565e-07, "loss": 1.0437, "step": 1984 }, { "epoch": 0.967584694126249, "grad_norm": 0.4129694104194641, "learning_rate": 5.155823802435622e-07, "loss": 1.1479, "step": 1985 }, { "epoch": 0.9680721423348769, "grad_norm": 0.3818117082118988, "learning_rate": 5.000899697719552e-07, "loss": 1.1197, "step": 1986 }, { "epoch": 0.9685595905435047, "grad_norm": 0.4490480124950409, "learning_rate": 4.848333009608408e-07, "loss": 1.0951, "step": 1987 }, { "epoch": 0.9690470387521326, "grad_norm": 0.4608059823513031, "learning_rate": 4.6981240995731537e-07, "loss": 1.1753, "step": 1988 }, { "epoch": 0.9695344869607604, "grad_norm": 0.47589248418807983, "learning_rate": 4.550273323498555e-07, "loss": 1.0552, "step": 1989 }, { "epoch": 0.9700219351693883, "grad_norm": 0.3511136770248413, "learning_rate": 4.4047810316822925e-07, "loss": 1.0957, "step": 1990 }, { "epoch": 0.9705093833780161, "grad_norm": 0.4456351101398468, "learning_rate": 4.261647568834182e-07, "loss": 1.1183, "step": 1991 }, { "epoch": 0.970996831586644, "grad_norm": 0.39235809445381165, "learning_rate": 4.1208732740752875e-07, "loss": 1.0132, "step": 1992 }, { "epoch": 0.9714842797952717, "grad_norm": 0.4110061526298523, "learning_rate": 3.9824584809372566e-07, "loss": 1.1303, "step": 1993 }, { "epoch": 0.9719717280038996, "grad_norm": 0.4614067077636719, "learning_rate": 3.84640351736143e-07, "loss": 1.1847, "step": 1994 }, { "epoch": 0.9724591762125274, "grad_norm": 0.3240770697593689, "learning_rate": 3.7127087056980647e-07, "loss": 1.0747, "step": 1995 }, { "epoch": 0.9729466244211552, "grad_norm": 0.3777024447917938, "learning_rate": 3.5813743627055584e-07, "loss": 1.209, "step": 1996 }, { "epoch": 0.9734340726297831, "grad_norm": 0.3978724777698517, "learning_rate": 3.452400799549893e-07, "loss": 1.1577, "step": 1997 }, { "epoch": 0.9739215208384109, "grad_norm": 0.364408403635025, "learning_rate": 3.3257883218035245e-07, "loss": 1.0135, "step": 1998 }, { "epoch": 0.9744089690470388, "grad_norm": 0.44452327489852905, "learning_rate": 3.2015372294450507e-07, "loss": 1.2438, "step": 1999 }, { "epoch": 0.9748964172556666, "grad_norm": 0.4765107333660126, "learning_rate": 3.0796478168582113e-07, "loss": 1.0167, "step": 2000 }, { "epoch": 0.9753838654642945, "grad_norm": 0.4003845453262329, "learning_rate": 2.960120372831221e-07, "loss": 1.0883, "step": 2001 }, { "epoch": 0.9758713136729222, "grad_norm": 0.49801144003868103, "learning_rate": 2.8429551805564394e-07, "loss": 1.0007, "step": 2002 }, { "epoch": 0.9763587618815501, "grad_norm": 0.3565743863582611, "learning_rate": 2.7281525176292565e-07, "loss": 1.1447, "step": 2003 }, { "epoch": 0.9768462100901779, "grad_norm": 0.37986013293266296, "learning_rate": 2.615712656047542e-07, "loss": 1.0881, "step": 2004 }, { "epoch": 0.9773336582988057, "grad_norm": 0.32770147919654846, "learning_rate": 2.5056358622110866e-07, "loss": 0.9509, "step": 2005 }, { "epoch": 0.9778211065074336, "grad_norm": 0.41189002990722656, "learning_rate": 2.3979223969211594e-07, "loss": 1.1882, "step": 2006 }, { "epoch": 0.9783085547160614, "grad_norm": 0.3875510096549988, "learning_rate": 2.2925725153793986e-07, "loss": 1.0768, "step": 2007 }, { "epoch": 0.9787960029246893, "grad_norm": 0.41866934299468994, "learning_rate": 2.1895864671874767e-07, "loss": 1.0993, "step": 2008 }, { "epoch": 0.979283451133317, "grad_norm": 0.3891562223434448, "learning_rate": 2.088964496346879e-07, "loss": 1.0648, "step": 2009 }, { "epoch": 0.979770899341945, "grad_norm": 0.4654984176158905, "learning_rate": 1.9907068412575725e-07, "loss": 1.15, "step": 2010 }, { "epoch": 0.9802583475505727, "grad_norm": 0.35890793800354004, "learning_rate": 1.894813734718004e-07, "loss": 1.0544, "step": 2011 }, { "epoch": 0.9807457957592006, "grad_norm": 0.36428067088127136, "learning_rate": 1.8012854039244353e-07, "loss": 1.0215, "step": 2012 }, { "epoch": 0.9812332439678284, "grad_norm": 0.36865493655204773, "learning_rate": 1.710122070470277e-07, "loss": 1.1238, "step": 2013 }, { "epoch": 0.9817206921764563, "grad_norm": 0.38733819127082825, "learning_rate": 1.6213239503454215e-07, "loss": 1.0061, "step": 2014 }, { "epoch": 0.9822081403850841, "grad_norm": 0.3741471469402313, "learning_rate": 1.5348912539364658e-07, "loss": 1.1446, "step": 2015 }, { "epoch": 0.9826955885937119, "grad_norm": 0.376385360956192, "learning_rate": 1.450824186025157e-07, "loss": 1.0023, "step": 2016 }, { "epoch": 0.9831830368023398, "grad_norm": 0.3962882459163666, "learning_rate": 1.3691229457887257e-07, "loss": 0.9648, "step": 2017 }, { "epoch": 0.9836704850109675, "grad_norm": 0.3473024070262909, "learning_rate": 1.2897877267989966e-07, "loss": 1.1262, "step": 2018 }, { "epoch": 0.9841579332195954, "grad_norm": 0.3921591341495514, "learning_rate": 1.2128187170222792e-07, "loss": 1.043, "step": 2019 }, { "epoch": 0.9846453814282232, "grad_norm": 0.39327341318130493, "learning_rate": 1.1382160988184786e-07, "loss": 1.0769, "step": 2020 }, { "epoch": 0.9851328296368511, "grad_norm": 0.3739471435546875, "learning_rate": 1.0659800489408734e-07, "loss": 0.9578, "step": 2021 }, { "epoch": 0.9856202778454789, "grad_norm": 0.4034399092197418, "learning_rate": 9.961107385360046e-08, "loss": 1.1441, "step": 2022 }, { "epoch": 0.9861077260541068, "grad_norm": 0.38543474674224854, "learning_rate": 9.286083331426776e-08, "loss": 1.084, "step": 2023 }, { "epoch": 0.9865951742627346, "grad_norm": 0.3662292957305908, "learning_rate": 8.634729926920715e-08, "loss": 1.0999, "step": 2024 }, { "epoch": 0.9870826224713625, "grad_norm": 0.3803950548171997, "learning_rate": 8.007048715068521e-08, "loss": 1.0877, "step": 2025 }, { "epoch": 0.9875700706799903, "grad_norm": 0.34015125036239624, "learning_rate": 7.403041183016158e-08, "loss": 1.0899, "step": 2026 }, { "epoch": 0.988057518888618, "grad_norm": 0.37489980459213257, "learning_rate": 6.822708761815566e-08, "loss": 1.0894, "step": 2027 }, { "epoch": 0.9885449670972459, "grad_norm": 0.3569638133049011, "learning_rate": 6.266052826429114e-08, "loss": 1.1452, "step": 2028 }, { "epoch": 0.9890324153058737, "grad_norm": 0.35684725642204285, "learning_rate": 5.733074695721819e-08, "loss": 1.0874, "step": 2029 }, { "epoch": 0.9895198635145016, "grad_norm": 0.3525896966457367, "learning_rate": 5.223775632460237e-08, "loss": 1.052, "step": 2030 }, { "epoch": 0.9900073117231294, "grad_norm": 0.37919196486473083, "learning_rate": 4.738156843309138e-08, "loss": 1.0934, "step": 2031 }, { "epoch": 0.9904947599317573, "grad_norm": 0.4706897735595703, "learning_rate": 4.276219478827059e-08, "loss": 1.1683, "step": 2032 }, { "epoch": 0.9909822081403851, "grad_norm": 0.39712199568748474, "learning_rate": 3.837964633467417e-08, "loss": 1.1454, "step": 2033 }, { "epoch": 0.991469656349013, "grad_norm": 0.4138774871826172, "learning_rate": 3.423393345571846e-08, "loss": 1.1165, "step": 2034 }, { "epoch": 0.9919571045576407, "grad_norm": 0.38895630836486816, "learning_rate": 3.032506597369089e-08, "loss": 0.9747, "step": 2035 }, { "epoch": 0.9924445527662685, "grad_norm": 0.4397059977054596, "learning_rate": 2.6653053149738872e-08, "loss": 1.0906, "step": 2036 }, { "epoch": 0.9929320009748964, "grad_norm": 0.3787862956523895, "learning_rate": 2.321790368382537e-08, "loss": 1.0971, "step": 2037 }, { "epoch": 0.9934194491835242, "grad_norm": 0.40022116899490356, "learning_rate": 2.0019625714740032e-08, "loss": 1.2189, "step": 2038 }, { "epoch": 0.9939068973921521, "grad_norm": 0.3774934411048889, "learning_rate": 1.7058226820054758e-08, "loss": 0.9943, "step": 2039 }, { "epoch": 0.9943943456007799, "grad_norm": 0.36211833357810974, "learning_rate": 1.4333714016090404e-08, "loss": 1.128, "step": 2040 }, { "epoch": 0.9948817938094078, "grad_norm": 0.35082337260246277, "learning_rate": 1.1846093757961196e-08, "loss": 1.0167, "step": 2041 }, { "epoch": 0.9953692420180356, "grad_norm": 0.3229121267795563, "learning_rate": 9.595371939485897e-09, "loss": 0.9801, "step": 2042 }, { "epoch": 0.9958566902266635, "grad_norm": 0.4398113787174225, "learning_rate": 7.581553893221127e-09, "loss": 1.0982, "step": 2043 }, { "epoch": 0.9963441384352912, "grad_norm": 0.3718317151069641, "learning_rate": 5.804644390439151e-09, "loss": 1.1165, "step": 2044 }, { "epoch": 0.9968315866439191, "grad_norm": 0.4172276258468628, "learning_rate": 4.264647641105679e-09, "loss": 1.2252, "step": 2045 }, { "epoch": 0.9973190348525469, "grad_norm": 0.317125141620636, "learning_rate": 2.9615672938909656e-09, "loss": 1.1027, "step": 2046 }, { "epoch": 0.9978064830611747, "grad_norm": 0.41433629393577576, "learning_rate": 1.895406436136504e-09, "loss": 0.9324, "step": 2047 }, { "epoch": 0.9982939312698026, "grad_norm": 0.35381460189819336, "learning_rate": 1.0661675938439253e-09, "loss": 1.1131, "step": 2048 }, { "epoch": 0.9987813794784304, "grad_norm": 0.422343373298645, "learning_rate": 4.738527317194041e-10, "loss": 1.0122, "step": 2049 }, { "epoch": 0.9992688276870583, "grad_norm": 0.36507782340049744, "learning_rate": 1.1846325309594619e-10, "loss": 1.1554, "step": 2050 }, { "epoch": 0.999756275895686, "grad_norm": 0.4285239577293396, "learning_rate": 0.0, "loss": 1.3001, "step": 2051 }, { "epoch": 0.999756275895686, "eval_loss": 1.0897523164749146, "eval_runtime": 134.4524, "eval_samples_per_second": 25.704, "eval_steps_per_second": 3.213, "step": 2051 } ], "logging_steps": 1, "max_steps": 2051, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.636008272566682e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }