{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9106, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.5627039800772393, "learning_rate": 1.4598540145985402e-07, "loss": 1.7115, "step": 1 }, { "epoch": 0.0, "grad_norm": 4.13746608939205, "learning_rate": 2.9197080291970804e-07, "loss": 1.9855, "step": 2 }, { "epoch": 0.0, "grad_norm": 4.461789986681792, "learning_rate": 4.379562043795621e-07, "loss": 1.9055, "step": 3 }, { "epoch": 0.0, "grad_norm": 4.249688529774583, "learning_rate": 5.839416058394161e-07, "loss": 2.1319, "step": 4 }, { "epoch": 0.0, "grad_norm": 4.894729598061038, "learning_rate": 7.299270072992701e-07, "loss": 2.2653, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.128280029158089, "learning_rate": 8.759124087591242e-07, "loss": 1.7717, "step": 6 }, { "epoch": 0.0, "grad_norm": 4.857974325866826, "learning_rate": 1.0218978102189781e-06, "loss": 2.2352, "step": 7 }, { "epoch": 0.0, "grad_norm": 4.279510590210478, "learning_rate": 1.1678832116788322e-06, "loss": 2.1748, "step": 8 }, { "epoch": 0.0, "grad_norm": 4.143207248683319, "learning_rate": 1.3138686131386864e-06, "loss": 2.0977, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.836348439753132, "learning_rate": 1.4598540145985402e-06, "loss": 2.1316, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.169681786772062, "learning_rate": 1.6058394160583942e-06, "loss": 1.8064, "step": 11 }, { "epoch": 0.0, "grad_norm": 2.988943388141476, "learning_rate": 1.7518248175182485e-06, "loss": 1.8728, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.3929806450185804, "learning_rate": 1.8978102189781023e-06, "loss": 2.2436, "step": 13 }, { "epoch": 0.0, "grad_norm": 2.8432284704849127, "learning_rate": 2.0437956204379563e-06, "loss": 1.9455, "step": 14 }, { "epoch": 0.0, "grad_norm": 2.803494425471958, "learning_rate": 2.1897810218978103e-06, "loss": 1.8886, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.1817557242072168, "learning_rate": 2.3357664233576643e-06, "loss": 2.1747, "step": 16 }, { "epoch": 0.0, "grad_norm": 3.1726437092871147, "learning_rate": 2.4817518248175183e-06, "loss": 2.1334, "step": 17 }, { "epoch": 0.0, "grad_norm": 2.924586053232777, "learning_rate": 2.627737226277373e-06, "loss": 1.8445, "step": 18 }, { "epoch": 0.0, "grad_norm": 2.763798514655918, "learning_rate": 2.7737226277372264e-06, "loss": 1.9356, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.6064992808317755, "learning_rate": 2.9197080291970804e-06, "loss": 1.7732, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.1626567213707286, "learning_rate": 3.065693430656935e-06, "loss": 1.9922, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.8071379516264727, "learning_rate": 3.2116788321167884e-06, "loss": 1.8957, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.2009291290769637, "learning_rate": 3.3576642335766425e-06, "loss": 1.8077, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.493585750362153, "learning_rate": 3.503649635036497e-06, "loss": 1.5587, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.0208173010540813, "learning_rate": 3.6496350364963505e-06, "loss": 1.7271, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.9588523812671719, "learning_rate": 3.7956204379562045e-06, "loss": 1.5694, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.4878919197331266, "learning_rate": 3.9416058394160585e-06, "loss": 1.4266, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.556986426949064, "learning_rate": 4.0875912408759126e-06, "loss": 1.5129, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.3363605297969032, "learning_rate": 4.233576642335767e-06, "loss": 1.4622, "step": 29 }, { "epoch": 0.0, "grad_norm": 1.3536518683402357, "learning_rate": 4.379562043795621e-06, "loss": 1.3519, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.6851289834481316, "learning_rate": 4.525547445255475e-06, "loss": 1.6255, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.1408008805631367, "learning_rate": 4.671532846715329e-06, "loss": 1.2072, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.366020314767896, "learning_rate": 4.8175182481751835e-06, "loss": 1.3705, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.1839994232620947, "learning_rate": 4.963503649635037e-06, "loss": 1.2993, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.1036145731357472, "learning_rate": 5.1094890510948916e-06, "loss": 1.2042, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.1420864514532507, "learning_rate": 5.255474452554746e-06, "loss": 1.419, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.0969643363545438, "learning_rate": 5.401459854014599e-06, "loss": 1.1267, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.0775411932521246, "learning_rate": 5.547445255474453e-06, "loss": 1.2421, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.060841443934055, "learning_rate": 5.693430656934307e-06, "loss": 1.1793, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.1547507799124865, "learning_rate": 5.839416058394161e-06, "loss": 1.3276, "step": 40 }, { "epoch": 0.0, "grad_norm": 0.9456819175034582, "learning_rate": 5.985401459854016e-06, "loss": 1.1627, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.4670604159208278, "learning_rate": 6.13138686131387e-06, "loss": 1.4, "step": 42 }, { "epoch": 0.0, "grad_norm": 1.0901826721199976, "learning_rate": 6.277372262773723e-06, "loss": 1.28, "step": 43 }, { "epoch": 0.0, "grad_norm": 1.034560582779727, "learning_rate": 6.423357664233577e-06, "loss": 1.3776, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.9942941003831013, "learning_rate": 6.569343065693431e-06, "loss": 1.2454, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.1051572219195003, "learning_rate": 6.715328467153285e-06, "loss": 1.3865, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.0297208488285288, "learning_rate": 6.86131386861314e-06, "loss": 1.3139, "step": 47 }, { "epoch": 0.01, "grad_norm": 0.9557758739121303, "learning_rate": 7.007299270072994e-06, "loss": 1.3105, "step": 48 }, { "epoch": 0.01, "grad_norm": 0.9800237428455421, "learning_rate": 7.153284671532848e-06, "loss": 1.2289, "step": 49 }, { "epoch": 0.01, "grad_norm": 0.7469314346380025, "learning_rate": 7.299270072992701e-06, "loss": 0.9904, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.0700614801908555, "learning_rate": 7.445255474452555e-06, "loss": 1.1376, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.9785466903049703, "learning_rate": 7.591240875912409e-06, "loss": 1.1916, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.9258408668090249, "learning_rate": 7.737226277372264e-06, "loss": 1.2119, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.0861081325482576, "learning_rate": 7.883211678832117e-06, "loss": 1.1729, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.9760518684519011, "learning_rate": 8.029197080291972e-06, "loss": 1.2895, "step": 55 }, { "epoch": 0.01, "grad_norm": 0.7278011797118368, "learning_rate": 8.175182481751825e-06, "loss": 0.9904, "step": 56 }, { "epoch": 0.01, "grad_norm": 0.9927374449574603, "learning_rate": 8.32116788321168e-06, "loss": 1.0993, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.8716651171663807, "learning_rate": 8.467153284671533e-06, "loss": 1.0723, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.054762321926421, "learning_rate": 8.613138686131386e-06, "loss": 1.2407, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.9782172869450889, "learning_rate": 8.759124087591241e-06, "loss": 1.1321, "step": 60 }, { "epoch": 0.01, "grad_norm": 0.9319051371049969, "learning_rate": 8.905109489051096e-06, "loss": 1.1106, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.6960247121156266, "learning_rate": 9.05109489051095e-06, "loss": 0.8907, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.9113706419404604, "learning_rate": 9.197080291970804e-06, "loss": 1.1704, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.799832260904814, "learning_rate": 9.343065693430657e-06, "loss": 1.1016, "step": 64 }, { "epoch": 0.01, "grad_norm": 0.9128577288869439, "learning_rate": 9.48905109489051e-06, "loss": 1.1215, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.0567268717147842, "learning_rate": 9.635036496350367e-06, "loss": 1.352, "step": 66 }, { "epoch": 0.01, "grad_norm": 0.9094517809777068, "learning_rate": 9.78102189781022e-06, "loss": 1.1847, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.7867342525951125, "learning_rate": 9.927007299270073e-06, "loss": 0.9282, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.7933746952472809, "learning_rate": 1.0072992700729928e-05, "loss": 1.0709, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.7806742263067411, "learning_rate": 1.0218978102189783e-05, "loss": 0.9287, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.8109743025409091, "learning_rate": 1.0364963503649636e-05, "loss": 0.9879, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.0589410979160223, "learning_rate": 1.0510948905109491e-05, "loss": 1.1894, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.8271750633881442, "learning_rate": 1.0656934306569344e-05, "loss": 1.0289, "step": 73 }, { "epoch": 0.01, "grad_norm": 0.8404662907754663, "learning_rate": 1.0802919708029198e-05, "loss": 1.0454, "step": 74 }, { "epoch": 0.01, "grad_norm": 0.7471944070111499, "learning_rate": 1.0948905109489052e-05, "loss": 0.8529, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.8895217353047934, "learning_rate": 1.1094890510948906e-05, "loss": 0.9981, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.8869781114829942, "learning_rate": 1.124087591240876e-05, "loss": 0.9505, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.110690237178331, "learning_rate": 1.1386861313868614e-05, "loss": 1.1931, "step": 78 }, { "epoch": 0.01, "grad_norm": 0.9142916791478641, "learning_rate": 1.1532846715328467e-05, "loss": 0.9926, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.0123559425445243, "learning_rate": 1.1678832116788322e-05, "loss": 1.26, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.937345396645472, "learning_rate": 1.1824817518248176e-05, "loss": 1.0732, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.9726262222005855, "learning_rate": 1.1970802919708031e-05, "loss": 1.2389, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.8794528194175176, "learning_rate": 1.2116788321167885e-05, "loss": 1.0843, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.936638563476656, "learning_rate": 1.226277372262774e-05, "loss": 1.1586, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.8416181405560003, "learning_rate": 1.2408759124087593e-05, "loss": 0.9543, "step": 85 }, { "epoch": 0.01, "grad_norm": 0.959753035329643, "learning_rate": 1.2554744525547446e-05, "loss": 1.0536, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.8612870879139615, "learning_rate": 1.27007299270073e-05, "loss": 1.004, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.792060369338959, "learning_rate": 1.2846715328467154e-05, "loss": 0.9973, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.8253692985267265, "learning_rate": 1.2992700729927009e-05, "loss": 0.93, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.9603177685132234, "learning_rate": 1.3138686131386862e-05, "loss": 1.0306, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.8960913168384407, "learning_rate": 1.3284671532846715e-05, "loss": 1.0169, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.9697662068817549, "learning_rate": 1.343065693430657e-05, "loss": 1.1232, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.908702360371072, "learning_rate": 1.3576642335766423e-05, "loss": 0.9398, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.9545076690973209, "learning_rate": 1.372262773722628e-05, "loss": 1.035, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9967858397214344, "learning_rate": 1.3868613138686133e-05, "loss": 1.0483, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.9594865094220973, "learning_rate": 1.4014598540145988e-05, "loss": 1.1163, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.8901218197528311, "learning_rate": 1.416058394160584e-05, "loss": 1.0035, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.2195698841336509, "learning_rate": 1.4306569343065696e-05, "loss": 1.0776, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.8629587183722728, "learning_rate": 1.4452554744525549e-05, "loss": 0.899, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.8603954845219532, "learning_rate": 1.4598540145985402e-05, "loss": 0.937, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.9607025107271405, "learning_rate": 1.4744525547445257e-05, "loss": 1.012, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.8232046460407965, "learning_rate": 1.489051094890511e-05, "loss": 0.9778, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.9969261730297488, "learning_rate": 1.5036496350364965e-05, "loss": 0.9778, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.9876610841830437, "learning_rate": 1.5182481751824818e-05, "loss": 1.0761, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.8006092538111557, "learning_rate": 1.5328467153284673e-05, "loss": 0.8488, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.9416159241994874, "learning_rate": 1.5474452554744528e-05, "loss": 1.0294, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.927252329021459, "learning_rate": 1.5620437956204383e-05, "loss": 0.9767, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.8985186332889099, "learning_rate": 1.5766423357664234e-05, "loss": 1.0582, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.8019326840567439, "learning_rate": 1.591240875912409e-05, "loss": 1.0131, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.8459260924140503, "learning_rate": 1.6058394160583944e-05, "loss": 0.9237, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.119833633464446, "learning_rate": 1.62043795620438e-05, "loss": 0.9056, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.835529049348371, "learning_rate": 1.635036496350365e-05, "loss": 0.8131, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.6535515026908469, "learning_rate": 1.6496350364963505e-05, "loss": 0.8077, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.8304368982818897, "learning_rate": 1.664233576642336e-05, "loss": 0.9227, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.7453451345404556, "learning_rate": 1.678832116788321e-05, "loss": 0.7761, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.017988535878321, "learning_rate": 1.6934306569343066e-05, "loss": 1.0007, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.7649825767951453, "learning_rate": 1.708029197080292e-05, "loss": 0.7844, "step": 117 }, { "epoch": 0.01, "grad_norm": 0.898719080046926, "learning_rate": 1.7226277372262773e-05, "loss": 0.949, "step": 118 }, { "epoch": 0.01, "grad_norm": 0.8779647192155593, "learning_rate": 1.737226277372263e-05, "loss": 1.0252, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.779365632124019, "learning_rate": 1.7518248175182482e-05, "loss": 0.8929, "step": 120 }, { "epoch": 0.01, "grad_norm": 0.8128048251624574, "learning_rate": 1.7664233576642337e-05, "loss": 0.953, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.8732728601647503, "learning_rate": 1.7810218978102192e-05, "loss": 0.9142, "step": 122 }, { "epoch": 0.01, "grad_norm": 0.8101234525039789, "learning_rate": 1.7956204379562047e-05, "loss": 0.8924, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.8200518966162574, "learning_rate": 1.81021897810219e-05, "loss": 0.9443, "step": 124 }, { "epoch": 0.01, "grad_norm": 0.8558389568113152, "learning_rate": 1.8248175182481753e-05, "loss": 0.8735, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.812249952133708, "learning_rate": 1.8394160583941608e-05, "loss": 0.7464, "step": 126 }, { "epoch": 0.01, "grad_norm": 0.7711916597580152, "learning_rate": 1.854014598540146e-05, "loss": 0.7847, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.913375400620912, "learning_rate": 1.8686131386861315e-05, "loss": 1.0185, "step": 128 }, { "epoch": 0.01, "grad_norm": 0.9374111769242447, "learning_rate": 1.883211678832117e-05, "loss": 1.1294, "step": 129 }, { "epoch": 0.01, "grad_norm": 0.7809460620937397, "learning_rate": 1.897810218978102e-05, "loss": 0.8164, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.7555568592404668, "learning_rate": 1.912408759124088e-05, "loss": 0.8667, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.7688544148192044, "learning_rate": 1.9270072992700734e-05, "loss": 0.785, "step": 132 }, { "epoch": 0.01, "grad_norm": 0.8813496174882957, "learning_rate": 1.9416058394160586e-05, "loss": 0.9051, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.8481248887356465, "learning_rate": 1.956204379562044e-05, "loss": 0.9196, "step": 134 }, { "epoch": 0.01, "grad_norm": 0.8583997915211697, "learning_rate": 1.9708029197080295e-05, "loss": 0.9492, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.8923614038515579, "learning_rate": 1.9854014598540147e-05, "loss": 0.9376, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.7630606544354205, "learning_rate": 2e-05, "loss": 0.921, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.7884064392551693, "learning_rate": 2.0145985401459857e-05, "loss": 0.9123, "step": 138 }, { "epoch": 0.02, "grad_norm": 0.7908940728125956, "learning_rate": 2.0291970802919708e-05, "loss": 0.8448, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.7893454922365724, "learning_rate": 2.0437956204379566e-05, "loss": 0.8967, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.9704833981304126, "learning_rate": 2.0583941605839418e-05, "loss": 0.8912, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.004801428996121, "learning_rate": 2.0729927007299273e-05, "loss": 0.9759, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.9369647087323253, "learning_rate": 2.0875912408759124e-05, "loss": 0.9831, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.8921678130053814, "learning_rate": 2.1021897810218982e-05, "loss": 0.9521, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.9372303256953518, "learning_rate": 2.1167883211678834e-05, "loss": 1.0517, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.8552449948393822, "learning_rate": 2.131386861313869e-05, "loss": 0.9103, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.7676121750284994, "learning_rate": 2.145985401459854e-05, "loss": 0.8621, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.821653595898437, "learning_rate": 2.1605839416058395e-05, "loss": 0.8404, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.8035734668584738, "learning_rate": 2.1751824817518246e-05, "loss": 0.8717, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.867084875134465, "learning_rate": 2.1897810218978105e-05, "loss": 1.0253, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.81662699804174, "learning_rate": 2.204379562043796e-05, "loss": 0.9117, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.8013922616530269, "learning_rate": 2.218978102189781e-05, "loss": 0.9208, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.7479029185767977, "learning_rate": 2.2335766423357666e-05, "loss": 0.8442, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.8770647551380415, "learning_rate": 2.248175182481752e-05, "loss": 0.9463, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.8179934464217773, "learning_rate": 2.2627737226277376e-05, "loss": 0.8536, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.8333939689192803, "learning_rate": 2.2773722627737227e-05, "loss": 0.8545, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.9536900955725867, "learning_rate": 2.2919708029197082e-05, "loss": 1.0469, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.7800980658506356, "learning_rate": 2.3065693430656934e-05, "loss": 0.7782, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.8175695427062762, "learning_rate": 2.3211678832116792e-05, "loss": 0.8163, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.8499995946883189, "learning_rate": 2.3357664233576643e-05, "loss": 0.7826, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.9409476781805804, "learning_rate": 2.3503649635036498e-05, "loss": 1.0353, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.8039945563184248, "learning_rate": 2.3649635036496353e-05, "loss": 0.7398, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.9730280889277279, "learning_rate": 2.3795620437956204e-05, "loss": 0.9063, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.9262831088183515, "learning_rate": 2.3941605839416063e-05, "loss": 0.9266, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.8749799044890016, "learning_rate": 2.4087591240875914e-05, "loss": 0.8567, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.8162508364873224, "learning_rate": 2.423357664233577e-05, "loss": 0.8099, "step": 166 }, { "epoch": 0.02, "grad_norm": 0.8779531779132826, "learning_rate": 2.437956204379562e-05, "loss": 0.9388, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.875780915150506, "learning_rate": 2.452554744525548e-05, "loss": 1.0335, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.8484109077009614, "learning_rate": 2.467153284671533e-05, "loss": 0.7684, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.9240908743069353, "learning_rate": 2.4817518248175185e-05, "loss": 0.911, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.776400161138908, "learning_rate": 2.4963503649635037e-05, "loss": 0.7548, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.8982001281300744, "learning_rate": 2.510948905109489e-05, "loss": 0.8434, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.9290136892749372, "learning_rate": 2.5255474452554743e-05, "loss": 0.9355, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.864578067043459, "learning_rate": 2.54014598540146e-05, "loss": 0.9516, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.9777270583131618, "learning_rate": 2.5547445255474456e-05, "loss": 0.9818, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.872350086132436, "learning_rate": 2.5693430656934308e-05, "loss": 0.9193, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.8453406775215188, "learning_rate": 2.5839416058394166e-05, "loss": 0.9481, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.9523180062348314, "learning_rate": 2.5985401459854017e-05, "loss": 0.9359, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.837892118866129, "learning_rate": 2.6131386861313872e-05, "loss": 0.7993, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.8423906254732197, "learning_rate": 2.6277372262773724e-05, "loss": 0.778, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.8427808988910546, "learning_rate": 2.642335766423358e-05, "loss": 0.8411, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.9137995415028294, "learning_rate": 2.656934306569343e-05, "loss": 0.804, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.9027583173137855, "learning_rate": 2.6715328467153288e-05, "loss": 0.8251, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.8641413895807349, "learning_rate": 2.686131386861314e-05, "loss": 0.876, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.8854594426013437, "learning_rate": 2.7007299270072995e-05, "loss": 0.9454, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.7840237777227991, "learning_rate": 2.7153284671532846e-05, "loss": 0.8771, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.8778571076566607, "learning_rate": 2.7299270072992704e-05, "loss": 0.8599, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.7821884622236454, "learning_rate": 2.744525547445256e-05, "loss": 0.8459, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.7743019128627272, "learning_rate": 2.759124087591241e-05, "loss": 0.7925, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.7866143878103941, "learning_rate": 2.7737226277372266e-05, "loss": 0.7913, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.7935828100427934, "learning_rate": 2.7883211678832117e-05, "loss": 0.8149, "step": 191 }, { "epoch": 0.02, "grad_norm": 0.8604826637862417, "learning_rate": 2.8029197080291975e-05, "loss": 0.8143, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.8508467159532684, "learning_rate": 2.8175182481751827e-05, "loss": 0.8817, "step": 193 }, { "epoch": 0.02, "grad_norm": 0.7435161142687337, "learning_rate": 2.832116788321168e-05, "loss": 0.7964, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.8553910241895625, "learning_rate": 2.8467153284671533e-05, "loss": 0.9158, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.8562460725234794, "learning_rate": 2.861313868613139e-05, "loss": 0.9361, "step": 196 }, { "epoch": 0.02, "grad_norm": 0.8265027765572417, "learning_rate": 2.8759124087591243e-05, "loss": 0.86, "step": 197 }, { "epoch": 0.02, "grad_norm": 0.8122909350284694, "learning_rate": 2.8905109489051098e-05, "loss": 0.8024, "step": 198 }, { "epoch": 0.02, "grad_norm": 0.8604388501230203, "learning_rate": 2.905109489051095e-05, "loss": 0.9568, "step": 199 }, { "epoch": 0.02, "grad_norm": 0.7296128588122619, "learning_rate": 2.9197080291970804e-05, "loss": 0.6976, "step": 200 }, { "epoch": 0.02, "grad_norm": 0.9666471731478535, "learning_rate": 2.9343065693430662e-05, "loss": 0.86, "step": 201 }, { "epoch": 0.02, "grad_norm": 0.9643032706897797, "learning_rate": 2.9489051094890514e-05, "loss": 0.9043, "step": 202 }, { "epoch": 0.02, "grad_norm": 0.8788740364105233, "learning_rate": 2.963503649635037e-05, "loss": 0.7955, "step": 203 }, { "epoch": 0.02, "grad_norm": 0.8494308108645364, "learning_rate": 2.978102189781022e-05, "loss": 0.7778, "step": 204 }, { "epoch": 0.02, "grad_norm": 0.7798279981602546, "learning_rate": 2.9927007299270075e-05, "loss": 0.7827, "step": 205 }, { "epoch": 0.02, "grad_norm": 0.6336256618906441, "learning_rate": 3.007299270072993e-05, "loss": 0.6995, "step": 206 }, { "epoch": 0.02, "grad_norm": 0.7463806958662407, "learning_rate": 3.0218978102189785e-05, "loss": 0.7585, "step": 207 }, { "epoch": 0.02, "grad_norm": 0.759099005947736, "learning_rate": 3.0364963503649636e-05, "loss": 0.7035, "step": 208 }, { "epoch": 0.02, "grad_norm": 0.9245880421029641, "learning_rate": 3.051094890510949e-05, "loss": 0.8373, "step": 209 }, { "epoch": 0.02, "grad_norm": 0.893976430953491, "learning_rate": 3.0656934306569346e-05, "loss": 0.8918, "step": 210 }, { "epoch": 0.02, "grad_norm": 0.7408089936344885, "learning_rate": 3.08029197080292e-05, "loss": 0.7725, "step": 211 }, { "epoch": 0.02, "grad_norm": 0.7932888568771177, "learning_rate": 3.0948905109489056e-05, "loss": 0.7768, "step": 212 }, { "epoch": 0.02, "grad_norm": 0.7333727551470285, "learning_rate": 3.109489051094891e-05, "loss": 0.6986, "step": 213 }, { "epoch": 0.02, "grad_norm": 0.8203950386302806, "learning_rate": 3.1240875912408765e-05, "loss": 0.8117, "step": 214 }, { "epoch": 0.02, "grad_norm": 0.8596703455332148, "learning_rate": 3.138686131386862e-05, "loss": 0.8495, "step": 215 }, { "epoch": 0.02, "grad_norm": 0.8846367411603081, "learning_rate": 3.153284671532847e-05, "loss": 0.8416, "step": 216 }, { "epoch": 0.02, "grad_norm": 0.7913297806424117, "learning_rate": 3.167883211678832e-05, "loss": 0.7883, "step": 217 }, { "epoch": 0.02, "grad_norm": 0.8864001067090056, "learning_rate": 3.182481751824818e-05, "loss": 0.8664, "step": 218 }, { "epoch": 0.02, "grad_norm": 0.7702646004600188, "learning_rate": 3.197080291970803e-05, "loss": 0.7783, "step": 219 }, { "epoch": 0.02, "grad_norm": 0.7858313699281773, "learning_rate": 3.211678832116789e-05, "loss": 0.7565, "step": 220 }, { "epoch": 0.02, "grad_norm": 0.7465709338663865, "learning_rate": 3.226277372262774e-05, "loss": 0.6761, "step": 221 }, { "epoch": 0.02, "grad_norm": 0.9191272972946686, "learning_rate": 3.24087591240876e-05, "loss": 0.8988, "step": 222 }, { "epoch": 0.02, "grad_norm": 0.8094731216162403, "learning_rate": 3.255474452554745e-05, "loss": 0.7862, "step": 223 }, { "epoch": 0.02, "grad_norm": 0.8658699818241734, "learning_rate": 3.27007299270073e-05, "loss": 0.8778, "step": 224 }, { "epoch": 0.02, "grad_norm": 0.8461867572813003, "learning_rate": 3.284671532846716e-05, "loss": 0.7938, "step": 225 }, { "epoch": 0.02, "grad_norm": 0.8709833733803517, "learning_rate": 3.299270072992701e-05, "loss": 0.7898, "step": 226 }, { "epoch": 0.02, "grad_norm": 0.878491655686796, "learning_rate": 3.313868613138687e-05, "loss": 0.7788, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.9325073497063265, "learning_rate": 3.328467153284672e-05, "loss": 0.7702, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.7923231832565352, "learning_rate": 3.343065693430657e-05, "loss": 0.6818, "step": 229 }, { "epoch": 0.03, "grad_norm": 0.942743767903117, "learning_rate": 3.357664233576642e-05, "loss": 0.8443, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.8681702415152016, "learning_rate": 3.372262773722628e-05, "loss": 0.6942, "step": 231 }, { "epoch": 0.03, "grad_norm": 1.006703853730836, "learning_rate": 3.386861313868613e-05, "loss": 0.9115, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.757843882117406, "learning_rate": 3.401459854014599e-05, "loss": 0.766, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.757128062322179, "learning_rate": 3.416058394160584e-05, "loss": 0.8539, "step": 234 }, { "epoch": 0.03, "grad_norm": 0.8875125239388543, "learning_rate": 3.4306569343065694e-05, "loss": 0.814, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.808341055928206, "learning_rate": 3.4452554744525545e-05, "loss": 0.7324, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.7680783182955603, "learning_rate": 3.4598540145985404e-05, "loss": 0.7022, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.773625784410827, "learning_rate": 3.474452554744526e-05, "loss": 0.7348, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.8452745016586605, "learning_rate": 3.4890510948905113e-05, "loss": 0.7333, "step": 239 }, { "epoch": 0.03, "grad_norm": 0.8796795643097877, "learning_rate": 3.5036496350364965e-05, "loss": 0.8311, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.6911046291584321, "learning_rate": 3.5182481751824816e-05, "loss": 0.6326, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.8840908146595398, "learning_rate": 3.5328467153284675e-05, "loss": 0.788, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.8462049303773562, "learning_rate": 3.5474452554744526e-05, "loss": 0.7607, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.8676272471544045, "learning_rate": 3.5620437956204384e-05, "loss": 0.7835, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.90018214131091, "learning_rate": 3.5766423357664236e-05, "loss": 0.9244, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.796911575375581, "learning_rate": 3.5912408759124094e-05, "loss": 0.8376, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.9128495996778878, "learning_rate": 3.6058394160583946e-05, "loss": 0.9573, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.7764065714509375, "learning_rate": 3.62043795620438e-05, "loss": 0.7752, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.8635953098902135, "learning_rate": 3.635036496350365e-05, "loss": 0.828, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.7776935499618617, "learning_rate": 3.649635036496351e-05, "loss": 0.7998, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.7325430807913631, "learning_rate": 3.6642335766423365e-05, "loss": 0.627, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.8786980276352858, "learning_rate": 3.6788321167883217e-05, "loss": 0.7724, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.7809194246896833, "learning_rate": 3.693430656934307e-05, "loss": 0.6631, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.8721654825086504, "learning_rate": 3.708029197080292e-05, "loss": 0.7863, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.7556796390035293, "learning_rate": 3.722627737226278e-05, "loss": 0.6549, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.901357995809885, "learning_rate": 3.737226277372263e-05, "loss": 0.709, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.030566364607516, "learning_rate": 3.751824817518249e-05, "loss": 0.892, "step": 257 }, { "epoch": 0.03, "grad_norm": 0.8867348824936655, "learning_rate": 3.766423357664234e-05, "loss": 0.877, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.985543783321362, "learning_rate": 3.781021897810219e-05, "loss": 0.8474, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.9071408858562116, "learning_rate": 3.795620437956204e-05, "loss": 0.8303, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.7268676937191998, "learning_rate": 3.81021897810219e-05, "loss": 0.6717, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.7889173770814172, "learning_rate": 3.824817518248176e-05, "loss": 0.7581, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.7777957847947902, "learning_rate": 3.839416058394161e-05, "loss": 0.8143, "step": 263 }, { "epoch": 0.03, "grad_norm": 0.7671321903343677, "learning_rate": 3.854014598540147e-05, "loss": 0.7566, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.7969364441763006, "learning_rate": 3.868613138686132e-05, "loss": 0.7008, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.8587544715131925, "learning_rate": 3.883211678832117e-05, "loss": 0.7941, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.8421850880321154, "learning_rate": 3.897810218978102e-05, "loss": 0.6676, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.7383172566347763, "learning_rate": 3.912408759124088e-05, "loss": 0.7128, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.7890256363217543, "learning_rate": 3.927007299270073e-05, "loss": 0.7103, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.8467664487179857, "learning_rate": 3.941605839416059e-05, "loss": 0.7219, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.8610707977730204, "learning_rate": 3.956204379562044e-05, "loss": 0.7862, "step": 271 }, { "epoch": 0.03, "grad_norm": 0.8125713023664098, "learning_rate": 3.9708029197080294e-05, "loss": 0.7378, "step": 272 }, { "epoch": 0.03, "grad_norm": 0.8940863689928457, "learning_rate": 3.9854014598540145e-05, "loss": 0.7886, "step": 273 }, { "epoch": 0.03, "grad_norm": 1.0050851749746672, "learning_rate": 4e-05, "loss": 0.7236, "step": 274 }, { "epoch": 0.03, "grad_norm": 0.8388090635331187, "learning_rate": 3.999999873473464e-05, "loss": 0.7377, "step": 275 }, { "epoch": 0.03, "grad_norm": 0.8383014445436562, "learning_rate": 3.99999949389387e-05, "loss": 0.7259, "step": 276 }, { "epoch": 0.03, "grad_norm": 0.7510972738292445, "learning_rate": 3.999998861261268e-05, "loss": 0.7294, "step": 277 }, { "epoch": 0.03, "grad_norm": 0.713726901400469, "learning_rate": 3.999997975575736e-05, "loss": 0.7374, "step": 278 }, { "epoch": 0.03, "grad_norm": 0.8310339955354938, "learning_rate": 3.999996836837388e-05, "loss": 0.8416, "step": 279 }, { "epoch": 0.03, "grad_norm": 0.8503672409522316, "learning_rate": 3.9999954450463665e-05, "loss": 0.8361, "step": 280 }, { "epoch": 0.03, "grad_norm": 0.7727501637592222, "learning_rate": 3.999993800202848e-05, "loss": 0.7638, "step": 281 }, { "epoch": 0.03, "grad_norm": 0.752863741310904, "learning_rate": 3.9999919023070414e-05, "loss": 0.7267, "step": 282 }, { "epoch": 0.03, "grad_norm": 0.743777609561108, "learning_rate": 3.999989751359186e-05, "loss": 0.7195, "step": 283 }, { "epoch": 0.03, "grad_norm": 0.7491973317001533, "learning_rate": 3.999987347359555e-05, "loss": 0.6736, "step": 284 }, { "epoch": 0.03, "grad_norm": 0.8735011410260841, "learning_rate": 3.9999846903084514e-05, "loss": 0.6922, "step": 285 }, { "epoch": 0.03, "grad_norm": 0.7681511833998723, "learning_rate": 3.999981780206212e-05, "loss": 0.6805, "step": 286 }, { "epoch": 0.03, "grad_norm": 0.7399497982343214, "learning_rate": 3.999978617053205e-05, "loss": 0.6517, "step": 287 }, { "epoch": 0.03, "grad_norm": 0.7793165314377594, "learning_rate": 3.99997520084983e-05, "loss": 0.6802, "step": 288 }, { "epoch": 0.03, "grad_norm": 0.8442459414980154, "learning_rate": 3.9999715315965194e-05, "loss": 0.8615, "step": 289 }, { "epoch": 0.03, "grad_norm": 0.8002566968833671, "learning_rate": 3.999967609293739e-05, "loss": 0.8276, "step": 290 }, { "epoch": 0.03, "grad_norm": 0.7252502897275802, "learning_rate": 3.9999634339419826e-05, "loss": 0.6918, "step": 291 }, { "epoch": 0.03, "grad_norm": 0.7403815533310777, "learning_rate": 3.99995900554178e-05, "loss": 0.6976, "step": 292 }, { "epoch": 0.03, "grad_norm": 0.9424472608912726, "learning_rate": 3.9999543240936916e-05, "loss": 0.77, "step": 293 }, { "epoch": 0.03, "grad_norm": 0.9165106698438497, "learning_rate": 3.999949389598309e-05, "loss": 0.8012, "step": 294 }, { "epoch": 0.03, "grad_norm": 0.8842702662049385, "learning_rate": 3.9999442020562575e-05, "loss": 0.6576, "step": 295 }, { "epoch": 0.03, "grad_norm": 0.7214397604572383, "learning_rate": 3.999938761468192e-05, "loss": 0.6748, "step": 296 }, { "epoch": 0.03, "grad_norm": 0.8826123913851801, "learning_rate": 3.999933067834803e-05, "loss": 0.7898, "step": 297 }, { "epoch": 0.03, "grad_norm": 0.7584184013762938, "learning_rate": 3.9999271211568084e-05, "loss": 0.6604, "step": 298 }, { "epoch": 0.03, "grad_norm": 0.7588518232118705, "learning_rate": 3.999920921434962e-05, "loss": 0.6761, "step": 299 }, { "epoch": 0.03, "grad_norm": 0.9329383177386635, "learning_rate": 3.999914468670048e-05, "loss": 0.8267, "step": 300 }, { "epoch": 0.03, "grad_norm": 0.7185478755527461, "learning_rate": 3.9999077628628834e-05, "loss": 0.6602, "step": 301 }, { "epoch": 0.03, "grad_norm": 0.924772525860542, "learning_rate": 3.999900804014317e-05, "loss": 0.8433, "step": 302 }, { "epoch": 0.03, "grad_norm": 0.9009317025941854, "learning_rate": 3.9998935921252275e-05, "loss": 0.8177, "step": 303 }, { "epoch": 0.03, "grad_norm": 0.8439125681490359, "learning_rate": 3.9998861271965285e-05, "loss": 0.713, "step": 304 }, { "epoch": 0.03, "grad_norm": 0.8118401195266246, "learning_rate": 3.999878409229164e-05, "loss": 0.6455, "step": 305 }, { "epoch": 0.03, "grad_norm": 0.7715776990627178, "learning_rate": 3.999870438224111e-05, "loss": 0.7286, "step": 306 }, { "epoch": 0.03, "grad_norm": 0.9172688514724856, "learning_rate": 3.999862214182379e-05, "loss": 0.8476, "step": 307 }, { "epoch": 0.03, "grad_norm": 0.8349527637460848, "learning_rate": 3.999853737105007e-05, "loss": 0.7318, "step": 308 }, { "epoch": 0.03, "grad_norm": 0.8912865039256895, "learning_rate": 3.999845006993068e-05, "loss": 0.7909, "step": 309 }, { "epoch": 0.03, "grad_norm": 0.8515101425453874, "learning_rate": 3.9998360238476655e-05, "loss": 0.8353, "step": 310 }, { "epoch": 0.03, "grad_norm": 0.6814806731373988, "learning_rate": 3.9998267876699384e-05, "loss": 0.6833, "step": 311 }, { "epoch": 0.03, "grad_norm": 0.8373271763822236, "learning_rate": 3.999817298461054e-05, "loss": 0.7619, "step": 312 }, { "epoch": 0.03, "grad_norm": 0.8559870782638922, "learning_rate": 3.9998075562222134e-05, "loss": 0.8058, "step": 313 }, { "epoch": 0.03, "grad_norm": 0.8554773896360247, "learning_rate": 3.999797560954649e-05, "loss": 0.7257, "step": 314 }, { "epoch": 0.03, "grad_norm": 0.8004587780228744, "learning_rate": 3.9997873126596255e-05, "loss": 0.6722, "step": 315 }, { "epoch": 0.03, "grad_norm": 0.8668054649886653, "learning_rate": 3.999776811338439e-05, "loss": 0.7144, "step": 316 }, { "epoch": 0.03, "grad_norm": 0.7637625637987723, "learning_rate": 3.999766056992419e-05, "loss": 0.6253, "step": 317 }, { "epoch": 0.03, "grad_norm": 0.8201354426082679, "learning_rate": 3.999755049622926e-05, "loss": 0.6807, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.8501310303615375, "learning_rate": 3.999743789231353e-05, "loss": 0.5714, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.8012058156011441, "learning_rate": 3.9997322758191244e-05, "loss": 0.609, "step": 320 }, { "epoch": 0.04, "grad_norm": 1.041671632119106, "learning_rate": 3.999720509387696e-05, "loss": 0.749, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.7947247234931644, "learning_rate": 3.999708489938559e-05, "loss": 0.6367, "step": 322 }, { "epoch": 0.04, "grad_norm": 0.7441789111603445, "learning_rate": 3.999696217473231e-05, "loss": 0.6123, "step": 323 }, { "epoch": 0.04, "grad_norm": 0.6904871807413867, "learning_rate": 3.999683691993268e-05, "loss": 0.541, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.8181610959733012, "learning_rate": 3.9996709135002524e-05, "loss": 0.6713, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.7770591483217604, "learning_rate": 3.999657881995802e-05, "loss": 0.6928, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.8852589560278958, "learning_rate": 3.999644597481566e-05, "loss": 0.8136, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.8946259627529591, "learning_rate": 3.9996310599592244e-05, "loss": 0.6859, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.7997727160648551, "learning_rate": 3.999617269430491e-05, "loss": 0.6963, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.7408580719114625, "learning_rate": 3.9996032258971097e-05, "loss": 0.7109, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.7875225790510764, "learning_rate": 3.999588929360858e-05, "loss": 0.8099, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.83428135035581, "learning_rate": 3.9995743798235445e-05, "loss": 0.7305, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.8211198013194521, "learning_rate": 3.9995595772870103e-05, "loss": 0.6767, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.7303975116447102, "learning_rate": 3.999544521753128e-05, "loss": 0.6688, "step": 334 }, { "epoch": 0.04, "grad_norm": 0.8747481256090411, "learning_rate": 3.999529213223802e-05, "loss": 0.7076, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.7974889297673676, "learning_rate": 3.999513651700971e-05, "loss": 0.6551, "step": 336 }, { "epoch": 0.04, "grad_norm": 0.7984550369636046, "learning_rate": 3.999497837186603e-05, "loss": 0.7061, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.7393323490355842, "learning_rate": 3.999481769682699e-05, "loss": 0.6188, "step": 338 }, { "epoch": 0.04, "grad_norm": 0.7218879533406446, "learning_rate": 3.999465449191291e-05, "loss": 0.5727, "step": 339 }, { "epoch": 0.04, "grad_norm": 0.8982077926882093, "learning_rate": 3.9994488757144454e-05, "loss": 0.7223, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.749910826150099, "learning_rate": 3.999432049254258e-05, "loss": 0.5718, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.8218926242925816, "learning_rate": 3.999414969812859e-05, "loss": 0.7052, "step": 342 }, { "epoch": 0.04, "grad_norm": 0.702648997779424, "learning_rate": 3.999397637392409e-05, "loss": 0.5319, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.785210792582861, "learning_rate": 3.9993800519951e-05, "loss": 0.5782, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.9579061751740375, "learning_rate": 3.9993622136231585e-05, "loss": 0.8198, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.7556461950316, "learning_rate": 3.99934412227884e-05, "loss": 0.5552, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.7699269473113453, "learning_rate": 3.999325777964435e-05, "loss": 0.639, "step": 347 }, { "epoch": 0.04, "grad_norm": 0.7825432944584432, "learning_rate": 3.999307180682264e-05, "loss": 0.6526, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.9185256242868887, "learning_rate": 3.999288330434681e-05, "loss": 0.7195, "step": 349 }, { "epoch": 0.04, "grad_norm": 0.8126640887726768, "learning_rate": 3.9992692272240684e-05, "loss": 0.6022, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.7241460753418988, "learning_rate": 3.999249871052845e-05, "loss": 0.5674, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.8330461285310613, "learning_rate": 3.99923026192346e-05, "loss": 0.6461, "step": 352 }, { "epoch": 0.04, "grad_norm": 0.9280244072754565, "learning_rate": 3.9992103998383946e-05, "loss": 0.7077, "step": 353 }, { "epoch": 0.04, "grad_norm": 0.8357204262364643, "learning_rate": 3.999190284800162e-05, "loss": 0.6475, "step": 354 }, { "epoch": 0.04, "grad_norm": 0.7130248061060847, "learning_rate": 3.999169916811306e-05, "loss": 0.5797, "step": 355 }, { "epoch": 0.04, "grad_norm": 0.7952051431260259, "learning_rate": 3.9991492958744046e-05, "loss": 0.6515, "step": 356 }, { "epoch": 0.04, "grad_norm": 0.7789398369774344, "learning_rate": 3.999128421992067e-05, "loss": 0.6446, "step": 357 }, { "epoch": 0.04, "grad_norm": 0.9176244861230839, "learning_rate": 3.9991072951669334e-05, "loss": 0.6926, "step": 358 }, { "epoch": 0.04, "grad_norm": 0.9302534416399562, "learning_rate": 3.9990859154016785e-05, "loss": 0.7405, "step": 359 }, { "epoch": 0.04, "grad_norm": 0.7284750428542651, "learning_rate": 3.999064282699006e-05, "loss": 0.5564, "step": 360 }, { "epoch": 0.04, "grad_norm": 0.9516994885086428, "learning_rate": 3.999042397061654e-05, "loss": 0.691, "step": 361 }, { "epoch": 0.04, "grad_norm": 0.9177543876831945, "learning_rate": 3.999020258492391e-05, "loss": 0.7643, "step": 362 }, { "epoch": 0.04, "grad_norm": 0.7493901554894491, "learning_rate": 3.998997866994017e-05, "loss": 0.5473, "step": 363 }, { "epoch": 0.04, "grad_norm": 0.9026411152779741, "learning_rate": 3.998975222569368e-05, "loss": 0.7193, "step": 364 }, { "epoch": 0.04, "grad_norm": 0.8268873214668665, "learning_rate": 3.998952325221307e-05, "loss": 0.697, "step": 365 }, { "epoch": 0.04, "grad_norm": 0.8360399201298283, "learning_rate": 3.9989291749527314e-05, "loss": 0.7089, "step": 366 }, { "epoch": 0.04, "grad_norm": 0.9191231145135161, "learning_rate": 3.998905771766571e-05, "loss": 0.7067, "step": 367 }, { "epoch": 0.04, "grad_norm": 0.8322434848657582, "learning_rate": 3.998882115665786e-05, "loss": 0.7031, "step": 368 }, { "epoch": 0.04, "grad_norm": 0.7161513079809682, "learning_rate": 3.99885820665337e-05, "loss": 0.6038, "step": 369 }, { "epoch": 0.04, "grad_norm": 0.7122034392638321, "learning_rate": 3.998834044732348e-05, "loss": 0.617, "step": 370 }, { "epoch": 0.04, "grad_norm": 0.9640967067789271, "learning_rate": 3.998809629905778e-05, "loss": 0.6449, "step": 371 }, { "epoch": 0.04, "grad_norm": 0.8715983413324246, "learning_rate": 3.9987849621767473e-05, "loss": 0.6974, "step": 372 }, { "epoch": 0.04, "grad_norm": 0.8641924299950741, "learning_rate": 3.998760041548379e-05, "loss": 0.5878, "step": 373 }, { "epoch": 0.04, "grad_norm": 1.0098654957713729, "learning_rate": 3.998734868023825e-05, "loss": 0.6907, "step": 374 }, { "epoch": 0.04, "grad_norm": 0.7839792263772378, "learning_rate": 3.9987094416062705e-05, "loss": 0.5546, "step": 375 }, { "epoch": 0.04, "grad_norm": 0.7905392566369994, "learning_rate": 3.998683762298933e-05, "loss": 0.6039, "step": 376 }, { "epoch": 0.04, "grad_norm": 1.1411723038798014, "learning_rate": 3.9986578301050615e-05, "loss": 0.7722, "step": 377 }, { "epoch": 0.04, "grad_norm": 0.7957819004981204, "learning_rate": 3.9986316450279365e-05, "loss": 0.6741, "step": 378 }, { "epoch": 0.04, "grad_norm": 0.8070685703921063, "learning_rate": 3.998605207070872e-05, "loss": 0.6094, "step": 379 }, { "epoch": 0.04, "grad_norm": 0.7171331131535859, "learning_rate": 3.9985785162372135e-05, "loss": 0.5898, "step": 380 }, { "epoch": 0.04, "grad_norm": 0.7022060747325506, "learning_rate": 3.998551572530336e-05, "loss": 0.542, "step": 381 }, { "epoch": 0.04, "grad_norm": 0.9226310345388156, "learning_rate": 3.998524375953651e-05, "loss": 0.7777, "step": 382 }, { "epoch": 0.04, "grad_norm": 0.8253680708584371, "learning_rate": 3.9984969265105984e-05, "loss": 0.647, "step": 383 }, { "epoch": 0.04, "grad_norm": 0.8986164744035455, "learning_rate": 3.998469224204652e-05, "loss": 0.6549, "step": 384 }, { "epoch": 0.04, "grad_norm": 0.9477374288157409, "learning_rate": 3.998441269039315e-05, "loss": 0.7938, "step": 385 }, { "epoch": 0.04, "grad_norm": 0.7177412168575946, "learning_rate": 3.998413061018126e-05, "loss": 0.6205, "step": 386 }, { "epoch": 0.04, "grad_norm": 0.8434483024072181, "learning_rate": 3.998384600144655e-05, "loss": 0.7054, "step": 387 }, { "epoch": 0.04, "grad_norm": 0.6939255243213035, "learning_rate": 3.9983558864225005e-05, "loss": 0.6075, "step": 388 }, { "epoch": 0.04, "grad_norm": 0.8687155380549173, "learning_rate": 3.9983269198552975e-05, "loss": 0.6688, "step": 389 }, { "epoch": 0.04, "grad_norm": 0.8749588547978192, "learning_rate": 3.9982977004467106e-05, "loss": 0.688, "step": 390 }, { "epoch": 0.04, "grad_norm": 0.7482574007284549, "learning_rate": 3.998268228200437e-05, "loss": 0.6648, "step": 391 }, { "epoch": 0.04, "grad_norm": 0.6874186727798434, "learning_rate": 3.998238503120205e-05, "loss": 0.527, "step": 392 }, { "epoch": 0.04, "grad_norm": 0.8229444153046743, "learning_rate": 3.9982085252097764e-05, "loss": 0.6677, "step": 393 }, { "epoch": 0.04, "grad_norm": 0.8025994106170787, "learning_rate": 3.998178294472944e-05, "loss": 0.62, "step": 394 }, { "epoch": 0.04, "grad_norm": 0.9409990181730299, "learning_rate": 3.998147810913532e-05, "loss": 0.609, "step": 395 }, { "epoch": 0.04, "grad_norm": 0.9416416871012572, "learning_rate": 3.998117074535398e-05, "loss": 0.6682, "step": 396 }, { "epoch": 0.04, "grad_norm": 0.8087842781314133, "learning_rate": 3.998086085342431e-05, "loss": 0.6347, "step": 397 }, { "epoch": 0.04, "grad_norm": 0.8530789443219859, "learning_rate": 3.9980548433385525e-05, "loss": 0.6421, "step": 398 }, { "epoch": 0.04, "grad_norm": 0.9054955926953236, "learning_rate": 3.9980233485277147e-05, "loss": 0.6905, "step": 399 }, { "epoch": 0.04, "grad_norm": 0.7304986141852277, "learning_rate": 3.997991600913903e-05, "loss": 0.5146, "step": 400 }, { "epoch": 0.04, "grad_norm": 0.8920919150008454, "learning_rate": 3.997959600501133e-05, "loss": 0.6687, "step": 401 }, { "epoch": 0.04, "grad_norm": 0.9362315816048005, "learning_rate": 3.9979273472934556e-05, "loss": 0.6466, "step": 402 }, { "epoch": 0.04, "grad_norm": 0.8421712162517252, "learning_rate": 3.99789484129495e-05, "loss": 0.6313, "step": 403 }, { "epoch": 0.04, "grad_norm": 0.7625074949052515, "learning_rate": 3.9978620825097306e-05, "loss": 0.5929, "step": 404 }, { "epoch": 0.04, "grad_norm": 0.8694916434234695, "learning_rate": 3.997829070941941e-05, "loss": 0.6037, "step": 405 }, { "epoch": 0.04, "grad_norm": 0.7688800360636097, "learning_rate": 3.997795806595758e-05, "loss": 0.5936, "step": 406 }, { "epoch": 0.04, "grad_norm": 0.6860970357226771, "learning_rate": 3.997762289475392e-05, "loss": 0.6259, "step": 407 }, { "epoch": 0.04, "grad_norm": 0.8702537694869666, "learning_rate": 3.9977285195850816e-05, "loss": 0.6953, "step": 408 }, { "epoch": 0.04, "grad_norm": 0.891297671954456, "learning_rate": 3.997694496929102e-05, "loss": 0.5534, "step": 409 }, { "epoch": 0.05, "grad_norm": 0.8962650737266303, "learning_rate": 3.9976602215117554e-05, "loss": 0.6507, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.8055955522480743, "learning_rate": 3.9976256933373806e-05, "loss": 0.5653, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.7619791221662097, "learning_rate": 3.997590912410345e-05, "loss": 0.4765, "step": 412 }, { "epoch": 0.05, "grad_norm": 0.9335185383783076, "learning_rate": 3.997555878735051e-05, "loss": 0.7551, "step": 413 }, { "epoch": 0.05, "grad_norm": 0.8748966564730668, "learning_rate": 3.997520592315929e-05, "loss": 0.5982, "step": 414 }, { "epoch": 0.05, "grad_norm": 0.8582121177066755, "learning_rate": 3.9974850531574444e-05, "loss": 0.6746, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.9448169323544137, "learning_rate": 3.997449261264095e-05, "loss": 0.6105, "step": 416 }, { "epoch": 0.05, "grad_norm": 0.8411493488313102, "learning_rate": 3.997413216640409e-05, "loss": 0.5723, "step": 417 }, { "epoch": 0.05, "grad_norm": 0.7229822609098383, "learning_rate": 3.997376919290946e-05, "loss": 0.4986, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.8178434361578241, "learning_rate": 3.9973403692202996e-05, "loss": 0.5869, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.8050939774005232, "learning_rate": 3.997303566433094e-05, "loss": 0.5368, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.8669187112031361, "learning_rate": 3.997266510933986e-05, "loss": 0.5691, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.7878758033338007, "learning_rate": 3.997229202727663e-05, "loss": 0.5659, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.7547933863564875, "learning_rate": 3.9971916418188473e-05, "loss": 0.5434, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.9293083251145102, "learning_rate": 3.99715382821229e-05, "loss": 0.6748, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.8222121993805745, "learning_rate": 3.9971157619127766e-05, "loss": 0.5531, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.9217750689121502, "learning_rate": 3.997077442925122e-05, "loss": 0.6405, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.9138232186932482, "learning_rate": 3.9970388712541754e-05, "loss": 0.7075, "step": 427 }, { "epoch": 0.05, "grad_norm": 0.7052407116878368, "learning_rate": 3.997000046904817e-05, "loss": 0.516, "step": 428 }, { "epoch": 0.05, "grad_norm": 0.7685122254019083, "learning_rate": 3.99696096988196e-05, "loss": 0.6095, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.850810077273434, "learning_rate": 3.996921640190547e-05, "loss": 0.6907, "step": 430 }, { "epoch": 0.05, "grad_norm": 0.8651856006110127, "learning_rate": 3.996882057835556e-05, "loss": 0.5951, "step": 431 }, { "epoch": 0.05, "grad_norm": 0.672683761335356, "learning_rate": 3.996842222821994e-05, "loss": 0.5343, "step": 432 }, { "epoch": 0.05, "grad_norm": 0.847871673914895, "learning_rate": 3.996802135154901e-05, "loss": 0.6006, "step": 433 }, { "epoch": 0.05, "grad_norm": 0.986907063906111, "learning_rate": 3.9967617948393504e-05, "loss": 0.5936, "step": 434 }, { "epoch": 0.05, "grad_norm": 1.0358227005952716, "learning_rate": 3.996721201880445e-05, "loss": 0.6529, "step": 435 }, { "epoch": 0.05, "grad_norm": 0.8039754661664753, "learning_rate": 3.996680356283322e-05, "loss": 0.5826, "step": 436 }, { "epoch": 0.05, "grad_norm": 0.8768839309683483, "learning_rate": 3.996639258053149e-05, "loss": 0.6562, "step": 437 }, { "epoch": 0.05, "grad_norm": 0.7615953785682957, "learning_rate": 3.996597907195126e-05, "loss": 0.6211, "step": 438 }, { "epoch": 0.05, "grad_norm": 0.9001598931377726, "learning_rate": 3.996556303714485e-05, "loss": 0.7248, "step": 439 }, { "epoch": 0.05, "grad_norm": 0.7273502386440862, "learning_rate": 3.996514447616489e-05, "loss": 0.5431, "step": 440 }, { "epoch": 0.05, "grad_norm": 0.8661286359048096, "learning_rate": 3.996472338906436e-05, "loss": 0.5844, "step": 441 }, { "epoch": 0.05, "grad_norm": 0.6760555879151913, "learning_rate": 3.996429977589653e-05, "loss": 0.6102, "step": 442 }, { "epoch": 0.05, "grad_norm": 0.8770845990085002, "learning_rate": 3.996387363671498e-05, "loss": 0.6307, "step": 443 }, { "epoch": 0.05, "grad_norm": 0.805713739886529, "learning_rate": 3.9963444971573656e-05, "loss": 0.5948, "step": 444 }, { "epoch": 0.05, "grad_norm": 0.810674156398536, "learning_rate": 3.996301378052678e-05, "loss": 0.6342, "step": 445 }, { "epoch": 0.05, "grad_norm": 0.8558961331975985, "learning_rate": 3.996258006362891e-05, "loss": 0.5744, "step": 446 }, { "epoch": 0.05, "grad_norm": 0.8517324995387747, "learning_rate": 3.9962143820934926e-05, "loss": 0.6575, "step": 447 }, { "epoch": 0.05, "grad_norm": 0.768192617980181, "learning_rate": 3.996170505250002e-05, "loss": 0.5407, "step": 448 }, { "epoch": 0.05, "grad_norm": 0.8158950300205333, "learning_rate": 3.996126375837971e-05, "loss": 0.5655, "step": 449 }, { "epoch": 0.05, "grad_norm": 0.9028928332901293, "learning_rate": 3.9960819938629834e-05, "loss": 0.6456, "step": 450 }, { "epoch": 0.05, "grad_norm": 0.8508846840103531, "learning_rate": 3.996037359330654e-05, "loss": 0.5795, "step": 451 }, { "epoch": 0.05, "grad_norm": 0.8245554231877659, "learning_rate": 3.995992472246632e-05, "loss": 0.5643, "step": 452 }, { "epoch": 0.05, "grad_norm": 0.8606073733140465, "learning_rate": 3.995947332616594e-05, "loss": 0.646, "step": 453 }, { "epoch": 0.05, "grad_norm": 0.7217634304740731, "learning_rate": 3.995901940446254e-05, "loss": 0.4658, "step": 454 }, { "epoch": 0.05, "grad_norm": 0.8505958880642853, "learning_rate": 3.995856295741354e-05, "loss": 0.6487, "step": 455 }, { "epoch": 0.05, "grad_norm": 0.8039634558191647, "learning_rate": 3.995810398507669e-05, "loss": 0.5096, "step": 456 }, { "epoch": 0.05, "grad_norm": 0.8961179894625264, "learning_rate": 3.995764248751007e-05, "loss": 0.5975, "step": 457 }, { "epoch": 0.05, "grad_norm": 0.8722731470790754, "learning_rate": 3.995717846477207e-05, "loss": 0.5573, "step": 458 }, { "epoch": 0.05, "grad_norm": 0.9058792408175009, "learning_rate": 3.9956711916921405e-05, "loss": 0.5735, "step": 459 }, { "epoch": 0.05, "grad_norm": 0.8868769701988912, "learning_rate": 3.9956242844017094e-05, "loss": 0.5576, "step": 460 }, { "epoch": 0.05, "grad_norm": 0.8908005591579613, "learning_rate": 3.9955771246118496e-05, "loss": 0.5409, "step": 461 }, { "epoch": 0.05, "grad_norm": 0.7559804499056149, "learning_rate": 3.995529712328528e-05, "loss": 0.5257, "step": 462 }, { "epoch": 0.05, "grad_norm": 0.7768366707686001, "learning_rate": 3.995482047557743e-05, "loss": 0.4991, "step": 463 }, { "epoch": 0.05, "grad_norm": 0.8049373053784405, "learning_rate": 3.995434130305526e-05, "loss": 0.5321, "step": 464 }, { "epoch": 0.05, "grad_norm": 1.0609134721914388, "learning_rate": 3.99538596057794e-05, "loss": 0.5969, "step": 465 }, { "epoch": 0.05, "grad_norm": 0.8402193870608702, "learning_rate": 3.995337538381079e-05, "loss": 0.4918, "step": 466 }, { "epoch": 0.05, "grad_norm": 0.8196973401437764, "learning_rate": 3.99528886372107e-05, "loss": 0.5562, "step": 467 }, { "epoch": 0.05, "grad_norm": 0.831186214667182, "learning_rate": 3.995239936604072e-05, "loss": 0.5559, "step": 468 }, { "epoch": 0.05, "grad_norm": 1.0103769720611149, "learning_rate": 3.9951907570362746e-05, "loss": 0.6291, "step": 469 }, { "epoch": 0.05, "grad_norm": 0.8113482454878655, "learning_rate": 3.995141325023902e-05, "loss": 0.5709, "step": 470 }, { "epoch": 0.05, "grad_norm": 0.7842536786850128, "learning_rate": 3.995091640573207e-05, "loss": 0.5358, "step": 471 }, { "epoch": 0.05, "grad_norm": 0.9247829672368507, "learning_rate": 3.995041703690477e-05, "loss": 0.5103, "step": 472 }, { "epoch": 0.05, "grad_norm": 0.8645849266190766, "learning_rate": 3.9949915143820295e-05, "loss": 0.5109, "step": 473 }, { "epoch": 0.05, "grad_norm": 0.7540053070296453, "learning_rate": 3.994941072654215e-05, "loss": 0.5065, "step": 474 }, { "epoch": 0.05, "grad_norm": 1.112167229335038, "learning_rate": 3.994890378513417e-05, "loss": 0.6715, "step": 475 }, { "epoch": 0.05, "grad_norm": 0.8723396321123827, "learning_rate": 3.9948394319660485e-05, "loss": 0.5252, "step": 476 }, { "epoch": 0.05, "grad_norm": 1.0691267253068364, "learning_rate": 3.9947882330185554e-05, "loss": 0.6188, "step": 477 }, { "epoch": 0.05, "grad_norm": 0.8231828878637135, "learning_rate": 3.994736781677416e-05, "loss": 0.5632, "step": 478 }, { "epoch": 0.05, "grad_norm": 0.9086502123252735, "learning_rate": 3.9946850779491406e-05, "loss": 0.7469, "step": 479 }, { "epoch": 0.05, "grad_norm": 0.7043895793715881, "learning_rate": 3.994633121840271e-05, "loss": 0.4731, "step": 480 }, { "epoch": 0.05, "grad_norm": 0.7827036113730268, "learning_rate": 3.994580913357381e-05, "loss": 0.6098, "step": 481 }, { "epoch": 0.05, "grad_norm": 0.7770199125112138, "learning_rate": 3.994528452507076e-05, "loss": 0.5309, "step": 482 }, { "epoch": 0.05, "grad_norm": 0.7678923994822564, "learning_rate": 3.994475739295993e-05, "loss": 0.4945, "step": 483 }, { "epoch": 0.05, "grad_norm": 0.6970220402115779, "learning_rate": 3.994422773730803e-05, "loss": 0.4826, "step": 484 }, { "epoch": 0.05, "grad_norm": 0.9646826235021102, "learning_rate": 3.994369555818208e-05, "loss": 0.6833, "step": 485 }, { "epoch": 0.05, "grad_norm": 0.9048176330897213, "learning_rate": 3.99431608556494e-05, "loss": 0.5411, "step": 486 }, { "epoch": 0.05, "grad_norm": 1.011170758041405, "learning_rate": 3.9942623629777645e-05, "loss": 0.6149, "step": 487 }, { "epoch": 0.05, "grad_norm": 0.730341527947133, "learning_rate": 3.99420838806348e-05, "loss": 0.5215, "step": 488 }, { "epoch": 0.05, "grad_norm": 0.7560471885425543, "learning_rate": 3.994154160828915e-05, "loss": 0.5124, "step": 489 }, { "epoch": 0.05, "grad_norm": 0.7801216369321703, "learning_rate": 3.99409968128093e-05, "loss": 0.4579, "step": 490 }, { "epoch": 0.05, "grad_norm": 0.7303572788964564, "learning_rate": 3.994044949426419e-05, "loss": 0.4622, "step": 491 }, { "epoch": 0.05, "grad_norm": 0.9471121437556517, "learning_rate": 3.993989965272308e-05, "loss": 0.669, "step": 492 }, { "epoch": 0.05, "grad_norm": 0.8881361467922317, "learning_rate": 3.993934728825552e-05, "loss": 0.6474, "step": 493 }, { "epoch": 0.05, "grad_norm": 0.9198785273550014, "learning_rate": 3.99387924009314e-05, "loss": 0.5895, "step": 494 }, { "epoch": 0.05, "grad_norm": 0.7581703285069594, "learning_rate": 3.993823499082094e-05, "loss": 0.5553, "step": 495 }, { "epoch": 0.05, "grad_norm": 0.638579872340838, "learning_rate": 3.9937675057994666e-05, "loss": 0.5458, "step": 496 }, { "epoch": 0.05, "grad_norm": 0.8006771112973574, "learning_rate": 3.993711260252342e-05, "loss": 0.5568, "step": 497 }, { "epoch": 0.05, "grad_norm": 0.7897804317884456, "learning_rate": 3.993654762447837e-05, "loss": 0.5905, "step": 498 }, { "epoch": 0.05, "grad_norm": 0.6439291251140656, "learning_rate": 3.9935980123930994e-05, "loss": 0.3884, "step": 499 }, { "epoch": 0.05, "grad_norm": 0.6861162999720982, "learning_rate": 3.9935410100953105e-05, "loss": 0.3851, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.9067914266218037, "learning_rate": 3.9934837555616815e-05, "loss": 0.5636, "step": 501 }, { "epoch": 0.06, "grad_norm": 0.8180201881361133, "learning_rate": 3.993426248799458e-05, "loss": 0.5483, "step": 502 }, { "epoch": 0.06, "grad_norm": 0.8958178600187385, "learning_rate": 3.993368489815915e-05, "loss": 0.598, "step": 503 }, { "epoch": 0.06, "grad_norm": 0.7910592149568487, "learning_rate": 3.993310478618361e-05, "loss": 0.474, "step": 504 }, { "epoch": 0.06, "grad_norm": 0.8537360633393979, "learning_rate": 3.993252215214136e-05, "loss": 0.5338, "step": 505 }, { "epoch": 0.06, "grad_norm": 0.79487838438693, "learning_rate": 3.993193699610612e-05, "loss": 0.615, "step": 506 }, { "epoch": 0.06, "grad_norm": 0.9291019679468188, "learning_rate": 3.993134931815192e-05, "loss": 0.5772, "step": 507 }, { "epoch": 0.06, "grad_norm": 0.8646719247554688, "learning_rate": 3.9930759118353124e-05, "loss": 0.5711, "step": 508 }, { "epoch": 0.06, "grad_norm": 0.9382375359178702, "learning_rate": 3.993016639678441e-05, "loss": 0.6286, "step": 509 }, { "epoch": 0.06, "grad_norm": 1.0784286955852909, "learning_rate": 3.992957115352077e-05, "loss": 0.7173, "step": 510 }, { "epoch": 0.06, "grad_norm": 0.7641437760325074, "learning_rate": 3.9928973388637516e-05, "loss": 0.5712, "step": 511 }, { "epoch": 0.06, "grad_norm": 0.8502802863532842, "learning_rate": 3.992837310221028e-05, "loss": 0.5857, "step": 512 }, { "epoch": 0.06, "grad_norm": 0.774470939327443, "learning_rate": 3.992777029431502e-05, "loss": 0.501, "step": 513 }, { "epoch": 0.06, "grad_norm": 0.6755784231993817, "learning_rate": 3.9927164965028006e-05, "loss": 0.4305, "step": 514 }, { "epoch": 0.06, "grad_norm": 0.8052972501851107, "learning_rate": 3.9926557114425824e-05, "loss": 0.4948, "step": 515 }, { "epoch": 0.06, "grad_norm": 1.005717028573092, "learning_rate": 3.9925946742585385e-05, "loss": 0.5787, "step": 516 }, { "epoch": 0.06, "grad_norm": 1.0117591643638293, "learning_rate": 3.992533384958392e-05, "loss": 0.4764, "step": 517 }, { "epoch": 0.06, "grad_norm": 1.1469064484735854, "learning_rate": 3.9924718435498964e-05, "loss": 0.6591, "step": 518 }, { "epoch": 0.06, "grad_norm": 0.8531709232476266, "learning_rate": 3.9924100500408396e-05, "loss": 0.5067, "step": 519 }, { "epoch": 0.06, "grad_norm": 0.8332974465908206, "learning_rate": 3.9923480044390405e-05, "loss": 0.5137, "step": 520 }, { "epoch": 0.06, "grad_norm": 0.8212320532499854, "learning_rate": 3.992285706752349e-05, "loss": 0.4816, "step": 521 }, { "epoch": 0.06, "grad_norm": 0.7909313392474584, "learning_rate": 3.9922231569886464e-05, "loss": 0.46, "step": 522 }, { "epoch": 0.06, "grad_norm": 0.926367819341693, "learning_rate": 3.992160355155848e-05, "loss": 0.566, "step": 523 }, { "epoch": 0.06, "grad_norm": 0.66833142702048, "learning_rate": 3.9920973012619e-05, "loss": 0.3985, "step": 524 }, { "epoch": 0.06, "grad_norm": 0.8056006944184592, "learning_rate": 3.99203399531478e-05, "loss": 0.4224, "step": 525 }, { "epoch": 0.06, "grad_norm": 0.7928465199341881, "learning_rate": 3.9919704373224984e-05, "loss": 0.5168, "step": 526 }, { "epoch": 0.06, "grad_norm": 0.7350239422849549, "learning_rate": 3.991906627293096e-05, "loss": 0.4549, "step": 527 }, { "epoch": 0.06, "grad_norm": 0.9181886450181526, "learning_rate": 3.991842565234647e-05, "loss": 0.5915, "step": 528 }, { "epoch": 0.06, "grad_norm": 0.7769180742569036, "learning_rate": 3.991778251155257e-05, "loss": 0.4831, "step": 529 }, { "epoch": 0.06, "grad_norm": 0.9433668937936327, "learning_rate": 3.991713685063063e-05, "loss": 0.5375, "step": 530 }, { "epoch": 0.06, "grad_norm": 0.9035488520019831, "learning_rate": 3.991648866966235e-05, "loss": 0.5769, "step": 531 }, { "epoch": 0.06, "grad_norm": 0.6780331966999691, "learning_rate": 3.991583796872974e-05, "loss": 0.422, "step": 532 }, { "epoch": 0.06, "grad_norm": 0.8719606747115424, "learning_rate": 3.991518474791512e-05, "loss": 0.4796, "step": 533 }, { "epoch": 0.06, "grad_norm": 0.9033246014467438, "learning_rate": 3.991452900730116e-05, "loss": 0.6109, "step": 534 }, { "epoch": 0.06, "grad_norm": 0.8063878362955893, "learning_rate": 3.991387074697081e-05, "loss": 0.4728, "step": 535 }, { "epoch": 0.06, "grad_norm": 0.8953218922280521, "learning_rate": 3.991320996700737e-05, "loss": 0.4734, "step": 536 }, { "epoch": 0.06, "grad_norm": 0.9101199687226137, "learning_rate": 3.991254666749445e-05, "loss": 0.4892, "step": 537 }, { "epoch": 0.06, "grad_norm": 0.7384766819552022, "learning_rate": 3.991188084851596e-05, "loss": 0.4767, "step": 538 }, { "epoch": 0.06, "grad_norm": 0.9491709630420719, "learning_rate": 3.991121251015615e-05, "loss": 0.5161, "step": 539 }, { "epoch": 0.06, "grad_norm": 0.909931728191452, "learning_rate": 3.991054165249958e-05, "loss": 0.5798, "step": 540 }, { "epoch": 0.06, "grad_norm": 0.8027158489085024, "learning_rate": 3.9909868275631136e-05, "loss": 0.4564, "step": 541 }, { "epoch": 0.06, "grad_norm": 0.9044503726408262, "learning_rate": 3.990919237963602e-05, "loss": 0.5053, "step": 542 }, { "epoch": 0.06, "grad_norm": 0.8712957857817895, "learning_rate": 3.9908513964599746e-05, "loss": 0.4985, "step": 543 }, { "epoch": 0.06, "grad_norm": 0.9166160844082891, "learning_rate": 3.9907833030608153e-05, "loss": 0.48, "step": 544 }, { "epoch": 0.06, "grad_norm": 0.9066680075715005, "learning_rate": 3.9907149577747394e-05, "loss": 0.6192, "step": 545 }, { "epoch": 0.06, "grad_norm": 0.884837233990768, "learning_rate": 3.990646360610395e-05, "loss": 0.4844, "step": 546 }, { "epoch": 0.06, "grad_norm": 0.9263782105663679, "learning_rate": 3.990577511576461e-05, "loss": 0.5146, "step": 547 }, { "epoch": 0.06, "grad_norm": 0.8135124281066956, "learning_rate": 3.9905084106816494e-05, "loss": 0.5439, "step": 548 }, { "epoch": 0.06, "grad_norm": 0.7885608400326016, "learning_rate": 3.990439057934702e-05, "loss": 0.4937, "step": 549 }, { "epoch": 0.06, "grad_norm": 0.7520273939834736, "learning_rate": 3.990369453344394e-05, "loss": 0.4919, "step": 550 }, { "epoch": 0.06, "grad_norm": 0.728270911466838, "learning_rate": 3.990299596919533e-05, "loss": 0.4685, "step": 551 }, { "epoch": 0.06, "grad_norm": 0.8988349035638267, "learning_rate": 3.9902294886689576e-05, "loss": 0.56, "step": 552 }, { "epoch": 0.06, "grad_norm": 0.8178949973186007, "learning_rate": 3.990159128601538e-05, "loss": 0.4903, "step": 553 }, { "epoch": 0.06, "grad_norm": 1.1128534398506127, "learning_rate": 3.990088516726177e-05, "loss": 0.5388, "step": 554 }, { "epoch": 0.06, "grad_norm": 1.1820542297348275, "learning_rate": 3.990017653051808e-05, "loss": 0.5334, "step": 555 }, { "epoch": 0.06, "grad_norm": 0.8657062698939187, "learning_rate": 3.9899465375873985e-05, "loss": 0.4955, "step": 556 }, { "epoch": 0.06, "grad_norm": 0.9360997232963838, "learning_rate": 3.989875170341945e-05, "loss": 0.6707, "step": 557 }, { "epoch": 0.06, "grad_norm": 0.99819724187575, "learning_rate": 3.989803551324479e-05, "loss": 0.5776, "step": 558 }, { "epoch": 0.06, "grad_norm": 0.8188566822698722, "learning_rate": 3.9897316805440606e-05, "loss": 0.4849, "step": 559 }, { "epoch": 0.06, "grad_norm": 0.8046237957376331, "learning_rate": 3.989659558009784e-05, "loss": 0.5574, "step": 560 }, { "epoch": 0.06, "grad_norm": 0.7514849902371689, "learning_rate": 3.989587183730775e-05, "loss": 0.489, "step": 561 }, { "epoch": 0.06, "grad_norm": 0.8620842968465351, "learning_rate": 3.98951455771619e-05, "loss": 0.5149, "step": 562 }, { "epoch": 0.06, "grad_norm": 0.8285908378479403, "learning_rate": 3.989441679975219e-05, "loss": 0.4171, "step": 563 }, { "epoch": 0.06, "grad_norm": 0.8647104575828756, "learning_rate": 3.989368550517083e-05, "loss": 0.5928, "step": 564 }, { "epoch": 0.06, "grad_norm": 0.7672005615365576, "learning_rate": 3.989295169351033e-05, "loss": 0.3149, "step": 565 }, { "epoch": 0.06, "grad_norm": 0.7527293768787586, "learning_rate": 3.989221536486357e-05, "loss": 0.4782, "step": 566 }, { "epoch": 0.06, "grad_norm": 0.749104243051563, "learning_rate": 3.989147651932368e-05, "loss": 0.3828, "step": 567 }, { "epoch": 0.06, "grad_norm": 1.0380230056477102, "learning_rate": 3.989073515698417e-05, "loss": 0.4976, "step": 568 }, { "epoch": 0.06, "grad_norm": 0.8024857407488464, "learning_rate": 3.988999127793883e-05, "loss": 0.393, "step": 569 }, { "epoch": 0.06, "grad_norm": 1.217959220804909, "learning_rate": 3.988924488228178e-05, "loss": 0.4469, "step": 570 }, { "epoch": 0.06, "grad_norm": 0.7581582214800285, "learning_rate": 3.988849597010746e-05, "loss": 0.3947, "step": 571 }, { "epoch": 0.06, "grad_norm": 0.9045742919402086, "learning_rate": 3.988774454151063e-05, "loss": 0.4732, "step": 572 }, { "epoch": 0.06, "grad_norm": 0.7909181511244426, "learning_rate": 3.9886990596586364e-05, "loss": 0.4606, "step": 573 }, { "epoch": 0.06, "grad_norm": 0.7998771960655621, "learning_rate": 3.988623413543006e-05, "loss": 0.4945, "step": 574 }, { "epoch": 0.06, "grad_norm": 0.7605340988896442, "learning_rate": 3.988547515813742e-05, "loss": 0.4552, "step": 575 }, { "epoch": 0.06, "grad_norm": 0.79865402898783, "learning_rate": 3.9884713664804485e-05, "loss": 0.5731, "step": 576 }, { "epoch": 0.06, "grad_norm": 0.7210486186112235, "learning_rate": 3.9883949655527594e-05, "loss": 0.5238, "step": 577 }, { "epoch": 0.06, "grad_norm": 0.7120687685556387, "learning_rate": 3.9883183130403424e-05, "loss": 0.4322, "step": 578 }, { "epoch": 0.06, "grad_norm": 0.8625105788093526, "learning_rate": 3.9882414089528965e-05, "loss": 0.6855, "step": 579 }, { "epoch": 0.06, "grad_norm": 0.8621647031009402, "learning_rate": 3.98816425330015e-05, "loss": 0.5546, "step": 580 }, { "epoch": 0.06, "grad_norm": 0.6415601743125476, "learning_rate": 3.988086846091867e-05, "loss": 0.3759, "step": 581 }, { "epoch": 0.06, "grad_norm": 0.8710250485275114, "learning_rate": 3.9880091873378416e-05, "loss": 0.6254, "step": 582 }, { "epoch": 0.06, "grad_norm": 1.0054804115834395, "learning_rate": 3.9879312770478986e-05, "loss": 0.5297, "step": 583 }, { "epoch": 0.06, "grad_norm": 0.8756127255741422, "learning_rate": 3.9878531152318966e-05, "loss": 0.4065, "step": 584 }, { "epoch": 0.06, "grad_norm": 1.03466047974667, "learning_rate": 3.987774701899724e-05, "loss": 0.4764, "step": 585 }, { "epoch": 0.06, "grad_norm": 1.1189225601863473, "learning_rate": 3.987696037061304e-05, "loss": 0.5426, "step": 586 }, { "epoch": 0.06, "grad_norm": 0.8213950870039111, "learning_rate": 3.987617120726588e-05, "loss": 0.4189, "step": 587 }, { "epoch": 0.06, "grad_norm": 0.7069288991115357, "learning_rate": 3.9875379529055624e-05, "loss": 0.3873, "step": 588 }, { "epoch": 0.06, "grad_norm": 0.8552682720062313, "learning_rate": 3.987458533608243e-05, "loss": 0.4539, "step": 589 }, { "epoch": 0.06, "grad_norm": 0.8658534605778868, "learning_rate": 3.987378862844679e-05, "loss": 0.5051, "step": 590 }, { "epoch": 0.06, "grad_norm": 0.7952194219356961, "learning_rate": 3.987298940624951e-05, "loss": 0.505, "step": 591 }, { "epoch": 0.07, "grad_norm": 0.9537159072962105, "learning_rate": 3.987218766959171e-05, "loss": 0.606, "step": 592 }, { "epoch": 0.07, "grad_norm": 0.7129695064850823, "learning_rate": 3.987138341857483e-05, "loss": 0.4469, "step": 593 }, { "epoch": 0.07, "grad_norm": 0.8865645354351441, "learning_rate": 3.987057665330063e-05, "loss": 0.5366, "step": 594 }, { "epoch": 0.07, "grad_norm": 0.7025878549939398, "learning_rate": 3.986976737387118e-05, "loss": 0.4002, "step": 595 }, { "epoch": 0.07, "grad_norm": 0.8067180605613338, "learning_rate": 3.986895558038889e-05, "loss": 0.5418, "step": 596 }, { "epoch": 0.07, "grad_norm": 1.0673460679216866, "learning_rate": 3.9868141272956466e-05, "loss": 0.5032, "step": 597 }, { "epoch": 0.07, "grad_norm": 1.1270499096966131, "learning_rate": 3.986732445167694e-05, "loss": 0.4783, "step": 598 }, { "epoch": 0.07, "grad_norm": 0.9116535232080022, "learning_rate": 3.986650511665366e-05, "loss": 0.4888, "step": 599 }, { "epoch": 0.07, "grad_norm": 0.8390148952057622, "learning_rate": 3.9865683267990295e-05, "loss": 0.431, "step": 600 }, { "epoch": 0.07, "grad_norm": 0.9276603910158733, "learning_rate": 3.986485890579083e-05, "loss": 0.5032, "step": 601 }, { "epoch": 0.07, "grad_norm": 0.8495850307440783, "learning_rate": 3.986403203015957e-05, "loss": 0.4827, "step": 602 }, { "epoch": 0.07, "grad_norm": 1.045770888471718, "learning_rate": 3.986320264120113e-05, "loss": 0.6028, "step": 603 }, { "epoch": 0.07, "grad_norm": 0.8439683631621782, "learning_rate": 3.9862370739020455e-05, "loss": 0.4724, "step": 604 }, { "epoch": 0.07, "grad_norm": 0.741322505228919, "learning_rate": 3.986153632372281e-05, "loss": 0.4573, "step": 605 }, { "epoch": 0.07, "grad_norm": 1.0254748500565236, "learning_rate": 3.9860699395413764e-05, "loss": 0.5496, "step": 606 }, { "epoch": 0.07, "grad_norm": 0.7416375783152273, "learning_rate": 3.9859859954199206e-05, "loss": 0.374, "step": 607 }, { "epoch": 0.07, "grad_norm": 0.798779884877756, "learning_rate": 3.985901800018535e-05, "loss": 0.4794, "step": 608 }, { "epoch": 0.07, "grad_norm": 0.9340540816437252, "learning_rate": 3.985817353347874e-05, "loss": 0.5524, "step": 609 }, { "epoch": 0.07, "grad_norm": 0.8234748918704481, "learning_rate": 3.98573265541862e-05, "loss": 0.4616, "step": 610 }, { "epoch": 0.07, "grad_norm": 0.7236925272942816, "learning_rate": 3.9856477062414914e-05, "loss": 0.361, "step": 611 }, { "epoch": 0.07, "grad_norm": 0.8200101022563254, "learning_rate": 3.985562505827235e-05, "loss": 0.4695, "step": 612 }, { "epoch": 0.07, "grad_norm": 0.7971260572895745, "learning_rate": 3.985477054186632e-05, "loss": 0.3912, "step": 613 }, { "epoch": 0.07, "grad_norm": 0.7372485815262885, "learning_rate": 3.985391351330494e-05, "loss": 0.4484, "step": 614 }, { "epoch": 0.07, "grad_norm": 0.9282647795051534, "learning_rate": 3.985305397269665e-05, "loss": 0.5281, "step": 615 }, { "epoch": 0.07, "grad_norm": 0.6776210459298431, "learning_rate": 3.985219192015019e-05, "loss": 0.3881, "step": 616 }, { "epoch": 0.07, "grad_norm": 0.902390945323282, "learning_rate": 3.985132735577466e-05, "loss": 0.586, "step": 617 }, { "epoch": 0.07, "grad_norm": 0.7948999800232522, "learning_rate": 3.985046027967943e-05, "loss": 0.4314, "step": 618 }, { "epoch": 0.07, "grad_norm": 0.7553545229375843, "learning_rate": 3.9849590691974206e-05, "loss": 0.4977, "step": 619 }, { "epoch": 0.07, "grad_norm": 0.8939691968364601, "learning_rate": 3.984871859276902e-05, "loss": 0.5062, "step": 620 }, { "epoch": 0.07, "grad_norm": 0.8110558440011825, "learning_rate": 3.984784398217422e-05, "loss": 0.5306, "step": 621 }, { "epoch": 0.07, "grad_norm": 0.8278026313236698, "learning_rate": 3.984696686030046e-05, "loss": 0.4389, "step": 622 }, { "epoch": 0.07, "grad_norm": 0.7135830795262094, "learning_rate": 3.984608722725873e-05, "loss": 0.3995, "step": 623 }, { "epoch": 0.07, "grad_norm": 0.7439518886925478, "learning_rate": 3.9845205083160315e-05, "loss": 0.4798, "step": 624 }, { "epoch": 0.07, "grad_norm": 0.8649653927269196, "learning_rate": 3.9844320428116834e-05, "loss": 0.5324, "step": 625 }, { "epoch": 0.07, "grad_norm": 0.8203098842034187, "learning_rate": 3.984343326224022e-05, "loss": 0.4146, "step": 626 }, { "epoch": 0.07, "grad_norm": 1.131329216475481, "learning_rate": 3.984254358564272e-05, "loss": 0.5946, "step": 627 }, { "epoch": 0.07, "grad_norm": 0.8553907803054434, "learning_rate": 3.9841651398436907e-05, "loss": 0.4519, "step": 628 }, { "epoch": 0.07, "grad_norm": 0.6543785613581563, "learning_rate": 3.984075670073566e-05, "loss": 0.3514, "step": 629 }, { "epoch": 0.07, "grad_norm": 0.8992314023769143, "learning_rate": 3.983985949265219e-05, "loss": 0.55, "step": 630 }, { "epoch": 0.07, "grad_norm": 0.7436674921768419, "learning_rate": 3.9838959774300014e-05, "loss": 0.4009, "step": 631 }, { "epoch": 0.07, "grad_norm": 0.7493322499276991, "learning_rate": 3.983805754579297e-05, "loss": 0.3852, "step": 632 }, { "epoch": 0.07, "grad_norm": 0.8291136759849222, "learning_rate": 3.983715280724521e-05, "loss": 0.4515, "step": 633 }, { "epoch": 0.07, "grad_norm": 0.7341967427230568, "learning_rate": 3.98362455587712e-05, "loss": 0.4011, "step": 634 }, { "epoch": 0.07, "grad_norm": 0.7381493586850031, "learning_rate": 3.983533580048576e-05, "loss": 0.4676, "step": 635 }, { "epoch": 0.07, "grad_norm": 0.7715101406768454, "learning_rate": 3.9834423532503975e-05, "loss": 0.4699, "step": 636 }, { "epoch": 0.07, "grad_norm": 0.718094900378569, "learning_rate": 3.9833508754941275e-05, "loss": 0.4098, "step": 637 }, { "epoch": 0.07, "grad_norm": 0.8582567048048435, "learning_rate": 3.9832591467913405e-05, "loss": 0.4472, "step": 638 }, { "epoch": 0.07, "grad_norm": 0.9982396367024982, "learning_rate": 3.983167167153642e-05, "loss": 0.4947, "step": 639 }, { "epoch": 0.07, "grad_norm": 0.8140285126076521, "learning_rate": 3.9830749365926716e-05, "loss": 0.3669, "step": 640 }, { "epoch": 0.07, "grad_norm": 0.8844944957434661, "learning_rate": 3.982982455120097e-05, "loss": 0.4538, "step": 641 }, { "epoch": 0.07, "grad_norm": 0.8430719123341497, "learning_rate": 3.982889722747621e-05, "loss": 0.3993, "step": 642 }, { "epoch": 0.07, "grad_norm": 0.8022406700354929, "learning_rate": 3.982796739486976e-05, "loss": 0.4895, "step": 643 }, { "epoch": 0.07, "grad_norm": 0.6767667959698668, "learning_rate": 3.9827035053499264e-05, "loss": 0.3653, "step": 644 }, { "epoch": 0.07, "grad_norm": 0.8436771997198442, "learning_rate": 3.98261002034827e-05, "loss": 0.4349, "step": 645 }, { "epoch": 0.07, "grad_norm": 0.764853419553518, "learning_rate": 3.982516284493834e-05, "loss": 0.3761, "step": 646 }, { "epoch": 0.07, "grad_norm": 0.6259443301562875, "learning_rate": 3.982422297798479e-05, "loss": 0.3636, "step": 647 }, { "epoch": 0.07, "grad_norm": 0.9050084850798686, "learning_rate": 3.982328060274097e-05, "loss": 0.4513, "step": 648 }, { "epoch": 0.07, "grad_norm": 0.727292258368843, "learning_rate": 3.982233571932611e-05, "loss": 0.3982, "step": 649 }, { "epoch": 0.07, "grad_norm": 0.696803989983554, "learning_rate": 3.982138832785976e-05, "loss": 0.4042, "step": 650 }, { "epoch": 0.07, "grad_norm": 0.9918835392123475, "learning_rate": 3.9820438428461804e-05, "loss": 0.4666, "step": 651 }, { "epoch": 0.07, "grad_norm": 0.8799281981826182, "learning_rate": 3.981948602125242e-05, "loss": 0.4934, "step": 652 }, { "epoch": 0.07, "grad_norm": 0.8043596655414714, "learning_rate": 3.9818531106352105e-05, "loss": 0.4225, "step": 653 }, { "epoch": 0.07, "grad_norm": 0.8476339592178188, "learning_rate": 3.98175736838817e-05, "loss": 0.4979, "step": 654 }, { "epoch": 0.07, "grad_norm": 0.7266337041932338, "learning_rate": 3.981661375396233e-05, "loss": 0.4047, "step": 655 }, { "epoch": 0.07, "grad_norm": 0.8436533554676912, "learning_rate": 3.981565131671546e-05, "loss": 0.5754, "step": 656 }, { "epoch": 0.07, "grad_norm": 0.7476992284844522, "learning_rate": 3.981468637226286e-05, "loss": 0.3768, "step": 657 }, { "epoch": 0.07, "grad_norm": 0.8141812023816225, "learning_rate": 3.981371892072661e-05, "loss": 0.4427, "step": 658 }, { "epoch": 0.07, "grad_norm": 0.8764500183721773, "learning_rate": 3.981274896222915e-05, "loss": 0.4539, "step": 659 }, { "epoch": 0.07, "grad_norm": 0.9053560649277458, "learning_rate": 3.981177649689317e-05, "loss": 0.4496, "step": 660 }, { "epoch": 0.07, "grad_norm": 0.6096858674403417, "learning_rate": 3.9810801524841724e-05, "loss": 0.3538, "step": 661 }, { "epoch": 0.07, "grad_norm": 0.7287829933745966, "learning_rate": 3.980982404619819e-05, "loss": 0.4358, "step": 662 }, { "epoch": 0.07, "grad_norm": 0.7010359353392834, "learning_rate": 3.9808844061086225e-05, "loss": 0.385, "step": 663 }, { "epoch": 0.07, "grad_norm": 0.9392858344403822, "learning_rate": 3.9807861569629815e-05, "loss": 0.528, "step": 664 }, { "epoch": 0.07, "grad_norm": 0.72071478609405, "learning_rate": 3.98068765719533e-05, "loss": 0.3393, "step": 665 }, { "epoch": 0.07, "grad_norm": 0.7639010346535613, "learning_rate": 3.980588906818129e-05, "loss": 0.395, "step": 666 }, { "epoch": 0.07, "grad_norm": 0.9596415208530081, "learning_rate": 3.9804899058438734e-05, "loss": 0.4851, "step": 667 }, { "epoch": 0.07, "grad_norm": 0.8549654799737334, "learning_rate": 3.980390654285088e-05, "loss": 0.4556, "step": 668 }, { "epoch": 0.07, "grad_norm": 0.7438391527869523, "learning_rate": 3.980291152154334e-05, "loss": 0.4144, "step": 669 }, { "epoch": 0.07, "grad_norm": 0.8095402361134165, "learning_rate": 3.980191399464198e-05, "loss": 0.3902, "step": 670 }, { "epoch": 0.07, "grad_norm": 0.7470857905361475, "learning_rate": 3.9800913962273036e-05, "loss": 0.3759, "step": 671 }, { "epoch": 0.07, "grad_norm": 0.7369768032579752, "learning_rate": 3.979991142456302e-05, "loss": 0.3798, "step": 672 }, { "epoch": 0.07, "grad_norm": 0.759339396894067, "learning_rate": 3.97989063816388e-05, "loss": 0.395, "step": 673 }, { "epoch": 0.07, "grad_norm": 0.8302124632792892, "learning_rate": 3.9797898833627514e-05, "loss": 0.4224, "step": 674 }, { "epoch": 0.07, "grad_norm": 0.7278666533211542, "learning_rate": 3.979688878065666e-05, "loss": 0.3764, "step": 675 }, { "epoch": 0.07, "grad_norm": 0.7908713880359691, "learning_rate": 3.979587622285404e-05, "loss": 0.3578, "step": 676 }, { "epoch": 0.07, "grad_norm": 0.8879192812550103, "learning_rate": 3.979486116034776e-05, "loss": 0.5345, "step": 677 }, { "epoch": 0.07, "grad_norm": 0.7621693769852078, "learning_rate": 3.979384359326626e-05, "loss": 0.4503, "step": 678 }, { "epoch": 0.07, "grad_norm": 0.7641127306569218, "learning_rate": 3.9792823521738285e-05, "loss": 0.3742, "step": 679 }, { "epoch": 0.07, "grad_norm": 0.9125055926948455, "learning_rate": 3.97918009458929e-05, "loss": 0.4496, "step": 680 }, { "epoch": 0.07, "grad_norm": 0.7408004649465679, "learning_rate": 3.979077586585949e-05, "loss": 0.3832, "step": 681 }, { "epoch": 0.07, "grad_norm": 0.6839487298536606, "learning_rate": 3.9789748281767754e-05, "loss": 0.3796, "step": 682 }, { "epoch": 0.08, "grad_norm": 0.915190823569343, "learning_rate": 3.978871819374771e-05, "loss": 0.3978, "step": 683 }, { "epoch": 0.08, "grad_norm": 0.8545008021329147, "learning_rate": 3.978768560192969e-05, "loss": 0.4422, "step": 684 }, { "epoch": 0.08, "grad_norm": 0.8117363348663309, "learning_rate": 3.978665050644435e-05, "loss": 0.3714, "step": 685 }, { "epoch": 0.08, "grad_norm": 0.9090595432310778, "learning_rate": 3.978561290742265e-05, "loss": 0.4485, "step": 686 }, { "epoch": 0.08, "grad_norm": 0.7339571311811625, "learning_rate": 3.978457280499587e-05, "loss": 0.3612, "step": 687 }, { "epoch": 0.08, "grad_norm": 0.7724578510131132, "learning_rate": 3.978353019929562e-05, "loss": 0.4277, "step": 688 }, { "epoch": 0.08, "grad_norm": 0.8283611896469473, "learning_rate": 3.978248509045382e-05, "loss": 0.4569, "step": 689 }, { "epoch": 0.08, "grad_norm": 0.7955528210954658, "learning_rate": 3.978143747860269e-05, "loss": 0.3966, "step": 690 }, { "epoch": 0.08, "grad_norm": 0.7690746676254782, "learning_rate": 3.978038736387479e-05, "loss": 0.4785, "step": 691 }, { "epoch": 0.08, "grad_norm": 0.7890240877074117, "learning_rate": 3.977933474640298e-05, "loss": 0.4321, "step": 692 }, { "epoch": 0.08, "grad_norm": 0.9459794899247427, "learning_rate": 3.977827962632046e-05, "loss": 0.5031, "step": 693 }, { "epoch": 0.08, "grad_norm": 0.793011969530129, "learning_rate": 3.9777222003760714e-05, "loss": 0.3606, "step": 694 }, { "epoch": 0.08, "grad_norm": 0.823145452316263, "learning_rate": 3.977616187885757e-05, "loss": 0.4653, "step": 695 }, { "epoch": 0.08, "grad_norm": 0.7724615933768747, "learning_rate": 3.977509925174515e-05, "loss": 0.396, "step": 696 }, { "epoch": 0.08, "grad_norm": 0.9428965062846085, "learning_rate": 3.9774034122557924e-05, "loss": 0.534, "step": 697 }, { "epoch": 0.08, "grad_norm": 0.6913907324132594, "learning_rate": 3.977296649143064e-05, "loss": 0.3577, "step": 698 }, { "epoch": 0.08, "grad_norm": 0.8110297913118387, "learning_rate": 3.977189635849839e-05, "loss": 0.4093, "step": 699 }, { "epoch": 0.08, "grad_norm": 0.6609588349057541, "learning_rate": 3.9770823723896574e-05, "loss": 0.3489, "step": 700 }, { "epoch": 0.08, "grad_norm": 0.6835736081016998, "learning_rate": 3.976974858776091e-05, "loss": 0.3421, "step": 701 }, { "epoch": 0.08, "grad_norm": 0.9106013310798687, "learning_rate": 3.976867095022742e-05, "loss": 0.5828, "step": 702 }, { "epoch": 0.08, "grad_norm": 0.9108473482883123, "learning_rate": 3.976759081143247e-05, "loss": 0.4331, "step": 703 }, { "epoch": 0.08, "grad_norm": 0.889465413701839, "learning_rate": 3.9766508171512715e-05, "loss": 0.4431, "step": 704 }, { "epoch": 0.08, "grad_norm": 0.7614113211741779, "learning_rate": 3.976542303060515e-05, "loss": 0.335, "step": 705 }, { "epoch": 0.08, "grad_norm": 0.7986397369524915, "learning_rate": 3.976433538884706e-05, "loss": 0.4339, "step": 706 }, { "epoch": 0.08, "grad_norm": 0.8725885814088392, "learning_rate": 3.976324524637606e-05, "loss": 0.444, "step": 707 }, { "epoch": 0.08, "grad_norm": 0.8176339514527683, "learning_rate": 3.97621526033301e-05, "loss": 0.4381, "step": 708 }, { "epoch": 0.08, "grad_norm": 0.7352478845871343, "learning_rate": 3.976105745984742e-05, "loss": 0.4521, "step": 709 }, { "epoch": 0.08, "grad_norm": 0.7244324172454369, "learning_rate": 3.9759959816066575e-05, "loss": 0.3966, "step": 710 }, { "epoch": 0.08, "grad_norm": 0.8091998201591605, "learning_rate": 3.9758859672126455e-05, "loss": 0.4023, "step": 711 }, { "epoch": 0.08, "grad_norm": 0.772083600231714, "learning_rate": 3.975775702816625e-05, "loss": 0.4155, "step": 712 }, { "epoch": 0.08, "grad_norm": 0.7369778546612566, "learning_rate": 3.975665188432548e-05, "loss": 0.3481, "step": 713 }, { "epoch": 0.08, "grad_norm": 0.9478752769963814, "learning_rate": 3.975554424074397e-05, "loss": 0.5124, "step": 714 }, { "epoch": 0.08, "grad_norm": 0.9533006474976025, "learning_rate": 3.975443409756188e-05, "loss": 0.5248, "step": 715 }, { "epoch": 0.08, "grad_norm": 0.7704006260724757, "learning_rate": 3.975332145491965e-05, "loss": 0.3521, "step": 716 }, { "epoch": 0.08, "grad_norm": 0.8149667853625703, "learning_rate": 3.9752206312958086e-05, "loss": 0.3601, "step": 717 }, { "epoch": 0.08, "grad_norm": 0.8175710737030942, "learning_rate": 3.975108867181826e-05, "loss": 0.4371, "step": 718 }, { "epoch": 0.08, "grad_norm": 0.7899516544155942, "learning_rate": 3.9749968531641594e-05, "loss": 0.4082, "step": 719 }, { "epoch": 0.08, "grad_norm": 0.8570226900130501, "learning_rate": 3.974884589256981e-05, "loss": 0.3944, "step": 720 }, { "epoch": 0.08, "grad_norm": 0.9830655365050049, "learning_rate": 3.974772075474496e-05, "loss": 0.4515, "step": 721 }, { "epoch": 0.08, "grad_norm": 0.8191672186657225, "learning_rate": 3.97465931183094e-05, "loss": 0.4095, "step": 722 }, { "epoch": 0.08, "grad_norm": 0.8742857470165107, "learning_rate": 3.974546298340579e-05, "loss": 0.3991, "step": 723 }, { "epoch": 0.08, "grad_norm": 0.7385873714951968, "learning_rate": 3.9744330350177156e-05, "loss": 0.3548, "step": 724 }, { "epoch": 0.08, "grad_norm": 0.7437597386339665, "learning_rate": 3.974319521876678e-05, "loss": 0.3535, "step": 725 }, { "epoch": 0.08, "grad_norm": 0.9061489048837175, "learning_rate": 3.974205758931828e-05, "loss": 0.437, "step": 726 }, { "epoch": 0.08, "grad_norm": 0.6882122858247355, "learning_rate": 3.974091746197563e-05, "loss": 0.3453, "step": 727 }, { "epoch": 0.08, "grad_norm": 0.7254883404037363, "learning_rate": 3.973977483688305e-05, "loss": 0.3604, "step": 728 }, { "epoch": 0.08, "grad_norm": 0.7676121750284994, "learning_rate": 3.973862971418513e-05, "loss": 0.3581, "step": 729 }, { "epoch": 0.08, "grad_norm": 0.830418128895816, "learning_rate": 3.9737482094026764e-05, "loss": 0.4385, "step": 730 }, { "epoch": 0.08, "grad_norm": 0.6877092996532913, "learning_rate": 3.9736331976553154e-05, "loss": 0.4166, "step": 731 }, { "epoch": 0.08, "grad_norm": 0.8272879346655926, "learning_rate": 3.9735179361909803e-05, "loss": 0.4436, "step": 732 }, { "epoch": 0.08, "grad_norm": 1.0049495396827917, "learning_rate": 3.9734024250242564e-05, "loss": 0.3879, "step": 733 }, { "epoch": 0.08, "grad_norm": 0.7703604321529897, "learning_rate": 3.9732866641697586e-05, "loss": 0.3008, "step": 734 }, { "epoch": 0.08, "grad_norm": 0.8551568982682785, "learning_rate": 3.973170653642133e-05, "loss": 0.3793, "step": 735 }, { "epoch": 0.08, "grad_norm": 0.8550069598979876, "learning_rate": 3.9730543934560595e-05, "loss": 0.398, "step": 736 }, { "epoch": 0.08, "grad_norm": 0.8811450503467172, "learning_rate": 3.972937883626247e-05, "loss": 0.4356, "step": 737 }, { "epoch": 0.08, "grad_norm": 0.6686100028911549, "learning_rate": 3.9728211241674363e-05, "loss": 0.2951, "step": 738 }, { "epoch": 0.08, "grad_norm": 0.7879317084264726, "learning_rate": 3.972704115094403e-05, "loss": 0.4351, "step": 739 }, { "epoch": 0.08, "grad_norm": 0.8452377624754119, "learning_rate": 3.972586856421949e-05, "loss": 0.4758, "step": 740 }, { "epoch": 0.08, "grad_norm": 0.7554675917828586, "learning_rate": 3.9724693481649134e-05, "loss": 0.4134, "step": 741 }, { "epoch": 0.08, "grad_norm": 0.8598670937726027, "learning_rate": 3.9723515903381625e-05, "loss": 0.4508, "step": 742 }, { "epoch": 0.08, "grad_norm": 0.8079319332530365, "learning_rate": 3.9722335829565955e-05, "loss": 0.4001, "step": 743 }, { "epoch": 0.08, "grad_norm": 0.7432211253732091, "learning_rate": 3.9721153260351446e-05, "loss": 0.3633, "step": 744 }, { "epoch": 0.08, "grad_norm": 0.7635748916273694, "learning_rate": 3.971996819588771e-05, "loss": 0.4162, "step": 745 }, { "epoch": 0.08, "grad_norm": 0.9134161204270983, "learning_rate": 3.971878063632471e-05, "loss": 0.3784, "step": 746 }, { "epoch": 0.08, "grad_norm": 0.7186014187696611, "learning_rate": 3.971759058181269e-05, "loss": 0.3729, "step": 747 }, { "epoch": 0.08, "grad_norm": 0.7774807603178935, "learning_rate": 3.971639803250221e-05, "loss": 0.4337, "step": 748 }, { "epoch": 0.08, "grad_norm": 0.7224299308432479, "learning_rate": 3.971520298854419e-05, "loss": 0.3177, "step": 749 }, { "epoch": 0.08, "grad_norm": 0.7735434565552102, "learning_rate": 3.9714005450089815e-05, "loss": 0.4021, "step": 750 }, { "epoch": 0.08, "grad_norm": 0.8583503857248929, "learning_rate": 3.971280541729061e-05, "loss": 0.3301, "step": 751 }, { "epoch": 0.08, "grad_norm": 0.6263724992679699, "learning_rate": 3.971160289029841e-05, "loss": 0.3213, "step": 752 }, { "epoch": 0.08, "grad_norm": 0.726769653650323, "learning_rate": 3.9710397869265364e-05, "loss": 0.3767, "step": 753 }, { "epoch": 0.08, "grad_norm": 0.8664066476365219, "learning_rate": 3.9709190354343936e-05, "loss": 0.3703, "step": 754 }, { "epoch": 0.08, "grad_norm": 0.6975022029500102, "learning_rate": 3.9707980345686924e-05, "loss": 0.3448, "step": 755 }, { "epoch": 0.08, "grad_norm": 0.6369118894875103, "learning_rate": 3.9706767843447417e-05, "loss": 0.3381, "step": 756 }, { "epoch": 0.08, "grad_norm": 0.5829089926032058, "learning_rate": 3.970555284777883e-05, "loss": 0.3056, "step": 757 }, { "epoch": 0.08, "grad_norm": 0.8357335849077621, "learning_rate": 3.970433535883489e-05, "loss": 0.4038, "step": 758 }, { "epoch": 0.08, "grad_norm": 0.9163609486185431, "learning_rate": 3.970311537676964e-05, "loss": 0.4754, "step": 759 }, { "epoch": 0.08, "grad_norm": 0.8544288829787227, "learning_rate": 3.970189290173744e-05, "loss": 0.4247, "step": 760 }, { "epoch": 0.08, "grad_norm": 0.7723366966596901, "learning_rate": 3.970066793389297e-05, "loss": 0.4092, "step": 761 }, { "epoch": 0.08, "grad_norm": 0.8365623064293849, "learning_rate": 3.969944047339122e-05, "loss": 0.3165, "step": 762 }, { "epoch": 0.08, "grad_norm": 0.8268536219064315, "learning_rate": 3.96982105203875e-05, "loss": 0.3276, "step": 763 }, { "epoch": 0.08, "grad_norm": 0.968372148227198, "learning_rate": 3.969697807503742e-05, "loss": 0.3886, "step": 764 }, { "epoch": 0.08, "grad_norm": 0.895316766059658, "learning_rate": 3.969574313749693e-05, "loss": 0.3945, "step": 765 }, { "epoch": 0.08, "grad_norm": 0.7392640208055445, "learning_rate": 3.969450570792227e-05, "loss": 0.3964, "step": 766 }, { "epoch": 0.08, "grad_norm": 0.7908308778878117, "learning_rate": 3.9693265786470016e-05, "loss": 0.3896, "step": 767 }, { "epoch": 0.08, "grad_norm": 0.8566097506183497, "learning_rate": 3.969202337329705e-05, "loss": 0.4392, "step": 768 }, { "epoch": 0.08, "grad_norm": 0.6322339085484171, "learning_rate": 3.969077846856057e-05, "loss": 0.2962, "step": 769 }, { "epoch": 0.08, "grad_norm": 0.8539239181826643, "learning_rate": 3.968953107241809e-05, "loss": 0.4008, "step": 770 }, { "epoch": 0.08, "grad_norm": 0.859982085374054, "learning_rate": 3.9688281185027434e-05, "loss": 0.4318, "step": 771 }, { "epoch": 0.08, "grad_norm": 0.8618796868366281, "learning_rate": 3.9687028806546756e-05, "loss": 0.4331, "step": 772 }, { "epoch": 0.08, "grad_norm": 0.9366000942643986, "learning_rate": 3.96857739371345e-05, "loss": 0.4672, "step": 773 }, { "epoch": 0.08, "grad_norm": 0.7585798881042127, "learning_rate": 3.968451657694946e-05, "loss": 0.3669, "step": 774 }, { "epoch": 0.09, "grad_norm": 0.807687223596606, "learning_rate": 3.9683256726150707e-05, "loss": 0.3918, "step": 775 }, { "epoch": 0.09, "grad_norm": 0.7919770979002362, "learning_rate": 3.9681994384897654e-05, "loss": 0.3846, "step": 776 }, { "epoch": 0.09, "grad_norm": 0.7443223866430635, "learning_rate": 3.9680729553350016e-05, "loss": 0.3592, "step": 777 }, { "epoch": 0.09, "grad_norm": 0.754575758217912, "learning_rate": 3.967946223166784e-05, "loss": 0.356, "step": 778 }, { "epoch": 0.09, "grad_norm": 0.7114377619690837, "learning_rate": 3.967819242001146e-05, "loss": 0.3294, "step": 779 }, { "epoch": 0.09, "grad_norm": 0.7754772916158532, "learning_rate": 3.967692011854155e-05, "loss": 0.3101, "step": 780 }, { "epoch": 0.09, "grad_norm": 1.034931201583225, "learning_rate": 3.967564532741909e-05, "loss": 0.5161, "step": 781 }, { "epoch": 0.09, "grad_norm": 0.7108248527068666, "learning_rate": 3.967436804680537e-05, "loss": 0.2893, "step": 782 }, { "epoch": 0.09, "grad_norm": 0.8280630268443923, "learning_rate": 3.9673088276862e-05, "loss": 0.4229, "step": 783 }, { "epoch": 0.09, "grad_norm": 0.8426574415031907, "learning_rate": 3.9671806017750915e-05, "loss": 0.4099, "step": 784 }, { "epoch": 0.09, "grad_norm": 0.8443906259396801, "learning_rate": 3.967052126963435e-05, "loss": 0.3673, "step": 785 }, { "epoch": 0.09, "grad_norm": 0.7433810625305357, "learning_rate": 3.966923403267485e-05, "loss": 0.3935, "step": 786 }, { "epoch": 0.09, "grad_norm": 0.81018830606576, "learning_rate": 3.966794430703528e-05, "loss": 0.333, "step": 787 }, { "epoch": 0.09, "grad_norm": 0.7762348182164174, "learning_rate": 3.9666652092878856e-05, "loss": 0.3622, "step": 788 }, { "epoch": 0.09, "grad_norm": 0.8275345550767284, "learning_rate": 3.966535739036905e-05, "loss": 0.387, "step": 789 }, { "epoch": 0.09, "grad_norm": 0.7738081689270789, "learning_rate": 3.966406019966968e-05, "loss": 0.3739, "step": 790 }, { "epoch": 0.09, "grad_norm": 0.8311358366277135, "learning_rate": 3.9662760520944886e-05, "loss": 0.3782, "step": 791 }, { "epoch": 0.09, "grad_norm": 0.9774607685148268, "learning_rate": 3.9661458354359105e-05, "loss": 0.3976, "step": 792 }, { "epoch": 0.09, "grad_norm": 0.9264658396641045, "learning_rate": 3.966015370007709e-05, "loss": 0.5481, "step": 793 }, { "epoch": 0.09, "grad_norm": 0.7644249210603022, "learning_rate": 3.9658846558263925e-05, "loss": 0.3557, "step": 794 }, { "epoch": 0.09, "grad_norm": 0.6324907591676214, "learning_rate": 3.965753692908499e-05, "loss": 0.3275, "step": 795 }, { "epoch": 0.09, "grad_norm": 0.765082770650668, "learning_rate": 3.965622481270599e-05, "loss": 0.3816, "step": 796 }, { "epoch": 0.09, "grad_norm": 0.6620841682609669, "learning_rate": 3.9654910209292943e-05, "loss": 0.3236, "step": 797 }, { "epoch": 0.09, "grad_norm": 0.7204440511864142, "learning_rate": 3.9653593119012185e-05, "loss": 0.362, "step": 798 }, { "epoch": 0.09, "grad_norm": 0.7578089212549397, "learning_rate": 3.9652273542030355e-05, "loss": 0.3905, "step": 799 }, { "epoch": 0.09, "grad_norm": 0.7888588217555922, "learning_rate": 3.965095147851442e-05, "loss": 0.4339, "step": 800 }, { "epoch": 0.09, "grad_norm": 0.8138751716853801, "learning_rate": 3.9649626928631655e-05, "loss": 0.4155, "step": 801 }, { "epoch": 0.09, "grad_norm": 0.6165424549384506, "learning_rate": 3.9648299892549654e-05, "loss": 0.2941, "step": 802 }, { "epoch": 0.09, "grad_norm": 0.7059527860475053, "learning_rate": 3.964697037043632e-05, "loss": 0.3268, "step": 803 }, { "epoch": 0.09, "grad_norm": 0.8911116090523694, "learning_rate": 3.964563836245987e-05, "loss": 0.4004, "step": 804 }, { "epoch": 0.09, "grad_norm": 0.8497829917570302, "learning_rate": 3.964430386878883e-05, "loss": 0.4635, "step": 805 }, { "epoch": 0.09, "grad_norm": 0.7549620829780918, "learning_rate": 3.964296688959208e-05, "loss": 0.4025, "step": 806 }, { "epoch": 0.09, "grad_norm": 0.8601342401971424, "learning_rate": 3.9641627425038745e-05, "loss": 0.317, "step": 807 }, { "epoch": 0.09, "grad_norm": 0.9082765282941799, "learning_rate": 3.964028547529832e-05, "loss": 0.446, "step": 808 }, { "epoch": 0.09, "grad_norm": 0.6087626290235267, "learning_rate": 3.9638941040540606e-05, "loss": 0.2865, "step": 809 }, { "epoch": 0.09, "grad_norm": 0.900223921364458, "learning_rate": 3.9637594120935697e-05, "loss": 0.4247, "step": 810 }, { "epoch": 0.09, "grad_norm": 0.7034242840766348, "learning_rate": 3.963624471665402e-05, "loss": 0.3271, "step": 811 }, { "epoch": 0.09, "grad_norm": 0.6772115928261491, "learning_rate": 3.9634892827866306e-05, "loss": 0.3007, "step": 812 }, { "epoch": 0.09, "grad_norm": 0.6664765628334043, "learning_rate": 3.963353845474361e-05, "loss": 0.378, "step": 813 }, { "epoch": 0.09, "grad_norm": 0.782829518999284, "learning_rate": 3.9632181597457296e-05, "loss": 0.3665, "step": 814 }, { "epoch": 0.09, "grad_norm": 0.8306568596601841, "learning_rate": 3.963082225617903e-05, "loss": 0.4402, "step": 815 }, { "epoch": 0.09, "grad_norm": 0.8407522566098939, "learning_rate": 3.9629460431080825e-05, "loss": 0.3815, "step": 816 }, { "epoch": 0.09, "grad_norm": 0.7948156187382217, "learning_rate": 3.962809612233497e-05, "loss": 0.3928, "step": 817 }, { "epoch": 0.09, "grad_norm": 0.7308868191009538, "learning_rate": 3.96267293301141e-05, "loss": 0.3259, "step": 818 }, { "epoch": 0.09, "grad_norm": 0.8296952744746502, "learning_rate": 3.9625360054591144e-05, "loss": 0.3351, "step": 819 }, { "epoch": 0.09, "grad_norm": 0.6594169585270185, "learning_rate": 3.962398829593935e-05, "loss": 0.323, "step": 820 }, { "epoch": 0.09, "grad_norm": 0.7775576503269125, "learning_rate": 3.962261405433229e-05, "loss": 0.3453, "step": 821 }, { "epoch": 0.09, "grad_norm": 0.7960862763028763, "learning_rate": 3.962123732994383e-05, "loss": 0.4011, "step": 822 }, { "epoch": 0.09, "grad_norm": 0.9116213225909428, "learning_rate": 3.961985812294817e-05, "loss": 0.3732, "step": 823 }, { "epoch": 0.09, "grad_norm": 0.7696309291356568, "learning_rate": 3.961847643351981e-05, "loss": 0.333, "step": 824 }, { "epoch": 0.09, "grad_norm": 0.8680071694346081, "learning_rate": 3.961709226183359e-05, "loss": 0.3697, "step": 825 }, { "epoch": 0.09, "grad_norm": 0.9742102407712474, "learning_rate": 3.961570560806461e-05, "loss": 0.4396, "step": 826 }, { "epoch": 0.09, "grad_norm": 0.9699224944464142, "learning_rate": 3.961431647238835e-05, "loss": 0.48, "step": 827 }, { "epoch": 0.09, "grad_norm": 0.826206035223429, "learning_rate": 3.9612924854980556e-05, "loss": 0.3786, "step": 828 }, { "epoch": 0.09, "grad_norm": 0.7994337700661437, "learning_rate": 3.9611530756017306e-05, "loss": 0.3527, "step": 829 }, { "epoch": 0.09, "grad_norm": 0.7616547386465274, "learning_rate": 3.9610134175675e-05, "loss": 0.3833, "step": 830 }, { "epoch": 0.09, "grad_norm": 0.6806056737826948, "learning_rate": 3.960873511413033e-05, "loss": 0.3292, "step": 831 }, { "epoch": 0.09, "grad_norm": 0.6463923547259581, "learning_rate": 3.960733357156033e-05, "loss": 0.3016, "step": 832 }, { "epoch": 0.09, "grad_norm": 0.7202256851673425, "learning_rate": 3.9605929548142314e-05, "loss": 0.3773, "step": 833 }, { "epoch": 0.09, "grad_norm": 0.8216163808537975, "learning_rate": 3.960452304405394e-05, "loss": 0.4243, "step": 834 }, { "epoch": 0.09, "grad_norm": 0.6232743759713391, "learning_rate": 3.960311405947317e-05, "loss": 0.289, "step": 835 }, { "epoch": 0.09, "grad_norm": 0.820963065711702, "learning_rate": 3.960170259457826e-05, "loss": 0.2993, "step": 836 }, { "epoch": 0.09, "grad_norm": 0.7917922740138276, "learning_rate": 3.960028864954782e-05, "loss": 0.3405, "step": 837 }, { "epoch": 0.09, "grad_norm": 0.6907357882191566, "learning_rate": 3.959887222456075e-05, "loss": 0.2672, "step": 838 }, { "epoch": 0.09, "grad_norm": 0.7582841172508993, "learning_rate": 3.9597453319796245e-05, "loss": 0.2889, "step": 839 }, { "epoch": 0.09, "grad_norm": 0.6911531836534004, "learning_rate": 3.959603193543385e-05, "loss": 0.3292, "step": 840 }, { "epoch": 0.09, "grad_norm": 0.816896610747596, "learning_rate": 3.95946080716534e-05, "loss": 0.357, "step": 841 }, { "epoch": 0.09, "grad_norm": 0.8281384592941847, "learning_rate": 3.959318172863506e-05, "loss": 0.3597, "step": 842 }, { "epoch": 0.09, "grad_norm": 0.8258436195690133, "learning_rate": 3.95917529065593e-05, "loss": 0.4532, "step": 843 }, { "epoch": 0.09, "grad_norm": 0.7230337445732571, "learning_rate": 3.95903216056069e-05, "loss": 0.3236, "step": 844 }, { "epoch": 0.09, "grad_norm": 0.7858822630626577, "learning_rate": 3.958888782595895e-05, "loss": 0.3141, "step": 845 }, { "epoch": 0.09, "grad_norm": 0.7437788917635865, "learning_rate": 3.958745156779688e-05, "loss": 0.3863, "step": 846 }, { "epoch": 0.09, "grad_norm": 0.6731286883098178, "learning_rate": 3.9586012831302396e-05, "loss": 0.3209, "step": 847 }, { "epoch": 0.09, "grad_norm": 0.8627273025091535, "learning_rate": 3.9584571616657544e-05, "loss": 0.4197, "step": 848 }, { "epoch": 0.09, "grad_norm": 0.6090186373093468, "learning_rate": 3.958312792404468e-05, "loss": 0.2961, "step": 849 }, { "epoch": 0.09, "grad_norm": 0.8078390829113737, "learning_rate": 3.958168175364646e-05, "loss": 0.3597, "step": 850 }, { "epoch": 0.09, "grad_norm": 0.6844922509629057, "learning_rate": 3.9580233105645874e-05, "loss": 0.2671, "step": 851 }, { "epoch": 0.09, "grad_norm": 0.6578007495401484, "learning_rate": 3.957878198022621e-05, "loss": 0.2385, "step": 852 }, { "epoch": 0.09, "grad_norm": 0.7632581365672044, "learning_rate": 3.957732837757107e-05, "loss": 0.3571, "step": 853 }, { "epoch": 0.09, "grad_norm": 0.7806615520682209, "learning_rate": 3.957587229786437e-05, "loss": 0.3126, "step": 854 }, { "epoch": 0.09, "grad_norm": 0.7183221704702855, "learning_rate": 3.957441374129035e-05, "loss": 0.3251, "step": 855 }, { "epoch": 0.09, "grad_norm": 0.6635868837608979, "learning_rate": 3.9572952708033564e-05, "loss": 0.2768, "step": 856 }, { "epoch": 0.09, "grad_norm": 0.7650985464811946, "learning_rate": 3.9571489198278855e-05, "loss": 0.3336, "step": 857 }, { "epoch": 0.09, "grad_norm": 0.7593707551562814, "learning_rate": 3.9570023212211405e-05, "loss": 0.3748, "step": 858 }, { "epoch": 0.09, "grad_norm": 0.8188017603114055, "learning_rate": 3.95685547500167e-05, "loss": 0.406, "step": 859 }, { "epoch": 0.09, "grad_norm": 0.7126570227413258, "learning_rate": 3.956708381188054e-05, "loss": 0.3344, "step": 860 }, { "epoch": 0.09, "grad_norm": 0.6770653404388245, "learning_rate": 3.956561039798903e-05, "loss": 0.2968, "step": 861 }, { "epoch": 0.09, "grad_norm": 0.8083508997578231, "learning_rate": 3.95641345085286e-05, "loss": 0.366, "step": 862 }, { "epoch": 0.09, "grad_norm": 0.8739611044026313, "learning_rate": 3.956265614368599e-05, "loss": 0.3072, "step": 863 }, { "epoch": 0.09, "grad_norm": 0.8994228101671509, "learning_rate": 3.956117530364826e-05, "loss": 0.3689, "step": 864 }, { "epoch": 0.09, "grad_norm": 0.8499506473238314, "learning_rate": 3.955969198860276e-05, "loss": 0.3983, "step": 865 }, { "epoch": 0.1, "grad_norm": 0.7764093735467574, "learning_rate": 3.955820619873719e-05, "loss": 0.3578, "step": 866 }, { "epoch": 0.1, "grad_norm": 0.7616638946343639, "learning_rate": 3.9556717934239516e-05, "loss": 0.3146, "step": 867 }, { "epoch": 0.1, "grad_norm": 0.7857665162251634, "learning_rate": 3.955522719529807e-05, "loss": 0.396, "step": 868 }, { "epoch": 0.1, "grad_norm": 0.8501695211447731, "learning_rate": 3.955373398210144e-05, "loss": 0.3346, "step": 869 }, { "epoch": 0.1, "grad_norm": 0.7679752943316975, "learning_rate": 3.9552238294838584e-05, "loss": 0.3314, "step": 870 }, { "epoch": 0.1, "grad_norm": 0.6558055962421216, "learning_rate": 3.9550740133698726e-05, "loss": 0.288, "step": 871 }, { "epoch": 0.1, "grad_norm": 0.7313453595836844, "learning_rate": 3.954923949887144e-05, "loss": 0.3122, "step": 872 }, { "epoch": 0.1, "grad_norm": 0.7218615311856481, "learning_rate": 3.954773639054659e-05, "loss": 0.2875, "step": 873 }, { "epoch": 0.1, "grad_norm": 0.739149844147705, "learning_rate": 3.954623080891435e-05, "loss": 0.259, "step": 874 }, { "epoch": 0.1, "grad_norm": 0.7460763340060723, "learning_rate": 3.9544722754165225e-05, "loss": 0.291, "step": 875 }, { "epoch": 0.1, "grad_norm": 0.8088352008732254, "learning_rate": 3.954321222649003e-05, "loss": 0.3429, "step": 876 }, { "epoch": 0.1, "grad_norm": 0.7909876688973838, "learning_rate": 3.954169922607987e-05, "loss": 0.2892, "step": 877 }, { "epoch": 0.1, "grad_norm": 9.877938135268879, "learning_rate": 3.95401837531262e-05, "loss": 0.2588, "step": 878 }, { "epoch": 0.1, "grad_norm": 0.7311048950598087, "learning_rate": 3.953866580782075e-05, "loss": 0.2772, "step": 879 }, { "epoch": 0.1, "grad_norm": 0.8246951421374188, "learning_rate": 3.953714539035558e-05, "loss": 0.3546, "step": 880 }, { "epoch": 0.1, "grad_norm": 0.6775861462584851, "learning_rate": 3.953562250092308e-05, "loss": 0.2771, "step": 881 }, { "epoch": 0.1, "grad_norm": 0.6792306131748974, "learning_rate": 3.9534097139715926e-05, "loss": 0.3082, "step": 882 }, { "epoch": 0.1, "grad_norm": 0.8128290977088691, "learning_rate": 3.9532569306927115e-05, "loss": 0.3386, "step": 883 }, { "epoch": 0.1, "grad_norm": 0.709903510078536, "learning_rate": 3.9531039002749955e-05, "loss": 0.3249, "step": 884 }, { "epoch": 0.1, "grad_norm": 0.7870809015088726, "learning_rate": 3.9529506227378085e-05, "loss": 0.4089, "step": 885 }, { "epoch": 0.1, "grad_norm": 0.7636376102497198, "learning_rate": 3.952797098100543e-05, "loss": 0.3527, "step": 886 }, { "epoch": 0.1, "grad_norm": 0.8253743175045637, "learning_rate": 3.952643326382624e-05, "loss": 0.3307, "step": 887 }, { "epoch": 0.1, "grad_norm": 0.6402923371550692, "learning_rate": 3.952489307603507e-05, "loss": 0.2392, "step": 888 }, { "epoch": 0.1, "grad_norm": 0.764620647680578, "learning_rate": 3.9523350417826816e-05, "loss": 0.3716, "step": 889 }, { "epoch": 0.1, "grad_norm": 1.064539858582937, "learning_rate": 3.9521805289396645e-05, "loss": 0.2258, "step": 890 }, { "epoch": 0.1, "grad_norm": 0.7584962807767207, "learning_rate": 3.952025769094006e-05, "loss": 0.3336, "step": 891 }, { "epoch": 0.1, "grad_norm": 0.9684532695519109, "learning_rate": 3.951870762265288e-05, "loss": 0.3657, "step": 892 }, { "epoch": 0.1, "grad_norm": 0.7111334059203872, "learning_rate": 3.951715508473124e-05, "loss": 0.2955, "step": 893 }, { "epoch": 0.1, "grad_norm": 0.7075510379997415, "learning_rate": 3.9515600077371545e-05, "loss": 0.2746, "step": 894 }, { "epoch": 0.1, "grad_norm": 0.8876017068960823, "learning_rate": 3.9514042600770576e-05, "loss": 0.4418, "step": 895 }, { "epoch": 0.1, "grad_norm": 0.6422516245511461, "learning_rate": 3.951248265512538e-05, "loss": 0.2797, "step": 896 }, { "epoch": 0.1, "grad_norm": 0.7476030033806428, "learning_rate": 3.951092024063333e-05, "loss": 0.3639, "step": 897 }, { "epoch": 0.1, "grad_norm": 0.8450392304291887, "learning_rate": 3.950935535749213e-05, "loss": 0.347, "step": 898 }, { "epoch": 0.1, "grad_norm": 0.7068815547055709, "learning_rate": 3.9507788005899756e-05, "loss": 0.326, "step": 899 }, { "epoch": 0.1, "grad_norm": 0.8299403536288089, "learning_rate": 3.950621818605453e-05, "loss": 0.2721, "step": 900 }, { "epoch": 0.1, "grad_norm": 0.6629675870512527, "learning_rate": 3.950464589815508e-05, "loss": 0.2502, "step": 901 }, { "epoch": 0.1, "grad_norm": 0.7484625714588448, "learning_rate": 3.950307114240034e-05, "loss": 0.314, "step": 902 }, { "epoch": 0.1, "grad_norm": 0.8145970645017154, "learning_rate": 3.950149391898955e-05, "loss": 0.3533, "step": 903 }, { "epoch": 0.1, "grad_norm": 0.8147193235818403, "learning_rate": 3.9499914228122286e-05, "loss": 0.3479, "step": 904 }, { "epoch": 0.1, "grad_norm": 0.8505544733300423, "learning_rate": 3.9498332069998405e-05, "loss": 0.3788, "step": 905 }, { "epoch": 0.1, "grad_norm": 0.7673949588504672, "learning_rate": 3.9496747444818105e-05, "loss": 0.2771, "step": 906 }, { "epoch": 0.1, "grad_norm": 0.7690665686483074, "learning_rate": 3.9495160352781875e-05, "loss": 0.2982, "step": 907 }, { "epoch": 0.1, "grad_norm": 0.7372941376392371, "learning_rate": 3.9493570794090524e-05, "loss": 0.2931, "step": 908 }, { "epoch": 0.1, "grad_norm": 0.8846684080359255, "learning_rate": 3.9491978768945184e-05, "loss": 0.3135, "step": 909 }, { "epoch": 0.1, "grad_norm": 0.7435395624272256, "learning_rate": 3.9490384277547266e-05, "loss": 0.3314, "step": 910 }, { "epoch": 0.1, "grad_norm": 0.6994417357116672, "learning_rate": 3.948878732009854e-05, "loss": 0.3881, "step": 911 }, { "epoch": 0.1, "grad_norm": 0.6923609462454279, "learning_rate": 3.9487187896801054e-05, "loss": 0.288, "step": 912 }, { "epoch": 0.1, "grad_norm": 0.7272337407261249, "learning_rate": 3.9485586007857174e-05, "loss": 0.3146, "step": 913 }, { "epoch": 0.1, "grad_norm": 0.8734351197348492, "learning_rate": 3.9483981653469586e-05, "loss": 0.4029, "step": 914 }, { "epoch": 0.1, "grad_norm": 0.8283377320181136, "learning_rate": 3.948237483384128e-05, "loss": 0.3308, "step": 915 }, { "epoch": 0.1, "grad_norm": 0.5984747274985789, "learning_rate": 3.948076554917556e-05, "loss": 0.249, "step": 916 }, { "epoch": 0.1, "grad_norm": 0.8565072849518371, "learning_rate": 3.947915379967605e-05, "loss": 0.321, "step": 917 }, { "epoch": 0.1, "grad_norm": 0.814407530315674, "learning_rate": 3.9477539585546676e-05, "loss": 0.327, "step": 918 }, { "epoch": 0.1, "grad_norm": 0.9619628893857703, "learning_rate": 3.947592290699168e-05, "loss": 0.4363, "step": 919 }, { "epoch": 0.1, "grad_norm": 0.6454757705561542, "learning_rate": 3.9474303764215606e-05, "loss": 0.2605, "step": 920 }, { "epoch": 0.1, "grad_norm": 0.9926341095455815, "learning_rate": 3.947268215742333e-05, "loss": 0.3469, "step": 921 }, { "epoch": 0.1, "grad_norm": 0.7241534009171295, "learning_rate": 3.9471058086820024e-05, "loss": 0.3321, "step": 922 }, { "epoch": 0.1, "grad_norm": 0.7240218996824527, "learning_rate": 3.946943155261117e-05, "loss": 0.2985, "step": 923 }, { "epoch": 0.1, "grad_norm": 0.7046570934155437, "learning_rate": 3.9467802555002584e-05, "loss": 0.2776, "step": 924 }, { "epoch": 0.1, "grad_norm": 0.7198740007168696, "learning_rate": 3.9466171094200356e-05, "loss": 0.2917, "step": 925 }, { "epoch": 0.1, "grad_norm": 0.7669508605967125, "learning_rate": 3.946453717041093e-05, "loss": 0.3446, "step": 926 }, { "epoch": 0.1, "grad_norm": 0.907258327819075, "learning_rate": 3.946290078384103e-05, "loss": 0.3825, "step": 927 }, { "epoch": 0.1, "grad_norm": 0.7321825373925491, "learning_rate": 3.94612619346977e-05, "loss": 0.3469, "step": 928 }, { "epoch": 0.1, "grad_norm": 0.7435893822990791, "learning_rate": 3.9459620623188294e-05, "loss": 0.3749, "step": 929 }, { "epoch": 0.1, "grad_norm": 0.6541312392374807, "learning_rate": 3.94579768495205e-05, "loss": 0.2811, "step": 930 }, { "epoch": 0.1, "grad_norm": 0.7122762462564227, "learning_rate": 3.9456330613902286e-05, "loss": 0.2973, "step": 931 }, { "epoch": 0.1, "grad_norm": 0.6039775821065473, "learning_rate": 3.9454681916541936e-05, "loss": 0.223, "step": 932 }, { "epoch": 0.1, "grad_norm": 0.7453116266977244, "learning_rate": 3.945303075764807e-05, "loss": 0.2829, "step": 933 }, { "epoch": 0.1, "grad_norm": 0.812096275381738, "learning_rate": 3.94513771374296e-05, "loss": 0.3244, "step": 934 }, { "epoch": 0.1, "grad_norm": 0.7949422697782987, "learning_rate": 3.9449721056095746e-05, "loss": 0.3314, "step": 935 }, { "epoch": 0.1, "grad_norm": 0.6782805910351128, "learning_rate": 3.9448062513856056e-05, "loss": 0.2472, "step": 936 }, { "epoch": 0.1, "grad_norm": 0.7304765017501041, "learning_rate": 3.9446401510920365e-05, "loss": 0.277, "step": 937 }, { "epoch": 0.1, "grad_norm": 0.8840028620129856, "learning_rate": 3.944473804749885e-05, "loss": 0.3494, "step": 938 }, { "epoch": 0.1, "grad_norm": 0.7815671658443387, "learning_rate": 3.9443072123801975e-05, "loss": 0.2972, "step": 939 }, { "epoch": 0.1, "grad_norm": 0.687822266387582, "learning_rate": 3.944140374004052e-05, "loss": 0.2074, "step": 940 }, { "epoch": 0.1, "grad_norm": 0.7464770386084335, "learning_rate": 3.943973289642559e-05, "loss": 0.3356, "step": 941 }, { "epoch": 0.1, "grad_norm": 0.763183867099979, "learning_rate": 3.9438059593168586e-05, "loss": 0.3204, "step": 942 }, { "epoch": 0.1, "grad_norm": 0.6546027080826027, "learning_rate": 3.9436383830481226e-05, "loss": 0.2803, "step": 943 }, { "epoch": 0.1, "grad_norm": 0.7755412764936565, "learning_rate": 3.943470560857553e-05, "loss": 0.3101, "step": 944 }, { "epoch": 0.1, "grad_norm": 0.7884411395424002, "learning_rate": 3.9433024927663856e-05, "loss": 0.2722, "step": 945 }, { "epoch": 0.1, "grad_norm": 0.6560500158136222, "learning_rate": 3.943134178795883e-05, "loss": 0.2805, "step": 946 }, { "epoch": 0.1, "grad_norm": 0.8125042548435016, "learning_rate": 3.942965618967344e-05, "loss": 0.3715, "step": 947 }, { "epoch": 0.1, "grad_norm": 0.849678475211711, "learning_rate": 3.942796813302094e-05, "loss": 0.4147, "step": 948 }, { "epoch": 0.1, "grad_norm": 0.7345623020992047, "learning_rate": 3.942627761821492e-05, "loss": 0.3156, "step": 949 }, { "epoch": 0.1, "grad_norm": 0.6994124842085044, "learning_rate": 3.942458464546928e-05, "loss": 0.3267, "step": 950 }, { "epoch": 0.1, "grad_norm": 0.7415999646856553, "learning_rate": 3.9422889214998214e-05, "loss": 0.3421, "step": 951 }, { "epoch": 0.1, "grad_norm": 0.674673418231147, "learning_rate": 3.942119132701625e-05, "loss": 0.325, "step": 952 }, { "epoch": 0.1, "grad_norm": 0.6002050913527098, "learning_rate": 3.941949098173821e-05, "loss": 0.2828, "step": 953 }, { "epoch": 0.1, "grad_norm": 0.8729606090826311, "learning_rate": 3.9417788179379245e-05, "loss": 0.3874, "step": 954 }, { "epoch": 0.1, "grad_norm": 0.6830203457981324, "learning_rate": 3.941608292015478e-05, "loss": 0.2398, "step": 955 }, { "epoch": 0.1, "grad_norm": 0.6871211134974333, "learning_rate": 3.941437520428061e-05, "loss": 0.2924, "step": 956 }, { "epoch": 0.11, "grad_norm": 0.661933200319762, "learning_rate": 3.941266503197277e-05, "loss": 0.2484, "step": 957 }, { "epoch": 0.11, "grad_norm": 0.5907171994860796, "learning_rate": 3.941095240344766e-05, "loss": 0.2307, "step": 958 }, { "epoch": 0.11, "grad_norm": 0.7480759976049202, "learning_rate": 3.9409237318921975e-05, "loss": 0.3177, "step": 959 }, { "epoch": 0.11, "grad_norm": 0.7428933250738426, "learning_rate": 3.940751977861272e-05, "loss": 0.3098, "step": 960 }, { "epoch": 0.11, "grad_norm": 0.8061759663383855, "learning_rate": 3.9405799782737196e-05, "loss": 0.344, "step": 961 }, { "epoch": 0.11, "grad_norm": 0.6340489023855357, "learning_rate": 3.9404077331513044e-05, "loss": 0.238, "step": 962 }, { "epoch": 0.11, "grad_norm": 0.6411403351048823, "learning_rate": 3.940235242515819e-05, "loss": 0.2586, "step": 963 }, { "epoch": 0.11, "grad_norm": 0.6791197718674111, "learning_rate": 3.940062506389089e-05, "loss": 0.2914, "step": 964 }, { "epoch": 0.11, "grad_norm": 0.7169401353081453, "learning_rate": 3.939889524792968e-05, "loss": 0.2625, "step": 965 }, { "epoch": 0.11, "grad_norm": 0.713993170980712, "learning_rate": 3.9397162977493455e-05, "loss": 0.2781, "step": 966 }, { "epoch": 0.11, "grad_norm": 0.8212851434675372, "learning_rate": 3.939542825280137e-05, "loss": 0.3637, "step": 967 }, { "epoch": 0.11, "grad_norm": 0.7417147686893449, "learning_rate": 3.939369107407293e-05, "loss": 0.2795, "step": 968 }, { "epoch": 0.11, "grad_norm": 0.6280562538749723, "learning_rate": 3.939195144152792e-05, "loss": 0.2363, "step": 969 }, { "epoch": 0.11, "grad_norm": 0.7004002865637144, "learning_rate": 3.939020935538647e-05, "loss": 0.2463, "step": 970 }, { "epoch": 0.11, "grad_norm": 0.6972561576478352, "learning_rate": 3.938846481586898e-05, "loss": 0.2859, "step": 971 }, { "epoch": 0.11, "grad_norm": 0.572818996311442, "learning_rate": 3.938671782319619e-05, "loss": 0.2364, "step": 972 }, { "epoch": 0.11, "grad_norm": 0.7532835449234433, "learning_rate": 3.938496837758914e-05, "loss": 0.3212, "step": 973 }, { "epoch": 0.11, "grad_norm": 0.6375030648400928, "learning_rate": 3.938321647926918e-05, "loss": 0.3274, "step": 974 }, { "epoch": 0.11, "grad_norm": 0.6013369447017474, "learning_rate": 3.938146212845797e-05, "loss": 0.2656, "step": 975 }, { "epoch": 0.11, "grad_norm": 0.7198242369669776, "learning_rate": 3.937970532537749e-05, "loss": 0.3267, "step": 976 }, { "epoch": 0.11, "grad_norm": 0.6414430094325005, "learning_rate": 3.9377946070250006e-05, "loss": 0.2564, "step": 977 }, { "epoch": 0.11, "grad_norm": 0.8464511796175019, "learning_rate": 3.937618436329813e-05, "loss": 0.3334, "step": 978 }, { "epoch": 0.11, "grad_norm": 0.7920613099965732, "learning_rate": 3.937442020474475e-05, "loss": 0.2926, "step": 979 }, { "epoch": 0.11, "grad_norm": 0.80756319873438, "learning_rate": 3.937265359481309e-05, "loss": 0.303, "step": 980 }, { "epoch": 0.11, "grad_norm": 0.6979425387545215, "learning_rate": 3.9370884533726664e-05, "loss": 0.2471, "step": 981 }, { "epoch": 0.11, "grad_norm": 0.7290781830279354, "learning_rate": 3.936911302170931e-05, "loss": 0.2995, "step": 982 }, { "epoch": 0.11, "grad_norm": 0.8006790468077764, "learning_rate": 3.936733905898517e-05, "loss": 0.3339, "step": 983 }, { "epoch": 0.11, "grad_norm": 0.7596170628296051, "learning_rate": 3.936556264577869e-05, "loss": 0.3366, "step": 984 }, { "epoch": 0.11, "grad_norm": 0.6322558745340952, "learning_rate": 3.936378378231465e-05, "loss": 0.2878, "step": 985 }, { "epoch": 0.11, "grad_norm": 0.6589092688958328, "learning_rate": 3.9362002468818105e-05, "loss": 0.2734, "step": 986 }, { "epoch": 0.11, "grad_norm": 0.5840876051077623, "learning_rate": 3.9360218705514456e-05, "loss": 0.2268, "step": 987 }, { "epoch": 0.11, "grad_norm": 0.8223636595939524, "learning_rate": 3.935843249262939e-05, "loss": 0.358, "step": 988 }, { "epoch": 0.11, "grad_norm": 0.8171113542196695, "learning_rate": 3.93566438303889e-05, "loss": 0.3505, "step": 989 }, { "epoch": 0.11, "grad_norm": 0.6316890162495712, "learning_rate": 3.9354852719019306e-05, "loss": 0.221, "step": 990 }, { "epoch": 0.11, "grad_norm": 0.7426172317409356, "learning_rate": 3.935305915874724e-05, "loss": 0.3201, "step": 991 }, { "epoch": 0.11, "grad_norm": 0.8140781920928644, "learning_rate": 3.935126314979962e-05, "loss": 0.2949, "step": 992 }, { "epoch": 0.11, "grad_norm": 0.6333975736345722, "learning_rate": 3.93494646924037e-05, "loss": 0.2761, "step": 993 }, { "epoch": 0.11, "grad_norm": 0.6260929088810602, "learning_rate": 3.934766378678704e-05, "loss": 0.2609, "step": 994 }, { "epoch": 0.11, "grad_norm": 0.7287030835368725, "learning_rate": 3.934586043317748e-05, "loss": 0.2768, "step": 995 }, { "epoch": 0.11, "grad_norm": 0.6525172385500014, "learning_rate": 3.93440546318032e-05, "loss": 0.2232, "step": 996 }, { "epoch": 0.11, "grad_norm": 0.7513320141168345, "learning_rate": 3.934224638289268e-05, "loss": 0.2951, "step": 997 }, { "epoch": 0.11, "grad_norm": 0.5344745721683354, "learning_rate": 3.934043568667473e-05, "loss": 0.2579, "step": 998 }, { "epoch": 0.11, "grad_norm": 0.7747366042306933, "learning_rate": 3.9338622543378436e-05, "loss": 0.3363, "step": 999 }, { "epoch": 0.11, "grad_norm": 0.7486739356581735, "learning_rate": 3.933680695323321e-05, "loss": 0.284, "step": 1000 }, { "epoch": 0.11, "grad_norm": 0.7217638846747562, "learning_rate": 3.933498891646877e-05, "loss": 0.3034, "step": 1001 }, { "epoch": 0.11, "grad_norm": 0.5502334467760756, "learning_rate": 3.9333168433315144e-05, "loss": 0.2358, "step": 1002 }, { "epoch": 0.11, "grad_norm": 0.6679734793155404, "learning_rate": 3.933134550400268e-05, "loss": 0.2488, "step": 1003 }, { "epoch": 0.11, "grad_norm": 0.744859003255334, "learning_rate": 3.932952012876203e-05, "loss": 0.3261, "step": 1004 }, { "epoch": 0.11, "grad_norm": 0.4895547662341082, "learning_rate": 3.932769230782414e-05, "loss": 0.2033, "step": 1005 }, { "epoch": 0.11, "grad_norm": 0.7013920515343732, "learning_rate": 3.9325862041420275e-05, "loss": 0.3107, "step": 1006 }, { "epoch": 0.11, "grad_norm": 0.6422877249739573, "learning_rate": 3.932402932978203e-05, "loss": 0.2577, "step": 1007 }, { "epoch": 0.11, "grad_norm": 0.6516262837391291, "learning_rate": 3.9322194173141284e-05, "loss": 0.2396, "step": 1008 }, { "epoch": 0.11, "grad_norm": 0.8268493327729397, "learning_rate": 3.932035657173023e-05, "loss": 0.2831, "step": 1009 }, { "epoch": 0.11, "grad_norm": 0.709380236366876, "learning_rate": 3.931851652578137e-05, "loss": 0.2742, "step": 1010 }, { "epoch": 0.11, "grad_norm": 0.6892083573752351, "learning_rate": 3.931667403552753e-05, "loss": 0.2584, "step": 1011 }, { "epoch": 0.11, "grad_norm": 0.7666191730784749, "learning_rate": 3.9314829101201814e-05, "loss": 0.2843, "step": 1012 }, { "epoch": 0.11, "grad_norm": 0.6516448748726327, "learning_rate": 3.931298172303768e-05, "loss": 0.2307, "step": 1013 }, { "epoch": 0.11, "grad_norm": 0.591878098557286, "learning_rate": 3.9311131901268855e-05, "loss": 0.204, "step": 1014 }, { "epoch": 0.11, "grad_norm": 0.8590507242305281, "learning_rate": 3.9309279636129396e-05, "loss": 0.3197, "step": 1015 }, { "epoch": 0.11, "grad_norm": 0.7266935004812294, "learning_rate": 3.930742492785366e-05, "loss": 0.2782, "step": 1016 }, { "epoch": 0.11, "grad_norm": 0.6321222756077973, "learning_rate": 3.930556777667632e-05, "loss": 0.2293, "step": 1017 }, { "epoch": 0.11, "grad_norm": 0.7886974132195063, "learning_rate": 3.930370818283235e-05, "loss": 0.2876, "step": 1018 }, { "epoch": 0.11, "grad_norm": 0.5970742377444473, "learning_rate": 3.9301846146557045e-05, "loss": 0.2525, "step": 1019 }, { "epoch": 0.11, "grad_norm": 0.6257482817653303, "learning_rate": 3.9299981668085997e-05, "loss": 0.287, "step": 1020 }, { "epoch": 0.11, "grad_norm": 0.7252019633782599, "learning_rate": 3.929811474765512e-05, "loss": 0.2785, "step": 1021 }, { "epoch": 0.11, "grad_norm": 0.6737315675411617, "learning_rate": 3.929624538550061e-05, "loss": 0.3161, "step": 1022 }, { "epoch": 0.11, "grad_norm": 0.733700625238442, "learning_rate": 3.929437358185901e-05, "loss": 0.3127, "step": 1023 }, { "epoch": 0.11, "grad_norm": 0.7339977756513314, "learning_rate": 3.929249933696715e-05, "loss": 0.3156, "step": 1024 }, { "epoch": 0.11, "grad_norm": 0.7095123930985195, "learning_rate": 3.9290622651062164e-05, "loss": 0.2758, "step": 1025 }, { "epoch": 0.11, "grad_norm": 0.6859423157125364, "learning_rate": 3.92887435243815e-05, "loss": 0.2841, "step": 1026 }, { "epoch": 0.11, "grad_norm": 0.7578136404756439, "learning_rate": 3.928686195716294e-05, "loss": 0.3359, "step": 1027 }, { "epoch": 0.11, "grad_norm": 0.630251163151722, "learning_rate": 3.928497794964452e-05, "loss": 0.2465, "step": 1028 }, { "epoch": 0.11, "grad_norm": 0.6332205645861801, "learning_rate": 3.928309150206464e-05, "loss": 0.3327, "step": 1029 }, { "epoch": 0.11, "grad_norm": 0.6105684429125092, "learning_rate": 3.928120261466198e-05, "loss": 0.2322, "step": 1030 }, { "epoch": 0.11, "grad_norm": 0.5737584403503849, "learning_rate": 3.9279311287675535e-05, "loss": 0.2143, "step": 1031 }, { "epoch": 0.11, "grad_norm": 0.588643450642452, "learning_rate": 3.92774175213446e-05, "loss": 0.209, "step": 1032 }, { "epoch": 0.11, "grad_norm": 0.8643382234397027, "learning_rate": 3.92755213159088e-05, "loss": 0.2968, "step": 1033 }, { "epoch": 0.11, "grad_norm": 0.6109670724565266, "learning_rate": 3.927362267160804e-05, "loss": 0.2428, "step": 1034 }, { "epoch": 0.11, "grad_norm": 0.784660372551906, "learning_rate": 3.927172158868257e-05, "loss": 0.1984, "step": 1035 }, { "epoch": 0.11, "grad_norm": 0.8481179663100128, "learning_rate": 3.92698180673729e-05, "loss": 0.322, "step": 1036 }, { "epoch": 0.11, "grad_norm": 0.6891565738740009, "learning_rate": 3.92679121079199e-05, "loss": 0.2695, "step": 1037 }, { "epoch": 0.11, "grad_norm": 0.8321120889999258, "learning_rate": 3.9266003710564706e-05, "loss": 0.4166, "step": 1038 }, { "epoch": 0.11, "grad_norm": 0.6893674456821418, "learning_rate": 3.92640928755488e-05, "loss": 0.3054, "step": 1039 }, { "epoch": 0.11, "grad_norm": 0.6900694904372378, "learning_rate": 3.9262179603113934e-05, "loss": 0.1942, "step": 1040 }, { "epoch": 0.11, "grad_norm": 0.6848669783360392, "learning_rate": 3.9260263893502204e-05, "loss": 0.2819, "step": 1041 }, { "epoch": 0.11, "grad_norm": 0.5351295742988001, "learning_rate": 3.925834574695599e-05, "loss": 0.2224, "step": 1042 }, { "epoch": 0.11, "grad_norm": 0.8952740246559334, "learning_rate": 3.925642516371799e-05, "loss": 0.3992, "step": 1043 }, { "epoch": 0.11, "grad_norm": 0.6412168420617952, "learning_rate": 3.9254502144031204e-05, "loss": 0.2896, "step": 1044 }, { "epoch": 0.11, "grad_norm": 0.6373657730386285, "learning_rate": 3.925257668813895e-05, "loss": 0.2555, "step": 1045 }, { "epoch": 0.11, "grad_norm": 0.6261593556238331, "learning_rate": 3.925064879628485e-05, "loss": 0.2483, "step": 1046 }, { "epoch": 0.11, "grad_norm": 0.8885453018764059, "learning_rate": 3.924871846871283e-05, "loss": 0.3659, "step": 1047 }, { "epoch": 0.12, "grad_norm": 0.6511353361774384, "learning_rate": 3.924678570566714e-05, "loss": 0.2772, "step": 1048 }, { "epoch": 0.12, "grad_norm": 0.7091998989090091, "learning_rate": 3.9244850507392296e-05, "loss": 0.2509, "step": 1049 }, { "epoch": 0.12, "grad_norm": 0.7040334554845372, "learning_rate": 3.9242912874133186e-05, "loss": 0.2606, "step": 1050 }, { "epoch": 0.12, "grad_norm": 0.7719942363736338, "learning_rate": 3.924097280613495e-05, "loss": 0.3191, "step": 1051 }, { "epoch": 0.12, "grad_norm": 0.6697367865671184, "learning_rate": 3.9239030303643074e-05, "loss": 0.2613, "step": 1052 }, { "epoch": 0.12, "grad_norm": 0.8264958172673278, "learning_rate": 3.923708536690332e-05, "loss": 0.3564, "step": 1053 }, { "epoch": 0.12, "grad_norm": 0.6571493344328319, "learning_rate": 3.9235137996161786e-05, "loss": 0.2708, "step": 1054 }, { "epoch": 0.12, "grad_norm": 0.710064068509379, "learning_rate": 3.9233188191664856e-05, "loss": 0.2523, "step": 1055 }, { "epoch": 0.12, "grad_norm": 0.608062798513221, "learning_rate": 3.9231235953659244e-05, "loss": 0.2325, "step": 1056 }, { "epoch": 0.12, "grad_norm": 0.6773015819830156, "learning_rate": 3.922928128239195e-05, "loss": 0.2123, "step": 1057 }, { "epoch": 0.12, "grad_norm": 0.6601524804363184, "learning_rate": 3.9227324178110295e-05, "loss": 0.2513, "step": 1058 }, { "epoch": 0.12, "grad_norm": 0.5563992428679837, "learning_rate": 3.9225364641061904e-05, "loss": 0.2206, "step": 1059 }, { "epoch": 0.12, "grad_norm": 0.6595045405413055, "learning_rate": 3.922340267149472e-05, "loss": 0.1919, "step": 1060 }, { "epoch": 0.12, "grad_norm": 0.7530465239480391, "learning_rate": 3.922143826965697e-05, "loss": 0.2667, "step": 1061 }, { "epoch": 0.12, "grad_norm": 0.9347659616752685, "learning_rate": 3.9219471435797205e-05, "loss": 0.3505, "step": 1062 }, { "epoch": 0.12, "grad_norm": 0.622338229803054, "learning_rate": 3.921750217016429e-05, "loss": 0.2342, "step": 1063 }, { "epoch": 0.12, "grad_norm": 0.7586102563415626, "learning_rate": 3.921553047300739e-05, "loss": 0.3498, "step": 1064 }, { "epoch": 0.12, "grad_norm": 0.6776757998259934, "learning_rate": 3.9213556344575964e-05, "loss": 0.3132, "step": 1065 }, { "epoch": 0.12, "grad_norm": 0.809506918860515, "learning_rate": 3.9211579785119804e-05, "loss": 0.3646, "step": 1066 }, { "epoch": 0.12, "grad_norm": 0.7208531334717293, "learning_rate": 3.920960079488899e-05, "loss": 0.2787, "step": 1067 }, { "epoch": 0.12, "grad_norm": 0.6928812662013678, "learning_rate": 3.9207619374133917e-05, "loss": 0.3179, "step": 1068 }, { "epoch": 0.12, "grad_norm": 0.5637700524749805, "learning_rate": 3.920563552310529e-05, "loss": 0.2183, "step": 1069 }, { "epoch": 0.12, "grad_norm": 0.5474515328227216, "learning_rate": 3.920364924205412e-05, "loss": 0.2715, "step": 1070 }, { "epoch": 0.12, "grad_norm": 0.6813817885429322, "learning_rate": 3.920166053123172e-05, "loss": 0.2607, "step": 1071 }, { "epoch": 0.12, "grad_norm": 0.6183924441432396, "learning_rate": 3.9199669390889725e-05, "loss": 0.2305, "step": 1072 }, { "epoch": 0.12, "grad_norm": 0.6323170311714446, "learning_rate": 3.919767582128005e-05, "loss": 0.1967, "step": 1073 }, { "epoch": 0.12, "grad_norm": 0.6699121991673812, "learning_rate": 3.919567982265495e-05, "loss": 0.2598, "step": 1074 }, { "epoch": 0.12, "grad_norm": 0.8344633348545905, "learning_rate": 3.919368139526697e-05, "loss": 0.3114, "step": 1075 }, { "epoch": 0.12, "grad_norm": 0.8664136991189226, "learning_rate": 3.9191680539368956e-05, "loss": 0.2834, "step": 1076 }, { "epoch": 0.12, "grad_norm": 0.8080766286630513, "learning_rate": 3.918967725521407e-05, "loss": 0.2887, "step": 1077 }, { "epoch": 0.12, "grad_norm": 0.9076625076317271, "learning_rate": 3.9187671543055785e-05, "loss": 0.286, "step": 1078 }, { "epoch": 0.12, "grad_norm": 0.8725319865246761, "learning_rate": 3.918566340314788e-05, "loss": 0.3357, "step": 1079 }, { "epoch": 0.12, "grad_norm": 0.6330280172436185, "learning_rate": 3.918365283574443e-05, "loss": 0.2758, "step": 1080 }, { "epoch": 0.12, "grad_norm": 0.6839189028598982, "learning_rate": 3.9181639841099825e-05, "loss": 0.2807, "step": 1081 }, { "epoch": 0.12, "grad_norm": 0.6554614279227786, "learning_rate": 3.9179624419468766e-05, "loss": 0.3104, "step": 1082 }, { "epoch": 0.12, "grad_norm": 0.6251213432774099, "learning_rate": 3.9177606571106265e-05, "loss": 0.2422, "step": 1083 }, { "epoch": 0.12, "grad_norm": 0.7579948756414638, "learning_rate": 3.917558629626762e-05, "loss": 0.3519, "step": 1084 }, { "epoch": 0.12, "grad_norm": 0.5614815074358583, "learning_rate": 3.917356359520846e-05, "loss": 0.2381, "step": 1085 }, { "epoch": 0.12, "grad_norm": 0.5754216369020166, "learning_rate": 3.917153846818471e-05, "loss": 0.251, "step": 1086 }, { "epoch": 0.12, "grad_norm": 0.7220614072886674, "learning_rate": 3.916951091545258e-05, "loss": 0.2917, "step": 1087 }, { "epoch": 0.12, "grad_norm": 0.6868942149410308, "learning_rate": 3.916748093726864e-05, "loss": 0.2941, "step": 1088 }, { "epoch": 0.12, "grad_norm": 0.6636968166790539, "learning_rate": 3.9165448533889726e-05, "loss": 0.2215, "step": 1089 }, { "epoch": 0.12, "grad_norm": 0.7906859664630741, "learning_rate": 3.9163413705572984e-05, "loss": 0.3339, "step": 1090 }, { "epoch": 0.12, "grad_norm": 0.6765223061689623, "learning_rate": 3.916137645257587e-05, "loss": 0.3126, "step": 1091 }, { "epoch": 0.12, "grad_norm": 0.5569111752132542, "learning_rate": 3.9159336775156165e-05, "loss": 0.2029, "step": 1092 }, { "epoch": 0.12, "grad_norm": 0.6471343446616511, "learning_rate": 3.915729467357194e-05, "loss": 0.23, "step": 1093 }, { "epoch": 0.12, "grad_norm": 0.5604740055583318, "learning_rate": 3.9155250148081564e-05, "loss": 0.2222, "step": 1094 }, { "epoch": 0.12, "grad_norm": 0.6115950224215414, "learning_rate": 3.915320319894373e-05, "loss": 0.2503, "step": 1095 }, { "epoch": 0.12, "grad_norm": 0.6607794895180239, "learning_rate": 3.9151153826417436e-05, "loss": 0.246, "step": 1096 }, { "epoch": 0.12, "grad_norm": 0.814486788778958, "learning_rate": 3.914910203076198e-05, "loss": 0.282, "step": 1097 }, { "epoch": 0.12, "grad_norm": 1.0233962654950661, "learning_rate": 3.914704781223696e-05, "loss": 0.405, "step": 1098 }, { "epoch": 0.12, "grad_norm": 0.7525338048724047, "learning_rate": 3.91449911711023e-05, "loss": 0.3131, "step": 1099 }, { "epoch": 0.12, "grad_norm": 0.6456261322956921, "learning_rate": 3.9142932107618214e-05, "loss": 0.2325, "step": 1100 }, { "epoch": 0.12, "grad_norm": 0.5747696446201337, "learning_rate": 3.914087062204523e-05, "loss": 0.1888, "step": 1101 }, { "epoch": 0.12, "grad_norm": 0.6842055280740315, "learning_rate": 3.913880671464418e-05, "loss": 0.2599, "step": 1102 }, { "epoch": 0.12, "grad_norm": 0.6431547320048512, "learning_rate": 3.91367403856762e-05, "loss": 0.2516, "step": 1103 }, { "epoch": 0.12, "grad_norm": 0.7544973161420873, "learning_rate": 3.9134671635402745e-05, "loss": 0.2657, "step": 1104 }, { "epoch": 0.12, "grad_norm": 0.7207657288030248, "learning_rate": 3.9132600464085556e-05, "loss": 0.3575, "step": 1105 }, { "epoch": 0.12, "grad_norm": 0.7417425729362398, "learning_rate": 3.91305268719867e-05, "loss": 0.3183, "step": 1106 }, { "epoch": 0.12, "grad_norm": 0.8137674349720427, "learning_rate": 3.912845085936853e-05, "loss": 0.3183, "step": 1107 }, { "epoch": 0.12, "grad_norm": 0.749358021636869, "learning_rate": 3.912637242649373e-05, "loss": 0.2676, "step": 1108 }, { "epoch": 0.12, "grad_norm": 0.967900857707362, "learning_rate": 3.9124291573625263e-05, "loss": 0.3343, "step": 1109 }, { "epoch": 0.12, "grad_norm": 0.7044682916703117, "learning_rate": 3.912220830102643e-05, "loss": 0.292, "step": 1110 }, { "epoch": 0.12, "grad_norm": 0.7130506780010349, "learning_rate": 3.91201226089608e-05, "loss": 0.292, "step": 1111 }, { "epoch": 0.12, "grad_norm": 0.620463309224544, "learning_rate": 3.911803449769228e-05, "loss": 0.2499, "step": 1112 }, { "epoch": 0.12, "grad_norm": 0.6598372281091182, "learning_rate": 3.9115943967485085e-05, "loss": 0.2533, "step": 1113 }, { "epoch": 0.12, "grad_norm": 0.5889333052074396, "learning_rate": 3.911385101860369e-05, "loss": 0.1957, "step": 1114 }, { "epoch": 0.12, "grad_norm": 0.6469707215478148, "learning_rate": 3.911175565131293e-05, "loss": 0.2776, "step": 1115 }, { "epoch": 0.12, "grad_norm": 0.6861570854030776, "learning_rate": 3.9109657865877924e-05, "loss": 0.2886, "step": 1116 }, { "epoch": 0.12, "grad_norm": 0.6399747292923705, "learning_rate": 3.9107557662564086e-05, "loss": 0.2394, "step": 1117 }, { "epoch": 0.12, "grad_norm": 0.7348381367687417, "learning_rate": 3.910545504163716e-05, "loss": 0.2749, "step": 1118 }, { "epoch": 0.12, "grad_norm": 0.76546893183466, "learning_rate": 3.910335000336319e-05, "loss": 0.2976, "step": 1119 }, { "epoch": 0.12, "grad_norm": 0.5289544268124914, "learning_rate": 3.9101242548008496e-05, "loss": 0.2018, "step": 1120 }, { "epoch": 0.12, "grad_norm": 0.7797200672021879, "learning_rate": 3.9099132675839736e-05, "loss": 0.286, "step": 1121 }, { "epoch": 0.12, "grad_norm": 0.7877839938752322, "learning_rate": 3.9097020387123876e-05, "loss": 0.3017, "step": 1122 }, { "epoch": 0.12, "grad_norm": 0.6544094159713822, "learning_rate": 3.9094905682128166e-05, "loss": 0.2666, "step": 1123 }, { "epoch": 0.12, "grad_norm": 0.5002055937556827, "learning_rate": 3.9092788561120174e-05, "loss": 0.1852, "step": 1124 }, { "epoch": 0.12, "grad_norm": 0.5951012494388362, "learning_rate": 3.909066902436777e-05, "loss": 0.2243, "step": 1125 }, { "epoch": 0.12, "grad_norm": 0.6183167520606488, "learning_rate": 3.9088547072139145e-05, "loss": 0.2042, "step": 1126 }, { "epoch": 0.12, "grad_norm": 0.8181522808378632, "learning_rate": 3.9086422704702754e-05, "loss": 0.2826, "step": 1127 }, { "epoch": 0.12, "grad_norm": 0.9041795098290607, "learning_rate": 3.9084295922327414e-05, "loss": 0.3177, "step": 1128 }, { "epoch": 0.12, "grad_norm": 0.6192185987597003, "learning_rate": 3.908216672528221e-05, "loss": 0.1909, "step": 1129 }, { "epoch": 0.12, "grad_norm": 0.7426067172209254, "learning_rate": 3.908003511383654e-05, "loss": 0.2949, "step": 1130 }, { "epoch": 0.12, "grad_norm": 0.5395648661022168, "learning_rate": 3.907790108826011e-05, "loss": 0.2393, "step": 1131 }, { "epoch": 0.12, "grad_norm": 0.6930545190418199, "learning_rate": 3.907576464882294e-05, "loss": 0.2748, "step": 1132 }, { "epoch": 0.12, "grad_norm": 0.7622005406163825, "learning_rate": 3.907362579579532e-05, "loss": 0.3442, "step": 1133 }, { "epoch": 0.12, "grad_norm": 0.6376383640629019, "learning_rate": 3.90714845294479e-05, "loss": 0.2322, "step": 1134 }, { "epoch": 0.12, "grad_norm": 0.7228179750676318, "learning_rate": 3.9069340850051595e-05, "loss": 0.2668, "step": 1135 }, { "epoch": 0.12, "grad_norm": 0.5861811576310614, "learning_rate": 3.9067194757877635e-05, "loss": 0.2324, "step": 1136 }, { "epoch": 0.12, "grad_norm": 0.5741847540724454, "learning_rate": 3.906504625319756e-05, "loss": 0.1767, "step": 1137 }, { "epoch": 0.12, "grad_norm": 0.6147131755762856, "learning_rate": 3.906289533628322e-05, "loss": 0.2624, "step": 1138 }, { "epoch": 0.13, "grad_norm": 0.5390624170717921, "learning_rate": 3.906074200740675e-05, "loss": 0.2086, "step": 1139 }, { "epoch": 0.13, "grad_norm": 0.7785584816474902, "learning_rate": 3.9058586266840614e-05, "loss": 0.2773, "step": 1140 }, { "epoch": 0.13, "grad_norm": 0.5692274929299811, "learning_rate": 3.905642811485757e-05, "loss": 0.2317, "step": 1141 }, { "epoch": 0.13, "grad_norm": 0.6684624258622031, "learning_rate": 3.905426755173068e-05, "loss": 0.2385, "step": 1142 }, { "epoch": 0.13, "grad_norm": 0.5692816785341807, "learning_rate": 3.90521045777333e-05, "loss": 0.2181, "step": 1143 }, { "epoch": 0.13, "grad_norm": 0.7373620421478237, "learning_rate": 3.904993919313912e-05, "loss": 0.2796, "step": 1144 }, { "epoch": 0.13, "grad_norm": 0.6851066197305056, "learning_rate": 3.9047771398222115e-05, "loss": 0.2429, "step": 1145 }, { "epoch": 0.13, "grad_norm": 0.46115732608511156, "learning_rate": 3.9045601193256564e-05, "loss": 0.1981, "step": 1146 }, { "epoch": 0.13, "grad_norm": 0.572500187319929, "learning_rate": 3.9043428578517055e-05, "loss": 0.2363, "step": 1147 }, { "epoch": 0.13, "grad_norm": 0.6352037062517615, "learning_rate": 3.9041253554278486e-05, "loss": 0.2702, "step": 1148 }, { "epoch": 0.13, "grad_norm": 0.7629694915961622, "learning_rate": 3.9039076120816055e-05, "loss": 0.3771, "step": 1149 }, { "epoch": 0.13, "grad_norm": 0.6471239136275394, "learning_rate": 3.9036896278405264e-05, "loss": 0.3015, "step": 1150 }, { "epoch": 0.13, "grad_norm": 0.8248461464270617, "learning_rate": 3.903471402732193e-05, "loss": 0.3045, "step": 1151 }, { "epoch": 0.13, "grad_norm": 0.763132163161792, "learning_rate": 3.9032529367842145e-05, "loss": 0.2647, "step": 1152 }, { "epoch": 0.13, "grad_norm": 0.663105128688938, "learning_rate": 3.9030342300242334e-05, "loss": 0.2612, "step": 1153 }, { "epoch": 0.13, "grad_norm": 0.7474917351383821, "learning_rate": 3.902815282479923e-05, "loss": 0.2497, "step": 1154 }, { "epoch": 0.13, "grad_norm": 0.549185504151279, "learning_rate": 3.902596094178985e-05, "loss": 0.2355, "step": 1155 }, { "epoch": 0.13, "grad_norm": 0.5437948679773392, "learning_rate": 3.902376665149153e-05, "loss": 0.2059, "step": 1156 }, { "epoch": 0.13, "grad_norm": 0.7314118198116689, "learning_rate": 3.9021569954181905e-05, "loss": 0.3453, "step": 1157 }, { "epoch": 0.13, "grad_norm": 0.6924006751679204, "learning_rate": 3.9019370850138915e-05, "loss": 0.2964, "step": 1158 }, { "epoch": 0.13, "grad_norm": 0.8438484699575118, "learning_rate": 3.90171693396408e-05, "loss": 0.3169, "step": 1159 }, { "epoch": 0.13, "grad_norm": 0.6313163826285553, "learning_rate": 3.9014965422966115e-05, "loss": 0.253, "step": 1160 }, { "epoch": 0.13, "grad_norm": 0.5590317616080528, "learning_rate": 3.9012759100393714e-05, "loss": 0.1806, "step": 1161 }, { "epoch": 0.13, "grad_norm": 0.6003285670674611, "learning_rate": 3.9010550372202756e-05, "loss": 0.2258, "step": 1162 }, { "epoch": 0.13, "grad_norm": 0.68521455714584, "learning_rate": 3.90083392386727e-05, "loss": 0.2908, "step": 1163 }, { "epoch": 0.13, "grad_norm": 0.6515214957455261, "learning_rate": 3.900612570008331e-05, "loss": 0.204, "step": 1164 }, { "epoch": 0.13, "grad_norm": 0.7735844867408266, "learning_rate": 3.900390975671467e-05, "loss": 0.2509, "step": 1165 }, { "epoch": 0.13, "grad_norm": 0.7204671333806125, "learning_rate": 3.900169140884715e-05, "loss": 0.2897, "step": 1166 }, { "epoch": 0.13, "grad_norm": 0.7108858529918054, "learning_rate": 3.8999470656761425e-05, "loss": 0.2336, "step": 1167 }, { "epoch": 0.13, "grad_norm": 0.6739129939052515, "learning_rate": 3.899724750073848e-05, "loss": 0.2398, "step": 1168 }, { "epoch": 0.13, "grad_norm": 0.5950638639812865, "learning_rate": 3.89950219410596e-05, "loss": 0.2394, "step": 1169 }, { "epoch": 0.13, "grad_norm": 0.6235646216257108, "learning_rate": 3.899279397800639e-05, "loss": 0.2312, "step": 1170 }, { "epoch": 0.13, "grad_norm": 0.7044447487092723, "learning_rate": 3.899056361186074e-05, "loss": 0.2502, "step": 1171 }, { "epoch": 0.13, "grad_norm": 0.6785046160940794, "learning_rate": 3.8988330842904844e-05, "loss": 0.2393, "step": 1172 }, { "epoch": 0.13, "grad_norm": 0.6668498835530737, "learning_rate": 3.8986095671421214e-05, "loss": 0.2567, "step": 1173 }, { "epoch": 0.13, "grad_norm": 0.7345434360917417, "learning_rate": 3.8983858097692656e-05, "loss": 0.277, "step": 1174 }, { "epoch": 0.13, "grad_norm": 0.6933838745946668, "learning_rate": 3.8981618122002284e-05, "loss": 0.3151, "step": 1175 }, { "epoch": 0.13, "grad_norm": 0.6841713780958396, "learning_rate": 3.8979375744633515e-05, "loss": 0.3232, "step": 1176 }, { "epoch": 0.13, "grad_norm": 0.5284133800955243, "learning_rate": 3.897713096587007e-05, "loss": 0.2942, "step": 1177 }, { "epoch": 0.13, "grad_norm": 0.7390750069396702, "learning_rate": 3.897488378599596e-05, "loss": 0.2582, "step": 1178 }, { "epoch": 0.13, "grad_norm": 0.6034182374271129, "learning_rate": 3.897263420529553e-05, "loss": 0.2431, "step": 1179 }, { "epoch": 0.13, "grad_norm": 0.5839404341583044, "learning_rate": 3.8970382224053414e-05, "loss": 0.2155, "step": 1180 }, { "epoch": 0.13, "grad_norm": 0.6694830299559975, "learning_rate": 3.8968127842554526e-05, "loss": 0.2396, "step": 1181 }, { "epoch": 0.13, "grad_norm": 0.6697326481882796, "learning_rate": 3.8965871061084126e-05, "loss": 0.2595, "step": 1182 }, { "epoch": 0.13, "grad_norm": 0.8205411683121762, "learning_rate": 3.896361187992774e-05, "loss": 0.3036, "step": 1183 }, { "epoch": 0.13, "grad_norm": 0.6065592144381388, "learning_rate": 3.896135029937123e-05, "loss": 0.2003, "step": 1184 }, { "epoch": 0.13, "grad_norm": 1.0616656000280686, "learning_rate": 3.895908631970074e-05, "loss": 0.3972, "step": 1185 }, { "epoch": 0.13, "grad_norm": 0.543240979786405, "learning_rate": 3.895681994120272e-05, "loss": 0.2014, "step": 1186 }, { "epoch": 0.13, "grad_norm": 0.5940428312879357, "learning_rate": 3.8954551164163924e-05, "loss": 0.2251, "step": 1187 }, { "epoch": 0.13, "grad_norm": 0.7983022138493923, "learning_rate": 3.8952279988871425e-05, "loss": 0.3055, "step": 1188 }, { "epoch": 0.13, "grad_norm": 0.5650046271160005, "learning_rate": 3.895000641561258e-05, "loss": 0.2271, "step": 1189 }, { "epoch": 0.13, "grad_norm": 0.6216246297402951, "learning_rate": 3.894773044467505e-05, "loss": 0.2454, "step": 1190 }, { "epoch": 0.13, "grad_norm": 0.7433994637212084, "learning_rate": 3.894545207634681e-05, "loss": 0.3221, "step": 1191 }, { "epoch": 0.13, "grad_norm": 0.6981473418155327, "learning_rate": 3.8943171310916146e-05, "loss": 0.2652, "step": 1192 }, { "epoch": 0.13, "grad_norm": 0.6493088256925852, "learning_rate": 3.8940888148671615e-05, "loss": 0.2261, "step": 1193 }, { "epoch": 0.13, "grad_norm": 0.6113396299211942, "learning_rate": 3.893860258990212e-05, "loss": 0.1989, "step": 1194 }, { "epoch": 0.13, "grad_norm": 0.7437777698365386, "learning_rate": 3.893631463489682e-05, "loss": 0.2734, "step": 1195 }, { "epoch": 0.13, "grad_norm": 0.6566140663635328, "learning_rate": 3.893402428394522e-05, "loss": 0.2117, "step": 1196 }, { "epoch": 0.13, "grad_norm": 0.7000718267238578, "learning_rate": 3.8931731537337105e-05, "loss": 0.2229, "step": 1197 }, { "epoch": 0.13, "grad_norm": 0.658687877150711, "learning_rate": 3.892943639536257e-05, "loss": 0.2393, "step": 1198 }, { "epoch": 0.13, "grad_norm": 0.7123417661873115, "learning_rate": 3.892713885831201e-05, "loss": 0.283, "step": 1199 }, { "epoch": 0.13, "grad_norm": 0.7320268303117896, "learning_rate": 3.8924838926476114e-05, "loss": 0.2446, "step": 1200 }, { "epoch": 0.13, "grad_norm": 0.6106401417396183, "learning_rate": 3.8922536600145914e-05, "loss": 0.2123, "step": 1201 }, { "epoch": 0.13, "grad_norm": 0.6672925817209624, "learning_rate": 3.892023187961268e-05, "loss": 0.2638, "step": 1202 }, { "epoch": 0.13, "grad_norm": 0.6572337043906973, "learning_rate": 3.891792476516804e-05, "loss": 0.272, "step": 1203 }, { "epoch": 0.13, "grad_norm": 0.6910810406193486, "learning_rate": 3.891561525710389e-05, "loss": 0.2475, "step": 1204 }, { "epoch": 0.13, "grad_norm": 0.6645591673647715, "learning_rate": 3.8913303355712476e-05, "loss": 0.2441, "step": 1205 }, { "epoch": 0.13, "grad_norm": 0.4280210257361357, "learning_rate": 3.891098906128628e-05, "loss": 0.1758, "step": 1206 }, { "epoch": 0.13, "grad_norm": 0.6574830777561546, "learning_rate": 3.890867237411814e-05, "loss": 0.2962, "step": 1207 }, { "epoch": 0.13, "grad_norm": 0.6820573161629137, "learning_rate": 3.890635329450118e-05, "loss": 0.239, "step": 1208 }, { "epoch": 0.13, "grad_norm": 0.6226431994823505, "learning_rate": 3.8904031822728804e-05, "loss": 0.2933, "step": 1209 }, { "epoch": 0.13, "grad_norm": 0.6802266930930283, "learning_rate": 3.890170795909477e-05, "loss": 0.2117, "step": 1210 }, { "epoch": 0.13, "grad_norm": 0.5477393812354234, "learning_rate": 3.889938170389309e-05, "loss": 0.2366, "step": 1211 }, { "epoch": 0.13, "grad_norm": 0.5213901277449603, "learning_rate": 3.88970530574181e-05, "loss": 0.1795, "step": 1212 }, { "epoch": 0.13, "grad_norm": 0.8229024056917156, "learning_rate": 3.889472201996444e-05, "loss": 0.2598, "step": 1213 }, { "epoch": 0.13, "grad_norm": 0.8605302848030582, "learning_rate": 3.889238859182703e-05, "loss": 0.28, "step": 1214 }, { "epoch": 0.13, "grad_norm": 0.7069294050008, "learning_rate": 3.889005277330114e-05, "loss": 0.2642, "step": 1215 }, { "epoch": 0.13, "grad_norm": 0.8320750909994648, "learning_rate": 3.888771456468229e-05, "loss": 0.2667, "step": 1216 }, { "epoch": 0.13, "grad_norm": 0.7472166229788278, "learning_rate": 3.888537396626634e-05, "loss": 0.2413, "step": 1217 }, { "epoch": 0.13, "grad_norm": 0.5918464513981199, "learning_rate": 3.8883030978349416e-05, "loss": 0.1717, "step": 1218 }, { "epoch": 0.13, "grad_norm": 0.6290008047353451, "learning_rate": 3.8880685601228e-05, "loss": 0.2442, "step": 1219 }, { "epoch": 0.13, "grad_norm": 0.6693512735872327, "learning_rate": 3.887833783519882e-05, "loss": 0.2768, "step": 1220 }, { "epoch": 0.13, "grad_norm": 0.6337689365665258, "learning_rate": 3.887598768055894e-05, "loss": 0.2543, "step": 1221 }, { "epoch": 0.13, "grad_norm": 0.5820862916868048, "learning_rate": 3.887363513760571e-05, "loss": 0.2686, "step": 1222 }, { "epoch": 0.13, "grad_norm": 0.654137435389576, "learning_rate": 3.88712802066368e-05, "loss": 0.2651, "step": 1223 }, { "epoch": 0.13, "grad_norm": 0.6068287495708279, "learning_rate": 3.8868922887950165e-05, "loss": 0.2478, "step": 1224 }, { "epoch": 0.13, "grad_norm": 0.6847562224574402, "learning_rate": 3.886656318184407e-05, "loss": 0.2938, "step": 1225 }, { "epoch": 0.13, "grad_norm": 0.6221632476325742, "learning_rate": 3.886420108861708e-05, "loss": 0.2376, "step": 1226 }, { "epoch": 0.13, "grad_norm": 0.5977840411625438, "learning_rate": 3.886183660856806e-05, "loss": 0.2174, "step": 1227 }, { "epoch": 0.13, "grad_norm": 0.5322449847191587, "learning_rate": 3.885946974199618e-05, "loss": 0.1947, "step": 1228 }, { "epoch": 0.13, "grad_norm": 0.6127247563426347, "learning_rate": 3.8857100489200915e-05, "loss": 0.213, "step": 1229 }, { "epoch": 0.14, "grad_norm": 0.5792430170994114, "learning_rate": 3.8854728850482034e-05, "loss": 0.1908, "step": 1230 }, { "epoch": 0.14, "grad_norm": 0.6969818905676577, "learning_rate": 3.885235482613962e-05, "loss": 0.2522, "step": 1231 }, { "epoch": 0.14, "grad_norm": 0.7777228652968459, "learning_rate": 3.884997841647404e-05, "loss": 0.2884, "step": 1232 }, { "epoch": 0.14, "grad_norm": 0.8085412639592999, "learning_rate": 3.8847599621785976e-05, "loss": 0.2158, "step": 1233 }, { "epoch": 0.14, "grad_norm": 0.8555021932675256, "learning_rate": 3.8845218442376416e-05, "loss": 0.3035, "step": 1234 }, { "epoch": 0.14, "grad_norm": 0.60544748576571, "learning_rate": 3.8842834878546625e-05, "loss": 0.2205, "step": 1235 }, { "epoch": 0.14, "grad_norm": 0.6872595019608175, "learning_rate": 3.8840448930598216e-05, "loss": 0.2765, "step": 1236 }, { "epoch": 0.14, "grad_norm": 0.6201364108733016, "learning_rate": 3.883806059883304e-05, "loss": 0.2321, "step": 1237 }, { "epoch": 0.14, "grad_norm": 0.5343775408946525, "learning_rate": 3.8835669883553315e-05, "loss": 0.1676, "step": 1238 }, { "epoch": 0.14, "grad_norm": 0.661309268004355, "learning_rate": 3.8833276785061504e-05, "loss": 0.2325, "step": 1239 }, { "epoch": 0.14, "grad_norm": 0.6090929405626248, "learning_rate": 3.883088130366042e-05, "loss": 0.233, "step": 1240 }, { "epoch": 0.14, "grad_norm": 0.6542144267957732, "learning_rate": 3.882848343965314e-05, "loss": 0.2596, "step": 1241 }, { "epoch": 0.14, "grad_norm": 0.554565819302052, "learning_rate": 3.882608319334306e-05, "loss": 0.2281, "step": 1242 }, { "epoch": 0.14, "grad_norm": 0.6197919338022719, "learning_rate": 3.8823680565033884e-05, "loss": 0.2126, "step": 1243 }, { "epoch": 0.14, "grad_norm": 0.7289902927893132, "learning_rate": 3.88212755550296e-05, "loss": 0.3425, "step": 1244 }, { "epoch": 0.14, "grad_norm": 0.6156219472664214, "learning_rate": 3.881886816363451e-05, "loss": 0.2354, "step": 1245 }, { "epoch": 0.14, "grad_norm": 0.6649824614112222, "learning_rate": 3.88164583911532e-05, "loss": 0.2607, "step": 1246 }, { "epoch": 0.14, "grad_norm": 0.6089744718772897, "learning_rate": 3.881404623789059e-05, "loss": 0.2025, "step": 1247 }, { "epoch": 0.14, "grad_norm": 0.5898090377597667, "learning_rate": 3.881163170415186e-05, "loss": 0.2179, "step": 1248 }, { "epoch": 0.14, "grad_norm": 0.622741911582533, "learning_rate": 3.880921479024253e-05, "loss": 0.249, "step": 1249 }, { "epoch": 0.14, "grad_norm": 0.6799072261241708, "learning_rate": 3.88067954964684e-05, "loss": 0.2457, "step": 1250 }, { "epoch": 0.14, "grad_norm": 0.964697946504875, "learning_rate": 3.8804373823135576e-05, "loss": 0.3046, "step": 1251 }, { "epoch": 0.14, "grad_norm": 0.746965069307749, "learning_rate": 3.880194977055045e-05, "loss": 0.2465, "step": 1252 }, { "epoch": 0.14, "grad_norm": 0.668794979828834, "learning_rate": 3.879952333901975e-05, "loss": 0.3104, "step": 1253 }, { "epoch": 0.14, "grad_norm": 0.44904798496177784, "learning_rate": 3.8797094528850474e-05, "loss": 0.2025, "step": 1254 }, { "epoch": 0.14, "grad_norm": 0.5123551175554063, "learning_rate": 3.8794663340349915e-05, "loss": 0.2133, "step": 1255 }, { "epoch": 0.14, "grad_norm": 0.6448259315774166, "learning_rate": 3.8792229773825716e-05, "loss": 0.2481, "step": 1256 }, { "epoch": 0.14, "grad_norm": 0.4628615229347241, "learning_rate": 3.878979382958576e-05, "loss": 0.155, "step": 1257 }, { "epoch": 0.14, "grad_norm": 0.5933499494335127, "learning_rate": 3.878735550793827e-05, "loss": 0.2285, "step": 1258 }, { "epoch": 0.14, "grad_norm": 0.5237221584340037, "learning_rate": 3.8784914809191764e-05, "loss": 0.1666, "step": 1259 }, { "epoch": 0.14, "grad_norm": 0.6483043396047382, "learning_rate": 3.8782471733655044e-05, "loss": 0.249, "step": 1260 }, { "epoch": 0.14, "grad_norm": 0.606080736540311, "learning_rate": 3.8780026281637224e-05, "loss": 0.2158, "step": 1261 }, { "epoch": 0.14, "grad_norm": 0.6714757797818425, "learning_rate": 3.877757845344773e-05, "loss": 0.2476, "step": 1262 }, { "epoch": 0.14, "grad_norm": 0.7585959170278128, "learning_rate": 3.8775128249396265e-05, "loss": 0.2354, "step": 1263 }, { "epoch": 0.14, "grad_norm": 0.6891021699796219, "learning_rate": 3.8772675669792855e-05, "loss": 0.2292, "step": 1264 }, { "epoch": 0.14, "grad_norm": 0.7685840024289295, "learning_rate": 3.877022071494781e-05, "loss": 0.2447, "step": 1265 }, { "epoch": 0.14, "grad_norm": 0.5877327437697002, "learning_rate": 3.876776338517174e-05, "loss": 0.1985, "step": 1266 }, { "epoch": 0.14, "grad_norm": 6.847615025902723, "learning_rate": 3.876530368077558e-05, "loss": 0.2218, "step": 1267 }, { "epoch": 0.14, "grad_norm": 0.8589034433997359, "learning_rate": 3.876284160207053e-05, "loss": 0.3489, "step": 1268 }, { "epoch": 0.14, "grad_norm": 0.7512883802692587, "learning_rate": 3.876037714936813e-05, "loss": 0.2595, "step": 1269 }, { "epoch": 0.14, "grad_norm": 0.7304878436362356, "learning_rate": 3.875791032298017e-05, "loss": 0.3265, "step": 1270 }, { "epoch": 0.14, "grad_norm": 0.6648290361329455, "learning_rate": 3.875544112321879e-05, "loss": 0.281, "step": 1271 }, { "epoch": 0.14, "grad_norm": 0.7660220537397362, "learning_rate": 3.875296955039641e-05, "loss": 0.331, "step": 1272 }, { "epoch": 0.14, "grad_norm": 0.5581666073556578, "learning_rate": 3.8750495604825733e-05, "loss": 0.1946, "step": 1273 }, { "epoch": 0.14, "grad_norm": 0.5847558631668265, "learning_rate": 3.874801928681979e-05, "loss": 0.1857, "step": 1274 }, { "epoch": 0.14, "grad_norm": 0.5725991640872297, "learning_rate": 3.8745540596691905e-05, "loss": 0.2284, "step": 1275 }, { "epoch": 0.14, "grad_norm": 0.6228902015890331, "learning_rate": 3.87430595347557e-05, "loss": 0.2098, "step": 1276 }, { "epoch": 0.14, "grad_norm": 0.6430056694620289, "learning_rate": 3.874057610132508e-05, "loss": 0.2459, "step": 1277 }, { "epoch": 0.14, "grad_norm": 0.6038958391918268, "learning_rate": 3.873809029671427e-05, "loss": 0.2406, "step": 1278 }, { "epoch": 0.14, "grad_norm": 0.6131309519487615, "learning_rate": 3.87356021212378e-05, "loss": 0.2314, "step": 1279 }, { "epoch": 0.14, "grad_norm": 0.6606580418504284, "learning_rate": 3.873311157521048e-05, "loss": 0.2363, "step": 1280 }, { "epoch": 0.14, "grad_norm": 0.8186205171889596, "learning_rate": 3.873061865894744e-05, "loss": 0.3057, "step": 1281 }, { "epoch": 0.14, "grad_norm": 0.7306612138608514, "learning_rate": 3.8728123372764085e-05, "loss": 0.2202, "step": 1282 }, { "epoch": 0.14, "grad_norm": 0.5884113476938773, "learning_rate": 3.872562571697615e-05, "loss": 0.2006, "step": 1283 }, { "epoch": 0.14, "grad_norm": 0.49841716689009385, "learning_rate": 3.8723125691899646e-05, "loss": 0.1889, "step": 1284 }, { "epoch": 0.14, "grad_norm": 0.5397632853581106, "learning_rate": 3.8720623297850895e-05, "loss": 0.1834, "step": 1285 }, { "epoch": 0.14, "grad_norm": 0.7106599370710558, "learning_rate": 3.871811853514652e-05, "loss": 0.2299, "step": 1286 }, { "epoch": 0.14, "grad_norm": 0.7194856941969401, "learning_rate": 3.871561140410343e-05, "loss": 0.2389, "step": 1287 }, { "epoch": 0.14, "grad_norm": 0.6744908708614878, "learning_rate": 3.871310190503886e-05, "loss": 0.2422, "step": 1288 }, { "epoch": 0.14, "grad_norm": 0.6636175123271209, "learning_rate": 3.8710590038270305e-05, "loss": 0.2576, "step": 1289 }, { "epoch": 0.14, "grad_norm": 0.7140115782362624, "learning_rate": 3.870807580411561e-05, "loss": 0.2728, "step": 1290 }, { "epoch": 0.14, "grad_norm": 0.7325567095940773, "learning_rate": 3.870555920289287e-05, "loss": 0.2456, "step": 1291 }, { "epoch": 0.14, "grad_norm": 0.7164992876282243, "learning_rate": 3.870304023492051e-05, "loss": 0.3254, "step": 1292 }, { "epoch": 0.14, "grad_norm": 0.4950636133105119, "learning_rate": 3.870051890051725e-05, "loss": 0.2218, "step": 1293 }, { "epoch": 0.14, "grad_norm": 0.5366939533396958, "learning_rate": 3.8697995200002105e-05, "loss": 0.2285, "step": 1294 }, { "epoch": 0.14, "grad_norm": 0.6601583040645628, "learning_rate": 3.8695469133694384e-05, "loss": 0.2085, "step": 1295 }, { "epoch": 0.14, "grad_norm": 0.6514617759736432, "learning_rate": 3.8692940701913706e-05, "loss": 0.271, "step": 1296 }, { "epoch": 0.14, "grad_norm": 0.6292324286528318, "learning_rate": 3.869040990497998e-05, "loss": 0.19, "step": 1297 }, { "epoch": 0.14, "grad_norm": 0.645190370100445, "learning_rate": 3.868787674321343e-05, "loss": 0.2397, "step": 1298 }, { "epoch": 0.14, "grad_norm": 0.6137624662807509, "learning_rate": 3.868534121693455e-05, "loss": 0.2095, "step": 1299 }, { "epoch": 0.14, "grad_norm": 0.603864895883944, "learning_rate": 3.868280332646417e-05, "loss": 0.2651, "step": 1300 }, { "epoch": 0.14, "grad_norm": 0.5939647135753702, "learning_rate": 3.868026307212339e-05, "loss": 0.2023, "step": 1301 }, { "epoch": 0.14, "grad_norm": 0.587893514531027, "learning_rate": 3.867772045423362e-05, "loss": 0.2385, "step": 1302 }, { "epoch": 0.14, "grad_norm": 0.628995782390761, "learning_rate": 3.867517547311657e-05, "loss": 0.2385, "step": 1303 }, { "epoch": 0.14, "grad_norm": 0.670993803706319, "learning_rate": 3.8672628129094255e-05, "loss": 0.2629, "step": 1304 }, { "epoch": 0.14, "grad_norm": 0.5956606990527377, "learning_rate": 3.867007842248897e-05, "loss": 0.2059, "step": 1305 }, { "epoch": 0.14, "grad_norm": 0.6449072463596339, "learning_rate": 3.8667526353623326e-05, "loss": 0.2044, "step": 1306 }, { "epoch": 0.14, "grad_norm": 0.7756324602332358, "learning_rate": 3.866497192282023e-05, "loss": 0.2972, "step": 1307 }, { "epoch": 0.14, "grad_norm": 0.5729374766182814, "learning_rate": 3.866241513040288e-05, "loss": 0.1879, "step": 1308 }, { "epoch": 0.14, "grad_norm": 0.6493318204503121, "learning_rate": 3.865985597669478e-05, "loss": 0.1981, "step": 1309 }, { "epoch": 0.14, "grad_norm": 0.9166138084661662, "learning_rate": 3.8657294462019735e-05, "loss": 0.3181, "step": 1310 }, { "epoch": 0.14, "grad_norm": 0.6929765315946769, "learning_rate": 3.865473058670183e-05, "loss": 0.2, "step": 1311 }, { "epoch": 0.14, "grad_norm": 0.80787036622088, "learning_rate": 3.865216435106549e-05, "loss": 0.2661, "step": 1312 }, { "epoch": 0.14, "grad_norm": 0.5621348891521877, "learning_rate": 3.8649595755435383e-05, "loss": 0.1811, "step": 1313 }, { "epoch": 0.14, "grad_norm": 0.5045417799667435, "learning_rate": 3.8647024800136524e-05, "loss": 0.1838, "step": 1314 }, { "epoch": 0.14, "grad_norm": 0.683540603751585, "learning_rate": 3.86444514854942e-05, "loss": 0.1961, "step": 1315 }, { "epoch": 0.14, "grad_norm": 0.6355911755259537, "learning_rate": 3.8641875811834004e-05, "loss": 0.2462, "step": 1316 }, { "epoch": 0.14, "grad_norm": 0.5304297678673695, "learning_rate": 3.8639297779481825e-05, "loss": 0.194, "step": 1317 }, { "epoch": 0.14, "grad_norm": 0.5986492658784093, "learning_rate": 3.863671738876385e-05, "loss": 0.2387, "step": 1318 }, { "epoch": 0.14, "grad_norm": 0.528911238946868, "learning_rate": 3.8634134640006585e-05, "loss": 0.1738, "step": 1319 }, { "epoch": 0.14, "grad_norm": 0.7341278553106145, "learning_rate": 3.863154953353679e-05, "loss": 0.2954, "step": 1320 }, { "epoch": 0.15, "grad_norm": 0.6440245052348925, "learning_rate": 3.862896206968157e-05, "loss": 0.2334, "step": 1321 }, { "epoch": 0.15, "grad_norm": 0.6774699989939744, "learning_rate": 3.8626372248768295e-05, "loss": 0.2287, "step": 1322 }, { "epoch": 0.15, "grad_norm": 0.9057785649545252, "learning_rate": 3.862378007112465e-05, "loss": 0.3634, "step": 1323 }, { "epoch": 0.15, "grad_norm": 0.7203096385576256, "learning_rate": 3.862118553707863e-05, "loss": 0.2639, "step": 1324 }, { "epoch": 0.15, "grad_norm": 0.7933059877269436, "learning_rate": 3.8618588646958483e-05, "loss": 0.3027, "step": 1325 }, { "epoch": 0.15, "grad_norm": 0.6650534696880952, "learning_rate": 3.86159894010928e-05, "loss": 0.2746, "step": 1326 }, { "epoch": 0.15, "grad_norm": 0.43670280236348946, "learning_rate": 3.8613387799810464e-05, "loss": 0.1619, "step": 1327 }, { "epoch": 0.15, "grad_norm": 0.5615618617967515, "learning_rate": 3.8610783843440626e-05, "loss": 0.163, "step": 1328 }, { "epoch": 0.15, "grad_norm": 0.629366025434526, "learning_rate": 3.860817753231278e-05, "loss": 0.214, "step": 1329 }, { "epoch": 0.15, "grad_norm": 0.44901373787616894, "learning_rate": 3.8605568866756666e-05, "loss": 0.1559, "step": 1330 }, { "epoch": 0.15, "grad_norm": 0.5633414121289285, "learning_rate": 3.860295784710237e-05, "loss": 0.2219, "step": 1331 }, { "epoch": 0.15, "grad_norm": 0.5336170755540967, "learning_rate": 3.860034447368024e-05, "loss": 0.2078, "step": 1332 }, { "epoch": 0.15, "grad_norm": 0.7093887226945248, "learning_rate": 3.8597728746820955e-05, "loss": 0.2702, "step": 1333 }, { "epoch": 0.15, "grad_norm": 0.8586704834239652, "learning_rate": 3.8595110666855466e-05, "loss": 0.3336, "step": 1334 }, { "epoch": 0.15, "grad_norm": 0.7121096579111179, "learning_rate": 3.859249023411503e-05, "loss": 0.2298, "step": 1335 }, { "epoch": 0.15, "grad_norm": 0.6523963313126454, "learning_rate": 3.858986744893119e-05, "loss": 0.2655, "step": 1336 }, { "epoch": 0.15, "grad_norm": 0.7057387408818833, "learning_rate": 3.8587242311635805e-05, "loss": 0.2454, "step": 1337 }, { "epoch": 0.15, "grad_norm": 0.5975010739001595, "learning_rate": 3.858461482256103e-05, "loss": 0.2242, "step": 1338 }, { "epoch": 0.15, "grad_norm": 0.7903046603585147, "learning_rate": 3.858198498203931e-05, "loss": 0.2764, "step": 1339 }, { "epoch": 0.15, "grad_norm": 0.6611386725702717, "learning_rate": 3.8579352790403395e-05, "loss": 0.1945, "step": 1340 }, { "epoch": 0.15, "grad_norm": 0.644720015193557, "learning_rate": 3.857671824798631e-05, "loss": 0.2159, "step": 1341 }, { "epoch": 0.15, "grad_norm": 0.6836654189551152, "learning_rate": 3.857408135512142e-05, "loss": 0.2078, "step": 1342 }, { "epoch": 0.15, "grad_norm": 0.5925070149774991, "learning_rate": 3.857144211214233e-05, "loss": 0.2131, "step": 1343 }, { "epoch": 0.15, "grad_norm": 0.6736078759298164, "learning_rate": 3.8568800519383e-05, "loss": 0.2433, "step": 1344 }, { "epoch": 0.15, "grad_norm": 0.6608968113488545, "learning_rate": 3.856615657717765e-05, "loss": 0.205, "step": 1345 }, { "epoch": 0.15, "grad_norm": 0.5574863367159772, "learning_rate": 3.856351028586082e-05, "loss": 0.162, "step": 1346 }, { "epoch": 0.15, "grad_norm": 0.5952551986871059, "learning_rate": 3.856086164576731e-05, "loss": 0.2107, "step": 1347 }, { "epoch": 0.15, "grad_norm": 0.6002483132246118, "learning_rate": 3.855821065723228e-05, "loss": 0.2072, "step": 1348 }, { "epoch": 0.15, "grad_norm": 0.7489851602375327, "learning_rate": 3.8555557320591125e-05, "loss": 0.3085, "step": 1349 }, { "epoch": 0.15, "grad_norm": 0.5825581821559521, "learning_rate": 3.855290163617956e-05, "loss": 0.1934, "step": 1350 }, { "epoch": 0.15, "grad_norm": 0.5336214318083551, "learning_rate": 3.855024360433362e-05, "loss": 0.1938, "step": 1351 }, { "epoch": 0.15, "grad_norm": 0.5675469323636004, "learning_rate": 3.8547583225389596e-05, "loss": 0.2049, "step": 1352 }, { "epoch": 0.15, "grad_norm": 0.8589140956521026, "learning_rate": 3.8544920499684115e-05, "loss": 0.3192, "step": 1353 }, { "epoch": 0.15, "grad_norm": 0.5594412769713898, "learning_rate": 3.8542255427554065e-05, "loss": 0.2138, "step": 1354 }, { "epoch": 0.15, "grad_norm": 0.6392457116482576, "learning_rate": 3.853958800933666e-05, "loss": 0.3116, "step": 1355 }, { "epoch": 0.15, "grad_norm": 0.5834558449115287, "learning_rate": 3.85369182453694e-05, "loss": 0.23, "step": 1356 }, { "epoch": 0.15, "grad_norm": 0.564808136594211, "learning_rate": 3.853424613599007e-05, "loss": 0.1764, "step": 1357 }, { "epoch": 0.15, "grad_norm": 0.47273115084798006, "learning_rate": 3.853157168153677e-05, "loss": 0.1842, "step": 1358 }, { "epoch": 0.15, "grad_norm": 0.679784000605634, "learning_rate": 3.852889488234789e-05, "loss": 0.2744, "step": 1359 }, { "epoch": 0.15, "grad_norm": 0.45843104021842784, "learning_rate": 3.852621573876212e-05, "loss": 0.163, "step": 1360 }, { "epoch": 0.15, "grad_norm": 0.5392846465212989, "learning_rate": 3.8523534251118426e-05, "loss": 0.2324, "step": 1361 }, { "epoch": 0.15, "grad_norm": 0.645274617928227, "learning_rate": 3.8520850419756104e-05, "loss": 0.2286, "step": 1362 }, { "epoch": 0.15, "grad_norm": 0.8955659837039383, "learning_rate": 3.851816424501473e-05, "loss": 0.3426, "step": 1363 }, { "epoch": 0.15, "grad_norm": 0.5718774097162559, "learning_rate": 3.851547572723416e-05, "loss": 0.1985, "step": 1364 }, { "epoch": 0.15, "grad_norm": 0.6350843363047664, "learning_rate": 3.851278486675459e-05, "loss": 0.1971, "step": 1365 }, { "epoch": 0.15, "grad_norm": 0.6870865662450322, "learning_rate": 3.851009166391646e-05, "loss": 0.2976, "step": 1366 }, { "epoch": 0.15, "grad_norm": 0.6039437315685586, "learning_rate": 3.850739611906054e-05, "loss": 0.2193, "step": 1367 }, { "epoch": 0.15, "grad_norm": 0.6965816359289543, "learning_rate": 3.850469823252789e-05, "loss": 0.2746, "step": 1368 }, { "epoch": 0.15, "grad_norm": 0.5924091005010735, "learning_rate": 3.850199800465987e-05, "loss": 0.1989, "step": 1369 }, { "epoch": 0.15, "grad_norm": 0.5739712603498505, "learning_rate": 3.849929543579812e-05, "loss": 0.1996, "step": 1370 }, { "epoch": 0.15, "grad_norm": 0.6416951289820902, "learning_rate": 3.849659052628459e-05, "loss": 0.1992, "step": 1371 }, { "epoch": 0.15, "grad_norm": 0.6425116600284796, "learning_rate": 3.849388327646152e-05, "loss": 0.2582, "step": 1372 }, { "epoch": 0.15, "grad_norm": 0.5995535560496725, "learning_rate": 3.849117368667145e-05, "loss": 0.1936, "step": 1373 }, { "epoch": 0.15, "grad_norm": 0.7262496274595898, "learning_rate": 3.848846175725722e-05, "loss": 0.2981, "step": 1374 }, { "epoch": 0.15, "grad_norm": 0.520994635082087, "learning_rate": 3.848574748856196e-05, "loss": 0.2027, "step": 1375 }, { "epoch": 0.15, "grad_norm": 0.6204295895855063, "learning_rate": 3.84830308809291e-05, "loss": 0.2178, "step": 1376 }, { "epoch": 0.15, "grad_norm": 0.5023853207676777, "learning_rate": 3.848031193470234e-05, "loss": 0.1797, "step": 1377 }, { "epoch": 0.15, "grad_norm": 0.5656745657094889, "learning_rate": 3.8477590650225735e-05, "loss": 0.1695, "step": 1378 }, { "epoch": 0.15, "grad_norm": 0.5448724047447405, "learning_rate": 3.847486702784358e-05, "loss": 0.1843, "step": 1379 }, { "epoch": 0.15, "grad_norm": 0.6005323432642046, "learning_rate": 3.8472141067900485e-05, "loss": 0.2546, "step": 1380 }, { "epoch": 0.15, "grad_norm": 0.5501483229564694, "learning_rate": 3.8469412770741355e-05, "loss": 0.2079, "step": 1381 }, { "epoch": 0.15, "grad_norm": 0.5533484293226613, "learning_rate": 3.84666821367114e-05, "loss": 0.2008, "step": 1382 }, { "epoch": 0.15, "grad_norm": 0.6177398286566418, "learning_rate": 3.846394916615611e-05, "loss": 0.1941, "step": 1383 }, { "epoch": 0.15, "grad_norm": 0.6260502573990595, "learning_rate": 3.846121385942128e-05, "loss": 0.243, "step": 1384 }, { "epoch": 0.15, "grad_norm": 0.7802480757963685, "learning_rate": 3.8458476216853e-05, "loss": 0.2483, "step": 1385 }, { "epoch": 0.15, "grad_norm": 0.497857360686356, "learning_rate": 3.845573623879766e-05, "loss": 0.15, "step": 1386 }, { "epoch": 0.15, "grad_norm": 0.647615969409576, "learning_rate": 3.8452993925601935e-05, "loss": 0.2322, "step": 1387 }, { "epoch": 0.15, "grad_norm": 0.7513537904255497, "learning_rate": 3.845024927761279e-05, "loss": 0.2723, "step": 1388 }, { "epoch": 0.15, "grad_norm": 0.5922225579425241, "learning_rate": 3.844750229517751e-05, "loss": 0.1969, "step": 1389 }, { "epoch": 0.15, "grad_norm": 0.6564419329487305, "learning_rate": 3.844475297864366e-05, "loss": 0.1896, "step": 1390 }, { "epoch": 0.15, "grad_norm": 0.5620477765909019, "learning_rate": 3.84420013283591e-05, "loss": 0.1986, "step": 1391 }, { "epoch": 0.15, "grad_norm": 0.7264961704617914, "learning_rate": 3.843924734467199e-05, "loss": 0.2349, "step": 1392 }, { "epoch": 0.15, "grad_norm": 0.6200766482581344, "learning_rate": 3.843649102793077e-05, "loss": 0.2405, "step": 1393 }, { "epoch": 0.15, "grad_norm": 0.5409938931834414, "learning_rate": 3.843373237848419e-05, "loss": 0.1651, "step": 1394 }, { "epoch": 0.15, "grad_norm": 0.7545101928932502, "learning_rate": 3.843097139668131e-05, "loss": 0.2639, "step": 1395 }, { "epoch": 0.15, "grad_norm": 0.5616662946675293, "learning_rate": 3.842820808287144e-05, "loss": 0.1942, "step": 1396 }, { "epoch": 0.15, "grad_norm": 0.6924023968465035, "learning_rate": 3.842544243740424e-05, "loss": 0.2813, "step": 1397 }, { "epoch": 0.15, "grad_norm": 0.5803282227489259, "learning_rate": 3.842267446062962e-05, "loss": 0.1823, "step": 1398 }, { "epoch": 0.15, "grad_norm": 0.5079350837057682, "learning_rate": 3.84199041528978e-05, "loss": 0.1484, "step": 1399 }, { "epoch": 0.15, "grad_norm": 0.6176581941897777, "learning_rate": 3.841713151455931e-05, "loss": 0.2529, "step": 1400 }, { "epoch": 0.15, "grad_norm": 0.5198121422943797, "learning_rate": 3.8414356545964955e-05, "loss": 0.2114, "step": 1401 }, { "epoch": 0.15, "grad_norm": 0.5765381763982369, "learning_rate": 3.8411579247465845e-05, "loss": 0.2078, "step": 1402 }, { "epoch": 0.15, "grad_norm": 0.5100605429448324, "learning_rate": 3.840879961941338e-05, "loss": 0.195, "step": 1403 }, { "epoch": 0.15, "grad_norm": 0.5536489569742455, "learning_rate": 3.840601766215926e-05, "loss": 0.1969, "step": 1404 }, { "epoch": 0.15, "grad_norm": 0.5270076952504538, "learning_rate": 3.840323337605548e-05, "loss": 0.1979, "step": 1405 }, { "epoch": 0.15, "grad_norm": 0.6129683407002526, "learning_rate": 3.840044676145431e-05, "loss": 0.2249, "step": 1406 }, { "epoch": 0.15, "grad_norm": 0.5335944001178495, "learning_rate": 3.839765781870834e-05, "loss": 0.1946, "step": 1407 }, { "epoch": 0.15, "grad_norm": 0.5991831703017667, "learning_rate": 3.839486654817045e-05, "loss": 0.2184, "step": 1408 }, { "epoch": 0.15, "grad_norm": 0.6384652179633203, "learning_rate": 3.8392072950193806e-05, "loss": 0.2342, "step": 1409 }, { "epoch": 0.15, "grad_norm": 0.6515416679436807, "learning_rate": 3.838927702513187e-05, "loss": 0.1825, "step": 1410 }, { "epoch": 0.15, "grad_norm": 0.6086950176247553, "learning_rate": 3.8386478773338407e-05, "loss": 0.1963, "step": 1411 }, { "epoch": 0.16, "grad_norm": 0.5317600830764958, "learning_rate": 3.838367819516746e-05, "loss": 0.1645, "step": 1412 }, { "epoch": 0.16, "grad_norm": 0.5675356424372892, "learning_rate": 3.8380875290973384e-05, "loss": 0.1917, "step": 1413 }, { "epoch": 0.16, "grad_norm": 0.755198976305292, "learning_rate": 3.837807006111082e-05, "loss": 0.269, "step": 1414 }, { "epoch": 0.16, "grad_norm": 0.597506834813055, "learning_rate": 3.837526250593471e-05, "loss": 0.1558, "step": 1415 }, { "epoch": 0.16, "grad_norm": 0.6141618856902178, "learning_rate": 3.837245262580027e-05, "loss": 0.2322, "step": 1416 }, { "epoch": 0.16, "grad_norm": 0.7328624570040942, "learning_rate": 3.8369640421063035e-05, "loss": 0.1851, "step": 1417 }, { "epoch": 0.16, "grad_norm": 0.5636229961137138, "learning_rate": 3.836682589207882e-05, "loss": 0.1771, "step": 1418 }, { "epoch": 0.16, "grad_norm": 0.5690682306896664, "learning_rate": 3.836400903920373e-05, "loss": 0.2086, "step": 1419 }, { "epoch": 0.16, "grad_norm": 0.7196154566892891, "learning_rate": 3.836118986279419e-05, "loss": 0.2744, "step": 1420 }, { "epoch": 0.16, "grad_norm": 0.651059674331848, "learning_rate": 3.835836836320689e-05, "loss": 0.2084, "step": 1421 }, { "epoch": 0.16, "grad_norm": 0.6362643346314623, "learning_rate": 3.835554454079882e-05, "loss": 0.2203, "step": 1422 }, { "epoch": 0.16, "grad_norm": 0.5161123284770505, "learning_rate": 3.835271839592727e-05, "loss": 0.2167, "step": 1423 }, { "epoch": 0.16, "grad_norm": 0.5383637640484493, "learning_rate": 3.834988992894983e-05, "loss": 0.1836, "step": 1424 }, { "epoch": 0.16, "grad_norm": 0.6469490709144389, "learning_rate": 3.834705914022437e-05, "loss": 0.2632, "step": 1425 }, { "epoch": 0.16, "grad_norm": 0.5233903835214193, "learning_rate": 3.834422603010906e-05, "loss": 0.1933, "step": 1426 }, { "epoch": 0.16, "grad_norm": 0.5650177082308085, "learning_rate": 3.834139059896237e-05, "loss": 0.2063, "step": 1427 }, { "epoch": 0.16, "grad_norm": 0.5327804622918416, "learning_rate": 3.833855284714305e-05, "loss": 0.2033, "step": 1428 }, { "epoch": 0.16, "grad_norm": 0.6364780275218678, "learning_rate": 3.8335712775010155e-05, "loss": 0.2276, "step": 1429 }, { "epoch": 0.16, "grad_norm": 0.7360250319728143, "learning_rate": 3.833287038292303e-05, "loss": 0.2505, "step": 1430 }, { "epoch": 0.16, "grad_norm": 0.5623256890103914, "learning_rate": 3.8330025671241304e-05, "loss": 0.1774, "step": 1431 }, { "epoch": 0.16, "grad_norm": 0.625355929115744, "learning_rate": 3.832717864032492e-05, "loss": 0.2196, "step": 1432 }, { "epoch": 0.16, "grad_norm": 0.5848711610404335, "learning_rate": 3.83243292905341e-05, "loss": 0.201, "step": 1433 }, { "epoch": 0.16, "grad_norm": 0.6313675289463488, "learning_rate": 3.832147762222936e-05, "loss": 0.2112, "step": 1434 }, { "epoch": 0.16, "grad_norm": 0.5584681976710097, "learning_rate": 3.831862363577151e-05, "loss": 0.1603, "step": 1435 }, { "epoch": 0.16, "grad_norm": 0.619959869854945, "learning_rate": 3.8315767331521655e-05, "loss": 0.2016, "step": 1436 }, { "epoch": 0.16, "grad_norm": 0.7049163040910001, "learning_rate": 3.83129087098412e-05, "loss": 0.2849, "step": 1437 }, { "epoch": 0.16, "grad_norm": 0.6509255623604149, "learning_rate": 3.831004777109183e-05, "loss": 0.2381, "step": 1438 }, { "epoch": 0.16, "grad_norm": 0.6440643931679848, "learning_rate": 3.830718451563553e-05, "loss": 0.2237, "step": 1439 }, { "epoch": 0.16, "grad_norm": 0.535347368404155, "learning_rate": 3.8304318943834584e-05, "loss": 0.2123, "step": 1440 }, { "epoch": 0.16, "grad_norm": 0.5214642010047621, "learning_rate": 3.830145105605156e-05, "loss": 0.2057, "step": 1441 }, { "epoch": 0.16, "grad_norm": 0.5755475816221367, "learning_rate": 3.8298580852649316e-05, "loss": 0.2069, "step": 1442 }, { "epoch": 0.16, "grad_norm": 0.5464044453845005, "learning_rate": 3.8295708333991015e-05, "loss": 0.171, "step": 1443 }, { "epoch": 0.16, "grad_norm": 0.4922906979349393, "learning_rate": 3.82928335004401e-05, "loss": 0.2265, "step": 1444 }, { "epoch": 0.16, "grad_norm": 0.5361928437190463, "learning_rate": 3.8289956352360326e-05, "loss": 0.2056, "step": 1445 }, { "epoch": 0.16, "grad_norm": 0.6365842852752848, "learning_rate": 3.828707689011572e-05, "loss": 0.2376, "step": 1446 }, { "epoch": 0.16, "grad_norm": 0.5581368131337875, "learning_rate": 3.828419511407062e-05, "loss": 0.1818, "step": 1447 }, { "epoch": 0.16, "grad_norm": 0.6162437273058337, "learning_rate": 3.828131102458962e-05, "loss": 0.2223, "step": 1448 }, { "epoch": 0.16, "grad_norm": 0.7446113759463118, "learning_rate": 3.8278424622037674e-05, "loss": 0.1992, "step": 1449 }, { "epoch": 0.16, "grad_norm": 0.6518321515317013, "learning_rate": 3.827553590677996e-05, "loss": 0.1876, "step": 1450 }, { "epoch": 0.16, "grad_norm": 0.4870119304820982, "learning_rate": 3.827264487918198e-05, "loss": 0.2002, "step": 1451 }, { "epoch": 0.16, "grad_norm": 0.5429456417737526, "learning_rate": 3.8269751539609525e-05, "loss": 0.1738, "step": 1452 }, { "epoch": 0.16, "grad_norm": 0.7087697106098125, "learning_rate": 3.8266855888428696e-05, "loss": 0.2639, "step": 1453 }, { "epoch": 0.16, "grad_norm": 0.5556042040611595, "learning_rate": 3.8263957926005855e-05, "loss": 0.2325, "step": 1454 }, { "epoch": 0.16, "grad_norm": 0.6686974727643898, "learning_rate": 3.826105765270767e-05, "loss": 0.2586, "step": 1455 }, { "epoch": 0.16, "grad_norm": 0.5034636098483164, "learning_rate": 3.825815506890111e-05, "loss": 0.1693, "step": 1456 }, { "epoch": 0.16, "grad_norm": 0.5640029167766091, "learning_rate": 3.825525017495343e-05, "loss": 0.2198, "step": 1457 }, { "epoch": 0.16, "grad_norm": 0.4213213289697031, "learning_rate": 3.825234297123216e-05, "loss": 0.1295, "step": 1458 }, { "epoch": 0.16, "grad_norm": 0.5488004575658648, "learning_rate": 3.824943345810516e-05, "loss": 0.2173, "step": 1459 }, { "epoch": 0.16, "grad_norm": 0.6090241914056581, "learning_rate": 3.824652163594056e-05, "loss": 0.2189, "step": 1460 }, { "epoch": 0.16, "grad_norm": 0.6371886268968706, "learning_rate": 3.824360750510676e-05, "loss": 0.2531, "step": 1461 }, { "epoch": 0.16, "grad_norm": 0.5803139460909753, "learning_rate": 3.8240691065972486e-05, "loss": 0.1876, "step": 1462 }, { "epoch": 0.16, "grad_norm": 0.6438605315490373, "learning_rate": 3.823777231890676e-05, "loss": 0.2456, "step": 1463 }, { "epoch": 0.16, "grad_norm": 0.6280611651043215, "learning_rate": 3.823485126427886e-05, "loss": 0.2457, "step": 1464 }, { "epoch": 0.16, "grad_norm": 0.4604206743828294, "learning_rate": 3.823192790245839e-05, "loss": 0.1692, "step": 1465 }, { "epoch": 0.16, "grad_norm": 0.5265680895771866, "learning_rate": 3.822900223381522e-05, "loss": 0.1847, "step": 1466 }, { "epoch": 0.16, "grad_norm": 0.6115156136045458, "learning_rate": 3.822607425871954e-05, "loss": 0.2119, "step": 1467 }, { "epoch": 0.16, "grad_norm": 0.7013657920693921, "learning_rate": 3.8223143977541806e-05, "loss": 0.2722, "step": 1468 }, { "epoch": 0.16, "grad_norm": 0.8230019215391029, "learning_rate": 3.822021139065279e-05, "loss": 0.2843, "step": 1469 }, { "epoch": 0.16, "grad_norm": 0.6136283863958748, "learning_rate": 3.821727649842352e-05, "loss": 0.2236, "step": 1470 }, { "epoch": 0.16, "grad_norm": 0.695559147153752, "learning_rate": 3.821433930122536e-05, "loss": 0.2726, "step": 1471 }, { "epoch": 0.16, "grad_norm": 0.5096054117696908, "learning_rate": 3.821139979942992e-05, "loss": 0.1532, "step": 1472 }, { "epoch": 0.16, "grad_norm": 0.5443504920111407, "learning_rate": 3.820845799340915e-05, "loss": 0.2026, "step": 1473 }, { "epoch": 0.16, "grad_norm": 0.6468763286927072, "learning_rate": 3.820551388353525e-05, "loss": 0.2244, "step": 1474 }, { "epoch": 0.16, "grad_norm": 0.6628971420983781, "learning_rate": 3.8202567470180734e-05, "loss": 0.236, "step": 1475 }, { "epoch": 0.16, "grad_norm": 0.9479873581960843, "learning_rate": 3.819961875371839e-05, "loss": 0.2186, "step": 1476 }, { "epoch": 0.16, "grad_norm": 0.5568327187478903, "learning_rate": 3.819666773452133e-05, "loss": 0.1855, "step": 1477 }, { "epoch": 0.16, "grad_norm": 0.6491061979524193, "learning_rate": 3.819371441296292e-05, "loss": 0.2201, "step": 1478 }, { "epoch": 0.16, "grad_norm": 0.6765599257351926, "learning_rate": 3.819075878941685e-05, "loss": 0.2224, "step": 1479 }, { "epoch": 0.16, "grad_norm": 0.5868176271550007, "learning_rate": 3.8187800864257065e-05, "loss": 0.2025, "step": 1480 }, { "epoch": 0.16, "grad_norm": 0.6483809664797203, "learning_rate": 3.818484063785783e-05, "loss": 0.1787, "step": 1481 }, { "epoch": 0.16, "grad_norm": 0.607151965064967, "learning_rate": 3.818187811059369e-05, "loss": 0.2227, "step": 1482 }, { "epoch": 0.16, "grad_norm": 0.6682745919331851, "learning_rate": 3.817891328283949e-05, "loss": 0.2553, "step": 1483 }, { "epoch": 0.16, "grad_norm": 0.6850585284950187, "learning_rate": 3.817594615497035e-05, "loss": 0.2702, "step": 1484 }, { "epoch": 0.16, "grad_norm": 0.6719758379472411, "learning_rate": 3.817297672736169e-05, "loss": 0.2198, "step": 1485 }, { "epoch": 0.16, "grad_norm": 0.6036882616546373, "learning_rate": 3.817000500038924e-05, "loss": 0.2197, "step": 1486 }, { "epoch": 0.16, "grad_norm": 0.7633477032723397, "learning_rate": 3.816703097442898e-05, "loss": 0.2201, "step": 1487 }, { "epoch": 0.16, "grad_norm": 0.48237705408604487, "learning_rate": 3.8164054649857206e-05, "loss": 0.1883, "step": 1488 }, { "epoch": 0.16, "grad_norm": 0.6028399095093094, "learning_rate": 3.816107602705052e-05, "loss": 0.2148, "step": 1489 }, { "epoch": 0.16, "grad_norm": 0.5465009500036333, "learning_rate": 3.815809510638578e-05, "loss": 0.1569, "step": 1490 }, { "epoch": 0.16, "grad_norm": 0.9283049104015987, "learning_rate": 3.815511188824016e-05, "loss": 0.3973, "step": 1491 }, { "epoch": 0.16, "grad_norm": 0.544770195746384, "learning_rate": 3.81521263729911e-05, "loss": 0.1509, "step": 1492 }, { "epoch": 0.16, "grad_norm": 0.4282058138346209, "learning_rate": 3.8149138561016377e-05, "loss": 0.1703, "step": 1493 }, { "epoch": 0.16, "grad_norm": 0.5242653463606947, "learning_rate": 3.8146148452694e-05, "loss": 0.1906, "step": 1494 }, { "epoch": 0.16, "grad_norm": 0.5849806538667813, "learning_rate": 3.814315604840231e-05, "loss": 0.2413, "step": 1495 }, { "epoch": 0.16, "grad_norm": 0.5687919622179289, "learning_rate": 3.8140161348519924e-05, "loss": 0.225, "step": 1496 }, { "epoch": 0.16, "grad_norm": 0.5605160641592873, "learning_rate": 3.813716435342575e-05, "loss": 0.2032, "step": 1497 }, { "epoch": 0.16, "grad_norm": 0.5818536730749441, "learning_rate": 3.813416506349899e-05, "loss": 0.1825, "step": 1498 }, { "epoch": 0.16, "grad_norm": 0.5871388136741383, "learning_rate": 3.813116347911913e-05, "loss": 0.212, "step": 1499 }, { "epoch": 0.16, "grad_norm": 0.5426386097095636, "learning_rate": 3.8128159600665954e-05, "loss": 0.1559, "step": 1500 }, { "epoch": 0.16, "grad_norm": 0.5433230444844735, "learning_rate": 3.812515342851953e-05, "loss": 0.1368, "step": 1501 }, { "epoch": 0.16, "grad_norm": 0.6322249522408151, "learning_rate": 3.812214496306022e-05, "loss": 0.2408, "step": 1502 }, { "epoch": 0.17, "grad_norm": 0.6238523436773569, "learning_rate": 3.8119134204668665e-05, "loss": 0.1961, "step": 1503 }, { "epoch": 0.17, "grad_norm": 0.5798299239167037, "learning_rate": 3.8116121153725824e-05, "loss": 0.211, "step": 1504 }, { "epoch": 0.17, "grad_norm": 0.6820964655333405, "learning_rate": 3.811310581061291e-05, "loss": 0.2239, "step": 1505 }, { "epoch": 0.17, "grad_norm": 0.6070979195882269, "learning_rate": 3.8110088175711456e-05, "loss": 0.2033, "step": 1506 }, { "epoch": 0.17, "grad_norm": 0.5318706477308779, "learning_rate": 3.8107068249403267e-05, "loss": 0.2144, "step": 1507 }, { "epoch": 0.17, "grad_norm": 0.6492636370146587, "learning_rate": 3.810404603207045e-05, "loss": 0.2298, "step": 1508 }, { "epoch": 0.17, "grad_norm": 0.7436351911904817, "learning_rate": 3.810102152409539e-05, "loss": 0.2862, "step": 1509 }, { "epoch": 0.17, "grad_norm": 0.6365678760599308, "learning_rate": 3.809799472586077e-05, "loss": 0.227, "step": 1510 }, { "epoch": 0.17, "grad_norm": 0.7379404320776938, "learning_rate": 3.8094965637749556e-05, "loss": 0.3129, "step": 1511 }, { "epoch": 0.17, "grad_norm": 0.550846258344613, "learning_rate": 3.809193426014501e-05, "loss": 0.1946, "step": 1512 }, { "epoch": 0.17, "grad_norm": 0.5805506724157818, "learning_rate": 3.808890059343069e-05, "loss": 0.19, "step": 1513 }, { "epoch": 0.17, "grad_norm": 0.5085162055322425, "learning_rate": 3.808586463799042e-05, "loss": 0.1545, "step": 1514 }, { "epoch": 0.17, "grad_norm": 0.5153072996785115, "learning_rate": 3.808282639420834e-05, "loss": 0.2043, "step": 1515 }, { "epoch": 0.17, "grad_norm": 0.5088192863053964, "learning_rate": 3.807978586246887e-05, "loss": 0.1531, "step": 1516 }, { "epoch": 0.17, "grad_norm": 0.5022200828194179, "learning_rate": 3.807674304315671e-05, "loss": 0.1609, "step": 1517 }, { "epoch": 0.17, "grad_norm": 0.49465972412747927, "learning_rate": 3.8073697936656866e-05, "loss": 0.1758, "step": 1518 }, { "epoch": 0.17, "grad_norm": 0.5887060498478996, "learning_rate": 3.807065054335461e-05, "loss": 0.2244, "step": 1519 }, { "epoch": 0.17, "grad_norm": 0.6424893952767317, "learning_rate": 3.806760086363554e-05, "loss": 0.2094, "step": 1520 }, { "epoch": 0.17, "grad_norm": 0.6669745156668481, "learning_rate": 3.8064548897885505e-05, "loss": 0.2297, "step": 1521 }, { "epoch": 0.17, "grad_norm": 0.5918228596940724, "learning_rate": 3.806149464649066e-05, "loss": 0.2126, "step": 1522 }, { "epoch": 0.17, "grad_norm": 0.5942133551128759, "learning_rate": 3.805843810983745e-05, "loss": 0.178, "step": 1523 }, { "epoch": 0.17, "grad_norm": 0.5799344588112726, "learning_rate": 3.8055379288312625e-05, "loss": 0.1831, "step": 1524 }, { "epoch": 0.17, "grad_norm": 0.6139264934073163, "learning_rate": 3.805231818230318e-05, "loss": 0.2034, "step": 1525 }, { "epoch": 0.17, "grad_norm": 0.8576155250514091, "learning_rate": 3.8049254792196443e-05, "loss": 0.2191, "step": 1526 }, { "epoch": 0.17, "grad_norm": 0.5857760397658246, "learning_rate": 3.804618911838001e-05, "loss": 0.2073, "step": 1527 }, { "epoch": 0.17, "grad_norm": 0.6263960029675492, "learning_rate": 3.804312116124177e-05, "loss": 0.211, "step": 1528 }, { "epoch": 0.17, "grad_norm": 0.6773208763897873, "learning_rate": 3.80400509211699e-05, "loss": 0.241, "step": 1529 }, { "epoch": 0.17, "grad_norm": 0.6706675836103072, "learning_rate": 3.8036978398552876e-05, "loss": 0.2997, "step": 1530 }, { "epoch": 0.17, "grad_norm": 0.5079727506876048, "learning_rate": 3.803390359377944e-05, "loss": 0.1758, "step": 1531 }, { "epoch": 0.17, "grad_norm": 0.5475943058261229, "learning_rate": 3.803082650723864e-05, "loss": 0.1848, "step": 1532 }, { "epoch": 0.17, "grad_norm": 0.48245659212915665, "learning_rate": 3.802774713931982e-05, "loss": 0.1871, "step": 1533 }, { "epoch": 0.17, "grad_norm": 0.5889031951169206, "learning_rate": 3.802466549041258e-05, "loss": 0.261, "step": 1534 }, { "epoch": 0.17, "grad_norm": 0.6305125792063027, "learning_rate": 3.802158156090685e-05, "loss": 0.2286, "step": 1535 }, { "epoch": 0.17, "grad_norm": 0.587873312863154, "learning_rate": 3.8018495351192825e-05, "loss": 0.2003, "step": 1536 }, { "epoch": 0.17, "grad_norm": 0.7375998203160486, "learning_rate": 3.801540686166099e-05, "loss": 0.2746, "step": 1537 }, { "epoch": 0.17, "grad_norm": 0.5119382664075481, "learning_rate": 3.801231609270212e-05, "loss": 0.1534, "step": 1538 }, { "epoch": 0.17, "grad_norm": 0.5164101720248367, "learning_rate": 3.800922304470728e-05, "loss": 0.1495, "step": 1539 }, { "epoch": 0.17, "grad_norm": 0.44401728344153946, "learning_rate": 3.800612771806781e-05, "loss": 0.1461, "step": 1540 }, { "epoch": 0.17, "grad_norm": 0.5497660377699297, "learning_rate": 3.8003030113175376e-05, "loss": 0.1828, "step": 1541 }, { "epoch": 0.17, "grad_norm": 0.5944255699843255, "learning_rate": 3.79999302304219e-05, "loss": 0.1602, "step": 1542 }, { "epoch": 0.17, "grad_norm": 0.6211709988136527, "learning_rate": 3.799682807019958e-05, "loss": 0.1605, "step": 1543 }, { "epoch": 0.17, "grad_norm": 0.6444652639192185, "learning_rate": 3.799372363290095e-05, "loss": 0.1967, "step": 1544 }, { "epoch": 0.17, "grad_norm": 0.5991779726320862, "learning_rate": 3.799061691891878e-05, "loss": 0.2011, "step": 1545 }, { "epoch": 0.17, "grad_norm": 0.50835618478452, "learning_rate": 3.798750792864617e-05, "loss": 0.1349, "step": 1546 }, { "epoch": 0.17, "grad_norm": 0.588203777668044, "learning_rate": 3.798439666247648e-05, "loss": 0.2122, "step": 1547 }, { "epoch": 0.17, "grad_norm": 0.6501399980680594, "learning_rate": 3.798128312080336e-05, "loss": 0.3295, "step": 1548 }, { "epoch": 0.17, "grad_norm": 0.5657072292010945, "learning_rate": 3.797816730402077e-05, "loss": 0.1815, "step": 1549 }, { "epoch": 0.17, "grad_norm": 0.5442527025722538, "learning_rate": 3.7975049212522934e-05, "loss": 0.2255, "step": 1550 }, { "epoch": 0.17, "grad_norm": 0.5618929766536301, "learning_rate": 3.7971928846704385e-05, "loss": 0.1894, "step": 1551 }, { "epoch": 0.17, "grad_norm": 0.6628438426174117, "learning_rate": 3.7968806206959915e-05, "loss": 0.2312, "step": 1552 }, { "epoch": 0.17, "grad_norm": 0.582174910666719, "learning_rate": 3.7965681293684635e-05, "loss": 0.1811, "step": 1553 }, { "epoch": 0.17, "grad_norm": 0.7423726704142294, "learning_rate": 3.7962554107273926e-05, "loss": 0.2482, "step": 1554 }, { "epoch": 0.17, "grad_norm": 0.6158164767391765, "learning_rate": 3.7959424648123455e-05, "loss": 0.2327, "step": 1555 }, { "epoch": 0.17, "grad_norm": 0.6030979130646366, "learning_rate": 3.795629291662919e-05, "loss": 0.2259, "step": 1556 }, { "epoch": 0.17, "grad_norm": 0.6052458075796123, "learning_rate": 3.795315891318737e-05, "loss": 0.2314, "step": 1557 }, { "epoch": 0.17, "grad_norm": 0.5678576060273508, "learning_rate": 3.795002263819453e-05, "loss": 0.1581, "step": 1558 }, { "epoch": 0.17, "grad_norm": 0.5745456310108428, "learning_rate": 3.79468840920475e-05, "loss": 0.2081, "step": 1559 }, { "epoch": 0.17, "grad_norm": 0.589193807077525, "learning_rate": 3.7943743275143384e-05, "loss": 0.2069, "step": 1560 }, { "epoch": 0.17, "grad_norm": 0.6307064851378155, "learning_rate": 3.794060018787958e-05, "loss": 0.2159, "step": 1561 }, { "epoch": 0.17, "grad_norm": 0.6045940356508995, "learning_rate": 3.793745483065377e-05, "loss": 0.1928, "step": 1562 }, { "epoch": 0.17, "grad_norm": 0.6465696526376529, "learning_rate": 3.793430720386392e-05, "loss": 0.2001, "step": 1563 }, { "epoch": 0.17, "grad_norm": 0.5094112172737284, "learning_rate": 3.7931157307908304e-05, "loss": 0.1706, "step": 1564 }, { "epoch": 0.17, "grad_norm": 0.739874196055498, "learning_rate": 3.792800514318546e-05, "loss": 0.2881, "step": 1565 }, { "epoch": 0.17, "grad_norm": 0.5597013150399909, "learning_rate": 3.792485071009421e-05, "loss": 0.1647, "step": 1566 }, { "epoch": 0.17, "grad_norm": 0.5684475125798846, "learning_rate": 3.792169400903368e-05, "loss": 0.1863, "step": 1567 }, { "epoch": 0.17, "grad_norm": 0.611883234391413, "learning_rate": 3.7918535040403284e-05, "loss": 0.2414, "step": 1568 }, { "epoch": 0.17, "grad_norm": 0.6093756846888682, "learning_rate": 3.791537380460271e-05, "loss": 0.2236, "step": 1569 }, { "epoch": 0.17, "grad_norm": 0.5269051599977148, "learning_rate": 3.791221030203193e-05, "loss": 0.1379, "step": 1570 }, { "epoch": 0.17, "grad_norm": 0.5963777817849354, "learning_rate": 3.790904453309123e-05, "loss": 0.2163, "step": 1571 }, { "epoch": 0.17, "grad_norm": 0.6536606613442913, "learning_rate": 3.790587649818115e-05, "loss": 0.2195, "step": 1572 }, { "epoch": 0.17, "grad_norm": 0.601181033178103, "learning_rate": 3.790270619770253e-05, "loss": 0.184, "step": 1573 }, { "epoch": 0.17, "grad_norm": 0.6645972398919309, "learning_rate": 3.78995336320565e-05, "loss": 0.253, "step": 1574 }, { "epoch": 0.17, "grad_norm": 0.7148911507321741, "learning_rate": 3.789635880164448e-05, "loss": 0.224, "step": 1575 }, { "epoch": 0.17, "grad_norm": 0.5395672687755909, "learning_rate": 3.789318170686816e-05, "loss": 0.1898, "step": 1576 }, { "epoch": 0.17, "grad_norm": 0.6881055765762845, "learning_rate": 3.789000234812953e-05, "loss": 0.1896, "step": 1577 }, { "epoch": 0.17, "grad_norm": 0.7216790267672889, "learning_rate": 3.788682072583087e-05, "loss": 0.2412, "step": 1578 }, { "epoch": 0.17, "grad_norm": 0.6937682974180902, "learning_rate": 3.788363684037473e-05, "loss": 0.2679, "step": 1579 }, { "epoch": 0.17, "grad_norm": 0.6132995942889292, "learning_rate": 3.788045069216396e-05, "loss": 0.2104, "step": 1580 }, { "epoch": 0.17, "grad_norm": 0.5429407290905746, "learning_rate": 3.787726228160169e-05, "loss": 0.1936, "step": 1581 }, { "epoch": 0.17, "grad_norm": 0.5499775751917583, "learning_rate": 3.787407160909134e-05, "loss": 0.2203, "step": 1582 }, { "epoch": 0.17, "grad_norm": 0.7308398441414706, "learning_rate": 3.787087867503663e-05, "loss": 0.2363, "step": 1583 }, { "epoch": 0.17, "grad_norm": 0.5083751788921056, "learning_rate": 3.786768347984152e-05, "loss": 0.1754, "step": 1584 }, { "epoch": 0.17, "grad_norm": 0.5710690327316926, "learning_rate": 3.786448602391031e-05, "loss": 0.1857, "step": 1585 }, { "epoch": 0.17, "grad_norm": 0.5625356027144318, "learning_rate": 3.7861286307647555e-05, "loss": 0.2163, "step": 1586 }, { "epoch": 0.17, "grad_norm": 0.5384405667156221, "learning_rate": 3.7858084331458106e-05, "loss": 0.1521, "step": 1587 }, { "epoch": 0.17, "grad_norm": 0.5099848425707089, "learning_rate": 3.78548800957471e-05, "loss": 0.1541, "step": 1588 }, { "epoch": 0.17, "grad_norm": 0.48413366027421945, "learning_rate": 3.785167360091995e-05, "loss": 0.1805, "step": 1589 }, { "epoch": 0.17, "grad_norm": 0.6015975248370843, "learning_rate": 3.7848464847382376e-05, "loss": 0.1688, "step": 1590 }, { "epoch": 0.17, "grad_norm": 0.6369656511205274, "learning_rate": 3.7845253835540357e-05, "loss": 0.1963, "step": 1591 }, { "epoch": 0.17, "grad_norm": 0.6443084794201295, "learning_rate": 3.7842040565800184e-05, "loss": 0.1942, "step": 1592 }, { "epoch": 0.17, "grad_norm": 0.5279845970916258, "learning_rate": 3.78388250385684e-05, "loss": 0.2134, "step": 1593 }, { "epoch": 0.18, "grad_norm": 0.6921526441559828, "learning_rate": 3.783560725425188e-05, "loss": 0.2127, "step": 1594 }, { "epoch": 0.18, "grad_norm": 0.510994930714361, "learning_rate": 3.783238721325774e-05, "loss": 0.1868, "step": 1595 }, { "epoch": 0.18, "grad_norm": 0.7549485823194225, "learning_rate": 3.782916491599341e-05, "loss": 0.277, "step": 1596 }, { "epoch": 0.18, "grad_norm": 0.45856505374059575, "learning_rate": 3.78259403628666e-05, "loss": 0.1668, "step": 1597 }, { "epoch": 0.18, "grad_norm": 0.4931505711286947, "learning_rate": 3.782271355428529e-05, "loss": 0.1889, "step": 1598 }, { "epoch": 0.18, "grad_norm": 0.6379140621815674, "learning_rate": 3.781948449065777e-05, "loss": 0.2453, "step": 1599 }, { "epoch": 0.18, "grad_norm": 0.6456815684851703, "learning_rate": 3.781625317239258e-05, "loss": 0.2175, "step": 1600 }, { "epoch": 0.18, "grad_norm": 0.5364593734237316, "learning_rate": 3.7813019599898593e-05, "loss": 0.1845, "step": 1601 }, { "epoch": 0.18, "grad_norm": 0.5400679237420941, "learning_rate": 3.780978377358493e-05, "loss": 0.1469, "step": 1602 }, { "epoch": 0.18, "grad_norm": 0.5315345955532083, "learning_rate": 3.7806545693861006e-05, "loss": 0.1706, "step": 1603 }, { "epoch": 0.18, "grad_norm": 0.5308179781662627, "learning_rate": 3.7803305361136534e-05, "loss": 0.197, "step": 1604 }, { "epoch": 0.18, "grad_norm": 0.5067890123463344, "learning_rate": 3.780006277582149e-05, "loss": 0.1841, "step": 1605 }, { "epoch": 0.18, "grad_norm": 0.5677038915851197, "learning_rate": 3.7796817938326155e-05, "loss": 0.1967, "step": 1606 }, { "epoch": 0.18, "grad_norm": 0.6025447999752158, "learning_rate": 3.7793570849061084e-05, "loss": 0.1941, "step": 1607 }, { "epoch": 0.18, "grad_norm": 0.6173391276625572, "learning_rate": 3.7790321508437124e-05, "loss": 0.2143, "step": 1608 }, { "epoch": 0.18, "grad_norm": 0.41026736979468215, "learning_rate": 3.77870699168654e-05, "loss": 0.148, "step": 1609 }, { "epoch": 0.18, "grad_norm": 0.5183573489232199, "learning_rate": 3.778381607475732e-05, "loss": 0.1707, "step": 1610 }, { "epoch": 0.18, "grad_norm": 0.7336652044529826, "learning_rate": 3.778055998252459e-05, "loss": 0.2424, "step": 1611 }, { "epoch": 0.18, "grad_norm": 0.7065554540621118, "learning_rate": 3.777730164057919e-05, "loss": 0.2664, "step": 1612 }, { "epoch": 0.18, "grad_norm": 0.6193998979246913, "learning_rate": 3.7774041049333374e-05, "loss": 0.169, "step": 1613 }, { "epoch": 0.18, "grad_norm": 0.5427465738990496, "learning_rate": 3.777077820919972e-05, "loss": 0.1421, "step": 1614 }, { "epoch": 0.18, "grad_norm": 0.5094403511672391, "learning_rate": 3.776751312059103e-05, "loss": 0.1523, "step": 1615 }, { "epoch": 0.18, "grad_norm": 0.7173040191476101, "learning_rate": 3.776424578392045e-05, "loss": 0.2806, "step": 1616 }, { "epoch": 0.18, "grad_norm": 0.5899660350026443, "learning_rate": 3.7760976199601384e-05, "loss": 0.202, "step": 1617 }, { "epoch": 0.18, "grad_norm": 0.5727841108314073, "learning_rate": 3.775770436804751e-05, "loss": 0.1982, "step": 1618 }, { "epoch": 0.18, "grad_norm": 0.5425038714644294, "learning_rate": 3.77544302896728e-05, "loss": 0.1573, "step": 1619 }, { "epoch": 0.18, "grad_norm": 0.441808432943059, "learning_rate": 3.775115396489153e-05, "loss": 0.1605, "step": 1620 }, { "epoch": 0.18, "grad_norm": 0.5942919416186491, "learning_rate": 3.774787539411821e-05, "loss": 0.1775, "step": 1621 }, { "epoch": 0.18, "grad_norm": 0.7083838996415665, "learning_rate": 3.77445945777677e-05, "loss": 0.2439, "step": 1622 }, { "epoch": 0.18, "grad_norm": 0.503312167307887, "learning_rate": 3.7741311516255096e-05, "loss": 0.1497, "step": 1623 }, { "epoch": 0.18, "grad_norm": 0.6952062589938142, "learning_rate": 3.773802620999579e-05, "loss": 0.2034, "step": 1624 }, { "epoch": 0.18, "grad_norm": 0.45336381604052195, "learning_rate": 3.7734738659405467e-05, "loss": 0.1233, "step": 1625 }, { "epoch": 0.18, "grad_norm": 0.7498056239335655, "learning_rate": 3.773144886490007e-05, "loss": 0.2313, "step": 1626 }, { "epoch": 0.18, "grad_norm": 0.6833520080371666, "learning_rate": 3.772815682689587e-05, "loss": 0.2595, "step": 1627 }, { "epoch": 0.18, "grad_norm": 0.5355445399883849, "learning_rate": 3.7724862545809394e-05, "loss": 0.1624, "step": 1628 }, { "epoch": 0.18, "grad_norm": 0.6489583876249027, "learning_rate": 3.772156602205744e-05, "loss": 0.2153, "step": 1629 }, { "epoch": 0.18, "grad_norm": 0.5397262357009679, "learning_rate": 3.771826725605713e-05, "loss": 0.1741, "step": 1630 }, { "epoch": 0.18, "grad_norm": 0.6080042509617394, "learning_rate": 3.771496624822582e-05, "loss": 0.2596, "step": 1631 }, { "epoch": 0.18, "grad_norm": 0.40624558006229716, "learning_rate": 3.771166299898118e-05, "loss": 0.1246, "step": 1632 }, { "epoch": 0.18, "grad_norm": 0.49826294942834465, "learning_rate": 3.770835750874117e-05, "loss": 0.1721, "step": 1633 }, { "epoch": 0.18, "grad_norm": 0.5338173147988655, "learning_rate": 3.770504977792402e-05, "loss": 0.1818, "step": 1634 }, { "epoch": 0.18, "grad_norm": 0.6435841392990507, "learning_rate": 3.770173980694824e-05, "loss": 0.2426, "step": 1635 }, { "epoch": 0.18, "grad_norm": 0.4587496542864336, "learning_rate": 3.7698427596232636e-05, "loss": 0.1546, "step": 1636 }, { "epoch": 0.18, "grad_norm": 0.536350671526269, "learning_rate": 3.769511314619628e-05, "loss": 0.1534, "step": 1637 }, { "epoch": 0.18, "grad_norm": 0.5897124693839668, "learning_rate": 3.7691796457258546e-05, "loss": 0.2191, "step": 1638 }, { "epoch": 0.18, "grad_norm": 0.547712502195856, "learning_rate": 3.7688477529839085e-05, "loss": 0.1918, "step": 1639 }, { "epoch": 0.18, "grad_norm": 0.5148067339497486, "learning_rate": 3.7685156364357825e-05, "loss": 0.1872, "step": 1640 }, { "epoch": 0.18, "grad_norm": 0.8071489187391565, "learning_rate": 3.7681832961234974e-05, "loss": 0.2559, "step": 1641 }, { "epoch": 0.18, "grad_norm": 0.515770689293681, "learning_rate": 3.767850732089105e-05, "loss": 0.2022, "step": 1642 }, { "epoch": 0.18, "grad_norm": 0.646041626309522, "learning_rate": 3.7675179443746816e-05, "loss": 0.1904, "step": 1643 }, { "epoch": 0.18, "grad_norm": 0.8126071345781976, "learning_rate": 3.7671849330223345e-05, "loss": 0.2483, "step": 1644 }, { "epoch": 0.18, "grad_norm": 0.6605583185414945, "learning_rate": 3.7668516980741995e-05, "loss": 0.2469, "step": 1645 }, { "epoch": 0.18, "grad_norm": 0.5948719667476394, "learning_rate": 3.766518239572437e-05, "loss": 0.1931, "step": 1646 }, { "epoch": 0.18, "grad_norm": 0.5043237124983753, "learning_rate": 3.7661845575592405e-05, "loss": 0.1832, "step": 1647 }, { "epoch": 0.18, "grad_norm": 0.6381973930565653, "learning_rate": 3.76585065207683e-05, "loss": 0.2653, "step": 1648 }, { "epoch": 0.18, "grad_norm": 0.5121307170735959, "learning_rate": 3.765516523167452e-05, "loss": 0.1394, "step": 1649 }, { "epoch": 0.18, "grad_norm": 0.4798069093921884, "learning_rate": 3.765182170873383e-05, "loss": 0.1774, "step": 1650 }, { "epoch": 0.18, "grad_norm": 0.46785134958780095, "learning_rate": 3.7648475952369275e-05, "loss": 0.1238, "step": 1651 }, { "epoch": 0.18, "grad_norm": 0.5255992149170073, "learning_rate": 3.7645127963004176e-05, "loss": 0.1604, "step": 1652 }, { "epoch": 0.18, "grad_norm": 0.570791513646763, "learning_rate": 3.764177774106216e-05, "loss": 0.1968, "step": 1653 }, { "epoch": 0.18, "grad_norm": 0.566266930644921, "learning_rate": 3.76384252869671e-05, "loss": 0.2006, "step": 1654 }, { "epoch": 0.18, "grad_norm": 0.47890697746781674, "learning_rate": 3.763507060114319e-05, "loss": 0.1639, "step": 1655 }, { "epoch": 0.18, "grad_norm": 0.6090882188936215, "learning_rate": 3.7631713684014866e-05, "loss": 0.2415, "step": 1656 }, { "epoch": 0.18, "grad_norm": 0.5919254780143545, "learning_rate": 3.762835453600688e-05, "loss": 0.2112, "step": 1657 }, { "epoch": 0.18, "grad_norm": 0.5127920876856659, "learning_rate": 3.7624993157544246e-05, "loss": 0.1378, "step": 1658 }, { "epoch": 0.18, "grad_norm": 0.7165278207844192, "learning_rate": 3.762162954905228e-05, "loss": 0.2499, "step": 1659 }, { "epoch": 0.18, "grad_norm": 0.5861346104005456, "learning_rate": 3.761826371095655e-05, "loss": 0.2304, "step": 1660 }, { "epoch": 0.18, "grad_norm": 0.36341851215175336, "learning_rate": 3.761489564368294e-05, "loss": 0.1097, "step": 1661 }, { "epoch": 0.18, "grad_norm": 0.4778832129676479, "learning_rate": 3.7611525347657584e-05, "loss": 0.1464, "step": 1662 }, { "epoch": 0.18, "grad_norm": 0.4530170246255666, "learning_rate": 3.760815282330694e-05, "loss": 0.1391, "step": 1663 }, { "epoch": 0.18, "grad_norm": 0.5287639567379668, "learning_rate": 3.7604778071057685e-05, "loss": 0.1809, "step": 1664 }, { "epoch": 0.18, "grad_norm": 0.5484551625246934, "learning_rate": 3.760140109133684e-05, "loss": 0.1664, "step": 1665 }, { "epoch": 0.18, "grad_norm": 0.5592885797756405, "learning_rate": 3.759802188457168e-05, "loss": 0.198, "step": 1666 }, { "epoch": 0.18, "grad_norm": 0.4924674373357225, "learning_rate": 3.759464045118976e-05, "loss": 0.1862, "step": 1667 }, { "epoch": 0.18, "grad_norm": 0.8881317844977353, "learning_rate": 3.759125679161893e-05, "loss": 0.3135, "step": 1668 }, { "epoch": 0.18, "grad_norm": 0.5645354055868844, "learning_rate": 3.75878709062873e-05, "loss": 0.1924, "step": 1669 }, { "epoch": 0.18, "grad_norm": 0.41503335387847956, "learning_rate": 3.758448279562327e-05, "loss": 0.1237, "step": 1670 }, { "epoch": 0.18, "grad_norm": 0.5282994405462973, "learning_rate": 3.758109246005554e-05, "loss": 0.1439, "step": 1671 }, { "epoch": 0.18, "grad_norm": 0.4725423313030016, "learning_rate": 3.757769990001308e-05, "loss": 0.1197, "step": 1672 }, { "epoch": 0.18, "grad_norm": 0.6196819069321887, "learning_rate": 3.757430511592513e-05, "loss": 0.2403, "step": 1673 }, { "epoch": 0.18, "grad_norm": 0.486460533654453, "learning_rate": 3.757090810822122e-05, "loss": 0.1762, "step": 1674 }, { "epoch": 0.18, "grad_norm": 0.6898722900846119, "learning_rate": 3.756750887733116e-05, "loss": 0.28, "step": 1675 }, { "epoch": 0.18, "grad_norm": 0.4578229942030372, "learning_rate": 3.756410742368505e-05, "loss": 0.1451, "step": 1676 }, { "epoch": 0.18, "grad_norm": 0.5826117162035149, "learning_rate": 3.7560703747713255e-05, "loss": 0.1839, "step": 1677 }, { "epoch": 0.18, "grad_norm": 0.6236636896371596, "learning_rate": 3.7557297849846444e-05, "loss": 0.1963, "step": 1678 }, { "epoch": 0.18, "grad_norm": 0.49030850522496977, "learning_rate": 3.7553889730515545e-05, "loss": 0.1598, "step": 1679 }, { "epoch": 0.18, "grad_norm": 0.6157030046489759, "learning_rate": 3.7550479390151766e-05, "loss": 0.2048, "step": 1680 }, { "epoch": 0.18, "grad_norm": 0.6096990164055758, "learning_rate": 3.7547066829186626e-05, "loss": 0.14, "step": 1681 }, { "epoch": 0.18, "grad_norm": 0.565142438327975, "learning_rate": 3.754365204805189e-05, "loss": 0.1619, "step": 1682 }, { "epoch": 0.18, "grad_norm": 0.7016368163918355, "learning_rate": 3.754023504717962e-05, "loss": 0.2143, "step": 1683 }, { "epoch": 0.18, "grad_norm": 0.625176309513068, "learning_rate": 3.753681582700216e-05, "loss": 0.1814, "step": 1684 }, { "epoch": 0.19, "grad_norm": 0.590324307147436, "learning_rate": 3.7533394387952134e-05, "loss": 0.1537, "step": 1685 }, { "epoch": 0.19, "grad_norm": 0.6442467263804328, "learning_rate": 3.752997073046244e-05, "loss": 0.1872, "step": 1686 }, { "epoch": 0.19, "grad_norm": 0.59178876713905, "learning_rate": 3.752654485496627e-05, "loss": 0.2091, "step": 1687 }, { "epoch": 0.19, "grad_norm": 0.5640764926854976, "learning_rate": 3.752311676189708e-05, "loss": 0.157, "step": 1688 }, { "epoch": 0.19, "grad_norm": 0.6923073968316125, "learning_rate": 3.7519686451688614e-05, "loss": 0.2069, "step": 1689 }, { "epoch": 0.19, "grad_norm": 0.7447985846636116, "learning_rate": 3.75162539247749e-05, "loss": 0.207, "step": 1690 }, { "epoch": 0.19, "grad_norm": 0.5239470549545716, "learning_rate": 3.751281918159025e-05, "loss": 0.1244, "step": 1691 }, { "epoch": 0.19, "grad_norm": 0.5805904040177313, "learning_rate": 3.750938222256924e-05, "loss": 0.2058, "step": 1692 }, { "epoch": 0.19, "grad_norm": 0.6153837476779249, "learning_rate": 3.7505943048146736e-05, "loss": 0.2012, "step": 1693 }, { "epoch": 0.19, "grad_norm": 0.5391845288577851, "learning_rate": 3.75025016587579e-05, "loss": 0.2016, "step": 1694 }, { "epoch": 0.19, "grad_norm": 0.40337075940915457, "learning_rate": 3.749905805483814e-05, "loss": 0.1364, "step": 1695 }, { "epoch": 0.19, "grad_norm": 0.5676900061585956, "learning_rate": 3.7495612236823175e-05, "loss": 0.1815, "step": 1696 }, { "epoch": 0.19, "grad_norm": 0.6269227968290026, "learning_rate": 3.749216420514898e-05, "loss": 0.2448, "step": 1697 }, { "epoch": 0.19, "grad_norm": 0.39955308355498004, "learning_rate": 3.7488713960251845e-05, "loss": 0.1234, "step": 1698 }, { "epoch": 0.19, "grad_norm": 0.5478316794746595, "learning_rate": 3.74852615025683e-05, "loss": 0.1723, "step": 1699 }, { "epoch": 0.19, "grad_norm": 0.6297119855261379, "learning_rate": 3.748180683253518e-05, "loss": 0.1661, "step": 1700 }, { "epoch": 0.19, "grad_norm": 0.6537571061296992, "learning_rate": 3.747834995058959e-05, "loss": 0.1773, "step": 1701 }, { "epoch": 0.19, "grad_norm": 0.6101084232512916, "learning_rate": 3.747489085716891e-05, "loss": 0.179, "step": 1702 }, { "epoch": 0.19, "grad_norm": 0.5618122187537455, "learning_rate": 3.747142955271081e-05, "loss": 0.159, "step": 1703 }, { "epoch": 0.19, "grad_norm": 0.6883530742740928, "learning_rate": 3.746796603765325e-05, "loss": 0.2091, "step": 1704 }, { "epoch": 0.19, "grad_norm": 0.6575283134369131, "learning_rate": 3.7464500312434445e-05, "loss": 0.22, "step": 1705 }, { "epoch": 0.19, "grad_norm": 0.4634539560494647, "learning_rate": 3.7461032377492905e-05, "loss": 0.1117, "step": 1706 }, { "epoch": 0.19, "grad_norm": 0.5572836391072831, "learning_rate": 3.745756223326741e-05, "loss": 0.2117, "step": 1707 }, { "epoch": 0.19, "grad_norm": 0.5723314995981625, "learning_rate": 3.745408988019703e-05, "loss": 0.1732, "step": 1708 }, { "epoch": 0.19, "grad_norm": 0.599343679005534, "learning_rate": 3.7450615318721115e-05, "loss": 0.1726, "step": 1709 }, { "epoch": 0.19, "grad_norm": 0.6137821557348229, "learning_rate": 3.744713854927928e-05, "loss": 0.2148, "step": 1710 }, { "epoch": 0.19, "grad_norm": 0.6366886061571443, "learning_rate": 3.744365957231143e-05, "loss": 0.2418, "step": 1711 }, { "epoch": 0.19, "grad_norm": 0.6688885964066988, "learning_rate": 3.7440178388257746e-05, "loss": 0.1802, "step": 1712 }, { "epoch": 0.19, "grad_norm": 0.6196204651602569, "learning_rate": 3.74366949975587e-05, "loss": 0.2114, "step": 1713 }, { "epoch": 0.19, "grad_norm": 0.5522774199244669, "learning_rate": 3.743320940065503e-05, "loss": 0.1842, "step": 1714 }, { "epoch": 0.19, "grad_norm": 0.4462534683691138, "learning_rate": 3.742972159798775e-05, "loss": 0.1449, "step": 1715 }, { "epoch": 0.19, "grad_norm": 0.6269447825224383, "learning_rate": 3.7426231589998166e-05, "loss": 0.2665, "step": 1716 }, { "epoch": 0.19, "grad_norm": 0.6496119835778836, "learning_rate": 3.742273937712785e-05, "loss": 0.2205, "step": 1717 }, { "epoch": 0.19, "grad_norm": 0.7138850970441734, "learning_rate": 3.741924495981867e-05, "loss": 0.2088, "step": 1718 }, { "epoch": 0.19, "grad_norm": 0.44922088954250167, "learning_rate": 3.7415748338512746e-05, "loss": 0.1352, "step": 1719 }, { "epoch": 0.19, "grad_norm": 0.6247904664711037, "learning_rate": 3.741224951365251e-05, "loss": 0.2075, "step": 1720 }, { "epoch": 0.19, "grad_norm": 0.5113550014441116, "learning_rate": 3.740874848568065e-05, "loss": 0.1684, "step": 1721 }, { "epoch": 0.19, "grad_norm": 0.6298551804743383, "learning_rate": 3.740524525504014e-05, "loss": 0.18, "step": 1722 }, { "epoch": 0.19, "grad_norm": 0.48636890531479665, "learning_rate": 3.740173982217423e-05, "loss": 0.1576, "step": 1723 }, { "epoch": 0.19, "grad_norm": 0.6872908534191354, "learning_rate": 3.739823218752645e-05, "loss": 0.2019, "step": 1724 }, { "epoch": 0.19, "grad_norm": 0.5770330168435931, "learning_rate": 3.739472235154061e-05, "loss": 0.1737, "step": 1725 }, { "epoch": 0.19, "grad_norm": 0.5272731592474038, "learning_rate": 3.7391210314660796e-05, "loss": 0.1494, "step": 1726 }, { "epoch": 0.19, "grad_norm": 0.5441748033950204, "learning_rate": 3.738769607733138e-05, "loss": 0.155, "step": 1727 }, { "epoch": 0.19, "grad_norm": 0.565452403928258, "learning_rate": 3.7384179639996997e-05, "loss": 0.1688, "step": 1728 }, { "epoch": 0.19, "grad_norm": 0.5876894128109672, "learning_rate": 3.738066100310257e-05, "loss": 0.1604, "step": 1729 }, { "epoch": 0.19, "grad_norm": 0.5313991169056789, "learning_rate": 3.7377140167093316e-05, "loss": 0.1362, "step": 1730 }, { "epoch": 0.19, "grad_norm": 0.5479566503907015, "learning_rate": 3.7373617132414686e-05, "loss": 0.1948, "step": 1731 }, { "epoch": 0.19, "grad_norm": 0.6559345531715473, "learning_rate": 3.7370091899512464e-05, "loss": 0.2034, "step": 1732 }, { "epoch": 0.19, "grad_norm": 0.4808759746893655, "learning_rate": 3.736656446883267e-05, "loss": 0.1685, "step": 1733 }, { "epoch": 0.19, "grad_norm": 0.55910713772435, "learning_rate": 3.736303484082163e-05, "loss": 0.1424, "step": 1734 }, { "epoch": 0.19, "grad_norm": 0.4510887531813745, "learning_rate": 3.735950301592592e-05, "loss": 0.1314, "step": 1735 }, { "epoch": 0.19, "grad_norm": 0.6666557837631132, "learning_rate": 3.7355968994592414e-05, "loss": 0.228, "step": 1736 }, { "epoch": 0.19, "grad_norm": 0.7322084650024974, "learning_rate": 3.735243277726828e-05, "loss": 0.2417, "step": 1737 }, { "epoch": 0.19, "grad_norm": 0.5170557228768408, "learning_rate": 3.7348894364400914e-05, "loss": 0.1377, "step": 1738 }, { "epoch": 0.19, "grad_norm": 0.5642680356376106, "learning_rate": 3.7345353756438025e-05, "loss": 0.1888, "step": 1739 }, { "epoch": 0.19, "grad_norm": 0.621761179336891, "learning_rate": 3.734181095382761e-05, "loss": 0.2171, "step": 1740 }, { "epoch": 0.19, "grad_norm": 0.5121415699165283, "learning_rate": 3.733826595701792e-05, "loss": 0.1633, "step": 1741 }, { "epoch": 0.19, "grad_norm": 0.6046477134685961, "learning_rate": 3.733471876645749e-05, "loss": 0.1302, "step": 1742 }, { "epoch": 0.19, "grad_norm": 0.5619993631047071, "learning_rate": 3.733116938259513e-05, "loss": 0.1937, "step": 1743 }, { "epoch": 0.19, "grad_norm": 0.5190376288640828, "learning_rate": 3.732761780587993e-05, "loss": 0.1653, "step": 1744 }, { "epoch": 0.19, "grad_norm": 0.6022405394954778, "learning_rate": 3.732406403676126e-05, "loss": 0.1967, "step": 1745 }, { "epoch": 0.19, "grad_norm": 0.5974996274284574, "learning_rate": 3.732050807568878e-05, "loss": 0.1833, "step": 1746 }, { "epoch": 0.19, "grad_norm": 0.7429531765658418, "learning_rate": 3.731694992311239e-05, "loss": 0.2606, "step": 1747 }, { "epoch": 0.19, "grad_norm": 0.43786074887801946, "learning_rate": 3.7313389579482315e-05, "loss": 0.1441, "step": 1748 }, { "epoch": 0.19, "grad_norm": 0.6534081643742562, "learning_rate": 3.730982704524901e-05, "loss": 0.1926, "step": 1749 }, { "epoch": 0.19, "grad_norm": 0.4990355309142661, "learning_rate": 3.7306262320863245e-05, "loss": 0.1571, "step": 1750 }, { "epoch": 0.19, "grad_norm": 0.4616424536237072, "learning_rate": 3.730269540677606e-05, "loss": 0.16, "step": 1751 }, { "epoch": 0.19, "grad_norm": 0.45670773394020703, "learning_rate": 3.729912630343874e-05, "loss": 0.121, "step": 1752 }, { "epoch": 0.19, "grad_norm": 0.4940831750335586, "learning_rate": 3.729555501130288e-05, "loss": 0.1705, "step": 1753 }, { "epoch": 0.19, "grad_norm": 0.5267602871676524, "learning_rate": 3.729198153082036e-05, "loss": 0.1549, "step": 1754 }, { "epoch": 0.19, "grad_norm": 0.4261168111303581, "learning_rate": 3.7288405862443296e-05, "loss": 0.1308, "step": 1755 }, { "epoch": 0.19, "grad_norm": 0.4561122229018286, "learning_rate": 3.7284828006624125e-05, "loss": 0.1284, "step": 1756 }, { "epoch": 0.19, "grad_norm": 0.840420050613432, "learning_rate": 3.728124796381553e-05, "loss": 0.2685, "step": 1757 }, { "epoch": 0.19, "grad_norm": 0.4494623767491376, "learning_rate": 3.7277665734470476e-05, "loss": 0.1298, "step": 1758 }, { "epoch": 0.19, "grad_norm": 0.5002308848883505, "learning_rate": 3.727408131904223e-05, "loss": 0.1509, "step": 1759 }, { "epoch": 0.19, "grad_norm": 0.5140394628864617, "learning_rate": 3.727049471798429e-05, "loss": 0.1425, "step": 1760 }, { "epoch": 0.19, "grad_norm": 0.4895479480319752, "learning_rate": 3.726690593175048e-05, "loss": 0.1351, "step": 1761 }, { "epoch": 0.19, "grad_norm": 0.5520572896128711, "learning_rate": 3.726331496079486e-05, "loss": 0.1739, "step": 1762 }, { "epoch": 0.19, "grad_norm": 0.7585251200595686, "learning_rate": 3.725972180557179e-05, "loss": 0.2004, "step": 1763 }, { "epoch": 0.19, "grad_norm": 0.4590632918671075, "learning_rate": 3.7256126466535896e-05, "loss": 0.1442, "step": 1764 }, { "epoch": 0.19, "grad_norm": 0.6287297066206385, "learning_rate": 3.725252894414209e-05, "loss": 0.2479, "step": 1765 }, { "epoch": 0.19, "grad_norm": 0.5816323366208689, "learning_rate": 3.724892923884555e-05, "loss": 0.2193, "step": 1766 }, { "epoch": 0.19, "grad_norm": 0.5379167072563048, "learning_rate": 3.724532735110174e-05, "loss": 0.1883, "step": 1767 }, { "epoch": 0.19, "grad_norm": 0.6668542856218728, "learning_rate": 3.724172328136638e-05, "loss": 0.2434, "step": 1768 }, { "epoch": 0.19, "grad_norm": 0.45049135482867525, "learning_rate": 3.723811703009549e-05, "loss": 0.1624, "step": 1769 }, { "epoch": 0.19, "grad_norm": 0.45224688387187884, "learning_rate": 3.723450859774536e-05, "loss": 0.1191, "step": 1770 }, { "epoch": 0.19, "grad_norm": 0.5132096970849164, "learning_rate": 3.723089798477254e-05, "loss": 0.1608, "step": 1771 }, { "epoch": 0.19, "grad_norm": 0.5183383756292147, "learning_rate": 3.7227285191633894e-05, "loss": 0.1645, "step": 1772 }, { "epoch": 0.19, "grad_norm": 0.5725595806385939, "learning_rate": 3.722367021878651e-05, "loss": 0.1745, "step": 1773 }, { "epoch": 0.19, "grad_norm": 0.6510874363387364, "learning_rate": 3.722005306668778e-05, "loss": 0.1843, "step": 1774 }, { "epoch": 0.19, "grad_norm": 0.548747996932699, "learning_rate": 3.721643373579538e-05, "loss": 0.158, "step": 1775 }, { "epoch": 0.2, "grad_norm": 0.5105141592014346, "learning_rate": 3.721281222656725e-05, "loss": 0.1735, "step": 1776 }, { "epoch": 0.2, "grad_norm": 0.6106253048243001, "learning_rate": 3.7209188539461606e-05, "loss": 0.2014, "step": 1777 }, { "epoch": 0.2, "grad_norm": 0.49390342356794187, "learning_rate": 3.7205562674936945e-05, "loss": 0.1507, "step": 1778 }, { "epoch": 0.2, "grad_norm": 0.7171510242502983, "learning_rate": 3.720193463345202e-05, "loss": 0.2032, "step": 1779 }, { "epoch": 0.2, "grad_norm": 0.5795244599430791, "learning_rate": 3.719830441546589e-05, "loss": 0.1691, "step": 1780 }, { "epoch": 0.2, "grad_norm": 0.4928726090448924, "learning_rate": 3.719467202143786e-05, "loss": 0.1702, "step": 1781 }, { "epoch": 0.2, "grad_norm": 0.5353202011792603, "learning_rate": 3.7191037451827545e-05, "loss": 0.1768, "step": 1782 }, { "epoch": 0.2, "grad_norm": 0.5293257589306753, "learning_rate": 3.71874007070948e-05, "loss": 0.2061, "step": 1783 }, { "epoch": 0.2, "grad_norm": 0.6033963576307103, "learning_rate": 3.718376178769976e-05, "loss": 0.2347, "step": 1784 }, { "epoch": 0.2, "grad_norm": 0.563174823119198, "learning_rate": 3.7180120694102864e-05, "loss": 0.1657, "step": 1785 }, { "epoch": 0.2, "grad_norm": 0.6567917585757848, "learning_rate": 3.71764774267648e-05, "loss": 0.254, "step": 1786 }, { "epoch": 0.2, "grad_norm": 0.5453203173407706, "learning_rate": 3.717283198614654e-05, "loss": 0.1697, "step": 1787 }, { "epoch": 0.2, "grad_norm": 0.5568532169675561, "learning_rate": 3.716918437270932e-05, "loss": 0.1702, "step": 1788 }, { "epoch": 0.2, "grad_norm": 0.4480953433120678, "learning_rate": 3.7165534586914674e-05, "loss": 0.1469, "step": 1789 }, { "epoch": 0.2, "grad_norm": 0.5939011632334655, "learning_rate": 3.7161882629224386e-05, "loss": 0.1693, "step": 1790 }, { "epoch": 0.2, "grad_norm": 0.5252234040362398, "learning_rate": 3.715822850010053e-05, "loss": 0.1802, "step": 1791 }, { "epoch": 0.2, "grad_norm": 0.6148327682270539, "learning_rate": 3.7154572200005446e-05, "loss": 0.1868, "step": 1792 }, { "epoch": 0.2, "grad_norm": 0.5877340368036394, "learning_rate": 3.7150913729401754e-05, "loss": 0.2021, "step": 1793 }, { "epoch": 0.2, "grad_norm": 0.6031817404586958, "learning_rate": 3.714725308875236e-05, "loss": 0.2198, "step": 1794 }, { "epoch": 0.2, "grad_norm": 0.5666927533830229, "learning_rate": 3.714359027852041e-05, "loss": 0.1631, "step": 1795 }, { "epoch": 0.2, "grad_norm": 0.5845618821239438, "learning_rate": 3.713992529916936e-05, "loss": 0.1885, "step": 1796 }, { "epoch": 0.2, "grad_norm": 0.4468047920508807, "learning_rate": 3.713625815116293e-05, "loss": 0.1596, "step": 1797 }, { "epoch": 0.2, "grad_norm": 0.6655990326880111, "learning_rate": 3.7132588834965104e-05, "loss": 0.2263, "step": 1798 }, { "epoch": 0.2, "grad_norm": 0.42030102668049585, "learning_rate": 3.712891735104015e-05, "loss": 0.1408, "step": 1799 }, { "epoch": 0.2, "grad_norm": 0.48522803348345994, "learning_rate": 3.712524369985262e-05, "loss": 0.1396, "step": 1800 }, { "epoch": 0.2, "grad_norm": 0.5247253687394966, "learning_rate": 3.712156788186731e-05, "loss": 0.1711, "step": 1801 }, { "epoch": 0.2, "grad_norm": 0.46044394367711705, "learning_rate": 3.711788989754931e-05, "loss": 0.1771, "step": 1802 }, { "epoch": 0.2, "grad_norm": 0.6248716222523348, "learning_rate": 3.711420974736399e-05, "loss": 0.1769, "step": 1803 }, { "epoch": 0.2, "grad_norm": 0.5399353314018481, "learning_rate": 3.711052743177699e-05, "loss": 0.1872, "step": 1804 }, { "epoch": 0.2, "grad_norm": 0.5948829883633042, "learning_rate": 3.7106842951254216e-05, "loss": 0.2361, "step": 1805 }, { "epoch": 0.2, "grad_norm": 0.5504957500747597, "learning_rate": 3.710315630626185e-05, "loss": 0.1893, "step": 1806 }, { "epoch": 0.2, "grad_norm": 0.5724746270366653, "learning_rate": 3.7099467497266354e-05, "loss": 0.1768, "step": 1807 }, { "epoch": 0.2, "grad_norm": 0.4662508144831766, "learning_rate": 3.7095776524734464e-05, "loss": 0.1377, "step": 1808 }, { "epoch": 0.2, "grad_norm": 0.5384959962123979, "learning_rate": 3.7092083389133174e-05, "loss": 0.1632, "step": 1809 }, { "epoch": 0.2, "grad_norm": 0.46400492982156966, "learning_rate": 3.7088388090929776e-05, "loss": 0.1385, "step": 1810 }, { "epoch": 0.2, "grad_norm": 0.6797547142732374, "learning_rate": 3.708469063059181e-05, "loss": 0.1942, "step": 1811 }, { "epoch": 0.2, "grad_norm": 0.6242689625736262, "learning_rate": 3.708099100858712e-05, "loss": 0.1859, "step": 1812 }, { "epoch": 0.2, "grad_norm": 0.5202288425768358, "learning_rate": 3.70772892253838e-05, "loss": 0.1377, "step": 1813 }, { "epoch": 0.2, "grad_norm": 0.5689157862651102, "learning_rate": 3.7073585281450206e-05, "loss": 0.1725, "step": 1814 }, { "epoch": 0.2, "grad_norm": 0.4489943233972253, "learning_rate": 3.706987917725501e-05, "loss": 0.1281, "step": 1815 }, { "epoch": 0.2, "grad_norm": 0.4661901513313358, "learning_rate": 3.706617091326712e-05, "loss": 0.1585, "step": 1816 }, { "epoch": 0.2, "grad_norm": 0.5624394914083349, "learning_rate": 3.7062460489955736e-05, "loss": 0.16, "step": 1817 }, { "epoch": 0.2, "grad_norm": 0.5653668040883995, "learning_rate": 3.705874790779032e-05, "loss": 0.1983, "step": 1818 }, { "epoch": 0.2, "grad_norm": 0.7204777641783155, "learning_rate": 3.705503316724062e-05, "loss": 0.2443, "step": 1819 }, { "epoch": 0.2, "grad_norm": 0.512092570269932, "learning_rate": 3.705131626877664e-05, "loss": 0.1823, "step": 1820 }, { "epoch": 0.2, "grad_norm": 0.4458199504228133, "learning_rate": 3.704759721286866e-05, "loss": 0.155, "step": 1821 }, { "epoch": 0.2, "grad_norm": 0.5639042280905548, "learning_rate": 3.7043875999987254e-05, "loss": 0.1822, "step": 1822 }, { "epoch": 0.2, "grad_norm": 0.4959610588857517, "learning_rate": 3.704015263060325e-05, "loss": 0.1723, "step": 1823 }, { "epoch": 0.2, "grad_norm": 0.5557884251450709, "learning_rate": 3.7036427105187754e-05, "loss": 0.1898, "step": 1824 }, { "epoch": 0.2, "grad_norm": 0.5829874670929166, "learning_rate": 3.703269942421214e-05, "loss": 0.2062, "step": 1825 }, { "epoch": 0.2, "grad_norm": 0.5341867728870532, "learning_rate": 3.7028969588148056e-05, "loss": 0.1755, "step": 1826 }, { "epoch": 0.2, "grad_norm": 0.5358242960781303, "learning_rate": 3.7025237597467436e-05, "loss": 0.1705, "step": 1827 }, { "epoch": 0.2, "grad_norm": 0.6544398365126588, "learning_rate": 3.702150345264247e-05, "loss": 0.2301, "step": 1828 }, { "epoch": 0.2, "grad_norm": 0.631420653475568, "learning_rate": 3.701776715414562e-05, "loss": 0.2188, "step": 1829 }, { "epoch": 0.2, "grad_norm": 0.49352888455739213, "learning_rate": 3.701402870244963e-05, "loss": 0.1504, "step": 1830 }, { "epoch": 0.2, "grad_norm": 0.6030738472864781, "learning_rate": 3.701028809802752e-05, "loss": 0.1979, "step": 1831 }, { "epoch": 0.2, "grad_norm": 0.5364972597338402, "learning_rate": 3.700654534135257e-05, "loss": 0.1594, "step": 1832 }, { "epoch": 0.2, "grad_norm": 0.4877486414047239, "learning_rate": 3.7002800432898344e-05, "loss": 0.1747, "step": 1833 }, { "epoch": 0.2, "grad_norm": 0.5407351651707459, "learning_rate": 3.699905337313866e-05, "loss": 0.1424, "step": 1834 }, { "epoch": 0.2, "grad_norm": 0.558463875137577, "learning_rate": 3.6995304162547634e-05, "loss": 0.163, "step": 1835 }, { "epoch": 0.2, "grad_norm": 0.5606333702423673, "learning_rate": 3.699155280159964e-05, "loss": 0.16, "step": 1836 }, { "epoch": 0.2, "grad_norm": 0.6374742671036454, "learning_rate": 3.698779929076931e-05, "loss": 0.2115, "step": 1837 }, { "epoch": 0.2, "grad_norm": 0.5123410990403009, "learning_rate": 3.698404363053158e-05, "loss": 0.1207, "step": 1838 }, { "epoch": 0.2, "grad_norm": 0.6002579452452738, "learning_rate": 3.6980285821361636e-05, "loss": 0.2014, "step": 1839 }, { "epoch": 0.2, "grad_norm": 0.5124499796671309, "learning_rate": 3.697652586373493e-05, "loss": 0.1419, "step": 1840 }, { "epoch": 0.2, "grad_norm": 0.4431069281353897, "learning_rate": 3.69727637581272e-05, "loss": 0.1213, "step": 1841 }, { "epoch": 0.2, "grad_norm": 0.5455442586456671, "learning_rate": 3.696899950501447e-05, "loss": 0.1432, "step": 1842 }, { "epoch": 0.2, "grad_norm": 0.5126649102779077, "learning_rate": 3.696523310487299e-05, "loss": 0.1456, "step": 1843 }, { "epoch": 0.2, "grad_norm": 0.6071043503194643, "learning_rate": 3.6961464558179333e-05, "loss": 0.2241, "step": 1844 }, { "epoch": 0.2, "grad_norm": 0.5503258953381277, "learning_rate": 3.695769386541031e-05, "loss": 0.1799, "step": 1845 }, { "epoch": 0.2, "grad_norm": 0.5988472683528538, "learning_rate": 3.695392102704302e-05, "loss": 0.1783, "step": 1846 }, { "epoch": 0.2, "grad_norm": 0.6539689338200037, "learning_rate": 3.695014604355482e-05, "loss": 0.1655, "step": 1847 }, { "epoch": 0.2, "grad_norm": 0.5720937946119732, "learning_rate": 3.694636891542334e-05, "loss": 0.1653, "step": 1848 }, { "epoch": 0.2, "grad_norm": 0.4841327061235579, "learning_rate": 3.694258964312649e-05, "loss": 0.1502, "step": 1849 }, { "epoch": 0.2, "grad_norm": 0.6239236145889462, "learning_rate": 3.693880822714247e-05, "loss": 0.2152, "step": 1850 }, { "epoch": 0.2, "grad_norm": 0.5698756805168184, "learning_rate": 3.6935024667949694e-05, "loss": 0.226, "step": 1851 }, { "epoch": 0.2, "grad_norm": 0.46871614333907585, "learning_rate": 3.693123896602692e-05, "loss": 0.1581, "step": 1852 }, { "epoch": 0.2, "grad_norm": 0.4838594338493423, "learning_rate": 3.6927451121853104e-05, "loss": 0.1349, "step": 1853 }, { "epoch": 0.2, "grad_norm": 0.5893581993968948, "learning_rate": 3.692366113590754e-05, "loss": 0.2224, "step": 1854 }, { "epoch": 0.2, "grad_norm": 0.6799030400669556, "learning_rate": 3.6919869008669735e-05, "loss": 0.2278, "step": 1855 }, { "epoch": 0.2, "grad_norm": 0.4517636072403157, "learning_rate": 3.691607474061951e-05, "loss": 0.1279, "step": 1856 }, { "epoch": 0.2, "grad_norm": 0.7196763744479866, "learning_rate": 3.691227833223693e-05, "loss": 0.1242, "step": 1857 }, { "epoch": 0.2, "grad_norm": 0.5461823027363832, "learning_rate": 3.690847978400236e-05, "loss": 0.1869, "step": 1858 }, { "epoch": 0.2, "grad_norm": 0.49258810724464014, "learning_rate": 3.69046790963964e-05, "loss": 0.1564, "step": 1859 }, { "epoch": 0.2, "grad_norm": 0.6603150938446183, "learning_rate": 3.690087626989994e-05, "loss": 0.2154, "step": 1860 }, { "epoch": 0.2, "grad_norm": 0.5477166919305028, "learning_rate": 3.6897071304994145e-05, "loss": 0.1551, "step": 1861 }, { "epoch": 0.2, "grad_norm": 0.4150405704246504, "learning_rate": 3.689326420216044e-05, "loss": 0.1295, "step": 1862 }, { "epoch": 0.2, "grad_norm": 0.4571953503344737, "learning_rate": 3.688945496188052e-05, "loss": 0.1285, "step": 1863 }, { "epoch": 0.2, "grad_norm": 0.4778201596211847, "learning_rate": 3.6885643584636366e-05, "loss": 0.1578, "step": 1864 }, { "epoch": 0.2, "grad_norm": 0.5003209276222216, "learning_rate": 3.688183007091021e-05, "loss": 0.1817, "step": 1865 }, { "epoch": 0.2, "grad_norm": 0.4884668226464562, "learning_rate": 3.6878014421184565e-05, "loss": 0.1615, "step": 1866 }, { "epoch": 0.21, "grad_norm": 0.5215038624526275, "learning_rate": 3.687419663594221e-05, "loss": 0.1533, "step": 1867 }, { "epoch": 0.21, "grad_norm": 0.5356753817691832, "learning_rate": 3.68703767156662e-05, "loss": 0.1841, "step": 1868 }, { "epoch": 0.21, "grad_norm": 0.4813866569892626, "learning_rate": 3.686655466083986e-05, "loss": 0.1092, "step": 1869 }, { "epoch": 0.21, "grad_norm": 0.515183722353877, "learning_rate": 3.6862730471946766e-05, "loss": 0.1662, "step": 1870 }, { "epoch": 0.21, "grad_norm": 0.527457726488649, "learning_rate": 3.685890414947079e-05, "loss": 0.1326, "step": 1871 }, { "epoch": 0.21, "grad_norm": 0.48394821191988074, "learning_rate": 3.685507569389606e-05, "loss": 0.1471, "step": 1872 }, { "epoch": 0.21, "grad_norm": 0.5079871830900327, "learning_rate": 3.685124510570699e-05, "loss": 0.1696, "step": 1873 }, { "epoch": 0.21, "grad_norm": 0.43550581802703897, "learning_rate": 3.6847412385388236e-05, "loss": 0.1207, "step": 1874 }, { "epoch": 0.21, "grad_norm": 0.5934227995535719, "learning_rate": 3.684357753342474e-05, "loss": 0.1493, "step": 1875 }, { "epoch": 0.21, "grad_norm": 0.5339223182410467, "learning_rate": 3.683974055030172e-05, "loss": 0.2084, "step": 1876 }, { "epoch": 0.21, "grad_norm": 0.5214670014076911, "learning_rate": 3.683590143650465e-05, "loss": 0.1462, "step": 1877 }, { "epoch": 0.21, "grad_norm": 0.6247407852506875, "learning_rate": 3.6832060192519286e-05, "loss": 0.1922, "step": 1878 }, { "epoch": 0.21, "grad_norm": 0.5663147424902499, "learning_rate": 3.682821681883164e-05, "loss": 0.1726, "step": 1879 }, { "epoch": 0.21, "grad_norm": 0.5244185804460151, "learning_rate": 3.6824371315928e-05, "loss": 0.1455, "step": 1880 }, { "epoch": 0.21, "grad_norm": 0.5249089866040416, "learning_rate": 3.6820523684294934e-05, "loss": 0.1265, "step": 1881 }, { "epoch": 0.21, "grad_norm": 0.5999200370274578, "learning_rate": 3.681667392441926e-05, "loss": 0.1754, "step": 1882 }, { "epoch": 0.21, "grad_norm": 0.6153279551225889, "learning_rate": 3.6812822036788085e-05, "loss": 0.1729, "step": 1883 }, { "epoch": 0.21, "grad_norm": 0.5995136147242454, "learning_rate": 3.680896802188876e-05, "loss": 0.1877, "step": 1884 }, { "epoch": 0.21, "grad_norm": 0.4937274547451559, "learning_rate": 3.680511188020893e-05, "loss": 0.1444, "step": 1885 }, { "epoch": 0.21, "grad_norm": 0.6131234178512956, "learning_rate": 3.6801253612236506e-05, "loss": 0.2362, "step": 1886 }, { "epoch": 0.21, "grad_norm": 0.5421036339354265, "learning_rate": 3.679739321845965e-05, "loss": 0.1697, "step": 1887 }, { "epoch": 0.21, "grad_norm": 0.45804696637575143, "learning_rate": 3.679353069936681e-05, "loss": 0.1897, "step": 1888 }, { "epoch": 0.21, "grad_norm": 0.5354349844454791, "learning_rate": 3.678966605544669e-05, "loss": 0.1602, "step": 1889 }, { "epoch": 0.21, "grad_norm": 0.5799963793970424, "learning_rate": 3.678579928718827e-05, "loss": 0.1542, "step": 1890 }, { "epoch": 0.21, "grad_norm": 0.4359307244160548, "learning_rate": 3.678193039508081e-05, "loss": 0.1438, "step": 1891 }, { "epoch": 0.21, "grad_norm": 0.47411293784923936, "learning_rate": 3.6778059379613815e-05, "loss": 0.131, "step": 1892 }, { "epoch": 0.21, "grad_norm": 0.5953754211574861, "learning_rate": 3.6774186241277084e-05, "loss": 0.1793, "step": 1893 }, { "epoch": 0.21, "grad_norm": 0.499161135792009, "learning_rate": 3.6770310980560654e-05, "loss": 0.1456, "step": 1894 }, { "epoch": 0.21, "grad_norm": 0.6174845403210941, "learning_rate": 3.676643359795487e-05, "loss": 0.2249, "step": 1895 }, { "epoch": 0.21, "grad_norm": 0.49905216261274066, "learning_rate": 3.676255409395031e-05, "loss": 0.1721, "step": 1896 }, { "epoch": 0.21, "grad_norm": 0.7396950883560727, "learning_rate": 3.6758672469037834e-05, "loss": 0.2002, "step": 1897 }, { "epoch": 0.21, "grad_norm": 0.5055737075440553, "learning_rate": 3.675478872370858e-05, "loss": 0.1593, "step": 1898 }, { "epoch": 0.21, "grad_norm": 0.4943929638594947, "learning_rate": 3.675090285845393e-05, "loss": 0.1387, "step": 1899 }, { "epoch": 0.21, "grad_norm": 0.5991777488079773, "learning_rate": 3.674701487376557e-05, "loss": 0.1728, "step": 1900 }, { "epoch": 0.21, "grad_norm": 0.6470686010078436, "learning_rate": 3.6743124770135416e-05, "loss": 0.1846, "step": 1901 }, { "epoch": 0.21, "grad_norm": 0.5272540828624529, "learning_rate": 3.673923254805566e-05, "loss": 0.1325, "step": 1902 }, { "epoch": 0.21, "grad_norm": 0.5076941132096551, "learning_rate": 3.673533820801881e-05, "loss": 0.1435, "step": 1903 }, { "epoch": 0.21, "grad_norm": 0.5036932620807516, "learning_rate": 3.6731441750517566e-05, "loss": 0.1402, "step": 1904 }, { "epoch": 0.21, "grad_norm": 0.571876263226224, "learning_rate": 3.6727543176044944e-05, "loss": 0.1761, "step": 1905 }, { "epoch": 0.21, "grad_norm": 0.5024522904383996, "learning_rate": 3.672364248509422e-05, "loss": 0.1502, "step": 1906 }, { "epoch": 0.21, "grad_norm": 0.5882948439447192, "learning_rate": 3.671973967815894e-05, "loss": 0.1908, "step": 1907 }, { "epoch": 0.21, "grad_norm": 0.4973732675223558, "learning_rate": 3.67158347557329e-05, "loss": 0.2043, "step": 1908 }, { "epoch": 0.21, "grad_norm": 0.5413165581021107, "learning_rate": 3.671192771831019e-05, "loss": 0.1678, "step": 1909 }, { "epoch": 0.21, "grad_norm": 0.4680968820864525, "learning_rate": 3.670801856638514e-05, "loss": 0.1414, "step": 1910 }, { "epoch": 0.21, "grad_norm": 0.4835948198396006, "learning_rate": 3.670410730045238e-05, "loss": 0.1576, "step": 1911 }, { "epoch": 0.21, "grad_norm": 0.5014166432721385, "learning_rate": 3.6700193921006766e-05, "loss": 0.1481, "step": 1912 }, { "epoch": 0.21, "grad_norm": 0.5560035138019156, "learning_rate": 3.669627842854346e-05, "loss": 0.175, "step": 1913 }, { "epoch": 0.21, "grad_norm": 0.6116576357892887, "learning_rate": 3.669236082355787e-05, "loss": 0.2143, "step": 1914 }, { "epoch": 0.21, "grad_norm": 0.4306543050685672, "learning_rate": 3.668844110654568e-05, "loss": 0.1309, "step": 1915 }, { "epoch": 0.21, "grad_norm": 0.5837273686042967, "learning_rate": 3.668451927800283e-05, "loss": 0.1632, "step": 1916 }, { "epoch": 0.21, "grad_norm": 0.41713692433543653, "learning_rate": 3.668059533842556e-05, "loss": 0.1335, "step": 1917 }, { "epoch": 0.21, "grad_norm": 0.3997542930489403, "learning_rate": 3.667666928831032e-05, "loss": 0.1159, "step": 1918 }, { "epoch": 0.21, "grad_norm": 0.6479163316028705, "learning_rate": 3.667274112815387e-05, "loss": 0.248, "step": 1919 }, { "epoch": 0.21, "grad_norm": 0.6217802560221917, "learning_rate": 3.666881085845324e-05, "loss": 0.1791, "step": 1920 }, { "epoch": 0.21, "grad_norm": 0.43443352764664045, "learning_rate": 3.666487847970571e-05, "loss": 0.1211, "step": 1921 }, { "epoch": 0.21, "grad_norm": 0.47964265408707923, "learning_rate": 3.6660943992408817e-05, "loss": 0.1367, "step": 1922 }, { "epoch": 0.21, "grad_norm": 0.5425305691030274, "learning_rate": 3.665700739706038e-05, "loss": 0.1529, "step": 1923 }, { "epoch": 0.21, "grad_norm": 0.5446673383280621, "learning_rate": 3.66530686941585e-05, "loss": 0.1331, "step": 1924 }, { "epoch": 0.21, "grad_norm": 0.5984780638968301, "learning_rate": 3.664912788420151e-05, "loss": 0.1877, "step": 1925 }, { "epoch": 0.21, "grad_norm": 0.5801971005277564, "learning_rate": 3.664518496768802e-05, "loss": 0.1695, "step": 1926 }, { "epoch": 0.21, "grad_norm": 0.5705829658360405, "learning_rate": 3.664123994511695e-05, "loss": 0.175, "step": 1927 }, { "epoch": 0.21, "grad_norm": 0.5007147746368455, "learning_rate": 3.663729281698741e-05, "loss": 0.1513, "step": 1928 }, { "epoch": 0.21, "grad_norm": 0.6023429168022518, "learning_rate": 3.6633343583798836e-05, "loss": 0.1747, "step": 1929 }, { "epoch": 0.21, "grad_norm": 0.5204841651340057, "learning_rate": 3.662939224605091e-05, "loss": 0.1446, "step": 1930 }, { "epoch": 0.21, "grad_norm": 0.5331089090242225, "learning_rate": 3.6625438804243574e-05, "loss": 0.1453, "step": 1931 }, { "epoch": 0.21, "grad_norm": 0.4469172857858662, "learning_rate": 3.6621483258877055e-05, "loss": 0.1362, "step": 1932 }, { "epoch": 0.21, "grad_norm": 0.5490063686696296, "learning_rate": 3.661752561045182e-05, "loss": 0.1558, "step": 1933 }, { "epoch": 0.21, "grad_norm": 0.5997796826381633, "learning_rate": 3.6613565859468626e-05, "loss": 0.2151, "step": 1934 }, { "epoch": 0.21, "grad_norm": 0.5764123447144598, "learning_rate": 3.6609604006428486e-05, "loss": 0.2078, "step": 1935 }, { "epoch": 0.21, "grad_norm": 0.5391405021513669, "learning_rate": 3.660564005183268e-05, "loss": 0.1846, "step": 1936 }, { "epoch": 0.21, "grad_norm": 0.4838369827038705, "learning_rate": 3.660167399618275e-05, "loss": 0.1449, "step": 1937 }, { "epoch": 0.21, "grad_norm": 0.5380862144289499, "learning_rate": 3.659770583998051e-05, "loss": 0.189, "step": 1938 }, { "epoch": 0.21, "grad_norm": 0.6434882076119558, "learning_rate": 3.659373558372803e-05, "loss": 0.2672, "step": 1939 }, { "epoch": 0.21, "grad_norm": 0.5245802631949871, "learning_rate": 3.658976322792766e-05, "loss": 0.22, "step": 1940 }, { "epoch": 0.21, "grad_norm": 0.460259796046616, "learning_rate": 3.658578877308201e-05, "loss": 0.1327, "step": 1941 }, { "epoch": 0.21, "grad_norm": 0.5019063371159829, "learning_rate": 3.658181221969395e-05, "loss": 0.167, "step": 1942 }, { "epoch": 0.21, "grad_norm": 0.4179564322679788, "learning_rate": 3.657783356826662e-05, "loss": 0.1643, "step": 1943 }, { "epoch": 0.21, "grad_norm": 0.5073032466603314, "learning_rate": 3.657385281930343e-05, "loss": 0.1379, "step": 1944 }, { "epoch": 0.21, "grad_norm": 0.4739893249887038, "learning_rate": 3.656986997330804e-05, "loss": 0.1877, "step": 1945 }, { "epoch": 0.21, "grad_norm": 0.45973387862208753, "learning_rate": 3.65658850307844e-05, "loss": 0.1388, "step": 1946 }, { "epoch": 0.21, "grad_norm": 0.570273254298344, "learning_rate": 3.656189799223669e-05, "loss": 0.1568, "step": 1947 }, { "epoch": 0.21, "grad_norm": 0.6250386702972167, "learning_rate": 3.65579088581694e-05, "loss": 0.1904, "step": 1948 }, { "epoch": 0.21, "grad_norm": 0.47787019447407003, "learning_rate": 3.6553917629087246e-05, "loss": 0.1496, "step": 1949 }, { "epoch": 0.21, "grad_norm": 0.47813375377744693, "learning_rate": 3.6549924305495225e-05, "loss": 0.1475, "step": 1950 }, { "epoch": 0.21, "grad_norm": 0.6565636838805152, "learning_rate": 3.654592888789861e-05, "loss": 0.1703, "step": 1951 }, { "epoch": 0.21, "grad_norm": 0.6033249340825082, "learning_rate": 3.6541931376802906e-05, "loss": 0.1664, "step": 1952 }, { "epoch": 0.21, "grad_norm": 0.4774719066244637, "learning_rate": 3.653793177271393e-05, "loss": 0.1291, "step": 1953 }, { "epoch": 0.21, "grad_norm": 0.4556537965483882, "learning_rate": 3.653393007613771e-05, "loss": 0.1302, "step": 1954 }, { "epoch": 0.21, "grad_norm": 0.6308377619486693, "learning_rate": 3.65299262875806e-05, "loss": 0.214, "step": 1955 }, { "epoch": 0.21, "grad_norm": 0.5604642481499463, "learning_rate": 3.652592040754917e-05, "loss": 0.1741, "step": 1956 }, { "epoch": 0.21, "grad_norm": 0.46423169721782687, "learning_rate": 3.652191243655025e-05, "loss": 0.1395, "step": 1957 }, { "epoch": 0.22, "grad_norm": 0.5776067421262083, "learning_rate": 3.651790237509098e-05, "loss": 0.2144, "step": 1958 }, { "epoch": 0.22, "grad_norm": 0.6092808234972482, "learning_rate": 3.651389022367874e-05, "loss": 0.1702, "step": 1959 }, { "epoch": 0.22, "grad_norm": 0.5484238897303066, "learning_rate": 3.650987598282116e-05, "loss": 0.1499, "step": 1960 }, { "epoch": 0.22, "grad_norm": 0.5255099590022039, "learning_rate": 3.650585965302614e-05, "loss": 0.1514, "step": 1961 }, { "epoch": 0.22, "grad_norm": 0.5224774794767366, "learning_rate": 3.6501841234801886e-05, "loss": 0.1648, "step": 1962 }, { "epoch": 0.22, "grad_norm": 0.558116308659357, "learning_rate": 3.64978207286568e-05, "loss": 0.168, "step": 1963 }, { "epoch": 0.22, "grad_norm": 0.5608751884002743, "learning_rate": 3.649379813509961e-05, "loss": 0.1614, "step": 1964 }, { "epoch": 0.22, "grad_norm": 0.5931777455118002, "learning_rate": 3.648977345463926e-05, "loss": 0.1398, "step": 1965 }, { "epoch": 0.22, "grad_norm": 0.5711772320182702, "learning_rate": 3.648574668778499e-05, "loss": 0.1986, "step": 1966 }, { "epoch": 0.22, "grad_norm": 0.5072716987747973, "learning_rate": 3.6481717835046286e-05, "loss": 0.1617, "step": 1967 }, { "epoch": 0.22, "grad_norm": 0.5965541576443644, "learning_rate": 3.647768689693291e-05, "loss": 0.1623, "step": 1968 }, { "epoch": 0.22, "grad_norm": 0.6849475207726617, "learning_rate": 3.647365387395488e-05, "loss": 0.2328, "step": 1969 }, { "epoch": 0.22, "grad_norm": 0.5811336431633901, "learning_rate": 3.646961876662248e-05, "loss": 0.1932, "step": 1970 }, { "epoch": 0.22, "grad_norm": 0.4679220994677543, "learning_rate": 3.646558157544626e-05, "loss": 0.1411, "step": 1971 }, { "epoch": 0.22, "grad_norm": 0.4810113711569421, "learning_rate": 3.6461542300937035e-05, "loss": 0.1718, "step": 1972 }, { "epoch": 0.22, "grad_norm": 0.5762250972671092, "learning_rate": 3.645750094360588e-05, "loss": 0.221, "step": 1973 }, { "epoch": 0.22, "grad_norm": 0.5818175877356747, "learning_rate": 3.645345750396412e-05, "loss": 0.1829, "step": 1974 }, { "epoch": 0.22, "grad_norm": 0.5473105739565279, "learning_rate": 3.6449411982523376e-05, "loss": 0.1891, "step": 1975 }, { "epoch": 0.22, "grad_norm": 0.6094371323821574, "learning_rate": 3.64453643797955e-05, "loss": 0.1784, "step": 1976 }, { "epoch": 0.22, "grad_norm": 0.4716501957300015, "learning_rate": 3.644131469629264e-05, "loss": 0.1588, "step": 1977 }, { "epoch": 0.22, "grad_norm": 0.4981673898932009, "learning_rate": 3.643726293252717e-05, "loss": 0.1199, "step": 1978 }, { "epoch": 0.22, "grad_norm": 0.4690944359852931, "learning_rate": 3.6433209089011745e-05, "loss": 0.1416, "step": 1979 }, { "epoch": 0.22, "grad_norm": 0.5811417201921631, "learning_rate": 3.642915316625929e-05, "loss": 0.1804, "step": 1980 }, { "epoch": 0.22, "grad_norm": 0.48637858669043826, "learning_rate": 3.642509516478299e-05, "loss": 0.1621, "step": 1981 }, { "epoch": 0.22, "grad_norm": 0.6521297720642567, "learning_rate": 3.642103508509629e-05, "loss": 0.2048, "step": 1982 }, { "epoch": 0.22, "grad_norm": 0.5445678545389675, "learning_rate": 3.641697292771289e-05, "loss": 0.1785, "step": 1983 }, { "epoch": 0.22, "grad_norm": 0.4149623306252465, "learning_rate": 3.641290869314676e-05, "loss": 0.1299, "step": 1984 }, { "epoch": 0.22, "grad_norm": 0.7210705655256837, "learning_rate": 3.6408842381912144e-05, "loss": 0.226, "step": 1985 }, { "epoch": 0.22, "grad_norm": 0.49010246903560933, "learning_rate": 3.640477399452354e-05, "loss": 0.1528, "step": 1986 }, { "epoch": 0.22, "grad_norm": 0.46981173117928565, "learning_rate": 3.640070353149569e-05, "loss": 0.1667, "step": 1987 }, { "epoch": 0.22, "grad_norm": 0.5661934288346968, "learning_rate": 3.639663099334363e-05, "loss": 0.1354, "step": 1988 }, { "epoch": 0.22, "grad_norm": 0.43644088886272214, "learning_rate": 3.639255638058264e-05, "loss": 0.1206, "step": 1989 }, { "epoch": 0.22, "grad_norm": 0.5508694409045998, "learning_rate": 3.6388479693728266e-05, "loss": 0.1247, "step": 1990 }, { "epoch": 0.22, "grad_norm": 0.5019408346964781, "learning_rate": 3.638440093329632e-05, "loss": 0.1689, "step": 1991 }, { "epoch": 0.22, "grad_norm": 0.5842317548407773, "learning_rate": 3.638032009980286e-05, "loss": 0.1647, "step": 1992 }, { "epoch": 0.22, "grad_norm": 0.7121271094595126, "learning_rate": 3.637623719376424e-05, "loss": 0.2153, "step": 1993 }, { "epoch": 0.22, "grad_norm": 0.4787442031190676, "learning_rate": 3.637215221569705e-05, "loss": 0.1456, "step": 1994 }, { "epoch": 0.22, "grad_norm": 0.5565819424933913, "learning_rate": 3.6368065166118136e-05, "loss": 0.1543, "step": 1995 }, { "epoch": 0.22, "grad_norm": 0.500342609326308, "learning_rate": 3.636397604554463e-05, "loss": 0.1457, "step": 1996 }, { "epoch": 0.22, "grad_norm": 0.5064763316737215, "learning_rate": 3.635988485449391e-05, "loss": 0.1287, "step": 1997 }, { "epoch": 0.22, "grad_norm": 0.4277062951194384, "learning_rate": 3.635579159348362e-05, "loss": 0.1055, "step": 1998 }, { "epoch": 0.22, "grad_norm": 0.5991639710296278, "learning_rate": 3.635169626303168e-05, "loss": 0.1786, "step": 1999 }, { "epoch": 0.22, "grad_norm": 0.6257932873733754, "learning_rate": 3.634759886365623e-05, "loss": 0.1922, "step": 2000 }, { "epoch": 0.22, "grad_norm": 0.5257618666727696, "learning_rate": 3.634349939587573e-05, "loss": 0.1747, "step": 2001 }, { "epoch": 0.22, "grad_norm": 0.4432636107147846, "learning_rate": 3.633939786020884e-05, "loss": 0.1468, "step": 2002 }, { "epoch": 0.22, "grad_norm": 0.4857121898927471, "learning_rate": 3.633529425717454e-05, "loss": 0.1314, "step": 2003 }, { "epoch": 0.22, "grad_norm": 0.5365739407705447, "learning_rate": 3.633118858729203e-05, "loss": 0.1808, "step": 2004 }, { "epoch": 0.22, "grad_norm": 0.5266604765345163, "learning_rate": 3.63270808510808e-05, "loss": 0.159, "step": 2005 }, { "epoch": 0.22, "grad_norm": 0.5027939696658185, "learning_rate": 3.632297104906057e-05, "loss": 0.1739, "step": 2006 }, { "epoch": 0.22, "grad_norm": 0.4696130912911679, "learning_rate": 3.6318859181751346e-05, "loss": 0.1086, "step": 2007 }, { "epoch": 0.22, "grad_norm": 0.46110300551832906, "learning_rate": 3.63147452496734e-05, "loss": 0.1343, "step": 2008 }, { "epoch": 0.22, "grad_norm": 0.5443645895434426, "learning_rate": 3.631062925334723e-05, "loss": 0.1487, "step": 2009 }, { "epoch": 0.22, "grad_norm": 0.4930222864597726, "learning_rate": 3.6306511193293636e-05, "loss": 0.1421, "step": 2010 }, { "epoch": 0.22, "grad_norm": 0.4832259978398586, "learning_rate": 3.630239107003366e-05, "loss": 0.1283, "step": 2011 }, { "epoch": 0.22, "grad_norm": 0.6334396833505439, "learning_rate": 3.629826888408861e-05, "loss": 0.2047, "step": 2012 }, { "epoch": 0.22, "grad_norm": 0.7640047640527114, "learning_rate": 3.629414463598005e-05, "loss": 0.3396, "step": 2013 }, { "epoch": 0.22, "grad_norm": 0.5815493745875413, "learning_rate": 3.629001832622979e-05, "loss": 0.1834, "step": 2014 }, { "epoch": 0.22, "grad_norm": 0.5783466738922071, "learning_rate": 3.628588995535993e-05, "loss": 0.2054, "step": 2015 }, { "epoch": 0.22, "grad_norm": 0.4736537024226936, "learning_rate": 3.628175952389283e-05, "loss": 0.1625, "step": 2016 }, { "epoch": 0.22, "grad_norm": 0.5274918242358799, "learning_rate": 3.627762703235108e-05, "loss": 0.1682, "step": 2017 }, { "epoch": 0.22, "grad_norm": 0.46141825674875125, "learning_rate": 3.627349248125757e-05, "loss": 0.1439, "step": 2018 }, { "epoch": 0.22, "grad_norm": 0.4703071163034107, "learning_rate": 3.626935587113541e-05, "loss": 0.1588, "step": 2019 }, { "epoch": 0.22, "grad_norm": 0.3911449976716171, "learning_rate": 3.6265217202508006e-05, "loss": 0.1258, "step": 2020 }, { "epoch": 0.22, "grad_norm": 0.5918724590919682, "learning_rate": 3.6261076475899e-05, "loss": 0.1756, "step": 2021 }, { "epoch": 0.22, "grad_norm": 0.5198404639610035, "learning_rate": 3.625693369183231e-05, "loss": 0.1276, "step": 2022 }, { "epoch": 0.22, "grad_norm": 0.5160302390397821, "learning_rate": 3.62527888508321e-05, "loss": 0.1406, "step": 2023 }, { "epoch": 0.22, "grad_norm": 0.5973982409889064, "learning_rate": 3.624864195342281e-05, "loss": 0.1813, "step": 2024 }, { "epoch": 0.22, "grad_norm": 0.5722754676154685, "learning_rate": 3.624449300012914e-05, "loss": 0.163, "step": 2025 }, { "epoch": 0.22, "grad_norm": 0.6621744129567309, "learning_rate": 3.624034199147602e-05, "loss": 0.2156, "step": 2026 }, { "epoch": 0.22, "grad_norm": 0.5930819015199781, "learning_rate": 3.623618892798868e-05, "loss": 0.1678, "step": 2027 }, { "epoch": 0.22, "grad_norm": 0.5742386535516691, "learning_rate": 3.623203381019259e-05, "loss": 0.1775, "step": 2028 }, { "epoch": 0.22, "grad_norm": 0.4256503227588008, "learning_rate": 3.6227876638613484e-05, "loss": 0.1227, "step": 2029 }, { "epoch": 0.22, "grad_norm": 0.497967104039795, "learning_rate": 3.6223717413777346e-05, "loss": 0.134, "step": 2030 }, { "epoch": 0.22, "grad_norm": 0.5522639021291449, "learning_rate": 3.6219556136210434e-05, "loss": 0.1778, "step": 2031 }, { "epoch": 0.22, "grad_norm": 0.5163330360949018, "learning_rate": 3.621539280643926e-05, "loss": 0.1758, "step": 2032 }, { "epoch": 0.22, "grad_norm": 0.4713782063741588, "learning_rate": 3.62112274249906e-05, "loss": 0.1292, "step": 2033 }, { "epoch": 0.22, "grad_norm": 0.5164288987633738, "learning_rate": 3.620705999239148e-05, "loss": 0.1958, "step": 2034 }, { "epoch": 0.22, "grad_norm": 0.4637345937652982, "learning_rate": 3.6202890509169195e-05, "loss": 0.1482, "step": 2035 }, { "epoch": 0.22, "grad_norm": 0.44388691777822264, "learning_rate": 3.619871897585129e-05, "loss": 0.1304, "step": 2036 }, { "epoch": 0.22, "grad_norm": 0.3920281956760082, "learning_rate": 3.619454539296557e-05, "loss": 0.1172, "step": 2037 }, { "epoch": 0.22, "grad_norm": 0.5284547476094049, "learning_rate": 3.6190369761040116e-05, "loss": 0.1413, "step": 2038 }, { "epoch": 0.22, "grad_norm": 0.5252998018290177, "learning_rate": 3.6186192080603256e-05, "loss": 0.1383, "step": 2039 }, { "epoch": 0.22, "grad_norm": 0.5471299530826533, "learning_rate": 3.618201235218356e-05, "loss": 0.1619, "step": 2040 }, { "epoch": 0.22, "grad_norm": 0.4663222066280349, "learning_rate": 3.61778305763099e-05, "loss": 0.1459, "step": 2041 }, { "epoch": 0.22, "grad_norm": 0.6423682243938496, "learning_rate": 3.617364675351136e-05, "loss": 0.2014, "step": 2042 }, { "epoch": 0.22, "grad_norm": 0.4189043536315817, "learning_rate": 3.616946088431732e-05, "loss": 0.1168, "step": 2043 }, { "epoch": 0.22, "grad_norm": 0.8351530868998507, "learning_rate": 3.61652729692574e-05, "loss": 0.1388, "step": 2044 }, { "epoch": 0.22, "grad_norm": 0.5602007870811945, "learning_rate": 3.6161083008861464e-05, "loss": 0.1645, "step": 2045 }, { "epoch": 0.22, "grad_norm": 0.4991469556974522, "learning_rate": 3.615689100365968e-05, "loss": 0.1936, "step": 2046 }, { "epoch": 0.22, "grad_norm": 0.6335008433154965, "learning_rate": 3.615269695418243e-05, "loss": 0.1996, "step": 2047 }, { "epoch": 0.22, "grad_norm": 0.4085568807097909, "learning_rate": 3.6148500860960386e-05, "loss": 0.1182, "step": 2048 }, { "epoch": 0.23, "grad_norm": 0.4847407344603472, "learning_rate": 3.6144302724524456e-05, "loss": 0.1196, "step": 2049 }, { "epoch": 0.23, "grad_norm": 0.4259377638101635, "learning_rate": 3.614010254540581e-05, "loss": 0.1464, "step": 2050 }, { "epoch": 0.23, "grad_norm": 0.5063057774557885, "learning_rate": 3.61359003241359e-05, "loss": 0.1807, "step": 2051 }, { "epoch": 0.23, "grad_norm": 0.49745151614196687, "learning_rate": 3.6131696061246405e-05, "loss": 0.1466, "step": 2052 }, { "epoch": 0.23, "grad_norm": 0.5721184603146693, "learning_rate": 3.612748975726928e-05, "loss": 0.194, "step": 2053 }, { "epoch": 0.23, "grad_norm": 0.5097667357918633, "learning_rate": 3.612328141273673e-05, "loss": 0.1703, "step": 2054 }, { "epoch": 0.23, "grad_norm": 0.519969611838875, "learning_rate": 3.6119071028181225e-05, "loss": 0.179, "step": 2055 }, { "epoch": 0.23, "grad_norm": 0.553502442196883, "learning_rate": 3.6114858604135496e-05, "loss": 0.1805, "step": 2056 }, { "epoch": 0.23, "grad_norm": 0.5507481612418532, "learning_rate": 3.6110644141132514e-05, "loss": 0.1616, "step": 2057 }, { "epoch": 0.23, "grad_norm": 0.5562239844421336, "learning_rate": 3.610642763970553e-05, "loss": 0.2282, "step": 2058 }, { "epoch": 0.23, "grad_norm": 0.4366349796756776, "learning_rate": 3.610220910038805e-05, "loss": 0.1399, "step": 2059 }, { "epoch": 0.23, "grad_norm": 0.49975418962685153, "learning_rate": 3.6097988523713816e-05, "loss": 0.13, "step": 2060 }, { "epoch": 0.23, "grad_norm": 0.573067347623059, "learning_rate": 3.609376591021684e-05, "loss": 0.2138, "step": 2061 }, { "epoch": 0.23, "grad_norm": 0.5172269385475974, "learning_rate": 3.608954126043141e-05, "loss": 0.1805, "step": 2062 }, { "epoch": 0.23, "grad_norm": 0.5368863284211457, "learning_rate": 3.608531457489205e-05, "loss": 0.149, "step": 2063 }, { "epoch": 0.23, "grad_norm": 0.4474400675721666, "learning_rate": 3.608108585413356e-05, "loss": 0.1421, "step": 2064 }, { "epoch": 0.23, "grad_norm": 0.5168813081423282, "learning_rate": 3.607685509869096e-05, "loss": 0.1245, "step": 2065 }, { "epoch": 0.23, "grad_norm": 0.47662445353554955, "learning_rate": 3.6072622309099566e-05, "loss": 0.1255, "step": 2066 }, { "epoch": 0.23, "grad_norm": 0.5527029668828085, "learning_rate": 3.606838748589495e-05, "loss": 0.1491, "step": 2067 }, { "epoch": 0.23, "grad_norm": 0.6733912060396514, "learning_rate": 3.60641506296129e-05, "loss": 0.1714, "step": 2068 }, { "epoch": 0.23, "grad_norm": 0.590959743724629, "learning_rate": 3.605991174078952e-05, "loss": 0.1759, "step": 2069 }, { "epoch": 0.23, "grad_norm": 0.48218091746378144, "learning_rate": 3.605567081996113e-05, "loss": 0.1377, "step": 2070 }, { "epoch": 0.23, "grad_norm": 0.5524899107240545, "learning_rate": 3.605142786766432e-05, "loss": 0.1638, "step": 2071 }, { "epoch": 0.23, "grad_norm": 0.5751770482944267, "learning_rate": 3.604718288443593e-05, "loss": 0.1772, "step": 2072 }, { "epoch": 0.23, "grad_norm": 0.6105105993973792, "learning_rate": 3.6042935870813075e-05, "loss": 0.2161, "step": 2073 }, { "epoch": 0.23, "grad_norm": 0.4335465964031393, "learning_rate": 3.60386868273331e-05, "loss": 0.1391, "step": 2074 }, { "epoch": 0.23, "grad_norm": 0.5847080811426361, "learning_rate": 3.603443575453364e-05, "loss": 0.216, "step": 2075 }, { "epoch": 0.23, "grad_norm": 0.5602100170938202, "learning_rate": 3.603018265295255e-05, "loss": 0.1591, "step": 2076 }, { "epoch": 0.23, "grad_norm": 0.5564367623708293, "learning_rate": 3.602592752312798e-05, "loss": 0.2086, "step": 2077 }, { "epoch": 0.23, "grad_norm": 0.49248547087214345, "learning_rate": 3.60216703655983e-05, "loss": 0.1588, "step": 2078 }, { "epoch": 0.23, "grad_norm": 0.5153313870088918, "learning_rate": 3.6017411180902157e-05, "loss": 0.1856, "step": 2079 }, { "epoch": 0.23, "grad_norm": 0.4188910673720334, "learning_rate": 3.601314996957845e-05, "loss": 0.12, "step": 2080 }, { "epoch": 0.23, "grad_norm": 0.5524775579045905, "learning_rate": 3.600888673216635e-05, "loss": 0.1693, "step": 2081 }, { "epoch": 0.23, "grad_norm": 0.5003919853054976, "learning_rate": 3.600462146920525e-05, "loss": 0.1415, "step": 2082 }, { "epoch": 0.23, "grad_norm": 0.5012279273997605, "learning_rate": 3.600035418123483e-05, "loss": 0.1361, "step": 2083 }, { "epoch": 0.23, "grad_norm": 0.5940832909202195, "learning_rate": 3.5996084868795015e-05, "loss": 0.1618, "step": 2084 }, { "epoch": 0.23, "grad_norm": 0.4627231504182507, "learning_rate": 3.5991813532425977e-05, "loss": 0.1374, "step": 2085 }, { "epoch": 0.23, "grad_norm": 0.6019412990747641, "learning_rate": 3.5987540172668164e-05, "loss": 0.1909, "step": 2086 }, { "epoch": 0.23, "grad_norm": 0.5094973855929547, "learning_rate": 3.598326479006226e-05, "loss": 0.1566, "step": 2087 }, { "epoch": 0.23, "grad_norm": 0.5033207826386441, "learning_rate": 3.597898738514923e-05, "loss": 0.1766, "step": 2088 }, { "epoch": 0.23, "grad_norm": 0.5586757466349173, "learning_rate": 3.597470795847026e-05, "loss": 0.1762, "step": 2089 }, { "epoch": 0.23, "grad_norm": 0.5388756027200612, "learning_rate": 3.5970426510566824e-05, "loss": 0.1844, "step": 2090 }, { "epoch": 0.23, "grad_norm": 0.4788717073861573, "learning_rate": 3.596614304198063e-05, "loss": 0.1141, "step": 2091 }, { "epoch": 0.23, "grad_norm": 0.49148072859430597, "learning_rate": 3.5961857553253665e-05, "loss": 0.1644, "step": 2092 }, { "epoch": 0.23, "grad_norm": 0.501138463917304, "learning_rate": 3.595757004492814e-05, "loss": 0.1276, "step": 2093 }, { "epoch": 0.23, "grad_norm": 0.49792661523922327, "learning_rate": 3.595328051754654e-05, "loss": 0.1315, "step": 2094 }, { "epoch": 0.23, "grad_norm": 0.5218874627184253, "learning_rate": 3.5948988971651624e-05, "loss": 0.1289, "step": 2095 }, { "epoch": 0.23, "grad_norm": 0.48924258393060116, "learning_rate": 3.594469540778637e-05, "loss": 0.1287, "step": 2096 }, { "epoch": 0.23, "grad_norm": 0.5518858634423857, "learning_rate": 3.594039982649402e-05, "loss": 0.1481, "step": 2097 }, { "epoch": 0.23, "grad_norm": 0.584088344951233, "learning_rate": 3.593610222831809e-05, "loss": 0.1501, "step": 2098 }, { "epoch": 0.23, "grad_norm": 0.5855221356948225, "learning_rate": 3.593180261380235e-05, "loss": 0.1591, "step": 2099 }, { "epoch": 0.23, "grad_norm": 0.5031045850320036, "learning_rate": 3.59275009834908e-05, "loss": 0.1132, "step": 2100 }, { "epoch": 0.23, "grad_norm": 0.6306899938748599, "learning_rate": 3.592319733792772e-05, "loss": 0.1747, "step": 2101 }, { "epoch": 0.23, "grad_norm": 0.6248875278362477, "learning_rate": 3.591889167765762e-05, "loss": 0.1596, "step": 2102 }, { "epoch": 0.23, "grad_norm": 0.6393745127030023, "learning_rate": 3.59145840032253e-05, "loss": 0.1453, "step": 2103 }, { "epoch": 0.23, "grad_norm": 0.49098458203350615, "learning_rate": 3.591027431517577e-05, "loss": 0.1222, "step": 2104 }, { "epoch": 0.23, "grad_norm": 0.6470013997152715, "learning_rate": 3.590596261405435e-05, "loss": 0.2166, "step": 2105 }, { "epoch": 0.23, "grad_norm": 0.41923729740705373, "learning_rate": 3.590164890040657e-05, "loss": 0.1223, "step": 2106 }, { "epoch": 0.23, "grad_norm": 0.570456578070775, "learning_rate": 3.5897333174778225e-05, "loss": 0.2212, "step": 2107 }, { "epoch": 0.23, "grad_norm": 0.5025193401429565, "learning_rate": 3.589301543771537e-05, "loss": 0.1512, "step": 2108 }, { "epoch": 0.23, "grad_norm": 0.4099237782292562, "learning_rate": 3.588869568976433e-05, "loss": 0.1581, "step": 2109 }, { "epoch": 0.23, "grad_norm": 0.5462918578462681, "learning_rate": 3.588437393147164e-05, "loss": 0.1854, "step": 2110 }, { "epoch": 0.23, "grad_norm": 0.5170585471562059, "learning_rate": 3.5880050163384137e-05, "loss": 0.134, "step": 2111 }, { "epoch": 0.23, "grad_norm": 0.5689140313843987, "learning_rate": 3.587572438604889e-05, "loss": 0.193, "step": 2112 }, { "epoch": 0.23, "grad_norm": 0.618787165209569, "learning_rate": 3.587139660001322e-05, "loss": 0.1339, "step": 2113 }, { "epoch": 0.23, "grad_norm": 0.6374166209727526, "learning_rate": 3.586706680582471e-05, "loss": 0.2381, "step": 2114 }, { "epoch": 0.23, "grad_norm": 0.5573617914828713, "learning_rate": 3.5862735004031186e-05, "loss": 0.1656, "step": 2115 }, { "epoch": 0.23, "grad_norm": 0.4590089831958744, "learning_rate": 3.585840119518075e-05, "loss": 0.1065, "step": 2116 }, { "epoch": 0.23, "grad_norm": 0.5377403575736783, "learning_rate": 3.585406537982173e-05, "loss": 0.1729, "step": 2117 }, { "epoch": 0.23, "grad_norm": 0.4928683612491513, "learning_rate": 3.584972755850273e-05, "loss": 0.1023, "step": 2118 }, { "epoch": 0.23, "grad_norm": 0.5105135170532267, "learning_rate": 3.58453877317726e-05, "loss": 0.1256, "step": 2119 }, { "epoch": 0.23, "grad_norm": 0.5889843117303659, "learning_rate": 3.584104590018044e-05, "loss": 0.1798, "step": 2120 }, { "epoch": 0.23, "grad_norm": 0.5361342021875489, "learning_rate": 3.583670206427559e-05, "loss": 0.1746, "step": 2121 }, { "epoch": 0.23, "grad_norm": 0.49495203698553464, "learning_rate": 3.58323562246077e-05, "loss": 0.1449, "step": 2122 }, { "epoch": 0.23, "grad_norm": 0.4712080245966396, "learning_rate": 3.5828008381726604e-05, "loss": 0.1591, "step": 2123 }, { "epoch": 0.23, "grad_norm": 0.5619003755654761, "learning_rate": 3.5823658536182426e-05, "loss": 0.1988, "step": 2124 }, { "epoch": 0.23, "grad_norm": 0.509945541727996, "learning_rate": 3.581930668852554e-05, "loss": 0.1764, "step": 2125 }, { "epoch": 0.23, "grad_norm": 0.4888556801770972, "learning_rate": 3.5814952839306574e-05, "loss": 0.1564, "step": 2126 }, { "epoch": 0.23, "grad_norm": 0.5436589636176077, "learning_rate": 3.581059698907639e-05, "loss": 0.2171, "step": 2127 }, { "epoch": 0.23, "grad_norm": 0.46314736179723454, "learning_rate": 3.580623913838613e-05, "loss": 0.1465, "step": 2128 }, { "epoch": 0.23, "grad_norm": 0.4723642724235566, "learning_rate": 3.5801879287787185e-05, "loss": 0.1449, "step": 2129 }, { "epoch": 0.23, "grad_norm": 0.5171827425550637, "learning_rate": 3.579751743783118e-05, "loss": 0.1469, "step": 2130 }, { "epoch": 0.23, "grad_norm": 0.3948596682734425, "learning_rate": 3.5793153589070005e-05, "loss": 0.1024, "step": 2131 }, { "epoch": 0.23, "grad_norm": 0.5111791080299523, "learning_rate": 3.578878774205581e-05, "loss": 0.1504, "step": 2132 }, { "epoch": 0.23, "grad_norm": 0.4992446856858184, "learning_rate": 3.578441989734097e-05, "loss": 0.1287, "step": 2133 }, { "epoch": 0.23, "grad_norm": 0.568864001911079, "learning_rate": 3.578005005547817e-05, "loss": 0.1729, "step": 2134 }, { "epoch": 0.23, "grad_norm": 0.4987006141715803, "learning_rate": 3.577567821702028e-05, "loss": 0.1413, "step": 2135 }, { "epoch": 0.23, "grad_norm": 0.5919659313293283, "learning_rate": 3.577130438252046e-05, "loss": 0.173, "step": 2136 }, { "epoch": 0.23, "grad_norm": 0.5258159971729612, "learning_rate": 3.576692855253213e-05, "loss": 0.133, "step": 2137 }, { "epoch": 0.23, "grad_norm": 0.5210813758215879, "learning_rate": 3.576255072760893e-05, "loss": 0.1312, "step": 2138 }, { "epoch": 0.23, "grad_norm": 0.4247997415188903, "learning_rate": 3.575817090830479e-05, "loss": 0.1263, "step": 2139 }, { "epoch": 0.24, "grad_norm": 0.4656267063698448, "learning_rate": 3.575378909517385e-05, "loss": 0.143, "step": 2140 }, { "epoch": 0.24, "grad_norm": 0.5243593610084191, "learning_rate": 3.5749405288770554e-05, "loss": 0.1756, "step": 2141 }, { "epoch": 0.24, "grad_norm": 0.5194542332977483, "learning_rate": 3.574501948964954e-05, "loss": 0.1381, "step": 2142 }, { "epoch": 0.24, "grad_norm": 0.5325908009477068, "learning_rate": 3.574063169836575e-05, "loss": 0.1551, "step": 2143 }, { "epoch": 0.24, "grad_norm": 0.6515360188763195, "learning_rate": 3.5736241915474345e-05, "loss": 0.1705, "step": 2144 }, { "epoch": 0.24, "grad_norm": 0.4534291857766253, "learning_rate": 3.5731850141530755e-05, "loss": 0.1289, "step": 2145 }, { "epoch": 0.24, "grad_norm": 0.48019513038253836, "learning_rate": 3.572745637709065e-05, "loss": 0.1376, "step": 2146 }, { "epoch": 0.24, "grad_norm": 0.5842375445754202, "learning_rate": 3.572306062270997e-05, "loss": 0.1842, "step": 2147 }, { "epoch": 0.24, "grad_norm": 0.5098591276626897, "learning_rate": 3.5718662878944876e-05, "loss": 0.1588, "step": 2148 }, { "epoch": 0.24, "grad_norm": 0.48386490019139633, "learning_rate": 3.571426314635181e-05, "loss": 0.1302, "step": 2149 }, { "epoch": 0.24, "grad_norm": 0.5765781586544345, "learning_rate": 3.570986142548746e-05, "loss": 0.1264, "step": 2150 }, { "epoch": 0.24, "grad_norm": 0.5298212979553626, "learning_rate": 3.570545771690875e-05, "loss": 0.163, "step": 2151 }, { "epoch": 0.24, "grad_norm": 0.48545262226760266, "learning_rate": 3.5701052021172874e-05, "loss": 0.1347, "step": 2152 }, { "epoch": 0.24, "grad_norm": 0.46175935182669003, "learning_rate": 3.569664433883726e-05, "loss": 0.1898, "step": 2153 }, { "epoch": 0.24, "grad_norm": 0.43663625944654316, "learning_rate": 3.5692234670459615e-05, "loss": 0.133, "step": 2154 }, { "epoch": 0.24, "grad_norm": 0.6176667103459438, "learning_rate": 3.568782301659786e-05, "loss": 0.1804, "step": 2155 }, { "epoch": 0.24, "grad_norm": 0.6520791802157258, "learning_rate": 3.5683409377810185e-05, "loss": 0.2391, "step": 2156 }, { "epoch": 0.24, "grad_norm": 0.39095976789741776, "learning_rate": 3.567899375465505e-05, "loss": 0.1262, "step": 2157 }, { "epoch": 0.24, "grad_norm": 0.46209647063088194, "learning_rate": 3.567457614769113e-05, "loss": 0.1464, "step": 2158 }, { "epoch": 0.24, "grad_norm": 0.5520535646965972, "learning_rate": 3.567015655747739e-05, "loss": 0.1786, "step": 2159 }, { "epoch": 0.24, "grad_norm": 0.4781532005021844, "learning_rate": 3.566573498457301e-05, "loss": 0.1296, "step": 2160 }, { "epoch": 0.24, "grad_norm": 0.5174091567132771, "learning_rate": 3.5661311429537435e-05, "loss": 0.1858, "step": 2161 }, { "epoch": 0.24, "grad_norm": 0.45674934822395147, "learning_rate": 3.5656885892930376e-05, "loss": 0.1317, "step": 2162 }, { "epoch": 0.24, "grad_norm": 0.5624280989575822, "learning_rate": 3.565245837531177e-05, "loss": 0.165, "step": 2163 }, { "epoch": 0.24, "grad_norm": 0.4039046923904342, "learning_rate": 3.564802887724181e-05, "loss": 0.1087, "step": 2164 }, { "epoch": 0.24, "grad_norm": 0.5439162734548321, "learning_rate": 3.564359739928097e-05, "loss": 0.1807, "step": 2165 }, { "epoch": 0.24, "grad_norm": 0.5429791236819561, "learning_rate": 3.563916394198991e-05, "loss": 0.1689, "step": 2166 }, { "epoch": 0.24, "grad_norm": 0.5109704056622522, "learning_rate": 3.563472850592962e-05, "loss": 0.1627, "step": 2167 }, { "epoch": 0.24, "grad_norm": 0.5563541143470068, "learning_rate": 3.5630291091661276e-05, "loss": 0.1573, "step": 2168 }, { "epoch": 0.24, "grad_norm": 0.4252159145202093, "learning_rate": 3.562585169974633e-05, "loss": 0.1332, "step": 2169 }, { "epoch": 0.24, "grad_norm": 0.43047371807796997, "learning_rate": 3.562141033074649e-05, "loss": 0.1112, "step": 2170 }, { "epoch": 0.24, "grad_norm": 0.5492191754226882, "learning_rate": 3.561696698522371e-05, "loss": 0.1705, "step": 2171 }, { "epoch": 0.24, "grad_norm": 0.4882957761511142, "learning_rate": 3.5612521663740183e-05, "loss": 0.1095, "step": 2172 }, { "epoch": 0.24, "grad_norm": 0.6301930218142273, "learning_rate": 3.560807436685837e-05, "loss": 0.1878, "step": 2173 }, { "epoch": 0.24, "grad_norm": 0.49412684364853066, "learning_rate": 3.560362509514096e-05, "loss": 0.1493, "step": 2174 }, { "epoch": 0.24, "grad_norm": 0.5284920235359436, "learning_rate": 3.5599173849150905e-05, "loss": 0.1906, "step": 2175 }, { "epoch": 0.24, "grad_norm": 0.44591722085417274, "learning_rate": 3.5594720629451414e-05, "loss": 0.1133, "step": 2176 }, { "epoch": 0.24, "grad_norm": 0.48880495596603685, "learning_rate": 3.559026543660593e-05, "loss": 0.1234, "step": 2177 }, { "epoch": 0.24, "grad_norm": 0.4255845901867083, "learning_rate": 3.558580827117817e-05, "loss": 0.1081, "step": 2178 }, { "epoch": 0.24, "grad_norm": 0.5065768246563851, "learning_rate": 3.558134913373206e-05, "loss": 0.1707, "step": 2179 }, { "epoch": 0.24, "grad_norm": 0.5155744238815785, "learning_rate": 3.557688802483181e-05, "loss": 0.1438, "step": 2180 }, { "epoch": 0.24, "grad_norm": 0.5612169573770273, "learning_rate": 3.557242494504187e-05, "loss": 0.1619, "step": 2181 }, { "epoch": 0.24, "grad_norm": 0.5686133168064943, "learning_rate": 3.556795989492694e-05, "loss": 0.1461, "step": 2182 }, { "epoch": 0.24, "grad_norm": 1.0100054871060726, "learning_rate": 3.556349287505196e-05, "loss": 0.114, "step": 2183 }, { "epoch": 0.24, "grad_norm": 0.6050875540504129, "learning_rate": 3.555902388598213e-05, "loss": 0.1534, "step": 2184 }, { "epoch": 0.24, "grad_norm": 0.6091933346436944, "learning_rate": 3.55545529282829e-05, "loss": 0.1575, "step": 2185 }, { "epoch": 0.24, "grad_norm": 0.4659817333959437, "learning_rate": 3.555008000251995e-05, "loss": 0.151, "step": 2186 }, { "epoch": 0.24, "grad_norm": 0.5740572124771376, "learning_rate": 3.554560510925924e-05, "loss": 0.1335, "step": 2187 }, { "epoch": 0.24, "grad_norm": 0.509735778870272, "learning_rate": 3.554112824906696e-05, "loss": 0.133, "step": 2188 }, { "epoch": 0.24, "grad_norm": 0.5790508177269011, "learning_rate": 3.553664942250955e-05, "loss": 0.1705, "step": 2189 }, { "epoch": 0.24, "grad_norm": 0.5429820875546844, "learning_rate": 3.55321686301537e-05, "loss": 0.1609, "step": 2190 }, { "epoch": 0.24, "grad_norm": 0.5247871307968536, "learning_rate": 3.552768587256635e-05, "loss": 0.1532, "step": 2191 }, { "epoch": 0.24, "grad_norm": 0.6465658269089535, "learning_rate": 3.552320115031468e-05, "loss": 0.177, "step": 2192 }, { "epoch": 0.24, "grad_norm": 0.5163116506937354, "learning_rate": 3.551871446396613e-05, "loss": 0.1388, "step": 2193 }, { "epoch": 0.24, "grad_norm": 0.5481506727898416, "learning_rate": 3.55142258140884e-05, "loss": 0.1835, "step": 2194 }, { "epoch": 0.24, "grad_norm": 0.5506908261358118, "learning_rate": 3.550973520124941e-05, "loss": 0.1609, "step": 2195 }, { "epoch": 0.24, "grad_norm": 0.5165653324222624, "learning_rate": 3.5505242626017326e-05, "loss": 0.1719, "step": 2196 }, { "epoch": 0.24, "grad_norm": 0.520231792846266, "learning_rate": 3.550074808896061e-05, "loss": 0.1632, "step": 2197 }, { "epoch": 0.24, "grad_norm": 0.5113968746865574, "learning_rate": 3.549625159064792e-05, "loss": 0.1425, "step": 2198 }, { "epoch": 0.24, "grad_norm": 0.49042654640349476, "learning_rate": 3.549175313164819e-05, "loss": 0.1502, "step": 2199 }, { "epoch": 0.24, "grad_norm": 0.5543626451889639, "learning_rate": 3.5487252712530583e-05, "loss": 0.1438, "step": 2200 }, { "epoch": 0.24, "grad_norm": 0.44781759926021647, "learning_rate": 3.548275033386453e-05, "loss": 0.1118, "step": 2201 }, { "epoch": 0.24, "grad_norm": 0.6032080005201187, "learning_rate": 3.547824599621971e-05, "loss": 0.184, "step": 2202 }, { "epoch": 0.24, "grad_norm": 0.4055394046795746, "learning_rate": 3.5473739700166025e-05, "loss": 0.109, "step": 2203 }, { "epoch": 0.24, "grad_norm": 0.638074823287977, "learning_rate": 3.546923144627366e-05, "loss": 0.2261, "step": 2204 }, { "epoch": 0.24, "grad_norm": 0.5346847183181935, "learning_rate": 3.5464721235113006e-05, "loss": 0.1769, "step": 2205 }, { "epoch": 0.24, "grad_norm": 0.5362788211751282, "learning_rate": 3.546020906725474e-05, "loss": 0.1332, "step": 2206 }, { "epoch": 0.24, "grad_norm": 0.571258778869819, "learning_rate": 3.545569494326977e-05, "loss": 0.1431, "step": 2207 }, { "epoch": 0.24, "grad_norm": 0.4222020188665076, "learning_rate": 3.5451178863729244e-05, "loss": 0.094, "step": 2208 }, { "epoch": 0.24, "grad_norm": 0.44866693854335543, "learning_rate": 3.5446660829204575e-05, "loss": 0.1176, "step": 2209 }, { "epoch": 0.24, "grad_norm": 0.45114148850933555, "learning_rate": 3.5442140840267404e-05, "loss": 0.1344, "step": 2210 }, { "epoch": 0.24, "grad_norm": 0.479900861873437, "learning_rate": 3.5437618897489646e-05, "loss": 0.1524, "step": 2211 }, { "epoch": 0.24, "grad_norm": 0.6266895108379495, "learning_rate": 3.543309500144343e-05, "loss": 0.1924, "step": 2212 }, { "epoch": 0.24, "grad_norm": 0.5907835893804102, "learning_rate": 3.542856915270116e-05, "loss": 0.1727, "step": 2213 }, { "epoch": 0.24, "grad_norm": 0.42050965446497524, "learning_rate": 3.542404135183547e-05, "loss": 0.1441, "step": 2214 }, { "epoch": 0.24, "grad_norm": 0.6524131646600938, "learning_rate": 3.541951159941924e-05, "loss": 0.2036, "step": 2215 }, { "epoch": 0.24, "grad_norm": 0.443467282510872, "learning_rate": 3.541497989602562e-05, "loss": 0.1049, "step": 2216 }, { "epoch": 0.24, "grad_norm": 0.38469451402084853, "learning_rate": 3.5410446242227985e-05, "loss": 0.1078, "step": 2217 }, { "epoch": 0.24, "grad_norm": 0.4470458844378462, "learning_rate": 3.540591063859996e-05, "loss": 0.1542, "step": 2218 }, { "epoch": 0.24, "grad_norm": 0.531859861263796, "learning_rate": 3.5401373085715413e-05, "loss": 0.1551, "step": 2219 }, { "epoch": 0.24, "grad_norm": 0.4349888532679271, "learning_rate": 3.539683358414848e-05, "loss": 0.1461, "step": 2220 }, { "epoch": 0.24, "grad_norm": 0.6628876110001641, "learning_rate": 3.5392292134473516e-05, "loss": 0.2032, "step": 2221 }, { "epoch": 0.24, "grad_norm": 0.5907690104760583, "learning_rate": 3.538774873726514e-05, "loss": 0.1643, "step": 2222 }, { "epoch": 0.24, "grad_norm": 0.4789184120712393, "learning_rate": 3.538320339309821e-05, "loss": 0.1368, "step": 2223 }, { "epoch": 0.24, "grad_norm": 0.529772808440853, "learning_rate": 3.537865610254784e-05, "loss": 0.1474, "step": 2224 }, { "epoch": 0.24, "grad_norm": 0.4280752278375512, "learning_rate": 3.537410686618937e-05, "loss": 0.1128, "step": 2225 }, { "epoch": 0.24, "grad_norm": 0.4630702511219398, "learning_rate": 3.536955568459841e-05, "loss": 0.1283, "step": 2226 }, { "epoch": 0.24, "grad_norm": 0.41466216083947766, "learning_rate": 3.53650025583508e-05, "loss": 0.1317, "step": 2227 }, { "epoch": 0.24, "grad_norm": 0.4446960666606069, "learning_rate": 3.536044748802263e-05, "loss": 0.1492, "step": 2228 }, { "epoch": 0.24, "grad_norm": 0.4525741319893434, "learning_rate": 3.5355890474190244e-05, "loss": 0.1265, "step": 2229 }, { "epoch": 0.24, "grad_norm": 0.48344273219763323, "learning_rate": 3.535133151743022e-05, "loss": 0.1292, "step": 2230 }, { "epoch": 0.25, "grad_norm": 0.6002288004616197, "learning_rate": 3.5346770618319384e-05, "loss": 0.1572, "step": 2231 }, { "epoch": 0.25, "grad_norm": 0.4727657601096312, "learning_rate": 3.534220777743482e-05, "loss": 0.1166, "step": 2232 }, { "epoch": 0.25, "grad_norm": 0.6247609158035904, "learning_rate": 3.533764299535384e-05, "loss": 0.1793, "step": 2233 }, { "epoch": 0.25, "grad_norm": 0.4440872334753855, "learning_rate": 3.5333076272654014e-05, "loss": 0.1142, "step": 2234 }, { "epoch": 0.25, "grad_norm": 0.4140559501849665, "learning_rate": 3.532850760991315e-05, "loss": 0.1235, "step": 2235 }, { "epoch": 0.25, "grad_norm": 0.485755675419642, "learning_rate": 3.532393700770932e-05, "loss": 0.1403, "step": 2236 }, { "epoch": 0.25, "grad_norm": 0.5490501470592869, "learning_rate": 3.53193644666208e-05, "loss": 0.1485, "step": 2237 }, { "epoch": 0.25, "grad_norm": 0.5105878841213323, "learning_rate": 3.5314789987226156e-05, "loss": 0.1411, "step": 2238 }, { "epoch": 0.25, "grad_norm": 0.448749795026387, "learning_rate": 3.531021357010419e-05, "loss": 0.1119, "step": 2239 }, { "epoch": 0.25, "grad_norm": 0.5074135019787672, "learning_rate": 3.5305635215833914e-05, "loss": 0.1473, "step": 2240 }, { "epoch": 0.25, "grad_norm": 0.6197653425594231, "learning_rate": 3.5301054924994626e-05, "loss": 0.1906, "step": 2241 }, { "epoch": 0.25, "grad_norm": 0.44412315220210885, "learning_rate": 3.5296472698165856e-05, "loss": 0.115, "step": 2242 }, { "epoch": 0.25, "grad_norm": 0.6136801327563189, "learning_rate": 3.5291888535927375e-05, "loss": 0.1504, "step": 2243 }, { "epoch": 0.25, "grad_norm": 0.4395960935515403, "learning_rate": 3.5287302438859204e-05, "loss": 0.1114, "step": 2244 }, { "epoch": 0.25, "grad_norm": 0.5274271016122571, "learning_rate": 3.5282714407541587e-05, "loss": 0.1554, "step": 2245 }, { "epoch": 0.25, "grad_norm": 0.3700567714529318, "learning_rate": 3.5278124442555066e-05, "loss": 0.1023, "step": 2246 }, { "epoch": 0.25, "grad_norm": 0.46423611074653853, "learning_rate": 3.527353254448036e-05, "loss": 0.1501, "step": 2247 }, { "epoch": 0.25, "grad_norm": 0.4532808167469646, "learning_rate": 3.526893871389849e-05, "loss": 0.1125, "step": 2248 }, { "epoch": 0.25, "grad_norm": 0.4940976663243304, "learning_rate": 3.526434295139069e-05, "loss": 0.1261, "step": 2249 }, { "epoch": 0.25, "grad_norm": 0.5662019032142154, "learning_rate": 3.5259745257538443e-05, "loss": 0.1694, "step": 2250 }, { "epoch": 0.25, "grad_norm": 0.5210922710163226, "learning_rate": 3.525514563292347e-05, "loss": 0.156, "step": 2251 }, { "epoch": 0.25, "grad_norm": 0.6472290222095444, "learning_rate": 3.525054407812777e-05, "loss": 0.2152, "step": 2252 }, { "epoch": 0.25, "grad_norm": 0.4818578967907291, "learning_rate": 3.5245940593733535e-05, "loss": 0.1195, "step": 2253 }, { "epoch": 0.25, "grad_norm": 0.5370053277239778, "learning_rate": 3.524133518032325e-05, "loss": 0.1369, "step": 2254 }, { "epoch": 0.25, "grad_norm": 0.5434513554443757, "learning_rate": 3.52367278384796e-05, "loss": 0.176, "step": 2255 }, { "epoch": 0.25, "grad_norm": 0.942033455878222, "learning_rate": 3.5232118568785565e-05, "loss": 0.3057, "step": 2256 }, { "epoch": 0.25, "grad_norm": 0.4373734495512156, "learning_rate": 3.522750737182431e-05, "loss": 0.1508, "step": 2257 }, { "epoch": 0.25, "grad_norm": 0.4600040715234357, "learning_rate": 3.52228942481793e-05, "loss": 0.1396, "step": 2258 }, { "epoch": 0.25, "grad_norm": 0.4726547682557643, "learning_rate": 3.5218279198434196e-05, "loss": 0.1186, "step": 2259 }, { "epoch": 0.25, "grad_norm": 0.6472880044773579, "learning_rate": 3.5213662223172935e-05, "loss": 0.1217, "step": 2260 }, { "epoch": 0.25, "grad_norm": 0.44111742005466137, "learning_rate": 3.5209043322979686e-05, "loss": 0.1383, "step": 2261 }, { "epoch": 0.25, "grad_norm": 0.45533751597701205, "learning_rate": 3.520442249843887e-05, "loss": 0.119, "step": 2262 }, { "epoch": 0.25, "grad_norm": 0.4791494725777471, "learning_rate": 3.5199799750135114e-05, "loss": 0.1619, "step": 2263 }, { "epoch": 0.25, "grad_norm": 0.4201550922824717, "learning_rate": 3.5195175078653355e-05, "loss": 0.1379, "step": 2264 }, { "epoch": 0.25, "grad_norm": 0.461100500995229, "learning_rate": 3.519054848457872e-05, "loss": 0.1209, "step": 2265 }, { "epoch": 0.25, "grad_norm": 0.441000676134028, "learning_rate": 3.51859199684966e-05, "loss": 0.111, "step": 2266 }, { "epoch": 0.25, "grad_norm": 0.5076589792447682, "learning_rate": 3.518128953099261e-05, "loss": 0.1313, "step": 2267 }, { "epoch": 0.25, "grad_norm": 0.6128995643095091, "learning_rate": 3.517665717265265e-05, "loss": 0.1765, "step": 2268 }, { "epoch": 0.25, "grad_norm": 0.8309773679238123, "learning_rate": 3.517202289406281e-05, "loss": 0.1177, "step": 2269 }, { "epoch": 0.25, "grad_norm": 0.5300565384901884, "learning_rate": 3.516738669580947e-05, "loss": 0.1336, "step": 2270 }, { "epoch": 0.25, "grad_norm": 0.5162780845059695, "learning_rate": 3.516274857847922e-05, "loss": 0.1356, "step": 2271 }, { "epoch": 0.25, "grad_norm": 0.5635613389262888, "learning_rate": 3.5158108542658915e-05, "loss": 0.164, "step": 2272 }, { "epoch": 0.25, "grad_norm": 0.5276599077589897, "learning_rate": 3.515346658893562e-05, "loss": 0.1456, "step": 2273 }, { "epoch": 0.25, "grad_norm": 0.6588667062965825, "learning_rate": 3.5148822717896694e-05, "loss": 0.1691, "step": 2274 }, { "epoch": 0.25, "grad_norm": 0.42917307490900736, "learning_rate": 3.5144176930129694e-05, "loss": 0.1181, "step": 2275 }, { "epoch": 0.25, "grad_norm": 0.4395522620382328, "learning_rate": 3.513952922622243e-05, "loss": 0.1377, "step": 2276 }, { "epoch": 0.25, "grad_norm": 0.40315644119432026, "learning_rate": 3.513487960676298e-05, "loss": 0.1208, "step": 2277 }, { "epoch": 0.25, "grad_norm": 0.5077651955506216, "learning_rate": 3.513022807233964e-05, "loss": 0.137, "step": 2278 }, { "epoch": 0.25, "grad_norm": 0.4915112891079478, "learning_rate": 3.512557462354093e-05, "loss": 0.1273, "step": 2279 }, { "epoch": 0.25, "grad_norm": 0.48581288325202493, "learning_rate": 3.5120919260955655e-05, "loss": 0.1621, "step": 2280 }, { "epoch": 0.25, "grad_norm": 0.5600576093111308, "learning_rate": 3.5116261985172835e-05, "loss": 0.1706, "step": 2281 }, { "epoch": 0.25, "grad_norm": 0.4828447041064188, "learning_rate": 3.511160279678174e-05, "loss": 0.1396, "step": 2282 }, { "epoch": 0.25, "grad_norm": 0.4689202952990553, "learning_rate": 3.510694169637188e-05, "loss": 0.1094, "step": 2283 }, { "epoch": 0.25, "grad_norm": 0.47778822438637963, "learning_rate": 3.510227868453302e-05, "loss": 0.1322, "step": 2284 }, { "epoch": 0.25, "grad_norm": 0.4444224849713238, "learning_rate": 3.5097613761855134e-05, "loss": 0.1357, "step": 2285 }, { "epoch": 0.25, "grad_norm": 0.5777287542909266, "learning_rate": 3.509294692892847e-05, "loss": 0.1226, "step": 2286 }, { "epoch": 0.25, "grad_norm": 0.47072056781534544, "learning_rate": 3.508827818634351e-05, "loss": 0.1214, "step": 2287 }, { "epoch": 0.25, "grad_norm": 0.5920025559350847, "learning_rate": 3.508360753469097e-05, "loss": 0.188, "step": 2288 }, { "epoch": 0.25, "grad_norm": 0.5112946769667809, "learning_rate": 3.507893497456181e-05, "loss": 0.1512, "step": 2289 }, { "epoch": 0.25, "grad_norm": 0.5717315525186366, "learning_rate": 3.5074260506547225e-05, "loss": 0.128, "step": 2290 }, { "epoch": 0.25, "grad_norm": 0.4356369814873788, "learning_rate": 3.506958413123867e-05, "loss": 0.1131, "step": 2291 }, { "epoch": 0.25, "grad_norm": 0.5316950112244213, "learning_rate": 3.506490584922784e-05, "loss": 0.1371, "step": 2292 }, { "epoch": 0.25, "grad_norm": 0.6285600360853553, "learning_rate": 3.5060225661106636e-05, "loss": 0.2104, "step": 2293 }, { "epoch": 0.25, "grad_norm": 0.44598772489222965, "learning_rate": 3.5055543567467244e-05, "loss": 0.1337, "step": 2294 }, { "epoch": 0.25, "grad_norm": 0.39710196890970895, "learning_rate": 3.5050859568902064e-05, "loss": 0.1111, "step": 2295 }, { "epoch": 0.25, "grad_norm": 0.42689801669309047, "learning_rate": 3.504617366600376e-05, "loss": 0.1443, "step": 2296 }, { "epoch": 0.25, "grad_norm": 0.4205638856971283, "learning_rate": 3.504148585936521e-05, "loss": 0.1156, "step": 2297 }, { "epoch": 0.25, "grad_norm": 0.5458887471275569, "learning_rate": 3.503679614957955e-05, "loss": 0.1394, "step": 2298 }, { "epoch": 0.25, "grad_norm": 0.6251908487758245, "learning_rate": 3.503210453724015e-05, "loss": 0.1811, "step": 2299 }, { "epoch": 0.25, "grad_norm": 0.5585225466716718, "learning_rate": 3.502741102294063e-05, "loss": 0.199, "step": 2300 }, { "epoch": 0.25, "grad_norm": 0.5455680488848759, "learning_rate": 3.5022715607274844e-05, "loss": 0.1433, "step": 2301 }, { "epoch": 0.25, "grad_norm": 0.5973739954829625, "learning_rate": 3.501801829083688e-05, "loss": 0.1675, "step": 2302 }, { "epoch": 0.25, "grad_norm": 0.47042740466913274, "learning_rate": 3.501331907422107e-05, "loss": 0.1235, "step": 2303 }, { "epoch": 0.25, "grad_norm": 0.49079710741970584, "learning_rate": 3.500861795802201e-05, "loss": 0.1172, "step": 2304 }, { "epoch": 0.25, "grad_norm": 0.5079860097392948, "learning_rate": 3.5003914942834485e-05, "loss": 0.1757, "step": 2305 }, { "epoch": 0.25, "grad_norm": 0.41771383622778796, "learning_rate": 3.499921002925357e-05, "loss": 0.1231, "step": 2306 }, { "epoch": 0.25, "grad_norm": 0.471473870085316, "learning_rate": 3.499450321787457e-05, "loss": 0.1669, "step": 2307 }, { "epoch": 0.25, "grad_norm": 0.5325209617070864, "learning_rate": 3.4989794509293005e-05, "loss": 0.1229, "step": 2308 }, { "epoch": 0.25, "grad_norm": 0.47373640362999636, "learning_rate": 3.4985083904104664e-05, "loss": 0.1193, "step": 2309 }, { "epoch": 0.25, "grad_norm": 0.41066476089416704, "learning_rate": 3.498037140290555e-05, "loss": 0.086, "step": 2310 }, { "epoch": 0.25, "grad_norm": 0.4641127087182747, "learning_rate": 3.4975657006291924e-05, "loss": 0.1478, "step": 2311 }, { "epoch": 0.25, "grad_norm": 0.564468779561316, "learning_rate": 3.497094071486029e-05, "loss": 0.1677, "step": 2312 }, { "epoch": 0.25, "grad_norm": 0.4483025204813362, "learning_rate": 3.496622252920738e-05, "loss": 0.1222, "step": 2313 }, { "epoch": 0.25, "grad_norm": 0.5008888985895373, "learning_rate": 3.4961502449930165e-05, "loss": 0.1615, "step": 2314 }, { "epoch": 0.25, "grad_norm": 0.5723722182850036, "learning_rate": 3.495678047762586e-05, "loss": 0.1706, "step": 2315 }, { "epoch": 0.25, "grad_norm": 0.4692670831167042, "learning_rate": 3.495205661289193e-05, "loss": 0.1536, "step": 2316 }, { "epoch": 0.25, "grad_norm": 0.5784697406919613, "learning_rate": 3.494733085632606e-05, "loss": 0.1624, "step": 2317 }, { "epoch": 0.25, "grad_norm": 0.43607409603451874, "learning_rate": 3.494260320852619e-05, "loss": 0.1262, "step": 2318 }, { "epoch": 0.25, "grad_norm": 0.444846076302698, "learning_rate": 3.493787367009049e-05, "loss": 0.1271, "step": 2319 }, { "epoch": 0.25, "grad_norm": 0.48240519506367907, "learning_rate": 3.493314224161737e-05, "loss": 0.1148, "step": 2320 }, { "epoch": 0.25, "grad_norm": 0.4212320868421737, "learning_rate": 3.492840892370548e-05, "loss": 0.1318, "step": 2321 }, { "epoch": 0.25, "grad_norm": 0.47440773404935044, "learning_rate": 3.4923673716953717e-05, "loss": 0.1303, "step": 2322 }, { "epoch": 0.26, "grad_norm": 0.4213940033001223, "learning_rate": 3.49189366219612e-05, "loss": 0.1235, "step": 2323 }, { "epoch": 0.26, "grad_norm": 0.458923757892575, "learning_rate": 3.4914197639327306e-05, "loss": 0.1299, "step": 2324 }, { "epoch": 0.26, "grad_norm": 0.3922155993300642, "learning_rate": 3.4909456769651644e-05, "loss": 0.0959, "step": 2325 }, { "epoch": 0.26, "grad_norm": 0.5754029137360273, "learning_rate": 3.490471401353405e-05, "loss": 0.1093, "step": 2326 }, { "epoch": 0.26, "grad_norm": 0.6031911774138722, "learning_rate": 3.489996937157462e-05, "loss": 0.1866, "step": 2327 }, { "epoch": 0.26, "grad_norm": 0.610369457745472, "learning_rate": 3.489522284437366e-05, "loss": 0.1819, "step": 2328 }, { "epoch": 0.26, "grad_norm": 0.5454656150987856, "learning_rate": 3.489047443253175e-05, "loss": 0.1658, "step": 2329 }, { "epoch": 0.26, "grad_norm": 0.460045160896283, "learning_rate": 3.488572413664969e-05, "loss": 0.1574, "step": 2330 }, { "epoch": 0.26, "grad_norm": 0.5409176458883622, "learning_rate": 3.48809719573285e-05, "loss": 0.1423, "step": 2331 }, { "epoch": 0.26, "grad_norm": 0.41384444703099077, "learning_rate": 3.4876217895169474e-05, "loss": 0.1098, "step": 2332 }, { "epoch": 0.26, "grad_norm": 0.5495177582360932, "learning_rate": 3.487146195077412e-05, "loss": 0.1116, "step": 2333 }, { "epoch": 0.26, "grad_norm": 0.5059865843140341, "learning_rate": 3.4866704124744196e-05, "loss": 0.1388, "step": 2334 }, { "epoch": 0.26, "grad_norm": 0.5392883214779269, "learning_rate": 3.486194441768168e-05, "loss": 0.1202, "step": 2335 }, { "epoch": 0.26, "grad_norm": 0.4530227644455164, "learning_rate": 3.4857182830188816e-05, "loss": 0.1273, "step": 2336 }, { "epoch": 0.26, "grad_norm": 0.5598212939184222, "learning_rate": 3.485241936286807e-05, "loss": 0.1461, "step": 2337 }, { "epoch": 0.26, "grad_norm": 0.519260077586708, "learning_rate": 3.484765401632214e-05, "loss": 0.1367, "step": 2338 }, { "epoch": 0.26, "grad_norm": 0.5711324623857723, "learning_rate": 3.484288679115397e-05, "loss": 0.1488, "step": 2339 }, { "epoch": 0.26, "grad_norm": 0.4193588740948005, "learning_rate": 3.483811768796674e-05, "loss": 0.1152, "step": 2340 }, { "epoch": 0.26, "grad_norm": 0.4506069461205856, "learning_rate": 3.483334670736388e-05, "loss": 0.1492, "step": 2341 }, { "epoch": 0.26, "grad_norm": 0.45784512621656015, "learning_rate": 3.482857384994903e-05, "loss": 0.1149, "step": 2342 }, { "epoch": 0.26, "grad_norm": 0.46683401494735116, "learning_rate": 3.4823799116326085e-05, "loss": 0.1272, "step": 2343 }, { "epoch": 0.26, "grad_norm": 0.5447042160401867, "learning_rate": 3.4819022507099184e-05, "loss": 0.159, "step": 2344 }, { "epoch": 0.26, "grad_norm": 0.5189526713534715, "learning_rate": 3.481424402287269e-05, "loss": 0.1381, "step": 2345 }, { "epoch": 0.26, "grad_norm": 0.40357091475042917, "learning_rate": 3.480946366425121e-05, "loss": 0.1189, "step": 2346 }, { "epoch": 0.26, "grad_norm": 0.5030026577354125, "learning_rate": 3.4804681431839586e-05, "loss": 0.1743, "step": 2347 }, { "epoch": 0.26, "grad_norm": 0.6876068465695868, "learning_rate": 3.4799897326242895e-05, "loss": 0.1863, "step": 2348 }, { "epoch": 0.26, "grad_norm": 0.4694536014782371, "learning_rate": 3.479511134806645e-05, "loss": 0.1161, "step": 2349 }, { "epoch": 0.26, "grad_norm": 0.49525121797106664, "learning_rate": 3.479032349791581e-05, "loss": 0.1747, "step": 2350 }, { "epoch": 0.26, "grad_norm": 0.48716213916397855, "learning_rate": 3.478553377639677e-05, "loss": 0.1594, "step": 2351 }, { "epoch": 0.26, "grad_norm": 0.5035681366450534, "learning_rate": 3.478074218411534e-05, "loss": 0.1385, "step": 2352 }, { "epoch": 0.26, "grad_norm": 0.41181360270482376, "learning_rate": 3.47759487216778e-05, "loss": 0.1071, "step": 2353 }, { "epoch": 0.26, "grad_norm": 0.45572218935030345, "learning_rate": 3.477115338969065e-05, "loss": 0.1177, "step": 2354 }, { "epoch": 0.26, "grad_norm": 0.4684167949230917, "learning_rate": 3.4766356188760614e-05, "loss": 0.1071, "step": 2355 }, { "epoch": 0.26, "grad_norm": 0.4058304233890573, "learning_rate": 3.476155711949467e-05, "loss": 0.1147, "step": 2356 }, { "epoch": 0.26, "grad_norm": 0.5165177333147264, "learning_rate": 3.475675618250003e-05, "loss": 0.1406, "step": 2357 }, { "epoch": 0.26, "grad_norm": 0.6207859308165702, "learning_rate": 3.475195337838415e-05, "loss": 0.1986, "step": 2358 }, { "epoch": 0.26, "grad_norm": 0.4949982931126922, "learning_rate": 3.474714870775469e-05, "loss": 0.1286, "step": 2359 }, { "epoch": 0.26, "grad_norm": 0.49226956969961394, "learning_rate": 3.474234217121959e-05, "loss": 0.1335, "step": 2360 }, { "epoch": 0.26, "grad_norm": 0.5040392441419242, "learning_rate": 3.4737533769386997e-05, "loss": 0.1175, "step": 2361 }, { "epoch": 0.26, "grad_norm": 0.45955518575238313, "learning_rate": 3.473272350286529e-05, "loss": 0.1389, "step": 2362 }, { "epoch": 0.26, "grad_norm": 0.5275699836625878, "learning_rate": 3.4727911372263106e-05, "loss": 0.1518, "step": 2363 }, { "epoch": 0.26, "grad_norm": 0.5845572427141871, "learning_rate": 3.4723097378189306e-05, "loss": 0.1957, "step": 2364 }, { "epoch": 0.26, "grad_norm": 0.5162579957005713, "learning_rate": 3.4718281521252994e-05, "loss": 0.1458, "step": 2365 }, { "epoch": 0.26, "grad_norm": 0.44761637298698964, "learning_rate": 3.471346380206349e-05, "loss": 0.1201, "step": 2366 }, { "epoch": 0.26, "grad_norm": 0.3693440396586708, "learning_rate": 3.470864422123038e-05, "loss": 0.1405, "step": 2367 }, { "epoch": 0.26, "grad_norm": 0.5008741724032674, "learning_rate": 3.470382277936345e-05, "loss": 0.1185, "step": 2368 }, { "epoch": 0.26, "grad_norm": 0.38951496716072354, "learning_rate": 3.469899947707275e-05, "loss": 0.136, "step": 2369 }, { "epoch": 0.26, "grad_norm": 0.4024666348542523, "learning_rate": 3.4694174314968564e-05, "loss": 0.1248, "step": 2370 }, { "epoch": 0.26, "grad_norm": 0.6980626869225318, "learning_rate": 3.468934729366139e-05, "loss": 0.2104, "step": 2371 }, { "epoch": 0.26, "grad_norm": 0.47399935352707895, "learning_rate": 3.468451841376198e-05, "loss": 0.1204, "step": 2372 }, { "epoch": 0.26, "grad_norm": 0.4058695625737495, "learning_rate": 3.467968767588131e-05, "loss": 0.1068, "step": 2373 }, { "epoch": 0.26, "grad_norm": 0.5491065408568233, "learning_rate": 3.467485508063061e-05, "loss": 0.1448, "step": 2374 }, { "epoch": 0.26, "grad_norm": 0.4023857094819941, "learning_rate": 3.4670020628621314e-05, "loss": 0.1159, "step": 2375 }, { "epoch": 0.26, "grad_norm": 0.4772763768176297, "learning_rate": 3.466518432046512e-05, "loss": 0.1196, "step": 2376 }, { "epoch": 0.26, "grad_norm": 0.46904373502460245, "learning_rate": 3.466034615677395e-05, "loss": 0.1334, "step": 2377 }, { "epoch": 0.26, "grad_norm": 0.4962488817975877, "learning_rate": 3.4655506138159954e-05, "loss": 0.1223, "step": 2378 }, { "epoch": 0.26, "grad_norm": 0.4207307875525594, "learning_rate": 3.4650664265235525e-05, "loss": 0.0823, "step": 2379 }, { "epoch": 0.26, "grad_norm": 0.5438375040875051, "learning_rate": 3.464582053861329e-05, "loss": 0.159, "step": 2380 }, { "epoch": 0.26, "grad_norm": 0.44786654421090233, "learning_rate": 3.46409749589061e-05, "loss": 0.1324, "step": 2381 }, { "epoch": 0.26, "grad_norm": 0.5191653978394525, "learning_rate": 3.463612752672707e-05, "loss": 0.1447, "step": 2382 }, { "epoch": 0.26, "grad_norm": 0.5077798099266159, "learning_rate": 3.463127824268951e-05, "loss": 0.1347, "step": 2383 }, { "epoch": 0.26, "grad_norm": 0.49486834952445025, "learning_rate": 3.462642710740699e-05, "loss": 0.1307, "step": 2384 }, { "epoch": 0.26, "grad_norm": 0.4767008564959547, "learning_rate": 3.4621574121493306e-05, "loss": 0.1332, "step": 2385 }, { "epoch": 0.26, "grad_norm": 0.562184510236321, "learning_rate": 3.461671928556248e-05, "loss": 0.1334, "step": 2386 }, { "epoch": 0.26, "grad_norm": 0.4551552801256097, "learning_rate": 3.4611862600228806e-05, "loss": 0.1185, "step": 2387 }, { "epoch": 0.26, "grad_norm": 0.5190859729252469, "learning_rate": 3.4607004066106754e-05, "loss": 0.1438, "step": 2388 }, { "epoch": 0.26, "grad_norm": 0.4702150655807336, "learning_rate": 3.460214368381107e-05, "loss": 0.1352, "step": 2389 }, { "epoch": 0.26, "grad_norm": 0.41533386531436545, "learning_rate": 3.459728145395671e-05, "loss": 0.1364, "step": 2390 }, { "epoch": 0.26, "grad_norm": 0.48752382660373395, "learning_rate": 3.45924173771589e-05, "loss": 0.1395, "step": 2391 }, { "epoch": 0.26, "grad_norm": 0.4973620624628419, "learning_rate": 3.458755145403306e-05, "loss": 0.1468, "step": 2392 }, { "epoch": 0.26, "grad_norm": 0.4156527839781709, "learning_rate": 3.458268368519485e-05, "loss": 0.0862, "step": 2393 }, { "epoch": 0.26, "grad_norm": 0.4459316400037335, "learning_rate": 3.457781407126018e-05, "loss": 0.1696, "step": 2394 }, { "epoch": 0.26, "grad_norm": 0.45293708719556086, "learning_rate": 3.457294261284519e-05, "loss": 0.1253, "step": 2395 }, { "epoch": 0.26, "grad_norm": 0.3999450437225088, "learning_rate": 3.456806931056624e-05, "loss": 0.1176, "step": 2396 }, { "epoch": 0.26, "grad_norm": 0.4610868470655418, "learning_rate": 3.4563194165039936e-05, "loss": 0.1025, "step": 2397 }, { "epoch": 0.26, "grad_norm": 0.5221356671745733, "learning_rate": 3.4558317176883116e-05, "loss": 0.1768, "step": 2398 }, { "epoch": 0.26, "grad_norm": 0.5502811352761469, "learning_rate": 3.455343834671285e-05, "loss": 0.1496, "step": 2399 }, { "epoch": 0.26, "grad_norm": 0.5379780074360931, "learning_rate": 3.454855767514643e-05, "loss": 0.1541, "step": 2400 }, { "epoch": 0.26, "grad_norm": 0.4584004360264307, "learning_rate": 3.45436751628014e-05, "loss": 0.1067, "step": 2401 }, { "epoch": 0.26, "grad_norm": 0.39379693614412986, "learning_rate": 3.453879081029552e-05, "loss": 0.0946, "step": 2402 }, { "epoch": 0.26, "grad_norm": 0.5096344176061549, "learning_rate": 3.453390461824679e-05, "loss": 0.1382, "step": 2403 }, { "epoch": 0.26, "grad_norm": 0.4354407861256253, "learning_rate": 3.452901658727345e-05, "loss": 0.1362, "step": 2404 }, { "epoch": 0.26, "grad_norm": 0.5368259306255825, "learning_rate": 3.4524126717993964e-05, "loss": 0.1577, "step": 2405 }, { "epoch": 0.26, "grad_norm": 0.5147883245014098, "learning_rate": 3.451923501102703e-05, "loss": 0.1466, "step": 2406 }, { "epoch": 0.26, "grad_norm": 0.4669325882657561, "learning_rate": 3.451434146699157e-05, "loss": 0.1377, "step": 2407 }, { "epoch": 0.26, "grad_norm": 0.4435304152156242, "learning_rate": 3.450944608650677e-05, "loss": 0.1187, "step": 2408 }, { "epoch": 0.26, "grad_norm": 0.3853054616491007, "learning_rate": 3.450454887019199e-05, "loss": 0.1078, "step": 2409 }, { "epoch": 0.26, "grad_norm": 0.5312528610152453, "learning_rate": 3.449964981866689e-05, "loss": 0.1391, "step": 2410 }, { "epoch": 0.26, "grad_norm": 0.41947547783408645, "learning_rate": 3.449474893255131e-05, "loss": 0.1104, "step": 2411 }, { "epoch": 0.26, "grad_norm": 0.47822334549682627, "learning_rate": 3.4489846212465356e-05, "loss": 0.1337, "step": 2412 }, { "epoch": 0.26, "grad_norm": 0.7251016430472258, "learning_rate": 3.448494165902935e-05, "loss": 0.2189, "step": 2413 }, { "epoch": 0.27, "grad_norm": 0.541845628322965, "learning_rate": 3.448003527286383e-05, "loss": 0.1127, "step": 2414 }, { "epoch": 0.27, "grad_norm": 0.4373584075412242, "learning_rate": 3.4475127054589605e-05, "loss": 0.1131, "step": 2415 }, { "epoch": 0.27, "grad_norm": 0.45598181851012226, "learning_rate": 3.447021700482769e-05, "loss": 0.1243, "step": 2416 }, { "epoch": 0.27, "grad_norm": 0.46483971089122933, "learning_rate": 3.4465305124199334e-05, "loss": 0.12, "step": 2417 }, { "epoch": 0.27, "grad_norm": 0.47634901674468855, "learning_rate": 3.446039141332602e-05, "loss": 0.0995, "step": 2418 }, { "epoch": 0.27, "grad_norm": 0.4334746188528998, "learning_rate": 3.4455475872829465e-05, "loss": 0.1259, "step": 2419 }, { "epoch": 0.27, "grad_norm": 0.43772642202946466, "learning_rate": 3.4450558503331606e-05, "loss": 0.1142, "step": 2420 }, { "epoch": 0.27, "grad_norm": 0.6185378019041478, "learning_rate": 3.444563930545464e-05, "loss": 0.1765, "step": 2421 }, { "epoch": 0.27, "grad_norm": 0.6189667129548564, "learning_rate": 3.444071827982096e-05, "loss": 0.198, "step": 2422 }, { "epoch": 0.27, "grad_norm": 0.4302261963831156, "learning_rate": 3.443579542705321e-05, "loss": 0.1255, "step": 2423 }, { "epoch": 0.27, "grad_norm": 0.4765235071958492, "learning_rate": 3.4430870747774266e-05, "loss": 0.1215, "step": 2424 }, { "epoch": 0.27, "grad_norm": 0.39381946904576187, "learning_rate": 3.442594424260722e-05, "loss": 0.1182, "step": 2425 }, { "epoch": 0.27, "grad_norm": 0.4930452713617664, "learning_rate": 3.442101591217542e-05, "loss": 0.1228, "step": 2426 }, { "epoch": 0.27, "grad_norm": 0.49824373431173125, "learning_rate": 3.441608575710242e-05, "loss": 0.1275, "step": 2427 }, { "epoch": 0.27, "grad_norm": 0.47410511182412085, "learning_rate": 3.441115377801202e-05, "loss": 0.1294, "step": 2428 }, { "epoch": 0.27, "grad_norm": 0.4162248434592663, "learning_rate": 3.4406219975528246e-05, "loss": 0.0983, "step": 2429 }, { "epoch": 0.27, "grad_norm": 0.4711624057522236, "learning_rate": 3.440128435027536e-05, "loss": 0.1534, "step": 2430 }, { "epoch": 0.27, "grad_norm": 0.5580640829232395, "learning_rate": 3.439634690287784e-05, "loss": 0.1484, "step": 2431 }, { "epoch": 0.27, "grad_norm": 0.4435992495258715, "learning_rate": 3.43914076339604e-05, "loss": 0.1154, "step": 2432 }, { "epoch": 0.27, "grad_norm": 0.5904760445363514, "learning_rate": 3.4386466544148e-05, "loss": 0.158, "step": 2433 }, { "epoch": 0.27, "grad_norm": 0.4519467491023276, "learning_rate": 3.438152363406582e-05, "loss": 0.1322, "step": 2434 }, { "epoch": 0.27, "grad_norm": 0.42399426908601145, "learning_rate": 3.437657890433926e-05, "loss": 0.1439, "step": 2435 }, { "epoch": 0.27, "grad_norm": 0.47979832217124674, "learning_rate": 3.437163235559396e-05, "loss": 0.1248, "step": 2436 }, { "epoch": 0.27, "grad_norm": 0.5027951847696335, "learning_rate": 3.43666839884558e-05, "loss": 0.1512, "step": 2437 }, { "epoch": 0.27, "grad_norm": 0.5739105331630803, "learning_rate": 3.4361733803550874e-05, "loss": 0.1732, "step": 2438 }, { "epoch": 0.27, "grad_norm": 0.5476255987843694, "learning_rate": 3.435678180150551e-05, "loss": 0.1475, "step": 2439 }, { "epoch": 0.27, "grad_norm": 0.4757794998713033, "learning_rate": 3.4351827982946274e-05, "loss": 0.0918, "step": 2440 }, { "epoch": 0.27, "grad_norm": 0.5858401917082804, "learning_rate": 3.434687234849995e-05, "loss": 0.1395, "step": 2441 }, { "epoch": 0.27, "grad_norm": 0.5510914247061958, "learning_rate": 3.434191489879355e-05, "loss": 0.151, "step": 2442 }, { "epoch": 0.27, "grad_norm": 0.5521287869389845, "learning_rate": 3.433695563445433e-05, "loss": 0.1487, "step": 2443 }, { "epoch": 0.27, "grad_norm": 0.4301850992291227, "learning_rate": 3.433199455610978e-05, "loss": 0.1298, "step": 2444 }, { "epoch": 0.27, "grad_norm": 0.512626454406848, "learning_rate": 3.432703166438759e-05, "loss": 0.1552, "step": 2445 }, { "epoch": 0.27, "grad_norm": 0.5099205570896488, "learning_rate": 3.43220669599157e-05, "loss": 0.1541, "step": 2446 }, { "epoch": 0.27, "grad_norm": 0.45604073546018137, "learning_rate": 3.431710044332229e-05, "loss": 0.1163, "step": 2447 }, { "epoch": 0.27, "grad_norm": 0.5146313543216957, "learning_rate": 3.431213211523574e-05, "loss": 0.1417, "step": 2448 }, { "epoch": 0.27, "grad_norm": 0.5792926646701452, "learning_rate": 3.4307161976284695e-05, "loss": 0.1584, "step": 2449 }, { "epoch": 0.27, "grad_norm": 0.4570558655459584, "learning_rate": 3.430219002709799e-05, "loss": 0.111, "step": 2450 }, { "epoch": 0.27, "grad_norm": 0.5079500452245315, "learning_rate": 3.429721626830472e-05, "loss": 0.1651, "step": 2451 }, { "epoch": 0.27, "grad_norm": 0.42599473203504645, "learning_rate": 3.429224070053419e-05, "loss": 0.1248, "step": 2452 }, { "epoch": 0.27, "grad_norm": 0.4124887898395745, "learning_rate": 3.428726332441595e-05, "loss": 0.1117, "step": 2453 }, { "epoch": 0.27, "grad_norm": 0.5736399477708762, "learning_rate": 3.428228414057975e-05, "loss": 0.157, "step": 2454 }, { "epoch": 0.27, "grad_norm": 0.5138786102962402, "learning_rate": 3.427730314965562e-05, "loss": 0.1634, "step": 2455 }, { "epoch": 0.27, "grad_norm": 0.470029927274878, "learning_rate": 3.427232035227377e-05, "loss": 0.109, "step": 2456 }, { "epoch": 0.27, "grad_norm": 0.6285574994502313, "learning_rate": 3.4267335749064655e-05, "loss": 0.1512, "step": 2457 }, { "epoch": 0.27, "grad_norm": 0.48596600758625846, "learning_rate": 3.426234934065896e-05, "loss": 0.1086, "step": 2458 }, { "epoch": 0.27, "grad_norm": 0.4557537744327087, "learning_rate": 3.42573611276876e-05, "loss": 0.1192, "step": 2459 }, { "epoch": 0.27, "grad_norm": 0.4629436092393336, "learning_rate": 3.4252371110781716e-05, "loss": 0.1605, "step": 2460 }, { "epoch": 0.27, "grad_norm": 0.559382808353759, "learning_rate": 3.424737929057268e-05, "loss": 0.1447, "step": 2461 }, { "epoch": 0.27, "grad_norm": 0.5142982058483963, "learning_rate": 3.424238566769209e-05, "loss": 0.1351, "step": 2462 }, { "epoch": 0.27, "grad_norm": 0.42970532467024103, "learning_rate": 3.423739024277177e-05, "loss": 0.1268, "step": 2463 }, { "epoch": 0.27, "grad_norm": 0.45780035660297774, "learning_rate": 3.423239301644377e-05, "loss": 0.1163, "step": 2464 }, { "epoch": 0.27, "grad_norm": 0.5660563473370789, "learning_rate": 3.4227393989340376e-05, "loss": 0.173, "step": 2465 }, { "epoch": 0.27, "grad_norm": 0.38702263115228835, "learning_rate": 3.42223931620941e-05, "loss": 0.1139, "step": 2466 }, { "epoch": 0.27, "grad_norm": 0.5000160333923749, "learning_rate": 3.421739053533767e-05, "loss": 0.1311, "step": 2467 }, { "epoch": 0.27, "grad_norm": 0.47882141930155436, "learning_rate": 3.421238610970406e-05, "loss": 0.0989, "step": 2468 }, { "epoch": 0.27, "grad_norm": 0.4803223115394185, "learning_rate": 3.420737988582646e-05, "loss": 0.1513, "step": 2469 }, { "epoch": 0.27, "grad_norm": 0.4733201085960917, "learning_rate": 3.4202371864338295e-05, "loss": 0.1332, "step": 2470 }, { "epoch": 0.27, "grad_norm": 0.4788027933210143, "learning_rate": 3.41973620458732e-05, "loss": 0.1355, "step": 2471 }, { "epoch": 0.27, "grad_norm": 0.46483507869550533, "learning_rate": 3.419235043106506e-05, "loss": 0.141, "step": 2472 }, { "epoch": 0.27, "grad_norm": 0.46143146491208537, "learning_rate": 3.4187337020547974e-05, "loss": 0.1293, "step": 2473 }, { "epoch": 0.27, "grad_norm": 0.5768565102834681, "learning_rate": 3.4182321814956274e-05, "loss": 0.1671, "step": 2474 }, { "epoch": 0.27, "grad_norm": 0.378530414241201, "learning_rate": 3.417730481492451e-05, "loss": 0.1048, "step": 2475 }, { "epoch": 0.27, "grad_norm": 0.5695373283658408, "learning_rate": 3.4172286021087475e-05, "loss": 0.1537, "step": 2476 }, { "epoch": 0.27, "grad_norm": 0.44114690943394924, "learning_rate": 3.416726543408018e-05, "loss": 0.1093, "step": 2477 }, { "epoch": 0.27, "grad_norm": 0.44971614335527815, "learning_rate": 3.416224305453785e-05, "loss": 0.114, "step": 2478 }, { "epoch": 0.27, "grad_norm": 0.48868807819925997, "learning_rate": 3.4157218883095966e-05, "loss": 0.1281, "step": 2479 }, { "epoch": 0.27, "grad_norm": 0.5713469907946939, "learning_rate": 3.4152192920390195e-05, "loss": 0.1369, "step": 2480 }, { "epoch": 0.27, "grad_norm": 0.501901675901334, "learning_rate": 3.414716516705648e-05, "loss": 0.1256, "step": 2481 }, { "epoch": 0.27, "grad_norm": 0.5675283432491127, "learning_rate": 3.4142135623730954e-05, "loss": 0.152, "step": 2482 }, { "epoch": 0.27, "grad_norm": 0.38015793984598484, "learning_rate": 3.413710429104999e-05, "loss": 0.099, "step": 2483 }, { "epoch": 0.27, "grad_norm": 0.39435350549948, "learning_rate": 3.413207116965018e-05, "loss": 0.0895, "step": 2484 }, { "epoch": 0.27, "grad_norm": 0.5166710732897631, "learning_rate": 3.4127036260168344e-05, "loss": 0.1407, "step": 2485 }, { "epoch": 0.27, "grad_norm": 0.35974446254124093, "learning_rate": 3.412199956324155e-05, "loss": 0.0939, "step": 2486 }, { "epoch": 0.27, "grad_norm": 0.5665131303640929, "learning_rate": 3.411696107950706e-05, "loss": 0.1407, "step": 2487 }, { "epoch": 0.27, "grad_norm": 0.45366777599828795, "learning_rate": 3.4111920809602374e-05, "loss": 0.12, "step": 2488 }, { "epoch": 0.27, "grad_norm": 0.4868389189478191, "learning_rate": 3.410687875416523e-05, "loss": 0.1439, "step": 2489 }, { "epoch": 0.27, "grad_norm": 0.4061955818995793, "learning_rate": 3.4101834913833576e-05, "loss": 0.1353, "step": 2490 }, { "epoch": 0.27, "grad_norm": 0.395929258000338, "learning_rate": 3.409678928924558e-05, "loss": 0.1212, "step": 2491 }, { "epoch": 0.27, "grad_norm": 0.39322646773803677, "learning_rate": 3.4091741881039677e-05, "loss": 0.09, "step": 2492 }, { "epoch": 0.27, "grad_norm": 0.4876510520592031, "learning_rate": 3.408669268985447e-05, "loss": 0.1353, "step": 2493 }, { "epoch": 0.27, "grad_norm": 0.5247481150600097, "learning_rate": 3.4081641716328826e-05, "loss": 0.1392, "step": 2494 }, { "epoch": 0.27, "grad_norm": 0.42692365413770167, "learning_rate": 3.407658896110183e-05, "loss": 0.1282, "step": 2495 }, { "epoch": 0.27, "grad_norm": 0.4768050155182239, "learning_rate": 3.407153442481278e-05, "loss": 0.1276, "step": 2496 }, { "epoch": 0.27, "grad_norm": 0.4518767131546068, "learning_rate": 3.406647810810122e-05, "loss": 0.1293, "step": 2497 }, { "epoch": 0.27, "grad_norm": 0.45255947997190943, "learning_rate": 3.4061420011606906e-05, "loss": 0.1459, "step": 2498 }, { "epoch": 0.27, "grad_norm": 0.5345606866094632, "learning_rate": 3.405636013596982e-05, "loss": 0.1599, "step": 2499 }, { "epoch": 0.27, "grad_norm": 0.5345141045731824, "learning_rate": 3.405129848183017e-05, "loss": 0.1333, "step": 2500 }, { "epoch": 0.27, "grad_norm": 0.588238331317073, "learning_rate": 3.404623504982839e-05, "loss": 0.1604, "step": 2501 }, { "epoch": 0.27, "grad_norm": 0.5025613862115336, "learning_rate": 3.404116984060513e-05, "loss": 0.1408, "step": 2502 }, { "epoch": 0.27, "grad_norm": 0.4988122481983707, "learning_rate": 3.4036102854801286e-05, "loss": 0.1465, "step": 2503 }, { "epoch": 0.27, "grad_norm": 0.43244140015778665, "learning_rate": 3.403103409305796e-05, "loss": 0.1288, "step": 2504 }, { "epoch": 0.28, "grad_norm": 0.5055626842353665, "learning_rate": 3.402596355601649e-05, "loss": 0.137, "step": 2505 }, { "epoch": 0.28, "grad_norm": 0.371369028886359, "learning_rate": 3.402089124431843e-05, "loss": 0.1189, "step": 2506 }, { "epoch": 0.28, "grad_norm": 0.4596176810440495, "learning_rate": 3.401581715860556e-05, "loss": 0.1217, "step": 2507 }, { "epoch": 0.28, "grad_norm": 0.4652359694190133, "learning_rate": 3.4010741299519885e-05, "loss": 0.1208, "step": 2508 }, { "epoch": 0.28, "grad_norm": 0.5393702140144903, "learning_rate": 3.400566366770364e-05, "loss": 0.1122, "step": 2509 }, { "epoch": 0.28, "grad_norm": 0.35453739350915175, "learning_rate": 3.400058426379929e-05, "loss": 0.0986, "step": 2510 }, { "epoch": 0.28, "grad_norm": 0.5010457725383107, "learning_rate": 3.399550308844949e-05, "loss": 0.1205, "step": 2511 }, { "epoch": 0.28, "grad_norm": 0.45002586171222564, "learning_rate": 3.3990420142297165e-05, "loss": 0.0938, "step": 2512 }, { "epoch": 0.28, "grad_norm": 0.4489480073483344, "learning_rate": 3.398533542598543e-05, "loss": 0.1305, "step": 2513 }, { "epoch": 0.28, "grad_norm": 0.5108483752991453, "learning_rate": 3.398024894015764e-05, "loss": 0.1266, "step": 2514 }, { "epoch": 0.28, "grad_norm": 0.4106827580897056, "learning_rate": 3.397516068545738e-05, "loss": 0.1051, "step": 2515 }, { "epoch": 0.28, "grad_norm": 0.6140333047386207, "learning_rate": 3.3970070662528436e-05, "loss": 0.1614, "step": 2516 }, { "epoch": 0.28, "grad_norm": 0.39032023939121047, "learning_rate": 3.396497887201484e-05, "loss": 0.1147, "step": 2517 }, { "epoch": 0.28, "grad_norm": 0.5848595431022527, "learning_rate": 3.395988531456083e-05, "loss": 0.1332, "step": 2518 }, { "epoch": 0.28, "grad_norm": 0.5156918106721885, "learning_rate": 3.395478999081088e-05, "loss": 0.1402, "step": 2519 }, { "epoch": 0.28, "grad_norm": 0.39500240482129434, "learning_rate": 3.394969290140969e-05, "loss": 0.1191, "step": 2520 }, { "epoch": 0.28, "grad_norm": 0.5060053728839906, "learning_rate": 3.3944594047002174e-05, "loss": 0.1544, "step": 2521 }, { "epoch": 0.28, "grad_norm": 0.5845301192740991, "learning_rate": 3.393949342823346e-05, "loss": 0.1808, "step": 2522 }, { "epoch": 0.28, "grad_norm": 0.38663861378832465, "learning_rate": 3.393439104574893e-05, "loss": 0.0869, "step": 2523 }, { "epoch": 0.28, "grad_norm": 0.43672018716265165, "learning_rate": 3.3929286900194154e-05, "loss": 0.1336, "step": 2524 }, { "epoch": 0.28, "grad_norm": 0.4646614743136001, "learning_rate": 3.392418099221495e-05, "loss": 0.127, "step": 2525 }, { "epoch": 0.28, "grad_norm": 0.47530254717256076, "learning_rate": 3.3919073322457364e-05, "loss": 0.1102, "step": 2526 }, { "epoch": 0.28, "grad_norm": 0.5607044915497152, "learning_rate": 3.391396389156763e-05, "loss": 0.1363, "step": 2527 }, { "epoch": 0.28, "grad_norm": 0.3988736514299119, "learning_rate": 3.3908852700192236e-05, "loss": 0.1142, "step": 2528 }, { "epoch": 0.28, "grad_norm": 0.5249448395316002, "learning_rate": 3.3903739748977884e-05, "loss": 0.1259, "step": 2529 }, { "epoch": 0.28, "grad_norm": 0.534279906305615, "learning_rate": 3.38986250385715e-05, "loss": 0.1523, "step": 2530 }, { "epoch": 0.28, "grad_norm": 0.41890935143272784, "learning_rate": 3.3893508569620216e-05, "loss": 0.1267, "step": 2531 }, { "epoch": 0.28, "grad_norm": 0.44415084840521085, "learning_rate": 3.388839034277142e-05, "loss": 0.104, "step": 2532 }, { "epoch": 0.28, "grad_norm": 0.4814071485392782, "learning_rate": 3.38832703586727e-05, "loss": 0.1083, "step": 2533 }, { "epoch": 0.28, "grad_norm": 0.5115481376146486, "learning_rate": 3.387814861797186e-05, "loss": 0.122, "step": 2534 }, { "epoch": 0.28, "grad_norm": 0.5095427160952817, "learning_rate": 3.3873025121316945e-05, "loss": 0.1693, "step": 2535 }, { "epoch": 0.28, "grad_norm": 0.39141546379406983, "learning_rate": 3.386789986935621e-05, "loss": 0.0862, "step": 2536 }, { "epoch": 0.28, "grad_norm": 0.4862217767708159, "learning_rate": 3.386277286273814e-05, "loss": 0.1109, "step": 2537 }, { "epoch": 0.28, "grad_norm": 0.439195090938755, "learning_rate": 3.385764410211143e-05, "loss": 0.1271, "step": 2538 }, { "epoch": 0.28, "grad_norm": 0.4824005616456806, "learning_rate": 3.3852513588125e-05, "loss": 0.1448, "step": 2539 }, { "epoch": 0.28, "grad_norm": 0.3998231355243317, "learning_rate": 3.3847381321428e-05, "loss": 0.0963, "step": 2540 }, { "epoch": 0.28, "grad_norm": 0.528702405586636, "learning_rate": 3.384224730266982e-05, "loss": 0.1525, "step": 2541 }, { "epoch": 0.28, "grad_norm": 0.43344132433402593, "learning_rate": 3.383711153250002e-05, "loss": 0.082, "step": 2542 }, { "epoch": 0.28, "grad_norm": 0.4621537858738817, "learning_rate": 3.383197401156842e-05, "loss": 0.1171, "step": 2543 }, { "epoch": 0.28, "grad_norm": 0.5224876040489491, "learning_rate": 3.382683474052506e-05, "loss": 0.1669, "step": 2544 }, { "epoch": 0.28, "grad_norm": 0.4082463072792579, "learning_rate": 3.382169372002019e-05, "loss": 0.0986, "step": 2545 }, { "epoch": 0.28, "grad_norm": 0.520631185085014, "learning_rate": 3.381655095070428e-05, "loss": 0.1199, "step": 2546 }, { "epoch": 0.28, "grad_norm": 0.4372131394945225, "learning_rate": 3.3811406433228034e-05, "loss": 0.0973, "step": 2547 }, { "epoch": 0.28, "grad_norm": 0.5215359780143387, "learning_rate": 3.3806260168242365e-05, "loss": 0.1199, "step": 2548 }, { "epoch": 0.28, "grad_norm": 0.45685334288672597, "learning_rate": 3.380111215639842e-05, "loss": 0.1436, "step": 2549 }, { "epoch": 0.28, "grad_norm": 0.5455295360376775, "learning_rate": 3.379596239834755e-05, "loss": 0.1672, "step": 2550 }, { "epoch": 0.28, "grad_norm": 0.5805738751802865, "learning_rate": 3.379081089474134e-05, "loss": 0.1487, "step": 2551 }, { "epoch": 0.28, "grad_norm": 0.38275840922087173, "learning_rate": 3.3785657646231596e-05, "loss": 0.0945, "step": 2552 }, { "epoch": 0.28, "grad_norm": 0.4860448663472422, "learning_rate": 3.378050265347033e-05, "loss": 0.1483, "step": 2553 }, { "epoch": 0.28, "grad_norm": 0.41590030844948245, "learning_rate": 3.37753459171098e-05, "loss": 0.088, "step": 2554 }, { "epoch": 0.28, "grad_norm": 0.4933646370191088, "learning_rate": 3.377018743780245e-05, "loss": 0.1216, "step": 2555 }, { "epoch": 0.28, "grad_norm": 0.4483199872750863, "learning_rate": 3.376502721620098e-05, "loss": 0.1348, "step": 2556 }, { "epoch": 0.28, "grad_norm": 0.57117660589376, "learning_rate": 3.37598652529583e-05, "loss": 0.1664, "step": 2557 }, { "epoch": 0.28, "grad_norm": 0.5086700241624675, "learning_rate": 3.375470154872751e-05, "loss": 0.1241, "step": 2558 }, { "epoch": 0.28, "grad_norm": 0.5585742226768746, "learning_rate": 3.3749536104161984e-05, "loss": 0.1862, "step": 2559 }, { "epoch": 0.28, "grad_norm": 0.5117998422913124, "learning_rate": 3.3744368919915275e-05, "loss": 0.1567, "step": 2560 }, { "epoch": 0.28, "grad_norm": 0.5150417728519117, "learning_rate": 3.373919999664117e-05, "loss": 0.1942, "step": 2561 }, { "epoch": 0.28, "grad_norm": 0.4999968558451024, "learning_rate": 3.3734029334993675e-05, "loss": 0.1266, "step": 2562 }, { "epoch": 0.28, "grad_norm": 0.4900600229693779, "learning_rate": 3.372885693562701e-05, "loss": 0.1373, "step": 2563 }, { "epoch": 0.28, "grad_norm": 0.38314127913920476, "learning_rate": 3.372368279919563e-05, "loss": 0.1067, "step": 2564 }, { "epoch": 0.28, "grad_norm": 0.5332637654569706, "learning_rate": 3.37185069263542e-05, "loss": 0.1791, "step": 2565 }, { "epoch": 0.28, "grad_norm": 0.5486049537742378, "learning_rate": 3.3713329317757594e-05, "loss": 0.1564, "step": 2566 }, { "epoch": 0.28, "grad_norm": 0.44130118140765495, "learning_rate": 3.370814997406093e-05, "loss": 0.1062, "step": 2567 }, { "epoch": 0.28, "grad_norm": 0.4813566919439541, "learning_rate": 3.370296889591953e-05, "loss": 0.1177, "step": 2568 }, { "epoch": 0.28, "grad_norm": 0.4349935121162049, "learning_rate": 3.369778608398894e-05, "loss": 0.1052, "step": 2569 }, { "epoch": 0.28, "grad_norm": 0.5049315084146065, "learning_rate": 3.369260153892491e-05, "loss": 0.1428, "step": 2570 }, { "epoch": 0.28, "grad_norm": 0.46337019145395963, "learning_rate": 3.368741526138344e-05, "loss": 0.1215, "step": 2571 }, { "epoch": 0.28, "grad_norm": 0.46036254455388864, "learning_rate": 3.3682227252020716e-05, "loss": 0.1277, "step": 2572 }, { "epoch": 0.28, "grad_norm": 0.4229826867041454, "learning_rate": 3.367703751149316e-05, "loss": 0.1204, "step": 2573 }, { "epoch": 0.28, "grad_norm": 0.5063872363256006, "learning_rate": 3.367184604045743e-05, "loss": 0.1262, "step": 2574 }, { "epoch": 0.28, "grad_norm": 0.5783439170196947, "learning_rate": 3.3666652839570366e-05, "loss": 0.1428, "step": 2575 }, { "epoch": 0.28, "grad_norm": 0.43727760111793185, "learning_rate": 3.3661457909489056e-05, "loss": 0.1148, "step": 2576 }, { "epoch": 0.28, "grad_norm": 0.4930275605392448, "learning_rate": 3.365626125087079e-05, "loss": 0.1169, "step": 2577 }, { "epoch": 0.28, "grad_norm": 0.505109488921454, "learning_rate": 3.365106286437309e-05, "loss": 0.1634, "step": 2578 }, { "epoch": 0.28, "grad_norm": 0.42353267303928016, "learning_rate": 3.3645862750653675e-05, "loss": 0.1051, "step": 2579 }, { "epoch": 0.28, "grad_norm": 0.4399823879921975, "learning_rate": 3.364066091037052e-05, "loss": 0.1144, "step": 2580 }, { "epoch": 0.28, "grad_norm": 0.7231892038130262, "learning_rate": 3.363545734418178e-05, "loss": 0.2078, "step": 2581 }, { "epoch": 0.28, "grad_norm": 0.432307457539732, "learning_rate": 3.3630252052745844e-05, "loss": 0.1207, "step": 2582 }, { "epoch": 0.28, "grad_norm": 0.4000852583383531, "learning_rate": 3.362504503672133e-05, "loss": 0.1281, "step": 2583 }, { "epoch": 0.28, "grad_norm": 0.4081756728583051, "learning_rate": 3.361983629676705e-05, "loss": 0.1025, "step": 2584 }, { "epoch": 0.28, "grad_norm": 0.5165511685401845, "learning_rate": 3.361462583354207e-05, "loss": 0.1471, "step": 2585 }, { "epoch": 0.28, "grad_norm": 0.4667985349489968, "learning_rate": 3.360941364770562e-05, "loss": 0.1459, "step": 2586 }, { "epoch": 0.28, "grad_norm": 0.40230317513025765, "learning_rate": 3.3604199739917205e-05, "loss": 0.1355, "step": 2587 }, { "epoch": 0.28, "grad_norm": 0.3959479249757505, "learning_rate": 3.359898411083652e-05, "loss": 0.1256, "step": 2588 }, { "epoch": 0.28, "grad_norm": 0.4677749665619581, "learning_rate": 3.359376676112347e-05, "loss": 0.147, "step": 2589 }, { "epoch": 0.28, "grad_norm": 0.5444453079947719, "learning_rate": 3.358854769143819e-05, "loss": 0.1601, "step": 2590 }, { "epoch": 0.28, "grad_norm": 0.500436889987277, "learning_rate": 3.358332690244104e-05, "loss": 0.1044, "step": 2591 }, { "epoch": 0.28, "grad_norm": 0.6368952781436327, "learning_rate": 3.357810439479258e-05, "loss": 0.144, "step": 2592 }, { "epoch": 0.28, "grad_norm": 0.47217379135439264, "learning_rate": 3.3572880169153595e-05, "loss": 0.1426, "step": 2593 }, { "epoch": 0.28, "grad_norm": 0.44072216328906727, "learning_rate": 3.356765422618509e-05, "loss": 0.1143, "step": 2594 }, { "epoch": 0.28, "grad_norm": 0.4589058666857585, "learning_rate": 3.35624265665483e-05, "loss": 0.161, "step": 2595 }, { "epoch": 0.29, "grad_norm": 0.5042011373994308, "learning_rate": 3.355719719090465e-05, "loss": 0.1374, "step": 2596 }, { "epoch": 0.29, "grad_norm": 0.5061150865570824, "learning_rate": 3.3551966099915776e-05, "loss": 0.1596, "step": 2597 }, { "epoch": 0.29, "grad_norm": 0.368884265023208, "learning_rate": 3.3546733294243585e-05, "loss": 0.1116, "step": 2598 }, { "epoch": 0.29, "grad_norm": 0.5817271980944791, "learning_rate": 3.3541498774550134e-05, "loss": 0.1577, "step": 2599 }, { "epoch": 0.29, "grad_norm": 0.46160100607101245, "learning_rate": 3.353626254149776e-05, "loss": 0.1329, "step": 2600 }, { "epoch": 0.29, "grad_norm": 0.4377371622051643, "learning_rate": 3.3531024595748955e-05, "loss": 0.1166, "step": 2601 }, { "epoch": 0.29, "grad_norm": 0.3762480115045868, "learning_rate": 3.3525784937966474e-05, "loss": 0.1212, "step": 2602 }, { "epoch": 0.29, "grad_norm": 0.4024979564366086, "learning_rate": 3.352054356881328e-05, "loss": 0.0978, "step": 2603 }, { "epoch": 0.29, "grad_norm": 0.4134229903727814, "learning_rate": 3.3515300488952534e-05, "loss": 0.1161, "step": 2604 }, { "epoch": 0.29, "grad_norm": 0.3793189171024969, "learning_rate": 3.3510055699047624e-05, "loss": 0.0986, "step": 2605 }, { "epoch": 0.29, "grad_norm": 0.3820031621862018, "learning_rate": 3.350480919976216e-05, "loss": 0.0968, "step": 2606 }, { "epoch": 0.29, "grad_norm": 0.4622382544556621, "learning_rate": 3.349956099175996e-05, "loss": 0.1629, "step": 2607 }, { "epoch": 0.29, "grad_norm": 0.4826096389408658, "learning_rate": 3.349431107570506e-05, "loss": 0.13, "step": 2608 }, { "epoch": 0.29, "grad_norm": 0.5134384727352723, "learning_rate": 3.348905945226173e-05, "loss": 0.1366, "step": 2609 }, { "epoch": 0.29, "grad_norm": 0.5132842831688942, "learning_rate": 3.348380612209441e-05, "loss": 0.1483, "step": 2610 }, { "epoch": 0.29, "grad_norm": 0.5784984877145398, "learning_rate": 3.347855108586781e-05, "loss": 0.1295, "step": 2611 }, { "epoch": 0.29, "grad_norm": 0.5369564601661964, "learning_rate": 3.347329434424683e-05, "loss": 0.1494, "step": 2612 }, { "epoch": 0.29, "grad_norm": 0.536472178407048, "learning_rate": 3.346803589789657e-05, "loss": 0.1641, "step": 2613 }, { "epoch": 0.29, "grad_norm": 0.5125602907678098, "learning_rate": 3.346277574748238e-05, "loss": 0.0982, "step": 2614 }, { "epoch": 0.29, "grad_norm": 0.5289817801206103, "learning_rate": 3.34575138936698e-05, "loss": 0.1327, "step": 2615 }, { "epoch": 0.29, "grad_norm": 0.44559800883512596, "learning_rate": 3.345225033712459e-05, "loss": 0.1007, "step": 2616 }, { "epoch": 0.29, "grad_norm": 0.5431117685898982, "learning_rate": 3.344698507851274e-05, "loss": 0.1235, "step": 2617 }, { "epoch": 0.29, "grad_norm": 0.9323867593073056, "learning_rate": 3.344171811850045e-05, "loss": 0.1646, "step": 2618 }, { "epoch": 0.29, "grad_norm": 0.4747742731418723, "learning_rate": 3.343644945775412e-05, "loss": 0.1595, "step": 2619 }, { "epoch": 0.29, "grad_norm": 0.42232267151456404, "learning_rate": 3.3431179096940375e-05, "loss": 0.13, "step": 2620 }, { "epoch": 0.29, "grad_norm": 0.4890635517828116, "learning_rate": 3.3425907036726055e-05, "loss": 0.1159, "step": 2621 }, { "epoch": 0.29, "grad_norm": 0.6026937820896767, "learning_rate": 3.3420633277778214e-05, "loss": 0.1649, "step": 2622 }, { "epoch": 0.29, "grad_norm": 0.5000208611904822, "learning_rate": 3.341535782076413e-05, "loss": 0.1136, "step": 2623 }, { "epoch": 0.29, "grad_norm": 0.36418500893721073, "learning_rate": 3.341008066635129e-05, "loss": 0.1306, "step": 2624 }, { "epoch": 0.29, "grad_norm": 0.42959085591399254, "learning_rate": 3.3404801815207384e-05, "loss": 0.1276, "step": 2625 }, { "epoch": 0.29, "grad_norm": 0.5054793826934995, "learning_rate": 3.339952126800033e-05, "loss": 0.138, "step": 2626 }, { "epoch": 0.29, "grad_norm": 0.4707223722076287, "learning_rate": 3.339423902539826e-05, "loss": 0.1479, "step": 2627 }, { "epoch": 0.29, "grad_norm": 0.42210554958193724, "learning_rate": 3.3388955088069524e-05, "loss": 0.1442, "step": 2628 }, { "epoch": 0.29, "grad_norm": 0.45822312554483813, "learning_rate": 3.3383669456682664e-05, "loss": 0.152, "step": 2629 }, { "epoch": 0.29, "grad_norm": 0.48504328003701247, "learning_rate": 3.3378382131906465e-05, "loss": 0.1358, "step": 2630 }, { "epoch": 0.29, "grad_norm": 0.41985201587213494, "learning_rate": 3.337309311440991e-05, "loss": 0.0969, "step": 2631 }, { "epoch": 0.29, "grad_norm": 0.3560109457412073, "learning_rate": 3.33678024048622e-05, "loss": 0.1108, "step": 2632 }, { "epoch": 0.29, "grad_norm": 0.744208265219096, "learning_rate": 3.336251000393275e-05, "loss": 0.1923, "step": 2633 }, { "epoch": 0.29, "grad_norm": 0.4882880858896491, "learning_rate": 3.335721591229119e-05, "loss": 0.1001, "step": 2634 }, { "epoch": 0.29, "grad_norm": 0.5291493903616434, "learning_rate": 3.335192013060737e-05, "loss": 0.1946, "step": 2635 }, { "epoch": 0.29, "grad_norm": 0.5928591771468422, "learning_rate": 3.334662265955133e-05, "loss": 0.1623, "step": 2636 }, { "epoch": 0.29, "grad_norm": 0.6567678678860573, "learning_rate": 3.334132349979336e-05, "loss": 0.2365, "step": 2637 }, { "epoch": 0.29, "grad_norm": 0.4130182954855353, "learning_rate": 3.3336022652003924e-05, "loss": 0.156, "step": 2638 }, { "epoch": 0.29, "grad_norm": 0.49880110533805194, "learning_rate": 3.333072011685374e-05, "loss": 0.1391, "step": 2639 }, { "epoch": 0.29, "grad_norm": 0.4302024703130464, "learning_rate": 3.33254158950137e-05, "loss": 0.0964, "step": 2640 }, { "epoch": 0.29, "grad_norm": 0.3808579860564117, "learning_rate": 3.3320109987154956e-05, "loss": 0.137, "step": 2641 }, { "epoch": 0.29, "grad_norm": 0.41184524460488126, "learning_rate": 3.331480239394881e-05, "loss": 0.1043, "step": 2642 }, { "epoch": 0.29, "grad_norm": 0.4502693992440942, "learning_rate": 3.330949311606685e-05, "loss": 0.1234, "step": 2643 }, { "epoch": 0.29, "grad_norm": 0.3970998299918943, "learning_rate": 3.330418215418081e-05, "loss": 0.1239, "step": 2644 }, { "epoch": 0.29, "grad_norm": 0.47855892374755765, "learning_rate": 3.3298869508962687e-05, "loss": 0.116, "step": 2645 }, { "epoch": 0.29, "grad_norm": 0.5117271364412973, "learning_rate": 3.329355518108466e-05, "loss": 0.1152, "step": 2646 }, { "epoch": 0.29, "grad_norm": 0.5207852023455704, "learning_rate": 3.328823917121914e-05, "loss": 0.1497, "step": 2647 }, { "epoch": 0.29, "grad_norm": 0.6219859882015374, "learning_rate": 3.328292148003875e-05, "loss": 0.2174, "step": 2648 }, { "epoch": 0.29, "grad_norm": 0.4191877823864674, "learning_rate": 3.32776021082163e-05, "loss": 0.101, "step": 2649 }, { "epoch": 0.29, "grad_norm": 0.5023916978037712, "learning_rate": 3.3272281056424854e-05, "loss": 0.1427, "step": 2650 }, { "epoch": 0.29, "grad_norm": 0.4312987030557723, "learning_rate": 3.326695832533764e-05, "loss": 0.0968, "step": 2651 }, { "epoch": 0.29, "grad_norm": 0.4996431239393354, "learning_rate": 3.326163391562814e-05, "loss": 0.1452, "step": 2652 }, { "epoch": 0.29, "grad_norm": 0.5041623019457745, "learning_rate": 3.325630782797004e-05, "loss": 0.1458, "step": 2653 }, { "epoch": 0.29, "grad_norm": 0.4463549842812147, "learning_rate": 3.325098006303722e-05, "loss": 0.1045, "step": 2654 }, { "epoch": 0.29, "grad_norm": 0.4017227342987408, "learning_rate": 3.324565062150379e-05, "loss": 0.0984, "step": 2655 }, { "epoch": 0.29, "grad_norm": 0.5222264414249359, "learning_rate": 3.324031950404406e-05, "loss": 0.1261, "step": 2656 }, { "epoch": 0.29, "grad_norm": 0.6623592749070012, "learning_rate": 3.3234986711332554e-05, "loss": 0.1706, "step": 2657 }, { "epoch": 0.29, "grad_norm": 0.4953809408081428, "learning_rate": 3.322965224404403e-05, "loss": 0.1365, "step": 2658 }, { "epoch": 0.29, "grad_norm": 0.4681862301883733, "learning_rate": 3.322431610285342e-05, "loss": 0.1148, "step": 2659 }, { "epoch": 0.29, "grad_norm": 0.47687667121444377, "learning_rate": 3.3218978288435896e-05, "loss": 0.1349, "step": 2660 }, { "epoch": 0.29, "grad_norm": 0.454294241768839, "learning_rate": 3.321363880146684e-05, "loss": 0.1112, "step": 2661 }, { "epoch": 0.29, "grad_norm": 0.41378591404655785, "learning_rate": 3.3208297642621824e-05, "loss": 0.0949, "step": 2662 }, { "epoch": 0.29, "grad_norm": 0.3859905746969514, "learning_rate": 3.320295481257665e-05, "loss": 0.0897, "step": 2663 }, { "epoch": 0.29, "grad_norm": 0.4841351684440104, "learning_rate": 3.319761031200735e-05, "loss": 0.1593, "step": 2664 }, { "epoch": 0.29, "grad_norm": 0.4632021342714253, "learning_rate": 3.319226414159011e-05, "loss": 0.127, "step": 2665 }, { "epoch": 0.29, "grad_norm": 0.41766667368361904, "learning_rate": 3.318691630200138e-05, "loss": 0.0876, "step": 2666 }, { "epoch": 0.29, "grad_norm": 0.41925842748575715, "learning_rate": 3.318156679391781e-05, "loss": 0.1093, "step": 2667 }, { "epoch": 0.29, "grad_norm": 0.3459959998629984, "learning_rate": 3.317621561801624e-05, "loss": 0.0824, "step": 2668 }, { "epoch": 0.29, "grad_norm": 0.4833338530581519, "learning_rate": 3.317086277497375e-05, "loss": 0.1287, "step": 2669 }, { "epoch": 0.29, "grad_norm": 0.4423587278837199, "learning_rate": 3.316550826546761e-05, "loss": 0.1021, "step": 2670 }, { "epoch": 0.29, "grad_norm": 0.47057166578770115, "learning_rate": 3.31601520901753e-05, "loss": 0.1173, "step": 2671 }, { "epoch": 0.29, "grad_norm": 0.4638050881027339, "learning_rate": 3.315479424977453e-05, "loss": 0.1277, "step": 2672 }, { "epoch": 0.29, "grad_norm": 0.4100822381954122, "learning_rate": 3.314943474494321e-05, "loss": 0.1157, "step": 2673 }, { "epoch": 0.29, "grad_norm": 0.43098299495889725, "learning_rate": 3.3144073576359455e-05, "loss": 0.1492, "step": 2674 }, { "epoch": 0.29, "grad_norm": 0.4525672834649572, "learning_rate": 3.313871074470159e-05, "loss": 0.1066, "step": 2675 }, { "epoch": 0.29, "grad_norm": 0.49358161396927436, "learning_rate": 3.313334625064816e-05, "loss": 0.1396, "step": 2676 }, { "epoch": 0.29, "grad_norm": 0.5213026951836351, "learning_rate": 3.312798009487791e-05, "loss": 0.1541, "step": 2677 }, { "epoch": 0.29, "grad_norm": 0.4292811292792686, "learning_rate": 3.312261227806982e-05, "loss": 0.1066, "step": 2678 }, { "epoch": 0.29, "grad_norm": 0.49854560265725584, "learning_rate": 3.311724280090304e-05, "loss": 0.1099, "step": 2679 }, { "epoch": 0.29, "grad_norm": 0.3447681002135798, "learning_rate": 3.311187166405696e-05, "loss": 0.0769, "step": 2680 }, { "epoch": 0.29, "grad_norm": 0.5460788926708016, "learning_rate": 3.310649886821117e-05, "loss": 0.1254, "step": 2681 }, { "epoch": 0.29, "grad_norm": 0.43840882727574926, "learning_rate": 3.310112441404548e-05, "loss": 0.1102, "step": 2682 }, { "epoch": 0.29, "grad_norm": 0.44350301625517363, "learning_rate": 3.309574830223988e-05, "loss": 0.1126, "step": 2683 }, { "epoch": 0.29, "grad_norm": 0.44353024723205975, "learning_rate": 3.309037053347462e-05, "loss": 0.1455, "step": 2684 }, { "epoch": 0.29, "grad_norm": 0.48658892521185115, "learning_rate": 3.3084991108430105e-05, "loss": 0.158, "step": 2685 }, { "epoch": 0.29, "grad_norm": 0.4352094771810096, "learning_rate": 3.3079610027786985e-05, "loss": 0.0968, "step": 2686 }, { "epoch": 0.3, "grad_norm": 0.45618373180567723, "learning_rate": 3.307422729222611e-05, "loss": 0.1189, "step": 2687 }, { "epoch": 0.3, "grad_norm": 0.4506011920623656, "learning_rate": 3.306884290242854e-05, "loss": 0.1126, "step": 2688 }, { "epoch": 0.3, "grad_norm": 0.5525457377079442, "learning_rate": 3.306345685907553e-05, "loss": 0.1295, "step": 2689 }, { "epoch": 0.3, "grad_norm": 0.359844771469462, "learning_rate": 3.3058069162848586e-05, "loss": 0.0952, "step": 2690 }, { "epoch": 0.3, "grad_norm": 0.4749128682129159, "learning_rate": 3.3052679814429374e-05, "loss": 0.1068, "step": 2691 }, { "epoch": 0.3, "grad_norm": 0.5393339385916132, "learning_rate": 3.3047288814499786e-05, "loss": 0.1034, "step": 2692 }, { "epoch": 0.3, "grad_norm": 0.6823146078509249, "learning_rate": 3.304189616374193e-05, "loss": 0.1689, "step": 2693 }, { "epoch": 0.3, "grad_norm": 0.6178597036957533, "learning_rate": 3.3036501862838125e-05, "loss": 0.2109, "step": 2694 }, { "epoch": 0.3, "grad_norm": 0.41554746119642, "learning_rate": 3.30311059124709e-05, "loss": 0.1319, "step": 2695 }, { "epoch": 0.3, "grad_norm": 0.4795717689993543, "learning_rate": 3.302570831332297e-05, "loss": 0.1187, "step": 2696 }, { "epoch": 0.3, "grad_norm": 0.4513382377898031, "learning_rate": 3.302030906607728e-05, "loss": 0.122, "step": 2697 }, { "epoch": 0.3, "grad_norm": 0.47749757488993017, "learning_rate": 3.301490817141698e-05, "loss": 0.1422, "step": 2698 }, { "epoch": 0.3, "grad_norm": 0.4610132058252583, "learning_rate": 3.300950563002543e-05, "loss": 0.118, "step": 2699 }, { "epoch": 0.3, "grad_norm": 0.45884733386225895, "learning_rate": 3.300410144258619e-05, "loss": 0.1575, "step": 2700 }, { "epoch": 0.3, "grad_norm": 0.33295770307284006, "learning_rate": 3.299869560978303e-05, "loss": 0.0839, "step": 2701 }, { "epoch": 0.3, "grad_norm": 0.38639808615310256, "learning_rate": 3.2993288132299935e-05, "loss": 0.0913, "step": 2702 }, { "epoch": 0.3, "grad_norm": 0.5073165819549732, "learning_rate": 3.2987879010821096e-05, "loss": 0.1289, "step": 2703 }, { "epoch": 0.3, "grad_norm": 0.5183153480167265, "learning_rate": 3.298246824603091e-05, "loss": 0.1654, "step": 2704 }, { "epoch": 0.3, "grad_norm": 0.4461552525761872, "learning_rate": 3.2977055838613985e-05, "loss": 0.1398, "step": 2705 }, { "epoch": 0.3, "grad_norm": 0.48710769004499765, "learning_rate": 3.297164178925512e-05, "loss": 0.1248, "step": 2706 }, { "epoch": 0.3, "grad_norm": 0.4418211312216522, "learning_rate": 3.296622609863935e-05, "loss": 0.1339, "step": 2707 }, { "epoch": 0.3, "grad_norm": 0.422403675418906, "learning_rate": 3.2960808767451905e-05, "loss": 0.1017, "step": 2708 }, { "epoch": 0.3, "grad_norm": 0.39050060198782655, "learning_rate": 3.295538979637821e-05, "loss": 0.0914, "step": 2709 }, { "epoch": 0.3, "grad_norm": 0.4273381074187671, "learning_rate": 3.294996918610393e-05, "loss": 0.1088, "step": 2710 }, { "epoch": 0.3, "grad_norm": 0.4908722151406998, "learning_rate": 3.294454693731488e-05, "loss": 0.1272, "step": 2711 }, { "epoch": 0.3, "grad_norm": 0.4371713357463505, "learning_rate": 3.293912305069715e-05, "loss": 0.1104, "step": 2712 }, { "epoch": 0.3, "grad_norm": 0.5580671268886105, "learning_rate": 3.293369752693699e-05, "loss": 0.1241, "step": 2713 }, { "epoch": 0.3, "grad_norm": 0.48085606478582876, "learning_rate": 3.292827036672089e-05, "loss": 0.1441, "step": 2714 }, { "epoch": 0.3, "grad_norm": 0.5133971433282902, "learning_rate": 3.2922841570735504e-05, "loss": 0.1328, "step": 2715 }, { "epoch": 0.3, "grad_norm": 0.49036193039295145, "learning_rate": 3.291741113966773e-05, "loss": 0.1195, "step": 2716 }, { "epoch": 0.3, "grad_norm": 0.5488916000639453, "learning_rate": 3.2911979074204676e-05, "loss": 0.1432, "step": 2717 }, { "epoch": 0.3, "grad_norm": 0.5505345109764069, "learning_rate": 3.290654537503362e-05, "loss": 0.1289, "step": 2718 }, { "epoch": 0.3, "grad_norm": 0.7226473833519608, "learning_rate": 3.290111004284208e-05, "loss": 0.173, "step": 2719 }, { "epoch": 0.3, "grad_norm": 0.48289207370641346, "learning_rate": 3.2895673078317775e-05, "loss": 0.1001, "step": 2720 }, { "epoch": 0.3, "grad_norm": 0.4892949377658385, "learning_rate": 3.2890234482148615e-05, "loss": 0.1313, "step": 2721 }, { "epoch": 0.3, "grad_norm": 0.4395900597743192, "learning_rate": 3.288479425502273e-05, "loss": 0.1212, "step": 2722 }, { "epoch": 0.3, "grad_norm": 0.4441308690646111, "learning_rate": 3.2879352397628454e-05, "loss": 0.1519, "step": 2723 }, { "epoch": 0.3, "grad_norm": 0.4608396652619052, "learning_rate": 3.287390891065433e-05, "loss": 0.1355, "step": 2724 }, { "epoch": 0.3, "grad_norm": 0.5274939146640016, "learning_rate": 3.28684637947891e-05, "loss": 0.1455, "step": 2725 }, { "epoch": 0.3, "grad_norm": 0.4470603671786324, "learning_rate": 3.2863017050721715e-05, "loss": 0.1275, "step": 2726 }, { "epoch": 0.3, "grad_norm": 0.4137301280617195, "learning_rate": 3.285756867914133e-05, "loss": 0.1079, "step": 2727 }, { "epoch": 0.3, "grad_norm": 0.3900506183591493, "learning_rate": 3.2852118680737306e-05, "loss": 0.1016, "step": 2728 }, { "epoch": 0.3, "grad_norm": 0.4544293113005345, "learning_rate": 3.2846667056199235e-05, "loss": 0.1363, "step": 2729 }, { "epoch": 0.3, "grad_norm": 0.4558894248021203, "learning_rate": 3.2841213806216864e-05, "loss": 0.1121, "step": 2730 }, { "epoch": 0.3, "grad_norm": 0.32132442476872114, "learning_rate": 3.283575893148019e-05, "loss": 0.0748, "step": 2731 }, { "epoch": 0.3, "grad_norm": 0.43447182214322044, "learning_rate": 3.283030243267939e-05, "loss": 0.1183, "step": 2732 }, { "epoch": 0.3, "grad_norm": 0.4582573509812947, "learning_rate": 3.282484431050486e-05, "loss": 0.1111, "step": 2733 }, { "epoch": 0.3, "grad_norm": 0.5683479439021748, "learning_rate": 3.281938456564721e-05, "loss": 0.136, "step": 2734 }, { "epoch": 0.3, "grad_norm": 0.4957322132422844, "learning_rate": 3.281392319879722e-05, "loss": 0.1446, "step": 2735 }, { "epoch": 0.3, "grad_norm": 0.40380508808796073, "learning_rate": 3.2808460210645906e-05, "loss": 0.1128, "step": 2736 }, { "epoch": 0.3, "grad_norm": 0.4764700002483128, "learning_rate": 3.280299560188449e-05, "loss": 0.1069, "step": 2737 }, { "epoch": 0.3, "grad_norm": 0.6165545392767173, "learning_rate": 3.2797529373204375e-05, "loss": 0.1615, "step": 2738 }, { "epoch": 0.3, "grad_norm": 0.472093325753557, "learning_rate": 3.27920615252972e-05, "loss": 0.1227, "step": 2739 }, { "epoch": 0.3, "grad_norm": 0.5183952934279598, "learning_rate": 3.278659205885479e-05, "loss": 0.0944, "step": 2740 }, { "epoch": 0.3, "grad_norm": 0.5181295660206561, "learning_rate": 3.2781120974569165e-05, "loss": 0.1337, "step": 2741 }, { "epoch": 0.3, "grad_norm": 0.7172496311642858, "learning_rate": 3.2775648273132574e-05, "loss": 0.1739, "step": 2742 }, { "epoch": 0.3, "grad_norm": 0.4876381568277856, "learning_rate": 3.277017395523745e-05, "loss": 0.1425, "step": 2743 }, { "epoch": 0.3, "grad_norm": 0.4812705630083188, "learning_rate": 3.2764698021576446e-05, "loss": 0.1045, "step": 2744 }, { "epoch": 0.3, "grad_norm": 0.43638420871534683, "learning_rate": 3.275922047284241e-05, "loss": 0.1313, "step": 2745 }, { "epoch": 0.3, "grad_norm": 0.5629779586441177, "learning_rate": 3.27537413097284e-05, "loss": 0.1766, "step": 2746 }, { "epoch": 0.3, "grad_norm": 0.42660554661674854, "learning_rate": 3.2748260532927675e-05, "loss": 0.1017, "step": 2747 }, { "epoch": 0.3, "grad_norm": 0.5684302374327078, "learning_rate": 3.27427781431337e-05, "loss": 0.165, "step": 2748 }, { "epoch": 0.3, "grad_norm": 0.4390759863088853, "learning_rate": 3.2737294141040147e-05, "loss": 0.1216, "step": 2749 }, { "epoch": 0.3, "grad_norm": 0.43582255786256824, "learning_rate": 3.273180852734087e-05, "loss": 0.1053, "step": 2750 }, { "epoch": 0.3, "grad_norm": 0.36589797825365517, "learning_rate": 3.2726321302729964e-05, "loss": 0.0894, "step": 2751 }, { "epoch": 0.3, "grad_norm": 0.3446400131204243, "learning_rate": 3.27208324679017e-05, "loss": 0.0767, "step": 2752 }, { "epoch": 0.3, "grad_norm": 0.43548492880647743, "learning_rate": 3.271534202355056e-05, "loss": 0.113, "step": 2753 }, { "epoch": 0.3, "grad_norm": 0.5204867704049548, "learning_rate": 3.270984997037123e-05, "loss": 0.124, "step": 2754 }, { "epoch": 0.3, "grad_norm": 0.4303420716418421, "learning_rate": 3.27043563090586e-05, "loss": 0.0956, "step": 2755 }, { "epoch": 0.3, "grad_norm": 0.44570487287013144, "learning_rate": 3.269886104030778e-05, "loss": 0.1401, "step": 2756 }, { "epoch": 0.3, "grad_norm": 0.5551499333063893, "learning_rate": 3.2693364164814054e-05, "loss": 0.1553, "step": 2757 }, { "epoch": 0.3, "grad_norm": 0.3827095866541863, "learning_rate": 3.268786568327291e-05, "loss": 0.0979, "step": 2758 }, { "epoch": 0.3, "grad_norm": 0.4581304682812506, "learning_rate": 3.268236559638008e-05, "loss": 0.0926, "step": 2759 }, { "epoch": 0.3, "grad_norm": 0.5469102575653246, "learning_rate": 3.2676863904831444e-05, "loss": 0.1436, "step": 2760 }, { "epoch": 0.3, "grad_norm": 0.40900453811100485, "learning_rate": 3.2671360609323136e-05, "loss": 0.1054, "step": 2761 }, { "epoch": 0.3, "grad_norm": 0.4988024795380354, "learning_rate": 3.266585571055145e-05, "loss": 0.1202, "step": 2762 }, { "epoch": 0.3, "grad_norm": 0.5551967163969994, "learning_rate": 3.26603492092129e-05, "loss": 0.1639, "step": 2763 }, { "epoch": 0.3, "grad_norm": 0.5000273577835036, "learning_rate": 3.2654841106004225e-05, "loss": 0.1415, "step": 2764 }, { "epoch": 0.3, "grad_norm": 0.42071043980436845, "learning_rate": 3.264933140162233e-05, "loss": 0.1153, "step": 2765 }, { "epoch": 0.3, "grad_norm": 0.4066074339469922, "learning_rate": 3.264382009676435e-05, "loss": 0.1057, "step": 2766 }, { "epoch": 0.3, "grad_norm": 0.3972262502716515, "learning_rate": 3.2638307192127606e-05, "loss": 0.1395, "step": 2767 }, { "epoch": 0.3, "grad_norm": 0.5021946544885326, "learning_rate": 3.263279268840961e-05, "loss": 0.1328, "step": 2768 }, { "epoch": 0.3, "grad_norm": 0.4213494099779571, "learning_rate": 3.262727658630812e-05, "loss": 0.114, "step": 2769 }, { "epoch": 0.3, "grad_norm": 0.4496451965962864, "learning_rate": 3.262175888652106e-05, "loss": 0.114, "step": 2770 }, { "epoch": 0.3, "grad_norm": 0.4808302194168351, "learning_rate": 3.2616239589746555e-05, "loss": 0.1242, "step": 2771 }, { "epoch": 0.3, "grad_norm": 0.6925492402759219, "learning_rate": 3.261071869668296e-05, "loss": 0.1731, "step": 2772 }, { "epoch": 0.3, "grad_norm": 0.48603989972663947, "learning_rate": 3.26051962080288e-05, "loss": 0.1317, "step": 2773 }, { "epoch": 0.3, "grad_norm": 0.538508227015408, "learning_rate": 3.259967212448282e-05, "loss": 0.1662, "step": 2774 }, { "epoch": 0.3, "grad_norm": 0.4637501137211498, "learning_rate": 3.2594146446743966e-05, "loss": 0.1212, "step": 2775 }, { "epoch": 0.3, "grad_norm": 0.44628954664074705, "learning_rate": 3.2588619175511387e-05, "loss": 0.1086, "step": 2776 }, { "epoch": 0.3, "grad_norm": 0.3835377343449509, "learning_rate": 3.258309031148442e-05, "loss": 0.0974, "step": 2777 }, { "epoch": 0.31, "grad_norm": 0.38260678193925757, "learning_rate": 3.2577559855362614e-05, "loss": 0.0919, "step": 2778 }, { "epoch": 0.31, "grad_norm": 0.5223054173103331, "learning_rate": 3.257202780784572e-05, "loss": 0.1133, "step": 2779 }, { "epoch": 0.31, "grad_norm": 0.4268409770180373, "learning_rate": 3.2566494169633693e-05, "loss": 0.1224, "step": 2780 }, { "epoch": 0.31, "grad_norm": 0.5543464364280746, "learning_rate": 3.2560958941426686e-05, "loss": 0.1339, "step": 2781 }, { "epoch": 0.31, "grad_norm": 0.5053580609543851, "learning_rate": 3.255542212392505e-05, "loss": 0.1352, "step": 2782 }, { "epoch": 0.31, "grad_norm": 0.47154714171298, "learning_rate": 3.254988371782933e-05, "loss": 0.1392, "step": 2783 }, { "epoch": 0.31, "grad_norm": 0.47110212201576007, "learning_rate": 3.2544343723840296e-05, "loss": 0.1028, "step": 2784 }, { "epoch": 0.31, "grad_norm": 0.4021973575074634, "learning_rate": 3.2538802142658895e-05, "loss": 0.0959, "step": 2785 }, { "epoch": 0.31, "grad_norm": 0.41639631162882434, "learning_rate": 3.253325897498629e-05, "loss": 0.103, "step": 2786 }, { "epoch": 0.31, "grad_norm": 0.4860623104334949, "learning_rate": 3.252771422152383e-05, "loss": 0.1325, "step": 2787 }, { "epoch": 0.31, "grad_norm": 0.4068732982078391, "learning_rate": 3.2522167882973085e-05, "loss": 0.0974, "step": 2788 }, { "epoch": 0.31, "grad_norm": 0.4463915216875616, "learning_rate": 3.25166199600358e-05, "loss": 0.1066, "step": 2789 }, { "epoch": 0.31, "grad_norm": 0.4860026484542864, "learning_rate": 3.251107045341395e-05, "loss": 0.1031, "step": 2790 }, { "epoch": 0.31, "grad_norm": 0.6617598159459779, "learning_rate": 3.2505519363809685e-05, "loss": 0.2198, "step": 2791 }, { "epoch": 0.31, "grad_norm": 0.45430772264933567, "learning_rate": 3.249996669192537e-05, "loss": 0.1146, "step": 2792 }, { "epoch": 0.31, "grad_norm": 0.38039694858574663, "learning_rate": 3.249441243846356e-05, "loss": 0.1055, "step": 2793 }, { "epoch": 0.31, "grad_norm": 0.44851697755764813, "learning_rate": 3.248885660412701e-05, "loss": 0.1335, "step": 2794 }, { "epoch": 0.31, "grad_norm": 0.4458850392397683, "learning_rate": 3.2483299189618705e-05, "loss": 0.1032, "step": 2795 }, { "epoch": 0.31, "grad_norm": 0.46258618608540114, "learning_rate": 3.247774019564178e-05, "loss": 0.102, "step": 2796 }, { "epoch": 0.31, "grad_norm": 0.523015093817182, "learning_rate": 3.24721796228996e-05, "loss": 0.1268, "step": 2797 }, { "epoch": 0.31, "grad_norm": 0.5075496286639818, "learning_rate": 3.2466617472095736e-05, "loss": 0.1162, "step": 2798 }, { "epoch": 0.31, "grad_norm": 0.35146832794302496, "learning_rate": 3.246105374393394e-05, "loss": 0.108, "step": 2799 }, { "epoch": 0.31, "grad_norm": 0.4337167994850244, "learning_rate": 3.245548843911817e-05, "loss": 0.1206, "step": 2800 }, { "epoch": 0.31, "grad_norm": 0.4613804386203868, "learning_rate": 3.244992155835259e-05, "loss": 0.1192, "step": 2801 }, { "epoch": 0.31, "grad_norm": 0.5545617619233006, "learning_rate": 3.244435310234156e-05, "loss": 0.1355, "step": 2802 }, { "epoch": 0.31, "grad_norm": 0.49265364114183713, "learning_rate": 3.2438783071789626e-05, "loss": 0.0972, "step": 2803 }, { "epoch": 0.31, "grad_norm": 0.4492665472724663, "learning_rate": 3.243321146740155e-05, "loss": 0.1161, "step": 2804 }, { "epoch": 0.31, "grad_norm": 0.40875608235538013, "learning_rate": 3.242763828988229e-05, "loss": 0.0981, "step": 2805 }, { "epoch": 0.31, "grad_norm": 0.6740897734902612, "learning_rate": 3.2422063539937006e-05, "loss": 0.1243, "step": 2806 }, { "epoch": 0.31, "grad_norm": 0.4105781927967041, "learning_rate": 3.241648721827104e-05, "loss": 0.0913, "step": 2807 }, { "epoch": 0.31, "grad_norm": 0.48740071055850054, "learning_rate": 3.2410909325589954e-05, "loss": 0.1293, "step": 2808 }, { "epoch": 0.31, "grad_norm": 0.4675718442515754, "learning_rate": 3.2405329862599494e-05, "loss": 0.1245, "step": 2809 }, { "epoch": 0.31, "grad_norm": 0.4567423176174175, "learning_rate": 3.239974883000561e-05, "loss": 0.1248, "step": 2810 }, { "epoch": 0.31, "grad_norm": 0.470729526396674, "learning_rate": 3.2394166228514455e-05, "loss": 0.1223, "step": 2811 }, { "epoch": 0.31, "grad_norm": 0.43041775811614913, "learning_rate": 3.2388582058832375e-05, "loss": 0.1147, "step": 2812 }, { "epoch": 0.31, "grad_norm": 0.4887495008207538, "learning_rate": 3.238299632166591e-05, "loss": 0.1306, "step": 2813 }, { "epoch": 0.31, "grad_norm": 0.402312083062914, "learning_rate": 3.237740901772181e-05, "loss": 0.0886, "step": 2814 }, { "epoch": 0.31, "grad_norm": 0.41691935347619574, "learning_rate": 3.2371820147707024e-05, "loss": 0.0975, "step": 2815 }, { "epoch": 0.31, "grad_norm": 0.37978861996755153, "learning_rate": 3.2366229712328675e-05, "loss": 0.0797, "step": 2816 }, { "epoch": 0.31, "grad_norm": 0.45591320318229994, "learning_rate": 3.2360637712294106e-05, "loss": 0.102, "step": 2817 }, { "epoch": 0.31, "grad_norm": 0.4221487922910525, "learning_rate": 3.235504414831087e-05, "loss": 0.119, "step": 2818 }, { "epoch": 0.31, "grad_norm": 0.5546881984652567, "learning_rate": 3.234944902108669e-05, "loss": 0.1255, "step": 2819 }, { "epoch": 0.31, "grad_norm": 0.4819468425873679, "learning_rate": 3.234385233132949e-05, "loss": 0.1332, "step": 2820 }, { "epoch": 0.31, "grad_norm": 0.4753462796619483, "learning_rate": 3.233825407974741e-05, "loss": 0.1248, "step": 2821 }, { "epoch": 0.31, "grad_norm": 0.44142915446853137, "learning_rate": 3.233265426704877e-05, "loss": 0.0891, "step": 2822 }, { "epoch": 0.31, "grad_norm": 0.4527268798313024, "learning_rate": 3.2327052893942106e-05, "loss": 0.1097, "step": 2823 }, { "epoch": 0.31, "grad_norm": 0.35260125454367347, "learning_rate": 3.232144996113613e-05, "loss": 0.0854, "step": 2824 }, { "epoch": 0.31, "grad_norm": 0.3840752231237956, "learning_rate": 3.2315845469339766e-05, "loss": 0.0917, "step": 2825 }, { "epoch": 0.31, "grad_norm": 0.5943127525137295, "learning_rate": 3.231023941926213e-05, "loss": 0.1714, "step": 2826 }, { "epoch": 0.31, "grad_norm": 0.6812312254812225, "learning_rate": 3.230463181161254e-05, "loss": 0.1635, "step": 2827 }, { "epoch": 0.31, "grad_norm": 0.39237116582284726, "learning_rate": 3.22990226471005e-05, "loss": 0.0867, "step": 2828 }, { "epoch": 0.31, "grad_norm": 0.5052053870937787, "learning_rate": 3.229341192643572e-05, "loss": 0.1575, "step": 2829 }, { "epoch": 0.31, "grad_norm": 0.4376682230555415, "learning_rate": 3.2287799650328116e-05, "loss": 0.1048, "step": 2830 }, { "epoch": 0.31, "grad_norm": 0.6014293176434732, "learning_rate": 3.2282185819487774e-05, "loss": 0.157, "step": 2831 }, { "epoch": 0.31, "grad_norm": 0.42840982694173985, "learning_rate": 3.2276570434625e-05, "loss": 0.0969, "step": 2832 }, { "epoch": 0.31, "grad_norm": 0.4402574595751394, "learning_rate": 3.227095349645029e-05, "loss": 0.1243, "step": 2833 }, { "epoch": 0.31, "grad_norm": 0.4741245823583516, "learning_rate": 3.226533500567433e-05, "loss": 0.1467, "step": 2834 }, { "epoch": 0.31, "grad_norm": 0.36149475791765706, "learning_rate": 3.225971496300802e-05, "loss": 0.0948, "step": 2835 }, { "epoch": 0.31, "grad_norm": 0.6467126172575175, "learning_rate": 3.2254093369162425e-05, "loss": 0.1726, "step": 2836 }, { "epoch": 0.31, "grad_norm": 0.45203656981654433, "learning_rate": 3.2248470224848846e-05, "loss": 0.1292, "step": 2837 }, { "epoch": 0.31, "grad_norm": 0.44120203205930697, "learning_rate": 3.2242845530778755e-05, "loss": 0.1327, "step": 2838 }, { "epoch": 0.31, "grad_norm": 0.36369861705234957, "learning_rate": 3.223721928766381e-05, "loss": 0.0896, "step": 2839 }, { "epoch": 0.31, "grad_norm": 0.37016563802965796, "learning_rate": 3.22315914962159e-05, "loss": 0.0971, "step": 2840 }, { "epoch": 0.31, "grad_norm": 0.38292713784327553, "learning_rate": 3.222596215714708e-05, "loss": 0.1093, "step": 2841 }, { "epoch": 0.31, "grad_norm": 0.3956013321689686, "learning_rate": 3.2220331271169614e-05, "loss": 0.1062, "step": 2842 }, { "epoch": 0.31, "grad_norm": 0.4573578279059922, "learning_rate": 3.2214698838995956e-05, "loss": 0.088, "step": 2843 }, { "epoch": 0.31, "grad_norm": 0.4162445513050773, "learning_rate": 3.220906486133876e-05, "loss": 0.1044, "step": 2844 }, { "epoch": 0.31, "grad_norm": 0.38693916897913666, "learning_rate": 3.2203429338910876e-05, "loss": 0.0889, "step": 2845 }, { "epoch": 0.31, "grad_norm": 0.5879779129875491, "learning_rate": 3.219779227242534e-05, "loss": 0.1585, "step": 2846 }, { "epoch": 0.31, "grad_norm": 0.4495205073022704, "learning_rate": 3.21921536625954e-05, "loss": 0.0807, "step": 2847 }, { "epoch": 0.31, "grad_norm": 0.520493040179389, "learning_rate": 3.218651351013447e-05, "loss": 0.1081, "step": 2848 }, { "epoch": 0.31, "grad_norm": 0.5838184439909173, "learning_rate": 3.2180871815756204e-05, "loss": 0.137, "step": 2849 }, { "epoch": 0.31, "grad_norm": 0.46657627271903684, "learning_rate": 3.217522858017442e-05, "loss": 0.0933, "step": 2850 }, { "epoch": 0.31, "grad_norm": 0.5860431830147785, "learning_rate": 3.216958380410313e-05, "loss": 0.1351, "step": 2851 }, { "epoch": 0.31, "grad_norm": 0.5241478043373128, "learning_rate": 3.216393748825654e-05, "loss": 0.1573, "step": 2852 }, { "epoch": 0.31, "grad_norm": 0.35970154729037884, "learning_rate": 3.2158289633349076e-05, "loss": 0.0754, "step": 2853 }, { "epoch": 0.31, "grad_norm": 0.42326465189719575, "learning_rate": 3.2152640240095335e-05, "loss": 0.0842, "step": 2854 }, { "epoch": 0.31, "grad_norm": 0.46363169263800413, "learning_rate": 3.214698930921011e-05, "loss": 0.0894, "step": 2855 }, { "epoch": 0.31, "grad_norm": 0.41219970653237525, "learning_rate": 3.2141336841408406e-05, "loss": 0.0735, "step": 2856 }, { "epoch": 0.31, "grad_norm": 0.5205301992100385, "learning_rate": 3.2135682837405403e-05, "loss": 0.1313, "step": 2857 }, { "epoch": 0.31, "grad_norm": 0.4775367533934139, "learning_rate": 3.2130027297916476e-05, "loss": 0.1253, "step": 2858 }, { "epoch": 0.31, "grad_norm": 0.4645744633647556, "learning_rate": 3.2124370223657205e-05, "loss": 0.1098, "step": 2859 }, { "epoch": 0.31, "grad_norm": 0.41242256738250876, "learning_rate": 3.2118711615343366e-05, "loss": 0.085, "step": 2860 }, { "epoch": 0.31, "grad_norm": 0.5825409673438261, "learning_rate": 3.211305147369092e-05, "loss": 0.1389, "step": 2861 }, { "epoch": 0.31, "grad_norm": 0.439102796151205, "learning_rate": 3.210738979941603e-05, "loss": 0.131, "step": 2862 }, { "epoch": 0.31, "grad_norm": 0.4764649807319358, "learning_rate": 3.210172659323504e-05, "loss": 0.1226, "step": 2863 }, { "epoch": 0.31, "grad_norm": 0.3781231919552791, "learning_rate": 3.2096061855864485e-05, "loss": 0.0779, "step": 2864 }, { "epoch": 0.31, "grad_norm": 0.5102179555685242, "learning_rate": 3.209039558802113e-05, "loss": 0.1239, "step": 2865 }, { "epoch": 0.31, "grad_norm": 0.48636020416719494, "learning_rate": 3.2084727790421895e-05, "loss": 0.1267, "step": 2866 }, { "epoch": 0.31, "grad_norm": 0.49665977447109855, "learning_rate": 3.2079058463783906e-05, "loss": 0.1205, "step": 2867 }, { "epoch": 0.31, "grad_norm": 0.448465910619679, "learning_rate": 3.207338760882448e-05, "loss": 0.1105, "step": 2868 }, { "epoch": 0.32, "grad_norm": 0.4836145091492826, "learning_rate": 3.206771522626115e-05, "loss": 0.1269, "step": 2869 }, { "epoch": 0.32, "grad_norm": 0.36375675050954004, "learning_rate": 3.20620413168116e-05, "loss": 0.0792, "step": 2870 }, { "epoch": 0.32, "grad_norm": 0.4618209680536433, "learning_rate": 3.205636588119373e-05, "loss": 0.1184, "step": 2871 }, { "epoch": 0.32, "grad_norm": 0.46122777818116756, "learning_rate": 3.205068892012565e-05, "loss": 0.117, "step": 2872 }, { "epoch": 0.32, "grad_norm": 0.49463767282111265, "learning_rate": 3.204501043432565e-05, "loss": 0.1104, "step": 2873 }, { "epoch": 0.32, "grad_norm": 0.48935657349604333, "learning_rate": 3.203933042451218e-05, "loss": 0.1182, "step": 2874 }, { "epoch": 0.32, "grad_norm": 0.42880548638069527, "learning_rate": 3.203364889140393e-05, "loss": 0.1087, "step": 2875 }, { "epoch": 0.32, "grad_norm": 0.4579786767212388, "learning_rate": 3.202796583571977e-05, "loss": 0.1207, "step": 2876 }, { "epoch": 0.32, "grad_norm": 0.5125150527139145, "learning_rate": 3.2022281258178754e-05, "loss": 0.1431, "step": 2877 }, { "epoch": 0.32, "grad_norm": 0.5530907119209748, "learning_rate": 3.2016595159500127e-05, "loss": 0.1435, "step": 2878 }, { "epoch": 0.32, "grad_norm": 0.5459456039552176, "learning_rate": 3.2010907540403335e-05, "loss": 0.1314, "step": 2879 }, { "epoch": 0.32, "grad_norm": 0.49508681947171745, "learning_rate": 3.2005218401608006e-05, "loss": 0.138, "step": 2880 }, { "epoch": 0.32, "grad_norm": 0.42446159451237536, "learning_rate": 3.1999527743833975e-05, "loss": 0.1169, "step": 2881 }, { "epoch": 0.32, "grad_norm": 0.3389935216748857, "learning_rate": 3.1993835567801266e-05, "loss": 0.0604, "step": 2882 }, { "epoch": 0.32, "grad_norm": 0.4630287704674285, "learning_rate": 3.198814187423007e-05, "loss": 0.1177, "step": 2883 }, { "epoch": 0.32, "grad_norm": 0.4593882630178724, "learning_rate": 3.19824466638408e-05, "loss": 0.1328, "step": 2884 }, { "epoch": 0.32, "grad_norm": 0.4489455843780021, "learning_rate": 3.197674993735407e-05, "loss": 0.1213, "step": 2885 }, { "epoch": 0.32, "grad_norm": 0.39465177224796905, "learning_rate": 3.1971051695490644e-05, "loss": 0.1138, "step": 2886 }, { "epoch": 0.32, "grad_norm": 0.6270922212403857, "learning_rate": 3.196535193897151e-05, "loss": 0.1533, "step": 2887 }, { "epoch": 0.32, "grad_norm": 0.33597799544998475, "learning_rate": 3.195965066851784e-05, "loss": 0.0938, "step": 2888 }, { "epoch": 0.32, "grad_norm": 0.4547840199488066, "learning_rate": 3.195394788485099e-05, "loss": 0.1197, "step": 2889 }, { "epoch": 0.32, "grad_norm": 0.4167497095165373, "learning_rate": 3.194824358869252e-05, "loss": 0.0918, "step": 2890 }, { "epoch": 0.32, "grad_norm": 0.4186681261729406, "learning_rate": 3.194253778076417e-05, "loss": 0.1231, "step": 2891 }, { "epoch": 0.32, "grad_norm": 0.3788548267510137, "learning_rate": 3.1936830461787866e-05, "loss": 0.1055, "step": 2892 }, { "epoch": 0.32, "grad_norm": 0.45524410773564244, "learning_rate": 3.193112163248576e-05, "loss": 0.1068, "step": 2893 }, { "epoch": 0.32, "grad_norm": 0.4242718391147415, "learning_rate": 3.192541129358014e-05, "loss": 0.1014, "step": 2894 }, { "epoch": 0.32, "grad_norm": 0.4096755162340635, "learning_rate": 3.191969944579355e-05, "loss": 0.0952, "step": 2895 }, { "epoch": 0.32, "grad_norm": 0.409126697104361, "learning_rate": 3.191398608984867e-05, "loss": 0.1122, "step": 2896 }, { "epoch": 0.32, "grad_norm": 0.38579875676111497, "learning_rate": 3.1908271226468395e-05, "loss": 0.1146, "step": 2897 }, { "epoch": 0.32, "grad_norm": 0.38031963347967596, "learning_rate": 3.19025548563758e-05, "loss": 0.1034, "step": 2898 }, { "epoch": 0.32, "grad_norm": 0.471395877095604, "learning_rate": 3.189683698029416e-05, "loss": 0.1116, "step": 2899 }, { "epoch": 0.32, "grad_norm": 0.5635761986081148, "learning_rate": 3.189111759894695e-05, "loss": 0.1578, "step": 2900 }, { "epoch": 0.32, "grad_norm": 0.46752206184227274, "learning_rate": 3.188539671305781e-05, "loss": 0.0992, "step": 2901 }, { "epoch": 0.32, "grad_norm": 0.4336949479513155, "learning_rate": 3.1879674323350594e-05, "loss": 0.0952, "step": 2902 }, { "epoch": 0.32, "grad_norm": 0.45388483597738255, "learning_rate": 3.187395043054932e-05, "loss": 0.1088, "step": 2903 }, { "epoch": 0.32, "grad_norm": 0.4341001903739148, "learning_rate": 3.186822503537823e-05, "loss": 0.1232, "step": 2904 }, { "epoch": 0.32, "grad_norm": 0.4210084209438096, "learning_rate": 3.186249813856173e-05, "loss": 0.1028, "step": 2905 }, { "epoch": 0.32, "grad_norm": 0.4711448527844678, "learning_rate": 3.1856769740824426e-05, "loss": 0.1076, "step": 2906 }, { "epoch": 0.32, "grad_norm": 0.5267324790195119, "learning_rate": 3.1851039842891116e-05, "loss": 0.0979, "step": 2907 }, { "epoch": 0.32, "grad_norm": 0.4197890315513478, "learning_rate": 3.184530844548678e-05, "loss": 0.1, "step": 2908 }, { "epoch": 0.32, "grad_norm": 0.5446062712251911, "learning_rate": 3.183957554933659e-05, "loss": 0.1078, "step": 2909 }, { "epoch": 0.32, "grad_norm": 0.44408810589364317, "learning_rate": 3.183384115516591e-05, "loss": 0.1134, "step": 2910 }, { "epoch": 0.32, "grad_norm": 0.43406895209090485, "learning_rate": 3.18281052637003e-05, "loss": 0.0888, "step": 2911 }, { "epoch": 0.32, "grad_norm": 0.6769660090008992, "learning_rate": 3.182236787566549e-05, "loss": 0.1779, "step": 2912 }, { "epoch": 0.32, "grad_norm": 0.40729635541644726, "learning_rate": 3.181662899178743e-05, "loss": 0.0862, "step": 2913 }, { "epoch": 0.32, "grad_norm": 0.42780672619798754, "learning_rate": 3.181088861279222e-05, "loss": 0.1087, "step": 2914 }, { "epoch": 0.32, "grad_norm": 0.5031914779625248, "learning_rate": 3.180514673940619e-05, "loss": 0.1434, "step": 2915 }, { "epoch": 0.32, "grad_norm": 0.416027194296565, "learning_rate": 3.179940337235582e-05, "loss": 0.1107, "step": 2916 }, { "epoch": 0.32, "grad_norm": 0.4858639811745089, "learning_rate": 3.179365851236781e-05, "loss": 0.1286, "step": 2917 }, { "epoch": 0.32, "grad_norm": 0.40715823984007565, "learning_rate": 3.178791216016904e-05, "loss": 0.1262, "step": 2918 }, { "epoch": 0.32, "grad_norm": 0.37182254501447454, "learning_rate": 3.1782164316486566e-05, "loss": 0.1141, "step": 2919 }, { "epoch": 0.32, "grad_norm": 0.3794245763245099, "learning_rate": 3.177641498204765e-05, "loss": 0.1016, "step": 2920 }, { "epoch": 0.32, "grad_norm": 0.48080410922519184, "learning_rate": 3.177066415757974e-05, "loss": 0.1414, "step": 2921 }, { "epoch": 0.32, "grad_norm": 0.5072801587355023, "learning_rate": 3.1764911843810456e-05, "loss": 0.179, "step": 2922 }, { "epoch": 0.32, "grad_norm": 0.43148695683022004, "learning_rate": 3.175915804146763e-05, "loss": 0.1051, "step": 2923 }, { "epoch": 0.32, "grad_norm": 0.4481569263093252, "learning_rate": 3.175340275127925e-05, "loss": 0.1208, "step": 2924 }, { "epoch": 0.32, "grad_norm": 0.3930988559004642, "learning_rate": 3.174764597397354e-05, "loss": 0.096, "step": 2925 }, { "epoch": 0.32, "grad_norm": 0.4133310154624265, "learning_rate": 3.1741887710278874e-05, "loss": 0.083, "step": 2926 }, { "epoch": 0.32, "grad_norm": 0.4863014060651003, "learning_rate": 3.1736127960923815e-05, "loss": 0.0997, "step": 2927 }, { "epoch": 0.32, "grad_norm": 0.4427860603165456, "learning_rate": 3.173036672663714e-05, "loss": 0.1048, "step": 2928 }, { "epoch": 0.32, "grad_norm": 0.4855828607164218, "learning_rate": 3.172460400814778e-05, "loss": 0.1169, "step": 2929 }, { "epoch": 0.32, "grad_norm": 0.45067012018664165, "learning_rate": 3.17188398061849e-05, "loss": 0.1268, "step": 2930 }, { "epoch": 0.32, "grad_norm": 0.5605033299349742, "learning_rate": 3.1713074121477794e-05, "loss": 0.1664, "step": 2931 }, { "epoch": 0.32, "grad_norm": 0.5654894545201731, "learning_rate": 3.170730695475599e-05, "loss": 0.1256, "step": 2932 }, { "epoch": 0.32, "grad_norm": 0.46496657343668496, "learning_rate": 3.1701538306749186e-05, "loss": 0.1145, "step": 2933 }, { "epoch": 0.32, "grad_norm": 0.4540724222000948, "learning_rate": 3.1695768178187267e-05, "loss": 0.1169, "step": 2934 }, { "epoch": 0.32, "grad_norm": 0.462436727113538, "learning_rate": 3.168999656980031e-05, "loss": 0.1154, "step": 2935 }, { "epoch": 0.32, "grad_norm": 0.4836513589738762, "learning_rate": 3.168422348231857e-05, "loss": 0.1071, "step": 2936 }, { "epoch": 0.32, "grad_norm": 0.5527361273316552, "learning_rate": 3.167844891647251e-05, "loss": 0.1331, "step": 2937 }, { "epoch": 0.32, "grad_norm": 0.4452489506400164, "learning_rate": 3.1672672872992755e-05, "loss": 0.1034, "step": 2938 }, { "epoch": 0.32, "grad_norm": 0.49691749127157675, "learning_rate": 3.1666895352610126e-05, "loss": 0.1136, "step": 2939 }, { "epoch": 0.32, "grad_norm": 0.4144521355504801, "learning_rate": 3.166111635605564e-05, "loss": 0.1044, "step": 2940 }, { "epoch": 0.32, "grad_norm": 0.49526216991324307, "learning_rate": 3.165533588406049e-05, "loss": 0.1517, "step": 2941 }, { "epoch": 0.32, "grad_norm": 0.43031213614920066, "learning_rate": 3.164955393735605e-05, "loss": 0.0859, "step": 2942 }, { "epoch": 0.32, "grad_norm": 0.4337079696739261, "learning_rate": 3.16437705166739e-05, "loss": 0.1198, "step": 2943 }, { "epoch": 0.32, "grad_norm": 0.4739767182805181, "learning_rate": 3.1637985622745795e-05, "loss": 0.0891, "step": 2944 }, { "epoch": 0.32, "grad_norm": 0.38762596913117836, "learning_rate": 3.163219925630368e-05, "loss": 0.0831, "step": 2945 }, { "epoch": 0.32, "grad_norm": 0.401134674219508, "learning_rate": 3.1626411418079684e-05, "loss": 0.1198, "step": 2946 }, { "epoch": 0.32, "grad_norm": 0.39292806714341305, "learning_rate": 3.162062210880611e-05, "loss": 0.1199, "step": 2947 }, { "epoch": 0.32, "grad_norm": 0.4449422200334777, "learning_rate": 3.1614831329215475e-05, "loss": 0.0911, "step": 2948 }, { "epoch": 0.32, "grad_norm": 0.5414378312493097, "learning_rate": 3.1609039080040455e-05, "loss": 0.1068, "step": 2949 }, { "epoch": 0.32, "grad_norm": 0.35962108810943744, "learning_rate": 3.160324536201393e-05, "loss": 0.1037, "step": 2950 }, { "epoch": 0.32, "grad_norm": 0.48035682366989074, "learning_rate": 3.159745017586897e-05, "loss": 0.106, "step": 2951 }, { "epoch": 0.32, "grad_norm": 0.571358961736077, "learning_rate": 3.159165352233879e-05, "loss": 0.1354, "step": 2952 }, { "epoch": 0.32, "grad_norm": 0.47375772932888566, "learning_rate": 3.158585540215684e-05, "loss": 0.1019, "step": 2953 }, { "epoch": 0.32, "grad_norm": 0.46424789065044625, "learning_rate": 3.158005581605673e-05, "loss": 0.0886, "step": 2954 }, { "epoch": 0.32, "grad_norm": 0.4505474343651107, "learning_rate": 3.157425476477227e-05, "loss": 0.1106, "step": 2955 }, { "epoch": 0.32, "grad_norm": 0.5112918208488875, "learning_rate": 3.156845224903745e-05, "loss": 0.1304, "step": 2956 }, { "epoch": 0.32, "grad_norm": 0.4139795040226315, "learning_rate": 3.156264826958642e-05, "loss": 0.1017, "step": 2957 }, { "epoch": 0.32, "grad_norm": 0.4014890907962274, "learning_rate": 3.1556842827153556e-05, "loss": 0.096, "step": 2958 }, { "epoch": 0.32, "grad_norm": 0.5446101291614608, "learning_rate": 3.15510359224734e-05, "loss": 0.1051, "step": 2959 }, { "epoch": 0.33, "grad_norm": 0.41049852168134116, "learning_rate": 3.154522755628067e-05, "loss": 0.1093, "step": 2960 }, { "epoch": 0.33, "grad_norm": 0.5497611318244457, "learning_rate": 3.153941772931029e-05, "loss": 0.1161, "step": 2961 }, { "epoch": 0.33, "grad_norm": 0.40641058902264804, "learning_rate": 3.153360644229735e-05, "loss": 0.0898, "step": 2962 }, { "epoch": 0.33, "grad_norm": 0.40112083652697644, "learning_rate": 3.1527793695977124e-05, "loss": 0.109, "step": 2963 }, { "epoch": 0.33, "grad_norm": 0.5210666769564253, "learning_rate": 3.1521979491085095e-05, "loss": 0.1151, "step": 2964 }, { "epoch": 0.33, "grad_norm": 0.5178861744968977, "learning_rate": 3.1516163828356915e-05, "loss": 0.1594, "step": 2965 }, { "epoch": 0.33, "grad_norm": 0.4173312450322156, "learning_rate": 3.15103467085284e-05, "loss": 0.1113, "step": 2966 }, { "epoch": 0.33, "grad_norm": 0.4244110212642795, "learning_rate": 3.150452813233558e-05, "loss": 0.1138, "step": 2967 }, { "epoch": 0.33, "grad_norm": 0.38501936393195807, "learning_rate": 3.149870810051467e-05, "loss": 0.0866, "step": 2968 }, { "epoch": 0.33, "grad_norm": 0.5662222991045568, "learning_rate": 3.1492886613802045e-05, "loss": 0.1209, "step": 2969 }, { "epoch": 0.33, "grad_norm": 0.504351072110167, "learning_rate": 3.148706367293428e-05, "loss": 0.1275, "step": 2970 }, { "epoch": 0.33, "grad_norm": 0.5069101385879063, "learning_rate": 3.148123927864814e-05, "loss": 0.1491, "step": 2971 }, { "epoch": 0.33, "grad_norm": 0.5331641661555634, "learning_rate": 3.147541343168055e-05, "loss": 0.149, "step": 2972 }, { "epoch": 0.33, "grad_norm": 0.4708691537644511, "learning_rate": 3.1469586132768656e-05, "loss": 0.1124, "step": 2973 }, { "epoch": 0.33, "grad_norm": 0.5286898915640014, "learning_rate": 3.146375738264975e-05, "loss": 0.1011, "step": 2974 }, { "epoch": 0.33, "grad_norm": 0.4905430707132425, "learning_rate": 3.145792718206132e-05, "loss": 0.1567, "step": 2975 }, { "epoch": 0.33, "grad_norm": 0.49901026878226007, "learning_rate": 3.145209553174105e-05, "loss": 0.1483, "step": 2976 }, { "epoch": 0.33, "grad_norm": 0.4107647151818684, "learning_rate": 3.14462624324268e-05, "loss": 0.1185, "step": 2977 }, { "epoch": 0.33, "grad_norm": 0.42616440218379914, "learning_rate": 3.14404278848566e-05, "loss": 0.0877, "step": 2978 }, { "epoch": 0.33, "grad_norm": 0.49872181356227585, "learning_rate": 3.143459188976869e-05, "loss": 0.102, "step": 2979 }, { "epoch": 0.33, "grad_norm": 0.44638180760277624, "learning_rate": 3.142875444790147e-05, "loss": 0.1224, "step": 2980 }, { "epoch": 0.33, "grad_norm": 0.38642494523558324, "learning_rate": 3.1422915559993534e-05, "loss": 0.0754, "step": 2981 }, { "epoch": 0.33, "grad_norm": 0.4505590264626607, "learning_rate": 3.141707522678365e-05, "loss": 0.1216, "step": 2982 }, { "epoch": 0.33, "grad_norm": 0.42332581669537106, "learning_rate": 3.141123344901079e-05, "loss": 0.0955, "step": 2983 }, { "epoch": 0.33, "grad_norm": 0.3898175095038572, "learning_rate": 3.140539022741408e-05, "loss": 0.1052, "step": 2984 }, { "epoch": 0.33, "grad_norm": 0.4274599422961613, "learning_rate": 3.139954556273284e-05, "loss": 0.0855, "step": 2985 }, { "epoch": 0.33, "grad_norm": 0.3745110025610411, "learning_rate": 3.139369945570659e-05, "loss": 0.1003, "step": 2986 }, { "epoch": 0.33, "grad_norm": 0.5244706618810534, "learning_rate": 3.138785190707501e-05, "loss": 0.1236, "step": 2987 }, { "epoch": 0.33, "grad_norm": 0.3993838235838536, "learning_rate": 3.138200291757797e-05, "loss": 0.1152, "step": 2988 }, { "epoch": 0.33, "grad_norm": 0.565500811647934, "learning_rate": 3.1376152487955516e-05, "loss": 0.1234, "step": 2989 }, { "epoch": 0.33, "grad_norm": 0.39855446688085017, "learning_rate": 3.137030061894789e-05, "loss": 0.1068, "step": 2990 }, { "epoch": 0.33, "grad_norm": 0.43042078737549466, "learning_rate": 3.1364447311295514e-05, "loss": 0.0912, "step": 2991 }, { "epoch": 0.33, "grad_norm": 0.45558722510814526, "learning_rate": 3.135859256573898e-05, "loss": 0.1272, "step": 2992 }, { "epoch": 0.33, "grad_norm": 0.5314258677028182, "learning_rate": 3.135273638301906e-05, "loss": 0.1414, "step": 2993 }, { "epoch": 0.33, "grad_norm": 0.37565773661006496, "learning_rate": 3.134687876387673e-05, "loss": 0.1, "step": 2994 }, { "epoch": 0.33, "grad_norm": 0.46104462235519333, "learning_rate": 3.134101970905313e-05, "loss": 0.1144, "step": 2995 }, { "epoch": 0.33, "grad_norm": 0.49527339239396323, "learning_rate": 3.1335159219289585e-05, "loss": 0.111, "step": 2996 }, { "epoch": 0.33, "grad_norm": 0.40770607213618226, "learning_rate": 3.132929729532761e-05, "loss": 0.0986, "step": 2997 }, { "epoch": 0.33, "grad_norm": 0.43585775596261883, "learning_rate": 3.132343393790887e-05, "loss": 0.1067, "step": 2998 }, { "epoch": 0.33, "grad_norm": 0.38173871375776935, "learning_rate": 3.131756914777527e-05, "loss": 0.1107, "step": 2999 }, { "epoch": 0.33, "grad_norm": 0.49243572575733324, "learning_rate": 3.131170292566883e-05, "loss": 0.109, "step": 3000 }, { "epoch": 0.33, "grad_norm": 0.3982948346999536, "learning_rate": 3.130583527233181e-05, "loss": 0.0887, "step": 3001 }, { "epoch": 0.33, "grad_norm": 0.4616214397922201, "learning_rate": 3.12999661885066e-05, "loss": 0.1206, "step": 3002 }, { "epoch": 0.33, "grad_norm": 0.49812576566706723, "learning_rate": 3.129409567493581e-05, "loss": 0.1277, "step": 3003 }, { "epoch": 0.33, "grad_norm": 0.43991502011648514, "learning_rate": 3.12882237323622e-05, "loss": 0.1096, "step": 3004 }, { "epoch": 0.33, "grad_norm": 0.47991013035786234, "learning_rate": 3.128235036152875e-05, "loss": 0.1153, "step": 3005 }, { "epoch": 0.33, "grad_norm": 0.361143033845957, "learning_rate": 3.127647556317858e-05, "loss": 0.0807, "step": 3006 }, { "epoch": 0.33, "grad_norm": 0.44795305821345316, "learning_rate": 3.127059933805502e-05, "loss": 0.1059, "step": 3007 }, { "epoch": 0.33, "grad_norm": 0.3714933352187029, "learning_rate": 3.126472168690156e-05, "loss": 0.0833, "step": 3008 }, { "epoch": 0.33, "grad_norm": 0.40408548182061005, "learning_rate": 3.125884261046188e-05, "loss": 0.0964, "step": 3009 }, { "epoch": 0.33, "grad_norm": 0.3900624229530096, "learning_rate": 3.125296210947983e-05, "loss": 0.0998, "step": 3010 }, { "epoch": 0.33, "grad_norm": 0.47855207343349315, "learning_rate": 3.124708018469947e-05, "loss": 0.1206, "step": 3011 }, { "epoch": 0.33, "grad_norm": 0.4298627496012635, "learning_rate": 3.1241196836865e-05, "loss": 0.1036, "step": 3012 }, { "epoch": 0.33, "grad_norm": 0.3456469021353894, "learning_rate": 3.123531206672083e-05, "loss": 0.1003, "step": 3013 }, { "epoch": 0.33, "grad_norm": 0.4107678530987156, "learning_rate": 3.1229425875011534e-05, "loss": 0.1049, "step": 3014 }, { "epoch": 0.33, "grad_norm": 0.45278337359952275, "learning_rate": 3.122353826248188e-05, "loss": 0.0936, "step": 3015 }, { "epoch": 0.33, "grad_norm": 0.45648093388592437, "learning_rate": 3.12176492298768e-05, "loss": 0.0872, "step": 3016 }, { "epoch": 0.33, "grad_norm": 0.3808449766909785, "learning_rate": 3.1211758777941415e-05, "loss": 0.0757, "step": 3017 }, { "epoch": 0.33, "grad_norm": 0.33018857473631386, "learning_rate": 3.120586690742102e-05, "loss": 0.0721, "step": 3018 }, { "epoch": 0.33, "grad_norm": 0.46929928068532223, "learning_rate": 3.11999736190611e-05, "loss": 0.114, "step": 3019 }, { "epoch": 0.33, "grad_norm": 0.42938155639840064, "learning_rate": 3.119407891360732e-05, "loss": 0.1022, "step": 3020 }, { "epoch": 0.33, "grad_norm": 0.32270341671587993, "learning_rate": 3.118818279180549e-05, "loss": 0.0838, "step": 3021 }, { "epoch": 0.33, "grad_norm": 0.548832496272697, "learning_rate": 3.118228525440165e-05, "loss": 0.1125, "step": 3022 }, { "epoch": 0.33, "grad_norm": 0.4936748163709445, "learning_rate": 3.117638630214198e-05, "loss": 0.1083, "step": 3023 }, { "epoch": 0.33, "grad_norm": 0.3778902176150338, "learning_rate": 3.1170485935772864e-05, "loss": 0.09, "step": 3024 }, { "epoch": 0.33, "grad_norm": 0.43850738485026913, "learning_rate": 3.116458415604085e-05, "loss": 0.1021, "step": 3025 }, { "epoch": 0.33, "grad_norm": 0.3726730071004992, "learning_rate": 3.1158680963692676e-05, "loss": 0.0871, "step": 3026 }, { "epoch": 0.33, "grad_norm": 0.41160556315924834, "learning_rate": 3.115277635947524e-05, "loss": 0.0982, "step": 3027 }, { "epoch": 0.33, "grad_norm": 0.40699768953112764, "learning_rate": 3.114687034413564e-05, "loss": 0.086, "step": 3028 }, { "epoch": 0.33, "grad_norm": 0.45263826602477925, "learning_rate": 3.114096291842114e-05, "loss": 0.0912, "step": 3029 }, { "epoch": 0.33, "grad_norm": 0.6396457118784367, "learning_rate": 3.1135054083079194e-05, "loss": 0.1593, "step": 3030 }, { "epoch": 0.33, "grad_norm": 0.412874659262969, "learning_rate": 3.1129143838857417e-05, "loss": 0.1139, "step": 3031 }, { "epoch": 0.33, "grad_norm": 0.4598995094829215, "learning_rate": 3.112323218650362e-05, "loss": 0.1226, "step": 3032 }, { "epoch": 0.33, "grad_norm": 0.31994202751805884, "learning_rate": 3.111731912676577e-05, "loss": 0.0882, "step": 3033 }, { "epoch": 0.33, "grad_norm": 0.433226833950231, "learning_rate": 3.111140466039205e-05, "loss": 0.1066, "step": 3034 }, { "epoch": 0.33, "grad_norm": 0.4237915058401346, "learning_rate": 3.1105488788130774e-05, "loss": 0.0937, "step": 3035 }, { "epoch": 0.33, "grad_norm": 0.4376426191839178, "learning_rate": 3.1099571510730466e-05, "loss": 0.1243, "step": 3036 }, { "epoch": 0.33, "grad_norm": 0.3749582148160103, "learning_rate": 3.109365282893982e-05, "loss": 0.089, "step": 3037 }, { "epoch": 0.33, "grad_norm": 0.4552989963998003, "learning_rate": 3.1087732743507704e-05, "loss": 0.1071, "step": 3038 }, { "epoch": 0.33, "grad_norm": 0.4040797843983112, "learning_rate": 3.108181125518316e-05, "loss": 0.1025, "step": 3039 }, { "epoch": 0.33, "grad_norm": 0.3628885751590384, "learning_rate": 3.107588836471542e-05, "loss": 0.0799, "step": 3040 }, { "epoch": 0.33, "grad_norm": 0.40287186859125884, "learning_rate": 3.106996407285389e-05, "loss": 0.0883, "step": 3041 }, { "epoch": 0.33, "grad_norm": 0.6091119981188723, "learning_rate": 3.106403838034815e-05, "loss": 0.1403, "step": 3042 }, { "epoch": 0.33, "grad_norm": 0.425885616425448, "learning_rate": 3.1058111287947946e-05, "loss": 0.11, "step": 3043 }, { "epoch": 0.33, "grad_norm": 0.5320074627861954, "learning_rate": 3.1052182796403225e-05, "loss": 0.1341, "step": 3044 }, { "epoch": 0.33, "grad_norm": 0.408036960201621, "learning_rate": 3.104625290646409e-05, "loss": 0.1063, "step": 3045 }, { "epoch": 0.33, "grad_norm": 0.3801160552337564, "learning_rate": 3.104032161888084e-05, "loss": 0.0777, "step": 3046 }, { "epoch": 0.33, "grad_norm": 0.42475136608602376, "learning_rate": 3.1034388934403924e-05, "loss": 0.088, "step": 3047 }, { "epoch": 0.33, "grad_norm": 0.4019793554705773, "learning_rate": 3.1028454853784e-05, "loss": 0.1036, "step": 3048 }, { "epoch": 0.33, "grad_norm": 0.4516689320304525, "learning_rate": 3.1022519377771885e-05, "loss": 0.1136, "step": 3049 }, { "epoch": 0.33, "grad_norm": 0.3740455282276215, "learning_rate": 3.101658250711856e-05, "loss": 0.0722, "step": 3050 }, { "epoch": 0.34, "grad_norm": 0.4856231359497586, "learning_rate": 3.101064424257521e-05, "loss": 0.1162, "step": 3051 }, { "epoch": 0.34, "grad_norm": 0.361839674640322, "learning_rate": 3.100470458489318e-05, "loss": 0.09, "step": 3052 }, { "epoch": 0.34, "grad_norm": 0.5044663148301474, "learning_rate": 3.099876353482399e-05, "loss": 0.1242, "step": 3053 }, { "epoch": 0.34, "grad_norm": 0.4003546721901271, "learning_rate": 3.099282109311934e-05, "loss": 0.1077, "step": 3054 }, { "epoch": 0.34, "grad_norm": 0.4444386457853068, "learning_rate": 3.098687726053112e-05, "loss": 0.1225, "step": 3055 }, { "epoch": 0.34, "grad_norm": 0.33634263382829, "learning_rate": 3.098093203781137e-05, "loss": 0.1123, "step": 3056 }, { "epoch": 0.34, "grad_norm": 0.5061390225311463, "learning_rate": 3.097498542571232e-05, "loss": 0.1259, "step": 3057 }, { "epoch": 0.34, "grad_norm": 0.3792954834776004, "learning_rate": 3.0969037424986376e-05, "loss": 0.0788, "step": 3058 }, { "epoch": 0.34, "grad_norm": 0.38489028924950525, "learning_rate": 3.096308803638612e-05, "loss": 0.0891, "step": 3059 }, { "epoch": 0.34, "grad_norm": 0.4740600546646167, "learning_rate": 3.09571372606643e-05, "loss": 0.0922, "step": 3060 }, { "epoch": 0.34, "grad_norm": 0.361128489013011, "learning_rate": 3.095118509857386e-05, "loss": 0.0843, "step": 3061 }, { "epoch": 0.34, "grad_norm": 0.5383975032180267, "learning_rate": 3.09452315508679e-05, "loss": 0.1501, "step": 3062 }, { "epoch": 0.34, "grad_norm": 0.4743842073534603, "learning_rate": 3.093927661829969e-05, "loss": 0.0984, "step": 3063 }, { "epoch": 0.34, "grad_norm": 0.49085683933388163, "learning_rate": 3.09333203016227e-05, "loss": 0.1112, "step": 3064 }, { "epoch": 0.34, "grad_norm": 0.42029817266046576, "learning_rate": 3.0927362601590555e-05, "loss": 0.1206, "step": 3065 }, { "epoch": 0.34, "grad_norm": 0.4430981509275775, "learning_rate": 3.0921403518957076e-05, "loss": 0.1025, "step": 3066 }, { "epoch": 0.34, "grad_norm": 0.46659690372381546, "learning_rate": 3.0915443054476236e-05, "loss": 0.1134, "step": 3067 }, { "epoch": 0.34, "grad_norm": 0.5489216789251119, "learning_rate": 3.0909481208902185e-05, "loss": 0.1639, "step": 3068 }, { "epoch": 0.34, "grad_norm": 0.5491942681055818, "learning_rate": 3.090351798298926e-05, "loss": 0.1231, "step": 3069 }, { "epoch": 0.34, "grad_norm": 0.49797293918309016, "learning_rate": 3.089755337749198e-05, "loss": 0.1029, "step": 3070 }, { "epoch": 0.34, "grad_norm": 0.4489617317348411, "learning_rate": 3.089158739316501e-05, "loss": 0.0974, "step": 3071 }, { "epoch": 0.34, "grad_norm": 0.6138681409915191, "learning_rate": 3.08856200307632e-05, "loss": 0.1553, "step": 3072 }, { "epoch": 0.34, "grad_norm": 0.49109413168804644, "learning_rate": 3.08796512910416e-05, "loss": 0.1046, "step": 3073 }, { "epoch": 0.34, "grad_norm": 0.4891476992656243, "learning_rate": 3.08736811747554e-05, "loss": 0.1377, "step": 3074 }, { "epoch": 0.34, "grad_norm": 0.41818725364836296, "learning_rate": 3.0867709682659984e-05, "loss": 0.0909, "step": 3075 }, { "epoch": 0.34, "grad_norm": 0.36086557010107234, "learning_rate": 3.08617368155109e-05, "loss": 0.0724, "step": 3076 }, { "epoch": 0.34, "grad_norm": 0.49923156757645265, "learning_rate": 3.0855762574063874e-05, "loss": 0.112, "step": 3077 }, { "epoch": 0.34, "grad_norm": 0.46968722426558046, "learning_rate": 3.084978695907482e-05, "loss": 0.1132, "step": 3078 }, { "epoch": 0.34, "grad_norm": 0.4245450684422398, "learning_rate": 3.0843809971299784e-05, "loss": 0.1032, "step": 3079 }, { "epoch": 0.34, "grad_norm": 0.47929073538108424, "learning_rate": 3.0837831611495036e-05, "loss": 0.0958, "step": 3080 }, { "epoch": 0.34, "grad_norm": 0.46562686638170014, "learning_rate": 3.0831851880416994e-05, "loss": 0.1084, "step": 3081 }, { "epoch": 0.34, "grad_norm": 0.4238469168017291, "learning_rate": 3.082587077882225e-05, "loss": 0.0758, "step": 3082 }, { "epoch": 0.34, "grad_norm": 0.5357730123710422, "learning_rate": 3.081988830746756e-05, "loss": 0.115, "step": 3083 }, { "epoch": 0.34, "grad_norm": 0.4500577472350527, "learning_rate": 3.081390446710989e-05, "loss": 0.1171, "step": 3084 }, { "epoch": 0.34, "grad_norm": 0.4064815118334347, "learning_rate": 3.0807919258506336e-05, "loss": 0.1025, "step": 3085 }, { "epoch": 0.34, "grad_norm": 0.4635172276715917, "learning_rate": 3.080193268241419e-05, "loss": 0.1061, "step": 3086 }, { "epoch": 0.34, "grad_norm": 0.4442429329927746, "learning_rate": 3.079594473959091e-05, "loss": 0.1242, "step": 3087 }, { "epoch": 0.34, "grad_norm": 0.45337500745585574, "learning_rate": 3.0789955430794145e-05, "loss": 0.1172, "step": 3088 }, { "epoch": 0.34, "grad_norm": 0.48484490274285524, "learning_rate": 3.0783964756781685e-05, "loss": 0.0994, "step": 3089 }, { "epoch": 0.34, "grad_norm": 0.46188765726568143, "learning_rate": 3.077797271831152e-05, "loss": 0.118, "step": 3090 }, { "epoch": 0.34, "grad_norm": 0.4415617981005447, "learning_rate": 3.077197931614178e-05, "loss": 0.0875, "step": 3091 }, { "epoch": 0.34, "grad_norm": 0.45212355477268457, "learning_rate": 3.076598455103081e-05, "loss": 0.1173, "step": 3092 }, { "epoch": 0.34, "grad_norm": 0.3798810081144888, "learning_rate": 3.075998842373711e-05, "loss": 0.1105, "step": 3093 }, { "epoch": 0.34, "grad_norm": 0.4444268437549615, "learning_rate": 3.0753990935019345e-05, "loss": 0.0947, "step": 3094 }, { "epoch": 0.34, "grad_norm": 0.3525911118397617, "learning_rate": 3.074799208563635e-05, "loss": 0.094, "step": 3095 }, { "epoch": 0.34, "grad_norm": 0.40570005460150627, "learning_rate": 3.074199187634713e-05, "loss": 0.0962, "step": 3096 }, { "epoch": 0.34, "grad_norm": 0.36367741384908137, "learning_rate": 3.0735990307910894e-05, "loss": 0.0805, "step": 3097 }, { "epoch": 0.34, "grad_norm": 0.36832007788127663, "learning_rate": 3.072998738108699e-05, "loss": 0.0955, "step": 3098 }, { "epoch": 0.34, "grad_norm": 0.4871920682926601, "learning_rate": 3.072398309663494e-05, "loss": 0.1355, "step": 3099 }, { "epoch": 0.34, "grad_norm": 0.35801060452285616, "learning_rate": 3.071797745531445e-05, "loss": 0.0859, "step": 3100 }, { "epoch": 0.34, "grad_norm": 0.4586708783824543, "learning_rate": 3.07119704578854e-05, "loss": 0.1162, "step": 3101 }, { "epoch": 0.34, "grad_norm": 0.5060537546576939, "learning_rate": 3.070596210510783e-05, "loss": 0.1317, "step": 3102 }, { "epoch": 0.34, "grad_norm": 0.42745424268350307, "learning_rate": 3.069995239774195e-05, "loss": 0.116, "step": 3103 }, { "epoch": 0.34, "grad_norm": 0.5242596617423675, "learning_rate": 3.069394133654815e-05, "loss": 0.1405, "step": 3104 }, { "epoch": 0.34, "grad_norm": 0.43990031906804894, "learning_rate": 3.068792892228699e-05, "loss": 0.0921, "step": 3105 }, { "epoch": 0.34, "grad_norm": 0.41211794442040073, "learning_rate": 3.068191515571921e-05, "loss": 0.1005, "step": 3106 }, { "epoch": 0.34, "grad_norm": 0.4081394200603503, "learning_rate": 3.06759000376057e-05, "loss": 0.0964, "step": 3107 }, { "epoch": 0.34, "grad_norm": 0.4125515161481568, "learning_rate": 3.066988356870752e-05, "loss": 0.1102, "step": 3108 }, { "epoch": 0.34, "grad_norm": 0.4089077066082897, "learning_rate": 3.066386574978594e-05, "loss": 0.126, "step": 3109 }, { "epoch": 0.34, "grad_norm": 0.45905942911947795, "learning_rate": 3.0657846581602355e-05, "loss": 0.1488, "step": 3110 }, { "epoch": 0.34, "grad_norm": 0.4605613078540536, "learning_rate": 3.0651826064918355e-05, "loss": 0.0951, "step": 3111 }, { "epoch": 0.34, "grad_norm": 0.42187291603986515, "learning_rate": 3.06458042004957e-05, "loss": 0.0849, "step": 3112 }, { "epoch": 0.34, "grad_norm": 0.36034887462740356, "learning_rate": 3.063978098909631e-05, "loss": 0.0829, "step": 3113 }, { "epoch": 0.34, "grad_norm": 0.3632977184541642, "learning_rate": 3.063375643148228e-05, "loss": 0.069, "step": 3114 }, { "epoch": 0.34, "grad_norm": 0.4851895528226818, "learning_rate": 3.062773052841588e-05, "loss": 0.1626, "step": 3115 }, { "epoch": 0.34, "grad_norm": 0.3872482835843667, "learning_rate": 3.062170328065954e-05, "loss": 0.1065, "step": 3116 }, { "epoch": 0.34, "grad_norm": 0.4971942859428728, "learning_rate": 3.061567468897588e-05, "loss": 0.1151, "step": 3117 }, { "epoch": 0.34, "grad_norm": 0.39652990446208886, "learning_rate": 3.060964475412766e-05, "loss": 0.0838, "step": 3118 }, { "epoch": 0.34, "grad_norm": 0.3570625291474109, "learning_rate": 3.060361347687785e-05, "loss": 0.0836, "step": 3119 }, { "epoch": 0.34, "grad_norm": 0.47844947824863304, "learning_rate": 3.059758085798954e-05, "loss": 0.1116, "step": 3120 }, { "epoch": 0.34, "grad_norm": 0.41583393051331374, "learning_rate": 3.059154689822603e-05, "loss": 0.0684, "step": 3121 }, { "epoch": 0.34, "grad_norm": 0.48391631158130655, "learning_rate": 3.058551159835078e-05, "loss": 0.1186, "step": 3122 }, { "epoch": 0.34, "grad_norm": 0.4795059854732528, "learning_rate": 3.057947495912741e-05, "loss": 0.1028, "step": 3123 }, { "epoch": 0.34, "grad_norm": 0.523734392830372, "learning_rate": 3.057343698131971e-05, "loss": 0.1262, "step": 3124 }, { "epoch": 0.34, "grad_norm": 0.5734783771299878, "learning_rate": 3.056739766569165e-05, "loss": 0.1342, "step": 3125 }, { "epoch": 0.34, "grad_norm": 0.35129553937460845, "learning_rate": 3.056135701300736e-05, "loss": 0.0854, "step": 3126 }, { "epoch": 0.34, "grad_norm": 0.5021104974112424, "learning_rate": 3.0555315024031155e-05, "loss": 0.114, "step": 3127 }, { "epoch": 0.34, "grad_norm": 0.43072038831456666, "learning_rate": 3.054927169952749e-05, "loss": 0.1291, "step": 3128 }, { "epoch": 0.34, "grad_norm": 0.4552643521783998, "learning_rate": 3.054322704026102e-05, "loss": 0.1403, "step": 3129 }, { "epoch": 0.34, "grad_norm": 0.4395759749888948, "learning_rate": 3.053718104699654e-05, "loss": 0.1353, "step": 3130 }, { "epoch": 0.34, "grad_norm": 0.37793177719918475, "learning_rate": 3.0531133720499046e-05, "loss": 0.0792, "step": 3131 }, { "epoch": 0.34, "grad_norm": 0.368637630345871, "learning_rate": 3.052508506153368e-05, "loss": 0.1311, "step": 3132 }, { "epoch": 0.34, "grad_norm": 0.3813160628005562, "learning_rate": 3.051903507086575e-05, "loss": 0.0945, "step": 3133 }, { "epoch": 0.34, "grad_norm": 0.37838045980309515, "learning_rate": 3.051298374926074e-05, "loss": 0.1224, "step": 3134 }, { "epoch": 0.34, "grad_norm": 0.4566265825212334, "learning_rate": 3.050693109748431e-05, "loss": 0.1201, "step": 3135 }, { "epoch": 0.34, "grad_norm": 0.3721509831000016, "learning_rate": 3.0500877116302284e-05, "loss": 0.0853, "step": 3136 }, { "epoch": 0.34, "grad_norm": 0.40926806239802366, "learning_rate": 3.049482180648064e-05, "loss": 0.1013, "step": 3137 }, { "epoch": 0.34, "grad_norm": 0.4610618003445871, "learning_rate": 3.0488765168785544e-05, "loss": 0.1359, "step": 3138 }, { "epoch": 0.34, "grad_norm": 0.4495561245058242, "learning_rate": 3.048270720398332e-05, "loss": 0.1351, "step": 3139 }, { "epoch": 0.34, "grad_norm": 0.4992390743410643, "learning_rate": 3.047664791284046e-05, "loss": 0.1277, "step": 3140 }, { "epoch": 0.34, "grad_norm": 0.4582018573349948, "learning_rate": 3.0470587296123617e-05, "loss": 0.1085, "step": 3141 }, { "epoch": 0.35, "grad_norm": 0.45195916253411794, "learning_rate": 3.046452535459963e-05, "loss": 0.0975, "step": 3142 }, { "epoch": 0.35, "grad_norm": 0.5637618851519003, "learning_rate": 3.0458462089035498e-05, "loss": 0.1244, "step": 3143 }, { "epoch": 0.35, "grad_norm": 0.4296957708776518, "learning_rate": 3.045239750019839e-05, "loss": 0.0858, "step": 3144 }, { "epoch": 0.35, "grad_norm": 0.49455340487976396, "learning_rate": 3.0446331588855613e-05, "loss": 0.1107, "step": 3145 }, { "epoch": 0.35, "grad_norm": 0.4946187235771653, "learning_rate": 3.044026435577469e-05, "loss": 0.1298, "step": 3146 }, { "epoch": 0.35, "grad_norm": 0.34252976453535766, "learning_rate": 3.0434195801723275e-05, "loss": 0.1011, "step": 3147 }, { "epoch": 0.35, "grad_norm": 0.4744166230045854, "learning_rate": 3.0428125927469198e-05, "loss": 0.1035, "step": 3148 }, { "epoch": 0.35, "grad_norm": 0.49002332057732234, "learning_rate": 3.0422054733780474e-05, "loss": 0.1191, "step": 3149 }, { "epoch": 0.35, "grad_norm": 0.4361156632792582, "learning_rate": 3.0415982221425257e-05, "loss": 0.1162, "step": 3150 }, { "epoch": 0.35, "grad_norm": 0.42436254884771873, "learning_rate": 3.0409908391171884e-05, "loss": 0.1133, "step": 3151 }, { "epoch": 0.35, "grad_norm": 0.4287328345196262, "learning_rate": 3.040383324378885e-05, "loss": 0.0778, "step": 3152 }, { "epoch": 0.35, "grad_norm": 1.0794419800298358, "learning_rate": 3.0397756780044837e-05, "loss": 0.138, "step": 3153 }, { "epoch": 0.35, "grad_norm": 0.39912295750574145, "learning_rate": 3.0391679000708673e-05, "loss": 0.0969, "step": 3154 }, { "epoch": 0.35, "grad_norm": 0.46869888026919865, "learning_rate": 3.0385599906549355e-05, "loss": 0.1192, "step": 3155 }, { "epoch": 0.35, "grad_norm": 0.454829217514146, "learning_rate": 3.0379519498336054e-05, "loss": 0.0839, "step": 3156 }, { "epoch": 0.35, "grad_norm": 0.35027192911091515, "learning_rate": 3.037343777683809e-05, "loss": 0.0771, "step": 3157 }, { "epoch": 0.35, "grad_norm": 0.4620675281657158, "learning_rate": 3.036735474282498e-05, "loss": 0.12, "step": 3158 }, { "epoch": 0.35, "grad_norm": 0.45333623895111336, "learning_rate": 3.0361270397066377e-05, "loss": 0.126, "step": 3159 }, { "epoch": 0.35, "grad_norm": 0.5962545625444104, "learning_rate": 3.035518474033212e-05, "loss": 0.1808, "step": 3160 }, { "epoch": 0.35, "grad_norm": 0.41072979743761384, "learning_rate": 3.0349097773392207e-05, "loss": 0.095, "step": 3161 }, { "epoch": 0.35, "grad_norm": 0.4411565023465971, "learning_rate": 3.0343009497016787e-05, "loss": 0.1163, "step": 3162 }, { "epoch": 0.35, "grad_norm": 0.5257631137208278, "learning_rate": 3.0336919911976203e-05, "loss": 0.1497, "step": 3163 }, { "epoch": 0.35, "grad_norm": 0.4825417064766318, "learning_rate": 3.0330829019040945e-05, "loss": 0.1358, "step": 3164 }, { "epoch": 0.35, "grad_norm": 0.4884254242715508, "learning_rate": 3.0324736818981672e-05, "loss": 0.1333, "step": 3165 }, { "epoch": 0.35, "grad_norm": 0.3891675462583503, "learning_rate": 3.0318643312569204e-05, "loss": 0.1102, "step": 3166 }, { "epoch": 0.35, "grad_norm": 0.4384664011841985, "learning_rate": 3.0312548500574547e-05, "loss": 0.0736, "step": 3167 }, { "epoch": 0.35, "grad_norm": 0.5031724658634823, "learning_rate": 3.0306452383768833e-05, "loss": 0.1085, "step": 3168 }, { "epoch": 0.35, "grad_norm": 0.4123828215769342, "learning_rate": 3.03003549629234e-05, "loss": 0.1108, "step": 3169 }, { "epoch": 0.35, "grad_norm": 0.44261685342517704, "learning_rate": 3.0294256238809727e-05, "loss": 0.1066, "step": 3170 }, { "epoch": 0.35, "grad_norm": 0.42945234626531475, "learning_rate": 3.0288156212199472e-05, "loss": 0.0761, "step": 3171 }, { "epoch": 0.35, "grad_norm": 0.34760929176287386, "learning_rate": 3.0282054883864434e-05, "loss": 0.0874, "step": 3172 }, { "epoch": 0.35, "grad_norm": 0.37169043103966387, "learning_rate": 3.0275952254576614e-05, "loss": 0.0797, "step": 3173 }, { "epoch": 0.35, "grad_norm": 0.4751407176436066, "learning_rate": 3.026984832510814e-05, "loss": 0.1075, "step": 3174 }, { "epoch": 0.35, "grad_norm": 0.5237182604383818, "learning_rate": 3.026374309623133e-05, "loss": 0.1276, "step": 3175 }, { "epoch": 0.35, "grad_norm": 0.557450571905389, "learning_rate": 3.025763656871865e-05, "loss": 0.1191, "step": 3176 }, { "epoch": 0.35, "grad_norm": 0.5787788508332388, "learning_rate": 3.0251528743342738e-05, "loss": 0.1531, "step": 3177 }, { "epoch": 0.35, "grad_norm": 0.5211203515441835, "learning_rate": 3.024541962087641e-05, "loss": 0.1358, "step": 3178 }, { "epoch": 0.35, "grad_norm": 0.3587496333502514, "learning_rate": 3.023930920209262e-05, "loss": 0.0987, "step": 3179 }, { "epoch": 0.35, "grad_norm": 0.37807603708210863, "learning_rate": 3.0233197487764494e-05, "loss": 0.0783, "step": 3180 }, { "epoch": 0.35, "grad_norm": 0.7034061293722425, "learning_rate": 3.022708447866534e-05, "loss": 0.1014, "step": 3181 }, { "epoch": 0.35, "grad_norm": 0.3480642903056366, "learning_rate": 3.0220970175568604e-05, "loss": 0.091, "step": 3182 }, { "epoch": 0.35, "grad_norm": 0.4264683560173701, "learning_rate": 3.0214854579247915e-05, "loss": 0.117, "step": 3183 }, { "epoch": 0.35, "grad_norm": 0.4338549608984276, "learning_rate": 3.020873769047705e-05, "loss": 0.1249, "step": 3184 }, { "epoch": 0.35, "grad_norm": 0.4383588263250387, "learning_rate": 3.0202619510029962e-05, "loss": 0.0943, "step": 3185 }, { "epoch": 0.35, "grad_norm": 0.43014782610154034, "learning_rate": 3.019650003868077e-05, "loss": 0.101, "step": 3186 }, { "epoch": 0.35, "grad_norm": 0.4398822469447675, "learning_rate": 3.019037927720374e-05, "loss": 0.105, "step": 3187 }, { "epoch": 0.35, "grad_norm": 0.4319008435523064, "learning_rate": 3.0184257226373317e-05, "loss": 0.1116, "step": 3188 }, { "epoch": 0.35, "grad_norm": 0.4481603510332299, "learning_rate": 3.0178133886964098e-05, "loss": 0.1167, "step": 3189 }, { "epoch": 0.35, "grad_norm": 0.38827553865772274, "learning_rate": 3.0172009259750852e-05, "loss": 0.0763, "step": 3190 }, { "epoch": 0.35, "grad_norm": 0.40908766920439577, "learning_rate": 3.01658833455085e-05, "loss": 0.0878, "step": 3191 }, { "epoch": 0.35, "grad_norm": 0.5326026077988799, "learning_rate": 3.015975614501214e-05, "loss": 0.142, "step": 3192 }, { "epoch": 0.35, "grad_norm": 0.4350477874475564, "learning_rate": 3.0153627659037023e-05, "loss": 0.1156, "step": 3193 }, { "epoch": 0.35, "grad_norm": 0.49758283651513785, "learning_rate": 3.0147497888358564e-05, "loss": 0.1409, "step": 3194 }, { "epoch": 0.35, "grad_norm": 0.3567095887273041, "learning_rate": 3.0141366833752344e-05, "loss": 0.0911, "step": 3195 }, { "epoch": 0.35, "grad_norm": 0.46343247774249074, "learning_rate": 3.0135234495994107e-05, "loss": 0.1138, "step": 3196 }, { "epoch": 0.35, "grad_norm": 0.4412212828241887, "learning_rate": 3.0129100875859744e-05, "loss": 0.085, "step": 3197 }, { "epoch": 0.35, "grad_norm": 0.4876661164356047, "learning_rate": 3.0122965974125335e-05, "loss": 0.1191, "step": 3198 }, { "epoch": 0.35, "grad_norm": 0.6069640369099728, "learning_rate": 3.01168297915671e-05, "loss": 0.1763, "step": 3199 }, { "epoch": 0.35, "grad_norm": 0.42108344948366455, "learning_rate": 3.0110692328961435e-05, "loss": 0.082, "step": 3200 }, { "epoch": 0.35, "grad_norm": 0.37898324400466255, "learning_rate": 3.0104553587084883e-05, "loss": 0.0949, "step": 3201 }, { "epoch": 0.35, "grad_norm": 0.5577067555445726, "learning_rate": 3.0098413566714165e-05, "loss": 0.1525, "step": 3202 }, { "epoch": 0.35, "grad_norm": 0.5217010390847702, "learning_rate": 3.0092272268626145e-05, "loss": 0.114, "step": 3203 }, { "epoch": 0.35, "grad_norm": 0.42151197072675173, "learning_rate": 3.008612969359788e-05, "loss": 0.1121, "step": 3204 }, { "epoch": 0.35, "grad_norm": 0.4122075149327242, "learning_rate": 3.0079985842406554e-05, "loss": 0.0909, "step": 3205 }, { "epoch": 0.35, "grad_norm": 0.34812381463388364, "learning_rate": 3.0073840715829532e-05, "loss": 0.0837, "step": 3206 }, { "epoch": 0.35, "grad_norm": 0.402323879751254, "learning_rate": 3.0067694314644334e-05, "loss": 0.1124, "step": 3207 }, { "epoch": 0.35, "grad_norm": 0.3650611017635027, "learning_rate": 3.006154663962865e-05, "loss": 0.0686, "step": 3208 }, { "epoch": 0.35, "grad_norm": 0.35124720103957596, "learning_rate": 3.0055397691560303e-05, "loss": 0.0703, "step": 3209 }, { "epoch": 0.35, "grad_norm": 0.40179917600574755, "learning_rate": 3.0049247471217326e-05, "loss": 0.0888, "step": 3210 }, { "epoch": 0.35, "grad_norm": 0.44993540777119245, "learning_rate": 3.0043095979377864e-05, "loss": 0.1441, "step": 3211 }, { "epoch": 0.35, "grad_norm": 0.43885701757241913, "learning_rate": 3.0036943216820256e-05, "loss": 0.0976, "step": 3212 }, { "epoch": 0.35, "grad_norm": 0.3822749704184947, "learning_rate": 3.0030789184322982e-05, "loss": 0.0715, "step": 3213 }, { "epoch": 0.35, "grad_norm": 0.44569746744374844, "learning_rate": 3.00246338826647e-05, "loss": 0.1217, "step": 3214 }, { "epoch": 0.35, "grad_norm": 0.34571687487828434, "learning_rate": 3.001847731262421e-05, "loss": 0.0678, "step": 3215 }, { "epoch": 0.35, "grad_norm": 0.42773914769963217, "learning_rate": 3.001231947498048e-05, "loss": 0.0818, "step": 3216 }, { "epoch": 0.35, "grad_norm": 0.515581764951698, "learning_rate": 3.000616037051265e-05, "loss": 0.1402, "step": 3217 }, { "epoch": 0.35, "grad_norm": 0.4564655095448477, "learning_rate": 3.0000000000000004e-05, "loss": 0.1015, "step": 3218 }, { "epoch": 0.35, "grad_norm": 0.49162276733399707, "learning_rate": 2.9993838364221993e-05, "loss": 0.1034, "step": 3219 }, { "epoch": 0.35, "grad_norm": 0.5381377207352398, "learning_rate": 2.998767546395822e-05, "loss": 0.1287, "step": 3220 }, { "epoch": 0.35, "grad_norm": 0.45231658247828566, "learning_rate": 2.998151129998847e-05, "loss": 0.1105, "step": 3221 }, { "epoch": 0.35, "grad_norm": 0.3484451793457346, "learning_rate": 2.9975345873092662e-05, "loss": 0.0991, "step": 3222 }, { "epoch": 0.35, "grad_norm": 0.43040865289971103, "learning_rate": 2.9969179184050895e-05, "loss": 0.0889, "step": 3223 }, { "epoch": 0.35, "grad_norm": 0.4963507849974048, "learning_rate": 2.996301123364341e-05, "loss": 0.0876, "step": 3224 }, { "epoch": 0.35, "grad_norm": 0.5361396775214365, "learning_rate": 2.9956842022650622e-05, "loss": 0.1217, "step": 3225 }, { "epoch": 0.35, "grad_norm": 0.37727243606508093, "learning_rate": 2.9950671551853094e-05, "loss": 0.0954, "step": 3226 }, { "epoch": 0.35, "grad_norm": 0.46502643489555767, "learning_rate": 2.994449982203157e-05, "loss": 0.1189, "step": 3227 }, { "epoch": 0.35, "grad_norm": 0.4049779895111427, "learning_rate": 2.9938326833966914e-05, "loss": 0.1145, "step": 3228 }, { "epoch": 0.35, "grad_norm": 0.34258256254725755, "learning_rate": 2.9932152588440185e-05, "loss": 0.0669, "step": 3229 }, { "epoch": 0.35, "grad_norm": 0.5165877457557599, "learning_rate": 2.992597708623259e-05, "loss": 0.1478, "step": 3230 }, { "epoch": 0.35, "grad_norm": 0.33504843055745764, "learning_rate": 2.9919800328125498e-05, "loss": 0.0849, "step": 3231 }, { "epoch": 0.35, "grad_norm": 0.38863907488724414, "learning_rate": 2.991362231490042e-05, "loss": 0.1085, "step": 3232 }, { "epoch": 0.36, "grad_norm": 0.417900186779823, "learning_rate": 2.9907443047339054e-05, "loss": 0.1147, "step": 3233 }, { "epoch": 0.36, "grad_norm": 0.4307403323914625, "learning_rate": 2.990126252622323e-05, "loss": 0.0932, "step": 3234 }, { "epoch": 0.36, "grad_norm": 0.5270524244581487, "learning_rate": 2.989508075233495e-05, "loss": 0.1367, "step": 3235 }, { "epoch": 0.36, "grad_norm": 0.4390436086968612, "learning_rate": 2.9888897726456374e-05, "loss": 0.1057, "step": 3236 }, { "epoch": 0.36, "grad_norm": 0.41602180368912456, "learning_rate": 2.988271344936982e-05, "loss": 0.1133, "step": 3237 }, { "epoch": 0.36, "grad_norm": 0.44824996642300363, "learning_rate": 2.9876527921857756e-05, "loss": 0.12, "step": 3238 }, { "epoch": 0.36, "grad_norm": 0.4070965493040737, "learning_rate": 2.9870341144702833e-05, "loss": 0.0934, "step": 3239 }, { "epoch": 0.36, "grad_norm": 0.48243086339322283, "learning_rate": 2.986415311868782e-05, "loss": 0.1096, "step": 3240 }, { "epoch": 0.36, "grad_norm": 0.3567004818944347, "learning_rate": 2.9857963844595684e-05, "loss": 0.072, "step": 3241 }, { "epoch": 0.36, "grad_norm": 0.37221330112865947, "learning_rate": 2.985177332320952e-05, "loss": 0.0752, "step": 3242 }, { "epoch": 0.36, "grad_norm": 0.44665488977824974, "learning_rate": 2.9845581555312604e-05, "loss": 0.0915, "step": 3243 }, { "epoch": 0.36, "grad_norm": 0.4897877760061545, "learning_rate": 2.9839388541688352e-05, "loss": 0.1076, "step": 3244 }, { "epoch": 0.36, "grad_norm": 0.49107164717334983, "learning_rate": 2.9833194283120347e-05, "loss": 0.1221, "step": 3245 }, { "epoch": 0.36, "grad_norm": 0.3936081464253936, "learning_rate": 2.9826998780392324e-05, "loss": 0.1034, "step": 3246 }, { "epoch": 0.36, "grad_norm": 0.49539637171394946, "learning_rate": 2.9820802034288178e-05, "loss": 0.09, "step": 3247 }, { "epoch": 0.36, "grad_norm": 0.463286041826809, "learning_rate": 2.9814604045591974e-05, "loss": 0.0959, "step": 3248 }, { "epoch": 0.36, "grad_norm": 0.3553555486496809, "learning_rate": 2.9808404815087904e-05, "loss": 0.0872, "step": 3249 }, { "epoch": 0.36, "grad_norm": 0.3909009721555061, "learning_rate": 2.980220434356035e-05, "loss": 0.0857, "step": 3250 }, { "epoch": 0.36, "grad_norm": 0.33218698776076433, "learning_rate": 2.9796002631793825e-05, "loss": 0.0937, "step": 3251 }, { "epoch": 0.36, "grad_norm": 0.4403915219571039, "learning_rate": 2.9789799680573014e-05, "loss": 0.1005, "step": 3252 }, { "epoch": 0.36, "grad_norm": 0.37249729357126665, "learning_rate": 2.9783595490682765e-05, "loss": 0.0647, "step": 3253 }, { "epoch": 0.36, "grad_norm": 0.32111672878415914, "learning_rate": 2.9777390062908056e-05, "loss": 0.059, "step": 3254 }, { "epoch": 0.36, "grad_norm": 0.40766659753839923, "learning_rate": 2.9771183398034047e-05, "loss": 0.1047, "step": 3255 }, { "epoch": 0.36, "grad_norm": 0.3676373992380219, "learning_rate": 2.976497549684605e-05, "loss": 0.0932, "step": 3256 }, { "epoch": 0.36, "grad_norm": 0.4815949832176938, "learning_rate": 2.975876636012952e-05, "loss": 0.107, "step": 3257 }, { "epoch": 0.36, "grad_norm": 0.4409996286604739, "learning_rate": 2.9752555988670084e-05, "loss": 0.0667, "step": 3258 }, { "epoch": 0.36, "grad_norm": 0.451647157224258, "learning_rate": 2.9746344383253515e-05, "loss": 0.0925, "step": 3259 }, { "epoch": 0.36, "grad_norm": 0.4505782247014681, "learning_rate": 2.9740131544665748e-05, "loss": 0.1021, "step": 3260 }, { "epoch": 0.36, "grad_norm": 0.5363340851187944, "learning_rate": 2.9733917473692866e-05, "loss": 0.1397, "step": 3261 }, { "epoch": 0.36, "grad_norm": 0.39002028388069626, "learning_rate": 2.9727702171121125e-05, "loss": 0.0842, "step": 3262 }, { "epoch": 0.36, "grad_norm": 0.4836490020248306, "learning_rate": 2.972148563773692e-05, "loss": 0.126, "step": 3263 }, { "epoch": 0.36, "grad_norm": 0.3555231367152643, "learning_rate": 2.9715267874326805e-05, "loss": 0.0798, "step": 3264 }, { "epoch": 0.36, "grad_norm": 0.4027143086744787, "learning_rate": 2.9709048881677494e-05, "loss": 0.0923, "step": 3265 }, { "epoch": 0.36, "grad_norm": 0.44944412548670526, "learning_rate": 2.970282866057586e-05, "loss": 0.0982, "step": 3266 }, { "epoch": 0.36, "grad_norm": 0.4294087632965411, "learning_rate": 2.9696607211808915e-05, "loss": 0.1189, "step": 3267 }, { "epoch": 0.36, "grad_norm": 0.34902067007238874, "learning_rate": 2.969038453616385e-05, "loss": 0.0883, "step": 3268 }, { "epoch": 0.36, "grad_norm": 0.3579566790020374, "learning_rate": 2.9684160634427982e-05, "loss": 0.0775, "step": 3269 }, { "epoch": 0.36, "grad_norm": 0.445023426125086, "learning_rate": 2.9677935507388817e-05, "loss": 0.0954, "step": 3270 }, { "epoch": 0.36, "grad_norm": 0.4022606513863688, "learning_rate": 2.967170915583399e-05, "loss": 0.1033, "step": 3271 }, { "epoch": 0.36, "grad_norm": 0.4481409827112859, "learning_rate": 2.96654815805513e-05, "loss": 0.1215, "step": 3272 }, { "epoch": 0.36, "grad_norm": 0.4549180596578309, "learning_rate": 2.96592527823287e-05, "loss": 0.1107, "step": 3273 }, { "epoch": 0.36, "grad_norm": 0.32150388915674455, "learning_rate": 2.965302276195431e-05, "loss": 0.0656, "step": 3274 }, { "epoch": 0.36, "grad_norm": 0.3153099683200609, "learning_rate": 2.9646791520216375e-05, "loss": 0.0781, "step": 3275 }, { "epoch": 0.36, "grad_norm": 0.38994854529552486, "learning_rate": 2.9640559057903325e-05, "loss": 0.0818, "step": 3276 }, { "epoch": 0.36, "grad_norm": 0.38004797554209646, "learning_rate": 2.963432537580372e-05, "loss": 0.0805, "step": 3277 }, { "epoch": 0.36, "grad_norm": 0.4639890330052402, "learning_rate": 2.9628090474706304e-05, "loss": 0.121, "step": 3278 }, { "epoch": 0.36, "grad_norm": 0.3185861049294222, "learning_rate": 2.9621854355399937e-05, "loss": 0.0663, "step": 3279 }, { "epoch": 0.36, "grad_norm": 0.39458920270360465, "learning_rate": 2.9615617018673663e-05, "loss": 0.1031, "step": 3280 }, { "epoch": 0.36, "grad_norm": 0.4040666560514684, "learning_rate": 2.960937846531668e-05, "loss": 0.0836, "step": 3281 }, { "epoch": 0.36, "grad_norm": 0.42137389975771644, "learning_rate": 2.9603138696118315e-05, "loss": 0.0966, "step": 3282 }, { "epoch": 0.36, "grad_norm": 0.3757150111833766, "learning_rate": 2.959689771186807e-05, "loss": 0.083, "step": 3283 }, { "epoch": 0.36, "grad_norm": 0.339311292229142, "learning_rate": 2.9590655513355598e-05, "loss": 0.0807, "step": 3284 }, { "epoch": 0.36, "grad_norm": 0.3154760391617572, "learning_rate": 2.9584412101370708e-05, "loss": 0.0545, "step": 3285 }, { "epoch": 0.36, "grad_norm": 0.4455571924493586, "learning_rate": 2.957816747670334e-05, "loss": 0.0927, "step": 3286 }, { "epoch": 0.36, "grad_norm": 0.3700563083794951, "learning_rate": 2.9571921640143623e-05, "loss": 0.0876, "step": 3287 }, { "epoch": 0.36, "grad_norm": 0.3887715235881641, "learning_rate": 2.956567459248181e-05, "loss": 0.0744, "step": 3288 }, { "epoch": 0.36, "grad_norm": 0.4117860293927746, "learning_rate": 2.9559426334508315e-05, "loss": 0.0952, "step": 3289 }, { "epoch": 0.36, "grad_norm": 0.658661951264724, "learning_rate": 2.9553176867013714e-05, "loss": 0.15, "step": 3290 }, { "epoch": 0.36, "grad_norm": 0.5343304966916523, "learning_rate": 2.954692619078874e-05, "loss": 0.1122, "step": 3291 }, { "epoch": 0.36, "grad_norm": 0.3622286117760669, "learning_rate": 2.9540674306624262e-05, "loss": 0.1008, "step": 3292 }, { "epoch": 0.36, "grad_norm": 0.4900858680749494, "learning_rate": 2.9534421215311303e-05, "loss": 0.1009, "step": 3293 }, { "epoch": 0.36, "grad_norm": 0.616686146553049, "learning_rate": 2.9528166917641048e-05, "loss": 0.1114, "step": 3294 }, { "epoch": 0.36, "grad_norm": 0.4106133048832169, "learning_rate": 2.952191141440484e-05, "loss": 0.1133, "step": 3295 }, { "epoch": 0.36, "grad_norm": 0.46079471768954333, "learning_rate": 2.951565470639415e-05, "loss": 0.1237, "step": 3296 }, { "epoch": 0.36, "grad_norm": 0.5117802473781548, "learning_rate": 2.9509396794400635e-05, "loss": 0.151, "step": 3297 }, { "epoch": 0.36, "grad_norm": 0.3537842378122041, "learning_rate": 2.9503137679216073e-05, "loss": 0.0599, "step": 3298 }, { "epoch": 0.36, "grad_norm": 0.4160583187636401, "learning_rate": 2.9496877361632422e-05, "loss": 0.1081, "step": 3299 }, { "epoch": 0.36, "grad_norm": 0.3779150592821644, "learning_rate": 2.9490615842441764e-05, "loss": 0.0961, "step": 3300 }, { "epoch": 0.36, "grad_norm": 0.3393794099335051, "learning_rate": 2.948435312243636e-05, "loss": 0.0766, "step": 3301 }, { "epoch": 0.36, "grad_norm": 0.33970985844366197, "learning_rate": 2.94780892024086e-05, "loss": 0.0824, "step": 3302 }, { "epoch": 0.36, "grad_norm": 0.35619211479526935, "learning_rate": 2.9471824083151043e-05, "loss": 0.111, "step": 3303 }, { "epoch": 0.36, "grad_norm": 0.3984910143649249, "learning_rate": 2.9465557765456387e-05, "loss": 0.0965, "step": 3304 }, { "epoch": 0.36, "grad_norm": 0.38307900785906207, "learning_rate": 2.9459290250117493e-05, "loss": 0.0838, "step": 3305 }, { "epoch": 0.36, "grad_norm": 0.3307757257244414, "learning_rate": 2.9453021537927363e-05, "loss": 0.0938, "step": 3306 }, { "epoch": 0.36, "grad_norm": 0.3836677883818397, "learning_rate": 2.944675162967916e-05, "loss": 0.0746, "step": 3307 }, { "epoch": 0.36, "grad_norm": 0.39931193867248543, "learning_rate": 2.9440480526166193e-05, "loss": 0.0873, "step": 3308 }, { "epoch": 0.36, "grad_norm": 0.4865081943986252, "learning_rate": 2.9434208228181923e-05, "loss": 0.103, "step": 3309 }, { "epoch": 0.36, "grad_norm": 0.4499228225859652, "learning_rate": 2.9427934736519962e-05, "loss": 0.1078, "step": 3310 }, { "epoch": 0.36, "grad_norm": 0.4496546413459929, "learning_rate": 2.9421660051974067e-05, "loss": 0.1068, "step": 3311 }, { "epoch": 0.36, "grad_norm": 0.39819425750013676, "learning_rate": 2.9415384175338154e-05, "loss": 0.0787, "step": 3312 }, { "epoch": 0.36, "grad_norm": 0.500960559849647, "learning_rate": 2.9409107107406297e-05, "loss": 0.1206, "step": 3313 }, { "epoch": 0.36, "grad_norm": 0.4375505588473, "learning_rate": 2.9402828848972706e-05, "loss": 0.0837, "step": 3314 }, { "epoch": 0.36, "grad_norm": 0.40711258132424494, "learning_rate": 2.9396549400831745e-05, "loss": 0.0905, "step": 3315 }, { "epoch": 0.36, "grad_norm": 0.4505658062958951, "learning_rate": 2.9390268763777938e-05, "loss": 0.0951, "step": 3316 }, { "epoch": 0.36, "grad_norm": 0.4489243911280175, "learning_rate": 2.9383986938605944e-05, "loss": 0.0983, "step": 3317 }, { "epoch": 0.36, "grad_norm": 0.490512358740168, "learning_rate": 2.937770392611058e-05, "loss": 0.122, "step": 3318 }, { "epoch": 0.36, "grad_norm": 0.48373053346446265, "learning_rate": 2.9371419727086824e-05, "loss": 0.1085, "step": 3319 }, { "epoch": 0.36, "grad_norm": 0.43288110471623803, "learning_rate": 2.9365134342329783e-05, "loss": 0.1177, "step": 3320 }, { "epoch": 0.36, "grad_norm": 0.418275471028594, "learning_rate": 2.9358847772634736e-05, "loss": 0.121, "step": 3321 }, { "epoch": 0.36, "grad_norm": 0.34610886121013157, "learning_rate": 2.935256001879709e-05, "loss": 0.0714, "step": 3322 }, { "epoch": 0.36, "grad_norm": 0.40236443401612426, "learning_rate": 2.934627108161241e-05, "loss": 0.1028, "step": 3323 }, { "epoch": 0.37, "grad_norm": 0.44570458869125384, "learning_rate": 2.9339980961876434e-05, "loss": 0.1304, "step": 3324 }, { "epoch": 0.37, "grad_norm": 0.5316031348178363, "learning_rate": 2.933368966038501e-05, "loss": 0.1206, "step": 3325 }, { "epoch": 0.37, "grad_norm": 0.4796466151405658, "learning_rate": 2.932739717793416e-05, "loss": 0.1003, "step": 3326 }, { "epoch": 0.37, "grad_norm": 0.3974991388731452, "learning_rate": 2.9321103515320047e-05, "loss": 0.0874, "step": 3327 }, { "epoch": 0.37, "grad_norm": 0.4008349860382639, "learning_rate": 2.9314808673338997e-05, "loss": 0.0672, "step": 3328 }, { "epoch": 0.37, "grad_norm": 0.40050698427118214, "learning_rate": 2.930851265278746e-05, "loss": 0.0858, "step": 3329 }, { "epoch": 0.37, "grad_norm": 0.4615775045591722, "learning_rate": 2.9302215454462063e-05, "loss": 0.1141, "step": 3330 }, { "epoch": 0.37, "grad_norm": 0.39894977996993175, "learning_rate": 2.9295917079159557e-05, "loss": 0.0937, "step": 3331 }, { "epoch": 0.37, "grad_norm": 0.4644302324022367, "learning_rate": 2.928961752767686e-05, "loss": 0.1062, "step": 3332 }, { "epoch": 0.37, "grad_norm": 0.3965266726627453, "learning_rate": 2.9283316800811032e-05, "loss": 0.0953, "step": 3333 }, { "epoch": 0.37, "grad_norm": 0.47918679015431803, "learning_rate": 2.9277014899359284e-05, "loss": 0.1098, "step": 3334 }, { "epoch": 0.37, "grad_norm": 0.457606092669368, "learning_rate": 2.9270711824118972e-05, "loss": 0.1212, "step": 3335 }, { "epoch": 0.37, "grad_norm": 0.4985575432752776, "learning_rate": 2.92644075758876e-05, "loss": 0.1026, "step": 3336 }, { "epoch": 0.37, "grad_norm": 0.39988032130152645, "learning_rate": 2.9258102155462824e-05, "loss": 0.1108, "step": 3337 }, { "epoch": 0.37, "grad_norm": 0.46838151435607894, "learning_rate": 2.9251795563642445e-05, "loss": 0.0847, "step": 3338 }, { "epoch": 0.37, "grad_norm": 0.47027761767225, "learning_rate": 2.924548780122442e-05, "loss": 0.106, "step": 3339 }, { "epoch": 0.37, "grad_norm": 0.4413420031217952, "learning_rate": 2.923917886900685e-05, "loss": 0.0684, "step": 3340 }, { "epoch": 0.37, "grad_norm": 0.3158040733149297, "learning_rate": 2.923286876778797e-05, "loss": 0.0642, "step": 3341 }, { "epoch": 0.37, "grad_norm": 0.4667887986254614, "learning_rate": 2.922655749836618e-05, "loss": 0.0957, "step": 3342 }, { "epoch": 0.37, "grad_norm": 0.3847996265859636, "learning_rate": 2.922024506154004e-05, "loss": 0.0785, "step": 3343 }, { "epoch": 0.37, "grad_norm": 0.3976016533625361, "learning_rate": 2.921393145810821e-05, "loss": 0.0838, "step": 3344 }, { "epoch": 0.37, "grad_norm": 0.4201546666919501, "learning_rate": 2.9207616688869556e-05, "loss": 0.1122, "step": 3345 }, { "epoch": 0.37, "grad_norm": 0.40566798847853486, "learning_rate": 2.9201300754623046e-05, "loss": 0.1104, "step": 3346 }, { "epoch": 0.37, "grad_norm": 0.4747499799261345, "learning_rate": 2.9194983656167823e-05, "loss": 0.0905, "step": 3347 }, { "epoch": 0.37, "grad_norm": 0.46798120077697836, "learning_rate": 2.9188665394303163e-05, "loss": 0.1233, "step": 3348 }, { "epoch": 0.37, "grad_norm": 0.42366889740131286, "learning_rate": 2.9182345969828496e-05, "loss": 0.0918, "step": 3349 }, { "epoch": 0.37, "grad_norm": 0.4382036545006077, "learning_rate": 2.9176025383543395e-05, "loss": 0.0879, "step": 3350 }, { "epoch": 0.37, "grad_norm": 0.39428761957566644, "learning_rate": 2.9169703636247587e-05, "loss": 0.0956, "step": 3351 }, { "epoch": 0.37, "grad_norm": 0.42348531403800055, "learning_rate": 2.916338072874093e-05, "loss": 0.1153, "step": 3352 }, { "epoch": 0.37, "grad_norm": 0.6052951933159613, "learning_rate": 2.915705666182346e-05, "loss": 0.1387, "step": 3353 }, { "epoch": 0.37, "grad_norm": 0.4401405214120454, "learning_rate": 2.915073143629531e-05, "loss": 0.0984, "step": 3354 }, { "epoch": 0.37, "grad_norm": 0.5342718738742374, "learning_rate": 2.914440505295682e-05, "loss": 0.1223, "step": 3355 }, { "epoch": 0.37, "grad_norm": 0.34057531869432783, "learning_rate": 2.9138077512608417e-05, "loss": 0.0911, "step": 3356 }, { "epoch": 0.37, "grad_norm": 0.28605574002737116, "learning_rate": 2.9131748816050724e-05, "loss": 0.0959, "step": 3357 }, { "epoch": 0.37, "grad_norm": 0.4227325412905755, "learning_rate": 2.9125418964084474e-05, "loss": 0.1035, "step": 3358 }, { "epoch": 0.37, "grad_norm": 0.39024853685435873, "learning_rate": 2.9119087957510572e-05, "loss": 0.0846, "step": 3359 }, { "epoch": 0.37, "grad_norm": 0.47840982941405963, "learning_rate": 2.9112755797130052e-05, "loss": 0.1368, "step": 3360 }, { "epoch": 0.37, "grad_norm": 0.37818614134373457, "learning_rate": 2.9106422483744107e-05, "loss": 0.0806, "step": 3361 }, { "epoch": 0.37, "grad_norm": 0.38189468517231107, "learning_rate": 2.910008801815406e-05, "loss": 0.0872, "step": 3362 }, { "epoch": 0.37, "grad_norm": 0.37712449047647206, "learning_rate": 2.9093752401161405e-05, "loss": 0.101, "step": 3363 }, { "epoch": 0.37, "grad_norm": 0.4254584882705601, "learning_rate": 2.908741563356774e-05, "loss": 0.0801, "step": 3364 }, { "epoch": 0.37, "grad_norm": 0.4003226990185956, "learning_rate": 2.908107771617486e-05, "loss": 0.0892, "step": 3365 }, { "epoch": 0.37, "grad_norm": 0.40462977759905516, "learning_rate": 2.9074738649784665e-05, "loss": 0.0911, "step": 3366 }, { "epoch": 0.37, "grad_norm": 0.37542434366289107, "learning_rate": 2.9068398435199215e-05, "loss": 0.0867, "step": 3367 }, { "epoch": 0.37, "grad_norm": 0.3312993003937427, "learning_rate": 2.9062057073220723e-05, "loss": 0.0873, "step": 3368 }, { "epoch": 0.37, "grad_norm": 0.3830628841321665, "learning_rate": 2.905571456465153e-05, "loss": 0.091, "step": 3369 }, { "epoch": 0.37, "grad_norm": 0.43911866069836175, "learning_rate": 2.9049370910294143e-05, "loss": 0.0949, "step": 3370 }, { "epoch": 0.37, "grad_norm": 0.44958149734890546, "learning_rate": 2.90430261109512e-05, "loss": 0.0919, "step": 3371 }, { "epoch": 0.37, "grad_norm": 0.3955619869099262, "learning_rate": 2.9036680167425476e-05, "loss": 0.0699, "step": 3372 }, { "epoch": 0.37, "grad_norm": 0.3847020479142803, "learning_rate": 2.9030333080519913e-05, "loss": 0.0878, "step": 3373 }, { "epoch": 0.37, "grad_norm": 0.5157528920945456, "learning_rate": 2.902398485103758e-05, "loss": 0.1434, "step": 3374 }, { "epoch": 0.37, "grad_norm": 0.36910051804095106, "learning_rate": 2.90176354797817e-05, "loss": 0.0844, "step": 3375 }, { "epoch": 0.37, "grad_norm": 0.34696542489149695, "learning_rate": 2.901128496755564e-05, "loss": 0.0783, "step": 3376 }, { "epoch": 0.37, "grad_norm": 0.40574100593004825, "learning_rate": 2.9004933315162892e-05, "loss": 0.0803, "step": 3377 }, { "epoch": 0.37, "grad_norm": 0.42794517652719505, "learning_rate": 2.899858052340713e-05, "loss": 0.0961, "step": 3378 }, { "epoch": 0.37, "grad_norm": 0.4214190032013644, "learning_rate": 2.8992226593092135e-05, "loss": 0.1034, "step": 3379 }, { "epoch": 0.37, "grad_norm": 0.5084984474721068, "learning_rate": 2.8985871525021857e-05, "loss": 0.0966, "step": 3380 }, { "epoch": 0.37, "grad_norm": 0.3727625817120575, "learning_rate": 2.8979515320000374e-05, "loss": 0.0975, "step": 3381 }, { "epoch": 0.37, "grad_norm": 0.4605135341803006, "learning_rate": 2.897315797883192e-05, "loss": 0.1503, "step": 3382 }, { "epoch": 0.37, "grad_norm": 0.3570303309656247, "learning_rate": 2.8966799502320864e-05, "loss": 0.0766, "step": 3383 }, { "epoch": 0.37, "grad_norm": 0.4221254770786254, "learning_rate": 2.896043989127172e-05, "loss": 0.1072, "step": 3384 }, { "epoch": 0.37, "grad_norm": 0.4293559442481154, "learning_rate": 2.8954079146489155e-05, "loss": 0.1187, "step": 3385 }, { "epoch": 0.37, "grad_norm": 0.3774202052154274, "learning_rate": 2.8947717268777968e-05, "loss": 0.0841, "step": 3386 }, { "epoch": 0.37, "grad_norm": 0.45600114789931223, "learning_rate": 2.8941354258943106e-05, "loss": 0.107, "step": 3387 }, { "epoch": 0.37, "grad_norm": 0.49775256268441753, "learning_rate": 2.8934990117789658e-05, "loss": 0.1456, "step": 3388 }, { "epoch": 0.37, "grad_norm": 0.45716027940818854, "learning_rate": 2.8928624846122853e-05, "loss": 0.0761, "step": 3389 }, { "epoch": 0.37, "grad_norm": 0.43560224446282425, "learning_rate": 2.8922258444748074e-05, "loss": 0.0959, "step": 3390 }, { "epoch": 0.37, "grad_norm": 0.43425794368960496, "learning_rate": 2.8915890914470832e-05, "loss": 0.0985, "step": 3391 }, { "epoch": 0.37, "grad_norm": 0.41410210258103936, "learning_rate": 2.8909522256096795e-05, "loss": 0.096, "step": 3392 }, { "epoch": 0.37, "grad_norm": 0.4260304273072613, "learning_rate": 2.8903152470431762e-05, "loss": 0.0918, "step": 3393 }, { "epoch": 0.37, "grad_norm": 0.43745003142422706, "learning_rate": 2.8896781558281688e-05, "loss": 0.0948, "step": 3394 }, { "epoch": 0.37, "grad_norm": 0.3717033799638932, "learning_rate": 2.8890409520452657e-05, "loss": 0.076, "step": 3395 }, { "epoch": 0.37, "grad_norm": 0.4132785573485837, "learning_rate": 2.88840363577509e-05, "loss": 0.0864, "step": 3396 }, { "epoch": 0.37, "grad_norm": 0.41931848867259014, "learning_rate": 2.8877662070982783e-05, "loss": 0.0983, "step": 3397 }, { "epoch": 0.37, "grad_norm": 0.4257400737765631, "learning_rate": 2.8871286660954846e-05, "loss": 0.0826, "step": 3398 }, { "epoch": 0.37, "grad_norm": 0.3647279951923982, "learning_rate": 2.8864910128473725e-05, "loss": 0.0924, "step": 3399 }, { "epoch": 0.37, "grad_norm": 0.42229278505337503, "learning_rate": 2.8858532474346232e-05, "loss": 0.0712, "step": 3400 }, { "epoch": 0.37, "grad_norm": 0.4102022780976816, "learning_rate": 2.8852153699379305e-05, "loss": 0.0871, "step": 3401 }, { "epoch": 0.37, "grad_norm": 0.3721410128486554, "learning_rate": 2.8845773804380028e-05, "loss": 0.0822, "step": 3402 }, { "epoch": 0.37, "grad_norm": 0.45634594914325, "learning_rate": 2.8839392790155632e-05, "loss": 0.1305, "step": 3403 }, { "epoch": 0.37, "grad_norm": 0.4275111482516275, "learning_rate": 2.883301065751348e-05, "loss": 0.0877, "step": 3404 }, { "epoch": 0.37, "grad_norm": 0.41723550670313014, "learning_rate": 2.882662740726108e-05, "loss": 0.0802, "step": 3405 }, { "epoch": 0.37, "grad_norm": 0.37908156753473154, "learning_rate": 2.882024304020609e-05, "loss": 0.0671, "step": 3406 }, { "epoch": 0.37, "grad_norm": 0.39287973081997424, "learning_rate": 2.881385755715629e-05, "loss": 0.0918, "step": 3407 }, { "epoch": 0.37, "grad_norm": 0.4492151840731854, "learning_rate": 2.8807470958919626e-05, "loss": 0.0772, "step": 3408 }, { "epoch": 0.37, "grad_norm": 0.37803053188137514, "learning_rate": 2.880108324630417e-05, "loss": 0.0816, "step": 3409 }, { "epoch": 0.37, "grad_norm": 0.43195961259981386, "learning_rate": 2.8794694420118117e-05, "loss": 0.1074, "step": 3410 }, { "epoch": 0.37, "grad_norm": 0.3557425482017068, "learning_rate": 2.8788304481169854e-05, "loss": 0.075, "step": 3411 }, { "epoch": 0.37, "grad_norm": 0.43805074086940304, "learning_rate": 2.8781913430267857e-05, "loss": 0.094, "step": 3412 }, { "epoch": 0.37, "grad_norm": 0.3953387617351781, "learning_rate": 2.8775521268220772e-05, "loss": 0.0777, "step": 3413 }, { "epoch": 0.37, "grad_norm": 0.4246914065800988, "learning_rate": 2.876912799583737e-05, "loss": 0.0843, "step": 3414 }, { "epoch": 0.38, "grad_norm": 0.43593821029878677, "learning_rate": 2.8762733613926582e-05, "loss": 0.0955, "step": 3415 }, { "epoch": 0.38, "grad_norm": 0.608713402206091, "learning_rate": 2.8756338123297455e-05, "loss": 0.1669, "step": 3416 }, { "epoch": 0.38, "grad_norm": 0.3879497855594798, "learning_rate": 2.8749941524759194e-05, "loss": 0.0721, "step": 3417 }, { "epoch": 0.38, "grad_norm": 0.33576957808382996, "learning_rate": 2.8743543819121132e-05, "loss": 0.0916, "step": 3418 }, { "epoch": 0.38, "grad_norm": 0.3442734937040848, "learning_rate": 2.8737145007192756e-05, "loss": 0.0729, "step": 3419 }, { "epoch": 0.38, "grad_norm": 0.5710367542219306, "learning_rate": 2.8730745089783686e-05, "loss": 0.1457, "step": 3420 }, { "epoch": 0.38, "grad_norm": 0.4547104721558514, "learning_rate": 2.872434406770368e-05, "loss": 0.1238, "step": 3421 }, { "epoch": 0.38, "grad_norm": 0.41386860680258014, "learning_rate": 2.871794194176263e-05, "loss": 0.1044, "step": 3422 }, { "epoch": 0.38, "grad_norm": 0.35018108017739, "learning_rate": 2.8711538712770583e-05, "loss": 0.0761, "step": 3423 }, { "epoch": 0.38, "grad_norm": 0.3672851676099066, "learning_rate": 2.8705134381537718e-05, "loss": 0.077, "step": 3424 }, { "epoch": 0.38, "grad_norm": 0.346085482427561, "learning_rate": 2.8698728948874344e-05, "loss": 0.0721, "step": 3425 }, { "epoch": 0.38, "grad_norm": 0.4026717913422045, "learning_rate": 2.869232241559093e-05, "loss": 0.0994, "step": 3426 }, { "epoch": 0.38, "grad_norm": 0.381926269773148, "learning_rate": 2.868591478249806e-05, "loss": 0.068, "step": 3427 }, { "epoch": 0.38, "grad_norm": 0.4089908756005457, "learning_rate": 2.8679506050406475e-05, "loss": 0.1011, "step": 3428 }, { "epoch": 0.38, "grad_norm": 0.5281927832719631, "learning_rate": 2.8673096220127062e-05, "loss": 0.138, "step": 3429 }, { "epoch": 0.38, "grad_norm": 0.4583565659125651, "learning_rate": 2.8666685292470814e-05, "loss": 0.1121, "step": 3430 }, { "epoch": 0.38, "grad_norm": 0.43365182573569055, "learning_rate": 2.8660273268248894e-05, "loss": 0.1265, "step": 3431 }, { "epoch": 0.38, "grad_norm": 0.4293034484471088, "learning_rate": 2.8653860148272596e-05, "loss": 0.0943, "step": 3432 }, { "epoch": 0.38, "grad_norm": 0.5120178396909366, "learning_rate": 2.8647445933353342e-05, "loss": 0.1, "step": 3433 }, { "epoch": 0.38, "grad_norm": 0.4431122582706101, "learning_rate": 2.8641030624302704e-05, "loss": 0.1247, "step": 3434 }, { "epoch": 0.38, "grad_norm": 0.48175600561744814, "learning_rate": 2.8634614221932394e-05, "loss": 0.1187, "step": 3435 }, { "epoch": 0.38, "grad_norm": 0.40393398418428117, "learning_rate": 2.8628196727054244e-05, "loss": 0.0883, "step": 3436 }, { "epoch": 0.38, "grad_norm": 0.3767657827401431, "learning_rate": 2.8621778140480252e-05, "loss": 0.0675, "step": 3437 }, { "epoch": 0.38, "grad_norm": 0.4296220556185453, "learning_rate": 2.8615358463022533e-05, "loss": 0.1052, "step": 3438 }, { "epoch": 0.38, "grad_norm": 0.3682775550174626, "learning_rate": 2.8608937695493347e-05, "loss": 0.1028, "step": 3439 }, { "epoch": 0.38, "grad_norm": 0.3807212383455408, "learning_rate": 2.860251583870509e-05, "loss": 0.1011, "step": 3440 }, { "epoch": 0.38, "grad_norm": 0.42134723500450993, "learning_rate": 2.8596092893470296e-05, "loss": 0.0813, "step": 3441 }, { "epoch": 0.38, "grad_norm": 0.32172565884464516, "learning_rate": 2.8589668860601643e-05, "loss": 0.0878, "step": 3442 }, { "epoch": 0.38, "grad_norm": 0.4967773113546929, "learning_rate": 2.858324374091194e-05, "loss": 0.1284, "step": 3443 }, { "epoch": 0.38, "grad_norm": 0.3414099001307428, "learning_rate": 2.857681753521413e-05, "loss": 0.0778, "step": 3444 }, { "epoch": 0.38, "grad_norm": 0.4045005322253636, "learning_rate": 2.8570390244321303e-05, "loss": 0.0757, "step": 3445 }, { "epoch": 0.38, "grad_norm": 0.46888621258530566, "learning_rate": 2.856396186904669e-05, "loss": 0.0696, "step": 3446 }, { "epoch": 0.38, "grad_norm": 0.38386437839432436, "learning_rate": 2.8557532410203633e-05, "loss": 0.0714, "step": 3447 }, { "epoch": 0.38, "grad_norm": 0.3398009635036262, "learning_rate": 2.8551101868605644e-05, "loss": 0.0772, "step": 3448 }, { "epoch": 0.38, "grad_norm": 0.39659506093713975, "learning_rate": 2.8544670245066353e-05, "loss": 0.0751, "step": 3449 }, { "epoch": 0.38, "grad_norm": 0.4322757449914007, "learning_rate": 2.8538237540399528e-05, "loss": 0.1132, "step": 3450 }, { "epoch": 0.38, "grad_norm": 0.36379845011414, "learning_rate": 2.8531803755419082e-05, "loss": 0.0995, "step": 3451 }, { "epoch": 0.38, "grad_norm": 0.5417590490447135, "learning_rate": 2.8525368890939055e-05, "loss": 0.1388, "step": 3452 }, { "epoch": 0.38, "grad_norm": 0.3858068484422771, "learning_rate": 2.8518932947773625e-05, "loss": 0.0839, "step": 3453 }, { "epoch": 0.38, "grad_norm": 0.3646439047860147, "learning_rate": 2.851249592673712e-05, "loss": 0.0626, "step": 3454 }, { "epoch": 0.38, "grad_norm": 0.4154183483504243, "learning_rate": 2.850605782864398e-05, "loss": 0.0875, "step": 3455 }, { "epoch": 0.38, "grad_norm": 0.4361004753783307, "learning_rate": 2.8499618654308815e-05, "loss": 0.1194, "step": 3456 }, { "epoch": 0.38, "grad_norm": 0.5199713313012686, "learning_rate": 2.8493178404546334e-05, "loss": 0.131, "step": 3457 }, { "epoch": 0.38, "grad_norm": 0.3846459565712655, "learning_rate": 2.8486737080171405e-05, "loss": 0.0732, "step": 3458 }, { "epoch": 0.38, "grad_norm": 0.38434917161583537, "learning_rate": 2.8480294681999026e-05, "loss": 0.0728, "step": 3459 }, { "epoch": 0.38, "grad_norm": 0.31545425177668673, "learning_rate": 2.847385121084434e-05, "loss": 0.0712, "step": 3460 }, { "epoch": 0.38, "grad_norm": 0.37580723107630243, "learning_rate": 2.8467406667522597e-05, "loss": 0.095, "step": 3461 }, { "epoch": 0.38, "grad_norm": 0.36407093644088173, "learning_rate": 2.8460961052849222e-05, "loss": 0.085, "step": 3462 }, { "epoch": 0.38, "grad_norm": 0.4195142319848757, "learning_rate": 2.845451436763975e-05, "loss": 0.0899, "step": 3463 }, { "epoch": 0.38, "grad_norm": 0.3311234124341084, "learning_rate": 2.8448066612709854e-05, "loss": 0.0795, "step": 3464 }, { "epoch": 0.38, "grad_norm": 0.36759958072841836, "learning_rate": 2.8441617788875352e-05, "loss": 0.0738, "step": 3465 }, { "epoch": 0.38, "grad_norm": 0.4867884437542287, "learning_rate": 2.843516789695219e-05, "loss": 0.0874, "step": 3466 }, { "epoch": 0.38, "grad_norm": 0.49147631717274276, "learning_rate": 2.842871693775645e-05, "loss": 0.1025, "step": 3467 }, { "epoch": 0.38, "grad_norm": 0.37212141191715153, "learning_rate": 2.842226491210434e-05, "loss": 0.0721, "step": 3468 }, { "epoch": 0.38, "grad_norm": 0.555623782127599, "learning_rate": 2.8415811820812226e-05, "loss": 0.1224, "step": 3469 }, { "epoch": 0.38, "grad_norm": 0.42344359784116864, "learning_rate": 2.8409357664696585e-05, "loss": 0.0955, "step": 3470 }, { "epoch": 0.38, "grad_norm": 0.46745134704804603, "learning_rate": 2.8402902444574052e-05, "loss": 0.0928, "step": 3471 }, { "epoch": 0.38, "grad_norm": 0.5694713660469333, "learning_rate": 2.8396446161261372e-05, "loss": 0.1153, "step": 3472 }, { "epoch": 0.38, "grad_norm": 0.531193225295172, "learning_rate": 2.8389988815575443e-05, "loss": 0.1352, "step": 3473 }, { "epoch": 0.38, "grad_norm": 0.3936939797589597, "learning_rate": 2.8383530408333285e-05, "loss": 0.0901, "step": 3474 }, { "epoch": 0.38, "grad_norm": 0.4029800050782596, "learning_rate": 2.8377070940352066e-05, "loss": 0.0945, "step": 3475 }, { "epoch": 0.38, "grad_norm": 0.37695077292543594, "learning_rate": 2.8370610412449066e-05, "loss": 0.0938, "step": 3476 }, { "epoch": 0.38, "grad_norm": 0.3364952691101305, "learning_rate": 2.8364148825441725e-05, "loss": 0.0674, "step": 3477 }, { "epoch": 0.38, "grad_norm": 0.4415806451134829, "learning_rate": 2.8357686180147604e-05, "loss": 0.107, "step": 3478 }, { "epoch": 0.38, "grad_norm": 0.4453857093998799, "learning_rate": 2.83512224773844e-05, "loss": 0.0994, "step": 3479 }, { "epoch": 0.38, "grad_norm": 0.342291577506136, "learning_rate": 2.834475771796993e-05, "loss": 0.067, "step": 3480 }, { "epoch": 0.38, "grad_norm": 0.535475670374912, "learning_rate": 2.833829190272218e-05, "loss": 0.1448, "step": 3481 }, { "epoch": 0.38, "grad_norm": 0.3911197009375313, "learning_rate": 2.8331825032459228e-05, "loss": 0.0995, "step": 3482 }, { "epoch": 0.38, "grad_norm": 0.36371226019911224, "learning_rate": 2.832535710799931e-05, "loss": 0.094, "step": 3483 }, { "epoch": 0.38, "grad_norm": 0.4007534523111423, "learning_rate": 2.8318888130160796e-05, "loss": 0.0827, "step": 3484 }, { "epoch": 0.38, "grad_norm": 0.3752636777200926, "learning_rate": 2.831241809976218e-05, "loss": 0.0828, "step": 3485 }, { "epoch": 0.38, "grad_norm": 0.3747609687978747, "learning_rate": 2.830594701762209e-05, "loss": 0.0797, "step": 3486 }, { "epoch": 0.38, "grad_norm": 0.3972239619700844, "learning_rate": 2.829947488455929e-05, "loss": 0.086, "step": 3487 }, { "epoch": 0.38, "grad_norm": 0.4071037418295652, "learning_rate": 2.8293001701392677e-05, "loss": 0.1054, "step": 3488 }, { "epoch": 0.38, "grad_norm": 0.4251344825642706, "learning_rate": 2.828652746894129e-05, "loss": 0.0913, "step": 3489 }, { "epoch": 0.38, "grad_norm": 0.3226936272499918, "learning_rate": 2.828005218802427e-05, "loss": 0.0739, "step": 3490 }, { "epoch": 0.38, "grad_norm": 0.33636715493043123, "learning_rate": 2.8273575859460935e-05, "loss": 0.0833, "step": 3491 }, { "epoch": 0.38, "grad_norm": 0.44709308077996146, "learning_rate": 2.8267098484070693e-05, "loss": 0.1168, "step": 3492 }, { "epoch": 0.38, "grad_norm": 0.4165290028925522, "learning_rate": 2.826062006267312e-05, "loss": 0.0822, "step": 3493 }, { "epoch": 0.38, "grad_norm": 0.5093707312656639, "learning_rate": 2.8254140596087897e-05, "loss": 0.1448, "step": 3494 }, { "epoch": 0.38, "grad_norm": 0.42914811004716735, "learning_rate": 2.8247660085134856e-05, "loss": 0.114, "step": 3495 }, { "epoch": 0.38, "grad_norm": 0.4143769311931827, "learning_rate": 2.8241178530633947e-05, "loss": 0.1122, "step": 3496 }, { "epoch": 0.38, "grad_norm": 0.5156118217864979, "learning_rate": 2.8234695933405255e-05, "loss": 0.1163, "step": 3497 }, { "epoch": 0.38, "grad_norm": 0.4423480493895119, "learning_rate": 2.822821229426902e-05, "loss": 0.1216, "step": 3498 }, { "epoch": 0.38, "grad_norm": 0.4392958635172358, "learning_rate": 2.8221727614045576e-05, "loss": 0.1143, "step": 3499 }, { "epoch": 0.38, "grad_norm": 0.4309899963038868, "learning_rate": 2.8215241893555415e-05, "loss": 0.089, "step": 3500 }, { "epoch": 0.38, "grad_norm": 0.5637227913746374, "learning_rate": 2.8208755133619146e-05, "loss": 0.1337, "step": 3501 }, { "epoch": 0.38, "grad_norm": 0.3888968770118791, "learning_rate": 2.8202267335057522e-05, "loss": 0.0901, "step": 3502 }, { "epoch": 0.38, "grad_norm": 0.3692297247677406, "learning_rate": 2.8195778498691422e-05, "loss": 0.0698, "step": 3503 }, { "epoch": 0.38, "grad_norm": 0.41266208123128156, "learning_rate": 2.818928862534185e-05, "loss": 0.0679, "step": 3504 }, { "epoch": 0.38, "grad_norm": 0.44760630264050744, "learning_rate": 2.8182797715829955e-05, "loss": 0.1241, "step": 3505 }, { "epoch": 0.39, "grad_norm": 0.4358505934907392, "learning_rate": 2.817630577097701e-05, "loss": 0.1135, "step": 3506 }, { "epoch": 0.39, "grad_norm": 0.4025473033463134, "learning_rate": 2.816981279160441e-05, "loss": 0.0857, "step": 3507 }, { "epoch": 0.39, "grad_norm": 0.47481516705289056, "learning_rate": 2.8163318778533692e-05, "loss": 0.1109, "step": 3508 }, { "epoch": 0.39, "grad_norm": 0.3885089973883152, "learning_rate": 2.8156823732586525e-05, "loss": 0.0927, "step": 3509 }, { "epoch": 0.39, "grad_norm": 0.4640449263716323, "learning_rate": 2.81503276545847e-05, "loss": 0.1114, "step": 3510 }, { "epoch": 0.39, "grad_norm": 0.3766046405496704, "learning_rate": 2.8143830545350147e-05, "loss": 0.0893, "step": 3511 }, { "epoch": 0.39, "grad_norm": 0.4300228890714434, "learning_rate": 2.8137332405704922e-05, "loss": 0.0919, "step": 3512 }, { "epoch": 0.39, "grad_norm": 0.36432360752886406, "learning_rate": 2.8130833236471208e-05, "loss": 0.0949, "step": 3513 }, { "epoch": 0.39, "grad_norm": 0.35784830389005967, "learning_rate": 2.812433303847133e-05, "loss": 0.0772, "step": 3514 }, { "epoch": 0.39, "grad_norm": 0.3346223704940486, "learning_rate": 2.8117831812527724e-05, "loss": 0.0732, "step": 3515 }, { "epoch": 0.39, "grad_norm": 0.46472517472664593, "learning_rate": 2.811132955946298e-05, "loss": 0.099, "step": 3516 }, { "epoch": 0.39, "grad_norm": 0.4325094670158144, "learning_rate": 2.8104826280099796e-05, "loss": 0.1041, "step": 3517 }, { "epoch": 0.39, "grad_norm": 0.42947699852592836, "learning_rate": 2.8098321975261026e-05, "loss": 0.075, "step": 3518 }, { "epoch": 0.39, "grad_norm": 0.4099711772626729, "learning_rate": 2.809181664576961e-05, "loss": 0.0883, "step": 3519 }, { "epoch": 0.39, "grad_norm": 0.44197976992126387, "learning_rate": 2.8085310292448666e-05, "loss": 0.1066, "step": 3520 }, { "epoch": 0.39, "grad_norm": 0.48493875490447325, "learning_rate": 2.807880291612141e-05, "loss": 0.1141, "step": 3521 }, { "epoch": 0.39, "grad_norm": 0.48675934697264356, "learning_rate": 2.8072294517611208e-05, "loss": 0.0963, "step": 3522 }, { "epoch": 0.39, "grad_norm": 0.39768477002228564, "learning_rate": 2.8065785097741538e-05, "loss": 0.0885, "step": 3523 }, { "epoch": 0.39, "grad_norm": 0.3712355694069428, "learning_rate": 2.805927465733601e-05, "loss": 0.0895, "step": 3524 }, { "epoch": 0.39, "grad_norm": 0.31483897827924573, "learning_rate": 2.8052763197218376e-05, "loss": 0.0643, "step": 3525 }, { "epoch": 0.39, "grad_norm": 0.3578252548543086, "learning_rate": 2.8046250718212507e-05, "loss": 0.0712, "step": 3526 }, { "epoch": 0.39, "grad_norm": 0.4630772017447761, "learning_rate": 2.8039737221142403e-05, "loss": 0.1017, "step": 3527 }, { "epoch": 0.39, "grad_norm": 0.3342574269376214, "learning_rate": 2.8033222706832187e-05, "loss": 0.0738, "step": 3528 }, { "epoch": 0.39, "grad_norm": 0.4917352960528285, "learning_rate": 2.802670717610613e-05, "loss": 0.1216, "step": 3529 }, { "epoch": 0.39, "grad_norm": 0.40694144900278795, "learning_rate": 2.802019062978861e-05, "loss": 0.0919, "step": 3530 }, { "epoch": 0.39, "grad_norm": 0.3887436383576269, "learning_rate": 2.8013673068704156e-05, "loss": 0.0717, "step": 3531 }, { "epoch": 0.39, "grad_norm": 0.4490751617494209, "learning_rate": 2.80071544936774e-05, "loss": 0.0947, "step": 3532 }, { "epoch": 0.39, "grad_norm": 0.42147189767945803, "learning_rate": 2.8000634905533116e-05, "loss": 0.0826, "step": 3533 }, { "epoch": 0.39, "grad_norm": 0.3544186812765945, "learning_rate": 2.7994114305096208e-05, "loss": 0.087, "step": 3534 }, { "epoch": 0.39, "grad_norm": 0.36138494921103986, "learning_rate": 2.7987592693191707e-05, "loss": 0.0795, "step": 3535 }, { "epoch": 0.39, "grad_norm": 0.4038858397471632, "learning_rate": 2.7981070070644764e-05, "loss": 0.1127, "step": 3536 }, { "epoch": 0.39, "grad_norm": 0.4199915273697348, "learning_rate": 2.797454643828067e-05, "loss": 0.0908, "step": 3537 }, { "epoch": 0.39, "grad_norm": 0.46442943028100175, "learning_rate": 2.7968021796924834e-05, "loss": 0.1366, "step": 3538 }, { "epoch": 0.39, "grad_norm": 0.4939540767231532, "learning_rate": 2.796149614740279e-05, "loss": 0.125, "step": 3539 }, { "epoch": 0.39, "grad_norm": 0.39145042946743575, "learning_rate": 2.7954969490540223e-05, "loss": 0.0788, "step": 3540 }, { "epoch": 0.39, "grad_norm": 0.4113374685474274, "learning_rate": 2.7948441827162917e-05, "loss": 0.0945, "step": 3541 }, { "epoch": 0.39, "grad_norm": 0.47296766000602547, "learning_rate": 2.7941913158096792e-05, "loss": 0.09, "step": 3542 }, { "epoch": 0.39, "grad_norm": 0.31737148558138323, "learning_rate": 2.7935383484167906e-05, "loss": 0.054, "step": 3543 }, { "epoch": 0.39, "grad_norm": 0.3961445521894762, "learning_rate": 2.7928852806202424e-05, "loss": 0.0755, "step": 3544 }, { "epoch": 0.39, "grad_norm": 0.3797827149928469, "learning_rate": 2.7922321125026667e-05, "loss": 0.0722, "step": 3545 }, { "epoch": 0.39, "grad_norm": 0.48141437609090587, "learning_rate": 2.7915788441467052e-05, "loss": 0.0998, "step": 3546 }, { "epoch": 0.39, "grad_norm": 0.45212200573643446, "learning_rate": 2.7909254756350143e-05, "loss": 0.0851, "step": 3547 }, { "epoch": 0.39, "grad_norm": 0.4762996667960886, "learning_rate": 2.790272007050262e-05, "loss": 0.0996, "step": 3548 }, { "epoch": 0.39, "grad_norm": 0.3814486712939259, "learning_rate": 2.78961843847513e-05, "loss": 0.0875, "step": 3549 }, { "epoch": 0.39, "grad_norm": 0.4990424285081931, "learning_rate": 2.7889647699923114e-05, "loss": 0.1049, "step": 3550 }, { "epoch": 0.39, "grad_norm": 0.3949896538260701, "learning_rate": 2.788311001684514e-05, "loss": 0.0889, "step": 3551 }, { "epoch": 0.39, "grad_norm": 0.3208288125343036, "learning_rate": 2.7876571336344546e-05, "loss": 0.053, "step": 3552 }, { "epoch": 0.39, "grad_norm": 0.3813343119026568, "learning_rate": 2.7870031659248674e-05, "loss": 0.0964, "step": 3553 }, { "epoch": 0.39, "grad_norm": 0.4067905571031011, "learning_rate": 2.7863490986384945e-05, "loss": 0.0893, "step": 3554 }, { "epoch": 0.39, "grad_norm": 0.3837801905569949, "learning_rate": 2.7856949318580938e-05, "loss": 0.0824, "step": 3555 }, { "epoch": 0.39, "grad_norm": 0.6595342742048063, "learning_rate": 2.7850406656664346e-05, "loss": 0.1266, "step": 3556 }, { "epoch": 0.39, "grad_norm": 0.4127501437315757, "learning_rate": 2.784386300146299e-05, "loss": 0.0852, "step": 3557 }, { "epoch": 0.39, "grad_norm": 0.49301509306845137, "learning_rate": 2.783731835380482e-05, "loss": 0.1132, "step": 3558 }, { "epoch": 0.39, "grad_norm": 0.3952460659128091, "learning_rate": 2.7830772714517897e-05, "loss": 0.0827, "step": 3559 }, { "epoch": 0.39, "grad_norm": 0.3830156176684609, "learning_rate": 2.782422608443043e-05, "loss": 0.0728, "step": 3560 }, { "epoch": 0.39, "grad_norm": 0.3346835620053629, "learning_rate": 2.7817678464370725e-05, "loss": 0.0653, "step": 3561 }, { "epoch": 0.39, "grad_norm": 0.42750299196144387, "learning_rate": 2.781112985516725e-05, "loss": 0.0948, "step": 3562 }, { "epoch": 0.39, "grad_norm": 0.4892119121530535, "learning_rate": 2.7804580257648564e-05, "loss": 0.117, "step": 3563 }, { "epoch": 0.39, "grad_norm": 0.4595612816464375, "learning_rate": 2.7798029672643375e-05, "loss": 0.0974, "step": 3564 }, { "epoch": 0.39, "grad_norm": 0.4478804680886803, "learning_rate": 2.7791478100980486e-05, "loss": 0.1058, "step": 3565 }, { "epoch": 0.39, "grad_norm": 0.37482116329046383, "learning_rate": 2.778492554348887e-05, "loss": 0.0794, "step": 3566 }, { "epoch": 0.39, "grad_norm": 0.45620239937049717, "learning_rate": 2.7778372000997583e-05, "loss": 0.0849, "step": 3567 }, { "epoch": 0.39, "grad_norm": 0.4994403568129309, "learning_rate": 2.7771817474335835e-05, "loss": 0.1245, "step": 3568 }, { "epoch": 0.39, "grad_norm": 0.4417694083258809, "learning_rate": 2.7765261964332926e-05, "loss": 0.1028, "step": 3569 }, { "epoch": 0.39, "grad_norm": 0.3957806435033321, "learning_rate": 2.7758705471818327e-05, "loss": 0.0684, "step": 3570 }, { "epoch": 0.39, "grad_norm": 0.38470979469454475, "learning_rate": 2.775214799762159e-05, "loss": 0.076, "step": 3571 }, { "epoch": 0.39, "grad_norm": 0.3069180005512638, "learning_rate": 2.7745589542572424e-05, "loss": 0.0798, "step": 3572 }, { "epoch": 0.39, "grad_norm": 0.3998921152001471, "learning_rate": 2.773903010750063e-05, "loss": 0.0858, "step": 3573 }, { "epoch": 0.39, "grad_norm": 0.3444286280101821, "learning_rate": 2.7732469693236166e-05, "loss": 0.0809, "step": 3574 }, { "epoch": 0.39, "grad_norm": 0.4927372964917862, "learning_rate": 2.772590830060909e-05, "loss": 0.123, "step": 3575 }, { "epoch": 0.39, "grad_norm": 0.4993873956070473, "learning_rate": 2.77193459304496e-05, "loss": 0.1088, "step": 3576 }, { "epoch": 0.39, "grad_norm": 0.4169589884851231, "learning_rate": 2.7712782583588002e-05, "loss": 0.0635, "step": 3577 }, { "epoch": 0.39, "grad_norm": 0.3985956663461733, "learning_rate": 2.7706218260854738e-05, "loss": 0.0815, "step": 3578 }, { "epoch": 0.39, "grad_norm": 0.4176896491434237, "learning_rate": 2.7699652963080365e-05, "loss": 0.1026, "step": 3579 }, { "epoch": 0.39, "grad_norm": 0.4340718528832963, "learning_rate": 2.7693086691095573e-05, "loss": 0.0828, "step": 3580 }, { "epoch": 0.39, "grad_norm": 0.44624847628229125, "learning_rate": 2.7686519445731165e-05, "loss": 0.1099, "step": 3581 }, { "epoch": 0.39, "grad_norm": 0.38683835501590985, "learning_rate": 2.767995122781807e-05, "loss": 0.0821, "step": 3582 }, { "epoch": 0.39, "grad_norm": 1.3828269828425894, "learning_rate": 2.7673382038187355e-05, "loss": 0.1727, "step": 3583 }, { "epoch": 0.39, "grad_norm": 0.4187759021320502, "learning_rate": 2.7666811877670177e-05, "loss": 0.0807, "step": 3584 }, { "epoch": 0.39, "grad_norm": 0.4394913211753281, "learning_rate": 2.7660240747097858e-05, "loss": 0.0591, "step": 3585 }, { "epoch": 0.39, "grad_norm": 0.3907763760036637, "learning_rate": 2.7653668647301797e-05, "loss": 0.0878, "step": 3586 }, { "epoch": 0.39, "grad_norm": 0.37151942684418104, "learning_rate": 2.7647095579113554e-05, "loss": 0.0751, "step": 3587 }, { "epoch": 0.39, "grad_norm": 0.3795652263285278, "learning_rate": 2.7640521543364797e-05, "loss": 0.0962, "step": 3588 }, { "epoch": 0.39, "grad_norm": 0.45918853764217504, "learning_rate": 2.7633946540887307e-05, "loss": 0.1022, "step": 3589 }, { "epoch": 0.39, "grad_norm": 0.3847690717668627, "learning_rate": 2.7627370572513005e-05, "loss": 0.0742, "step": 3590 }, { "epoch": 0.39, "grad_norm": 0.450988980168159, "learning_rate": 2.7620793639073917e-05, "loss": 0.0949, "step": 3591 }, { "epoch": 0.39, "grad_norm": 0.4345922228829612, "learning_rate": 2.7614215741402204e-05, "loss": 0.1069, "step": 3592 }, { "epoch": 0.39, "grad_norm": 0.453537227303409, "learning_rate": 2.760763688033015e-05, "loss": 0.086, "step": 3593 }, { "epoch": 0.39, "grad_norm": 0.3166807302794481, "learning_rate": 2.7601057056690148e-05, "loss": 0.0603, "step": 3594 }, { "epoch": 0.39, "grad_norm": 0.42196853801657275, "learning_rate": 2.7594476271314725e-05, "loss": 0.0799, "step": 3595 }, { "epoch": 0.39, "grad_norm": 0.4255819816808162, "learning_rate": 2.7587894525036517e-05, "loss": 0.0964, "step": 3596 }, { "epoch": 0.4, "grad_norm": 0.35040990719000464, "learning_rate": 2.75813118186883e-05, "loss": 0.0596, "step": 3597 }, { "epoch": 0.4, "grad_norm": 0.43195369638317926, "learning_rate": 2.7574728153102956e-05, "loss": 0.097, "step": 3598 }, { "epoch": 0.4, "grad_norm": 0.37971084742720335, "learning_rate": 2.756814352911349e-05, "loss": 0.0845, "step": 3599 }, { "epoch": 0.4, "grad_norm": 0.39909323792756657, "learning_rate": 2.7561557947553037e-05, "loss": 0.0783, "step": 3600 }, { "epoch": 0.4, "grad_norm": 0.36411122908711735, "learning_rate": 2.7554971409254848e-05, "loss": 0.0633, "step": 3601 }, { "epoch": 0.4, "grad_norm": 0.4875586388851573, "learning_rate": 2.7548383915052287e-05, "loss": 0.1089, "step": 3602 }, { "epoch": 0.4, "grad_norm": 0.47123748079664823, "learning_rate": 2.7541795465778865e-05, "loss": 0.1105, "step": 3603 }, { "epoch": 0.4, "grad_norm": 0.3795529971089713, "learning_rate": 2.7535206062268174e-05, "loss": 0.0914, "step": 3604 }, { "epoch": 0.4, "grad_norm": 0.4340650728944526, "learning_rate": 2.752861570535396e-05, "loss": 0.0842, "step": 3605 }, { "epoch": 0.4, "grad_norm": 0.4889310861328, "learning_rate": 2.7522024395870075e-05, "loss": 0.1004, "step": 3606 }, { "epoch": 0.4, "grad_norm": 0.48759925516555136, "learning_rate": 2.7515432134650502e-05, "loss": 0.0932, "step": 3607 }, { "epoch": 0.4, "grad_norm": 0.4937229124920279, "learning_rate": 2.7508838922529316e-05, "loss": 0.1019, "step": 3608 }, { "epoch": 0.4, "grad_norm": 0.3846958116428314, "learning_rate": 2.750224476034076e-05, "loss": 0.0907, "step": 3609 }, { "epoch": 0.4, "grad_norm": 0.38047751821953774, "learning_rate": 2.7495649648919153e-05, "loss": 0.0889, "step": 3610 }, { "epoch": 0.4, "grad_norm": 0.49451134085692566, "learning_rate": 2.7489053589098966e-05, "loss": 0.1178, "step": 3611 }, { "epoch": 0.4, "grad_norm": 0.43259050962974316, "learning_rate": 2.7482456581714757e-05, "loss": 0.1086, "step": 3612 }, { "epoch": 0.4, "grad_norm": 0.394322519513368, "learning_rate": 2.747585862760124e-05, "loss": 0.0902, "step": 3613 }, { "epoch": 0.4, "grad_norm": 0.5170476246025132, "learning_rate": 2.7469259727593213e-05, "loss": 0.113, "step": 3614 }, { "epoch": 0.4, "grad_norm": 0.39153336765941504, "learning_rate": 2.746265988252563e-05, "loss": 0.0924, "step": 3615 }, { "epoch": 0.4, "grad_norm": 0.3767012510661694, "learning_rate": 2.7456059093233537e-05, "loss": 0.0764, "step": 3616 }, { "epoch": 0.4, "grad_norm": 0.44122173875317494, "learning_rate": 2.7449457360552108e-05, "loss": 0.1075, "step": 3617 }, { "epoch": 0.4, "grad_norm": 0.39906721281108154, "learning_rate": 2.7442854685316643e-05, "loss": 0.1019, "step": 3618 }, { "epoch": 0.4, "grad_norm": 0.38799180385201243, "learning_rate": 2.7436251068362555e-05, "loss": 0.066, "step": 3619 }, { "epoch": 0.4, "grad_norm": 0.33345881598640764, "learning_rate": 2.7429646510525373e-05, "loss": 0.0645, "step": 3620 }, { "epoch": 0.4, "grad_norm": 0.367687959709963, "learning_rate": 2.7423041012640744e-05, "loss": 0.0933, "step": 3621 }, { "epoch": 0.4, "grad_norm": 0.3662568940890612, "learning_rate": 2.7416434575544455e-05, "loss": 0.0625, "step": 3622 }, { "epoch": 0.4, "grad_norm": 0.448969664139347, "learning_rate": 2.7409827200072377e-05, "loss": 0.1064, "step": 3623 }, { "epoch": 0.4, "grad_norm": 0.42573111352676835, "learning_rate": 2.7403218887060538e-05, "loss": 0.0862, "step": 3624 }, { "epoch": 0.4, "grad_norm": 0.43705566867206613, "learning_rate": 2.7396609637345038e-05, "loss": 0.0893, "step": 3625 }, { "epoch": 0.4, "grad_norm": 0.36062537234599495, "learning_rate": 2.738999945176215e-05, "loss": 0.074, "step": 3626 }, { "epoch": 0.4, "grad_norm": 0.4499766549041885, "learning_rate": 2.7383388331148225e-05, "loss": 0.0996, "step": 3627 }, { "epoch": 0.4, "grad_norm": 0.44231468164461946, "learning_rate": 2.7376776276339745e-05, "loss": 0.093, "step": 3628 }, { "epoch": 0.4, "grad_norm": 0.3355116584780354, "learning_rate": 2.7370163288173314e-05, "loss": 0.0829, "step": 3629 }, { "epoch": 0.4, "grad_norm": 0.3392438636420186, "learning_rate": 2.7363549367485648e-05, "loss": 0.0623, "step": 3630 }, { "epoch": 0.4, "grad_norm": 0.39994563985041476, "learning_rate": 2.7356934515113582e-05, "loss": 0.0855, "step": 3631 }, { "epoch": 0.4, "grad_norm": 0.47382107168861803, "learning_rate": 2.7350318731894075e-05, "loss": 0.113, "step": 3632 }, { "epoch": 0.4, "grad_norm": 0.35320911502800384, "learning_rate": 2.7343702018664194e-05, "loss": 0.0683, "step": 3633 }, { "epoch": 0.4, "grad_norm": 0.34911172494121734, "learning_rate": 2.7337084376261135e-05, "loss": 0.0835, "step": 3634 }, { "epoch": 0.4, "grad_norm": 0.44526797624162595, "learning_rate": 2.7330465805522196e-05, "loss": 0.0881, "step": 3635 }, { "epoch": 0.4, "grad_norm": 0.39307455682300707, "learning_rate": 2.7323846307284814e-05, "loss": 0.0761, "step": 3636 }, { "epoch": 0.4, "grad_norm": 0.35096955629381305, "learning_rate": 2.731722588238652e-05, "loss": 0.0749, "step": 3637 }, { "epoch": 0.4, "grad_norm": 0.3916129208104275, "learning_rate": 2.7310604531664983e-05, "loss": 0.0915, "step": 3638 }, { "epoch": 0.4, "grad_norm": 0.35182089316771875, "learning_rate": 2.7303982255957967e-05, "loss": 0.0781, "step": 3639 }, { "epoch": 0.4, "grad_norm": 0.3938062067635411, "learning_rate": 2.7297359056103378e-05, "loss": 0.0937, "step": 3640 }, { "epoch": 0.4, "grad_norm": 0.3947547996874248, "learning_rate": 2.729073493293922e-05, "loss": 0.0682, "step": 3641 }, { "epoch": 0.4, "grad_norm": 0.37419170449148553, "learning_rate": 2.7284109887303628e-05, "loss": 0.0972, "step": 3642 }, { "epoch": 0.4, "grad_norm": 0.5598878342404536, "learning_rate": 2.7277483920034832e-05, "loss": 0.1632, "step": 3643 }, { "epoch": 0.4, "grad_norm": 0.39484498797072787, "learning_rate": 2.7270857031971203e-05, "loss": 0.075, "step": 3644 }, { "epoch": 0.4, "grad_norm": 0.43548780305843615, "learning_rate": 2.7264229223951217e-05, "loss": 0.0971, "step": 3645 }, { "epoch": 0.4, "grad_norm": 0.5114433479315054, "learning_rate": 2.7257600496813475e-05, "loss": 0.1145, "step": 3646 }, { "epoch": 0.4, "grad_norm": 0.477165918892703, "learning_rate": 2.725097085139667e-05, "loss": 0.1083, "step": 3647 }, { "epoch": 0.4, "grad_norm": 0.3972147148466966, "learning_rate": 2.7244340288539638e-05, "loss": 0.0736, "step": 3648 }, { "epoch": 0.4, "grad_norm": 0.36628622679839784, "learning_rate": 2.723770880908132e-05, "loss": 0.0664, "step": 3649 }, { "epoch": 0.4, "grad_norm": 0.4688244283715075, "learning_rate": 2.7231076413860774e-05, "loss": 0.0961, "step": 3650 }, { "epoch": 0.4, "grad_norm": 0.3720494462217135, "learning_rate": 2.7224443103717173e-05, "loss": 0.0569, "step": 3651 }, { "epoch": 0.4, "grad_norm": 0.33484646590035255, "learning_rate": 2.721780887948981e-05, "loss": 0.0658, "step": 3652 }, { "epoch": 0.4, "grad_norm": 0.42390671454072687, "learning_rate": 2.7211173742018088e-05, "loss": 0.0882, "step": 3653 }, { "epoch": 0.4, "grad_norm": 0.3600978630219675, "learning_rate": 2.7204537692141526e-05, "loss": 0.0981, "step": 3654 }, { "epoch": 0.4, "grad_norm": 0.39346598342554273, "learning_rate": 2.7197900730699768e-05, "loss": 0.0791, "step": 3655 }, { "epoch": 0.4, "grad_norm": 0.4118791814137419, "learning_rate": 2.7191262858532552e-05, "loss": 0.0897, "step": 3656 }, { "epoch": 0.4, "grad_norm": 0.5253493282155162, "learning_rate": 2.7184624076479763e-05, "loss": 0.1185, "step": 3657 }, { "epoch": 0.4, "grad_norm": 0.3058321798226607, "learning_rate": 2.7177984385381366e-05, "loss": 0.0671, "step": 3658 }, { "epoch": 0.4, "grad_norm": 0.41925439348233295, "learning_rate": 2.7171343786077468e-05, "loss": 0.0879, "step": 3659 }, { "epoch": 0.4, "grad_norm": 0.44979976729212706, "learning_rate": 2.7164702279408275e-05, "loss": 0.0756, "step": 3660 }, { "epoch": 0.4, "grad_norm": 0.6790933752786962, "learning_rate": 2.7158059866214118e-05, "loss": 0.08, "step": 3661 }, { "epoch": 0.4, "grad_norm": 0.34682940621622094, "learning_rate": 2.715141654733544e-05, "loss": 0.0672, "step": 3662 }, { "epoch": 0.4, "grad_norm": 0.4167402520369813, "learning_rate": 2.7144772323612798e-05, "loss": 0.1162, "step": 3663 }, { "epoch": 0.4, "grad_norm": 0.4755558889558451, "learning_rate": 2.7138127195886856e-05, "loss": 0.1089, "step": 3664 }, { "epoch": 0.4, "grad_norm": 0.4554168030313123, "learning_rate": 2.7131481164998407e-05, "loss": 0.0773, "step": 3665 }, { "epoch": 0.4, "grad_norm": 0.3734506748381317, "learning_rate": 2.712483423178834e-05, "loss": 0.0866, "step": 3666 }, { "epoch": 0.4, "grad_norm": 0.33750662179031643, "learning_rate": 2.711818639709768e-05, "loss": 0.065, "step": 3667 }, { "epoch": 0.4, "grad_norm": 0.33147065577728335, "learning_rate": 2.7111537661767537e-05, "loss": 0.0691, "step": 3668 }, { "epoch": 0.4, "grad_norm": 0.4368118937459918, "learning_rate": 2.7104888026639175e-05, "loss": 0.0777, "step": 3669 }, { "epoch": 0.4, "grad_norm": 0.4076485402908683, "learning_rate": 2.7098237492553937e-05, "loss": 0.0809, "step": 3670 }, { "epoch": 0.4, "grad_norm": 0.4115515273419336, "learning_rate": 2.7091586060353298e-05, "loss": 0.1022, "step": 3671 }, { "epoch": 0.4, "grad_norm": 0.3655827946226697, "learning_rate": 2.7084933730878824e-05, "loss": 0.0868, "step": 3672 }, { "epoch": 0.4, "grad_norm": 0.3714646142623628, "learning_rate": 2.7078280504972237e-05, "loss": 0.074, "step": 3673 }, { "epoch": 0.4, "grad_norm": 0.43147825406312484, "learning_rate": 2.7071626383475327e-05, "loss": 0.1109, "step": 3674 }, { "epoch": 0.4, "grad_norm": 0.3604714212372367, "learning_rate": 2.7064971367230023e-05, "loss": 0.08, "step": 3675 }, { "epoch": 0.4, "grad_norm": 0.3760598622971818, "learning_rate": 2.7058315457078358e-05, "loss": 0.088, "step": 3676 }, { "epoch": 0.4, "grad_norm": 0.3429940863876655, "learning_rate": 2.7051658653862488e-05, "loss": 0.0647, "step": 3677 }, { "epoch": 0.4, "grad_norm": 0.38157090455092857, "learning_rate": 2.7045000958424674e-05, "loss": 0.0799, "step": 3678 }, { "epoch": 0.4, "grad_norm": 0.41507291767940063, "learning_rate": 2.7038342371607282e-05, "loss": 0.1151, "step": 3679 }, { "epoch": 0.4, "grad_norm": 0.48991045946299666, "learning_rate": 2.7031682894252816e-05, "loss": 0.1091, "step": 3680 }, { "epoch": 0.4, "grad_norm": 0.402193856326176, "learning_rate": 2.702502252720386e-05, "loss": 0.0845, "step": 3681 }, { "epoch": 0.4, "grad_norm": 0.33391212217230803, "learning_rate": 2.701836127130314e-05, "loss": 0.0732, "step": 3682 }, { "epoch": 0.4, "grad_norm": 0.3626760548546362, "learning_rate": 2.7011699127393476e-05, "loss": 0.0528, "step": 3683 }, { "epoch": 0.4, "grad_norm": 0.4059666232145197, "learning_rate": 2.7005036096317802e-05, "loss": 0.1133, "step": 3684 }, { "epoch": 0.4, "grad_norm": 0.3472857161589751, "learning_rate": 2.6998372178919175e-05, "loss": 0.0752, "step": 3685 }, { "epoch": 0.4, "grad_norm": 0.3599597071907598, "learning_rate": 2.6991707376040755e-05, "loss": 0.0814, "step": 3686 }, { "epoch": 0.4, "grad_norm": 0.48472505656375875, "learning_rate": 2.6985041688525814e-05, "loss": 0.107, "step": 3687 }, { "epoch": 0.41, "grad_norm": 0.41989417761013553, "learning_rate": 2.6978375117217743e-05, "loss": 0.078, "step": 3688 }, { "epoch": 0.41, "grad_norm": 0.3536485243045561, "learning_rate": 2.6971707662960037e-05, "loss": 0.0756, "step": 3689 }, { "epoch": 0.41, "grad_norm": 0.4151581896189963, "learning_rate": 2.696503932659631e-05, "loss": 0.089, "step": 3690 }, { "epoch": 0.41, "grad_norm": 0.395437804254397, "learning_rate": 2.6958370108970274e-05, "loss": 0.0812, "step": 3691 }, { "epoch": 0.41, "grad_norm": 0.2876587108318242, "learning_rate": 2.6951700010925774e-05, "loss": 0.0573, "step": 3692 }, { "epoch": 0.41, "grad_norm": 0.42486186026456246, "learning_rate": 2.6945029033306745e-05, "loss": 0.0846, "step": 3693 }, { "epoch": 0.41, "grad_norm": 0.7010630573638799, "learning_rate": 2.6938357176957243e-05, "loss": 0.1485, "step": 3694 }, { "epoch": 0.41, "grad_norm": 0.44829336302258704, "learning_rate": 2.6931684442721443e-05, "loss": 0.0744, "step": 3695 }, { "epoch": 0.41, "grad_norm": 0.3989750097522774, "learning_rate": 2.692501083144362e-05, "loss": 0.0732, "step": 3696 }, { "epoch": 0.41, "grad_norm": 0.34813477233648726, "learning_rate": 2.6918336343968158e-05, "loss": 0.0653, "step": 3697 }, { "epoch": 0.41, "grad_norm": 0.4174504497956514, "learning_rate": 2.6911660981139563e-05, "loss": 0.0825, "step": 3698 }, { "epoch": 0.41, "grad_norm": 0.4243495388286499, "learning_rate": 2.6904984743802437e-05, "loss": 0.0782, "step": 3699 }, { "epoch": 0.41, "grad_norm": 0.41008554484972526, "learning_rate": 2.6898307632801515e-05, "loss": 0.0785, "step": 3700 }, { "epoch": 0.41, "grad_norm": 0.3082796985569781, "learning_rate": 2.6891629648981612e-05, "loss": 0.0539, "step": 3701 }, { "epoch": 0.41, "grad_norm": 0.36285927578599037, "learning_rate": 2.6884950793187684e-05, "loss": 0.0773, "step": 3702 }, { "epoch": 0.41, "grad_norm": 0.4554967959947677, "learning_rate": 2.6878271066264777e-05, "loss": 0.1007, "step": 3703 }, { "epoch": 0.41, "grad_norm": 0.42337043062125784, "learning_rate": 2.6871590469058052e-05, "loss": 0.0798, "step": 3704 }, { "epoch": 0.41, "grad_norm": 0.4658530361749296, "learning_rate": 2.6864909002412782e-05, "loss": 0.0829, "step": 3705 }, { "epoch": 0.41, "grad_norm": 0.4741350480293947, "learning_rate": 2.6858226667174362e-05, "loss": 0.1136, "step": 3706 }, { "epoch": 0.41, "grad_norm": 0.47693753728615235, "learning_rate": 2.6851543464188265e-05, "loss": 0.0843, "step": 3707 }, { "epoch": 0.41, "grad_norm": 0.4464780711932984, "learning_rate": 2.684485939430011e-05, "loss": 0.0832, "step": 3708 }, { "epoch": 0.41, "grad_norm": 0.32607685920900675, "learning_rate": 2.68381744583556e-05, "loss": 0.0843, "step": 3709 }, { "epoch": 0.41, "grad_norm": 0.7015489504722764, "learning_rate": 2.683148865720056e-05, "loss": 0.1591, "step": 3710 }, { "epoch": 0.41, "grad_norm": 0.4136532074171647, "learning_rate": 2.682480199168092e-05, "loss": 0.0766, "step": 3711 }, { "epoch": 0.41, "grad_norm": 0.37340939467901163, "learning_rate": 2.6818114462642726e-05, "loss": 0.0596, "step": 3712 }, { "epoch": 0.41, "grad_norm": 0.404133196883607, "learning_rate": 2.6811426070932118e-05, "loss": 0.0749, "step": 3713 }, { "epoch": 0.41, "grad_norm": 0.4182063167342618, "learning_rate": 2.6804736817395362e-05, "loss": 0.0812, "step": 3714 }, { "epoch": 0.41, "grad_norm": 0.55612289712465, "learning_rate": 2.679804670287883e-05, "loss": 0.1685, "step": 3715 }, { "epoch": 0.41, "grad_norm": 0.385284983417369, "learning_rate": 2.6791355728228986e-05, "loss": 0.073, "step": 3716 }, { "epoch": 0.41, "grad_norm": 0.36111051863026716, "learning_rate": 2.6784663894292432e-05, "loss": 0.083, "step": 3717 }, { "epoch": 0.41, "grad_norm": 0.4024285532642165, "learning_rate": 2.6777971201915843e-05, "loss": 0.0825, "step": 3718 }, { "epoch": 0.41, "grad_norm": 0.4460402949385001, "learning_rate": 2.6771277651946043e-05, "loss": 0.099, "step": 3719 }, { "epoch": 0.41, "grad_norm": 0.3694163710441471, "learning_rate": 2.676458324522992e-05, "loss": 0.0856, "step": 3720 }, { "epoch": 0.41, "grad_norm": 0.43341904634605877, "learning_rate": 2.675788798261452e-05, "loss": 0.1087, "step": 3721 }, { "epoch": 0.41, "grad_norm": 0.43076734969411545, "learning_rate": 2.675119186494696e-05, "loss": 0.1132, "step": 3722 }, { "epoch": 0.41, "grad_norm": 0.43265528112918133, "learning_rate": 2.674449489307447e-05, "loss": 0.1019, "step": 3723 }, { "epoch": 0.41, "grad_norm": 0.41506430156485086, "learning_rate": 2.6737797067844403e-05, "loss": 0.0764, "step": 3724 }, { "epoch": 0.41, "grad_norm": 0.442401927678175, "learning_rate": 2.6731098390104212e-05, "loss": 0.1004, "step": 3725 }, { "epoch": 0.41, "grad_norm": 0.47862447940761244, "learning_rate": 2.6724398860701453e-05, "loss": 0.1428, "step": 3726 }, { "epoch": 0.41, "grad_norm": 0.3341419896545442, "learning_rate": 2.6717698480483794e-05, "loss": 0.0727, "step": 3727 }, { "epoch": 0.41, "grad_norm": 0.41081486456692684, "learning_rate": 2.6710997250299012e-05, "loss": 0.0774, "step": 3728 }, { "epoch": 0.41, "grad_norm": 0.34533978350119077, "learning_rate": 2.670429517099499e-05, "loss": 0.0693, "step": 3729 }, { "epoch": 0.41, "grad_norm": 0.4583082535413163, "learning_rate": 2.6697592243419723e-05, "loss": 0.0753, "step": 3730 }, { "epoch": 0.41, "grad_norm": 0.30079207462797986, "learning_rate": 2.669088846842131e-05, "loss": 0.0623, "step": 3731 }, { "epoch": 0.41, "grad_norm": 0.3539198992793908, "learning_rate": 2.668418384684795e-05, "loss": 0.0869, "step": 3732 }, { "epoch": 0.41, "grad_norm": 0.36161592723463565, "learning_rate": 2.667747837954796e-05, "loss": 0.0859, "step": 3733 }, { "epoch": 0.41, "grad_norm": 0.4716474312756721, "learning_rate": 2.6670772067369754e-05, "loss": 0.0894, "step": 3734 }, { "epoch": 0.41, "grad_norm": 0.37577267357443184, "learning_rate": 2.6664064911161865e-05, "loss": 0.0764, "step": 3735 }, { "epoch": 0.41, "grad_norm": 0.35757159656568394, "learning_rate": 2.6657356911772922e-05, "loss": 0.0763, "step": 3736 }, { "epoch": 0.41, "grad_norm": 0.3981749286514307, "learning_rate": 2.665064807005166e-05, "loss": 0.0707, "step": 3737 }, { "epoch": 0.41, "grad_norm": 0.34343839740722704, "learning_rate": 2.6643938386846945e-05, "loss": 0.0684, "step": 3738 }, { "epoch": 0.41, "grad_norm": 0.3895658247714165, "learning_rate": 2.6637227863007708e-05, "loss": 0.0745, "step": 3739 }, { "epoch": 0.41, "grad_norm": 0.395385195716134, "learning_rate": 2.663051649938303e-05, "loss": 0.1103, "step": 3740 }, { "epoch": 0.41, "grad_norm": 0.556052502921623, "learning_rate": 2.6623804296822047e-05, "loss": 0.1137, "step": 3741 }, { "epoch": 0.41, "grad_norm": 0.4019299017571043, "learning_rate": 2.6617091256174058e-05, "loss": 0.0755, "step": 3742 }, { "epoch": 0.41, "grad_norm": 0.42738327875679344, "learning_rate": 2.6610377378288427e-05, "loss": 0.1077, "step": 3743 }, { "epoch": 0.41, "grad_norm": 0.41713783525742254, "learning_rate": 2.6603662664014644e-05, "loss": 0.0797, "step": 3744 }, { "epoch": 0.41, "grad_norm": 0.35580642088456604, "learning_rate": 2.6596947114202292e-05, "loss": 0.0965, "step": 3745 }, { "epoch": 0.41, "grad_norm": 0.3278912665447424, "learning_rate": 2.659023072970107e-05, "loss": 0.0846, "step": 3746 }, { "epoch": 0.41, "grad_norm": 0.43495188901933723, "learning_rate": 2.658351351136078e-05, "loss": 0.1064, "step": 3747 }, { "epoch": 0.41, "grad_norm": 0.396502903096658, "learning_rate": 2.657679546003133e-05, "loss": 0.0887, "step": 3748 }, { "epoch": 0.41, "grad_norm": 0.3620310775894723, "learning_rate": 2.6570076576562726e-05, "loss": 0.0608, "step": 3749 }, { "epoch": 0.41, "grad_norm": 0.364516894780986, "learning_rate": 2.656335686180509e-05, "loss": 0.0711, "step": 3750 }, { "epoch": 0.41, "grad_norm": 0.36470807757013285, "learning_rate": 2.6556636316608638e-05, "loss": 0.0618, "step": 3751 }, { "epoch": 0.41, "grad_norm": 0.37263699927680105, "learning_rate": 2.6549914941823713e-05, "loss": 0.1033, "step": 3752 }, { "epoch": 0.41, "grad_norm": 0.36668609311198197, "learning_rate": 2.6543192738300722e-05, "loss": 0.1009, "step": 3753 }, { "epoch": 0.41, "grad_norm": 0.3415438996509172, "learning_rate": 2.6536469706890226e-05, "loss": 0.0915, "step": 3754 }, { "epoch": 0.41, "grad_norm": 0.4262886873290953, "learning_rate": 2.6529745848442848e-05, "loss": 0.1118, "step": 3755 }, { "epoch": 0.41, "grad_norm": 0.4442084160889862, "learning_rate": 2.652302116380935e-05, "loss": 0.0868, "step": 3756 }, { "epoch": 0.41, "grad_norm": 0.3483900940297324, "learning_rate": 2.651629565384057e-05, "loss": 0.0897, "step": 3757 }, { "epoch": 0.41, "grad_norm": 0.41044330542745466, "learning_rate": 2.6509569319387477e-05, "loss": 0.0887, "step": 3758 }, { "epoch": 0.41, "grad_norm": 0.40089516833237204, "learning_rate": 2.6502842161301122e-05, "loss": 0.0638, "step": 3759 }, { "epoch": 0.41, "grad_norm": 0.3713163009677597, "learning_rate": 2.6496114180432672e-05, "loss": 0.0808, "step": 3760 }, { "epoch": 0.41, "grad_norm": 0.4287045072103712, "learning_rate": 2.6489385377633386e-05, "loss": 0.0985, "step": 3761 }, { "epoch": 0.41, "grad_norm": 0.42671047969529485, "learning_rate": 2.6482655753754657e-05, "loss": 0.089, "step": 3762 }, { "epoch": 0.41, "grad_norm": 0.5120856738625476, "learning_rate": 2.647592530964793e-05, "loss": 0.1093, "step": 3763 }, { "epoch": 0.41, "grad_norm": 0.4575059658714247, "learning_rate": 2.6469194046164818e-05, "loss": 0.0993, "step": 3764 }, { "epoch": 0.41, "grad_norm": 0.29188314845128427, "learning_rate": 2.6462461964156987e-05, "loss": 0.066, "step": 3765 }, { "epoch": 0.41, "grad_norm": 0.34060030068957126, "learning_rate": 2.6455729064476227e-05, "loss": 0.0856, "step": 3766 }, { "epoch": 0.41, "grad_norm": 0.4166301969621244, "learning_rate": 2.6448995347974423e-05, "loss": 0.0729, "step": 3767 }, { "epoch": 0.41, "grad_norm": 0.39070079068205676, "learning_rate": 2.6442260815503575e-05, "loss": 0.092, "step": 3768 }, { "epoch": 0.41, "grad_norm": 0.31346189755751785, "learning_rate": 2.6435525467915775e-05, "loss": 0.0607, "step": 3769 }, { "epoch": 0.41, "grad_norm": 0.46441492769011705, "learning_rate": 2.6428789306063233e-05, "loss": 0.0963, "step": 3770 }, { "epoch": 0.41, "grad_norm": 0.33865248504711404, "learning_rate": 2.6422052330798246e-05, "loss": 0.0783, "step": 3771 }, { "epoch": 0.41, "grad_norm": 0.31178134299341204, "learning_rate": 2.6415314542973214e-05, "loss": 0.0598, "step": 3772 }, { "epoch": 0.41, "grad_norm": 0.3745344172516076, "learning_rate": 2.6408575943440663e-05, "loss": 0.0859, "step": 3773 }, { "epoch": 0.41, "grad_norm": 0.33082243838175024, "learning_rate": 2.6401836533053186e-05, "loss": 0.0788, "step": 3774 }, { "epoch": 0.41, "grad_norm": 0.49266325950777917, "learning_rate": 2.6395096312663503e-05, "loss": 0.1089, "step": 3775 }, { "epoch": 0.41, "grad_norm": 0.5309467852826272, "learning_rate": 2.6388355283124435e-05, "loss": 0.0993, "step": 3776 }, { "epoch": 0.41, "grad_norm": 0.38705393204583444, "learning_rate": 2.6381613445288904e-05, "loss": 0.0942, "step": 3777 }, { "epoch": 0.41, "grad_norm": 0.39752438575390575, "learning_rate": 2.637487080000992e-05, "loss": 0.0879, "step": 3778 }, { "epoch": 0.42, "grad_norm": 0.435103888246342, "learning_rate": 2.6368127348140617e-05, "loss": 0.0761, "step": 3779 }, { "epoch": 0.42, "grad_norm": 0.3783821335098992, "learning_rate": 2.636138309053421e-05, "loss": 0.0614, "step": 3780 }, { "epoch": 0.42, "grad_norm": 0.5078508215896558, "learning_rate": 2.6354638028044042e-05, "loss": 0.0948, "step": 3781 }, { "epoch": 0.42, "grad_norm": 0.34976695680993125, "learning_rate": 2.634789216152353e-05, "loss": 0.0645, "step": 3782 }, { "epoch": 0.42, "grad_norm": 0.4342542720676777, "learning_rate": 2.634114549182621e-05, "loss": 0.1061, "step": 3783 }, { "epoch": 0.42, "grad_norm": 0.4177716583741957, "learning_rate": 2.63343980198057e-05, "loss": 0.0839, "step": 3784 }, { "epoch": 0.42, "grad_norm": 0.33537022800299376, "learning_rate": 2.6327649746315765e-05, "loss": 0.0806, "step": 3785 }, { "epoch": 0.42, "grad_norm": 0.3686832642377394, "learning_rate": 2.6320900672210216e-05, "loss": 0.0631, "step": 3786 }, { "epoch": 0.42, "grad_norm": 0.35588485341823284, "learning_rate": 2.6314150798343e-05, "loss": 0.0804, "step": 3787 }, { "epoch": 0.42, "grad_norm": 0.4261561676683239, "learning_rate": 2.6307400125568147e-05, "loss": 0.0799, "step": 3788 }, { "epoch": 0.42, "grad_norm": 0.597507009385306, "learning_rate": 2.6300648654739807e-05, "loss": 0.1348, "step": 3789 }, { "epoch": 0.42, "grad_norm": 0.4175857678847839, "learning_rate": 2.629389638671221e-05, "loss": 0.0712, "step": 3790 }, { "epoch": 0.42, "grad_norm": 0.40433647588384997, "learning_rate": 2.6287143322339708e-05, "loss": 0.103, "step": 3791 }, { "epoch": 0.42, "grad_norm": 0.472471500576694, "learning_rate": 2.6280389462476733e-05, "loss": 0.0878, "step": 3792 }, { "epoch": 0.42, "grad_norm": 0.3449964142523197, "learning_rate": 2.6273634807977835e-05, "loss": 0.057, "step": 3793 }, { "epoch": 0.42, "grad_norm": 0.4295187879012468, "learning_rate": 2.6266879359697647e-05, "loss": 0.0954, "step": 3794 }, { "epoch": 0.42, "grad_norm": 0.3290037015312466, "learning_rate": 2.6260123118490923e-05, "loss": 0.088, "step": 3795 }, { "epoch": 0.42, "grad_norm": 0.3803637483056497, "learning_rate": 2.6253366085212503e-05, "loss": 0.0841, "step": 3796 }, { "epoch": 0.42, "grad_norm": 0.33345049299185653, "learning_rate": 2.624660826071733e-05, "loss": 0.079, "step": 3797 }, { "epoch": 0.42, "grad_norm": 0.6368027146097052, "learning_rate": 2.6239849645860447e-05, "loss": 0.1143, "step": 3798 }, { "epoch": 0.42, "grad_norm": 0.44586020801627463, "learning_rate": 2.6233090241497002e-05, "loss": 0.1039, "step": 3799 }, { "epoch": 0.42, "grad_norm": 0.34690206138625723, "learning_rate": 2.6226330048482233e-05, "loss": 0.0628, "step": 3800 }, { "epoch": 0.42, "grad_norm": 0.502676327784346, "learning_rate": 2.6219569067671492e-05, "loss": 0.1143, "step": 3801 }, { "epoch": 0.42, "grad_norm": 0.32050167870565444, "learning_rate": 2.6212807299920218e-05, "loss": 0.0525, "step": 3802 }, { "epoch": 0.42, "grad_norm": 0.4719324796922636, "learning_rate": 2.620604474608395e-05, "loss": 0.114, "step": 3803 }, { "epoch": 0.42, "grad_norm": 0.31918097886414964, "learning_rate": 2.6199281407018338e-05, "loss": 0.049, "step": 3804 }, { "epoch": 0.42, "grad_norm": 0.3494760269653155, "learning_rate": 2.6192517283579123e-05, "loss": 0.0743, "step": 3805 }, { "epoch": 0.42, "grad_norm": 0.3240728164767029, "learning_rate": 2.618575237662214e-05, "loss": 0.0634, "step": 3806 }, { "epoch": 0.42, "grad_norm": 0.3565976588093727, "learning_rate": 2.617898668700333e-05, "loss": 0.0686, "step": 3807 }, { "epoch": 0.42, "grad_norm": 0.38766959847374577, "learning_rate": 2.6172220215578743e-05, "loss": 0.0752, "step": 3808 }, { "epoch": 0.42, "grad_norm": 0.4119011411634392, "learning_rate": 2.6165452963204506e-05, "loss": 0.1115, "step": 3809 }, { "epoch": 0.42, "grad_norm": 0.38317223598408934, "learning_rate": 2.615868493073686e-05, "loss": 0.0786, "step": 3810 }, { "epoch": 0.42, "grad_norm": 0.27981722982421026, "learning_rate": 2.615191611903214e-05, "loss": 0.0609, "step": 3811 }, { "epoch": 0.42, "grad_norm": 0.3477730608375392, "learning_rate": 2.614514652894678e-05, "loss": 0.0709, "step": 3812 }, { "epoch": 0.42, "grad_norm": 0.5658702550461188, "learning_rate": 2.613837616133731e-05, "loss": 0.1093, "step": 3813 }, { "epoch": 0.42, "grad_norm": 0.3478805911135369, "learning_rate": 2.613160501706037e-05, "loss": 0.0701, "step": 3814 }, { "epoch": 0.42, "grad_norm": 0.37509291213098606, "learning_rate": 2.6124833096972677e-05, "loss": 0.0725, "step": 3815 }, { "epoch": 0.42, "grad_norm": 0.2818346833791343, "learning_rate": 2.6118060401931073e-05, "loss": 0.0612, "step": 3816 }, { "epoch": 0.42, "grad_norm": 0.38897457508093614, "learning_rate": 2.611128693279247e-05, "loss": 0.0706, "step": 3817 }, { "epoch": 0.42, "grad_norm": 0.5441346857605304, "learning_rate": 2.6104512690413906e-05, "loss": 0.1419, "step": 3818 }, { "epoch": 0.42, "grad_norm": 0.4475984958854371, "learning_rate": 2.6097737675652487e-05, "loss": 0.0892, "step": 3819 }, { "epoch": 0.42, "grad_norm": 0.42761378569786174, "learning_rate": 2.609096188936544e-05, "loss": 0.0782, "step": 3820 }, { "epoch": 0.42, "grad_norm": 0.36967465962854595, "learning_rate": 2.6084185332410083e-05, "loss": 0.0803, "step": 3821 }, { "epoch": 0.42, "grad_norm": 0.689075139420687, "learning_rate": 2.607740800564383e-05, "loss": 0.1065, "step": 3822 }, { "epoch": 0.42, "grad_norm": 0.4114882864692848, "learning_rate": 2.6070629909924185e-05, "loss": 0.0761, "step": 3823 }, { "epoch": 0.42, "grad_norm": 0.42522050522698307, "learning_rate": 2.6063851046108766e-05, "loss": 0.1191, "step": 3824 }, { "epoch": 0.42, "grad_norm": 0.37927305026909475, "learning_rate": 2.6057071415055274e-05, "loss": 0.0932, "step": 3825 }, { "epoch": 0.42, "grad_norm": 0.5835468605334241, "learning_rate": 2.605029101762152e-05, "loss": 0.1433, "step": 3826 }, { "epoch": 0.42, "grad_norm": 0.37207057282045725, "learning_rate": 2.604350985466539e-05, "loss": 0.0851, "step": 3827 }, { "epoch": 0.42, "grad_norm": 0.3467701860965927, "learning_rate": 2.6036727927044897e-05, "loss": 0.0813, "step": 3828 }, { "epoch": 0.42, "grad_norm": 0.4110846230723617, "learning_rate": 2.6029945235618124e-05, "loss": 0.0853, "step": 3829 }, { "epoch": 0.42, "grad_norm": 0.32372267815837463, "learning_rate": 2.602316178124327e-05, "loss": 0.0623, "step": 3830 }, { "epoch": 0.42, "grad_norm": 0.3964777602973468, "learning_rate": 2.6016377564778612e-05, "loss": 0.0718, "step": 3831 }, { "epoch": 0.42, "grad_norm": 0.4110845868239569, "learning_rate": 2.6009592587082538e-05, "loss": 0.0867, "step": 3832 }, { "epoch": 0.42, "grad_norm": 0.388139369532742, "learning_rate": 2.6002806849013535e-05, "loss": 0.0865, "step": 3833 }, { "epoch": 0.42, "grad_norm": 0.37089527243277676, "learning_rate": 2.5996020351430163e-05, "loss": 0.0884, "step": 3834 }, { "epoch": 0.42, "grad_norm": 0.3232354800170887, "learning_rate": 2.598923309519111e-05, "loss": 0.0937, "step": 3835 }, { "epoch": 0.42, "grad_norm": 0.4911285998765178, "learning_rate": 2.598244508115513e-05, "loss": 0.1024, "step": 3836 }, { "epoch": 0.42, "grad_norm": 0.3604990959539731, "learning_rate": 2.5975656310181103e-05, "loss": 0.0909, "step": 3837 }, { "epoch": 0.42, "grad_norm": 0.3541039897585125, "learning_rate": 2.596886678312797e-05, "loss": 0.0724, "step": 3838 }, { "epoch": 0.42, "grad_norm": 0.5309020473685049, "learning_rate": 2.5962076500854804e-05, "loss": 0.1191, "step": 3839 }, { "epoch": 0.42, "grad_norm": 0.4487732212094087, "learning_rate": 2.5955285464220738e-05, "loss": 0.1102, "step": 3840 }, { "epoch": 0.42, "grad_norm": 0.3782163021106454, "learning_rate": 2.5948493674085037e-05, "loss": 0.0765, "step": 3841 }, { "epoch": 0.42, "grad_norm": 0.41965018065998755, "learning_rate": 2.594170113130703e-05, "loss": 0.0857, "step": 3842 }, { "epoch": 0.42, "grad_norm": 0.3497844875516106, "learning_rate": 2.5934907836746163e-05, "loss": 0.055, "step": 3843 }, { "epoch": 0.42, "grad_norm": 0.3404159989616977, "learning_rate": 2.5928113791261952e-05, "loss": 0.0573, "step": 3844 }, { "epoch": 0.42, "grad_norm": 0.4448014723555292, "learning_rate": 2.5921318995714044e-05, "loss": 0.0887, "step": 3845 }, { "epoch": 0.42, "grad_norm": 0.35883558259336706, "learning_rate": 2.5914523450962147e-05, "loss": 0.0742, "step": 3846 }, { "epoch": 0.42, "grad_norm": 0.3590736784493898, "learning_rate": 2.590772715786609e-05, "loss": 0.0705, "step": 3847 }, { "epoch": 0.42, "grad_norm": 0.4567968305310795, "learning_rate": 2.590093011728577e-05, "loss": 0.1125, "step": 3848 }, { "epoch": 0.42, "grad_norm": 0.29760697124594165, "learning_rate": 2.5894132330081205e-05, "loss": 0.0494, "step": 3849 }, { "epoch": 0.42, "grad_norm": 0.39466525153293547, "learning_rate": 2.588733379711248e-05, "loss": 0.0771, "step": 3850 }, { "epoch": 0.42, "grad_norm": 0.3584027199138524, "learning_rate": 2.588053451923981e-05, "loss": 0.0608, "step": 3851 }, { "epoch": 0.42, "grad_norm": 0.43275859235390585, "learning_rate": 2.587373449732347e-05, "loss": 0.0755, "step": 3852 }, { "epoch": 0.42, "grad_norm": 0.4936958844672623, "learning_rate": 2.586693373222385e-05, "loss": 0.0934, "step": 3853 }, { "epoch": 0.42, "grad_norm": 0.33962295199700926, "learning_rate": 2.5860132224801424e-05, "loss": 0.0641, "step": 3854 }, { "epoch": 0.42, "grad_norm": 0.35557506354818935, "learning_rate": 2.5853329975916767e-05, "loss": 0.063, "step": 3855 }, { "epoch": 0.42, "grad_norm": 0.44622992660082794, "learning_rate": 2.584652698643054e-05, "loss": 0.0846, "step": 3856 }, { "epoch": 0.42, "grad_norm": 0.3580092309901829, "learning_rate": 2.58397232572035e-05, "loss": 0.0455, "step": 3857 }, { "epoch": 0.42, "grad_norm": 0.362799581516218, "learning_rate": 2.58329187890965e-05, "loss": 0.064, "step": 3858 }, { "epoch": 0.42, "grad_norm": 0.3767330930836038, "learning_rate": 2.5826113582970493e-05, "loss": 0.0745, "step": 3859 }, { "epoch": 0.42, "grad_norm": 0.4787991053842968, "learning_rate": 2.581930763968651e-05, "loss": 0.1001, "step": 3860 }, { "epoch": 0.42, "grad_norm": 0.4000528352830764, "learning_rate": 2.581250096010569e-05, "loss": 0.0761, "step": 3861 }, { "epoch": 0.42, "grad_norm": 0.49798320288462, "learning_rate": 2.580569354508925e-05, "loss": 0.099, "step": 3862 }, { "epoch": 0.42, "grad_norm": 0.3794917076685017, "learning_rate": 2.5798885395498518e-05, "loss": 0.0764, "step": 3863 }, { "epoch": 0.42, "grad_norm": 0.47551176836190107, "learning_rate": 2.5792076512194895e-05, "loss": 0.0974, "step": 3864 }, { "epoch": 0.42, "grad_norm": 0.4815467897757516, "learning_rate": 2.5785266896039897e-05, "loss": 0.1136, "step": 3865 }, { "epoch": 0.42, "grad_norm": 0.5750016046584585, "learning_rate": 2.5778456547895117e-05, "loss": 0.1074, "step": 3866 }, { "epoch": 0.42, "grad_norm": 0.3401514830494021, "learning_rate": 2.577164546862224e-05, "loss": 0.0758, "step": 3867 }, { "epoch": 0.42, "grad_norm": 0.40080134097896225, "learning_rate": 2.5764833659083053e-05, "loss": 0.0784, "step": 3868 }, { "epoch": 0.42, "grad_norm": 0.507016746889598, "learning_rate": 2.5758021120139427e-05, "loss": 0.2175, "step": 3869 }, { "epoch": 0.42, "grad_norm": 0.39052492767220004, "learning_rate": 2.5751207852653334e-05, "loss": 0.073, "step": 3870 }, { "epoch": 0.43, "grad_norm": 0.38043666759166145, "learning_rate": 2.574439385748683e-05, "loss": 0.0897, "step": 3871 }, { "epoch": 0.43, "grad_norm": 0.36342193586589333, "learning_rate": 2.5737579135502068e-05, "loss": 0.0729, "step": 3872 }, { "epoch": 0.43, "grad_norm": 0.4488029545962984, "learning_rate": 2.573076368756129e-05, "loss": 0.1009, "step": 3873 }, { "epoch": 0.43, "grad_norm": 0.34412541263831325, "learning_rate": 2.572394751452683e-05, "loss": 0.0664, "step": 3874 }, { "epoch": 0.43, "grad_norm": 0.3933295275549851, "learning_rate": 2.571713061726111e-05, "loss": 0.0671, "step": 3875 }, { "epoch": 0.43, "grad_norm": 0.44359016293388487, "learning_rate": 2.5710312996626667e-05, "loss": 0.1214, "step": 3876 }, { "epoch": 0.43, "grad_norm": 0.3049379077814045, "learning_rate": 2.5703494653486084e-05, "loss": 0.0718, "step": 3877 }, { "epoch": 0.43, "grad_norm": 0.3725122145435892, "learning_rate": 2.569667558870209e-05, "loss": 0.0762, "step": 3878 }, { "epoch": 0.43, "grad_norm": 0.35834157632987024, "learning_rate": 2.5689855803137453e-05, "loss": 0.0893, "step": 3879 }, { "epoch": 0.43, "grad_norm": 0.527352905193966, "learning_rate": 2.5683035297655076e-05, "loss": 0.1036, "step": 3880 }, { "epoch": 0.43, "grad_norm": 0.38136529820622933, "learning_rate": 2.567621407311792e-05, "loss": 0.0634, "step": 3881 }, { "epoch": 0.43, "grad_norm": 0.38202547408342197, "learning_rate": 2.566939213038906e-05, "loss": 0.0956, "step": 3882 }, { "epoch": 0.43, "grad_norm": 0.41750340092010485, "learning_rate": 2.5662569470331645e-05, "loss": 0.0833, "step": 3883 }, { "epoch": 0.43, "grad_norm": 0.40203247251002283, "learning_rate": 2.5655746093808934e-05, "loss": 0.0878, "step": 3884 }, { "epoch": 0.43, "grad_norm": 0.3290538583473927, "learning_rate": 2.564892200168425e-05, "loss": 0.0732, "step": 3885 }, { "epoch": 0.43, "grad_norm": 0.36282668849068805, "learning_rate": 2.564209719482104e-05, "loss": 0.0605, "step": 3886 }, { "epoch": 0.43, "grad_norm": 0.5207934427879871, "learning_rate": 2.5635271674082805e-05, "loss": 0.1313, "step": 3887 }, { "epoch": 0.43, "grad_norm": 0.38136279751346147, "learning_rate": 2.5628445440333164e-05, "loss": 0.0857, "step": 3888 }, { "epoch": 0.43, "grad_norm": 0.43607489905622726, "learning_rate": 2.562161849443582e-05, "loss": 0.0786, "step": 3889 }, { "epoch": 0.43, "grad_norm": 0.41371748603462327, "learning_rate": 2.5614790837254555e-05, "loss": 0.0724, "step": 3890 }, { "epoch": 0.43, "grad_norm": 0.37772882530451274, "learning_rate": 2.5607962469653253e-05, "loss": 0.0848, "step": 3891 }, { "epoch": 0.43, "grad_norm": 0.6028516505744399, "learning_rate": 2.5601133392495886e-05, "loss": 0.0796, "step": 3892 }, { "epoch": 0.43, "grad_norm": 0.3652959353539588, "learning_rate": 2.5594303606646503e-05, "loss": 0.0651, "step": 3893 }, { "epoch": 0.43, "grad_norm": 0.46588016021589185, "learning_rate": 2.558747311296926e-05, "loss": 0.097, "step": 3894 }, { "epoch": 0.43, "grad_norm": 0.5002860800577672, "learning_rate": 2.5580641912328402e-05, "loss": 0.113, "step": 3895 }, { "epoch": 0.43, "grad_norm": 0.3927328269902187, "learning_rate": 2.5573810005588245e-05, "loss": 0.0885, "step": 3896 }, { "epoch": 0.43, "grad_norm": 0.4902156963057521, "learning_rate": 2.5566977393613224e-05, "loss": 0.1, "step": 3897 }, { "epoch": 0.43, "grad_norm": 0.4662990229558147, "learning_rate": 2.5560144077267826e-05, "loss": 0.097, "step": 3898 }, { "epoch": 0.43, "grad_norm": 0.43230247675543354, "learning_rate": 2.555331005741666e-05, "loss": 0.0779, "step": 3899 }, { "epoch": 0.43, "grad_norm": 0.3246016252421191, "learning_rate": 2.5546475334924398e-05, "loss": 0.0541, "step": 3900 }, { "epoch": 0.43, "grad_norm": 0.37721433137971877, "learning_rate": 2.5539639910655827e-05, "loss": 0.0908, "step": 3901 }, { "epoch": 0.43, "grad_norm": 0.32857107313689266, "learning_rate": 2.5532803785475802e-05, "loss": 0.0728, "step": 3902 }, { "epoch": 0.43, "grad_norm": 0.3931847814642713, "learning_rate": 2.552596696024928e-05, "loss": 0.0752, "step": 3903 }, { "epoch": 0.43, "grad_norm": 0.4531462269776736, "learning_rate": 2.5519129435841298e-05, "loss": 0.1185, "step": 3904 }, { "epoch": 0.43, "grad_norm": 0.44381032184495556, "learning_rate": 2.5512291213116984e-05, "loss": 0.0802, "step": 3905 }, { "epoch": 0.43, "grad_norm": 0.33268377474201083, "learning_rate": 2.550545229294155e-05, "loss": 0.0807, "step": 3906 }, { "epoch": 0.43, "grad_norm": 0.5052706558746696, "learning_rate": 2.549861267618031e-05, "loss": 0.1216, "step": 3907 }, { "epoch": 0.43, "grad_norm": 0.39228884158921085, "learning_rate": 2.549177236369865e-05, "loss": 0.07, "step": 3908 }, { "epoch": 0.43, "grad_norm": 0.3824918143325257, "learning_rate": 2.5484931356362057e-05, "loss": 0.0831, "step": 3909 }, { "epoch": 0.43, "grad_norm": 0.38590434082532665, "learning_rate": 2.5478089655036086e-05, "loss": 0.0599, "step": 3910 }, { "epoch": 0.43, "grad_norm": 0.30644358238767555, "learning_rate": 2.5471247260586416e-05, "loss": 0.0674, "step": 3911 }, { "epoch": 0.43, "grad_norm": 0.3954896522402884, "learning_rate": 2.5464404173878775e-05, "loss": 0.0868, "step": 3912 }, { "epoch": 0.43, "grad_norm": 0.35155877005399805, "learning_rate": 2.5457560395779003e-05, "loss": 0.0624, "step": 3913 }, { "epoch": 0.43, "grad_norm": 0.35703782257107175, "learning_rate": 2.5450715927153012e-05, "loss": 0.0591, "step": 3914 }, { "epoch": 0.43, "grad_norm": 0.3296240442686724, "learning_rate": 2.5443870768866816e-05, "loss": 0.0662, "step": 3915 }, { "epoch": 0.43, "grad_norm": 0.39548439615890185, "learning_rate": 2.54370249217865e-05, "loss": 0.0521, "step": 3916 }, { "epoch": 0.43, "grad_norm": 0.5679655513122402, "learning_rate": 2.543017838677826e-05, "loss": 0.1113, "step": 3917 }, { "epoch": 0.43, "grad_norm": 0.3715630224431169, "learning_rate": 2.542333116470835e-05, "loss": 0.0679, "step": 3918 }, { "epoch": 0.43, "grad_norm": 0.4330067140309773, "learning_rate": 2.5416483256443135e-05, "loss": 0.0935, "step": 3919 }, { "epoch": 0.43, "grad_norm": 0.4378313444712211, "learning_rate": 2.5409634662849053e-05, "loss": 0.0766, "step": 3920 }, { "epoch": 0.43, "grad_norm": 0.40397802855095727, "learning_rate": 2.540278538479263e-05, "loss": 0.0997, "step": 3921 }, { "epoch": 0.43, "grad_norm": 0.3989035367892105, "learning_rate": 2.5395935423140487e-05, "loss": 0.059, "step": 3922 }, { "epoch": 0.43, "grad_norm": 0.38609519933523473, "learning_rate": 2.5389084778759324e-05, "loss": 0.1034, "step": 3923 }, { "epoch": 0.43, "grad_norm": 0.38970434413991617, "learning_rate": 2.5382233452515927e-05, "loss": 0.0812, "step": 3924 }, { "epoch": 0.43, "grad_norm": 0.39774739583000324, "learning_rate": 2.5375381445277176e-05, "loss": 0.0678, "step": 3925 }, { "epoch": 0.43, "grad_norm": 0.5641328107384846, "learning_rate": 2.5368528757910027e-05, "loss": 0.1254, "step": 3926 }, { "epoch": 0.43, "grad_norm": 0.408759108100696, "learning_rate": 2.5361675391281523e-05, "loss": 0.0795, "step": 3927 }, { "epoch": 0.43, "grad_norm": 0.5993204570134232, "learning_rate": 2.5354821346258813e-05, "loss": 0.0689, "step": 3928 }, { "epoch": 0.43, "grad_norm": 0.4668339989875443, "learning_rate": 2.5347966623709094e-05, "loss": 0.1132, "step": 3929 }, { "epoch": 0.43, "grad_norm": 0.377967004633986, "learning_rate": 2.534111122449969e-05, "loss": 0.1083, "step": 3930 }, { "epoch": 0.43, "grad_norm": 0.3502302464061644, "learning_rate": 2.533425514949797e-05, "loss": 0.0809, "step": 3931 }, { "epoch": 0.43, "grad_norm": 0.5067858368030378, "learning_rate": 2.532739839957143e-05, "loss": 0.1293, "step": 3932 }, { "epoch": 0.43, "grad_norm": 0.31964821668288834, "learning_rate": 2.5320540975587617e-05, "loss": 0.0754, "step": 3933 }, { "epoch": 0.43, "grad_norm": 0.3985285748380224, "learning_rate": 2.5313682878414185e-05, "loss": 0.0714, "step": 3934 }, { "epoch": 0.43, "grad_norm": 0.3863101303183432, "learning_rate": 2.5306824108918857e-05, "loss": 0.1071, "step": 3935 }, { "epoch": 0.43, "grad_norm": 0.36849703517249066, "learning_rate": 2.529996466796946e-05, "loss": 0.0837, "step": 3936 }, { "epoch": 0.43, "grad_norm": 0.3209938385793989, "learning_rate": 2.5293104556433888e-05, "loss": 0.0866, "step": 3937 }, { "epoch": 0.43, "grad_norm": 0.42146012428243845, "learning_rate": 2.5286243775180128e-05, "loss": 0.0821, "step": 3938 }, { "epoch": 0.43, "grad_norm": 0.351332801378856, "learning_rate": 2.527938232507625e-05, "loss": 0.0863, "step": 3939 }, { "epoch": 0.43, "grad_norm": 0.3176020173379286, "learning_rate": 2.5272520206990418e-05, "loss": 0.0598, "step": 3940 }, { "epoch": 0.43, "grad_norm": 0.3273192002734414, "learning_rate": 2.526565742179086e-05, "loss": 0.0747, "step": 3941 }, { "epoch": 0.43, "grad_norm": 0.5135218760279558, "learning_rate": 2.5258793970345908e-05, "loss": 0.0884, "step": 3942 }, { "epoch": 0.43, "grad_norm": 0.34495288454595063, "learning_rate": 2.5251929853523968e-05, "loss": 0.0845, "step": 3943 }, { "epoch": 0.43, "grad_norm": 0.3591409003684262, "learning_rate": 2.5245065072193534e-05, "loss": 0.0627, "step": 3944 }, { "epoch": 0.43, "grad_norm": 0.3573670267014216, "learning_rate": 2.523819962722318e-05, "loss": 0.0583, "step": 3945 }, { "epoch": 0.43, "grad_norm": 0.49663062593992613, "learning_rate": 2.5231333519481577e-05, "loss": 0.1108, "step": 3946 }, { "epoch": 0.43, "grad_norm": 0.3766905903111116, "learning_rate": 2.5224466749837454e-05, "loss": 0.0649, "step": 3947 }, { "epoch": 0.43, "grad_norm": 0.34673657017796344, "learning_rate": 2.5217599319159654e-05, "loss": 0.0647, "step": 3948 }, { "epoch": 0.43, "grad_norm": 0.4860739292578158, "learning_rate": 2.521073122831708e-05, "loss": 0.1043, "step": 3949 }, { "epoch": 0.43, "grad_norm": 0.33606990156772215, "learning_rate": 2.5203862478178732e-05, "loss": 0.048, "step": 3950 }, { "epoch": 0.43, "grad_norm": 0.5270395602519095, "learning_rate": 2.5196993069613688e-05, "loss": 0.1245, "step": 3951 }, { "epoch": 0.43, "grad_norm": 0.3578753278786093, "learning_rate": 2.519012300349111e-05, "loss": 0.085, "step": 3952 }, { "epoch": 0.43, "grad_norm": 0.4156731283674217, "learning_rate": 2.5183252280680242e-05, "loss": 0.0793, "step": 3953 }, { "epoch": 0.43, "grad_norm": 0.3773470189365616, "learning_rate": 2.5176380902050418e-05, "loss": 0.0728, "step": 3954 }, { "epoch": 0.43, "grad_norm": 0.3470449323503195, "learning_rate": 2.5169508868471048e-05, "loss": 0.0757, "step": 3955 }, { "epoch": 0.43, "grad_norm": 0.4141831851916955, "learning_rate": 2.516263618081162e-05, "loss": 0.079, "step": 3956 }, { "epoch": 0.43, "grad_norm": 0.298639599540234, "learning_rate": 2.5155762839941726e-05, "loss": 0.051, "step": 3957 }, { "epoch": 0.43, "grad_norm": 0.4796281610125741, "learning_rate": 2.5148888846731007e-05, "loss": 0.1312, "step": 3958 }, { "epoch": 0.43, "grad_norm": 0.35793940280217373, "learning_rate": 2.5142014202049222e-05, "loss": 0.0809, "step": 3959 }, { "epoch": 0.43, "grad_norm": 0.4509660655997747, "learning_rate": 2.5135138906766185e-05, "loss": 0.0933, "step": 3960 }, { "epoch": 0.43, "grad_norm": 0.41579605182275076, "learning_rate": 2.512826296175181e-05, "loss": 0.0902, "step": 3961 }, { "epoch": 0.44, "grad_norm": 0.48505695082384526, "learning_rate": 2.512138636787608e-05, "loss": 0.0906, "step": 3962 }, { "epoch": 0.44, "grad_norm": 0.4352575461595124, "learning_rate": 2.5114509126009076e-05, "loss": 0.1035, "step": 3963 }, { "epoch": 0.44, "grad_norm": 0.4130864064459698, "learning_rate": 2.510763123702094e-05, "loss": 0.0773, "step": 3964 }, { "epoch": 0.44, "grad_norm": 0.33966078162726066, "learning_rate": 2.5100752701781922e-05, "loss": 0.0669, "step": 3965 }, { "epoch": 0.44, "grad_norm": 0.4258009879760604, "learning_rate": 2.5093873521162323e-05, "loss": 0.0557, "step": 3966 }, { "epoch": 0.44, "grad_norm": 0.3693169268673469, "learning_rate": 2.5086993696032556e-05, "loss": 0.0948, "step": 3967 }, { "epoch": 0.44, "grad_norm": 0.5109936184643279, "learning_rate": 2.5080113227263093e-05, "loss": 0.118, "step": 3968 }, { "epoch": 0.44, "grad_norm": 0.3214970874706362, "learning_rate": 2.50732321157245e-05, "loss": 0.0739, "step": 3969 }, { "epoch": 0.44, "grad_norm": 0.28465755175314744, "learning_rate": 2.5066350362287407e-05, "loss": 0.0499, "step": 3970 }, { "epoch": 0.44, "grad_norm": 0.27761344154614254, "learning_rate": 2.5059467967822562e-05, "loss": 0.0609, "step": 3971 }, { "epoch": 0.44, "grad_norm": 0.31501857787934107, "learning_rate": 2.5052584933200756e-05, "loss": 0.072, "step": 3972 }, { "epoch": 0.44, "grad_norm": 0.5530520493413115, "learning_rate": 2.5045701259292878e-05, "loss": 0.1232, "step": 3973 }, { "epoch": 0.44, "grad_norm": 0.3356339613643383, "learning_rate": 2.5038816946969894e-05, "loss": 0.0767, "step": 3974 }, { "epoch": 0.44, "grad_norm": 0.3295410572254891, "learning_rate": 2.5031931997102857e-05, "loss": 0.0836, "step": 3975 }, { "epoch": 0.44, "grad_norm": 0.42266841736941374, "learning_rate": 2.5025046410562888e-05, "loss": 0.0914, "step": 3976 }, { "epoch": 0.44, "grad_norm": 0.46362502352276597, "learning_rate": 2.5018160188221208e-05, "loss": 0.0771, "step": 3977 }, { "epoch": 0.44, "grad_norm": 0.3267458816055714, "learning_rate": 2.501127333094909e-05, "loss": 0.0656, "step": 3978 }, { "epoch": 0.44, "grad_norm": 0.3580363260704932, "learning_rate": 2.5004385839617915e-05, "loss": 0.0699, "step": 3979 }, { "epoch": 0.44, "grad_norm": 0.42407254673098055, "learning_rate": 2.4997497715099134e-05, "loss": 0.082, "step": 3980 }, { "epoch": 0.44, "grad_norm": 0.282839548282268, "learning_rate": 2.499060895826428e-05, "loss": 0.0617, "step": 3981 }, { "epoch": 0.44, "grad_norm": 0.4409810102476525, "learning_rate": 2.4983719569984955e-05, "loss": 0.0892, "step": 3982 }, { "epoch": 0.44, "grad_norm": 0.32086296001518605, "learning_rate": 2.4976829551132857e-05, "loss": 0.0832, "step": 3983 }, { "epoch": 0.44, "grad_norm": 0.3701342173661875, "learning_rate": 2.496993890257975e-05, "loss": 0.0731, "step": 3984 }, { "epoch": 0.44, "grad_norm": 0.39735504914365444, "learning_rate": 2.496304762519749e-05, "loss": 0.074, "step": 3985 }, { "epoch": 0.44, "grad_norm": 0.32314524968961716, "learning_rate": 2.4956155719858e-05, "loss": 0.0565, "step": 3986 }, { "epoch": 0.44, "grad_norm": 0.35724220554479313, "learning_rate": 2.4949263187433298e-05, "loss": 0.0757, "step": 3987 }, { "epoch": 0.44, "grad_norm": 0.34499910295756825, "learning_rate": 2.4942370028795456e-05, "loss": 0.0723, "step": 3988 }, { "epoch": 0.44, "grad_norm": 0.3835298084799812, "learning_rate": 2.4935476244816662e-05, "loss": 0.0653, "step": 3989 }, { "epoch": 0.44, "grad_norm": 0.5876722975966179, "learning_rate": 2.4928581836369147e-05, "loss": 0.1437, "step": 3990 }, { "epoch": 0.44, "grad_norm": 0.4231232965961918, "learning_rate": 2.492168680432525e-05, "loss": 0.0795, "step": 3991 }, { "epoch": 0.44, "grad_norm": 0.5047027443865797, "learning_rate": 2.4914791149557358e-05, "loss": 0.0805, "step": 3992 }, { "epoch": 0.44, "grad_norm": 0.38703264155347544, "learning_rate": 2.490789487293797e-05, "loss": 0.0692, "step": 3993 }, { "epoch": 0.44, "grad_norm": 0.3729902294351956, "learning_rate": 2.490099797533964e-05, "loss": 0.0824, "step": 3994 }, { "epoch": 0.44, "grad_norm": 0.3241176447579964, "learning_rate": 2.489410045763501e-05, "loss": 0.0763, "step": 3995 }, { "epoch": 0.44, "grad_norm": 0.3486858295551772, "learning_rate": 2.48872023206968e-05, "loss": 0.0575, "step": 3996 }, { "epoch": 0.44, "grad_norm": 0.6431566781860486, "learning_rate": 2.4880303565397807e-05, "loss": 0.1838, "step": 3997 }, { "epoch": 0.44, "grad_norm": 0.3290287922171316, "learning_rate": 2.487340419261091e-05, "loss": 0.063, "step": 3998 }, { "epoch": 0.44, "grad_norm": 0.37963091974224433, "learning_rate": 2.4866504203209053e-05, "loss": 0.0605, "step": 3999 }, { "epoch": 0.44, "grad_norm": 0.36840963900559937, "learning_rate": 2.485960359806528e-05, "loss": 0.0723, "step": 4000 }, { "epoch": 0.44, "grad_norm": 0.43069675866398194, "learning_rate": 2.4852702378052694e-05, "loss": 0.0991, "step": 4001 }, { "epoch": 0.44, "grad_norm": 0.3638299469413789, "learning_rate": 2.4845800544044483e-05, "loss": 0.0693, "step": 4002 }, { "epoch": 0.44, "grad_norm": 0.3283430464916656, "learning_rate": 2.483889809691391e-05, "loss": 0.0718, "step": 4003 }, { "epoch": 0.44, "grad_norm": 0.303546419385742, "learning_rate": 2.4831995037534325e-05, "loss": 0.0723, "step": 4004 }, { "epoch": 0.44, "grad_norm": 0.3495961938003505, "learning_rate": 2.4825091366779136e-05, "loss": 0.0533, "step": 4005 }, { "epoch": 0.44, "grad_norm": 0.39181733310947897, "learning_rate": 2.481818708552185e-05, "loss": 0.0864, "step": 4006 }, { "epoch": 0.44, "grad_norm": 0.42711427235381716, "learning_rate": 2.4811282194636043e-05, "loss": 0.0966, "step": 4007 }, { "epoch": 0.44, "grad_norm": 0.44701205066037386, "learning_rate": 2.480437669499537e-05, "loss": 0.1001, "step": 4008 }, { "epoch": 0.44, "grad_norm": 0.3949992359733436, "learning_rate": 2.479747058747354e-05, "loss": 0.1057, "step": 4009 }, { "epoch": 0.44, "grad_norm": 0.33476006626779853, "learning_rate": 2.479056387294438e-05, "loss": 0.0857, "step": 4010 }, { "epoch": 0.44, "grad_norm": 0.4069646271932735, "learning_rate": 2.4783656552281766e-05, "loss": 0.0778, "step": 4011 }, { "epoch": 0.44, "grad_norm": 0.3299116358070327, "learning_rate": 2.4776748626359656e-05, "loss": 0.0695, "step": 4012 }, { "epoch": 0.44, "grad_norm": 0.4402597103596856, "learning_rate": 2.4769840096052083e-05, "loss": 0.1146, "step": 4013 }, { "epoch": 0.44, "grad_norm": 0.3558807291267144, "learning_rate": 2.4762930962233164e-05, "loss": 0.0752, "step": 4014 }, { "epoch": 0.44, "grad_norm": 0.430625100654845, "learning_rate": 2.475602122577709e-05, "loss": 0.0754, "step": 4015 }, { "epoch": 0.44, "grad_norm": 0.39156348978025585, "learning_rate": 2.4749110887558114e-05, "loss": 0.0938, "step": 4016 }, { "epoch": 0.44, "grad_norm": 0.39420705638087716, "learning_rate": 2.4742199948450596e-05, "loss": 0.0697, "step": 4017 }, { "epoch": 0.44, "grad_norm": 0.41255108271373403, "learning_rate": 2.4735288409328937e-05, "loss": 0.0907, "step": 4018 }, { "epoch": 0.44, "grad_norm": 0.41751037847445255, "learning_rate": 2.4728376271067643e-05, "loss": 0.0855, "step": 4019 }, { "epoch": 0.44, "grad_norm": 0.33957999506676667, "learning_rate": 2.472146353454127e-05, "loss": 0.0771, "step": 4020 }, { "epoch": 0.44, "grad_norm": 0.33474912702932336, "learning_rate": 2.471455020062447e-05, "loss": 0.0824, "step": 4021 }, { "epoch": 0.44, "grad_norm": 0.3534427588802655, "learning_rate": 2.4707636270191956e-05, "loss": 0.0714, "step": 4022 }, { "epoch": 0.44, "grad_norm": 0.41002302276704905, "learning_rate": 2.4700721744118535e-05, "loss": 0.0804, "step": 4023 }, { "epoch": 0.44, "grad_norm": 0.40380173000420444, "learning_rate": 2.4693806623279074e-05, "loss": 0.0719, "step": 4024 }, { "epoch": 0.44, "grad_norm": 0.4583213725118816, "learning_rate": 2.4686890908548517e-05, "loss": 0.1047, "step": 4025 }, { "epoch": 0.44, "grad_norm": 0.4138015788679207, "learning_rate": 2.4679974600801882e-05, "loss": 0.0746, "step": 4026 }, { "epoch": 0.44, "grad_norm": 0.3183128580383109, "learning_rate": 2.4673057700914277e-05, "loss": 0.0826, "step": 4027 }, { "epoch": 0.44, "grad_norm": 0.581030067755732, "learning_rate": 2.4666140209760862e-05, "loss": 0.1351, "step": 4028 }, { "epoch": 0.44, "grad_norm": 0.34632531980501, "learning_rate": 2.4659222128216895e-05, "loss": 0.0547, "step": 4029 }, { "epoch": 0.44, "grad_norm": 0.31736662603331023, "learning_rate": 2.4652303457157677e-05, "loss": 0.0688, "step": 4030 }, { "epoch": 0.44, "grad_norm": 0.46296102254072374, "learning_rate": 2.464538419745862e-05, "loss": 0.0914, "step": 4031 }, { "epoch": 0.44, "grad_norm": 0.32812791777630473, "learning_rate": 2.4638464349995186e-05, "loss": 0.0753, "step": 4032 }, { "epoch": 0.44, "grad_norm": 0.323221327001955, "learning_rate": 2.463154391564293e-05, "loss": 0.0541, "step": 4033 }, { "epoch": 0.44, "grad_norm": 0.38021630013188373, "learning_rate": 2.4624622895277462e-05, "loss": 0.0684, "step": 4034 }, { "epoch": 0.44, "grad_norm": 0.27850444123872053, "learning_rate": 2.4617701289774477e-05, "loss": 0.0638, "step": 4035 }, { "epoch": 0.44, "grad_norm": 0.4160848568901733, "learning_rate": 2.461077910000974e-05, "loss": 0.0695, "step": 4036 }, { "epoch": 0.44, "grad_norm": 0.40860013500935866, "learning_rate": 2.4603856326859094e-05, "loss": 0.0919, "step": 4037 }, { "epoch": 0.44, "grad_norm": 0.36864440100999635, "learning_rate": 2.4596932971198446e-05, "loss": 0.0667, "step": 4038 }, { "epoch": 0.44, "grad_norm": 0.36940348313656174, "learning_rate": 2.4590009033903796e-05, "loss": 0.0781, "step": 4039 }, { "epoch": 0.44, "grad_norm": 0.38139553966119516, "learning_rate": 2.4583084515851194e-05, "loss": 0.0787, "step": 4040 }, { "epoch": 0.44, "grad_norm": 0.4145859342841729, "learning_rate": 2.457615941791679e-05, "loss": 0.0802, "step": 4041 }, { "epoch": 0.44, "grad_norm": 0.48217560200280074, "learning_rate": 2.456923374097678e-05, "loss": 0.0854, "step": 4042 }, { "epoch": 0.44, "grad_norm": 0.3827295018742603, "learning_rate": 2.4562307485907456e-05, "loss": 0.0827, "step": 4043 }, { "epoch": 0.44, "grad_norm": 0.29226932960738133, "learning_rate": 2.4555380653585158e-05, "loss": 0.0631, "step": 4044 }, { "epoch": 0.44, "grad_norm": 0.4157865368128142, "learning_rate": 2.454845324488633e-05, "loss": 0.0776, "step": 4045 }, { "epoch": 0.44, "grad_norm": 0.42253312240647284, "learning_rate": 2.4541525260687468e-05, "loss": 0.0776, "step": 4046 }, { "epoch": 0.44, "grad_norm": 0.5335129895957245, "learning_rate": 2.453459670186514e-05, "loss": 0.1128, "step": 4047 }, { "epoch": 0.44, "grad_norm": 0.3807513744106521, "learning_rate": 2.4527667569295996e-05, "loss": 0.074, "step": 4048 }, { "epoch": 0.44, "grad_norm": 0.3970192181634317, "learning_rate": 2.4520737863856758e-05, "loss": 0.0682, "step": 4049 }, { "epoch": 0.44, "grad_norm": 0.4232716224902008, "learning_rate": 2.4513807586424214e-05, "loss": 0.0648, "step": 4050 }, { "epoch": 0.44, "grad_norm": 0.383405304282245, "learning_rate": 2.450687673787523e-05, "loss": 0.088, "step": 4051 }, { "epoch": 0.44, "grad_norm": 0.3586610253084197, "learning_rate": 2.449994531908675e-05, "loss": 0.0658, "step": 4052 }, { "epoch": 0.45, "grad_norm": 0.3253901588097721, "learning_rate": 2.4493013330935762e-05, "loss": 0.0674, "step": 4053 }, { "epoch": 0.45, "grad_norm": 0.4307136593586819, "learning_rate": 2.4486080774299364e-05, "loss": 0.0802, "step": 4054 }, { "epoch": 0.45, "grad_norm": 0.3483167332115282, "learning_rate": 2.4479147650054703e-05, "loss": 0.0704, "step": 4055 }, { "epoch": 0.45, "grad_norm": 0.3625551987419031, "learning_rate": 2.4472213959079002e-05, "loss": 0.0621, "step": 4056 }, { "epoch": 0.45, "grad_norm": 0.4295798947239526, "learning_rate": 2.4465279702249555e-05, "loss": 0.1044, "step": 4057 }, { "epoch": 0.45, "grad_norm": 0.40744321992138677, "learning_rate": 2.4458344880443735e-05, "loss": 0.0983, "step": 4058 }, { "epoch": 0.45, "grad_norm": 0.3277954535693289, "learning_rate": 2.4451409494538982e-05, "loss": 0.0661, "step": 4059 }, { "epoch": 0.45, "grad_norm": 0.41520874145278486, "learning_rate": 2.4444473545412804e-05, "loss": 0.0648, "step": 4060 }, { "epoch": 0.45, "grad_norm": 0.3482586859422912, "learning_rate": 2.4437537033942778e-05, "loss": 0.0786, "step": 4061 }, { "epoch": 0.45, "grad_norm": 0.43237411524739194, "learning_rate": 2.4430599961006563e-05, "loss": 0.0846, "step": 4062 }, { "epoch": 0.45, "grad_norm": 0.36528353436764405, "learning_rate": 2.4423662327481874e-05, "loss": 0.0719, "step": 4063 }, { "epoch": 0.45, "grad_norm": 0.3149174407593809, "learning_rate": 2.441672413424652e-05, "loss": 0.0607, "step": 4064 }, { "epoch": 0.45, "grad_norm": 0.2749867013403368, "learning_rate": 2.440978538217835e-05, "loss": 0.0661, "step": 4065 }, { "epoch": 0.45, "grad_norm": 0.35795634597474113, "learning_rate": 2.4402846072155313e-05, "loss": 0.0736, "step": 4066 }, { "epoch": 0.45, "grad_norm": 0.4154880025419162, "learning_rate": 2.4395906205055412e-05, "loss": 0.0686, "step": 4067 }, { "epoch": 0.45, "grad_norm": 0.4540987240282171, "learning_rate": 2.4388965781756727e-05, "loss": 0.0931, "step": 4068 }, { "epoch": 0.45, "grad_norm": 0.2978381419675977, "learning_rate": 2.4382024803137396e-05, "loss": 0.0669, "step": 4069 }, { "epoch": 0.45, "grad_norm": 0.44983027754362054, "learning_rate": 2.437508327007565e-05, "loss": 0.1031, "step": 4070 }, { "epoch": 0.45, "grad_norm": 0.33034941004204904, "learning_rate": 2.4368141183449766e-05, "loss": 0.0753, "step": 4071 }, { "epoch": 0.45, "grad_norm": 0.24719202112242059, "learning_rate": 2.4361198544138117e-05, "loss": 0.0507, "step": 4072 }, { "epoch": 0.45, "grad_norm": 0.2852300522287845, "learning_rate": 2.435425535301911e-05, "loss": 0.0604, "step": 4073 }, { "epoch": 0.45, "grad_norm": 0.4154557952066227, "learning_rate": 2.4347311610971255e-05, "loss": 0.07, "step": 4074 }, { "epoch": 0.45, "grad_norm": 0.48120340146458357, "learning_rate": 2.434036731887312e-05, "loss": 0.1039, "step": 4075 }, { "epoch": 0.45, "grad_norm": 0.43451232523277183, "learning_rate": 2.4333422477603342e-05, "loss": 0.0911, "step": 4076 }, { "epoch": 0.45, "grad_norm": 0.38873196621368605, "learning_rate": 2.432647708804063e-05, "loss": 0.0673, "step": 4077 }, { "epoch": 0.45, "grad_norm": 0.3995967904026898, "learning_rate": 2.4319531151063753e-05, "loss": 0.0769, "step": 4078 }, { "epoch": 0.45, "grad_norm": 0.3683685625210861, "learning_rate": 2.4312584667551563e-05, "loss": 0.063, "step": 4079 }, { "epoch": 0.45, "grad_norm": 0.31037331775542776, "learning_rate": 2.4305637638382967e-05, "loss": 0.042, "step": 4080 }, { "epoch": 0.45, "grad_norm": 0.46054758940585494, "learning_rate": 2.429869006443695e-05, "loss": 0.0836, "step": 4081 }, { "epoch": 0.45, "grad_norm": 0.30119911496164103, "learning_rate": 2.4291741946592575e-05, "loss": 0.0637, "step": 4082 }, { "epoch": 0.45, "grad_norm": 0.30680140756902124, "learning_rate": 2.4284793285728956e-05, "loss": 0.0545, "step": 4083 }, { "epoch": 0.45, "grad_norm": 0.4628624404503353, "learning_rate": 2.427784408272528e-05, "loss": 0.0955, "step": 4084 }, { "epoch": 0.45, "grad_norm": 0.4396367007518765, "learning_rate": 2.4270894338460805e-05, "loss": 0.0834, "step": 4085 }, { "epoch": 0.45, "grad_norm": 0.3704367996397135, "learning_rate": 2.4263944053814866e-05, "loss": 0.0793, "step": 4086 }, { "epoch": 0.45, "grad_norm": 0.4086022137263114, "learning_rate": 2.4256993229666854e-05, "loss": 0.0796, "step": 4087 }, { "epoch": 0.45, "grad_norm": 0.34001792083872495, "learning_rate": 2.4250041866896234e-05, "loss": 0.0665, "step": 4088 }, { "epoch": 0.45, "grad_norm": 0.49202109354535645, "learning_rate": 2.4243089966382534e-05, "loss": 0.1127, "step": 4089 }, { "epoch": 0.45, "grad_norm": 0.3956157772441568, "learning_rate": 2.4236137529005355e-05, "loss": 0.0655, "step": 4090 }, { "epoch": 0.45, "grad_norm": 0.44245615302808183, "learning_rate": 2.422918455564437e-05, "loss": 0.097, "step": 4091 }, { "epoch": 0.45, "grad_norm": 0.4112510782882793, "learning_rate": 2.4222231047179303e-05, "loss": 0.0728, "step": 4092 }, { "epoch": 0.45, "grad_norm": 0.3733790451650575, "learning_rate": 2.4215277004489975e-05, "loss": 0.0654, "step": 4093 }, { "epoch": 0.45, "grad_norm": 0.4219935568480347, "learning_rate": 2.420832242845624e-05, "loss": 0.0728, "step": 4094 }, { "epoch": 0.45, "grad_norm": 0.39155022719893856, "learning_rate": 2.4201367319958047e-05, "loss": 0.0704, "step": 4095 }, { "epoch": 0.45, "grad_norm": 0.4190375308819891, "learning_rate": 2.41944116798754e-05, "loss": 0.0963, "step": 4096 }, { "epoch": 0.45, "grad_norm": 0.40209634822410095, "learning_rate": 2.418745550908837e-05, "loss": 0.0704, "step": 4097 }, { "epoch": 0.45, "grad_norm": 0.43143862317185117, "learning_rate": 2.4180498808477096e-05, "loss": 0.0939, "step": 4098 }, { "epoch": 0.45, "grad_norm": 0.3643987960635374, "learning_rate": 2.4173541578921785e-05, "loss": 0.074, "step": 4099 }, { "epoch": 0.45, "grad_norm": 0.3656240821892068, "learning_rate": 2.4166583821302712e-05, "loss": 0.0869, "step": 4100 }, { "epoch": 0.45, "grad_norm": 0.33639262663049807, "learning_rate": 2.4159625536500223e-05, "loss": 0.0727, "step": 4101 }, { "epoch": 0.45, "grad_norm": 0.3486097632730796, "learning_rate": 2.4152666725394717e-05, "loss": 0.0702, "step": 4102 }, { "epoch": 0.45, "grad_norm": 0.39061588276713655, "learning_rate": 2.414570738886668e-05, "loss": 0.0995, "step": 4103 }, { "epoch": 0.45, "grad_norm": 0.32947586914608973, "learning_rate": 2.413874752779664e-05, "loss": 0.058, "step": 4104 }, { "epoch": 0.45, "grad_norm": 0.38648395924635454, "learning_rate": 2.4131787143065216e-05, "loss": 0.0703, "step": 4105 }, { "epoch": 0.45, "grad_norm": 0.40905455722193673, "learning_rate": 2.412482623555307e-05, "loss": 0.1096, "step": 4106 }, { "epoch": 0.45, "grad_norm": 0.46141518878384324, "learning_rate": 2.4117864806140948e-05, "loss": 0.0866, "step": 4107 }, { "epoch": 0.45, "grad_norm": 0.3523039522553531, "learning_rate": 2.411090285570965e-05, "loss": 0.0716, "step": 4108 }, { "epoch": 0.45, "grad_norm": 0.3467673177513218, "learning_rate": 2.410394038514005e-05, "loss": 0.0898, "step": 4109 }, { "epoch": 0.45, "grad_norm": 0.3338156491445141, "learning_rate": 2.4096977395313096e-05, "loss": 0.0701, "step": 4110 }, { "epoch": 0.45, "grad_norm": 0.41313677896510176, "learning_rate": 2.4090013887109776e-05, "loss": 0.0836, "step": 4111 }, { "epoch": 0.45, "grad_norm": 0.4007530432994274, "learning_rate": 2.4083049861411173e-05, "loss": 0.0662, "step": 4112 }, { "epoch": 0.45, "grad_norm": 0.4856953008131061, "learning_rate": 2.4076085319098406e-05, "loss": 0.091, "step": 4113 }, { "epoch": 0.45, "grad_norm": 0.3949056303190438, "learning_rate": 2.4069120261052682e-05, "loss": 0.0844, "step": 4114 }, { "epoch": 0.45, "grad_norm": 0.38848210978586917, "learning_rate": 2.406215468815526e-05, "loss": 0.0709, "step": 4115 }, { "epoch": 0.45, "grad_norm": 0.38983537974574095, "learning_rate": 2.4055188601287483e-05, "loss": 0.0787, "step": 4116 }, { "epoch": 0.45, "grad_norm": 0.40612616852497985, "learning_rate": 2.4048222001330727e-05, "loss": 0.0534, "step": 4117 }, { "epoch": 0.45, "grad_norm": 0.2572359661216907, "learning_rate": 2.404125488916647e-05, "loss": 0.0703, "step": 4118 }, { "epoch": 0.45, "grad_norm": 0.4374908718791743, "learning_rate": 2.4034287265676223e-05, "loss": 0.0826, "step": 4119 }, { "epoch": 0.45, "grad_norm": 0.4128683612923811, "learning_rate": 2.402731913174159e-05, "loss": 0.0676, "step": 4120 }, { "epoch": 0.45, "grad_norm": 0.5551200040177134, "learning_rate": 2.402035048824421e-05, "loss": 0.1235, "step": 4121 }, { "epoch": 0.45, "grad_norm": 0.4619953495303507, "learning_rate": 2.4013381336065805e-05, "loss": 0.0751, "step": 4122 }, { "epoch": 0.45, "grad_norm": 0.30160906437127283, "learning_rate": 2.400641167608816e-05, "loss": 0.0466, "step": 4123 }, { "epoch": 0.45, "grad_norm": 0.40222303190568415, "learning_rate": 2.399944150919313e-05, "loss": 0.1006, "step": 4124 }, { "epoch": 0.45, "grad_norm": 0.3784822274070643, "learning_rate": 2.3992470836262607e-05, "loss": 0.0719, "step": 4125 }, { "epoch": 0.45, "grad_norm": 0.32452008595041476, "learning_rate": 2.398549965817858e-05, "loss": 0.0721, "step": 4126 }, { "epoch": 0.45, "grad_norm": 0.334409287290163, "learning_rate": 2.3978527975823082e-05, "loss": 0.0846, "step": 4127 }, { "epoch": 0.45, "grad_norm": 0.3274186342297903, "learning_rate": 2.3971555790078228e-05, "loss": 0.0565, "step": 4128 }, { "epoch": 0.45, "grad_norm": 0.40135385904202164, "learning_rate": 2.3964583101826172e-05, "loss": 0.0948, "step": 4129 }, { "epoch": 0.45, "grad_norm": 0.39576479251467833, "learning_rate": 2.3957609911949146e-05, "loss": 0.0774, "step": 4130 }, { "epoch": 0.45, "grad_norm": 0.32390304499437667, "learning_rate": 2.3950636221329444e-05, "loss": 0.0607, "step": 4131 }, { "epoch": 0.45, "grad_norm": 0.3444486151313712, "learning_rate": 2.3943662030849426e-05, "loss": 0.0746, "step": 4132 }, { "epoch": 0.45, "grad_norm": 0.4120789466942001, "learning_rate": 2.3936687341391505e-05, "loss": 0.0913, "step": 4133 }, { "epoch": 0.45, "grad_norm": 0.40921932565620195, "learning_rate": 2.3929712153838173e-05, "loss": 0.0689, "step": 4134 }, { "epoch": 0.45, "grad_norm": 0.5156134979814723, "learning_rate": 2.3922736469071978e-05, "loss": 0.1108, "step": 4135 }, { "epoch": 0.45, "grad_norm": 0.49798963629230075, "learning_rate": 2.3915760287975515e-05, "loss": 0.0985, "step": 4136 }, { "epoch": 0.45, "grad_norm": 0.46270510017860483, "learning_rate": 2.390878361143147e-05, "loss": 0.0904, "step": 4137 }, { "epoch": 0.45, "grad_norm": 0.38336795291201153, "learning_rate": 2.390180644032257e-05, "loss": 0.0791, "step": 4138 }, { "epoch": 0.45, "grad_norm": 0.3238420941292933, "learning_rate": 2.389482877553161e-05, "loss": 0.0741, "step": 4139 }, { "epoch": 0.45, "grad_norm": 0.4355136875800226, "learning_rate": 2.3887850617941464e-05, "loss": 0.0953, "step": 4140 }, { "epoch": 0.45, "grad_norm": 0.33931770389014304, "learning_rate": 2.388087196843504e-05, "loss": 0.0821, "step": 4141 }, { "epoch": 0.45, "grad_norm": 0.29874379083732944, "learning_rate": 2.3873892827895332e-05, "loss": 0.0574, "step": 4142 }, { "epoch": 0.45, "grad_norm": 0.3452346127041587, "learning_rate": 2.3866913197205376e-05, "loss": 0.0884, "step": 4143 }, { "epoch": 0.46, "grad_norm": 0.26044678831736495, "learning_rate": 2.3859933077248285e-05, "loss": 0.0553, "step": 4144 }, { "epoch": 0.46, "grad_norm": 0.26998422402192723, "learning_rate": 2.3852952468907237e-05, "loss": 0.0525, "step": 4145 }, { "epoch": 0.46, "grad_norm": 0.41913255539775846, "learning_rate": 2.3845971373065452e-05, "loss": 0.0684, "step": 4146 }, { "epoch": 0.46, "grad_norm": 0.32751998299865487, "learning_rate": 2.3838989790606238e-05, "loss": 0.0652, "step": 4147 }, { "epoch": 0.46, "grad_norm": 0.39188418570769346, "learning_rate": 2.3832007722412934e-05, "loss": 0.0811, "step": 4148 }, { "epoch": 0.46, "grad_norm": 0.3928881886835845, "learning_rate": 2.3825025169368966e-05, "loss": 0.0761, "step": 4149 }, { "epoch": 0.46, "grad_norm": 0.39091810673852523, "learning_rate": 2.3818042132357812e-05, "loss": 0.0641, "step": 4150 }, { "epoch": 0.46, "grad_norm": 0.34110191538146306, "learning_rate": 2.381105861226301e-05, "loss": 0.0721, "step": 4151 }, { "epoch": 0.46, "grad_norm": 0.4511295645555022, "learning_rate": 2.3804074609968158e-05, "loss": 0.0822, "step": 4152 }, { "epoch": 0.46, "grad_norm": 0.3163040372797685, "learning_rate": 2.3797090126356928e-05, "loss": 0.0676, "step": 4153 }, { "epoch": 0.46, "grad_norm": 0.4131503043742734, "learning_rate": 2.3790105162313032e-05, "loss": 0.0712, "step": 4154 }, { "epoch": 0.46, "grad_norm": 0.39081183733655933, "learning_rate": 2.3783119718720257e-05, "loss": 0.0654, "step": 4155 }, { "epoch": 0.46, "grad_norm": 0.43488972101846396, "learning_rate": 2.3776133796462446e-05, "loss": 0.0754, "step": 4156 }, { "epoch": 0.46, "grad_norm": 0.3792544662386112, "learning_rate": 2.376914739642351e-05, "loss": 0.0697, "step": 4157 }, { "epoch": 0.46, "grad_norm": 0.48525745239313234, "learning_rate": 2.3762160519487402e-05, "loss": 0.1131, "step": 4158 }, { "epoch": 0.46, "grad_norm": 0.39531456264515613, "learning_rate": 2.375517316653816e-05, "loss": 0.0773, "step": 4159 }, { "epoch": 0.46, "grad_norm": 0.3666997876524798, "learning_rate": 2.3748185338459847e-05, "loss": 0.078, "step": 4160 }, { "epoch": 0.46, "grad_norm": 0.3415948870417214, "learning_rate": 2.3741197036136642e-05, "loss": 0.089, "step": 4161 }, { "epoch": 0.46, "grad_norm": 0.33878134044079133, "learning_rate": 2.3734208260452727e-05, "loss": 0.062, "step": 4162 }, { "epoch": 0.46, "grad_norm": 0.4572641154384413, "learning_rate": 2.3727219012292377e-05, "loss": 0.0985, "step": 4163 }, { "epoch": 0.46, "grad_norm": 0.549819295631519, "learning_rate": 2.372022929253991e-05, "loss": 0.1263, "step": 4164 }, { "epoch": 0.46, "grad_norm": 0.3773641766376178, "learning_rate": 2.371323910207972e-05, "loss": 0.068, "step": 4165 }, { "epoch": 0.46, "grad_norm": 0.4302853673111398, "learning_rate": 2.3706248441796246e-05, "loss": 0.1107, "step": 4166 }, { "epoch": 0.46, "grad_norm": 0.38086676957214566, "learning_rate": 2.369925731257399e-05, "loss": 0.0964, "step": 4167 }, { "epoch": 0.46, "grad_norm": 0.4197541900105286, "learning_rate": 2.369226571529752e-05, "loss": 0.106, "step": 4168 }, { "epoch": 0.46, "grad_norm": 0.34064601605878425, "learning_rate": 2.3685273650851458e-05, "loss": 0.076, "step": 4169 }, { "epoch": 0.46, "grad_norm": 0.3711951067187312, "learning_rate": 2.3678281120120485e-05, "loss": 0.0745, "step": 4170 }, { "epoch": 0.46, "grad_norm": 0.3664991338324102, "learning_rate": 2.3671288123989342e-05, "loss": 0.0483, "step": 4171 }, { "epoch": 0.46, "grad_norm": 0.28306199865450504, "learning_rate": 2.366429466334283e-05, "loss": 0.0641, "step": 4172 }, { "epoch": 0.46, "grad_norm": 0.42092622811249925, "learning_rate": 2.36573007390658e-05, "loss": 0.0798, "step": 4173 }, { "epoch": 0.46, "grad_norm": 0.34973236137123703, "learning_rate": 2.3650306352043182e-05, "loss": 0.081, "step": 4174 }, { "epoch": 0.46, "grad_norm": 0.3989890339080592, "learning_rate": 2.3643311503159937e-05, "loss": 0.075, "step": 4175 }, { "epoch": 0.46, "grad_norm": 0.3480454527272493, "learning_rate": 2.3636316193301107e-05, "loss": 0.0541, "step": 4176 }, { "epoch": 0.46, "grad_norm": 0.5366767389327688, "learning_rate": 2.362932042335178e-05, "loss": 0.0987, "step": 4177 }, { "epoch": 0.46, "grad_norm": 0.3581306226065945, "learning_rate": 2.3622324194197118e-05, "loss": 0.0718, "step": 4178 }, { "epoch": 0.46, "grad_norm": 0.43685258919750963, "learning_rate": 2.3615327506722317e-05, "loss": 0.099, "step": 4179 }, { "epoch": 0.46, "grad_norm": 0.2597478272500462, "learning_rate": 2.3608330361812652e-05, "loss": 0.053, "step": 4180 }, { "epoch": 0.46, "grad_norm": 0.3593804317561444, "learning_rate": 2.3601332760353438e-05, "loss": 0.0572, "step": 4181 }, { "epoch": 0.46, "grad_norm": 0.3173307170661564, "learning_rate": 2.3594334703230065e-05, "loss": 0.0737, "step": 4182 }, { "epoch": 0.46, "grad_norm": 0.39167436793378535, "learning_rate": 2.358733619132797e-05, "loss": 0.0723, "step": 4183 }, { "epoch": 0.46, "grad_norm": 0.42147820850913487, "learning_rate": 2.3580337225532663e-05, "loss": 0.0848, "step": 4184 }, { "epoch": 0.46, "grad_norm": 0.41178175934471073, "learning_rate": 2.357333780672967e-05, "loss": 0.0768, "step": 4185 }, { "epoch": 0.46, "grad_norm": 0.42869854607194285, "learning_rate": 2.356633793580463e-05, "loss": 0.083, "step": 4186 }, { "epoch": 0.46, "grad_norm": 0.34809470656133645, "learning_rate": 2.3559337613643198e-05, "loss": 0.0829, "step": 4187 }, { "epoch": 0.46, "grad_norm": 0.3671463273685135, "learning_rate": 2.355233684113111e-05, "loss": 0.0823, "step": 4188 }, { "epoch": 0.46, "grad_norm": 0.30917442592016336, "learning_rate": 2.354533561915414e-05, "loss": 0.0694, "step": 4189 }, { "epoch": 0.46, "grad_norm": 0.4699587178869484, "learning_rate": 2.3538333948598142e-05, "loss": 0.0782, "step": 4190 }, { "epoch": 0.46, "grad_norm": 0.41889348632081136, "learning_rate": 2.3531331830348994e-05, "loss": 0.096, "step": 4191 }, { "epoch": 0.46, "grad_norm": 0.5880135442542722, "learning_rate": 2.3524329265292668e-05, "loss": 0.1454, "step": 4192 }, { "epoch": 0.46, "grad_norm": 0.45799279747530225, "learning_rate": 2.351732625431516e-05, "loss": 0.0873, "step": 4193 }, { "epoch": 0.46, "grad_norm": 0.37676002814243087, "learning_rate": 2.3510322798302553e-05, "loss": 0.0923, "step": 4194 }, { "epoch": 0.46, "grad_norm": 0.3287243364927219, "learning_rate": 2.3503318898140952e-05, "loss": 0.0751, "step": 4195 }, { "epoch": 0.46, "grad_norm": 0.3115377871278743, "learning_rate": 2.3496314554716543e-05, "loss": 0.0506, "step": 4196 }, { "epoch": 0.46, "grad_norm": 0.4253234151635005, "learning_rate": 2.3489309768915564e-05, "loss": 0.099, "step": 4197 }, { "epoch": 0.46, "grad_norm": 0.33258154650687444, "learning_rate": 2.348230454162431e-05, "loss": 0.066, "step": 4198 }, { "epoch": 0.46, "grad_norm": 0.3094640964757944, "learning_rate": 2.347529887372912e-05, "loss": 0.0755, "step": 4199 }, { "epoch": 0.46, "grad_norm": 0.40265021642709675, "learning_rate": 2.34682927661164e-05, "loss": 0.067, "step": 4200 }, { "epoch": 0.46, "grad_norm": 0.3810043059203361, "learning_rate": 2.346128621967261e-05, "loss": 0.0506, "step": 4201 }, { "epoch": 0.46, "grad_norm": 0.27806708879514996, "learning_rate": 2.3454279235284264e-05, "loss": 0.069, "step": 4202 }, { "epoch": 0.46, "grad_norm": 0.2994116527363354, "learning_rate": 2.3447271813837928e-05, "loss": 0.0531, "step": 4203 }, { "epoch": 0.46, "grad_norm": 0.32228744895327155, "learning_rate": 2.344026395622023e-05, "loss": 0.055, "step": 4204 }, { "epoch": 0.46, "grad_norm": 0.44270619410577666, "learning_rate": 2.3433255663317845e-05, "loss": 0.0755, "step": 4205 }, { "epoch": 0.46, "grad_norm": 0.460650500157179, "learning_rate": 2.3426246936017514e-05, "loss": 0.1092, "step": 4206 }, { "epoch": 0.46, "grad_norm": 0.3341713209236365, "learning_rate": 2.3419237775206026e-05, "loss": 0.0692, "step": 4207 }, { "epoch": 0.46, "grad_norm": 0.3803600069714685, "learning_rate": 2.3412228181770224e-05, "loss": 0.0747, "step": 4208 }, { "epoch": 0.46, "grad_norm": 0.32009740911505996, "learning_rate": 2.3405218156597012e-05, "loss": 0.073, "step": 4209 }, { "epoch": 0.46, "grad_norm": 0.344758796805806, "learning_rate": 2.3398207700573336e-05, "loss": 0.0767, "step": 4210 }, { "epoch": 0.46, "grad_norm": 0.3060500294927936, "learning_rate": 2.3391196814586215e-05, "loss": 0.0746, "step": 4211 }, { "epoch": 0.46, "grad_norm": 0.34794130318990363, "learning_rate": 2.3384185499522696e-05, "loss": 0.0913, "step": 4212 }, { "epoch": 0.46, "grad_norm": 0.3214541072200943, "learning_rate": 2.3377173756269914e-05, "loss": 0.0603, "step": 4213 }, { "epoch": 0.46, "grad_norm": 0.3397570861283747, "learning_rate": 2.337016158571503e-05, "loss": 0.0605, "step": 4214 }, { "epoch": 0.46, "grad_norm": 0.3270237813279916, "learning_rate": 2.3363148988745277e-05, "loss": 0.047, "step": 4215 }, { "epoch": 0.46, "grad_norm": 0.46267321667903893, "learning_rate": 2.335613596624793e-05, "loss": 0.1162, "step": 4216 }, { "epoch": 0.46, "grad_norm": 0.28229137851142017, "learning_rate": 2.334912251911032e-05, "loss": 0.0607, "step": 4217 }, { "epoch": 0.46, "grad_norm": 0.3400327880210369, "learning_rate": 2.334210864821984e-05, "loss": 0.0589, "step": 4218 }, { "epoch": 0.46, "grad_norm": 0.32967501069769883, "learning_rate": 2.3335094354463932e-05, "loss": 0.069, "step": 4219 }, { "epoch": 0.46, "grad_norm": 0.4260663818925664, "learning_rate": 2.3328079638730073e-05, "loss": 0.0998, "step": 4220 }, { "epoch": 0.46, "grad_norm": 0.43401975581915014, "learning_rate": 2.3321064501905837e-05, "loss": 0.0746, "step": 4221 }, { "epoch": 0.46, "grad_norm": 0.32703676737629755, "learning_rate": 2.3314048944878804e-05, "loss": 0.0728, "step": 4222 }, { "epoch": 0.46, "grad_norm": 0.5122762366017956, "learning_rate": 2.3307032968536642e-05, "loss": 0.1224, "step": 4223 }, { "epoch": 0.46, "grad_norm": 0.3322454322690402, "learning_rate": 2.330001657376705e-05, "loss": 0.0733, "step": 4224 }, { "epoch": 0.46, "grad_norm": 0.29419388510503663, "learning_rate": 2.3292999761457794e-05, "loss": 0.0617, "step": 4225 }, { "epoch": 0.46, "grad_norm": 0.4531682421341326, "learning_rate": 2.3285982532496676e-05, "loss": 0.1073, "step": 4226 }, { "epoch": 0.46, "grad_norm": 0.30350394109872575, "learning_rate": 2.3278964887771576e-05, "loss": 0.0549, "step": 4227 }, { "epoch": 0.46, "grad_norm": 0.3078286186103161, "learning_rate": 2.32719468281704e-05, "loss": 0.0812, "step": 4228 }, { "epoch": 0.46, "grad_norm": 0.518183458571294, "learning_rate": 2.3264928354581126e-05, "loss": 0.1055, "step": 4229 }, { "epoch": 0.46, "grad_norm": 0.37898957427639357, "learning_rate": 2.325790946789178e-05, "loss": 0.0923, "step": 4230 }, { "epoch": 0.46, "grad_norm": 0.36397027742686516, "learning_rate": 2.3250890168990426e-05, "loss": 0.0595, "step": 4231 }, { "epoch": 0.46, "grad_norm": 0.3852768807758676, "learning_rate": 2.32438704587652e-05, "loss": 0.0695, "step": 4232 }, { "epoch": 0.46, "grad_norm": 0.3622624251812756, "learning_rate": 2.323685033810428e-05, "loss": 0.05, "step": 4233 }, { "epoch": 0.46, "grad_norm": 0.3582332344670945, "learning_rate": 2.3229829807895904e-05, "loss": 0.0845, "step": 4234 }, { "epoch": 0.47, "grad_norm": 0.3058011658527163, "learning_rate": 2.3222808869028342e-05, "loss": 0.054, "step": 4235 }, { "epoch": 0.47, "grad_norm": 0.4245133025440607, "learning_rate": 2.3215787522389935e-05, "loss": 0.0772, "step": 4236 }, { "epoch": 0.47, "grad_norm": 0.35221508705049276, "learning_rate": 2.3208765768869074e-05, "loss": 0.0653, "step": 4237 }, { "epoch": 0.47, "grad_norm": 0.42820459586567544, "learning_rate": 2.3201743609354187e-05, "loss": 0.0863, "step": 4238 }, { "epoch": 0.47, "grad_norm": 0.318704675275583, "learning_rate": 2.3194721044733776e-05, "loss": 0.0633, "step": 4239 }, { "epoch": 0.47, "grad_norm": 0.3090389637523935, "learning_rate": 2.3187698075896378e-05, "loss": 0.0504, "step": 4240 }, { "epoch": 0.47, "grad_norm": 0.4908221850831954, "learning_rate": 2.318067470373058e-05, "loss": 0.099, "step": 4241 }, { "epoch": 0.47, "grad_norm": 0.4647447176033386, "learning_rate": 2.317365092912503e-05, "loss": 0.0786, "step": 4242 }, { "epoch": 0.47, "grad_norm": 0.38551470652283654, "learning_rate": 2.3166626752968418e-05, "loss": 0.0746, "step": 4243 }, { "epoch": 0.47, "grad_norm": 0.5598766560190404, "learning_rate": 2.3159602176149493e-05, "loss": 0.1262, "step": 4244 }, { "epoch": 0.47, "grad_norm": 0.4753533015649203, "learning_rate": 2.3152577199557045e-05, "loss": 0.1013, "step": 4245 }, { "epoch": 0.47, "grad_norm": 0.4116600805533102, "learning_rate": 2.314555182407992e-05, "loss": 0.0654, "step": 4246 }, { "epoch": 0.47, "grad_norm": 0.34096132875242663, "learning_rate": 2.313852605060702e-05, "loss": 0.0706, "step": 4247 }, { "epoch": 0.47, "grad_norm": 0.4382657094230001, "learning_rate": 2.3131499880027294e-05, "loss": 0.0821, "step": 4248 }, { "epoch": 0.47, "grad_norm": 0.44984770157970866, "learning_rate": 2.3124473313229726e-05, "loss": 0.0781, "step": 4249 }, { "epoch": 0.47, "grad_norm": 0.4121756116238622, "learning_rate": 2.311744635110338e-05, "loss": 0.0961, "step": 4250 }, { "epoch": 0.47, "grad_norm": 0.37420217760060026, "learning_rate": 2.3110418994537332e-05, "loss": 0.0733, "step": 4251 }, { "epoch": 0.47, "grad_norm": 0.3456929954294772, "learning_rate": 2.3103391244420754e-05, "loss": 0.0766, "step": 4252 }, { "epoch": 0.47, "grad_norm": 0.38187667746417386, "learning_rate": 2.3096363101642822e-05, "loss": 0.0679, "step": 4253 }, { "epoch": 0.47, "grad_norm": 0.4710642272403325, "learning_rate": 2.30893345670928e-05, "loss": 0.1012, "step": 4254 }, { "epoch": 0.47, "grad_norm": 0.3570202932199923, "learning_rate": 2.3082305641659968e-05, "loss": 0.0992, "step": 4255 }, { "epoch": 0.47, "grad_norm": 0.33272146403693625, "learning_rate": 2.3075276326233676e-05, "loss": 0.0793, "step": 4256 }, { "epoch": 0.47, "grad_norm": 0.32790787643048164, "learning_rate": 2.3068246621703325e-05, "loss": 0.0676, "step": 4257 }, { "epoch": 0.47, "grad_norm": 0.33956588698350965, "learning_rate": 2.306121652895836e-05, "loss": 0.0734, "step": 4258 }, { "epoch": 0.47, "grad_norm": 0.3716345610814525, "learning_rate": 2.3054186048888264e-05, "loss": 0.0866, "step": 4259 }, { "epoch": 0.47, "grad_norm": 0.3037221115362737, "learning_rate": 2.3047155182382584e-05, "loss": 0.0565, "step": 4260 }, { "epoch": 0.47, "grad_norm": 0.3006909594078138, "learning_rate": 2.304012393033092e-05, "loss": 0.0553, "step": 4261 }, { "epoch": 0.47, "grad_norm": 0.33139467363943415, "learning_rate": 2.3033092293622903e-05, "loss": 0.0739, "step": 4262 }, { "epoch": 0.47, "grad_norm": 0.37087028199689787, "learning_rate": 2.3026060273148217e-05, "loss": 0.0758, "step": 4263 }, { "epoch": 0.47, "grad_norm": 0.31561170115638904, "learning_rate": 2.3019027869796607e-05, "loss": 0.0632, "step": 4264 }, { "epoch": 0.47, "grad_norm": 0.45581582653255054, "learning_rate": 2.301199508445786e-05, "loss": 0.0874, "step": 4265 }, { "epoch": 0.47, "grad_norm": 0.28860069450362924, "learning_rate": 2.3004961918021804e-05, "loss": 0.0534, "step": 4266 }, { "epoch": 0.47, "grad_norm": 0.2994410393811674, "learning_rate": 2.299792837137833e-05, "loss": 0.0632, "step": 4267 }, { "epoch": 0.47, "grad_norm": 0.398660597330533, "learning_rate": 2.2990894445417355e-05, "loss": 0.0965, "step": 4268 }, { "epoch": 0.47, "grad_norm": 0.3990051676133505, "learning_rate": 2.2983860141028874e-05, "loss": 0.0733, "step": 4269 }, { "epoch": 0.47, "grad_norm": 0.3596882491514092, "learning_rate": 2.2976825459102898e-05, "loss": 0.0704, "step": 4270 }, { "epoch": 0.47, "grad_norm": 0.4121888793621453, "learning_rate": 2.2969790400529508e-05, "loss": 0.0776, "step": 4271 }, { "epoch": 0.47, "grad_norm": 0.42568210882464785, "learning_rate": 2.2962754966198815e-05, "loss": 0.0996, "step": 4272 }, { "epoch": 0.47, "grad_norm": 0.3176280321459141, "learning_rate": 2.295571915700101e-05, "loss": 0.0737, "step": 4273 }, { "epoch": 0.47, "grad_norm": 0.3501518345919991, "learning_rate": 2.2948682973826292e-05, "loss": 0.0796, "step": 4274 }, { "epoch": 0.47, "grad_norm": 0.39159007067478147, "learning_rate": 2.2941646417564932e-05, "loss": 0.0534, "step": 4275 }, { "epoch": 0.47, "grad_norm": 0.35058927401599344, "learning_rate": 2.2934609489107236e-05, "loss": 0.0786, "step": 4276 }, { "epoch": 0.47, "grad_norm": 0.3067731267306875, "learning_rate": 2.292757218934357e-05, "loss": 0.0714, "step": 4277 }, { "epoch": 0.47, "grad_norm": 0.42958478566925395, "learning_rate": 2.292053451916433e-05, "loss": 0.0846, "step": 4278 }, { "epoch": 0.47, "grad_norm": 0.43159341338176777, "learning_rate": 2.2913496479459977e-05, "loss": 0.0684, "step": 4279 }, { "epoch": 0.47, "grad_norm": 0.35833149214574656, "learning_rate": 2.2906458071121e-05, "loss": 0.0607, "step": 4280 }, { "epoch": 0.47, "grad_norm": 0.3847828391624839, "learning_rate": 2.2899419295037952e-05, "loss": 0.061, "step": 4281 }, { "epoch": 0.47, "grad_norm": 0.36272176092506414, "learning_rate": 2.289238015210142e-05, "loss": 0.0535, "step": 4282 }, { "epoch": 0.47, "grad_norm": 0.40597251438606413, "learning_rate": 2.2885340643202053e-05, "loss": 0.0589, "step": 4283 }, { "epoch": 0.47, "grad_norm": 0.3679154667271321, "learning_rate": 2.2878300769230522e-05, "loss": 0.0598, "step": 4284 }, { "epoch": 0.47, "grad_norm": 0.37538474928815474, "learning_rate": 2.2871260531077565e-05, "loss": 0.0733, "step": 4285 }, { "epoch": 0.47, "grad_norm": 0.41514535776938283, "learning_rate": 2.2864219929633956e-05, "loss": 0.0816, "step": 4286 }, { "epoch": 0.47, "grad_norm": 0.4352916945053992, "learning_rate": 2.2857178965790522e-05, "loss": 0.0849, "step": 4287 }, { "epoch": 0.47, "grad_norm": 0.3696158040192482, "learning_rate": 2.2850137640438126e-05, "loss": 0.0925, "step": 4288 }, { "epoch": 0.47, "grad_norm": 0.33078205506772335, "learning_rate": 2.2843095954467684e-05, "loss": 0.0544, "step": 4289 }, { "epoch": 0.47, "grad_norm": 0.43250343773418265, "learning_rate": 2.2836053908770165e-05, "loss": 0.1071, "step": 4290 }, { "epoch": 0.47, "grad_norm": 0.4032859695258915, "learning_rate": 2.2829011504236563e-05, "loss": 0.0884, "step": 4291 }, { "epoch": 0.47, "grad_norm": 0.4340043229017442, "learning_rate": 2.2821968741757935e-05, "loss": 0.0741, "step": 4292 }, { "epoch": 0.47, "grad_norm": 0.3598764901085625, "learning_rate": 2.281492562222537e-05, "loss": 0.084, "step": 4293 }, { "epoch": 0.47, "grad_norm": 0.34655353238720704, "learning_rate": 2.280788214653003e-05, "loss": 0.0785, "step": 4294 }, { "epoch": 0.47, "grad_norm": 0.3143926645287379, "learning_rate": 2.2800838315563074e-05, "loss": 0.0692, "step": 4295 }, { "epoch": 0.47, "grad_norm": 0.3617421021143069, "learning_rate": 2.2793794130215753e-05, "loss": 0.0624, "step": 4296 }, { "epoch": 0.47, "grad_norm": 0.3426628042934496, "learning_rate": 2.2786749591379333e-05, "loss": 0.0765, "step": 4297 }, { "epoch": 0.47, "grad_norm": 0.3368312125685827, "learning_rate": 2.2779704699945136e-05, "loss": 0.0756, "step": 4298 }, { "epoch": 0.47, "grad_norm": 0.29035318748566485, "learning_rate": 2.2772659456804537e-05, "loss": 0.0661, "step": 4299 }, { "epoch": 0.47, "grad_norm": 0.3398279866037583, "learning_rate": 2.2765613862848936e-05, "loss": 0.0593, "step": 4300 }, { "epoch": 0.47, "grad_norm": 0.339979813183332, "learning_rate": 2.2758567918969794e-05, "loss": 0.0648, "step": 4301 }, { "epoch": 0.47, "grad_norm": 0.36565660356400675, "learning_rate": 2.2751521626058607e-05, "loss": 0.0901, "step": 4302 }, { "epoch": 0.47, "grad_norm": 0.3442851149712656, "learning_rate": 2.2744474985006918e-05, "loss": 0.0693, "step": 4303 }, { "epoch": 0.47, "grad_norm": 0.3941907262952854, "learning_rate": 2.2737427996706316e-05, "loss": 0.0646, "step": 4304 }, { "epoch": 0.47, "grad_norm": 0.3059710463191362, "learning_rate": 2.2730380662048433e-05, "loss": 0.0482, "step": 4305 }, { "epoch": 0.47, "grad_norm": 0.3449319762395715, "learning_rate": 2.2723332981924937e-05, "loss": 0.064, "step": 4306 }, { "epoch": 0.47, "grad_norm": 0.37771199977685216, "learning_rate": 2.2716284957227547e-05, "loss": 0.0727, "step": 4307 }, { "epoch": 0.47, "grad_norm": 0.338424789598626, "learning_rate": 2.2709236588848036e-05, "loss": 0.0472, "step": 4308 }, { "epoch": 0.47, "grad_norm": 0.3650090139399516, "learning_rate": 2.2702187877678198e-05, "loss": 0.0838, "step": 4309 }, { "epoch": 0.47, "grad_norm": 0.4490433060082027, "learning_rate": 2.269513882460989e-05, "loss": 0.1151, "step": 4310 }, { "epoch": 0.47, "grad_norm": 0.3573037040766145, "learning_rate": 2.2688089430535003e-05, "loss": 0.0535, "step": 4311 }, { "epoch": 0.47, "grad_norm": 0.3710508422138282, "learning_rate": 2.268103969634547e-05, "loss": 0.0719, "step": 4312 }, { "epoch": 0.47, "grad_norm": 0.4024471779631351, "learning_rate": 2.267398962293327e-05, "loss": 0.085, "step": 4313 }, { "epoch": 0.47, "grad_norm": 0.33924127207894916, "learning_rate": 2.266693921119042e-05, "loss": 0.0636, "step": 4314 }, { "epoch": 0.47, "grad_norm": 0.4911987121152769, "learning_rate": 2.2659888462008988e-05, "loss": 0.0999, "step": 4315 }, { "epoch": 0.47, "grad_norm": 0.5085896633939151, "learning_rate": 2.2652837376281087e-05, "loss": 0.1133, "step": 4316 }, { "epoch": 0.47, "grad_norm": 0.3126775952191896, "learning_rate": 2.2645785954898858e-05, "loss": 0.0646, "step": 4317 }, { "epoch": 0.47, "grad_norm": 0.352744298987547, "learning_rate": 2.2638734198754496e-05, "loss": 0.0724, "step": 4318 }, { "epoch": 0.47, "grad_norm": 0.3695248819008147, "learning_rate": 2.2631682108740236e-05, "loss": 0.0616, "step": 4319 }, { "epoch": 0.47, "grad_norm": 0.31011539449580666, "learning_rate": 2.2624629685748353e-05, "loss": 0.0562, "step": 4320 }, { "epoch": 0.47, "grad_norm": 0.2795273023681627, "learning_rate": 2.2617576930671165e-05, "loss": 0.0753, "step": 4321 }, { "epoch": 0.47, "grad_norm": 0.44186316953196797, "learning_rate": 2.261052384440104e-05, "loss": 0.0826, "step": 4322 }, { "epoch": 0.47, "grad_norm": 0.4266623907539318, "learning_rate": 2.2603470427830364e-05, "loss": 0.0756, "step": 4323 }, { "epoch": 0.47, "grad_norm": 0.3183496859895677, "learning_rate": 2.2596416681851595e-05, "loss": 0.0607, "step": 4324 }, { "epoch": 0.47, "grad_norm": 0.3967897338585108, "learning_rate": 2.258936260735722e-05, "loss": 0.0694, "step": 4325 }, { "epoch": 0.48, "grad_norm": 0.41219262100404985, "learning_rate": 2.2582308205239757e-05, "loss": 0.0886, "step": 4326 }, { "epoch": 0.48, "grad_norm": 0.3138468447954937, "learning_rate": 2.257525347639179e-05, "loss": 0.0514, "step": 4327 }, { "epoch": 0.48, "grad_norm": 0.3353097950736494, "learning_rate": 2.256819842170591e-05, "loss": 0.0598, "step": 4328 }, { "epoch": 0.48, "grad_norm": 0.34194531078617957, "learning_rate": 2.2561143042074784e-05, "loss": 0.0623, "step": 4329 }, { "epoch": 0.48, "grad_norm": 0.3495438050738101, "learning_rate": 2.2554087338391098e-05, "loss": 0.0815, "step": 4330 }, { "epoch": 0.48, "grad_norm": 0.44620250972792613, "learning_rate": 2.254703131154759e-05, "loss": 0.0824, "step": 4331 }, { "epoch": 0.48, "grad_norm": 0.4488103917854197, "learning_rate": 2.2539974962437022e-05, "loss": 0.1149, "step": 4332 }, { "epoch": 0.48, "grad_norm": 0.4020510228999266, "learning_rate": 2.2532918291952228e-05, "loss": 0.0702, "step": 4333 }, { "epoch": 0.48, "grad_norm": 0.416747009955414, "learning_rate": 2.252586130098605e-05, "loss": 0.0928, "step": 4334 }, { "epoch": 0.48, "grad_norm": 0.3172327126849917, "learning_rate": 2.2518803990431394e-05, "loss": 0.0633, "step": 4335 }, { "epoch": 0.48, "grad_norm": 0.36649567787697573, "learning_rate": 2.251174636118119e-05, "loss": 0.0653, "step": 4336 }, { "epoch": 0.48, "grad_norm": 0.3008968948390082, "learning_rate": 2.2504688414128423e-05, "loss": 0.0494, "step": 4337 }, { "epoch": 0.48, "grad_norm": 0.3460202567470304, "learning_rate": 2.2497630150166102e-05, "loss": 0.0867, "step": 4338 }, { "epoch": 0.48, "grad_norm": 0.35697108122758936, "learning_rate": 2.249057157018729e-05, "loss": 0.0878, "step": 4339 }, { "epoch": 0.48, "grad_norm": 0.298349832970042, "learning_rate": 2.2483512675085085e-05, "loss": 0.0518, "step": 4340 }, { "epoch": 0.48, "grad_norm": 0.28430174470920294, "learning_rate": 2.2476453465752615e-05, "loss": 0.0409, "step": 4341 }, { "epoch": 0.48, "grad_norm": 0.3674662728954839, "learning_rate": 2.2469393943083068e-05, "loss": 0.0766, "step": 4342 }, { "epoch": 0.48, "grad_norm": 0.3394765185162822, "learning_rate": 2.2462334107969667e-05, "loss": 0.0676, "step": 4343 }, { "epoch": 0.48, "grad_norm": 0.38048047511776334, "learning_rate": 2.245527396130565e-05, "loss": 0.0777, "step": 4344 }, { "epoch": 0.48, "grad_norm": 0.38107137403999786, "learning_rate": 2.2448213503984328e-05, "loss": 0.1032, "step": 4345 }, { "epoch": 0.48, "grad_norm": 0.3902662346314301, "learning_rate": 2.2441152736899026e-05, "loss": 0.0718, "step": 4346 }, { "epoch": 0.48, "grad_norm": 0.35041807188386576, "learning_rate": 2.243409166094313e-05, "loss": 0.0527, "step": 4347 }, { "epoch": 0.48, "grad_norm": 0.46102904364394653, "learning_rate": 2.242703027701004e-05, "loss": 0.0978, "step": 4348 }, { "epoch": 0.48, "grad_norm": 0.32414640056018557, "learning_rate": 2.241996858599322e-05, "loss": 0.0705, "step": 4349 }, { "epoch": 0.48, "grad_norm": 0.2800981059588493, "learning_rate": 2.2412906588786147e-05, "loss": 0.0558, "step": 4350 }, { "epoch": 0.48, "grad_norm": 0.39614114797534544, "learning_rate": 2.240584428628237e-05, "loss": 0.0913, "step": 4351 }, { "epoch": 0.48, "grad_norm": 0.500656531361286, "learning_rate": 2.2398781679375445e-05, "loss": 0.1052, "step": 4352 }, { "epoch": 0.48, "grad_norm": 0.35953443556480524, "learning_rate": 2.239171876895898e-05, "loss": 0.0779, "step": 4353 }, { "epoch": 0.48, "grad_norm": 0.3983575142253801, "learning_rate": 2.2384655555926625e-05, "loss": 0.0998, "step": 4354 }, { "epoch": 0.48, "grad_norm": 0.3651771296553642, "learning_rate": 2.237759204117206e-05, "loss": 0.0674, "step": 4355 }, { "epoch": 0.48, "grad_norm": 0.344367383190738, "learning_rate": 2.237052822558901e-05, "loss": 0.0839, "step": 4356 }, { "epoch": 0.48, "grad_norm": 0.38632957064798795, "learning_rate": 2.236346411007124e-05, "loss": 0.0874, "step": 4357 }, { "epoch": 0.48, "grad_norm": 0.29366639043096426, "learning_rate": 2.235639969551253e-05, "loss": 0.0522, "step": 4358 }, { "epoch": 0.48, "grad_norm": 0.3648458648228259, "learning_rate": 2.234933498280673e-05, "loss": 0.0673, "step": 4359 }, { "epoch": 0.48, "grad_norm": 0.3266488887286235, "learning_rate": 2.2342269972847718e-05, "loss": 0.0689, "step": 4360 }, { "epoch": 0.48, "grad_norm": 0.371242352898185, "learning_rate": 2.2335204666529396e-05, "loss": 0.0664, "step": 4361 }, { "epoch": 0.48, "grad_norm": 0.3198542689991614, "learning_rate": 2.232813906474572e-05, "loss": 0.0521, "step": 4362 }, { "epoch": 0.48, "grad_norm": 0.3903140165301045, "learning_rate": 2.2321073168390663e-05, "loss": 0.0626, "step": 4363 }, { "epoch": 0.48, "grad_norm": 0.3364296346811924, "learning_rate": 2.2314006978358263e-05, "loss": 0.0545, "step": 4364 }, { "epoch": 0.48, "grad_norm": 0.4239453975319239, "learning_rate": 2.2306940495542576e-05, "loss": 0.0626, "step": 4365 }, { "epoch": 0.48, "grad_norm": 0.3996125639697796, "learning_rate": 2.2299873720837692e-05, "loss": 0.0561, "step": 4366 }, { "epoch": 0.48, "grad_norm": 0.37965688385778257, "learning_rate": 2.2292806655137752e-05, "loss": 0.0762, "step": 4367 }, { "epoch": 0.48, "grad_norm": 0.3320122769488699, "learning_rate": 2.2285739299336933e-05, "loss": 0.0457, "step": 4368 }, { "epoch": 0.48, "grad_norm": 0.27999074150003067, "learning_rate": 2.227867165432943e-05, "loss": 0.0624, "step": 4369 }, { "epoch": 0.48, "grad_norm": 0.39799841828247606, "learning_rate": 2.22716037210095e-05, "loss": 0.074, "step": 4370 }, { "epoch": 0.48, "grad_norm": 0.36179208609406466, "learning_rate": 2.2264535500271417e-05, "loss": 0.0629, "step": 4371 }, { "epoch": 0.48, "grad_norm": 0.3608095726767989, "learning_rate": 2.2257466993009503e-05, "loss": 0.0832, "step": 4372 }, { "epoch": 0.48, "grad_norm": 0.39271884502774906, "learning_rate": 2.2250398200118104e-05, "loss": 0.0693, "step": 4373 }, { "epoch": 0.48, "grad_norm": 0.35879756313132233, "learning_rate": 2.2243329122491617e-05, "loss": 0.0724, "step": 4374 }, { "epoch": 0.48, "grad_norm": 0.40921051346281273, "learning_rate": 2.2236259761024467e-05, "loss": 0.0969, "step": 4375 }, { "epoch": 0.48, "grad_norm": 0.3875987703822033, "learning_rate": 2.222919011661111e-05, "loss": 0.0683, "step": 4376 }, { "epoch": 0.48, "grad_norm": 0.5225450109414287, "learning_rate": 2.222212019014605e-05, "loss": 0.0891, "step": 4377 }, { "epoch": 0.48, "grad_norm": 0.36738611490569284, "learning_rate": 2.2215049982523827e-05, "loss": 0.0544, "step": 4378 }, { "epoch": 0.48, "grad_norm": 0.44154949733328414, "learning_rate": 2.2207979494638988e-05, "loss": 0.0829, "step": 4379 }, { "epoch": 0.48, "grad_norm": 0.4373328025677027, "learning_rate": 2.2200908727386167e-05, "loss": 0.1092, "step": 4380 }, { "epoch": 0.48, "grad_norm": 0.30308787718303515, "learning_rate": 2.2193837681659975e-05, "loss": 0.0621, "step": 4381 }, { "epoch": 0.48, "grad_norm": 0.33813915043270465, "learning_rate": 2.2186766358355106e-05, "loss": 0.0875, "step": 4382 }, { "epoch": 0.48, "grad_norm": 0.37176416973463605, "learning_rate": 2.2179694758366254e-05, "loss": 0.0757, "step": 4383 }, { "epoch": 0.48, "grad_norm": 0.33130705899813734, "learning_rate": 2.217262288258818e-05, "loss": 0.0692, "step": 4384 }, { "epoch": 0.48, "grad_norm": 0.350758013561006, "learning_rate": 2.2165550731915664e-05, "loss": 0.0862, "step": 4385 }, { "epoch": 0.48, "grad_norm": 0.4075060646799392, "learning_rate": 2.2158478307243507e-05, "loss": 0.0699, "step": 4386 }, { "epoch": 0.48, "grad_norm": 0.35962987237625565, "learning_rate": 2.215140560946656e-05, "loss": 0.0752, "step": 4387 }, { "epoch": 0.48, "grad_norm": 0.31553130978234917, "learning_rate": 2.2144332639479722e-05, "loss": 0.0583, "step": 4388 }, { "epoch": 0.48, "grad_norm": 0.38601402651423605, "learning_rate": 2.2137259398177903e-05, "loss": 0.0923, "step": 4389 }, { "epoch": 0.48, "grad_norm": 0.2764534428117698, "learning_rate": 2.213018588645605e-05, "loss": 0.0558, "step": 4390 }, { "epoch": 0.48, "grad_norm": 0.42265140650542365, "learning_rate": 2.2123112105209157e-05, "loss": 0.0728, "step": 4391 }, { "epoch": 0.48, "grad_norm": 0.4169266445992342, "learning_rate": 2.2116038055332238e-05, "loss": 0.0892, "step": 4392 }, { "epoch": 0.48, "grad_norm": 0.3182469151797622, "learning_rate": 2.2108963737720354e-05, "loss": 0.0607, "step": 4393 }, { "epoch": 0.48, "grad_norm": 0.4114832347465145, "learning_rate": 2.2101889153268595e-05, "loss": 0.0851, "step": 4394 }, { "epoch": 0.48, "grad_norm": 0.3590432377189472, "learning_rate": 2.209481430287208e-05, "loss": 0.0754, "step": 4395 }, { "epoch": 0.48, "grad_norm": 0.28551474989336967, "learning_rate": 2.2087739187425967e-05, "loss": 0.0629, "step": 4396 }, { "epoch": 0.48, "grad_norm": 0.37412489667815196, "learning_rate": 2.2080663807825448e-05, "loss": 0.0726, "step": 4397 }, { "epoch": 0.48, "grad_norm": 0.3922163021864411, "learning_rate": 2.2073588164965737e-05, "loss": 0.0615, "step": 4398 }, { "epoch": 0.48, "grad_norm": 0.33012003886442975, "learning_rate": 2.2066512259742107e-05, "loss": 0.0628, "step": 4399 }, { "epoch": 0.48, "grad_norm": 0.3737348234801594, "learning_rate": 2.205943609304983e-05, "loss": 0.068, "step": 4400 }, { "epoch": 0.48, "grad_norm": 0.2970666266920581, "learning_rate": 2.205235966578424e-05, "loss": 0.051, "step": 4401 }, { "epoch": 0.48, "grad_norm": 0.39051939490456916, "learning_rate": 2.2045282978840684e-05, "loss": 0.0646, "step": 4402 }, { "epoch": 0.48, "grad_norm": 0.26177338841156883, "learning_rate": 2.2038206033114565e-05, "loss": 0.045, "step": 4403 }, { "epoch": 0.48, "grad_norm": 0.3168128662212616, "learning_rate": 2.2031128829501293e-05, "loss": 0.053, "step": 4404 }, { "epoch": 0.48, "grad_norm": 0.36054623522138607, "learning_rate": 2.2024051368896324e-05, "loss": 0.0693, "step": 4405 }, { "epoch": 0.48, "grad_norm": 0.40205587810540727, "learning_rate": 2.2016973652195145e-05, "loss": 0.0977, "step": 4406 }, { "epoch": 0.48, "grad_norm": 0.4264672379091293, "learning_rate": 2.2009895680293282e-05, "loss": 0.0817, "step": 4407 }, { "epoch": 0.48, "grad_norm": 0.3300946135606695, "learning_rate": 2.200281745408627e-05, "loss": 0.0702, "step": 4408 }, { "epoch": 0.48, "grad_norm": 0.37954877666084774, "learning_rate": 2.1995738974469712e-05, "loss": 0.0771, "step": 4409 }, { "epoch": 0.48, "grad_norm": 0.27303177842303766, "learning_rate": 2.1988660242339205e-05, "loss": 0.0491, "step": 4410 }, { "epoch": 0.48, "grad_norm": 0.41433953068684676, "learning_rate": 2.198158125859041e-05, "loss": 0.0786, "step": 4411 }, { "epoch": 0.48, "grad_norm": 0.27899680657678466, "learning_rate": 2.1974502024119002e-05, "loss": 0.0431, "step": 4412 }, { "epoch": 0.48, "grad_norm": 0.39167731639235287, "learning_rate": 2.196742253982069e-05, "loss": 0.0813, "step": 4413 }, { "epoch": 0.48, "grad_norm": 0.6102844445518398, "learning_rate": 2.196034280659122e-05, "loss": 0.1344, "step": 4414 }, { "epoch": 0.48, "grad_norm": 0.3274576576175134, "learning_rate": 2.1953262825326357e-05, "loss": 0.0593, "step": 4415 }, { "epoch": 0.48, "grad_norm": 0.32475552304367616, "learning_rate": 2.1946182596921917e-05, "loss": 0.0623, "step": 4416 }, { "epoch": 0.49, "grad_norm": 0.4327028417935204, "learning_rate": 2.193910212227374e-05, "loss": 0.0878, "step": 4417 }, { "epoch": 0.49, "grad_norm": 0.35699358017275484, "learning_rate": 2.1932021402277682e-05, "loss": 0.0825, "step": 4418 }, { "epoch": 0.49, "grad_norm": 0.372314453125, "learning_rate": 2.1924940437829643e-05, "loss": 0.0811, "step": 4419 }, { "epoch": 0.49, "grad_norm": 0.33035941242852357, "learning_rate": 2.1917859229825565e-05, "loss": 0.0521, "step": 4420 }, { "epoch": 0.49, "grad_norm": 0.3699903044203544, "learning_rate": 2.19107777791614e-05, "loss": 0.0707, "step": 4421 }, { "epoch": 0.49, "grad_norm": 0.2854554161628383, "learning_rate": 2.1903696086733142e-05, "loss": 0.0717, "step": 4422 }, { "epoch": 0.49, "grad_norm": 0.3311293076220247, "learning_rate": 2.1896614153436804e-05, "loss": 0.0472, "step": 4423 }, { "epoch": 0.49, "grad_norm": 0.3148082362892187, "learning_rate": 2.188953198016846e-05, "loss": 0.066, "step": 4424 }, { "epoch": 0.49, "grad_norm": 0.3224210422173281, "learning_rate": 2.1882449567824165e-05, "loss": 0.049, "step": 4425 }, { "epoch": 0.49, "grad_norm": 0.3536928270191253, "learning_rate": 2.1875366917300057e-05, "loss": 0.0718, "step": 4426 }, { "epoch": 0.49, "grad_norm": 3.56370309960082, "learning_rate": 2.1868284029492256e-05, "loss": 0.1338, "step": 4427 }, { "epoch": 0.49, "grad_norm": 0.38858855614836, "learning_rate": 2.1861200905296952e-05, "loss": 0.0716, "step": 4428 }, { "epoch": 0.49, "grad_norm": 0.3697511176143363, "learning_rate": 2.1854117545610345e-05, "loss": 0.0739, "step": 4429 }, { "epoch": 0.49, "grad_norm": 0.42592013132642514, "learning_rate": 2.1847033951328673e-05, "loss": 0.0827, "step": 4430 }, { "epoch": 0.49, "grad_norm": 0.382245558713531, "learning_rate": 2.183995012334818e-05, "loss": 0.0574, "step": 4431 }, { "epoch": 0.49, "grad_norm": 0.26570928863133125, "learning_rate": 2.1832866062565183e-05, "loss": 0.053, "step": 4432 }, { "epoch": 0.49, "grad_norm": 0.3227790672663503, "learning_rate": 2.1825781769875986e-05, "loss": 0.0625, "step": 4433 }, { "epoch": 0.49, "grad_norm": 0.36195492377391036, "learning_rate": 2.1818697246176943e-05, "loss": 0.0705, "step": 4434 }, { "epoch": 0.49, "grad_norm": 0.3300240376416469, "learning_rate": 2.1811612492364436e-05, "loss": 0.0622, "step": 4435 }, { "epoch": 0.49, "grad_norm": 0.4537924750338406, "learning_rate": 2.1804527509334875e-05, "loss": 0.0832, "step": 4436 }, { "epoch": 0.49, "grad_norm": 0.3826781640528386, "learning_rate": 2.17974422979847e-05, "loss": 0.0693, "step": 4437 }, { "epoch": 0.49, "grad_norm": 0.3757246882960812, "learning_rate": 2.1790356859210378e-05, "loss": 0.0678, "step": 4438 }, { "epoch": 0.49, "grad_norm": 0.42329380088608826, "learning_rate": 2.17832711939084e-05, "loss": 0.0884, "step": 4439 }, { "epoch": 0.49, "grad_norm": 0.3207002247018268, "learning_rate": 2.17761853029753e-05, "loss": 0.0642, "step": 4440 }, { "epoch": 0.49, "grad_norm": 0.42221749496358135, "learning_rate": 2.1769099187307616e-05, "loss": 0.0606, "step": 4441 }, { "epoch": 0.49, "grad_norm": 0.30211302457883304, "learning_rate": 2.176201284780195e-05, "loss": 0.0605, "step": 4442 }, { "epoch": 0.49, "grad_norm": 0.3349027110392718, "learning_rate": 2.1754926285354895e-05, "loss": 0.0812, "step": 4443 }, { "epoch": 0.49, "grad_norm": 0.4498126540677357, "learning_rate": 2.1747839500863096e-05, "loss": 0.0966, "step": 4444 }, { "epoch": 0.49, "grad_norm": 0.35671102992495873, "learning_rate": 2.1740752495223216e-05, "loss": 0.0594, "step": 4445 }, { "epoch": 0.49, "grad_norm": 0.38519754710689025, "learning_rate": 2.1733665269331953e-05, "loss": 0.0643, "step": 4446 }, { "epoch": 0.49, "grad_norm": 0.33666308356650826, "learning_rate": 2.1726577824086032e-05, "loss": 0.0617, "step": 4447 }, { "epoch": 0.49, "grad_norm": 0.3212680056086583, "learning_rate": 2.1719490160382196e-05, "loss": 0.0485, "step": 4448 }, { "epoch": 0.49, "grad_norm": 0.2859127643704896, "learning_rate": 2.1712402279117225e-05, "loss": 0.0421, "step": 4449 }, { "epoch": 0.49, "grad_norm": 0.45362353031007613, "learning_rate": 2.1705314181187922e-05, "loss": 0.079, "step": 4450 }, { "epoch": 0.49, "grad_norm": 0.3033710552740286, "learning_rate": 2.169822586749113e-05, "loss": 0.0588, "step": 4451 }, { "epoch": 0.49, "grad_norm": 0.3022458965442612, "learning_rate": 2.169113733892369e-05, "loss": 0.078, "step": 4452 }, { "epoch": 0.49, "grad_norm": 0.49415525030272484, "learning_rate": 2.1684048596382507e-05, "loss": 0.0998, "step": 4453 }, { "epoch": 0.49, "grad_norm": 0.40718191803204307, "learning_rate": 2.1676959640764484e-05, "loss": 0.056, "step": 4454 }, { "epoch": 0.49, "grad_norm": 0.3225641197588187, "learning_rate": 2.1669870472966567e-05, "loss": 0.0692, "step": 4455 }, { "epoch": 0.49, "grad_norm": 0.33938903642574786, "learning_rate": 2.166278109388572e-05, "loss": 0.0457, "step": 4456 }, { "epoch": 0.49, "grad_norm": 0.34952943836368505, "learning_rate": 2.165569150441895e-05, "loss": 0.074, "step": 4457 }, { "epoch": 0.49, "grad_norm": 0.3466505656301169, "learning_rate": 2.1648601705463263e-05, "loss": 0.0513, "step": 4458 }, { "epoch": 0.49, "grad_norm": 0.28325509219377515, "learning_rate": 2.164151169791571e-05, "loss": 0.045, "step": 4459 }, { "epoch": 0.49, "grad_norm": 0.3090287052574003, "learning_rate": 2.1634421482673368e-05, "loss": 0.0444, "step": 4460 }, { "epoch": 0.49, "grad_norm": 0.3609038465259484, "learning_rate": 2.162733106063334e-05, "loss": 0.0925, "step": 4461 }, { "epoch": 0.49, "grad_norm": 0.25518660874576665, "learning_rate": 2.1620240432692737e-05, "loss": 0.0544, "step": 4462 }, { "epoch": 0.49, "grad_norm": 0.39989662399207204, "learning_rate": 2.1613149599748738e-05, "loss": 0.0845, "step": 4463 }, { "epoch": 0.49, "grad_norm": 0.4230374638997947, "learning_rate": 2.1606058562698496e-05, "loss": 0.1073, "step": 4464 }, { "epoch": 0.49, "grad_norm": 0.3639532047966518, "learning_rate": 2.1598967322439237e-05, "loss": 0.0736, "step": 4465 }, { "epoch": 0.49, "grad_norm": 0.40011851670918575, "learning_rate": 2.1591875879868177e-05, "loss": 0.0785, "step": 4466 }, { "epoch": 0.49, "grad_norm": 0.4000397982766122, "learning_rate": 2.1584784235882575e-05, "loss": 0.059, "step": 4467 }, { "epoch": 0.49, "grad_norm": 0.3629141562791462, "learning_rate": 2.157769239137971e-05, "loss": 0.0623, "step": 4468 }, { "epoch": 0.49, "grad_norm": 0.3069926992388575, "learning_rate": 2.1570600347256894e-05, "loss": 0.0533, "step": 4469 }, { "epoch": 0.49, "grad_norm": 0.326456242937862, "learning_rate": 2.1563508104411457e-05, "loss": 0.072, "step": 4470 }, { "epoch": 0.49, "grad_norm": 0.378637592203253, "learning_rate": 2.155641566374075e-05, "loss": 0.0645, "step": 4471 }, { "epoch": 0.49, "grad_norm": 0.2808094150001029, "learning_rate": 2.1549323026142168e-05, "loss": 0.0568, "step": 4472 }, { "epoch": 0.49, "grad_norm": 0.3555040027618403, "learning_rate": 2.15422301925131e-05, "loss": 0.0641, "step": 4473 }, { "epoch": 0.49, "grad_norm": 0.41654415316289695, "learning_rate": 2.153513716375099e-05, "loss": 0.1115, "step": 4474 }, { "epoch": 0.49, "grad_norm": 0.34796017852490074, "learning_rate": 2.1528043940753293e-05, "loss": 0.0685, "step": 4475 }, { "epoch": 0.49, "grad_norm": 0.2984227691130474, "learning_rate": 2.1520950524417484e-05, "loss": 0.0503, "step": 4476 }, { "epoch": 0.49, "grad_norm": 0.42422315760337237, "learning_rate": 2.151385691564108e-05, "loss": 0.072, "step": 4477 }, { "epoch": 0.49, "grad_norm": 0.31711807947648224, "learning_rate": 2.1506763115321602e-05, "loss": 0.0634, "step": 4478 }, { "epoch": 0.49, "grad_norm": 0.5635149593923158, "learning_rate": 2.1499669124356602e-05, "loss": 0.1025, "step": 4479 }, { "epoch": 0.49, "grad_norm": 0.27240057929568373, "learning_rate": 2.1492574943643666e-05, "loss": 0.0536, "step": 4480 }, { "epoch": 0.49, "grad_norm": 0.3051025669012113, "learning_rate": 2.148548057408039e-05, "loss": 0.0573, "step": 4481 }, { "epoch": 0.49, "grad_norm": 0.4068695259604966, "learning_rate": 2.1478386016564406e-05, "loss": 0.0801, "step": 4482 }, { "epoch": 0.49, "grad_norm": 0.33064398707439074, "learning_rate": 2.1471291271993353e-05, "loss": 0.0698, "step": 4483 }, { "epoch": 0.49, "grad_norm": 0.2911332551949484, "learning_rate": 2.1464196341264915e-05, "loss": 0.0587, "step": 4484 }, { "epoch": 0.49, "grad_norm": 0.2713247831815962, "learning_rate": 2.1457101225276782e-05, "loss": 0.0491, "step": 4485 }, { "epoch": 0.49, "grad_norm": 0.4243575977133858, "learning_rate": 2.145000592492668e-05, "loss": 0.097, "step": 4486 }, { "epoch": 0.49, "grad_norm": 0.35213316060726657, "learning_rate": 2.1442910441112348e-05, "loss": 0.0641, "step": 4487 }, { "epoch": 0.49, "grad_norm": 0.37737933952613795, "learning_rate": 2.1435814774731557e-05, "loss": 0.0579, "step": 4488 }, { "epoch": 0.49, "grad_norm": 0.44708791475653537, "learning_rate": 2.1428718926682094e-05, "loss": 0.1056, "step": 4489 }, { "epoch": 0.49, "grad_norm": 0.41213808366476695, "learning_rate": 2.1421622897861777e-05, "loss": 0.0719, "step": 4490 }, { "epoch": 0.49, "grad_norm": 0.2694959202707098, "learning_rate": 2.1414526689168436e-05, "loss": 0.0607, "step": 4491 }, { "epoch": 0.49, "grad_norm": 0.3137855551813448, "learning_rate": 2.1407430301499934e-05, "loss": 0.0573, "step": 4492 }, { "epoch": 0.49, "grad_norm": 0.40188769075755126, "learning_rate": 2.1400333735754146e-05, "loss": 0.0623, "step": 4493 }, { "epoch": 0.49, "grad_norm": 0.38618627167540487, "learning_rate": 2.139323699282899e-05, "loss": 0.0765, "step": 4494 }, { "epoch": 0.49, "grad_norm": 0.3887449032979908, "learning_rate": 2.1386140073622374e-05, "loss": 0.0731, "step": 4495 }, { "epoch": 0.49, "grad_norm": 0.39065961684382844, "learning_rate": 2.1379042979032256e-05, "loss": 0.0731, "step": 4496 }, { "epoch": 0.49, "grad_norm": 0.4004124705239988, "learning_rate": 2.1371945709956607e-05, "loss": 0.0641, "step": 4497 }, { "epoch": 0.49, "grad_norm": 0.24999219166959438, "learning_rate": 2.1364848267293424e-05, "loss": 0.0549, "step": 4498 }, { "epoch": 0.49, "grad_norm": 0.36493768728401355, "learning_rate": 2.135775065194071e-05, "loss": 0.0618, "step": 4499 }, { "epoch": 0.49, "grad_norm": 0.4038511943371582, "learning_rate": 2.1350652864796513e-05, "loss": 0.0869, "step": 4500 }, { "epoch": 0.49, "grad_norm": 0.41569938639347065, "learning_rate": 2.1343554906758885e-05, "loss": 0.0717, "step": 4501 }, { "epoch": 0.49, "grad_norm": 0.3690034924460754, "learning_rate": 2.133645677872591e-05, "loss": 0.0695, "step": 4502 }, { "epoch": 0.49, "grad_norm": 0.3185312708066224, "learning_rate": 2.1329358481595684e-05, "loss": 0.0545, "step": 4503 }, { "epoch": 0.49, "grad_norm": 0.32036721529825507, "learning_rate": 2.1322260016266337e-05, "loss": 0.0634, "step": 4504 }, { "epoch": 0.49, "grad_norm": 0.304034928094034, "learning_rate": 2.1315161383636006e-05, "loss": 0.0482, "step": 4505 }, { "epoch": 0.49, "grad_norm": 0.2953906218320155, "learning_rate": 2.1308062584602865e-05, "loss": 0.0561, "step": 4506 }, { "epoch": 0.49, "grad_norm": 0.3825905214669043, "learning_rate": 2.1300963620065092e-05, "loss": 0.0689, "step": 4507 }, { "epoch": 0.5, "grad_norm": 0.4904463108516287, "learning_rate": 2.1293864490920897e-05, "loss": 0.0995, "step": 4508 }, { "epoch": 0.5, "grad_norm": 0.2820360141045328, "learning_rate": 2.1286765198068516e-05, "loss": 0.0456, "step": 4509 }, { "epoch": 0.5, "grad_norm": 0.37802929021619497, "learning_rate": 2.1279665742406187e-05, "loss": 0.0564, "step": 4510 }, { "epoch": 0.5, "grad_norm": 0.39281526681415724, "learning_rate": 2.1272566124832183e-05, "loss": 0.0691, "step": 4511 }, { "epoch": 0.5, "grad_norm": 0.45911288795727956, "learning_rate": 2.126546634624479e-05, "loss": 0.0859, "step": 4512 }, { "epoch": 0.5, "grad_norm": 0.33890490363255027, "learning_rate": 2.1258366407542333e-05, "loss": 0.0793, "step": 4513 }, { "epoch": 0.5, "grad_norm": 0.3041221557778955, "learning_rate": 2.125126630962312e-05, "loss": 0.0592, "step": 4514 }, { "epoch": 0.5, "grad_norm": 0.3780662624987316, "learning_rate": 2.1244166053385528e-05, "loss": 0.0668, "step": 4515 }, { "epoch": 0.5, "grad_norm": 0.3318567602888956, "learning_rate": 2.1237065639727906e-05, "loss": 0.0622, "step": 4516 }, { "epoch": 0.5, "grad_norm": 0.3071385853611324, "learning_rate": 2.1229965069548658e-05, "loss": 0.0552, "step": 4517 }, { "epoch": 0.5, "grad_norm": 0.4638316251273483, "learning_rate": 2.1222864343746185e-05, "loss": 0.0741, "step": 4518 }, { "epoch": 0.5, "grad_norm": 0.45571939367078695, "learning_rate": 2.1215763463218927e-05, "loss": 0.0758, "step": 4519 }, { "epoch": 0.5, "grad_norm": 0.3671358965060949, "learning_rate": 2.1208662428865326e-05, "loss": 0.0646, "step": 4520 }, { "epoch": 0.5, "grad_norm": 0.2814296704829106, "learning_rate": 2.1201561241583856e-05, "loss": 0.0637, "step": 4521 }, { "epoch": 0.5, "grad_norm": 0.34420202618949586, "learning_rate": 2.1194459902272997e-05, "loss": 0.0727, "step": 4522 }, { "epoch": 0.5, "grad_norm": 0.39079298222199, "learning_rate": 2.1187358411831273e-05, "loss": 0.0867, "step": 4523 }, { "epoch": 0.5, "grad_norm": 0.23260615760667525, "learning_rate": 2.1180256771157194e-05, "loss": 0.0566, "step": 4524 }, { "epoch": 0.5, "grad_norm": 0.29157824822322, "learning_rate": 2.1173154981149322e-05, "loss": 0.0515, "step": 4525 }, { "epoch": 0.5, "grad_norm": 0.367190543628821, "learning_rate": 2.1166053042706204e-05, "loss": 0.0623, "step": 4526 }, { "epoch": 0.5, "grad_norm": 0.31907652604586173, "learning_rate": 2.1158950956726443e-05, "loss": 0.0728, "step": 4527 }, { "epoch": 0.5, "grad_norm": 0.25753795570889043, "learning_rate": 2.115184872410862e-05, "loss": 0.0548, "step": 4528 }, { "epoch": 0.5, "grad_norm": 0.3458239241932494, "learning_rate": 2.114474634575138e-05, "loss": 0.056, "step": 4529 }, { "epoch": 0.5, "grad_norm": 0.3719278097722103, "learning_rate": 2.113764382255334e-05, "loss": 0.0814, "step": 4530 }, { "epoch": 0.5, "grad_norm": 0.44318147694856896, "learning_rate": 2.1130541155413167e-05, "loss": 0.0923, "step": 4531 }, { "epoch": 0.5, "grad_norm": 0.4849299051816181, "learning_rate": 2.1123438345229537e-05, "loss": 0.101, "step": 4532 }, { "epoch": 0.5, "grad_norm": 0.6268001143889941, "learning_rate": 2.1116335392901154e-05, "loss": 0.1052, "step": 4533 }, { "epoch": 0.5, "grad_norm": 0.3218850402747531, "learning_rate": 2.110923229932671e-05, "loss": 0.0636, "step": 4534 }, { "epoch": 0.5, "grad_norm": 0.3483550838112909, "learning_rate": 2.1102129065404952e-05, "loss": 0.0686, "step": 4535 }, { "epoch": 0.5, "grad_norm": 0.36008226209777056, "learning_rate": 2.1095025692034614e-05, "loss": 0.0648, "step": 4536 }, { "epoch": 0.5, "grad_norm": 0.3414932646387405, "learning_rate": 2.1087922180114474e-05, "loss": 0.0641, "step": 4537 }, { "epoch": 0.5, "grad_norm": 0.6524708789597221, "learning_rate": 2.1080818530543304e-05, "loss": 0.1284, "step": 4538 }, { "epoch": 0.5, "grad_norm": 0.38230726416399574, "learning_rate": 2.107371474421991e-05, "loss": 0.0804, "step": 4539 }, { "epoch": 0.5, "grad_norm": 0.3675223102414846, "learning_rate": 2.106661082204311e-05, "loss": 0.0623, "step": 4540 }, { "epoch": 0.5, "grad_norm": 0.29304390261268176, "learning_rate": 2.105950676491173e-05, "loss": 0.0704, "step": 4541 }, { "epoch": 0.5, "grad_norm": 0.29718646224810447, "learning_rate": 2.105240257372464e-05, "loss": 0.0708, "step": 4542 }, { "epoch": 0.5, "grad_norm": 0.24698758153739822, "learning_rate": 2.1045298249380687e-05, "loss": 0.0493, "step": 4543 }, { "epoch": 0.5, "grad_norm": 0.3347476691790936, "learning_rate": 2.1038193792778775e-05, "loss": 0.0781, "step": 4544 }, { "epoch": 0.5, "grad_norm": 0.37789108512934, "learning_rate": 2.103108920481779e-05, "loss": 0.0759, "step": 4545 }, { "epoch": 0.5, "grad_norm": 0.2991314389603449, "learning_rate": 2.102398448639667e-05, "loss": 0.0597, "step": 4546 }, { "epoch": 0.5, "grad_norm": 0.3272076225833289, "learning_rate": 2.1016879638414327e-05, "loss": 0.0509, "step": 4547 }, { "epoch": 0.5, "grad_norm": 0.3341306399587591, "learning_rate": 2.100977466176973e-05, "loss": 0.0658, "step": 4548 }, { "epoch": 0.5, "grad_norm": 0.4034330935425939, "learning_rate": 2.100266955736184e-05, "loss": 0.1087, "step": 4549 }, { "epoch": 0.5, "grad_norm": 0.34608031563270264, "learning_rate": 2.099556432608965e-05, "loss": 0.0657, "step": 4550 }, { "epoch": 0.5, "grad_norm": 0.2656803353958201, "learning_rate": 2.098845896885215e-05, "loss": 0.0393, "step": 4551 }, { "epoch": 0.5, "grad_norm": 0.28982713451213166, "learning_rate": 2.0981353486548363e-05, "loss": 0.0633, "step": 4552 }, { "epoch": 0.5, "grad_norm": 0.3471113284835435, "learning_rate": 2.0974247880077314e-05, "loss": 0.0764, "step": 4553 }, { "epoch": 0.5, "grad_norm": 0.32785338558661425, "learning_rate": 2.096714215033806e-05, "loss": 0.0709, "step": 4554 }, { "epoch": 0.5, "grad_norm": 0.2781855206595912, "learning_rate": 2.0960036298229656e-05, "loss": 0.0779, "step": 4555 }, { "epoch": 0.5, "grad_norm": 0.4002477251306299, "learning_rate": 2.095293032465119e-05, "loss": 0.0717, "step": 4556 }, { "epoch": 0.5, "grad_norm": 0.4185926469116941, "learning_rate": 2.0945824230501735e-05, "loss": 0.0992, "step": 4557 }, { "epoch": 0.5, "grad_norm": 0.35372442328254866, "learning_rate": 2.0938718016680433e-05, "loss": 0.0695, "step": 4558 }, { "epoch": 0.5, "grad_norm": 0.2892305555773046, "learning_rate": 2.093161168408638e-05, "loss": 0.0611, "step": 4559 }, { "epoch": 0.5, "grad_norm": 0.3085020387758374, "learning_rate": 2.0924505233618734e-05, "loss": 0.056, "step": 4560 }, { "epoch": 0.5, "grad_norm": 0.3919079122152957, "learning_rate": 2.091739866617664e-05, "loss": 0.0735, "step": 4561 }, { "epoch": 0.5, "grad_norm": 0.35461793461008023, "learning_rate": 2.0910291982659277e-05, "loss": 0.0705, "step": 4562 }, { "epoch": 0.5, "grad_norm": 0.30415871780712833, "learning_rate": 2.0903185183965814e-05, "loss": 0.0433, "step": 4563 }, { "epoch": 0.5, "grad_norm": 0.41869706132389145, "learning_rate": 2.0896078270995463e-05, "loss": 0.0728, "step": 4564 }, { "epoch": 0.5, "grad_norm": 0.3011101992300247, "learning_rate": 2.0888971244647425e-05, "loss": 0.0652, "step": 4565 }, { "epoch": 0.5, "grad_norm": 0.3833903408507335, "learning_rate": 2.0881864105820936e-05, "loss": 0.0905, "step": 4566 }, { "epoch": 0.5, "grad_norm": 0.3821481662325426, "learning_rate": 2.0874756855415245e-05, "loss": 0.0682, "step": 4567 }, { "epoch": 0.5, "grad_norm": 0.2893226844427285, "learning_rate": 2.0867649494329587e-05, "loss": 0.0523, "step": 4568 }, { "epoch": 0.5, "grad_norm": 0.35811150314036255, "learning_rate": 2.0860542023463256e-05, "loss": 0.068, "step": 4569 }, { "epoch": 0.5, "grad_norm": 0.25479581963363895, "learning_rate": 2.085343444371551e-05, "loss": 0.05, "step": 4570 }, { "epoch": 0.5, "grad_norm": 0.35007637416790827, "learning_rate": 2.0846326755985672e-05, "loss": 0.055, "step": 4571 }, { "epoch": 0.5, "grad_norm": 0.44856845392424505, "learning_rate": 2.083921896117303e-05, "loss": 0.0807, "step": 4572 }, { "epoch": 0.5, "grad_norm": 0.3368993009567651, "learning_rate": 2.0832111060176924e-05, "loss": 0.0743, "step": 4573 }, { "epoch": 0.5, "grad_norm": 0.3359518048101841, "learning_rate": 2.0825003053896686e-05, "loss": 0.0488, "step": 4574 }, { "epoch": 0.5, "grad_norm": 0.38670225782581175, "learning_rate": 2.081789494323167e-05, "loss": 0.0943, "step": 4575 }, { "epoch": 0.5, "grad_norm": 0.35578533369626514, "learning_rate": 2.0810786729081237e-05, "loss": 0.0502, "step": 4576 }, { "epoch": 0.5, "grad_norm": 0.3729927263356071, "learning_rate": 2.0803678412344775e-05, "loss": 0.0587, "step": 4577 }, { "epoch": 0.5, "grad_norm": 0.3280344679235716, "learning_rate": 2.079656999392166e-05, "loss": 0.0474, "step": 4578 }, { "epoch": 0.5, "grad_norm": 0.5258295147581339, "learning_rate": 2.078946147471131e-05, "loss": 0.1168, "step": 4579 }, { "epoch": 0.5, "grad_norm": 0.30115483355765493, "learning_rate": 2.0782352855613128e-05, "loss": 0.0428, "step": 4580 }, { "epoch": 0.5, "grad_norm": 0.5149466936985557, "learning_rate": 2.077524413752655e-05, "loss": 0.1027, "step": 4581 }, { "epoch": 0.5, "grad_norm": 0.31386350951208836, "learning_rate": 2.0768135321351016e-05, "loss": 0.0701, "step": 4582 }, { "epoch": 0.5, "grad_norm": 0.3359317335476653, "learning_rate": 2.0761026407985977e-05, "loss": 0.063, "step": 4583 }, { "epoch": 0.5, "grad_norm": 0.32639873632947247, "learning_rate": 2.0753917398330902e-05, "loss": 0.0703, "step": 4584 }, { "epoch": 0.5, "grad_norm": 0.28803314759807985, "learning_rate": 2.0746808293285278e-05, "loss": 0.0413, "step": 4585 }, { "epoch": 0.5, "grad_norm": 0.48819399244752704, "learning_rate": 2.073969909374858e-05, "loss": 0.0667, "step": 4586 }, { "epoch": 0.5, "grad_norm": 0.4999769026666627, "learning_rate": 2.0732589800620328e-05, "loss": 0.0834, "step": 4587 }, { "epoch": 0.5, "grad_norm": 0.3498040195546038, "learning_rate": 2.0725480414800012e-05, "loss": 0.0707, "step": 4588 }, { "epoch": 0.5, "grad_norm": 0.39269592643317175, "learning_rate": 2.0718370937187183e-05, "loss": 0.0627, "step": 4589 }, { "epoch": 0.5, "grad_norm": 0.37791925854549424, "learning_rate": 2.0711261368681356e-05, "loss": 0.0649, "step": 4590 }, { "epoch": 0.5, "grad_norm": 0.36467931256096925, "learning_rate": 2.0704151710182096e-05, "loss": 0.0441, "step": 4591 }, { "epoch": 0.5, "grad_norm": 0.3811198786388003, "learning_rate": 2.069704196258896e-05, "loss": 0.0611, "step": 4592 }, { "epoch": 0.5, "grad_norm": 0.36678401611668343, "learning_rate": 2.068993212680152e-05, "loss": 0.0585, "step": 4593 }, { "epoch": 0.5, "grad_norm": 0.32629940258161994, "learning_rate": 2.068282220371936e-05, "loss": 0.0728, "step": 4594 }, { "epoch": 0.5, "grad_norm": 0.4451318006137944, "learning_rate": 2.067571219424207e-05, "loss": 0.0682, "step": 4595 }, { "epoch": 0.5, "grad_norm": 0.3159580470609586, "learning_rate": 2.066860209926925e-05, "loss": 0.0551, "step": 4596 }, { "epoch": 0.5, "grad_norm": 0.3331972629259536, "learning_rate": 2.066149191970053e-05, "loss": 0.0672, "step": 4597 }, { "epoch": 0.5, "grad_norm": 0.4961277206684615, "learning_rate": 2.0654381656435526e-05, "loss": 0.0947, "step": 4598 }, { "epoch": 0.51, "grad_norm": 0.30431991803303776, "learning_rate": 2.0647271310373877e-05, "loss": 0.0526, "step": 4599 }, { "epoch": 0.51, "grad_norm": 0.3006196392913737, "learning_rate": 2.064016088241523e-05, "loss": 0.0474, "step": 4600 }, { "epoch": 0.51, "grad_norm": 0.41157576740172264, "learning_rate": 2.0633050373459246e-05, "loss": 0.0725, "step": 4601 }, { "epoch": 0.51, "grad_norm": 0.3708199545057789, "learning_rate": 2.0625939784405586e-05, "loss": 0.0556, "step": 4602 }, { "epoch": 0.51, "grad_norm": 0.4893523560811144, "learning_rate": 2.061882911615394e-05, "loss": 0.1029, "step": 4603 }, { "epoch": 0.51, "grad_norm": 0.3511242253922052, "learning_rate": 2.0611718369603982e-05, "loss": 0.0974, "step": 4604 }, { "epoch": 0.51, "grad_norm": 0.3298713442681895, "learning_rate": 2.060460754565542e-05, "loss": 0.0615, "step": 4605 }, { "epoch": 0.51, "grad_norm": 0.4140478167539539, "learning_rate": 2.0597496645207964e-05, "loss": 0.0996, "step": 4606 }, { "epoch": 0.51, "grad_norm": 0.37001847533545773, "learning_rate": 2.059038566916132e-05, "loss": 0.0694, "step": 4607 }, { "epoch": 0.51, "grad_norm": 0.4362299399696153, "learning_rate": 2.0583274618415227e-05, "loss": 0.0878, "step": 4608 }, { "epoch": 0.51, "grad_norm": 0.28516118816123653, "learning_rate": 2.057616349386941e-05, "loss": 0.0565, "step": 4609 }, { "epoch": 0.51, "grad_norm": 0.31347394805483453, "learning_rate": 2.056905229642363e-05, "loss": 0.0619, "step": 4610 }, { "epoch": 0.51, "grad_norm": 0.295383117951669, "learning_rate": 2.056194102697763e-05, "loss": 0.0464, "step": 4611 }, { "epoch": 0.51, "grad_norm": 0.3155562675345661, "learning_rate": 2.055482968643118e-05, "loss": 0.0624, "step": 4612 }, { "epoch": 0.51, "grad_norm": 0.43115505679908894, "learning_rate": 2.0547718275684046e-05, "loss": 0.0851, "step": 4613 }, { "epoch": 0.51, "grad_norm": 0.3706003623479934, "learning_rate": 2.0540606795636022e-05, "loss": 0.0796, "step": 4614 }, { "epoch": 0.51, "grad_norm": 0.3756549004191829, "learning_rate": 2.053349524718689e-05, "loss": 0.0795, "step": 4615 }, { "epoch": 0.51, "grad_norm": 0.3475011522459234, "learning_rate": 2.0526383631236454e-05, "loss": 0.058, "step": 4616 }, { "epoch": 0.51, "grad_norm": 0.253513986849672, "learning_rate": 2.051927194868451e-05, "loss": 0.0459, "step": 4617 }, { "epoch": 0.51, "grad_norm": 0.4014950662240892, "learning_rate": 2.0512160200430896e-05, "loss": 0.0846, "step": 4618 }, { "epoch": 0.51, "grad_norm": 0.3162283102709553, "learning_rate": 2.050504838737542e-05, "loss": 0.0737, "step": 4619 }, { "epoch": 0.51, "grad_norm": 0.33028667114382493, "learning_rate": 2.0497936510417928e-05, "loss": 0.0632, "step": 4620 }, { "epoch": 0.51, "grad_norm": 0.27218718089666394, "learning_rate": 2.0490824570458248e-05, "loss": 0.0414, "step": 4621 }, { "epoch": 0.51, "grad_norm": 0.4156168427778287, "learning_rate": 2.048371256839624e-05, "loss": 0.0722, "step": 4622 }, { "epoch": 0.51, "grad_norm": 0.3755848020158526, "learning_rate": 2.0476600505131747e-05, "loss": 0.0877, "step": 4623 }, { "epoch": 0.51, "grad_norm": 0.5013673979250173, "learning_rate": 2.046948838156465e-05, "loss": 0.085, "step": 4624 }, { "epoch": 0.51, "grad_norm": 0.3625648572056343, "learning_rate": 2.0462376198594813e-05, "loss": 0.0733, "step": 4625 }, { "epoch": 0.51, "grad_norm": 0.3110875036243839, "learning_rate": 2.0455263957122113e-05, "loss": 0.0619, "step": 4626 }, { "epoch": 0.51, "grad_norm": 0.26201803261638423, "learning_rate": 2.044815165804645e-05, "loss": 0.0593, "step": 4627 }, { "epoch": 0.51, "grad_norm": 0.3392611036212399, "learning_rate": 2.04410393022677e-05, "loss": 0.064, "step": 4628 }, { "epoch": 0.51, "grad_norm": 0.3797220120227483, "learning_rate": 2.043392689068578e-05, "loss": 0.0564, "step": 4629 }, { "epoch": 0.51, "grad_norm": 0.4166361877102114, "learning_rate": 2.0426814424200592e-05, "loss": 0.0808, "step": 4630 }, { "epoch": 0.51, "grad_norm": 0.4395868564151161, "learning_rate": 2.041970190371206e-05, "loss": 0.0759, "step": 4631 }, { "epoch": 0.51, "grad_norm": 0.49345372823805955, "learning_rate": 2.041258933012009e-05, "loss": 0.0978, "step": 4632 }, { "epoch": 0.51, "grad_norm": 0.3451715250637295, "learning_rate": 2.0405476704324624e-05, "loss": 0.0597, "step": 4633 }, { "epoch": 0.51, "grad_norm": 0.32081362438766614, "learning_rate": 2.0398364027225593e-05, "loss": 0.0644, "step": 4634 }, { "epoch": 0.51, "grad_norm": 0.40497959009005763, "learning_rate": 2.039125129972295e-05, "loss": 0.0887, "step": 4635 }, { "epoch": 0.51, "grad_norm": 0.3531975553461992, "learning_rate": 2.0384138522716626e-05, "loss": 0.0745, "step": 4636 }, { "epoch": 0.51, "grad_norm": 0.3677945090976669, "learning_rate": 2.0377025697106587e-05, "loss": 0.0661, "step": 4637 }, { "epoch": 0.51, "grad_norm": 0.34489797593609794, "learning_rate": 2.036991282379279e-05, "loss": 0.0634, "step": 4638 }, { "epoch": 0.51, "grad_norm": 0.36388535671768535, "learning_rate": 2.0362799903675214e-05, "loss": 0.0777, "step": 4639 }, { "epoch": 0.51, "grad_norm": 0.28581562623867446, "learning_rate": 2.0355686937653818e-05, "loss": 0.0448, "step": 4640 }, { "epoch": 0.51, "grad_norm": 0.3708941474975803, "learning_rate": 2.0348573926628586e-05, "loss": 0.0723, "step": 4641 }, { "epoch": 0.51, "grad_norm": 0.3063727381041822, "learning_rate": 2.03414608714995e-05, "loss": 0.0475, "step": 4642 }, { "epoch": 0.51, "grad_norm": 0.3477293430599361, "learning_rate": 2.033434777316655e-05, "loss": 0.0647, "step": 4643 }, { "epoch": 0.51, "grad_norm": 0.2592214463032209, "learning_rate": 2.0327234632529738e-05, "loss": 0.0422, "step": 4644 }, { "epoch": 0.51, "grad_norm": 0.290287964238808, "learning_rate": 2.0320121450489062e-05, "loss": 0.0467, "step": 4645 }, { "epoch": 0.51, "grad_norm": 0.3955144057489601, "learning_rate": 2.0313008227944527e-05, "loss": 0.059, "step": 4646 }, { "epoch": 0.51, "grad_norm": 0.39261266452050225, "learning_rate": 2.0305894965796143e-05, "loss": 0.0927, "step": 4647 }, { "epoch": 0.51, "grad_norm": 0.3816971197257481, "learning_rate": 2.029878166494393e-05, "loss": 0.0732, "step": 4648 }, { "epoch": 0.51, "grad_norm": 0.31963227314258197, "learning_rate": 2.029166832628791e-05, "loss": 0.0548, "step": 4649 }, { "epoch": 0.51, "grad_norm": 0.4018415446319925, "learning_rate": 2.0284554950728106e-05, "loss": 0.101, "step": 4650 }, { "epoch": 0.51, "grad_norm": 0.38869851945958717, "learning_rate": 2.027744153916455e-05, "loss": 0.046, "step": 4651 }, { "epoch": 0.51, "grad_norm": 0.3655089300084946, "learning_rate": 2.0270328092497266e-05, "loss": 0.0567, "step": 4652 }, { "epoch": 0.51, "grad_norm": 0.2826903073957709, "learning_rate": 2.026321461162632e-05, "loss": 0.0683, "step": 4653 }, { "epoch": 0.51, "grad_norm": 0.4094382404663676, "learning_rate": 2.025610109745173e-05, "loss": 0.0747, "step": 4654 }, { "epoch": 0.51, "grad_norm": 0.3249037985372121, "learning_rate": 2.0248987550873566e-05, "loss": 0.0486, "step": 4655 }, { "epoch": 0.51, "grad_norm": 0.34473773624801163, "learning_rate": 2.024187397279186e-05, "loss": 0.0598, "step": 4656 }, { "epoch": 0.51, "grad_norm": 0.34700989377101266, "learning_rate": 2.0234760364106686e-05, "loss": 0.0716, "step": 4657 }, { "epoch": 0.51, "grad_norm": 0.364102491548055, "learning_rate": 2.0227646725718085e-05, "loss": 0.0562, "step": 4658 }, { "epoch": 0.51, "grad_norm": 0.3672968721841312, "learning_rate": 2.0220533058526146e-05, "loss": 0.0976, "step": 4659 }, { "epoch": 0.51, "grad_norm": 0.33257767089210566, "learning_rate": 2.021341936343091e-05, "loss": 0.051, "step": 4660 }, { "epoch": 0.51, "grad_norm": 0.48849394171363797, "learning_rate": 2.0206305641332467e-05, "loss": 0.0886, "step": 4661 }, { "epoch": 0.51, "grad_norm": 0.32436748907547974, "learning_rate": 2.0199191893130893e-05, "loss": 0.0612, "step": 4662 }, { "epoch": 0.51, "grad_norm": 0.3713300253933642, "learning_rate": 2.0192078119726246e-05, "loss": 0.0716, "step": 4663 }, { "epoch": 0.51, "grad_norm": 0.4341402990927922, "learning_rate": 2.018496432201863e-05, "loss": 0.088, "step": 4664 }, { "epoch": 0.51, "grad_norm": 0.34245291828327334, "learning_rate": 2.017785050090811e-05, "loss": 0.0642, "step": 4665 }, { "epoch": 0.51, "grad_norm": 0.4056287563720189, "learning_rate": 2.017073665729479e-05, "loss": 0.0818, "step": 4666 }, { "epoch": 0.51, "grad_norm": 0.39500812000064944, "learning_rate": 2.016362279207875e-05, "loss": 0.0762, "step": 4667 }, { "epoch": 0.51, "grad_norm": 0.4728721251777227, "learning_rate": 2.0156508906160083e-05, "loss": 0.0875, "step": 4668 }, { "epoch": 0.51, "grad_norm": 0.3154172865375658, "learning_rate": 2.014939500043888e-05, "loss": 0.0579, "step": 4669 }, { "epoch": 0.51, "grad_norm": 0.3254570122780192, "learning_rate": 2.0142281075815253e-05, "loss": 0.064, "step": 4670 }, { "epoch": 0.51, "grad_norm": 0.34107747254476783, "learning_rate": 2.013516713318929e-05, "loss": 0.0484, "step": 4671 }, { "epoch": 0.51, "grad_norm": 0.44921266302877566, "learning_rate": 2.0128053173461105e-05, "loss": 0.0927, "step": 4672 }, { "epoch": 0.51, "grad_norm": 0.28672796333332295, "learning_rate": 2.012093919753079e-05, "loss": 0.0602, "step": 4673 }, { "epoch": 0.51, "grad_norm": 0.26762949318211976, "learning_rate": 2.0113825206298458e-05, "loss": 0.0456, "step": 4674 }, { "epoch": 0.51, "grad_norm": 0.3094490126500517, "learning_rate": 2.0106711200664218e-05, "loss": 0.0589, "step": 4675 }, { "epoch": 0.51, "grad_norm": 0.4557519271253227, "learning_rate": 2.009959718152818e-05, "loss": 0.0876, "step": 4676 }, { "epoch": 0.51, "grad_norm": 0.25029671465645154, "learning_rate": 2.0092483149790453e-05, "loss": 0.034, "step": 4677 }, { "epoch": 0.51, "grad_norm": 0.3868847500338196, "learning_rate": 2.008536910635115e-05, "loss": 0.0908, "step": 4678 }, { "epoch": 0.51, "grad_norm": 0.31288024894664096, "learning_rate": 2.0078255052110393e-05, "loss": 0.0475, "step": 4679 }, { "epoch": 0.51, "grad_norm": 0.3199749074724017, "learning_rate": 2.00711409879683e-05, "loss": 0.0564, "step": 4680 }, { "epoch": 0.51, "grad_norm": 0.31253077832287374, "learning_rate": 2.0064026914824976e-05, "loss": 0.067, "step": 4681 }, { "epoch": 0.51, "grad_norm": 0.37506683071988134, "learning_rate": 2.0056912833580557e-05, "loss": 0.0684, "step": 4682 }, { "epoch": 0.51, "grad_norm": 0.40580266385574204, "learning_rate": 2.0049798745135148e-05, "loss": 0.1004, "step": 4683 }, { "epoch": 0.51, "grad_norm": 0.3062097231011995, "learning_rate": 2.0042684650388882e-05, "loss": 0.0497, "step": 4684 }, { "epoch": 0.51, "grad_norm": 0.29951505307784704, "learning_rate": 2.003557055024187e-05, "loss": 0.0664, "step": 4685 }, { "epoch": 0.51, "grad_norm": 0.38139977874329367, "learning_rate": 2.0028456445594234e-05, "loss": 0.0605, "step": 4686 }, { "epoch": 0.51, "grad_norm": 0.39084394041334164, "learning_rate": 2.0021342337346117e-05, "loss": 0.0633, "step": 4687 }, { "epoch": 0.51, "grad_norm": 0.4009913334492643, "learning_rate": 2.0014228226397618e-05, "loss": 0.0887, "step": 4688 }, { "epoch": 0.51, "grad_norm": 0.30262971114754145, "learning_rate": 2.0007114113648876e-05, "loss": 0.0603, "step": 4689 }, { "epoch": 0.52, "grad_norm": 0.3419804325403742, "learning_rate": 2e-05, "loss": 0.0627, "step": 4690 }, { "epoch": 0.52, "grad_norm": 0.3852560529177657, "learning_rate": 1.999288588635113e-05, "loss": 0.065, "step": 4691 }, { "epoch": 0.52, "grad_norm": 0.3104228005773945, "learning_rate": 1.998577177360239e-05, "loss": 0.0423, "step": 4692 }, { "epoch": 0.52, "grad_norm": 0.3383010400352988, "learning_rate": 1.997865766265389e-05, "loss": 0.0674, "step": 4693 }, { "epoch": 0.52, "grad_norm": 0.34263660275430635, "learning_rate": 1.997154355440577e-05, "loss": 0.0818, "step": 4694 }, { "epoch": 0.52, "grad_norm": 0.44871075973978786, "learning_rate": 1.9964429449758138e-05, "loss": 0.1121, "step": 4695 }, { "epoch": 0.52, "grad_norm": 0.28127286076354807, "learning_rate": 1.995731534961113e-05, "loss": 0.0463, "step": 4696 }, { "epoch": 0.52, "grad_norm": 0.23295536842010514, "learning_rate": 1.995020125486486e-05, "loss": 0.0492, "step": 4697 }, { "epoch": 0.52, "grad_norm": 0.3905686719336237, "learning_rate": 1.9943087166419453e-05, "loss": 0.066, "step": 4698 }, { "epoch": 0.52, "grad_norm": 0.40578542334841866, "learning_rate": 1.9935973085175024e-05, "loss": 0.0641, "step": 4699 }, { "epoch": 0.52, "grad_norm": 1.2559957237444201, "learning_rate": 1.9928859012031703e-05, "loss": 0.0979, "step": 4700 }, { "epoch": 0.52, "grad_norm": 0.3666195230448838, "learning_rate": 1.9921744947889607e-05, "loss": 0.0617, "step": 4701 }, { "epoch": 0.52, "grad_norm": 0.35480420381470423, "learning_rate": 1.991463089364885e-05, "loss": 0.0535, "step": 4702 }, { "epoch": 0.52, "grad_norm": 0.30174684967570925, "learning_rate": 1.9907516850209554e-05, "loss": 0.0467, "step": 4703 }, { "epoch": 0.52, "grad_norm": 0.2803757909303582, "learning_rate": 1.9900402818471825e-05, "loss": 0.0603, "step": 4704 }, { "epoch": 0.52, "grad_norm": 0.3540602435303556, "learning_rate": 1.989328879933579e-05, "loss": 0.06, "step": 4705 }, { "epoch": 0.52, "grad_norm": 0.2808325636671105, "learning_rate": 1.9886174793701546e-05, "loss": 0.0359, "step": 4706 }, { "epoch": 0.52, "grad_norm": 0.4032442330564709, "learning_rate": 1.9879060802469217e-05, "loss": 0.0787, "step": 4707 }, { "epoch": 0.52, "grad_norm": 0.40026851745958963, "learning_rate": 1.98719468265389e-05, "loss": 0.0736, "step": 4708 }, { "epoch": 0.52, "grad_norm": 0.3548039098267231, "learning_rate": 1.9864832866810713e-05, "loss": 0.0617, "step": 4709 }, { "epoch": 0.52, "grad_norm": 0.3392706017068352, "learning_rate": 1.985771892418475e-05, "loss": 0.069, "step": 4710 }, { "epoch": 0.52, "grad_norm": 0.3207754186071412, "learning_rate": 1.9850604999561128e-05, "loss": 0.0678, "step": 4711 }, { "epoch": 0.52, "grad_norm": 0.31869558122259933, "learning_rate": 1.9843491093839927e-05, "loss": 0.0685, "step": 4712 }, { "epoch": 0.52, "grad_norm": 0.4083369460075834, "learning_rate": 1.9836377207921265e-05, "loss": 0.0751, "step": 4713 }, { "epoch": 0.52, "grad_norm": 0.38764863009422956, "learning_rate": 1.982926334270522e-05, "loss": 0.054, "step": 4714 }, { "epoch": 0.52, "grad_norm": 0.351851387010838, "learning_rate": 1.98221494990919e-05, "loss": 0.06, "step": 4715 }, { "epoch": 0.52, "grad_norm": 0.27996641881555334, "learning_rate": 1.9815035677981378e-05, "loss": 0.0576, "step": 4716 }, { "epoch": 0.52, "grad_norm": 0.3828016980262593, "learning_rate": 1.9807921880273754e-05, "loss": 0.0801, "step": 4717 }, { "epoch": 0.52, "grad_norm": 0.3588459848136513, "learning_rate": 1.9800808106869117e-05, "loss": 0.0672, "step": 4718 }, { "epoch": 0.52, "grad_norm": 0.3661122099992246, "learning_rate": 1.9793694358667533e-05, "loss": 0.0418, "step": 4719 }, { "epoch": 0.52, "grad_norm": 0.32928118911616616, "learning_rate": 1.9786580636569092e-05, "loss": 0.0554, "step": 4720 }, { "epoch": 0.52, "grad_norm": 0.35582455444822636, "learning_rate": 1.977946694147386e-05, "loss": 0.0497, "step": 4721 }, { "epoch": 0.52, "grad_norm": 0.29228866478177634, "learning_rate": 1.9772353274281918e-05, "loss": 0.0381, "step": 4722 }, { "epoch": 0.52, "grad_norm": 0.26456319132803985, "learning_rate": 1.976523963589332e-05, "loss": 0.0443, "step": 4723 }, { "epoch": 0.52, "grad_norm": 0.26419718653814606, "learning_rate": 1.9758126027208146e-05, "loss": 0.0372, "step": 4724 }, { "epoch": 0.52, "grad_norm": 0.3609036813721021, "learning_rate": 1.975101244912644e-05, "loss": 0.077, "step": 4725 }, { "epoch": 0.52, "grad_norm": 0.2774167032063228, "learning_rate": 1.9743898902548273e-05, "loss": 0.0608, "step": 4726 }, { "epoch": 0.52, "grad_norm": 0.3676231518854293, "learning_rate": 1.9736785388373686e-05, "loss": 0.0573, "step": 4727 }, { "epoch": 0.52, "grad_norm": 0.4060585597937581, "learning_rate": 1.972967190750274e-05, "loss": 0.0744, "step": 4728 }, { "epoch": 0.52, "grad_norm": 0.31194598441259513, "learning_rate": 1.9722558460835463e-05, "loss": 0.0762, "step": 4729 }, { "epoch": 0.52, "grad_norm": 0.3249208707030346, "learning_rate": 1.9715445049271907e-05, "loss": 0.0428, "step": 4730 }, { "epoch": 0.52, "grad_norm": 0.40405446769596115, "learning_rate": 1.97083316737121e-05, "loss": 0.0699, "step": 4731 }, { "epoch": 0.52, "grad_norm": 0.41410809391838166, "learning_rate": 1.9701218335056076e-05, "loss": 0.089, "step": 4732 }, { "epoch": 0.52, "grad_norm": 0.4039627020988026, "learning_rate": 1.9694105034203867e-05, "loss": 0.0747, "step": 4733 }, { "epoch": 0.52, "grad_norm": 0.2972790578446354, "learning_rate": 1.9686991772055476e-05, "loss": 0.0588, "step": 4734 }, { "epoch": 0.52, "grad_norm": 0.3602559859726878, "learning_rate": 1.967987854951094e-05, "loss": 0.0815, "step": 4735 }, { "epoch": 0.52, "grad_norm": 0.25467894428603094, "learning_rate": 1.9672765367470265e-05, "loss": 0.0445, "step": 4736 }, { "epoch": 0.52, "grad_norm": 0.3154520552413603, "learning_rate": 1.966565222683345e-05, "loss": 0.0608, "step": 4737 }, { "epoch": 0.52, "grad_norm": 0.29233013486326725, "learning_rate": 1.9658539128500507e-05, "loss": 0.0596, "step": 4738 }, { "epoch": 0.52, "grad_norm": 0.3780755050044366, "learning_rate": 1.965142607337142e-05, "loss": 0.0659, "step": 4739 }, { "epoch": 0.52, "grad_norm": 0.41711352546143093, "learning_rate": 1.964431306234619e-05, "loss": 0.0924, "step": 4740 }, { "epoch": 0.52, "grad_norm": 0.30895279868877534, "learning_rate": 1.9637200096324792e-05, "loss": 0.0753, "step": 4741 }, { "epoch": 0.52, "grad_norm": 0.34942746890999127, "learning_rate": 1.9630087176207212e-05, "loss": 0.0722, "step": 4742 }, { "epoch": 0.52, "grad_norm": 0.33469045188948415, "learning_rate": 1.9622974302893416e-05, "loss": 0.0618, "step": 4743 }, { "epoch": 0.52, "grad_norm": 0.4191969002721475, "learning_rate": 1.9615861477283384e-05, "loss": 0.0561, "step": 4744 }, { "epoch": 0.52, "grad_norm": 0.3587680542619685, "learning_rate": 1.9608748700277058e-05, "loss": 0.0643, "step": 4745 }, { "epoch": 0.52, "grad_norm": 0.3128015017409996, "learning_rate": 1.9601635972774414e-05, "loss": 0.0642, "step": 4746 }, { "epoch": 0.52, "grad_norm": 0.3329583408155374, "learning_rate": 1.9594523295675383e-05, "loss": 0.0877, "step": 4747 }, { "epoch": 0.52, "grad_norm": 0.22941014060953932, "learning_rate": 1.958741066987992e-05, "loss": 0.0475, "step": 4748 }, { "epoch": 0.52, "grad_norm": 0.39091050204965927, "learning_rate": 1.958029809628795e-05, "loss": 0.0774, "step": 4749 }, { "epoch": 0.52, "grad_norm": 0.30798737791365455, "learning_rate": 1.9573185575799414e-05, "loss": 0.0626, "step": 4750 }, { "epoch": 0.52, "grad_norm": 0.33282125398826806, "learning_rate": 1.956607310931422e-05, "loss": 0.0495, "step": 4751 }, { "epoch": 0.52, "grad_norm": 0.3291413144517075, "learning_rate": 1.95589606977323e-05, "loss": 0.0695, "step": 4752 }, { "epoch": 0.52, "grad_norm": 0.3011719284354433, "learning_rate": 1.9551848341953556e-05, "loss": 0.0599, "step": 4753 }, { "epoch": 0.52, "grad_norm": 0.35773416852809714, "learning_rate": 1.9544736042877886e-05, "loss": 0.0637, "step": 4754 }, { "epoch": 0.52, "grad_norm": 0.2714101840949113, "learning_rate": 1.9537623801405194e-05, "loss": 0.0469, "step": 4755 }, { "epoch": 0.52, "grad_norm": 0.3227778785101817, "learning_rate": 1.9530511618435352e-05, "loss": 0.0629, "step": 4756 }, { "epoch": 0.52, "grad_norm": 0.332783934213497, "learning_rate": 1.9523399494868256e-05, "loss": 0.0594, "step": 4757 }, { "epoch": 0.52, "grad_norm": 0.3196381704804296, "learning_rate": 1.9516287431603767e-05, "loss": 0.043, "step": 4758 }, { "epoch": 0.52, "grad_norm": 0.4336684910046564, "learning_rate": 1.950917542954176e-05, "loss": 0.0809, "step": 4759 }, { "epoch": 0.52, "grad_norm": 0.3099770145777829, "learning_rate": 1.950206348958208e-05, "loss": 0.0525, "step": 4760 }, { "epoch": 0.52, "grad_norm": 0.34155675900687366, "learning_rate": 1.9494951612624583e-05, "loss": 0.0619, "step": 4761 }, { "epoch": 0.52, "grad_norm": 0.3155040830387275, "learning_rate": 1.948783979956911e-05, "loss": 0.0462, "step": 4762 }, { "epoch": 0.52, "grad_norm": 0.5243635668320893, "learning_rate": 1.9480728051315493e-05, "loss": 0.0973, "step": 4763 }, { "epoch": 0.52, "grad_norm": 0.4387000349183044, "learning_rate": 1.9473616368763556e-05, "loss": 0.0875, "step": 4764 }, { "epoch": 0.52, "grad_norm": 0.24595755638297326, "learning_rate": 1.9466504752813124e-05, "loss": 0.0405, "step": 4765 }, { "epoch": 0.52, "grad_norm": 0.3528262419005254, "learning_rate": 1.9459393204363988e-05, "loss": 0.0549, "step": 4766 }, { "epoch": 0.52, "grad_norm": 0.4792290798948304, "learning_rate": 1.9452281724315964e-05, "loss": 0.0901, "step": 4767 }, { "epoch": 0.52, "grad_norm": 0.3106596879760235, "learning_rate": 1.944517031356882e-05, "loss": 0.0555, "step": 4768 }, { "epoch": 0.52, "grad_norm": 0.27229752647867894, "learning_rate": 1.9438058973022374e-05, "loss": 0.0626, "step": 4769 }, { "epoch": 0.52, "grad_norm": 0.3372626671077059, "learning_rate": 1.9430947703576373e-05, "loss": 0.0693, "step": 4770 }, { "epoch": 0.52, "grad_norm": 0.28020790211149216, "learning_rate": 1.9423836506130593e-05, "loss": 0.0477, "step": 4771 }, { "epoch": 0.52, "grad_norm": 0.2888125163476407, "learning_rate": 1.9416725381584777e-05, "loss": 0.0471, "step": 4772 }, { "epoch": 0.52, "grad_norm": 0.3124671322704106, "learning_rate": 1.9409614330838685e-05, "loss": 0.0715, "step": 4773 }, { "epoch": 0.52, "grad_norm": 0.255097412151316, "learning_rate": 1.9402503354792043e-05, "loss": 0.0522, "step": 4774 }, { "epoch": 0.52, "grad_norm": 0.28330472241894744, "learning_rate": 1.9395392454344582e-05, "loss": 0.0413, "step": 4775 }, { "epoch": 0.52, "grad_norm": 0.3338411258207654, "learning_rate": 1.938828163039602e-05, "loss": 0.0625, "step": 4776 }, { "epoch": 0.52, "grad_norm": 0.30842513294413454, "learning_rate": 1.938117088384607e-05, "loss": 0.0533, "step": 4777 }, { "epoch": 0.52, "grad_norm": 0.2830844498957535, "learning_rate": 1.9374060215594417e-05, "loss": 0.0444, "step": 4778 }, { "epoch": 0.52, "grad_norm": 0.2835374634426756, "learning_rate": 1.9366949626540764e-05, "loss": 0.0501, "step": 4779 }, { "epoch": 0.52, "grad_norm": 0.43673514879586744, "learning_rate": 1.9359839117584775e-05, "loss": 0.0671, "step": 4780 }, { "epoch": 0.53, "grad_norm": 0.37202761748045315, "learning_rate": 1.9352728689626133e-05, "loss": 0.0625, "step": 4781 }, { "epoch": 0.53, "grad_norm": 0.37775016680650036, "learning_rate": 1.934561834356448e-05, "loss": 0.0656, "step": 4782 }, { "epoch": 0.53, "grad_norm": 0.39819017849884863, "learning_rate": 1.933850808029948e-05, "loss": 0.0683, "step": 4783 }, { "epoch": 0.53, "grad_norm": 0.3313787893728318, "learning_rate": 1.9331397900730754e-05, "loss": 0.0559, "step": 4784 }, { "epoch": 0.53, "grad_norm": 0.38201853101031413, "learning_rate": 1.9324287805757935e-05, "loss": 0.067, "step": 4785 }, { "epoch": 0.53, "grad_norm": 0.32680930024413746, "learning_rate": 1.9317177796280643e-05, "loss": 0.0576, "step": 4786 }, { "epoch": 0.53, "grad_norm": 0.3409276864189273, "learning_rate": 1.9310067873198476e-05, "loss": 0.0573, "step": 4787 }, { "epoch": 0.53, "grad_norm": 0.28473758009968864, "learning_rate": 1.930295803741104e-05, "loss": 0.0542, "step": 4788 }, { "epoch": 0.53, "grad_norm": 0.3075399029241916, "learning_rate": 1.9295848289817904e-05, "loss": 0.0416, "step": 4789 }, { "epoch": 0.53, "grad_norm": 0.3559080071827515, "learning_rate": 1.9288738631318648e-05, "loss": 0.0625, "step": 4790 }, { "epoch": 0.53, "grad_norm": 0.32405360738458894, "learning_rate": 1.9281629062812827e-05, "loss": 0.0595, "step": 4791 }, { "epoch": 0.53, "grad_norm": 0.4237242716458942, "learning_rate": 1.9274519585199995e-05, "loss": 0.0909, "step": 4792 }, { "epoch": 0.53, "grad_norm": 0.41985948675807966, "learning_rate": 1.9267410199379682e-05, "loss": 0.0826, "step": 4793 }, { "epoch": 0.53, "grad_norm": 0.3238525390625, "learning_rate": 1.9260300906251422e-05, "loss": 0.0597, "step": 4794 }, { "epoch": 0.53, "grad_norm": 0.31939485572187754, "learning_rate": 1.9253191706714725e-05, "loss": 0.0546, "step": 4795 }, { "epoch": 0.53, "grad_norm": 0.3659308417778955, "learning_rate": 1.92460826016691e-05, "loss": 0.0723, "step": 4796 }, { "epoch": 0.53, "grad_norm": 0.42463157897560067, "learning_rate": 1.9238973592014027e-05, "loss": 0.0949, "step": 4797 }, { "epoch": 0.53, "grad_norm": 0.2526853699614417, "learning_rate": 1.9231864678648994e-05, "loss": 0.052, "step": 4798 }, { "epoch": 0.53, "grad_norm": 0.3315363393129195, "learning_rate": 1.922475586247346e-05, "loss": 0.0821, "step": 4799 }, { "epoch": 0.53, "grad_norm": 0.293977273109654, "learning_rate": 1.9217647144386885e-05, "loss": 0.0635, "step": 4800 }, { "epoch": 0.53, "grad_norm": 0.31837765225794823, "learning_rate": 1.92105385252887e-05, "loss": 0.0691, "step": 4801 }, { "epoch": 0.53, "grad_norm": 0.4162010173787922, "learning_rate": 1.9203430006078348e-05, "loss": 0.0788, "step": 4802 }, { "epoch": 0.53, "grad_norm": 0.3249224643651483, "learning_rate": 1.9196321587655228e-05, "loss": 0.075, "step": 4803 }, { "epoch": 0.53, "grad_norm": 0.41507273817883844, "learning_rate": 1.918921327091876e-05, "loss": 0.0713, "step": 4804 }, { "epoch": 0.53, "grad_norm": 0.3061640492320292, "learning_rate": 1.9182105056768335e-05, "loss": 0.05, "step": 4805 }, { "epoch": 0.53, "grad_norm": 0.31932931129043707, "learning_rate": 1.9174996946103318e-05, "loss": 0.0841, "step": 4806 }, { "epoch": 0.53, "grad_norm": 0.309396604785433, "learning_rate": 1.9167888939823083e-05, "loss": 0.0619, "step": 4807 }, { "epoch": 0.53, "grad_norm": 0.25655192637986973, "learning_rate": 1.9160781038826973e-05, "loss": 0.0526, "step": 4808 }, { "epoch": 0.53, "grad_norm": 0.310471339606134, "learning_rate": 1.9153673244014334e-05, "loss": 0.0579, "step": 4809 }, { "epoch": 0.53, "grad_norm": 0.30016985594827106, "learning_rate": 1.9146565556284492e-05, "loss": 0.0526, "step": 4810 }, { "epoch": 0.53, "grad_norm": 0.3116027587546304, "learning_rate": 1.9139457976536754e-05, "loss": 0.0605, "step": 4811 }, { "epoch": 0.53, "grad_norm": 0.3421233225869482, "learning_rate": 1.9132350505670416e-05, "loss": 0.065, "step": 4812 }, { "epoch": 0.53, "grad_norm": 0.347588886206723, "learning_rate": 1.9125243144584762e-05, "loss": 0.0791, "step": 4813 }, { "epoch": 0.53, "grad_norm": 0.32984032049524, "learning_rate": 1.9118135894179067e-05, "loss": 0.0533, "step": 4814 }, { "epoch": 0.53, "grad_norm": 0.29665145488557776, "learning_rate": 1.9111028755352578e-05, "loss": 0.0697, "step": 4815 }, { "epoch": 0.53, "grad_norm": 0.33921622294253856, "learning_rate": 1.910392172900455e-05, "loss": 0.0621, "step": 4816 }, { "epoch": 0.53, "grad_norm": 0.4300904725231287, "learning_rate": 1.9096814816034192e-05, "loss": 0.0605, "step": 4817 }, { "epoch": 0.53, "grad_norm": 0.37491111894151646, "learning_rate": 1.9089708017340733e-05, "loss": 0.0595, "step": 4818 }, { "epoch": 0.53, "grad_norm": 0.29845438754649123, "learning_rate": 1.9082601333823364e-05, "loss": 0.0506, "step": 4819 }, { "epoch": 0.53, "grad_norm": 0.3584630424624439, "learning_rate": 1.9075494766381263e-05, "loss": 0.0416, "step": 4820 }, { "epoch": 0.53, "grad_norm": 0.32079072469105685, "learning_rate": 1.906838831591362e-05, "loss": 0.058, "step": 4821 }, { "epoch": 0.53, "grad_norm": 0.3503191684765387, "learning_rate": 1.906128198331957e-05, "loss": 0.053, "step": 4822 }, { "epoch": 0.53, "grad_norm": 0.3260929331931471, "learning_rate": 1.9054175769498265e-05, "loss": 0.0643, "step": 4823 }, { "epoch": 0.53, "grad_norm": 0.3772245230594431, "learning_rate": 1.9047069675348816e-05, "loss": 0.0577, "step": 4824 }, { "epoch": 0.53, "grad_norm": 0.38575698244718737, "learning_rate": 1.903996370177035e-05, "loss": 0.0567, "step": 4825 }, { "epoch": 0.53, "grad_norm": 0.34618073138954514, "learning_rate": 1.9032857849661942e-05, "loss": 0.0477, "step": 4826 }, { "epoch": 0.53, "grad_norm": 0.4194893849829217, "learning_rate": 1.902575211992269e-05, "loss": 0.0678, "step": 4827 }, { "epoch": 0.53, "grad_norm": 0.3421051814729771, "learning_rate": 1.901864651345164e-05, "loss": 0.062, "step": 4828 }, { "epoch": 0.53, "grad_norm": 0.35553313290571104, "learning_rate": 1.9011541031147856e-05, "loss": 0.0637, "step": 4829 }, { "epoch": 0.53, "grad_norm": 0.3159303854340297, "learning_rate": 1.9004435673910356e-05, "loss": 0.0542, "step": 4830 }, { "epoch": 0.53, "grad_norm": 0.3224906482560456, "learning_rate": 1.8997330442638165e-05, "loss": 0.0466, "step": 4831 }, { "epoch": 0.53, "grad_norm": 0.306691621075204, "learning_rate": 1.8990225338230276e-05, "loss": 0.0592, "step": 4832 }, { "epoch": 0.53, "grad_norm": 0.32547930897668065, "learning_rate": 1.8983120361585683e-05, "loss": 0.0556, "step": 4833 }, { "epoch": 0.53, "grad_norm": 0.30747488454271754, "learning_rate": 1.8976015513603344e-05, "loss": 0.0545, "step": 4834 }, { "epoch": 0.53, "grad_norm": 0.3143407964585679, "learning_rate": 1.896891079518222e-05, "loss": 0.0511, "step": 4835 }, { "epoch": 0.53, "grad_norm": 0.3654993085737251, "learning_rate": 1.8961806207221235e-05, "loss": 0.0689, "step": 4836 }, { "epoch": 0.53, "grad_norm": 0.3067897142624209, "learning_rate": 1.8954701750619313e-05, "loss": 0.0631, "step": 4837 }, { "epoch": 0.53, "grad_norm": 0.3394790534157499, "learning_rate": 1.8947597426275368e-05, "loss": 0.0446, "step": 4838 }, { "epoch": 0.53, "grad_norm": 0.313444688549121, "learning_rate": 1.8940493235088272e-05, "loss": 0.037, "step": 4839 }, { "epoch": 0.53, "grad_norm": 0.3423125382598025, "learning_rate": 1.8933389177956896e-05, "loss": 0.0674, "step": 4840 }, { "epoch": 0.53, "grad_norm": 0.3556325346512234, "learning_rate": 1.8926285255780095e-05, "loss": 0.0683, "step": 4841 }, { "epoch": 0.53, "grad_norm": 0.3509764024397666, "learning_rate": 1.8919181469456703e-05, "loss": 0.0707, "step": 4842 }, { "epoch": 0.53, "grad_norm": 0.2654621803319488, "learning_rate": 1.8912077819885533e-05, "loss": 0.0318, "step": 4843 }, { "epoch": 0.53, "grad_norm": 0.3555633712425293, "learning_rate": 1.8904974307965393e-05, "loss": 0.0769, "step": 4844 }, { "epoch": 0.53, "grad_norm": 0.3514757261210543, "learning_rate": 1.8897870934595055e-05, "loss": 0.058, "step": 4845 }, { "epoch": 0.53, "grad_norm": 0.26660133892790533, "learning_rate": 1.8890767700673296e-05, "loss": 0.0302, "step": 4846 }, { "epoch": 0.53, "grad_norm": 0.30750849180979273, "learning_rate": 1.8883664607098856e-05, "loss": 0.0595, "step": 4847 }, { "epoch": 0.53, "grad_norm": 0.306579535163633, "learning_rate": 1.8876561654770466e-05, "loss": 0.0486, "step": 4848 }, { "epoch": 0.53, "grad_norm": 0.26328504938842306, "learning_rate": 1.886945884458684e-05, "loss": 0.0374, "step": 4849 }, { "epoch": 0.53, "grad_norm": 0.28228724794108406, "learning_rate": 1.8862356177446667e-05, "loss": 0.0581, "step": 4850 }, { "epoch": 0.53, "grad_norm": 0.3164625824125481, "learning_rate": 1.885525365424863e-05, "loss": 0.0504, "step": 4851 }, { "epoch": 0.53, "grad_norm": 0.3693746803656678, "learning_rate": 1.8848151275891383e-05, "loss": 0.0612, "step": 4852 }, { "epoch": 0.53, "grad_norm": 0.29125390762360004, "learning_rate": 1.884104904327357e-05, "loss": 0.0514, "step": 4853 }, { "epoch": 0.53, "grad_norm": 0.3088892354992329, "learning_rate": 1.8833946957293796e-05, "loss": 0.0418, "step": 4854 }, { "epoch": 0.53, "grad_norm": 0.2739199061452179, "learning_rate": 1.882684501885068e-05, "loss": 0.0416, "step": 4855 }, { "epoch": 0.53, "grad_norm": 0.27999998835580664, "learning_rate": 1.8819743228842806e-05, "loss": 0.068, "step": 4856 }, { "epoch": 0.53, "grad_norm": 0.3631608209653038, "learning_rate": 1.881264158816873e-05, "loss": 0.0631, "step": 4857 }, { "epoch": 0.53, "grad_norm": 0.37143338371921714, "learning_rate": 1.8805540097727003e-05, "loss": 0.067, "step": 4858 }, { "epoch": 0.53, "grad_norm": 0.29118905244255, "learning_rate": 1.8798438758416147e-05, "loss": 0.048, "step": 4859 }, { "epoch": 0.53, "grad_norm": 0.3400818001995256, "learning_rate": 1.8791337571134677e-05, "loss": 0.0543, "step": 4860 }, { "epoch": 0.53, "grad_norm": 0.504890338539092, "learning_rate": 1.8784236536781076e-05, "loss": 0.1036, "step": 4861 }, { "epoch": 0.53, "grad_norm": 0.4121225725564691, "learning_rate": 1.877713565625382e-05, "loss": 0.0649, "step": 4862 }, { "epoch": 0.53, "grad_norm": 0.3878110237308656, "learning_rate": 1.8770034930451345e-05, "loss": 0.0696, "step": 4863 }, { "epoch": 0.53, "grad_norm": 0.35091748937214295, "learning_rate": 1.8762934360272097e-05, "loss": 0.0459, "step": 4864 }, { "epoch": 0.53, "grad_norm": 0.2781925510495585, "learning_rate": 1.8755833946614475e-05, "loss": 0.0453, "step": 4865 }, { "epoch": 0.53, "grad_norm": 0.36971593358902766, "learning_rate": 1.8748733690376883e-05, "loss": 0.0572, "step": 4866 }, { "epoch": 0.53, "grad_norm": 0.30281658902079533, "learning_rate": 1.8741633592457677e-05, "loss": 0.0611, "step": 4867 }, { "epoch": 0.53, "grad_norm": 0.29873236823623794, "learning_rate": 1.8734533653755216e-05, "loss": 0.0697, "step": 4868 }, { "epoch": 0.53, "grad_norm": 0.2923893221306974, "learning_rate": 1.8727433875167828e-05, "loss": 0.0525, "step": 4869 }, { "epoch": 0.53, "grad_norm": 0.38429033385912215, "learning_rate": 1.8720334257593826e-05, "loss": 0.067, "step": 4870 }, { "epoch": 0.53, "grad_norm": 0.31698419040221953, "learning_rate": 1.8713234801931494e-05, "loss": 0.0511, "step": 4871 }, { "epoch": 0.54, "grad_norm": 0.30124388451126555, "learning_rate": 1.8706135509079103e-05, "loss": 0.0606, "step": 4872 }, { "epoch": 0.54, "grad_norm": 0.24919668416841298, "learning_rate": 1.869903637993491e-05, "loss": 0.0409, "step": 4873 }, { "epoch": 0.54, "grad_norm": 0.29466716554615313, "learning_rate": 1.869193741539714e-05, "loss": 0.0504, "step": 4874 }, { "epoch": 0.54, "grad_norm": 0.2730488740250263, "learning_rate": 1.8684838616363997e-05, "loss": 0.0442, "step": 4875 }, { "epoch": 0.54, "grad_norm": 0.40023512262933153, "learning_rate": 1.8677739983733666e-05, "loss": 0.0526, "step": 4876 }, { "epoch": 0.54, "grad_norm": 0.406152438406579, "learning_rate": 1.8670641518404322e-05, "loss": 0.0864, "step": 4877 }, { "epoch": 0.54, "grad_norm": 0.29053493457783014, "learning_rate": 1.8663543221274096e-05, "loss": 0.0486, "step": 4878 }, { "epoch": 0.54, "grad_norm": 0.3461570561292766, "learning_rate": 1.8656445093241122e-05, "loss": 0.0627, "step": 4879 }, { "epoch": 0.54, "grad_norm": 0.28846751659263103, "learning_rate": 1.8649347135203494e-05, "loss": 0.0441, "step": 4880 }, { "epoch": 0.54, "grad_norm": 0.39885192710398476, "learning_rate": 1.8642249348059296e-05, "loss": 0.0649, "step": 4881 }, { "epoch": 0.54, "grad_norm": 0.3039924322867829, "learning_rate": 1.8635151732706586e-05, "loss": 0.0677, "step": 4882 }, { "epoch": 0.54, "grad_norm": 0.36083028365131947, "learning_rate": 1.86280542900434e-05, "loss": 0.0727, "step": 4883 }, { "epoch": 0.54, "grad_norm": 0.5207675479032882, "learning_rate": 1.862095702096775e-05, "loss": 0.1073, "step": 4884 }, { "epoch": 0.54, "grad_norm": 0.36658365229255785, "learning_rate": 1.8613859926377636e-05, "loss": 0.0721, "step": 4885 }, { "epoch": 0.54, "grad_norm": 0.257427737642483, "learning_rate": 1.860676300717102e-05, "loss": 0.0391, "step": 4886 }, { "epoch": 0.54, "grad_norm": 0.30208050655270113, "learning_rate": 1.859966626424586e-05, "loss": 0.058, "step": 4887 }, { "epoch": 0.54, "grad_norm": 0.524228821613645, "learning_rate": 1.8592569698500076e-05, "loss": 0.0803, "step": 4888 }, { "epoch": 0.54, "grad_norm": 0.37559932262356527, "learning_rate": 1.8585473310831567e-05, "loss": 0.0644, "step": 4889 }, { "epoch": 0.54, "grad_norm": 0.3664566233155714, "learning_rate": 1.8578377102138223e-05, "loss": 0.0722, "step": 4890 }, { "epoch": 0.54, "grad_norm": 0.2763762860097861, "learning_rate": 1.857128107331791e-05, "loss": 0.0672, "step": 4891 }, { "epoch": 0.54, "grad_norm": 0.30401333782933376, "learning_rate": 1.8564185225268446e-05, "loss": 0.0479, "step": 4892 }, { "epoch": 0.54, "grad_norm": 0.2695667132944537, "learning_rate": 1.8557089558887655e-05, "loss": 0.0452, "step": 4893 }, { "epoch": 0.54, "grad_norm": 0.21376175219313176, "learning_rate": 1.8549994075073327e-05, "loss": 0.0634, "step": 4894 }, { "epoch": 0.54, "grad_norm": 0.25682047518176504, "learning_rate": 1.8542898774723225e-05, "loss": 0.0489, "step": 4895 }, { "epoch": 0.54, "grad_norm": 0.28180622359236795, "learning_rate": 1.853580365873509e-05, "loss": 0.0453, "step": 4896 }, { "epoch": 0.54, "grad_norm": 0.33053754401600327, "learning_rate": 1.8528708728006654e-05, "loss": 0.0486, "step": 4897 }, { "epoch": 0.54, "grad_norm": 0.28269531498796063, "learning_rate": 1.8521613983435604e-05, "loss": 0.0505, "step": 4898 }, { "epoch": 0.54, "grad_norm": 0.3289290748704466, "learning_rate": 1.8514519425919617e-05, "loss": 0.0694, "step": 4899 }, { "epoch": 0.54, "grad_norm": 0.29990069812160436, "learning_rate": 1.8507425056356338e-05, "loss": 0.0543, "step": 4900 }, { "epoch": 0.54, "grad_norm": 0.3723073489652655, "learning_rate": 1.8500330875643405e-05, "loss": 0.0699, "step": 4901 }, { "epoch": 0.54, "grad_norm": 0.3678210859179774, "learning_rate": 1.8493236884678405e-05, "loss": 0.0831, "step": 4902 }, { "epoch": 0.54, "grad_norm": 0.2765332184931764, "learning_rate": 1.848614308435893e-05, "loss": 0.0473, "step": 4903 }, { "epoch": 0.54, "grad_norm": 0.30530793037235593, "learning_rate": 1.847904947558252e-05, "loss": 0.049, "step": 4904 }, { "epoch": 0.54, "grad_norm": 0.3258362831086268, "learning_rate": 1.8471956059246717e-05, "loss": 0.0662, "step": 4905 }, { "epoch": 0.54, "grad_norm": 0.2574830551266415, "learning_rate": 1.8464862836249014e-05, "loss": 0.0487, "step": 4906 }, { "epoch": 0.54, "grad_norm": 0.27446116521727665, "learning_rate": 1.8457769807486903e-05, "loss": 0.0441, "step": 4907 }, { "epoch": 0.54, "grad_norm": 0.3036688502548473, "learning_rate": 1.8450676973857842e-05, "loss": 0.0433, "step": 4908 }, { "epoch": 0.54, "grad_norm": 0.33868591341096527, "learning_rate": 1.8443584336259253e-05, "loss": 0.0585, "step": 4909 }, { "epoch": 0.54, "grad_norm": 0.3886296043221401, "learning_rate": 1.843649189558855e-05, "loss": 0.0664, "step": 4910 }, { "epoch": 0.54, "grad_norm": 0.3898702960254951, "learning_rate": 1.842939965274311e-05, "loss": 0.075, "step": 4911 }, { "epoch": 0.54, "grad_norm": 0.27507723568981396, "learning_rate": 1.8422307608620292e-05, "loss": 0.0363, "step": 4912 }, { "epoch": 0.54, "grad_norm": 0.30849703951331714, "learning_rate": 1.8415215764117428e-05, "loss": 0.0581, "step": 4913 }, { "epoch": 0.54, "grad_norm": 0.31653900658060236, "learning_rate": 1.840812412013183e-05, "loss": 0.0581, "step": 4914 }, { "epoch": 0.54, "grad_norm": 0.3325580120927327, "learning_rate": 1.8401032677560767e-05, "loss": 0.0533, "step": 4915 }, { "epoch": 0.54, "grad_norm": 0.36970293520695274, "learning_rate": 1.8393941437301507e-05, "loss": 0.0675, "step": 4916 }, { "epoch": 0.54, "grad_norm": 0.3585416834977342, "learning_rate": 1.838685040025127e-05, "loss": 0.0815, "step": 4917 }, { "epoch": 0.54, "grad_norm": 0.23879248073882645, "learning_rate": 1.8379759567307266e-05, "loss": 0.0428, "step": 4918 }, { "epoch": 0.54, "grad_norm": 0.43233937456007654, "learning_rate": 1.837266893936667e-05, "loss": 0.0737, "step": 4919 }, { "epoch": 0.54, "grad_norm": 0.36011360809702286, "learning_rate": 1.8365578517326642e-05, "loss": 0.0645, "step": 4920 }, { "epoch": 0.54, "grad_norm": 0.32384882355768296, "learning_rate": 1.83584883020843e-05, "loss": 0.0466, "step": 4921 }, { "epoch": 0.54, "grad_norm": 0.3139658404344843, "learning_rate": 1.8351398294536747e-05, "loss": 0.0498, "step": 4922 }, { "epoch": 0.54, "grad_norm": 0.4437869694582102, "learning_rate": 1.8344308495581056e-05, "loss": 0.0743, "step": 4923 }, { "epoch": 0.54, "grad_norm": 0.326073157624937, "learning_rate": 1.833721890611428e-05, "loss": 0.0687, "step": 4924 }, { "epoch": 0.54, "grad_norm": 0.3790813120288228, "learning_rate": 1.8330129527033433e-05, "loss": 0.0823, "step": 4925 }, { "epoch": 0.54, "grad_norm": 0.4056979610144585, "learning_rate": 1.832304035923552e-05, "loss": 0.0776, "step": 4926 }, { "epoch": 0.54, "grad_norm": 0.46710853866244517, "learning_rate": 1.8315951403617496e-05, "loss": 0.0872, "step": 4927 }, { "epoch": 0.54, "grad_norm": 0.3112583287974614, "learning_rate": 1.8308862661076313e-05, "loss": 0.0673, "step": 4928 }, { "epoch": 0.54, "grad_norm": 0.36728808871708474, "learning_rate": 1.8301774132508876e-05, "loss": 0.0767, "step": 4929 }, { "epoch": 0.54, "grad_norm": 0.36244579353849044, "learning_rate": 1.829468581881208e-05, "loss": 0.058, "step": 4930 }, { "epoch": 0.54, "grad_norm": 0.32660534317571355, "learning_rate": 1.828759772088278e-05, "loss": 0.0625, "step": 4931 }, { "epoch": 0.54, "grad_norm": 0.28011761630334936, "learning_rate": 1.8280509839617814e-05, "loss": 0.0575, "step": 4932 }, { "epoch": 0.54, "grad_norm": 0.2744294837609691, "learning_rate": 1.8273422175913975e-05, "loss": 0.044, "step": 4933 }, { "epoch": 0.54, "grad_norm": 0.2619071823001568, "learning_rate": 1.8266334730668054e-05, "loss": 0.052, "step": 4934 }, { "epoch": 0.54, "grad_norm": 0.3509697685794147, "learning_rate": 1.8259247504776788e-05, "loss": 0.0683, "step": 4935 }, { "epoch": 0.54, "grad_norm": 0.3643907810543065, "learning_rate": 1.8252160499136914e-05, "loss": 0.072, "step": 4936 }, { "epoch": 0.54, "grad_norm": 0.2935391658656578, "learning_rate": 1.8245073714645115e-05, "loss": 0.0629, "step": 4937 }, { "epoch": 0.54, "grad_norm": 0.2858745985207618, "learning_rate": 1.8237987152198063e-05, "loss": 0.0617, "step": 4938 }, { "epoch": 0.54, "grad_norm": 0.3837281389874902, "learning_rate": 1.8230900812692387e-05, "loss": 0.0774, "step": 4939 }, { "epoch": 0.54, "grad_norm": 0.34214365125179147, "learning_rate": 1.822381469702471e-05, "loss": 0.0744, "step": 4940 }, { "epoch": 0.54, "grad_norm": 0.28109274547539487, "learning_rate": 1.82167288060916e-05, "loss": 0.0489, "step": 4941 }, { "epoch": 0.54, "grad_norm": 0.28707807720634265, "learning_rate": 1.8209643140789622e-05, "loss": 0.0594, "step": 4942 }, { "epoch": 0.54, "grad_norm": 0.3481909465478844, "learning_rate": 1.8202557702015303e-05, "loss": 0.0617, "step": 4943 }, { "epoch": 0.54, "grad_norm": 0.3282151212000431, "learning_rate": 1.8195472490665125e-05, "loss": 0.07, "step": 4944 }, { "epoch": 0.54, "grad_norm": 0.4418564247856527, "learning_rate": 1.8188387507635568e-05, "loss": 0.0819, "step": 4945 }, { "epoch": 0.54, "grad_norm": 0.3086632396829253, "learning_rate": 1.8181302753823064e-05, "loss": 0.0698, "step": 4946 }, { "epoch": 0.54, "grad_norm": 0.33104685546252693, "learning_rate": 1.8174218230124024e-05, "loss": 0.06, "step": 4947 }, { "epoch": 0.54, "grad_norm": 0.29924318995650334, "learning_rate": 1.8167133937434823e-05, "loss": 0.0624, "step": 4948 }, { "epoch": 0.54, "grad_norm": 0.5391362457694533, "learning_rate": 1.8160049876651822e-05, "loss": 0.0963, "step": 4949 }, { "epoch": 0.54, "grad_norm": 0.40574559662851045, "learning_rate": 1.8152966048671334e-05, "loss": 0.0682, "step": 4950 }, { "epoch": 0.54, "grad_norm": 0.3352497401702295, "learning_rate": 1.814588245438966e-05, "loss": 0.0676, "step": 4951 }, { "epoch": 0.54, "grad_norm": 0.35281390944166424, "learning_rate": 1.813879909470305e-05, "loss": 0.0655, "step": 4952 }, { "epoch": 0.54, "grad_norm": 0.34141333723241074, "learning_rate": 1.813171597050775e-05, "loss": 0.0639, "step": 4953 }, { "epoch": 0.54, "grad_norm": 0.3164033300947929, "learning_rate": 1.8124633082699956e-05, "loss": 0.0551, "step": 4954 }, { "epoch": 0.54, "grad_norm": 0.4406131269329775, "learning_rate": 1.8117550432175845e-05, "loss": 0.1033, "step": 4955 }, { "epoch": 0.54, "grad_norm": 0.3741654009610003, "learning_rate": 1.8110468019831553e-05, "loss": 0.0705, "step": 4956 }, { "epoch": 0.54, "grad_norm": 0.36082336635934725, "learning_rate": 1.8103385846563202e-05, "loss": 0.0838, "step": 4957 }, { "epoch": 0.54, "grad_norm": 0.3024490734849216, "learning_rate": 1.8096303913266864e-05, "loss": 0.0837, "step": 4958 }, { "epoch": 0.54, "grad_norm": 0.36306395235853045, "learning_rate": 1.80892222208386e-05, "loss": 0.0698, "step": 4959 }, { "epoch": 0.54, "grad_norm": 0.2878906363871558, "learning_rate": 1.808214077017444e-05, "loss": 0.0591, "step": 4960 }, { "epoch": 0.54, "grad_norm": 0.3128928338019312, "learning_rate": 1.8075059562170357e-05, "loss": 0.0589, "step": 4961 }, { "epoch": 0.54, "grad_norm": 0.34881266594920257, "learning_rate": 1.8067978597722325e-05, "loss": 0.0753, "step": 4962 }, { "epoch": 0.55, "grad_norm": 0.2986322895691649, "learning_rate": 1.8060897877726267e-05, "loss": 0.0669, "step": 4963 }, { "epoch": 0.55, "grad_norm": 0.34748635802146344, "learning_rate": 1.8053817403078087e-05, "loss": 0.0779, "step": 4964 }, { "epoch": 0.55, "grad_norm": 0.28748152092964574, "learning_rate": 1.8046737174673646e-05, "loss": 0.0512, "step": 4965 }, { "epoch": 0.55, "grad_norm": 0.2604562570359666, "learning_rate": 1.8039657193408788e-05, "loss": 0.0485, "step": 4966 }, { "epoch": 0.55, "grad_norm": 0.4140020362324788, "learning_rate": 1.803257746017932e-05, "loss": 0.0738, "step": 4967 }, { "epoch": 0.55, "grad_norm": 0.28237213023797353, "learning_rate": 1.8025497975881004e-05, "loss": 0.0582, "step": 4968 }, { "epoch": 0.55, "grad_norm": 0.2752949162820628, "learning_rate": 1.8018418741409598e-05, "loss": 0.0463, "step": 4969 }, { "epoch": 0.55, "grad_norm": 0.32686183388709367, "learning_rate": 1.8011339757660798e-05, "loss": 0.0506, "step": 4970 }, { "epoch": 0.55, "grad_norm": 0.42394096876319953, "learning_rate": 1.8004261025530298e-05, "loss": 0.054, "step": 4971 }, { "epoch": 0.55, "grad_norm": 0.3079854426125247, "learning_rate": 1.7997182545913732e-05, "loss": 0.0512, "step": 4972 }, { "epoch": 0.55, "grad_norm": 0.22254794816337783, "learning_rate": 1.7990104319706728e-05, "loss": 0.0342, "step": 4973 }, { "epoch": 0.55, "grad_norm": 0.35234584427103316, "learning_rate": 1.798302634780486e-05, "loss": 0.0607, "step": 4974 }, { "epoch": 0.55, "grad_norm": 0.29860853716507335, "learning_rate": 1.7975948631103676e-05, "loss": 0.0424, "step": 4975 }, { "epoch": 0.55, "grad_norm": 0.31554812165458185, "learning_rate": 1.796887117049871e-05, "loss": 0.053, "step": 4976 }, { "epoch": 0.55, "grad_norm": 0.41391832612717944, "learning_rate": 1.796179396688544e-05, "loss": 0.0631, "step": 4977 }, { "epoch": 0.55, "grad_norm": 0.31691040093370726, "learning_rate": 1.7954717021159316e-05, "loss": 0.0734, "step": 4978 }, { "epoch": 0.55, "grad_norm": 0.33049958312936023, "learning_rate": 1.7947640334215762e-05, "loss": 0.0542, "step": 4979 }, { "epoch": 0.55, "grad_norm": 0.36660041951167466, "learning_rate": 1.7940563906950175e-05, "loss": 0.0576, "step": 4980 }, { "epoch": 0.55, "grad_norm": 0.38903620910670594, "learning_rate": 1.79334877402579e-05, "loss": 0.066, "step": 4981 }, { "epoch": 0.55, "grad_norm": 0.33668459389343924, "learning_rate": 1.7926411835034267e-05, "loss": 0.0649, "step": 4982 }, { "epoch": 0.55, "grad_norm": 0.3713500894302172, "learning_rate": 1.791933619217456e-05, "loss": 0.0736, "step": 4983 }, { "epoch": 0.55, "grad_norm": 0.3807993719322671, "learning_rate": 1.791226081257404e-05, "loss": 0.065, "step": 4984 }, { "epoch": 0.55, "grad_norm": 0.35916288999325824, "learning_rate": 1.7905185697127924e-05, "loss": 0.0677, "step": 4985 }, { "epoch": 0.55, "grad_norm": 0.43874529311652233, "learning_rate": 1.7898110846731415e-05, "loss": 0.0778, "step": 4986 }, { "epoch": 0.55, "grad_norm": 0.4254177186462866, "learning_rate": 1.7891036262279653e-05, "loss": 0.0618, "step": 4987 }, { "epoch": 0.55, "grad_norm": 0.39054160181504416, "learning_rate": 1.7883961944667772e-05, "loss": 0.0774, "step": 4988 }, { "epoch": 0.55, "grad_norm": 0.2961933064234726, "learning_rate": 1.7876887894790856e-05, "loss": 0.0558, "step": 4989 }, { "epoch": 0.55, "grad_norm": 0.3789867630197014, "learning_rate": 1.786981411354396e-05, "loss": 0.0683, "step": 4990 }, { "epoch": 0.55, "grad_norm": 0.3350115701648776, "learning_rate": 1.7862740601822107e-05, "loss": 0.0519, "step": 4991 }, { "epoch": 0.55, "grad_norm": 0.3956848125230926, "learning_rate": 1.7855667360520277e-05, "loss": 0.0662, "step": 4992 }, { "epoch": 0.55, "grad_norm": 0.23563540425590127, "learning_rate": 1.7848594390533442e-05, "loss": 0.0556, "step": 4993 }, { "epoch": 0.55, "grad_norm": 0.4090596025100565, "learning_rate": 1.7841521692756497e-05, "loss": 0.0663, "step": 4994 }, { "epoch": 0.55, "grad_norm": 0.3312390469593599, "learning_rate": 1.7834449268084343e-05, "loss": 0.0536, "step": 4995 }, { "epoch": 0.55, "grad_norm": 0.3368756922005246, "learning_rate": 1.782737711741182e-05, "loss": 0.0552, "step": 4996 }, { "epoch": 0.55, "grad_norm": 0.3748192351491326, "learning_rate": 1.782030524163375e-05, "loss": 0.0638, "step": 4997 }, { "epoch": 0.55, "grad_norm": 0.2708162690557804, "learning_rate": 1.7813233641644904e-05, "loss": 0.0421, "step": 4998 }, { "epoch": 0.55, "grad_norm": 0.35805060114826714, "learning_rate": 1.7806162318340032e-05, "loss": 0.0535, "step": 4999 }, { "epoch": 0.55, "grad_norm": 0.27394307943794116, "learning_rate": 1.7799091272613843e-05, "loss": 0.0406, "step": 5000 }, { "epoch": 0.55, "grad_norm": 0.2957267263006547, "learning_rate": 1.7792020505361015e-05, "loss": 0.0569, "step": 5001 }, { "epoch": 0.55, "grad_norm": 0.3501094034051673, "learning_rate": 1.778495001747618e-05, "loss": 0.0667, "step": 5002 }, { "epoch": 0.55, "grad_norm": 0.31723480294700446, "learning_rate": 1.7777879809853954e-05, "loss": 0.0503, "step": 5003 }, { "epoch": 0.55, "grad_norm": 0.4188699365490237, "learning_rate": 1.7770809883388896e-05, "loss": 0.0631, "step": 5004 }, { "epoch": 0.55, "grad_norm": 0.4369896568824533, "learning_rate": 1.7763740238975543e-05, "loss": 0.0734, "step": 5005 }, { "epoch": 0.55, "grad_norm": 0.3307078634995461, "learning_rate": 1.775667087750839e-05, "loss": 0.049, "step": 5006 }, { "epoch": 0.55, "grad_norm": 0.2965574322781026, "learning_rate": 1.774960179988191e-05, "loss": 0.0538, "step": 5007 }, { "epoch": 0.55, "grad_norm": 0.29135011485126827, "learning_rate": 1.774253300699051e-05, "loss": 0.0491, "step": 5008 }, { "epoch": 0.55, "grad_norm": 0.3324820543587624, "learning_rate": 1.773546449972859e-05, "loss": 0.0602, "step": 5009 }, { "epoch": 0.55, "grad_norm": 0.2887968311686442, "learning_rate": 1.77283962789905e-05, "loss": 0.0426, "step": 5010 }, { "epoch": 0.55, "grad_norm": 0.32098663149013934, "learning_rate": 1.772132834567057e-05, "loss": 0.0577, "step": 5011 }, { "epoch": 0.55, "grad_norm": 0.34888167274651627, "learning_rate": 1.771426070066307e-05, "loss": 0.0775, "step": 5012 }, { "epoch": 0.55, "grad_norm": 0.3632791170447296, "learning_rate": 1.770719334486225e-05, "loss": 0.0723, "step": 5013 }, { "epoch": 0.55, "grad_norm": 0.2508698767332318, "learning_rate": 1.770012627916231e-05, "loss": 0.036, "step": 5014 }, { "epoch": 0.55, "grad_norm": 0.25521742389344243, "learning_rate": 1.7693059504457434e-05, "loss": 0.0432, "step": 5015 }, { "epoch": 0.55, "grad_norm": 0.39834038167899866, "learning_rate": 1.768599302164174e-05, "loss": 0.062, "step": 5016 }, { "epoch": 0.55, "grad_norm": 0.2770387423040599, "learning_rate": 1.7678926831609344e-05, "loss": 0.068, "step": 5017 }, { "epoch": 0.55, "grad_norm": 0.39384864071118325, "learning_rate": 1.7671860935254285e-05, "loss": 0.0764, "step": 5018 }, { "epoch": 0.55, "grad_norm": 0.2637381717551651, "learning_rate": 1.7664795333470607e-05, "loss": 0.0423, "step": 5019 }, { "epoch": 0.55, "grad_norm": 0.24678083658105257, "learning_rate": 1.7657730027152286e-05, "loss": 0.0388, "step": 5020 }, { "epoch": 0.55, "grad_norm": 0.369029598594509, "learning_rate": 1.7650665017193272e-05, "loss": 0.0621, "step": 5021 }, { "epoch": 0.55, "grad_norm": 0.29983160041020623, "learning_rate": 1.7643600304487475e-05, "loss": 0.0494, "step": 5022 }, { "epoch": 0.55, "grad_norm": 0.3832615145840354, "learning_rate": 1.7636535889928775e-05, "loss": 0.0633, "step": 5023 }, { "epoch": 0.55, "grad_norm": 0.2730398283609901, "learning_rate": 1.7629471774410997e-05, "loss": 0.0313, "step": 5024 }, { "epoch": 0.55, "grad_norm": 0.41043388416108534, "learning_rate": 1.762240795882795e-05, "loss": 0.0636, "step": 5025 }, { "epoch": 0.55, "grad_norm": 0.3460301291241678, "learning_rate": 1.7615344444073385e-05, "loss": 0.0551, "step": 5026 }, { "epoch": 0.55, "grad_norm": 0.3388215619683655, "learning_rate": 1.7608281231041023e-05, "loss": 0.0661, "step": 5027 }, { "epoch": 0.55, "grad_norm": 0.3665732460690765, "learning_rate": 1.7601218320624562e-05, "loss": 0.0485, "step": 5028 }, { "epoch": 0.55, "grad_norm": 0.4283326202447252, "learning_rate": 1.7594155713717634e-05, "loss": 0.063, "step": 5029 }, { "epoch": 0.55, "grad_norm": 0.3892199232282073, "learning_rate": 1.7587093411213856e-05, "loss": 0.076, "step": 5030 }, { "epoch": 0.55, "grad_norm": 0.24798691392797867, "learning_rate": 1.758003141400679e-05, "loss": 0.0571, "step": 5031 }, { "epoch": 0.55, "grad_norm": 0.3331656207928089, "learning_rate": 1.7572969722989967e-05, "loss": 0.0553, "step": 5032 }, { "epoch": 0.55, "grad_norm": 0.45809062217281094, "learning_rate": 1.756590833905688e-05, "loss": 0.0901, "step": 5033 }, { "epoch": 0.55, "grad_norm": 0.34441775790379403, "learning_rate": 1.755884726310098e-05, "loss": 0.0656, "step": 5034 }, { "epoch": 0.55, "grad_norm": 0.4552007027109417, "learning_rate": 1.755178649601568e-05, "loss": 0.0816, "step": 5035 }, { "epoch": 0.55, "grad_norm": 0.2590029138802504, "learning_rate": 1.754472603869436e-05, "loss": 0.0542, "step": 5036 }, { "epoch": 0.55, "grad_norm": 0.34422459135497424, "learning_rate": 1.753766589203034e-05, "loss": 0.0445, "step": 5037 }, { "epoch": 0.55, "grad_norm": 0.31101417171289175, "learning_rate": 1.7530606056916935e-05, "loss": 0.0694, "step": 5038 }, { "epoch": 0.55, "grad_norm": 0.27705897903012217, "learning_rate": 1.7523546534247388e-05, "loss": 0.0496, "step": 5039 }, { "epoch": 0.55, "grad_norm": 0.2658328897722623, "learning_rate": 1.751648732491493e-05, "loss": 0.0611, "step": 5040 }, { "epoch": 0.55, "grad_norm": 0.3552021032590606, "learning_rate": 1.750942842981272e-05, "loss": 0.0652, "step": 5041 }, { "epoch": 0.55, "grad_norm": 0.28318241964483964, "learning_rate": 1.7502369849833908e-05, "loss": 0.0483, "step": 5042 }, { "epoch": 0.55, "grad_norm": 0.355538770046406, "learning_rate": 1.7495311585871587e-05, "loss": 0.0601, "step": 5043 }, { "epoch": 0.55, "grad_norm": 0.2597771548817189, "learning_rate": 1.748825363881881e-05, "loss": 0.0416, "step": 5044 }, { "epoch": 0.55, "grad_norm": 0.3367107496616662, "learning_rate": 1.7481196009568606e-05, "loss": 0.0842, "step": 5045 }, { "epoch": 0.55, "grad_norm": 0.32081127875352194, "learning_rate": 1.7474138699013953e-05, "loss": 0.0598, "step": 5046 }, { "epoch": 0.55, "grad_norm": 0.2257297437320428, "learning_rate": 1.7467081708047775e-05, "loss": 0.0467, "step": 5047 }, { "epoch": 0.55, "grad_norm": 0.30081102298173434, "learning_rate": 1.746002503756298e-05, "loss": 0.0499, "step": 5048 }, { "epoch": 0.55, "grad_norm": 0.2902564701382669, "learning_rate": 1.7452968688452418e-05, "loss": 0.0501, "step": 5049 }, { "epoch": 0.55, "grad_norm": 0.2717310655993475, "learning_rate": 1.7445912661608912e-05, "loss": 0.0437, "step": 5050 }, { "epoch": 0.55, "grad_norm": 0.34418557487269436, "learning_rate": 1.743885695792522e-05, "loss": 0.0519, "step": 5051 }, { "epoch": 0.55, "grad_norm": 0.3535292817739349, "learning_rate": 1.7431801578294097e-05, "loss": 0.0736, "step": 5052 }, { "epoch": 0.55, "grad_norm": 0.4060211087022254, "learning_rate": 1.7424746523608218e-05, "loss": 0.0762, "step": 5053 }, { "epoch": 0.56, "grad_norm": 0.3166670842648982, "learning_rate": 1.7417691794760247e-05, "loss": 0.0503, "step": 5054 }, { "epoch": 0.56, "grad_norm": 0.3585281345282278, "learning_rate": 1.7410637392642787e-05, "loss": 0.0554, "step": 5055 }, { "epoch": 0.56, "grad_norm": 0.3892928678553993, "learning_rate": 1.740358331814841e-05, "loss": 0.0568, "step": 5056 }, { "epoch": 0.56, "grad_norm": 0.3609059522308646, "learning_rate": 1.7396529572169642e-05, "loss": 0.0631, "step": 5057 }, { "epoch": 0.56, "grad_norm": 0.3522829092070097, "learning_rate": 1.7389476155598974e-05, "loss": 0.0559, "step": 5058 }, { "epoch": 0.56, "grad_norm": 0.30681902561408875, "learning_rate": 1.738242306932884e-05, "loss": 0.0569, "step": 5059 }, { "epoch": 0.56, "grad_norm": 0.3082430693991221, "learning_rate": 1.7375370314251657e-05, "loss": 0.0491, "step": 5060 }, { "epoch": 0.56, "grad_norm": 0.34872301788617277, "learning_rate": 1.7368317891259767e-05, "loss": 0.075, "step": 5061 }, { "epoch": 0.56, "grad_norm": 0.31598101403973267, "learning_rate": 1.7361265801245504e-05, "loss": 0.054, "step": 5062 }, { "epoch": 0.56, "grad_norm": 0.3369748046962167, "learning_rate": 1.7354214045101146e-05, "loss": 0.0548, "step": 5063 }, { "epoch": 0.56, "grad_norm": 0.2763976225253177, "learning_rate": 1.7347162623718913e-05, "loss": 0.0724, "step": 5064 }, { "epoch": 0.56, "grad_norm": 0.34208588506666104, "learning_rate": 1.7340111537991015e-05, "loss": 0.0628, "step": 5065 }, { "epoch": 0.56, "grad_norm": 0.3640915846769672, "learning_rate": 1.7333060788809582e-05, "loss": 0.044, "step": 5066 }, { "epoch": 0.56, "grad_norm": 0.3125153060979357, "learning_rate": 1.732601037706674e-05, "loss": 0.0394, "step": 5067 }, { "epoch": 0.56, "grad_norm": 0.3355524606650584, "learning_rate": 1.7318960303654534e-05, "loss": 0.0554, "step": 5068 }, { "epoch": 0.56, "grad_norm": 0.3964011006214421, "learning_rate": 1.7311910569465004e-05, "loss": 0.0787, "step": 5069 }, { "epoch": 0.56, "grad_norm": 0.4341759939375533, "learning_rate": 1.7304861175390112e-05, "loss": 0.0751, "step": 5070 }, { "epoch": 0.56, "grad_norm": 0.24737352304080354, "learning_rate": 1.7297812122321805e-05, "loss": 0.0433, "step": 5071 }, { "epoch": 0.56, "grad_norm": 0.34009618262966995, "learning_rate": 1.729076341115197e-05, "loss": 0.0922, "step": 5072 }, { "epoch": 0.56, "grad_norm": 0.2490521881859778, "learning_rate": 1.728371504277246e-05, "loss": 0.0561, "step": 5073 }, { "epoch": 0.56, "grad_norm": 0.3320103807061644, "learning_rate": 1.7276667018075073e-05, "loss": 0.0692, "step": 5074 }, { "epoch": 0.56, "grad_norm": 0.26887293598725337, "learning_rate": 1.726961933795158e-05, "loss": 0.0426, "step": 5075 }, { "epoch": 0.56, "grad_norm": 0.3285078812093594, "learning_rate": 1.726257200329369e-05, "loss": 0.0636, "step": 5076 }, { "epoch": 0.56, "grad_norm": 0.2534061201262015, "learning_rate": 1.7255525014993092e-05, "loss": 0.047, "step": 5077 }, { "epoch": 0.56, "grad_norm": 0.30155677641739015, "learning_rate": 1.72484783739414e-05, "loss": 0.0635, "step": 5078 }, { "epoch": 0.56, "grad_norm": 0.35499942104534166, "learning_rate": 1.7241432081030206e-05, "loss": 0.0609, "step": 5079 }, { "epoch": 0.56, "grad_norm": 0.31672678123796466, "learning_rate": 1.7234386137151067e-05, "loss": 0.0481, "step": 5080 }, { "epoch": 0.56, "grad_norm": 0.24340283876742694, "learning_rate": 1.7227340543195466e-05, "loss": 0.0328, "step": 5081 }, { "epoch": 0.56, "grad_norm": 0.29442590099299537, "learning_rate": 1.7220295300054867e-05, "loss": 0.0644, "step": 5082 }, { "epoch": 0.56, "grad_norm": 0.31362230709724537, "learning_rate": 1.7213250408620674e-05, "loss": 0.0538, "step": 5083 }, { "epoch": 0.56, "grad_norm": 0.34773400326608034, "learning_rate": 1.7206205869784254e-05, "loss": 0.0625, "step": 5084 }, { "epoch": 0.56, "grad_norm": 0.25303537230156536, "learning_rate": 1.7199161684436933e-05, "loss": 0.0529, "step": 5085 }, { "epoch": 0.56, "grad_norm": 0.30992558547525695, "learning_rate": 1.719211785346998e-05, "loss": 0.0391, "step": 5086 }, { "epoch": 0.56, "grad_norm": 0.2615875940351599, "learning_rate": 1.7185074377774633e-05, "loss": 0.0447, "step": 5087 }, { "epoch": 0.56, "grad_norm": 0.4372570691712308, "learning_rate": 1.717803125824207e-05, "loss": 0.0812, "step": 5088 }, { "epoch": 0.56, "grad_norm": 0.2959408997135996, "learning_rate": 1.7170988495763443e-05, "loss": 0.0532, "step": 5089 }, { "epoch": 0.56, "grad_norm": 0.281115897314965, "learning_rate": 1.716394609122984e-05, "loss": 0.0528, "step": 5090 }, { "epoch": 0.56, "grad_norm": 0.32188896361867053, "learning_rate": 1.715690404553232e-05, "loss": 0.0504, "step": 5091 }, { "epoch": 0.56, "grad_norm": 0.21733115063038813, "learning_rate": 1.714986235956188e-05, "loss": 0.0448, "step": 5092 }, { "epoch": 0.56, "grad_norm": 0.36235993984517734, "learning_rate": 1.7142821034209488e-05, "loss": 0.0635, "step": 5093 }, { "epoch": 0.56, "grad_norm": 0.3292087640682664, "learning_rate": 1.713578007036605e-05, "loss": 0.0678, "step": 5094 }, { "epoch": 0.56, "grad_norm": 0.49258449226638445, "learning_rate": 1.7128739468922445e-05, "loss": 0.0901, "step": 5095 }, { "epoch": 0.56, "grad_norm": 0.32377849700143196, "learning_rate": 1.712169923076948e-05, "loss": 0.0585, "step": 5096 }, { "epoch": 0.56, "grad_norm": 0.2785186863713594, "learning_rate": 1.711465935679795e-05, "loss": 0.0538, "step": 5097 }, { "epoch": 0.56, "grad_norm": 0.22405879499120163, "learning_rate": 1.710761984789858e-05, "loss": 0.0422, "step": 5098 }, { "epoch": 0.56, "grad_norm": 0.45103246012829096, "learning_rate": 1.7100580704962048e-05, "loss": 0.1284, "step": 5099 }, { "epoch": 0.56, "grad_norm": 0.37361622846841774, "learning_rate": 1.7093541928879004e-05, "loss": 0.0636, "step": 5100 }, { "epoch": 0.56, "grad_norm": 0.2924586241752771, "learning_rate": 1.7086503520540027e-05, "loss": 0.0509, "step": 5101 }, { "epoch": 0.56, "grad_norm": 0.3704349693558163, "learning_rate": 1.7079465480835677e-05, "loss": 0.0663, "step": 5102 }, { "epoch": 0.56, "grad_norm": 0.24524600986662592, "learning_rate": 1.7072427810656436e-05, "loss": 0.0426, "step": 5103 }, { "epoch": 0.56, "grad_norm": 0.22810341262576617, "learning_rate": 1.7065390510892767e-05, "loss": 0.0539, "step": 5104 }, { "epoch": 0.56, "grad_norm": 0.34417395024447445, "learning_rate": 1.7058353582435075e-05, "loss": 0.0665, "step": 5105 }, { "epoch": 0.56, "grad_norm": 0.4585472622757748, "learning_rate": 1.7051317026173715e-05, "loss": 0.0674, "step": 5106 }, { "epoch": 0.56, "grad_norm": 0.3246709129495116, "learning_rate": 1.7044280842998998e-05, "loss": 0.0651, "step": 5107 }, { "epoch": 0.56, "grad_norm": 0.2828797302634436, "learning_rate": 1.703724503380119e-05, "loss": 0.0435, "step": 5108 }, { "epoch": 0.56, "grad_norm": 0.29581321773284813, "learning_rate": 1.7030209599470505e-05, "loss": 0.0427, "step": 5109 }, { "epoch": 0.56, "grad_norm": 0.30279689266347104, "learning_rate": 1.7023174540897112e-05, "loss": 0.0486, "step": 5110 }, { "epoch": 0.56, "grad_norm": 0.35530447045801083, "learning_rate": 1.7016139858971136e-05, "loss": 0.062, "step": 5111 }, { "epoch": 0.56, "grad_norm": 0.3006020668664625, "learning_rate": 1.7009105554582652e-05, "loss": 0.058, "step": 5112 }, { "epoch": 0.56, "grad_norm": 0.3179507294968168, "learning_rate": 1.7002071628621675e-05, "loss": 0.0577, "step": 5113 }, { "epoch": 0.56, "grad_norm": 0.31721588439969883, "learning_rate": 1.6995038081978193e-05, "loss": 0.0669, "step": 5114 }, { "epoch": 0.56, "grad_norm": 0.2544246955897499, "learning_rate": 1.6988004915542143e-05, "loss": 0.0474, "step": 5115 }, { "epoch": 0.56, "grad_norm": 0.33258999205429257, "learning_rate": 1.6980972130203396e-05, "loss": 0.0505, "step": 5116 }, { "epoch": 0.56, "grad_norm": 0.30920680030220316, "learning_rate": 1.697393972685179e-05, "loss": 0.0484, "step": 5117 }, { "epoch": 0.56, "grad_norm": 0.29906068117710727, "learning_rate": 1.6966907706377103e-05, "loss": 0.0587, "step": 5118 }, { "epoch": 0.56, "grad_norm": 0.3245564620758247, "learning_rate": 1.6959876069669088e-05, "loss": 0.0674, "step": 5119 }, { "epoch": 0.56, "grad_norm": 0.31947071835832147, "learning_rate": 1.695284481761742e-05, "loss": 0.0501, "step": 5120 }, { "epoch": 0.56, "grad_norm": 0.3421745829125585, "learning_rate": 1.6945813951111746e-05, "loss": 0.0643, "step": 5121 }, { "epoch": 0.56, "grad_norm": 0.36055704269868233, "learning_rate": 1.6938783471041647e-05, "loss": 0.0628, "step": 5122 }, { "epoch": 0.56, "grad_norm": 0.27731016788529445, "learning_rate": 1.6931753378296682e-05, "loss": 0.0571, "step": 5123 }, { "epoch": 0.56, "grad_norm": 0.3051614010283815, "learning_rate": 1.692472367376633e-05, "loss": 0.0428, "step": 5124 }, { "epoch": 0.56, "grad_norm": 0.405505525247958, "learning_rate": 1.691769435834004e-05, "loss": 0.1074, "step": 5125 }, { "epoch": 0.56, "grad_norm": 0.4090393116952397, "learning_rate": 1.691066543290721e-05, "loss": 0.0775, "step": 5126 }, { "epoch": 0.56, "grad_norm": 0.26558222145727184, "learning_rate": 1.6903636898357185e-05, "loss": 0.042, "step": 5127 }, { "epoch": 0.56, "grad_norm": 0.25346701851094533, "learning_rate": 1.6896608755579256e-05, "loss": 0.0633, "step": 5128 }, { "epoch": 0.56, "grad_norm": 0.3878416654761144, "learning_rate": 1.688958100546267e-05, "loss": 0.0642, "step": 5129 }, { "epoch": 0.56, "grad_norm": 0.3484256460114992, "learning_rate": 1.6882553648896625e-05, "loss": 0.0562, "step": 5130 }, { "epoch": 0.56, "grad_norm": 0.2652885606387304, "learning_rate": 1.6875526686770277e-05, "loss": 0.0408, "step": 5131 }, { "epoch": 0.56, "grad_norm": 0.28107621863987486, "learning_rate": 1.686850011997271e-05, "loss": 0.0459, "step": 5132 }, { "epoch": 0.56, "grad_norm": 0.3064934201039877, "learning_rate": 1.6861473949392983e-05, "loss": 0.051, "step": 5133 }, { "epoch": 0.56, "grad_norm": 0.30573218320867157, "learning_rate": 1.685444817592008e-05, "loss": 0.0843, "step": 5134 }, { "epoch": 0.56, "grad_norm": 0.3913446473091023, "learning_rate": 1.684742280044296e-05, "loss": 0.0874, "step": 5135 }, { "epoch": 0.56, "grad_norm": 0.3137450093750119, "learning_rate": 1.6840397823850513e-05, "loss": 0.0474, "step": 5136 }, { "epoch": 0.56, "grad_norm": 0.30506975697312927, "learning_rate": 1.683337324703159e-05, "loss": 0.0526, "step": 5137 }, { "epoch": 0.56, "grad_norm": 0.30696955733225617, "learning_rate": 1.6826349070874973e-05, "loss": 0.0483, "step": 5138 }, { "epoch": 0.56, "grad_norm": 0.42842293974038276, "learning_rate": 1.6819325296269426e-05, "loss": 0.0723, "step": 5139 }, { "epoch": 0.56, "grad_norm": 0.2776156825074066, "learning_rate": 1.6812301924103626e-05, "loss": 0.0525, "step": 5140 }, { "epoch": 0.56, "grad_norm": 0.44516781916100806, "learning_rate": 1.6805278955266227e-05, "loss": 0.0612, "step": 5141 }, { "epoch": 0.56, "grad_norm": 0.24392622116309437, "learning_rate": 1.6798256390645816e-05, "loss": 0.0463, "step": 5142 }, { "epoch": 0.56, "grad_norm": 0.35361049496138447, "learning_rate": 1.6791234231130937e-05, "loss": 0.0574, "step": 5143 }, { "epoch": 0.56, "grad_norm": 0.2711724334772591, "learning_rate": 1.6784212477610075e-05, "loss": 0.0446, "step": 5144 }, { "epoch": 0.57, "grad_norm": 0.29333892324297994, "learning_rate": 1.677719113097167e-05, "loss": 0.0356, "step": 5145 }, { "epoch": 0.57, "grad_norm": 0.24203772682108785, "learning_rate": 1.6770170192104107e-05, "loss": 0.0452, "step": 5146 }, { "epoch": 0.57, "grad_norm": 0.29045960734621556, "learning_rate": 1.676314966189573e-05, "loss": 0.0535, "step": 5147 }, { "epoch": 0.57, "grad_norm": 0.23727784181556422, "learning_rate": 1.67561295412348e-05, "loss": 0.0537, "step": 5148 }, { "epoch": 0.57, "grad_norm": 0.2979947855816416, "learning_rate": 1.6749109831009574e-05, "loss": 0.0528, "step": 5149 }, { "epoch": 0.57, "grad_norm": 0.3568537631956328, "learning_rate": 1.6742090532108228e-05, "loss": 0.0446, "step": 5150 }, { "epoch": 0.57, "grad_norm": 0.30903699887313796, "learning_rate": 1.6735071645418874e-05, "loss": 0.0504, "step": 5151 }, { "epoch": 0.57, "grad_norm": 0.4041219876687711, "learning_rate": 1.6728053171829603e-05, "loss": 0.0789, "step": 5152 }, { "epoch": 0.57, "grad_norm": 0.4368678191690414, "learning_rate": 1.6721035112228427e-05, "loss": 0.0816, "step": 5153 }, { "epoch": 0.57, "grad_norm": 0.6513108396392764, "learning_rate": 1.6714017467503328e-05, "loss": 0.1534, "step": 5154 }, { "epoch": 0.57, "grad_norm": 0.28480162844272705, "learning_rate": 1.6707000238542213e-05, "loss": 0.0395, "step": 5155 }, { "epoch": 0.57, "grad_norm": 0.4079935731498508, "learning_rate": 1.6699983426232955e-05, "loss": 0.0733, "step": 5156 }, { "epoch": 0.57, "grad_norm": 0.2851944205459965, "learning_rate": 1.669296703146336e-05, "loss": 0.0497, "step": 5157 }, { "epoch": 0.57, "grad_norm": 0.4196943154424278, "learning_rate": 1.6685951055121203e-05, "loss": 0.0627, "step": 5158 }, { "epoch": 0.57, "grad_norm": 0.37183717246761716, "learning_rate": 1.667893549809417e-05, "loss": 0.0782, "step": 5159 }, { "epoch": 0.57, "grad_norm": 0.26008156514893405, "learning_rate": 1.667192036126993e-05, "loss": 0.0414, "step": 5160 }, { "epoch": 0.57, "grad_norm": 0.25079922775050034, "learning_rate": 1.666490564553608e-05, "loss": 0.0392, "step": 5161 }, { "epoch": 0.57, "grad_norm": 0.22007195347266878, "learning_rate": 1.665789135178017e-05, "loss": 0.0436, "step": 5162 }, { "epoch": 0.57, "grad_norm": 0.39780031004251604, "learning_rate": 1.6650877480889685e-05, "loss": 0.0712, "step": 5163 }, { "epoch": 0.57, "grad_norm": 0.2112713485432251, "learning_rate": 1.664386403375208e-05, "loss": 0.0361, "step": 5164 }, { "epoch": 0.57, "grad_norm": 0.2686055915445793, "learning_rate": 1.6636851011254722e-05, "loss": 0.0449, "step": 5165 }, { "epoch": 0.57, "grad_norm": 0.29202897978706954, "learning_rate": 1.6629838414284972e-05, "loss": 0.0637, "step": 5166 }, { "epoch": 0.57, "grad_norm": 0.3112071234113945, "learning_rate": 1.662282624373009e-05, "loss": 0.0527, "step": 5167 }, { "epoch": 0.57, "grad_norm": 0.2880934762776518, "learning_rate": 1.6615814500477307e-05, "loss": 0.0452, "step": 5168 }, { "epoch": 0.57, "grad_norm": 0.2925294252413495, "learning_rate": 1.6608803185413792e-05, "loss": 0.0417, "step": 5169 }, { "epoch": 0.57, "grad_norm": 0.320840679224972, "learning_rate": 1.6601792299426668e-05, "loss": 0.0546, "step": 5170 }, { "epoch": 0.57, "grad_norm": 0.30981612449297374, "learning_rate": 1.659478184340299e-05, "loss": 0.051, "step": 5171 }, { "epoch": 0.57, "grad_norm": 0.30845845552273965, "learning_rate": 1.658777181822978e-05, "loss": 0.0573, "step": 5172 }, { "epoch": 0.57, "grad_norm": 0.315106666439429, "learning_rate": 1.6580762224793977e-05, "loss": 0.0407, "step": 5173 }, { "epoch": 0.57, "grad_norm": 0.29445459597759877, "learning_rate": 1.6573753063982492e-05, "loss": 0.054, "step": 5174 }, { "epoch": 0.57, "grad_norm": 0.42009000739045305, "learning_rate": 1.6566744336682158e-05, "loss": 0.0932, "step": 5175 }, { "epoch": 0.57, "grad_norm": 0.32205595651882796, "learning_rate": 1.655973604377978e-05, "loss": 0.0587, "step": 5176 }, { "epoch": 0.57, "grad_norm": 0.27161409860179414, "learning_rate": 1.655272818616208e-05, "loss": 0.0381, "step": 5177 }, { "epoch": 0.57, "grad_norm": 0.37719153736295646, "learning_rate": 1.6545720764715746e-05, "loss": 0.058, "step": 5178 }, { "epoch": 0.57, "grad_norm": 0.36650687912005386, "learning_rate": 1.6538713780327395e-05, "loss": 0.0565, "step": 5179 }, { "epoch": 0.57, "grad_norm": 0.2800599990150764, "learning_rate": 1.6531707233883607e-05, "loss": 0.0527, "step": 5180 }, { "epoch": 0.57, "grad_norm": 0.34092566493233095, "learning_rate": 1.652470112627089e-05, "loss": 0.063, "step": 5181 }, { "epoch": 0.57, "grad_norm": 0.38688405674947124, "learning_rate": 1.651769545837569e-05, "loss": 0.0901, "step": 5182 }, { "epoch": 0.57, "grad_norm": 0.2778816648105283, "learning_rate": 1.6510690231084436e-05, "loss": 0.0454, "step": 5183 }, { "epoch": 0.57, "grad_norm": 0.31072037126934776, "learning_rate": 1.650368544528346e-05, "loss": 0.0358, "step": 5184 }, { "epoch": 0.57, "grad_norm": 0.2944523187000291, "learning_rate": 1.6496681101859055e-05, "loss": 0.0516, "step": 5185 }, { "epoch": 0.57, "grad_norm": 0.22673928006823216, "learning_rate": 1.6489677201697453e-05, "loss": 0.0329, "step": 5186 }, { "epoch": 0.57, "grad_norm": 0.37288048913248606, "learning_rate": 1.6482673745684842e-05, "loss": 0.0786, "step": 5187 }, { "epoch": 0.57, "grad_norm": 0.2682297700424568, "learning_rate": 1.6475670734707336e-05, "loss": 0.0485, "step": 5188 }, { "epoch": 0.57, "grad_norm": 0.30229954408695797, "learning_rate": 1.6468668169651012e-05, "loss": 0.0493, "step": 5189 }, { "epoch": 0.57, "grad_norm": 0.33750033025372206, "learning_rate": 1.6461666051401865e-05, "loss": 0.0475, "step": 5190 }, { "epoch": 0.57, "grad_norm": 0.3172191726205247, "learning_rate": 1.6454664380845862e-05, "loss": 0.0368, "step": 5191 }, { "epoch": 0.57, "grad_norm": 0.3362170320607557, "learning_rate": 1.6447663158868897e-05, "loss": 0.061, "step": 5192 }, { "epoch": 0.57, "grad_norm": 0.3913102053063349, "learning_rate": 1.644066238635681e-05, "loss": 0.0719, "step": 5193 }, { "epoch": 0.57, "grad_norm": 0.2654376210535113, "learning_rate": 1.6433662064195378e-05, "loss": 0.0334, "step": 5194 }, { "epoch": 0.57, "grad_norm": 0.34168859674079904, "learning_rate": 1.6426662193270336e-05, "loss": 0.0631, "step": 5195 }, { "epoch": 0.57, "grad_norm": 0.5171339036074991, "learning_rate": 1.641966277446735e-05, "loss": 0.1306, "step": 5196 }, { "epoch": 0.57, "grad_norm": 0.2924594393957121, "learning_rate": 1.6412663808672036e-05, "loss": 0.037, "step": 5197 }, { "epoch": 0.57, "grad_norm": 0.2529918638417605, "learning_rate": 1.6405665296769942e-05, "loss": 0.0448, "step": 5198 }, { "epoch": 0.57, "grad_norm": 0.46661355796395015, "learning_rate": 1.6398667239646565e-05, "loss": 0.0877, "step": 5199 }, { "epoch": 0.57, "grad_norm": 0.30756346218281877, "learning_rate": 1.6391669638187355e-05, "loss": 0.0575, "step": 5200 }, { "epoch": 0.57, "grad_norm": 0.30400892646288813, "learning_rate": 1.6384672493277686e-05, "loss": 0.0449, "step": 5201 }, { "epoch": 0.57, "grad_norm": 0.23404778049720912, "learning_rate": 1.6377675805802882e-05, "loss": 0.0515, "step": 5202 }, { "epoch": 0.57, "grad_norm": 0.35008754742232134, "learning_rate": 1.637067957664822e-05, "loss": 0.0566, "step": 5203 }, { "epoch": 0.57, "grad_norm": 0.24880728222613388, "learning_rate": 1.6363683806698896e-05, "loss": 0.0511, "step": 5204 }, { "epoch": 0.57, "grad_norm": 0.3532039680631212, "learning_rate": 1.635668849684007e-05, "loss": 0.0772, "step": 5205 }, { "epoch": 0.57, "grad_norm": 0.27099999777241385, "learning_rate": 1.6349693647956824e-05, "loss": 0.0651, "step": 5206 }, { "epoch": 0.57, "grad_norm": 0.3232851028765911, "learning_rate": 1.6342699260934204e-05, "loss": 0.0569, "step": 5207 }, { "epoch": 0.57, "grad_norm": 0.3277576524654408, "learning_rate": 1.6335705336657176e-05, "loss": 0.0578, "step": 5208 }, { "epoch": 0.57, "grad_norm": 0.2576006249923336, "learning_rate": 1.6328711876010664e-05, "loss": 0.0472, "step": 5209 }, { "epoch": 0.57, "grad_norm": 0.3178874771888565, "learning_rate": 1.632171887987952e-05, "loss": 0.057, "step": 5210 }, { "epoch": 0.57, "grad_norm": 0.2348889200091216, "learning_rate": 1.631472634914855e-05, "loss": 0.0359, "step": 5211 }, { "epoch": 0.57, "grad_norm": 0.3040923272153393, "learning_rate": 1.6307734284702484e-05, "loss": 0.0711, "step": 5212 }, { "epoch": 0.57, "grad_norm": 0.31607798042503377, "learning_rate": 1.6300742687426017e-05, "loss": 0.0595, "step": 5213 }, { "epoch": 0.57, "grad_norm": 0.3404649887997634, "learning_rate": 1.6293751558203764e-05, "loss": 0.0446, "step": 5214 }, { "epoch": 0.57, "grad_norm": 0.27981268995260267, "learning_rate": 1.628676089792029e-05, "loss": 0.0532, "step": 5215 }, { "epoch": 0.57, "grad_norm": 0.33296990947977984, "learning_rate": 1.6279770707460096e-05, "loss": 0.0532, "step": 5216 }, { "epoch": 0.57, "grad_norm": 0.22673408817906404, "learning_rate": 1.6272780987707623e-05, "loss": 0.0375, "step": 5217 }, { "epoch": 0.57, "grad_norm": 0.30006792273107746, "learning_rate": 1.6265791739547276e-05, "loss": 0.0613, "step": 5218 }, { "epoch": 0.57, "grad_norm": 0.31690902559257167, "learning_rate": 1.625880296386336e-05, "loss": 0.0374, "step": 5219 }, { "epoch": 0.57, "grad_norm": 0.29312967331257844, "learning_rate": 1.625181466154015e-05, "loss": 0.0538, "step": 5220 }, { "epoch": 0.57, "grad_norm": 0.3474199156108664, "learning_rate": 1.6244826833461848e-05, "loss": 0.0535, "step": 5221 }, { "epoch": 0.57, "grad_norm": 0.3906577859471026, "learning_rate": 1.62378394805126e-05, "loss": 0.061, "step": 5222 }, { "epoch": 0.57, "grad_norm": 0.3951905850373451, "learning_rate": 1.6230852603576494e-05, "loss": 0.0654, "step": 5223 }, { "epoch": 0.57, "grad_norm": 0.2920592878423403, "learning_rate": 1.6223866203537558e-05, "loss": 0.0481, "step": 5224 }, { "epoch": 0.57, "grad_norm": 0.3050223735513972, "learning_rate": 1.6216880281279746e-05, "loss": 0.0493, "step": 5225 }, { "epoch": 0.57, "grad_norm": 0.3949634714981901, "learning_rate": 1.6209894837686974e-05, "loss": 0.0628, "step": 5226 }, { "epoch": 0.57, "grad_norm": 0.27627084623194414, "learning_rate": 1.6202909873643075e-05, "loss": 0.0425, "step": 5227 }, { "epoch": 0.57, "grad_norm": 0.3592032353791254, "learning_rate": 1.6195925390031845e-05, "loss": 0.0579, "step": 5228 }, { "epoch": 0.57, "grad_norm": 0.35142157167147936, "learning_rate": 1.6188941387736994e-05, "loss": 0.0521, "step": 5229 }, { "epoch": 0.57, "grad_norm": 0.2822674388389052, "learning_rate": 1.61819578676422e-05, "loss": 0.0343, "step": 5230 }, { "epoch": 0.57, "grad_norm": 0.33143301532436864, "learning_rate": 1.6174974830631044e-05, "loss": 0.0644, "step": 5231 }, { "epoch": 0.57, "grad_norm": 0.25228726608151364, "learning_rate": 1.616799227758708e-05, "loss": 0.0472, "step": 5232 }, { "epoch": 0.57, "grad_norm": 0.2882526884946151, "learning_rate": 1.6161010209393775e-05, "loss": 0.0529, "step": 5233 }, { "epoch": 0.57, "grad_norm": 0.26813759058399117, "learning_rate": 1.6154028626934548e-05, "loss": 0.0329, "step": 5234 }, { "epoch": 0.57, "grad_norm": 0.261741637182857, "learning_rate": 1.6147047531092767e-05, "loss": 0.0421, "step": 5235 }, { "epoch": 0.58, "grad_norm": 0.38538763435234974, "learning_rate": 1.6140066922751715e-05, "loss": 0.0776, "step": 5236 }, { "epoch": 0.58, "grad_norm": 0.2920774507587756, "learning_rate": 1.6133086802794627e-05, "loss": 0.0462, "step": 5237 }, { "epoch": 0.58, "grad_norm": 0.3395953531224787, "learning_rate": 1.612610717210467e-05, "loss": 0.0755, "step": 5238 }, { "epoch": 0.58, "grad_norm": 0.2589390302175967, "learning_rate": 1.6119128031564963e-05, "loss": 0.0582, "step": 5239 }, { "epoch": 0.58, "grad_norm": 0.3004705012841251, "learning_rate": 1.611214938205854e-05, "loss": 0.0454, "step": 5240 }, { "epoch": 0.58, "grad_norm": 0.23203440834233513, "learning_rate": 1.6105171224468393e-05, "loss": 0.0463, "step": 5241 }, { "epoch": 0.58, "grad_norm": 0.33325620096594455, "learning_rate": 1.609819355967744e-05, "loss": 0.0455, "step": 5242 }, { "epoch": 0.58, "grad_norm": 0.40789007201830385, "learning_rate": 1.6091216388568534e-05, "loss": 0.0724, "step": 5243 }, { "epoch": 0.58, "grad_norm": 0.25309588888922263, "learning_rate": 1.6084239712024492e-05, "loss": 0.042, "step": 5244 }, { "epoch": 0.58, "grad_norm": 0.37112516972893506, "learning_rate": 1.6077263530928032e-05, "loss": 0.0652, "step": 5245 }, { "epoch": 0.58, "grad_norm": 0.29071635020640363, "learning_rate": 1.6070287846161834e-05, "loss": 0.0439, "step": 5246 }, { "epoch": 0.58, "grad_norm": 0.29490018600643625, "learning_rate": 1.6063312658608498e-05, "loss": 0.0464, "step": 5247 }, { "epoch": 0.58, "grad_norm": 0.34270070056860374, "learning_rate": 1.6056337969150584e-05, "loss": 0.0442, "step": 5248 }, { "epoch": 0.58, "grad_norm": 0.27152861090681607, "learning_rate": 1.6049363778670563e-05, "loss": 0.0548, "step": 5249 }, { "epoch": 0.58, "grad_norm": 0.31559746593822635, "learning_rate": 1.6042390088050864e-05, "loss": 0.0564, "step": 5250 }, { "epoch": 0.58, "grad_norm": 0.5388626059724194, "learning_rate": 1.603541689817383e-05, "loss": 0.0977, "step": 5251 }, { "epoch": 0.58, "grad_norm": 0.2396804968446206, "learning_rate": 1.6028444209921775e-05, "loss": 0.0301, "step": 5252 }, { "epoch": 0.58, "grad_norm": 0.23117198981475298, "learning_rate": 1.6021472024176915e-05, "loss": 0.034, "step": 5253 }, { "epoch": 0.58, "grad_norm": 0.29093740039172244, "learning_rate": 1.601450034182142e-05, "loss": 0.0503, "step": 5254 }, { "epoch": 0.58, "grad_norm": 0.30495534030512106, "learning_rate": 1.60075291637374e-05, "loss": 0.0608, "step": 5255 }, { "epoch": 0.58, "grad_norm": 0.2548163754924262, "learning_rate": 1.6000558490806877e-05, "loss": 0.0618, "step": 5256 }, { "epoch": 0.58, "grad_norm": 0.2636368763619328, "learning_rate": 1.5993588323911843e-05, "loss": 0.0368, "step": 5257 }, { "epoch": 0.58, "grad_norm": 0.22409755608120996, "learning_rate": 1.59866186639342e-05, "loss": 0.0407, "step": 5258 }, { "epoch": 0.58, "grad_norm": 0.25858331814840774, "learning_rate": 1.5979649511755798e-05, "loss": 0.0434, "step": 5259 }, { "epoch": 0.58, "grad_norm": 0.3853648597677454, "learning_rate": 1.597268086825842e-05, "loss": 0.0699, "step": 5260 }, { "epoch": 0.58, "grad_norm": 0.3070833812262413, "learning_rate": 1.596571273432378e-05, "loss": 0.0516, "step": 5261 }, { "epoch": 0.58, "grad_norm": 0.3222657174774962, "learning_rate": 1.5958745110833536e-05, "loss": 0.0626, "step": 5262 }, { "epoch": 0.58, "grad_norm": 0.2776293425866597, "learning_rate": 1.595177799866928e-05, "loss": 0.0495, "step": 5263 }, { "epoch": 0.58, "grad_norm": 0.37201421920585737, "learning_rate": 1.5944811398712527e-05, "loss": 0.0739, "step": 5264 }, { "epoch": 0.58, "grad_norm": 0.40617995391870426, "learning_rate": 1.593784531184475e-05, "loss": 0.0821, "step": 5265 }, { "epoch": 0.58, "grad_norm": 0.25384002702258873, "learning_rate": 1.5930879738947328e-05, "loss": 0.0513, "step": 5266 }, { "epoch": 0.58, "grad_norm": 0.2683584154936449, "learning_rate": 1.5923914680901607e-05, "loss": 0.0699, "step": 5267 }, { "epoch": 0.58, "grad_norm": 0.3641223399873448, "learning_rate": 1.5916950138588834e-05, "loss": 0.0544, "step": 5268 }, { "epoch": 0.58, "grad_norm": 0.29739697896510703, "learning_rate": 1.590998611289022e-05, "loss": 0.0585, "step": 5269 }, { "epoch": 0.58, "grad_norm": 0.2491987097763603, "learning_rate": 1.5903022604686908e-05, "loss": 0.0509, "step": 5270 }, { "epoch": 0.58, "grad_norm": 0.28130431445505416, "learning_rate": 1.5896059614859946e-05, "loss": 0.0435, "step": 5271 }, { "epoch": 0.58, "grad_norm": 0.28450910738696344, "learning_rate": 1.5889097144290357e-05, "loss": 0.049, "step": 5272 }, { "epoch": 0.58, "grad_norm": 0.2610857125762479, "learning_rate": 1.588213519385906e-05, "loss": 0.0574, "step": 5273 }, { "epoch": 0.58, "grad_norm": 0.29081575839727775, "learning_rate": 1.587517376444694e-05, "loss": 0.0536, "step": 5274 }, { "epoch": 0.58, "grad_norm": 0.368844049389136, "learning_rate": 1.5868212856934794e-05, "loss": 0.0665, "step": 5275 }, { "epoch": 0.58, "grad_norm": 0.24836798781070157, "learning_rate": 1.5861252472203367e-05, "loss": 0.0401, "step": 5276 }, { "epoch": 0.58, "grad_norm": 0.34309871704942396, "learning_rate": 1.5854292611133326e-05, "loss": 0.0694, "step": 5277 }, { "epoch": 0.58, "grad_norm": 0.2994736574191442, "learning_rate": 1.5847333274605286e-05, "loss": 0.045, "step": 5278 }, { "epoch": 0.58, "grad_norm": 0.37943419810030143, "learning_rate": 1.5840374463499784e-05, "loss": 0.0826, "step": 5279 }, { "epoch": 0.58, "grad_norm": 0.4250557869873433, "learning_rate": 1.5833416178697298e-05, "loss": 0.0943, "step": 5280 }, { "epoch": 0.58, "grad_norm": 0.26947392669575115, "learning_rate": 1.5826458421078225e-05, "loss": 0.034, "step": 5281 }, { "epoch": 0.58, "grad_norm": 0.33450566706682255, "learning_rate": 1.5819501191522917e-05, "loss": 0.0567, "step": 5282 }, { "epoch": 0.58, "grad_norm": 0.26646218481752504, "learning_rate": 1.581254449091164e-05, "loss": 0.042, "step": 5283 }, { "epoch": 0.58, "grad_norm": 0.35952295491887815, "learning_rate": 1.5805588320124607e-05, "loss": 0.0549, "step": 5284 }, { "epoch": 0.58, "grad_norm": 0.30926456455127344, "learning_rate": 1.579863268004196e-05, "loss": 0.066, "step": 5285 }, { "epoch": 0.58, "grad_norm": 0.3250968944555532, "learning_rate": 1.5791677571543762e-05, "loss": 0.058, "step": 5286 }, { "epoch": 0.58, "grad_norm": 0.39262961054769485, "learning_rate": 1.5784722995510025e-05, "loss": 0.0876, "step": 5287 }, { "epoch": 0.58, "grad_norm": 0.3643003546286613, "learning_rate": 1.5777768952820697e-05, "loss": 0.07, "step": 5288 }, { "epoch": 0.58, "grad_norm": 0.3646054692587407, "learning_rate": 1.5770815444355635e-05, "loss": 0.0853, "step": 5289 }, { "epoch": 0.58, "grad_norm": 0.3955555451399972, "learning_rate": 1.576386247099465e-05, "loss": 0.0453, "step": 5290 }, { "epoch": 0.58, "grad_norm": 0.37631812495937944, "learning_rate": 1.575691003361747e-05, "loss": 0.0757, "step": 5291 }, { "epoch": 0.58, "grad_norm": 0.3075492178287895, "learning_rate": 1.5749958133103772e-05, "loss": 0.0761, "step": 5292 }, { "epoch": 0.58, "grad_norm": 0.28585927341372847, "learning_rate": 1.574300677033315e-05, "loss": 0.0362, "step": 5293 }, { "epoch": 0.58, "grad_norm": 0.26823545035004864, "learning_rate": 1.5736055946185137e-05, "loss": 0.0534, "step": 5294 }, { "epoch": 0.58, "grad_norm": 0.3189400835420362, "learning_rate": 1.5729105661539198e-05, "loss": 0.0563, "step": 5295 }, { "epoch": 0.58, "grad_norm": 0.30321555505567427, "learning_rate": 1.572215591727473e-05, "loss": 0.0445, "step": 5296 }, { "epoch": 0.58, "grad_norm": 0.32368159320742906, "learning_rate": 1.571520671427105e-05, "loss": 0.0744, "step": 5297 }, { "epoch": 0.58, "grad_norm": 0.24333355063979578, "learning_rate": 1.570825805340743e-05, "loss": 0.0329, "step": 5298 }, { "epoch": 0.58, "grad_norm": 0.2896535218320554, "learning_rate": 1.5701309935563055e-05, "loss": 0.051, "step": 5299 }, { "epoch": 0.58, "grad_norm": 0.33976743652464303, "learning_rate": 1.5694362361617043e-05, "loss": 0.0603, "step": 5300 }, { "epoch": 0.58, "grad_norm": 0.35174937050118477, "learning_rate": 1.5687415332448447e-05, "loss": 0.0591, "step": 5301 }, { "epoch": 0.58, "grad_norm": 0.34926770739697627, "learning_rate": 1.568046884893626e-05, "loss": 0.08, "step": 5302 }, { "epoch": 0.58, "grad_norm": 0.3600447674572764, "learning_rate": 1.5673522911959376e-05, "loss": 0.0609, "step": 5303 }, { "epoch": 0.58, "grad_norm": 0.3149343445451943, "learning_rate": 1.5666577522396658e-05, "loss": 0.0616, "step": 5304 }, { "epoch": 0.58, "grad_norm": 0.34828043207249665, "learning_rate": 1.5659632681126883e-05, "loss": 0.046, "step": 5305 }, { "epoch": 0.58, "grad_norm": 0.3265256276829038, "learning_rate": 1.565268838902875e-05, "loss": 0.0576, "step": 5306 }, { "epoch": 0.58, "grad_norm": 0.3645142785024197, "learning_rate": 1.5645744646980898e-05, "loss": 0.0704, "step": 5307 }, { "epoch": 0.58, "grad_norm": 0.3109067115684409, "learning_rate": 1.5638801455861893e-05, "loss": 0.047, "step": 5308 }, { "epoch": 0.58, "grad_norm": 0.29999388400439836, "learning_rate": 1.5631858816550238e-05, "loss": 0.0514, "step": 5309 }, { "epoch": 0.58, "grad_norm": 0.2832519752201255, "learning_rate": 1.5624916729924354e-05, "loss": 0.0596, "step": 5310 }, { "epoch": 0.58, "grad_norm": 0.2857402804057479, "learning_rate": 1.5617975196862607e-05, "loss": 0.0326, "step": 5311 }, { "epoch": 0.58, "grad_norm": 0.275001616364844, "learning_rate": 1.561103421824328e-05, "loss": 0.0439, "step": 5312 }, { "epoch": 0.58, "grad_norm": 0.4556139137217038, "learning_rate": 1.5604093794944595e-05, "loss": 0.0762, "step": 5313 }, { "epoch": 0.58, "grad_norm": 0.2608782238361229, "learning_rate": 1.5597153927844693e-05, "loss": 0.0417, "step": 5314 }, { "epoch": 0.58, "grad_norm": 0.37030109275277534, "learning_rate": 1.5590214617821658e-05, "loss": 0.0545, "step": 5315 }, { "epoch": 0.58, "grad_norm": 0.4052198259673694, "learning_rate": 1.5583275865753492e-05, "loss": 0.0676, "step": 5316 }, { "epoch": 0.58, "grad_norm": 0.32192826385890533, "learning_rate": 1.5576337672518136e-05, "loss": 0.0579, "step": 5317 }, { "epoch": 0.58, "grad_norm": 0.4002354949393418, "learning_rate": 1.556940003899345e-05, "loss": 0.0519, "step": 5318 }, { "epoch": 0.58, "grad_norm": 0.39395280489513107, "learning_rate": 1.5562462966057236e-05, "loss": 0.0839, "step": 5319 }, { "epoch": 0.58, "grad_norm": 0.5002689234420536, "learning_rate": 1.55555264545872e-05, "loss": 0.0838, "step": 5320 }, { "epoch": 0.58, "grad_norm": 0.34034321792584765, "learning_rate": 1.554859050546102e-05, "loss": 0.0413, "step": 5321 }, { "epoch": 0.58, "grad_norm": 0.3024851603848303, "learning_rate": 1.5541655119556262e-05, "loss": 0.0525, "step": 5322 }, { "epoch": 0.58, "grad_norm": 0.34086222787100784, "learning_rate": 1.5534720297750448e-05, "loss": 0.0461, "step": 5323 }, { "epoch": 0.58, "grad_norm": 0.3072771039303064, "learning_rate": 1.5527786040921e-05, "loss": 0.0542, "step": 5324 }, { "epoch": 0.58, "grad_norm": 0.20857435629195625, "learning_rate": 1.5520852349945304e-05, "loss": 0.0261, "step": 5325 }, { "epoch": 0.58, "grad_norm": 0.3035151595771196, "learning_rate": 1.551391922570064e-05, "loss": 0.0477, "step": 5326 }, { "epoch": 0.58, "grad_norm": 0.26215624894685435, "learning_rate": 1.5506986669064245e-05, "loss": 0.0563, "step": 5327 }, { "epoch": 0.59, "grad_norm": 0.4793901509721871, "learning_rate": 1.550005468091326e-05, "loss": 0.105, "step": 5328 }, { "epoch": 0.59, "grad_norm": 0.29288639182559645, "learning_rate": 1.5493123262124773e-05, "loss": 0.0307, "step": 5329 }, { "epoch": 0.59, "grad_norm": 0.2710120668944695, "learning_rate": 1.548619241357579e-05, "loss": 0.0478, "step": 5330 }, { "epoch": 0.59, "grad_norm": 0.3193665469370426, "learning_rate": 1.547926213614325e-05, "loss": 0.0564, "step": 5331 }, { "epoch": 0.59, "grad_norm": 0.3998424778657557, "learning_rate": 1.5472332430704007e-05, "loss": 0.0749, "step": 5332 }, { "epoch": 0.59, "grad_norm": 0.2829291629957567, "learning_rate": 1.546540329813487e-05, "loss": 0.0536, "step": 5333 }, { "epoch": 0.59, "grad_norm": 0.24323200517559682, "learning_rate": 1.545847473931254e-05, "loss": 0.0424, "step": 5334 }, { "epoch": 0.59, "grad_norm": 0.33317123385688235, "learning_rate": 1.5451546755113676e-05, "loss": 0.064, "step": 5335 }, { "epoch": 0.59, "grad_norm": 2.9349120082370015, "learning_rate": 1.5444619346414845e-05, "loss": 0.1725, "step": 5336 }, { "epoch": 0.59, "grad_norm": 0.3779642843335106, "learning_rate": 1.5437692514092547e-05, "loss": 0.0601, "step": 5337 }, { "epoch": 0.59, "grad_norm": 0.2697827920718282, "learning_rate": 1.543076625902322e-05, "loss": 0.0514, "step": 5338 }, { "epoch": 0.59, "grad_norm": 0.2684082464280908, "learning_rate": 1.542384058208321e-05, "loss": 0.0506, "step": 5339 }, { "epoch": 0.59, "grad_norm": 0.31119893551371064, "learning_rate": 1.5416915484148805e-05, "loss": 0.0566, "step": 5340 }, { "epoch": 0.59, "grad_norm": 0.33570136043376897, "learning_rate": 1.5409990966096207e-05, "loss": 0.0523, "step": 5341 }, { "epoch": 0.59, "grad_norm": 0.3699570160783724, "learning_rate": 1.5403067028801558e-05, "loss": 0.0723, "step": 5342 }, { "epoch": 0.59, "grad_norm": 0.28041338998505977, "learning_rate": 1.5396143673140913e-05, "loss": 0.0418, "step": 5343 }, { "epoch": 0.59, "grad_norm": 0.3013494129093857, "learning_rate": 1.5389220899990267e-05, "loss": 0.0788, "step": 5344 }, { "epoch": 0.59, "grad_norm": 0.28129126987722175, "learning_rate": 1.5382298710225527e-05, "loss": 0.0571, "step": 5345 }, { "epoch": 0.59, "grad_norm": 0.30394044404248366, "learning_rate": 1.5375377104722545e-05, "loss": 0.0554, "step": 5346 }, { "epoch": 0.59, "grad_norm": 0.3846943203455503, "learning_rate": 1.536845608435707e-05, "loss": 0.0709, "step": 5347 }, { "epoch": 0.59, "grad_norm": 0.30555999983219, "learning_rate": 1.5361535650004818e-05, "loss": 0.0592, "step": 5348 }, { "epoch": 0.59, "grad_norm": 0.26164002444362655, "learning_rate": 1.5354615802541388e-05, "loss": 0.0478, "step": 5349 }, { "epoch": 0.59, "grad_norm": 0.3175599528200956, "learning_rate": 1.5347696542842333e-05, "loss": 0.0479, "step": 5350 }, { "epoch": 0.59, "grad_norm": 0.2580397500218949, "learning_rate": 1.534077787178312e-05, "loss": 0.0513, "step": 5351 }, { "epoch": 0.59, "grad_norm": 0.2770502525434804, "learning_rate": 1.5333859790239148e-05, "loss": 0.0484, "step": 5352 }, { "epoch": 0.59, "grad_norm": 0.28066084666789004, "learning_rate": 1.532694229908573e-05, "loss": 0.0503, "step": 5353 }, { "epoch": 0.59, "grad_norm": 0.24928148400863714, "learning_rate": 1.5320025399198125e-05, "loss": 0.0554, "step": 5354 }, { "epoch": 0.59, "grad_norm": 0.4968713688267873, "learning_rate": 1.5313109091451486e-05, "loss": 0.0901, "step": 5355 }, { "epoch": 0.59, "grad_norm": 0.36992235516010447, "learning_rate": 1.530619337672093e-05, "loss": 0.0643, "step": 5356 }, { "epoch": 0.59, "grad_norm": 0.29671074690227967, "learning_rate": 1.5299278255881468e-05, "loss": 0.0486, "step": 5357 }, { "epoch": 0.59, "grad_norm": 0.2882430084833048, "learning_rate": 1.5292363729808048e-05, "loss": 0.0474, "step": 5358 }, { "epoch": 0.59, "grad_norm": 0.3683207858797972, "learning_rate": 1.528544979937554e-05, "loss": 0.0557, "step": 5359 }, { "epoch": 0.59, "grad_norm": 0.28178968572157664, "learning_rate": 1.5278536465458738e-05, "loss": 0.076, "step": 5360 }, { "epoch": 0.59, "grad_norm": 0.34130176106218507, "learning_rate": 1.5271623728932364e-05, "loss": 0.0614, "step": 5361 }, { "epoch": 0.59, "grad_norm": 0.35080257512374047, "learning_rate": 1.5264711590671067e-05, "loss": 0.0737, "step": 5362 }, { "epoch": 0.59, "grad_norm": 0.3636087561552876, "learning_rate": 1.525780005154941e-05, "loss": 0.0635, "step": 5363 }, { "epoch": 0.59, "grad_norm": 0.3003510457765101, "learning_rate": 1.5250889112441889e-05, "loss": 0.0624, "step": 5364 }, { "epoch": 0.59, "grad_norm": 0.25083123537835894, "learning_rate": 1.5243978774222918e-05, "loss": 0.0392, "step": 5365 }, { "epoch": 0.59, "grad_norm": 0.2769702760795757, "learning_rate": 1.5237069037766843e-05, "loss": 0.0512, "step": 5366 }, { "epoch": 0.59, "grad_norm": 0.3563446738948619, "learning_rate": 1.5230159903947923e-05, "loss": 0.0614, "step": 5367 }, { "epoch": 0.59, "grad_norm": 0.25011045280944494, "learning_rate": 1.5223251373640354e-05, "loss": 0.0385, "step": 5368 }, { "epoch": 0.59, "grad_norm": 0.3033275330734785, "learning_rate": 1.5216343447718242e-05, "loss": 0.0536, "step": 5369 }, { "epoch": 0.59, "grad_norm": 0.2697809417291579, "learning_rate": 1.5209436127055627e-05, "loss": 0.0354, "step": 5370 }, { "epoch": 0.59, "grad_norm": 0.33350597942216426, "learning_rate": 1.5202529412526465e-05, "loss": 0.0634, "step": 5371 }, { "epoch": 0.59, "grad_norm": 0.3210924584764184, "learning_rate": 1.5195623305004637e-05, "loss": 0.047, "step": 5372 }, { "epoch": 0.59, "grad_norm": 0.2243579120292071, "learning_rate": 1.5188717805363957e-05, "loss": 0.0368, "step": 5373 }, { "epoch": 0.59, "grad_norm": 0.3201961422293201, "learning_rate": 1.5181812914478146e-05, "loss": 0.0552, "step": 5374 }, { "epoch": 0.59, "grad_norm": 0.31539077046305714, "learning_rate": 1.5174908633220867e-05, "loss": 0.0691, "step": 5375 }, { "epoch": 0.59, "grad_norm": 0.3282751693416627, "learning_rate": 1.5168004962465681e-05, "loss": 0.0624, "step": 5376 }, { "epoch": 0.59, "grad_norm": 0.25793776214545616, "learning_rate": 1.5161101903086095e-05, "loss": 0.0358, "step": 5377 }, { "epoch": 0.59, "grad_norm": 0.2321202621166454, "learning_rate": 1.5154199455955523e-05, "loss": 0.0378, "step": 5378 }, { "epoch": 0.59, "grad_norm": 0.2582491442692954, "learning_rate": 1.5147297621947313e-05, "loss": 0.0643, "step": 5379 }, { "epoch": 0.59, "grad_norm": 0.26309087815208604, "learning_rate": 1.5140396401934725e-05, "loss": 0.045, "step": 5380 }, { "epoch": 0.59, "grad_norm": 0.25247988970148555, "learning_rate": 1.513349579679095e-05, "loss": 0.0341, "step": 5381 }, { "epoch": 0.59, "grad_norm": 0.2934478656778279, "learning_rate": 1.5126595807389098e-05, "loss": 0.0623, "step": 5382 }, { "epoch": 0.59, "grad_norm": 0.329260801735525, "learning_rate": 1.51196964346022e-05, "loss": 0.0512, "step": 5383 }, { "epoch": 0.59, "grad_norm": 0.2694806314012504, "learning_rate": 1.5112797679303206e-05, "loss": 0.0465, "step": 5384 }, { "epoch": 0.59, "grad_norm": 0.27326908374573444, "learning_rate": 1.5105899542364999e-05, "loss": 0.0479, "step": 5385 }, { "epoch": 0.59, "grad_norm": 0.3268499806458816, "learning_rate": 1.5099002024660368e-05, "loss": 0.067, "step": 5386 }, { "epoch": 0.59, "grad_norm": 0.32742266193168806, "learning_rate": 1.5092105127062043e-05, "loss": 0.0627, "step": 5387 }, { "epoch": 0.59, "grad_norm": 0.2710233382542948, "learning_rate": 1.5085208850442649e-05, "loss": 0.0455, "step": 5388 }, { "epoch": 0.59, "grad_norm": 0.28720160060548783, "learning_rate": 1.5078313195674756e-05, "loss": 0.0523, "step": 5389 }, { "epoch": 0.59, "grad_norm": 0.4430665044551116, "learning_rate": 1.5071418163630855e-05, "loss": 0.0614, "step": 5390 }, { "epoch": 0.59, "grad_norm": 0.3116163396278419, "learning_rate": 1.506452375518334e-05, "loss": 0.0565, "step": 5391 }, { "epoch": 0.59, "grad_norm": 0.3179106914742854, "learning_rate": 1.5057629971204546e-05, "loss": 0.0653, "step": 5392 }, { "epoch": 0.59, "grad_norm": 0.3019330771873828, "learning_rate": 1.5050736812566709e-05, "loss": 0.0466, "step": 5393 }, { "epoch": 0.59, "grad_norm": 0.29184498473307646, "learning_rate": 1.5043844280142005e-05, "loss": 0.0426, "step": 5394 }, { "epoch": 0.59, "grad_norm": 0.24366121142195443, "learning_rate": 1.5036952374802517e-05, "loss": 0.035, "step": 5395 }, { "epoch": 0.59, "grad_norm": 0.2628743862447861, "learning_rate": 1.5030061097420255e-05, "loss": 0.0633, "step": 5396 }, { "epoch": 0.59, "grad_norm": 0.24139787017450415, "learning_rate": 1.5023170448867148e-05, "loss": 0.0423, "step": 5397 }, { "epoch": 0.59, "grad_norm": 0.2582496924255401, "learning_rate": 1.5016280430015052e-05, "loss": 0.0395, "step": 5398 }, { "epoch": 0.59, "grad_norm": 0.30458403322206407, "learning_rate": 1.5009391041735725e-05, "loss": 0.0476, "step": 5399 }, { "epoch": 0.59, "grad_norm": 0.21755135335534165, "learning_rate": 1.5002502284900871e-05, "loss": 0.0338, "step": 5400 }, { "epoch": 0.59, "grad_norm": 0.3300649651607083, "learning_rate": 1.4995614160382091e-05, "loss": 0.0754, "step": 5401 }, { "epoch": 0.59, "grad_norm": 0.2399360851773767, "learning_rate": 1.4988726669050917e-05, "loss": 0.0428, "step": 5402 }, { "epoch": 0.59, "grad_norm": 0.38898043628789375, "learning_rate": 1.4981839811778805e-05, "loss": 0.0861, "step": 5403 }, { "epoch": 0.59, "grad_norm": 0.3379136116279355, "learning_rate": 1.4974953589437117e-05, "loss": 0.0478, "step": 5404 }, { "epoch": 0.59, "grad_norm": 0.3192701009375846, "learning_rate": 1.4968068002897153e-05, "loss": 0.0579, "step": 5405 }, { "epoch": 0.59, "grad_norm": 0.24777937905881006, "learning_rate": 1.4961183053030106e-05, "loss": 0.0436, "step": 5406 }, { "epoch": 0.59, "grad_norm": 0.31597952854642736, "learning_rate": 1.4954298740707122e-05, "loss": 0.0707, "step": 5407 }, { "epoch": 0.59, "grad_norm": 0.2843534456195885, "learning_rate": 1.4947415066799247e-05, "loss": 0.036, "step": 5408 }, { "epoch": 0.59, "grad_norm": 0.22456249079450785, "learning_rate": 1.4940532032177438e-05, "loss": 0.0362, "step": 5409 }, { "epoch": 0.59, "grad_norm": 0.2618308040952834, "learning_rate": 1.4933649637712593e-05, "loss": 0.0391, "step": 5410 }, { "epoch": 0.59, "grad_norm": 0.36086214277977924, "learning_rate": 1.4926767884275508e-05, "loss": 0.0487, "step": 5411 }, { "epoch": 0.59, "grad_norm": 0.32307999336199456, "learning_rate": 1.4919886772736915e-05, "loss": 0.0426, "step": 5412 }, { "epoch": 0.59, "grad_norm": 0.2584935930719824, "learning_rate": 1.491300630396745e-05, "loss": 0.0385, "step": 5413 }, { "epoch": 0.59, "grad_norm": 0.3644731922901051, "learning_rate": 1.4906126478837683e-05, "loss": 0.0493, "step": 5414 }, { "epoch": 0.59, "grad_norm": 0.37091045872399747, "learning_rate": 1.4899247298218086e-05, "loss": 0.0435, "step": 5415 }, { "epoch": 0.59, "grad_norm": 0.338807972068346, "learning_rate": 1.4892368762979067e-05, "loss": 0.0502, "step": 5416 }, { "epoch": 0.59, "grad_norm": 0.32978212752234964, "learning_rate": 1.4885490873990934e-05, "loss": 0.0486, "step": 5417 }, { "epoch": 0.59, "grad_norm": 0.2310971325078047, "learning_rate": 1.4878613632123928e-05, "loss": 0.033, "step": 5418 }, { "epoch": 0.6, "grad_norm": 0.37039938764459734, "learning_rate": 1.48717370382482e-05, "loss": 0.0658, "step": 5419 }, { "epoch": 0.6, "grad_norm": 0.30192142976569014, "learning_rate": 1.4864861093233827e-05, "loss": 0.0539, "step": 5420 }, { "epoch": 0.6, "grad_norm": 0.2542736476480424, "learning_rate": 1.485798579795079e-05, "loss": 0.0475, "step": 5421 }, { "epoch": 0.6, "grad_norm": 0.269254542410742, "learning_rate": 1.4851111153269005e-05, "loss": 0.0462, "step": 5422 }, { "epoch": 0.6, "grad_norm": 0.2654876915077558, "learning_rate": 1.4844237160058285e-05, "loss": 0.0446, "step": 5423 }, { "epoch": 0.6, "grad_norm": 0.3734391431892028, "learning_rate": 1.4837363819188379e-05, "loss": 0.0519, "step": 5424 }, { "epoch": 0.6, "grad_norm": 0.32455339740966643, "learning_rate": 1.4830491131528958e-05, "loss": 0.0712, "step": 5425 }, { "epoch": 0.6, "grad_norm": 0.26414863434376623, "learning_rate": 1.4823619097949584e-05, "loss": 0.0412, "step": 5426 }, { "epoch": 0.6, "grad_norm": 0.3019541499822475, "learning_rate": 1.4816747719319762e-05, "loss": 0.0627, "step": 5427 }, { "epoch": 0.6, "grad_norm": 0.2368912725973804, "learning_rate": 1.4809876996508897e-05, "loss": 0.0487, "step": 5428 }, { "epoch": 0.6, "grad_norm": 0.35748180032967825, "learning_rate": 1.480300693038632e-05, "loss": 0.0666, "step": 5429 }, { "epoch": 0.6, "grad_norm": 0.2602568406159695, "learning_rate": 1.4796137521821274e-05, "loss": 0.0264, "step": 5430 }, { "epoch": 0.6, "grad_norm": 0.2388690202943695, "learning_rate": 1.4789268771682927e-05, "loss": 0.0421, "step": 5431 }, { "epoch": 0.6, "grad_norm": 0.25582170573152385, "learning_rate": 1.4782400680840352e-05, "loss": 0.0294, "step": 5432 }, { "epoch": 0.6, "grad_norm": 0.30737770083103, "learning_rate": 1.4775533250162553e-05, "loss": 0.0474, "step": 5433 }, { "epoch": 0.6, "grad_norm": 0.40678172892665165, "learning_rate": 1.4768666480518432e-05, "loss": 0.0602, "step": 5434 }, { "epoch": 0.6, "grad_norm": 0.2768167943029228, "learning_rate": 1.4761800372776827e-05, "loss": 0.0475, "step": 5435 }, { "epoch": 0.6, "grad_norm": 0.3198632136443426, "learning_rate": 1.4754934927806473e-05, "loss": 0.0637, "step": 5436 }, { "epoch": 0.6, "grad_norm": 0.30841718523709777, "learning_rate": 1.4748070146476039e-05, "loss": 0.0376, "step": 5437 }, { "epoch": 0.6, "grad_norm": 0.28145415790537537, "learning_rate": 1.4741206029654098e-05, "loss": 0.0485, "step": 5438 }, { "epoch": 0.6, "grad_norm": 0.2989088086678268, "learning_rate": 1.473434257820915e-05, "loss": 0.0599, "step": 5439 }, { "epoch": 0.6, "grad_norm": 0.3758936208420145, "learning_rate": 1.472747979300959e-05, "loss": 0.0475, "step": 5440 }, { "epoch": 0.6, "grad_norm": 0.3048547872386122, "learning_rate": 1.472061767492375e-05, "loss": 0.0659, "step": 5441 }, { "epoch": 0.6, "grad_norm": 0.3214409651713219, "learning_rate": 1.4713756224819872e-05, "loss": 0.0472, "step": 5442 }, { "epoch": 0.6, "grad_norm": 0.2049516378611804, "learning_rate": 1.4706895443566116e-05, "loss": 0.0471, "step": 5443 }, { "epoch": 0.6, "grad_norm": 0.32971052432906045, "learning_rate": 1.4700035332030545e-05, "loss": 0.0434, "step": 5444 }, { "epoch": 0.6, "grad_norm": 0.34882422142792185, "learning_rate": 1.4693175891081146e-05, "loss": 0.0386, "step": 5445 }, { "epoch": 0.6, "grad_norm": 0.27072139254498606, "learning_rate": 1.468631712158582e-05, "loss": 0.0661, "step": 5446 }, { "epoch": 0.6, "grad_norm": 0.36007833072377554, "learning_rate": 1.4679459024412391e-05, "loss": 0.0688, "step": 5447 }, { "epoch": 0.6, "grad_norm": 0.4667074844129673, "learning_rate": 1.4672601600428578e-05, "loss": 0.0762, "step": 5448 }, { "epoch": 0.6, "grad_norm": 0.26789027474626065, "learning_rate": 1.4665744850502035e-05, "loss": 0.0432, "step": 5449 }, { "epoch": 0.6, "grad_norm": 0.46465890879816074, "learning_rate": 1.465888877550032e-05, "loss": 0.0573, "step": 5450 }, { "epoch": 0.6, "grad_norm": 0.30024749907834497, "learning_rate": 1.4652033376290912e-05, "loss": 0.0605, "step": 5451 }, { "epoch": 0.6, "grad_norm": 0.253391903986646, "learning_rate": 1.4645178653741194e-05, "loss": 0.04, "step": 5452 }, { "epoch": 0.6, "grad_norm": 0.31819945689409157, "learning_rate": 1.463832460871848e-05, "loss": 0.0412, "step": 5453 }, { "epoch": 0.6, "grad_norm": 0.3135739231731931, "learning_rate": 1.4631471242089978e-05, "loss": 0.0506, "step": 5454 }, { "epoch": 0.6, "grad_norm": 0.2246633713543759, "learning_rate": 1.462461855472283e-05, "loss": 0.0639, "step": 5455 }, { "epoch": 0.6, "grad_norm": 0.36136274429672477, "learning_rate": 1.4617766547484075e-05, "loss": 0.0529, "step": 5456 }, { "epoch": 0.6, "grad_norm": 0.32574729925486706, "learning_rate": 1.4610915221240685e-05, "loss": 0.0444, "step": 5457 }, { "epoch": 0.6, "grad_norm": 0.23544005950111677, "learning_rate": 1.4604064576859513e-05, "loss": 0.0404, "step": 5458 }, { "epoch": 0.6, "grad_norm": 0.3685125426923099, "learning_rate": 1.459721461520737e-05, "loss": 0.0617, "step": 5459 }, { "epoch": 0.6, "grad_norm": 0.3273051783158187, "learning_rate": 1.459036533715095e-05, "loss": 0.0789, "step": 5460 }, { "epoch": 0.6, "grad_norm": 0.3440088251362481, "learning_rate": 1.4583516743556869e-05, "loss": 0.0481, "step": 5461 }, { "epoch": 0.6, "grad_norm": 0.2894821342932941, "learning_rate": 1.4576668835291654e-05, "loss": 0.0471, "step": 5462 }, { "epoch": 0.6, "grad_norm": 0.29190433418095274, "learning_rate": 1.4569821613221743e-05, "loss": 0.0555, "step": 5463 }, { "epoch": 0.6, "grad_norm": 0.27589054589487266, "learning_rate": 1.4562975078213504e-05, "loss": 0.0483, "step": 5464 }, { "epoch": 0.6, "grad_norm": 0.38874992468732, "learning_rate": 1.4556129231133191e-05, "loss": 0.0625, "step": 5465 }, { "epoch": 0.6, "grad_norm": 0.4230095654041868, "learning_rate": 1.4549284072846996e-05, "loss": 0.0615, "step": 5466 }, { "epoch": 0.6, "grad_norm": 0.26943575508049955, "learning_rate": 1.4542439604221005e-05, "loss": 0.0383, "step": 5467 }, { "epoch": 0.6, "grad_norm": 0.37535682232288126, "learning_rate": 1.4535595826121233e-05, "loss": 0.0656, "step": 5468 }, { "epoch": 0.6, "grad_norm": 0.26949227092679146, "learning_rate": 1.452875273941359e-05, "loss": 0.0546, "step": 5469 }, { "epoch": 0.6, "grad_norm": 0.33942434586189885, "learning_rate": 1.4521910344963918e-05, "loss": 0.0753, "step": 5470 }, { "epoch": 0.6, "grad_norm": 0.21007208432797356, "learning_rate": 1.4515068643637953e-05, "loss": 0.034, "step": 5471 }, { "epoch": 0.6, "grad_norm": 0.27001918634044286, "learning_rate": 1.450822763630136e-05, "loss": 0.038, "step": 5472 }, { "epoch": 0.6, "grad_norm": 0.3848606127917712, "learning_rate": 1.4501387323819697e-05, "loss": 0.0728, "step": 5473 }, { "epoch": 0.6, "grad_norm": 0.28779162874870956, "learning_rate": 1.4494547707058459e-05, "loss": 0.0479, "step": 5474 }, { "epoch": 0.6, "grad_norm": 0.27645190662444785, "learning_rate": 1.4487708786883024e-05, "loss": 0.0428, "step": 5475 }, { "epoch": 0.6, "grad_norm": 0.28981545045382007, "learning_rate": 1.4480870564158704e-05, "loss": 0.0331, "step": 5476 }, { "epoch": 0.6, "grad_norm": 0.27892617654007784, "learning_rate": 1.4474033039750718e-05, "loss": 0.0589, "step": 5477 }, { "epoch": 0.6, "grad_norm": 0.31212467065411087, "learning_rate": 1.44671962145242e-05, "loss": 0.0439, "step": 5478 }, { "epoch": 0.6, "grad_norm": 0.32269838348985047, "learning_rate": 1.4460360089344177e-05, "loss": 0.0449, "step": 5479 }, { "epoch": 0.6, "grad_norm": 0.30413646264979755, "learning_rate": 1.4453524665075607e-05, "loss": 0.052, "step": 5480 }, { "epoch": 0.6, "grad_norm": 0.26150877201363676, "learning_rate": 1.4446689942583348e-05, "loss": 0.0414, "step": 5481 }, { "epoch": 0.6, "grad_norm": 0.2854209611852699, "learning_rate": 1.4439855922732182e-05, "loss": 0.0395, "step": 5482 }, { "epoch": 0.6, "grad_norm": 0.2286295158686908, "learning_rate": 1.4433022606386781e-05, "loss": 0.056, "step": 5483 }, { "epoch": 0.6, "grad_norm": 0.31209123818954804, "learning_rate": 1.4426189994411756e-05, "loss": 0.0512, "step": 5484 }, { "epoch": 0.6, "grad_norm": 0.3384832576347095, "learning_rate": 1.4419358087671603e-05, "loss": 0.0444, "step": 5485 }, { "epoch": 0.6, "grad_norm": 0.31930474178516005, "learning_rate": 1.4412526887030745e-05, "loss": 0.0643, "step": 5486 }, { "epoch": 0.6, "grad_norm": 0.2956159395115791, "learning_rate": 1.4405696393353504e-05, "loss": 0.0716, "step": 5487 }, { "epoch": 0.6, "grad_norm": 0.3250006473974969, "learning_rate": 1.4398866607504128e-05, "loss": 0.0535, "step": 5488 }, { "epoch": 0.6, "grad_norm": 0.2354419503025549, "learning_rate": 1.4392037530346754e-05, "loss": 0.0355, "step": 5489 }, { "epoch": 0.6, "grad_norm": 0.3262860104173666, "learning_rate": 1.4385209162745453e-05, "loss": 0.0455, "step": 5490 }, { "epoch": 0.6, "grad_norm": 0.3416518312330896, "learning_rate": 1.4378381505564187e-05, "loss": 0.0693, "step": 5491 }, { "epoch": 0.6, "grad_norm": 0.2731465017379261, "learning_rate": 1.4371554559666843e-05, "loss": 0.0524, "step": 5492 }, { "epoch": 0.6, "grad_norm": 0.42357427492474237, "learning_rate": 1.4364728325917199e-05, "loss": 0.0695, "step": 5493 }, { "epoch": 0.6, "grad_norm": 0.33148932268746417, "learning_rate": 1.4357902805178965e-05, "loss": 0.0493, "step": 5494 }, { "epoch": 0.6, "grad_norm": 0.3387095609682479, "learning_rate": 1.4351077998315752e-05, "loss": 0.0435, "step": 5495 }, { "epoch": 0.6, "grad_norm": 0.2948594185082858, "learning_rate": 1.434425390619107e-05, "loss": 0.0891, "step": 5496 }, { "epoch": 0.6, "grad_norm": 0.2746070915984592, "learning_rate": 1.4337430529668357e-05, "loss": 0.0393, "step": 5497 }, { "epoch": 0.6, "grad_norm": 0.32797564785969896, "learning_rate": 1.4330607869610945e-05, "loss": 0.0529, "step": 5498 }, { "epoch": 0.6, "grad_norm": 0.367200546844497, "learning_rate": 1.4323785926882088e-05, "loss": 0.0507, "step": 5499 }, { "epoch": 0.6, "grad_norm": 0.40407725833100544, "learning_rate": 1.431696470234493e-05, "loss": 0.0756, "step": 5500 }, { "epoch": 0.6, "grad_norm": 0.31546558849003214, "learning_rate": 1.4310144196862552e-05, "loss": 0.0773, "step": 5501 }, { "epoch": 0.6, "grad_norm": 0.1970754216328647, "learning_rate": 1.4303324411297918e-05, "loss": 0.0333, "step": 5502 }, { "epoch": 0.6, "grad_norm": 0.3304312357791281, "learning_rate": 1.4296505346513918e-05, "loss": 0.0546, "step": 5503 }, { "epoch": 0.6, "grad_norm": 0.3110057751059579, "learning_rate": 1.4289687003373342e-05, "loss": 0.0563, "step": 5504 }, { "epoch": 0.6, "grad_norm": 0.23295923031930088, "learning_rate": 1.4282869382738893e-05, "loss": 0.0394, "step": 5505 }, { "epoch": 0.6, "grad_norm": 0.2885551898188161, "learning_rate": 1.4276052485473177e-05, "loss": 0.0306, "step": 5506 }, { "epoch": 0.6, "grad_norm": 0.2889744005753278, "learning_rate": 1.4269236312438718e-05, "loss": 0.0586, "step": 5507 }, { "epoch": 0.6, "grad_norm": 0.272575108387623, "learning_rate": 1.4262420864497939e-05, "loss": 0.0402, "step": 5508 }, { "epoch": 0.6, "grad_norm": 0.20621237664534375, "learning_rate": 1.4255606142513179e-05, "loss": 0.0342, "step": 5509 }, { "epoch": 0.61, "grad_norm": 0.2726226791438715, "learning_rate": 1.4248792147346668e-05, "loss": 0.0381, "step": 5510 }, { "epoch": 0.61, "grad_norm": 0.3190446627653352, "learning_rate": 1.4241978879860575e-05, "loss": 0.0526, "step": 5511 }, { "epoch": 0.61, "grad_norm": 0.2903752802544899, "learning_rate": 1.4235166340916955e-05, "loss": 0.0576, "step": 5512 }, { "epoch": 0.61, "grad_norm": 0.25975112587755445, "learning_rate": 1.4228354531377764e-05, "loss": 0.0383, "step": 5513 }, { "epoch": 0.61, "grad_norm": 0.32837716131802325, "learning_rate": 1.4221543452104891e-05, "loss": 0.0457, "step": 5514 }, { "epoch": 0.61, "grad_norm": 0.30506092809449165, "learning_rate": 1.4214733103960106e-05, "loss": 0.0539, "step": 5515 }, { "epoch": 0.61, "grad_norm": 0.4490399875718616, "learning_rate": 1.4207923487805108e-05, "loss": 0.0925, "step": 5516 }, { "epoch": 0.61, "grad_norm": 0.38683417553188865, "learning_rate": 1.4201114604501489e-05, "loss": 0.0544, "step": 5517 }, { "epoch": 0.61, "grad_norm": 0.3301998342910457, "learning_rate": 1.4194306454910757e-05, "loss": 0.0482, "step": 5518 }, { "epoch": 0.61, "grad_norm": 0.3232851028765911, "learning_rate": 1.4187499039894318e-05, "loss": 0.0737, "step": 5519 }, { "epoch": 0.61, "grad_norm": 0.35437926288173494, "learning_rate": 1.4180692360313494e-05, "loss": 0.0501, "step": 5520 }, { "epoch": 0.61, "grad_norm": 0.23675211397484017, "learning_rate": 1.4173886417029512e-05, "loss": 0.036, "step": 5521 }, { "epoch": 0.61, "grad_norm": 0.29212148859087067, "learning_rate": 1.4167081210903501e-05, "loss": 0.0575, "step": 5522 }, { "epoch": 0.61, "grad_norm": 0.3708292369759153, "learning_rate": 1.4160276742796509e-05, "loss": 0.0582, "step": 5523 }, { "epoch": 0.61, "grad_norm": 0.39275229087443136, "learning_rate": 1.4153473013569468e-05, "loss": 0.0512, "step": 5524 }, { "epoch": 0.61, "grad_norm": 0.34021670605474824, "learning_rate": 1.4146670024083241e-05, "loss": 0.0558, "step": 5525 }, { "epoch": 0.61, "grad_norm": 0.28483389571109996, "learning_rate": 1.413986777519858e-05, "loss": 0.0435, "step": 5526 }, { "epoch": 0.61, "grad_norm": 0.2538840064062279, "learning_rate": 1.4133066267776148e-05, "loss": 0.0434, "step": 5527 }, { "epoch": 0.61, "grad_norm": 0.5287867829535119, "learning_rate": 1.412626550267653e-05, "loss": 0.1275, "step": 5528 }, { "epoch": 0.61, "grad_norm": 0.25951080443834945, "learning_rate": 1.4119465480760192e-05, "loss": 0.0475, "step": 5529 }, { "epoch": 0.61, "grad_norm": 0.553872167402259, "learning_rate": 1.4112666202887522e-05, "loss": 0.1174, "step": 5530 }, { "epoch": 0.61, "grad_norm": 0.3106050017076968, "learning_rate": 1.4105867669918803e-05, "loss": 0.0522, "step": 5531 }, { "epoch": 0.61, "grad_norm": 0.21425685482454435, "learning_rate": 1.4099069882714236e-05, "loss": 0.0427, "step": 5532 }, { "epoch": 0.61, "grad_norm": 0.2665975381736101, "learning_rate": 1.4092272842133916e-05, "loss": 0.0499, "step": 5533 }, { "epoch": 0.61, "grad_norm": 0.2540361218993623, "learning_rate": 1.4085476549037856e-05, "loss": 0.0514, "step": 5534 }, { "epoch": 0.61, "grad_norm": 0.270432775122475, "learning_rate": 1.4078681004285959e-05, "loss": 0.0512, "step": 5535 }, { "epoch": 0.61, "grad_norm": 0.27399114689046483, "learning_rate": 1.4071886208738053e-05, "loss": 0.0424, "step": 5536 }, { "epoch": 0.61, "grad_norm": 0.27262848655328736, "learning_rate": 1.4065092163253845e-05, "loss": 0.0553, "step": 5537 }, { "epoch": 0.61, "grad_norm": 0.2644290084773797, "learning_rate": 1.4058298868692979e-05, "loss": 0.0505, "step": 5538 }, { "epoch": 0.61, "grad_norm": 0.29287387583277114, "learning_rate": 1.405150632591497e-05, "loss": 0.0469, "step": 5539 }, { "epoch": 0.61, "grad_norm": 0.2658416481621058, "learning_rate": 1.4044714535779269e-05, "loss": 0.0471, "step": 5540 }, { "epoch": 0.61, "grad_norm": 0.21789998709725442, "learning_rate": 1.4037923499145207e-05, "loss": 0.0469, "step": 5541 }, { "epoch": 0.61, "grad_norm": 0.3261424753793335, "learning_rate": 1.403113321687204e-05, "loss": 0.0702, "step": 5542 }, { "epoch": 0.61, "grad_norm": 0.2641477035435995, "learning_rate": 1.4024343689818909e-05, "loss": 0.0418, "step": 5543 }, { "epoch": 0.61, "grad_norm": 0.278911818654436, "learning_rate": 1.4017554918844872e-05, "loss": 0.0551, "step": 5544 }, { "epoch": 0.61, "grad_norm": 0.22926134714822902, "learning_rate": 1.4010766904808897e-05, "loss": 0.0401, "step": 5545 }, { "epoch": 0.61, "grad_norm": 0.32269592456877333, "learning_rate": 1.4003979648569839e-05, "loss": 0.0705, "step": 5546 }, { "epoch": 0.61, "grad_norm": 0.24983462436893616, "learning_rate": 1.3997193150986472e-05, "loss": 0.0374, "step": 5547 }, { "epoch": 0.61, "grad_norm": 0.26690762456663464, "learning_rate": 1.3990407412917462e-05, "loss": 0.0368, "step": 5548 }, { "epoch": 0.61, "grad_norm": 0.34141004198006764, "learning_rate": 1.3983622435221391e-05, "loss": 0.055, "step": 5549 }, { "epoch": 0.61, "grad_norm": 0.3245052198091605, "learning_rate": 1.3976838218756733e-05, "loss": 0.0594, "step": 5550 }, { "epoch": 0.61, "grad_norm": 0.32239717052882894, "learning_rate": 1.3970054764381879e-05, "loss": 0.0471, "step": 5551 }, { "epoch": 0.61, "grad_norm": 0.3174391828508528, "learning_rate": 1.3963272072955106e-05, "loss": 0.0554, "step": 5552 }, { "epoch": 0.61, "grad_norm": 0.2602591165160619, "learning_rate": 1.3956490145334613e-05, "loss": 0.0478, "step": 5553 }, { "epoch": 0.61, "grad_norm": 0.24961398542799765, "learning_rate": 1.3949708982378487e-05, "loss": 0.0497, "step": 5554 }, { "epoch": 0.61, "grad_norm": 0.3065719041538778, "learning_rate": 1.3942928584944733e-05, "loss": 0.0393, "step": 5555 }, { "epoch": 0.61, "grad_norm": 0.2654547426189175, "learning_rate": 1.3936148953891242e-05, "loss": 0.0468, "step": 5556 }, { "epoch": 0.61, "grad_norm": 0.43763271090075617, "learning_rate": 1.3929370090075827e-05, "loss": 0.0719, "step": 5557 }, { "epoch": 0.61, "grad_norm": 0.31957515890533356, "learning_rate": 1.392259199435618e-05, "loss": 0.0612, "step": 5558 }, { "epoch": 0.61, "grad_norm": 0.24393513996547408, "learning_rate": 1.3915814667589929e-05, "loss": 0.0567, "step": 5559 }, { "epoch": 0.61, "grad_norm": 0.46093817888630156, "learning_rate": 1.3909038110634567e-05, "loss": 0.0753, "step": 5560 }, { "epoch": 0.61, "grad_norm": 0.29493192946113983, "learning_rate": 1.3902262324347522e-05, "loss": 0.0541, "step": 5561 }, { "epoch": 0.61, "grad_norm": 0.29629170687557227, "learning_rate": 1.3895487309586097e-05, "loss": 0.0519, "step": 5562 }, { "epoch": 0.61, "grad_norm": 0.36481990858838603, "learning_rate": 1.3888713067207531e-05, "loss": 0.0663, "step": 5563 }, { "epoch": 0.61, "grad_norm": 0.3595021893536748, "learning_rate": 1.388193959806893e-05, "loss": 0.079, "step": 5564 }, { "epoch": 0.61, "grad_norm": 0.2825396823652111, "learning_rate": 1.3875166903027327e-05, "loss": 0.0609, "step": 5565 }, { "epoch": 0.61, "grad_norm": 0.43417501580062645, "learning_rate": 1.3868394982939636e-05, "loss": 0.054, "step": 5566 }, { "epoch": 0.61, "grad_norm": 0.2633411028149411, "learning_rate": 1.3861623838662694e-05, "loss": 0.0399, "step": 5567 }, { "epoch": 0.61, "grad_norm": 0.33130389935653864, "learning_rate": 1.3854853471053225e-05, "loss": 0.0738, "step": 5568 }, { "epoch": 0.61, "grad_norm": 0.2537712031464752, "learning_rate": 1.384808388096787e-05, "loss": 0.0431, "step": 5569 }, { "epoch": 0.61, "grad_norm": 0.33226718373923025, "learning_rate": 1.3841315069263146e-05, "loss": 0.0731, "step": 5570 }, { "epoch": 0.61, "grad_norm": 0.350580624510904, "learning_rate": 1.3834547036795502e-05, "loss": 0.0558, "step": 5571 }, { "epoch": 0.61, "grad_norm": 0.27665223966367936, "learning_rate": 1.3827779784421262e-05, "loss": 0.0492, "step": 5572 }, { "epoch": 0.61, "grad_norm": 0.30374478951839884, "learning_rate": 1.3821013312996674e-05, "loss": 0.0499, "step": 5573 }, { "epoch": 0.61, "grad_norm": 0.3362766151105269, "learning_rate": 1.3814247623377868e-05, "loss": 0.0765, "step": 5574 }, { "epoch": 0.61, "grad_norm": 0.2127952449514032, "learning_rate": 1.3807482716420887e-05, "loss": 0.0274, "step": 5575 }, { "epoch": 0.61, "grad_norm": 0.2785711530035179, "learning_rate": 1.3800718592981668e-05, "loss": 0.0393, "step": 5576 }, { "epoch": 0.61, "grad_norm": 0.34604194976791997, "learning_rate": 1.3793955253916058e-05, "loss": 0.0681, "step": 5577 }, { "epoch": 0.61, "grad_norm": 0.30766432870203525, "learning_rate": 1.3787192700079792e-05, "loss": 0.0459, "step": 5578 }, { "epoch": 0.61, "grad_norm": 0.21715383097165467, "learning_rate": 1.3780430932328511e-05, "loss": 0.0338, "step": 5579 }, { "epoch": 0.61, "grad_norm": 0.22558563199373505, "learning_rate": 1.377366995151777e-05, "loss": 0.0379, "step": 5580 }, { "epoch": 0.61, "grad_norm": 0.3231437740737838, "learning_rate": 1.3766909758503002e-05, "loss": 0.053, "step": 5581 }, { "epoch": 0.61, "grad_norm": 0.31741061749072647, "learning_rate": 1.3760150354139558e-05, "loss": 0.0463, "step": 5582 }, { "epoch": 0.61, "grad_norm": 0.2753116548549051, "learning_rate": 1.3753391739282675e-05, "loss": 0.0431, "step": 5583 }, { "epoch": 0.61, "grad_norm": 0.2906315336210816, "learning_rate": 1.3746633914787504e-05, "loss": 0.0477, "step": 5584 }, { "epoch": 0.61, "grad_norm": 0.3389201493884306, "learning_rate": 1.373987688150908e-05, "loss": 0.0406, "step": 5585 }, { "epoch": 0.61, "grad_norm": 0.19451453278803574, "learning_rate": 1.3733120640302358e-05, "loss": 0.0295, "step": 5586 }, { "epoch": 0.61, "grad_norm": 0.3156023173160414, "learning_rate": 1.3726365192022173e-05, "loss": 0.0502, "step": 5587 }, { "epoch": 0.61, "grad_norm": 0.26102974568877396, "learning_rate": 1.3719610537523274e-05, "loss": 0.0494, "step": 5588 }, { "epoch": 0.61, "grad_norm": 0.2747212791070434, "learning_rate": 1.3712856677660299e-05, "loss": 0.0581, "step": 5589 }, { "epoch": 0.61, "grad_norm": 0.27562011463567315, "learning_rate": 1.3706103613287796e-05, "loss": 0.0413, "step": 5590 }, { "epoch": 0.61, "grad_norm": 0.30757008752066367, "learning_rate": 1.3699351345260201e-05, "loss": 0.0554, "step": 5591 }, { "epoch": 0.61, "grad_norm": 0.2882843756570884, "learning_rate": 1.369259987443186e-05, "loss": 0.0648, "step": 5592 }, { "epoch": 0.61, "grad_norm": 0.32449289015297467, "learning_rate": 1.3685849201657009e-05, "loss": 0.0368, "step": 5593 }, { "epoch": 0.61, "grad_norm": 0.3160807972628687, "learning_rate": 1.3679099327789794e-05, "loss": 0.0358, "step": 5594 }, { "epoch": 0.61, "grad_norm": 0.2857022219052435, "learning_rate": 1.3672350253684243e-05, "loss": 0.0508, "step": 5595 }, { "epoch": 0.61, "grad_norm": 0.2814827591645604, "learning_rate": 1.3665601980194297e-05, "loss": 0.0677, "step": 5596 }, { "epoch": 0.61, "grad_norm": 0.32651648904558583, "learning_rate": 1.3658854508173796e-05, "loss": 0.0427, "step": 5597 }, { "epoch": 0.61, "grad_norm": 0.32266546933578244, "learning_rate": 1.3652107838476476e-05, "loss": 0.0522, "step": 5598 }, { "epoch": 0.61, "grad_norm": 0.20409014546709017, "learning_rate": 1.3645361971955961e-05, "loss": 0.0292, "step": 5599 }, { "epoch": 0.61, "grad_norm": 0.2655659918597047, "learning_rate": 1.3638616909465791e-05, "loss": 0.0576, "step": 5600 }, { "epoch": 0.62, "grad_norm": 0.29454070210151634, "learning_rate": 1.3631872651859386e-05, "loss": 0.0449, "step": 5601 }, { "epoch": 0.62, "grad_norm": 0.3701140874164158, "learning_rate": 1.3625129199990083e-05, "loss": 0.0553, "step": 5602 }, { "epoch": 0.62, "grad_norm": 0.4178331279869523, "learning_rate": 1.3618386554711101e-05, "loss": 0.0663, "step": 5603 }, { "epoch": 0.62, "grad_norm": 0.28184870732616496, "learning_rate": 1.3611644716875568e-05, "loss": 0.0748, "step": 5604 }, { "epoch": 0.62, "grad_norm": 0.3562858538404201, "learning_rate": 1.3604903687336499e-05, "loss": 0.0589, "step": 5605 }, { "epoch": 0.62, "grad_norm": 0.28134279574611937, "learning_rate": 1.3598163466946823e-05, "loss": 0.0351, "step": 5606 }, { "epoch": 0.62, "grad_norm": 0.24204038951208703, "learning_rate": 1.3591424056559345e-05, "loss": 0.0418, "step": 5607 }, { "epoch": 0.62, "grad_norm": 0.3284039220151172, "learning_rate": 1.3584685457026789e-05, "loss": 0.0642, "step": 5608 }, { "epoch": 0.62, "grad_norm": 0.299401723827682, "learning_rate": 1.357794766920176e-05, "loss": 0.051, "step": 5609 }, { "epoch": 0.62, "grad_norm": 0.3028739361399923, "learning_rate": 1.3571210693936774e-05, "loss": 0.0399, "step": 5610 }, { "epoch": 0.62, "grad_norm": 0.24880108349520488, "learning_rate": 1.3564474532084226e-05, "loss": 0.0495, "step": 5611 }, { "epoch": 0.62, "grad_norm": 0.33811961666835144, "learning_rate": 1.3557739184496435e-05, "loss": 0.0498, "step": 5612 }, { "epoch": 0.62, "grad_norm": 0.2787868538045174, "learning_rate": 1.3551004652025582e-05, "loss": 0.0502, "step": 5613 }, { "epoch": 0.62, "grad_norm": 0.28603466811420103, "learning_rate": 1.3544270935523778e-05, "loss": 0.0546, "step": 5614 }, { "epoch": 0.62, "grad_norm": 0.45555747658174617, "learning_rate": 1.3537538035843017e-05, "loss": 0.0866, "step": 5615 }, { "epoch": 0.62, "grad_norm": 0.24424289087386372, "learning_rate": 1.3530805953835182e-05, "loss": 0.0303, "step": 5616 }, { "epoch": 0.62, "grad_norm": 0.36258914607731035, "learning_rate": 1.3524074690352068e-05, "loss": 0.0738, "step": 5617 }, { "epoch": 0.62, "grad_norm": 0.37528027550987486, "learning_rate": 1.351734424624535e-05, "loss": 0.067, "step": 5618 }, { "epoch": 0.62, "grad_norm": 0.3750212583874112, "learning_rate": 1.3510614622366615e-05, "loss": 0.0717, "step": 5619 }, { "epoch": 0.62, "grad_norm": 0.25464443591608105, "learning_rate": 1.3503885819567335e-05, "loss": 0.0578, "step": 5620 }, { "epoch": 0.62, "grad_norm": 0.2537079550293961, "learning_rate": 1.3497157838698884e-05, "loss": 0.0523, "step": 5621 }, { "epoch": 0.62, "grad_norm": 0.23945383099798823, "learning_rate": 1.3490430680612528e-05, "loss": 0.032, "step": 5622 }, { "epoch": 0.62, "grad_norm": 0.25087312875096374, "learning_rate": 1.3483704346159435e-05, "loss": 0.0389, "step": 5623 }, { "epoch": 0.62, "grad_norm": 0.2695963822326179, "learning_rate": 1.3476978836190658e-05, "loss": 0.0339, "step": 5624 }, { "epoch": 0.62, "grad_norm": 0.29095323905207143, "learning_rate": 1.347025415155716e-05, "loss": 0.0508, "step": 5625 }, { "epoch": 0.62, "grad_norm": 0.2600336341468963, "learning_rate": 1.3463530293109783e-05, "loss": 0.0373, "step": 5626 }, { "epoch": 0.62, "grad_norm": 0.29484438351804176, "learning_rate": 1.3456807261699288e-05, "loss": 0.0503, "step": 5627 }, { "epoch": 0.62, "grad_norm": 0.2075552068225611, "learning_rate": 1.34500850581763e-05, "loss": 0.0376, "step": 5628 }, { "epoch": 0.62, "grad_norm": 0.2993788039616833, "learning_rate": 1.3443363683391369e-05, "loss": 0.0454, "step": 5629 }, { "epoch": 0.62, "grad_norm": 0.2754091567709912, "learning_rate": 1.3436643138194918e-05, "loss": 0.0706, "step": 5630 }, { "epoch": 0.62, "grad_norm": 0.3322685179325457, "learning_rate": 1.3429923423437277e-05, "loss": 0.0392, "step": 5631 }, { "epoch": 0.62, "grad_norm": 0.39415352746356985, "learning_rate": 1.3423204539968677e-05, "loss": 0.0542, "step": 5632 }, { "epoch": 0.62, "grad_norm": 0.2923638647918311, "learning_rate": 1.3416486488639222e-05, "loss": 0.0368, "step": 5633 }, { "epoch": 0.62, "grad_norm": 0.2851589673056533, "learning_rate": 1.3409769270298934e-05, "loss": 0.0455, "step": 5634 }, { "epoch": 0.62, "grad_norm": 0.35113104730370964, "learning_rate": 1.340305288579771e-05, "loss": 0.0423, "step": 5635 }, { "epoch": 0.62, "grad_norm": 0.30857305819516934, "learning_rate": 1.3396337335985361e-05, "loss": 0.0414, "step": 5636 }, { "epoch": 0.62, "grad_norm": 0.3351533742928012, "learning_rate": 1.3389622621711578e-05, "loss": 0.0637, "step": 5637 }, { "epoch": 0.62, "grad_norm": 0.3897978226205992, "learning_rate": 1.3382908743825947e-05, "loss": 0.0543, "step": 5638 }, { "epoch": 0.62, "grad_norm": 0.2723712021369034, "learning_rate": 1.337619570317796e-05, "loss": 0.0316, "step": 5639 }, { "epoch": 0.62, "grad_norm": 0.302138918114539, "learning_rate": 1.336948350061698e-05, "loss": 0.0502, "step": 5640 }, { "epoch": 0.62, "grad_norm": 0.2337126273097789, "learning_rate": 1.3362772136992294e-05, "loss": 0.0287, "step": 5641 }, { "epoch": 0.62, "grad_norm": 0.255145642631831, "learning_rate": 1.335606161315306e-05, "loss": 0.0294, "step": 5642 }, { "epoch": 0.62, "grad_norm": 0.32703431829260704, "learning_rate": 1.3349351929948342e-05, "loss": 0.057, "step": 5643 }, { "epoch": 0.62, "grad_norm": 0.22955598958829557, "learning_rate": 1.3342643088227085e-05, "loss": 0.0439, "step": 5644 }, { "epoch": 0.62, "grad_norm": 0.29668990438422027, "learning_rate": 1.3335935088838145e-05, "loss": 0.0389, "step": 5645 }, { "epoch": 0.62, "grad_norm": 0.3153918925692883, "learning_rate": 1.3329227932630255e-05, "loss": 0.0458, "step": 5646 }, { "epoch": 0.62, "grad_norm": 0.3255786640931498, "learning_rate": 1.3322521620452053e-05, "loss": 0.0448, "step": 5647 }, { "epoch": 0.62, "grad_norm": 0.313656384001237, "learning_rate": 1.3315816153152055e-05, "loss": 0.057, "step": 5648 }, { "epoch": 0.62, "grad_norm": 0.28341564228774163, "learning_rate": 1.3309111531578692e-05, "loss": 0.0446, "step": 5649 }, { "epoch": 0.62, "grad_norm": 0.25463943261886, "learning_rate": 1.3302407756580278e-05, "loss": 0.0457, "step": 5650 }, { "epoch": 0.62, "grad_norm": 0.3247302742576951, "learning_rate": 1.329570482900501e-05, "loss": 0.05, "step": 5651 }, { "epoch": 0.62, "grad_norm": 0.33704674361070186, "learning_rate": 1.3289002749700992e-05, "loss": 0.0406, "step": 5652 }, { "epoch": 0.62, "grad_norm": 0.3043891228252577, "learning_rate": 1.3282301519516212e-05, "loss": 0.0467, "step": 5653 }, { "epoch": 0.62, "grad_norm": 0.3877916000292103, "learning_rate": 1.3275601139298556e-05, "loss": 0.0626, "step": 5654 }, { "epoch": 0.62, "grad_norm": 0.28729085311931213, "learning_rate": 1.3268901609895793e-05, "loss": 0.0622, "step": 5655 }, { "epoch": 0.62, "grad_norm": 0.32712097052664113, "learning_rate": 1.3262202932155602e-05, "loss": 0.0488, "step": 5656 }, { "epoch": 0.62, "grad_norm": 0.2905834624642349, "learning_rate": 1.3255505106925535e-05, "loss": 0.0724, "step": 5657 }, { "epoch": 0.62, "grad_norm": 0.3413329654663837, "learning_rate": 1.3248808135053048e-05, "loss": 0.0447, "step": 5658 }, { "epoch": 0.62, "grad_norm": 0.28871752799946365, "learning_rate": 1.3242112017385483e-05, "loss": 0.0514, "step": 5659 }, { "epoch": 0.62, "grad_norm": 0.24008771249711366, "learning_rate": 1.3235416754770082e-05, "loss": 0.0563, "step": 5660 }, { "epoch": 0.62, "grad_norm": 0.3039428831051665, "learning_rate": 1.3228722348053969e-05, "loss": 0.0527, "step": 5661 }, { "epoch": 0.62, "grad_norm": 0.47881643999592677, "learning_rate": 1.3222028798084165e-05, "loss": 0.0843, "step": 5662 }, { "epoch": 0.62, "grad_norm": 0.35023204400203517, "learning_rate": 1.3215336105707581e-05, "loss": 0.0677, "step": 5663 }, { "epoch": 0.62, "grad_norm": 0.2558142644120952, "learning_rate": 1.3208644271771026e-05, "loss": 0.0405, "step": 5664 }, { "epoch": 0.62, "grad_norm": 0.2976658476636355, "learning_rate": 1.3201953297121177e-05, "loss": 0.0603, "step": 5665 }, { "epoch": 0.62, "grad_norm": 0.3783144507793483, "learning_rate": 1.3195263182604638e-05, "loss": 0.0555, "step": 5666 }, { "epoch": 0.62, "grad_norm": 0.30107973819391354, "learning_rate": 1.3188573929067885e-05, "loss": 0.04, "step": 5667 }, { "epoch": 0.62, "grad_norm": 0.34039598299213053, "learning_rate": 1.3181885537357277e-05, "loss": 0.0814, "step": 5668 }, { "epoch": 0.62, "grad_norm": 0.22695233251331523, "learning_rate": 1.3175198008319083e-05, "loss": 0.0488, "step": 5669 }, { "epoch": 0.62, "grad_norm": 0.3901315051364228, "learning_rate": 1.3168511342799444e-05, "loss": 0.0487, "step": 5670 }, { "epoch": 0.62, "grad_norm": 0.23973099763971675, "learning_rate": 1.3161825541644407e-05, "loss": 0.045, "step": 5671 }, { "epoch": 0.62, "grad_norm": 0.26408622115993735, "learning_rate": 1.3155140605699894e-05, "loss": 0.0582, "step": 5672 }, { "epoch": 0.62, "grad_norm": 0.2486145851903617, "learning_rate": 1.3148456535811742e-05, "loss": 0.0298, "step": 5673 }, { "epoch": 0.62, "grad_norm": 0.27015364008581044, "learning_rate": 1.3141773332825647e-05, "loss": 0.0514, "step": 5674 }, { "epoch": 0.62, "grad_norm": 0.4992184192733983, "learning_rate": 1.3135090997587226e-05, "loss": 0.0853, "step": 5675 }, { "epoch": 0.62, "grad_norm": 0.2708242885595397, "learning_rate": 1.3128409530941957e-05, "loss": 0.0394, "step": 5676 }, { "epoch": 0.62, "grad_norm": 0.35954235160892956, "learning_rate": 1.3121728933735231e-05, "loss": 0.0682, "step": 5677 }, { "epoch": 0.62, "grad_norm": 0.19512277925438481, "learning_rate": 1.3115049206812325e-05, "loss": 0.0407, "step": 5678 }, { "epoch": 0.62, "grad_norm": 0.2511889675844502, "learning_rate": 1.3108370351018393e-05, "loss": 0.0693, "step": 5679 }, { "epoch": 0.62, "grad_norm": 0.4373225806026998, "learning_rate": 1.3101692367198498e-05, "loss": 0.1273, "step": 5680 }, { "epoch": 0.62, "grad_norm": 0.20473264021750973, "learning_rate": 1.3095015256197568e-05, "loss": 0.0399, "step": 5681 }, { "epoch": 0.62, "grad_norm": 0.26275027242586513, "learning_rate": 1.3088339018860439e-05, "loss": 0.0572, "step": 5682 }, { "epoch": 0.62, "grad_norm": 0.26364107305257717, "learning_rate": 1.3081663656031845e-05, "loss": 0.0368, "step": 5683 }, { "epoch": 0.62, "grad_norm": 0.5068132105088602, "learning_rate": 1.307498916855638e-05, "loss": 0.0754, "step": 5684 }, { "epoch": 0.62, "grad_norm": 0.26044753209623883, "learning_rate": 1.3068315557278559e-05, "loss": 0.0479, "step": 5685 }, { "epoch": 0.62, "grad_norm": 0.2345875015122804, "learning_rate": 1.3061642823042757e-05, "loss": 0.0436, "step": 5686 }, { "epoch": 0.62, "grad_norm": 0.3928649765323534, "learning_rate": 1.3054970966693264e-05, "loss": 0.0612, "step": 5687 }, { "epoch": 0.62, "grad_norm": 0.23474465465350516, "learning_rate": 1.3048299989074234e-05, "loss": 0.0502, "step": 5688 }, { "epoch": 0.62, "grad_norm": 0.2308454990250989, "learning_rate": 1.3041629891029732e-05, "loss": 0.0286, "step": 5689 }, { "epoch": 0.62, "grad_norm": 0.2263089273703307, "learning_rate": 1.3034960673403699e-05, "loss": 0.0421, "step": 5690 }, { "epoch": 0.62, "grad_norm": 0.2750013183432103, "learning_rate": 1.3028292337039971e-05, "loss": 0.0427, "step": 5691 }, { "epoch": 0.63, "grad_norm": 0.24221237885452, "learning_rate": 1.3021624882782262e-05, "loss": 0.0398, "step": 5692 }, { "epoch": 0.63, "grad_norm": 0.20854592007108078, "learning_rate": 1.3014958311474194e-05, "loss": 0.0449, "step": 5693 }, { "epoch": 0.63, "grad_norm": 0.3487098672807025, "learning_rate": 1.3008292623959253e-05, "loss": 0.068, "step": 5694 }, { "epoch": 0.63, "grad_norm": 0.3213280419354896, "learning_rate": 1.3001627821080835e-05, "loss": 0.0547, "step": 5695 }, { "epoch": 0.63, "grad_norm": 0.2351453360372637, "learning_rate": 1.2994963903682205e-05, "loss": 0.0422, "step": 5696 }, { "epoch": 0.63, "grad_norm": 0.2874713354748142, "learning_rate": 1.2988300872606536e-05, "loss": 0.0619, "step": 5697 }, { "epoch": 0.63, "grad_norm": 0.23210050499812654, "learning_rate": 1.2981638728696868e-05, "loss": 0.045, "step": 5698 }, { "epoch": 0.63, "grad_norm": 0.32298319141598664, "learning_rate": 1.2974977472796146e-05, "loss": 0.0541, "step": 5699 }, { "epoch": 0.63, "grad_norm": 0.3437758457697743, "learning_rate": 1.2968317105747189e-05, "loss": 0.0572, "step": 5700 }, { "epoch": 0.63, "grad_norm": 0.2582180561359662, "learning_rate": 1.2961657628392716e-05, "loss": 0.0441, "step": 5701 }, { "epoch": 0.63, "grad_norm": 0.3844791255810692, "learning_rate": 1.2954999041575331e-05, "loss": 0.0636, "step": 5702 }, { "epoch": 0.63, "grad_norm": 0.29185180096050184, "learning_rate": 1.2948341346137513e-05, "loss": 0.0438, "step": 5703 }, { "epoch": 0.63, "grad_norm": 0.3191671012625628, "learning_rate": 1.2941684542921646e-05, "loss": 0.0612, "step": 5704 }, { "epoch": 0.63, "grad_norm": 0.38620789820024126, "learning_rate": 1.2935028632769982e-05, "loss": 0.0548, "step": 5705 }, { "epoch": 0.63, "grad_norm": 0.2670933027583946, "learning_rate": 1.2928373616524682e-05, "loss": 0.0269, "step": 5706 }, { "epoch": 0.63, "grad_norm": 0.35971463781304086, "learning_rate": 1.292171949502777e-05, "loss": 0.0667, "step": 5707 }, { "epoch": 0.63, "grad_norm": 0.29065074704059624, "learning_rate": 1.291506626912118e-05, "loss": 0.045, "step": 5708 }, { "epoch": 0.63, "grad_norm": 0.3391240710067539, "learning_rate": 1.2908413939646712e-05, "loss": 0.0522, "step": 5709 }, { "epoch": 0.63, "grad_norm": 0.2500654969249138, "learning_rate": 1.290176250744607e-05, "loss": 0.0347, "step": 5710 }, { "epoch": 0.63, "grad_norm": 0.2704660679526101, "learning_rate": 1.2895111973360828e-05, "loss": 0.0533, "step": 5711 }, { "epoch": 0.63, "grad_norm": 0.36630925193977665, "learning_rate": 1.2888462338232466e-05, "loss": 0.0466, "step": 5712 }, { "epoch": 0.63, "grad_norm": 0.18245515417702202, "learning_rate": 1.288181360290233e-05, "loss": 0.0226, "step": 5713 }, { "epoch": 0.63, "grad_norm": 0.2916379840689359, "learning_rate": 1.287516576821167e-05, "loss": 0.0355, "step": 5714 }, { "epoch": 0.63, "grad_norm": 0.26664493194061323, "learning_rate": 1.2868518835001603e-05, "loss": 0.0458, "step": 5715 }, { "epoch": 0.63, "grad_norm": 0.28679912698225796, "learning_rate": 1.2861872804113154e-05, "loss": 0.0409, "step": 5716 }, { "epoch": 0.63, "grad_norm": 0.3448219275472702, "learning_rate": 1.28552276763872e-05, "loss": 0.0701, "step": 5717 }, { "epoch": 0.63, "grad_norm": 0.3062918070069477, "learning_rate": 1.284858345266456e-05, "loss": 0.0458, "step": 5718 }, { "epoch": 0.63, "grad_norm": 0.33428865369586186, "learning_rate": 1.284194013378588e-05, "loss": 0.0528, "step": 5719 }, { "epoch": 0.63, "grad_norm": 0.4147825817407804, "learning_rate": 1.2835297720591729e-05, "loss": 0.0594, "step": 5720 }, { "epoch": 0.63, "grad_norm": 0.24612488625360576, "learning_rate": 1.2828656213922537e-05, "loss": 0.0316, "step": 5721 }, { "epoch": 0.63, "grad_norm": 0.3221885126506827, "learning_rate": 1.282201561461864e-05, "loss": 0.0589, "step": 5722 }, { "epoch": 0.63, "grad_norm": 0.39136553188970963, "learning_rate": 1.2815375923520244e-05, "loss": 0.0741, "step": 5723 }, { "epoch": 0.63, "grad_norm": 0.2846913270871041, "learning_rate": 1.2808737141467451e-05, "loss": 0.0454, "step": 5724 }, { "epoch": 0.63, "grad_norm": 0.2702520931420444, "learning_rate": 1.2802099269300237e-05, "loss": 0.041, "step": 5725 }, { "epoch": 0.63, "grad_norm": 0.3761652089258179, "learning_rate": 1.2795462307858478e-05, "loss": 0.0769, "step": 5726 }, { "epoch": 0.63, "grad_norm": 0.22729783921874094, "learning_rate": 1.2788826257981914e-05, "loss": 0.0333, "step": 5727 }, { "epoch": 0.63, "grad_norm": 0.28279548772995067, "learning_rate": 1.2782191120510196e-05, "loss": 0.0446, "step": 5728 }, { "epoch": 0.63, "grad_norm": 0.2769705047319417, "learning_rate": 1.277555689628283e-05, "loss": 0.0496, "step": 5729 }, { "epoch": 0.63, "grad_norm": 0.24887583228852966, "learning_rate": 1.2768923586139232e-05, "loss": 0.0461, "step": 5730 }, { "epoch": 0.63, "grad_norm": 0.37740056259823923, "learning_rate": 1.2762291190918685e-05, "loss": 0.0699, "step": 5731 }, { "epoch": 0.63, "grad_norm": 0.2696253018136561, "learning_rate": 1.275565971146037e-05, "loss": 0.0417, "step": 5732 }, { "epoch": 0.63, "grad_norm": 0.1836277646148132, "learning_rate": 1.2749029148603339e-05, "loss": 0.0502, "step": 5733 }, { "epoch": 0.63, "grad_norm": 0.272553253941592, "learning_rate": 1.2742399503186528e-05, "loss": 0.0443, "step": 5734 }, { "epoch": 0.63, "grad_norm": 0.24973518532124672, "learning_rate": 1.2735770776048781e-05, "loss": 0.0507, "step": 5735 }, { "epoch": 0.63, "grad_norm": 0.2735331776665544, "learning_rate": 1.2729142968028793e-05, "loss": 0.0427, "step": 5736 }, { "epoch": 0.63, "grad_norm": 0.31019006780176067, "learning_rate": 1.272251607996517e-05, "loss": 0.0501, "step": 5737 }, { "epoch": 0.63, "grad_norm": 0.2837000203753502, "learning_rate": 1.2715890112696379e-05, "loss": 0.0374, "step": 5738 }, { "epoch": 0.63, "grad_norm": 0.22991461327271748, "learning_rate": 1.2709265067060782e-05, "loss": 0.0484, "step": 5739 }, { "epoch": 0.63, "grad_norm": 0.2804481945102823, "learning_rate": 1.2702640943896625e-05, "loss": 0.0353, "step": 5740 }, { "epoch": 0.63, "grad_norm": 0.4540546516060464, "learning_rate": 1.2696017744042037e-05, "loss": 0.0405, "step": 5741 }, { "epoch": 0.63, "grad_norm": 0.3727401351083858, "learning_rate": 1.2689395468335027e-05, "loss": 0.055, "step": 5742 }, { "epoch": 0.63, "grad_norm": 0.2821227678084727, "learning_rate": 1.2682774117613485e-05, "loss": 0.0435, "step": 5743 }, { "epoch": 0.63, "grad_norm": 0.29552586078681403, "learning_rate": 1.2676153692715195e-05, "loss": 0.0382, "step": 5744 }, { "epoch": 0.63, "grad_norm": 0.24667691963920307, "learning_rate": 1.266953419447781e-05, "loss": 0.0382, "step": 5745 }, { "epoch": 0.63, "grad_norm": 0.3267198174567278, "learning_rate": 1.2662915623738874e-05, "loss": 0.0537, "step": 5746 }, { "epoch": 0.63, "grad_norm": 0.2558199437165409, "learning_rate": 1.2656297981335814e-05, "loss": 0.0499, "step": 5747 }, { "epoch": 0.63, "grad_norm": 0.35545495813463757, "learning_rate": 1.2649681268105933e-05, "loss": 0.0604, "step": 5748 }, { "epoch": 0.63, "grad_norm": 0.3266137379508782, "learning_rate": 1.2643065484886428e-05, "loss": 0.0587, "step": 5749 }, { "epoch": 0.63, "grad_norm": 0.3277081271039763, "learning_rate": 1.263645063251436e-05, "loss": 0.0524, "step": 5750 }, { "epoch": 0.63, "grad_norm": 0.28440048711395666, "learning_rate": 1.2629836711826688e-05, "loss": 0.0561, "step": 5751 }, { "epoch": 0.63, "grad_norm": 0.22416114033802687, "learning_rate": 1.2623223723660258e-05, "loss": 0.0437, "step": 5752 }, { "epoch": 0.63, "grad_norm": 0.2531515643226877, "learning_rate": 1.261661166885178e-05, "loss": 0.0424, "step": 5753 }, { "epoch": 0.63, "grad_norm": 0.3143136325193017, "learning_rate": 1.2610000548237851e-05, "loss": 0.0461, "step": 5754 }, { "epoch": 0.63, "grad_norm": 0.3286135986689357, "learning_rate": 1.2603390362654964e-05, "loss": 0.0553, "step": 5755 }, { "epoch": 0.63, "grad_norm": 0.33618499820669795, "learning_rate": 1.259678111293947e-05, "loss": 0.0577, "step": 5756 }, { "epoch": 0.63, "grad_norm": 0.3299252195412322, "learning_rate": 1.259017279992763e-05, "loss": 0.0519, "step": 5757 }, { "epoch": 0.63, "grad_norm": 0.3256650173030957, "learning_rate": 1.2583565424455552e-05, "loss": 0.0751, "step": 5758 }, { "epoch": 0.63, "grad_norm": 0.2653295191820363, "learning_rate": 1.2576958987359263e-05, "loss": 0.0512, "step": 5759 }, { "epoch": 0.63, "grad_norm": 0.20432404434109436, "learning_rate": 1.2570353489474637e-05, "loss": 0.0397, "step": 5760 }, { "epoch": 0.63, "grad_norm": 0.2987309466155845, "learning_rate": 1.2563748931637454e-05, "loss": 0.0625, "step": 5761 }, { "epoch": 0.63, "grad_norm": 0.30501896605775447, "learning_rate": 1.2557145314683364e-05, "loss": 0.0464, "step": 5762 }, { "epoch": 0.63, "grad_norm": 0.2846207358694666, "learning_rate": 1.2550542639447897e-05, "loss": 0.062, "step": 5763 }, { "epoch": 0.63, "grad_norm": 0.1936883824566227, "learning_rate": 1.254394090676647e-05, "loss": 0.0248, "step": 5764 }, { "epoch": 0.63, "grad_norm": 0.24202981553166694, "learning_rate": 1.253734011747438e-05, "loss": 0.0378, "step": 5765 }, { "epoch": 0.63, "grad_norm": 0.20140512147976442, "learning_rate": 1.2530740272406792e-05, "loss": 0.029, "step": 5766 }, { "epoch": 0.63, "grad_norm": 0.2325749974782116, "learning_rate": 1.2524141372398772e-05, "loss": 0.0442, "step": 5767 }, { "epoch": 0.63, "grad_norm": 0.340700932009995, "learning_rate": 1.2517543418285247e-05, "loss": 0.0464, "step": 5768 }, { "epoch": 0.63, "grad_norm": 0.31303451839285734, "learning_rate": 1.2510946410901038e-05, "loss": 0.0469, "step": 5769 }, { "epoch": 0.63, "grad_norm": 0.31230670195902593, "learning_rate": 1.2504350351080845e-05, "loss": 0.053, "step": 5770 }, { "epoch": 0.63, "grad_norm": 0.23782032803347783, "learning_rate": 1.2497755239659239e-05, "loss": 0.0257, "step": 5771 }, { "epoch": 0.63, "grad_norm": 0.3863448445064645, "learning_rate": 1.2491161077470682e-05, "loss": 0.0755, "step": 5772 }, { "epoch": 0.63, "grad_norm": 0.27114383004076587, "learning_rate": 1.2484567865349503e-05, "loss": 0.0518, "step": 5773 }, { "epoch": 0.63, "grad_norm": 0.24584206320461685, "learning_rate": 1.2477975604129929e-05, "loss": 0.0386, "step": 5774 }, { "epoch": 0.63, "grad_norm": 0.20498180852400374, "learning_rate": 1.2471384294646044e-05, "loss": 0.0309, "step": 5775 }, { "epoch": 0.63, "grad_norm": 0.2307205279665543, "learning_rate": 1.2464793937731831e-05, "loss": 0.049, "step": 5776 }, { "epoch": 0.63, "grad_norm": 0.3048937661783259, "learning_rate": 1.2458204534221144e-05, "loss": 0.0634, "step": 5777 }, { "epoch": 0.63, "grad_norm": 0.4014768613048012, "learning_rate": 1.2451616084947714e-05, "loss": 0.0769, "step": 5778 }, { "epoch": 0.63, "grad_norm": 0.25776815032970624, "learning_rate": 1.2445028590745159e-05, "loss": 0.0483, "step": 5779 }, { "epoch": 0.63, "grad_norm": 0.22807011009267175, "learning_rate": 1.243844205244697e-05, "loss": 0.0305, "step": 5780 }, { "epoch": 0.63, "grad_norm": 0.3265058669122508, "learning_rate": 1.2431856470886517e-05, "loss": 0.0677, "step": 5781 }, { "epoch": 0.63, "grad_norm": 0.30663722334025834, "learning_rate": 1.2425271846897053e-05, "loss": 0.0549, "step": 5782 }, { "epoch": 0.64, "grad_norm": 0.29927762201887903, "learning_rate": 1.2418688181311706e-05, "loss": 0.054, "step": 5783 }, { "epoch": 0.64, "grad_norm": 0.37099138152967415, "learning_rate": 1.2412105474963491e-05, "loss": 0.072, "step": 5784 }, { "epoch": 0.64, "grad_norm": 0.4296392933940413, "learning_rate": 1.2405523728685285e-05, "loss": 0.0914, "step": 5785 }, { "epoch": 0.64, "grad_norm": 0.23974581405716563, "learning_rate": 1.2398942943309855e-05, "loss": 0.0395, "step": 5786 }, { "epoch": 0.64, "grad_norm": 0.2947184385237971, "learning_rate": 1.2392363119669854e-05, "loss": 0.0537, "step": 5787 }, { "epoch": 0.64, "grad_norm": 0.2661452527920139, "learning_rate": 1.2385784258597796e-05, "loss": 0.0402, "step": 5788 }, { "epoch": 0.64, "grad_norm": 0.3350297951614678, "learning_rate": 1.2379206360926088e-05, "loss": 0.0609, "step": 5789 }, { "epoch": 0.64, "grad_norm": 0.3529668732450119, "learning_rate": 1.2372629427487e-05, "loss": 0.0728, "step": 5790 }, { "epoch": 0.64, "grad_norm": 0.29138840730669946, "learning_rate": 1.23660534591127e-05, "loss": 0.0421, "step": 5791 }, { "epoch": 0.64, "grad_norm": 0.2680121472865356, "learning_rate": 1.235947845663521e-05, "loss": 0.031, "step": 5792 }, { "epoch": 0.64, "grad_norm": 0.26456766901930184, "learning_rate": 1.235290442088645e-05, "loss": 0.0404, "step": 5793 }, { "epoch": 0.64, "grad_norm": 0.26997056348041215, "learning_rate": 1.2346331352698206e-05, "loss": 0.055, "step": 5794 }, { "epoch": 0.64, "grad_norm": 0.20719950514602206, "learning_rate": 1.2339759252902149e-05, "loss": 0.0445, "step": 5795 }, { "epoch": 0.64, "grad_norm": 0.25988945841695127, "learning_rate": 1.2333188122329824e-05, "loss": 0.0466, "step": 5796 }, { "epoch": 0.64, "grad_norm": 0.33331206621434734, "learning_rate": 1.232661796181265e-05, "loss": 0.0549, "step": 5797 }, { "epoch": 0.64, "grad_norm": 0.26637819036115395, "learning_rate": 1.2320048772181932e-05, "loss": 0.0514, "step": 5798 }, { "epoch": 0.64, "grad_norm": 0.31014116043935575, "learning_rate": 1.2313480554268842e-05, "loss": 0.0362, "step": 5799 }, { "epoch": 0.64, "grad_norm": 0.2613991308753473, "learning_rate": 1.2306913308904435e-05, "loss": 0.0352, "step": 5800 }, { "epoch": 0.64, "grad_norm": 0.32717414868794154, "learning_rate": 1.2300347036919642e-05, "loss": 0.0365, "step": 5801 }, { "epoch": 0.64, "grad_norm": 0.35574824485426265, "learning_rate": 1.2293781739145274e-05, "loss": 0.0603, "step": 5802 }, { "epoch": 0.64, "grad_norm": 0.23301050835302559, "learning_rate": 1.2287217416412003e-05, "loss": 0.0338, "step": 5803 }, { "epoch": 0.64, "grad_norm": 0.28191890909543665, "learning_rate": 1.2280654069550404e-05, "loss": 0.0513, "step": 5804 }, { "epoch": 0.64, "grad_norm": 0.34215926441422495, "learning_rate": 1.227409169939091e-05, "loss": 0.0442, "step": 5805 }, { "epoch": 0.64, "grad_norm": 0.30860399881075634, "learning_rate": 1.2267530306763837e-05, "loss": 0.0428, "step": 5806 }, { "epoch": 0.64, "grad_norm": 0.2699669343516614, "learning_rate": 1.2260969892499372e-05, "loss": 0.0347, "step": 5807 }, { "epoch": 0.64, "grad_norm": 0.18205267816903203, "learning_rate": 1.2254410457427581e-05, "loss": 0.0333, "step": 5808 }, { "epoch": 0.64, "grad_norm": 0.27495044673606595, "learning_rate": 1.2247852002378413e-05, "loss": 0.034, "step": 5809 }, { "epoch": 0.64, "grad_norm": 0.3350322080366671, "learning_rate": 1.2241294528181678e-05, "loss": 0.0525, "step": 5810 }, { "epoch": 0.64, "grad_norm": 0.24830204311852275, "learning_rate": 1.2234738035667077e-05, "loss": 0.0389, "step": 5811 }, { "epoch": 0.64, "grad_norm": 0.22728106398085093, "learning_rate": 1.2228182525664175e-05, "loss": 0.0242, "step": 5812 }, { "epoch": 0.64, "grad_norm": 0.28524541114138385, "learning_rate": 1.222162799900242e-05, "loss": 0.0402, "step": 5813 }, { "epoch": 0.64, "grad_norm": 0.2821618768289605, "learning_rate": 1.2215074456511136e-05, "loss": 0.0439, "step": 5814 }, { "epoch": 0.64, "grad_norm": 0.32805239918748236, "learning_rate": 1.2208521899019519e-05, "loss": 0.0475, "step": 5815 }, { "epoch": 0.64, "grad_norm": 0.2506211283615422, "learning_rate": 1.2201970327356639e-05, "loss": 0.0426, "step": 5816 }, { "epoch": 0.64, "grad_norm": 0.28078780343331805, "learning_rate": 1.2195419742351446e-05, "loss": 0.0538, "step": 5817 }, { "epoch": 0.64, "grad_norm": 0.3444429695361017, "learning_rate": 1.2188870144832758e-05, "loss": 0.055, "step": 5818 }, { "epoch": 0.64, "grad_norm": 0.4386728947424649, "learning_rate": 1.2182321535629283e-05, "loss": 0.0672, "step": 5819 }, { "epoch": 0.64, "grad_norm": 0.2577119978169022, "learning_rate": 1.217577391556958e-05, "loss": 0.0413, "step": 5820 }, { "epoch": 0.64, "grad_norm": 0.25646959583509726, "learning_rate": 1.2169227285482107e-05, "loss": 0.0338, "step": 5821 }, { "epoch": 0.64, "grad_norm": 0.27901527240100915, "learning_rate": 1.2162681646195187e-05, "loss": 0.0352, "step": 5822 }, { "epoch": 0.64, "grad_norm": 0.36635788067556735, "learning_rate": 1.2156136998537011e-05, "loss": 0.0544, "step": 5823 }, { "epoch": 0.64, "grad_norm": 0.25938737908662424, "learning_rate": 1.2149593343335658e-05, "loss": 0.0456, "step": 5824 }, { "epoch": 0.64, "grad_norm": 0.28311264960612514, "learning_rate": 1.2143050681419064e-05, "loss": 0.0484, "step": 5825 }, { "epoch": 0.64, "grad_norm": 0.41016900178977517, "learning_rate": 1.2136509013615063e-05, "loss": 0.0569, "step": 5826 }, { "epoch": 0.64, "grad_norm": 0.301530857600272, "learning_rate": 1.2129968340751334e-05, "loss": 0.0412, "step": 5827 }, { "epoch": 0.64, "grad_norm": 0.2512769719492511, "learning_rate": 1.2123428663655457e-05, "loss": 0.0476, "step": 5828 }, { "epoch": 0.64, "grad_norm": 0.3771855324707294, "learning_rate": 1.211688998315487e-05, "loss": 0.0526, "step": 5829 }, { "epoch": 0.64, "grad_norm": 0.3591906447862843, "learning_rate": 1.211035230007689e-05, "loss": 0.0486, "step": 5830 }, { "epoch": 0.64, "grad_norm": 0.22484123867453631, "learning_rate": 1.2103815615248707e-05, "loss": 0.0387, "step": 5831 }, { "epoch": 0.64, "grad_norm": 0.3046539116954684, "learning_rate": 1.209727992949739e-05, "loss": 0.0504, "step": 5832 }, { "epoch": 0.64, "grad_norm": 0.27224374133133045, "learning_rate": 1.2090745243649867e-05, "loss": 0.0315, "step": 5833 }, { "epoch": 0.64, "grad_norm": 0.33649721757495143, "learning_rate": 1.2084211558532958e-05, "loss": 0.0441, "step": 5834 }, { "epoch": 0.64, "grad_norm": 0.40559580287859104, "learning_rate": 1.2077678874973345e-05, "loss": 0.0722, "step": 5835 }, { "epoch": 0.64, "grad_norm": 0.2173574091762297, "learning_rate": 1.2071147193797578e-05, "loss": 0.0383, "step": 5836 }, { "epoch": 0.64, "grad_norm": 0.31055949443392256, "learning_rate": 1.2064616515832106e-05, "loss": 0.0498, "step": 5837 }, { "epoch": 0.64, "grad_norm": 0.2579892448541876, "learning_rate": 1.2058086841903211e-05, "loss": 0.04, "step": 5838 }, { "epoch": 0.64, "grad_norm": 0.3041065864907497, "learning_rate": 1.2051558172837085e-05, "loss": 0.0353, "step": 5839 }, { "epoch": 0.64, "grad_norm": 0.31064776814947015, "learning_rate": 1.204503050945978e-05, "loss": 0.0567, "step": 5840 }, { "epoch": 0.64, "grad_norm": 0.26751106702781635, "learning_rate": 1.2038503852597208e-05, "loss": 0.0316, "step": 5841 }, { "epoch": 0.64, "grad_norm": 0.20446986866347197, "learning_rate": 1.2031978203075172e-05, "loss": 0.0424, "step": 5842 }, { "epoch": 0.64, "grad_norm": 0.2280298596392424, "learning_rate": 1.2025453561719335e-05, "loss": 0.0316, "step": 5843 }, { "epoch": 0.64, "grad_norm": 0.2075055375710355, "learning_rate": 1.2018929929355241e-05, "loss": 0.0189, "step": 5844 }, { "epoch": 0.64, "grad_norm": 0.4836078228851798, "learning_rate": 1.2012407306808298e-05, "loss": 0.0836, "step": 5845 }, { "epoch": 0.64, "grad_norm": 0.29411732909416793, "learning_rate": 1.2005885694903796e-05, "loss": 0.0435, "step": 5846 }, { "epoch": 0.64, "grad_norm": 0.2879381997125005, "learning_rate": 1.1999365094466888e-05, "loss": 0.0382, "step": 5847 }, { "epoch": 0.64, "grad_norm": 0.2391479473494685, "learning_rate": 1.1992845506322607e-05, "loss": 0.0386, "step": 5848 }, { "epoch": 0.64, "grad_norm": 0.32471280200390734, "learning_rate": 1.1986326931295848e-05, "loss": 0.0578, "step": 5849 }, { "epoch": 0.64, "grad_norm": 0.2750064388518373, "learning_rate": 1.1979809370211392e-05, "loss": 0.044, "step": 5850 }, { "epoch": 0.64, "grad_norm": 0.3279516125291141, "learning_rate": 1.1973292823893873e-05, "loss": 0.0379, "step": 5851 }, { "epoch": 0.64, "grad_norm": 0.36044388908966557, "learning_rate": 1.196677729316782e-05, "loss": 0.064, "step": 5852 }, { "epoch": 0.64, "grad_norm": 0.3009939308774455, "learning_rate": 1.1960262778857606e-05, "loss": 0.0449, "step": 5853 }, { "epoch": 0.64, "grad_norm": 0.2874112520901466, "learning_rate": 1.1953749281787502e-05, "loss": 0.0498, "step": 5854 }, { "epoch": 0.64, "grad_norm": 0.32790016236794195, "learning_rate": 1.1947236802781625e-05, "loss": 0.0617, "step": 5855 }, { "epoch": 0.64, "grad_norm": 0.35839585971165094, "learning_rate": 1.194072534266399e-05, "loss": 0.05, "step": 5856 }, { "epoch": 0.64, "grad_norm": 0.3069293127146736, "learning_rate": 1.1934214902258467e-05, "loss": 0.0632, "step": 5857 }, { "epoch": 0.64, "grad_norm": 0.2107956727383603, "learning_rate": 1.1927705482388794e-05, "loss": 0.0316, "step": 5858 }, { "epoch": 0.64, "grad_norm": 0.3554874876344733, "learning_rate": 1.192119708387859e-05, "loss": 0.0515, "step": 5859 }, { "epoch": 0.64, "grad_norm": 0.2545166601094057, "learning_rate": 1.1914689707551337e-05, "loss": 0.0488, "step": 5860 }, { "epoch": 0.64, "grad_norm": 0.2803068639513982, "learning_rate": 1.1908183354230394e-05, "loss": 0.049, "step": 5861 }, { "epoch": 0.64, "grad_norm": 0.30098499481845514, "learning_rate": 1.1901678024738983e-05, "loss": 0.0367, "step": 5862 }, { "epoch": 0.64, "grad_norm": 0.39952902783196215, "learning_rate": 1.1895173719900206e-05, "loss": 0.0557, "step": 5863 }, { "epoch": 0.64, "grad_norm": 0.30311120483626397, "learning_rate": 1.1888670440537025e-05, "loss": 0.0346, "step": 5864 }, { "epoch": 0.64, "grad_norm": 0.37709947818455974, "learning_rate": 1.1882168187472281e-05, "loss": 0.0394, "step": 5865 }, { "epoch": 0.64, "grad_norm": 0.33292902576596056, "learning_rate": 1.1875666961528679e-05, "loss": 0.0477, "step": 5866 }, { "epoch": 0.64, "grad_norm": 0.26485830340163874, "learning_rate": 1.18691667635288e-05, "loss": 0.0447, "step": 5867 }, { "epoch": 0.64, "grad_norm": 0.3499069226343576, "learning_rate": 1.1862667594295086e-05, "loss": 0.0617, "step": 5868 }, { "epoch": 0.64, "grad_norm": 0.24081491864947885, "learning_rate": 1.1856169454649864e-05, "loss": 0.0483, "step": 5869 }, { "epoch": 0.64, "grad_norm": 0.25141724071104193, "learning_rate": 1.1849672345415306e-05, "loss": 0.0533, "step": 5870 }, { "epoch": 0.64, "grad_norm": 0.27222725202385795, "learning_rate": 1.1843176267413488e-05, "loss": 0.0428, "step": 5871 }, { "epoch": 0.64, "grad_norm": 0.3824310543217218, "learning_rate": 1.1836681221466308e-05, "loss": 0.0532, "step": 5872 }, { "epoch": 0.64, "grad_norm": 0.2893092545660855, "learning_rate": 1.1830187208395594e-05, "loss": 0.0474, "step": 5873 }, { "epoch": 0.65, "grad_norm": 0.28144731487803426, "learning_rate": 1.1823694229022995e-05, "loss": 0.0465, "step": 5874 }, { "epoch": 0.65, "grad_norm": 0.3432496287141752, "learning_rate": 1.1817202284170048e-05, "loss": 0.0497, "step": 5875 }, { "epoch": 0.65, "grad_norm": 0.24584147222968614, "learning_rate": 1.181071137465815e-05, "loss": 0.0409, "step": 5876 }, { "epoch": 0.65, "grad_norm": 0.2918134927166957, "learning_rate": 1.1804221501308586e-05, "loss": 0.0607, "step": 5877 }, { "epoch": 0.65, "grad_norm": 0.43358907611149267, "learning_rate": 1.1797732664942481e-05, "loss": 0.0827, "step": 5878 }, { "epoch": 0.65, "grad_norm": 0.22070516737271434, "learning_rate": 1.1791244866380862e-05, "loss": 0.0279, "step": 5879 }, { "epoch": 0.65, "grad_norm": 0.3062101610701623, "learning_rate": 1.1784758106444594e-05, "loss": 0.068, "step": 5880 }, { "epoch": 0.65, "grad_norm": 0.21821269395759207, "learning_rate": 1.1778272385954432e-05, "loss": 0.0278, "step": 5881 }, { "epoch": 0.65, "grad_norm": 0.2992721201259001, "learning_rate": 1.1771787705730983e-05, "loss": 0.0442, "step": 5882 }, { "epoch": 0.65, "grad_norm": 0.2628429380927485, "learning_rate": 1.1765304066594747e-05, "loss": 0.0524, "step": 5883 }, { "epoch": 0.65, "grad_norm": 0.3784453351369725, "learning_rate": 1.175882146936606e-05, "loss": 0.0522, "step": 5884 }, { "epoch": 0.65, "grad_norm": 0.3227230409300173, "learning_rate": 1.1752339914865154e-05, "loss": 0.0377, "step": 5885 }, { "epoch": 0.65, "grad_norm": 0.34260040654413293, "learning_rate": 1.1745859403912108e-05, "loss": 0.0507, "step": 5886 }, { "epoch": 0.65, "grad_norm": 0.25826616542034253, "learning_rate": 1.1739379937326889e-05, "loss": 0.0575, "step": 5887 }, { "epoch": 0.65, "grad_norm": 0.2738917256501338, "learning_rate": 1.1732901515929312e-05, "loss": 0.036, "step": 5888 }, { "epoch": 0.65, "grad_norm": 0.31425453217515, "learning_rate": 1.1726424140539067e-05, "loss": 0.042, "step": 5889 }, { "epoch": 0.65, "grad_norm": 0.2844722984827675, "learning_rate": 1.1719947811975732e-05, "loss": 0.0541, "step": 5890 }, { "epoch": 0.65, "grad_norm": 0.32665089592687435, "learning_rate": 1.1713472531058713e-05, "loss": 0.0517, "step": 5891 }, { "epoch": 0.65, "grad_norm": 0.25860551766994555, "learning_rate": 1.1706998298607325e-05, "loss": 0.0503, "step": 5892 }, { "epoch": 0.65, "grad_norm": 0.3909095871899017, "learning_rate": 1.170052511544071e-05, "loss": 0.0608, "step": 5893 }, { "epoch": 0.65, "grad_norm": 0.3291160965152386, "learning_rate": 1.1694052982377915e-05, "loss": 0.0615, "step": 5894 }, { "epoch": 0.65, "grad_norm": 0.2655292647005712, "learning_rate": 1.1687581900237824e-05, "loss": 0.0513, "step": 5895 }, { "epoch": 0.65, "grad_norm": 0.22471081474796067, "learning_rate": 1.1681111869839209e-05, "loss": 0.0364, "step": 5896 }, { "epoch": 0.65, "grad_norm": 0.24770364518430704, "learning_rate": 1.1674642892000696e-05, "loss": 0.0407, "step": 5897 }, { "epoch": 0.65, "grad_norm": 0.23060477816824915, "learning_rate": 1.166817496754078e-05, "loss": 0.0296, "step": 5898 }, { "epoch": 0.65, "grad_norm": 0.8201518310148757, "learning_rate": 1.1661708097277827e-05, "loss": 0.1434, "step": 5899 }, { "epoch": 0.65, "grad_norm": 0.26472483302983024, "learning_rate": 1.1655242282030068e-05, "loss": 0.054, "step": 5900 }, { "epoch": 0.65, "grad_norm": 0.2857093933018677, "learning_rate": 1.1648777522615613e-05, "loss": 0.0478, "step": 5901 }, { "epoch": 0.65, "grad_norm": 0.2881781220441794, "learning_rate": 1.1642313819852405e-05, "loss": 0.0618, "step": 5902 }, { "epoch": 0.65, "grad_norm": 0.23736697819741387, "learning_rate": 1.1635851174558282e-05, "loss": 0.0596, "step": 5903 }, { "epoch": 0.65, "grad_norm": 0.30040539133514066, "learning_rate": 1.1629389587550939e-05, "loss": 0.0538, "step": 5904 }, { "epoch": 0.65, "grad_norm": 0.20434464576292674, "learning_rate": 1.162292905964795e-05, "loss": 0.0352, "step": 5905 }, { "epoch": 0.65, "grad_norm": 0.36130137988934835, "learning_rate": 1.1616469591666725e-05, "loss": 0.0552, "step": 5906 }, { "epoch": 0.65, "grad_norm": 0.22679862510224041, "learning_rate": 1.1610011184424557e-05, "loss": 0.0382, "step": 5907 }, { "epoch": 0.65, "grad_norm": 0.23509873070106485, "learning_rate": 1.1603553838738635e-05, "loss": 0.0301, "step": 5908 }, { "epoch": 0.65, "grad_norm": 0.3098879006382944, "learning_rate": 1.1597097555425954e-05, "loss": 0.0521, "step": 5909 }, { "epoch": 0.65, "grad_norm": 0.24644970506449995, "learning_rate": 1.1590642335303417e-05, "loss": 0.0368, "step": 5910 }, { "epoch": 0.65, "grad_norm": 0.2397530859648159, "learning_rate": 1.1584188179187779e-05, "loss": 0.0302, "step": 5911 }, { "epoch": 0.65, "grad_norm": 0.2979598802067519, "learning_rate": 1.1577735087895664e-05, "loss": 0.0414, "step": 5912 }, { "epoch": 0.65, "grad_norm": 0.25457638557545514, "learning_rate": 1.1571283062243563e-05, "loss": 0.0603, "step": 5913 }, { "epoch": 0.65, "grad_norm": 0.2361072895653942, "learning_rate": 1.1564832103047818e-05, "loss": 0.0248, "step": 5914 }, { "epoch": 0.65, "grad_norm": 0.23804867447366063, "learning_rate": 1.155838221112465e-05, "loss": 0.0419, "step": 5915 }, { "epoch": 0.65, "grad_norm": 0.31250493522561706, "learning_rate": 1.1551933387290149e-05, "loss": 0.0471, "step": 5916 }, { "epoch": 0.65, "grad_norm": 0.3056899111607218, "learning_rate": 1.154548563236026e-05, "loss": 0.0528, "step": 5917 }, { "epoch": 0.65, "grad_norm": 0.29692255442364485, "learning_rate": 1.1539038947150783e-05, "loss": 0.0679, "step": 5918 }, { "epoch": 0.65, "grad_norm": 0.2418046124257207, "learning_rate": 1.1532593332477406e-05, "loss": 0.0301, "step": 5919 }, { "epoch": 0.65, "grad_norm": 0.34222985227632313, "learning_rate": 1.152614878915567e-05, "loss": 0.0537, "step": 5920 }, { "epoch": 0.65, "grad_norm": 0.26048600545293615, "learning_rate": 1.1519705318000983e-05, "loss": 0.0373, "step": 5921 }, { "epoch": 0.65, "grad_norm": 0.2517852519658182, "learning_rate": 1.1513262919828603e-05, "loss": 0.0383, "step": 5922 }, { "epoch": 0.65, "grad_norm": 0.26953854758293416, "learning_rate": 1.1506821595453674e-05, "loss": 0.0354, "step": 5923 }, { "epoch": 0.65, "grad_norm": 0.3077603809169237, "learning_rate": 1.1500381345691192e-05, "loss": 0.0527, "step": 5924 }, { "epoch": 0.65, "grad_norm": 0.2790589419498001, "learning_rate": 1.149394217135602e-05, "loss": 0.0486, "step": 5925 }, { "epoch": 0.65, "grad_norm": 0.37936917764007394, "learning_rate": 1.1487504073262886e-05, "loss": 0.0649, "step": 5926 }, { "epoch": 0.65, "grad_norm": 0.2314357433674794, "learning_rate": 1.1481067052226375e-05, "loss": 0.0354, "step": 5927 }, { "epoch": 0.65, "grad_norm": 0.25326601150862577, "learning_rate": 1.1474631109060957e-05, "loss": 0.0411, "step": 5928 }, { "epoch": 0.65, "grad_norm": 0.2530355636928128, "learning_rate": 1.1468196244580927e-05, "loss": 0.0579, "step": 5929 }, { "epoch": 0.65, "grad_norm": 0.32552842957852324, "learning_rate": 1.1461762459600476e-05, "loss": 0.0492, "step": 5930 }, { "epoch": 0.65, "grad_norm": 0.25920532145792047, "learning_rate": 1.145532975493365e-05, "loss": 0.0352, "step": 5931 }, { "epoch": 0.65, "grad_norm": 0.3155234938457192, "learning_rate": 1.1448898131394364e-05, "loss": 0.0376, "step": 5932 }, { "epoch": 0.65, "grad_norm": 0.23968142163553205, "learning_rate": 1.1442467589796374e-05, "loss": 0.0279, "step": 5933 }, { "epoch": 0.65, "grad_norm": 0.3115155807987206, "learning_rate": 1.1436038130953317e-05, "loss": 0.0614, "step": 5934 }, { "epoch": 0.65, "grad_norm": 0.236974054148654, "learning_rate": 1.1429609755678697e-05, "loss": 0.0368, "step": 5935 }, { "epoch": 0.65, "grad_norm": 0.23465142638232006, "learning_rate": 1.142318246478588e-05, "loss": 0.0358, "step": 5936 }, { "epoch": 0.65, "grad_norm": 0.33548327721464166, "learning_rate": 1.1416756259088073e-05, "loss": 0.0664, "step": 5937 }, { "epoch": 0.65, "grad_norm": 0.206655124667588, "learning_rate": 1.1410331139398365e-05, "loss": 0.0365, "step": 5938 }, { "epoch": 0.65, "grad_norm": 0.2631028853113967, "learning_rate": 1.1403907106529711e-05, "loss": 0.04, "step": 5939 }, { "epoch": 0.65, "grad_norm": 0.3378557176143698, "learning_rate": 1.1397484161294924e-05, "loss": 0.048, "step": 5940 }, { "epoch": 0.65, "grad_norm": 0.1746179446308563, "learning_rate": 1.1391062304506657e-05, "loss": 0.0292, "step": 5941 }, { "epoch": 0.65, "grad_norm": 0.31362214080142764, "learning_rate": 1.138464153697747e-05, "loss": 0.0537, "step": 5942 }, { "epoch": 0.65, "grad_norm": 0.3240398809803086, "learning_rate": 1.1378221859519756e-05, "loss": 0.0433, "step": 5943 }, { "epoch": 0.65, "grad_norm": 0.3175915427851454, "learning_rate": 1.1371803272945759e-05, "loss": 0.0608, "step": 5944 }, { "epoch": 0.65, "grad_norm": 0.42304530121430417, "learning_rate": 1.1365385778067613e-05, "loss": 0.0779, "step": 5945 }, { "epoch": 0.65, "grad_norm": 0.299659693942527, "learning_rate": 1.1358969375697297e-05, "loss": 0.0344, "step": 5946 }, { "epoch": 0.65, "grad_norm": 0.2857525873801302, "learning_rate": 1.1352554066646666e-05, "loss": 0.0418, "step": 5947 }, { "epoch": 0.65, "grad_norm": 0.3107186088511816, "learning_rate": 1.1346139851727412e-05, "loss": 0.0427, "step": 5948 }, { "epoch": 0.65, "grad_norm": 0.2548818917093416, "learning_rate": 1.1339726731751108e-05, "loss": 0.0375, "step": 5949 }, { "epoch": 0.65, "grad_norm": 0.3206887013099308, "learning_rate": 1.1333314707529188e-05, "loss": 0.0411, "step": 5950 }, { "epoch": 0.65, "grad_norm": 0.24924795463950997, "learning_rate": 1.1326903779872951e-05, "loss": 0.0296, "step": 5951 }, { "epoch": 0.65, "grad_norm": 0.22906372980721823, "learning_rate": 1.1320493949593528e-05, "loss": 0.0368, "step": 5952 }, { "epoch": 0.65, "grad_norm": 0.21703269951146137, "learning_rate": 1.1314085217501947e-05, "loss": 0.0302, "step": 5953 }, { "epoch": 0.65, "grad_norm": 0.21923790742634122, "learning_rate": 1.1307677584409076e-05, "loss": 0.0422, "step": 5954 }, { "epoch": 0.65, "grad_norm": 0.2632336825406231, "learning_rate": 1.1301271051125657e-05, "loss": 0.0432, "step": 5955 }, { "epoch": 0.65, "grad_norm": 0.26538511271081955, "learning_rate": 1.1294865618462294e-05, "loss": 0.0292, "step": 5956 }, { "epoch": 0.65, "grad_norm": 0.2897215238464507, "learning_rate": 1.1288461287229423e-05, "loss": 0.032, "step": 5957 }, { "epoch": 0.65, "grad_norm": 0.26655991896243997, "learning_rate": 1.128205805823737e-05, "loss": 0.0409, "step": 5958 }, { "epoch": 0.65, "grad_norm": 0.28997380713858717, "learning_rate": 1.1275655932296327e-05, "loss": 0.049, "step": 5959 }, { "epoch": 0.65, "grad_norm": 0.2914115465516345, "learning_rate": 1.1269254910216316e-05, "loss": 0.0416, "step": 5960 }, { "epoch": 0.65, "grad_norm": 0.2507637249536318, "learning_rate": 1.1262854992807244e-05, "loss": 0.0453, "step": 5961 }, { "epoch": 0.65, "grad_norm": 0.4887002596703626, "learning_rate": 1.1256456180878867e-05, "loss": 0.0761, "step": 5962 }, { "epoch": 0.65, "grad_norm": 0.3180842702906934, "learning_rate": 1.1250058475240814e-05, "loss": 0.0688, "step": 5963 }, { "epoch": 0.65, "grad_norm": 0.3189294426842358, "learning_rate": 1.1243661876702552e-05, "loss": 0.0575, "step": 5964 }, { "epoch": 0.66, "grad_norm": 0.26943351521617026, "learning_rate": 1.1237266386073425e-05, "loss": 0.069, "step": 5965 }, { "epoch": 0.66, "grad_norm": 0.24653087123010536, "learning_rate": 1.1230872004162631e-05, "loss": 0.0326, "step": 5966 }, { "epoch": 0.66, "grad_norm": 0.3085977336771278, "learning_rate": 1.1224478731779238e-05, "loss": 0.0452, "step": 5967 }, { "epoch": 0.66, "grad_norm": 0.22599576078876168, "learning_rate": 1.1218086569732152e-05, "loss": 0.0378, "step": 5968 }, { "epoch": 0.66, "grad_norm": 0.20021666436190477, "learning_rate": 1.1211695518830155e-05, "loss": 0.0433, "step": 5969 }, { "epoch": 0.66, "grad_norm": 0.23652433494441713, "learning_rate": 1.1205305579881883e-05, "loss": 0.0379, "step": 5970 }, { "epoch": 0.66, "grad_norm": 0.34138950594906714, "learning_rate": 1.1198916753695848e-05, "loss": 0.0427, "step": 5971 }, { "epoch": 0.66, "grad_norm": 0.20129466707321142, "learning_rate": 1.1192529041080382e-05, "loss": 0.0243, "step": 5972 }, { "epoch": 0.66, "grad_norm": 0.26716980791912504, "learning_rate": 1.1186142442843715e-05, "loss": 0.0383, "step": 5973 }, { "epoch": 0.66, "grad_norm": 0.3114285937541426, "learning_rate": 1.1179756959793918e-05, "loss": 0.0418, "step": 5974 }, { "epoch": 0.66, "grad_norm": 0.3405408287167941, "learning_rate": 1.1173372592738932e-05, "loss": 0.053, "step": 5975 }, { "epoch": 0.66, "grad_norm": 0.3566482593266611, "learning_rate": 1.1166989342486524e-05, "loss": 0.0669, "step": 5976 }, { "epoch": 0.66, "grad_norm": 0.2612097616099329, "learning_rate": 1.116060720984437e-05, "loss": 0.0287, "step": 5977 }, { "epoch": 0.66, "grad_norm": 0.28235227431584947, "learning_rate": 1.1154226195619979e-05, "loss": 0.0393, "step": 5978 }, { "epoch": 0.66, "grad_norm": 0.2656755819965642, "learning_rate": 1.1147846300620702e-05, "loss": 0.0445, "step": 5979 }, { "epoch": 0.66, "grad_norm": 0.33628503432571477, "learning_rate": 1.1141467525653773e-05, "loss": 0.0394, "step": 5980 }, { "epoch": 0.66, "grad_norm": 0.24418922703485943, "learning_rate": 1.1135089871526277e-05, "loss": 0.0246, "step": 5981 }, { "epoch": 0.66, "grad_norm": 0.20344604283808893, "learning_rate": 1.1128713339045162e-05, "loss": 0.0366, "step": 5982 }, { "epoch": 0.66, "grad_norm": 0.19663259705807523, "learning_rate": 1.112233792901722e-05, "loss": 0.0266, "step": 5983 }, { "epoch": 0.66, "grad_norm": 0.503668174392788, "learning_rate": 1.1115963642249107e-05, "loss": 0.0832, "step": 5984 }, { "epoch": 0.66, "grad_norm": 0.28452399458791766, "learning_rate": 1.1109590479547348e-05, "loss": 0.0445, "step": 5985 }, { "epoch": 0.66, "grad_norm": 0.20611582206201798, "learning_rate": 1.110321844171832e-05, "loss": 0.0483, "step": 5986 }, { "epoch": 0.66, "grad_norm": 0.23977643071619714, "learning_rate": 1.1096847529568242e-05, "loss": 0.0397, "step": 5987 }, { "epoch": 0.66, "grad_norm": 0.35746460539326547, "learning_rate": 1.1090477743903212e-05, "loss": 0.0682, "step": 5988 }, { "epoch": 0.66, "grad_norm": 0.25062887250614924, "learning_rate": 1.1084109085529171e-05, "loss": 0.0277, "step": 5989 }, { "epoch": 0.66, "grad_norm": 0.27691535369318027, "learning_rate": 1.1077741555251938e-05, "loss": 0.0547, "step": 5990 }, { "epoch": 0.66, "grad_norm": 0.2473799382507669, "learning_rate": 1.1071375153877156e-05, "loss": 0.0487, "step": 5991 }, { "epoch": 0.66, "grad_norm": 0.25727371791571674, "learning_rate": 1.1065009882210352e-05, "loss": 0.0435, "step": 5992 }, { "epoch": 0.66, "grad_norm": 0.24937420244973743, "learning_rate": 1.1058645741056892e-05, "loss": 0.0361, "step": 5993 }, { "epoch": 0.66, "grad_norm": 0.25467790573865035, "learning_rate": 1.1052282731222035e-05, "loss": 0.0456, "step": 5994 }, { "epoch": 0.66, "grad_norm": 0.22825345054830226, "learning_rate": 1.1045920853510847e-05, "loss": 0.0405, "step": 5995 }, { "epoch": 0.66, "grad_norm": 0.22472557710842322, "learning_rate": 1.1039560108728277e-05, "loss": 0.0368, "step": 5996 }, { "epoch": 0.66, "grad_norm": 0.3134589383408793, "learning_rate": 1.1033200497679138e-05, "loss": 0.0486, "step": 5997 }, { "epoch": 0.66, "grad_norm": 0.2563658934872525, "learning_rate": 1.1026842021168088e-05, "loss": 0.0357, "step": 5998 }, { "epoch": 0.66, "grad_norm": 0.3642069599562277, "learning_rate": 1.1020484679999632e-05, "loss": 0.0567, "step": 5999 }, { "epoch": 0.66, "grad_norm": 0.27108506125561893, "learning_rate": 1.101412847497815e-05, "loss": 0.0503, "step": 6000 }, { "epoch": 0.66, "grad_norm": 0.3811401115016491, "learning_rate": 1.1007773406907866e-05, "loss": 0.0681, "step": 6001 }, { "epoch": 0.66, "grad_norm": 0.4295941858403881, "learning_rate": 1.100141947659288e-05, "loss": 0.059, "step": 6002 }, { "epoch": 0.66, "grad_norm": 0.26865020440489834, "learning_rate": 1.0995066684837113e-05, "loss": 0.0453, "step": 6003 }, { "epoch": 0.66, "grad_norm": 0.24963255044867685, "learning_rate": 1.0988715032444369e-05, "loss": 0.039, "step": 6004 }, { "epoch": 0.66, "grad_norm": 0.2642275570891078, "learning_rate": 1.09823645202183e-05, "loss": 0.0367, "step": 6005 }, { "epoch": 0.66, "grad_norm": 0.1542661189559467, "learning_rate": 1.0976015148962427e-05, "loss": 0.0245, "step": 6006 }, { "epoch": 0.66, "grad_norm": 0.37846384078067175, "learning_rate": 1.0969666919480093e-05, "loss": 0.0733, "step": 6007 }, { "epoch": 0.66, "grad_norm": 0.24245947517653435, "learning_rate": 1.0963319832574528e-05, "loss": 0.0385, "step": 6008 }, { "epoch": 0.66, "grad_norm": 0.24390120401484094, "learning_rate": 1.0956973889048807e-05, "loss": 0.0316, "step": 6009 }, { "epoch": 0.66, "grad_norm": 0.23976926050695865, "learning_rate": 1.0950629089705857e-05, "loss": 0.0401, "step": 6010 }, { "epoch": 0.66, "grad_norm": 0.2545572298770189, "learning_rate": 1.0944285435348467e-05, "loss": 0.0394, "step": 6011 }, { "epoch": 0.66, "grad_norm": 0.21670667816842418, "learning_rate": 1.0937942926779279e-05, "loss": 0.0386, "step": 6012 }, { "epoch": 0.66, "grad_norm": 0.27973334346969486, "learning_rate": 1.0931601564800793e-05, "loss": 0.0548, "step": 6013 }, { "epoch": 0.66, "grad_norm": 0.19799726605996337, "learning_rate": 1.0925261350215344e-05, "loss": 0.0252, "step": 6014 }, { "epoch": 0.66, "grad_norm": 0.27526029925432594, "learning_rate": 1.0918922283825147e-05, "loss": 0.0596, "step": 6015 }, { "epoch": 0.66, "grad_norm": 0.25491240757095157, "learning_rate": 1.091258436643226e-05, "loss": 0.0428, "step": 6016 }, { "epoch": 0.66, "grad_norm": 0.2977992903070403, "learning_rate": 1.0906247598838608e-05, "loss": 0.0334, "step": 6017 }, { "epoch": 0.66, "grad_norm": 0.26489547516956985, "learning_rate": 1.0899911981845946e-05, "loss": 0.0625, "step": 6018 }, { "epoch": 0.66, "grad_norm": 0.25002376622243755, "learning_rate": 1.08935775162559e-05, "loss": 0.0466, "step": 6019 }, { "epoch": 0.66, "grad_norm": 0.22687852950017315, "learning_rate": 1.0887244202869951e-05, "loss": 0.037, "step": 6020 }, { "epoch": 0.66, "grad_norm": 0.24401706620678856, "learning_rate": 1.0880912042489438e-05, "loss": 0.0295, "step": 6021 }, { "epoch": 0.66, "grad_norm": 0.28970854965652504, "learning_rate": 1.0874581035915534e-05, "loss": 0.0468, "step": 6022 }, { "epoch": 0.66, "grad_norm": 0.2742529019121395, "learning_rate": 1.0868251183949288e-05, "loss": 0.0442, "step": 6023 }, { "epoch": 0.66, "grad_norm": 0.2666533423363255, "learning_rate": 1.0861922487391588e-05, "loss": 0.0357, "step": 6024 }, { "epoch": 0.66, "grad_norm": 0.23264806624161652, "learning_rate": 1.0855594947043198e-05, "loss": 0.0465, "step": 6025 }, { "epoch": 0.66, "grad_norm": 0.30432607537586237, "learning_rate": 1.0849268563704696e-05, "loss": 0.0422, "step": 6026 }, { "epoch": 0.66, "grad_norm": 0.29694109733006746, "learning_rate": 1.0842943338176544e-05, "loss": 0.0496, "step": 6027 }, { "epoch": 0.66, "grad_norm": 0.31908391637782896, "learning_rate": 1.0836619271259072e-05, "loss": 0.0554, "step": 6028 }, { "epoch": 0.66, "grad_norm": 0.2911529217143191, "learning_rate": 1.0830296363752417e-05, "loss": 0.0437, "step": 6029 }, { "epoch": 0.66, "grad_norm": 0.22858501261181086, "learning_rate": 1.0823974616456607e-05, "loss": 0.0363, "step": 6030 }, { "epoch": 0.66, "grad_norm": 0.2868621696146127, "learning_rate": 1.0817654030171506e-05, "loss": 0.0447, "step": 6031 }, { "epoch": 0.66, "grad_norm": 0.30021178846248325, "learning_rate": 1.0811334605696837e-05, "loss": 0.0456, "step": 6032 }, { "epoch": 0.66, "grad_norm": 0.2845580999703231, "learning_rate": 1.0805016343832184e-05, "loss": 0.0345, "step": 6033 }, { "epoch": 0.66, "grad_norm": 0.2239709155666085, "learning_rate": 1.0798699245376959e-05, "loss": 0.032, "step": 6034 }, { "epoch": 0.66, "grad_norm": 0.2709629347851086, "learning_rate": 1.079238331113045e-05, "loss": 0.0475, "step": 6035 }, { "epoch": 0.66, "grad_norm": 0.2776401037867901, "learning_rate": 1.078606854189179e-05, "loss": 0.0451, "step": 6036 }, { "epoch": 0.66, "grad_norm": 0.2825026696003045, "learning_rate": 1.0779754938459975e-05, "loss": 0.0372, "step": 6037 }, { "epoch": 0.66, "grad_norm": 0.3195655067566507, "learning_rate": 1.0773442501633822e-05, "loss": 0.0563, "step": 6038 }, { "epoch": 0.66, "grad_norm": 0.31929603817360847, "learning_rate": 1.0767131232212036e-05, "loss": 0.058, "step": 6039 }, { "epoch": 0.66, "grad_norm": 0.3237118147456888, "learning_rate": 1.0760821130993157e-05, "loss": 0.0812, "step": 6040 }, { "epoch": 0.66, "grad_norm": 0.3474861436077829, "learning_rate": 1.0754512198775586e-05, "loss": 0.0559, "step": 6041 }, { "epoch": 0.66, "grad_norm": 0.22026692385715754, "learning_rate": 1.0748204436357562e-05, "loss": 0.0435, "step": 6042 }, { "epoch": 0.66, "grad_norm": 0.3939665539470694, "learning_rate": 1.0741897844537181e-05, "loss": 0.0638, "step": 6043 }, { "epoch": 0.66, "grad_norm": 0.2867646256003692, "learning_rate": 1.0735592424112404e-05, "loss": 0.0498, "step": 6044 }, { "epoch": 0.66, "grad_norm": 0.26499093334864254, "learning_rate": 1.0729288175881031e-05, "loss": 0.0531, "step": 6045 }, { "epoch": 0.66, "grad_norm": 0.2618291109752536, "learning_rate": 1.0722985100640717e-05, "loss": 0.0595, "step": 6046 }, { "epoch": 0.66, "grad_norm": 0.37681371453083634, "learning_rate": 1.0716683199188966e-05, "loss": 0.1102, "step": 6047 }, { "epoch": 0.66, "grad_norm": 0.47743476711689026, "learning_rate": 1.0710382472323145e-05, "loss": 0.1082, "step": 6048 }, { "epoch": 0.66, "grad_norm": 0.20858470632829204, "learning_rate": 1.0704082920840448e-05, "loss": 0.0331, "step": 6049 }, { "epoch": 0.66, "grad_norm": 0.308109869827298, "learning_rate": 1.0697784545537943e-05, "loss": 0.0586, "step": 6050 }, { "epoch": 0.66, "grad_norm": 0.3265372303033474, "learning_rate": 1.0691487347212541e-05, "loss": 0.0468, "step": 6051 }, { "epoch": 0.66, "grad_norm": 0.2388340993376687, "learning_rate": 1.0685191326661015e-05, "loss": 0.0573, "step": 6052 }, { "epoch": 0.66, "grad_norm": 0.37637518026499006, "learning_rate": 1.067889648467996e-05, "loss": 0.0681, "step": 6053 }, { "epoch": 0.66, "grad_norm": 0.22860898450148412, "learning_rate": 1.0672602822065845e-05, "loss": 0.0334, "step": 6054 }, { "epoch": 0.66, "grad_norm": 0.20897343865416104, "learning_rate": 1.0666310339614996e-05, "loss": 0.0329, "step": 6055 }, { "epoch": 0.67, "grad_norm": 0.22447446835167412, "learning_rate": 1.0660019038123577e-05, "loss": 0.0359, "step": 6056 }, { "epoch": 0.67, "grad_norm": 0.24142589334856104, "learning_rate": 1.0653728918387593e-05, "loss": 0.0321, "step": 6057 }, { "epoch": 0.67, "grad_norm": 0.331008840441416, "learning_rate": 1.0647439981202918e-05, "loss": 0.0498, "step": 6058 }, { "epoch": 0.67, "grad_norm": 0.24544591252320738, "learning_rate": 1.0641152227365272e-05, "loss": 0.0437, "step": 6059 }, { "epoch": 0.67, "grad_norm": 0.27342036057298125, "learning_rate": 1.0634865657670227e-05, "loss": 0.0658, "step": 6060 }, { "epoch": 0.67, "grad_norm": 0.2538071952684424, "learning_rate": 1.0628580272913184e-05, "loss": 0.0491, "step": 6061 }, { "epoch": 0.67, "grad_norm": 0.21203572872213403, "learning_rate": 1.0622296073889417e-05, "loss": 0.0415, "step": 6062 }, { "epoch": 0.67, "grad_norm": 0.20325100181832403, "learning_rate": 1.0616013061394063e-05, "loss": 0.0377, "step": 6063 }, { "epoch": 0.67, "grad_norm": 0.2242427487439591, "learning_rate": 1.0609731236222069e-05, "loss": 0.0393, "step": 6064 }, { "epoch": 0.67, "grad_norm": 0.22696259129228208, "learning_rate": 1.0603450599168257e-05, "loss": 0.0434, "step": 6065 }, { "epoch": 0.67, "grad_norm": 0.23051309967054032, "learning_rate": 1.0597171151027297e-05, "loss": 0.0438, "step": 6066 }, { "epoch": 0.67, "grad_norm": 0.29261640341596007, "learning_rate": 1.059089289259371e-05, "loss": 0.0451, "step": 6067 }, { "epoch": 0.67, "grad_norm": 0.2515033433200905, "learning_rate": 1.058461582466185e-05, "loss": 0.0387, "step": 6068 }, { "epoch": 0.67, "grad_norm": 0.28326375904149376, "learning_rate": 1.0578339948025943e-05, "loss": 0.0507, "step": 6069 }, { "epoch": 0.67, "grad_norm": 0.2577434360591503, "learning_rate": 1.0572065263480046e-05, "loss": 0.0349, "step": 6070 }, { "epoch": 0.67, "grad_norm": 0.2781431872604126, "learning_rate": 1.0565791771818082e-05, "loss": 0.0369, "step": 6071 }, { "epoch": 0.67, "grad_norm": 0.24526459414449023, "learning_rate": 1.0559519473833815e-05, "loss": 0.0228, "step": 6072 }, { "epoch": 0.67, "grad_norm": 0.31440816285347983, "learning_rate": 1.0553248370320845e-05, "loss": 0.0407, "step": 6073 }, { "epoch": 0.67, "grad_norm": 0.278018024316525, "learning_rate": 1.0546978462072642e-05, "loss": 0.0474, "step": 6074 }, { "epoch": 0.67, "grad_norm": 0.21992385483929613, "learning_rate": 1.0540709749882512e-05, "loss": 0.0425, "step": 6075 }, { "epoch": 0.67, "grad_norm": 0.30762405387844527, "learning_rate": 1.0534442234543623e-05, "loss": 0.0633, "step": 6076 }, { "epoch": 0.67, "grad_norm": 0.28382732398594485, "learning_rate": 1.0528175916848969e-05, "loss": 0.048, "step": 6077 }, { "epoch": 0.67, "grad_norm": 0.3165438906028394, "learning_rate": 1.0521910797591408e-05, "loss": 0.0459, "step": 6078 }, { "epoch": 0.67, "grad_norm": 0.3586557696197077, "learning_rate": 1.0515646877563646e-05, "loss": 0.0686, "step": 6079 }, { "epoch": 0.67, "grad_norm": 0.19745539045230392, "learning_rate": 1.0509384157558236e-05, "loss": 0.0378, "step": 6080 }, { "epoch": 0.67, "grad_norm": 0.2975245447211584, "learning_rate": 1.050312263836758e-05, "loss": 0.0578, "step": 6081 }, { "epoch": 0.67, "grad_norm": 0.2979477773885956, "learning_rate": 1.0496862320783926e-05, "loss": 0.0459, "step": 6082 }, { "epoch": 0.67, "grad_norm": 0.27621262882293707, "learning_rate": 1.0490603205599373e-05, "loss": 0.0502, "step": 6083 }, { "epoch": 0.67, "grad_norm": 0.2763698025097821, "learning_rate": 1.0484345293605853e-05, "loss": 0.039, "step": 6084 }, { "epoch": 0.67, "grad_norm": 0.27465241150577735, "learning_rate": 1.0478088585595167e-05, "loss": 0.0322, "step": 6085 }, { "epoch": 0.67, "grad_norm": 0.2622917365245004, "learning_rate": 1.0471833082358954e-05, "loss": 0.0411, "step": 6086 }, { "epoch": 0.67, "grad_norm": 0.3270997082302538, "learning_rate": 1.0465578784688705e-05, "loss": 0.0592, "step": 6087 }, { "epoch": 0.67, "grad_norm": 0.2739984480542414, "learning_rate": 1.0459325693375746e-05, "loss": 0.0405, "step": 6088 }, { "epoch": 0.67, "grad_norm": 0.3606096909379189, "learning_rate": 1.0453073809211262e-05, "loss": 0.0406, "step": 6089 }, { "epoch": 0.67, "grad_norm": 0.24210470076286697, "learning_rate": 1.0446823132986283e-05, "loss": 0.0426, "step": 6090 }, { "epoch": 0.67, "grad_norm": 0.31585836658982, "learning_rate": 1.0440573665491693e-05, "loss": 0.0435, "step": 6091 }, { "epoch": 0.67, "grad_norm": 0.30742888969745763, "learning_rate": 1.0434325407518204e-05, "loss": 0.0394, "step": 6092 }, { "epoch": 0.67, "grad_norm": 0.26774327548582566, "learning_rate": 1.0428078359856389e-05, "loss": 0.03, "step": 6093 }, { "epoch": 0.67, "grad_norm": 0.2887744241350912, "learning_rate": 1.0421832523296665e-05, "loss": 0.0407, "step": 6094 }, { "epoch": 0.67, "grad_norm": 0.22982731151598934, "learning_rate": 1.0415587898629307e-05, "loss": 0.0446, "step": 6095 }, { "epoch": 0.67, "grad_norm": 0.3673355940185556, "learning_rate": 1.04093444866444e-05, "loss": 0.0547, "step": 6096 }, { "epoch": 0.67, "grad_norm": 0.2920379220109524, "learning_rate": 1.0403102288131927e-05, "loss": 0.0337, "step": 6097 }, { "epoch": 0.67, "grad_norm": 0.32639057571006813, "learning_rate": 1.0396861303881691e-05, "loss": 0.0383, "step": 6098 }, { "epoch": 0.67, "grad_norm": 0.2881678707186084, "learning_rate": 1.0390621534683327e-05, "loss": 0.0515, "step": 6099 }, { "epoch": 0.67, "grad_norm": 0.2887769783925161, "learning_rate": 1.0384382981326336e-05, "loss": 0.0371, "step": 6100 }, { "epoch": 0.67, "grad_norm": 0.3894467321730163, "learning_rate": 1.0378145644600063e-05, "loss": 0.0632, "step": 6101 }, { "epoch": 0.67, "grad_norm": 0.2946381245992665, "learning_rate": 1.0371909525293709e-05, "loss": 0.0619, "step": 6102 }, { "epoch": 0.67, "grad_norm": 0.26255283959225645, "learning_rate": 1.0365674624196282e-05, "loss": 0.0343, "step": 6103 }, { "epoch": 0.67, "grad_norm": 0.2564118368550188, "learning_rate": 1.0359440942096682e-05, "loss": 0.0396, "step": 6104 }, { "epoch": 0.67, "grad_norm": 0.2441874192215508, "learning_rate": 1.0353208479783627e-05, "loss": 0.0585, "step": 6105 }, { "epoch": 0.67, "grad_norm": 0.33642832806291767, "learning_rate": 1.0346977238045699e-05, "loss": 0.0324, "step": 6106 }, { "epoch": 0.67, "grad_norm": 0.3199003056298752, "learning_rate": 1.03407472176713e-05, "loss": 0.0482, "step": 6107 }, { "epoch": 0.67, "grad_norm": 0.31635107336373197, "learning_rate": 1.0334518419448703e-05, "loss": 0.047, "step": 6108 }, { "epoch": 0.67, "grad_norm": 0.19773847731694738, "learning_rate": 1.0328290844166013e-05, "loss": 0.0408, "step": 6109 }, { "epoch": 0.67, "grad_norm": 0.2414813205818314, "learning_rate": 1.0322064492611195e-05, "loss": 0.0353, "step": 6110 }, { "epoch": 0.67, "grad_norm": 0.2547668690257741, "learning_rate": 1.0315839365572024e-05, "loss": 0.0389, "step": 6111 }, { "epoch": 0.67, "grad_norm": 0.28245965116701477, "learning_rate": 1.0309615463836162e-05, "loss": 0.0278, "step": 6112 }, { "epoch": 0.67, "grad_norm": 0.34706402817480203, "learning_rate": 1.0303392788191091e-05, "loss": 0.0493, "step": 6113 }, { "epoch": 0.67, "grad_norm": 0.28962226744306535, "learning_rate": 1.0297171339424148e-05, "loss": 0.0601, "step": 6114 }, { "epoch": 0.67, "grad_norm": 0.3091420240469378, "learning_rate": 1.029095111832251e-05, "loss": 0.0483, "step": 6115 }, { "epoch": 0.67, "grad_norm": 0.20687722828693572, "learning_rate": 1.0284732125673198e-05, "loss": 0.0348, "step": 6116 }, { "epoch": 0.67, "grad_norm": 0.1967439457496283, "learning_rate": 1.0278514362263081e-05, "loss": 0.0247, "step": 6117 }, { "epoch": 0.67, "grad_norm": 0.26030188256050835, "learning_rate": 1.0272297828878881e-05, "loss": 0.0347, "step": 6118 }, { "epoch": 0.67, "grad_norm": 0.2192649485250686, "learning_rate": 1.0266082526307138e-05, "loss": 0.033, "step": 6119 }, { "epoch": 0.67, "grad_norm": 0.2731743499987614, "learning_rate": 1.0259868455334259e-05, "loss": 0.0393, "step": 6120 }, { "epoch": 0.67, "grad_norm": 0.2501315575397304, "learning_rate": 1.025365561674649e-05, "loss": 0.038, "step": 6121 }, { "epoch": 0.67, "grad_norm": 0.27388589061967406, "learning_rate": 1.0247444011329928e-05, "loss": 0.0485, "step": 6122 }, { "epoch": 0.67, "grad_norm": 0.2659913089746765, "learning_rate": 1.0241233639870487e-05, "loss": 0.0443, "step": 6123 }, { "epoch": 0.67, "grad_norm": 0.27856050801137305, "learning_rate": 1.0235024503153956e-05, "loss": 0.0432, "step": 6124 }, { "epoch": 0.67, "grad_norm": 0.3500222054317568, "learning_rate": 1.0228816601965954e-05, "loss": 0.065, "step": 6125 }, { "epoch": 0.67, "grad_norm": 0.2888938308613042, "learning_rate": 1.0222609937091952e-05, "loss": 0.0663, "step": 6126 }, { "epoch": 0.67, "grad_norm": 0.36075980365991034, "learning_rate": 1.0216404509317245e-05, "loss": 0.0437, "step": 6127 }, { "epoch": 0.67, "grad_norm": 0.26729182777538685, "learning_rate": 1.0210200319426988e-05, "loss": 0.0333, "step": 6128 }, { "epoch": 0.67, "grad_norm": 0.18930065272486218, "learning_rate": 1.020399736820618e-05, "loss": 0.0232, "step": 6129 }, { "epoch": 0.67, "grad_norm": 0.24159397184248027, "learning_rate": 1.0197795656439662e-05, "loss": 0.0488, "step": 6130 }, { "epoch": 0.67, "grad_norm": 0.2852171088295083, "learning_rate": 1.0191595184912097e-05, "loss": 0.0467, "step": 6131 }, { "epoch": 0.67, "grad_norm": 0.26951688921721084, "learning_rate": 1.0185395954408031e-05, "loss": 0.0403, "step": 6132 }, { "epoch": 0.67, "grad_norm": 0.2151563624185727, "learning_rate": 1.0179197965711825e-05, "loss": 0.0372, "step": 6133 }, { "epoch": 0.67, "grad_norm": 0.27346715766287094, "learning_rate": 1.0173001219607683e-05, "loss": 0.0734, "step": 6134 }, { "epoch": 0.67, "grad_norm": 0.21720613907345318, "learning_rate": 1.016680571687966e-05, "loss": 0.0359, "step": 6135 }, { "epoch": 0.67, "grad_norm": 0.35256847987981105, "learning_rate": 1.0160611458311651e-05, "loss": 0.0506, "step": 6136 }, { "epoch": 0.67, "grad_norm": 0.2180379279230598, "learning_rate": 1.0154418444687405e-05, "loss": 0.0379, "step": 6137 }, { "epoch": 0.67, "grad_norm": 0.43781994292571597, "learning_rate": 1.0148226676790482e-05, "loss": 0.0629, "step": 6138 }, { "epoch": 0.67, "grad_norm": 0.5429451203178703, "learning_rate": 1.0142036155404322e-05, "loss": 0.0842, "step": 6139 }, { "epoch": 0.67, "grad_norm": 0.3344424825827209, "learning_rate": 1.013584688131218e-05, "loss": 0.052, "step": 6140 }, { "epoch": 0.67, "grad_norm": 0.26353648913176536, "learning_rate": 1.0129658855297177e-05, "loss": 0.0439, "step": 6141 }, { "epoch": 0.67, "grad_norm": 0.25333014934561293, "learning_rate": 1.0123472078142248e-05, "loss": 0.0263, "step": 6142 }, { "epoch": 0.67, "grad_norm": 0.2446776016104054, "learning_rate": 1.0117286550630187e-05, "loss": 0.0454, "step": 6143 }, { "epoch": 0.67, "grad_norm": 0.37146994946827605, "learning_rate": 1.011110227354363e-05, "loss": 0.063, "step": 6144 }, { "epoch": 0.67, "grad_norm": 0.20723293485200756, "learning_rate": 1.0104919247665062e-05, "loss": 0.0336, "step": 6145 }, { "epoch": 0.67, "grad_norm": 0.3012933087973953, "learning_rate": 1.0098737473776781e-05, "loss": 0.0616, "step": 6146 }, { "epoch": 0.68, "grad_norm": 0.21922432190223268, "learning_rate": 1.0092556952660954e-05, "loss": 0.0324, "step": 6147 }, { "epoch": 0.68, "grad_norm": 0.21508036501792208, "learning_rate": 1.0086377685099578e-05, "loss": 0.0242, "step": 6148 }, { "epoch": 0.68, "grad_norm": 0.31327340266038384, "learning_rate": 1.0080199671874509e-05, "loss": 0.0527, "step": 6149 }, { "epoch": 0.68, "grad_norm": 0.3166295899887907, "learning_rate": 1.0074022913767411e-05, "loss": 0.0533, "step": 6150 }, { "epoch": 0.68, "grad_norm": 0.23835244444077175, "learning_rate": 1.0067847411559816e-05, "loss": 0.031, "step": 6151 }, { "epoch": 0.68, "grad_norm": 0.27785410064305965, "learning_rate": 1.006167316603309e-05, "loss": 0.0631, "step": 6152 }, { "epoch": 0.68, "grad_norm": 0.29026907332210367, "learning_rate": 1.0055500177968445e-05, "loss": 0.0529, "step": 6153 }, { "epoch": 0.68, "grad_norm": 0.2585250803652672, "learning_rate": 1.0049328448146908e-05, "loss": 0.0382, "step": 6154 }, { "epoch": 0.68, "grad_norm": 0.25937390729375265, "learning_rate": 1.0043157977349383e-05, "loss": 0.0349, "step": 6155 }, { "epoch": 0.68, "grad_norm": 0.3640380278126735, "learning_rate": 1.0036988766356592e-05, "loss": 0.0417, "step": 6156 }, { "epoch": 0.68, "grad_norm": 0.21365837459001652, "learning_rate": 1.0030820815949114e-05, "loss": 0.0381, "step": 6157 }, { "epoch": 0.68, "grad_norm": 0.366832683468777, "learning_rate": 1.0024654126907343e-05, "loss": 0.0719, "step": 6158 }, { "epoch": 0.68, "grad_norm": 0.23757749347295498, "learning_rate": 1.0018488700011536e-05, "loss": 0.0401, "step": 6159 }, { "epoch": 0.68, "grad_norm": 0.25865233066083226, "learning_rate": 1.0012324536041781e-05, "loss": 0.0388, "step": 6160 }, { "epoch": 0.68, "grad_norm": 0.2741600707433722, "learning_rate": 1.000616163577802e-05, "loss": 0.0414, "step": 6161 }, { "epoch": 0.68, "grad_norm": 0.21818814328311095, "learning_rate": 1.0000000000000006e-05, "loss": 0.0384, "step": 6162 }, { "epoch": 0.68, "grad_norm": 0.2668007187126608, "learning_rate": 9.993839629487357e-06, "loss": 0.043, "step": 6163 }, { "epoch": 0.68, "grad_norm": 0.3032125818418757, "learning_rate": 9.987680525019521e-06, "loss": 0.0546, "step": 6164 }, { "epoch": 0.68, "grad_norm": 0.20366737507076754, "learning_rate": 9.981522687375793e-06, "loss": 0.0434, "step": 6165 }, { "epoch": 0.68, "grad_norm": 0.41513302805212776, "learning_rate": 9.975366117335301e-06, "loss": 0.0825, "step": 6166 }, { "epoch": 0.68, "grad_norm": 0.18871187927225874, "learning_rate": 9.969210815677016e-06, "loss": 0.0311, "step": 6167 }, { "epoch": 0.68, "grad_norm": 0.2552559120565852, "learning_rate": 9.96305678317975e-06, "loss": 0.041, "step": 6168 }, { "epoch": 0.68, "grad_norm": 0.20703961697351564, "learning_rate": 9.956904020622137e-06, "loss": 0.0288, "step": 6169 }, { "epoch": 0.68, "grad_norm": 0.21144496839471708, "learning_rate": 9.950752528782679e-06, "loss": 0.0392, "step": 6170 }, { "epoch": 0.68, "grad_norm": 0.2645127205349823, "learning_rate": 9.944602308439696e-06, "loss": 0.0419, "step": 6171 }, { "epoch": 0.68, "grad_norm": 0.2682123672755254, "learning_rate": 9.938453360371363e-06, "loss": 0.0515, "step": 6172 }, { "epoch": 0.68, "grad_norm": 0.2751075290564487, "learning_rate": 9.932305685355672e-06, "loss": 0.0512, "step": 6173 }, { "epoch": 0.68, "grad_norm": 0.31168776097582196, "learning_rate": 9.926159284170471e-06, "loss": 0.0542, "step": 6174 }, { "epoch": 0.68, "grad_norm": 0.26094750025425906, "learning_rate": 9.92001415759345e-06, "loss": 0.0435, "step": 6175 }, { "epoch": 0.68, "grad_norm": 0.26129550302568794, "learning_rate": 9.913870306402129e-06, "loss": 0.0493, "step": 6176 }, { "epoch": 0.68, "grad_norm": 0.21577135454231355, "learning_rate": 9.90772773137386e-06, "loss": 0.0335, "step": 6177 }, { "epoch": 0.68, "grad_norm": 0.2364096540318172, "learning_rate": 9.901586433285845e-06, "loss": 0.0268, "step": 6178 }, { "epoch": 0.68, "grad_norm": 0.36304646770470045, "learning_rate": 9.895446412915124e-06, "loss": 0.0426, "step": 6179 }, { "epoch": 0.68, "grad_norm": 0.2856261027333615, "learning_rate": 9.889307671038579e-06, "loss": 0.0472, "step": 6180 }, { "epoch": 0.68, "grad_norm": 0.23325696352471895, "learning_rate": 9.88317020843291e-06, "loss": 0.0365, "step": 6181 }, { "epoch": 0.68, "grad_norm": 0.31977235795833003, "learning_rate": 9.877034025874675e-06, "loss": 0.0406, "step": 6182 }, { "epoch": 0.68, "grad_norm": 0.2558078859660114, "learning_rate": 9.87089912414026e-06, "loss": 0.0474, "step": 6183 }, { "epoch": 0.68, "grad_norm": 0.25894290022360145, "learning_rate": 9.864765504005901e-06, "loss": 0.0463, "step": 6184 }, { "epoch": 0.68, "grad_norm": 0.3540219848225874, "learning_rate": 9.858633166247659e-06, "loss": 0.0508, "step": 6185 }, { "epoch": 0.68, "grad_norm": 0.2029467038610122, "learning_rate": 9.852502111641438e-06, "loss": 0.0466, "step": 6186 }, { "epoch": 0.68, "grad_norm": 0.2655416526179896, "learning_rate": 9.846372340962987e-06, "loss": 0.0504, "step": 6187 }, { "epoch": 0.68, "grad_norm": 0.22159925337566252, "learning_rate": 9.840243854987868e-06, "loss": 0.034, "step": 6188 }, { "epoch": 0.68, "grad_norm": 0.25748234618882093, "learning_rate": 9.834116654491506e-06, "loss": 0.0381, "step": 6189 }, { "epoch": 0.68, "grad_norm": 0.23850988144367538, "learning_rate": 9.827990740249156e-06, "loss": 0.0311, "step": 6190 }, { "epoch": 0.68, "grad_norm": 0.22772029486391687, "learning_rate": 9.821866113035905e-06, "loss": 0.0351, "step": 6191 }, { "epoch": 0.68, "grad_norm": 0.21421351326928012, "learning_rate": 9.815742773626693e-06, "loss": 0.0477, "step": 6192 }, { "epoch": 0.68, "grad_norm": 0.2757065766470592, "learning_rate": 9.809620722796265e-06, "loss": 0.0277, "step": 6193 }, { "epoch": 0.68, "grad_norm": 0.22013983115002259, "learning_rate": 9.803499961319234e-06, "loss": 0.0276, "step": 6194 }, { "epoch": 0.68, "grad_norm": 0.25999750099448354, "learning_rate": 9.79738048997004e-06, "loss": 0.0389, "step": 6195 }, { "epoch": 0.68, "grad_norm": 0.3546731030067183, "learning_rate": 9.791262309522959e-06, "loss": 0.0785, "step": 6196 }, { "epoch": 0.68, "grad_norm": 0.20879912856185506, "learning_rate": 9.785145420752096e-06, "loss": 0.0355, "step": 6197 }, { "epoch": 0.68, "grad_norm": 0.2584543474741684, "learning_rate": 9.779029824431403e-06, "loss": 0.0402, "step": 6198 }, { "epoch": 0.68, "grad_norm": 0.4245585462929971, "learning_rate": 9.772915521334664e-06, "loss": 0.1176, "step": 6199 }, { "epoch": 0.68, "grad_norm": 0.23702502156362043, "learning_rate": 9.766802512235507e-06, "loss": 0.0426, "step": 6200 }, { "epoch": 0.68, "grad_norm": 0.2706143000912401, "learning_rate": 9.760690797907383e-06, "loss": 0.0328, "step": 6201 }, { "epoch": 0.68, "grad_norm": 0.2615778814371287, "learning_rate": 9.75458037912359e-06, "loss": 0.0471, "step": 6202 }, { "epoch": 0.68, "grad_norm": 0.2757278433896372, "learning_rate": 9.748471256657264e-06, "loss": 0.0568, "step": 6203 }, { "epoch": 0.68, "grad_norm": 0.2978884564672695, "learning_rate": 9.742363431281356e-06, "loss": 0.0446, "step": 6204 }, { "epoch": 0.68, "grad_norm": 0.20370410933347816, "learning_rate": 9.736256903768678e-06, "loss": 0.0338, "step": 6205 }, { "epoch": 0.68, "grad_norm": 0.25345471654097956, "learning_rate": 9.73015167489186e-06, "loss": 0.0248, "step": 6206 }, { "epoch": 0.68, "grad_norm": 0.34774019535141376, "learning_rate": 9.724047745423396e-06, "loss": 0.0428, "step": 6207 }, { "epoch": 0.68, "grad_norm": 0.32560227964655303, "learning_rate": 9.717945116135568e-06, "loss": 0.0554, "step": 6208 }, { "epoch": 0.68, "grad_norm": 0.3232524098050541, "learning_rate": 9.711843787800535e-06, "loss": 0.0595, "step": 6209 }, { "epoch": 0.68, "grad_norm": 0.27333508344609464, "learning_rate": 9.705743761190273e-06, "loss": 0.0341, "step": 6210 }, { "epoch": 0.68, "grad_norm": 0.2773933903501465, "learning_rate": 9.699645037076608e-06, "loss": 0.0321, "step": 6211 }, { "epoch": 0.68, "grad_norm": 0.2615605060952595, "learning_rate": 9.693547616231173e-06, "loss": 0.0653, "step": 6212 }, { "epoch": 0.68, "grad_norm": 0.290546923065797, "learning_rate": 9.687451499425465e-06, "loss": 0.0371, "step": 6213 }, { "epoch": 0.68, "grad_norm": 0.427013940135805, "learning_rate": 9.681356687430798e-06, "loss": 0.069, "step": 6214 }, { "epoch": 0.68, "grad_norm": 0.21506138961979826, "learning_rate": 9.67526318101834e-06, "loss": 0.0361, "step": 6215 }, { "epoch": 0.68, "grad_norm": 0.2641080709783023, "learning_rate": 9.669170980959063e-06, "loss": 0.0407, "step": 6216 }, { "epoch": 0.68, "grad_norm": 0.2623568769105091, "learning_rate": 9.663080088023795e-06, "loss": 0.0265, "step": 6217 }, { "epoch": 0.68, "grad_norm": 0.34069439329886586, "learning_rate": 9.656990502983216e-06, "loss": 0.0487, "step": 6218 }, { "epoch": 0.68, "grad_norm": 0.32778804371449066, "learning_rate": 9.650902226607802e-06, "loss": 0.0585, "step": 6219 }, { "epoch": 0.68, "grad_norm": 0.241926490973222, "learning_rate": 9.644815259667881e-06, "loss": 0.0361, "step": 6220 }, { "epoch": 0.68, "grad_norm": 0.26820518639536545, "learning_rate": 9.638729602933621e-06, "loss": 0.0394, "step": 6221 }, { "epoch": 0.68, "grad_norm": 0.23553373480436743, "learning_rate": 9.632645257175027e-06, "loss": 0.0484, "step": 6222 }, { "epoch": 0.68, "grad_norm": 0.2943329900177595, "learning_rate": 9.626562223161916e-06, "loss": 0.041, "step": 6223 }, { "epoch": 0.68, "grad_norm": 0.23571131740363835, "learning_rate": 9.620480501663954e-06, "loss": 0.0337, "step": 6224 }, { "epoch": 0.68, "grad_norm": 0.28536693419321146, "learning_rate": 9.614400093450649e-06, "loss": 0.0471, "step": 6225 }, { "epoch": 0.68, "grad_norm": 0.24616376698227518, "learning_rate": 9.608320999291333e-06, "loss": 0.0287, "step": 6226 }, { "epoch": 0.68, "grad_norm": 0.3335323509513379, "learning_rate": 9.602243219955168e-06, "loss": 0.0551, "step": 6227 }, { "epoch": 0.68, "grad_norm": 0.2093054364067025, "learning_rate": 9.59616675621115e-06, "loss": 0.0299, "step": 6228 }, { "epoch": 0.68, "grad_norm": 0.23734024943067492, "learning_rate": 9.590091608828123e-06, "loss": 0.0238, "step": 6229 }, { "epoch": 0.68, "grad_norm": 0.28447029487318604, "learning_rate": 9.58401777857475e-06, "loss": 0.0421, "step": 6230 }, { "epoch": 0.68, "grad_norm": 0.23408007344432577, "learning_rate": 9.577945266219536e-06, "loss": 0.0388, "step": 6231 }, { "epoch": 0.68, "grad_norm": 0.32076666199095033, "learning_rate": 9.571874072530809e-06, "loss": 0.0554, "step": 6232 }, { "epoch": 0.68, "grad_norm": 0.3434619129865472, "learning_rate": 9.565804198276733e-06, "loss": 0.0454, "step": 6233 }, { "epoch": 0.68, "grad_norm": 0.23862960241284548, "learning_rate": 9.559735644225316e-06, "loss": 0.0264, "step": 6234 }, { "epoch": 0.68, "grad_norm": 0.23075814585395765, "learning_rate": 9.553668411144387e-06, "loss": 0.0266, "step": 6235 }, { "epoch": 0.68, "grad_norm": 0.23590381615550915, "learning_rate": 9.547602499801616e-06, "loss": 0.0393, "step": 6236 }, { "epoch": 0.68, "grad_norm": 0.28383059214184636, "learning_rate": 9.541537910964495e-06, "loss": 0.0417, "step": 6237 }, { "epoch": 0.69, "grad_norm": 0.2748857293266264, "learning_rate": 9.53547464540037e-06, "loss": 0.0565, "step": 6238 }, { "epoch": 0.69, "grad_norm": 0.2277131294822672, "learning_rate": 9.529412703876387e-06, "loss": 0.0465, "step": 6239 }, { "epoch": 0.69, "grad_norm": 0.24707902734637896, "learning_rate": 9.523352087159548e-06, "loss": 0.0327, "step": 6240 }, { "epoch": 0.69, "grad_norm": 0.24524278956253664, "learning_rate": 9.517292796016684e-06, "loss": 0.0446, "step": 6241 }, { "epoch": 0.69, "grad_norm": 0.26185331159198083, "learning_rate": 9.511234831214464e-06, "loss": 0.034, "step": 6242 }, { "epoch": 0.69, "grad_norm": 0.24203865030105287, "learning_rate": 9.505178193519366e-06, "loss": 0.0372, "step": 6243 }, { "epoch": 0.69, "grad_norm": 0.23152872184127937, "learning_rate": 9.499122883697724e-06, "loss": 0.0395, "step": 6244 }, { "epoch": 0.69, "grad_norm": 0.2965330362908359, "learning_rate": 9.493068902515692e-06, "loss": 0.0503, "step": 6245 }, { "epoch": 0.69, "grad_norm": 0.2977029898884496, "learning_rate": 9.487016250739269e-06, "loss": 0.0421, "step": 6246 }, { "epoch": 0.69, "grad_norm": 0.3416215610484597, "learning_rate": 9.480964929134263e-06, "loss": 0.0375, "step": 6247 }, { "epoch": 0.69, "grad_norm": 0.2732846377747829, "learning_rate": 9.474914938466328e-06, "loss": 0.0352, "step": 6248 }, { "epoch": 0.69, "grad_norm": 0.26970197274105745, "learning_rate": 9.468866279500956e-06, "loss": 0.0488, "step": 6249 }, { "epoch": 0.69, "grad_norm": 0.2600905028881518, "learning_rate": 9.462818953003465e-06, "loss": 0.0309, "step": 6250 }, { "epoch": 0.69, "grad_norm": 0.2737487928372596, "learning_rate": 9.45677295973899e-06, "loss": 0.0381, "step": 6251 }, { "epoch": 0.69, "grad_norm": 0.26136220302113367, "learning_rate": 9.45072830047251e-06, "loss": 0.0638, "step": 6252 }, { "epoch": 0.69, "grad_norm": 0.28952782782782804, "learning_rate": 9.444684975968852e-06, "loss": 0.0477, "step": 6253 }, { "epoch": 0.69, "grad_norm": 0.30617854053418464, "learning_rate": 9.438642986992641e-06, "loss": 0.0475, "step": 6254 }, { "epoch": 0.69, "grad_norm": 0.234407231975574, "learning_rate": 9.432602334308353e-06, "loss": 0.0333, "step": 6255 }, { "epoch": 0.69, "grad_norm": 0.24852066147661583, "learning_rate": 9.426563018680293e-06, "loss": 0.0274, "step": 6256 }, { "epoch": 0.69, "grad_norm": 0.3931757993883918, "learning_rate": 9.420525040872602e-06, "loss": 0.0518, "step": 6257 }, { "epoch": 0.69, "grad_norm": 0.22685950641429906, "learning_rate": 9.414488401649227e-06, "loss": 0.0472, "step": 6258 }, { "epoch": 0.69, "grad_norm": 0.2941157965040893, "learning_rate": 9.408453101773971e-06, "loss": 0.0386, "step": 6259 }, { "epoch": 0.69, "grad_norm": 0.31684167359391574, "learning_rate": 9.40241914201046e-06, "loss": 0.0494, "step": 6260 }, { "epoch": 0.69, "grad_norm": 0.4020733341232103, "learning_rate": 9.396386523122158e-06, "loss": 0.0773, "step": 6261 }, { "epoch": 0.69, "grad_norm": 0.26485523716593634, "learning_rate": 9.390355245872337e-06, "loss": 0.0363, "step": 6262 }, { "epoch": 0.69, "grad_norm": 0.2767078066427714, "learning_rate": 9.384325311024123e-06, "loss": 0.0421, "step": 6263 }, { "epoch": 0.69, "grad_norm": 0.2760967643499841, "learning_rate": 9.378296719340459e-06, "loss": 0.0384, "step": 6264 }, { "epoch": 0.69, "grad_norm": 0.20478149032404033, "learning_rate": 9.372269471584128e-06, "loss": 0.043, "step": 6265 }, { "epoch": 0.69, "grad_norm": 0.2691958040055307, "learning_rate": 9.366243568517726e-06, "loss": 0.0493, "step": 6266 }, { "epoch": 0.69, "grad_norm": 0.23869043143850346, "learning_rate": 9.360219010903695e-06, "loss": 0.0287, "step": 6267 }, { "epoch": 0.69, "grad_norm": 0.20597591973733836, "learning_rate": 9.354195799504305e-06, "loss": 0.0376, "step": 6268 }, { "epoch": 0.69, "grad_norm": 0.209775496806426, "learning_rate": 9.348173935081645e-06, "loss": 0.0381, "step": 6269 }, { "epoch": 0.69, "grad_norm": 0.2688287619572404, "learning_rate": 9.342153418397647e-06, "loss": 0.0486, "step": 6270 }, { "epoch": 0.69, "grad_norm": 0.2902534796894734, "learning_rate": 9.336134250214061e-06, "loss": 0.0445, "step": 6271 }, { "epoch": 0.69, "grad_norm": 0.24861511712883994, "learning_rate": 9.330116431292478e-06, "loss": 0.049, "step": 6272 }, { "epoch": 0.69, "grad_norm": 0.2843546115986259, "learning_rate": 9.324099962394313e-06, "loss": 0.0598, "step": 6273 }, { "epoch": 0.69, "grad_norm": 0.2661283716245321, "learning_rate": 9.318084844280798e-06, "loss": 0.0457, "step": 6274 }, { "epoch": 0.69, "grad_norm": 0.2794483547640721, "learning_rate": 9.31207107771301e-06, "loss": 0.0284, "step": 6275 }, { "epoch": 0.69, "grad_norm": 0.23227527547035146, "learning_rate": 9.306058663451852e-06, "loss": 0.0399, "step": 6276 }, { "epoch": 0.69, "grad_norm": 0.25572760297267416, "learning_rate": 9.30004760225806e-06, "loss": 0.0422, "step": 6277 }, { "epoch": 0.69, "grad_norm": 0.2265836442749071, "learning_rate": 9.294037894892178e-06, "loss": 0.0466, "step": 6278 }, { "epoch": 0.69, "grad_norm": 0.38933889383896975, "learning_rate": 9.288029542114602e-06, "loss": 0.0622, "step": 6279 }, { "epoch": 0.69, "grad_norm": 0.20071796948873447, "learning_rate": 9.28202254468555e-06, "loss": 0.0225, "step": 6280 }, { "epoch": 0.69, "grad_norm": 0.31054400796586457, "learning_rate": 9.276016903365068e-06, "loss": 0.0523, "step": 6281 }, { "epoch": 0.69, "grad_norm": 0.27328691423237594, "learning_rate": 9.270012618913018e-06, "loss": 0.0473, "step": 6282 }, { "epoch": 0.69, "grad_norm": 0.2255796621531451, "learning_rate": 9.26400969208911e-06, "loss": 0.0467, "step": 6283 }, { "epoch": 0.69, "grad_norm": 0.3017817737732413, "learning_rate": 9.258008123652868e-06, "loss": 0.0534, "step": 6284 }, { "epoch": 0.69, "grad_norm": 0.40936924952188236, "learning_rate": 9.252007914363664e-06, "loss": 0.0751, "step": 6285 }, { "epoch": 0.69, "grad_norm": 0.23785788807231603, "learning_rate": 9.246009064980657e-06, "loss": 0.0377, "step": 6286 }, { "epoch": 0.69, "grad_norm": 0.27980544730699336, "learning_rate": 9.240011576262887e-06, "loss": 0.0452, "step": 6287 }, { "epoch": 0.69, "grad_norm": 0.2829410787583162, "learning_rate": 9.23401544896919e-06, "loss": 0.0435, "step": 6288 }, { "epoch": 0.69, "grad_norm": 0.2584916042720561, "learning_rate": 9.228020683858223e-06, "loss": 0.0481, "step": 6289 }, { "epoch": 0.69, "grad_norm": 0.24872340659220568, "learning_rate": 9.22202728168849e-06, "loss": 0.0384, "step": 6290 }, { "epoch": 0.69, "grad_norm": 0.25803977889566404, "learning_rate": 9.216035243218318e-06, "loss": 0.0372, "step": 6291 }, { "epoch": 0.69, "grad_norm": 0.2000381038548197, "learning_rate": 9.210044569205863e-06, "loss": 0.0246, "step": 6292 }, { "epoch": 0.69, "grad_norm": 0.20023316746532552, "learning_rate": 9.204055260409091e-06, "loss": 0.041, "step": 6293 }, { "epoch": 0.69, "grad_norm": 0.24988703857874645, "learning_rate": 9.198067317585816e-06, "loss": 0.0375, "step": 6294 }, { "epoch": 0.69, "grad_norm": 0.2536094544789971, "learning_rate": 9.19208074149367e-06, "loss": 0.0512, "step": 6295 }, { "epoch": 0.69, "grad_norm": 0.23277586738353942, "learning_rate": 9.186095532890121e-06, "loss": 0.0464, "step": 6296 }, { "epoch": 0.69, "grad_norm": 0.23146955950597894, "learning_rate": 9.180111692532446e-06, "loss": 0.0473, "step": 6297 }, { "epoch": 0.69, "grad_norm": 0.20974903502827713, "learning_rate": 9.174129221177762e-06, "loss": 0.0328, "step": 6298 }, { "epoch": 0.69, "grad_norm": 0.2503838869994431, "learning_rate": 9.168148119583011e-06, "loss": 0.0317, "step": 6299 }, { "epoch": 0.69, "grad_norm": 0.2928620462015423, "learning_rate": 9.162168388504972e-06, "loss": 0.0395, "step": 6300 }, { "epoch": 0.69, "grad_norm": 0.2828001377923487, "learning_rate": 9.156190028700226e-06, "loss": 0.0385, "step": 6301 }, { "epoch": 0.69, "grad_norm": 0.21102328675309784, "learning_rate": 9.150213040925193e-06, "loss": 0.0382, "step": 6302 }, { "epoch": 0.69, "grad_norm": 0.2844854590968907, "learning_rate": 9.144237425936129e-06, "loss": 0.0518, "step": 6303 }, { "epoch": 0.69, "grad_norm": 0.4165599646637808, "learning_rate": 9.138263184489104e-06, "loss": 0.0648, "step": 6304 }, { "epoch": 0.69, "grad_norm": 0.2724789302449723, "learning_rate": 9.132290317340018e-06, "loss": 0.0393, "step": 6305 }, { "epoch": 0.69, "grad_norm": 0.3027393094552803, "learning_rate": 9.1263188252446e-06, "loss": 0.0499, "step": 6306 }, { "epoch": 0.69, "grad_norm": 0.2533256789016248, "learning_rate": 9.120348708958399e-06, "loss": 0.0465, "step": 6307 }, { "epoch": 0.69, "grad_norm": 0.2664072914886253, "learning_rate": 9.114379969236802e-06, "loss": 0.0305, "step": 6308 }, { "epoch": 0.69, "grad_norm": 0.2437821694077894, "learning_rate": 9.108412606834998e-06, "loss": 0.0377, "step": 6309 }, { "epoch": 0.69, "grad_norm": 0.2879293371748821, "learning_rate": 9.102446622508025e-06, "loss": 0.0502, "step": 6310 }, { "epoch": 0.69, "grad_norm": 0.2503264203528756, "learning_rate": 9.096482017010737e-06, "loss": 0.0295, "step": 6311 }, { "epoch": 0.69, "grad_norm": 0.25013961469818086, "learning_rate": 9.090518791097822e-06, "loss": 0.0444, "step": 6312 }, { "epoch": 0.69, "grad_norm": 0.2477486011025846, "learning_rate": 9.084556945523772e-06, "loss": 0.0471, "step": 6313 }, { "epoch": 0.69, "grad_norm": 0.4334807893165917, "learning_rate": 9.078596481042927e-06, "loss": 0.0818, "step": 6314 }, { "epoch": 0.69, "grad_norm": 0.23353296656458178, "learning_rate": 9.072637398409444e-06, "loss": 0.0189, "step": 6315 }, { "epoch": 0.69, "grad_norm": 0.2954942567770665, "learning_rate": 9.066679698377311e-06, "loss": 0.0332, "step": 6316 }, { "epoch": 0.69, "grad_norm": 0.23530170498235112, "learning_rate": 9.060723381700323e-06, "loss": 0.0408, "step": 6317 }, { "epoch": 0.69, "grad_norm": 0.24766727747869663, "learning_rate": 9.054768449132115e-06, "loss": 0.0323, "step": 6318 }, { "epoch": 0.69, "grad_norm": 0.24632032500021417, "learning_rate": 9.048814901426146e-06, "loss": 0.0426, "step": 6319 }, { "epoch": 0.69, "grad_norm": 0.28128099270764245, "learning_rate": 9.042862739335707e-06, "loss": 0.0341, "step": 6320 }, { "epoch": 0.69, "grad_norm": 0.24277883295031283, "learning_rate": 9.036911963613881e-06, "loss": 0.0393, "step": 6321 }, { "epoch": 0.69, "grad_norm": 0.23686681781114505, "learning_rate": 9.030962575013622e-06, "loss": 0.0443, "step": 6322 }, { "epoch": 0.69, "grad_norm": 0.26004526675001527, "learning_rate": 9.025014574287685e-06, "loss": 0.0284, "step": 6323 }, { "epoch": 0.69, "grad_norm": 0.2446766652525605, "learning_rate": 9.019067962188634e-06, "loss": 0.0409, "step": 6324 }, { "epoch": 0.69, "grad_norm": 0.22322480074775033, "learning_rate": 9.013122739468882e-06, "loss": 0.038, "step": 6325 }, { "epoch": 0.69, "grad_norm": 0.21102602302103093, "learning_rate": 9.007178906880655e-06, "loss": 0.0264, "step": 6326 }, { "epoch": 0.69, "grad_norm": 0.2739814252996459, "learning_rate": 9.001236465176017e-06, "loss": 0.045, "step": 6327 }, { "epoch": 0.69, "grad_norm": 0.2962305200988842, "learning_rate": 8.995295415106829e-06, "loss": 0.0426, "step": 6328 }, { "epoch": 0.7, "grad_norm": 0.2513516069280048, "learning_rate": 8.989355757424796e-06, "loss": 0.0295, "step": 6329 }, { "epoch": 0.7, "grad_norm": 0.23622794431841201, "learning_rate": 8.983417492881443e-06, "loss": 0.0447, "step": 6330 }, { "epoch": 0.7, "grad_norm": 0.20425040918306897, "learning_rate": 8.977480622228128e-06, "loss": 0.0236, "step": 6331 }, { "epoch": 0.7, "grad_norm": 0.211285339649907, "learning_rate": 8.971545146216005e-06, "loss": 0.0359, "step": 6332 }, { "epoch": 0.7, "grad_norm": 0.272476291565687, "learning_rate": 8.965611065596077e-06, "loss": 0.0427, "step": 6333 }, { "epoch": 0.7, "grad_norm": 0.21597569253476873, "learning_rate": 8.959678381119166e-06, "loss": 0.0366, "step": 6334 }, { "epoch": 0.7, "grad_norm": 0.3310963539752032, "learning_rate": 8.953747093535918e-06, "loss": 0.037, "step": 6335 }, { "epoch": 0.7, "grad_norm": 0.2095192366163649, "learning_rate": 8.947817203596785e-06, "loss": 0.021, "step": 6336 }, { "epoch": 0.7, "grad_norm": 0.2470472724635825, "learning_rate": 8.94188871205206e-06, "loss": 0.0338, "step": 6337 }, { "epoch": 0.7, "grad_norm": 0.20715595484212052, "learning_rate": 8.935961619651859e-06, "loss": 0.0407, "step": 6338 }, { "epoch": 0.7, "grad_norm": 0.22768347581955933, "learning_rate": 8.930035927146114e-06, "loss": 0.0345, "step": 6339 }, { "epoch": 0.7, "grad_norm": 0.3099342517386408, "learning_rate": 8.924111635284582e-06, "loss": 0.0488, "step": 6340 }, { "epoch": 0.7, "grad_norm": 0.43779650926487895, "learning_rate": 8.918188744816844e-06, "loss": 0.087, "step": 6341 }, { "epoch": 0.7, "grad_norm": 0.2332687975555119, "learning_rate": 8.91226725649231e-06, "loss": 0.0425, "step": 6342 }, { "epoch": 0.7, "grad_norm": 0.19567928207038063, "learning_rate": 8.906347171060191e-06, "loss": 0.0229, "step": 6343 }, { "epoch": 0.7, "grad_norm": 0.31602631823126964, "learning_rate": 8.900428489269541e-06, "loss": 0.0489, "step": 6344 }, { "epoch": 0.7, "grad_norm": 0.23726831950245672, "learning_rate": 8.89451121186923e-06, "loss": 0.0343, "step": 6345 }, { "epoch": 0.7, "grad_norm": 0.31267871514804374, "learning_rate": 8.888595339607961e-06, "loss": 0.0524, "step": 6346 }, { "epoch": 0.7, "grad_norm": 0.2462707291872784, "learning_rate": 8.882680873234233e-06, "loss": 0.0358, "step": 6347 }, { "epoch": 0.7, "grad_norm": 0.22789185315397206, "learning_rate": 8.876767813496388e-06, "loss": 0.0307, "step": 6348 }, { "epoch": 0.7, "grad_norm": 0.2658545259744762, "learning_rate": 8.870856161142587e-06, "loss": 0.04, "step": 6349 }, { "epoch": 0.7, "grad_norm": 0.34409919255540783, "learning_rate": 8.86494591692081e-06, "loss": 0.0492, "step": 6350 }, { "epoch": 0.7, "grad_norm": 0.2679736145289774, "learning_rate": 8.859037081578867e-06, "loss": 0.0489, "step": 6351 }, { "epoch": 0.7, "grad_norm": 0.2568584475787984, "learning_rate": 8.85312965586437e-06, "loss": 0.044, "step": 6352 }, { "epoch": 0.7, "grad_norm": 0.2876786537293762, "learning_rate": 8.84722364052477e-06, "loss": 0.0575, "step": 6353 }, { "epoch": 0.7, "grad_norm": 0.23660125397348275, "learning_rate": 8.841319036307334e-06, "loss": 0.0409, "step": 6354 }, { "epoch": 0.7, "grad_norm": 0.2962576571010167, "learning_rate": 8.835415843959154e-06, "loss": 0.0576, "step": 6355 }, { "epoch": 0.7, "grad_norm": 0.3155991184840565, "learning_rate": 8.829514064227138e-06, "loss": 0.065, "step": 6356 }, { "epoch": 0.7, "grad_norm": 0.20948942601939022, "learning_rate": 8.82361369785802e-06, "loss": 0.0306, "step": 6357 }, { "epoch": 0.7, "grad_norm": 0.3294762535742318, "learning_rate": 8.817714745598358e-06, "loss": 0.0462, "step": 6358 }, { "epoch": 0.7, "grad_norm": 0.25829241618292653, "learning_rate": 8.811817208194512e-06, "loss": 0.0466, "step": 6359 }, { "epoch": 0.7, "grad_norm": 0.2077765006958192, "learning_rate": 8.805921086392686e-06, "loss": 0.0253, "step": 6360 }, { "epoch": 0.7, "grad_norm": 0.25343540256634217, "learning_rate": 8.800026380938895e-06, "loss": 0.0291, "step": 6361 }, { "epoch": 0.7, "grad_norm": 0.25178844777491177, "learning_rate": 8.79413309257898e-06, "loss": 0.04, "step": 6362 }, { "epoch": 0.7, "grad_norm": 0.20695157587285948, "learning_rate": 8.788241222058588e-06, "loss": 0.0331, "step": 6363 }, { "epoch": 0.7, "grad_norm": 0.27255576886387817, "learning_rate": 8.782350770123202e-06, "loss": 0.0479, "step": 6364 }, { "epoch": 0.7, "grad_norm": 0.25812254671311835, "learning_rate": 8.776461737518123e-06, "loss": 0.0433, "step": 6365 }, { "epoch": 0.7, "grad_norm": 0.22198138104472048, "learning_rate": 8.770574124988474e-06, "loss": 0.0369, "step": 6366 }, { "epoch": 0.7, "grad_norm": 0.27744670085426953, "learning_rate": 8.764687933279179e-06, "loss": 0.0459, "step": 6367 }, { "epoch": 0.7, "grad_norm": 0.28767703503924624, "learning_rate": 8.758803163135008e-06, "loss": 0.0489, "step": 6368 }, { "epoch": 0.7, "grad_norm": 0.2544070367064107, "learning_rate": 8.752919815300541e-06, "loss": 0.0314, "step": 6369 }, { "epoch": 0.7, "grad_norm": 0.22781994064435185, "learning_rate": 8.74703789052018e-06, "loss": 0.0423, "step": 6370 }, { "epoch": 0.7, "grad_norm": 0.2631358030733747, "learning_rate": 8.741157389538132e-06, "loss": 0.0474, "step": 6371 }, { "epoch": 0.7, "grad_norm": 0.22057342938330163, "learning_rate": 8.73527831309844e-06, "loss": 0.0319, "step": 6372 }, { "epoch": 0.7, "grad_norm": 0.21460137133260163, "learning_rate": 8.729400661944984e-06, "loss": 0.0261, "step": 6373 }, { "epoch": 0.7, "grad_norm": 0.4747864820535669, "learning_rate": 8.723524436821418e-06, "loss": 0.0668, "step": 6374 }, { "epoch": 0.7, "grad_norm": 0.2662109055942396, "learning_rate": 8.717649638471249e-06, "loss": 0.0374, "step": 6375 }, { "epoch": 0.7, "grad_norm": 0.3418576431750124, "learning_rate": 8.711776267637794e-06, "loss": 0.0402, "step": 6376 }, { "epoch": 0.7, "grad_norm": 0.22465527107035038, "learning_rate": 8.705904325064201e-06, "loss": 0.0398, "step": 6377 }, { "epoch": 0.7, "grad_norm": 0.3423718439272161, "learning_rate": 8.700033811493407e-06, "loss": 0.0462, "step": 6378 }, { "epoch": 0.7, "grad_norm": 0.33873568135084464, "learning_rate": 8.694164727668199e-06, "loss": 0.0808, "step": 6379 }, { "epoch": 0.7, "grad_norm": 0.30990123209805315, "learning_rate": 8.688297074331171e-06, "loss": 0.0386, "step": 6380 }, { "epoch": 0.7, "grad_norm": 0.37951634634947395, "learning_rate": 8.682430852224743e-06, "loss": 0.0377, "step": 6381 }, { "epoch": 0.7, "grad_norm": 0.23587397610735772, "learning_rate": 8.676566062091135e-06, "loss": 0.0288, "step": 6382 }, { "epoch": 0.7, "grad_norm": 0.28234982026440475, "learning_rate": 8.670702704672403e-06, "loss": 0.0344, "step": 6383 }, { "epoch": 0.7, "grad_norm": 0.26419354860571054, "learning_rate": 8.66484078071042e-06, "loss": 0.043, "step": 6384 }, { "epoch": 0.7, "grad_norm": 0.27265589585595507, "learning_rate": 8.658980290946881e-06, "loss": 0.0465, "step": 6385 }, { "epoch": 0.7, "grad_norm": 0.24813252569743952, "learning_rate": 8.653121236123278e-06, "loss": 0.0286, "step": 6386 }, { "epoch": 0.7, "grad_norm": 0.2670370325184495, "learning_rate": 8.647263616980948e-06, "loss": 0.0443, "step": 6387 }, { "epoch": 0.7, "grad_norm": 0.26468644098027216, "learning_rate": 8.641407434261031e-06, "loss": 0.0357, "step": 6388 }, { "epoch": 0.7, "grad_norm": 0.22262329560397953, "learning_rate": 8.635552688704491e-06, "loss": 0.0307, "step": 6389 }, { "epoch": 0.7, "grad_norm": 0.33227491974359313, "learning_rate": 8.62969938105211e-06, "loss": 0.0605, "step": 6390 }, { "epoch": 0.7, "grad_norm": 0.17559031607289025, "learning_rate": 8.623847512044485e-06, "loss": 0.0195, "step": 6391 }, { "epoch": 0.7, "grad_norm": 0.2930631612606321, "learning_rate": 8.617997082422031e-06, "loss": 0.0521, "step": 6392 }, { "epoch": 0.7, "grad_norm": 0.2828701824565216, "learning_rate": 8.612148092924994e-06, "loss": 0.0571, "step": 6393 }, { "epoch": 0.7, "grad_norm": 0.22517277918334888, "learning_rate": 8.606300544293412e-06, "loss": 0.0282, "step": 6394 }, { "epoch": 0.7, "grad_norm": 0.2827000589408449, "learning_rate": 8.600454437267158e-06, "loss": 0.0421, "step": 6395 }, { "epoch": 0.7, "grad_norm": 0.3838056798545468, "learning_rate": 8.594609772585922e-06, "loss": 0.0664, "step": 6396 }, { "epoch": 0.7, "grad_norm": 0.28354324437547307, "learning_rate": 8.588766550989218e-06, "loss": 0.0401, "step": 6397 }, { "epoch": 0.7, "grad_norm": 0.24436393443223262, "learning_rate": 8.582924773216353e-06, "loss": 0.0245, "step": 6398 }, { "epoch": 0.7, "grad_norm": 0.23143113169303037, "learning_rate": 8.577084440006471e-06, "loss": 0.0372, "step": 6399 }, { "epoch": 0.7, "grad_norm": 0.34290995691316134, "learning_rate": 8.571245552098533e-06, "loss": 0.0557, "step": 6400 }, { "epoch": 0.7, "grad_norm": 0.2562108137825738, "learning_rate": 8.565408110231319e-06, "loss": 0.0302, "step": 6401 }, { "epoch": 0.7, "grad_norm": 0.3028696311777119, "learning_rate": 8.559572115143406e-06, "loss": 0.0328, "step": 6402 }, { "epoch": 0.7, "grad_norm": 0.2720516507881614, "learning_rate": 8.55373756757321e-06, "loss": 0.041, "step": 6403 }, { "epoch": 0.7, "grad_norm": 0.26955753695798945, "learning_rate": 8.547904468258957e-06, "loss": 0.0369, "step": 6404 }, { "epoch": 0.7, "grad_norm": 0.205862136112929, "learning_rate": 8.542072817938693e-06, "loss": 0.032, "step": 6405 }, { "epoch": 0.7, "grad_norm": 0.26788001189925775, "learning_rate": 8.536242617350265e-06, "loss": 0.0511, "step": 6406 }, { "epoch": 0.7, "grad_norm": 0.25751294448408285, "learning_rate": 8.530413867231347e-06, "loss": 0.0468, "step": 6407 }, { "epoch": 0.7, "grad_norm": 0.24201039021822887, "learning_rate": 8.524586568319451e-06, "loss": 0.0411, "step": 6408 }, { "epoch": 0.7, "grad_norm": 0.25891186661461163, "learning_rate": 8.518760721351865e-06, "loss": 0.034, "step": 6409 }, { "epoch": 0.7, "grad_norm": 0.2302191239125216, "learning_rate": 8.51293632706572e-06, "loss": 0.0334, "step": 6410 }, { "epoch": 0.7, "grad_norm": 0.28851454580948577, "learning_rate": 8.507113386197956e-06, "loss": 0.0474, "step": 6411 }, { "epoch": 0.7, "grad_norm": 0.5054794711312898, "learning_rate": 8.501291899485337e-06, "loss": 0.0753, "step": 6412 }, { "epoch": 0.7, "grad_norm": 0.4117383688029116, "learning_rate": 8.495471867664423e-06, "loss": 0.0789, "step": 6413 }, { "epoch": 0.7, "grad_norm": 0.2344748681602388, "learning_rate": 8.489653291471607e-06, "loss": 0.0333, "step": 6414 }, { "epoch": 0.7, "grad_norm": 0.2514175963227083, "learning_rate": 8.483836171643094e-06, "loss": 0.0308, "step": 6415 }, { "epoch": 0.7, "grad_norm": 0.24590569834635775, "learning_rate": 8.47802050891491e-06, "loss": 0.0477, "step": 6416 }, { "epoch": 0.7, "grad_norm": 0.2268210366467165, "learning_rate": 8.472206304022881e-06, "loss": 0.0397, "step": 6417 }, { "epoch": 0.7, "grad_norm": 0.39791710846894723, "learning_rate": 8.466393557702659e-06, "loss": 0.0534, "step": 6418 }, { "epoch": 0.7, "grad_norm": 0.26604481787571554, "learning_rate": 8.460582270689716e-06, "loss": 0.0486, "step": 6419 }, { "epoch": 0.71, "grad_norm": 0.3581197002944131, "learning_rate": 8.454772443719339e-06, "loss": 0.0609, "step": 6420 }, { "epoch": 0.71, "grad_norm": 0.2837691340107946, "learning_rate": 8.44896407752661e-06, "loss": 0.0449, "step": 6421 }, { "epoch": 0.71, "grad_norm": 0.20723475045468773, "learning_rate": 8.443157172846448e-06, "loss": 0.03, "step": 6422 }, { "epoch": 0.71, "grad_norm": 0.24840566263190597, "learning_rate": 8.437351730413586e-06, "loss": 0.0408, "step": 6423 }, { "epoch": 0.71, "grad_norm": 0.2877292817592264, "learning_rate": 8.43154775096256e-06, "loss": 0.0443, "step": 6424 }, { "epoch": 0.71, "grad_norm": 0.235031036295703, "learning_rate": 8.425745235227729e-06, "loss": 0.0282, "step": 6425 }, { "epoch": 0.71, "grad_norm": 0.240622555732698, "learning_rate": 8.419944183943266e-06, "loss": 0.049, "step": 6426 }, { "epoch": 0.71, "grad_norm": 0.220243025031694, "learning_rate": 8.41414459784316e-06, "loss": 0.0352, "step": 6427 }, { "epoch": 0.71, "grad_norm": 0.3090907814269771, "learning_rate": 8.408346477661218e-06, "loss": 0.0382, "step": 6428 }, { "epoch": 0.71, "grad_norm": 0.23984505345490492, "learning_rate": 8.402549824131042e-06, "loss": 0.0229, "step": 6429 }, { "epoch": 0.71, "grad_norm": 0.27724245397550806, "learning_rate": 8.39675463798607e-06, "loss": 0.0503, "step": 6430 }, { "epoch": 0.71, "grad_norm": 0.20537202755964704, "learning_rate": 8.390960919959543e-06, "loss": 0.034, "step": 6431 }, { "epoch": 0.71, "grad_norm": 0.3095653425232888, "learning_rate": 8.385168670784532e-06, "loss": 0.0478, "step": 6432 }, { "epoch": 0.71, "grad_norm": 0.32619957035373787, "learning_rate": 8.379377891193894e-06, "loss": 0.0387, "step": 6433 }, { "epoch": 0.71, "grad_norm": 0.28788377813222676, "learning_rate": 8.373588581920325e-06, "loss": 0.0487, "step": 6434 }, { "epoch": 0.71, "grad_norm": 0.23606485883055905, "learning_rate": 8.367800743696322e-06, "loss": 0.0368, "step": 6435 }, { "epoch": 0.71, "grad_norm": 0.24119394837137248, "learning_rate": 8.362014377254213e-06, "loss": 0.0315, "step": 6436 }, { "epoch": 0.71, "grad_norm": 0.3046419158484017, "learning_rate": 8.356229483326108e-06, "loss": 0.0517, "step": 6437 }, { "epoch": 0.71, "grad_norm": 0.25592941315652334, "learning_rate": 8.35044606264396e-06, "loss": 0.0396, "step": 6438 }, { "epoch": 0.71, "grad_norm": 0.4289129037809963, "learning_rate": 8.344664115939523e-06, "loss": 0.0621, "step": 6439 }, { "epoch": 0.71, "grad_norm": 0.2876814896525271, "learning_rate": 8.338883643944375e-06, "loss": 0.0401, "step": 6440 }, { "epoch": 0.71, "grad_norm": 0.2288342154237088, "learning_rate": 8.333104647389876e-06, "loss": 0.0265, "step": 6441 }, { "epoch": 0.71, "grad_norm": 0.19262957359629862, "learning_rate": 8.327327127007247e-06, "loss": 0.0323, "step": 6442 }, { "epoch": 0.71, "grad_norm": 0.2714254191760533, "learning_rate": 8.321551083527495e-06, "loss": 0.029, "step": 6443 }, { "epoch": 0.71, "grad_norm": 0.28344231074665666, "learning_rate": 8.315776517681428e-06, "loss": 0.0499, "step": 6444 }, { "epoch": 0.71, "grad_norm": 0.22648521453449175, "learning_rate": 8.31000343019969e-06, "loss": 0.0366, "step": 6445 }, { "epoch": 0.71, "grad_norm": 0.28209833842440313, "learning_rate": 8.304231821812733e-06, "loss": 0.043, "step": 6446 }, { "epoch": 0.71, "grad_norm": 0.3154077788028386, "learning_rate": 8.298461693250821e-06, "loss": 0.0513, "step": 6447 }, { "epoch": 0.71, "grad_norm": 0.2245812191108761, "learning_rate": 8.292693045244016e-06, "loss": 0.0337, "step": 6448 }, { "epoch": 0.71, "grad_norm": 0.2644923971251656, "learning_rate": 8.286925878522212e-06, "loss": 0.0348, "step": 6449 }, { "epoch": 0.71, "grad_norm": 0.5011667110090846, "learning_rate": 8.281160193815108e-06, "loss": 0.0692, "step": 6450 }, { "epoch": 0.71, "grad_norm": 0.43526896347352445, "learning_rate": 8.275395991852224e-06, "loss": 0.0622, "step": 6451 }, { "epoch": 0.71, "grad_norm": 0.35474573741177284, "learning_rate": 8.269633273362872e-06, "loss": 0.0588, "step": 6452 }, { "epoch": 0.71, "grad_norm": 0.24254991732867046, "learning_rate": 8.26387203907619e-06, "loss": 0.0247, "step": 6453 }, { "epoch": 0.71, "grad_norm": 0.2382054130066473, "learning_rate": 8.258112289721134e-06, "loss": 0.0364, "step": 6454 }, { "epoch": 0.71, "grad_norm": 0.25477298109471, "learning_rate": 8.25235402602647e-06, "loss": 0.04, "step": 6455 }, { "epoch": 0.71, "grad_norm": 0.2646705224088017, "learning_rate": 8.246597248720756e-06, "loss": 0.0485, "step": 6456 }, { "epoch": 0.71, "grad_norm": 0.23004118002617988, "learning_rate": 8.240841958532385e-06, "loss": 0.0235, "step": 6457 }, { "epoch": 0.71, "grad_norm": 0.1825263159375156, "learning_rate": 8.23508815618955e-06, "loss": 0.0314, "step": 6458 }, { "epoch": 0.71, "grad_norm": 0.290648901376989, "learning_rate": 8.229335842420265e-06, "loss": 0.0647, "step": 6459 }, { "epoch": 0.71, "grad_norm": 0.3316200739735159, "learning_rate": 8.22358501795235e-06, "loss": 0.0365, "step": 6460 }, { "epoch": 0.71, "grad_norm": 0.2714120233302748, "learning_rate": 8.217835683513434e-06, "loss": 0.0318, "step": 6461 }, { "epoch": 0.71, "grad_norm": 0.24189913409970287, "learning_rate": 8.212087839830968e-06, "loss": 0.028, "step": 6462 }, { "epoch": 0.71, "grad_norm": 0.22447675853390506, "learning_rate": 8.206341487632194e-06, "loss": 0.0385, "step": 6463 }, { "epoch": 0.71, "grad_norm": 0.22658546923256995, "learning_rate": 8.200596627644187e-06, "loss": 0.0437, "step": 6464 }, { "epoch": 0.71, "grad_norm": 0.26287797157704945, "learning_rate": 8.19485326059382e-06, "loss": 0.0554, "step": 6465 }, { "epoch": 0.71, "grad_norm": 0.25696982313523764, "learning_rate": 8.189111387207782e-06, "loss": 0.0363, "step": 6466 }, { "epoch": 0.71, "grad_norm": 0.23969556505194853, "learning_rate": 8.183371008212582e-06, "loss": 0.0482, "step": 6467 }, { "epoch": 0.71, "grad_norm": 0.27502408897492436, "learning_rate": 8.177632124334513e-06, "loss": 0.0642, "step": 6468 }, { "epoch": 0.71, "grad_norm": 0.2700727662075413, "learning_rate": 8.171894736299706e-06, "loss": 0.0535, "step": 6469 }, { "epoch": 0.71, "grad_norm": 0.24773505277850721, "learning_rate": 8.16615884483409e-06, "loss": 0.0307, "step": 6470 }, { "epoch": 0.71, "grad_norm": 0.2185423154627355, "learning_rate": 8.16042445066342e-06, "loss": 0.0308, "step": 6471 }, { "epoch": 0.71, "grad_norm": 0.25625908184240126, "learning_rate": 8.154691554513228e-06, "loss": 0.039, "step": 6472 }, { "epoch": 0.71, "grad_norm": 0.2664218478516857, "learning_rate": 8.148960157108889e-06, "loss": 0.0339, "step": 6473 }, { "epoch": 0.71, "grad_norm": 0.3309385833146156, "learning_rate": 8.143230259175574e-06, "loss": 0.0517, "step": 6474 }, { "epoch": 0.71, "grad_norm": 0.20773226436706252, "learning_rate": 8.137501861438278e-06, "loss": 0.0292, "step": 6475 }, { "epoch": 0.71, "grad_norm": 0.2076986280531425, "learning_rate": 8.13177496462177e-06, "loss": 0.0323, "step": 6476 }, { "epoch": 0.71, "grad_norm": 0.3011683289434168, "learning_rate": 8.126049569450678e-06, "loss": 0.0448, "step": 6477 }, { "epoch": 0.71, "grad_norm": 0.23462786544153602, "learning_rate": 8.120325676649416e-06, "loss": 0.043, "step": 6478 }, { "epoch": 0.71, "grad_norm": 0.30795756083096393, "learning_rate": 8.114603286942196e-06, "loss": 0.0381, "step": 6479 }, { "epoch": 0.71, "grad_norm": 0.26037141247776246, "learning_rate": 8.108882401053055e-06, "loss": 0.025, "step": 6480 }, { "epoch": 0.71, "grad_norm": 0.35760622533792163, "learning_rate": 8.10316301970584e-06, "loss": 0.0793, "step": 6481 }, { "epoch": 0.71, "grad_norm": 0.21957358227045928, "learning_rate": 8.09744514362421e-06, "loss": 0.0321, "step": 6482 }, { "epoch": 0.71, "grad_norm": 0.20876935789158357, "learning_rate": 8.091728773531615e-06, "loss": 0.0385, "step": 6483 }, { "epoch": 0.71, "grad_norm": 0.2484817747334928, "learning_rate": 8.086013910151334e-06, "loss": 0.0355, "step": 6484 }, { "epoch": 0.71, "grad_norm": 0.19710067424402405, "learning_rate": 8.08030055420645e-06, "loss": 0.0301, "step": 6485 }, { "epoch": 0.71, "grad_norm": 0.19765443537184443, "learning_rate": 8.07458870641986e-06, "loss": 0.0349, "step": 6486 }, { "epoch": 0.71, "grad_norm": 0.33794631940008796, "learning_rate": 8.06887836751425e-06, "loss": 0.0492, "step": 6487 }, { "epoch": 0.71, "grad_norm": 0.21348078759314743, "learning_rate": 8.063169538212139e-06, "loss": 0.0457, "step": 6488 }, { "epoch": 0.71, "grad_norm": 0.2353299712728007, "learning_rate": 8.057462219235842e-06, "loss": 0.0392, "step": 6489 }, { "epoch": 0.71, "grad_norm": 0.2131401751728623, "learning_rate": 8.051756411307494e-06, "loss": 0.0278, "step": 6490 }, { "epoch": 0.71, "grad_norm": 0.18141132155433656, "learning_rate": 8.04605211514902e-06, "loss": 0.0315, "step": 6491 }, { "epoch": 0.71, "grad_norm": 0.2265805944254647, "learning_rate": 8.040349331482167e-06, "loss": 0.0386, "step": 6492 }, { "epoch": 0.71, "grad_norm": 0.2466687267221202, "learning_rate": 8.034648061028492e-06, "loss": 0.0563, "step": 6493 }, { "epoch": 0.71, "grad_norm": 0.21133378562467695, "learning_rate": 8.028948304509356e-06, "loss": 0.0242, "step": 6494 }, { "epoch": 0.71, "grad_norm": 0.21124663482897502, "learning_rate": 8.023250062645931e-06, "loss": 0.0334, "step": 6495 }, { "epoch": 0.71, "grad_norm": 0.41337780752921405, "learning_rate": 8.017553336159192e-06, "loss": 0.0576, "step": 6496 }, { "epoch": 0.71, "grad_norm": 0.2783136333534162, "learning_rate": 8.011858125769938e-06, "loss": 0.0588, "step": 6497 }, { "epoch": 0.71, "grad_norm": 0.19622494233881813, "learning_rate": 8.006164432198747e-06, "loss": 0.0257, "step": 6498 }, { "epoch": 0.71, "grad_norm": 0.20460929869147498, "learning_rate": 8.00047225616603e-06, "loss": 0.023, "step": 6499 }, { "epoch": 0.71, "grad_norm": 0.2801765779608232, "learning_rate": 7.994781598391995e-06, "loss": 0.0469, "step": 6500 }, { "epoch": 0.71, "grad_norm": 0.2924463319041974, "learning_rate": 7.989092459596676e-06, "loss": 0.0551, "step": 6501 }, { "epoch": 0.71, "grad_norm": 0.3092880406169501, "learning_rate": 7.983404840499882e-06, "loss": 0.0536, "step": 6502 }, { "epoch": 0.71, "grad_norm": 0.21091489317515238, "learning_rate": 7.977718741821253e-06, "loss": 0.0339, "step": 6503 }, { "epoch": 0.71, "grad_norm": 0.2772214646802612, "learning_rate": 7.972034164280231e-06, "loss": 0.0504, "step": 6504 }, { "epoch": 0.71, "grad_norm": 0.26462391532095425, "learning_rate": 7.96635110859607e-06, "loss": 0.055, "step": 6505 }, { "epoch": 0.71, "grad_norm": 0.20219168453809466, "learning_rate": 7.96066957548783e-06, "loss": 0.026, "step": 6506 }, { "epoch": 0.71, "grad_norm": 0.23814090910905186, "learning_rate": 7.954989565674365e-06, "loss": 0.0424, "step": 6507 }, { "epoch": 0.71, "grad_norm": 0.24045474327049027, "learning_rate": 7.949311079874352e-06, "loss": 0.0342, "step": 6508 }, { "epoch": 0.71, "grad_norm": 0.25521322005647196, "learning_rate": 7.943634118806272e-06, "loss": 0.0268, "step": 6509 }, { "epoch": 0.71, "grad_norm": 0.2618900990742312, "learning_rate": 7.937958683188407e-06, "loss": 0.0493, "step": 6510 }, { "epoch": 0.72, "grad_norm": 0.22104491715613434, "learning_rate": 7.932284773738856e-06, "loss": 0.0371, "step": 6511 }, { "epoch": 0.72, "grad_norm": 0.26975350269574855, "learning_rate": 7.926612391175516e-06, "loss": 0.0415, "step": 6512 }, { "epoch": 0.72, "grad_norm": 0.20594752268504055, "learning_rate": 7.9209415362161e-06, "loss": 0.0288, "step": 6513 }, { "epoch": 0.72, "grad_norm": 0.34855123040864916, "learning_rate": 7.915272209578112e-06, "loss": 0.0562, "step": 6514 }, { "epoch": 0.72, "grad_norm": 0.2424700304197419, "learning_rate": 7.909604411978872e-06, "loss": 0.0329, "step": 6515 }, { "epoch": 0.72, "grad_norm": 0.22326849534622184, "learning_rate": 7.903938144135515e-06, "loss": 0.029, "step": 6516 }, { "epoch": 0.72, "grad_norm": 0.25573753775720287, "learning_rate": 7.898273406764974e-06, "loss": 0.0429, "step": 6517 }, { "epoch": 0.72, "grad_norm": 0.23229788029204876, "learning_rate": 7.892610200583979e-06, "loss": 0.0242, "step": 6518 }, { "epoch": 0.72, "grad_norm": 0.2321852272661782, "learning_rate": 7.886948526309082e-06, "loss": 0.0268, "step": 6519 }, { "epoch": 0.72, "grad_norm": 0.2636226466812287, "learning_rate": 7.881288384656634e-06, "loss": 0.0272, "step": 6520 }, { "epoch": 0.72, "grad_norm": 0.2777781241467754, "learning_rate": 7.875629776342802e-06, "loss": 0.0346, "step": 6521 }, { "epoch": 0.72, "grad_norm": 0.3214410115286993, "learning_rate": 7.869972702083532e-06, "loss": 0.043, "step": 6522 }, { "epoch": 0.72, "grad_norm": 0.3582476264824408, "learning_rate": 7.864317162594608e-06, "loss": 0.0468, "step": 6523 }, { "epoch": 0.72, "grad_norm": 0.22799082755682637, "learning_rate": 7.8586631585916e-06, "loss": 0.0357, "step": 6524 }, { "epoch": 0.72, "grad_norm": 0.2627168100629604, "learning_rate": 7.853010690789897e-06, "loss": 0.0346, "step": 6525 }, { "epoch": 0.72, "grad_norm": 0.23385006611435474, "learning_rate": 7.847359759904675e-06, "loss": 0.0371, "step": 6526 }, { "epoch": 0.72, "grad_norm": 0.25229807460993714, "learning_rate": 7.841710366650932e-06, "loss": 0.0318, "step": 6527 }, { "epoch": 0.72, "grad_norm": 0.25261015573592, "learning_rate": 7.836062511743468e-06, "loss": 0.0373, "step": 6528 }, { "epoch": 0.72, "grad_norm": 0.3223993197464688, "learning_rate": 7.830416195896882e-06, "loss": 0.068, "step": 6529 }, { "epoch": 0.72, "grad_norm": 0.29124753787264757, "learning_rate": 7.824771419825588e-06, "loss": 0.0524, "step": 6530 }, { "epoch": 0.72, "grad_norm": 0.36407316707979276, "learning_rate": 7.819128184243796e-06, "loss": 0.0478, "step": 6531 }, { "epoch": 0.72, "grad_norm": 0.26657014878312896, "learning_rate": 7.813486489865534e-06, "loss": 0.0479, "step": 6532 }, { "epoch": 0.72, "grad_norm": 0.2749290385590782, "learning_rate": 7.807846337404611e-06, "loss": 0.0278, "step": 6533 }, { "epoch": 0.72, "grad_norm": 0.20985298930298352, "learning_rate": 7.802207727574665e-06, "loss": 0.0321, "step": 6534 }, { "epoch": 0.72, "grad_norm": 0.2291186762903262, "learning_rate": 7.79657066108913e-06, "loss": 0.0359, "step": 6535 }, { "epoch": 0.72, "grad_norm": 0.24471351546134185, "learning_rate": 7.790935138661246e-06, "loss": 0.05, "step": 6536 }, { "epoch": 0.72, "grad_norm": 0.23810186043396592, "learning_rate": 7.785301161004049e-06, "loss": 0.0358, "step": 6537 }, { "epoch": 0.72, "grad_norm": 0.2642028689447798, "learning_rate": 7.779668728830389e-06, "loss": 0.0371, "step": 6538 }, { "epoch": 0.72, "grad_norm": 0.23555028606114825, "learning_rate": 7.774037842852921e-06, "loss": 0.0448, "step": 6539 }, { "epoch": 0.72, "grad_norm": 0.2626707214417275, "learning_rate": 7.768408503784108e-06, "loss": 0.0266, "step": 6540 }, { "epoch": 0.72, "grad_norm": 0.32157064696317006, "learning_rate": 7.762780712336196e-06, "loss": 0.0463, "step": 6541 }, { "epoch": 0.72, "grad_norm": 0.20668172121173925, "learning_rate": 7.757154469221257e-06, "loss": 0.0392, "step": 6542 }, { "epoch": 0.72, "grad_norm": 0.23665624492045323, "learning_rate": 7.751529775151161e-06, "loss": 0.0376, "step": 6543 }, { "epoch": 0.72, "grad_norm": 0.28505605740282963, "learning_rate": 7.745906630837586e-06, "loss": 0.0417, "step": 6544 }, { "epoch": 0.72, "grad_norm": 0.23061561754054719, "learning_rate": 7.740285036991987e-06, "loss": 0.0334, "step": 6545 }, { "epoch": 0.72, "grad_norm": 0.2701965634663251, "learning_rate": 7.734664994325672e-06, "loss": 0.0418, "step": 6546 }, { "epoch": 0.72, "grad_norm": 0.2672412728971369, "learning_rate": 7.729046503549712e-06, "loss": 0.0442, "step": 6547 }, { "epoch": 0.72, "grad_norm": 0.34885610914596044, "learning_rate": 7.723429565375006e-06, "loss": 0.0618, "step": 6548 }, { "epoch": 0.72, "grad_norm": 0.24358799759371624, "learning_rate": 7.71781418051223e-06, "loss": 0.0245, "step": 6549 }, { "epoch": 0.72, "grad_norm": 0.3072416283659649, "learning_rate": 7.71220034967189e-06, "loss": 0.0533, "step": 6550 }, { "epoch": 0.72, "grad_norm": 0.3403584102001735, "learning_rate": 7.706588073564278e-06, "loss": 0.056, "step": 6551 }, { "epoch": 0.72, "grad_norm": 0.3367970138899929, "learning_rate": 7.700977352899506e-06, "loss": 0.056, "step": 6552 }, { "epoch": 0.72, "grad_norm": 0.24168692663512917, "learning_rate": 7.695368188387466e-06, "loss": 0.032, "step": 6553 }, { "epoch": 0.72, "grad_norm": 0.25406383612587574, "learning_rate": 7.68976058073787e-06, "loss": 0.0376, "step": 6554 }, { "epoch": 0.72, "grad_norm": 0.2043610889471732, "learning_rate": 7.684154530660235e-06, "loss": 0.0361, "step": 6555 }, { "epoch": 0.72, "grad_norm": 0.31421248190236667, "learning_rate": 7.678550038863877e-06, "loss": 0.0444, "step": 6556 }, { "epoch": 0.72, "grad_norm": 0.2467697713012407, "learning_rate": 7.672947106057903e-06, "loss": 0.0455, "step": 6557 }, { "epoch": 0.72, "grad_norm": 0.20496925007078223, "learning_rate": 7.667345732951233e-06, "loss": 0.0344, "step": 6558 }, { "epoch": 0.72, "grad_norm": 0.2825699930916705, "learning_rate": 7.661745920252594e-06, "loss": 0.0407, "step": 6559 }, { "epoch": 0.72, "grad_norm": 0.24181142957326718, "learning_rate": 7.656147668670519e-06, "loss": 0.0411, "step": 6560 }, { "epoch": 0.72, "grad_norm": 0.20664482220603836, "learning_rate": 7.65055097891332e-06, "loss": 0.0226, "step": 6561 }, { "epoch": 0.72, "grad_norm": 0.24109204253990774, "learning_rate": 7.644955851689129e-06, "loss": 0.0497, "step": 6562 }, { "epoch": 0.72, "grad_norm": 0.23486936406686915, "learning_rate": 7.639362287705894e-06, "loss": 0.0349, "step": 6563 }, { "epoch": 0.72, "grad_norm": 0.17092372162293284, "learning_rate": 7.63377028767133e-06, "loss": 0.0288, "step": 6564 }, { "epoch": 0.72, "grad_norm": 0.20596855858440993, "learning_rate": 7.628179852292983e-06, "loss": 0.0235, "step": 6565 }, { "epoch": 0.72, "grad_norm": 0.3337479920451879, "learning_rate": 7.622590982278189e-06, "loss": 0.0583, "step": 6566 }, { "epoch": 0.72, "grad_norm": 0.2431587540380298, "learning_rate": 7.617003678334096e-06, "loss": 0.0489, "step": 6567 }, { "epoch": 0.72, "grad_norm": 0.19104538200146343, "learning_rate": 7.611417941167634e-06, "loss": 0.0399, "step": 6568 }, { "epoch": 0.72, "grad_norm": 0.2554741645962459, "learning_rate": 7.605833771485549e-06, "loss": 0.0394, "step": 6569 }, { "epoch": 0.72, "grad_norm": 0.1954194443691002, "learning_rate": 7.600251169994392e-06, "loss": 0.0261, "step": 6570 }, { "epoch": 0.72, "grad_norm": 0.2536431343096896, "learning_rate": 7.594670137400517e-06, "loss": 0.0514, "step": 6571 }, { "epoch": 0.72, "grad_norm": 0.37128636230257456, "learning_rate": 7.589090674410056e-06, "loss": 0.0494, "step": 6572 }, { "epoch": 0.72, "grad_norm": 0.2536575860362757, "learning_rate": 7.583512781728966e-06, "loss": 0.0488, "step": 6573 }, { "epoch": 0.72, "grad_norm": 0.294649895567449, "learning_rate": 7.577936460063e-06, "loss": 0.0573, "step": 6574 }, { "epoch": 0.72, "grad_norm": 0.23970868970666664, "learning_rate": 7.572361710117717e-06, "loss": 0.0283, "step": 6575 }, { "epoch": 0.72, "grad_norm": 0.15715059376108945, "learning_rate": 7.566788532598457e-06, "loss": 0.0258, "step": 6576 }, { "epoch": 0.72, "grad_norm": 0.4482228892042876, "learning_rate": 7.561216928210382e-06, "loss": 0.0725, "step": 6577 }, { "epoch": 0.72, "grad_norm": 0.20514763153753457, "learning_rate": 7.555646897658448e-06, "loss": 0.0278, "step": 6578 }, { "epoch": 0.72, "grad_norm": 0.24031197449883396, "learning_rate": 7.55007844164741e-06, "loss": 0.0548, "step": 6579 }, { "epoch": 0.72, "grad_norm": 0.2700967523067453, "learning_rate": 7.544511560881829e-06, "loss": 0.0566, "step": 6580 }, { "epoch": 0.72, "grad_norm": 0.3832098786267362, "learning_rate": 7.53894625606606e-06, "loss": 0.0515, "step": 6581 }, { "epoch": 0.72, "grad_norm": 0.23901507461962287, "learning_rate": 7.533382527904263e-06, "loss": 0.0398, "step": 6582 }, { "epoch": 0.72, "grad_norm": 0.28280219275306995, "learning_rate": 7.527820377100403e-06, "loss": 0.0451, "step": 6583 }, { "epoch": 0.72, "grad_norm": 0.266661724526584, "learning_rate": 7.5222598043582274e-06, "loss": 0.0417, "step": 6584 }, { "epoch": 0.72, "grad_norm": 0.2635961494494878, "learning_rate": 7.516700810381301e-06, "loss": 0.0355, "step": 6585 }, { "epoch": 0.72, "grad_norm": 0.23455403959952692, "learning_rate": 7.511143395872986e-06, "loss": 0.0386, "step": 6586 }, { "epoch": 0.72, "grad_norm": 0.2830660521170048, "learning_rate": 7.5055875615364495e-06, "loss": 0.033, "step": 6587 }, { "epoch": 0.72, "grad_norm": 0.3227446261873834, "learning_rate": 7.500033308074639e-06, "loss": 0.0327, "step": 6588 }, { "epoch": 0.72, "grad_norm": 0.2744162481363485, "learning_rate": 7.49448063619032e-06, "loss": 0.031, "step": 6589 }, { "epoch": 0.72, "grad_norm": 0.2482165334608095, "learning_rate": 7.488929546586053e-06, "loss": 0.0459, "step": 6590 }, { "epoch": 0.72, "grad_norm": 0.22046721349082088, "learning_rate": 7.483380039964205e-06, "loss": 0.0342, "step": 6591 }, { "epoch": 0.72, "grad_norm": 0.27028440208153975, "learning_rate": 7.477832117026924e-06, "loss": 0.0294, "step": 6592 }, { "epoch": 0.72, "grad_norm": 0.2463165062203717, "learning_rate": 7.4722857784761734e-06, "loss": 0.0309, "step": 6593 }, { "epoch": 0.72, "grad_norm": 0.27237289811120324, "learning_rate": 7.466741025013715e-06, "loss": 0.0412, "step": 6594 }, { "epoch": 0.72, "grad_norm": 0.18011673163854972, "learning_rate": 7.461197857341114e-06, "loss": 0.0258, "step": 6595 }, { "epoch": 0.72, "grad_norm": 0.2445382656516951, "learning_rate": 7.455656276159713e-06, "loss": 0.0374, "step": 6596 }, { "epoch": 0.72, "grad_norm": 0.21940658158610174, "learning_rate": 7.450116282170667e-06, "loss": 0.0287, "step": 6597 }, { "epoch": 0.72, "grad_norm": 0.3439944871693606, "learning_rate": 7.444577876074956e-06, "loss": 0.0411, "step": 6598 }, { "epoch": 0.72, "grad_norm": 0.2766455337101841, "learning_rate": 7.4390410585733176e-06, "loss": 0.0364, "step": 6599 }, { "epoch": 0.72, "grad_norm": 0.18934257467358104, "learning_rate": 7.4335058303663056e-06, "loss": 0.0334, "step": 6600 }, { "epoch": 0.72, "grad_norm": 0.2542107003924037, "learning_rate": 7.42797219215428e-06, "loss": 0.0317, "step": 6601 }, { "epoch": 0.73, "grad_norm": 0.2762423796271572, "learning_rate": 7.422440144637395e-06, "loss": 0.067, "step": 6602 }, { "epoch": 0.73, "grad_norm": 0.2952446087499235, "learning_rate": 7.41690968851559e-06, "loss": 0.0456, "step": 6603 }, { "epoch": 0.73, "grad_norm": 0.27888347468628955, "learning_rate": 7.411380824488621e-06, "loss": 0.0391, "step": 6604 }, { "epoch": 0.73, "grad_norm": 0.2180002340186464, "learning_rate": 7.405853553256035e-06, "loss": 0.0358, "step": 6605 }, { "epoch": 0.73, "grad_norm": 0.2389220158431419, "learning_rate": 7.400327875517188e-06, "loss": 0.0349, "step": 6606 }, { "epoch": 0.73, "grad_norm": 0.26144262237509924, "learning_rate": 7.394803791971208e-06, "loss": 0.0417, "step": 6607 }, { "epoch": 0.73, "grad_norm": 0.3388880522622967, "learning_rate": 7.389281303317046e-06, "loss": 0.053, "step": 6608 }, { "epoch": 0.73, "grad_norm": 0.24191057616311354, "learning_rate": 7.383760410253446e-06, "loss": 0.0387, "step": 6609 }, { "epoch": 0.73, "grad_norm": 0.19632820194666395, "learning_rate": 7.37824111347895e-06, "loss": 0.0405, "step": 6610 }, { "epoch": 0.73, "grad_norm": 0.25926263052650766, "learning_rate": 7.372723413691884e-06, "loss": 0.0474, "step": 6611 }, { "epoch": 0.73, "grad_norm": 0.21710414418950477, "learning_rate": 7.367207311590392e-06, "loss": 0.0343, "step": 6612 }, { "epoch": 0.73, "grad_norm": 0.22550321275945098, "learning_rate": 7.361692807872405e-06, "loss": 0.0485, "step": 6613 }, { "epoch": 0.73, "grad_norm": 0.22521029817028132, "learning_rate": 7.356179903235654e-06, "loss": 0.039, "step": 6614 }, { "epoch": 0.73, "grad_norm": 0.23056507522218203, "learning_rate": 7.350668598377668e-06, "loss": 0.0351, "step": 6615 }, { "epoch": 0.73, "grad_norm": 0.2473592312824255, "learning_rate": 7.345158893995774e-06, "loss": 0.0327, "step": 6616 }, { "epoch": 0.73, "grad_norm": 0.3047008389218534, "learning_rate": 7.339650790787103e-06, "loss": 0.05, "step": 6617 }, { "epoch": 0.73, "grad_norm": 0.2251726137418741, "learning_rate": 7.33414428944856e-06, "loss": 0.0303, "step": 6618 }, { "epoch": 0.73, "grad_norm": 0.3037834203646682, "learning_rate": 7.328639390676873e-06, "loss": 0.068, "step": 6619 }, { "epoch": 0.73, "grad_norm": 0.18046847917280476, "learning_rate": 7.3231360951685574e-06, "loss": 0.0311, "step": 6620 }, { "epoch": 0.73, "grad_norm": 0.2331037936012605, "learning_rate": 7.317634403619931e-06, "loss": 0.0285, "step": 6621 }, { "epoch": 0.73, "grad_norm": 0.22343730292945121, "learning_rate": 7.312134316727093e-06, "loss": 0.0301, "step": 6622 }, { "epoch": 0.73, "grad_norm": 0.23224822537675274, "learning_rate": 7.306635835185956e-06, "loss": 0.0424, "step": 6623 }, { "epoch": 0.73, "grad_norm": 0.22277578692106198, "learning_rate": 7.301138959692225e-06, "loss": 0.0339, "step": 6624 }, { "epoch": 0.73, "grad_norm": 0.21664888468698934, "learning_rate": 7.295643690941397e-06, "loss": 0.0271, "step": 6625 }, { "epoch": 0.73, "grad_norm": 0.19762131755469559, "learning_rate": 7.290150029628777e-06, "loss": 0.0389, "step": 6626 }, { "epoch": 0.73, "grad_norm": 0.3171774214284935, "learning_rate": 7.28465797644945e-06, "loss": 0.0418, "step": 6627 }, { "epoch": 0.73, "grad_norm": 0.2399029112053458, "learning_rate": 7.2791675320983076e-06, "loss": 0.0572, "step": 6628 }, { "epoch": 0.73, "grad_norm": 0.2441456298315135, "learning_rate": 7.273678697270039e-06, "loss": 0.0285, "step": 6629 }, { "epoch": 0.73, "grad_norm": 0.3210788491311601, "learning_rate": 7.268191472659136e-06, "loss": 0.0447, "step": 6630 }, { "epoch": 0.73, "grad_norm": 0.19610518767903182, "learning_rate": 7.262705858959855e-06, "loss": 0.0304, "step": 6631 }, { "epoch": 0.73, "grad_norm": 0.2463955920326748, "learning_rate": 7.257221856866295e-06, "loss": 0.0386, "step": 6632 }, { "epoch": 0.73, "grad_norm": 0.22082828247544065, "learning_rate": 7.251739467072323e-06, "loss": 0.0393, "step": 6633 }, { "epoch": 0.73, "grad_norm": 0.19626924784842306, "learning_rate": 7.246258690271599e-06, "loss": 0.028, "step": 6634 }, { "epoch": 0.73, "grad_norm": 0.18274363051937373, "learning_rate": 7.240779527157589e-06, "loss": 0.027, "step": 6635 }, { "epoch": 0.73, "grad_norm": 0.2504980074237907, "learning_rate": 7.235301978423555e-06, "loss": 0.0444, "step": 6636 }, { "epoch": 0.73, "grad_norm": 0.2484751181042505, "learning_rate": 7.229826044762558e-06, "loss": 0.0434, "step": 6637 }, { "epoch": 0.73, "grad_norm": 0.24730890988975338, "learning_rate": 7.224351726867433e-06, "loss": 0.0334, "step": 6638 }, { "epoch": 0.73, "grad_norm": 0.2100350448766171, "learning_rate": 7.218879025430838e-06, "loss": 0.0181, "step": 6639 }, { "epoch": 0.73, "grad_norm": 0.2383164082838716, "learning_rate": 7.213407941145214e-06, "loss": 0.0284, "step": 6640 }, { "epoch": 0.73, "grad_norm": 0.20707655357018465, "learning_rate": 7.207938474702802e-06, "loss": 0.0248, "step": 6641 }, { "epoch": 0.73, "grad_norm": 0.21129910945014121, "learning_rate": 7.202470626795626e-06, "loss": 0.0339, "step": 6642 }, { "epoch": 0.73, "grad_norm": 0.246992460809062, "learning_rate": 7.197004398115515e-06, "loss": 0.0343, "step": 6643 }, { "epoch": 0.73, "grad_norm": 0.2819418741773533, "learning_rate": 7.191539789354096e-06, "loss": 0.0424, "step": 6644 }, { "epoch": 0.73, "grad_norm": 0.2255049556020603, "learning_rate": 7.18607680120279e-06, "loss": 0.0244, "step": 6645 }, { "epoch": 0.73, "grad_norm": 0.22799028834774265, "learning_rate": 7.180615434352802e-06, "loss": 0.0291, "step": 6646 }, { "epoch": 0.73, "grad_norm": 0.18657914173720364, "learning_rate": 7.175155689495142e-06, "loss": 0.0321, "step": 6647 }, { "epoch": 0.73, "grad_norm": 0.25142358237693113, "learning_rate": 7.1696975673206125e-06, "loss": 0.039, "step": 6648 }, { "epoch": 0.73, "grad_norm": 0.2955624526716143, "learning_rate": 7.164241068519815e-06, "loss": 0.0397, "step": 6649 }, { "epoch": 0.73, "grad_norm": 0.22012736746290956, "learning_rate": 7.158786193783138e-06, "loss": 0.0309, "step": 6650 }, { "epoch": 0.73, "grad_norm": 0.2442214221771545, "learning_rate": 7.153332943800768e-06, "loss": 0.0343, "step": 6651 }, { "epoch": 0.73, "grad_norm": 0.22565001861522319, "learning_rate": 7.147881319262695e-06, "loss": 0.0509, "step": 6652 }, { "epoch": 0.73, "grad_norm": 0.18746004076181033, "learning_rate": 7.142431320858676e-06, "loss": 0.0349, "step": 6653 }, { "epoch": 0.73, "grad_norm": 0.23662973499145099, "learning_rate": 7.136982949278293e-06, "loss": 0.028, "step": 6654 }, { "epoch": 0.73, "grad_norm": 0.2341850623753995, "learning_rate": 7.131536205210905e-06, "loss": 0.032, "step": 6655 }, { "epoch": 0.73, "grad_norm": 0.24560601478955826, "learning_rate": 7.126091089345679e-06, "loss": 0.0427, "step": 6656 }, { "epoch": 0.73, "grad_norm": 0.32064811058112347, "learning_rate": 7.120647602371551e-06, "loss": 0.0453, "step": 6657 }, { "epoch": 0.73, "grad_norm": 0.22087303305604478, "learning_rate": 7.115205744977276e-06, "loss": 0.034, "step": 6658 }, { "epoch": 0.73, "grad_norm": 0.26221100930263325, "learning_rate": 7.10976551785139e-06, "loss": 0.0303, "step": 6659 }, { "epoch": 0.73, "grad_norm": 0.25698065216249494, "learning_rate": 7.104326921682236e-06, "loss": 0.0288, "step": 6660 }, { "epoch": 0.73, "grad_norm": 0.3253034921511151, "learning_rate": 7.0988899571579266e-06, "loss": 0.0393, "step": 6661 }, { "epoch": 0.73, "grad_norm": 0.20542203132737769, "learning_rate": 7.093454624966387e-06, "loss": 0.0414, "step": 6662 }, { "epoch": 0.73, "grad_norm": 0.33082687506549724, "learning_rate": 7.088020925795334e-06, "loss": 0.0388, "step": 6663 }, { "epoch": 0.73, "grad_norm": 0.21913092518655058, "learning_rate": 7.082588860332271e-06, "loss": 0.0352, "step": 6664 }, { "epoch": 0.73, "grad_norm": 0.30516152310442196, "learning_rate": 7.077158429264508e-06, "loss": 0.0392, "step": 6665 }, { "epoch": 0.73, "grad_norm": 0.223187457073168, "learning_rate": 7.071729633279118e-06, "loss": 0.0282, "step": 6666 }, { "epoch": 0.73, "grad_norm": 0.2642827340136583, "learning_rate": 7.066302473063007e-06, "loss": 0.0319, "step": 6667 }, { "epoch": 0.73, "grad_norm": 0.23745255121783146, "learning_rate": 7.060876949302855e-06, "loss": 0.0287, "step": 6668 }, { "epoch": 0.73, "grad_norm": 0.22469789170599044, "learning_rate": 7.055453062685122e-06, "loss": 0.0331, "step": 6669 }, { "epoch": 0.73, "grad_norm": 0.275362189157255, "learning_rate": 7.050030813896078e-06, "loss": 0.0329, "step": 6670 }, { "epoch": 0.73, "grad_norm": 0.286495889744892, "learning_rate": 7.044610203621787e-06, "loss": 0.0367, "step": 6671 }, { "epoch": 0.73, "grad_norm": 0.2955555077466014, "learning_rate": 7.0391912325481e-06, "loss": 0.045, "step": 6672 }, { "epoch": 0.73, "grad_norm": 0.3717115780433946, "learning_rate": 7.033773901360652e-06, "loss": 0.0589, "step": 6673 }, { "epoch": 0.73, "grad_norm": 0.2947748209158103, "learning_rate": 7.028358210744881e-06, "loss": 0.0533, "step": 6674 }, { "epoch": 0.73, "grad_norm": 0.21997114344477642, "learning_rate": 7.022944161386023e-06, "loss": 0.03, "step": 6675 }, { "epoch": 0.73, "grad_norm": 0.2279474988579471, "learning_rate": 7.017531753969098e-06, "loss": 0.0332, "step": 6676 }, { "epoch": 0.73, "grad_norm": 0.2267305310091527, "learning_rate": 7.01212098917891e-06, "loss": 0.0322, "step": 6677 }, { "epoch": 0.73, "grad_norm": 0.34418955789521966, "learning_rate": 7.006711867700069e-06, "loss": 0.0489, "step": 6678 }, { "epoch": 0.73, "grad_norm": 0.21267668447198487, "learning_rate": 7.0013043902169745e-06, "loss": 0.0366, "step": 6679 }, { "epoch": 0.73, "grad_norm": 0.2698602741353036, "learning_rate": 6.995898557413823e-06, "loss": 0.0341, "step": 6680 }, { "epoch": 0.73, "grad_norm": 0.22272471998062027, "learning_rate": 6.990494369974579e-06, "loss": 0.0356, "step": 6681 }, { "epoch": 0.73, "grad_norm": 0.2769559378541649, "learning_rate": 6.985091828583024e-06, "loss": 0.0474, "step": 6682 }, { "epoch": 0.73, "grad_norm": 0.2394360948514919, "learning_rate": 6.979690933922725e-06, "loss": 0.0461, "step": 6683 }, { "epoch": 0.73, "grad_norm": 0.2510101355276847, "learning_rate": 6.974291686677035e-06, "loss": 0.0471, "step": 6684 }, { "epoch": 0.73, "grad_norm": 0.30603822226865846, "learning_rate": 6.968894087529103e-06, "loss": 0.0511, "step": 6685 }, { "epoch": 0.73, "grad_norm": 0.22525735349538595, "learning_rate": 6.96349813716187e-06, "loss": 0.0283, "step": 6686 }, { "epoch": 0.73, "grad_norm": 0.27284032461279195, "learning_rate": 6.958103836258074e-06, "loss": 0.0339, "step": 6687 }, { "epoch": 0.73, "grad_norm": 0.17913651993611, "learning_rate": 6.952711185500223e-06, "loss": 0.0324, "step": 6688 }, { "epoch": 0.73, "grad_norm": 0.21817188846550117, "learning_rate": 6.947320185570634e-06, "loss": 0.0245, "step": 6689 }, { "epoch": 0.73, "grad_norm": 0.25154878270914904, "learning_rate": 6.941930837151416e-06, "loss": 0.0282, "step": 6690 }, { "epoch": 0.73, "grad_norm": 0.22785372117810418, "learning_rate": 6.93654314092447e-06, "loss": 0.0356, "step": 6691 }, { "epoch": 0.73, "grad_norm": 0.1968834545954086, "learning_rate": 6.931157097571468e-06, "loss": 0.033, "step": 6692 }, { "epoch": 0.74, "grad_norm": 0.21011434761279868, "learning_rate": 6.925772707773894e-06, "loss": 0.0316, "step": 6693 }, { "epoch": 0.74, "grad_norm": 0.27499520882854395, "learning_rate": 6.920389972213017e-06, "loss": 0.0492, "step": 6694 }, { "epoch": 0.74, "grad_norm": 0.33057321290709657, "learning_rate": 6.915008891569906e-06, "loss": 0.0487, "step": 6695 }, { "epoch": 0.74, "grad_norm": 0.17008143639484036, "learning_rate": 6.909629466525389e-06, "loss": 0.0315, "step": 6696 }, { "epoch": 0.74, "grad_norm": 0.24153922594375032, "learning_rate": 6.90425169776012e-06, "loss": 0.033, "step": 6697 }, { "epoch": 0.74, "grad_norm": 0.3175985805966503, "learning_rate": 6.898875585954527e-06, "loss": 0.0411, "step": 6698 }, { "epoch": 0.74, "grad_norm": 0.25412963430642016, "learning_rate": 6.893501131788838e-06, "loss": 0.0466, "step": 6699 }, { "epoch": 0.74, "grad_norm": 0.4069227736709644, "learning_rate": 6.888128335943041e-06, "loss": 0.0697, "step": 6700 }, { "epoch": 0.74, "grad_norm": 0.33502762689473636, "learning_rate": 6.882757199096961e-06, "loss": 0.0566, "step": 6701 }, { "epoch": 0.74, "grad_norm": 0.3009942526696348, "learning_rate": 6.877387721930182e-06, "loss": 0.0299, "step": 6702 }, { "epoch": 0.74, "grad_norm": 0.3082821999606284, "learning_rate": 6.8720199051220914e-06, "loss": 0.0327, "step": 6703 }, { "epoch": 0.74, "grad_norm": 0.32758929027304423, "learning_rate": 6.866653749351846e-06, "loss": 0.0611, "step": 6704 }, { "epoch": 0.74, "grad_norm": 0.2775734635146077, "learning_rate": 6.861289255298416e-06, "loss": 0.0418, "step": 6705 }, { "epoch": 0.74, "grad_norm": 0.23643962342158065, "learning_rate": 6.855926423640549e-06, "loss": 0.0424, "step": 6706 }, { "epoch": 0.74, "grad_norm": 0.2334515180042888, "learning_rate": 6.850565255056796e-06, "loss": 0.0482, "step": 6707 }, { "epoch": 0.74, "grad_norm": 0.19592635011965912, "learning_rate": 6.84520575022547e-06, "loss": 0.0333, "step": 6708 }, { "epoch": 0.74, "grad_norm": 0.24017503827474826, "learning_rate": 6.839847909824699e-06, "loss": 0.057, "step": 6709 }, { "epoch": 0.74, "grad_norm": 0.21520028441469727, "learning_rate": 6.8344917345323935e-06, "loss": 0.0346, "step": 6710 }, { "epoch": 0.74, "grad_norm": 0.20818196201660227, "learning_rate": 6.829137225026259e-06, "loss": 0.032, "step": 6711 }, { "epoch": 0.74, "grad_norm": 0.2753011544625325, "learning_rate": 6.823784381983764e-06, "loss": 0.0521, "step": 6712 }, { "epoch": 0.74, "grad_norm": 0.23919578838216948, "learning_rate": 6.818433206082198e-06, "loss": 0.0505, "step": 6713 }, { "epoch": 0.74, "grad_norm": 0.26167725476075, "learning_rate": 6.8130836979986236e-06, "loss": 0.0461, "step": 6714 }, { "epoch": 0.74, "grad_norm": 0.232901966902673, "learning_rate": 6.807735858409903e-06, "loss": 0.0417, "step": 6715 }, { "epoch": 0.74, "grad_norm": 0.2567725302747838, "learning_rate": 6.802389687992666e-06, "loss": 0.0353, "step": 6716 }, { "epoch": 0.74, "grad_norm": 0.29739894559114705, "learning_rate": 6.797045187423345e-06, "loss": 0.0342, "step": 6717 }, { "epoch": 0.74, "grad_norm": 0.2604807425188423, "learning_rate": 6.791702357378185e-06, "loss": 0.0457, "step": 6718 }, { "epoch": 0.74, "grad_norm": 0.20898025722546792, "learning_rate": 6.786361198533169e-06, "loss": 0.0407, "step": 6719 }, { "epoch": 0.74, "grad_norm": 0.3361953588497254, "learning_rate": 6.781021711564107e-06, "loss": 0.0513, "step": 6720 }, { "epoch": 0.74, "grad_norm": 0.2077066363393191, "learning_rate": 6.775683897146583e-06, "loss": 0.0369, "step": 6721 }, { "epoch": 0.74, "grad_norm": 0.26035007897059514, "learning_rate": 6.770347755955982e-06, "loss": 0.036, "step": 6722 }, { "epoch": 0.74, "grad_norm": 0.2468894251097828, "learning_rate": 6.7650132886674505e-06, "loss": 0.0468, "step": 6723 }, { "epoch": 0.74, "grad_norm": 0.242480331709689, "learning_rate": 6.7596804959559494e-06, "loss": 0.0248, "step": 6724 }, { "epoch": 0.74, "grad_norm": 0.20534873546840743, "learning_rate": 6.754349378496219e-06, "loss": 0.036, "step": 6725 }, { "epoch": 0.74, "grad_norm": 0.2319250165457605, "learning_rate": 6.749019936962791e-06, "loss": 0.0481, "step": 6726 }, { "epoch": 0.74, "grad_norm": 0.23197156896398202, "learning_rate": 6.7436921720299675e-06, "loss": 0.039, "step": 6727 }, { "epoch": 0.74, "grad_norm": 0.2435435508990161, "learning_rate": 6.7383660843718635e-06, "loss": 0.0441, "step": 6728 }, { "epoch": 0.74, "grad_norm": 0.2929532492033752, "learning_rate": 6.733041674662366e-06, "loss": 0.0494, "step": 6729 }, { "epoch": 0.74, "grad_norm": 0.23325549421044114, "learning_rate": 6.727718943575161e-06, "loss": 0.0354, "step": 6730 }, { "epoch": 0.74, "grad_norm": 0.18902550445706776, "learning_rate": 6.722397891783705e-06, "loss": 0.0254, "step": 6731 }, { "epoch": 0.74, "grad_norm": 0.2602503563386528, "learning_rate": 6.717078519961257e-06, "loss": 0.027, "step": 6732 }, { "epoch": 0.74, "grad_norm": 0.28259134971968575, "learning_rate": 6.711760828780858e-06, "loss": 0.0279, "step": 6733 }, { "epoch": 0.74, "grad_norm": 0.18558818959495804, "learning_rate": 6.706444818915345e-06, "loss": 0.0303, "step": 6734 }, { "epoch": 0.74, "grad_norm": 0.24799531115340948, "learning_rate": 6.701130491037315e-06, "loss": 0.047, "step": 6735 }, { "epoch": 0.74, "grad_norm": 0.26283223721012183, "learning_rate": 6.695817845819188e-06, "loss": 0.0449, "step": 6736 }, { "epoch": 0.74, "grad_norm": 0.35873455532735216, "learning_rate": 6.690506883933159e-06, "loss": 0.0441, "step": 6737 }, { "epoch": 0.74, "grad_norm": 0.20649669119756392, "learning_rate": 6.68519760605119e-06, "loss": 0.028, "step": 6738 }, { "epoch": 0.74, "grad_norm": 0.2489254029227338, "learning_rate": 6.679890012845052e-06, "loss": 0.0219, "step": 6739 }, { "epoch": 0.74, "grad_norm": 0.2496134929287456, "learning_rate": 6.674584104986295e-06, "loss": 0.0548, "step": 6740 }, { "epoch": 0.74, "grad_norm": 0.3020056290275606, "learning_rate": 6.669279883146262e-06, "loss": 0.0451, "step": 6741 }, { "epoch": 0.74, "grad_norm": 0.37669951055533923, "learning_rate": 6.66397734799608e-06, "loss": 0.066, "step": 6742 }, { "epoch": 0.74, "grad_norm": 0.18733059661161258, "learning_rate": 6.658676500206649e-06, "loss": 0.0221, "step": 6743 }, { "epoch": 0.74, "grad_norm": 0.2660315991953571, "learning_rate": 6.653377340448673e-06, "loss": 0.035, "step": 6744 }, { "epoch": 0.74, "grad_norm": 0.22917323572685044, "learning_rate": 6.648079869392634e-06, "loss": 0.0299, "step": 6745 }, { "epoch": 0.74, "grad_norm": 0.23852382881957337, "learning_rate": 6.642784087708814e-06, "loss": 0.0337, "step": 6746 }, { "epoch": 0.74, "grad_norm": 0.19873770435854676, "learning_rate": 6.6374899960672545e-06, "loss": 0.0299, "step": 6747 }, { "epoch": 0.74, "grad_norm": 0.24036038203233998, "learning_rate": 6.6321975951378034e-06, "loss": 0.039, "step": 6748 }, { "epoch": 0.74, "grad_norm": 0.20760325818422112, "learning_rate": 6.626906885590094e-06, "loss": 0.025, "step": 6749 }, { "epoch": 0.74, "grad_norm": 0.20016482981771522, "learning_rate": 6.621617868093544e-06, "loss": 0.034, "step": 6750 }, { "epoch": 0.74, "grad_norm": 0.2665290035117795, "learning_rate": 6.616330543317344e-06, "loss": 0.0349, "step": 6751 }, { "epoch": 0.74, "grad_norm": 0.24545157371854967, "learning_rate": 6.611044911930477e-06, "loss": 0.0471, "step": 6752 }, { "epoch": 0.74, "grad_norm": 0.2778263728365057, "learning_rate": 6.605760974601743e-06, "loss": 0.0453, "step": 6753 }, { "epoch": 0.74, "grad_norm": 0.2038267923518429, "learning_rate": 6.6004787319996714e-06, "loss": 0.0315, "step": 6754 }, { "epoch": 0.74, "grad_norm": 0.1910679805906707, "learning_rate": 6.59519818479262e-06, "loss": 0.0316, "step": 6755 }, { "epoch": 0.74, "grad_norm": 0.37476717158339, "learning_rate": 6.589919333648711e-06, "loss": 0.0565, "step": 6756 }, { "epoch": 0.74, "grad_norm": 0.24757010529067153, "learning_rate": 6.5846421792358735e-06, "loss": 0.0385, "step": 6757 }, { "epoch": 0.74, "grad_norm": 0.23392257734099883, "learning_rate": 6.579366722221789e-06, "loss": 0.053, "step": 6758 }, { "epoch": 0.74, "grad_norm": 0.3667734733810828, "learning_rate": 6.574092963273953e-06, "loss": 0.0543, "step": 6759 }, { "epoch": 0.74, "grad_norm": 0.24177087819697402, "learning_rate": 6.568820903059632e-06, "loss": 0.0354, "step": 6760 }, { "epoch": 0.74, "grad_norm": 0.2415961922593796, "learning_rate": 6.563550542245891e-06, "loss": 0.0379, "step": 6761 }, { "epoch": 0.74, "grad_norm": 0.25252289628093993, "learning_rate": 6.558281881499556e-06, "loss": 0.0516, "step": 6762 }, { "epoch": 0.74, "grad_norm": 0.2411511922505879, "learning_rate": 6.55301492148726e-06, "loss": 0.0393, "step": 6763 }, { "epoch": 0.74, "grad_norm": 0.2495150033992153, "learning_rate": 6.547749662875411e-06, "loss": 0.0448, "step": 6764 }, { "epoch": 0.74, "grad_norm": 0.27672934644885144, "learning_rate": 6.542486106330215e-06, "loss": 0.0311, "step": 6765 }, { "epoch": 0.74, "grad_norm": 0.20908389885757378, "learning_rate": 6.537224252517633e-06, "loss": 0.0296, "step": 6766 }, { "epoch": 0.74, "grad_norm": 0.2125216357352454, "learning_rate": 6.53196410210344e-06, "loss": 0.0328, "step": 6767 }, { "epoch": 0.74, "grad_norm": 0.26519493742436007, "learning_rate": 6.526705655753183e-06, "loss": 0.05, "step": 6768 }, { "epoch": 0.74, "grad_norm": 0.24269239802431805, "learning_rate": 6.521448914132193e-06, "loss": 0.029, "step": 6769 }, { "epoch": 0.74, "grad_norm": 0.20822451948565476, "learning_rate": 6.516193877905592e-06, "loss": 0.0396, "step": 6770 }, { "epoch": 0.74, "grad_norm": 0.37164807328015104, "learning_rate": 6.510940547738278e-06, "loss": 0.0538, "step": 6771 }, { "epoch": 0.74, "grad_norm": 0.238176815410413, "learning_rate": 6.505688924294944e-06, "loss": 0.031, "step": 6772 }, { "epoch": 0.74, "grad_norm": 0.38653026187340817, "learning_rate": 6.500439008240047e-06, "loss": 0.0547, "step": 6773 }, { "epoch": 0.74, "grad_norm": 0.2887829124387146, "learning_rate": 6.495190800237845e-06, "loss": 0.0268, "step": 6774 }, { "epoch": 0.74, "grad_norm": 0.25954762252710073, "learning_rate": 6.48994430095238e-06, "loss": 0.029, "step": 6775 }, { "epoch": 0.74, "grad_norm": 0.1981215744294018, "learning_rate": 6.484699511047474e-06, "loss": 0.0288, "step": 6776 }, { "epoch": 0.74, "grad_norm": 0.21034891521803428, "learning_rate": 6.4794564311867254e-06, "loss": 0.0257, "step": 6777 }, { "epoch": 0.74, "grad_norm": 0.23240921242464563, "learning_rate": 6.474215062033527e-06, "loss": 0.0302, "step": 6778 }, { "epoch": 0.74, "grad_norm": 0.23992311270520944, "learning_rate": 6.468975404251047e-06, "loss": 0.0427, "step": 6779 }, { "epoch": 0.74, "grad_norm": 0.21893798890741215, "learning_rate": 6.463737458502255e-06, "loss": 0.0299, "step": 6780 }, { "epoch": 0.74, "grad_norm": 0.23098809515220292, "learning_rate": 6.458501225449872e-06, "loss": 0.0331, "step": 6781 }, { "epoch": 0.74, "grad_norm": 0.21917623478765833, "learning_rate": 6.453266705756427e-06, "loss": 0.0354, "step": 6782 }, { "epoch": 0.74, "grad_norm": 0.23833755703386872, "learning_rate": 6.448033900084228e-06, "loss": 0.0295, "step": 6783 }, { "epoch": 0.75, "grad_norm": 0.20428044634201836, "learning_rate": 6.442802809095363e-06, "loss": 0.046, "step": 6784 }, { "epoch": 0.75, "grad_norm": 0.2711294447515628, "learning_rate": 6.43757343345171e-06, "loss": 0.0364, "step": 6785 }, { "epoch": 0.75, "grad_norm": 0.215917970820392, "learning_rate": 6.4323457738149034e-06, "loss": 0.0269, "step": 6786 }, { "epoch": 0.75, "grad_norm": 0.3150409275101422, "learning_rate": 6.427119830846402e-06, "loss": 0.0355, "step": 6787 }, { "epoch": 0.75, "grad_norm": 0.19288600429409625, "learning_rate": 6.421895605207427e-06, "loss": 0.0254, "step": 6788 }, { "epoch": 0.75, "grad_norm": 0.1740954732739422, "learning_rate": 6.4166730975589656e-06, "loss": 0.0247, "step": 6789 }, { "epoch": 0.75, "grad_norm": 0.21777521654437285, "learning_rate": 6.41145230856181e-06, "loss": 0.0265, "step": 6790 }, { "epoch": 0.75, "grad_norm": 0.2654947073529259, "learning_rate": 6.406233238876534e-06, "loss": 0.0491, "step": 6791 }, { "epoch": 0.75, "grad_norm": 0.316390778905072, "learning_rate": 6.401015889163489e-06, "loss": 0.0511, "step": 6792 }, { "epoch": 0.75, "grad_norm": 0.2797450091732419, "learning_rate": 6.395800260082796e-06, "loss": 0.0324, "step": 6793 }, { "epoch": 0.75, "grad_norm": 0.22057211202829743, "learning_rate": 6.3905863522943786e-06, "loss": 0.0354, "step": 6794 }, { "epoch": 0.75, "grad_norm": 0.27117342259111876, "learning_rate": 6.385374166457936e-06, "loss": 0.051, "step": 6795 }, { "epoch": 0.75, "grad_norm": 0.2841852456299777, "learning_rate": 6.380163703232953e-06, "loss": 0.0573, "step": 6796 }, { "epoch": 0.75, "grad_norm": 0.24613588971428838, "learning_rate": 6.3749549632786765e-06, "loss": 0.0433, "step": 6797 }, { "epoch": 0.75, "grad_norm": 0.23222062670386093, "learning_rate": 6.369747947254159e-06, "loss": 0.0335, "step": 6798 }, { "epoch": 0.75, "grad_norm": 0.23216652667635165, "learning_rate": 6.364542655818226e-06, "loss": 0.0324, "step": 6799 }, { "epoch": 0.75, "grad_norm": 0.242540202654977, "learning_rate": 6.35933908962949e-06, "loss": 0.0406, "step": 6800 }, { "epoch": 0.75, "grad_norm": 0.20443812824387148, "learning_rate": 6.35413724934633e-06, "loss": 0.0263, "step": 6801 }, { "epoch": 0.75, "grad_norm": 0.24624244799385803, "learning_rate": 6.348937135626922e-06, "loss": 0.0423, "step": 6802 }, { "epoch": 0.75, "grad_norm": 0.21915284597009768, "learning_rate": 6.343738749129218e-06, "loss": 0.0289, "step": 6803 }, { "epoch": 0.75, "grad_norm": 1.3349522309204238, "learning_rate": 6.338542090510951e-06, "loss": 0.0497, "step": 6804 }, { "epoch": 0.75, "grad_norm": 0.24035537587746944, "learning_rate": 6.333347160429637e-06, "loss": 0.0286, "step": 6805 }, { "epoch": 0.75, "grad_norm": 0.24749989914169568, "learning_rate": 6.328153959542573e-06, "loss": 0.041, "step": 6806 }, { "epoch": 0.75, "grad_norm": 0.31219472279178767, "learning_rate": 6.322962488506843e-06, "loss": 0.0535, "step": 6807 }, { "epoch": 0.75, "grad_norm": 0.22861377531875743, "learning_rate": 6.3177727479792914e-06, "loss": 0.0344, "step": 6808 }, { "epoch": 0.75, "grad_norm": 0.2128419557987818, "learning_rate": 6.312584738616567e-06, "loss": 0.041, "step": 6809 }, { "epoch": 0.75, "grad_norm": 0.25656225032538693, "learning_rate": 6.307398461075091e-06, "loss": 0.0296, "step": 6810 }, { "epoch": 0.75, "grad_norm": 0.21255100353415055, "learning_rate": 6.302213916011069e-06, "loss": 0.0204, "step": 6811 }, { "epoch": 0.75, "grad_norm": 0.26928668052383264, "learning_rate": 6.297031104080471e-06, "loss": 0.0387, "step": 6812 }, { "epoch": 0.75, "grad_norm": 0.24163960193623427, "learning_rate": 6.291850025939068e-06, "loss": 0.052, "step": 6813 }, { "epoch": 0.75, "grad_norm": 0.21072960251219236, "learning_rate": 6.286670682242404e-06, "loss": 0.0196, "step": 6814 }, { "epoch": 0.75, "grad_norm": 0.29053011339935075, "learning_rate": 6.28149307364581e-06, "loss": 0.041, "step": 6815 }, { "epoch": 0.75, "grad_norm": 0.2452281229729745, "learning_rate": 6.276317200804376e-06, "loss": 0.0301, "step": 6816 }, { "epoch": 0.75, "grad_norm": 0.3008340566279486, "learning_rate": 6.2711430643729955e-06, "loss": 0.0473, "step": 6817 }, { "epoch": 0.75, "grad_norm": 0.3223829344889388, "learning_rate": 6.265970665006334e-06, "loss": 0.0498, "step": 6818 }, { "epoch": 0.75, "grad_norm": 0.23962565580366854, "learning_rate": 6.26080000335884e-06, "loss": 0.0324, "step": 6819 }, { "epoch": 0.75, "grad_norm": 0.16584767092333566, "learning_rate": 6.255631080084735e-06, "loss": 0.0258, "step": 6820 }, { "epoch": 0.75, "grad_norm": 0.25293478353072396, "learning_rate": 6.250463895838015e-06, "loss": 0.0307, "step": 6821 }, { "epoch": 0.75, "grad_norm": 0.3810558888051284, "learning_rate": 6.245298451272486e-06, "loss": 0.0626, "step": 6822 }, { "epoch": 0.75, "grad_norm": 0.28154471638666695, "learning_rate": 6.240134747041711e-06, "loss": 0.042, "step": 6823 }, { "epoch": 0.75, "grad_norm": 0.3325632209476083, "learning_rate": 6.234972783799023e-06, "loss": 0.0441, "step": 6824 }, { "epoch": 0.75, "grad_norm": 0.22231711108091096, "learning_rate": 6.229812562197552e-06, "loss": 0.0411, "step": 6825 }, { "epoch": 0.75, "grad_norm": 0.27645581445262735, "learning_rate": 6.224654082890207e-06, "loss": 0.0353, "step": 6826 }, { "epoch": 0.75, "grad_norm": 0.23175364691345224, "learning_rate": 6.2194973465296775e-06, "loss": 0.0297, "step": 6827 }, { "epoch": 0.75, "grad_norm": 0.2889149263357233, "learning_rate": 6.214342353768412e-06, "loss": 0.0455, "step": 6828 }, { "epoch": 0.75, "grad_norm": 0.26965546164605725, "learning_rate": 6.209189105258661e-06, "loss": 0.0543, "step": 6829 }, { "epoch": 0.75, "grad_norm": 0.31113666931941986, "learning_rate": 6.2040376016524506e-06, "loss": 0.0655, "step": 6830 }, { "epoch": 0.75, "grad_norm": 0.2593064085336173, "learning_rate": 6.198887843601587e-06, "loss": 0.0444, "step": 6831 }, { "epoch": 0.75, "grad_norm": 0.2496085231091882, "learning_rate": 6.193739831757637e-06, "loss": 0.0301, "step": 6832 }, { "epoch": 0.75, "grad_norm": 0.34749893315240676, "learning_rate": 6.18859356677197e-06, "loss": 0.0312, "step": 6833 }, { "epoch": 0.75, "grad_norm": 0.26292284786456743, "learning_rate": 6.183449049295722e-06, "loss": 0.0463, "step": 6834 }, { "epoch": 0.75, "grad_norm": 0.21210002211152915, "learning_rate": 6.17830627997982e-06, "loss": 0.0331, "step": 6835 }, { "epoch": 0.75, "grad_norm": 0.21704679981329525, "learning_rate": 6.1731652594749465e-06, "loss": 0.033, "step": 6836 }, { "epoch": 0.75, "grad_norm": 0.2112713573595891, "learning_rate": 6.168025988431585e-06, "loss": 0.0355, "step": 6837 }, { "epoch": 0.75, "grad_norm": 0.21845159442873738, "learning_rate": 6.162888467499988e-06, "loss": 0.0312, "step": 6838 }, { "epoch": 0.75, "grad_norm": 0.26620694533200334, "learning_rate": 6.157752697330186e-06, "loss": 0.0297, "step": 6839 }, { "epoch": 0.75, "grad_norm": 0.2821739966202227, "learning_rate": 6.152618678571996e-06, "loss": 0.0319, "step": 6840 }, { "epoch": 0.75, "grad_norm": 0.22109456079017598, "learning_rate": 6.147486411875004e-06, "loss": 0.0282, "step": 6841 }, { "epoch": 0.75, "grad_norm": 0.616477412961729, "learning_rate": 6.1423558978885836e-06, "loss": 0.1456, "step": 6842 }, { "epoch": 0.75, "grad_norm": 0.22913896689479998, "learning_rate": 6.1372271372618695e-06, "loss": 0.0325, "step": 6843 }, { "epoch": 0.75, "grad_norm": 0.2623221856683833, "learning_rate": 6.1321001306437946e-06, "loss": 0.0369, "step": 6844 }, { "epoch": 0.75, "grad_norm": 0.24793242269557578, "learning_rate": 6.126974878683056e-06, "loss": 0.0336, "step": 6845 }, { "epoch": 0.75, "grad_norm": 0.21941552930290348, "learning_rate": 6.121851382028146e-06, "loss": 0.0374, "step": 6846 }, { "epoch": 0.75, "grad_norm": 0.21167935501075336, "learning_rate": 6.1167296413273084e-06, "loss": 0.0394, "step": 6847 }, { "epoch": 0.75, "grad_norm": 0.2235205589317875, "learning_rate": 6.111609657228581e-06, "loss": 0.044, "step": 6848 }, { "epoch": 0.75, "grad_norm": 0.2676865156290542, "learning_rate": 6.106491430379784e-06, "loss": 0.0431, "step": 6849 }, { "epoch": 0.75, "grad_norm": 0.3225944113413735, "learning_rate": 6.101374961428512e-06, "loss": 0.0325, "step": 6850 }, { "epoch": 0.75, "grad_norm": 0.21392748257104954, "learning_rate": 6.096260251022124e-06, "loss": 0.034, "step": 6851 }, { "epoch": 0.75, "grad_norm": 0.2518758729299559, "learning_rate": 6.091147299807769e-06, "loss": 0.0389, "step": 6852 }, { "epoch": 0.75, "grad_norm": 0.4328833594312295, "learning_rate": 6.086036108432376e-06, "loss": 0.0781, "step": 6853 }, { "epoch": 0.75, "grad_norm": 0.23521230859553557, "learning_rate": 6.080926677542646e-06, "loss": 0.0431, "step": 6854 }, { "epoch": 0.75, "grad_norm": 0.2257676816777061, "learning_rate": 6.075819007785047e-06, "loss": 0.0319, "step": 6855 }, { "epoch": 0.75, "grad_norm": 0.21971528766103066, "learning_rate": 6.070713099805845e-06, "loss": 0.0391, "step": 6856 }, { "epoch": 0.75, "grad_norm": 0.288846902089892, "learning_rate": 6.065608954251074e-06, "loss": 0.0421, "step": 6857 }, { "epoch": 0.75, "grad_norm": 0.19815137502584007, "learning_rate": 6.0605065717665445e-06, "loss": 0.0249, "step": 6858 }, { "epoch": 0.75, "grad_norm": 0.2565631505653599, "learning_rate": 6.0554059529978345e-06, "loss": 0.0342, "step": 6859 }, { "epoch": 0.75, "grad_norm": 0.35445553088280773, "learning_rate": 6.050307098590311e-06, "loss": 0.0559, "step": 6860 }, { "epoch": 0.75, "grad_norm": 0.19227802501685104, "learning_rate": 6.045210009189117e-06, "loss": 0.0388, "step": 6861 }, { "epoch": 0.75, "grad_norm": 0.2040952106564328, "learning_rate": 6.040114685439175e-06, "loss": 0.0231, "step": 6862 }, { "epoch": 0.75, "grad_norm": 0.24997459222905485, "learning_rate": 6.035021127985165e-06, "loss": 0.0363, "step": 6863 }, { "epoch": 0.75, "grad_norm": 0.23336590655917344, "learning_rate": 6.029929337471565e-06, "loss": 0.0215, "step": 6864 }, { "epoch": 0.75, "grad_norm": 0.26287510898368344, "learning_rate": 6.024839314542621e-06, "loss": 0.0547, "step": 6865 }, { "epoch": 0.75, "grad_norm": 0.19023282561562238, "learning_rate": 6.019751059842362e-06, "loss": 0.025, "step": 6866 }, { "epoch": 0.75, "grad_norm": 0.22636640217787196, "learning_rate": 6.014664574014575e-06, "loss": 0.029, "step": 6867 }, { "epoch": 0.75, "grad_norm": 0.3853410784169599, "learning_rate": 6.009579857702843e-06, "loss": 0.0381, "step": 6868 }, { "epoch": 0.75, "grad_norm": 0.2615681827205863, "learning_rate": 6.004496911550512e-06, "loss": 0.037, "step": 6869 }, { "epoch": 0.75, "grad_norm": 0.23539204071307943, "learning_rate": 5.999415736200724e-06, "loss": 0.0368, "step": 6870 }, { "epoch": 0.75, "grad_norm": 0.24217109470794276, "learning_rate": 5.9943363322963646e-06, "loss": 0.0376, "step": 6871 }, { "epoch": 0.75, "grad_norm": 0.3303204611808623, "learning_rate": 5.98925870048012e-06, "loss": 0.0419, "step": 6872 }, { "epoch": 0.75, "grad_norm": 0.29619485341829044, "learning_rate": 5.984182841394448e-06, "loss": 0.0384, "step": 6873 }, { "epoch": 0.75, "grad_norm": 0.4046545980032232, "learning_rate": 5.979108755681575e-06, "loss": 0.0628, "step": 6874 }, { "epoch": 0.75, "grad_norm": 0.23116407731271563, "learning_rate": 5.974036443983513e-06, "loss": 0.0447, "step": 6875 }, { "epoch": 0.76, "grad_norm": 0.32092446507816974, "learning_rate": 5.968965906942039e-06, "loss": 0.0434, "step": 6876 }, { "epoch": 0.76, "grad_norm": 0.24333309135740694, "learning_rate": 5.963897145198721e-06, "loss": 0.0349, "step": 6877 }, { "epoch": 0.76, "grad_norm": 0.2118458088595964, "learning_rate": 5.958830159394875e-06, "loss": 0.0367, "step": 6878 }, { "epoch": 0.76, "grad_norm": 0.21557485854374153, "learning_rate": 5.953764950171619e-06, "loss": 0.029, "step": 6879 }, { "epoch": 0.76, "grad_norm": 0.21661123283407868, "learning_rate": 5.948701518169835e-06, "loss": 0.0505, "step": 6880 }, { "epoch": 0.76, "grad_norm": 0.20379084796353694, "learning_rate": 5.943639864030188e-06, "loss": 0.049, "step": 6881 }, { "epoch": 0.76, "grad_norm": 0.2704629964233051, "learning_rate": 5.938579988393099e-06, "loss": 0.0367, "step": 6882 }, { "epoch": 0.76, "grad_norm": 0.22877641615224523, "learning_rate": 5.9335218918987815e-06, "loss": 0.0415, "step": 6883 }, { "epoch": 0.76, "grad_norm": 0.1880715718234911, "learning_rate": 5.928465575187221e-06, "loss": 0.035, "step": 6884 }, { "epoch": 0.76, "grad_norm": 0.26952019267966726, "learning_rate": 5.923411038898181e-06, "loss": 0.0394, "step": 6885 }, { "epoch": 0.76, "grad_norm": 0.24459876752361198, "learning_rate": 5.918358283671182e-06, "loss": 0.0607, "step": 6886 }, { "epoch": 0.76, "grad_norm": 0.23928127563026055, "learning_rate": 5.9133073101455375e-06, "loss": 0.046, "step": 6887 }, { "epoch": 0.76, "grad_norm": 0.21356373862816366, "learning_rate": 5.90825811896033e-06, "loss": 0.0413, "step": 6888 }, { "epoch": 0.76, "grad_norm": 0.36327952722940804, "learning_rate": 5.903210710754423e-06, "loss": 0.0465, "step": 6889 }, { "epoch": 0.76, "grad_norm": 0.3333922597338471, "learning_rate": 5.89816508616643e-06, "loss": 0.0549, "step": 6890 }, { "epoch": 0.76, "grad_norm": 0.24586662529072634, "learning_rate": 5.8931212458347716e-06, "loss": 0.0618, "step": 6891 }, { "epoch": 0.76, "grad_norm": 0.2233143917471241, "learning_rate": 5.888079190397628e-06, "loss": 0.0312, "step": 6892 }, { "epoch": 0.76, "grad_norm": 0.22086511422989913, "learning_rate": 5.883038920492945e-06, "loss": 0.0378, "step": 6893 }, { "epoch": 0.76, "grad_norm": 0.22993504425919062, "learning_rate": 5.878000436758453e-06, "loss": 0.0201, "step": 6894 }, { "epoch": 0.76, "grad_norm": 0.21778264047652549, "learning_rate": 5.8729637398316544e-06, "loss": 0.0316, "step": 6895 }, { "epoch": 0.76, "grad_norm": 0.21002404794773494, "learning_rate": 5.86792883034983e-06, "loss": 0.0217, "step": 6896 }, { "epoch": 0.76, "grad_norm": 0.2520829982334989, "learning_rate": 5.862895708950021e-06, "loss": 0.0382, "step": 6897 }, { "epoch": 0.76, "grad_norm": 0.21394714184742136, "learning_rate": 5.857864376269051e-06, "loss": 0.0279, "step": 6898 }, { "epoch": 0.76, "grad_norm": 0.29743431758681066, "learning_rate": 5.852834832943524e-06, "loss": 0.0396, "step": 6899 }, { "epoch": 0.76, "grad_norm": 0.21811396229315153, "learning_rate": 5.847807079609804e-06, "loss": 0.0343, "step": 6900 }, { "epoch": 0.76, "grad_norm": 0.20649080993174362, "learning_rate": 5.842781116904046e-06, "loss": 0.0395, "step": 6901 }, { "epoch": 0.76, "grad_norm": 0.2129150341634506, "learning_rate": 5.837756945462154e-06, "loss": 0.0295, "step": 6902 }, { "epoch": 0.76, "grad_norm": 0.25851569942635844, "learning_rate": 5.832734565919826e-06, "loss": 0.0325, "step": 6903 }, { "epoch": 0.76, "grad_norm": 0.2281988018009029, "learning_rate": 5.827713978912524e-06, "loss": 0.0268, "step": 6904 }, { "epoch": 0.76, "grad_norm": 0.21300050542104332, "learning_rate": 5.8226951850754935e-06, "loss": 0.0536, "step": 6905 }, { "epoch": 0.76, "grad_norm": 0.16063120244626866, "learning_rate": 5.817678185043733e-06, "loss": 0.0302, "step": 6906 }, { "epoch": 0.76, "grad_norm": 0.2316560067896362, "learning_rate": 5.812662979452024e-06, "loss": 0.0327, "step": 6907 }, { "epoch": 0.76, "grad_norm": 0.21599583802474104, "learning_rate": 5.807649568934945e-06, "loss": 0.0401, "step": 6908 }, { "epoch": 0.76, "grad_norm": 0.2311583643501086, "learning_rate": 5.802637954126804e-06, "loss": 0.0275, "step": 6909 }, { "epoch": 0.76, "grad_norm": 0.2563924841059302, "learning_rate": 5.79762813566171e-06, "loss": 0.0435, "step": 6910 }, { "epoch": 0.76, "grad_norm": 0.2608357379551629, "learning_rate": 5.792620114173539e-06, "loss": 0.03, "step": 6911 }, { "epoch": 0.76, "grad_norm": 0.23836058718542835, "learning_rate": 5.7876138902959445e-06, "loss": 0.0402, "step": 6912 }, { "epoch": 0.76, "grad_norm": 0.22964789607059966, "learning_rate": 5.782609464662334e-06, "loss": 0.0364, "step": 6913 }, { "epoch": 0.76, "grad_norm": 0.30840909237662617, "learning_rate": 5.777606837905905e-06, "loss": 0.0553, "step": 6914 }, { "epoch": 0.76, "grad_norm": 0.21881703643868794, "learning_rate": 5.772606010659625e-06, "loss": 0.0404, "step": 6915 }, { "epoch": 0.76, "grad_norm": 0.17035636490086253, "learning_rate": 5.767606983556237e-06, "loss": 0.0309, "step": 6916 }, { "epoch": 0.76, "grad_norm": 0.21178796418067145, "learning_rate": 5.762609757228237e-06, "loss": 0.0299, "step": 6917 }, { "epoch": 0.76, "grad_norm": 0.2579718010928188, "learning_rate": 5.757614332307912e-06, "loss": 0.0375, "step": 6918 }, { "epoch": 0.76, "grad_norm": 0.22600900520997128, "learning_rate": 5.752620709427322e-06, "loss": 0.0439, "step": 6919 }, { "epoch": 0.76, "grad_norm": 0.24079720540191418, "learning_rate": 5.7476288892182905e-06, "loss": 0.0489, "step": 6920 }, { "epoch": 0.76, "grad_norm": 0.2581555944604589, "learning_rate": 5.742638872312407e-06, "loss": 0.0369, "step": 6921 }, { "epoch": 0.76, "grad_norm": 0.29413186930356994, "learning_rate": 5.737650659341048e-06, "loss": 0.0406, "step": 6922 }, { "epoch": 0.76, "grad_norm": 0.18655045797950764, "learning_rate": 5.732664250935354e-06, "loss": 0.0298, "step": 6923 }, { "epoch": 0.76, "grad_norm": 0.23384817040677708, "learning_rate": 5.7276796477262365e-06, "loss": 0.0449, "step": 6924 }, { "epoch": 0.76, "grad_norm": 0.2033207573643049, "learning_rate": 5.72269685034438e-06, "loss": 0.02, "step": 6925 }, { "epoch": 0.76, "grad_norm": 0.21485899957808957, "learning_rate": 5.717715859420246e-06, "loss": 0.0329, "step": 6926 }, { "epoch": 0.76, "grad_norm": 0.15651580731972609, "learning_rate": 5.712736675584061e-06, "loss": 0.0238, "step": 6927 }, { "epoch": 0.76, "grad_norm": 0.1968726123937514, "learning_rate": 5.707759299465816e-06, "loss": 0.0205, "step": 6928 }, { "epoch": 0.76, "grad_norm": 0.24112351566933618, "learning_rate": 5.702783731695285e-06, "loss": 0.0379, "step": 6929 }, { "epoch": 0.76, "grad_norm": 0.18789883752270922, "learning_rate": 5.6978099729020105e-06, "loss": 0.032, "step": 6930 }, { "epoch": 0.76, "grad_norm": 0.19796030082225394, "learning_rate": 5.692838023715313e-06, "loss": 0.0254, "step": 6931 }, { "epoch": 0.76, "grad_norm": 0.19897116441611334, "learning_rate": 5.68786788476426e-06, "loss": 0.031, "step": 6932 }, { "epoch": 0.76, "grad_norm": 0.2563013089380243, "learning_rate": 5.682899556677714e-06, "loss": 0.0358, "step": 6933 }, { "epoch": 0.76, "grad_norm": 0.17442135363094818, "learning_rate": 5.6779330400843e-06, "loss": 0.025, "step": 6934 }, { "epoch": 0.76, "grad_norm": 0.2752223887501325, "learning_rate": 5.672968335612421e-06, "loss": 0.0327, "step": 6935 }, { "epoch": 0.76, "grad_norm": 0.21292662534889523, "learning_rate": 5.66800544389023e-06, "loss": 0.0276, "step": 6936 }, { "epoch": 0.76, "grad_norm": 0.2636235369419687, "learning_rate": 5.663044365545674e-06, "loss": 0.063, "step": 6937 }, { "epoch": 0.76, "grad_norm": 0.23667289868956498, "learning_rate": 5.658085101206456e-06, "loss": 0.0415, "step": 6938 }, { "epoch": 0.76, "grad_norm": 0.24502593693849398, "learning_rate": 5.653127651500065e-06, "loss": 0.0296, "step": 6939 }, { "epoch": 0.76, "grad_norm": 0.19311461707786928, "learning_rate": 5.648172017053737e-06, "loss": 0.0347, "step": 6940 }, { "epoch": 0.76, "grad_norm": 0.4783905177155054, "learning_rate": 5.643218198494493e-06, "loss": 0.0885, "step": 6941 }, { "epoch": 0.76, "grad_norm": 0.2429590241672004, "learning_rate": 5.638266196449123e-06, "loss": 0.0344, "step": 6942 }, { "epoch": 0.76, "grad_norm": 0.21213756149191748, "learning_rate": 5.6333160115442005e-06, "loss": 0.0312, "step": 6943 }, { "epoch": 0.76, "grad_norm": 0.17742884228765268, "learning_rate": 5.628367644406039e-06, "loss": 0.0207, "step": 6944 }, { "epoch": 0.76, "grad_norm": 0.2854125817324459, "learning_rate": 5.6234210956607435e-06, "loss": 0.0467, "step": 6945 }, { "epoch": 0.76, "grad_norm": 0.29586945445315455, "learning_rate": 5.618476365934184e-06, "loss": 0.0319, "step": 6946 }, { "epoch": 0.76, "grad_norm": 0.22614443782221783, "learning_rate": 5.613533455852005e-06, "loss": 0.0544, "step": 6947 }, { "epoch": 0.76, "grad_norm": 0.2528457207655395, "learning_rate": 5.608592366039607e-06, "loss": 0.0389, "step": 6948 }, { "epoch": 0.76, "grad_norm": 0.2840851039726853, "learning_rate": 5.60365309712217e-06, "loss": 0.0448, "step": 6949 }, { "epoch": 0.76, "grad_norm": 0.16087403759079388, "learning_rate": 5.598715649724647e-06, "loss": 0.0245, "step": 6950 }, { "epoch": 0.76, "grad_norm": 0.20304726066714182, "learning_rate": 5.593780024471758e-06, "loss": 0.0289, "step": 6951 }, { "epoch": 0.76, "grad_norm": 0.22567431052381426, "learning_rate": 5.588846221987982e-06, "loss": 0.0493, "step": 6952 }, { "epoch": 0.76, "grad_norm": 0.16442153450556243, "learning_rate": 5.583914242897581e-06, "loss": 0.0214, "step": 6953 }, { "epoch": 0.76, "grad_norm": 0.2316113612713826, "learning_rate": 5.578984087824581e-06, "loss": 0.0273, "step": 6954 }, { "epoch": 0.76, "grad_norm": 0.19527171662822493, "learning_rate": 5.574055757392787e-06, "loss": 0.0199, "step": 6955 }, { "epoch": 0.76, "grad_norm": 0.30821959834681634, "learning_rate": 5.569129252225745e-06, "loss": 0.0423, "step": 6956 }, { "epoch": 0.76, "grad_norm": 0.21358375405932348, "learning_rate": 5.564204572946797e-06, "loss": 0.0286, "step": 6957 }, { "epoch": 0.76, "grad_norm": 0.2752489983828872, "learning_rate": 5.559281720179046e-06, "loss": 0.0276, "step": 6958 }, { "epoch": 0.76, "grad_norm": 0.21643923566597623, "learning_rate": 5.554360694545367e-06, "loss": 0.0395, "step": 6959 }, { "epoch": 0.76, "grad_norm": 0.2919627911624011, "learning_rate": 5.549441496668393e-06, "loss": 0.0476, "step": 6960 }, { "epoch": 0.76, "grad_norm": 0.22633794640117683, "learning_rate": 5.544524127170541e-06, "loss": 0.0462, "step": 6961 }, { "epoch": 0.76, "grad_norm": 0.1900146362822263, "learning_rate": 5.539608586673988e-06, "loss": 0.0258, "step": 6962 }, { "epoch": 0.76, "grad_norm": 0.20311661849702797, "learning_rate": 5.534694875800675e-06, "loss": 0.025, "step": 6963 }, { "epoch": 0.76, "grad_norm": 0.26763514447217246, "learning_rate": 5.529782995172315e-06, "loss": 0.0348, "step": 6964 }, { "epoch": 0.76, "grad_norm": 0.3062852391579767, "learning_rate": 5.5248729454103975e-06, "loss": 0.041, "step": 6965 }, { "epoch": 0.76, "grad_norm": 0.20943061033167892, "learning_rate": 5.519964727136178e-06, "loss": 0.0326, "step": 6966 }, { "epoch": 0.77, "grad_norm": 0.22073272911630623, "learning_rate": 5.515058340970665e-06, "loss": 0.0202, "step": 6967 }, { "epoch": 0.77, "grad_norm": 0.20566803691968882, "learning_rate": 5.510153787534651e-06, "loss": 0.0264, "step": 6968 }, { "epoch": 0.77, "grad_norm": 0.2464246946650871, "learning_rate": 5.505251067448694e-06, "loss": 0.0409, "step": 6969 }, { "epoch": 0.77, "grad_norm": 0.2604281645461818, "learning_rate": 5.500350181333121e-06, "loss": 0.0353, "step": 6970 }, { "epoch": 0.77, "grad_norm": 0.19740877528445158, "learning_rate": 5.495451129808016e-06, "loss": 0.0256, "step": 6971 }, { "epoch": 0.77, "grad_norm": 0.19082865857835518, "learning_rate": 5.490553913493242e-06, "loss": 0.025, "step": 6972 }, { "epoch": 0.77, "grad_norm": 0.2976076847416356, "learning_rate": 5.4856585330084315e-06, "loss": 0.0456, "step": 6973 }, { "epoch": 0.77, "grad_norm": 0.3182344367011818, "learning_rate": 5.48076498897298e-06, "loss": 0.0366, "step": 6974 }, { "epoch": 0.77, "grad_norm": 0.3705311576313587, "learning_rate": 5.475873282006041e-06, "loss": 0.039, "step": 6975 }, { "epoch": 0.77, "grad_norm": 0.2187700262439578, "learning_rate": 5.470983412726547e-06, "loss": 0.0277, "step": 6976 }, { "epoch": 0.77, "grad_norm": 0.22780393974223076, "learning_rate": 5.466095381753207e-06, "loss": 0.0281, "step": 6977 }, { "epoch": 0.77, "grad_norm": 0.1866740672065089, "learning_rate": 5.461209189704486e-06, "loss": 0.0191, "step": 6978 }, { "epoch": 0.77, "grad_norm": 0.2198423631484719, "learning_rate": 5.4563248371986055e-06, "loss": 0.0273, "step": 6979 }, { "epoch": 0.77, "grad_norm": 0.2563586278111561, "learning_rate": 5.451442324853571e-06, "loss": 0.0426, "step": 6980 }, { "epoch": 0.77, "grad_norm": 0.21093825940595903, "learning_rate": 5.446561653287152e-06, "loss": 0.0277, "step": 6981 }, { "epoch": 0.77, "grad_norm": 0.2056065699106746, "learning_rate": 5.441682823116887e-06, "loss": 0.0296, "step": 6982 }, { "epoch": 0.77, "grad_norm": 0.2382169933917382, "learning_rate": 5.436805834960066e-06, "loss": 0.034, "step": 6983 }, { "epoch": 0.77, "grad_norm": 0.24752212948076216, "learning_rate": 5.431930689433762e-06, "loss": 0.0282, "step": 6984 }, { "epoch": 0.77, "grad_norm": 0.2999169453252791, "learning_rate": 5.4270573871548145e-06, "loss": 0.0305, "step": 6985 }, { "epoch": 0.77, "grad_norm": 0.2500953939351185, "learning_rate": 5.422185928739827e-06, "loss": 0.0381, "step": 6986 }, { "epoch": 0.77, "grad_norm": 0.18438252983234843, "learning_rate": 5.4173163148051585e-06, "loss": 0.0292, "step": 6987 }, { "epoch": 0.77, "grad_norm": 0.211526023236054, "learning_rate": 5.4124485459669485e-06, "loss": 0.0305, "step": 6988 }, { "epoch": 0.77, "grad_norm": 0.19899179583319965, "learning_rate": 5.407582622841103e-06, "loss": 0.0305, "step": 6989 }, { "epoch": 0.77, "grad_norm": 0.25614371770600686, "learning_rate": 5.402718546043293e-06, "loss": 0.0269, "step": 6990 }, { "epoch": 0.77, "grad_norm": 0.22575712108047447, "learning_rate": 5.397856316188941e-06, "loss": 0.0256, "step": 6991 }, { "epoch": 0.77, "grad_norm": 0.24717336322469746, "learning_rate": 5.392995933893255e-06, "loss": 0.036, "step": 6992 }, { "epoch": 0.77, "grad_norm": 0.2919907330633065, "learning_rate": 5.388137399771203e-06, "loss": 0.0313, "step": 6993 }, { "epoch": 0.77, "grad_norm": 0.25249117687125416, "learning_rate": 5.383280714437518e-06, "loss": 0.0311, "step": 6994 }, { "epoch": 0.77, "grad_norm": 0.24682961300225534, "learning_rate": 5.378425878506699e-06, "loss": 0.0407, "step": 6995 }, { "epoch": 0.77, "grad_norm": 0.21672774406917658, "learning_rate": 5.373572892593013e-06, "loss": 0.0471, "step": 6996 }, { "epoch": 0.77, "grad_norm": 0.2527252166126367, "learning_rate": 5.368721757310498e-06, "loss": 0.0303, "step": 6997 }, { "epoch": 0.77, "grad_norm": 0.19257475867433962, "learning_rate": 5.363872473272935e-06, "loss": 0.0218, "step": 6998 }, { "epoch": 0.77, "grad_norm": 0.2320130463083775, "learning_rate": 5.359025041093897e-06, "loss": 0.0324, "step": 6999 }, { "epoch": 0.77, "grad_norm": 0.21244071526155064, "learning_rate": 5.354179461386712e-06, "loss": 0.0492, "step": 7000 }, { "epoch": 0.77, "grad_norm": 0.5452902037863576, "learning_rate": 5.349335734764483e-06, "loss": 0.0808, "step": 7001 }, { "epoch": 0.77, "grad_norm": 0.3080990362931693, "learning_rate": 5.3444938618400524e-06, "loss": 0.0396, "step": 7002 }, { "epoch": 0.77, "grad_norm": 0.18269649354342107, "learning_rate": 5.3396538432260556e-06, "loss": 0.0183, "step": 7003 }, { "epoch": 0.77, "grad_norm": 0.318177504750219, "learning_rate": 5.334815679534882e-06, "loss": 0.0389, "step": 7004 }, { "epoch": 0.77, "grad_norm": 0.2037615522081898, "learning_rate": 5.329979371378693e-06, "loss": 0.0328, "step": 7005 }, { "epoch": 0.77, "grad_norm": 0.21685994579713638, "learning_rate": 5.325144919369398e-06, "loss": 0.0337, "step": 7006 }, { "epoch": 0.77, "grad_norm": 0.26275105221825307, "learning_rate": 5.320312324118693e-06, "loss": 0.0387, "step": 7007 }, { "epoch": 0.77, "grad_norm": 0.2178134795868791, "learning_rate": 5.315481586238025e-06, "loss": 0.0208, "step": 7008 }, { "epoch": 0.77, "grad_norm": 0.28813787735650404, "learning_rate": 5.310652706338619e-06, "loss": 0.0398, "step": 7009 }, { "epoch": 0.77, "grad_norm": 0.257843969331823, "learning_rate": 5.305825685031445e-06, "loss": 0.0373, "step": 7010 }, { "epoch": 0.77, "grad_norm": 0.2546008073665107, "learning_rate": 5.301000522927247e-06, "loss": 0.0306, "step": 7011 }, { "epoch": 0.77, "grad_norm": 0.1940067512797477, "learning_rate": 5.296177220636556e-06, "loss": 0.0292, "step": 7012 }, { "epoch": 0.77, "grad_norm": 0.21660280562681306, "learning_rate": 5.291355778769629e-06, "loss": 0.04, "step": 7013 }, { "epoch": 0.77, "grad_norm": 0.22972912022049433, "learning_rate": 5.286536197936512e-06, "loss": 0.0436, "step": 7014 }, { "epoch": 0.77, "grad_norm": 0.20096376450609368, "learning_rate": 5.281718478747009e-06, "loss": 0.0279, "step": 7015 }, { "epoch": 0.77, "grad_norm": 0.33209048472082764, "learning_rate": 5.276902621810691e-06, "loss": 0.0487, "step": 7016 }, { "epoch": 0.77, "grad_norm": 0.2586955495339104, "learning_rate": 5.272088627736898e-06, "loss": 0.0325, "step": 7017 }, { "epoch": 0.77, "grad_norm": 0.28198918586313926, "learning_rate": 5.267276497134715e-06, "loss": 0.0398, "step": 7018 }, { "epoch": 0.77, "grad_norm": 0.24910528296554205, "learning_rate": 5.26246623061301e-06, "loss": 0.0323, "step": 7019 }, { "epoch": 0.77, "grad_norm": 0.23293844886895632, "learning_rate": 5.257657828780409e-06, "loss": 0.0393, "step": 7020 }, { "epoch": 0.77, "grad_norm": 0.2570166731530166, "learning_rate": 5.252851292245311e-06, "loss": 0.0396, "step": 7021 }, { "epoch": 0.77, "grad_norm": 0.2495247376511187, "learning_rate": 5.2480466216158565e-06, "loss": 0.0458, "step": 7022 }, { "epoch": 0.77, "grad_norm": 0.26271737725716104, "learning_rate": 5.243243817499968e-06, "loss": 0.0366, "step": 7023 }, { "epoch": 0.77, "grad_norm": 0.26338079421427896, "learning_rate": 5.2384428805053325e-06, "loss": 0.0389, "step": 7024 }, { "epoch": 0.77, "grad_norm": 0.23889113374020046, "learning_rate": 5.233643811239395e-06, "loss": 0.0463, "step": 7025 }, { "epoch": 0.77, "grad_norm": 0.26312777576658714, "learning_rate": 5.228846610309359e-06, "loss": 0.0421, "step": 7026 }, { "epoch": 0.77, "grad_norm": 0.3745738429301858, "learning_rate": 5.224051278322204e-06, "loss": 0.0489, "step": 7027 }, { "epoch": 0.77, "grad_norm": 0.28799493936556236, "learning_rate": 5.219257815884662e-06, "loss": 0.038, "step": 7028 }, { "epoch": 0.77, "grad_norm": 0.24428743144716925, "learning_rate": 5.214466223603238e-06, "loss": 0.0309, "step": 7029 }, { "epoch": 0.77, "grad_norm": 0.28104745671326653, "learning_rate": 5.209676502084191e-06, "loss": 0.0484, "step": 7030 }, { "epoch": 0.77, "grad_norm": 0.26035724756740336, "learning_rate": 5.204888651933553e-06, "loss": 0.0383, "step": 7031 }, { "epoch": 0.77, "grad_norm": 0.26880693553370694, "learning_rate": 5.200102673757115e-06, "loss": 0.0453, "step": 7032 }, { "epoch": 0.77, "grad_norm": 0.14152478734677498, "learning_rate": 5.195318568160421e-06, "loss": 0.0137, "step": 7033 }, { "epoch": 0.77, "grad_norm": 0.25012842992216233, "learning_rate": 5.190536335748792e-06, "loss": 0.0411, "step": 7034 }, { "epoch": 0.77, "grad_norm": 0.3170977441985976, "learning_rate": 5.185755977127311e-06, "loss": 0.0536, "step": 7035 }, { "epoch": 0.77, "grad_norm": 0.25279435473420747, "learning_rate": 5.180977492900823e-06, "loss": 0.0277, "step": 7036 }, { "epoch": 0.77, "grad_norm": 0.2353987587106853, "learning_rate": 5.1762008836739185e-06, "loss": 0.0498, "step": 7037 }, { "epoch": 0.77, "grad_norm": 0.24286597338142374, "learning_rate": 5.171426150050977e-06, "loss": 0.0299, "step": 7038 }, { "epoch": 0.77, "grad_norm": 0.21200146608587972, "learning_rate": 5.1666532926361255e-06, "loss": 0.0479, "step": 7039 }, { "epoch": 0.77, "grad_norm": 0.2996070160455856, "learning_rate": 5.161882312033264e-06, "loss": 0.0633, "step": 7040 }, { "epoch": 0.77, "grad_norm": 0.2505240015211575, "learning_rate": 5.157113208846037e-06, "loss": 0.0296, "step": 7041 }, { "epoch": 0.77, "grad_norm": 0.23581899230900907, "learning_rate": 5.152345983677866e-06, "loss": 0.0226, "step": 7042 }, { "epoch": 0.77, "grad_norm": 0.2478916929325985, "learning_rate": 5.147580637131935e-06, "loss": 0.0394, "step": 7043 }, { "epoch": 0.77, "grad_norm": 0.17358674209485858, "learning_rate": 5.142817169811189e-06, "loss": 0.0248, "step": 7044 }, { "epoch": 0.77, "grad_norm": 0.20393352801096296, "learning_rate": 5.13805558231832e-06, "loss": 0.0348, "step": 7045 }, { "epoch": 0.77, "grad_norm": 0.19399537384463117, "learning_rate": 5.133295875255808e-06, "loss": 0.0184, "step": 7046 }, { "epoch": 0.77, "grad_norm": 0.18247770398607657, "learning_rate": 5.128538049225884e-06, "loss": 0.0293, "step": 7047 }, { "epoch": 0.77, "grad_norm": 0.20700894541519846, "learning_rate": 5.1237821048305305e-06, "loss": 0.0343, "step": 7048 }, { "epoch": 0.77, "grad_norm": 0.16088415669196585, "learning_rate": 5.119028042671501e-06, "loss": 0.0247, "step": 7049 }, { "epoch": 0.77, "grad_norm": 0.25981119659625457, "learning_rate": 5.114275863350313e-06, "loss": 0.0336, "step": 7050 }, { "epoch": 0.77, "grad_norm": 0.3039939395926302, "learning_rate": 5.109525567468252e-06, "loss": 0.0322, "step": 7051 }, { "epoch": 0.77, "grad_norm": 0.18998088279342515, "learning_rate": 5.104777155626341e-06, "loss": 0.0291, "step": 7052 }, { "epoch": 0.77, "grad_norm": 0.2860743361990016, "learning_rate": 5.100030628425388e-06, "loss": 0.0489, "step": 7053 }, { "epoch": 0.77, "grad_norm": 0.19724893971497856, "learning_rate": 5.095285986465952e-06, "loss": 0.0282, "step": 7054 }, { "epoch": 0.77, "grad_norm": 0.2687032392763007, "learning_rate": 5.090543230348366e-06, "loss": 0.0331, "step": 7055 }, { "epoch": 0.77, "grad_norm": 0.18303929468301616, "learning_rate": 5.085802360672701e-06, "loss": 0.0259, "step": 7056 }, { "epoch": 0.77, "grad_norm": 0.17444892996131578, "learning_rate": 5.081063378038806e-06, "loss": 0.0171, "step": 7057 }, { "epoch": 0.78, "grad_norm": 0.20331597520524242, "learning_rate": 5.076326283046291e-06, "loss": 0.031, "step": 7058 }, { "epoch": 0.78, "grad_norm": 0.41325724771816824, "learning_rate": 5.0715910762945245e-06, "loss": 0.0599, "step": 7059 }, { "epoch": 0.78, "grad_norm": 0.20221508235962637, "learning_rate": 5.066857758382642e-06, "loss": 0.029, "step": 7060 }, { "epoch": 0.78, "grad_norm": 0.2390238026234169, "learning_rate": 5.062126329909518e-06, "loss": 0.0393, "step": 7061 }, { "epoch": 0.78, "grad_norm": 0.21206172070583498, "learning_rate": 5.057396791473807e-06, "loss": 0.0252, "step": 7062 }, { "epoch": 0.78, "grad_norm": 0.20448550019355807, "learning_rate": 5.05266914367394e-06, "loss": 0.0414, "step": 7063 }, { "epoch": 0.78, "grad_norm": 0.20824523592957186, "learning_rate": 5.047943387108072e-06, "loss": 0.0365, "step": 7064 }, { "epoch": 0.78, "grad_norm": 0.286528785307214, "learning_rate": 5.043219522374138e-06, "loss": 0.0345, "step": 7065 }, { "epoch": 0.78, "grad_norm": 0.22294669699193048, "learning_rate": 5.038497550069836e-06, "loss": 0.0383, "step": 7066 }, { "epoch": 0.78, "grad_norm": 0.15719723872281507, "learning_rate": 5.033777470792627e-06, "loss": 0.0132, "step": 7067 }, { "epoch": 0.78, "grad_norm": 0.27240964618669405, "learning_rate": 5.029059285139715e-06, "loss": 0.0437, "step": 7068 }, { "epoch": 0.78, "grad_norm": 0.22839418265390438, "learning_rate": 5.024342993708078e-06, "loss": 0.0234, "step": 7069 }, { "epoch": 0.78, "grad_norm": 0.27050998838355206, "learning_rate": 5.019628597094455e-06, "loss": 0.0303, "step": 7070 }, { "epoch": 0.78, "grad_norm": 0.23354315959465716, "learning_rate": 5.014916095895346e-06, "loss": 0.0322, "step": 7071 }, { "epoch": 0.78, "grad_norm": 0.19399194608148468, "learning_rate": 5.010205490706998e-06, "loss": 0.0284, "step": 7072 }, { "epoch": 0.78, "grad_norm": 0.23230742994774076, "learning_rate": 5.005496782125434e-06, "loss": 0.0214, "step": 7073 }, { "epoch": 0.78, "grad_norm": 0.24544607947721744, "learning_rate": 5.000789970746427e-06, "loss": 0.0285, "step": 7074 }, { "epoch": 0.78, "grad_norm": 0.22568602220193534, "learning_rate": 4.996085057165523e-06, "loss": 0.0299, "step": 7075 }, { "epoch": 0.78, "grad_norm": 0.27522268653238274, "learning_rate": 4.9913820419780035e-06, "loss": 0.0391, "step": 7076 }, { "epoch": 0.78, "grad_norm": 0.2166246812898434, "learning_rate": 4.9866809257789325e-06, "loss": 0.0233, "step": 7077 }, { "epoch": 0.78, "grad_norm": 0.2193627291016119, "learning_rate": 4.981981709163126e-06, "loss": 0.0398, "step": 7078 }, { "epoch": 0.78, "grad_norm": 0.23855988049137086, "learning_rate": 4.977284392725166e-06, "loss": 0.0298, "step": 7079 }, { "epoch": 0.78, "grad_norm": 0.26524348067775994, "learning_rate": 4.972588977059369e-06, "loss": 0.048, "step": 7080 }, { "epoch": 0.78, "grad_norm": 0.216200110311705, "learning_rate": 4.967895462759846e-06, "loss": 0.0332, "step": 7081 }, { "epoch": 0.78, "grad_norm": 0.20991447289452403, "learning_rate": 4.963203850420455e-06, "loss": 0.0285, "step": 7082 }, { "epoch": 0.78, "grad_norm": 0.20960717672474216, "learning_rate": 4.958514140634794e-06, "loss": 0.0284, "step": 7083 }, { "epoch": 0.78, "grad_norm": 0.19099572005831705, "learning_rate": 4.953826333996243e-06, "loss": 0.0376, "step": 7084 }, { "epoch": 0.78, "grad_norm": 0.20503317022332884, "learning_rate": 4.949140431097934e-06, "loss": 0.036, "step": 7085 }, { "epoch": 0.78, "grad_norm": 0.3208452191050301, "learning_rate": 4.944456432532765e-06, "loss": 0.039, "step": 7086 }, { "epoch": 0.78, "grad_norm": 0.22067291761969213, "learning_rate": 4.93977433889337e-06, "loss": 0.0444, "step": 7087 }, { "epoch": 0.78, "grad_norm": 0.274916585820955, "learning_rate": 4.93509415077217e-06, "loss": 0.0474, "step": 7088 }, { "epoch": 0.78, "grad_norm": 0.2029797878597161, "learning_rate": 4.930415868761332e-06, "loss": 0.034, "step": 7089 }, { "epoch": 0.78, "grad_norm": 0.2955677715419463, "learning_rate": 4.925739493452783e-06, "loss": 0.0474, "step": 7090 }, { "epoch": 0.78, "grad_norm": 0.22196154384094519, "learning_rate": 4.9210650254382005e-06, "loss": 0.0362, "step": 7091 }, { "epoch": 0.78, "grad_norm": 0.1972197110970643, "learning_rate": 4.916392465309037e-06, "loss": 0.0239, "step": 7092 }, { "epoch": 0.78, "grad_norm": 0.3031942873352124, "learning_rate": 4.911721813656494e-06, "loss": 0.0339, "step": 7093 }, { "epoch": 0.78, "grad_norm": 0.28387594868557303, "learning_rate": 4.907053071071535e-06, "loss": 0.0466, "step": 7094 }, { "epoch": 0.78, "grad_norm": 0.22284934387974892, "learning_rate": 4.902386238144874e-06, "loss": 0.0329, "step": 7095 }, { "epoch": 0.78, "grad_norm": 0.23515567304217255, "learning_rate": 4.89772131546699e-06, "loss": 0.0268, "step": 7096 }, { "epoch": 0.78, "grad_norm": 0.19799529049009537, "learning_rate": 4.893058303628115e-06, "loss": 0.0323, "step": 7097 }, { "epoch": 0.78, "grad_norm": 0.19242451791810714, "learning_rate": 4.888397203218265e-06, "loss": 0.0287, "step": 7098 }, { "epoch": 0.78, "grad_norm": 0.18424288655476692, "learning_rate": 4.88373801482717e-06, "loss": 0.0356, "step": 7099 }, { "epoch": 0.78, "grad_norm": 0.27399806736522087, "learning_rate": 4.879080739044351e-06, "loss": 0.038, "step": 7100 }, { "epoch": 0.78, "grad_norm": 0.19106941362936872, "learning_rate": 4.874425376459073e-06, "loss": 0.0263, "step": 7101 }, { "epoch": 0.78, "grad_norm": 0.2049932031551427, "learning_rate": 4.869771927660371e-06, "loss": 0.0469, "step": 7102 }, { "epoch": 0.78, "grad_norm": 0.12816239805573748, "learning_rate": 4.865120393237019e-06, "loss": 0.0168, "step": 7103 }, { "epoch": 0.78, "grad_norm": 0.20605942417865544, "learning_rate": 4.860470773777566e-06, "loss": 0.023, "step": 7104 }, { "epoch": 0.78, "grad_norm": 0.26985367549628797, "learning_rate": 4.855823069870309e-06, "loss": 0.0371, "step": 7105 }, { "epoch": 0.78, "grad_norm": 0.22160236337652517, "learning_rate": 4.851177282103312e-06, "loss": 0.0414, "step": 7106 }, { "epoch": 0.78, "grad_norm": 0.2043381373965783, "learning_rate": 4.846533411064382e-06, "loss": 0.0359, "step": 7107 }, { "epoch": 0.78, "grad_norm": 0.22184569071216248, "learning_rate": 4.841891457341095e-06, "loss": 0.0332, "step": 7108 }, { "epoch": 0.78, "grad_norm": 0.27263404788587753, "learning_rate": 4.837251421520781e-06, "loss": 0.0376, "step": 7109 }, { "epoch": 0.78, "grad_norm": 0.23986932885273013, "learning_rate": 4.832613304190537e-06, "loss": 0.0342, "step": 7110 }, { "epoch": 0.78, "grad_norm": 0.265632348800429, "learning_rate": 4.827977105937194e-06, "loss": 0.028, "step": 7111 }, { "epoch": 0.78, "grad_norm": 0.23589280917835584, "learning_rate": 4.823342827347357e-06, "loss": 0.0474, "step": 7112 }, { "epoch": 0.78, "grad_norm": 0.24107644350900834, "learning_rate": 4.81871046900739e-06, "loss": 0.0332, "step": 7113 }, { "epoch": 0.78, "grad_norm": 0.23450582349923524, "learning_rate": 4.814080031503407e-06, "loss": 0.0288, "step": 7114 }, { "epoch": 0.78, "grad_norm": 0.3286385150987333, "learning_rate": 4.809451515421282e-06, "loss": 0.0402, "step": 7115 }, { "epoch": 0.78, "grad_norm": 0.24546769147386088, "learning_rate": 4.804824921346645e-06, "loss": 0.0481, "step": 7116 }, { "epoch": 0.78, "grad_norm": 0.28899800380096574, "learning_rate": 4.800200249864888e-06, "loss": 0.0491, "step": 7117 }, { "epoch": 0.78, "grad_norm": 0.19867234938396502, "learning_rate": 4.795577501561144e-06, "loss": 0.0332, "step": 7118 }, { "epoch": 0.78, "grad_norm": 0.18439832879232843, "learning_rate": 4.790956677020318e-06, "loss": 0.032, "step": 7119 }, { "epoch": 0.78, "grad_norm": 0.1806893305248692, "learning_rate": 4.786337776827066e-06, "loss": 0.0303, "step": 7120 }, { "epoch": 0.78, "grad_norm": 0.22563755386171844, "learning_rate": 4.78172080156581e-06, "loss": 0.026, "step": 7121 }, { "epoch": 0.78, "grad_norm": 0.16889515722127768, "learning_rate": 4.777105751820708e-06, "loss": 0.024, "step": 7122 }, { "epoch": 0.78, "grad_norm": 0.17531771471900265, "learning_rate": 4.772492628175689e-06, "loss": 0.0164, "step": 7123 }, { "epoch": 0.78, "grad_norm": 0.21394301511636027, "learning_rate": 4.767881431214441e-06, "loss": 0.034, "step": 7124 }, { "epoch": 0.78, "grad_norm": 0.2237262780385221, "learning_rate": 4.7632721615204026e-06, "loss": 0.0496, "step": 7125 }, { "epoch": 0.78, "grad_norm": 0.25818663240008555, "learning_rate": 4.758664819676759e-06, "loss": 0.0361, "step": 7126 }, { "epoch": 0.78, "grad_norm": 0.2407501081246582, "learning_rate": 4.754059406266469e-06, "loss": 0.0374, "step": 7127 }, { "epoch": 0.78, "grad_norm": 0.22687041799526456, "learning_rate": 4.7494559218722395e-06, "loss": 0.0336, "step": 7128 }, { "epoch": 0.78, "grad_norm": 0.22490714693758546, "learning_rate": 4.7448543670765365e-06, "loss": 0.0446, "step": 7129 }, { "epoch": 0.78, "grad_norm": 0.22175242369250836, "learning_rate": 4.74025474246157e-06, "loss": 0.0259, "step": 7130 }, { "epoch": 0.78, "grad_norm": 0.19588816683345786, "learning_rate": 4.735657048609313e-06, "loss": 0.0262, "step": 7131 }, { "epoch": 0.78, "grad_norm": 0.24040090789968613, "learning_rate": 4.7310612861015125e-06, "loss": 0.0373, "step": 7132 }, { "epoch": 0.78, "grad_norm": 0.29827285738808434, "learning_rate": 4.726467455519639e-06, "loss": 0.0439, "step": 7133 }, { "epoch": 0.78, "grad_norm": 0.24358759231810329, "learning_rate": 4.7218755574449394e-06, "loss": 0.036, "step": 7134 }, { "epoch": 0.78, "grad_norm": 0.2209561593810618, "learning_rate": 4.717285592458409e-06, "loss": 0.0506, "step": 7135 }, { "epoch": 0.78, "grad_norm": 0.227784920345592, "learning_rate": 4.712697561140802e-06, "loss": 0.0446, "step": 7136 }, { "epoch": 0.78, "grad_norm": 0.318987945491812, "learning_rate": 4.708111464072631e-06, "loss": 0.041, "step": 7137 }, { "epoch": 0.78, "grad_norm": 0.18720442601452222, "learning_rate": 4.703527301834148e-06, "loss": 0.0308, "step": 7138 }, { "epoch": 0.78, "grad_norm": 0.4186644424032141, "learning_rate": 4.698945075005376e-06, "loss": 0.059, "step": 7139 }, { "epoch": 0.78, "grad_norm": 0.1876109907344866, "learning_rate": 4.69436478416609e-06, "loss": 0.0286, "step": 7140 }, { "epoch": 0.78, "grad_norm": 0.1840340630239719, "learning_rate": 4.689786429895821e-06, "loss": 0.0329, "step": 7141 }, { "epoch": 0.78, "grad_norm": 0.17822269805489663, "learning_rate": 4.685210012773844e-06, "loss": 0.0262, "step": 7142 }, { "epoch": 0.78, "grad_norm": 0.2942919287533011, "learning_rate": 4.680635533379201e-06, "loss": 0.0369, "step": 7143 }, { "epoch": 0.78, "grad_norm": 0.2470827438785433, "learning_rate": 4.676062992290686e-06, "loss": 0.0255, "step": 7144 }, { "epoch": 0.78, "grad_norm": 0.2485888260186477, "learning_rate": 4.671492390086854e-06, "loss": 0.0276, "step": 7145 }, { "epoch": 0.78, "grad_norm": 0.28644153227809366, "learning_rate": 4.666923727345991e-06, "loss": 0.0424, "step": 7146 }, { "epoch": 0.78, "grad_norm": 0.19935511615460752, "learning_rate": 4.662357004646165e-06, "loss": 0.0306, "step": 7147 }, { "epoch": 0.78, "grad_norm": 0.24604430534526286, "learning_rate": 4.657792222565185e-06, "loss": 0.0358, "step": 7148 }, { "epoch": 0.79, "grad_norm": 0.22886739049467247, "learning_rate": 4.653229381680618e-06, "loss": 0.0588, "step": 7149 }, { "epoch": 0.79, "grad_norm": 0.2368928923429487, "learning_rate": 4.6486684825697845e-06, "loss": 0.0327, "step": 7150 }, { "epoch": 0.79, "grad_norm": 0.3019565434034133, "learning_rate": 4.644109525809759e-06, "loss": 0.0366, "step": 7151 }, { "epoch": 0.79, "grad_norm": 0.24875938777605114, "learning_rate": 4.639552511977374e-06, "loss": 0.0509, "step": 7152 }, { "epoch": 0.79, "grad_norm": 0.2015093408021646, "learning_rate": 4.6349974416492076e-06, "loss": 0.0293, "step": 7153 }, { "epoch": 0.79, "grad_norm": 0.17503663343784281, "learning_rate": 4.630444315401594e-06, "loss": 0.0365, "step": 7154 }, { "epoch": 0.79, "grad_norm": 0.22330631760068748, "learning_rate": 4.625893133810633e-06, "loss": 0.0302, "step": 7155 }, { "epoch": 0.79, "grad_norm": 0.16254767374240126, "learning_rate": 4.621343897452169e-06, "loss": 0.0273, "step": 7156 }, { "epoch": 0.79, "grad_norm": 0.2216907195988581, "learning_rate": 4.616796606901795e-06, "loss": 0.0432, "step": 7157 }, { "epoch": 0.79, "grad_norm": 0.1832942040817646, "learning_rate": 4.612251262734864e-06, "loss": 0.0199, "step": 7158 }, { "epoch": 0.79, "grad_norm": 0.2108783815686001, "learning_rate": 4.607707865526488e-06, "loss": 0.0525, "step": 7159 }, { "epoch": 0.79, "grad_norm": 0.24065777443954176, "learning_rate": 4.603166415851527e-06, "loss": 0.0382, "step": 7160 }, { "epoch": 0.79, "grad_norm": 0.2681096082106456, "learning_rate": 4.5986269142845915e-06, "loss": 0.045, "step": 7161 }, { "epoch": 0.79, "grad_norm": 0.21947794093903353, "learning_rate": 4.594089361400047e-06, "loss": 0.0337, "step": 7162 }, { "epoch": 0.79, "grad_norm": 0.23302886944486295, "learning_rate": 4.5895537577720205e-06, "loss": 0.0213, "step": 7163 }, { "epoch": 0.79, "grad_norm": 0.19341405422930102, "learning_rate": 4.585020103974387e-06, "loss": 0.0178, "step": 7164 }, { "epoch": 0.79, "grad_norm": 0.19684455802740736, "learning_rate": 4.580488400580765e-06, "loss": 0.029, "step": 7165 }, { "epoch": 0.79, "grad_norm": 0.2486288497381265, "learning_rate": 4.575958648164536e-06, "loss": 0.0471, "step": 7166 }, { "epoch": 0.79, "grad_norm": 0.26865002413781003, "learning_rate": 4.571430847298847e-06, "loss": 0.0431, "step": 7167 }, { "epoch": 0.79, "grad_norm": 0.19638044213537073, "learning_rate": 4.5669049985565735e-06, "loss": 0.0294, "step": 7168 }, { "epoch": 0.79, "grad_norm": 0.1851259878835393, "learning_rate": 4.5623811025103585e-06, "loss": 0.0257, "step": 7169 }, { "epoch": 0.79, "grad_norm": 0.22322706202999607, "learning_rate": 4.5578591597325935e-06, "loss": 0.0313, "step": 7170 }, { "epoch": 0.79, "grad_norm": 0.3013621331691586, "learning_rate": 4.553339170795432e-06, "loss": 0.0442, "step": 7171 }, { "epoch": 0.79, "grad_norm": 0.18648484714609698, "learning_rate": 4.54882113627076e-06, "loss": 0.0289, "step": 7172 }, { "epoch": 0.79, "grad_norm": 0.2056967713808739, "learning_rate": 4.544305056730236e-06, "loss": 0.0252, "step": 7173 }, { "epoch": 0.79, "grad_norm": 0.2147100379307889, "learning_rate": 4.53979093274526e-06, "loss": 0.0399, "step": 7174 }, { "epoch": 0.79, "grad_norm": 0.18553464748850038, "learning_rate": 4.535278764886994e-06, "loss": 0.0197, "step": 7175 }, { "epoch": 0.79, "grad_norm": 0.24516378013079404, "learning_rate": 4.530768553726348e-06, "loss": 0.0474, "step": 7176 }, { "epoch": 0.79, "grad_norm": 0.24318379394875955, "learning_rate": 4.526260299833975e-06, "loss": 0.037, "step": 7177 }, { "epoch": 0.79, "grad_norm": 0.3943790293214147, "learning_rate": 4.521754003780294e-06, "loss": 0.0511, "step": 7178 }, { "epoch": 0.79, "grad_norm": 0.1896732663717066, "learning_rate": 4.517249666135468e-06, "loss": 0.0267, "step": 7179 }, { "epoch": 0.79, "grad_norm": 0.215306520572789, "learning_rate": 4.512747287469426e-06, "loss": 0.0336, "step": 7180 }, { "epoch": 0.79, "grad_norm": 0.16702007038611053, "learning_rate": 4.508246868351822e-06, "loss": 0.0285, "step": 7181 }, { "epoch": 0.79, "grad_norm": 0.24656831306596144, "learning_rate": 4.503748409352089e-06, "loss": 0.0454, "step": 7182 }, { "epoch": 0.79, "grad_norm": 0.18917855294907626, "learning_rate": 4.499251911039397e-06, "loss": 0.0266, "step": 7183 }, { "epoch": 0.79, "grad_norm": 0.31749326691284085, "learning_rate": 4.494757373982674e-06, "loss": 0.0401, "step": 7184 }, { "epoch": 0.79, "grad_norm": 0.2008329239635991, "learning_rate": 4.490264798750599e-06, "loss": 0.0302, "step": 7185 }, { "epoch": 0.79, "grad_norm": 0.24711993627565226, "learning_rate": 4.4857741859116024e-06, "loss": 0.0332, "step": 7186 }, { "epoch": 0.79, "grad_norm": 0.1963125565925621, "learning_rate": 4.4812855360338684e-06, "loss": 0.0267, "step": 7187 }, { "epoch": 0.79, "grad_norm": 0.30581139861781137, "learning_rate": 4.476798849685322e-06, "loss": 0.0303, "step": 7188 }, { "epoch": 0.79, "grad_norm": 0.2517301770676468, "learning_rate": 4.472314127433654e-06, "loss": 0.0295, "step": 7189 }, { "epoch": 0.79, "grad_norm": 0.2385067654290901, "learning_rate": 4.467831369846301e-06, "loss": 0.0538, "step": 7190 }, { "epoch": 0.79, "grad_norm": 0.18815070727537506, "learning_rate": 4.4633505774904525e-06, "loss": 0.0296, "step": 7191 }, { "epoch": 0.79, "grad_norm": 0.17006587361554348, "learning_rate": 4.458871750933038e-06, "loss": 0.0225, "step": 7192 }, { "epoch": 0.79, "grad_norm": 0.18014816651484594, "learning_rate": 4.4543948907407565e-06, "loss": 0.0317, "step": 7193 }, { "epoch": 0.79, "grad_norm": 0.29838820095209245, "learning_rate": 4.449919997480047e-06, "loss": 0.0404, "step": 7194 }, { "epoch": 0.79, "grad_norm": 0.23245130080189177, "learning_rate": 4.445447071717111e-06, "loss": 0.0255, "step": 7195 }, { "epoch": 0.79, "grad_norm": 0.23549077360693527, "learning_rate": 4.4409761140178765e-06, "loss": 0.0313, "step": 7196 }, { "epoch": 0.79, "grad_norm": 0.3126686356439457, "learning_rate": 4.4365071249480465e-06, "loss": 0.0408, "step": 7197 }, { "epoch": 0.79, "grad_norm": 0.2498958713517684, "learning_rate": 4.432040105073065e-06, "loss": 0.0456, "step": 7198 }, { "epoch": 0.79, "grad_norm": 0.20521245825602638, "learning_rate": 4.427575054958135e-06, "loss": 0.0502, "step": 7199 }, { "epoch": 0.79, "grad_norm": 0.2496850891378838, "learning_rate": 4.4231119751681885e-06, "loss": 0.0462, "step": 7200 }, { "epoch": 0.79, "grad_norm": 0.22993245200619342, "learning_rate": 4.418650866267939e-06, "loss": 0.0405, "step": 7201 }, { "epoch": 0.79, "grad_norm": 0.2232953986883259, "learning_rate": 4.414191728821838e-06, "loss": 0.041, "step": 7202 }, { "epoch": 0.79, "grad_norm": 0.19814779355074183, "learning_rate": 4.409734563394068e-06, "loss": 0.0449, "step": 7203 }, { "epoch": 0.79, "grad_norm": 0.17990734293428287, "learning_rate": 4.405279370548587e-06, "loss": 0.025, "step": 7204 }, { "epoch": 0.79, "grad_norm": 0.18586921800523507, "learning_rate": 4.400826150849096e-06, "loss": 0.0366, "step": 7205 }, { "epoch": 0.79, "grad_norm": 0.2316034476952561, "learning_rate": 4.396374904859051e-06, "loss": 0.0416, "step": 7206 }, { "epoch": 0.79, "grad_norm": 0.32250203795491444, "learning_rate": 4.391925633141638e-06, "loss": 0.0668, "step": 7207 }, { "epoch": 0.79, "grad_norm": 0.244388149668675, "learning_rate": 4.387478336259821e-06, "loss": 0.0468, "step": 7208 }, { "epoch": 0.79, "grad_norm": 0.2485689691052336, "learning_rate": 4.383033014776295e-06, "loss": 0.0531, "step": 7209 }, { "epoch": 0.79, "grad_norm": 0.28475634084880813, "learning_rate": 4.3785896692535165e-06, "loss": 0.0433, "step": 7210 }, { "epoch": 0.79, "grad_norm": 0.2130011088108475, "learning_rate": 4.3741483002536775e-06, "loss": 0.0208, "step": 7211 }, { "epoch": 0.79, "grad_norm": 0.3025118101982933, "learning_rate": 4.369708908338735e-06, "loss": 0.0449, "step": 7212 }, { "epoch": 0.79, "grad_norm": 0.24423563824992509, "learning_rate": 4.365271494070389e-06, "loss": 0.0242, "step": 7213 }, { "epoch": 0.79, "grad_norm": 0.1787324277536797, "learning_rate": 4.360836058010096e-06, "loss": 0.032, "step": 7214 }, { "epoch": 0.79, "grad_norm": 0.27283793519818544, "learning_rate": 4.356402600719045e-06, "loss": 0.0352, "step": 7215 }, { "epoch": 0.79, "grad_norm": 0.2742632658600584, "learning_rate": 4.351971122758194e-06, "loss": 0.0275, "step": 7216 }, { "epoch": 0.79, "grad_norm": 0.2002178365550513, "learning_rate": 4.3475416246882385e-06, "loss": 0.0396, "step": 7217 }, { "epoch": 0.79, "grad_norm": 0.270840987072905, "learning_rate": 4.343114107069628e-06, "loss": 0.0592, "step": 7218 }, { "epoch": 0.79, "grad_norm": 0.29874445173865516, "learning_rate": 4.338688570462563e-06, "loss": 0.0514, "step": 7219 }, { "epoch": 0.79, "grad_norm": 0.15539143235585182, "learning_rate": 4.334265015426993e-06, "loss": 0.0277, "step": 7220 }, { "epoch": 0.79, "grad_norm": 0.25961655089396246, "learning_rate": 4.32984344252261e-06, "loss": 0.0347, "step": 7221 }, { "epoch": 0.79, "grad_norm": 0.2920618899024944, "learning_rate": 4.3254238523088695e-06, "loss": 0.0388, "step": 7222 }, { "epoch": 0.79, "grad_norm": 0.2515800286340595, "learning_rate": 4.3210062453449545e-06, "loss": 0.0376, "step": 7223 }, { "epoch": 0.79, "grad_norm": 0.2950708126892183, "learning_rate": 4.316590622189815e-06, "loss": 0.0392, "step": 7224 }, { "epoch": 0.79, "grad_norm": 0.2314437351092575, "learning_rate": 4.312176983402145e-06, "loss": 0.035, "step": 7225 }, { "epoch": 0.79, "grad_norm": 0.28408489415997096, "learning_rate": 4.307765329540394e-06, "loss": 0.0299, "step": 7226 }, { "epoch": 0.79, "grad_norm": 0.24959465783420848, "learning_rate": 4.303355661162741e-06, "loss": 0.0483, "step": 7227 }, { "epoch": 0.79, "grad_norm": 0.2076560525053648, "learning_rate": 4.298947978827128e-06, "loss": 0.034, "step": 7228 }, { "epoch": 0.79, "grad_norm": 0.25602230679101495, "learning_rate": 4.2945422830912495e-06, "loss": 0.0402, "step": 7229 }, { "epoch": 0.79, "grad_norm": 0.2768406670598191, "learning_rate": 4.290138574512546e-06, "loss": 0.0221, "step": 7230 }, { "epoch": 0.79, "grad_norm": 0.219052956464382, "learning_rate": 4.285736853648193e-06, "loss": 0.0334, "step": 7231 }, { "epoch": 0.79, "grad_norm": 0.2252603054965118, "learning_rate": 4.2813371210551294e-06, "loss": 0.0363, "step": 7232 }, { "epoch": 0.79, "grad_norm": 0.2148567282484756, "learning_rate": 4.276939377290037e-06, "loss": 0.0405, "step": 7233 }, { "epoch": 0.79, "grad_norm": 0.1946691885766725, "learning_rate": 4.272543622909355e-06, "loss": 0.0214, "step": 7234 }, { "epoch": 0.79, "grad_norm": 0.17487108045961364, "learning_rate": 4.268149858469246e-06, "loss": 0.0183, "step": 7235 }, { "epoch": 0.79, "grad_norm": 0.21066032849155022, "learning_rate": 4.263758084525656e-06, "loss": 0.0303, "step": 7236 }, { "epoch": 0.79, "grad_norm": 0.24427570420853645, "learning_rate": 4.259368301634257e-06, "loss": 0.0381, "step": 7237 }, { "epoch": 0.79, "grad_norm": 0.2085337370363043, "learning_rate": 4.254980510350464e-06, "loss": 0.027, "step": 7238 }, { "epoch": 0.79, "grad_norm": 0.2646034878215807, "learning_rate": 4.250594711229454e-06, "loss": 0.034, "step": 7239 }, { "epoch": 0.8, "grad_norm": 0.30299725347791207, "learning_rate": 4.246210904826149e-06, "loss": 0.0341, "step": 7240 }, { "epoch": 0.8, "grad_norm": 0.1829265890565706, "learning_rate": 4.24182909169522e-06, "loss": 0.0276, "step": 7241 }, { "epoch": 0.8, "grad_norm": 0.26593498766706414, "learning_rate": 4.237449272391072e-06, "loss": 0.0326, "step": 7242 }, { "epoch": 0.8, "grad_norm": 0.202994066000166, "learning_rate": 4.233071447467876e-06, "loss": 0.0269, "step": 7243 }, { "epoch": 0.8, "grad_norm": 0.23736513411904328, "learning_rate": 4.228695617479541e-06, "loss": 0.032, "step": 7244 }, { "epoch": 0.8, "grad_norm": 0.19945480348961828, "learning_rate": 4.22432178297973e-06, "loss": 0.0313, "step": 7245 }, { "epoch": 0.8, "grad_norm": 0.1747158719976968, "learning_rate": 4.219949944521842e-06, "loss": 0.0347, "step": 7246 }, { "epoch": 0.8, "grad_norm": 0.20491102768012978, "learning_rate": 4.215580102659031e-06, "loss": 0.0238, "step": 7247 }, { "epoch": 0.8, "grad_norm": 0.19409592342188592, "learning_rate": 4.2112122579442015e-06, "loss": 0.0446, "step": 7248 }, { "epoch": 0.8, "grad_norm": 0.24526514094294566, "learning_rate": 4.206846410930008e-06, "loss": 0.0375, "step": 7249 }, { "epoch": 0.8, "grad_norm": 0.18689115338059054, "learning_rate": 4.202482562168832e-06, "loss": 0.027, "step": 7250 }, { "epoch": 0.8, "grad_norm": 0.22866039092195897, "learning_rate": 4.198120712212823e-06, "loss": 0.0273, "step": 7251 }, { "epoch": 0.8, "grad_norm": 0.20828330015359683, "learning_rate": 4.193760861613865e-06, "loss": 0.0347, "step": 7252 }, { "epoch": 0.8, "grad_norm": 0.23221984866409456, "learning_rate": 4.189403010923614e-06, "loss": 0.0302, "step": 7253 }, { "epoch": 0.8, "grad_norm": 0.25478049669139174, "learning_rate": 4.185047160693432e-06, "loss": 0.0527, "step": 7254 }, { "epoch": 0.8, "grad_norm": 0.1891855729908844, "learning_rate": 4.18069331147446e-06, "loss": 0.03, "step": 7255 }, { "epoch": 0.8, "grad_norm": 0.1495294561647225, "learning_rate": 4.176341463817573e-06, "loss": 0.0217, "step": 7256 }, { "epoch": 0.8, "grad_norm": 0.2243583188324651, "learning_rate": 4.171991618273401e-06, "loss": 0.0369, "step": 7257 }, { "epoch": 0.8, "grad_norm": 0.19205187881514563, "learning_rate": 4.167643775392305e-06, "loss": 0.0236, "step": 7258 }, { "epoch": 0.8, "grad_norm": 0.24354774966908543, "learning_rate": 4.1632979357244065e-06, "loss": 0.0401, "step": 7259 }, { "epoch": 0.8, "grad_norm": 0.24764599282476804, "learning_rate": 4.1589540998195695e-06, "loss": 0.0408, "step": 7260 }, { "epoch": 0.8, "grad_norm": 0.3020765972482597, "learning_rate": 4.1546122682274114e-06, "loss": 0.0442, "step": 7261 }, { "epoch": 0.8, "grad_norm": 0.2176942039692882, "learning_rate": 4.150272441497276e-06, "loss": 0.0349, "step": 7262 }, { "epoch": 0.8, "grad_norm": 0.27023797744418715, "learning_rate": 4.145934620178276e-06, "loss": 0.0431, "step": 7263 }, { "epoch": 0.8, "grad_norm": 0.21292073798035252, "learning_rate": 4.141598804819256e-06, "loss": 0.045, "step": 7264 }, { "epoch": 0.8, "grad_norm": 0.14978775341504655, "learning_rate": 4.137264995968821e-06, "loss": 0.0146, "step": 7265 }, { "epoch": 0.8, "grad_norm": 0.20463800887571834, "learning_rate": 4.132933194175299e-06, "loss": 0.0326, "step": 7266 }, { "epoch": 0.8, "grad_norm": 0.37764577526738075, "learning_rate": 4.128603399986786e-06, "loss": 0.061, "step": 7267 }, { "epoch": 0.8, "grad_norm": 0.23873546958066713, "learning_rate": 4.124275613951114e-06, "loss": 0.0421, "step": 7268 }, { "epoch": 0.8, "grad_norm": 0.33564860097567145, "learning_rate": 4.119949836615864e-06, "loss": 0.0279, "step": 7269 }, { "epoch": 0.8, "grad_norm": 0.2066706179537851, "learning_rate": 4.115626068528362e-06, "loss": 0.0269, "step": 7270 }, { "epoch": 0.8, "grad_norm": 0.25533851722095974, "learning_rate": 4.111304310235677e-06, "loss": 0.0412, "step": 7271 }, { "epoch": 0.8, "grad_norm": 0.1839250051679114, "learning_rate": 4.106984562284633e-06, "loss": 0.0298, "step": 7272 }, { "epoch": 0.8, "grad_norm": 0.2661371902784373, "learning_rate": 4.102666825221782e-06, "loss": 0.0343, "step": 7273 }, { "epoch": 0.8, "grad_norm": 0.162178926599613, "learning_rate": 4.0983510995934365e-06, "loss": 0.0308, "step": 7274 }, { "epoch": 0.8, "grad_norm": 0.22450044727280885, "learning_rate": 4.094037385945653e-06, "loss": 0.0263, "step": 7275 }, { "epoch": 0.8, "grad_norm": 0.2022797623280109, "learning_rate": 4.089725684824235e-06, "loss": 0.0389, "step": 7276 }, { "epoch": 0.8, "grad_norm": 0.2673571990223701, "learning_rate": 4.085415996774713e-06, "loss": 0.0415, "step": 7277 }, { "epoch": 0.8, "grad_norm": 0.20757784753335104, "learning_rate": 4.081108322342389e-06, "loss": 0.0231, "step": 7278 }, { "epoch": 0.8, "grad_norm": 0.19718298739122775, "learning_rate": 4.076802662072292e-06, "loss": 0.0292, "step": 7279 }, { "epoch": 0.8, "grad_norm": 0.2418098273682959, "learning_rate": 4.07249901650921e-06, "loss": 0.0374, "step": 7280 }, { "epoch": 0.8, "grad_norm": 0.21103589977043397, "learning_rate": 4.068197386197657e-06, "loss": 0.0315, "step": 7281 }, { "epoch": 0.8, "grad_norm": 0.3596431312405758, "learning_rate": 4.0638977716819105e-06, "loss": 0.052, "step": 7282 }, { "epoch": 0.8, "grad_norm": 0.17590913359270413, "learning_rate": 4.059600173505984e-06, "loss": 0.0149, "step": 7283 }, { "epoch": 0.8, "grad_norm": 0.23740582606759936, "learning_rate": 4.055304592213645e-06, "loss": 0.0297, "step": 7284 }, { "epoch": 0.8, "grad_norm": 0.27166826894693274, "learning_rate": 4.051011028348384e-06, "loss": 0.0379, "step": 7285 }, { "epoch": 0.8, "grad_norm": 0.21230503352469537, "learning_rate": 4.046719482453461e-06, "loss": 0.0262, "step": 7286 }, { "epoch": 0.8, "grad_norm": 0.23170149583942842, "learning_rate": 4.042429955071867e-06, "loss": 0.0458, "step": 7287 }, { "epoch": 0.8, "grad_norm": 0.4131956202849141, "learning_rate": 4.038142446746342e-06, "loss": 0.0699, "step": 7288 }, { "epoch": 0.8, "grad_norm": 0.2397054416352333, "learning_rate": 4.033856958019371e-06, "loss": 0.032, "step": 7289 }, { "epoch": 0.8, "grad_norm": 0.1754527253912369, "learning_rate": 4.029573489433179e-06, "loss": 0.0212, "step": 7290 }, { "epoch": 0.8, "grad_norm": 0.16757525883960098, "learning_rate": 4.025292041529747e-06, "loss": 0.0149, "step": 7291 }, { "epoch": 0.8, "grad_norm": 0.16581500778429892, "learning_rate": 4.021012614850779e-06, "loss": 0.0234, "step": 7292 }, { "epoch": 0.8, "grad_norm": 0.16963062318450156, "learning_rate": 4.016735209937741e-06, "loss": 0.0317, "step": 7293 }, { "epoch": 0.8, "grad_norm": 0.1468770676325282, "learning_rate": 4.012459827331841e-06, "loss": 0.014, "step": 7294 }, { "epoch": 0.8, "grad_norm": 0.1881485590171477, "learning_rate": 4.008186467574027e-06, "loss": 0.0206, "step": 7295 }, { "epoch": 0.8, "grad_norm": 0.2596238114885833, "learning_rate": 4.003915131204996e-06, "loss": 0.0442, "step": 7296 }, { "epoch": 0.8, "grad_norm": 0.32493470893273096, "learning_rate": 3.999645818765176e-06, "loss": 0.0449, "step": 7297 }, { "epoch": 0.8, "grad_norm": 0.3892719103551836, "learning_rate": 3.995378530794754e-06, "loss": 0.0611, "step": 7298 }, { "epoch": 0.8, "grad_norm": 0.18195816709510693, "learning_rate": 3.991113267833655e-06, "loss": 0.0242, "step": 7299 }, { "epoch": 0.8, "grad_norm": 0.3471964678219904, "learning_rate": 3.986850030421554e-06, "loss": 0.0393, "step": 7300 }, { "epoch": 0.8, "grad_norm": 0.343517831947537, "learning_rate": 3.98258881909785e-06, "loss": 0.0379, "step": 7301 }, { "epoch": 0.8, "grad_norm": 0.20391452012519146, "learning_rate": 3.97832963440171e-06, "loss": 0.0374, "step": 7302 }, { "epoch": 0.8, "grad_norm": 0.22616040788744537, "learning_rate": 3.974072476872028e-06, "loss": 0.0355, "step": 7303 }, { "epoch": 0.8, "grad_norm": 0.2003873694677798, "learning_rate": 3.969817347047451e-06, "loss": 0.0274, "step": 7304 }, { "epoch": 0.8, "grad_norm": 0.20780992721456482, "learning_rate": 3.965564245466365e-06, "loss": 0.0296, "step": 7305 }, { "epoch": 0.8, "grad_norm": 0.28346236629823574, "learning_rate": 3.961313172666898e-06, "loss": 0.0352, "step": 7306 }, { "epoch": 0.8, "grad_norm": 0.21140964085241257, "learning_rate": 3.957064129186934e-06, "loss": 0.033, "step": 7307 }, { "epoch": 0.8, "grad_norm": 0.16740479163637847, "learning_rate": 3.952817115564076e-06, "loss": 0.0144, "step": 7308 }, { "epoch": 0.8, "grad_norm": 0.2089287514505321, "learning_rate": 3.948572132335688e-06, "loss": 0.0236, "step": 7309 }, { "epoch": 0.8, "grad_norm": 0.20734197762428194, "learning_rate": 3.944329180038875e-06, "loss": 0.0413, "step": 7310 }, { "epoch": 0.8, "grad_norm": 0.23408757705707578, "learning_rate": 3.940088259210488e-06, "loss": 0.031, "step": 7311 }, { "epoch": 0.8, "grad_norm": 0.2880421104068135, "learning_rate": 3.935849370387104e-06, "loss": 0.0418, "step": 7312 }, { "epoch": 0.8, "grad_norm": 0.1979171644171094, "learning_rate": 3.931612514105061e-06, "loss": 0.028, "step": 7313 }, { "epoch": 0.8, "grad_norm": 0.1961261870619989, "learning_rate": 3.927377690900436e-06, "loss": 0.0289, "step": 7314 }, { "epoch": 0.8, "grad_norm": 0.20006336422846477, "learning_rate": 3.923144901309048e-06, "loss": 0.024, "step": 7315 }, { "epoch": 0.8, "grad_norm": 0.19950155679494494, "learning_rate": 3.91891414586645e-06, "loss": 0.041, "step": 7316 }, { "epoch": 0.8, "grad_norm": 0.22234907209577182, "learning_rate": 3.914685425107949e-06, "loss": 0.0212, "step": 7317 }, { "epoch": 0.8, "grad_norm": 0.3115815494112536, "learning_rate": 3.91045873956859e-06, "loss": 0.0509, "step": 7318 }, { "epoch": 0.8, "grad_norm": 0.23922800929452773, "learning_rate": 3.906234089783165e-06, "loss": 0.0358, "step": 7319 }, { "epoch": 0.8, "grad_norm": 0.30222051755695734, "learning_rate": 3.902011476286196e-06, "loss": 0.0419, "step": 7320 }, { "epoch": 0.8, "grad_norm": 0.23292987668024775, "learning_rate": 3.897790899611955e-06, "loss": 0.0367, "step": 7321 }, { "epoch": 0.8, "grad_norm": 0.19807583072420265, "learning_rate": 3.893572360294471e-06, "loss": 0.0311, "step": 7322 }, { "epoch": 0.8, "grad_norm": 0.31309892957054214, "learning_rate": 3.889355858867487e-06, "loss": 0.0381, "step": 7323 }, { "epoch": 0.8, "grad_norm": 0.20455641917320452, "learning_rate": 3.885141395864509e-06, "loss": 0.015, "step": 7324 }, { "epoch": 0.8, "grad_norm": 0.29357354363079663, "learning_rate": 3.880928971818776e-06, "loss": 0.043, "step": 7325 }, { "epoch": 0.8, "grad_norm": 0.21896055850765442, "learning_rate": 3.876718587263278e-06, "loss": 0.0324, "step": 7326 }, { "epoch": 0.8, "grad_norm": 0.22726155004326407, "learning_rate": 3.872510242730727e-06, "loss": 0.0389, "step": 7327 }, { "epoch": 0.8, "grad_norm": 0.23160600516101507, "learning_rate": 3.868303938753599e-06, "loss": 0.0319, "step": 7328 }, { "epoch": 0.8, "grad_norm": 0.19124101785996905, "learning_rate": 3.864099675864104e-06, "loss": 0.0309, "step": 7329 }, { "epoch": 0.8, "grad_norm": 0.1465854042066699, "learning_rate": 3.859897454594192e-06, "loss": 0.0241, "step": 7330 }, { "epoch": 0.81, "grad_norm": 0.25508352393706396, "learning_rate": 3.855697275475552e-06, "loss": 0.0393, "step": 7331 }, { "epoch": 0.81, "grad_norm": 0.21110589758356837, "learning_rate": 3.851499139039618e-06, "loss": 0.0306, "step": 7332 }, { "epoch": 0.81, "grad_norm": 0.22957523547728229, "learning_rate": 3.84730304581757e-06, "loss": 0.0232, "step": 7333 }, { "epoch": 0.81, "grad_norm": 0.23355907036037143, "learning_rate": 3.843108996340323e-06, "loss": 0.0335, "step": 7334 }, { "epoch": 0.81, "grad_norm": 0.1494888168525127, "learning_rate": 3.838916991138542e-06, "loss": 0.0133, "step": 7335 }, { "epoch": 0.81, "grad_norm": 0.19305459479871245, "learning_rate": 3.834727030742613e-06, "loss": 0.0268, "step": 7336 }, { "epoch": 0.81, "grad_norm": 0.2578191467353125, "learning_rate": 3.830539115682685e-06, "loss": 0.0439, "step": 7337 }, { "epoch": 0.81, "grad_norm": 0.2616383585656577, "learning_rate": 3.826353246488641e-06, "loss": 0.0339, "step": 7338 }, { "epoch": 0.81, "grad_norm": 0.21180586096128393, "learning_rate": 3.822169423690103e-06, "loss": 0.0452, "step": 7339 }, { "epoch": 0.81, "grad_norm": 0.24691313618041288, "learning_rate": 3.817987647816437e-06, "loss": 0.0415, "step": 7340 }, { "epoch": 0.81, "grad_norm": 0.3407690015058231, "learning_rate": 3.813807919396748e-06, "loss": 0.0497, "step": 7341 }, { "epoch": 0.81, "grad_norm": 0.2154006156776665, "learning_rate": 3.809630238959887e-06, "loss": 0.0342, "step": 7342 }, { "epoch": 0.81, "grad_norm": 0.26624987535070804, "learning_rate": 3.805454607034433e-06, "loss": 0.0364, "step": 7343 }, { "epoch": 0.81, "grad_norm": 0.2391253669205526, "learning_rate": 3.8012810241487175e-06, "loss": 0.0227, "step": 7344 }, { "epoch": 0.81, "grad_norm": 0.17285098598405632, "learning_rate": 3.7971094908308105e-06, "loss": 0.0231, "step": 7345 }, { "epoch": 0.81, "grad_norm": 0.23947382926109162, "learning_rate": 3.7929400076085255e-06, "loss": 0.0488, "step": 7346 }, { "epoch": 0.81, "grad_norm": 0.3161221865630236, "learning_rate": 3.7887725750094027e-06, "loss": 0.0417, "step": 7347 }, { "epoch": 0.81, "grad_norm": 0.21156787282299797, "learning_rate": 3.7846071935607408e-06, "loss": 0.038, "step": 7348 }, { "epoch": 0.81, "grad_norm": 0.2264279426360328, "learning_rate": 3.780443863789569e-06, "loss": 0.0222, "step": 7349 }, { "epoch": 0.81, "grad_norm": 0.2283804159294777, "learning_rate": 3.7762825862226637e-06, "loss": 0.046, "step": 7350 }, { "epoch": 0.81, "grad_norm": 0.2900027371145845, "learning_rate": 3.772123361386526e-06, "loss": 0.0401, "step": 7351 }, { "epoch": 0.81, "grad_norm": 0.2998607595488389, "learning_rate": 3.767966189807415e-06, "loss": 0.045, "step": 7352 }, { "epoch": 0.81, "grad_norm": 0.25326245190509966, "learning_rate": 3.763811072011325e-06, "loss": 0.0363, "step": 7353 }, { "epoch": 0.81, "grad_norm": 0.22838346621131062, "learning_rate": 3.7596580085239897e-06, "loss": 0.033, "step": 7354 }, { "epoch": 0.81, "grad_norm": 0.2895025435049362, "learning_rate": 3.7555069998708726e-06, "loss": 0.046, "step": 7355 }, { "epoch": 0.81, "grad_norm": 0.20909757311382712, "learning_rate": 3.7513580465771893e-06, "loss": 0.0274, "step": 7356 }, { "epoch": 0.81, "grad_norm": 0.21307020749949826, "learning_rate": 3.7472111491679044e-06, "loss": 0.0374, "step": 7357 }, { "epoch": 0.81, "grad_norm": 0.27735599974323055, "learning_rate": 3.7430663081676977e-06, "loss": 0.075, "step": 7358 }, { "epoch": 0.81, "grad_norm": 0.26025603903635497, "learning_rate": 3.7389235241010037e-06, "loss": 0.033, "step": 7359 }, { "epoch": 0.81, "grad_norm": 0.3056462316217732, "learning_rate": 3.734782797491998e-06, "loss": 0.0544, "step": 7360 }, { "epoch": 0.81, "grad_norm": 0.2198834516116484, "learning_rate": 3.7306441288645956e-06, "loss": 0.0339, "step": 7361 }, { "epoch": 0.81, "grad_norm": 0.26190651378520446, "learning_rate": 3.7265075187424373e-06, "loss": 0.024, "step": 7362 }, { "epoch": 0.81, "grad_norm": 0.2281713663952483, "learning_rate": 3.7223729676489196e-06, "loss": 0.0335, "step": 7363 }, { "epoch": 0.81, "grad_norm": 0.2019241120325882, "learning_rate": 3.7182404761071735e-06, "loss": 0.0348, "step": 7364 }, { "epoch": 0.81, "grad_norm": 0.2108367485041814, "learning_rate": 3.7141100446400733e-06, "loss": 0.0322, "step": 7365 }, { "epoch": 0.81, "grad_norm": 0.21884807874305998, "learning_rate": 3.7099816737702197e-06, "loss": 0.0328, "step": 7366 }, { "epoch": 0.81, "grad_norm": 0.265129328110118, "learning_rate": 3.7058553640199635e-06, "loss": 0.0455, "step": 7367 }, { "epoch": 0.81, "grad_norm": 0.2188236844757285, "learning_rate": 3.7017311159113956e-06, "loss": 0.0207, "step": 7368 }, { "epoch": 0.81, "grad_norm": 0.161858329798298, "learning_rate": 3.697608929966343e-06, "loss": 0.0232, "step": 7369 }, { "epoch": 0.81, "grad_norm": 0.24779953226360146, "learning_rate": 3.6934888067063667e-06, "loss": 0.0365, "step": 7370 }, { "epoch": 0.81, "grad_norm": 0.20443720802617485, "learning_rate": 3.689370746652774e-06, "loss": 0.0382, "step": 7371 }, { "epoch": 0.81, "grad_norm": 0.1883988294255283, "learning_rate": 3.68525475032661e-06, "loss": 0.0261, "step": 7372 }, { "epoch": 0.81, "grad_norm": 0.26848847006348997, "learning_rate": 3.681140818248654e-06, "loss": 0.0293, "step": 7373 }, { "epoch": 0.81, "grad_norm": 0.20774241427230264, "learning_rate": 3.677028950939434e-06, "loss": 0.0243, "step": 7374 }, { "epoch": 0.81, "grad_norm": 0.25525829092210983, "learning_rate": 3.6729191489192053e-06, "loss": 0.0348, "step": 7375 }, { "epoch": 0.81, "grad_norm": 0.20558128393706007, "learning_rate": 3.6688114127079665e-06, "loss": 0.0255, "step": 7376 }, { "epoch": 0.81, "grad_norm": 0.21004945530292993, "learning_rate": 3.664705742825463e-06, "loss": 0.0326, "step": 7377 }, { "epoch": 0.81, "grad_norm": 0.19785852372932963, "learning_rate": 3.6606021397911605e-06, "loss": 0.0304, "step": 7378 }, { "epoch": 0.81, "grad_norm": 0.2569349119992803, "learning_rate": 3.656500604124278e-06, "loss": 0.031, "step": 7379 }, { "epoch": 0.81, "grad_norm": 0.2174463681301021, "learning_rate": 3.652401136343768e-06, "loss": 0.0389, "step": 7380 }, { "epoch": 0.81, "grad_norm": 0.22736589448447986, "learning_rate": 3.6483037369683284e-06, "loss": 0.0331, "step": 7381 }, { "epoch": 0.81, "grad_norm": 0.2689381306893739, "learning_rate": 3.6442084065163784e-06, "loss": 0.0509, "step": 7382 }, { "epoch": 0.81, "grad_norm": 0.26597423595588005, "learning_rate": 3.640115145506091e-06, "loss": 0.0264, "step": 7383 }, { "epoch": 0.81, "grad_norm": 0.21145613807014765, "learning_rate": 3.636023954455372e-06, "loss": 0.0372, "step": 7384 }, { "epoch": 0.81, "grad_norm": 0.2349149920663917, "learning_rate": 3.6319348338818718e-06, "loss": 0.0378, "step": 7385 }, { "epoch": 0.81, "grad_norm": 0.2400522860057657, "learning_rate": 3.6278477843029603e-06, "loss": 0.0387, "step": 7386 }, { "epoch": 0.81, "grad_norm": 0.1988599388286612, "learning_rate": 3.6237628062357643e-06, "loss": 0.0231, "step": 7387 }, { "epoch": 0.81, "grad_norm": 0.25043973396510133, "learning_rate": 3.6196799001971416e-06, "loss": 0.0357, "step": 7388 }, { "epoch": 0.81, "grad_norm": 0.15240011639637518, "learning_rate": 3.615599066703692e-06, "loss": 0.0271, "step": 7389 }, { "epoch": 0.81, "grad_norm": 0.18042056195838546, "learning_rate": 3.6115203062717386e-06, "loss": 0.0284, "step": 7390 }, { "epoch": 0.81, "grad_norm": 0.4134745832494071, "learning_rate": 3.6074436194173635e-06, "loss": 0.064, "step": 7391 }, { "epoch": 0.81, "grad_norm": 0.2998669836078751, "learning_rate": 3.6033690066563765e-06, "loss": 0.0364, "step": 7392 }, { "epoch": 0.81, "grad_norm": 0.24346497695265298, "learning_rate": 3.599296468504314e-06, "loss": 0.0383, "step": 7393 }, { "epoch": 0.81, "grad_norm": 0.21351807549274643, "learning_rate": 3.5952260054764663e-06, "loss": 0.0369, "step": 7394 }, { "epoch": 0.81, "grad_norm": 0.23191768390898695, "learning_rate": 3.591157618087855e-06, "loss": 0.0228, "step": 7395 }, { "epoch": 0.81, "grad_norm": 0.19185472031759665, "learning_rate": 3.5870913068532455e-06, "loss": 0.0329, "step": 7396 }, { "epoch": 0.81, "grad_norm": 0.21688971378862437, "learning_rate": 3.58302707228712e-06, "loss": 0.032, "step": 7397 }, { "epoch": 0.81, "grad_norm": 0.3060109908819706, "learning_rate": 3.5789649149037197e-06, "loss": 0.0374, "step": 7398 }, { "epoch": 0.81, "grad_norm": 0.2357773549950743, "learning_rate": 3.5749048352170146e-06, "loss": 0.0382, "step": 7399 }, { "epoch": 0.81, "grad_norm": 0.23406167544587667, "learning_rate": 3.5708468337407177e-06, "loss": 0.0286, "step": 7400 }, { "epoch": 0.81, "grad_norm": 0.16159346076328623, "learning_rate": 3.566790910988265e-06, "loss": 0.0246, "step": 7401 }, { "epoch": 0.81, "grad_norm": 0.2121497043887166, "learning_rate": 3.562737067472841e-06, "loss": 0.019, "step": 7402 }, { "epoch": 0.81, "grad_norm": 0.2355653970625761, "learning_rate": 3.5586853037073674e-06, "loss": 0.0347, "step": 7403 }, { "epoch": 0.81, "grad_norm": 0.1972603655576669, "learning_rate": 3.554635620204503e-06, "loss": 0.0345, "step": 7404 }, { "epoch": 0.81, "grad_norm": 0.20577147306361032, "learning_rate": 3.5505880174766305e-06, "loss": 0.0369, "step": 7405 }, { "epoch": 0.81, "grad_norm": 0.23267282834241032, "learning_rate": 3.546542496035883e-06, "loss": 0.042, "step": 7406 }, { "epoch": 0.81, "grad_norm": 0.2333554343985014, "learning_rate": 3.5424990563941265e-06, "loss": 0.027, "step": 7407 }, { "epoch": 0.81, "grad_norm": 0.2684093290030905, "learning_rate": 3.5384576990629672e-06, "loss": 0.0327, "step": 7408 }, { "epoch": 0.81, "grad_norm": 0.2215673102998656, "learning_rate": 3.5344184245537384e-06, "loss": 0.0608, "step": 7409 }, { "epoch": 0.81, "grad_norm": 0.24675505959812644, "learning_rate": 3.53038123337752e-06, "loss": 0.042, "step": 7410 }, { "epoch": 0.81, "grad_norm": 0.17952090294139986, "learning_rate": 3.5263461260451217e-06, "loss": 0.029, "step": 7411 }, { "epoch": 0.81, "grad_norm": 0.3008566922781658, "learning_rate": 3.5223131030670942e-06, "loss": 0.0754, "step": 7412 }, { "epoch": 0.81, "grad_norm": 0.19868479023309843, "learning_rate": 3.5182821649537192e-06, "loss": 0.045, "step": 7413 }, { "epoch": 0.81, "grad_norm": 0.19470725695566066, "learning_rate": 3.5142533122150147e-06, "loss": 0.0293, "step": 7414 }, { "epoch": 0.81, "grad_norm": 0.31727193216020894, "learning_rate": 3.510226545360742e-06, "loss": 0.0347, "step": 7415 }, { "epoch": 0.81, "grad_norm": 0.23468189809338336, "learning_rate": 3.506201864900396e-06, "loss": 0.0374, "step": 7416 }, { "epoch": 0.81, "grad_norm": 0.24536788703920037, "learning_rate": 3.502179271343198e-06, "loss": 0.0425, "step": 7417 }, { "epoch": 0.81, "grad_norm": 0.17150679074681038, "learning_rate": 3.4981587651981185e-06, "loss": 0.0251, "step": 7418 }, { "epoch": 0.81, "grad_norm": 0.21906352565323928, "learning_rate": 3.4941403469738555e-06, "loss": 0.0352, "step": 7419 }, { "epoch": 0.81, "grad_norm": 0.2401544081171172, "learning_rate": 3.490124017178851e-06, "loss": 0.037, "step": 7420 }, { "epoch": 0.81, "grad_norm": 0.26761707662820367, "learning_rate": 3.4861097763212693e-06, "loss": 0.0372, "step": 7421 }, { "epoch": 0.82, "grad_norm": 0.1894305107541527, "learning_rate": 3.482097624909022e-06, "loss": 0.0409, "step": 7422 }, { "epoch": 0.82, "grad_norm": 0.2490980597493976, "learning_rate": 3.4780875634497526e-06, "loss": 0.0382, "step": 7423 }, { "epoch": 0.82, "grad_norm": 0.25547108780377437, "learning_rate": 3.474079592450845e-06, "loss": 0.0293, "step": 7424 }, { "epoch": 0.82, "grad_norm": 0.22409663347392775, "learning_rate": 3.4700737124194015e-06, "loss": 0.0305, "step": 7425 }, { "epoch": 0.82, "grad_norm": 0.19892686142251298, "learning_rate": 3.466069923862283e-06, "loss": 0.0258, "step": 7426 }, { "epoch": 0.82, "grad_norm": 0.24378036621651403, "learning_rate": 3.4620682272860772e-06, "loss": 0.0448, "step": 7427 }, { "epoch": 0.82, "grad_norm": 0.21739847036010437, "learning_rate": 3.458068623197097e-06, "loss": 0.0289, "step": 7428 }, { "epoch": 0.82, "grad_norm": 0.23974004143497032, "learning_rate": 3.4540711121013982e-06, "loss": 0.027, "step": 7429 }, { "epoch": 0.82, "grad_norm": 0.1887085825592145, "learning_rate": 3.4500756945047774e-06, "loss": 0.0174, "step": 7430 }, { "epoch": 0.82, "grad_norm": 0.16868693930711878, "learning_rate": 3.4460823709127645e-06, "loss": 0.0166, "step": 7431 }, { "epoch": 0.82, "grad_norm": 0.2538717246424224, "learning_rate": 3.442091141830608e-06, "loss": 0.0419, "step": 7432 }, { "epoch": 0.82, "grad_norm": 0.17340388659641845, "learning_rate": 3.438102007763313e-06, "loss": 0.0328, "step": 7433 }, { "epoch": 0.82, "grad_norm": 0.27920512046060325, "learning_rate": 3.4341149692156074e-06, "loss": 0.035, "step": 7434 }, { "epoch": 0.82, "grad_norm": 0.19804177700907516, "learning_rate": 3.430130026691967e-06, "loss": 0.0361, "step": 7435 }, { "epoch": 0.82, "grad_norm": 0.1959560663227455, "learning_rate": 3.426147180696577e-06, "loss": 0.0413, "step": 7436 }, { "epoch": 0.82, "grad_norm": 0.2288795003136039, "learning_rate": 3.4221664317333826e-06, "loss": 0.0333, "step": 7437 }, { "epoch": 0.82, "grad_norm": 0.24211084783302425, "learning_rate": 3.4181877803060528e-06, "loss": 0.0322, "step": 7438 }, { "epoch": 0.82, "grad_norm": 0.28184957966927965, "learning_rate": 3.4142112269179962e-06, "loss": 0.0371, "step": 7439 }, { "epoch": 0.82, "grad_norm": 0.270051523144888, "learning_rate": 3.4102367720723438e-06, "loss": 0.0397, "step": 7440 }, { "epoch": 0.82, "grad_norm": 0.24397744638289665, "learning_rate": 3.406264416271976e-06, "loss": 0.04, "step": 7441 }, { "epoch": 0.82, "grad_norm": 0.23206830579746976, "learning_rate": 3.402294160019499e-06, "loss": 0.0377, "step": 7442 }, { "epoch": 0.82, "grad_norm": 0.2256968007005466, "learning_rate": 3.398326003817256e-06, "loss": 0.0368, "step": 7443 }, { "epoch": 0.82, "grad_norm": 0.3031928252014159, "learning_rate": 3.394359948167325e-06, "loss": 0.0544, "step": 7444 }, { "epoch": 0.82, "grad_norm": 0.268016525658452, "learning_rate": 3.390395993571516e-06, "loss": 0.0509, "step": 7445 }, { "epoch": 0.82, "grad_norm": 0.18703540223754123, "learning_rate": 3.386434140531378e-06, "loss": 0.0207, "step": 7446 }, { "epoch": 0.82, "grad_norm": 0.23181738903786422, "learning_rate": 3.382474389548185e-06, "loss": 0.0327, "step": 7447 }, { "epoch": 0.82, "grad_norm": 0.2192405921457413, "learning_rate": 3.3785167411229523e-06, "loss": 0.0524, "step": 7448 }, { "epoch": 0.82, "grad_norm": 0.20525408870490355, "learning_rate": 3.3745611957564273e-06, "loss": 0.0263, "step": 7449 }, { "epoch": 0.82, "grad_norm": 0.23466835736303263, "learning_rate": 3.3706077539490933e-06, "loss": 0.0342, "step": 7450 }, { "epoch": 0.82, "grad_norm": 0.3167582543629114, "learning_rate": 3.3666564162011705e-06, "loss": 0.0383, "step": 7451 }, { "epoch": 0.82, "grad_norm": 0.2404412874945336, "learning_rate": 3.362707183012597e-06, "loss": 0.0336, "step": 7452 }, { "epoch": 0.82, "grad_norm": 0.2375760822402757, "learning_rate": 3.358760054883059e-06, "loss": 0.0298, "step": 7453 }, { "epoch": 0.82, "grad_norm": 0.23063965296005837, "learning_rate": 3.354815032311978e-06, "loss": 0.0441, "step": 7454 }, { "epoch": 0.82, "grad_norm": 0.2323894478111432, "learning_rate": 3.3508721157985023e-06, "loss": 0.0381, "step": 7455 }, { "epoch": 0.82, "grad_norm": 0.20004185633467292, "learning_rate": 3.34693130584151e-06, "loss": 0.0184, "step": 7456 }, { "epoch": 0.82, "grad_norm": 0.22864691720298666, "learning_rate": 3.342992602939623e-06, "loss": 0.04, "step": 7457 }, { "epoch": 0.82, "grad_norm": 0.19791542333078518, "learning_rate": 3.3390560075911906e-06, "loss": 0.0209, "step": 7458 }, { "epoch": 0.82, "grad_norm": 0.165287767175405, "learning_rate": 3.3351215202942954e-06, "loss": 0.0257, "step": 7459 }, { "epoch": 0.82, "grad_norm": 0.15579203008774456, "learning_rate": 3.331189141546758e-06, "loss": 0.0224, "step": 7460 }, { "epoch": 0.82, "grad_norm": 0.27487803160247687, "learning_rate": 3.327258871846126e-06, "loss": 0.0529, "step": 7461 }, { "epoch": 0.82, "grad_norm": 0.23277987629217148, "learning_rate": 3.3233307116896874e-06, "loss": 0.0461, "step": 7462 }, { "epoch": 0.82, "grad_norm": 0.17555566724108546, "learning_rate": 3.3194046615744503e-06, "loss": 0.0207, "step": 7463 }, { "epoch": 0.82, "grad_norm": 0.23009321360827986, "learning_rate": 3.3154807219971684e-06, "loss": 0.0368, "step": 7464 }, { "epoch": 0.82, "grad_norm": 0.2378087909925564, "learning_rate": 3.311558893454323e-06, "loss": 0.0348, "step": 7465 }, { "epoch": 0.82, "grad_norm": 0.34445023741174724, "learning_rate": 3.307639176442137e-06, "loss": 0.0427, "step": 7466 }, { "epoch": 0.82, "grad_norm": 0.22513745467064952, "learning_rate": 3.3037215714565485e-06, "loss": 0.0302, "step": 7467 }, { "epoch": 0.82, "grad_norm": 0.20298962483529867, "learning_rate": 3.299806078993242e-06, "loss": 0.021, "step": 7468 }, { "epoch": 0.82, "grad_norm": 0.19793103609290041, "learning_rate": 3.295892699547629e-06, "loss": 0.0294, "step": 7469 }, { "epoch": 0.82, "grad_norm": 0.16142332302467222, "learning_rate": 3.2919814336148657e-06, "loss": 0.0187, "step": 7470 }, { "epoch": 0.82, "grad_norm": 0.193968401376796, "learning_rate": 3.288072281689818e-06, "loss": 0.0308, "step": 7471 }, { "epoch": 0.82, "grad_norm": 0.20980708673270493, "learning_rate": 3.2841652442671033e-06, "loss": 0.0284, "step": 7472 }, { "epoch": 0.82, "grad_norm": 0.24221748504830057, "learning_rate": 3.2802603218410666e-06, "loss": 0.0451, "step": 7473 }, { "epoch": 0.82, "grad_norm": 0.21741787575413205, "learning_rate": 3.276357514905788e-06, "loss": 0.0264, "step": 7474 }, { "epoch": 0.82, "grad_norm": 0.22864696608127394, "learning_rate": 3.2724568239550637e-06, "loss": 0.0378, "step": 7475 }, { "epoch": 0.82, "grad_norm": 0.19013485704322677, "learning_rate": 3.2685582494824386e-06, "loss": 0.0318, "step": 7476 }, { "epoch": 0.82, "grad_norm": 0.22356170456648536, "learning_rate": 3.2646617919812007e-06, "loss": 0.0217, "step": 7477 }, { "epoch": 0.82, "grad_norm": 0.2590321532721641, "learning_rate": 3.260767451944338e-06, "loss": 0.0565, "step": 7478 }, { "epoch": 0.82, "grad_norm": 0.23676297873075064, "learning_rate": 3.256875229864591e-06, "loss": 0.0287, "step": 7479 }, { "epoch": 0.82, "grad_norm": 0.20357256877969448, "learning_rate": 3.252985126234434e-06, "loss": 0.0409, "step": 7480 }, { "epoch": 0.82, "grad_norm": 0.17218293975807653, "learning_rate": 3.2490971415460716e-06, "loss": 0.037, "step": 7481 }, { "epoch": 0.82, "grad_norm": 0.21439589725270158, "learning_rate": 3.245211276291427e-06, "loss": 0.0276, "step": 7482 }, { "epoch": 0.82, "grad_norm": 0.18591843595465093, "learning_rate": 3.2413275309621684e-06, "loss": 0.0366, "step": 7483 }, { "epoch": 0.82, "grad_norm": 0.2716762221641217, "learning_rate": 3.237445906049694e-06, "loss": 0.0605, "step": 7484 }, { "epoch": 0.82, "grad_norm": 0.2515395266545232, "learning_rate": 3.2335664020451364e-06, "loss": 0.0331, "step": 7485 }, { "epoch": 0.82, "grad_norm": 0.2701404018177225, "learning_rate": 3.229689019439348e-06, "loss": 0.0402, "step": 7486 }, { "epoch": 0.82, "grad_norm": 0.1983329490015436, "learning_rate": 3.2258137587229242e-06, "loss": 0.0225, "step": 7487 }, { "epoch": 0.82, "grad_norm": 0.2545477905191409, "learning_rate": 3.2219406203861903e-06, "loss": 0.0509, "step": 7488 }, { "epoch": 0.82, "grad_norm": 0.1797385350604584, "learning_rate": 3.2180696049192027e-06, "loss": 0.032, "step": 7489 }, { "epoch": 0.82, "grad_norm": 0.2884899474637145, "learning_rate": 3.2142007128117393e-06, "loss": 0.0308, "step": 7490 }, { "epoch": 0.82, "grad_norm": 0.2373232265185134, "learning_rate": 3.2103339445533233e-06, "loss": 0.0301, "step": 7491 }, { "epoch": 0.82, "grad_norm": 0.2685669219958205, "learning_rate": 3.2064693006332013e-06, "loss": 0.0342, "step": 7492 }, { "epoch": 0.82, "grad_norm": 0.23162935882497898, "learning_rate": 3.202606781540356e-06, "loss": 0.0248, "step": 7493 }, { "epoch": 0.82, "grad_norm": 0.2517438655331266, "learning_rate": 3.1987463877634962e-06, "loss": 0.0254, "step": 7494 }, { "epoch": 0.82, "grad_norm": 0.2219837305115061, "learning_rate": 3.1948881197910664e-06, "loss": 0.0286, "step": 7495 }, { "epoch": 0.82, "grad_norm": 0.2147771211824228, "learning_rate": 3.1910319781112364e-06, "loss": 0.0263, "step": 7496 }, { "epoch": 0.82, "grad_norm": 0.2103318951691591, "learning_rate": 3.1871779632119204e-06, "loss": 0.0402, "step": 7497 }, { "epoch": 0.82, "grad_norm": 0.21288991630506235, "learning_rate": 3.1833260755807392e-06, "loss": 0.0247, "step": 7498 }, { "epoch": 0.82, "grad_norm": 0.22674921168954712, "learning_rate": 3.179476315705068e-06, "loss": 0.0359, "step": 7499 }, { "epoch": 0.82, "grad_norm": 0.1830833319952086, "learning_rate": 3.1756286840719987e-06, "loss": 0.0338, "step": 7500 }, { "epoch": 0.82, "grad_norm": 0.2321710836298324, "learning_rate": 3.171783181168366e-06, "loss": 0.0411, "step": 7501 }, { "epoch": 0.82, "grad_norm": 0.24267933495604038, "learning_rate": 3.16793980748072e-06, "loss": 0.0473, "step": 7502 }, { "epoch": 0.82, "grad_norm": 0.2721682791815918, "learning_rate": 3.1640985634953524e-06, "loss": 0.0358, "step": 7503 }, { "epoch": 0.82, "grad_norm": 0.2524557496945113, "learning_rate": 3.160259449698282e-06, "loss": 0.0373, "step": 7504 }, { "epoch": 0.82, "grad_norm": 0.2315410464306356, "learning_rate": 3.156422466575264e-06, "loss": 0.0325, "step": 7505 }, { "epoch": 0.82, "grad_norm": 0.20002625933754795, "learning_rate": 3.1525876146117707e-06, "loss": 0.041, "step": 7506 }, { "epoch": 0.82, "grad_norm": 0.19547227046732799, "learning_rate": 3.1487548942930136e-06, "loss": 0.0304, "step": 7507 }, { "epoch": 0.82, "grad_norm": 0.216989517538845, "learning_rate": 3.144924306103938e-06, "loss": 0.0257, "step": 7508 }, { "epoch": 0.82, "grad_norm": 0.22981897179073466, "learning_rate": 3.1410958505292167e-06, "loss": 0.0424, "step": 7509 }, { "epoch": 0.82, "grad_norm": 0.16953086809036202, "learning_rate": 3.1372695280532415e-06, "loss": 0.0304, "step": 7510 }, { "epoch": 0.82, "grad_norm": 0.2382266967110526, "learning_rate": 3.133445339160146e-06, "loss": 0.0428, "step": 7511 }, { "epoch": 0.82, "grad_norm": 0.19943456553718433, "learning_rate": 3.129623284333805e-06, "loss": 0.0421, "step": 7512 }, { "epoch": 0.83, "grad_norm": 0.20832149154069063, "learning_rate": 3.125803364057793e-06, "loss": 0.0325, "step": 7513 }, { "epoch": 0.83, "grad_norm": 0.22556063687682615, "learning_rate": 3.1219855788154385e-06, "loss": 0.0356, "step": 7514 }, { "epoch": 0.83, "grad_norm": 0.2795119624469679, "learning_rate": 3.1181699290897916e-06, "loss": 0.0313, "step": 7515 }, { "epoch": 0.83, "grad_norm": 0.1930770932810805, "learning_rate": 3.1143564153636395e-06, "loss": 0.0155, "step": 7516 }, { "epoch": 0.83, "grad_norm": 0.18146117380249757, "learning_rate": 3.1105450381194835e-06, "loss": 0.0312, "step": 7517 }, { "epoch": 0.83, "grad_norm": 0.20320885101311342, "learning_rate": 3.1067357978395663e-06, "loss": 0.0302, "step": 7518 }, { "epoch": 0.83, "grad_norm": 0.15815770709985794, "learning_rate": 3.102928695005858e-06, "loss": 0.019, "step": 7519 }, { "epoch": 0.83, "grad_norm": 0.18898936662078236, "learning_rate": 3.0991237301000664e-06, "loss": 0.0284, "step": 7520 }, { "epoch": 0.83, "grad_norm": 0.2842840809156435, "learning_rate": 3.095320903603607e-06, "loss": 0.0312, "step": 7521 }, { "epoch": 0.83, "grad_norm": 0.2289755830147388, "learning_rate": 3.0915202159976453e-06, "loss": 0.0381, "step": 7522 }, { "epoch": 0.83, "grad_norm": 0.22242539132718261, "learning_rate": 3.0877216677630683e-06, "loss": 0.0316, "step": 7523 }, { "epoch": 0.83, "grad_norm": 0.1967054098331257, "learning_rate": 3.083925259380498e-06, "loss": 0.0209, "step": 7524 }, { "epoch": 0.83, "grad_norm": 0.23675961157513783, "learning_rate": 3.0801309913302724e-06, "loss": 0.0334, "step": 7525 }, { "epoch": 0.83, "grad_norm": 0.24298225254919875, "learning_rate": 3.0763388640924698e-06, "loss": 0.0406, "step": 7526 }, { "epoch": 0.83, "grad_norm": 0.2297374875305951, "learning_rate": 3.072548878146897e-06, "loss": 0.042, "step": 7527 }, { "epoch": 0.83, "grad_norm": 0.2996989010739473, "learning_rate": 3.068761033973087e-06, "loss": 0.0437, "step": 7528 }, { "epoch": 0.83, "grad_norm": 0.2539570097436333, "learning_rate": 3.0649753320503017e-06, "loss": 0.0362, "step": 7529 }, { "epoch": 0.83, "grad_norm": 0.24119700650283885, "learning_rate": 3.0611917728575347e-06, "loss": 0.0382, "step": 7530 }, { "epoch": 0.83, "grad_norm": 0.22393550128028836, "learning_rate": 3.057410356873505e-06, "loss": 0.0316, "step": 7531 }, { "epoch": 0.83, "grad_norm": 0.20312731081308394, "learning_rate": 3.053631084576667e-06, "loss": 0.0313, "step": 7532 }, { "epoch": 0.83, "grad_norm": 0.26764594562067223, "learning_rate": 3.04985395644519e-06, "loss": 0.0439, "step": 7533 }, { "epoch": 0.83, "grad_norm": 0.23299971643981132, "learning_rate": 3.046078972956985e-06, "loss": 0.0368, "step": 7534 }, { "epoch": 0.83, "grad_norm": 0.15867413353451265, "learning_rate": 3.0423061345896875e-06, "loss": 0.0188, "step": 7535 }, { "epoch": 0.83, "grad_norm": 0.2258630017174461, "learning_rate": 3.038535441820669e-06, "loss": 0.0274, "step": 7536 }, { "epoch": 0.83, "grad_norm": 0.18832984708693212, "learning_rate": 3.0347668951270084e-06, "loss": 0.0273, "step": 7537 }, { "epoch": 0.83, "grad_norm": 0.18064938047394155, "learning_rate": 3.0310004949855366e-06, "loss": 0.0318, "step": 7538 }, { "epoch": 0.83, "grad_norm": 0.2710197919501598, "learning_rate": 3.0272362418727972e-06, "loss": 0.0473, "step": 7539 }, { "epoch": 0.83, "grad_norm": 0.17966238136531762, "learning_rate": 3.0234741362650787e-06, "loss": 0.0228, "step": 7540 }, { "epoch": 0.83, "grad_norm": 0.22604164729286733, "learning_rate": 3.019714178638373e-06, "loss": 0.0649, "step": 7541 }, { "epoch": 0.83, "grad_norm": 0.2817679378034041, "learning_rate": 3.0159563694684245e-06, "loss": 0.0449, "step": 7542 }, { "epoch": 0.83, "grad_norm": 0.24425577118614047, "learning_rate": 3.0122007092306903e-06, "loss": 0.0358, "step": 7543 }, { "epoch": 0.83, "grad_norm": 0.18778027011985785, "learning_rate": 3.008447198400368e-06, "loss": 0.0158, "step": 7544 }, { "epoch": 0.83, "grad_norm": 0.1737640455627145, "learning_rate": 3.004695837452365e-06, "loss": 0.0308, "step": 7545 }, { "epoch": 0.83, "grad_norm": 0.1736311814839811, "learning_rate": 3.0009466268613384e-06, "loss": 0.0303, "step": 7546 }, { "epoch": 0.83, "grad_norm": 0.23909056131063683, "learning_rate": 2.997199567101663e-06, "loss": 0.0349, "step": 7547 }, { "epoch": 0.83, "grad_norm": 0.2511854378700324, "learning_rate": 2.9934546586474346e-06, "loss": 0.032, "step": 7548 }, { "epoch": 0.83, "grad_norm": 0.16832324257631084, "learning_rate": 2.989711901972485e-06, "loss": 0.0216, "step": 7549 }, { "epoch": 0.83, "grad_norm": 0.2061441236453667, "learning_rate": 2.985971297550374e-06, "loss": 0.0233, "step": 7550 }, { "epoch": 0.83, "grad_norm": 0.18699323199259152, "learning_rate": 2.9822328458543916e-06, "loss": 0.0264, "step": 7551 }, { "epoch": 0.83, "grad_norm": 0.15171133112619387, "learning_rate": 2.9784965473575434e-06, "loss": 0.0258, "step": 7552 }, { "epoch": 0.83, "grad_norm": 0.1764306681841002, "learning_rate": 2.9747624025325716e-06, "loss": 0.0218, "step": 7553 }, { "epoch": 0.83, "grad_norm": 0.1893227118517692, "learning_rate": 2.9710304118519473e-06, "loss": 0.0287, "step": 7554 }, { "epoch": 0.83, "grad_norm": 0.29197093159039084, "learning_rate": 2.9673005757878703e-06, "loss": 0.0198, "step": 7555 }, { "epoch": 0.83, "grad_norm": 0.23489263117201273, "learning_rate": 2.9635728948122542e-06, "loss": 0.0364, "step": 7556 }, { "epoch": 0.83, "grad_norm": 0.16448805884237272, "learning_rate": 2.9598473693967555e-06, "loss": 0.0287, "step": 7557 }, { "epoch": 0.83, "grad_norm": 0.28402562897767586, "learning_rate": 2.95612400001275e-06, "loss": 0.0464, "step": 7558 }, { "epoch": 0.83, "grad_norm": 0.21629984474141284, "learning_rate": 2.9524027871313476e-06, "loss": 0.0318, "step": 7559 }, { "epoch": 0.83, "grad_norm": 0.19412633240740532, "learning_rate": 2.9486837312233742e-06, "loss": 0.0224, "step": 7560 }, { "epoch": 0.83, "grad_norm": 0.25963675378412404, "learning_rate": 2.9449668327593885e-06, "loss": 0.0431, "step": 7561 }, { "epoch": 0.83, "grad_norm": 0.20437587209976266, "learning_rate": 2.9412520922096834e-06, "loss": 0.0308, "step": 7562 }, { "epoch": 0.83, "grad_norm": 0.22413144056583822, "learning_rate": 2.937539510044265e-06, "loss": 0.0415, "step": 7563 }, { "epoch": 0.83, "grad_norm": 0.24400913512315092, "learning_rate": 2.93382908673288e-06, "loss": 0.0297, "step": 7564 }, { "epoch": 0.83, "grad_norm": 0.2342546551408778, "learning_rate": 2.9301208227449905e-06, "loss": 0.0329, "step": 7565 }, { "epoch": 0.83, "grad_norm": 0.36450597986959676, "learning_rate": 2.926414718549797e-06, "loss": 0.0545, "step": 7566 }, { "epoch": 0.83, "grad_norm": 0.1706969790367068, "learning_rate": 2.922710774616211e-06, "loss": 0.0314, "step": 7567 }, { "epoch": 0.83, "grad_norm": 0.3037674412802316, "learning_rate": 2.9190089914128837e-06, "loss": 0.0437, "step": 7568 }, { "epoch": 0.83, "grad_norm": 0.3183951211531267, "learning_rate": 2.91530936940819e-06, "loss": 0.0392, "step": 7569 }, { "epoch": 0.83, "grad_norm": 0.19752672120297818, "learning_rate": 2.911611909070229e-06, "loss": 0.0246, "step": 7570 }, { "epoch": 0.83, "grad_norm": 0.2366123696929768, "learning_rate": 2.90791661086683e-06, "loss": 0.036, "step": 7571 }, { "epoch": 0.83, "grad_norm": 0.1861685727523322, "learning_rate": 2.9042234752655417e-06, "loss": 0.0169, "step": 7572 }, { "epoch": 0.83, "grad_norm": 0.1817812426591504, "learning_rate": 2.9005325027336482e-06, "loss": 0.0217, "step": 7573 }, { "epoch": 0.83, "grad_norm": 0.2282377983166106, "learning_rate": 2.8968436937381515e-06, "loss": 0.0377, "step": 7574 }, { "epoch": 0.83, "grad_norm": 0.21570972699179414, "learning_rate": 2.8931570487457894e-06, "loss": 0.0331, "step": 7575 }, { "epoch": 0.83, "grad_norm": 0.22507620686511837, "learning_rate": 2.889472568223015e-06, "loss": 0.0343, "step": 7576 }, { "epoch": 0.83, "grad_norm": 0.2541962801118243, "learning_rate": 2.8857902526360114e-06, "loss": 0.0328, "step": 7577 }, { "epoch": 0.83, "grad_norm": 0.4486091125897802, "learning_rate": 2.8821101024506947e-06, "loss": 0.0504, "step": 7578 }, { "epoch": 0.83, "grad_norm": 0.22870920410281215, "learning_rate": 2.8784321181327035e-06, "loss": 0.0283, "step": 7579 }, { "epoch": 0.83, "grad_norm": 0.25703863115930436, "learning_rate": 2.874756300147388e-06, "loss": 0.032, "step": 7580 }, { "epoch": 0.83, "grad_norm": 0.3016914862667133, "learning_rate": 2.8710826489598485e-06, "loss": 0.0499, "step": 7581 }, { "epoch": 0.83, "grad_norm": 0.2809703018931384, "learning_rate": 2.867411165034901e-06, "loss": 0.0406, "step": 7582 }, { "epoch": 0.83, "grad_norm": 0.28629385676763286, "learning_rate": 2.863741848837074e-06, "loss": 0.0274, "step": 7583 }, { "epoch": 0.83, "grad_norm": 0.24941785655649665, "learning_rate": 2.8600747008306417e-06, "loss": 0.0382, "step": 7584 }, { "epoch": 0.83, "grad_norm": 0.23546679829457529, "learning_rate": 2.8564097214795937e-06, "loss": 0.0349, "step": 7585 }, { "epoch": 0.83, "grad_norm": 0.21802023515438493, "learning_rate": 2.8527469112476524e-06, "loss": 0.0291, "step": 7586 }, { "epoch": 0.83, "grad_norm": 0.2033274631910137, "learning_rate": 2.849086270598249e-06, "loss": 0.0207, "step": 7587 }, { "epoch": 0.83, "grad_norm": 0.18674789025750416, "learning_rate": 2.8454277999945603e-06, "loss": 0.023, "step": 7588 }, { "epoch": 0.83, "grad_norm": 0.3043528944757433, "learning_rate": 2.841771499899475e-06, "loss": 0.0432, "step": 7589 }, { "epoch": 0.83, "grad_norm": 0.20592386151362962, "learning_rate": 2.8381173707756214e-06, "loss": 0.0307, "step": 7590 }, { "epoch": 0.83, "grad_norm": 0.27418265308907297, "learning_rate": 2.834465413085332e-06, "loss": 0.0398, "step": 7591 }, { "epoch": 0.83, "grad_norm": 0.1363660076596249, "learning_rate": 2.8308156272906794e-06, "loss": 0.0171, "step": 7592 }, { "epoch": 0.83, "grad_norm": 0.2132512895968844, "learning_rate": 2.827168013853463e-06, "loss": 0.0492, "step": 7593 }, { "epoch": 0.83, "grad_norm": 0.1797707717172014, "learning_rate": 2.8235225732352043e-06, "loss": 0.0261, "step": 7594 }, { "epoch": 0.83, "grad_norm": 0.20851552367967222, "learning_rate": 2.8198793058971397e-06, "loss": 0.014, "step": 7595 }, { "epoch": 0.83, "grad_norm": 0.23987911286736752, "learning_rate": 2.8162382123002418e-06, "loss": 0.0346, "step": 7596 }, { "epoch": 0.83, "grad_norm": 0.29634870759911625, "learning_rate": 2.8125992929052092e-06, "loss": 0.0336, "step": 7597 }, { "epoch": 0.83, "grad_norm": 0.1956021830929876, "learning_rate": 2.8089625481724604e-06, "loss": 0.0274, "step": 7598 }, { "epoch": 0.83, "grad_norm": 0.18066717609093813, "learning_rate": 2.8053279785621378e-06, "loss": 0.0262, "step": 7599 }, { "epoch": 0.83, "grad_norm": 0.2350615618097985, "learning_rate": 2.8016955845341143e-06, "loss": 0.0277, "step": 7600 }, { "epoch": 0.83, "grad_norm": 0.19820320046693707, "learning_rate": 2.798065366547986e-06, "loss": 0.0256, "step": 7601 }, { "epoch": 0.83, "grad_norm": 0.2951730833004121, "learning_rate": 2.794437325063064e-06, "loss": 0.0366, "step": 7602 }, { "epoch": 0.83, "grad_norm": 0.1948995611839091, "learning_rate": 2.7908114605383963e-06, "loss": 0.0238, "step": 7603 }, { "epoch": 0.84, "grad_norm": 0.2213159004495298, "learning_rate": 2.7871877734327514e-06, "loss": 0.0408, "step": 7604 }, { "epoch": 0.84, "grad_norm": 0.2121654460974383, "learning_rate": 2.7835662642046245e-06, "loss": 0.0269, "step": 7605 }, { "epoch": 0.84, "grad_norm": 0.24670389015909733, "learning_rate": 2.7799469333122275e-06, "loss": 0.0357, "step": 7606 }, { "epoch": 0.84, "grad_norm": 0.21498507273219974, "learning_rate": 2.7763297812135026e-06, "loss": 0.046, "step": 7607 }, { "epoch": 0.84, "grad_norm": 0.23506955706422913, "learning_rate": 2.772714808366115e-06, "loss": 0.0329, "step": 7608 }, { "epoch": 0.84, "grad_norm": 0.21770459955989604, "learning_rate": 2.7691020152274585e-06, "loss": 0.0331, "step": 7609 }, { "epoch": 0.84, "grad_norm": 0.20548492230201848, "learning_rate": 2.76549140225465e-06, "loss": 0.041, "step": 7610 }, { "epoch": 0.84, "grad_norm": 0.2511533715535377, "learning_rate": 2.761882969904517e-06, "loss": 0.0328, "step": 7611 }, { "epoch": 0.84, "grad_norm": 0.27096677054455504, "learning_rate": 2.758276718633628e-06, "loss": 0.0363, "step": 7612 }, { "epoch": 0.84, "grad_norm": 0.19968950525808227, "learning_rate": 2.754672648898271e-06, "loss": 0.0291, "step": 7613 }, { "epoch": 0.84, "grad_norm": 0.20565001357331475, "learning_rate": 2.751070761154453e-06, "loss": 0.0224, "step": 7614 }, { "epoch": 0.84, "grad_norm": 0.2205976217156827, "learning_rate": 2.747471055857913e-06, "loss": 0.0258, "step": 7615 }, { "epoch": 0.84, "grad_norm": 0.2736638630819423, "learning_rate": 2.743873533464105e-06, "loss": 0.0389, "step": 7616 }, { "epoch": 0.84, "grad_norm": 0.2977077824997038, "learning_rate": 2.7402781944282164e-06, "loss": 0.0353, "step": 7617 }, { "epoch": 0.84, "grad_norm": 0.19000655873485214, "learning_rate": 2.7366850392051468e-06, "loss": 0.0229, "step": 7618 }, { "epoch": 0.84, "grad_norm": 0.34686368975371545, "learning_rate": 2.733094068249527e-06, "loss": 0.0428, "step": 7619 }, { "epoch": 0.84, "grad_norm": 0.25636586442495835, "learning_rate": 2.7295052820157097e-06, "loss": 0.0302, "step": 7620 }, { "epoch": 0.84, "grad_norm": 0.2837643029057331, "learning_rate": 2.72591868095778e-06, "loss": 0.0397, "step": 7621 }, { "epoch": 0.84, "grad_norm": 0.25689548628855835, "learning_rate": 2.722334265529527e-06, "loss": 0.0335, "step": 7622 }, { "epoch": 0.84, "grad_norm": 0.2098684861437258, "learning_rate": 2.7187520361844776e-06, "loss": 0.028, "step": 7623 }, { "epoch": 0.84, "grad_norm": 0.18787319078672018, "learning_rate": 2.715171993375878e-06, "loss": 0.0262, "step": 7624 }, { "epoch": 0.84, "grad_norm": 0.21804441177961417, "learning_rate": 2.711594137556708e-06, "loss": 0.0323, "step": 7625 }, { "epoch": 0.84, "grad_norm": 0.32105543460241515, "learning_rate": 2.7080184691796474e-06, "loss": 0.0345, "step": 7626 }, { "epoch": 0.84, "grad_norm": 0.22500249212262394, "learning_rate": 2.7044449886971213e-06, "loss": 0.03, "step": 7627 }, { "epoch": 0.84, "grad_norm": 0.17858965985010777, "learning_rate": 2.7008736965612658e-06, "loss": 0.0115, "step": 7628 }, { "epoch": 0.84, "grad_norm": 0.17528832517121057, "learning_rate": 2.6973045932239526e-06, "loss": 0.0166, "step": 7629 }, { "epoch": 0.84, "grad_norm": 0.3125683471324996, "learning_rate": 2.6937376791367566e-06, "loss": 0.0476, "step": 7630 }, { "epoch": 0.84, "grad_norm": 0.2124116917449492, "learning_rate": 2.6901729547509934e-06, "loss": 0.0366, "step": 7631 }, { "epoch": 0.84, "grad_norm": 0.22591288105336194, "learning_rate": 2.6866104205176925e-06, "loss": 0.0379, "step": 7632 }, { "epoch": 0.84, "grad_norm": 0.23916193539700292, "learning_rate": 2.683050076887612e-06, "loss": 0.0328, "step": 7633 }, { "epoch": 0.84, "grad_norm": 0.18496191272256948, "learning_rate": 2.679491924311226e-06, "loss": 0.0222, "step": 7634 }, { "epoch": 0.84, "grad_norm": 0.2786003042916692, "learning_rate": 2.6759359632387407e-06, "loss": 0.0396, "step": 7635 }, { "epoch": 0.84, "grad_norm": 0.20074704134852306, "learning_rate": 2.67238219412008e-06, "loss": 0.0271, "step": 7636 }, { "epoch": 0.84, "grad_norm": 0.3189146897331374, "learning_rate": 2.6688306174048807e-06, "loss": 0.059, "step": 7637 }, { "epoch": 0.84, "grad_norm": 0.2278434371218837, "learning_rate": 2.6652812335425184e-06, "loss": 0.0238, "step": 7638 }, { "epoch": 0.84, "grad_norm": 0.2388356591141597, "learning_rate": 2.6617340429820825e-06, "loss": 0.05, "step": 7639 }, { "epoch": 0.84, "grad_norm": 0.20810065190054117, "learning_rate": 2.6581890461723925e-06, "loss": 0.0329, "step": 7640 }, { "epoch": 0.84, "grad_norm": 0.1884146573881054, "learning_rate": 2.6546462435619755e-06, "loss": 0.0381, "step": 7641 }, { "epoch": 0.84, "grad_norm": 0.23376778782262872, "learning_rate": 2.651105635599094e-06, "loss": 0.0278, "step": 7642 }, { "epoch": 0.84, "grad_norm": 0.2767728113647652, "learning_rate": 2.6475672227317282e-06, "loss": 0.047, "step": 7643 }, { "epoch": 0.84, "grad_norm": 0.2088890214662041, "learning_rate": 2.6440310054075877e-06, "loss": 0.0387, "step": 7644 }, { "epoch": 0.84, "grad_norm": 0.22184546401655003, "learning_rate": 2.6404969840740858e-06, "loss": 0.0385, "step": 7645 }, { "epoch": 0.84, "grad_norm": 0.18417381406704897, "learning_rate": 2.6369651591783774e-06, "loss": 0.0252, "step": 7646 }, { "epoch": 0.84, "grad_norm": 0.21899323904474913, "learning_rate": 2.633435531167332e-06, "loss": 0.0319, "step": 7647 }, { "epoch": 0.84, "grad_norm": 0.28716727730382985, "learning_rate": 2.629908100487544e-06, "loss": 0.0588, "step": 7648 }, { "epoch": 0.84, "grad_norm": 0.24797952293257808, "learning_rate": 2.626382867585313e-06, "loss": 0.0387, "step": 7649 }, { "epoch": 0.84, "grad_norm": 0.1657360471621508, "learning_rate": 2.6228598329066902e-06, "loss": 0.0319, "step": 7650 }, { "epoch": 0.84, "grad_norm": 0.27341377972851105, "learning_rate": 2.619338996897427e-06, "loss": 0.0242, "step": 7651 }, { "epoch": 0.84, "grad_norm": 0.21904022684477306, "learning_rate": 2.6158203600030076e-06, "loss": 0.0263, "step": 7652 }, { "epoch": 0.84, "grad_norm": 0.20140843232646816, "learning_rate": 2.612303922668624e-06, "loss": 0.0178, "step": 7653 }, { "epoch": 0.84, "grad_norm": 0.22433960511910805, "learning_rate": 2.6087896853392037e-06, "loss": 0.0364, "step": 7654 }, { "epoch": 0.84, "grad_norm": 0.2466762853585662, "learning_rate": 2.60527764845939e-06, "loss": 0.0291, "step": 7655 }, { "epoch": 0.84, "grad_norm": 0.22148049433569833, "learning_rate": 2.6017678124735545e-06, "loss": 0.0281, "step": 7656 }, { "epoch": 0.84, "grad_norm": 0.22832268941276734, "learning_rate": 2.5982601778257733e-06, "loss": 0.0394, "step": 7657 }, { "epoch": 0.84, "grad_norm": 0.24904132853546684, "learning_rate": 2.594754744959862e-06, "loss": 0.0331, "step": 7658 }, { "epoch": 0.84, "grad_norm": 0.31715142838897503, "learning_rate": 2.5912515143193506e-06, "loss": 0.0603, "step": 7659 }, { "epoch": 0.84, "grad_norm": 0.3282299101100885, "learning_rate": 2.5877504863474933e-06, "loss": 0.049, "step": 7660 }, { "epoch": 0.84, "grad_norm": 0.23427066069941918, "learning_rate": 2.584251661487258e-06, "loss": 0.0378, "step": 7661 }, { "epoch": 0.84, "grad_norm": 0.17040282181747796, "learning_rate": 2.58075504018134e-06, "loss": 0.0202, "step": 7662 }, { "epoch": 0.84, "grad_norm": 0.3024061577234958, "learning_rate": 2.5772606228721555e-06, "loss": 0.057, "step": 7663 }, { "epoch": 0.84, "grad_norm": 0.24001482879256286, "learning_rate": 2.5737684100018446e-06, "loss": 0.0492, "step": 7664 }, { "epoch": 0.84, "grad_norm": 0.24036648072099676, "learning_rate": 2.5702784020122582e-06, "loss": 0.0441, "step": 7665 }, { "epoch": 0.84, "grad_norm": 0.1883154956650253, "learning_rate": 2.566790599344973e-06, "loss": 0.023, "step": 7666 }, { "epoch": 0.84, "grad_norm": 0.3097774522599185, "learning_rate": 2.563305002441303e-06, "loss": 0.0357, "step": 7667 }, { "epoch": 0.84, "grad_norm": 0.24300508012877306, "learning_rate": 2.5598216117422547e-06, "loss": 0.035, "step": 7668 }, { "epoch": 0.84, "grad_norm": 0.2094741945721532, "learning_rate": 2.556340427688575e-06, "loss": 0.027, "step": 7669 }, { "epoch": 0.84, "grad_norm": 0.2227773169926037, "learning_rate": 2.552861450720725e-06, "loss": 0.0341, "step": 7670 }, { "epoch": 0.84, "grad_norm": 0.13619264467246087, "learning_rate": 2.5493846812788923e-06, "loss": 0.0108, "step": 7671 }, { "epoch": 0.84, "grad_norm": 0.26831786377417394, "learning_rate": 2.5459101198029724e-06, "loss": 0.0313, "step": 7672 }, { "epoch": 0.84, "grad_norm": 0.15257217326277978, "learning_rate": 2.542437766732593e-06, "loss": 0.0205, "step": 7673 }, { "epoch": 0.84, "grad_norm": 0.21841776686397005, "learning_rate": 2.538967622507098e-06, "loss": 0.0305, "step": 7674 }, { "epoch": 0.84, "grad_norm": 0.2131413025088449, "learning_rate": 2.535499687565559e-06, "loss": 0.0283, "step": 7675 }, { "epoch": 0.84, "grad_norm": 0.23409968736477768, "learning_rate": 2.532033962346754e-06, "loss": 0.0336, "step": 7676 }, { "epoch": 0.84, "grad_norm": 0.21867116290838887, "learning_rate": 2.528570447289189e-06, "loss": 0.0279, "step": 7677 }, { "epoch": 0.84, "grad_norm": 0.265822505451381, "learning_rate": 2.525109142831095e-06, "loss": 0.0374, "step": 7678 }, { "epoch": 0.84, "grad_norm": 0.19859364553849468, "learning_rate": 2.52165004941042e-06, "loss": 0.0274, "step": 7679 }, { "epoch": 0.84, "grad_norm": 0.22097252127108136, "learning_rate": 2.5181931674648265e-06, "loss": 0.023, "step": 7680 }, { "epoch": 0.84, "grad_norm": 0.2090036971527243, "learning_rate": 2.5147384974317014e-06, "loss": 0.0246, "step": 7681 }, { "epoch": 0.84, "grad_norm": 0.17914124052438576, "learning_rate": 2.5112860397481553e-06, "loss": 0.0385, "step": 7682 }, { "epoch": 0.84, "grad_norm": 0.36674578456107254, "learning_rate": 2.507835794851017e-06, "loss": 0.0741, "step": 7683 }, { "epoch": 0.84, "grad_norm": 0.22445193038447256, "learning_rate": 2.50438776317683e-06, "loss": 0.0263, "step": 7684 }, { "epoch": 0.84, "grad_norm": 0.21830122758653003, "learning_rate": 2.5009419451618634e-06, "loss": 0.0351, "step": 7685 }, { "epoch": 0.84, "grad_norm": 0.21726627910780288, "learning_rate": 2.497498341242104e-06, "loss": 0.0373, "step": 7686 }, { "epoch": 0.84, "grad_norm": 0.2314400330275358, "learning_rate": 2.4940569518532677e-06, "loss": 0.0371, "step": 7687 }, { "epoch": 0.84, "grad_norm": 0.24373962276328054, "learning_rate": 2.490617777430766e-06, "loss": 0.029, "step": 7688 }, { "epoch": 0.84, "grad_norm": 0.17332051666399056, "learning_rate": 2.4871808184097558e-06, "loss": 0.0195, "step": 7689 }, { "epoch": 0.84, "grad_norm": 0.2083561626482775, "learning_rate": 2.4837460752251e-06, "loss": 0.036, "step": 7690 }, { "epoch": 0.84, "grad_norm": 0.23903304460858993, "learning_rate": 2.4803135483113903e-06, "loss": 0.0328, "step": 7691 }, { "epoch": 0.84, "grad_norm": 0.19917820536601982, "learning_rate": 2.476883238102925e-06, "loss": 0.0344, "step": 7692 }, { "epoch": 0.84, "grad_norm": 0.40944573759486974, "learning_rate": 2.473455145033734e-06, "loss": 0.064, "step": 7693 }, { "epoch": 0.84, "grad_norm": 0.23786641578700735, "learning_rate": 2.4700292695375596e-06, "loss": 0.0293, "step": 7694 }, { "epoch": 0.85, "grad_norm": 0.23913851508051295, "learning_rate": 2.46660561204787e-06, "loss": 0.0426, "step": 7695 }, { "epoch": 0.85, "grad_norm": 0.26505084254061295, "learning_rate": 2.4631841729978435e-06, "loss": 0.0356, "step": 7696 }, { "epoch": 0.85, "grad_norm": 0.23045340988055701, "learning_rate": 2.459764952820385e-06, "loss": 0.0439, "step": 7697 }, { "epoch": 0.85, "grad_norm": 0.1685558968828825, "learning_rate": 2.456347951948115e-06, "loss": 0.0213, "step": 7698 }, { "epoch": 0.85, "grad_norm": 0.2698645811097252, "learning_rate": 2.452933170813383e-06, "loss": 0.0522, "step": 7699 }, { "epoch": 0.85, "grad_norm": 0.2933298809803834, "learning_rate": 2.449520609848237e-06, "loss": 0.0268, "step": 7700 }, { "epoch": 0.85, "grad_norm": 0.2824123787239722, "learning_rate": 2.4461102694844605e-06, "loss": 0.0286, "step": 7701 }, { "epoch": 0.85, "grad_norm": 0.2424009828112054, "learning_rate": 2.442702150153562e-06, "loss": 0.038, "step": 7702 }, { "epoch": 0.85, "grad_norm": 0.2536387868812389, "learning_rate": 2.439296252286747e-06, "loss": 0.039, "step": 7703 }, { "epoch": 0.85, "grad_norm": 0.18415975574074012, "learning_rate": 2.4358925763149557e-06, "loss": 0.0211, "step": 7704 }, { "epoch": 0.85, "grad_norm": 0.1973684439533622, "learning_rate": 2.432491122668843e-06, "loss": 0.0302, "step": 7705 }, { "epoch": 0.85, "grad_norm": 0.18034695813384868, "learning_rate": 2.4290918917787876e-06, "loss": 0.0276, "step": 7706 }, { "epoch": 0.85, "grad_norm": 0.21660120614025083, "learning_rate": 2.425694884074876e-06, "loss": 0.0351, "step": 7707 }, { "epoch": 0.85, "grad_norm": 0.29989806469988955, "learning_rate": 2.4223000999869227e-06, "loss": 0.0524, "step": 7708 }, { "epoch": 0.85, "grad_norm": 0.1984272454833669, "learning_rate": 2.418907539944457e-06, "loss": 0.0411, "step": 7709 }, { "epoch": 0.85, "grad_norm": 0.16936174787781888, "learning_rate": 2.4155172043767337e-06, "loss": 0.0202, "step": 7710 }, { "epoch": 0.85, "grad_norm": 0.1894116405411674, "learning_rate": 2.412129093712712e-06, "loss": 0.0266, "step": 7711 }, { "epoch": 0.85, "grad_norm": 0.24967479415668112, "learning_rate": 2.4087432083810792e-06, "loss": 0.0302, "step": 7712 }, { "epoch": 0.85, "grad_norm": 0.24881023929577925, "learning_rate": 2.40535954881024e-06, "loss": 0.0237, "step": 7713 }, { "epoch": 0.85, "grad_norm": 0.25225018576604774, "learning_rate": 2.401978115428325e-06, "loss": 0.0315, "step": 7714 }, { "epoch": 0.85, "grad_norm": 0.29597386572783446, "learning_rate": 2.3985989086631633e-06, "loss": 0.0382, "step": 7715 }, { "epoch": 0.85, "grad_norm": 0.23358201345092822, "learning_rate": 2.395221928942322e-06, "loss": 0.0343, "step": 7716 }, { "epoch": 0.85, "grad_norm": 0.22153920499101093, "learning_rate": 2.391847176693074e-06, "loss": 0.0414, "step": 7717 }, { "epoch": 0.85, "grad_norm": 0.2062726156522282, "learning_rate": 2.388474652342416e-06, "loss": 0.0318, "step": 7718 }, { "epoch": 0.85, "grad_norm": 0.17318005786339366, "learning_rate": 2.385104356317065e-06, "loss": 0.0381, "step": 7719 }, { "epoch": 0.85, "grad_norm": 0.18725059730666252, "learning_rate": 2.3817362890434526e-06, "loss": 0.049, "step": 7720 }, { "epoch": 0.85, "grad_norm": 0.2296196441586464, "learning_rate": 2.378370450947729e-06, "loss": 0.0385, "step": 7721 }, { "epoch": 0.85, "grad_norm": 0.25302715706385887, "learning_rate": 2.375006842455756e-06, "loss": 0.0398, "step": 7722 }, { "epoch": 0.85, "grad_norm": 0.2929474377827566, "learning_rate": 2.3716454639931243e-06, "loss": 0.0497, "step": 7723 }, { "epoch": 0.85, "grad_norm": 0.2366943988927843, "learning_rate": 2.3682863159851377e-06, "loss": 0.0252, "step": 7724 }, { "epoch": 0.85, "grad_norm": 0.24347108967905065, "learning_rate": 2.364929398856819e-06, "loss": 0.059, "step": 7725 }, { "epoch": 0.85, "grad_norm": 0.2064907467884103, "learning_rate": 2.3615747130329013e-06, "loss": 0.0377, "step": 7726 }, { "epoch": 0.85, "grad_norm": 0.20658114826669727, "learning_rate": 2.3582222589378457e-06, "loss": 0.0331, "step": 7727 }, { "epoch": 0.85, "grad_norm": 0.2014120020611004, "learning_rate": 2.3548720369958256e-06, "loss": 0.0363, "step": 7728 }, { "epoch": 0.85, "grad_norm": 0.19892929590909753, "learning_rate": 2.351524047630731e-06, "loss": 0.0216, "step": 7729 }, { "epoch": 0.85, "grad_norm": 0.19089089321140426, "learning_rate": 2.3481782912661788e-06, "loss": 0.03, "step": 7730 }, { "epoch": 0.85, "grad_norm": 0.4642982487768126, "learning_rate": 2.344834768325488e-06, "loss": 0.0587, "step": 7731 }, { "epoch": 0.85, "grad_norm": 0.18184357247062863, "learning_rate": 2.3414934792317047e-06, "loss": 0.0353, "step": 7732 }, { "epoch": 0.85, "grad_norm": 0.26270967749136565, "learning_rate": 2.338154424407593e-06, "loss": 0.0414, "step": 7733 }, { "epoch": 0.85, "grad_norm": 0.21577644765428147, "learning_rate": 2.334817604275632e-06, "loss": 0.0308, "step": 7734 }, { "epoch": 0.85, "grad_norm": 0.22764025226797893, "learning_rate": 2.3314830192580117e-06, "loss": 0.0159, "step": 7735 }, { "epoch": 0.85, "grad_norm": 0.20446738171895779, "learning_rate": 2.3281506697766522e-06, "loss": 0.0448, "step": 7736 }, { "epoch": 0.85, "grad_norm": 0.2745801619813049, "learning_rate": 2.324820556253187e-06, "loss": 0.0346, "step": 7737 }, { "epoch": 0.85, "grad_norm": 0.20171543869149186, "learning_rate": 2.3214926791089563e-06, "loss": 0.0289, "step": 7738 }, { "epoch": 0.85, "grad_norm": 0.22112635314812032, "learning_rate": 2.3181670387650268e-06, "loss": 0.0404, "step": 7739 }, { "epoch": 0.85, "grad_norm": 0.3684026011147866, "learning_rate": 2.3148436356421813e-06, "loss": 0.0612, "step": 7740 }, { "epoch": 0.85, "grad_norm": 0.17693680189015856, "learning_rate": 2.311522470160923e-06, "loss": 0.0213, "step": 7741 }, { "epoch": 0.85, "grad_norm": 0.18262174672030046, "learning_rate": 2.3082035427414585e-06, "loss": 0.033, "step": 7742 }, { "epoch": 0.85, "grad_norm": 0.26274660028151026, "learning_rate": 2.3048868538037227e-06, "loss": 0.0378, "step": 7743 }, { "epoch": 0.85, "grad_norm": 0.18303939644502495, "learning_rate": 2.301572403767369e-06, "loss": 0.0252, "step": 7744 }, { "epoch": 0.85, "grad_norm": 0.23453589321066498, "learning_rate": 2.298260193051767e-06, "loss": 0.0358, "step": 7745 }, { "epoch": 0.85, "grad_norm": 0.18955922842488446, "learning_rate": 2.2949502220759866e-06, "loss": 0.0195, "step": 7746 }, { "epoch": 0.85, "grad_norm": 0.22538228771584054, "learning_rate": 2.291642491258832e-06, "loss": 0.0322, "step": 7747 }, { "epoch": 0.85, "grad_norm": 0.23442264708348803, "learning_rate": 2.2883370010188232e-06, "loss": 0.0497, "step": 7748 }, { "epoch": 0.85, "grad_norm": 0.21797750516966352, "learning_rate": 2.2850337517741926e-06, "loss": 0.0435, "step": 7749 }, { "epoch": 0.85, "grad_norm": 0.177253575863299, "learning_rate": 2.2817327439428836e-06, "loss": 0.0234, "step": 7750 }, { "epoch": 0.85, "grad_norm": 0.24503952863087308, "learning_rate": 2.2784339779425626e-06, "loss": 0.0398, "step": 7751 }, { "epoch": 0.85, "grad_norm": 0.20090878514316018, "learning_rate": 2.2751374541906122e-06, "loss": 0.0387, "step": 7752 }, { "epoch": 0.85, "grad_norm": 0.1962073996040948, "learning_rate": 2.271843173104129e-06, "loss": 0.0342, "step": 7753 }, { "epoch": 0.85, "grad_norm": 0.21662818944534437, "learning_rate": 2.26855113509993e-06, "loss": 0.0285, "step": 7754 }, { "epoch": 0.85, "grad_norm": 0.1891642757630882, "learning_rate": 2.2652613405945423e-06, "loss": 0.034, "step": 7755 }, { "epoch": 0.85, "grad_norm": 0.2561127667351713, "learning_rate": 2.261973790004217e-06, "loss": 0.0313, "step": 7756 }, { "epoch": 0.85, "grad_norm": 0.22680476817168074, "learning_rate": 2.2586884837449108e-06, "loss": 0.0218, "step": 7757 }, { "epoch": 0.85, "grad_norm": 0.2514915970579732, "learning_rate": 2.2554054222323018e-06, "loss": 0.0403, "step": 7758 }, { "epoch": 0.85, "grad_norm": 0.2125804373725085, "learning_rate": 2.252124605881789e-06, "loss": 0.0268, "step": 7759 }, { "epoch": 0.85, "grad_norm": 0.19432001671680188, "learning_rate": 2.2488460351084827e-06, "loss": 0.0419, "step": 7760 }, { "epoch": 0.85, "grad_norm": 0.21257585475933272, "learning_rate": 2.2455697103272022e-06, "loss": 0.0361, "step": 7761 }, { "epoch": 0.85, "grad_norm": 0.19085599669106665, "learning_rate": 2.242295631952496e-06, "loss": 0.017, "step": 7762 }, { "epoch": 0.85, "grad_norm": 0.23255052932669992, "learning_rate": 2.239023800398621e-06, "loss": 0.0307, "step": 7763 }, { "epoch": 0.85, "grad_norm": 0.21448856818251472, "learning_rate": 2.235754216079551e-06, "loss": 0.0368, "step": 7764 }, { "epoch": 0.85, "grad_norm": 0.26113981296054933, "learning_rate": 2.2324868794089727e-06, "loss": 0.0277, "step": 7765 }, { "epoch": 0.85, "grad_norm": 0.276401099830377, "learning_rate": 2.229221790800291e-06, "loss": 0.0447, "step": 7766 }, { "epoch": 0.85, "grad_norm": 0.22557549225376047, "learning_rate": 2.2259589506666267e-06, "loss": 0.0416, "step": 7767 }, { "epoch": 0.85, "grad_norm": 0.21602742368128502, "learning_rate": 2.2226983594208187e-06, "loss": 0.0323, "step": 7768 }, { "epoch": 0.85, "grad_norm": 0.17996091358672361, "learning_rate": 2.219440017475418e-06, "loss": 0.0296, "step": 7769 }, { "epoch": 0.85, "grad_norm": 0.3579282456805789, "learning_rate": 2.216183925242681e-06, "loss": 0.0396, "step": 7770 }, { "epoch": 0.85, "grad_norm": 0.2148267392446688, "learning_rate": 2.2129300831346033e-06, "loss": 0.0276, "step": 7771 }, { "epoch": 0.85, "grad_norm": 0.27491615220076, "learning_rate": 2.209678491562881e-06, "loss": 0.0316, "step": 7772 }, { "epoch": 0.85, "grad_norm": 0.18473558242146437, "learning_rate": 2.206429150938918e-06, "loss": 0.0224, "step": 7773 }, { "epoch": 0.85, "grad_norm": 0.20002848489391037, "learning_rate": 2.2031820616738477e-06, "loss": 0.0238, "step": 7774 }, { "epoch": 0.85, "grad_norm": 0.19476562230691247, "learning_rate": 2.199937224178512e-06, "loss": 0.0367, "step": 7775 }, { "epoch": 0.85, "grad_norm": 0.18066858853006224, "learning_rate": 2.1966946388634746e-06, "loss": 0.0288, "step": 7776 }, { "epoch": 0.85, "grad_norm": 0.17057858644531207, "learning_rate": 2.193454306138998e-06, "loss": 0.0201, "step": 7777 }, { "epoch": 0.85, "grad_norm": 0.1674343690736097, "learning_rate": 2.190216226415074e-06, "loss": 0.0265, "step": 7778 }, { "epoch": 0.85, "grad_norm": 0.2614546197397004, "learning_rate": 2.1869804001014085e-06, "loss": 0.0451, "step": 7779 }, { "epoch": 0.85, "grad_norm": 0.22725299321460085, "learning_rate": 2.1837468276074227e-06, "loss": 0.0466, "step": 7780 }, { "epoch": 0.85, "grad_norm": 0.18497122763153595, "learning_rate": 2.1805155093422402e-06, "loss": 0.022, "step": 7781 }, { "epoch": 0.85, "grad_norm": 0.21714030377354016, "learning_rate": 2.1772864457147126e-06, "loss": 0.0359, "step": 7782 }, { "epoch": 0.85, "grad_norm": 0.2257479131566246, "learning_rate": 2.174059637133403e-06, "loss": 0.0326, "step": 7783 }, { "epoch": 0.85, "grad_norm": 0.24847196963640764, "learning_rate": 2.1708350840065927e-06, "loss": 0.0444, "step": 7784 }, { "epoch": 0.85, "grad_norm": 0.22189190659113614, "learning_rate": 2.1676127867422637e-06, "loss": 0.035, "step": 7785 }, { "epoch": 0.86, "grad_norm": 0.22376283255865045, "learning_rate": 2.164392745748125e-06, "loss": 0.0285, "step": 7786 }, { "epoch": 0.86, "grad_norm": 0.18958655309213665, "learning_rate": 2.1611749614316004e-06, "loss": 0.0294, "step": 7787 }, { "epoch": 0.86, "grad_norm": 0.22695593544931894, "learning_rate": 2.1579594341998235e-06, "loss": 0.0459, "step": 7788 }, { "epoch": 0.86, "grad_norm": 0.19187613634462636, "learning_rate": 2.1547461644596446e-06, "loss": 0.0235, "step": 7789 }, { "epoch": 0.86, "grad_norm": 0.21911279362215938, "learning_rate": 2.151535152617625e-06, "loss": 0.0344, "step": 7790 }, { "epoch": 0.86, "grad_norm": 0.20046058385425244, "learning_rate": 2.148326399080052e-06, "loss": 0.0272, "step": 7791 }, { "epoch": 0.86, "grad_norm": 0.19551664174328526, "learning_rate": 2.1451199042529035e-06, "loss": 0.0241, "step": 7792 }, { "epoch": 0.86, "grad_norm": 0.16344634794351515, "learning_rate": 2.1419156685418964e-06, "loss": 0.0151, "step": 7793 }, { "epoch": 0.86, "grad_norm": 0.20937803216418222, "learning_rate": 2.1387136923524475e-06, "loss": 0.0305, "step": 7794 }, { "epoch": 0.86, "grad_norm": 0.25172641815875657, "learning_rate": 2.1355139760896957e-06, "loss": 0.0356, "step": 7795 }, { "epoch": 0.86, "grad_norm": 0.2562171094999848, "learning_rate": 2.1323165201584863e-06, "loss": 0.0313, "step": 7796 }, { "epoch": 0.86, "grad_norm": 0.17292346644149642, "learning_rate": 2.1291213249633813e-06, "loss": 0.0308, "step": 7797 }, { "epoch": 0.86, "grad_norm": 0.2359648742332226, "learning_rate": 2.125928390908658e-06, "loss": 0.0274, "step": 7798 }, { "epoch": 0.86, "grad_norm": 0.18758848208583861, "learning_rate": 2.1227377183983154e-06, "loss": 0.0217, "step": 7799 }, { "epoch": 0.86, "grad_norm": 0.2193224008515313, "learning_rate": 2.1195493078360486e-06, "loss": 0.023, "step": 7800 }, { "epoch": 0.86, "grad_norm": 0.20483069252397137, "learning_rate": 2.1163631596252785e-06, "loss": 0.0278, "step": 7801 }, { "epoch": 0.86, "grad_norm": 0.22196979277509635, "learning_rate": 2.113179274169137e-06, "loss": 0.0264, "step": 7802 }, { "epoch": 0.86, "grad_norm": 0.24198387414970693, "learning_rate": 2.109997651870477e-06, "loss": 0.0334, "step": 7803 }, { "epoch": 0.86, "grad_norm": 0.22899970109734746, "learning_rate": 2.1068182931318424e-06, "loss": 0.0223, "step": 7804 }, { "epoch": 0.86, "grad_norm": 0.19346933417215884, "learning_rate": 2.1036411983555237e-06, "loss": 0.0257, "step": 7805 }, { "epoch": 0.86, "grad_norm": 0.19151442254277118, "learning_rate": 2.1004663679434987e-06, "loss": 0.0165, "step": 7806 }, { "epoch": 0.86, "grad_norm": 0.23501312488062776, "learning_rate": 2.0972938022974733e-06, "loss": 0.0457, "step": 7807 }, { "epoch": 0.86, "grad_norm": 0.21188176695670952, "learning_rate": 2.0941235018188543e-06, "loss": 0.033, "step": 7808 }, { "epoch": 0.86, "grad_norm": 0.22720612164963516, "learning_rate": 2.0909554669087706e-06, "loss": 0.028, "step": 7809 }, { "epoch": 0.86, "grad_norm": 0.22607865957425907, "learning_rate": 2.0877896979680654e-06, "loss": 0.0284, "step": 7810 }, { "epoch": 0.86, "grad_norm": 0.22876386119714157, "learning_rate": 2.0846261953972967e-06, "loss": 0.0373, "step": 7811 }, { "epoch": 0.86, "grad_norm": 0.24453470849220896, "learning_rate": 2.0814649595967194e-06, "loss": 0.0383, "step": 7812 }, { "epoch": 0.86, "grad_norm": 0.19415344602744983, "learning_rate": 2.0783059909663206e-06, "loss": 0.0224, "step": 7813 }, { "epoch": 0.86, "grad_norm": 0.19452367752534103, "learning_rate": 2.0751492899057957e-06, "loss": 0.0277, "step": 7814 }, { "epoch": 0.86, "grad_norm": 0.1519874048669296, "learning_rate": 2.07199485681455e-06, "loss": 0.0143, "step": 7815 }, { "epoch": 0.86, "grad_norm": 0.21751336960972478, "learning_rate": 2.0688426920916992e-06, "loss": 0.0402, "step": 7816 }, { "epoch": 0.86, "grad_norm": 0.2679816217917729, "learning_rate": 2.0656927961360785e-06, "loss": 0.0261, "step": 7817 }, { "epoch": 0.86, "grad_norm": 0.19735513676935454, "learning_rate": 2.062545169346235e-06, "loss": 0.0191, "step": 7818 }, { "epoch": 0.86, "grad_norm": 0.2534276412852567, "learning_rate": 2.0593998121204264e-06, "loss": 0.0479, "step": 7819 }, { "epoch": 0.86, "grad_norm": 0.2764394281549932, "learning_rate": 2.05625672485662e-06, "loss": 0.0311, "step": 7820 }, { "epoch": 0.86, "grad_norm": 0.18219273243975537, "learning_rate": 2.053115907952499e-06, "loss": 0.0218, "step": 7821 }, { "epoch": 0.86, "grad_norm": 0.19931312270499343, "learning_rate": 2.049977361805471e-06, "loss": 0.0303, "step": 7822 }, { "epoch": 0.86, "grad_norm": 0.2082707082480229, "learning_rate": 2.0468410868126343e-06, "loss": 0.0319, "step": 7823 }, { "epoch": 0.86, "grad_norm": 0.3296984684281185, "learning_rate": 2.043707083370814e-06, "loss": 0.0368, "step": 7824 }, { "epoch": 0.86, "grad_norm": 0.21641233208437777, "learning_rate": 2.0405753518765457e-06, "loss": 0.0307, "step": 7825 }, { "epoch": 0.86, "grad_norm": 0.19958385035587894, "learning_rate": 2.03744589272608e-06, "loss": 0.038, "step": 7826 }, { "epoch": 0.86, "grad_norm": 0.26299869645633306, "learning_rate": 2.034318706315368e-06, "loss": 0.033, "step": 7827 }, { "epoch": 0.86, "grad_norm": 0.38151802428627263, "learning_rate": 2.031193793040087e-06, "loss": 0.0454, "step": 7828 }, { "epoch": 0.86, "grad_norm": 0.21418370379658888, "learning_rate": 2.028071153295621e-06, "loss": 0.0326, "step": 7829 }, { "epoch": 0.86, "grad_norm": 0.13953515106078992, "learning_rate": 2.0249507874770714e-06, "loss": 0.0248, "step": 7830 }, { "epoch": 0.86, "grad_norm": 0.18903826487297606, "learning_rate": 2.0218326959792357e-06, "loss": 0.035, "step": 7831 }, { "epoch": 0.86, "grad_norm": 0.23306386102456478, "learning_rate": 2.018716879196645e-06, "loss": 0.0271, "step": 7832 }, { "epoch": 0.86, "grad_norm": 0.19999979287375216, "learning_rate": 2.0156033375235286e-06, "loss": 0.0357, "step": 7833 }, { "epoch": 0.86, "grad_norm": 0.3290704774749966, "learning_rate": 2.0124920713538378e-06, "loss": 0.0413, "step": 7834 }, { "epoch": 0.86, "grad_norm": 0.20094415127141455, "learning_rate": 2.009383081081222e-06, "loss": 0.0376, "step": 7835 }, { "epoch": 0.86, "grad_norm": 0.21115602547415008, "learning_rate": 2.006276367099054e-06, "loss": 0.0299, "step": 7836 }, { "epoch": 0.86, "grad_norm": 0.17481565174688957, "learning_rate": 2.003171929800416e-06, "loss": 0.0218, "step": 7837 }, { "epoch": 0.86, "grad_norm": 0.1567136323270094, "learning_rate": 2.000069769578108e-06, "loss": 0.0243, "step": 7838 }, { "epoch": 0.86, "grad_norm": 0.24722588959588923, "learning_rate": 1.99696988682462e-06, "loss": 0.0254, "step": 7839 }, { "epoch": 0.86, "grad_norm": 0.2751564220404101, "learning_rate": 1.9938722819321854e-06, "loss": 0.0364, "step": 7840 }, { "epoch": 0.86, "grad_norm": 0.1993064500339682, "learning_rate": 1.9907769552927304e-06, "loss": 0.0363, "step": 7841 }, { "epoch": 0.86, "grad_norm": 0.20347238143459712, "learning_rate": 1.987683907297888e-06, "loss": 0.0216, "step": 7842 }, { "epoch": 0.86, "grad_norm": 0.17353879766985414, "learning_rate": 1.984593138339015e-06, "loss": 0.0272, "step": 7843 }, { "epoch": 0.86, "grad_norm": 0.21003687172999896, "learning_rate": 1.9815046488071774e-06, "loss": 0.0271, "step": 7844 }, { "epoch": 0.86, "grad_norm": 0.22389184534093173, "learning_rate": 1.9784184390931504e-06, "loss": 0.0301, "step": 7845 }, { "epoch": 0.86, "grad_norm": 0.25424214666328915, "learning_rate": 1.9753345095874234e-06, "loss": 0.0344, "step": 7846 }, { "epoch": 0.86, "grad_norm": 0.24692807230697378, "learning_rate": 1.9722528606801906e-06, "loss": 0.0287, "step": 7847 }, { "epoch": 0.86, "grad_norm": 0.277819279544011, "learning_rate": 1.9691734927613625e-06, "loss": 0.0405, "step": 7848 }, { "epoch": 0.86, "grad_norm": 0.23186107103971815, "learning_rate": 1.9660964062205657e-06, "loss": 0.029, "step": 7849 }, { "epoch": 0.86, "grad_norm": 0.18243360216237806, "learning_rate": 1.9630216014471326e-06, "loss": 0.0223, "step": 7850 }, { "epoch": 0.86, "grad_norm": 0.19514721552489403, "learning_rate": 1.9599490788301033e-06, "loss": 0.0169, "step": 7851 }, { "epoch": 0.86, "grad_norm": 0.23507576924465626, "learning_rate": 1.9568788387582338e-06, "loss": 0.0485, "step": 7852 }, { "epoch": 0.86, "grad_norm": 0.21133806025477678, "learning_rate": 1.9538108816199953e-06, "loss": 0.015, "step": 7853 }, { "epoch": 0.86, "grad_norm": 0.1865882960493375, "learning_rate": 1.950745207803566e-06, "loss": 0.0202, "step": 7854 }, { "epoch": 0.86, "grad_norm": 0.2547915211149743, "learning_rate": 1.947681817696827e-06, "loss": 0.0315, "step": 7855 }, { "epoch": 0.86, "grad_norm": 0.22103638090027158, "learning_rate": 1.9446207116873815e-06, "loss": 0.0428, "step": 7856 }, { "epoch": 0.86, "grad_norm": 0.15675748428169678, "learning_rate": 1.941561890162551e-06, "loss": 0.0254, "step": 7857 }, { "epoch": 0.86, "grad_norm": 0.16412827898789895, "learning_rate": 1.9385053535093455e-06, "loss": 0.0277, "step": 7858 }, { "epoch": 0.86, "grad_norm": 0.18364920691667633, "learning_rate": 1.935451102114503e-06, "loss": 0.0254, "step": 7859 }, { "epoch": 0.86, "grad_norm": 0.3230474755720947, "learning_rate": 1.9323991363644645e-06, "loss": 0.0375, "step": 7860 }, { "epoch": 0.86, "grad_norm": 0.24119455073366722, "learning_rate": 1.929349456645393e-06, "loss": 0.0211, "step": 7861 }, { "epoch": 0.86, "grad_norm": 0.252791215851772, "learning_rate": 1.9263020633431416e-06, "loss": 0.0279, "step": 7862 }, { "epoch": 0.86, "grad_norm": 0.30249848558503956, "learning_rate": 1.9232569568432933e-06, "loss": 0.046, "step": 7863 }, { "epoch": 0.86, "grad_norm": 0.24358671294417142, "learning_rate": 1.9202141375311335e-06, "loss": 0.0486, "step": 7864 }, { "epoch": 0.86, "grad_norm": 0.23051718833226184, "learning_rate": 1.9171736057916623e-06, "loss": 0.0396, "step": 7865 }, { "epoch": 0.86, "grad_norm": 0.2625220198705256, "learning_rate": 1.9141353620095835e-06, "loss": 0.037, "step": 7866 }, { "epoch": 0.86, "grad_norm": 0.1665907267079886, "learning_rate": 1.9110994065693166e-06, "loss": 0.0216, "step": 7867 }, { "epoch": 0.86, "grad_norm": 0.19124906274777803, "learning_rate": 1.9080657398549916e-06, "loss": 0.0271, "step": 7868 }, { "epoch": 0.86, "grad_norm": 0.1824135894854214, "learning_rate": 1.905034362250453e-06, "loss": 0.0246, "step": 7869 }, { "epoch": 0.86, "grad_norm": 0.21556965698213304, "learning_rate": 1.902005274139238e-06, "loss": 0.0262, "step": 7870 }, { "epoch": 0.86, "grad_norm": 0.20217440157387007, "learning_rate": 1.8989784759046158e-06, "loss": 0.0197, "step": 7871 }, { "epoch": 0.86, "grad_norm": 0.2331335567816457, "learning_rate": 1.8959539679295536e-06, "loss": 0.0381, "step": 7872 }, { "epoch": 0.86, "grad_norm": 0.1879255512157403, "learning_rate": 1.8929317505967338e-06, "loss": 0.0359, "step": 7873 }, { "epoch": 0.86, "grad_norm": 0.1922801174512988, "learning_rate": 1.8899118242885462e-06, "loss": 0.0378, "step": 7874 }, { "epoch": 0.86, "grad_norm": 0.1926312560941338, "learning_rate": 1.8868941893870918e-06, "loss": 0.0325, "step": 7875 }, { "epoch": 0.86, "grad_norm": 0.211330242463678, "learning_rate": 1.8838788462741852e-06, "loss": 0.0262, "step": 7876 }, { "epoch": 0.87, "grad_norm": 0.23234168039566105, "learning_rate": 1.8808657953313391e-06, "loss": 0.0299, "step": 7877 }, { "epoch": 0.87, "grad_norm": 0.1899279513656114, "learning_rate": 1.8778550369397886e-06, "loss": 0.0263, "step": 7878 }, { "epoch": 0.87, "grad_norm": 0.26021272143768537, "learning_rate": 1.8748465714804753e-06, "loss": 0.0249, "step": 7879 }, { "epoch": 0.87, "grad_norm": 0.2777372443242748, "learning_rate": 1.8718403993340528e-06, "loss": 0.0363, "step": 7880 }, { "epoch": 0.87, "grad_norm": 0.14700750400405224, "learning_rate": 1.8688365208808723e-06, "loss": 0.0191, "step": 7881 }, { "epoch": 0.87, "grad_norm": 0.2446700269055719, "learning_rate": 1.865834936501012e-06, "loss": 0.0411, "step": 7882 }, { "epoch": 0.87, "grad_norm": 0.18618530061747474, "learning_rate": 1.8628356465742503e-06, "loss": 0.0226, "step": 7883 }, { "epoch": 0.87, "grad_norm": 0.20829674975736465, "learning_rate": 1.8598386514800793e-06, "loss": 0.0195, "step": 7884 }, { "epoch": 0.87, "grad_norm": 0.252207354245023, "learning_rate": 1.8568439515976932e-06, "loss": 0.0319, "step": 7885 }, { "epoch": 0.87, "grad_norm": 0.28411563007128116, "learning_rate": 1.8538515473060026e-06, "loss": 0.0537, "step": 7886 }, { "epoch": 0.87, "grad_norm": 0.20247397264355838, "learning_rate": 1.8508614389836289e-06, "loss": 0.0268, "step": 7887 }, { "epoch": 0.87, "grad_norm": 0.2333770486775457, "learning_rate": 1.847873627008896e-06, "loss": 0.0442, "step": 7888 }, { "epoch": 0.87, "grad_norm": 0.19974457777228918, "learning_rate": 1.8448881117598484e-06, "loss": 0.039, "step": 7889 }, { "epoch": 0.87, "grad_norm": 0.21231059581182296, "learning_rate": 1.8419048936142191e-06, "loss": 0.0347, "step": 7890 }, { "epoch": 0.87, "grad_norm": 0.19353614779926795, "learning_rate": 1.8389239729494778e-06, "loss": 0.0323, "step": 7891 }, { "epoch": 0.87, "grad_norm": 0.18320920930043838, "learning_rate": 1.8359453501427916e-06, "loss": 0.0268, "step": 7892 }, { "epoch": 0.87, "grad_norm": 0.2114590096736922, "learning_rate": 1.832969025571023e-06, "loss": 0.0371, "step": 7893 }, { "epoch": 0.87, "grad_norm": 0.21235114171044794, "learning_rate": 1.8299949996107646e-06, "loss": 0.0244, "step": 7894 }, { "epoch": 0.87, "grad_norm": 0.22112650477003562, "learning_rate": 1.8270232726383064e-06, "loss": 0.0246, "step": 7895 }, { "epoch": 0.87, "grad_norm": 0.18684562099723942, "learning_rate": 1.8240538450296563e-06, "loss": 0.042, "step": 7896 }, { "epoch": 0.87, "grad_norm": 0.20909525701606596, "learning_rate": 1.8210867171605185e-06, "loss": 0.0407, "step": 7897 }, { "epoch": 0.87, "grad_norm": 0.20344163900697407, "learning_rate": 1.8181218894063146e-06, "loss": 0.0454, "step": 7898 }, { "epoch": 0.87, "grad_norm": 0.15661957068938992, "learning_rate": 1.815159362142176e-06, "loss": 0.0204, "step": 7899 }, { "epoch": 0.87, "grad_norm": 0.25707855658882445, "learning_rate": 1.8121991357429425e-06, "loss": 0.0208, "step": 7900 }, { "epoch": 0.87, "grad_norm": 0.23156069844944088, "learning_rate": 1.809241210583159e-06, "loss": 0.038, "step": 7901 }, { "epoch": 0.87, "grad_norm": 0.2157503420262372, "learning_rate": 1.8062855870370798e-06, "loss": 0.0392, "step": 7902 }, { "epoch": 0.87, "grad_norm": 0.17850745451612188, "learning_rate": 1.8033322654786723e-06, "loss": 0.0276, "step": 7903 }, { "epoch": 0.87, "grad_norm": 0.1806825782979088, "learning_rate": 1.8003812462816127e-06, "loss": 0.0304, "step": 7904 }, { "epoch": 0.87, "grad_norm": 0.2163544513668072, "learning_rate": 1.7974325298192762e-06, "loss": 0.0321, "step": 7905 }, { "epoch": 0.87, "grad_norm": 0.24308614746457385, "learning_rate": 1.7944861164647576e-06, "loss": 0.0525, "step": 7906 }, { "epoch": 0.87, "grad_norm": 0.17603746391298183, "learning_rate": 1.7915420065908585e-06, "loss": 0.0211, "step": 7907 }, { "epoch": 0.87, "grad_norm": 0.1757718083707014, "learning_rate": 1.788600200570083e-06, "loss": 0.0354, "step": 7908 }, { "epoch": 0.87, "grad_norm": 0.24477590686036726, "learning_rate": 1.7856606987746494e-06, "loss": 0.0386, "step": 7909 }, { "epoch": 0.87, "grad_norm": 0.18863884457035762, "learning_rate": 1.782723501576482e-06, "loss": 0.0366, "step": 7910 }, { "epoch": 0.87, "grad_norm": 0.23034008119771987, "learning_rate": 1.7797886093472195e-06, "loss": 0.0425, "step": 7911 }, { "epoch": 0.87, "grad_norm": 0.18517581578787393, "learning_rate": 1.7768560224581955e-06, "loss": 0.0283, "step": 7912 }, { "epoch": 0.87, "grad_norm": 0.1917498676630332, "learning_rate": 1.7739257412804623e-06, "loss": 0.0175, "step": 7913 }, { "epoch": 0.87, "grad_norm": 0.27134389467377207, "learning_rate": 1.770997766184781e-06, "loss": 0.0425, "step": 7914 }, { "epoch": 0.87, "grad_norm": 0.155096409005671, "learning_rate": 1.76807209754162e-06, "loss": 0.0127, "step": 7915 }, { "epoch": 0.87, "grad_norm": 0.1813598534216297, "learning_rate": 1.7651487357211472e-06, "loss": 0.0159, "step": 7916 }, { "epoch": 0.87, "grad_norm": 0.1770506061904471, "learning_rate": 1.7622276810932492e-06, "loss": 0.0225, "step": 7917 }, { "epoch": 0.87, "grad_norm": 0.3575194387219582, "learning_rate": 1.7593089340275149e-06, "loss": 0.0457, "step": 7918 }, { "epoch": 0.87, "grad_norm": 0.21684770589623867, "learning_rate": 1.7563924948932487e-06, "loss": 0.0296, "step": 7919 }, { "epoch": 0.87, "grad_norm": 0.3036167697945338, "learning_rate": 1.7534783640594533e-06, "loss": 0.0238, "step": 7920 }, { "epoch": 0.87, "grad_norm": 0.18773872357752094, "learning_rate": 1.7505665418948404e-06, "loss": 0.0275, "step": 7921 }, { "epoch": 0.87, "grad_norm": 0.14997211038076155, "learning_rate": 1.7476570287678396e-06, "loss": 0.0203, "step": 7922 }, { "epoch": 0.87, "grad_norm": 0.1479350294101062, "learning_rate": 1.7447498250465788e-06, "loss": 0.0204, "step": 7923 }, { "epoch": 0.87, "grad_norm": 0.1951493535529808, "learning_rate": 1.741844931098895e-06, "loss": 0.0235, "step": 7924 }, { "epoch": 0.87, "grad_norm": 0.1793195440052939, "learning_rate": 1.7389423472923318e-06, "loss": 0.027, "step": 7925 }, { "epoch": 0.87, "grad_norm": 0.21369000938495214, "learning_rate": 1.7360420739941486e-06, "loss": 0.0364, "step": 7926 }, { "epoch": 0.87, "grad_norm": 0.2002626259379726, "learning_rate": 1.73314411157131e-06, "loss": 0.0292, "step": 7927 }, { "epoch": 0.87, "grad_norm": 0.19496304721260208, "learning_rate": 1.7302484603904756e-06, "loss": 0.0408, "step": 7928 }, { "epoch": 0.87, "grad_norm": 0.1570431486527773, "learning_rate": 1.727355120818026e-06, "loss": 0.0215, "step": 7929 }, { "epoch": 0.87, "grad_norm": 0.21995434292700441, "learning_rate": 1.7244640932200484e-06, "loss": 0.0341, "step": 7930 }, { "epoch": 0.87, "grad_norm": 0.2197952078508393, "learning_rate": 1.7215753779623346e-06, "loss": 0.049, "step": 7931 }, { "epoch": 0.87, "grad_norm": 0.2148772473664342, "learning_rate": 1.7186889754103763e-06, "loss": 0.0224, "step": 7932 }, { "epoch": 0.87, "grad_norm": 0.22067599847229957, "learning_rate": 1.7158048859293863e-06, "loss": 0.0275, "step": 7933 }, { "epoch": 0.87, "grad_norm": 0.20085702720406495, "learning_rate": 1.7129231098842791e-06, "loss": 0.0391, "step": 7934 }, { "epoch": 0.87, "grad_norm": 0.213639107236686, "learning_rate": 1.7100436476396742e-06, "loss": 0.0412, "step": 7935 }, { "epoch": 0.87, "grad_norm": 0.22289524302226313, "learning_rate": 1.707166499559898e-06, "loss": 0.0424, "step": 7936 }, { "epoch": 0.87, "grad_norm": 0.2126394767789094, "learning_rate": 1.704291666008988e-06, "loss": 0.028, "step": 7937 }, { "epoch": 0.87, "grad_norm": 0.19241805165341572, "learning_rate": 1.701419147350687e-06, "loss": 0.0291, "step": 7938 }, { "epoch": 0.87, "grad_norm": 0.18691018836970197, "learning_rate": 1.698548943948448e-06, "loss": 0.0242, "step": 7939 }, { "epoch": 0.87, "grad_norm": 0.2730643041720939, "learning_rate": 1.6956810561654213e-06, "loss": 0.0353, "step": 7940 }, { "epoch": 0.87, "grad_norm": 0.16982099673706363, "learning_rate": 1.6928154843644717e-06, "loss": 0.0179, "step": 7941 }, { "epoch": 0.87, "grad_norm": 0.154833933368002, "learning_rate": 1.6899522289081737e-06, "loss": 0.0234, "step": 7942 }, { "epoch": 0.87, "grad_norm": 0.24796870644466257, "learning_rate": 1.6870912901588044e-06, "loss": 0.0327, "step": 7943 }, { "epoch": 0.87, "grad_norm": 0.20477279458416758, "learning_rate": 1.6842326684783473e-06, "loss": 0.0176, "step": 7944 }, { "epoch": 0.87, "grad_norm": 0.15502041167982097, "learning_rate": 1.6813763642284953e-06, "loss": 0.0278, "step": 7945 }, { "epoch": 0.87, "grad_norm": 0.21465272213631295, "learning_rate": 1.6785223777706482e-06, "loss": 0.0294, "step": 7946 }, { "epoch": 0.87, "grad_norm": 0.4015118229278483, "learning_rate": 1.6756707094659063e-06, "loss": 0.0559, "step": 7947 }, { "epoch": 0.87, "grad_norm": 0.19306698279519022, "learning_rate": 1.6728213596750831e-06, "loss": 0.0243, "step": 7948 }, { "epoch": 0.87, "grad_norm": 0.22122094502379186, "learning_rate": 1.6699743287586967e-06, "loss": 0.0334, "step": 7949 }, { "epoch": 0.87, "grad_norm": 0.22587818355090103, "learning_rate": 1.667129617076977e-06, "loss": 0.0463, "step": 7950 }, { "epoch": 0.87, "grad_norm": 0.24701797172188855, "learning_rate": 1.664287224989849e-06, "loss": 0.0347, "step": 7951 }, { "epoch": 0.87, "grad_norm": 0.2561457974538717, "learning_rate": 1.661447152856952e-06, "loss": 0.037, "step": 7952 }, { "epoch": 0.87, "grad_norm": 0.16796007799340026, "learning_rate": 1.658609401037632e-06, "loss": 0.0266, "step": 7953 }, { "epoch": 0.87, "grad_norm": 0.23761036498249274, "learning_rate": 1.6557739698909436e-06, "loss": 0.0325, "step": 7954 }, { "epoch": 0.87, "grad_norm": 0.2519971823799712, "learning_rate": 1.6529408597756359e-06, "loss": 0.0411, "step": 7955 }, { "epoch": 0.87, "grad_norm": 0.23477529674036418, "learning_rate": 1.650110071050175e-06, "loss": 0.0226, "step": 7956 }, { "epoch": 0.87, "grad_norm": 0.26944641492036814, "learning_rate": 1.6472816040727346e-06, "loss": 0.0243, "step": 7957 }, { "epoch": 0.87, "grad_norm": 0.2194209537846694, "learning_rate": 1.6444554592011909e-06, "loss": 0.023, "step": 7958 }, { "epoch": 0.87, "grad_norm": 0.18514601924654112, "learning_rate": 1.6416316367931173e-06, "loss": 0.0245, "step": 7959 }, { "epoch": 0.87, "grad_norm": 0.17140778523490457, "learning_rate": 1.638810137205813e-06, "loss": 0.0268, "step": 7960 }, { "epoch": 0.87, "grad_norm": 0.2409443405184701, "learning_rate": 1.6359909607962676e-06, "loss": 0.029, "step": 7961 }, { "epoch": 0.87, "grad_norm": 0.18308959892165264, "learning_rate": 1.6331741079211872e-06, "loss": 0.0284, "step": 7962 }, { "epoch": 0.87, "grad_norm": 0.2210233610005486, "learning_rate": 1.630359578936971e-06, "loss": 0.0332, "step": 7963 }, { "epoch": 0.87, "grad_norm": 0.15409226069159063, "learning_rate": 1.627547374199734e-06, "loss": 0.0222, "step": 7964 }, { "epoch": 0.87, "grad_norm": 0.2537657862718482, "learning_rate": 1.624737494065294e-06, "loss": 0.0256, "step": 7965 }, { "epoch": 0.87, "grad_norm": 0.2758209032507009, "learning_rate": 1.6219299388891797e-06, "loss": 0.0256, "step": 7966 }, { "epoch": 0.87, "grad_norm": 0.22660797583532027, "learning_rate": 1.619124709026616e-06, "loss": 0.0342, "step": 7967 }, { "epoch": 0.88, "grad_norm": 0.1772977055000426, "learning_rate": 1.6163218048325413e-06, "loss": 0.0264, "step": 7968 }, { "epoch": 0.88, "grad_norm": 0.42842704392910747, "learning_rate": 1.6135212266615963e-06, "loss": 0.0532, "step": 7969 }, { "epoch": 0.88, "grad_norm": 0.265535171129659, "learning_rate": 1.610722974868133e-06, "loss": 0.0358, "step": 7970 }, { "epoch": 0.88, "grad_norm": 0.36208694783606293, "learning_rate": 1.6079270498061995e-06, "loss": 0.0381, "step": 7971 }, { "epoch": 0.88, "grad_norm": 0.19888617299617603, "learning_rate": 1.6051334518295546e-06, "loss": 0.0312, "step": 7972 }, { "epoch": 0.88, "grad_norm": 0.23326172276177234, "learning_rate": 1.6023421812916628e-06, "loss": 0.028, "step": 7973 }, { "epoch": 0.88, "grad_norm": 0.29779875240237524, "learning_rate": 1.5995532385456992e-06, "loss": 0.0368, "step": 7974 }, { "epoch": 0.88, "grad_norm": 0.2704987231769148, "learning_rate": 1.59676662394453e-06, "loss": 0.0394, "step": 7975 }, { "epoch": 0.88, "grad_norm": 0.19269490947020068, "learning_rate": 1.5939823378407426e-06, "loss": 0.0414, "step": 7976 }, { "epoch": 0.88, "grad_norm": 0.2513750527076912, "learning_rate": 1.5912003805866216e-06, "loss": 0.0279, "step": 7977 }, { "epoch": 0.88, "grad_norm": 0.2128318215448173, "learning_rate": 1.5884207525341566e-06, "loss": 0.0147, "step": 7978 }, { "epoch": 0.88, "grad_norm": 0.26181646202624653, "learning_rate": 1.5856434540350462e-06, "loss": 0.0401, "step": 7979 }, { "epoch": 0.88, "grad_norm": 0.21412469872165646, "learning_rate": 1.5828684854406918e-06, "loss": 0.0262, "step": 7980 }, { "epoch": 0.88, "grad_norm": 0.25767178018471165, "learning_rate": 1.5800958471022032e-06, "loss": 0.0421, "step": 7981 }, { "epoch": 0.88, "grad_norm": 0.19291822606033907, "learning_rate": 1.577325539370389e-06, "loss": 0.0266, "step": 7982 }, { "epoch": 0.88, "grad_norm": 0.18335442466863613, "learning_rate": 1.5745575625957665e-06, "loss": 0.0366, "step": 7983 }, { "epoch": 0.88, "grad_norm": 0.19633944419961502, "learning_rate": 1.57179191712856e-06, "loss": 0.0376, "step": 7984 }, { "epoch": 0.88, "grad_norm": 0.17817342417852305, "learning_rate": 1.5690286033186985e-06, "loss": 0.0355, "step": 7985 }, { "epoch": 0.88, "grad_norm": 0.30630844492739323, "learning_rate": 1.5662676215158112e-06, "loss": 0.0446, "step": 7986 }, { "epoch": 0.88, "grad_norm": 0.22457843235668706, "learning_rate": 1.5635089720692366e-06, "loss": 0.0276, "step": 7987 }, { "epoch": 0.88, "grad_norm": 0.23044246588359513, "learning_rate": 1.5607526553280172e-06, "loss": 0.0273, "step": 7988 }, { "epoch": 0.88, "grad_norm": 0.2203293479402333, "learning_rate": 1.5579986716409036e-06, "loss": 0.0296, "step": 7989 }, { "epoch": 0.88, "grad_norm": 0.19257002885060853, "learning_rate": 1.5552470213563408e-06, "loss": 0.032, "step": 7990 }, { "epoch": 0.88, "grad_norm": 0.2546424316830903, "learning_rate": 1.5524977048224888e-06, "loss": 0.0315, "step": 7991 }, { "epoch": 0.88, "grad_norm": 0.1907247165269804, "learning_rate": 1.549750722387211e-06, "loss": 0.037, "step": 7992 }, { "epoch": 0.88, "grad_norm": 0.483785917097084, "learning_rate": 1.5470060743980765e-06, "loss": 0.0817, "step": 7993 }, { "epoch": 0.88, "grad_norm": 0.18970231254472153, "learning_rate": 1.5442637612023425e-06, "loss": 0.0295, "step": 7994 }, { "epoch": 0.88, "grad_norm": 0.20054782442195834, "learning_rate": 1.5415237831470008e-06, "loss": 0.0219, "step": 7995 }, { "epoch": 0.88, "grad_norm": 0.18206602959464085, "learning_rate": 1.5387861405787252e-06, "loss": 0.0295, "step": 7996 }, { "epoch": 0.88, "grad_norm": 0.20744311552615868, "learning_rate": 1.5360508338438962e-06, "loss": 0.0236, "step": 7997 }, { "epoch": 0.88, "grad_norm": 0.24357862255503948, "learning_rate": 1.5333178632886058e-06, "loss": 0.0562, "step": 7998 }, { "epoch": 0.88, "grad_norm": 0.19426827714549116, "learning_rate": 1.5305872292586489e-06, "loss": 0.0287, "step": 7999 }, { "epoch": 0.88, "grad_norm": 0.14979020935281698, "learning_rate": 1.5278589320995218e-06, "loss": 0.0165, "step": 8000 }, { "epoch": 0.88, "grad_norm": 0.22219107371486266, "learning_rate": 1.5251329721564245e-06, "loss": 0.0269, "step": 8001 }, { "epoch": 0.88, "grad_norm": 0.20202302036797745, "learning_rate": 1.5224093497742654e-06, "loss": 0.0344, "step": 8002 }, { "epoch": 0.88, "grad_norm": 0.17795456880865826, "learning_rate": 1.5196880652976553e-06, "loss": 0.026, "step": 8003 }, { "epoch": 0.88, "grad_norm": 0.21498140780047229, "learning_rate": 1.5169691190709057e-06, "loss": 0.0522, "step": 8004 }, { "epoch": 0.88, "grad_norm": 0.19789309842340624, "learning_rate": 1.5142525114380436e-06, "loss": 0.0397, "step": 8005 }, { "epoch": 0.88, "grad_norm": 0.28882247393040084, "learning_rate": 1.5115382427427827e-06, "loss": 0.0394, "step": 8006 }, { "epoch": 0.88, "grad_norm": 0.1445606498199923, "learning_rate": 1.508826313328553e-06, "loss": 0.0189, "step": 8007 }, { "epoch": 0.88, "grad_norm": 0.18835116944067726, "learning_rate": 1.5061167235384867e-06, "loss": 0.043, "step": 8008 }, { "epoch": 0.88, "grad_norm": 0.17654901891909514, "learning_rate": 1.5034094737154204e-06, "loss": 0.034, "step": 8009 }, { "epoch": 0.88, "grad_norm": 0.2584321637969228, "learning_rate": 1.5007045642018868e-06, "loss": 0.029, "step": 8010 }, { "epoch": 0.88, "grad_norm": 0.17510615092584242, "learning_rate": 1.4980019953401327e-06, "loss": 0.0268, "step": 8011 }, { "epoch": 0.88, "grad_norm": 0.19896464878730663, "learning_rate": 1.4953017674721083e-06, "loss": 0.027, "step": 8012 }, { "epoch": 0.88, "grad_norm": 0.23013502948979184, "learning_rate": 1.4926038809394606e-06, "loss": 0.0337, "step": 8013 }, { "epoch": 0.88, "grad_norm": 0.23607700183724187, "learning_rate": 1.4899083360835408e-06, "loss": 0.0458, "step": 8014 }, { "epoch": 0.88, "grad_norm": 0.2883132942605547, "learning_rate": 1.4872151332454122e-06, "loss": 0.0408, "step": 8015 }, { "epoch": 0.88, "grad_norm": 0.2031435041068808, "learning_rate": 1.484524272765837e-06, "loss": 0.0263, "step": 8016 }, { "epoch": 0.88, "grad_norm": 0.2153970356437948, "learning_rate": 1.4818357549852747e-06, "loss": 0.0318, "step": 8017 }, { "epoch": 0.88, "grad_norm": 0.18207555403629702, "learning_rate": 1.479149580243895e-06, "loss": 0.0288, "step": 8018 }, { "epoch": 0.88, "grad_norm": 0.16530708129237068, "learning_rate": 1.4764657488815748e-06, "loss": 0.0193, "step": 8019 }, { "epoch": 0.88, "grad_norm": 0.21682695231705063, "learning_rate": 1.4737842612378894e-06, "loss": 0.0305, "step": 8020 }, { "epoch": 0.88, "grad_norm": 0.18897665218550008, "learning_rate": 1.471105117652114e-06, "loss": 0.0233, "step": 8021 }, { "epoch": 0.88, "grad_norm": 0.1509863831754559, "learning_rate": 1.468428318463233e-06, "loss": 0.0213, "step": 8022 }, { "epoch": 0.88, "grad_norm": 0.1847319828399, "learning_rate": 1.4657538640099333e-06, "loss": 0.0267, "step": 8023 }, { "epoch": 0.88, "grad_norm": 0.15537337919480862, "learning_rate": 1.4630817546306087e-06, "loss": 0.026, "step": 8024 }, { "epoch": 0.88, "grad_norm": 0.21107534919916818, "learning_rate": 1.460411990663344e-06, "loss": 0.0478, "step": 8025 }, { "epoch": 0.88, "grad_norm": 0.21762419406373532, "learning_rate": 1.4577445724459382e-06, "loss": 0.0387, "step": 8026 }, { "epoch": 0.88, "grad_norm": 0.19120625343079603, "learning_rate": 1.4550795003158902e-06, "loss": 0.0294, "step": 8027 }, { "epoch": 0.88, "grad_norm": 0.17440145218426678, "learning_rate": 1.4524167746104034e-06, "loss": 0.0396, "step": 8028 }, { "epoch": 0.88, "grad_norm": 0.1819761417943983, "learning_rate": 1.449756395666384e-06, "loss": 0.0328, "step": 8029 }, { "epoch": 0.88, "grad_norm": 0.21349745191889433, "learning_rate": 1.4470983638204384e-06, "loss": 0.0422, "step": 8030 }, { "epoch": 0.88, "grad_norm": 0.19151451980148682, "learning_rate": 1.444442679408884e-06, "loss": 0.0294, "step": 8031 }, { "epoch": 0.88, "grad_norm": 0.203144329325111, "learning_rate": 1.4417893427677276e-06, "loss": 0.034, "step": 8032 }, { "epoch": 0.88, "grad_norm": 0.30827546908434406, "learning_rate": 1.4391383542326875e-06, "loss": 0.0361, "step": 8033 }, { "epoch": 0.88, "grad_norm": 0.2233949420653495, "learning_rate": 1.4364897141391888e-06, "loss": 0.039, "step": 8034 }, { "epoch": 0.88, "grad_norm": 0.26912777874190214, "learning_rate": 1.4338434228223541e-06, "loss": 0.0667, "step": 8035 }, { "epoch": 0.88, "grad_norm": 0.20267366738865317, "learning_rate": 1.4311994806170048e-06, "loss": 0.0285, "step": 8036 }, { "epoch": 0.88, "grad_norm": 0.2503723709189332, "learning_rate": 1.4285578878576734e-06, "loss": 0.0389, "step": 8037 }, { "epoch": 0.88, "grad_norm": 0.272093850327352, "learning_rate": 1.42591864487859e-06, "loss": 0.0203, "step": 8038 }, { "epoch": 0.88, "grad_norm": 0.19977832258536013, "learning_rate": 1.4232817520136922e-06, "loss": 0.0337, "step": 8039 }, { "epoch": 0.88, "grad_norm": 0.1756499329784818, "learning_rate": 1.4206472095966107e-06, "loss": 0.0306, "step": 8040 }, { "epoch": 0.88, "grad_norm": 0.1650859608124183, "learning_rate": 1.41801501796069e-06, "loss": 0.0266, "step": 8041 }, { "epoch": 0.88, "grad_norm": 0.1699920049182769, "learning_rate": 1.4153851774389703e-06, "loss": 0.019, "step": 8042 }, { "epoch": 0.88, "grad_norm": 0.21420148736805092, "learning_rate": 1.4127576883642014e-06, "loss": 0.0281, "step": 8043 }, { "epoch": 0.88, "grad_norm": 0.18611832008902707, "learning_rate": 1.4101325510688192e-06, "loss": 0.0186, "step": 8044 }, { "epoch": 0.88, "grad_norm": 0.2832860364893558, "learning_rate": 1.4075097658849824e-06, "loss": 0.0357, "step": 8045 }, { "epoch": 0.88, "grad_norm": 0.21569811265614477, "learning_rate": 1.4048893331445367e-06, "loss": 0.02, "step": 8046 }, { "epoch": 0.88, "grad_norm": 0.22187091125159475, "learning_rate": 1.4022712531790462e-06, "loss": 0.0269, "step": 8047 }, { "epoch": 0.88, "grad_norm": 0.2826525235708043, "learning_rate": 1.3996555263197587e-06, "loss": 0.0293, "step": 8048 }, { "epoch": 0.88, "grad_norm": 0.20756747421258975, "learning_rate": 1.3970421528976342e-06, "loss": 0.0413, "step": 8049 }, { "epoch": 0.88, "grad_norm": 0.21787853014543335, "learning_rate": 1.3944311332433368e-06, "loss": 0.0261, "step": 8050 }, { "epoch": 0.88, "grad_norm": 0.2566588919306328, "learning_rate": 1.3918224676872294e-06, "loss": 0.0436, "step": 8051 }, { "epoch": 0.88, "grad_norm": 0.1961498620957768, "learning_rate": 1.3892161565593743e-06, "loss": 0.0288, "step": 8052 }, { "epoch": 0.88, "grad_norm": 0.1820423953692304, "learning_rate": 1.386612200189541e-06, "loss": 0.0375, "step": 8053 }, { "epoch": 0.88, "grad_norm": 0.25915954270082875, "learning_rate": 1.3840105989071995e-06, "loss": 0.0373, "step": 8054 }, { "epoch": 0.88, "grad_norm": 0.35149368036403383, "learning_rate": 1.3814113530415218e-06, "loss": 0.0476, "step": 8055 }, { "epoch": 0.88, "grad_norm": 0.18617289492847594, "learning_rate": 1.3788144629213785e-06, "loss": 0.0298, "step": 8056 }, { "epoch": 0.88, "grad_norm": 0.319603903795184, "learning_rate": 1.3762199288753485e-06, "loss": 0.0443, "step": 8057 }, { "epoch": 0.88, "grad_norm": 0.19687867690997315, "learning_rate": 1.3736277512317076e-06, "loss": 0.0427, "step": 8058 }, { "epoch": 0.89, "grad_norm": 0.19770394153225865, "learning_rate": 1.3710379303184374e-06, "loss": 0.025, "step": 8059 }, { "epoch": 0.89, "grad_norm": 0.18163010351410475, "learning_rate": 1.3684504664632137e-06, "loss": 0.019, "step": 8060 }, { "epoch": 0.89, "grad_norm": 0.19604836124809333, "learning_rate": 1.3658653599934235e-06, "loss": 0.0466, "step": 8061 }, { "epoch": 0.89, "grad_norm": 0.20865078621361277, "learning_rate": 1.3632826112361497e-06, "loss": 0.0335, "step": 8062 }, { "epoch": 0.89, "grad_norm": 0.19071613189272868, "learning_rate": 1.3607022205181798e-06, "loss": 0.0224, "step": 8063 }, { "epoch": 0.89, "grad_norm": 0.17919461402901632, "learning_rate": 1.3581241881660011e-06, "loss": 0.0253, "step": 8064 }, { "epoch": 0.89, "grad_norm": 0.20057624303165597, "learning_rate": 1.3555485145058044e-06, "loss": 0.0384, "step": 8065 }, { "epoch": 0.89, "grad_norm": 0.20989389458100902, "learning_rate": 1.352975199863482e-06, "loss": 0.0243, "step": 8066 }, { "epoch": 0.89, "grad_norm": 0.3578338749926915, "learning_rate": 1.3504042445646205e-06, "loss": 0.0494, "step": 8067 }, { "epoch": 0.89, "grad_norm": 0.23241561592714144, "learning_rate": 1.3478356489345168e-06, "loss": 0.0174, "step": 8068 }, { "epoch": 0.89, "grad_norm": 0.1957165163373474, "learning_rate": 1.345269413298167e-06, "loss": 0.0263, "step": 8069 }, { "epoch": 0.89, "grad_norm": 0.2510463305919896, "learning_rate": 1.3427055379802733e-06, "loss": 0.0476, "step": 8070 }, { "epoch": 0.89, "grad_norm": 0.1708102461917083, "learning_rate": 1.3401440233052233e-06, "loss": 0.0208, "step": 8071 }, { "epoch": 0.89, "grad_norm": 0.17977720592186733, "learning_rate": 1.3375848695971239e-06, "loss": 0.0254, "step": 8072 }, { "epoch": 0.89, "grad_norm": 0.20638241851938854, "learning_rate": 1.335028077179774e-06, "loss": 0.0284, "step": 8073 }, { "epoch": 0.89, "grad_norm": 0.1685974641449145, "learning_rate": 1.3324736463766775e-06, "loss": 0.0263, "step": 8074 }, { "epoch": 0.89, "grad_norm": 0.21196650370392212, "learning_rate": 1.329921577511033e-06, "loss": 0.0388, "step": 8075 }, { "epoch": 0.89, "grad_norm": 0.1863458397079767, "learning_rate": 1.3273718709057493e-06, "loss": 0.0245, "step": 8076 }, { "epoch": 0.89, "grad_norm": 0.20820650273315364, "learning_rate": 1.3248245268834304e-06, "loss": 0.0215, "step": 8077 }, { "epoch": 0.89, "grad_norm": 0.19686201559875893, "learning_rate": 1.322279545766385e-06, "loss": 0.0248, "step": 8078 }, { "epoch": 0.89, "grad_norm": 0.1644193084452681, "learning_rate": 1.3197369278766182e-06, "loss": 0.0229, "step": 8079 }, { "epoch": 0.89, "grad_norm": 0.20544142559526438, "learning_rate": 1.3171966735358343e-06, "loss": 0.0309, "step": 8080 }, { "epoch": 0.89, "grad_norm": 0.17404322228099126, "learning_rate": 1.31465878306545e-06, "loss": 0.0214, "step": 8081 }, { "epoch": 0.89, "grad_norm": 0.21334523474351408, "learning_rate": 1.3121232567865793e-06, "loss": 0.03, "step": 8082 }, { "epoch": 0.89, "grad_norm": 0.2888267689993707, "learning_rate": 1.3095900950200235e-06, "loss": 0.0341, "step": 8083 }, { "epoch": 0.89, "grad_norm": 0.21944974628480754, "learning_rate": 1.3070592980862994e-06, "loss": 0.0291, "step": 8084 }, { "epoch": 0.89, "grad_norm": 0.19668181114224614, "learning_rate": 1.3045308663056179e-06, "loss": 0.0196, "step": 8085 }, { "epoch": 0.89, "grad_norm": 0.21699094248362136, "learning_rate": 1.3020047999979002e-06, "loss": 0.0417, "step": 8086 }, { "epoch": 0.89, "grad_norm": 0.18290534718347867, "learning_rate": 1.2994810994827511e-06, "loss": 0.0293, "step": 8087 }, { "epoch": 0.89, "grad_norm": 0.2280492016528875, "learning_rate": 1.2969597650794907e-06, "loss": 0.0276, "step": 8088 }, { "epoch": 0.89, "grad_norm": 0.20649149548669107, "learning_rate": 1.2944407971071326e-06, "loss": 0.0219, "step": 8089 }, { "epoch": 0.89, "grad_norm": 0.19503651193073768, "learning_rate": 1.2919241958843975e-06, "loss": 0.0282, "step": 8090 }, { "epoch": 0.89, "grad_norm": 0.22498393133853425, "learning_rate": 1.289409961729695e-06, "loss": 0.0326, "step": 8091 }, { "epoch": 0.89, "grad_norm": 0.1995833930557682, "learning_rate": 1.2868980949611486e-06, "loss": 0.0335, "step": 8092 }, { "epoch": 0.89, "grad_norm": 0.22862793534481746, "learning_rate": 1.2843885958965708e-06, "loss": 0.0369, "step": 8093 }, { "epoch": 0.89, "grad_norm": 0.2395499489509689, "learning_rate": 1.2818814648534895e-06, "loss": 0.0404, "step": 8094 }, { "epoch": 0.89, "grad_norm": 0.20635799484964087, "learning_rate": 1.279376702149111e-06, "loss": 0.0209, "step": 8095 }, { "epoch": 0.89, "grad_norm": 0.2387144341116745, "learning_rate": 1.2768743081003598e-06, "loss": 0.0399, "step": 8096 }, { "epoch": 0.89, "grad_norm": 0.24765848555894632, "learning_rate": 1.2743742830238582e-06, "loss": 0.0439, "step": 8097 }, { "epoch": 0.89, "grad_norm": 0.14794857667084915, "learning_rate": 1.2718766272359195e-06, "loss": 0.0186, "step": 8098 }, { "epoch": 0.89, "grad_norm": 0.28252392583146135, "learning_rate": 1.269381341052569e-06, "loss": 0.0388, "step": 8099 }, { "epoch": 0.89, "grad_norm": 0.16836228954509633, "learning_rate": 1.266888424789523e-06, "loss": 0.0242, "step": 8100 }, { "epoch": 0.89, "grad_norm": 0.35681785032370694, "learning_rate": 1.2643978787622069e-06, "loss": 0.0457, "step": 8101 }, { "epoch": 0.89, "grad_norm": 0.19081107852267626, "learning_rate": 1.261909703285733e-06, "loss": 0.0302, "step": 8102 }, { "epoch": 0.89, "grad_norm": 0.22440415834462196, "learning_rate": 1.2594238986749273e-06, "loss": 0.047, "step": 8103 }, { "epoch": 0.89, "grad_norm": 0.15405061239724732, "learning_rate": 1.2569404652443073e-06, "loss": 0.012, "step": 8104 }, { "epoch": 0.89, "grad_norm": 0.16322141719218639, "learning_rate": 1.2544594033080947e-06, "loss": 0.0246, "step": 8105 }, { "epoch": 0.89, "grad_norm": 0.18345814627697715, "learning_rate": 1.2519807131802097e-06, "loss": 0.0366, "step": 8106 }, { "epoch": 0.89, "grad_norm": 0.22879683480226257, "learning_rate": 1.2495043951742703e-06, "loss": 0.0463, "step": 8107 }, { "epoch": 0.89, "grad_norm": 0.2875033160723305, "learning_rate": 1.2470304496035968e-06, "loss": 0.0374, "step": 8108 }, { "epoch": 0.89, "grad_norm": 0.18928059853146756, "learning_rate": 1.2445588767812145e-06, "loss": 0.0219, "step": 8109 }, { "epoch": 0.89, "grad_norm": 0.24059148932862864, "learning_rate": 1.2420896770198355e-06, "loss": 0.0378, "step": 8110 }, { "epoch": 0.89, "grad_norm": 0.21259041713853652, "learning_rate": 1.239622850631881e-06, "loss": 0.0215, "step": 8111 }, { "epoch": 0.89, "grad_norm": 0.16772253265649945, "learning_rate": 1.237158397929472e-06, "loss": 0.0139, "step": 8112 }, { "epoch": 0.89, "grad_norm": 0.2511345629468641, "learning_rate": 1.2346963192244287e-06, "loss": 0.0382, "step": 8113 }, { "epoch": 0.89, "grad_norm": 0.26662164130422566, "learning_rate": 1.2322366148282638e-06, "loss": 0.0232, "step": 8114 }, { "epoch": 0.89, "grad_norm": 0.18192631803697984, "learning_rate": 1.2297792850521973e-06, "loss": 0.0137, "step": 8115 }, { "epoch": 0.89, "grad_norm": 0.22075382426784543, "learning_rate": 1.2273243302071513e-06, "loss": 0.0266, "step": 8116 }, { "epoch": 0.89, "grad_norm": 0.17563211684553523, "learning_rate": 1.2248717506037377e-06, "loss": 0.0273, "step": 8117 }, { "epoch": 0.89, "grad_norm": 0.21438683560566324, "learning_rate": 1.2224215465522726e-06, "loss": 0.0377, "step": 8118 }, { "epoch": 0.89, "grad_norm": 0.1878684615706186, "learning_rate": 1.2199737183627746e-06, "loss": 0.0292, "step": 8119 }, { "epoch": 0.89, "grad_norm": 0.18451518290031707, "learning_rate": 1.2175282663449584e-06, "loss": 0.0377, "step": 8120 }, { "epoch": 0.89, "grad_norm": 0.18551300141175717, "learning_rate": 1.2150851908082406e-06, "loss": 0.0264, "step": 8121 }, { "epoch": 0.89, "grad_norm": 0.20999712077789623, "learning_rate": 1.2126444920617297e-06, "loss": 0.0331, "step": 8122 }, { "epoch": 0.89, "grad_norm": 0.20624544254238392, "learning_rate": 1.2102061704142408e-06, "loss": 0.0297, "step": 8123 }, { "epoch": 0.89, "grad_norm": 0.18398038248318357, "learning_rate": 1.2077702261742875e-06, "loss": 0.0365, "step": 8124 }, { "epoch": 0.89, "grad_norm": 0.21233545762937733, "learning_rate": 1.2053366596500849e-06, "loss": 0.0257, "step": 8125 }, { "epoch": 0.89, "grad_norm": 0.1739435402115767, "learning_rate": 1.2029054711495358e-06, "loss": 0.0298, "step": 8126 }, { "epoch": 0.89, "grad_norm": 0.18857947193574698, "learning_rate": 1.2004766609802543e-06, "loss": 0.0283, "step": 8127 }, { "epoch": 0.89, "grad_norm": 0.22744293802023172, "learning_rate": 1.19805022944955e-06, "loss": 0.0183, "step": 8128 }, { "epoch": 0.89, "grad_norm": 0.18339907693802224, "learning_rate": 1.1956261768644328e-06, "loss": 0.0322, "step": 8129 }, { "epoch": 0.89, "grad_norm": 0.256323996769043, "learning_rate": 1.193204503531602e-06, "loss": 0.0281, "step": 8130 }, { "epoch": 0.89, "grad_norm": 0.21182444211174353, "learning_rate": 1.1907852097574723e-06, "loss": 0.0247, "step": 8131 }, { "epoch": 0.89, "grad_norm": 0.15462338631295972, "learning_rate": 1.1883682958481413e-06, "loss": 0.0219, "step": 8132 }, { "epoch": 0.89, "grad_norm": 0.19340168846385952, "learning_rate": 1.1859537621094175e-06, "loss": 0.018, "step": 8133 }, { "epoch": 0.89, "grad_norm": 0.16932275538190913, "learning_rate": 1.1835416088468033e-06, "loss": 0.0284, "step": 8134 }, { "epoch": 0.89, "grad_norm": 0.19175409317683567, "learning_rate": 1.1811318363654967e-06, "loss": 0.0241, "step": 8135 }, { "epoch": 0.89, "grad_norm": 0.2571991937048482, "learning_rate": 1.178724444970405e-06, "loss": 0.0273, "step": 8136 }, { "epoch": 0.89, "grad_norm": 0.2910050383824771, "learning_rate": 1.17631943496612e-06, "loss": 0.0371, "step": 8137 }, { "epoch": 0.89, "grad_norm": 0.14879430376567432, "learning_rate": 1.1739168066569406e-06, "loss": 0.0186, "step": 8138 }, { "epoch": 0.89, "grad_norm": 0.1794712070992341, "learning_rate": 1.1715165603468637e-06, "loss": 0.0235, "step": 8139 }, { "epoch": 0.89, "grad_norm": 0.23144780733075693, "learning_rate": 1.1691186963395861e-06, "loss": 0.0279, "step": 8140 }, { "epoch": 0.89, "grad_norm": 0.18887376191966082, "learning_rate": 1.1667232149384989e-06, "loss": 0.025, "step": 8141 }, { "epoch": 0.89, "grad_norm": 0.19330697361051746, "learning_rate": 1.1643301164466926e-06, "loss": 0.0123, "step": 8142 }, { "epoch": 0.89, "grad_norm": 0.2536770593168057, "learning_rate": 1.1619394011669605e-06, "loss": 0.0323, "step": 8143 }, { "epoch": 0.89, "grad_norm": 0.18273740269312416, "learning_rate": 1.1595510694017943e-06, "loss": 0.0207, "step": 8144 }, { "epoch": 0.89, "grad_norm": 0.19219626662566153, "learning_rate": 1.1571651214533764e-06, "loss": 0.0237, "step": 8145 }, { "epoch": 0.89, "grad_norm": 0.2027404795262321, "learning_rate": 1.154781557623592e-06, "loss": 0.0376, "step": 8146 }, { "epoch": 0.89, "grad_norm": 0.15081378309081103, "learning_rate": 1.1524003782140269e-06, "loss": 0.0162, "step": 8147 }, { "epoch": 0.89, "grad_norm": 0.1695457823629119, "learning_rate": 1.1500215835259664e-06, "loss": 0.0237, "step": 8148 }, { "epoch": 0.89, "grad_norm": 0.18068020722705433, "learning_rate": 1.1476451738603855e-06, "loss": 0.0391, "step": 8149 }, { "epoch": 0.9, "grad_norm": 0.26530709597023083, "learning_rate": 1.1452711495179659e-06, "loss": 0.0447, "step": 8150 }, { "epoch": 0.9, "grad_norm": 0.18722748821243787, "learning_rate": 1.1428995107990892e-06, "loss": 0.0266, "step": 8151 }, { "epoch": 0.9, "grad_norm": 0.22255358085661164, "learning_rate": 1.1405302580038224e-06, "loss": 0.0308, "step": 8152 }, { "epoch": 0.9, "grad_norm": 0.21457256194428165, "learning_rate": 1.1381633914319434e-06, "loss": 0.0347, "step": 8153 }, { "epoch": 0.9, "grad_norm": 0.20973767675122582, "learning_rate": 1.1357989113829237e-06, "loss": 0.0484, "step": 8154 }, { "epoch": 0.9, "grad_norm": 0.2583803512229484, "learning_rate": 1.1334368181559351e-06, "loss": 0.0406, "step": 8155 }, { "epoch": 0.9, "grad_norm": 0.25708935205554145, "learning_rate": 1.1310771120498386e-06, "loss": 0.0374, "step": 8156 }, { "epoch": 0.9, "grad_norm": 0.28250638823755325, "learning_rate": 1.1287197933632022e-06, "loss": 0.0535, "step": 8157 }, { "epoch": 0.9, "grad_norm": 0.2538457358213464, "learning_rate": 1.1263648623942912e-06, "loss": 0.0276, "step": 8158 }, { "epoch": 0.9, "grad_norm": 0.24326919672807157, "learning_rate": 1.124012319441068e-06, "loss": 0.0442, "step": 8159 }, { "epoch": 0.9, "grad_norm": 0.2387901016454865, "learning_rate": 1.1216621648011873e-06, "loss": 0.0316, "step": 8160 }, { "epoch": 0.9, "grad_norm": 0.23196353116820476, "learning_rate": 1.1193143987720067e-06, "loss": 0.0175, "step": 8161 }, { "epoch": 0.9, "grad_norm": 0.18492958383161237, "learning_rate": 1.1169690216505846e-06, "loss": 0.0284, "step": 8162 }, { "epoch": 0.9, "grad_norm": 0.2742841283997708, "learning_rate": 1.11462603373367e-06, "loss": 0.0291, "step": 8163 }, { "epoch": 0.9, "grad_norm": 0.26405287166399877, "learning_rate": 1.1122854353177171e-06, "loss": 0.0395, "step": 8164 }, { "epoch": 0.9, "grad_norm": 0.23485040926803213, "learning_rate": 1.1099472266988686e-06, "loss": 0.0286, "step": 8165 }, { "epoch": 0.9, "grad_norm": 0.20421760399693153, "learning_rate": 1.1076114081729682e-06, "loss": 0.0288, "step": 8166 }, { "epoch": 0.9, "grad_norm": 0.20372889687362652, "learning_rate": 1.1052779800355707e-06, "loss": 0.0279, "step": 8167 }, { "epoch": 0.9, "grad_norm": 0.22648018953180518, "learning_rate": 1.1029469425819039e-06, "loss": 0.0381, "step": 8168 }, { "epoch": 0.9, "grad_norm": 0.1658447283595015, "learning_rate": 1.1006182961069123e-06, "loss": 0.018, "step": 8169 }, { "epoch": 0.9, "grad_norm": 0.18974680603490496, "learning_rate": 1.0982920409052312e-06, "loss": 0.0295, "step": 8170 }, { "epoch": 0.9, "grad_norm": 0.19420371997154975, "learning_rate": 1.095968177271194e-06, "loss": 0.0351, "step": 8171 }, { "epoch": 0.9, "grad_norm": 0.19263376047461428, "learning_rate": 1.0936467054988276e-06, "loss": 0.0316, "step": 8172 }, { "epoch": 0.9, "grad_norm": 0.18404661288130614, "learning_rate": 1.0913276258818617e-06, "loss": 0.0232, "step": 8173 }, { "epoch": 0.9, "grad_norm": 0.2304224032336941, "learning_rate": 1.0890109387137216e-06, "loss": 0.0368, "step": 8174 }, { "epoch": 0.9, "grad_norm": 0.209428653674787, "learning_rate": 1.0866966442875304e-06, "loss": 0.0349, "step": 8175 }, { "epoch": 0.9, "grad_norm": 0.18893556589820637, "learning_rate": 1.0843847428961074e-06, "loss": 0.0259, "step": 8176 }, { "epoch": 0.9, "grad_norm": 0.21481023006777086, "learning_rate": 1.0820752348319673e-06, "loss": 0.0525, "step": 8177 }, { "epoch": 0.9, "grad_norm": 0.19360318851323052, "learning_rate": 1.0797681203873255e-06, "loss": 0.0239, "step": 8178 }, { "epoch": 0.9, "grad_norm": 0.24797456543418325, "learning_rate": 1.0774633998540974e-06, "loss": 0.0532, "step": 8179 }, { "epoch": 0.9, "grad_norm": 0.18944382398917967, "learning_rate": 1.0751610735238848e-06, "loss": 0.0417, "step": 8180 }, { "epoch": 0.9, "grad_norm": 0.2727139573115948, "learning_rate": 1.0728611416879975e-06, "loss": 0.0505, "step": 8181 }, { "epoch": 0.9, "grad_norm": 0.19017606621006206, "learning_rate": 1.0705636046374334e-06, "loss": 0.0157, "step": 8182 }, { "epoch": 0.9, "grad_norm": 0.2707596030500379, "learning_rate": 1.0682684626629002e-06, "loss": 0.0323, "step": 8183 }, { "epoch": 0.9, "grad_norm": 0.195041926840636, "learning_rate": 1.0659757160547813e-06, "loss": 0.0266, "step": 8184 }, { "epoch": 0.9, "grad_norm": 0.2631091293840357, "learning_rate": 1.0636853651031797e-06, "loss": 0.0362, "step": 8185 }, { "epoch": 0.9, "grad_norm": 0.2532716597024558, "learning_rate": 1.0613974100978885e-06, "loss": 0.036, "step": 8186 }, { "epoch": 0.9, "grad_norm": 0.22369384765625, "learning_rate": 1.059111851328385e-06, "loss": 0.0226, "step": 8187 }, { "epoch": 0.9, "grad_norm": 0.23997596861310688, "learning_rate": 1.0568286890838575e-06, "loss": 0.0256, "step": 8188 }, { "epoch": 0.9, "grad_norm": 0.18300212741355235, "learning_rate": 1.0545479236531885e-06, "loss": 0.0205, "step": 8189 }, { "epoch": 0.9, "grad_norm": 0.40957907984294883, "learning_rate": 1.0522695553249562e-06, "loss": 0.0525, "step": 8190 }, { "epoch": 0.9, "grad_norm": 0.32759796686577936, "learning_rate": 1.0499935843874275e-06, "loss": 0.0509, "step": 8191 }, { "epoch": 0.9, "grad_norm": 0.2185881390509298, "learning_rate": 1.047720011128579e-06, "loss": 0.0341, "step": 8192 }, { "epoch": 0.9, "grad_norm": 0.19067662196731774, "learning_rate": 1.045448835836076e-06, "loss": 0.0282, "step": 8193 }, { "epoch": 0.9, "grad_norm": 0.19990667609592316, "learning_rate": 1.0431800587972862e-06, "loss": 0.0164, "step": 8194 }, { "epoch": 0.9, "grad_norm": 0.2125035362790888, "learning_rate": 1.0409136802992647e-06, "loss": 0.0379, "step": 8195 }, { "epoch": 0.9, "grad_norm": 0.21321329110056897, "learning_rate": 1.038649700628771e-06, "loss": 0.0351, "step": 8196 }, { "epoch": 0.9, "grad_norm": 0.25643445679813426, "learning_rate": 1.036388120072258e-06, "loss": 0.0364, "step": 8197 }, { "epoch": 0.9, "grad_norm": 0.20116294915109967, "learning_rate": 1.0341289389158793e-06, "loss": 0.0245, "step": 8198 }, { "epoch": 0.9, "grad_norm": 0.19956297213781976, "learning_rate": 1.031872157445477e-06, "loss": 0.0245, "step": 8199 }, { "epoch": 0.9, "grad_norm": 0.21598542055733674, "learning_rate": 1.029617775946592e-06, "loss": 0.0294, "step": 8200 }, { "epoch": 0.9, "grad_norm": 0.2232396530314543, "learning_rate": 1.0273657947044647e-06, "loss": 0.0465, "step": 8201 }, { "epoch": 0.9, "grad_norm": 0.21620458164539047, "learning_rate": 1.0251162140040383e-06, "loss": 0.0153, "step": 8202 }, { "epoch": 0.9, "grad_norm": 0.1647575731693121, "learning_rate": 1.022869034129934e-06, "loss": 0.0153, "step": 8203 }, { "epoch": 0.9, "grad_norm": 0.2184068935192134, "learning_rate": 1.0206242553664868e-06, "loss": 0.0301, "step": 8204 }, { "epoch": 0.9, "grad_norm": 0.3173511782799954, "learning_rate": 1.0183818779977162e-06, "loss": 0.0494, "step": 8205 }, { "epoch": 0.9, "grad_norm": 0.1920471652103783, "learning_rate": 1.016141902307346e-06, "loss": 0.0299, "step": 8206 }, { "epoch": 0.9, "grad_norm": 0.19413349975159716, "learning_rate": 1.0139043285787897e-06, "loss": 0.0383, "step": 8207 }, { "epoch": 0.9, "grad_norm": 0.19461334977901326, "learning_rate": 1.011669157095161e-06, "loss": 0.0353, "step": 8208 }, { "epoch": 0.9, "grad_norm": 0.18873129317758788, "learning_rate": 1.0094363881392665e-06, "loss": 0.0206, "step": 8209 }, { "epoch": 0.9, "grad_norm": 0.22791468841442727, "learning_rate": 1.0072060219936164e-06, "loss": 0.0225, "step": 8210 }, { "epoch": 0.9, "grad_norm": 0.24479566056801333, "learning_rate": 1.0049780589404045e-06, "loss": 0.034, "step": 8211 }, { "epoch": 0.9, "grad_norm": 0.21795623531442687, "learning_rate": 1.002752499261528e-06, "loss": 0.0377, "step": 8212 }, { "epoch": 0.9, "grad_norm": 0.19785990758621003, "learning_rate": 1.0005293432385832e-06, "loss": 0.0366, "step": 8213 }, { "epoch": 0.9, "grad_norm": 0.2252275170940452, "learning_rate": 9.98308591152859e-07, "loss": 0.0393, "step": 8214 }, { "epoch": 0.9, "grad_norm": 0.2640023033103023, "learning_rate": 9.960902432853349e-07, "loss": 0.0233, "step": 8215 }, { "epoch": 0.9, "grad_norm": 0.300959497125171, "learning_rate": 9.93874299916693e-07, "loss": 0.0412, "step": 8216 }, { "epoch": 0.9, "grad_norm": 0.21366304731527114, "learning_rate": 9.916607613273067e-07, "loss": 0.0374, "step": 8217 }, { "epoch": 0.9, "grad_norm": 0.19288711481311144, "learning_rate": 9.894496277972498e-07, "loss": 0.0333, "step": 8218 }, { "epoch": 0.9, "grad_norm": 0.19007976366646723, "learning_rate": 9.8724089960629e-07, "loss": 0.0305, "step": 8219 }, { "epoch": 0.9, "grad_norm": 0.19160212012160233, "learning_rate": 9.850345770338875e-07, "loss": 0.0343, "step": 8220 }, { "epoch": 0.9, "grad_norm": 0.24360752649115458, "learning_rate": 9.828306603592063e-07, "loss": 0.044, "step": 8221 }, { "epoch": 0.9, "grad_norm": 0.19712945751890557, "learning_rate": 9.80629149861092e-07, "loss": 0.0278, "step": 8222 }, { "epoch": 0.9, "grad_norm": 0.19936790679403657, "learning_rate": 9.784300458180994e-07, "loss": 0.0357, "step": 8223 }, { "epoch": 0.9, "grad_norm": 0.21959241374770835, "learning_rate": 9.76233348508473e-07, "loss": 0.0282, "step": 8224 }, { "epoch": 0.9, "grad_norm": 0.20111109917353348, "learning_rate": 9.740390582101545e-07, "loss": 0.0254, "step": 8225 }, { "epoch": 0.9, "grad_norm": 0.16654274786693715, "learning_rate": 9.718471752007753e-07, "loss": 0.02, "step": 8226 }, { "epoch": 0.9, "grad_norm": 0.18185214575819714, "learning_rate": 9.69657699757669e-07, "loss": 0.0313, "step": 8227 }, { "epoch": 0.9, "grad_norm": 0.17936204378342663, "learning_rate": 9.67470632157863e-07, "loss": 0.015, "step": 8228 }, { "epoch": 0.9, "grad_norm": 0.19407313994941713, "learning_rate": 9.652859726780827e-07, "loss": 0.0285, "step": 8229 }, { "epoch": 0.9, "grad_norm": 0.1796495874463076, "learning_rate": 9.63103721594738e-07, "loss": 0.03, "step": 8230 }, { "epoch": 0.9, "grad_norm": 0.3431152856212495, "learning_rate": 9.60923879183946e-07, "loss": 0.0417, "step": 8231 }, { "epoch": 0.9, "grad_norm": 0.217390373557757, "learning_rate": 9.587464457215146e-07, "loss": 0.0273, "step": 8232 }, { "epoch": 0.9, "grad_norm": 0.1697552949665261, "learning_rate": 9.565714214829503e-07, "loss": 0.0224, "step": 8233 }, { "epoch": 0.9, "grad_norm": 0.24912640555837498, "learning_rate": 9.54398806743444e-07, "loss": 0.0255, "step": 8234 }, { "epoch": 0.9, "grad_norm": 0.243218396715547, "learning_rate": 9.522286017778892e-07, "loss": 0.0252, "step": 8235 }, { "epoch": 0.9, "grad_norm": 0.2819960685829629, "learning_rate": 9.500608068608841e-07, "loss": 0.0345, "step": 8236 }, { "epoch": 0.9, "grad_norm": 0.22887833656179069, "learning_rate": 9.478954222667025e-07, "loss": 0.0307, "step": 8237 }, { "epoch": 0.9, "grad_norm": 0.21275178073895368, "learning_rate": 9.457324482693275e-07, "loss": 0.0264, "step": 8238 }, { "epoch": 0.9, "grad_norm": 0.26779351301779925, "learning_rate": 9.435718851424336e-07, "loss": 0.0458, "step": 8239 }, { "epoch": 0.9, "grad_norm": 0.21481218973181224, "learning_rate": 9.414137331593842e-07, "loss": 0.0438, "step": 8240 }, { "epoch": 0.91, "grad_norm": 0.30718018508537476, "learning_rate": 9.392579925932499e-07, "loss": 0.02, "step": 8241 }, { "epoch": 0.91, "grad_norm": 0.1616745367748136, "learning_rate": 9.371046637167835e-07, "loss": 0.0213, "step": 8242 }, { "epoch": 0.91, "grad_norm": 0.1943421866020821, "learning_rate": 9.349537468024406e-07, "loss": 0.0319, "step": 8243 }, { "epoch": 0.91, "grad_norm": 0.1801895611876995, "learning_rate": 9.328052421223676e-07, "loss": 0.0197, "step": 8244 }, { "epoch": 0.91, "grad_norm": 0.23448535387515215, "learning_rate": 9.306591499484119e-07, "loss": 0.034, "step": 8245 }, { "epoch": 0.91, "grad_norm": 0.19631257556888324, "learning_rate": 9.285154705521048e-07, "loss": 0.033, "step": 8246 }, { "epoch": 0.91, "grad_norm": 0.16547725561072543, "learning_rate": 9.26374204204683e-07, "loss": 0.0198, "step": 8247 }, { "epoch": 0.91, "grad_norm": 0.22447749702971492, "learning_rate": 9.242353511770697e-07, "loss": 0.0315, "step": 8248 }, { "epoch": 0.91, "grad_norm": 0.25576799522642574, "learning_rate": 9.220989117398926e-07, "loss": 0.0225, "step": 8249 }, { "epoch": 0.91, "grad_norm": 0.19572030408919244, "learning_rate": 9.199648861634625e-07, "loss": 0.032, "step": 8250 }, { "epoch": 0.91, "grad_norm": 0.19454685822648343, "learning_rate": 9.17833274717792e-07, "loss": 0.0164, "step": 8251 }, { "epoch": 0.91, "grad_norm": 0.18337977906560887, "learning_rate": 9.157040776725856e-07, "loss": 0.0303, "step": 8252 }, { "epoch": 0.91, "grad_norm": 0.20508025032032087, "learning_rate": 9.135772952972455e-07, "loss": 0.0241, "step": 8253 }, { "epoch": 0.91, "grad_norm": 0.2032347895947305, "learning_rate": 9.11452927860863e-07, "loss": 0.034, "step": 8254 }, { "epoch": 0.91, "grad_norm": 0.23185767286395545, "learning_rate": 9.0933097563223e-07, "loss": 0.0352, "step": 8255 }, { "epoch": 0.91, "grad_norm": 0.21107869367743845, "learning_rate": 9.072114388798314e-07, "loss": 0.0164, "step": 8256 }, { "epoch": 0.91, "grad_norm": 0.1812799420782213, "learning_rate": 9.050943178718396e-07, "loss": 0.0213, "step": 8257 }, { "epoch": 0.91, "grad_norm": 0.23501533614245435, "learning_rate": 9.029796128761292e-07, "loss": 0.0462, "step": 8258 }, { "epoch": 0.91, "grad_norm": 0.17987446801860468, "learning_rate": 9.008673241602639e-07, "loss": 0.0134, "step": 8259 }, { "epoch": 0.91, "grad_norm": 0.21500580484571444, "learning_rate": 8.987574519915121e-07, "loss": 0.0221, "step": 8260 }, { "epoch": 0.91, "grad_norm": 0.2875447507942639, "learning_rate": 8.966499966368202e-07, "loss": 0.0324, "step": 8261 }, { "epoch": 0.91, "grad_norm": 0.17285456358464918, "learning_rate": 8.945449583628396e-07, "loss": 0.0219, "step": 8262 }, { "epoch": 0.91, "grad_norm": 0.2186172627584308, "learning_rate": 8.924423374359148e-07, "loss": 0.0399, "step": 8263 }, { "epoch": 0.91, "grad_norm": 0.16168439265186016, "learning_rate": 8.903421341220842e-07, "loss": 0.0228, "step": 8264 }, { "epoch": 0.91, "grad_norm": 0.21596027168629722, "learning_rate": 8.882443486870751e-07, "loss": 0.0226, "step": 8265 }, { "epoch": 0.91, "grad_norm": 0.19266367515121915, "learning_rate": 8.861489813963154e-07, "loss": 0.0274, "step": 8266 }, { "epoch": 0.91, "grad_norm": 0.17407398303282848, "learning_rate": 8.840560325149261e-07, "loss": 0.0195, "step": 8267 }, { "epoch": 0.91, "grad_norm": 0.25294432726582544, "learning_rate": 8.819655023077201e-07, "loss": 0.0463, "step": 8268 }, { "epoch": 0.91, "grad_norm": 0.2548247377143722, "learning_rate": 8.798773910392033e-07, "loss": 0.0439, "step": 8269 }, { "epoch": 0.91, "grad_norm": 0.18935253973235466, "learning_rate": 8.777916989735736e-07, "loss": 0.0262, "step": 8270 }, { "epoch": 0.91, "grad_norm": 0.1994803898394527, "learning_rate": 8.757084263747373e-07, "loss": 0.032, "step": 8271 }, { "epoch": 0.91, "grad_norm": 0.18605138552705197, "learning_rate": 8.736275735062749e-07, "loss": 0.0152, "step": 8272 }, { "epoch": 0.91, "grad_norm": 0.3722836340984241, "learning_rate": 8.715491406314713e-07, "loss": 0.0647, "step": 8273 }, { "epoch": 0.91, "grad_norm": 0.2057271949255089, "learning_rate": 8.694731280133051e-07, "loss": 0.0281, "step": 8274 }, { "epoch": 0.91, "grad_norm": 0.1838929092734693, "learning_rate": 8.673995359144483e-07, "loss": 0.0205, "step": 8275 }, { "epoch": 0.91, "grad_norm": 0.21421806084920603, "learning_rate": 8.653283645972598e-07, "loss": 0.0257, "step": 8276 }, { "epoch": 0.91, "grad_norm": 0.1926045471233725, "learning_rate": 8.632596143237992e-07, "loss": 0.0352, "step": 8277 }, { "epoch": 0.91, "grad_norm": 0.23870126259245716, "learning_rate": 8.611932853558236e-07, "loss": 0.0531, "step": 8278 }, { "epoch": 0.91, "grad_norm": 0.2004222235203365, "learning_rate": 8.591293779547727e-07, "loss": 0.0179, "step": 8279 }, { "epoch": 0.91, "grad_norm": 0.18264973192357153, "learning_rate": 8.570678923817888e-07, "loss": 0.0225, "step": 8280 }, { "epoch": 0.91, "grad_norm": 0.26262541908256143, "learning_rate": 8.550088288977032e-07, "loss": 0.0472, "step": 8281 }, { "epoch": 0.91, "grad_norm": 0.25773529861992367, "learning_rate": 8.529521877630409e-07, "loss": 0.0334, "step": 8282 }, { "epoch": 0.91, "grad_norm": 0.1868243162451725, "learning_rate": 8.508979692380249e-07, "loss": 0.0273, "step": 8283 }, { "epoch": 0.91, "grad_norm": 0.1743671760034363, "learning_rate": 8.48846173582567e-07, "loss": 0.0215, "step": 8284 }, { "epoch": 0.91, "grad_norm": 0.22221440259682573, "learning_rate": 8.467968010562711e-07, "loss": 0.036, "step": 8285 }, { "epoch": 0.91, "grad_norm": 0.2816999862270061, "learning_rate": 8.447498519184405e-07, "loss": 0.0304, "step": 8286 }, { "epoch": 0.91, "grad_norm": 0.15049164339402962, "learning_rate": 8.42705326428066e-07, "loss": 0.0201, "step": 8287 }, { "epoch": 0.91, "grad_norm": 0.198307806345388, "learning_rate": 8.406632248438362e-07, "loss": 0.0191, "step": 8288 }, { "epoch": 0.91, "grad_norm": 0.20663963923372394, "learning_rate": 8.386235474241311e-07, "loss": 0.0309, "step": 8289 }, { "epoch": 0.91, "grad_norm": 0.26835280719921417, "learning_rate": 8.365862944270243e-07, "loss": 0.0494, "step": 8290 }, { "epoch": 0.91, "grad_norm": 0.25674922915420034, "learning_rate": 8.345514661102827e-07, "loss": 0.0389, "step": 8291 }, { "epoch": 0.91, "grad_norm": 0.18573245756093892, "learning_rate": 8.325190627313628e-07, "loss": 0.0307, "step": 8292 }, { "epoch": 0.91, "grad_norm": 0.17700843536082836, "learning_rate": 8.304890845474189e-07, "loss": 0.0328, "step": 8293 }, { "epoch": 0.91, "grad_norm": 0.20095938969712215, "learning_rate": 8.284615318152988e-07, "loss": 0.0246, "step": 8294 }, { "epoch": 0.91, "grad_norm": 0.23885577931769858, "learning_rate": 8.264364047915441e-07, "loss": 0.0273, "step": 8295 }, { "epoch": 0.91, "grad_norm": 0.2234165695364045, "learning_rate": 8.244137037323807e-07, "loss": 0.0341, "step": 8296 }, { "epoch": 0.91, "grad_norm": 0.23234503942566662, "learning_rate": 8.223934288937374e-07, "loss": 0.0344, "step": 8297 }, { "epoch": 0.91, "grad_norm": 0.21781043521006416, "learning_rate": 8.203755805312319e-07, "loss": 0.032, "step": 8298 }, { "epoch": 0.91, "grad_norm": 0.2640930767526961, "learning_rate": 8.1836015890018e-07, "loss": 0.0475, "step": 8299 }, { "epoch": 0.91, "grad_norm": 0.2025630127477468, "learning_rate": 8.163471642555798e-07, "loss": 0.0199, "step": 8300 }, { "epoch": 0.91, "grad_norm": 0.2611568168843046, "learning_rate": 8.143365968521277e-07, "loss": 0.0302, "step": 8301 }, { "epoch": 0.91, "grad_norm": 0.22005412798652046, "learning_rate": 8.123284569442203e-07, "loss": 0.035, "step": 8302 }, { "epoch": 0.91, "grad_norm": 0.21233357160200297, "learning_rate": 8.103227447859385e-07, "loss": 0.0329, "step": 8303 }, { "epoch": 0.91, "grad_norm": 0.22018050033830233, "learning_rate": 8.083194606310507e-07, "loss": 0.0305, "step": 8304 }, { "epoch": 0.91, "grad_norm": 0.2089986439762432, "learning_rate": 8.063186047330362e-07, "loss": 0.0469, "step": 8305 }, { "epoch": 0.91, "grad_norm": 0.18398102030358274, "learning_rate": 8.043201773450526e-07, "loss": 0.0301, "step": 8306 }, { "epoch": 0.91, "grad_norm": 0.1939473124206015, "learning_rate": 8.02324178719951e-07, "loss": 0.0376, "step": 8307 }, { "epoch": 0.91, "grad_norm": 0.2534852131616495, "learning_rate": 8.003306091102803e-07, "loss": 0.0491, "step": 8308 }, { "epoch": 0.91, "grad_norm": 0.14857831349302236, "learning_rate": 7.983394687682811e-07, "loss": 0.0228, "step": 8309 }, { "epoch": 0.91, "grad_norm": 0.24847488571870194, "learning_rate": 7.963507579458851e-07, "loss": 0.0276, "step": 8310 }, { "epoch": 0.91, "grad_norm": 0.21501468447505942, "learning_rate": 7.943644768947157e-07, "loss": 0.03, "step": 8311 }, { "epoch": 0.91, "grad_norm": 0.18173029952275782, "learning_rate": 7.923806258660893e-07, "loss": 0.0157, "step": 8312 }, { "epoch": 0.91, "grad_norm": 0.22132643731655707, "learning_rate": 7.903992051110188e-07, "loss": 0.025, "step": 8313 }, { "epoch": 0.91, "grad_norm": 0.2245354572239109, "learning_rate": 7.884202148802056e-07, "loss": 0.028, "step": 8314 }, { "epoch": 0.91, "grad_norm": 0.19612606359865667, "learning_rate": 7.864436554240429e-07, "loss": 0.0263, "step": 8315 }, { "epoch": 0.91, "grad_norm": 0.22572838220399857, "learning_rate": 7.844695269926194e-07, "loss": 0.0206, "step": 8316 }, { "epoch": 0.91, "grad_norm": 0.165413949886668, "learning_rate": 7.824978298357111e-07, "loss": 0.028, "step": 8317 }, { "epoch": 0.91, "grad_norm": 0.1912501730325951, "learning_rate": 7.805285642027983e-07, "loss": 0.0259, "step": 8318 }, { "epoch": 0.91, "grad_norm": 0.23225523480509927, "learning_rate": 7.78561730343037e-07, "loss": 0.0487, "step": 8319 }, { "epoch": 0.91, "grad_norm": 0.21158025969900446, "learning_rate": 7.765973285052863e-07, "loss": 0.0262, "step": 8320 }, { "epoch": 0.91, "grad_norm": 0.20679104563672152, "learning_rate": 7.746353589380962e-07, "loss": 0.0252, "step": 8321 }, { "epoch": 0.91, "grad_norm": 0.2098543916987807, "learning_rate": 7.726758218897079e-07, "loss": 0.0331, "step": 8322 }, { "epoch": 0.91, "grad_norm": 0.17279907550371298, "learning_rate": 7.707187176080544e-07, "loss": 0.0212, "step": 8323 }, { "epoch": 0.91, "grad_norm": 0.25404107842219026, "learning_rate": 7.687640463407597e-07, "loss": 0.0337, "step": 8324 }, { "epoch": 0.91, "grad_norm": 0.19785788357457332, "learning_rate": 7.668118083351461e-07, "loss": 0.0396, "step": 8325 }, { "epoch": 0.91, "grad_norm": 0.17696038119699498, "learning_rate": 7.648620038382204e-07, "loss": 0.0213, "step": 8326 }, { "epoch": 0.91, "grad_norm": 0.2119630150573829, "learning_rate": 7.629146330966853e-07, "loss": 0.0223, "step": 8327 }, { "epoch": 0.91, "grad_norm": 0.21826744498778183, "learning_rate": 7.609696963569325e-07, "loss": 0.0316, "step": 8328 }, { "epoch": 0.91, "grad_norm": 0.23212229230202275, "learning_rate": 7.590271938650518e-07, "loss": 0.0397, "step": 8329 }, { "epoch": 0.91, "grad_norm": 0.24391809621680238, "learning_rate": 7.5708712586682e-07, "loss": 0.0232, "step": 8330 }, { "epoch": 0.91, "grad_norm": 0.16508441505056867, "learning_rate": 7.551494926077052e-07, "loss": 0.0211, "step": 8331 }, { "epoch": 0.92, "grad_norm": 0.19428187246040374, "learning_rate": 7.532142943328713e-07, "loss": 0.0266, "step": 8332 }, { "epoch": 0.92, "grad_norm": 0.2518836968153419, "learning_rate": 7.512815312871735e-07, "loss": 0.0548, "step": 8333 }, { "epoch": 0.92, "grad_norm": 0.25576458696677545, "learning_rate": 7.493512037151563e-07, "loss": 0.028, "step": 8334 }, { "epoch": 0.92, "grad_norm": 0.23377185939891926, "learning_rate": 7.474233118610553e-07, "loss": 0.0296, "step": 8335 }, { "epoch": 0.92, "grad_norm": 0.16079145165600636, "learning_rate": 7.454978559688019e-07, "loss": 0.0185, "step": 8336 }, { "epoch": 0.92, "grad_norm": 0.20114282750162718, "learning_rate": 7.435748362820172e-07, "loss": 0.0367, "step": 8337 }, { "epoch": 0.92, "grad_norm": 0.2099352177680145, "learning_rate": 7.416542530440174e-07, "loss": 0.0267, "step": 8338 }, { "epoch": 0.92, "grad_norm": 0.17064276507575793, "learning_rate": 7.39736106497797e-07, "loss": 0.0263, "step": 8339 }, { "epoch": 0.92, "grad_norm": 0.2907342894984491, "learning_rate": 7.378203968860643e-07, "loss": 0.0428, "step": 8340 }, { "epoch": 0.92, "grad_norm": 0.2429870129434039, "learning_rate": 7.359071244512051e-07, "loss": 0.037, "step": 8341 }, { "epoch": 0.92, "grad_norm": 0.19689027560478325, "learning_rate": 7.339962894352925e-07, "loss": 0.0417, "step": 8342 }, { "epoch": 0.92, "grad_norm": 0.22965578781540286, "learning_rate": 7.320878920801044e-07, "loss": 0.0275, "step": 8343 }, { "epoch": 0.92, "grad_norm": 0.20803986781456973, "learning_rate": 7.30181932627101e-07, "loss": 0.0405, "step": 8344 }, { "epoch": 0.92, "grad_norm": 0.18372696243919345, "learning_rate": 7.282784113174379e-07, "loss": 0.0279, "step": 8345 }, { "epoch": 0.92, "grad_norm": 0.2088753959793547, "learning_rate": 7.263773283919584e-07, "loss": 0.0337, "step": 8346 }, { "epoch": 0.92, "grad_norm": 0.2161109055407055, "learning_rate": 7.244786840912033e-07, "loss": 0.0248, "step": 8347 }, { "epoch": 0.92, "grad_norm": 0.2292522880966251, "learning_rate": 7.225824786553981e-07, "loss": 0.0314, "step": 8348 }, { "epoch": 0.92, "grad_norm": 0.20722740705283857, "learning_rate": 7.206887123244687e-07, "loss": 0.0269, "step": 8349 }, { "epoch": 0.92, "grad_norm": 0.21685167427663435, "learning_rate": 7.187973853380215e-07, "loss": 0.0273, "step": 8350 }, { "epoch": 0.92, "grad_norm": 0.21911285312810636, "learning_rate": 7.169084979353603e-07, "loss": 0.035, "step": 8351 }, { "epoch": 0.92, "grad_norm": 0.16650670292538797, "learning_rate": 7.150220503554783e-07, "loss": 0.0168, "step": 8352 }, { "epoch": 0.92, "grad_norm": 0.23844254615863555, "learning_rate": 7.131380428370671e-07, "loss": 0.0413, "step": 8353 }, { "epoch": 0.92, "grad_norm": 0.19736905738347355, "learning_rate": 7.112564756184981e-07, "loss": 0.022, "step": 8354 }, { "epoch": 0.92, "grad_norm": 0.20810555682276666, "learning_rate": 7.093773489378408e-07, "loss": 0.016, "step": 8355 }, { "epoch": 0.92, "grad_norm": 0.2405570507096395, "learning_rate": 7.075006630328518e-07, "loss": 0.0267, "step": 8356 }, { "epoch": 0.92, "grad_norm": 0.22532994331106057, "learning_rate": 7.056264181409922e-07, "loss": 0.0459, "step": 8357 }, { "epoch": 0.92, "grad_norm": 0.17066836537886965, "learning_rate": 7.037546144993901e-07, "loss": 0.0169, "step": 8358 }, { "epoch": 0.92, "grad_norm": 0.25722549542529416, "learning_rate": 7.018852523448871e-07, "loss": 0.0261, "step": 8359 }, { "epoch": 0.92, "grad_norm": 0.23987468681493912, "learning_rate": 7.000183319140053e-07, "loss": 0.032, "step": 8360 }, { "epoch": 0.92, "grad_norm": 0.26530156359019463, "learning_rate": 6.981538534429599e-07, "loss": 0.0469, "step": 8361 }, { "epoch": 0.92, "grad_norm": 0.2248861514915893, "learning_rate": 6.962918171676536e-07, "loss": 0.0313, "step": 8362 }, { "epoch": 0.92, "grad_norm": 0.2499288144687979, "learning_rate": 6.944322233236844e-07, "loss": 0.0292, "step": 8363 }, { "epoch": 0.92, "grad_norm": 0.2569748390466309, "learning_rate": 6.925750721463443e-07, "loss": 0.0369, "step": 8364 }, { "epoch": 0.92, "grad_norm": 0.2111929300631266, "learning_rate": 6.907203638706094e-07, "loss": 0.0318, "step": 8365 }, { "epoch": 0.92, "grad_norm": 0.25302257820450147, "learning_rate": 6.88868098731148e-07, "loss": 0.0531, "step": 8366 }, { "epoch": 0.92, "grad_norm": 0.2178474522873738, "learning_rate": 6.870182769623235e-07, "loss": 0.0211, "step": 8367 }, { "epoch": 0.92, "grad_norm": 0.3198811254760812, "learning_rate": 6.851708987981865e-07, "loss": 0.0396, "step": 8368 }, { "epoch": 0.92, "grad_norm": 0.22959975298708743, "learning_rate": 6.833259644724811e-07, "loss": 0.019, "step": 8369 }, { "epoch": 0.92, "grad_norm": 0.23707362098666884, "learning_rate": 6.814834742186361e-07, "loss": 0.0251, "step": 8370 }, { "epoch": 0.92, "grad_norm": 0.21209947763187123, "learning_rate": 6.796434282697783e-07, "loss": 0.0453, "step": 8371 }, { "epoch": 0.92, "grad_norm": 0.2294151176741445, "learning_rate": 6.778058268587217e-07, "loss": 0.0443, "step": 8372 }, { "epoch": 0.92, "grad_norm": 0.220088863496754, "learning_rate": 6.759706702179713e-07, "loss": 0.0297, "step": 8373 }, { "epoch": 0.92, "grad_norm": 0.20209323663795287, "learning_rate": 6.741379585797236e-07, "loss": 0.0245, "step": 8374 }, { "epoch": 0.92, "grad_norm": 0.26108945088525415, "learning_rate": 6.723076921758664e-07, "loss": 0.0408, "step": 8375 }, { "epoch": 0.92, "grad_norm": 0.21960898750458263, "learning_rate": 6.704798712379768e-07, "loss": 0.0341, "step": 8376 }, { "epoch": 0.92, "grad_norm": 0.1982818244432766, "learning_rate": 6.686544959973207e-07, "loss": 0.023, "step": 8377 }, { "epoch": 0.92, "grad_norm": 0.21411860941877806, "learning_rate": 6.66831566684858e-07, "loss": 0.0387, "step": 8378 }, { "epoch": 0.92, "grad_norm": 0.20386571810167772, "learning_rate": 6.650110835312351e-07, "loss": 0.0232, "step": 8379 }, { "epoch": 0.92, "grad_norm": 0.15969013795148154, "learning_rate": 6.631930467667991e-07, "loss": 0.0291, "step": 8380 }, { "epoch": 0.92, "grad_norm": 0.19799292918786857, "learning_rate": 6.61377456621568e-07, "loss": 0.0307, "step": 8381 }, { "epoch": 0.92, "grad_norm": 0.2592476578149597, "learning_rate": 6.595643133252716e-07, "loss": 0.026, "step": 8382 }, { "epoch": 0.92, "grad_norm": 0.16889081748593573, "learning_rate": 6.577536171073173e-07, "loss": 0.0282, "step": 8383 }, { "epoch": 0.92, "grad_norm": 0.1862055181183217, "learning_rate": 6.559453681968064e-07, "loss": 0.0296, "step": 8384 }, { "epoch": 0.92, "grad_norm": 0.1676830756723088, "learning_rate": 6.541395668225314e-07, "loss": 0.0212, "step": 8385 }, { "epoch": 0.92, "grad_norm": 0.2024904113577425, "learning_rate": 6.523362132129718e-07, "loss": 0.029, "step": 8386 }, { "epoch": 0.92, "grad_norm": 0.1573365934159292, "learning_rate": 6.505353075963005e-07, "loss": 0.0237, "step": 8387 }, { "epoch": 0.92, "grad_norm": 0.32704181356998224, "learning_rate": 6.487368502003821e-07, "loss": 0.0248, "step": 8388 }, { "epoch": 0.92, "grad_norm": 0.1808261144551506, "learning_rate": 6.469408412527656e-07, "loss": 0.0333, "step": 8389 }, { "epoch": 0.92, "grad_norm": 0.2132033492269207, "learning_rate": 6.451472809806958e-07, "loss": 0.0214, "step": 8390 }, { "epoch": 0.92, "grad_norm": 0.21403256645583105, "learning_rate": 6.433561696111046e-07, "loss": 0.0216, "step": 8391 }, { "epoch": 0.92, "grad_norm": 0.198098821429767, "learning_rate": 6.415675073706174e-07, "loss": 0.0249, "step": 8392 }, { "epoch": 0.92, "grad_norm": 0.22633907383806465, "learning_rate": 6.397812944855464e-07, "loss": 0.0411, "step": 8393 }, { "epoch": 0.92, "grad_norm": 0.22249488671995937, "learning_rate": 6.379975311818931e-07, "loss": 0.0456, "step": 8394 }, { "epoch": 0.92, "grad_norm": 0.16418217653706288, "learning_rate": 6.362162176853548e-07, "loss": 0.0227, "step": 8395 }, { "epoch": 0.92, "grad_norm": 0.21776450784619336, "learning_rate": 6.344373542213112e-07, "loss": 0.0319, "step": 8396 }, { "epoch": 0.92, "grad_norm": 0.21109391523759927, "learning_rate": 6.326609410148354e-07, "loss": 0.0262, "step": 8397 }, { "epoch": 0.92, "grad_norm": 0.1787576041238118, "learning_rate": 6.308869782906946e-07, "loss": 0.0171, "step": 8398 }, { "epoch": 0.92, "grad_norm": 0.20020439313437643, "learning_rate": 6.291154662733379e-07, "loss": 0.0226, "step": 8399 }, { "epoch": 0.92, "grad_norm": 0.24270355710390398, "learning_rate": 6.27346405186915e-07, "loss": 0.0339, "step": 8400 }, { "epoch": 0.92, "grad_norm": 0.1993100387251412, "learning_rate": 6.255797952552511e-07, "loss": 0.0214, "step": 8401 }, { "epoch": 0.92, "grad_norm": 0.25237342612733243, "learning_rate": 6.238156367018744e-07, "loss": 0.0428, "step": 8402 }, { "epoch": 0.92, "grad_norm": 0.20536148839700594, "learning_rate": 6.22053929749995e-07, "loss": 0.031, "step": 8403 }, { "epoch": 0.92, "grad_norm": 0.228785152105541, "learning_rate": 6.202946746225191e-07, "loss": 0.0433, "step": 8404 }, { "epoch": 0.92, "grad_norm": 0.20532077784231703, "learning_rate": 6.185378715420331e-07, "loss": 0.0273, "step": 8405 }, { "epoch": 0.92, "grad_norm": 0.1828695277577136, "learning_rate": 6.16783520730826e-07, "loss": 0.0203, "step": 8406 }, { "epoch": 0.92, "grad_norm": 0.19333701534265987, "learning_rate": 6.150316224108643e-07, "loss": 0.0418, "step": 8407 }, { "epoch": 0.92, "grad_norm": 0.20727168822379813, "learning_rate": 6.132821768038133e-07, "loss": 0.0558, "step": 8408 }, { "epoch": 0.92, "grad_norm": 0.17836172870113923, "learning_rate": 6.115351841310224e-07, "loss": 0.0252, "step": 8409 }, { "epoch": 0.92, "grad_norm": 0.1579723146170363, "learning_rate": 6.097906446135349e-07, "loss": 0.0266, "step": 8410 }, { "epoch": 0.92, "grad_norm": 0.24195380626184704, "learning_rate": 6.080485584720808e-07, "loss": 0.0278, "step": 8411 }, { "epoch": 0.92, "grad_norm": 0.24683755156020554, "learning_rate": 6.063089259270749e-07, "loss": 0.0331, "step": 8412 }, { "epoch": 0.92, "grad_norm": 0.20206476405616014, "learning_rate": 6.045717471986345e-07, "loss": 0.0304, "step": 8413 }, { "epoch": 0.92, "grad_norm": 0.1552739053395259, "learning_rate": 6.028370225065527e-07, "loss": 0.025, "step": 8414 }, { "epoch": 0.92, "grad_norm": 0.20924453941065727, "learning_rate": 6.011047520703228e-07, "loss": 0.0242, "step": 8415 }, { "epoch": 0.92, "grad_norm": 0.1827767842608636, "learning_rate": 5.993749361091206e-07, "loss": 0.0279, "step": 8416 }, { "epoch": 0.92, "grad_norm": 0.19183940920817746, "learning_rate": 5.976475748418131e-07, "loss": 0.0295, "step": 8417 }, { "epoch": 0.92, "grad_norm": 0.16988337288854205, "learning_rate": 5.95922668486959e-07, "loss": 0.0214, "step": 8418 }, { "epoch": 0.92, "grad_norm": 0.17770318396897788, "learning_rate": 5.942002172628058e-07, "loss": 0.0284, "step": 8419 }, { "epoch": 0.92, "grad_norm": 0.20335425680770988, "learning_rate": 5.92480221387286e-07, "loss": 0.0319, "step": 8420 }, { "epoch": 0.92, "grad_norm": 0.20325146003083785, "learning_rate": 5.907626810780276e-07, "loss": 0.0237, "step": 8421 }, { "epoch": 0.92, "grad_norm": 0.20049094724671024, "learning_rate": 5.890475965523412e-07, "loss": 0.0222, "step": 8422 }, { "epoch": 0.92, "grad_norm": 0.24296646055755877, "learning_rate": 5.873349680272356e-07, "loss": 0.0301, "step": 8423 }, { "epoch": 0.93, "grad_norm": 0.19479930211638352, "learning_rate": 5.856247957193995e-07, "loss": 0.0263, "step": 8424 }, { "epoch": 0.93, "grad_norm": 0.2058500656976333, "learning_rate": 5.839170798452154e-07, "loss": 0.0475, "step": 8425 }, { "epoch": 0.93, "grad_norm": 0.1690838637048341, "learning_rate": 5.822118206207594e-07, "loss": 0.0207, "step": 8426 }, { "epoch": 0.93, "grad_norm": 0.2052925623614891, "learning_rate": 5.805090182617878e-07, "loss": 0.0147, "step": 8427 }, { "epoch": 0.93, "grad_norm": 0.26213314215839717, "learning_rate": 5.788086729837505e-07, "loss": 0.0251, "step": 8428 }, { "epoch": 0.93, "grad_norm": 0.22363364540178207, "learning_rate": 5.771107850017865e-07, "loss": 0.0355, "step": 8429 }, { "epoch": 0.93, "grad_norm": 0.25182848072447, "learning_rate": 5.754153545307262e-07, "loss": 0.0399, "step": 8430 }, { "epoch": 0.93, "grad_norm": 0.19509452100115968, "learning_rate": 5.737223817850845e-07, "loss": 0.031, "step": 8431 }, { "epoch": 0.93, "grad_norm": 0.22119743557895566, "learning_rate": 5.720318669790636e-07, "loss": 0.0248, "step": 8432 }, { "epoch": 0.93, "grad_norm": 0.18563328788699343, "learning_rate": 5.703438103265657e-07, "loss": 0.0345, "step": 8433 }, { "epoch": 0.93, "grad_norm": 0.21678475182806214, "learning_rate": 5.68658212041171e-07, "loss": 0.0338, "step": 8434 }, { "epoch": 0.93, "grad_norm": 0.2545525760978105, "learning_rate": 5.669750723361533e-07, "loss": 0.0231, "step": 8435 }, { "epoch": 0.93, "grad_norm": 0.1840065717974786, "learning_rate": 5.652943914244713e-07, "loss": 0.018, "step": 8436 }, { "epoch": 0.93, "grad_norm": 0.1961086639872842, "learning_rate": 5.636161695187792e-07, "loss": 0.0271, "step": 8437 }, { "epoch": 0.93, "grad_norm": 0.20151802947479283, "learning_rate": 5.61940406831416e-07, "loss": 0.0312, "step": 8438 }, { "epoch": 0.93, "grad_norm": 0.21139075008842315, "learning_rate": 5.602671035744123e-07, "loss": 0.0322, "step": 8439 }, { "epoch": 0.93, "grad_norm": 0.3327968522096054, "learning_rate": 5.585962599594807e-07, "loss": 0.0342, "step": 8440 }, { "epoch": 0.93, "grad_norm": 0.17186633543369212, "learning_rate": 5.569278761980301e-07, "loss": 0.0172, "step": 8441 }, { "epoch": 0.93, "grad_norm": 0.24979978232977448, "learning_rate": 5.552619525011538e-07, "loss": 0.0253, "step": 8442 }, { "epoch": 0.93, "grad_norm": 0.23240423536280208, "learning_rate": 5.535984890796365e-07, "loss": 0.0196, "step": 8443 }, { "epoch": 0.93, "grad_norm": 0.20138871441391443, "learning_rate": 5.519374861439497e-07, "loss": 0.0474, "step": 8444 }, { "epoch": 0.93, "grad_norm": 0.3246919671486572, "learning_rate": 5.502789439042566e-07, "loss": 0.0424, "step": 8445 }, { "epoch": 0.93, "grad_norm": 0.20651024814003943, "learning_rate": 5.486228625704049e-07, "loss": 0.0429, "step": 8446 }, { "epoch": 0.93, "grad_norm": 0.24252174754304018, "learning_rate": 5.469692423519335e-07, "loss": 0.0347, "step": 8447 }, { "epoch": 0.93, "grad_norm": 0.20464762961848362, "learning_rate": 5.453180834580663e-07, "loss": 0.0232, "step": 8448 }, { "epoch": 0.93, "grad_norm": 0.27053622155139706, "learning_rate": 5.436693860977227e-07, "loss": 0.0483, "step": 8449 }, { "epoch": 0.93, "grad_norm": 0.18661866079168807, "learning_rate": 5.42023150479507e-07, "loss": 0.0254, "step": 8450 }, { "epoch": 0.93, "grad_norm": 0.22425238393602995, "learning_rate": 5.40379376811706e-07, "loss": 0.0322, "step": 8451 }, { "epoch": 0.93, "grad_norm": 0.25532887330426185, "learning_rate": 5.387380653023066e-07, "loss": 0.0364, "step": 8452 }, { "epoch": 0.93, "grad_norm": 0.14500897301665297, "learning_rate": 5.37099216158976e-07, "loss": 0.019, "step": 8453 }, { "epoch": 0.93, "grad_norm": 0.183656620867129, "learning_rate": 5.354628295890729e-07, "loss": 0.0283, "step": 8454 }, { "epoch": 0.93, "grad_norm": 0.18354898272195813, "learning_rate": 5.338289057996426e-07, "loss": 0.0306, "step": 8455 }, { "epoch": 0.93, "grad_norm": 0.19892808803293913, "learning_rate": 5.321974449974198e-07, "loss": 0.0208, "step": 8456 }, { "epoch": 0.93, "grad_norm": 0.20859031424389596, "learning_rate": 5.305684473888284e-07, "loss": 0.0304, "step": 8457 }, { "epoch": 0.93, "grad_norm": 0.17360973569537094, "learning_rate": 5.289419131799811e-07, "loss": 0.0235, "step": 8458 }, { "epoch": 0.93, "grad_norm": 0.2081914816171872, "learning_rate": 5.273178425766734e-07, "loss": 0.0158, "step": 8459 }, { "epoch": 0.93, "grad_norm": 0.18857801009821112, "learning_rate": 5.256962357843942e-07, "loss": 0.0275, "step": 8460 }, { "epoch": 0.93, "grad_norm": 0.1888262514014326, "learning_rate": 5.240770930083261e-07, "loss": 0.0263, "step": 8461 }, { "epoch": 0.93, "grad_norm": 0.21525795629131933, "learning_rate": 5.224604144533274e-07, "loss": 0.033, "step": 8462 }, { "epoch": 0.93, "grad_norm": 0.2398874522600459, "learning_rate": 5.208462003239522e-07, "loss": 0.0323, "step": 8463 }, { "epoch": 0.93, "grad_norm": 0.18715514815753465, "learning_rate": 5.192344508244418e-07, "loss": 0.0257, "step": 8464 }, { "epoch": 0.93, "grad_norm": 0.18205498020882127, "learning_rate": 5.176251661587262e-07, "loss": 0.0334, "step": 8465 }, { "epoch": 0.93, "grad_norm": 0.25190873457328483, "learning_rate": 5.160183465304203e-07, "loss": 0.0454, "step": 8466 }, { "epoch": 0.93, "grad_norm": 0.2646369931607562, "learning_rate": 5.144139921428303e-07, "loss": 0.0227, "step": 8467 }, { "epoch": 0.93, "grad_norm": 0.1727333095384827, "learning_rate": 5.128121031989497e-07, "loss": 0.0177, "step": 8468 }, { "epoch": 0.93, "grad_norm": 0.15503818756802384, "learning_rate": 5.112126799014628e-07, "loss": 0.0204, "step": 8469 }, { "epoch": 0.93, "grad_norm": 0.22728875927299597, "learning_rate": 5.096157224527343e-07, "loss": 0.0333, "step": 8470 }, { "epoch": 0.93, "grad_norm": 0.38961350133778766, "learning_rate": 5.080212310548249e-07, "loss": 0.0477, "step": 8471 }, { "epoch": 0.93, "grad_norm": 0.15363352667019123, "learning_rate": 5.0642920590948e-07, "loss": 0.0167, "step": 8472 }, { "epoch": 0.93, "grad_norm": 0.2225463495573057, "learning_rate": 5.048396472181316e-07, "loss": 0.0289, "step": 8473 }, { "epoch": 0.93, "grad_norm": 0.18319043032132049, "learning_rate": 5.032525551819012e-07, "loss": 0.0254, "step": 8474 }, { "epoch": 0.93, "grad_norm": 0.17581090676948313, "learning_rate": 5.01667930001597e-07, "loss": 0.0171, "step": 8475 }, { "epoch": 0.93, "grad_norm": 0.26942419607455437, "learning_rate": 5.000857718777186e-07, "loss": 0.0497, "step": 8476 }, { "epoch": 0.93, "grad_norm": 0.2394588248864191, "learning_rate": 4.985060810104503e-07, "loss": 0.0389, "step": 8477 }, { "epoch": 0.93, "grad_norm": 0.24034996663318114, "learning_rate": 4.969288575996656e-07, "loss": 0.0446, "step": 8478 }, { "epoch": 0.93, "grad_norm": 0.24040554120608754, "learning_rate": 4.953541018449226e-07, "loss": 0.0284, "step": 8479 }, { "epoch": 0.93, "grad_norm": 0.27821203434472375, "learning_rate": 4.937818139454709e-07, "loss": 0.0318, "step": 8480 }, { "epoch": 0.93, "grad_norm": 0.40424619347803703, "learning_rate": 4.922119941002512e-07, "loss": 0.0592, "step": 8481 }, { "epoch": 0.93, "grad_norm": 0.26562739819958725, "learning_rate": 4.906446425078782e-07, "loss": 0.0239, "step": 8482 }, { "epoch": 0.93, "grad_norm": 0.186545605365763, "learning_rate": 4.890797593666708e-07, "loss": 0.0273, "step": 8483 }, { "epoch": 0.93, "grad_norm": 0.24292673839417397, "learning_rate": 4.87517344874624e-07, "loss": 0.037, "step": 8484 }, { "epoch": 0.93, "grad_norm": 0.17916370840365747, "learning_rate": 4.859573992294309e-07, "loss": 0.0211, "step": 8485 }, { "epoch": 0.93, "grad_norm": 0.20205597905386824, "learning_rate": 4.843999226284579e-07, "loss": 0.0248, "step": 8486 }, { "epoch": 0.93, "grad_norm": 0.23498005650487097, "learning_rate": 4.82844915268772e-07, "loss": 0.0442, "step": 8487 }, { "epoch": 0.93, "grad_norm": 0.18656126108189497, "learning_rate": 4.812923773471201e-07, "loss": 0.0353, "step": 8488 }, { "epoch": 0.93, "grad_norm": 0.1672313940058219, "learning_rate": 4.797423090599451e-07, "loss": 0.0177, "step": 8489 }, { "epoch": 0.93, "grad_norm": 0.19812775113575354, "learning_rate": 4.781947106033635e-07, "loss": 0.0171, "step": 8490 }, { "epoch": 0.93, "grad_norm": 0.22391539631522134, "learning_rate": 4.7664958217319425e-07, "loss": 0.032, "step": 8491 }, { "epoch": 0.93, "grad_norm": 0.155685418074848, "learning_rate": 4.7510692396493197e-07, "loss": 0.0249, "step": 8492 }, { "epoch": 0.93, "grad_norm": 0.22012540434670774, "learning_rate": 4.735667361737717e-07, "loss": 0.0342, "step": 8493 }, { "epoch": 0.93, "grad_norm": 0.21301856265144178, "learning_rate": 4.720290189945775e-07, "loss": 0.0229, "step": 8494 }, { "epoch": 0.93, "grad_norm": 0.21050044097990114, "learning_rate": 4.704937726219183e-07, "loss": 0.0269, "step": 8495 }, { "epoch": 0.93, "grad_norm": 0.18882153619106684, "learning_rate": 4.689609972500453e-07, "loss": 0.0259, "step": 8496 }, { "epoch": 0.93, "grad_norm": 0.1876996983582365, "learning_rate": 4.6743069307289e-07, "loss": 0.0215, "step": 8497 }, { "epoch": 0.93, "grad_norm": 0.28610841302052187, "learning_rate": 4.659028602840776e-07, "loss": 0.0275, "step": 8498 }, { "epoch": 0.93, "grad_norm": 0.21757398974668332, "learning_rate": 4.643774990769201e-07, "loss": 0.0392, "step": 8499 }, { "epoch": 0.93, "grad_norm": 0.22618732959607027, "learning_rate": 4.628546096444186e-07, "loss": 0.0445, "step": 8500 }, { "epoch": 0.93, "grad_norm": 0.2897951146987235, "learning_rate": 4.613341921792569e-07, "loss": 0.033, "step": 8501 }, { "epoch": 0.93, "grad_norm": 0.16572747748964892, "learning_rate": 4.5981624687380764e-07, "loss": 0.0185, "step": 8502 }, { "epoch": 0.93, "grad_norm": 0.21338993978016757, "learning_rate": 4.5830077392013064e-07, "loss": 0.0451, "step": 8503 }, { "epoch": 0.93, "grad_norm": 0.24637319956372747, "learning_rate": 4.567877735099768e-07, "loss": 0.0319, "step": 8504 }, { "epoch": 0.93, "grad_norm": 0.18817690019328517, "learning_rate": 4.5527724583477537e-07, "loss": 0.0316, "step": 8505 }, { "epoch": 0.93, "grad_norm": 0.20283618898864303, "learning_rate": 4.5376919108565345e-07, "loss": 0.0427, "step": 8506 }, { "epoch": 0.93, "grad_norm": 0.1921731900881123, "learning_rate": 4.5226360945341607e-07, "loss": 0.0194, "step": 8507 }, { "epoch": 0.93, "grad_norm": 0.2673352245842023, "learning_rate": 4.507605011285643e-07, "loss": 0.0419, "step": 8508 }, { "epoch": 0.93, "grad_norm": 0.18281969276363283, "learning_rate": 4.4925986630127705e-07, "loss": 0.0346, "step": 8509 }, { "epoch": 0.93, "grad_norm": 0.2469516966988744, "learning_rate": 4.477617051614225e-07, "loss": 0.0225, "step": 8510 }, { "epoch": 0.93, "grad_norm": 0.2507970293969189, "learning_rate": 4.462660178985623e-07, "loss": 0.0474, "step": 8511 }, { "epoch": 0.93, "grad_norm": 0.23158095203916151, "learning_rate": 4.4477280470194064e-07, "loss": 0.0295, "step": 8512 }, { "epoch": 0.93, "grad_norm": 0.20232670981041637, "learning_rate": 4.4328206576048414e-07, "loss": 0.0372, "step": 8513 }, { "epoch": 0.93, "grad_norm": 0.16015584294337204, "learning_rate": 4.4179380126281533e-07, "loss": 0.0122, "step": 8514 }, { "epoch": 0.94, "grad_norm": 0.23909338147034398, "learning_rate": 4.403080113972369e-07, "loss": 0.0466, "step": 8515 }, { "epoch": 0.94, "grad_norm": 0.21879980678292657, "learning_rate": 4.3882469635174287e-07, "loss": 0.037, "step": 8516 }, { "epoch": 0.94, "grad_norm": 0.18356716694027975, "learning_rate": 4.3734385631400976e-07, "loss": 0.0258, "step": 8517 }, { "epoch": 0.94, "grad_norm": 0.1950312114765048, "learning_rate": 4.358654914714033e-07, "loss": 0.0435, "step": 8518 }, { "epoch": 0.94, "grad_norm": 0.22106834171267492, "learning_rate": 4.343896020109739e-07, "loss": 0.0489, "step": 8519 }, { "epoch": 0.94, "grad_norm": 0.2582378058291895, "learning_rate": 4.329161881194677e-07, "loss": 0.0229, "step": 8520 }, { "epoch": 0.94, "grad_norm": 0.24726344464683137, "learning_rate": 4.314452499833044e-07, "loss": 0.0359, "step": 8521 }, { "epoch": 0.94, "grad_norm": 0.17501671149297438, "learning_rate": 4.299767877885974e-07, "loss": 0.0254, "step": 8522 }, { "epoch": 0.94, "grad_norm": 0.19869288058583753, "learning_rate": 4.2851080172114703e-07, "loss": 0.0402, "step": 8523 }, { "epoch": 0.94, "grad_norm": 0.15341339808696702, "learning_rate": 4.270472919664426e-07, "loss": 0.0237, "step": 8524 }, { "epoch": 0.94, "grad_norm": 0.20991070168576212, "learning_rate": 4.255862587096493e-07, "loss": 0.0424, "step": 8525 }, { "epoch": 0.94, "grad_norm": 0.19770893480378912, "learning_rate": 4.241277021356327e-07, "loss": 0.0196, "step": 8526 }, { "epoch": 0.94, "grad_norm": 0.2590692694576773, "learning_rate": 4.2267162242893846e-07, "loss": 0.0291, "step": 8527 }, { "epoch": 0.94, "grad_norm": 0.22832070702224205, "learning_rate": 4.212180197737992e-07, "loss": 0.0292, "step": 8528 }, { "epoch": 0.94, "grad_norm": 0.22958031443522428, "learning_rate": 4.1976689435413e-07, "loss": 0.0307, "step": 8529 }, { "epoch": 0.94, "grad_norm": 0.17204829712753653, "learning_rate": 4.183182463535418e-07, "loss": 0.0242, "step": 8530 }, { "epoch": 0.94, "grad_norm": 0.1848489081694108, "learning_rate": 4.1687207595532575e-07, "loss": 0.0214, "step": 8531 }, { "epoch": 0.94, "grad_norm": 0.19685404870404544, "learning_rate": 4.1542838334245994e-07, "loss": 0.0302, "step": 8532 }, { "epoch": 0.94, "grad_norm": 0.22808550433970154, "learning_rate": 4.1398716869760936e-07, "loss": 0.0261, "step": 8533 }, { "epoch": 0.94, "grad_norm": 0.21967224297376345, "learning_rate": 4.1254843220312814e-07, "loss": 0.0285, "step": 8534 }, { "epoch": 0.94, "grad_norm": 0.18656843950730673, "learning_rate": 4.1111217404105286e-07, "loss": 0.0321, "step": 8535 }, { "epoch": 0.94, "grad_norm": 0.20425766811194307, "learning_rate": 4.096783943931093e-07, "loss": 0.035, "step": 8536 }, { "epoch": 0.94, "grad_norm": 0.2378283010313845, "learning_rate": 4.082470934407057e-07, "loss": 0.021, "step": 8537 }, { "epoch": 0.94, "grad_norm": 0.24951705628301746, "learning_rate": 4.0681827136494157e-07, "loss": 0.04, "step": 8538 }, { "epoch": 0.94, "grad_norm": 0.27596928289252065, "learning_rate": 4.0539192834660346e-07, "loss": 0.0257, "step": 8539 }, { "epoch": 0.94, "grad_norm": 0.16925648604013438, "learning_rate": 4.039680645661581e-07, "loss": 0.0287, "step": 8540 }, { "epoch": 0.94, "grad_norm": 0.19812483673209313, "learning_rate": 4.0254668020376366e-07, "loss": 0.0308, "step": 8541 }, { "epoch": 0.94, "grad_norm": 0.2352735540499613, "learning_rate": 4.011277754392606e-07, "loss": 0.0418, "step": 8542 }, { "epoch": 0.94, "grad_norm": 0.18347693846292293, "learning_rate": 3.9971135045218324e-07, "loss": 0.03, "step": 8543 }, { "epoch": 0.94, "grad_norm": 0.25769029957936523, "learning_rate": 3.9829740542174143e-07, "loss": 0.0385, "step": 8544 }, { "epoch": 0.94, "grad_norm": 0.17769172701737182, "learning_rate": 3.968859405268388e-07, "loss": 0.022, "step": 8545 }, { "epoch": 0.94, "grad_norm": 0.22493435113880472, "learning_rate": 3.954769559460614e-07, "loss": 0.0245, "step": 8546 }, { "epoch": 0.94, "grad_norm": 0.2125566031998893, "learning_rate": 3.9407045185768653e-07, "loss": 0.0253, "step": 8547 }, { "epoch": 0.94, "grad_norm": 0.2063302732801414, "learning_rate": 3.9266642843967415e-07, "loss": 0.0318, "step": 8548 }, { "epoch": 0.94, "grad_norm": 0.1527114002089824, "learning_rate": 3.912648858696666e-07, "loss": 0.0164, "step": 8549 }, { "epoch": 0.94, "grad_norm": 0.16440860254344405, "learning_rate": 3.8986582432500196e-07, "loss": 0.0128, "step": 8550 }, { "epoch": 0.94, "grad_norm": 0.24528603988132788, "learning_rate": 3.8846924398269426e-07, "loss": 0.032, "step": 8551 }, { "epoch": 0.94, "grad_norm": 0.15986126389858904, "learning_rate": 3.8707514501944657e-07, "loss": 0.0219, "step": 8552 }, { "epoch": 0.94, "grad_norm": 0.27993088899621643, "learning_rate": 3.8568352761165327e-07, "loss": 0.0399, "step": 8553 }, { "epoch": 0.94, "grad_norm": 0.20659471769889556, "learning_rate": 3.842943919353914e-07, "loss": 0.0279, "step": 8554 }, { "epoch": 0.94, "grad_norm": 0.16415103725905345, "learning_rate": 3.8290773816642035e-07, "loss": 0.0238, "step": 8555 }, { "epoch": 0.94, "grad_norm": 0.21029844436893214, "learning_rate": 3.815235664801908e-07, "loss": 0.0324, "step": 8556 }, { "epoch": 0.94, "grad_norm": 0.19070711712251118, "learning_rate": 3.801418770518339e-07, "loss": 0.0334, "step": 8557 }, { "epoch": 0.94, "grad_norm": 0.1516780123052012, "learning_rate": 3.787626700561742e-07, "loss": 0.0232, "step": 8558 }, { "epoch": 0.94, "grad_norm": 0.2401636376172256, "learning_rate": 3.7738594566771647e-07, "loss": 0.023, "step": 8559 }, { "epoch": 0.94, "grad_norm": 0.2239835894929799, "learning_rate": 3.7601170406065034e-07, "loss": 0.0263, "step": 8560 }, { "epoch": 0.94, "grad_norm": 0.24939471224266604, "learning_rate": 3.74639945408859e-07, "loss": 0.019, "step": 8561 }, { "epoch": 0.94, "grad_norm": 0.1822073412179125, "learning_rate": 3.732706698859012e-07, "loss": 0.0352, "step": 8562 }, { "epoch": 0.94, "grad_norm": 0.15737887521734142, "learning_rate": 3.719038776650297e-07, "loss": 0.0187, "step": 8563 }, { "epoch": 0.94, "grad_norm": 0.2297675246361551, "learning_rate": 3.705395689191771e-07, "loss": 0.0395, "step": 8564 }, { "epoch": 0.94, "grad_norm": 0.19974198536794188, "learning_rate": 3.6917774382096984e-07, "loss": 0.0325, "step": 8565 }, { "epoch": 0.94, "grad_norm": 0.22086383234848803, "learning_rate": 3.6781840254271227e-07, "loss": 0.0256, "step": 8566 }, { "epoch": 0.94, "grad_norm": 0.19444837145207994, "learning_rate": 3.6646154525639354e-07, "loss": 0.0344, "step": 8567 }, { "epoch": 0.94, "grad_norm": 0.19867574327237914, "learning_rate": 3.651071721336963e-07, "loss": 0.0269, "step": 8568 }, { "epoch": 0.94, "grad_norm": 0.2885035962536822, "learning_rate": 3.6375528334598343e-07, "loss": 0.041, "step": 8569 }, { "epoch": 0.94, "grad_norm": 0.14988696612780852, "learning_rate": 3.62405879064307e-07, "loss": 0.0123, "step": 8570 }, { "epoch": 0.94, "grad_norm": 0.19290906315094386, "learning_rate": 3.610589594593994e-07, "loss": 0.0266, "step": 8571 }, { "epoch": 0.94, "grad_norm": 0.20667600743433642, "learning_rate": 3.59714524701682e-07, "loss": 0.0242, "step": 8572 }, { "epoch": 0.94, "grad_norm": 0.3196135314730812, "learning_rate": 3.5837257496126097e-07, "loss": 0.041, "step": 8573 }, { "epoch": 0.94, "grad_norm": 0.2103664651477876, "learning_rate": 3.5703311040793167e-07, "loss": 0.0246, "step": 8574 }, { "epoch": 0.94, "grad_norm": 0.27069232858593484, "learning_rate": 3.5569613121116953e-07, "loss": 0.0299, "step": 8575 }, { "epoch": 0.94, "grad_norm": 0.18641511657808196, "learning_rate": 3.543616375401393e-07, "loss": 0.0277, "step": 8576 }, { "epoch": 0.94, "grad_norm": 0.16748263744140146, "learning_rate": 3.5302962956368593e-07, "loss": 0.0205, "step": 8577 }, { "epoch": 0.94, "grad_norm": 0.21971879733457722, "learning_rate": 3.517001074503501e-07, "loss": 0.0397, "step": 8578 }, { "epoch": 0.94, "grad_norm": 0.21462903130288402, "learning_rate": 3.503730713683462e-07, "loss": 0.0295, "step": 8579 }, { "epoch": 0.94, "grad_norm": 0.24341620735315356, "learning_rate": 3.490485214855799e-07, "loss": 0.0431, "step": 8580 }, { "epoch": 0.94, "grad_norm": 0.17002344932182756, "learning_rate": 3.4772645796964824e-07, "loss": 0.0352, "step": 8581 }, { "epoch": 0.94, "grad_norm": 0.23617304292794486, "learning_rate": 3.464068809878196e-07, "loss": 0.0203, "step": 8582 }, { "epoch": 0.94, "grad_norm": 0.33190738387076246, "learning_rate": 3.4508979070705827e-07, "loss": 0.0404, "step": 8583 }, { "epoch": 0.94, "grad_norm": 0.24128678708441745, "learning_rate": 3.4377518729401317e-07, "loss": 0.0384, "step": 8584 }, { "epoch": 0.94, "grad_norm": 0.17993066753812728, "learning_rate": 3.4246307091501563e-07, "loss": 0.0229, "step": 8585 }, { "epoch": 0.94, "grad_norm": 0.20509025901137862, "learning_rate": 3.4115344173607957e-07, "loss": 0.0239, "step": 8586 }, { "epoch": 0.94, "grad_norm": 0.17688234694760688, "learning_rate": 3.398462999229124e-07, "loss": 0.0321, "step": 8587 }, { "epoch": 0.94, "grad_norm": 0.23480918721310948, "learning_rate": 3.3854164564089964e-07, "loss": 0.0341, "step": 8588 }, { "epoch": 0.94, "grad_norm": 0.18343105615153737, "learning_rate": 3.372394790551159e-07, "loss": 0.0259, "step": 8589 }, { "epoch": 0.94, "grad_norm": 0.36601158310847753, "learning_rate": 3.359398003303183e-07, "loss": 0.0327, "step": 8590 }, { "epoch": 0.94, "grad_norm": 0.20515207138222125, "learning_rate": 3.3464260963095296e-07, "loss": 0.0294, "step": 8591 }, { "epoch": 0.94, "grad_norm": 0.1989280973963484, "learning_rate": 3.333479071211465e-07, "loss": 0.0334, "step": 8592 }, { "epoch": 0.94, "grad_norm": 0.21324569943833158, "learning_rate": 3.3205569296471675e-07, "loss": 0.0415, "step": 8593 }, { "epoch": 0.94, "grad_norm": 0.31504375362037146, "learning_rate": 3.307659673251595e-07, "loss": 0.0346, "step": 8594 }, { "epoch": 0.94, "grad_norm": 0.21849175945800248, "learning_rate": 3.294787303656599e-07, "loss": 0.0453, "step": 8595 }, { "epoch": 0.94, "grad_norm": 0.2272270584822395, "learning_rate": 3.281939822490876e-07, "loss": 0.0326, "step": 8596 }, { "epoch": 0.94, "grad_norm": 0.21703630405800234, "learning_rate": 3.2691172313799925e-07, "loss": 0.0224, "step": 8597 }, { "epoch": 0.94, "grad_norm": 0.16253392800690297, "learning_rate": 3.256319531946317e-07, "loss": 0.0143, "step": 8598 }, { "epoch": 0.94, "grad_norm": 0.23217377925035143, "learning_rate": 3.243546725809132e-07, "loss": 0.02, "step": 8599 }, { "epoch": 0.94, "grad_norm": 0.2077514967617739, "learning_rate": 3.230798814584502e-07, "loss": 0.0455, "step": 8600 }, { "epoch": 0.94, "grad_norm": 0.20955170960353578, "learning_rate": 3.218075799885423e-07, "loss": 0.0313, "step": 8601 }, { "epoch": 0.94, "grad_norm": 0.2041855142045526, "learning_rate": 3.2053776833216533e-07, "loss": 0.0286, "step": 8602 }, { "epoch": 0.94, "grad_norm": 0.2700238494803682, "learning_rate": 3.192704466499841e-07, "loss": 0.0167, "step": 8603 }, { "epoch": 0.94, "grad_norm": 0.2102352568155349, "learning_rate": 3.1800561510234805e-07, "loss": 0.0269, "step": 8604 }, { "epoch": 0.94, "grad_norm": 0.20601155521161596, "learning_rate": 3.1674327384929593e-07, "loss": 0.0362, "step": 8605 }, { "epoch": 0.95, "grad_norm": 0.17974835899357347, "learning_rate": 3.1548342305054435e-07, "loss": 0.0256, "step": 8606 }, { "epoch": 0.95, "grad_norm": 0.18722919935808596, "learning_rate": 3.1422606286549915e-07, "loss": 0.0336, "step": 8607 }, { "epoch": 0.95, "grad_norm": 0.20983967495197475, "learning_rate": 3.1297119345324645e-07, "loss": 0.0246, "step": 8608 }, { "epoch": 0.95, "grad_norm": 0.3352326383881315, "learning_rate": 3.1171881497256585e-07, "loss": 0.0526, "step": 8609 }, { "epoch": 0.95, "grad_norm": 0.18821854793810713, "learning_rate": 3.104689275819128e-07, "loss": 0.0172, "step": 8610 }, { "epoch": 0.95, "grad_norm": 0.21629474673071145, "learning_rate": 3.09221531439432e-07, "loss": 0.0196, "step": 8611 }, { "epoch": 0.95, "grad_norm": 0.17038020441862262, "learning_rate": 3.079766267029527e-07, "loss": 0.024, "step": 8612 }, { "epoch": 0.95, "grad_norm": 0.18133856155359315, "learning_rate": 3.067342135299867e-07, "loss": 0.0275, "step": 8613 }, { "epoch": 0.95, "grad_norm": 0.1653726918693068, "learning_rate": 3.0549429207773483e-07, "loss": 0.0167, "step": 8614 }, { "epoch": 0.95, "grad_norm": 0.21832684904187177, "learning_rate": 3.0425686250307616e-07, "loss": 0.0261, "step": 8615 }, { "epoch": 0.95, "grad_norm": 0.19437140711379983, "learning_rate": 3.030219249625854e-07, "loss": 0.0198, "step": 8616 }, { "epoch": 0.95, "grad_norm": 0.25258798973484187, "learning_rate": 3.017894796125065e-07, "loss": 0.0347, "step": 8617 }, { "epoch": 0.95, "grad_norm": 0.22867072783802445, "learning_rate": 3.005595266087835e-07, "loss": 0.038, "step": 8618 }, { "epoch": 0.95, "grad_norm": 0.18944378466046882, "learning_rate": 2.9933206610703203e-07, "loss": 0.0235, "step": 8619 }, { "epoch": 0.95, "grad_norm": 0.20009014809980036, "learning_rate": 2.9810709826256557e-07, "loss": 0.0219, "step": 8620 }, { "epoch": 0.95, "grad_norm": 0.2026882795435912, "learning_rate": 2.9688462323036685e-07, "loss": 0.0316, "step": 8621 }, { "epoch": 0.95, "grad_norm": 0.277354052177682, "learning_rate": 2.956646411651165e-07, "loss": 0.0468, "step": 8622 }, { "epoch": 0.95, "grad_norm": 0.21367655928531423, "learning_rate": 2.944471522211756e-07, "loss": 0.0354, "step": 8623 }, { "epoch": 0.95, "grad_norm": 0.21471886906782237, "learning_rate": 2.932321565525853e-07, "loss": 0.0318, "step": 8624 }, { "epoch": 0.95, "grad_norm": 0.19926723189488613, "learning_rate": 2.920196543130782e-07, "loss": 0.0407, "step": 8625 }, { "epoch": 0.95, "grad_norm": 0.19702486888526335, "learning_rate": 2.9080964565606273e-07, "loss": 0.0305, "step": 8626 }, { "epoch": 0.95, "grad_norm": 0.24807929780772595, "learning_rate": 2.8960213073464305e-07, "loss": 0.0334, "step": 8627 }, { "epoch": 0.95, "grad_norm": 0.20870450254983694, "learning_rate": 2.883971097015992e-07, "loss": 0.0342, "step": 8628 }, { "epoch": 0.95, "grad_norm": 0.24114173792306193, "learning_rate": 2.8719458270939805e-07, "loss": 0.0391, "step": 8629 }, { "epoch": 0.95, "grad_norm": 0.18148611531796444, "learning_rate": 2.859945499101913e-07, "loss": 0.0223, "step": 8630 }, { "epoch": 0.95, "grad_norm": 0.20827598477839246, "learning_rate": 2.8479701145581296e-07, "loss": 0.0343, "step": 8631 }, { "epoch": 0.95, "grad_norm": 0.2023911516348676, "learning_rate": 2.8360196749778857e-07, "loss": 0.0317, "step": 8632 }, { "epoch": 0.95, "grad_norm": 0.19551306916600292, "learning_rate": 2.824094181873194e-07, "loss": 0.0379, "step": 8633 }, { "epoch": 0.95, "grad_norm": 0.2606711558767825, "learning_rate": 2.8121936367529357e-07, "loss": 0.0316, "step": 8634 }, { "epoch": 0.95, "grad_norm": 0.2046358880645793, "learning_rate": 2.800318041122885e-07, "loss": 0.0288, "step": 8635 }, { "epoch": 0.95, "grad_norm": 0.17812063354025826, "learning_rate": 2.788467396485595e-07, "loss": 0.0218, "step": 8636 }, { "epoch": 0.95, "grad_norm": 0.23556589521095353, "learning_rate": 2.776641704340466e-07, "loss": 0.0422, "step": 8637 }, { "epoch": 0.95, "grad_norm": 0.32835729641452743, "learning_rate": 2.7648409661837903e-07, "loss": 0.0512, "step": 8638 }, { "epoch": 0.95, "grad_norm": 0.2304422799766463, "learning_rate": 2.7530651835086854e-07, "loss": 0.0345, "step": 8639 }, { "epoch": 0.95, "grad_norm": 0.24438107665914258, "learning_rate": 2.7413143578050915e-07, "loss": 0.0198, "step": 8640 }, { "epoch": 0.95, "grad_norm": 0.25114189075764987, "learning_rate": 2.7295884905597536e-07, "loss": 0.0285, "step": 8641 }, { "epoch": 0.95, "grad_norm": 0.21279865868267142, "learning_rate": 2.7178875832563734e-07, "loss": 0.027, "step": 8642 }, { "epoch": 0.95, "grad_norm": 0.213098870364833, "learning_rate": 2.706211637375367e-07, "loss": 0.0276, "step": 8643 }, { "epoch": 0.95, "grad_norm": 0.20320541367347428, "learning_rate": 2.6945606543941073e-07, "loss": 0.0394, "step": 8644 }, { "epoch": 0.95, "grad_norm": 0.1389426019901279, "learning_rate": 2.682934635786727e-07, "loss": 0.0216, "step": 8645 }, { "epoch": 0.95, "grad_norm": 0.16812550083780672, "learning_rate": 2.671333583024205e-07, "loss": 0.0221, "step": 8646 }, { "epoch": 0.95, "grad_norm": 0.21070911264592893, "learning_rate": 2.659757497574411e-07, "loss": 0.038, "step": 8647 }, { "epoch": 0.95, "grad_norm": 0.1850226072887055, "learning_rate": 2.6482063809020186e-07, "loss": 0.03, "step": 8648 }, { "epoch": 0.95, "grad_norm": 0.17965256309873284, "learning_rate": 2.636680234468525e-07, "loss": 0.0183, "step": 8649 }, { "epoch": 0.95, "grad_norm": 0.18191220889335893, "learning_rate": 2.625179059732341e-07, "loss": 0.0264, "step": 8650 }, { "epoch": 0.95, "grad_norm": 0.18788445317844266, "learning_rate": 2.613702858148659e-07, "loss": 0.0307, "step": 8651 }, { "epoch": 0.95, "grad_norm": 0.22395485590667272, "learning_rate": 2.6022516311695166e-07, "loss": 0.0286, "step": 8652 }, { "epoch": 0.95, "grad_norm": 0.21229528601078365, "learning_rate": 2.5908253802437555e-07, "loss": 0.0295, "step": 8653 }, { "epoch": 0.95, "grad_norm": 0.3119257777240814, "learning_rate": 2.579424106817174e-07, "loss": 0.0517, "step": 8654 }, { "epoch": 0.95, "grad_norm": 0.22517244002819475, "learning_rate": 2.568047812332286e-07, "loss": 0.0373, "step": 8655 }, { "epoch": 0.95, "grad_norm": 0.24037222280103096, "learning_rate": 2.556696498228495e-07, "loss": 0.029, "step": 8656 }, { "epoch": 0.95, "grad_norm": 0.2562675278181715, "learning_rate": 2.5453701659420735e-07, "loss": 0.0327, "step": 8657 }, { "epoch": 0.95, "grad_norm": 0.2208832368755895, "learning_rate": 2.5340688169060767e-07, "loss": 0.0277, "step": 8658 }, { "epoch": 0.95, "grad_norm": 0.2539777801699454, "learning_rate": 2.522792452550449e-07, "loss": 0.048, "step": 8659 }, { "epoch": 0.95, "grad_norm": 0.21972674051913363, "learning_rate": 2.511541074301915e-07, "loss": 0.0177, "step": 8660 }, { "epoch": 0.95, "grad_norm": 0.4600370145760082, "learning_rate": 2.500314683584093e-07, "loss": 0.0516, "step": 8661 }, { "epoch": 0.95, "grad_norm": 0.1968085025322031, "learning_rate": 2.489113281817424e-07, "loss": 0.0348, "step": 8662 }, { "epoch": 0.95, "grad_norm": 0.19551116376475933, "learning_rate": 2.477936870419195e-07, "loss": 0.0358, "step": 8663 }, { "epoch": 0.95, "grad_norm": 0.17816695296492321, "learning_rate": 2.4667854508034774e-07, "loss": 0.0314, "step": 8664 }, { "epoch": 0.95, "grad_norm": 0.22978288622641532, "learning_rate": 2.4556590243812515e-07, "loss": 0.0308, "step": 8665 }, { "epoch": 0.95, "grad_norm": 0.21939392342419994, "learning_rate": 2.444557592560304e-07, "loss": 0.045, "step": 8666 }, { "epoch": 0.95, "grad_norm": 0.1737265611609018, "learning_rate": 2.433481156745243e-07, "loss": 0.0279, "step": 8667 }, { "epoch": 0.95, "grad_norm": 0.2204489890501506, "learning_rate": 2.4224297183375487e-07, "loss": 0.0262, "step": 8668 }, { "epoch": 0.95, "grad_norm": 0.2513867451396066, "learning_rate": 2.4114032787355246e-07, "loss": 0.03, "step": 8669 }, { "epoch": 0.95, "grad_norm": 0.23942818317294873, "learning_rate": 2.400401839334299e-07, "loss": 0.0339, "step": 8670 }, { "epoch": 0.95, "grad_norm": 0.1708324741035656, "learning_rate": 2.3894254015258467e-07, "loss": 0.0382, "step": 8671 }, { "epoch": 0.95, "grad_norm": 0.17016075049509563, "learning_rate": 2.378473966698991e-07, "loss": 0.0238, "step": 8672 }, { "epoch": 0.95, "grad_norm": 0.21863160164889495, "learning_rate": 2.3675475362393562e-07, "loss": 0.0197, "step": 8673 }, { "epoch": 0.95, "grad_norm": 0.17165372975588103, "learning_rate": 2.356646111529415e-07, "loss": 0.0202, "step": 8674 }, { "epoch": 0.95, "grad_norm": 0.18773788025020802, "learning_rate": 2.345769693948552e-07, "loss": 0.0235, "step": 8675 }, { "epoch": 0.95, "grad_norm": 0.22634589594585167, "learning_rate": 2.3349182848728447e-07, "loss": 0.0267, "step": 8676 }, { "epoch": 0.95, "grad_norm": 0.20798678582884628, "learning_rate": 2.324091885675328e-07, "loss": 0.0227, "step": 8677 }, { "epoch": 0.95, "grad_norm": 0.32563660152510937, "learning_rate": 2.3132904977258175e-07, "loss": 0.0439, "step": 8678 }, { "epoch": 0.95, "grad_norm": 0.2342346565947724, "learning_rate": 2.3025141223909975e-07, "loss": 0.0442, "step": 8679 }, { "epoch": 0.95, "grad_norm": 0.2857989554033083, "learning_rate": 2.291762761034333e-07, "loss": 0.0412, "step": 8680 }, { "epoch": 0.95, "grad_norm": 0.26594532557268624, "learning_rate": 2.2810364150161578e-07, "loss": 0.0381, "step": 8681 }, { "epoch": 0.95, "grad_norm": 0.22798662005815787, "learning_rate": 2.2703350856936534e-07, "loss": 0.0419, "step": 8682 }, { "epoch": 0.95, "grad_norm": 0.1707937355834813, "learning_rate": 2.2596587744208254e-07, "loss": 0.0179, "step": 8683 }, { "epoch": 0.95, "grad_norm": 0.20069499111064418, "learning_rate": 2.249007482548482e-07, "loss": 0.0395, "step": 8684 }, { "epoch": 0.95, "grad_norm": 0.1956326150173752, "learning_rate": 2.2383812114243453e-07, "loss": 0.0355, "step": 8685 }, { "epoch": 0.95, "grad_norm": 0.22086562866705764, "learning_rate": 2.2277799623928953e-07, "loss": 0.0353, "step": 8686 }, { "epoch": 0.95, "grad_norm": 0.2313041336903829, "learning_rate": 2.2172037367954368e-07, "loss": 0.0371, "step": 8687 }, { "epoch": 0.95, "grad_norm": 0.22586839505488646, "learning_rate": 2.2066525359701885e-07, "loss": 0.0445, "step": 8688 }, { "epoch": 0.95, "grad_norm": 0.16569625765622503, "learning_rate": 2.196126361252149e-07, "loss": 0.0277, "step": 8689 }, { "epoch": 0.95, "grad_norm": 0.22487159018505065, "learning_rate": 2.1856252139731637e-07, "loss": 0.0389, "step": 8690 }, { "epoch": 0.95, "grad_norm": 0.2215080605142478, "learning_rate": 2.1751490954618814e-07, "loss": 0.0398, "step": 8691 }, { "epoch": 0.95, "grad_norm": 0.23735139332230346, "learning_rate": 2.1646980070437973e-07, "loss": 0.0516, "step": 8692 }, { "epoch": 0.95, "grad_norm": 0.22935181240677988, "learning_rate": 2.154271950041298e-07, "loss": 0.0314, "step": 8693 }, { "epoch": 0.95, "grad_norm": 0.283157450182638, "learning_rate": 2.14387092577355e-07, "loss": 0.0368, "step": 8694 }, { "epoch": 0.95, "grad_norm": 0.19664583947198228, "learning_rate": 2.1334949355565237e-07, "loss": 0.0172, "step": 8695 }, { "epoch": 0.95, "grad_norm": 0.25732791036603486, "learning_rate": 2.1231439807031019e-07, "loss": 0.0384, "step": 8696 }, { "epoch": 0.96, "grad_norm": 0.1962740024084322, "learning_rate": 2.1128180625229033e-07, "loss": 0.0176, "step": 8697 }, { "epoch": 0.96, "grad_norm": 0.25764050670988675, "learning_rate": 2.102517182322483e-07, "loss": 0.0368, "step": 8698 }, { "epoch": 0.96, "grad_norm": 0.16427001998480692, "learning_rate": 2.0922413414051324e-07, "loss": 0.0132, "step": 8699 }, { "epoch": 0.96, "grad_norm": 0.2661667376381603, "learning_rate": 2.0819905410710327e-07, "loss": 0.0325, "step": 8700 }, { "epoch": 0.96, "grad_norm": 0.16565415917433382, "learning_rate": 2.0717647826171917e-07, "loss": 0.025, "step": 8701 }, { "epoch": 0.96, "grad_norm": 0.18639798963843018, "learning_rate": 2.0615640673374181e-07, "loss": 0.0202, "step": 8702 }, { "epoch": 0.96, "grad_norm": 0.17820527501462835, "learning_rate": 2.0513883965224136e-07, "loss": 0.0185, "step": 8703 }, { "epoch": 0.96, "grad_norm": 0.1558295015756042, "learning_rate": 2.0412377714596365e-07, "loss": 0.0206, "step": 8704 }, { "epoch": 0.96, "grad_norm": 0.22946891666683195, "learning_rate": 2.0311121934334155e-07, "loss": 0.026, "step": 8705 }, { "epoch": 0.96, "grad_norm": 0.22361841113766576, "learning_rate": 2.0210116637249032e-07, "loss": 0.0351, "step": 8706 }, { "epoch": 0.96, "grad_norm": 0.1860691550118275, "learning_rate": 2.0109361836121e-07, "loss": 0.0203, "step": 8707 }, { "epoch": 0.96, "grad_norm": 0.24734992387672888, "learning_rate": 2.0008857543698078e-07, "loss": 0.0283, "step": 8708 }, { "epoch": 0.96, "grad_norm": 0.16472088316185876, "learning_rate": 1.9908603772696988e-07, "loss": 0.0205, "step": 8709 }, { "epoch": 0.96, "grad_norm": 0.19457749349488115, "learning_rate": 1.9808600535802024e-07, "loss": 0.0249, "step": 8710 }, { "epoch": 0.96, "grad_norm": 0.24792848600268036, "learning_rate": 1.97088478456664e-07, "loss": 0.0307, "step": 8711 }, { "epoch": 0.96, "grad_norm": 0.16460276385384964, "learning_rate": 1.9609345714911575e-07, "loss": 0.0304, "step": 8712 }, { "epoch": 0.96, "grad_norm": 0.2817525215450733, "learning_rate": 1.9510094156127258e-07, "loss": 0.0319, "step": 8713 }, { "epoch": 0.96, "grad_norm": 0.21592069681929513, "learning_rate": 1.941109318187162e-07, "loss": 0.0336, "step": 8714 }, { "epoch": 0.96, "grad_norm": 0.2503272537283905, "learning_rate": 1.9312342804670425e-07, "loss": 0.0271, "step": 8715 }, { "epoch": 0.96, "grad_norm": 0.18382826497485616, "learning_rate": 1.9213843037018344e-07, "loss": 0.0265, "step": 8716 }, { "epoch": 0.96, "grad_norm": 0.28449225523535515, "learning_rate": 1.9115593891378294e-07, "loss": 0.0333, "step": 8717 }, { "epoch": 0.96, "grad_norm": 0.20514299184256218, "learning_rate": 1.9017595380181442e-07, "loss": 0.0435, "step": 8718 }, { "epoch": 0.96, "grad_norm": 0.25111864573052306, "learning_rate": 1.8919847515827205e-07, "loss": 0.0323, "step": 8719 }, { "epoch": 0.96, "grad_norm": 0.2204088595533447, "learning_rate": 1.8822350310683246e-07, "loss": 0.0259, "step": 8720 }, { "epoch": 0.96, "grad_norm": 0.19115970264152365, "learning_rate": 1.8725103777085696e-07, "loss": 0.0289, "step": 8721 }, { "epoch": 0.96, "grad_norm": 0.23806926021106145, "learning_rate": 1.862810792733849e-07, "loss": 0.0323, "step": 8722 }, { "epoch": 0.96, "grad_norm": 0.22445882644608597, "learning_rate": 1.8531362773714478e-07, "loss": 0.0295, "step": 8723 }, { "epoch": 0.96, "grad_norm": 0.20504282692992654, "learning_rate": 1.843486832845409e-07, "loss": 0.0294, "step": 8724 }, { "epoch": 0.96, "grad_norm": 0.2785837231990959, "learning_rate": 1.833862460376712e-07, "loss": 0.0377, "step": 8725 }, { "epoch": 0.96, "grad_norm": 0.18639706030206207, "learning_rate": 1.8242631611830263e-07, "loss": 0.0303, "step": 8726 }, { "epoch": 0.96, "grad_norm": 0.22244493596080567, "learning_rate": 1.8146889364789365e-07, "loss": 0.0459, "step": 8727 }, { "epoch": 0.96, "grad_norm": 0.16856025076621864, "learning_rate": 1.8051397874758736e-07, "loss": 0.0232, "step": 8728 }, { "epoch": 0.96, "grad_norm": 0.35854492519980763, "learning_rate": 1.7956157153820263e-07, "loss": 0.0242, "step": 8729 }, { "epoch": 0.96, "grad_norm": 0.18458326990572813, "learning_rate": 1.786116721402431e-07, "loss": 0.0257, "step": 8730 }, { "epoch": 0.96, "grad_norm": 0.21693626424832563, "learning_rate": 1.7766428067389929e-07, "loss": 0.0342, "step": 8731 }, { "epoch": 0.96, "grad_norm": 0.17757064488067797, "learning_rate": 1.7671939725903752e-07, "loss": 0.0256, "step": 8732 }, { "epoch": 0.96, "grad_norm": 0.23044289427729753, "learning_rate": 1.7577702201521552e-07, "loss": 0.0353, "step": 8733 }, { "epoch": 0.96, "grad_norm": 0.2281756847721565, "learning_rate": 1.7483715506166455e-07, "loss": 0.0339, "step": 8734 }, { "epoch": 0.96, "grad_norm": 0.18809631017998235, "learning_rate": 1.7389979651730503e-07, "loss": 0.0369, "step": 8735 }, { "epoch": 0.96, "grad_norm": 0.20987863923791927, "learning_rate": 1.729649465007377e-07, "loss": 0.0243, "step": 8736 }, { "epoch": 0.96, "grad_norm": 0.18856081290180998, "learning_rate": 1.7203260513024345e-07, "loss": 0.0245, "step": 8737 }, { "epoch": 0.96, "grad_norm": 0.23034486835618645, "learning_rate": 1.7110277252379238e-07, "loss": 0.0431, "step": 8738 }, { "epoch": 0.96, "grad_norm": 0.23875800887001444, "learning_rate": 1.7017544879902813e-07, "loss": 0.03, "step": 8739 }, { "epoch": 0.96, "grad_norm": 0.2574691509082116, "learning_rate": 1.692506340732858e-07, "loss": 0.0292, "step": 8740 }, { "epoch": 0.96, "grad_norm": 0.23162589291690217, "learning_rate": 1.6832832846357837e-07, "loss": 0.0198, "step": 8741 }, { "epoch": 0.96, "grad_norm": 0.1538498111780723, "learning_rate": 1.6740853208659923e-07, "loss": 0.0204, "step": 8742 }, { "epoch": 0.96, "grad_norm": 0.2979419508566156, "learning_rate": 1.664912450587286e-07, "loss": 0.0476, "step": 8743 }, { "epoch": 0.96, "grad_norm": 0.23228707932673953, "learning_rate": 1.655764674960292e-07, "loss": 0.0303, "step": 8744 }, { "epoch": 0.96, "grad_norm": 0.24256632003685585, "learning_rate": 1.6466419951424395e-07, "loss": 0.035, "step": 8745 }, { "epoch": 0.96, "grad_norm": 0.1886892156747234, "learning_rate": 1.6375444122879613e-07, "loss": 0.0255, "step": 8746 }, { "epoch": 0.96, "grad_norm": 0.24529041385238903, "learning_rate": 1.628471927547981e-07, "loss": 0.0278, "step": 8747 }, { "epoch": 0.96, "grad_norm": 0.18393948649255015, "learning_rate": 1.6194245420704025e-07, "loss": 0.0285, "step": 8748 }, { "epoch": 0.96, "grad_norm": 0.2326993488078889, "learning_rate": 1.6104022569999323e-07, "loss": 0.0245, "step": 8749 }, { "epoch": 0.96, "grad_norm": 0.19768047146660006, "learning_rate": 1.6014050734781461e-07, "loss": 0.0165, "step": 8750 }, { "epoch": 0.96, "grad_norm": 0.20214817950098649, "learning_rate": 1.5924329926434225e-07, "loss": 0.0248, "step": 8751 }, { "epoch": 0.96, "grad_norm": 0.20138647614235908, "learning_rate": 1.583486015630986e-07, "loss": 0.0396, "step": 8752 }, { "epoch": 0.96, "grad_norm": 0.17790708429706126, "learning_rate": 1.5745641435728432e-07, "loss": 0.0247, "step": 8753 }, { "epoch": 0.96, "grad_norm": 0.2147278212839516, "learning_rate": 1.565667377597868e-07, "loss": 0.0289, "step": 8754 }, { "epoch": 0.96, "grad_norm": 0.24563264032910329, "learning_rate": 1.5567957188317162e-07, "loss": 0.0499, "step": 8755 }, { "epoch": 0.96, "grad_norm": 0.1959162345553162, "learning_rate": 1.5479491683969117e-07, "loss": 0.019, "step": 8756 }, { "epoch": 0.96, "grad_norm": 0.15475789741661294, "learning_rate": 1.539127727412759e-07, "loss": 0.0124, "step": 8757 }, { "epoch": 0.96, "grad_norm": 0.2337968211531824, "learning_rate": 1.5303313969954103e-07, "loss": 0.0349, "step": 8758 }, { "epoch": 0.96, "grad_norm": 0.14595679325108346, "learning_rate": 1.5215601782578416e-07, "loss": 0.0241, "step": 8759 }, { "epoch": 0.96, "grad_norm": 0.24746499182459178, "learning_rate": 1.5128140723098317e-07, "loss": 0.025, "step": 8760 }, { "epoch": 0.96, "grad_norm": 0.19813106034212843, "learning_rate": 1.5040930802580066e-07, "loss": 0.023, "step": 8761 }, { "epoch": 0.96, "grad_norm": 0.2703189259244915, "learning_rate": 1.4953972032057952e-07, "loss": 0.0287, "step": 8762 }, { "epoch": 0.96, "grad_norm": 0.3282242011930816, "learning_rate": 1.48672644225345e-07, "loss": 0.0241, "step": 8763 }, { "epoch": 0.96, "grad_norm": 0.20593735667384344, "learning_rate": 1.4780807984980716e-07, "loss": 0.0389, "step": 8764 }, { "epoch": 0.96, "grad_norm": 0.21362045722481685, "learning_rate": 1.4694602730335626e-07, "loss": 0.0439, "step": 8765 }, { "epoch": 0.96, "grad_norm": 0.19380484427727424, "learning_rate": 1.4608648669506287e-07, "loss": 0.0278, "step": 8766 }, { "epoch": 0.96, "grad_norm": 0.20894393351341048, "learning_rate": 1.4522945813368216e-07, "loss": 0.0371, "step": 8767 }, { "epoch": 0.96, "grad_norm": 0.17357704159443765, "learning_rate": 1.443749417276541e-07, "loss": 0.032, "step": 8768 }, { "epoch": 0.96, "grad_norm": 0.21573464605082726, "learning_rate": 1.4352293758509218e-07, "loss": 0.0351, "step": 8769 }, { "epoch": 0.96, "grad_norm": 0.24357158721561883, "learning_rate": 1.4267344581380127e-07, "loss": 0.0377, "step": 8770 }, { "epoch": 0.96, "grad_norm": 0.2319143427840215, "learning_rate": 1.4182646652126652e-07, "loss": 0.0398, "step": 8771 }, { "epoch": 0.96, "grad_norm": 0.18050443453373913, "learning_rate": 1.4098199981464887e-07, "loss": 0.0317, "step": 8772 }, { "epoch": 0.96, "grad_norm": 0.3103363475106859, "learning_rate": 1.401400458007962e-07, "loss": 0.0466, "step": 8773 }, { "epoch": 0.96, "grad_norm": 0.18228047995075222, "learning_rate": 1.3930060458624106e-07, "loss": 0.0253, "step": 8774 }, { "epoch": 0.96, "grad_norm": 0.1929269347657742, "learning_rate": 1.3846367627719402e-07, "loss": 0.0208, "step": 8775 }, { "epoch": 0.96, "grad_norm": 0.20458900615365908, "learning_rate": 1.37629260979546e-07, "loss": 0.0284, "step": 8776 }, { "epoch": 0.96, "grad_norm": 0.23223062869427047, "learning_rate": 1.3679735879887468e-07, "loss": 0.0335, "step": 8777 }, { "epoch": 0.96, "grad_norm": 0.24302910119610532, "learning_rate": 1.3596796984044037e-07, "loss": 0.0248, "step": 8778 }, { "epoch": 0.96, "grad_norm": 0.21359427122039937, "learning_rate": 1.351410942091791e-07, "loss": 0.0328, "step": 8779 }, { "epoch": 0.96, "grad_norm": 0.18608433030996446, "learning_rate": 1.3431673200971386e-07, "loss": 0.0132, "step": 8780 }, { "epoch": 0.96, "grad_norm": 0.39687609935217755, "learning_rate": 1.3349488334634565e-07, "loss": 0.0582, "step": 8781 }, { "epoch": 0.96, "grad_norm": 0.23109737430797175, "learning_rate": 1.3267554832306463e-07, "loss": 0.0367, "step": 8782 }, { "epoch": 0.96, "grad_norm": 0.15203252816324558, "learning_rate": 1.3185872704353898e-07, "loss": 0.0232, "step": 8783 }, { "epoch": 0.96, "grad_norm": 0.23426329811928717, "learning_rate": 1.310444196111127e-07, "loss": 0.0533, "step": 8784 }, { "epoch": 0.96, "grad_norm": 0.20209973435030545, "learning_rate": 1.3023262612882116e-07, "loss": 0.0221, "step": 8785 }, { "epoch": 0.96, "grad_norm": 0.25584715894117294, "learning_rate": 1.2942334669937773e-07, "loss": 0.0403, "step": 8786 }, { "epoch": 0.96, "grad_norm": 0.2064357867249212, "learning_rate": 1.2861658142517608e-07, "loss": 0.0251, "step": 8787 }, { "epoch": 0.97, "grad_norm": 0.20337371088321607, "learning_rate": 1.2781233040829234e-07, "loss": 0.0181, "step": 8788 }, { "epoch": 0.97, "grad_norm": 0.27046586134859407, "learning_rate": 1.2701059375049174e-07, "loss": 0.034, "step": 8789 }, { "epoch": 0.97, "grad_norm": 0.21523235894728446, "learning_rate": 1.2621137155320872e-07, "loss": 0.0398, "step": 8790 }, { "epoch": 0.97, "grad_norm": 0.21681457309825938, "learning_rate": 1.2541466391756907e-07, "loss": 0.0187, "step": 8791 }, { "epoch": 0.97, "grad_norm": 0.18662048730996217, "learning_rate": 1.2462047094437657e-07, "loss": 0.0258, "step": 8792 }, { "epoch": 0.97, "grad_norm": 0.2708380986063448, "learning_rate": 1.2382879273411753e-07, "loss": 0.0291, "step": 8793 }, { "epoch": 0.97, "grad_norm": 0.19884157940922187, "learning_rate": 1.2303962938696068e-07, "loss": 0.0309, "step": 8794 }, { "epoch": 0.97, "grad_norm": 0.2374785928113264, "learning_rate": 1.222529810027573e-07, "loss": 0.0344, "step": 8795 }, { "epoch": 0.97, "grad_norm": 0.210061328751313, "learning_rate": 1.2146884768103883e-07, "loss": 0.0281, "step": 8796 }, { "epoch": 0.97, "grad_norm": 0.25380423036811794, "learning_rate": 1.2068722952101707e-07, "loss": 0.0342, "step": 8797 }, { "epoch": 0.97, "grad_norm": 0.17138275728782582, "learning_rate": 1.1990812662158846e-07, "loss": 0.0287, "step": 8798 }, { "epoch": 0.97, "grad_norm": 0.22685520403645804, "learning_rate": 1.191315390813319e-07, "loss": 0.0287, "step": 8799 }, { "epoch": 0.97, "grad_norm": 0.38724759095084776, "learning_rate": 1.1835746699850215e-07, "loss": 0.0375, "step": 8800 }, { "epoch": 0.97, "grad_norm": 0.20292787890688466, "learning_rate": 1.1758591047104306e-07, "loss": 0.0326, "step": 8801 }, { "epoch": 0.97, "grad_norm": 0.21147031953662404, "learning_rate": 1.1681686959657879e-07, "loss": 0.0213, "step": 8802 }, { "epoch": 0.97, "grad_norm": 0.24423717878322407, "learning_rate": 1.1605034447240925e-07, "loss": 0.0323, "step": 8803 }, { "epoch": 0.97, "grad_norm": 0.299142186302002, "learning_rate": 1.1528633519552357e-07, "loss": 0.038, "step": 8804 }, { "epoch": 0.97, "grad_norm": 0.17280086485063506, "learning_rate": 1.1452484186258439e-07, "loss": 0.0216, "step": 8805 }, { "epoch": 0.97, "grad_norm": 0.19992484453864742, "learning_rate": 1.1376586456994798e-07, "loss": 0.0322, "step": 8806 }, { "epoch": 0.97, "grad_norm": 0.1948669788215871, "learning_rate": 1.1300940341363752e-07, "loss": 0.0326, "step": 8807 }, { "epoch": 0.97, "grad_norm": 0.16161638005231468, "learning_rate": 1.1225545848937203e-07, "loss": 0.0311, "step": 8808 }, { "epoch": 0.97, "grad_norm": 0.2297560453314853, "learning_rate": 1.1150402989254183e-07, "loss": 0.0303, "step": 8809 }, { "epoch": 0.97, "grad_norm": 0.19112317893431216, "learning_rate": 1.1075511771822423e-07, "loss": 0.0374, "step": 8810 }, { "epoch": 0.97, "grad_norm": 0.23386695157101786, "learning_rate": 1.1000872206117452e-07, "loss": 0.0373, "step": 8811 }, { "epoch": 0.97, "grad_norm": 0.19713922737970394, "learning_rate": 1.0926484301583273e-07, "loss": 0.0394, "step": 8812 }, { "epoch": 0.97, "grad_norm": 0.18110095629371134, "learning_rate": 1.0852348067631913e-07, "loss": 0.0274, "step": 8813 }, { "epoch": 0.97, "grad_norm": 0.2928962617842024, "learning_rate": 1.0778463513643645e-07, "loss": 0.0668, "step": 8814 }, { "epoch": 0.97, "grad_norm": 0.2546459573670888, "learning_rate": 1.0704830648966769e-07, "loss": 0.0356, "step": 8815 }, { "epoch": 0.97, "grad_norm": 0.21231603513149047, "learning_rate": 1.0631449482917833e-07, "loss": 0.0299, "step": 8816 }, { "epoch": 0.97, "grad_norm": 0.1979718643732338, "learning_rate": 1.055832002478141e-07, "loss": 0.0209, "step": 8817 }, { "epoch": 0.97, "grad_norm": 0.21945028101543912, "learning_rate": 1.0485442283810321e-07, "loss": 0.0401, "step": 8818 }, { "epoch": 0.97, "grad_norm": 0.1639425656768194, "learning_rate": 1.0412816269225635e-07, "loss": 0.0138, "step": 8819 }, { "epoch": 0.97, "grad_norm": 0.2373730517840274, "learning_rate": 1.0340441990216443e-07, "loss": 0.0338, "step": 8820 }, { "epoch": 0.97, "grad_norm": 0.19370221079304215, "learning_rate": 1.026831945594009e-07, "loss": 0.0307, "step": 8821 }, { "epoch": 0.97, "grad_norm": 0.23440798686271105, "learning_rate": 1.019644867552172e-07, "loss": 0.0259, "step": 8822 }, { "epoch": 0.97, "grad_norm": 0.20228400729248017, "learning_rate": 1.0124829658055168e-07, "loss": 0.0276, "step": 8823 }, { "epoch": 0.97, "grad_norm": 0.18871557073288675, "learning_rate": 1.0053462412601855e-07, "loss": 0.0263, "step": 8824 }, { "epoch": 0.97, "grad_norm": 0.21704169360163283, "learning_rate": 9.982346948192112e-08, "loss": 0.0321, "step": 8825 }, { "epoch": 0.97, "grad_norm": 0.17353412325180773, "learning_rate": 9.911483273823408e-08, "loss": 0.0208, "step": 8826 }, { "epoch": 0.97, "grad_norm": 0.2056149495633992, "learning_rate": 9.840871398462126e-08, "loss": 0.0387, "step": 8827 }, { "epoch": 0.97, "grad_norm": 0.20205207038862702, "learning_rate": 9.770511331042454e-08, "loss": 0.042, "step": 8828 }, { "epoch": 0.97, "grad_norm": 0.24749035618999599, "learning_rate": 9.700403080467046e-08, "loss": 0.029, "step": 8829 }, { "epoch": 0.97, "grad_norm": 0.206954698985052, "learning_rate": 9.630546655606365e-08, "loss": 0.0182, "step": 8830 }, { "epoch": 0.97, "grad_norm": 0.2722647996668299, "learning_rate": 9.560942065298673e-08, "loss": 0.0422, "step": 8831 }, { "epoch": 0.97, "grad_norm": 0.2047944331944346, "learning_rate": 9.491589318351368e-08, "loss": 0.0364, "step": 8832 }, { "epoch": 0.97, "grad_norm": 0.19281538146600172, "learning_rate": 9.422488423539211e-08, "loss": 0.0195, "step": 8833 }, { "epoch": 0.97, "grad_norm": 0.16511098434459562, "learning_rate": 9.353639389605207e-08, "loss": 0.0229, "step": 8834 }, { "epoch": 0.97, "grad_norm": 0.20503108983846863, "learning_rate": 9.285042225260832e-08, "loss": 0.0335, "step": 8835 }, { "epoch": 0.97, "grad_norm": 0.18161791997947183, "learning_rate": 9.216696939184922e-08, "loss": 0.0232, "step": 8836 }, { "epoch": 0.97, "grad_norm": 0.39087788978901583, "learning_rate": 9.14860354002567e-08, "loss": 0.041, "step": 8837 }, { "epoch": 0.97, "grad_norm": 0.15480180404137683, "learning_rate": 9.080762036398184e-08, "loss": 0.0298, "step": 8838 }, { "epoch": 0.97, "grad_norm": 0.22438641137934368, "learning_rate": 9.013172436886263e-08, "loss": 0.0341, "step": 8839 }, { "epoch": 0.97, "grad_norm": 0.23703619599132775, "learning_rate": 8.94583475004196e-08, "loss": 0.043, "step": 8840 }, { "epoch": 0.97, "grad_norm": 0.18334841061135454, "learning_rate": 8.878748984385344e-08, "loss": 0.0392, "step": 8841 }, { "epoch": 0.97, "grad_norm": 0.23021026437188277, "learning_rate": 8.811915148404294e-08, "loss": 0.026, "step": 8842 }, { "epoch": 0.97, "grad_norm": 0.1827860067195035, "learning_rate": 8.745333250555377e-08, "loss": 0.0271, "step": 8843 }, { "epoch": 0.97, "grad_norm": 0.2202341532224017, "learning_rate": 8.679003299262523e-08, "loss": 0.0365, "step": 8844 }, { "epoch": 0.97, "grad_norm": 0.23085318039072167, "learning_rate": 8.612925302918796e-08, "loss": 0.0376, "step": 8845 }, { "epoch": 0.97, "grad_norm": 0.29691353347062704, "learning_rate": 8.547099269884396e-08, "loss": 0.0333, "step": 8846 }, { "epoch": 0.97, "grad_norm": 0.208201573350002, "learning_rate": 8.481525208487995e-08, "loss": 0.0268, "step": 8847 }, { "epoch": 0.97, "grad_norm": 0.2416686221706812, "learning_rate": 8.416203127026734e-08, "loss": 0.0364, "step": 8848 }, { "epoch": 0.97, "grad_norm": 0.15994587316132045, "learning_rate": 8.351133033765557e-08, "loss": 0.0166, "step": 8849 }, { "epoch": 0.97, "grad_norm": 0.1608993283622438, "learning_rate": 8.286314936937434e-08, "loss": 0.0375, "step": 8850 }, { "epoch": 0.97, "grad_norm": 0.19504197459049552, "learning_rate": 8.221748844743804e-08, "loss": 0.032, "step": 8851 }, { "epoch": 0.97, "grad_norm": 0.2631298286284889, "learning_rate": 8.157434765353466e-08, "loss": 0.0229, "step": 8852 }, { "epoch": 0.97, "grad_norm": 0.22562095238233013, "learning_rate": 8.093372706904579e-08, "loss": 0.0234, "step": 8853 }, { "epoch": 0.97, "grad_norm": 0.240873154336681, "learning_rate": 8.029562677502212e-08, "loss": 0.0336, "step": 8854 }, { "epoch": 0.97, "grad_norm": 0.1875751662586995, "learning_rate": 7.966004685219908e-08, "loss": 0.0302, "step": 8855 }, { "epoch": 0.97, "grad_norm": 0.255440492846282, "learning_rate": 7.902698738099901e-08, "loss": 0.0292, "step": 8856 }, { "epoch": 0.97, "grad_norm": 0.1964251392934732, "learning_rate": 7.839644844151784e-08, "loss": 0.0333, "step": 8857 }, { "epoch": 0.97, "grad_norm": 0.24668400987959496, "learning_rate": 7.776843011353619e-08, "loss": 0.0301, "step": 8858 }, { "epoch": 0.97, "grad_norm": 0.22160146400238329, "learning_rate": 7.714293247651494e-08, "loss": 0.0326, "step": 8859 }, { "epoch": 0.97, "grad_norm": 0.21657704041673662, "learning_rate": 7.651995560959525e-08, "loss": 0.0274, "step": 8860 }, { "epoch": 0.97, "grad_norm": 0.20973267678300314, "learning_rate": 7.589949959160292e-08, "loss": 0.0336, "step": 8861 }, { "epoch": 0.97, "grad_norm": 0.19163122384999404, "learning_rate": 7.528156450103963e-08, "loss": 0.0263, "step": 8862 }, { "epoch": 0.97, "grad_norm": 0.24958776222230894, "learning_rate": 7.46661504160895e-08, "loss": 0.0268, "step": 8863 }, { "epoch": 0.97, "grad_norm": 0.16567642677578373, "learning_rate": 7.405325741462354e-08, "loss": 0.0212, "step": 8864 }, { "epoch": 0.97, "grad_norm": 0.17346485089816543, "learning_rate": 7.344288557418422e-08, "loss": 0.0154, "step": 8865 }, { "epoch": 0.97, "grad_norm": 0.16755537799026465, "learning_rate": 7.283503497200083e-08, "loss": 0.0179, "step": 8866 }, { "epoch": 0.97, "grad_norm": 0.22721290132856814, "learning_rate": 7.222970568498299e-08, "loss": 0.0323, "step": 8867 }, { "epoch": 0.97, "grad_norm": 0.2073917399353256, "learning_rate": 7.162689778972276e-08, "loss": 0.0316, "step": 8868 }, { "epoch": 0.97, "grad_norm": 0.22171986421677445, "learning_rate": 7.102661136248801e-08, "loss": 0.0195, "step": 8869 }, { "epoch": 0.97, "grad_norm": 0.2027553349062341, "learning_rate": 7.042884647923353e-08, "loss": 0.0422, "step": 8870 }, { "epoch": 0.97, "grad_norm": 0.2404274281101992, "learning_rate": 6.983360321559218e-08, "loss": 0.0398, "step": 8871 }, { "epoch": 0.97, "grad_norm": 0.18298409062942378, "learning_rate": 6.924088164687703e-08, "loss": 0.024, "step": 8872 }, { "epoch": 0.97, "grad_norm": 0.2195366950394955, "learning_rate": 6.865068184808366e-08, "loss": 0.0329, "step": 8873 }, { "epoch": 0.97, "grad_norm": 0.23303050005089862, "learning_rate": 6.806300389388565e-08, "loss": 0.0502, "step": 8874 }, { "epoch": 0.97, "grad_norm": 0.23381320060720864, "learning_rate": 6.747784785864354e-08, "loss": 0.0161, "step": 8875 }, { "epoch": 0.97, "grad_norm": 0.21115008873706176, "learning_rate": 6.689521381639363e-08, "loss": 0.0324, "step": 8876 }, { "epoch": 0.97, "grad_norm": 0.2722526355638607, "learning_rate": 6.631510184085477e-08, "loss": 0.0275, "step": 8877 }, { "epoch": 0.97, "grad_norm": 0.2771019754854928, "learning_rate": 6.573751200542599e-08, "loss": 0.0277, "step": 8878 }, { "epoch": 0.98, "grad_norm": 0.17865040280213682, "learning_rate": 6.516244438318664e-08, "loss": 0.0205, "step": 8879 }, { "epoch": 0.98, "grad_norm": 0.1856223806042282, "learning_rate": 6.458989904690072e-08, "loss": 0.0196, "step": 8880 }, { "epoch": 0.98, "grad_norm": 0.2362359000973686, "learning_rate": 6.401987606901028e-08, "loss": 0.023, "step": 8881 }, { "epoch": 0.98, "grad_norm": 0.23464998961370817, "learning_rate": 6.345237552163541e-08, "loss": 0.0308, "step": 8882 }, { "epoch": 0.98, "grad_norm": 0.16618538199456276, "learning_rate": 6.288739747658312e-08, "loss": 0.0147, "step": 8883 }, { "epoch": 0.98, "grad_norm": 0.1973434521332318, "learning_rate": 6.232494200533623e-08, "loss": 0.0191, "step": 8884 }, { "epoch": 0.98, "grad_norm": 0.26002566866528776, "learning_rate": 6.176500917906003e-08, "loss": 0.0413, "step": 8885 }, { "epoch": 0.98, "grad_norm": 0.4893865663372262, "learning_rate": 6.120759906860008e-08, "loss": 0.0714, "step": 8886 }, { "epoch": 0.98, "grad_norm": 0.2434821365700065, "learning_rate": 6.065271174448884e-08, "loss": 0.0404, "step": 8887 }, { "epoch": 0.98, "grad_norm": 0.18391031001494496, "learning_rate": 6.010034727692792e-08, "loss": 0.0274, "step": 8888 }, { "epoch": 0.98, "grad_norm": 0.29160401687560084, "learning_rate": 5.955050573581034e-08, "loss": 0.0313, "step": 8889 }, { "epoch": 0.98, "grad_norm": 0.19577673100814572, "learning_rate": 5.900318719070264e-08, "loss": 0.0304, "step": 8890 }, { "epoch": 0.98, "grad_norm": 0.2033135565992095, "learning_rate": 5.84583917108561e-08, "loss": 0.0245, "step": 8891 }, { "epoch": 0.98, "grad_norm": 0.33709778130354817, "learning_rate": 5.791611936520447e-08, "loss": 0.0557, "step": 8892 }, { "epoch": 0.98, "grad_norm": 0.1923278207171422, "learning_rate": 5.737637022235509e-08, "loss": 0.024, "step": 8893 }, { "epoch": 0.98, "grad_norm": 0.21318792005859344, "learning_rate": 5.683914435060445e-08, "loss": 0.0357, "step": 8894 }, { "epoch": 0.98, "grad_norm": 0.19600562132831909, "learning_rate": 5.6304441817922606e-08, "loss": 0.011, "step": 8895 }, { "epoch": 0.98, "grad_norm": 0.17746759661222283, "learning_rate": 5.577226269196656e-08, "loss": 0.0241, "step": 8896 }, { "epoch": 0.98, "grad_norm": 0.18966043082755105, "learning_rate": 5.5242607040069116e-08, "loss": 0.0251, "step": 8897 }, { "epoch": 0.98, "grad_norm": 0.21884208682325815, "learning_rate": 5.471547492924778e-08, "loss": 0.0357, "step": 8898 }, { "epoch": 0.98, "grad_norm": 0.2660924220878005, "learning_rate": 5.4190866426195866e-08, "loss": 0.0348, "step": 8899 }, { "epoch": 0.98, "grad_norm": 0.2184540159642352, "learning_rate": 5.3668781597291386e-08, "loss": 0.0285, "step": 8900 }, { "epoch": 0.98, "grad_norm": 0.22007032841693855, "learning_rate": 5.314922050859484e-08, "loss": 0.0226, "step": 8901 }, { "epoch": 0.98, "grad_norm": 0.2103459044875899, "learning_rate": 5.263218322584029e-08, "loss": 0.0357, "step": 8902 }, { "epoch": 0.98, "grad_norm": 0.24810193419444404, "learning_rate": 5.211766981444877e-08, "loss": 0.0512, "step": 8903 }, { "epoch": 0.98, "grad_norm": 0.15084836706323024, "learning_rate": 5.160568033951929e-08, "loss": 0.0223, "step": 8904 }, { "epoch": 0.98, "grad_norm": 0.20629750087694815, "learning_rate": 5.109621486583116e-08, "loss": 0.0482, "step": 8905 }, { "epoch": 0.98, "grad_norm": 0.2717431159608309, "learning_rate": 5.058927345784836e-08, "loss": 0.0261, "step": 8906 }, { "epoch": 0.98, "grad_norm": 0.2657327433462176, "learning_rate": 5.0084856179708484e-08, "loss": 0.0362, "step": 8907 }, { "epoch": 0.98, "grad_norm": 0.23068559273184816, "learning_rate": 4.9582963095238247e-08, "loss": 0.0289, "step": 8908 }, { "epoch": 0.98, "grad_norm": 0.21004634273455655, "learning_rate": 4.908359426793574e-08, "loss": 0.0325, "step": 8909 }, { "epoch": 0.98, "grad_norm": 0.21006131101701378, "learning_rate": 4.8586749760985987e-08, "loss": 0.0345, "step": 8910 }, { "epoch": 0.98, "grad_norm": 0.25108688245401023, "learning_rate": 4.809242963725647e-08, "loss": 0.0419, "step": 8911 }, { "epoch": 0.98, "grad_norm": 0.2528863081682947, "learning_rate": 4.7600633959286044e-08, "loss": 0.0275, "step": 8912 }, { "epoch": 0.98, "grad_norm": 0.25668979156396365, "learning_rate": 4.7111362789304946e-08, "loss": 0.0323, "step": 8913 }, { "epoch": 0.98, "grad_norm": 0.7035446398236099, "learning_rate": 4.6624616189214765e-08, "loss": 0.0311, "step": 8914 }, { "epoch": 0.98, "grad_norm": 0.2318738197800452, "learning_rate": 4.614039422060623e-08, "loss": 0.0337, "step": 8915 }, { "epoch": 0.98, "grad_norm": 0.2532999870246074, "learning_rate": 4.565869694474367e-08, "loss": 0.0375, "step": 8916 }, { "epoch": 0.98, "grad_norm": 0.18877799878022666, "learning_rate": 4.51795244225739e-08, "loss": 0.0169, "step": 8917 }, { "epoch": 0.98, "grad_norm": 0.21006230413546365, "learning_rate": 4.470287671472395e-08, "loss": 0.0265, "step": 8918 }, { "epoch": 0.98, "grad_norm": 0.1977127126480708, "learning_rate": 4.4228753881507826e-08, "loss": 0.0266, "step": 8919 }, { "epoch": 0.98, "grad_norm": 0.230548489240827, "learning_rate": 4.375715598290864e-08, "loss": 0.0476, "step": 8920 }, { "epoch": 0.98, "grad_norm": 0.19867245251402507, "learning_rate": 4.3288083078600886e-08, "loss": 0.023, "step": 8921 }, { "epoch": 0.98, "grad_norm": 0.17243855915490924, "learning_rate": 4.2821535227930424e-08, "loss": 0.0278, "step": 8922 }, { "epoch": 0.98, "grad_norm": 0.2076809513363692, "learning_rate": 4.235751248993003e-08, "loss": 0.0342, "step": 8923 }, { "epoch": 0.98, "grad_norm": 0.1839580371252363, "learning_rate": 4.1896014923310525e-08, "loss": 0.0216, "step": 8924 }, { "epoch": 0.98, "grad_norm": 0.196581968334963, "learning_rate": 4.1437042586465194e-08, "loss": 0.0288, "step": 8925 }, { "epoch": 0.98, "grad_norm": 0.14836014438568257, "learning_rate": 4.098059553746536e-08, "loss": 0.0168, "step": 8926 }, { "epoch": 0.98, "grad_norm": 0.20459380407756547, "learning_rate": 4.0526673834060394e-08, "loss": 0.0233, "step": 8927 }, { "epoch": 0.98, "grad_norm": 0.2334731632781089, "learning_rate": 4.0075277533688784e-08, "loss": 0.0371, "step": 8928 }, { "epoch": 0.98, "grad_norm": 0.27081992806979993, "learning_rate": 3.9626406693460405e-08, "loss": 0.028, "step": 8929 }, { "epoch": 0.98, "grad_norm": 0.17549744053004038, "learning_rate": 3.918006137017205e-08, "loss": 0.0305, "step": 8930 }, { "epoch": 0.98, "grad_norm": 0.1689352241988022, "learning_rate": 3.87362416202941e-08, "loss": 0.0235, "step": 8931 }, { "epoch": 0.98, "grad_norm": 0.2101084523637817, "learning_rate": 3.829494749998608e-08, "loss": 0.014, "step": 8932 }, { "epoch": 0.98, "grad_norm": 0.18920444606137404, "learning_rate": 3.78561790650811e-08, "loss": 0.0325, "step": 8933 }, { "epoch": 0.98, "grad_norm": 0.1607385957114975, "learning_rate": 3.7419936371094756e-08, "loss": 0.0282, "step": 8934 }, { "epoch": 0.98, "grad_norm": 0.20258937420489143, "learning_rate": 3.6986219473225116e-08, "loss": 0.0444, "step": 8935 }, { "epoch": 0.98, "grad_norm": 0.18878558623927508, "learning_rate": 3.655502842634606e-08, "loss": 0.042, "step": 8936 }, { "epoch": 0.98, "grad_norm": 0.21752551211437238, "learning_rate": 3.6126363285018396e-08, "loss": 0.0411, "step": 8937 }, { "epoch": 0.98, "grad_norm": 0.21324645935911626, "learning_rate": 3.570022410347651e-08, "loss": 0.021, "step": 8938 }, { "epoch": 0.98, "grad_norm": 0.2698570024453317, "learning_rate": 3.52766109356395e-08, "loss": 0.026, "step": 8939 }, { "epoch": 0.98, "grad_norm": 0.15491729655500275, "learning_rate": 3.485552383510671e-08, "loss": 0.0215, "step": 8940 }, { "epoch": 0.98, "grad_norm": 0.17882249820801224, "learning_rate": 3.44369628551533e-08, "loss": 0.0298, "step": 8941 }, { "epoch": 0.98, "grad_norm": 0.22690129435099873, "learning_rate": 3.402092804874357e-08, "loss": 0.0233, "step": 8942 }, { "epoch": 0.98, "grad_norm": 0.18710486181625777, "learning_rate": 3.3607419468510985e-08, "loss": 0.0342, "step": 8943 }, { "epoch": 0.98, "grad_norm": 0.23569667411577044, "learning_rate": 3.3196437166780336e-08, "loss": 0.0411, "step": 8944 }, { "epoch": 0.98, "grad_norm": 0.21990746574559097, "learning_rate": 3.278798119555004e-08, "loss": 0.0282, "step": 8945 }, { "epoch": 0.98, "grad_norm": 0.2083865644081224, "learning_rate": 3.2382051606500986e-08, "loss": 0.022, "step": 8946 }, { "epoch": 0.98, "grad_norm": 0.17548957043037844, "learning_rate": 3.197864845099208e-08, "loss": 0.0295, "step": 8947 }, { "epoch": 0.98, "grad_norm": 0.22755816792841235, "learning_rate": 3.1577771780066936e-08, "loss": 0.0258, "step": 8948 }, { "epoch": 0.98, "grad_norm": 0.2109330494729064, "learning_rate": 3.117942164444498e-08, "loss": 0.0519, "step": 8949 }, { "epoch": 0.98, "grad_norm": 0.2344377354265016, "learning_rate": 3.078359809453257e-08, "loss": 0.0291, "step": 8950 }, { "epoch": 0.98, "grad_norm": 0.1685005297684336, "learning_rate": 3.03903011804052e-08, "loss": 0.0188, "step": 8951 }, { "epoch": 0.98, "grad_norm": 0.20011099924009323, "learning_rate": 2.999953095182972e-08, "loss": 0.0148, "step": 8952 }, { "epoch": 0.98, "grad_norm": 0.2352683525669655, "learning_rate": 2.9611287458248817e-08, "loss": 0.036, "step": 8953 }, { "epoch": 0.98, "grad_norm": 0.234208111155559, "learning_rate": 2.9225570748785402e-08, "loss": 0.039, "step": 8954 }, { "epoch": 0.98, "grad_norm": 0.16460965515460493, "learning_rate": 2.884238087224045e-08, "loss": 0.0306, "step": 8955 }, { "epoch": 0.98, "grad_norm": 0.19101480433125886, "learning_rate": 2.8461717877099615e-08, "loss": 0.0108, "step": 8956 }, { "epoch": 0.98, "grad_norm": 0.18602885838859567, "learning_rate": 2.8083581811526594e-08, "loss": 0.0178, "step": 8957 }, { "epoch": 0.98, "grad_norm": 0.1981284280233722, "learning_rate": 2.770797272336756e-08, "loss": 0.04, "step": 8958 }, { "epoch": 0.98, "grad_norm": 0.20061913264836562, "learning_rate": 2.73348906601445e-08, "loss": 0.0397, "step": 8959 }, { "epoch": 0.98, "grad_norm": 0.22872502766508018, "learning_rate": 2.696433566905965e-08, "loss": 0.0354, "step": 8960 }, { "epoch": 0.98, "grad_norm": 0.18010970974392435, "learning_rate": 2.6596307797004396e-08, "loss": 0.0331, "step": 8961 }, { "epoch": 0.98, "grad_norm": 0.27038988923967716, "learning_rate": 2.623080709054149e-08, "loss": 0.0369, "step": 8962 }, { "epoch": 0.98, "grad_norm": 0.26163690625313313, "learning_rate": 2.5867833595913937e-08, "loss": 0.038, "step": 8963 }, { "epoch": 0.98, "grad_norm": 0.19029744765225817, "learning_rate": 2.550738735905167e-08, "loss": 0.0295, "step": 8964 }, { "epoch": 0.98, "grad_norm": 0.20746219516115086, "learning_rate": 2.514946842555821e-08, "loss": 0.0324, "step": 8965 }, { "epoch": 0.98, "grad_norm": 0.280971455394375, "learning_rate": 2.479407684071733e-08, "loss": 0.0333, "step": 8966 }, { "epoch": 0.98, "grad_norm": 0.20197736716358233, "learning_rate": 2.4441212649501944e-08, "loss": 0.0271, "step": 8967 }, { "epoch": 0.98, "grad_norm": 0.16926910815215676, "learning_rate": 2.4090875896551903e-08, "loss": 0.0176, "step": 8968 }, { "epoch": 0.98, "grad_norm": 0.24804209907319322, "learning_rate": 2.374306662619841e-08, "loss": 0.0491, "step": 8969 }, { "epoch": 0.99, "grad_norm": 0.21520106339954145, "learning_rate": 2.3397784882448483e-08, "loss": 0.0247, "step": 8970 }, { "epoch": 0.99, "grad_norm": 0.19443187550006327, "learning_rate": 2.305503070898718e-08, "loss": 0.0329, "step": 8971 }, { "epoch": 0.99, "grad_norm": 0.196732158537213, "learning_rate": 2.2714804149184256e-08, "loss": 0.0285, "step": 8972 }, { "epoch": 0.99, "grad_norm": 0.2632305832283681, "learning_rate": 2.2377105246085272e-08, "loss": 0.0266, "step": 8973 }, { "epoch": 0.99, "grad_norm": 0.1710043224923331, "learning_rate": 2.2041934042420497e-08, "loss": 0.0201, "step": 8974 }, { "epoch": 0.99, "grad_norm": 0.2837634101925336, "learning_rate": 2.170929058059379e-08, "loss": 0.038, "step": 8975 }, { "epoch": 0.99, "grad_norm": 0.17693066443913338, "learning_rate": 2.137917490269814e-08, "loss": 0.0274, "step": 8976 }, { "epoch": 0.99, "grad_norm": 0.2245182515936549, "learning_rate": 2.1051587050500144e-08, "loss": 0.0353, "step": 8977 }, { "epoch": 0.99, "grad_norm": 0.2584886354552909, "learning_rate": 2.0726527065448865e-08, "loss": 0.0351, "step": 8978 }, { "epoch": 0.99, "grad_norm": 0.1553510256118456, "learning_rate": 2.0403994988671404e-08, "loss": 0.0269, "step": 8979 }, { "epoch": 0.99, "grad_norm": 0.20004508732172224, "learning_rate": 2.0083990860977343e-08, "loss": 0.0353, "step": 8980 }, { "epoch": 0.99, "grad_norm": 0.22401136317552417, "learning_rate": 1.9766514722856512e-08, "loss": 0.0457, "step": 8981 }, { "epoch": 0.99, "grad_norm": 0.20792340639106136, "learning_rate": 1.9451566614479e-08, "loss": 0.0359, "step": 8982 }, { "epoch": 0.99, "grad_norm": 0.20801362401159895, "learning_rate": 1.9139146575690714e-08, "loss": 0.0284, "step": 8983 }, { "epoch": 0.99, "grad_norm": 0.17488369142040103, "learning_rate": 1.8829254646022256e-08, "loss": 0.03, "step": 8984 }, { "epoch": 0.99, "grad_norm": 0.18400830277273272, "learning_rate": 1.85218908646867e-08, "loss": 0.0233, "step": 8985 }, { "epoch": 0.99, "grad_norm": 0.2610193415459683, "learning_rate": 1.8217055270568497e-08, "loss": 0.0293, "step": 8986 }, { "epoch": 0.99, "grad_norm": 0.24891932686017573, "learning_rate": 1.7914747902241235e-08, "loss": 0.0313, "step": 8987 }, { "epoch": 0.99, "grad_norm": 0.21294748789274465, "learning_rate": 1.7614968797952102e-08, "loss": 0.0252, "step": 8988 }, { "epoch": 0.99, "grad_norm": 0.1993302799762338, "learning_rate": 1.7317717995632975e-08, "loss": 0.0352, "step": 8989 }, { "epoch": 0.99, "grad_norm": 0.19925219124255597, "learning_rate": 1.702299553289377e-08, "loss": 0.0404, "step": 8990 }, { "epoch": 0.99, "grad_norm": 0.23484811714055948, "learning_rate": 1.6730801447024658e-08, "loss": 0.035, "step": 8991 }, { "epoch": 0.99, "grad_norm": 0.23870087242973842, "learning_rate": 1.6441135774996066e-08, "loss": 0.0402, "step": 8992 }, { "epoch": 0.99, "grad_norm": 0.2903157978671264, "learning_rate": 1.6153998553456453e-08, "loss": 0.0285, "step": 8993 }, { "epoch": 0.99, "grad_norm": 0.2329017029833701, "learning_rate": 1.586938981873898e-08, "loss": 0.0358, "step": 8994 }, { "epoch": 0.99, "grad_norm": 0.19562284608575325, "learning_rate": 1.5587309606852618e-08, "loss": 0.0196, "step": 8995 }, { "epoch": 0.99, "grad_norm": 0.14443401983996534, "learning_rate": 1.530775795348882e-08, "loss": 0.0158, "step": 8996 }, { "epoch": 0.99, "grad_norm": 0.24249420434377358, "learning_rate": 1.5030734894019293e-08, "loss": 0.0318, "step": 8997 }, { "epoch": 0.99, "grad_norm": 0.18572297021909712, "learning_rate": 1.4756240463491555e-08, "loss": 0.0225, "step": 8998 }, { "epoch": 0.99, "grad_norm": 0.21422832080877244, "learning_rate": 1.4484274696637823e-08, "loss": 0.0382, "step": 8999 }, { "epoch": 0.99, "grad_norm": 0.2144251125919366, "learning_rate": 1.421483762787057e-08, "loss": 0.0309, "step": 9000 }, { "epoch": 0.99, "grad_norm": 0.3272001766456183, "learning_rate": 1.3947929291280305e-08, "loss": 0.0538, "step": 9001 }, { "epoch": 0.99, "grad_norm": 0.20169835502415198, "learning_rate": 1.368354972063557e-08, "loss": 0.026, "step": 9002 }, { "epoch": 0.99, "grad_norm": 0.1968762643712936, "learning_rate": 1.3421698949389605e-08, "loss": 0.0355, "step": 9003 }, { "epoch": 0.99, "grad_norm": 0.17513244419490553, "learning_rate": 1.3162377010673689e-08, "loss": 0.0303, "step": 9004 }, { "epoch": 0.99, "grad_norm": 0.21097209434813735, "learning_rate": 1.2905583937299349e-08, "loss": 0.0308, "step": 9005 }, { "epoch": 0.99, "grad_norm": 0.28172995093871017, "learning_rate": 1.2651319761753933e-08, "loss": 0.066, "step": 9006 }, { "epoch": 0.99, "grad_norm": 0.18943717732114126, "learning_rate": 1.2399584516213925e-08, "loss": 0.0341, "step": 9007 }, { "epoch": 0.99, "grad_norm": 0.37573119244489, "learning_rate": 1.2150378232527183e-08, "loss": 0.0477, "step": 9008 }, { "epoch": 0.99, "grad_norm": 0.20770890514814155, "learning_rate": 1.1903700942224039e-08, "loss": 0.0384, "step": 9009 }, { "epoch": 0.99, "grad_norm": 0.22860660535083552, "learning_rate": 1.1659552676519525e-08, "loss": 0.0218, "step": 9010 }, { "epoch": 0.99, "grad_norm": 0.15668467023711313, "learning_rate": 1.1417933466302267e-08, "loss": 0.0221, "step": 9011 }, { "epoch": 0.99, "grad_norm": 0.2610500675370838, "learning_rate": 1.1178843342143363e-08, "loss": 0.037, "step": 9012 }, { "epoch": 0.99, "grad_norm": 0.1800054970537677, "learning_rate": 1.0942282334294174e-08, "loss": 0.0258, "step": 9013 }, { "epoch": 0.99, "grad_norm": 0.16902715982596964, "learning_rate": 1.070825047268631e-08, "loss": 0.0221, "step": 9014 }, { "epoch": 0.99, "grad_norm": 0.21877754412671224, "learning_rate": 1.047674778693164e-08, "loss": 0.032, "step": 9015 }, { "epoch": 0.99, "grad_norm": 0.19546011115117934, "learning_rate": 1.024777430632229e-08, "loss": 0.0313, "step": 9016 }, { "epoch": 0.99, "grad_norm": 0.24489220708733828, "learning_rate": 1.0021330059826195e-08, "loss": 0.0347, "step": 9017 }, { "epoch": 0.99, "grad_norm": 0.23904698483602776, "learning_rate": 9.797415076095996e-09, "loss": 0.0338, "step": 9018 }, { "epoch": 0.99, "grad_norm": 0.2555471509968717, "learning_rate": 9.57602938346458e-09, "loss": 0.037, "step": 9019 }, { "epoch": 0.99, "grad_norm": 0.27180622425759465, "learning_rate": 9.357173009942878e-09, "loss": 0.0317, "step": 9020 }, { "epoch": 0.99, "grad_norm": 0.18345631873318244, "learning_rate": 9.140845983219848e-09, "loss": 0.0329, "step": 9021 }, { "epoch": 0.99, "grad_norm": 0.16651174800898588, "learning_rate": 8.927048330666932e-09, "loss": 0.0183, "step": 9022 }, { "epoch": 0.99, "grad_norm": 0.15833267747174332, "learning_rate": 8.715780079335823e-09, "loss": 0.0309, "step": 9023 }, { "epoch": 0.99, "grad_norm": 0.20065714900883727, "learning_rate": 8.50704125595847e-09, "loss": 0.0368, "step": 9024 }, { "epoch": 0.99, "grad_norm": 0.1898271565190854, "learning_rate": 8.30083188694486e-09, "loss": 0.0293, "step": 9025 }, { "epoch": 0.99, "grad_norm": 0.2175329616935779, "learning_rate": 8.097151998387453e-09, "loss": 0.0248, "step": 9026 }, { "epoch": 0.99, "grad_norm": 0.22140314270940384, "learning_rate": 7.896001616054527e-09, "loss": 0.0264, "step": 9027 }, { "epoch": 0.99, "grad_norm": 0.19572558588207523, "learning_rate": 7.697380765399053e-09, "loss": 0.0241, "step": 9028 }, { "epoch": 0.99, "grad_norm": 0.22397959778251336, "learning_rate": 7.501289471549821e-09, "loss": 0.0345, "step": 9029 }, { "epoch": 0.99, "grad_norm": 0.21757753396082377, "learning_rate": 7.3077277593203155e-09, "loss": 0.0431, "step": 9030 }, { "epoch": 0.99, "grad_norm": 0.27201894917468206, "learning_rate": 7.116695653199834e-09, "loss": 0.0414, "step": 9031 }, { "epoch": 0.99, "grad_norm": 0.18455419520493435, "learning_rate": 6.928193177360154e-09, "loss": 0.0278, "step": 9032 }, { "epoch": 0.99, "grad_norm": 0.1934247532528776, "learning_rate": 6.742220355648865e-09, "loss": 0.0245, "step": 9033 }, { "epoch": 0.99, "grad_norm": 0.2428767640387947, "learning_rate": 6.558777211598255e-09, "loss": 0.0342, "step": 9034 }, { "epoch": 0.99, "grad_norm": 0.2293864473272074, "learning_rate": 6.377863768418646e-09, "loss": 0.0412, "step": 9035 }, { "epoch": 0.99, "grad_norm": 0.24727667230934006, "learning_rate": 6.199480049000617e-09, "loss": 0.0312, "step": 9036 }, { "epoch": 0.99, "grad_norm": 0.2483851086657334, "learning_rate": 6.023626075915001e-09, "loss": 0.0309, "step": 9037 }, { "epoch": 0.99, "grad_norm": 0.2175901693849103, "learning_rate": 5.850301871410668e-09, "loss": 0.0413, "step": 9038 }, { "epoch": 0.99, "grad_norm": 0.2690359900481581, "learning_rate": 5.679507457418964e-09, "loss": 0.0363, "step": 9039 }, { "epoch": 0.99, "grad_norm": 0.16839307593772218, "learning_rate": 5.51124285554927e-09, "loss": 0.0314, "step": 9040 }, { "epoch": 0.99, "grad_norm": 0.17465031056518474, "learning_rate": 5.345508087093443e-09, "loss": 0.0305, "step": 9041 }, { "epoch": 0.99, "grad_norm": 0.22130723998113816, "learning_rate": 5.182303173016934e-09, "loss": 0.0392, "step": 9042 }, { "epoch": 0.99, "grad_norm": 0.20169247236703886, "learning_rate": 5.021628133972112e-09, "loss": 0.0154, "step": 9043 }, { "epoch": 0.99, "grad_norm": 0.18799913053045197, "learning_rate": 4.8634829902893806e-09, "loss": 0.033, "step": 9044 }, { "epoch": 0.99, "grad_norm": 0.2050763448002161, "learning_rate": 4.707867761977181e-09, "loss": 0.0337, "step": 9045 }, { "epoch": 0.99, "grad_norm": 0.21454403518954293, "learning_rate": 4.554782468726426e-09, "loss": 0.0366, "step": 9046 }, { "epoch": 0.99, "grad_norm": 0.22219991768636732, "learning_rate": 4.4042271299038486e-09, "loss": 0.0372, "step": 9047 }, { "epoch": 0.99, "grad_norm": 0.18634081183533258, "learning_rate": 4.256201764560874e-09, "loss": 0.0279, "step": 9048 }, { "epoch": 0.99, "grad_norm": 0.27014418030622855, "learning_rate": 4.110706391424745e-09, "loss": 0.0298, "step": 9049 }, { "epoch": 0.99, "grad_norm": 0.27799029945954745, "learning_rate": 3.967741028907401e-09, "loss": 0.0396, "step": 9050 }, { "epoch": 0.99, "grad_norm": 0.20356836898525382, "learning_rate": 3.827305695094375e-09, "loss": 0.0312, "step": 9051 }, { "epoch": 0.99, "grad_norm": 0.20310897030303288, "learning_rate": 3.6894004077558942e-09, "loss": 0.0309, "step": 9052 }, { "epoch": 0.99, "grad_norm": 0.19356217971297343, "learning_rate": 3.5540251843424468e-09, "loss": 0.0389, "step": 9053 }, { "epoch": 0.99, "grad_norm": 0.21035311245856395, "learning_rate": 3.421180041980332e-09, "loss": 0.0244, "step": 9054 }, { "epoch": 0.99, "grad_norm": 0.22289592826195415, "learning_rate": 3.290864997478327e-09, "loss": 0.0368, "step": 9055 }, { "epoch": 0.99, "grad_norm": 0.16281274653652217, "learning_rate": 3.1630800673254636e-09, "loss": 0.0247, "step": 9056 }, { "epoch": 0.99, "grad_norm": 0.2661407596505153, "learning_rate": 3.0378252676910302e-09, "loss": 0.0268, "step": 9057 }, { "epoch": 0.99, "grad_norm": 0.3523807645754501, "learning_rate": 2.9151006144201298e-09, "loss": 0.0479, "step": 9058 }, { "epoch": 0.99, "grad_norm": 0.34567161459555834, "learning_rate": 2.794906123042562e-09, "loss": 0.0401, "step": 9059 }, { "epoch": 0.99, "grad_norm": 0.1806135880353218, "learning_rate": 2.6772418087639417e-09, "loss": 0.024, "step": 9060 }, { "epoch": 1.0, "grad_norm": 0.19636057978983132, "learning_rate": 2.5621076864745796e-09, "loss": 0.0112, "step": 9061 }, { "epoch": 1.0, "grad_norm": 0.17979448699359868, "learning_rate": 2.4495037707428226e-09, "loss": 0.0271, "step": 9062 }, { "epoch": 1.0, "grad_norm": 0.25195775286542077, "learning_rate": 2.339430075812832e-09, "loss": 0.0334, "step": 9063 }, { "epoch": 1.0, "grad_norm": 0.2423025519365941, "learning_rate": 2.2318866156134654e-09, "loss": 0.026, "step": 9064 }, { "epoch": 1.0, "grad_norm": 0.2530028484093671, "learning_rate": 2.126873403751617e-09, "loss": 0.0564, "step": 9065 }, { "epoch": 1.0, "grad_norm": 0.24952642468377387, "learning_rate": 2.0243904535144353e-09, "loss": 0.0349, "step": 9066 }, { "epoch": 1.0, "grad_norm": 0.22973490115623516, "learning_rate": 1.924437777869326e-09, "loss": 0.0322, "step": 9067 }, { "epoch": 1.0, "grad_norm": 0.17827245983148415, "learning_rate": 1.8270153894617282e-09, "loss": 0.0273, "step": 9068 }, { "epoch": 1.0, "grad_norm": 0.19810679467912204, "learning_rate": 1.7321233006173388e-09, "loss": 0.0231, "step": 9069 }, { "epoch": 1.0, "grad_norm": 0.24555635055916267, "learning_rate": 1.6397615233465503e-09, "loss": 0.0375, "step": 9070 }, { "epoch": 1.0, "grad_norm": 0.2149386716440784, "learning_rate": 1.5499300693311293e-09, "loss": 0.0269, "step": 9071 }, { "epoch": 1.0, "grad_norm": 0.18254295921288924, "learning_rate": 1.4626289499397596e-09, "loss": 0.0275, "step": 9072 }, { "epoch": 1.0, "grad_norm": 0.17111133937911638, "learning_rate": 1.3778581762169397e-09, "loss": 0.0242, "step": 9073 }, { "epoch": 1.0, "grad_norm": 0.2517839647534739, "learning_rate": 1.2956177588896445e-09, "loss": 0.0399, "step": 9074 }, { "epoch": 1.0, "grad_norm": 0.17626863305029444, "learning_rate": 1.215907708360664e-09, "loss": 0.021, "step": 9075 }, { "epoch": 1.0, "grad_norm": 0.2206262268864394, "learning_rate": 1.138728034719705e-09, "loss": 0.0423, "step": 9076 }, { "epoch": 1.0, "grad_norm": 0.2505197784073812, "learning_rate": 1.0640787477300685e-09, "loss": 0.0334, "step": 9077 }, { "epoch": 1.0, "grad_norm": 0.24630506465030078, "learning_rate": 9.919598568353118e-10, "loss": 0.0364, "step": 9078 }, { "epoch": 1.0, "grad_norm": 0.19157908868761192, "learning_rate": 9.223713711636883e-10, "loss": 0.0273, "step": 9079 }, { "epoch": 1.0, "grad_norm": 0.3398292911128397, "learning_rate": 8.553132995170466e-10, "loss": 0.0352, "step": 9080 }, { "epoch": 1.0, "grad_norm": 0.41283786259721683, "learning_rate": 7.907856503797106e-10, "loss": 0.0417, "step": 9081 }, { "epoch": 1.0, "grad_norm": 0.19454371783888974, "learning_rate": 7.287884319184813e-10, "loss": 0.0294, "step": 9082 }, { "epoch": 1.0, "grad_norm": 0.22095453239841353, "learning_rate": 6.693216519781942e-10, "loss": 0.0402, "step": 9083 }, { "epoch": 1.0, "grad_norm": 0.24136782189091516, "learning_rate": 6.123853180795003e-10, "loss": 0.0391, "step": 9084 }, { "epoch": 1.0, "grad_norm": 0.18302363283545298, "learning_rate": 5.579794374299674e-10, "loss": 0.0127, "step": 9085 }, { "epoch": 1.0, "grad_norm": 0.19306094326380838, "learning_rate": 5.061040169107578e-10, "loss": 0.0238, "step": 9086 }, { "epoch": 1.0, "grad_norm": 0.308372104093069, "learning_rate": 4.567590630855101e-10, "loss": 0.053, "step": 9087 }, { "epoch": 1.0, "grad_norm": 0.20385748583398602, "learning_rate": 4.0994458220033896e-10, "loss": 0.0195, "step": 9088 }, { "epoch": 1.0, "grad_norm": 0.17134609990906197, "learning_rate": 3.65660580177174e-10, "loss": 0.0251, "step": 9089 }, { "epoch": 1.0, "grad_norm": 0.2556640520088537, "learning_rate": 3.2390706261598015e-10, "loss": 0.0499, "step": 9090 }, { "epoch": 1.0, "grad_norm": 0.1805423532808625, "learning_rate": 2.846840348058599e-10, "loss": 0.0358, "step": 9091 }, { "epoch": 1.0, "grad_norm": 0.20602725962832472, "learning_rate": 2.479915017028489e-10, "loss": 0.033, "step": 9092 }, { "epoch": 1.0, "grad_norm": 0.16231528384142585, "learning_rate": 2.1382946795434067e-10, "loss": 0.0201, "step": 9093 }, { "epoch": 1.0, "grad_norm": 0.1820822341476219, "learning_rate": 1.8219793788132322e-10, "loss": 0.0226, "step": 9094 }, { "epoch": 1.0, "grad_norm": 0.21737306509588128, "learning_rate": 1.530969154872608e-10, "loss": 0.0281, "step": 9095 }, { "epoch": 1.0, "grad_norm": 0.18921095324076428, "learning_rate": 1.265264044514325e-10, "loss": 0.0298, "step": 9096 }, { "epoch": 1.0, "grad_norm": 0.23190776480357483, "learning_rate": 1.0248640813781407e-10, "loss": 0.0323, "step": 9097 }, { "epoch": 1.0, "grad_norm": 0.17537146593218583, "learning_rate": 8.097692958619619e-11, "loss": 0.027, "step": 9098 }, { "epoch": 1.0, "grad_norm": 0.19028406688718302, "learning_rate": 6.199797151884568e-11, "loss": 0.0175, "step": 9099 }, { "epoch": 1.0, "grad_norm": 0.26877648644450103, "learning_rate": 4.5549536340505627e-11, "loss": 0.0268, "step": 9100 }, { "epoch": 1.0, "grad_norm": 0.18589950982842812, "learning_rate": 3.163162612729309e-11, "loss": 0.0249, "step": 9101 }, { "epoch": 1.0, "grad_norm": 0.1556310015501666, "learning_rate": 2.024424264224223e-11, "loss": 0.025, "step": 9102 }, { "epoch": 1.0, "grad_norm": 0.20509211174634498, "learning_rate": 1.1387387326422528e-11, "loss": 0.0379, "step": 9103 }, { "epoch": 1.0, "grad_norm": 0.1919688015546051, "learning_rate": 5.061061301159242e-12, "loss": 0.0141, "step": 9104 }, { "epoch": 1.0, "grad_norm": 0.23409397443303054, "learning_rate": 1.265265365812951e-12, "loss": 0.0448, "step": 9105 }, { "epoch": 1.0, "grad_norm": 0.1942362122116247, "learning_rate": 0.0, "loss": 0.0345, "step": 9106 }, { "epoch": 1.0, "step": 9106, "total_flos": 8.962615037143621e+21, "train_loss": 0.14248651401592877, "train_runtime": 38094.5224, "train_samples_per_second": 7.649, "train_steps_per_second": 0.239 } ], "logging_steps": 1.0, "max_steps": 9106, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "total_flos": 8.962615037143621e+21, "train_batch_size": 4, "trial_name": null, "trial_params": null }