{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4118, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00024283632831471587, "grad_norm": 2.3099043369293213, "learning_rate": 0.0001, "loss": 2.074, "step": 1 }, { "epoch": 0.00048567265662943174, "grad_norm": 3.8508684635162354, "learning_rate": 0.0001, "loss": 2.0514, "step": 2 }, { "epoch": 0.0007285089849441476, "grad_norm": 1.006288766860962, "learning_rate": 0.0001, "loss": 1.9108, "step": 3 }, { "epoch": 0.0009713453132588635, "grad_norm": 1.1353191137313843, "learning_rate": 0.0001, "loss": 1.9215, "step": 4 }, { "epoch": 0.0012141816415735794, "grad_norm": 0.7446403503417969, "learning_rate": 0.0001, "loss": 1.7946, "step": 5 }, { "epoch": 0.0014570179698882952, "grad_norm": 0.8533063530921936, "learning_rate": 0.0001, "loss": 1.8683, "step": 6 }, { "epoch": 0.001699854298203011, "grad_norm": 0.8186442852020264, "learning_rate": 0.0001, "loss": 2.0013, "step": 7 }, { "epoch": 0.001942690626517727, "grad_norm": 0.744153618812561, "learning_rate": 0.0001, "loss": 2.0731, "step": 8 }, { "epoch": 0.002185526954832443, "grad_norm": 0.7234266400337219, "learning_rate": 0.0001, "loss": 1.8693, "step": 9 }, { "epoch": 0.0024283632831471587, "grad_norm": 0.6968668699264526, "learning_rate": 0.0001, "loss": 1.8265, "step": 10 }, { "epoch": 0.0026711996114618746, "grad_norm": 0.6370717287063599, "learning_rate": 0.0001, "loss": 1.6716, "step": 11 }, { "epoch": 0.0029140359397765905, "grad_norm": 0.6973312497138977, "learning_rate": 0.0001, "loss": 1.9457, "step": 12 }, { "epoch": 0.0031568722680913063, "grad_norm": 0.658778965473175, "learning_rate": 0.0001, "loss": 1.8521, "step": 13 }, { "epoch": 0.003399708596406022, "grad_norm": 0.6421470642089844, "learning_rate": 0.0001, "loss": 1.818, "step": 14 }, { "epoch": 0.003642544924720738, "grad_norm": 0.6314427852630615, "learning_rate": 0.0001, "loss": 1.9109, "step": 15 }, { "epoch": 0.003885381253035454, "grad_norm": 0.6784456372261047, "learning_rate": 0.0001, "loss": 1.9048, "step": 16 }, { "epoch": 0.00412821758135017, "grad_norm": 0.6112809777259827, "learning_rate": 0.0001, "loss": 1.7607, "step": 17 }, { "epoch": 0.004371053909664886, "grad_norm": 0.639802873134613, "learning_rate": 0.0001, "loss": 1.8273, "step": 18 }, { "epoch": 0.004613890237979602, "grad_norm": 0.618982195854187, "learning_rate": 0.0001, "loss": 1.8666, "step": 19 }, { "epoch": 0.0048567265662943174, "grad_norm": 0.667643666267395, "learning_rate": 0.0001, "loss": 1.8595, "step": 20 }, { "epoch": 0.005099562894609034, "grad_norm": 0.6735926270484924, "learning_rate": 0.0001, "loss": 2.0393, "step": 21 }, { "epoch": 0.005342399222923749, "grad_norm": 0.6211016774177551, "learning_rate": 0.0001, "loss": 1.9345, "step": 22 }, { "epoch": 0.0055852355512384655, "grad_norm": 0.6592899560928345, "learning_rate": 0.0001, "loss": 1.887, "step": 23 }, { "epoch": 0.005828071879553181, "grad_norm": 0.6509709358215332, "learning_rate": 0.0001, "loss": 1.6733, "step": 24 }, { "epoch": 0.006070908207867897, "grad_norm": 0.6006460785865784, "learning_rate": 0.0001, "loss": 1.7134, "step": 25 }, { "epoch": 0.006313744536182613, "grad_norm": 0.6942538022994995, "learning_rate": 0.0001, "loss": 1.875, "step": 26 }, { "epoch": 0.006556580864497329, "grad_norm": 0.620377779006958, "learning_rate": 0.0001, "loss": 1.8469, "step": 27 }, { "epoch": 0.006799417192812044, "grad_norm": 0.6397677659988403, "learning_rate": 0.0001, "loss": 1.9733, "step": 28 }, { "epoch": 0.007042253521126761, "grad_norm": 0.592271089553833, "learning_rate": 0.0001, "loss": 1.8286, "step": 29 }, { "epoch": 0.007285089849441476, "grad_norm": 0.6396237015724182, "learning_rate": 0.0001, "loss": 1.9415, "step": 30 }, { "epoch": 0.0075279261777561925, "grad_norm": 0.6379568576812744, "learning_rate": 0.0001, "loss": 1.851, "step": 31 }, { "epoch": 0.007770762506070908, "grad_norm": 0.5826771855354309, "learning_rate": 0.0001, "loss": 1.6987, "step": 32 }, { "epoch": 0.008013598834385623, "grad_norm": 0.5559223890304565, "learning_rate": 0.0001, "loss": 1.6602, "step": 33 }, { "epoch": 0.00825643516270034, "grad_norm": 0.6177660226821899, "learning_rate": 0.0001, "loss": 1.957, "step": 34 }, { "epoch": 0.008499271491015056, "grad_norm": 0.6030952334403992, "learning_rate": 0.0001, "loss": 1.7835, "step": 35 }, { "epoch": 0.008742107819329771, "grad_norm": 0.5825766921043396, "learning_rate": 0.0001, "loss": 1.8372, "step": 36 }, { "epoch": 0.008984944147644487, "grad_norm": 0.5782750248908997, "learning_rate": 0.0001, "loss": 1.8552, "step": 37 }, { "epoch": 0.009227780475959204, "grad_norm": 0.6544367074966431, "learning_rate": 0.0001, "loss": 2.0532, "step": 38 }, { "epoch": 0.00947061680427392, "grad_norm": 0.5874798893928528, "learning_rate": 0.0001, "loss": 1.7794, "step": 39 }, { "epoch": 0.009713453132588635, "grad_norm": 0.5625186562538147, "learning_rate": 0.0001, "loss": 1.8103, "step": 40 }, { "epoch": 0.00995628946090335, "grad_norm": 0.5761361122131348, "learning_rate": 0.0001, "loss": 1.8301, "step": 41 }, { "epoch": 0.010199125789218067, "grad_norm": 0.5836958885192871, "learning_rate": 0.0001, "loss": 1.6259, "step": 42 }, { "epoch": 0.010441962117532783, "grad_norm": 0.5922273397445679, "learning_rate": 0.0001, "loss": 1.9372, "step": 43 }, { "epoch": 0.010684798445847498, "grad_norm": 0.6721519827842712, "learning_rate": 0.0001, "loss": 1.9111, "step": 44 }, { "epoch": 0.010927634774162214, "grad_norm": 0.537930428981781, "learning_rate": 0.0001, "loss": 1.6284, "step": 45 }, { "epoch": 0.011170471102476931, "grad_norm": 0.575630247592926, "learning_rate": 0.0001, "loss": 1.8267, "step": 46 }, { "epoch": 0.011413307430791646, "grad_norm": 0.6041532754898071, "learning_rate": 0.0001, "loss": 2.0025, "step": 47 }, { "epoch": 0.011656143759106362, "grad_norm": 0.6168715357780457, "learning_rate": 0.0001, "loss": 1.9376, "step": 48 }, { "epoch": 0.011898980087421079, "grad_norm": 0.5767819285392761, "learning_rate": 0.0001, "loss": 1.8109, "step": 49 }, { "epoch": 0.012141816415735794, "grad_norm": 0.5658189058303833, "learning_rate": 0.0001, "loss": 1.8165, "step": 50 }, { "epoch": 0.01238465274405051, "grad_norm": 0.5590968132019043, "learning_rate": 0.0001, "loss": 1.7482, "step": 51 }, { "epoch": 0.012627489072365225, "grad_norm": 0.5551646947860718, "learning_rate": 0.0001, "loss": 1.7938, "step": 52 }, { "epoch": 0.012870325400679943, "grad_norm": 0.5823336243629456, "learning_rate": 0.0001, "loss": 1.8563, "step": 53 }, { "epoch": 0.013113161728994658, "grad_norm": 0.603715181350708, "learning_rate": 0.0001, "loss": 1.7271, "step": 54 }, { "epoch": 0.013355998057309373, "grad_norm": 0.657590925693512, "learning_rate": 0.0001, "loss": 2.0677, "step": 55 }, { "epoch": 0.013598834385624089, "grad_norm": 0.5561121702194214, "learning_rate": 0.0001, "loss": 1.7211, "step": 56 }, { "epoch": 0.013841670713938806, "grad_norm": 0.591545581817627, "learning_rate": 0.0001, "loss": 1.9068, "step": 57 }, { "epoch": 0.014084507042253521, "grad_norm": 0.604915201663971, "learning_rate": 0.0001, "loss": 1.8007, "step": 58 }, { "epoch": 0.014327343370568237, "grad_norm": 0.5310607552528381, "learning_rate": 0.0001, "loss": 1.8028, "step": 59 }, { "epoch": 0.014570179698882952, "grad_norm": 0.5268518328666687, "learning_rate": 0.0001, "loss": 1.7562, "step": 60 }, { "epoch": 0.01481301602719767, "grad_norm": 0.5638241767883301, "learning_rate": 0.0001, "loss": 1.7515, "step": 61 }, { "epoch": 0.015055852355512385, "grad_norm": 0.5837224125862122, "learning_rate": 0.0001, "loss": 1.8159, "step": 62 }, { "epoch": 0.0152986886838271, "grad_norm": 0.5646676421165466, "learning_rate": 0.0001, "loss": 1.8585, "step": 63 }, { "epoch": 0.015541525012141816, "grad_norm": 0.5547434687614441, "learning_rate": 0.0001, "loss": 1.745, "step": 64 }, { "epoch": 0.01578436134045653, "grad_norm": 0.6039091944694519, "learning_rate": 0.0001, "loss": 1.8769, "step": 65 }, { "epoch": 0.016027197668771247, "grad_norm": 0.5551029443740845, "learning_rate": 0.0001, "loss": 1.7518, "step": 66 }, { "epoch": 0.016270033997085966, "grad_norm": 0.5494778752326965, "learning_rate": 0.0001, "loss": 1.8766, "step": 67 }, { "epoch": 0.01651287032540068, "grad_norm": 0.5915890336036682, "learning_rate": 0.0001, "loss": 1.7991, "step": 68 }, { "epoch": 0.016755706653715396, "grad_norm": 0.599169909954071, "learning_rate": 0.0001, "loss": 2.0, "step": 69 }, { "epoch": 0.016998542982030112, "grad_norm": 0.587382972240448, "learning_rate": 0.0001, "loss": 1.8536, "step": 70 }, { "epoch": 0.017241379310344827, "grad_norm": 0.5702838897705078, "learning_rate": 0.0001, "loss": 1.8945, "step": 71 }, { "epoch": 0.017484215638659543, "grad_norm": 0.5253711342811584, "learning_rate": 0.0001, "loss": 1.7486, "step": 72 }, { "epoch": 0.017727051966974258, "grad_norm": 0.5934901237487793, "learning_rate": 0.0001, "loss": 1.7831, "step": 73 }, { "epoch": 0.017969888295288974, "grad_norm": 0.5517677068710327, "learning_rate": 0.0001, "loss": 1.9623, "step": 74 }, { "epoch": 0.018212724623603693, "grad_norm": 0.6004043221473694, "learning_rate": 0.0001, "loss": 2.0, "step": 75 }, { "epoch": 0.018455560951918408, "grad_norm": 0.5316723585128784, "learning_rate": 0.0001, "loss": 1.7527, "step": 76 }, { "epoch": 0.018698397280233123, "grad_norm": 0.5355283617973328, "learning_rate": 0.0001, "loss": 1.7888, "step": 77 }, { "epoch": 0.01894123360854784, "grad_norm": 0.5570546388626099, "learning_rate": 0.0001, "loss": 1.6755, "step": 78 }, { "epoch": 0.019184069936862554, "grad_norm": 0.5318782925605774, "learning_rate": 0.0001, "loss": 1.7192, "step": 79 }, { "epoch": 0.01942690626517727, "grad_norm": 0.5251316428184509, "learning_rate": 0.0001, "loss": 1.7928, "step": 80 }, { "epoch": 0.019669742593491985, "grad_norm": 0.5667200088500977, "learning_rate": 0.0001, "loss": 1.8848, "step": 81 }, { "epoch": 0.0199125789218067, "grad_norm": 0.5833965539932251, "learning_rate": 0.0001, "loss": 2.0491, "step": 82 }, { "epoch": 0.02015541525012142, "grad_norm": 0.5401152968406677, "learning_rate": 0.0001, "loss": 1.7854, "step": 83 }, { "epoch": 0.020398251578436135, "grad_norm": 0.5811483263969421, "learning_rate": 0.0001, "loss": 1.8976, "step": 84 }, { "epoch": 0.02064108790675085, "grad_norm": 0.550742506980896, "learning_rate": 0.0001, "loss": 1.7833, "step": 85 }, { "epoch": 0.020883924235065566, "grad_norm": 0.5710833668708801, "learning_rate": 0.0001, "loss": 1.8834, "step": 86 }, { "epoch": 0.02112676056338028, "grad_norm": 0.5878604650497437, "learning_rate": 0.0001, "loss": 1.8921, "step": 87 }, { "epoch": 0.021369596891694997, "grad_norm": 0.5854058861732483, "learning_rate": 0.0001, "loss": 1.8416, "step": 88 }, { "epoch": 0.021612433220009712, "grad_norm": 0.552087664604187, "learning_rate": 0.0001, "loss": 1.8377, "step": 89 }, { "epoch": 0.021855269548324428, "grad_norm": 0.5610334873199463, "learning_rate": 0.0001, "loss": 1.8038, "step": 90 }, { "epoch": 0.022098105876639147, "grad_norm": 0.5685205459594727, "learning_rate": 0.0001, "loss": 1.763, "step": 91 }, { "epoch": 0.022340942204953862, "grad_norm": 0.5359020233154297, "learning_rate": 0.0001, "loss": 1.7428, "step": 92 }, { "epoch": 0.022583778533268577, "grad_norm": 0.5649325251579285, "learning_rate": 0.0001, "loss": 1.922, "step": 93 }, { "epoch": 0.022826614861583293, "grad_norm": 0.6011147499084473, "learning_rate": 0.0001, "loss": 1.8993, "step": 94 }, { "epoch": 0.02306945118989801, "grad_norm": 0.5482878088951111, "learning_rate": 0.0001, "loss": 1.7119, "step": 95 }, { "epoch": 0.023312287518212724, "grad_norm": 0.5836300253868103, "learning_rate": 0.0001, "loss": 1.8026, "step": 96 }, { "epoch": 0.02355512384652744, "grad_norm": 0.6109923124313354, "learning_rate": 0.0001, "loss": 1.9133, "step": 97 }, { "epoch": 0.023797960174842158, "grad_norm": 0.5888505578041077, "learning_rate": 0.0001, "loss": 1.6822, "step": 98 }, { "epoch": 0.024040796503156873, "grad_norm": 0.6011204719543457, "learning_rate": 0.0001, "loss": 1.8274, "step": 99 }, { "epoch": 0.02428363283147159, "grad_norm": 0.6063917279243469, "learning_rate": 0.0001, "loss": 1.8999, "step": 100 }, { "epoch": 0.024526469159786304, "grad_norm": 0.5995082259178162, "learning_rate": 0.0001, "loss": 1.8899, "step": 101 }, { "epoch": 0.02476930548810102, "grad_norm": 0.6455526351928711, "learning_rate": 0.0001, "loss": 1.8617, "step": 102 }, { "epoch": 0.025012141816415735, "grad_norm": 0.5625491142272949, "learning_rate": 0.0001, "loss": 1.8013, "step": 103 }, { "epoch": 0.02525497814473045, "grad_norm": 0.5394976139068604, "learning_rate": 0.0001, "loss": 1.4962, "step": 104 }, { "epoch": 0.025497814473045166, "grad_norm": 0.6297885179519653, "learning_rate": 0.0001, "loss": 1.7555, "step": 105 }, { "epoch": 0.025740650801359885, "grad_norm": 0.5606225728988647, "learning_rate": 0.0001, "loss": 1.7833, "step": 106 }, { "epoch": 0.0259834871296746, "grad_norm": 0.603681206703186, "learning_rate": 0.0001, "loss": 2.0088, "step": 107 }, { "epoch": 0.026226323457989316, "grad_norm": 0.6018767952919006, "learning_rate": 0.0001, "loss": 1.7168, "step": 108 }, { "epoch": 0.02646915978630403, "grad_norm": 0.601502001285553, "learning_rate": 0.0001, "loss": 1.9289, "step": 109 }, { "epoch": 0.026711996114618747, "grad_norm": 0.5400576591491699, "learning_rate": 0.0001, "loss": 1.798, "step": 110 }, { "epoch": 0.026954832442933462, "grad_norm": 0.5526056885719299, "learning_rate": 0.0001, "loss": 1.6961, "step": 111 }, { "epoch": 0.027197668771248178, "grad_norm": 0.5839977264404297, "learning_rate": 0.0001, "loss": 1.8279, "step": 112 }, { "epoch": 0.027440505099562893, "grad_norm": 0.5751993656158447, "learning_rate": 0.0001, "loss": 1.8064, "step": 113 }, { "epoch": 0.027683341427877612, "grad_norm": 0.5579966306686401, "learning_rate": 0.0001, "loss": 1.6971, "step": 114 }, { "epoch": 0.027926177756192327, "grad_norm": 0.5659162998199463, "learning_rate": 0.0001, "loss": 1.8134, "step": 115 }, { "epoch": 0.028169014084507043, "grad_norm": 0.5924839377403259, "learning_rate": 0.0001, "loss": 1.7446, "step": 116 }, { "epoch": 0.02841185041282176, "grad_norm": 0.5279017686843872, "learning_rate": 0.0001, "loss": 1.616, "step": 117 }, { "epoch": 0.028654686741136474, "grad_norm": 0.6021360158920288, "learning_rate": 0.0001, "loss": 1.8249, "step": 118 }, { "epoch": 0.02889752306945119, "grad_norm": 0.552830696105957, "learning_rate": 0.0001, "loss": 1.7002, "step": 119 }, { "epoch": 0.029140359397765905, "grad_norm": 0.5874643325805664, "learning_rate": 0.0001, "loss": 1.8309, "step": 120 }, { "epoch": 0.02938319572608062, "grad_norm": 0.6058750152587891, "learning_rate": 0.0001, "loss": 1.8723, "step": 121 }, { "epoch": 0.02962603205439534, "grad_norm": 0.5549308061599731, "learning_rate": 0.0001, "loss": 1.811, "step": 122 }, { "epoch": 0.029868868382710054, "grad_norm": 0.5869531035423279, "learning_rate": 0.0001, "loss": 1.8061, "step": 123 }, { "epoch": 0.03011170471102477, "grad_norm": 0.5605490207672119, "learning_rate": 0.0001, "loss": 1.862, "step": 124 }, { "epoch": 0.030354541039339485, "grad_norm": 0.7070507407188416, "learning_rate": 0.0001, "loss": 1.9312, "step": 125 }, { "epoch": 0.0305973773676542, "grad_norm": 0.560182511806488, "learning_rate": 0.0001, "loss": 1.7396, "step": 126 }, { "epoch": 0.030840213695968916, "grad_norm": 0.5708908438682556, "learning_rate": 0.0001, "loss": 1.8544, "step": 127 }, { "epoch": 0.03108305002428363, "grad_norm": 0.5821275115013123, "learning_rate": 0.0001, "loss": 1.8991, "step": 128 }, { "epoch": 0.03132588635259835, "grad_norm": 0.5725240707397461, "learning_rate": 0.0001, "loss": 1.793, "step": 129 }, { "epoch": 0.03156872268091306, "grad_norm": 0.5687121748924255, "learning_rate": 0.0001, "loss": 1.9397, "step": 130 }, { "epoch": 0.03181155900922778, "grad_norm": 0.5817821621894836, "learning_rate": 0.0001, "loss": 1.9118, "step": 131 }, { "epoch": 0.03205439533754249, "grad_norm": 0.5283122658729553, "learning_rate": 0.0001, "loss": 1.8213, "step": 132 }, { "epoch": 0.03229723166585721, "grad_norm": 0.5369681715965271, "learning_rate": 0.0001, "loss": 1.7903, "step": 133 }, { "epoch": 0.03254006799417193, "grad_norm": 0.5785312652587891, "learning_rate": 0.0001, "loss": 1.9701, "step": 134 }, { "epoch": 0.03278290432248664, "grad_norm": 0.5557915568351746, "learning_rate": 0.0001, "loss": 1.7392, "step": 135 }, { "epoch": 0.03302574065080136, "grad_norm": 0.5458675622940063, "learning_rate": 0.0001, "loss": 1.7289, "step": 136 }, { "epoch": 0.033268576979116074, "grad_norm": 0.6089033484458923, "learning_rate": 0.0001, "loss": 1.9272, "step": 137 }, { "epoch": 0.03351141330743079, "grad_norm": 0.5484150648117065, "learning_rate": 0.0001, "loss": 1.6415, "step": 138 }, { "epoch": 0.033754249635745505, "grad_norm": 0.5457684397697449, "learning_rate": 0.0001, "loss": 1.7081, "step": 139 }, { "epoch": 0.033997085964060224, "grad_norm": 0.5888879895210266, "learning_rate": 0.0001, "loss": 1.7877, "step": 140 }, { "epoch": 0.03423992229237494, "grad_norm": 0.5889548063278198, "learning_rate": 0.0001, "loss": 1.9245, "step": 141 }, { "epoch": 0.034482758620689655, "grad_norm": 0.5552160143852234, "learning_rate": 0.0001, "loss": 1.8471, "step": 142 }, { "epoch": 0.034725594949004374, "grad_norm": 0.566940426826477, "learning_rate": 0.0001, "loss": 1.8551, "step": 143 }, { "epoch": 0.034968431277319086, "grad_norm": 0.5903639197349548, "learning_rate": 0.0001, "loss": 1.7737, "step": 144 }, { "epoch": 0.035211267605633804, "grad_norm": 0.5215669870376587, "learning_rate": 0.0001, "loss": 1.6737, "step": 145 }, { "epoch": 0.035454103933948516, "grad_norm": 0.5972316861152649, "learning_rate": 0.0001, "loss": 1.9133, "step": 146 }, { "epoch": 0.035696940262263235, "grad_norm": 0.5835139751434326, "learning_rate": 0.0001, "loss": 1.8421, "step": 147 }, { "epoch": 0.03593977659057795, "grad_norm": 0.5471585988998413, "learning_rate": 0.0001, "loss": 1.7915, "step": 148 }, { "epoch": 0.036182612918892666, "grad_norm": 0.5762492418289185, "learning_rate": 0.0001, "loss": 1.9026, "step": 149 }, { "epoch": 0.036425449247207385, "grad_norm": 0.5917050838470459, "learning_rate": 0.0001, "loss": 1.9228, "step": 150 }, { "epoch": 0.0366682855755221, "grad_norm": 0.5515230298042297, "learning_rate": 0.0001, "loss": 1.7392, "step": 151 }, { "epoch": 0.036911121903836816, "grad_norm": 0.5346499085426331, "learning_rate": 0.0001, "loss": 1.6875, "step": 152 }, { "epoch": 0.03715395823215153, "grad_norm": 0.5266364812850952, "learning_rate": 0.0001, "loss": 1.718, "step": 153 }, { "epoch": 0.03739679456046625, "grad_norm": 0.5427219867706299, "learning_rate": 0.0001, "loss": 1.8072, "step": 154 }, { "epoch": 0.03763963088878096, "grad_norm": 0.5867930054664612, "learning_rate": 0.0001, "loss": 1.7706, "step": 155 }, { "epoch": 0.03788246721709568, "grad_norm": 0.5999542474746704, "learning_rate": 0.0001, "loss": 1.8921, "step": 156 }, { "epoch": 0.0381253035454104, "grad_norm": 0.5528079271316528, "learning_rate": 0.0001, "loss": 1.6512, "step": 157 }, { "epoch": 0.03836813987372511, "grad_norm": 0.580348014831543, "learning_rate": 0.0001, "loss": 1.9681, "step": 158 }, { "epoch": 0.03861097620203983, "grad_norm": 0.6032450795173645, "learning_rate": 0.0001, "loss": 1.7902, "step": 159 }, { "epoch": 0.03885381253035454, "grad_norm": 0.5768253803253174, "learning_rate": 0.0001, "loss": 1.8762, "step": 160 }, { "epoch": 0.03909664885866926, "grad_norm": 0.5588753819465637, "learning_rate": 0.0001, "loss": 1.7458, "step": 161 }, { "epoch": 0.03933948518698397, "grad_norm": 0.5667343139648438, "learning_rate": 0.0001, "loss": 1.7809, "step": 162 }, { "epoch": 0.03958232151529869, "grad_norm": 0.5831610560417175, "learning_rate": 0.0001, "loss": 1.9095, "step": 163 }, { "epoch": 0.0398251578436134, "grad_norm": 0.5786190032958984, "learning_rate": 0.0001, "loss": 1.7167, "step": 164 }, { "epoch": 0.04006799417192812, "grad_norm": 0.5616288185119629, "learning_rate": 0.0001, "loss": 1.8568, "step": 165 }, { "epoch": 0.04031083050024284, "grad_norm": 0.5448461771011353, "learning_rate": 0.0001, "loss": 1.8251, "step": 166 }, { "epoch": 0.04055366682855755, "grad_norm": 0.5922189354896545, "learning_rate": 0.0001, "loss": 1.8343, "step": 167 }, { "epoch": 0.04079650315687227, "grad_norm": 0.5826832056045532, "learning_rate": 0.0001, "loss": 1.8375, "step": 168 }, { "epoch": 0.04103933948518698, "grad_norm": 0.546203076839447, "learning_rate": 0.0001, "loss": 1.8048, "step": 169 }, { "epoch": 0.0412821758135017, "grad_norm": 0.5722417831420898, "learning_rate": 0.0001, "loss": 1.709, "step": 170 }, { "epoch": 0.04152501214181641, "grad_norm": 0.5691736936569214, "learning_rate": 0.0001, "loss": 1.8391, "step": 171 }, { "epoch": 0.04176784847013113, "grad_norm": 0.5580527186393738, "learning_rate": 0.0001, "loss": 1.7919, "step": 172 }, { "epoch": 0.04201068479844585, "grad_norm": 0.5881775617599487, "learning_rate": 0.0001, "loss": 1.9728, "step": 173 }, { "epoch": 0.04225352112676056, "grad_norm": 0.5363543033599854, "learning_rate": 0.0001, "loss": 1.6842, "step": 174 }, { "epoch": 0.04249635745507528, "grad_norm": 0.5504969954490662, "learning_rate": 0.0001, "loss": 1.7332, "step": 175 }, { "epoch": 0.042739193783389993, "grad_norm": 0.5678769946098328, "learning_rate": 0.0001, "loss": 1.8638, "step": 176 }, { "epoch": 0.04298203011170471, "grad_norm": 0.5707623958587646, "learning_rate": 0.0001, "loss": 1.7592, "step": 177 }, { "epoch": 0.043224866440019424, "grad_norm": 0.5393390655517578, "learning_rate": 0.0001, "loss": 1.7732, "step": 178 }, { "epoch": 0.04346770276833414, "grad_norm": 0.5405838489532471, "learning_rate": 0.0001, "loss": 1.6924, "step": 179 }, { "epoch": 0.043710539096648855, "grad_norm": 0.5595617294311523, "learning_rate": 0.0001, "loss": 1.7885, "step": 180 }, { "epoch": 0.043953375424963574, "grad_norm": 0.5614048838615417, "learning_rate": 0.0001, "loss": 1.8507, "step": 181 }, { "epoch": 0.04419621175327829, "grad_norm": 0.5554660558700562, "learning_rate": 0.0001, "loss": 1.8694, "step": 182 }, { "epoch": 0.044439048081593005, "grad_norm": 0.5462179780006409, "learning_rate": 0.0001, "loss": 1.828, "step": 183 }, { "epoch": 0.044681884409907724, "grad_norm": 0.5049857497215271, "learning_rate": 0.0001, "loss": 1.494, "step": 184 }, { "epoch": 0.044924720738222436, "grad_norm": 0.5400852560997009, "learning_rate": 0.0001, "loss": 1.8124, "step": 185 }, { "epoch": 0.045167557066537155, "grad_norm": 0.5604100227355957, "learning_rate": 0.0001, "loss": 1.9107, "step": 186 }, { "epoch": 0.04541039339485187, "grad_norm": 0.5613305568695068, "learning_rate": 0.0001, "loss": 1.652, "step": 187 }, { "epoch": 0.045653229723166586, "grad_norm": 0.5521450042724609, "learning_rate": 0.0001, "loss": 1.7774, "step": 188 }, { "epoch": 0.045896066051481305, "grad_norm": 0.558240532875061, "learning_rate": 0.0001, "loss": 1.6898, "step": 189 }, { "epoch": 0.04613890237979602, "grad_norm": 0.5644837021827698, "learning_rate": 0.0001, "loss": 1.6929, "step": 190 }, { "epoch": 0.046381738708110735, "grad_norm": 0.5644278526306152, "learning_rate": 0.0001, "loss": 1.8844, "step": 191 }, { "epoch": 0.04662457503642545, "grad_norm": 0.529413640499115, "learning_rate": 0.0001, "loss": 1.5737, "step": 192 }, { "epoch": 0.046867411364740166, "grad_norm": 0.5736052989959717, "learning_rate": 0.0001, "loss": 1.731, "step": 193 }, { "epoch": 0.04711024769305488, "grad_norm": 0.5963498950004578, "learning_rate": 0.0001, "loss": 1.8204, "step": 194 }, { "epoch": 0.0473530840213696, "grad_norm": 0.5892695188522339, "learning_rate": 0.0001, "loss": 1.6753, "step": 195 }, { "epoch": 0.047595920349684316, "grad_norm": 0.570740282535553, "learning_rate": 0.0001, "loss": 1.8047, "step": 196 }, { "epoch": 0.04783875667799903, "grad_norm": 0.5566459894180298, "learning_rate": 0.0001, "loss": 1.7479, "step": 197 }, { "epoch": 0.04808159300631375, "grad_norm": 0.5618220567703247, "learning_rate": 0.0001, "loss": 1.7769, "step": 198 }, { "epoch": 0.04832442933462846, "grad_norm": 0.5499823093414307, "learning_rate": 0.0001, "loss": 1.7173, "step": 199 }, { "epoch": 0.04856726566294318, "grad_norm": 0.5597728490829468, "learning_rate": 0.0001, "loss": 1.8234, "step": 200 }, { "epoch": 0.04881010199125789, "grad_norm": 0.5423206090927124, "learning_rate": 0.0001, "loss": 1.7442, "step": 201 }, { "epoch": 0.04905293831957261, "grad_norm": 0.59138023853302, "learning_rate": 0.0001, "loss": 1.932, "step": 202 }, { "epoch": 0.04929577464788732, "grad_norm": 0.57453852891922, "learning_rate": 0.0001, "loss": 1.7707, "step": 203 }, { "epoch": 0.04953861097620204, "grad_norm": 0.5949112176895142, "learning_rate": 0.0001, "loss": 2.0603, "step": 204 }, { "epoch": 0.04978144730451676, "grad_norm": 0.5683001279830933, "learning_rate": 0.0001, "loss": 1.8341, "step": 205 }, { "epoch": 0.05002428363283147, "grad_norm": 0.511799156665802, "learning_rate": 0.0001, "loss": 1.6752, "step": 206 }, { "epoch": 0.05026711996114619, "grad_norm": 0.5489233732223511, "learning_rate": 0.0001, "loss": 1.7677, "step": 207 }, { "epoch": 0.0505099562894609, "grad_norm": 0.5438036322593689, "learning_rate": 0.0001, "loss": 1.7345, "step": 208 }, { "epoch": 0.05075279261777562, "grad_norm": 0.5517812967300415, "learning_rate": 0.0001, "loss": 1.7707, "step": 209 }, { "epoch": 0.05099562894609033, "grad_norm": 0.5675941109657288, "learning_rate": 0.0001, "loss": 1.776, "step": 210 }, { "epoch": 0.05123846527440505, "grad_norm": 0.5526789426803589, "learning_rate": 0.0001, "loss": 1.9368, "step": 211 }, { "epoch": 0.05148130160271977, "grad_norm": 0.5648570656776428, "learning_rate": 0.0001, "loss": 1.647, "step": 212 }, { "epoch": 0.05172413793103448, "grad_norm": 0.5832803249359131, "learning_rate": 0.0001, "loss": 1.8035, "step": 213 }, { "epoch": 0.0519669742593492, "grad_norm": 0.5673675537109375, "learning_rate": 0.0001, "loss": 1.7283, "step": 214 }, { "epoch": 0.05220981058766391, "grad_norm": 0.5454904437065125, "learning_rate": 0.0001, "loss": 1.8284, "step": 215 }, { "epoch": 0.05245264691597863, "grad_norm": 0.6127611398696899, "learning_rate": 0.0001, "loss": 1.9195, "step": 216 }, { "epoch": 0.052695483244293344, "grad_norm": 0.5642631649971008, "learning_rate": 0.0001, "loss": 1.848, "step": 217 }, { "epoch": 0.05293831957260806, "grad_norm": 0.5837909579277039, "learning_rate": 0.0001, "loss": 1.9077, "step": 218 }, { "epoch": 0.053181155900922775, "grad_norm": 0.5727541446685791, "learning_rate": 0.0001, "loss": 1.7965, "step": 219 }, { "epoch": 0.053423992229237494, "grad_norm": 0.5601940751075745, "learning_rate": 0.0001, "loss": 1.9039, "step": 220 }, { "epoch": 0.05366682855755221, "grad_norm": 0.5595248341560364, "learning_rate": 0.0001, "loss": 1.8851, "step": 221 }, { "epoch": 0.053909664885866924, "grad_norm": 0.49235615134239197, "learning_rate": 0.0001, "loss": 1.6141, "step": 222 }, { "epoch": 0.05415250121418164, "grad_norm": 0.5064696073532104, "learning_rate": 0.0001, "loss": 1.6202, "step": 223 }, { "epoch": 0.054395337542496355, "grad_norm": 0.5605242848396301, "learning_rate": 0.0001, "loss": 1.8623, "step": 224 }, { "epoch": 0.054638173870811074, "grad_norm": 0.5589006543159485, "learning_rate": 0.0001, "loss": 1.6309, "step": 225 }, { "epoch": 0.054881010199125786, "grad_norm": 0.5964434146881104, "learning_rate": 0.0001, "loss": 1.8167, "step": 226 }, { "epoch": 0.055123846527440505, "grad_norm": 0.5265910625457764, "learning_rate": 0.0001, "loss": 1.7182, "step": 227 }, { "epoch": 0.055366682855755224, "grad_norm": 0.5511795878410339, "learning_rate": 0.0001, "loss": 1.6657, "step": 228 }, { "epoch": 0.055609519184069936, "grad_norm": 0.5586657524108887, "learning_rate": 0.0001, "loss": 1.7889, "step": 229 }, { "epoch": 0.055852355512384655, "grad_norm": 0.5953067541122437, "learning_rate": 0.0001, "loss": 1.8374, "step": 230 }, { "epoch": 0.05609519184069937, "grad_norm": 0.5542361736297607, "learning_rate": 0.0001, "loss": 1.7509, "step": 231 }, { "epoch": 0.056338028169014086, "grad_norm": 0.5865808129310608, "learning_rate": 0.0001, "loss": 1.9627, "step": 232 }, { "epoch": 0.0565808644973288, "grad_norm": 0.514796793460846, "learning_rate": 0.0001, "loss": 1.5503, "step": 233 }, { "epoch": 0.05682370082564352, "grad_norm": 0.5481152534484863, "learning_rate": 0.0001, "loss": 1.8672, "step": 234 }, { "epoch": 0.057066537153958236, "grad_norm": 0.49685022234916687, "learning_rate": 0.0001, "loss": 1.5859, "step": 235 }, { "epoch": 0.05730937348227295, "grad_norm": 0.6000615954399109, "learning_rate": 0.0001, "loss": 1.9551, "step": 236 }, { "epoch": 0.057552209810587666, "grad_norm": 0.5260239839553833, "learning_rate": 0.0001, "loss": 1.8325, "step": 237 }, { "epoch": 0.05779504613890238, "grad_norm": 0.5536266565322876, "learning_rate": 0.0001, "loss": 1.884, "step": 238 }, { "epoch": 0.0580378824672171, "grad_norm": 0.5461103916168213, "learning_rate": 0.0001, "loss": 1.6596, "step": 239 }, { "epoch": 0.05828071879553181, "grad_norm": 0.5518085360527039, "learning_rate": 0.0001, "loss": 1.8653, "step": 240 }, { "epoch": 0.05852355512384653, "grad_norm": 0.5721222162246704, "learning_rate": 0.0001, "loss": 1.874, "step": 241 }, { "epoch": 0.05876639145216124, "grad_norm": 0.5601527690887451, "learning_rate": 0.0001, "loss": 1.8091, "step": 242 }, { "epoch": 0.05900922778047596, "grad_norm": 0.5886834263801575, "learning_rate": 0.0001, "loss": 1.9075, "step": 243 }, { "epoch": 0.05925206410879068, "grad_norm": 0.5505769848823547, "learning_rate": 0.0001, "loss": 1.7758, "step": 244 }, { "epoch": 0.05949490043710539, "grad_norm": 0.5973044037818909, "learning_rate": 0.0001, "loss": 1.8534, "step": 245 }, { "epoch": 0.05973773676542011, "grad_norm": 0.5689461827278137, "learning_rate": 0.0001, "loss": 1.8422, "step": 246 }, { "epoch": 0.05998057309373482, "grad_norm": 0.5527995824813843, "learning_rate": 0.0001, "loss": 1.7791, "step": 247 }, { "epoch": 0.06022340942204954, "grad_norm": 0.5316581130027771, "learning_rate": 0.0001, "loss": 1.7325, "step": 248 }, { "epoch": 0.06046624575036425, "grad_norm": 0.5318779349327087, "learning_rate": 0.0001, "loss": 1.7863, "step": 249 }, { "epoch": 0.06070908207867897, "grad_norm": 0.5586565136909485, "learning_rate": 0.0001, "loss": 1.7041, "step": 250 }, { "epoch": 0.06095191840699369, "grad_norm": 0.58368980884552, "learning_rate": 0.0001, "loss": 1.8094, "step": 251 }, { "epoch": 0.0611947547353084, "grad_norm": 0.5369036793708801, "learning_rate": 0.0001, "loss": 1.8643, "step": 252 }, { "epoch": 0.06143759106362312, "grad_norm": 0.5384863615036011, "learning_rate": 0.0001, "loss": 1.615, "step": 253 }, { "epoch": 0.06168042739193783, "grad_norm": 0.5588333606719971, "learning_rate": 0.0001, "loss": 1.8526, "step": 254 }, { "epoch": 0.06192326372025255, "grad_norm": 0.5602843761444092, "learning_rate": 0.0001, "loss": 1.9226, "step": 255 }, { "epoch": 0.06216610004856726, "grad_norm": 0.5340338349342346, "learning_rate": 0.0001, "loss": 1.6703, "step": 256 }, { "epoch": 0.06240893637688198, "grad_norm": 0.5598003268241882, "learning_rate": 0.0001, "loss": 1.7225, "step": 257 }, { "epoch": 0.0626517727051967, "grad_norm": 0.5247579216957092, "learning_rate": 0.0001, "loss": 1.7159, "step": 258 }, { "epoch": 0.06289460903351142, "grad_norm": 0.5494512319564819, "learning_rate": 0.0001, "loss": 1.6822, "step": 259 }, { "epoch": 0.06313744536182612, "grad_norm": 0.5570547580718994, "learning_rate": 0.0001, "loss": 1.8625, "step": 260 }, { "epoch": 0.06338028169014084, "grad_norm": 0.5337409973144531, "learning_rate": 0.0001, "loss": 1.674, "step": 261 }, { "epoch": 0.06362311801845556, "grad_norm": 0.5944165587425232, "learning_rate": 0.0001, "loss": 1.9068, "step": 262 }, { "epoch": 0.06386595434677028, "grad_norm": 0.5736029148101807, "learning_rate": 0.0001, "loss": 1.7135, "step": 263 }, { "epoch": 0.06410879067508499, "grad_norm": 0.5729188323020935, "learning_rate": 0.0001, "loss": 1.8619, "step": 264 }, { "epoch": 0.0643516270033997, "grad_norm": 0.5908741354942322, "learning_rate": 0.0001, "loss": 1.7791, "step": 265 }, { "epoch": 0.06459446333171442, "grad_norm": 0.550342857837677, "learning_rate": 0.0001, "loss": 1.722, "step": 266 }, { "epoch": 0.06483729966002914, "grad_norm": 0.5404797792434692, "learning_rate": 0.0001, "loss": 1.8185, "step": 267 }, { "epoch": 0.06508013598834386, "grad_norm": 0.6320937275886536, "learning_rate": 0.0001, "loss": 1.8696, "step": 268 }, { "epoch": 0.06532297231665857, "grad_norm": 0.5689648389816284, "learning_rate": 0.0001, "loss": 1.9712, "step": 269 }, { "epoch": 0.06556580864497329, "grad_norm": 0.5600985288619995, "learning_rate": 0.0001, "loss": 1.8229, "step": 270 }, { "epoch": 0.065808644973288, "grad_norm": 0.6124951243400574, "learning_rate": 0.0001, "loss": 1.8365, "step": 271 }, { "epoch": 0.06605148130160272, "grad_norm": 0.6249804496765137, "learning_rate": 0.0001, "loss": 1.8418, "step": 272 }, { "epoch": 0.06629431762991743, "grad_norm": 0.5479178428649902, "learning_rate": 0.0001, "loss": 1.7385, "step": 273 }, { "epoch": 0.06653715395823215, "grad_norm": 0.562118411064148, "learning_rate": 0.0001, "loss": 1.6261, "step": 274 }, { "epoch": 0.06677999028654687, "grad_norm": 0.5987095832824707, "learning_rate": 0.0001, "loss": 2.0492, "step": 275 }, { "epoch": 0.06702282661486159, "grad_norm": 0.5354698896408081, "learning_rate": 0.0001, "loss": 1.6679, "step": 276 }, { "epoch": 0.0672656629431763, "grad_norm": 0.5242313742637634, "learning_rate": 0.0001, "loss": 1.6637, "step": 277 }, { "epoch": 0.06750849927149101, "grad_norm": 0.6085278391838074, "learning_rate": 0.0001, "loss": 1.8389, "step": 278 }, { "epoch": 0.06775133559980573, "grad_norm": 0.592673659324646, "learning_rate": 0.0001, "loss": 1.7553, "step": 279 }, { "epoch": 0.06799417192812045, "grad_norm": 0.6025213599205017, "learning_rate": 0.0001, "loss": 1.9998, "step": 280 }, { "epoch": 0.06823700825643517, "grad_norm": 0.5785611867904663, "learning_rate": 0.0001, "loss": 1.8031, "step": 281 }, { "epoch": 0.06847984458474989, "grad_norm": 0.5544794797897339, "learning_rate": 0.0001, "loss": 1.7671, "step": 282 }, { "epoch": 0.06872268091306459, "grad_norm": 0.5426813364028931, "learning_rate": 0.0001, "loss": 1.7511, "step": 283 }, { "epoch": 0.06896551724137931, "grad_norm": 0.6183847784996033, "learning_rate": 0.0001, "loss": 1.805, "step": 284 }, { "epoch": 0.06920835356969403, "grad_norm": 0.5462707877159119, "learning_rate": 0.0001, "loss": 1.7911, "step": 285 }, { "epoch": 0.06945118989800875, "grad_norm": 0.5737053155899048, "learning_rate": 0.0001, "loss": 1.6621, "step": 286 }, { "epoch": 0.06969402622632345, "grad_norm": 0.6043452620506287, "learning_rate": 0.0001, "loss": 1.8558, "step": 287 }, { "epoch": 0.06993686255463817, "grad_norm": 0.606366753578186, "learning_rate": 0.0001, "loss": 2.0038, "step": 288 }, { "epoch": 0.07017969888295289, "grad_norm": 0.5447037220001221, "learning_rate": 0.0001, "loss": 1.8431, "step": 289 }, { "epoch": 0.07042253521126761, "grad_norm": 0.5541792511940002, "learning_rate": 0.0001, "loss": 1.6519, "step": 290 }, { "epoch": 0.07066537153958233, "grad_norm": 0.5450261831283569, "learning_rate": 0.0001, "loss": 1.8592, "step": 291 }, { "epoch": 0.07090820786789703, "grad_norm": 0.5191848874092102, "learning_rate": 0.0001, "loss": 1.7219, "step": 292 }, { "epoch": 0.07115104419621175, "grad_norm": 0.5771126747131348, "learning_rate": 0.0001, "loss": 1.7581, "step": 293 }, { "epoch": 0.07139388052452647, "grad_norm": 0.550542414188385, "learning_rate": 0.0001, "loss": 1.7619, "step": 294 }, { "epoch": 0.07163671685284119, "grad_norm": 0.5441504716873169, "learning_rate": 0.0001, "loss": 1.8268, "step": 295 }, { "epoch": 0.0718795531811559, "grad_norm": 0.5738163590431213, "learning_rate": 0.0001, "loss": 1.7749, "step": 296 }, { "epoch": 0.07212238950947061, "grad_norm": 0.5475561022758484, "learning_rate": 0.0001, "loss": 1.7159, "step": 297 }, { "epoch": 0.07236522583778533, "grad_norm": 0.5339605808258057, "learning_rate": 0.0001, "loss": 1.7179, "step": 298 }, { "epoch": 0.07260806216610005, "grad_norm": 0.5403867363929749, "learning_rate": 0.0001, "loss": 1.7337, "step": 299 }, { "epoch": 0.07285089849441477, "grad_norm": 0.546708345413208, "learning_rate": 0.0001, "loss": 1.7792, "step": 300 }, { "epoch": 0.07309373482272948, "grad_norm": 0.5604628920555115, "learning_rate": 0.0001, "loss": 1.8536, "step": 301 }, { "epoch": 0.0733365711510442, "grad_norm": 0.5335520505905151, "learning_rate": 0.0001, "loss": 1.6597, "step": 302 }, { "epoch": 0.07357940747935891, "grad_norm": 0.5464479327201843, "learning_rate": 0.0001, "loss": 1.7763, "step": 303 }, { "epoch": 0.07382224380767363, "grad_norm": 0.5682089328765869, "learning_rate": 0.0001, "loss": 1.8342, "step": 304 }, { "epoch": 0.07406508013598834, "grad_norm": 0.5149857401847839, "learning_rate": 0.0001, "loss": 1.5388, "step": 305 }, { "epoch": 0.07430791646430306, "grad_norm": 0.5400729179382324, "learning_rate": 0.0001, "loss": 1.8352, "step": 306 }, { "epoch": 0.07455075279261777, "grad_norm": 0.5587572455406189, "learning_rate": 0.0001, "loss": 1.8093, "step": 307 }, { "epoch": 0.0747935891209325, "grad_norm": 0.5496796369552612, "learning_rate": 0.0001, "loss": 1.8077, "step": 308 }, { "epoch": 0.07503642544924721, "grad_norm": 0.5394822359085083, "learning_rate": 0.0001, "loss": 1.7357, "step": 309 }, { "epoch": 0.07527926177756192, "grad_norm": 0.5829740166664124, "learning_rate": 0.0001, "loss": 1.8976, "step": 310 }, { "epoch": 0.07552209810587664, "grad_norm": 0.5242211818695068, "learning_rate": 0.0001, "loss": 1.7181, "step": 311 }, { "epoch": 0.07576493443419136, "grad_norm": 0.5243309140205383, "learning_rate": 0.0001, "loss": 1.6823, "step": 312 }, { "epoch": 0.07600777076250607, "grad_norm": 0.5284510254859924, "learning_rate": 0.0001, "loss": 1.6511, "step": 313 }, { "epoch": 0.0762506070908208, "grad_norm": 0.5880502462387085, "learning_rate": 0.0001, "loss": 1.8045, "step": 314 }, { "epoch": 0.0764934434191355, "grad_norm": 0.5681189298629761, "learning_rate": 0.0001, "loss": 1.8092, "step": 315 }, { "epoch": 0.07673627974745022, "grad_norm": 0.5528023838996887, "learning_rate": 0.0001, "loss": 1.7219, "step": 316 }, { "epoch": 0.07697911607576494, "grad_norm": 0.545785665512085, "learning_rate": 0.0001, "loss": 1.817, "step": 317 }, { "epoch": 0.07722195240407966, "grad_norm": 0.5269185304641724, "learning_rate": 0.0001, "loss": 1.6898, "step": 318 }, { "epoch": 0.07746478873239436, "grad_norm": 0.5679982304573059, "learning_rate": 0.0001, "loss": 1.8103, "step": 319 }, { "epoch": 0.07770762506070908, "grad_norm": 0.5480614900588989, "learning_rate": 0.0001, "loss": 1.6871, "step": 320 }, { "epoch": 0.0779504613890238, "grad_norm": 0.5294215679168701, "learning_rate": 0.0001, "loss": 1.7389, "step": 321 }, { "epoch": 0.07819329771733852, "grad_norm": 0.5534301400184631, "learning_rate": 0.0001, "loss": 1.7553, "step": 322 }, { "epoch": 0.07843613404565324, "grad_norm": 0.5598313808441162, "learning_rate": 0.0001, "loss": 1.6454, "step": 323 }, { "epoch": 0.07867897037396794, "grad_norm": 0.5626444816589355, "learning_rate": 0.0001, "loss": 1.8142, "step": 324 }, { "epoch": 0.07892180670228266, "grad_norm": 0.528076708316803, "learning_rate": 0.0001, "loss": 1.7058, "step": 325 }, { "epoch": 0.07916464303059738, "grad_norm": 0.5425437092781067, "learning_rate": 0.0001, "loss": 1.7188, "step": 326 }, { "epoch": 0.0794074793589121, "grad_norm": 0.5973831415176392, "learning_rate": 0.0001, "loss": 1.8852, "step": 327 }, { "epoch": 0.0796503156872268, "grad_norm": 0.5841144919395447, "learning_rate": 0.0001, "loss": 1.8122, "step": 328 }, { "epoch": 0.07989315201554152, "grad_norm": 0.5223991274833679, "learning_rate": 0.0001, "loss": 1.6088, "step": 329 }, { "epoch": 0.08013598834385624, "grad_norm": 0.573898434638977, "learning_rate": 0.0001, "loss": 1.8858, "step": 330 }, { "epoch": 0.08037882467217096, "grad_norm": 0.5554970502853394, "learning_rate": 0.0001, "loss": 1.7687, "step": 331 }, { "epoch": 0.08062166100048568, "grad_norm": 0.5309914350509644, "learning_rate": 0.0001, "loss": 1.7079, "step": 332 }, { "epoch": 0.08086449732880038, "grad_norm": 0.5667017698287964, "learning_rate": 0.0001, "loss": 1.8222, "step": 333 }, { "epoch": 0.0811073336571151, "grad_norm": 0.5442120432853699, "learning_rate": 0.0001, "loss": 1.7532, "step": 334 }, { "epoch": 0.08135016998542982, "grad_norm": 0.48812708258628845, "learning_rate": 0.0001, "loss": 1.5065, "step": 335 }, { "epoch": 0.08159300631374454, "grad_norm": 0.523797333240509, "learning_rate": 0.0001, "loss": 1.7718, "step": 336 }, { "epoch": 0.08183584264205926, "grad_norm": 0.5603326559066772, "learning_rate": 0.0001, "loss": 1.835, "step": 337 }, { "epoch": 0.08207867897037396, "grad_norm": 0.5247459411621094, "learning_rate": 0.0001, "loss": 1.7186, "step": 338 }, { "epoch": 0.08232151529868868, "grad_norm": 0.5363231301307678, "learning_rate": 0.0001, "loss": 1.6271, "step": 339 }, { "epoch": 0.0825643516270034, "grad_norm": 0.5604921579360962, "learning_rate": 0.0001, "loss": 1.745, "step": 340 }, { "epoch": 0.08280718795531812, "grad_norm": 0.5731708407402039, "learning_rate": 0.0001, "loss": 1.5748, "step": 341 }, { "epoch": 0.08305002428363283, "grad_norm": 0.6106705069541931, "learning_rate": 0.0001, "loss": 1.8616, "step": 342 }, { "epoch": 0.08329286061194754, "grad_norm": 0.5862675309181213, "learning_rate": 0.0001, "loss": 1.8843, "step": 343 }, { "epoch": 0.08353569694026226, "grad_norm": 0.5550743937492371, "learning_rate": 0.0001, "loss": 1.8349, "step": 344 }, { "epoch": 0.08377853326857698, "grad_norm": 0.5295913219451904, "learning_rate": 0.0001, "loss": 1.6549, "step": 345 }, { "epoch": 0.0840213695968917, "grad_norm": 0.5335150361061096, "learning_rate": 0.0001, "loss": 1.6946, "step": 346 }, { "epoch": 0.0842642059252064, "grad_norm": 0.5739315748214722, "learning_rate": 0.0001, "loss": 1.7884, "step": 347 }, { "epoch": 0.08450704225352113, "grad_norm": 0.49940741062164307, "learning_rate": 0.0001, "loss": 1.5942, "step": 348 }, { "epoch": 0.08474987858183584, "grad_norm": 0.5813695788383484, "learning_rate": 0.0001, "loss": 1.7981, "step": 349 }, { "epoch": 0.08499271491015056, "grad_norm": 0.5490919351577759, "learning_rate": 0.0001, "loss": 1.7776, "step": 350 }, { "epoch": 0.08523555123846527, "grad_norm": 0.5911244750022888, "learning_rate": 0.0001, "loss": 1.8169, "step": 351 }, { "epoch": 0.08547838756677999, "grad_norm": 0.5575651526451111, "learning_rate": 0.0001, "loss": 1.7757, "step": 352 }, { "epoch": 0.0857212238950947, "grad_norm": 0.510899007320404, "learning_rate": 0.0001, "loss": 1.7117, "step": 353 }, { "epoch": 0.08596406022340942, "grad_norm": 0.5488718152046204, "learning_rate": 0.0001, "loss": 1.6416, "step": 354 }, { "epoch": 0.08620689655172414, "grad_norm": 0.5245040059089661, "learning_rate": 0.0001, "loss": 1.8676, "step": 355 }, { "epoch": 0.08644973288003885, "grad_norm": 0.543440043926239, "learning_rate": 0.0001, "loss": 1.6686, "step": 356 }, { "epoch": 0.08669256920835357, "grad_norm": 0.5688736438751221, "learning_rate": 0.0001, "loss": 1.7556, "step": 357 }, { "epoch": 0.08693540553666829, "grad_norm": 0.5552835464477539, "learning_rate": 0.0001, "loss": 1.6959, "step": 358 }, { "epoch": 0.087178241864983, "grad_norm": 0.5715907216072083, "learning_rate": 0.0001, "loss": 1.9301, "step": 359 }, { "epoch": 0.08742107819329771, "grad_norm": 0.5471308827400208, "learning_rate": 0.0001, "loss": 1.7324, "step": 360 }, { "epoch": 0.08766391452161243, "grad_norm": 0.5335566997528076, "learning_rate": 0.0001, "loss": 1.5471, "step": 361 }, { "epoch": 0.08790675084992715, "grad_norm": 0.5608660578727722, "learning_rate": 0.0001, "loss": 1.8098, "step": 362 }, { "epoch": 0.08814958717824187, "grad_norm": 0.5688713788986206, "learning_rate": 0.0001, "loss": 1.8482, "step": 363 }, { "epoch": 0.08839242350655659, "grad_norm": 0.5294666886329651, "learning_rate": 0.0001, "loss": 1.7772, "step": 364 }, { "epoch": 0.08863525983487129, "grad_norm": 0.5550897717475891, "learning_rate": 0.0001, "loss": 1.766, "step": 365 }, { "epoch": 0.08887809616318601, "grad_norm": 0.6038578152656555, "learning_rate": 0.0001, "loss": 1.951, "step": 366 }, { "epoch": 0.08912093249150073, "grad_norm": 0.5179253220558167, "learning_rate": 0.0001, "loss": 1.6565, "step": 367 }, { "epoch": 0.08936376881981545, "grad_norm": 0.54139643907547, "learning_rate": 0.0001, "loss": 1.8326, "step": 368 }, { "epoch": 0.08960660514813017, "grad_norm": 0.5586341619491577, "learning_rate": 0.0001, "loss": 1.858, "step": 369 }, { "epoch": 0.08984944147644487, "grad_norm": 0.5631245374679565, "learning_rate": 0.0001, "loss": 1.8237, "step": 370 }, { "epoch": 0.09009227780475959, "grad_norm": 0.536080539226532, "learning_rate": 0.0001, "loss": 1.6666, "step": 371 }, { "epoch": 0.09033511413307431, "grad_norm": 0.5671922564506531, "learning_rate": 0.0001, "loss": 1.7635, "step": 372 }, { "epoch": 0.09057795046138903, "grad_norm": 0.5378475785255432, "learning_rate": 0.0001, "loss": 1.754, "step": 373 }, { "epoch": 0.09082078678970373, "grad_norm": 0.5762247443199158, "learning_rate": 0.0001, "loss": 1.9381, "step": 374 }, { "epoch": 0.09106362311801845, "grad_norm": 0.4861982762813568, "learning_rate": 0.0001, "loss": 1.7204, "step": 375 }, { "epoch": 0.09130645944633317, "grad_norm": 0.5585159063339233, "learning_rate": 0.0001, "loss": 1.8194, "step": 376 }, { "epoch": 0.09154929577464789, "grad_norm": 0.49415215849876404, "learning_rate": 0.0001, "loss": 1.6406, "step": 377 }, { "epoch": 0.09179213210296261, "grad_norm": 0.5581274032592773, "learning_rate": 0.0001, "loss": 1.8006, "step": 378 }, { "epoch": 0.09203496843127731, "grad_norm": 0.5406607389450073, "learning_rate": 0.0001, "loss": 1.7876, "step": 379 }, { "epoch": 0.09227780475959203, "grad_norm": 0.5319144129753113, "learning_rate": 0.0001, "loss": 1.726, "step": 380 }, { "epoch": 0.09252064108790675, "grad_norm": 0.5684472918510437, "learning_rate": 0.0001, "loss": 1.7701, "step": 381 }, { "epoch": 0.09276347741622147, "grad_norm": 0.5876220464706421, "learning_rate": 0.0001, "loss": 1.8187, "step": 382 }, { "epoch": 0.09300631374453618, "grad_norm": 0.5094630122184753, "learning_rate": 0.0001, "loss": 1.7241, "step": 383 }, { "epoch": 0.0932491500728509, "grad_norm": 0.5458014607429504, "learning_rate": 0.0001, "loss": 1.8323, "step": 384 }, { "epoch": 0.09349198640116561, "grad_norm": 0.5321098566055298, "learning_rate": 0.0001, "loss": 1.7619, "step": 385 }, { "epoch": 0.09373482272948033, "grad_norm": 0.542500376701355, "learning_rate": 0.0001, "loss": 1.6506, "step": 386 }, { "epoch": 0.09397765905779505, "grad_norm": 0.5316786170005798, "learning_rate": 0.0001, "loss": 1.8175, "step": 387 }, { "epoch": 0.09422049538610976, "grad_norm": 0.5709975957870483, "learning_rate": 0.0001, "loss": 1.8854, "step": 388 }, { "epoch": 0.09446333171442448, "grad_norm": 0.5439728498458862, "learning_rate": 0.0001, "loss": 1.8345, "step": 389 }, { "epoch": 0.0947061680427392, "grad_norm": 0.5377857089042664, "learning_rate": 0.0001, "loss": 1.791, "step": 390 }, { "epoch": 0.09494900437105391, "grad_norm": 0.5467667579650879, "learning_rate": 0.0001, "loss": 1.7572, "step": 391 }, { "epoch": 0.09519184069936863, "grad_norm": 0.5458954572677612, "learning_rate": 0.0001, "loss": 1.7995, "step": 392 }, { "epoch": 0.09543467702768334, "grad_norm": 0.5365225672721863, "learning_rate": 0.0001, "loss": 1.6152, "step": 393 }, { "epoch": 0.09567751335599806, "grad_norm": 0.5744127631187439, "learning_rate": 0.0001, "loss": 1.8339, "step": 394 }, { "epoch": 0.09592034968431278, "grad_norm": 0.585131049156189, "learning_rate": 0.0001, "loss": 1.7018, "step": 395 }, { "epoch": 0.0961631860126275, "grad_norm": 0.5712559223175049, "learning_rate": 0.0001, "loss": 1.9143, "step": 396 }, { "epoch": 0.0964060223409422, "grad_norm": 0.5720378160476685, "learning_rate": 0.0001, "loss": 1.8163, "step": 397 }, { "epoch": 0.09664885866925692, "grad_norm": 0.5640956163406372, "learning_rate": 0.0001, "loss": 1.8803, "step": 398 }, { "epoch": 0.09689169499757164, "grad_norm": 0.5292410254478455, "learning_rate": 0.0001, "loss": 1.6497, "step": 399 }, { "epoch": 0.09713453132588636, "grad_norm": 0.5474023818969727, "learning_rate": 0.0001, "loss": 1.8508, "step": 400 }, { "epoch": 0.09737736765420107, "grad_norm": 0.5421199202537537, "learning_rate": 0.0001, "loss": 1.9272, "step": 401 }, { "epoch": 0.09762020398251578, "grad_norm": 0.5888630151748657, "learning_rate": 0.0001, "loss": 1.7785, "step": 402 }, { "epoch": 0.0978630403108305, "grad_norm": 0.5838062167167664, "learning_rate": 0.0001, "loss": 1.7845, "step": 403 }, { "epoch": 0.09810587663914522, "grad_norm": 0.5154374241828918, "learning_rate": 0.0001, "loss": 1.5593, "step": 404 }, { "epoch": 0.09834871296745994, "grad_norm": 0.5997213125228882, "learning_rate": 0.0001, "loss": 1.8053, "step": 405 }, { "epoch": 0.09859154929577464, "grad_norm": 0.572761058807373, "learning_rate": 0.0001, "loss": 1.8878, "step": 406 }, { "epoch": 0.09883438562408936, "grad_norm": 0.5701769590377808, "learning_rate": 0.0001, "loss": 1.9937, "step": 407 }, { "epoch": 0.09907722195240408, "grad_norm": 0.5593425035476685, "learning_rate": 0.0001, "loss": 1.8615, "step": 408 }, { "epoch": 0.0993200582807188, "grad_norm": 0.5380374193191528, "learning_rate": 0.0001, "loss": 1.8052, "step": 409 }, { "epoch": 0.09956289460903352, "grad_norm": 0.5586409568786621, "learning_rate": 0.0001, "loss": 1.9118, "step": 410 }, { "epoch": 0.09980573093734822, "grad_norm": 0.5825559496879578, "learning_rate": 0.0001, "loss": 1.9286, "step": 411 }, { "epoch": 0.10004856726566294, "grad_norm": 0.5659518837928772, "learning_rate": 0.0001, "loss": 1.9078, "step": 412 }, { "epoch": 0.10029140359397766, "grad_norm": 0.5435416102409363, "learning_rate": 0.0001, "loss": 1.7668, "step": 413 }, { "epoch": 0.10053423992229238, "grad_norm": 0.5503891706466675, "learning_rate": 0.0001, "loss": 1.9453, "step": 414 }, { "epoch": 0.1007770762506071, "grad_norm": 0.5533131957054138, "learning_rate": 0.0001, "loss": 1.8397, "step": 415 }, { "epoch": 0.1010199125789218, "grad_norm": 0.5576867461204529, "learning_rate": 0.0001, "loss": 1.8323, "step": 416 }, { "epoch": 0.10126274890723652, "grad_norm": 0.5436316132545471, "learning_rate": 0.0001, "loss": 1.8363, "step": 417 }, { "epoch": 0.10150558523555124, "grad_norm": 0.5690177083015442, "learning_rate": 0.0001, "loss": 1.9228, "step": 418 }, { "epoch": 0.10174842156386596, "grad_norm": 0.5737699270248413, "learning_rate": 0.0001, "loss": 1.7261, "step": 419 }, { "epoch": 0.10199125789218066, "grad_norm": 0.6116786599159241, "learning_rate": 0.0001, "loss": 1.738, "step": 420 }, { "epoch": 0.10223409422049538, "grad_norm": 0.5913645625114441, "learning_rate": 0.0001, "loss": 1.9034, "step": 421 }, { "epoch": 0.1024769305488101, "grad_norm": 0.537891685962677, "learning_rate": 0.0001, "loss": 1.6837, "step": 422 }, { "epoch": 0.10271976687712482, "grad_norm": 0.5630314350128174, "learning_rate": 0.0001, "loss": 1.74, "step": 423 }, { "epoch": 0.10296260320543954, "grad_norm": 0.5510636568069458, "learning_rate": 0.0001, "loss": 1.7351, "step": 424 }, { "epoch": 0.10320543953375425, "grad_norm": 0.5301394462585449, "learning_rate": 0.0001, "loss": 1.8174, "step": 425 }, { "epoch": 0.10344827586206896, "grad_norm": 0.5566518902778625, "learning_rate": 0.0001, "loss": 1.7677, "step": 426 }, { "epoch": 0.10369111219038368, "grad_norm": 0.5097194314002991, "learning_rate": 0.0001, "loss": 1.5447, "step": 427 }, { "epoch": 0.1039339485186984, "grad_norm": 0.5431993007659912, "learning_rate": 0.0001, "loss": 1.828, "step": 428 }, { "epoch": 0.1041767848470131, "grad_norm": 0.5613725781440735, "learning_rate": 0.0001, "loss": 1.7072, "step": 429 }, { "epoch": 0.10441962117532783, "grad_norm": 0.5759568214416504, "learning_rate": 0.0001, "loss": 1.8452, "step": 430 }, { "epoch": 0.10466245750364254, "grad_norm": 0.544442892074585, "learning_rate": 0.0001, "loss": 1.5471, "step": 431 }, { "epoch": 0.10490529383195726, "grad_norm": 0.5675405263900757, "learning_rate": 0.0001, "loss": 1.8674, "step": 432 }, { "epoch": 0.10514813016027198, "grad_norm": 0.5533949732780457, "learning_rate": 0.0001, "loss": 1.7189, "step": 433 }, { "epoch": 0.10539096648858669, "grad_norm": 0.5979740023612976, "learning_rate": 0.0001, "loss": 1.736, "step": 434 }, { "epoch": 0.1056338028169014, "grad_norm": 0.5390839576721191, "learning_rate": 0.0001, "loss": 1.7611, "step": 435 }, { "epoch": 0.10587663914521613, "grad_norm": 0.5322712659835815, "learning_rate": 0.0001, "loss": 1.6746, "step": 436 }, { "epoch": 0.10611947547353084, "grad_norm": 0.5771969556808472, "learning_rate": 0.0001, "loss": 1.8153, "step": 437 }, { "epoch": 0.10636231180184555, "grad_norm": 0.568136990070343, "learning_rate": 0.0001, "loss": 1.8945, "step": 438 }, { "epoch": 0.10660514813016027, "grad_norm": 0.5258873105049133, "learning_rate": 0.0001, "loss": 1.6173, "step": 439 }, { "epoch": 0.10684798445847499, "grad_norm": 0.5741325616836548, "learning_rate": 0.0001, "loss": 1.9262, "step": 440 }, { "epoch": 0.1070908207867897, "grad_norm": 0.5695716738700867, "learning_rate": 0.0001, "loss": 1.7516, "step": 441 }, { "epoch": 0.10733365711510442, "grad_norm": 0.5673108696937561, "learning_rate": 0.0001, "loss": 1.787, "step": 442 }, { "epoch": 0.10757649344341913, "grad_norm": 0.5834952592849731, "learning_rate": 0.0001, "loss": 1.8915, "step": 443 }, { "epoch": 0.10781932977173385, "grad_norm": 0.5457293391227722, "learning_rate": 0.0001, "loss": 1.8129, "step": 444 }, { "epoch": 0.10806216610004857, "grad_norm": 0.5748411417007446, "learning_rate": 0.0001, "loss": 1.79, "step": 445 }, { "epoch": 0.10830500242836329, "grad_norm": 0.5327291488647461, "learning_rate": 0.0001, "loss": 1.6976, "step": 446 }, { "epoch": 0.108547838756678, "grad_norm": 0.5944225192070007, "learning_rate": 0.0001, "loss": 1.9803, "step": 447 }, { "epoch": 0.10879067508499271, "grad_norm": 0.5857844352722168, "learning_rate": 0.0001, "loss": 1.8585, "step": 448 }, { "epoch": 0.10903351141330743, "grad_norm": 0.5551210641860962, "learning_rate": 0.0001, "loss": 1.7326, "step": 449 }, { "epoch": 0.10927634774162215, "grad_norm": 0.5699621438980103, "learning_rate": 0.0001, "loss": 1.8009, "step": 450 }, { "epoch": 0.10951918406993687, "grad_norm": 0.5252717733383179, "learning_rate": 0.0001, "loss": 1.7593, "step": 451 }, { "epoch": 0.10976202039825157, "grad_norm": 0.5586408376693726, "learning_rate": 0.0001, "loss": 1.9124, "step": 452 }, { "epoch": 0.11000485672656629, "grad_norm": 0.5575873255729675, "learning_rate": 0.0001, "loss": 1.8058, "step": 453 }, { "epoch": 0.11024769305488101, "grad_norm": 0.5508807897567749, "learning_rate": 0.0001, "loss": 1.7938, "step": 454 }, { "epoch": 0.11049052938319573, "grad_norm": 0.5738120675086975, "learning_rate": 0.0001, "loss": 1.8046, "step": 455 }, { "epoch": 0.11073336571151045, "grad_norm": 0.5300111770629883, "learning_rate": 0.0001, "loss": 1.6204, "step": 456 }, { "epoch": 0.11097620203982515, "grad_norm": 0.5261697173118591, "learning_rate": 0.0001, "loss": 1.7018, "step": 457 }, { "epoch": 0.11121903836813987, "grad_norm": 0.5182989835739136, "learning_rate": 0.0001, "loss": 1.6849, "step": 458 }, { "epoch": 0.11146187469645459, "grad_norm": 0.5611332058906555, "learning_rate": 0.0001, "loss": 1.6503, "step": 459 }, { "epoch": 0.11170471102476931, "grad_norm": 0.5654703974723816, "learning_rate": 0.0001, "loss": 1.811, "step": 460 }, { "epoch": 0.11194754735308401, "grad_norm": 0.5420575737953186, "learning_rate": 0.0001, "loss": 1.7863, "step": 461 }, { "epoch": 0.11219038368139873, "grad_norm": 0.5482935309410095, "learning_rate": 0.0001, "loss": 1.7207, "step": 462 }, { "epoch": 0.11243322000971345, "grad_norm": 0.5594455599784851, "learning_rate": 0.0001, "loss": 1.7714, "step": 463 }, { "epoch": 0.11267605633802817, "grad_norm": 0.5463600158691406, "learning_rate": 0.0001, "loss": 1.6993, "step": 464 }, { "epoch": 0.11291889266634289, "grad_norm": 0.5660488605499268, "learning_rate": 0.0001, "loss": 1.7934, "step": 465 }, { "epoch": 0.1131617289946576, "grad_norm": 0.5747204422950745, "learning_rate": 0.0001, "loss": 1.7009, "step": 466 }, { "epoch": 0.11340456532297231, "grad_norm": 0.5203810930252075, "learning_rate": 0.0001, "loss": 1.6113, "step": 467 }, { "epoch": 0.11364740165128703, "grad_norm": 0.5617368221282959, "learning_rate": 0.0001, "loss": 1.6944, "step": 468 }, { "epoch": 0.11389023797960175, "grad_norm": 0.5709205269813538, "learning_rate": 0.0001, "loss": 1.8537, "step": 469 }, { "epoch": 0.11413307430791647, "grad_norm": 0.5530130863189697, "learning_rate": 0.0001, "loss": 1.7883, "step": 470 }, { "epoch": 0.11437591063623118, "grad_norm": 0.4973085820674896, "learning_rate": 0.0001, "loss": 1.7361, "step": 471 }, { "epoch": 0.1146187469645459, "grad_norm": 0.5351885557174683, "learning_rate": 0.0001, "loss": 1.7357, "step": 472 }, { "epoch": 0.11486158329286061, "grad_norm": 0.5383778214454651, "learning_rate": 0.0001, "loss": 1.6686, "step": 473 }, { "epoch": 0.11510441962117533, "grad_norm": 0.5456533432006836, "learning_rate": 0.0001, "loss": 1.6045, "step": 474 }, { "epoch": 0.11534725594949004, "grad_norm": 0.5580303072929382, "learning_rate": 0.0001, "loss": 1.8015, "step": 475 }, { "epoch": 0.11559009227780476, "grad_norm": 0.5299339890480042, "learning_rate": 0.0001, "loss": 1.7562, "step": 476 }, { "epoch": 0.11583292860611948, "grad_norm": 0.5573864579200745, "learning_rate": 0.0001, "loss": 1.8338, "step": 477 }, { "epoch": 0.1160757649344342, "grad_norm": 0.5550556182861328, "learning_rate": 0.0001, "loss": 1.6276, "step": 478 }, { "epoch": 0.11631860126274891, "grad_norm": 0.6052720546722412, "learning_rate": 0.0001, "loss": 1.9028, "step": 479 }, { "epoch": 0.11656143759106362, "grad_norm": 0.5638125538825989, "learning_rate": 0.0001, "loss": 1.7026, "step": 480 }, { "epoch": 0.11680427391937834, "grad_norm": 0.5557655096054077, "learning_rate": 0.0001, "loss": 1.7897, "step": 481 }, { "epoch": 0.11704711024769306, "grad_norm": 0.5520438551902771, "learning_rate": 0.0001, "loss": 1.6795, "step": 482 }, { "epoch": 0.11728994657600778, "grad_norm": 0.5554108619689941, "learning_rate": 0.0001, "loss": 1.6404, "step": 483 }, { "epoch": 0.11753278290432248, "grad_norm": 0.5558226108551025, "learning_rate": 0.0001, "loss": 1.8267, "step": 484 }, { "epoch": 0.1177756192326372, "grad_norm": 0.5317284464836121, "learning_rate": 0.0001, "loss": 1.6754, "step": 485 }, { "epoch": 0.11801845556095192, "grad_norm": 0.4989995062351227, "learning_rate": 0.0001, "loss": 1.6589, "step": 486 }, { "epoch": 0.11826129188926664, "grad_norm": 0.5741257071495056, "learning_rate": 0.0001, "loss": 1.6347, "step": 487 }, { "epoch": 0.11850412821758136, "grad_norm": 0.5618738532066345, "learning_rate": 0.0001, "loss": 1.7277, "step": 488 }, { "epoch": 0.11874696454589606, "grad_norm": 0.5853535532951355, "learning_rate": 0.0001, "loss": 1.7813, "step": 489 }, { "epoch": 0.11898980087421078, "grad_norm": 0.6120092868804932, "learning_rate": 0.0001, "loss": 1.9246, "step": 490 }, { "epoch": 0.1192326372025255, "grad_norm": 0.5622900724411011, "learning_rate": 0.0001, "loss": 1.8058, "step": 491 }, { "epoch": 0.11947547353084022, "grad_norm": 0.5811873078346252, "learning_rate": 0.0001, "loss": 1.8664, "step": 492 }, { "epoch": 0.11971830985915492, "grad_norm": 0.5615583062171936, "learning_rate": 0.0001, "loss": 1.7442, "step": 493 }, { "epoch": 0.11996114618746964, "grad_norm": 0.5460301041603088, "learning_rate": 0.0001, "loss": 1.7299, "step": 494 }, { "epoch": 0.12020398251578436, "grad_norm": 0.5650546550750732, "learning_rate": 0.0001, "loss": 1.6994, "step": 495 }, { "epoch": 0.12044681884409908, "grad_norm": 0.6062639951705933, "learning_rate": 0.0001, "loss": 1.6304, "step": 496 }, { "epoch": 0.1206896551724138, "grad_norm": 0.5884907841682434, "learning_rate": 0.0001, "loss": 1.881, "step": 497 }, { "epoch": 0.1209324915007285, "grad_norm": 0.5676560997962952, "learning_rate": 0.0001, "loss": 1.9511, "step": 498 }, { "epoch": 0.12117532782904322, "grad_norm": 0.5525282025337219, "learning_rate": 0.0001, "loss": 1.8009, "step": 499 }, { "epoch": 0.12141816415735794, "grad_norm": 0.5765119194984436, "learning_rate": 0.0001, "loss": 1.8173, "step": 500 }, { "epoch": 0.12166100048567266, "grad_norm": 0.5602638125419617, "learning_rate": 0.0001, "loss": 1.6158, "step": 501 }, { "epoch": 0.12190383681398738, "grad_norm": 0.5428099036216736, "learning_rate": 0.0001, "loss": 1.6568, "step": 502 }, { "epoch": 0.12214667314230208, "grad_norm": 0.5926585793495178, "learning_rate": 0.0001, "loss": 1.9099, "step": 503 }, { "epoch": 0.1223895094706168, "grad_norm": 0.5717451572418213, "learning_rate": 0.0001, "loss": 1.6819, "step": 504 }, { "epoch": 0.12263234579893152, "grad_norm": 0.5724567770957947, "learning_rate": 0.0001, "loss": 1.7439, "step": 505 }, { "epoch": 0.12287518212724624, "grad_norm": 0.5597602725028992, "learning_rate": 0.0001, "loss": 1.7791, "step": 506 }, { "epoch": 0.12311801845556095, "grad_norm": 0.5225638151168823, "learning_rate": 0.0001, "loss": 1.5696, "step": 507 }, { "epoch": 0.12336085478387566, "grad_norm": 0.5752341747283936, "learning_rate": 0.0001, "loss": 1.8222, "step": 508 }, { "epoch": 0.12360369111219038, "grad_norm": 0.5637819170951843, "learning_rate": 0.0001, "loss": 1.8684, "step": 509 }, { "epoch": 0.1238465274405051, "grad_norm": 0.5527138113975525, "learning_rate": 0.0001, "loss": 1.8502, "step": 510 }, { "epoch": 0.12408936376881982, "grad_norm": 0.5274344086647034, "learning_rate": 0.0001, "loss": 1.6784, "step": 511 }, { "epoch": 0.12433220009713453, "grad_norm": 0.500634491443634, "learning_rate": 0.0001, "loss": 1.6773, "step": 512 }, { "epoch": 0.12457503642544925, "grad_norm": 0.5613259077072144, "learning_rate": 0.0001, "loss": 1.7038, "step": 513 }, { "epoch": 0.12481787275376396, "grad_norm": 0.5362138152122498, "learning_rate": 0.0001, "loss": 1.6937, "step": 514 }, { "epoch": 0.12506070908207867, "grad_norm": 0.6005094647407532, "learning_rate": 0.0001, "loss": 1.9453, "step": 515 }, { "epoch": 0.1253035454103934, "grad_norm": 0.5446475148200989, "learning_rate": 0.0001, "loss": 1.7107, "step": 516 }, { "epoch": 0.1255463817387081, "grad_norm": 0.5308226346969604, "learning_rate": 0.0001, "loss": 1.7148, "step": 517 }, { "epoch": 0.12578921806702284, "grad_norm": 0.5435710549354553, "learning_rate": 0.0001, "loss": 1.6136, "step": 518 }, { "epoch": 0.12603205439533754, "grad_norm": 0.5712341666221619, "learning_rate": 0.0001, "loss": 1.971, "step": 519 }, { "epoch": 0.12627489072365225, "grad_norm": 0.5840036869049072, "learning_rate": 0.0001, "loss": 1.8336, "step": 520 }, { "epoch": 0.12651772705196698, "grad_norm": 0.5734742283821106, "learning_rate": 0.0001, "loss": 1.8283, "step": 521 }, { "epoch": 0.1267605633802817, "grad_norm": 0.5368530750274658, "learning_rate": 0.0001, "loss": 1.5477, "step": 522 }, { "epoch": 0.1270033997085964, "grad_norm": 0.6023390293121338, "learning_rate": 0.0001, "loss": 1.8824, "step": 523 }, { "epoch": 0.12724623603691113, "grad_norm": 0.5599238872528076, "learning_rate": 0.0001, "loss": 1.8165, "step": 524 }, { "epoch": 0.12748907236522583, "grad_norm": 0.5743386745452881, "learning_rate": 0.0001, "loss": 1.7759, "step": 525 }, { "epoch": 0.12773190869354056, "grad_norm": 0.5817521810531616, "learning_rate": 0.0001, "loss": 1.933, "step": 526 }, { "epoch": 0.12797474502185527, "grad_norm": 0.5742323994636536, "learning_rate": 0.0001, "loss": 1.7725, "step": 527 }, { "epoch": 0.12821758135016997, "grad_norm": 0.5679665207862854, "learning_rate": 0.0001, "loss": 1.8016, "step": 528 }, { "epoch": 0.1284604176784847, "grad_norm": 0.5591177940368652, "learning_rate": 0.0001, "loss": 1.8947, "step": 529 }, { "epoch": 0.1287032540067994, "grad_norm": 0.5632833242416382, "learning_rate": 0.0001, "loss": 1.8154, "step": 530 }, { "epoch": 0.12894609033511414, "grad_norm": 0.5830283761024475, "learning_rate": 0.0001, "loss": 1.7494, "step": 531 }, { "epoch": 0.12918892666342885, "grad_norm": 0.5660063624382019, "learning_rate": 0.0001, "loss": 1.8233, "step": 532 }, { "epoch": 0.12943176299174355, "grad_norm": 0.5650138258934021, "learning_rate": 0.0001, "loss": 1.7795, "step": 533 }, { "epoch": 0.1296745993200583, "grad_norm": 0.6010497808456421, "learning_rate": 0.0001, "loss": 1.812, "step": 534 }, { "epoch": 0.129917435648373, "grad_norm": 0.5512215495109558, "learning_rate": 0.0001, "loss": 1.6853, "step": 535 }, { "epoch": 0.13016027197668772, "grad_norm": 0.5600783228874207, "learning_rate": 0.0001, "loss": 1.7371, "step": 536 }, { "epoch": 0.13040310830500243, "grad_norm": 0.5541916489601135, "learning_rate": 0.0001, "loss": 1.7264, "step": 537 }, { "epoch": 0.13064594463331713, "grad_norm": 0.5739514827728271, "learning_rate": 0.0001, "loss": 1.7232, "step": 538 }, { "epoch": 0.13088878096163187, "grad_norm": 0.5634263157844543, "learning_rate": 0.0001, "loss": 1.7582, "step": 539 }, { "epoch": 0.13113161728994657, "grad_norm": 0.5988732576370239, "learning_rate": 0.0001, "loss": 1.7761, "step": 540 }, { "epoch": 0.1313744536182613, "grad_norm": 0.5475578308105469, "learning_rate": 0.0001, "loss": 1.6277, "step": 541 }, { "epoch": 0.131617289946576, "grad_norm": 0.5896528363227844, "learning_rate": 0.0001, "loss": 1.828, "step": 542 }, { "epoch": 0.13186012627489072, "grad_norm": 0.5900474786758423, "learning_rate": 0.0001, "loss": 1.5999, "step": 543 }, { "epoch": 0.13210296260320545, "grad_norm": 0.583486795425415, "learning_rate": 0.0001, "loss": 1.6546, "step": 544 }, { "epoch": 0.13234579893152015, "grad_norm": 0.5893517732620239, "learning_rate": 0.0001, "loss": 1.8338, "step": 545 }, { "epoch": 0.13258863525983486, "grad_norm": 0.5527064204216003, "learning_rate": 0.0001, "loss": 1.8413, "step": 546 }, { "epoch": 0.1328314715881496, "grad_norm": 0.6100444793701172, "learning_rate": 0.0001, "loss": 1.8663, "step": 547 }, { "epoch": 0.1330743079164643, "grad_norm": 0.5398809313774109, "learning_rate": 0.0001, "loss": 1.7586, "step": 548 }, { "epoch": 0.13331714424477903, "grad_norm": 0.5335167050361633, "learning_rate": 0.0001, "loss": 1.643, "step": 549 }, { "epoch": 0.13355998057309373, "grad_norm": 0.5480557680130005, "learning_rate": 0.0001, "loss": 1.8034, "step": 550 }, { "epoch": 0.13380281690140844, "grad_norm": 0.5606412291526794, "learning_rate": 0.0001, "loss": 1.7259, "step": 551 }, { "epoch": 0.13404565322972317, "grad_norm": 0.5881384015083313, "learning_rate": 0.0001, "loss": 1.9435, "step": 552 }, { "epoch": 0.13428848955803788, "grad_norm": 0.5396336317062378, "learning_rate": 0.0001, "loss": 1.6303, "step": 553 }, { "epoch": 0.1345313258863526, "grad_norm": 0.5790011286735535, "learning_rate": 0.0001, "loss": 1.9107, "step": 554 }, { "epoch": 0.13477416221466731, "grad_norm": 0.5559581518173218, "learning_rate": 0.0001, "loss": 1.8296, "step": 555 }, { "epoch": 0.13501699854298202, "grad_norm": 0.5403438210487366, "learning_rate": 0.0001, "loss": 1.6947, "step": 556 }, { "epoch": 0.13525983487129675, "grad_norm": 0.5910641551017761, "learning_rate": 0.0001, "loss": 1.9624, "step": 557 }, { "epoch": 0.13550267119961146, "grad_norm": 0.5610319375991821, "learning_rate": 0.0001, "loss": 1.8825, "step": 558 }, { "epoch": 0.1357455075279262, "grad_norm": 0.5739595293998718, "learning_rate": 0.0001, "loss": 1.9115, "step": 559 }, { "epoch": 0.1359883438562409, "grad_norm": 0.5951991081237793, "learning_rate": 0.0001, "loss": 1.8239, "step": 560 }, { "epoch": 0.1362311801845556, "grad_norm": 0.5959383845329285, "learning_rate": 0.0001, "loss": 1.7641, "step": 561 }, { "epoch": 0.13647401651287033, "grad_norm": 0.5285969376564026, "learning_rate": 0.0001, "loss": 1.6009, "step": 562 }, { "epoch": 0.13671685284118504, "grad_norm": 0.6021578311920166, "learning_rate": 0.0001, "loss": 1.7826, "step": 563 }, { "epoch": 0.13695968916949977, "grad_norm": 0.5408231616020203, "learning_rate": 0.0001, "loss": 1.5952, "step": 564 }, { "epoch": 0.13720252549781448, "grad_norm": 0.5222688317298889, "learning_rate": 0.0001, "loss": 1.6196, "step": 565 }, { "epoch": 0.13744536182612918, "grad_norm": 0.5697501301765442, "learning_rate": 0.0001, "loss": 1.7813, "step": 566 }, { "epoch": 0.1376881981544439, "grad_norm": 0.5833176970481873, "learning_rate": 0.0001, "loss": 1.8235, "step": 567 }, { "epoch": 0.13793103448275862, "grad_norm": 0.5586931705474854, "learning_rate": 0.0001, "loss": 1.8319, "step": 568 }, { "epoch": 0.13817387081107332, "grad_norm": 0.5566940903663635, "learning_rate": 0.0001, "loss": 1.8003, "step": 569 }, { "epoch": 0.13841670713938806, "grad_norm": 0.5803408622741699, "learning_rate": 0.0001, "loss": 1.8709, "step": 570 }, { "epoch": 0.13865954346770276, "grad_norm": 0.5673038363456726, "learning_rate": 0.0001, "loss": 1.8451, "step": 571 }, { "epoch": 0.1389023797960175, "grad_norm": 0.537318766117096, "learning_rate": 0.0001, "loss": 1.7931, "step": 572 }, { "epoch": 0.1391452161243322, "grad_norm": 0.5993166565895081, "learning_rate": 0.0001, "loss": 1.8135, "step": 573 }, { "epoch": 0.1393880524526469, "grad_norm": 0.6365535259246826, "learning_rate": 0.0001, "loss": 1.8214, "step": 574 }, { "epoch": 0.13963088878096164, "grad_norm": 0.5775412917137146, "learning_rate": 0.0001, "loss": 1.7692, "step": 575 }, { "epoch": 0.13987372510927634, "grad_norm": 0.5516401529312134, "learning_rate": 0.0001, "loss": 1.8644, "step": 576 }, { "epoch": 0.14011656143759108, "grad_norm": 0.5734835863113403, "learning_rate": 0.0001, "loss": 1.7655, "step": 577 }, { "epoch": 0.14035939776590578, "grad_norm": 0.5712597966194153, "learning_rate": 0.0001, "loss": 1.7431, "step": 578 }, { "epoch": 0.14060223409422049, "grad_norm": 0.5707169771194458, "learning_rate": 0.0001, "loss": 1.8038, "step": 579 }, { "epoch": 0.14084507042253522, "grad_norm": 0.611261785030365, "learning_rate": 0.0001, "loss": 1.7464, "step": 580 }, { "epoch": 0.14108790675084992, "grad_norm": 0.5502864718437195, "learning_rate": 0.0001, "loss": 1.661, "step": 581 }, { "epoch": 0.14133074307916466, "grad_norm": 0.5708633065223694, "learning_rate": 0.0001, "loss": 1.7671, "step": 582 }, { "epoch": 0.14157357940747936, "grad_norm": 0.5802814960479736, "learning_rate": 0.0001, "loss": 1.7979, "step": 583 }, { "epoch": 0.14181641573579407, "grad_norm": 0.5676042437553406, "learning_rate": 0.0001, "loss": 1.7693, "step": 584 }, { "epoch": 0.1420592520641088, "grad_norm": 0.5899032950401306, "learning_rate": 0.0001, "loss": 1.7929, "step": 585 }, { "epoch": 0.1423020883924235, "grad_norm": 0.5601672530174255, "learning_rate": 0.0001, "loss": 1.8005, "step": 586 }, { "epoch": 0.1425449247207382, "grad_norm": 0.5701844096183777, "learning_rate": 0.0001, "loss": 1.8274, "step": 587 }, { "epoch": 0.14278776104905294, "grad_norm": 0.5746443271636963, "learning_rate": 0.0001, "loss": 1.8188, "step": 588 }, { "epoch": 0.14303059737736765, "grad_norm": 0.520227313041687, "learning_rate": 0.0001, "loss": 1.5645, "step": 589 }, { "epoch": 0.14327343370568238, "grad_norm": 0.5633252263069153, "learning_rate": 0.0001, "loss": 1.9063, "step": 590 }, { "epoch": 0.14351627003399708, "grad_norm": 0.5445085763931274, "learning_rate": 0.0001, "loss": 1.7288, "step": 591 }, { "epoch": 0.1437591063623118, "grad_norm": 0.5793913006782532, "learning_rate": 0.0001, "loss": 1.7959, "step": 592 }, { "epoch": 0.14400194269062652, "grad_norm": 0.561595618724823, "learning_rate": 0.0001, "loss": 1.7845, "step": 593 }, { "epoch": 0.14424477901894123, "grad_norm": 0.5914781093597412, "learning_rate": 0.0001, "loss": 1.9152, "step": 594 }, { "epoch": 0.14448761534725596, "grad_norm": 0.5659077763557434, "learning_rate": 0.0001, "loss": 1.8534, "step": 595 }, { "epoch": 0.14473045167557066, "grad_norm": 0.5098112225532532, "learning_rate": 0.0001, "loss": 1.5326, "step": 596 }, { "epoch": 0.14497328800388537, "grad_norm": 0.5428852438926697, "learning_rate": 0.0001, "loss": 1.8016, "step": 597 }, { "epoch": 0.1452161243322001, "grad_norm": 0.549568235874176, "learning_rate": 0.0001, "loss": 1.7529, "step": 598 }, { "epoch": 0.1454589606605148, "grad_norm": 0.5077096223831177, "learning_rate": 0.0001, "loss": 1.7353, "step": 599 }, { "epoch": 0.14570179698882954, "grad_norm": 0.5143014788627625, "learning_rate": 0.0001, "loss": 1.6686, "step": 600 }, { "epoch": 0.14594463331714425, "grad_norm": 0.588847279548645, "learning_rate": 0.0001, "loss": 1.84, "step": 601 }, { "epoch": 0.14618746964545895, "grad_norm": 0.5263761281967163, "learning_rate": 0.0001, "loss": 1.7258, "step": 602 }, { "epoch": 0.14643030597377368, "grad_norm": 0.5702909827232361, "learning_rate": 0.0001, "loss": 1.7845, "step": 603 }, { "epoch": 0.1466731423020884, "grad_norm": 0.5721048712730408, "learning_rate": 0.0001, "loss": 1.8626, "step": 604 }, { "epoch": 0.14691597863040312, "grad_norm": 0.5193774700164795, "learning_rate": 0.0001, "loss": 1.6946, "step": 605 }, { "epoch": 0.14715881495871783, "grad_norm": 0.5777069926261902, "learning_rate": 0.0001, "loss": 1.7773, "step": 606 }, { "epoch": 0.14740165128703253, "grad_norm": 0.5141522884368896, "learning_rate": 0.0001, "loss": 1.5652, "step": 607 }, { "epoch": 0.14764448761534726, "grad_norm": 0.5801394581794739, "learning_rate": 0.0001, "loss": 1.7541, "step": 608 }, { "epoch": 0.14788732394366197, "grad_norm": 0.5814424157142639, "learning_rate": 0.0001, "loss": 1.7812, "step": 609 }, { "epoch": 0.14813016027197667, "grad_norm": 0.5588964223861694, "learning_rate": 0.0001, "loss": 1.8527, "step": 610 }, { "epoch": 0.1483729966002914, "grad_norm": 0.5577553510665894, "learning_rate": 0.0001, "loss": 1.8548, "step": 611 }, { "epoch": 0.1486158329286061, "grad_norm": 0.5552173852920532, "learning_rate": 0.0001, "loss": 1.8248, "step": 612 }, { "epoch": 0.14885866925692084, "grad_norm": 0.5496284365653992, "learning_rate": 0.0001, "loss": 1.9658, "step": 613 }, { "epoch": 0.14910150558523555, "grad_norm": 0.5887159705162048, "learning_rate": 0.0001, "loss": 1.7976, "step": 614 }, { "epoch": 0.14934434191355025, "grad_norm": 0.5603215098381042, "learning_rate": 0.0001, "loss": 1.8203, "step": 615 }, { "epoch": 0.149587178241865, "grad_norm": 0.5411328077316284, "learning_rate": 0.0001, "loss": 1.807, "step": 616 }, { "epoch": 0.1498300145701797, "grad_norm": 0.5373426675796509, "learning_rate": 0.0001, "loss": 1.6622, "step": 617 }, { "epoch": 0.15007285089849443, "grad_norm": 0.5616528987884521, "learning_rate": 0.0001, "loss": 1.8262, "step": 618 }, { "epoch": 0.15031568722680913, "grad_norm": 0.5507047772407532, "learning_rate": 0.0001, "loss": 1.8568, "step": 619 }, { "epoch": 0.15055852355512384, "grad_norm": 0.5572707056999207, "learning_rate": 0.0001, "loss": 1.6277, "step": 620 }, { "epoch": 0.15080135988343857, "grad_norm": 0.6481385231018066, "learning_rate": 0.0001, "loss": 1.7724, "step": 621 }, { "epoch": 0.15104419621175327, "grad_norm": 0.5856874585151672, "learning_rate": 0.0001, "loss": 1.7932, "step": 622 }, { "epoch": 0.151287032540068, "grad_norm": 0.5337699055671692, "learning_rate": 0.0001, "loss": 1.7423, "step": 623 }, { "epoch": 0.1515298688683827, "grad_norm": 0.5539548993110657, "learning_rate": 0.0001, "loss": 1.8657, "step": 624 }, { "epoch": 0.15177270519669742, "grad_norm": 0.5827417373657227, "learning_rate": 0.0001, "loss": 1.9231, "step": 625 }, { "epoch": 0.15201554152501215, "grad_norm": 0.5438136458396912, "learning_rate": 0.0001, "loss": 1.5881, "step": 626 }, { "epoch": 0.15225837785332685, "grad_norm": 0.525128185749054, "learning_rate": 0.0001, "loss": 1.6657, "step": 627 }, { "epoch": 0.1525012141816416, "grad_norm": 0.5475497841835022, "learning_rate": 0.0001, "loss": 1.7372, "step": 628 }, { "epoch": 0.1527440505099563, "grad_norm": 0.5585061311721802, "learning_rate": 0.0001, "loss": 1.667, "step": 629 }, { "epoch": 0.152986886838271, "grad_norm": 0.5918571949005127, "learning_rate": 0.0001, "loss": 1.7223, "step": 630 }, { "epoch": 0.15322972316658573, "grad_norm": 0.5802246332168579, "learning_rate": 0.0001, "loss": 1.814, "step": 631 }, { "epoch": 0.15347255949490043, "grad_norm": 0.5600754022598267, "learning_rate": 0.0001, "loss": 1.7924, "step": 632 }, { "epoch": 0.15371539582321514, "grad_norm": 0.5247310996055603, "learning_rate": 0.0001, "loss": 1.7274, "step": 633 }, { "epoch": 0.15395823215152987, "grad_norm": 0.5142088532447815, "learning_rate": 0.0001, "loss": 1.5844, "step": 634 }, { "epoch": 0.15420106847984458, "grad_norm": 0.5472610592842102, "learning_rate": 0.0001, "loss": 1.8009, "step": 635 }, { "epoch": 0.1544439048081593, "grad_norm": 0.5571399331092834, "learning_rate": 0.0001, "loss": 1.9098, "step": 636 }, { "epoch": 0.15468674113647402, "grad_norm": 0.534423291683197, "learning_rate": 0.0001, "loss": 1.7576, "step": 637 }, { "epoch": 0.15492957746478872, "grad_norm": 0.5361340641975403, "learning_rate": 0.0001, "loss": 1.7545, "step": 638 }, { "epoch": 0.15517241379310345, "grad_norm": 0.5327625274658203, "learning_rate": 0.0001, "loss": 1.7251, "step": 639 }, { "epoch": 0.15541525012141816, "grad_norm": 0.6097555160522461, "learning_rate": 0.0001, "loss": 1.6234, "step": 640 }, { "epoch": 0.1556580864497329, "grad_norm": 0.5422442555427551, "learning_rate": 0.0001, "loss": 1.7612, "step": 641 }, { "epoch": 0.1559009227780476, "grad_norm": 0.5585924983024597, "learning_rate": 0.0001, "loss": 1.7741, "step": 642 }, { "epoch": 0.1561437591063623, "grad_norm": 0.583871603012085, "learning_rate": 0.0001, "loss": 1.7839, "step": 643 }, { "epoch": 0.15638659543467703, "grad_norm": 0.5999270677566528, "learning_rate": 0.0001, "loss": 1.7371, "step": 644 }, { "epoch": 0.15662943176299174, "grad_norm": 0.5860697031021118, "learning_rate": 0.0001, "loss": 1.9128, "step": 645 }, { "epoch": 0.15687226809130647, "grad_norm": 0.5972449779510498, "learning_rate": 0.0001, "loss": 1.983, "step": 646 }, { "epoch": 0.15711510441962118, "grad_norm": 0.5599138140678406, "learning_rate": 0.0001, "loss": 1.7721, "step": 647 }, { "epoch": 0.15735794074793588, "grad_norm": 0.588979184627533, "learning_rate": 0.0001, "loss": 1.8579, "step": 648 }, { "epoch": 0.15760077707625061, "grad_norm": 0.5571702718734741, "learning_rate": 0.0001, "loss": 1.9163, "step": 649 }, { "epoch": 0.15784361340456532, "grad_norm": 0.536984920501709, "learning_rate": 0.0001, "loss": 1.7292, "step": 650 }, { "epoch": 0.15808644973288005, "grad_norm": 0.5638613700866699, "learning_rate": 0.0001, "loss": 1.7381, "step": 651 }, { "epoch": 0.15832928606119476, "grad_norm": 0.5769402980804443, "learning_rate": 0.0001, "loss": 1.9559, "step": 652 }, { "epoch": 0.15857212238950946, "grad_norm": 0.5716903805732727, "learning_rate": 0.0001, "loss": 1.769, "step": 653 }, { "epoch": 0.1588149587178242, "grad_norm": 0.5627726316452026, "learning_rate": 0.0001, "loss": 1.8505, "step": 654 }, { "epoch": 0.1590577950461389, "grad_norm": 0.5364795923233032, "learning_rate": 0.0001, "loss": 1.7132, "step": 655 }, { "epoch": 0.1593006313744536, "grad_norm": 0.5715546607971191, "learning_rate": 0.0001, "loss": 1.8778, "step": 656 }, { "epoch": 0.15954346770276834, "grad_norm": 0.5425900220870972, "learning_rate": 0.0001, "loss": 1.7544, "step": 657 }, { "epoch": 0.15978630403108304, "grad_norm": 0.5716409683227539, "learning_rate": 0.0001, "loss": 1.8701, "step": 658 }, { "epoch": 0.16002914035939778, "grad_norm": 0.566852331161499, "learning_rate": 0.0001, "loss": 1.8293, "step": 659 }, { "epoch": 0.16027197668771248, "grad_norm": 0.5446115732192993, "learning_rate": 0.0001, "loss": 1.7935, "step": 660 }, { "epoch": 0.16051481301602719, "grad_norm": 0.5492187738418579, "learning_rate": 0.0001, "loss": 1.7554, "step": 661 }, { "epoch": 0.16075764934434192, "grad_norm": 0.5912359952926636, "learning_rate": 0.0001, "loss": 1.8386, "step": 662 }, { "epoch": 0.16100048567265662, "grad_norm": 0.5367072224617004, "learning_rate": 0.0001, "loss": 1.7206, "step": 663 }, { "epoch": 0.16124332200097136, "grad_norm": 0.5620670914649963, "learning_rate": 0.0001, "loss": 1.8007, "step": 664 }, { "epoch": 0.16148615832928606, "grad_norm": 0.5639497637748718, "learning_rate": 0.0001, "loss": 1.7234, "step": 665 }, { "epoch": 0.16172899465760077, "grad_norm": 0.5727313756942749, "learning_rate": 0.0001, "loss": 1.7677, "step": 666 }, { "epoch": 0.1619718309859155, "grad_norm": 0.5115989446640015, "learning_rate": 0.0001, "loss": 1.5539, "step": 667 }, { "epoch": 0.1622146673142302, "grad_norm": 0.5412555932998657, "learning_rate": 0.0001, "loss": 1.7562, "step": 668 }, { "epoch": 0.16245750364254494, "grad_norm": 0.61517733335495, "learning_rate": 0.0001, "loss": 1.8744, "step": 669 }, { "epoch": 0.16270033997085964, "grad_norm": 0.5591433048248291, "learning_rate": 0.0001, "loss": 1.7625, "step": 670 }, { "epoch": 0.16294317629917435, "grad_norm": 0.5341270565986633, "learning_rate": 0.0001, "loss": 1.6964, "step": 671 }, { "epoch": 0.16318601262748908, "grad_norm": 0.5355855822563171, "learning_rate": 0.0001, "loss": 1.7055, "step": 672 }, { "epoch": 0.16342884895580378, "grad_norm": 0.5489002466201782, "learning_rate": 0.0001, "loss": 1.682, "step": 673 }, { "epoch": 0.16367168528411852, "grad_norm": 0.5780882835388184, "learning_rate": 0.0001, "loss": 1.883, "step": 674 }, { "epoch": 0.16391452161243322, "grad_norm": 0.6101503372192383, "learning_rate": 0.0001, "loss": 2.0329, "step": 675 }, { "epoch": 0.16415735794074793, "grad_norm": 0.5983835458755493, "learning_rate": 0.0001, "loss": 1.883, "step": 676 }, { "epoch": 0.16440019426906266, "grad_norm": 0.5713289380073547, "learning_rate": 0.0001, "loss": 1.7345, "step": 677 }, { "epoch": 0.16464303059737737, "grad_norm": 0.5344922542572021, "learning_rate": 0.0001, "loss": 1.7555, "step": 678 }, { "epoch": 0.16488586692569207, "grad_norm": 0.5629361867904663, "learning_rate": 0.0001, "loss": 1.8222, "step": 679 }, { "epoch": 0.1651287032540068, "grad_norm": 0.572493314743042, "learning_rate": 0.0001, "loss": 1.7996, "step": 680 }, { "epoch": 0.1653715395823215, "grad_norm": 0.545283317565918, "learning_rate": 0.0001, "loss": 1.7302, "step": 681 }, { "epoch": 0.16561437591063624, "grad_norm": 0.5692222714424133, "learning_rate": 0.0001, "loss": 1.7713, "step": 682 }, { "epoch": 0.16585721223895095, "grad_norm": 0.5323293209075928, "learning_rate": 0.0001, "loss": 1.6538, "step": 683 }, { "epoch": 0.16610004856726565, "grad_norm": 0.546804666519165, "learning_rate": 0.0001, "loss": 1.7016, "step": 684 }, { "epoch": 0.16634288489558038, "grad_norm": 0.5531345009803772, "learning_rate": 0.0001, "loss": 1.878, "step": 685 }, { "epoch": 0.1665857212238951, "grad_norm": 0.5590841770172119, "learning_rate": 0.0001, "loss": 1.8109, "step": 686 }, { "epoch": 0.16682855755220982, "grad_norm": 0.5341556668281555, "learning_rate": 0.0001, "loss": 1.6424, "step": 687 }, { "epoch": 0.16707139388052453, "grad_norm": 0.5584942698478699, "learning_rate": 0.0001, "loss": 1.8527, "step": 688 }, { "epoch": 0.16731423020883923, "grad_norm": 0.57704097032547, "learning_rate": 0.0001, "loss": 1.8635, "step": 689 }, { "epoch": 0.16755706653715396, "grad_norm": 0.5278995633125305, "learning_rate": 0.0001, "loss": 1.8115, "step": 690 }, { "epoch": 0.16779990286546867, "grad_norm": 0.559356153011322, "learning_rate": 0.0001, "loss": 1.7171, "step": 691 }, { "epoch": 0.1680427391937834, "grad_norm": 0.5431156754493713, "learning_rate": 0.0001, "loss": 1.7339, "step": 692 }, { "epoch": 0.1682855755220981, "grad_norm": 0.5194575190544128, "learning_rate": 0.0001, "loss": 1.4545, "step": 693 }, { "epoch": 0.1685284118504128, "grad_norm": 0.604436993598938, "learning_rate": 0.0001, "loss": 1.922, "step": 694 }, { "epoch": 0.16877124817872755, "grad_norm": 0.5614901185035706, "learning_rate": 0.0001, "loss": 1.725, "step": 695 }, { "epoch": 0.16901408450704225, "grad_norm": 0.592366635799408, "learning_rate": 0.0001, "loss": 1.8416, "step": 696 }, { "epoch": 0.16925692083535698, "grad_norm": 0.5448355674743652, "learning_rate": 0.0001, "loss": 1.6349, "step": 697 }, { "epoch": 0.1694997571636717, "grad_norm": 0.5555615425109863, "learning_rate": 0.0001, "loss": 1.7578, "step": 698 }, { "epoch": 0.1697425934919864, "grad_norm": 0.5792731642723083, "learning_rate": 0.0001, "loss": 1.8133, "step": 699 }, { "epoch": 0.16998542982030113, "grad_norm": 0.5417819619178772, "learning_rate": 0.0001, "loss": 1.6891, "step": 700 }, { "epoch": 0.17022826614861583, "grad_norm": 0.5344753265380859, "learning_rate": 0.0001, "loss": 1.7461, "step": 701 }, { "epoch": 0.17047110247693054, "grad_norm": 0.565560519695282, "learning_rate": 0.0001, "loss": 1.7799, "step": 702 }, { "epoch": 0.17071393880524527, "grad_norm": 0.5037214159965515, "learning_rate": 0.0001, "loss": 1.6693, "step": 703 }, { "epoch": 0.17095677513355997, "grad_norm": 0.5803849101066589, "learning_rate": 0.0001, "loss": 1.8091, "step": 704 }, { "epoch": 0.1711996114618747, "grad_norm": 0.5579473376274109, "learning_rate": 0.0001, "loss": 1.6315, "step": 705 }, { "epoch": 0.1714424477901894, "grad_norm": 0.5527069568634033, "learning_rate": 0.0001, "loss": 1.6837, "step": 706 }, { "epoch": 0.17168528411850412, "grad_norm": 0.5678402185440063, "learning_rate": 0.0001, "loss": 1.9185, "step": 707 }, { "epoch": 0.17192812044681885, "grad_norm": 0.5627220273017883, "learning_rate": 0.0001, "loss": 1.7465, "step": 708 }, { "epoch": 0.17217095677513355, "grad_norm": 0.604686975479126, "learning_rate": 0.0001, "loss": 1.8794, "step": 709 }, { "epoch": 0.1724137931034483, "grad_norm": 0.5554637908935547, "learning_rate": 0.0001, "loss": 1.8323, "step": 710 }, { "epoch": 0.172656629431763, "grad_norm": 0.561477541923523, "learning_rate": 0.0001, "loss": 1.7666, "step": 711 }, { "epoch": 0.1728994657600777, "grad_norm": 0.5362895727157593, "learning_rate": 0.0001, "loss": 1.7603, "step": 712 }, { "epoch": 0.17314230208839243, "grad_norm": 0.541498064994812, "learning_rate": 0.0001, "loss": 1.6341, "step": 713 }, { "epoch": 0.17338513841670714, "grad_norm": 0.557447612285614, "learning_rate": 0.0001, "loss": 1.7325, "step": 714 }, { "epoch": 0.17362797474502187, "grad_norm": 0.5641613006591797, "learning_rate": 0.0001, "loss": 1.8528, "step": 715 }, { "epoch": 0.17387081107333657, "grad_norm": 0.5534732341766357, "learning_rate": 0.0001, "loss": 1.7265, "step": 716 }, { "epoch": 0.17411364740165128, "grad_norm": 0.576434314250946, "learning_rate": 0.0001, "loss": 1.8017, "step": 717 }, { "epoch": 0.174356483729966, "grad_norm": 0.5623506307601929, "learning_rate": 0.0001, "loss": 1.8295, "step": 718 }, { "epoch": 0.17459932005828072, "grad_norm": 0.5556817650794983, "learning_rate": 0.0001, "loss": 1.8157, "step": 719 }, { "epoch": 0.17484215638659542, "grad_norm": 0.5228356122970581, "learning_rate": 0.0001, "loss": 1.3937, "step": 720 }, { "epoch": 0.17508499271491015, "grad_norm": 0.5386799573898315, "learning_rate": 0.0001, "loss": 1.6899, "step": 721 }, { "epoch": 0.17532782904322486, "grad_norm": 0.6024092435836792, "learning_rate": 0.0001, "loss": 1.997, "step": 722 }, { "epoch": 0.1755706653715396, "grad_norm": 0.5360486507415771, "learning_rate": 0.0001, "loss": 1.6886, "step": 723 }, { "epoch": 0.1758135016998543, "grad_norm": 0.5964798927307129, "learning_rate": 0.0001, "loss": 1.8914, "step": 724 }, { "epoch": 0.176056338028169, "grad_norm": 0.6057879328727722, "learning_rate": 0.0001, "loss": 1.946, "step": 725 }, { "epoch": 0.17629917435648373, "grad_norm": 0.5556308627128601, "learning_rate": 0.0001, "loss": 1.7441, "step": 726 }, { "epoch": 0.17654201068479844, "grad_norm": 0.5340707302093506, "learning_rate": 0.0001, "loss": 1.6801, "step": 727 }, { "epoch": 0.17678484701311317, "grad_norm": 0.5532394647598267, "learning_rate": 0.0001, "loss": 1.8096, "step": 728 }, { "epoch": 0.17702768334142788, "grad_norm": 0.5861799716949463, "learning_rate": 0.0001, "loss": 1.9008, "step": 729 }, { "epoch": 0.17727051966974258, "grad_norm": 0.5458225011825562, "learning_rate": 0.0001, "loss": 1.6961, "step": 730 }, { "epoch": 0.17751335599805732, "grad_norm": 0.5501299500465393, "learning_rate": 0.0001, "loss": 1.7769, "step": 731 }, { "epoch": 0.17775619232637202, "grad_norm": 0.5216866135597229, "learning_rate": 0.0001, "loss": 1.6921, "step": 732 }, { "epoch": 0.17799902865468675, "grad_norm": 0.5326796174049377, "learning_rate": 0.0001, "loss": 1.6341, "step": 733 }, { "epoch": 0.17824186498300146, "grad_norm": 0.5195381045341492, "learning_rate": 0.0001, "loss": 1.6847, "step": 734 }, { "epoch": 0.17848470131131616, "grad_norm": 0.5662425756454468, "learning_rate": 0.0001, "loss": 1.7098, "step": 735 }, { "epoch": 0.1787275376396309, "grad_norm": 0.5749154090881348, "learning_rate": 0.0001, "loss": 1.747, "step": 736 }, { "epoch": 0.1789703739679456, "grad_norm": 0.6091803908348083, "learning_rate": 0.0001, "loss": 1.937, "step": 737 }, { "epoch": 0.17921321029626033, "grad_norm": 0.5344775319099426, "learning_rate": 0.0001, "loss": 1.7386, "step": 738 }, { "epoch": 0.17945604662457504, "grad_norm": 0.5442355871200562, "learning_rate": 0.0001, "loss": 1.7379, "step": 739 }, { "epoch": 0.17969888295288974, "grad_norm": 0.5308052897453308, "learning_rate": 0.0001, "loss": 1.5692, "step": 740 }, { "epoch": 0.17994171928120448, "grad_norm": 0.5485922694206238, "learning_rate": 0.0001, "loss": 1.8535, "step": 741 }, { "epoch": 0.18018455560951918, "grad_norm": 0.5594990849494934, "learning_rate": 0.0001, "loss": 1.9047, "step": 742 }, { "epoch": 0.1804273919378339, "grad_norm": 0.5496913194656372, "learning_rate": 0.0001, "loss": 1.8267, "step": 743 }, { "epoch": 0.18067022826614862, "grad_norm": 0.608910858631134, "learning_rate": 0.0001, "loss": 1.8546, "step": 744 }, { "epoch": 0.18091306459446332, "grad_norm": 0.6221304535865784, "learning_rate": 0.0001, "loss": 1.9559, "step": 745 }, { "epoch": 0.18115590092277806, "grad_norm": 0.5525220036506653, "learning_rate": 0.0001, "loss": 1.8696, "step": 746 }, { "epoch": 0.18139873725109276, "grad_norm": 0.5693623423576355, "learning_rate": 0.0001, "loss": 1.6375, "step": 747 }, { "epoch": 0.18164157357940747, "grad_norm": 0.5546811819076538, "learning_rate": 0.0001, "loss": 1.7519, "step": 748 }, { "epoch": 0.1818844099077222, "grad_norm": 0.5642764568328857, "learning_rate": 0.0001, "loss": 1.7697, "step": 749 }, { "epoch": 0.1821272462360369, "grad_norm": 0.5557659268379211, "learning_rate": 0.0001, "loss": 1.8393, "step": 750 }, { "epoch": 0.18237008256435164, "grad_norm": 0.5520908236503601, "learning_rate": 0.0001, "loss": 1.6867, "step": 751 }, { "epoch": 0.18261291889266634, "grad_norm": 0.5650697946548462, "learning_rate": 0.0001, "loss": 1.7904, "step": 752 }, { "epoch": 0.18285575522098105, "grad_norm": 0.5700987577438354, "learning_rate": 0.0001, "loss": 1.8719, "step": 753 }, { "epoch": 0.18309859154929578, "grad_norm": 0.5345878601074219, "learning_rate": 0.0001, "loss": 1.6061, "step": 754 }, { "epoch": 0.18334142787761049, "grad_norm": 0.5695345401763916, "learning_rate": 0.0001, "loss": 1.8629, "step": 755 }, { "epoch": 0.18358426420592522, "grad_norm": 0.5671497583389282, "learning_rate": 0.0001, "loss": 1.7367, "step": 756 }, { "epoch": 0.18382710053423992, "grad_norm": 0.5771461129188538, "learning_rate": 0.0001, "loss": 1.8345, "step": 757 }, { "epoch": 0.18406993686255463, "grad_norm": 0.5537222623825073, "learning_rate": 0.0001, "loss": 1.7433, "step": 758 }, { "epoch": 0.18431277319086936, "grad_norm": 0.555461049079895, "learning_rate": 0.0001, "loss": 1.767, "step": 759 }, { "epoch": 0.18455560951918407, "grad_norm": 0.5350934267044067, "learning_rate": 0.0001, "loss": 1.6869, "step": 760 }, { "epoch": 0.1847984458474988, "grad_norm": 0.5174617767333984, "learning_rate": 0.0001, "loss": 1.5689, "step": 761 }, { "epoch": 0.1850412821758135, "grad_norm": 0.560255765914917, "learning_rate": 0.0001, "loss": 1.8531, "step": 762 }, { "epoch": 0.1852841185041282, "grad_norm": 0.5425716042518616, "learning_rate": 0.0001, "loss": 1.5906, "step": 763 }, { "epoch": 0.18552695483244294, "grad_norm": 0.5555941462516785, "learning_rate": 0.0001, "loss": 1.6561, "step": 764 }, { "epoch": 0.18576979116075765, "grad_norm": 0.5448790192604065, "learning_rate": 0.0001, "loss": 1.7301, "step": 765 }, { "epoch": 0.18601262748907235, "grad_norm": 0.5178407430648804, "learning_rate": 0.0001, "loss": 1.6066, "step": 766 }, { "epoch": 0.18625546381738708, "grad_norm": 0.5792193412780762, "learning_rate": 0.0001, "loss": 1.9282, "step": 767 }, { "epoch": 0.1864983001457018, "grad_norm": 0.5438252091407776, "learning_rate": 0.0001, "loss": 1.6496, "step": 768 }, { "epoch": 0.18674113647401652, "grad_norm": 0.5777905583381653, "learning_rate": 0.0001, "loss": 1.8311, "step": 769 }, { "epoch": 0.18698397280233123, "grad_norm": 0.5501772165298462, "learning_rate": 0.0001, "loss": 1.8035, "step": 770 }, { "epoch": 0.18722680913064593, "grad_norm": 0.540941059589386, "learning_rate": 0.0001, "loss": 1.8926, "step": 771 }, { "epoch": 0.18746964545896067, "grad_norm": 0.5447785258293152, "learning_rate": 0.0001, "loss": 1.713, "step": 772 }, { "epoch": 0.18771248178727537, "grad_norm": 0.5447104573249817, "learning_rate": 0.0001, "loss": 1.7891, "step": 773 }, { "epoch": 0.1879553181155901, "grad_norm": 0.5534484386444092, "learning_rate": 0.0001, "loss": 1.6539, "step": 774 }, { "epoch": 0.1881981544439048, "grad_norm": 0.56410151720047, "learning_rate": 0.0001, "loss": 1.6747, "step": 775 }, { "epoch": 0.1884409907722195, "grad_norm": 0.5520610809326172, "learning_rate": 0.0001, "loss": 1.6727, "step": 776 }, { "epoch": 0.18868382710053425, "grad_norm": 0.5323498249053955, "learning_rate": 0.0001, "loss": 1.6881, "step": 777 }, { "epoch": 0.18892666342884895, "grad_norm": 0.5767523050308228, "learning_rate": 0.0001, "loss": 1.6216, "step": 778 }, { "epoch": 0.18916949975716368, "grad_norm": 0.5612403154373169, "learning_rate": 0.0001, "loss": 1.8959, "step": 779 }, { "epoch": 0.1894123360854784, "grad_norm": 0.5474254488945007, "learning_rate": 0.0001, "loss": 1.718, "step": 780 }, { "epoch": 0.1896551724137931, "grad_norm": 0.5918302536010742, "learning_rate": 0.0001, "loss": 1.8335, "step": 781 }, { "epoch": 0.18989800874210783, "grad_norm": 0.5537752509117126, "learning_rate": 0.0001, "loss": 1.8291, "step": 782 }, { "epoch": 0.19014084507042253, "grad_norm": 0.5441523194313049, "learning_rate": 0.0001, "loss": 1.8237, "step": 783 }, { "epoch": 0.19038368139873726, "grad_norm": 0.5317227244377136, "learning_rate": 0.0001, "loss": 1.6781, "step": 784 }, { "epoch": 0.19062651772705197, "grad_norm": 0.5500926971435547, "learning_rate": 0.0001, "loss": 1.6763, "step": 785 }, { "epoch": 0.19086935405536667, "grad_norm": 0.5860820412635803, "learning_rate": 0.0001, "loss": 1.9524, "step": 786 }, { "epoch": 0.1911121903836814, "grad_norm": 0.5804112553596497, "learning_rate": 0.0001, "loss": 1.6927, "step": 787 }, { "epoch": 0.1913550267119961, "grad_norm": 0.5534296035766602, "learning_rate": 0.0001, "loss": 1.7464, "step": 788 }, { "epoch": 0.19159786304031082, "grad_norm": 0.5835431814193726, "learning_rate": 0.0001, "loss": 1.8327, "step": 789 }, { "epoch": 0.19184069936862555, "grad_norm": 0.6011634469032288, "learning_rate": 0.0001, "loss": 1.961, "step": 790 }, { "epoch": 0.19208353569694026, "grad_norm": 0.5693417191505432, "learning_rate": 0.0001, "loss": 1.7782, "step": 791 }, { "epoch": 0.192326372025255, "grad_norm": 0.6142314672470093, "learning_rate": 0.0001, "loss": 1.7706, "step": 792 }, { "epoch": 0.1925692083535697, "grad_norm": 0.548800528049469, "learning_rate": 0.0001, "loss": 1.5666, "step": 793 }, { "epoch": 0.1928120446818844, "grad_norm": 0.5322363376617432, "learning_rate": 0.0001, "loss": 1.6428, "step": 794 }, { "epoch": 0.19305488101019913, "grad_norm": 0.5774746537208557, "learning_rate": 0.0001, "loss": 1.7622, "step": 795 }, { "epoch": 0.19329771733851384, "grad_norm": 0.6003028750419617, "learning_rate": 0.0001, "loss": 1.896, "step": 796 }, { "epoch": 0.19354055366682857, "grad_norm": 0.5864676833152771, "learning_rate": 0.0001, "loss": 1.9316, "step": 797 }, { "epoch": 0.19378338999514327, "grad_norm": 0.5733742713928223, "learning_rate": 0.0001, "loss": 1.7647, "step": 798 }, { "epoch": 0.19402622632345798, "grad_norm": 0.5421572327613831, "learning_rate": 0.0001, "loss": 1.6899, "step": 799 }, { "epoch": 0.1942690626517727, "grad_norm": 0.5882205963134766, "learning_rate": 0.0001, "loss": 1.9301, "step": 800 }, { "epoch": 0.19451189898008742, "grad_norm": 0.5595299601554871, "learning_rate": 0.0001, "loss": 1.7524, "step": 801 }, { "epoch": 0.19475473530840215, "grad_norm": 0.6007490754127502, "learning_rate": 0.0001, "loss": 1.8614, "step": 802 }, { "epoch": 0.19499757163671685, "grad_norm": 0.60100257396698, "learning_rate": 0.0001, "loss": 1.9322, "step": 803 }, { "epoch": 0.19524040796503156, "grad_norm": 0.5655602216720581, "learning_rate": 0.0001, "loss": 1.8182, "step": 804 }, { "epoch": 0.1954832442933463, "grad_norm": 0.5522935390472412, "learning_rate": 0.0001, "loss": 1.8435, "step": 805 }, { "epoch": 0.195726080621661, "grad_norm": 0.5583087801933289, "learning_rate": 0.0001, "loss": 1.6269, "step": 806 }, { "epoch": 0.19596891694997573, "grad_norm": 0.5243350267410278, "learning_rate": 0.0001, "loss": 1.615, "step": 807 }, { "epoch": 0.19621175327829043, "grad_norm": 0.5580244660377502, "learning_rate": 0.0001, "loss": 1.5634, "step": 808 }, { "epoch": 0.19645458960660514, "grad_norm": 0.5781289339065552, "learning_rate": 0.0001, "loss": 1.7999, "step": 809 }, { "epoch": 0.19669742593491987, "grad_norm": 0.5763341188430786, "learning_rate": 0.0001, "loss": 1.8655, "step": 810 }, { "epoch": 0.19694026226323458, "grad_norm": 0.5569537281990051, "learning_rate": 0.0001, "loss": 1.8148, "step": 811 }, { "epoch": 0.19718309859154928, "grad_norm": 0.5850344896316528, "learning_rate": 0.0001, "loss": 1.7526, "step": 812 }, { "epoch": 0.19742593491986402, "grad_norm": 0.5795556902885437, "learning_rate": 0.0001, "loss": 1.8331, "step": 813 }, { "epoch": 0.19766877124817872, "grad_norm": 0.5692217350006104, "learning_rate": 0.0001, "loss": 1.8168, "step": 814 }, { "epoch": 0.19791160757649345, "grad_norm": 0.5934564471244812, "learning_rate": 0.0001, "loss": 1.6528, "step": 815 }, { "epoch": 0.19815444390480816, "grad_norm": 0.5410759449005127, "learning_rate": 0.0001, "loss": 1.6415, "step": 816 }, { "epoch": 0.19839728023312286, "grad_norm": 0.5464872121810913, "learning_rate": 0.0001, "loss": 1.7449, "step": 817 }, { "epoch": 0.1986401165614376, "grad_norm": 0.5795077085494995, "learning_rate": 0.0001, "loss": 1.9202, "step": 818 }, { "epoch": 0.1988829528897523, "grad_norm": 0.6018663048744202, "learning_rate": 0.0001, "loss": 1.8138, "step": 819 }, { "epoch": 0.19912578921806703, "grad_norm": 0.5785474181175232, "learning_rate": 0.0001, "loss": 1.7411, "step": 820 }, { "epoch": 0.19936862554638174, "grad_norm": 0.61650550365448, "learning_rate": 0.0001, "loss": 1.8478, "step": 821 }, { "epoch": 0.19961146187469644, "grad_norm": 0.5855715274810791, "learning_rate": 0.0001, "loss": 1.7753, "step": 822 }, { "epoch": 0.19985429820301118, "grad_norm": 0.5476219654083252, "learning_rate": 0.0001, "loss": 1.7139, "step": 823 }, { "epoch": 0.20009713453132588, "grad_norm": 0.5218155384063721, "learning_rate": 0.0001, "loss": 1.6086, "step": 824 }, { "epoch": 0.20033997085964061, "grad_norm": 0.5409383773803711, "learning_rate": 0.0001, "loss": 1.785, "step": 825 }, { "epoch": 0.20058280718795532, "grad_norm": 0.585970938205719, "learning_rate": 0.0001, "loss": 1.9259, "step": 826 }, { "epoch": 0.20082564351627002, "grad_norm": 0.5450654029846191, "learning_rate": 0.0001, "loss": 1.7319, "step": 827 }, { "epoch": 0.20106847984458476, "grad_norm": 0.5078698396682739, "learning_rate": 0.0001, "loss": 1.6365, "step": 828 }, { "epoch": 0.20131131617289946, "grad_norm": 0.5683661103248596, "learning_rate": 0.0001, "loss": 1.65, "step": 829 }, { "epoch": 0.2015541525012142, "grad_norm": 0.5884541869163513, "learning_rate": 0.0001, "loss": 1.8233, "step": 830 }, { "epoch": 0.2017969888295289, "grad_norm": 0.5697531700134277, "learning_rate": 0.0001, "loss": 1.8651, "step": 831 }, { "epoch": 0.2020398251578436, "grad_norm": 0.5456650853157043, "learning_rate": 0.0001, "loss": 1.7226, "step": 832 }, { "epoch": 0.20228266148615834, "grad_norm": 0.5575149655342102, "learning_rate": 0.0001, "loss": 1.7833, "step": 833 }, { "epoch": 0.20252549781447304, "grad_norm": 0.5606904029846191, "learning_rate": 0.0001, "loss": 1.7435, "step": 834 }, { "epoch": 0.20276833414278775, "grad_norm": 0.5217230319976807, "learning_rate": 0.0001, "loss": 1.672, "step": 835 }, { "epoch": 0.20301117047110248, "grad_norm": 0.5786769986152649, "learning_rate": 0.0001, "loss": 1.7364, "step": 836 }, { "epoch": 0.2032540067994172, "grad_norm": 0.536147952079773, "learning_rate": 0.0001, "loss": 1.7313, "step": 837 }, { "epoch": 0.20349684312773192, "grad_norm": 0.5949231386184692, "learning_rate": 0.0001, "loss": 1.7037, "step": 838 }, { "epoch": 0.20373967945604662, "grad_norm": 0.5834288001060486, "learning_rate": 0.0001, "loss": 1.8865, "step": 839 }, { "epoch": 0.20398251578436133, "grad_norm": 0.6145623326301575, "learning_rate": 0.0001, "loss": 1.8882, "step": 840 }, { "epoch": 0.20422535211267606, "grad_norm": 0.5596982836723328, "learning_rate": 0.0001, "loss": 1.8797, "step": 841 }, { "epoch": 0.20446818844099077, "grad_norm": 0.5657252073287964, "learning_rate": 0.0001, "loss": 1.7358, "step": 842 }, { "epoch": 0.2047110247693055, "grad_norm": 0.5526270270347595, "learning_rate": 0.0001, "loss": 1.6714, "step": 843 }, { "epoch": 0.2049538610976202, "grad_norm": 0.5630422830581665, "learning_rate": 0.0001, "loss": 1.6924, "step": 844 }, { "epoch": 0.2051966974259349, "grad_norm": 0.5791303515434265, "learning_rate": 0.0001, "loss": 1.7582, "step": 845 }, { "epoch": 0.20543953375424964, "grad_norm": 0.5686047673225403, "learning_rate": 0.0001, "loss": 1.6872, "step": 846 }, { "epoch": 0.20568237008256435, "grad_norm": 0.5482386946678162, "learning_rate": 0.0001, "loss": 1.7276, "step": 847 }, { "epoch": 0.20592520641087908, "grad_norm": 0.5421972870826721, "learning_rate": 0.0001, "loss": 1.7984, "step": 848 }, { "epoch": 0.20616804273919379, "grad_norm": 0.5663183331489563, "learning_rate": 0.0001, "loss": 1.7296, "step": 849 }, { "epoch": 0.2064108790675085, "grad_norm": 0.5500679016113281, "learning_rate": 0.0001, "loss": 1.8638, "step": 850 }, { "epoch": 0.20665371539582322, "grad_norm": 0.5626316666603088, "learning_rate": 0.0001, "loss": 1.7687, "step": 851 }, { "epoch": 0.20689655172413793, "grad_norm": 0.5308297872543335, "learning_rate": 0.0001, "loss": 1.7183, "step": 852 }, { "epoch": 0.20713938805245263, "grad_norm": 0.5546897053718567, "learning_rate": 0.0001, "loss": 1.6975, "step": 853 }, { "epoch": 0.20738222438076737, "grad_norm": 0.5396243929862976, "learning_rate": 0.0001, "loss": 1.6581, "step": 854 }, { "epoch": 0.20762506070908207, "grad_norm": 0.5337682366371155, "learning_rate": 0.0001, "loss": 1.6647, "step": 855 }, { "epoch": 0.2078678970373968, "grad_norm": 0.5745347142219543, "learning_rate": 0.0001, "loss": 1.8399, "step": 856 }, { "epoch": 0.2081107333657115, "grad_norm": 0.5393202900886536, "learning_rate": 0.0001, "loss": 1.6535, "step": 857 }, { "epoch": 0.2083535696940262, "grad_norm": 0.5366864204406738, "learning_rate": 0.0001, "loss": 1.7269, "step": 858 }, { "epoch": 0.20859640602234095, "grad_norm": 0.55235755443573, "learning_rate": 0.0001, "loss": 1.8314, "step": 859 }, { "epoch": 0.20883924235065565, "grad_norm": 0.5640939474105835, "learning_rate": 0.0001, "loss": 1.8421, "step": 860 }, { "epoch": 0.20908207867897038, "grad_norm": 0.5542709231376648, "learning_rate": 0.0001, "loss": 1.7897, "step": 861 }, { "epoch": 0.2093249150072851, "grad_norm": 0.5564791560173035, "learning_rate": 0.0001, "loss": 1.716, "step": 862 }, { "epoch": 0.2095677513355998, "grad_norm": 0.560176432132721, "learning_rate": 0.0001, "loss": 1.7619, "step": 863 }, { "epoch": 0.20981058766391453, "grad_norm": 0.6058011054992676, "learning_rate": 0.0001, "loss": 1.8572, "step": 864 }, { "epoch": 0.21005342399222923, "grad_norm": 0.5366670489311218, "learning_rate": 0.0001, "loss": 1.7734, "step": 865 }, { "epoch": 0.21029626032054397, "grad_norm": 0.5931606888771057, "learning_rate": 0.0001, "loss": 1.7994, "step": 866 }, { "epoch": 0.21053909664885867, "grad_norm": 0.5367823243141174, "learning_rate": 0.0001, "loss": 1.6912, "step": 867 }, { "epoch": 0.21078193297717338, "grad_norm": 0.5488685369491577, "learning_rate": 0.0001, "loss": 1.7204, "step": 868 }, { "epoch": 0.2110247693054881, "grad_norm": 0.5571208000183105, "learning_rate": 0.0001, "loss": 1.7688, "step": 869 }, { "epoch": 0.2112676056338028, "grad_norm": 0.5351872444152832, "learning_rate": 0.0001, "loss": 1.6713, "step": 870 }, { "epoch": 0.21151044196211755, "grad_norm": 0.5415477752685547, "learning_rate": 0.0001, "loss": 1.6778, "step": 871 }, { "epoch": 0.21175327829043225, "grad_norm": 0.5760199427604675, "learning_rate": 0.0001, "loss": 1.8462, "step": 872 }, { "epoch": 0.21199611461874696, "grad_norm": 0.5803876519203186, "learning_rate": 0.0001, "loss": 1.6798, "step": 873 }, { "epoch": 0.2122389509470617, "grad_norm": 0.5319856405258179, "learning_rate": 0.0001, "loss": 1.6544, "step": 874 }, { "epoch": 0.2124817872753764, "grad_norm": 0.5371922254562378, "learning_rate": 0.0001, "loss": 1.5163, "step": 875 }, { "epoch": 0.2127246236036911, "grad_norm": 0.57842618227005, "learning_rate": 0.0001, "loss": 1.8315, "step": 876 }, { "epoch": 0.21296745993200583, "grad_norm": 0.5532588362693787, "learning_rate": 0.0001, "loss": 1.7318, "step": 877 }, { "epoch": 0.21321029626032054, "grad_norm": 0.5485522150993347, "learning_rate": 0.0001, "loss": 1.7128, "step": 878 }, { "epoch": 0.21345313258863527, "grad_norm": 0.5748867392539978, "learning_rate": 0.0001, "loss": 1.8404, "step": 879 }, { "epoch": 0.21369596891694997, "grad_norm": 0.5486942529678345, "learning_rate": 0.0001, "loss": 1.7819, "step": 880 }, { "epoch": 0.21393880524526468, "grad_norm": 0.5755213499069214, "learning_rate": 0.0001, "loss": 1.7104, "step": 881 }, { "epoch": 0.2141816415735794, "grad_norm": 0.5474473237991333, "learning_rate": 0.0001, "loss": 1.7719, "step": 882 }, { "epoch": 0.21442447790189412, "grad_norm": 0.5740113854408264, "learning_rate": 0.0001, "loss": 1.9424, "step": 883 }, { "epoch": 0.21466731423020885, "grad_norm": 0.5920238494873047, "learning_rate": 0.0001, "loss": 1.6995, "step": 884 }, { "epoch": 0.21491015055852355, "grad_norm": 0.5916893482208252, "learning_rate": 0.0001, "loss": 1.8407, "step": 885 }, { "epoch": 0.21515298688683826, "grad_norm": 0.5308418273925781, "learning_rate": 0.0001, "loss": 1.6048, "step": 886 }, { "epoch": 0.215395823215153, "grad_norm": 0.5787625312805176, "learning_rate": 0.0001, "loss": 1.7945, "step": 887 }, { "epoch": 0.2156386595434677, "grad_norm": 0.5507543087005615, "learning_rate": 0.0001, "loss": 1.5209, "step": 888 }, { "epoch": 0.21588149587178243, "grad_norm": 0.5690258741378784, "learning_rate": 0.0001, "loss": 1.8161, "step": 889 }, { "epoch": 0.21612433220009714, "grad_norm": 0.561421275138855, "learning_rate": 0.0001, "loss": 1.7688, "step": 890 }, { "epoch": 0.21636716852841184, "grad_norm": 0.5234627723693848, "learning_rate": 0.0001, "loss": 1.5922, "step": 891 }, { "epoch": 0.21661000485672657, "grad_norm": 0.5844734311103821, "learning_rate": 0.0001, "loss": 1.6985, "step": 892 }, { "epoch": 0.21685284118504128, "grad_norm": 0.5320757031440735, "learning_rate": 0.0001, "loss": 1.7044, "step": 893 }, { "epoch": 0.217095677513356, "grad_norm": 0.5599240660667419, "learning_rate": 0.0001, "loss": 1.7955, "step": 894 }, { "epoch": 0.21733851384167072, "grad_norm": 0.6245287656784058, "learning_rate": 0.0001, "loss": 1.691, "step": 895 }, { "epoch": 0.21758135016998542, "grad_norm": 0.5418979525566101, "learning_rate": 0.0001, "loss": 1.6215, "step": 896 }, { "epoch": 0.21782418649830015, "grad_norm": 0.6089024543762207, "learning_rate": 0.0001, "loss": 1.9166, "step": 897 }, { "epoch": 0.21806702282661486, "grad_norm": 0.5672260522842407, "learning_rate": 0.0001, "loss": 1.7904, "step": 898 }, { "epoch": 0.21830985915492956, "grad_norm": 0.5514928698539734, "learning_rate": 0.0001, "loss": 1.7884, "step": 899 }, { "epoch": 0.2185526954832443, "grad_norm": 0.5628061890602112, "learning_rate": 0.0001, "loss": 1.751, "step": 900 }, { "epoch": 0.218795531811559, "grad_norm": 0.5473241209983826, "learning_rate": 0.0001, "loss": 1.721, "step": 901 }, { "epoch": 0.21903836813987373, "grad_norm": 0.545527994632721, "learning_rate": 0.0001, "loss": 1.5349, "step": 902 }, { "epoch": 0.21928120446818844, "grad_norm": 0.5235995054244995, "learning_rate": 0.0001, "loss": 1.7536, "step": 903 }, { "epoch": 0.21952404079650314, "grad_norm": 0.5851334929466248, "learning_rate": 0.0001, "loss": 1.985, "step": 904 }, { "epoch": 0.21976687712481788, "grad_norm": 0.5480401515960693, "learning_rate": 0.0001, "loss": 1.7134, "step": 905 }, { "epoch": 0.22000971345313258, "grad_norm": 0.5195808410644531, "learning_rate": 0.0001, "loss": 1.7583, "step": 906 }, { "epoch": 0.22025254978144732, "grad_norm": 0.5582727193832397, "learning_rate": 0.0001, "loss": 1.9367, "step": 907 }, { "epoch": 0.22049538610976202, "grad_norm": 0.5689213871955872, "learning_rate": 0.0001, "loss": 1.9294, "step": 908 }, { "epoch": 0.22073822243807673, "grad_norm": 0.5949645638465881, "learning_rate": 0.0001, "loss": 1.7627, "step": 909 }, { "epoch": 0.22098105876639146, "grad_norm": 0.5724778175354004, "learning_rate": 0.0001, "loss": 1.7718, "step": 910 }, { "epoch": 0.22122389509470616, "grad_norm": 0.5361983180046082, "learning_rate": 0.0001, "loss": 1.5919, "step": 911 }, { "epoch": 0.2214667314230209, "grad_norm": 0.5725252032279968, "learning_rate": 0.0001, "loss": 1.7208, "step": 912 }, { "epoch": 0.2217095677513356, "grad_norm": 0.5657257437705994, "learning_rate": 0.0001, "loss": 1.7613, "step": 913 }, { "epoch": 0.2219524040796503, "grad_norm": 0.5461857318878174, "learning_rate": 0.0001, "loss": 1.6713, "step": 914 }, { "epoch": 0.22219524040796504, "grad_norm": 0.5253598690032959, "learning_rate": 0.0001, "loss": 1.6099, "step": 915 }, { "epoch": 0.22243807673627974, "grad_norm": 0.5413147211074829, "learning_rate": 0.0001, "loss": 1.7325, "step": 916 }, { "epoch": 0.22268091306459448, "grad_norm": 0.5368465185165405, "learning_rate": 0.0001, "loss": 1.7434, "step": 917 }, { "epoch": 0.22292374939290918, "grad_norm": 0.5531226992607117, "learning_rate": 0.0001, "loss": 1.6896, "step": 918 }, { "epoch": 0.2231665857212239, "grad_norm": 0.5544846653938293, "learning_rate": 0.0001, "loss": 1.6624, "step": 919 }, { "epoch": 0.22340942204953862, "grad_norm": 0.5697352290153503, "learning_rate": 0.0001, "loss": 1.7903, "step": 920 }, { "epoch": 0.22365225837785332, "grad_norm": 0.5445935130119324, "learning_rate": 0.0001, "loss": 1.5946, "step": 921 }, { "epoch": 0.22389509470616803, "grad_norm": 0.5548669695854187, "learning_rate": 0.0001, "loss": 1.7802, "step": 922 }, { "epoch": 0.22413793103448276, "grad_norm": 0.5918106436729431, "learning_rate": 0.0001, "loss": 1.813, "step": 923 }, { "epoch": 0.22438076736279747, "grad_norm": 0.5879506468772888, "learning_rate": 0.0001, "loss": 1.9609, "step": 924 }, { "epoch": 0.2246236036911122, "grad_norm": 0.5621781945228577, "learning_rate": 0.0001, "loss": 1.9015, "step": 925 }, { "epoch": 0.2248664400194269, "grad_norm": 0.5593553781509399, "learning_rate": 0.0001, "loss": 1.7965, "step": 926 }, { "epoch": 0.2251092763477416, "grad_norm": 0.6174554824829102, "learning_rate": 0.0001, "loss": 2.0494, "step": 927 }, { "epoch": 0.22535211267605634, "grad_norm": 0.5466719269752502, "learning_rate": 0.0001, "loss": 1.8213, "step": 928 }, { "epoch": 0.22559494900437105, "grad_norm": 0.5589328408241272, "learning_rate": 0.0001, "loss": 1.6947, "step": 929 }, { "epoch": 0.22583778533268578, "grad_norm": 0.5526649355888367, "learning_rate": 0.0001, "loss": 1.8647, "step": 930 }, { "epoch": 0.22608062166100049, "grad_norm": 0.5893845558166504, "learning_rate": 0.0001, "loss": 1.898, "step": 931 }, { "epoch": 0.2263234579893152, "grad_norm": 0.6088446378707886, "learning_rate": 0.0001, "loss": 1.8883, "step": 932 }, { "epoch": 0.22656629431762992, "grad_norm": 0.5552610754966736, "learning_rate": 0.0001, "loss": 1.6933, "step": 933 }, { "epoch": 0.22680913064594463, "grad_norm": 0.5152078866958618, "learning_rate": 0.0001, "loss": 1.7231, "step": 934 }, { "epoch": 0.22705196697425936, "grad_norm": 0.5598558783531189, "learning_rate": 0.0001, "loss": 1.7015, "step": 935 }, { "epoch": 0.22729480330257407, "grad_norm": 0.608623743057251, "learning_rate": 0.0001, "loss": 1.8497, "step": 936 }, { "epoch": 0.22753763963088877, "grad_norm": 0.570016086101532, "learning_rate": 0.0001, "loss": 1.8382, "step": 937 }, { "epoch": 0.2277804759592035, "grad_norm": 0.5475924015045166, "learning_rate": 0.0001, "loss": 1.798, "step": 938 }, { "epoch": 0.2280233122875182, "grad_norm": 0.5976389050483704, "learning_rate": 0.0001, "loss": 1.9021, "step": 939 }, { "epoch": 0.22826614861583294, "grad_norm": 0.5569254159927368, "learning_rate": 0.0001, "loss": 1.8965, "step": 940 }, { "epoch": 0.22850898494414765, "grad_norm": 0.5021308064460754, "learning_rate": 0.0001, "loss": 1.5807, "step": 941 }, { "epoch": 0.22875182127246235, "grad_norm": 0.515293300151825, "learning_rate": 0.0001, "loss": 1.6925, "step": 942 }, { "epoch": 0.22899465760077709, "grad_norm": 0.515910804271698, "learning_rate": 0.0001, "loss": 1.6818, "step": 943 }, { "epoch": 0.2292374939290918, "grad_norm": 0.5279413461685181, "learning_rate": 0.0001, "loss": 1.6615, "step": 944 }, { "epoch": 0.2294803302574065, "grad_norm": 0.5485278367996216, "learning_rate": 0.0001, "loss": 1.7407, "step": 945 }, { "epoch": 0.22972316658572123, "grad_norm": 0.5664181709289551, "learning_rate": 0.0001, "loss": 1.7865, "step": 946 }, { "epoch": 0.22996600291403593, "grad_norm": 0.5307700037956238, "learning_rate": 0.0001, "loss": 1.6972, "step": 947 }, { "epoch": 0.23020883924235067, "grad_norm": 0.5400621294975281, "learning_rate": 0.0001, "loss": 1.7756, "step": 948 }, { "epoch": 0.23045167557066537, "grad_norm": 0.5360437631607056, "learning_rate": 0.0001, "loss": 1.8162, "step": 949 }, { "epoch": 0.23069451189898008, "grad_norm": 0.5398291945457458, "learning_rate": 0.0001, "loss": 1.699, "step": 950 }, { "epoch": 0.2309373482272948, "grad_norm": 0.5587419271469116, "learning_rate": 0.0001, "loss": 1.945, "step": 951 }, { "epoch": 0.2311801845556095, "grad_norm": 0.5683267116546631, "learning_rate": 0.0001, "loss": 1.8404, "step": 952 }, { "epoch": 0.23142302088392425, "grad_norm": 0.5431987643241882, "learning_rate": 0.0001, "loss": 1.8186, "step": 953 }, { "epoch": 0.23166585721223895, "grad_norm": 0.5460397005081177, "learning_rate": 0.0001, "loss": 1.7055, "step": 954 }, { "epoch": 0.23190869354055366, "grad_norm": 0.6160047054290771, "learning_rate": 0.0001, "loss": 1.9158, "step": 955 }, { "epoch": 0.2321515298688684, "grad_norm": 0.5519976615905762, "learning_rate": 0.0001, "loss": 1.9105, "step": 956 }, { "epoch": 0.2323943661971831, "grad_norm": 0.5553170442581177, "learning_rate": 0.0001, "loss": 1.7359, "step": 957 }, { "epoch": 0.23263720252549783, "grad_norm": 0.5691775679588318, "learning_rate": 0.0001, "loss": 1.853, "step": 958 }, { "epoch": 0.23288003885381253, "grad_norm": 0.5368521809577942, "learning_rate": 0.0001, "loss": 1.7095, "step": 959 }, { "epoch": 0.23312287518212724, "grad_norm": 0.5350582003593445, "learning_rate": 0.0001, "loss": 1.6111, "step": 960 }, { "epoch": 0.23336571151044197, "grad_norm": 0.5377877354621887, "learning_rate": 0.0001, "loss": 1.6835, "step": 961 }, { "epoch": 0.23360854783875667, "grad_norm": 0.6030954122543335, "learning_rate": 0.0001, "loss": 1.792, "step": 962 }, { "epoch": 0.2338513841670714, "grad_norm": 0.5422897338867188, "learning_rate": 0.0001, "loss": 1.6607, "step": 963 }, { "epoch": 0.2340942204953861, "grad_norm": 0.5576704144477844, "learning_rate": 0.0001, "loss": 1.5958, "step": 964 }, { "epoch": 0.23433705682370082, "grad_norm": 0.5503125190734863, "learning_rate": 0.0001, "loss": 1.701, "step": 965 }, { "epoch": 0.23457989315201555, "grad_norm": 0.5446116328239441, "learning_rate": 0.0001, "loss": 1.7575, "step": 966 }, { "epoch": 0.23482272948033026, "grad_norm": 0.5460963845252991, "learning_rate": 0.0001, "loss": 1.7283, "step": 967 }, { "epoch": 0.23506556580864496, "grad_norm": 0.5579452514648438, "learning_rate": 0.0001, "loss": 1.8132, "step": 968 }, { "epoch": 0.2353084021369597, "grad_norm": 0.5324477553367615, "learning_rate": 0.0001, "loss": 1.6876, "step": 969 }, { "epoch": 0.2355512384652744, "grad_norm": 0.5197933316230774, "learning_rate": 0.0001, "loss": 1.6824, "step": 970 }, { "epoch": 0.23579407479358913, "grad_norm": 0.5394790172576904, "learning_rate": 0.0001, "loss": 1.8378, "step": 971 }, { "epoch": 0.23603691112190384, "grad_norm": 0.5891541242599487, "learning_rate": 0.0001, "loss": 1.8853, "step": 972 }, { "epoch": 0.23627974745021854, "grad_norm": 0.539494514465332, "learning_rate": 0.0001, "loss": 1.7937, "step": 973 }, { "epoch": 0.23652258377853327, "grad_norm": 0.5574384927749634, "learning_rate": 0.0001, "loss": 1.781, "step": 974 }, { "epoch": 0.23676542010684798, "grad_norm": 0.5909311175346375, "learning_rate": 0.0001, "loss": 1.7712, "step": 975 }, { "epoch": 0.2370082564351627, "grad_norm": 0.6242678761482239, "learning_rate": 0.0001, "loss": 1.978, "step": 976 }, { "epoch": 0.23725109276347742, "grad_norm": 0.5489070415496826, "learning_rate": 0.0001, "loss": 1.8114, "step": 977 }, { "epoch": 0.23749392909179212, "grad_norm": 0.5614756345748901, "learning_rate": 0.0001, "loss": 1.8186, "step": 978 }, { "epoch": 0.23773676542010685, "grad_norm": 0.5582174062728882, "learning_rate": 0.0001, "loss": 1.8372, "step": 979 }, { "epoch": 0.23797960174842156, "grad_norm": 0.5511270761489868, "learning_rate": 0.0001, "loss": 1.6409, "step": 980 }, { "epoch": 0.2382224380767363, "grad_norm": 0.5524898767471313, "learning_rate": 0.0001, "loss": 1.705, "step": 981 }, { "epoch": 0.238465274405051, "grad_norm": 0.542084813117981, "learning_rate": 0.0001, "loss": 1.6964, "step": 982 }, { "epoch": 0.2387081107333657, "grad_norm": 0.5258582234382629, "learning_rate": 0.0001, "loss": 1.4242, "step": 983 }, { "epoch": 0.23895094706168044, "grad_norm": 0.5367980599403381, "learning_rate": 0.0001, "loss": 1.7106, "step": 984 }, { "epoch": 0.23919378338999514, "grad_norm": 0.5734711289405823, "learning_rate": 0.0001, "loss": 1.8304, "step": 985 }, { "epoch": 0.23943661971830985, "grad_norm": 0.532623827457428, "learning_rate": 0.0001, "loss": 1.7613, "step": 986 }, { "epoch": 0.23967945604662458, "grad_norm": 0.5503738522529602, "learning_rate": 0.0001, "loss": 1.5809, "step": 987 }, { "epoch": 0.23992229237493928, "grad_norm": 0.5924004316329956, "learning_rate": 0.0001, "loss": 1.7314, "step": 988 }, { "epoch": 0.24016512870325402, "grad_norm": 0.6099340319633484, "learning_rate": 0.0001, "loss": 1.907, "step": 989 }, { "epoch": 0.24040796503156872, "grad_norm": 0.5783790349960327, "learning_rate": 0.0001, "loss": 1.8731, "step": 990 }, { "epoch": 0.24065080135988343, "grad_norm": 0.5989636182785034, "learning_rate": 0.0001, "loss": 1.9282, "step": 991 }, { "epoch": 0.24089363768819816, "grad_norm": 0.5222843289375305, "learning_rate": 0.0001, "loss": 1.5361, "step": 992 }, { "epoch": 0.24113647401651286, "grad_norm": 0.5943722724914551, "learning_rate": 0.0001, "loss": 1.7093, "step": 993 }, { "epoch": 0.2413793103448276, "grad_norm": 0.6033304333686829, "learning_rate": 0.0001, "loss": 1.8118, "step": 994 }, { "epoch": 0.2416221466731423, "grad_norm": 0.5955982804298401, "learning_rate": 0.0001, "loss": 1.851, "step": 995 }, { "epoch": 0.241864983001457, "grad_norm": 0.5730704069137573, "learning_rate": 0.0001, "loss": 1.6807, "step": 996 }, { "epoch": 0.24210781932977174, "grad_norm": 0.5521137118339539, "learning_rate": 0.0001, "loss": 1.7042, "step": 997 }, { "epoch": 0.24235065565808644, "grad_norm": 0.5170513391494751, "learning_rate": 0.0001, "loss": 1.7792, "step": 998 }, { "epoch": 0.24259349198640118, "grad_norm": 0.5559035539627075, "learning_rate": 0.0001, "loss": 1.7803, "step": 999 }, { "epoch": 0.24283632831471588, "grad_norm": 0.5826027393341064, "learning_rate": 0.0001, "loss": 1.8425, "step": 1000 }, { "epoch": 0.2430791646430306, "grad_norm": 1.0654746294021606, "learning_rate": 0.0001, "loss": 1.7884, "step": 1001 }, { "epoch": 0.24332200097134532, "grad_norm": 0.5563173890113831, "learning_rate": 0.0001, "loss": 1.6964, "step": 1002 }, { "epoch": 0.24356483729966003, "grad_norm": 0.5636867880821228, "learning_rate": 0.0001, "loss": 1.8316, "step": 1003 }, { "epoch": 0.24380767362797476, "grad_norm": 0.5363386869430542, "learning_rate": 0.0001, "loss": 1.6151, "step": 1004 }, { "epoch": 0.24405050995628946, "grad_norm": 0.5657698512077332, "learning_rate": 0.0001, "loss": 1.8426, "step": 1005 }, { "epoch": 0.24429334628460417, "grad_norm": 0.548893392086029, "learning_rate": 0.0001, "loss": 1.7432, "step": 1006 }, { "epoch": 0.2445361826129189, "grad_norm": 0.5803833603858948, "learning_rate": 0.0001, "loss": 1.8235, "step": 1007 }, { "epoch": 0.2447790189412336, "grad_norm": 0.5895869731903076, "learning_rate": 0.0001, "loss": 1.787, "step": 1008 }, { "epoch": 0.2450218552695483, "grad_norm": 0.5639642477035522, "learning_rate": 0.0001, "loss": 1.6861, "step": 1009 }, { "epoch": 0.24526469159786304, "grad_norm": 0.5599086880683899, "learning_rate": 0.0001, "loss": 1.7854, "step": 1010 }, { "epoch": 0.24550752792617775, "grad_norm": 0.5383694171905518, "learning_rate": 0.0001, "loss": 1.6152, "step": 1011 }, { "epoch": 0.24575036425449248, "grad_norm": 0.5369172096252441, "learning_rate": 0.0001, "loss": 1.5798, "step": 1012 }, { "epoch": 0.2459932005828072, "grad_norm": 0.5793865919113159, "learning_rate": 0.0001, "loss": 1.8873, "step": 1013 }, { "epoch": 0.2462360369111219, "grad_norm": 0.5521877408027649, "learning_rate": 0.0001, "loss": 1.743, "step": 1014 }, { "epoch": 0.24647887323943662, "grad_norm": 0.5447569489479065, "learning_rate": 0.0001, "loss": 1.8138, "step": 1015 }, { "epoch": 0.24672170956775133, "grad_norm": 0.5787389278411865, "learning_rate": 0.0001, "loss": 1.8368, "step": 1016 }, { "epoch": 0.24696454589606606, "grad_norm": 0.5416997075080872, "learning_rate": 0.0001, "loss": 1.6505, "step": 1017 }, { "epoch": 0.24720738222438077, "grad_norm": 0.5685380697250366, "learning_rate": 0.0001, "loss": 1.7282, "step": 1018 }, { "epoch": 0.24745021855269547, "grad_norm": 0.562731921672821, "learning_rate": 0.0001, "loss": 1.7308, "step": 1019 }, { "epoch": 0.2476930548810102, "grad_norm": 0.5102028846740723, "learning_rate": 0.0001, "loss": 1.6552, "step": 1020 }, { "epoch": 0.2479358912093249, "grad_norm": 0.5441479682922363, "learning_rate": 0.0001, "loss": 1.6607, "step": 1021 }, { "epoch": 0.24817872753763964, "grad_norm": 0.5577821135520935, "learning_rate": 0.0001, "loss": 1.6036, "step": 1022 }, { "epoch": 0.24842156386595435, "grad_norm": 0.5477888584136963, "learning_rate": 0.0001, "loss": 1.7947, "step": 1023 }, { "epoch": 0.24866440019426905, "grad_norm": 0.5860451459884644, "learning_rate": 0.0001, "loss": 1.9354, "step": 1024 }, { "epoch": 0.24890723652258379, "grad_norm": 0.5447387099266052, "learning_rate": 0.0001, "loss": 1.6038, "step": 1025 }, { "epoch": 0.2491500728508985, "grad_norm": 0.5881152749061584, "learning_rate": 0.0001, "loss": 1.8635, "step": 1026 }, { "epoch": 0.24939290917921322, "grad_norm": 0.5507918000221252, "learning_rate": 0.0001, "loss": 1.6679, "step": 1027 }, { "epoch": 0.24963574550752793, "grad_norm": 0.5391066670417786, "learning_rate": 0.0001, "loss": 1.6698, "step": 1028 }, { "epoch": 0.24987858183584263, "grad_norm": 0.5557683706283569, "learning_rate": 0.0001, "loss": 1.901, "step": 1029 }, { "epoch": 0.25012141816415734, "grad_norm": 0.5551528930664062, "learning_rate": 0.0001, "loss": 1.7396, "step": 1030 }, { "epoch": 0.25036425449247207, "grad_norm": 0.5728172659873962, "learning_rate": 0.0001, "loss": 1.9029, "step": 1031 }, { "epoch": 0.2506070908207868, "grad_norm": 0.5627360343933105, "learning_rate": 0.0001, "loss": 1.6951, "step": 1032 }, { "epoch": 0.2508499271491015, "grad_norm": 0.5626469850540161, "learning_rate": 0.0001, "loss": 1.8209, "step": 1033 }, { "epoch": 0.2510927634774162, "grad_norm": 0.5618252754211426, "learning_rate": 0.0001, "loss": 1.8646, "step": 1034 }, { "epoch": 0.25133559980573095, "grad_norm": 0.5571959018707275, "learning_rate": 0.0001, "loss": 1.8346, "step": 1035 }, { "epoch": 0.2515784361340457, "grad_norm": 0.5539636611938477, "learning_rate": 0.0001, "loss": 1.7712, "step": 1036 }, { "epoch": 0.25182127246236036, "grad_norm": 0.559863269329071, "learning_rate": 0.0001, "loss": 1.8712, "step": 1037 }, { "epoch": 0.2520641087906751, "grad_norm": 0.5371668934822083, "learning_rate": 0.0001, "loss": 1.8333, "step": 1038 }, { "epoch": 0.2523069451189898, "grad_norm": 0.5461929440498352, "learning_rate": 0.0001, "loss": 1.7055, "step": 1039 }, { "epoch": 0.2525497814473045, "grad_norm": 0.6017919778823853, "learning_rate": 0.0001, "loss": 1.7399, "step": 1040 }, { "epoch": 0.25279261777561923, "grad_norm": 0.5876109600067139, "learning_rate": 0.0001, "loss": 1.9032, "step": 1041 }, { "epoch": 0.25303545410393397, "grad_norm": 0.5787210464477539, "learning_rate": 0.0001, "loss": 1.753, "step": 1042 }, { "epoch": 0.25327829043224864, "grad_norm": 0.5309500694274902, "learning_rate": 0.0001, "loss": 1.6674, "step": 1043 }, { "epoch": 0.2535211267605634, "grad_norm": 0.539039134979248, "learning_rate": 0.0001, "loss": 1.6377, "step": 1044 }, { "epoch": 0.2537639630888781, "grad_norm": 0.5269420742988586, "learning_rate": 0.0001, "loss": 1.6942, "step": 1045 }, { "epoch": 0.2540067994171928, "grad_norm": 0.5767653584480286, "learning_rate": 0.0001, "loss": 1.6344, "step": 1046 }, { "epoch": 0.2542496357455075, "grad_norm": 0.5234490633010864, "learning_rate": 0.0001, "loss": 1.7217, "step": 1047 }, { "epoch": 0.25449247207382225, "grad_norm": 0.5650292634963989, "learning_rate": 0.0001, "loss": 1.8731, "step": 1048 }, { "epoch": 0.254735308402137, "grad_norm": 0.5449150800704956, "learning_rate": 0.0001, "loss": 1.8406, "step": 1049 }, { "epoch": 0.25497814473045166, "grad_norm": 0.5538469552993774, "learning_rate": 0.0001, "loss": 1.5891, "step": 1050 }, { "epoch": 0.2552209810587664, "grad_norm": 0.5362374186515808, "learning_rate": 0.0001, "loss": 1.7559, "step": 1051 }, { "epoch": 0.2554638173870811, "grad_norm": 0.5948169231414795, "learning_rate": 0.0001, "loss": 1.7964, "step": 1052 }, { "epoch": 0.2557066537153958, "grad_norm": 0.5787668228149414, "learning_rate": 0.0001, "loss": 1.812, "step": 1053 }, { "epoch": 0.25594949004371054, "grad_norm": 0.589809238910675, "learning_rate": 0.0001, "loss": 1.7048, "step": 1054 }, { "epoch": 0.25619232637202527, "grad_norm": 0.5409502387046814, "learning_rate": 0.0001, "loss": 1.5799, "step": 1055 }, { "epoch": 0.25643516270033995, "grad_norm": 0.6000292897224426, "learning_rate": 0.0001, "loss": 1.8869, "step": 1056 }, { "epoch": 0.2566779990286547, "grad_norm": 0.5915602445602417, "learning_rate": 0.0001, "loss": 1.9479, "step": 1057 }, { "epoch": 0.2569208353569694, "grad_norm": 0.5632345080375671, "learning_rate": 0.0001, "loss": 1.7527, "step": 1058 }, { "epoch": 0.25716367168528415, "grad_norm": 0.6115126013755798, "learning_rate": 0.0001, "loss": 1.6562, "step": 1059 }, { "epoch": 0.2574065080135988, "grad_norm": 0.5524129867553711, "learning_rate": 0.0001, "loss": 1.6644, "step": 1060 }, { "epoch": 0.25764934434191356, "grad_norm": 0.5773287415504456, "learning_rate": 0.0001, "loss": 1.6258, "step": 1061 }, { "epoch": 0.2578921806702283, "grad_norm": 0.5275243520736694, "learning_rate": 0.0001, "loss": 1.7353, "step": 1062 }, { "epoch": 0.25813501699854297, "grad_norm": 0.5649922490119934, "learning_rate": 0.0001, "loss": 1.738, "step": 1063 }, { "epoch": 0.2583778533268577, "grad_norm": 0.5758867263793945, "learning_rate": 0.0001, "loss": 1.796, "step": 1064 }, { "epoch": 0.25862068965517243, "grad_norm": 0.529830276966095, "learning_rate": 0.0001, "loss": 1.5816, "step": 1065 }, { "epoch": 0.2588635259834871, "grad_norm": 0.6376872062683105, "learning_rate": 0.0001, "loss": 2.0468, "step": 1066 }, { "epoch": 0.25910636231180184, "grad_norm": 0.5491669774055481, "learning_rate": 0.0001, "loss": 1.6469, "step": 1067 }, { "epoch": 0.2593491986401166, "grad_norm": 0.5449734926223755, "learning_rate": 0.0001, "loss": 1.6586, "step": 1068 }, { "epoch": 0.25959203496843125, "grad_norm": 0.6246454119682312, "learning_rate": 0.0001, "loss": 1.814, "step": 1069 }, { "epoch": 0.259834871296746, "grad_norm": 0.5419249534606934, "learning_rate": 0.0001, "loss": 1.8207, "step": 1070 }, { "epoch": 0.2600777076250607, "grad_norm": 0.5403563976287842, "learning_rate": 0.0001, "loss": 1.7334, "step": 1071 }, { "epoch": 0.26032054395337545, "grad_norm": 0.5629746913909912, "learning_rate": 0.0001, "loss": 1.8187, "step": 1072 }, { "epoch": 0.2605633802816901, "grad_norm": 0.5278500318527222, "learning_rate": 0.0001, "loss": 1.5345, "step": 1073 }, { "epoch": 0.26080621661000486, "grad_norm": 0.5906748175621033, "learning_rate": 0.0001, "loss": 1.9218, "step": 1074 }, { "epoch": 0.2610490529383196, "grad_norm": 0.5823220014572144, "learning_rate": 0.0001, "loss": 1.7812, "step": 1075 }, { "epoch": 0.26129188926663427, "grad_norm": 0.5446853041648865, "learning_rate": 0.0001, "loss": 1.5994, "step": 1076 }, { "epoch": 0.261534725594949, "grad_norm": 0.5715393424034119, "learning_rate": 0.0001, "loss": 1.8105, "step": 1077 }, { "epoch": 0.26177756192326374, "grad_norm": 0.5600488781929016, "learning_rate": 0.0001, "loss": 1.8232, "step": 1078 }, { "epoch": 0.2620203982515784, "grad_norm": 0.5303180813789368, "learning_rate": 0.0001, "loss": 1.8029, "step": 1079 }, { "epoch": 0.26226323457989315, "grad_norm": 0.632108211517334, "learning_rate": 0.0001, "loss": 1.9495, "step": 1080 }, { "epoch": 0.2625060709082079, "grad_norm": 0.5606825947761536, "learning_rate": 0.0001, "loss": 1.7417, "step": 1081 }, { "epoch": 0.2627489072365226, "grad_norm": 0.5599796772003174, "learning_rate": 0.0001, "loss": 1.7526, "step": 1082 }, { "epoch": 0.2629917435648373, "grad_norm": 0.5996298789978027, "learning_rate": 0.0001, "loss": 1.8755, "step": 1083 }, { "epoch": 0.263234579893152, "grad_norm": 0.5917564630508423, "learning_rate": 0.0001, "loss": 1.7218, "step": 1084 }, { "epoch": 0.26347741622146675, "grad_norm": 0.5896962285041809, "learning_rate": 0.0001, "loss": 1.8442, "step": 1085 }, { "epoch": 0.26372025254978143, "grad_norm": 0.5371586084365845, "learning_rate": 0.0001, "loss": 1.7769, "step": 1086 }, { "epoch": 0.26396308887809616, "grad_norm": 0.5033586621284485, "learning_rate": 0.0001, "loss": 1.6673, "step": 1087 }, { "epoch": 0.2642059252064109, "grad_norm": 0.595294713973999, "learning_rate": 0.0001, "loss": 1.9274, "step": 1088 }, { "epoch": 0.2644487615347256, "grad_norm": 0.5519765615463257, "learning_rate": 0.0001, "loss": 1.8435, "step": 1089 }, { "epoch": 0.2646915978630403, "grad_norm": 0.5802309513092041, "learning_rate": 0.0001, "loss": 1.97, "step": 1090 }, { "epoch": 0.26493443419135504, "grad_norm": 0.5462941527366638, "learning_rate": 0.0001, "loss": 1.8116, "step": 1091 }, { "epoch": 0.2651772705196697, "grad_norm": 0.5389935970306396, "learning_rate": 0.0001, "loss": 1.5828, "step": 1092 }, { "epoch": 0.26542010684798445, "grad_norm": 0.5654126405715942, "learning_rate": 0.0001, "loss": 1.8127, "step": 1093 }, { "epoch": 0.2656629431762992, "grad_norm": 0.5769006013870239, "learning_rate": 0.0001, "loss": 1.8799, "step": 1094 }, { "epoch": 0.2659057795046139, "grad_norm": 0.5631387829780579, "learning_rate": 0.0001, "loss": 1.6312, "step": 1095 }, { "epoch": 0.2661486158329286, "grad_norm": 0.5755802392959595, "learning_rate": 0.0001, "loss": 1.6453, "step": 1096 }, { "epoch": 0.2663914521612433, "grad_norm": 0.573484480381012, "learning_rate": 0.0001, "loss": 1.8436, "step": 1097 }, { "epoch": 0.26663428848955806, "grad_norm": 0.5359703302383423, "learning_rate": 0.0001, "loss": 1.5988, "step": 1098 }, { "epoch": 0.26687712481787274, "grad_norm": 0.516389012336731, "learning_rate": 0.0001, "loss": 1.5106, "step": 1099 }, { "epoch": 0.26711996114618747, "grad_norm": 0.5863427519798279, "learning_rate": 0.0001, "loss": 1.7809, "step": 1100 }, { "epoch": 0.2673627974745022, "grad_norm": 0.5418011546134949, "learning_rate": 0.0001, "loss": 1.7002, "step": 1101 }, { "epoch": 0.2676056338028169, "grad_norm": 0.5960160493850708, "learning_rate": 0.0001, "loss": 1.8422, "step": 1102 }, { "epoch": 0.2678484701311316, "grad_norm": 0.6008249521255493, "learning_rate": 0.0001, "loss": 1.7556, "step": 1103 }, { "epoch": 0.26809130645944634, "grad_norm": 0.5573046803474426, "learning_rate": 0.0001, "loss": 1.7899, "step": 1104 }, { "epoch": 0.2683341427877611, "grad_norm": 0.529295027256012, "learning_rate": 0.0001, "loss": 1.7662, "step": 1105 }, { "epoch": 0.26857697911607575, "grad_norm": 0.5498364567756653, "learning_rate": 0.0001, "loss": 1.6897, "step": 1106 }, { "epoch": 0.2688198154443905, "grad_norm": 0.5485572814941406, "learning_rate": 0.0001, "loss": 1.8122, "step": 1107 }, { "epoch": 0.2690626517727052, "grad_norm": 0.5335333943367004, "learning_rate": 0.0001, "loss": 1.5619, "step": 1108 }, { "epoch": 0.2693054881010199, "grad_norm": 0.5681390762329102, "learning_rate": 0.0001, "loss": 1.871, "step": 1109 }, { "epoch": 0.26954832442933463, "grad_norm": 0.5439334511756897, "learning_rate": 0.0001, "loss": 1.8251, "step": 1110 }, { "epoch": 0.26979116075764936, "grad_norm": 0.560706377029419, "learning_rate": 0.0001, "loss": 1.7744, "step": 1111 }, { "epoch": 0.27003399708596404, "grad_norm": 0.5368844270706177, "learning_rate": 0.0001, "loss": 1.6958, "step": 1112 }, { "epoch": 0.27027683341427877, "grad_norm": 0.5631430149078369, "learning_rate": 0.0001, "loss": 1.7116, "step": 1113 }, { "epoch": 0.2705196697425935, "grad_norm": 0.5687524676322937, "learning_rate": 0.0001, "loss": 1.7647, "step": 1114 }, { "epoch": 0.2707625060709082, "grad_norm": 0.5624735951423645, "learning_rate": 0.0001, "loss": 1.724, "step": 1115 }, { "epoch": 0.2710053423992229, "grad_norm": 0.586024284362793, "learning_rate": 0.0001, "loss": 1.8873, "step": 1116 }, { "epoch": 0.27124817872753765, "grad_norm": 0.5499001145362854, "learning_rate": 0.0001, "loss": 1.6416, "step": 1117 }, { "epoch": 0.2714910150558524, "grad_norm": 0.5544242858886719, "learning_rate": 0.0001, "loss": 1.8069, "step": 1118 }, { "epoch": 0.27173385138416706, "grad_norm": 0.5592672228813171, "learning_rate": 0.0001, "loss": 1.7016, "step": 1119 }, { "epoch": 0.2719766877124818, "grad_norm": 0.524213433265686, "learning_rate": 0.0001, "loss": 1.5827, "step": 1120 }, { "epoch": 0.2722195240407965, "grad_norm": 0.5297691822052002, "learning_rate": 0.0001, "loss": 1.8047, "step": 1121 }, { "epoch": 0.2724623603691112, "grad_norm": 0.5715131759643555, "learning_rate": 0.0001, "loss": 1.7379, "step": 1122 }, { "epoch": 0.27270519669742593, "grad_norm": 0.5559701323509216, "learning_rate": 0.0001, "loss": 1.9112, "step": 1123 }, { "epoch": 0.27294803302574067, "grad_norm": 0.5795487761497498, "learning_rate": 0.0001, "loss": 1.7956, "step": 1124 }, { "epoch": 0.27319086935405534, "grad_norm": 0.5590142607688904, "learning_rate": 0.0001, "loss": 1.6251, "step": 1125 }, { "epoch": 0.2734337056823701, "grad_norm": 0.5858544707298279, "learning_rate": 0.0001, "loss": 1.9377, "step": 1126 }, { "epoch": 0.2736765420106848, "grad_norm": 0.5422412753105164, "learning_rate": 0.0001, "loss": 1.6295, "step": 1127 }, { "epoch": 0.27391937833899954, "grad_norm": 0.5824558138847351, "learning_rate": 0.0001, "loss": 1.8094, "step": 1128 }, { "epoch": 0.2741622146673142, "grad_norm": 0.5579871535301208, "learning_rate": 0.0001, "loss": 1.6865, "step": 1129 }, { "epoch": 0.27440505099562895, "grad_norm": 0.5918609499931335, "learning_rate": 0.0001, "loss": 1.7295, "step": 1130 }, { "epoch": 0.2746478873239437, "grad_norm": 0.5762107968330383, "learning_rate": 0.0001, "loss": 1.7305, "step": 1131 }, { "epoch": 0.27489072365225836, "grad_norm": 0.5555712580680847, "learning_rate": 0.0001, "loss": 1.6761, "step": 1132 }, { "epoch": 0.2751335599805731, "grad_norm": 0.5766242146492004, "learning_rate": 0.0001, "loss": 1.9184, "step": 1133 }, { "epoch": 0.2753763963088878, "grad_norm": 0.5332757234573364, "learning_rate": 0.0001, "loss": 1.7171, "step": 1134 }, { "epoch": 0.2756192326372025, "grad_norm": 0.6191022992134094, "learning_rate": 0.0001, "loss": 1.9584, "step": 1135 }, { "epoch": 0.27586206896551724, "grad_norm": 0.603369414806366, "learning_rate": 0.0001, "loss": 1.7676, "step": 1136 }, { "epoch": 0.27610490529383197, "grad_norm": 0.5369250178337097, "learning_rate": 0.0001, "loss": 1.7546, "step": 1137 }, { "epoch": 0.27634774162214665, "grad_norm": 0.5725906491279602, "learning_rate": 0.0001, "loss": 1.923, "step": 1138 }, { "epoch": 0.2765905779504614, "grad_norm": 0.5639214515686035, "learning_rate": 0.0001, "loss": 1.7745, "step": 1139 }, { "epoch": 0.2768334142787761, "grad_norm": 0.5801315903663635, "learning_rate": 0.0001, "loss": 1.8606, "step": 1140 }, { "epoch": 0.27707625060709085, "grad_norm": 0.5294753313064575, "learning_rate": 0.0001, "loss": 1.7321, "step": 1141 }, { "epoch": 0.2773190869354055, "grad_norm": 0.5789963603019714, "learning_rate": 0.0001, "loss": 1.877, "step": 1142 }, { "epoch": 0.27756192326372026, "grad_norm": 0.5986112356185913, "learning_rate": 0.0001, "loss": 1.7511, "step": 1143 }, { "epoch": 0.277804759592035, "grad_norm": 0.611415684223175, "learning_rate": 0.0001, "loss": 1.772, "step": 1144 }, { "epoch": 0.27804759592034967, "grad_norm": 0.5708376169204712, "learning_rate": 0.0001, "loss": 1.5659, "step": 1145 }, { "epoch": 0.2782904322486644, "grad_norm": 0.5890704989433289, "learning_rate": 0.0001, "loss": 1.7244, "step": 1146 }, { "epoch": 0.27853326857697913, "grad_norm": 0.5593096613883972, "learning_rate": 0.0001, "loss": 1.796, "step": 1147 }, { "epoch": 0.2787761049052938, "grad_norm": 0.6066704392433167, "learning_rate": 0.0001, "loss": 1.8155, "step": 1148 }, { "epoch": 0.27901894123360854, "grad_norm": 0.551701009273529, "learning_rate": 0.0001, "loss": 1.7212, "step": 1149 }, { "epoch": 0.2792617775619233, "grad_norm": 0.5450371503829956, "learning_rate": 0.0001, "loss": 1.6942, "step": 1150 }, { "epoch": 0.279504613890238, "grad_norm": 0.5442568063735962, "learning_rate": 0.0001, "loss": 1.823, "step": 1151 }, { "epoch": 0.2797474502185527, "grad_norm": 0.5607459545135498, "learning_rate": 0.0001, "loss": 1.6519, "step": 1152 }, { "epoch": 0.2799902865468674, "grad_norm": 0.5624642372131348, "learning_rate": 0.0001, "loss": 1.84, "step": 1153 }, { "epoch": 0.28023312287518215, "grad_norm": 0.6023097038269043, "learning_rate": 0.0001, "loss": 1.695, "step": 1154 }, { "epoch": 0.2804759592034968, "grad_norm": 0.5366615653038025, "learning_rate": 0.0001, "loss": 1.4541, "step": 1155 }, { "epoch": 0.28071879553181156, "grad_norm": 0.5833327174186707, "learning_rate": 0.0001, "loss": 1.8255, "step": 1156 }, { "epoch": 0.2809616318601263, "grad_norm": 0.5773898363113403, "learning_rate": 0.0001, "loss": 1.9525, "step": 1157 }, { "epoch": 0.28120446818844097, "grad_norm": 0.569523274898529, "learning_rate": 0.0001, "loss": 1.7754, "step": 1158 }, { "epoch": 0.2814473045167557, "grad_norm": 0.5390274524688721, "learning_rate": 0.0001, "loss": 1.4668, "step": 1159 }, { "epoch": 0.28169014084507044, "grad_norm": 0.5608516931533813, "learning_rate": 0.0001, "loss": 1.6221, "step": 1160 }, { "epoch": 0.2819329771733851, "grad_norm": 0.5500361919403076, "learning_rate": 0.0001, "loss": 1.5958, "step": 1161 }, { "epoch": 0.28217581350169985, "grad_norm": 0.5973489284515381, "learning_rate": 0.0001, "loss": 1.8089, "step": 1162 }, { "epoch": 0.2824186498300146, "grad_norm": 0.5561203360557556, "learning_rate": 0.0001, "loss": 1.6301, "step": 1163 }, { "epoch": 0.2826614861583293, "grad_norm": 0.6018040776252747, "learning_rate": 0.0001, "loss": 1.8193, "step": 1164 }, { "epoch": 0.282904322486644, "grad_norm": 0.5456592440605164, "learning_rate": 0.0001, "loss": 1.6881, "step": 1165 }, { "epoch": 0.2831471588149587, "grad_norm": 0.5560385584831238, "learning_rate": 0.0001, "loss": 1.8017, "step": 1166 }, { "epoch": 0.28338999514327345, "grad_norm": 0.5754451751708984, "learning_rate": 0.0001, "loss": 1.9064, "step": 1167 }, { "epoch": 0.28363283147158813, "grad_norm": 0.5309687256813049, "learning_rate": 0.0001, "loss": 1.7043, "step": 1168 }, { "epoch": 0.28387566779990286, "grad_norm": 1.5462857484817505, "learning_rate": 0.0001, "loss": 1.7804, "step": 1169 }, { "epoch": 0.2841185041282176, "grad_norm": 0.5605103373527527, "learning_rate": 0.0001, "loss": 1.7713, "step": 1170 }, { "epoch": 0.2843613404565323, "grad_norm": 0.5687307715415955, "learning_rate": 0.0001, "loss": 1.7273, "step": 1171 }, { "epoch": 0.284604176784847, "grad_norm": 0.5158697366714478, "learning_rate": 0.0001, "loss": 1.4238, "step": 1172 }, { "epoch": 0.28484701311316174, "grad_norm": 0.5550355315208435, "learning_rate": 0.0001, "loss": 1.7404, "step": 1173 }, { "epoch": 0.2850898494414764, "grad_norm": 0.5797061920166016, "learning_rate": 0.0001, "loss": 1.7718, "step": 1174 }, { "epoch": 0.28533268576979115, "grad_norm": 0.5334068536758423, "learning_rate": 0.0001, "loss": 1.6161, "step": 1175 }, { "epoch": 0.2855755220981059, "grad_norm": 0.5494942665100098, "learning_rate": 0.0001, "loss": 1.7229, "step": 1176 }, { "epoch": 0.2858183584264206, "grad_norm": 0.6060776114463806, "learning_rate": 0.0001, "loss": 1.8268, "step": 1177 }, { "epoch": 0.2860611947547353, "grad_norm": 0.5575780868530273, "learning_rate": 0.0001, "loss": 1.8252, "step": 1178 }, { "epoch": 0.28630403108305, "grad_norm": 0.5623884201049805, "learning_rate": 0.0001, "loss": 1.8873, "step": 1179 }, { "epoch": 0.28654686741136476, "grad_norm": 0.5704313516616821, "learning_rate": 0.0001, "loss": 1.8121, "step": 1180 }, { "epoch": 0.28678970373967944, "grad_norm": 0.5479267239570618, "learning_rate": 0.0001, "loss": 1.804, "step": 1181 }, { "epoch": 0.28703254006799417, "grad_norm": 0.5762827396392822, "learning_rate": 0.0001, "loss": 1.8986, "step": 1182 }, { "epoch": 0.2872753763963089, "grad_norm": 0.592075526714325, "learning_rate": 0.0001, "loss": 1.8693, "step": 1183 }, { "epoch": 0.2875182127246236, "grad_norm": 0.5763819813728333, "learning_rate": 0.0001, "loss": 1.7756, "step": 1184 }, { "epoch": 0.2877610490529383, "grad_norm": 0.5444159507751465, "learning_rate": 0.0001, "loss": 1.7511, "step": 1185 }, { "epoch": 0.28800388538125304, "grad_norm": 0.6072043776512146, "learning_rate": 0.0001, "loss": 1.9858, "step": 1186 }, { "epoch": 0.2882467217095678, "grad_norm": 0.5676252841949463, "learning_rate": 0.0001, "loss": 1.8784, "step": 1187 }, { "epoch": 0.28848955803788245, "grad_norm": 0.5364545583724976, "learning_rate": 0.0001, "loss": 1.6092, "step": 1188 }, { "epoch": 0.2887323943661972, "grad_norm": 0.5934111475944519, "learning_rate": 0.0001, "loss": 1.7471, "step": 1189 }, { "epoch": 0.2889752306945119, "grad_norm": 0.5661386847496033, "learning_rate": 0.0001, "loss": 1.694, "step": 1190 }, { "epoch": 0.2892180670228266, "grad_norm": 0.5890941023826599, "learning_rate": 0.0001, "loss": 1.7736, "step": 1191 }, { "epoch": 0.28946090335114133, "grad_norm": 0.572519063949585, "learning_rate": 0.0001, "loss": 1.8875, "step": 1192 }, { "epoch": 0.28970373967945606, "grad_norm": 0.5531225204467773, "learning_rate": 0.0001, "loss": 1.771, "step": 1193 }, { "epoch": 0.28994657600777074, "grad_norm": 0.5932171940803528, "learning_rate": 0.0001, "loss": 1.9663, "step": 1194 }, { "epoch": 0.2901894123360855, "grad_norm": 0.5722930431365967, "learning_rate": 0.0001, "loss": 1.803, "step": 1195 }, { "epoch": 0.2904322486644002, "grad_norm": 0.6092216372489929, "learning_rate": 0.0001, "loss": 1.6879, "step": 1196 }, { "epoch": 0.2906750849927149, "grad_norm": 0.5634588003158569, "learning_rate": 0.0001, "loss": 1.5151, "step": 1197 }, { "epoch": 0.2909179213210296, "grad_norm": 0.5632761120796204, "learning_rate": 0.0001, "loss": 1.7852, "step": 1198 }, { "epoch": 0.29116075764934435, "grad_norm": 0.5588541626930237, "learning_rate": 0.0001, "loss": 1.6781, "step": 1199 }, { "epoch": 0.2914035939776591, "grad_norm": 0.5605027675628662, "learning_rate": 0.0001, "loss": 1.8683, "step": 1200 }, { "epoch": 0.29164643030597376, "grad_norm": 0.508496105670929, "learning_rate": 0.0001, "loss": 1.5614, "step": 1201 }, { "epoch": 0.2918892666342885, "grad_norm": 0.5228567123413086, "learning_rate": 0.0001, "loss": 1.7026, "step": 1202 }, { "epoch": 0.2921321029626032, "grad_norm": 0.6024715304374695, "learning_rate": 0.0001, "loss": 1.9143, "step": 1203 }, { "epoch": 0.2923749392909179, "grad_norm": 0.5535398125648499, "learning_rate": 0.0001, "loss": 1.6224, "step": 1204 }, { "epoch": 0.29261777561923263, "grad_norm": 0.5778299570083618, "learning_rate": 0.0001, "loss": 1.6881, "step": 1205 }, { "epoch": 0.29286061194754737, "grad_norm": 0.5712555050849915, "learning_rate": 0.0001, "loss": 1.6741, "step": 1206 }, { "epoch": 0.29310344827586204, "grad_norm": 0.5476194024085999, "learning_rate": 0.0001, "loss": 1.7123, "step": 1207 }, { "epoch": 0.2933462846041768, "grad_norm": 0.6042921543121338, "learning_rate": 0.0001, "loss": 1.8572, "step": 1208 }, { "epoch": 0.2935891209324915, "grad_norm": 0.5517978668212891, "learning_rate": 0.0001, "loss": 1.7138, "step": 1209 }, { "epoch": 0.29383195726080624, "grad_norm": 0.57857346534729, "learning_rate": 0.0001, "loss": 1.6654, "step": 1210 }, { "epoch": 0.2940747935891209, "grad_norm": 0.6032268404960632, "learning_rate": 0.0001, "loss": 1.9074, "step": 1211 }, { "epoch": 0.29431762991743565, "grad_norm": 0.5374449491500854, "learning_rate": 0.0001, "loss": 1.6404, "step": 1212 }, { "epoch": 0.2945604662457504, "grad_norm": 0.5887454152107239, "learning_rate": 0.0001, "loss": 1.7395, "step": 1213 }, { "epoch": 0.29480330257406506, "grad_norm": 0.6122771501541138, "learning_rate": 0.0001, "loss": 1.8, "step": 1214 }, { "epoch": 0.2950461389023798, "grad_norm": 0.554463803768158, "learning_rate": 0.0001, "loss": 1.802, "step": 1215 }, { "epoch": 0.29528897523069453, "grad_norm": 0.5563312768936157, "learning_rate": 0.0001, "loss": 1.727, "step": 1216 }, { "epoch": 0.2955318115590092, "grad_norm": 0.5625701546669006, "learning_rate": 0.0001, "loss": 1.6385, "step": 1217 }, { "epoch": 0.29577464788732394, "grad_norm": 0.5303876996040344, "learning_rate": 0.0001, "loss": 1.6915, "step": 1218 }, { "epoch": 0.29601748421563867, "grad_norm": 0.5515774488449097, "learning_rate": 0.0001, "loss": 1.4945, "step": 1219 }, { "epoch": 0.29626032054395335, "grad_norm": 0.5511413216590881, "learning_rate": 0.0001, "loss": 1.6165, "step": 1220 }, { "epoch": 0.2965031568722681, "grad_norm": 0.5535469055175781, "learning_rate": 0.0001, "loss": 1.7205, "step": 1221 }, { "epoch": 0.2967459932005828, "grad_norm": 0.5677886009216309, "learning_rate": 0.0001, "loss": 1.7241, "step": 1222 }, { "epoch": 0.29698882952889755, "grad_norm": 0.5623748302459717, "learning_rate": 0.0001, "loss": 1.7513, "step": 1223 }, { "epoch": 0.2972316658572122, "grad_norm": 0.5762066841125488, "learning_rate": 0.0001, "loss": 1.9384, "step": 1224 }, { "epoch": 0.29747450218552696, "grad_norm": 0.5834498405456543, "learning_rate": 0.0001, "loss": 1.807, "step": 1225 }, { "epoch": 0.2977173385138417, "grad_norm": 0.6799818277359009, "learning_rate": 0.0001, "loss": 2.0577, "step": 1226 }, { "epoch": 0.29796017484215637, "grad_norm": 0.5903156995773315, "learning_rate": 0.0001, "loss": 1.9314, "step": 1227 }, { "epoch": 0.2982030111704711, "grad_norm": 0.626448392868042, "learning_rate": 0.0001, "loss": 1.6208, "step": 1228 }, { "epoch": 0.29844584749878583, "grad_norm": 0.5882961750030518, "learning_rate": 0.0001, "loss": 1.8942, "step": 1229 }, { "epoch": 0.2986886838271005, "grad_norm": 0.5272478461265564, "learning_rate": 0.0001, "loss": 1.6891, "step": 1230 }, { "epoch": 0.29893152015541524, "grad_norm": 0.5757167339324951, "learning_rate": 0.0001, "loss": 1.7958, "step": 1231 }, { "epoch": 0.29917435648373, "grad_norm": 0.5639874935150146, "learning_rate": 0.0001, "loss": 1.6556, "step": 1232 }, { "epoch": 0.2994171928120447, "grad_norm": 0.540255606174469, "learning_rate": 0.0001, "loss": 1.6874, "step": 1233 }, { "epoch": 0.2996600291403594, "grad_norm": 0.6375817060470581, "learning_rate": 0.0001, "loss": 1.7894, "step": 1234 }, { "epoch": 0.2999028654686741, "grad_norm": 0.5961909890174866, "learning_rate": 0.0001, "loss": 1.8181, "step": 1235 }, { "epoch": 0.30014570179698885, "grad_norm": 0.5758340358734131, "learning_rate": 0.0001, "loss": 1.8422, "step": 1236 }, { "epoch": 0.30038853812530353, "grad_norm": 0.5907910466194153, "learning_rate": 0.0001, "loss": 1.9534, "step": 1237 }, { "epoch": 0.30063137445361826, "grad_norm": 0.5534744262695312, "learning_rate": 0.0001, "loss": 1.6591, "step": 1238 }, { "epoch": 0.300874210781933, "grad_norm": 0.5392131209373474, "learning_rate": 0.0001, "loss": 1.7531, "step": 1239 }, { "epoch": 0.30111704711024767, "grad_norm": 0.5619401335716248, "learning_rate": 0.0001, "loss": 1.8698, "step": 1240 }, { "epoch": 0.3013598834385624, "grad_norm": 0.5474029183387756, "learning_rate": 0.0001, "loss": 1.7221, "step": 1241 }, { "epoch": 0.30160271976687714, "grad_norm": 0.5693953037261963, "learning_rate": 0.0001, "loss": 1.7505, "step": 1242 }, { "epoch": 0.3018455560951918, "grad_norm": 0.5436322689056396, "learning_rate": 0.0001, "loss": 1.5929, "step": 1243 }, { "epoch": 0.30208839242350655, "grad_norm": 0.5292843580245972, "learning_rate": 0.0001, "loss": 1.6299, "step": 1244 }, { "epoch": 0.3023312287518213, "grad_norm": 0.579740583896637, "learning_rate": 0.0001, "loss": 1.8215, "step": 1245 }, { "epoch": 0.302574065080136, "grad_norm": 0.5974770188331604, "learning_rate": 0.0001, "loss": 1.7842, "step": 1246 }, { "epoch": 0.3028169014084507, "grad_norm": 0.5881436467170715, "learning_rate": 0.0001, "loss": 1.7556, "step": 1247 }, { "epoch": 0.3030597377367654, "grad_norm": 0.5402075052261353, "learning_rate": 0.0001, "loss": 1.6768, "step": 1248 }, { "epoch": 0.30330257406508015, "grad_norm": 0.5831952691078186, "learning_rate": 0.0001, "loss": 1.5455, "step": 1249 }, { "epoch": 0.30354541039339483, "grad_norm": 0.5571645498275757, "learning_rate": 0.0001, "loss": 1.7934, "step": 1250 }, { "epoch": 0.30378824672170957, "grad_norm": 0.5791111588478088, "learning_rate": 0.0001, "loss": 1.8274, "step": 1251 }, { "epoch": 0.3040310830500243, "grad_norm": 0.5482165217399597, "learning_rate": 0.0001, "loss": 1.7685, "step": 1252 }, { "epoch": 0.304273919378339, "grad_norm": 0.5379577875137329, "learning_rate": 0.0001, "loss": 1.7371, "step": 1253 }, { "epoch": 0.3045167557066537, "grad_norm": 0.5821003913879395, "learning_rate": 0.0001, "loss": 1.9647, "step": 1254 }, { "epoch": 0.30475959203496844, "grad_norm": 0.573422908782959, "learning_rate": 0.0001, "loss": 1.7154, "step": 1255 }, { "epoch": 0.3050024283632832, "grad_norm": 0.595234215259552, "learning_rate": 0.0001, "loss": 1.7981, "step": 1256 }, { "epoch": 0.30524526469159785, "grad_norm": 0.5558188557624817, "learning_rate": 0.0001, "loss": 1.7843, "step": 1257 }, { "epoch": 0.3054881010199126, "grad_norm": 0.5586723685264587, "learning_rate": 0.0001, "loss": 1.6505, "step": 1258 }, { "epoch": 0.3057309373482273, "grad_norm": 0.6428223848342896, "learning_rate": 0.0001, "loss": 1.8015, "step": 1259 }, { "epoch": 0.305973773676542, "grad_norm": 0.5657460689544678, "learning_rate": 0.0001, "loss": 1.7208, "step": 1260 }, { "epoch": 0.3062166100048567, "grad_norm": 0.5800066590309143, "learning_rate": 0.0001, "loss": 1.7361, "step": 1261 }, { "epoch": 0.30645944633317146, "grad_norm": 0.5351372361183167, "learning_rate": 0.0001, "loss": 1.6318, "step": 1262 }, { "epoch": 0.30670228266148614, "grad_norm": 0.5728726983070374, "learning_rate": 0.0001, "loss": 1.7704, "step": 1263 }, { "epoch": 0.30694511898980087, "grad_norm": 0.7134020328521729, "learning_rate": 0.0001, "loss": 1.8761, "step": 1264 }, { "epoch": 0.3071879553181156, "grad_norm": 0.6183131337165833, "learning_rate": 0.0001, "loss": 1.9261, "step": 1265 }, { "epoch": 0.3074307916464303, "grad_norm": 1.3757343292236328, "learning_rate": 0.0001, "loss": 1.6129, "step": 1266 }, { "epoch": 0.307673627974745, "grad_norm": 0.5625571012496948, "learning_rate": 0.0001, "loss": 1.5778, "step": 1267 }, { "epoch": 0.30791646430305974, "grad_norm": 0.5531574487686157, "learning_rate": 0.0001, "loss": 1.5962, "step": 1268 }, { "epoch": 0.3081593006313745, "grad_norm": 0.8341613411903381, "learning_rate": 0.0001, "loss": 1.7078, "step": 1269 }, { "epoch": 0.30840213695968915, "grad_norm": 0.5458047389984131, "learning_rate": 0.0001, "loss": 1.9236, "step": 1270 }, { "epoch": 0.3086449732880039, "grad_norm": 0.5536592602729797, "learning_rate": 0.0001, "loss": 1.7013, "step": 1271 }, { "epoch": 0.3088878096163186, "grad_norm": 0.5676348805427551, "learning_rate": 0.0001, "loss": 1.6352, "step": 1272 }, { "epoch": 0.3091306459446333, "grad_norm": 0.6149638891220093, "learning_rate": 0.0001, "loss": 1.7848, "step": 1273 }, { "epoch": 0.30937348227294803, "grad_norm": 0.5674256682395935, "learning_rate": 0.0001, "loss": 1.751, "step": 1274 }, { "epoch": 0.30961631860126276, "grad_norm": 0.5894138813018799, "learning_rate": 0.0001, "loss": 1.8041, "step": 1275 }, { "epoch": 0.30985915492957744, "grad_norm": 0.531599223613739, "learning_rate": 0.0001, "loss": 1.5234, "step": 1276 }, { "epoch": 0.3101019912578922, "grad_norm": 0.5443134903907776, "learning_rate": 0.0001, "loss": 1.5503, "step": 1277 }, { "epoch": 0.3103448275862069, "grad_norm": 0.5844945907592773, "learning_rate": 0.0001, "loss": 1.7385, "step": 1278 }, { "epoch": 0.31058766391452164, "grad_norm": 0.5538145899772644, "learning_rate": 0.0001, "loss": 1.6208, "step": 1279 }, { "epoch": 0.3108305002428363, "grad_norm": 0.5729904174804688, "learning_rate": 0.0001, "loss": 1.6952, "step": 1280 }, { "epoch": 0.31107333657115105, "grad_norm": 0.5446047186851501, "learning_rate": 0.0001, "loss": 1.7144, "step": 1281 }, { "epoch": 0.3113161728994658, "grad_norm": 0.5747256278991699, "learning_rate": 0.0001, "loss": 1.9335, "step": 1282 }, { "epoch": 0.31155900922778046, "grad_norm": 0.5712779760360718, "learning_rate": 0.0001, "loss": 1.7136, "step": 1283 }, { "epoch": 0.3118018455560952, "grad_norm": 0.5434122085571289, "learning_rate": 0.0001, "loss": 1.7904, "step": 1284 }, { "epoch": 0.3120446818844099, "grad_norm": 0.5477941036224365, "learning_rate": 0.0001, "loss": 1.6354, "step": 1285 }, { "epoch": 0.3122875182127246, "grad_norm": 0.5642449259757996, "learning_rate": 0.0001, "loss": 1.6353, "step": 1286 }, { "epoch": 0.31253035454103933, "grad_norm": 0.5494605302810669, "learning_rate": 0.0001, "loss": 1.722, "step": 1287 }, { "epoch": 0.31277319086935407, "grad_norm": 0.5640792846679688, "learning_rate": 0.0001, "loss": 1.8397, "step": 1288 }, { "epoch": 0.31301602719766874, "grad_norm": 0.5498180985450745, "learning_rate": 0.0001, "loss": 1.7621, "step": 1289 }, { "epoch": 0.3132588635259835, "grad_norm": 0.5406601428985596, "learning_rate": 0.0001, "loss": 1.7247, "step": 1290 }, { "epoch": 0.3135016998542982, "grad_norm": 0.5511451363563538, "learning_rate": 0.0001, "loss": 1.8245, "step": 1291 }, { "epoch": 0.31374453618261294, "grad_norm": 0.5950665473937988, "learning_rate": 0.0001, "loss": 1.7691, "step": 1292 }, { "epoch": 0.3139873725109276, "grad_norm": 0.5692906379699707, "learning_rate": 0.0001, "loss": 1.6992, "step": 1293 }, { "epoch": 0.31423020883924235, "grad_norm": 0.5657053589820862, "learning_rate": 0.0001, "loss": 1.7324, "step": 1294 }, { "epoch": 0.3144730451675571, "grad_norm": 0.5812044143676758, "learning_rate": 0.0001, "loss": 1.7134, "step": 1295 }, { "epoch": 0.31471588149587176, "grad_norm": 0.5734257102012634, "learning_rate": 0.0001, "loss": 1.808, "step": 1296 }, { "epoch": 0.3149587178241865, "grad_norm": 0.5991582274436951, "learning_rate": 0.0001, "loss": 1.8522, "step": 1297 }, { "epoch": 0.31520155415250123, "grad_norm": 0.5761125683784485, "learning_rate": 0.0001, "loss": 1.867, "step": 1298 }, { "epoch": 0.3154443904808159, "grad_norm": 0.5132241249084473, "learning_rate": 0.0001, "loss": 1.3686, "step": 1299 }, { "epoch": 0.31568722680913064, "grad_norm": 0.5863776206970215, "learning_rate": 0.0001, "loss": 1.7227, "step": 1300 }, { "epoch": 0.31593006313744537, "grad_norm": 0.6152379512786865, "learning_rate": 0.0001, "loss": 1.987, "step": 1301 }, { "epoch": 0.3161728994657601, "grad_norm": 0.5339058041572571, "learning_rate": 0.0001, "loss": 1.6471, "step": 1302 }, { "epoch": 0.3164157357940748, "grad_norm": 0.5914755463600159, "learning_rate": 0.0001, "loss": 1.7393, "step": 1303 }, { "epoch": 0.3166585721223895, "grad_norm": 0.6291932463645935, "learning_rate": 0.0001, "loss": 1.8296, "step": 1304 }, { "epoch": 0.31690140845070425, "grad_norm": 0.6238904595375061, "learning_rate": 0.0001, "loss": 1.9846, "step": 1305 }, { "epoch": 0.3171442447790189, "grad_norm": 0.5747682452201843, "learning_rate": 0.0001, "loss": 1.7285, "step": 1306 }, { "epoch": 0.31738708110733366, "grad_norm": 0.5602648854255676, "learning_rate": 0.0001, "loss": 1.6642, "step": 1307 }, { "epoch": 0.3176299174356484, "grad_norm": 0.5592392683029175, "learning_rate": 0.0001, "loss": 1.6881, "step": 1308 }, { "epoch": 0.31787275376396307, "grad_norm": 0.6572312116622925, "learning_rate": 0.0001, "loss": 2.1072, "step": 1309 }, { "epoch": 0.3181155900922778, "grad_norm": 0.5818151831626892, "learning_rate": 0.0001, "loss": 1.7513, "step": 1310 }, { "epoch": 0.31835842642059253, "grad_norm": 0.5654920935630798, "learning_rate": 0.0001, "loss": 1.7266, "step": 1311 }, { "epoch": 0.3186012627489072, "grad_norm": 0.5874744653701782, "learning_rate": 0.0001, "loss": 1.8714, "step": 1312 }, { "epoch": 0.31884409907722194, "grad_norm": 0.6022887229919434, "learning_rate": 0.0001, "loss": 1.7934, "step": 1313 }, { "epoch": 0.3190869354055367, "grad_norm": 0.5618531703948975, "learning_rate": 0.0001, "loss": 1.6594, "step": 1314 }, { "epoch": 0.3193297717338514, "grad_norm": 0.564096987247467, "learning_rate": 0.0001, "loss": 1.7697, "step": 1315 }, { "epoch": 0.3195726080621661, "grad_norm": 0.5624117851257324, "learning_rate": 0.0001, "loss": 1.7132, "step": 1316 }, { "epoch": 0.3198154443904808, "grad_norm": 0.6024875640869141, "learning_rate": 0.0001, "loss": 1.7829, "step": 1317 }, { "epoch": 0.32005828071879555, "grad_norm": 0.5781903862953186, "learning_rate": 0.0001, "loss": 1.7032, "step": 1318 }, { "epoch": 0.32030111704711023, "grad_norm": 0.5536518096923828, "learning_rate": 0.0001, "loss": 1.7281, "step": 1319 }, { "epoch": 0.32054395337542496, "grad_norm": 0.577639102935791, "learning_rate": 0.0001, "loss": 1.7655, "step": 1320 }, { "epoch": 0.3207867897037397, "grad_norm": 0.5466935038566589, "learning_rate": 0.0001, "loss": 1.6195, "step": 1321 }, { "epoch": 0.32102962603205437, "grad_norm": 0.6217895150184631, "learning_rate": 0.0001, "loss": 1.9077, "step": 1322 }, { "epoch": 0.3212724623603691, "grad_norm": 0.6197120547294617, "learning_rate": 0.0001, "loss": 1.9127, "step": 1323 }, { "epoch": 0.32151529868868384, "grad_norm": 0.5837169289588928, "learning_rate": 0.0001, "loss": 1.714, "step": 1324 }, { "epoch": 0.32175813501699857, "grad_norm": 0.5738924741744995, "learning_rate": 0.0001, "loss": 1.6529, "step": 1325 }, { "epoch": 0.32200097134531325, "grad_norm": 0.5743533968925476, "learning_rate": 0.0001, "loss": 1.6758, "step": 1326 }, { "epoch": 0.322243807673628, "grad_norm": 0.5642489194869995, "learning_rate": 0.0001, "loss": 1.8368, "step": 1327 }, { "epoch": 0.3224866440019427, "grad_norm": 0.5734957456588745, "learning_rate": 0.0001, "loss": 1.607, "step": 1328 }, { "epoch": 0.3227294803302574, "grad_norm": 0.576200008392334, "learning_rate": 0.0001, "loss": 1.8198, "step": 1329 }, { "epoch": 0.3229723166585721, "grad_norm": 0.5713797807693481, "learning_rate": 0.0001, "loss": 1.8013, "step": 1330 }, { "epoch": 0.32321515298688686, "grad_norm": 0.6150364279747009, "learning_rate": 0.0001, "loss": 1.6369, "step": 1331 }, { "epoch": 0.32345798931520153, "grad_norm": 0.5615043640136719, "learning_rate": 0.0001, "loss": 1.6789, "step": 1332 }, { "epoch": 0.32370082564351627, "grad_norm": 0.5571615695953369, "learning_rate": 0.0001, "loss": 1.6595, "step": 1333 }, { "epoch": 0.323943661971831, "grad_norm": 0.5916815996170044, "learning_rate": 0.0001, "loss": 1.8651, "step": 1334 }, { "epoch": 0.3241864983001457, "grad_norm": 0.5488576889038086, "learning_rate": 0.0001, "loss": 1.5881, "step": 1335 }, { "epoch": 0.3244293346284604, "grad_norm": 0.5989965796470642, "learning_rate": 0.0001, "loss": 1.83, "step": 1336 }, { "epoch": 0.32467217095677514, "grad_norm": 0.5402348041534424, "learning_rate": 0.0001, "loss": 1.723, "step": 1337 }, { "epoch": 0.3249150072850899, "grad_norm": 0.5602414608001709, "learning_rate": 0.0001, "loss": 1.7319, "step": 1338 }, { "epoch": 0.32515784361340455, "grad_norm": 0.5434054732322693, "learning_rate": 0.0001, "loss": 1.7123, "step": 1339 }, { "epoch": 0.3254006799417193, "grad_norm": 0.5134353637695312, "learning_rate": 0.0001, "loss": 1.5663, "step": 1340 }, { "epoch": 0.325643516270034, "grad_norm": 0.568906307220459, "learning_rate": 0.0001, "loss": 1.7518, "step": 1341 }, { "epoch": 0.3258863525983487, "grad_norm": 0.5728622078895569, "learning_rate": 0.0001, "loss": 1.6901, "step": 1342 }, { "epoch": 0.3261291889266634, "grad_norm": 0.5689023733139038, "learning_rate": 0.0001, "loss": 1.7392, "step": 1343 }, { "epoch": 0.32637202525497816, "grad_norm": 0.5729114413261414, "learning_rate": 0.0001, "loss": 1.7333, "step": 1344 }, { "epoch": 0.32661486158329284, "grad_norm": 0.5677634477615356, "learning_rate": 0.0001, "loss": 1.7947, "step": 1345 }, { "epoch": 0.32685769791160757, "grad_norm": 0.5566307306289673, "learning_rate": 0.0001, "loss": 1.7063, "step": 1346 }, { "epoch": 0.3271005342399223, "grad_norm": 0.566073477268219, "learning_rate": 0.0001, "loss": 1.7134, "step": 1347 }, { "epoch": 0.32734337056823704, "grad_norm": 0.5132454037666321, "learning_rate": 0.0001, "loss": 1.6655, "step": 1348 }, { "epoch": 0.3275862068965517, "grad_norm": 0.5575150847434998, "learning_rate": 0.0001, "loss": 1.6852, "step": 1349 }, { "epoch": 0.32782904322486645, "grad_norm": 0.5818497538566589, "learning_rate": 0.0001, "loss": 1.6622, "step": 1350 }, { "epoch": 0.3280718795531812, "grad_norm": 0.5753150582313538, "learning_rate": 0.0001, "loss": 1.7942, "step": 1351 }, { "epoch": 0.32831471588149586, "grad_norm": 0.5964739918708801, "learning_rate": 0.0001, "loss": 1.9348, "step": 1352 }, { "epoch": 0.3285575522098106, "grad_norm": 0.5620943307876587, "learning_rate": 0.0001, "loss": 1.5684, "step": 1353 }, { "epoch": 0.3288003885381253, "grad_norm": 0.5440359711647034, "learning_rate": 0.0001, "loss": 1.7556, "step": 1354 }, { "epoch": 0.32904322486644, "grad_norm": 0.5927224159240723, "learning_rate": 0.0001, "loss": 1.7276, "step": 1355 }, { "epoch": 0.32928606119475473, "grad_norm": 0.5532374382019043, "learning_rate": 0.0001, "loss": 1.6874, "step": 1356 }, { "epoch": 0.32952889752306946, "grad_norm": 0.5345386266708374, "learning_rate": 0.0001, "loss": 1.5555, "step": 1357 }, { "epoch": 0.32977173385138414, "grad_norm": 0.5551013350486755, "learning_rate": 0.0001, "loss": 1.7162, "step": 1358 }, { "epoch": 0.3300145701796989, "grad_norm": 0.5722214579582214, "learning_rate": 0.0001, "loss": 1.8486, "step": 1359 }, { "epoch": 0.3302574065080136, "grad_norm": 0.5920416712760925, "learning_rate": 0.0001, "loss": 1.8309, "step": 1360 }, { "epoch": 0.33050024283632834, "grad_norm": 0.5732881426811218, "learning_rate": 0.0001, "loss": 1.8577, "step": 1361 }, { "epoch": 0.330743079164643, "grad_norm": 0.5619862675666809, "learning_rate": 0.0001, "loss": 1.8305, "step": 1362 }, { "epoch": 0.33098591549295775, "grad_norm": 0.6099917888641357, "learning_rate": 0.0001, "loss": 1.8949, "step": 1363 }, { "epoch": 0.3312287518212725, "grad_norm": 0.5454998016357422, "learning_rate": 0.0001, "loss": 1.6676, "step": 1364 }, { "epoch": 0.33147158814958716, "grad_norm": 0.5881146192550659, "learning_rate": 0.0001, "loss": 2.0026, "step": 1365 }, { "epoch": 0.3317144244779019, "grad_norm": 0.5716860294342041, "learning_rate": 0.0001, "loss": 1.7442, "step": 1366 }, { "epoch": 0.3319572608062166, "grad_norm": 0.5615752339363098, "learning_rate": 0.0001, "loss": 1.7291, "step": 1367 }, { "epoch": 0.3322000971345313, "grad_norm": 0.5943424105644226, "learning_rate": 0.0001, "loss": 1.9471, "step": 1368 }, { "epoch": 0.33244293346284604, "grad_norm": 0.5808683633804321, "learning_rate": 0.0001, "loss": 1.7641, "step": 1369 }, { "epoch": 0.33268576979116077, "grad_norm": 0.5758082270622253, "learning_rate": 0.0001, "loss": 1.7018, "step": 1370 }, { "epoch": 0.3329286061194755, "grad_norm": 0.5595842599868774, "learning_rate": 0.0001, "loss": 1.7815, "step": 1371 }, { "epoch": 0.3331714424477902, "grad_norm": 0.5441301465034485, "learning_rate": 0.0001, "loss": 1.6535, "step": 1372 }, { "epoch": 0.3334142787761049, "grad_norm": 0.6101175546646118, "learning_rate": 0.0001, "loss": 1.7773, "step": 1373 }, { "epoch": 0.33365711510441964, "grad_norm": 0.5867419838905334, "learning_rate": 0.0001, "loss": 1.8394, "step": 1374 }, { "epoch": 0.3338999514327343, "grad_norm": 0.58198481798172, "learning_rate": 0.0001, "loss": 1.6055, "step": 1375 }, { "epoch": 0.33414278776104905, "grad_norm": 0.5561076998710632, "learning_rate": 0.0001, "loss": 1.7582, "step": 1376 }, { "epoch": 0.3343856240893638, "grad_norm": 0.5937936902046204, "learning_rate": 0.0001, "loss": 1.6924, "step": 1377 }, { "epoch": 0.33462846041767846, "grad_norm": 0.6016126275062561, "learning_rate": 0.0001, "loss": 1.7945, "step": 1378 }, { "epoch": 0.3348712967459932, "grad_norm": 0.6054165959358215, "learning_rate": 0.0001, "loss": 1.6885, "step": 1379 }, { "epoch": 0.33511413307430793, "grad_norm": 0.6050477623939514, "learning_rate": 0.0001, "loss": 1.8576, "step": 1380 }, { "epoch": 0.3353569694026226, "grad_norm": 0.5650911331176758, "learning_rate": 0.0001, "loss": 1.7675, "step": 1381 }, { "epoch": 0.33559980573093734, "grad_norm": 0.5832107663154602, "learning_rate": 0.0001, "loss": 1.8276, "step": 1382 }, { "epoch": 0.3358426420592521, "grad_norm": 0.5333999991416931, "learning_rate": 0.0001, "loss": 1.6666, "step": 1383 }, { "epoch": 0.3360854783875668, "grad_norm": 0.5361559391021729, "learning_rate": 0.0001, "loss": 1.7057, "step": 1384 }, { "epoch": 0.3363283147158815, "grad_norm": 0.5865080952644348, "learning_rate": 0.0001, "loss": 1.739, "step": 1385 }, { "epoch": 0.3365711510441962, "grad_norm": 0.5516999959945679, "learning_rate": 0.0001, "loss": 1.7335, "step": 1386 }, { "epoch": 0.33681398737251095, "grad_norm": 0.5868236422538757, "learning_rate": 0.0001, "loss": 1.6737, "step": 1387 }, { "epoch": 0.3370568237008256, "grad_norm": 0.5723714828491211, "learning_rate": 0.0001, "loss": 1.6533, "step": 1388 }, { "epoch": 0.33729966002914036, "grad_norm": 0.6132412552833557, "learning_rate": 0.0001, "loss": 1.8463, "step": 1389 }, { "epoch": 0.3375424963574551, "grad_norm": 0.5300971865653992, "learning_rate": 0.0001, "loss": 1.6839, "step": 1390 }, { "epoch": 0.33778533268576977, "grad_norm": 0.5721192955970764, "learning_rate": 0.0001, "loss": 1.7578, "step": 1391 }, { "epoch": 0.3380281690140845, "grad_norm": 0.522925615310669, "learning_rate": 0.0001, "loss": 1.6666, "step": 1392 }, { "epoch": 0.33827100534239923, "grad_norm": 0.5766683220863342, "learning_rate": 0.0001, "loss": 1.7946, "step": 1393 }, { "epoch": 0.33851384167071397, "grad_norm": 0.5427753329277039, "learning_rate": 0.0001, "loss": 1.7914, "step": 1394 }, { "epoch": 0.33875667799902864, "grad_norm": 0.5608222484588623, "learning_rate": 0.0001, "loss": 1.7307, "step": 1395 }, { "epoch": 0.3389995143273434, "grad_norm": 0.5690093040466309, "learning_rate": 0.0001, "loss": 1.5334, "step": 1396 }, { "epoch": 0.3392423506556581, "grad_norm": 0.5487404465675354, "learning_rate": 0.0001, "loss": 1.721, "step": 1397 }, { "epoch": 0.3394851869839728, "grad_norm": 0.6467126607894897, "learning_rate": 0.0001, "loss": 1.8987, "step": 1398 }, { "epoch": 0.3397280233122875, "grad_norm": 0.5495607852935791, "learning_rate": 0.0001, "loss": 1.6867, "step": 1399 }, { "epoch": 0.33997085964060225, "grad_norm": 0.5042486786842346, "learning_rate": 0.0001, "loss": 1.5711, "step": 1400 }, { "epoch": 0.34021369596891693, "grad_norm": 0.5387740731239319, "learning_rate": 0.0001, "loss": 1.6545, "step": 1401 }, { "epoch": 0.34045653229723166, "grad_norm": 0.5586868524551392, "learning_rate": 0.0001, "loss": 1.7908, "step": 1402 }, { "epoch": 0.3406993686255464, "grad_norm": 0.5448033809661865, "learning_rate": 0.0001, "loss": 1.8032, "step": 1403 }, { "epoch": 0.34094220495386107, "grad_norm": 0.5725580453872681, "learning_rate": 0.0001, "loss": 1.6785, "step": 1404 }, { "epoch": 0.3411850412821758, "grad_norm": 0.5539155006408691, "learning_rate": 0.0001, "loss": 1.829, "step": 1405 }, { "epoch": 0.34142787761049054, "grad_norm": 0.5847489237785339, "learning_rate": 0.0001, "loss": 1.6599, "step": 1406 }, { "epoch": 0.34167071393880527, "grad_norm": 0.6161686778068542, "learning_rate": 0.0001, "loss": 1.8224, "step": 1407 }, { "epoch": 0.34191355026711995, "grad_norm": 0.5436552166938782, "learning_rate": 0.0001, "loss": 1.5839, "step": 1408 }, { "epoch": 0.3421563865954347, "grad_norm": 0.5373216867446899, "learning_rate": 0.0001, "loss": 1.5821, "step": 1409 }, { "epoch": 0.3423992229237494, "grad_norm": 0.5849260091781616, "learning_rate": 0.0001, "loss": 1.7975, "step": 1410 }, { "epoch": 0.3426420592520641, "grad_norm": 0.5963196754455566, "learning_rate": 0.0001, "loss": 1.8656, "step": 1411 }, { "epoch": 0.3428848955803788, "grad_norm": 0.5400733947753906, "learning_rate": 0.0001, "loss": 1.7551, "step": 1412 }, { "epoch": 0.34312773190869356, "grad_norm": 0.5988975167274475, "learning_rate": 0.0001, "loss": 1.8325, "step": 1413 }, { "epoch": 0.34337056823700823, "grad_norm": 0.5726484060287476, "learning_rate": 0.0001, "loss": 1.8216, "step": 1414 }, { "epoch": 0.34361340456532297, "grad_norm": 0.5422387719154358, "learning_rate": 0.0001, "loss": 1.7552, "step": 1415 }, { "epoch": 0.3438562408936377, "grad_norm": 0.5612909197807312, "learning_rate": 0.0001, "loss": 1.7423, "step": 1416 }, { "epoch": 0.3440990772219524, "grad_norm": 0.5863240957260132, "learning_rate": 0.0001, "loss": 1.7278, "step": 1417 }, { "epoch": 0.3443419135502671, "grad_norm": 0.5583364367485046, "learning_rate": 0.0001, "loss": 1.7598, "step": 1418 }, { "epoch": 0.34458474987858184, "grad_norm": 0.5578181147575378, "learning_rate": 0.0001, "loss": 1.7413, "step": 1419 }, { "epoch": 0.3448275862068966, "grad_norm": 0.5600872039794922, "learning_rate": 0.0001, "loss": 1.7814, "step": 1420 }, { "epoch": 0.34507042253521125, "grad_norm": 0.5490904450416565, "learning_rate": 0.0001, "loss": 1.4549, "step": 1421 }, { "epoch": 0.345313258863526, "grad_norm": 0.613351583480835, "learning_rate": 0.0001, "loss": 1.787, "step": 1422 }, { "epoch": 0.3455560951918407, "grad_norm": 0.5493449568748474, "learning_rate": 0.0001, "loss": 1.8616, "step": 1423 }, { "epoch": 0.3457989315201554, "grad_norm": 0.6109333038330078, "learning_rate": 0.0001, "loss": 1.7587, "step": 1424 }, { "epoch": 0.3460417678484701, "grad_norm": 0.5629788041114807, "learning_rate": 0.0001, "loss": 1.8761, "step": 1425 }, { "epoch": 0.34628460417678486, "grad_norm": 0.5619919300079346, "learning_rate": 0.0001, "loss": 1.7521, "step": 1426 }, { "epoch": 0.34652744050509954, "grad_norm": 0.5841615200042725, "learning_rate": 0.0001, "loss": 1.6338, "step": 1427 }, { "epoch": 0.34677027683341427, "grad_norm": 0.5627974271774292, "learning_rate": 0.0001, "loss": 1.7341, "step": 1428 }, { "epoch": 0.347013113161729, "grad_norm": 0.5274537801742554, "learning_rate": 0.0001, "loss": 1.5161, "step": 1429 }, { "epoch": 0.34725594949004374, "grad_norm": 0.5390434861183167, "learning_rate": 0.0001, "loss": 1.7198, "step": 1430 }, { "epoch": 0.3474987858183584, "grad_norm": 0.5722873210906982, "learning_rate": 0.0001, "loss": 1.7464, "step": 1431 }, { "epoch": 0.34774162214667315, "grad_norm": 0.5416739583015442, "learning_rate": 0.0001, "loss": 1.7131, "step": 1432 }, { "epoch": 0.3479844584749879, "grad_norm": 0.5875145196914673, "learning_rate": 0.0001, "loss": 1.6204, "step": 1433 }, { "epoch": 0.34822729480330256, "grad_norm": 0.563105583190918, "learning_rate": 0.0001, "loss": 1.747, "step": 1434 }, { "epoch": 0.3484701311316173, "grad_norm": 0.5475865006446838, "learning_rate": 0.0001, "loss": 1.5961, "step": 1435 }, { "epoch": 0.348712967459932, "grad_norm": 0.5591480135917664, "learning_rate": 0.0001, "loss": 1.7393, "step": 1436 }, { "epoch": 0.3489558037882467, "grad_norm": 0.6027525663375854, "learning_rate": 0.0001, "loss": 1.6588, "step": 1437 }, { "epoch": 0.34919864011656143, "grad_norm": 0.5859847068786621, "learning_rate": 0.0001, "loss": 1.7547, "step": 1438 }, { "epoch": 0.34944147644487616, "grad_norm": 0.6301659941673279, "learning_rate": 0.0001, "loss": 1.5937, "step": 1439 }, { "epoch": 0.34968431277319084, "grad_norm": 0.5705239772796631, "learning_rate": 0.0001, "loss": 1.7873, "step": 1440 }, { "epoch": 0.3499271491015056, "grad_norm": 0.5834831595420837, "learning_rate": 0.0001, "loss": 1.6534, "step": 1441 }, { "epoch": 0.3501699854298203, "grad_norm": 0.6054890751838684, "learning_rate": 0.0001, "loss": 1.6058, "step": 1442 }, { "epoch": 0.35041282175813504, "grad_norm": 0.5621503591537476, "learning_rate": 0.0001, "loss": 1.747, "step": 1443 }, { "epoch": 0.3506556580864497, "grad_norm": 0.5485848784446716, "learning_rate": 0.0001, "loss": 1.7866, "step": 1444 }, { "epoch": 0.35089849441476445, "grad_norm": 0.571904718875885, "learning_rate": 0.0001, "loss": 1.7252, "step": 1445 }, { "epoch": 0.3511413307430792, "grad_norm": 0.5643053650856018, "learning_rate": 0.0001, "loss": 1.8232, "step": 1446 }, { "epoch": 0.35138416707139386, "grad_norm": 0.5391576290130615, "learning_rate": 0.0001, "loss": 1.6155, "step": 1447 }, { "epoch": 0.3516270033997086, "grad_norm": 0.5638372898101807, "learning_rate": 0.0001, "loss": 1.7338, "step": 1448 }, { "epoch": 0.3518698397280233, "grad_norm": 0.5746908187866211, "learning_rate": 0.0001, "loss": 1.6935, "step": 1449 }, { "epoch": 0.352112676056338, "grad_norm": 0.5585685968399048, "learning_rate": 0.0001, "loss": 1.7004, "step": 1450 }, { "epoch": 0.35235551238465274, "grad_norm": 0.5530781149864197, "learning_rate": 0.0001, "loss": 1.6715, "step": 1451 }, { "epoch": 0.35259834871296747, "grad_norm": 0.5826124548912048, "learning_rate": 0.0001, "loss": 1.7969, "step": 1452 }, { "epoch": 0.3528411850412822, "grad_norm": 0.553278386592865, "learning_rate": 0.0001, "loss": 1.7855, "step": 1453 }, { "epoch": 0.3530840213695969, "grad_norm": 0.5775020718574524, "learning_rate": 0.0001, "loss": 1.7579, "step": 1454 }, { "epoch": 0.3533268576979116, "grad_norm": 0.581123948097229, "learning_rate": 0.0001, "loss": 1.8847, "step": 1455 }, { "epoch": 0.35356969402622634, "grad_norm": 0.5525007247924805, "learning_rate": 0.0001, "loss": 1.7812, "step": 1456 }, { "epoch": 0.353812530354541, "grad_norm": 0.5545541048049927, "learning_rate": 0.0001, "loss": 1.7563, "step": 1457 }, { "epoch": 0.35405536668285575, "grad_norm": 0.5880481004714966, "learning_rate": 0.0001, "loss": 1.8606, "step": 1458 }, { "epoch": 0.3542982030111705, "grad_norm": 0.5368393063545227, "learning_rate": 0.0001, "loss": 1.7209, "step": 1459 }, { "epoch": 0.35454103933948516, "grad_norm": 0.541566014289856, "learning_rate": 0.0001, "loss": 1.7145, "step": 1460 }, { "epoch": 0.3547838756677999, "grad_norm": 0.5559922456741333, "learning_rate": 0.0001, "loss": 1.6841, "step": 1461 }, { "epoch": 0.35502671199611463, "grad_norm": 0.5847586393356323, "learning_rate": 0.0001, "loss": 1.6755, "step": 1462 }, { "epoch": 0.3552695483244293, "grad_norm": 0.5786769390106201, "learning_rate": 0.0001, "loss": 1.8127, "step": 1463 }, { "epoch": 0.35551238465274404, "grad_norm": 0.5552936792373657, "learning_rate": 0.0001, "loss": 1.6745, "step": 1464 }, { "epoch": 0.3557552209810588, "grad_norm": 0.570335865020752, "learning_rate": 0.0001, "loss": 1.734, "step": 1465 }, { "epoch": 0.3559980573093735, "grad_norm": 0.5759338736534119, "learning_rate": 0.0001, "loss": 1.7253, "step": 1466 }, { "epoch": 0.3562408936376882, "grad_norm": 0.5608747601509094, "learning_rate": 0.0001, "loss": 1.7129, "step": 1467 }, { "epoch": 0.3564837299660029, "grad_norm": 0.6293925046920776, "learning_rate": 0.0001, "loss": 1.8531, "step": 1468 }, { "epoch": 0.35672656629431765, "grad_norm": 0.5296218991279602, "learning_rate": 0.0001, "loss": 1.6025, "step": 1469 }, { "epoch": 0.3569694026226323, "grad_norm": 0.5386657118797302, "learning_rate": 0.0001, "loss": 1.5579, "step": 1470 }, { "epoch": 0.35721223895094706, "grad_norm": 0.5375521779060364, "learning_rate": 0.0001, "loss": 1.7462, "step": 1471 }, { "epoch": 0.3574550752792618, "grad_norm": 0.5659185647964478, "learning_rate": 0.0001, "loss": 1.8617, "step": 1472 }, { "epoch": 0.35769791160757647, "grad_norm": 0.589073121547699, "learning_rate": 0.0001, "loss": 1.9014, "step": 1473 }, { "epoch": 0.3579407479358912, "grad_norm": 0.5113967061042786, "learning_rate": 0.0001, "loss": 1.6166, "step": 1474 }, { "epoch": 0.35818358426420593, "grad_norm": 0.5525660514831543, "learning_rate": 0.0001, "loss": 1.7119, "step": 1475 }, { "epoch": 0.35842642059252067, "grad_norm": 0.5673325657844543, "learning_rate": 0.0001, "loss": 1.733, "step": 1476 }, { "epoch": 0.35866925692083534, "grad_norm": 0.5445988178253174, "learning_rate": 0.0001, "loss": 1.6624, "step": 1477 }, { "epoch": 0.3589120932491501, "grad_norm": 0.5666174292564392, "learning_rate": 0.0001, "loss": 1.7183, "step": 1478 }, { "epoch": 0.3591549295774648, "grad_norm": 0.5427615642547607, "learning_rate": 0.0001, "loss": 1.7133, "step": 1479 }, { "epoch": 0.3593977659057795, "grad_norm": 0.5578546524047852, "learning_rate": 0.0001, "loss": 1.7084, "step": 1480 }, { "epoch": 0.3596406022340942, "grad_norm": 0.5783149003982544, "learning_rate": 0.0001, "loss": 1.803, "step": 1481 }, { "epoch": 0.35988343856240895, "grad_norm": 0.5721765756607056, "learning_rate": 0.0001, "loss": 1.8746, "step": 1482 }, { "epoch": 0.36012627489072363, "grad_norm": 0.5949157476425171, "learning_rate": 0.0001, "loss": 1.8768, "step": 1483 }, { "epoch": 0.36036911121903836, "grad_norm": 0.5486117601394653, "learning_rate": 0.0001, "loss": 1.7638, "step": 1484 }, { "epoch": 0.3606119475473531, "grad_norm": 0.5355470180511475, "learning_rate": 0.0001, "loss": 1.6266, "step": 1485 }, { "epoch": 0.3608547838756678, "grad_norm": 0.5477654337882996, "learning_rate": 0.0001, "loss": 1.879, "step": 1486 }, { "epoch": 0.3610976202039825, "grad_norm": 0.5449265241622925, "learning_rate": 0.0001, "loss": 1.7839, "step": 1487 }, { "epoch": 0.36134045653229724, "grad_norm": 0.5875566601753235, "learning_rate": 0.0001, "loss": 1.8003, "step": 1488 }, { "epoch": 0.36158329286061197, "grad_norm": 0.596920371055603, "learning_rate": 0.0001, "loss": 1.8118, "step": 1489 }, { "epoch": 0.36182612918892665, "grad_norm": 0.5583705306053162, "learning_rate": 0.0001, "loss": 1.7894, "step": 1490 }, { "epoch": 0.3620689655172414, "grad_norm": 0.5918691754341125, "learning_rate": 0.0001, "loss": 1.8412, "step": 1491 }, { "epoch": 0.3623118018455561, "grad_norm": 0.5425227284431458, "learning_rate": 0.0001, "loss": 1.6719, "step": 1492 }, { "epoch": 0.3625546381738708, "grad_norm": 0.56281578540802, "learning_rate": 0.0001, "loss": 1.6218, "step": 1493 }, { "epoch": 0.3627974745021855, "grad_norm": 0.5747039318084717, "learning_rate": 0.0001, "loss": 1.7746, "step": 1494 }, { "epoch": 0.36304031083050026, "grad_norm": 0.539703905582428, "learning_rate": 0.0001, "loss": 1.6846, "step": 1495 }, { "epoch": 0.36328314715881493, "grad_norm": 0.5434957146644592, "learning_rate": 0.0001, "loss": 1.6731, "step": 1496 }, { "epoch": 0.36352598348712967, "grad_norm": 0.5798366069793701, "learning_rate": 0.0001, "loss": 1.7758, "step": 1497 }, { "epoch": 0.3637688198154444, "grad_norm": 0.5288923382759094, "learning_rate": 0.0001, "loss": 1.6922, "step": 1498 }, { "epoch": 0.36401165614375913, "grad_norm": 0.5663448572158813, "learning_rate": 0.0001, "loss": 1.7136, "step": 1499 }, { "epoch": 0.3642544924720738, "grad_norm": 0.5524390339851379, "learning_rate": 0.0001, "loss": 1.5974, "step": 1500 }, { "epoch": 0.36449732880038854, "grad_norm": 0.6314302086830139, "learning_rate": 0.0001, "loss": 1.8825, "step": 1501 }, { "epoch": 0.3647401651287033, "grad_norm": 0.5598084330558777, "learning_rate": 0.0001, "loss": 1.7589, "step": 1502 }, { "epoch": 0.36498300145701795, "grad_norm": 0.5555556416511536, "learning_rate": 0.0001, "loss": 1.6338, "step": 1503 }, { "epoch": 0.3652258377853327, "grad_norm": 0.552541971206665, "learning_rate": 0.0001, "loss": 1.6995, "step": 1504 }, { "epoch": 0.3654686741136474, "grad_norm": 0.5386813879013062, "learning_rate": 0.0001, "loss": 1.5861, "step": 1505 }, { "epoch": 0.3657115104419621, "grad_norm": 0.5780212879180908, "learning_rate": 0.0001, "loss": 1.7706, "step": 1506 }, { "epoch": 0.36595434677027683, "grad_norm": 0.5641130805015564, "learning_rate": 0.0001, "loss": 1.7096, "step": 1507 }, { "epoch": 0.36619718309859156, "grad_norm": 0.5723060965538025, "learning_rate": 0.0001, "loss": 1.8688, "step": 1508 }, { "epoch": 0.36644001942690624, "grad_norm": 0.5916156768798828, "learning_rate": 0.0001, "loss": 1.6937, "step": 1509 }, { "epoch": 0.36668285575522097, "grad_norm": 0.5428485870361328, "learning_rate": 0.0001, "loss": 1.5803, "step": 1510 }, { "epoch": 0.3669256920835357, "grad_norm": 0.5606679916381836, "learning_rate": 0.0001, "loss": 1.8716, "step": 1511 }, { "epoch": 0.36716852841185044, "grad_norm": 0.5421441793441772, "learning_rate": 0.0001, "loss": 1.5866, "step": 1512 }, { "epoch": 0.3674113647401651, "grad_norm": 0.5790001153945923, "learning_rate": 0.0001, "loss": 1.7519, "step": 1513 }, { "epoch": 0.36765420106847985, "grad_norm": 0.5827850103378296, "learning_rate": 0.0001, "loss": 1.8111, "step": 1514 }, { "epoch": 0.3678970373967946, "grad_norm": 0.5159997940063477, "learning_rate": 0.0001, "loss": 1.5164, "step": 1515 }, { "epoch": 0.36813987372510926, "grad_norm": 0.5459283590316772, "learning_rate": 0.0001, "loss": 1.77, "step": 1516 }, { "epoch": 0.368382710053424, "grad_norm": 0.5678669810295105, "learning_rate": 0.0001, "loss": 1.5537, "step": 1517 }, { "epoch": 0.3686255463817387, "grad_norm": 0.6005099415779114, "learning_rate": 0.0001, "loss": 1.6727, "step": 1518 }, { "epoch": 0.3688683827100534, "grad_norm": 0.553765058517456, "learning_rate": 0.0001, "loss": 1.6523, "step": 1519 }, { "epoch": 0.36911121903836813, "grad_norm": 0.5875484347343445, "learning_rate": 0.0001, "loss": 1.8644, "step": 1520 }, { "epoch": 0.36935405536668287, "grad_norm": 0.5853337049484253, "learning_rate": 0.0001, "loss": 1.836, "step": 1521 }, { "epoch": 0.3695968916949976, "grad_norm": 0.5599927306175232, "learning_rate": 0.0001, "loss": 1.773, "step": 1522 }, { "epoch": 0.3698397280233123, "grad_norm": 0.5492868423461914, "learning_rate": 0.0001, "loss": 1.538, "step": 1523 }, { "epoch": 0.370082564351627, "grad_norm": 0.5999908447265625, "learning_rate": 0.0001, "loss": 1.773, "step": 1524 }, { "epoch": 0.37032540067994174, "grad_norm": 0.5405163168907166, "learning_rate": 0.0001, "loss": 1.7824, "step": 1525 }, { "epoch": 0.3705682370082564, "grad_norm": 0.5858007669448853, "learning_rate": 0.0001, "loss": 1.7849, "step": 1526 }, { "epoch": 0.37081107333657115, "grad_norm": 0.566652238368988, "learning_rate": 0.0001, "loss": 1.8518, "step": 1527 }, { "epoch": 0.3710539096648859, "grad_norm": 0.5848403573036194, "learning_rate": 0.0001, "loss": 1.7684, "step": 1528 }, { "epoch": 0.37129674599320056, "grad_norm": 0.533105194568634, "learning_rate": 0.0001, "loss": 1.6261, "step": 1529 }, { "epoch": 0.3715395823215153, "grad_norm": 0.5535341501235962, "learning_rate": 0.0001, "loss": 1.6744, "step": 1530 }, { "epoch": 0.37178241864983, "grad_norm": 0.5954918265342712, "learning_rate": 0.0001, "loss": 1.8216, "step": 1531 }, { "epoch": 0.3720252549781447, "grad_norm": 0.5533576607704163, "learning_rate": 0.0001, "loss": 1.753, "step": 1532 }, { "epoch": 0.37226809130645944, "grad_norm": 0.5351048707962036, "learning_rate": 0.0001, "loss": 1.7262, "step": 1533 }, { "epoch": 0.37251092763477417, "grad_norm": 0.5829824209213257, "learning_rate": 0.0001, "loss": 1.7375, "step": 1534 }, { "epoch": 0.3727537639630889, "grad_norm": 0.6141155362129211, "learning_rate": 0.0001, "loss": 1.753, "step": 1535 }, { "epoch": 0.3729966002914036, "grad_norm": 0.5319721698760986, "learning_rate": 0.0001, "loss": 1.6866, "step": 1536 }, { "epoch": 0.3732394366197183, "grad_norm": 0.5934935808181763, "learning_rate": 0.0001, "loss": 1.707, "step": 1537 }, { "epoch": 0.37348227294803304, "grad_norm": 0.5703699588775635, "learning_rate": 0.0001, "loss": 1.6895, "step": 1538 }, { "epoch": 0.3737251092763477, "grad_norm": 0.5700774192810059, "learning_rate": 0.0001, "loss": 1.6917, "step": 1539 }, { "epoch": 0.37396794560466246, "grad_norm": 0.6150509119033813, "learning_rate": 0.0001, "loss": 1.8295, "step": 1540 }, { "epoch": 0.3742107819329772, "grad_norm": 0.570936381816864, "learning_rate": 0.0001, "loss": 1.8789, "step": 1541 }, { "epoch": 0.37445361826129187, "grad_norm": 0.5791286826133728, "learning_rate": 0.0001, "loss": 1.73, "step": 1542 }, { "epoch": 0.3746964545896066, "grad_norm": 0.5663446187973022, "learning_rate": 0.0001, "loss": 1.7994, "step": 1543 }, { "epoch": 0.37493929091792133, "grad_norm": 0.5743141770362854, "learning_rate": 0.0001, "loss": 1.7445, "step": 1544 }, { "epoch": 0.37518212724623606, "grad_norm": 0.5314855575561523, "learning_rate": 0.0001, "loss": 1.7352, "step": 1545 }, { "epoch": 0.37542496357455074, "grad_norm": 0.6015771627426147, "learning_rate": 0.0001, "loss": 1.7693, "step": 1546 }, { "epoch": 0.3756677999028655, "grad_norm": 0.5668714642524719, "learning_rate": 0.0001, "loss": 1.7679, "step": 1547 }, { "epoch": 0.3759106362311802, "grad_norm": 0.5673467516899109, "learning_rate": 0.0001, "loss": 1.7218, "step": 1548 }, { "epoch": 0.3761534725594949, "grad_norm": 0.5929018259048462, "learning_rate": 0.0001, "loss": 1.8452, "step": 1549 }, { "epoch": 0.3763963088878096, "grad_norm": 0.5479089021682739, "learning_rate": 0.0001, "loss": 1.656, "step": 1550 }, { "epoch": 0.37663914521612435, "grad_norm": 0.5798628926277161, "learning_rate": 0.0001, "loss": 1.6719, "step": 1551 }, { "epoch": 0.376881981544439, "grad_norm": 0.5535653829574585, "learning_rate": 0.0001, "loss": 1.6747, "step": 1552 }, { "epoch": 0.37712481787275376, "grad_norm": 0.540377140045166, "learning_rate": 0.0001, "loss": 1.6866, "step": 1553 }, { "epoch": 0.3773676542010685, "grad_norm": 0.5681852698326111, "learning_rate": 0.0001, "loss": 1.6637, "step": 1554 }, { "epoch": 0.37761049052938317, "grad_norm": 0.5816860795021057, "learning_rate": 0.0001, "loss": 1.7122, "step": 1555 }, { "epoch": 0.3778533268576979, "grad_norm": 0.5638381838798523, "learning_rate": 0.0001, "loss": 1.8003, "step": 1556 }, { "epoch": 0.37809616318601263, "grad_norm": 0.5493277907371521, "learning_rate": 0.0001, "loss": 1.7024, "step": 1557 }, { "epoch": 0.37833899951432737, "grad_norm": 0.6079229712486267, "learning_rate": 0.0001, "loss": 1.7679, "step": 1558 }, { "epoch": 0.37858183584264204, "grad_norm": 0.5536306500434875, "learning_rate": 0.0001, "loss": 1.7327, "step": 1559 }, { "epoch": 0.3788246721709568, "grad_norm": 0.6091067790985107, "learning_rate": 0.0001, "loss": 1.7779, "step": 1560 }, { "epoch": 0.3790675084992715, "grad_norm": 0.5523210167884827, "learning_rate": 0.0001, "loss": 1.7955, "step": 1561 }, { "epoch": 0.3793103448275862, "grad_norm": 0.5820581316947937, "learning_rate": 0.0001, "loss": 1.8316, "step": 1562 }, { "epoch": 0.3795531811559009, "grad_norm": 0.5861195921897888, "learning_rate": 0.0001, "loss": 1.7992, "step": 1563 }, { "epoch": 0.37979601748421565, "grad_norm": 0.6205547451972961, "learning_rate": 0.0001, "loss": 1.7119, "step": 1564 }, { "epoch": 0.38003885381253033, "grad_norm": 0.5986005663871765, "learning_rate": 0.0001, "loss": 1.722, "step": 1565 }, { "epoch": 0.38028169014084506, "grad_norm": 0.5632820129394531, "learning_rate": 0.0001, "loss": 1.8611, "step": 1566 }, { "epoch": 0.3805245264691598, "grad_norm": 0.5678552985191345, "learning_rate": 0.0001, "loss": 1.7909, "step": 1567 }, { "epoch": 0.38076736279747453, "grad_norm": 0.5708531141281128, "learning_rate": 0.0001, "loss": 1.8951, "step": 1568 }, { "epoch": 0.3810101991257892, "grad_norm": 0.5985879302024841, "learning_rate": 0.0001, "loss": 1.7499, "step": 1569 }, { "epoch": 0.38125303545410394, "grad_norm": 0.546149492263794, "learning_rate": 0.0001, "loss": 1.5791, "step": 1570 }, { "epoch": 0.38149587178241867, "grad_norm": 0.5527278780937195, "learning_rate": 0.0001, "loss": 1.8572, "step": 1571 }, { "epoch": 0.38173870811073335, "grad_norm": 0.5659234523773193, "learning_rate": 0.0001, "loss": 1.752, "step": 1572 }, { "epoch": 0.3819815444390481, "grad_norm": 0.5430263876914978, "learning_rate": 0.0001, "loss": 1.6348, "step": 1573 }, { "epoch": 0.3822243807673628, "grad_norm": 0.5225880146026611, "learning_rate": 0.0001, "loss": 1.5685, "step": 1574 }, { "epoch": 0.3824672170956775, "grad_norm": 0.5456332564353943, "learning_rate": 0.0001, "loss": 1.707, "step": 1575 }, { "epoch": 0.3827100534239922, "grad_norm": 0.5807480216026306, "learning_rate": 0.0001, "loss": 1.8378, "step": 1576 }, { "epoch": 0.38295288975230696, "grad_norm": 0.5756118297576904, "learning_rate": 0.0001, "loss": 1.8539, "step": 1577 }, { "epoch": 0.38319572608062163, "grad_norm": 0.6021025776863098, "learning_rate": 0.0001, "loss": 1.728, "step": 1578 }, { "epoch": 0.38343856240893637, "grad_norm": 0.5744970440864563, "learning_rate": 0.0001, "loss": 1.7228, "step": 1579 }, { "epoch": 0.3836813987372511, "grad_norm": 0.5658952593803406, "learning_rate": 0.0001, "loss": 1.7816, "step": 1580 }, { "epoch": 0.38392423506556583, "grad_norm": 0.5663431882858276, "learning_rate": 0.0001, "loss": 1.7463, "step": 1581 }, { "epoch": 0.3841670713938805, "grad_norm": 0.5760250091552734, "learning_rate": 0.0001, "loss": 1.879, "step": 1582 }, { "epoch": 0.38440990772219524, "grad_norm": 0.5781668424606323, "learning_rate": 0.0001, "loss": 1.7136, "step": 1583 }, { "epoch": 0.38465274405051, "grad_norm": 0.5917073488235474, "learning_rate": 0.0001, "loss": 1.758, "step": 1584 }, { "epoch": 0.38489558037882465, "grad_norm": 0.6140726208686829, "learning_rate": 0.0001, "loss": 1.8021, "step": 1585 }, { "epoch": 0.3851384167071394, "grad_norm": 0.5443862080574036, "learning_rate": 0.0001, "loss": 1.7591, "step": 1586 }, { "epoch": 0.3853812530354541, "grad_norm": 0.5863355398178101, "learning_rate": 0.0001, "loss": 1.6797, "step": 1587 }, { "epoch": 0.3856240893637688, "grad_norm": 0.5166279673576355, "learning_rate": 0.0001, "loss": 1.5639, "step": 1588 }, { "epoch": 0.38586692569208353, "grad_norm": 0.5556029081344604, "learning_rate": 0.0001, "loss": 1.8708, "step": 1589 }, { "epoch": 0.38610976202039826, "grad_norm": 0.5802101492881775, "learning_rate": 0.0001, "loss": 1.8732, "step": 1590 }, { "epoch": 0.386352598348713, "grad_norm": 0.514442503452301, "learning_rate": 0.0001, "loss": 1.5429, "step": 1591 }, { "epoch": 0.38659543467702767, "grad_norm": 0.5633143782615662, "learning_rate": 0.0001, "loss": 1.7854, "step": 1592 }, { "epoch": 0.3868382710053424, "grad_norm": 0.5631378889083862, "learning_rate": 0.0001, "loss": 1.6847, "step": 1593 }, { "epoch": 0.38708110733365714, "grad_norm": 0.5619434714317322, "learning_rate": 0.0001, "loss": 1.7912, "step": 1594 }, { "epoch": 0.3873239436619718, "grad_norm": 0.5826120972633362, "learning_rate": 0.0001, "loss": 1.7494, "step": 1595 }, { "epoch": 0.38756677999028655, "grad_norm": 0.5944365859031677, "learning_rate": 0.0001, "loss": 1.7426, "step": 1596 }, { "epoch": 0.3878096163186013, "grad_norm": 0.5516484379768372, "learning_rate": 0.0001, "loss": 1.734, "step": 1597 }, { "epoch": 0.38805245264691596, "grad_norm": 0.6092392802238464, "learning_rate": 0.0001, "loss": 1.8984, "step": 1598 }, { "epoch": 0.3882952889752307, "grad_norm": 0.5654324889183044, "learning_rate": 0.0001, "loss": 1.7587, "step": 1599 }, { "epoch": 0.3885381253035454, "grad_norm": 0.5673196315765381, "learning_rate": 0.0001, "loss": 1.6392, "step": 1600 }, { "epoch": 0.3887809616318601, "grad_norm": 0.5889737606048584, "learning_rate": 0.0001, "loss": 1.8059, "step": 1601 }, { "epoch": 0.38902379796017483, "grad_norm": 0.5544754862785339, "learning_rate": 0.0001, "loss": 1.7381, "step": 1602 }, { "epoch": 0.38926663428848957, "grad_norm": 0.5846825838088989, "learning_rate": 0.0001, "loss": 1.7246, "step": 1603 }, { "epoch": 0.3895094706168043, "grad_norm": 0.5588352680206299, "learning_rate": 0.0001, "loss": 1.5838, "step": 1604 }, { "epoch": 0.389752306945119, "grad_norm": 0.5897813439369202, "learning_rate": 0.0001, "loss": 1.7817, "step": 1605 }, { "epoch": 0.3899951432734337, "grad_norm": 0.5556011199951172, "learning_rate": 0.0001, "loss": 1.6548, "step": 1606 }, { "epoch": 0.39023797960174844, "grad_norm": 0.5782007575035095, "learning_rate": 0.0001, "loss": 1.6972, "step": 1607 }, { "epoch": 0.3904808159300631, "grad_norm": 0.5817544460296631, "learning_rate": 0.0001, "loss": 1.7412, "step": 1608 }, { "epoch": 0.39072365225837785, "grad_norm": 0.5802628397941589, "learning_rate": 0.0001, "loss": 1.898, "step": 1609 }, { "epoch": 0.3909664885866926, "grad_norm": 0.5531176924705505, "learning_rate": 0.0001, "loss": 1.6693, "step": 1610 }, { "epoch": 0.39120932491500726, "grad_norm": 0.5454622507095337, "learning_rate": 0.0001, "loss": 1.7513, "step": 1611 }, { "epoch": 0.391452161243322, "grad_norm": 0.5817705392837524, "learning_rate": 0.0001, "loss": 1.6988, "step": 1612 }, { "epoch": 0.3916949975716367, "grad_norm": 0.5824382901191711, "learning_rate": 0.0001, "loss": 1.7906, "step": 1613 }, { "epoch": 0.39193783389995146, "grad_norm": 0.5714345574378967, "learning_rate": 0.0001, "loss": 1.7254, "step": 1614 }, { "epoch": 0.39218067022826614, "grad_norm": 0.6089634895324707, "learning_rate": 0.0001, "loss": 1.8484, "step": 1615 }, { "epoch": 0.39242350655658087, "grad_norm": 0.5468596816062927, "learning_rate": 0.0001, "loss": 1.6468, "step": 1616 }, { "epoch": 0.3926663428848956, "grad_norm": 0.5312457084655762, "learning_rate": 0.0001, "loss": 1.7078, "step": 1617 }, { "epoch": 0.3929091792132103, "grad_norm": 0.5698146224021912, "learning_rate": 0.0001, "loss": 1.6587, "step": 1618 }, { "epoch": 0.393152015541525, "grad_norm": 0.5599279403686523, "learning_rate": 0.0001, "loss": 1.8704, "step": 1619 }, { "epoch": 0.39339485186983975, "grad_norm": 0.5623302459716797, "learning_rate": 0.0001, "loss": 1.7474, "step": 1620 }, { "epoch": 0.3936376881981544, "grad_norm": 0.6181963682174683, "learning_rate": 0.0001, "loss": 1.7799, "step": 1621 }, { "epoch": 0.39388052452646916, "grad_norm": 0.5787026286125183, "learning_rate": 0.0001, "loss": 1.7176, "step": 1622 }, { "epoch": 0.3941233608547839, "grad_norm": 0.6048277616500854, "learning_rate": 0.0001, "loss": 1.7601, "step": 1623 }, { "epoch": 0.39436619718309857, "grad_norm": 0.576568067073822, "learning_rate": 0.0001, "loss": 1.6211, "step": 1624 }, { "epoch": 0.3946090335114133, "grad_norm": 0.6017733812332153, "learning_rate": 0.0001, "loss": 1.9806, "step": 1625 }, { "epoch": 0.39485186983972803, "grad_norm": 0.5605730414390564, "learning_rate": 0.0001, "loss": 1.6925, "step": 1626 }, { "epoch": 0.39509470616804276, "grad_norm": 0.5670095086097717, "learning_rate": 0.0001, "loss": 1.7742, "step": 1627 }, { "epoch": 0.39533754249635744, "grad_norm": 0.5428698658943176, "learning_rate": 0.0001, "loss": 1.6909, "step": 1628 }, { "epoch": 0.3955803788246722, "grad_norm": 0.5755293369293213, "learning_rate": 0.0001, "loss": 1.6695, "step": 1629 }, { "epoch": 0.3958232151529869, "grad_norm": 0.562969982624054, "learning_rate": 0.0001, "loss": 1.7664, "step": 1630 }, { "epoch": 0.3960660514813016, "grad_norm": 0.5230188965797424, "learning_rate": 0.0001, "loss": 1.611, "step": 1631 }, { "epoch": 0.3963088878096163, "grad_norm": 0.553209125995636, "learning_rate": 0.0001, "loss": 1.6211, "step": 1632 }, { "epoch": 0.39655172413793105, "grad_norm": 0.5449209213256836, "learning_rate": 0.0001, "loss": 1.744, "step": 1633 }, { "epoch": 0.3967945604662457, "grad_norm": 0.5767896175384521, "learning_rate": 0.0001, "loss": 1.7867, "step": 1634 }, { "epoch": 0.39703739679456046, "grad_norm": 0.6298505663871765, "learning_rate": 0.0001, "loss": 1.7829, "step": 1635 }, { "epoch": 0.3972802331228752, "grad_norm": 0.6241861581802368, "learning_rate": 0.0001, "loss": 1.8101, "step": 1636 }, { "epoch": 0.3975230694511899, "grad_norm": 0.577860951423645, "learning_rate": 0.0001, "loss": 1.9489, "step": 1637 }, { "epoch": 0.3977659057795046, "grad_norm": 0.5398596525192261, "learning_rate": 0.0001, "loss": 1.7749, "step": 1638 }, { "epoch": 0.39800874210781934, "grad_norm": 0.5384426712989807, "learning_rate": 0.0001, "loss": 1.7272, "step": 1639 }, { "epoch": 0.39825157843613407, "grad_norm": 0.5840216279029846, "learning_rate": 0.0001, "loss": 1.7213, "step": 1640 }, { "epoch": 0.39849441476444875, "grad_norm": 0.5602217316627502, "learning_rate": 0.0001, "loss": 1.7066, "step": 1641 }, { "epoch": 0.3987372510927635, "grad_norm": 0.551922082901001, "learning_rate": 0.0001, "loss": 1.7214, "step": 1642 }, { "epoch": 0.3989800874210782, "grad_norm": 0.570489764213562, "learning_rate": 0.0001, "loss": 1.7119, "step": 1643 }, { "epoch": 0.3992229237493929, "grad_norm": 0.5829172134399414, "learning_rate": 0.0001, "loss": 1.6867, "step": 1644 }, { "epoch": 0.3994657600777076, "grad_norm": 0.5585073828697205, "learning_rate": 0.0001, "loss": 1.8045, "step": 1645 }, { "epoch": 0.39970859640602235, "grad_norm": 0.564818799495697, "learning_rate": 0.0001, "loss": 1.7552, "step": 1646 }, { "epoch": 0.39995143273433703, "grad_norm": 0.575394868850708, "learning_rate": 0.0001, "loss": 1.7556, "step": 1647 }, { "epoch": 0.40019426906265176, "grad_norm": 0.569500744342804, "learning_rate": 0.0001, "loss": 1.6424, "step": 1648 }, { "epoch": 0.4004371053909665, "grad_norm": 0.5607755780220032, "learning_rate": 0.0001, "loss": 1.9207, "step": 1649 }, { "epoch": 0.40067994171928123, "grad_norm": 0.6055318117141724, "learning_rate": 0.0001, "loss": 1.7892, "step": 1650 }, { "epoch": 0.4009227780475959, "grad_norm": 0.5560562014579773, "learning_rate": 0.0001, "loss": 1.6172, "step": 1651 }, { "epoch": 0.40116561437591064, "grad_norm": 0.5481667518615723, "learning_rate": 0.0001, "loss": 1.6769, "step": 1652 }, { "epoch": 0.4014084507042254, "grad_norm": 0.5663360953330994, "learning_rate": 0.0001, "loss": 1.7209, "step": 1653 }, { "epoch": 0.40165128703254005, "grad_norm": 0.5803428292274475, "learning_rate": 0.0001, "loss": 1.653, "step": 1654 }, { "epoch": 0.4018941233608548, "grad_norm": 0.5931940078735352, "learning_rate": 0.0001, "loss": 1.6745, "step": 1655 }, { "epoch": 0.4021369596891695, "grad_norm": 0.5731521844863892, "learning_rate": 0.0001, "loss": 1.6898, "step": 1656 }, { "epoch": 0.4023797960174842, "grad_norm": 0.573701798915863, "learning_rate": 0.0001, "loss": 1.7434, "step": 1657 }, { "epoch": 0.4026226323457989, "grad_norm": 0.5959265232086182, "learning_rate": 0.0001, "loss": 1.9286, "step": 1658 }, { "epoch": 0.40286546867411366, "grad_norm": 0.5944108963012695, "learning_rate": 0.0001, "loss": 1.8003, "step": 1659 }, { "epoch": 0.4031083050024284, "grad_norm": 0.5664130449295044, "learning_rate": 0.0001, "loss": 1.8043, "step": 1660 }, { "epoch": 0.40335114133074307, "grad_norm": 0.6348901391029358, "learning_rate": 0.0001, "loss": 1.6961, "step": 1661 }, { "epoch": 0.4035939776590578, "grad_norm": 0.5614269375801086, "learning_rate": 0.0001, "loss": 1.6181, "step": 1662 }, { "epoch": 0.40383681398737253, "grad_norm": 0.5885881185531616, "learning_rate": 0.0001, "loss": 1.8601, "step": 1663 }, { "epoch": 0.4040796503156872, "grad_norm": 0.5628127455711365, "learning_rate": 0.0001, "loss": 1.8385, "step": 1664 }, { "epoch": 0.40432248664400194, "grad_norm": 0.5846302509307861, "learning_rate": 0.0001, "loss": 1.7258, "step": 1665 }, { "epoch": 0.4045653229723167, "grad_norm": 0.5827339291572571, "learning_rate": 0.0001, "loss": 1.8895, "step": 1666 }, { "epoch": 0.40480815930063135, "grad_norm": 0.566876232624054, "learning_rate": 0.0001, "loss": 1.7546, "step": 1667 }, { "epoch": 0.4050509956289461, "grad_norm": 0.6059743165969849, "learning_rate": 0.0001, "loss": 1.7, "step": 1668 }, { "epoch": 0.4052938319572608, "grad_norm": 0.5789825320243835, "learning_rate": 0.0001, "loss": 1.6329, "step": 1669 }, { "epoch": 0.4055366682855755, "grad_norm": 0.5450209975242615, "learning_rate": 0.0001, "loss": 1.6816, "step": 1670 }, { "epoch": 0.40577950461389023, "grad_norm": 0.5666068196296692, "learning_rate": 0.0001, "loss": 1.8344, "step": 1671 }, { "epoch": 0.40602234094220496, "grad_norm": 0.5817674398422241, "learning_rate": 0.0001, "loss": 1.8103, "step": 1672 }, { "epoch": 0.4062651772705197, "grad_norm": 0.5866352915763855, "learning_rate": 0.0001, "loss": 1.8503, "step": 1673 }, { "epoch": 0.4065080135988344, "grad_norm": 0.5393112897872925, "learning_rate": 0.0001, "loss": 1.5834, "step": 1674 }, { "epoch": 0.4067508499271491, "grad_norm": 0.5831518769264221, "learning_rate": 0.0001, "loss": 1.8358, "step": 1675 }, { "epoch": 0.40699368625546384, "grad_norm": 0.5524880886077881, "learning_rate": 0.0001, "loss": 1.74, "step": 1676 }, { "epoch": 0.4072365225837785, "grad_norm": 0.5588178634643555, "learning_rate": 0.0001, "loss": 1.7048, "step": 1677 }, { "epoch": 0.40747935891209325, "grad_norm": 0.5812271237373352, "learning_rate": 0.0001, "loss": 1.6215, "step": 1678 }, { "epoch": 0.407722195240408, "grad_norm": 0.5578488707542419, "learning_rate": 0.0001, "loss": 1.7485, "step": 1679 }, { "epoch": 0.40796503156872266, "grad_norm": 0.573398768901825, "learning_rate": 0.0001, "loss": 1.8274, "step": 1680 }, { "epoch": 0.4082078678970374, "grad_norm": 0.6085888147354126, "learning_rate": 0.0001, "loss": 1.939, "step": 1681 }, { "epoch": 0.4084507042253521, "grad_norm": 0.5662161111831665, "learning_rate": 0.0001, "loss": 1.7705, "step": 1682 }, { "epoch": 0.4086935405536668, "grad_norm": 0.5128927230834961, "learning_rate": 0.0001, "loss": 1.5974, "step": 1683 }, { "epoch": 0.40893637688198153, "grad_norm": 0.55360347032547, "learning_rate": 0.0001, "loss": 1.7149, "step": 1684 }, { "epoch": 0.40917921321029627, "grad_norm": 0.544804036617279, "learning_rate": 0.0001, "loss": 1.7013, "step": 1685 }, { "epoch": 0.409422049538611, "grad_norm": 0.5657835006713867, "learning_rate": 0.0001, "loss": 1.7755, "step": 1686 }, { "epoch": 0.4096648858669257, "grad_norm": 0.5455095767974854, "learning_rate": 0.0001, "loss": 1.6197, "step": 1687 }, { "epoch": 0.4099077221952404, "grad_norm": 0.6149754524230957, "learning_rate": 0.0001, "loss": 1.5927, "step": 1688 }, { "epoch": 0.41015055852355514, "grad_norm": 0.582294762134552, "learning_rate": 0.0001, "loss": 1.8127, "step": 1689 }, { "epoch": 0.4103933948518698, "grad_norm": 0.5286328792572021, "learning_rate": 0.0001, "loss": 1.584, "step": 1690 }, { "epoch": 0.41063623118018455, "grad_norm": 0.5528634786605835, "learning_rate": 0.0001, "loss": 1.7693, "step": 1691 }, { "epoch": 0.4108790675084993, "grad_norm": 0.5821068286895752, "learning_rate": 0.0001, "loss": 1.8451, "step": 1692 }, { "epoch": 0.41112190383681396, "grad_norm": 0.5589388012886047, "learning_rate": 0.0001, "loss": 1.8007, "step": 1693 }, { "epoch": 0.4113647401651287, "grad_norm": 0.5556886792182922, "learning_rate": 0.0001, "loss": 1.7214, "step": 1694 }, { "epoch": 0.41160757649344343, "grad_norm": 0.5554993748664856, "learning_rate": 0.0001, "loss": 1.6494, "step": 1695 }, { "epoch": 0.41185041282175816, "grad_norm": 0.5385516881942749, "learning_rate": 0.0001, "loss": 1.5886, "step": 1696 }, { "epoch": 0.41209324915007284, "grad_norm": 0.5801841020584106, "learning_rate": 0.0001, "loss": 1.9049, "step": 1697 }, { "epoch": 0.41233608547838757, "grad_norm": 0.5936272144317627, "learning_rate": 0.0001, "loss": 1.7948, "step": 1698 }, { "epoch": 0.4125789218067023, "grad_norm": 0.5968410968780518, "learning_rate": 0.0001, "loss": 1.932, "step": 1699 }, { "epoch": 0.412821758135017, "grad_norm": 0.6293395161628723, "learning_rate": 0.0001, "loss": 1.8595, "step": 1700 }, { "epoch": 0.4130645944633317, "grad_norm": 0.5809662342071533, "learning_rate": 0.0001, "loss": 1.8307, "step": 1701 }, { "epoch": 0.41330743079164645, "grad_norm": 0.5780881643295288, "learning_rate": 0.0001, "loss": 1.7059, "step": 1702 }, { "epoch": 0.4135502671199611, "grad_norm": 0.5809916853904724, "learning_rate": 0.0001, "loss": 1.6587, "step": 1703 }, { "epoch": 0.41379310344827586, "grad_norm": 0.5669182538986206, "learning_rate": 0.0001, "loss": 1.6111, "step": 1704 }, { "epoch": 0.4140359397765906, "grad_norm": 0.5528964996337891, "learning_rate": 0.0001, "loss": 1.6834, "step": 1705 }, { "epoch": 0.41427877610490527, "grad_norm": 0.5690479278564453, "learning_rate": 0.0001, "loss": 1.7468, "step": 1706 }, { "epoch": 0.41452161243322, "grad_norm": 0.5857531428337097, "learning_rate": 0.0001, "loss": 1.8466, "step": 1707 }, { "epoch": 0.41476444876153473, "grad_norm": 0.5584151744842529, "learning_rate": 0.0001, "loss": 1.6398, "step": 1708 }, { "epoch": 0.41500728508984946, "grad_norm": 0.5964641571044922, "learning_rate": 0.0001, "loss": 1.8333, "step": 1709 }, { "epoch": 0.41525012141816414, "grad_norm": 0.6007984280586243, "learning_rate": 0.0001, "loss": 1.7439, "step": 1710 }, { "epoch": 0.4154929577464789, "grad_norm": 0.574708104133606, "learning_rate": 0.0001, "loss": 1.6967, "step": 1711 }, { "epoch": 0.4157357940747936, "grad_norm": 0.5792802572250366, "learning_rate": 0.0001, "loss": 1.7642, "step": 1712 }, { "epoch": 0.4159786304031083, "grad_norm": 0.610641360282898, "learning_rate": 0.0001, "loss": 1.9078, "step": 1713 }, { "epoch": 0.416221466731423, "grad_norm": 0.5648356676101685, "learning_rate": 0.0001, "loss": 1.6872, "step": 1714 }, { "epoch": 0.41646430305973775, "grad_norm": 0.5931963324546814, "learning_rate": 0.0001, "loss": 1.9544, "step": 1715 }, { "epoch": 0.4167071393880524, "grad_norm": 0.5581526160240173, "learning_rate": 0.0001, "loss": 1.8014, "step": 1716 }, { "epoch": 0.41694997571636716, "grad_norm": 0.6139750480651855, "learning_rate": 0.0001, "loss": 1.8504, "step": 1717 }, { "epoch": 0.4171928120446819, "grad_norm": 0.5767641663551331, "learning_rate": 0.0001, "loss": 1.6627, "step": 1718 }, { "epoch": 0.4174356483729966, "grad_norm": 0.5608420372009277, "learning_rate": 0.0001, "loss": 1.6881, "step": 1719 }, { "epoch": 0.4176784847013113, "grad_norm": 0.5488410592079163, "learning_rate": 0.0001, "loss": 1.7154, "step": 1720 }, { "epoch": 0.41792132102962604, "grad_norm": 0.5951592922210693, "learning_rate": 0.0001, "loss": 1.7504, "step": 1721 }, { "epoch": 0.41816415735794077, "grad_norm": 0.5684256553649902, "learning_rate": 0.0001, "loss": 1.6858, "step": 1722 }, { "epoch": 0.41840699368625545, "grad_norm": 0.5633500814437866, "learning_rate": 0.0001, "loss": 1.7456, "step": 1723 }, { "epoch": 0.4186498300145702, "grad_norm": 0.5915912985801697, "learning_rate": 0.0001, "loss": 1.8124, "step": 1724 }, { "epoch": 0.4188926663428849, "grad_norm": 0.5751018524169922, "learning_rate": 0.0001, "loss": 1.7569, "step": 1725 }, { "epoch": 0.4191355026711996, "grad_norm": 0.539318859577179, "learning_rate": 0.0001, "loss": 1.7219, "step": 1726 }, { "epoch": 0.4193783389995143, "grad_norm": 0.5930345058441162, "learning_rate": 0.0001, "loss": 1.8064, "step": 1727 }, { "epoch": 0.41962117532782905, "grad_norm": 0.5691484808921814, "learning_rate": 0.0001, "loss": 1.8677, "step": 1728 }, { "epoch": 0.41986401165614373, "grad_norm": 0.5631294250488281, "learning_rate": 0.0001, "loss": 1.685, "step": 1729 }, { "epoch": 0.42010684798445846, "grad_norm": 0.5319895148277283, "learning_rate": 0.0001, "loss": 1.6579, "step": 1730 }, { "epoch": 0.4203496843127732, "grad_norm": 0.5654844641685486, "learning_rate": 0.0001, "loss": 1.6974, "step": 1731 }, { "epoch": 0.42059252064108793, "grad_norm": 0.5834603905677795, "learning_rate": 0.0001, "loss": 1.7598, "step": 1732 }, { "epoch": 0.4208353569694026, "grad_norm": 0.6204494833946228, "learning_rate": 0.0001, "loss": 1.8967, "step": 1733 }, { "epoch": 0.42107819329771734, "grad_norm": 0.6163272261619568, "learning_rate": 0.0001, "loss": 1.8135, "step": 1734 }, { "epoch": 0.4213210296260321, "grad_norm": 0.5401644110679626, "learning_rate": 0.0001, "loss": 1.6223, "step": 1735 }, { "epoch": 0.42156386595434675, "grad_norm": 0.5756164193153381, "learning_rate": 0.0001, "loss": 1.7559, "step": 1736 }, { "epoch": 0.4218067022826615, "grad_norm": 0.5235920548439026, "learning_rate": 0.0001, "loss": 1.5956, "step": 1737 }, { "epoch": 0.4220495386109762, "grad_norm": 0.5800116658210754, "learning_rate": 0.0001, "loss": 1.8049, "step": 1738 }, { "epoch": 0.4222923749392909, "grad_norm": 0.5359517335891724, "learning_rate": 0.0001, "loss": 1.6485, "step": 1739 }, { "epoch": 0.4225352112676056, "grad_norm": 0.5693761110305786, "learning_rate": 0.0001, "loss": 1.7084, "step": 1740 }, { "epoch": 0.42277804759592036, "grad_norm": 0.565039873123169, "learning_rate": 0.0001, "loss": 1.7292, "step": 1741 }, { "epoch": 0.4230208839242351, "grad_norm": 0.5437293648719788, "learning_rate": 0.0001, "loss": 1.6931, "step": 1742 }, { "epoch": 0.42326372025254977, "grad_norm": 0.5825754404067993, "learning_rate": 0.0001, "loss": 1.8266, "step": 1743 }, { "epoch": 0.4235065565808645, "grad_norm": 0.5717706680297852, "learning_rate": 0.0001, "loss": 1.8816, "step": 1744 }, { "epoch": 0.42374939290917923, "grad_norm": 0.5643588304519653, "learning_rate": 0.0001, "loss": 1.6587, "step": 1745 }, { "epoch": 0.4239922292374939, "grad_norm": 0.5338857769966125, "learning_rate": 0.0001, "loss": 1.7145, "step": 1746 }, { "epoch": 0.42423506556580864, "grad_norm": 0.5591118931770325, "learning_rate": 0.0001, "loss": 1.7031, "step": 1747 }, { "epoch": 0.4244779018941234, "grad_norm": 0.6191600561141968, "learning_rate": 0.0001, "loss": 1.8901, "step": 1748 }, { "epoch": 0.42472073822243805, "grad_norm": 0.5633436441421509, "learning_rate": 0.0001, "loss": 1.721, "step": 1749 }, { "epoch": 0.4249635745507528, "grad_norm": 0.564781904220581, "learning_rate": 0.0001, "loss": 1.7352, "step": 1750 }, { "epoch": 0.4252064108790675, "grad_norm": 0.5725958943367004, "learning_rate": 0.0001, "loss": 1.7598, "step": 1751 }, { "epoch": 0.4254492472073822, "grad_norm": 0.5628660917282104, "learning_rate": 0.0001, "loss": 1.8647, "step": 1752 }, { "epoch": 0.42569208353569693, "grad_norm": 0.5747968554496765, "learning_rate": 0.0001, "loss": 1.643, "step": 1753 }, { "epoch": 0.42593491986401166, "grad_norm": 0.5645371675491333, "learning_rate": 0.0001, "loss": 1.6864, "step": 1754 }, { "epoch": 0.4261777561923264, "grad_norm": 0.5752917528152466, "learning_rate": 0.0001, "loss": 1.8961, "step": 1755 }, { "epoch": 0.4264205925206411, "grad_norm": 0.5931205749511719, "learning_rate": 0.0001, "loss": 1.888, "step": 1756 }, { "epoch": 0.4266634288489558, "grad_norm": 0.581861138343811, "learning_rate": 0.0001, "loss": 1.8166, "step": 1757 }, { "epoch": 0.42690626517727054, "grad_norm": 0.5772149562835693, "learning_rate": 0.0001, "loss": 1.6663, "step": 1758 }, { "epoch": 0.4271491015055852, "grad_norm": 0.5850484371185303, "learning_rate": 0.0001, "loss": 1.7696, "step": 1759 }, { "epoch": 0.42739193783389995, "grad_norm": 0.5533158779144287, "learning_rate": 0.0001, "loss": 1.5562, "step": 1760 }, { "epoch": 0.4276347741622147, "grad_norm": 0.55548495054245, "learning_rate": 0.0001, "loss": 1.8209, "step": 1761 }, { "epoch": 0.42787761049052936, "grad_norm": 0.5674953460693359, "learning_rate": 0.0001, "loss": 1.7567, "step": 1762 }, { "epoch": 0.4281204468188441, "grad_norm": 0.5729069709777832, "learning_rate": 0.0001, "loss": 1.7524, "step": 1763 }, { "epoch": 0.4283632831471588, "grad_norm": 0.5497011542320251, "learning_rate": 0.0001, "loss": 1.5961, "step": 1764 }, { "epoch": 0.42860611947547356, "grad_norm": 0.561162531375885, "learning_rate": 0.0001, "loss": 1.7058, "step": 1765 }, { "epoch": 0.42884895580378823, "grad_norm": 0.5849171280860901, "learning_rate": 0.0001, "loss": 1.8342, "step": 1766 }, { "epoch": 0.42909179213210297, "grad_norm": 0.56157386302948, "learning_rate": 0.0001, "loss": 1.7408, "step": 1767 }, { "epoch": 0.4293346284604177, "grad_norm": 0.6153436303138733, "learning_rate": 0.0001, "loss": 1.8358, "step": 1768 }, { "epoch": 0.4295774647887324, "grad_norm": 0.584125816822052, "learning_rate": 0.0001, "loss": 1.8792, "step": 1769 }, { "epoch": 0.4298203011170471, "grad_norm": 0.6181815266609192, "learning_rate": 0.0001, "loss": 1.8735, "step": 1770 }, { "epoch": 0.43006313744536184, "grad_norm": 0.579014241695404, "learning_rate": 0.0001, "loss": 1.7451, "step": 1771 }, { "epoch": 0.4303059737736765, "grad_norm": 0.5735815763473511, "learning_rate": 0.0001, "loss": 1.779, "step": 1772 }, { "epoch": 0.43054881010199125, "grad_norm": 0.5597633719444275, "learning_rate": 0.0001, "loss": 1.7011, "step": 1773 }, { "epoch": 0.430791646430306, "grad_norm": 0.5619441270828247, "learning_rate": 0.0001, "loss": 1.9089, "step": 1774 }, { "epoch": 0.43103448275862066, "grad_norm": 0.5433188676834106, "learning_rate": 0.0001, "loss": 1.6352, "step": 1775 }, { "epoch": 0.4312773190869354, "grad_norm": 0.6146637201309204, "learning_rate": 0.0001, "loss": 1.7771, "step": 1776 }, { "epoch": 0.43152015541525013, "grad_norm": 0.5701290965080261, "learning_rate": 0.0001, "loss": 1.7436, "step": 1777 }, { "epoch": 0.43176299174356486, "grad_norm": 0.6096238493919373, "learning_rate": 0.0001, "loss": 1.8751, "step": 1778 }, { "epoch": 0.43200582807187954, "grad_norm": 0.6130304336547852, "learning_rate": 0.0001, "loss": 1.8221, "step": 1779 }, { "epoch": 0.43224866440019427, "grad_norm": 0.5769259333610535, "learning_rate": 0.0001, "loss": 1.8647, "step": 1780 }, { "epoch": 0.432491500728509, "grad_norm": 0.5667563080787659, "learning_rate": 0.0001, "loss": 1.6987, "step": 1781 }, { "epoch": 0.4327343370568237, "grad_norm": 0.5886138081550598, "learning_rate": 0.0001, "loss": 1.7908, "step": 1782 }, { "epoch": 0.4329771733851384, "grad_norm": 0.6100742816925049, "learning_rate": 0.0001, "loss": 1.7411, "step": 1783 }, { "epoch": 0.43322000971345315, "grad_norm": 0.5517228245735168, "learning_rate": 0.0001, "loss": 1.617, "step": 1784 }, { "epoch": 0.4334628460417678, "grad_norm": 0.5412666201591492, "learning_rate": 0.0001, "loss": 1.5635, "step": 1785 }, { "epoch": 0.43370568237008256, "grad_norm": 0.5847234129905701, "learning_rate": 0.0001, "loss": 1.8383, "step": 1786 }, { "epoch": 0.4339485186983973, "grad_norm": 0.5502403974533081, "learning_rate": 0.0001, "loss": 1.6905, "step": 1787 }, { "epoch": 0.434191355026712, "grad_norm": 0.5902757048606873, "learning_rate": 0.0001, "loss": 1.8038, "step": 1788 }, { "epoch": 0.4344341913550267, "grad_norm": 0.5674586296081543, "learning_rate": 0.0001, "loss": 1.7138, "step": 1789 }, { "epoch": 0.43467702768334143, "grad_norm": 0.5742107629776001, "learning_rate": 0.0001, "loss": 1.8375, "step": 1790 }, { "epoch": 0.43491986401165617, "grad_norm": 0.579260528087616, "learning_rate": 0.0001, "loss": 1.6283, "step": 1791 }, { "epoch": 0.43516270033997084, "grad_norm": 0.5684863328933716, "learning_rate": 0.0001, "loss": 1.4185, "step": 1792 }, { "epoch": 0.4354055366682856, "grad_norm": 0.5778998732566833, "learning_rate": 0.0001, "loss": 1.6633, "step": 1793 }, { "epoch": 0.4356483729966003, "grad_norm": 0.5546566843986511, "learning_rate": 0.0001, "loss": 1.5953, "step": 1794 }, { "epoch": 0.435891209324915, "grad_norm": 0.581365704536438, "learning_rate": 0.0001, "loss": 1.7352, "step": 1795 }, { "epoch": 0.4361340456532297, "grad_norm": 0.5921564698219299, "learning_rate": 0.0001, "loss": 1.7575, "step": 1796 }, { "epoch": 0.43637688198154445, "grad_norm": 0.5925065875053406, "learning_rate": 0.0001, "loss": 1.9726, "step": 1797 }, { "epoch": 0.43661971830985913, "grad_norm": 0.5550657510757446, "learning_rate": 0.0001, "loss": 1.6332, "step": 1798 }, { "epoch": 0.43686255463817386, "grad_norm": 0.5925093293190002, "learning_rate": 0.0001, "loss": 1.6507, "step": 1799 }, { "epoch": 0.4371053909664886, "grad_norm": 0.5593153238296509, "learning_rate": 0.0001, "loss": 1.6802, "step": 1800 }, { "epoch": 0.4373482272948033, "grad_norm": 0.5875351428985596, "learning_rate": 0.0001, "loss": 1.7225, "step": 1801 }, { "epoch": 0.437591063623118, "grad_norm": 0.6166852116584778, "learning_rate": 0.0001, "loss": 1.8489, "step": 1802 }, { "epoch": 0.43783389995143274, "grad_norm": 0.5772126913070679, "learning_rate": 0.0001, "loss": 1.9313, "step": 1803 }, { "epoch": 0.43807673627974747, "grad_norm": 0.5607519745826721, "learning_rate": 0.0001, "loss": 1.7712, "step": 1804 }, { "epoch": 0.43831957260806215, "grad_norm": 0.5917442440986633, "learning_rate": 0.0001, "loss": 1.6894, "step": 1805 }, { "epoch": 0.4385624089363769, "grad_norm": 0.599422037601471, "learning_rate": 0.0001, "loss": 1.9742, "step": 1806 }, { "epoch": 0.4388052452646916, "grad_norm": 0.5593117475509644, "learning_rate": 0.0001, "loss": 1.8367, "step": 1807 }, { "epoch": 0.4390480815930063, "grad_norm": 0.5732071399688721, "learning_rate": 0.0001, "loss": 1.8717, "step": 1808 }, { "epoch": 0.439290917921321, "grad_norm": 0.5580980777740479, "learning_rate": 0.0001, "loss": 1.6426, "step": 1809 }, { "epoch": 0.43953375424963576, "grad_norm": 0.615973949432373, "learning_rate": 0.0001, "loss": 1.8811, "step": 1810 }, { "epoch": 0.4397765905779505, "grad_norm": 0.6223271489143372, "learning_rate": 0.0001, "loss": 1.9497, "step": 1811 }, { "epoch": 0.44001942690626517, "grad_norm": 0.5596504211425781, "learning_rate": 0.0001, "loss": 1.791, "step": 1812 }, { "epoch": 0.4402622632345799, "grad_norm": 0.5751744508743286, "learning_rate": 0.0001, "loss": 1.8838, "step": 1813 }, { "epoch": 0.44050509956289463, "grad_norm": 0.5983855724334717, "learning_rate": 0.0001, "loss": 1.8151, "step": 1814 }, { "epoch": 0.4407479358912093, "grad_norm": 0.5524014830589294, "learning_rate": 0.0001, "loss": 1.7145, "step": 1815 }, { "epoch": 0.44099077221952404, "grad_norm": 0.5575211644172668, "learning_rate": 0.0001, "loss": 1.7025, "step": 1816 }, { "epoch": 0.4412336085478388, "grad_norm": 0.5798826217651367, "learning_rate": 0.0001, "loss": 1.788, "step": 1817 }, { "epoch": 0.44147644487615345, "grad_norm": 0.5255656242370605, "learning_rate": 0.0001, "loss": 1.656, "step": 1818 }, { "epoch": 0.4417192812044682, "grad_norm": 0.560876727104187, "learning_rate": 0.0001, "loss": 1.6355, "step": 1819 }, { "epoch": 0.4419621175327829, "grad_norm": 0.5471630692481995, "learning_rate": 0.0001, "loss": 1.5065, "step": 1820 }, { "epoch": 0.4422049538610976, "grad_norm": 0.5710832476615906, "learning_rate": 0.0001, "loss": 1.762, "step": 1821 }, { "epoch": 0.4424477901894123, "grad_norm": 0.582895815372467, "learning_rate": 0.0001, "loss": 1.6321, "step": 1822 }, { "epoch": 0.44269062651772706, "grad_norm": 0.5692479014396667, "learning_rate": 0.0001, "loss": 1.7125, "step": 1823 }, { "epoch": 0.4429334628460418, "grad_norm": 0.5563973188400269, "learning_rate": 0.0001, "loss": 1.7806, "step": 1824 }, { "epoch": 0.44317629917435647, "grad_norm": 0.6028217673301697, "learning_rate": 0.0001, "loss": 1.7916, "step": 1825 }, { "epoch": 0.4434191355026712, "grad_norm": 0.6247011423110962, "learning_rate": 0.0001, "loss": 1.7032, "step": 1826 }, { "epoch": 0.44366197183098594, "grad_norm": 0.5845817923545837, "learning_rate": 0.0001, "loss": 1.9361, "step": 1827 }, { "epoch": 0.4439048081593006, "grad_norm": 0.586754322052002, "learning_rate": 0.0001, "loss": 1.8163, "step": 1828 }, { "epoch": 0.44414764448761535, "grad_norm": 0.6034722924232483, "learning_rate": 0.0001, "loss": 1.8628, "step": 1829 }, { "epoch": 0.4443904808159301, "grad_norm": 0.5657421946525574, "learning_rate": 0.0001, "loss": 1.7418, "step": 1830 }, { "epoch": 0.44463331714424476, "grad_norm": 0.6093968152999878, "learning_rate": 0.0001, "loss": 1.7401, "step": 1831 }, { "epoch": 0.4448761534725595, "grad_norm": 0.5818173885345459, "learning_rate": 0.0001, "loss": 1.6932, "step": 1832 }, { "epoch": 0.4451189898008742, "grad_norm": 0.578579306602478, "learning_rate": 0.0001, "loss": 1.8248, "step": 1833 }, { "epoch": 0.44536182612918895, "grad_norm": 0.5778454542160034, "learning_rate": 0.0001, "loss": 1.7586, "step": 1834 }, { "epoch": 0.44560466245750363, "grad_norm": 0.5978708267211914, "learning_rate": 0.0001, "loss": 1.7819, "step": 1835 }, { "epoch": 0.44584749878581836, "grad_norm": 0.5598545670509338, "learning_rate": 0.0001, "loss": 1.6612, "step": 1836 }, { "epoch": 0.4460903351141331, "grad_norm": 0.6154178977012634, "learning_rate": 0.0001, "loss": 1.7779, "step": 1837 }, { "epoch": 0.4463331714424478, "grad_norm": 0.5966653823852539, "learning_rate": 0.0001, "loss": 1.9132, "step": 1838 }, { "epoch": 0.4465760077707625, "grad_norm": 0.5512944459915161, "learning_rate": 0.0001, "loss": 1.6269, "step": 1839 }, { "epoch": 0.44681884409907724, "grad_norm": 0.5656938552856445, "learning_rate": 0.0001, "loss": 1.5489, "step": 1840 }, { "epoch": 0.4470616804273919, "grad_norm": 0.5822314023971558, "learning_rate": 0.0001, "loss": 1.7625, "step": 1841 }, { "epoch": 0.44730451675570665, "grad_norm": 0.6166737675666809, "learning_rate": 0.0001, "loss": 1.8711, "step": 1842 }, { "epoch": 0.4475473530840214, "grad_norm": 0.573864221572876, "learning_rate": 0.0001, "loss": 1.6822, "step": 1843 }, { "epoch": 0.44779018941233606, "grad_norm": 0.5893234014511108, "learning_rate": 0.0001, "loss": 1.7329, "step": 1844 }, { "epoch": 0.4480330257406508, "grad_norm": 0.6403665542602539, "learning_rate": 0.0001, "loss": 1.8165, "step": 1845 }, { "epoch": 0.4482758620689655, "grad_norm": 0.5796035528182983, "learning_rate": 0.0001, "loss": 1.6548, "step": 1846 }, { "epoch": 0.44851869839728026, "grad_norm": 0.5376893281936646, "learning_rate": 0.0001, "loss": 1.5946, "step": 1847 }, { "epoch": 0.44876153472559493, "grad_norm": 0.5643702745437622, "learning_rate": 0.0001, "loss": 1.7544, "step": 1848 }, { "epoch": 0.44900437105390967, "grad_norm": 0.5793332457542419, "learning_rate": 0.0001, "loss": 1.8197, "step": 1849 }, { "epoch": 0.4492472073822244, "grad_norm": 0.5740710496902466, "learning_rate": 0.0001, "loss": 1.8129, "step": 1850 }, { "epoch": 0.4494900437105391, "grad_norm": 0.5284385681152344, "learning_rate": 0.0001, "loss": 1.5615, "step": 1851 }, { "epoch": 0.4497328800388538, "grad_norm": 0.5854473114013672, "learning_rate": 0.0001, "loss": 1.5596, "step": 1852 }, { "epoch": 0.44997571636716854, "grad_norm": 0.5556619763374329, "learning_rate": 0.0001, "loss": 1.7712, "step": 1853 }, { "epoch": 0.4502185526954832, "grad_norm": 0.596434473991394, "learning_rate": 0.0001, "loss": 1.8985, "step": 1854 }, { "epoch": 0.45046138902379795, "grad_norm": 0.5931633710861206, "learning_rate": 0.0001, "loss": 1.8248, "step": 1855 }, { "epoch": 0.4507042253521127, "grad_norm": 0.5966574549674988, "learning_rate": 0.0001, "loss": 1.8493, "step": 1856 }, { "epoch": 0.4509470616804274, "grad_norm": 0.5613736510276794, "learning_rate": 0.0001, "loss": 1.7375, "step": 1857 }, { "epoch": 0.4511898980087421, "grad_norm": 0.6010239720344543, "learning_rate": 0.0001, "loss": 1.8562, "step": 1858 }, { "epoch": 0.45143273433705683, "grad_norm": 0.566714346408844, "learning_rate": 0.0001, "loss": 1.7504, "step": 1859 }, { "epoch": 0.45167557066537156, "grad_norm": 0.5681232810020447, "learning_rate": 0.0001, "loss": 1.7613, "step": 1860 }, { "epoch": 0.45191840699368624, "grad_norm": 0.5798001289367676, "learning_rate": 0.0001, "loss": 1.9256, "step": 1861 }, { "epoch": 0.45216124332200097, "grad_norm": 0.553661048412323, "learning_rate": 0.0001, "loss": 1.6513, "step": 1862 }, { "epoch": 0.4524040796503157, "grad_norm": 0.5698717832565308, "learning_rate": 0.0001, "loss": 1.7352, "step": 1863 }, { "epoch": 0.4526469159786304, "grad_norm": 0.5184240937232971, "learning_rate": 0.0001, "loss": 1.4759, "step": 1864 }, { "epoch": 0.4528897523069451, "grad_norm": 0.5855940580368042, "learning_rate": 0.0001, "loss": 1.6505, "step": 1865 }, { "epoch": 0.45313258863525985, "grad_norm": 0.5758311152458191, "learning_rate": 0.0001, "loss": 1.6673, "step": 1866 }, { "epoch": 0.4533754249635745, "grad_norm": 0.615971028804779, "learning_rate": 0.0001, "loss": 1.735, "step": 1867 }, { "epoch": 0.45361826129188926, "grad_norm": 0.5524644255638123, "learning_rate": 0.0001, "loss": 1.6946, "step": 1868 }, { "epoch": 0.453861097620204, "grad_norm": 0.5362394452095032, "learning_rate": 0.0001, "loss": 1.6212, "step": 1869 }, { "epoch": 0.4541039339485187, "grad_norm": 0.585970938205719, "learning_rate": 0.0001, "loss": 1.6943, "step": 1870 }, { "epoch": 0.4543467702768334, "grad_norm": 0.5870539546012878, "learning_rate": 0.0001, "loss": 1.741, "step": 1871 }, { "epoch": 0.45458960660514813, "grad_norm": 0.5760737061500549, "learning_rate": 0.0001, "loss": 1.8845, "step": 1872 }, { "epoch": 0.45483244293346287, "grad_norm": 0.5538679957389832, "learning_rate": 0.0001, "loss": 1.7074, "step": 1873 }, { "epoch": 0.45507527926177754, "grad_norm": 0.5464616417884827, "learning_rate": 0.0001, "loss": 1.6054, "step": 1874 }, { "epoch": 0.4553181155900923, "grad_norm": 0.5502505898475647, "learning_rate": 0.0001, "loss": 1.72, "step": 1875 }, { "epoch": 0.455560951918407, "grad_norm": 0.5644949078559875, "learning_rate": 0.0001, "loss": 1.7139, "step": 1876 }, { "epoch": 0.4558037882467217, "grad_norm": 0.5552821159362793, "learning_rate": 0.0001, "loss": 1.7832, "step": 1877 }, { "epoch": 0.4560466245750364, "grad_norm": 0.6095209717750549, "learning_rate": 0.0001, "loss": 1.8575, "step": 1878 }, { "epoch": 0.45628946090335115, "grad_norm": 0.5591077208518982, "learning_rate": 0.0001, "loss": 1.5926, "step": 1879 }, { "epoch": 0.4565322972316659, "grad_norm": 0.5598617196083069, "learning_rate": 0.0001, "loss": 1.7178, "step": 1880 }, { "epoch": 0.45677513355998056, "grad_norm": 0.5825666189193726, "learning_rate": 0.0001, "loss": 1.6868, "step": 1881 }, { "epoch": 0.4570179698882953, "grad_norm": 0.614214301109314, "learning_rate": 0.0001, "loss": 1.8865, "step": 1882 }, { "epoch": 0.45726080621661, "grad_norm": 0.5513318777084351, "learning_rate": 0.0001, "loss": 1.7676, "step": 1883 }, { "epoch": 0.4575036425449247, "grad_norm": 0.6541092991828918, "learning_rate": 0.0001, "loss": 1.9426, "step": 1884 }, { "epoch": 0.45774647887323944, "grad_norm": 0.6022918224334717, "learning_rate": 0.0001, "loss": 1.8112, "step": 1885 }, { "epoch": 0.45798931520155417, "grad_norm": 0.5467543601989746, "learning_rate": 0.0001, "loss": 1.6042, "step": 1886 }, { "epoch": 0.45823215152986885, "grad_norm": 0.593727171421051, "learning_rate": 0.0001, "loss": 1.8278, "step": 1887 }, { "epoch": 0.4584749878581836, "grad_norm": 0.5079317688941956, "learning_rate": 0.0001, "loss": 1.6571, "step": 1888 }, { "epoch": 0.4587178241864983, "grad_norm": 0.5998428463935852, "learning_rate": 0.0001, "loss": 1.8235, "step": 1889 }, { "epoch": 0.458960660514813, "grad_norm": 0.622458279132843, "learning_rate": 0.0001, "loss": 1.8577, "step": 1890 }, { "epoch": 0.4592034968431277, "grad_norm": 0.5983126759529114, "learning_rate": 0.0001, "loss": 1.6378, "step": 1891 }, { "epoch": 0.45944633317144246, "grad_norm": 0.5393468737602234, "learning_rate": 0.0001, "loss": 1.7262, "step": 1892 }, { "epoch": 0.4596891694997572, "grad_norm": 0.5763282775878906, "learning_rate": 0.0001, "loss": 1.7074, "step": 1893 }, { "epoch": 0.45993200582807187, "grad_norm": 0.6060768961906433, "learning_rate": 0.0001, "loss": 1.8217, "step": 1894 }, { "epoch": 0.4601748421563866, "grad_norm": 0.5790578722953796, "learning_rate": 0.0001, "loss": 1.5828, "step": 1895 }, { "epoch": 0.46041767848470133, "grad_norm": 0.5628831386566162, "learning_rate": 0.0001, "loss": 1.6629, "step": 1896 }, { "epoch": 0.460660514813016, "grad_norm": 0.583311140537262, "learning_rate": 0.0001, "loss": 1.7067, "step": 1897 }, { "epoch": 0.46090335114133074, "grad_norm": 0.5306105613708496, "learning_rate": 0.0001, "loss": 1.6153, "step": 1898 }, { "epoch": 0.4611461874696455, "grad_norm": 0.6257902383804321, "learning_rate": 0.0001, "loss": 1.749, "step": 1899 }, { "epoch": 0.46138902379796015, "grad_norm": 0.5784273147583008, "learning_rate": 0.0001, "loss": 1.7965, "step": 1900 }, { "epoch": 0.4616318601262749, "grad_norm": 0.5503388047218323, "learning_rate": 0.0001, "loss": 1.6824, "step": 1901 }, { "epoch": 0.4618746964545896, "grad_norm": 0.5989645719528198, "learning_rate": 0.0001, "loss": 1.5835, "step": 1902 }, { "epoch": 0.46211753278290435, "grad_norm": 0.5790361166000366, "learning_rate": 0.0001, "loss": 1.7209, "step": 1903 }, { "epoch": 0.462360369111219, "grad_norm": 0.5661267042160034, "learning_rate": 0.0001, "loss": 1.6861, "step": 1904 }, { "epoch": 0.46260320543953376, "grad_norm": 0.5857040286064148, "learning_rate": 0.0001, "loss": 1.734, "step": 1905 }, { "epoch": 0.4628460417678485, "grad_norm": 0.6014676690101624, "learning_rate": 0.0001, "loss": 1.7985, "step": 1906 }, { "epoch": 0.46308887809616317, "grad_norm": 0.5533243417739868, "learning_rate": 0.0001, "loss": 1.5735, "step": 1907 }, { "epoch": 0.4633317144244779, "grad_norm": 0.5554340481758118, "learning_rate": 0.0001, "loss": 1.5683, "step": 1908 }, { "epoch": 0.46357455075279264, "grad_norm": 0.5753559470176697, "learning_rate": 0.0001, "loss": 1.6389, "step": 1909 }, { "epoch": 0.4638173870811073, "grad_norm": 0.6063196659088135, "learning_rate": 0.0001, "loss": 1.6765, "step": 1910 }, { "epoch": 0.46406022340942205, "grad_norm": 0.6287242770195007, "learning_rate": 0.0001, "loss": 1.883, "step": 1911 }, { "epoch": 0.4643030597377368, "grad_norm": 0.5789053440093994, "learning_rate": 0.0001, "loss": 1.8475, "step": 1912 }, { "epoch": 0.46454589606605146, "grad_norm": 0.5780322551727295, "learning_rate": 0.0001, "loss": 1.8763, "step": 1913 }, { "epoch": 0.4647887323943662, "grad_norm": 0.5103904008865356, "learning_rate": 0.0001, "loss": 1.498, "step": 1914 }, { "epoch": 0.4650315687226809, "grad_norm": 0.5886533856391907, "learning_rate": 0.0001, "loss": 1.7561, "step": 1915 }, { "epoch": 0.46527440505099565, "grad_norm": 0.5434485077857971, "learning_rate": 0.0001, "loss": 1.5602, "step": 1916 }, { "epoch": 0.46551724137931033, "grad_norm": 0.5687652230262756, "learning_rate": 0.0001, "loss": 1.7215, "step": 1917 }, { "epoch": 0.46576007770762506, "grad_norm": 0.5779525637626648, "learning_rate": 0.0001, "loss": 1.7172, "step": 1918 }, { "epoch": 0.4660029140359398, "grad_norm": 0.551944375038147, "learning_rate": 0.0001, "loss": 1.6293, "step": 1919 }, { "epoch": 0.4662457503642545, "grad_norm": 0.5759638547897339, "learning_rate": 0.0001, "loss": 1.643, "step": 1920 }, { "epoch": 0.4664885866925692, "grad_norm": 0.5612455010414124, "learning_rate": 0.0001, "loss": 1.803, "step": 1921 }, { "epoch": 0.46673142302088394, "grad_norm": 0.5707778334617615, "learning_rate": 0.0001, "loss": 1.7959, "step": 1922 }, { "epoch": 0.4669742593491986, "grad_norm": 0.6270820498466492, "learning_rate": 0.0001, "loss": 1.7296, "step": 1923 }, { "epoch": 0.46721709567751335, "grad_norm": 0.5518383383750916, "learning_rate": 0.0001, "loss": 1.6417, "step": 1924 }, { "epoch": 0.4674599320058281, "grad_norm": 0.5935297012329102, "learning_rate": 0.0001, "loss": 1.7797, "step": 1925 }, { "epoch": 0.4677027683341428, "grad_norm": 0.617348849773407, "learning_rate": 0.0001, "loss": 1.7296, "step": 1926 }, { "epoch": 0.4679456046624575, "grad_norm": 0.5728745460510254, "learning_rate": 0.0001, "loss": 1.7844, "step": 1927 }, { "epoch": 0.4681884409907722, "grad_norm": 0.5723167657852173, "learning_rate": 0.0001, "loss": 1.7022, "step": 1928 }, { "epoch": 0.46843127731908696, "grad_norm": 0.5914849638938904, "learning_rate": 0.0001, "loss": 1.8989, "step": 1929 }, { "epoch": 0.46867411364740164, "grad_norm": 0.5882318019866943, "learning_rate": 0.0001, "loss": 1.6923, "step": 1930 }, { "epoch": 0.46891694997571637, "grad_norm": 0.5570792555809021, "learning_rate": 0.0001, "loss": 1.7278, "step": 1931 }, { "epoch": 0.4691597863040311, "grad_norm": 0.5721078515052795, "learning_rate": 0.0001, "loss": 1.8832, "step": 1932 }, { "epoch": 0.4694026226323458, "grad_norm": 0.6023865342140198, "learning_rate": 0.0001, "loss": 1.9534, "step": 1933 }, { "epoch": 0.4696454589606605, "grad_norm": 0.6025052666664124, "learning_rate": 0.0001, "loss": 1.8732, "step": 1934 }, { "epoch": 0.46988829528897524, "grad_norm": 0.5464321374893188, "learning_rate": 0.0001, "loss": 1.6519, "step": 1935 }, { "epoch": 0.4701311316172899, "grad_norm": 0.6009752750396729, "learning_rate": 0.0001, "loss": 1.8822, "step": 1936 }, { "epoch": 0.47037396794560465, "grad_norm": 0.6073281764984131, "learning_rate": 0.0001, "loss": 1.8904, "step": 1937 }, { "epoch": 0.4706168042739194, "grad_norm": 0.5457764863967896, "learning_rate": 0.0001, "loss": 1.7498, "step": 1938 }, { "epoch": 0.4708596406022341, "grad_norm": 0.5712628364562988, "learning_rate": 0.0001, "loss": 1.8484, "step": 1939 }, { "epoch": 0.4711024769305488, "grad_norm": 0.5740649700164795, "learning_rate": 0.0001, "loss": 1.7276, "step": 1940 }, { "epoch": 0.47134531325886353, "grad_norm": 0.5414160490036011, "learning_rate": 0.0001, "loss": 1.6095, "step": 1941 }, { "epoch": 0.47158814958717826, "grad_norm": 0.5896227359771729, "learning_rate": 0.0001, "loss": 1.7745, "step": 1942 }, { "epoch": 0.47183098591549294, "grad_norm": 0.5514649152755737, "learning_rate": 0.0001, "loss": 1.5694, "step": 1943 }, { "epoch": 0.4720738222438077, "grad_norm": 0.5307028293609619, "learning_rate": 0.0001, "loss": 1.5526, "step": 1944 }, { "epoch": 0.4723166585721224, "grad_norm": 0.5563642978668213, "learning_rate": 0.0001, "loss": 1.7063, "step": 1945 }, { "epoch": 0.4725594949004371, "grad_norm": 0.5585283637046814, "learning_rate": 0.0001, "loss": 1.5269, "step": 1946 }, { "epoch": 0.4728023312287518, "grad_norm": 0.5462757349014282, "learning_rate": 0.0001, "loss": 1.6393, "step": 1947 }, { "epoch": 0.47304516755706655, "grad_norm": 0.5939182043075562, "learning_rate": 0.0001, "loss": 1.9834, "step": 1948 }, { "epoch": 0.4732880038853812, "grad_norm": 0.5892949104309082, "learning_rate": 0.0001, "loss": 1.5758, "step": 1949 }, { "epoch": 0.47353084021369596, "grad_norm": 0.5596404075622559, "learning_rate": 0.0001, "loss": 1.569, "step": 1950 }, { "epoch": 0.4737736765420107, "grad_norm": 0.551993727684021, "learning_rate": 0.0001, "loss": 1.7142, "step": 1951 }, { "epoch": 0.4740165128703254, "grad_norm": 0.5357285141944885, "learning_rate": 0.0001, "loss": 1.6426, "step": 1952 }, { "epoch": 0.4742593491986401, "grad_norm": 0.6192750334739685, "learning_rate": 0.0001, "loss": 1.8688, "step": 1953 }, { "epoch": 0.47450218552695483, "grad_norm": 0.5935304164886475, "learning_rate": 0.0001, "loss": 1.6548, "step": 1954 }, { "epoch": 0.47474502185526957, "grad_norm": 0.5641899108886719, "learning_rate": 0.0001, "loss": 1.7424, "step": 1955 }, { "epoch": 0.47498785818358424, "grad_norm": 0.5489999651908875, "learning_rate": 0.0001, "loss": 1.7741, "step": 1956 }, { "epoch": 0.475230694511899, "grad_norm": 0.5819328427314758, "learning_rate": 0.0001, "loss": 1.7622, "step": 1957 }, { "epoch": 0.4754735308402137, "grad_norm": 0.5315662622451782, "learning_rate": 0.0001, "loss": 1.6401, "step": 1958 }, { "epoch": 0.4757163671685284, "grad_norm": 0.5343338251113892, "learning_rate": 0.0001, "loss": 1.6743, "step": 1959 }, { "epoch": 0.4759592034968431, "grad_norm": 0.5986287593841553, "learning_rate": 0.0001, "loss": 1.7476, "step": 1960 }, { "epoch": 0.47620203982515785, "grad_norm": 0.5720230937004089, "learning_rate": 0.0001, "loss": 1.7423, "step": 1961 }, { "epoch": 0.4764448761534726, "grad_norm": 0.5954564809799194, "learning_rate": 0.0001, "loss": 1.8605, "step": 1962 }, { "epoch": 0.47668771248178726, "grad_norm": 0.5579463243484497, "learning_rate": 0.0001, "loss": 1.7147, "step": 1963 }, { "epoch": 0.476930548810102, "grad_norm": 0.5727841258049011, "learning_rate": 0.0001, "loss": 1.7308, "step": 1964 }, { "epoch": 0.47717338513841673, "grad_norm": 0.6127378940582275, "learning_rate": 0.0001, "loss": 1.8088, "step": 1965 }, { "epoch": 0.4774162214667314, "grad_norm": 0.5378556251525879, "learning_rate": 0.0001, "loss": 1.7187, "step": 1966 }, { "epoch": 0.47765905779504614, "grad_norm": 0.5923590660095215, "learning_rate": 0.0001, "loss": 1.7997, "step": 1967 }, { "epoch": 0.47790189412336087, "grad_norm": 0.6295888423919678, "learning_rate": 0.0001, "loss": 1.5496, "step": 1968 }, { "epoch": 0.47814473045167555, "grad_norm": 0.5650759935379028, "learning_rate": 0.0001, "loss": 1.7427, "step": 1969 }, { "epoch": 0.4783875667799903, "grad_norm": 0.5341917872428894, "learning_rate": 0.0001, "loss": 1.4833, "step": 1970 }, { "epoch": 0.478630403108305, "grad_norm": 0.5882410407066345, "learning_rate": 0.0001, "loss": 1.7442, "step": 1971 }, { "epoch": 0.4788732394366197, "grad_norm": 0.5801606774330139, "learning_rate": 0.0001, "loss": 1.723, "step": 1972 }, { "epoch": 0.4791160757649344, "grad_norm": 0.6176870465278625, "learning_rate": 0.0001, "loss": 1.7344, "step": 1973 }, { "epoch": 0.47935891209324916, "grad_norm": 0.583313524723053, "learning_rate": 0.0001, "loss": 1.6592, "step": 1974 }, { "epoch": 0.4796017484215639, "grad_norm": 0.5627610087394714, "learning_rate": 0.0001, "loss": 1.5799, "step": 1975 }, { "epoch": 0.47984458474987857, "grad_norm": 0.5924595594406128, "learning_rate": 0.0001, "loss": 1.8331, "step": 1976 }, { "epoch": 0.4800874210781933, "grad_norm": 0.6066086888313293, "learning_rate": 0.0001, "loss": 1.8329, "step": 1977 }, { "epoch": 0.48033025740650803, "grad_norm": 0.5486518144607544, "learning_rate": 0.0001, "loss": 1.7316, "step": 1978 }, { "epoch": 0.4805730937348227, "grad_norm": 0.5555726885795593, "learning_rate": 0.0001, "loss": 1.6003, "step": 1979 }, { "epoch": 0.48081593006313744, "grad_norm": 0.6334558725357056, "learning_rate": 0.0001, "loss": 1.8482, "step": 1980 }, { "epoch": 0.4810587663914522, "grad_norm": 0.5876926183700562, "learning_rate": 0.0001, "loss": 1.8004, "step": 1981 }, { "epoch": 0.48130160271976685, "grad_norm": 0.5859856009483337, "learning_rate": 0.0001, "loss": 1.74, "step": 1982 }, { "epoch": 0.4815444390480816, "grad_norm": 0.6316081881523132, "learning_rate": 0.0001, "loss": 2.1124, "step": 1983 }, { "epoch": 0.4817872753763963, "grad_norm": 0.5542670488357544, "learning_rate": 0.0001, "loss": 1.6096, "step": 1984 }, { "epoch": 0.48203011170471105, "grad_norm": 0.6528927683830261, "learning_rate": 0.0001, "loss": 1.7741, "step": 1985 }, { "epoch": 0.48227294803302573, "grad_norm": 0.6087407469749451, "learning_rate": 0.0001, "loss": 1.8804, "step": 1986 }, { "epoch": 0.48251578436134046, "grad_norm": 0.642598032951355, "learning_rate": 0.0001, "loss": 1.8036, "step": 1987 }, { "epoch": 0.4827586206896552, "grad_norm": 0.5882740020751953, "learning_rate": 0.0001, "loss": 1.7058, "step": 1988 }, { "epoch": 0.48300145701796987, "grad_norm": 0.5687981843948364, "learning_rate": 0.0001, "loss": 1.8813, "step": 1989 }, { "epoch": 0.4832442933462846, "grad_norm": 0.5380786061286926, "learning_rate": 0.0001, "loss": 1.5937, "step": 1990 }, { "epoch": 0.48348712967459934, "grad_norm": 0.5477362275123596, "learning_rate": 0.0001, "loss": 1.6059, "step": 1991 }, { "epoch": 0.483729966002914, "grad_norm": 0.5760575532913208, "learning_rate": 0.0001, "loss": 1.8807, "step": 1992 }, { "epoch": 0.48397280233122875, "grad_norm": 0.5555697083473206, "learning_rate": 0.0001, "loss": 1.701, "step": 1993 }, { "epoch": 0.4842156386595435, "grad_norm": 0.5567694306373596, "learning_rate": 0.0001, "loss": 1.8007, "step": 1994 }, { "epoch": 0.48445847498785816, "grad_norm": 0.5891985297203064, "learning_rate": 0.0001, "loss": 1.7212, "step": 1995 }, { "epoch": 0.4847013113161729, "grad_norm": 0.62554532289505, "learning_rate": 0.0001, "loss": 1.8061, "step": 1996 }, { "epoch": 0.4849441476444876, "grad_norm": 0.6055716276168823, "learning_rate": 0.0001, "loss": 1.5304, "step": 1997 }, { "epoch": 0.48518698397280235, "grad_norm": 0.5879396200180054, "learning_rate": 0.0001, "loss": 1.8191, "step": 1998 }, { "epoch": 0.48542982030111703, "grad_norm": 0.5919262170791626, "learning_rate": 0.0001, "loss": 1.8164, "step": 1999 }, { "epoch": 0.48567265662943176, "grad_norm": 0.5956177115440369, "learning_rate": 0.0001, "loss": 1.7904, "step": 2000 }, { "epoch": 0.4859154929577465, "grad_norm": 0.5396212935447693, "learning_rate": 0.0001, "loss": 1.5279, "step": 2001 }, { "epoch": 0.4861583292860612, "grad_norm": 0.5972726941108704, "learning_rate": 0.0001, "loss": 1.7226, "step": 2002 }, { "epoch": 0.4864011656143759, "grad_norm": 0.6242453455924988, "learning_rate": 0.0001, "loss": 1.7777, "step": 2003 }, { "epoch": 0.48664400194269064, "grad_norm": 0.5980928540229797, "learning_rate": 0.0001, "loss": 1.8408, "step": 2004 }, { "epoch": 0.4868868382710053, "grad_norm": 0.5718754529953003, "learning_rate": 0.0001, "loss": 1.8969, "step": 2005 }, { "epoch": 0.48712967459932005, "grad_norm": 0.5300527215003967, "learning_rate": 0.0001, "loss": 1.5773, "step": 2006 }, { "epoch": 0.4873725109276348, "grad_norm": 0.5508219599723816, "learning_rate": 0.0001, "loss": 1.6821, "step": 2007 }, { "epoch": 0.4876153472559495, "grad_norm": 0.5847325325012207, "learning_rate": 0.0001, "loss": 1.8637, "step": 2008 }, { "epoch": 0.4878581835842642, "grad_norm": 0.594556987285614, "learning_rate": 0.0001, "loss": 1.8994, "step": 2009 }, { "epoch": 0.4881010199125789, "grad_norm": 0.5872136950492859, "learning_rate": 0.0001, "loss": 1.8082, "step": 2010 }, { "epoch": 0.48834385624089366, "grad_norm": 0.5755164623260498, "learning_rate": 0.0001, "loss": 1.7051, "step": 2011 }, { "epoch": 0.48858669256920834, "grad_norm": 0.6021437048912048, "learning_rate": 0.0001, "loss": 1.817, "step": 2012 }, { "epoch": 0.48882952889752307, "grad_norm": 0.5879169702529907, "learning_rate": 0.0001, "loss": 1.8699, "step": 2013 }, { "epoch": 0.4890723652258378, "grad_norm": 0.5833615660667419, "learning_rate": 0.0001, "loss": 1.7398, "step": 2014 }, { "epoch": 0.4893152015541525, "grad_norm": 0.5776499509811401, "learning_rate": 0.0001, "loss": 1.7552, "step": 2015 }, { "epoch": 0.4895580378824672, "grad_norm": 0.5966989398002625, "learning_rate": 0.0001, "loss": 1.9394, "step": 2016 }, { "epoch": 0.48980087421078194, "grad_norm": 0.596585750579834, "learning_rate": 0.0001, "loss": 1.7152, "step": 2017 }, { "epoch": 0.4900437105390966, "grad_norm": 0.6130557060241699, "learning_rate": 0.0001, "loss": 1.7157, "step": 2018 }, { "epoch": 0.49028654686741135, "grad_norm": 0.5656613707542419, "learning_rate": 0.0001, "loss": 1.6318, "step": 2019 }, { "epoch": 0.4905293831957261, "grad_norm": 0.5924555659294128, "learning_rate": 0.0001, "loss": 1.7926, "step": 2020 }, { "epoch": 0.4907722195240408, "grad_norm": 0.5761740207672119, "learning_rate": 0.0001, "loss": 1.8054, "step": 2021 }, { "epoch": 0.4910150558523555, "grad_norm": 0.5992533564567566, "learning_rate": 0.0001, "loss": 1.8102, "step": 2022 }, { "epoch": 0.49125789218067023, "grad_norm": 0.5910313129425049, "learning_rate": 0.0001, "loss": 1.8815, "step": 2023 }, { "epoch": 0.49150072850898496, "grad_norm": 0.5652232766151428, "learning_rate": 0.0001, "loss": 1.729, "step": 2024 }, { "epoch": 0.49174356483729964, "grad_norm": 0.5745852589607239, "learning_rate": 0.0001, "loss": 1.8081, "step": 2025 }, { "epoch": 0.4919864011656144, "grad_norm": 0.5289776921272278, "learning_rate": 0.0001, "loss": 1.7566, "step": 2026 }, { "epoch": 0.4922292374939291, "grad_norm": 0.5878112316131592, "learning_rate": 0.0001, "loss": 1.7305, "step": 2027 }, { "epoch": 0.4924720738222438, "grad_norm": 0.5823032259941101, "learning_rate": 0.0001, "loss": 1.7078, "step": 2028 }, { "epoch": 0.4927149101505585, "grad_norm": 0.5838625431060791, "learning_rate": 0.0001, "loss": 1.8703, "step": 2029 }, { "epoch": 0.49295774647887325, "grad_norm": 0.578701913356781, "learning_rate": 0.0001, "loss": 1.7124, "step": 2030 }, { "epoch": 0.493200582807188, "grad_norm": 0.5995233058929443, "learning_rate": 0.0001, "loss": 1.768, "step": 2031 }, { "epoch": 0.49344341913550266, "grad_norm": 0.5891994833946228, "learning_rate": 0.0001, "loss": 1.8702, "step": 2032 }, { "epoch": 0.4936862554638174, "grad_norm": 0.565297544002533, "learning_rate": 0.0001, "loss": 1.422, "step": 2033 }, { "epoch": 0.4939290917921321, "grad_norm": 0.563851535320282, "learning_rate": 0.0001, "loss": 1.7395, "step": 2034 }, { "epoch": 0.4941719281204468, "grad_norm": 0.5984541177749634, "learning_rate": 0.0001, "loss": 1.9099, "step": 2035 }, { "epoch": 0.49441476444876153, "grad_norm": 0.5796719789505005, "learning_rate": 0.0001, "loss": 1.7586, "step": 2036 }, { "epoch": 0.49465760077707627, "grad_norm": 0.6082152128219604, "learning_rate": 0.0001, "loss": 1.8833, "step": 2037 }, { "epoch": 0.49490043710539094, "grad_norm": 0.5553922057151794, "learning_rate": 0.0001, "loss": 1.6964, "step": 2038 }, { "epoch": 0.4951432734337057, "grad_norm": 0.5671054720878601, "learning_rate": 0.0001, "loss": 1.7251, "step": 2039 }, { "epoch": 0.4953861097620204, "grad_norm": 0.6090965270996094, "learning_rate": 0.0001, "loss": 1.7635, "step": 2040 }, { "epoch": 0.4956289460903351, "grad_norm": 0.5869699120521545, "learning_rate": 0.0001, "loss": 1.7948, "step": 2041 }, { "epoch": 0.4958717824186498, "grad_norm": 0.5642756819725037, "learning_rate": 0.0001, "loss": 1.8111, "step": 2042 }, { "epoch": 0.49611461874696455, "grad_norm": 0.6093323230743408, "learning_rate": 0.0001, "loss": 1.77, "step": 2043 }, { "epoch": 0.4963574550752793, "grad_norm": 0.6150794625282288, "learning_rate": 0.0001, "loss": 1.7954, "step": 2044 }, { "epoch": 0.49660029140359396, "grad_norm": 0.6348615884780884, "learning_rate": 0.0001, "loss": 1.7424, "step": 2045 }, { "epoch": 0.4968431277319087, "grad_norm": 0.6043843626976013, "learning_rate": 0.0001, "loss": 1.7093, "step": 2046 }, { "epoch": 0.49708596406022343, "grad_norm": 0.5615968108177185, "learning_rate": 0.0001, "loss": 1.7402, "step": 2047 }, { "epoch": 0.4973288003885381, "grad_norm": 0.552333414554596, "learning_rate": 0.0001, "loss": 1.6705, "step": 2048 }, { "epoch": 0.49757163671685284, "grad_norm": 0.5569483637809753, "learning_rate": 0.0001, "loss": 1.7713, "step": 2049 }, { "epoch": 0.49781447304516757, "grad_norm": 0.637229859828949, "learning_rate": 0.0001, "loss": 1.8701, "step": 2050 }, { "epoch": 0.49805730937348225, "grad_norm": 0.5754112005233765, "learning_rate": 0.0001, "loss": 1.829, "step": 2051 }, { "epoch": 0.498300145701797, "grad_norm": 0.5642938017845154, "learning_rate": 0.0001, "loss": 1.6451, "step": 2052 }, { "epoch": 0.4985429820301117, "grad_norm": 0.5582892894744873, "learning_rate": 0.0001, "loss": 1.5854, "step": 2053 }, { "epoch": 0.49878581835842645, "grad_norm": 0.5798816084861755, "learning_rate": 0.0001, "loss": 1.6651, "step": 2054 }, { "epoch": 0.4990286546867411, "grad_norm": 0.5920463800430298, "learning_rate": 0.0001, "loss": 1.6858, "step": 2055 }, { "epoch": 0.49927149101505586, "grad_norm": 0.5674306154251099, "learning_rate": 0.0001, "loss": 1.6466, "step": 2056 }, { "epoch": 0.4995143273433706, "grad_norm": 0.5592432618141174, "learning_rate": 0.0001, "loss": 1.755, "step": 2057 }, { "epoch": 0.49975716367168527, "grad_norm": 0.5635419487953186, "learning_rate": 0.0001, "loss": 1.7218, "step": 2058 }, { "epoch": 0.5, "grad_norm": 0.5989596843719482, "learning_rate": 0.0001, "loss": 1.7961, "step": 2059 }, { "epoch": 0.5002428363283147, "grad_norm": 0.5399906635284424, "learning_rate": 0.0001, "loss": 1.6511, "step": 2060 }, { "epoch": 0.5004856726566295, "grad_norm": 0.5654640793800354, "learning_rate": 0.0001, "loss": 1.6494, "step": 2061 }, { "epoch": 0.5007285089849441, "grad_norm": 0.5929504036903381, "learning_rate": 0.0001, "loss": 1.783, "step": 2062 }, { "epoch": 0.5009713453132588, "grad_norm": 0.5613128542900085, "learning_rate": 0.0001, "loss": 1.7739, "step": 2063 }, { "epoch": 0.5012141816415736, "grad_norm": 0.5874308347702026, "learning_rate": 0.0001, "loss": 1.6707, "step": 2064 }, { "epoch": 0.5014570179698883, "grad_norm": 0.5515894889831543, "learning_rate": 0.0001, "loss": 1.7933, "step": 2065 }, { "epoch": 0.501699854298203, "grad_norm": 0.5673650503158569, "learning_rate": 0.0001, "loss": 1.6298, "step": 2066 }, { "epoch": 0.5019426906265178, "grad_norm": 0.5600554347038269, "learning_rate": 0.0001, "loss": 1.7876, "step": 2067 }, { "epoch": 0.5021855269548324, "grad_norm": 0.5556074380874634, "learning_rate": 0.0001, "loss": 1.7885, "step": 2068 }, { "epoch": 0.5024283632831471, "grad_norm": 0.5716009140014648, "learning_rate": 0.0001, "loss": 1.661, "step": 2069 }, { "epoch": 0.5026711996114619, "grad_norm": 0.5660889744758606, "learning_rate": 0.0001, "loss": 1.8036, "step": 2070 }, { "epoch": 0.5029140359397766, "grad_norm": 0.6146003007888794, "learning_rate": 0.0001, "loss": 1.746, "step": 2071 }, { "epoch": 0.5031568722680914, "grad_norm": 0.5939356088638306, "learning_rate": 0.0001, "loss": 1.7597, "step": 2072 }, { "epoch": 0.503399708596406, "grad_norm": 0.5672742128372192, "learning_rate": 0.0001, "loss": 1.6675, "step": 2073 }, { "epoch": 0.5036425449247207, "grad_norm": 0.5487650036811829, "learning_rate": 0.0001, "loss": 1.716, "step": 2074 }, { "epoch": 0.5038853812530355, "grad_norm": 0.6096526980400085, "learning_rate": 0.0001, "loss": 1.8213, "step": 2075 }, { "epoch": 0.5041282175813502, "grad_norm": 0.587897539138794, "learning_rate": 0.0001, "loss": 1.6624, "step": 2076 }, { "epoch": 0.5043710539096649, "grad_norm": 0.5733286142349243, "learning_rate": 0.0001, "loss": 1.7982, "step": 2077 }, { "epoch": 0.5046138902379796, "grad_norm": 0.6002563834190369, "learning_rate": 0.0001, "loss": 1.7007, "step": 2078 }, { "epoch": 0.5048567265662943, "grad_norm": 0.5647587180137634, "learning_rate": 0.0001, "loss": 1.7711, "step": 2079 }, { "epoch": 0.505099562894609, "grad_norm": 0.5878034830093384, "learning_rate": 0.0001, "loss": 1.6718, "step": 2080 }, { "epoch": 0.5053423992229238, "grad_norm": 0.51972895860672, "learning_rate": 0.0001, "loss": 1.5546, "step": 2081 }, { "epoch": 0.5055852355512385, "grad_norm": 0.5663842558860779, "learning_rate": 0.0001, "loss": 1.695, "step": 2082 }, { "epoch": 0.5058280718795531, "grad_norm": 0.6086951494216919, "learning_rate": 0.0001, "loss": 1.7639, "step": 2083 }, { "epoch": 0.5060709082078679, "grad_norm": 0.5852351784706116, "learning_rate": 0.0001, "loss": 1.769, "step": 2084 }, { "epoch": 0.5063137445361826, "grad_norm": 0.5884329676628113, "learning_rate": 0.0001, "loss": 1.7277, "step": 2085 }, { "epoch": 0.5065565808644973, "grad_norm": 0.5802691578865051, "learning_rate": 0.0001, "loss": 1.7907, "step": 2086 }, { "epoch": 0.5067994171928121, "grad_norm": 0.6157693266868591, "learning_rate": 0.0001, "loss": 1.7991, "step": 2087 }, { "epoch": 0.5070422535211268, "grad_norm": 0.5663011074066162, "learning_rate": 0.0001, "loss": 1.7187, "step": 2088 }, { "epoch": 0.5072850898494414, "grad_norm": 0.5462061762809753, "learning_rate": 0.0001, "loss": 1.6085, "step": 2089 }, { "epoch": 0.5075279261777562, "grad_norm": 0.6452285051345825, "learning_rate": 0.0001, "loss": 1.7736, "step": 2090 }, { "epoch": 0.5077707625060709, "grad_norm": 0.6200202703475952, "learning_rate": 0.0001, "loss": 1.7275, "step": 2091 }, { "epoch": 0.5080135988343856, "grad_norm": 0.5755290389060974, "learning_rate": 0.0001, "loss": 1.6922, "step": 2092 }, { "epoch": 0.5082564351627004, "grad_norm": 0.5852632522583008, "learning_rate": 0.0001, "loss": 1.7152, "step": 2093 }, { "epoch": 0.508499271491015, "grad_norm": 0.561608612537384, "learning_rate": 0.0001, "loss": 1.6539, "step": 2094 }, { "epoch": 0.5087421078193298, "grad_norm": 0.5299578309059143, "learning_rate": 0.0001, "loss": 1.5823, "step": 2095 }, { "epoch": 0.5089849441476445, "grad_norm": 0.5728558897972107, "learning_rate": 0.0001, "loss": 1.7869, "step": 2096 }, { "epoch": 0.5092277804759592, "grad_norm": 0.5560303926467896, "learning_rate": 0.0001, "loss": 1.7693, "step": 2097 }, { "epoch": 0.509470616804274, "grad_norm": 0.6147969365119934, "learning_rate": 0.0001, "loss": 1.6741, "step": 2098 }, { "epoch": 0.5097134531325886, "grad_norm": 0.5178264379501343, "learning_rate": 0.0001, "loss": 1.533, "step": 2099 }, { "epoch": 0.5099562894609033, "grad_norm": 0.550970196723938, "learning_rate": 0.0001, "loss": 1.499, "step": 2100 }, { "epoch": 0.5101991257892181, "grad_norm": 0.5818623900413513, "learning_rate": 0.0001, "loss": 1.6929, "step": 2101 }, { "epoch": 0.5104419621175328, "grad_norm": 0.5710617899894714, "learning_rate": 0.0001, "loss": 1.7024, "step": 2102 }, { "epoch": 0.5106847984458475, "grad_norm": 0.629630982875824, "learning_rate": 0.0001, "loss": 1.9133, "step": 2103 }, { "epoch": 0.5109276347741623, "grad_norm": 0.558758556842804, "learning_rate": 0.0001, "loss": 1.5847, "step": 2104 }, { "epoch": 0.5111704711024769, "grad_norm": 0.5871068239212036, "learning_rate": 0.0001, "loss": 1.7599, "step": 2105 }, { "epoch": 0.5114133074307916, "grad_norm": 0.5836141109466553, "learning_rate": 0.0001, "loss": 1.8942, "step": 2106 }, { "epoch": 0.5116561437591064, "grad_norm": 0.563770055770874, "learning_rate": 0.0001, "loss": 1.5984, "step": 2107 }, { "epoch": 0.5118989800874211, "grad_norm": 0.5678231120109558, "learning_rate": 0.0001, "loss": 1.6216, "step": 2108 }, { "epoch": 0.5121418164157358, "grad_norm": 0.5830565094947815, "learning_rate": 0.0001, "loss": 1.6233, "step": 2109 }, { "epoch": 0.5123846527440505, "grad_norm": 0.5920232534408569, "learning_rate": 0.0001, "loss": 1.8429, "step": 2110 }, { "epoch": 0.5126274890723652, "grad_norm": 0.5994880199432373, "learning_rate": 0.0001, "loss": 1.7464, "step": 2111 }, { "epoch": 0.5128703254006799, "grad_norm": 0.5636540651321411, "learning_rate": 0.0001, "loss": 1.7334, "step": 2112 }, { "epoch": 0.5131131617289947, "grad_norm": 0.5844842195510864, "learning_rate": 0.0001, "loss": 1.7154, "step": 2113 }, { "epoch": 0.5133559980573094, "grad_norm": 0.5925378203392029, "learning_rate": 0.0001, "loss": 1.8161, "step": 2114 }, { "epoch": 0.513598834385624, "grad_norm": 0.5761093497276306, "learning_rate": 0.0001, "loss": 1.5473, "step": 2115 }, { "epoch": 0.5138416707139388, "grad_norm": 0.5909484028816223, "learning_rate": 0.0001, "loss": 1.8494, "step": 2116 }, { "epoch": 0.5140845070422535, "grad_norm": 0.575232982635498, "learning_rate": 0.0001, "loss": 1.6683, "step": 2117 }, { "epoch": 0.5143273433705683, "grad_norm": 0.608296811580658, "learning_rate": 0.0001, "loss": 2.0095, "step": 2118 }, { "epoch": 0.514570179698883, "grad_norm": 0.5402919054031372, "learning_rate": 0.0001, "loss": 1.6554, "step": 2119 }, { "epoch": 0.5148130160271976, "grad_norm": 0.6317681074142456, "learning_rate": 0.0001, "loss": 1.8573, "step": 2120 }, { "epoch": 0.5150558523555124, "grad_norm": 0.545809268951416, "learning_rate": 0.0001, "loss": 1.7084, "step": 2121 }, { "epoch": 0.5152986886838271, "grad_norm": 0.5815280675888062, "learning_rate": 0.0001, "loss": 1.7033, "step": 2122 }, { "epoch": 0.5155415250121418, "grad_norm": 0.6006367206573486, "learning_rate": 0.0001, "loss": 1.8273, "step": 2123 }, { "epoch": 0.5157843613404566, "grad_norm": 0.6038702130317688, "learning_rate": 0.0001, "loss": 1.7826, "step": 2124 }, { "epoch": 0.5160271976687713, "grad_norm": 0.5876898169517517, "learning_rate": 0.0001, "loss": 1.7943, "step": 2125 }, { "epoch": 0.5162700339970859, "grad_norm": 0.5861945152282715, "learning_rate": 0.0001, "loss": 1.8508, "step": 2126 }, { "epoch": 0.5165128703254007, "grad_norm": 0.5722491145133972, "learning_rate": 0.0001, "loss": 1.6205, "step": 2127 }, { "epoch": 0.5167557066537154, "grad_norm": 0.6067075729370117, "learning_rate": 0.0001, "loss": 1.8338, "step": 2128 }, { "epoch": 0.5169985429820301, "grad_norm": 0.5551809668540955, "learning_rate": 0.0001, "loss": 1.7478, "step": 2129 }, { "epoch": 0.5172413793103449, "grad_norm": 0.602811336517334, "learning_rate": 0.0001, "loss": 1.7956, "step": 2130 }, { "epoch": 0.5174842156386595, "grad_norm": 0.5505642890930176, "learning_rate": 0.0001, "loss": 1.6198, "step": 2131 }, { "epoch": 0.5177270519669742, "grad_norm": 0.5739842057228088, "learning_rate": 0.0001, "loss": 1.7405, "step": 2132 }, { "epoch": 0.517969888295289, "grad_norm": 0.6024134755134583, "learning_rate": 0.0001, "loss": 1.8489, "step": 2133 }, { "epoch": 0.5182127246236037, "grad_norm": 0.533274233341217, "learning_rate": 0.0001, "loss": 1.615, "step": 2134 }, { "epoch": 0.5184555609519184, "grad_norm": 0.5912434458732605, "learning_rate": 0.0001, "loss": 1.7793, "step": 2135 }, { "epoch": 0.5186983972802331, "grad_norm": 0.5838918089866638, "learning_rate": 0.0001, "loss": 1.824, "step": 2136 }, { "epoch": 0.5189412336085478, "grad_norm": 0.5628232359886169, "learning_rate": 0.0001, "loss": 1.7335, "step": 2137 }, { "epoch": 0.5191840699368625, "grad_norm": 0.571304202079773, "learning_rate": 0.0001, "loss": 1.8103, "step": 2138 }, { "epoch": 0.5194269062651773, "grad_norm": 0.5905882120132446, "learning_rate": 0.0001, "loss": 1.8236, "step": 2139 }, { "epoch": 0.519669742593492, "grad_norm": 0.5618475079536438, "learning_rate": 0.0001, "loss": 1.7559, "step": 2140 }, { "epoch": 0.5199125789218068, "grad_norm": 0.6003167629241943, "learning_rate": 0.0001, "loss": 1.7246, "step": 2141 }, { "epoch": 0.5201554152501214, "grad_norm": 0.6050346493721008, "learning_rate": 0.0001, "loss": 1.7486, "step": 2142 }, { "epoch": 0.5203982515784361, "grad_norm": 0.5786466598510742, "learning_rate": 0.0001, "loss": 1.7552, "step": 2143 }, { "epoch": 0.5206410879067509, "grad_norm": 0.5782291889190674, "learning_rate": 0.0001, "loss": 1.7007, "step": 2144 }, { "epoch": 0.5208839242350656, "grad_norm": 0.6254963278770447, "learning_rate": 0.0001, "loss": 1.6921, "step": 2145 }, { "epoch": 0.5211267605633803, "grad_norm": 0.6038153767585754, "learning_rate": 0.0001, "loss": 1.7569, "step": 2146 }, { "epoch": 0.521369596891695, "grad_norm": 0.6252439022064209, "learning_rate": 0.0001, "loss": 1.831, "step": 2147 }, { "epoch": 0.5216124332200097, "grad_norm": 0.5462331175804138, "learning_rate": 0.0001, "loss": 1.6698, "step": 2148 }, { "epoch": 0.5218552695483244, "grad_norm": 0.569652259349823, "learning_rate": 0.0001, "loss": 1.7232, "step": 2149 }, { "epoch": 0.5220981058766392, "grad_norm": 0.6135126352310181, "learning_rate": 0.0001, "loss": 1.8183, "step": 2150 }, { "epoch": 0.5223409422049539, "grad_norm": 0.6126598715782166, "learning_rate": 0.0001, "loss": 1.768, "step": 2151 }, { "epoch": 0.5225837785332685, "grad_norm": 0.5486062169075012, "learning_rate": 0.0001, "loss": 1.5589, "step": 2152 }, { "epoch": 0.5228266148615833, "grad_norm": 0.5582675933837891, "learning_rate": 0.0001, "loss": 1.5566, "step": 2153 }, { "epoch": 0.523069451189898, "grad_norm": 0.6054219603538513, "learning_rate": 0.0001, "loss": 1.6489, "step": 2154 }, { "epoch": 0.5233122875182127, "grad_norm": 0.5597957372665405, "learning_rate": 0.0001, "loss": 1.7998, "step": 2155 }, { "epoch": 0.5235551238465275, "grad_norm": 0.5427129864692688, "learning_rate": 0.0001, "loss": 1.6535, "step": 2156 }, { "epoch": 0.5237979601748421, "grad_norm": 0.57682865858078, "learning_rate": 0.0001, "loss": 1.7161, "step": 2157 }, { "epoch": 0.5240407965031568, "grad_norm": 0.5827271938323975, "learning_rate": 0.0001, "loss": 1.8182, "step": 2158 }, { "epoch": 0.5242836328314716, "grad_norm": 0.586132287979126, "learning_rate": 0.0001, "loss": 1.7707, "step": 2159 }, { "epoch": 0.5245264691597863, "grad_norm": 0.5706914663314819, "learning_rate": 0.0001, "loss": 1.6457, "step": 2160 }, { "epoch": 0.524769305488101, "grad_norm": 0.5372036695480347, "learning_rate": 0.0001, "loss": 1.676, "step": 2161 }, { "epoch": 0.5250121418164158, "grad_norm": 0.5486818552017212, "learning_rate": 0.0001, "loss": 1.6701, "step": 2162 }, { "epoch": 0.5252549781447304, "grad_norm": 0.5342246294021606, "learning_rate": 0.0001, "loss": 1.5333, "step": 2163 }, { "epoch": 0.5254978144730452, "grad_norm": 0.5594509243965149, "learning_rate": 0.0001, "loss": 1.6603, "step": 2164 }, { "epoch": 0.5257406508013599, "grad_norm": 0.6321483850479126, "learning_rate": 0.0001, "loss": 1.8696, "step": 2165 }, { "epoch": 0.5259834871296746, "grad_norm": 0.5574361085891724, "learning_rate": 0.0001, "loss": 1.6907, "step": 2166 }, { "epoch": 0.5262263234579894, "grad_norm": 0.6299461126327515, "learning_rate": 0.0001, "loss": 1.905, "step": 2167 }, { "epoch": 0.526469159786304, "grad_norm": 0.5777122378349304, "learning_rate": 0.0001, "loss": 1.7181, "step": 2168 }, { "epoch": 0.5267119961146187, "grad_norm": 0.5658559799194336, "learning_rate": 0.0001, "loss": 1.7185, "step": 2169 }, { "epoch": 0.5269548324429335, "grad_norm": 0.5544305443763733, "learning_rate": 0.0001, "loss": 1.7855, "step": 2170 }, { "epoch": 0.5271976687712482, "grad_norm": 0.5999164581298828, "learning_rate": 0.0001, "loss": 1.823, "step": 2171 }, { "epoch": 0.5274405050995629, "grad_norm": 0.5797856450080872, "learning_rate": 0.0001, "loss": 1.7401, "step": 2172 }, { "epoch": 0.5276833414278777, "grad_norm": 0.59946209192276, "learning_rate": 0.0001, "loss": 1.8566, "step": 2173 }, { "epoch": 0.5279261777561923, "grad_norm": 0.572341799736023, "learning_rate": 0.0001, "loss": 1.7087, "step": 2174 }, { "epoch": 0.528169014084507, "grad_norm": 0.537335991859436, "learning_rate": 0.0001, "loss": 1.6388, "step": 2175 }, { "epoch": 0.5284118504128218, "grad_norm": 0.5918770432472229, "learning_rate": 0.0001, "loss": 1.7216, "step": 2176 }, { "epoch": 0.5286546867411365, "grad_norm": 0.5716724991798401, "learning_rate": 0.0001, "loss": 1.7665, "step": 2177 }, { "epoch": 0.5288975230694511, "grad_norm": 0.5834047198295593, "learning_rate": 0.0001, "loss": 1.6417, "step": 2178 }, { "epoch": 0.5291403593977659, "grad_norm": 0.6267532110214233, "learning_rate": 0.0001, "loss": 1.99, "step": 2179 }, { "epoch": 0.5293831957260806, "grad_norm": 0.6181809306144714, "learning_rate": 0.0001, "loss": 1.9405, "step": 2180 }, { "epoch": 0.5296260320543953, "grad_norm": 0.5811281800270081, "learning_rate": 0.0001, "loss": 1.6438, "step": 2181 }, { "epoch": 0.5298688683827101, "grad_norm": 0.5770442485809326, "learning_rate": 0.0001, "loss": 1.8281, "step": 2182 }, { "epoch": 0.5301117047110248, "grad_norm": 0.5526422262191772, "learning_rate": 0.0001, "loss": 1.7686, "step": 2183 }, { "epoch": 0.5303545410393394, "grad_norm": 0.579846978187561, "learning_rate": 0.0001, "loss": 1.8797, "step": 2184 }, { "epoch": 0.5305973773676542, "grad_norm": 0.5629196166992188, "learning_rate": 0.0001, "loss": 1.728, "step": 2185 }, { "epoch": 0.5308402136959689, "grad_norm": 0.554466962814331, "learning_rate": 0.0001, "loss": 1.5575, "step": 2186 }, { "epoch": 0.5310830500242837, "grad_norm": 0.6198550462722778, "learning_rate": 0.0001, "loss": 1.7833, "step": 2187 }, { "epoch": 0.5313258863525984, "grad_norm": 0.5318722724914551, "learning_rate": 0.0001, "loss": 1.5167, "step": 2188 }, { "epoch": 0.531568722680913, "grad_norm": 0.6216217875480652, "learning_rate": 0.0001, "loss": 1.8666, "step": 2189 }, { "epoch": 0.5318115590092278, "grad_norm": 0.5881581902503967, "learning_rate": 0.0001, "loss": 1.783, "step": 2190 }, { "epoch": 0.5320543953375425, "grad_norm": 0.5604850649833679, "learning_rate": 0.0001, "loss": 1.68, "step": 2191 }, { "epoch": 0.5322972316658572, "grad_norm": 0.6313543319702148, "learning_rate": 0.0001, "loss": 1.9791, "step": 2192 }, { "epoch": 0.532540067994172, "grad_norm": 0.5774602890014648, "learning_rate": 0.0001, "loss": 1.8134, "step": 2193 }, { "epoch": 0.5327829043224867, "grad_norm": 0.6007572412490845, "learning_rate": 0.0001, "loss": 1.8626, "step": 2194 }, { "epoch": 0.5330257406508013, "grad_norm": 0.5974249839782715, "learning_rate": 0.0001, "loss": 1.8335, "step": 2195 }, { "epoch": 0.5332685769791161, "grad_norm": 0.5923995971679688, "learning_rate": 0.0001, "loss": 1.9244, "step": 2196 }, { "epoch": 0.5335114133074308, "grad_norm": 0.5441774725914001, "learning_rate": 0.0001, "loss": 1.7939, "step": 2197 }, { "epoch": 0.5337542496357455, "grad_norm": 0.580784022808075, "learning_rate": 0.0001, "loss": 1.7619, "step": 2198 }, { "epoch": 0.5339970859640603, "grad_norm": 0.5619747638702393, "learning_rate": 0.0001, "loss": 1.8213, "step": 2199 }, { "epoch": 0.5342399222923749, "grad_norm": 0.5800536274909973, "learning_rate": 0.0001, "loss": 1.9446, "step": 2200 }, { "epoch": 0.5344827586206896, "grad_norm": 0.6046760678291321, "learning_rate": 0.0001, "loss": 1.7507, "step": 2201 }, { "epoch": 0.5347255949490044, "grad_norm": 0.5831290483474731, "learning_rate": 0.0001, "loss": 1.8288, "step": 2202 }, { "epoch": 0.5349684312773191, "grad_norm": 0.5606281757354736, "learning_rate": 0.0001, "loss": 1.6564, "step": 2203 }, { "epoch": 0.5352112676056338, "grad_norm": 0.5448996424674988, "learning_rate": 0.0001, "loss": 1.6451, "step": 2204 }, { "epoch": 0.5354541039339485, "grad_norm": 0.5596613883972168, "learning_rate": 0.0001, "loss": 1.6321, "step": 2205 }, { "epoch": 0.5356969402622632, "grad_norm": 0.5989713072776794, "learning_rate": 0.0001, "loss": 1.8789, "step": 2206 }, { "epoch": 0.5359397765905779, "grad_norm": 0.6066573858261108, "learning_rate": 0.0001, "loss": 1.4983, "step": 2207 }, { "epoch": 0.5361826129188927, "grad_norm": 0.5666881799697876, "learning_rate": 0.0001, "loss": 1.6933, "step": 2208 }, { "epoch": 0.5364254492472074, "grad_norm": 0.5524195432662964, "learning_rate": 0.0001, "loss": 1.762, "step": 2209 }, { "epoch": 0.5366682855755222, "grad_norm": 0.6427804231643677, "learning_rate": 0.0001, "loss": 1.921, "step": 2210 }, { "epoch": 0.5369111219038368, "grad_norm": 0.6162734627723694, "learning_rate": 0.0001, "loss": 1.8159, "step": 2211 }, { "epoch": 0.5371539582321515, "grad_norm": 0.55301833152771, "learning_rate": 0.0001, "loss": 1.7398, "step": 2212 }, { "epoch": 0.5373967945604663, "grad_norm": 0.5881041288375854, "learning_rate": 0.0001, "loss": 1.6758, "step": 2213 }, { "epoch": 0.537639630888781, "grad_norm": 0.6024550199508667, "learning_rate": 0.0001, "loss": 1.8827, "step": 2214 }, { "epoch": 0.5378824672170957, "grad_norm": 0.515755295753479, "learning_rate": 0.0001, "loss": 1.5354, "step": 2215 }, { "epoch": 0.5381253035454104, "grad_norm": 0.6048948168754578, "learning_rate": 0.0001, "loss": 1.7744, "step": 2216 }, { "epoch": 0.5383681398737251, "grad_norm": 0.5852948427200317, "learning_rate": 0.0001, "loss": 1.7616, "step": 2217 }, { "epoch": 0.5386109762020398, "grad_norm": 0.5605524182319641, "learning_rate": 0.0001, "loss": 1.6569, "step": 2218 }, { "epoch": 0.5388538125303546, "grad_norm": 0.5623579621315002, "learning_rate": 0.0001, "loss": 1.6597, "step": 2219 }, { "epoch": 0.5390966488586693, "grad_norm": 0.5543200373649597, "learning_rate": 0.0001, "loss": 1.6099, "step": 2220 }, { "epoch": 0.5393394851869839, "grad_norm": 0.5789135098457336, "learning_rate": 0.0001, "loss": 1.6436, "step": 2221 }, { "epoch": 0.5395823215152987, "grad_norm": 0.574054479598999, "learning_rate": 0.0001, "loss": 1.6047, "step": 2222 }, { "epoch": 0.5398251578436134, "grad_norm": 0.615116536617279, "learning_rate": 0.0001, "loss": 1.8487, "step": 2223 }, { "epoch": 0.5400679941719281, "grad_norm": 0.5967799425125122, "learning_rate": 0.0001, "loss": 1.8072, "step": 2224 }, { "epoch": 0.5403108305002429, "grad_norm": 0.5706263780593872, "learning_rate": 0.0001, "loss": 1.6205, "step": 2225 }, { "epoch": 0.5405536668285575, "grad_norm": 0.6475127339363098, "learning_rate": 0.0001, "loss": 1.5781, "step": 2226 }, { "epoch": 0.5407965031568722, "grad_norm": 0.6100888848304749, "learning_rate": 0.0001, "loss": 1.8501, "step": 2227 }, { "epoch": 0.541039339485187, "grad_norm": 0.57435542345047, "learning_rate": 0.0001, "loss": 1.7222, "step": 2228 }, { "epoch": 0.5412821758135017, "grad_norm": 0.6083694100379944, "learning_rate": 0.0001, "loss": 1.6986, "step": 2229 }, { "epoch": 0.5415250121418164, "grad_norm": 0.5984795093536377, "learning_rate": 0.0001, "loss": 1.8236, "step": 2230 }, { "epoch": 0.5417678484701312, "grad_norm": 0.5543849468231201, "learning_rate": 0.0001, "loss": 1.5892, "step": 2231 }, { "epoch": 0.5420106847984458, "grad_norm": 0.5587611794471741, "learning_rate": 0.0001, "loss": 1.771, "step": 2232 }, { "epoch": 0.5422535211267606, "grad_norm": 0.5826887488365173, "learning_rate": 0.0001, "loss": 1.7288, "step": 2233 }, { "epoch": 0.5424963574550753, "grad_norm": 0.5647484660148621, "learning_rate": 0.0001, "loss": 1.8202, "step": 2234 }, { "epoch": 0.54273919378339, "grad_norm": 0.5916162133216858, "learning_rate": 0.0001, "loss": 1.7418, "step": 2235 }, { "epoch": 0.5429820301117048, "grad_norm": 0.5663244724273682, "learning_rate": 0.0001, "loss": 1.8817, "step": 2236 }, { "epoch": 0.5432248664400194, "grad_norm": 0.6044777631759644, "learning_rate": 0.0001, "loss": 1.8875, "step": 2237 }, { "epoch": 0.5434677027683341, "grad_norm": 0.5563994646072388, "learning_rate": 0.0001, "loss": 1.6417, "step": 2238 }, { "epoch": 0.5437105390966489, "grad_norm": 0.5902057886123657, "learning_rate": 0.0001, "loss": 1.8409, "step": 2239 }, { "epoch": 0.5439533754249636, "grad_norm": 0.5353649854660034, "learning_rate": 0.0001, "loss": 1.6133, "step": 2240 }, { "epoch": 0.5441962117532783, "grad_norm": 0.574381947517395, "learning_rate": 0.0001, "loss": 1.7432, "step": 2241 }, { "epoch": 0.544439048081593, "grad_norm": 0.5697255730628967, "learning_rate": 0.0001, "loss": 1.7109, "step": 2242 }, { "epoch": 0.5446818844099077, "grad_norm": 0.5618753433227539, "learning_rate": 0.0001, "loss": 1.63, "step": 2243 }, { "epoch": 0.5449247207382224, "grad_norm": 0.6283233761787415, "learning_rate": 0.0001, "loss": 1.849, "step": 2244 }, { "epoch": 0.5451675570665372, "grad_norm": 0.572043240070343, "learning_rate": 0.0001, "loss": 1.6105, "step": 2245 }, { "epoch": 0.5454103933948519, "grad_norm": 0.5859431624412537, "learning_rate": 0.0001, "loss": 1.7307, "step": 2246 }, { "epoch": 0.5456532297231665, "grad_norm": 0.5741754770278931, "learning_rate": 0.0001, "loss": 1.6827, "step": 2247 }, { "epoch": 0.5458960660514813, "grad_norm": 0.5581367611885071, "learning_rate": 0.0001, "loss": 1.5555, "step": 2248 }, { "epoch": 0.546138902379796, "grad_norm": 0.5916844606399536, "learning_rate": 0.0001, "loss": 1.8795, "step": 2249 }, { "epoch": 0.5463817387081107, "grad_norm": 0.5180323123931885, "learning_rate": 0.0001, "loss": 1.5186, "step": 2250 }, { "epoch": 0.5466245750364255, "grad_norm": 0.516656219959259, "learning_rate": 0.0001, "loss": 1.4862, "step": 2251 }, { "epoch": 0.5468674113647402, "grad_norm": 0.5897921919822693, "learning_rate": 0.0001, "loss": 1.6883, "step": 2252 }, { "epoch": 0.5471102476930548, "grad_norm": 0.5344215035438538, "learning_rate": 0.0001, "loss": 1.5138, "step": 2253 }, { "epoch": 0.5473530840213696, "grad_norm": 0.5945500731468201, "learning_rate": 0.0001, "loss": 1.7989, "step": 2254 }, { "epoch": 0.5475959203496843, "grad_norm": 0.5791717767715454, "learning_rate": 0.0001, "loss": 1.7545, "step": 2255 }, { "epoch": 0.5478387566779991, "grad_norm": 0.591839075088501, "learning_rate": 0.0001, "loss": 1.8236, "step": 2256 }, { "epoch": 0.5480815930063138, "grad_norm": 0.5432397127151489, "learning_rate": 0.0001, "loss": 1.6173, "step": 2257 }, { "epoch": 0.5483244293346284, "grad_norm": 0.5551950335502625, "learning_rate": 0.0001, "loss": 1.6454, "step": 2258 }, { "epoch": 0.5485672656629432, "grad_norm": 0.5756182670593262, "learning_rate": 0.0001, "loss": 1.7265, "step": 2259 }, { "epoch": 0.5488101019912579, "grad_norm": 0.5714510679244995, "learning_rate": 0.0001, "loss": 1.7944, "step": 2260 }, { "epoch": 0.5490529383195726, "grad_norm": 0.5553797483444214, "learning_rate": 0.0001, "loss": 1.6329, "step": 2261 }, { "epoch": 0.5492957746478874, "grad_norm": 0.561791718006134, "learning_rate": 0.0001, "loss": 1.6399, "step": 2262 }, { "epoch": 0.549538610976202, "grad_norm": 0.548929750919342, "learning_rate": 0.0001, "loss": 1.6578, "step": 2263 }, { "epoch": 0.5497814473045167, "grad_norm": 0.6032873392105103, "learning_rate": 0.0001, "loss": 1.8544, "step": 2264 }, { "epoch": 0.5500242836328315, "grad_norm": 0.6131521463394165, "learning_rate": 0.0001, "loss": 1.7348, "step": 2265 }, { "epoch": 0.5502671199611462, "grad_norm": 0.6067385673522949, "learning_rate": 0.0001, "loss": 1.7989, "step": 2266 }, { "epoch": 0.5505099562894609, "grad_norm": 0.5752245783805847, "learning_rate": 0.0001, "loss": 1.7589, "step": 2267 }, { "epoch": 0.5507527926177757, "grad_norm": 0.6058968305587769, "learning_rate": 0.0001, "loss": 1.7689, "step": 2268 }, { "epoch": 0.5509956289460903, "grad_norm": 0.522637665271759, "learning_rate": 0.0001, "loss": 1.533, "step": 2269 }, { "epoch": 0.551238465274405, "grad_norm": 0.5843346118927002, "learning_rate": 0.0001, "loss": 1.7269, "step": 2270 }, { "epoch": 0.5514813016027198, "grad_norm": 0.5794123411178589, "learning_rate": 0.0001, "loss": 1.7654, "step": 2271 }, { "epoch": 0.5517241379310345, "grad_norm": 0.5385357737541199, "learning_rate": 0.0001, "loss": 1.6063, "step": 2272 }, { "epoch": 0.5519669742593492, "grad_norm": 0.5631776452064514, "learning_rate": 0.0001, "loss": 1.7648, "step": 2273 }, { "epoch": 0.5522098105876639, "grad_norm": 0.5676831007003784, "learning_rate": 0.0001, "loss": 1.6738, "step": 2274 }, { "epoch": 0.5524526469159786, "grad_norm": 0.5649113655090332, "learning_rate": 0.0001, "loss": 1.7205, "step": 2275 }, { "epoch": 0.5526954832442933, "grad_norm": 0.6241009831428528, "learning_rate": 0.0001, "loss": 1.6669, "step": 2276 }, { "epoch": 0.5529383195726081, "grad_norm": 0.5783674120903015, "learning_rate": 0.0001, "loss": 1.7265, "step": 2277 }, { "epoch": 0.5531811559009228, "grad_norm": 0.5854872465133667, "learning_rate": 0.0001, "loss": 1.8464, "step": 2278 }, { "epoch": 0.5534239922292375, "grad_norm": 0.5910096168518066, "learning_rate": 0.0001, "loss": 1.7909, "step": 2279 }, { "epoch": 0.5536668285575522, "grad_norm": 0.6107466220855713, "learning_rate": 0.0001, "loss": 1.8017, "step": 2280 }, { "epoch": 0.5539096648858669, "grad_norm": 0.5732429623603821, "learning_rate": 0.0001, "loss": 1.7704, "step": 2281 }, { "epoch": 0.5541525012141817, "grad_norm": 0.5652581453323364, "learning_rate": 0.0001, "loss": 1.6169, "step": 2282 }, { "epoch": 0.5543953375424964, "grad_norm": 0.5679978728294373, "learning_rate": 0.0001, "loss": 1.6261, "step": 2283 }, { "epoch": 0.554638173870811, "grad_norm": 0.5831108689308167, "learning_rate": 0.0001, "loss": 1.7858, "step": 2284 }, { "epoch": 0.5548810101991258, "grad_norm": 0.6095585227012634, "learning_rate": 0.0001, "loss": 1.7946, "step": 2285 }, { "epoch": 0.5551238465274405, "grad_norm": 0.5800433158874512, "learning_rate": 0.0001, "loss": 1.7534, "step": 2286 }, { "epoch": 0.5553666828557552, "grad_norm": 0.5661283731460571, "learning_rate": 0.0001, "loss": 1.7831, "step": 2287 }, { "epoch": 0.55560951918407, "grad_norm": 0.5875678658485413, "learning_rate": 0.0001, "loss": 1.8227, "step": 2288 }, { "epoch": 0.5558523555123847, "grad_norm": 0.5663167834281921, "learning_rate": 0.0001, "loss": 1.6859, "step": 2289 }, { "epoch": 0.5560951918406993, "grad_norm": 0.5791200995445251, "learning_rate": 0.0001, "loss": 1.7324, "step": 2290 }, { "epoch": 0.5563380281690141, "grad_norm": 0.5939375162124634, "learning_rate": 0.0001, "loss": 1.8266, "step": 2291 }, { "epoch": 0.5565808644973288, "grad_norm": 0.5692607164382935, "learning_rate": 0.0001, "loss": 1.7979, "step": 2292 }, { "epoch": 0.5568237008256435, "grad_norm": 0.5719442963600159, "learning_rate": 0.0001, "loss": 1.7008, "step": 2293 }, { "epoch": 0.5570665371539583, "grad_norm": 0.6055617332458496, "learning_rate": 0.0001, "loss": 1.8149, "step": 2294 }, { "epoch": 0.5573093734822729, "grad_norm": 0.5880705118179321, "learning_rate": 0.0001, "loss": 1.729, "step": 2295 }, { "epoch": 0.5575522098105876, "grad_norm": 0.6210213303565979, "learning_rate": 0.0001, "loss": 1.6504, "step": 2296 }, { "epoch": 0.5577950461389024, "grad_norm": 0.6164421439170837, "learning_rate": 0.0001, "loss": 1.8222, "step": 2297 }, { "epoch": 0.5580378824672171, "grad_norm": 0.5638343095779419, "learning_rate": 0.0001, "loss": 1.6657, "step": 2298 }, { "epoch": 0.5582807187955318, "grad_norm": 0.5995288491249084, "learning_rate": 0.0001, "loss": 1.9264, "step": 2299 }, { "epoch": 0.5585235551238465, "grad_norm": 0.6023948788642883, "learning_rate": 0.0001, "loss": 1.8343, "step": 2300 }, { "epoch": 0.5587663914521612, "grad_norm": 0.6314024329185486, "learning_rate": 0.0001, "loss": 1.7337, "step": 2301 }, { "epoch": 0.559009227780476, "grad_norm": 0.5966470241546631, "learning_rate": 0.0001, "loss": 1.743, "step": 2302 }, { "epoch": 0.5592520641087907, "grad_norm": 0.5859267115592957, "learning_rate": 0.0001, "loss": 1.731, "step": 2303 }, { "epoch": 0.5594949004371054, "grad_norm": 0.5759007334709167, "learning_rate": 0.0001, "loss": 1.7801, "step": 2304 }, { "epoch": 0.5597377367654202, "grad_norm": 0.604795515537262, "learning_rate": 0.0001, "loss": 1.7146, "step": 2305 }, { "epoch": 0.5599805730937348, "grad_norm": 0.5576354265213013, "learning_rate": 0.0001, "loss": 1.6984, "step": 2306 }, { "epoch": 0.5602234094220495, "grad_norm": 0.5717795491218567, "learning_rate": 0.0001, "loss": 1.7677, "step": 2307 }, { "epoch": 0.5604662457503643, "grad_norm": 0.6093893051147461, "learning_rate": 0.0001, "loss": 1.6873, "step": 2308 }, { "epoch": 0.560709082078679, "grad_norm": 0.5409795641899109, "learning_rate": 0.0001, "loss": 1.4814, "step": 2309 }, { "epoch": 0.5609519184069937, "grad_norm": 0.5724250078201294, "learning_rate": 0.0001, "loss": 1.7439, "step": 2310 }, { "epoch": 0.5611947547353084, "grad_norm": 0.5454828143119812, "learning_rate": 0.0001, "loss": 1.6413, "step": 2311 }, { "epoch": 0.5614375910636231, "grad_norm": 0.5908055305480957, "learning_rate": 0.0001, "loss": 1.7521, "step": 2312 }, { "epoch": 0.5616804273919378, "grad_norm": 0.5490610003471375, "learning_rate": 0.0001, "loss": 1.772, "step": 2313 }, { "epoch": 0.5619232637202526, "grad_norm": 0.5860586762428284, "learning_rate": 0.0001, "loss": 1.6474, "step": 2314 }, { "epoch": 0.5621661000485673, "grad_norm": 0.5701284408569336, "learning_rate": 0.0001, "loss": 1.7656, "step": 2315 }, { "epoch": 0.5624089363768819, "grad_norm": 0.6234085559844971, "learning_rate": 0.0001, "loss": 1.9436, "step": 2316 }, { "epoch": 0.5626517727051967, "grad_norm": 0.6046967506408691, "learning_rate": 0.0001, "loss": 1.8797, "step": 2317 }, { "epoch": 0.5628946090335114, "grad_norm": 0.532106339931488, "learning_rate": 0.0001, "loss": 1.6235, "step": 2318 }, { "epoch": 0.5631374453618261, "grad_norm": 0.6197608113288879, "learning_rate": 0.0001, "loss": 1.782, "step": 2319 }, { "epoch": 0.5633802816901409, "grad_norm": 0.5382350087165833, "learning_rate": 0.0001, "loss": 1.5866, "step": 2320 }, { "epoch": 0.5636231180184555, "grad_norm": 0.593874454498291, "learning_rate": 0.0001, "loss": 1.7379, "step": 2321 }, { "epoch": 0.5638659543467702, "grad_norm": 0.5685871243476868, "learning_rate": 0.0001, "loss": 1.5871, "step": 2322 }, { "epoch": 0.564108790675085, "grad_norm": 0.5940852761268616, "learning_rate": 0.0001, "loss": 1.7865, "step": 2323 }, { "epoch": 0.5643516270033997, "grad_norm": 0.5474042296409607, "learning_rate": 0.0001, "loss": 1.7973, "step": 2324 }, { "epoch": 0.5645944633317144, "grad_norm": 0.5839331150054932, "learning_rate": 0.0001, "loss": 1.7634, "step": 2325 }, { "epoch": 0.5648372996600292, "grad_norm": 0.6073717474937439, "learning_rate": 0.0001, "loss": 1.8117, "step": 2326 }, { "epoch": 0.5650801359883438, "grad_norm": 0.5777013897895813, "learning_rate": 0.0001, "loss": 1.6803, "step": 2327 }, { "epoch": 0.5653229723166586, "grad_norm": 0.6037007570266724, "learning_rate": 0.0001, "loss": 1.8422, "step": 2328 }, { "epoch": 0.5655658086449733, "grad_norm": 0.5606606006622314, "learning_rate": 0.0001, "loss": 1.6783, "step": 2329 }, { "epoch": 0.565808644973288, "grad_norm": 0.5418534874916077, "learning_rate": 0.0001, "loss": 1.5066, "step": 2330 }, { "epoch": 0.5660514813016028, "grad_norm": 0.608181357383728, "learning_rate": 0.0001, "loss": 1.844, "step": 2331 }, { "epoch": 0.5662943176299174, "grad_norm": 0.59039306640625, "learning_rate": 0.0001, "loss": 1.6876, "step": 2332 }, { "epoch": 0.5665371539582321, "grad_norm": 0.5667287111282349, "learning_rate": 0.0001, "loss": 1.6384, "step": 2333 }, { "epoch": 0.5667799902865469, "grad_norm": 0.5706629753112793, "learning_rate": 0.0001, "loss": 1.6236, "step": 2334 }, { "epoch": 0.5670228266148616, "grad_norm": 0.6076309084892273, "learning_rate": 0.0001, "loss": 1.746, "step": 2335 }, { "epoch": 0.5672656629431763, "grad_norm": 0.592994749546051, "learning_rate": 0.0001, "loss": 1.7133, "step": 2336 }, { "epoch": 0.567508499271491, "grad_norm": 0.5950530767440796, "learning_rate": 0.0001, "loss": 1.7201, "step": 2337 }, { "epoch": 0.5677513355998057, "grad_norm": 0.5949794054031372, "learning_rate": 0.0001, "loss": 1.7501, "step": 2338 }, { "epoch": 0.5679941719281204, "grad_norm": 0.5638046264648438, "learning_rate": 0.0001, "loss": 1.7436, "step": 2339 }, { "epoch": 0.5682370082564352, "grad_norm": 0.5714852809906006, "learning_rate": 0.0001, "loss": 1.7623, "step": 2340 }, { "epoch": 0.5684798445847499, "grad_norm": 0.6423383355140686, "learning_rate": 0.0001, "loss": 1.7754, "step": 2341 }, { "epoch": 0.5687226809130645, "grad_norm": 0.6077386140823364, "learning_rate": 0.0001, "loss": 1.8693, "step": 2342 }, { "epoch": 0.5689655172413793, "grad_norm": 0.5809692740440369, "learning_rate": 0.0001, "loss": 1.728, "step": 2343 }, { "epoch": 0.569208353569694, "grad_norm": 0.5595994591712952, "learning_rate": 0.0001, "loss": 1.5661, "step": 2344 }, { "epoch": 0.5694511898980087, "grad_norm": 0.5897711515426636, "learning_rate": 0.0001, "loss": 1.935, "step": 2345 }, { "epoch": 0.5696940262263235, "grad_norm": 0.5622484683990479, "learning_rate": 0.0001, "loss": 1.5963, "step": 2346 }, { "epoch": 0.5699368625546382, "grad_norm": 0.5812405347824097, "learning_rate": 0.0001, "loss": 1.7894, "step": 2347 }, { "epoch": 0.5701796988829528, "grad_norm": 0.5791590213775635, "learning_rate": 0.0001, "loss": 1.7274, "step": 2348 }, { "epoch": 0.5704225352112676, "grad_norm": 0.5815815329551697, "learning_rate": 0.0001, "loss": 1.7221, "step": 2349 }, { "epoch": 0.5706653715395823, "grad_norm": 0.6123338937759399, "learning_rate": 0.0001, "loss": 1.7184, "step": 2350 }, { "epoch": 0.5709082078678971, "grad_norm": 0.5597078204154968, "learning_rate": 0.0001, "loss": 1.6667, "step": 2351 }, { "epoch": 0.5711510441962118, "grad_norm": 0.5812671184539795, "learning_rate": 0.0001, "loss": 1.6042, "step": 2352 }, { "epoch": 0.5713938805245264, "grad_norm": 0.587066650390625, "learning_rate": 0.0001, "loss": 1.7552, "step": 2353 }, { "epoch": 0.5716367168528412, "grad_norm": 0.5483470559120178, "learning_rate": 0.0001, "loss": 1.6923, "step": 2354 }, { "epoch": 0.5718795531811559, "grad_norm": 0.5875955820083618, "learning_rate": 0.0001, "loss": 1.6528, "step": 2355 }, { "epoch": 0.5721223895094706, "grad_norm": 0.5432369112968445, "learning_rate": 0.0001, "loss": 1.6574, "step": 2356 }, { "epoch": 0.5723652258377854, "grad_norm": 0.5850716233253479, "learning_rate": 0.0001, "loss": 1.8068, "step": 2357 }, { "epoch": 0.5726080621661, "grad_norm": 0.5770143866539001, "learning_rate": 0.0001, "loss": 1.7533, "step": 2358 }, { "epoch": 0.5728508984944147, "grad_norm": 0.5965167880058289, "learning_rate": 0.0001, "loss": 1.7634, "step": 2359 }, { "epoch": 0.5730937348227295, "grad_norm": 0.5683436989784241, "learning_rate": 0.0001, "loss": 1.6588, "step": 2360 }, { "epoch": 0.5733365711510442, "grad_norm": 0.5722373723983765, "learning_rate": 0.0001, "loss": 1.6151, "step": 2361 }, { "epoch": 0.5735794074793589, "grad_norm": 0.5822277069091797, "learning_rate": 0.0001, "loss": 1.7077, "step": 2362 }, { "epoch": 0.5738222438076737, "grad_norm": 0.5640177130699158, "learning_rate": 0.0001, "loss": 1.7373, "step": 2363 }, { "epoch": 0.5740650801359883, "grad_norm": 0.5672557950019836, "learning_rate": 0.0001, "loss": 1.6969, "step": 2364 }, { "epoch": 0.574307916464303, "grad_norm": 0.5683900117874146, "learning_rate": 0.0001, "loss": 1.7601, "step": 2365 }, { "epoch": 0.5745507527926178, "grad_norm": 0.5778457522392273, "learning_rate": 0.0001, "loss": 1.7324, "step": 2366 }, { "epoch": 0.5747935891209325, "grad_norm": 0.5638473629951477, "learning_rate": 0.0001, "loss": 1.7839, "step": 2367 }, { "epoch": 0.5750364254492472, "grad_norm": 0.5594433546066284, "learning_rate": 0.0001, "loss": 1.6839, "step": 2368 }, { "epoch": 0.575279261777562, "grad_norm": 0.5993980169296265, "learning_rate": 0.0001, "loss": 1.7092, "step": 2369 }, { "epoch": 0.5755220981058766, "grad_norm": 0.6353656649589539, "learning_rate": 0.0001, "loss": 1.845, "step": 2370 }, { "epoch": 0.5757649344341913, "grad_norm": 0.5711445808410645, "learning_rate": 0.0001, "loss": 1.6876, "step": 2371 }, { "epoch": 0.5760077707625061, "grad_norm": 0.5684404969215393, "learning_rate": 0.0001, "loss": 1.7879, "step": 2372 }, { "epoch": 0.5762506070908208, "grad_norm": 0.5605738759040833, "learning_rate": 0.0001, "loss": 1.7602, "step": 2373 }, { "epoch": 0.5764934434191356, "grad_norm": 0.5888994932174683, "learning_rate": 0.0001, "loss": 1.8354, "step": 2374 }, { "epoch": 0.5767362797474502, "grad_norm": 0.5309900641441345, "learning_rate": 0.0001, "loss": 1.6206, "step": 2375 }, { "epoch": 0.5769791160757649, "grad_norm": 0.630192756652832, "learning_rate": 0.0001, "loss": 1.8859, "step": 2376 }, { "epoch": 0.5772219524040797, "grad_norm": 0.5861800312995911, "learning_rate": 0.0001, "loss": 1.7462, "step": 2377 }, { "epoch": 0.5774647887323944, "grad_norm": 0.578201949596405, "learning_rate": 0.0001, "loss": 1.7953, "step": 2378 }, { "epoch": 0.577707625060709, "grad_norm": 0.5666414499282837, "learning_rate": 0.0001, "loss": 1.6441, "step": 2379 }, { "epoch": 0.5779504613890238, "grad_norm": 0.5398064255714417, "learning_rate": 0.0001, "loss": 1.6298, "step": 2380 }, { "epoch": 0.5781932977173385, "grad_norm": 0.5318560004234314, "learning_rate": 0.0001, "loss": 1.4524, "step": 2381 }, { "epoch": 0.5784361340456532, "grad_norm": 0.5985003113746643, "learning_rate": 0.0001, "loss": 1.8397, "step": 2382 }, { "epoch": 0.578678970373968, "grad_norm": 0.5568950772285461, "learning_rate": 0.0001, "loss": 1.7121, "step": 2383 }, { "epoch": 0.5789218067022827, "grad_norm": 0.5775126814842224, "learning_rate": 0.0001, "loss": 1.7422, "step": 2384 }, { "epoch": 0.5791646430305973, "grad_norm": 0.5712935924530029, "learning_rate": 0.0001, "loss": 1.6469, "step": 2385 }, { "epoch": 0.5794074793589121, "grad_norm": 0.5984039902687073, "learning_rate": 0.0001, "loss": 1.727, "step": 2386 }, { "epoch": 0.5796503156872268, "grad_norm": 0.5886059999465942, "learning_rate": 0.0001, "loss": 1.7558, "step": 2387 }, { "epoch": 0.5798931520155415, "grad_norm": 0.5869618654251099, "learning_rate": 0.0001, "loss": 1.7193, "step": 2388 }, { "epoch": 0.5801359883438563, "grad_norm": 0.6158575415611267, "learning_rate": 0.0001, "loss": 1.7997, "step": 2389 }, { "epoch": 0.580378824672171, "grad_norm": 0.5518700480461121, "learning_rate": 0.0001, "loss": 1.6309, "step": 2390 }, { "epoch": 0.5806216610004856, "grad_norm": 0.5720113515853882, "learning_rate": 0.0001, "loss": 1.7135, "step": 2391 }, { "epoch": 0.5808644973288004, "grad_norm": 0.5739331245422363, "learning_rate": 0.0001, "loss": 1.6715, "step": 2392 }, { "epoch": 0.5811073336571151, "grad_norm": 0.5607396364212036, "learning_rate": 0.0001, "loss": 1.6143, "step": 2393 }, { "epoch": 0.5813501699854298, "grad_norm": 0.5959517359733582, "learning_rate": 0.0001, "loss": 1.8453, "step": 2394 }, { "epoch": 0.5815930063137446, "grad_norm": 0.5785279273986816, "learning_rate": 0.0001, "loss": 1.7878, "step": 2395 }, { "epoch": 0.5818358426420592, "grad_norm": 0.5693400502204895, "learning_rate": 0.0001, "loss": 1.6801, "step": 2396 }, { "epoch": 0.582078678970374, "grad_norm": 0.5761942267417908, "learning_rate": 0.0001, "loss": 1.7075, "step": 2397 }, { "epoch": 0.5823215152986887, "grad_norm": 0.5831998586654663, "learning_rate": 0.0001, "loss": 1.701, "step": 2398 }, { "epoch": 0.5825643516270034, "grad_norm": 0.5638182759284973, "learning_rate": 0.0001, "loss": 1.7647, "step": 2399 }, { "epoch": 0.5828071879553182, "grad_norm": 0.5755643248558044, "learning_rate": 0.0001, "loss": 1.72, "step": 2400 }, { "epoch": 0.5830500242836328, "grad_norm": 0.5585417747497559, "learning_rate": 0.0001, "loss": 1.535, "step": 2401 }, { "epoch": 0.5832928606119475, "grad_norm": 0.5857945084571838, "learning_rate": 0.0001, "loss": 1.7711, "step": 2402 }, { "epoch": 0.5835356969402623, "grad_norm": 0.5903569459915161, "learning_rate": 0.0001, "loss": 1.7424, "step": 2403 }, { "epoch": 0.583778533268577, "grad_norm": 0.5820091962814331, "learning_rate": 0.0001, "loss": 1.6464, "step": 2404 }, { "epoch": 0.5840213695968917, "grad_norm": 0.6163525581359863, "learning_rate": 0.0001, "loss": 1.9778, "step": 2405 }, { "epoch": 0.5842642059252064, "grad_norm": 0.5677646398544312, "learning_rate": 0.0001, "loss": 1.6748, "step": 2406 }, { "epoch": 0.5845070422535211, "grad_norm": 0.5462453365325928, "learning_rate": 0.0001, "loss": 1.6799, "step": 2407 }, { "epoch": 0.5847498785818358, "grad_norm": 0.5642171502113342, "learning_rate": 0.0001, "loss": 1.5963, "step": 2408 }, { "epoch": 0.5849927149101506, "grad_norm": 0.5937358140945435, "learning_rate": 0.0001, "loss": 1.6614, "step": 2409 }, { "epoch": 0.5852355512384653, "grad_norm": 0.5900371074676514, "learning_rate": 0.0001, "loss": 1.6783, "step": 2410 }, { "epoch": 0.58547838756678, "grad_norm": 0.5788990259170532, "learning_rate": 0.0001, "loss": 1.7227, "step": 2411 }, { "epoch": 0.5857212238950947, "grad_norm": 0.5753214955329895, "learning_rate": 0.0001, "loss": 1.6505, "step": 2412 }, { "epoch": 0.5859640602234094, "grad_norm": 0.5644556283950806, "learning_rate": 0.0001, "loss": 1.7429, "step": 2413 }, { "epoch": 0.5862068965517241, "grad_norm": 0.6227849721908569, "learning_rate": 0.0001, "loss": 1.718, "step": 2414 }, { "epoch": 0.5864497328800389, "grad_norm": 0.5930759906768799, "learning_rate": 0.0001, "loss": 1.6093, "step": 2415 }, { "epoch": 0.5866925692083536, "grad_norm": 0.5450453162193298, "learning_rate": 0.0001, "loss": 1.628, "step": 2416 }, { "epoch": 0.5869354055366682, "grad_norm": 0.6010074019432068, "learning_rate": 0.0001, "loss": 1.8531, "step": 2417 }, { "epoch": 0.587178241864983, "grad_norm": 0.5836543440818787, "learning_rate": 0.0001, "loss": 1.8302, "step": 2418 }, { "epoch": 0.5874210781932977, "grad_norm": 0.5937181711196899, "learning_rate": 0.0001, "loss": 1.7584, "step": 2419 }, { "epoch": 0.5876639145216125, "grad_norm": 0.5370633602142334, "learning_rate": 0.0001, "loss": 1.5638, "step": 2420 }, { "epoch": 0.5879067508499272, "grad_norm": 0.5745272636413574, "learning_rate": 0.0001, "loss": 1.6546, "step": 2421 }, { "epoch": 0.5881495871782418, "grad_norm": 0.5497100353240967, "learning_rate": 0.0001, "loss": 1.6042, "step": 2422 }, { "epoch": 0.5883924235065566, "grad_norm": 0.5470163822174072, "learning_rate": 0.0001, "loss": 1.6796, "step": 2423 }, { "epoch": 0.5886352598348713, "grad_norm": 0.5787187814712524, "learning_rate": 0.0001, "loss": 1.6311, "step": 2424 }, { "epoch": 0.588878096163186, "grad_norm": 0.5292855501174927, "learning_rate": 0.0001, "loss": 1.6425, "step": 2425 }, { "epoch": 0.5891209324915008, "grad_norm": 0.5789694786071777, "learning_rate": 0.0001, "loss": 1.7506, "step": 2426 }, { "epoch": 0.5893637688198154, "grad_norm": 0.5826478600502014, "learning_rate": 0.0001, "loss": 1.7998, "step": 2427 }, { "epoch": 0.5896066051481301, "grad_norm": 0.5840790867805481, "learning_rate": 0.0001, "loss": 1.7444, "step": 2428 }, { "epoch": 0.5898494414764449, "grad_norm": 0.5900298953056335, "learning_rate": 0.0001, "loss": 1.7615, "step": 2429 }, { "epoch": 0.5900922778047596, "grad_norm": 0.5789086222648621, "learning_rate": 0.0001, "loss": 1.7267, "step": 2430 }, { "epoch": 0.5903351141330743, "grad_norm": 0.5412051677703857, "learning_rate": 0.0001, "loss": 1.5541, "step": 2431 }, { "epoch": 0.5905779504613891, "grad_norm": 0.5917304754257202, "learning_rate": 0.0001, "loss": 1.8933, "step": 2432 }, { "epoch": 0.5908207867897037, "grad_norm": 0.583175778388977, "learning_rate": 0.0001, "loss": 1.7799, "step": 2433 }, { "epoch": 0.5910636231180184, "grad_norm": 0.5745235681533813, "learning_rate": 0.0001, "loss": 1.6437, "step": 2434 }, { "epoch": 0.5913064594463332, "grad_norm": 0.5677094459533691, "learning_rate": 0.0001, "loss": 1.6825, "step": 2435 }, { "epoch": 0.5915492957746479, "grad_norm": 0.5684336423873901, "learning_rate": 0.0001, "loss": 1.7585, "step": 2436 }, { "epoch": 0.5917921321029626, "grad_norm": 0.6031253337860107, "learning_rate": 0.0001, "loss": 1.8517, "step": 2437 }, { "epoch": 0.5920349684312773, "grad_norm": 0.5518312454223633, "learning_rate": 0.0001, "loss": 1.7078, "step": 2438 }, { "epoch": 0.592277804759592, "grad_norm": 0.5937772393226624, "learning_rate": 0.0001, "loss": 1.7455, "step": 2439 }, { "epoch": 0.5925206410879067, "grad_norm": 0.5688191652297974, "learning_rate": 0.0001, "loss": 1.8297, "step": 2440 }, { "epoch": 0.5927634774162215, "grad_norm": 0.601302981376648, "learning_rate": 0.0001, "loss": 1.7428, "step": 2441 }, { "epoch": 0.5930063137445362, "grad_norm": 0.572617769241333, "learning_rate": 0.0001, "loss": 1.6983, "step": 2442 }, { "epoch": 0.593249150072851, "grad_norm": 0.566998302936554, "learning_rate": 0.0001, "loss": 1.7456, "step": 2443 }, { "epoch": 0.5934919864011656, "grad_norm": 0.5750413537025452, "learning_rate": 0.0001, "loss": 1.7105, "step": 2444 }, { "epoch": 0.5937348227294803, "grad_norm": 0.5915107131004333, "learning_rate": 0.0001, "loss": 1.6796, "step": 2445 }, { "epoch": 0.5939776590577951, "grad_norm": 0.5885311961174011, "learning_rate": 0.0001, "loss": 1.701, "step": 2446 }, { "epoch": 0.5942204953861098, "grad_norm": 0.614865779876709, "learning_rate": 0.0001, "loss": 1.7882, "step": 2447 }, { "epoch": 0.5944633317144244, "grad_norm": 0.5743029117584229, "learning_rate": 0.0001, "loss": 1.7656, "step": 2448 }, { "epoch": 0.5947061680427392, "grad_norm": 0.561428427696228, "learning_rate": 0.0001, "loss": 1.6402, "step": 2449 }, { "epoch": 0.5949490043710539, "grad_norm": 0.5786658525466919, "learning_rate": 0.0001, "loss": 1.8408, "step": 2450 }, { "epoch": 0.5951918406993686, "grad_norm": 0.5619210600852966, "learning_rate": 0.0001, "loss": 1.7701, "step": 2451 }, { "epoch": 0.5954346770276834, "grad_norm": 0.5815402269363403, "learning_rate": 0.0001, "loss": 1.8588, "step": 2452 }, { "epoch": 0.5956775133559981, "grad_norm": 0.5524076223373413, "learning_rate": 0.0001, "loss": 1.74, "step": 2453 }, { "epoch": 0.5959203496843127, "grad_norm": 0.6057606339454651, "learning_rate": 0.0001, "loss": 1.7731, "step": 2454 }, { "epoch": 0.5961631860126275, "grad_norm": 0.5456556081771851, "learning_rate": 0.0001, "loss": 1.575, "step": 2455 }, { "epoch": 0.5964060223409422, "grad_norm": 0.5837120413780212, "learning_rate": 0.0001, "loss": 1.6891, "step": 2456 }, { "epoch": 0.5966488586692569, "grad_norm": 0.5915725231170654, "learning_rate": 0.0001, "loss": 1.753, "step": 2457 }, { "epoch": 0.5968916949975717, "grad_norm": 1.2732526063919067, "learning_rate": 0.0001, "loss": 1.7561, "step": 2458 }, { "epoch": 0.5971345313258863, "grad_norm": 0.5702220797538757, "learning_rate": 0.0001, "loss": 1.7971, "step": 2459 }, { "epoch": 0.597377367654201, "grad_norm": 0.5905393362045288, "learning_rate": 0.0001, "loss": 1.7459, "step": 2460 }, { "epoch": 0.5976202039825158, "grad_norm": 0.6075118780136108, "learning_rate": 0.0001, "loss": 1.6879, "step": 2461 }, { "epoch": 0.5978630403108305, "grad_norm": 0.5428155064582825, "learning_rate": 0.0001, "loss": 1.5011, "step": 2462 }, { "epoch": 0.5981058766391452, "grad_norm": 0.6202692985534668, "learning_rate": 0.0001, "loss": 1.8806, "step": 2463 }, { "epoch": 0.59834871296746, "grad_norm": 0.6165500283241272, "learning_rate": 0.0001, "loss": 1.7462, "step": 2464 }, { "epoch": 0.5985915492957746, "grad_norm": 0.6068257093429565, "learning_rate": 0.0001, "loss": 1.5594, "step": 2465 }, { "epoch": 0.5988343856240894, "grad_norm": 0.6311659216880798, "learning_rate": 0.0001, "loss": 1.8102, "step": 2466 }, { "epoch": 0.5990772219524041, "grad_norm": 0.5966998338699341, "learning_rate": 0.0001, "loss": 1.8661, "step": 2467 }, { "epoch": 0.5993200582807188, "grad_norm": 0.5616307258605957, "learning_rate": 0.0001, "loss": 1.6398, "step": 2468 }, { "epoch": 0.5995628946090336, "grad_norm": 0.5898672342300415, "learning_rate": 0.0001, "loss": 1.7186, "step": 2469 }, { "epoch": 0.5998057309373482, "grad_norm": 0.5857921242713928, "learning_rate": 0.0001, "loss": 1.8196, "step": 2470 }, { "epoch": 0.6000485672656629, "grad_norm": 0.5745912790298462, "learning_rate": 0.0001, "loss": 1.7887, "step": 2471 }, { "epoch": 0.6002914035939777, "grad_norm": 0.5690114498138428, "learning_rate": 0.0001, "loss": 1.6674, "step": 2472 }, { "epoch": 0.6005342399222924, "grad_norm": 0.5422451496124268, "learning_rate": 0.0001, "loss": 1.6249, "step": 2473 }, { "epoch": 0.6007770762506071, "grad_norm": 0.5912676453590393, "learning_rate": 0.0001, "loss": 1.7619, "step": 2474 }, { "epoch": 0.6010199125789218, "grad_norm": 0.5485222935676575, "learning_rate": 0.0001, "loss": 1.6598, "step": 2475 }, { "epoch": 0.6012627489072365, "grad_norm": 0.5810349583625793, "learning_rate": 0.0001, "loss": 1.6075, "step": 2476 }, { "epoch": 0.6015055852355512, "grad_norm": 0.6028423309326172, "learning_rate": 0.0001, "loss": 1.851, "step": 2477 }, { "epoch": 0.601748421563866, "grad_norm": 0.5798795223236084, "learning_rate": 0.0001, "loss": 1.7714, "step": 2478 }, { "epoch": 0.6019912578921807, "grad_norm": 0.5962965488433838, "learning_rate": 0.0001, "loss": 1.6885, "step": 2479 }, { "epoch": 0.6022340942204953, "grad_norm": 0.575608491897583, "learning_rate": 0.0001, "loss": 1.7201, "step": 2480 }, { "epoch": 0.6024769305488101, "grad_norm": 0.5525558590888977, "learning_rate": 0.0001, "loss": 1.6574, "step": 2481 }, { "epoch": 0.6027197668771248, "grad_norm": 0.5977734923362732, "learning_rate": 0.0001, "loss": 1.669, "step": 2482 }, { "epoch": 0.6029626032054395, "grad_norm": 0.5953426361083984, "learning_rate": 0.0001, "loss": 1.7852, "step": 2483 }, { "epoch": 0.6032054395337543, "grad_norm": 0.58254474401474, "learning_rate": 0.0001, "loss": 1.6766, "step": 2484 }, { "epoch": 0.603448275862069, "grad_norm": 0.5625193119049072, "learning_rate": 0.0001, "loss": 1.6301, "step": 2485 }, { "epoch": 0.6036911121903836, "grad_norm": 0.5599878430366516, "learning_rate": 0.0001, "loss": 1.752, "step": 2486 }, { "epoch": 0.6039339485186984, "grad_norm": 0.5602197051048279, "learning_rate": 0.0001, "loss": 1.7345, "step": 2487 }, { "epoch": 0.6041767848470131, "grad_norm": 0.5864724516868591, "learning_rate": 0.0001, "loss": 1.6648, "step": 2488 }, { "epoch": 0.6044196211753279, "grad_norm": 0.580513060092926, "learning_rate": 0.0001, "loss": 1.7135, "step": 2489 }, { "epoch": 0.6046624575036426, "grad_norm": 0.617057740688324, "learning_rate": 0.0001, "loss": 1.9021, "step": 2490 }, { "epoch": 0.6049052938319572, "grad_norm": 0.5644077658653259, "learning_rate": 0.0001, "loss": 1.6332, "step": 2491 }, { "epoch": 0.605148130160272, "grad_norm": 0.5731435418128967, "learning_rate": 0.0001, "loss": 1.7134, "step": 2492 }, { "epoch": 0.6053909664885867, "grad_norm": 0.6410675644874573, "learning_rate": 0.0001, "loss": 1.8625, "step": 2493 }, { "epoch": 0.6056338028169014, "grad_norm": 0.5538236498832703, "learning_rate": 0.0001, "loss": 1.6393, "step": 2494 }, { "epoch": 0.6058766391452162, "grad_norm": 0.5494367480278015, "learning_rate": 0.0001, "loss": 1.5929, "step": 2495 }, { "epoch": 0.6061194754735308, "grad_norm": 0.6226809024810791, "learning_rate": 0.0001, "loss": 1.7613, "step": 2496 }, { "epoch": 0.6063623118018455, "grad_norm": 0.6167261004447937, "learning_rate": 0.0001, "loss": 1.7512, "step": 2497 }, { "epoch": 0.6066051481301603, "grad_norm": 0.5871309041976929, "learning_rate": 0.0001, "loss": 1.7071, "step": 2498 }, { "epoch": 0.606847984458475, "grad_norm": 0.6313231587409973, "learning_rate": 0.0001, "loss": 1.7515, "step": 2499 }, { "epoch": 0.6070908207867897, "grad_norm": 0.5782124400138855, "learning_rate": 0.0001, "loss": 1.669, "step": 2500 }, { "epoch": 0.6073336571151045, "grad_norm": 0.6116856932640076, "learning_rate": 0.0001, "loss": 1.794, "step": 2501 }, { "epoch": 0.6075764934434191, "grad_norm": 0.5600969195365906, "learning_rate": 0.0001, "loss": 1.6624, "step": 2502 }, { "epoch": 0.6078193297717338, "grad_norm": 0.5679277777671814, "learning_rate": 0.0001, "loss": 1.626, "step": 2503 }, { "epoch": 0.6080621661000486, "grad_norm": 0.5670769810676575, "learning_rate": 0.0001, "loss": 1.7252, "step": 2504 }, { "epoch": 0.6083050024283633, "grad_norm": 0.5758386254310608, "learning_rate": 0.0001, "loss": 1.8142, "step": 2505 }, { "epoch": 0.608547838756678, "grad_norm": 0.5373086929321289, "learning_rate": 0.0001, "loss": 1.6216, "step": 2506 }, { "epoch": 0.6087906750849927, "grad_norm": 0.5614203214645386, "learning_rate": 0.0001, "loss": 1.6092, "step": 2507 }, { "epoch": 0.6090335114133074, "grad_norm": 0.5441398024559021, "learning_rate": 0.0001, "loss": 1.6247, "step": 2508 }, { "epoch": 0.6092763477416221, "grad_norm": 0.619110107421875, "learning_rate": 0.0001, "loss": 1.7226, "step": 2509 }, { "epoch": 0.6095191840699369, "grad_norm": 0.603743851184845, "learning_rate": 0.0001, "loss": 1.8676, "step": 2510 }, { "epoch": 0.6097620203982516, "grad_norm": 0.5662248134613037, "learning_rate": 0.0001, "loss": 1.7946, "step": 2511 }, { "epoch": 0.6100048567265663, "grad_norm": 0.5333821177482605, "learning_rate": 0.0001, "loss": 1.5512, "step": 2512 }, { "epoch": 0.610247693054881, "grad_norm": 0.6046711206436157, "learning_rate": 0.0001, "loss": 1.7985, "step": 2513 }, { "epoch": 0.6104905293831957, "grad_norm": 0.5740635395050049, "learning_rate": 0.0001, "loss": 1.6581, "step": 2514 }, { "epoch": 0.6107333657115105, "grad_norm": 0.6029295325279236, "learning_rate": 0.0001, "loss": 1.8217, "step": 2515 }, { "epoch": 0.6109762020398252, "grad_norm": 0.5581886768341064, "learning_rate": 0.0001, "loss": 1.8342, "step": 2516 }, { "epoch": 0.6112190383681398, "grad_norm": 0.5729279518127441, "learning_rate": 0.0001, "loss": 1.6572, "step": 2517 }, { "epoch": 0.6114618746964546, "grad_norm": 0.5676250457763672, "learning_rate": 0.0001, "loss": 1.6116, "step": 2518 }, { "epoch": 0.6117047110247693, "grad_norm": 0.5726991295814514, "learning_rate": 0.0001, "loss": 1.591, "step": 2519 }, { "epoch": 0.611947547353084, "grad_norm": 0.5791887640953064, "learning_rate": 0.0001, "loss": 1.6938, "step": 2520 }, { "epoch": 0.6121903836813988, "grad_norm": 0.5699273943901062, "learning_rate": 0.0001, "loss": 1.7318, "step": 2521 }, { "epoch": 0.6124332200097135, "grad_norm": 0.564121663570404, "learning_rate": 0.0001, "loss": 1.6655, "step": 2522 }, { "epoch": 0.6126760563380281, "grad_norm": 0.5962718725204468, "learning_rate": 0.0001, "loss": 1.7788, "step": 2523 }, { "epoch": 0.6129188926663429, "grad_norm": 0.5704603791236877, "learning_rate": 0.0001, "loss": 1.7749, "step": 2524 }, { "epoch": 0.6131617289946576, "grad_norm": 0.6029075384140015, "learning_rate": 0.0001, "loss": 1.8248, "step": 2525 }, { "epoch": 0.6134045653229723, "grad_norm": 0.5665852427482605, "learning_rate": 0.0001, "loss": 1.7334, "step": 2526 }, { "epoch": 0.6136474016512871, "grad_norm": 0.5830678343772888, "learning_rate": 0.0001, "loss": 1.6869, "step": 2527 }, { "epoch": 0.6138902379796017, "grad_norm": 0.5895706415176392, "learning_rate": 0.0001, "loss": 1.7805, "step": 2528 }, { "epoch": 0.6141330743079164, "grad_norm": 0.5577168464660645, "learning_rate": 0.0001, "loss": 1.7499, "step": 2529 }, { "epoch": 0.6143759106362312, "grad_norm": 0.5509077906608582, "learning_rate": 0.0001, "loss": 1.6446, "step": 2530 }, { "epoch": 0.6146187469645459, "grad_norm": 0.5555626749992371, "learning_rate": 0.0001, "loss": 1.6044, "step": 2531 }, { "epoch": 0.6148615832928606, "grad_norm": 0.5771478414535522, "learning_rate": 0.0001, "loss": 1.6518, "step": 2532 }, { "epoch": 0.6151044196211753, "grad_norm": 0.55790776014328, "learning_rate": 0.0001, "loss": 1.5604, "step": 2533 }, { "epoch": 0.61534725594949, "grad_norm": 0.5720113515853882, "learning_rate": 0.0001, "loss": 1.6777, "step": 2534 }, { "epoch": 0.6155900922778048, "grad_norm": 0.5889668464660645, "learning_rate": 0.0001, "loss": 1.688, "step": 2535 }, { "epoch": 0.6158329286061195, "grad_norm": 0.5640578866004944, "learning_rate": 0.0001, "loss": 1.662, "step": 2536 }, { "epoch": 0.6160757649344342, "grad_norm": 0.605247974395752, "learning_rate": 0.0001, "loss": 1.733, "step": 2537 }, { "epoch": 0.616318601262749, "grad_norm": 0.5761142373085022, "learning_rate": 0.0001, "loss": 1.7619, "step": 2538 }, { "epoch": 0.6165614375910636, "grad_norm": 0.5925478935241699, "learning_rate": 0.0001, "loss": 1.784, "step": 2539 }, { "epoch": 0.6168042739193783, "grad_norm": 0.5719945430755615, "learning_rate": 0.0001, "loss": 1.7121, "step": 2540 }, { "epoch": 0.6170471102476931, "grad_norm": 0.57254558801651, "learning_rate": 0.0001, "loss": 1.6022, "step": 2541 }, { "epoch": 0.6172899465760078, "grad_norm": 0.5895758867263794, "learning_rate": 0.0001, "loss": 1.6473, "step": 2542 }, { "epoch": 0.6175327829043225, "grad_norm": 0.5915838479995728, "learning_rate": 0.0001, "loss": 1.8678, "step": 2543 }, { "epoch": 0.6177756192326372, "grad_norm": 0.5661805868148804, "learning_rate": 0.0001, "loss": 1.7395, "step": 2544 }, { "epoch": 0.6180184555609519, "grad_norm": 0.6017798781394958, "learning_rate": 0.0001, "loss": 1.7785, "step": 2545 }, { "epoch": 0.6182612918892666, "grad_norm": 0.5868064761161804, "learning_rate": 0.0001, "loss": 1.8783, "step": 2546 }, { "epoch": 0.6185041282175814, "grad_norm": 0.5943471193313599, "learning_rate": 0.0001, "loss": 1.8255, "step": 2547 }, { "epoch": 0.6187469645458961, "grad_norm": 0.5721445083618164, "learning_rate": 0.0001, "loss": 1.6306, "step": 2548 }, { "epoch": 0.6189898008742107, "grad_norm": 0.5788509845733643, "learning_rate": 0.0001, "loss": 1.7048, "step": 2549 }, { "epoch": 0.6192326372025255, "grad_norm": 0.5473672747612, "learning_rate": 0.0001, "loss": 1.5743, "step": 2550 }, { "epoch": 0.6194754735308402, "grad_norm": 0.5789993405342102, "learning_rate": 0.0001, "loss": 1.6904, "step": 2551 }, { "epoch": 0.6197183098591549, "grad_norm": 0.5286515951156616, "learning_rate": 0.0001, "loss": 1.4971, "step": 2552 }, { "epoch": 0.6199611461874697, "grad_norm": 0.5470941066741943, "learning_rate": 0.0001, "loss": 1.6914, "step": 2553 }, { "epoch": 0.6202039825157843, "grad_norm": 0.6283156275749207, "learning_rate": 0.0001, "loss": 2.0322, "step": 2554 }, { "epoch": 0.620446818844099, "grad_norm": 0.571928858757019, "learning_rate": 0.0001, "loss": 1.8533, "step": 2555 }, { "epoch": 0.6206896551724138, "grad_norm": 0.5806577205657959, "learning_rate": 0.0001, "loss": 1.7889, "step": 2556 }, { "epoch": 0.6209324915007285, "grad_norm": 0.6049844026565552, "learning_rate": 0.0001, "loss": 1.7491, "step": 2557 }, { "epoch": 0.6211753278290433, "grad_norm": 0.5663546323776245, "learning_rate": 0.0001, "loss": 1.7514, "step": 2558 }, { "epoch": 0.621418164157358, "grad_norm": 0.5638301968574524, "learning_rate": 0.0001, "loss": 1.6251, "step": 2559 }, { "epoch": 0.6216610004856726, "grad_norm": 0.5578255653381348, "learning_rate": 0.0001, "loss": 1.7402, "step": 2560 }, { "epoch": 0.6219038368139874, "grad_norm": 0.5995447039604187, "learning_rate": 0.0001, "loss": 1.8496, "step": 2561 }, { "epoch": 0.6221466731423021, "grad_norm": 0.5353391170501709, "learning_rate": 0.0001, "loss": 1.4845, "step": 2562 }, { "epoch": 0.6223895094706168, "grad_norm": 0.5682584047317505, "learning_rate": 0.0001, "loss": 1.6659, "step": 2563 }, { "epoch": 0.6226323457989316, "grad_norm": 0.5946477055549622, "learning_rate": 0.0001, "loss": 1.7719, "step": 2564 }, { "epoch": 0.6228751821272462, "grad_norm": 0.5968493819236755, "learning_rate": 0.0001, "loss": 1.6443, "step": 2565 }, { "epoch": 0.6231180184555609, "grad_norm": 0.6130239367485046, "learning_rate": 0.0001, "loss": 1.7575, "step": 2566 }, { "epoch": 0.6233608547838757, "grad_norm": 0.6105282306671143, "learning_rate": 0.0001, "loss": 1.8922, "step": 2567 }, { "epoch": 0.6236036911121904, "grad_norm": 0.5839565992355347, "learning_rate": 0.0001, "loss": 1.6528, "step": 2568 }, { "epoch": 0.6238465274405051, "grad_norm": 0.5885269045829773, "learning_rate": 0.0001, "loss": 1.7915, "step": 2569 }, { "epoch": 0.6240893637688198, "grad_norm": 0.5504341125488281, "learning_rate": 0.0001, "loss": 1.597, "step": 2570 }, { "epoch": 0.6243322000971345, "grad_norm": 0.5717918276786804, "learning_rate": 0.0001, "loss": 1.6806, "step": 2571 }, { "epoch": 0.6245750364254492, "grad_norm": 0.5776162147521973, "learning_rate": 0.0001, "loss": 1.766, "step": 2572 }, { "epoch": 0.624817872753764, "grad_norm": 0.5442912578582764, "learning_rate": 0.0001, "loss": 1.5783, "step": 2573 }, { "epoch": 0.6250607090820787, "grad_norm": 0.5399508476257324, "learning_rate": 0.0001, "loss": 1.7435, "step": 2574 }, { "epoch": 0.6253035454103933, "grad_norm": 0.601722776889801, "learning_rate": 0.0001, "loss": 1.7679, "step": 2575 }, { "epoch": 0.6255463817387081, "grad_norm": 0.5953107476234436, "learning_rate": 0.0001, "loss": 1.7363, "step": 2576 }, { "epoch": 0.6257892180670228, "grad_norm": 0.5822069048881531, "learning_rate": 0.0001, "loss": 1.6664, "step": 2577 }, { "epoch": 0.6260320543953375, "grad_norm": 0.6120938658714294, "learning_rate": 0.0001, "loss": 1.7251, "step": 2578 }, { "epoch": 0.6262748907236523, "grad_norm": 0.632987380027771, "learning_rate": 0.0001, "loss": 1.8687, "step": 2579 }, { "epoch": 0.626517727051967, "grad_norm": 0.579434871673584, "learning_rate": 0.0001, "loss": 1.624, "step": 2580 }, { "epoch": 0.6267605633802817, "grad_norm": 0.6319934725761414, "learning_rate": 0.0001, "loss": 1.8018, "step": 2581 }, { "epoch": 0.6270033997085964, "grad_norm": 0.5732057094573975, "learning_rate": 0.0001, "loss": 1.7314, "step": 2582 }, { "epoch": 0.6272462360369111, "grad_norm": 0.5757622122764587, "learning_rate": 0.0001, "loss": 1.6404, "step": 2583 }, { "epoch": 0.6274890723652259, "grad_norm": 0.5308606624603271, "learning_rate": 0.0001, "loss": 1.6067, "step": 2584 }, { "epoch": 0.6277319086935406, "grad_norm": 0.5910230278968811, "learning_rate": 0.0001, "loss": 1.7544, "step": 2585 }, { "epoch": 0.6279747450218552, "grad_norm": 0.5853991508483887, "learning_rate": 0.0001, "loss": 1.7229, "step": 2586 }, { "epoch": 0.62821758135017, "grad_norm": 0.5474392175674438, "learning_rate": 0.0001, "loss": 1.6707, "step": 2587 }, { "epoch": 0.6284604176784847, "grad_norm": 0.6053445339202881, "learning_rate": 0.0001, "loss": 1.8133, "step": 2588 }, { "epoch": 0.6287032540067994, "grad_norm": 0.577170729637146, "learning_rate": 0.0001, "loss": 1.781, "step": 2589 }, { "epoch": 0.6289460903351142, "grad_norm": 0.6177255511283875, "learning_rate": 0.0001, "loss": 1.8164, "step": 2590 }, { "epoch": 0.6291889266634288, "grad_norm": 0.5895721912384033, "learning_rate": 0.0001, "loss": 1.7574, "step": 2591 }, { "epoch": 0.6294317629917435, "grad_norm": 0.5663004517555237, "learning_rate": 0.0001, "loss": 1.7434, "step": 2592 }, { "epoch": 0.6296745993200583, "grad_norm": 0.6223627924919128, "learning_rate": 0.0001, "loss": 1.9011, "step": 2593 }, { "epoch": 0.629917435648373, "grad_norm": 0.5563454627990723, "learning_rate": 0.0001, "loss": 1.6896, "step": 2594 }, { "epoch": 0.6301602719766877, "grad_norm": 0.5253757238388062, "learning_rate": 0.0001, "loss": 1.5159, "step": 2595 }, { "epoch": 0.6304031083050025, "grad_norm": 0.6091992855072021, "learning_rate": 0.0001, "loss": 1.7002, "step": 2596 }, { "epoch": 0.6306459446333171, "grad_norm": 0.6034373641014099, "learning_rate": 0.0001, "loss": 1.8019, "step": 2597 }, { "epoch": 0.6308887809616318, "grad_norm": 0.635011613368988, "learning_rate": 0.0001, "loss": 1.863, "step": 2598 }, { "epoch": 0.6311316172899466, "grad_norm": 0.5787094831466675, "learning_rate": 0.0001, "loss": 1.6395, "step": 2599 }, { "epoch": 0.6313744536182613, "grad_norm": 0.5938628911972046, "learning_rate": 0.0001, "loss": 1.8188, "step": 2600 }, { "epoch": 0.631617289946576, "grad_norm": 0.6202360987663269, "learning_rate": 0.0001, "loss": 1.7701, "step": 2601 }, { "epoch": 0.6318601262748907, "grad_norm": 0.5813153386116028, "learning_rate": 0.0001, "loss": 1.7545, "step": 2602 }, { "epoch": 0.6321029626032054, "grad_norm": 0.643073320388794, "learning_rate": 0.0001, "loss": 1.8769, "step": 2603 }, { "epoch": 0.6323457989315202, "grad_norm": 0.5803655385971069, "learning_rate": 0.0001, "loss": 1.7714, "step": 2604 }, { "epoch": 0.6325886352598349, "grad_norm": 0.5840656757354736, "learning_rate": 0.0001, "loss": 1.3808, "step": 2605 }, { "epoch": 0.6328314715881496, "grad_norm": 0.581245481967926, "learning_rate": 0.0001, "loss": 1.8951, "step": 2606 }, { "epoch": 0.6330743079164644, "grad_norm": 0.5805801153182983, "learning_rate": 0.0001, "loss": 1.7829, "step": 2607 }, { "epoch": 0.633317144244779, "grad_norm": 0.6198165416717529, "learning_rate": 0.0001, "loss": 1.7845, "step": 2608 }, { "epoch": 0.6335599805730937, "grad_norm": 0.6104231476783752, "learning_rate": 0.0001, "loss": 1.7534, "step": 2609 }, { "epoch": 0.6338028169014085, "grad_norm": 0.5909473299980164, "learning_rate": 0.0001, "loss": 1.6922, "step": 2610 }, { "epoch": 0.6340456532297232, "grad_norm": 0.5757015347480774, "learning_rate": 0.0001, "loss": 1.6907, "step": 2611 }, { "epoch": 0.6342884895580378, "grad_norm": 0.5593287944793701, "learning_rate": 0.0001, "loss": 1.6514, "step": 2612 }, { "epoch": 0.6345313258863526, "grad_norm": 0.6046830415725708, "learning_rate": 0.0001, "loss": 1.8173, "step": 2613 }, { "epoch": 0.6347741622146673, "grad_norm": 0.570202112197876, "learning_rate": 0.0001, "loss": 1.8575, "step": 2614 }, { "epoch": 0.635016998542982, "grad_norm": 0.5984710454940796, "learning_rate": 0.0001, "loss": 1.8192, "step": 2615 }, { "epoch": 0.6352598348712968, "grad_norm": 0.6091638803482056, "learning_rate": 0.0001, "loss": 1.8031, "step": 2616 }, { "epoch": 0.6355026711996115, "grad_norm": 0.5655072927474976, "learning_rate": 0.0001, "loss": 1.6221, "step": 2617 }, { "epoch": 0.6357455075279261, "grad_norm": 0.5606513023376465, "learning_rate": 0.0001, "loss": 1.7812, "step": 2618 }, { "epoch": 0.6359883438562409, "grad_norm": 0.5860190987586975, "learning_rate": 0.0001, "loss": 1.6619, "step": 2619 }, { "epoch": 0.6362311801845556, "grad_norm": 0.5981796979904175, "learning_rate": 0.0001, "loss": 1.6847, "step": 2620 }, { "epoch": 0.6364740165128703, "grad_norm": 0.5950610041618347, "learning_rate": 0.0001, "loss": 1.7344, "step": 2621 }, { "epoch": 0.6367168528411851, "grad_norm": 0.6207805275917053, "learning_rate": 0.0001, "loss": 1.9791, "step": 2622 }, { "epoch": 0.6369596891694997, "grad_norm": 0.5669253468513489, "learning_rate": 0.0001, "loss": 1.6362, "step": 2623 }, { "epoch": 0.6372025254978144, "grad_norm": 0.5895827412605286, "learning_rate": 0.0001, "loss": 1.8426, "step": 2624 }, { "epoch": 0.6374453618261292, "grad_norm": 0.6235145926475525, "learning_rate": 0.0001, "loss": 1.633, "step": 2625 }, { "epoch": 0.6376881981544439, "grad_norm": 0.6031660437583923, "learning_rate": 0.0001, "loss": 1.8477, "step": 2626 }, { "epoch": 0.6379310344827587, "grad_norm": 0.6209763884544373, "learning_rate": 0.0001, "loss": 1.8003, "step": 2627 }, { "epoch": 0.6381738708110734, "grad_norm": 0.5706555247306824, "learning_rate": 0.0001, "loss": 1.8148, "step": 2628 }, { "epoch": 0.638416707139388, "grad_norm": 0.6101211309432983, "learning_rate": 0.0001, "loss": 1.6899, "step": 2629 }, { "epoch": 0.6386595434677028, "grad_norm": 0.6126018166542053, "learning_rate": 0.0001, "loss": 1.764, "step": 2630 }, { "epoch": 0.6389023797960175, "grad_norm": 0.6178079843521118, "learning_rate": 0.0001, "loss": 1.7531, "step": 2631 }, { "epoch": 0.6391452161243322, "grad_norm": 0.621501088142395, "learning_rate": 0.0001, "loss": 1.7783, "step": 2632 }, { "epoch": 0.639388052452647, "grad_norm": 0.5543282628059387, "learning_rate": 0.0001, "loss": 1.7778, "step": 2633 }, { "epoch": 0.6396308887809616, "grad_norm": 0.5693379640579224, "learning_rate": 0.0001, "loss": 1.5891, "step": 2634 }, { "epoch": 0.6398737251092763, "grad_norm": 0.6385512351989746, "learning_rate": 0.0001, "loss": 1.8175, "step": 2635 }, { "epoch": 0.6401165614375911, "grad_norm": 0.5857069492340088, "learning_rate": 0.0001, "loss": 1.6799, "step": 2636 }, { "epoch": 0.6403593977659058, "grad_norm": 0.5730785131454468, "learning_rate": 0.0001, "loss": 1.6808, "step": 2637 }, { "epoch": 0.6406022340942205, "grad_norm": 0.5785220861434937, "learning_rate": 0.0001, "loss": 1.602, "step": 2638 }, { "epoch": 0.6408450704225352, "grad_norm": 0.5896817445755005, "learning_rate": 0.0001, "loss": 1.7995, "step": 2639 }, { "epoch": 0.6410879067508499, "grad_norm": 0.607143223285675, "learning_rate": 0.0001, "loss": 1.6896, "step": 2640 }, { "epoch": 0.6413307430791646, "grad_norm": 0.5732043385505676, "learning_rate": 0.0001, "loss": 1.7495, "step": 2641 }, { "epoch": 0.6415735794074794, "grad_norm": 0.592718780040741, "learning_rate": 0.0001, "loss": 1.7584, "step": 2642 }, { "epoch": 0.6418164157357941, "grad_norm": 0.5815568566322327, "learning_rate": 0.0001, "loss": 1.776, "step": 2643 }, { "epoch": 0.6420592520641087, "grad_norm": 0.573049008846283, "learning_rate": 0.0001, "loss": 1.6817, "step": 2644 }, { "epoch": 0.6423020883924235, "grad_norm": 0.5804055333137512, "learning_rate": 0.0001, "loss": 1.7891, "step": 2645 }, { "epoch": 0.6425449247207382, "grad_norm": 0.5961693525314331, "learning_rate": 0.0001, "loss": 1.8251, "step": 2646 }, { "epoch": 0.6427877610490529, "grad_norm": 0.5870078206062317, "learning_rate": 0.0001, "loss": 1.6878, "step": 2647 }, { "epoch": 0.6430305973773677, "grad_norm": 0.6337385773658752, "learning_rate": 0.0001, "loss": 1.9097, "step": 2648 }, { "epoch": 0.6432734337056824, "grad_norm": 0.6195207834243774, "learning_rate": 0.0001, "loss": 1.7589, "step": 2649 }, { "epoch": 0.6435162700339971, "grad_norm": 0.5870786905288696, "learning_rate": 0.0001, "loss": 1.8101, "step": 2650 }, { "epoch": 0.6437591063623118, "grad_norm": 0.5888528227806091, "learning_rate": 0.0001, "loss": 1.7392, "step": 2651 }, { "epoch": 0.6440019426906265, "grad_norm": 0.5681914687156677, "learning_rate": 0.0001, "loss": 1.695, "step": 2652 }, { "epoch": 0.6442447790189413, "grad_norm": 0.5761615037918091, "learning_rate": 0.0001, "loss": 1.8565, "step": 2653 }, { "epoch": 0.644487615347256, "grad_norm": 0.5865592360496521, "learning_rate": 0.0001, "loss": 1.8046, "step": 2654 }, { "epoch": 0.6447304516755706, "grad_norm": 0.5950198769569397, "learning_rate": 0.0001, "loss": 1.7282, "step": 2655 }, { "epoch": 0.6449732880038854, "grad_norm": 0.5902634859085083, "learning_rate": 0.0001, "loss": 1.6259, "step": 2656 }, { "epoch": 0.6452161243322001, "grad_norm": 0.5786793828010559, "learning_rate": 0.0001, "loss": 1.7535, "step": 2657 }, { "epoch": 0.6454589606605148, "grad_norm": 0.5738018155097961, "learning_rate": 0.0001, "loss": 1.5403, "step": 2658 }, { "epoch": 0.6457017969888296, "grad_norm": 0.5593220591545105, "learning_rate": 0.0001, "loss": 1.6467, "step": 2659 }, { "epoch": 0.6459446333171442, "grad_norm": 0.5977775454521179, "learning_rate": 0.0001, "loss": 1.7181, "step": 2660 }, { "epoch": 0.6461874696454589, "grad_norm": 0.5915343761444092, "learning_rate": 0.0001, "loss": 1.7429, "step": 2661 }, { "epoch": 0.6464303059737737, "grad_norm": 0.5825751423835754, "learning_rate": 0.0001, "loss": 1.7846, "step": 2662 }, { "epoch": 0.6466731423020884, "grad_norm": 0.601574718952179, "learning_rate": 0.0001, "loss": 1.7243, "step": 2663 }, { "epoch": 0.6469159786304031, "grad_norm": 0.5464960336685181, "learning_rate": 0.0001, "loss": 1.5944, "step": 2664 }, { "epoch": 0.6471588149587179, "grad_norm": 0.5692195296287537, "learning_rate": 0.0001, "loss": 1.7974, "step": 2665 }, { "epoch": 0.6474016512870325, "grad_norm": 0.5517829060554504, "learning_rate": 0.0001, "loss": 1.7614, "step": 2666 }, { "epoch": 0.6476444876153472, "grad_norm": 0.5647340416908264, "learning_rate": 0.0001, "loss": 1.7064, "step": 2667 }, { "epoch": 0.647887323943662, "grad_norm": 0.5393012166023254, "learning_rate": 0.0001, "loss": 1.6126, "step": 2668 }, { "epoch": 0.6481301602719767, "grad_norm": 0.566716730594635, "learning_rate": 0.0001, "loss": 1.7787, "step": 2669 }, { "epoch": 0.6483729966002914, "grad_norm": 0.5588338971138, "learning_rate": 0.0001, "loss": 1.7812, "step": 2670 }, { "epoch": 0.6486158329286061, "grad_norm": 0.5825018286705017, "learning_rate": 0.0001, "loss": 1.7021, "step": 2671 }, { "epoch": 0.6488586692569208, "grad_norm": 0.5807709693908691, "learning_rate": 0.0001, "loss": 1.833, "step": 2672 }, { "epoch": 0.6491015055852356, "grad_norm": 0.5714016556739807, "learning_rate": 0.0001, "loss": 1.7231, "step": 2673 }, { "epoch": 0.6493443419135503, "grad_norm": 0.5701686143875122, "learning_rate": 0.0001, "loss": 1.7271, "step": 2674 }, { "epoch": 0.649587178241865, "grad_norm": 0.6211767196655273, "learning_rate": 0.0001, "loss": 1.8542, "step": 2675 }, { "epoch": 0.6498300145701797, "grad_norm": 0.5851500630378723, "learning_rate": 0.0001, "loss": 1.8296, "step": 2676 }, { "epoch": 0.6500728508984944, "grad_norm": 0.6012861728668213, "learning_rate": 0.0001, "loss": 1.8426, "step": 2677 }, { "epoch": 0.6503156872268091, "grad_norm": 0.5840545892715454, "learning_rate": 0.0001, "loss": 1.7581, "step": 2678 }, { "epoch": 0.6505585235551239, "grad_norm": 0.580803394317627, "learning_rate": 0.0001, "loss": 1.7851, "step": 2679 }, { "epoch": 0.6508013598834386, "grad_norm": 0.5769532322883606, "learning_rate": 0.0001, "loss": 1.6163, "step": 2680 }, { "epoch": 0.6510441962117532, "grad_norm": 0.5599761605262756, "learning_rate": 0.0001, "loss": 1.667, "step": 2681 }, { "epoch": 0.651287032540068, "grad_norm": 0.6272950172424316, "learning_rate": 0.0001, "loss": 1.9708, "step": 2682 }, { "epoch": 0.6515298688683827, "grad_norm": 0.5727792978286743, "learning_rate": 0.0001, "loss": 1.6734, "step": 2683 }, { "epoch": 0.6517727051966974, "grad_norm": 0.5690376162528992, "learning_rate": 0.0001, "loss": 1.4981, "step": 2684 }, { "epoch": 0.6520155415250122, "grad_norm": 0.6194298267364502, "learning_rate": 0.0001, "loss": 1.8909, "step": 2685 }, { "epoch": 0.6522583778533269, "grad_norm": 0.585206151008606, "learning_rate": 0.0001, "loss": 1.7833, "step": 2686 }, { "epoch": 0.6525012141816415, "grad_norm": 0.5940220355987549, "learning_rate": 0.0001, "loss": 1.8036, "step": 2687 }, { "epoch": 0.6527440505099563, "grad_norm": 0.5587031841278076, "learning_rate": 0.0001, "loss": 1.705, "step": 2688 }, { "epoch": 0.652986886838271, "grad_norm": 0.5791237354278564, "learning_rate": 0.0001, "loss": 1.6548, "step": 2689 }, { "epoch": 0.6532297231665857, "grad_norm": 0.6508381962776184, "learning_rate": 0.0001, "loss": 1.8489, "step": 2690 }, { "epoch": 0.6534725594949005, "grad_norm": 0.5821163058280945, "learning_rate": 0.0001, "loss": 1.7967, "step": 2691 }, { "epoch": 0.6537153958232151, "grad_norm": 0.5625880360603333, "learning_rate": 0.0001, "loss": 1.7925, "step": 2692 }, { "epoch": 0.6539582321515298, "grad_norm": 0.5700521469116211, "learning_rate": 0.0001, "loss": 1.5928, "step": 2693 }, { "epoch": 0.6542010684798446, "grad_norm": 0.5617753863334656, "learning_rate": 0.0001, "loss": 1.6741, "step": 2694 }, { "epoch": 0.6544439048081593, "grad_norm": 0.5487257838249207, "learning_rate": 0.0001, "loss": 1.607, "step": 2695 }, { "epoch": 0.6546867411364741, "grad_norm": 0.6118389964103699, "learning_rate": 0.0001, "loss": 1.6812, "step": 2696 }, { "epoch": 0.6549295774647887, "grad_norm": 0.6059249043464661, "learning_rate": 0.0001, "loss": 1.7782, "step": 2697 }, { "epoch": 0.6551724137931034, "grad_norm": 0.6007734537124634, "learning_rate": 0.0001, "loss": 1.7264, "step": 2698 }, { "epoch": 0.6554152501214182, "grad_norm": 0.5955468416213989, "learning_rate": 0.0001, "loss": 1.7981, "step": 2699 }, { "epoch": 0.6556580864497329, "grad_norm": 0.6089268326759338, "learning_rate": 0.0001, "loss": 1.7969, "step": 2700 }, { "epoch": 0.6559009227780476, "grad_norm": 0.5827802419662476, "learning_rate": 0.0001, "loss": 1.6377, "step": 2701 }, { "epoch": 0.6561437591063624, "grad_norm": 0.5722429156303406, "learning_rate": 0.0001, "loss": 1.6032, "step": 2702 }, { "epoch": 0.656386595434677, "grad_norm": 0.5856757164001465, "learning_rate": 0.0001, "loss": 1.9363, "step": 2703 }, { "epoch": 0.6566294317629917, "grad_norm": 0.5609936118125916, "learning_rate": 0.0001, "loss": 1.7985, "step": 2704 }, { "epoch": 0.6568722680913065, "grad_norm": 0.6096386909484863, "learning_rate": 0.0001, "loss": 1.8202, "step": 2705 }, { "epoch": 0.6571151044196212, "grad_norm": 0.5903800129890442, "learning_rate": 0.0001, "loss": 1.6566, "step": 2706 }, { "epoch": 0.6573579407479359, "grad_norm": 0.5459445714950562, "learning_rate": 0.0001, "loss": 1.7221, "step": 2707 }, { "epoch": 0.6576007770762506, "grad_norm": 0.5889915823936462, "learning_rate": 0.0001, "loss": 1.7066, "step": 2708 }, { "epoch": 0.6578436134045653, "grad_norm": 0.5951976776123047, "learning_rate": 0.0001, "loss": 1.7497, "step": 2709 }, { "epoch": 0.65808644973288, "grad_norm": 0.5950310826301575, "learning_rate": 0.0001, "loss": 1.743, "step": 2710 }, { "epoch": 0.6583292860611948, "grad_norm": 0.5863953232765198, "learning_rate": 0.0001, "loss": 1.9078, "step": 2711 }, { "epoch": 0.6585721223895095, "grad_norm": 0.5783514976501465, "learning_rate": 0.0001, "loss": 1.7259, "step": 2712 }, { "epoch": 0.6588149587178241, "grad_norm": 0.576743483543396, "learning_rate": 0.0001, "loss": 1.5968, "step": 2713 }, { "epoch": 0.6590577950461389, "grad_norm": 0.6127098798751831, "learning_rate": 0.0001, "loss": 1.6289, "step": 2714 }, { "epoch": 0.6593006313744536, "grad_norm": 0.5809155106544495, "learning_rate": 0.0001, "loss": 1.7011, "step": 2715 }, { "epoch": 0.6595434677027683, "grad_norm": 0.5596655011177063, "learning_rate": 0.0001, "loss": 1.676, "step": 2716 }, { "epoch": 0.6597863040310831, "grad_norm": 0.560106635093689, "learning_rate": 0.0001, "loss": 1.7014, "step": 2717 }, { "epoch": 0.6600291403593977, "grad_norm": 0.6060172319412231, "learning_rate": 0.0001, "loss": 1.8205, "step": 2718 }, { "epoch": 0.6602719766877125, "grad_norm": 0.5648579001426697, "learning_rate": 0.0001, "loss": 1.7937, "step": 2719 }, { "epoch": 0.6605148130160272, "grad_norm": 0.5625224113464355, "learning_rate": 0.0001, "loss": 1.6573, "step": 2720 }, { "epoch": 0.6607576493443419, "grad_norm": 0.5313379168510437, "learning_rate": 0.0001, "loss": 1.6482, "step": 2721 }, { "epoch": 0.6610004856726567, "grad_norm": 0.6125560998916626, "learning_rate": 0.0001, "loss": 1.9104, "step": 2722 }, { "epoch": 0.6612433220009714, "grad_norm": 0.5860222578048706, "learning_rate": 0.0001, "loss": 1.6115, "step": 2723 }, { "epoch": 0.661486158329286, "grad_norm": 0.5965452790260315, "learning_rate": 0.0001, "loss": 1.7382, "step": 2724 }, { "epoch": 0.6617289946576008, "grad_norm": 0.5650197863578796, "learning_rate": 0.0001, "loss": 1.6651, "step": 2725 }, { "epoch": 0.6619718309859155, "grad_norm": 0.5879565477371216, "learning_rate": 0.0001, "loss": 1.7021, "step": 2726 }, { "epoch": 0.6622146673142302, "grad_norm": 0.6002961993217468, "learning_rate": 0.0001, "loss": 1.8219, "step": 2727 }, { "epoch": 0.662457503642545, "grad_norm": 0.5902764201164246, "learning_rate": 0.0001, "loss": 1.476, "step": 2728 }, { "epoch": 0.6627003399708596, "grad_norm": 0.6064320206642151, "learning_rate": 0.0001, "loss": 1.7249, "step": 2729 }, { "epoch": 0.6629431762991743, "grad_norm": 0.585027813911438, "learning_rate": 0.0001, "loss": 1.736, "step": 2730 }, { "epoch": 0.6631860126274891, "grad_norm": 0.6057623624801636, "learning_rate": 0.0001, "loss": 1.7291, "step": 2731 }, { "epoch": 0.6634288489558038, "grad_norm": 0.5800138711929321, "learning_rate": 0.0001, "loss": 1.6558, "step": 2732 }, { "epoch": 0.6636716852841185, "grad_norm": 0.6094589233398438, "learning_rate": 0.0001, "loss": 1.6821, "step": 2733 }, { "epoch": 0.6639145216124333, "grad_norm": 0.6267191767692566, "learning_rate": 0.0001, "loss": 1.8418, "step": 2734 }, { "epoch": 0.6641573579407479, "grad_norm": 0.6102715134620667, "learning_rate": 0.0001, "loss": 1.6363, "step": 2735 }, { "epoch": 0.6644001942690626, "grad_norm": 0.569195032119751, "learning_rate": 0.0001, "loss": 1.637, "step": 2736 }, { "epoch": 0.6646430305973774, "grad_norm": 0.6051103472709656, "learning_rate": 0.0001, "loss": 1.8214, "step": 2737 }, { "epoch": 0.6648858669256921, "grad_norm": 0.6168873906135559, "learning_rate": 0.0001, "loss": 1.8553, "step": 2738 }, { "epoch": 0.6651287032540067, "grad_norm": 0.6061825752258301, "learning_rate": 0.0001, "loss": 1.7512, "step": 2739 }, { "epoch": 0.6653715395823215, "grad_norm": 0.5734617710113525, "learning_rate": 0.0001, "loss": 1.7128, "step": 2740 }, { "epoch": 0.6656143759106362, "grad_norm": 0.640291154384613, "learning_rate": 0.0001, "loss": 1.8902, "step": 2741 }, { "epoch": 0.665857212238951, "grad_norm": 0.5751891732215881, "learning_rate": 0.0001, "loss": 1.7698, "step": 2742 }, { "epoch": 0.6661000485672657, "grad_norm": 0.6123544573783875, "learning_rate": 0.0001, "loss": 1.6901, "step": 2743 }, { "epoch": 0.6663428848955804, "grad_norm": 0.6082634329795837, "learning_rate": 0.0001, "loss": 1.7141, "step": 2744 }, { "epoch": 0.6665857212238951, "grad_norm": 0.5537638068199158, "learning_rate": 0.0001, "loss": 1.6828, "step": 2745 }, { "epoch": 0.6668285575522098, "grad_norm": 0.5809677839279175, "learning_rate": 0.0001, "loss": 1.836, "step": 2746 }, { "epoch": 0.6670713938805245, "grad_norm": 0.5729596018791199, "learning_rate": 0.0001, "loss": 1.6577, "step": 2747 }, { "epoch": 0.6673142302088393, "grad_norm": 0.568006157875061, "learning_rate": 0.0001, "loss": 1.7218, "step": 2748 }, { "epoch": 0.667557066537154, "grad_norm": 0.585652768611908, "learning_rate": 0.0001, "loss": 1.8467, "step": 2749 }, { "epoch": 0.6677999028654686, "grad_norm": 0.5728684663772583, "learning_rate": 0.0001, "loss": 1.7781, "step": 2750 }, { "epoch": 0.6680427391937834, "grad_norm": 0.563118040561676, "learning_rate": 0.0001, "loss": 1.7189, "step": 2751 }, { "epoch": 0.6682855755220981, "grad_norm": 0.5746209621429443, "learning_rate": 0.0001, "loss": 1.6984, "step": 2752 }, { "epoch": 0.6685284118504128, "grad_norm": 0.5608879923820496, "learning_rate": 0.0001, "loss": 1.7711, "step": 2753 }, { "epoch": 0.6687712481787276, "grad_norm": 0.5681405067443848, "learning_rate": 0.0001, "loss": 1.69, "step": 2754 }, { "epoch": 0.6690140845070423, "grad_norm": 0.5153051018714905, "learning_rate": 0.0001, "loss": 1.3654, "step": 2755 }, { "epoch": 0.6692569208353569, "grad_norm": 0.5652470588684082, "learning_rate": 0.0001, "loss": 1.7889, "step": 2756 }, { "epoch": 0.6694997571636717, "grad_norm": 0.5300597548484802, "learning_rate": 0.0001, "loss": 1.6747, "step": 2757 }, { "epoch": 0.6697425934919864, "grad_norm": 0.5912163853645325, "learning_rate": 0.0001, "loss": 1.761, "step": 2758 }, { "epoch": 0.6699854298203011, "grad_norm": 0.6375007629394531, "learning_rate": 0.0001, "loss": 1.8046, "step": 2759 }, { "epoch": 0.6702282661486159, "grad_norm": 0.6044784784317017, "learning_rate": 0.0001, "loss": 1.9189, "step": 2760 }, { "epoch": 0.6704711024769305, "grad_norm": 0.603838324546814, "learning_rate": 0.0001, "loss": 1.9457, "step": 2761 }, { "epoch": 0.6707139388052452, "grad_norm": 0.5713114738464355, "learning_rate": 0.0001, "loss": 1.7561, "step": 2762 }, { "epoch": 0.67095677513356, "grad_norm": 0.6627044677734375, "learning_rate": 0.0001, "loss": 1.8517, "step": 2763 }, { "epoch": 0.6711996114618747, "grad_norm": 0.5946755409240723, "learning_rate": 0.0001, "loss": 1.6253, "step": 2764 }, { "epoch": 0.6714424477901895, "grad_norm": 0.575951337814331, "learning_rate": 0.0001, "loss": 1.8093, "step": 2765 }, { "epoch": 0.6716852841185041, "grad_norm": 0.6035037040710449, "learning_rate": 0.0001, "loss": 1.9085, "step": 2766 }, { "epoch": 0.6719281204468188, "grad_norm": 0.60302734375, "learning_rate": 0.0001, "loss": 1.6889, "step": 2767 }, { "epoch": 0.6721709567751336, "grad_norm": 0.5809454917907715, "learning_rate": 0.0001, "loss": 1.7069, "step": 2768 }, { "epoch": 0.6724137931034483, "grad_norm": 0.5788617730140686, "learning_rate": 0.0001, "loss": 1.7135, "step": 2769 }, { "epoch": 0.672656629431763, "grad_norm": 0.595129668712616, "learning_rate": 0.0001, "loss": 1.8324, "step": 2770 }, { "epoch": 0.6728994657600778, "grad_norm": 0.566836953163147, "learning_rate": 0.0001, "loss": 1.5955, "step": 2771 }, { "epoch": 0.6731423020883924, "grad_norm": 0.5981171727180481, "learning_rate": 0.0001, "loss": 1.7693, "step": 2772 }, { "epoch": 0.6733851384167071, "grad_norm": 0.5812596678733826, "learning_rate": 0.0001, "loss": 1.636, "step": 2773 }, { "epoch": 0.6736279747450219, "grad_norm": 0.5905534029006958, "learning_rate": 0.0001, "loss": 1.8059, "step": 2774 }, { "epoch": 0.6738708110733366, "grad_norm": 0.6104099154472351, "learning_rate": 0.0001, "loss": 1.9028, "step": 2775 }, { "epoch": 0.6741136474016513, "grad_norm": 0.569607675075531, "learning_rate": 0.0001, "loss": 1.8641, "step": 2776 }, { "epoch": 0.674356483729966, "grad_norm": 0.6469245553016663, "learning_rate": 0.0001, "loss": 1.7234, "step": 2777 }, { "epoch": 0.6745993200582807, "grad_norm": 0.5621276497840881, "learning_rate": 0.0001, "loss": 1.7674, "step": 2778 }, { "epoch": 0.6748421563865954, "grad_norm": 0.5735675692558289, "learning_rate": 0.0001, "loss": 1.7072, "step": 2779 }, { "epoch": 0.6750849927149102, "grad_norm": 0.5662938952445984, "learning_rate": 0.0001, "loss": 1.6211, "step": 2780 }, { "epoch": 0.6753278290432249, "grad_norm": 0.5934909582138062, "learning_rate": 0.0001, "loss": 1.8791, "step": 2781 }, { "epoch": 0.6755706653715395, "grad_norm": 0.6003088355064392, "learning_rate": 0.0001, "loss": 1.6775, "step": 2782 }, { "epoch": 0.6758135016998543, "grad_norm": 0.5914050340652466, "learning_rate": 0.0001, "loss": 1.7438, "step": 2783 }, { "epoch": 0.676056338028169, "grad_norm": 0.5427725911140442, "learning_rate": 0.0001, "loss": 1.4817, "step": 2784 }, { "epoch": 0.6762991743564837, "grad_norm": 0.5584941506385803, "learning_rate": 0.0001, "loss": 1.6642, "step": 2785 }, { "epoch": 0.6765420106847985, "grad_norm": 0.5654450058937073, "learning_rate": 0.0001, "loss": 1.6339, "step": 2786 }, { "epoch": 0.6767848470131131, "grad_norm": 0.5703548192977905, "learning_rate": 0.0001, "loss": 1.7468, "step": 2787 }, { "epoch": 0.6770276833414279, "grad_norm": 0.5988372564315796, "learning_rate": 0.0001, "loss": 1.8245, "step": 2788 }, { "epoch": 0.6772705196697426, "grad_norm": 0.5367932915687561, "learning_rate": 0.0001, "loss": 1.6302, "step": 2789 }, { "epoch": 0.6775133559980573, "grad_norm": 0.5318285226821899, "learning_rate": 0.0001, "loss": 1.5022, "step": 2790 }, { "epoch": 0.6777561923263721, "grad_norm": 0.6063349843025208, "learning_rate": 0.0001, "loss": 1.73, "step": 2791 }, { "epoch": 0.6779990286546868, "grad_norm": 0.6300348043441772, "learning_rate": 0.0001, "loss": 1.9982, "step": 2792 }, { "epoch": 0.6782418649830014, "grad_norm": 0.5618193745613098, "learning_rate": 0.0001, "loss": 1.5975, "step": 2793 }, { "epoch": 0.6784847013113162, "grad_norm": 0.5824648141860962, "learning_rate": 0.0001, "loss": 1.5994, "step": 2794 }, { "epoch": 0.6787275376396309, "grad_norm": 0.6013264656066895, "learning_rate": 0.0001, "loss": 1.7429, "step": 2795 }, { "epoch": 0.6789703739679456, "grad_norm": 0.5931627750396729, "learning_rate": 0.0001, "loss": 1.5315, "step": 2796 }, { "epoch": 0.6792132102962604, "grad_norm": 0.5816960334777832, "learning_rate": 0.0001, "loss": 1.7652, "step": 2797 }, { "epoch": 0.679456046624575, "grad_norm": 0.578484058380127, "learning_rate": 0.0001, "loss": 1.7473, "step": 2798 }, { "epoch": 0.6796988829528897, "grad_norm": 0.55427086353302, "learning_rate": 0.0001, "loss": 1.5989, "step": 2799 }, { "epoch": 0.6799417192812045, "grad_norm": 0.6089683771133423, "learning_rate": 0.0001, "loss": 1.6209, "step": 2800 }, { "epoch": 0.6801845556095192, "grad_norm": 0.5802733898162842, "learning_rate": 0.0001, "loss": 1.6368, "step": 2801 }, { "epoch": 0.6804273919378339, "grad_norm": 0.5919795036315918, "learning_rate": 0.0001, "loss": 1.7557, "step": 2802 }, { "epoch": 0.6806702282661486, "grad_norm": 0.569189190864563, "learning_rate": 0.0001, "loss": 1.6756, "step": 2803 }, { "epoch": 0.6809130645944633, "grad_norm": 0.5664377808570862, "learning_rate": 0.0001, "loss": 1.5336, "step": 2804 }, { "epoch": 0.681155900922778, "grad_norm": 0.6111416816711426, "learning_rate": 0.0001, "loss": 1.8656, "step": 2805 }, { "epoch": 0.6813987372510928, "grad_norm": 0.5777872204780579, "learning_rate": 0.0001, "loss": 1.6831, "step": 2806 }, { "epoch": 0.6816415735794075, "grad_norm": 0.5720959305763245, "learning_rate": 0.0001, "loss": 1.8043, "step": 2807 }, { "epoch": 0.6818844099077221, "grad_norm": 0.5836105942726135, "learning_rate": 0.0001, "loss": 1.7525, "step": 2808 }, { "epoch": 0.6821272462360369, "grad_norm": 0.5708861351013184, "learning_rate": 0.0001, "loss": 1.7397, "step": 2809 }, { "epoch": 0.6823700825643516, "grad_norm": 0.6118195652961731, "learning_rate": 0.0001, "loss": 1.7558, "step": 2810 }, { "epoch": 0.6826129188926664, "grad_norm": 0.6879733204841614, "learning_rate": 0.0001, "loss": 1.979, "step": 2811 }, { "epoch": 0.6828557552209811, "grad_norm": 0.5931374430656433, "learning_rate": 0.0001, "loss": 1.8943, "step": 2812 }, { "epoch": 0.6830985915492958, "grad_norm": 0.5758197903633118, "learning_rate": 0.0001, "loss": 1.6549, "step": 2813 }, { "epoch": 0.6833414278776105, "grad_norm": 0.6338828206062317, "learning_rate": 0.0001, "loss": 1.7353, "step": 2814 }, { "epoch": 0.6835842642059252, "grad_norm": 0.6063835024833679, "learning_rate": 0.0001, "loss": 1.846, "step": 2815 }, { "epoch": 0.6838271005342399, "grad_norm": 0.5691032409667969, "learning_rate": 0.0001, "loss": 1.6208, "step": 2816 }, { "epoch": 0.6840699368625547, "grad_norm": 0.6327906250953674, "learning_rate": 0.0001, "loss": 1.8607, "step": 2817 }, { "epoch": 0.6843127731908694, "grad_norm": 0.5907915830612183, "learning_rate": 0.0001, "loss": 1.8197, "step": 2818 }, { "epoch": 0.684555609519184, "grad_norm": 0.5953392386436462, "learning_rate": 0.0001, "loss": 1.804, "step": 2819 }, { "epoch": 0.6847984458474988, "grad_norm": 0.5982223153114319, "learning_rate": 0.0001, "loss": 1.6639, "step": 2820 }, { "epoch": 0.6850412821758135, "grad_norm": 0.5657848715782166, "learning_rate": 0.0001, "loss": 1.728, "step": 2821 }, { "epoch": 0.6852841185041282, "grad_norm": 0.603330671787262, "learning_rate": 0.0001, "loss": 1.7572, "step": 2822 }, { "epoch": 0.685526954832443, "grad_norm": 0.5856655836105347, "learning_rate": 0.0001, "loss": 1.7514, "step": 2823 }, { "epoch": 0.6857697911607576, "grad_norm": 0.6200686693191528, "learning_rate": 0.0001, "loss": 1.7767, "step": 2824 }, { "epoch": 0.6860126274890723, "grad_norm": 0.5934330224990845, "learning_rate": 0.0001, "loss": 1.6182, "step": 2825 }, { "epoch": 0.6862554638173871, "grad_norm": 0.5886359810829163, "learning_rate": 0.0001, "loss": 1.8898, "step": 2826 }, { "epoch": 0.6864983001457018, "grad_norm": 0.592971920967102, "learning_rate": 0.0001, "loss": 1.8712, "step": 2827 }, { "epoch": 0.6867411364740165, "grad_norm": 0.5992989540100098, "learning_rate": 0.0001, "loss": 1.7167, "step": 2828 }, { "epoch": 0.6869839728023313, "grad_norm": 0.5146058797836304, "learning_rate": 0.0001, "loss": 1.6289, "step": 2829 }, { "epoch": 0.6872268091306459, "grad_norm": 0.576221764087677, "learning_rate": 0.0001, "loss": 1.6585, "step": 2830 }, { "epoch": 0.6874696454589606, "grad_norm": 0.5706197619438171, "learning_rate": 0.0001, "loss": 1.7267, "step": 2831 }, { "epoch": 0.6877124817872754, "grad_norm": 0.5885552167892456, "learning_rate": 0.0001, "loss": 1.7293, "step": 2832 }, { "epoch": 0.6879553181155901, "grad_norm": 0.5783635377883911, "learning_rate": 0.0001, "loss": 1.7528, "step": 2833 }, { "epoch": 0.6881981544439048, "grad_norm": 0.5897202491760254, "learning_rate": 0.0001, "loss": 1.7233, "step": 2834 }, { "epoch": 0.6884409907722195, "grad_norm": 0.5649927258491516, "learning_rate": 0.0001, "loss": 1.7574, "step": 2835 }, { "epoch": 0.6886838271005342, "grad_norm": 0.5524166822433472, "learning_rate": 0.0001, "loss": 1.6174, "step": 2836 }, { "epoch": 0.688926663428849, "grad_norm": 0.5628504753112793, "learning_rate": 0.0001, "loss": 1.6541, "step": 2837 }, { "epoch": 0.6891694997571637, "grad_norm": 0.5964009165763855, "learning_rate": 0.0001, "loss": 1.802, "step": 2838 }, { "epoch": 0.6894123360854784, "grad_norm": 0.5477882623672485, "learning_rate": 0.0001, "loss": 1.6725, "step": 2839 }, { "epoch": 0.6896551724137931, "grad_norm": 0.6216742992401123, "learning_rate": 0.0001, "loss": 1.7234, "step": 2840 }, { "epoch": 0.6898980087421078, "grad_norm": 0.5934717059135437, "learning_rate": 0.0001, "loss": 1.7309, "step": 2841 }, { "epoch": 0.6901408450704225, "grad_norm": 0.593638002872467, "learning_rate": 0.0001, "loss": 1.7679, "step": 2842 }, { "epoch": 0.6903836813987373, "grad_norm": 0.5847916007041931, "learning_rate": 0.0001, "loss": 1.8504, "step": 2843 }, { "epoch": 0.690626517727052, "grad_norm": 0.5497144460678101, "learning_rate": 0.0001, "loss": 1.7663, "step": 2844 }, { "epoch": 0.6908693540553666, "grad_norm": 0.5658949613571167, "learning_rate": 0.0001, "loss": 1.6458, "step": 2845 }, { "epoch": 0.6911121903836814, "grad_norm": 0.6022396087646484, "learning_rate": 0.0001, "loss": 1.8237, "step": 2846 }, { "epoch": 0.6913550267119961, "grad_norm": 0.6041214466094971, "learning_rate": 0.0001, "loss": 1.7601, "step": 2847 }, { "epoch": 0.6915978630403108, "grad_norm": 0.6111637949943542, "learning_rate": 0.0001, "loss": 1.6164, "step": 2848 }, { "epoch": 0.6918406993686256, "grad_norm": 0.5883265733718872, "learning_rate": 0.0001, "loss": 1.7287, "step": 2849 }, { "epoch": 0.6920835356969403, "grad_norm": 0.5720015168190002, "learning_rate": 0.0001, "loss": 1.6277, "step": 2850 }, { "epoch": 0.6923263720252549, "grad_norm": 0.5846550464630127, "learning_rate": 0.0001, "loss": 1.7651, "step": 2851 }, { "epoch": 0.6925692083535697, "grad_norm": 0.5911680459976196, "learning_rate": 0.0001, "loss": 1.6533, "step": 2852 }, { "epoch": 0.6928120446818844, "grad_norm": 0.5872257351875305, "learning_rate": 0.0001, "loss": 1.6387, "step": 2853 }, { "epoch": 0.6930548810101991, "grad_norm": 0.5911620259284973, "learning_rate": 0.0001, "loss": 1.6371, "step": 2854 }, { "epoch": 0.6932977173385139, "grad_norm": 0.6094566583633423, "learning_rate": 0.0001, "loss": 1.7494, "step": 2855 }, { "epoch": 0.6935405536668285, "grad_norm": 0.5861377120018005, "learning_rate": 0.0001, "loss": 1.6755, "step": 2856 }, { "epoch": 0.6937833899951432, "grad_norm": 0.7181078791618347, "learning_rate": 0.0001, "loss": 1.673, "step": 2857 }, { "epoch": 0.694026226323458, "grad_norm": 0.583500862121582, "learning_rate": 0.0001, "loss": 1.6292, "step": 2858 }, { "epoch": 0.6942690626517727, "grad_norm": 0.5601634383201599, "learning_rate": 0.0001, "loss": 1.6493, "step": 2859 }, { "epoch": 0.6945118989800875, "grad_norm": 0.5888351798057556, "learning_rate": 0.0001, "loss": 1.6483, "step": 2860 }, { "epoch": 0.6947547353084021, "grad_norm": 0.6004803776741028, "learning_rate": 0.0001, "loss": 1.7305, "step": 2861 }, { "epoch": 0.6949975716367168, "grad_norm": 0.6880599856376648, "learning_rate": 0.0001, "loss": 1.8187, "step": 2862 }, { "epoch": 0.6952404079650316, "grad_norm": 0.5957933664321899, "learning_rate": 0.0001, "loss": 1.6591, "step": 2863 }, { "epoch": 0.6954832442933463, "grad_norm": 0.5726467370986938, "learning_rate": 0.0001, "loss": 1.6362, "step": 2864 }, { "epoch": 0.695726080621661, "grad_norm": 0.5528472661972046, "learning_rate": 0.0001, "loss": 1.65, "step": 2865 }, { "epoch": 0.6959689169499758, "grad_norm": 0.6097925901412964, "learning_rate": 0.0001, "loss": 1.6152, "step": 2866 }, { "epoch": 0.6962117532782904, "grad_norm": 0.5837544202804565, "learning_rate": 0.0001, "loss": 1.6005, "step": 2867 }, { "epoch": 0.6964545896066051, "grad_norm": 0.532633900642395, "learning_rate": 0.0001, "loss": 1.6354, "step": 2868 }, { "epoch": 0.6966974259349199, "grad_norm": 0.5818161368370056, "learning_rate": 0.0001, "loss": 1.6685, "step": 2869 }, { "epoch": 0.6969402622632346, "grad_norm": 0.5662007331848145, "learning_rate": 0.0001, "loss": 1.6389, "step": 2870 }, { "epoch": 0.6971830985915493, "grad_norm": 0.5478448271751404, "learning_rate": 0.0001, "loss": 1.5563, "step": 2871 }, { "epoch": 0.697425934919864, "grad_norm": 0.5718416571617126, "learning_rate": 0.0001, "loss": 1.6713, "step": 2872 }, { "epoch": 0.6976687712481787, "grad_norm": 0.5725582242012024, "learning_rate": 0.0001, "loss": 1.7278, "step": 2873 }, { "epoch": 0.6979116075764934, "grad_norm": 0.6120561957359314, "learning_rate": 0.0001, "loss": 1.8605, "step": 2874 }, { "epoch": 0.6981544439048082, "grad_norm": 0.5600008964538574, "learning_rate": 0.0001, "loss": 1.8146, "step": 2875 }, { "epoch": 0.6983972802331229, "grad_norm": 0.5600332021713257, "learning_rate": 0.0001, "loss": 1.6717, "step": 2876 }, { "epoch": 0.6986401165614375, "grad_norm": 0.5910006165504456, "learning_rate": 0.0001, "loss": 1.7757, "step": 2877 }, { "epoch": 0.6988829528897523, "grad_norm": 0.5550327897071838, "learning_rate": 0.0001, "loss": 1.7037, "step": 2878 }, { "epoch": 0.699125789218067, "grad_norm": 0.5717950463294983, "learning_rate": 0.0001, "loss": 1.6787, "step": 2879 }, { "epoch": 0.6993686255463817, "grad_norm": 0.5722221732139587, "learning_rate": 0.0001, "loss": 1.7925, "step": 2880 }, { "epoch": 0.6996114618746965, "grad_norm": 0.6006417870521545, "learning_rate": 0.0001, "loss": 1.6632, "step": 2881 }, { "epoch": 0.6998542982030111, "grad_norm": 0.5942846536636353, "learning_rate": 0.0001, "loss": 1.6596, "step": 2882 }, { "epoch": 0.7000971345313259, "grad_norm": 0.5749700665473938, "learning_rate": 0.0001, "loss": 1.6598, "step": 2883 }, { "epoch": 0.7003399708596406, "grad_norm": 0.5908930897712708, "learning_rate": 0.0001, "loss": 1.5089, "step": 2884 }, { "epoch": 0.7005828071879553, "grad_norm": 0.5929084420204163, "learning_rate": 0.0001, "loss": 1.8005, "step": 2885 }, { "epoch": 0.7008256435162701, "grad_norm": 0.7345240116119385, "learning_rate": 0.0001, "loss": 1.7273, "step": 2886 }, { "epoch": 0.7010684798445848, "grad_norm": 0.5420176386833191, "learning_rate": 0.0001, "loss": 1.67, "step": 2887 }, { "epoch": 0.7013113161728994, "grad_norm": 0.5552915334701538, "learning_rate": 0.0001, "loss": 1.6065, "step": 2888 }, { "epoch": 0.7015541525012142, "grad_norm": 0.5782786011695862, "learning_rate": 0.0001, "loss": 1.8232, "step": 2889 }, { "epoch": 0.7017969888295289, "grad_norm": 0.836561381816864, "learning_rate": 0.0001, "loss": 1.7137, "step": 2890 }, { "epoch": 0.7020398251578436, "grad_norm": 0.569405734539032, "learning_rate": 0.0001, "loss": 1.7464, "step": 2891 }, { "epoch": 0.7022826614861584, "grad_norm": 0.5833035111427307, "learning_rate": 0.0001, "loss": 1.7176, "step": 2892 }, { "epoch": 0.702525497814473, "grad_norm": 0.5674729943275452, "learning_rate": 0.0001, "loss": 1.6477, "step": 2893 }, { "epoch": 0.7027683341427877, "grad_norm": 0.5252358317375183, "learning_rate": 0.0001, "loss": 1.3547, "step": 2894 }, { "epoch": 0.7030111704711025, "grad_norm": 0.5658174157142639, "learning_rate": 0.0001, "loss": 1.7606, "step": 2895 }, { "epoch": 0.7032540067994172, "grad_norm": 0.5492091774940491, "learning_rate": 0.0001, "loss": 1.5857, "step": 2896 }, { "epoch": 0.7034968431277319, "grad_norm": 0.58930504322052, "learning_rate": 0.0001, "loss": 1.6534, "step": 2897 }, { "epoch": 0.7037396794560467, "grad_norm": 0.6033841371536255, "learning_rate": 0.0001, "loss": 1.748, "step": 2898 }, { "epoch": 0.7039825157843613, "grad_norm": 0.5531272888183594, "learning_rate": 0.0001, "loss": 1.7049, "step": 2899 }, { "epoch": 0.704225352112676, "grad_norm": 0.5709564685821533, "learning_rate": 0.0001, "loss": 1.6982, "step": 2900 }, { "epoch": 0.7044681884409908, "grad_norm": 0.5532764196395874, "learning_rate": 0.0001, "loss": 1.6809, "step": 2901 }, { "epoch": 0.7047110247693055, "grad_norm": 0.6018196940422058, "learning_rate": 0.0001, "loss": 1.764, "step": 2902 }, { "epoch": 0.7049538610976201, "grad_norm": 0.6237990856170654, "learning_rate": 0.0001, "loss": 1.5979, "step": 2903 }, { "epoch": 0.7051966974259349, "grad_norm": 0.5766505599021912, "learning_rate": 0.0001, "loss": 1.7848, "step": 2904 }, { "epoch": 0.7054395337542496, "grad_norm": 0.5628949999809265, "learning_rate": 0.0001, "loss": 1.8078, "step": 2905 }, { "epoch": 0.7056823700825644, "grad_norm": 0.5653403401374817, "learning_rate": 0.0001, "loss": 1.7858, "step": 2906 }, { "epoch": 0.7059252064108791, "grad_norm": 0.5594356060028076, "learning_rate": 0.0001, "loss": 1.5457, "step": 2907 }, { "epoch": 0.7061680427391938, "grad_norm": 0.590790867805481, "learning_rate": 0.0001, "loss": 1.6281, "step": 2908 }, { "epoch": 0.7064108790675085, "grad_norm": 0.6296771764755249, "learning_rate": 0.0001, "loss": 1.8381, "step": 2909 }, { "epoch": 0.7066537153958232, "grad_norm": 0.6157625913619995, "learning_rate": 0.0001, "loss": 1.8493, "step": 2910 }, { "epoch": 0.7068965517241379, "grad_norm": 0.6185581684112549, "learning_rate": 0.0001, "loss": 1.8729, "step": 2911 }, { "epoch": 0.7071393880524527, "grad_norm": 0.6104115843772888, "learning_rate": 0.0001, "loss": 1.8072, "step": 2912 }, { "epoch": 0.7073822243807674, "grad_norm": 0.5635713934898376, "learning_rate": 0.0001, "loss": 1.612, "step": 2913 }, { "epoch": 0.707625060709082, "grad_norm": 0.5789936780929565, "learning_rate": 0.0001, "loss": 1.7424, "step": 2914 }, { "epoch": 0.7078678970373968, "grad_norm": 0.5909901857376099, "learning_rate": 0.0001, "loss": 1.8809, "step": 2915 }, { "epoch": 0.7081107333657115, "grad_norm": 0.6040669083595276, "learning_rate": 0.0001, "loss": 1.7442, "step": 2916 }, { "epoch": 0.7083535696940262, "grad_norm": 0.5579518675804138, "learning_rate": 0.0001, "loss": 1.7682, "step": 2917 }, { "epoch": 0.708596406022341, "grad_norm": 0.6221187114715576, "learning_rate": 0.0001, "loss": 1.7396, "step": 2918 }, { "epoch": 0.7088392423506557, "grad_norm": 0.5807497501373291, "learning_rate": 0.0001, "loss": 1.918, "step": 2919 }, { "epoch": 0.7090820786789703, "grad_norm": 0.5804089307785034, "learning_rate": 0.0001, "loss": 1.7405, "step": 2920 }, { "epoch": 0.7093249150072851, "grad_norm": 0.6316050887107849, "learning_rate": 0.0001, "loss": 1.8899, "step": 2921 }, { "epoch": 0.7095677513355998, "grad_norm": 0.598678708076477, "learning_rate": 0.0001, "loss": 1.8111, "step": 2922 }, { "epoch": 0.7098105876639145, "grad_norm": 0.5926578640937805, "learning_rate": 0.0001, "loss": 1.809, "step": 2923 }, { "epoch": 0.7100534239922293, "grad_norm": 0.5795942544937134, "learning_rate": 0.0001, "loss": 1.7494, "step": 2924 }, { "epoch": 0.7102962603205439, "grad_norm": 0.6003085970878601, "learning_rate": 0.0001, "loss": 1.6745, "step": 2925 }, { "epoch": 0.7105390966488586, "grad_norm": 0.6050456166267395, "learning_rate": 0.0001, "loss": 1.7144, "step": 2926 }, { "epoch": 0.7107819329771734, "grad_norm": 0.5947707295417786, "learning_rate": 0.0001, "loss": 1.7912, "step": 2927 }, { "epoch": 0.7110247693054881, "grad_norm": 0.5814579129219055, "learning_rate": 0.0001, "loss": 1.6123, "step": 2928 }, { "epoch": 0.7112676056338029, "grad_norm": 0.5886326432228088, "learning_rate": 0.0001, "loss": 1.7891, "step": 2929 }, { "epoch": 0.7115104419621175, "grad_norm": 0.6063361167907715, "learning_rate": 0.0001, "loss": 1.7173, "step": 2930 }, { "epoch": 0.7117532782904322, "grad_norm": 0.6063582897186279, "learning_rate": 0.0001, "loss": 1.83, "step": 2931 }, { "epoch": 0.711996114618747, "grad_norm": 0.5683070421218872, "learning_rate": 0.0001, "loss": 1.6631, "step": 2932 }, { "epoch": 0.7122389509470617, "grad_norm": 0.5507863759994507, "learning_rate": 0.0001, "loss": 1.5301, "step": 2933 }, { "epoch": 0.7124817872753764, "grad_norm": 0.5840550065040588, "learning_rate": 0.0001, "loss": 1.848, "step": 2934 }, { "epoch": 0.7127246236036912, "grad_norm": 0.5971823930740356, "learning_rate": 0.0001, "loss": 1.8814, "step": 2935 }, { "epoch": 0.7129674599320058, "grad_norm": 0.5722901821136475, "learning_rate": 0.0001, "loss": 1.7001, "step": 2936 }, { "epoch": 0.7132102962603205, "grad_norm": 0.6489852070808411, "learning_rate": 0.0001, "loss": 1.8752, "step": 2937 }, { "epoch": 0.7134531325886353, "grad_norm": 0.5677469968795776, "learning_rate": 0.0001, "loss": 1.7549, "step": 2938 }, { "epoch": 0.71369596891695, "grad_norm": 0.5717501044273376, "learning_rate": 0.0001, "loss": 1.6277, "step": 2939 }, { "epoch": 0.7139388052452647, "grad_norm": 0.5825884342193604, "learning_rate": 0.0001, "loss": 1.6458, "step": 2940 }, { "epoch": 0.7141816415735794, "grad_norm": 0.6087138056755066, "learning_rate": 0.0001, "loss": 1.8682, "step": 2941 }, { "epoch": 0.7144244779018941, "grad_norm": 0.5498895645141602, "learning_rate": 0.0001, "loss": 1.5874, "step": 2942 }, { "epoch": 0.7146673142302088, "grad_norm": 0.5925459861755371, "learning_rate": 0.0001, "loss": 1.7232, "step": 2943 }, { "epoch": 0.7149101505585236, "grad_norm": 0.5935659408569336, "learning_rate": 0.0001, "loss": 1.8265, "step": 2944 }, { "epoch": 0.7151529868868383, "grad_norm": 0.5577383637428284, "learning_rate": 0.0001, "loss": 1.6404, "step": 2945 }, { "epoch": 0.7153958232151529, "grad_norm": 0.5835193395614624, "learning_rate": 0.0001, "loss": 1.606, "step": 2946 }, { "epoch": 0.7156386595434677, "grad_norm": 0.5707915425300598, "learning_rate": 0.0001, "loss": 1.6483, "step": 2947 }, { "epoch": 0.7158814958717824, "grad_norm": 0.6077779531478882, "learning_rate": 0.0001, "loss": 1.9231, "step": 2948 }, { "epoch": 0.7161243322000971, "grad_norm": 0.5671358108520508, "learning_rate": 0.0001, "loss": 1.7163, "step": 2949 }, { "epoch": 0.7163671685284119, "grad_norm": 0.8459880948066711, "learning_rate": 0.0001, "loss": 1.7304, "step": 2950 }, { "epoch": 0.7166100048567265, "grad_norm": 0.5568290948867798, "learning_rate": 0.0001, "loss": 1.6038, "step": 2951 }, { "epoch": 0.7168528411850413, "grad_norm": 0.5902974009513855, "learning_rate": 0.0001, "loss": 1.6551, "step": 2952 }, { "epoch": 0.717095677513356, "grad_norm": 0.6150861382484436, "learning_rate": 0.0001, "loss": 1.5577, "step": 2953 }, { "epoch": 0.7173385138416707, "grad_norm": 0.5791119337081909, "learning_rate": 0.0001, "loss": 1.6013, "step": 2954 }, { "epoch": 0.7175813501699855, "grad_norm": 0.5551214814186096, "learning_rate": 0.0001, "loss": 1.5818, "step": 2955 }, { "epoch": 0.7178241864983002, "grad_norm": 0.5971116423606873, "learning_rate": 0.0001, "loss": 1.7887, "step": 2956 }, { "epoch": 0.7180670228266148, "grad_norm": 0.5904876589775085, "learning_rate": 0.0001, "loss": 1.7936, "step": 2957 }, { "epoch": 0.7183098591549296, "grad_norm": 0.5888270735740662, "learning_rate": 0.0001, "loss": 1.7204, "step": 2958 }, { "epoch": 0.7185526954832443, "grad_norm": 0.5791041254997253, "learning_rate": 0.0001, "loss": 1.8105, "step": 2959 }, { "epoch": 0.718795531811559, "grad_norm": 0.5695481300354004, "learning_rate": 0.0001, "loss": 1.684, "step": 2960 }, { "epoch": 0.7190383681398738, "grad_norm": 0.5556251406669617, "learning_rate": 0.0001, "loss": 1.6875, "step": 2961 }, { "epoch": 0.7192812044681884, "grad_norm": 0.5751869082450867, "learning_rate": 0.0001, "loss": 1.6599, "step": 2962 }, { "epoch": 0.7195240407965031, "grad_norm": 0.6280362010002136, "learning_rate": 0.0001, "loss": 2.0623, "step": 2963 }, { "epoch": 0.7197668771248179, "grad_norm": 0.5435699820518494, "learning_rate": 0.0001, "loss": 1.8137, "step": 2964 }, { "epoch": 0.7200097134531326, "grad_norm": 0.5561855435371399, "learning_rate": 0.0001, "loss": 1.6375, "step": 2965 }, { "epoch": 0.7202525497814473, "grad_norm": 0.5420215725898743, "learning_rate": 0.0001, "loss": 1.6225, "step": 2966 }, { "epoch": 0.720495386109762, "grad_norm": 0.5700600743293762, "learning_rate": 0.0001, "loss": 1.8171, "step": 2967 }, { "epoch": 0.7207382224380767, "grad_norm": 0.6144974827766418, "learning_rate": 0.0001, "loss": 1.8919, "step": 2968 }, { "epoch": 0.7209810587663914, "grad_norm": 0.6396198272705078, "learning_rate": 0.0001, "loss": 1.7337, "step": 2969 }, { "epoch": 0.7212238950947062, "grad_norm": 0.584147572517395, "learning_rate": 0.0001, "loss": 1.8727, "step": 2970 }, { "epoch": 0.7214667314230209, "grad_norm": 0.552257239818573, "learning_rate": 0.0001, "loss": 1.6624, "step": 2971 }, { "epoch": 0.7217095677513355, "grad_norm": 0.5884346961975098, "learning_rate": 0.0001, "loss": 1.7232, "step": 2972 }, { "epoch": 0.7219524040796503, "grad_norm": 0.5640490651130676, "learning_rate": 0.0001, "loss": 1.6607, "step": 2973 }, { "epoch": 0.722195240407965, "grad_norm": 0.5864781737327576, "learning_rate": 0.0001, "loss": 1.6849, "step": 2974 }, { "epoch": 0.7224380767362798, "grad_norm": 0.5875230431556702, "learning_rate": 0.0001, "loss": 1.8265, "step": 2975 }, { "epoch": 0.7226809130645945, "grad_norm": 0.6013203263282776, "learning_rate": 0.0001, "loss": 1.653, "step": 2976 }, { "epoch": 0.7229237493929092, "grad_norm": 0.5607584118843079, "learning_rate": 0.0001, "loss": 1.7218, "step": 2977 }, { "epoch": 0.7231665857212239, "grad_norm": 0.6358622908592224, "learning_rate": 0.0001, "loss": 1.7346, "step": 2978 }, { "epoch": 0.7234094220495386, "grad_norm": 0.5818342566490173, "learning_rate": 0.0001, "loss": 1.7539, "step": 2979 }, { "epoch": 0.7236522583778533, "grad_norm": 0.5819612741470337, "learning_rate": 0.0001, "loss": 1.6859, "step": 2980 }, { "epoch": 0.7238950947061681, "grad_norm": 0.5618541836738586, "learning_rate": 0.0001, "loss": 1.6193, "step": 2981 }, { "epoch": 0.7241379310344828, "grad_norm": 0.5999396443367004, "learning_rate": 0.0001, "loss": 1.8407, "step": 2982 }, { "epoch": 0.7243807673627974, "grad_norm": 0.5402510762214661, "learning_rate": 0.0001, "loss": 1.688, "step": 2983 }, { "epoch": 0.7246236036911122, "grad_norm": 0.6189697980880737, "learning_rate": 0.0001, "loss": 1.7554, "step": 2984 }, { "epoch": 0.7248664400194269, "grad_norm": 0.6344261169433594, "learning_rate": 0.0001, "loss": 1.6716, "step": 2985 }, { "epoch": 0.7251092763477416, "grad_norm": 0.6194679737091064, "learning_rate": 0.0001, "loss": 1.887, "step": 2986 }, { "epoch": 0.7253521126760564, "grad_norm": 0.6036260724067688, "learning_rate": 0.0001, "loss": 1.7307, "step": 2987 }, { "epoch": 0.725594949004371, "grad_norm": 0.5975467562675476, "learning_rate": 0.0001, "loss": 1.6809, "step": 2988 }, { "epoch": 0.7258377853326857, "grad_norm": 0.5954137444496155, "learning_rate": 0.0001, "loss": 1.724, "step": 2989 }, { "epoch": 0.7260806216610005, "grad_norm": 0.6179779171943665, "learning_rate": 0.0001, "loss": 1.8012, "step": 2990 }, { "epoch": 0.7263234579893152, "grad_norm": 0.5949034690856934, "learning_rate": 0.0001, "loss": 1.7704, "step": 2991 }, { "epoch": 0.7265662943176299, "grad_norm": 0.5713704228401184, "learning_rate": 0.0001, "loss": 1.6558, "step": 2992 }, { "epoch": 0.7268091306459447, "grad_norm": 0.5797938108444214, "learning_rate": 0.0001, "loss": 1.6509, "step": 2993 }, { "epoch": 0.7270519669742593, "grad_norm": 0.6108956933021545, "learning_rate": 0.0001, "loss": 1.7338, "step": 2994 }, { "epoch": 0.727294803302574, "grad_norm": 0.6109453439712524, "learning_rate": 0.0001, "loss": 1.6392, "step": 2995 }, { "epoch": 0.7275376396308888, "grad_norm": 0.5588051676750183, "learning_rate": 0.0001, "loss": 1.5274, "step": 2996 }, { "epoch": 0.7277804759592035, "grad_norm": 0.5878009796142578, "learning_rate": 0.0001, "loss": 1.7112, "step": 2997 }, { "epoch": 0.7280233122875183, "grad_norm": 0.605579674243927, "learning_rate": 0.0001, "loss": 1.7491, "step": 2998 }, { "epoch": 0.7282661486158329, "grad_norm": 0.5869989395141602, "learning_rate": 0.0001, "loss": 1.7755, "step": 2999 }, { "epoch": 0.7285089849441476, "grad_norm": 0.5857266783714294, "learning_rate": 0.0001, "loss": 1.8855, "step": 3000 }, { "epoch": 0.7287518212724624, "grad_norm": 0.5307328104972839, "learning_rate": 0.0001, "loss": 1.6346, "step": 3001 }, { "epoch": 0.7289946576007771, "grad_norm": 0.5867645740509033, "learning_rate": 0.0001, "loss": 1.7953, "step": 3002 }, { "epoch": 0.7292374939290918, "grad_norm": 0.5966058373451233, "learning_rate": 0.0001, "loss": 1.8166, "step": 3003 }, { "epoch": 0.7294803302574066, "grad_norm": 0.5763681530952454, "learning_rate": 0.0001, "loss": 1.6098, "step": 3004 }, { "epoch": 0.7297231665857212, "grad_norm": 0.5982712507247925, "learning_rate": 0.0001, "loss": 1.7192, "step": 3005 }, { "epoch": 0.7299660029140359, "grad_norm": 0.5639309883117676, "learning_rate": 0.0001, "loss": 1.6833, "step": 3006 }, { "epoch": 0.7302088392423507, "grad_norm": 0.6086863279342651, "learning_rate": 0.0001, "loss": 1.8071, "step": 3007 }, { "epoch": 0.7304516755706654, "grad_norm": 0.6218518614768982, "learning_rate": 0.0001, "loss": 1.6636, "step": 3008 }, { "epoch": 0.73069451189898, "grad_norm": 0.6090829372406006, "learning_rate": 0.0001, "loss": 1.915, "step": 3009 }, { "epoch": 0.7309373482272948, "grad_norm": 0.6307433247566223, "learning_rate": 0.0001, "loss": 1.8065, "step": 3010 }, { "epoch": 0.7311801845556095, "grad_norm": 0.5762611627578735, "learning_rate": 0.0001, "loss": 1.8111, "step": 3011 }, { "epoch": 0.7314230208839242, "grad_norm": 0.5527024865150452, "learning_rate": 0.0001, "loss": 1.513, "step": 3012 }, { "epoch": 0.731665857212239, "grad_norm": 0.5604642033576965, "learning_rate": 0.0001, "loss": 1.6819, "step": 3013 }, { "epoch": 0.7319086935405537, "grad_norm": 0.5796170830726624, "learning_rate": 0.0001, "loss": 1.5912, "step": 3014 }, { "epoch": 0.7321515298688683, "grad_norm": 0.5859696269035339, "learning_rate": 0.0001, "loss": 1.7098, "step": 3015 }, { "epoch": 0.7323943661971831, "grad_norm": 0.5520855188369751, "learning_rate": 0.0001, "loss": 1.6901, "step": 3016 }, { "epoch": 0.7326372025254978, "grad_norm": 0.5656354427337646, "learning_rate": 0.0001, "loss": 1.5327, "step": 3017 }, { "epoch": 0.7328800388538125, "grad_norm": 0.5668532848358154, "learning_rate": 0.0001, "loss": 1.7174, "step": 3018 }, { "epoch": 0.7331228751821273, "grad_norm": 0.606231689453125, "learning_rate": 0.0001, "loss": 1.5847, "step": 3019 }, { "epoch": 0.7333657115104419, "grad_norm": 0.5504002571105957, "learning_rate": 0.0001, "loss": 1.6185, "step": 3020 }, { "epoch": 0.7336085478387567, "grad_norm": 0.6125749349594116, "learning_rate": 0.0001, "loss": 1.7903, "step": 3021 }, { "epoch": 0.7338513841670714, "grad_norm": 0.6384183168411255, "learning_rate": 0.0001, "loss": 1.8925, "step": 3022 }, { "epoch": 0.7340942204953861, "grad_norm": 0.5936228036880493, "learning_rate": 0.0001, "loss": 1.666, "step": 3023 }, { "epoch": 0.7343370568237009, "grad_norm": 0.5869163870811462, "learning_rate": 0.0001, "loss": 1.7754, "step": 3024 }, { "epoch": 0.7345798931520156, "grad_norm": 0.6066444516181946, "learning_rate": 0.0001, "loss": 1.769, "step": 3025 }, { "epoch": 0.7348227294803302, "grad_norm": 0.5754687190055847, "learning_rate": 0.0001, "loss": 1.6996, "step": 3026 }, { "epoch": 0.735065565808645, "grad_norm": 0.5544506311416626, "learning_rate": 0.0001, "loss": 1.4816, "step": 3027 }, { "epoch": 0.7353084021369597, "grad_norm": 0.5787808895111084, "learning_rate": 0.0001, "loss": 1.6657, "step": 3028 }, { "epoch": 0.7355512384652744, "grad_norm": 0.5914311408996582, "learning_rate": 0.0001, "loss": 1.8207, "step": 3029 }, { "epoch": 0.7357940747935892, "grad_norm": 0.5448651313781738, "learning_rate": 0.0001, "loss": 1.5538, "step": 3030 }, { "epoch": 0.7360369111219038, "grad_norm": 0.572437584400177, "learning_rate": 0.0001, "loss": 1.7829, "step": 3031 }, { "epoch": 0.7362797474502185, "grad_norm": 0.6246130466461182, "learning_rate": 0.0001, "loss": 1.7405, "step": 3032 }, { "epoch": 0.7365225837785333, "grad_norm": 0.5604763627052307, "learning_rate": 0.0001, "loss": 1.6263, "step": 3033 }, { "epoch": 0.736765420106848, "grad_norm": 0.5959675312042236, "learning_rate": 0.0001, "loss": 1.7005, "step": 3034 }, { "epoch": 0.7370082564351627, "grad_norm": 0.5705462694168091, "learning_rate": 0.0001, "loss": 1.6534, "step": 3035 }, { "epoch": 0.7372510927634774, "grad_norm": 0.6315643787384033, "learning_rate": 0.0001, "loss": 1.7861, "step": 3036 }, { "epoch": 0.7374939290917921, "grad_norm": 0.5820364356040955, "learning_rate": 0.0001, "loss": 1.6539, "step": 3037 }, { "epoch": 0.7377367654201068, "grad_norm": 0.5807769894599915, "learning_rate": 0.0001, "loss": 1.7005, "step": 3038 }, { "epoch": 0.7379796017484216, "grad_norm": 0.5841823220252991, "learning_rate": 0.0001, "loss": 1.642, "step": 3039 }, { "epoch": 0.7382224380767363, "grad_norm": 0.5556533932685852, "learning_rate": 0.0001, "loss": 1.6971, "step": 3040 }, { "epoch": 0.7384652744050509, "grad_norm": 0.5819579362869263, "learning_rate": 0.0001, "loss": 1.7295, "step": 3041 }, { "epoch": 0.7387081107333657, "grad_norm": 0.6066816449165344, "learning_rate": 0.0001, "loss": 1.8619, "step": 3042 }, { "epoch": 0.7389509470616804, "grad_norm": 0.5626749396324158, "learning_rate": 0.0001, "loss": 1.696, "step": 3043 }, { "epoch": 0.7391937833899952, "grad_norm": 0.5701555609703064, "learning_rate": 0.0001, "loss": 1.7388, "step": 3044 }, { "epoch": 0.7394366197183099, "grad_norm": 0.6135877966880798, "learning_rate": 0.0001, "loss": 1.7617, "step": 3045 }, { "epoch": 0.7396794560466246, "grad_norm": 0.6123836040496826, "learning_rate": 0.0001, "loss": 1.7067, "step": 3046 }, { "epoch": 0.7399222923749393, "grad_norm": 0.586012065410614, "learning_rate": 0.0001, "loss": 1.6444, "step": 3047 }, { "epoch": 0.740165128703254, "grad_norm": 0.6175816655158997, "learning_rate": 0.0001, "loss": 1.6807, "step": 3048 }, { "epoch": 0.7404079650315687, "grad_norm": 0.6169615387916565, "learning_rate": 0.0001, "loss": 1.7067, "step": 3049 }, { "epoch": 0.7406508013598835, "grad_norm": 0.5519340634346008, "learning_rate": 0.0001, "loss": 1.7388, "step": 3050 }, { "epoch": 0.7408936376881982, "grad_norm": 0.5570865869522095, "learning_rate": 0.0001, "loss": 1.7541, "step": 3051 }, { "epoch": 0.7411364740165128, "grad_norm": 0.56632399559021, "learning_rate": 0.0001, "loss": 1.7458, "step": 3052 }, { "epoch": 0.7413793103448276, "grad_norm": 0.6042296290397644, "learning_rate": 0.0001, "loss": 1.7227, "step": 3053 }, { "epoch": 0.7416221466731423, "grad_norm": 0.633659303188324, "learning_rate": 0.0001, "loss": 1.8576, "step": 3054 }, { "epoch": 0.741864983001457, "grad_norm": 0.6423869729042053, "learning_rate": 0.0001, "loss": 1.7807, "step": 3055 }, { "epoch": 0.7421078193297718, "grad_norm": 0.5772718787193298, "learning_rate": 0.0001, "loss": 1.62, "step": 3056 }, { "epoch": 0.7423506556580864, "grad_norm": 0.5670206546783447, "learning_rate": 0.0001, "loss": 1.6172, "step": 3057 }, { "epoch": 0.7425934919864011, "grad_norm": 0.5802311301231384, "learning_rate": 0.0001, "loss": 1.6043, "step": 3058 }, { "epoch": 0.7428363283147159, "grad_norm": 0.6183196306228638, "learning_rate": 0.0001, "loss": 1.6892, "step": 3059 }, { "epoch": 0.7430791646430306, "grad_norm": 0.5805888772010803, "learning_rate": 0.0001, "loss": 1.91, "step": 3060 }, { "epoch": 0.7433220009713453, "grad_norm": 0.5857672095298767, "learning_rate": 0.0001, "loss": 1.7233, "step": 3061 }, { "epoch": 0.74356483729966, "grad_norm": 0.6274641156196594, "learning_rate": 0.0001, "loss": 1.7267, "step": 3062 }, { "epoch": 0.7438076736279747, "grad_norm": 0.5870610475540161, "learning_rate": 0.0001, "loss": 1.5864, "step": 3063 }, { "epoch": 0.7440505099562894, "grad_norm": 0.5928947925567627, "learning_rate": 0.0001, "loss": 1.7029, "step": 3064 }, { "epoch": 0.7442933462846042, "grad_norm": 0.6157515645027161, "learning_rate": 0.0001, "loss": 1.6738, "step": 3065 }, { "epoch": 0.7445361826129189, "grad_norm": 0.5869420170783997, "learning_rate": 0.0001, "loss": 1.7315, "step": 3066 }, { "epoch": 0.7447790189412337, "grad_norm": 0.6070082783699036, "learning_rate": 0.0001, "loss": 1.6156, "step": 3067 }, { "epoch": 0.7450218552695483, "grad_norm": 0.5500762462615967, "learning_rate": 0.0001, "loss": 1.5897, "step": 3068 }, { "epoch": 0.745264691597863, "grad_norm": 0.5982913374900818, "learning_rate": 0.0001, "loss": 1.7079, "step": 3069 }, { "epoch": 0.7455075279261778, "grad_norm": 0.5838366150856018, "learning_rate": 0.0001, "loss": 1.7077, "step": 3070 }, { "epoch": 0.7457503642544925, "grad_norm": 0.5614213943481445, "learning_rate": 0.0001, "loss": 1.5284, "step": 3071 }, { "epoch": 0.7459932005828072, "grad_norm": 0.6322265863418579, "learning_rate": 0.0001, "loss": 1.7189, "step": 3072 }, { "epoch": 0.746236036911122, "grad_norm": 0.594893753528595, "learning_rate": 0.0001, "loss": 1.8296, "step": 3073 }, { "epoch": 0.7464788732394366, "grad_norm": 0.6140832901000977, "learning_rate": 0.0001, "loss": 1.6699, "step": 3074 }, { "epoch": 0.7467217095677513, "grad_norm": 0.6009077429771423, "learning_rate": 0.0001, "loss": 1.6844, "step": 3075 }, { "epoch": 0.7469645458960661, "grad_norm": 0.608458399772644, "learning_rate": 0.0001, "loss": 1.723, "step": 3076 }, { "epoch": 0.7472073822243808, "grad_norm": 0.5779408812522888, "learning_rate": 0.0001, "loss": 1.7067, "step": 3077 }, { "epoch": 0.7474502185526954, "grad_norm": 0.5849694609642029, "learning_rate": 0.0001, "loss": 1.6334, "step": 3078 }, { "epoch": 0.7476930548810102, "grad_norm": 0.6309943199157715, "learning_rate": 0.0001, "loss": 1.7543, "step": 3079 }, { "epoch": 0.7479358912093249, "grad_norm": 0.6028780937194824, "learning_rate": 0.0001, "loss": 1.7106, "step": 3080 }, { "epoch": 0.7481787275376396, "grad_norm": 0.6136448979377747, "learning_rate": 0.0001, "loss": 1.7322, "step": 3081 }, { "epoch": 0.7484215638659544, "grad_norm": 0.6148863434791565, "learning_rate": 0.0001, "loss": 1.733, "step": 3082 }, { "epoch": 0.748664400194269, "grad_norm": 0.5814813375473022, "learning_rate": 0.0001, "loss": 1.6061, "step": 3083 }, { "epoch": 0.7489072365225837, "grad_norm": 0.6196994185447693, "learning_rate": 0.0001, "loss": 1.7411, "step": 3084 }, { "epoch": 0.7491500728508985, "grad_norm": 0.5938546657562256, "learning_rate": 0.0001, "loss": 1.6446, "step": 3085 }, { "epoch": 0.7493929091792132, "grad_norm": 0.6048294305801392, "learning_rate": 0.0001, "loss": 1.7233, "step": 3086 }, { "epoch": 0.7496357455075279, "grad_norm": 0.6158680319786072, "learning_rate": 0.0001, "loss": 1.765, "step": 3087 }, { "epoch": 0.7498785818358427, "grad_norm": 0.6024149060249329, "learning_rate": 0.0001, "loss": 1.7101, "step": 3088 }, { "epoch": 0.7501214181641573, "grad_norm": 0.6003633141517639, "learning_rate": 0.0001, "loss": 1.8116, "step": 3089 }, { "epoch": 0.7503642544924721, "grad_norm": 0.5719137191772461, "learning_rate": 0.0001, "loss": 1.8171, "step": 3090 }, { "epoch": 0.7506070908207868, "grad_norm": 0.5652117729187012, "learning_rate": 0.0001, "loss": 1.7708, "step": 3091 }, { "epoch": 0.7508499271491015, "grad_norm": 0.5504488348960876, "learning_rate": 0.0001, "loss": 1.6541, "step": 3092 }, { "epoch": 0.7510927634774163, "grad_norm": 0.6070501804351807, "learning_rate": 0.0001, "loss": 1.6693, "step": 3093 }, { "epoch": 0.751335599805731, "grad_norm": 0.6052151918411255, "learning_rate": 0.0001, "loss": 1.7172, "step": 3094 }, { "epoch": 0.7515784361340456, "grad_norm": 0.6147742867469788, "learning_rate": 0.0001, "loss": 1.7188, "step": 3095 }, { "epoch": 0.7518212724623604, "grad_norm": 0.5747918486595154, "learning_rate": 0.0001, "loss": 1.5887, "step": 3096 }, { "epoch": 0.7520641087906751, "grad_norm": 0.5973472595214844, "learning_rate": 0.0001, "loss": 1.7014, "step": 3097 }, { "epoch": 0.7523069451189898, "grad_norm": 0.6190682053565979, "learning_rate": 0.0001, "loss": 1.7363, "step": 3098 }, { "epoch": 0.7525497814473046, "grad_norm": 0.5895213484764099, "learning_rate": 0.0001, "loss": 1.8408, "step": 3099 }, { "epoch": 0.7527926177756192, "grad_norm": 0.6008792519569397, "learning_rate": 0.0001, "loss": 1.8563, "step": 3100 }, { "epoch": 0.7530354541039339, "grad_norm": 5.0266032218933105, "learning_rate": 0.0001, "loss": 1.5559, "step": 3101 }, { "epoch": 0.7532782904322487, "grad_norm": 0.5932336449623108, "learning_rate": 0.0001, "loss": 1.7321, "step": 3102 }, { "epoch": 0.7535211267605634, "grad_norm": 0.5837019085884094, "learning_rate": 0.0001, "loss": 1.7106, "step": 3103 }, { "epoch": 0.753763963088878, "grad_norm": 0.5850164890289307, "learning_rate": 0.0001, "loss": 1.7852, "step": 3104 }, { "epoch": 0.7540067994171928, "grad_norm": 0.5760244131088257, "learning_rate": 0.0001, "loss": 1.7942, "step": 3105 }, { "epoch": 0.7542496357455075, "grad_norm": 0.6135173439979553, "learning_rate": 0.0001, "loss": 1.9088, "step": 3106 }, { "epoch": 0.7544924720738222, "grad_norm": 0.597471296787262, "learning_rate": 0.0001, "loss": 1.6574, "step": 3107 }, { "epoch": 0.754735308402137, "grad_norm": 0.606307864189148, "learning_rate": 0.0001, "loss": 1.8266, "step": 3108 }, { "epoch": 0.7549781447304517, "grad_norm": 0.5847671031951904, "learning_rate": 0.0001, "loss": 1.6532, "step": 3109 }, { "epoch": 0.7552209810587663, "grad_norm": 0.5444035530090332, "learning_rate": 0.0001, "loss": 1.6563, "step": 3110 }, { "epoch": 0.7554638173870811, "grad_norm": 0.5614792704582214, "learning_rate": 0.0001, "loss": 1.7415, "step": 3111 }, { "epoch": 0.7557066537153958, "grad_norm": 0.5745176672935486, "learning_rate": 0.0001, "loss": 1.6814, "step": 3112 }, { "epoch": 0.7559494900437106, "grad_norm": 0.5440202355384827, "learning_rate": 0.0001, "loss": 1.7116, "step": 3113 }, { "epoch": 0.7561923263720253, "grad_norm": 0.5792099237442017, "learning_rate": 0.0001, "loss": 1.8194, "step": 3114 }, { "epoch": 0.75643516270034, "grad_norm": 0.5704084634780884, "learning_rate": 0.0001, "loss": 1.5595, "step": 3115 }, { "epoch": 0.7566779990286547, "grad_norm": 0.6286868453025818, "learning_rate": 0.0001, "loss": 1.8912, "step": 3116 }, { "epoch": 0.7569208353569694, "grad_norm": 0.620141863822937, "learning_rate": 0.0001, "loss": 1.7154, "step": 3117 }, { "epoch": 0.7571636716852841, "grad_norm": 0.5992688536643982, "learning_rate": 0.0001, "loss": 1.7075, "step": 3118 }, { "epoch": 0.7574065080135989, "grad_norm": 0.5898826122283936, "learning_rate": 0.0001, "loss": 1.7085, "step": 3119 }, { "epoch": 0.7576493443419136, "grad_norm": 0.5879241228103638, "learning_rate": 0.0001, "loss": 1.6773, "step": 3120 }, { "epoch": 0.7578921806702282, "grad_norm": 0.5776136517524719, "learning_rate": 0.0001, "loss": 1.6314, "step": 3121 }, { "epoch": 0.758135016998543, "grad_norm": 0.5695837736129761, "learning_rate": 0.0001, "loss": 1.6349, "step": 3122 }, { "epoch": 0.7583778533268577, "grad_norm": 0.5705963969230652, "learning_rate": 0.0001, "loss": 1.6673, "step": 3123 }, { "epoch": 0.7586206896551724, "grad_norm": 0.601582407951355, "learning_rate": 0.0001, "loss": 1.7189, "step": 3124 }, { "epoch": 0.7588635259834872, "grad_norm": 0.6535794138908386, "learning_rate": 0.0001, "loss": 1.7291, "step": 3125 }, { "epoch": 0.7591063623118018, "grad_norm": 0.5979652404785156, "learning_rate": 0.0001, "loss": 1.758, "step": 3126 }, { "epoch": 0.7593491986401165, "grad_norm": 0.5911663174629211, "learning_rate": 0.0001, "loss": 1.8147, "step": 3127 }, { "epoch": 0.7595920349684313, "grad_norm": 0.5932319760322571, "learning_rate": 0.0001, "loss": 1.8987, "step": 3128 }, { "epoch": 0.759834871296746, "grad_norm": 0.6082956194877625, "learning_rate": 0.0001, "loss": 1.758, "step": 3129 }, { "epoch": 0.7600777076250607, "grad_norm": 0.5703206658363342, "learning_rate": 0.0001, "loss": 1.6945, "step": 3130 }, { "epoch": 0.7603205439533754, "grad_norm": 0.5616307854652405, "learning_rate": 0.0001, "loss": 1.6786, "step": 3131 }, { "epoch": 0.7605633802816901, "grad_norm": 0.5814014673233032, "learning_rate": 0.0001, "loss": 1.7443, "step": 3132 }, { "epoch": 0.7608062166100048, "grad_norm": 0.5997304916381836, "learning_rate": 0.0001, "loss": 1.7166, "step": 3133 }, { "epoch": 0.7610490529383196, "grad_norm": 0.6064666509628296, "learning_rate": 0.0001, "loss": 1.9331, "step": 3134 }, { "epoch": 0.7612918892666343, "grad_norm": 0.5871558785438538, "learning_rate": 0.0001, "loss": 1.6392, "step": 3135 }, { "epoch": 0.7615347255949491, "grad_norm": 0.5561057925224304, "learning_rate": 0.0001, "loss": 1.6142, "step": 3136 }, { "epoch": 0.7617775619232637, "grad_norm": 0.6017075181007385, "learning_rate": 0.0001, "loss": 1.7286, "step": 3137 }, { "epoch": 0.7620203982515784, "grad_norm": 0.5785053372383118, "learning_rate": 0.0001, "loss": 1.7708, "step": 3138 }, { "epoch": 0.7622632345798932, "grad_norm": 0.5785541534423828, "learning_rate": 0.0001, "loss": 1.7217, "step": 3139 }, { "epoch": 0.7625060709082079, "grad_norm": 0.6177810430526733, "learning_rate": 0.0001, "loss": 1.8104, "step": 3140 }, { "epoch": 0.7627489072365226, "grad_norm": 0.6104243993759155, "learning_rate": 0.0001, "loss": 1.7221, "step": 3141 }, { "epoch": 0.7629917435648373, "grad_norm": 0.5768064260482788, "learning_rate": 0.0001, "loss": 1.761, "step": 3142 }, { "epoch": 0.763234579893152, "grad_norm": 0.6506632566452026, "learning_rate": 0.0001, "loss": 1.682, "step": 3143 }, { "epoch": 0.7634774162214667, "grad_norm": 0.6132780313491821, "learning_rate": 0.0001, "loss": 1.7527, "step": 3144 }, { "epoch": 0.7637202525497815, "grad_norm": 0.5931320190429688, "learning_rate": 0.0001, "loss": 1.83, "step": 3145 }, { "epoch": 0.7639630888780962, "grad_norm": 0.5766931176185608, "learning_rate": 0.0001, "loss": 1.6565, "step": 3146 }, { "epoch": 0.7642059252064108, "grad_norm": 0.5978641510009766, "learning_rate": 0.0001, "loss": 1.661, "step": 3147 }, { "epoch": 0.7644487615347256, "grad_norm": 0.5475472807884216, "learning_rate": 0.0001, "loss": 1.7076, "step": 3148 }, { "epoch": 0.7646915978630403, "grad_norm": 0.592376172542572, "learning_rate": 0.0001, "loss": 1.7808, "step": 3149 }, { "epoch": 0.764934434191355, "grad_norm": 0.6536989808082581, "learning_rate": 0.0001, "loss": 1.753, "step": 3150 }, { "epoch": 0.7651772705196698, "grad_norm": 0.5901771187782288, "learning_rate": 0.0001, "loss": 1.7637, "step": 3151 }, { "epoch": 0.7654201068479844, "grad_norm": 0.599584698677063, "learning_rate": 0.0001, "loss": 1.7152, "step": 3152 }, { "epoch": 0.7656629431762991, "grad_norm": 0.571835994720459, "learning_rate": 0.0001, "loss": 1.6306, "step": 3153 }, { "epoch": 0.7659057795046139, "grad_norm": 0.5579686760902405, "learning_rate": 0.0001, "loss": 1.6602, "step": 3154 }, { "epoch": 0.7661486158329286, "grad_norm": 0.59943026304245, "learning_rate": 0.0001, "loss": 1.7429, "step": 3155 }, { "epoch": 0.7663914521612433, "grad_norm": 0.6282011866569519, "learning_rate": 0.0001, "loss": 1.7826, "step": 3156 }, { "epoch": 0.7666342884895581, "grad_norm": 0.5895368456840515, "learning_rate": 0.0001, "loss": 1.5595, "step": 3157 }, { "epoch": 0.7668771248178727, "grad_norm": 0.5622402429580688, "learning_rate": 0.0001, "loss": 1.6426, "step": 3158 }, { "epoch": 0.7671199611461875, "grad_norm": 0.6002911329269409, "learning_rate": 0.0001, "loss": 1.778, "step": 3159 }, { "epoch": 0.7673627974745022, "grad_norm": 0.6253058910369873, "learning_rate": 0.0001, "loss": 1.6909, "step": 3160 }, { "epoch": 0.7676056338028169, "grad_norm": 0.6376633644104004, "learning_rate": 0.0001, "loss": 1.6844, "step": 3161 }, { "epoch": 0.7678484701311317, "grad_norm": 0.6060060262680054, "learning_rate": 0.0001, "loss": 1.7137, "step": 3162 }, { "epoch": 0.7680913064594463, "grad_norm": 0.604957103729248, "learning_rate": 0.0001, "loss": 1.6954, "step": 3163 }, { "epoch": 0.768334142787761, "grad_norm": 0.6303216814994812, "learning_rate": 0.0001, "loss": 1.8798, "step": 3164 }, { "epoch": 0.7685769791160758, "grad_norm": 0.5767795443534851, "learning_rate": 0.0001, "loss": 1.8241, "step": 3165 }, { "epoch": 0.7688198154443905, "grad_norm": 0.5388797521591187, "learning_rate": 0.0001, "loss": 1.6678, "step": 3166 }, { "epoch": 0.7690626517727052, "grad_norm": 0.5943977236747742, "learning_rate": 0.0001, "loss": 1.7706, "step": 3167 }, { "epoch": 0.76930548810102, "grad_norm": 0.5794751644134521, "learning_rate": 0.0001, "loss": 1.7462, "step": 3168 }, { "epoch": 0.7695483244293346, "grad_norm": 0.5764420628547668, "learning_rate": 0.0001, "loss": 1.6719, "step": 3169 }, { "epoch": 0.7697911607576493, "grad_norm": 0.5977728962898254, "learning_rate": 0.0001, "loss": 1.7047, "step": 3170 }, { "epoch": 0.7700339970859641, "grad_norm": 0.6072218418121338, "learning_rate": 0.0001, "loss": 1.7673, "step": 3171 }, { "epoch": 0.7702768334142788, "grad_norm": 0.6105883121490479, "learning_rate": 0.0001, "loss": 1.7475, "step": 3172 }, { "epoch": 0.7705196697425934, "grad_norm": 0.5928831696510315, "learning_rate": 0.0001, "loss": 1.6449, "step": 3173 }, { "epoch": 0.7707625060709082, "grad_norm": 0.5637102723121643, "learning_rate": 0.0001, "loss": 1.6898, "step": 3174 }, { "epoch": 0.7710053423992229, "grad_norm": 0.5758238434791565, "learning_rate": 0.0001, "loss": 1.7284, "step": 3175 }, { "epoch": 0.7712481787275376, "grad_norm": 0.589185893535614, "learning_rate": 0.0001, "loss": 1.7679, "step": 3176 }, { "epoch": 0.7714910150558524, "grad_norm": 0.6024256348609924, "learning_rate": 0.0001, "loss": 1.7998, "step": 3177 }, { "epoch": 0.7717338513841671, "grad_norm": 0.6078524589538574, "learning_rate": 0.0001, "loss": 1.6769, "step": 3178 }, { "epoch": 0.7719766877124817, "grad_norm": 0.5936061143875122, "learning_rate": 0.0001, "loss": 1.7069, "step": 3179 }, { "epoch": 0.7722195240407965, "grad_norm": 0.6246563792228699, "learning_rate": 0.0001, "loss": 1.6059, "step": 3180 }, { "epoch": 0.7724623603691112, "grad_norm": 0.565122663974762, "learning_rate": 0.0001, "loss": 1.6007, "step": 3181 }, { "epoch": 0.772705196697426, "grad_norm": 0.6005831956863403, "learning_rate": 0.0001, "loss": 1.7131, "step": 3182 }, { "epoch": 0.7729480330257407, "grad_norm": 0.5594892501831055, "learning_rate": 0.0001, "loss": 1.6375, "step": 3183 }, { "epoch": 0.7731908693540553, "grad_norm": 0.6021163463592529, "learning_rate": 0.0001, "loss": 1.8812, "step": 3184 }, { "epoch": 0.7734337056823701, "grad_norm": 0.5833166837692261, "learning_rate": 0.0001, "loss": 1.6699, "step": 3185 }, { "epoch": 0.7736765420106848, "grad_norm": 0.5965452790260315, "learning_rate": 0.0001, "loss": 1.7835, "step": 3186 }, { "epoch": 0.7739193783389995, "grad_norm": 0.581089973449707, "learning_rate": 0.0001, "loss": 1.6613, "step": 3187 }, { "epoch": 0.7741622146673143, "grad_norm": 0.5958520770072937, "learning_rate": 0.0001, "loss": 1.5596, "step": 3188 }, { "epoch": 0.774405050995629, "grad_norm": 0.6011066436767578, "learning_rate": 0.0001, "loss": 1.7536, "step": 3189 }, { "epoch": 0.7746478873239436, "grad_norm": 0.5939875245094299, "learning_rate": 0.0001, "loss": 1.7563, "step": 3190 }, { "epoch": 0.7748907236522584, "grad_norm": 0.5792884230613708, "learning_rate": 0.0001, "loss": 1.6958, "step": 3191 }, { "epoch": 0.7751335599805731, "grad_norm": 0.582841694355011, "learning_rate": 0.0001, "loss": 1.7502, "step": 3192 }, { "epoch": 0.7753763963088878, "grad_norm": 0.6032854914665222, "learning_rate": 0.0001, "loss": 1.9189, "step": 3193 }, { "epoch": 0.7756192326372026, "grad_norm": 0.5822592377662659, "learning_rate": 0.0001, "loss": 1.5918, "step": 3194 }, { "epoch": 0.7758620689655172, "grad_norm": 0.5904735922813416, "learning_rate": 0.0001, "loss": 1.7974, "step": 3195 }, { "epoch": 0.7761049052938319, "grad_norm": 0.5985095500946045, "learning_rate": 0.0001, "loss": 1.8372, "step": 3196 }, { "epoch": 0.7763477416221467, "grad_norm": 0.5882258415222168, "learning_rate": 0.0001, "loss": 1.8258, "step": 3197 }, { "epoch": 0.7765905779504614, "grad_norm": 0.6293593049049377, "learning_rate": 0.0001, "loss": 1.7996, "step": 3198 }, { "epoch": 0.7768334142787761, "grad_norm": 0.5894924402236938, "learning_rate": 0.0001, "loss": 1.7175, "step": 3199 }, { "epoch": 0.7770762506070908, "grad_norm": 0.5985170006752014, "learning_rate": 0.0001, "loss": 1.7009, "step": 3200 }, { "epoch": 0.7773190869354055, "grad_norm": 0.609933614730835, "learning_rate": 0.0001, "loss": 1.7568, "step": 3201 }, { "epoch": 0.7775619232637202, "grad_norm": 0.6414185166358948, "learning_rate": 0.0001, "loss": 1.8863, "step": 3202 }, { "epoch": 0.777804759592035, "grad_norm": 0.6187397837638855, "learning_rate": 0.0001, "loss": 1.8666, "step": 3203 }, { "epoch": 0.7780475959203497, "grad_norm": 0.5837206840515137, "learning_rate": 0.0001, "loss": 1.8421, "step": 3204 }, { "epoch": 0.7782904322486645, "grad_norm": 0.5552575588226318, "learning_rate": 0.0001, "loss": 1.6231, "step": 3205 }, { "epoch": 0.7785332685769791, "grad_norm": 0.5835022926330566, "learning_rate": 0.0001, "loss": 1.7375, "step": 3206 }, { "epoch": 0.7787761049052938, "grad_norm": 0.6458723545074463, "learning_rate": 0.0001, "loss": 1.8088, "step": 3207 }, { "epoch": 0.7790189412336086, "grad_norm": 0.6067644953727722, "learning_rate": 0.0001, "loss": 1.7628, "step": 3208 }, { "epoch": 0.7792617775619233, "grad_norm": 0.6323834657669067, "learning_rate": 0.0001, "loss": 1.8119, "step": 3209 }, { "epoch": 0.779504613890238, "grad_norm": 0.648609459400177, "learning_rate": 0.0001, "loss": 1.7833, "step": 3210 }, { "epoch": 0.7797474502185527, "grad_norm": 0.60872882604599, "learning_rate": 0.0001, "loss": 1.6959, "step": 3211 }, { "epoch": 0.7799902865468674, "grad_norm": 0.6171422600746155, "learning_rate": 0.0001, "loss": 1.7076, "step": 3212 }, { "epoch": 0.7802331228751821, "grad_norm": 0.5864462852478027, "learning_rate": 0.0001, "loss": 1.6012, "step": 3213 }, { "epoch": 0.7804759592034969, "grad_norm": 0.6133025288581848, "learning_rate": 0.0001, "loss": 1.7297, "step": 3214 }, { "epoch": 0.7807187955318116, "grad_norm": 0.617095410823822, "learning_rate": 0.0001, "loss": 1.7052, "step": 3215 }, { "epoch": 0.7809616318601262, "grad_norm": 0.6132461428642273, "learning_rate": 0.0001, "loss": 1.6343, "step": 3216 }, { "epoch": 0.781204468188441, "grad_norm": 0.6561414003372192, "learning_rate": 0.0001, "loss": 1.9216, "step": 3217 }, { "epoch": 0.7814473045167557, "grad_norm": 0.6122778058052063, "learning_rate": 0.0001, "loss": 1.7763, "step": 3218 }, { "epoch": 0.7816901408450704, "grad_norm": 0.6221407055854797, "learning_rate": 0.0001, "loss": 1.5273, "step": 3219 }, { "epoch": 0.7819329771733852, "grad_norm": 0.6318554282188416, "learning_rate": 0.0001, "loss": 1.6079, "step": 3220 }, { "epoch": 0.7821758135016998, "grad_norm": 0.5872081518173218, "learning_rate": 0.0001, "loss": 1.6556, "step": 3221 }, { "epoch": 0.7824186498300145, "grad_norm": 0.6384358406066895, "learning_rate": 0.0001, "loss": 1.7984, "step": 3222 }, { "epoch": 0.7826614861583293, "grad_norm": 0.6401801109313965, "learning_rate": 0.0001, "loss": 1.6671, "step": 3223 }, { "epoch": 0.782904322486644, "grad_norm": 0.5717194676399231, "learning_rate": 0.0001, "loss": 1.6217, "step": 3224 }, { "epoch": 0.7831471588149587, "grad_norm": 0.6136575937271118, "learning_rate": 0.0001, "loss": 1.7576, "step": 3225 }, { "epoch": 0.7833899951432735, "grad_norm": 0.5917444229125977, "learning_rate": 0.0001, "loss": 1.6342, "step": 3226 }, { "epoch": 0.7836328314715881, "grad_norm": 0.5684612989425659, "learning_rate": 0.0001, "loss": 1.7071, "step": 3227 }, { "epoch": 0.7838756677999029, "grad_norm": 0.6119584441184998, "learning_rate": 0.0001, "loss": 1.793, "step": 3228 }, { "epoch": 0.7841185041282176, "grad_norm": 0.5921847224235535, "learning_rate": 0.0001, "loss": 1.6398, "step": 3229 }, { "epoch": 0.7843613404565323, "grad_norm": 0.6124568581581116, "learning_rate": 0.0001, "loss": 1.7255, "step": 3230 }, { "epoch": 0.7846041767848471, "grad_norm": 0.6209405064582825, "learning_rate": 0.0001, "loss": 1.7774, "step": 3231 }, { "epoch": 0.7848470131131617, "grad_norm": 0.5682047009468079, "learning_rate": 0.0001, "loss": 1.6307, "step": 3232 }, { "epoch": 0.7850898494414764, "grad_norm": 0.5621352195739746, "learning_rate": 0.0001, "loss": 1.6391, "step": 3233 }, { "epoch": 0.7853326857697912, "grad_norm": 0.5859969854354858, "learning_rate": 0.0001, "loss": 1.7187, "step": 3234 }, { "epoch": 0.7855755220981059, "grad_norm": 0.5869945287704468, "learning_rate": 0.0001, "loss": 1.7051, "step": 3235 }, { "epoch": 0.7858183584264206, "grad_norm": 0.5958589911460876, "learning_rate": 0.0001, "loss": 1.742, "step": 3236 }, { "epoch": 0.7860611947547353, "grad_norm": 0.608558714389801, "learning_rate": 0.0001, "loss": 1.7526, "step": 3237 }, { "epoch": 0.78630403108305, "grad_norm": 0.5519112944602966, "learning_rate": 0.0001, "loss": 1.7003, "step": 3238 }, { "epoch": 0.7865468674113647, "grad_norm": 0.5748507380485535, "learning_rate": 0.0001, "loss": 1.7509, "step": 3239 }, { "epoch": 0.7867897037396795, "grad_norm": 0.6219350099563599, "learning_rate": 0.0001, "loss": 1.8431, "step": 3240 }, { "epoch": 0.7870325400679942, "grad_norm": 0.5954362154006958, "learning_rate": 0.0001, "loss": 1.6484, "step": 3241 }, { "epoch": 0.7872753763963088, "grad_norm": 0.6077887415885925, "learning_rate": 0.0001, "loss": 1.7988, "step": 3242 }, { "epoch": 0.7875182127246236, "grad_norm": 0.5785715579986572, "learning_rate": 0.0001, "loss": 1.6917, "step": 3243 }, { "epoch": 0.7877610490529383, "grad_norm": 0.6050088405609131, "learning_rate": 0.0001, "loss": 1.71, "step": 3244 }, { "epoch": 0.788003885381253, "grad_norm": 0.5776847004890442, "learning_rate": 0.0001, "loss": 1.5398, "step": 3245 }, { "epoch": 0.7882467217095678, "grad_norm": 0.5937278866767883, "learning_rate": 0.0001, "loss": 1.7619, "step": 3246 }, { "epoch": 0.7884895580378825, "grad_norm": 0.5709724426269531, "learning_rate": 0.0001, "loss": 1.7535, "step": 3247 }, { "epoch": 0.7887323943661971, "grad_norm": 0.5952240228652954, "learning_rate": 0.0001, "loss": 1.6862, "step": 3248 }, { "epoch": 0.7889752306945119, "grad_norm": 0.5571580529212952, "learning_rate": 0.0001, "loss": 1.6971, "step": 3249 }, { "epoch": 0.7892180670228266, "grad_norm": 0.5808640122413635, "learning_rate": 0.0001, "loss": 1.6047, "step": 3250 }, { "epoch": 0.7894609033511414, "grad_norm": 0.5837990045547485, "learning_rate": 0.0001, "loss": 1.7686, "step": 3251 }, { "epoch": 0.7897037396794561, "grad_norm": 0.5844842195510864, "learning_rate": 0.0001, "loss": 1.7822, "step": 3252 }, { "epoch": 0.7899465760077707, "grad_norm": 0.6063905954360962, "learning_rate": 0.0001, "loss": 1.7837, "step": 3253 }, { "epoch": 0.7901894123360855, "grad_norm": 0.6192396879196167, "learning_rate": 0.0001, "loss": 1.7271, "step": 3254 }, { "epoch": 0.7904322486644002, "grad_norm": 0.5675437450408936, "learning_rate": 0.0001, "loss": 1.7922, "step": 3255 }, { "epoch": 0.7906750849927149, "grad_norm": 0.6188688278198242, "learning_rate": 0.0001, "loss": 1.9356, "step": 3256 }, { "epoch": 0.7909179213210297, "grad_norm": 0.5751339197158813, "learning_rate": 0.0001, "loss": 1.6897, "step": 3257 }, { "epoch": 0.7911607576493443, "grad_norm": 0.5956962704658508, "learning_rate": 0.0001, "loss": 1.7142, "step": 3258 }, { "epoch": 0.791403593977659, "grad_norm": 0.6013764142990112, "learning_rate": 0.0001, "loss": 1.8275, "step": 3259 }, { "epoch": 0.7916464303059738, "grad_norm": 0.5936847925186157, "learning_rate": 0.0001, "loss": 1.8003, "step": 3260 }, { "epoch": 0.7918892666342885, "grad_norm": 0.5982014536857605, "learning_rate": 0.0001, "loss": 1.8204, "step": 3261 }, { "epoch": 0.7921321029626032, "grad_norm": 0.6026539206504822, "learning_rate": 0.0001, "loss": 1.7183, "step": 3262 }, { "epoch": 0.792374939290918, "grad_norm": 0.6419681310653687, "learning_rate": 0.0001, "loss": 2.0105, "step": 3263 }, { "epoch": 0.7926177756192326, "grad_norm": 0.6058057546615601, "learning_rate": 0.0001, "loss": 1.7082, "step": 3264 }, { "epoch": 0.7928606119475473, "grad_norm": 0.6088738441467285, "learning_rate": 0.0001, "loss": 1.7149, "step": 3265 }, { "epoch": 0.7931034482758621, "grad_norm": 0.5703161358833313, "learning_rate": 0.0001, "loss": 1.679, "step": 3266 }, { "epoch": 0.7933462846041768, "grad_norm": 0.6744810342788696, "learning_rate": 0.0001, "loss": 1.5265, "step": 3267 }, { "epoch": 0.7935891209324915, "grad_norm": 0.6127514839172363, "learning_rate": 0.0001, "loss": 1.6274, "step": 3268 }, { "epoch": 0.7938319572608062, "grad_norm": 0.5914934873580933, "learning_rate": 0.0001, "loss": 1.8143, "step": 3269 }, { "epoch": 0.7940747935891209, "grad_norm": 0.5978199243545532, "learning_rate": 0.0001, "loss": 1.8426, "step": 3270 }, { "epoch": 0.7943176299174356, "grad_norm": 0.5905243158340454, "learning_rate": 0.0001, "loss": 1.7238, "step": 3271 }, { "epoch": 0.7945604662457504, "grad_norm": 0.5864617824554443, "learning_rate": 0.0001, "loss": 1.6579, "step": 3272 }, { "epoch": 0.7948033025740651, "grad_norm": 0.5839855074882507, "learning_rate": 0.0001, "loss": 1.7133, "step": 3273 }, { "epoch": 0.7950461389023799, "grad_norm": 0.5632340312004089, "learning_rate": 0.0001, "loss": 1.6262, "step": 3274 }, { "epoch": 0.7952889752306945, "grad_norm": 0.5917242169380188, "learning_rate": 0.0001, "loss": 1.9173, "step": 3275 }, { "epoch": 0.7955318115590092, "grad_norm": 0.5705491304397583, "learning_rate": 0.0001, "loss": 1.7487, "step": 3276 }, { "epoch": 0.795774647887324, "grad_norm": 0.5937005281448364, "learning_rate": 0.0001, "loss": 1.7422, "step": 3277 }, { "epoch": 0.7960174842156387, "grad_norm": 0.5753030180931091, "learning_rate": 0.0001, "loss": 1.6029, "step": 3278 }, { "epoch": 0.7962603205439533, "grad_norm": 0.6627799868583679, "learning_rate": 0.0001, "loss": 1.7026, "step": 3279 }, { "epoch": 0.7965031568722681, "grad_norm": 0.6088401675224304, "learning_rate": 0.0001, "loss": 1.7256, "step": 3280 }, { "epoch": 0.7967459932005828, "grad_norm": 0.6398965716362, "learning_rate": 0.0001, "loss": 1.7722, "step": 3281 }, { "epoch": 0.7969888295288975, "grad_norm": 0.6321691274642944, "learning_rate": 0.0001, "loss": 1.8379, "step": 3282 }, { "epoch": 0.7972316658572123, "grad_norm": 0.5635577440261841, "learning_rate": 0.0001, "loss": 1.6831, "step": 3283 }, { "epoch": 0.797474502185527, "grad_norm": 0.6203828454017639, "learning_rate": 0.0001, "loss": 1.8552, "step": 3284 }, { "epoch": 0.7977173385138416, "grad_norm": 0.6240285038948059, "learning_rate": 0.0001, "loss": 1.6622, "step": 3285 }, { "epoch": 0.7979601748421564, "grad_norm": 0.5690562129020691, "learning_rate": 0.0001, "loss": 1.7832, "step": 3286 }, { "epoch": 0.7982030111704711, "grad_norm": 0.610381543636322, "learning_rate": 0.0001, "loss": 1.7679, "step": 3287 }, { "epoch": 0.7984458474987858, "grad_norm": 0.609774649143219, "learning_rate": 0.0001, "loss": 1.589, "step": 3288 }, { "epoch": 0.7986886838271006, "grad_norm": 0.563381016254425, "learning_rate": 0.0001, "loss": 1.6746, "step": 3289 }, { "epoch": 0.7989315201554152, "grad_norm": 0.6335068345069885, "learning_rate": 0.0001, "loss": 1.7154, "step": 3290 }, { "epoch": 0.7991743564837299, "grad_norm": 0.6035629510879517, "learning_rate": 0.0001, "loss": 1.7156, "step": 3291 }, { "epoch": 0.7994171928120447, "grad_norm": 0.586322009563446, "learning_rate": 0.0001, "loss": 1.6816, "step": 3292 }, { "epoch": 0.7996600291403594, "grad_norm": 0.6346733570098877, "learning_rate": 0.0001, "loss": 1.8079, "step": 3293 }, { "epoch": 0.7999028654686741, "grad_norm": 0.5991312265396118, "learning_rate": 0.0001, "loss": 1.7526, "step": 3294 }, { "epoch": 0.8001457017969889, "grad_norm": 0.6022869348526001, "learning_rate": 0.0001, "loss": 1.8159, "step": 3295 }, { "epoch": 0.8003885381253035, "grad_norm": 0.5966434478759766, "learning_rate": 0.0001, "loss": 1.6233, "step": 3296 }, { "epoch": 0.8006313744536183, "grad_norm": 0.578927755355835, "learning_rate": 0.0001, "loss": 1.7053, "step": 3297 }, { "epoch": 0.800874210781933, "grad_norm": 0.6092239022254944, "learning_rate": 0.0001, "loss": 1.7013, "step": 3298 }, { "epoch": 0.8011170471102477, "grad_norm": 0.6259116530418396, "learning_rate": 0.0001, "loss": 1.7511, "step": 3299 }, { "epoch": 0.8013598834385625, "grad_norm": 0.6093431711196899, "learning_rate": 0.0001, "loss": 1.8281, "step": 3300 }, { "epoch": 0.8016027197668771, "grad_norm": 0.5632680058479309, "learning_rate": 0.0001, "loss": 1.6561, "step": 3301 }, { "epoch": 0.8018455560951918, "grad_norm": 0.5798899531364441, "learning_rate": 0.0001, "loss": 1.859, "step": 3302 }, { "epoch": 0.8020883924235066, "grad_norm": 0.5695531964302063, "learning_rate": 0.0001, "loss": 1.7348, "step": 3303 }, { "epoch": 0.8023312287518213, "grad_norm": 0.6000730991363525, "learning_rate": 0.0001, "loss": 1.7621, "step": 3304 }, { "epoch": 0.802574065080136, "grad_norm": 0.5967425107955933, "learning_rate": 0.0001, "loss": 1.8134, "step": 3305 }, { "epoch": 0.8028169014084507, "grad_norm": 0.571355402469635, "learning_rate": 0.0001, "loss": 1.656, "step": 3306 }, { "epoch": 0.8030597377367654, "grad_norm": 0.5934798121452332, "learning_rate": 0.0001, "loss": 1.7581, "step": 3307 }, { "epoch": 0.8033025740650801, "grad_norm": 0.6144223213195801, "learning_rate": 0.0001, "loss": 1.7315, "step": 3308 }, { "epoch": 0.8035454103933949, "grad_norm": 0.5985466837882996, "learning_rate": 0.0001, "loss": 1.68, "step": 3309 }, { "epoch": 0.8037882467217096, "grad_norm": 0.630021333694458, "learning_rate": 0.0001, "loss": 1.7313, "step": 3310 }, { "epoch": 0.8040310830500242, "grad_norm": 0.5904750823974609, "learning_rate": 0.0001, "loss": 1.7345, "step": 3311 }, { "epoch": 0.804273919378339, "grad_norm": 0.5716640949249268, "learning_rate": 0.0001, "loss": 1.6162, "step": 3312 }, { "epoch": 0.8045167557066537, "grad_norm": 0.588955819606781, "learning_rate": 0.0001, "loss": 1.7002, "step": 3313 }, { "epoch": 0.8047595920349684, "grad_norm": 0.596904993057251, "learning_rate": 0.0001, "loss": 1.7657, "step": 3314 }, { "epoch": 0.8050024283632832, "grad_norm": 0.6122095584869385, "learning_rate": 0.0001, "loss": 1.8043, "step": 3315 }, { "epoch": 0.8052452646915979, "grad_norm": 0.567223072052002, "learning_rate": 0.0001, "loss": 1.5579, "step": 3316 }, { "epoch": 0.8054881010199125, "grad_norm": 0.6023831963539124, "learning_rate": 0.0001, "loss": 1.7581, "step": 3317 }, { "epoch": 0.8057309373482273, "grad_norm": 0.5783039927482605, "learning_rate": 0.0001, "loss": 1.6285, "step": 3318 }, { "epoch": 0.805973773676542, "grad_norm": 0.61045902967453, "learning_rate": 0.0001, "loss": 1.8467, "step": 3319 }, { "epoch": 0.8062166100048568, "grad_norm": 0.6231280565261841, "learning_rate": 0.0001, "loss": 1.7842, "step": 3320 }, { "epoch": 0.8064594463331715, "grad_norm": 0.5988601446151733, "learning_rate": 0.0001, "loss": 1.754, "step": 3321 }, { "epoch": 0.8067022826614861, "grad_norm": 0.6404216885566711, "learning_rate": 0.0001, "loss": 1.6881, "step": 3322 }, { "epoch": 0.8069451189898009, "grad_norm": 0.5820360779762268, "learning_rate": 0.0001, "loss": 1.6644, "step": 3323 }, { "epoch": 0.8071879553181156, "grad_norm": 0.6122472882270813, "learning_rate": 0.0001, "loss": 1.8324, "step": 3324 }, { "epoch": 0.8074307916464303, "grad_norm": 0.6082800030708313, "learning_rate": 0.0001, "loss": 1.9817, "step": 3325 }, { "epoch": 0.8076736279747451, "grad_norm": 0.6259562373161316, "learning_rate": 0.0001, "loss": 1.7077, "step": 3326 }, { "epoch": 0.8079164643030597, "grad_norm": 0.5752188563346863, "learning_rate": 0.0001, "loss": 1.5986, "step": 3327 }, { "epoch": 0.8081593006313744, "grad_norm": 0.575911819934845, "learning_rate": 0.0001, "loss": 1.6426, "step": 3328 }, { "epoch": 0.8084021369596892, "grad_norm": 0.5568429231643677, "learning_rate": 0.0001, "loss": 1.7373, "step": 3329 }, { "epoch": 0.8086449732880039, "grad_norm": 0.6027485132217407, "learning_rate": 0.0001, "loss": 1.7404, "step": 3330 }, { "epoch": 0.8088878096163186, "grad_norm": 0.7463359236717224, "learning_rate": 0.0001, "loss": 1.6852, "step": 3331 }, { "epoch": 0.8091306459446334, "grad_norm": 0.6194759011268616, "learning_rate": 0.0001, "loss": 1.8937, "step": 3332 }, { "epoch": 0.809373482272948, "grad_norm": 0.5699089169502258, "learning_rate": 0.0001, "loss": 1.6654, "step": 3333 }, { "epoch": 0.8096163186012627, "grad_norm": 0.6080254316329956, "learning_rate": 0.0001, "loss": 1.88, "step": 3334 }, { "epoch": 0.8098591549295775, "grad_norm": 0.5942913889884949, "learning_rate": 0.0001, "loss": 1.6951, "step": 3335 }, { "epoch": 0.8101019912578922, "grad_norm": 0.5429050326347351, "learning_rate": 0.0001, "loss": 1.4823, "step": 3336 }, { "epoch": 0.8103448275862069, "grad_norm": 0.5945674180984497, "learning_rate": 0.0001, "loss": 1.7982, "step": 3337 }, { "epoch": 0.8105876639145216, "grad_norm": 0.6173865795135498, "learning_rate": 0.0001, "loss": 1.9694, "step": 3338 }, { "epoch": 0.8108305002428363, "grad_norm": 0.600135862827301, "learning_rate": 0.0001, "loss": 1.7166, "step": 3339 }, { "epoch": 0.811073336571151, "grad_norm": 0.6154102087020874, "learning_rate": 0.0001, "loss": 1.9117, "step": 3340 }, { "epoch": 0.8113161728994658, "grad_norm": 0.6211844682693481, "learning_rate": 0.0001, "loss": 1.9107, "step": 3341 }, { "epoch": 0.8115590092277805, "grad_norm": 0.5779953002929688, "learning_rate": 0.0001, "loss": 1.6885, "step": 3342 }, { "epoch": 0.8118018455560952, "grad_norm": 0.5749261975288391, "learning_rate": 0.0001, "loss": 1.5568, "step": 3343 }, { "epoch": 0.8120446818844099, "grad_norm": 0.5764738321304321, "learning_rate": 0.0001, "loss": 1.7278, "step": 3344 }, { "epoch": 0.8122875182127246, "grad_norm": 0.6124898791313171, "learning_rate": 0.0001, "loss": 1.7762, "step": 3345 }, { "epoch": 0.8125303545410394, "grad_norm": 0.5406044125556946, "learning_rate": 0.0001, "loss": 1.6091, "step": 3346 }, { "epoch": 0.8127731908693541, "grad_norm": 0.6130670309066772, "learning_rate": 0.0001, "loss": 1.699, "step": 3347 }, { "epoch": 0.8130160271976687, "grad_norm": 0.6192781329154968, "learning_rate": 0.0001, "loss": 1.779, "step": 3348 }, { "epoch": 0.8132588635259835, "grad_norm": 0.6246933937072754, "learning_rate": 0.0001, "loss": 1.9329, "step": 3349 }, { "epoch": 0.8135016998542982, "grad_norm": 0.633960485458374, "learning_rate": 0.0001, "loss": 1.8494, "step": 3350 }, { "epoch": 0.8137445361826129, "grad_norm": 0.5493264198303223, "learning_rate": 0.0001, "loss": 1.4451, "step": 3351 }, { "epoch": 0.8139873725109277, "grad_norm": 0.5964555740356445, "learning_rate": 0.0001, "loss": 1.6861, "step": 3352 }, { "epoch": 0.8142302088392424, "grad_norm": 0.5876238346099854, "learning_rate": 0.0001, "loss": 1.8228, "step": 3353 }, { "epoch": 0.814473045167557, "grad_norm": 0.5785903334617615, "learning_rate": 0.0001, "loss": 1.6771, "step": 3354 }, { "epoch": 0.8147158814958718, "grad_norm": 0.5826572775840759, "learning_rate": 0.0001, "loss": 1.7107, "step": 3355 }, { "epoch": 0.8149587178241865, "grad_norm": 0.5708701014518738, "learning_rate": 0.0001, "loss": 1.5783, "step": 3356 }, { "epoch": 0.8152015541525012, "grad_norm": 0.5835521221160889, "learning_rate": 0.0001, "loss": 1.7775, "step": 3357 }, { "epoch": 0.815444390480816, "grad_norm": 0.6153636574745178, "learning_rate": 0.0001, "loss": 1.6928, "step": 3358 }, { "epoch": 0.8156872268091306, "grad_norm": 0.5732671618461609, "learning_rate": 0.0001, "loss": 1.7559, "step": 3359 }, { "epoch": 0.8159300631374453, "grad_norm": 0.5535603761672974, "learning_rate": 0.0001, "loss": 1.5733, "step": 3360 }, { "epoch": 0.8161728994657601, "grad_norm": 0.6083071827888489, "learning_rate": 0.0001, "loss": 1.7317, "step": 3361 }, { "epoch": 0.8164157357940748, "grad_norm": 0.6135209798812866, "learning_rate": 0.0001, "loss": 1.8898, "step": 3362 }, { "epoch": 0.8166585721223895, "grad_norm": 0.6117151379585266, "learning_rate": 0.0001, "loss": 1.7949, "step": 3363 }, { "epoch": 0.8169014084507042, "grad_norm": 0.6317639946937561, "learning_rate": 0.0001, "loss": 1.8458, "step": 3364 }, { "epoch": 0.8171442447790189, "grad_norm": 0.6188638806343079, "learning_rate": 0.0001, "loss": 1.8251, "step": 3365 }, { "epoch": 0.8173870811073336, "grad_norm": 0.620080828666687, "learning_rate": 0.0001, "loss": 1.7581, "step": 3366 }, { "epoch": 0.8176299174356484, "grad_norm": 0.5871297121047974, "learning_rate": 0.0001, "loss": 1.7579, "step": 3367 }, { "epoch": 0.8178727537639631, "grad_norm": 0.5991242527961731, "learning_rate": 0.0001, "loss": 1.8426, "step": 3368 }, { "epoch": 0.8181155900922779, "grad_norm": 0.608951985836029, "learning_rate": 0.0001, "loss": 1.8797, "step": 3369 }, { "epoch": 0.8183584264205925, "grad_norm": 0.614581823348999, "learning_rate": 0.0001, "loss": 1.9202, "step": 3370 }, { "epoch": 0.8186012627489072, "grad_norm": 0.5930675864219666, "learning_rate": 0.0001, "loss": 1.739, "step": 3371 }, { "epoch": 0.818844099077222, "grad_norm": 0.5880351066589355, "learning_rate": 0.0001, "loss": 1.7359, "step": 3372 }, { "epoch": 0.8190869354055367, "grad_norm": 0.5454912185668945, "learning_rate": 0.0001, "loss": 1.4532, "step": 3373 }, { "epoch": 0.8193297717338514, "grad_norm": 0.561832070350647, "learning_rate": 0.0001, "loss": 1.5996, "step": 3374 }, { "epoch": 0.8195726080621661, "grad_norm": 0.5611982941627502, "learning_rate": 0.0001, "loss": 1.6075, "step": 3375 }, { "epoch": 0.8198154443904808, "grad_norm": 0.5709531307220459, "learning_rate": 0.0001, "loss": 1.7507, "step": 3376 }, { "epoch": 0.8200582807187955, "grad_norm": 0.5754792094230652, "learning_rate": 0.0001, "loss": 1.7595, "step": 3377 }, { "epoch": 0.8203011170471103, "grad_norm": 0.5866820812225342, "learning_rate": 0.0001, "loss": 1.7232, "step": 3378 }, { "epoch": 0.820543953375425, "grad_norm": 0.5608850717544556, "learning_rate": 0.0001, "loss": 1.7659, "step": 3379 }, { "epoch": 0.8207867897037396, "grad_norm": 0.5590660572052002, "learning_rate": 0.0001, "loss": 1.6007, "step": 3380 }, { "epoch": 0.8210296260320544, "grad_norm": 0.5848101377487183, "learning_rate": 0.0001, "loss": 1.5795, "step": 3381 }, { "epoch": 0.8212724623603691, "grad_norm": 0.5834150314331055, "learning_rate": 0.0001, "loss": 1.7462, "step": 3382 }, { "epoch": 0.8215152986886838, "grad_norm": 0.6223803162574768, "learning_rate": 0.0001, "loss": 1.8801, "step": 3383 }, { "epoch": 0.8217581350169986, "grad_norm": 0.5955408811569214, "learning_rate": 0.0001, "loss": 1.8415, "step": 3384 }, { "epoch": 0.8220009713453132, "grad_norm": 0.5798283815383911, "learning_rate": 0.0001, "loss": 1.6972, "step": 3385 }, { "epoch": 0.8222438076736279, "grad_norm": 0.5866160988807678, "learning_rate": 0.0001, "loss": 1.8269, "step": 3386 }, { "epoch": 0.8224866440019427, "grad_norm": 0.5723693370819092, "learning_rate": 0.0001, "loss": 1.6874, "step": 3387 }, { "epoch": 0.8227294803302574, "grad_norm": 0.6119223237037659, "learning_rate": 0.0001, "loss": 1.6137, "step": 3388 }, { "epoch": 0.8229723166585721, "grad_norm": 0.6263529062271118, "learning_rate": 0.0001, "loss": 1.8294, "step": 3389 }, { "epoch": 0.8232151529868869, "grad_norm": 0.6026468873023987, "learning_rate": 0.0001, "loss": 1.7883, "step": 3390 }, { "epoch": 0.8234579893152015, "grad_norm": 0.6187738180160522, "learning_rate": 0.0001, "loss": 1.8427, "step": 3391 }, { "epoch": 0.8237008256435163, "grad_norm": 0.5795246362686157, "learning_rate": 0.0001, "loss": 1.7458, "step": 3392 }, { "epoch": 0.823943661971831, "grad_norm": 0.5572130680084229, "learning_rate": 0.0001, "loss": 1.7382, "step": 3393 }, { "epoch": 0.8241864983001457, "grad_norm": 0.5740678906440735, "learning_rate": 0.0001, "loss": 1.8143, "step": 3394 }, { "epoch": 0.8244293346284605, "grad_norm": 0.5776839852333069, "learning_rate": 0.0001, "loss": 1.8622, "step": 3395 }, { "epoch": 0.8246721709567751, "grad_norm": 0.5869353413581848, "learning_rate": 0.0001, "loss": 1.569, "step": 3396 }, { "epoch": 0.8249150072850898, "grad_norm": 0.5583310127258301, "learning_rate": 0.0001, "loss": 1.629, "step": 3397 }, { "epoch": 0.8251578436134046, "grad_norm": 0.5862078666687012, "learning_rate": 0.0001, "loss": 1.656, "step": 3398 }, { "epoch": 0.8254006799417193, "grad_norm": 0.5608081817626953, "learning_rate": 0.0001, "loss": 1.5435, "step": 3399 }, { "epoch": 0.825643516270034, "grad_norm": 0.6446062326431274, "learning_rate": 0.0001, "loss": 1.853, "step": 3400 }, { "epoch": 0.8258863525983487, "grad_norm": 0.5685895085334778, "learning_rate": 0.0001, "loss": 1.6336, "step": 3401 }, { "epoch": 0.8261291889266634, "grad_norm": 0.5998561978340149, "learning_rate": 0.0001, "loss": 1.8534, "step": 3402 }, { "epoch": 0.8263720252549781, "grad_norm": 0.5718868970870972, "learning_rate": 0.0001, "loss": 1.6783, "step": 3403 }, { "epoch": 0.8266148615832929, "grad_norm": 0.6252078413963318, "learning_rate": 0.0001, "loss": 1.6878, "step": 3404 }, { "epoch": 0.8268576979116076, "grad_norm": 0.6125273108482361, "learning_rate": 0.0001, "loss": 1.8284, "step": 3405 }, { "epoch": 0.8271005342399222, "grad_norm": 0.608466625213623, "learning_rate": 0.0001, "loss": 1.6677, "step": 3406 }, { "epoch": 0.827343370568237, "grad_norm": 0.599600613117218, "learning_rate": 0.0001, "loss": 1.7604, "step": 3407 }, { "epoch": 0.8275862068965517, "grad_norm": 0.6215369701385498, "learning_rate": 0.0001, "loss": 1.7342, "step": 3408 }, { "epoch": 0.8278290432248664, "grad_norm": 0.5949468016624451, "learning_rate": 0.0001, "loss": 1.7269, "step": 3409 }, { "epoch": 0.8280718795531812, "grad_norm": 0.5503864288330078, "learning_rate": 0.0001, "loss": 1.6687, "step": 3410 }, { "epoch": 0.8283147158814959, "grad_norm": 0.6335252523422241, "learning_rate": 0.0001, "loss": 1.9147, "step": 3411 }, { "epoch": 0.8285575522098105, "grad_norm": 0.56465744972229, "learning_rate": 0.0001, "loss": 1.5786, "step": 3412 }, { "epoch": 0.8288003885381253, "grad_norm": 0.592567503452301, "learning_rate": 0.0001, "loss": 1.6854, "step": 3413 }, { "epoch": 0.82904322486644, "grad_norm": 0.5808621048927307, "learning_rate": 0.0001, "loss": 1.7253, "step": 3414 }, { "epoch": 0.8292860611947548, "grad_norm": 0.5746657252311707, "learning_rate": 0.0001, "loss": 1.6321, "step": 3415 }, { "epoch": 0.8295288975230695, "grad_norm": 0.5854600667953491, "learning_rate": 0.0001, "loss": 1.8233, "step": 3416 }, { "epoch": 0.8297717338513841, "grad_norm": 0.5802686810493469, "learning_rate": 0.0001, "loss": 1.5996, "step": 3417 }, { "epoch": 0.8300145701796989, "grad_norm": 0.6190335154533386, "learning_rate": 0.0001, "loss": 1.8894, "step": 3418 }, { "epoch": 0.8302574065080136, "grad_norm": 0.5804746150970459, "learning_rate": 0.0001, "loss": 1.648, "step": 3419 }, { "epoch": 0.8305002428363283, "grad_norm": 0.5803455114364624, "learning_rate": 0.0001, "loss": 1.6909, "step": 3420 }, { "epoch": 0.8307430791646431, "grad_norm": 0.5573564767837524, "learning_rate": 0.0001, "loss": 1.6443, "step": 3421 }, { "epoch": 0.8309859154929577, "grad_norm": 0.6028070449829102, "learning_rate": 0.0001, "loss": 1.6151, "step": 3422 }, { "epoch": 0.8312287518212724, "grad_norm": 0.5921975374221802, "learning_rate": 0.0001, "loss": 1.5926, "step": 3423 }, { "epoch": 0.8314715881495872, "grad_norm": 0.6960628032684326, "learning_rate": 0.0001, "loss": 1.7498, "step": 3424 }, { "epoch": 0.8317144244779019, "grad_norm": 0.5475769639015198, "learning_rate": 0.0001, "loss": 1.5778, "step": 3425 }, { "epoch": 0.8319572608062166, "grad_norm": 0.6360837817192078, "learning_rate": 0.0001, "loss": 1.8568, "step": 3426 }, { "epoch": 0.8322000971345314, "grad_norm": 0.5973774790763855, "learning_rate": 0.0001, "loss": 1.8229, "step": 3427 }, { "epoch": 0.832442933462846, "grad_norm": 0.663210391998291, "learning_rate": 0.0001, "loss": 1.7362, "step": 3428 }, { "epoch": 0.8326857697911607, "grad_norm": 0.5464731454849243, "learning_rate": 0.0001, "loss": 1.6308, "step": 3429 }, { "epoch": 0.8329286061194755, "grad_norm": 0.619607150554657, "learning_rate": 0.0001, "loss": 1.7336, "step": 3430 }, { "epoch": 0.8331714424477902, "grad_norm": 0.5969376564025879, "learning_rate": 0.0001, "loss": 1.7484, "step": 3431 }, { "epoch": 0.8334142787761049, "grad_norm": 0.5881327390670776, "learning_rate": 0.0001, "loss": 1.6542, "step": 3432 }, { "epoch": 0.8336571151044196, "grad_norm": 0.5548758506774902, "learning_rate": 0.0001, "loss": 1.7287, "step": 3433 }, { "epoch": 0.8338999514327343, "grad_norm": 0.6055938601493835, "learning_rate": 0.0001, "loss": 1.8663, "step": 3434 }, { "epoch": 0.834142787761049, "grad_norm": 0.6541330814361572, "learning_rate": 0.0001, "loss": 1.7799, "step": 3435 }, { "epoch": 0.8343856240893638, "grad_norm": 0.6338015198707581, "learning_rate": 0.0001, "loss": 1.7116, "step": 3436 }, { "epoch": 0.8346284604176785, "grad_norm": 0.5681234002113342, "learning_rate": 0.0001, "loss": 1.689, "step": 3437 }, { "epoch": 0.8348712967459933, "grad_norm": 0.6056068539619446, "learning_rate": 0.0001, "loss": 1.7361, "step": 3438 }, { "epoch": 0.8351141330743079, "grad_norm": 0.5616917014122009, "learning_rate": 0.0001, "loss": 1.5896, "step": 3439 }, { "epoch": 0.8353569694026226, "grad_norm": 0.6003168821334839, "learning_rate": 0.0001, "loss": 1.8037, "step": 3440 }, { "epoch": 0.8355998057309374, "grad_norm": 0.575546145439148, "learning_rate": 0.0001, "loss": 1.6066, "step": 3441 }, { "epoch": 0.8358426420592521, "grad_norm": 0.5927971601486206, "learning_rate": 0.0001, "loss": 1.6905, "step": 3442 }, { "epoch": 0.8360854783875667, "grad_norm": 0.5666440725326538, "learning_rate": 0.0001, "loss": 1.8136, "step": 3443 }, { "epoch": 0.8363283147158815, "grad_norm": 0.621577262878418, "learning_rate": 0.0001, "loss": 1.6891, "step": 3444 }, { "epoch": 0.8365711510441962, "grad_norm": 0.5634074211120605, "learning_rate": 0.0001, "loss": 1.7177, "step": 3445 }, { "epoch": 0.8368139873725109, "grad_norm": 0.6412850022315979, "learning_rate": 0.0001, "loss": 1.7874, "step": 3446 }, { "epoch": 0.8370568237008257, "grad_norm": 0.5438231825828552, "learning_rate": 0.0001, "loss": 1.5894, "step": 3447 }, { "epoch": 0.8372996600291404, "grad_norm": 0.6114881038665771, "learning_rate": 0.0001, "loss": 1.8538, "step": 3448 }, { "epoch": 0.837542496357455, "grad_norm": 0.5776000618934631, "learning_rate": 0.0001, "loss": 1.7675, "step": 3449 }, { "epoch": 0.8377853326857698, "grad_norm": 0.5970380306243896, "learning_rate": 0.0001, "loss": 1.6959, "step": 3450 }, { "epoch": 0.8380281690140845, "grad_norm": 0.6232032179832458, "learning_rate": 0.0001, "loss": 1.6671, "step": 3451 }, { "epoch": 0.8382710053423992, "grad_norm": 0.5810390710830688, "learning_rate": 0.0001, "loss": 1.8324, "step": 3452 }, { "epoch": 0.838513841670714, "grad_norm": 0.5402159690856934, "learning_rate": 0.0001, "loss": 1.5199, "step": 3453 }, { "epoch": 0.8387566779990286, "grad_norm": 0.5929580330848694, "learning_rate": 0.0001, "loss": 1.7311, "step": 3454 }, { "epoch": 0.8389995143273433, "grad_norm": 0.5634052753448486, "learning_rate": 0.0001, "loss": 1.6945, "step": 3455 }, { "epoch": 0.8392423506556581, "grad_norm": 0.6263976693153381, "learning_rate": 0.0001, "loss": 1.6382, "step": 3456 }, { "epoch": 0.8394851869839728, "grad_norm": 0.6204410195350647, "learning_rate": 0.0001, "loss": 1.9143, "step": 3457 }, { "epoch": 0.8397280233122875, "grad_norm": 0.5862576961517334, "learning_rate": 0.0001, "loss": 1.7031, "step": 3458 }, { "epoch": 0.8399708596406023, "grad_norm": 0.5897918343544006, "learning_rate": 0.0001, "loss": 1.6781, "step": 3459 }, { "epoch": 0.8402136959689169, "grad_norm": 0.5764839053153992, "learning_rate": 0.0001, "loss": 1.7503, "step": 3460 }, { "epoch": 0.8404565322972317, "grad_norm": 0.593485951423645, "learning_rate": 0.0001, "loss": 1.8107, "step": 3461 }, { "epoch": 0.8406993686255464, "grad_norm": 0.6075116991996765, "learning_rate": 0.0001, "loss": 1.7321, "step": 3462 }, { "epoch": 0.8409422049538611, "grad_norm": 0.6165798902511597, "learning_rate": 0.0001, "loss": 1.6663, "step": 3463 }, { "epoch": 0.8411850412821759, "grad_norm": 0.5829122066497803, "learning_rate": 0.0001, "loss": 1.7196, "step": 3464 }, { "epoch": 0.8414278776104905, "grad_norm": 0.6220898032188416, "learning_rate": 0.0001, "loss": 1.5922, "step": 3465 }, { "epoch": 0.8416707139388052, "grad_norm": 0.635849118232727, "learning_rate": 0.0001, "loss": 1.8856, "step": 3466 }, { "epoch": 0.84191355026712, "grad_norm": 0.6103588938713074, "learning_rate": 0.0001, "loss": 1.6493, "step": 3467 }, { "epoch": 0.8421563865954347, "grad_norm": 0.5577727556228638, "learning_rate": 0.0001, "loss": 1.5431, "step": 3468 }, { "epoch": 0.8423992229237494, "grad_norm": 0.5995204448699951, "learning_rate": 0.0001, "loss": 1.7391, "step": 3469 }, { "epoch": 0.8426420592520641, "grad_norm": 0.6427181363105774, "learning_rate": 0.0001, "loss": 1.8653, "step": 3470 }, { "epoch": 0.8428848955803788, "grad_norm": 0.5356771945953369, "learning_rate": 0.0001, "loss": 1.5631, "step": 3471 }, { "epoch": 0.8431277319086935, "grad_norm": 0.5844012498855591, "learning_rate": 0.0001, "loss": 1.6229, "step": 3472 }, { "epoch": 0.8433705682370083, "grad_norm": 0.60114985704422, "learning_rate": 0.0001, "loss": 1.7445, "step": 3473 }, { "epoch": 0.843613404565323, "grad_norm": 0.6197024583816528, "learning_rate": 0.0001, "loss": 1.7657, "step": 3474 }, { "epoch": 0.8438562408936376, "grad_norm": 0.5999199748039246, "learning_rate": 0.0001, "loss": 1.7353, "step": 3475 }, { "epoch": 0.8440990772219524, "grad_norm": 0.5859060883522034, "learning_rate": 0.0001, "loss": 1.8221, "step": 3476 }, { "epoch": 0.8443419135502671, "grad_norm": 0.5816472768783569, "learning_rate": 0.0001, "loss": 1.6688, "step": 3477 }, { "epoch": 0.8445847498785818, "grad_norm": 0.5703580379486084, "learning_rate": 0.0001, "loss": 1.6621, "step": 3478 }, { "epoch": 0.8448275862068966, "grad_norm": 0.6125529408454895, "learning_rate": 0.0001, "loss": 1.8035, "step": 3479 }, { "epoch": 0.8450704225352113, "grad_norm": 0.5765839219093323, "learning_rate": 0.0001, "loss": 1.7085, "step": 3480 }, { "epoch": 0.8453132588635259, "grad_norm": 0.573478102684021, "learning_rate": 0.0001, "loss": 1.7313, "step": 3481 }, { "epoch": 0.8455560951918407, "grad_norm": 0.6237002611160278, "learning_rate": 0.0001, "loss": 1.6452, "step": 3482 }, { "epoch": 0.8457989315201554, "grad_norm": 0.5803018808364868, "learning_rate": 0.0001, "loss": 1.7738, "step": 3483 }, { "epoch": 0.8460417678484702, "grad_norm": 0.576075553894043, "learning_rate": 0.0001, "loss": 1.6795, "step": 3484 }, { "epoch": 0.8462846041767849, "grad_norm": 0.5590042471885681, "learning_rate": 0.0001, "loss": 1.6703, "step": 3485 }, { "epoch": 0.8465274405050995, "grad_norm": 0.5912151336669922, "learning_rate": 0.0001, "loss": 1.7616, "step": 3486 }, { "epoch": 0.8467702768334143, "grad_norm": 0.634704053401947, "learning_rate": 0.0001, "loss": 1.5935, "step": 3487 }, { "epoch": 0.847013113161729, "grad_norm": 0.5654214024543762, "learning_rate": 0.0001, "loss": 1.6806, "step": 3488 }, { "epoch": 0.8472559494900437, "grad_norm": 0.5535054802894592, "learning_rate": 0.0001, "loss": 1.74, "step": 3489 }, { "epoch": 0.8474987858183585, "grad_norm": 0.5521407127380371, "learning_rate": 0.0001, "loss": 1.5164, "step": 3490 }, { "epoch": 0.8477416221466731, "grad_norm": 0.5911891460418701, "learning_rate": 0.0001, "loss": 1.5673, "step": 3491 }, { "epoch": 0.8479844584749878, "grad_norm": 0.5401911735534668, "learning_rate": 0.0001, "loss": 1.5747, "step": 3492 }, { "epoch": 0.8482272948033026, "grad_norm": 0.5964999794960022, "learning_rate": 0.0001, "loss": 1.6824, "step": 3493 }, { "epoch": 0.8484701311316173, "grad_norm": 0.5567051768302917, "learning_rate": 0.0001, "loss": 1.7838, "step": 3494 }, { "epoch": 0.848712967459932, "grad_norm": 0.5711089372634888, "learning_rate": 0.0001, "loss": 1.632, "step": 3495 }, { "epoch": 0.8489558037882468, "grad_norm": 0.6089751124382019, "learning_rate": 0.0001, "loss": 1.6986, "step": 3496 }, { "epoch": 0.8491986401165614, "grad_norm": 0.6084120869636536, "learning_rate": 0.0001, "loss": 1.6776, "step": 3497 }, { "epoch": 0.8494414764448761, "grad_norm": 0.6146928668022156, "learning_rate": 0.0001, "loss": 1.8569, "step": 3498 }, { "epoch": 0.8496843127731909, "grad_norm": 0.5824174880981445, "learning_rate": 0.0001, "loss": 1.7408, "step": 3499 }, { "epoch": 0.8499271491015056, "grad_norm": 0.5815324783325195, "learning_rate": 0.0001, "loss": 1.5541, "step": 3500 }, { "epoch": 0.8501699854298203, "grad_norm": 0.6049876809120178, "learning_rate": 0.0001, "loss": 1.7525, "step": 3501 }, { "epoch": 0.850412821758135, "grad_norm": 0.6228092908859253, "learning_rate": 0.0001, "loss": 1.7271, "step": 3502 }, { "epoch": 0.8506556580864497, "grad_norm": 0.561995267868042, "learning_rate": 0.0001, "loss": 1.6255, "step": 3503 }, { "epoch": 0.8508984944147644, "grad_norm": 0.6361596584320068, "learning_rate": 0.0001, "loss": 1.7825, "step": 3504 }, { "epoch": 0.8511413307430792, "grad_norm": 0.5702955722808838, "learning_rate": 0.0001, "loss": 1.6949, "step": 3505 }, { "epoch": 0.8513841670713939, "grad_norm": 0.5604357123374939, "learning_rate": 0.0001, "loss": 1.5887, "step": 3506 }, { "epoch": 0.8516270033997086, "grad_norm": 0.6207056045532227, "learning_rate": 0.0001, "loss": 1.6181, "step": 3507 }, { "epoch": 0.8518698397280233, "grad_norm": 0.6111729145050049, "learning_rate": 0.0001, "loss": 1.8756, "step": 3508 }, { "epoch": 0.852112676056338, "grad_norm": 0.615805983543396, "learning_rate": 0.0001, "loss": 1.6656, "step": 3509 }, { "epoch": 0.8523555123846528, "grad_norm": 0.6363980770111084, "learning_rate": 0.0001, "loss": 1.7821, "step": 3510 }, { "epoch": 0.8525983487129675, "grad_norm": 0.5989717841148376, "learning_rate": 0.0001, "loss": 1.6822, "step": 3511 }, { "epoch": 0.8528411850412821, "grad_norm": 0.5952607989311218, "learning_rate": 0.0001, "loss": 1.69, "step": 3512 }, { "epoch": 0.8530840213695969, "grad_norm": 0.5801405310630798, "learning_rate": 0.0001, "loss": 1.7882, "step": 3513 }, { "epoch": 0.8533268576979116, "grad_norm": 0.6062362194061279, "learning_rate": 0.0001, "loss": 1.6513, "step": 3514 }, { "epoch": 0.8535696940262263, "grad_norm": 0.5364546179771423, "learning_rate": 0.0001, "loss": 1.6001, "step": 3515 }, { "epoch": 0.8538125303545411, "grad_norm": 0.5886363983154297, "learning_rate": 0.0001, "loss": 1.5536, "step": 3516 }, { "epoch": 0.8540553666828558, "grad_norm": 0.6251400113105774, "learning_rate": 0.0001, "loss": 1.847, "step": 3517 }, { "epoch": 0.8542982030111704, "grad_norm": 0.6041175723075867, "learning_rate": 0.0001, "loss": 1.8304, "step": 3518 }, { "epoch": 0.8545410393394852, "grad_norm": 0.6060884594917297, "learning_rate": 0.0001, "loss": 1.6442, "step": 3519 }, { "epoch": 0.8547838756677999, "grad_norm": 0.6119658946990967, "learning_rate": 0.0001, "loss": 1.6178, "step": 3520 }, { "epoch": 0.8550267119961146, "grad_norm": 0.6325762271881104, "learning_rate": 0.0001, "loss": 1.7807, "step": 3521 }, { "epoch": 0.8552695483244294, "grad_norm": 0.5806691646575928, "learning_rate": 0.0001, "loss": 1.667, "step": 3522 }, { "epoch": 0.855512384652744, "grad_norm": 0.5474377870559692, "learning_rate": 0.0001, "loss": 1.5618, "step": 3523 }, { "epoch": 0.8557552209810587, "grad_norm": 0.571455717086792, "learning_rate": 0.0001, "loss": 1.7621, "step": 3524 }, { "epoch": 0.8559980573093735, "grad_norm": 0.6741440892219543, "learning_rate": 0.0001, "loss": 1.7855, "step": 3525 }, { "epoch": 0.8562408936376882, "grad_norm": 0.6805163025856018, "learning_rate": 0.0001, "loss": 1.9816, "step": 3526 }, { "epoch": 0.8564837299660029, "grad_norm": 0.5746136903762817, "learning_rate": 0.0001, "loss": 1.6439, "step": 3527 }, { "epoch": 0.8567265662943176, "grad_norm": 0.652385413646698, "learning_rate": 0.0001, "loss": 1.6415, "step": 3528 }, { "epoch": 0.8569694026226323, "grad_norm": 0.6076606512069702, "learning_rate": 0.0001, "loss": 1.7428, "step": 3529 }, { "epoch": 0.8572122389509471, "grad_norm": 0.6327298879623413, "learning_rate": 0.0001, "loss": 1.8304, "step": 3530 }, { "epoch": 0.8574550752792618, "grad_norm": 0.5788837671279907, "learning_rate": 0.0001, "loss": 1.7163, "step": 3531 }, { "epoch": 0.8576979116075765, "grad_norm": 0.6050636172294617, "learning_rate": 0.0001, "loss": 1.612, "step": 3532 }, { "epoch": 0.8579407479358913, "grad_norm": 0.6376311182975769, "learning_rate": 0.0001, "loss": 1.7723, "step": 3533 }, { "epoch": 0.8581835842642059, "grad_norm": 0.5577212572097778, "learning_rate": 0.0001, "loss": 1.6499, "step": 3534 }, { "epoch": 0.8584264205925206, "grad_norm": 0.5583710074424744, "learning_rate": 0.0001, "loss": 1.6353, "step": 3535 }, { "epoch": 0.8586692569208354, "grad_norm": 0.6112894415855408, "learning_rate": 0.0001, "loss": 1.7546, "step": 3536 }, { "epoch": 0.8589120932491501, "grad_norm": 0.5702512264251709, "learning_rate": 0.0001, "loss": 1.7296, "step": 3537 }, { "epoch": 0.8591549295774648, "grad_norm": 0.634347140789032, "learning_rate": 0.0001, "loss": 1.63, "step": 3538 }, { "epoch": 0.8593977659057795, "grad_norm": 0.5780573487281799, "learning_rate": 0.0001, "loss": 1.8952, "step": 3539 }, { "epoch": 0.8596406022340942, "grad_norm": 0.590006947517395, "learning_rate": 0.0001, "loss": 1.7142, "step": 3540 }, { "epoch": 0.8598834385624089, "grad_norm": 0.5899177193641663, "learning_rate": 0.0001, "loss": 1.8622, "step": 3541 }, { "epoch": 0.8601262748907237, "grad_norm": 0.5887792706489563, "learning_rate": 0.0001, "loss": 1.5651, "step": 3542 }, { "epoch": 0.8603691112190384, "grad_norm": 0.5811349749565125, "learning_rate": 0.0001, "loss": 1.703, "step": 3543 }, { "epoch": 0.860611947547353, "grad_norm": 0.5674530863761902, "learning_rate": 0.0001, "loss": 1.7557, "step": 3544 }, { "epoch": 0.8608547838756678, "grad_norm": 0.6252373456954956, "learning_rate": 0.0001, "loss": 1.518, "step": 3545 }, { "epoch": 0.8610976202039825, "grad_norm": 0.5738163590431213, "learning_rate": 0.0001, "loss": 1.6803, "step": 3546 }, { "epoch": 0.8613404565322972, "grad_norm": 0.6242806911468506, "learning_rate": 0.0001, "loss": 1.7783, "step": 3547 }, { "epoch": 0.861583292860612, "grad_norm": 0.6254070401191711, "learning_rate": 0.0001, "loss": 1.7868, "step": 3548 }, { "epoch": 0.8618261291889266, "grad_norm": 0.5892719030380249, "learning_rate": 0.0001, "loss": 1.6135, "step": 3549 }, { "epoch": 0.8620689655172413, "grad_norm": 0.5632604360580444, "learning_rate": 0.0001, "loss": 1.661, "step": 3550 }, { "epoch": 0.8623118018455561, "grad_norm": 0.5731838941574097, "learning_rate": 0.0001, "loss": 1.6758, "step": 3551 }, { "epoch": 0.8625546381738708, "grad_norm": 0.5648952722549438, "learning_rate": 0.0001, "loss": 1.6706, "step": 3552 }, { "epoch": 0.8627974745021856, "grad_norm": 0.5980069637298584, "learning_rate": 0.0001, "loss": 1.5629, "step": 3553 }, { "epoch": 0.8630403108305003, "grad_norm": 0.6209183931350708, "learning_rate": 0.0001, "loss": 1.6986, "step": 3554 }, { "epoch": 0.8632831471588149, "grad_norm": 0.6167494058609009, "learning_rate": 0.0001, "loss": 1.762, "step": 3555 }, { "epoch": 0.8635259834871297, "grad_norm": 0.6167067289352417, "learning_rate": 0.0001, "loss": 1.67, "step": 3556 }, { "epoch": 0.8637688198154444, "grad_norm": 0.5660021305084229, "learning_rate": 0.0001, "loss": 1.5813, "step": 3557 }, { "epoch": 0.8640116561437591, "grad_norm": 0.6034141182899475, "learning_rate": 0.0001, "loss": 1.7734, "step": 3558 }, { "epoch": 0.8642544924720739, "grad_norm": 0.6104663014411926, "learning_rate": 0.0001, "loss": 1.6955, "step": 3559 }, { "epoch": 0.8644973288003885, "grad_norm": 0.6068470478057861, "learning_rate": 0.0001, "loss": 1.83, "step": 3560 }, { "epoch": 0.8647401651287032, "grad_norm": 0.6156007647514343, "learning_rate": 0.0001, "loss": 1.6015, "step": 3561 }, { "epoch": 0.864983001457018, "grad_norm": 0.5979913473129272, "learning_rate": 0.0001, "loss": 1.6734, "step": 3562 }, { "epoch": 0.8652258377853327, "grad_norm": 0.5854653716087341, "learning_rate": 0.0001, "loss": 1.7198, "step": 3563 }, { "epoch": 0.8654686741136474, "grad_norm": 0.582108736038208, "learning_rate": 0.0001, "loss": 1.6996, "step": 3564 }, { "epoch": 0.8657115104419622, "grad_norm": 0.6201448440551758, "learning_rate": 0.0001, "loss": 1.7424, "step": 3565 }, { "epoch": 0.8659543467702768, "grad_norm": 0.5824017524719238, "learning_rate": 0.0001, "loss": 1.8216, "step": 3566 }, { "epoch": 0.8661971830985915, "grad_norm": 0.5889251232147217, "learning_rate": 0.0001, "loss": 1.7786, "step": 3567 }, { "epoch": 0.8664400194269063, "grad_norm": 0.598010778427124, "learning_rate": 0.0001, "loss": 1.7527, "step": 3568 }, { "epoch": 0.866682855755221, "grad_norm": 0.6177082061767578, "learning_rate": 0.0001, "loss": 1.8827, "step": 3569 }, { "epoch": 0.8669256920835356, "grad_norm": 0.624889612197876, "learning_rate": 0.0001, "loss": 1.6746, "step": 3570 }, { "epoch": 0.8671685284118504, "grad_norm": 0.6158528327941895, "learning_rate": 0.0001, "loss": 1.8653, "step": 3571 }, { "epoch": 0.8674113647401651, "grad_norm": 0.5674200654029846, "learning_rate": 0.0001, "loss": 1.6643, "step": 3572 }, { "epoch": 0.8676542010684798, "grad_norm": 0.5981341600418091, "learning_rate": 0.0001, "loss": 1.9083, "step": 3573 }, { "epoch": 0.8678970373967946, "grad_norm": 0.5735521912574768, "learning_rate": 0.0001, "loss": 1.5136, "step": 3574 }, { "epoch": 0.8681398737251093, "grad_norm": 0.5744978189468384, "learning_rate": 0.0001, "loss": 1.7118, "step": 3575 }, { "epoch": 0.868382710053424, "grad_norm": 0.566753625869751, "learning_rate": 0.0001, "loss": 1.6209, "step": 3576 }, { "epoch": 0.8686255463817387, "grad_norm": 0.5849733352661133, "learning_rate": 0.0001, "loss": 1.8447, "step": 3577 }, { "epoch": 0.8688683827100534, "grad_norm": 0.5533862709999084, "learning_rate": 0.0001, "loss": 1.6032, "step": 3578 }, { "epoch": 0.8691112190383682, "grad_norm": 0.5679661631584167, "learning_rate": 0.0001, "loss": 1.6633, "step": 3579 }, { "epoch": 0.8693540553666829, "grad_norm": 0.6186648011207581, "learning_rate": 0.0001, "loss": 1.7538, "step": 3580 }, { "epoch": 0.8695968916949975, "grad_norm": 0.5672974586486816, "learning_rate": 0.0001, "loss": 1.4959, "step": 3581 }, { "epoch": 0.8698397280233123, "grad_norm": 0.5962534546852112, "learning_rate": 0.0001, "loss": 1.8265, "step": 3582 }, { "epoch": 0.870082564351627, "grad_norm": 0.5624101161956787, "learning_rate": 0.0001, "loss": 1.625, "step": 3583 }, { "epoch": 0.8703254006799417, "grad_norm": 0.5874091982841492, "learning_rate": 0.0001, "loss": 1.7295, "step": 3584 }, { "epoch": 0.8705682370082565, "grad_norm": 0.5865095853805542, "learning_rate": 0.0001, "loss": 1.6617, "step": 3585 }, { "epoch": 0.8708110733365712, "grad_norm": 0.5641353130340576, "learning_rate": 0.0001, "loss": 1.7834, "step": 3586 }, { "epoch": 0.8710539096648858, "grad_norm": 0.5696240663528442, "learning_rate": 0.0001, "loss": 1.7361, "step": 3587 }, { "epoch": 0.8712967459932006, "grad_norm": 0.5919355750083923, "learning_rate": 0.0001, "loss": 1.7896, "step": 3588 }, { "epoch": 0.8715395823215153, "grad_norm": 0.5535728931427002, "learning_rate": 0.0001, "loss": 1.6152, "step": 3589 }, { "epoch": 0.87178241864983, "grad_norm": 0.6022486686706543, "learning_rate": 0.0001, "loss": 1.6431, "step": 3590 }, { "epoch": 0.8720252549781448, "grad_norm": 0.7028996348381042, "learning_rate": 0.0001, "loss": 1.592, "step": 3591 }, { "epoch": 0.8722680913064594, "grad_norm": 0.6094292998313904, "learning_rate": 0.0001, "loss": 1.7961, "step": 3592 }, { "epoch": 0.8725109276347741, "grad_norm": 0.5781928300857544, "learning_rate": 0.0001, "loss": 1.6244, "step": 3593 }, { "epoch": 0.8727537639630889, "grad_norm": 0.6018481850624084, "learning_rate": 0.0001, "loss": 1.7303, "step": 3594 }, { "epoch": 0.8729966002914036, "grad_norm": 0.5868637561798096, "learning_rate": 0.0001, "loss": 1.7745, "step": 3595 }, { "epoch": 0.8732394366197183, "grad_norm": 0.5851695537567139, "learning_rate": 0.0001, "loss": 1.6856, "step": 3596 }, { "epoch": 0.873482272948033, "grad_norm": 0.617761492729187, "learning_rate": 0.0001, "loss": 1.8024, "step": 3597 }, { "epoch": 0.8737251092763477, "grad_norm": 0.5883352756500244, "learning_rate": 0.0001, "loss": 1.6687, "step": 3598 }, { "epoch": 0.8739679456046625, "grad_norm": 0.5812327861785889, "learning_rate": 0.0001, "loss": 1.6438, "step": 3599 }, { "epoch": 0.8742107819329772, "grad_norm": 0.5833345055580139, "learning_rate": 0.0001, "loss": 1.7707, "step": 3600 }, { "epoch": 0.8744536182612919, "grad_norm": 0.5722734928131104, "learning_rate": 0.0001, "loss": 1.6052, "step": 3601 }, { "epoch": 0.8746964545896067, "grad_norm": 0.5645515322685242, "learning_rate": 0.0001, "loss": 1.5211, "step": 3602 }, { "epoch": 0.8749392909179213, "grad_norm": 0.5958465337753296, "learning_rate": 0.0001, "loss": 1.702, "step": 3603 }, { "epoch": 0.875182127246236, "grad_norm": 0.5952925682067871, "learning_rate": 0.0001, "loss": 1.6857, "step": 3604 }, { "epoch": 0.8754249635745508, "grad_norm": 0.5824811458587646, "learning_rate": 0.0001, "loss": 1.6593, "step": 3605 }, { "epoch": 0.8756677999028655, "grad_norm": 0.574558675289154, "learning_rate": 0.0001, "loss": 1.748, "step": 3606 }, { "epoch": 0.8759106362311802, "grad_norm": 0.5838613510131836, "learning_rate": 0.0001, "loss": 1.5767, "step": 3607 }, { "epoch": 0.8761534725594949, "grad_norm": 0.5939115881919861, "learning_rate": 0.0001, "loss": 1.8715, "step": 3608 }, { "epoch": 0.8763963088878096, "grad_norm": 0.5994473099708557, "learning_rate": 0.0001, "loss": 1.7028, "step": 3609 }, { "epoch": 0.8766391452161243, "grad_norm": 0.63933265209198, "learning_rate": 0.0001, "loss": 1.5971, "step": 3610 }, { "epoch": 0.8768819815444391, "grad_norm": 0.5982732772827148, "learning_rate": 0.0001, "loss": 1.8988, "step": 3611 }, { "epoch": 0.8771248178727538, "grad_norm": 0.6075206995010376, "learning_rate": 0.0001, "loss": 1.6727, "step": 3612 }, { "epoch": 0.8773676542010684, "grad_norm": 0.6145090460777283, "learning_rate": 0.0001, "loss": 1.8204, "step": 3613 }, { "epoch": 0.8776104905293832, "grad_norm": 0.5468227863311768, "learning_rate": 0.0001, "loss": 1.6884, "step": 3614 }, { "epoch": 0.8778533268576979, "grad_norm": 0.5902441143989563, "learning_rate": 0.0001, "loss": 1.5819, "step": 3615 }, { "epoch": 0.8780961631860126, "grad_norm": 0.6053389310836792, "learning_rate": 0.0001, "loss": 1.793, "step": 3616 }, { "epoch": 0.8783389995143274, "grad_norm": 0.5707299113273621, "learning_rate": 0.0001, "loss": 1.7813, "step": 3617 }, { "epoch": 0.878581835842642, "grad_norm": 0.5711703896522522, "learning_rate": 0.0001, "loss": 1.4658, "step": 3618 }, { "epoch": 0.8788246721709567, "grad_norm": 0.5625070333480835, "learning_rate": 0.0001, "loss": 1.6934, "step": 3619 }, { "epoch": 0.8790675084992715, "grad_norm": 0.5734425783157349, "learning_rate": 0.0001, "loss": 1.7695, "step": 3620 }, { "epoch": 0.8793103448275862, "grad_norm": 0.6269510984420776, "learning_rate": 0.0001, "loss": 1.7358, "step": 3621 }, { "epoch": 0.879553181155901, "grad_norm": 0.6240434646606445, "learning_rate": 0.0001, "loss": 1.9493, "step": 3622 }, { "epoch": 0.8797960174842157, "grad_norm": 0.6287503838539124, "learning_rate": 0.0001, "loss": 1.6903, "step": 3623 }, { "epoch": 0.8800388538125303, "grad_norm": 0.6086653470993042, "learning_rate": 0.0001, "loss": 1.6884, "step": 3624 }, { "epoch": 0.8802816901408451, "grad_norm": 0.5977360606193542, "learning_rate": 0.0001, "loss": 1.7675, "step": 3625 }, { "epoch": 0.8805245264691598, "grad_norm": 0.5500513911247253, "learning_rate": 0.0001, "loss": 1.5043, "step": 3626 }, { "epoch": 0.8807673627974745, "grad_norm": 0.5777398943901062, "learning_rate": 0.0001, "loss": 1.7679, "step": 3627 }, { "epoch": 0.8810101991257893, "grad_norm": 0.5849345326423645, "learning_rate": 0.0001, "loss": 1.5986, "step": 3628 }, { "epoch": 0.8812530354541039, "grad_norm": 0.5858832597732544, "learning_rate": 0.0001, "loss": 1.7539, "step": 3629 }, { "epoch": 0.8814958717824186, "grad_norm": 0.5634268522262573, "learning_rate": 0.0001, "loss": 1.497, "step": 3630 }, { "epoch": 0.8817387081107334, "grad_norm": 0.5676572918891907, "learning_rate": 0.0001, "loss": 1.5783, "step": 3631 }, { "epoch": 0.8819815444390481, "grad_norm": 0.6174838542938232, "learning_rate": 0.0001, "loss": 1.6675, "step": 3632 }, { "epoch": 0.8822243807673628, "grad_norm": 0.5701363682746887, "learning_rate": 0.0001, "loss": 1.5811, "step": 3633 }, { "epoch": 0.8824672170956775, "grad_norm": 0.6180095076560974, "learning_rate": 0.0001, "loss": 1.7446, "step": 3634 }, { "epoch": 0.8827100534239922, "grad_norm": 0.5902948379516602, "learning_rate": 0.0001, "loss": 1.7286, "step": 3635 }, { "epoch": 0.8829528897523069, "grad_norm": 0.6113959550857544, "learning_rate": 0.0001, "loss": 1.8169, "step": 3636 }, { "epoch": 0.8831957260806217, "grad_norm": 0.5810447335243225, "learning_rate": 0.0001, "loss": 1.5132, "step": 3637 }, { "epoch": 0.8834385624089364, "grad_norm": 0.646421492099762, "learning_rate": 0.0001, "loss": 1.8118, "step": 3638 }, { "epoch": 0.883681398737251, "grad_norm": 0.566224217414856, "learning_rate": 0.0001, "loss": 1.8104, "step": 3639 }, { "epoch": 0.8839242350655658, "grad_norm": 0.5958870053291321, "learning_rate": 0.0001, "loss": 1.8216, "step": 3640 }, { "epoch": 0.8841670713938805, "grad_norm": 0.6059859395027161, "learning_rate": 0.0001, "loss": 1.7857, "step": 3641 }, { "epoch": 0.8844099077221952, "grad_norm": 0.5715227127075195, "learning_rate": 0.0001, "loss": 1.7616, "step": 3642 }, { "epoch": 0.88465274405051, "grad_norm": 0.6450254321098328, "learning_rate": 0.0001, "loss": 1.8297, "step": 3643 }, { "epoch": 0.8848955803788247, "grad_norm": 0.6196300983428955, "learning_rate": 0.0001, "loss": 1.7653, "step": 3644 }, { "epoch": 0.8851384167071394, "grad_norm": 0.5951306819915771, "learning_rate": 0.0001, "loss": 1.6767, "step": 3645 }, { "epoch": 0.8853812530354541, "grad_norm": 0.6688085198402405, "learning_rate": 0.0001, "loss": 1.5862, "step": 3646 }, { "epoch": 0.8856240893637688, "grad_norm": 0.5445377826690674, "learning_rate": 0.0001, "loss": 1.5474, "step": 3647 }, { "epoch": 0.8858669256920836, "grad_norm": 0.6012933254241943, "learning_rate": 0.0001, "loss": 1.7783, "step": 3648 }, { "epoch": 0.8861097620203983, "grad_norm": 0.6122232675552368, "learning_rate": 0.0001, "loss": 1.8086, "step": 3649 }, { "epoch": 0.8863525983487129, "grad_norm": 0.5938394665718079, "learning_rate": 0.0001, "loss": 1.612, "step": 3650 }, { "epoch": 0.8865954346770277, "grad_norm": 0.610999584197998, "learning_rate": 0.0001, "loss": 1.8749, "step": 3651 }, { "epoch": 0.8868382710053424, "grad_norm": 0.5855596661567688, "learning_rate": 0.0001, "loss": 1.725, "step": 3652 }, { "epoch": 0.8870811073336571, "grad_norm": 0.5986215472221375, "learning_rate": 0.0001, "loss": 1.7427, "step": 3653 }, { "epoch": 0.8873239436619719, "grad_norm": 0.6195816993713379, "learning_rate": 0.0001, "loss": 1.6951, "step": 3654 }, { "epoch": 0.8875667799902865, "grad_norm": 0.5849098563194275, "learning_rate": 0.0001, "loss": 1.8607, "step": 3655 }, { "epoch": 0.8878096163186012, "grad_norm": 0.6396300792694092, "learning_rate": 0.0001, "loss": 1.8764, "step": 3656 }, { "epoch": 0.888052452646916, "grad_norm": 0.5767935514450073, "learning_rate": 0.0001, "loss": 1.6136, "step": 3657 }, { "epoch": 0.8882952889752307, "grad_norm": 0.5865722894668579, "learning_rate": 0.0001, "loss": 1.6685, "step": 3658 }, { "epoch": 0.8885381253035454, "grad_norm": 0.5967360734939575, "learning_rate": 0.0001, "loss": 1.7648, "step": 3659 }, { "epoch": 0.8887809616318602, "grad_norm": 0.6081897616386414, "learning_rate": 0.0001, "loss": 1.7452, "step": 3660 }, { "epoch": 0.8890237979601748, "grad_norm": 0.6059684157371521, "learning_rate": 0.0001, "loss": 1.7785, "step": 3661 }, { "epoch": 0.8892666342884895, "grad_norm": 0.5943751931190491, "learning_rate": 0.0001, "loss": 1.7477, "step": 3662 }, { "epoch": 0.8895094706168043, "grad_norm": 0.6292147636413574, "learning_rate": 0.0001, "loss": 1.8816, "step": 3663 }, { "epoch": 0.889752306945119, "grad_norm": 0.6073547601699829, "learning_rate": 0.0001, "loss": 1.7417, "step": 3664 }, { "epoch": 0.8899951432734337, "grad_norm": 0.5393562912940979, "learning_rate": 0.0001, "loss": 1.6544, "step": 3665 }, { "epoch": 0.8902379796017484, "grad_norm": 0.5665006637573242, "learning_rate": 0.0001, "loss": 1.5303, "step": 3666 }, { "epoch": 0.8904808159300631, "grad_norm": 0.5666619539260864, "learning_rate": 0.0001, "loss": 1.5845, "step": 3667 }, { "epoch": 0.8907236522583779, "grad_norm": 0.5895118117332458, "learning_rate": 0.0001, "loss": 1.6932, "step": 3668 }, { "epoch": 0.8909664885866926, "grad_norm": 0.5936942100524902, "learning_rate": 0.0001, "loss": 1.6315, "step": 3669 }, { "epoch": 0.8912093249150073, "grad_norm": 0.6128193736076355, "learning_rate": 0.0001, "loss": 1.69, "step": 3670 }, { "epoch": 0.891452161243322, "grad_norm": 0.627239465713501, "learning_rate": 0.0001, "loss": 1.823, "step": 3671 }, { "epoch": 0.8916949975716367, "grad_norm": 0.5960967540740967, "learning_rate": 0.0001, "loss": 1.6852, "step": 3672 }, { "epoch": 0.8919378338999514, "grad_norm": 0.6583183407783508, "learning_rate": 0.0001, "loss": 1.8119, "step": 3673 }, { "epoch": 0.8921806702282662, "grad_norm": 0.5878483653068542, "learning_rate": 0.0001, "loss": 1.6484, "step": 3674 }, { "epoch": 0.8924235065565809, "grad_norm": 0.5881945490837097, "learning_rate": 0.0001, "loss": 1.7356, "step": 3675 }, { "epoch": 0.8926663428848955, "grad_norm": 0.5920316576957703, "learning_rate": 0.0001, "loss": 1.7318, "step": 3676 }, { "epoch": 0.8929091792132103, "grad_norm": 0.6558576226234436, "learning_rate": 0.0001, "loss": 1.7684, "step": 3677 }, { "epoch": 0.893152015541525, "grad_norm": 0.6191709637641907, "learning_rate": 0.0001, "loss": 1.823, "step": 3678 }, { "epoch": 0.8933948518698397, "grad_norm": 0.6138198971748352, "learning_rate": 0.0001, "loss": 1.8532, "step": 3679 }, { "epoch": 0.8936376881981545, "grad_norm": 0.6346641182899475, "learning_rate": 0.0001, "loss": 1.6019, "step": 3680 }, { "epoch": 0.8938805245264692, "grad_norm": 0.6237822771072388, "learning_rate": 0.0001, "loss": 1.6418, "step": 3681 }, { "epoch": 0.8941233608547838, "grad_norm": 0.6110655069351196, "learning_rate": 0.0001, "loss": 1.7011, "step": 3682 }, { "epoch": 0.8943661971830986, "grad_norm": 0.5926641821861267, "learning_rate": 0.0001, "loss": 1.7345, "step": 3683 }, { "epoch": 0.8946090335114133, "grad_norm": 0.6240578889846802, "learning_rate": 0.0001, "loss": 1.6809, "step": 3684 }, { "epoch": 0.894851869839728, "grad_norm": 0.6072526574134827, "learning_rate": 0.0001, "loss": 1.6632, "step": 3685 }, { "epoch": 0.8950947061680428, "grad_norm": 0.6060703992843628, "learning_rate": 0.0001, "loss": 1.7149, "step": 3686 }, { "epoch": 0.8953375424963574, "grad_norm": 0.5971805453300476, "learning_rate": 0.0001, "loss": 1.676, "step": 3687 }, { "epoch": 0.8955803788246721, "grad_norm": 0.6302820444107056, "learning_rate": 0.0001, "loss": 1.6785, "step": 3688 }, { "epoch": 0.8958232151529869, "grad_norm": 0.5734922289848328, "learning_rate": 0.0001, "loss": 1.6722, "step": 3689 }, { "epoch": 0.8960660514813016, "grad_norm": 0.608064591884613, "learning_rate": 0.0001, "loss": 1.7129, "step": 3690 }, { "epoch": 0.8963088878096164, "grad_norm": 0.605770468711853, "learning_rate": 0.0001, "loss": 1.7061, "step": 3691 }, { "epoch": 0.896551724137931, "grad_norm": 0.5936496257781982, "learning_rate": 0.0001, "loss": 1.8021, "step": 3692 }, { "epoch": 0.8967945604662457, "grad_norm": 0.619182288646698, "learning_rate": 0.0001, "loss": 1.6411, "step": 3693 }, { "epoch": 0.8970373967945605, "grad_norm": 0.5974726676940918, "learning_rate": 0.0001, "loss": 1.8246, "step": 3694 }, { "epoch": 0.8972802331228752, "grad_norm": 0.6047124266624451, "learning_rate": 0.0001, "loss": 1.715, "step": 3695 }, { "epoch": 0.8975230694511899, "grad_norm": 0.606947660446167, "learning_rate": 0.0001, "loss": 1.7117, "step": 3696 }, { "epoch": 0.8977659057795047, "grad_norm": 0.5917686223983765, "learning_rate": 0.0001, "loss": 1.7008, "step": 3697 }, { "epoch": 0.8980087421078193, "grad_norm": 0.6276222467422485, "learning_rate": 0.0001, "loss": 1.8742, "step": 3698 }, { "epoch": 0.898251578436134, "grad_norm": 0.5839412212371826, "learning_rate": 0.0001, "loss": 1.7297, "step": 3699 }, { "epoch": 0.8984944147644488, "grad_norm": 0.5966883301734924, "learning_rate": 0.0001, "loss": 1.6937, "step": 3700 }, { "epoch": 0.8987372510927635, "grad_norm": 0.5819137692451477, "learning_rate": 0.0001, "loss": 1.6871, "step": 3701 }, { "epoch": 0.8989800874210782, "grad_norm": 0.6237320899963379, "learning_rate": 0.0001, "loss": 1.7537, "step": 3702 }, { "epoch": 0.8992229237493929, "grad_norm": 0.587004542350769, "learning_rate": 0.0001, "loss": 1.771, "step": 3703 }, { "epoch": 0.8994657600777076, "grad_norm": 0.5957283973693848, "learning_rate": 0.0001, "loss": 1.8436, "step": 3704 }, { "epoch": 0.8997085964060223, "grad_norm": 0.6247084736824036, "learning_rate": 0.0001, "loss": 1.7767, "step": 3705 }, { "epoch": 0.8999514327343371, "grad_norm": 0.5712265968322754, "learning_rate": 0.0001, "loss": 1.7591, "step": 3706 }, { "epoch": 0.9001942690626518, "grad_norm": 0.6066750884056091, "learning_rate": 0.0001, "loss": 1.6773, "step": 3707 }, { "epoch": 0.9004371053909664, "grad_norm": 0.5812699198722839, "learning_rate": 0.0001, "loss": 1.6081, "step": 3708 }, { "epoch": 0.9006799417192812, "grad_norm": 0.5693857669830322, "learning_rate": 0.0001, "loss": 1.5499, "step": 3709 }, { "epoch": 0.9009227780475959, "grad_norm": 0.6075789332389832, "learning_rate": 0.0001, "loss": 1.654, "step": 3710 }, { "epoch": 0.9011656143759106, "grad_norm": 0.588006854057312, "learning_rate": 0.0001, "loss": 1.7292, "step": 3711 }, { "epoch": 0.9014084507042254, "grad_norm": 0.5788747072219849, "learning_rate": 0.0001, "loss": 1.7602, "step": 3712 }, { "epoch": 0.90165128703254, "grad_norm": 0.6109026074409485, "learning_rate": 0.0001, "loss": 1.7753, "step": 3713 }, { "epoch": 0.9018941233608548, "grad_norm": 0.6000962853431702, "learning_rate": 0.0001, "loss": 1.7303, "step": 3714 }, { "epoch": 0.9021369596891695, "grad_norm": 0.5677233934402466, "learning_rate": 0.0001, "loss": 1.7165, "step": 3715 }, { "epoch": 0.9023797960174842, "grad_norm": 0.6030009984970093, "learning_rate": 0.0001, "loss": 1.8595, "step": 3716 }, { "epoch": 0.902622632345799, "grad_norm": 0.5491967797279358, "learning_rate": 0.0001, "loss": 1.5261, "step": 3717 }, { "epoch": 0.9028654686741137, "grad_norm": 0.6326006054878235, "learning_rate": 0.0001, "loss": 1.6389, "step": 3718 }, { "epoch": 0.9031083050024283, "grad_norm": 0.6032041907310486, "learning_rate": 0.0001, "loss": 1.7333, "step": 3719 }, { "epoch": 0.9033511413307431, "grad_norm": 0.6120818257331848, "learning_rate": 0.0001, "loss": 1.7694, "step": 3720 }, { "epoch": 0.9035939776590578, "grad_norm": 0.6020964980125427, "learning_rate": 0.0001, "loss": 1.7013, "step": 3721 }, { "epoch": 0.9038368139873725, "grad_norm": 0.6394541263580322, "learning_rate": 0.0001, "loss": 1.8643, "step": 3722 }, { "epoch": 0.9040796503156873, "grad_norm": 0.5862223505973816, "learning_rate": 0.0001, "loss": 1.6665, "step": 3723 }, { "epoch": 0.9043224866440019, "grad_norm": 0.6442126035690308, "learning_rate": 0.0001, "loss": 1.9307, "step": 3724 }, { "epoch": 0.9045653229723166, "grad_norm": 0.5906519293785095, "learning_rate": 0.0001, "loss": 1.6618, "step": 3725 }, { "epoch": 0.9048081593006314, "grad_norm": 0.5946016311645508, "learning_rate": 0.0001, "loss": 1.5407, "step": 3726 }, { "epoch": 0.9050509956289461, "grad_norm": 0.5882769227027893, "learning_rate": 0.0001, "loss": 1.7124, "step": 3727 }, { "epoch": 0.9052938319572608, "grad_norm": 0.6075938940048218, "learning_rate": 0.0001, "loss": 1.713, "step": 3728 }, { "epoch": 0.9055366682855756, "grad_norm": 0.5872309803962708, "learning_rate": 0.0001, "loss": 1.7817, "step": 3729 }, { "epoch": 0.9057795046138902, "grad_norm": 0.5707372426986694, "learning_rate": 0.0001, "loss": 1.7341, "step": 3730 }, { "epoch": 0.9060223409422049, "grad_norm": 0.5877151489257812, "learning_rate": 0.0001, "loss": 1.7046, "step": 3731 }, { "epoch": 0.9062651772705197, "grad_norm": 0.5864531993865967, "learning_rate": 0.0001, "loss": 1.7321, "step": 3732 }, { "epoch": 0.9065080135988344, "grad_norm": 0.6085807085037231, "learning_rate": 0.0001, "loss": 1.5914, "step": 3733 }, { "epoch": 0.906750849927149, "grad_norm": 0.6236429214477539, "learning_rate": 0.0001, "loss": 1.6959, "step": 3734 }, { "epoch": 0.9069936862554638, "grad_norm": 0.5786526799201965, "learning_rate": 0.0001, "loss": 1.7332, "step": 3735 }, { "epoch": 0.9072365225837785, "grad_norm": 0.6803106665611267, "learning_rate": 0.0001, "loss": 1.734, "step": 3736 }, { "epoch": 0.9074793589120933, "grad_norm": 0.6150429844856262, "learning_rate": 0.0001, "loss": 1.7344, "step": 3737 }, { "epoch": 0.907722195240408, "grad_norm": 0.5588183403015137, "learning_rate": 0.0001, "loss": 1.6196, "step": 3738 }, { "epoch": 0.9079650315687227, "grad_norm": 0.6157742142677307, "learning_rate": 0.0001, "loss": 1.6482, "step": 3739 }, { "epoch": 0.9082078678970374, "grad_norm": 0.6282200813293457, "learning_rate": 0.0001, "loss": 1.7407, "step": 3740 }, { "epoch": 0.9084507042253521, "grad_norm": 0.6336793303489685, "learning_rate": 0.0001, "loss": 1.7984, "step": 3741 }, { "epoch": 0.9086935405536668, "grad_norm": 0.6165140271186829, "learning_rate": 0.0001, "loss": 1.8259, "step": 3742 }, { "epoch": 0.9089363768819816, "grad_norm": 0.5867929458618164, "learning_rate": 0.0001, "loss": 1.7165, "step": 3743 }, { "epoch": 0.9091792132102963, "grad_norm": 0.5789873600006104, "learning_rate": 0.0001, "loss": 1.4274, "step": 3744 }, { "epoch": 0.9094220495386109, "grad_norm": 0.6115802526473999, "learning_rate": 0.0001, "loss": 1.8345, "step": 3745 }, { "epoch": 0.9096648858669257, "grad_norm": 0.5854842066764832, "learning_rate": 0.0001, "loss": 1.6946, "step": 3746 }, { "epoch": 0.9099077221952404, "grad_norm": 0.5675647854804993, "learning_rate": 0.0001, "loss": 1.721, "step": 3747 }, { "epoch": 0.9101505585235551, "grad_norm": 0.6123351454734802, "learning_rate": 0.0001, "loss": 1.691, "step": 3748 }, { "epoch": 0.9103933948518699, "grad_norm": 0.6066859364509583, "learning_rate": 0.0001, "loss": 1.7858, "step": 3749 }, { "epoch": 0.9106362311801846, "grad_norm": 0.6148110032081604, "learning_rate": 0.0001, "loss": 1.8063, "step": 3750 }, { "epoch": 0.9108790675084992, "grad_norm": 0.58165043592453, "learning_rate": 0.0001, "loss": 1.7182, "step": 3751 }, { "epoch": 0.911121903836814, "grad_norm": 0.5953090190887451, "learning_rate": 0.0001, "loss": 1.7449, "step": 3752 }, { "epoch": 0.9113647401651287, "grad_norm": 0.5710526704788208, "learning_rate": 0.0001, "loss": 1.7502, "step": 3753 }, { "epoch": 0.9116075764934434, "grad_norm": 0.5716732144355774, "learning_rate": 0.0001, "loss": 1.6306, "step": 3754 }, { "epoch": 0.9118504128217582, "grad_norm": 0.576026439666748, "learning_rate": 0.0001, "loss": 1.7577, "step": 3755 }, { "epoch": 0.9120932491500728, "grad_norm": 0.5796546339988708, "learning_rate": 0.0001, "loss": 1.7471, "step": 3756 }, { "epoch": 0.9123360854783875, "grad_norm": 0.5911909341812134, "learning_rate": 0.0001, "loss": 1.6457, "step": 3757 }, { "epoch": 0.9125789218067023, "grad_norm": 0.5691782236099243, "learning_rate": 0.0001, "loss": 1.709, "step": 3758 }, { "epoch": 0.912821758135017, "grad_norm": 0.6342900395393372, "learning_rate": 0.0001, "loss": 1.7737, "step": 3759 }, { "epoch": 0.9130645944633318, "grad_norm": 0.5813829302787781, "learning_rate": 0.0001, "loss": 1.7218, "step": 3760 }, { "epoch": 0.9133074307916464, "grad_norm": 0.6647397875785828, "learning_rate": 0.0001, "loss": 1.7957, "step": 3761 }, { "epoch": 0.9135502671199611, "grad_norm": 0.5722330212593079, "learning_rate": 0.0001, "loss": 1.5423, "step": 3762 }, { "epoch": 0.9137931034482759, "grad_norm": 0.5854828357696533, "learning_rate": 0.0001, "loss": 1.6495, "step": 3763 }, { "epoch": 0.9140359397765906, "grad_norm": 0.6135445237159729, "learning_rate": 0.0001, "loss": 1.7936, "step": 3764 }, { "epoch": 0.9142787761049053, "grad_norm": 0.6264251470565796, "learning_rate": 0.0001, "loss": 1.869, "step": 3765 }, { "epoch": 0.91452161243322, "grad_norm": 0.5948366522789001, "learning_rate": 0.0001, "loss": 1.665, "step": 3766 }, { "epoch": 0.9147644487615347, "grad_norm": 0.5939854383468628, "learning_rate": 0.0001, "loss": 1.7082, "step": 3767 }, { "epoch": 0.9150072850898494, "grad_norm": 0.6219908595085144, "learning_rate": 0.0001, "loss": 1.7704, "step": 3768 }, { "epoch": 0.9152501214181642, "grad_norm": 0.5622676610946655, "learning_rate": 0.0001, "loss": 1.6219, "step": 3769 }, { "epoch": 0.9154929577464789, "grad_norm": 0.5718271732330322, "learning_rate": 0.0001, "loss": 1.6059, "step": 3770 }, { "epoch": 0.9157357940747936, "grad_norm": 0.5890514254570007, "learning_rate": 0.0001, "loss": 1.6326, "step": 3771 }, { "epoch": 0.9159786304031083, "grad_norm": 0.5604651570320129, "learning_rate": 0.0001, "loss": 1.6287, "step": 3772 }, { "epoch": 0.916221466731423, "grad_norm": 0.6045302748680115, "learning_rate": 0.0001, "loss": 1.8132, "step": 3773 }, { "epoch": 0.9164643030597377, "grad_norm": 0.5818734765052795, "learning_rate": 0.0001, "loss": 1.7577, "step": 3774 }, { "epoch": 0.9167071393880525, "grad_norm": 0.5890612006187439, "learning_rate": 0.0001, "loss": 1.5894, "step": 3775 }, { "epoch": 0.9169499757163672, "grad_norm": 0.5723394751548767, "learning_rate": 0.0001, "loss": 1.6875, "step": 3776 }, { "epoch": 0.9171928120446818, "grad_norm": 0.6129171848297119, "learning_rate": 0.0001, "loss": 1.8051, "step": 3777 }, { "epoch": 0.9174356483729966, "grad_norm": 0.5996651649475098, "learning_rate": 0.0001, "loss": 1.8676, "step": 3778 }, { "epoch": 0.9176784847013113, "grad_norm": 0.6562318205833435, "learning_rate": 0.0001, "loss": 1.744, "step": 3779 }, { "epoch": 0.917921321029626, "grad_norm": 0.5926910638809204, "learning_rate": 0.0001, "loss": 1.7269, "step": 3780 }, { "epoch": 0.9181641573579408, "grad_norm": 0.6157199144363403, "learning_rate": 0.0001, "loss": 1.8089, "step": 3781 }, { "epoch": 0.9184069936862554, "grad_norm": 0.6023468971252441, "learning_rate": 0.0001, "loss": 1.7532, "step": 3782 }, { "epoch": 0.9186498300145702, "grad_norm": 0.6074219942092896, "learning_rate": 0.0001, "loss": 1.7703, "step": 3783 }, { "epoch": 0.9188926663428849, "grad_norm": 0.9243726134300232, "learning_rate": 0.0001, "loss": 1.5755, "step": 3784 }, { "epoch": 0.9191355026711996, "grad_norm": 0.5886449813842773, "learning_rate": 0.0001, "loss": 1.8595, "step": 3785 }, { "epoch": 0.9193783389995144, "grad_norm": 0.6371552348136902, "learning_rate": 0.0001, "loss": 1.9148, "step": 3786 }, { "epoch": 0.919621175327829, "grad_norm": 0.6345494389533997, "learning_rate": 0.0001, "loss": 1.7892, "step": 3787 }, { "epoch": 0.9198640116561437, "grad_norm": 0.5974324345588684, "learning_rate": 0.0001, "loss": 1.8767, "step": 3788 }, { "epoch": 0.9201068479844585, "grad_norm": 0.5730004906654358, "learning_rate": 0.0001, "loss": 1.6995, "step": 3789 }, { "epoch": 0.9203496843127732, "grad_norm": 0.5770270824432373, "learning_rate": 0.0001, "loss": 1.8269, "step": 3790 }, { "epoch": 0.9205925206410879, "grad_norm": 0.5567600131034851, "learning_rate": 0.0001, "loss": 1.5861, "step": 3791 }, { "epoch": 0.9208353569694027, "grad_norm": 0.5911000967025757, "learning_rate": 0.0001, "loss": 1.7848, "step": 3792 }, { "epoch": 0.9210781932977173, "grad_norm": 0.5451039671897888, "learning_rate": 0.0001, "loss": 1.6602, "step": 3793 }, { "epoch": 0.921321029626032, "grad_norm": 0.6299775242805481, "learning_rate": 0.0001, "loss": 1.6146, "step": 3794 }, { "epoch": 0.9215638659543468, "grad_norm": 0.5983388423919678, "learning_rate": 0.0001, "loss": 1.692, "step": 3795 }, { "epoch": 0.9218067022826615, "grad_norm": 0.5751836895942688, "learning_rate": 0.0001, "loss": 1.6026, "step": 3796 }, { "epoch": 0.9220495386109762, "grad_norm": 0.5966198444366455, "learning_rate": 0.0001, "loss": 1.8375, "step": 3797 }, { "epoch": 0.922292374939291, "grad_norm": 0.5846254825592041, "learning_rate": 0.0001, "loss": 1.7074, "step": 3798 }, { "epoch": 0.9225352112676056, "grad_norm": 0.6296234726905823, "learning_rate": 0.0001, "loss": 1.8008, "step": 3799 }, { "epoch": 0.9227780475959203, "grad_norm": 0.5613099336624146, "learning_rate": 0.0001, "loss": 1.7208, "step": 3800 }, { "epoch": 0.9230208839242351, "grad_norm": 0.6608403325080872, "learning_rate": 0.0001, "loss": 1.7196, "step": 3801 }, { "epoch": 0.9232637202525498, "grad_norm": 0.6562487483024597, "learning_rate": 0.0001, "loss": 1.7521, "step": 3802 }, { "epoch": 0.9235065565808644, "grad_norm": 0.5922927260398865, "learning_rate": 0.0001, "loss": 1.6032, "step": 3803 }, { "epoch": 0.9237493929091792, "grad_norm": 0.601926326751709, "learning_rate": 0.0001, "loss": 1.5633, "step": 3804 }, { "epoch": 0.9239922292374939, "grad_norm": 0.6223918199539185, "learning_rate": 0.0001, "loss": 1.8285, "step": 3805 }, { "epoch": 0.9242350655658087, "grad_norm": 0.6224992275238037, "learning_rate": 0.0001, "loss": 1.871, "step": 3806 }, { "epoch": 0.9244779018941234, "grad_norm": 0.6175224184989929, "learning_rate": 0.0001, "loss": 1.7312, "step": 3807 }, { "epoch": 0.924720738222438, "grad_norm": 0.5862079858779907, "learning_rate": 0.0001, "loss": 1.6354, "step": 3808 }, { "epoch": 0.9249635745507528, "grad_norm": 0.6115992069244385, "learning_rate": 0.0001, "loss": 1.8493, "step": 3809 }, { "epoch": 0.9252064108790675, "grad_norm": 0.620858371257782, "learning_rate": 0.0001, "loss": 1.7691, "step": 3810 }, { "epoch": 0.9254492472073822, "grad_norm": 0.584667980670929, "learning_rate": 0.0001, "loss": 1.6955, "step": 3811 }, { "epoch": 0.925692083535697, "grad_norm": 0.5902495384216309, "learning_rate": 0.0001, "loss": 1.6646, "step": 3812 }, { "epoch": 0.9259349198640117, "grad_norm": 0.611920177936554, "learning_rate": 0.0001, "loss": 1.757, "step": 3813 }, { "epoch": 0.9261777561923263, "grad_norm": 0.5571722388267517, "learning_rate": 0.0001, "loss": 1.6666, "step": 3814 }, { "epoch": 0.9264205925206411, "grad_norm": 0.5735197067260742, "learning_rate": 0.0001, "loss": 1.7786, "step": 3815 }, { "epoch": 0.9266634288489558, "grad_norm": 0.5902324318885803, "learning_rate": 0.0001, "loss": 1.6966, "step": 3816 }, { "epoch": 0.9269062651772705, "grad_norm": 0.5405271649360657, "learning_rate": 0.0001, "loss": 1.5354, "step": 3817 }, { "epoch": 0.9271491015055853, "grad_norm": 0.5784160494804382, "learning_rate": 0.0001, "loss": 1.7828, "step": 3818 }, { "epoch": 0.9273919378339, "grad_norm": 0.6429668664932251, "learning_rate": 0.0001, "loss": 1.7877, "step": 3819 }, { "epoch": 0.9276347741622146, "grad_norm": 0.6807743906974792, "learning_rate": 0.0001, "loss": 1.8208, "step": 3820 }, { "epoch": 0.9278776104905294, "grad_norm": 0.6062427163124084, "learning_rate": 0.0001, "loss": 1.6174, "step": 3821 }, { "epoch": 0.9281204468188441, "grad_norm": 0.5669822096824646, "learning_rate": 0.0001, "loss": 1.6645, "step": 3822 }, { "epoch": 0.9283632831471588, "grad_norm": 0.6149395704269409, "learning_rate": 0.0001, "loss": 1.788, "step": 3823 }, { "epoch": 0.9286061194754736, "grad_norm": 0.6279157996177673, "learning_rate": 0.0001, "loss": 1.6565, "step": 3824 }, { "epoch": 0.9288489558037882, "grad_norm": 0.5734469294548035, "learning_rate": 0.0001, "loss": 1.5886, "step": 3825 }, { "epoch": 0.9290917921321029, "grad_norm": 0.5863770246505737, "learning_rate": 0.0001, "loss": 1.751, "step": 3826 }, { "epoch": 0.9293346284604177, "grad_norm": 0.5849167704582214, "learning_rate": 0.0001, "loss": 1.6578, "step": 3827 }, { "epoch": 0.9295774647887324, "grad_norm": 0.5744926333427429, "learning_rate": 0.0001, "loss": 1.8028, "step": 3828 }, { "epoch": 0.9298203011170472, "grad_norm": 0.5908921360969543, "learning_rate": 0.0001, "loss": 1.8405, "step": 3829 }, { "epoch": 0.9300631374453618, "grad_norm": 0.6141979098320007, "learning_rate": 0.0001, "loss": 1.7229, "step": 3830 }, { "epoch": 0.9303059737736765, "grad_norm": 0.6094731688499451, "learning_rate": 0.0001, "loss": 1.832, "step": 3831 }, { "epoch": 0.9305488101019913, "grad_norm": 0.615892231464386, "learning_rate": 0.0001, "loss": 1.9006, "step": 3832 }, { "epoch": 0.930791646430306, "grad_norm": 0.5927395820617676, "learning_rate": 0.0001, "loss": 1.7176, "step": 3833 }, { "epoch": 0.9310344827586207, "grad_norm": 0.5887064337730408, "learning_rate": 0.0001, "loss": 1.6997, "step": 3834 }, { "epoch": 0.9312773190869355, "grad_norm": 0.6049761772155762, "learning_rate": 0.0001, "loss": 1.6854, "step": 3835 }, { "epoch": 0.9315201554152501, "grad_norm": 0.5994516015052795, "learning_rate": 0.0001, "loss": 1.7624, "step": 3836 }, { "epoch": 0.9317629917435648, "grad_norm": 0.6419912576675415, "learning_rate": 0.0001, "loss": 1.8233, "step": 3837 }, { "epoch": 0.9320058280718796, "grad_norm": 0.5899677276611328, "learning_rate": 0.0001, "loss": 1.7693, "step": 3838 }, { "epoch": 0.9322486644001943, "grad_norm": 0.6384326219558716, "learning_rate": 0.0001, "loss": 1.8361, "step": 3839 }, { "epoch": 0.932491500728509, "grad_norm": 0.5595104694366455, "learning_rate": 0.0001, "loss": 1.7417, "step": 3840 }, { "epoch": 0.9327343370568237, "grad_norm": 0.5852187275886536, "learning_rate": 0.0001, "loss": 1.6915, "step": 3841 }, { "epoch": 0.9329771733851384, "grad_norm": 0.6391562223434448, "learning_rate": 0.0001, "loss": 1.8735, "step": 3842 }, { "epoch": 0.9332200097134531, "grad_norm": 0.5452216267585754, "learning_rate": 0.0001, "loss": 1.6466, "step": 3843 }, { "epoch": 0.9334628460417679, "grad_norm": 0.5904660224914551, "learning_rate": 0.0001, "loss": 1.7699, "step": 3844 }, { "epoch": 0.9337056823700826, "grad_norm": 0.6349517107009888, "learning_rate": 0.0001, "loss": 1.5479, "step": 3845 }, { "epoch": 0.9339485186983972, "grad_norm": 0.5781106948852539, "learning_rate": 0.0001, "loss": 1.6949, "step": 3846 }, { "epoch": 0.934191355026712, "grad_norm": 0.5469078421592712, "learning_rate": 0.0001, "loss": 1.5544, "step": 3847 }, { "epoch": 0.9344341913550267, "grad_norm": 0.5812481641769409, "learning_rate": 0.0001, "loss": 1.477, "step": 3848 }, { "epoch": 0.9346770276833414, "grad_norm": 0.5483930110931396, "learning_rate": 0.0001, "loss": 1.5285, "step": 3849 }, { "epoch": 0.9349198640116562, "grad_norm": 0.6049747467041016, "learning_rate": 0.0001, "loss": 1.7515, "step": 3850 }, { "epoch": 0.9351627003399708, "grad_norm": 0.6384432315826416, "learning_rate": 0.0001, "loss": 1.9378, "step": 3851 }, { "epoch": 0.9354055366682856, "grad_norm": 0.5857444405555725, "learning_rate": 0.0001, "loss": 1.7695, "step": 3852 }, { "epoch": 0.9356483729966003, "grad_norm": 0.5994032025337219, "learning_rate": 0.0001, "loss": 1.831, "step": 3853 }, { "epoch": 0.935891209324915, "grad_norm": 0.6487367749214172, "learning_rate": 0.0001, "loss": 1.8053, "step": 3854 }, { "epoch": 0.9361340456532298, "grad_norm": 0.6017345786094666, "learning_rate": 0.0001, "loss": 1.7729, "step": 3855 }, { "epoch": 0.9363768819815445, "grad_norm": 0.6166176199913025, "learning_rate": 0.0001, "loss": 1.8245, "step": 3856 }, { "epoch": 0.9366197183098591, "grad_norm": 0.5821506977081299, "learning_rate": 0.0001, "loss": 1.6508, "step": 3857 }, { "epoch": 0.9368625546381739, "grad_norm": 0.6487970948219299, "learning_rate": 0.0001, "loss": 1.6602, "step": 3858 }, { "epoch": 0.9371053909664886, "grad_norm": 0.5837119221687317, "learning_rate": 0.0001, "loss": 1.7526, "step": 3859 }, { "epoch": 0.9373482272948033, "grad_norm": 0.6360679268836975, "learning_rate": 0.0001, "loss": 1.8961, "step": 3860 }, { "epoch": 0.9375910636231181, "grad_norm": 0.6194024085998535, "learning_rate": 0.0001, "loss": 1.8107, "step": 3861 }, { "epoch": 0.9378338999514327, "grad_norm": 0.5554211139678955, "learning_rate": 0.0001, "loss": 1.6924, "step": 3862 }, { "epoch": 0.9380767362797474, "grad_norm": 0.6048312783241272, "learning_rate": 0.0001, "loss": 1.8423, "step": 3863 }, { "epoch": 0.9383195726080622, "grad_norm": 0.5703085064888, "learning_rate": 0.0001, "loss": 1.6215, "step": 3864 }, { "epoch": 0.9385624089363769, "grad_norm": 0.5596864819526672, "learning_rate": 0.0001, "loss": 1.5849, "step": 3865 }, { "epoch": 0.9388052452646916, "grad_norm": 0.6383177042007446, "learning_rate": 0.0001, "loss": 1.8643, "step": 3866 }, { "epoch": 0.9390480815930063, "grad_norm": 0.5903500914573669, "learning_rate": 0.0001, "loss": 1.6674, "step": 3867 }, { "epoch": 0.939290917921321, "grad_norm": 0.5811389684677124, "learning_rate": 0.0001, "loss": 1.6291, "step": 3868 }, { "epoch": 0.9395337542496357, "grad_norm": 0.5726868510246277, "learning_rate": 0.0001, "loss": 1.7148, "step": 3869 }, { "epoch": 0.9397765905779505, "grad_norm": 0.5806669592857361, "learning_rate": 0.0001, "loss": 1.5, "step": 3870 }, { "epoch": 0.9400194269062652, "grad_norm": 0.606339156627655, "learning_rate": 0.0001, "loss": 1.7749, "step": 3871 }, { "epoch": 0.9402622632345798, "grad_norm": 0.5703273415565491, "learning_rate": 0.0001, "loss": 1.601, "step": 3872 }, { "epoch": 0.9405050995628946, "grad_norm": 0.5913130044937134, "learning_rate": 0.0001, "loss": 1.763, "step": 3873 }, { "epoch": 0.9407479358912093, "grad_norm": 0.5414733290672302, "learning_rate": 0.0001, "loss": 1.522, "step": 3874 }, { "epoch": 0.940990772219524, "grad_norm": 0.570084273815155, "learning_rate": 0.0001, "loss": 1.7923, "step": 3875 }, { "epoch": 0.9412336085478388, "grad_norm": 0.591760516166687, "learning_rate": 0.0001, "loss": 1.6011, "step": 3876 }, { "epoch": 0.9414764448761535, "grad_norm": 0.6212313771247864, "learning_rate": 0.0001, "loss": 1.7228, "step": 3877 }, { "epoch": 0.9417192812044682, "grad_norm": 0.5751554369926453, "learning_rate": 0.0001, "loss": 1.5695, "step": 3878 }, { "epoch": 0.9419621175327829, "grad_norm": 0.6063977479934692, "learning_rate": 0.0001, "loss": 1.7936, "step": 3879 }, { "epoch": 0.9422049538610976, "grad_norm": 0.5817692279815674, "learning_rate": 0.0001, "loss": 1.6305, "step": 3880 }, { "epoch": 0.9424477901894124, "grad_norm": 0.580960214138031, "learning_rate": 0.0001, "loss": 1.7584, "step": 3881 }, { "epoch": 0.9426906265177271, "grad_norm": 0.5761325359344482, "learning_rate": 0.0001, "loss": 1.6608, "step": 3882 }, { "epoch": 0.9429334628460417, "grad_norm": 0.6162409782409668, "learning_rate": 0.0001, "loss": 1.8851, "step": 3883 }, { "epoch": 0.9431762991743565, "grad_norm": 0.5872670412063599, "learning_rate": 0.0001, "loss": 1.5741, "step": 3884 }, { "epoch": 0.9434191355026712, "grad_norm": 0.5654221773147583, "learning_rate": 0.0001, "loss": 1.6703, "step": 3885 }, { "epoch": 0.9436619718309859, "grad_norm": 0.6001012921333313, "learning_rate": 0.0001, "loss": 1.775, "step": 3886 }, { "epoch": 0.9439048081593007, "grad_norm": 0.6080231070518494, "learning_rate": 0.0001, "loss": 1.8407, "step": 3887 }, { "epoch": 0.9441476444876153, "grad_norm": 0.6112394332885742, "learning_rate": 0.0001, "loss": 1.8503, "step": 3888 }, { "epoch": 0.94439048081593, "grad_norm": 0.5936259627342224, "learning_rate": 0.0001, "loss": 1.7469, "step": 3889 }, { "epoch": 0.9446333171442448, "grad_norm": 0.6158880591392517, "learning_rate": 0.0001, "loss": 1.8293, "step": 3890 }, { "epoch": 0.9448761534725595, "grad_norm": 0.5968067646026611, "learning_rate": 0.0001, "loss": 1.6652, "step": 3891 }, { "epoch": 0.9451189898008742, "grad_norm": 0.62282395362854, "learning_rate": 0.0001, "loss": 1.8189, "step": 3892 }, { "epoch": 0.945361826129189, "grad_norm": 0.6170820593833923, "learning_rate": 0.0001, "loss": 1.6334, "step": 3893 }, { "epoch": 0.9456046624575036, "grad_norm": 0.6034669876098633, "learning_rate": 0.0001, "loss": 1.6424, "step": 3894 }, { "epoch": 0.9458474987858183, "grad_norm": 0.6236358880996704, "learning_rate": 0.0001, "loss": 1.729, "step": 3895 }, { "epoch": 0.9460903351141331, "grad_norm": 0.5863955616950989, "learning_rate": 0.0001, "loss": 1.7863, "step": 3896 }, { "epoch": 0.9463331714424478, "grad_norm": 0.6099282503128052, "learning_rate": 0.0001, "loss": 1.8143, "step": 3897 }, { "epoch": 0.9465760077707625, "grad_norm": 0.5919102430343628, "learning_rate": 0.0001, "loss": 1.778, "step": 3898 }, { "epoch": 0.9468188440990772, "grad_norm": 0.5809880495071411, "learning_rate": 0.0001, "loss": 1.8069, "step": 3899 }, { "epoch": 0.9470616804273919, "grad_norm": 0.6201245188713074, "learning_rate": 0.0001, "loss": 1.7953, "step": 3900 }, { "epoch": 0.9473045167557067, "grad_norm": 0.5886535048484802, "learning_rate": 0.0001, "loss": 1.7421, "step": 3901 }, { "epoch": 0.9475473530840214, "grad_norm": 0.5675178170204163, "learning_rate": 0.0001, "loss": 1.644, "step": 3902 }, { "epoch": 0.9477901894123361, "grad_norm": 0.5949831008911133, "learning_rate": 0.0001, "loss": 1.7078, "step": 3903 }, { "epoch": 0.9480330257406508, "grad_norm": 0.6057093739509583, "learning_rate": 0.0001, "loss": 1.7499, "step": 3904 }, { "epoch": 0.9482758620689655, "grad_norm": 0.6029499769210815, "learning_rate": 0.0001, "loss": 1.6558, "step": 3905 }, { "epoch": 0.9485186983972802, "grad_norm": 0.5812211632728577, "learning_rate": 0.0001, "loss": 1.7148, "step": 3906 }, { "epoch": 0.948761534725595, "grad_norm": 0.6465932726860046, "learning_rate": 0.0001, "loss": 1.7672, "step": 3907 }, { "epoch": 0.9490043710539097, "grad_norm": 0.5721454620361328, "learning_rate": 0.0001, "loss": 1.6448, "step": 3908 }, { "epoch": 0.9492472073822243, "grad_norm": 0.6058249473571777, "learning_rate": 0.0001, "loss": 1.8698, "step": 3909 }, { "epoch": 0.9494900437105391, "grad_norm": 0.6070011854171753, "learning_rate": 0.0001, "loss": 1.5805, "step": 3910 }, { "epoch": 0.9497328800388538, "grad_norm": 0.610149621963501, "learning_rate": 0.0001, "loss": 1.7704, "step": 3911 }, { "epoch": 0.9499757163671685, "grad_norm": 0.6089284420013428, "learning_rate": 0.0001, "loss": 1.7174, "step": 3912 }, { "epoch": 0.9502185526954833, "grad_norm": 0.5909119248390198, "learning_rate": 0.0001, "loss": 1.5908, "step": 3913 }, { "epoch": 0.950461389023798, "grad_norm": 0.62584388256073, "learning_rate": 0.0001, "loss": 1.6275, "step": 3914 }, { "epoch": 0.9507042253521126, "grad_norm": 0.5750866532325745, "learning_rate": 0.0001, "loss": 1.5785, "step": 3915 }, { "epoch": 0.9509470616804274, "grad_norm": 0.6291926503181458, "learning_rate": 0.0001, "loss": 1.7933, "step": 3916 }, { "epoch": 0.9511898980087421, "grad_norm": 0.5660816431045532, "learning_rate": 0.0001, "loss": 1.5611, "step": 3917 }, { "epoch": 0.9514327343370568, "grad_norm": 0.5772616267204285, "learning_rate": 0.0001, "loss": 1.7692, "step": 3918 }, { "epoch": 0.9516755706653716, "grad_norm": 0.6061490774154663, "learning_rate": 0.0001, "loss": 1.8757, "step": 3919 }, { "epoch": 0.9519184069936862, "grad_norm": 0.5998229384422302, "learning_rate": 0.0001, "loss": 1.7443, "step": 3920 }, { "epoch": 0.9521612433220009, "grad_norm": 0.5965450406074524, "learning_rate": 0.0001, "loss": 1.6834, "step": 3921 }, { "epoch": 0.9524040796503157, "grad_norm": 0.5807543396949768, "learning_rate": 0.0001, "loss": 1.707, "step": 3922 }, { "epoch": 0.9526469159786304, "grad_norm": 0.5883108973503113, "learning_rate": 0.0001, "loss": 1.6153, "step": 3923 }, { "epoch": 0.9528897523069452, "grad_norm": 0.6153419017791748, "learning_rate": 0.0001, "loss": 1.7335, "step": 3924 }, { "epoch": 0.9531325886352598, "grad_norm": 0.6349037885665894, "learning_rate": 0.0001, "loss": 1.6976, "step": 3925 }, { "epoch": 0.9533754249635745, "grad_norm": 0.6264722347259521, "learning_rate": 0.0001, "loss": 1.7219, "step": 3926 }, { "epoch": 0.9536182612918893, "grad_norm": 0.5886244177818298, "learning_rate": 0.0001, "loss": 1.689, "step": 3927 }, { "epoch": 0.953861097620204, "grad_norm": 0.589606761932373, "learning_rate": 0.0001, "loss": 1.6427, "step": 3928 }, { "epoch": 0.9541039339485187, "grad_norm": 0.6218578219413757, "learning_rate": 0.0001, "loss": 1.7403, "step": 3929 }, { "epoch": 0.9543467702768335, "grad_norm": 0.5539392828941345, "learning_rate": 0.0001, "loss": 1.6777, "step": 3930 }, { "epoch": 0.9545896066051481, "grad_norm": 0.6072409749031067, "learning_rate": 0.0001, "loss": 1.5139, "step": 3931 }, { "epoch": 0.9548324429334628, "grad_norm": 0.6281037330627441, "learning_rate": 0.0001, "loss": 1.7151, "step": 3932 }, { "epoch": 0.9550752792617776, "grad_norm": 0.7160969376564026, "learning_rate": 0.0001, "loss": 1.7379, "step": 3933 }, { "epoch": 0.9553181155900923, "grad_norm": 0.616183340549469, "learning_rate": 0.0001, "loss": 1.7787, "step": 3934 }, { "epoch": 0.955560951918407, "grad_norm": 0.594199538230896, "learning_rate": 0.0001, "loss": 1.8091, "step": 3935 }, { "epoch": 0.9558037882467217, "grad_norm": 0.5698363184928894, "learning_rate": 0.0001, "loss": 1.611, "step": 3936 }, { "epoch": 0.9560466245750364, "grad_norm": 0.6054688096046448, "learning_rate": 0.0001, "loss": 1.5187, "step": 3937 }, { "epoch": 0.9562894609033511, "grad_norm": 0.6152597069740295, "learning_rate": 0.0001, "loss": 1.6591, "step": 3938 }, { "epoch": 0.9565322972316659, "grad_norm": 0.6362252235412598, "learning_rate": 0.0001, "loss": 1.808, "step": 3939 }, { "epoch": 0.9567751335599806, "grad_norm": 0.6119574308395386, "learning_rate": 0.0001, "loss": 1.8636, "step": 3940 }, { "epoch": 0.9570179698882952, "grad_norm": 0.612122118473053, "learning_rate": 0.0001, "loss": 1.7372, "step": 3941 }, { "epoch": 0.95726080621661, "grad_norm": 0.5694190263748169, "learning_rate": 0.0001, "loss": 1.5142, "step": 3942 }, { "epoch": 0.9575036425449247, "grad_norm": 0.5553438663482666, "learning_rate": 0.0001, "loss": 1.6276, "step": 3943 }, { "epoch": 0.9577464788732394, "grad_norm": 0.5808476805686951, "learning_rate": 0.0001, "loss": 1.6529, "step": 3944 }, { "epoch": 0.9579893152015542, "grad_norm": 0.580889105796814, "learning_rate": 0.0001, "loss": 1.7251, "step": 3945 }, { "epoch": 0.9582321515298688, "grad_norm": 0.6135175824165344, "learning_rate": 0.0001, "loss": 1.8378, "step": 3946 }, { "epoch": 0.9584749878581836, "grad_norm": 0.5823888778686523, "learning_rate": 0.0001, "loss": 1.7151, "step": 3947 }, { "epoch": 0.9587178241864983, "grad_norm": 0.6322687864303589, "learning_rate": 0.0001, "loss": 1.8039, "step": 3948 }, { "epoch": 0.958960660514813, "grad_norm": 0.6384401321411133, "learning_rate": 0.0001, "loss": 1.7832, "step": 3949 }, { "epoch": 0.9592034968431278, "grad_norm": 0.6263468861579895, "learning_rate": 0.0001, "loss": 1.6898, "step": 3950 }, { "epoch": 0.9594463331714425, "grad_norm": 0.6187636852264404, "learning_rate": 0.0001, "loss": 1.9863, "step": 3951 }, { "epoch": 0.9596891694997571, "grad_norm": 0.5676218271255493, "learning_rate": 0.0001, "loss": 1.6597, "step": 3952 }, { "epoch": 0.9599320058280719, "grad_norm": 0.6038702130317688, "learning_rate": 0.0001, "loss": 1.6809, "step": 3953 }, { "epoch": 0.9601748421563866, "grad_norm": 0.5852795839309692, "learning_rate": 0.0001, "loss": 1.8271, "step": 3954 }, { "epoch": 0.9604176784847013, "grad_norm": 0.5519470572471619, "learning_rate": 0.0001, "loss": 1.6894, "step": 3955 }, { "epoch": 0.9606605148130161, "grad_norm": 0.6034982800483704, "learning_rate": 0.0001, "loss": 1.6728, "step": 3956 }, { "epoch": 0.9609033511413307, "grad_norm": 0.6074084043502808, "learning_rate": 0.0001, "loss": 1.8403, "step": 3957 }, { "epoch": 0.9611461874696454, "grad_norm": 0.6192346811294556, "learning_rate": 0.0001, "loss": 1.6587, "step": 3958 }, { "epoch": 0.9613890237979602, "grad_norm": 0.5913572907447815, "learning_rate": 0.0001, "loss": 1.6842, "step": 3959 }, { "epoch": 0.9616318601262749, "grad_norm": 0.5921082496643066, "learning_rate": 0.0001, "loss": 1.7138, "step": 3960 }, { "epoch": 0.9618746964545896, "grad_norm": 0.5722637176513672, "learning_rate": 0.0001, "loss": 1.6918, "step": 3961 }, { "epoch": 0.9621175327829043, "grad_norm": 0.6428258419036865, "learning_rate": 0.0001, "loss": 1.8678, "step": 3962 }, { "epoch": 0.962360369111219, "grad_norm": 0.5630905628204346, "learning_rate": 0.0001, "loss": 1.7039, "step": 3963 }, { "epoch": 0.9626032054395337, "grad_norm": 0.6388512253761292, "learning_rate": 0.0001, "loss": 1.9527, "step": 3964 }, { "epoch": 0.9628460417678485, "grad_norm": 0.6157130002975464, "learning_rate": 0.0001, "loss": 1.6171, "step": 3965 }, { "epoch": 0.9630888780961632, "grad_norm": 0.5768729448318481, "learning_rate": 0.0001, "loss": 1.6327, "step": 3966 }, { "epoch": 0.9633317144244778, "grad_norm": 0.5799568891525269, "learning_rate": 0.0001, "loss": 1.7477, "step": 3967 }, { "epoch": 0.9635745507527926, "grad_norm": 0.5764046311378479, "learning_rate": 0.0001, "loss": 1.782, "step": 3968 }, { "epoch": 0.9638173870811073, "grad_norm": 0.590692937374115, "learning_rate": 0.0001, "loss": 1.8864, "step": 3969 }, { "epoch": 0.9640602234094221, "grad_norm": 0.6519653797149658, "learning_rate": 0.0001, "loss": 1.9161, "step": 3970 }, { "epoch": 0.9643030597377368, "grad_norm": 0.6048383712768555, "learning_rate": 0.0001, "loss": 1.7699, "step": 3971 }, { "epoch": 0.9645458960660515, "grad_norm": 0.6238450407981873, "learning_rate": 0.0001, "loss": 1.8133, "step": 3972 }, { "epoch": 0.9647887323943662, "grad_norm": 0.5832772850990295, "learning_rate": 0.0001, "loss": 1.7083, "step": 3973 }, { "epoch": 0.9650315687226809, "grad_norm": 0.5764178037643433, "learning_rate": 0.0001, "loss": 1.7431, "step": 3974 }, { "epoch": 0.9652744050509956, "grad_norm": 0.6305683255195618, "learning_rate": 0.0001, "loss": 1.7541, "step": 3975 }, { "epoch": 0.9655172413793104, "grad_norm": 0.6307585835456848, "learning_rate": 0.0001, "loss": 2.0108, "step": 3976 }, { "epoch": 0.9657600777076251, "grad_norm": 0.6229358315467834, "learning_rate": 0.0001, "loss": 1.8295, "step": 3977 }, { "epoch": 0.9660029140359397, "grad_norm": 0.597227156162262, "learning_rate": 0.0001, "loss": 1.6777, "step": 3978 }, { "epoch": 0.9662457503642545, "grad_norm": 0.5758910179138184, "learning_rate": 0.0001, "loss": 1.7248, "step": 3979 }, { "epoch": 0.9664885866925692, "grad_norm": 0.5859194397926331, "learning_rate": 0.0001, "loss": 1.7211, "step": 3980 }, { "epoch": 0.9667314230208839, "grad_norm": 0.595125675201416, "learning_rate": 0.0001, "loss": 1.7135, "step": 3981 }, { "epoch": 0.9669742593491987, "grad_norm": 0.5936609506607056, "learning_rate": 0.0001, "loss": 1.6635, "step": 3982 }, { "epoch": 0.9672170956775133, "grad_norm": 0.6093140244483948, "learning_rate": 0.0001, "loss": 1.8882, "step": 3983 }, { "epoch": 0.967459932005828, "grad_norm": 0.6792926788330078, "learning_rate": 0.0001, "loss": 1.8406, "step": 3984 }, { "epoch": 0.9677027683341428, "grad_norm": 0.604537308216095, "learning_rate": 0.0001, "loss": 1.7376, "step": 3985 }, { "epoch": 0.9679456046624575, "grad_norm": 0.6674985289573669, "learning_rate": 0.0001, "loss": 1.8353, "step": 3986 }, { "epoch": 0.9681884409907722, "grad_norm": 0.6092299222946167, "learning_rate": 0.0001, "loss": 1.7397, "step": 3987 }, { "epoch": 0.968431277319087, "grad_norm": 0.6411168575286865, "learning_rate": 0.0001, "loss": 1.788, "step": 3988 }, { "epoch": 0.9686741136474016, "grad_norm": 0.5817126035690308, "learning_rate": 0.0001, "loss": 1.7381, "step": 3989 }, { "epoch": 0.9689169499757163, "grad_norm": 0.6701199412345886, "learning_rate": 0.0001, "loss": 1.8624, "step": 3990 }, { "epoch": 0.9691597863040311, "grad_norm": 0.5746298432350159, "learning_rate": 0.0001, "loss": 1.6093, "step": 3991 }, { "epoch": 0.9694026226323458, "grad_norm": 0.6374258995056152, "learning_rate": 0.0001, "loss": 1.7558, "step": 3992 }, { "epoch": 0.9696454589606606, "grad_norm": 0.5993824005126953, "learning_rate": 0.0001, "loss": 1.6389, "step": 3993 }, { "epoch": 0.9698882952889752, "grad_norm": 0.5874238610267639, "learning_rate": 0.0001, "loss": 1.5926, "step": 3994 }, { "epoch": 0.9701311316172899, "grad_norm": 0.550464391708374, "learning_rate": 0.0001, "loss": 1.5598, "step": 3995 }, { "epoch": 0.9703739679456047, "grad_norm": 0.6151759028434753, "learning_rate": 0.0001, "loss": 1.6834, "step": 3996 }, { "epoch": 0.9706168042739194, "grad_norm": 0.6637762188911438, "learning_rate": 0.0001, "loss": 1.8155, "step": 3997 }, { "epoch": 0.9708596406022341, "grad_norm": 0.5628734230995178, "learning_rate": 0.0001, "loss": 1.791, "step": 3998 }, { "epoch": 0.9711024769305489, "grad_norm": 0.579086184501648, "learning_rate": 0.0001, "loss": 1.6744, "step": 3999 }, { "epoch": 0.9713453132588635, "grad_norm": 0.6538147330284119, "learning_rate": 0.0001, "loss": 1.9069, "step": 4000 }, { "epoch": 0.9715881495871782, "grad_norm": 0.5787370800971985, "learning_rate": 0.0001, "loss": 1.5269, "step": 4001 }, { "epoch": 0.971830985915493, "grad_norm": 0.6167012453079224, "learning_rate": 0.0001, "loss": 1.8567, "step": 4002 }, { "epoch": 0.9720738222438077, "grad_norm": 0.6138943433761597, "learning_rate": 0.0001, "loss": 1.6673, "step": 4003 }, { "epoch": 0.9723166585721223, "grad_norm": 0.577503502368927, "learning_rate": 0.0001, "loss": 1.7194, "step": 4004 }, { "epoch": 0.9725594949004371, "grad_norm": 0.5734987854957581, "learning_rate": 0.0001, "loss": 1.6828, "step": 4005 }, { "epoch": 0.9728023312287518, "grad_norm": 0.5983854532241821, "learning_rate": 0.0001, "loss": 1.768, "step": 4006 }, { "epoch": 0.9730451675570665, "grad_norm": 0.6402791738510132, "learning_rate": 0.0001, "loss": 1.7208, "step": 4007 }, { "epoch": 0.9732880038853813, "grad_norm": 0.5972356200218201, "learning_rate": 0.0001, "loss": 1.6734, "step": 4008 }, { "epoch": 0.973530840213696, "grad_norm": 0.6086263060569763, "learning_rate": 0.0001, "loss": 1.7982, "step": 4009 }, { "epoch": 0.9737736765420106, "grad_norm": 0.5835751295089722, "learning_rate": 0.0001, "loss": 1.5219, "step": 4010 }, { "epoch": 0.9740165128703254, "grad_norm": 0.596291184425354, "learning_rate": 0.0001, "loss": 1.682, "step": 4011 }, { "epoch": 0.9742593491986401, "grad_norm": 0.5902321934700012, "learning_rate": 0.0001, "loss": 1.8671, "step": 4012 }, { "epoch": 0.9745021855269548, "grad_norm": 0.6232100129127502, "learning_rate": 0.0001, "loss": 1.7251, "step": 4013 }, { "epoch": 0.9747450218552696, "grad_norm": 0.5748656988143921, "learning_rate": 0.0001, "loss": 1.5278, "step": 4014 }, { "epoch": 0.9749878581835842, "grad_norm": 0.5785793662071228, "learning_rate": 0.0001, "loss": 1.6609, "step": 4015 }, { "epoch": 0.975230694511899, "grad_norm": 0.6358336806297302, "learning_rate": 0.0001, "loss": 1.8765, "step": 4016 }, { "epoch": 0.9754735308402137, "grad_norm": 0.5772257447242737, "learning_rate": 0.0001, "loss": 1.6432, "step": 4017 }, { "epoch": 0.9757163671685284, "grad_norm": 0.6292567253112793, "learning_rate": 0.0001, "loss": 1.7887, "step": 4018 }, { "epoch": 0.9759592034968432, "grad_norm": 0.6002832055091858, "learning_rate": 0.0001, "loss": 1.7679, "step": 4019 }, { "epoch": 0.9762020398251579, "grad_norm": 0.6181592345237732, "learning_rate": 0.0001, "loss": 1.6487, "step": 4020 }, { "epoch": 0.9764448761534725, "grad_norm": 0.581417441368103, "learning_rate": 0.0001, "loss": 1.709, "step": 4021 }, { "epoch": 0.9766877124817873, "grad_norm": 0.5849958062171936, "learning_rate": 0.0001, "loss": 1.6361, "step": 4022 }, { "epoch": 0.976930548810102, "grad_norm": 0.6539707779884338, "learning_rate": 0.0001, "loss": 1.8053, "step": 4023 }, { "epoch": 0.9771733851384167, "grad_norm": 0.6116283535957336, "learning_rate": 0.0001, "loss": 1.7819, "step": 4024 }, { "epoch": 0.9774162214667315, "grad_norm": 0.5561971664428711, "learning_rate": 0.0001, "loss": 1.5234, "step": 4025 }, { "epoch": 0.9776590577950461, "grad_norm": 0.6015164256095886, "learning_rate": 0.0001, "loss": 1.7181, "step": 4026 }, { "epoch": 0.9779018941233608, "grad_norm": 0.5934497117996216, "learning_rate": 0.0001, "loss": 1.7424, "step": 4027 }, { "epoch": 0.9781447304516756, "grad_norm": 0.5897216796875, "learning_rate": 0.0001, "loss": 1.686, "step": 4028 }, { "epoch": 0.9783875667799903, "grad_norm": 0.5853744745254517, "learning_rate": 0.0001, "loss": 1.6919, "step": 4029 }, { "epoch": 0.978630403108305, "grad_norm": 0.5475685000419617, "learning_rate": 0.0001, "loss": 1.5181, "step": 4030 }, { "epoch": 0.9788732394366197, "grad_norm": 0.5999184250831604, "learning_rate": 0.0001, "loss": 1.6645, "step": 4031 }, { "epoch": 0.9791160757649344, "grad_norm": 0.6003595590591431, "learning_rate": 0.0001, "loss": 1.7322, "step": 4032 }, { "epoch": 0.9793589120932491, "grad_norm": 0.5681313276290894, "learning_rate": 0.0001, "loss": 1.6405, "step": 4033 }, { "epoch": 0.9796017484215639, "grad_norm": 0.603021502494812, "learning_rate": 0.0001, "loss": 1.6253, "step": 4034 }, { "epoch": 0.9798445847498786, "grad_norm": 0.5827537178993225, "learning_rate": 0.0001, "loss": 1.6952, "step": 4035 }, { "epoch": 0.9800874210781932, "grad_norm": 0.6328116655349731, "learning_rate": 0.0001, "loss": 1.6679, "step": 4036 }, { "epoch": 0.980330257406508, "grad_norm": 0.5913118720054626, "learning_rate": 0.0001, "loss": 1.6791, "step": 4037 }, { "epoch": 0.9805730937348227, "grad_norm": 0.6244193911552429, "learning_rate": 0.0001, "loss": 1.8205, "step": 4038 }, { "epoch": 0.9808159300631375, "grad_norm": 0.6432669162750244, "learning_rate": 0.0001, "loss": 1.9614, "step": 4039 }, { "epoch": 0.9810587663914522, "grad_norm": 0.6130082607269287, "learning_rate": 0.0001, "loss": 1.7674, "step": 4040 }, { "epoch": 0.9813016027197669, "grad_norm": 0.6276662349700928, "learning_rate": 0.0001, "loss": 1.7292, "step": 4041 }, { "epoch": 0.9815444390480816, "grad_norm": 0.5937540531158447, "learning_rate": 0.0001, "loss": 1.6717, "step": 4042 }, { "epoch": 0.9817872753763963, "grad_norm": 0.5934786796569824, "learning_rate": 0.0001, "loss": 1.8394, "step": 4043 }, { "epoch": 0.982030111704711, "grad_norm": 0.6066287159919739, "learning_rate": 0.0001, "loss": 1.7155, "step": 4044 }, { "epoch": 0.9822729480330258, "grad_norm": 0.6309837698936462, "learning_rate": 0.0001, "loss": 1.8514, "step": 4045 }, { "epoch": 0.9825157843613405, "grad_norm": 0.5881063938140869, "learning_rate": 0.0001, "loss": 1.7806, "step": 4046 }, { "epoch": 0.9827586206896551, "grad_norm": 0.6197108626365662, "learning_rate": 0.0001, "loss": 1.6746, "step": 4047 }, { "epoch": 0.9830014570179699, "grad_norm": 0.580711305141449, "learning_rate": 0.0001, "loss": 1.7516, "step": 4048 }, { "epoch": 0.9832442933462846, "grad_norm": 0.5490559935569763, "learning_rate": 0.0001, "loss": 1.6718, "step": 4049 }, { "epoch": 0.9834871296745993, "grad_norm": 0.5962522625923157, "learning_rate": 0.0001, "loss": 1.7585, "step": 4050 }, { "epoch": 0.9837299660029141, "grad_norm": 0.6085889339447021, "learning_rate": 0.0001, "loss": 1.7069, "step": 4051 }, { "epoch": 0.9839728023312287, "grad_norm": 0.589601993560791, "learning_rate": 0.0001, "loss": 1.5646, "step": 4052 }, { "epoch": 0.9842156386595434, "grad_norm": 0.5880647897720337, "learning_rate": 0.0001, "loss": 1.6494, "step": 4053 }, { "epoch": 0.9844584749878582, "grad_norm": 0.6051363945007324, "learning_rate": 0.0001, "loss": 1.6335, "step": 4054 }, { "epoch": 0.9847013113161729, "grad_norm": 0.6068199276924133, "learning_rate": 0.0001, "loss": 1.8676, "step": 4055 }, { "epoch": 0.9849441476444876, "grad_norm": 0.5522249937057495, "learning_rate": 0.0001, "loss": 1.6681, "step": 4056 }, { "epoch": 0.9851869839728024, "grad_norm": 0.5948834419250488, "learning_rate": 0.0001, "loss": 1.8587, "step": 4057 }, { "epoch": 0.985429820301117, "grad_norm": 0.5941211581230164, "learning_rate": 0.0001, "loss": 1.7026, "step": 4058 }, { "epoch": 0.9856726566294317, "grad_norm": 0.6230162382125854, "learning_rate": 0.0001, "loss": 1.7857, "step": 4059 }, { "epoch": 0.9859154929577465, "grad_norm": 0.6194135546684265, "learning_rate": 0.0001, "loss": 1.691, "step": 4060 }, { "epoch": 0.9861583292860612, "grad_norm": 0.6586176156997681, "learning_rate": 0.0001, "loss": 1.8014, "step": 4061 }, { "epoch": 0.986401165614376, "grad_norm": 0.5540204644203186, "learning_rate": 0.0001, "loss": 1.5834, "step": 4062 }, { "epoch": 0.9866440019426906, "grad_norm": 0.5742043256759644, "learning_rate": 0.0001, "loss": 1.6177, "step": 4063 }, { "epoch": 0.9868868382710053, "grad_norm": 0.5965940356254578, "learning_rate": 0.0001, "loss": 1.7155, "step": 4064 }, { "epoch": 0.9871296745993201, "grad_norm": 0.62481290102005, "learning_rate": 0.0001, "loss": 1.7071, "step": 4065 }, { "epoch": 0.9873725109276348, "grad_norm": 0.5996125340461731, "learning_rate": 0.0001, "loss": 1.82, "step": 4066 }, { "epoch": 0.9876153472559495, "grad_norm": 0.5856594443321228, "learning_rate": 0.0001, "loss": 1.6899, "step": 4067 }, { "epoch": 0.9878581835842642, "grad_norm": 0.6302304863929749, "learning_rate": 0.0001, "loss": 1.7396, "step": 4068 }, { "epoch": 0.9881010199125789, "grad_norm": 0.5669844150543213, "learning_rate": 0.0001, "loss": 1.462, "step": 4069 }, { "epoch": 0.9883438562408936, "grad_norm": 0.6028769612312317, "learning_rate": 0.0001, "loss": 1.7367, "step": 4070 }, { "epoch": 0.9885866925692084, "grad_norm": 0.6249973177909851, "learning_rate": 0.0001, "loss": 1.7147, "step": 4071 }, { "epoch": 0.9888295288975231, "grad_norm": 0.5884829163551331, "learning_rate": 0.0001, "loss": 1.5704, "step": 4072 }, { "epoch": 0.9890723652258377, "grad_norm": 0.5409967303276062, "learning_rate": 0.0001, "loss": 1.5913, "step": 4073 }, { "epoch": 0.9893152015541525, "grad_norm": 0.7405964136123657, "learning_rate": 0.0001, "loss": 1.9007, "step": 4074 }, { "epoch": 0.9895580378824672, "grad_norm": 0.581794798374176, "learning_rate": 0.0001, "loss": 1.711, "step": 4075 }, { "epoch": 0.9898008742107819, "grad_norm": 0.5944003462791443, "learning_rate": 0.0001, "loss": 1.6429, "step": 4076 }, { "epoch": 0.9900437105390967, "grad_norm": 0.6132760047912598, "learning_rate": 0.0001, "loss": 1.7843, "step": 4077 }, { "epoch": 0.9902865468674114, "grad_norm": 0.5919248461723328, "learning_rate": 0.0001, "loss": 1.7671, "step": 4078 }, { "epoch": 0.990529383195726, "grad_norm": 0.5920957922935486, "learning_rate": 0.0001, "loss": 1.6523, "step": 4079 }, { "epoch": 0.9907722195240408, "grad_norm": 0.6020213961601257, "learning_rate": 0.0001, "loss": 1.7472, "step": 4080 }, { "epoch": 0.9910150558523555, "grad_norm": 0.6029257774353027, "learning_rate": 0.0001, "loss": 1.5674, "step": 4081 }, { "epoch": 0.9912578921806702, "grad_norm": 0.5782517194747925, "learning_rate": 0.0001, "loss": 1.6464, "step": 4082 }, { "epoch": 0.991500728508985, "grad_norm": 0.5874188542366028, "learning_rate": 0.0001, "loss": 1.6378, "step": 4083 }, { "epoch": 0.9917435648372996, "grad_norm": 0.6039769649505615, "learning_rate": 0.0001, "loss": 1.6136, "step": 4084 }, { "epoch": 0.9919864011656144, "grad_norm": 0.6164844036102295, "learning_rate": 0.0001, "loss": 1.8007, "step": 4085 }, { "epoch": 0.9922292374939291, "grad_norm": 0.6580904126167297, "learning_rate": 0.0001, "loss": 1.6925, "step": 4086 }, { "epoch": 0.9924720738222438, "grad_norm": 0.6556647419929504, "learning_rate": 0.0001, "loss": 1.7157, "step": 4087 }, { "epoch": 0.9927149101505586, "grad_norm": 0.5966843962669373, "learning_rate": 0.0001, "loss": 1.817, "step": 4088 }, { "epoch": 0.9929577464788732, "grad_norm": 0.6216123104095459, "learning_rate": 0.0001, "loss": 1.8487, "step": 4089 }, { "epoch": 0.9932005828071879, "grad_norm": 0.5934640169143677, "learning_rate": 0.0001, "loss": 1.7539, "step": 4090 }, { "epoch": 0.9934434191355027, "grad_norm": 0.6474573612213135, "learning_rate": 0.0001, "loss": 1.7928, "step": 4091 }, { "epoch": 0.9936862554638174, "grad_norm": 0.605293333530426, "learning_rate": 0.0001, "loss": 1.6372, "step": 4092 }, { "epoch": 0.9939290917921321, "grad_norm": 0.6208190321922302, "learning_rate": 0.0001, "loss": 1.5337, "step": 4093 }, { "epoch": 0.9941719281204469, "grad_norm": 0.6126958131790161, "learning_rate": 0.0001, "loss": 1.6373, "step": 4094 }, { "epoch": 0.9944147644487615, "grad_norm": 0.6111500859260559, "learning_rate": 0.0001, "loss": 1.802, "step": 4095 }, { "epoch": 0.9946576007770762, "grad_norm": 0.6183574795722961, "learning_rate": 0.0001, "loss": 1.7097, "step": 4096 }, { "epoch": 0.994900437105391, "grad_norm": 0.5912253260612488, "learning_rate": 0.0001, "loss": 1.7948, "step": 4097 }, { "epoch": 0.9951432734337057, "grad_norm": 0.5707657933235168, "learning_rate": 0.0001, "loss": 1.8227, "step": 4098 }, { "epoch": 0.9953861097620204, "grad_norm": 0.6282753944396973, "learning_rate": 0.0001, "loss": 1.6504, "step": 4099 }, { "epoch": 0.9956289460903351, "grad_norm": 0.5808796286582947, "learning_rate": 0.0001, "loss": 1.8258, "step": 4100 }, { "epoch": 0.9958717824186498, "grad_norm": 0.6238116025924683, "learning_rate": 0.0001, "loss": 1.8399, "step": 4101 }, { "epoch": 0.9961146187469645, "grad_norm": 0.6109184622764587, "learning_rate": 0.0001, "loss": 1.7161, "step": 4102 }, { "epoch": 0.9963574550752793, "grad_norm": 0.5754851698875427, "learning_rate": 0.0001, "loss": 1.7047, "step": 4103 }, { "epoch": 0.996600291403594, "grad_norm": 0.600270688533783, "learning_rate": 0.0001, "loss": 1.7249, "step": 4104 }, { "epoch": 0.9968431277319086, "grad_norm": 0.5850363969802856, "learning_rate": 0.0001, "loss": 1.6416, "step": 4105 }, { "epoch": 0.9970859640602234, "grad_norm": 0.5854186415672302, "learning_rate": 0.0001, "loss": 1.6658, "step": 4106 }, { "epoch": 0.9973288003885381, "grad_norm": 0.5924867391586304, "learning_rate": 0.0001, "loss": 1.678, "step": 4107 }, { "epoch": 0.9975716367168529, "grad_norm": 0.6060158610343933, "learning_rate": 0.0001, "loss": 1.6198, "step": 4108 }, { "epoch": 0.9978144730451676, "grad_norm": 0.5951091051101685, "learning_rate": 0.0001, "loss": 1.8025, "step": 4109 }, { "epoch": 0.9980573093734822, "grad_norm": 0.6506190896034241, "learning_rate": 0.0001, "loss": 1.6137, "step": 4110 }, { "epoch": 0.998300145701797, "grad_norm": 0.6137253046035767, "learning_rate": 0.0001, "loss": 1.6345, "step": 4111 }, { "epoch": 0.9985429820301117, "grad_norm": 0.5973034501075745, "learning_rate": 0.0001, "loss": 1.7829, "step": 4112 }, { "epoch": 0.9987858183584264, "grad_norm": 0.6223469972610474, "learning_rate": 0.0001, "loss": 1.8826, "step": 4113 }, { "epoch": 0.9990286546867412, "grad_norm": 0.6080133318901062, "learning_rate": 0.0001, "loss": 1.7747, "step": 4114 }, { "epoch": 0.9992714910150559, "grad_norm": 0.7053512930870056, "learning_rate": 0.0001, "loss": 1.9432, "step": 4115 }, { "epoch": 0.9995143273433705, "grad_norm": 0.6507899761199951, "learning_rate": 0.0001, "loss": 1.7915, "step": 4116 }, { "epoch": 0.9997571636716853, "grad_norm": 0.6092012524604797, "learning_rate": 0.0001, "loss": 1.6249, "step": 4117 }, { "epoch": 1.0, "grad_norm": 0.592926561832428, "learning_rate": 0.0001, "loss": 1.7663, "step": 4118 }, { "epoch": 1.0, "step": 4118, "total_flos": 6.212058917275435e+18, "train_loss": 1.7427738688708856, "train_runtime": 80585.133, "train_samples_per_second": 0.204, "train_steps_per_second": 0.051 } ], "logging_steps": 1.0, "max_steps": 4118, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.212058917275435e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }